btrfs: only unlock the to-be-submitted ranges inside a folio
author Qu Wenruo <wqu@suse.com>
	Mon, 2 Sep 2024 04:59:06 +0000 (14:29 +0930)
committer David Sterba <dsterba@suse.com>
	Tue, 10 Sep 2024 14:51:22 +0000 (16:51 +0200)
[SUBPAGE COMPRESSION LIMITS]
Currently inside writepage_delalloc(), if a delalloc range is going to
be submitted asynchronously (inline or compression, where the page
dirty/writeback/unlock are all handled at a different time, not at
submission time), then we return 1 and extent_writepage() will skip
the submission.
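
On the caller side this currently looks roughly like the following
(paraphrased from extent_writepage(); see the diff below):

    ret = writepage_delalloc(BTRFS_I(inode), folio, wbc);
    if (ret == 1)
        return 0;    /* the async path owns dirty/writeback/unlock */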

This is fine if the sector size matches the page size, but if the
sector size is smaller than the page size (aka, the subpage case), it
can be very problematic, for example for the following 64K page:

     0     16K     32K    48K     64K
     |/|   |///////|      |/|
       |                    |
       4K                   52K

Where |/| marks the dirty ranges we need to submit.

In the above case, each of the 3 ranges needs different handling:

- [0, 4K) needs to be submitted for regular write
  A single sector cannot be compressed.

- [16K, 32K) needs to be submitted for compressed write

- [48K, 52K) needs to be submitted for regular write.

Now if we try to submit [16K, 32K) for compressed write, we will
return 1 immediately, without submitting the remaining
[48K, 52K) range.

Furthermore, since extent_writepage() will exit without unlocking any
sectors, the already-submitted range [0, 4K) will not have its sector
unlocked.

That's the reason why, for now, subpage compression is only allowed
for full page ranges.
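
To make this concrete, here is a small illustration (our own sketch,
not kernel code) of the dirty sectors above as a bitmap, assuming a
4K sector size so the 64K folio spans 16 bits:

    /* Illustration only: 64K folio, 4K sectors => 16 bits. */
    unsigned long dirty = 0;
    dirty |= 1UL << 0;      /* [0, 4K)    -> sector 0     */
    dirty |= 0xfUL << 4;    /* [16K, 32K) -> sectors 4..7 */
    dirty |= 1UL << 12;     /* [48K, 52K) -> sector 12    */

    /*
     * With the current code, returning 1 for the compressed range
     * abandons sectors 0 and 12: bit 0 is submitted but never
     * unlocked, and bit 12 is never submitted at all.
     */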

[ENHANCEMENT]
- Introduce a submission bitmap at btrfs_bio_ctrl::submit_bitmap
  This records which sectors will be submitted by extent_writepage_io().
  This allows us to track which sectors need to be submitted, and thus
  which sectors must later be properly unlocked (see the sketch after
  this list).

  For asynchronously submitted ranges (inline/compression), the
  corresponding bits will be cleared from that bitmap.

- Only return 1 if no sector needs to be submitted in
  writepage_delalloc()

- Only submit sectors marked by the submission bitmap inside
  extent_writepage_io()
  So we won't touch the asynchronously submitted parts.

- Introduce btrfs_folio_end_writer_lock_bitmap() helper
  This will only unlock the sectors specified by the @bitmap
  parameter, to avoid touching the ranges submitted asynchronously.
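
Putting these together, the new flow is roughly the following (a
condensed pseudo-C sketch of the diff below, not verbatim kernel
code):

    extent_writepage(folio)
        bio_ctrl->submit_bitmap = (unsigned long)-1;  /* default: all */
        writepage_delalloc()
            submit_bitmap = subpage dirty bitmap      /* or 1 if not subpage */
            for each delalloc range inside the folio:
                ret = btrfs_run_delalloc_range()
                if (ret > 0)  /* async inline/compression */
                    bitmap_clear(&submit_bitmap, start_bit, nbits)
            if (bitmap_empty(&submit_bitmap))
                return 1      /* everything is handled asynchronously */
        extent_writepage_io()
            for_each_set_bit(bit, &submit_bitmap)
                submit the sector at @bit
        btrfs_folio_end_writer_lock_bitmap(fs_info, folio, submit_bitmap)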

Please note that, since subpage compression is still limited to
page-aligned ranges, this change is only a preparation for future
sector-perfect compression support for subpage.

Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
fs/btrfs/extent_io.c
fs/btrfs/subpage.c
fs/btrfs/subpage.h

index 70be1150c34e318b8acf899ffc1766de537eb244..39c9677c47d5a8155d545ec00baf7444d5c6a6df 100644 (file)
@@ -101,6 +101,13 @@ struct btrfs_bio_ctrl {
        blk_opf_t opf;
        btrfs_bio_end_io_t end_io_func;
        struct writeback_control *wbc;
+
+       /*
+        * The sectors of the page which are going to be submitted by
+        * extent_writepage_io().
+        * This is to avoid touching ranges covered by compression/inline.
+        */
+       unsigned long submit_bitmap;
 };
 
 static void submit_one_bio(struct btrfs_bio_ctrl *bio_ctrl)
@@ -1106,9 +1113,10 @@ int btrfs_read_folio(struct file *file, struct folio *folio)
  */
 static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
                                                 struct folio *folio,
-                                                struct writeback_control *wbc)
+                                                struct btrfs_bio_ctrl *bio_ctrl)
 {
        struct btrfs_fs_info *fs_info = inode_to_fs_info(&inode->vfs_inode);
+       struct writeback_control *wbc = bio_ctrl->wbc;
        const bool is_subpage = btrfs_is_subpage(fs_info, folio->mapping);
        const u64 page_start = folio_pos(folio);
        const u64 page_end = page_start + folio_size(folio) - 1;
@@ -1123,6 +1131,14 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
        u64 delalloc_to_write = 0;
        int ret = 0;
 
+       /* Save the dirty bitmap as our submission bitmap will be a subset of it. */
+       if (btrfs_is_subpage(fs_info, inode->vfs_inode.i_mapping)) {
+               ASSERT(fs_info->sectors_per_page > 1);
+               btrfs_get_subpage_dirty_bitmap(fs_info, folio, &bio_ctrl->submit_bitmap);
+       } else {
+               bio_ctrl->submit_bitmap = 1;
+       }
+
        /* Lock all (subpage) delalloc ranges inside the folio first. */
        while (delalloc_start < page_end) {
                delalloc_end = page_end;
@@ -1190,22 +1206,18 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
                }
 
                /*
-                * We can hit btrfs_run_delalloc_range() with >0 return value.
-                *
-                * This happens when either the IO is already done and folio
-                * unlocked (inline) or the IO submission and folio unlock would
-                * be handled as async (compression).
-                *
-                * Inline is only possible for regular sectorsize for now.
-                *
-                * Compression is possible for both subpage and regular cases,
-                * but even for subpage compression only happens for page aligned
-                * range, thus the found delalloc range must go beyond current
-                * folio.
+                * We have some ranges that are going to be submitted asynchronously
+                * (compression or inline).  These ranges have their own control
+                * over when to unlock the pages.  We should not touch them
+                * anymore, so clear the ranges from the submission bitmap.
                 */
-               if (ret > 0)
-                       ASSERT(!is_subpage || found_start + found_len >= page_end);
-
+               if (ret > 0) {
+                       unsigned int start_bit = (found_start - page_start) >>
+                                                fs_info->sectorsize_bits;
+                       unsigned int end_bit = (min(page_end + 1, found_start + found_len) -
+                                               page_start) >> fs_info->sectorsize_bits;
+                       bitmap_clear(&bio_ctrl->submit_bitmap, start_bit, end_bit - start_bit);
+               }
                /*
                 * Above btrfs_run_delalloc_range() may have unlocked the folio,
                 * thus for the last range, we cannot touch the folio anymore.
@@ -1230,10 +1242,10 @@ out:
                DIV_ROUND_UP(delalloc_end + 1 - page_start, PAGE_SIZE);
 
        /*
-        * If btrfs_run_dealloc_range() already started I/O and unlocked
-        * the folios, we just need to account for them here.
+        * If all ranges are submitted asynchronously, we just need to account
+        * for them here.
         */
-       if (ret == 1) {
+       if (bitmap_empty(&bio_ctrl->submit_bitmap, fs_info->sectors_per_page)) {
                wbc->nr_to_write -= delalloc_to_write;
                return 1;
        }
@@ -1331,15 +1343,6 @@ static noinline_for_stack int extent_writepage_io(struct btrfs_inode *inode,
 {
        struct btrfs_fs_info *fs_info = inode->root->fs_info;
        unsigned long range_bitmap = 0;
-       /*
-        * This is the default value for sectorsize == PAGE_SIZE case.
-        * We known we need to write the dirty sector (aka the page),
-        * even if the page is not dirty (we cleared it before entering).
-        *
-        * For subpage cases we will get the correct bitmap later.
-        */
-       unsigned long dirty_bitmap = 1;
-       unsigned int bitmap_size = 1;
        bool submitted_io = false;
        const u64 folio_start = folio_pos(folio);
        u64 cur;
@@ -1357,18 +1360,14 @@ static noinline_for_stack int extent_writepage_io(struct btrfs_inode *inode,
                return 1;
        }
 
-       if (btrfs_is_subpage(fs_info, inode->vfs_inode.i_mapping)) {
-               ASSERT(fs_info->sectors_per_page > 1);
-               btrfs_get_subpage_dirty_bitmap(fs_info, folio, &dirty_bitmap);
-               bitmap_size = fs_info->sectors_per_page;
-       }
        for (cur = start; cur < start + len; cur += fs_info->sectorsize)
                set_bit((cur - folio_start) >> fs_info->sectorsize_bits, &range_bitmap);
-       bitmap_and(&dirty_bitmap, &dirty_bitmap, &range_bitmap, bitmap_size);
+       bitmap_and(&bio_ctrl->submit_bitmap, &bio_ctrl->submit_bitmap, &range_bitmap,
+                  fs_info->sectors_per_page);
 
        bio_ctrl->end_io_func = end_bbio_data_write;
 
-       for_each_set_bit(bit, &dirty_bitmap, bitmap_size) {
+       for_each_set_bit(bit, &bio_ctrl->submit_bitmap, fs_info->sectors_per_page) {
                cur = folio_pos(folio) + (bit << fs_info->sectorsize_bits);
 
                if (cur >= i_size) {
@@ -1421,6 +1420,7 @@ out:
 static int extent_writepage(struct folio *folio, struct btrfs_bio_ctrl *bio_ctrl)
 {
        struct inode *inode = folio->mapping->host;
+       struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
        const u64 page_start = folio_pos(folio);
        int ret;
        size_t pg_offset;
@@ -1442,11 +1442,16 @@ static int extent_writepage(struct folio *folio, struct btrfs_bio_ctrl *bio_ctrl
        if (folio->index == end_index)
                folio_zero_range(folio, pg_offset, folio_size(folio) - pg_offset);
 
+       /*
+        * Default to unlocking the whole folio.
+        * The proper bitmap will not be initialized until writepage_delalloc().
+        */
+       bio_ctrl->submit_bitmap = (unsigned long)-1;
        ret = set_folio_extent_mapped(folio);
        if (ret < 0)
                goto done;
 
-       ret = writepage_delalloc(BTRFS_I(inode), folio, bio_ctrl->wbc);
+       ret = writepage_delalloc(BTRFS_I(inode), folio, bio_ctrl);
        if (ret == 1)
                return 0;
        if (ret)
@@ -1466,8 +1471,11 @@ done:
                mapping_set_error(folio->mapping, ret);
        }
 
-       btrfs_folio_end_writer_lock(inode_to_fs_info(inode), folio,
-                                   page_start, PAGE_SIZE);
+       /*
+        * Only unlock the ranges that are submitted, as there can be some
+        * asynchronously submitted ranges inside the folio.
+        */
+       btrfs_folio_end_writer_lock_bitmap(fs_info, folio, bio_ctrl->submit_bitmap);
        ASSERT(ret <= 0);
        return ret;
 }
@@ -2210,6 +2218,11 @@ void extent_write_locked_range(struct inode *inode, const struct folio *locked_f
                if (pages_dirty && folio != locked_folio)
                        ASSERT(folio_test_dirty(folio));
 
+               /*
+                * Set the submission bitmap to submit all sectors.
+                * extent_writepage_io() will do the truncation correctly.
+                */
+               bio_ctrl.submit_bitmap = (unsigned long)-1;
                ret = extent_writepage_io(BTRFS_I(inode), folio, cur, cur_len,
                                          &bio_ctrl, i_size);
                if (ret == 1)
index 83660fa82c3236deb21a26b1614ccfb696f7ffe0..fe4d719d506bf5cbe1d3b10a92adfcd3f5c5ea9b 100644 (file)
@@ -424,6 +424,39 @@ void btrfs_folio_end_writer_lock(const struct btrfs_fs_info *fs_info,
                folio_unlock(folio);
 }
 
+void btrfs_folio_end_writer_lock_bitmap(const struct btrfs_fs_info *fs_info,
+                                       struct folio *folio, unsigned long bitmap)
+{
+       struct btrfs_subpage *subpage = folio_get_private(folio);
+       const int start_bit = fs_info->sectors_per_page * btrfs_bitmap_nr_locked;
+       unsigned long flags;
+       bool last = false;
+       int cleared = 0;
+       int bit;
+
+       if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, folio->mapping)) {
+               folio_unlock(folio);
+               return;
+       }
+
+       if (atomic_read(&subpage->writers) == 0) {
+               /* No writers, locked by plain lock_page(). */
+               folio_unlock(folio);
+               return;
+       }
+
+       spin_lock_irqsave(&subpage->lock, flags);
+       for_each_set_bit(bit, &bitmap, fs_info->sectors_per_page) {
+               if (test_and_clear_bit(bit + start_bit, subpage->bitmaps))
+                       cleared++;
+       }
+       ASSERT(atomic_read(&subpage->writers) >= cleared);
+       last = atomic_sub_and_test(cleared, &subpage->writers);
+       spin_unlock_irqrestore(&subpage->lock, flags);
+       if (last)
+               folio_unlock(folio);
+}
+
 #define subpage_test_bitmap_all_set(fs_info, subpage, name)            \
        bitmap_test_range_all_set(subpage->bitmaps,                     \
                        fs_info->sectors_per_page * btrfs_bitmap_nr_##name, \
index f805261e0999c3a84abe7aea0090a57b38dd83c7..4b85d91d0e18b01114e20b2095e5557261b288a5 100644 (file)
@@ -106,6 +106,8 @@ void btrfs_folio_end_writer_lock(const struct btrfs_fs_info *fs_info,
                                 struct folio *folio, u64 start, u32 len);
 void btrfs_folio_set_writer_lock(const struct btrfs_fs_info *fs_info,
                                 struct folio *folio, u64 start, u32 len);
+void btrfs_folio_end_writer_lock_bitmap(const struct btrfs_fs_info *fs_info,
+                                       struct folio *folio, unsigned long bitmap);
 bool btrfs_subpage_find_writer_locked(const struct btrfs_fs_info *fs_info,
                                      struct folio *folio, u64 search_start,
                                      u64 *found_start_ret, u32 *found_len_ret);