]> git.dujemihanovic.xyz Git - linux.git/commitdiff
bcachefs: JOURNAL_SPACE_LOW
authorKent Overstreet <kent.overstreet@linux.dev>
Sat, 6 Apr 2024 03:27:27 +0000 (23:27 -0400)
committerKent Overstreet <kent.overstreet@linux.dev>
Sat, 6 Apr 2024 17:50:26 +0000 (13:50 -0400)
"bcachefs; Fix deadlock in bch2_btree_update_start()" was a significant
performance regression (nearly 50%) on multithreaded random writes with
fio.

The reason is that the journal watermark checks multiple things,
including the state of the btree write buffer, and on multithreaded
update heavy workloads we're bottleneked on write buffer flushing - we
don't want kicknig off btree updates to depend on the state of the write
buffer.

This isn't strictly correct; the interior btree update path does do
write buffer updates, but it's a tiny fraction of total accounting
updates and we're more concerned with space in the journal itself.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/btree_key_cache.c
fs/bcachefs/btree_update_interior.c
fs/bcachefs/journal_reclaim.c
fs/bcachefs/journal_types.h
fs/bcachefs/util.h

index 581edcb0911bfa39e9ec6242686bd213c47f352c..4ae7890f07c6e39f9acb1c3f8e85303a2b7369c2 100644 (file)
@@ -659,7 +659,7 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans,
                commit_flags |= BCH_WATERMARK_reclaim;
 
        if (ck->journal.seq != journal_last_seq(j) ||
-           j->watermark == BCH_WATERMARK_stripe)
+           !test_bit(JOURNAL_SPACE_LOW, &c->journal.flags))
                commit_flags |= BCH_TRANS_COMMIT_no_journal_res;
 
        ret   = bch2_btree_iter_traverse(&b_iter) ?:
index 24fbd5be70a202ede1d00bcc5c45e379bc7d4be3..a4a63e363047dbe66a1bcb6d0b83799c1e030cae 100644 (file)
@@ -1125,18 +1125,14 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
        flags &= ~BCH_WATERMARK_MASK;
        flags |= watermark;
 
-       if (watermark < c->journal.watermark) {
-               struct journal_res res = { 0 };
-               unsigned journal_flags = watermark|JOURNAL_RES_GET_CHECK;
+       if (watermark < BCH_WATERMARK_reclaim &&
+           test_bit(JOURNAL_SPACE_LOW, &c->journal.flags)) {
+               if (flags & BCH_TRANS_COMMIT_journal_reclaim)
+                       return ERR_PTR(-BCH_ERR_journal_reclaim_would_deadlock);
 
-               if ((flags & BCH_TRANS_COMMIT_journal_reclaim) &&
-                   watermark < BCH_WATERMARK_reclaim)
-                       journal_flags |= JOURNAL_RES_GET_NONBLOCK;
-
-               ret = drop_locks_do(trans,
-                       bch2_journal_res_get(&c->journal, &res, 1, journal_flags));
-               if (bch2_err_matches(ret, BCH_ERR_operation_blocked))
-                       ret = -BCH_ERR_journal_reclaim_would_deadlock;
+               bch2_trans_unlock(trans);
+               wait_event(c->journal.wait, !test_bit(JOURNAL_SPACE_LOW, &c->journal.flags));
+               ret = bch2_trans_relock(trans);
                if (ret)
                        return ERR_PTR(ret);
        }
index ab811c0dad26accfb4924eaef4cccb3ab957087c..04a577848b015cd900a1a040ec0565ffb2f69811 100644 (file)
@@ -67,6 +67,8 @@ void bch2_journal_set_watermark(struct journal *j)
            track_event_change(&c->times[BCH_TIME_blocked_write_buffer_full], low_on_wb))
                trace_and_count(c, journal_full, c);
 
+       mod_bit(JOURNAL_SPACE_LOW, &j->flags, low_on_space || low_on_pin);
+
        swap(watermark, j->watermark);
        if (watermark > j->watermark)
                journal_wake(j);
index 8c053cb64ca5ee25b9a5b2613f2fcd9e03d517d3..b5161b5d76a00874ed9ed88a0969927f2cfc9dbe 100644 (file)
@@ -134,6 +134,7 @@ enum journal_flags {
        JOURNAL_STARTED,
        JOURNAL_MAY_SKIP_FLUSH,
        JOURNAL_NEED_FLUSH_WRITE,
+       JOURNAL_SPACE_LOW,
 };
 
 /* Reasons we may fail to get a journal reservation: */
index de639e8a3ab5381e1cde8ec68df78493af4b43ab..5cf885b09986ac95effa15f7a37fc78bd56323cb 100644 (file)
@@ -788,6 +788,14 @@ static inline int copy_from_user_errcode(void *to, const void __user *from, unsi
 
 #endif
 
+static inline void mod_bit(long nr, volatile unsigned long *addr, bool v)
+{
+       if (v)
+               set_bit(nr, addr);
+       else
+               clear_bit(nr, addr);
+}
+
 static inline void __set_bit_le64(size_t bit, __le64 *addr)
 {
        addr[bit / 64] |= cpu_to_le64(BIT_ULL(bit % 64));