From: Kent Overstreet Date: Fri, 6 Sep 2024 23:14:36 +0000 (-0400) Subject: bcachefs: bch2_ec_stripe_head_get() now checks for change in rw devices X-Git-Url: https://git.dujemihanovic.xyz/?a=commitdiff_plain;h=035d72f72c9172a29bba4e09620d286ed8496356;p=linux.git bcachefs: bch2_ec_stripe_head_get() now checks for change in rw devices This factors out ec_stripe_head_devs_update(), which initializes the bitmap of devices we're allocating from, and runs it every time c->rw_devs_change_count changes. We also cancel pending, not-yet-allocated stripes, since they may refer to devices that are no longer available. Signed-off-by: Kent Overstreet --- diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index f58c3e78ea45..f167ea454ded 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -1572,10 +1572,12 @@ void bch2_ec_do_stripe_creates(struct bch_fs *c) bch2_write_ref_put(c, BCH_WRITE_REF_stripe_create); } -static void ec_stripe_set_pending(struct bch_fs *c, struct ec_stripe_head *h) +static void ec_stripe_new_set_pending(struct bch_fs *c, struct ec_stripe_head *h) { struct ec_stripe_new *s = h->s; + lockdep_assert_held(&h->lock); + BUG_ON(!s->allocated && !s->err); h->s = NULL; @@ -1588,6 +1590,12 @@ static void ec_stripe_set_pending(struct bch_fs *c, struct ec_stripe_head *h) ec_stripe_new_put(c, s, STRIPE_REF_io); } +static void ec_stripe_new_cancel(struct bch_fs *c, struct ec_stripe_head *h, int err) +{ + h->s->err = err; + ec_stripe_new_set_pending(c, h); +} + void bch2_ec_bucket_cancel(struct bch_fs *c, struct open_bucket *ob) { struct ec_stripe_new *s = ob->ec; @@ -1711,27 +1719,14 @@ static int ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h) return 0; } -static struct ec_stripe_head * -ec_new_stripe_head_alloc(struct bch_fs *c, unsigned disk_label, - unsigned algo, unsigned redundancy, - enum bch_watermark watermark) +static void ec_stripe_head_devs_update(struct bch_fs *c, struct ec_stripe_head *h) { - struct ec_stripe_head *h; - - h = 
kzalloc(sizeof(*h), GFP_KERNEL); - if (!h) - return NULL; - - mutex_init(&h->lock); - BUG_ON(!mutex_trylock(&h->lock)); - - h->disk_label = disk_label; - h->algo = algo; - h->redundancy = redundancy; - h->watermark = watermark; + struct bch_devs_mask devs = h->devs; rcu_read_lock(); - h->devs = target_rw_devs(c, BCH_DATA_user, disk_label ? group_to_target(disk_label - 1) : 0); + h->devs = target_rw_devs(c, BCH_DATA_user, h->disk_label + ? group_to_target(h->disk_label - 1) + : 0); unsigned nr_devs = dev_mask_nr(&h->devs); for_each_member_device_rcu(c, ca, &h->devs) @@ -1741,6 +1736,7 @@ ec_new_stripe_head_alloc(struct bch_fs *c, unsigned disk_label, h->blocksize = pick_blocksize(c, &h->devs); + h->nr_active_devs = 0; for_each_member_device_rcu(c, ca, &h->devs) if (ca->mi.bucket_size == h->blocksize) h->nr_active_devs++; @@ -1751,7 +1747,9 @@ ec_new_stripe_head_alloc(struct bch_fs *c, unsigned disk_label, * If we only have redundancy + 1 devices, we're better off with just * replication: */ - if (h->nr_active_devs < h->redundancy + 2) { + h->insufficient_devs = h->nr_active_devs < h->redundancy + 2; + + if (h->insufficient_devs) { const char *err; if (nr_devs < h->redundancy + 2) @@ -1766,6 +1764,34 @@ ec_new_stripe_head_alloc(struct bch_fs *c, unsigned disk_label, h->nr_active_devs, h->redundancy + 2, err); } + struct bch_devs_mask devs_leaving; + bitmap_andnot(devs_leaving.d, devs.d, h->devs.d, BCH_SB_MEMBERS_MAX); + + if (h->s && !h->s->allocated && dev_mask_nr(&devs_leaving)) + ec_stripe_new_cancel(c, h, -EINTR); + + h->rw_devs_change_count = c->rw_devs_change_count; +} + +static struct ec_stripe_head * +ec_new_stripe_head_alloc(struct bch_fs *c, unsigned disk_label, + unsigned algo, unsigned redundancy, + enum bch_watermark watermark) +{ + struct ec_stripe_head *h; + + h = kzalloc(sizeof(*h), GFP_KERNEL); + if (!h) + return NULL; + + mutex_init(&h->lock); + BUG_ON(!mutex_trylock(&h->lock)); + + h->disk_label = disk_label; + h->algo = algo; + h->redundancy = 
redundancy; + h->watermark = watermark; + list_add(&h->list, &c->ec_stripe_head_list); return h; } @@ -1776,7 +1802,7 @@ void bch2_ec_stripe_head_put(struct bch_fs *c, struct ec_stripe_head *h) h->s->allocated && bitmap_weight(h->s->blocks_allocated, h->s->nr_data) == h->s->nr_data) - ec_stripe_set_pending(c, h); + ec_stripe_new_set_pending(c, h); mutex_unlock(&h->lock); } @@ -1801,7 +1827,7 @@ __bch2_ec_stripe_head_get(struct btree_trans *trans, if (test_bit(BCH_FS_going_ro, &c->flags)) { h = ERR_PTR(-BCH_ERR_erofs_no_writes); - goto found; + goto err; } list_for_each_entry(h, &c->ec_stripe_head_list, list) @@ -1810,18 +1836,23 @@ __bch2_ec_stripe_head_get(struct btree_trans *trans, h->redundancy == redundancy && h->watermark == watermark) { ret = bch2_trans_mutex_lock(trans, &h->lock); - if (ret) + if (ret) { h = ERR_PTR(ret); + goto err; + } goto found; } h = ec_new_stripe_head_alloc(c, disk_label, algo, redundancy, watermark); found: - if (!IS_ERR_OR_NULL(h) && - h->nr_active_devs < h->redundancy + 2) { + if (h->rw_devs_change_count != c->rw_devs_change_count) + ec_stripe_head_devs_update(c, h); + + if (h->insufficient_devs) { mutex_unlock(&h->lock); h = NULL; } +err: mutex_unlock(&c->ec_stripe_head_lock); return h; } @@ -2261,8 +2292,7 @@ static void __bch2_ec_stop(struct bch_fs *c, struct bch_dev *ca) } goto unlock; found: - h->s->err = -BCH_ERR_erofs_no_writes; - ec_stripe_set_pending(c, h); + ec_stripe_new_cancel(c, h, -BCH_ERR_erofs_no_writes); unlock: mutex_unlock(&h->lock); } diff --git a/fs/bcachefs/ec.h b/fs/bcachefs/ec.h index c432040238cd..43326370b410 100644 --- a/fs/bcachefs/ec.h +++ b/fs/bcachefs/ec.h @@ -192,6 +192,9 @@ struct ec_stripe_head { unsigned algo; unsigned redundancy; enum bch_watermark watermark; + bool insufficient_devs; + + unsigned long rw_devs_change_count; u64 nr_created;