bcachefs: bch2_ec_stripe_head_get() now checks for change in rw devices
author    Kent Overstreet <kent.overstreet@linux.dev>
          Fri, 6 Sep 2024 23:14:36 +0000 (19:14 -0400)
committer Kent Overstreet <kent.overstreet@linux.dev>
          Sat, 21 Sep 2024 15:39:49 +0000 (11:39 -0400)
This factors out ec_stripe_head_devs_update(), which initializes the
bitmap of devices we're allocating from, and runs it every time
c->rw_devs_change_count changes.
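
As a minimal sketch of the pattern (condensed from the
__bch2_ec_stripe_head_get() hunk below, with locking and error paths
elided), the head lookup now compares a cached counter against the
filesystem-wide one and refreshes the device bitmap when they differ:

	if (h->rw_devs_change_count != c->rw_devs_change_count)
		ec_stripe_head_devs_update(c, h);

	if (h->insufficient_devs) {
		/* fewer than redundancy + 2 usable devices: fall back to replication */
		mutex_unlock(&h->lock);
		h = NULL;
	}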

We also cancel pending stripes that have not yet been allocated, since
they may refer to devices that are no longer available.
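
Concretely (again condensed from the patch), the update computes which
devices dropped out of the mask and cancels an in-flight, unallocated
stripe; ec_stripe_new_cancel() records the error before moving the
stripe to the pending list, which satisfies the
BUG_ON(!s->allocated && !s->err) assertion in
ec_stripe_new_set_pending():

	struct bch_devs_mask devs_leaving;
	bitmap_andnot(devs_leaving.d, devs.d, h->devs.d, BCH_SB_MEMBERS_MAX);

	if (h->s && !h->s->allocated && dev_mask_nr(&devs_leaving))
		ec_stripe_new_cancel(c, h, -EINTR);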

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/ec.c
fs/bcachefs/ec.h

index f58c3e78ea45b1befedf972f7493bd867782c922..f167ea454ded17a6245655f5fbfc57cf05e8d2b4 100644 (file)
@@ -1572,10 +1572,12 @@ void bch2_ec_do_stripe_creates(struct bch_fs *c)
                bch2_write_ref_put(c, BCH_WRITE_REF_stripe_create);
 }
 
-static void ec_stripe_set_pending(struct bch_fs *c, struct ec_stripe_head *h)
+static void ec_stripe_new_set_pending(struct bch_fs *c, struct ec_stripe_head *h)
 {
        struct ec_stripe_new *s = h->s;
 
+       lockdep_assert_held(&h->lock);
+
        BUG_ON(!s->allocated && !s->err);
 
        h->s            = NULL;
@@ -1588,6 +1590,12 @@ static void ec_stripe_set_pending(struct bch_fs *c, struct ec_stripe_head *h)
        ec_stripe_new_put(c, s, STRIPE_REF_io);
 }
 
+static void ec_stripe_new_cancel(struct bch_fs *c, struct ec_stripe_head *h, int err)
+{
+       h->s->err = err;
+       ec_stripe_new_set_pending(c, h);
+}
+
 void bch2_ec_bucket_cancel(struct bch_fs *c, struct open_bucket *ob)
 {
        struct ec_stripe_new *s = ob->ec;
@@ -1711,27 +1719,14 @@ static int ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h)
        return 0;
 }
 
-static struct ec_stripe_head *
-ec_new_stripe_head_alloc(struct bch_fs *c, unsigned disk_label,
-                        unsigned algo, unsigned redundancy,
-                        enum bch_watermark watermark)
+static void ec_stripe_head_devs_update(struct bch_fs *c, struct ec_stripe_head *h)
 {
-       struct ec_stripe_head *h;
-
-       h = kzalloc(sizeof(*h), GFP_KERNEL);
-       if (!h)
-               return NULL;
-
-       mutex_init(&h->lock);
-       BUG_ON(!mutex_trylock(&h->lock));
-
-       h->disk_label   = disk_label;
-       h->algo         = algo;
-       h->redundancy   = redundancy;
-       h->watermark    = watermark;
+       struct bch_devs_mask devs = h->devs;
 
        rcu_read_lock();
-       h->devs = target_rw_devs(c, BCH_DATA_user, disk_label ? group_to_target(disk_label - 1) : 0);
+       h->devs = target_rw_devs(c, BCH_DATA_user, h->disk_label
+                                ? group_to_target(h->disk_label - 1)
+                                : 0);
        unsigned nr_devs = dev_mask_nr(&h->devs);
 
        for_each_member_device_rcu(c, ca, &h->devs)
@@ -1741,6 +1736,7 @@ ec_new_stripe_head_alloc(struct bch_fs *c, unsigned disk_label,
 
        h->blocksize = pick_blocksize(c, &h->devs);
 
+       h->nr_active_devs = 0;
        for_each_member_device_rcu(c, ca, &h->devs)
                if (ca->mi.bucket_size == h->blocksize)
                        h->nr_active_devs++;
@@ -1751,7 +1747,9 @@ ec_new_stripe_head_alloc(struct bch_fs *c, unsigned disk_label,
         * If we only have redundancy + 1 devices, we're better off with just
         * replication:
         */
-       if (h->nr_active_devs < h->redundancy + 2) {
+       h->insufficient_devs = h->nr_active_devs < h->redundancy + 2;
+
+       if (h->insufficient_devs) {
                const char *err;
 
                if (nr_devs < h->redundancy + 2)
@@ -1766,6 +1764,34 @@ ec_new_stripe_head_alloc(struct bch_fs *c, unsigned disk_label,
                                h->nr_active_devs, h->redundancy + 2, err);
        }
 
+       struct bch_devs_mask devs_leaving;
+       bitmap_andnot(devs_leaving.d, devs.d, h->devs.d, BCH_SB_MEMBERS_MAX);
+
+       if (h->s && !h->s->allocated && dev_mask_nr(&devs_leaving))
+               ec_stripe_new_cancel(c, h, -EINTR);
+
+       h->rw_devs_change_count = c->rw_devs_change_count;
+}
+
+static struct ec_stripe_head *
+ec_new_stripe_head_alloc(struct bch_fs *c, unsigned disk_label,
+                        unsigned algo, unsigned redundancy,
+                        enum bch_watermark watermark)
+{
+       struct ec_stripe_head *h;
+
+       h = kzalloc(sizeof(*h), GFP_KERNEL);
+       if (!h)
+               return NULL;
+
+       mutex_init(&h->lock);
+       BUG_ON(!mutex_trylock(&h->lock));
+
+       h->disk_label   = disk_label;
+       h->algo         = algo;
+       h->redundancy   = redundancy;
+       h->watermark    = watermark;
+
        list_add(&h->list, &c->ec_stripe_head_list);
        return h;
 }
@@ -1776,7 +1802,7 @@ void bch2_ec_stripe_head_put(struct bch_fs *c, struct ec_stripe_head *h)
            h->s->allocated &&
            bitmap_weight(h->s->blocks_allocated,
                          h->s->nr_data) == h->s->nr_data)
-               ec_stripe_set_pending(c, h);
+               ec_stripe_new_set_pending(c, h);
 
        mutex_unlock(&h->lock);
 }
@@ -1801,7 +1827,7 @@ __bch2_ec_stripe_head_get(struct btree_trans *trans,
 
        if (test_bit(BCH_FS_going_ro, &c->flags)) {
                h = ERR_PTR(-BCH_ERR_erofs_no_writes);
-               goto found;
+               goto err;
        }
 
        list_for_each_entry(h, &c->ec_stripe_head_list, list)
@@ -1810,18 +1836,23 @@ __bch2_ec_stripe_head_get(struct btree_trans *trans,
                    h->redundancy       == redundancy &&
                    h->watermark        == watermark) {
                        ret = bch2_trans_mutex_lock(trans, &h->lock);
-                       if (ret)
+                       if (ret) {
                                h = ERR_PTR(ret);
+                               goto err;
+                       }
                        goto found;
                }
 
        h = ec_new_stripe_head_alloc(c, disk_label, algo, redundancy, watermark);
 found:
-       if (!IS_ERR_OR_NULL(h) &&
-           h->nr_active_devs < h->redundancy + 2) {
+       if (h->rw_devs_change_count != c->rw_devs_change_count)
+               ec_stripe_head_devs_update(c, h);
+
+       if (h->insufficient_devs) {
                mutex_unlock(&h->lock);
                h = NULL;
        }
+err:
        mutex_unlock(&c->ec_stripe_head_lock);
        return h;
 }
@@ -2261,8 +2292,7 @@ static void __bch2_ec_stop(struct bch_fs *c, struct bch_dev *ca)
                }
                goto unlock;
 found:
-               h->s->err = -BCH_ERR_erofs_no_writes;
-               ec_stripe_set_pending(c, h);
+               ec_stripe_new_cancel(c, h, -BCH_ERR_erofs_no_writes);
 unlock:
                mutex_unlock(&h->lock);
        }
index c432040238cdb356346d25c4808542f10c6bdd94..43326370b410a65e93969db37fe83c389d4ab71d 100644 (file)
@@ -192,6 +192,9 @@ struct ec_stripe_head {
        unsigned                algo;
        unsigned                redundancy;
        enum bch_watermark      watermark;
+       bool                    insufficient_devs;
+
+       unsigned long           rw_devs_change_count;
 
        u64                     nr_created;