bcachefs: bch2_ec_stripe_head_get() now checks for change in rw devices
author    Kent Overstreet <kent.overstreet@linux.dev>
          Fri, 6 Sep 2024 23:14:36 +0000 (19:14 -0400)
committer Kent Overstreet <kent.overstreet@linux.dev>
          Sat, 21 Sep 2024 15:39:49 +0000 (11:39 -0400)
This factors out ec_stripe_head_devs_update(), which initializes the
bitmap of devices we're allocating from, and runs it every time
c->rw_devs_change_count changes.
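
As a minimal sketch of the pattern (condensed from the
__bch2_ec_stripe_head_get() hunk below, with locking and error paths
elided), the head lookup now compares a cached counter against the
filesystem-wide one and refreshes the device bitmap when they differ:

	if (h->rw_devs_change_count != c->rw_devs_change_count)
		ec_stripe_head_devs_update(c, h);

	if (h->insufficient_devs) {
		/* fewer than redundancy + 2 usable devices: fall back to replication */
		mutex_unlock(&h->lock);
		h = NULL;
	}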

We also cancel pending stripes that have not yet been allocated, since
they may refer to devices that are no longer available.
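
Concretely (again condensed from the patch), the update computes which
devices dropped out of the mask and cancels an in-flight, unallocated
stripe; ec_stripe_new_cancel() records the error before moving the
stripe to the pending list, which satisfies the
BUG_ON(!s->allocated && !s->err) assertion in
ec_stripe_new_set_pending():

	struct bch_devs_mask devs_leaving;
	bitmap_andnot(devs_leaving.d, devs.d, h->devs.d, BCH_SB_MEMBERS_MAX);

	if (h->s && !h->s->allocated && dev_mask_nr(&devs_leaving))
		ec_stripe_new_cancel(c, h, -EINTR);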

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/ec.c
fs/bcachefs/ec.h

index f58c3e78ea45b1befedf972f7493bd867782c922..f167ea454ded17a6245655f5fbfc57cf05e8d2b4 100644 (file)
@@ -1572,10 +1572,12 @@ void bch2_ec_do_stripe_creates(struct bch_fs *c)
                bch2_write_ref_put(c, BCH_WRITE_REF_stripe_create);
 }
 
-static void ec_stripe_set_pending(struct bch_fs *c, struct ec_stripe_head *h)
+static void ec_stripe_new_set_pending(struct bch_fs *c, struct ec_stripe_head *h)
 {
        struct ec_stripe_new *s = h->s;
 
+       lockdep_assert_held(&h->lock);
+
        BUG_ON(!s->allocated && !s->err);
 
        h->s            = NULL;
@@ -1588,6 +1590,12 @@ static void ec_stripe_set_pending(struct bch_fs *c, struct ec_stripe_head *h)
        ec_stripe_new_put(c, s, STRIPE_REF_io);
 }
 
+static void ec_stripe_new_cancel(struct bch_fs *c, struct ec_stripe_head *h, int err)
+{
+       h->s->err = err;
+       ec_stripe_new_set_pending(c, h);
+}
+
 void bch2_ec_bucket_cancel(struct bch_fs *c, struct open_bucket *ob)
 {
        struct ec_stripe_new *s = ob->ec;
@@ -1711,27 +1719,14 @@ static int ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h)
        return 0;
 }
 
-static struct ec_stripe_head *
-ec_new_stripe_head_alloc(struct bch_fs *c, unsigned disk_label,
-                        unsigned algo, unsigned redundancy,
-                        enum bch_watermark watermark)
+static void ec_stripe_head_devs_update(struct bch_fs *c, struct ec_stripe_head *h)
 {
-       struct ec_stripe_head *h;
-
-       h = kzalloc(sizeof(*h), GFP_KERNEL);
-       if (!h)
-               return NULL;
-
-       mutex_init(&h->lock);
-       BUG_ON(!mutex_trylock(&h->lock));
-
-       h->disk_label   = disk_label;
-       h->algo         = algo;
-       h->redundancy   = redundancy;
-       h->watermark    = watermark;
+       struct bch_devs_mask devs = h->devs;
 
        rcu_read_lock();
-       h->devs = target_rw_devs(c, BCH_DATA_user, disk_label ? group_to_target(disk_label - 1) : 0);
+       h->devs = target_rw_devs(c, BCH_DATA_user, h->disk_label
+                                ? group_to_target(h->disk_label - 1)
+                                : 0);
        unsigned nr_devs = dev_mask_nr(&h->devs);
 
        for_each_member_device_rcu(c, ca, &h->devs)
@@ -1741,6 +1736,7 @@ ec_new_stripe_head_alloc(struct bch_fs *c, unsigned disk_label,
 
        h->blocksize = pick_blocksize(c, &h->devs);
 
+       h->nr_active_devs = 0;
        for_each_member_device_rcu(c, ca, &h->devs)
                if (ca->mi.bucket_size == h->blocksize)
                        h->nr_active_devs++;
@@ -1751,7 +1747,9 @@ ec_new_stripe_head_alloc(struct bch_fs *c, unsigned disk_label,
         * If we only have redundancy + 1 devices, we're better off with just
         * replication:
         */
-       if (h->nr_active_devs < h->redundancy + 2) {
+       h->insufficient_devs = h->nr_active_devs < h->redundancy + 2;
+
+       if (h->insufficient_devs) {
                const char *err;
 
                if (nr_devs < h->redundancy + 2)
@@ -1766,6 +1764,34 @@ ec_new_stripe_head_alloc(struct bch_fs *c, unsigned disk_label,
                                h->nr_active_devs, h->redundancy + 2, err);
        }
 
+       struct bch_devs_mask devs_leaving;
+       bitmap_andnot(devs_leaving.d, devs.d, h->devs.d, BCH_SB_MEMBERS_MAX);
+
+       if (h->s && !h->s->allocated && dev_mask_nr(&devs_leaving))
+               ec_stripe_new_cancel(c, h, -EINTR);
+
+       h->rw_devs_change_count = c->rw_devs_change_count;
+}
+
+static struct ec_stripe_head *
+ec_new_stripe_head_alloc(struct bch_fs *c, unsigned disk_label,
+                        unsigned algo, unsigned redundancy,
+                        enum bch_watermark watermark)
+{
+       struct ec_stripe_head *h;
+
+       h = kzalloc(sizeof(*h), GFP_KERNEL);
+       if (!h)
+               return NULL;
+
+       mutex_init(&h->lock);
+       BUG_ON(!mutex_trylock(&h->lock));
+
+       h->disk_label   = disk_label;
+       h->algo         = algo;
+       h->redundancy   = redundancy;
+       h->watermark    = watermark;
+
        list_add(&h->list, &c->ec_stripe_head_list);
        return h;
 }
@@ -1776,7 +1802,7 @@ void bch2_ec_stripe_head_put(struct bch_fs *c, struct ec_stripe_head *h)
            h->s->allocated &&
            bitmap_weight(h->s->blocks_allocated,
                          h->s->nr_data) == h->s->nr_data)
-               ec_stripe_set_pending(c, h);
+               ec_stripe_new_set_pending(c, h);
 
        mutex_unlock(&h->lock);
 }
@@ -1801,7 +1827,7 @@ __bch2_ec_stripe_head_get(struct btree_trans *trans,
 
        if (test_bit(BCH_FS_going_ro, &c->flags)) {
                h = ERR_PTR(-BCH_ERR_erofs_no_writes);
-               goto found;
+               goto err;
        }
 
        list_for_each_entry(h, &c->ec_stripe_head_list, list)
@@ -1810,18 +1836,23 @@ __bch2_ec_stripe_head_get(struct btree_trans *trans,
                    h->redundancy       == redundancy &&
                    h->watermark        == watermark) {
                        ret = bch2_trans_mutex_lock(trans, &h->lock);
-                       if (ret)
+                       if (ret) {
                                h = ERR_PTR(ret);
+                               goto err;
+                       }
                        goto found;
                }
 
        h = ec_new_stripe_head_alloc(c, disk_label, algo, redundancy, watermark);
 found:
-       if (!IS_ERR_OR_NULL(h) &&
-           h->nr_active_devs < h->redundancy + 2) {
+       if (h->rw_devs_change_count != c->rw_devs_change_count)
+               ec_stripe_head_devs_update(c, h);
+
+       if (h->insufficient_devs) {
                mutex_unlock(&h->lock);
                h = NULL;
        }
+err:
        mutex_unlock(&c->ec_stripe_head_lock);
        return h;
 }
@@ -2261,8 +2292,7 @@ static void __bch2_ec_stop(struct bch_fs *c, struct bch_dev *ca)
                }
                goto unlock;
 found:
-               h->s->err = -BCH_ERR_erofs_no_writes;
-               ec_stripe_set_pending(c, h);
+               ec_stripe_new_cancel(c, h, -BCH_ERR_erofs_no_writes);
 unlock:
                mutex_unlock(&h->lock);
        }
index c432040238cdb356346d25c4808542f10c6bdd94..43326370b410a65e93969db37fe83c389d4ab71d 100644 (file)
@@ -192,6 +192,9 @@ struct ec_stripe_head {
        unsigned                algo;
        unsigned                redundancy;
        enum bch_watermark      watermark;
+       bool                    insufficient_devs;
+
+       unsigned long           rw_devs_change_count;
 
        u64                     nr_created;