]> git.dujemihanovic.xyz Git - linux.git/commitdiff
ceph: defer stopping mdsc delayed_work
author: Xiubo Li <xiubli@redhat.com>
Tue, 25 Jul 2023 04:03:59 +0000 (12:03 +0800)
committer: Ilya Dryomov <idryomov@gmail.com>
Tue, 1 Aug 2023 22:13:02 +0000 (00:13 +0200)
Flushing the dirty buffer may take a long time if the cluster is
overloaded or if there is a network issue. So we should keep pinging
the MDSs periodically to keep the client alive; otherwise the MDS
will blocklist the kclient.

Cc: stable@vger.kernel.org
Link: https://tracker.ceph.com/issues/61843
Signed-off-by: Xiubo Li <xiubli@redhat.com>
Reviewed-by: Milind Changire <mchangir@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
fs/ceph/mds_client.c
fs/ceph/mds_client.h
fs/ceph/super.c

index 66048a86c480ca9bd2075ea753cd013cf4593f1b..5fb367b1d4b066545ea36b9046a3c04fec4d314f 100644 (file)
@@ -4764,7 +4764,7 @@ static void delayed_work(struct work_struct *work)
 
        dout("mdsc delayed_work\n");
 
-       if (mdsc->stopping)
+       if (mdsc->stopping >= CEPH_MDSC_STOPPING_FLUSHED)
                return;
 
        mutex_lock(&mdsc->mutex);
@@ -4943,7 +4943,7 @@ void send_flush_mdlog(struct ceph_mds_session *s)
 void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc)
 {
        dout("pre_umount\n");
-       mdsc->stopping = 1;
+       mdsc->stopping = CEPH_MDSC_STOPPING_BEGIN;
 
        ceph_mdsc_iterate_sessions(mdsc, send_flush_mdlog, true);
        ceph_mdsc_iterate_sessions(mdsc, lock_unlock_session, false);
index 724307ff89cd96e9e8f1e2ff0a61f714c315c357..86d2965e68a1fd1a52dde85f742b5733e62ec7c4 100644 (file)
@@ -380,6 +380,11 @@ struct cap_wait {
        int                     want;
 };
 
+enum {
+       CEPH_MDSC_STOPPING_BEGIN = 1,
+       CEPH_MDSC_STOPPING_FLUSHED = 2,
+};
+
 /*
  * mds client state
  */
index 3fc48b43cab0a2c5bccfe16445f09a9ecdd55761..a5f52013314d65d2c4ff576a01c08deffba872ae 100644 (file)
@@ -1374,6 +1374,16 @@ static void ceph_kill_sb(struct super_block *s)
        ceph_mdsc_pre_umount(fsc->mdsc);
        flush_fs_workqueues(fsc);
 
+       /*
+        * Although kill_anon_super() will eventually trigger
+        * sync_filesystem() anyway, we still need to do it here
+        * and then bump the shutdown stage so that the work
+        * queue is stopped as early as possible.
+        */
+       sync_filesystem(s);
+
+       fsc->mdsc->stopping = CEPH_MDSC_STOPPING_FLUSHED;
+
        kill_anon_super(s);
 
        fsc->client->extra_mon_dispatch = NULL;