xfs: fix unlink vs cluster buffer instantiation race

author Dave Chinner <dchinner@redhat.com>

Wed, 12 Jun 2024 22:51:48 +0000 (08:51 +1000)

committer Chandan Babu R <chandanbabu@kernel.org>

Mon, 17 Jun 2024 05:47:09 +0000 (11:17 +0530)
author Dave Chinner <dchinner@redhat.com>
Wed, 12 Jun 2024 22:51:48 +0000 (08:51 +1000)
committer Chandan Babu R <chandanbabu@kernel.org>
Mon, 17 Jun 2024 05:47:09 +0000 (11:17 +0530)
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c

index 58fb7a5062e1e66ace91a4d5c176eb7fb23bf631..f36091e1e7f50bedeb6eb1dd2540643da1801f4e 100644 (file)
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2548,11 +2548,26 @@ xfs_ifree_cluster(
                  * This buffer may not have been correctly initialised as we
                  * didn't read it from disk. That's not important because we are
                  * only using to mark the buffer as stale in the log, and to
-                * attach stale cached inodes on it. That means it will never be
-                * dispatched for IO. If it is, we want to know about it, and we
-                * want it to fail. We can acheive this by adding a write
-                * verifier to the buffer.
+                * attach stale cached inodes on it.
+                *
+                * For the inode that triggered the cluster freeing, this
+                * attachment may occur in xfs_inode_item_precommit() after we
+                * have marked this buffer stale.  If this buffer was not in
+                * memory before xfs_ifree_cluster() started, it will not be
+                * marked XBF_DONE and this will cause problems later in
+                * xfs_inode_item_precommit() when we trip over a (stale, !done)
+                * buffer to attach to the transaction.
+                *
+                * Hence we have to mark the buffer as XBF_DONE here. This is
+                * safe because we are also marking the buffer as XBF_STALE and
+                * XFS_BLI_STALE. That means it will never be dispatched for
+                * IO and it won't be unlocked until the cluster freeing has
+                * been committed to the journal and the buffer unpinned. If it
+                * is written, we want to know about it, and we want it to
+                * fail. We can achieve this by adding a write verifier to the
+                * buffer.
                  */
+               bp->b_flags |= XBF_DONE;
                 bp->b_ops = &xfs_inode_buf_ops;
  
                 /*
author	Dave Chinner <dchinner@redhat.com>
	Wed, 12 Jun 2024 22:51:48 +0000 (08:51 +1000)
committer	Chandan Babu R <chandanbabu@kernel.org>
	Mon, 17 Jun 2024 05:47:09 +0000 (11:17 +0530)