git.dujemihanovic.xyz Git - linux.git/commitdiff
btrfs: dump all space infos if we abort transaction due to ENOSPC
author: Qu Wenruo <wqu@suse.com>
Thu, 25 Aug 2022 07:09:10 +0000 (15:09 +0800)
committer: David Sterba <dsterba@suse.com>
Mon, 26 Sep 2022 10:27:59 +0000 (12:27 +0200)
We have hit some transaction aborts due to -ENOSPC internally.

Normally we should always reserve enough space for metadata for every
transaction, thus hitting -ENOSPC should really indicate some cases we
didn't expect.

But unfortunately the current error reporting only gives a kernel
warning and a stack trace, which is not really helpful for debugging
what's causing the problem.

And the mount option enospc_debug can only help when the user can
reproduce the problem, but in most cases such a transaction abort
caused by -ENOSPC is really hard to reproduce.

So this patch will dump all space infos (data, metadata, system) when we
abort the first transaction with -ENOSPC.

This should at least provide some clue to us.

An example dump looks like this:

  BTRFS: Transaction aborted (error -28)
  WARNING: CPU: 8 PID: 3366 at fs/btrfs/transaction.c:2137 btrfs_commit_transaction+0xf81/0xfb0 [btrfs]
  <call trace skipped>
  ---[ end trace 0000000000000000 ]---
  BTRFS info (device dm-1: state A): dumping space info:
  BTRFS info (device dm-1: state A): space_info DATA has 6791168 free, is not full
  BTRFS info (device dm-1: state A): space_info total=8388608, used=1597440, pinned=0, reserved=0, may_use=0, readonly=0 zone_unusable=0
  BTRFS info (device dm-1: state A): space_info METADATA has 257114112 free, is not full
  BTRFS info (device dm-1: state A): space_info total=268435456, used=131072, pinned=180224, reserved=65536, may_use=10878976, readonly=65536 zone_unusable=0
  BTRFS info (device dm-1: state A): space_info SYSTEM has 8372224 free, is not full
  BTRFS info (device dm-1: state A): space_info total=8388608, used=16384, pinned=0, reserved=0, may_use=0, readonly=0 zone_unusable=0
  BTRFS info (device dm-1: state A): global_block_rsv: size 3670016 reserved 3670016
  BTRFS info (device dm-1: state A): trans_block_rsv: size 0 reserved 0
  BTRFS info (device dm-1: state A): chunk_block_rsv: size 0 reserved 0
  BTRFS info (device dm-1: state A): delayed_block_rsv: size 4063232 reserved 4063232
  BTRFS info (device dm-1: state A): delayed_refs_rsv: size 3145728 reserved 3145728
  BTRFS: error (device dm-1: state A) in btrfs_commit_transaction:2137: errno=-28 No space left
  BTRFS info (device dm-1: state EA): forced readonly

Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
fs/btrfs/ctree.h
fs/btrfs/space-info.c
fs/btrfs/space-info.h
fs/btrfs/super.c

index 5d03fc267b58c51dec9fd370b9fb270c547d6fe7..b38cd6e2eb5d6f050b53d1c970f1e0ac5b6cdb8a 100644 (file)
@@ -3828,7 +3828,7 @@ const char * __attribute_const__ btrfs_decode_error(int errno);
 __cold
 void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
                               const char *function,
-                              unsigned int line, int errno);
+                              unsigned int line, int errno, bool first_hit);
 
 /*
  * Call btrfs_abort_transaction as early as possible when an error condition is
@@ -3836,9 +3836,11 @@ void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
  */
 #define btrfs_abort_transaction(trans, errno)          \
 do {                                                           \
+       bool first = false;                                     \
        /* Report first abort since mount */                    \
        if (!test_and_set_bit(BTRFS_FS_STATE_TRANS_ABORTED,     \
                        &((trans)->fs_info->fs_state))) {       \
+               first = true;                                   \
                if ((errno) != -EIO && (errno) != -EROFS) {             \
                        WARN(1, KERN_DEBUG                              \
                        "BTRFS: Transaction aborted (error %d)\n",      \
@@ -3850,7 +3852,7 @@ do {                                                              \
                }                                               \
        }                                                       \
        __btrfs_abort_transaction((trans), __func__,            \
-                                 __LINE__, (errno));           \
+                                 __LINE__, (errno), first);    \
 } while (0)
 
 #ifdef CONFIG_PRINTK_INDEX
index 3527276f35e9d9bcf561603db17abfd3bcad81f9..2e06b7c422c7f786210afc13741f7db029647da9 100644 (file)
@@ -492,6 +492,15 @@ static const char *space_info_flag_to_str(const struct btrfs_space_info *space_i
        }
 }
 
+static void dump_global_block_rsv(struct btrfs_fs_info *fs_info)
+{
+       DUMP_BLOCK_RSV(fs_info, global_block_rsv);
+       DUMP_BLOCK_RSV(fs_info, trans_block_rsv);
+       DUMP_BLOCK_RSV(fs_info, chunk_block_rsv);
+       DUMP_BLOCK_RSV(fs_info, delayed_block_rsv);
+       DUMP_BLOCK_RSV(fs_info, delayed_refs_rsv);
+}
+
 static void __btrfs_dump_space_info(struct btrfs_fs_info *fs_info,
                                    struct btrfs_space_info *info)
 {
@@ -508,13 +517,6 @@ static void __btrfs_dump_space_info(struct btrfs_fs_info *fs_info,
                info->total_bytes, info->bytes_used, info->bytes_pinned,
                info->bytes_reserved, info->bytes_may_use,
                info->bytes_readonly, info->bytes_zone_unusable);
-
-       DUMP_BLOCK_RSV(fs_info, global_block_rsv);
-       DUMP_BLOCK_RSV(fs_info, trans_block_rsv);
-       DUMP_BLOCK_RSV(fs_info, chunk_block_rsv);
-       DUMP_BLOCK_RSV(fs_info, delayed_block_rsv);
-       DUMP_BLOCK_RSV(fs_info, delayed_refs_rsv);
-
 }
 
 void btrfs_dump_space_info(struct btrfs_fs_info *fs_info,
@@ -526,6 +528,7 @@ void btrfs_dump_space_info(struct btrfs_fs_info *fs_info,
 
        spin_lock(&info->lock);
        __btrfs_dump_space_info(fs_info, info);
+       dump_global_block_rsv(fs_info);
        spin_unlock(&info->lock);
 
        if (!dump_block_groups)
@@ -1770,3 +1773,17 @@ int btrfs_reserve_data_bytes(struct btrfs_fs_info *fs_info, u64 bytes,
        }
        return ret;
 }
+
+/* Dump all the space infos when we abort a transaction due to ENOSPC. */
+__cold void btrfs_dump_space_info_for_trans_abort(struct btrfs_fs_info *fs_info)
+{
+       struct btrfs_space_info *space_info;
+
+       btrfs_info(fs_info, "dumping space info:");
+       list_for_each_entry(space_info, &fs_info->space_info, list) {
+               spin_lock(&space_info->lock);
+               __btrfs_dump_space_info(fs_info, space_info);
+               spin_unlock(&space_info->lock);
+       }
+       dump_global_block_rsv(fs_info);
+}
index 2039096803ed8148670ac3c86b7987022f6b4e56..8f594874094103a0f9c28664c6191039327195c2 100644 (file)
@@ -157,4 +157,6 @@ static inline void btrfs_space_info_free_bytes_may_use(
 }
 int btrfs_reserve_data_bytes(struct btrfs_fs_info *fs_info, u64 bytes,
                             enum btrfs_reserve_flush_enum flush);
+void btrfs_dump_space_info_for_trans_abort(struct btrfs_fs_info *fs_info);
+
 #endif /* BTRFS_SPACE_INFO_H */
index 51449619a0230ca12e1bf010774aea44e53e08eb..7291e9d67e92f867aa722495fa9befe0fe5c03c5 100644 (file)
@@ -346,12 +346,14 @@ void __cold btrfs_err_32bit_limit(struct btrfs_fs_info *fs_info)
 __cold
 void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
                               const char *function,
-                              unsigned int line, int errno)
+                              unsigned int line, int errno, bool first_hit)
 {
        struct btrfs_fs_info *fs_info = trans->fs_info;
 
        WRITE_ONCE(trans->aborted, errno);
        WRITE_ONCE(trans->transaction->aborted, errno);
+       if (first_hit && errno == -ENOSPC)
+               btrfs_dump_space_info_for_trans_abort(fs_info);
        /* Wake up anybody who may be waiting on this transaction */
        wake_up(&fs_info->transaction_wait);
        wake_up(&fs_info->transaction_blocked_wait);