We now track IO errors per device since filesystem creation.
IO error counts can be viewed in sysfs, or with the 'bcachefs
show-super' command.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
* Committed by bch2_write_super() -> bch_fs_mi_update()
*/
struct bch_member_cpu mi;
+ atomic64_t errors[BCH_MEMBER_ERROR_NR];
+
__uuid_t uuid;
char name[BDEVNAME_SIZE];
BCH_IOPS_NR
};
+/*
+ * Classes of per-device IO error, each with its own counter.
+ *
+ * x(name, nr): nr is the slot this class occupies in the errors[] arrays of
+ * struct bch_dev and struct bch_member.
+ * NOTE(review): the numbers index an __le64 array stored in struct bch_member,
+ * so they presumably must not change once in use - append new classes only.
+ */
+#define BCH_MEMBER_ERROR_TYPES()		\
+	x(read,		0)			\
+	x(write,	1)			\
+	x(checksum,	2)
+
+/* One BCH_MEMBER_ERROR_<name> per class; BCH_MEMBER_ERROR_NR sizes the arrays */
+enum bch_member_error_type {
+#define x(_name, _nr)	BCH_MEMBER_ERROR_##_name = _nr,
+	BCH_MEMBER_ERROR_TYPES()
+#undef x
+	BCH_MEMBER_ERROR_NR
+};
+
struct bch_member {
__uuid_t uuid;
__le64 nbuckets; /* device size */
__le64 flags;
__le32 iops[4];
+ __le64 errors[BCH_MEMBER_ERROR_NR];
+ __le64 errors_at_reset[BCH_MEMBER_ERROR_NR];
+ __le64 errors_reset_time;
};
#define BCH_MEMBER_V1_BYTES 56
while (b->written < (ptr_written ?: btree_sectors(c))) {
unsigned sectors;
struct nonce nonce;
- struct bch_csum csum;
bool first = !b->written;
+ bool csum_bad;
if (!b->written) {
i = &b->data->keys;
BSET_CSUM_TYPE(i));
nonce = btree_nonce(i, b->written << 9);
- csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, b->data);
- btree_err_on(bch2_crc_cmp(csum, b->data->csum),
+ csum_bad = bch2_crc_cmp(b->data->csum,
+ csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, b->data));
+ if (csum_bad)
+ bch2_io_error(ca, BCH_MEMBER_ERROR_checksum);
+
+ btree_err_on(csum_bad,
-BCH_ERR_btree_node_read_err_want_retry, c, ca, b, i,
"invalid checksum");
BSET_CSUM_TYPE(i));
nonce = btree_nonce(i, b->written << 9);
- csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne);
+ csum_bad = bch2_crc_cmp(bne->csum,
+ csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne));
+ if (csum_bad)
+ bch2_io_error(ca, BCH_MEMBER_ERROR_checksum);
- btree_err_on(bch2_crc_cmp(csum, bne->csum),
+ btree_err_on(csum_bad,
-BCH_ERR_btree_node_read_err_want_retry, c, ca, b, i,
"invalid checksum");
start:
printbuf_reset(&buf);
bch2_btree_pos_to_text(&buf, c, b);
- bch2_dev_io_err_on(bio->bi_status, ca, "btree read error %s for %s",
+ bch2_dev_io_err_on(bio->bi_status, ca, BCH_MEMBER_ERROR_read,
+ "btree read error %s for %s",
bch2_blk_status_to_str(bio->bi_status), buf.buf);
if (rb->have_ioref)
percpu_ref_put(&ca->io_ref);
if (wbio->have_ioref)
bch2_latency_acct(ca, wbio->submit_time, WRITE);
- if (bch2_dev_io_err_on(bio->bi_status, ca, "btree write error: %s",
+ if (bch2_dev_io_err_on(bio->bi_status, ca, BCH_MEMBER_ERROR_write,
+ "btree write error: %s",
bch2_blk_status_to_str(bio->bi_status)) ||
bch2_meta_write_fault("btree")) {
spin_lock_irqsave(&c->btree_write_error_lock, flags);
struct bch_dev *ca = ec_bio->ca;
struct closure *cl = bio->bi_private;
- if (bch2_dev_io_err_on(bio->bi_status, ca, "erasure coding %s error: %s",
+ if (bch2_dev_io_err_on(bio->bi_status, ca,
+ bio_data_dir(bio)
+ ? BCH_MEMBER_ERROR_write
+ : BCH_MEMBER_ERROR_read,
+ "erasure coding %s error: %s",
bio_data_dir(bio) ? "write" : "read",
bch2_blk_status_to_str(bio->bi_status)))
clear_bit(ec_bio->idx, ec_bio->buf->valid);
up_write(&c->state_lock);
}
+/*
+ * Count one IO error of class @type against device @ca by incrementing
+ * ca->errors[type]. Nothing is logged here, and the deferred error work
+ * below is commented out, so no further action is taken.
+ */
-void bch2_io_error(struct bch_dev *ca)
+void bch2_io_error(struct bch_dev *ca, enum bch_member_error_type type)
{
+ atomic64_inc(&ca->errors[type]);
//queue_work(system_long_wq, &ca->io_error_work);
}
void bch2_io_error_work(struct work_struct *);
/* Does the error handling without logging a message */
-void bch2_io_error(struct bch_dev *);
+void bch2_io_error(struct bch_dev *, enum bch_member_error_type);
+/*
+ * If @cond is true, log the printf-style message in the variadic arguments
+ * via bch_err_dev_ratelimited() and count an IO error of class @_type
+ * against @ca. @cond is evaluated exactly once.
+ *
+ * Evaluates to @cond converted to bool, so it can be used as an if () condition.
+ */
-#define bch2_dev_io_err_on(cond, ca, ...) \
+#define bch2_dev_io_err_on(cond, ca, _type, ...) \
({ \
bool _ret = (cond); \
\
if (_ret) { \
bch_err_dev_ratelimited(ca, __VA_ARGS__); \
- bch2_io_error(ca); \
+ bch2_io_error(ca, _type); \
} \
_ret; \
})
+/*
+ * Like bch2_dev_io_err_on(), but the message is logged via
+ * bch_err_inum_offset_ratelimited(). Callers pass the inode number and
+ * offset as the first two variadic arguments, ahead of the format string.
+ *
+ * Evaluates to @cond converted to bool; @cond is evaluated exactly once.
+ */
-#define bch2_dev_inum_io_err_on(cond, ca, ...) \
+#define bch2_dev_inum_io_err_on(cond, ca, _type, ...) \
({ \
bool _ret = (cond); \
\
if (_ret) { \
bch_err_inum_offset_ratelimited(ca, __VA_ARGS__); \
- bch2_io_error(ca); \
+ bch2_io_error(ca, _type); \
} \
_ret; \
})
"data checksum error: expected %0llx:%0llx got %0llx:%0llx (type %s)",
rbio->pick.crc.csum.hi, rbio->pick.crc.csum.lo,
csum.hi, csum.lo, bch2_csum_types[crc.csum_type]);
- bch2_io_error(ca);
+ bch2_io_error(ca, BCH_MEMBER_ERROR_checksum);
bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR);
goto out;
decompression_err:
if (!rbio->split)
rbio->bio.bi_end_io = rbio->end_io;
- if (bch2_dev_inum_io_err_on(bio->bi_status, ca,
+ if (bch2_dev_inum_io_err_on(bio->bi_status, ca, BCH_MEMBER_ERROR_read,
rbio->read_pos.inode,
rbio->read_pos.offset,
"data read error: %s",
struct bch_fs *c = wbio->c;
struct bch_dev *ca = bch_dev_bkey_exists(c, wbio->dev);
- if (bch2_dev_inum_io_err_on(bio->bi_status, ca,
+ if (bch2_dev_inum_io_err_on(bio->bi_status, ca, BCH_MEMBER_ERROR_write,
op->pos.inode,
wbio->inode_offset << 9,
"data write error: %s",
ret = submit_bio_wait(bio);
kfree(bio);
- if (bch2_dev_io_err_on(ret, ca,
+ if (bch2_dev_io_err_on(ret, ca, BCH_MEMBER_ERROR_read,
"journal read error: sector %llu",
offset) ||
bch2_meta_read_fault("journal")) {
ja->bucket_seq[bucket] = le64_to_cpu(j->seq);
csum_good = jset_csum_good(c, j);
- if (!csum_good)
+ if (bch2_dev_io_err_on(!csum_good, ca, BCH_MEMBER_ERROR_checksum,
+ "journal checksum error"))
saw_bad = true;
ret = bch2_encrypt(c, JSET_CSUM_TYPE(j), journal_nonce(j),
struct journal_buf *w = journal_last_unwritten_buf(j);
unsigned long flags;
- if (bch2_dev_io_err_on(bio->bi_status, ca, "error writing journal entry %llu: %s",
+ if (bch2_dev_io_err_on(bio->bi_status, ca, BCH_MEMBER_ERROR_write,
+ "error writing journal entry %llu: %s",
le64_to_cpu(w->data->seq),
bch2_blk_status_to_str(bio->bi_status)) ||
bch2_meta_write_fault("journal")) {
#define x(t, n, ...) [n] = #t,
-const char * const bch2_iops_measurements[] = {
- BCH_IOPS_MEASUREMENTS()
- NULL
-};
-
const char * const bch2_error_actions[] = {
BCH_ERROR_ACTIONS()
NULL
struct bch_fs;
-extern const char * const bch2_iops_measurements[];
extern const char * const bch2_error_actions[];
extern const char * const bch2_fsck_fix_opts[];
extern const char * const bch2_version_upgrade_opts[];
#include "sb-members.h"
#include "super-io.h"
+/*
+ * Name tables built from the x-macro lists: slot [nr] holds the stringified
+ * name, and each table ends with a NULL entry.
+ */
+#define x(_name, _nr, ...)	[_nr] = #_name,
+/* Names of the iops[] measurement slots; file-local. */
+static const char * const bch2_iops_measurements[] = {
+	BCH_IOPS_MEASUREMENTS()
+	NULL
+};
+
+/*
+ * Names of the IO error classes, indexed by enum bch_member_error_type.
+ * NOTE(review): the entries are string literals, so "const char * const"
+ * would be the better type; the extern declaration must change with it.
+ */
+char * const bch2_member_error_strs[] = {
+	BCH_MEMBER_ERROR_TYPES()
+	NULL
+};
+#undef x
+
/* Code for bch_sb_field_members_v1: */
static struct bch_member *members_v2_get_mut(struct bch_sb_field_members_v2 *mi, int i)
return sb_members_v2_resize_entries(c);
}
-int bch_members_cpy_v2_v1(struct bch_sb_handle *disk_sb)
+int bch2_sb_members_cpy_v2_v1(struct bch_sb_handle *disk_sb)
{
struct bch_sb_field_members_v1 *mi1;
struct bch_sb_field_members_v2 *mi2;
u64 bucket_size = le16_to_cpu(m.bucket_size);
u64 device_size = le64_to_cpu(m.nbuckets) * bucket_size;
-
prt_printf(out, "Device:");
prt_tab(out);
prt_printf(out, "%u", i);
printbuf_indent_add(out, 2);
+ prt_printf(out, "Label:");
+ prt_tab(out);
+ if (BCH_MEMBER_GROUP(&m)) {
+ unsigned idx = BCH_MEMBER_GROUP(&m) - 1;
+
+ if (idx < disk_groups_nr(gi))
+ prt_printf(out, "%s (%u)",
+ gi->entries[idx].label, idx);
+ else
+ prt_printf(out, "(bad disk labels section)");
+ } else {
+ prt_printf(out, "(none)");
+ }
+ prt_newline(out);
+
prt_printf(out, "UUID:");
prt_tab(out);
pr_uuid(out, m.uuid.b);
prt_units_u64(out, device_size << 9);
prt_newline(out);
+ for (unsigned i = 0; i < BCH_MEMBER_ERROR_NR; i++) {
+ prt_printf(out, "%s errors:", bch2_member_error_strs[i]);
+ prt_tab(out);
+ prt_u64(out, le64_to_cpu(m.errors[i]));
+ prt_newline(out);
+ }
+
for (unsigned i = 0; i < BCH_IOPS_NR; i++) {
prt_printf(out, "%s iops:", bch2_iops_measurements[i]);
prt_tab(out);
: "unknown");
prt_newline(out);
- prt_printf(out, "Label:");
- prt_tab(out);
- if (BCH_MEMBER_GROUP(&m)) {
- unsigned idx = BCH_MEMBER_GROUP(&m) - 1;
-
- if (idx < disk_groups_nr(gi))
- prt_printf(out, "%s (%u)",
- gi->entries[idx].label, idx);
- else
- prt_printf(out, "(bad disk labels section)");
- } else {
- prt_printf(out, "(none)");
- }
- prt_newline(out);
-
prt_printf(out, "Data allowed:");
prt_tab(out);
if (BCH_MEMBER_DATA_ALLOWED(&m))
.validate = bch2_sb_members_v2_validate,
.to_text = bch2_sb_members_v2_to_text,
};
+
+/*
+ * Copy every member device's in-memory IO error counters into its
+ * members_v2 entry in c->disk_sb, converting to little endian.
+ *
+ * Only the superblock buffer is updated; this function does not write it out.
+ */
+void bch2_sb_members_from_cpu(struct bch_fs *c)
+{
+	struct bch_sb_field_members_v2 *members =
+		bch2_sb_field_get(c->disk_sb.sb, members_v2);
+	struct bch_dev *ca;
+	unsigned dev_idx;
+
+	rcu_read_lock();
+	for_each_member_device_rcu(ca, c, dev_idx, NULL) {
+		struct bch_member *dst = members_v2_get_mut(members, dev_idx);
+
+		for (unsigned type = 0; type < BCH_MEMBER_ERROR_NR; type++) {
+			u64 nr = atomic64_read(&ca->errors[type]);
+
+			dst->errors[type] = cpu_to_le64(nr);
+		}
+	}
+	rcu_read_unlock();
+}
+
+/*
+ * Print one indented "<class>:<tab><count>" row per IO error class for @ca.
+ * The count is the current in-memory counter, minus @since[class] when
+ * @since is non-NULL.
+ */
+static void dev_io_error_rows_to_text(struct printbuf *out, struct bch_dev *ca,
+				      const __le64 *since)
+{
+	printbuf_indent_add(out, 2);
+	for (unsigned type = 0; type < BCH_MEMBER_ERROR_NR; type++) {
+		u64 nr = atomic64_read(&ca->errors[type]);
+
+		if (since)
+			nr -= le64_to_cpu(since[type]);
+
+		prt_printf(out, "%s:", bch2_member_error_strs[type]);
+		prt_tab(out);
+		prt_u64(out, nr);
+		prt_newline(out);
+	}
+	printbuf_indent_sub(out, 2);
+}
+
+/*
+ * Report the IO error counters of @ca in two sections: totals since
+ * filesystem creation, then the errors accumulated since the last reset
+ * (current counters minus the errors_at_reset[] snapshot in the superblock).
+ *
+ * The superblock member entry is copied under sb_lock; the live counters
+ * are read without it.
+ * NOTE(review): the "since ... ago" heading is computed from
+ * errors_reset_time as stored; confirm what it shows for a device whose
+ * counters were never reset.
+ */
+void bch2_dev_io_errors_to_text(struct printbuf *out, struct bch_dev *ca)
+{
+	struct bch_fs *c = ca->fs;
+	struct bch_member m;
+
+	mutex_lock(&c->sb_lock);
+	m = bch2_sb_member_get(c->disk_sb.sb, ca->dev_idx);
+	mutex_unlock(&c->sb_lock);
+
+	printbuf_tabstop_push(out, 12);
+
+	prt_str(out, "IO errors since filesystem creation");
+	prt_newline(out);
+	dev_io_error_rows_to_text(out, ca, NULL);
+
+	prt_str(out, "IO errors since ");
+	bch2_pr_time_units(out,
+			   (ktime_get_real_seconds() -
+			    le64_to_cpu(m.errors_reset_time)) * NSEC_PER_SEC);
+	prt_str(out, " ago");
+	prt_newline(out);
+	dev_io_error_rows_to_text(out, ca, m.errors_at_reset);
+}
+
+/*
+ * Start a new error-reporting window for @ca.
+ *
+ * Under sb_lock, snapshots the current counters into the superblock member's
+ * errors_at_reset[], stamps errors_reset_time with the current wall-clock
+ * time in seconds, and writes the superblock. The in-memory counters
+ * themselves are not modified.
+ */
+void bch2_dev_errors_reset(struct bch_dev *ca)
+{
+	struct bch_fs *c = ca->fs;
+	struct bch_member *m;
+
+	mutex_lock(&c->sb_lock);
+	m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx);
+	for (unsigned i = 0; i < ARRAY_SIZE(m->errors_at_reset); i++)
+		m->errors_at_reset[i] = cpu_to_le64(atomic64_read(&ca->errors[i]));
+	/*
+	 * errors_reset_time is an __le64 superblock field and is read back
+	 * with le64_to_cpu(), so it must be stored little endian as well.
+	 */
+	m->errors_reset_time = cpu_to_le64(ktime_get_real_seconds());
+
+	bch2_write_super(c);
+	mutex_unlock(&c->sb_lock);
+}
#ifndef _BCACHEFS_SB_MEMBERS_H
#define _BCACHEFS_SB_MEMBERS_H
+extern char * const bch2_member_error_strs[];
+
int bch2_members_v2_init(struct bch_fs *c);
-int bch_members_cpy_v2_v1(struct bch_sb_handle *disk_sb);
+int bch2_sb_members_cpy_v2_v1(struct bch_sb_handle *disk_sb);
struct bch_member *bch2_members_v2_get_mut(struct bch_sb *sb, int i);
struct bch_member bch2_sb_member_get(struct bch_sb *sb, int i);
extern const struct bch_sb_field_ops bch_sb_field_ops_members_v1;
extern const struct bch_sb_field_ops bch_sb_field_ops_members_v2;
+/* A member entry counts as existing when its UUID is not all zeroes. */
+static inline bool bch2_member_exists(struct bch_member *m)
+{
+	bool uuid_unset = bch2_is_zero(&m->uuid, sizeof(m->uuid));
+
+	return !uuid_unset;
+}
+
+/*
+ * Return true if @dev is a valid index into @sb's member table
+ * (below sb->nr_devices) and the entry there has a non-zero UUID.
+ */
+static inline bool bch2_dev_exists(struct bch_sb *sb,
+				   unsigned dev)
+{
+	struct bch_member m;
+
+	/* Out-of-range indices are rejected before the table is touched */
+	if (dev >= sb->nr_devices)
+		return false;
+
+	m = bch2_sb_member_get(sb, dev);
+	return bch2_member_exists(&m);
+}
+
+/*
+ * Unpack a superblock member entry into a struct bch_member_cpu, converting
+ * the little-endian fields to CPU byte order and extracting the flag fields.
+ *
+ * The stored durability is offset by one: a stored 0 yields durability 1,
+ * any other stored value v yields v - 1.
+ */
+static inline struct bch_member_cpu bch2_mi_to_cpu(struct bch_member *mi)
+{
+	u64 durability = BCH_MEMBER_DURABILITY(mi);
+
+	if (durability)
+		durability--;
+	else
+		durability = 1;
+
+	return (struct bch_member_cpu) {
+		.nbuckets		= le64_to_cpu(mi->nbuckets),
+		.first_bucket		= le16_to_cpu(mi->first_bucket),
+		.bucket_size		= le16_to_cpu(mi->bucket_size),
+		.group			= BCH_MEMBER_GROUP(mi),
+		.state			= BCH_MEMBER_STATE(mi),
+		.discard		= BCH_MEMBER_DISCARD(mi),
+		.data_allowed		= BCH_MEMBER_DATA_ALLOWED(mi),
+		.durability		= durability,
+		.freespace_initialized	= BCH_MEMBER_FREESPACE_INITIALIZED(mi),
+		.valid			= bch2_member_exists(mi),
+	};
+}
+
+void bch2_sb_members_from_cpu(struct bch_fs *);
+
+void bch2_dev_io_errors_to_text(struct printbuf *, struct bch_dev *);
+void bch2_dev_errors_reset(struct bch_dev *);
+
#endif /* _BCACHEFS_SB_MEMBERS_H */
/* XXX: return errors directly */
- if (bch2_dev_io_err_on(bio->bi_status, ca, "superblock write error: %s",
+ if (bch2_dev_io_err_on(bio->bi_status, ca,
+ bio_data_dir(bio)
+ ? BCH_MEMBER_ERROR_write
+ : BCH_MEMBER_ERROR_read,
+ "superblock %s error: %s",
+ bio_data_dir(bio) ? "write" : "read",
bch2_blk_status_to_str(bio->bi_status)))
ca->sb_write_error = 1;
SET_BCH_SB_BIG_ENDIAN(c->disk_sb.sb, CPU_BIG_ENDIAN);
bch2_sb_counters_from_cpu(c);
- bch_members_cpy_v2_v1(&c->disk_sb);
+ bch2_sb_members_cpy_v2_v1(&c->disk_sb);
for_each_online_member(ca, c, i)
bch2_sb_from_fs(c, ca);
__bch2_check_set_feature(c, feat);
}
-/* BCH_SB_FIELD_members_v1: */
-
-static inline bool bch2_member_exists(struct bch_member *m)
-{
- return !bch2_is_zero(&m->uuid, sizeof(m->uuid));
-}
-
-static inline bool bch2_dev_exists(struct bch_sb *sb,
- unsigned dev)
-{
- if (dev < sb->nr_devices) {
- struct bch_member m = bch2_sb_member_get(sb, dev);
- return bch2_member_exists(&m);
- }
- return false;
-}
-
-static inline struct bch_member_cpu bch2_mi_to_cpu(struct bch_member *mi)
-{
- return (struct bch_member_cpu) {
- .nbuckets = le64_to_cpu(mi->nbuckets),
- .first_bucket = le16_to_cpu(mi->first_bucket),
- .bucket_size = le16_to_cpu(mi->bucket_size),
- .group = BCH_MEMBER_GROUP(mi),
- .state = BCH_MEMBER_STATE(mi),
- .discard = BCH_MEMBER_DISCARD(mi),
- .data_allowed = BCH_MEMBER_DATA_ALLOWED(mi),
- .durability = BCH_MEMBER_DURABILITY(mi)
- ? BCH_MEMBER_DURABILITY(mi) - 1
- : 1,
- .freespace_initialized = BCH_MEMBER_FREESPACE_INITIALIZED(mi),
- .valid = bch2_member_exists(mi),
- };
-}
-
void bch2_sb_maybe_downgrade(struct bch_fs *);
void bch2_sb_upgrade(struct bch_fs *, unsigned);
struct bch_member *member)
{
struct bch_dev *ca;
+ unsigned i;
ca = kzalloc(sizeof(*ca), GFP_KERNEL);
if (!ca)
bch2_time_stats_init(&ca->io_latency[WRITE]);
ca->mi = bch2_mi_to_cpu(member);
+
+ for (i = 0; i < ARRAY_SIZE(member->errors); i++)
+ atomic64_set(&ca->errors[i], le64_to_cpu(member->errors[i]));
+
ca->uuid = member->uuid;
ca->nr_btree_reserve = DIV_ROUND_UP(BTREE_NODE_RESERVE,
read_attribute(first_bucket);
read_attribute(nbuckets);
rw_attribute(durability);
-read_attribute(iodone);
+read_attribute(io_done);
+read_attribute(io_errors);
+write_attribute(io_errors_reset);
read_attribute(io_latency_read);
read_attribute(io_latency_write);
NULL
};
-static void dev_iodone_to_text(struct printbuf *out, struct bch_dev *ca)
+static void dev_io_done_to_text(struct printbuf *out, struct bch_dev *ca)
{
int rw, i;
prt_char(out, '\n');
}
- if (attr == &sysfs_iodone)
- dev_iodone_to_text(out, ca);
+ if (attr == &sysfs_io_done)
+ dev_io_done_to_text(out, ca);
+
+ if (attr == &sysfs_io_errors)
+ bch2_dev_io_errors_to_text(out, ca);
sysfs_print(io_latency_read, atomic64_read(&ca->cur_latency[READ]));
sysfs_print(io_latency_write, atomic64_read(&ca->cur_latency[WRITE]));
return ret;
}
+ if (attr == &sysfs_io_errors_reset)
+ bch2_dev_errors_reset(ca);
+
return size;
}
SYSFS_OPS(bch2_dev);
&sysfs_label,
&sysfs_has_data,
- &sysfs_iodone,
+ &sysfs_io_done,
+ &sysfs_io_errors,
+ &sysfs_io_errors_reset,
&sysfs_io_latency_read,
&sysfs_io_latency_write,