diff options
| -rw-r--r-- | fs/xfs/libxfs/xfs_fs.h | 15 | ||||
| -rw-r--r-- | fs/xfs/xfs_healthmon.c | 66 | ||||
| -rw-r--r-- | fs/xfs/xfs_healthmon.h | 16 | ||||
| -rw-r--r-- | fs/xfs/xfs_notify_failure.c | 17 | ||||
| -rw-r--r-- | fs/xfs/xfs_trace.c | 1 | ||||
| -rw-r--r-- | fs/xfs/xfs_trace.h | 38 |
6 files changed, 148 insertions, 5 deletions
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index c8f7011a7ef8..38aeb1b0d87b 100644 --- a/fs/xfs/libxfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h @@ -1014,6 +1014,11 @@ struct xfs_rtgroup_geometry { #define XFS_HEALTH_MONITOR_DOMAIN_INODE (3) #define XFS_HEALTH_MONITOR_DOMAIN_RTGROUP (4) +/* disk events */ +#define XFS_HEALTH_MONITOR_DOMAIN_DATADEV (5) +#define XFS_HEALTH_MONITOR_DOMAIN_RTDEV (6) +#define XFS_HEALTH_MONITOR_DOMAIN_LOGDEV (7) + /* Health monitor event types */ /* status of the monitor itself */ @@ -1031,6 +1036,9 @@ struct xfs_rtgroup_geometry { /* filesystem shutdown */ #define XFS_HEALTH_MONITOR_TYPE_SHUTDOWN (6) +/* media errors */ +#define XFS_HEALTH_MONITOR_TYPE_MEDIA_ERROR (7) + /* lost events */ struct xfs_health_monitor_lost { __u64 count; @@ -1071,6 +1079,12 @@ struct xfs_health_monitor_shutdown { __u32 reasons; }; +/* disk media errors */ +struct xfs_health_monitor_media { + __u64 daddr; + __u64 bbcount; +}; + struct xfs_health_monitor_event { /* XFS_HEALTH_MONITOR_DOMAIN_* */ __u32 domain; @@ -1092,6 +1106,7 @@ struct xfs_health_monitor_event { struct xfs_health_monitor_group group; struct xfs_health_monitor_inode inode; struct xfs_health_monitor_shutdown shutdown; + struct xfs_health_monitor_media media; } e; /* zeroes */ diff --git a/fs/xfs/xfs_healthmon.c b/fs/xfs/xfs_healthmon.c index 97f764e79541..773bd4414d94 100644 --- a/fs/xfs/xfs_healthmon.c +++ b/fs/xfs/xfs_healthmon.c @@ -21,6 +21,7 @@ #include "xfs_health.h" #include "xfs_healthmon.h" #include "xfs_fsops.h" +#include "xfs_notify_failure.h" #include <linux/anon_inodes.h> #include <linux/eventpoll.h> @@ -208,6 +209,19 @@ xfs_healthmon_merge_events( /* yes, we can race to shutdown */ existing->flags |= new->flags; return true; + + case XFS_HEALTHMON_MEDIA_ERROR: + /* physically adjacent errors can merge */ + if (existing->daddr + existing->bbcount == new->daddr) { + existing->bbcount += new->bbcount; + return true; + } + if (new->daddr + new->bbcount == existing->daddr) { + existing->daddr = new->daddr; + existing->bbcount += new->bbcount; + return true; + } + return false; } return false; @@ -522,6 +536,48 @@ xfs_healthmon_report_shutdown( xfs_healthmon_put(hm); } +static inline enum xfs_healthmon_domain +media_error_domain( + enum xfs_device fdev) +{ + switch (fdev) { + case XFS_DEV_DATA: + return XFS_HEALTHMON_DATADEV; + case XFS_DEV_LOG: + return XFS_HEALTHMON_LOGDEV; + case XFS_DEV_RT: + return XFS_HEALTHMON_RTDEV; + } + + ASSERT(0); + return 0; +} + +/* Add a media error event to the reporting queue. */ +void +xfs_healthmon_report_media( + struct xfs_mount *mp, + enum xfs_device fdev, + xfs_daddr_t daddr, + uint64_t bbcount) +{ + struct xfs_healthmon_event event = { + .type = XFS_HEALTHMON_MEDIA_ERROR, + .domain = media_error_domain(fdev), + .daddr = daddr, + .bbcount = bbcount, + }; + struct xfs_healthmon *hm = xfs_healthmon_get(mp); + + if (!hm) + return; + + trace_xfs_healthmon_report_media(hm, fdev, &event); + + xfs_healthmon_push(hm, &event); + xfs_healthmon_put(hm); +} + static inline void xfs_healthmon_reset_outbuf( struct xfs_healthmon *hm) @@ -574,6 +630,9 @@ static const unsigned int domain_map[] = { [XFS_HEALTHMON_AG] = XFS_HEALTH_MONITOR_DOMAIN_AG, [XFS_HEALTHMON_INODE] = XFS_HEALTH_MONITOR_DOMAIN_INODE, [XFS_HEALTHMON_RTGROUP] = XFS_HEALTH_MONITOR_DOMAIN_RTGROUP, + [XFS_HEALTHMON_DATADEV] = XFS_HEALTH_MONITOR_DOMAIN_DATADEV, + [XFS_HEALTHMON_RTDEV] = XFS_HEALTH_MONITOR_DOMAIN_RTDEV, + [XFS_HEALTHMON_LOGDEV] = XFS_HEALTH_MONITOR_DOMAIN_LOGDEV, }; static const unsigned int type_map[] = { @@ -584,6 +643,7 @@ static const unsigned int type_map[] = { [XFS_HEALTHMON_HEALTHY] = XFS_HEALTH_MONITOR_TYPE_HEALTHY, [XFS_HEALTHMON_UNMOUNT] = XFS_HEALTH_MONITOR_TYPE_UNMOUNT, [XFS_HEALTHMON_SHUTDOWN] = XFS_HEALTH_MONITOR_TYPE_SHUTDOWN, + [XFS_HEALTHMON_MEDIA_ERROR] = XFS_HEALTH_MONITOR_TYPE_MEDIA_ERROR, }; /* Render event as a V0 structure */ @@ -635,6 +695,12 @@ xfs_healthmon_format_v0( hme.e.inode.ino = event->ino; hme.e.inode.gen = event->gen; break; + case XFS_HEALTHMON_DATADEV: + case XFS_HEALTHMON_LOGDEV: + case XFS_HEALTHMON_RTDEV: + hme.e.media.daddr = event->daddr; + hme.e.media.bbcount = event->bbcount; + break; default: break; } diff --git a/fs/xfs/xfs_healthmon.h b/fs/xfs/xfs_healthmon.h index 1f68b5d65a8e..54536aac4278 100644 --- a/fs/xfs/xfs_healthmon.h +++ b/fs/xfs/xfs_healthmon.h @@ -79,6 +79,9 @@ enum xfs_healthmon_type { XFS_HEALTHMON_SICK, /* runtime corruption observed */ XFS_HEALTHMON_CORRUPT, /* fsck reported corruption */ XFS_HEALTHMON_HEALTHY, /* fsck reported healthy structure */ + + /* media errors */ + XFS_HEALTHMON_MEDIA_ERROR, }; enum xfs_healthmon_domain { @@ -89,6 +92,11 @@ enum xfs_healthmon_domain { XFS_HEALTHMON_AG, /* allocation group metadata */ XFS_HEALTHMON_INODE, /* inode metadata */ XFS_HEALTHMON_RTGROUP, /* realtime group metadata */ + + /* media errors */ + XFS_HEALTHMON_DATADEV, + XFS_HEALTHMON_RTDEV, + XFS_HEALTHMON_LOGDEV, }; struct xfs_healthmon_event { @@ -126,6 +134,11 @@ struct xfs_healthmon_event { struct { unsigned int flags; }; + /* media errors */ + struct { + xfs_daddr_t daddr; + uint64_t bbcount; + }; }; }; @@ -141,6 +154,9 @@ void xfs_healthmon_report_inode(struct xfs_inode *ip, void xfs_healthmon_report_shutdown(struct xfs_mount *mp, uint32_t flags); +void xfs_healthmon_report_media(struct xfs_mount *mp, enum xfs_device fdev, + xfs_daddr_t daddr, uint64_t bbcount); + long xfs_ioc_health_monitor(struct file *file, struct xfs_health_monitor __user *arg); diff --git a/fs/xfs/xfs_notify_failure.c b/fs/xfs/xfs_notify_failure.c index 6d5002413c2c..1edc4ddd10cd 100644 --- a/fs/xfs/xfs_notify_failure.c +++ b/fs/xfs/xfs_notify_failure.c @@ -22,6 +22,7 @@ #include "xfs_notify_failure.h" #include "xfs_rtgroup.h" #include "xfs_rtrmap_btree.h" +#include "xfs_healthmon.h" #include <linux/mm.h> #include <linux/dax.h> @@ -219,6 +220,8 @@ xfs_dax_notify_logdev_failure( if (error) return error; + xfs_healthmon_report_media(mp, XFS_DEV_LOG, daddr, bblen); + /* * In the pre-remove case the failure notification is attempting to * trigger a force unmount. The expectation is that the device is @@ -252,16 +255,20 @@ xfs_dax_notify_dev_failure( uint64_t bblen; struct xfs_group *xg = NULL; - if (!xfs_has_rmapbt(mp)) { - xfs_debug(mp, "notify_failure() needs rmapbt enabled!"); - return -EOPNOTSUPP; - } - error = xfs_dax_translate_range(xfs_group_type_buftarg(mp, type), offset, len, &daddr, &bblen); if (error) return error; + xfs_healthmon_report_media(mp, + type == XG_TYPE_RTG ? XFS_DEV_RT : XFS_DEV_DATA, + daddr, bblen); + + if (!xfs_has_rmapbt(mp)) { + xfs_debug(mp, "notify_failure() needs rmapbt enabled!"); + return -EOPNOTSUPP; + } + if (type == XG_TYPE_RTG) { start_bno = xfs_daddr_to_rtb(mp, daddr); end_bno = xfs_daddr_to_rtb(mp, daddr + bblen - 1); diff --git a/fs/xfs/xfs_trace.c b/fs/xfs/xfs_trace.c index d42b864a3837..08ddab700a6c 100644 --- a/fs/xfs/xfs_trace.c +++ b/fs/xfs/xfs_trace.c @@ -53,6 +53,7 @@ #include "xfs_zone_priv.h" #include "xfs_health.h" #include "xfs_healthmon.h" +#include "xfs_notify_failure.h" /* * We include this last to have the helpers above available for the trace diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index ec99a6d3dd31..fe7295a4e917 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -6086,6 +6086,12 @@ DECLARE_EVENT_CLASS(xfs_healthmon_event_class, __entry->ino = event->ino; __entry->gen = event->gen; break; + case XFS_HEALTHMON_DATADEV: + case XFS_HEALTHMON_LOGDEV: + case XFS_HEALTHMON_RTDEV: + __entry->offset = event->daddr; + __entry->length = event->bbcount; + break; } ), TP_printk("dev %d:%d type %s domain %s mask 0x%x ino 0x%llx gen 0x%x offset 0x%llx len 0x%llx group 0x%x lost %llu", @@ -6228,6 +6234,38 @@ TRACE_EVENT(xfs_healthmon_report_shutdown, __print_flags(__entry->shutdown_flags, "|", XFS_SHUTDOWN_STRINGS)) ); +#define XFS_DEVICE_STRINGS \ + { XFS_DEV_DATA, "datadev" }, \ + { XFS_DEV_RT, "rtdev" }, \ + { XFS_DEV_LOG, "logdev" } + +TRACE_DEFINE_ENUM(XFS_DEV_DATA); +TRACE_DEFINE_ENUM(XFS_DEV_RT); +TRACE_DEFINE_ENUM(XFS_DEV_LOG); + +TRACE_EVENT(xfs_healthmon_report_media, + TP_PROTO(const struct xfs_healthmon *hm, enum xfs_device fdev, + const struct xfs_healthmon_event *event), + TP_ARGS(hm, fdev, event), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(unsigned int, error_dev) + __field(uint64_t, daddr) + __field(uint64_t, bbcount) + ), + TP_fast_assign( + __entry->dev = hm->dev; + __entry->error_dev = fdev; + __entry->daddr = event->daddr; + __entry->bbcount = event->bbcount; + ), + TP_printk("dev %d:%d %s daddr 0x%llx bbcount 0x%llx", + MAJOR(__entry->dev), MINOR(__entry->dev), + __print_symbolic(__entry->error_dev, XFS_DEVICE_STRINGS), + __entry->daddr, + __entry->bbcount) +); + #endif /* _TRACE_XFS_H */ #undef TRACE_INCLUDE_PATH |
