From 602544773763da411ffa67567fa1d146f3a40231 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 12 Jan 2026 16:31:09 -0800 Subject: uapi: promote EFSCORRUPTED and EUCLEAN to errno.h Stop definining these privately and instead move them to the uapi errno.h so that they become canonical instead of copy pasta. Cc: linux-api@vger.kernel.org Signed-off-by: Darrick J. Wong Link: https://patch.msgid.link/176826402587.3490369.17659117524205214600.stgit@frogsfrogsfrogs Reviewed-by: Gao Xiang Reviewed-by: Christoph Hellwig Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- arch/alpha/include/uapi/asm/errno.h | 2 ++ arch/mips/include/uapi/asm/errno.h | 2 ++ arch/parisc/include/uapi/asm/errno.h | 2 ++ arch/sparc/include/uapi/asm/errno.h | 2 ++ fs/erofs/internal.h | 2 -- fs/ext2/ext2.h | 1 - fs/ext4/ext4.h | 3 --- fs/f2fs/f2fs.h | 3 --- fs/minix/minix.h | 2 -- fs/udf/udf_sb.h | 2 -- fs/xfs/xfs_linux.h | 2 -- include/linux/jbd2.h | 3 --- include/uapi/asm-generic/errno.h | 2 ++ tools/arch/alpha/include/uapi/asm/errno.h | 2 ++ tools/arch/mips/include/uapi/asm/errno.h | 2 ++ tools/arch/parisc/include/uapi/asm/errno.h | 2 ++ tools/arch/sparc/include/uapi/asm/errno.h | 2 ++ tools/include/uapi/asm-generic/errno.h | 2 ++ 18 files changed, 20 insertions(+), 18 deletions(-) diff --git a/arch/alpha/include/uapi/asm/errno.h b/arch/alpha/include/uapi/asm/errno.h index 3d265f6babaf..6791f6508632 100644 --- a/arch/alpha/include/uapi/asm/errno.h +++ b/arch/alpha/include/uapi/asm/errno.h @@ -55,6 +55,7 @@ #define ENOSR 82 /* Out of streams resources */ #define ETIME 83 /* Timer expired */ #define EBADMSG 84 /* Not a data message */ +#define EFSBADCRC EBADMSG /* Bad CRC detected */ #define EPROTO 85 /* Protocol error */ #define ENODATA 86 /* No data available */ #define ENOSTR 87 /* Device not a stream */ @@ -96,6 +97,7 @@ #define EREMCHG 115 /* Remote address changed */ #define EUCLEAN 117 /* Structure needs cleaning */ +#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ #define ENOTNAM 118 /* Not a XENIX named type file */ #define ENAVAIL 119 /* No XENIX semaphores available */ #define EISNAM 120 /* Is a named type file */ diff --git a/arch/mips/include/uapi/asm/errno.h b/arch/mips/include/uapi/asm/errno.h index 2fb714e2d6d8..c01ed91b1ef4 100644 --- a/arch/mips/include/uapi/asm/errno.h +++ b/arch/mips/include/uapi/asm/errno.h @@ -50,6 +50,7 @@ #define EDOTDOT 73 /* RFS specific error */ #define EMULTIHOP 74 /* Multihop attempted */ #define EBADMSG 77 /* Not a data message */ +#define EFSBADCRC EBADMSG /* Bad CRC detected */ #define ENAMETOOLONG 78 /* File name too long */ #define EOVERFLOW 79 /* Value too large for defined data type */ #define ENOTUNIQ 80 /* Name not unique on network */ @@ -88,6 +89,7 @@ #define EISCONN 133 /* Transport endpoint is already connected */ #define ENOTCONN 134 /* Transport endpoint is not connected */ #define EUCLEAN 135 /* Structure needs cleaning */ +#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ #define ENOTNAM 137 /* Not a XENIX named type file */ #define ENAVAIL 138 /* No XENIX semaphores available */ #define EISNAM 139 /* Is a named type file */ diff --git a/arch/parisc/include/uapi/asm/errno.h b/arch/parisc/include/uapi/asm/errno.h index 8d94739d75c6..8cbc07c1903e 100644 --- a/arch/parisc/include/uapi/asm/errno.h +++ b/arch/parisc/include/uapi/asm/errno.h @@ -36,6 +36,7 @@ #define EDOTDOT 66 /* RFS specific error */ #define EBADMSG 67 /* Not a data message */ +#define EFSBADCRC EBADMSG /* Bad CRC detected */ #define EUSERS 68 /* Too many users */ #define EDQUOT 69 /* Quota exceeded */ #define ESTALE 70 /* Stale file handle */ @@ -62,6 +63,7 @@ #define ERESTART 175 /* Interrupted system call should be restarted */ #define ESTRPIPE 176 /* Streams pipe error */ #define EUCLEAN 177 /* Structure needs cleaning */ +#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ #define ENOTNAM 178 /* Not a XENIX named type file */ #define ENAVAIL 179 /* No XENIX semaphores available */ #define EISNAM 180 /* Is a named type file */ diff --git a/arch/sparc/include/uapi/asm/errno.h b/arch/sparc/include/uapi/asm/errno.h index 81a732b902ee..4a41e7835fd5 100644 --- a/arch/sparc/include/uapi/asm/errno.h +++ b/arch/sparc/include/uapi/asm/errno.h @@ -48,6 +48,7 @@ #define ENOSR 74 /* Out of streams resources */ #define ENOMSG 75 /* No message of desired type */ #define EBADMSG 76 /* Not a data message */ +#define EFSBADCRC EBADMSG /* Bad CRC detected */ #define EIDRM 77 /* Identifier removed */ #define EDEADLK 78 /* Resource deadlock would occur */ #define ENOLCK 79 /* No record locks available */ @@ -91,6 +92,7 @@ #define ENOTUNIQ 115 /* Name not unique on network */ #define ERESTART 116 /* Interrupted syscall should be restarted */ #define EUCLEAN 117 /* Structure needs cleaning */ +#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ #define ENOTNAM 118 /* Not a XENIX named type file */ #define ENAVAIL 119 /* No XENIX semaphores available */ #define EISNAM 120 /* Is a named type file */ diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h index f7f622836198..d06e99baf5d5 100644 --- a/fs/erofs/internal.h +++ b/fs/erofs/internal.h @@ -541,6 +541,4 @@ long erofs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); long erofs_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); -#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ - #endif /* __EROFS_INTERNAL_H */ diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index cf97b76e9fd3..5e0c6c5fcb6c 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h @@ -357,7 +357,6 @@ struct ext2_inode { */ #define EXT2_VALID_FS 0x0001 /* Unmounted cleanly */ #define EXT2_ERROR_FS 0x0002 /* Errors detected */ -#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ /* * Mount flags diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 56112f201cac..62c091b52bac 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -3938,7 +3938,4 @@ extern int ext4_block_write_begin(handle_t *handle, struct folio *folio, get_block_t *get_block); #endif /* __KERNEL__ */ -#define EFSBADCRC EBADMSG /* Bad CRC detected */ -#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ - #endif /* _EXT4_H */ diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 20edbb99b814..9f3aa3c7f126 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -5004,7 +5004,4 @@ static inline void f2fs_invalidate_internal_cache(struct f2fs_sb_info *sbi, f2fs_invalidate_compress_pages_range(sbi, blkaddr, len); } -#define EFSBADCRC EBADMSG /* Bad CRC detected */ -#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ - #endif /* _LINUX_F2FS_H */ diff --git a/fs/minix/minix.h b/fs/minix/minix.h index 2bfaf377f208..7e1f652f16d3 100644 --- a/fs/minix/minix.h +++ b/fs/minix/minix.h @@ -175,6 +175,4 @@ static inline int minix_test_bit(int nr, const void *vaddr) __minix_error_inode((inode), __func__, __LINE__, \ (fmt), ##__VA_ARGS__) -#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ - #endif /* FS_MINIX_H */ diff --git a/fs/udf/udf_sb.h b/fs/udf/udf_sb.h index 08ec8756b948..8399accc788d 100644 --- a/fs/udf/udf_sb.h +++ b/fs/udf/udf_sb.h @@ -55,8 +55,6 @@ #define MF_DUPLICATE_MD 0x01 #define MF_MIRROR_FE_LOADED 0x02 -#define EFSCORRUPTED EUCLEAN - struct udf_meta_data { __u32 s_meta_file_loc; __u32 s_mirror_file_loc; diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h index 4dd747bdbcca..55064228c4d5 100644 --- a/fs/xfs/xfs_linux.h +++ b/fs/xfs/xfs_linux.h @@ -121,8 +121,6 @@ typedef __u32 xfs_nlink_t; #define ENOATTR ENODATA /* Attribute not found */ #define EWRONGFS EINVAL /* Mount with wrong filesystem type */ -#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ -#define EFSBADCRC EBADMSG /* Bad CRC detected */ #define __return_address __builtin_return_address(0) diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index f5eaf76198f3..a53a00d36228 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1815,7 +1815,4 @@ static inline int jbd2_handle_buffer_credits(handle_t *handle) #endif /* __KERNEL__ */ -#define EFSBADCRC EBADMSG /* Bad CRC detected */ -#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ - #endif /* _LINUX_JBD2_H */ diff --git a/include/uapi/asm-generic/errno.h b/include/uapi/asm-generic/errno.h index cf9c51ac49f9..92e7ae493ee3 100644 --- a/include/uapi/asm-generic/errno.h +++ b/include/uapi/asm-generic/errno.h @@ -55,6 +55,7 @@ #define EMULTIHOP 72 /* Multihop attempted */ #define EDOTDOT 73 /* RFS specific error */ #define EBADMSG 74 /* Not a data message */ +#define EFSBADCRC EBADMSG /* Bad CRC detected */ #define EOVERFLOW 75 /* Value too large for defined data type */ #define ENOTUNIQ 76 /* Name not unique on network */ #define EBADFD 77 /* File descriptor in bad state */ @@ -98,6 +99,7 @@ #define EINPROGRESS 115 /* Operation now in progress */ #define ESTALE 116 /* Stale file handle */ #define EUCLEAN 117 /* Structure needs cleaning */ +#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ #define ENOTNAM 118 /* Not a XENIX named type file */ #define ENAVAIL 119 /* No XENIX semaphores available */ #define EISNAM 120 /* Is a named type file */ diff --git a/tools/arch/alpha/include/uapi/asm/errno.h b/tools/arch/alpha/include/uapi/asm/errno.h index 3d265f6babaf..6791f6508632 100644 --- a/tools/arch/alpha/include/uapi/asm/errno.h +++ b/tools/arch/alpha/include/uapi/asm/errno.h @@ -55,6 +55,7 @@ #define ENOSR 82 /* Out of streams resources */ #define ETIME 83 /* Timer expired */ #define EBADMSG 84 /* Not a data message */ +#define EFSBADCRC EBADMSG /* Bad CRC detected */ #define EPROTO 85 /* Protocol error */ #define ENODATA 86 /* No data available */ #define ENOSTR 87 /* Device not a stream */ @@ -96,6 +97,7 @@ #define EREMCHG 115 /* Remote address changed */ #define EUCLEAN 117 /* Structure needs cleaning */ +#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ #define ENOTNAM 118 /* Not a XENIX named type file */ #define ENAVAIL 119 /* No XENIX semaphores available */ #define EISNAM 120 /* Is a named type file */ diff --git a/tools/arch/mips/include/uapi/asm/errno.h b/tools/arch/mips/include/uapi/asm/errno.h index 2fb714e2d6d8..c01ed91b1ef4 100644 --- a/tools/arch/mips/include/uapi/asm/errno.h +++ b/tools/arch/mips/include/uapi/asm/errno.h @@ -50,6 +50,7 @@ #define EDOTDOT 73 /* RFS specific error */ #define EMULTIHOP 74 /* Multihop attempted */ #define EBADMSG 77 /* Not a data message */ +#define EFSBADCRC EBADMSG /* Bad CRC detected */ #define ENAMETOOLONG 78 /* File name too long */ #define EOVERFLOW 79 /* Value too large for defined data type */ #define ENOTUNIQ 80 /* Name not unique on network */ @@ -88,6 +89,7 @@ #define EISCONN 133 /* Transport endpoint is already connected */ #define ENOTCONN 134 /* Transport endpoint is not connected */ #define EUCLEAN 135 /* Structure needs cleaning */ +#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ #define ENOTNAM 137 /* Not a XENIX named type file */ #define ENAVAIL 138 /* No XENIX semaphores available */ #define EISNAM 139 /* Is a named type file */ diff --git a/tools/arch/parisc/include/uapi/asm/errno.h b/tools/arch/parisc/include/uapi/asm/errno.h index 8d94739d75c6..8cbc07c1903e 100644 --- a/tools/arch/parisc/include/uapi/asm/errno.h +++ b/tools/arch/parisc/include/uapi/asm/errno.h @@ -36,6 +36,7 @@ #define EDOTDOT 66 /* RFS specific error */ #define EBADMSG 67 /* Not a data message */ +#define EFSBADCRC EBADMSG /* Bad CRC detected */ #define EUSERS 68 /* Too many users */ #define EDQUOT 69 /* Quota exceeded */ #define ESTALE 70 /* Stale file handle */ @@ -62,6 +63,7 @@ #define ERESTART 175 /* Interrupted system call should be restarted */ #define ESTRPIPE 176 /* Streams pipe error */ #define EUCLEAN 177 /* Structure needs cleaning */ +#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ #define ENOTNAM 178 /* Not a XENIX named type file */ #define ENAVAIL 179 /* No XENIX semaphores available */ #define EISNAM 180 /* Is a named type file */ diff --git a/tools/arch/sparc/include/uapi/asm/errno.h b/tools/arch/sparc/include/uapi/asm/errno.h index 81a732b902ee..4a41e7835fd5 100644 --- a/tools/arch/sparc/include/uapi/asm/errno.h +++ b/tools/arch/sparc/include/uapi/asm/errno.h @@ -48,6 +48,7 @@ #define ENOSR 74 /* Out of streams resources */ #define ENOMSG 75 /* No message of desired type */ #define EBADMSG 76 /* Not a data message */ +#define EFSBADCRC EBADMSG /* Bad CRC detected */ #define EIDRM 77 /* Identifier removed */ #define EDEADLK 78 /* Resource deadlock would occur */ #define ENOLCK 79 /* No record locks available */ @@ -91,6 +92,7 @@ #define ENOTUNIQ 115 /* Name not unique on network */ #define ERESTART 116 /* Interrupted syscall should be restarted */ #define EUCLEAN 117 /* Structure needs cleaning */ +#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ #define ENOTNAM 118 /* Not a XENIX named type file */ #define ENAVAIL 119 /* No XENIX semaphores available */ #define EISNAM 120 /* Is a named type file */ diff --git a/tools/include/uapi/asm-generic/errno.h b/tools/include/uapi/asm-generic/errno.h index cf9c51ac49f9..92e7ae493ee3 100644 --- a/tools/include/uapi/asm-generic/errno.h +++ b/tools/include/uapi/asm-generic/errno.h @@ -55,6 +55,7 @@ #define EMULTIHOP 72 /* Multihop attempted */ #define EDOTDOT 73 /* RFS specific error */ #define EBADMSG 74 /* Not a data message */ +#define EFSBADCRC EBADMSG /* Bad CRC detected */ #define EOVERFLOW 75 /* Value too large for defined data type */ #define ENOTUNIQ 76 /* Name not unique on network */ #define EBADFD 77 /* File descriptor in bad state */ @@ -98,6 +99,7 @@ #define EINPROGRESS 115 /* Operation now in progress */ #define ESTALE 116 /* Stale file handle */ #define EUCLEAN 117 /* Structure needs cleaning */ +#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ #define ENOTNAM 118 /* Not a XENIX named type file */ #define ENAVAIL 119 /* No XENIX semaphores available */ #define EISNAM 120 /* Is a named type file */ -- cgit v1.2.3 From 21945e6cb5168395d7d6f9052cd16ec4eac13973 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 12 Jan 2026 16:31:25 -0800 Subject: fs: report filesystem and file I/O errors to fsnotify Create some wrapper code around struct super_block so that filesystems have a standard way to queue filesystem metadata and file I/O error reports to have them sent to fsnotify. If a filesystem wants to provide an error number, it must supply only negative error numbers. These are stored internally as negative numbers, but they are converted to positive error numbers before being passed to fanotify, per the fanotify(7) manpage. Implementations of super_operations::report_error are passed the raw internal event data. Note that we have to play some shenanigans with mempools and queue_work so that the error handling doesn't happen outside of process context, and the event handler functions (both ->report_error and fsnotify) can handle file I/O error messages without having to worry about whatever locks might be held. This asynchronicity requires that unmount wait for pending events to clear. Add a new callback to the superblock operations structure so that filesystem drivers can themselves respond to file I/O errors if they so desire. This will be used for an upcoming self-healing patchset for XFS. Suggested-by: Christoph Hellwig Signed-off-by: Darrick J. Wong Link: https://patch.msgid.link/176826402610.3490369.4378391061533403171.stgit@frogsfrogsfrogs Reviewed-by: Jan Kara Reviewed-by: Christoph Hellwig Signed-off-by: Christian Brauner --- fs/Makefile | 2 +- fs/fserror.c | 194 +++++++++++++++++++++++++++++++++++++++++ fs/super.c | 3 + include/linux/fs/super_types.h | 7 ++ include/linux/fserror.h | 75 ++++++++++++++++ 5 files changed, 280 insertions(+), 1 deletion(-) create mode 100644 fs/fserror.c create mode 100644 include/linux/fserror.h diff --git a/fs/Makefile b/fs/Makefile index a04274a3c854..f238cc5ea2e9 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -16,7 +16,7 @@ obj-y := open.o read_write.o file_table.o super.o \ stack.o fs_struct.o statfs.o fs_pin.o nsfs.o \ fs_dirent.o fs_context.o fs_parser.o fsopen.o init.o \ kernel_read_file.o mnt_idmapping.o remap_range.o pidfs.o \ - file_attr.o + file_attr.o fserror.o obj-$(CONFIG_BUFFER_HEAD) += buffer.o mpage.o obj-$(CONFIG_PROC_FS) += proc_namespace.o diff --git a/fs/fserror.c b/fs/fserror.c new file mode 100644 index 000000000000..06ca86adab9b --- /dev/null +++ b/fs/fserror.c @@ -0,0 +1,194 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2025 Oracle. All Rights Reserved. + * Author: Darrick J. Wong + */ +#include +#include +#include +#include + +#define FSERROR_DEFAULT_EVENT_POOL_SIZE (32) + +static struct mempool fserror_events_pool; + +void fserror_mount(struct super_block *sb) +{ + /* + * The pending error counter is biased by 1 so that we don't wake_var + * until we're actually trying to unmount. + */ + refcount_set(&sb->s_pending_errors, 1); +} + +void fserror_unmount(struct super_block *sb) +{ + /* + * If we don't drop the pending error count to zero, then wait for it + * to drop below 1, which means that the pending errors cleared and + * hopefully we didn't saturate with 1 billion+ concurrent events. + */ + if (!refcount_dec_and_test(&sb->s_pending_errors)) + wait_var_event(&sb->s_pending_errors, + refcount_read(&sb->s_pending_errors) < 1); +} + +static inline void fserror_pending_dec(struct super_block *sb) +{ + if (refcount_dec_and_test(&sb->s_pending_errors)) + wake_up_var(&sb->s_pending_errors); +} + +static inline void fserror_free_event(struct fserror_event *event) +{ + fserror_pending_dec(event->sb); + mempool_free(event, &fserror_events_pool); +} + +static void fserror_worker(struct work_struct *work) +{ + struct fserror_event *event = + container_of(work, struct fserror_event, work); + struct super_block *sb = event->sb; + + if (sb->s_flags & SB_ACTIVE) { + struct fs_error_report report = { + /* send positive error number to userspace */ + .error = -event->error, + .inode = event->inode, + .sb = event->sb, + }; + + if (sb->s_op->report_error) + sb->s_op->report_error(event); + + fsnotify(FS_ERROR, &report, FSNOTIFY_EVENT_ERROR, NULL, NULL, + NULL, 0); + } + + iput(event->inode); + fserror_free_event(event); +} + +static inline struct fserror_event *fserror_alloc_event(struct super_block *sb, + gfp_t gfp_flags) +{ + struct fserror_event *event = NULL; + + /* + * If pending_errors already reached zero or is no longer active, + * the superblock is being deactivated so there's no point in + * continuing. + * + * The order of the check of s_pending_errors and SB_ACTIVE are + * mandated by order of accesses in generic_shutdown_super and + * fserror_unmount. Barriers are implicitly provided by the refcount + * manipulations in this function and fserror_unmount. + */ + if (!refcount_inc_not_zero(&sb->s_pending_errors)) + return NULL; + if (!(sb->s_flags & SB_ACTIVE)) + goto out_pending; + + event = mempool_alloc(&fserror_events_pool, gfp_flags); + if (!event) + goto out_pending; + + /* mempool_alloc doesn't support GFP_ZERO */ + memset(event, 0, sizeof(*event)); + event->sb = sb; + INIT_WORK(&event->work, fserror_worker); + + return event; + +out_pending: + fserror_pending_dec(sb); + return NULL; +} + +/** + * fserror_report - report a filesystem error of some kind + * + * @sb: superblock of the filesystem + * @inode: inode within that filesystem, if applicable + * @type: type of error encountered + * @pos: start of inode range affected, if applicable + * @len: length of inode range affected, if applicable + * @error: error number encountered, must be negative + * @gfp: memory allocation flags for conveying the event to a worker, + * since this function can be called from atomic contexts + * + * Report details of a filesystem error to the super_operations::report_error + * callback if present; and to fsnotify for distribution to userspace. @sb, + * @gfp, @type, and @error must all be specified. For file I/O errors, the + * @inode, @pos, and @len fields must also be specified. For file metadata + * errors, @inode must be specified. If @inode is not NULL, then @inode->i_sb + * must point to @sb. + * + * Reporting work is deferred to a workqueue to ensure that ->report_error is + * called from process context without any locks held. An active reference to + * the inode is maintained until event handling is complete, and unmount will + * wait for queued events to drain. + */ +void fserror_report(struct super_block *sb, struct inode *inode, + enum fserror_type type, loff_t pos, u64 len, int error, + gfp_t gfp) +{ + struct fserror_event *event; + + /* sb and inode must be from the same filesystem */ + WARN_ON_ONCE(inode && inode->i_sb != sb); + + /* error number must be negative */ + WARN_ON_ONCE(error >= 0); + + event = fserror_alloc_event(sb, gfp); + if (!event) + goto lost; + + event->type = type; + event->pos = pos; + event->len = len; + event->error = error; + + /* + * Can't iput from non-sleeping context, so grabbing another reference + * to the inode must be the last thing before submitting the event. + */ + if (inode) { + event->inode = igrab(inode); + if (!event->inode) + goto lost_event; + } + + /* + * Use schedule_work here even if we're already in process context so + * that fsnotify and super_operations::report_error implementations are + * guaranteed to run in process context without any locks held. Since + * errors are supposed to be rare, the overhead shouldn't kill us any + * more than the failing device will. + */ + schedule_work(&event->work); + return; + +lost_event: + fserror_free_event(event); +lost: + if (inode) + pr_err_ratelimited( + "%s: lost file I/O error report for ino %lu type %u pos 0x%llx len 0x%llx error %d", + sb->s_id, inode->i_ino, type, pos, len, error); + else + pr_err_ratelimited( + "%s: lost filesystem error report for type %u error %d", + sb->s_id, type, error); +} +EXPORT_SYMBOL_GPL(fserror_report); + +static int __init fserror_init(void) +{ + return mempool_init_kmalloc_pool(&fserror_events_pool, + FSERROR_DEFAULT_EVENT_POOL_SIZE, + sizeof(struct fserror_event)); +} +fs_initcall(fserror_init); diff --git a/fs/super.c b/fs/super.c index 3d85265d1400..b13c1fd6a6f4 100644 --- a/fs/super.c +++ b/fs/super.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include "internal.h" @@ -363,6 +364,7 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags, spin_lock_init(&s->s_inode_list_lock); INIT_LIST_HEAD(&s->s_inodes_wb); spin_lock_init(&s->s_inode_wblist_lock); + fserror_mount(s); s->s_count = 1; atomic_set(&s->s_active, 1); @@ -622,6 +624,7 @@ void generic_shutdown_super(struct super_block *sb) sync_filesystem(sb); sb->s_flags &= ~SB_ACTIVE; + fserror_unmount(sb); cgroup_writeback_umount(sb); /* Evict all inodes with zero refcount. */ diff --git a/include/linux/fs/super_types.h b/include/linux/fs/super_types.h index 6bd3009e09b3..97a8552d8f2b 100644 --- a/include/linux/fs/super_types.h +++ b/include/linux/fs/super_types.h @@ -35,6 +35,7 @@ struct user_namespace; struct workqueue_struct; struct writeback_control; struct xattr_handler; +struct fserror_event; extern struct super_block *blockdev_superblock; @@ -124,6 +125,9 @@ struct super_operations { */ int (*remove_bdev)(struct super_block *sb, struct block_device *bdev); void (*shutdown)(struct super_block *sb); + + /* Report a filesystem error */ + void (*report_error)(const struct fserror_event *event); }; struct super_block { @@ -268,6 +272,9 @@ struct super_block { spinlock_t s_inode_wblist_lock; struct list_head s_inodes_wb; /* writeback inodes */ long s_min_writeback_pages; + + /* number of fserrors that are being sent to fsnotify/filesystems */ + refcount_t s_pending_errors; } __randomize_layout; /* diff --git a/include/linux/fserror.h b/include/linux/fserror.h new file mode 100644 index 000000000000..5e1ad78c346e --- /dev/null +++ b/include/linux/fserror.h @@ -0,0 +1,75 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (c) 2025 Oracle. All Rights Reserved. + * Author: Darrick J. Wong + */ +#ifndef _LINUX_FSERROR_H__ +#define _LINUX_FSERROR_H__ + +void fserror_mount(struct super_block *sb); +void fserror_unmount(struct super_block *sb); + +enum fserror_type { + /* pagecache I/O failed */ + FSERR_BUFFERED_READ, + FSERR_BUFFERED_WRITE, + + /* direct I/O failed */ + FSERR_DIRECTIO_READ, + FSERR_DIRECTIO_WRITE, + + /* out of band media error reported */ + FSERR_DATA_LOST, + + /* filesystem metadata */ + FSERR_METADATA, +}; + +struct fserror_event { + struct work_struct work; + struct super_block *sb; + struct inode *inode; + loff_t pos; + u64 len; + enum fserror_type type; + + /* negative error number */ + int error; +}; + +void fserror_report(struct super_block *sb, struct inode *inode, + enum fserror_type type, loff_t pos, u64 len, int error, + gfp_t gfp); + +static inline void fserror_report_io(struct inode *inode, + enum fserror_type type, loff_t pos, + u64 len, int error, gfp_t gfp) +{ + fserror_report(inode->i_sb, inode, type, pos, len, error, gfp); +} + +static inline void fserror_report_data_lost(struct inode *inode, loff_t pos, + u64 len, gfp_t gfp) +{ + fserror_report(inode->i_sb, inode, FSERR_DATA_LOST, pos, len, -EIO, + gfp); +} + +static inline void fserror_report_file_metadata(struct inode *inode, int error, + gfp_t gfp) +{ + fserror_report(inode->i_sb, inode, FSERR_METADATA, 0, 0, error, gfp); +} + +static inline void fserror_report_metadata(struct super_block *sb, int error, + gfp_t gfp) +{ + fserror_report(sb, NULL, FSERR_METADATA, 0, 0, error, gfp); +} + +static inline void fserror_report_shutdown(struct super_block *sb, gfp_t gfp) +{ + fserror_report(sb, NULL, FSERR_METADATA, 0, 0, -ESHUTDOWN, gfp); +} + +#endif /* _LINUX_FSERROR_H__ */ -- cgit v1.2.3 From a9d573ee88af980f14fdadb5c12bbf6a195fb3f1 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 12 Jan 2026 16:31:40 -0800 Subject: iomap: report file I/O errors to the VFS Wire up iomap so that it reports all file read and write errors to the VFS (and hence fsnotify) via the new fserror mechanism. Signed-off-by: Darrick J. Wong Link: https://patch.msgid.link/176826402631.3490369.729008983502742314.stgit@frogsfrogsfrogs Reviewed-by: Christoph Hellwig Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- fs/iomap/buffered-io.c | 23 ++++++++++++++++++++++- fs/iomap/direct-io.c | 12 ++++++++++++ fs/iomap/ioend.c | 6 ++++++ 3 files changed, 40 insertions(+), 1 deletion(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index e5c1ca440d93..b21e989b9fa5 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -8,6 +8,7 @@ #include #include #include +#include #include "internal.h" #include "trace.h" @@ -371,8 +372,11 @@ static int iomap_read_inline_data(const struct iomap_iter *iter, if (folio_test_uptodate(folio)) return 0; - if (WARN_ON_ONCE(size > iomap->length)) + if (WARN_ON_ONCE(size > iomap->length)) { + fserror_report_io(iter->inode, FSERR_BUFFERED_READ, + iomap->offset, size, -EIO, GFP_NOFS); return -EIO; + } if (offset > 0) ifs_alloc(iter->inode, folio, iter->flags); @@ -399,6 +403,11 @@ void iomap_finish_folio_read(struct folio *folio, size_t off, size_t len, spin_unlock_irqrestore(&ifs->state_lock, flags); } + if (error) + fserror_report_io(folio->mapping->host, FSERR_BUFFERED_READ, + folio_pos(folio) + off, len, error, + GFP_ATOMIC); + if (finished) folio_end_read(folio, uptodate); } @@ -540,6 +549,10 @@ static int iomap_read_folio_iter(struct iomap_iter *iter, if (!*bytes_submitted) iomap_read_init(folio); ret = ctx->ops->read_folio_range(iter, ctx, plen); + if (ret < 0) + fserror_report_io(iter->inode, + FSERR_BUFFERED_READ, pos, + plen, ret, GFP_NOFS); if (ret) return ret; *bytes_submitted += plen; @@ -815,6 +828,10 @@ static int __iomap_write_begin(const struct iomap_iter *iter, else status = iomap_bio_read_folio_range_sync(iter, folio, block_start, plen); + if (status < 0) + fserror_report_io(iter->inode, + FSERR_BUFFERED_READ, pos, + len, status, GFP_NOFS); if (status) return status; } @@ -1805,6 +1822,7 @@ int iomap_writeback_folio(struct iomap_writepage_ctx *wpc, struct folio *folio) u64 pos = folio_pos(folio); u64 end_pos = pos + folio_size(folio); u64 end_aligned = 0; + loff_t orig_pos = pos; size_t bytes_submitted = 0; int error = 0; u32 rlen; @@ -1848,6 +1866,9 @@ int iomap_writeback_folio(struct iomap_writepage_ctx *wpc, struct folio *folio) if (bytes_submitted) wpc->nr_folios++; + if (error && pos > orig_pos) + fserror_report_io(inode, FSERR_BUFFERED_WRITE, orig_pos, 0, + error, GFP_NOFS); /* * We can have dirty bits set past end of file in page_mkwrite path diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c index 8e273408453a..a06c73eaa890 100644 --- a/fs/iomap/direct-io.c +++ b/fs/iomap/direct-io.c @@ -7,6 +7,7 @@ #include #include #include +#include #include "internal.h" #include "trace.h" @@ -78,6 +79,13 @@ static void iomap_dio_submit_bio(const struct iomap_iter *iter, } } +static inline enum fserror_type iomap_dio_err_type(const struct iomap_dio *dio) +{ + if (dio->flags & IOMAP_DIO_WRITE) + return FSERR_DIRECTIO_WRITE; + return FSERR_DIRECTIO_READ; +} + ssize_t iomap_dio_complete(struct iomap_dio *dio) { const struct iomap_dio_ops *dops = dio->dops; @@ -87,6 +95,10 @@ ssize_t iomap_dio_complete(struct iomap_dio *dio) if (dops && dops->end_io) ret = dops->end_io(iocb, dio->size, ret, dio->flags); + if (dio->error) + fserror_report_io(file_inode(iocb->ki_filp), + iomap_dio_err_type(dio), offset, dio->size, + dio->error, GFP_NOFS); if (likely(!ret)) { ret = dio->size; diff --git a/fs/iomap/ioend.c b/fs/iomap/ioend.c index 86f44922ed3b..5b27ee988967 100644 --- a/fs/iomap/ioend.c +++ b/fs/iomap/ioend.c @@ -6,6 +6,7 @@ #include #include #include +#include #include "internal.h" #include "trace.h" @@ -55,6 +56,11 @@ static u32 iomap_finish_ioend_buffered(struct iomap_ioend *ioend) /* walk all folios in bio, ending page IO on them */ bio_for_each_folio_all(fi, bio) { + if (ioend->io_error) + fserror_report_io(inode, FSERR_BUFFERED_WRITE, + folio_pos(fi.folio) + fi.offset, + fi.length, ioend->io_error, + GFP_ATOMIC); iomap_finish_folio_write(inode, fi.folio, fi.length); folio_count++; } -- cgit v1.2.3 From efd87a10072966e1555e1288a570c883c73182c3 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 12 Jan 2026 16:31:56 -0800 Subject: xfs: report fs metadata errors via fsnotify Report filesystem corruption problems to the fserror helpers so that fsnotify can also convey metadata problems to userspace. Signed-off-by: Darrick J. Wong Link: https://patch.msgid.link/176826402652.3490369.2609467634858507969.stgit@frogsfrogsfrogs Reviewed-by: Christoph Hellwig Signed-off-by: Christian Brauner --- fs/xfs/xfs_fsops.c | 4 ++++ fs/xfs/xfs_health.c | 14 ++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 0ada73569394..b7c21f68edc7 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -26,6 +26,8 @@ #include "xfs_rtrefcount_btree.h" #include "xfs_metafile.h" +#include + /* * Write new AG headers to disk. Non-transactional, but need to be * written and completed prior to the growfs transaction being logged. @@ -540,6 +542,8 @@ xfs_do_force_shutdown( "Please unmount the filesystem and rectify the problem(s)"); if (xfs_error_level >= XFS_ERRLEVEL_HIGH) xfs_stack_trace(); + + fserror_report_shutdown(mp->m_super, GFP_KERNEL); } /* diff --git a/fs/xfs/xfs_health.c b/fs/xfs/xfs_health.c index 3c1557fb1cf0..fbb8886c72fe 100644 --- a/fs/xfs/xfs_health.c +++ b/fs/xfs/xfs_health.c @@ -20,6 +20,8 @@ #include "xfs_quota_defs.h" #include "xfs_rtgroup.h" +#include + static void xfs_health_unmount_group( struct xfs_group *xg, @@ -111,6 +113,8 @@ xfs_fs_mark_sick( spin_lock(&mp->m_sb_lock); mp->m_fs_sick |= mask; spin_unlock(&mp->m_sb_lock); + + fserror_report_metadata(mp->m_super, -EFSCORRUPTED, GFP_NOFS); } /* Mark per-fs metadata as having been checked and found unhealthy by fsck. */ @@ -126,6 +130,8 @@ xfs_fs_mark_corrupt( mp->m_fs_sick |= mask; mp->m_fs_checked |= mask; spin_unlock(&mp->m_sb_lock); + + fserror_report_metadata(mp->m_super, -EFSCORRUPTED, GFP_NOFS); } /* Mark a per-fs metadata healed. */ @@ -198,6 +204,8 @@ xfs_group_mark_sick( spin_lock(&xg->xg_state_lock); xg->xg_sick |= mask; spin_unlock(&xg->xg_state_lock); + + fserror_report_metadata(xg->xg_mount->m_super, -EFSCORRUPTED, GFP_NOFS); } /* @@ -215,6 +223,8 @@ xfs_group_mark_corrupt( xg->xg_sick |= mask; xg->xg_checked |= mask; spin_unlock(&xg->xg_state_lock); + + fserror_report_metadata(xg->xg_mount->m_super, -EFSCORRUPTED, GFP_NOFS); } /* @@ -287,6 +297,8 @@ xfs_inode_mark_sick( spin_lock(&VFS_I(ip)->i_lock); inode_state_clear(VFS_I(ip), I_DONTCACHE); spin_unlock(&VFS_I(ip)->i_lock); + + fserror_report_file_metadata(VFS_I(ip), -EFSCORRUPTED, GFP_NOFS); } /* Mark inode metadata as having been checked and found unhealthy by fsck. */ @@ -311,6 +323,8 @@ xfs_inode_mark_corrupt( spin_lock(&VFS_I(ip)->i_lock); inode_state_clear(VFS_I(ip), I_DONTCACHE); spin_unlock(&VFS_I(ip)->i_lock); + + fserror_report_file_metadata(VFS_I(ip), -EFSCORRUPTED, GFP_NOFS); } /* Mark parts of an inode healed. */ -- cgit v1.2.3 From 94503211d2fdcc5183d01c2719258a7795cdecb0 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 12 Jan 2026 16:32:12 -0800 Subject: xfs: translate fsdax media errors into file "data lost" errors when convenient Translate fsdax persistent failure notifications into file data loss events when it's convenient, aka when the inode is already incore. Signed-off-by: Darrick J. Wong Link: https://patch.msgid.link/176826402673.3490369.1672039530408369208.stgit@frogsfrogsfrogs Reviewed-by: Christoph Hellwig Signed-off-by: Christian Brauner --- fs/xfs/xfs_notify_failure.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/xfs/xfs_notify_failure.c b/fs/xfs/xfs_notify_failure.c index b17672889942..6d5002413c2c 100644 --- a/fs/xfs/xfs_notify_failure.c +++ b/fs/xfs/xfs_notify_failure.c @@ -26,6 +26,7 @@ #include #include #include +#include struct xfs_failure_info { xfs_agblock_t startblock; @@ -116,6 +117,9 @@ xfs_dax_failure_fn( invalidate_inode_pages2_range(mapping, pgoff, pgoff + pgcnt - 1); + fserror_report_data_lost(VFS_I(ip), (u64)pgoff << PAGE_SHIFT, + (u64)pgcnt << PAGE_SHIFT, GFP_NOFS); + xfs_irele(ip); return error; } -- cgit v1.2.3 From 81d2e13a57c9d73582527966fae24d4fd73826ca Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 12 Jan 2026 16:32:27 -0800 Subject: ext4: convert to new fserror helpers Use the new fserror functions to report metadata errors to fsnotify. Note that ext4 inconsistently passes around negative and positive error numbers all over the codebase, so we force them all to negative for consistency in what we report to fserror, and fserror ensures that only positive error numbers are passed to fanotify, per the fanotify(7) manpage. Signed-off-by: Darrick J. Wong Link: https://patch.msgid.link/176826402693.3490369.5875002879192895558.stgit@frogsfrogsfrogs Reviewed-by: Christoph Hellwig Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- fs/ext4/ioctl.c | 2 ++ fs/ext4/super.c | 13 +++++++++---- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 7ce0fc40aec2..ea26cd03d3ce 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -26,6 +26,7 @@ #include #include "fsmap.h" #include +#include typedef void ext4_update_sb_callback(struct ext4_sb_info *sbi, struct ext4_super_block *es, @@ -844,6 +845,7 @@ int ext4_force_shutdown(struct super_block *sb, u32 flags) return -EINVAL; } clear_opt(sb, DISCARD); + fserror_report_shutdown(sb, GFP_KERNEL); return 0; } diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 87205660c5d0..a6241ffb8639 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -48,6 +48,7 @@ #include #include #include +#include #include "ext4.h" #include "ext4_extents.h" /* Needed for trace points definition */ @@ -824,7 +825,8 @@ void __ext4_error(struct super_block *sb, const char *function, sb->s_id, function, line, current->comm, &vaf); va_end(args); } - fsnotify_sb_error(sb, NULL, error ? error : EFSCORRUPTED); + fserror_report_metadata(sb, error ? -abs(error) : -EFSCORRUPTED, + GFP_ATOMIC); ext4_handle_error(sb, force_ro, error, 0, block, function, line); } @@ -856,7 +858,9 @@ void __ext4_error_inode(struct inode *inode, const char *function, current->comm, &vaf); va_end(args); } - fsnotify_sb_error(inode->i_sb, inode, error ? error : EFSCORRUPTED); + fserror_report_file_metadata(inode, + error ? -abs(error) : -EFSCORRUPTED, + GFP_ATOMIC); ext4_handle_error(inode->i_sb, false, error, inode->i_ino, block, function, line); @@ -896,7 +900,7 @@ void __ext4_error_file(struct file *file, const char *function, current->comm, path, &vaf); va_end(args); } - fsnotify_sb_error(inode->i_sb, inode, EFSCORRUPTED); + fserror_report_file_metadata(inode, -EFSCORRUPTED, GFP_ATOMIC); ext4_handle_error(inode->i_sb, false, EFSCORRUPTED, inode->i_ino, block, function, line); @@ -965,7 +969,8 @@ void __ext4_std_error(struct super_block *sb, const char *function, printk(KERN_CRIT "EXT4-fs error (device %s) in %s:%d: %s\n", sb->s_id, function, line, errstr); } - fsnotify_sb_error(sb, NULL, errno ? errno : EFSCORRUPTED); + fserror_report_metadata(sb, errno ? -abs(errno) : -EFSCORRUPTED, + GFP_ATOMIC); ext4_handle_error(sb, false, -errno, 0, 0, function, line); } -- cgit v1.2.3