summaryrefslogtreecommitdiff
path: root/include/linux
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2024-11-18 20:35:30 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2024-11-18 20:35:30 +0300
commit70e7730c2a78313e3ccc932410c939816e3ba1bc (patch)
treede90ad4ba9a7ad7d069178559ad3341daf73df53 /include/linux
parent4eb98b7760e8078dbc984ee08b02b5b4c3cff088 (diff)
parentaefff51e1c2986e16f2780ca8e4c97b784800ab5 (diff)
downloadlinux-70e7730c2a78313e3ccc932410c939816e3ba1bc.tar.xz
Merge tag 'vfs-6.13.misc' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull misc vfs updates from Christian Brauner: "Features: - Fixup and improve NLM and kNFSD file lock callbacks Last year both GFS2 and OCFS2 had some work done to make their locking more robust when exported over NFS. Unfortunately, part of that work caused both NLM (for NFS v3 exports) and kNFSD (for NFSv4.1+ exports) to no longer send lock notifications to clients This in itself is not a huge problem because most NFS clients will still poll the server in order to acquire a conflicted lock It's important for NLM and kNFSD that they do not block their kernel threads inside filesystem's file_lock implementations because that can produce deadlocks. We used to make sure of this by only trusting that posix_lock_file() can correctly handle blocking lock calls asynchronously, so the lock managers would only setup their file_lock requests for async callbacks if the filesystem did not define its own lock() file operation However, when GFS2 and OCFS2 grew the capability to correctly handle blocking lock requests asynchronously, they started signalling this behavior with EXPORT_OP_ASYNC_LOCK, and the check for also trusting posix_lock_file() was inadvertently dropped, so now most filesystems no longer produce lock notifications when exported over NFS Fix this by using an fop_flag which greatly simplifies the problem and grooms the way for future uses by both filesystems and lock managers alike - Add a sysctl to delete the dentry when a file is removed instead of making it a negative dentry Commit 681ce8623567 ("vfs: Delete the associated dentry when deleting a file") introduced an unconditional deletion of the associated dentry when a file is removed. However, this led to performance regressions in specific benchmarks, such as ilebench.sum_operations/s, prompting a revert in commit 4a4be1ad3a6e ("Revert "vfs: Delete the associated dentry when deleting a file""). This reintroduces the concept conditionally through a sysctl - Expand the statmount() system call: * Report the filesystem subtype in a new fs_subtype field to e.g., report fuse filesystem subtypes * Report the superblock source in a new sb_source field * Add a new way to return filesystem specific mount options in an option array that returns filesystem specific mount options separated by zero bytes and unescaped. This allows caller's to retrieve filesystem specific mount options and immediately pass them to e.g., fsconfig() without having to unescape or split them * Report security (LSM) specific mount options in a separate security option array. We don't lump them together with filesystem specific mount options as security mount options are generic and most users aren't interested in them The format is the same as for the filesystem specific mount option array - Support relative paths in fsconfig()'s FSCONFIG_SET_STRING command - Optimize acl_permission_check() to avoid costly {g,u}id ownership checks if possible - Use smp_mb__after_spinlock() to avoid full smp_mb() in evict() - Add synchronous wakeup support for ep_poll_callback. Currently, epoll only uses wake_up() to wake up task. But sometimes there are epoll users which want to use the synchronous wakeup flag to give a hint to the scheduler, e.g., the Android binder driver. So add a wake_up_sync() define, and use wake_up_sync() when sync is true in ep_poll_callback() Fixes: - Fix kernel documentation for inode_insert5() and iget5_locked() - Annotate racy epoll check on file->f_ep - Make F_DUPFD_QUERY associative - Avoid filename buffer overrun in initramfs - Don't let statmount() return empty strings - Add a cond_resched() to dump_user_range() to avoid hogging the CPU - Don't query the device logical blocksize multiple times for hfsplus - Make filemap_read() check that the offset is positive or zero Cleanups: - Various typo fixes - Cleanup wbc_attach_fdatawrite_inode() - Add __releases annotation to wbc_attach_and_unlock_inode() - Add hugetlbfs tracepoints - Fix various vfs kernel doc parameters - Remove obsolete TODO comment from io_cancel() - Convert wbc_account_cgroup_owner() to take a folio - Fix comments for BANDWITH_INTERVAL and wb_domain_writeout_add() - Reorder struct posix_acl to save 8 bytes - Annotate struct posix_acl with __counted_by() - Replace one-element array with flexible array member in freevxfs - Use idiomatic atomic64_inc_return() in alloc_mnt_ns()" * tag 'vfs-6.13.misc' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs: (35 commits) statmount: retrieve security mount options vfs: make evict() use smp_mb__after_spinlock instead of smp_mb statmount: add flag to retrieve unescaped options fs: add the ability for statmount() to report the sb_source writeback: wbc_attach_fdatawrite_inode out of line writeback: add a __releases annoation to wbc_attach_and_unlock_inode fs: add the ability for statmount() to report the fs_subtype fs: don't let statmount return empty strings fs:aio: Remove TODO comment suggesting hash or array usage in io_cancel() hfsplus: don't query the device logical block size multiple times freevxfs: Replace one-element array with flexible array member fs: optimize acl_permission_check() initramfs: avoid filename buffer overrun fs/writeback: convert wbc_account_cgroup_owner to take a folio acl: Annotate struct posix_acl with __counted_by() acl: Realign struct posix_acl to save 8 bytes epoll: Add synchronous wakeup support for ep_poll_callback coredump: add cond_resched() to dump_user_range mm/page-writeback.c: Fix comment of wb_domain_writeout_add() mm/page-writeback.c: Update comment for BANDWIDTH_INTERVAL ...
Diffstat (limited to 'include/linux')
-rw-r--r--include/linux/eventpoll.h2
-rw-r--r--include/linux/exportfs.h13
-rw-r--r--include/linux/filelock.h5
-rw-r--r--include/linux/fs.h2
-rw-r--r--include/linux/posix_acl.h6
-rw-r--r--include/linux/wait.h1
-rw-r--r--include/linux/writeback.h32
7 files changed, 16 insertions, 45 deletions
diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h
index 3337745d81bd..0c0d00fcd131 100644
--- a/include/linux/eventpoll.h
+++ b/include/linux/eventpoll.h
@@ -42,7 +42,7 @@ static inline void eventpoll_release(struct file *file)
* because the file in on the way to be removed and nobody ( but
* eventpoll ) has still a reference to this file.
*/
- if (likely(!file->f_ep))
+ if (likely(!READ_ONCE(file->f_ep)))
return;
/*
diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
index 893a1d21dc1c..1ab165c2939f 100644
--- a/include/linux/exportfs.h
+++ b/include/linux/exportfs.h
@@ -250,19 +250,6 @@ struct export_operations {
unsigned long flags;
};
-/**
- * exportfs_lock_op_is_async() - export op supports async lock operation
- * @export_ops: the nfs export operations to check
- *
- * Returns true if the nfs export_operations structure has
- * EXPORT_OP_ASYNC_LOCK in their flags set
- */
-static inline bool
-exportfs_lock_op_is_async(const struct export_operations *export_ops)
-{
- return export_ops->flags & EXPORT_OP_ASYNC_LOCK;
-}
-
extern int exportfs_encode_inode_fh(struct inode *inode, struct fid *fid,
int *max_len, struct inode *parent,
int flags);
diff --git a/include/linux/filelock.h b/include/linux/filelock.h
index bb44224c6676..c412ded9171e 100644
--- a/include/linux/filelock.h
+++ b/include/linux/filelock.h
@@ -180,6 +180,11 @@ static inline void locks_wake_up(struct file_lock *fl)
wake_up(&fl->c.flc_wait);
}
+static inline bool locks_can_async_lock(const struct file_operations *fops)
+{
+ return !fops->lock || fops->fop_flags & FOP_ASYNC_LOCK;
+}
+
/* fs/locks.c */
void locks_free_lock_context(struct inode *inode);
void locks_free_lock(struct file_lock *fl);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index d32c6f6298b1..6b062f62cbb8 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2124,6 +2124,8 @@ struct file_operations {
#define FOP_HUGE_PAGES ((__force fop_flags_t)(1 << 4))
/* Treat loff_t as unsigned (e.g., /dev/mem) */
#define FOP_UNSIGNED_OFFSET ((__force fop_flags_t)(1 << 5))
+/* Supports asynchronous lock callbacks */
+#define FOP_ASYNC_LOCK ((__force fop_flags_t)(1 << 6))
/* Wrap a directory iterator that needs exclusive inode access */
int wrap_directory_iterator(struct file *, struct dir_context *,
diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h
index 0e65b3d634d9..e2d47eb1a7f3 100644
--- a/include/linux/posix_acl.h
+++ b/include/linux/posix_acl.h
@@ -28,9 +28,9 @@ struct posix_acl_entry {
struct posix_acl {
refcount_t a_refcount;
- struct rcu_head a_rcu;
unsigned int a_count;
- struct posix_acl_entry a_entries[];
+ struct rcu_head a_rcu;
+ struct posix_acl_entry a_entries[] __counted_by(a_count);
};
#define FOREACH_ACL_ENTRY(pa, acl, pe) \
@@ -62,7 +62,7 @@ posix_acl_release(struct posix_acl *acl)
/* posix_acl.c */
extern void posix_acl_init(struct posix_acl *, int);
-extern struct posix_acl *posix_acl_alloc(int, gfp_t);
+extern struct posix_acl *posix_acl_alloc(unsigned int count, gfp_t flags);
extern struct posix_acl *posix_acl_from_mode(umode_t, gfp_t);
extern int posix_acl_equiv_mode(const struct posix_acl *, umode_t *);
extern int __posix_acl_create(struct posix_acl **, gfp_t, umode_t *);
diff --git a/include/linux/wait.h b/include/linux/wait.h
index 8aa3372f21a0..2b322a9b88a2 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -221,6 +221,7 @@ void __wake_up_pollfree(struct wait_queue_head *wq_head);
#define wake_up_all(x) __wake_up(x, TASK_NORMAL, 0, NULL)
#define wake_up_locked(x) __wake_up_locked((x), TASK_NORMAL, 1)
#define wake_up_all_locked(x) __wake_up_locked((x), TASK_NORMAL, 0)
+#define wake_up_sync(x) __wake_up_sync(x, TASK_NORMAL)
#define wake_up_interruptible(x) __wake_up(x, TASK_INTERRUPTIBLE, 1, NULL)
#define wake_up_interruptible_nr(x, nr) __wake_up(x, TASK_INTERRUPTIBLE, nr, NULL)
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index d6db822e4bb3..d11b903c2edb 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -213,11 +213,8 @@ static inline void wait_on_inode(struct inode *inode)
#include <linux/bio.h>
void __inode_attach_wb(struct inode *inode, struct folio *folio);
-void wbc_attach_and_unlock_inode(struct writeback_control *wbc,
- struct inode *inode)
- __releases(&inode->i_lock);
void wbc_detach_inode(struct writeback_control *wbc);
-void wbc_account_cgroup_owner(struct writeback_control *wbc, struct page *page,
+void wbc_account_cgroup_owner(struct writeback_control *wbc, struct folio *folio,
size_t bytes);
int cgroup_writeback_by_id(u64 bdi_id, int memcg_id,
enum wb_reason reason, struct wb_completion *done);
@@ -254,22 +251,8 @@ static inline void inode_detach_wb(struct inode *inode)
}
}
-/**
- * wbc_attach_fdatawrite_inode - associate wbc and inode for fdatawrite
- * @wbc: writeback_control of interest
- * @inode: target inode
- *
- * This function is to be used by __filemap_fdatawrite_range(), which is an
- * alternative entry point into writeback code, and first ensures @inode is
- * associated with a bdi_writeback and attaches it to @wbc.
- */
-static inline void wbc_attach_fdatawrite_inode(struct writeback_control *wbc,
- struct inode *inode)
-{
- spin_lock(&inode->i_lock);
- inode_attach_wb(inode, NULL);
- wbc_attach_and_unlock_inode(wbc, inode);
-}
+void wbc_attach_fdatawrite_inode(struct writeback_control *wbc,
+ struct inode *inode);
/**
* wbc_init_bio - writeback specific initializtion of bio
@@ -303,13 +286,6 @@ static inline void inode_detach_wb(struct inode *inode)
{
}
-static inline void wbc_attach_and_unlock_inode(struct writeback_control *wbc,
- struct inode *inode)
- __releases(&inode->i_lock)
-{
- spin_unlock(&inode->i_lock);
-}
-
static inline void wbc_attach_fdatawrite_inode(struct writeback_control *wbc,
struct inode *inode)
{
@@ -324,7 +300,7 @@ static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio)
}
static inline void wbc_account_cgroup_owner(struct writeback_control *wbc,
- struct page *page, size_t bytes)
+ struct folio *folio, size_t bytes)
{
}