summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2024-10-21 20:48:24 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2024-10-21 20:48:24 +0300
commit7166c32651fa2a5712215980d1b54d4b9ccca6b5 (patch)
treeab08cf3baa12f0bb2becb209f6c4b77f101317df
parenta777c32ca42b9a8a5e5abd915883a73620d9044b (diff)
parent197231da7f6a2e9884f84a4a463f53f9f491d920 (diff)
downloadlinux-7166c32651fa2a5712215980d1b54d4b9ccca6b5.tar.xz
Merge tag 'vfs-6.12-rc5.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull vfs fixes from Christian Brauner: "afs: - Fix a lock recursion in afs_wake_up_async_call() on ->notify_lock netfs: - Drop the references to a folio immediately after the folio has been extracted to prevent races with future I/O collection - Fix a documenation build error - Downgrade the i_rwsem for buffered writes to fix a cifs reported performance regression when switching to netfslib vfs: - Explicitly return -E2BIG from openat2() if the specified size is unexpectedly large. This aligns openat2() with other extensible struct based system calls - When copying a mount namespace ensure that we only try to remove the new copy from the mount namespace rbtree if it has already been added to it nilfs: - Clear the buffer delay flag when clearing the buffer state clags when a buffer head is discarded to prevent a kernel OOPs ocfs2: - Fix an unitialized value warning in ocfs2_setattr() proc: - Fix a kernel doc warning" * tag 'vfs-6.12-rc5.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs: proc: Fix W=1 build kernel-doc warning afs: Fix lock recursion fs: Fix uninitialized value issue in from_kuid and from_kgid fs: don't try and remove empty rbtree node netfs: Downgrade i_rwsem for a buffered write nilfs2: fix kernel bug due to missing clearing of buffer delay flag openat2: explicitly return -E2BIG for (usize > PAGE_SIZE) netfs: fix documentation build error netfs: In readahead, put the folio refs as soon extracted
-rw-r--r--Documentation/filesystems/netfs_library.rst1
-rw-r--r--fs/afs/internal.h2
-rw-r--r--fs/afs/rxrpc.c83
-rw-r--r--fs/namespace.c4
-rw-r--r--fs/netfs/buffered_read.c47
-rw-r--r--fs/netfs/locking.c3
-rw-r--r--fs/netfs/read_collect.c2
-rw-r--r--fs/nilfs2/page.c6
-rw-r--r--fs/ocfs2/file.c9
-rw-r--r--fs/open.c2
-rw-r--r--fs/proc/fd.c2
-rw-r--r--include/trace/events/netfs.h1
12 files changed, 95 insertions, 67 deletions
diff --git a/Documentation/filesystems/netfs_library.rst b/Documentation/filesystems/netfs_library.rst
index f0d2cb257bb8..73f0bfd7e903 100644
--- a/Documentation/filesystems/netfs_library.rst
+++ b/Documentation/filesystems/netfs_library.rst
@@ -592,4 +592,3 @@ API Function Reference
.. kernel-doc:: include/linux/netfs.h
.. kernel-doc:: fs/netfs/buffered_read.c
-.. kernel-doc:: fs/netfs/io.c
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 6e1d3c4daf72..52aab09a32a9 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -130,6 +130,7 @@ struct afs_call {
wait_queue_head_t waitq; /* processes awaiting completion */
struct work_struct async_work; /* async I/O processor */
struct work_struct work; /* actual work processor */
+ struct work_struct free_work; /* Deferred free processor */
struct rxrpc_call *rxcall; /* RxRPC call handle */
struct rxrpc_peer *peer; /* Remote endpoint */
struct key *key; /* security for this call */
@@ -1331,6 +1332,7 @@ extern int __net_init afs_open_socket(struct afs_net *);
extern void __net_exit afs_close_socket(struct afs_net *);
extern void afs_charge_preallocation(struct work_struct *);
extern void afs_put_call(struct afs_call *);
+void afs_deferred_put_call(struct afs_call *call);
void afs_make_call(struct afs_call *call, gfp_t gfp);
void afs_wait_for_call_to_complete(struct afs_call *call);
extern struct afs_call *afs_alloc_flat_call(struct afs_net *,
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index c453428f3c8b..9f2a3bb56ec6 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -18,6 +18,7 @@
struct workqueue_struct *afs_async_calls;
+static void afs_deferred_free_worker(struct work_struct *work);
static void afs_wake_up_call_waiter(struct sock *, struct rxrpc_call *, unsigned long);
static void afs_wake_up_async_call(struct sock *, struct rxrpc_call *, unsigned long);
static void afs_process_async_call(struct work_struct *);
@@ -149,6 +150,7 @@ static struct afs_call *afs_alloc_call(struct afs_net *net,
call->debug_id = atomic_inc_return(&rxrpc_debug_id);
refcount_set(&call->ref, 1);
INIT_WORK(&call->async_work, afs_process_async_call);
+ INIT_WORK(&call->free_work, afs_deferred_free_worker);
init_waitqueue_head(&call->waitq);
spin_lock_init(&call->state_lock);
call->iter = &call->def_iter;
@@ -159,6 +161,36 @@ static struct afs_call *afs_alloc_call(struct afs_net *net,
return call;
}
+static void afs_free_call(struct afs_call *call)
+{
+ struct afs_net *net = call->net;
+ int o;
+
+ ASSERT(!work_pending(&call->async_work));
+
+ rxrpc_kernel_put_peer(call->peer);
+
+ if (call->rxcall) {
+ rxrpc_kernel_shutdown_call(net->socket, call->rxcall);
+ rxrpc_kernel_put_call(net->socket, call->rxcall);
+ call->rxcall = NULL;
+ }
+ if (call->type->destructor)
+ call->type->destructor(call);
+
+ afs_unuse_server_notime(call->net, call->server, afs_server_trace_put_call);
+ kfree(call->request);
+
+ o = atomic_read(&net->nr_outstanding_calls);
+ trace_afs_call(call->debug_id, afs_call_trace_free, 0, o,
+ __builtin_return_address(0));
+ kfree(call);
+
+ o = atomic_dec_return(&net->nr_outstanding_calls);
+ if (o == 0)
+ wake_up_var(&net->nr_outstanding_calls);
+}
+
/*
* Dispose of a reference on a call.
*/
@@ -173,32 +205,34 @@ void afs_put_call(struct afs_call *call)
o = atomic_read(&net->nr_outstanding_calls);
trace_afs_call(debug_id, afs_call_trace_put, r - 1, o,
__builtin_return_address(0));
+ if (zero)
+ afs_free_call(call);
+}
- if (zero) {
- ASSERT(!work_pending(&call->async_work));
- ASSERT(call->type->name != NULL);
-
- rxrpc_kernel_put_peer(call->peer);
-
- if (call->rxcall) {
- rxrpc_kernel_shutdown_call(net->socket, call->rxcall);
- rxrpc_kernel_put_call(net->socket, call->rxcall);
- call->rxcall = NULL;
- }
- if (call->type->destructor)
- call->type->destructor(call);
+static void afs_deferred_free_worker(struct work_struct *work)
+{
+ struct afs_call *call = container_of(work, struct afs_call, free_work);
- afs_unuse_server_notime(call->net, call->server, afs_server_trace_put_call);
- kfree(call->request);
+ afs_free_call(call);
+}
- trace_afs_call(call->debug_id, afs_call_trace_free, 0, o,
- __builtin_return_address(0));
- kfree(call);
+/*
+ * Dispose of a reference on a call, deferring the cleanup to a workqueue
+ * to avoid lock recursion.
+ */
+void afs_deferred_put_call(struct afs_call *call)
+{
+ struct afs_net *net = call->net;
+ unsigned int debug_id = call->debug_id;
+ bool zero;
+ int r, o;
- o = atomic_dec_return(&net->nr_outstanding_calls);
- if (o == 0)
- wake_up_var(&net->nr_outstanding_calls);
- }
+ zero = __refcount_dec_and_test(&call->ref, &r);
+ o = atomic_read(&net->nr_outstanding_calls);
+ trace_afs_call(debug_id, afs_call_trace_put, r - 1, o,
+ __builtin_return_address(0));
+ if (zero)
+ schedule_work(&call->free_work);
}
static struct afs_call *afs_get_call(struct afs_call *call,
@@ -640,7 +674,8 @@ static void afs_wake_up_call_waiter(struct sock *sk, struct rxrpc_call *rxcall,
}
/*
- * wake up an asynchronous call
+ * Wake up an asynchronous call. The caller is holding the call notify
+ * spinlock around this, so we can't call afs_put_call().
*/
static void afs_wake_up_async_call(struct sock *sk, struct rxrpc_call *rxcall,
unsigned long call_user_ID)
@@ -657,7 +692,7 @@ static void afs_wake_up_async_call(struct sock *sk, struct rxrpc_call *rxcall,
__builtin_return_address(0));
if (!queue_work(afs_async_calls, &call->async_work))
- afs_put_call(call);
+ afs_deferred_put_call(call);
}
}
diff --git a/fs/namespace.c b/fs/namespace.c
index 93c377816d75..d26f5e6d2ca3 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -3944,7 +3944,9 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
new = copy_tree(old, old->mnt.mnt_root, copy_flags);
if (IS_ERR(new)) {
namespace_unlock();
- free_mnt_ns(new_ns);
+ ns_free_inum(&new_ns->ns);
+ dec_mnt_namespaces(new_ns->ucounts);
+ mnt_ns_release(new_ns);
return ERR_CAST(new);
}
if (user_ns != ns->user_ns) {
diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c
index c40e226053cc..af46a598f4d7 100644
--- a/fs/netfs/buffered_read.c
+++ b/fs/netfs/buffered_read.c
@@ -67,7 +67,8 @@ static int netfs_begin_cache_read(struct netfs_io_request *rreq, struct netfs_in
* Decant the list of folios to read into a rolling buffer.
*/
static size_t netfs_load_buffer_from_ra(struct netfs_io_request *rreq,
- struct folio_queue *folioq)
+ struct folio_queue *folioq,
+ struct folio_batch *put_batch)
{
unsigned int order, nr;
size_t size = 0;
@@ -82,6 +83,9 @@ static size_t netfs_load_buffer_from_ra(struct netfs_io_request *rreq,
order = folio_order(folio);
folioq->orders[i] = order;
size += PAGE_SIZE << order;
+
+ if (!folio_batch_add(put_batch, folio))
+ folio_batch_release(put_batch);
}
for (int i = nr; i < folioq_nr_slots(folioq); i++)
@@ -120,6 +124,9 @@ static ssize_t netfs_prepare_read_iterator(struct netfs_io_subrequest *subreq)
* that we will need to release later - but we don't want to do
* that until after we've started the I/O.
*/
+ struct folio_batch put_batch;
+
+ folio_batch_init(&put_batch);
while (rreq->submitted < subreq->start + rsize) {
struct folio_queue *tail = rreq->buffer_tail, *new;
size_t added;
@@ -132,10 +139,11 @@ static ssize_t netfs_prepare_read_iterator(struct netfs_io_subrequest *subreq)
new->prev = tail;
tail->next = new;
rreq->buffer_tail = new;
- added = netfs_load_buffer_from_ra(rreq, new);
+ added = netfs_load_buffer_from_ra(rreq, new, &put_batch);
rreq->iter.count += added;
rreq->submitted += added;
}
+ folio_batch_release(&put_batch);
}
subreq->len = rsize;
@@ -348,6 +356,7 @@ static int netfs_wait_for_read(struct netfs_io_request *rreq)
static int netfs_prime_buffer(struct netfs_io_request *rreq)
{
struct folio_queue *folioq;
+ struct folio_batch put_batch;
size_t added;
folioq = kmalloc(sizeof(*folioq), GFP_KERNEL);
@@ -360,39 +369,14 @@ static int netfs_prime_buffer(struct netfs_io_request *rreq)
rreq->submitted = rreq->start;
iov_iter_folio_queue(&rreq->iter, ITER_DEST, folioq, 0, 0, 0);
- added = netfs_load_buffer_from_ra(rreq, folioq);
+ folio_batch_init(&put_batch);
+ added = netfs_load_buffer_from_ra(rreq, folioq, &put_batch);
+ folio_batch_release(&put_batch);
rreq->iter.count += added;
rreq->submitted += added;
return 0;
}
-/*
- * Drop the ref on each folio that we inherited from the VM readahead code. We
- * still have the folio locks to pin the page until we complete the I/O.
- *
- * Note that we can't just release the batch in each queue struct as we use the
- * occupancy count in other places.
- */
-static void netfs_put_ra_refs(struct folio_queue *folioq)
-{
- struct folio_batch fbatch;
-
- folio_batch_init(&fbatch);
- while (folioq) {
- for (unsigned int slot = 0; slot < folioq_count(folioq); slot++) {
- struct folio *folio = folioq_folio(folioq, slot);
- if (!folio)
- continue;
- trace_netfs_folio(folio, netfs_folio_trace_read_put);
- if (!folio_batch_add(&fbatch, folio))
- folio_batch_release(&fbatch);
- }
- folioq = folioq->next;
- }
-
- folio_batch_release(&fbatch);
-}
-
/**
* netfs_readahead - Helper to manage a read request
* @ractl: The description of the readahead request
@@ -436,9 +420,6 @@ void netfs_readahead(struct readahead_control *ractl)
goto cleanup_free;
netfs_read_to_pagecache(rreq);
- /* Release the folio refs whilst we're waiting for the I/O. */
- netfs_put_ra_refs(rreq->buffer);
-
netfs_put_request(rreq, true, netfs_rreq_trace_put_return);
return;
diff --git a/fs/netfs/locking.c b/fs/netfs/locking.c
index 21eab56ee2f9..2249ecd09d0a 100644
--- a/fs/netfs/locking.c
+++ b/fs/netfs/locking.c
@@ -109,6 +109,7 @@ int netfs_start_io_write(struct inode *inode)
up_write(&inode->i_rwsem);
return -ERESTARTSYS;
}
+ downgrade_write(&inode->i_rwsem);
return 0;
}
EXPORT_SYMBOL(netfs_start_io_write);
@@ -123,7 +124,7 @@ EXPORT_SYMBOL(netfs_start_io_write);
void netfs_end_io_write(struct inode *inode)
__releases(inode->i_rwsem)
{
- up_write(&inode->i_rwsem);
+ up_read(&inode->i_rwsem);
}
EXPORT_SYMBOL(netfs_end_io_write);
diff --git a/fs/netfs/read_collect.c b/fs/netfs/read_collect.c
index b18c65ba5580..3cbb289535a8 100644
--- a/fs/netfs/read_collect.c
+++ b/fs/netfs/read_collect.c
@@ -77,6 +77,8 @@ static void netfs_unlock_read_folio(struct netfs_io_subrequest *subreq,
folio_unlock(folio);
}
}
+
+ folioq_clear(folioq, slot);
}
/*
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index 9c0b7cddeaae..5436eb0424bd 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -77,7 +77,8 @@ void nilfs_forget_buffer(struct buffer_head *bh)
const unsigned long clear_bits =
(BIT(BH_Uptodate) | BIT(BH_Dirty) | BIT(BH_Mapped) |
BIT(BH_Async_Write) | BIT(BH_NILFS_Volatile) |
- BIT(BH_NILFS_Checked) | BIT(BH_NILFS_Redirected));
+ BIT(BH_NILFS_Checked) | BIT(BH_NILFS_Redirected) |
+ BIT(BH_Delay));
lock_buffer(bh);
set_mask_bits(&bh->b_state, clear_bits, 0);
@@ -406,7 +407,8 @@ void nilfs_clear_folio_dirty(struct folio *folio)
const unsigned long clear_bits =
(BIT(BH_Uptodate) | BIT(BH_Dirty) | BIT(BH_Mapped) |
BIT(BH_Async_Write) | BIT(BH_NILFS_Volatile) |
- BIT(BH_NILFS_Checked) | BIT(BH_NILFS_Redirected));
+ BIT(BH_NILFS_Checked) | BIT(BH_NILFS_Redirected) |
+ BIT(BH_Delay));
bh = head;
do {
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index ad131a2fc58e..58887456e3c5 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1129,9 +1129,12 @@ int ocfs2_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
trace_ocfs2_setattr(inode, dentry,
(unsigned long long)OCFS2_I(inode)->ip_blkno,
dentry->d_name.len, dentry->d_name.name,
- attr->ia_valid, attr->ia_mode,
- from_kuid(&init_user_ns, attr->ia_uid),
- from_kgid(&init_user_ns, attr->ia_gid));
+ attr->ia_valid,
+ attr->ia_valid & ATTR_MODE ? attr->ia_mode : 0,
+ attr->ia_valid & ATTR_UID ?
+ from_kuid(&init_user_ns, attr->ia_uid) : 0,
+ attr->ia_valid & ATTR_GID ?
+ from_kgid(&init_user_ns, attr->ia_gid) : 0);
/* ensuring we don't even attempt to truncate a symlink */
if (S_ISLNK(inode->i_mode))
diff --git a/fs/open.c b/fs/open.c
index acaeb3e25c88..5da4df2f9b18 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -1457,6 +1457,8 @@ SYSCALL_DEFINE4(openat2, int, dfd, const char __user *, filename,
if (unlikely(usize < OPEN_HOW_SIZE_VER0))
return -EINVAL;
+ if (unlikely(usize > PAGE_SIZE))
+ return -E2BIG;
err = copy_struct_from_user(&tmp, sizeof(tmp), how, usize);
if (err)
diff --git a/fs/proc/fd.c b/fs/proc/fd.c
index 1f54a54bfb91..5e391cbca7a3 100644
--- a/fs/proc/fd.c
+++ b/fs/proc/fd.c
@@ -77,7 +77,7 @@ static int seq_fdinfo_open(struct inode *inode, struct file *file)
return single_open(file, seq_show, inode);
}
-/**
+/*
* Shared /proc/pid/fdinfo and /proc/pid/fdinfo/fd permission helper to ensure
* that the current task has PTRACE_MODE_READ in addition to the normal
* POSIX-like checks.
diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h
index 1d7c52821e55..69975c9c6823 100644
--- a/include/trace/events/netfs.h
+++ b/include/trace/events/netfs.h
@@ -172,7 +172,6 @@
EM(netfs_folio_trace_read, "read") \
EM(netfs_folio_trace_read_done, "read-done") \
EM(netfs_folio_trace_read_gaps, "read-gaps") \
- EM(netfs_folio_trace_read_put, "read-put") \
EM(netfs_folio_trace_read_unlock, "read-unlock") \
EM(netfs_folio_trace_redirtied, "redirtied") \
EM(netfs_folio_trace_store, "store") \