summaryrefslogtreecommitdiff
path: root/fs/netfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/netfs')
-rw-r--r--fs/netfs/Makefile5
-rw-r--r--fs/netfs/buffered_read.c297
-rw-r--r--fs/netfs/direct_read.c84
-rw-r--r--fs/netfs/direct_write.c10
-rw-r--r--fs/netfs/internal.h45
-rw-r--r--fs/netfs/main.c6
-rw-r--r--fs/netfs/misc.c164
-rw-r--r--fs/netfs/objects.c21
-rw-r--r--fs/netfs/read_collect.c764
-rw-r--r--fs/netfs/read_pgpriv2.c208
-rw-r--r--fs/netfs/read_retry.c234
-rw-r--r--fs/netfs/read_single.c195
-rw-r--r--fs/netfs/rolling_buffer.c222
-rw-r--r--fs/netfs/stats.c13
-rw-r--r--fs/netfs/write_collect.c284
-rw-r--r--fs/netfs/write_issue.c242
-rw-r--r--fs/netfs/write_retry.c234
17 files changed, 1914 insertions, 1114 deletions
diff --git a/fs/netfs/Makefile b/fs/netfs/Makefile
index d08b0bfb6756..b43188d64bd8 100644
--- a/fs/netfs/Makefile
+++ b/fs/netfs/Makefile
@@ -13,8 +13,11 @@ netfs-y := \
read_collect.o \
read_pgpriv2.o \
read_retry.o \
+ read_single.o \
+ rolling_buffer.o \
write_collect.o \
- write_issue.o
+ write_issue.o \
+ write_retry.o
netfs-$(CONFIG_NETFS_STATS) += stats.o
diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c
index 4dc9b8286355..0d1b6d35ff3b 100644
--- a/fs/netfs/buffered_read.c
+++ b/fs/netfs/buffered_read.c
@@ -64,37 +64,6 @@ static int netfs_begin_cache_read(struct netfs_io_request *rreq, struct netfs_in
}
/*
- * Decant the list of folios to read into a rolling buffer.
- */
-static size_t netfs_load_buffer_from_ra(struct netfs_io_request *rreq,
- struct folio_queue *folioq,
- struct folio_batch *put_batch)
-{
- unsigned int order, nr;
- size_t size = 0;
-
- nr = __readahead_batch(rreq->ractl, (struct page **)folioq->vec.folios,
- ARRAY_SIZE(folioq->vec.folios));
- folioq->vec.nr = nr;
- for (int i = 0; i < nr; i++) {
- struct folio *folio = folioq_folio(folioq, i);
-
- trace_netfs_folio(folio, netfs_folio_trace_read);
- order = folio_order(folio);
- folioq->orders[i] = order;
- size += PAGE_SIZE << order;
-
- if (!folio_batch_add(put_batch, folio))
- folio_batch_release(put_batch);
- }
-
- for (int i = nr; i < folioq_nr_slots(folioq); i++)
- folioq_clear(folioq, i);
-
- return size;
-}
-
-/*
* netfs_prepare_read_iterator - Prepare the subreq iterator for I/O
* @subreq: The subrequest to be set up
*
@@ -128,19 +97,12 @@ static ssize_t netfs_prepare_read_iterator(struct netfs_io_subrequest *subreq)
folio_batch_init(&put_batch);
while (rreq->submitted < subreq->start + rsize) {
- struct folio_queue *tail = rreq->buffer_tail, *new;
- size_t added;
-
- new = kmalloc(sizeof(*new), GFP_NOFS);
- if (!new)
- return -ENOMEM;
- netfs_stat(&netfs_n_folioq);
- folioq_init(new);
- new->prev = tail;
- tail->next = new;
- rreq->buffer_tail = new;
- added = netfs_load_buffer_from_ra(rreq, new, &put_batch);
- rreq->iter.count += added;
+ ssize_t added;
+
+ added = rolling_buffer_load_from_ra(&rreq->buffer, rreq->ractl,
+ &put_batch);
+ if (added < 0)
+ return added;
rreq->submitted += added;
}
folio_batch_release(&put_batch);
@@ -148,7 +110,7 @@ static ssize_t netfs_prepare_read_iterator(struct netfs_io_subrequest *subreq)
subreq->len = rsize;
if (unlikely(rreq->io_streams[0].sreq_max_segs)) {
- size_t limit = netfs_limit_iter(&rreq->iter, 0, rsize,
+ size_t limit = netfs_limit_iter(&rreq->buffer.iter, 0, rsize,
rreq->io_streams[0].sreq_max_segs);
if (limit < rsize) {
@@ -157,20 +119,10 @@ static ssize_t netfs_prepare_read_iterator(struct netfs_io_subrequest *subreq)
}
}
- subreq->io_iter = rreq->iter;
-
- if (iov_iter_is_folioq(&subreq->io_iter)) {
- if (subreq->io_iter.folioq_slot >= folioq_nr_slots(subreq->io_iter.folioq)) {
- subreq->io_iter.folioq = subreq->io_iter.folioq->next;
- subreq->io_iter.folioq_slot = 0;
- }
- subreq->curr_folioq = (struct folio_queue *)subreq->io_iter.folioq;
- subreq->curr_folioq_slot = subreq->io_iter.folioq_slot;
- subreq->curr_folio_order = subreq->curr_folioq->orders[subreq->curr_folioq_slot];
- }
+ subreq->io_iter = rreq->buffer.iter;
iov_iter_truncate(&subreq->io_iter, subreq->len);
- iov_iter_advance(&rreq->iter, subreq->len);
+ rolling_buffer_advance(&rreq->buffer, subreq->len);
return subreq->len;
}
@@ -179,25 +131,14 @@ static enum netfs_io_source netfs_cache_prepare_read(struct netfs_io_request *rr
loff_t i_size)
{
struct netfs_cache_resources *cres = &rreq->cache_resources;
+ enum netfs_io_source source;
if (!cres->ops)
return NETFS_DOWNLOAD_FROM_SERVER;
- return cres->ops->prepare_read(subreq, i_size);
-}
-
-static void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error,
- bool was_async)
-{
- struct netfs_io_subrequest *subreq = priv;
-
- if (transferred_or_error < 0) {
- netfs_read_subreq_terminated(subreq, transferred_or_error, was_async);
- return;
- }
+ source = cres->ops->prepare_read(subreq, i_size);
+ trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);
+ return source;
- if (transferred_or_error > 0)
- subreq->transferred += transferred_or_error;
- netfs_read_subreq_terminated(subreq, 0, was_async);
}
/*
@@ -214,6 +155,57 @@ static void netfs_read_cache_to_pagecache(struct netfs_io_request *rreq,
netfs_cache_read_terminated, subreq);
}
+static void netfs_queue_read(struct netfs_io_request *rreq,
+ struct netfs_io_subrequest *subreq,
+ bool last_subreq)
+{
+ struct netfs_io_stream *stream = &rreq->io_streams[0];
+
+ __set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
+
+ /* We add to the end of the list whilst the collector may be walking
+ * the list. The collector only goes nextwards and uses the lock to
+ * remove entries off of the front.
+ */
+ spin_lock(&rreq->lock);
+ list_add_tail(&subreq->rreq_link, &stream->subrequests);
+ if (list_is_first(&subreq->rreq_link, &stream->subrequests)) {
+ stream->front = subreq;
+ if (!stream->active) {
+ stream->collected_to = stream->front->start;
+ /* Store list pointers before active flag */
+ smp_store_release(&stream->active, true);
+ }
+ }
+
+ if (last_subreq) {
+ smp_wmb(); /* Write lists before ALL_QUEUED. */
+ set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags);
+ }
+
+ spin_unlock(&rreq->lock);
+}
+
+static void netfs_issue_read(struct netfs_io_request *rreq,
+ struct netfs_io_subrequest *subreq)
+{
+ switch (subreq->source) {
+ case NETFS_DOWNLOAD_FROM_SERVER:
+ rreq->netfs_ops->issue_read(subreq);
+ break;
+ case NETFS_READ_FROM_CACHE:
+ netfs_read_cache_to_pagecache(rreq, subreq);
+ break;
+ default:
+ __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
+ subreq->error = 0;
+ iov_iter_zero(subreq->len, &subreq->io_iter);
+ subreq->transferred = subreq->len;
+ netfs_read_subreq_terminated(subreq);
+ break;
+ }
+}
+
/*
* Perform a read to the pagecache from a series of sources of different types,
* slicing up the region to be read according to available cache blocks and
@@ -226,11 +218,9 @@ static void netfs_read_to_pagecache(struct netfs_io_request *rreq)
ssize_t size = rreq->len;
int ret = 0;
- atomic_inc(&rreq->nr_outstanding);
-
do {
struct netfs_io_subrequest *subreq;
- enum netfs_io_source source = NETFS_DOWNLOAD_FROM_SERVER;
+ enum netfs_io_source source = NETFS_SOURCE_UNKNOWN;
ssize_t slice;
subreq = netfs_alloc_subrequest(rreq);
@@ -242,20 +232,14 @@ static void netfs_read_to_pagecache(struct netfs_io_request *rreq)
subreq->start = start;
subreq->len = size;
- atomic_inc(&rreq->nr_outstanding);
- spin_lock_bh(&rreq->lock);
- list_add_tail(&subreq->rreq_link, &rreq->subrequests);
- subreq->prev_donated = rreq->prev_donated;
- rreq->prev_donated = 0;
- trace_netfs_sreq(subreq, netfs_sreq_trace_added);
- spin_unlock_bh(&rreq->lock);
-
source = netfs_cache_prepare_read(rreq, subreq, rreq->i_size);
subreq->source = source;
if (source == NETFS_DOWNLOAD_FROM_SERVER) {
unsigned long long zp = umin(ictx->zero_point, rreq->i_size);
size_t len = subreq->len;
+ if (unlikely(rreq->origin == NETFS_READ_SINGLE))
+ zp = rreq->i_size;
if (subreq->start >= zp) {
subreq->source = source = NETFS_FILL_WITH_ZEROES;
goto fill_with_zeroes;
@@ -275,17 +259,18 @@ static void netfs_read_to_pagecache(struct netfs_io_request *rreq)
netfs_stat(&netfs_n_rh_download);
if (rreq->netfs_ops->prepare_read) {
ret = rreq->netfs_ops->prepare_read(subreq);
- if (ret < 0)
- goto prep_failed;
+ if (ret < 0) {
+ subreq->error = ret;
+ /* Not queued - release both refs. */
+ netfs_put_subrequest(subreq, false,
+ netfs_sreq_trace_put_cancel);
+ netfs_put_subrequest(subreq, false,
+ netfs_sreq_trace_put_cancel);
+ break;
+ }
trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);
}
-
- slice = netfs_prepare_read_iterator(subreq);
- if (slice < 0)
- goto prep_iter_failed;
-
- rreq->netfs_ops->issue_read(subreq);
- goto done;
+ goto issue;
}
fill_with_zeroes:
@@ -293,94 +278,47 @@ static void netfs_read_to_pagecache(struct netfs_io_request *rreq)
subreq->source = NETFS_FILL_WITH_ZEROES;
trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
netfs_stat(&netfs_n_rh_zero);
- slice = netfs_prepare_read_iterator(subreq);
- if (slice < 0)
- goto prep_iter_failed;
- __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
- netfs_read_subreq_terminated(subreq, 0, false);
- goto done;
+ goto issue;
}
if (source == NETFS_READ_FROM_CACHE) {
trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
- slice = netfs_prepare_read_iterator(subreq);
- if (slice < 0)
- goto prep_iter_failed;
- netfs_read_cache_to_pagecache(rreq, subreq);
- goto done;
+ goto issue;
}
pr_err("Unexpected read source %u\n", source);
WARN_ON_ONCE(1);
break;
- prep_iter_failed:
- ret = slice;
- prep_failed:
- subreq->error = ret;
- atomic_dec(&rreq->nr_outstanding);
- netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_cancel);
- break;
-
- done:
+ issue:
+ slice = netfs_prepare_read_iterator(subreq);
+ if (slice < 0) {
+ ret = slice;
+ subreq->error = ret;
+ trace_netfs_sreq(subreq, netfs_sreq_trace_cancel);
+ /* Not queued - release both refs. */
+ netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_cancel);
+ netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_cancel);
+ break;
+ }
size -= slice;
start += slice;
+
+ netfs_queue_read(rreq, subreq, size <= 0);
+ netfs_issue_read(rreq, subreq);
cond_resched();
} while (size > 0);
- if (atomic_dec_and_test(&rreq->nr_outstanding))
- netfs_rreq_terminated(rreq, false);
+ if (unlikely(size > 0)) {
+ smp_wmb(); /* Write lists before ALL_QUEUED. */
+ set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags);
+ netfs_wake_read_collector(rreq);
+ }
/* Defer error return as we may need to wait for outstanding I/O. */
cmpxchg(&rreq->error, 0, ret);
}
-/*
- * Wait for the read operation to complete, successfully or otherwise.
- */
-static int netfs_wait_for_read(struct netfs_io_request *rreq)
-{
- int ret;
-
- trace_netfs_rreq(rreq, netfs_rreq_trace_wait_ip);
- wait_on_bit(&rreq->flags, NETFS_RREQ_IN_PROGRESS, TASK_UNINTERRUPTIBLE);
- ret = rreq->error;
- if (ret == 0 && rreq->submitted < rreq->len) {
- trace_netfs_failure(rreq, NULL, ret, netfs_fail_short_read);
- ret = -EIO;
- }
-
- return ret;
-}
-
-/*
- * Set up the initial folioq of buffer folios in the rolling buffer and set the
- * iterator to refer to it.
- */
-static int netfs_prime_buffer(struct netfs_io_request *rreq)
-{
- struct folio_queue *folioq;
- struct folio_batch put_batch;
- size_t added;
-
- folioq = kmalloc(sizeof(*folioq), GFP_KERNEL);
- if (!folioq)
- return -ENOMEM;
- netfs_stat(&netfs_n_folioq);
- folioq_init(folioq);
- rreq->buffer = folioq;
- rreq->buffer_tail = folioq;
- rreq->submitted = rreq->start;
- iov_iter_folio_queue(&rreq->iter, ITER_DEST, folioq, 0, 0, 0);
-
- folio_batch_init(&put_batch);
- added = netfs_load_buffer_from_ra(rreq, folioq, &put_batch);
- folio_batch_release(&put_batch);
- rreq->iter.count += added;
- rreq->submitted += added;
- return 0;
-}
-
/**
* netfs_readahead - Helper to manage a read request
* @ractl: The description of the readahead request
@@ -409,6 +347,8 @@ void netfs_readahead(struct readahead_control *ractl)
if (IS_ERR(rreq))
return;
+ __set_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags);
+
ret = netfs_begin_cache_read(rreq, ictx);
if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
goto cleanup_free;
@@ -420,7 +360,8 @@ void netfs_readahead(struct readahead_control *ractl)
netfs_rreq_expand(rreq, ractl);
rreq->ractl = ractl;
- if (netfs_prime_buffer(rreq) < 0)
+ rreq->submitted = rreq->start;
+ if (rolling_buffer_init(&rreq->buffer, rreq->debug_id, ITER_DEST) < 0)
goto cleanup_free;
netfs_read_to_pagecache(rreq);
@@ -436,23 +377,18 @@ EXPORT_SYMBOL(netfs_readahead);
/*
* Create a rolling buffer with a single occupying folio.
*/
-static int netfs_create_singular_buffer(struct netfs_io_request *rreq, struct folio *folio)
+static int netfs_create_singular_buffer(struct netfs_io_request *rreq, struct folio *folio,
+ unsigned int rollbuf_flags)
{
- struct folio_queue *folioq;
+ ssize_t added;
- folioq = kmalloc(sizeof(*folioq), GFP_KERNEL);
- if (!folioq)
+ if (rolling_buffer_init(&rreq->buffer, rreq->debug_id, ITER_DEST) < 0)
return -ENOMEM;
- netfs_stat(&netfs_n_folioq);
- folioq_init(folioq);
- folioq_append(folioq, folio);
- BUG_ON(folioq_folio(folioq, 0) != folio);
- BUG_ON(folioq_folio_order(folioq, 0) != folio_order(folio));
- rreq->buffer = folioq;
- rreq->buffer_tail = folioq;
- rreq->submitted = rreq->start + rreq->len;
- iov_iter_folio_queue(&rreq->iter, ITER_DEST, folioq, 0, 0, rreq->len);
+ added = rolling_buffer_append(&rreq->buffer, folio, rollbuf_flags);
+ if (added < 0)
+ return added;
+ rreq->submitted = rreq->start + added;
rreq->ractl = (struct readahead_control *)1UL;
return 0;
}
@@ -520,7 +456,7 @@ static int netfs_read_gaps(struct file *file, struct folio *folio)
}
if (to < flen)
bvec_set_folio(&bvec[i++], folio, flen - to, to);
- iov_iter_bvec(&rreq->iter, ITER_DEST, bvec, i, rreq->len);
+ iov_iter_bvec(&rreq->buffer.iter, ITER_DEST, bvec, i, rreq->len);
rreq->submitted = rreq->start + flen;
netfs_read_to_pagecache(rreq);
@@ -529,7 +465,7 @@ static int netfs_read_gaps(struct file *file, struct folio *folio)
folio_put(sink);
ret = netfs_wait_for_read(rreq);
- if (ret == 0) {
+ if (ret >= 0) {
flush_dcache_folio(folio);
folio_mark_uptodate(folio);
}
@@ -588,7 +524,7 @@ int netfs_read_folio(struct file *file, struct folio *folio)
trace_netfs_read(rreq, rreq->start, rreq->len, netfs_read_trace_readpage);
/* Set up the output buffer */
- ret = netfs_create_singular_buffer(rreq, folio);
+ ret = netfs_create_singular_buffer(rreq, folio, 0);
if (ret < 0)
goto discard;
@@ -745,7 +681,7 @@ retry:
trace_netfs_read(rreq, pos, len, netfs_read_trace_write_begin);
/* Set up the output buffer */
- ret = netfs_create_singular_buffer(rreq, folio);
+ ret = netfs_create_singular_buffer(rreq, folio, 0);
if (ret < 0)
goto error_put;
@@ -810,15 +746,14 @@ int netfs_prefetch_for_write(struct file *file, struct folio *folio,
trace_netfs_read(rreq, start, flen, netfs_read_trace_prefetch_for_write);
/* Set up the output buffer */
- ret = netfs_create_singular_buffer(rreq, folio);
+ ret = netfs_create_singular_buffer(rreq, folio, NETFS_ROLLBUF_PAGECACHE_MARK);
if (ret < 0)
goto error_put;
- folioq_mark2(rreq->buffer, 0);
netfs_read_to_pagecache(rreq);
ret = netfs_wait_for_read(rreq);
netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
- return ret;
+ return ret < 0 ? ret : 0;
error_put:
netfs_put_request(rreq, false, netfs_rreq_trace_put_discard);
diff --git a/fs/netfs/direct_read.c b/fs/netfs/direct_read.c
index b1a66a6e6bc2..5e3f0aeb51f3 100644
--- a/fs/netfs/direct_read.c
+++ b/fs/netfs/direct_read.c
@@ -25,7 +25,7 @@ static void netfs_prepare_dio_read_iterator(struct netfs_io_subrequest *subreq)
subreq->len = rsize;
if (unlikely(rreq->io_streams[0].sreq_max_segs)) {
- size_t limit = netfs_limit_iter(&rreq->iter, 0, rsize,
+ size_t limit = netfs_limit_iter(&rreq->buffer.iter, 0, rsize,
rreq->io_streams[0].sreq_max_segs);
if (limit < rsize) {
@@ -36,9 +36,9 @@ static void netfs_prepare_dio_read_iterator(struct netfs_io_subrequest *subreq)
trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);
- subreq->io_iter = rreq->iter;
+ subreq->io_iter = rreq->buffer.iter;
iov_iter_truncate(&subreq->io_iter, subreq->len);
- iov_iter_advance(&rreq->iter, subreq->len);
+ iov_iter_advance(&rreq->buffer.iter, subreq->len);
}
/*
@@ -47,12 +47,11 @@ static void netfs_prepare_dio_read_iterator(struct netfs_io_subrequest *subreq)
*/
static int netfs_dispatch_unbuffered_reads(struct netfs_io_request *rreq)
{
+ struct netfs_io_stream *stream = &rreq->io_streams[0];
unsigned long long start = rreq->start;
ssize_t size = rreq->len;
int ret = 0;
- atomic_set(&rreq->nr_outstanding, 1);
-
do {
struct netfs_io_subrequest *subreq;
ssize_t slice;
@@ -67,19 +66,25 @@ static int netfs_dispatch_unbuffered_reads(struct netfs_io_request *rreq)
subreq->start = start;
subreq->len = size;
- atomic_inc(&rreq->nr_outstanding);
- spin_lock_bh(&rreq->lock);
- list_add_tail(&subreq->rreq_link, &rreq->subrequests);
- subreq->prev_donated = rreq->prev_donated;
- rreq->prev_donated = 0;
+ __set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
+
+ spin_lock(&rreq->lock);
+ list_add_tail(&subreq->rreq_link, &stream->subrequests);
+ if (list_is_first(&subreq->rreq_link, &stream->subrequests)) {
+ stream->front = subreq;
+ if (!stream->active) {
+ stream->collected_to = stream->front->start;
+ /* Store list pointers before active flag */
+ smp_store_release(&stream->active, true);
+ }
+ }
trace_netfs_sreq(subreq, netfs_sreq_trace_added);
- spin_unlock_bh(&rreq->lock);
+ spin_unlock(&rreq->lock);
netfs_stat(&netfs_n_rh_download);
if (rreq->netfs_ops->prepare_read) {
ret = rreq->netfs_ops->prepare_read(subreq);
if (ret < 0) {
- atomic_dec(&rreq->nr_outstanding);
netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_cancel);
break;
}
@@ -87,20 +92,32 @@ static int netfs_dispatch_unbuffered_reads(struct netfs_io_request *rreq)
netfs_prepare_dio_read_iterator(subreq);
slice = subreq->len;
- rreq->netfs_ops->issue_read(subreq);
-
size -= slice;
start += slice;
rreq->submitted += slice;
+ if (size <= 0) {
+ smp_wmb(); /* Write lists before ALL_QUEUED. */
+ set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags);
+ }
+ rreq->netfs_ops->issue_read(subreq);
+
+ if (test_bit(NETFS_RREQ_PAUSE, &rreq->flags))
+ netfs_wait_for_pause(rreq);
+ if (test_bit(NETFS_RREQ_FAILED, &rreq->flags))
+ break;
if (test_bit(NETFS_RREQ_BLOCKED, &rreq->flags) &&
test_bit(NETFS_RREQ_NONBLOCK, &rreq->flags))
break;
cond_resched();
} while (size > 0);
- if (atomic_dec_and_test(&rreq->nr_outstanding))
- netfs_rreq_terminated(rreq, false);
+ if (unlikely(size > 0)) {
+ smp_wmb(); /* Write lists before ALL_QUEUED. */
+ set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags);
+ netfs_wake_read_collector(rreq);
+ }
+
return ret;
}
@@ -108,9 +125,9 @@ static int netfs_dispatch_unbuffered_reads(struct netfs_io_request *rreq)
* Perform a read to an application buffer, bypassing the pagecache and the
* local disk cache.
*/
-static int netfs_unbuffered_read(struct netfs_io_request *rreq, bool sync)
+static ssize_t netfs_unbuffered_read(struct netfs_io_request *rreq, bool sync)
{
- int ret;
+ ssize_t ret;
_enter("R=%x %llx-%llx",
rreq->debug_id, rreq->start, rreq->start + rreq->len - 1);
@@ -133,23 +150,12 @@ static int netfs_unbuffered_read(struct netfs_io_request *rreq, bool sync)
goto out;
}
- if (sync) {
- trace_netfs_rreq(rreq, netfs_rreq_trace_wait_ip);
- wait_on_bit(&rreq->flags, NETFS_RREQ_IN_PROGRESS,
- TASK_UNINTERRUPTIBLE);
-
- ret = rreq->error;
- if (ret == 0 && rreq->submitted < rreq->len &&
- rreq->origin != NETFS_DIO_READ) {
- trace_netfs_failure(rreq, NULL, ret, netfs_fail_short_read);
- ret = -EIO;
- }
- } else {
+ if (sync)
+ ret = netfs_wait_for_read(rreq);
+ else
ret = -EIOCBQUEUED;
- }
-
out:
- _leave(" = %d", ret);
+ _leave(" = %zd", ret);
return ret;
}
@@ -199,15 +205,15 @@ ssize_t netfs_unbuffered_read_iter_locked(struct kiocb *iocb, struct iov_iter *i
* the request.
*/
if (user_backed_iter(iter)) {
- ret = netfs_extract_user_iter(iter, rreq->len, &rreq->iter, 0);
+ ret = netfs_extract_user_iter(iter, rreq->len, &rreq->buffer.iter, 0);
if (ret < 0)
goto out;
- rreq->direct_bv = (struct bio_vec *)rreq->iter.bvec;
+ rreq->direct_bv = (struct bio_vec *)rreq->buffer.iter.bvec;
rreq->direct_bv_count = ret;
rreq->direct_bv_unpin = iov_iter_extract_will_pin(iter);
- rreq->len = iov_iter_count(&rreq->iter);
+ rreq->len = iov_iter_count(&rreq->buffer.iter);
} else {
- rreq->iter = *iter;
+ rreq->buffer.iter = *iter;
rreq->len = orig_count;
rreq->direct_bv_unpin = false;
iov_iter_advance(iter, orig_count);
@@ -215,8 +221,10 @@ ssize_t netfs_unbuffered_read_iter_locked(struct kiocb *iocb, struct iov_iter *i
// TODO: Set up bounce buffer if needed
- if (!sync)
+ if (!sync) {
rreq->iocb = iocb;
+ __set_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags);
+ }
ret = netfs_unbuffered_read(rreq, sync);
if (ret < 0)
diff --git a/fs/netfs/direct_write.c b/fs/netfs/direct_write.c
index f9421f3e6d37..42ce53cc216e 100644
--- a/fs/netfs/direct_write.c
+++ b/fs/netfs/direct_write.c
@@ -68,12 +68,12 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *
* request.
*/
if (user_backed_iter(iter)) {
- n = netfs_extract_user_iter(iter, len, &wreq->iter, 0);
+ n = netfs_extract_user_iter(iter, len, &wreq->buffer.iter, 0);
if (n < 0) {
ret = n;
goto out;
}
- wreq->direct_bv = (struct bio_vec *)wreq->iter.bvec;
+ wreq->direct_bv = (struct bio_vec *)wreq->buffer.iter.bvec;
wreq->direct_bv_count = n;
wreq->direct_bv_unpin = iov_iter_extract_will_pin(iter);
} else {
@@ -82,10 +82,8 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *
* (eg. a bio_vec array) will persist till the end of
* the op.
*/
- wreq->iter = *iter;
+ wreq->buffer.iter = *iter;
}
-
- wreq->io_iter = wreq->iter;
}
__set_bit(NETFS_RREQ_USE_IO_ITER, &wreq->flags);
@@ -97,7 +95,7 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *
__set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags);
if (async)
wreq->iocb = iocb;
- wreq->len = iov_iter_count(&wreq->io_iter);
+ wreq->len = iov_iter_count(&wreq->buffer.iter);
wreq->cleanup = netfs_cleanup_dio_write;
ret = netfs_unbuffered_write(wreq, is_sync_kiocb(iocb), wreq->len);
if (ret < 0) {
diff --git a/fs/netfs/internal.h b/fs/netfs/internal.h
index c562aec3b483..1c4f953c3d68 100644
--- a/fs/netfs/internal.h
+++ b/fs/netfs/internal.h
@@ -23,6 +23,7 @@
/*
* buffered_read.c
*/
+void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error, bool was_async);
int netfs_prefetch_for_write(struct file *file, struct folio *folio,
size_t offset, size_t len);
@@ -58,11 +59,8 @@ static inline void netfs_proc_del_rreq(struct netfs_io_request *rreq) {}
/*
* misc.c
*/
-struct folio_queue *netfs_buffer_make_space(struct netfs_io_request *rreq);
-int netfs_buffer_append_folio(struct netfs_io_request *rreq, struct folio *folio,
- bool needs_put);
-struct folio_queue *netfs_delete_buffer_head(struct netfs_io_request *wreq);
-void netfs_clear_buffer(struct netfs_io_request *rreq);
+struct folio_queue *netfs_buffer_make_space(struct netfs_io_request *rreq,
+ enum netfs_folioq_trace trace);
void netfs_reset_iter(struct netfs_io_subrequest *subreq);
/*
@@ -84,20 +82,27 @@ static inline void netfs_see_request(struct netfs_io_request *rreq,
trace_netfs_rreq_ref(rreq->debug_id, refcount_read(&rreq->ref), what);
}
+static inline void netfs_see_subrequest(struct netfs_io_subrequest *subreq,
+ enum netfs_sreq_ref_trace what)
+{
+ trace_netfs_sreq_ref(subreq->rreq->debug_id, subreq->debug_index,
+ refcount_read(&subreq->ref), what);
+}
+
/*
* read_collect.c
*/
-void netfs_read_termination_worker(struct work_struct *work);
-void netfs_rreq_terminated(struct netfs_io_request *rreq, bool was_async);
+void netfs_read_collection_worker(struct work_struct *work);
+void netfs_wake_read_collector(struct netfs_io_request *rreq);
+void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error, bool was_async);
+ssize_t netfs_wait_for_read(struct netfs_io_request *rreq);
+void netfs_wait_for_pause(struct netfs_io_request *rreq);
/*
* read_pgpriv2.c
*/
-void netfs_pgpriv2_mark_copy_to_cache(struct netfs_io_subrequest *subreq,
- struct netfs_io_request *rreq,
- struct folio_queue *folioq,
- int slot);
-void netfs_pgpriv2_write_to_the_cache(struct netfs_io_request *rreq);
+void netfs_pgpriv2_copy_to_cache(struct netfs_io_request *rreq, struct folio *folio);
+void netfs_pgpriv2_end_copy_to_cache(struct netfs_io_request *rreq);
bool netfs_pgpriv2_unlock_copied_folios(struct netfs_io_request *wreq);
/*
@@ -113,6 +118,7 @@ void netfs_unlock_abandoned_read_pages(struct netfs_io_request *rreq);
extern atomic_t netfs_n_rh_dio_read;
extern atomic_t netfs_n_rh_readahead;
extern atomic_t netfs_n_rh_read_folio;
+extern atomic_t netfs_n_rh_read_single;
extern atomic_t netfs_n_rh_rreq;
extern atomic_t netfs_n_rh_sreq;
extern atomic_t netfs_n_rh_download;
@@ -129,6 +135,8 @@ extern atomic_t netfs_n_rh_write_begin;
extern atomic_t netfs_n_rh_write_done;
extern atomic_t netfs_n_rh_write_failed;
extern atomic_t netfs_n_rh_write_zskip;
+extern atomic_t netfs_n_rh_retry_read_req;
+extern atomic_t netfs_n_rh_retry_read_subreq;
extern atomic_t netfs_n_wh_buffered_write;
extern atomic_t netfs_n_wh_writethrough;
extern atomic_t netfs_n_wh_dio_write;
@@ -141,6 +149,8 @@ extern atomic_t netfs_n_wh_upload_failed;
extern atomic_t netfs_n_wh_write;
extern atomic_t netfs_n_wh_write_done;
extern atomic_t netfs_n_wh_write_failed;
+extern atomic_t netfs_n_wh_retry_write_req;
+extern atomic_t netfs_n_wh_retry_write_subreq;
extern atomic_t netfs_n_wb_lock_skip;
extern atomic_t netfs_n_wb_lock_wait;
extern atomic_t netfs_n_folioq;
@@ -181,9 +191,9 @@ void netfs_reissue_write(struct netfs_io_stream *stream,
struct iov_iter *source);
void netfs_issue_write(struct netfs_io_request *wreq,
struct netfs_io_stream *stream);
-int netfs_advance_write(struct netfs_io_request *wreq,
- struct netfs_io_stream *stream,
- loff_t start, size_t len, bool to_eof);
+size_t netfs_advance_write(struct netfs_io_request *wreq,
+ struct netfs_io_stream *stream,
+ loff_t start, size_t len, bool to_eof);
struct netfs_io_request *netfs_begin_writethrough(struct kiocb *iocb, size_t len);
int netfs_advance_writethrough(struct netfs_io_request *wreq, struct writeback_control *wbc,
struct folio *folio, size_t copied, bool to_page_end,
@@ -193,6 +203,11 @@ int netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_contr
int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t len);
/*
+ * write_retry.c
+ */
+void netfs_retry_writes(struct netfs_io_request *wreq);
+
+/*
* Miscellaneous functions.
*/
static inline bool netfs_is_cache_enabled(struct netfs_inode *ctx)
diff --git a/fs/netfs/main.c b/fs/netfs/main.c
index 6c7be1377ee0..4e3e62040831 100644
--- a/fs/netfs/main.c
+++ b/fs/netfs/main.c
@@ -37,9 +37,11 @@ static const char *netfs_origins[nr__netfs_io_origin] = {
[NETFS_READAHEAD] = "RA",
[NETFS_READPAGE] = "RP",
[NETFS_READ_GAPS] = "RG",
+ [NETFS_READ_SINGLE] = "R1",
[NETFS_READ_FOR_WRITE] = "RW",
[NETFS_DIO_READ] = "DR",
[NETFS_WRITEBACK] = "WB",
+ [NETFS_WRITEBACK_SINGLE] = "W1",
[NETFS_WRITETHROUGH] = "WT",
[NETFS_UNBUFFERED_WRITE] = "UW",
[NETFS_DIO_WRITE] = "DW",
@@ -69,7 +71,7 @@ static int netfs_requests_seq_show(struct seq_file *m, void *v)
refcount_read(&rreq->ref),
rreq->flags,
rreq->error,
- atomic_read(&rreq->nr_outstanding),
+ 0,
rreq->start, rreq->submitted, rreq->len);
seq_putc(m, '\n');
return 0;
@@ -116,7 +118,7 @@ static int __init netfs_init(void)
goto error_reqpool;
netfs_subrequest_slab = kmem_cache_create("netfs_subrequest",
- sizeof(struct netfs_io_subrequest), 0,
+ sizeof(struct netfs_io_subrequest) + 16, 0,
SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT,
NULL);
if (!netfs_subrequest_slab)
diff --git a/fs/netfs/misc.c b/fs/netfs/misc.c
index 78fe5796b2b2..7099aa07737a 100644
--- a/fs/netfs/misc.c
+++ b/fs/netfs/misc.c
@@ -8,113 +8,101 @@
#include <linux/swap.h>
#include "internal.h"
-/*
- * Make sure there's space in the rolling queue.
+/**
+ * netfs_alloc_folioq_buffer - Allocate buffer space into a folio queue
+ * @mapping: Address space to set on the folio (or NULL).
+ * @_buffer: Pointer to the folio queue to add to (may point to a NULL; updated).
+ * @_cur_size: Current size of the buffer (updated).
+ * @size: Target size of the buffer.
+ * @gfp: The allocation constraints.
*/
-struct folio_queue *netfs_buffer_make_space(struct netfs_io_request *rreq)
+int netfs_alloc_folioq_buffer(struct address_space *mapping,
+ struct folio_queue **_buffer,
+ size_t *_cur_size, ssize_t size, gfp_t gfp)
{
- struct folio_queue *tail = rreq->buffer_tail, *prev;
- unsigned int prev_nr_slots = 0;
-
- if (WARN_ON_ONCE(!rreq->buffer && tail) ||
- WARN_ON_ONCE(rreq->buffer && !tail))
- return ERR_PTR(-EIO);
-
- prev = tail;
- if (prev) {
- if (!folioq_full(tail))
- return tail;
- prev_nr_slots = folioq_nr_slots(tail);
- }
-
- tail = kmalloc(sizeof(*tail), GFP_NOFS);
- if (!tail)
- return ERR_PTR(-ENOMEM);
- netfs_stat(&netfs_n_folioq);
- folioq_init(tail);
- tail->prev = prev;
- if (prev)
- /* [!] NOTE: After we set prev->next, the consumer is entirely
- * at liberty to delete prev.
- */
- WRITE_ONCE(prev->next, tail);
-
- rreq->buffer_tail = tail;
- if (!rreq->buffer) {
- rreq->buffer = tail;
- iov_iter_folio_queue(&rreq->io_iter, ITER_SOURCE, tail, 0, 0, 0);
- } else {
- /* Make sure we don't leave the master iterator pointing to a
- * block that might get immediately consumed.
- */
- if (rreq->io_iter.folioq == prev &&
- rreq->io_iter.folioq_slot == prev_nr_slots) {
- rreq->io_iter.folioq = tail;
- rreq->io_iter.folioq_slot = 0;
+ struct folio_queue *tail = *_buffer, *p;
+
+ size = round_up(size, PAGE_SIZE);
+ if (*_cur_size >= size)
+ return 0;
+
+ if (tail)
+ while (tail->next)
+ tail = tail->next;
+
+ do {
+ struct folio *folio;
+ int order = 0, slot;
+
+ if (!tail || folioq_full(tail)) {
+ p = netfs_folioq_alloc(0, GFP_NOFS, netfs_trace_folioq_alloc_buffer);
+ if (!p)
+ return -ENOMEM;
+ if (tail) {
+ tail->next = p;
+ p->prev = tail;
+ } else {
+ *_buffer = p;
+ }
+ tail = p;
}
- }
- rreq->buffer_tail_slot = 0;
- return tail;
-}
-/*
- * Append a folio to the rolling queue.
- */
-int netfs_buffer_append_folio(struct netfs_io_request *rreq, struct folio *folio,
- bool needs_put)
-{
- struct folio_queue *tail;
- unsigned int slot, order = folio_order(folio);
+ if (size - *_cur_size > PAGE_SIZE)
+ order = umin(ilog2(size - *_cur_size) - PAGE_SHIFT,
+ MAX_PAGECACHE_ORDER);
- tail = netfs_buffer_make_space(rreq);
- if (IS_ERR(tail))
- return PTR_ERR(tail);
+ folio = folio_alloc(gfp, order);
+ if (!folio && order > 0)
+ folio = folio_alloc(gfp, 0);
+ if (!folio)
+ return -ENOMEM;
- rreq->io_iter.count += PAGE_SIZE << order;
+ folio->mapping = mapping;
+ folio->index = *_cur_size / PAGE_SIZE;
+ trace_netfs_folio(folio, netfs_folio_trace_alloc_buffer);
+ slot = folioq_append_mark(tail, folio);
+ *_cur_size += folioq_folio_size(tail, slot);
+ } while (*_cur_size < size);
- slot = folioq_append(tail, folio);
- /* Store the counter after setting the slot. */
- smp_store_release(&rreq->buffer_tail_slot, slot);
return 0;
}
+EXPORT_SYMBOL(netfs_alloc_folioq_buffer);
-/*
- * Delete the head of a rolling queue.
+/**
+ * netfs_free_folioq_buffer - Free a folio queue.
+ * @fq: The start of the folio queue to free
+ *
+ * Free up a chain of folio_queues and, if marked, the marked folios they point
+ * to.
*/
-struct folio_queue *netfs_delete_buffer_head(struct netfs_io_request *wreq)
+void netfs_free_folioq_buffer(struct folio_queue *fq)
{
- struct folio_queue *head = wreq->buffer, *next = head->next;
-
- if (next)
- next->prev = NULL;
- netfs_stat_d(&netfs_n_folioq);
- kfree(head);
- wreq->buffer = next;
- return next;
-}
+ struct folio_queue *next;
+ struct folio_batch fbatch;
-/*
- * Clear out a rolling queue.
- */
-void netfs_clear_buffer(struct netfs_io_request *rreq)
-{
- struct folio_queue *p;
+ folio_batch_init(&fbatch);
+
+ for (; fq; fq = next) {
+ for (int slot = 0; slot < folioq_count(fq); slot++) {
+ struct folio *folio = folioq_folio(fq, slot);
- while ((p = rreq->buffer)) {
- rreq->buffer = p->next;
- for (int slot = 0; slot < folioq_count(p); slot++) {
- struct folio *folio = folioq_folio(p, slot);
- if (!folio)
+ if (!folio ||
+ !folioq_is_marked(fq, slot))
continue;
- if (folioq_is_marked(p, slot)) {
- trace_netfs_folio(folio, netfs_folio_trace_put);
- folio_put(folio);
- }
+
+ trace_netfs_folio(folio, netfs_folio_trace_put);
+ if (folio_batch_add(&fbatch, folio))
+ folio_batch_release(&fbatch);
}
+
netfs_stat_d(&netfs_n_folioq);
- kfree(p);
+ next = fq->next;
+ kfree(fq);
}
+
+ folio_batch_release(&fbatch);
}
+EXPORT_SYMBOL(netfs_free_folioq_buffer);
/*
* Reset the subrequest iterator to refer just to the region remaining to be
diff --git a/fs/netfs/objects.c b/fs/netfs/objects.c
index 31e388ec6e48..dc6b41ef18b0 100644
--- a/fs/netfs/objects.c
+++ b/fs/netfs/objects.c
@@ -48,17 +48,20 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping,
spin_lock_init(&rreq->lock);
INIT_LIST_HEAD(&rreq->io_streams[0].subrequests);
INIT_LIST_HEAD(&rreq->io_streams[1].subrequests);
- INIT_LIST_HEAD(&rreq->subrequests);
+ init_waitqueue_head(&rreq->waitq);
refcount_set(&rreq->ref, 1);
if (origin == NETFS_READAHEAD ||
origin == NETFS_READPAGE ||
origin == NETFS_READ_GAPS ||
+ origin == NETFS_READ_SINGLE ||
origin == NETFS_READ_FOR_WRITE ||
- origin == NETFS_DIO_READ)
- INIT_WORK(&rreq->work, netfs_read_termination_worker);
- else
+ origin == NETFS_DIO_READ) {
+ INIT_WORK(&rreq->work, netfs_read_collection_worker);
+ rreq->io_streams[0].avail = true;
+ } else {
INIT_WORK(&rreq->work, netfs_write_collection_worker);
+ }
__set_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags);
if (file && file->f_flags & O_NONBLOCK)
@@ -92,14 +95,6 @@ void netfs_clear_subrequests(struct netfs_io_request *rreq, bool was_async)
struct netfs_io_stream *stream;
int s;
- while (!list_empty(&rreq->subrequests)) {
- subreq = list_first_entry(&rreq->subrequests,
- struct netfs_io_subrequest, rreq_link);
- list_del(&subreq->rreq_link);
- netfs_put_subrequest(subreq, was_async,
- netfs_sreq_trace_put_clear);
- }
-
for (s = 0; s < ARRAY_SIZE(rreq->io_streams); s++) {
stream = &rreq->io_streams[s];
while (!list_empty(&stream->subrequests)) {
@@ -143,7 +138,7 @@ static void netfs_free_request(struct work_struct *work)
}
kvfree(rreq->direct_bv);
}
- netfs_clear_buffer(rreq);
+ rolling_buffer_clear(&rreq->buffer);
if (atomic_dec_and_test(&ictx->io_count))
wake_up_var(&ictx->io_count);
diff --git a/fs/netfs/read_collect.c b/fs/netfs/read_collect.c
index 8878b46589ff..23c75755ad4e 100644
--- a/fs/netfs/read_collect.c
+++ b/fs/netfs/read_collect.c
@@ -14,6 +14,14 @@
#include <linux/task_io_accounting_ops.h>
#include "internal.h"
+/* Notes made in the collector */
+#define HIT_PENDING 0x01 /* A front op was still pending */
+#define MADE_PROGRESS 0x04 /* Made progress cleaning up a stream or the folio set */
+#define BUFFERED 0x08 /* The pagecache needs cleaning up */
+#define NEED_RETRY 0x10 /* A front op requests retrying */
+#define COPY_TO_CACHE 0x40 /* Need to copy subrequest to cache */
+#define ABANDON_SREQ 0x80 /* Need to abandon untransferred part of subrequest */
+
/*
* Clear the unread part of an I/O request.
*/
@@ -31,14 +39,18 @@ static void netfs_clear_unread(struct netfs_io_subrequest *subreq)
* cache the folio, we set the group to NETFS_FOLIO_COPY_TO_CACHE, mark it
* dirty and let writeback handle it.
*/
-static void netfs_unlock_read_folio(struct netfs_io_subrequest *subreq,
- struct netfs_io_request *rreq,
+static void netfs_unlock_read_folio(struct netfs_io_request *rreq,
struct folio_queue *folioq,
int slot)
{
struct netfs_folio *finfo;
struct folio *folio = folioq_folio(folioq, slot);
+ if (unlikely(folio_pos(folio) < rreq->abandon_to)) {
+ trace_netfs_folio(folio, netfs_folio_trace_abandon);
+ goto just_unlock;
+ }
+
flush_dcache_folio(folio);
folio_mark_uptodate(folio);
@@ -53,7 +65,7 @@ static void netfs_unlock_read_folio(struct netfs_io_subrequest *subreq,
kfree(finfo);
}
- if (test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags)) {
+ if (test_bit(NETFS_RREQ_FOLIO_COPY_TO_CACHE, &rreq->flags)) {
if (!WARN_ON_ONCE(folio_get_private(folio) != NULL)) {
trace_netfs_folio(folio, netfs_folio_trace_copy_to_cache);
folio_attach_private(folio, NETFS_FOLIO_COPY_TO_CACHE);
@@ -66,12 +78,11 @@ static void netfs_unlock_read_folio(struct netfs_io_subrequest *subreq,
folioq_clear(folioq, slot);
} else {
// TODO: Use of PG_private_2 is deprecated.
- if (test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags))
- netfs_pgpriv2_mark_copy_to_cache(subreq, rreq, folioq, slot);
- else
- folioq_clear(folioq, slot);
+ if (test_bit(NETFS_RREQ_FOLIO_COPY_TO_CACHE, &rreq->flags))
+ netfs_pgpriv2_copy_to_cache(rreq, folio);
}
+just_unlock:
if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) {
if (folio->index == rreq->no_unlock_folio &&
test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags)) {
@@ -81,239 +92,249 @@ static void netfs_unlock_read_folio(struct netfs_io_subrequest *subreq,
folio_unlock(folio);
}
}
+
+ folioq_clear(folioq, slot);
}
/*
- * Unlock any folios that are now completely read. Returns true if the
- * subrequest is removed from the list.
+ * Unlock any folios we've finished with.
*/
-static bool netfs_consume_read_data(struct netfs_io_subrequest *subreq, bool was_async)
+static void netfs_read_unlock_folios(struct netfs_io_request *rreq,
+ unsigned int *notes)
{
- struct netfs_io_subrequest *prev, *next;
- struct netfs_io_request *rreq = subreq->rreq;
- struct folio_queue *folioq = subreq->curr_folioq;
- size_t avail, prev_donated, next_donated, fsize, part, excess;
- loff_t fpos, start;
- loff_t fend;
- int slot = subreq->curr_folioq_slot;
-
- if (WARN(subreq->transferred > subreq->len,
- "Subreq overread: R%x[%x] %zu > %zu",
- rreq->debug_id, subreq->debug_index,
- subreq->transferred, subreq->len))
- subreq->transferred = subreq->len;
-
-next_folio:
- fsize = PAGE_SIZE << subreq->curr_folio_order;
- fpos = round_down(subreq->start + subreq->consumed, fsize);
- fend = fpos + fsize;
-
- if (WARN_ON_ONCE(!folioq) ||
- WARN_ON_ONCE(!folioq_folio(folioq, slot)) ||
- WARN_ON_ONCE(folioq_folio(folioq, slot)->index != fpos / PAGE_SIZE)) {
- pr_err("R=%08x[%x] s=%llx-%llx ctl=%zx/%zx/%zx sl=%u\n",
- rreq->debug_id, subreq->debug_index,
- subreq->start, subreq->start + subreq->transferred - 1,
- subreq->consumed, subreq->transferred, subreq->len,
- slot);
- if (folioq) {
- struct folio *folio = folioq_folio(folioq, slot);
-
- pr_err("folioq: orders=%02x%02x%02x%02x\n",
- folioq->orders[0], folioq->orders[1],
- folioq->orders[2], folioq->orders[3]);
- if (folio)
- pr_err("folio: %llx-%llx ix=%llx o=%u qo=%u\n",
- fpos, fend - 1, folio_pos(folio), folio_order(folio),
- folioq_folio_order(folioq, slot));
- }
- }
+ struct folio_queue *folioq = rreq->buffer.tail;
+ unsigned long long collected_to = rreq->collected_to;
+ unsigned int slot = rreq->buffer.first_tail_slot;
-donation_changed:
- /* Try to consume the current folio if we've hit or passed the end of
- * it. There's a possibility that this subreq doesn't start at the
- * beginning of the folio, in which case we need to donate to/from the
- * preceding subreq.
- *
- * We also need to include any potential donation back from the
- * following subreq.
- */
- prev_donated = READ_ONCE(subreq->prev_donated);
- next_donated = READ_ONCE(subreq->next_donated);
- if (prev_donated || next_donated) {
- spin_lock_bh(&rreq->lock);
- prev_donated = subreq->prev_donated;
- next_donated = subreq->next_donated;
- subreq->start -= prev_donated;
- subreq->len += prev_donated;
- subreq->transferred += prev_donated;
- prev_donated = subreq->prev_donated = 0;
- if (subreq->transferred == subreq->len) {
- subreq->len += next_donated;
- subreq->transferred += next_donated;
- next_donated = subreq->next_donated = 0;
+ if (rreq->cleaned_to >= rreq->collected_to)
+ return;
+
+ // TODO: Begin decryption
+
+ if (slot >= folioq_nr_slots(folioq)) {
+ folioq = rolling_buffer_delete_spent(&rreq->buffer);
+ if (!folioq) {
+ rreq->front_folio_order = 0;
+ return;
}
- trace_netfs_sreq(subreq, netfs_sreq_trace_add_donations);
- spin_unlock_bh(&rreq->lock);
+ slot = 0;
}
- avail = subreq->transferred;
- if (avail == subreq->len)
- avail += next_donated;
- start = subreq->start;
- if (subreq->consumed == 0) {
- start -= prev_donated;
- avail += prev_donated;
- } else {
- start += subreq->consumed;
- avail -= subreq->consumed;
- }
- part = umin(avail, fsize);
-
- trace_netfs_progress(subreq, start, avail, part);
-
- if (start + avail >= fend) {
- if (fpos == start) {
- /* Flush, unlock and mark for caching any folio we've just read. */
- subreq->consumed = fend - subreq->start;
- netfs_unlock_read_folio(subreq, rreq, folioq, slot);
- folioq_mark2(folioq, slot);
- if (subreq->consumed >= subreq->len)
- goto remove_subreq;
- } else if (fpos < start) {
- excess = fend - subreq->start;
-
- spin_lock_bh(&rreq->lock);
- /* If we complete first on a folio split with the
- * preceding subreq, donate to that subreq - otherwise
- * we get the responsibility.
- */
- if (subreq->prev_donated != prev_donated) {
- spin_unlock_bh(&rreq->lock);
- goto donation_changed;
- }
+ for (;;) {
+ struct folio *folio;
+ unsigned long long fpos, fend;
+ unsigned int order;
+ size_t fsize;
- if (list_is_first(&subreq->rreq_link, &rreq->subrequests)) {
- spin_unlock_bh(&rreq->lock);
- pr_err("Can't donate prior to front\n");
- goto bad;
- }
+ if (*notes & COPY_TO_CACHE)
+ set_bit(NETFS_RREQ_FOLIO_COPY_TO_CACHE, &rreq->flags);
- prev = list_prev_entry(subreq, rreq_link);
- WRITE_ONCE(prev->next_donated, prev->next_donated + excess);
- subreq->start += excess;
- subreq->len -= excess;
- subreq->transferred -= excess;
- trace_netfs_donate(rreq, subreq, prev, excess,
- netfs_trace_donate_tail_to_prev);
- trace_netfs_sreq(subreq, netfs_sreq_trace_donate_to_prev);
-
- if (subreq->consumed >= subreq->len)
- goto remove_subreq_locked;
- spin_unlock_bh(&rreq->lock);
- } else {
- pr_err("fpos > start\n");
- goto bad;
- }
+ folio = folioq_folio(folioq, slot);
+ if (WARN_ONCE(!folio_test_locked(folio),
+ "R=%08x: folio %lx is not locked\n",
+ rreq->debug_id, folio->index))
+ trace_netfs_folio(folio, netfs_folio_trace_not_locked);
+
+ order = folioq_folio_order(folioq, slot);
+ rreq->front_folio_order = order;
+ fsize = PAGE_SIZE << order;
+ fpos = folio_pos(folio);
+ fend = umin(fpos + fsize, rreq->i_size);
+
+ trace_netfs_collect_folio(rreq, folio, fend, collected_to);
- /* Advance the rolling buffer to the next folio. */
+ /* Unlock any folio we've transferred all of. */
+ if (collected_to < fend)
+ break;
+
+ netfs_unlock_read_folio(rreq, folioq, slot);
+ WRITE_ONCE(rreq->cleaned_to, fpos + fsize);
+ *notes |= MADE_PROGRESS;
+
+ clear_bit(NETFS_RREQ_FOLIO_COPY_TO_CACHE, &rreq->flags);
+
+ /* Clean up the head folioq. If we clear an entire folioq, then
+ * we can get rid of it provided it's not also the tail folioq
+ * being filled by the issuer.
+ */
+ folioq_clear(folioq, slot);
slot++;
if (slot >= folioq_nr_slots(folioq)) {
+ folioq = rolling_buffer_delete_spent(&rreq->buffer);
+ if (!folioq)
+ goto done;
slot = 0;
- folioq = folioq->next;
- subreq->curr_folioq = folioq;
+ trace_netfs_folioq(folioq, netfs_trace_folioq_read_progress);
}
- subreq->curr_folioq_slot = slot;
- if (folioq && folioq_folio(folioq, slot))
- subreq->curr_folio_order = folioq->orders[slot];
- if (!was_async)
- cond_resched();
- goto next_folio;
+
+ if (fpos + fsize >= collected_to)
+ break;
}
- /* Deal with partial progress. */
- if (subreq->transferred < subreq->len)
- return false;
+ rreq->buffer.tail = folioq;
+done:
+ rreq->buffer.first_tail_slot = slot;
+}
- /* Donate the remaining downloaded data to one of the neighbouring
- * subrequests. Note that we may race with them doing the same thing.
+/*
+ * Collect and assess the results of various read subrequests. We may need to
+ * retry some of the results.
+ *
+ * Note that we have a sequence of subrequests, which may be drawing on
+ * different sources and may or may not be the same size or starting position
+ * and may not even correspond in boundary alignment.
+ */
+static void netfs_collect_read_results(struct netfs_io_request *rreq)
+{
+ struct netfs_io_subrequest *front, *remove;
+ struct netfs_io_stream *stream = &rreq->io_streams[0];
+ unsigned int notes;
+
+ _enter("%llx-%llx", rreq->start, rreq->start + rreq->len);
+ trace_netfs_rreq(rreq, netfs_rreq_trace_collect);
+ trace_netfs_collect(rreq);
+
+reassess:
+ if (rreq->origin == NETFS_READAHEAD ||
+ rreq->origin == NETFS_READPAGE ||
+ rreq->origin == NETFS_READ_FOR_WRITE)
+ notes = BUFFERED;
+ else
+ notes = 0;
+
+ /* Remove completed subrequests from the front of the stream and
+ * advance the completion point. We stop when we hit something that's
+ * in progress. The issuer thread may be adding stuff to the tail
+ * whilst we're doing this.
*/
- spin_lock_bh(&rreq->lock);
+ front = READ_ONCE(stream->front);
+ while (front) {
+ size_t transferred;
- if (subreq->prev_donated != prev_donated ||
- subreq->next_donated != next_donated) {
- spin_unlock_bh(&rreq->lock);
- cond_resched();
- goto donation_changed;
- }
+ trace_netfs_collect_sreq(rreq, front);
+ _debug("sreq [%x] %llx %zx/%zx",
+ front->debug_index, front->start, front->transferred, front->len);
- /* Deal with the trickiest case: that this subreq is in the middle of a
- * folio, not touching either edge, but finishes first. In such a
- * case, we donate to the previous subreq, if there is one and if it is
- * contiguous, so that the donation is only handled when that completes
- * - and remove this subreq from the list.
- *
- * If the previous subreq finished first, we will have acquired their
- * donation and should be able to unlock folios and/or donate nextwards.
- */
- if (!subreq->consumed &&
- !prev_donated &&
- !list_is_first(&subreq->rreq_link, &rreq->subrequests)) {
- prev = list_prev_entry(subreq, rreq_link);
- if (subreq->start == prev->start + prev->len) {
- WRITE_ONCE(prev->next_donated, prev->next_donated + subreq->len);
- subreq->start += subreq->len;
- subreq->len = 0;
- subreq->transferred = 0;
- trace_netfs_donate(rreq, subreq, prev, subreq->len,
- netfs_trace_donate_to_prev);
- trace_netfs_sreq(subreq, netfs_sreq_trace_donate_to_prev);
- goto remove_subreq_locked;
+ if (stream->collected_to < front->start) {
+ trace_netfs_collect_gap(rreq, stream, front->start, 'F');
+ stream->collected_to = front->start;
+ }
+
+ if (test_bit(NETFS_SREQ_IN_PROGRESS, &front->flags))
+ notes |= HIT_PENDING;
+ smp_rmb(); /* Read counters after IN_PROGRESS flag. */
+ transferred = READ_ONCE(front->transferred);
+
+ /* If we can now collect the next folio, do so. We don't want
+ * to defer this as we have to decide whether we need to copy
+ * to the cache or not, and that may differ between adjacent
+ * subreqs.
+ */
+ if (notes & BUFFERED) {
+ size_t fsize = PAGE_SIZE << rreq->front_folio_order;
+
+ /* Clear the tail of a short read. */
+ if (!(notes & HIT_PENDING) &&
+ front->error == 0 &&
+ transferred < front->len &&
+ (test_bit(NETFS_SREQ_HIT_EOF, &front->flags) ||
+ test_bit(NETFS_SREQ_CLEAR_TAIL, &front->flags))) {
+ netfs_clear_unread(front);
+ transferred = front->transferred = front->len;
+ trace_netfs_sreq(front, netfs_sreq_trace_clear);
+ }
+
+ stream->collected_to = front->start + transferred;
+ rreq->collected_to = stream->collected_to;
+
+ if (test_bit(NETFS_SREQ_COPY_TO_CACHE, &front->flags))
+ notes |= COPY_TO_CACHE;
+
+ if (test_bit(NETFS_SREQ_FAILED, &front->flags)) {
+ rreq->abandon_to = front->start + front->len;
+ front->transferred = front->len;
+ transferred = front->len;
+ trace_netfs_rreq(rreq, netfs_rreq_trace_set_abandon);
+ }
+ if (front->start + transferred >= rreq->cleaned_to + fsize ||
+ test_bit(NETFS_SREQ_HIT_EOF, &front->flags))
+ netfs_read_unlock_folios(rreq, &notes);
+ } else {
+ stream->collected_to = front->start + transferred;
+ rreq->collected_to = stream->collected_to;
}
+
+ /* Stall if the front is still undergoing I/O. */
+ if (notes & HIT_PENDING)
+ break;
+
+ if (test_bit(NETFS_SREQ_FAILED, &front->flags)) {
+ if (!stream->failed) {
+ stream->error = front->error;
+ rreq->error = front->error;
+ set_bit(NETFS_RREQ_FAILED, &rreq->flags);
+ stream->failed = true;
+ }
+ notes |= MADE_PROGRESS | ABANDON_SREQ;
+ } else if (test_bit(NETFS_SREQ_NEED_RETRY, &front->flags)) {
+ stream->need_retry = true;
+ notes |= NEED_RETRY | MADE_PROGRESS;
+ break;
+ } else {
+ if (!stream->failed)
+ stream->transferred = stream->collected_to - rreq->start;
+ notes |= MADE_PROGRESS;
+ }
+
+ /* Remove if completely consumed. */
+ stream->source = front->source;
+ spin_lock(&rreq->lock);
+
+ remove = front;
+ trace_netfs_sreq(front, netfs_sreq_trace_discard);
+ list_del_init(&front->rreq_link);
+ front = list_first_entry_or_null(&stream->subrequests,
+ struct netfs_io_subrequest, rreq_link);
+ stream->front = front;
+ spin_unlock(&rreq->lock);
+ netfs_put_subrequest(remove, false,
+ notes & ABANDON_SREQ ?
+ netfs_sreq_trace_put_abandon :
+ netfs_sreq_trace_put_done);
}
- /* If we can't donate down the chain, donate up the chain instead. */
- excess = subreq->len - subreq->consumed + next_donated;
+ trace_netfs_collect_stream(rreq, stream);
+ trace_netfs_collect_state(rreq, rreq->collected_to, notes);
- if (!subreq->consumed)
- excess += prev_donated;
+ if (!(notes & BUFFERED))
+ rreq->cleaned_to = rreq->collected_to;
- if (list_is_last(&subreq->rreq_link, &rreq->subrequests)) {
- rreq->prev_donated = excess;
- trace_netfs_donate(rreq, subreq, NULL, excess,
- netfs_trace_donate_to_deferred_next);
- } else {
- next = list_next_entry(subreq, rreq_link);
- WRITE_ONCE(next->prev_donated, excess);
- trace_netfs_donate(rreq, subreq, next, excess,
- netfs_trace_donate_to_next);
+ if (notes & NEED_RETRY)
+ goto need_retry;
+ if ((notes & MADE_PROGRESS) && test_bit(NETFS_RREQ_PAUSE, &rreq->flags)) {
+ trace_netfs_rreq(rreq, netfs_rreq_trace_unpause);
+ clear_bit_unlock(NETFS_RREQ_PAUSE, &rreq->flags);
+ smp_mb__after_atomic(); /* Set PAUSE before task state */
+ wake_up(&rreq->waitq);
+ }
+
+ if (notes & MADE_PROGRESS) {
+ //cond_resched();
+ goto reassess;
}
- trace_netfs_sreq(subreq, netfs_sreq_trace_donate_to_next);
- subreq->len = subreq->consumed;
- subreq->transferred = subreq->consumed;
- goto remove_subreq_locked;
-
-remove_subreq:
- spin_lock_bh(&rreq->lock);
-remove_subreq_locked:
- subreq->consumed = subreq->len;
- list_del(&subreq->rreq_link);
- spin_unlock_bh(&rreq->lock);
- netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_consumed);
- return true;
-
-bad:
- /* Errr... prev and next both donated to us, but insufficient to finish
- * the folio.
+
+out:
+ _leave(" = %x", notes);
+ return;
+
+need_retry:
+ /* Okay... We're going to have to retry parts of the stream. Note
+ * that any partially completed op will have had any wholly transferred
+ * folios removed from it.
*/
- printk("R=%08x[%x] s=%llx-%llx %zx/%zx/%zx\n",
- rreq->debug_id, subreq->debug_index,
- subreq->start, subreq->start + subreq->transferred - 1,
- subreq->consumed, subreq->transferred, subreq->len);
- printk("folio: %llx-%llx\n", fpos, fend - 1);
- printk("donated: prev=%zx next=%zx\n", prev_donated, next_donated);
- printk("s=%llx av=%zx part=%zx\n", start, avail, part);
- BUG();
+ _debug("retry");
+ netfs_retry_reads(rreq);
+ goto out;
}
/*
@@ -322,12 +343,13 @@ bad:
static void netfs_rreq_assess_dio(struct netfs_io_request *rreq)
{
struct netfs_io_subrequest *subreq;
+ struct netfs_io_stream *stream = &rreq->io_streams[0];
unsigned int i;
/* Collect unbuffered reads and direct reads, adding up the transfer
* sizes until we find the first short or failed subrequest.
*/
- list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
+ list_for_each_entry(subreq, &stream->subrequests, rreq_link) {
rreq->transferred += subreq->transferred;
if (subreq->transferred < subreq->len ||
@@ -360,25 +382,67 @@ static void netfs_rreq_assess_dio(struct netfs_io_request *rreq)
}
/*
- * Assess the state of a read request and decide what to do next.
+ * Do processing after reading a monolithic single object.
+ */
+static void netfs_rreq_assess_single(struct netfs_io_request *rreq)
+{
+ struct netfs_io_stream *stream = &rreq->io_streams[0];
+
+ if (!rreq->error && stream->source == NETFS_DOWNLOAD_FROM_SERVER &&
+ fscache_resources_valid(&rreq->cache_resources)) {
+ trace_netfs_rreq(rreq, netfs_rreq_trace_dirty);
+ netfs_single_mark_inode_dirty(rreq->inode);
+ }
+
+ if (rreq->iocb) {
+ rreq->iocb->ki_pos += rreq->transferred;
+ if (rreq->iocb->ki_complete)
+ rreq->iocb->ki_complete(
+ rreq->iocb, rreq->error ? rreq->error : rreq->transferred);
+ }
+ if (rreq->netfs_ops->done)
+ rreq->netfs_ops->done(rreq);
+}
+
+/*
+ * Perform the collection of subrequests and folios.
*
* Note that we're in normal kernel thread context at this point, possibly
* running on a workqueue.
*/
-static void netfs_rreq_assess(struct netfs_io_request *rreq)
+static void netfs_read_collection(struct netfs_io_request *rreq)
{
- trace_netfs_rreq(rreq, netfs_rreq_trace_assess);
+ struct netfs_io_stream *stream = &rreq->io_streams[0];
- //netfs_rreq_is_still_valid(rreq);
+ netfs_collect_read_results(rreq);
- if (test_and_clear_bit(NETFS_RREQ_NEED_RETRY, &rreq->flags)) {
- netfs_retry_reads(rreq);
+ /* We're done when the app thread has finished posting subreqs and the
+ * queue is empty.
+ */
+ if (!test_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags))
return;
- }
+ smp_rmb(); /* Read ALL_QUEUED before subreq lists. */
+
+ if (!list_empty(&stream->subrequests))
+ return;
+
+ /* Okay, declare that all I/O is complete. */
+ rreq->transferred = stream->transferred;
+ trace_netfs_rreq(rreq, netfs_rreq_trace_complete);
- if (rreq->origin == NETFS_DIO_READ ||
- rreq->origin == NETFS_READ_GAPS)
+ //netfs_rreq_is_still_valid(rreq);
+
+ switch (rreq->origin) {
+ case NETFS_DIO_READ:
+ case NETFS_READ_GAPS:
netfs_rreq_assess_dio(rreq);
+ break;
+ case NETFS_READ_SINGLE:
+ netfs_rreq_assess_single(rreq);
+ break;
+ default:
+ break;
+ }
task_io_account_read(rreq->transferred);
trace_netfs_rreq(rreq, netfs_rreq_trace_wake_ip);
@@ -387,61 +451,67 @@ static void netfs_rreq_assess(struct netfs_io_request *rreq)
trace_netfs_rreq(rreq, netfs_rreq_trace_done);
netfs_clear_subrequests(rreq, false);
netfs_unlock_abandoned_read_pages(rreq);
- if (unlikely(test_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags)))
- netfs_pgpriv2_write_to_the_cache(rreq);
+ if (unlikely(rreq->copy_to_cache))
+ netfs_pgpriv2_end_copy_to_cache(rreq);
}
-void netfs_read_termination_worker(struct work_struct *work)
+void netfs_read_collection_worker(struct work_struct *work)
{
- struct netfs_io_request *rreq =
- container_of(work, struct netfs_io_request, work);
+ struct netfs_io_request *rreq = container_of(work, struct netfs_io_request, work);
+
netfs_see_request(rreq, netfs_rreq_trace_see_work);
- netfs_rreq_assess(rreq);
- netfs_put_request(rreq, false, netfs_rreq_trace_put_work_complete);
+ if (test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags))
+ netfs_read_collection(rreq);
+ netfs_put_request(rreq, false, netfs_rreq_trace_put_work);
}
/*
- * Handle the completion of all outstanding I/O operations on a read request.
- * We inherit a ref from the caller.
+ * Wake the collection work item.
*/
-void netfs_rreq_terminated(struct netfs_io_request *rreq, bool was_async)
+void netfs_wake_read_collector(struct netfs_io_request *rreq)
{
- if (!was_async)
- return netfs_rreq_assess(rreq);
- if (!work_pending(&rreq->work)) {
- netfs_get_request(rreq, netfs_rreq_trace_get_work);
- if (!queue_work(system_unbound_wq, &rreq->work))
- netfs_put_request(rreq, was_async, netfs_rreq_trace_put_work_nq);
+ if (test_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags) &&
+ !test_bit(NETFS_RREQ_RETRYING, &rreq->flags)) {
+ if (!work_pending(&rreq->work)) {
+ netfs_get_request(rreq, netfs_rreq_trace_get_work);
+ if (!queue_work(system_unbound_wq, &rreq->work))
+ netfs_put_request(rreq, true, netfs_rreq_trace_put_work_nq);
+ }
+ } else {
+ trace_netfs_rreq(rreq, netfs_rreq_trace_wake_queue);
+ wake_up(&rreq->waitq);
}
}
/**
* netfs_read_subreq_progress - Note progress of a read operation.
* @subreq: The read request that has terminated.
- * @was_async: True if we're in an asynchronous context.
*
* This tells the read side of netfs lib that a contributory I/O operation has
* made some progress and that it may be possible to unlock some folios.
*
* Before calling, the filesystem should update subreq->transferred to track
* the amount of data copied into the output buffer.
- *
- * If @was_async is true, the caller might be running in softirq or interrupt
- * context and we can't sleep.
*/
-void netfs_read_subreq_progress(struct netfs_io_subrequest *subreq,
- bool was_async)
+void netfs_read_subreq_progress(struct netfs_io_subrequest *subreq)
{
struct netfs_io_request *rreq = subreq->rreq;
+ struct netfs_io_stream *stream = &rreq->io_streams[0];
+ size_t fsize = PAGE_SIZE << rreq->front_folio_order;
trace_netfs_sreq(subreq, netfs_sreq_trace_progress);
- if (subreq->transferred > subreq->consumed &&
+ /* If we are at the head of the queue, wake up the collector,
+ * getting a ref to it if we were the ones to do so.
+ */
+ if (subreq->start + subreq->transferred > rreq->cleaned_to + fsize &&
(rreq->origin == NETFS_READAHEAD ||
rreq->origin == NETFS_READPAGE ||
- rreq->origin == NETFS_READ_FOR_WRITE)) {
- netfs_consume_read_data(subreq, was_async);
+ rreq->origin == NETFS_READ_FOR_WRITE) &&
+ list_is_first(&subreq->rreq_link, &stream->subrequests)
+ ) {
__set_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags);
+ netfs_wake_read_collector(rreq);
}
}
EXPORT_SYMBOL(netfs_read_subreq_progress);
@@ -449,27 +519,23 @@ EXPORT_SYMBOL(netfs_read_subreq_progress);
/**
* netfs_read_subreq_terminated - Note the termination of an I/O operation.
* @subreq: The I/O request that has terminated.
- * @error: Error code indicating type of completion.
- * @was_async: The termination was asynchronous
*
* This tells the read helper that a contributory I/O operation has terminated,
* one way or another, and that it should integrate the results.
*
- * The caller indicates the outcome of the operation through @error, supplying
- * 0 to indicate a successful or retryable transfer (if NETFS_SREQ_NEED_RETRY
- * is set) or a negative error code. The helper will look after reissuing I/O
- * operations as appropriate and writing downloaded data to the cache.
+ * The caller indicates the outcome of the operation through @subreq->error,
+ * supplying 0 to indicate a successful or retryable transfer (if
+ * NETFS_SREQ_NEED_RETRY is set) or a negative error code. The helper will
+ * look after reissuing I/O operations as appropriate and writing downloaded
+ * data to the cache.
*
* Before calling, the filesystem should update subreq->transferred to track
* the amount of data copied into the output buffer.
- *
- * If @was_async is true, the caller might be running in softirq or interrupt
- * context and we can't sleep.
*/
-void netfs_read_subreq_terminated(struct netfs_io_subrequest *subreq,
- int error, bool was_async)
+void netfs_read_subreq_terminated(struct netfs_io_subrequest *subreq)
{
struct netfs_io_request *rreq = subreq->rreq;
+ struct netfs_io_stream *stream = &rreq->io_streams[0];
switch (subreq->source) {
case NETFS_READ_FROM_CACHE:
@@ -482,71 +548,159 @@ void netfs_read_subreq_terminated(struct netfs_io_subrequest *subreq,
break;
}
- if (rreq->origin != NETFS_DIO_READ) {
- /* Collect buffered reads.
- *
- * If the read completed validly short, then we can clear the
- * tail before going on to unlock the folios.
- */
- if (error == 0 && subreq->transferred < subreq->len &&
- (test_bit(NETFS_SREQ_HIT_EOF, &subreq->flags) ||
- test_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags))) {
- netfs_clear_unread(subreq);
- subreq->transferred = subreq->len;
- trace_netfs_sreq(subreq, netfs_sreq_trace_clear);
- }
- if (subreq->transferred > subreq->consumed &&
- (rreq->origin == NETFS_READAHEAD ||
- rreq->origin == NETFS_READPAGE ||
- rreq->origin == NETFS_READ_FOR_WRITE)) {
- netfs_consume_read_data(subreq, was_async);
- __set_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags);
- }
- rreq->transferred += subreq->transferred;
- }
-
/* Deal with retry requests, short reads and errors. If we retry
* but don't make progress, we abandon the attempt.
*/
- if (!error && subreq->transferred < subreq->len) {
+ if (!subreq->error && subreq->transferred < subreq->len) {
if (test_bit(NETFS_SREQ_HIT_EOF, &subreq->flags)) {
trace_netfs_sreq(subreq, netfs_sreq_trace_hit_eof);
+ } else if (test_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags)) {
+ trace_netfs_sreq(subreq, netfs_sreq_trace_need_clear);
+ } else if (test_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags)) {
+ trace_netfs_sreq(subreq, netfs_sreq_trace_need_retry);
+ } else if (test_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags)) {
+ __set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
+ trace_netfs_sreq(subreq, netfs_sreq_trace_partial_read);
} else {
+ __set_bit(NETFS_SREQ_FAILED, &subreq->flags);
+ subreq->error = -ENODATA;
trace_netfs_sreq(subreq, netfs_sreq_trace_short);
- if (subreq->transferred > subreq->consumed) {
- /* If we didn't read new data, abandon retry. */
- if (subreq->retry_count &&
- test_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags)) {
- __set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
- set_bit(NETFS_RREQ_NEED_RETRY, &rreq->flags);
- }
- } else if (test_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags)) {
- __set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
- set_bit(NETFS_RREQ_NEED_RETRY, &rreq->flags);
- } else {
- __set_bit(NETFS_SREQ_FAILED, &subreq->flags);
- error = -ENODATA;
- }
}
}
- subreq->error = error;
- trace_netfs_sreq(subreq, netfs_sreq_trace_terminated);
-
- if (unlikely(error < 0)) {
- trace_netfs_failure(rreq, subreq, error, netfs_fail_read);
+ if (unlikely(subreq->error < 0)) {
+ trace_netfs_failure(rreq, subreq, subreq->error, netfs_fail_read);
if (subreq->source == NETFS_READ_FROM_CACHE) {
netfs_stat(&netfs_n_rh_read_failed);
+ __set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
} else {
netfs_stat(&netfs_n_rh_download_failed);
- set_bit(NETFS_RREQ_FAILED, &rreq->flags);
- rreq->error = subreq->error;
+ __set_bit(NETFS_SREQ_FAILED, &subreq->flags);
}
+ trace_netfs_rreq(rreq, netfs_rreq_trace_set_pause);
+ set_bit(NETFS_RREQ_PAUSE, &rreq->flags);
}
- if (atomic_dec_and_test(&rreq->nr_outstanding))
- netfs_rreq_terminated(rreq, was_async);
+ trace_netfs_sreq(subreq, netfs_sreq_trace_terminated);
+
+ clear_bit_unlock(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
+ smp_mb__after_atomic(); /* Clear IN_PROGRESS before task state */
- netfs_put_subrequest(subreq, was_async, netfs_sreq_trace_put_terminated);
+ /* If we are at the head of the queue, wake up the collector. */
+ if (list_is_first(&subreq->rreq_link, &stream->subrequests) ||
+ test_bit(NETFS_RREQ_RETRYING, &rreq->flags))
+ netfs_wake_read_collector(rreq);
+
+ netfs_put_subrequest(subreq, true, netfs_sreq_trace_put_terminated);
}
EXPORT_SYMBOL(netfs_read_subreq_terminated);
+
+/*
+ * Handle termination of a read from the cache.
+ */
+void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error, bool was_async)
+{
+ struct netfs_io_subrequest *subreq = priv;
+
+ if (transferred_or_error > 0) {
+ subreq->error = 0;
+ if (transferred_or_error > 0) {
+ subreq->transferred += transferred_or_error;
+ __set_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags);
+ }
+ } else {
+ subreq->error = transferred_or_error;
+ }
+ netfs_read_subreq_terminated(subreq);
+}
+
+/*
+ * Wait for the read operation to complete, successfully or otherwise.
+ */
+ssize_t netfs_wait_for_read(struct netfs_io_request *rreq)
+{
+ struct netfs_io_subrequest *subreq;
+ struct netfs_io_stream *stream = &rreq->io_streams[0];
+ DEFINE_WAIT(myself);
+ ssize_t ret;
+
+ for (;;) {
+ trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue);
+ prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE);
+
+ subreq = list_first_entry_or_null(&stream->subrequests,
+ struct netfs_io_subrequest, rreq_link);
+ if (subreq &&
+ (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags) ||
+ test_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags))) {
+ __set_current_state(TASK_RUNNING);
+ netfs_read_collection(rreq);
+ continue;
+ }
+
+ if (!test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags))
+ break;
+
+ schedule();
+ trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue);
+ }
+
+ finish_wait(&rreq->waitq, &myself);
+
+ ret = rreq->error;
+ if (ret == 0) {
+ ret = rreq->transferred;
+ switch (rreq->origin) {
+ case NETFS_DIO_READ:
+ case NETFS_READ_SINGLE:
+ ret = rreq->transferred;
+ break;
+ default:
+ if (rreq->submitted < rreq->len) {
+ trace_netfs_failure(rreq, NULL, ret, netfs_fail_short_read);
+ ret = -EIO;
+ }
+ break;
+ }
+ }
+
+ return ret;
+}
+
+/*
+ * Wait for a paused read operation to unpause or complete in some manner.
+ */
+void netfs_wait_for_pause(struct netfs_io_request *rreq)
+{
+ struct netfs_io_subrequest *subreq;
+ struct netfs_io_stream *stream = &rreq->io_streams[0];
+ DEFINE_WAIT(myself);
+
+ trace_netfs_rreq(rreq, netfs_rreq_trace_wait_pause);
+
+ for (;;) {
+ trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue);
+ prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE);
+
+ if (!test_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags)) {
+ subreq = list_first_entry_or_null(&stream->subrequests,
+ struct netfs_io_subrequest, rreq_link);
+ if (subreq &&
+ (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags) ||
+ test_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags))) {
+ __set_current_state(TASK_RUNNING);
+ netfs_read_collection(rreq);
+ continue;
+ }
+ }
+
+ if (!test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags) ||
+ !test_bit(NETFS_RREQ_PAUSE, &rreq->flags))
+ break;
+
+ schedule();
+ trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue);
+ }
+
+ finish_wait(&rreq->waitq, &myself);
+}
diff --git a/fs/netfs/read_pgpriv2.c b/fs/netfs/read_pgpriv2.c
index e72f5e674834..cf7727060215 100644
--- a/fs/netfs/read_pgpriv2.c
+++ b/fs/netfs/read_pgpriv2.c
@@ -14,52 +14,11 @@
#include "internal.h"
/*
- * [DEPRECATED] Mark page as requiring copy-to-cache using PG_private_2. The
- * third mark in the folio queue is used to indicate that this folio needs
- * writing.
- */
-void netfs_pgpriv2_mark_copy_to_cache(struct netfs_io_subrequest *subreq,
- struct netfs_io_request *rreq,
- struct folio_queue *folioq,
- int slot)
-{
- struct folio *folio = folioq_folio(folioq, slot);
-
- trace_netfs_folio(folio, netfs_folio_trace_copy_to_cache);
- folio_start_private_2(folio);
- folioq_mark3(folioq, slot);
-}
-
-/*
- * [DEPRECATED] Cancel PG_private_2 on all marked folios in the event of an
- * unrecoverable error.
- */
-static void netfs_pgpriv2_cancel(struct folio_queue *folioq)
-{
- struct folio *folio;
- int slot;
-
- while (folioq) {
- if (!folioq->marks3) {
- folioq = folioq->next;
- continue;
- }
-
- slot = __ffs(folioq->marks3);
- folio = folioq_folio(folioq, slot);
-
- trace_netfs_folio(folio, netfs_folio_trace_cancel_copy);
- folio_end_private_2(folio);
- folioq_unmark3(folioq, slot);
- }
-}
-
-/*
* [DEPRECATED] Copy a folio to the cache with PG_private_2 set.
*/
-static int netfs_pgpriv2_copy_folio(struct netfs_io_request *wreq, struct folio *folio)
+static void netfs_pgpriv2_copy_folio(struct netfs_io_request *creq, struct folio *folio)
{
- struct netfs_io_stream *cache = &wreq->io_streams[1];
+ struct netfs_io_stream *cache = &creq->io_streams[1];
size_t fsize = folio_size(folio), flen = fsize;
loff_t fpos = folio_pos(folio), i_size;
bool to_eof = false;
@@ -70,17 +29,17 @@ static int netfs_pgpriv2_copy_folio(struct netfs_io_request *wreq, struct folio
* of the page to beyond it, but cannot move i_size into or through the
* page since we have it locked.
*/
- i_size = i_size_read(wreq->inode);
+ i_size = i_size_read(creq->inode);
if (fpos >= i_size) {
/* mmap beyond eof. */
_debug("beyond eof");
folio_end_private_2(folio);
- return 0;
+ return;
}
- if (fpos + fsize > wreq->i_size)
- wreq->i_size = i_size;
+ if (fpos + fsize > creq->i_size)
+ creq->i_size = i_size;
if (flen > i_size - fpos) {
flen = i_size - fpos;
@@ -94,8 +53,10 @@ static int netfs_pgpriv2_copy_folio(struct netfs_io_request *wreq, struct folio
trace_netfs_folio(folio, netfs_folio_trace_store_copy);
/* Attach the folio to the rolling buffer. */
- if (netfs_buffer_append_folio(wreq, folio, false) < 0)
- return -ENOMEM;
+ if (rolling_buffer_append(&creq->buffer, folio, 0) < 0) {
+ clear_bit(NETFS_RREQ_FOLIO_COPY_TO_CACHE, &creq->flags);
+ return;
+ }
cache->submit_extendable_to = fsize;
cache->submit_off = 0;
@@ -109,11 +70,11 @@ static int netfs_pgpriv2_copy_folio(struct netfs_io_request *wreq, struct folio
do {
ssize_t part;
- wreq->io_iter.iov_offset = cache->submit_off;
+ creq->buffer.iter.iov_offset = cache->submit_off;
- atomic64_set(&wreq->issued_to, fpos + cache->submit_off);
+ atomic64_set(&creq->issued_to, fpos + cache->submit_off);
cache->submit_extendable_to = fsize - cache->submit_off;
- part = netfs_advance_write(wreq, cache, fpos + cache->submit_off,
+ part = netfs_advance_write(creq, cache, fpos + cache->submit_off,
cache->submit_len, to_eof);
cache->submit_off += part;
if (part > cache->submit_len)
@@ -122,99 +83,95 @@ static int netfs_pgpriv2_copy_folio(struct netfs_io_request *wreq, struct folio
cache->submit_len -= part;
} while (cache->submit_len > 0);
- wreq->io_iter.iov_offset = 0;
- iov_iter_advance(&wreq->io_iter, fsize);
- atomic64_set(&wreq->issued_to, fpos + fsize);
+ creq->buffer.iter.iov_offset = 0;
+ rolling_buffer_advance(&creq->buffer, fsize);
+ atomic64_set(&creq->issued_to, fpos + fsize);
if (flen < fsize)
- netfs_issue_write(wreq, cache);
-
- _leave(" = 0");
- return 0;
+ netfs_issue_write(creq, cache);
}
/*
- * [DEPRECATED] Go through the buffer and write any folios that are marked with
- * the third mark to the cache.
+ * [DEPRECATED] Set up copying to the cache.
*/
-void netfs_pgpriv2_write_to_the_cache(struct netfs_io_request *rreq)
+static struct netfs_io_request *netfs_pgpriv2_begin_copy_to_cache(
+ struct netfs_io_request *rreq, struct folio *folio)
{
- struct netfs_io_request *wreq;
- struct folio_queue *folioq;
- struct folio *folio;
- int error = 0;
- int slot = 0;
-
- _enter("");
+ struct netfs_io_request *creq;
if (!fscache_resources_valid(&rreq->cache_resources))
- goto couldnt_start;
+ goto cancel;
- /* Need the first folio to be able to set up the op. */
- for (folioq = rreq->buffer; folioq; folioq = folioq->next) {
- if (folioq->marks3) {
- slot = __ffs(folioq->marks3);
- break;
- }
- }
- if (!folioq)
- return;
- folio = folioq_folio(folioq, slot);
-
- wreq = netfs_create_write_req(rreq->mapping, NULL, folio_pos(folio),
+ creq = netfs_create_write_req(rreq->mapping, NULL, folio_pos(folio),
NETFS_PGPRIV2_COPY_TO_CACHE);
- if (IS_ERR(wreq)) {
- kleave(" [create %ld]", PTR_ERR(wreq));
- goto couldnt_start;
- }
+ if (IS_ERR(creq))
+ goto cancel;
- trace_netfs_write(wreq, netfs_write_trace_copy_to_cache);
+ if (!creq->io_streams[1].avail)
+ goto cancel_put;
+
+ trace_netfs_write(creq, netfs_write_trace_copy_to_cache);
netfs_stat(&netfs_n_wh_copy_to_cache);
- if (!wreq->io_streams[1].avail) {
- netfs_put_request(wreq, false, netfs_rreq_trace_put_return);
- goto couldnt_start;
- }
+ rreq->copy_to_cache = creq;
+ return creq;
+
+cancel_put:
+ netfs_put_request(creq, false, netfs_rreq_trace_put_return);
+cancel:
+ rreq->copy_to_cache = ERR_PTR(-ENOBUFS);
+ clear_bit(NETFS_RREQ_FOLIO_COPY_TO_CACHE, &rreq->flags);
+ return ERR_PTR(-ENOBUFS);
+}
- for (;;) {
- error = netfs_pgpriv2_copy_folio(wreq, folio);
- if (error < 0)
- break;
+/*
+ * [DEPRECATED] Mark page as requiring copy-to-cache using PG_private_2 and add
+ * it to the copy write request.
+ */
+void netfs_pgpriv2_copy_to_cache(struct netfs_io_request *rreq, struct folio *folio)
+{
+ struct netfs_io_request *creq = rreq->copy_to_cache;
- folioq_unmark3(folioq, slot);
- while (!folioq->marks3) {
- folioq = folioq->next;
- if (!folioq)
- goto end_of_queue;
- }
+ if (!creq)
+ creq = netfs_pgpriv2_begin_copy_to_cache(rreq, folio);
+ if (IS_ERR(creq))
+ return;
- slot = __ffs(folioq->marks3);
- folio = folioq_folio(folioq, slot);
- }
+ trace_netfs_folio(folio, netfs_folio_trace_copy_to_cache);
+ folio_start_private_2(folio);
+ netfs_pgpriv2_copy_folio(creq, folio);
+}
-end_of_queue:
- netfs_issue_write(wreq, &wreq->io_streams[1]);
+/*
+ * [DEPRECATED] End writing to the cache, flushing out any outstanding writes.
+ */
+void netfs_pgpriv2_end_copy_to_cache(struct netfs_io_request *rreq)
+{
+ struct netfs_io_request *creq = rreq->copy_to_cache;
+
+ if (IS_ERR_OR_NULL(creq))
+ return;
+
+ netfs_issue_write(creq, &creq->io_streams[1]);
smp_wmb(); /* Write lists before ALL_QUEUED. */
- set_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags);
+ set_bit(NETFS_RREQ_ALL_QUEUED, &creq->flags);
- netfs_put_request(wreq, false, netfs_rreq_trace_put_return);
- _leave(" = %d", error);
-couldnt_start:
- netfs_pgpriv2_cancel(rreq->buffer);
+ netfs_put_request(creq, false, netfs_rreq_trace_put_return);
+ creq->copy_to_cache = NULL;
}
/*
* [DEPRECATED] Remove the PG_private_2 mark from any folios we've finished
* copying.
*/
-bool netfs_pgpriv2_unlock_copied_folios(struct netfs_io_request *wreq)
+bool netfs_pgpriv2_unlock_copied_folios(struct netfs_io_request *creq)
{
- struct folio_queue *folioq = wreq->buffer;
- unsigned long long collected_to = wreq->collected_to;
- unsigned int slot = wreq->buffer_head_slot;
+ struct folio_queue *folioq = creq->buffer.tail;
+ unsigned long long collected_to = creq->collected_to;
+ unsigned int slot = creq->buffer.first_tail_slot;
bool made_progress = false;
if (slot >= folioq_nr_slots(folioq)) {
- folioq = netfs_delete_buffer_head(wreq);
+ folioq = rolling_buffer_delete_spent(&creq->buffer);
slot = 0;
}
@@ -226,16 +183,16 @@ bool netfs_pgpriv2_unlock_copied_folios(struct netfs_io_request *wreq)
folio = folioq_folio(folioq, slot);
if (WARN_ONCE(!folio_test_private_2(folio),
"R=%08x: folio %lx is not marked private_2\n",
- wreq->debug_id, folio->index))
+ creq->debug_id, folio->index))
trace_netfs_folio(folio, netfs_folio_trace_not_under_wback);
fpos = folio_pos(folio);
fsize = folio_size(folio);
flen = fsize;
- fend = min_t(unsigned long long, fpos + flen, wreq->i_size);
+ fend = min_t(unsigned long long, fpos + flen, creq->i_size);
- trace_netfs_collect_folio(wreq, folio, fend, collected_to);
+ trace_netfs_collect_folio(creq, folio, fend, collected_to);
/* Unlock any folio we've transferred all of. */
if (collected_to < fend)
@@ -243,7 +200,7 @@ bool netfs_pgpriv2_unlock_copied_folios(struct netfs_io_request *wreq)
trace_netfs_folio(folio, netfs_folio_trace_end_copy);
folio_end_private_2(folio);
- wreq->cleaned_to = fpos + fsize;
+ creq->cleaned_to = fpos + fsize;
made_progress = true;
/* Clean up the head folioq. If we clear an entire folioq, then
@@ -253,9 +210,9 @@ bool netfs_pgpriv2_unlock_copied_folios(struct netfs_io_request *wreq)
folioq_clear(folioq, slot);
slot++;
if (slot >= folioq_nr_slots(folioq)) {
- if (READ_ONCE(wreq->buffer_tail) == folioq)
- break;
- folioq = netfs_delete_buffer_head(wreq);
+ folioq = rolling_buffer_delete_spent(&creq->buffer);
+ if (!folioq)
+ goto done;
slot = 0;
}
@@ -263,7 +220,8 @@ bool netfs_pgpriv2_unlock_copied_folios(struct netfs_io_request *wreq)
break;
}
- wreq->buffer = folioq;
- wreq->buffer_head_slot = slot;
+ creq->buffer.tail = folioq;
+done:
+ creq->buffer.first_tail_slot = slot;
return made_progress;
}
diff --git a/fs/netfs/read_retry.c b/fs/netfs/read_retry.c
index 16b676c68dcd..0f294b26e08c 100644
--- a/fs/netfs/read_retry.c
+++ b/fs/netfs/read_retry.c
@@ -12,17 +12,9 @@
static void netfs_reissue_read(struct netfs_io_request *rreq,
struct netfs_io_subrequest *subreq)
{
- struct iov_iter *io_iter = &subreq->io_iter;
-
- if (iov_iter_is_folioq(io_iter)) {
- subreq->curr_folioq = (struct folio_queue *)io_iter->folioq;
- subreq->curr_folioq_slot = io_iter->folioq_slot;
- subreq->curr_folio_order = subreq->curr_folioq->orders[subreq->curr_folioq_slot];
- }
-
- atomic_inc(&rreq->nr_outstanding);
+ __clear_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags);
__set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
- netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
+ netfs_stat(&netfs_n_rh_retry_read_subreq);
subreq->rreq->netfs_ops->issue_read(subreq);
}
@@ -33,13 +25,12 @@ static void netfs_reissue_read(struct netfs_io_request *rreq,
static void netfs_retry_read_subrequests(struct netfs_io_request *rreq)
{
struct netfs_io_subrequest *subreq;
- struct netfs_io_stream *stream0 = &rreq->io_streams[0];
- LIST_HEAD(sublist);
- LIST_HEAD(queue);
+ struct netfs_io_stream *stream = &rreq->io_streams[0];
+ struct list_head *next;
_enter("R=%x", rreq->debug_id);
- if (list_empty(&rreq->subrequests))
+ if (list_empty(&stream->subrequests))
return;
if (rreq->netfs_ops->retry_request)
@@ -50,15 +41,14 @@ static void netfs_retry_read_subrequests(struct netfs_io_request *rreq)
*/
if (!rreq->netfs_ops->prepare_read &&
!rreq->cache_resources.ops) {
- struct netfs_io_subrequest *subreq;
-
- list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
+ list_for_each_entry(subreq, &stream->subrequests, rreq_link) {
if (test_bit(NETFS_SREQ_FAILED, &subreq->flags))
break;
if (__test_and_clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags)) {
__clear_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags);
subreq->retry_count++;
netfs_reset_iter(subreq);
+ netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
netfs_reissue_read(rreq, subreq);
}
}
@@ -75,48 +65,44 @@ static void netfs_retry_read_subrequests(struct netfs_io_request *rreq)
* populating with smaller subrequests. In the event that the subreq
* we just launched finishes before we insert the next subreq, it'll
* fill in rreq->prev_donated instead.
-
+ *
* Note: Alternatively, we could split the tail subrequest right before
* we reissue it and fix up the donations under lock.
*/
- list_splice_init(&rreq->subrequests, &queue);
+ next = stream->subrequests.next;
do {
- struct netfs_io_subrequest *from;
+ struct netfs_io_subrequest *from, *to, *tmp;
struct iov_iter source;
unsigned long long start, len;
- size_t part, deferred_next_donated = 0;
- bool boundary = false;
+ size_t part;
+ bool boundary = false, subreq_superfluous = false;
/* Go through the subreqs and find the next span of contiguous
* buffer that we then rejig (cifs, for example, needs the
* rsize renegotiating) and reissue.
*/
- from = list_first_entry(&queue, struct netfs_io_subrequest, rreq_link);
- list_move_tail(&from->rreq_link, &sublist);
+ from = list_entry(next, struct netfs_io_subrequest, rreq_link);
+ to = from;
start = from->start + from->transferred;
len = from->len - from->transferred;
- _debug("from R=%08x[%x] s=%llx ctl=%zx/%zx/%zx",
+ _debug("from R=%08x[%x] s=%llx ctl=%zx/%zx",
rreq->debug_id, from->debug_index,
- from->start, from->consumed, from->transferred, from->len);
+ from->start, from->transferred, from->len);
if (test_bit(NETFS_SREQ_FAILED, &from->flags) ||
!test_bit(NETFS_SREQ_NEED_RETRY, &from->flags))
goto abandon;
- deferred_next_donated = from->next_donated;
- while ((subreq = list_first_entry_or_null(
- &queue, struct netfs_io_subrequest, rreq_link))) {
- if (subreq->start != start + len ||
- subreq->transferred > 0 ||
+ list_for_each_continue(next, &stream->subrequests) {
+ subreq = list_entry(next, struct netfs_io_subrequest, rreq_link);
+ if (subreq->start + subreq->transferred != start + len ||
+ test_bit(NETFS_SREQ_BOUNDARY, &subreq->flags) ||
!test_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags))
break;
- list_move_tail(&subreq->rreq_link, &sublist);
- len += subreq->len;
- deferred_next_donated = subreq->next_donated;
- if (test_bit(NETFS_SREQ_BOUNDARY, &subreq->flags))
- break;
+ to = subreq;
+ len += to->len;
}
_debug(" - range: %llx-%llx %llx", start, start + len - 1, len);
@@ -129,38 +115,33 @@ static void netfs_retry_read_subrequests(struct netfs_io_request *rreq)
source.count = len;
/* Work through the sublist. */
- while ((subreq = list_first_entry_or_null(
- &sublist, struct netfs_io_subrequest, rreq_link))) {
- list_del(&subreq->rreq_link);
-
+ subreq = from;
+ list_for_each_entry_from(subreq, &stream->subrequests, rreq_link) {
+ if (!len) {
+ subreq_superfluous = true;
+ break;
+ }
subreq->source = NETFS_DOWNLOAD_FROM_SERVER;
subreq->start = start - subreq->transferred;
subreq->len = len + subreq->transferred;
- stream0->sreq_max_len = subreq->len;
-
__clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
__clear_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags);
subreq->retry_count++;
- spin_lock_bh(&rreq->lock);
- list_add_tail(&subreq->rreq_link, &rreq->subrequests);
- subreq->prev_donated += rreq->prev_donated;
- rreq->prev_donated = 0;
trace_netfs_sreq(subreq, netfs_sreq_trace_retry);
- spin_unlock_bh(&rreq->lock);
-
- BUG_ON(!len);
/* Renegotiate max_len (rsize) */
+ stream->sreq_max_len = subreq->len;
if (rreq->netfs_ops->prepare_read &&
rreq->netfs_ops->prepare_read(subreq) < 0) {
trace_netfs_sreq(subreq, netfs_sreq_trace_reprep_failed);
__set_bit(NETFS_SREQ_FAILED, &subreq->flags);
+ goto abandon;
}
- part = umin(len, stream0->sreq_max_len);
- if (unlikely(rreq->io_streams[0].sreq_max_segs))
- part = netfs_limit_iter(&source, 0, part, stream0->sreq_max_segs);
+ part = umin(len, stream->sreq_max_len);
+ if (unlikely(stream->sreq_max_segs))
+ part = netfs_limit_iter(&source, 0, part, stream->sreq_max_segs);
subreq->len = subreq->transferred + part;
subreq->io_iter = source;
iov_iter_truncate(&subreq->io_iter, part);
@@ -170,57 +151,105 @@ static void netfs_retry_read_subrequests(struct netfs_io_request *rreq)
if (!len) {
if (boundary)
__set_bit(NETFS_SREQ_BOUNDARY, &subreq->flags);
- subreq->next_donated = deferred_next_donated;
} else {
__clear_bit(NETFS_SREQ_BOUNDARY, &subreq->flags);
- subreq->next_donated = 0;
}
+ netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
netfs_reissue_read(rreq, subreq);
- if (!len)
+ if (subreq == to) {
+ subreq_superfluous = false;
break;
-
- /* If we ran out of subrequests, allocate another. */
- if (list_empty(&sublist)) {
- subreq = netfs_alloc_subrequest(rreq);
- if (!subreq)
- goto abandon;
- subreq->source = NETFS_DOWNLOAD_FROM_SERVER;
- subreq->start = start;
-
- /* We get two refs, but need just one. */
- netfs_put_subrequest(subreq, false, netfs_sreq_trace_new);
- trace_netfs_sreq(subreq, netfs_sreq_trace_split);
- list_add_tail(&subreq->rreq_link, &sublist);
}
}
/* If we managed to use fewer subreqs, we can discard the
- * excess.
+ * excess; if we used the same number, then we're done.
*/
- while ((subreq = list_first_entry_or_null(
- &sublist, struct netfs_io_subrequest, rreq_link))) {
- trace_netfs_sreq(subreq, netfs_sreq_trace_discard);
- list_del(&subreq->rreq_link);
- netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_done);
+ if (!len) {
+ if (!subreq_superfluous)
+ continue;
+ list_for_each_entry_safe_from(subreq, tmp,
+ &stream->subrequests, rreq_link) {
+ trace_netfs_sreq(subreq, netfs_sreq_trace_superfluous);
+ list_del(&subreq->rreq_link);
+ netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_done);
+ if (subreq == to)
+ break;
+ }
+ continue;
}
- } while (!list_empty(&queue));
+ /* We ran out of subrequests, so we need to allocate some more
+ * and insert them after.
+ */
+ do {
+ subreq = netfs_alloc_subrequest(rreq);
+ if (!subreq) {
+ subreq = to;
+ goto abandon_after;
+ }
+ subreq->source = NETFS_DOWNLOAD_FROM_SERVER;
+ subreq->start = start;
+ subreq->len = len;
+ subreq->stream_nr = stream->stream_nr;
+ subreq->retry_count = 1;
+
+ trace_netfs_sreq_ref(rreq->debug_id, subreq->debug_index,
+ refcount_read(&subreq->ref),
+ netfs_sreq_trace_new);
+
+ list_add(&subreq->rreq_link, &to->rreq_link);
+ to = list_next_entry(to, rreq_link);
+ trace_netfs_sreq(subreq, netfs_sreq_trace_retry);
+
+ stream->sreq_max_len = umin(len, rreq->rsize);
+ stream->sreq_max_segs = 0;
+ if (unlikely(stream->sreq_max_segs))
+ part = netfs_limit_iter(&source, 0, part, stream->sreq_max_segs);
+
+ netfs_stat(&netfs_n_rh_download);
+ if (rreq->netfs_ops->prepare_read(subreq) < 0) {
+ trace_netfs_sreq(subreq, netfs_sreq_trace_reprep_failed);
+ __set_bit(NETFS_SREQ_FAILED, &subreq->flags);
+ goto abandon;
+ }
+
+ part = umin(len, stream->sreq_max_len);
+ subreq->len = subreq->transferred + part;
+ subreq->io_iter = source;
+ iov_iter_truncate(&subreq->io_iter, part);
+ iov_iter_advance(&source, part);
+
+ len -= part;
+ start += part;
+ if (!len && boundary) {
+ __set_bit(NETFS_SREQ_BOUNDARY, &to->flags);
+ boundary = false;
+ }
+
+ netfs_reissue_read(rreq, subreq);
+ } while (len);
+
+ } while (!list_is_head(next, &stream->subrequests));
return;
- /* If we hit ENOMEM, fail all remaining subrequests */
+ /* If we hit an error, fail all remaining incomplete subrequests */
+abandon_after:
+ if (list_is_last(&subreq->rreq_link, &stream->subrequests))
+ return;
+ subreq = list_next_entry(subreq, rreq_link);
abandon:
- list_splice_init(&sublist, &queue);
- list_for_each_entry(subreq, &queue, rreq_link) {
- if (!subreq->error)
- subreq->error = -ENOMEM;
- __clear_bit(NETFS_SREQ_FAILED, &subreq->flags);
+ list_for_each_entry_from(subreq, &stream->subrequests, rreq_link) {
+ if (!subreq->error &&
+ !test_bit(NETFS_SREQ_FAILED, &subreq->flags) &&
+ !test_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags))
+ continue;
+ subreq->error = -ENOMEM;
+ __set_bit(NETFS_SREQ_FAILED, &subreq->flags);
__clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
}
- spin_lock_bh(&rreq->lock);
- list_splice_tail_init(&queue, &rreq->subrequests);
- spin_unlock_bh(&rreq->lock);
}
/*
@@ -228,14 +257,39 @@ abandon:
*/
void netfs_retry_reads(struct netfs_io_request *rreq)
{
- trace_netfs_rreq(rreq, netfs_rreq_trace_resubmit);
+ struct netfs_io_subrequest *subreq;
+ struct netfs_io_stream *stream = &rreq->io_streams[0];
+ DEFINE_WAIT(myself);
- atomic_inc(&rreq->nr_outstanding);
+ netfs_stat(&netfs_n_rh_retry_read_req);
- netfs_retry_read_subrequests(rreq);
+ set_bit(NETFS_RREQ_RETRYING, &rreq->flags);
+
+ /* Wait for all outstanding I/O to quiesce before performing retries as
+ * we may need to renegotiate the I/O sizes.
+ */
+ list_for_each_entry(subreq, &stream->subrequests, rreq_link) {
+ if (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags))
+ continue;
+
+ trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue);
+ for (;;) {
+ prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE);
+
+ if (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags))
+ break;
- if (atomic_dec_and_test(&rreq->nr_outstanding))
- netfs_rreq_terminated(rreq, false);
+ trace_netfs_sreq(subreq, netfs_sreq_trace_wait_for);
+ schedule();
+ trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue);
+ }
+
+ finish_wait(&rreq->waitq, &myself);
+ }
+ clear_bit(NETFS_RREQ_RETRYING, &rreq->flags);
+
+ trace_netfs_rreq(rreq, netfs_rreq_trace_resubmit);
+ netfs_retry_read_subrequests(rreq);
}
/*
@@ -246,7 +300,7 @@ void netfs_unlock_abandoned_read_pages(struct netfs_io_request *rreq)
{
struct folio_queue *p;
- for (p = rreq->buffer; p; p = p->next) {
+ for (p = rreq->buffer.tail; p; p = p->next) {
for (int slot = 0; slot < folioq_count(p); slot++) {
struct folio *folio = folioq_folio(p, slot);
diff --git a/fs/netfs/read_single.c b/fs/netfs/read_single.c
new file mode 100644
index 000000000000..fea0ecdecc53
--- /dev/null
+++ b/fs/netfs/read_single.c
@@ -0,0 +1,195 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Single, monolithic object support (e.g. AFS directory).
+ *
+ * Copyright (C) 2024 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/export.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/slab.h>
+#include <linux/uio.h>
+#include <linux/sched/mm.h>
+#include <linux/task_io_accounting_ops.h>
+#include <linux/netfs.h>
+#include "internal.h"
+
+/**
+ * netfs_single_mark_inode_dirty - Mark a single, monolithic object inode dirty
+ * @inode: The inode to mark
+ *
+ * Mark an inode that contains a single, monolithic object as dirty so that its
+ * writepages op will get called. If set, the SINGLE_NO_UPLOAD flag indicates
+ * that the object will only be written to the cache and not uploaded (e.g. AFS
+ * directory contents).
+ */
+void netfs_single_mark_inode_dirty(struct inode *inode)
+{
+ struct netfs_inode *ictx = netfs_inode(inode);
+ bool cache_only = test_bit(NETFS_ICTX_SINGLE_NO_UPLOAD, &ictx->flags);
+ bool caching = fscache_cookie_enabled(netfs_i_cookie(netfs_inode(inode)));
+
+ if (cache_only && !caching)
+ return;
+
+ mark_inode_dirty(inode);
+
+ if (caching && !(inode->i_state & I_PINNING_NETFS_WB)) {
+ bool need_use = false;
+
+ spin_lock(&inode->i_lock);
+ if (!(inode->i_state & I_PINNING_NETFS_WB)) {
+ inode->i_state |= I_PINNING_NETFS_WB;
+ need_use = true;
+ }
+ spin_unlock(&inode->i_lock);
+
+ if (need_use)
+ fscache_use_cookie(netfs_i_cookie(ictx), true);
+ }
+
+}
+EXPORT_SYMBOL(netfs_single_mark_inode_dirty);
+
+static int netfs_single_begin_cache_read(struct netfs_io_request *rreq, struct netfs_inode *ctx)
+{
+ return fscache_begin_read_operation(&rreq->cache_resources, netfs_i_cookie(ctx));
+}
+
+static void netfs_single_cache_prepare_read(struct netfs_io_request *rreq,
+ struct netfs_io_subrequest *subreq)
+{
+ struct netfs_cache_resources *cres = &rreq->cache_resources;
+
+ if (!cres->ops) {
+ subreq->source = NETFS_DOWNLOAD_FROM_SERVER;
+ return;
+ }
+ subreq->source = cres->ops->prepare_read(subreq, rreq->i_size);
+ trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);
+
+}
+
+static void netfs_single_read_cache(struct netfs_io_request *rreq,
+ struct netfs_io_subrequest *subreq)
+{
+ struct netfs_cache_resources *cres = &rreq->cache_resources;
+
+ _enter("R=%08x[%x]", rreq->debug_id, subreq->debug_index);
+ netfs_stat(&netfs_n_rh_read);
+ cres->ops->read(cres, subreq->start, &subreq->io_iter, NETFS_READ_HOLE_FAIL,
+ netfs_cache_read_terminated, subreq);
+}
+
+/*
+ * Perform a read to a buffer from the cache or the server. Only a single
+ * subreq is permitted as the object must be fetched in a single transaction.
+ */
+static int netfs_single_dispatch_read(struct netfs_io_request *rreq)
+{
+ struct netfs_io_stream *stream = &rreq->io_streams[0];
+ struct netfs_io_subrequest *subreq;
+ int ret = 0;
+
+ subreq = netfs_alloc_subrequest(rreq);
+ if (!subreq)
+ return -ENOMEM;
+
+ subreq->source = NETFS_SOURCE_UNKNOWN;
+ subreq->start = 0;
+ subreq->len = rreq->len;
+ subreq->io_iter = rreq->buffer.iter;
+
+ __set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
+
+ spin_lock(&rreq->lock);
+ list_add_tail(&subreq->rreq_link, &stream->subrequests);
+ trace_netfs_sreq(subreq, netfs_sreq_trace_added);
+ stream->front = subreq;
+ /* Store list pointers before active flag */
+ smp_store_release(&stream->active, true);
+ spin_unlock(&rreq->lock);
+
+ netfs_single_cache_prepare_read(rreq, subreq);
+ switch (subreq->source) {
+ case NETFS_DOWNLOAD_FROM_SERVER:
+ netfs_stat(&netfs_n_rh_download);
+ if (rreq->netfs_ops->prepare_read) {
+ ret = rreq->netfs_ops->prepare_read(subreq);
+ if (ret < 0)
+ goto cancel;
+ }
+
+ rreq->netfs_ops->issue_read(subreq);
+ rreq->submitted += subreq->len;
+ break;
+ case NETFS_READ_FROM_CACHE:
+ trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
+ netfs_single_read_cache(rreq, subreq);
+ rreq->submitted += subreq->len;
+ ret = 0;
+ break;
+ default:
+ pr_warn("Unexpected single-read source %u\n", subreq->source);
+ WARN_ON_ONCE(true);
+ ret = -EIO;
+ break;
+ }
+
+ smp_wmb(); /* Write lists before ALL_QUEUED. */
+ set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags);
+ return ret;
+cancel:
+ netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_cancel);
+ return ret;
+}
+
+/**
+ * netfs_read_single - Synchronously read a single blob of pages.
+ * @inode: The inode to read from.
+ * @file: The file we're using to read or NULL.
+ * @iter: The buffer we're reading into.
+ *
+ * Fulfil a read request for a single monolithic object by drawing data from
+ * the cache if possible, or the netfs if not. The buffer may be larger than
+ * the file content; unused beyond the EOF will be zero-filled. The content
+ * will be read with a single I/O request (though this may be retried).
+ *
+ * The calling netfs must initialise a netfs context contiguous to the vfs
+ * inode before calling this.
+ *
+ * This is usable whether or not caching is enabled. If caching is enabled,
+ * the data will be stored as a single object into the cache.
+ */
+ssize_t netfs_read_single(struct inode *inode, struct file *file, struct iov_iter *iter)
+{
+ struct netfs_io_request *rreq;
+ struct netfs_inode *ictx = netfs_inode(inode);
+ ssize_t ret;
+
+ rreq = netfs_alloc_request(inode->i_mapping, file, 0, iov_iter_count(iter),
+ NETFS_READ_SINGLE);
+ if (IS_ERR(rreq))
+ return PTR_ERR(rreq);
+
+ ret = netfs_single_begin_cache_read(rreq, ictx);
+ if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
+ goto cleanup_free;
+
+ netfs_stat(&netfs_n_rh_read_single);
+ trace_netfs_read(rreq, 0, rreq->len, netfs_read_trace_read_single);
+
+ rreq->buffer.iter = *iter;
+ netfs_single_dispatch_read(rreq);
+
+ ret = netfs_wait_for_read(rreq);
+ netfs_put_request(rreq, true, netfs_rreq_trace_put_return);
+ return ret;
+
+cleanup_free:
+ netfs_put_request(rreq, false, netfs_rreq_trace_put_failed);
+ return ret;
+}
+EXPORT_SYMBOL(netfs_read_single);
diff --git a/fs/netfs/rolling_buffer.c b/fs/netfs/rolling_buffer.c
new file mode 100644
index 000000000000..207b6a326651
--- /dev/null
+++ b/fs/netfs/rolling_buffer.c
@@ -0,0 +1,222 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Rolling buffer helpers
+ *
+ * Copyright (C) 2024 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/bitops.h>
+#include <linux/pagemap.h>
+#include <linux/rolling_buffer.h>
+#include <linux/slab.h>
+#include "internal.h"
+
+static atomic_t debug_ids;
+
+/**
+ * netfs_folioq_alloc - Allocate a folio_queue struct
+ * @rreq_id: Associated debugging ID for tracing purposes
+ * @gfp: Allocation constraints
+ * @trace: Trace tag to indicate the purpose of the allocation
+ *
+ * Allocate, initialise and account the folio_queue struct and log a trace line
+ * to mark the allocation.
+ */
+struct folio_queue *netfs_folioq_alloc(unsigned int rreq_id, gfp_t gfp,
+ unsigned int /*enum netfs_folioq_trace*/ trace)
+{
+ struct folio_queue *fq;
+
+ fq = kmalloc(sizeof(*fq), gfp);
+ if (fq) {
+ netfs_stat(&netfs_n_folioq);
+ folioq_init(fq, rreq_id);
+ fq->debug_id = atomic_inc_return(&debug_ids);
+ trace_netfs_folioq(fq, trace);
+ }
+ return fq;
+}
+EXPORT_SYMBOL(netfs_folioq_alloc);
+
+/**
+ * netfs_folioq_free - Free a folio_queue struct
+ * @folioq: The object to free
+ * @trace: Trace tag to indicate which free
+ *
+ * Free and unaccount the folio_queue struct.
+ */
+void netfs_folioq_free(struct folio_queue *folioq,
+ unsigned int /*enum netfs_trace_folioq*/ trace)
+{
+ trace_netfs_folioq(folioq, trace);
+ netfs_stat_d(&netfs_n_folioq);
+ kfree(folioq);
+}
+EXPORT_SYMBOL(netfs_folioq_free);
+
+/*
+ * Initialise a rolling buffer. We allocate an empty folio queue struct to so
+ * that the pointers can be independently driven by the producer and the
+ * consumer.
+ */
+int rolling_buffer_init(struct rolling_buffer *roll, unsigned int rreq_id,
+ unsigned int direction)
+{
+ struct folio_queue *fq;
+
+ fq = netfs_folioq_alloc(rreq_id, GFP_NOFS, netfs_trace_folioq_rollbuf_init);
+ if (!fq)
+ return -ENOMEM;
+
+ roll->head = fq;
+ roll->tail = fq;
+ iov_iter_folio_queue(&roll->iter, direction, fq, 0, 0, 0);
+ return 0;
+}
+
+/*
+ * Add another folio_queue to a rolling buffer if there's no space left.
+ */
+int rolling_buffer_make_space(struct rolling_buffer *roll)
+{
+ struct folio_queue *fq, *head = roll->head;
+
+ if (!folioq_full(head))
+ return 0;
+
+ fq = netfs_folioq_alloc(head->rreq_id, GFP_NOFS, netfs_trace_folioq_make_space);
+ if (!fq)
+ return -ENOMEM;
+ fq->prev = head;
+
+ roll->head = fq;
+ if (folioq_full(head)) {
+ /* Make sure we don't leave the master iterator pointing to a
+ * block that might get immediately consumed.
+ */
+ if (roll->iter.folioq == head &&
+ roll->iter.folioq_slot == folioq_nr_slots(head)) {
+ roll->iter.folioq = fq;
+ roll->iter.folioq_slot = 0;
+ }
+ }
+
+ /* Make sure the initialisation is stored before the next pointer.
+ *
+ * [!] NOTE: After we set head->next, the consumer is at liberty to
+ * immediately delete the old head.
+ */
+ smp_store_release(&head->next, fq);
+ return 0;
+}
+
+/*
+ * Decant the list of folios to read into a rolling buffer.
+ */
+ssize_t rolling_buffer_load_from_ra(struct rolling_buffer *roll,
+ struct readahead_control *ractl,
+ struct folio_batch *put_batch)
+{
+ struct folio_queue *fq;
+ struct page **vec;
+ int nr, ix, to;
+ ssize_t size = 0;
+
+ if (rolling_buffer_make_space(roll) < 0)
+ return -ENOMEM;
+
+ fq = roll->head;
+ vec = (struct page **)fq->vec.folios;
+ nr = __readahead_batch(ractl, vec + folio_batch_count(&fq->vec),
+ folio_batch_space(&fq->vec));
+ ix = fq->vec.nr;
+ to = ix + nr;
+ fq->vec.nr = to;
+ for (; ix < to; ix++) {
+ struct folio *folio = folioq_folio(fq, ix);
+ unsigned int order = folio_order(folio);
+
+ fq->orders[ix] = order;
+ size += PAGE_SIZE << order;
+ trace_netfs_folio(folio, netfs_folio_trace_read);
+ if (!folio_batch_add(put_batch, folio))
+ folio_batch_release(put_batch);
+ }
+ WRITE_ONCE(roll->iter.count, roll->iter.count + size);
+
+ /* Store the counter after setting the slot. */
+ smp_store_release(&roll->next_head_slot, to);
+ return size;
+}
+
+/*
+ * Append a folio to the rolling buffer.
+ */
+ssize_t rolling_buffer_append(struct rolling_buffer *roll, struct folio *folio,
+ unsigned int flags)
+{
+ ssize_t size = folio_size(folio);
+ int slot;
+
+ if (rolling_buffer_make_space(roll) < 0)
+ return -ENOMEM;
+
+ slot = folioq_append(roll->head, folio);
+ if (flags & ROLLBUF_MARK_1)
+ folioq_mark(roll->head, slot);
+ if (flags & ROLLBUF_MARK_2)
+ folioq_mark2(roll->head, slot);
+
+ WRITE_ONCE(roll->iter.count, roll->iter.count + size);
+
+ /* Store the counter after setting the slot. */
+ smp_store_release(&roll->next_head_slot, slot);
+ return size;
+}
+
+/*
+ * Delete a spent buffer from a rolling queue and return the next in line. We
+ * don't return the last buffer to keep the pointers independent, but return
+ * NULL instead.
+ */
+struct folio_queue *rolling_buffer_delete_spent(struct rolling_buffer *roll)
+{
+ struct folio_queue *spent = roll->tail, *next = READ_ONCE(spent->next);
+
+ if (!next)
+ return NULL;
+ next->prev = NULL;
+ netfs_folioq_free(spent, netfs_trace_folioq_delete);
+ roll->tail = next;
+ return next;
+}
+
+/*
+ * Clear out a rolling queue. Folios that have mark 1 set are put.
+ */
+void rolling_buffer_clear(struct rolling_buffer *roll)
+{
+ struct folio_batch fbatch;
+ struct folio_queue *p;
+
+ folio_batch_init(&fbatch);
+
+ while ((p = roll->tail)) {
+ roll->tail = p->next;
+ for (int slot = 0; slot < folioq_count(p); slot++) {
+ struct folio *folio = folioq_folio(p, slot);
+
+ if (!folio)
+ continue;
+ if (folioq_is_marked(p, slot)) {
+ trace_netfs_folio(folio, netfs_folio_trace_put);
+ if (!folio_batch_add(&fbatch, folio))
+ folio_batch_release(&fbatch);
+ }
+ }
+
+ netfs_folioq_free(p, netfs_trace_folioq_clear);
+ }
+
+ folio_batch_release(&fbatch);
+}
diff --git a/fs/netfs/stats.c b/fs/netfs/stats.c
index 8e63516b40f6..ab6b916addc4 100644
--- a/fs/netfs/stats.c
+++ b/fs/netfs/stats.c
@@ -12,6 +12,7 @@
atomic_t netfs_n_rh_dio_read;
atomic_t netfs_n_rh_readahead;
atomic_t netfs_n_rh_read_folio;
+atomic_t netfs_n_rh_read_single;
atomic_t netfs_n_rh_rreq;
atomic_t netfs_n_rh_sreq;
atomic_t netfs_n_rh_download;
@@ -28,6 +29,8 @@ atomic_t netfs_n_rh_write_begin;
atomic_t netfs_n_rh_write_done;
atomic_t netfs_n_rh_write_failed;
atomic_t netfs_n_rh_write_zskip;
+atomic_t netfs_n_rh_retry_read_req;
+atomic_t netfs_n_rh_retry_read_subreq;
atomic_t netfs_n_wh_buffered_write;
atomic_t netfs_n_wh_writethrough;
atomic_t netfs_n_wh_dio_write;
@@ -40,16 +43,19 @@ atomic_t netfs_n_wh_upload_failed;
atomic_t netfs_n_wh_write;
atomic_t netfs_n_wh_write_done;
atomic_t netfs_n_wh_write_failed;
+atomic_t netfs_n_wh_retry_write_req;
+atomic_t netfs_n_wh_retry_write_subreq;
atomic_t netfs_n_wb_lock_skip;
atomic_t netfs_n_wb_lock_wait;
atomic_t netfs_n_folioq;
int netfs_stats_show(struct seq_file *m, void *v)
{
- seq_printf(m, "Reads : DR=%u RA=%u RF=%u WB=%u WBZ=%u\n",
+ seq_printf(m, "Reads : DR=%u RA=%u RF=%u RS=%u WB=%u WBZ=%u\n",
atomic_read(&netfs_n_rh_dio_read),
atomic_read(&netfs_n_rh_readahead),
atomic_read(&netfs_n_rh_read_folio),
+ atomic_read(&netfs_n_rh_read_single),
atomic_read(&netfs_n_rh_write_begin),
atomic_read(&netfs_n_rh_write_zskip));
seq_printf(m, "Writes : BW=%u WT=%u DW=%u WP=%u 2C=%u\n",
@@ -79,6 +85,11 @@ int netfs_stats_show(struct seq_file *m, void *v)
atomic_read(&netfs_n_wh_write),
atomic_read(&netfs_n_wh_write_done),
atomic_read(&netfs_n_wh_write_failed));
+ seq_printf(m, "Retries: rq=%u rs=%u wq=%u ws=%u\n",
+ atomic_read(&netfs_n_rh_retry_read_req),
+ atomic_read(&netfs_n_rh_retry_read_subreq),
+ atomic_read(&netfs_n_wh_retry_write_req),
+ atomic_read(&netfs_n_wh_retry_write_subreq));
seq_printf(m, "Objs : rr=%u sr=%u foq=%u wsc=%u\n",
atomic_read(&netfs_n_rh_rreq),
atomic_read(&netfs_n_rh_sreq),
diff --git a/fs/netfs/write_collect.c b/fs/netfs/write_collect.c
index ca3a11ed9b54..3fca59e6475d 100644
--- a/fs/netfs/write_collect.c
+++ b/fs/netfs/write_collect.c
@@ -17,10 +17,38 @@
#define HIT_PENDING 0x01 /* A front op was still pending */
#define NEED_REASSESS 0x02 /* Need to loop round and reassess */
#define MADE_PROGRESS 0x04 /* Made progress cleaning up a stream or the folio set */
-#define BUFFERED 0x08 /* The pagecache needs cleaning up */
+#define NEED_UNLOCK 0x08 /* The pagecache needs unlocking */
#define NEED_RETRY 0x10 /* A front op requests retrying */
#define SAW_FAILURE 0x20 /* One stream or hit a permanent failure */
+static void netfs_dump_request(const struct netfs_io_request *rreq)
+{
+ pr_err("Request R=%08x r=%d fl=%lx or=%x e=%ld\n",
+ rreq->debug_id, refcount_read(&rreq->ref), rreq->flags,
+ rreq->origin, rreq->error);
+ pr_err(" st=%llx tsl=%zx/%llx/%llx\n",
+ rreq->start, rreq->transferred, rreq->submitted, rreq->len);
+ pr_err(" cci=%llx/%llx/%llx\n",
+ rreq->cleaned_to, rreq->collected_to, atomic64_read(&rreq->issued_to));
+ pr_err(" iw=%pSR\n", rreq->netfs_ops->issue_write);
+ for (int i = 0; i < NR_IO_STREAMS; i++) {
+ const struct netfs_io_subrequest *sreq;
+ const struct netfs_io_stream *s = &rreq->io_streams[i];
+
+ pr_err(" str[%x] s=%x e=%d acnf=%u,%u,%u,%u\n",
+ s->stream_nr, s->source, s->error,
+ s->avail, s->active, s->need_retry, s->failed);
+ pr_err(" str[%x] ct=%llx t=%zx\n",
+ s->stream_nr, s->collected_to, s->transferred);
+ list_for_each_entry(sreq, &s->subrequests, rreq_link) {
+ pr_err(" sreq[%x:%x] sc=%u s=%llx t=%zx/%zx r=%d f=%lx\n",
+ sreq->stream_nr, sreq->debug_index, sreq->source,
+ sreq->start, sreq->transferred, sreq->len,
+ refcount_read(&sreq->ref), sreq->flags);
+ }
+ }
+}
+
/*
* Successful completion of write of a folio to the server and/or cache. Note
* that we are not allowed to lock the folio here on pain of deadlocking with
@@ -83,9 +111,15 @@ end_wb:
static void netfs_writeback_unlock_folios(struct netfs_io_request *wreq,
unsigned int *notes)
{
- struct folio_queue *folioq = wreq->buffer;
+ struct folio_queue *folioq = wreq->buffer.tail;
unsigned long long collected_to = wreq->collected_to;
- unsigned int slot = wreq->buffer_head_slot;
+ unsigned int slot = wreq->buffer.first_tail_slot;
+
+ if (WARN_ON_ONCE(!folioq)) {
+ pr_err("[!] Writeback unlock found empty rolling buffer!\n");
+ netfs_dump_request(wreq);
+ return;
+ }
if (wreq->origin == NETFS_PGPRIV2_COPY_TO_CACHE) {
if (netfs_pgpriv2_unlock_copied_folios(wreq))
@@ -94,7 +128,9 @@ static void netfs_writeback_unlock_folios(struct netfs_io_request *wreq,
}
if (slot >= folioq_nr_slots(folioq)) {
- folioq = netfs_delete_buffer_head(wreq);
+ folioq = rolling_buffer_delete_spent(&wreq->buffer);
+ if (!folioq)
+ return;
slot = 0;
}
@@ -134,9 +170,9 @@ static void netfs_writeback_unlock_folios(struct netfs_io_request *wreq,
folioq_clear(folioq, slot);
slot++;
if (slot >= folioq_nr_slots(folioq)) {
- if (READ_ONCE(wreq->buffer_tail) == folioq)
- break;
- folioq = netfs_delete_buffer_head(wreq);
+ folioq = rolling_buffer_delete_spent(&wreq->buffer);
+ if (!folioq)
+ goto done;
slot = 0;
}
@@ -144,222 +180,9 @@ static void netfs_writeback_unlock_folios(struct netfs_io_request *wreq,
break;
}
- wreq->buffer = folioq;
- wreq->buffer_head_slot = slot;
-}
-
-/*
- * Perform retries on the streams that need it.
- */
-static void netfs_retry_write_stream(struct netfs_io_request *wreq,
- struct netfs_io_stream *stream)
-{
- struct list_head *next;
-
- _enter("R=%x[%x:]", wreq->debug_id, stream->stream_nr);
-
- if (list_empty(&stream->subrequests))
- return;
-
- if (stream->source == NETFS_UPLOAD_TO_SERVER &&
- wreq->netfs_ops->retry_request)
- wreq->netfs_ops->retry_request(wreq, stream);
-
- if (unlikely(stream->failed))
- return;
-
- /* If there's no renegotiation to do, just resend each failed subreq. */
- if (!stream->prepare_write) {
- struct netfs_io_subrequest *subreq;
-
- list_for_each_entry(subreq, &stream->subrequests, rreq_link) {
- if (test_bit(NETFS_SREQ_FAILED, &subreq->flags))
- break;
- if (__test_and_clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags)) {
- struct iov_iter source = subreq->io_iter;
-
- iov_iter_revert(&source, subreq->len - source.count);
- netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
- netfs_reissue_write(stream, subreq, &source);
- }
- }
- return;
- }
-
- next = stream->subrequests.next;
-
- do {
- struct netfs_io_subrequest *subreq = NULL, *from, *to, *tmp;
- struct iov_iter source;
- unsigned long long start, len;
- size_t part;
- bool boundary = false;
-
- /* Go through the stream and find the next span of contiguous
- * data that we then rejig (cifs, for example, needs the wsize
- * renegotiating) and reissue.
- */
- from = list_entry(next, struct netfs_io_subrequest, rreq_link);
- to = from;
- start = from->start + from->transferred;
- len = from->len - from->transferred;
-
- if (test_bit(NETFS_SREQ_FAILED, &from->flags) ||
- !test_bit(NETFS_SREQ_NEED_RETRY, &from->flags))
- return;
-
- list_for_each_continue(next, &stream->subrequests) {
- subreq = list_entry(next, struct netfs_io_subrequest, rreq_link);
- if (subreq->start + subreq->transferred != start + len ||
- test_bit(NETFS_SREQ_BOUNDARY, &subreq->flags) ||
- !test_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags))
- break;
- to = subreq;
- len += to->len;
- }
-
- /* Determine the set of buffers we're going to use. Each
- * subreq gets a subset of a single overall contiguous buffer.
- */
- netfs_reset_iter(from);
- source = from->io_iter;
- source.count = len;
-
- /* Work through the sublist. */
- subreq = from;
- list_for_each_entry_from(subreq, &stream->subrequests, rreq_link) {
- if (!len)
- break;
- /* Renegotiate max_len (wsize) */
- trace_netfs_sreq(subreq, netfs_sreq_trace_retry);
- __clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
- subreq->retry_count++;
- stream->prepare_write(subreq);
-
- part = min(len, stream->sreq_max_len);
- subreq->len = part;
- subreq->start = start;
- subreq->transferred = 0;
- len -= part;
- start += part;
- if (len && subreq == to &&
- __test_and_clear_bit(NETFS_SREQ_BOUNDARY, &to->flags))
- boundary = true;
-
- netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
- netfs_reissue_write(stream, subreq, &source);
- if (subreq == to)
- break;
- }
-
- /* If we managed to use fewer subreqs, we can discard the
- * excess; if we used the same number, then we're done.
- */
- if (!len) {
- if (subreq == to)
- continue;
- list_for_each_entry_safe_from(subreq, tmp,
- &stream->subrequests, rreq_link) {
- trace_netfs_sreq(subreq, netfs_sreq_trace_discard);
- list_del(&subreq->rreq_link);
- netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_done);
- if (subreq == to)
- break;
- }
- continue;
- }
-
- /* We ran out of subrequests, so we need to allocate some more
- * and insert them after.
- */
- do {
- subreq = netfs_alloc_subrequest(wreq);
- subreq->source = to->source;
- subreq->start = start;
- subreq->debug_index = atomic_inc_return(&wreq->subreq_counter);
- subreq->stream_nr = to->stream_nr;
- subreq->retry_count = 1;
-
- trace_netfs_sreq_ref(wreq->debug_id, subreq->debug_index,
- refcount_read(&subreq->ref),
- netfs_sreq_trace_new);
- netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
-
- list_add(&subreq->rreq_link, &to->rreq_link);
- to = list_next_entry(to, rreq_link);
- trace_netfs_sreq(subreq, netfs_sreq_trace_retry);
-
- stream->sreq_max_len = len;
- stream->sreq_max_segs = INT_MAX;
- switch (stream->source) {
- case NETFS_UPLOAD_TO_SERVER:
- netfs_stat(&netfs_n_wh_upload);
- stream->sreq_max_len = umin(len, wreq->wsize);
- break;
- case NETFS_WRITE_TO_CACHE:
- netfs_stat(&netfs_n_wh_write);
- break;
- default:
- WARN_ON_ONCE(1);
- }
-
- stream->prepare_write(subreq);
-
- part = umin(len, stream->sreq_max_len);
- subreq->len = subreq->transferred + part;
- len -= part;
- start += part;
- if (!len && boundary) {
- __set_bit(NETFS_SREQ_BOUNDARY, &to->flags);
- boundary = false;
- }
-
- netfs_reissue_write(stream, subreq, &source);
- if (!len)
- break;
-
- } while (len);
-
- } while (!list_is_head(next, &stream->subrequests));
-}
-
-/*
- * Perform retries on the streams that need it. If we're doing content
- * encryption and the server copy changed due to a third-party write, we may
- * need to do an RMW cycle and also rewrite the data to the cache.
- */
-static void netfs_retry_writes(struct netfs_io_request *wreq)
-{
- struct netfs_io_subrequest *subreq;
- struct netfs_io_stream *stream;
- int s;
-
- /* Wait for all outstanding I/O to quiesce before performing retries as
- * we may need to renegotiate the I/O sizes.
- */
- for (s = 0; s < NR_IO_STREAMS; s++) {
- stream = &wreq->io_streams[s];
- if (!stream->active)
- continue;
-
- list_for_each_entry(subreq, &stream->subrequests, rreq_link) {
- wait_on_bit(&subreq->flags, NETFS_SREQ_IN_PROGRESS,
- TASK_UNINTERRUPTIBLE);
- }
- }
-
- // TODO: Enc: Fetch changed partial pages
- // TODO: Enc: Reencrypt content if needed.
- // TODO: Enc: Wind back transferred point.
- // TODO: Enc: Mark cache pages for retry.
-
- for (s = 0; s < NR_IO_STREAMS; s++) {
- stream = &wreq->io_streams[s];
- if (stream->need_retry) {
- stream->need_retry = false;
- netfs_retry_write_stream(wreq, stream);
- }
- }
+ wreq->buffer.tail = folioq;
+done:
+ wreq->buffer.first_tail_slot = slot;
}
/*
@@ -390,7 +213,7 @@ reassess_streams:
if (wreq->origin == NETFS_WRITEBACK ||
wreq->origin == NETFS_WRITETHROUGH ||
wreq->origin == NETFS_PGPRIV2_COPY_TO_CACHE)
- notes = BUFFERED;
+ notes = NEED_UNLOCK;
else
notes = 0;
@@ -449,14 +272,14 @@ reassess_streams:
cancel:
/* Remove if completely consumed. */
- spin_lock_bh(&wreq->lock);
+ spin_lock(&wreq->lock);
remove = front;
list_del_init(&front->rreq_link);
front = list_first_entry_or_null(&stream->subrequests,
struct netfs_io_subrequest, rreq_link);
stream->front = front;
- spin_unlock_bh(&wreq->lock);
+ spin_unlock(&wreq->lock);
netfs_put_subrequest(remove, false,
notes & SAW_FAILURE ?
netfs_sreq_trace_put_cancel :
@@ -487,7 +310,7 @@ reassess_streams:
trace_netfs_collect_state(wreq, wreq->collected_to, notes);
/* Unlock any folios that we have now finished with. */
- if (notes & BUFFERED) {
+ if (notes & NEED_UNLOCK) {
if (wreq->cleaned_to < wreq->collected_to)
netfs_writeback_unlock_folios(wreq, &notes);
} else {
@@ -500,7 +323,9 @@ reassess_streams:
goto need_retry;
if ((notes & MADE_PROGRESS) && test_bit(NETFS_RREQ_PAUSE, &wreq->flags)) {
trace_netfs_rreq(wreq, netfs_rreq_trace_unpause);
- clear_and_wake_up_bit(NETFS_RREQ_PAUSE, &wreq->flags);
+ clear_bit_unlock(NETFS_RREQ_PAUSE, &wreq->flags);
+ smp_mb__after_atomic(); /* Set PAUSE before task state */
+ wake_up(&wreq->waitq);
}
if (notes & NEED_REASSESS) {
@@ -575,7 +400,8 @@ void netfs_write_collection_worker(struct work_struct *work)
trace_netfs_rreq(wreq, netfs_rreq_trace_write_done);
if (wreq->io_streams[1].active &&
- wreq->io_streams[1].failed) {
+ wreq->io_streams[1].failed &&
+ ictx->ops->invalidate_cache) {
/* Cache write failure doesn't prevent writeback completion
* unless we're in disconnected mode.
*/
diff --git a/fs/netfs/write_issue.c b/fs/netfs/write_issue.c
index ff0e82505a0b..77279fc5b5a7 100644
--- a/fs/netfs/write_issue.c
+++ b/fs/netfs/write_issue.c
@@ -94,9 +94,10 @@ struct netfs_io_request *netfs_create_write_req(struct address_space *mapping,
{
struct netfs_io_request *wreq;
struct netfs_inode *ictx;
- bool is_buffered = (origin == NETFS_WRITEBACK ||
- origin == NETFS_WRITETHROUGH ||
- origin == NETFS_PGPRIV2_COPY_TO_CACHE);
+ bool is_cacheable = (origin == NETFS_WRITEBACK ||
+ origin == NETFS_WRITEBACK_SINGLE ||
+ origin == NETFS_WRITETHROUGH ||
+ origin == NETFS_PGPRIV2_COPY_TO_CACHE);
wreq = netfs_alloc_request(mapping, file, start, 0, origin);
if (IS_ERR(wreq))
@@ -105,8 +106,10 @@ struct netfs_io_request *netfs_create_write_req(struct address_space *mapping,
_enter("R=%x", wreq->debug_id);
ictx = netfs_inode(wreq->inode);
- if (is_buffered && netfs_is_cache_enabled(ictx))
+ if (is_cacheable && netfs_is_cache_enabled(ictx))
fscache_begin_write_operation(&wreq->cache_resources, netfs_i_cookie(ictx));
+ if (rolling_buffer_init(&wreq->buffer, wreq->debug_id, ITER_SOURCE) < 0)
+ goto nomem;
wreq->cleaned_to = wreq->start;
@@ -129,6 +132,10 @@ struct netfs_io_request *netfs_create_write_req(struct address_space *mapping,
}
return wreq;
+nomem:
+ wreq->error = -ENOMEM;
+ netfs_put_request(wreq, false, netfs_rreq_trace_put_failed);
+ return ERR_PTR(-ENOMEM);
}
/**
@@ -153,16 +160,15 @@ static void netfs_prepare_write(struct netfs_io_request *wreq,
loff_t start)
{
struct netfs_io_subrequest *subreq;
- struct iov_iter *wreq_iter = &wreq->io_iter;
+ struct iov_iter *wreq_iter = &wreq->buffer.iter;
/* Make sure we don't point the iterator at a used-up folio_queue
* struct being used as a placeholder to prevent the queue from
* collapsing. In such a case, extend the queue.
*/
if (iov_iter_is_folioq(wreq_iter) &&
- wreq_iter->folioq_slot >= folioq_nr_slots(wreq_iter->folioq)) {
- netfs_buffer_make_space(wreq);
- }
+ wreq_iter->folioq_slot >= folioq_nr_slots(wreq_iter->folioq))
+ rolling_buffer_make_space(&wreq->buffer);
subreq = netfs_alloc_subrequest(wreq);
subreq->source = stream->source;
@@ -198,7 +204,7 @@ static void netfs_prepare_write(struct netfs_io_request *wreq,
* the list. The collector only goes nextwards and uses the lock to
* remove entries off of the front.
*/
- spin_lock_bh(&wreq->lock);
+ spin_lock(&wreq->lock);
list_add_tail(&subreq->rreq_link, &stream->subrequests);
if (list_is_first(&subreq->rreq_link, &stream->subrequests)) {
stream->front = subreq;
@@ -209,7 +215,7 @@ static void netfs_prepare_write(struct netfs_io_request *wreq,
}
}
- spin_unlock_bh(&wreq->lock);
+ spin_unlock(&wreq->lock);
stream->construct = subreq;
}
@@ -247,6 +253,7 @@ void netfs_reissue_write(struct netfs_io_stream *stream,
subreq->retry_count++;
__clear_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags);
__set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
+ netfs_stat(&netfs_n_wh_retry_write_subreq);
netfs_do_issue_write(stream, subreq);
}
@@ -268,9 +275,9 @@ void netfs_issue_write(struct netfs_io_request *wreq,
* we can avoid overrunning the credits obtained (cifs) and try to parallelise
* content-crypto preparation with network writes.
*/
-int netfs_advance_write(struct netfs_io_request *wreq,
- struct netfs_io_stream *stream,
- loff_t start, size_t len, bool to_eof)
+size_t netfs_advance_write(struct netfs_io_request *wreq,
+ struct netfs_io_stream *stream,
+ loff_t start, size_t len, bool to_eof)
{
struct netfs_io_subrequest *subreq = stream->construct;
size_t part;
@@ -327,6 +334,9 @@ static int netfs_write_folio(struct netfs_io_request *wreq,
_enter("");
+ if (rolling_buffer_make_space(&wreq->buffer) < 0)
+ return -ENOMEM;
+
/* netfs_perform_write() may shift i_size around the page or from out
* of the page to beyond it, but cannot move i_size into or through the
* page since we have it locked.
@@ -431,7 +441,7 @@ static int netfs_write_folio(struct netfs_io_request *wreq,
}
/* Attach the folio to the rolling buffer. */
- netfs_buffer_append_folio(wreq, folio, false);
+ rolling_buffer_append(&wreq->buffer, folio, 0);
/* Move the submission point forward to allow for write-streaming data
* not starting at the front of the page. We don't do write-streaming
@@ -444,7 +454,8 @@ static int netfs_write_folio(struct netfs_io_request *wreq,
stream = &wreq->io_streams[s];
stream->submit_off = foff;
stream->submit_len = flen;
- if ((stream->source == NETFS_WRITE_TO_CACHE && streamw) ||
+ if (!stream->avail ||
+ (stream->source == NETFS_WRITE_TO_CACHE && streamw) ||
(stream->source == NETFS_UPLOAD_TO_SERVER &&
fgroup == NETFS_FOLIO_COPY_TO_CACHE)) {
stream->submit_off = UINT_MAX;
@@ -478,7 +489,7 @@ static int netfs_write_folio(struct netfs_io_request *wreq,
/* Advance the iterator(s). */
if (stream->submit_off > iter_off) {
- iov_iter_advance(&wreq->io_iter, stream->submit_off - iter_off);
+ rolling_buffer_advance(&wreq->buffer, stream->submit_off - iter_off);
iter_off = stream->submit_off;
}
@@ -496,7 +507,7 @@ static int netfs_write_folio(struct netfs_io_request *wreq,
}
if (fsize > iter_off)
- iov_iter_advance(&wreq->io_iter, fsize - iter_off);
+ rolling_buffer_advance(&wreq->buffer, fsize - iter_off);
atomic64_set(&wreq->issued_to, fpos + fsize);
if (!debug)
@@ -635,7 +646,7 @@ int netfs_advance_writethrough(struct netfs_io_request *wreq, struct writeback_c
struct folio **writethrough_cache)
{
_enter("R=%x ic=%zu ws=%u cp=%zu tp=%u",
- wreq->debug_id, wreq->iter.count, wreq->wsize, copied, to_page_end);
+ wreq->debug_id, wreq->buffer.iter.count, wreq->wsize, copied, to_page_end);
if (!*writethrough_cache) {
if (folio_test_dirty(folio))
@@ -710,10 +721,10 @@ int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t
part = netfs_advance_write(wreq, upload, start, len, false);
start += part;
len -= part;
- iov_iter_advance(&wreq->io_iter, part);
+ rolling_buffer_advance(&wreq->buffer, part);
if (test_bit(NETFS_RREQ_PAUSE, &wreq->flags)) {
trace_netfs_rreq(wreq, netfs_rreq_trace_wait_pause);
- wait_on_bit(&wreq->flags, NETFS_RREQ_PAUSE, TASK_UNINTERRUPTIBLE);
+ wait_event(wreq->waitq, !test_bit(NETFS_RREQ_PAUSE, &wreq->flags));
}
if (test_bit(NETFS_RREQ_FAILED, &wreq->flags))
break;
@@ -723,3 +734,194 @@ int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t
_leave(" = %d", error);
return error;
}
+
+/*
+ * Write some of a pending folio data back to the server and/or the cache.
+ */
+static int netfs_write_folio_single(struct netfs_io_request *wreq,
+ struct folio *folio)
+{
+ struct netfs_io_stream *upload = &wreq->io_streams[0];
+ struct netfs_io_stream *cache = &wreq->io_streams[1];
+ struct netfs_io_stream *stream;
+ size_t iter_off = 0;
+ size_t fsize = folio_size(folio), flen;
+ loff_t fpos = folio_pos(folio);
+ bool to_eof = false;
+ bool no_debug = false;
+
+ _enter("");
+
+ flen = folio_size(folio);
+ if (flen > wreq->i_size - fpos) {
+ flen = wreq->i_size - fpos;
+ folio_zero_segment(folio, flen, fsize);
+ to_eof = true;
+ } else if (flen == wreq->i_size - fpos) {
+ to_eof = true;
+ }
+
+ _debug("folio %zx/%zx", flen, fsize);
+
+ if (!upload->avail && !cache->avail) {
+ trace_netfs_folio(folio, netfs_folio_trace_cancel_store);
+ return 0;
+ }
+
+ if (!upload->construct)
+ trace_netfs_folio(folio, netfs_folio_trace_store);
+ else
+ trace_netfs_folio(folio, netfs_folio_trace_store_plus);
+
+ /* Attach the folio to the rolling buffer. */
+ folio_get(folio);
+ rolling_buffer_append(&wreq->buffer, folio, NETFS_ROLLBUF_PUT_MARK);
+
+ /* Move the submission point forward to allow for write-streaming data
+ * not starting at the front of the page. We don't do write-streaming
+ * with the cache as the cache requires DIO alignment.
+ *
+ * Also skip uploading for data that's been read and just needs copying
+ * to the cache.
+ */
+ for (int s = 0; s < NR_IO_STREAMS; s++) {
+ stream = &wreq->io_streams[s];
+ stream->submit_off = 0;
+ stream->submit_len = flen;
+ if (!stream->avail) {
+ stream->submit_off = UINT_MAX;
+ stream->submit_len = 0;
+ }
+ }
+
+ /* Attach the folio to one or more subrequests. For a big folio, we
+ * could end up with thousands of subrequests if the wsize is small -
+ * but we might need to wait during the creation of subrequests for
+ * network resources (eg. SMB credits).
+ */
+ for (;;) {
+ ssize_t part;
+ size_t lowest_off = ULONG_MAX;
+ int choose_s = -1;
+
+ /* Always add to the lowest-submitted stream first. */
+ for (int s = 0; s < NR_IO_STREAMS; s++) {
+ stream = &wreq->io_streams[s];
+ if (stream->submit_len > 0 &&
+ stream->submit_off < lowest_off) {
+ lowest_off = stream->submit_off;
+ choose_s = s;
+ }
+ }
+
+ if (choose_s < 0)
+ break;
+ stream = &wreq->io_streams[choose_s];
+
+ /* Advance the iterator(s). */
+ if (stream->submit_off > iter_off) {
+ rolling_buffer_advance(&wreq->buffer, stream->submit_off - iter_off);
+ iter_off = stream->submit_off;
+ }
+
+ atomic64_set(&wreq->issued_to, fpos + stream->submit_off);
+ stream->submit_extendable_to = fsize - stream->submit_off;
+ part = netfs_advance_write(wreq, stream, fpos + stream->submit_off,
+ stream->submit_len, to_eof);
+ stream->submit_off += part;
+ if (part > stream->submit_len)
+ stream->submit_len = 0;
+ else
+ stream->submit_len -= part;
+ if (part > 0)
+ no_debug = true;
+ }
+
+ wreq->buffer.iter.iov_offset = 0;
+ if (fsize > iter_off)
+ rolling_buffer_advance(&wreq->buffer, fsize - iter_off);
+ atomic64_set(&wreq->issued_to, fpos + fsize);
+
+ if (!no_debug)
+ kdebug("R=%x: No submit", wreq->debug_id);
+ _leave(" = 0");
+ return 0;
+}
+
+/**
+ * netfs_writeback_single - Write back a monolithic payload
+ * @mapping: The mapping to write from
+ * @wbc: Hints from the VM
+ * @iter: Data to write, must be ITER_FOLIOQ.
+ *
+ * Write a monolithic, non-pagecache object back to the server and/or
+ * the cache.
+ */
+int netfs_writeback_single(struct address_space *mapping,
+ struct writeback_control *wbc,
+ struct iov_iter *iter)
+{
+ struct netfs_io_request *wreq;
+ struct netfs_inode *ictx = netfs_inode(mapping->host);
+ struct folio_queue *fq;
+ size_t size = iov_iter_count(iter);
+ int ret;
+
+ if (WARN_ON_ONCE(!iov_iter_is_folioq(iter)))
+ return -EIO;
+
+ if (!mutex_trylock(&ictx->wb_lock)) {
+ if (wbc->sync_mode == WB_SYNC_NONE) {
+ netfs_stat(&netfs_n_wb_lock_skip);
+ return 0;
+ }
+ netfs_stat(&netfs_n_wb_lock_wait);
+ mutex_lock(&ictx->wb_lock);
+ }
+
+ wreq = netfs_create_write_req(mapping, NULL, 0, NETFS_WRITEBACK_SINGLE);
+ if (IS_ERR(wreq)) {
+ ret = PTR_ERR(wreq);
+ goto couldnt_start;
+ }
+
+ trace_netfs_write(wreq, netfs_write_trace_writeback);
+ netfs_stat(&netfs_n_wh_writepages);
+
+ if (__test_and_set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags))
+ wreq->netfs_ops->begin_writeback(wreq);
+
+ for (fq = (struct folio_queue *)iter->folioq; fq; fq = fq->next) {
+ for (int slot = 0; slot < folioq_count(fq); slot++) {
+ struct folio *folio = folioq_folio(fq, slot);
+ size_t part = umin(folioq_folio_size(fq, slot), size);
+
+ _debug("wbiter %lx %llx", folio->index, atomic64_read(&wreq->issued_to));
+
+ ret = netfs_write_folio_single(wreq, folio);
+ if (ret < 0)
+ goto stop;
+ size -= part;
+ if (size <= 0)
+ goto stop;
+ }
+ }
+
+stop:
+ for (int s = 0; s < NR_IO_STREAMS; s++)
+ netfs_issue_write(wreq, &wreq->io_streams[s]);
+ smp_wmb(); /* Write lists before ALL_QUEUED. */
+ set_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags);
+
+ mutex_unlock(&ictx->wb_lock);
+
+ netfs_put_request(wreq, false, netfs_rreq_trace_put_return);
+ _leave(" = %d", ret);
+ return ret;
+
+couldnt_start:
+ mutex_unlock(&ictx->wb_lock);
+ _leave(" = %d", ret);
+ return ret;
+}
+EXPORT_SYMBOL(netfs_writeback_single);
diff --git a/fs/netfs/write_retry.c b/fs/netfs/write_retry.c
new file mode 100644
index 000000000000..545d33079a77
--- /dev/null
+++ b/fs/netfs/write_retry.c
@@ -0,0 +1,234 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Network filesystem write retrying.
+ *
+ * Copyright (C) 2024 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/slab.h>
+#include "internal.h"
+
+/*
+ * Perform retries on the streams that need it.
+ */
+static void netfs_retry_write_stream(struct netfs_io_request *wreq,
+ struct netfs_io_stream *stream)
+{
+ struct list_head *next;
+
+ _enter("R=%x[%x:]", wreq->debug_id, stream->stream_nr);
+
+ if (list_empty(&stream->subrequests))
+ return;
+
+ if (stream->source == NETFS_UPLOAD_TO_SERVER &&
+ wreq->netfs_ops->retry_request)
+ wreq->netfs_ops->retry_request(wreq, stream);
+
+ if (unlikely(stream->failed))
+ return;
+
+ /* If there's no renegotiation to do, just resend each failed subreq. */
+ if (!stream->prepare_write) {
+ struct netfs_io_subrequest *subreq;
+
+ list_for_each_entry(subreq, &stream->subrequests, rreq_link) {
+ if (test_bit(NETFS_SREQ_FAILED, &subreq->flags))
+ break;
+ if (__test_and_clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags)) {
+ struct iov_iter source = subreq->io_iter;
+
+ iov_iter_revert(&source, subreq->len - source.count);
+ netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
+ netfs_reissue_write(stream, subreq, &source);
+ }
+ }
+ return;
+ }
+
+ next = stream->subrequests.next;
+
+ do {
+ struct netfs_io_subrequest *subreq = NULL, *from, *to, *tmp;
+ struct iov_iter source;
+ unsigned long long start, len;
+ size_t part;
+ bool boundary = false;
+
+ /* Go through the stream and find the next span of contiguous
+ * data that we then rejig (cifs, for example, needs the wsize
+ * renegotiating) and reissue.
+ */
+ from = list_entry(next, struct netfs_io_subrequest, rreq_link);
+ to = from;
+ start = from->start + from->transferred;
+ len = from->len - from->transferred;
+
+ if (test_bit(NETFS_SREQ_FAILED, &from->flags) ||
+ !test_bit(NETFS_SREQ_NEED_RETRY, &from->flags))
+ return;
+
+ list_for_each_continue(next, &stream->subrequests) {
+ subreq = list_entry(next, struct netfs_io_subrequest, rreq_link);
+ if (subreq->start + subreq->transferred != start + len ||
+ test_bit(NETFS_SREQ_BOUNDARY, &subreq->flags) ||
+ !test_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags))
+ break;
+ to = subreq;
+ len += to->len;
+ }
+
+ /* Determine the set of buffers we're going to use. Each
+ * subreq gets a subset of a single overall contiguous buffer.
+ */
+ netfs_reset_iter(from);
+ source = from->io_iter;
+ source.count = len;
+
+ /* Work through the sublist. */
+ subreq = from;
+ list_for_each_entry_from(subreq, &stream->subrequests, rreq_link) {
+ if (!len)
+ break;
+
+ subreq->start = start;
+ subreq->len = len;
+ __clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
+ subreq->retry_count++;
+ trace_netfs_sreq(subreq, netfs_sreq_trace_retry);
+
+ /* Renegotiate max_len (wsize) */
+ stream->sreq_max_len = len;
+ stream->prepare_write(subreq);
+
+ part = umin(len, stream->sreq_max_len);
+ if (unlikely(stream->sreq_max_segs))
+ part = netfs_limit_iter(&source, 0, part, stream->sreq_max_segs);
+ subreq->len = part;
+ subreq->transferred = 0;
+ len -= part;
+ start += part;
+ if (len && subreq == to &&
+ __test_and_clear_bit(NETFS_SREQ_BOUNDARY, &to->flags))
+ boundary = true;
+
+ netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
+ netfs_reissue_write(stream, subreq, &source);
+ if (subreq == to)
+ break;
+ }
+
+ /* If we managed to use fewer subreqs, we can discard the
+ * excess; if we used the same number, then we're done.
+ */
+ if (!len) {
+ if (subreq == to)
+ continue;
+ list_for_each_entry_safe_from(subreq, tmp,
+ &stream->subrequests, rreq_link) {
+ trace_netfs_sreq(subreq, netfs_sreq_trace_discard);
+ list_del(&subreq->rreq_link);
+ netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_done);
+ if (subreq == to)
+ break;
+ }
+ continue;
+ }
+
+ /* We ran out of subrequests, so we need to allocate some more
+ * and insert them after.
+ */
+ do {
+ subreq = netfs_alloc_subrequest(wreq);
+ subreq->source = to->source;
+ subreq->start = start;
+ subreq->debug_index = atomic_inc_return(&wreq->subreq_counter);
+ subreq->stream_nr = to->stream_nr;
+ subreq->retry_count = 1;
+
+ trace_netfs_sreq_ref(wreq->debug_id, subreq->debug_index,
+ refcount_read(&subreq->ref),
+ netfs_sreq_trace_new);
+ netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
+
+ list_add(&subreq->rreq_link, &to->rreq_link);
+ to = list_next_entry(to, rreq_link);
+ trace_netfs_sreq(subreq, netfs_sreq_trace_retry);
+
+ stream->sreq_max_len = len;
+ stream->sreq_max_segs = INT_MAX;
+ switch (stream->source) {
+ case NETFS_UPLOAD_TO_SERVER:
+ netfs_stat(&netfs_n_wh_upload);
+ stream->sreq_max_len = umin(len, wreq->wsize);
+ break;
+ case NETFS_WRITE_TO_CACHE:
+ netfs_stat(&netfs_n_wh_write);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ }
+
+ stream->prepare_write(subreq);
+
+ part = umin(len, stream->sreq_max_len);
+ subreq->len = subreq->transferred + part;
+ len -= part;
+ start += part;
+ if (!len && boundary) {
+ __set_bit(NETFS_SREQ_BOUNDARY, &to->flags);
+ boundary = false;
+ }
+
+ netfs_reissue_write(stream, subreq, &source);
+ if (!len)
+ break;
+
+ } while (len);
+
+ } while (!list_is_head(next, &stream->subrequests));
+}
+
+/*
+ * Perform retries on the streams that need it. If we're doing content
+ * encryption and the server copy changed due to a third-party write, we may
+ * need to do an RMW cycle and also rewrite the data to the cache.
+ */
+void netfs_retry_writes(struct netfs_io_request *wreq)
+{
+ struct netfs_io_subrequest *subreq;
+ struct netfs_io_stream *stream;
+ int s;
+
+ netfs_stat(&netfs_n_wh_retry_write_req);
+
+ /* Wait for all outstanding I/O to quiesce before performing retries as
+ * we may need to renegotiate the I/O sizes.
+ */
+ for (s = 0; s < NR_IO_STREAMS; s++) {
+ stream = &wreq->io_streams[s];
+ if (!stream->active)
+ continue;
+
+ list_for_each_entry(subreq, &stream->subrequests, rreq_link) {
+ wait_on_bit(&subreq->flags, NETFS_SREQ_IN_PROGRESS,
+ TASK_UNINTERRUPTIBLE);
+ }
+ }
+
+ // TODO: Enc: Fetch changed partial pages
+ // TODO: Enc: Reencrypt content if needed.
+ // TODO: Enc: Wind back transferred point.
+ // TODO: Enc: Mark cache pages for retry.
+
+ for (s = 0; s < NR_IO_STREAMS; s++) {
+ stream = &wreq->io_streams[s];
+ if (stream->need_retry) {
+ stream->need_retry = false;
+ netfs_retry_write_stream(wreq, stream);
+ }
+ }
+}