author      Linus Torvalds <torvalds@linux-foundation.org>   2024-09-16 13:13:31 +0300
committer   Linus Torvalds <torvalds@linux-foundation.org>   2024-09-16 13:13:31 +0300
commit      35219bc5c71f4197c8bd10297597de797c1eece5 (patch)
tree        2448156135b78f54cd341a8457ccd84a371ddac7 /lib
parent      9020d0d844ad58a051f90b1e5b82ba34123925b9 (diff)
parent      4b40d43d9f951d87ae8dc414c2ef5ae50303a266 (diff)
download    linux-35219bc5c71f4197c8bd10297597de797c1eece5.tar.xz
Merge tag 'vfs-6.12.netfs' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull netfs updates from Christian Brauner:
"This contains the work to improve read/write performance for the new
netfs library.
The main performance enhancing changes are:
- Define a structure, struct folio_queue, and a new iterator type,
ITER_FOLIOQ, to hold a buffer as a replacement for ITER_XARRAY. See
that patch for questions about naming and form.
ITER_FOLIOQ is provided as a replacement for ITER_XARRAY. The
problem with an xarray is that accessing it requires the use of a
lock (typically the RCU read lock) - and this means that we can't
supply iterate_and_advance() with a step function that might sleep
(crypto for example) without having to drop the lock between pages.
ITER_FOLIOQ is the iterator for a chain of folio_queue structs,
where each folio_queue holds a small list of folios. A folio_queue
struct is a simpler structure than xarray and is not subject to
concurrent manipulation by the VM. folio_queue is used rather than
a bvec[] as it can form lists of indefinite size, adding to one end
and removing from the other on the fly.
- Provide a copy_folio_from_iter() wrapper.
- Make cifs RDMA support ITER_FOLIOQ.
- Use folio queues in the write-side helpers instead of xarrays.
- Add a function to reset the iterator in a subrequest.
- Simplify the write-side helpers to use sheaves to skip gaps rather
than trying to work out where gaps are.
- In afs, make the read subrequests asynchronous, putting them into
work items to allow the next patch to do progressive
unlocking/reading.
- Overhaul the read-side helpers to improve performance.
- Fix the caching of a partial block at the end of a file.
- Allow a store to be cancelled.
Then some changes for cifs to make it use folio queues instead of
xarrays for crypto bufferage:
- Use raw iteration functions rather than manually coding iteration
when hashing data.
- Switch to using folio_queue for crypto buffers.
- Remove the xarray bits.
Make some adjustments to the /proc/fs/netfs/stats file such that:
- All the netfs stats lines begin 'Netfs:' but change this to
something a bit more useful.
- Add a couple of stats counters to track the numbers of skips and
waits on the per-inode writeback serialisation lock to make it
easier to check for this as a source of performance loss.
Miscellaneous work:
- Ensure that the sb_writers lock is taken around
vfs_{set,remove}xattr() in the cachefiles code.
- Reduce the number of conditional branches in netfs_perform_write().
- Move the CIFS_INO_MODIFIED_ATTR flag to the netfs_inode struct and
remove cifs_post_modify().
- Move the max_len/max_nr_segs members from netfs_io_subrequest to
netfs_io_request as they're only needed for one subreq at a time.
- Add an 'unknown' source value for tracing purposes.
- Remove NETFS_COPY_TO_CACHE as it's no longer used.
- Set the request work function up front at allocation time.
- Use bh-disabling spinlocks for rreq->lock as cachefiles completion
may be run from block-filesystem DIO completion in softirq context.
- Remove fs/netfs/io.c"
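
The folio_queue plumbing described above lands in the lib/ portion of the diff below. As a rough sketch of how the pieces fit together (modelled on the iov_kunit_load_folioq() helper added to lib/kunit_iov_iter.c; the drain_folios() wrapper, its sizing and its error handling are illustrative assumptions, not code from this merge), a chain of folio_queue segments can be built up and then drained through an ITER_FOLIOQ iterator roughly like this:

    #include <linux/folio_queue.h>
    #include <linux/slab.h>
    #include <linux/uio.h>

    /* Illustrative only: describe @nr caller-held folios as one I/O buffer
     * and copy up to @len bytes out of it.  Folio refs stay with the caller. */
    static ssize_t drain_folios(struct folio **folios, unsigned int nr,
    			    void *dst, size_t len)
    {
    	struct folio_queue *head, *p, *next;
    	struct iov_iter iter;
    	size_t size = 0, copied;
    	unsigned int i;

    	head = kzalloc(sizeof(*head), GFP_KERNEL);
    	if (!head)
    		return -ENOMEM;
    	folioq_init(head);

    	p = head;
    	for (i = 0; i < nr; i++) {
    		if (folioq_full(p)) {
    			/* Extend the chain with another segment. */
    			p->next = kzalloc(sizeof(*p), GFP_KERNEL);
    			if (!p->next)
    				break;	/* sketch: just copy what we have */
    			folioq_init(p->next);
    			p->next->prev = p;
    			p = p->next;
    		}
    		folioq_append(p, folios[i]);
    		size += folio_size(folios[i]);
    	}

    	/* The whole chain becomes a single iterator; a step function that
    	 * sleeps (crypto, for example) is fine as no RCU lock is held. */
    	iov_iter_folio_queue(&iter, ITER_SOURCE, head, 0, 0, size);
    	copied = copy_from_iter(dst, min(len, size), &iter);

    	/* Free only the queue segments; the folios stay with the caller. */
    	for (p = head; p; p = next) {
    		next = p->next;
    		kfree(p);
    	}
    	return copied;
    }

The write-side netfs helpers rely on the same property the message describes: folios are appended at one end of the chain while writeback and the cache consume them from the other.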
* tag 'vfs-6.12.netfs' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs: (25 commits)
docs: filesystems: corrected grammar of netfs page
cifs: Don't support ITER_XARRAY
cifs: Switch crypto buffer to use a folio_queue rather than an xarray
cifs: Use iterate_and_advance*() routines directly for hashing
netfs: Cancel dirty folios that have no storage destination
cachefiles, netfs: Fix write to partial block at EOF
netfs: Remove fs/netfs/io.c
netfs: Speed up buffered reading
afs: Make read subreqs async
netfs: Simplify the writeback code
netfs: Provide an iterator-reset function
netfs: Use new folio_queue data type and iterator instead of xarray iter
cifs: Provide the capability to extract from ITER_FOLIOQ to RDMA SGEs
iov_iter: Provide copy_folio_from_iter()
mm: Define struct folio_queue and ITER_FOLIOQ to handle a sequence of folios
netfs: Use bh-disabling spinlocks for rreq->lock
netfs: Set the request work function upon allocation
netfs: Remove NETFS_COPY_TO_CACHE
netfs: Reserve netfs_sreq_source 0 as unset/unknown
netfs: Move max_len/max_nr_segs from netfs_io_subrequest to netfs_io_stream
...
Diffstat (limited to 'lib')
-rw-r--r--   lib/iov_iter.c        | 240
-rw-r--r--   lib/kunit_iov_iter.c  | 259
-rw-r--r--   lib/scatterlist.c     |  69
3 files changed, 564 insertions, 4 deletions
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 4a6a9f419bd7..97003155bfac 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -527,6 +527,39 @@ static void iov_iter_iovec_advance(struct iov_iter *i, size_t size)
 	i->__iov = iov;
 }
 
+static void iov_iter_folioq_advance(struct iov_iter *i, size_t size)
+{
+	const struct folio_queue *folioq = i->folioq;
+	unsigned int slot = i->folioq_slot;
+
+	if (!i->count)
+		return;
+	i->count -= size;
+
+	if (slot >= folioq_nr_slots(folioq)) {
+		folioq = folioq->next;
+		slot = 0;
+	}
+
+	size += i->iov_offset; /* From beginning of current segment. */
+	do {
+		size_t fsize = folioq_folio_size(folioq, slot);
+
+		if (likely(size < fsize))
+			break;
+		size -= fsize;
+		slot++;
+		if (slot >= folioq_nr_slots(folioq) && folioq->next) {
+			folioq = folioq->next;
+			slot = 0;
+		}
+	} while (size);
+
+	i->iov_offset = size;
+	i->folioq_slot = slot;
+	i->folioq = folioq;
+}
+
 void iov_iter_advance(struct iov_iter *i, size_t size)
 {
 	if (unlikely(i->count < size))
@@ -539,12 +572,40 @@ void iov_iter_advance(struct iov_iter *i, size_t size)
 		iov_iter_iovec_advance(i, size);
 	} else if (iov_iter_is_bvec(i)) {
 		iov_iter_bvec_advance(i, size);
+	} else if (iov_iter_is_folioq(i)) {
+		iov_iter_folioq_advance(i, size);
 	} else if (iov_iter_is_discard(i)) {
 		i->count -= size;
 	}
 }
 EXPORT_SYMBOL(iov_iter_advance);
 
+static void iov_iter_folioq_revert(struct iov_iter *i, size_t unroll)
+{
+	const struct folio_queue *folioq = i->folioq;
+	unsigned int slot = i->folioq_slot;
+
+	for (;;) {
+		size_t fsize;
+
+		if (slot == 0) {
+			folioq = folioq->prev;
+			slot = folioq_nr_slots(folioq);
+		}
+		slot--;
+
+		fsize = folioq_folio_size(folioq, slot);
+		if (unroll <= fsize) {
+			i->iov_offset = fsize - unroll;
+			break;
+		}
+		unroll -= fsize;
+	}
+
+	i->folioq_slot = slot;
+	i->folioq = folioq;
+}
+
 void iov_iter_revert(struct iov_iter *i, size_t unroll)
 {
 	if (!unroll)
@@ -576,6 +637,9 @@ void iov_iter_revert(struct iov_iter *i, size_t unroll)
 			}
 			unroll -= n;
 		}
+	} else if (iov_iter_is_folioq(i)) {
+		i->iov_offset = 0;
+		iov_iter_folioq_revert(i, unroll);
 	} else { /* same logics for iovec and kvec */
 		const struct iovec *iov = iter_iov(i);
 		while (1) {
@@ -603,6 +667,9 @@ size_t iov_iter_single_seg_count(const struct iov_iter *i)
 		if (iov_iter_is_bvec(i))
 			return min(i->count, i->bvec->bv_len - i->iov_offset);
 	}
+	if (unlikely(iov_iter_is_folioq(i)))
+		return !i->count ? 0 :
+			umin(folioq_folio_size(i->folioq, i->folioq_slot), i->count);
 	return i->count;
 }
 EXPORT_SYMBOL(iov_iter_single_seg_count);
@@ -640,6 +707,36 @@ void iov_iter_bvec(struct iov_iter *i, unsigned int direction,
 EXPORT_SYMBOL(iov_iter_bvec);
 
 /**
+ * iov_iter_folio_queue - Initialise an I/O iterator to use the folios in a folio queue
+ * @i: The iterator to initialise.
+ * @direction: The direction of the transfer.
+ * @folioq: The starting point in the folio queue.
+ * @first_slot: The first slot in the folio queue to use
+ * @offset: The offset into the folio in the first slot to start at
+ * @count: The size of the I/O buffer in bytes.
+ *
+ * Set up an I/O iterator to either draw data out of the pages attached to an
+ * inode or to inject data into those pages.  The pages *must* be prevented
+ * from evaporation, either by taking a ref on them or locking them by the
+ * caller.
+ */
+void iov_iter_folio_queue(struct iov_iter *i, unsigned int direction,
+			  const struct folio_queue *folioq, unsigned int first_slot,
+			  unsigned int offset, size_t count)
+{
+	BUG_ON(direction & ~1);
+	*i = (struct iov_iter) {
+		.iter_type = ITER_FOLIOQ,
+		.data_source = direction,
+		.folioq = folioq,
+		.folioq_slot = first_slot,
+		.count = count,
+		.iov_offset = offset,
+	};
+}
+EXPORT_SYMBOL(iov_iter_folio_queue);
+
+/**
  * iov_iter_xarray - Initialise an I/O iterator to use the pages in an xarray
  * @i: The iterator to initialise.
  * @direction: The direction of the transfer.
@@ -765,12 +862,19 @@ bool iov_iter_is_aligned(const struct iov_iter *i, unsigned addr_mask,
 	if (iov_iter_is_bvec(i))
 		return iov_iter_aligned_bvec(i, addr_mask, len_mask);
 
+	/* With both xarray and folioq types, we're dealing with whole folios. */
 	if (iov_iter_is_xarray(i)) {
 		if (i->count & len_mask)
 			return false;
 		if ((i->xarray_start + i->iov_offset) & addr_mask)
 			return false;
 	}
+	if (iov_iter_is_folioq(i)) {
+		if (i->count & len_mask)
+			return false;
+		if (i->iov_offset & addr_mask)
+			return false;
+	}
 
 	return true;
 }
@@ -835,6 +939,9 @@ unsigned long iov_iter_alignment(const struct iov_iter *i)
 	if (iov_iter_is_bvec(i))
 		return iov_iter_alignment_bvec(i);
 
+	/* With both xarray and folioq types, we're dealing with whole folios. */
+	if (iov_iter_is_folioq(i))
+		return i->iov_offset | i->count;
 	if (iov_iter_is_xarray(i))
 		return (i->xarray_start + i->iov_offset) | i->count;
 
@@ -887,6 +994,62 @@ static int want_pages_array(struct page ***res, size_t size,
 	return count;
 }
 
+static ssize_t iter_folioq_get_pages(struct iov_iter *iter,
+				     struct page ***ppages, size_t maxsize,
+				     unsigned maxpages, size_t *_start_offset)
+{
+	const struct folio_queue *folioq = iter->folioq;
+	struct page **pages;
+	unsigned int slot = iter->folioq_slot;
+	size_t extracted = 0, count = iter->count, iov_offset = iter->iov_offset;
+
+	if (slot >= folioq_nr_slots(folioq)) {
+		folioq = folioq->next;
+		slot = 0;
+		if (WARN_ON(iov_offset != 0))
+			return -EIO;
+	}
+
+	maxpages = want_pages_array(ppages, maxsize, iov_offset & ~PAGE_MASK, maxpages);
+	if (!maxpages)
+		return -ENOMEM;
+	*_start_offset = iov_offset & ~PAGE_MASK;
+	pages = *ppages;
+
+	for (;;) {
+		struct folio *folio = folioq_folio(folioq, slot);
+		size_t offset = iov_offset, fsize = folioq_folio_size(folioq, slot);
+		size_t part = PAGE_SIZE - offset % PAGE_SIZE;
+
+		part = umin(part, umin(maxsize - extracted, fsize - offset));
+		count -= part;
+		iov_offset += part;
+		extracted += part;
+
+		*pages = folio_page(folio, offset / PAGE_SIZE);
+		get_page(*pages);
+		pages++;
+		maxpages--;
+		if (maxpages == 0 || extracted >= maxsize)
+			break;
+
+		if (offset >= fsize) {
+			iov_offset = 0;
+			slot++;
+			if (slot == folioq_nr_slots(folioq) && folioq->next) {
+				folioq = folioq->next;
+				slot = 0;
+			}
+		}
+	}
+
+	iter->count = count;
+	iter->iov_offset = iov_offset;
+	iter->folioq = folioq;
+	iter->folioq_slot = slot;
+	return extracted;
+}
+
 static ssize_t iter_xarray_populate_pages(struct page **pages, struct xarray *xa,
 					  pgoff_t index, unsigned int nr_pages)
 {
@@ -1034,6 +1197,8 @@ static ssize_t __iov_iter_get_pages_alloc(struct iov_iter *i,
 		}
 		return maxsize;
 	}
+	if (iov_iter_is_folioq(i))
+		return iter_folioq_get_pages(i, pages, maxsize, maxpages, start);
 	if (iov_iter_is_xarray(i))
 		return iter_xarray_get_pages(i, pages, maxsize, maxpages, start);
 	return -EFAULT;
@@ -1118,6 +1283,11 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages)
 		return iov_npages(i, maxpages);
 	if (iov_iter_is_bvec(i))
 		return bvec_npages(i, maxpages);
+	if (iov_iter_is_folioq(i)) {
+		unsigned offset = i->iov_offset % PAGE_SIZE;
+		int npages = DIV_ROUND_UP(offset + i->count, PAGE_SIZE);
+		return min(npages, maxpages);
+	}
 	if (iov_iter_is_xarray(i)) {
 		unsigned offset = (i->xarray_start + i->iov_offset) % PAGE_SIZE;
 		int npages = DIV_ROUND_UP(offset + i->count, PAGE_SIZE);
@@ -1399,6 +1569,68 @@ void iov_iter_restore(struct iov_iter *i, struct iov_iter_state *state)
 }
 
 /*
+ * Extract a list of contiguous pages from an ITER_FOLIOQ iterator.  This does
+ * not get references on the pages, nor does it get a pin on them.
+ */
+static ssize_t iov_iter_extract_folioq_pages(struct iov_iter *i,
+					     struct page ***pages, size_t maxsize,
+					     unsigned int maxpages,
+					     iov_iter_extraction_t extraction_flags,
+					     size_t *offset0)
+{
+	const struct folio_queue *folioq = i->folioq;
+	struct page **p;
+	unsigned int nr = 0;
+	size_t extracted = 0, offset, slot = i->folioq_slot;
+
+	if (slot >= folioq_nr_slots(folioq)) {
+		folioq = folioq->next;
+		slot = 0;
+		if (WARN_ON(i->iov_offset != 0))
+			return -EIO;
+	}
+
+	offset = i->iov_offset & ~PAGE_MASK;
+	*offset0 = offset;
+
+	maxpages = want_pages_array(pages, maxsize, offset, maxpages);
+	if (!maxpages)
+		return -ENOMEM;
+	p = *pages;
+
+	for (;;) {
+		struct folio *folio = folioq_folio(folioq, slot);
+		size_t offset = i->iov_offset, fsize = folioq_folio_size(folioq, slot);
+		size_t part = PAGE_SIZE - offset % PAGE_SIZE;
+
+		if (offset < fsize) {
+			part = umin(part, umin(maxsize - extracted, fsize - offset));
+			i->count -= part;
+			i->iov_offset += part;
+			extracted += part;
+
+			p[nr++] = folio_page(folio, offset / PAGE_SIZE);
+		}
+
+		if (nr >= maxpages || extracted >= maxsize)
+			break;
+
+		if (i->iov_offset >= fsize) {
+			i->iov_offset = 0;
+			slot++;
+			if (slot == folioq_nr_slots(folioq) && folioq->next) {
+				folioq = folioq->next;
+				slot = 0;
+			}
+		}
+	}
+
+	i->folioq = folioq;
+	i->folioq_slot = slot;
+	return extracted;
+}
+
+/*
  * Extract a list of contiguous pages from an ITER_XARRAY iterator.  This does not
  * get references on the pages, nor does it get a pin on them.
  */
@@ -1618,8 +1850,8 @@ static ssize_t iov_iter_extract_user_pages(struct iov_iter *i,
  *  added to the pages, but refs will not be taken.
  *  iov_iter_extract_will_pin() will return true.
  *
- * (*) If the iterator is ITER_KVEC, ITER_BVEC or ITER_XARRAY, the pages are
- *     merely listed; no extra refs or pins are obtained.
+ * (*) If the iterator is ITER_KVEC, ITER_BVEC, ITER_FOLIOQ or ITER_XARRAY, the
+ *     pages are merely listed; no extra refs or pins are obtained.
  *     iov_iter_extract_will_pin() will return 0.
  *
  * Note also:
@@ -1654,6 +1886,10 @@ ssize_t iov_iter_extract_pages(struct iov_iter *i,
 		return iov_iter_extract_bvec_pages(i, pages, maxsize,
 						   maxpages, extraction_flags,
 						   offset0);
+	if (iov_iter_is_folioq(i))
+		return iov_iter_extract_folioq_pages(i, pages, maxsize,
+						     maxpages, extraction_flags,
+						     offset0);
 	if (iov_iter_is_xarray(i))
 		return iov_iter_extract_xarray_pages(i, pages, maxsize,
 						     maxpages, extraction_flags,
diff --git a/lib/kunit_iov_iter.c b/lib/kunit_iov_iter.c
index 27e0c8ee71d8..13e15687675a 100644
--- a/lib/kunit_iov_iter.c
+++ b/lib/kunit_iov_iter.c
@@ -12,6 +12,7 @@
 #include <linux/mm.h>
 #include <linux/uio.h>
 #include <linux/bvec.h>
+#include <linux/folio_queue.h>
 #include <kunit/test.h>
 
 MODULE_DESCRIPTION("iov_iter testing");
@@ -62,6 +63,9 @@ static void *__init iov_kunit_create_buffer(struct kunit *test,
 		KUNIT_ASSERT_EQ(test, got, npages);
 	}
 
+	for (int i = 0; i < npages; i++)
+		pages[i]->index = i;
+
 	buffer = vmap(pages, npages, VM_MAP | VM_MAP_PUT_PAGES, PAGE_KERNEL);
 	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, buffer);
 
@@ -362,6 +366,179 @@ stop:
 	KUNIT_SUCCEED(test);
 }
 
+static void iov_kunit_destroy_folioq(void *data)
+{
+	struct folio_queue *folioq, *next;
+
+	for (folioq = data; folioq; folioq = next) {
+		next = folioq->next;
+		for (int i = 0; i < folioq_nr_slots(folioq); i++)
+			if (folioq_folio(folioq, i))
+				folio_put(folioq_folio(folioq, i));
+		kfree(folioq);
+	}
+}
+
+static void __init iov_kunit_load_folioq(struct kunit *test,
+					 struct iov_iter *iter, int dir,
+					 struct folio_queue *folioq,
+					 struct page **pages, size_t npages)
+{
+	struct folio_queue *p = folioq;
+	size_t size = 0;
+	int i;
+
+	for (i = 0; i < npages; i++) {
+		if (folioq_full(p)) {
+			p->next = kzalloc(sizeof(struct folio_queue), GFP_KERNEL);
+			KUNIT_ASSERT_NOT_ERR_OR_NULL(test, p->next);
+			folioq_init(p->next);
+			p->next->prev = p;
+			p = p->next;
+		}
+		folioq_append(p, page_folio(pages[i]));
+		size += PAGE_SIZE;
+	}
+	iov_iter_folio_queue(iter, dir, folioq, 0, 0, size);
+}
+
+static struct folio_queue *iov_kunit_create_folioq(struct kunit *test)
+{
+	struct folio_queue *folioq;
+
+	folioq = kzalloc(sizeof(struct folio_queue), GFP_KERNEL);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, folioq);
+	kunit_add_action_or_reset(test, iov_kunit_destroy_folioq, folioq);
+	folioq_init(folioq);
+	return folioq;
+}
+
+/*
+ * Test copying to a ITER_FOLIOQ-type iterator.
+ */
+static void __init iov_kunit_copy_to_folioq(struct kunit *test)
+{
+	const struct kvec_test_range *pr;
+	struct iov_iter iter;
+	struct folio_queue *folioq;
+	struct page **spages, **bpages;
+	u8 *scratch, *buffer;
+	size_t bufsize, npages, size, copied;
+	int i, patt;
+
+	bufsize = 0x100000;
+	npages = bufsize / PAGE_SIZE;
+
+	folioq = iov_kunit_create_folioq(test);
+
+	scratch = iov_kunit_create_buffer(test, &spages, npages);
+	for (i = 0; i < bufsize; i++)
+		scratch[i] = pattern(i);
+
+	buffer = iov_kunit_create_buffer(test, &bpages, npages);
+	memset(buffer, 0, bufsize);
+
+	iov_kunit_load_folioq(test, &iter, READ, folioq, bpages, npages);
+
+	i = 0;
+	for (pr = kvec_test_ranges; pr->from >= 0; pr++) {
+		size = pr->to - pr->from;
+		KUNIT_ASSERT_LE(test, pr->to, bufsize);
+
+		iov_iter_folio_queue(&iter, READ, folioq, 0, 0, pr->to);
+		iov_iter_advance(&iter, pr->from);
+		copied = copy_to_iter(scratch + i, size, &iter);
+
+		KUNIT_EXPECT_EQ(test, copied, size);
+		KUNIT_EXPECT_EQ(test, iter.count, 0);
+		KUNIT_EXPECT_EQ(test, iter.iov_offset, pr->to % PAGE_SIZE);
+		i += size;
+		if (test->status == KUNIT_FAILURE)
+			goto stop;
+	}
+
+	/* Build the expected image in the scratch buffer. */
+	patt = 0;
+	memset(scratch, 0, bufsize);
+	for (pr = kvec_test_ranges; pr->from >= 0; pr++)
+		for (i = pr->from; i < pr->to; i++)
+			scratch[i] = pattern(patt++);
+
+	/* Compare the images */
+	for (i = 0; i < bufsize; i++) {
+		KUNIT_EXPECT_EQ_MSG(test, buffer[i], scratch[i], "at i=%x", i);
+		if (buffer[i] != scratch[i])
+			return;
+	}
+
+stop:
+	KUNIT_SUCCEED(test);
+}
+
+/*
+ * Test copying from a ITER_FOLIOQ-type iterator.
+ */
+static void __init iov_kunit_copy_from_folioq(struct kunit *test)
+{
+	const struct kvec_test_range *pr;
+	struct iov_iter iter;
+	struct folio_queue *folioq;
+	struct page **spages, **bpages;
+	u8 *scratch, *buffer;
+	size_t bufsize, npages, size, copied;
+	int i, j;
+
+	bufsize = 0x100000;
+	npages = bufsize / PAGE_SIZE;
+
+	folioq = iov_kunit_create_folioq(test);
+
+	buffer = iov_kunit_create_buffer(test, &bpages, npages);
+	for (i = 0; i < bufsize; i++)
+		buffer[i] = pattern(i);
+
+	scratch = iov_kunit_create_buffer(test, &spages, npages);
+	memset(scratch, 0, bufsize);
+
+	iov_kunit_load_folioq(test, &iter, READ, folioq, bpages, npages);
+
+	i = 0;
+	for (pr = kvec_test_ranges; pr->from >= 0; pr++) {
+		size = pr->to - pr->from;
+		KUNIT_ASSERT_LE(test, pr->to, bufsize);
+
+		iov_iter_folio_queue(&iter, WRITE, folioq, 0, 0, pr->to);
+		iov_iter_advance(&iter, pr->from);
+		copied = copy_from_iter(scratch + i, size, &iter);
+
+		KUNIT_EXPECT_EQ(test, copied, size);
+		KUNIT_EXPECT_EQ(test, iter.count, 0);
+		KUNIT_EXPECT_EQ(test, iter.iov_offset, pr->to % PAGE_SIZE);
+		i += size;
+	}
+
+	/* Build the expected image in the main buffer. */
+	i = 0;
+	memset(buffer, 0, bufsize);
+	for (pr = kvec_test_ranges; pr->from >= 0; pr++) {
+		for (j = pr->from; j < pr->to; j++) {
+			buffer[i++] = pattern(j);
+			if (i >= bufsize)
+				goto stop;
+		}
+	}
+stop:
+
+	/* Compare the images */
+	for (i = 0; i < bufsize; i++) {
+		KUNIT_EXPECT_EQ_MSG(test, scratch[i], buffer[i], "at i=%x", i);
+		if (scratch[i] != buffer[i])
+			return;
+	}
+
+	KUNIT_SUCCEED(test);
+}
+
 static void iov_kunit_destroy_xarray(void *data)
 {
 	struct xarray *xarray = data;
@@ -678,6 +855,85 @@ stop:
 }
 
 /*
+ * Test the extraction of ITER_FOLIOQ-type iterators.
+ */
+static void __init iov_kunit_extract_pages_folioq(struct kunit *test)
+{
+	const struct kvec_test_range *pr;
+	struct folio_queue *folioq;
+	struct iov_iter iter;
+	struct page **bpages, *pagelist[8], **pages = pagelist;
+	ssize_t len;
+	size_t bufsize, size = 0, npages;
+	int i, from;
+
+	bufsize = 0x100000;
+	npages = bufsize / PAGE_SIZE;
+
+	folioq = iov_kunit_create_folioq(test);
+
+	iov_kunit_create_buffer(test, &bpages, npages);
+	iov_kunit_load_folioq(test, &iter, READ, folioq, bpages, npages);
+
+	for (pr = kvec_test_ranges; pr->from >= 0; pr++) {
+		from = pr->from;
+		size = pr->to - from;
+		KUNIT_ASSERT_LE(test, pr->to, bufsize);
+
+		iov_iter_folio_queue(&iter, WRITE, folioq, 0, 0, pr->to);
+		iov_iter_advance(&iter, from);
+
+		do {
+			size_t offset0 = LONG_MAX;
+
+			for (i = 0; i < ARRAY_SIZE(pagelist); i++)
+				pagelist[i] = (void *)(unsigned long)0xaa55aa55aa55aa55ULL;
+
+			len = iov_iter_extract_pages(&iter, &pages, 100 * 1024,
+						     ARRAY_SIZE(pagelist), 0, &offset0);
+			KUNIT_EXPECT_GE(test, len, 0);
+			if (len < 0)
+				break;
+			KUNIT_EXPECT_LE(test, len, size);
+			KUNIT_EXPECT_EQ(test, iter.count, size - len);
+			if (len == 0)
+				break;
+			size -= len;
+			KUNIT_EXPECT_GE(test, (ssize_t)offset0, 0);
+			KUNIT_EXPECT_LT(test, offset0, PAGE_SIZE);
+
+			for (i = 0; i < ARRAY_SIZE(pagelist); i++) {
+				struct page *p;
+				ssize_t part = min_t(ssize_t, len, PAGE_SIZE - offset0);
+				int ix;
+
+				KUNIT_ASSERT_GE(test, part, 0);
+				ix = from / PAGE_SIZE;
+				KUNIT_ASSERT_LT(test, ix, npages);
+				p = bpages[ix];
+				KUNIT_EXPECT_PTR_EQ(test, pagelist[i], p);
+				KUNIT_EXPECT_EQ(test, offset0, from % PAGE_SIZE);
+				from += part;
+				len -= part;
+				KUNIT_ASSERT_GE(test, len, 0);
+				if (len == 0)
+					break;
+				offset0 = 0;
+			}
+
+			if (test->status == KUNIT_FAILURE)
+				goto stop;
+		} while (iov_iter_count(&iter) > 0);
+
+		KUNIT_EXPECT_EQ(test, size, 0);
+		KUNIT_EXPECT_EQ(test, iter.count, 0);
+	}
+
+stop:
+	KUNIT_SUCCEED(test);
+}
+
+/*
  * Test the extraction of ITER_XARRAY-type iterators.
  */
 static void __init iov_kunit_extract_pages_xarray(struct kunit *test)
@@ -761,10 +1017,13 @@ static struct kunit_case __refdata iov_kunit_cases[] = {
 	KUNIT_CASE(iov_kunit_copy_from_kvec),
 	KUNIT_CASE(iov_kunit_copy_to_bvec),
 	KUNIT_CASE(iov_kunit_copy_from_bvec),
+	KUNIT_CASE(iov_kunit_copy_to_folioq),
+	KUNIT_CASE(iov_kunit_copy_from_folioq),
 	KUNIT_CASE(iov_kunit_copy_to_xarray),
 	KUNIT_CASE(iov_kunit_copy_from_xarray),
 	KUNIT_CASE(iov_kunit_extract_pages_kvec),
 	KUNIT_CASE(iov_kunit_extract_pages_bvec),
+	KUNIT_CASE(iov_kunit_extract_pages_folioq),
 	KUNIT_CASE(iov_kunit_extract_pages_xarray),
 	{}
 };
diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index 7bc2220fea80..473b2646f71c 100644
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -11,6 +11,7 @@
 #include <linux/kmemleak.h>
 #include <linux/bvec.h>
 #include <linux/uio.h>
+#include <linux/folio_queue.h>
 
 /**
  * sg_next - return the next scatterlist entry in a list
@@ -1262,6 +1263,67 @@ static ssize_t extract_kvec_to_sg(struct iov_iter *iter,
 }
 
 /*
+ * Extract up to sg_max folios from an FOLIOQ-type iterator and add them to
+ * the scatterlist.  The pages are not pinned.
+ */
+static ssize_t extract_folioq_to_sg(struct iov_iter *iter,
+				    ssize_t maxsize,
+				    struct sg_table *sgtable,
+				    unsigned int sg_max,
+				    iov_iter_extraction_t extraction_flags)
+{
+	const struct folio_queue *folioq = iter->folioq;
+	struct scatterlist *sg = sgtable->sgl + sgtable->nents;
+	unsigned int slot = iter->folioq_slot;
+	ssize_t ret = 0;
+	size_t offset = iter->iov_offset;
+
+	BUG_ON(!folioq);
+
+	if (slot >= folioq_nr_slots(folioq)) {
+		folioq = folioq->next;
+		if (WARN_ON_ONCE(!folioq))
+			return 0;
+		slot = 0;
+	}
+
+	do {
+		struct folio *folio = folioq_folio(folioq, slot);
+		size_t fsize = folioq_folio_size(folioq, slot);
+
+		if (offset < fsize) {
+			size_t part = umin(maxsize - ret, fsize - offset);
+
+			sg_set_page(sg, folio_page(folio, 0), part, offset);
+			sgtable->nents++;
+			sg++;
+			sg_max--;
+			offset += part;
+			ret += part;
+		}
+
+		if (offset >= fsize) {
+			offset = 0;
+			slot++;
+			if (slot >= folioq_nr_slots(folioq)) {
+				if (!folioq->next) {
+					WARN_ON_ONCE(ret < iter->count);
+					break;
+				}
+				folioq = folioq->next;
+				slot = 0;
+			}
+		}
+	} while (sg_max > 0 && ret < maxsize);
+
+	iter->folioq = folioq;
+	iter->folioq_slot = slot;
+	iter->iov_offset = offset;
+	iter->count -= ret;
+	return ret;
+}
+
+/*
  * Extract up to sg_max folios from an XARRAY-type iterator and add them to
  * the scatterlist.  The pages are not pinned.
  */
@@ -1323,8 +1385,8 @@ static ssize_t extract_xarray_to_sg(struct iov_iter *iter,
  * addition of @sg_max elements.
  *
  * The pages referred to by UBUF- and IOVEC-type iterators are extracted and
- * pinned; BVEC-, KVEC- and XARRAY-type are extracted but aren't pinned; PIPE-
- * and DISCARD-type are not supported.
+ * pinned; BVEC-, KVEC-, FOLIOQ- and XARRAY-type are extracted but aren't
+ * pinned; DISCARD-type is not supported.
  *
  * No end mark is placed on the scatterlist; that's left to the caller.
  *
@@ -1356,6 +1418,9 @@ ssize_t extract_iter_to_sg(struct iov_iter *iter, size_t maxsize,
 	case ITER_KVEC:
 		return extract_kvec_to_sg(iter, maxsize, sgtable, sg_max,
 					  extraction_flags);
+	case ITER_FOLIOQ:
+		return extract_folioq_to_sg(iter, maxsize, sgtable, sg_max,
+					    extraction_flags);
 	case ITER_XARRAY:
 		return extract_xarray_to_sg(iter, maxsize, sgtable, sg_max,
 					    extraction_flags);
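
The extract_folioq_to_sg() case above is what lets the cifs RDMA path feed an ITER_FOLIOQ buffer straight into scatter/gather elements. A minimal sketch of a caller follows; the map_folioq_for_dma() helper and its sizing are assumptions for illustration, while extract_iter_to_sg(), sg_alloc_table(), sg_mark_end() and sg_free_table() are the real kernel APIs:

    #include <linux/scatterlist.h>
    #include <linux/uio.h>

    /* Illustrative only: map up to @max_sges folios described by an
     * ITER_FOLIOQ iterator into a scatterlist.  The iterator is advanced by
     * the number of bytes mapped; as noted above, the pages are not pinned. */
    static ssize_t map_folioq_for_dma(struct iov_iter *iter, size_t maxsize,
    				  unsigned int max_sges)
    {
    	struct sg_table sgt;
    	ssize_t ret;
    	int err;

    	err = sg_alloc_table(&sgt, max_sges, GFP_KERNEL);
    	if (err)
    		return err;
    	sgt.nents = 0;	/* extract_iter_to_sg() appends from sgl[nents] */

    	ret = extract_iter_to_sg(iter, maxsize, &sgt, max_sges, 0);
    	if (ret > 0)
    		sg_mark_end(&sgt.sgl[sgt.nents - 1]);	/* left to the caller */

    	/* ... hand sgt.sgl to the DMA/RDMA mapping layer here ... */

    	sg_free_table(&sgt);
    	return ret;
    }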