From ee4cdf7ba857a894ad1650d6ab77669cbbfa329e Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 2 Jul 2024 00:40:22 +0100 Subject: netfs: Speed up buffered reading Improve the efficiency of buffered reads in a number of ways: (1) Overhaul the algorithm in general so that it's a lot more compact and split the read submission code between buffered and unbuffered versions. The unbuffered version can be vastly simplified. (2) Read-result collection is handed off to a work queue rather than being done in the I/O thread. Multiple subrequests can be processed simultaneously. (3) When a subrequest is collected, any folios it fully spans are collected and "spare" data on either side is donated to either the previous or the next subrequest in the sequence. Notes: (*) Readahead expansion massively slows down fio, presumably because it causes a load of extra allocations, both folio and xarray, up front before RPC requests can be transmitted. (*) RDMA with cifs does appear to work, both with SIW and RXE. (*) PG_private_2-based reading and copy-to-cache is split out into its own file and altered to use folio_queue. Note that the copy to the cache now creates a new write transaction against the cache and adds the folios to be copied into it. This allows it to use part of the writeback I/O code. 
Signed-off-by: David Howells cc: Jeff Layton cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/20240814203850.2240469-20-dhowells@redhat.com/ # v2 Signed-off-by: Christian Brauner --- include/linux/folio_queue.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux/folio_queue.h') diff --git a/include/linux/folio_queue.h b/include/linux/folio_queue.h index 52773613bf23..955680c3bb5f 100644 --- a/include/linux/folio_queue.h +++ b/include/linux/folio_queue.h @@ -27,6 +27,7 @@ struct folio_queue { struct folio_queue *prev; /* Previous queue segment of NULL */ unsigned long marks; /* 1-bit mark per folio */ unsigned long marks2; /* Second 1-bit mark per folio */ + unsigned long marks3; /* Third 1-bit mark per folio */ #if PAGEVEC_SIZE > BITS_PER_LONG #error marks is not big enough #endif @@ -39,6 +40,7 @@ static inline void folioq_init(struct folio_queue *folioq) folioq->prev = NULL; folioq->marks = 0; folioq->marks2 = 0; + folioq->marks3 = 0; } static inline unsigned int folioq_nr_slots(const struct folio_queue *folioq) @@ -87,6 +89,21 @@ static inline void folioq_unmark2(struct folio_queue *folioq, unsigned int slot) clear_bit(slot, &folioq->marks2); } +static inline bool folioq_is_marked3(const struct folio_queue *folioq, unsigned int slot) +{ + return test_bit(slot, &folioq->marks3); +} + +static inline void folioq_mark3(struct folio_queue *folioq, unsigned int slot) +{ + set_bit(slot, &folioq->marks3); +} + +static inline void folioq_unmark3(struct folio_queue *folioq, unsigned int slot) +{ + clear_bit(slot, &folioq->marks3); +} + static inline unsigned int __folio_order(struct folio *folio) { if (!folio_test_large(folio)) @@ -133,6 +150,7 @@ static inline void folioq_clear(struct folio_queue *folioq, unsigned int slot) folioq->vec.folios[slot] = NULL; folioq_unmark(folioq, slot); folioq_unmark2(folioq, slot); + folioq_unmark3(folioq, slot); } #endif /* _LINUX_FOLIO_QUEUE_H */ -- cgit v1.2.3