From f5b23d6704e478b5a97dbba5df9dea96a9cbf847 Mon Sep 17 00:00:00 2001 From: "Fabio M. De Francesco" Date: Tue, 9 Aug 2022 22:31:02 +0200 Subject: hfsplus: unmap the page in the "fail_page" label MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch series "hfsplus: Replace kmap() with kmap_local_page()". kmap() is being deprecated in favor of kmap_local_page(). There are two main problems with kmap(): (1) It comes with an overhead as mapping space is restricted and protected by a global lock for synchronization and (2) it also requires global TLB invalidation when the kmap’s pool wraps and it might block when the mapping space is fully utilized until a slot becomes available. With kmap_local_page() the mappings are per thread, CPU local, can take page faults, and can be called from any context (including interrupts). It is faster than kmap() in kernels with HIGHMEM enabled. Furthermore, the tasks can be preempted and, when they are scheduled to run again, the kernel virtual addresses are restored and still valid. Since its use in fs/hfsplus is safe everywhere, it should be preferred. Therefore, replace kmap() with kmap_local_page() in fs/hfsplus. Where possible, use the suited standard helpers (memzero_page(), memcpy_page()) instead of open coding kmap_local_page() plus memset() or memcpy(). Fix a bug due to a page not being unmapped if the code jumps to the "fail_page" label (1/4). Tested in a QEMU/KVM x86_32 VM, 6GB RAM, booting a kernel with HIGHMEM64GB enabled. This patch (of 4): Several paths within hfs_btree_open() jump to the "fail_page" label where put_page() is called while the page is still mapped. Call kunmap() to unmap the page just before put_page(). Link: https://lkml.kernel.org/r/20220809203105.26183-1-fmdefrancesco@gmail.com Link: https://lkml.kernel.org/r/20220809203105.26183-2-fmdefrancesco@gmail.com Signed-off-by: Fabio M. De Francesco Reviewed-by: Ira Weiny Reviewed-by: Viacheslav Dubeyko Cc: Matthew Wilcox Cc: Fabio M. De Francesco Cc: Jens Axboe Cc: Bart Van Assche Cc: Kees Cook Cc: Muchun Song Signed-off-by: Andrew Morton --- fs/hfsplus/btree.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/hfsplus/btree.c b/fs/hfsplus/btree.c index 66774f4cb4fd..3a917a9a4edd 100644 --- a/fs/hfsplus/btree.c +++ b/fs/hfsplus/btree.c @@ -245,6 +245,7 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id) return tree; fail_page: + kunmap(page); put_page(page); free_inode: tree->inode->i_mapping->a_ops = &hfsplus_aops; -- cgit v1.2.3 From 6c3014a67a44f11dc1020c8b47a1d1d626f007a9 Mon Sep 17 00:00:00 2001 From: "Fabio M. De Francesco" Date: Tue, 9 Aug 2022 22:31:03 +0200 Subject: hfsplus: convert kmap() to kmap_local_page() in bnode.c kmap() is being deprecated in favor of kmap_local_page(). Two main problems with kmap(): (1) It comes with an overhead as mapping space is restricted and protected by a global lock for synchronization and (2) it also requires global TLB invalidation when the kmap's pool wraps and it might block when the mapping space is fully utilized until a slot becomes available. With kmap_local_page() the mappings are per thread, CPU local, can take page faults, and can be called from any context (including interrupts). It is faster than kmap() in kernels with HIGHMEM enabled. Furthermore, the tasks can be preempted and, when they are scheduled to run again, the kernel virtual addresses are restored and still valid. Since its use in bnode.c is safe everywhere, it should be preferred.
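For illustration, here is a minimal sketch of the general conversion pattern — not a hunk from this series, and the variable names are hypothetical:

	void *ptr;

	/* Before: kmap() takes a slot in the global mapping area and
	 * must be paired with kunmap() on the same struct page.
	 */
	ptr = kmap(page);
	memcpy(buf, ptr + off, len);
	kunmap(page);

	/* After: the mapping is thread-local; kunmap_local() takes the
	 * address returned by kmap_local_page(), and nested mappings
	 * must be released in reverse (stack) order.
	 */
	ptr = kmap_local_page(page);
	memcpy(buf, ptr + off, len);
	kunmap_local(ptr);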
Therefore, replace kmap() with kmap_local_page() in bnode.c. Where possible, use the suited standard helpers (memzero_page(), memcpy_page()) instead of open coding kmap_local_page() plus memset() or memcpy(). Tested in a QEMU/KVM x86_32 VM, 6GB RAM, booting a kernel with HIGHMEM64GB enabled. Link: https://lkml.kernel.org/r/20220809203105.26183-3-fmdefrancesco@gmail.com Signed-off-by: Fabio M. De Francesco Suggested-by: Ira Weiny Reviewed-by: Ira Weiny Reviewed-by: Viacheslav Dubeyko Cc: Bart Van Assche Cc: Jens Axboe Cc: Kees Cook Cc: Matthew Wilcox Cc: Muchun Song Signed-off-by: Andrew Morton --- fs/hfsplus/bnode.c | 105 ++++++++++++++++++++++++----------------------------- 1 file changed, 48 insertions(+), 57 deletions(-) (limited to 'fs') diff --git a/fs/hfsplus/bnode.c b/fs/hfsplus/bnode.c index a5ab00e54220..87974d5e6791 100644 --- a/fs/hfsplus/bnode.c +++ b/fs/hfsplus/bnode.c @@ -29,14 +29,12 @@ void hfs_bnode_read(struct hfs_bnode *node, void *buf, int off, int len) off &= ~PAGE_MASK; l = min_t(int, len, PAGE_SIZE - off); - memcpy(buf, kmap(*pagep) + off, l); - kunmap(*pagep); + memcpy_from_page(buf, *pagep, off, l); while ((len -= l) != 0) { buf += l; l = min_t(int, len, PAGE_SIZE); - memcpy(buf, kmap(*++pagep), l); - kunmap(*pagep); + memcpy_from_page(buf, *++pagep, 0, l); } } @@ -82,16 +80,14 @@ void hfs_bnode_write(struct hfs_bnode *node, void *buf, int off, int len) off &= ~PAGE_MASK; l = min_t(int, len, PAGE_SIZE - off); - memcpy(kmap(*pagep) + off, buf, l); + memcpy_to_page(*pagep, off, buf, l); set_page_dirty(*pagep); - kunmap(*pagep); while ((len -= l) != 0) { buf += l; l = min_t(int, len, PAGE_SIZE); - memcpy(kmap(*++pagep), buf, l); + memcpy_to_page(*++pagep, 0, buf, l); set_page_dirty(*pagep); - kunmap(*pagep); } } @@ -112,15 +108,13 @@ void hfs_bnode_clear(struct hfs_bnode *node, int off, int len) off &= ~PAGE_MASK; l = min_t(int, len, PAGE_SIZE - off); - memset(kmap(*pagep) + off, 0, l); + memzero_page(*pagep, off, l); set_page_dirty(*pagep); - kunmap(*pagep); while ((len -= l) != 0) { l = min_t(int, len, PAGE_SIZE); - memset(kmap(*++pagep), 0, l); + memzero_page(*++pagep, 0, l); set_page_dirty(*pagep); - kunmap(*pagep); } } @@ -142,24 +136,20 @@ void hfs_bnode_copy(struct hfs_bnode *dst_node, int dst, if (src == dst) { l = min_t(int, len, PAGE_SIZE - src); - memcpy(kmap(*dst_page) + src, kmap(*src_page) + src, l); - kunmap(*src_page); + memcpy_page(*dst_page, src, *src_page, src, l); set_page_dirty(*dst_page); - kunmap(*dst_page); while ((len -= l) != 0) { l = min_t(int, len, PAGE_SIZE); - memcpy(kmap(*++dst_page), kmap(*++src_page), l); - kunmap(*src_page); + memcpy_page(*++dst_page, 0, *++src_page, 0, l); set_page_dirty(*dst_page); - kunmap(*dst_page); } } else { void *src_ptr, *dst_ptr; do { - src_ptr = kmap(*src_page) + src; - dst_ptr = kmap(*dst_page) + dst; + dst_ptr = kmap_local_page(*dst_page) + dst; + src_ptr = kmap_local_page(*src_page) + src; if (PAGE_SIZE - src < PAGE_SIZE - dst) { l = PAGE_SIZE - src; src = 0; @@ -171,9 +161,9 @@ void hfs_bnode_copy(struct hfs_bnode *dst_node, int dst, } l = min(len, l); memcpy(dst_ptr, src_ptr, l); - kunmap(*src_page); + kunmap_local(src_ptr); set_page_dirty(*dst_page); - kunmap(*dst_page); + kunmap_local(dst_ptr); if (!dst) dst_page++; else @@ -185,6 +175,7 @@ void hfs_bnode_copy(struct hfs_bnode *dst_node, int dst, void hfs_bnode_move(struct hfs_bnode *node, int dst, int src, int len) { struct page **src_page, **dst_page; + void *src_ptr, *dst_ptr; int l; hfs_dbg(BNODE_MOD, "movebytes: %u,%u,%u\n", dst, src, len); @@ 
-202,27 +193,28 @@ void hfs_bnode_move(struct hfs_bnode *node, int dst, int src, int len) if (src == dst) { while (src < len) { - memmove(kmap(*dst_page), kmap(*src_page), src); - kunmap(*src_page); + dst_ptr = kmap_local_page(*dst_page); + src_ptr = kmap_local_page(*src_page); + memmove(dst_ptr, src_ptr, src); + kunmap_local(src_ptr); set_page_dirty(*dst_page); - kunmap(*dst_page); + kunmap_local(dst_ptr); len -= src; src = PAGE_SIZE; src_page--; dst_page--; } src -= len; - memmove(kmap(*dst_page) + src, - kmap(*src_page) + src, len); - kunmap(*src_page); + dst_ptr = kmap_local_page(*dst_page); + src_ptr = kmap_local_page(*src_page); + memmove(dst_ptr + src, src_ptr + src, len); + kunmap_local(src_ptr); set_page_dirty(*dst_page); - kunmap(*dst_page); + kunmap_local(dst_ptr); } else { - void *src_ptr, *dst_ptr; - do { - src_ptr = kmap(*src_page) + src; - dst_ptr = kmap(*dst_page) + dst; + dst_ptr = kmap_local_page(*dst_page) + dst; + src_ptr = kmap_local_page(*src_page) + src; if (src < dst) { l = src; src = PAGE_SIZE; @@ -234,9 +226,9 @@ void hfs_bnode_move(struct hfs_bnode *node, int dst, int src, int len) } l = min(len, l); memmove(dst_ptr - l, src_ptr - l, l); - kunmap(*src_page); + kunmap_local(src_ptr); set_page_dirty(*dst_page); - kunmap(*dst_page); + kunmap_local(dst_ptr); if (dst == PAGE_SIZE) dst_page--; else @@ -251,26 +243,27 @@ void hfs_bnode_move(struct hfs_bnode *node, int dst, int src, int len) if (src == dst) { l = min_t(int, len, PAGE_SIZE - src); - memmove(kmap(*dst_page) + src, - kmap(*src_page) + src, l); - kunmap(*src_page); + + dst_ptr = kmap_local_page(*dst_page) + src; + src_ptr = kmap_local_page(*src_page) + src; + memmove(dst_ptr, src_ptr, l); + kunmap_local(src_ptr); set_page_dirty(*dst_page); - kunmap(*dst_page); + kunmap_local(dst_ptr); while ((len -= l) != 0) { l = min_t(int, len, PAGE_SIZE); - memmove(kmap(*++dst_page), - kmap(*++src_page), l); - kunmap(*src_page); + dst_ptr = kmap_local_page(*++dst_page); + src_ptr = kmap_local_page(*++src_page); + memmove(dst_ptr, src_ptr, l); + kunmap_local(src_ptr); set_page_dirty(*dst_page); - kunmap(*dst_page); + kunmap_local(dst_ptr); } } else { - void *src_ptr, *dst_ptr; - do { - src_ptr = kmap(*src_page) + src; - dst_ptr = kmap(*dst_page) + dst; + dst_ptr = kmap_local_page(*dst_page) + dst; + src_ptr = kmap_local_page(*src_page) + src; if (PAGE_SIZE - src < PAGE_SIZE - dst) { l = PAGE_SIZE - src; @@ -283,9 +276,9 @@ void hfs_bnode_move(struct hfs_bnode *node, int dst, int src, int len) } l = min(len, l); memmove(dst_ptr, src_ptr, l); - kunmap(*src_page); + kunmap_local(src_ptr); set_page_dirty(*dst_page); - kunmap(*dst_page); + kunmap_local(dst_ptr); if (!dst) dst_page++; else @@ -498,14 +491,14 @@ struct hfs_bnode *hfs_bnode_find(struct hfs_btree *tree, u32 num) if (!test_bit(HFS_BNODE_NEW, &node->flags)) return node; - desc = (struct hfs_bnode_desc *)(kmap(node->page[0]) + - node->page_offset); + desc = (struct hfs_bnode_desc *)(kmap_local_page(node->page[0]) + + node->page_offset); node->prev = be32_to_cpu(desc->prev); node->next = be32_to_cpu(desc->next); node->num_recs = be16_to_cpu(desc->num_recs); node->type = desc->type; node->height = desc->height; - kunmap(node->page[0]); + kunmap_local(desc); switch (node->type) { case HFS_NODE_HEADER: @@ -589,14 +582,12 @@ struct hfs_bnode *hfs_bnode_create(struct hfs_btree *tree, u32 num) } pagep = node->page; - memset(kmap(*pagep) + node->page_offset, 0, - min_t(int, PAGE_SIZE, tree->node_size)); + memzero_page(*pagep, node->page_offset, + min_t(int, PAGE_SIZE, 
tree->node_size)); set_page_dirty(*pagep); - kunmap(*pagep); for (i = 1; i < tree->pages_per_bnode; i++) { - memset(kmap(*++pagep), 0, PAGE_SIZE); + memzero_page(*++pagep, 0, PAGE_SIZE); set_page_dirty(*pagep); - kunmap(*pagep); } clear_bit(HFS_BNODE_NEW, &node->flags); wake_up(&node->lock_wq); -- cgit v1.2.3 From f9ef3b95a305874de0b36b7294f6cc8a0e07951e Mon Sep 17 00:00:00 2001 From: "Fabio M. De Francesco" Date: Tue, 9 Aug 2022 22:31:04 +0200 Subject: hfsplus: convert kmap() to kmap_local_page() in bitmap.c kmap() is being deprecated in favor of kmap_local_page(). There are two main problems with kmap(): (1) It comes with an overhead as mapping space is restricted and protected by a global lock for synchronization and (2) it also requires global TLB invalidation when the kmap's pool wraps and it might block when the mapping space is fully utilized until a slot becomes available. With kmap_local_page() the mappings are per thread, CPU local, can take page faults, and can be called from any context (including interrupts). It is faster than kmap() in kernels with HIGHMEM enabled. Furthermore, the tasks can be preempted and, when they are scheduled to run again, the kernel virtual addresses are restored and are still valid. Since its use in bitmap.c is safe everywhere, it should be preferred. Therefore, replace kmap() with kmap_local_page() in bitmap.c. Tested in a QEMU/KVM x86_32 VM, 6GB RAM, booting a kernel with HIGHMEM64GB enabled. Link: https://lkml.kernel.org/r/20220809203105.26183-4-fmdefrancesco@gmail.com Signed-off-by: Fabio M. De Francesco Suggested-by: Ira Weiny Reviewed-by: Ira Weiny Reviewed-by: Viacheslav Dubeyko Cc: Bart Van Assche Cc: Jens Axboe Cc: Kees Cook Cc: Matthew Wilcox Cc: Muchun Song Signed-off-by: Andrew Morton --- fs/hfsplus/bitmap.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/hfsplus/bitmap.c b/fs/hfsplus/bitmap.c index cebce0cfe340..bd8dcea85588 100644 --- a/fs/hfsplus/bitmap.c +++ b/fs/hfsplus/bitmap.c @@ -39,7 +39,7 @@ int hfsplus_block_allocate(struct super_block *sb, u32 size, start = size; goto out; } - pptr = kmap(page); + pptr = kmap_local_page(page); curr = pptr + (offset & (PAGE_CACHE_BITS - 1)) / 32; i = offset % 32; offset &= ~(PAGE_CACHE_BITS - 1); @@ -74,7 +74,7 @@ int hfsplus_block_allocate(struct super_block *sb, u32 size, } curr++; } - kunmap(page); + kunmap_local(pptr); offset += PAGE_CACHE_BITS; if (offset >= size) break; @@ -84,7 +84,7 @@ int hfsplus_block_allocate(struct super_block *sb, u32 size, start = size; goto out; } - curr = pptr = kmap(page); + curr = pptr = kmap_local_page(page); if ((size ^ offset) / PAGE_CACHE_BITS) end = pptr + PAGE_CACHE_BITS / 32; else @@ -127,7 +127,7 @@ found: len -= 32; } set_page_dirty(page); - kunmap(page); + kunmap_local(pptr); offset += PAGE_CACHE_BITS; page = read_mapping_page(mapping, offset / PAGE_CACHE_BITS, NULL); @@ -135,7 +135,7 @@ found: start = size; goto out; } - pptr = kmap(page); + pptr = kmap_local_page(page); curr = pptr; end = pptr + PAGE_CACHE_BITS / 32; } @@ -151,7 +151,7 @@ last: done: *curr = cpu_to_be32(n); set_page_dirty(page); - kunmap(page); + kunmap_local(pptr); *max = offset + (curr - pptr) * 32 + i - start; sbi->free_blocks -= *max; hfsplus_mark_mdb_dirty(sb); @@ -185,7 +185,7 @@ int hfsplus_block_free(struct super_block *sb, u32 offset, u32 count) page = read_mapping_page(mapping, pnr, NULL); if (IS_ERR(page)) goto kaboom; - pptr = kmap(page); + pptr = kmap_local_page(page); curr = pptr + (offset & 
(PAGE_CACHE_BITS - 1)) / 32; end = pptr + PAGE_CACHE_BITS / 32; len = count; @@ -215,11 +215,11 @@ int hfsplus_block_free(struct super_block *sb, u32 offset, u32 count) if (!count) break; set_page_dirty(page); - kunmap(page); + kunmap_local(pptr); page = read_mapping_page(mapping, ++pnr, NULL); if (IS_ERR(page)) goto kaboom; - pptr = kmap(page); + pptr = kmap_local_page(page); curr = pptr; end = pptr + PAGE_CACHE_BITS / 32; } @@ -231,7 +231,7 @@ done: } out: set_page_dirty(page); - kunmap(page); + kunmap_local(pptr); sbi->free_blocks += len; hfsplus_mark_mdb_dirty(sb); mutex_unlock(&sbi->alloc_mutex); -- cgit v1.2.3 From 9f25f357c557bfea39a2d6a629a6317d2b3dfc64 Mon Sep 17 00:00:00 2001 From: "Fabio M. De Francesco" Date: Tue, 9 Aug 2022 22:31:05 +0200 Subject: hfsplus: convert kmap() to kmap_local_page() in btree.c kmap() is being deprecated in favor of kmap_local_page(). There are two main problems with kmap(): (1) It comes with an overhead as mapping space is restricted and protected by a global lock for synchronization and (2) it also requires global TLB invalidation when the kmap's pool wraps and it might block when the mapping space is fully utilized until a slot becomes available. With kmap_local_page() the mappings are per thread, CPU local, can take page faults, and can be called from any context (including interrupts). It is faster than kmap() in kernels with HIGHMEM enabled. Furthermore, the tasks can be preempted and, when they are scheduled to run again, the kernel virtual addresses are restored and are still valid. Since its use in btree.c is safe everywhere, it should be preferred. Therefore, replace kmap() with kmap_local_page() in btree.c. Tested in a QEMU/KVM x86_32 VM, 6GB RAM, booting a kernel with HIGHMEM64GB enabled. Link: https://lkml.kernel.org/r/20220809203105.26183-5-fmdefrancesco@gmail.com Signed-off-by: Fabio M. 
De Francesco Suggested-by: Ira Weiny Reviewed-by: Ira Weiny Reviewed-by: Viacheslav Dubeyko Cc: Bart Van Assche Cc: Jens Axboe Cc: Kees Cook Cc: Matthew Wilcox Cc: Muchun Song Signed-off-by: Andrew Morton --- fs/hfsplus/btree.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/hfsplus/btree.c b/fs/hfsplus/btree.c index 3a917a9a4edd..9e1732a2b92a 100644 --- a/fs/hfsplus/btree.c +++ b/fs/hfsplus/btree.c @@ -163,7 +163,7 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id) goto free_inode; /* Load the header */ - head = (struct hfs_btree_header_rec *)(kmap(page) + + head = (struct hfs_btree_header_rec *)(kmap_local_page(page) + sizeof(struct hfs_bnode_desc)); tree->root = be32_to_cpu(head->root); tree->leaf_count = be32_to_cpu(head->leaf_count); @@ -240,12 +240,12 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id) (tree->node_size + PAGE_SIZE - 1) >> PAGE_SHIFT; - kunmap(page); + kunmap_local(head); put_page(page); return tree; fail_page: - kunmap(page); + kunmap_local(head); put_page(page); free_inode: tree->inode->i_mapping->a_ops = &hfsplus_aops; @@ -292,7 +292,7 @@ int hfs_btree_write(struct hfs_btree *tree) return -EIO; /* Load the header */ page = node->page[0]; - head = (struct hfs_btree_header_rec *)(kmap(page) + + head = (struct hfs_btree_header_rec *)(kmap_local_page(page) + sizeof(struct hfs_bnode_desc)); head->root = cpu_to_be32(tree->root); @@ -304,7 +304,7 @@ int hfs_btree_write(struct hfs_btree *tree) head->attributes = cpu_to_be32(tree->attributes); head->depth = cpu_to_be16(tree->depth); - kunmap(page); + kunmap_local(head); set_page_dirty(page); hfs_bnode_put(node); return 0; @@ -395,7 +395,7 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree) off += node->page_offset; pagep = node->page + (off >> PAGE_SHIFT); - data = kmap(*pagep); + data = kmap_local_page(*pagep); off &= ~PAGE_MASK; idx = 0; @@ -408,7 +408,7 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree) idx += i; data[off] |= m; set_page_dirty(*pagep); - kunmap(*pagep); + kunmap_local(data); tree->free_nodes--; mark_inode_dirty(tree->inode); hfs_bnode_put(node); @@ -418,14 +418,14 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree) } } if (++off >= PAGE_SIZE) { - kunmap(*pagep); - data = kmap(*++pagep); + kunmap_local(data); + data = kmap_local_page(*++pagep); off = 0; } idx += 8; len--; } - kunmap(*pagep); + kunmap_local(data); nidx = node->next; if (!nidx) { hfs_dbg(BNODE_MOD, "create new bmap node\n"); @@ -441,7 +441,7 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree) off = off16; off += node->page_offset; pagep = node->page + (off >> PAGE_SHIFT); - data = kmap(*pagep); + data = kmap_local_page(*pagep); off &= ~PAGE_MASK; } } @@ -491,7 +491,7 @@ void hfs_bmap_free(struct hfs_bnode *node) } off += node->page_offset + nidx / 8; page = node->page[off >> PAGE_SHIFT]; - data = kmap(page); + data = kmap_local_page(page); off &= ~PAGE_MASK; m = 1 << (~nidx & 7); byte = data[off]; @@ -499,13 +499,13 @@ void hfs_bmap_free(struct hfs_bnode *node) pr_crit("trying to free free bnode " "%u(%d)\n", node->this, node->type); - kunmap(page); + kunmap_local(data); hfs_bnode_put(node); return; } data[off] = byte & ~m; set_page_dirty(page); - kunmap(page); + kunmap_local(data); hfs_bnode_put(node); tree->free_nodes++; mark_inode_dirty(tree->inode); -- cgit v1.2.3 From 5bb6ce3aeb02497668a4e8971268b041aa61de8d Mon Sep 17 00:00:00 2001 From: "Fabio M. 
De Francesco" Date: Mon, 1 Aug 2022 14:27:09 +0200 Subject: fs/isofs: replace kmap() with kmap_local_page() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The use of kmap() is being deprecated in favor of kmap_local_page(). There are two main problems with kmap(): (1) It comes with an overhead as mapping space is restricted and protected by a global lock for synchronization and (2) it also requires global TLB invalidation when the kmap's pool wraps and it might block when the mapping space is fully utilized until a slot becomes available. With kmap_local_page() the mappings are per thread, CPU local, can take page faults, and can be called from any context (including interrupts). Tasks can be preempted and, when scheduled to run again, the kernel virtual addresses are restored and still valid. It is faster than kmap() in kernels with HIGHMEM enabled. Since kmap_local_page() can be safely used in compress.c, it should be called everywhere instead of kmap(). Therefore, replace kmap() with kmap_local_page() in compress.c. Where it is needed, use memzero_page() instead of open coding kmap_local_page() plus memset() to fill the pages with zeros. Delete the redundant flush_dcache_page() in the two call sites of memzero_page(). Tested with mkisofs on a QEMU/KVM x86_32 VM, 6GB RAM, booting a kernel with HIGHMEM64GB enabled. Link: https://lkml.kernel.org/r/20220801122709.8164-1-fmdefrancesco@gmail.com Signed-off-by: Fabio M. De Francesco Suggested-by: Ira Weiny Reviewed-by: Jan Kara Cc: Matthew Wilcox (Oracle) Cc: Roman Gushchin Cc: Pali Rohár Cc: Muchun Song Cc: Theodore Ts'o Signed-off-by: Andrew Morton --- fs/isofs/compress.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/isofs/compress.c b/fs/isofs/compress.c index b466172eec25..09285e82eb08 100644 --- a/fs/isofs/compress.c +++ b/fs/isofs/compress.c @@ -67,8 +67,7 @@ static loff_t zisofs_uncompress_block(struct inode *inode, loff_t block_start, for ( i = 0 ; i < pcount ; i++ ) { if (!pages[i]) continue; - memset(page_address(pages[i]), 0, PAGE_SIZE); - flush_dcache_page(pages[i]); + memzero_page(pages[i], 0, PAGE_SIZE); SetPageUptodate(pages[i]); } return ((loff_t)pcount) << PAGE_SHIFT; @@ -120,7 +119,7 @@ static loff_t zisofs_uncompress_block(struct inode *inode, loff_t block_start, zerr != Z_STREAM_END) { if (!stream.avail_out) { if (pages[curpage]) { - stream.next_out = page_address(pages[curpage]) + stream.next_out = kmap_local_page(pages[curpage]) + poffset; stream.avail_out = PAGE_SIZE - poffset; poffset = 0; @@ -176,6 +175,10 @@ static loff_t zisofs_uncompress_block(struct inode *inode, loff_t block_start, flush_dcache_page(pages[curpage]); SetPageUptodate(pages[curpage]); } + if (stream.next_out != (unsigned char *)zisofs_sink_page) { + kunmap_local(stream.next_out); + stream.next_out = NULL; + } curpage++; } if (!stream.avail_in) @@ -183,6 +186,8 @@ static loff_t zisofs_uncompress_block(struct inode *inode, loff_t block_start, } inflate_out: zlib_inflateEnd(&stream); + if (stream.next_out && stream.next_out != (unsigned char *)zisofs_sink_page) + kunmap_local(stream.next_out); z_eio: mutex_unlock(&zisofs_zlib_lock); @@ -283,9 +288,7 @@ static int zisofs_fill_pages(struct inode *inode, int full_page, int pcount, } if (poffset && *pages) { - memset(page_address(*pages) + poffset, 0, - PAGE_SIZE - poffset); - flush_dcache_page(*pages); + memzero_page(*pages, poffset, PAGE_SIZE - poffset); SetPageUptodate(*pages); } return 0; @@ 
-343,10 +346,8 @@ static int zisofs_read_folio(struct file *file, struct folio *folio) for (i = 0; i < pcount; i++, index++) { if (i != full_page) pages[i] = grab_cache_page_nowait(mapping, index); - if (pages[i]) { + if (pages[i]) ClearPageError(pages[i]); - kmap(pages[i]); - } } err = zisofs_fill_pages(inode, full_page, pcount, pages); @@ -357,7 +358,6 @@ static int zisofs_read_folio(struct file *file, struct folio *folio) flush_dcache_page(pages[i]); if (i == full_page && err) SetPageError(pages[i]); - kunmap(pages[i]); unlock_page(pages[i]); if (i != full_page) put_page(pages[i]); -- cgit v1.2.3 From cba7543e1515e79a85d37df85a0eb2cf0f07d115 Mon Sep 17 00:00:00 2001 From: Minghao Chi Date: Fri, 19 Aug 2022 08:18:19 +0000 Subject: fs/qnx6: delete unnecessary checks before brelse() brelse() tests whether its argument is NULL and then returns immediately. Thus remove the tests which are not needed around the shown calls. Link: https://lkml.kernel.org/r/20220819081819.96347-1-chi.minghao@zte.com.cn Signed-off-by: Minghao Chi Reported-by: Zeal Robot Cc: CGEL ZTE Cc: Matthew Wilcox Cc: Minghao Chi Cc: Muchun Song Cc: Theodore Ts'o Signed-off-by: Andrew Morton --- fs/qnx6/inode.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c index b9895afca9d1..85b2fa3b211c 100644 --- a/fs/qnx6/inode.c +++ b/fs/qnx6/inode.c @@ -470,10 +470,8 @@ out2: out1: iput(sbi->inodes); out: - if (bh1) - brelse(bh1); - if (bh2) - brelse(bh2); + brelse(bh1); + brelse(bh2); outnobh: kfree(qs); s->s_fs_info = NULL; -- cgit v1.2.3 From 693fc06e98514c2d5951ead4aca40cf8b21100b1 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Thu, 14 Jul 2022 19:32:55 +0200 Subject: epoll: use try_cmpxchg in list_add_tail_lockless Use try_cmpxchg instead of cmpxchg (*ptr, old, new) == old in list_add_tail_lockless. x86 CMPXCHG instruction returns success in ZF flag, so this change saves a compare after cmpxchg (and related move instruction in front of cmpxchg). No functional change intended. Link: https://lkml.kernel.org/r/20220714173255.12987-1-ubizjak@gmail.com Signed-off-by: Uros Bizjak Cc: Alexander Viro Signed-off-by: Andrew Morton --- fs/eventpoll.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 8b56b94e2f56..52954d4637b5 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1065,7 +1065,7 @@ static inline bool list_add_tail_lockless(struct list_head *new, * added to the list from another CPU: the winner observes * new->next == new. */ - if (cmpxchg(&new->next, new, head) != new) + if (!try_cmpxchg(&new->next, &new, head)) return false; /* -- cgit v1.2.3 From b0192296b45232872720d969366449c001ab1f4a Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Thu, 14 Jul 2022 19:16:53 +0200 Subject: buffer: use try_cmpxchg in discard_buffer Use try_cmpxchg instead of cmpxchg (*ptr, old, new) == old in discard_buffer. x86 CMPXCHG instruction returns success in ZF flag, so this change saves a compare after cmpxchg (and related move instruction in front of cmpxchg). Also, try_cmpxchg implicitly assigns old *ptr value to "old" when cmpxchg fails, enabling further code simplifications. Note that the value from *ptr should be read using READ_ONCE to prevent the compiler from merging, refetching or reordering the read. No functional change intended. 
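As a generic sketch of this conversion pattern (ptr and SOME_FLAGS are hypothetical, this is not the discard_buffer() hunk itself):

	unsigned long old, cur;

	/* Open-coded cmpxchg() retry loop: */
	old = READ_ONCE(*ptr);
	for (;;) {
		cur = cmpxchg(ptr, old, old & ~SOME_FLAGS);
		if (cur == old)
			break;
		old = cur;
	}

	/* With try_cmpxchg(), which returns a bool and, on failure,
	 * writes the value it found back into "old":
	 */
	old = READ_ONCE(*ptr);
	do {
	} while (!try_cmpxchg(ptr, &old, old & ~SOME_FLAGS));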
Link: https://lkml.kernel.org/r/20220714171653.12128-1-ubizjak@gmail.com Signed-off-by: Uros Bizjak Cc: Alexander Viro Signed-off-by: Andrew Morton --- fs/buffer.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/buffer.c b/fs/buffer.c index 55e762a58eb6..2fd98bb7d74e 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1464,19 +1464,15 @@ EXPORT_SYMBOL(set_bh_page); static void discard_buffer(struct buffer_head * bh) { - unsigned long b_state, b_state_old; + unsigned long b_state; lock_buffer(bh); clear_buffer_dirty(bh); bh->b_bdev = NULL; - b_state = bh->b_state; - for (;;) { - b_state_old = cmpxchg(&bh->b_state, b_state, - (b_state & ~BUFFER_FLAGS_DISCARD)); - if (b_state_old == b_state) - break; - b_state = b_state_old; - } + b_state = READ_ONCE(bh->b_state); + do { + } while (!try_cmpxchg(&bh->b_state, &b_state, + b_state & ~BUFFER_FLAGS_DISCARD)); unlock_buffer(bh); } -- cgit v1.2.3 From 38ace0d513d9d2556beca4d07102d25e9a73c53c Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Thu, 14 Jul 2022 18:48:51 +0200 Subject: aio: use atomic_try_cmpxchg in __get_reqs_available Use atomic_try_cmpxchg instead of atomic_cmpxchg (*ptr, old, new) == old in __get_reqs_available. x86 CMPXCHG instruction returns success in ZF flag, so this change saves a compare after cmpxchg (and related move instruction in front of cmpxchg). Also, atomic_try_cmpxchg implicitly assigns old *ptr value to "old" when cmpxchg fails, enabling further code simplifications. No functional change intended. Link: https://lkml.kernel.org/r/20220714164851.3055-1-ubizjak@gmail.com Signed-off-by: Uros Bizjak Cc: Benjamin LaHaise Cc: Alexander Viro Signed-off-by: Andrew Morton --- fs/aio.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/aio.c b/fs/aio.c index 606613e9d1f4..5b2ff20ad322 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -951,16 +951,13 @@ static bool __get_reqs_available(struct kioctx *ctx) local_irq_save(flags); kcpu = this_cpu_ptr(ctx->cpu); if (!kcpu->reqs_available) { - int old, avail = atomic_read(&ctx->reqs_available); + int avail = atomic_read(&ctx->reqs_available); do { if (avail < ctx->req_batch) goto out; - - old = avail; - avail = atomic_cmpxchg(&ctx->reqs_available, - avail, avail - ctx->req_batch); - } while (avail != old); + } while (!atomic_try_cmpxchg(&ctx->reqs_available, + &avail, avail - ctx->req_batch)); kcpu->reqs_available += ctx->req_batch; } -- cgit v1.2.3 From d75e9a4bccf4e19928534dec797935e650f85b09 Mon Sep 17 00:00:00 2001 From: "Fabio M. De Francesco" Date: Sun, 21 Aug 2022 20:03:58 +0200 Subject: hfs: unmap the page in the "fail_page" label Patch series "hfs: Replace kmap() with kmap_local_page()". kmap() is being deprecated in favor of kmap_local_page(). There are two main problems with kmap(): (1) It comes with an overhead as mapping space is restricted and protected by a global lock for synchronization and (2) it also requires global TLB invalidation when the kmap's pool wraps and it might block when the mapping space is fully utilized until a slot becomes available. With kmap_local_page() the mappings are per thread, CPU local, can take page faults, and can be called from any context (including interrupts). It is faster than kmap() in kernels with HIGHMEM enabled. Furthermore, the tasks can be preempted and, when they are scheduled to run again, the kernel virtual addresses are restored and still valid. Since its use in fs/hfs is safe everywhere, it should be preferred.
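Where a mapping exists only to feed a memset() or memcpy(), the highmem helpers collapse the map/operate/unmap sequence into a single call. A rough sketch of what they expand to, with hypothetical arguments (see include/linux/highmem.h for the real definitions):

	/* memcpy_from_page(buf, page, offset, len) is roughly: */
	char *vaddr = kmap_local_page(page);
	memcpy(buf, vaddr + offset, len);
	kunmap_local(vaddr);

	/* memzero_page(page, offset, len) also flushes the data cache
	 * after writing:
	 */
	vaddr = kmap_local_page(page);
	memset(vaddr + offset, 0, len);
	flush_dcache_page(page);
	kunmap_local(vaddr);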
Therefore, replace kmap() with kmap_local_page() in fs/hfs. Where possible, use the suited standard helpers (memzero_page(), memcpy_page()) instead of open coding kmap_local_page() plus memset() or memcpy(). Fix a bug due to a page not being unmapped if the code jumps to the "fail_page" label (1/3). Tested in a QEMU/KVM x86_32 VM, 6GB RAM, booting a kernel with HIGHMEM64GB enabled. This patch (of 3): Several paths within hfs_btree_open() jump to the "fail_page" label where put_page() is called while the page is still mapped. Call kunmap() to unmap the page just before put_page(). Link: https://lkml.kernel.org/r/20220821180400.8198-1-fmdefrancesco@gmail.com Link: https://lkml.kernel.org/r/20220821180400.8198-2-fmdefrancesco@gmail.com Signed-off-by: Fabio M. De Francesco Reviewed-by: Ira Weiny Reviewed-by: Viacheslav Dubeyko Cc: Arnd Bergmann Cc: Chaitanya Kulkarni Cc: Christian Brauner (Microsoft) Cc: Damien Le Moal Cc: Matthew Wilcox Cc: Jeff Layton Cc: Jens Axboe Cc: Kees Cook Cc: Martin K. Petersen Cc: Muchun Song Cc: Roman Gushchin Cc: Theodore Ts'o Signed-off-by: Andrew Morton --- fs/hfs/btree.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/hfs/btree.c b/fs/hfs/btree.c index 19017d296173..56c6782436e9 100644 --- a/fs/hfs/btree.c +++ b/fs/hfs/btree.c @@ -124,6 +124,7 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id, btree_keycmp ke return tree; fail_page: + kunmap(page); put_page(page); free_inode: tree->inode->i_mapping->a_ops = &hfs_aops; -- cgit v1.2.3 From ca0ac8dfd35b218b3c95d3b38c695fbff35d94ca Mon Sep 17 00:00:00 2001 From: "Fabio M. De Francesco" Date: Sun, 21 Aug 2022 20:03:59 +0200 Subject: hfs: replace kmap() with kmap_local_page() in bnode.c kmap() is being deprecated in favor of kmap_local_page(). Two main problems with kmap(): (1) It comes with an overhead as mapping space is restricted and protected by a global lock for synchronization and (2) it also requires global TLB invalidation when the kmap's pool wraps and it might block when the mapping space is fully utilized until a slot becomes available. With kmap_local_page() the mappings are per thread, CPU local, can take page faults, and can be called from any context (including interrupts). It is faster than kmap() in kernels with HIGHMEM enabled. Furthermore, the tasks can be preempted and, when they are scheduled to run again, the kernel virtual addresses are restored and still valid. Since its use in bnode.c is safe everywhere, it should be preferred. Therefore, replace kmap() with kmap_local_page() in bnode.c. Where possible, use the suited standard helpers (memzero_page(), memcpy_page()) instead of open coding kmap_local_page() plus memset() or memcpy(). Tested in a QEMU/KVM x86_32 VM, 6GB RAM, booting a kernel with HIGHMEM64GB enabled. Link: https://lkml.kernel.org/r/20220821180400.8198-3-fmdefrancesco@gmail.com Signed-off-by: Fabio M. De Francesco Suggested-by: Ira Weiny Reviewed-by: Viacheslav Dubeyko Cc: Arnd Bergmann Cc: Chaitanya Kulkarni Cc: Christian Brauner (Microsoft) Cc: Damien Le Moal Cc: Jeff Layton Cc: Jens Axboe Cc: Kees Cook Cc: Martin K.
Petersen Cc: Matthew Wilcox Cc: Muchun Song Cc: Roman Gushchin Cc: Theodore Ts'o Signed-off-by: Andrew Morton --- fs/hfs/bnode.c | 32 ++++++++++++-------------------- 1 file changed, 12 insertions(+), 20 deletions(-) (limited to 'fs') diff --git a/fs/hfs/bnode.c b/fs/hfs/bnode.c index c83fd0e8404d..2015e42e752a 100644 --- a/fs/hfs/bnode.c +++ b/fs/hfs/bnode.c @@ -21,7 +21,6 @@ void hfs_bnode_read(struct hfs_bnode *node, void *buf, int off, int len) int pagenum; int bytes_read; int bytes_to_read; - void *vaddr; off += node->page_offset; pagenum = off >> PAGE_SHIFT; @@ -33,9 +32,7 @@ void hfs_bnode_read(struct hfs_bnode *node, void *buf, int off, int len) page = node->page[pagenum]; bytes_to_read = min_t(int, len - bytes_read, PAGE_SIZE - off); - vaddr = kmap_atomic(page); - memcpy(buf + bytes_read, vaddr + off, bytes_to_read); - kunmap_atomic(vaddr); + memcpy_from_page(buf + bytes_read, page, off, bytes_to_read); pagenum++; off = 0; /* page offset only applies to the first page */ @@ -80,8 +77,7 @@ void hfs_bnode_write(struct hfs_bnode *node, void *buf, int off, int len) off += node->page_offset; page = node->page[0]; - memcpy(kmap(page) + off, buf, len); - kunmap(page); + memcpy_to_page(page, off, buf, len); set_page_dirty(page); } @@ -105,8 +101,7 @@ void hfs_bnode_clear(struct hfs_bnode *node, int off, int len) off += node->page_offset; page = node->page[0]; - memset(kmap(page) + off, 0, len); - kunmap(page); + memzero_page(page, off, len); set_page_dirty(page); } @@ -123,9 +118,7 @@ void hfs_bnode_copy(struct hfs_bnode *dst_node, int dst, src_page = src_node->page[0]; dst_page = dst_node->page[0]; - memcpy(kmap(dst_page) + dst, kmap(src_page) + src, len); - kunmap(src_page); - kunmap(dst_page); + memcpy_page(dst_page, dst, src_page, src, len); set_page_dirty(dst_page); } @@ -140,9 +133,9 @@ void hfs_bnode_move(struct hfs_bnode *node, int dst, int src, int len) src += node->page_offset; dst += node->page_offset; page = node->page[0]; - ptr = kmap(page); + ptr = kmap_local_page(page); memmove(ptr + dst, ptr + src, len); - kunmap(page); + kunmap_local(ptr); set_page_dirty(page); } @@ -346,13 +339,14 @@ struct hfs_bnode *hfs_bnode_find(struct hfs_btree *tree, u32 num) if (!test_bit(HFS_BNODE_NEW, &node->flags)) return node; - desc = (struct hfs_bnode_desc *)(kmap(node->page[0]) + node->page_offset); + desc = (struct hfs_bnode_desc *)(kmap_local_page(node->page[0]) + + node->page_offset); node->prev = be32_to_cpu(desc->prev); node->next = be32_to_cpu(desc->next); node->num_recs = be16_to_cpu(desc->num_recs); node->type = desc->type; node->height = desc->height; - kunmap(node->page[0]); + kunmap_local(desc); switch (node->type) { case HFS_NODE_HEADER: @@ -436,14 +430,12 @@ struct hfs_bnode *hfs_bnode_create(struct hfs_btree *tree, u32 num) } pagep = node->page; - memset(kmap(*pagep) + node->page_offset, 0, - min((int)PAGE_SIZE, (int)tree->node_size)); + memzero_page(*pagep, node->page_offset, + min((int)PAGE_SIZE, (int)tree->node_size)); set_page_dirty(*pagep); - kunmap(*pagep); for (i = 1; i < tree->pages_per_bnode; i++) { - memset(kmap(*++pagep), 0, PAGE_SIZE); + memzero_page(*++pagep, 0, PAGE_SIZE); set_page_dirty(*pagep); - kunmap(*pagep); } clear_bit(HFS_BNODE_NEW, &node->flags); wake_up(&node->lock_wq); -- cgit v1.2.3 From 21490eff121555b123f7088b935e40ee43d2c642 Mon Sep 17 00:00:00 2001 From: "Fabio M. De Francesco" Date: Sun, 21 Aug 2022 20:04:00 +0200 Subject: hfs: replace kmap() with kmap_local_page() in btree.c kmap() is being deprecated in favor of kmap_local_page(). 
Two main problems with kmap(): (1) It comes with an overhead as mapping space is restricted and protected by a global lock for synchronization and (2) it also requires global TLB invalidation when the kmap's pool wraps and it might block when the mapping space is fully utilized until a slot becomes available. With kmap_local_page() the mappings are per thread, CPU local, can take page faults, and can be called from any context (including interrupts). It is faster than kmap() in kernels with HIGHMEM enabled. Furthermore, the tasks can be preempted and, when they are scheduled to run again, the kernel virtual addresses are restored and still valid. Since its use in btree.c is safe everywhere, it should be preferred. Therefore, replace kmap() with kmap_local_page() in btree.c. Where possible, use the suited standard helpers (memzero_page(), memcpy_page()) instead of open coding kmap_local_page() plus memset() or memcpy(). Tested in a QEMU/KVM x86_32 VM, 6GB RAM, booting a kernel with HIGHMEM64GB enabled. Link: https://lkml.kernel.org/r/20220821180400.8198-4-fmdefrancesco@gmail.com Signed-off-by: Fabio M. De Francesco Suggested-by: Ira Weiny Reviewed-by: Viacheslav Dubeyko Cc: Arnd Bergmann Cc: Chaitanya Kulkarni Cc: Christian Brauner (Microsoft) Cc: Damien Le Moal Cc: Jeff Layton Cc: Jens Axboe Cc: Kees Cook Cc: Martin K. Petersen Cc: Matthew Wilcox Cc: Muchun Song Cc: Roman Gushchin Cc: Theodore Ts'o Signed-off-by: Andrew Morton --- fs/hfs/btree.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/hfs/btree.c b/fs/hfs/btree.c index 56c6782436e9..2fa4b1f8cc7f 100644 --- a/fs/hfs/btree.c +++ b/fs/hfs/btree.c @@ -80,7 +80,8 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id, btree_keycmp ke goto free_inode; /* Load the header */ - head = (struct hfs_btree_header_rec *)(kmap(page) + sizeof(struct hfs_bnode_desc)); + head = (struct hfs_btree_header_rec *)(kmap_local_page(page) + + sizeof(struct hfs_bnode_desc)); tree->root = be32_to_cpu(head->root); tree->leaf_count = be32_to_cpu(head->leaf_count); tree->leaf_head = be32_to_cpu(head->leaf_head); @@ -119,12 +120,12 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id, btree_keycmp ke tree->node_size_shift = ffs(size) - 1; tree->pages_per_bnode = (tree->node_size + PAGE_SIZE - 1) >> PAGE_SHIFT; - kunmap(page); + kunmap_local(head); put_page(page); return tree; fail_page: - kunmap(page); + kunmap_local(head); put_page(page); free_inode: tree->inode->i_mapping->a_ops = &hfs_aops; @@ -170,7 +171,8 @@ void hfs_btree_write(struct hfs_btree *tree) return; /* Load the header */ page = node->page[0]; - head = (struct hfs_btree_header_rec *)(kmap(page) + sizeof(struct hfs_bnode_desc)); + head = (struct hfs_btree_header_rec *)(kmap_local_page(page) + + sizeof(struct hfs_bnode_desc)); head->root = cpu_to_be32(tree->root); head->leaf_count = cpu_to_be32(tree->leaf_count); @@ -181,7 +183,7 @@ void hfs_btree_write(struct hfs_btree *tree) head->attributes = cpu_to_be32(tree->attributes); head->depth = cpu_to_be16(tree->depth); - kunmap(page); + kunmap_local(head); set_page_dirty(page); hfs_bnode_put(node); } @@ -269,7 +271,7 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree) off += node->page_offset; pagep = node->page + (off >> PAGE_SHIFT); - data = kmap(*pagep); + data = kmap_local_page(*pagep); off &= ~PAGE_MASK; idx = 0; @@ -282,7 +284,7 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree) idx += i; data[off] |= m; set_page_dirty(*pagep); - 
kunmap(*pagep); + kunmap_local(data); tree->free_nodes--; mark_inode_dirty(tree->inode); hfs_bnode_put(node); @@ -291,14 +293,14 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree) } } if (++off >= PAGE_SIZE) { - kunmap(*pagep); - data = kmap(*++pagep); + kunmap_local(data); + data = kmap_local_page(*++pagep); off = 0; } idx += 8; len--; } - kunmap(*pagep); + kunmap_local(data); nidx = node->next; if (!nidx) { printk(KERN_DEBUG "create new bmap node...\n"); @@ -314,7 +316,7 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree) off = off16; off += node->page_offset; pagep = node->page + (off >> PAGE_SHIFT); - data = kmap(*pagep); + data = kmap_local_page(*pagep); off &= ~PAGE_MASK; } } @@ -361,20 +363,20 @@ void hfs_bmap_free(struct hfs_bnode *node) } off += node->page_offset + nidx / 8; page = node->page[off >> PAGE_SHIFT]; - data = kmap(page); + data = kmap_local_page(page); off &= ~PAGE_MASK; m = 1 << (~nidx & 7); byte = data[off]; if (!(byte & m)) { pr_crit("trying to free free bnode %u(%d)\n", node->this, node->type); - kunmap(page); + kunmap_local(data); hfs_bnode_put(node); return; } data[off] = byte & ~m; set_page_dirty(page); - kunmap(page); + kunmap_local(data); hfs_bnode_put(node); tree->free_nodes++; mark_inode_dirty(tree->inode); -- cgit v1.2.3 From c97e21fe91ed1d59eb36cac1728bcb9a82167c7a Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 18 Aug 2022 23:01:13 +0200 Subject: ocfs2: move from strlcpy with unused retval to strscpy Follow the advice of the below link and prefer 'strscpy' in this subsystem. Conversion is 1:1 because the return value is not used. Generated by a coccinelle script. Link: https://lore.kernel.org/r/CAHk-=wgfRnXz0W3D37d01q3JFkr_i_uTL=V6A6G1oUZcprmknw@mail.gmail.com/ Link: https://lkml.kernel.org/r/20220818210123.7637-4-wsa+renesas@sang-engineering.com Signed-off-by: Wolfram Sang Acked-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Gang He Cc: Jun Piao Signed-off-by: Andrew Morton --- fs/ocfs2/stackglue.c | 4 ++-- fs/ocfs2/super.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c index dd77b7aaabf5..317126261523 100644 --- a/fs/ocfs2/stackglue.c +++ b/fs/ocfs2/stackglue.c @@ -334,10 +334,10 @@ int ocfs2_cluster_connect(const char *stack_name, goto out; } - strlcpy(new_conn->cc_name, group, GROUP_NAME_MAX + 1); + strscpy(new_conn->cc_name, group, GROUP_NAME_MAX + 1); new_conn->cc_namelen = grouplen; if (cluster_name_len) - strlcpy(new_conn->cc_cluster_name, cluster_name, + strscpy(new_conn->cc_cluster_name, cluster_name, CLUSTER_NAME_MAX + 1); new_conn->cc_cluster_name_len = cluster_name_len; new_conn->cc_recovery_handler = recovery_handler; diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index e2cc9eec287c..660bc1795848 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -2221,7 +2221,7 @@ static int ocfs2_initialize_super(struct super_block *sb, goto out_journal; } - strlcpy(osb->vol_label, di->id2.i_super.s_label, + strscpy(osb->vol_label, di->id2.i_super.s_label, OCFS2_MAX_VOL_LABEL_LEN); osb->root_blkno = le64_to_cpu(di->id2.i_super.s_root_blkno); osb->system_dir_blkno = le64_to_cpu(di->id2.i_super.s_system_dir_blkno); -- cgit v1.2.3 From 512cb7e4c110133e49da9f69885df3ed41aa284f Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 18 Aug 2022 23:01:53 +0200 Subject: reiserfs: move from strlcpy with unused retval to strscpy Follow the advice of the below link and prefer 'strscpy' in this subsystem. 
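As a small sketch of the behavioral difference (hypothetical buffer, not code from this patch):

	char label[8];

	/* strlcpy() returns strlen(src), so it must walk the entire
	 * source string even when the destination is too small.
	 */
	strlcpy(label, src, sizeof(label));

	/* strscpy() returns the number of characters copied (excluding
	 * the trailing NUL), or -E2BIG if the result was truncated, and
	 * does not scan the rest of an oversized source.
	 */
	if (strscpy(label, src, sizeof(label)) < 0)
		pr_debug("label truncated\n");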
Conversion is 1:1 because the return value is not used. Generated by a coccinelle script. Link: https://lore.kernel.org/r/CAHk-=wgfRnXz0W3D37d01q3JFkr_i_uTL=V6A6G1oUZcprmknw@mail.gmail.com/ Link: https://lkml.kernel.org/r/20220818210153.8095-1-wsa+renesas@sang-engineering.com Signed-off-by: Wolfram Sang Cc: Jan Kara Signed-off-by: Andrew Morton --- fs/reiserfs/procfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c index 4a7cb16e9345..3dba8acf4e83 100644 --- a/fs/reiserfs/procfs.c +++ b/fs/reiserfs/procfs.c @@ -411,7 +411,7 @@ int reiserfs_proc_info_init(struct super_block *sb) char *s; /* Some block devices use /'s */ - strlcpy(b, sb->s_id, BDEVNAME_SIZE); + strscpy(b, sb->s_id, BDEVNAME_SIZE); s = strchr(b, '/'); if (s) *s = '!'; @@ -441,7 +441,7 @@ int reiserfs_proc_info_done(struct super_block *sb) char *s; /* Some block devices use /'s */ - strlcpy(b, sb->s_id, BDEVNAME_SIZE); + strscpy(b, sb->s_id, BDEVNAME_SIZE); s = strchr(b, '/'); if (s) *s = '!'; -- cgit v1.2.3 From d85a1bec8e8d552ab13163ca1874dcd82f3d1550 Mon Sep 17 00:00:00 2001 From: Hawkins Jiawei Date: Thu, 1 Sep 2022 00:09:34 +0800 Subject: ntfs: fix use-after-free in ntfs_attr_find() Patch series "ntfs: fix bugs about Attribute", v2. This patchset fixes three bugs related to Attribute records: Patch 1 adds a sanity check to ensure that the attrs_offset field in the first mft record loaded from disk is within bounds. Patch 2 moves the ATTR_RECORD's bounds checking earlier, to avoid dereferencing ATTR_RECORD before checking that this ATTR_RECORD is within bounds. Patch 3 adds overflow checking to avoid a possible endless loop in ntfs_attr_find(). Without patch 1 and patch 2, the kernel triggers a KASAN use-after-free detection as reported by Syzkaller. Although either patch 1 or patch 2 alone can fix this, we still need both of them: patch 1 fixes the root cause, and patch 2 not only fixes the direct cause but also fixes a potential out-of-bounds bug. This patch (of 3): Syzkaller reported use-after-free read as follows: ================================================================== BUG: KASAN: use-after-free in ntfs_attr_find+0xc02/0xce0 fs/ntfs/attrib.c:597 Read of size 2 at addr ffff88807e352009 by task syz-executor153/3607 [...] Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106 print_address_description mm/kasan/report.c:317 [inline] print_report.cold+0x2ba/0x719 mm/kasan/report.c:433 kasan_report+0xb1/0x1e0 mm/kasan/report.c:495 ntfs_attr_find+0xc02/0xce0 fs/ntfs/attrib.c:597 ntfs_attr_lookup+0x1056/0x2070 fs/ntfs/attrib.c:1193 ntfs_read_inode_mount+0x89a/0x2580 fs/ntfs/inode.c:1845 ntfs_fill_super+0x1799/0x9320 fs/ntfs/super.c:2854 mount_bdev+0x34d/0x410 fs/super.c:1400 legacy_get_tree+0x105/0x220 fs/fs_context.c:610 vfs_get_tree+0x89/0x2f0 fs/super.c:1530 do_new_mount fs/namespace.c:3040 [inline] path_mount+0x1326/0x1e20 fs/namespace.c:3370 do_mount fs/namespace.c:3383 [inline] __do_sys_mount fs/namespace.c:3591 [inline] __se_sys_mount fs/namespace.c:3568 [inline] __x64_sys_mount+0x27f/0x300 fs/namespace.c:3568 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd [...]
The buggy address belongs to the physical page: page:ffffea0001f8d400 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x7e350 head:ffffea0001f8d400 order:3 compound_mapcount:0 compound_pincount:0 flags: 0xfff00000010200(slab|head|node=0|zone=1|lastcpupid=0x7ff) raw: 00fff00000010200 0000000000000000 dead000000000122 ffff888011842140 raw: 0000000000000000 0000000000040004 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff88807e351f00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff88807e351f80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >ffff88807e352000: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff88807e352080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff88807e352100: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ================================================================== The kernel loads $MFT/$DATA's first mft record in ntfs_read_inode_mount(). The problem is that, after loading, the kernel doesn't check whether the attrs_offset field holds a valid value. To be more specific, if the attrs_offset field is larger than the bytes_allocated field, then it may trigger an out-of-bounds read bug (reported as a use-after-free bug) in ntfs_attr_find(), when the kernel tries to access the corresponding mft record's attribute. This patch solves it by adding a sanity check between the attrs_offset field and the bytes_allocated field, after loading the first mft record. Link: https://lkml.kernel.org/r/20220831160935.3409-1-yin31149@gmail.com Link: https://lkml.kernel.org/r/20220831160935.3409-2-yin31149@gmail.com Signed-off-by: Hawkins Jiawei Cc: Anton Altaparmakov Cc: ChenXiaoSong Cc: syzkaller-bugs Cc: Dan Carpenter Signed-off-by: Andrew Morton --- fs/ntfs/inode.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'fs') diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index db0f1995aedd..08c659332e26 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -1829,6 +1829,13 @@ int ntfs_read_inode_mount(struct inode *vi) goto err_out; } + /* Sanity check offset to the first attribute */ + if (le16_to_cpu(m->attrs_offset) >= le32_to_cpu(m->bytes_allocated)) { + ntfs_error(sb, "Incorrect mft offset to the first attribute %u in superblock.", + le16_to_cpu(m->attrs_offset)); + goto err_out; + } + /* Need this to sanity check attribute list references to $MFT. */ vi->i_generation = ni->seq_no = le16_to_cpu(m->sequence_number); -- cgit v1.2.3 From 36a4d82dddbbd421d2b8e79e1cab68c8126d5075 Mon Sep 17 00:00:00 2001 From: Hawkins Jiawei Date: Thu, 1 Sep 2022 00:09:36 +0800 Subject: ntfs: fix out-of-bounds read in ntfs_attr_find() The kernel iterates over ATTR_RECORDs in an mft record in ntfs_attr_find(). To ensure that accesses to these ATTR_RECORDs are within bounds, the kernel does some checking during iteration. The problem is that, when checking whether an ATTR_RECORD's name is within bounds, the kernel dereferences the ATTR_RECORD's name_offset field before checking that the ATTR_RECORD structure itself is within bounds. This may result in an out-of-bounds read in ntfs_attr_find(), reported by Syzkaller: ================================================================== BUG: KASAN: use-after-free in ntfs_attr_find+0xc02/0xce0 fs/ntfs/attrib.c:597 Read of size 2 at addr ffff88807e352009 by task syz-executor153/3607 [...]
Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106 print_address_description mm/kasan/report.c:317 [inline] print_report.cold+0x2ba/0x719 mm/kasan/report.c:433 kasan_report+0xb1/0x1e0 mm/kasan/report.c:495 ntfs_attr_find+0xc02/0xce0 fs/ntfs/attrib.c:597 ntfs_attr_lookup+0x1056/0x2070 fs/ntfs/attrib.c:1193 ntfs_read_inode_mount+0x89a/0x2580 fs/ntfs/inode.c:1845 ntfs_fill_super+0x1799/0x9320 fs/ntfs/super.c:2854 mount_bdev+0x34d/0x410 fs/super.c:1400 legacy_get_tree+0x105/0x220 fs/fs_context.c:610 vfs_get_tree+0x89/0x2f0 fs/super.c:1530 do_new_mount fs/namespace.c:3040 [inline] path_mount+0x1326/0x1e20 fs/namespace.c:3370 do_mount fs/namespace.c:3383 [inline] __do_sys_mount fs/namespace.c:3591 [inline] __se_sys_mount fs/namespace.c:3568 [inline] __x64_sys_mount+0x27f/0x300 fs/namespace.c:3568 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd [...] The buggy address belongs to the physical page: page:ffffea0001f8d400 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x7e350 head:ffffea0001f8d400 order:3 compound_mapcount:0 compound_pincount:0 flags: 0xfff00000010200(slab|head|node=0|zone=1|lastcpupid=0x7ff) raw: 00fff00000010200 0000000000000000 dead000000000122 ffff888011842140 raw: 0000000000000000 0000000000040004 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff88807e351f00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff88807e351f80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >ffff88807e352000: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff88807e352080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff88807e352100: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ================================================================== This patch solves it by moving the ATTR_RECORD structure's bounds checking earlier, and only then checking whether the ATTR_RECORD's name is within bounds. What's more, this patch also adds some comments to improve maintainability.
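Schematically, the ordering the patch enforces is: validate the fixed-size header before reading any of its fields, and only then use those fields to validate the variable-length part. A simplified sketch of the hunk below:

	/* 1. The record header itself must lie within the mft record. */
	if ((u8 *)a < (u8 *)ctx->mrec ||
	    (u8 *)a + sizeof(ATTR_RECORD) > mrec_end)
		break;

	/* 2. Only now is it safe to read a->name_offset and
	 *    a->name_length and bounds-check the name they describe.
	 */
	name_end = (u8 *)a + le16_to_cpu(a->name_offset) +
			a->name_length * sizeof(ntfschar);
	if (name_end > mrec_end)
		break;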
Link: https://lkml.kernel.org/r/20220831160935.3409-3-yin31149@gmail.com Link: https://lore.kernel.org/all/1636796c-c85e-7f47-e96f-e074fee3c7d3@huawei.com/ Link: https://groups.google.com/g/syzkaller-bugs/c/t_XdeKPGTR4/m/LECAuIGcBgAJ Signed-off-by: chenxiaosong (A) Signed-off-by: Dan Carpenter Signed-off-by: Hawkins Jiawei Reported-by: syzbot+5f8dcabe4a3b2c51c607@syzkaller.appspotmail.com Tested-by: syzbot+5f8dcabe4a3b2c51c607@syzkaller.appspotmail.com Cc: Anton Altaparmakov Cc: syzkaller-bugs Signed-off-by: Andrew Morton --- fs/ntfs/attrib.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c index 52615e6090e1..cec4be2a2d23 100644 --- a/fs/ntfs/attrib.c +++ b/fs/ntfs/attrib.c @@ -594,11 +594,23 @@ static int ntfs_attr_find(const ATTR_TYPE type, const ntfschar *name, for (;; a = (ATTR_RECORD*)((u8*)a + le32_to_cpu(a->length))) { u8 *mrec_end = (u8 *)ctx->mrec + le32_to_cpu(ctx->mrec->bytes_allocated); - u8 *name_end = (u8 *)a + le16_to_cpu(a->name_offset) + - a->name_length * sizeof(ntfschar); - if ((u8*)a < (u8*)ctx->mrec || (u8*)a > mrec_end || - name_end > mrec_end) + u8 *name_end; + + /* check whether ATTR_RECORD wrap */ + if ((u8 *)a < (u8 *)ctx->mrec) + break; + + /* check whether Attribute Record Header is within bounds */ + if ((u8 *)a > mrec_end || + (u8 *)a + sizeof(ATTR_RECORD) > mrec_end) break; + + /* check whether ATTR_RECORD's name is within bounds */ + name_end = (u8 *)a + le16_to_cpu(a->name_offset) + + a->name_length * sizeof(ntfschar); + if (name_end > mrec_end) + break; + ctx->attr = a; if (unlikely(le32_to_cpu(a->type) > le32_to_cpu(type) || a->type == AT_END)) -- cgit v1.2.3 From 63095f4f3af59322bea984a6ae44337439348fe0 Mon Sep 17 00:00:00 2001 From: Hawkins Jiawei Date: Thu, 1 Sep 2022 00:09:38 +0800 Subject: ntfs: check overflow when iterating ATTR_RECORDs The kernel iterates over ATTR_RECORDs in an mft record in ntfs_attr_find(). Because the ATTR_RECORDs are laid out next to each other, the kernel can get the next ATTR_RECORD from the end address of the current one, via the current ATTR_RECORD's length field. The problem is that, when the kernel calculates the end address of the current ATTR_RECORD during iteration, the addition in `a = (ATTR_RECORD*)((u8*)a + le32_to_cpu(a->length))` may trigger an integer overflow. This may wrap, leading to an endless loop on 32-bit systems. This patch solves it by adding checks when calculating the end address of the current ATTR_RECORD during iteration. Link: https://lkml.kernel.org/r/20220831160935.3409-4-yin31149@gmail.com Link: https://lore.kernel.org/all/20220827105842.GM2030@kadam/ Signed-off-by: Hawkins Jiawei Suggested-by: Dan Carpenter Cc: Anton Altaparmakov Cc: chenxiaosong (A) Cc: syzkaller-bugs Signed-off-by: Andrew Morton --- fs/ntfs/attrib.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'fs') diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c index cec4be2a2d23..a3865bc4a0c6 100644 --- a/fs/ntfs/attrib.c +++ b/fs/ntfs/attrib.c @@ -617,6 +617,14 @@ static int ntfs_attr_find(const ATTR_TYPE type, const ntfschar *name, return -ENOENT; if (unlikely(!a->length)) break; + + /* check whether ATTR_RECORD's length wrap */ + if ((u8 *)a + le32_to_cpu(a->length) < (u8 *)a) + break; + /* check whether ATTR_RECORD's length is within bounds */ + if ((u8 *)a + le32_to_cpu(a->length) > mrec_end) + break; + if (a->type != type) continue; /* -- cgit v1.2.3 From 6e4a53ee7989c8a2b9fc3b14cd90f6e2d613ca76 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R.
Silva" Date: Sat, 3 Sep 2022 00:59:36 +0100 Subject: ocfs2: replace zero-length arrays with DECLARE_FLEX_ARRAY() helper Zero-length arrays are deprecated and we are moving towards adopting C99 flexible-array members, instead. So, replace zero-length array declarations in a couple of structures and unions with the new DECLARE_FLEX_ARRAY() helper macro. This helper allows for a flexible-array member in a union and as only member in a structure. Also, this addresses multiple warnings reported when building with Clang-15 and -Wzero-length-array. Lastly, this will also help memcpy (in a coming hardening update) execute proper bounds-checking on variable length object i_symlink at fs/ocfs2/namei.c:1973: fs/ocfs2/namei.c: 1973 memcpy((char *) fe->id2.i_symlink, symname, l); Link: https://github.com/KSPP/linux/issues/21 Link: https://github.com/KSPP/linux/issues/193 Link: https://github.com/KSPP/linux/issues/197 Link: https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html Link: https://lkml.kernel.org/r/YxKY6O2hmdwNh8r8@work Signed-off-by: Gustavo A. R. Silva Reviewed-by: Kees Cook Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Gang He Cc: Jun Piao Signed-off-by: Andrew Morton --- fs/ocfs2/ocfs2_fs.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index 638d875eccc7..7aebdbf5cc0a 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h @@ -527,7 +527,7 @@ struct ocfs2_extent_block * value -1 (0xFFFF) is OCFS2_INVALID_SLOT. This marks a slot empty. */ struct ocfs2_slot_map { -/*00*/ __le16 sm_slots[0]; +/*00*/ DECLARE_FLEX_ARRAY(__le16, sm_slots); /* * Actual on-disk size is one block. OCFS2_MAX_SLOTS is 255, * 255 * sizeof(__le16) == 512B, within the 512B block minimum blocksize. @@ -548,7 +548,7 @@ struct ocfs2_extended_slot { * i_size. */ struct ocfs2_slot_map_extended { -/*00*/ struct ocfs2_extended_slot se_slots[0]; +/*00*/ DECLARE_FLEX_ARRAY(struct ocfs2_extended_slot, se_slots); /* * Actual size is i_size of the slot_map system file. It should * match s_max_slots * sizeof(struct ocfs2_extended_slot) @@ -727,7 +727,7 @@ struct ocfs2_dinode { struct ocfs2_extent_list i_list; struct ocfs2_truncate_log i_dealloc; struct ocfs2_inline_data i_data; - __u8 i_symlink[0]; + DECLARE_FLEX_ARRAY(__u8, i_symlink); } id2; /* Actual on-disk size is one block */ }; @@ -892,7 +892,7 @@ struct ocfs2_group_desc /*30*/ struct ocfs2_block_check bg_check; /* Error checking */ __le64 bg_reserved2; /*40*/ union { - __u8 bg_bitmap[0]; + DECLARE_FLEX_ARRAY(__u8, bg_bitmap); struct { /* * Block groups may be discontiguous when -- cgit v1.2.3 From 1c320cfa17701cbf98085b34ad6159e9f41e5268 Mon Sep 17 00:00:00 2001 From: Jiangshan Yi Date: Mon, 5 Sep 2022 14:16:56 +0800 Subject: fs/ocfs2/suballoc.h: fix spelling typo in comment Fix spelling typo in comment. 
Link: https://lkml.kernel.org/r/20220905061656.1829179-1-13667453960@163.com Signed-off-by: Jiangshan Yi Reported-by: k2ci Acked-by: Joseph Qi Signed-off-by: Andrew Morton --- fs/ocfs2/suballoc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h index 5805a03d100b..9c74eace3adc 100644 --- a/fs/ocfs2/suballoc.h +++ b/fs/ocfs2/suballoc.h @@ -106,7 +106,7 @@ int ocfs2_claim_clusters(handle_t *handle, u32 *cluster_start, u32 *num_clusters); /* - * Use this variant of ocfs2_claim_clusters to specify a maxiumum + * Use this variant of ocfs2_claim_clusters to specify a maximum * number of clusters smaller than the allocation reserved. */ int __ocfs2_claim_clusters(handle_t *handle, -- cgit v1.2.3 From 5ca14835dc429c09fefb290f60343fe266382760 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 9 Sep 2022 13:57:41 -0700 Subject: fs: uninline inode_maybe_inc_iversion() It has many callsites and is large.

   text    data     bss     dec     hex  filename
  91796   15984     512  108292   1a704  mm/shmem.o-before
  91180   15984     512  107676   1a49c  mm/shmem.o-after

Acked-by: Jeff Layton Cc: Chuck Lever Cc: Alexander Viro Cc: Hugh Dickins Signed-off-by: Andrew Morton --- fs/libfs.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/iversion.h | 46 +--------------------------------------------- 2 files changed, 47 insertions(+), 45 deletions(-) (limited to 'fs') diff --git a/fs/libfs.c b/fs/libfs.c index 31b0ddf01c31..682d56345a1c 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -15,6 +15,7 @@ #include #include #include +#include <linux/iversion.h> #include #include /* sync_mapping_buffers */ #include @@ -1520,3 +1521,48 @@ void generic_set_encrypted_ci_d_ops(struct dentry *dentry) #endif } EXPORT_SYMBOL(generic_set_encrypted_ci_d_ops); + +/** + * inode_maybe_inc_iversion - increments i_version + * @inode: inode with the i_version that should be updated + * @force: increment the counter even if it's not necessary? + * + * Every time the inode is modified, the i_version field must be seen to have + * changed by any observer. + * + * If "force" is set or the QUERIED flag is set, then ensure that we increment + * the value, and clear the queried flag. + * + * In the common case where neither is set, then we can return "false" without + * updating i_version. + * + * If this function returns false, and no other metadata has changed, then we + * can avoid logging the metadata. + */ +bool inode_maybe_inc_iversion(struct inode *inode, bool force) +{ + u64 cur, new; + + /* + * The i_version field is not strictly ordered with any other inode + * information, but the legacy inode_inc_iversion code used a spinlock + * to serialize increments. + * + * Here, we add full memory barriers to ensure that any de-facto + * ordering with other info is preserved.
+ * + * This barrier pairs with the barrier in inode_query_iversion() + */ + smp_mb(); + cur = inode_peek_iversion_raw(inode); + do { + /* If flag is clear then we needn't do anything */ + if (!force && !(cur & I_VERSION_QUERIED)) + return false; + + /* Since lowest bit is flag, add 2 to avoid it */ + new = (cur & ~I_VERSION_QUERIED) + I_VERSION_INCREMENT; + } while (!atomic64_try_cmpxchg(&inode->i_version, &cur, new)); + return true; +} +EXPORT_SYMBOL(inode_maybe_inc_iversion); diff --git a/include/linux/iversion.h b/include/linux/iversion.h index eb5a15810169..e27bd4f55d84 100644 --- a/include/linux/iversion.h +++ b/include/linux/iversion.h @@ -172,51 +172,7 @@ inode_set_iversion_queried(struct inode *inode, u64 val) I_VERSION_QUERIED); } -/** - * inode_maybe_inc_iversion - increments i_version - * @inode: inode with the i_version that should be updated - * @force: increment the counter even if it's not necessary? - * - * Every time the inode is modified, the i_version field must be seen to have - * changed by any observer. - * - * If "force" is set or the QUERIED flag is set, then ensure that we increment - * the value, and clear the queried flag. - * - * In the common case where neither is set, then we can return "false" without - * updating i_version. - * - * If this function returns false, and no other metadata has changed, then we - * can avoid logging the metadata. - */ -static inline bool -inode_maybe_inc_iversion(struct inode *inode, bool force) -{ - u64 cur, new; - - /* - * The i_version field is not strictly ordered with any other inode - * information, but the legacy inode_inc_iversion code used a spinlock - * to serialize increments. - * - * Here, we add full memory barriers to ensure that any de-facto - * ordering with other info is preserved. - * - * This barrier pairs with the barrier in inode_query_iversion() - */ - smp_mb(); - cur = inode_peek_iversion_raw(inode); - do { - /* If flag is clear then we needn't do anything */ - if (!force && !(cur & I_VERSION_QUERIED)) - return false; - - /* Since lowest bit is flag, add 2 to avoid it */ - new = (cur & ~I_VERSION_QUERIED) + I_VERSION_INCREMENT; - } while (!atomic64_try_cmpxchg(&inode->i_version, &cur, new)); - return true; -} - +bool inode_maybe_inc_iversion(struct inode *inode, bool force); /** * inode_inc_iversion - forcibly increment i_version -- cgit v1.2.3 From 7ec354baa2ad6dcf1b481a5a582293cec0eb2a67 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Fri, 9 Sep 2022 14:25:29 +0200 Subject: proc: make config PROC_CHILDREN depend on PROC_FS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 2e13ba54a268 ("fs, proc: introduce CONFIG_PROC_CHILDREN") introduces the config PROC_CHILDREN to configure kernels to provide the /proc/<pid>/task/<tid>/children file. When one deselects PROC_FS for kernel builds without /proc/, the config PROC_CHILDREN has no effect anymore, but is still visible in menuconfig. Add the dependency on PROC_FS to make the PROC_CHILDREN option disappear for kernel builds without /proc/.
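To see what the option actually provides, here is a hypothetical userspace reader of the children file; it assumes a kernel built with both PROC_FS and PROC_CHILDREN:

	#include <stdio.h>

	/* Print the first-level children of init's main thread; the file
	 * holds space-separated child pids. */
	int main(void)
	{
		FILE *f = fopen("/proc/1/task/1/children", "r");
		int pid;

		if (!f)
			return 1;	/* option disabled or file not readable */
		while (fscanf(f, "%d", &pid) == 1)
			printf("child: %d\n", pid);
		fclose(f);
		return 0;
	}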
Link: https://lkml.kernel.org/r/20220909122529.1941-1-lukas.bulwahn@gmail.com Fixes: 2e13ba54a268 ("fs, proc: introduce CONFIG_PROC_CHILDREN") Signed-off-by: Lukas Bulwahn Cc: Iago López Galeiras Cc: Alexey Dobriyan Signed-off-by: Andrew Morton --- fs/proc/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig index c930001056f9..32b1116ae137 100644 --- a/fs/proc/Kconfig +++ b/fs/proc/Kconfig @@ -92,6 +92,7 @@ config PROC_PAGE_MONITOR config PROC_CHILDREN bool "Include /proc/<pid>/task/<tid>/children file" + depends on PROC_FS default n help Provides a fast way to retrieve first level children pids of a task. See -- cgit v1.2.3 From e77999c1d4d26df7a3fe83627d05898052437269 Mon Sep 17 00:00:00 2001 From: wangjianli Date: Thu, 8 Sep 2022 21:00:36 +0800 Subject: fs/ocfs2: fix repeated words in comments Delete the redundant word 'to'. Link: https://lkml.kernel.org/r/20220908130036.31149-1-wangjianli@cdjrlc.com Signed-off-by: wangjianli Acked-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Gang He Cc: Jun Piao Signed-off-by: Andrew Morton --- fs/ocfs2/refcounttree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 1358981e80a3..623db358b1ef 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -2614,7 +2614,7 @@ static inline unsigned int ocfs2_cow_align_length(struct super_block *sb, } /* - * Calculate out the start and number of virtual clusters we need to to CoW. + * Calculate out the start and number of virtual clusters we need to CoW. * * cpos is vitual start cluster position we want to do CoW in a * file and write_len is the cluster length. -- cgit v1.2.3 From 0badb2e46a7699d09ef09a2c7f8f4fe66c15e606 Mon Sep 17 00:00:00 2001 From: Minghao Chi Date: Wed, 21 Sep 2022 12:48:02 +0900 Subject: nilfs2: delete unnecessary checks before brelse() Patch series "nilfs2 minor amendments". This patch (of 2): The brelse() inline function tests whether its argument is NULL and, if so, returns immediately. Thus remove the tests which are not needed around the shown calls. Link: https://lkml.kernel.org/r/20220921034803.2476-1-konishi.ryusuke@gmail.com Link: https://lkml.kernel.org/r/20220819081700.96279-1-chi.minghao@zte.com.cn Link: https://lkml.kernel.org/r/20220921034803.2476-2-konishi.ryusuke@gmail.com Reported-by: Zeal Robot Signed-off-by: Minghao Chi Signed-off-by: Ryusuke Konishi Cc: ye xingchen Signed-off-by: Andrew Morton --- fs/nilfs2/btree.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index 9f4d9432d38a..b9d15c3df3cc 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c @@ -1668,8 +1668,7 @@ static int nilfs_btree_check_delete(struct nilfs_bmap *btree, __u64 key) maxkey = nilfs_btree_node_get_key(node, nchildren - 1); nextmaxkey = (nchildren > 1) ?
nilfs_btree_node_get_key(node, nchildren - 2) : 0; - if (bh != NULL) - brelse(bh); + brelse(bh); return (maxkey == key) && (nextmaxkey < NILFS_BMAP_LARGE_LOW); } @@ -1717,8 +1716,7 @@ static int nilfs_btree_gather_data(struct nilfs_bmap *btree, ptrs[i] = le64_to_cpu(dptrs[i]); } - if (bh != NULL) - brelse(bh); + brelse(bh); return nitems; } -- cgit v1.2.3 From da6f79164e98de4ab3f2fdeea4875207fe282014 Mon Sep 17 00:00:00 2001 From: ye xingchen Date: Wed, 21 Sep 2022 12:48:03 +0900 Subject: nilfs2: remove the unneeded result variable Return the value nilfs_segctor_sync() directly instead of storing it in another redundant variable. Link: https://lkml.kernel.org/r/20220831033403.302184-1-ye.xingchen@zte.com.cn Link: https://lkml.kernel.org/r/20220921034803.2476-3-konishi.ryusuke@gmail.com Reported-by: Zeal Robot Signed-off-by: ye xingchen Signed-off-by: Ryusuke Konishi Cc: Minghao Chi Signed-off-by: Andrew Morton --- fs/nilfs2/segment.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 0afe0832c754..9abae2c9120e 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -2235,7 +2235,6 @@ int nilfs_construct_segment(struct super_block *sb) struct the_nilfs *nilfs = sb->s_fs_info; struct nilfs_sc_info *sci = nilfs->ns_writer; struct nilfs_transaction_info *ti; - int err; if (!sci) return -EROFS; @@ -2243,8 +2242,7 @@ int nilfs_construct_segment(struct super_block *sb) /* A call inside transactions causes a deadlock. */ BUG_ON((ti = current->journal_info) && ti->ti_magic == NILFS_TI_MAGIC); - err = nilfs_segctor_sync(sci); - return err; + return nilfs_segctor_sync(sci); } /** -- cgit v1.2.3 From ef1d61781bc6708ccc4a21262cc80a7dad952e04 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 20 Sep 2022 20:35:23 +0300 Subject: proc: mark more files as permanent Mark /proc/devices /proc/kpagecount /proc/kpageflags /proc/kpagecgroup /proc/loadavg /proc/meminfo /proc/softirqs /proc/uptime /proc/version as permanent /proc entries, saving alloc/free and some list/spinlock ops per use. These files are never removed by the kernel so it is OK to mark them. 
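Every file in the diff below gets the same two-line treatment; a condensed sketch of the pattern, with a hypothetical "example" entry and example_show() callback (as in the diff, boot-time creation is assumed to succeed):

	/* Create the entry once at boot, then mark it permanent so proc can
	 * skip the per-use alloc/free and list/spinlock work noted above. */
	static int __init proc_example_init(void)
	{
		struct proc_dir_entry *pde;

		pde = proc_create_single("example", 0, NULL, example_show);
		pde_make_permanent(pde);
		return 0;
	}
	fs_initcall(proc_example_init);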
Link: https://lkml.kernel.org/r/Yyn527DzDMa+r0Yj@localhost.localdomain Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton --- fs/proc/devices.c | 6 +++++- fs/proc/internal.h | 5 +++++ fs/proc/loadavg.c | 6 +++++- fs/proc/meminfo.c | 5 ++++- fs/proc/page.c | 3 +++ fs/proc/softirqs.c | 6 +++++- fs/proc/uptime.c | 6 +++++- fs/proc/version.c | 6 +++++- 8 files changed, 37 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/proc/devices.c b/fs/proc/devices.c index 837971e74109..fe7bfcb7d049 100644 --- a/fs/proc/devices.c +++ b/fs/proc/devices.c @@ -4,6 +4,7 @@ #include #include #include +#include "internal.h" static int devinfo_show(struct seq_file *f, void *v) { @@ -54,7 +55,10 @@ static const struct seq_operations devinfo_ops = { static int __init proc_devices_init(void) { - proc_create_seq("devices", 0, NULL, &devinfo_ops); + struct proc_dir_entry *pde; + + pde = proc_create_seq("devices", 0, NULL, &devinfo_ops); + pde_make_permanent(pde); return 0; } fs_initcall(proc_devices_init); diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 06a80f78433d..af277184b807 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -79,6 +79,11 @@ static inline bool pde_is_permanent(const struct proc_dir_entry *pde) return pde->flags & PROC_ENTRY_PERMANENT; } +static inline void pde_make_permanent(struct proc_dir_entry *pde) +{ + pde->flags |= PROC_ENTRY_PERMANENT; +} + extern struct kmem_cache *proc_dir_entry_cache; void pde_free(struct proc_dir_entry *pde); diff --git a/fs/proc/loadavg.c b/fs/proc/loadavg.c index f32878d9a39f..817981e57223 100644 --- a/fs/proc/loadavg.c +++ b/fs/proc/loadavg.c @@ -9,6 +9,7 @@ #include #include #include +#include "internal.h" static int loadavg_proc_show(struct seq_file *m, void *v) { @@ -27,7 +28,10 @@ static int loadavg_proc_show(struct seq_file *m, void *v) static int __init proc_loadavg_init(void) { - proc_create_single("loadavg", 0, NULL, loadavg_proc_show); + struct proc_dir_entry *pde; + + pde = proc_create_single("loadavg", 0, NULL, loadavg_proc_show); + pde_make_permanent(pde); return 0; } fs_initcall(proc_loadavg_init); diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index 6e89f0e2fd20..70e5294052d5 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -162,7 +162,10 @@ static int meminfo_proc_show(struct seq_file *m, void *v) static int __init proc_meminfo_init(void) { - proc_create_single("meminfo", 0, NULL, meminfo_proc_show); + struct proc_dir_entry *pde; + + pde = proc_create_single("meminfo", 0, NULL, meminfo_proc_show); + pde_make_permanent(pde); return 0; } fs_initcall(proc_meminfo_init); diff --git a/fs/proc/page.c b/fs/proc/page.c index a2873a617ae8..f2273b164535 100644 --- a/fs/proc/page.c +++ b/fs/proc/page.c @@ -91,6 +91,7 @@ static ssize_t kpagecount_read(struct file *file, char __user *buf, } static const struct proc_ops kpagecount_proc_ops = { + .proc_flags = PROC_ENTRY_PERMANENT, .proc_lseek = mem_lseek, .proc_read = kpagecount_read, }; @@ -268,6 +269,7 @@ static ssize_t kpageflags_read(struct file *file, char __user *buf, } static const struct proc_ops kpageflags_proc_ops = { + .proc_flags = PROC_ENTRY_PERMANENT, .proc_lseek = mem_lseek, .proc_read = kpageflags_read, }; @@ -322,6 +324,7 @@ static ssize_t kpagecgroup_read(struct file *file, char __user *buf, } static const struct proc_ops kpagecgroup_proc_ops = { + .proc_flags = PROC_ENTRY_PERMANENT, .proc_lseek = mem_lseek, .proc_read = kpagecgroup_read, }; diff --git a/fs/proc/softirqs.c b/fs/proc/softirqs.c index 12901dcf57e2..f4616083faef 100644 --- 
a/fs/proc/softirqs.c +++ b/fs/proc/softirqs.c @@ -3,6 +3,7 @@ #include #include #include +#include "internal.h" /* * /proc/softirqs ... display the number of softirqs @@ -27,7 +28,10 @@ static int show_softirqs(struct seq_file *p, void *v) static int __init proc_softirqs_init(void) { - proc_create_single("softirqs", 0, NULL, show_softirqs); + struct proc_dir_entry *pde; + + pde = proc_create_single("softirqs", 0, NULL, show_softirqs); + pde_make_permanent(pde); return 0; } fs_initcall(proc_softirqs_init); diff --git a/fs/proc/uptime.c b/fs/proc/uptime.c index deb99bc9b7e6..b5343d209381 100644 --- a/fs/proc/uptime.c +++ b/fs/proc/uptime.c @@ -7,6 +7,7 @@ #include #include #include +#include "internal.h" static int uptime_proc_show(struct seq_file *m, void *v) { @@ -39,7 +40,10 @@ static int uptime_proc_show(struct seq_file *m, void *v) static int __init proc_uptime_init(void) { - proc_create_single("uptime", 0, NULL, uptime_proc_show); + struct proc_dir_entry *pde; + + pde = proc_create_single("uptime", 0, NULL, uptime_proc_show); + pde_make_permanent(pde); return 0; } fs_initcall(proc_uptime_init); diff --git a/fs/proc/version.c b/fs/proc/version.c index b449f186577f..02e3c3cd4a9a 100644 --- a/fs/proc/version.c +++ b/fs/proc/version.c @@ -5,6 +5,7 @@ #include #include #include +#include "internal.h" static int version_proc_show(struct seq_file *m, void *v) { @@ -17,7 +18,10 @@ static int version_proc_show(struct seq_file *m, void *v) static int __init proc_version_init(void) { - proc_create_single("version", 0, NULL, version_proc_show); + struct proc_dir_entry *pde; + + pde = proc_create_single("version", 0, NULL, version_proc_show); + pde_make_permanent(pde); return 0; } fs_initcall(proc_version_init); -- cgit v1.2.3 From 723ac751208f6d6540191689cfbf6c77135a7a1b Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Thu, 29 Sep 2022 21:33:30 +0900 Subject: nilfs2: replace WARN_ONs by nilfs_error for checkpoint acquisition failure If creation or finalization of a checkpoint fails due to anomalies in the checkpoint metadata on disk, a kernel warning is generated. This patch replaces the WARN_ONs by nilfs_error, so that a kernel, booted with panic_on_warn, does not panic. A nilfs_error is appropriate here to handle the abnormal filesystem condition. This also replaces the detected error codes with an I/O error so that neither of the internal error codes is returned to callers. 
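The rule being applied here is that WARN_ON() is for kernel bugs, while corrupt on-disk metadata is an abnormal but reachable filesystem state that should be reported and survived. A condensed before/after view of the conversion, simplified from the diff that follows:

	/* Before: corrupt checkpoint metadata fires a kernel warning, which
	 * panics any kernel booted with panic_on_warn. */
	WARN_ON(err == -EINVAL || err == -ENOENT);

	/* After: report a filesystem error and hand callers -EIO instead of
	 * leaking the internal error code. */
	if (err == -EINVAL || err == -ENOENT) {
		nilfs_error(sci->sc_super,
			    "checkpoint creation failed due to metadata corruption.");
		err = -EIO;
	}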
Link: https://lkml.kernel.org/r/20220929123330.19658-1-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Reported-by: syzbot+fbb3e0b24e8dae5a16ee@syzkaller.appspotmail.com Signed-off-by: Andrew Morton --- fs/nilfs2/segment.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 9abae2c9120e..6e4a7c4228c7 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -875,9 +875,11 @@ static int nilfs_segctor_create_checkpoint(struct nilfs_sc_info *sci) nilfs_mdt_mark_dirty(nilfs->ns_cpfile); nilfs_cpfile_put_checkpoint( nilfs->ns_cpfile, nilfs->ns_cno, bh_cp); - } else - WARN_ON(err == -EINVAL || err == -ENOENT); - + } else if (err == -EINVAL || err == -ENOENT) { + nilfs_error(sci->sc_super, + "checkpoint creation failed due to metadata corruption."); + err = -EIO; + } return err; } @@ -891,7 +893,11 @@ static int nilfs_segctor_fill_in_checkpoint(struct nilfs_sc_info *sci) err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, 0, &raw_cp, &bh_cp); if (unlikely(err)) { - WARN_ON(err == -EINVAL || err == -ENOENT); + if (err == -EINVAL || err == -ENOENT) { + nilfs_error(sci->sc_super, + "checkpoint finalization failed due to metadata corruption."); + err = -EIO; + } goto failed_ibh; } raw_cp->cp_snapshot_list.ssl_next = 0; -- cgit v1.2.3
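A closing illustration, tying back to the ntfs_attr_find() hardening earlier in this log: when walking variable-length records whose length field comes from disk, the bounds check must be written so that the pointer arithmetic itself cannot wrap. A generic sketch with a hypothetical record type, not the ntfs code; comparing the space that remains, rather than the advanced pointer, avoids ever computing an address that might overflow:

	#include <stddef.h>
	#include <stdint.h>

	struct rec {
		uint32_t len;	/* total record size, read from disk */
	};

	static void walk_records(uint8_t *buf, size_t size)
	{
		uint8_t *p = buf, *end = buf + size;

		while ((size_t)(end - p) >= sizeof(struct rec)) {
			uint32_t len = ((struct rec *)p)->len;

			if (len < sizeof(struct rec))
				break;	/* zero/short length: no forward progress */
			if (len > (size_t)(end - p))
				break;	/* record would overrun the buffer */
			/* ... process the record at p ... */
			p += len;
		}
	}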