From 04d168c6d42d1772d35372301a14bb20784c81c5 Mon Sep 17 00:00:00 2001 From: Jakob Koschel Date: Fri, 29 Apr 2022 14:37:59 -0700 Subject: fs/proc/kcore.c: remove check of list iterator against head past the loop body When list_for_each_entry() completes the iteration over the whole list without breaking the loop, the iterator value will be a bogus pointer computed based on the head element. While it is safe to use the pointer to determine if it was computed based on the head element, either with list_entry_is_head() or &pos->member == head, using the iterator variable after the loop should be avoided. In preparation to limit the scope of a list iterator to the list traversal loop, use a dedicated pointer to point to the found element [1]. [akpm@linux-foundation.org: reduce scope of `iter'] Link: https://lore.kernel.org/all/CAHk-=wgRr_D8CB-D9Kg-c=EHreAsk5SqXPwr9Y7k9sA6cWXJ6w@mail.gmail.com/ [1] Link: https://lkml.kernel.org/r/20220331223700.902556-1-jakobkoschel@gmail.com Signed-off-by: Jakob Koschel Cc: Mike Rapoport Cc: David Hildenbrand Cc: Oscar Salvador Cc: "Brian Johannesmeyer" Cc: Cristiano Giuffrida Cc: "Bos, H.J." Cc: Alexey Dobriyan Signed-off-by: Andrew Morton --- fs/proc/kcore.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'fs/proc') diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index 982e694aae77..dff921f7ca33 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -479,10 +479,15 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) * the previous entry, search for a matching entry. */ if (!m || start < m->addr || start >= m->addr + m->size) { - list_for_each_entry(m, &kclist_head, list) { - if (start >= m->addr && - start < m->addr + m->size) + struct kcore_list *iter; + + m = NULL; + list_for_each_entry(iter, &kclist_head, list) { + if (start >= iter->addr && + start < iter->addr + iter->size) { + m = iter; break; + } } } @@ -492,12 +497,11 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) page_offline_freeze(); } - if (&m->list == &kclist_head) { + if (!m) { if (clear_user(buffer, tsz)) { ret = -EFAULT; goto out; } - m = NULL; /* skip the list anchor */ goto skip; } -- cgit v1.2.3 From 5d8de293c224896a4da99763fce4f9794308caf4 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 29 Apr 2022 14:37:59 -0700 Subject: vmcore: convert copy_oldmem_page() to take an iov_iter Patch series "Convert vmcore to use an iov_iter", v5. For some reason several people have been sending bad patches to fix compiler warnings in vmcore recently. Here's how it should be done. Compile-tested only on x86. As noted in the first patch, s390 should take this conversion a bit further, but I'm not inclined to do that work myself. This patch (of 3): Instead of passing in a 'buf' and 'userbuf' argument, pass in an iov_iter. s390 needs more work to pass the iov_iter down further, or refactor, but I'd be more comfortable if someone who can test on s390 did that work. It's more convenient to convert the whole of read_from_oldmem() to take an iov_iter at the same time, so rename it to read_from_oldmem_iter() and add a temporary read_from_oldmem() wrapper that creates an iov_iter. Link: https://lkml.kernel.org/r/20220408090636.560886-1-bhe@redhat.com Link: https://lkml.kernel.org/r/20220408090636.560886-2-bhe@redhat.com Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Baoquan He Reviewed-by: Christoph Hellwig Cc: Heiko Carstens Signed-off-by: Andrew Morton --- arch/arm/kernel/crash_dump.c | 27 +++----------------- arch/arm64/kernel/crash_dump.c | 29 +++------------------ arch/ia64/kernel/crash_dump.c | 32 +++--------------------- arch/mips/kernel/crash_dump.c | 27 +++----------------- arch/powerpc/kernel/crash_dump.c | 35 ++++---------------------- arch/riscv/kernel/crash_dump.c | 26 +++---------------- arch/s390/kernel/crash_dump.c | 13 ++++++---- arch/sh/kernel/crash_dump.c | 29 ++++----------------- arch/x86/kernel/crash_dump_32.c | 29 +++------------------ arch/x86/kernel/crash_dump_64.c | 41 ++++++++---------------------- fs/proc/vmcore.c | 54 +++++++++++++++++++++++++--------------- include/linux/crash_dump.h | 9 +++---- 12 files changed, 91 insertions(+), 260 deletions(-) (limited to 'fs/proc') diff --git a/arch/arm/kernel/crash_dump.c b/arch/arm/kernel/crash_dump.c index 53cb92435392..938bd932df9a 100644 --- a/arch/arm/kernel/crash_dump.c +++ b/arch/arm/kernel/crash_dump.c @@ -14,22 +14,10 @@ #include #include #include +#include -/** - * copy_oldmem_page() - copy one page from old kernel memory - * @pfn: page frame number to be copied - * @buf: buffer where the copied page is placed - * @csize: number of bytes to copy - * @offset: offset in bytes into the page - * @userbuf: if set, @buf is int he user address space - * - * This function copies one page from old kernel memory into buffer pointed by - * @buf. If @buf is in userspace, set @userbuf to %1. Returns number of bytes - * copied or negative error in case of failure. - */ -ssize_t copy_oldmem_page(unsigned long pfn, char *buf, - size_t csize, unsigned long offset, - int userbuf) +ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn, + size_t csize, unsigned long offset) { void *vaddr; @@ -40,14 +28,7 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf, if (!vaddr) return -ENOMEM; - if (userbuf) { - if (copy_to_user(buf, vaddr + offset, csize)) { - iounmap(vaddr); - return -EFAULT; - } - } else { - memcpy(buf, vaddr + offset, csize); - } + csize = copy_to_iter(vaddr + offset, csize, iter); iounmap(vaddr); return csize; diff --git a/arch/arm64/kernel/crash_dump.c b/arch/arm64/kernel/crash_dump.c index 58303a9ec32c..670e4ce81822 100644 --- a/arch/arm64/kernel/crash_dump.c +++ b/arch/arm64/kernel/crash_dump.c @@ -9,25 +9,11 @@ #include #include #include -#include -#include +#include #include -/** - * copy_oldmem_page() - copy one page from old kernel memory - * @pfn: page frame number to be copied - * @buf: buffer where the copied page is placed - * @csize: number of bytes to copy - * @offset: offset in bytes into the page - * @userbuf: if set, @buf is in a user address space - * - * This function copies one page from old kernel memory into buffer pointed by - * @buf. If @buf is in userspace, set @userbuf to %1. Returns number of bytes - * copied or negative error in case of failure. - */ -ssize_t copy_oldmem_page(unsigned long pfn, char *buf, - size_t csize, unsigned long offset, - int userbuf) +ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn, + size_t csize, unsigned long offset) { void *vaddr; @@ -38,14 +24,7 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf, if (!vaddr) return -ENOMEM; - if (userbuf) { - if (copy_to_user((char __user *)buf, vaddr + offset, csize)) { - memunmap(vaddr); - return -EFAULT; - } - } else { - memcpy(buf, vaddr + offset, csize); - } + csize = copy_to_iter(vaddr + offset, csize, iter); memunmap(vaddr); diff --git a/arch/ia64/kernel/crash_dump.c b/arch/ia64/kernel/crash_dump.c index 0ed3c3dee4cd..4ef68e2aa757 100644 --- a/arch/ia64/kernel/crash_dump.c +++ b/arch/ia64/kernel/crash_dump.c @@ -10,42 +10,18 @@ #include #include #include - +#include #include -#include -/** - * copy_oldmem_page - copy one page from "oldmem" - * @pfn: page frame number to be copied - * @buf: target memory address for the copy; this can be in kernel address - * space or user address space (see @userbuf) - * @csize: number of bytes to copy - * @offset: offset in bytes into the page (based on pfn) to begin the copy - * @userbuf: if set, @buf is in user address space, use copy_to_user(), - * otherwise @buf is in kernel address space, use memcpy(). - * - * Copy a page from "oldmem". For this page, there is no pte mapped - * in the current kernel. We stitch up a pte, similar to kmap_atomic. - * - * Calling copy_to_user() in atomic context is not desirable. Hence first - * copying the data to a pre-allocated kernel page and then copying to user - * space in non-atomic context. - */ -ssize_t -copy_oldmem_page(unsigned long pfn, char *buf, - size_t csize, unsigned long offset, int userbuf) +ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn, + size_t csize, unsigned long offset) { void *vaddr; if (!csize) return 0; vaddr = __va(pfn< #include +#include -/** - * copy_oldmem_page - copy one page from "oldmem" - * @pfn: page frame number to be copied - * @buf: target memory address for the copy; this can be in kernel address - * space or user address space (see @userbuf) - * @csize: number of bytes to copy - * @offset: offset in bytes into the page (based on pfn) to begin the copy - * @userbuf: if set, @buf is in user address space, use copy_to_user(), - * otherwise @buf is in kernel address space, use memcpy(). - * - * Copy a page from "oldmem". For this page, there is no pte mapped - * in the current kernel. - */ -ssize_t copy_oldmem_page(unsigned long pfn, char *buf, - size_t csize, unsigned long offset, int userbuf) +ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn, + size_t csize, unsigned long offset) { void *vaddr; @@ -24,14 +12,7 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf, return 0; vaddr = kmap_local_pfn(pfn); - - if (!userbuf) { - memcpy(buf, vaddr + offset, csize); - } else { - if (copy_to_user(buf, vaddr + offset, csize)) - csize = -EFAULT; - } - + csize = copy_to_iter(vaddr + offset, csize, iter); kunmap_local(vaddr); return csize; diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c index 5693e1c67c2b..32b4a97f1b79 100644 --- a/arch/powerpc/kernel/crash_dump.c +++ b/arch/powerpc/kernel/crash_dump.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include @@ -68,33 +68,8 @@ void __init setup_kdump_trampoline(void) } #endif /* CONFIG_NONSTATIC_KERNEL */ -static size_t copy_oldmem_vaddr(void *vaddr, char *buf, size_t csize, - unsigned long offset, int userbuf) -{ - if (userbuf) { - if (copy_to_user((char __user *)buf, (vaddr + offset), csize)) - return -EFAULT; - } else - memcpy(buf, (vaddr + offset), csize); - - return csize; -} - -/** - * copy_oldmem_page - copy one page from "oldmem" - * @pfn: page frame number to be copied - * @buf: target memory address for the copy; this can be in kernel address - * space or user address space (see @userbuf) - * @csize: number of bytes to copy - * @offset: offset in bytes into the page (based on pfn) to begin the copy - * @userbuf: if set, @buf is in user address space, use copy_to_user(), - * otherwise @buf is in kernel address space, use memcpy(). - * - * Copy a page from "oldmem". For this page, there is no pte mapped - * in the current kernel. We stitch up a pte, similar to kmap_atomic. - */ -ssize_t copy_oldmem_page(unsigned long pfn, char *buf, - size_t csize, unsigned long offset, int userbuf) +ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn, + size_t csize, unsigned long offset) { void *vaddr; phys_addr_t paddr; @@ -107,10 +82,10 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf, if (memblock_is_region_memory(paddr, csize)) { vaddr = __va(paddr); - csize = copy_oldmem_vaddr(vaddr, buf, csize, offset, userbuf); + csize = copy_to_iter(vaddr + offset, csize, iter); } else { vaddr = ioremap_cache(paddr, PAGE_SIZE); - csize = copy_oldmem_vaddr(vaddr, buf, csize, offset, userbuf); + csize = copy_to_iter(vaddr + offset, csize, iter); iounmap(vaddr); } diff --git a/arch/riscv/kernel/crash_dump.c b/arch/riscv/kernel/crash_dump.c index 86cc0ada5752..ea2158cee97b 100644 --- a/arch/riscv/kernel/crash_dump.c +++ b/arch/riscv/kernel/crash_dump.c @@ -7,22 +7,10 @@ #include #include +#include -/** - * copy_oldmem_page() - copy one page from old kernel memory - * @pfn: page frame number to be copied - * @buf: buffer where the copied page is placed - * @csize: number of bytes to copy - * @offset: offset in bytes into the page - * @userbuf: if set, @buf is in a user address space - * - * This function copies one page from old kernel memory into buffer pointed by - * @buf. If @buf is in userspace, set @userbuf to %1. Returns number of bytes - * copied or negative error in case of failure. - */ -ssize_t copy_oldmem_page(unsigned long pfn, char *buf, - size_t csize, unsigned long offset, - int userbuf) +ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn, + size_t csize, unsigned long offset) { void *vaddr; @@ -33,13 +21,7 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf, if (!vaddr) return -ENOMEM; - if (userbuf) { - if (copy_to_user((char __user *)buf, vaddr + offset, csize)) { - memunmap(vaddr); - return -EFAULT; - } - } else - memcpy(buf, vaddr + offset, csize); + csize = copy_to_iter(vaddr + offset, csize, iter); memunmap(vaddr); return csize; diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index 69819b765250..a2c1c55daec0 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -212,8 +213,8 @@ static int copy_oldmem_user(void __user *dst, unsigned long src, size_t count) /* * Copy one page from "oldmem" */ -ssize_t copy_oldmem_page(unsigned long pfn, char *buf, size_t csize, - unsigned long offset, int userbuf) +ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn, size_t csize, + unsigned long offset) { unsigned long src; int rc; @@ -221,10 +222,12 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf, size_t csize, if (!csize) return 0; src = pfn_to_phys(pfn) + offset; - if (userbuf) - rc = copy_oldmem_user((void __force __user *) buf, src, csize); + + /* XXX: pass the iov_iter down to a common function */ + if (iter_is_iovec(iter)) + rc = copy_oldmem_user(iter->iov->iov_base, src, csize); else - rc = copy_oldmem_kernel((void *) buf, src, csize); + rc = copy_oldmem_kernel(iter->kvec->iov_base, src, csize); return rc; } diff --git a/arch/sh/kernel/crash_dump.c b/arch/sh/kernel/crash_dump.c index 5b41b59698c1..19ce6a950aac 100644 --- a/arch/sh/kernel/crash_dump.c +++ b/arch/sh/kernel/crash_dump.c @@ -8,23 +8,11 @@ #include #include #include +#include #include -/** - * copy_oldmem_page - copy one page from "oldmem" - * @pfn: page frame number to be copied - * @buf: target memory address for the copy; this can be in kernel address - * space or user address space (see @userbuf) - * @csize: number of bytes to copy - * @offset: offset in bytes into the page (based on pfn) to begin the copy - * @userbuf: if set, @buf is in user address space, use copy_to_user(), - * otherwise @buf is in kernel address space, use memcpy(). - * - * Copy a page from "oldmem". For this page, there is no pte mapped - * in the current kernel. We stitch up a pte, similar to kmap_atomic. - */ -ssize_t copy_oldmem_page(unsigned long pfn, char *buf, - size_t csize, unsigned long offset, int userbuf) +ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn, + size_t csize, unsigned long offset) { void __iomem *vaddr; @@ -32,15 +20,8 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf, return 0; vaddr = ioremap(pfn << PAGE_SHIFT, PAGE_SIZE); - - if (userbuf) { - if (copy_to_user((void __user *)buf, (vaddr + offset), csize)) { - iounmap(vaddr); - return -EFAULT; - } - } else - memcpy(buf, (vaddr + offset), csize); - + csize = copy_to_iter(vaddr + offset, csize, iter); iounmap(vaddr); + return csize; } diff --git a/arch/x86/kernel/crash_dump_32.c b/arch/x86/kernel/crash_dump_32.c index 5fcac46aaf6b..5f4ae5476e19 100644 --- a/arch/x86/kernel/crash_dump_32.c +++ b/arch/x86/kernel/crash_dump_32.c @@ -10,8 +10,7 @@ #include #include #include - -#include +#include static inline bool is_crashed_pfn_valid(unsigned long pfn) { @@ -29,21 +28,8 @@ static inline bool is_crashed_pfn_valid(unsigned long pfn) #endif } -/** - * copy_oldmem_page - copy one page from "oldmem" - * @pfn: page frame number to be copied - * @buf: target memory address for the copy; this can be in kernel address - * space or user address space (see @userbuf) - * @csize: number of bytes to copy - * @offset: offset in bytes into the page (based on pfn) to begin the copy - * @userbuf: if set, @buf is in user address space, use copy_to_user(), - * otherwise @buf is in kernel address space, use memcpy(). - * - * Copy a page from "oldmem". For this page, there might be no pte mapped - * in the current kernel. - */ -ssize_t copy_oldmem_page(unsigned long pfn, char *buf, size_t csize, - unsigned long offset, int userbuf) +ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn, size_t csize, + unsigned long offset) { void *vaddr; @@ -54,14 +40,7 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf, size_t csize, return -EFAULT; vaddr = kmap_local_pfn(pfn); - - if (!userbuf) { - memcpy(buf, vaddr + offset, csize); - } else { - if (copy_to_user(buf, vaddr + offset, csize)) - csize = -EFAULT; - } - + csize = copy_to_iter(vaddr + offset, csize, iter); kunmap_local(vaddr); return csize; diff --git a/arch/x86/kernel/crash_dump_64.c b/arch/x86/kernel/crash_dump_64.c index 97529552dd24..94fe4aff9694 100644 --- a/arch/x86/kernel/crash_dump_64.c +++ b/arch/x86/kernel/crash_dump_64.c @@ -8,12 +8,12 @@ #include #include -#include +#include #include #include -static ssize_t __copy_oldmem_page(unsigned long pfn, char *buf, size_t csize, - unsigned long offset, int userbuf, +static ssize_t __copy_oldmem_page(struct iov_iter *iter, unsigned long pfn, + size_t csize, unsigned long offset, bool encrypted) { void *vaddr; @@ -29,46 +29,27 @@ static ssize_t __copy_oldmem_page(unsigned long pfn, char *buf, size_t csize, if (!vaddr) return -ENOMEM; - if (userbuf) { - if (copy_to_user((void __user *)buf, vaddr + offset, csize)) { - iounmap((void __iomem *)vaddr); - return -EFAULT; - } - } else - memcpy(buf, vaddr + offset, csize); + csize = copy_to_iter(vaddr + offset, csize, iter); iounmap((void __iomem *)vaddr); return csize; } -/** - * copy_oldmem_page - copy one page of memory - * @pfn: page frame number to be copied - * @buf: target memory address for the copy; this can be in kernel address - * space or user address space (see @userbuf) - * @csize: number of bytes to copy - * @offset: offset in bytes into the page (based on pfn) to begin the copy - * @userbuf: if set, @buf is in user address space, use copy_to_user(), - * otherwise @buf is in kernel address space, use memcpy(). - * - * Copy a page from the old kernel's memory. For this page, there is no pte - * mapped in the current kernel. We stitch up a pte, similar to kmap_atomic. - */ -ssize_t copy_oldmem_page(unsigned long pfn, char *buf, size_t csize, - unsigned long offset, int userbuf) +ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn, size_t csize, + unsigned long offset) { - return __copy_oldmem_page(pfn, buf, csize, offset, userbuf, false); + return __copy_oldmem_page(iter, pfn, csize, offset, false); } -/** +/* * copy_oldmem_page_encrypted - same as copy_oldmem_page() above but ioremap the * memory with the encryption mask set to accommodate kdump on SME-enabled * machines. */ -ssize_t copy_oldmem_page_encrypted(unsigned long pfn, char *buf, size_t csize, - unsigned long offset, int userbuf) +ssize_t copy_oldmem_page_encrypted(struct iov_iter *iter, unsigned long pfn, + size_t csize, unsigned long offset) { - return __copy_oldmem_page(pfn, buf, csize, offset, userbuf, true); + return __copy_oldmem_page(iter, pfn, csize, offset, true); } ssize_t elfcorehdr_read(char *buf, size_t count, u64 *ppos) diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 6f1b8ddc6f7a..54dda2e19ed1 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include "internal.h" @@ -128,9 +129,8 @@ static int open_vmcore(struct inode *inode, struct file *file) } /* Reads a page from the oldmem device from given offset. */ -ssize_t read_from_oldmem(char *buf, size_t count, - u64 *ppos, int userbuf, - bool encrypted) +static ssize_t read_from_oldmem_iter(struct iov_iter *iter, size_t count, + u64 *ppos, bool encrypted) { unsigned long pfn, offset; size_t nr_bytes; @@ -152,29 +152,23 @@ ssize_t read_from_oldmem(char *buf, size_t count, /* If pfn is not ram, return zeros for sparse dump files */ if (!pfn_is_ram(pfn)) { - tmp = 0; - if (!userbuf) - memset(buf, 0, nr_bytes); - else if (clear_user(buf, nr_bytes)) - tmp = -EFAULT; + tmp = iov_iter_zero(nr_bytes, iter); } else { if (encrypted) - tmp = copy_oldmem_page_encrypted(pfn, buf, + tmp = copy_oldmem_page_encrypted(iter, pfn, nr_bytes, - offset, - userbuf); + offset); else - tmp = copy_oldmem_page(pfn, buf, nr_bytes, - offset, userbuf); + tmp = copy_oldmem_page(iter, pfn, nr_bytes, + offset); } - if (tmp < 0) { + if (tmp < nr_bytes) { srcu_read_unlock(&vmcore_cb_srcu, idx); - return tmp; + return -EFAULT; } *ppos += nr_bytes; count -= nr_bytes; - buf += nr_bytes; read += nr_bytes; ++pfn; offset = 0; @@ -184,6 +178,27 @@ ssize_t read_from_oldmem(char *buf, size_t count, return read; } +ssize_t read_from_oldmem(char *buf, size_t count, + u64 *ppos, int userbuf, + bool encrypted) +{ + struct iov_iter iter; + struct iovec iov; + struct kvec kvec; + + if (userbuf) { + iov.iov_base = (__force void __user *)buf; + iov.iov_len = count; + iov_iter_init(&iter, READ, &iov, 1, count); + } else { + kvec.iov_base = buf; + kvec.iov_len = count; + iov_iter_kvec(&iter, READ, &kvec, 1, count); + } + + return read_from_oldmem_iter(&iter, count, ppos, encrypted); +} + /* * Architectures may override this function to allocate ELF header in 2nd kernel */ @@ -228,11 +243,10 @@ int __weak remap_oldmem_pfn_range(struct vm_area_struct *vma, /* * Architectures which support memory encryption override this. */ -ssize_t __weak -copy_oldmem_page_encrypted(unsigned long pfn, char *buf, size_t csize, - unsigned long offset, int userbuf) +ssize_t __weak copy_oldmem_page_encrypted(struct iov_iter *iter, + unsigned long pfn, size_t csize, unsigned long offset) { - return copy_oldmem_page(pfn, buf, csize, offset, userbuf); + return copy_oldmem_page(iter, pfn, csize, offset); } /* diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h index 620821549b23..a1cf7d5c03c7 100644 --- a/include/linux/crash_dump.h +++ b/include/linux/crash_dump.h @@ -24,11 +24,10 @@ extern int remap_oldmem_pfn_range(struct vm_area_struct *vma, unsigned long from, unsigned long pfn, unsigned long size, pgprot_t prot); -extern ssize_t copy_oldmem_page(unsigned long, char *, size_t, - unsigned long, int); -extern ssize_t copy_oldmem_page_encrypted(unsigned long pfn, char *buf, - size_t csize, unsigned long offset, - int userbuf); +ssize_t copy_oldmem_page(struct iov_iter *i, unsigned long pfn, size_t csize, + unsigned long offset); +ssize_t copy_oldmem_page_encrypted(struct iov_iter *iter, unsigned long pfn, + size_t csize, unsigned long offset); void vmcore_cleanup(void); -- cgit v1.2.3 From 4a22fd20379ca897a6bfdb8372b4f9601e430332 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 29 Apr 2022 14:37:59 -0700 Subject: vmcore: convert __read_vmcore to use an iov_iter This gets rid of copy_to() and let us use proc_read_iter() instead of proc_read(). Link: https://lkml.kernel.org/r/20220408090636.560886-3-bhe@redhat.com Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Baoquan He Reviewed-by: Christoph Hellwig Signed-off-by: Andrew Morton --- fs/proc/vmcore.c | 82 +++++++++++++++++++++----------------------------------- 1 file changed, 30 insertions(+), 52 deletions(-) (limited to 'fs/proc') diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 54dda2e19ed1..4a721865b5cd 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -249,22 +249,8 @@ ssize_t __weak copy_oldmem_page_encrypted(struct iov_iter *iter, return copy_oldmem_page(iter, pfn, csize, offset); } -/* - * Copy to either kernel or user space - */ -static int copy_to(void *target, void *src, size_t size, int userbuf) -{ - if (userbuf) { - if (copy_to_user((char __user *) target, src, size)) - return -EFAULT; - } else { - memcpy(target, src, size); - } - return 0; -} - #ifdef CONFIG_PROC_VMCORE_DEVICE_DUMP -static int vmcoredd_copy_dumps(void *dst, u64 start, size_t size, int userbuf) +static int vmcoredd_copy_dumps(struct iov_iter *iter, u64 start, size_t size) { struct vmcoredd_node *dump; u64 offset = 0; @@ -277,14 +263,13 @@ static int vmcoredd_copy_dumps(void *dst, u64 start, size_t size, int userbuf) if (start < offset + dump->size) { tsz = min(offset + (u64)dump->size - start, (u64)size); buf = dump->buf + start - offset; - if (copy_to(dst, buf, tsz, userbuf)) { + if (copy_to_iter(buf, tsz, iter) < tsz) { ret = -EFAULT; goto out_unlock; } size -= tsz; start += tsz; - dst += tsz; /* Leave now if buffer filled already */ if (!size) @@ -340,33 +325,28 @@ out_unlock: /* Read from the ELF header and then the crash dump. On error, negative value is * returned otherwise number of bytes read are returned. */ -static ssize_t __read_vmcore(char *buffer, size_t buflen, loff_t *fpos, - int userbuf) +static ssize_t __read_vmcore(struct iov_iter *iter, loff_t *fpos) { ssize_t acc = 0, tmp; size_t tsz; u64 start; struct vmcore *m = NULL; - if (buflen == 0 || *fpos >= vmcore_size) + if (!iov_iter_count(iter) || *fpos >= vmcore_size) return 0; - /* trim buflen to not go beyond EOF */ - if (buflen > vmcore_size - *fpos) - buflen = vmcore_size - *fpos; + iov_iter_truncate(iter, vmcore_size - *fpos); /* Read ELF core header */ if (*fpos < elfcorebuf_sz) { - tsz = min(elfcorebuf_sz - (size_t)*fpos, buflen); - if (copy_to(buffer, elfcorebuf + *fpos, tsz, userbuf)) + tsz = min(elfcorebuf_sz - (size_t)*fpos, iov_iter_count(iter)); + if (copy_to_iter(elfcorebuf + *fpos, tsz, iter) < tsz) return -EFAULT; - buflen -= tsz; *fpos += tsz; - buffer += tsz; acc += tsz; /* leave now if filled buffer already */ - if (buflen == 0) + if (!iov_iter_count(iter)) return acc; } @@ -387,35 +367,32 @@ static ssize_t __read_vmcore(char *buffer, size_t buflen, loff_t *fpos, /* Read device dumps */ if (*fpos < elfcorebuf_sz + vmcoredd_orig_sz) { tsz = min(elfcorebuf_sz + vmcoredd_orig_sz - - (size_t)*fpos, buflen); + (size_t)*fpos, iov_iter_count(iter)); start = *fpos - elfcorebuf_sz; - if (vmcoredd_copy_dumps(buffer, start, tsz, userbuf)) + if (vmcoredd_copy_dumps(iter, start, tsz)) return -EFAULT; - buflen -= tsz; *fpos += tsz; - buffer += tsz; acc += tsz; /* leave now if filled buffer already */ - if (!buflen) + if (!iov_iter_count(iter)) return acc; } #endif /* CONFIG_PROC_VMCORE_DEVICE_DUMP */ /* Read remaining elf notes */ - tsz = min(elfcorebuf_sz + elfnotes_sz - (size_t)*fpos, buflen); + tsz = min(elfcorebuf_sz + elfnotes_sz - (size_t)*fpos, + iov_iter_count(iter)); kaddr = elfnotes_buf + *fpos - elfcorebuf_sz - vmcoredd_orig_sz; - if (copy_to(buffer, kaddr, tsz, userbuf)) + if (copy_to_iter(kaddr, tsz, iter) < tsz) return -EFAULT; - buflen -= tsz; *fpos += tsz; - buffer += tsz; acc += tsz; /* leave now if filled buffer already */ - if (buflen == 0) + if (!iov_iter_count(iter)) return acc; } @@ -423,19 +400,17 @@ static ssize_t __read_vmcore(char *buffer, size_t buflen, loff_t *fpos, if (*fpos < m->offset + m->size) { tsz = (size_t)min_t(unsigned long long, m->offset + m->size - *fpos, - buflen); + iov_iter_count(iter)); start = m->paddr + *fpos - m->offset; - tmp = read_from_oldmem(buffer, tsz, &start, - userbuf, cc_platform_has(CC_ATTR_MEM_ENCRYPT)); + tmp = read_from_oldmem_iter(iter, tsz, &start, + cc_platform_has(CC_ATTR_MEM_ENCRYPT)); if (tmp < 0) return tmp; - buflen -= tsz; *fpos += tsz; - buffer += tsz; acc += tsz; /* leave now if filled buffer already */ - if (buflen == 0) + if (!iov_iter_count(iter)) return acc; } } @@ -443,15 +418,14 @@ static ssize_t __read_vmcore(char *buffer, size_t buflen, loff_t *fpos, return acc; } -static ssize_t read_vmcore(struct file *file, char __user *buffer, - size_t buflen, loff_t *fpos) +static ssize_t read_vmcore(struct kiocb *iocb, struct iov_iter *iter) { - return __read_vmcore((__force char *) buffer, buflen, fpos, 1); + return __read_vmcore(iter, &iocb->ki_pos); } /* * The vmcore fault handler uses the page cache and fills data using the - * standard __vmcore_read() function. + * standard __read_vmcore() function. * * On s390 the fault handler is used for memory regions that can't be mapped * directly with remap_pfn_range(). @@ -461,9 +435,10 @@ static vm_fault_t mmap_vmcore_fault(struct vm_fault *vmf) #ifdef CONFIG_S390 struct address_space *mapping = vmf->vma->vm_file->f_mapping; pgoff_t index = vmf->pgoff; + struct iov_iter iter; + struct kvec kvec; struct page *page; loff_t offset; - char *buf; int rc; page = find_or_create_page(mapping, index, GFP_KERNEL); @@ -471,8 +446,11 @@ static vm_fault_t mmap_vmcore_fault(struct vm_fault *vmf) return VM_FAULT_OOM; if (!PageUptodate(page)) { offset = (loff_t) index << PAGE_SHIFT; - buf = __va((page_to_pfn(page) << PAGE_SHIFT)); - rc = __read_vmcore(buf, PAGE_SIZE, &offset, 0); + kvec.iov_base = page_address(page); + kvec.iov_len = PAGE_SIZE; + iov_iter_kvec(&iter, READ, &kvec, 1, PAGE_SIZE); + + rc = __read_vmcore(&iter, &offset); if (rc < 0) { unlock_page(page); put_page(page); @@ -722,7 +700,7 @@ static int mmap_vmcore(struct file *file, struct vm_area_struct *vma) static const struct proc_ops vmcore_proc_ops = { .proc_open = open_vmcore, - .proc_read = read_vmcore, + .proc_read_iter = read_vmcore, .proc_lseek = default_llseek, .proc_mmap = mmap_vmcore, }; -- cgit v1.2.3 From e0690479917cbce740eef51fa3de92c69647a5ad Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 29 Apr 2022 14:37:59 -0700 Subject: vmcore: convert read_from_oldmem() to take an iov_iter Remove the read_from_oldmem() wrapper introduced earlier and convert all the remaining callers to pass an iov_iter. Link: https://lkml.kernel.org/r/20220408090636.560886-4-bhe@redhat.com Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Baoquan He Reviewed-by: Christoph Hellwig Cc: Tiezhu Yang Cc: Amit Daniel Kachhap Cc: Al Viro Cc: Matthew Wilcox Signed-off-by: Andrew Morton --- arch/x86/kernel/crash_dump_64.c | 7 ++++++- fs/proc/vmcore.c | 40 +++++++++++++++------------------------- include/linux/crash_dump.h | 10 ++++------ 3 files changed, 25 insertions(+), 32 deletions(-) (limited to 'fs/proc') diff --git a/arch/x86/kernel/crash_dump_64.c b/arch/x86/kernel/crash_dump_64.c index 94fe4aff9694..e75bc2f217ff 100644 --- a/arch/x86/kernel/crash_dump_64.c +++ b/arch/x86/kernel/crash_dump_64.c @@ -54,6 +54,11 @@ ssize_t copy_oldmem_page_encrypted(struct iov_iter *iter, unsigned long pfn, ssize_t elfcorehdr_read(char *buf, size_t count, u64 *ppos) { - return read_from_oldmem(buf, count, ppos, 0, + struct kvec kvec = { .iov_base = buf, .iov_len = count }; + struct iov_iter iter; + + iov_iter_kvec(&iter, READ, &kvec, 1, count); + + return read_from_oldmem(&iter, count, ppos, cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)); } diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 4a721865b5cd..4eaeb645e759 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -129,7 +129,7 @@ static int open_vmcore(struct inode *inode, struct file *file) } /* Reads a page from the oldmem device from given offset. */ -static ssize_t read_from_oldmem_iter(struct iov_iter *iter, size_t count, +ssize_t read_from_oldmem(struct iov_iter *iter, size_t count, u64 *ppos, bool encrypted) { unsigned long pfn, offset; @@ -178,27 +178,6 @@ static ssize_t read_from_oldmem_iter(struct iov_iter *iter, size_t count, return read; } -ssize_t read_from_oldmem(char *buf, size_t count, - u64 *ppos, int userbuf, - bool encrypted) -{ - struct iov_iter iter; - struct iovec iov; - struct kvec kvec; - - if (userbuf) { - iov.iov_base = (__force void __user *)buf; - iov.iov_len = count; - iov_iter_init(&iter, READ, &iov, 1, count); - } else { - kvec.iov_base = buf; - kvec.iov_len = count; - iov_iter_kvec(&iter, READ, &kvec, 1, count); - } - - return read_from_oldmem_iter(&iter, count, ppos, encrypted); -} - /* * Architectures may override this function to allocate ELF header in 2nd kernel */ @@ -218,7 +197,12 @@ void __weak elfcorehdr_free(unsigned long long addr) */ ssize_t __weak elfcorehdr_read(char *buf, size_t count, u64 *ppos) { - return read_from_oldmem(buf, count, ppos, 0, false); + struct kvec kvec = { .iov_base = buf, .iov_len = count }; + struct iov_iter iter; + + iov_iter_kvec(&iter, READ, &kvec, 1, count); + + return read_from_oldmem(&iter, count, ppos, false); } /* @@ -226,7 +210,13 @@ ssize_t __weak elfcorehdr_read(char *buf, size_t count, u64 *ppos) */ ssize_t __weak elfcorehdr_read_notes(char *buf, size_t count, u64 *ppos) { - return read_from_oldmem(buf, count, ppos, 0, cc_platform_has(CC_ATTR_MEM_ENCRYPT)); + struct kvec kvec = { .iov_base = buf, .iov_len = count }; + struct iov_iter iter; + + iov_iter_kvec(&iter, READ, &kvec, 1, count); + + return read_from_oldmem(&iter, count, ppos, + cc_platform_has(CC_ATTR_MEM_ENCRYPT)); } /* @@ -402,7 +392,7 @@ static ssize_t __read_vmcore(struct iov_iter *iter, loff_t *fpos) m->offset + m->size - *fpos, iov_iter_count(iter)); start = m->paddr + *fpos - m->offset; - tmp = read_from_oldmem_iter(iter, tsz, &start, + tmp = read_from_oldmem(iter, tsz, &start, cc_platform_has(CC_ATTR_MEM_ENCRYPT)); if (tmp < 0) return tmp; diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h index a1cf7d5c03c7..0f3a656293b0 100644 --- a/include/linux/crash_dump.h +++ b/include/linux/crash_dump.h @@ -134,13 +134,11 @@ static inline int vmcore_add_device_dump(struct vmcoredd_data *data) #endif /* CONFIG_PROC_VMCORE_DEVICE_DUMP */ #ifdef CONFIG_PROC_VMCORE -ssize_t read_from_oldmem(char *buf, size_t count, - u64 *ppos, int userbuf, - bool encrypted); +ssize_t read_from_oldmem(struct iov_iter *iter, size_t count, + u64 *ppos, bool encrypted); #else -static inline ssize_t read_from_oldmem(char *buf, size_t count, - u64 *ppos, int userbuf, - bool encrypted) +static inline ssize_t read_from_oldmem(struct iov_iter *iter, size_t count, + u64 *ppos, bool encrypted) { return -EOPNOTSUPP; } -- cgit v1.2.3 From 7055197705709c59b8ab77e6a5c7d46d61edd96e Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 9 May 2022 18:29:19 -0700 Subject: proc: fix dentry/inode overinstantiating under /proc/${pid}/net When a process exits, /proc/${pid}, and /proc/${pid}/net dentries are flushed. However some leaf dentries like /proc/${pid}/net/arp_cache aren't. That's because respective PDEs have proc_misc_d_revalidate() hook which returns 1 and leaves dentries/inodes in the LRU. Force revalidation/lookup on everything under /proc/${pid}/net by inheriting proc_net_dentry_ops. [akpm@linux-foundation.org: coding-style cleanups] Link: https://lkml.kernel.org/r/YjdVHgildbWO7diJ@localhost.localdomain Fixes: c6c75deda813 ("proc: fix lookup in /proc/net subdirectories after setns(2)") Signed-off-by: Alexey Dobriyan Reported-by: hui li Cc: Al Viro Signed-off-by: Andrew Morton --- fs/proc/generic.c | 3 +++ fs/proc/proc_net.c | 3 +++ 2 files changed, 6 insertions(+) (limited to 'fs/proc') diff --git a/fs/proc/generic.c b/fs/proc/generic.c index f2132407e133..587b91d9d998 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -448,6 +448,9 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent, proc_set_user(ent, (*parent)->uid, (*parent)->gid); ent->proc_dops = &proc_misc_dentry_ops; + /* Revalidate everything under /proc/${pid}/net */ + if ((*parent)->proc_dops == &proc_net_dentry_ops) + pde_force_lookup(ent); out: return ent; diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index e1cfeda397f3..913e5acefbb6 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -376,6 +376,9 @@ static __net_init int proc_net_ns_init(struct net *net) proc_set_user(netd, uid, gid); + /* Seed dentry revalidation for /proc/${pid}/net */ + pde_force_lookup(netd); + err = -EEXIST; net_statd = proc_net_mkdir(net, "stat", netd); if (!net_statd) -- cgit v1.2.3