From 97abcfedc87cb501071e8947184ee0c5acca6874 Mon Sep 17 00:00:00 2001 From: Xin Yin Date: Mon, 17 Jan 2022 17:36:54 +0800 Subject: ext4: fast commit may not fallback for ineligible commit [ Upstream commit e85c81ba8859a4c839bcd69c5d83b32954133a5b ] For the follow scenario: 1. jbd start commit transaction n 2. task A get new handle for transaction n+1 3. task A do some ineligible actions and mark FC_INELIGIBLE 4. jbd complete transaction n and clean FC_INELIGIBLE 5. task A call fsync In this case fast commit will not fallback to full commit and transaction n+1 also not handled by jbd. Make ext4_fc_mark_ineligible() also record transaction tid for latest ineligible case, when call ext4_fc_cleanup() check current transaction tid, if small than latest ineligible tid do not clear the EXT4_MF_FC_INELIGIBLE. Reported-by: kernel test robot Reported-by: Dan Carpenter Reported-by: Ritesh Harjani Suggested-by: Harshad Shirwadkar Signed-off-by: Xin Yin Link: https://lore.kernel.org/r/20220117093655.35160-2-yinxin.x@bytedance.com Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Sasha Levin --- include/linux/jbd2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index fd933c45281a..d63b8106796e 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1295,7 +1295,7 @@ struct journal_s * Clean-up after fast commit or full commit. JBD2 calls this function * after every commit operation. */ - void (*j_fc_cleanup_callback)(struct journal_s *journal, int); + void (*j_fc_cleanup_callback)(struct journal_s *journal, int full, tid_t tid); /** * @j_fc_replay_callback: -- cgit v1.2.3 From 2de88544b3dbe9f4be3220750e34c97dd86386c1 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 30 Sep 2021 17:06:21 -0400 Subject: NFSD: Have legacy NFSD WRITE decoders use xdr_stream_subsegment() [ Upstream commit dae9a6cab8009e526570e7477ce858dcdfeb256e ] Refactor. Now that the NFSv2 and NFSv3 XDR decoders have been converted to use xdr_streams, the WRITE decoder functions can use xdr_stream_subsegment() to extract the WRITE payload into its own xdr_buf, just as the NFSv4 WRITE XDR decoder currently does. That makes it possible to pass the first kvec, pages array + length, page_base, and total payload length via a single function parameter. The payload's page_base is not yet assigned or used, but will be in subsequent patches. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields Signed-off-by: Sasha Levin --- fs/nfsd/nfs3proc.c | 3 +-- fs/nfsd/nfs3xdr.c | 12 ++---------- fs/nfsd/nfs4proc.c | 3 +-- fs/nfsd/nfsproc.c | 3 +-- fs/nfsd/nfsxdr.c | 9 +-------- fs/nfsd/xdr.h | 2 +- fs/nfsd/xdr3.h | 2 +- include/linux/sunrpc/svc.h | 3 +-- net/sunrpc/svc.c | 11 ++++++----- 9 files changed, 15 insertions(+), 33 deletions(-) (limited to 'include/linux') diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 9918d6ad23ec..354bbfcd96aa 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -210,8 +210,7 @@ nfsd3_proc_write(struct svc_rqst *rqstp) fh_copy(&resp->fh, &argp->fh); resp->committed = argp->stable; - nvecs = svc_fill_write_vector(rqstp, rqstp->rq_arg.pages, - &argp->first, cnt); + nvecs = svc_fill_write_vector(rqstp, &argp->payload); if (!nvecs) { resp->status = nfserr_io; goto out; diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 7a900131d20c..0ee156b9c9d7 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -621,9 +621,6 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p) struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd3_writeargs *args = rqstp->rq_argp; u32 max_blocksize = svc_max_payload(rqstp); - struct kvec *head = rqstp->rq_arg.head; - struct kvec *tail = rqstp->rq_arg.tail; - size_t remaining; if (!svcxdr_decode_nfs_fh3(xdr, &args->fh)) return 0; @@ -641,17 +638,12 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p) /* request sanity */ if (args->count != args->len) return 0; - remaining = head->iov_len + rqstp->rq_arg.page_len + tail->iov_len; - remaining -= xdr_stream_pos(xdr); - if (remaining < xdr_align_size(args->len)) - return 0; if (args->count > max_blocksize) { args->count = max_blocksize; args->len = max_blocksize; } - - args->first.iov_base = xdr->p; - args->first.iov_len = head->iov_len - xdr_stream_pos(xdr); + if (!xdr_stream_subsegment(xdr, &args->payload, args->count)) + return 0; return 1; } diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 4b9a3b90a41f..65200910107f 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1038,8 +1038,7 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, write->wr_how_written = write->wr_stable_how; - nvecs = svc_fill_write_vector(rqstp, write->wr_payload.pages, - write->wr_payload.head, write->wr_buflen); + nvecs = svc_fill_write_vector(rqstp, &write->wr_payload); WARN_ON_ONCE(nvecs > ARRAY_SIZE(rqstp->rq_vec)); status = nfsd_vfs_write(rqstp, &cstate->current_fh, nf, diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index 19c568b8a527..de282f3273c5 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -234,8 +234,7 @@ nfsd_proc_write(struct svc_rqst *rqstp) SVCFH_fmt(&argp->fh), argp->len, argp->offset); - nvecs = svc_fill_write_vector(rqstp, rqstp->rq_arg.pages, - &argp->first, cnt); + nvecs = svc_fill_write_vector(rqstp, &argp->payload); if (!nvecs) { resp->status = nfserr_io; goto out; diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index a06c05fe3b42..26a42f87c240 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -325,10 +325,7 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p) { struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd_writeargs *args = rqstp->rq_argp; - struct kvec *head = rqstp->rq_arg.head; - struct kvec *tail = rqstp->rq_arg.tail; u32 beginoffset, totalcount; - size_t remaining; if (!svcxdr_decode_fhandle(xdr, &args->fh)) return 0; @@ -346,12 +343,8 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p) return 0; if (args->len > NFSSVC_MAXBLKSIZE_V2) return 0; - remaining = head->iov_len + rqstp->rq_arg.page_len + tail->iov_len; - remaining -= xdr_stream_pos(xdr); - if (remaining < xdr_align_size(args->len)) + if (!xdr_stream_subsegment(xdr, &args->payload, args->len)) return 0; - args->first.iov_base = xdr->p; - args->first.iov_len = head->iov_len - xdr_stream_pos(xdr); return 1; } diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h index f45b4bc93f52..80fd6d7f3404 100644 --- a/fs/nfsd/xdr.h +++ b/fs/nfsd/xdr.h @@ -33,7 +33,7 @@ struct nfsd_writeargs { svc_fh fh; __u32 offset; int len; - struct kvec first; + struct xdr_buf payload; }; struct nfsd_createargs { diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h index 933008382bbe..712c117300cb 100644 --- a/fs/nfsd/xdr3.h +++ b/fs/nfsd/xdr3.h @@ -40,7 +40,7 @@ struct nfsd3_writeargs { __u32 count; int stable; __u32 len; - struct kvec first; + struct xdr_buf payload; }; struct nfsd3_createargs { diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 064c96157d1f..6263410c948a 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -532,8 +532,7 @@ int svc_encode_result_payload(struct svc_rqst *rqstp, unsigned int offset, unsigned int length); unsigned int svc_fill_write_vector(struct svc_rqst *rqstp, - struct page **pages, - struct kvec *first, size_t total); + struct xdr_buf *payload); char *svc_fill_symlink_pathname(struct svc_rqst *rqstp, struct kvec *first, void *p, size_t total); diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index a3bbe5ce4570..08ca797bb8a4 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1676,16 +1676,17 @@ EXPORT_SYMBOL_GPL(svc_encode_result_payload); /** * svc_fill_write_vector - Construct data argument for VFS write call * @rqstp: svc_rqst to operate on - * @pages: list of pages containing data payload - * @first: buffer containing first section of write payload - * @total: total number of bytes of write payload + * @payload: xdr_buf containing only the write data payload * * Fills in rqstp::rq_vec, and returns the number of elements. */ -unsigned int svc_fill_write_vector(struct svc_rqst *rqstp, struct page **pages, - struct kvec *first, size_t total) +unsigned int svc_fill_write_vector(struct svc_rqst *rqstp, + struct xdr_buf *payload) { + struct page **pages = payload->pages; + struct kvec *first = payload->head; struct kvec *vec = rqstp->rq_vec; + size_t total = payload->len; unsigned int i; /* Some types of transport can present the write payload -- cgit v1.2.3 From f1675103e0f361f4f636438c1c911dbe83f5f334 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Fri, 14 Jan 2022 14:04:11 -0800 Subject: mm: defer kmemleak object creation of module_alloc() [ Upstream commit 60115fa54ad7b913b7cb5844e6b7ffeb842d55f2 ] Yongqiang reports a kmemleak panic when module insmod/rmmod with KASAN enabled(without KASAN_VMALLOC) on x86[1]. When the module area allocates memory, it's kmemleak_object is created successfully, but the KASAN shadow memory of module allocation is not ready, so when kmemleak scan the module's pointer, it will panic due to no shadow memory with KASAN check. module_alloc __vmalloc_node_range kmemleak_vmalloc kmemleak_scan update_checksum kasan_module_alloc kmemleak_ignore Note, there is no problem if KASAN_VMALLOC enabled, the modules area entire shadow memory is preallocated. Thus, the bug only exits on ARCH which supports dynamic allocation of module area per module load, for now, only x86/arm64/s390 are involved. Add a VM_DEFER_KMEMLEAK flags, defer vmalloc'ed object register of kmemleak in module_alloc() to fix this issue. [1] https://lore.kernel.org/all/6d41e2b9-4692-5ec4-b1cd-cbe29ae89739@huawei.com/ [wangkefeng.wang@huawei.com: fix build] Link: https://lkml.kernel.org/r/20211125080307.27225-1-wangkefeng.wang@huawei.com [akpm@linux-foundation.org: simplify ifdefs, per Andrey] Link: https://lkml.kernel.org/r/CA+fCnZcnwJHUQq34VuRxpdoY6_XbJCDJ-jopksS5Eia4PijPzw@mail.gmail.com Link: https://lkml.kernel.org/r/20211124142034.192078-1-wangkefeng.wang@huawei.com Fixes: 793213a82de4 ("s390/kasan: dynamic shadow mem allocation for modules") Fixes: 39d114ddc682 ("arm64: add KASAN support") Fixes: bebf56a1b176 ("kasan: enable instrumentation of global variables") Signed-off-by: Kefeng Wang Reported-by: Yongqiang Liu Cc: Andrey Konovalov Cc: Andrey Ryabinin Cc: Dmitry Vyukov Cc: Catalin Marinas Cc: Will Deacon Cc: Heiko Carstens Cc: Vasily Gorbik Cc: Christian Borntraeger Cc: Alexander Gordeev Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: Dave Hansen Cc: Alexander Potapenko Cc: Kefeng Wang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- arch/arm64/kernel/module.c | 4 ++-- arch/s390/kernel/module.c | 5 +++-- arch/x86/kernel/module.c | 7 ++++--- include/linux/kasan.h | 4 ++-- include/linux/vmalloc.h | 7 +++++++ mm/kasan/shadow.c | 9 +++++++-- mm/vmalloc.c | 3 ++- 7 files changed, 27 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index b5ec010c481f..309a27553c87 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -36,7 +36,7 @@ void *module_alloc(unsigned long size) module_alloc_end = MODULES_END; p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base, - module_alloc_end, gfp_mask, PAGE_KERNEL, 0, + module_alloc_end, gfp_mask, PAGE_KERNEL, VM_DEFER_KMEMLEAK, NUMA_NO_NODE, __builtin_return_address(0)); if (!p && IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) && @@ -58,7 +58,7 @@ void *module_alloc(unsigned long size) PAGE_KERNEL, 0, NUMA_NO_NODE, __builtin_return_address(0)); - if (p && (kasan_module_alloc(p, size) < 0)) { + if (p && (kasan_module_alloc(p, size, gfp_mask) < 0)) { vfree(p); return NULL; } diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index a805ea5cb92d..b032e556eeb7 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -37,14 +37,15 @@ void *module_alloc(unsigned long size) { + gfp_t gfp_mask = GFP_KERNEL; void *p; if (PAGE_ALIGN(size) > MODULES_LEN) return NULL; p = __vmalloc_node_range(size, MODULE_ALIGN, MODULES_VADDR, MODULES_END, - GFP_KERNEL, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, + gfp_mask, PAGE_KERNEL_EXEC, VM_DEFER_KMEMLEAK, NUMA_NO_NODE, __builtin_return_address(0)); - if (p && (kasan_module_alloc(p, size) < 0)) { + if (p && (kasan_module_alloc(p, size, gfp_mask) < 0)) { vfree(p); return NULL; } diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index 5e9a34b5bd74..867a341a0c7e 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -67,6 +67,7 @@ static unsigned long int get_module_load_offset(void) void *module_alloc(unsigned long size) { + gfp_t gfp_mask = GFP_KERNEL; void *p; if (PAGE_ALIGN(size) > MODULES_LEN) @@ -74,10 +75,10 @@ void *module_alloc(unsigned long size) p = __vmalloc_node_range(size, MODULE_ALIGN, MODULES_VADDR + get_module_load_offset(), - MODULES_END, GFP_KERNEL, - PAGE_KERNEL, 0, NUMA_NO_NODE, + MODULES_END, gfp_mask, + PAGE_KERNEL, VM_DEFER_KMEMLEAK, NUMA_NO_NODE, __builtin_return_address(0)); - if (p && (kasan_module_alloc(p, size) < 0)) { + if (p && (kasan_module_alloc(p, size, gfp_mask) < 0)) { vfree(p); return NULL; } diff --git a/include/linux/kasan.h b/include/linux/kasan.h index dd874a1ee862..f407e937241a 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -461,12 +461,12 @@ static inline void kasan_release_vmalloc(unsigned long start, * allocations with real shadow memory. With KASAN vmalloc, the special * case is unnecessary, as the work is handled in the generic case. */ -int kasan_module_alloc(void *addr, size_t size); +int kasan_module_alloc(void *addr, size_t size, gfp_t gfp_mask); void kasan_free_shadow(const struct vm_struct *vm); #else /* (CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS) && !CONFIG_KASAN_VMALLOC */ -static inline int kasan_module_alloc(void *addr, size_t size) { return 0; } +static inline int kasan_module_alloc(void *addr, size_t size, gfp_t gfp_mask) { return 0; } static inline void kasan_free_shadow(const struct vm_struct *vm) {} #endif /* (CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS) && !CONFIG_KASAN_VMALLOC */ diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 671d402c3778..4fe9e885bbfa 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -28,6 +28,13 @@ struct notifier_block; /* in notifier.h */ #define VM_MAP_PUT_PAGES 0x00000200 /* put pages and free array in vfree */ #define VM_NO_HUGE_VMAP 0x00000400 /* force PAGE_SIZE pte mapping */ +#if (defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)) && \ + !defined(CONFIG_KASAN_VMALLOC) +#define VM_DEFER_KMEMLEAK 0x00000800 /* defer kmemleak object creation */ +#else +#define VM_DEFER_KMEMLEAK 0 +#endif + /* * VM_KASAN is used slightly differently depending on CONFIG_KASAN_VMALLOC. * diff --git a/mm/kasan/shadow.c b/mm/kasan/shadow.c index 8d95ee52d019..dd79840e6096 100644 --- a/mm/kasan/shadow.c +++ b/mm/kasan/shadow.c @@ -493,7 +493,7 @@ void kasan_release_vmalloc(unsigned long start, unsigned long end, #else /* CONFIG_KASAN_VMALLOC */ -int kasan_module_alloc(void *addr, size_t size) +int kasan_module_alloc(void *addr, size_t size, gfp_t gfp_mask) { void *ret; size_t scaled_size; @@ -515,9 +515,14 @@ int kasan_module_alloc(void *addr, size_t size) __builtin_return_address(0)); if (ret) { + struct vm_struct *vm = find_vm_area(addr); __memset(ret, KASAN_SHADOW_INIT, shadow_size); - find_vm_area(addr)->flags |= VM_KASAN; + vm->flags |= VM_KASAN; kmemleak_ignore(ret); + + if (vm->flags & VM_DEFER_KMEMLEAK) + kmemleak_vmalloc(vm, size, gfp_mask); + return 0; } diff --git a/mm/vmalloc.c b/mm/vmalloc.c index e8a807c78110..8375eecc55de 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -3032,7 +3032,8 @@ again: clear_vm_uninitialized_flag(area); size = PAGE_ALIGN(size); - kmemleak_vmalloc(area, size, gfp_mask); + if (!(vm_flags & VM_DEFER_KMEMLEAK)) + kmemleak_vmalloc(area, size, gfp_mask); return addr; -- cgit v1.2.3 From 462c5e6cb2414fcde7d0a76f5e0661744caaa0aa Mon Sep 17 00:00:00 2001 From: Moshe Tal Date: Thu, 20 Jan 2022 11:55:50 +0200 Subject: ethtool: Fix link extended state for big endian [ Upstream commit e2f08207c558bc0bc8abaa557cdb29bad776ac7b ] The link extended sub-states are assigned as enum that is an integer size but read from a union as u8, this is working for small values on little endian systems but for big endian this always give 0. Fix the variable in the union to match the enum size. Fixes: ecc31c60240b ("ethtool: Add link extended state") Signed-off-by: Moshe Tal Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Reviewed-by: Gal Pressman Reviewed-by: Amit Cohen Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- include/linux/ethtool.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 849524b55d89..3fad741df53e 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -94,7 +94,7 @@ struct ethtool_link_ext_state_info { enum ethtool_link_ext_substate_link_logical_mismatch link_logical_mismatch; enum ethtool_link_ext_substate_bad_signal_integrity bad_signal_integrity; enum ethtool_link_ext_substate_cable_issue cable_issue; - u8 __link_ext_substate; + u32 __link_ext_substate; }; }; -- cgit v1.2.3 From aa5040691cb761a04c206a893e5771a904d22ab7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 26 Oct 2021 14:41:33 -0700 Subject: bpf: Use u64_stats_t in struct bpf_prog_stats [ Upstream commit 61a0abaee2092eee69e44fe60336aa2f5b578938 ] Commit 316580b69d0a ("u64_stats: provide u64_stats_t type") fixed possible load/store tearing on 64bit arches. For instance the following C code stats->nsecs += sched_clock() - start; Could be rightfully implemented like this by a compiler, confusing concurrent readers a lot: stats->nsecs += sched_clock(); // arbitrary delay stats->nsecs -= start; Signed-off-by: Eric Dumazet Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211026214133.3114279-4-eric.dumazet@gmail.com Signed-off-by: Sasha Levin --- include/linux/filter.h | 10 +++++----- kernel/bpf/syscall.c | 18 ++++++++++++------ kernel/bpf/trampoline.c | 6 +++--- 3 files changed, 20 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 1611dc9d4420..a9956b681f09 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -554,9 +554,9 @@ struct bpf_binary_header { }; struct bpf_prog_stats { - u64 cnt; - u64 nsecs; - u64 misses; + u64_stats_t cnt; + u64_stats_t nsecs; + u64_stats_t misses; struct u64_stats_sync syncp; } __aligned(2 * sizeof(u64)); @@ -618,8 +618,8 @@ static __always_inline u32 __bpf_prog_run(const struct bpf_prog *prog, ret = dfunc(ctx, prog->insnsi, prog->bpf_func); stats = this_cpu_ptr(prog->stats); flags = u64_stats_update_begin_irqsave(&stats->syncp); - stats->cnt++; - stats->nsecs += sched_clock() - start; + u64_stats_inc(&stats->cnt); + u64_stats_add(&stats->nsecs, sched_clock() - start); u64_stats_update_end_irqrestore(&stats->syncp, flags); } else { ret = dfunc(ctx, prog->insnsi, prog->bpf_func); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 53384622e8da..42490c39dfbf 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1824,8 +1824,14 @@ static int bpf_prog_release(struct inode *inode, struct file *filp) return 0; } +struct bpf_prog_kstats { + u64 nsecs; + u64 cnt; + u64 misses; +}; + static void bpf_prog_get_stats(const struct bpf_prog *prog, - struct bpf_prog_stats *stats) + struct bpf_prog_kstats *stats) { u64 nsecs = 0, cnt = 0, misses = 0; int cpu; @@ -1838,9 +1844,9 @@ static void bpf_prog_get_stats(const struct bpf_prog *prog, st = per_cpu_ptr(prog->stats, cpu); do { start = u64_stats_fetch_begin_irq(&st->syncp); - tnsecs = st->nsecs; - tcnt = st->cnt; - tmisses = st->misses; + tnsecs = u64_stats_read(&st->nsecs); + tcnt = u64_stats_read(&st->cnt); + tmisses = u64_stats_read(&st->misses); } while (u64_stats_fetch_retry_irq(&st->syncp, start)); nsecs += tnsecs; cnt += tcnt; @@ -1856,7 +1862,7 @@ static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp) { const struct bpf_prog *prog = filp->private_data; char prog_tag[sizeof(prog->tag) * 2 + 1] = { }; - struct bpf_prog_stats stats; + struct bpf_prog_kstats stats; bpf_prog_get_stats(prog, &stats); bin2hex(prog_tag, prog->tag, sizeof(prog->tag)); @@ -3595,7 +3601,7 @@ static int bpf_prog_get_info_by_fd(struct file *file, struct bpf_prog_info __user *uinfo = u64_to_user_ptr(attr->info.info); struct bpf_prog_info info; u32 info_len = attr->info.info_len; - struct bpf_prog_stats stats; + struct bpf_prog_kstats stats; char __user *uinsns; u32 ulen; int err; diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index d3a307a8c42b..6933a9bfee63 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -544,7 +544,7 @@ static void notrace inc_misses_counter(struct bpf_prog *prog) stats = this_cpu_ptr(prog->stats); u64_stats_update_begin(&stats->syncp); - stats->misses++; + u64_stats_inc(&stats->misses); u64_stats_update_end(&stats->syncp); } @@ -589,8 +589,8 @@ static void notrace update_prog_stats(struct bpf_prog *prog, stats = this_cpu_ptr(prog->stats); flags = u64_stats_update_begin_irqsave(&stats->syncp); - stats->cnt++; - stats->nsecs += sched_clock() - start; + u64_stats_inc(&stats->cnt); + u64_stats_add(&stats->nsecs, sched_clock() - start); u64_stats_update_end_irqrestore(&stats->syncp, flags); } } -- cgit v1.2.3 From 3a3aa0881aebf1b81d0a37bb07d2b1aa307e4895 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 6 Oct 2021 18:06:54 -0700 Subject: of: net: move of_net under net/ [ Upstream commit e330fb14590c5c80f7195c3d8c9b4bcf79e1a5cd ] Rob suggests to move of_net.c from under drivers/of/ somewhere to the networking code. Suggested-by: Rob Herring Signed-off-by: Jakub Kicinski Reviewed-by: Rob Herring Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/amd/Kconfig | 2 +- drivers/net/ethernet/arc/Kconfig | 4 +- drivers/net/ethernet/ezchip/Kconfig | 2 +- drivers/net/ethernet/litex/Kconfig | 2 +- drivers/net/ethernet/mscc/Kconfig | 2 +- drivers/of/Kconfig | 4 - drivers/of/Makefile | 1 - drivers/of/of_net.c | 145 ------------------------------------ include/linux/of_net.h | 2 +- net/core/Makefile | 1 + net/core/net-sysfs.c | 2 +- net/core/of_net.c | 145 ++++++++++++++++++++++++++++++++++++ 12 files changed, 154 insertions(+), 158 deletions(-) delete mode 100644 drivers/of/of_net.c create mode 100644 net/core/of_net.c (limited to 'include/linux') diff --git a/drivers/net/ethernet/amd/Kconfig b/drivers/net/ethernet/amd/Kconfig index 4786f0504691..899c8a2a34b6 100644 --- a/drivers/net/ethernet/amd/Kconfig +++ b/drivers/net/ethernet/amd/Kconfig @@ -168,7 +168,7 @@ config SUNLANCE config AMD_XGBE tristate "AMD 10GbE Ethernet driver" - depends on ((OF_NET && OF_ADDRESS) || ACPI || PCI) && HAS_IOMEM + depends on (OF_ADDRESS || ACPI || PCI) && HAS_IOMEM depends on X86 || ARM64 || COMPILE_TEST depends on PTP_1588_CLOCK_OPTIONAL select BITREVERSE diff --git a/drivers/net/ethernet/arc/Kconfig b/drivers/net/ethernet/arc/Kconfig index 92a79c4ffa2c..0a67612af228 100644 --- a/drivers/net/ethernet/arc/Kconfig +++ b/drivers/net/ethernet/arc/Kconfig @@ -26,7 +26,7 @@ config ARC_EMAC_CORE config ARC_EMAC tristate "ARC EMAC support" select ARC_EMAC_CORE - depends on OF_IRQ && OF_NET + depends on OF_IRQ depends on ARC || COMPILE_TEST help On some legacy ARC (Synopsys) FPGA boards such as ARCAngel4/ML50x @@ -36,7 +36,7 @@ config ARC_EMAC config EMAC_ROCKCHIP tristate "Rockchip EMAC support" select ARC_EMAC_CORE - depends on OF_IRQ && OF_NET && REGULATOR + depends on OF_IRQ && REGULATOR depends on ARCH_ROCKCHIP || COMPILE_TEST help Support for Rockchip RK3036/RK3066/RK3188 EMAC ethernet controllers. diff --git a/drivers/net/ethernet/ezchip/Kconfig b/drivers/net/ethernet/ezchip/Kconfig index 38aa824efb25..9241b9b1c7a3 100644 --- a/drivers/net/ethernet/ezchip/Kconfig +++ b/drivers/net/ethernet/ezchip/Kconfig @@ -18,7 +18,7 @@ if NET_VENDOR_EZCHIP config EZCHIP_NPS_MANAGEMENT_ENET tristate "EZchip NPS management enet support" - depends on OF_IRQ && OF_NET + depends on OF_IRQ depends on HAS_IOMEM help Simple LAN device for debug or management purposes. diff --git a/drivers/net/ethernet/litex/Kconfig b/drivers/net/ethernet/litex/Kconfig index 63bf01d28f0c..f99adbf26ab4 100644 --- a/drivers/net/ethernet/litex/Kconfig +++ b/drivers/net/ethernet/litex/Kconfig @@ -17,7 +17,7 @@ if NET_VENDOR_LITEX config LITEX_LITEETH tristate "LiteX Ethernet support" - depends on OF_NET + depends on OF help If you wish to compile a kernel for hardware with a LiteX LiteEth device then you should answer Y to this. diff --git a/drivers/net/ethernet/mscc/Kconfig b/drivers/net/ethernet/mscc/Kconfig index b6a73d151dec..8dd8c7f425d2 100644 --- a/drivers/net/ethernet/mscc/Kconfig +++ b/drivers/net/ethernet/mscc/Kconfig @@ -28,7 +28,7 @@ config MSCC_OCELOT_SWITCH depends on BRIDGE || BRIDGE=n depends on NET_SWITCHDEV depends on HAS_IOMEM - depends on OF_NET + depends on OF select MSCC_OCELOT_SWITCH_LIB select GENERIC_PHY help diff --git a/drivers/of/Kconfig b/drivers/of/Kconfig index 3dfeae8912df..80b5fd44ab1c 100644 --- a/drivers/of/Kconfig +++ b/drivers/of/Kconfig @@ -70,10 +70,6 @@ config OF_IRQ def_bool y depends on !SPARC && IRQ_DOMAIN -config OF_NET - depends on NETDEVICES - def_bool y - config OF_RESERVED_MEM def_bool OF_EARLY_FLATTREE diff --git a/drivers/of/Makefile b/drivers/of/Makefile index c13b982084a3..e0360a44306e 100644 --- a/drivers/of/Makefile +++ b/drivers/of/Makefile @@ -7,7 +7,6 @@ obj-$(CONFIG_OF_EARLY_FLATTREE) += fdt_address.o obj-$(CONFIG_OF_PROMTREE) += pdt.o obj-$(CONFIG_OF_ADDRESS) += address.o obj-$(CONFIG_OF_IRQ) += irq.o -obj-$(CONFIG_OF_NET) += of_net.o obj-$(CONFIG_OF_UNITTEST) += unittest.o obj-$(CONFIG_OF_RESERVED_MEM) += of_reserved_mem.o obj-$(CONFIG_OF_RESOLVE) += resolver.o diff --git a/drivers/of/of_net.c b/drivers/of/of_net.c deleted file mode 100644 index dbac3a172a11..000000000000 --- a/drivers/of/of_net.c +++ /dev/null @@ -1,145 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * OF helpers for network devices. - * - * Initially copied out of arch/powerpc/kernel/prom_parse.c - */ -#include -#include -#include -#include -#include -#include -#include -#include - -/** - * of_get_phy_mode - Get phy mode for given device_node - * @np: Pointer to the given device_node - * @interface: Pointer to the result - * - * The function gets phy interface string from property 'phy-mode' or - * 'phy-connection-type'. The index in phy_modes table is set in - * interface and 0 returned. In case of error interface is set to - * PHY_INTERFACE_MODE_NA and an errno is returned, e.g. -ENODEV. - */ -int of_get_phy_mode(struct device_node *np, phy_interface_t *interface) -{ - const char *pm; - int err, i; - - *interface = PHY_INTERFACE_MODE_NA; - - err = of_property_read_string(np, "phy-mode", &pm); - if (err < 0) - err = of_property_read_string(np, "phy-connection-type", &pm); - if (err < 0) - return err; - - for (i = 0; i < PHY_INTERFACE_MODE_MAX; i++) - if (!strcasecmp(pm, phy_modes(i))) { - *interface = i; - return 0; - } - - return -ENODEV; -} -EXPORT_SYMBOL_GPL(of_get_phy_mode); - -static int of_get_mac_addr(struct device_node *np, const char *name, u8 *addr) -{ - struct property *pp = of_find_property(np, name, NULL); - - if (pp && pp->length == ETH_ALEN && is_valid_ether_addr(pp->value)) { - memcpy(addr, pp->value, ETH_ALEN); - return 0; - } - return -ENODEV; -} - -static int of_get_mac_addr_nvmem(struct device_node *np, u8 *addr) -{ - struct platform_device *pdev = of_find_device_by_node(np); - struct nvmem_cell *cell; - const void *mac; - size_t len; - int ret; - - /* Try lookup by device first, there might be a nvmem_cell_lookup - * associated with a given device. - */ - if (pdev) { - ret = nvmem_get_mac_address(&pdev->dev, addr); - put_device(&pdev->dev); - return ret; - } - - cell = of_nvmem_cell_get(np, "mac-address"); - if (IS_ERR(cell)) - return PTR_ERR(cell); - - mac = nvmem_cell_read(cell, &len); - nvmem_cell_put(cell); - - if (IS_ERR(mac)) - return PTR_ERR(mac); - - if (len != ETH_ALEN || !is_valid_ether_addr(mac)) { - kfree(mac); - return -EINVAL; - } - - memcpy(addr, mac, ETH_ALEN); - kfree(mac); - - return 0; -} - -/** - * of_get_mac_address() - * @np: Caller's Device Node - * @addr: Pointer to a six-byte array for the result - * - * Search the device tree for the best MAC address to use. 'mac-address' is - * checked first, because that is supposed to contain to "most recent" MAC - * address. If that isn't set, then 'local-mac-address' is checked next, - * because that is the default address. If that isn't set, then the obsolete - * 'address' is checked, just in case we're using an old device tree. If any - * of the above isn't set, then try to get MAC address from nvmem cell named - * 'mac-address'. - * - * Note that the 'address' property is supposed to contain a virtual address of - * the register set, but some DTS files have redefined that property to be the - * MAC address. - * - * All-zero MAC addresses are rejected, because those could be properties that - * exist in the device tree, but were not set by U-Boot. For example, the - * DTS could define 'mac-address' and 'local-mac-address', with zero MAC - * addresses. Some older U-Boots only initialized 'local-mac-address'. In - * this case, the real MAC is in 'local-mac-address', and 'mac-address' exists - * but is all zeros. - * - * Return: 0 on success and errno in case of error. -*/ -int of_get_mac_address(struct device_node *np, u8 *addr) -{ - int ret; - - if (!np) - return -ENODEV; - - ret = of_get_mac_addr(np, "mac-address", addr); - if (!ret) - return 0; - - ret = of_get_mac_addr(np, "local-mac-address", addr); - if (!ret) - return 0; - - ret = of_get_mac_addr(np, "address", addr); - if (!ret) - return 0; - - return of_get_mac_addr_nvmem(np, addr); -} -EXPORT_SYMBOL(of_get_mac_address); diff --git a/include/linux/of_net.h b/include/linux/of_net.h index daef3b0d9270..cf31188329b5 100644 --- a/include/linux/of_net.h +++ b/include/linux/of_net.h @@ -8,7 +8,7 @@ #include -#ifdef CONFIG_OF_NET +#ifdef CONFIG_OF #include struct net_device; diff --git a/net/core/Makefile b/net/core/Makefile index 35ced6201814..4268846f2f47 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -36,3 +36,4 @@ obj-$(CONFIG_FAILOVER) += failover.o obj-$(CONFIG_NET_SOCK_MSG) += skmsg.o obj-$(CONFIG_BPF_SYSCALL) += sock_map.o obj-$(CONFIG_BPF_SYSCALL) += bpf_sk_storage.o +obj-$(CONFIG_OF) += of_net.o diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index a4ae65263384..d7f9ee830d34 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1927,7 +1927,7 @@ static struct class net_class __ro_after_init = { .get_ownership = net_get_ownership, }; -#ifdef CONFIG_OF_NET +#ifdef CONFIG_OF static int of_dev_node_match(struct device *dev, const void *data) { for (; dev; dev = dev->parent) { diff --git a/net/core/of_net.c b/net/core/of_net.c new file mode 100644 index 000000000000..dbac3a172a11 --- /dev/null +++ b/net/core/of_net.c @@ -0,0 +1,145 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * OF helpers for network devices. + * + * Initially copied out of arch/powerpc/kernel/prom_parse.c + */ +#include +#include +#include +#include +#include +#include +#include +#include + +/** + * of_get_phy_mode - Get phy mode for given device_node + * @np: Pointer to the given device_node + * @interface: Pointer to the result + * + * The function gets phy interface string from property 'phy-mode' or + * 'phy-connection-type'. The index in phy_modes table is set in + * interface and 0 returned. In case of error interface is set to + * PHY_INTERFACE_MODE_NA and an errno is returned, e.g. -ENODEV. + */ +int of_get_phy_mode(struct device_node *np, phy_interface_t *interface) +{ + const char *pm; + int err, i; + + *interface = PHY_INTERFACE_MODE_NA; + + err = of_property_read_string(np, "phy-mode", &pm); + if (err < 0) + err = of_property_read_string(np, "phy-connection-type", &pm); + if (err < 0) + return err; + + for (i = 0; i < PHY_INTERFACE_MODE_MAX; i++) + if (!strcasecmp(pm, phy_modes(i))) { + *interface = i; + return 0; + } + + return -ENODEV; +} +EXPORT_SYMBOL_GPL(of_get_phy_mode); + +static int of_get_mac_addr(struct device_node *np, const char *name, u8 *addr) +{ + struct property *pp = of_find_property(np, name, NULL); + + if (pp && pp->length == ETH_ALEN && is_valid_ether_addr(pp->value)) { + memcpy(addr, pp->value, ETH_ALEN); + return 0; + } + return -ENODEV; +} + +static int of_get_mac_addr_nvmem(struct device_node *np, u8 *addr) +{ + struct platform_device *pdev = of_find_device_by_node(np); + struct nvmem_cell *cell; + const void *mac; + size_t len; + int ret; + + /* Try lookup by device first, there might be a nvmem_cell_lookup + * associated with a given device. + */ + if (pdev) { + ret = nvmem_get_mac_address(&pdev->dev, addr); + put_device(&pdev->dev); + return ret; + } + + cell = of_nvmem_cell_get(np, "mac-address"); + if (IS_ERR(cell)) + return PTR_ERR(cell); + + mac = nvmem_cell_read(cell, &len); + nvmem_cell_put(cell); + + if (IS_ERR(mac)) + return PTR_ERR(mac); + + if (len != ETH_ALEN || !is_valid_ether_addr(mac)) { + kfree(mac); + return -EINVAL; + } + + memcpy(addr, mac, ETH_ALEN); + kfree(mac); + + return 0; +} + +/** + * of_get_mac_address() + * @np: Caller's Device Node + * @addr: Pointer to a six-byte array for the result + * + * Search the device tree for the best MAC address to use. 'mac-address' is + * checked first, because that is supposed to contain to "most recent" MAC + * address. If that isn't set, then 'local-mac-address' is checked next, + * because that is the default address. If that isn't set, then the obsolete + * 'address' is checked, just in case we're using an old device tree. If any + * of the above isn't set, then try to get MAC address from nvmem cell named + * 'mac-address'. + * + * Note that the 'address' property is supposed to contain a virtual address of + * the register set, but some DTS files have redefined that property to be the + * MAC address. + * + * All-zero MAC addresses are rejected, because those could be properties that + * exist in the device tree, but were not set by U-Boot. For example, the + * DTS could define 'mac-address' and 'local-mac-address', with zero MAC + * addresses. Some older U-Boots only initialized 'local-mac-address'. In + * this case, the real MAC is in 'local-mac-address', and 'mac-address' exists + * but is all zeros. + * + * Return: 0 on success and errno in case of error. +*/ +int of_get_mac_address(struct device_node *np, u8 *addr) +{ + int ret; + + if (!np) + return -ENODEV; + + ret = of_get_mac_addr(np, "mac-address", addr); + if (!ret) + return 0; + + ret = of_get_mac_addr(np, "local-mac-address", addr); + if (!ret) + return 0; + + ret = of_get_mac_addr(np, "address", addr); + if (!ret) + return 0; + + return of_get_mac_addr_nvmem(np, addr); +} +EXPORT_SYMBOL(of_get_mac_address); -- cgit v1.2.3 From 9285523b41746110814058f99a940f3edf2b5582 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 14 Oct 2021 11:00:37 +0200 Subject: net: of: fix stub of_net helpers for CONFIG_NET=n [ Upstream commit 8b017fbe0bbb98dd71fb4850f6b9cc0e136a26b8 ] Moving the of_net code from drivers/of/ to net/core means we no longer stub out the helpers when networking is disabled, which leads to a randconfig build failure with at least one ARM platform that calls this from non-networking code: arm-linux-gnueabi-ld: arch/arm/mach-mvebu/kirkwood.o: in function `kirkwood_dt_eth_fixup': kirkwood.c:(.init.text+0x54): undefined reference to `of_get_mac_address' Restore the way this worked before by changing that #ifdef check back to testing for both CONFIG_OF and CONFIG_NET. Fixes: e330fb14590c ("of: net: move of_net under net/") Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20211014090055.2058949-1-arnd@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- include/linux/of_net.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/of_net.h b/include/linux/of_net.h index cf31188329b5..55460ecfa50a 100644 --- a/include/linux/of_net.h +++ b/include/linux/of_net.h @@ -8,7 +8,7 @@ #include -#ifdef CONFIG_OF +#if defined(CONFIG_OF) && defined(CONFIG_NET) #include struct net_device; -- cgit v1.2.3 From 3411613611a5cddf7e80908010dc87cb527dd13b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 14 Feb 2022 10:16:57 +0100 Subject: sched: Fix yet more sched_fork() races commit b1e8206582f9d680cff7d04828708c8b6ab32957 upstream. Where commit 4ef0c5c6b5ba ("kernel/sched: Fix sched_fork() access an invalid sched_task_group") fixed a fork race vs cgroup, it opened up a race vs syscalls by not placing the task on the runqueue before it gets exposed through the pidhash. Commit 13765de8148f ("sched/fair: Fix fault in reweight_entity") is trying to fix a single instance of this, instead fix the whole class of issues, effectively reverting this commit. Fixes: 4ef0c5c6b5ba ("kernel/sched: Fix sched_fork() access an invalid sched_task_group") Reported-by: Linus Torvalds Signed-off-by: Peter Zijlstra (Intel) Tested-by: Tadeusz Struk Tested-by: Zhang Qiao Tested-by: Dietmar Eggemann Link: https://lkml.kernel.org/r/YgoeCbwj5mbCR0qA@hirez.programming.kicks-ass.net Signed-off-by: Greg Kroah-Hartman --- include/linux/sched/task.h | 4 ++-- kernel/fork.c | 13 ++++++++++++- kernel/sched/core.c | 34 +++++++++++++++++++++------------- 3 files changed, 35 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 058d7f371e25..8db25fcc1eba 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -54,8 +54,8 @@ extern asmlinkage void schedule_tail(struct task_struct *prev); extern void init_idle(struct task_struct *idle, int cpu); extern int sched_fork(unsigned long clone_flags, struct task_struct *p); -extern void sched_post_fork(struct task_struct *p, - struct kernel_clone_args *kargs); +extern void sched_cgroup_fork(struct task_struct *p, struct kernel_clone_args *kargs); +extern void sched_post_fork(struct task_struct *p); extern void sched_dead(struct task_struct *p); void __noreturn do_task_dead(void); diff --git a/kernel/fork.c b/kernel/fork.c index 28aee1a8875b..89475c994ca9 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2296,6 +2296,17 @@ static __latent_entropy struct task_struct *copy_process( if (retval) goto bad_fork_put_pidfd; + /* + * Now that the cgroups are pinned, re-clone the parent cgroup and put + * the new task on the correct runqueue. All this *before* the task + * becomes visible. + * + * This isn't part of ->can_fork() because while the re-cloning is + * cgroup specific, it unconditionally needs to place the task on a + * runqueue. + */ + sched_cgroup_fork(p, args); + /* * From this point on we must avoid any synchronous user-space * communication until we take the tasklist-lock. In particular, we do @@ -2405,7 +2416,7 @@ static __latent_entropy struct task_struct *copy_process( fd_install(pidfd, pidfile); proc_fork_connector(p); - sched_post_fork(p, args); + sched_post_fork(p); cgroup_post_fork(p, args); perf_event_fork(p); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 57e5c7914296..a0747eaa2dba 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1199,9 +1199,8 @@ int tg_nop(struct task_group *tg, void *data) } #endif -static void set_load_weight(struct task_struct *p) +static void set_load_weight(struct task_struct *p, bool update_load) { - bool update_load = !(READ_ONCE(p->__state) & TASK_NEW); int prio = p->static_prio - MAX_RT_PRIO; struct load_weight *load = &p->se.load; @@ -4359,7 +4358,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) p->static_prio = NICE_TO_PRIO(0); p->prio = p->normal_prio = p->static_prio; - set_load_weight(p); + set_load_weight(p, false); /* * We don't need the reset flag anymore after the fork. It has @@ -4377,6 +4376,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) init_entity_runnable_average(&p->se); + #ifdef CONFIG_SCHED_INFO if (likely(sched_info_on())) memset(&p->sched_info, 0, sizeof(p->sched_info)); @@ -4392,18 +4392,23 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) return 0; } -void sched_post_fork(struct task_struct *p, struct kernel_clone_args *kargs) +void sched_cgroup_fork(struct task_struct *p, struct kernel_clone_args *kargs) { unsigned long flags; -#ifdef CONFIG_CGROUP_SCHED - struct task_group *tg; -#endif + /* + * Because we're not yet on the pid-hash, p->pi_lock isn't strictly + * required yet, but lockdep gets upset if rules are violated. + */ raw_spin_lock_irqsave(&p->pi_lock, flags); #ifdef CONFIG_CGROUP_SCHED - tg = container_of(kargs->cset->subsys[cpu_cgrp_id], - struct task_group, css); - p->sched_task_group = autogroup_task_group(p, tg); + if (1) { + struct task_group *tg; + tg = container_of(kargs->cset->subsys[cpu_cgrp_id], + struct task_group, css); + tg = autogroup_task_group(p, tg); + p->sched_task_group = tg; + } #endif rseq_migrate(p); /* @@ -4414,7 +4419,10 @@ void sched_post_fork(struct task_struct *p, struct kernel_clone_args *kargs) if (p->sched_class->task_fork) p->sched_class->task_fork(p); raw_spin_unlock_irqrestore(&p->pi_lock, flags); +} +void sched_post_fork(struct task_struct *p) +{ uclamp_post_fork(p); } @@ -6903,7 +6911,7 @@ void set_user_nice(struct task_struct *p, long nice) put_prev_task(rq, p); p->static_prio = NICE_TO_PRIO(nice); - set_load_weight(p); + set_load_weight(p, true); old_prio = p->prio; p->prio = effective_prio(p); @@ -7194,7 +7202,7 @@ static void __setscheduler_params(struct task_struct *p, */ p->rt_priority = attr->sched_priority; p->normal_prio = normal_prio(p); - set_load_weight(p); + set_load_weight(p, true); } /* @@ -9432,7 +9440,7 @@ void __init sched_init(void) #endif } - set_load_weight(&init_task); + set_load_weight(&init_task, false); /* * The boot idle thread does lazy MMU switching as well: -- cgit v1.2.3 From 316e4a16524a2d2ce321f57c1abe4df9ef90f950 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 18 Feb 2022 11:49:08 -0800 Subject: x86/speculation: Include unprivileged eBPF status in Spectre v2 mitigation reporting commit 44a3918c8245ab10c6c9719dd12e7a8d291980d8 upstream. With unprivileged eBPF enabled, eIBRS (without retpoline) is vulnerable to Spectre v2 BHB-based attacks. When both are enabled, print a warning message and report it in the 'spectre_v2' sysfs vulnerabilities file. Signed-off-by: Josh Poimboeuf Signed-off-by: Borislav Petkov Reviewed-by: Thomas Gleixner [fllinden@amazon.com: backported to 5.15] Signed-off-by: Frank van der Linden Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 35 +++++++++++++++++++++++++++++------ include/linux/bpf.h | 12 ++++++++++++ kernel/sysctl.c | 7 +++++++ 3 files changed, 48 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 693f92b3af1b..1de7f1fe8939 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -650,6 +651,16 @@ static inline const char *spectre_v2_module_string(void) static inline const char *spectre_v2_module_string(void) { return ""; } #endif +#define SPECTRE_V2_EIBRS_EBPF_MSG "WARNING: Unprivileged eBPF is enabled with eIBRS on, data leaks possible via Spectre v2 BHB attacks!\n" + +#ifdef CONFIG_BPF_SYSCALL +void unpriv_ebpf_notify(int new_state) +{ + if (spectre_v2_enabled == SPECTRE_V2_EIBRS && !new_state) + pr_err(SPECTRE_V2_EIBRS_EBPF_MSG); +} +#endif + static inline bool match_option(const char *arg, int arglen, const char *opt) { int len = strlen(opt); @@ -994,6 +1005,9 @@ static void __init spectre_v2_select_mitigation(void) break; } + if (mode == SPECTRE_V2_EIBRS && unprivileged_ebpf_enabled()) + pr_err(SPECTRE_V2_EIBRS_EBPF_MSG); + if (spectre_v2_in_eibrs_mode(mode)) { /* Force it so VMEXIT will restore correctly */ x86_spec_ctrl_base |= SPEC_CTRL_IBRS; @@ -1780,6 +1794,20 @@ static char *ibpb_state(void) return ""; } +static ssize_t spectre_v2_show_state(char *buf) +{ + if (spectre_v2_enabled == SPECTRE_V2_EIBRS && unprivileged_ebpf_enabled()) + return sprintf(buf, "Vulnerable: Unprivileged eBPF enabled\n"); + + return sprintf(buf, "%s%s%s%s%s%s\n", + spectre_v2_strings[spectre_v2_enabled], + ibpb_state(), + boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", + stibp_state(), + boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "", + spectre_v2_module_string()); +} + static ssize_t srbds_show_state(char *buf) { return sprintf(buf, "%s\n", srbds_strings[srbds_mitigation]); @@ -1805,12 +1833,7 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr return sprintf(buf, "%s\n", spectre_v1_strings[spectre_v1_mitigation]); case X86_BUG_SPECTRE_V2: - return sprintf(buf, "%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], - ibpb_state(), - boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", - stibp_state(), - boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "", - spectre_v2_module_string()); + return spectre_v2_show_state(buf); case X86_BUG_SPEC_STORE_BYPASS: return sprintf(buf, "%s\n", ssb_strings[ssb_mode]); diff --git a/include/linux/bpf.h b/include/linux/bpf.h index d9049f2a78ca..15b690a0cecb 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1666,6 +1666,12 @@ bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog); const struct btf_func_model * bpf_jit_find_kfunc_model(const struct bpf_prog *prog, const struct bpf_insn *insn); + +static inline bool unprivileged_ebpf_enabled(void) +{ + return !sysctl_unprivileged_bpf_disabled; +} + #else /* !CONFIG_BPF_SYSCALL */ static inline struct bpf_prog *bpf_prog_get(u32 ufd) { @@ -1884,6 +1890,12 @@ bpf_jit_find_kfunc_model(const struct bpf_prog *prog, { return NULL; } + +static inline bool unprivileged_ebpf_enabled(void) +{ + return false; +} + #endif /* CONFIG_BPF_SYSCALL */ void __bpf_free_used_btfs(struct bpf_prog_aux *aux, diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 083be6af29d7..0586047f7323 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -228,6 +228,10 @@ static int bpf_stats_handler(struct ctl_table *table, int write, return ret; } +void __weak unpriv_ebpf_notify(int new_state) +{ +} + static int bpf_unpriv_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { @@ -245,6 +249,9 @@ static int bpf_unpriv_handler(struct ctl_table *table, int write, return -EPERM; *(int *)table->data = unpriv_enable; } + + unpriv_ebpf_notify(unpriv_enable); + return ret; } #endif /* CONFIG_BPF_SYSCALL && CONFIG_SYSCTL */ -- cgit v1.2.3 From 50e700a117669e072fb9e47ff3ea49e4a8cacf04 Mon Sep 17 00:00:00 2001 From: James Morse Date: Thu, 18 Nov 2021 13:59:46 +0000 Subject: arm64: entry: Add vectors that have the bhb mitigation sequences commit ba2689234be92024e5635d30fe744f4853ad97db upstream. Some CPUs affected by Spectre-BHB need a sequence of branches, or a firmware call to be run before any indirect branch. This needs to go in the vectors. No CPU needs both. While this can be patched in, it would run on all CPUs as there is a single set of vectors. If only one part of a big/little combination is affected, the unaffected CPUs have to run the mitigation too. Create extra vectors that include the sequence. Subsequent patches will allow affected CPUs to select this set of vectors. Later patches will modify the loop count to match what the CPU requires. Reviewed-by: Catalin Marinas Signed-off-by: James Morse Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/assembler.h | 24 +++++++++++++++++ arch/arm64/include/asm/vectors.h | 34 ++++++++++++++++++++++++ arch/arm64/kernel/entry.S | 53 +++++++++++++++++++++++++++++++------- arch/arm64/kernel/proton-pack.c | 16 ++++++++++++ include/linux/arm-smccc.h | 5 ++++ 5 files changed, 123 insertions(+), 9 deletions(-) create mode 100644 arch/arm64/include/asm/vectors.h (limited to 'include/linux') diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index bfa58409a4d4..0f0a9be8cfcb 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -830,4 +830,28 @@ alternative_endif #endif /* GNU_PROPERTY_AARCH64_FEATURE_1_DEFAULT */ + .macro __mitigate_spectre_bhb_loop tmp +#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY + mov \tmp, #32 +.Lspectre_bhb_loop\@: + b . + 4 + subs \tmp, \tmp, #1 + b.ne .Lspectre_bhb_loop\@ + sb +#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */ + .endm + + /* Save/restores x0-x3 to the stack */ + .macro __mitigate_spectre_bhb_fw +#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY + stp x0, x1, [sp, #-16]! + stp x2, x3, [sp, #-16]! + mov w0, #ARM_SMCCC_ARCH_WORKAROUND_3 +alternative_cb smccc_patch_fw_mitigation_conduit + nop // Patched to SMC/HVC #0 +alternative_cb_end + ldp x2, x3, [sp], #16 + ldp x0, x1, [sp], #16 +#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */ + .endm #endif /* __ASM_ASSEMBLER_H */ diff --git a/arch/arm64/include/asm/vectors.h b/arch/arm64/include/asm/vectors.h new file mode 100644 index 000000000000..16ca74260375 --- /dev/null +++ b/arch/arm64/include/asm/vectors.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2022 ARM Ltd. + */ +#ifndef __ASM_VECTORS_H +#define __ASM_VECTORS_H + +/* + * Note: the order of this enum corresponds to two arrays in entry.S: + * tramp_vecs and __bp_harden_el1_vectors. By default the canonical + * 'full fat' vectors are used directly. + */ +enum arm64_bp_harden_el1_vectors { +#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY + /* + * Perform the BHB loop mitigation, before branching to the canonical + * vectors. + */ + EL1_VECTOR_BHB_LOOP, + + /* + * Make the SMC call for firmware mitigation, before branching to the + * canonical vectors. + */ + EL1_VECTOR_BHB_FW, +#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */ + + /* + * Remap the kernel before branching to the canonical vectors. + */ + EL1_VECTOR_KPTI, +}; + +#endif /* __ASM_VECTORS_H */ diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 2c5dbeb5ceb9..ddd4665d861e 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -646,13 +646,26 @@ alternative_else_nop_endif sub \dst, \dst, PAGE_SIZE .endm - .macro tramp_ventry, vector_start, regsize, kpti + +#define BHB_MITIGATION_NONE 0 +#define BHB_MITIGATION_LOOP 1 +#define BHB_MITIGATION_FW 2 + + .macro tramp_ventry, vector_start, regsize, kpti, bhb .align 7 1: .if \regsize == 64 msr tpidrro_el0, x30 // Restored in kernel_ventry .endif + .if \bhb == BHB_MITIGATION_LOOP + /* + * This sequence must appear before the first indirect branch. i.e. the + * ret out of tramp_ventry. It appears here because x30 is free. + */ + __mitigate_spectre_bhb_loop x30 + .endif // \bhb == BHB_MITIGATION_LOOP + .if \kpti == 1 /* * Defend against branch aliasing attacks by pushing a dummy @@ -680,6 +693,15 @@ alternative_else_nop_endif ldr x30, =vectors .endif // \kpti == 1 + .if \bhb == BHB_MITIGATION_FW + /* + * The firmware sequence must appear before the first indirect branch. + * i.e. the ret out of tramp_ventry. But it also needs the stack to be + * mapped to save/restore the registers the SMC clobbers. + */ + __mitigate_spectre_bhb_fw + .endif // \bhb == BHB_MITIGATION_FW + add x30, x30, #(1b - \vector_start + 4) ret .org 1b + 128 // Did we overflow the ventry slot? @@ -687,6 +709,9 @@ alternative_else_nop_endif .macro tramp_exit, regsize = 64 adr x30, tramp_vectors +#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY + add x30, x30, SZ_4K +#endif msr vbar_el1, x30 ldr lr, [sp, #S_LR] tramp_unmap_kernel x29 @@ -698,26 +723,32 @@ alternative_else_nop_endif sb .endm - .macro generate_tramp_vector, kpti + .macro generate_tramp_vector, kpti, bhb .Lvector_start\@: .space 0x400 .rept 4 - tramp_ventry .Lvector_start\@, 64, \kpti + tramp_ventry .Lvector_start\@, 64, \kpti, \bhb .endr .rept 4 - tramp_ventry .Lvector_start\@, 32, \kpti + tramp_ventry .Lvector_start\@, 32, \kpti, \bhb .endr .endm #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 /* * Exception vectors trampoline. + * The order must match __bp_harden_el1_vectors and the + * arm64_bp_harden_el1_vectors enum. */ .pushsection ".entry.tramp.text", "ax" .align 11 SYM_CODE_START_NOALIGN(tramp_vectors) - generate_tramp_vector kpti=1 +#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY + generate_tramp_vector kpti=1, bhb=BHB_MITIGATION_LOOP + generate_tramp_vector kpti=1, bhb=BHB_MITIGATION_FW +#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */ + generate_tramp_vector kpti=1, bhb=BHB_MITIGATION_NONE SYM_CODE_END(tramp_vectors) SYM_CODE_START(tramp_exit_native) @@ -744,7 +775,7 @@ SYM_DATA_END(__entry_tramp_data_start) * Exception vectors for spectre mitigations on entry from EL1 when * kpti is not in use. */ - .macro generate_el1_vector + .macro generate_el1_vector, bhb .Lvector_start\@: kernel_ventry 1, t, 64, sync // Synchronous EL1t kernel_ventry 1, t, 64, irq // IRQ EL1t @@ -757,17 +788,21 @@ SYM_DATA_END(__entry_tramp_data_start) kernel_ventry 1, h, 64, error // Error EL1h .rept 4 - tramp_ventry .Lvector_start\@, 64, kpti=0 + tramp_ventry .Lvector_start\@, 64, 0, \bhb .endr .rept 4 - tramp_ventry .Lvector_start\@, 32, kpti=0 + tramp_ventry .Lvector_start\@, 32, 0, \bhb .endr .endm +/* The order must match tramp_vecs and the arm64_bp_harden_el1_vectors enum. */ .pushsection ".entry.text", "ax" .align 11 SYM_CODE_START(__bp_harden_el1_vectors) - generate_el1_vector +#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY + generate_el1_vector bhb=BHB_MITIGATION_LOOP + generate_el1_vector bhb=BHB_MITIGATION_FW +#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */ SYM_CODE_END(__bp_harden_el1_vectors) .popsection diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c index 9394f21d7566..6a5eeb8beea3 100644 --- a/arch/arm64/kernel/proton-pack.c +++ b/arch/arm64/kernel/proton-pack.c @@ -770,3 +770,19 @@ int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) return -ENODEV; } } + +/* Patched to NOP when enabled */ +void noinstr spectre_bhb_patch_loop_mitigation_enable(struct alt_instr *alt, + __le32 *origptr, + __le32 *updptr, int nr_inst) +{ + BUG_ON(nr_inst != 1); +} + +/* Patched to NOP when enabled */ +void noinstr spectre_bhb_patch_fw_mitigation_enabled(struct alt_instr *alt, + __le32 *origptr, + __le32 *updptr, int nr_inst) +{ + BUG_ON(nr_inst != 1); +} diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index 63ccb5252190..220c8c60e021 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -92,6 +92,11 @@ ARM_SMCCC_SMC_32, \ 0, 0x7fff) +#define ARM_SMCCC_ARCH_WORKAROUND_3 \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_32, \ + 0, 0x3fff) + #define ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID \ ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ ARM_SMCCC_SMC_32, \ -- cgit v1.2.3 From b3d4a7dcb9cae0b74ac9909a5421f9c20d60d380 Mon Sep 17 00:00:00 2001 From: Mohammad Kabat Date: Thu, 25 Mar 2021 14:38:55 +0200 Subject: net/mlx5: Fix size field in bufferx_reg struct [ Upstream commit ac77998b7ac3044f0509b097da9637184598980d ] According to HW spec the field "size" should be 16 bits in bufferx register. Fixes: e281682bf294 ("net/mlx5_core: HW data structs/types definitions cleanup") Signed-off-by: Mohammad Kabat Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- include/linux/mlx5/mlx5_ifc.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 25d775764a5a..fdf4589ab4d4 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -9508,8 +9508,8 @@ struct mlx5_ifc_bufferx_reg_bits { u8 reserved_at_0[0x6]; u8 lossy[0x1]; u8 epsb[0x1]; - u8 reserved_at_8[0xc]; - u8 size[0xc]; + u8 reserved_at_8[0x8]; + u8 size[0x10]; u8 xoff_threshold[0x10]; u8 xon_threshold[0x10]; -- cgit v1.2.3 From 7403f4118ab94be837ab9d770507537a8057bc63 Mon Sep 17 00:00:00 2001 From: Halil Pasic Date: Fri, 11 Feb 2022 02:12:52 +0100 Subject: swiotlb: fix info leak with DMA_FROM_DEVICE [ Upstream commit ddbd89deb7d32b1fbb879f48d68fda1a8ac58e8e ] The problem I'm addressing was discovered by the LTP test covering cve-2018-1000204. A short description of what happens follows: 1) The test case issues a command code 00 (TEST UNIT READY) via the SG_IO interface with: dxfer_len == 524288, dxdfer_dir == SG_DXFER_FROM_DEV and a corresponding dxferp. The peculiar thing about this is that TUR is not reading from the device. 2) In sg_start_req() the invocation of blk_rq_map_user() effectively bounces the user-space buffer. As if the device was to transfer into it. Since commit a45b599ad808 ("scsi: sg: allocate with __GFP_ZERO in sg_build_indirect()") we make sure this first bounce buffer is allocated with GFP_ZERO. 3) For the rest of the story we keep ignoring that we have a TUR, so the device won't touch the buffer we prepare as if the we had a DMA_FROM_DEVICE type of situation. My setup uses a virtio-scsi device and the buffer allocated by SG is mapped by the function virtqueue_add_split() which uses DMA_FROM_DEVICE for the "in" sgs (here scatter-gather and not scsi generics). This mapping involves bouncing via the swiotlb (we need swiotlb to do virtio in protected guest like s390 Secure Execution, or AMD SEV). 4) When the SCSI TUR is done, we first copy back the content of the second (that is swiotlb) bounce buffer (which most likely contains some previous IO data), to the first bounce buffer, which contains all zeros. Then we copy back the content of the first bounce buffer to the user-space buffer. 5) The test case detects that the buffer, which it zero-initialized, ain't all zeros and fails. One can argue that this is an swiotlb problem, because without swiotlb we leak all zeros, and the swiotlb should be transparent in a sense that it does not affect the outcome (if all other participants are well behaved). Copying the content of the original buffer into the swiotlb buffer is the only way I can think of to make swiotlb transparent in such scenarios. So let's do just that if in doubt, but allow the driver to tell us that the whole mapped buffer is going to be overwritten, in which case we can preserve the old behavior and avoid the performance impact of the extra bounce. Signed-off-by: Halil Pasic Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin --- Documentation/core-api/dma-attributes.rst | 8 ++++++++ include/linux/dma-mapping.h | 8 ++++++++ kernel/dma/swiotlb.c | 3 ++- 3 files changed, 18 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/Documentation/core-api/dma-attributes.rst b/Documentation/core-api/dma-attributes.rst index 1887d92e8e92..17706dc91ec9 100644 --- a/Documentation/core-api/dma-attributes.rst +++ b/Documentation/core-api/dma-attributes.rst @@ -130,3 +130,11 @@ accesses to DMA buffers in both privileged "supervisor" and unprivileged subsystem that the buffer is fully accessible at the elevated privilege level (and ideally inaccessible or at least read-only at the lesser-privileged levels). + +DMA_ATTR_OVERWRITE +------------------ + +This is a hint to the DMA-mapping subsystem that the device is expected to +overwrite the entire mapped size, thus the caller does not require any of the +previous buffer contents to be preserved. This allows bounce-buffering +implementations to optimise DMA_FROM_DEVICE transfers. diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index dca2b1355bb1..6150d11a607e 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -61,6 +61,14 @@ */ #define DMA_ATTR_PRIVILEGED (1UL << 9) +/* + * This is a hint to the DMA-mapping subsystem that the device is expected + * to overwrite the entire mapped size, thus the caller does not require any + * of the previous buffer contents to be preserved. This allows + * bounce-buffering implementations to optimise DMA_FROM_DEVICE transfers. + */ +#define DMA_ATTR_OVERWRITE (1UL << 10) + /* * A dma_addr_t can hold any valid DMA or bus address for the platform. It can * be given to a device to use as a DMA source or target. It is specific to a diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 87c40517e822..aca0690550e2 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -579,7 +579,8 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr, mem->slots[index + i].orig_addr = slot_addr(orig_addr, i); tlb_addr = slot_addr(mem->start, index) + offset; if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && - (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)) + (!(attrs & DMA_ATTR_OVERWRITE) || dir == DMA_TO_DEVICE || + dir == DMA_BIDIRECTIONAL)) swiotlb_bounce(dev, tlb_addr, mapping_size, DMA_TO_DEVICE); return tlb_addr; } -- cgit v1.2.3 From 2c1f97af38be151527380796d31d3c9adb054bf9 Mon Sep 17 00:00:00 2001 From: Halil Pasic Date: Sat, 5 Mar 2022 18:07:14 +0100 Subject: swiotlb: rework "fix info leak with DMA_FROM_DEVICE" commit aa6f8dcbab473f3a3c7454b74caa46d36cdc5d13 upstream. Unfortunately, we ended up merging an old version of the patch "fix info leak with DMA_FROM_DEVICE" instead of merging the latest one. Christoph (the swiotlb maintainer), he asked me to create an incremental fix (after I have pointed this out the mix up, and asked him for guidance). So here we go. The main differences between what we got and what was agreed are: * swiotlb_sync_single_for_device is also required to do an extra bounce * We decided not to introduce DMA_ATTR_OVERWRITE until we have exploiters * The implantation of DMA_ATTR_OVERWRITE is flawed: DMA_ATTR_OVERWRITE must take precedence over DMA_ATTR_SKIP_CPU_SYNC Thus this patch removes DMA_ATTR_OVERWRITE, and makes swiotlb_sync_single_for_device() bounce unconditionally (that is, also when dir == DMA_TO_DEVICE) in order do avoid synchronising back stale data from the swiotlb buffer. Let me note, that if the size used with dma_sync_* API is less than the size used with dma_[un]map_*, under certain circumstances we may still end up with swiotlb not being transparent. In that sense, this is no perfect fix either. To get this bullet proof, we would have to bounce the entire mapping/bounce buffer. For that we would have to figure out the starting address, and the size of the mapping in swiotlb_sync_single_for_device(). While this does seem possible, there seems to be no firm consensus on how things are supposed to work. Signed-off-by: Halil Pasic Fixes: ddbd89deb7d3 ("swiotlb: fix info leak with DMA_FROM_DEVICE") Cc: stable@vger.kernel.org Reviewed-by: Christoph Hellwig Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- Documentation/core-api/dma-attributes.rst | 8 -------- include/linux/dma-mapping.h | 8 -------- kernel/dma/swiotlb.c | 23 +++++++++++++++-------- 3 files changed, 15 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/Documentation/core-api/dma-attributes.rst b/Documentation/core-api/dma-attributes.rst index 17706dc91ec9..1887d92e8e92 100644 --- a/Documentation/core-api/dma-attributes.rst +++ b/Documentation/core-api/dma-attributes.rst @@ -130,11 +130,3 @@ accesses to DMA buffers in both privileged "supervisor" and unprivileged subsystem that the buffer is fully accessible at the elevated privilege level (and ideally inaccessible or at least read-only at the lesser-privileged levels). - -DMA_ATTR_OVERWRITE ------------------- - -This is a hint to the DMA-mapping subsystem that the device is expected to -overwrite the entire mapped size, thus the caller does not require any of the -previous buffer contents to be preserved. This allows bounce-buffering -implementations to optimise DMA_FROM_DEVICE transfers. diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 6150d11a607e..dca2b1355bb1 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -61,14 +61,6 @@ */ #define DMA_ATTR_PRIVILEGED (1UL << 9) -/* - * This is a hint to the DMA-mapping subsystem that the device is expected - * to overwrite the entire mapped size, thus the caller does not require any - * of the previous buffer contents to be preserved. This allows - * bounce-buffering implementations to optimise DMA_FROM_DEVICE transfers. - */ -#define DMA_ATTR_OVERWRITE (1UL << 10) - /* * A dma_addr_t can hold any valid DMA or bus address for the platform. It can * be given to a device to use as a DMA source or target. It is specific to a diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index aca0690550e2..e58dce93c661 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -578,10 +578,14 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr, for (i = 0; i < nr_slots(alloc_size + offset); i++) mem->slots[index + i].orig_addr = slot_addr(orig_addr, i); tlb_addr = slot_addr(mem->start, index) + offset; - if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && - (!(attrs & DMA_ATTR_OVERWRITE) || dir == DMA_TO_DEVICE || - dir == DMA_BIDIRECTIONAL)) - swiotlb_bounce(dev, tlb_addr, mapping_size, DMA_TO_DEVICE); + /* + * When dir == DMA_FROM_DEVICE we could omit the copy from the orig + * to the tlb buffer, if we knew for sure the device will + * overwirte the entire current content. But we don't. Thus + * unconditional bounce may prevent leaking swiotlb content (i.e. + * kernel memory) to user-space. + */ + swiotlb_bounce(dev, tlb_addr, mapping_size, DMA_TO_DEVICE); return tlb_addr; } @@ -648,10 +652,13 @@ void swiotlb_tbl_unmap_single(struct device *dev, phys_addr_t tlb_addr, void swiotlb_sync_single_for_device(struct device *dev, phys_addr_t tlb_addr, size_t size, enum dma_data_direction dir) { - if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL) - swiotlb_bounce(dev, tlb_addr, size, DMA_TO_DEVICE); - else - BUG_ON(dir != DMA_FROM_DEVICE); + /* + * Unconditional bounce is necessary to avoid corruption on + * sync_*_for_cpu or dma_ummap_* when the device didn't overwrite + * the whole lengt of the bounce buffer. + */ + swiotlb_bounce(dev, tlb_addr, size, DMA_TO_DEVICE); + BUG_ON(!valid_dma_direction(dir)); } void swiotlb_sync_single_for_cpu(struct device *dev, phys_addr_t tlb_addr, -- cgit v1.2.3 From 22823b1a0dc95810a430b831ef00cf9ac98aedf0 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 14 Jan 2022 14:56:15 -0500 Subject: virtio: unexport virtio_finalize_features commit 838d6d3461db0fdbf33fc5f8a69c27b50b4a46da upstream. virtio_finalize_features is only used internally within virtio. No reason to export it. Signed-off-by: Michael S. Tsirkin Reviewed-by: Cornelia Huck Acked-by: Jason Wang Signed-off-by: Greg Kroah-Hartman --- drivers/virtio/virtio.c | 3 +-- include/linux/virtio.h | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index 236081afe9a2..ed3646054346 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -166,7 +166,7 @@ void virtio_add_status(struct virtio_device *dev, unsigned int status) } EXPORT_SYMBOL_GPL(virtio_add_status); -int virtio_finalize_features(struct virtio_device *dev) +static int virtio_finalize_features(struct virtio_device *dev) { int ret = dev->config->finalize_features(dev); unsigned status; @@ -202,7 +202,6 @@ int virtio_finalize_features(struct virtio_device *dev) } return 0; } -EXPORT_SYMBOL_GPL(virtio_finalize_features); static int virtio_dev_probe(struct device *_d) { diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 41edbc01ffa4..1af8d65d4c8f 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -133,7 +133,6 @@ bool is_virtio_device(struct device *dev); void virtio_break_device(struct virtio_device *dev); void virtio_config_changed(struct virtio_device *dev); -int virtio_finalize_features(struct virtio_device *dev); #ifdef CONFIG_PM_SLEEP int virtio_device_freeze(struct virtio_device *dev); int virtio_device_restore(struct virtio_device *dev); -- cgit v1.2.3 From cbb726e6c652149e3bef4e4bc976a3864ec23c2b Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 14 Jan 2022 14:58:41 -0500 Subject: virtio: acknowledge all features before access commit 4fa59ede95195f267101a1b8916992cf3f245cdb upstream. The feature negotiation was designed in a way that makes it possible for devices to know which config fields will be accessed by drivers. This is broken since commit 404123c2db79 ("virtio: allow drivers to validate features") with fallout in at least block and net. We have a partial work-around in commit 2f9a174f918e ("virtio: write back F_VERSION_1 before validate") which at least lets devices find out which format should config space have, but this is a partial fix: guests should not access config space without acknowledging features since otherwise we'll never be able to change the config space format. To fix, split finalize_features from virtio_finalize_features and call finalize_features with all feature bits before validation, and then - if validation changed any bits - once again after. Since virtio_finalize_features no longer writes out features rename it to virtio_features_ok - since that is what it does: checks that features are ok with the device. As a side effect, this also reduces the amount of hypervisor accesses - we now only acknowledge features once unless we are clearing any features when validating (which is uncommon). IRC I think that this was more or less always the intent in the spec but unfortunately the way the spec is worded does not say this explicitly, I plan to address this at the spec level, too. Acked-by: Jason Wang Cc: stable@vger.kernel.org Fixes: 404123c2db79 ("virtio: allow drivers to validate features") Fixes: 2f9a174f918e ("virtio: write back F_VERSION_1 before validate") Cc: "Halil Pasic" Signed-off-by: Michael S. Tsirkin Signed-off-by: Greg Kroah-Hartman --- drivers/virtio/virtio.c | 39 ++++++++++++++++++++++----------------- include/linux/virtio_config.h | 3 ++- 2 files changed, 24 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index ed3646054346..c2b733ef95b0 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -166,14 +166,13 @@ void virtio_add_status(struct virtio_device *dev, unsigned int status) } EXPORT_SYMBOL_GPL(virtio_add_status); -static int virtio_finalize_features(struct virtio_device *dev) +/* Do some validation, then set FEATURES_OK */ +static int virtio_features_ok(struct virtio_device *dev) { - int ret = dev->config->finalize_features(dev); unsigned status; + int ret; might_sleep(); - if (ret) - return ret; ret = arch_has_restricted_virtio_memory_access(); if (ret) { @@ -238,17 +237,6 @@ static int virtio_dev_probe(struct device *_d) driver_features_legacy = driver_features; } - /* - * Some devices detect legacy solely via F_VERSION_1. Write - * F_VERSION_1 to force LE config space accesses before FEATURES_OK for - * these when needed. - */ - if (drv->validate && !virtio_legacy_is_little_endian() - && device_features & BIT_ULL(VIRTIO_F_VERSION_1)) { - dev->features = BIT_ULL(VIRTIO_F_VERSION_1); - dev->config->finalize_features(dev); - } - if (device_features & (1ULL << VIRTIO_F_VERSION_1)) dev->features = driver_features & device_features; else @@ -259,13 +247,26 @@ static int virtio_dev_probe(struct device *_d) if (device_features & (1ULL << i)) __virtio_set_bit(dev, i); + err = dev->config->finalize_features(dev); + if (err) + goto err; + if (drv->validate) { + u64 features = dev->features; + err = drv->validate(dev); if (err) goto err; + + /* Did validation change any features? Then write them again. */ + if (features != dev->features) { + err = dev->config->finalize_features(dev); + if (err) + goto err; + } } - err = virtio_finalize_features(dev); + err = virtio_features_ok(dev); if (err) goto err; @@ -489,7 +490,11 @@ int virtio_device_restore(struct virtio_device *dev) /* We have a driver! */ virtio_add_status(dev, VIRTIO_CONFIG_S_DRIVER); - ret = virtio_finalize_features(dev); + ret = dev->config->finalize_features(dev); + if (ret) + goto err; + + ret = virtio_features_ok(dev); if (ret) goto err; diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 8519b3ae5d52..b341dd62aa4d 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -62,8 +62,9 @@ struct virtio_shm_region { * Returns the first 64 feature bits (all we currently need). * @finalize_features: confirm what device features we'll be using. * vdev: the virtio_device - * This gives the final feature bits for the device: it can change + * This sends the driver feature bits to the device: it can change * the dev->feature bits if it wants. + * Note: despite the name this can be called any number of times. * Returns 0 on success or error status * @bus_name: return the bus name associated with the device (optional) * vdev: the virtio_device -- cgit v1.2.3 From 1b09f28f70a5046acd64138075ae3f095238b045 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 11 Mar 2022 13:23:31 +0000 Subject: watch_queue: Fix filter limit check commit c993ee0f9f81caf5767a50d1faeba39a0dc82af2 upstream. In watch_queue_set_filter(), there are a couple of places where we check that the filter type value does not exceed what the type_filter bitmap can hold. One place calculates the number of bits by: if (tf[i].type >= sizeof(wfilter->type_filter) * 8) which is fine, but the second does: if (tf[i].type >= sizeof(wfilter->type_filter) * BITS_PER_LONG) which is not. This can lead to a couple of out-of-bounds writes due to a too-large type: (1) __set_bit() on wfilter->type_filter (2) Writing more elements in wfilter->filters[] than we allocated. Fix this by just using the proper WATCH_TYPE__NR instead, which is the number of types we actually know about. The bug may cause an oops looking something like: BUG: KASAN: slab-out-of-bounds in watch_queue_set_filter+0x659/0x740 Write of size 4 at addr ffff88800d2c66bc by task watch_queue_oob/611 ... Call Trace: dump_stack_lvl+0x45/0x59 print_address_description.constprop.0+0x1f/0x150 ... kasan_report.cold+0x7f/0x11b ... watch_queue_set_filter+0x659/0x740 ... __x64_sys_ioctl+0x127/0x190 do_syscall_64+0x43/0x90 entry_SYSCALL_64_after_hwframe+0x44/0xae Allocated by task 611: kasan_save_stack+0x1e/0x40 __kasan_kmalloc+0x81/0xa0 watch_queue_set_filter+0x23a/0x740 __x64_sys_ioctl+0x127/0x190 do_syscall_64+0x43/0x90 entry_SYSCALL_64_after_hwframe+0x44/0xae The buggy address belongs to the object at ffff88800d2c66a0 which belongs to the cache kmalloc-32 of size 32 The buggy address is located 28 bytes inside of 32-byte region [ffff88800d2c66a0, ffff88800d2c66c0) Fixes: c73be61cede5 ("pipe: Add general notification queue support") Reported-by: Jann Horn Signed-off-by: David Howells Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/watch_queue.h | 3 ++- kernel/watch_queue.c | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/watch_queue.h b/include/linux/watch_queue.h index c994d1b2cdba..3b9a40ae8bdb 100644 --- a/include/linux/watch_queue.h +++ b/include/linux/watch_queue.h @@ -28,7 +28,8 @@ struct watch_type_filter { struct watch_filter { union { struct rcu_head rcu; - unsigned long type_filter[2]; /* Bitmask of accepted types */ + /* Bitmask of accepted types */ + DECLARE_BITMAP(type_filter, WATCH_TYPE__NR); }; u32 nr_filters; /* Number of filters */ struct watch_type_filter filters[]; diff --git a/kernel/watch_queue.c b/kernel/watch_queue.c index 9c9eb20dd2c5..427b0318e303 100644 --- a/kernel/watch_queue.c +++ b/kernel/watch_queue.c @@ -320,7 +320,7 @@ long watch_queue_set_filter(struct pipe_inode_info *pipe, tf[i].info_mask & WATCH_INFO_LENGTH) goto err_filter; /* Ignore any unknown types */ - if (tf[i].type >= sizeof(wfilter->type_filter) * 8) + if (tf[i].type >= WATCH_TYPE__NR) continue; nr_filter++; } @@ -336,7 +336,7 @@ long watch_queue_set_filter(struct pipe_inode_info *pipe, q = wfilter->filters; for (i = 0; i < filter.nr_filters; i++) { - if (tf[i].type >= sizeof(wfilter->type_filter) * BITS_PER_LONG) + if (tf[i].type >= WATCH_TYPE__NR) continue; q->type = tf[i].type; -- cgit v1.2.3 From 69b80587f650875a473c66df2a1120c0e656362b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 20 Sep 2021 14:33:25 +0200 Subject: block: drop unused includes in commit b81e0c2372e65e5627864ba034433b64b2fc73f5 upstream. Drop various include not actually used in genhd.h itself, and move the remaning includes closer together. Signed-off-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20210920123328.1399408-15-hch@lst.de Signed-off-by: Jens Axboe Reported-by: Sudip Mukherjee a Reported-by: "H. Nikolaus Schaller" Reported-by: Guenter Roeck Cc: "Maciej W. Rozycki" [ resolves MIPS build failure by luck, root cause needs to be fixed in Linus's tree properly, but this is needed for now to fix the build - gregkh ] Signed-off-by: Greg Kroah-Hartman --- arch/um/drivers/ubd_kern.c | 1 + block/genhd.c | 1 + block/holder.c | 1 + block/partitions/core.c | 1 + drivers/block/amiflop.c | 1 + drivers/block/ataflop.c | 1 + drivers/block/floppy.c | 1 + drivers/block/swim.c | 1 + drivers/block/xen-blkfront.c | 1 + drivers/md/md.c | 1 + drivers/s390/block/dasd_genhd.c | 1 + drivers/scsi/sd.c | 1 + drivers/scsi/sg.c | 1 + drivers/scsi/sr.c | 1 + drivers/scsi/st.c | 1 + include/linux/genhd.h | 14 ++------------ include/linux/part_stat.h | 1 + 17 files changed, 18 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index cd9dc0556e91..fefd343412c7 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include diff --git a/block/genhd.c b/block/genhd.c index 2dcedbe4ef04..0276a2846adf 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/block/holder.c b/block/holder.c index 9dc084182337..27cddce1b446 100644 --- a/block/holder.c +++ b/block/holder.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only #include +#include struct bd_holder_disk { struct list_head list; diff --git a/block/partitions/core.c b/block/partitions/core.c index 7bea19dd9458..b9e9af84f518 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -5,6 +5,7 @@ * Copyright (C) 2020 Christoph Hellwig */ #include +#include #include #include #include diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c index 8b1714021498..1ed557cb5ed2 100644 --- a/drivers/block/amiflop.c +++ b/drivers/block/amiflop.c @@ -61,6 +61,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c index aab48b292a3b..82faaa458157 100644 --- a/drivers/block/ataflop.c +++ b/drivers/block/ataflop.c @@ -68,6 +68,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 4a6a74177b3c..0f58594c5a4d 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -184,6 +184,7 @@ static int print_unex = 1; #include #include #include +#include #include #include #include diff --git a/drivers/block/swim.c b/drivers/block/swim.c index 7ccc8d2a41bc..3911d0833e1b 100644 --- a/drivers/block/swim.c +++ b/drivers/block/swim.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 1becbbb3be13..390817cf1221 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/md/md.c b/drivers/md/md.c index 2d31a079be33..5ce2648cbe5b 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -51,6 +51,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/s390/block/dasd_genhd.c b/drivers/s390/block/dasd_genhd.c index fa966e0db6ca..3a6f3af240fa 100644 --- a/drivers/s390/block/dasd_genhd.c +++ b/drivers/s390/block/dasd_genhd.c @@ -14,6 +14,7 @@ #define KMSG_COMPONENT "dasd" #include +#include #include #include diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 78ead3369779..564a21b5da9d 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -48,6 +48,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 8f05248920e8..3c98f08dc25d 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -31,6 +31,7 @@ static int sg_version_num = 30536; /* 2 digits for each component */ #include #include #include +#include #include #include #include diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c index 1203374828b9..973d6e079b02 100644 --- a/drivers/scsi/sr.c +++ b/drivers/scsi/sr.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c index ae8636d3780b..9933722acfd9 100644 --- a/drivers/scsi/st.c +++ b/drivers/scsi/st.c @@ -32,6 +32,7 @@ static const char *verstr = "20160209"; #include #include #include +#include #include #include #include diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 0f5315c2b5a3..0b48a0cf4262 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -12,12 +12,10 @@ #include #include -#include -#include -#include #include #include -#include +#include +#include extern const struct device_type disk_type; extern struct device_type part_type; @@ -26,14 +24,6 @@ extern struct class block_class; #define DISK_MAX_PARTS 256 #define DISK_NAME_LEN 32 -#include -#include -#include -#include -#include -#include -#include - #define PARTITION_META_INFO_VOLNAMELTH 64 /* * Enough for the string representation of any kind of UUID plus NULL. diff --git a/include/linux/part_stat.h b/include/linux/part_stat.h index d2558121d48c..6f7949b2fd8d 100644 --- a/include/linux/part_stat.h +++ b/include/linux/part_stat.h @@ -3,6 +3,7 @@ #define _LINUX_PART_STAT_H #include +#include struct disk_stats { u64 nsecs[NR_STAT_GROUPS]; -- cgit v1.2.3