diff options
117 files changed, 925 insertions, 493 deletions
diff --git a/Documentation/driver-api/usb/power-management.rst b/Documentation/driver-api/usb/power-management.rst index 79beb807996b..4a74cf6f2797 100644 --- a/Documentation/driver-api/usb/power-management.rst +++ b/Documentation/driver-api/usb/power-management.rst @@ -370,11 +370,15 @@ autosuspend the interface's device. When the usage counter is = 0 then the interface is considered to be idle, and the kernel may autosuspend the device. -Drivers need not be concerned about balancing changes to the usage -counter; the USB core will undo any remaining "get"s when a driver -is unbound from its interface. As a corollary, drivers must not call -any of the ``usb_autopm_*`` functions after their ``disconnect`` -routine has returned. +Drivers must be careful to balance their overall changes to the usage +counter. Unbalanced "get"s will remain in effect when a driver is +unbound from its interface, preventing the device from going into +runtime suspend should the interface be bound to a driver again. On +the other hand, drivers are allowed to achieve this balance by calling +the ``usb_autopm_*`` functions even after their ``disconnect`` routine +has returned -- say from within a work-queue routine -- provided they +retain an active reference to the interface (via ``usb_get_intf`` and +``usb_put_intf``). Drivers using the async routines are responsible for their own synchronization and mutual exclusion. diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt index 6af24cdb25cc..3f13d8599337 100644 --- a/Documentation/sysctl/vm.txt +++ b/Documentation/sysctl/vm.txt @@ -866,14 +866,14 @@ The intent is that compaction has less work to do in the future and to increase the success rate of future high-order allocations such as SLUB allocations, THP and hugetlbfs pages. -To make it sensible with respect to the watermark_scale_factor parameter, -the unit is in fractions of 10,000. The default value of 15,000 means -that up to 150% of the high watermark will be reclaimed in the event of -a pageblock being mixed due to fragmentation. The level of reclaim is -determined by the number of fragmentation events that occurred in the -recent past. If this value is smaller than a pageblock then a pageblocks -worth of pages will be reclaimed (e.g. 2MB on 64-bit x86). A boost factor -of 0 will disable the feature. +To make it sensible with respect to the watermark_scale_factor +parameter, the unit is in fractions of 10,000. The default value of +15,000 on !DISCONTIGMEM configurations means that up to 150% of the high +watermark will be reclaimed in the event of a pageblock being mixed due +to fragmentation. The level of reclaim is determined by the number of +fragmentation events that occurred in the recent past. If this value is +smaller than a pageblock then a pageblocks worth of pages will be reclaimed +(e.g. 2MB on 64-bit x86). A boost factor of 0 will disable the feature. ============================================================= @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 1 SUBLEVEL = 0 -EXTRAVERSION = -rc6 +EXTRAVERSION = -rc7 NAME = Shy Crocodile # *DOCUMENTATION* @@ -678,6 +678,7 @@ KBUILD_CFLAGS += $(call cc-disable-warning,frame-address,) KBUILD_CFLAGS += $(call cc-disable-warning, format-truncation) KBUILD_CFLAGS += $(call cc-disable-warning, format-overflow) KBUILD_CFLAGS += $(call cc-disable-warning, int-in-bool-context) +KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member) ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE KBUILD_CFLAGS += -Os @@ -719,7 +720,6 @@ ifdef CONFIG_CC_IS_CLANG KBUILD_CPPFLAGS += $(call cc-option,-Qunused-arguments,) KBUILD_CFLAGS += $(call cc-disable-warning, format-invalid-specifier) KBUILD_CFLAGS += $(call cc-disable-warning, gnu) -KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member) # Quiet clang warning: comparison of unsigned expression < 0 is always false KBUILD_CFLAGS += $(call cc-disable-warning, tautological-compare) # CLANG uses a _MergedGlobals as optimization, but this breaks modpost, as the diff --git a/arch/arc/boot/dts/hsdk.dts b/arch/arc/boot/dts/hsdk.dts index 69bc1c9e8e50..7425bb0f2d1b 100644 --- a/arch/arc/boot/dts/hsdk.dts +++ b/arch/arc/boot/dts/hsdk.dts @@ -18,8 +18,8 @@ model = "snps,hsdk"; compatible = "snps,hsdk"; - #address-cells = <1>; - #size-cells = <1>; + #address-cells = <2>; + #size-cells = <2>; chosen { bootargs = "earlycon=uart8250,mmio32,0xf0005000,115200n8 console=ttyS0,115200n8 debug print-fatal-signals=1"; @@ -105,7 +105,7 @@ #size-cells = <1>; interrupt-parent = <&idu_intc>; - ranges = <0x00000000 0xf0000000 0x10000000>; + ranges = <0x00000000 0x0 0xf0000000 0x10000000>; cgu_rst: reset-controller@8a0 { compatible = "snps,hsdk-reset"; @@ -269,9 +269,10 @@ }; memory@80000000 { - #address-cells = <1>; - #size-cells = <1>; + #address-cells = <2>; + #size-cells = <2>; device_type = "memory"; - reg = <0x80000000 0x40000000>; /* 1 GiB */ + reg = <0x0 0x80000000 0x0 0x40000000>; /* 1 GB lowmem */ + /* 0x1 0x00000000 0x0 0x40000000>; 1 GB highmem */ }; }; diff --git a/arch/arc/lib/memset-archs.S b/arch/arc/lib/memset-archs.S index f230bb7092fd..b3373f5c88e0 100644 --- a/arch/arc/lib/memset-archs.S +++ b/arch/arc/lib/memset-archs.S @@ -30,10 +30,10 @@ #else -.macro PREALLOC_INSTR +.macro PREALLOC_INSTR reg, off .endm -.macro PREFETCHW_INSTR +.macro PREFETCHW_INSTR reg, off .endm #endif diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index 4135abec3fb0..63e6e6504699 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -113,10 +113,24 @@ static void read_decode_cache_bcr_arcv2(int cpu) } READ_BCR(ARC_REG_CLUSTER_BCR, cbcr); - if (cbcr.c) + if (cbcr.c) { ioc_exists = 1; - else + + /* + * As for today we don't support both IOC and ZONE_HIGHMEM enabled + * simultaneously. This happens because as of today IOC aperture covers + * only ZONE_NORMAL (low mem) and any dma transactions outside this + * region won't be HW coherent. + * If we want to use both IOC and ZONE_HIGHMEM we can use + * bounce_buffer to handle dma transactions to HIGHMEM. + * Also it is possible to modify dma_direct cache ops or increase IOC + * aperture size if we are planning to use HIGHMEM without PAE. + */ + if (IS_ENABLED(CONFIG_HIGHMEM) || is_pae40_enabled()) + ioc_enable = 0; + } else { ioc_enable = 0; + } /* HS 2.0 didn't have AUX_VOL */ if (cpuinfo_arc700[cpu].core.family > 0x51) { @@ -1158,19 +1172,6 @@ noinline void __init arc_ioc_setup(void) if (!ioc_enable) return; - /* - * As for today we don't support both IOC and ZONE_HIGHMEM enabled - * simultaneously. This happens because as of today IOC aperture covers - * only ZONE_NORMAL (low mem) and any dma transactions outside this - * region won't be HW coherent. - * If we want to use both IOC and ZONE_HIGHMEM we can use - * bounce_buffer to handle dma transactions to HIGHMEM. - * Also it is possible to modify dma_direct cache ops or increase IOC - * aperture size if we are planning to use HIGHMEM without PAE. - */ - if (IS_ENABLED(CONFIG_HIGHMEM)) - panic("IOC and HIGHMEM can't be used simultaneously"); - /* Flush + invalidate + disable L1 dcache */ __dc_disable(); diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 850b4805e2d1..9aed25a6019b 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -73,7 +73,7 @@ config ARM select HAVE_EFFICIENT_UNALIGNED_ACCESS if (CPU_V6 || CPU_V6K || CPU_V7) && MMU select HAVE_EXIT_THREAD select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL - select HAVE_FUNCTION_GRAPH_TRACER if !THUMB2_KERNEL + select HAVE_FUNCTION_GRAPH_TRACER if !THUMB2_KERNEL && !CC_IS_CLANG select HAVE_FUNCTION_TRACER if !XIP_KERNEL select HAVE_GCC_PLUGINS select HAVE_HW_BREAKPOINT if PERF_EVENTS && (CPU_V6 || CPU_V6K || CPU_V7) diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug index 6d6e0330930b..e388af4594a6 100644 --- a/arch/arm/Kconfig.debug +++ b/arch/arm/Kconfig.debug @@ -47,8 +47,8 @@ config DEBUG_WX choice prompt "Choose kernel unwinder" - default UNWINDER_ARM if AEABI && !FUNCTION_GRAPH_TRACER - default UNWINDER_FRAME_POINTER if !AEABI || FUNCTION_GRAPH_TRACER + default UNWINDER_ARM if AEABI + default UNWINDER_FRAME_POINTER if !AEABI help This determines which method will be used for unwinding kernel stack traces for panics, oopses, bugs, warnings, perf, /proc/<pid>/stack, @@ -65,7 +65,7 @@ config UNWINDER_FRAME_POINTER config UNWINDER_ARM bool "ARM EABI stack unwinder" - depends on AEABI + depends on AEABI && !FUNCTION_GRAPH_TRACER select ARM_UNWIND help This option enables stack unwinding support in the kernel diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index 6c7ccb428c07..7135820f76d4 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -1438,7 +1438,21 @@ ENTRY(efi_stub_entry) @ Preserve return value of efi_entry() in r4 mov r4, r0 - bl cache_clean_flush + + @ our cache maintenance code relies on CP15 barrier instructions + @ but since we arrived here with the MMU and caches configured + @ by UEFI, we must check that the CP15BEN bit is set in SCTLR. + @ Note that this bit is RAO/WI on v6 and earlier, so the ISB in + @ the enable path will be executed on v7+ only. + mrc p15, 0, r1, c1, c0, 0 @ read SCTLR + tst r1, #(1 << 5) @ CP15BEN bit set? + bne 0f + orr r1, r1, #(1 << 5) @ CP15 barrier instructions + mcr p15, 0, r1, c1, c0, 0 @ write SCTLR + ARM( .inst 0xf57ff06f @ v7+ isb ) + THUMB( isb ) + +0: bl cache_clean_flush bl cache_off @ Set parameters for booting zImage according to boot protocol diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S index c08d2d890f7b..b38bbd011b35 100644 --- a/arch/arm/kernel/head-nommu.S +++ b/arch/arm/kernel/head-nommu.S @@ -133,9 +133,9 @@ __secondary_data: */ .text __after_proc_init: -#ifdef CONFIG_ARM_MPU M_CLASS(movw r12, #:lower16:BASEADDR_V7M_SCB) M_CLASS(movt r12, #:upper16:BASEADDR_V7M_SCB) +#ifdef CONFIG_ARM_MPU M_CLASS(ldr r3, [r12, 0x50]) AR_CLASS(mrc p15, 0, r3, c0, c1, 4) @ Read ID_MMFR0 and r3, r3, #(MMFR0_PMSA) @ PMSA field diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index 07b298120182..65a51331088e 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -103,10 +103,15 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) * to be revisited if support for multiple ftrace entry points * is added in the future, but for now, the pr_err() below * deals with a theoretical issue only. + * + * Note that PLTs are place relative, and plt_entries_equal() + * checks whether they point to the same target. Here, we need + * to check if the actual opcodes are in fact identical, + * regardless of the offset in memory so use memcmp() instead. */ trampoline = get_plt_entry(addr, mod->arch.ftrace_trampoline); - if (!plt_entries_equal(mod->arch.ftrace_trampoline, - &trampoline)) { + if (memcmp(mod->arch.ftrace_trampoline, &trampoline, + sizeof(trampoline))) { if (plt_entry_is_initialized(mod->arch.ftrace_trampoline)) { pr_err("ftrace: far branches to multiple entry points unsupported inside a single module\n"); return -EINVAL; diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 6bc135042f5e..7cae155e81a5 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -363,7 +363,7 @@ void __init arm64_memblock_init(void) * Otherwise, this is a no-op */ u64 base = phys_initrd_start & PAGE_MASK; - u64 size = PAGE_ALIGN(phys_initrd_size); + u64 size = PAGE_ALIGN(phys_initrd_start + phys_initrd_size) - base; /* * We can only add back the initrd memory if we don't end up diff --git a/arch/powerpc/configs/skiroot_defconfig b/arch/powerpc/configs/skiroot_defconfig index 5ba131c30f6b..1bcd468ab422 100644 --- a/arch/powerpc/configs/skiroot_defconfig +++ b/arch/powerpc/configs/skiroot_defconfig @@ -266,6 +266,7 @@ CONFIG_UDF_FS=m CONFIG_MSDOS_FS=m CONFIG_VFAT_FS=m CONFIG_PROC_KCORE=y +CONFIG_HUGETLBFS=y # CONFIG_MISC_FILESYSTEMS is not set # CONFIG_NETWORK_FILESYSTEMS is not set CONFIG_NLS=y diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c index e7a9c4f6bfca..8330f135294f 100644 --- a/arch/powerpc/mm/mmu_context_iommu.c +++ b/arch/powerpc/mm/mmu_context_iommu.c @@ -95,28 +95,15 @@ static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua, unsigned long entries, unsigned long dev_hpa, struct mm_iommu_table_group_mem_t **pmem) { - struct mm_iommu_table_group_mem_t *mem; - long i, ret, locked_entries = 0; + struct mm_iommu_table_group_mem_t *mem, *mem2; + long i, ret, locked_entries = 0, pinned = 0; unsigned int pageshift; - - mutex_lock(&mem_list_mutex); - - list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, - next) { - /* Overlap? */ - if ((mem->ua < (ua + (entries << PAGE_SHIFT))) && - (ua < (mem->ua + - (mem->entries << PAGE_SHIFT)))) { - ret = -EINVAL; - goto unlock_exit; - } - - } + unsigned long entry, chunk; if (dev_hpa == MM_IOMMU_TABLE_INVALID_HPA) { ret = mm_iommu_adjust_locked_vm(mm, entries, true); if (ret) - goto unlock_exit; + return ret; locked_entries = entries; } @@ -148,17 +135,27 @@ static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua, } down_read(&mm->mmap_sem); - ret = get_user_pages_longterm(ua, entries, FOLL_WRITE, mem->hpages, NULL); + chunk = (1UL << (PAGE_SHIFT + MAX_ORDER - 1)) / + sizeof(struct vm_area_struct *); + chunk = min(chunk, entries); + for (entry = 0; entry < entries; entry += chunk) { + unsigned long n = min(entries - entry, chunk); + + ret = get_user_pages_longterm(ua + (entry << PAGE_SHIFT), n, + FOLL_WRITE, mem->hpages + entry, NULL); + if (ret == n) { + pinned += n; + continue; + } + if (ret > 0) + pinned += ret; + break; + } up_read(&mm->mmap_sem); - if (ret != entries) { - /* free the reference taken */ - for (i = 0; i < ret; i++) - put_page(mem->hpages[i]); - - vfree(mem->hpas); - kfree(mem); - ret = -EFAULT; - goto unlock_exit; + if (pinned != entries) { + if (!ret) + ret = -EFAULT; + goto free_exit; } pageshift = PAGE_SHIFT; @@ -183,21 +180,43 @@ static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua, } good_exit: - ret = 0; atomic64_set(&mem->mapped, 1); mem->used = 1; mem->ua = ua; mem->entries = entries; - *pmem = mem; - list_add_rcu(&mem->next, &mm->context.iommu_group_mem_list); + mutex_lock(&mem_list_mutex); -unlock_exit: - if (locked_entries && ret) - mm_iommu_adjust_locked_vm(mm, locked_entries, false); + list_for_each_entry_rcu(mem2, &mm->context.iommu_group_mem_list, next) { + /* Overlap? */ + if ((mem2->ua < (ua + (entries << PAGE_SHIFT))) && + (ua < (mem2->ua + + (mem2->entries << PAGE_SHIFT)))) { + ret = -EINVAL; + mutex_unlock(&mem_list_mutex); + goto free_exit; + } + } + + list_add_rcu(&mem->next, &mm->context.iommu_group_mem_list); mutex_unlock(&mem_list_mutex); + *pmem = mem; + + return 0; + +free_exit: + /* free the reference taken */ + for (i = 0; i < pinned; i++) + put_page(mem->hpages[i]); + + vfree(mem->hpas); + kfree(mem); + +unlock_exit: + mm_iommu_adjust_locked_vm(mm, locked_entries, false); + return ret; } @@ -266,7 +285,7 @@ static void mm_iommu_release(struct mm_iommu_table_group_mem_t *mem) long mm_iommu_put(struct mm_struct *mm, struct mm_iommu_table_group_mem_t *mem) { long ret = 0; - unsigned long entries, dev_hpa; + unsigned long unlock_entries = 0; mutex_lock(&mem_list_mutex); @@ -287,17 +306,17 @@ long mm_iommu_put(struct mm_struct *mm, struct mm_iommu_table_group_mem_t *mem) goto unlock_exit; } + if (mem->dev_hpa == MM_IOMMU_TABLE_INVALID_HPA) + unlock_entries = mem->entries; + /* @mapped became 0 so now mappings are disabled, release the region */ - entries = mem->entries; - dev_hpa = mem->dev_hpa; mm_iommu_release(mem); - if (dev_hpa == MM_IOMMU_TABLE_INVALID_HPA) - mm_iommu_adjust_locked_vm(mm, entries, false); - unlock_exit: mutex_unlock(&mem_list_mutex); + mm_iommu_adjust_locked_vm(mm, unlock_entries, false); + return ret; } EXPORT_SYMBOL_GPL(mm_iommu_put); diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 842b2c7e156a..50cd09b4e05d 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -324,7 +324,7 @@ config ARCH_ENABLE_SPLIT_PMD_PTLOCK config PPC_RADIX_MMU bool "Radix MMU Support" - depends on PPC_BOOK3S_64 + depends on PPC_BOOK3S_64 && HUGETLB_PAGE select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA default y help diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index c0d6c560df69..5a237e8dbf8d 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -352,7 +352,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, boot_params->hdr.loadflags &= ~KASLR_FLAG; /* Save RSDP address for later use. */ - boot_params->acpi_rsdp_addr = get_rsdp_addr(); + /* boot_params->acpi_rsdp_addr = get_rsdp_addr(); */ sanitize_boot_params(boot_params); diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c index 007b3fe9d727..98c7d12b945c 100644 --- a/arch/x86/entry/vdso/vclock_gettime.c +++ b/arch/x86/entry/vdso/vclock_gettime.c @@ -29,12 +29,12 @@ extern int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz); extern time_t __vdso_time(time_t *t); #ifdef CONFIG_PARAVIRT_CLOCK -extern u8 pvclock_page +extern u8 pvclock_page[PAGE_SIZE] __attribute__((visibility("hidden"))); #endif #ifdef CONFIG_HYPERV_TSCPAGE -extern u8 hvclock_page +extern u8 hvclock_page[PAGE_SIZE] __attribute__((visibility("hidden"))); #endif diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index 94a4b7fc75d0..d41de9af7a39 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -76,15 +76,15 @@ * Scope: Package (physical package) * MSR_PKG_C8_RESIDENCY: Package C8 Residency Counter. * perf code: 0x04 - * Available model: HSW ULT,CNL + * Available model: HSW ULT,KBL,CNL * Scope: Package (physical package) * MSR_PKG_C9_RESIDENCY: Package C9 Residency Counter. * perf code: 0x05 - * Available model: HSW ULT,CNL + * Available model: HSW ULT,KBL,CNL * Scope: Package (physical package) * MSR_PKG_C10_RESIDENCY: Package C10 Residency Counter. * perf code: 0x06 - * Available model: HSW ULT,GLM,CNL + * Available model: HSW ULT,KBL,GLM,CNL * Scope: Package (physical package) * */ @@ -566,8 +566,8 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = { X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_DESKTOP, snb_cstates), X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_X, snb_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_MOBILE, snb_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_DESKTOP, snb_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_MOBILE, hswult_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_DESKTOP, hswult_cstates), X86_CSTATES_MODEL(INTEL_FAM6_CANNONLAKE_MOBILE, cnl_cstates), diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 2779ace16d23..50b3e2d963c9 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -46,7 +46,7 @@ void ptdump_walk_user_pgd_level_checkwx(void); */ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __visible; -#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) +#define ZERO_PAGE(vaddr) ((void)(vaddr),virt_to_page(empty_zero_page)) extern spinlock_t pgd_lock; extern struct list_head pgd_list; diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index f905a2371080..8dacdb96899e 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -5,6 +5,7 @@ #include <linux/memblock.h> #include <linux/swapfile.h> #include <linux/swapops.h> +#include <linux/kmemleak.h> #include <asm/set_memory.h> #include <asm/e820/api.h> @@ -766,6 +767,11 @@ void free_init_pages(const char *what, unsigned long begin, unsigned long end) if (debug_pagealloc_enabled()) { pr_info("debug: unmapping init [mem %#010lx-%#010lx]\n", begin, end - 1); + /* + * Inform kmemleak about the hole in the memory since the + * corresponding pages will be unmapped. + */ + kmemleak_free_part((void *)begin, end - begin); set_memory_np(begin, (end - begin) >> PAGE_SHIFT); } else { /* diff --git a/crypto/lrw.c b/crypto/lrw.c index 0430ccd08728..08a0e458bc3e 100644 --- a/crypto/lrw.c +++ b/crypto/lrw.c @@ -212,8 +212,12 @@ static void crypt_done(struct crypto_async_request *areq, int err) { struct skcipher_request *req = areq->data; - if (!err) + if (!err) { + struct rctx *rctx = skcipher_request_ctx(req); + + rctx->subreq.base.flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; err = xor_tweak_post(req); + } skcipher_request_complete(req, err); } diff --git a/crypto/xts.c b/crypto/xts.c index 847f54f76789..2f948328cabb 100644 --- a/crypto/xts.c +++ b/crypto/xts.c @@ -137,8 +137,12 @@ static void crypt_done(struct crypto_async_request *areq, int err) { struct skcipher_request *req = areq->data; - if (!err) + if (!err) { + struct rctx *rctx = skcipher_request_ctx(req); + + rctx->subreq.base.flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; err = xor_tweak_post(req); + } skcipher_request_complete(req, err); } diff --git a/drivers/acpi/acpica/evgpe.c b/drivers/acpi/acpica/evgpe.c index 5e9d7348c16f..62d3aa74277b 100644 --- a/drivers/acpi/acpica/evgpe.c +++ b/drivers/acpi/acpica/evgpe.c @@ -81,12 +81,8 @@ acpi_status acpi_ev_enable_gpe(struct acpi_gpe_event_info *gpe_event_info) ACPI_FUNCTION_TRACE(ev_enable_gpe); - /* Clear the GPE status */ - status = acpi_hw_clear_gpe(gpe_event_info); - if (ACPI_FAILURE(status)) - return_ACPI_STATUS(status); - /* Enable the requested GPE */ + status = acpi_hw_low_set_gpe(gpe_event_info, ACPI_GPE_ENABLE); return_ACPI_STATUS(status); } diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 399cad7daae7..d58a359a6622 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -774,18 +774,18 @@ struct zram_work { struct zram *zram; unsigned long entry; struct bio *bio; + struct bio_vec bvec; }; #if PAGE_SIZE != 4096 static void zram_sync_read(struct work_struct *work) { - struct bio_vec bvec; struct zram_work *zw = container_of(work, struct zram_work, work); struct zram *zram = zw->zram; unsigned long entry = zw->entry; struct bio *bio = zw->bio; - read_from_bdev_async(zram, &bvec, entry, bio); + read_from_bdev_async(zram, &zw->bvec, entry, bio); } /* @@ -798,6 +798,7 @@ static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec, { struct zram_work work; + work.bvec = *bvec; work.zram = zram; work.entry = entry; work.bio = bio; diff --git a/drivers/dma/bcm2835-dma.c b/drivers/dma/bcm2835-dma.c index ec8a291d62ba..54093ffd0aef 100644 --- a/drivers/dma/bcm2835-dma.c +++ b/drivers/dma/bcm2835-dma.c @@ -671,7 +671,7 @@ static struct dma_async_tx_descriptor *bcm2835_dma_prep_slave_sg( d = bcm2835_dma_create_cb_chain(chan, direction, false, info, extra, frames, src, dst, 0, 0, - GFP_KERNEL); + GFP_NOWAIT); if (!d) return NULL; diff --git a/drivers/dma/mediatek/mtk-cqdma.c b/drivers/dma/mediatek/mtk-cqdma.c index 131f3974740d..814853842e29 100644 --- a/drivers/dma/mediatek/mtk-cqdma.c +++ b/drivers/dma/mediatek/mtk-cqdma.c @@ -253,7 +253,7 @@ static void mtk_cqdma_start(struct mtk_cqdma_pchan *pc, #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT mtk_dma_set(pc, MTK_CQDMA_DST2, cvd->dest >> MTK_CQDMA_ADDR2_SHFIT); #else - mtk_dma_set(pc, MTK_CQDMA_SRC2, 0); + mtk_dma_set(pc, MTK_CQDMA_DST2, 0); #endif /* setup the length */ diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c index 2b4f25698169..e2a5398f89b5 100644 --- a/drivers/dma/sh/rcar-dmac.c +++ b/drivers/dma/sh/rcar-dmac.c @@ -1282,6 +1282,9 @@ static unsigned int rcar_dmac_chan_get_residue(struct rcar_dmac_chan *chan, enum dma_status status; unsigned int residue = 0; unsigned int dptr = 0; + unsigned int chcrb; + unsigned int tcrb; + unsigned int i; if (!desc) return 0; @@ -1330,14 +1333,31 @@ static unsigned int rcar_dmac_chan_get_residue(struct rcar_dmac_chan *chan, } /* + * We need to read two registers. + * Make sure the control register does not skip to next chunk + * while reading the counter. + * Trying it 3 times should be enough: Initial read, retry, retry + * for the paranoid. + */ + for (i = 0; i < 3; i++) { + chcrb = rcar_dmac_chan_read(chan, RCAR_DMACHCRB) & + RCAR_DMACHCRB_DPTR_MASK; + tcrb = rcar_dmac_chan_read(chan, RCAR_DMATCRB); + /* Still the same? */ + if (chcrb == (rcar_dmac_chan_read(chan, RCAR_DMACHCRB) & + RCAR_DMACHCRB_DPTR_MASK)) + break; + } + WARN_ONCE(i >= 3, "residue might be not continuous!"); + + /* * In descriptor mode the descriptor running pointer is not maintained * by the interrupt handler, find the running descriptor from the * descriptor pointer field in the CHCRB register. In non-descriptor * mode just use the running descriptor pointer. */ if (desc->hwdescs.use) { - dptr = (rcar_dmac_chan_read(chan, RCAR_DMACHCRB) & - RCAR_DMACHCRB_DPTR_MASK) >> RCAR_DMACHCRB_DPTR_SHIFT; + dptr = chcrb >> RCAR_DMACHCRB_DPTR_SHIFT; if (dptr == 0) dptr = desc->nchunks; dptr--; @@ -1355,7 +1375,7 @@ static unsigned int rcar_dmac_chan_get_residue(struct rcar_dmac_chan *chan, } /* Add the residue for the current chunk. */ - residue += rcar_dmac_chan_read(chan, RCAR_DMATCRB) << desc->xfer_shift; + residue += tcrb << desc->xfer_shift; return residue; } @@ -1368,6 +1388,7 @@ static enum dma_status rcar_dmac_tx_status(struct dma_chan *chan, enum dma_status status; unsigned long flags; unsigned int residue; + bool cyclic; status = dma_cookie_status(chan, cookie, txstate); if (status == DMA_COMPLETE || !txstate) @@ -1375,10 +1396,11 @@ static enum dma_status rcar_dmac_tx_status(struct dma_chan *chan, spin_lock_irqsave(&rchan->lock, flags); residue = rcar_dmac_chan_get_residue(rchan, cookie); + cyclic = rchan->desc.running ? rchan->desc.running->cyclic : false; spin_unlock_irqrestore(&rchan->lock, flags); /* if there's no residue, the cookie is complete */ - if (!residue) + if (!residue && !cyclic) return DMA_COMPLETE; dma_set_residue(txstate, residue); diff --git a/drivers/gpio/gpio-eic-sprd.c b/drivers/gpio/gpio-eic-sprd.c index f0223cee9774..77092268ee95 100644 --- a/drivers/gpio/gpio-eic-sprd.c +++ b/drivers/gpio/gpio-eic-sprd.c @@ -414,6 +414,7 @@ static int sprd_eic_irq_set_type(struct irq_data *data, unsigned int flow_type) irq_set_handler_locked(data, handle_edge_irq); break; case IRQ_TYPE_EDGE_BOTH: + sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTMODE, 0); sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTBOTH, 1); irq_set_handler_locked(data, handle_edge_irq); break; diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 0495bf1d480a..bca3e7740ef6 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1379,7 +1379,7 @@ int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data, status = gpiochip_add_irqchip(chip, lock_key, request_key); if (status) - goto err_remove_chip; + goto err_free_gpiochip_mask; status = of_gpiochip_add(chip); if (status) @@ -1387,7 +1387,7 @@ int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data, status = gpiochip_init_valid_mask(chip); if (status) - goto err_remove_chip; + goto err_remove_of_chip; for (i = 0; i < chip->ngpio; i++) { struct gpio_desc *desc = &gdev->descs[i]; @@ -1415,14 +1415,18 @@ int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data, if (gpiolib_initialized) { status = gpiochip_setup_dev(gdev); if (status) - goto err_remove_chip; + goto err_remove_acpi_chip; } return 0; -err_remove_chip: +err_remove_acpi_chip: acpi_gpiochip_remove(chip); +err_remove_of_chip: gpiochip_free_hogs(chip); of_gpiochip_remove(chip); +err_remove_chip: + gpiochip_irqchip_remove(chip); +err_free_gpiochip_mask: gpiochip_free_valid_mask(chip); err_remove_irqchip_mask: gpiochip_irqchip_free_valid_mask(chip); diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c index db761329a1e3..ab7968c8f6a2 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c +++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c @@ -1046,6 +1046,10 @@ static bool dw_hdmi_support_scdc(struct dw_hdmi *hdmi) if (hdmi->version < 0x200a) return false; + /* Disable if no DDC bus */ + if (!hdmi->ddc) + return false; + /* Disable if SCDC is not supported, or if an HF-VSDB block is absent */ if (!display->hdmi.scdc.supported || !display->hdmi.scdc.scrambling.supported) @@ -1684,13 +1688,13 @@ static void hdmi_av_composer(struct dw_hdmi *hdmi, * Source Devices compliant shall set the * Source Version = 1. */ - drm_scdc_readb(&hdmi->i2c->adap, SCDC_SINK_VERSION, + drm_scdc_readb(hdmi->ddc, SCDC_SINK_VERSION, &bytes); - drm_scdc_writeb(&hdmi->i2c->adap, SCDC_SOURCE_VERSION, + drm_scdc_writeb(hdmi->ddc, SCDC_SOURCE_VERSION, min_t(u8, bytes, SCDC_MIN_SOURCE_VERSION)); /* Enabled Scrambling in the Sink */ - drm_scdc_set_scrambling(&hdmi->i2c->adap, 1); + drm_scdc_set_scrambling(hdmi->ddc, 1); /* * To activate the scrambler feature, you must ensure @@ -1706,7 +1710,7 @@ static void hdmi_av_composer(struct dw_hdmi *hdmi, hdmi_writeb(hdmi, 0, HDMI_FC_SCRAMBLER_CTRL); hdmi_writeb(hdmi, (u8)~HDMI_MC_SWRSTZ_TMDSSWRST_REQ, HDMI_MC_SWRSTZ); - drm_scdc_set_scrambling(&hdmi->i2c->adap, 0); + drm_scdc_set_scrambling(hdmi->ddc, 0); } } @@ -1800,6 +1804,8 @@ static void dw_hdmi_clear_overflow(struct dw_hdmi *hdmi) * iteration for others. * The Amlogic Meson GX SoCs (v2.01a) have been identified as needing * the workaround with a single iteration. + * The Rockchip RK3288 SoC (v2.00a) and RK3328/RK3399 SoCs (v2.11a) have + * been identified as needing the workaround with a single iteration. */ switch (hdmi->version) { @@ -1808,7 +1814,9 @@ static void dw_hdmi_clear_overflow(struct dw_hdmi *hdmi) break; case 0x131a: case 0x132a: + case 0x200a: case 0x201a: + case 0x211a: case 0x212a: count = 1; break; diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index ab4e60dfd6a3..98cea1f4b3bf 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -3862,14 +3862,16 @@ static int intel_ddi_compute_config(struct intel_encoder *encoder, ret = intel_hdmi_compute_config(encoder, pipe_config, conn_state); else ret = intel_dp_compute_config(encoder, pipe_config, conn_state); + if (ret) + return ret; - if (IS_GEN9_LP(dev_priv) && ret) + if (IS_GEN9_LP(dev_priv)) pipe_config->lane_lat_optim_mask = bxt_ddi_phy_calc_lane_lat_optim_mask(pipe_config->lane_count); intel_ddi_compute_min_voltage_level(dev_priv, pipe_config); - return ret; + return 0; } diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 8891f29a8c7f..48da4a969a0a 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1886,6 +1886,9 @@ static int intel_dp_dsc_compute_config(struct intel_dp *intel_dp, int pipe_bpp; int ret; + pipe_config->fec_enable = !intel_dp_is_edp(intel_dp) && + intel_dp_supports_fec(intel_dp, pipe_config); + if (!intel_dp_supports_dsc(intel_dp, pipe_config)) return -EINVAL; @@ -2116,9 +2119,6 @@ intel_dp_compute_config(struct intel_encoder *encoder, if (adjusted_mode->flags & DRM_MODE_FLAG_DBLCLK) return -EINVAL; - pipe_config->fec_enable = !intel_dp_is_edp(intel_dp) && - intel_dp_supports_fec(intel_dp, pipe_config); - ret = intel_dp_compute_link_config(encoder, pipe_config, conn_state); if (ret < 0) return ret; diff --git a/drivers/gpu/drm/imx/ipuv3-crtc.c b/drivers/gpu/drm/imx/ipuv3-crtc.c index ec3602ebbc1c..54011df8c2e8 100644 --- a/drivers/gpu/drm/imx/ipuv3-crtc.c +++ b/drivers/gpu/drm/imx/ipuv3-crtc.c @@ -71,7 +71,7 @@ static void ipu_crtc_disable_planes(struct ipu_crtc *ipu_crtc, if (disable_partial) ipu_plane_disable(ipu_crtc->plane[1], true); if (disable_full) - ipu_plane_disable(ipu_crtc->plane[0], false); + ipu_plane_disable(ipu_crtc->plane[0], true); } static void ipu_crtc_atomic_disable(struct drm_crtc *crtc, diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 19fc601c9eeb..a1bec2779e76 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -366,10 +366,9 @@ void drm_sched_increase_karma(struct drm_sched_job *bad) EXPORT_SYMBOL(drm_sched_increase_karma); /** - * drm_sched_hw_job_reset - stop the scheduler if it contains the bad job + * drm_sched_stop - stop the scheduler * * @sched: scheduler instance - * @bad: bad scheduler job * */ void drm_sched_stop(struct drm_gpu_scheduler *sched) diff --git a/drivers/gpu/drm/sun4i/sun4i_drv.c b/drivers/gpu/drm/sun4i/sun4i_drv.c index 3ebd9f5e2719..29258b404e54 100644 --- a/drivers/gpu/drm/sun4i/sun4i_drv.c +++ b/drivers/gpu/drm/sun4i/sun4i_drv.c @@ -16,6 +16,7 @@ #include <linux/of_reserved_mem.h> #include <drm/drmP.h> +#include <drm/drm_atomic_helper.h> #include <drm/drm_fb_cma_helper.h> #include <drm/drm_fb_helper.h> #include <drm/drm_gem_cma_helper.h> @@ -85,6 +86,8 @@ static int sun4i_drv_bind(struct device *dev) ret = -ENOMEM; goto free_drm; } + + dev_set_drvdata(dev, drm); drm->dev_private = drv; INIT_LIST_HEAD(&drv->frontend_list); INIT_LIST_HEAD(&drv->engine_list); @@ -144,8 +147,12 @@ static void sun4i_drv_unbind(struct device *dev) drm_dev_unregister(drm); drm_kms_helper_poll_fini(drm); + drm_atomic_helper_shutdown(drm); drm_mode_config_cleanup(drm); + + component_unbind_all(dev, NULL); of_reserved_mem_device_release(dev); + drm_dev_put(drm); } @@ -395,6 +402,8 @@ static int sun4i_drv_probe(struct platform_device *pdev) static int sun4i_drv_remove(struct platform_device *pdev) { + component_master_del(&pdev->dev, &sun4i_drv_master_ops); + return 0; } diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 0fa5034b9f9e..1a01669b159a 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -49,9 +49,8 @@ static void ttm_bo_global_kobj_release(struct kobject *kobj); * ttm_global_mutex - protecting the global BO state */ DEFINE_MUTEX(ttm_global_mutex); -struct ttm_bo_global ttm_bo_glob = { - .use_count = 0 -}; +unsigned ttm_bo_glob_use_count; +struct ttm_bo_global ttm_bo_glob; static struct attribute ttm_bo_count = { .name = "bo_count", @@ -1531,12 +1530,13 @@ static void ttm_bo_global_release(void) struct ttm_bo_global *glob = &ttm_bo_glob; mutex_lock(&ttm_global_mutex); - if (--glob->use_count > 0) + if (--ttm_bo_glob_use_count > 0) goto out; kobject_del(&glob->kobj); kobject_put(&glob->kobj); ttm_mem_global_release(&ttm_mem_glob); + memset(glob, 0, sizeof(*glob)); out: mutex_unlock(&ttm_global_mutex); } @@ -1548,7 +1548,7 @@ static int ttm_bo_global_init(void) unsigned i; mutex_lock(&ttm_global_mutex); - if (++glob->use_count > 1) + if (++ttm_bo_glob_use_count > 1) goto out; ret = ttm_mem_global_init(&ttm_mem_glob); diff --git a/drivers/gpu/drm/ttm/ttm_memory.c b/drivers/gpu/drm/ttm/ttm_memory.c index f1567c353b54..9a0909decb36 100644 --- a/drivers/gpu/drm/ttm/ttm_memory.c +++ b/drivers/gpu/drm/ttm/ttm_memory.c @@ -461,8 +461,8 @@ out_no_zone: void ttm_mem_global_release(struct ttm_mem_global *glob) { - unsigned int i; struct ttm_mem_zone *zone; + unsigned int i; /* let the page allocator first stop the shrink work. */ ttm_page_alloc_fini(); @@ -475,9 +475,10 @@ void ttm_mem_global_release(struct ttm_mem_global *glob) zone = glob->zones[i]; kobject_del(&zone->kobj); kobject_put(&zone->kobj); - } + } kobject_del(&glob->kobj); kobject_put(&glob->kobj); + memset(glob, 0, sizeof(*glob)); } static void ttm_check_swapping(struct ttm_mem_global *glob) diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c index 730008d3da76..1baa10e94484 100644 --- a/drivers/gpu/drm/vc4/vc4_crtc.c +++ b/drivers/gpu/drm/vc4/vc4_crtc.c @@ -1042,7 +1042,7 @@ static void vc4_crtc_reset(struct drm_crtc *crtc) { if (crtc->state) - __drm_atomic_helper_crtc_destroy_state(crtc->state); + vc4_crtc_destroy_state(crtc, crtc->state); crtc->state = kzalloc(sizeof(struct vc4_crtc_state), GFP_KERNEL); if (crtc->state) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 6165fe2c4504..1bfa353d995c 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -546,29 +546,13 @@ static void vmw_get_initial_size(struct vmw_private *dev_priv) } /** - * vmw_assume_iommu - Figure out whether coherent dma-remapping might be - * taking place. - * @dev: Pointer to the struct drm_device. - * - * Return: true if iommu present, false otherwise. - */ -static bool vmw_assume_iommu(struct drm_device *dev) -{ - const struct dma_map_ops *ops = get_dma_ops(dev->dev); - - return !dma_is_direct(ops) && ops && - ops->map_page != dma_direct_map_page; -} - -/** * vmw_dma_select_mode - Determine how DMA mappings should be set up for this * system. * * @dev_priv: Pointer to a struct vmw_private * - * This functions tries to determine the IOMMU setup and what actions - * need to be taken by the driver to make system pages visible to the - * device. + * This functions tries to determine what actions need to be taken by the + * driver to make system pages visible to the device. * If this function decides that DMA is not possible, it returns -EINVAL. * The driver may then try to disable features of the device that require * DMA. @@ -578,23 +562,16 @@ static int vmw_dma_select_mode(struct vmw_private *dev_priv) static const char *names[vmw_dma_map_max] = { [vmw_dma_phys] = "Using physical TTM page addresses.", [vmw_dma_alloc_coherent] = "Using coherent TTM pages.", - [vmw_dma_map_populate] = "Keeping DMA mappings.", + [vmw_dma_map_populate] = "Caching DMA mappings.", [vmw_dma_map_bind] = "Giving up DMA mappings early."}; if (vmw_force_coherent) dev_priv->map_mode = vmw_dma_alloc_coherent; - else if (vmw_assume_iommu(dev_priv->dev)) - dev_priv->map_mode = vmw_dma_map_populate; - else if (!vmw_force_iommu) - dev_priv->map_mode = vmw_dma_phys; - else if (IS_ENABLED(CONFIG_SWIOTLB) && swiotlb_nr_tbl()) - dev_priv->map_mode = vmw_dma_alloc_coherent; + else if (vmw_restrict_iommu) + dev_priv->map_mode = vmw_dma_map_bind; else dev_priv->map_mode = vmw_dma_map_populate; - if (dev_priv->map_mode == vmw_dma_map_populate && vmw_restrict_iommu) - dev_priv->map_mode = vmw_dma_map_bind; - /* No TTM coherent page pool? FIXME: Ask TTM instead! */ if (!(IS_ENABLED(CONFIG_SWIOTLB) || IS_ENABLED(CONFIG_INTEL_IOMMU)) && (dev_priv->map_mode == vmw_dma_alloc_coherent)) diff --git a/drivers/gpu/ipu-v3/ipu-dp.c b/drivers/gpu/ipu-v3/ipu-dp.c index 9b2b3fa479c4..5e44ff1f2085 100644 --- a/drivers/gpu/ipu-v3/ipu-dp.c +++ b/drivers/gpu/ipu-v3/ipu-dp.c @@ -195,7 +195,8 @@ int ipu_dp_setup_channel(struct ipu_dp *dp, ipu_dp_csc_init(flow, flow->foreground.in_cs, flow->out_cs, DP_COM_CONF_CSC_DEF_BOTH); } else { - if (flow->foreground.in_cs == flow->out_cs) + if (flow->foreground.in_cs == IPUV3_COLORSPACE_UNKNOWN || + flow->foreground.in_cs == flow->out_cs) /* * foreground identical to output, apply color * conversion on background @@ -261,6 +262,8 @@ void ipu_dp_disable_channel(struct ipu_dp *dp, bool sync) struct ipu_dp_priv *priv = flow->priv; u32 reg, csc; + dp->in_cs = IPUV3_COLORSPACE_UNKNOWN; + if (!dp->foreground) return; @@ -268,8 +271,9 @@ void ipu_dp_disable_channel(struct ipu_dp *dp, bool sync) reg = readl(flow->base + DP_COM_CONF); csc = reg & DP_COM_CONF_CSC_DEF_MASK; - if (csc == DP_COM_CONF_CSC_DEF_FG) - reg &= ~DP_COM_CONF_CSC_DEF_MASK; + reg &= ~DP_COM_CONF_CSC_DEF_MASK; + if (csc == DP_COM_CONF_CSC_DEF_BOTH || csc == DP_COM_CONF_CSC_DEF_BG) + reg |= DP_COM_CONF_CSC_DEF_BG; reg &= ~DP_COM_CONF_FG_EN; writel(reg, flow->base + DP_COM_CONF); @@ -347,6 +351,8 @@ int ipu_dp_init(struct ipu_soc *ipu, struct device *dev, unsigned long base) mutex_init(&priv->mutex); for (i = 0; i < IPUV3_NUM_FLOWS; i++) { + priv->flow[i].background.in_cs = IPUV3_COLORSPACE_UNKNOWN; + priv->flow[i].foreground.in_cs = IPUV3_COLORSPACE_UNKNOWN; priv->flow[i].foreground.foreground = true; priv->flow[i].base = priv->base + ipu_dp_flow_base[i]; priv->flow[i].priv = priv; diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index 38af18645133..c480ca385ffb 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -185,7 +185,7 @@ static int i2c_generic_bus_free(struct i2c_adapter *adap) int i2c_generic_scl_recovery(struct i2c_adapter *adap) { struct i2c_bus_recovery_info *bri = adap->bus_recovery_info; - int i = 0, scl = 1, ret; + int i = 0, scl = 1, ret = 0; if (bri->prepare_recovery) bri->prepare_recovery(adap); diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index ea0bc6885517..32cc8fe7902f 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -160,6 +160,7 @@ struct ib_uverbs_file { struct mutex umap_lock; struct list_head umaps; + struct page *disassociate_page; struct idr idr; /* spinlock protects write access to idr */ diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index f2e7ffe6fc54..c489f545baae 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -208,6 +208,9 @@ void ib_uverbs_release_file(struct kref *ref) kref_put(&file->async_file->ref, ib_uverbs_release_async_event_file); put_device(&file->device->dev); + + if (file->disassociate_page) + __free_pages(file->disassociate_page, 0); kfree(file); } @@ -877,9 +880,50 @@ static void rdma_umap_close(struct vm_area_struct *vma) kfree(priv); } +/* + * Once the zap_vma_ptes has been called touches to the VMA will come here and + * we return a dummy writable zero page for all the pfns. + */ +static vm_fault_t rdma_umap_fault(struct vm_fault *vmf) +{ + struct ib_uverbs_file *ufile = vmf->vma->vm_file->private_data; + struct rdma_umap_priv *priv = vmf->vma->vm_private_data; + vm_fault_t ret = 0; + + if (!priv) + return VM_FAULT_SIGBUS; + + /* Read only pages can just use the system zero page. */ + if (!(vmf->vma->vm_flags & (VM_WRITE | VM_MAYWRITE))) { + vmf->page = ZERO_PAGE(vmf->address); + get_page(vmf->page); + return 0; + } + + mutex_lock(&ufile->umap_lock); + if (!ufile->disassociate_page) + ufile->disassociate_page = + alloc_pages(vmf->gfp_mask | __GFP_ZERO, 0); + + if (ufile->disassociate_page) { + /* + * This VMA is forced to always be shared so this doesn't have + * to worry about COW. + */ + vmf->page = ufile->disassociate_page; + get_page(vmf->page); + } else { + ret = VM_FAULT_SIGBUS; + } + mutex_unlock(&ufile->umap_lock); + + return ret; +} + static const struct vm_operations_struct rdma_umap_ops = { .open = rdma_umap_open, .close = rdma_umap_close, + .fault = rdma_umap_fault, }; static struct rdma_umap_priv *rdma_user_mmap_pre(struct ib_ucontext *ucontext, @@ -889,6 +933,9 @@ static struct rdma_umap_priv *rdma_user_mmap_pre(struct ib_ucontext *ucontext, struct ib_uverbs_file *ufile = ucontext->ufile; struct rdma_umap_priv *priv; + if (!(vma->vm_flags & VM_SHARED)) + return ERR_PTR(-EINVAL); + if (vma->vm_end - vma->vm_start != size) return ERR_PTR(-EINVAL); @@ -992,7 +1039,7 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile) * at a time to get the lock ordering right. Typically there * will only be one mm, so no big deal. */ - down_write(&mm->mmap_sem); + down_read(&mm->mmap_sem); if (!mmget_still_valid(mm)) goto skip_mm; mutex_lock(&ufile->umap_lock); @@ -1006,11 +1053,10 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile) zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start); - vma->vm_flags &= ~(VM_SHARED | VM_MAYSHARE); } mutex_unlock(&ufile->umap_lock); skip_mm: - up_write(&mm->mmap_sem); + up_read(&mm->mmap_sem); mmput(mm); } } diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 66cdf625534f..60cf9f03e941 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -533,7 +533,7 @@ static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev, static int hns_roce_qp_has_sq(struct ib_qp_init_attr *attr) { - if (attr->qp_type == IB_QPT_XRC_TGT) + if (attr->qp_type == IB_QPT_XRC_TGT || !attr->cap.max_send_wr) return 0; return 1; diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 531ff20b32ad..d3dd290ae1b1 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1119,6 +1119,8 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, if (MLX5_CAP_GEN(mdev, qp_packet_based)) resp.flags |= MLX5_IB_QUERY_DEV_RESP_PACKET_BASED_CREDIT_MODE; + + resp.flags |= MLX5_IB_QUERY_DEV_RESP_FLAGS_SCAT2CQE_DCT; } if (field_avail(typeof(resp), sw_parsing_caps, @@ -2066,6 +2068,7 @@ static int mlx5_ib_mmap_clock_info_page(struct mlx5_ib_dev *dev, if (vma->vm_flags & VM_WRITE) return -EPERM; + vma->vm_flags &= ~VM_MAYWRITE; if (!dev->mdev->clock_info_page) return -EOPNOTSUPP; @@ -2231,19 +2234,18 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm if (vma->vm_flags & VM_WRITE) return -EPERM; + vma->vm_flags &= ~VM_MAYWRITE; /* Don't expose to user-space information it shouldn't have */ if (PAGE_SIZE > 4096) return -EOPNOTSUPP; - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); pfn = (dev->mdev->iseg_base + offsetof(struct mlx5_init_seg, internal_timer_h)) >> PAGE_SHIFT; - if (io_remap_pfn_range(vma, vma->vm_start, pfn, - PAGE_SIZE, vma->vm_page_prot)) - return -EAGAIN; - break; + return rdma_user_mmap_io(&context->ibucontext, vma, pfn, + PAGE_SIZE, + pgprot_noncached(vma->vm_page_prot)); case MLX5_IB_MMAP_CLOCK_INFO: return mlx5_ib_mmap_clock_info_page(dev, vma, context); diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 7cd006da1dae..8870c350fda0 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1818,13 +1818,16 @@ static void configure_responder_scat_cqe(struct ib_qp_init_attr *init_attr, rcqe_sz = mlx5_ib_get_cqe_size(init_attr->recv_cq); - if (rcqe_sz == 128) { - MLX5_SET(qpc, qpc, cs_res, MLX5_RES_SCAT_DATA64_CQE); + if (init_attr->qp_type == MLX5_IB_QPT_DCT) { + if (rcqe_sz == 128) + MLX5_SET(dctc, qpc, cs_res, MLX5_RES_SCAT_DATA64_CQE); + return; } - if (init_attr->qp_type != MLX5_IB_QPT_DCT) - MLX5_SET(qpc, qpc, cs_res, MLX5_RES_SCAT_DATA32_CQE); + MLX5_SET(qpc, qpc, cs_res, + rcqe_sz == 128 ? MLX5_RES_SCAT_DATA64_CQE : + MLX5_RES_SCAT_DATA32_CQE); } static void configure_requester_scat_cqe(struct mlx5_ib_dev *dev, diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index 728795043496..0bb6e39dd03a 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -608,11 +608,6 @@ static int rvt_set_page(struct ib_mr *ibmr, u64 addr) if (unlikely(mapped_segs == mr->mr.max_segs)) return -ENOMEM; - if (mr->mr.length == 0) { - mr->mr.user_base = addr; - mr->mr.iova = addr; - } - m = mapped_segs / RVT_SEGSZ; n = mapped_segs % RVT_SEGSZ; mr->mr.map[m]->segs[n].vaddr = (void *)addr; @@ -630,17 +625,24 @@ static int rvt_set_page(struct ib_mr *ibmr, u64 addr) * @sg_nents: number of entries in sg * @sg_offset: offset in bytes into sg * + * Overwrite rvt_mr length with mr length calculated by ib_sg_to_pages. + * * Return: number of sg elements mapped to the memory region */ int rvt_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset) { struct rvt_mr *mr = to_imr(ibmr); + int ret; mr->mr.length = 0; mr->mr.page_shift = PAGE_SHIFT; - return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, - rvt_set_page); + ret = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, rvt_set_page); + mr->mr.user_base = ibmr->iova; + mr->mr.iova = ibmr->iova; + mr->mr.offset = ibmr->iova - (u64)mr->mr.map[0]->segs[0].vaddr; + mr->mr.length = (size_t)ibmr->length; + return ret; } /** @@ -671,6 +673,7 @@ int rvt_fast_reg_mr(struct rvt_qp *qp, struct ib_mr *ibmr, u32 key, ibmr->rkey = key; mr->mr.lkey = key; mr->mr.access_flags = access; + mr->mr.iova = ibmr->iova; atomic_set(&mr->mr.lkey_invalid, 0); return 0; diff --git a/drivers/input/keyboard/Kconfig b/drivers/input/keyboard/Kconfig index a878351f1643..52d7f55fca32 100644 --- a/drivers/input/keyboard/Kconfig +++ b/drivers/input/keyboard/Kconfig @@ -420,7 +420,7 @@ config KEYBOARD_MPR121 config KEYBOARD_SNVS_PWRKEY tristate "IMX SNVS Power Key Driver" - depends on SOC_IMX6SX || SOC_IMX7D + depends on ARCH_MXC || COMPILE_TEST depends on OF help This is the snvs powerkey driver for the Freescale i.MX application diff --git a/drivers/input/rmi4/rmi_driver.c b/drivers/input/rmi4/rmi_driver.c index fc3ab93b7aea..7fb358f96195 100644 --- a/drivers/input/rmi4/rmi_driver.c +++ b/drivers/input/rmi4/rmi_driver.c @@ -860,7 +860,7 @@ static int rmi_create_function(struct rmi_device *rmi_dev, error = rmi_register_function(fn); if (error) - goto err_put_fn; + return error; if (pdt->function_number == 0x01) data->f01_container = fn; @@ -870,10 +870,6 @@ static int rmi_create_function(struct rmi_device *rmi_dev, list_add_tail(&fn->node, &data->function_list); return RMI_SCAN_CONTINUE; - -err_put_fn: - put_device(&fn->dev); - return error; } void rmi_enable_irq(struct rmi_device *rmi_dev, bool clear_wake) diff --git a/drivers/input/rmi4/rmi_f11.c b/drivers/input/rmi4/rmi_f11.c index df64d6aed4f7..93901ebd122a 100644 --- a/drivers/input/rmi4/rmi_f11.c +++ b/drivers/input/rmi4/rmi_f11.c @@ -1230,7 +1230,7 @@ static int rmi_f11_initialize(struct rmi_function *fn) } rc = f11_write_control_regs(fn, &f11->sens_query, - &f11->dev_controls, fn->fd.query_base_addr); + &f11->dev_controls, fn->fd.control_base_addr); if (rc) dev_warn(&fn->dev, "Failed to write control registers\n"); diff --git a/drivers/isdn/gigaset/bas-gigaset.c b/drivers/isdn/gigaset/bas-gigaset.c index ecdeb89645d0..149b1aca52a2 100644 --- a/drivers/isdn/gigaset/bas-gigaset.c +++ b/drivers/isdn/gigaset/bas-gigaset.c @@ -958,6 +958,7 @@ static void write_iso_callback(struct urb *urb) */ static int starturbs(struct bc_state *bcs) { + struct usb_device *udev = bcs->cs->hw.bas->udev; struct bas_bc_state *ubc = bcs->hw.bas; struct urb *urb; int j, k; @@ -975,8 +976,8 @@ static int starturbs(struct bc_state *bcs) rc = -EFAULT; goto error; } - usb_fill_int_urb(urb, bcs->cs->hw.bas->udev, - usb_rcvisocpipe(urb->dev, 3 + 2 * bcs->channel), + usb_fill_int_urb(urb, udev, + usb_rcvisocpipe(udev, 3 + 2 * bcs->channel), ubc->isoinbuf + k * BAS_INBUFSIZE, BAS_INBUFSIZE, read_iso_callback, bcs, BAS_FRAMETIME); @@ -1006,8 +1007,8 @@ static int starturbs(struct bc_state *bcs) rc = -EFAULT; goto error; } - usb_fill_int_urb(urb, bcs->cs->hw.bas->udev, - usb_sndisocpipe(urb->dev, 4 + 2 * bcs->channel), + usb_fill_int_urb(urb, udev, + usb_sndisocpipe(udev, 4 + 2 * bcs->channel), ubc->isooutbuf->data, sizeof(ubc->isooutbuf->data), write_iso_callback, &ubc->isoouturbs[k], diff --git a/drivers/mtd/nand/raw/marvell_nand.c b/drivers/mtd/nand/raw/marvell_nand.c index f38e5c1b87e4..d984538980e2 100644 --- a/drivers/mtd/nand/raw/marvell_nand.c +++ b/drivers/mtd/nand/raw/marvell_nand.c @@ -722,12 +722,6 @@ static void marvell_nfc_select_target(struct nand_chip *chip, struct marvell_nfc *nfc = to_marvell_nfc(chip->controller); u32 ndcr_generic; - if (chip == nfc->selected_chip && die_nr == marvell_nand->selected_die) - return; - - writel_relaxed(marvell_nand->ndtr0, nfc->regs + NDTR0); - writel_relaxed(marvell_nand->ndtr1, nfc->regs + NDTR1); - /* * Reset the NDCR register to a clean state for this particular chip, * also clear ND_RUN bit. @@ -739,6 +733,12 @@ static void marvell_nfc_select_target(struct nand_chip *chip, /* Also reset the interrupt status register */ marvell_nfc_clear_int(nfc, NDCR_ALL_INT); + if (chip == nfc->selected_chip && die_nr == marvell_nand->selected_die) + return; + + writel_relaxed(marvell_nand->ndtr0, nfc->regs + NDTR0); + writel_relaxed(marvell_nand->ndtr1, nfc->regs + NDTR1); + nfc->selected_chip = chip; marvell_nand->selected_die = die_nr; } diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c index b17d435de09f..4cdd8459c37e 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_com.c @@ -731,7 +731,7 @@ static int ena_com_config_llq_info(struct ena_com_dev *ena_dev, if (rc) pr_err("Cannot set LLQ configuration: %d\n", rc); - return 0; + return rc; } static int ena_com_wait_and_process_admin_cq_interrupts(struct ena_comp_ctx *comp_ctx, @@ -2195,7 +2195,7 @@ int ena_com_set_hash_function(struct ena_com_dev *ena_dev) if (unlikely(ret)) return ret; - if (get_resp.u.flow_hash_func.supported_func & (1 << rss->hash_func)) { + if (!(get_resp.u.flow_hash_func.supported_func & BIT(rss->hash_func))) { pr_err("Func hash %d isn't supported by device, abort\n", rss->hash_func); return -EOPNOTSUPP; @@ -2280,6 +2280,7 @@ int ena_com_fill_hash_function(struct ena_com_dev *ena_dev, return -EINVAL; } + rss->hash_func = func; rc = ena_com_set_hash_function(ena_dev); /* Restore the old function */ @@ -2802,7 +2803,11 @@ int ena_com_init_interrupt_moderation(struct ena_com_dev *ena_dev) /* if moderation is supported by device we set adaptive moderation */ delay_resolution = get_resp.u.intr_moderation.intr_delay_resolution; ena_com_update_intr_delay_resolution(ena_dev, delay_resolution); - ena_com_enable_adaptive_moderation(ena_dev); + + /* Disable adaptive moderation by default - can be enabled from + * ethtool + */ + ena_com_disable_adaptive_moderation(ena_dev); return 0; err: diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c index f3a5a384e6e8..fe596bc30a96 100644 --- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c +++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c @@ -697,8 +697,8 @@ static int ena_set_rxfh(struct net_device *netdev, const u32 *indir, if (indir) { for (i = 0; i < ENA_RX_RSS_TABLE_SIZE; i++) { rc = ena_com_indirect_table_fill_entry(ena_dev, - ENA_IO_RXQ_IDX(indir[i]), - i); + i, + ENA_IO_RXQ_IDX(indir[i])); if (unlikely(rc)) { netif_err(adapter, drv, netdev, "Cannot fill indirect table (index is too large)\n"); diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index a6eacf2099c3..bc5997a5f809 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -224,28 +224,23 @@ static int ena_setup_tx_resources(struct ena_adapter *adapter, int qid) if (!tx_ring->tx_buffer_info) { tx_ring->tx_buffer_info = vzalloc(size); if (!tx_ring->tx_buffer_info) - return -ENOMEM; + goto err_tx_buffer_info; } size = sizeof(u16) * tx_ring->ring_size; tx_ring->free_tx_ids = vzalloc_node(size, node); if (!tx_ring->free_tx_ids) { tx_ring->free_tx_ids = vzalloc(size); - if (!tx_ring->free_tx_ids) { - vfree(tx_ring->tx_buffer_info); - return -ENOMEM; - } + if (!tx_ring->free_tx_ids) + goto err_free_tx_ids; } size = tx_ring->tx_max_header_size; tx_ring->push_buf_intermediate_buf = vzalloc_node(size, node); if (!tx_ring->push_buf_intermediate_buf) { tx_ring->push_buf_intermediate_buf = vzalloc(size); - if (!tx_ring->push_buf_intermediate_buf) { - vfree(tx_ring->tx_buffer_info); - vfree(tx_ring->free_tx_ids); - return -ENOMEM; - } + if (!tx_ring->push_buf_intermediate_buf) + goto err_push_buf_intermediate_buf; } /* Req id ring for TX out of order completions */ @@ -259,6 +254,15 @@ static int ena_setup_tx_resources(struct ena_adapter *adapter, int qid) tx_ring->next_to_clean = 0; tx_ring->cpu = ena_irq->cpu; return 0; + +err_push_buf_intermediate_buf: + vfree(tx_ring->free_tx_ids); + tx_ring->free_tx_ids = NULL; +err_free_tx_ids: + vfree(tx_ring->tx_buffer_info); + tx_ring->tx_buffer_info = NULL; +err_tx_buffer_info: + return -ENOMEM; } /* ena_free_tx_resources - Free I/O Tx Resources per Queue @@ -378,6 +382,7 @@ static int ena_setup_rx_resources(struct ena_adapter *adapter, rx_ring->free_rx_ids = vzalloc(size); if (!rx_ring->free_rx_ids) { vfree(rx_ring->rx_buffer_info); + rx_ring->rx_buffer_info = NULL; return -ENOMEM; } } @@ -1820,6 +1825,7 @@ err_setup_rx: err_setup_tx: ena_free_io_irq(adapter); err_req_irq: + ena_del_napi(adapter); return rc; } @@ -2292,7 +2298,7 @@ static void ena_config_host_info(struct ena_com_dev *ena_dev, host_info->bdf = (pdev->bus->number << 8) | pdev->devfn; host_info->os_type = ENA_ADMIN_OS_LINUX; host_info->kernel_ver = LINUX_VERSION_CODE; - strncpy(host_info->kernel_ver_str, utsname()->version, + strlcpy(host_info->kernel_ver_str, utsname()->version, sizeof(host_info->kernel_ver_str) - 1); host_info->os_dist = 0; strncpy(host_info->os_dist_str, utsname()->release, diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index dfebc30c4841..d3f2408dc9e8 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -1648,7 +1648,7 @@ static struct sk_buff *dpaa_cleanup_tx_fd(const struct dpaa_priv *priv, qm_sg_entry_get_len(&sgt[0]), dma_dir); /* remaining pages were mapped with skb_frag_dma_map() */ - for (i = 1; i < nr_frags; i++) { + for (i = 1; i <= nr_frags; i++) { WARN_ON(qm_sg_entry_is_ext(&sgt[i])); dma_unmap_page(dev, qm_sg_addr(&sgt[i]), diff --git a/drivers/net/ethernet/freescale/ucc_geth_ethtool.c b/drivers/net/ethernet/freescale/ucc_geth_ethtool.c index 0beee2cc2ddd..722b6de24816 100644 --- a/drivers/net/ethernet/freescale/ucc_geth_ethtool.c +++ b/drivers/net/ethernet/freescale/ucc_geth_ethtool.c @@ -252,14 +252,12 @@ uec_set_ringparam(struct net_device *netdev, return -EINVAL; } + if (netif_running(netdev)) + return -EBUSY; + ug_info->bdRingLenRx[queue] = ring->rx_pending; ug_info->bdRingLenTx[queue] = ring->tx_pending; - if (netif_running(netdev)) { - /* FIXME: restart automatically */ - netdev_info(netdev, "Please re-open the interface\n"); - } - return ret; } diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index e0dce373cdd9..3d4a166a49d5 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -875,12 +875,6 @@ static inline int netvsc_send_pkt( } else if (ret == -EAGAIN) { netif_tx_stop_queue(txq); ndev_ctx->eth_stats.stop_queue++; - if (atomic_read(&nvchan->queue_sends) < 1 && - !net_device->tx_disable) { - netif_tx_wake_queue(txq); - ndev_ctx->eth_stats.wake_queue++; - ret = -ENOSPC; - } } else { netdev_err(ndev, "Unable to send packet pages %u len %u, ret %d\n", @@ -888,6 +882,15 @@ static inline int netvsc_send_pkt( ret); } + if (netif_tx_queue_stopped(txq) && + atomic_read(&nvchan->queue_sends) < 1 && + !net_device->tx_disable) { + netif_tx_wake_queue(txq); + ndev_ctx->eth_stats.wake_queue++; + if (ret == -EAGAIN) + ret = -ENOSPC; + } + return ret; } diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 77068c545de0..cd5966b0db57 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -2044,11 +2044,14 @@ bool phy_validate_pause(struct phy_device *phydev, struct ethtool_pauseparam *pp) { if (!linkmode_test_bit(ETHTOOL_LINK_MODE_Pause_BIT, - phydev->supported) || - (!linkmode_test_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, - phydev->supported) && - pp->rx_pause != pp->tx_pause)) + phydev->supported) && pp->rx_pause) return false; + + if (!linkmode_test_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, + phydev->supported) && + pp->rx_pause != pp->tx_pause) + return false; + return true; } EXPORT_SYMBOL(phy_validate_pause); diff --git a/drivers/net/slip/slhc.c b/drivers/net/slip/slhc.c index f4e93f5fc204..ea90db3c7705 100644 --- a/drivers/net/slip/slhc.c +++ b/drivers/net/slip/slhc.c @@ -153,7 +153,7 @@ out_fail: void slhc_free(struct slcompress *comp) { - if ( comp == NULLSLCOMPR ) + if ( IS_ERR_OR_NULL(comp) ) return; if ( comp->tstate != NULLSLSTATE ) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 7c1b362f599a..766f5779db92 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -6262,8 +6262,7 @@ static int __init pci_setup(char *str) } else if (!strncmp(str, "pcie_scan_all", 13)) { pci_add_flags(PCI_SCAN_ALL_PCIE_DEVS); } else if (!strncmp(str, "disable_acs_redir=", 18)) { - disable_acs_redir_param = - kstrdup(str + 18, GFP_KERNEL); + disable_acs_redir_param = str + 18; } else { printk(KERN_ERR "PCI: Unknown option `%s'\n", str); @@ -6274,3 +6273,19 @@ static int __init pci_setup(char *str) return 0; } early_param("pci", pci_setup); + +/* + * 'disable_acs_redir_param' is initialized in pci_setup(), above, to point + * to data in the __initdata section which will be freed after the init + * sequence is complete. We can't allocate memory in pci_setup() because some + * architectures do not have any memory allocation service available during + * an early_param() call. So we allocate memory and copy the variable here + * before the init section is freed. + */ +static int __init pci_realloc_setup_params(void) +{ + disable_acs_redir_param = kstrdup(disable_acs_redir_param, GFP_KERNEL); + + return 0; +} +pure_initcall(pci_realloc_setup_params); diff --git a/drivers/pci/pcie/Kconfig b/drivers/pci/pcie/Kconfig index 5cbdbca904ac..362eb8cfa53b 100644 --- a/drivers/pci/pcie/Kconfig +++ b/drivers/pci/pcie/Kconfig @@ -142,3 +142,11 @@ config PCIE_PTM This is only useful if you have devices that support PTM, but it is safe to enable even if you don't. + +config PCIE_BW + bool "PCI Express Bandwidth Change Notification" + depends on PCIEPORTBUS + help + This enables PCI Express Bandwidth Change Notification. If + you know link width or rate changes occur only to correct + unreliable links, you may answer Y. diff --git a/drivers/pci/pcie/Makefile b/drivers/pci/pcie/Makefile index f1d7bc1e5efa..efb9d2e71e9e 100644 --- a/drivers/pci/pcie/Makefile +++ b/drivers/pci/pcie/Makefile @@ -3,7 +3,6 @@ # Makefile for PCI Express features and port driver pcieportdrv-y := portdrv_core.o portdrv_pci.o err.o -pcieportdrv-y += bw_notification.o obj-$(CONFIG_PCIEPORTBUS) += pcieportdrv.o @@ -13,3 +12,4 @@ obj-$(CONFIG_PCIEAER_INJECT) += aer_inject.o obj-$(CONFIG_PCIE_PME) += pme.o obj-$(CONFIG_PCIE_DPC) += dpc.o obj-$(CONFIG_PCIE_PTM) += ptm.o +obj-$(CONFIG_PCIE_BW) += bw_notification.o diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h index 1d50dc58ac40..944827a8c7d3 100644 --- a/drivers/pci/pcie/portdrv.h +++ b/drivers/pci/pcie/portdrv.h @@ -49,7 +49,11 @@ int pcie_dpc_init(void); static inline int pcie_dpc_init(void) { return 0; } #endif +#ifdef CONFIG_PCIE_BW int pcie_bandwidth_notification_init(void); +#else +static inline int pcie_bandwidth_notification_init(void) { return 0; } +#endif /* Port Type */ #define PCIE_ANY_PORT (~0) diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c index 7d04f9d087a6..1b330129089f 100644 --- a/drivers/pci/pcie/portdrv_core.c +++ b/drivers/pci/pcie/portdrv_core.c @@ -55,7 +55,8 @@ static int pcie_message_numbers(struct pci_dev *dev, int mask, * 7.8.2, 7.10.10, 7.31.2. */ - if (mask & (PCIE_PORT_SERVICE_PME | PCIE_PORT_SERVICE_HP)) { + if (mask & (PCIE_PORT_SERVICE_PME | PCIE_PORT_SERVICE_HP | + PCIE_PORT_SERVICE_BWNOTIF)) { pcie_capability_read_word(dev, PCI_EXP_FLAGS, ®16); *pme = (reg16 & PCI_EXP_FLAGS_IRQ) >> 9; nvec = *pme + 1; diff --git a/drivers/power/supply/cpcap-battery.c b/drivers/power/supply/cpcap-battery.c index 08d5037fd052..6887870ba32c 100644 --- a/drivers/power/supply/cpcap-battery.c +++ b/drivers/power/supply/cpcap-battery.c @@ -221,6 +221,9 @@ static int cpcap_battery_cc_raw_div(struct cpcap_battery_ddata *ddata, int avg_current; u32 cc_lsb; + if (!divider) + return 0; + sample &= 0xffffff; /* 24-bits, unsigned */ offset &= 0x7ff; /* 10-bits, signed */ diff --git a/drivers/power/supply/power_supply_sysfs.c b/drivers/power/supply/power_supply_sysfs.c index dce24f596160..5358a80d854f 100644 --- a/drivers/power/supply/power_supply_sysfs.c +++ b/drivers/power/supply/power_supply_sysfs.c @@ -383,15 +383,11 @@ int power_supply_uevent(struct device *dev, struct kobj_uevent_env *env) char *prop_buf; char *attrname; - dev_dbg(dev, "uevent\n"); - if (!psy || !psy->desc) { dev_dbg(dev, "No power supply yet\n"); return ret; } - dev_dbg(dev, "POWER_SUPPLY_NAME=%s\n", psy->desc->name); - ret = add_uevent_var(env, "POWER_SUPPLY_NAME=%s", psy->desc->name); if (ret) return ret; @@ -427,8 +423,6 @@ int power_supply_uevent(struct device *dev, struct kobj_uevent_env *env) goto out; } - dev_dbg(dev, "prop %s=%s\n", attrname, prop_buf); - ret = add_uevent_var(env, "POWER_SUPPLY_%s=%s", attrname, prop_buf); kfree(attrname); if (ret) diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c index 8987cec9549d..ebcadaad89d1 100644 --- a/drivers/usb/core/driver.c +++ b/drivers/usb/core/driver.c @@ -473,11 +473,6 @@ static int usb_unbind_interface(struct device *dev) pm_runtime_disable(dev); pm_runtime_set_suspended(dev); - /* Undo any residual pm_autopm_get_interface_* calls */ - for (r = atomic_read(&intf->pm_usage_cnt); r > 0; --r) - usb_autopm_put_interface_no_suspend(intf); - atomic_set(&intf->pm_usage_cnt, 0); - if (!error) usb_autosuspend_device(udev); @@ -1633,7 +1628,6 @@ void usb_autopm_put_interface(struct usb_interface *intf) int status; usb_mark_last_busy(udev); - atomic_dec(&intf->pm_usage_cnt); status = pm_runtime_put_sync(&intf->dev); dev_vdbg(&intf->dev, "%s: cnt %d -> %d\n", __func__, atomic_read(&intf->dev.power.usage_count), @@ -1662,7 +1656,6 @@ void usb_autopm_put_interface_async(struct usb_interface *intf) int status; usb_mark_last_busy(udev); - atomic_dec(&intf->pm_usage_cnt); status = pm_runtime_put(&intf->dev); dev_vdbg(&intf->dev, "%s: cnt %d -> %d\n", __func__, atomic_read(&intf->dev.power.usage_count), @@ -1684,7 +1677,6 @@ void usb_autopm_put_interface_no_suspend(struct usb_interface *intf) struct usb_device *udev = interface_to_usbdev(intf); usb_mark_last_busy(udev); - atomic_dec(&intf->pm_usage_cnt); pm_runtime_put_noidle(&intf->dev); } EXPORT_SYMBOL_GPL(usb_autopm_put_interface_no_suspend); @@ -1715,8 +1707,6 @@ int usb_autopm_get_interface(struct usb_interface *intf) status = pm_runtime_get_sync(&intf->dev); if (status < 0) pm_runtime_put_sync(&intf->dev); - else - atomic_inc(&intf->pm_usage_cnt); dev_vdbg(&intf->dev, "%s: cnt %d -> %d\n", __func__, atomic_read(&intf->dev.power.usage_count), status); @@ -1750,8 +1740,6 @@ int usb_autopm_get_interface_async(struct usb_interface *intf) status = pm_runtime_get(&intf->dev); if (status < 0 && status != -EINPROGRESS) pm_runtime_put_noidle(&intf->dev); - else - atomic_inc(&intf->pm_usage_cnt); dev_vdbg(&intf->dev, "%s: cnt %d -> %d\n", __func__, atomic_read(&intf->dev.power.usage_count), status); @@ -1775,7 +1763,6 @@ void usb_autopm_get_interface_no_resume(struct usb_interface *intf) struct usb_device *udev = interface_to_usbdev(intf); usb_mark_last_busy(udev); - atomic_inc(&intf->pm_usage_cnt); pm_runtime_get_noresume(&intf->dev); } EXPORT_SYMBOL_GPL(usb_autopm_get_interface_no_resume); diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c index 82239f27c4cc..e844bb7b5676 100644 --- a/drivers/usb/core/message.c +++ b/drivers/usb/core/message.c @@ -820,9 +820,11 @@ int usb_string(struct usb_device *dev, int index, char *buf, size_t size) if (dev->state == USB_STATE_SUSPENDED) return -EHOSTUNREACH; - if (size <= 0 || !buf || !index) + if (size <= 0 || !buf) return -EINVAL; buf[0] = 0; + if (index <= 0 || index >= 256) + return -EINVAL; tbuf = kmalloc(256, GFP_NOIO); if (!tbuf) return -ENOMEM; diff --git a/drivers/usb/gadget/udc/dummy_hcd.c b/drivers/usb/gadget/udc/dummy_hcd.c index baf72f95f0f1..213b52508621 100644 --- a/drivers/usb/gadget/udc/dummy_hcd.c +++ b/drivers/usb/gadget/udc/dummy_hcd.c @@ -979,8 +979,18 @@ static int dummy_udc_start(struct usb_gadget *g, struct dummy_hcd *dum_hcd = gadget_to_dummy_hcd(g); struct dummy *dum = dum_hcd->dum; - if (driver->max_speed == USB_SPEED_UNKNOWN) + switch (g->speed) { + /* All the speeds we support */ + case USB_SPEED_LOW: + case USB_SPEED_FULL: + case USB_SPEED_HIGH: + case USB_SPEED_SUPER: + break; + default: + dev_err(dummy_dev(dum_hcd), "Unsupported driver max speed %d\n", + driver->max_speed); return -EINVAL; + } /* * SLAVE side init ... the layer above hardware, which @@ -1784,9 +1794,10 @@ static void dummy_timer(struct timer_list *t) /* Bus speed is 500000 bytes/ms, so use a little less */ total = 490000; break; - default: + default: /* Can't happen */ dev_err(dummy_dev(dum_hcd), "bogus device speed\n"); - return; + total = 0; + break; } /* FIXME if HZ != 1000 this will probably misbehave ... */ @@ -1828,7 +1839,7 @@ restart: /* Used up this frame's bandwidth? */ if (total <= 0) - break; + continue; /* find the gadget's ep for this request (if configured) */ address = usb_pipeendpoint (urb->pipe); diff --git a/drivers/usb/misc/yurex.c b/drivers/usb/misc/yurex.c index 6d9fd5f64903..7b306aa22d25 100644 --- a/drivers/usb/misc/yurex.c +++ b/drivers/usb/misc/yurex.c @@ -314,6 +314,7 @@ static void yurex_disconnect(struct usb_interface *interface) usb_deregister_dev(interface, &yurex_class); /* prevent more I/O from starting */ + usb_poison_urb(dev->urb); mutex_lock(&dev->io_mutex); dev->interface = NULL; mutex_unlock(&dev->io_mutex); diff --git a/drivers/usb/storage/realtek_cr.c b/drivers/usb/storage/realtek_cr.c index 31b024441938..cc794e25a0b6 100644 --- a/drivers/usb/storage/realtek_cr.c +++ b/drivers/usb/storage/realtek_cr.c @@ -763,18 +763,16 @@ static void rts51x_suspend_timer_fn(struct timer_list *t) break; case RTS51X_STAT_IDLE: case RTS51X_STAT_SS: - usb_stor_dbg(us, "RTS51X_STAT_SS, intf->pm_usage_cnt:%d, power.usage:%d\n", - atomic_read(&us->pusb_intf->pm_usage_cnt), + usb_stor_dbg(us, "RTS51X_STAT_SS, power.usage:%d\n", atomic_read(&us->pusb_intf->dev.power.usage_count)); - if (atomic_read(&us->pusb_intf->pm_usage_cnt) > 0) { + if (atomic_read(&us->pusb_intf->dev.power.usage_count) > 0) { usb_stor_dbg(us, "Ready to enter SS state\n"); rts51x_set_stat(chip, RTS51X_STAT_SS); /* ignore mass storage interface's children */ pm_suspend_ignore_children(&us->pusb_intf->dev, true); usb_autopm_put_interface_async(us->pusb_intf); - usb_stor_dbg(us, "RTS51X_STAT_SS 01, intf->pm_usage_cnt:%d, power.usage:%d\n", - atomic_read(&us->pusb_intf->pm_usage_cnt), + usb_stor_dbg(us, "RTS51X_STAT_SS 01, power.usage:%d\n", atomic_read(&us->pusb_intf->dev.power.usage_count)); } break; @@ -807,11 +805,10 @@ static void rts51x_invoke_transport(struct scsi_cmnd *srb, struct us_data *us) int ret; if (working_scsi(srb)) { - usb_stor_dbg(us, "working scsi, intf->pm_usage_cnt:%d, power.usage:%d\n", - atomic_read(&us->pusb_intf->pm_usage_cnt), + usb_stor_dbg(us, "working scsi, power.usage:%d\n", atomic_read(&us->pusb_intf->dev.power.usage_count)); - if (atomic_read(&us->pusb_intf->pm_usage_cnt) <= 0) { + if (atomic_read(&us->pusb_intf->dev.power.usage_count) <= 0) { ret = usb_autopm_get_interface(us->pusb_intf); usb_stor_dbg(us, "working scsi, ret=%d\n", ret); } diff --git a/drivers/usb/usbip/stub_rx.c b/drivers/usb/usbip/stub_rx.c index 97b09a42a10c..dbfb2f24d71e 100644 --- a/drivers/usb/usbip/stub_rx.c +++ b/drivers/usb/usbip/stub_rx.c @@ -361,16 +361,10 @@ static int get_pipe(struct stub_device *sdev, struct usbip_header *pdu) } if (usb_endpoint_xfer_isoc(epd)) { - /* validate packet size and number of packets */ - unsigned int maxp, packets, bytes; - - maxp = usb_endpoint_maxp(epd); - maxp *= usb_endpoint_maxp_mult(epd); - bytes = pdu->u.cmd_submit.transfer_buffer_length; - packets = DIV_ROUND_UP(bytes, maxp); - + /* validate number of packets */ if (pdu->u.cmd_submit.number_of_packets < 0 || - pdu->u.cmd_submit.number_of_packets > packets) { + pdu->u.cmd_submit.number_of_packets > + USBIP_MAX_ISO_PACKETS) { dev_err(&sdev->udev->dev, "CMD_SUBMIT: isoc invalid num packets %d\n", pdu->u.cmd_submit.number_of_packets); diff --git a/drivers/usb/usbip/usbip_common.h b/drivers/usb/usbip/usbip_common.h index bf8afe9b5883..8be857a4fa13 100644 --- a/drivers/usb/usbip/usbip_common.h +++ b/drivers/usb/usbip/usbip_common.h @@ -121,6 +121,13 @@ extern struct device_attribute dev_attr_usbip_debug; #define USBIP_DIR_OUT 0x00 #define USBIP_DIR_IN 0x01 +/* + * Arbitrary limit for the maximum number of isochronous packets in an URB, + * compare for example the uhci_submit_isochronous function in + * drivers/usb/host/uhci-q.c + */ +#define USBIP_MAX_ISO_PACKETS 1024 + /** * struct usbip_header_basic - data pertinent to every request * @command: the usbip request type diff --git a/drivers/w1/masters/ds2490.c b/drivers/w1/masters/ds2490.c index 0f4ecfcdb549..a9fb77585272 100644 --- a/drivers/w1/masters/ds2490.c +++ b/drivers/w1/masters/ds2490.c @@ -1016,15 +1016,15 @@ static int ds_probe(struct usb_interface *intf, /* alternative 3, 1ms interrupt (greatly speeds search), 64 byte bulk */ alt = 3; err = usb_set_interface(dev->udev, - intf->altsetting[alt].desc.bInterfaceNumber, alt); + intf->cur_altsetting->desc.bInterfaceNumber, alt); if (err) { dev_err(&dev->udev->dev, "Failed to set alternative setting %d " "for %d interface: err=%d.\n", alt, - intf->altsetting[alt].desc.bInterfaceNumber, err); + intf->cur_altsetting->desc.bInterfaceNumber, err); goto err_out_clear; } - iface_desc = &intf->altsetting[alt]; + iface_desc = intf->cur_altsetting; if (iface_desc->desc.bNumEndpoints != NUM_EP-1) { pr_info("Num endpoints=%d. It is not DS9490R.\n", iface_desc->desc.bNumEndpoints); diff --git a/fs/block_dev.c b/fs/block_dev.c index 24615c76c1d0..bb28e2ead679 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -264,7 +264,8 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter, bio_for_each_segment_all(bvec, &bio, i, iter_all) { if (should_dirty && !PageCompound(bvec->bv_page)) set_page_dirty_lock(bvec->bv_page); - put_page(bvec->bv_page); + if (!bio_flagged(&bio, BIO_NO_PAGE_REF)) + put_page(bvec->bv_page); } if (unlikely(bio.bi_status)) diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 920bf3b4b0ef..cccc75d15970 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -7,6 +7,7 @@ #include <linux/slab.h> #include <linux/pagemap.h> #include <linux/highmem.h> +#include <linux/sched/mm.h> #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -427,9 +428,13 @@ blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio, unsigned long this_sum_bytes = 0; int i; u64 offset; + unsigned nofs_flag; + + nofs_flag = memalloc_nofs_save(); + sums = kvzalloc(btrfs_ordered_sum_size(fs_info, bio->bi_iter.bi_size), + GFP_KERNEL); + memalloc_nofs_restore(nofs_flag); - sums = kzalloc(btrfs_ordered_sum_size(fs_info, bio->bi_iter.bi_size), - GFP_NOFS); if (!sums) return BLK_STS_RESOURCE; @@ -472,8 +477,10 @@ blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio, bytes_left = bio->bi_iter.bi_size - total_bytes; - sums = kzalloc(btrfs_ordered_sum_size(fs_info, bytes_left), - GFP_NOFS); + nofs_flag = memalloc_nofs_save(); + sums = kvzalloc(btrfs_ordered_sum_size(fs_info, + bytes_left), GFP_KERNEL); + memalloc_nofs_restore(nofs_flag); BUG_ON(!sums); /* -ENOMEM */ sums->len = bytes_left; ordered = btrfs_lookup_ordered_extent(inode, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 82fdda8ff5ab..2973608824ec 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6783,7 +6783,7 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode *inode, u64 extent_start = 0; u64 extent_end = 0; u64 objectid = btrfs_ino(inode); - u8 extent_type; + int extent_type = -1; struct btrfs_path *path = NULL; struct btrfs_root *root = inode->root; struct btrfs_file_extent_item *item; diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 6fde2b2741ef..45e3cfd1198b 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -6,6 +6,7 @@ #include <linux/slab.h> #include <linux/blkdev.h> #include <linux/writeback.h> +#include <linux/sched/mm.h> #include "ctree.h" #include "transaction.h" #include "btrfs_inode.h" @@ -442,7 +443,7 @@ void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry) cur = entry->list.next; sum = list_entry(cur, struct btrfs_ordered_sum, list); list_del(&sum->list); - kfree(sum); + kvfree(sum); } kmem_cache_free(btrfs_ordered_extent_cache, entry); } diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index a8f429882249..0637149fb9f9 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -1766,6 +1766,7 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size, unsigned ceph_dentry_hash(struct inode *dir, struct dentry *dn) { struct ceph_inode_info *dci = ceph_inode(dir); + unsigned hash; switch (dci->i_dir_layout.dl_dir_hash) { case 0: /* for backward compat */ @@ -1773,8 +1774,11 @@ unsigned ceph_dentry_hash(struct inode *dir, struct dentry *dn) return dn->d_name.hash; default: - return ceph_str_hash(dci->i_dir_layout.dl_dir_hash, + spin_lock(&dn->d_lock); + hash = ceph_str_hash(dci->i_dir_layout.dl_dir_hash, dn->d_name.name, dn->d_name.len); + spin_unlock(&dn->d_lock); + return hash; } } diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 2d61ddda9bf5..c2feb310ac1e 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -1163,6 +1163,19 @@ static int splice_dentry(struct dentry **pdn, struct inode *in) return 0; } +static int d_name_cmp(struct dentry *dentry, const char *name, size_t len) +{ + int ret; + + /* take d_lock to ensure dentry->d_name stability */ + spin_lock(&dentry->d_lock); + ret = dentry->d_name.len - len; + if (!ret) + ret = memcmp(dentry->d_name.name, name, len); + spin_unlock(&dentry->d_lock); + return ret; +} + /* * Incorporate results into the local cache. This is either just * one inode, or a directory, dentry, and possibly linked-to inode (e.g., @@ -1412,7 +1425,8 @@ retry_lookup: err = splice_dentry(&req->r_dentry, in); if (err < 0) goto done; - } else if (rinfo->head->is_dentry) { + } else if (rinfo->head->is_dentry && + !d_name_cmp(req->r_dentry, rinfo->dname, rinfo->dname_len)) { struct ceph_vino *ptvino = NULL; if ((le32_to_cpu(rinfo->diri.in->cap.caps) & CEPH_CAP_FILE_SHARED) || diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 21c33ed048ed..9049c2a3e972 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1414,6 +1414,15 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, list_add(&ci->i_prealloc_cap_flush->i_list, &to_remove); ci->i_prealloc_cap_flush = NULL; } + + if (drop && + ci->i_wrbuffer_ref_head == 0 && + ci->i_wr_ref == 0 && + ci->i_dirty_caps == 0 && + ci->i_flushing_caps == 0) { + ceph_put_snap_context(ci->i_head_snapc); + ci->i_head_snapc = NULL; + } } spin_unlock(&ci->i_ceph_lock); while (!list_empty(&to_remove)) { @@ -2161,10 +2170,39 @@ retry: return path; } +/* Duplicate the dentry->d_name.name safely */ +static int clone_dentry_name(struct dentry *dentry, const char **ppath, + int *ppathlen) +{ + u32 len; + char *name; + +retry: + len = READ_ONCE(dentry->d_name.len); + name = kmalloc(len + 1, GFP_NOFS); + if (!name) + return -ENOMEM; + + spin_lock(&dentry->d_lock); + if (dentry->d_name.len != len) { + spin_unlock(&dentry->d_lock); + kfree(name); + goto retry; + } + memcpy(name, dentry->d_name.name, len); + spin_unlock(&dentry->d_lock); + + name[len] = '\0'; + *ppath = name; + *ppathlen = len; + return 0; +} + static int build_dentry_path(struct dentry *dentry, struct inode *dir, const char **ppath, int *ppathlen, u64 *pino, - int *pfreepath) + bool *pfreepath, bool parent_locked) { + int ret; char *path; rcu_read_lock(); @@ -2173,8 +2211,15 @@ static int build_dentry_path(struct dentry *dentry, struct inode *dir, if (dir && ceph_snap(dir) == CEPH_NOSNAP) { *pino = ceph_ino(dir); rcu_read_unlock(); - *ppath = dentry->d_name.name; - *ppathlen = dentry->d_name.len; + if (parent_locked) { + *ppath = dentry->d_name.name; + *ppathlen = dentry->d_name.len; + } else { + ret = clone_dentry_name(dentry, ppath, ppathlen); + if (ret) + return ret; + *pfreepath = true; + } return 0; } rcu_read_unlock(); @@ -2182,13 +2227,13 @@ static int build_dentry_path(struct dentry *dentry, struct inode *dir, if (IS_ERR(path)) return PTR_ERR(path); *ppath = path; - *pfreepath = 1; + *pfreepath = true; return 0; } static int build_inode_path(struct inode *inode, const char **ppath, int *ppathlen, u64 *pino, - int *pfreepath) + bool *pfreepath) { struct dentry *dentry; char *path; @@ -2204,7 +2249,7 @@ static int build_inode_path(struct inode *inode, if (IS_ERR(path)) return PTR_ERR(path); *ppath = path; - *pfreepath = 1; + *pfreepath = true; return 0; } @@ -2215,7 +2260,7 @@ static int build_inode_path(struct inode *inode, static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry, struct inode *rdiri, const char *rpath, u64 rino, const char **ppath, int *pathlen, - u64 *ino, int *freepath) + u64 *ino, bool *freepath, bool parent_locked) { int r = 0; @@ -2225,7 +2270,7 @@ static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry, ceph_snap(rinode)); } else if (rdentry) { r = build_dentry_path(rdentry, rdiri, ppath, pathlen, ino, - freepath); + freepath, parent_locked); dout(" dentry %p %llx/%.*s\n", rdentry, *ino, *pathlen, *ppath); } else if (rpath || rino) { @@ -2251,7 +2296,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, const char *path2 = NULL; u64 ino1 = 0, ino2 = 0; int pathlen1 = 0, pathlen2 = 0; - int freepath1 = 0, freepath2 = 0; + bool freepath1 = false, freepath2 = false; int len; u16 releases; void *p, *end; @@ -2259,16 +2304,19 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, ret = set_request_path_attr(req->r_inode, req->r_dentry, req->r_parent, req->r_path1, req->r_ino1.ino, - &path1, &pathlen1, &ino1, &freepath1); + &path1, &pathlen1, &ino1, &freepath1, + test_bit(CEPH_MDS_R_PARENT_LOCKED, + &req->r_req_flags)); if (ret < 0) { msg = ERR_PTR(ret); goto out; } + /* If r_old_dentry is set, then assume that its parent is locked */ ret = set_request_path_attr(NULL, req->r_old_dentry, req->r_old_dentry_dir, req->r_path2, req->r_ino2.ino, - &path2, &pathlen2, &ino2, &freepath2); + &path2, &pathlen2, &ino2, &freepath2, true); if (ret < 0) { msg = ERR_PTR(ret); goto out_free1; diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index 89aa37fa0f84..b26e12cd8ec3 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -572,7 +572,12 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci) old_snapc = NULL; update_snapc: - if (ci->i_head_snapc) { + if (ci->i_wrbuffer_ref_head == 0 && + ci->i_wr_ref == 0 && + ci->i_dirty_caps == 0 && + ci->i_flushing_caps == 0) { + ci->i_head_snapc = NULL; + } else { ci->i_head_snapc = ceph_get_snap_context(new_snapc); dout(" new snapc is %p\n", new_snapc); } diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 9c0ccc06d172..7037a137fa53 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -2877,7 +2877,6 @@ static void collect_uncached_write_data(struct cifs_aio_ctx *ctx) struct cifs_tcon *tcon; struct cifs_sb_info *cifs_sb; struct dentry *dentry = ctx->cfile->dentry; - unsigned int i; int rc; tcon = tlink_tcon(ctx->cfile->tlink); @@ -2941,10 +2940,6 @@ restart_loop: kref_put(&wdata->refcount, cifs_uncached_writedata_release); } - if (!ctx->direct_io) - for (i = 0; i < ctx->npages; i++) - put_page(ctx->bv[i].bv_page); - cifs_stats_bytes_written(tcon, ctx->total_len); set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags); @@ -3582,7 +3577,6 @@ collect_uncached_read_data(struct cifs_aio_ctx *ctx) struct iov_iter *to = &ctx->iter; struct cifs_sb_info *cifs_sb; struct cifs_tcon *tcon; - unsigned int i; int rc; tcon = tlink_tcon(ctx->cfile->tlink); @@ -3666,15 +3660,8 @@ again: kref_put(&rdata->refcount, cifs_uncached_readdata_release); } - if (!ctx->direct_io) { - for (i = 0; i < ctx->npages; i++) { - if (ctx->should_dirty) - set_page_dirty(ctx->bv[i].bv_page); - put_page(ctx->bv[i].bv_page); - } - + if (!ctx->direct_io) ctx->total_len = ctx->len - iov_iter_count(to); - } /* mask nodata case */ if (rc == -ENODATA) diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 53fdb5df0d2e..538fd7d807e4 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1735,6 +1735,10 @@ cifs_do_rename(const unsigned int xid, struct dentry *from_dentry, if (rc == 0 || rc != -EBUSY) goto do_rename_exit; + /* Don't fall back to using SMB on SMB 2+ mount */ + if (server->vals->protocol_id != 0) + goto do_rename_exit; + /* open-file renames don't work across directories */ if (to_dentry->d_parent != from_dentry->d_parent) goto do_rename_exit; diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 1e1626a2cfc3..0dc6f08020ac 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -789,6 +789,11 @@ cifs_aio_ctx_alloc(void) { struct cifs_aio_ctx *ctx; + /* + * Must use kzalloc to initialize ctx->bv to NULL and ctx->direct_io + * to false so that we know when we have to unreference pages within + * cifs_aio_ctx_release() + */ ctx = kzalloc(sizeof(struct cifs_aio_ctx), GFP_KERNEL); if (!ctx) return NULL; @@ -807,7 +812,23 @@ cifs_aio_ctx_release(struct kref *refcount) struct cifs_aio_ctx, refcount); cifsFileInfo_put(ctx->cfile); - kvfree(ctx->bv); + + /* + * ctx->bv is only set if setup_aio_ctx_iter() was call successfuly + * which means that iov_iter_get_pages() was a success and thus that + * we have taken reference on pages. + */ + if (ctx->bv) { + unsigned i; + + for (i = 0; i < ctx->npages; i++) { + if (ctx->should_dirty) + set_page_dirty(ctx->bv[i].bv_page); + put_page(ctx->bv[i].bv_page); + } + kvfree(ctx->bv); + } + kfree(ctx); } diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index b8f7262ac354..a37774a55f3a 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -3466,6 +3466,7 @@ SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms, io_parms->tcon->tid, ses->Suid, io_parms->offset, 0); free_rsp_buf(resp_buftype, rsp_iov.iov_base); + cifs_small_buf_release(req); return rc == -ENODATA ? 0 : rc; } else trace_smb3_read_done(xid, req->PersistentFileId, diff --git a/fs/io_uring.c b/fs/io_uring.c index f65f85d89217..84efb8956734 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -4,15 +4,28 @@ * supporting fast/efficient IO. * * A note on the read/write ordering memory barriers that are matched between - * the application and kernel side. When the application reads the CQ ring - * tail, it must use an appropriate smp_rmb() to order with the smp_wmb() - * the kernel uses after writing the tail. Failure to do so could cause a - * delay in when the application notices that completion events available. - * This isn't a fatal condition. Likewise, the application must use an - * appropriate smp_wmb() both before writing the SQ tail, and after writing - * the SQ tail. The first one orders the sqe writes with the tail write, and - * the latter is paired with the smp_rmb() the kernel will issue before - * reading the SQ tail on submission. + * the application and kernel side. + * + * After the application reads the CQ ring tail, it must use an + * appropriate smp_rmb() to pair with the smp_wmb() the kernel uses + * before writing the tail (using smp_load_acquire to read the tail will + * do). It also needs a smp_mb() before updating CQ head (ordering the + * entry load(s) with the head store), pairing with an implicit barrier + * through a control-dependency in io_get_cqring (smp_store_release to + * store head will do). Failure to do so could lead to reading invalid + * CQ entries. + * + * Likewise, the application must use an appropriate smp_wmb() before + * writing the SQ tail (ordering SQ entry stores with the tail store), + * which pairs with smp_load_acquire in io_get_sqring (smp_store_release + * to store the tail will do). And it needs a barrier ordering the SQ + * head load before writing new SQ entries (smp_load_acquire to read + * head will do). + * + * When using the SQ poll thread (IORING_SETUP_SQPOLL), the application + * needs to check the SQ flags for IORING_SQ_NEED_WAKEUP *after* + * updating the SQ tail; a full memory barrier smp_mb() is needed + * between. * * Also see the examples in the liburing library: * @@ -70,20 +83,108 @@ struct io_uring { u32 tail ____cacheline_aligned_in_smp; }; +/* + * This data is shared with the application through the mmap at offset + * IORING_OFF_SQ_RING. + * + * The offsets to the member fields are published through struct + * io_sqring_offsets when calling io_uring_setup. + */ struct io_sq_ring { + /* + * Head and tail offsets into the ring; the offsets need to be + * masked to get valid indices. + * + * The kernel controls head and the application controls tail. + */ struct io_uring r; + /* + * Bitmask to apply to head and tail offsets (constant, equals + * ring_entries - 1) + */ u32 ring_mask; + /* Ring size (constant, power of 2) */ u32 ring_entries; + /* + * Number of invalid entries dropped by the kernel due to + * invalid index stored in array + * + * Written by the kernel, shouldn't be modified by the + * application (i.e. get number of "new events" by comparing to + * cached value). + * + * After a new SQ head value was read by the application this + * counter includes all submissions that were dropped reaching + * the new SQ head (and possibly more). + */ u32 dropped; + /* + * Runtime flags + * + * Written by the kernel, shouldn't be modified by the + * application. + * + * The application needs a full memory barrier before checking + * for IORING_SQ_NEED_WAKEUP after updating the sq tail. + */ u32 flags; + /* + * Ring buffer of indices into array of io_uring_sqe, which is + * mmapped by the application using the IORING_OFF_SQES offset. + * + * This indirection could e.g. be used to assign fixed + * io_uring_sqe entries to operations and only submit them to + * the queue when needed. + * + * The kernel modifies neither the indices array nor the entries + * array. + */ u32 array[]; }; +/* + * This data is shared with the application through the mmap at offset + * IORING_OFF_CQ_RING. + * + * The offsets to the member fields are published through struct + * io_cqring_offsets when calling io_uring_setup. + */ struct io_cq_ring { + /* + * Head and tail offsets into the ring; the offsets need to be + * masked to get valid indices. + * + * The application controls head and the kernel tail. + */ struct io_uring r; + /* + * Bitmask to apply to head and tail offsets (constant, equals + * ring_entries - 1) + */ u32 ring_mask; + /* Ring size (constant, power of 2) */ u32 ring_entries; + /* + * Number of completion events lost because the queue was full; + * this should be avoided by the application by making sure + * there are not more requests pending thatn there is space in + * the completion queue. + * + * Written by the kernel, shouldn't be modified by the + * application (i.e. get number of "new events" by comparing to + * cached value). + * + * As completion events come in out of order this counter is not + * ordered with any other data. + */ u32 overflow; + /* + * Ring buffer of completion events. + * + * The kernel writes completion events fresh every time they are + * produced, so the application is allowed to modify pending + * entries. + */ struct io_uring_cqe cqes[]; }; @@ -221,7 +322,7 @@ struct io_kiocb { struct list_head list; unsigned int flags; refcount_t refs; -#define REQ_F_FORCE_NONBLOCK 1 /* inline submission attempt */ +#define REQ_F_NOWAIT 1 /* must not punt to workers */ #define REQ_F_IOPOLL_COMPLETED 2 /* polled IO has completed */ #define REQ_F_FIXED_FILE 4 /* ctx owns file */ #define REQ_F_SEQ_PREV 8 /* sequential with previous */ @@ -317,12 +418,6 @@ static void io_commit_cqring(struct io_ring_ctx *ctx) /* order cqe stores with ring update */ smp_store_release(&ring->r.tail, ctx->cached_cq_tail); - /* - * Write sider barrier of tail update, app has read side. See - * comment at the top of this file. - */ - smp_wmb(); - if (wq_has_sleeper(&ctx->cq_wait)) { wake_up_interruptible(&ctx->cq_wait); kill_fasync(&ctx->cq_fasync, SIGIO, POLL_IN); @@ -336,8 +431,11 @@ static struct io_uring_cqe *io_get_cqring(struct io_ring_ctx *ctx) unsigned tail; tail = ctx->cached_cq_tail; - /* See comment at the top of the file */ - smp_rmb(); + /* + * writes to the cq entry need to come after reading head; the + * control dependency is enough as we're using WRITE_ONCE to + * fill the cq entry + */ if (tail - READ_ONCE(ring->r.head) == ring->ring_entries) return NULL; @@ -740,7 +838,7 @@ static bool io_file_supports_async(struct file *file) } static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s, - bool force_nonblock, struct io_submit_state *state) + bool force_nonblock) { const struct io_uring_sqe *sqe = s->sqe; struct io_ring_ctx *ctx = req->ctx; @@ -774,10 +872,14 @@ static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s, ret = kiocb_set_rw_flags(kiocb, READ_ONCE(sqe->rw_flags)); if (unlikely(ret)) return ret; - if (force_nonblock) { + + /* don't allow async punt if RWF_NOWAIT was requested */ + if (kiocb->ki_flags & IOCB_NOWAIT) + req->flags |= REQ_F_NOWAIT; + + if (force_nonblock) kiocb->ki_flags |= IOCB_NOWAIT; - req->flags |= REQ_F_FORCE_NONBLOCK; - } + if (ctx->flags & IORING_SETUP_IOPOLL) { if (!(kiocb->ki_flags & IOCB_DIRECT) || !kiocb->ki_filp->f_op->iopoll) @@ -938,7 +1040,7 @@ static void io_async_list_note(int rw, struct io_kiocb *req, size_t len) } static int io_read(struct io_kiocb *req, const struct sqe_submit *s, - bool force_nonblock, struct io_submit_state *state) + bool force_nonblock) { struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs; struct kiocb *kiocb = &req->rw; @@ -947,7 +1049,7 @@ static int io_read(struct io_kiocb *req, const struct sqe_submit *s, size_t iov_count; int ret; - ret = io_prep_rw(req, s, force_nonblock, state); + ret = io_prep_rw(req, s, force_nonblock); if (ret) return ret; file = kiocb->ki_filp; @@ -985,7 +1087,7 @@ static int io_read(struct io_kiocb *req, const struct sqe_submit *s, } static int io_write(struct io_kiocb *req, const struct sqe_submit *s, - bool force_nonblock, struct io_submit_state *state) + bool force_nonblock) { struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs; struct kiocb *kiocb = &req->rw; @@ -994,7 +1096,7 @@ static int io_write(struct io_kiocb *req, const struct sqe_submit *s, size_t iov_count; int ret; - ret = io_prep_rw(req, s, force_nonblock, state); + ret = io_prep_rw(req, s, force_nonblock); if (ret) return ret; @@ -1336,8 +1438,7 @@ static int io_poll_add(struct io_kiocb *req, const struct io_uring_sqe *sqe) } static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req, - const struct sqe_submit *s, bool force_nonblock, - struct io_submit_state *state) + const struct sqe_submit *s, bool force_nonblock) { int ret, opcode; @@ -1353,18 +1454,18 @@ static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req, case IORING_OP_READV: if (unlikely(s->sqe->buf_index)) return -EINVAL; - ret = io_read(req, s, force_nonblock, state); + ret = io_read(req, s, force_nonblock); break; case IORING_OP_WRITEV: if (unlikely(s->sqe->buf_index)) return -EINVAL; - ret = io_write(req, s, force_nonblock, state); + ret = io_write(req, s, force_nonblock); break; case IORING_OP_READ_FIXED: - ret = io_read(req, s, force_nonblock, state); + ret = io_read(req, s, force_nonblock); break; case IORING_OP_WRITE_FIXED: - ret = io_write(req, s, force_nonblock, state); + ret = io_write(req, s, force_nonblock); break; case IORING_OP_FSYNC: ret = io_fsync(req, s->sqe, force_nonblock); @@ -1437,8 +1538,7 @@ restart: struct sqe_submit *s = &req->submit; const struct io_uring_sqe *sqe = s->sqe; - /* Ensure we clear previously set forced non-block flag */ - req->flags &= ~REQ_F_FORCE_NONBLOCK; + /* Ensure we clear previously set non-block flag */ req->rw.ki_flags &= ~IOCB_NOWAIT; ret = 0; @@ -1457,7 +1557,7 @@ restart: s->has_user = cur_mm != NULL; s->needs_lock = true; do { - ret = __io_submit_sqe(ctx, req, s, false, NULL); + ret = __io_submit_sqe(ctx, req, s, false); /* * We can get EAGAIN for polled IO even though * we're forcing a sync submission from here, @@ -1468,10 +1568,11 @@ restart: break; cond_resched(); } while (1); - - /* drop submission reference */ - io_put_req(req); } + + /* drop submission reference */ + io_put_req(req); + if (ret) { io_cqring_add_event(ctx, sqe->user_data, ret, 0); io_put_req(req); @@ -1623,8 +1724,8 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s, if (unlikely(ret)) goto out; - ret = __io_submit_sqe(ctx, req, s, true, state); - if (ret == -EAGAIN) { + ret = __io_submit_sqe(ctx, req, s, true); + if (ret == -EAGAIN && !(req->flags & REQ_F_NOWAIT)) { struct io_uring_sqe *sqe_copy; sqe_copy = kmalloc(sizeof(*sqe_copy), GFP_KERNEL); @@ -1698,24 +1799,10 @@ static void io_commit_sqring(struct io_ring_ctx *ctx) * write new data to them. */ smp_store_release(&ring->r.head, ctx->cached_sq_head); - - /* - * write side barrier of head update, app has read side. See - * comment at the top of this file - */ - smp_wmb(); } } /* - * Undo last io_get_sqring() - */ -static void io_drop_sqring(struct io_ring_ctx *ctx) -{ - ctx->cached_sq_head--; -} - -/* * Fetch an sqe, if one is available. Note that s->sqe will point to memory * that is mapped by userspace. This means that care needs to be taken to * ensure that reads are stable, as we cannot rely on userspace always @@ -1737,9 +1824,8 @@ static bool io_get_sqring(struct io_ring_ctx *ctx, struct sqe_submit *s) * though the application is the one updating it. */ head = ctx->cached_sq_head; - /* See comment at the top of this file */ - smp_rmb(); - if (head == READ_ONCE(ring->r.tail)) + /* make sure SQ entry isn't read before tail */ + if (head == smp_load_acquire(&ring->r.tail)) return false; head = READ_ONCE(ring->array[head & ctx->sq_mask]); @@ -1753,8 +1839,6 @@ static bool io_get_sqring(struct io_ring_ctx *ctx, struct sqe_submit *s) /* drop invalid entries */ ctx->cached_sq_head++; ring->dropped++; - /* See comment at the top of this file */ - smp_wmb(); return false; } @@ -1864,7 +1948,8 @@ static int io_sq_thread(void *data) /* Tell userspace we may need a wakeup call */ ctx->sq_ring->flags |= IORING_SQ_NEED_WAKEUP; - smp_wmb(); + /* make sure to read SQ tail after writing flags */ + smp_mb(); if (!io_get_sqring(ctx, &sqes[0])) { if (kthread_should_stop()) { @@ -1877,13 +1962,11 @@ static int io_sq_thread(void *data) finish_wait(&ctx->sqo_wait, &wait); ctx->sq_ring->flags &= ~IORING_SQ_NEED_WAKEUP; - smp_wmb(); continue; } finish_wait(&ctx->sqo_wait, &wait); ctx->sq_ring->flags &= ~IORING_SQ_NEED_WAKEUP; - smp_wmb(); } i = 0; @@ -1928,7 +2011,7 @@ static int io_sq_thread(void *data) static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit) { struct io_submit_state state, *statep = NULL; - int i, ret = 0, submit = 0; + int i, submit = 0; if (to_submit > IO_PLUG_THRESHOLD) { io_submit_state_start(&state, ctx, to_submit); @@ -1937,6 +2020,7 @@ static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit) for (i = 0; i < to_submit; i++) { struct sqe_submit s; + int ret; if (!io_get_sqring(ctx, &s)) break; @@ -1944,21 +2028,18 @@ static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit) s.has_user = true; s.needs_lock = false; s.needs_fixed_file = false; + submit++; ret = io_submit_sqe(ctx, &s, statep); - if (ret) { - io_drop_sqring(ctx); - break; - } - - submit++; + if (ret) + io_cqring_add_event(ctx, s.sqe->user_data, ret, 0); } io_commit_sqring(ctx); if (statep) io_submit_state_end(statep); - return submit ? submit : ret; + return submit; } static unsigned io_cqring_events(struct io_cq_ring *ring) @@ -2239,10 +2320,6 @@ static int io_sq_offload_start(struct io_ring_ctx *ctx, mmgrab(current->mm); ctx->sqo_mm = current->mm; - ret = -EINVAL; - if (!cpu_possible(p->sq_thread_cpu)) - goto err; - if (ctx->flags & IORING_SETUP_SQPOLL) { ret = -EPERM; if (!capable(CAP_SYS_ADMIN)) @@ -2253,11 +2330,11 @@ static int io_sq_offload_start(struct io_ring_ctx *ctx, ctx->sq_thread_idle = HZ; if (p->flags & IORING_SETUP_SQ_AFF) { - int cpu; + int cpu = array_index_nospec(p->sq_thread_cpu, + nr_cpu_ids); - cpu = array_index_nospec(p->sq_thread_cpu, NR_CPUS); ret = -EINVAL; - if (!cpu_possible(p->sq_thread_cpu)) + if (!cpu_possible(cpu)) goto err; ctx->sqo_thread = kthread_create_on_cpu(io_sq_thread, @@ -2320,8 +2397,12 @@ static int io_account_mem(struct user_struct *user, unsigned long nr_pages) static void io_mem_free(void *ptr) { - struct page *page = virt_to_head_page(ptr); + struct page *page; + + if (!ptr) + return; + page = virt_to_head_page(ptr); if (put_page_testzero(page)) free_compound_page(page); } @@ -2362,7 +2443,7 @@ static int io_sqe_buffer_unregister(struct io_ring_ctx *ctx) if (ctx->account_mem) io_unaccount_mem(ctx->user, imu->nr_bvecs); - kfree(imu->bvec); + kvfree(imu->bvec); imu->nr_bvecs = 0; } @@ -2454,9 +2535,9 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, void __user *arg, if (!pages || nr_pages > got_pages) { kfree(vmas); kfree(pages); - pages = kmalloc_array(nr_pages, sizeof(struct page *), + pages = kvmalloc_array(nr_pages, sizeof(struct page *), GFP_KERNEL); - vmas = kmalloc_array(nr_pages, + vmas = kvmalloc_array(nr_pages, sizeof(struct vm_area_struct *), GFP_KERNEL); if (!pages || !vmas) { @@ -2468,7 +2549,7 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, void __user *arg, got_pages = nr_pages; } - imu->bvec = kmalloc_array(nr_pages, sizeof(struct bio_vec), + imu->bvec = kvmalloc_array(nr_pages, sizeof(struct bio_vec), GFP_KERNEL); ret = -ENOMEM; if (!imu->bvec) { @@ -2507,6 +2588,7 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, void __user *arg, } if (ctx->account_mem) io_unaccount_mem(ctx->user, nr_pages); + kvfree(imu->bvec); goto err; } @@ -2529,12 +2611,12 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, void __user *arg, ctx->nr_user_bufs++; } - kfree(pages); - kfree(vmas); + kvfree(pages); + kvfree(vmas); return 0; err: - kfree(pages); - kfree(vmas); + kvfree(pages); + kvfree(vmas); io_sqe_buffer_unregister(ctx); return ret; } @@ -2572,9 +2654,13 @@ static __poll_t io_uring_poll(struct file *file, poll_table *wait) __poll_t mask = 0; poll_wait(file, &ctx->cq_wait, wait); - /* See comment at the top of this file */ + /* + * synchronizes with barrier from wq_has_sleeper call in + * io_commit_cqring + */ smp_rmb(); - if (READ_ONCE(ctx->sq_ring->r.tail) + 1 != ctx->cached_sq_head) + if (READ_ONCE(ctx->sq_ring->r.tail) - ctx->cached_sq_head != + ctx->sq_ring->ring_entries) mask |= EPOLLOUT | EPOLLWRNORM; if (READ_ONCE(ctx->cq_ring->r.head) != ctx->cached_cq_tail) mask |= EPOLLIN | EPOLLRDNORM; @@ -2685,24 +2771,12 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, mutex_lock(&ctx->uring_lock); submitted = io_ring_submit(ctx, to_submit); mutex_unlock(&ctx->uring_lock); - - if (submitted < 0) - goto out_ctx; } if (flags & IORING_ENTER_GETEVENTS) { unsigned nr_events = 0; min_complete = min(min_complete, ctx->cq_entries); - /* - * The application could have included the 'to_submit' count - * in how many events it wanted to wait for. If we failed to - * submit the desired count, we may need to adjust the number - * of events to poll/wait for. - */ - if (submitted < to_submit) - min_complete = min_t(unsigned, submitted, min_complete); - if (ctx->flags & IORING_SETUP_IOPOLL) { mutex_lock(&ctx->uring_lock); ret = io_iopoll_check(ctx, &nr_events, min_complete); @@ -2748,17 +2822,12 @@ static int io_allocate_scq_urings(struct io_ring_ctx *ctx, return -EOVERFLOW; ctx->sq_sqes = io_mem_alloc(size); - if (!ctx->sq_sqes) { - io_mem_free(ctx->sq_ring); + if (!ctx->sq_sqes) return -ENOMEM; - } cq_ring = io_mem_alloc(struct_size(cq_ring, cqes, p->cq_entries)); - if (!cq_ring) { - io_mem_free(ctx->sq_ring); - io_mem_free(ctx->sq_sqes); + if (!cq_ring) return -ENOMEM; - } ctx->cq_ring = cq_ring; cq_ring->ring_mask = p->cq_entries - 1; @@ -2934,6 +3003,14 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode, { int ret; + /* + * We're inside the ring mutex, if the ref is already dying, then + * someone else killed the ctx or is already going through + * io_uring_register(). + */ + if (percpu_ref_is_dying(&ctx->refs)) + return -ENXIO; + percpu_ref_kill(&ctx->refs); /* diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 6b9c27548997..63c6bb1f8c4d 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -346,10 +346,16 @@ static __kernel_fsid_t fanotify_get_fsid(struct fsnotify_iter_info *iter_info) __kernel_fsid_t fsid = {}; fsnotify_foreach_obj_type(type) { + struct fsnotify_mark_connector *conn; + if (!fsnotify_iter_should_report_type(iter_info, type)) continue; - fsid = iter_info->marks[type]->connector->fsid; + conn = READ_ONCE(iter_info->marks[type]->connector); + /* Mark is just getting destroyed or created? */ + if (!conn) + continue; + fsid = conn->fsid; if (WARN_ON_ONCE(!fsid.val[0] && !fsid.val[1])) continue; return fsid; @@ -408,8 +414,12 @@ static int fanotify_handle_event(struct fsnotify_group *group, return 0; } - if (FAN_GROUP_FLAG(group, FAN_REPORT_FID)) + if (FAN_GROUP_FLAG(group, FAN_REPORT_FID)) { fsid = fanotify_get_fsid(iter_info); + /* Racing with mark destruction or creation? */ + if (!fsid.val[0] && !fsid.val[1]) + return 0; + } event = fanotify_alloc_event(group, inode, mask, data, data_type, &fsid); diff --git a/fs/notify/mark.c b/fs/notify/mark.c index d593d4269561..22acb0a79b53 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -239,13 +239,13 @@ static void fsnotify_drop_object(unsigned int type, void *objp) void fsnotify_put_mark(struct fsnotify_mark *mark) { - struct fsnotify_mark_connector *conn; + struct fsnotify_mark_connector *conn = READ_ONCE(mark->connector); void *objp = NULL; unsigned int type = FSNOTIFY_OBJ_TYPE_DETACHED; bool free_conn = false; /* Catch marks that were actually never attached to object */ - if (!mark->connector) { + if (!conn) { if (refcount_dec_and_test(&mark->refcnt)) fsnotify_final_mark_destroy(mark); return; @@ -255,10 +255,9 @@ void fsnotify_put_mark(struct fsnotify_mark *mark) * We have to be careful so that traversals of obj_list under lock can * safely grab mark reference. */ - if (!refcount_dec_and_lock(&mark->refcnt, &mark->connector->lock)) + if (!refcount_dec_and_lock(&mark->refcnt, &conn->lock)) return; - conn = mark->connector; hlist_del_init_rcu(&mark->obj_list); if (hlist_empty(&conn->list)) { objp = fsnotify_detach_connector_from_object(conn, &type); @@ -266,7 +265,7 @@ void fsnotify_put_mark(struct fsnotify_mark *mark) } else { __fsnotify_recalc_mask(conn); } - mark->connector = NULL; + WRITE_ONCE(mark->connector, NULL); spin_unlock(&conn->lock); fsnotify_drop_object(type, objp); @@ -620,7 +619,7 @@ restart: /* mark should be the last entry. last is the current last entry */ hlist_add_behind_rcu(&mark->obj_list, &last->obj_list); added: - mark->connector = conn; + WRITE_ONCE(mark->connector, conn); out_err: spin_unlock(&conn->lock); spin_unlock(&mark->lock); @@ -808,6 +807,7 @@ void fsnotify_init_mark(struct fsnotify_mark *mark, refcount_set(&mark->refcnt, 1); fsnotify_get_group(group); mark->group = group; + WRITE_ONCE(mark->connector, NULL); } /* diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index d65390727541..7325baa8f9d4 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -1626,9 +1626,11 @@ static void drop_sysctl_table(struct ctl_table_header *header) if (--header->nreg) return; - if (parent) + if (parent) { put_links(header); - start_unregistering(header); + start_unregistering(header); + } + if (!--header->count) kfree_rcu(header, rcu); diff --git a/fs/splice.c b/fs/splice.c index 98943d9b219c..25212dcca2df 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -330,8 +330,8 @@ const struct pipe_buf_operations default_pipe_buf_ops = { .get = generic_pipe_buf_get, }; -static int generic_pipe_buf_nosteal(struct pipe_inode_info *pipe, - struct pipe_buffer *buf) +int generic_pipe_buf_nosteal(struct pipe_inode_info *pipe, + struct pipe_buffer *buf) { return 1; } diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index cbf3180cb612..668ad971cd7b 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -420,7 +420,6 @@ extern struct ttm_bo_global { /** * Protected by ttm_global_mutex. */ - unsigned int use_count; struct list_head device_list; /** diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index abb2dac3da9b..5c626fdc10db 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -176,6 +176,7 @@ void free_pipe_info(struct pipe_inode_info *); bool generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *); int generic_pipe_buf_confirm(struct pipe_inode_info *, struct pipe_buffer *); int generic_pipe_buf_steal(struct pipe_inode_info *, struct pipe_buffer *); +int generic_pipe_buf_nosteal(struct pipe_inode_info *, struct pipe_buffer *); void generic_pipe_buf_release(struct pipe_inode_info *, struct pipe_buffer *); void pipe_buf_mark_unmergeable(struct pipe_buffer *buf); diff --git a/include/linux/uio.h b/include/linux/uio.h index f184af1999a8..2d0131ad4604 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -60,7 +60,7 @@ struct iov_iter { static inline enum iter_type iov_iter_type(const struct iov_iter *i) { - return i->type & ~(READ | WRITE); + return i->type & ~(READ | WRITE | ITER_BVEC_FLAG_NO_REF); } static inline bool iter_is_iovec(const struct iov_iter *i) diff --git a/include/linux/usb.h b/include/linux/usb.h index 5e49e82c4368..ff010d1fd1c7 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -200,7 +200,6 @@ usb_find_last_int_out_endpoint(struct usb_host_interface *alt, * @dev: driver model's view of this device * @usb_dev: if an interface is bound to the USB major, this will point * to the sysfs representation for that device. - * @pm_usage_cnt: PM usage counter for this interface * @reset_ws: Used for scheduling resets from atomic context. * @resetting_device: USB core reset the device, so use alt setting 0 as * current; needs bandwidth alloc after reset. @@ -257,7 +256,6 @@ struct usb_interface { struct device dev; /* interface specific device info */ struct device *usb_dev; - atomic_t pm_usage_cnt; /* usage counter for autosuspend */ struct work_struct reset_ws; /* for resets in atomic context */ }; #define to_usb_interface(d) container_of(d, struct usb_interface, dev) diff --git a/include/net/arp.h b/include/net/arp.h index 977aabfcdc03..c8f580a0e6b1 100644 --- a/include/net/arp.h +++ b/include/net/arp.h @@ -18,6 +18,7 @@ static inline u32 arp_hashfn(const void *pkey, const struct net_device *dev, u32 return val * hash_rnd[0]; } +#ifdef CONFIG_INET static inline struct neighbour *__ipv4_neigh_lookup_noref(struct net_device *dev, u32 key) { if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) @@ -25,6 +26,13 @@ static inline struct neighbour *__ipv4_neigh_lookup_noref(struct net_device *dev return ___neigh_lookup_noref(&arp_tbl, neigh_key_eq32, arp_hashfn, &key, dev); } +#else +static inline +struct neighbour *__ipv4_neigh_lookup_noref(struct net_device *dev, u32 key) +{ + return NULL; +} +#endif static inline struct neighbour *__ipv4_neigh_lookup(struct net_device *dev, u32 key) { diff --git a/include/net/ipv6_frag.h b/include/net/ipv6_frag.h index 28aa9b30aece..1f77fb4dc79d 100644 --- a/include/net/ipv6_frag.h +++ b/include/net/ipv6_frag.h @@ -94,7 +94,6 @@ ip6frag_expire_frag_queue(struct net *net, struct frag_queue *fq) goto out; head->dev = dev; - skb_get(head); spin_unlock(&fq->q.lock); icmpv6_send(head, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0); diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index 87b3198f4b5d..f4d4010b7e3e 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -238,6 +238,7 @@ enum mlx5_ib_query_dev_resp_flags { MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_COMP = 1 << 0, MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_PAD = 1 << 1, MLX5_IB_QUERY_DEV_RESP_PACKET_BASED_CREDIT_MODE = 1 << 2, + MLX5_IB_QUERY_DEV_RESP_FLAGS_SCAT2CQE_DCT = 1 << 3, }; enum mlx5_ib_tunnel_offloads { diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index a4d9e14bf138..35f3ea375084 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -2007,6 +2007,10 @@ static u64 numa_get_avg_runtime(struct task_struct *p, u64 *period) if (p->last_task_numa_placement) { delta = runtime - p->last_sum_exec_runtime; *period = now - p->last_task_numa_placement; + + /* Avoid time going backwards, prevent potential divide error: */ + if (unlikely((s64)*period < 0)) + *period = 0; } else { delta = p->se.avg.load_sum; *period = LOAD_AVG_MAX; diff --git a/kernel/seccomp.c b/kernel/seccomp.c index df27e499956a..3582eeb59893 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -502,7 +502,10 @@ out: * * Caller must be holding current->sighand->siglock lock. * - * Returns 0 on success, -ve on error. + * Returns 0 on success, -ve on error, or + * - in TSYNC mode: the pid of a thread which was either not in the correct + * seccomp mode or did not have an ancestral seccomp filter + * - in NEW_LISTENER mode: the fd of the new listener */ static long seccomp_attach_filter(unsigned int flags, struct seccomp_filter *filter) @@ -1258,6 +1261,16 @@ static long seccomp_set_mode_filter(unsigned int flags, if (flags & ~SECCOMP_FILTER_FLAG_MASK) return -EINVAL; + /* + * In the successful case, NEW_LISTENER returns the new listener fd. + * But in the failure case, TSYNC returns the thread that died. If you + * combine these two flags, there's no way to tell whether something + * succeeded or failed. So, let's disallow this combination. + */ + if ((flags & SECCOMP_FILTER_FLAG_TSYNC) && + (flags & SECCOMP_FILTER_FLAG_NEW_LISTENER)) + return -EINVAL; + /* Prepare the new filter before holding any locks. */ prepared = seccomp_prepare_user_filter(filter); if (IS_ERR(prepared)) @@ -1304,7 +1317,7 @@ out: mutex_unlock(¤t->signal->cred_guard_mutex); out_put_fd: if (flags & SECCOMP_FILTER_FLAG_NEW_LISTENER) { - if (ret < 0) { + if (ret) { listener_f->private_data = NULL; fput(listener_f); put_unused_fd(listener); diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 41b6f96e5366..4ee8d8aa3d0f 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -762,7 +762,7 @@ u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu) preempt_disable_notrace(); time = rb_time_stamp(buffer); - preempt_enable_no_resched_notrace(); + preempt_enable_notrace(); return time; } diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 6c24755655c7..ca1ee656d6d8 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -496,8 +496,10 @@ int trace_pid_write(struct trace_pid_list *filtered_pids, * not modified. */ pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL); - if (!pid_list) + if (!pid_list) { + trace_parser_put(&parser); return -ENOMEM; + } pid_list->pid_max = READ_ONCE(pid_max); @@ -507,6 +509,7 @@ int trace_pid_write(struct trace_pid_list *filtered_pids, pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3); if (!pid_list->pids) { + trace_parser_put(&parser); kfree(pid_list); return -ENOMEM; } @@ -7025,19 +7028,23 @@ struct buffer_ref { struct ring_buffer *buffer; void *page; int cpu; - int ref; + refcount_t refcount; }; +static void buffer_ref_release(struct buffer_ref *ref) +{ + if (!refcount_dec_and_test(&ref->refcount)) + return; + ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page); + kfree(ref); +} + static void buffer_pipe_buf_release(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { struct buffer_ref *ref = (struct buffer_ref *)buf->private; - if (--ref->ref) - return; - - ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page); - kfree(ref); + buffer_ref_release(ref); buf->private = 0; } @@ -7046,10 +7053,10 @@ static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe, { struct buffer_ref *ref = (struct buffer_ref *)buf->private; - if (ref->ref > INT_MAX/2) + if (refcount_read(&ref->refcount) > INT_MAX/2) return false; - ref->ref++; + refcount_inc(&ref->refcount); return true; } @@ -7057,7 +7064,7 @@ static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe, static const struct pipe_buf_operations buffer_pipe_buf_ops = { .confirm = generic_pipe_buf_confirm, .release = buffer_pipe_buf_release, - .steal = generic_pipe_buf_steal, + .steal = generic_pipe_buf_nosteal, .get = buffer_pipe_buf_get, }; @@ -7070,11 +7077,7 @@ static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i) struct buffer_ref *ref = (struct buffer_ref *)spd->partial[i].private; - if (--ref->ref) - return; - - ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page); - kfree(ref); + buffer_ref_release(ref); spd->partial[i].private = 0; } @@ -7129,7 +7132,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, break; } - ref->ref = 1; + refcount_set(&ref->refcount, 1); ref->buffer = iter->trace_buffer->buffer; ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file); if (IS_ERR(ref->page)) { diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 00dbcdbc9a0d..d5a4a4036d2f 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1929,6 +1929,7 @@ config TEST_KMOD depends on m depends on BLOCK && (64BIT || LBDAF) # for XFS, BTRFS depends on NETDEVICES && NET_CORE && INET # for TUN + depends on BLOCK select TEST_LKM select XFS_FS select TUN diff --git a/lib/test_vmalloc.c b/lib/test_vmalloc.c index 83cdcaa82bf6..f832b095afba 100644 --- a/lib/test_vmalloc.c +++ b/lib/test_vmalloc.c @@ -383,14 +383,14 @@ static void shuffle_array(int *arr, int n) static int test_func(void *private) { struct test_driver *t = private; - cpumask_t newmask = CPU_MASK_NONE; int random_array[ARRAY_SIZE(test_case_array)]; int index, i, j, ret; ktime_t kt; u64 delta; - cpumask_set_cpu(t->cpu, &newmask); - set_cpus_allowed_ptr(current, &newmask); + ret = set_cpus_allowed_ptr(current, cpumask_of(t->cpu)); + if (ret < 0) + pr_err("Failed to set affinity to %d CPU\n", t->cpu); for (i = 0; i < ARRAY_SIZE(test_case_array); i++) random_array[i] = i; diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 0082d699be94..b236069ff0d8 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -874,6 +874,7 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ */ mem = find_memory_block(__pfn_to_section(pfn)); nid = mem->nid; + put_device(&mem->dev); /* associate pfn range with the zone */ zone = move_pfn_range(online_type, nid, pfn, nr_pages); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index c6ce20aaf80b..c02cff1ed56e 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -266,7 +266,20 @@ compound_page_dtor * const compound_page_dtors[] = { int min_free_kbytes = 1024; int user_min_free_kbytes = -1; +#ifdef CONFIG_DISCONTIGMEM +/* + * DiscontigMem defines memory ranges as separate pg_data_t even if the ranges + * are not on separate NUMA nodes. Functionally this works but with + * watermark_boost_factor, it can reclaim prematurely as the ranges can be + * quite small. By default, do not boost watermarks on discontigmem as in + * many cases very high-order allocations like THP are likely to be + * unsupported and the premature reclaim offsets the advantage of long-term + * fragmentation avoidance. + */ +int watermark_boost_factor __read_mostly; +#else int watermark_boost_factor __read_mostly = 15000; +#endif int watermark_scale_factor = 10; static unsigned long nr_kernel_pages __initdata; @@ -3419,8 +3432,11 @@ alloc_flags_nofragment(struct zone *zone, gfp_t gfp_mask) alloc_flags |= ALLOC_KSWAPD; #ifdef CONFIG_ZONE_DMA32 + if (!zone) + return alloc_flags; + if (zone_idx(zone) != ZONE_NORMAL) - goto out; + return alloc_flags; /* * If ZONE_DMA32 exists, assume it is the one after ZONE_NORMAL and @@ -3429,9 +3445,9 @@ alloc_flags_nofragment(struct zone *zone, gfp_t gfp_mask) */ BUILD_BUG_ON(ZONE_NORMAL - ZONE_DMA32 != 1); if (nr_online_nodes > 1 && !populated_zone(--zone)) - goto out; + return alloc_flags; -out: + alloc_flags |= ALLOC_NOFRAGMENT; #endif /* CONFIG_ZONE_DMA32 */ return alloc_flags; } @@ -3773,11 +3789,6 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, memalloc_noreclaim_restore(noreclaim_flag); psi_memstall_leave(&pflags); - if (*compact_result <= COMPACT_INACTIVE) { - WARN_ON_ONCE(page); - return NULL; - } - /* * At least in one zone compaction wasn't deferred or skipped, so let's * count a compaction stall diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 30f6fd8f68e0..9b9da5142613 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -31,6 +31,7 @@ #include <linux/times.h> #include <net/net_namespace.h> #include <net/neighbour.h> +#include <net/arp.h> #include <net/dst.h> #include <net/sock.h> #include <net/netevent.h> @@ -663,6 +664,8 @@ out: out_tbl_unlock: write_unlock_bh(&tbl->lock); out_neigh_release: + if (!exempt_from_gc) + atomic_dec(&tbl->gc_entries); neigh_release(n); goto out; } @@ -2982,7 +2985,13 @@ int neigh_xmit(int index, struct net_device *dev, if (!tbl) goto out; rcu_read_lock_bh(); - neigh = __neigh_lookup_noref(tbl, addr, dev); + if (index == NEIGH_ARP_TABLE) { + u32 key = *((u32 *)addr); + + neigh = __ipv4_neigh_lookup_noref(dev, key); + } else { + neigh = __neigh_lookup_noref(tbl, addr, dev); + } if (!neigh) neigh = __neigh_create(tbl, addr, dev, false); err = PTR_ERR(neigh); diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c index 3e614cc824f7..3a1af50bd0a5 100644 --- a/net/ipv4/ipmr_base.c +++ b/net/ipv4/ipmr_base.c @@ -335,8 +335,6 @@ next_entry2: } spin_unlock_bh(lock); err = 0; - e = 0; - out: cb->args[1] = e; return err; @@ -374,6 +372,7 @@ int mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb, err = mr_table_dump(mrt, skb, cb, fill, lock, filter); if (err < 0) break; + cb->args[1] = 0; next_table: t++; } diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index d8776b2110c1..065334b41d57 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -352,6 +352,7 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head, struct sk_buff *pp = NULL; struct udphdr *uh2; struct sk_buff *p; + unsigned int ulen; /* requires non zero csum, for symmetry with GSO */ if (!uh->check) { @@ -359,6 +360,12 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head, return NULL; } + /* Do not deal with padded or malicious packets, sorry ! */ + ulen = ntohs(uh->len); + if (ulen <= sizeof(*uh) || ulen != skb_gro_len(skb)) { + NAPI_GRO_CB(skb)->flush = 1; + return NULL; + } /* pull encapsulating udp header */ skb_gro_pull(skb, sizeof(struct udphdr)); skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr)); @@ -377,12 +384,12 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head, /* Terminate the flow on len mismatch or if it grow "too much". * Under small packet flood GRO count could elsewhere grow a lot - * leading to execessive truesize values. + * leading to excessive truesize values. * On len mismatch merge the first packet shorter than gso_size, * otherwise complete the GRO packet. */ - if (uh->len > uh2->len || skb_gro_receive(p, skb) || - uh->len != uh2->len || + if (ulen > ntohs(uh2->len) || skb_gro_receive(p, skb) || + ulen != ntohs(uh2->len) || NAPI_GRO_CB(p)->count >= UDP_GRO_CNT_MAX) pp = p; diff --git a/net/rds/tcp.c b/net/rds/tcp.c index faf726e00e27..66121bc6f34e 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -551,7 +551,7 @@ static __net_init int rds_tcp_init_net(struct net *net) tbl = kmemdup(rds_tcp_sysctl_table, sizeof(rds_tcp_sysctl_table), GFP_KERNEL); if (!tbl) { - pr_warn("could not set allocate syctl table\n"); + pr_warn("could not set allocate sysctl table\n"); return -ENOMEM; } rtn->ctl_table = tbl; diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index 4c1567854f95..706a160142ea 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -32,6 +32,8 @@ static int cls_cgroup_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct cls_cgroup_head *head = rcu_dereference_bh(tp->root); u32 classid = task_get_classid(skb); + if (unlikely(!head)) + return -1; if (!classid) return -1; if (!tcf_em_tree_match(skb, &head->ematches, NULL)) diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index a13bc351a414..3d021f2aad1c 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -32,6 +32,9 @@ static int mall_classify(struct sk_buff *skb, const struct tcf_proto *tp, { struct cls_mall_head *head = rcu_dereference_bh(tp->root); + if (unlikely(!head)) + return -1; + if (tc_skip_sw(head->flags)) return -1; diff --git a/scripts/selinux/genheaders/genheaders.c b/scripts/selinux/genheaders/genheaders.c index 1ceedea847dd..544ca126a8a8 100644 --- a/scripts/selinux/genheaders/genheaders.c +++ b/scripts/selinux/genheaders/genheaders.c @@ -9,7 +9,6 @@ #include <string.h> #include <errno.h> #include <ctype.h> -#include <sys/socket.h> struct security_class_mapping { const char *name; diff --git a/scripts/selinux/mdp/mdp.c b/scripts/selinux/mdp/mdp.c index 073fe7537f6c..6d51b74bc679 100644 --- a/scripts/selinux/mdp/mdp.c +++ b/scripts/selinux/mdp/mdp.c @@ -32,7 +32,6 @@ #include <stdlib.h> #include <unistd.h> #include <string.h> -#include <sys/socket.h> static void usage(char *name) { diff --git a/security/selinux/include/classmap.h b/security/selinux/include/classmap.h index bd5fe0d3204a..201f7e588a29 100644 --- a/security/selinux/include/classmap.h +++ b/security/selinux/include/classmap.h @@ -1,5 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ #include <linux/capability.h> +#include <linux/socket.h> #define COMMON_FILE_SOCK_PERMS "ioctl", "read", "write", "create", \ "getattr", "setattr", "lock", "relabelfrom", "relabelto", "append", "map" diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index f69d2ee29742..5019cdae5d0b 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -2166,11 +2166,14 @@ TEST(detect_seccomp_filter_flags) SECCOMP_FILTER_FLAG_LOG, SECCOMP_FILTER_FLAG_SPEC_ALLOW, SECCOMP_FILTER_FLAG_NEW_LISTENER }; - unsigned int flag, all_flags; + unsigned int exclusive[] = { + SECCOMP_FILTER_FLAG_TSYNC, + SECCOMP_FILTER_FLAG_NEW_LISTENER }; + unsigned int flag, all_flags, exclusive_mask; int i; long ret; - /* Test detection of known-good filter flags */ + /* Test detection of individual known-good filter flags */ for (i = 0, all_flags = 0; i < ARRAY_SIZE(flags); i++) { int bits = 0; @@ -2197,16 +2200,29 @@ TEST(detect_seccomp_filter_flags) all_flags |= flag; } - /* Test detection of all known-good filter flags */ - ret = seccomp(SECCOMP_SET_MODE_FILTER, all_flags, NULL); - EXPECT_EQ(-1, ret); - EXPECT_EQ(EFAULT, errno) { - TH_LOG("Failed to detect that all known-good filter flags (0x%X) are supported!", - all_flags); + /* + * Test detection of all known-good filter flags combined. But + * for the exclusive flags we need to mask them out and try them + * individually for the "all flags" testing. + */ + exclusive_mask = 0; + for (i = 0; i < ARRAY_SIZE(exclusive); i++) + exclusive_mask |= exclusive[i]; + for (i = 0; i < ARRAY_SIZE(exclusive); i++) { + flag = all_flags & ~exclusive_mask; + flag |= exclusive[i]; + + ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EFAULT, errno) { + TH_LOG("Failed to detect that all known-good filter flags (0x%X) are supported!", + flag); + } } - /* Test detection of an unknown filter flag */ + /* Test detection of an unknown filter flags, without exclusives. */ flag = -1; + flag &= ~exclusive_mask; ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); EXPECT_EQ(-1, ret); EXPECT_EQ(EINVAL, errno) { |