diff options
178 files changed, 1441 insertions, 934 deletions
@@ -1637,6 +1637,10 @@ S: Panoramastrasse 18 S: D-69126 Heidelberg S: Germany +N: Simon Horman +M: horms@verge.net.au +D: Renesas ARM/ARM64 SoC maintainer + N: Christopher Horn E: chorn@warwick.net D: Miscellaneous sysctl hacks diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst index 0fa8c0e615c2..5361ebec3361 100644 --- a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst @@ -615,8 +615,8 @@ on an IO device and is an example of this type. Protections ----------- -A cgroup is protected to be allocated upto the configured amount of -the resource if the usages of all its ancestors are under their +A cgroup is protected upto the configured amount of the resource +as long as the usages of all its ancestors are under their protected levels. Protections can be hard guarantees or best effort soft boundaries. Protections can also be over-committed in which case only upto the amount available to the parent is protected among @@ -1096,7 +1096,10 @@ PAGE_SIZE multiple when read back. is within its effective min boundary, the cgroup's memory won't be reclaimed under any conditions. If there is no unprotected reclaimable memory available, OOM killer - is invoked. + is invoked. Above the effective min boundary (or + effective low boundary if it is higher), pages are reclaimed + proportionally to the overage, reducing reclaim pressure for + smaller overages. Effective min boundary is limited by memory.min values of all ancestor cgroups. If there is memory.min overcommitment @@ -1118,7 +1121,10 @@ PAGE_SIZE multiple when read back. Best-effort memory protection. If the memory usage of a cgroup is within its effective low boundary, the cgroup's memory won't be reclaimed unless memory can be reclaimed - from unprotected cgroups. + from unprotected cgroups. Above the effective low boundary (or + effective min boundary if it is higher), pages are reclaimed + proportionally to the overage, reducing reclaim pressure for + smaller overages. Effective low boundary is limited by memory.low values of all ancestor cgroups. If there is memory.low overcommitment @@ -2482,8 +2488,10 @@ system performance due to overreclaim, to the point where the feature becomes self-defeating. The memory.low boundary on the other hand is a top-down allocated -reserve. A cgroup enjoys reclaim protection when it's within its low, -which makes delegation of subtrees possible. +reserve. A cgroup enjoys reclaim protection when it's within its +effective low, which makes delegation of subtrees possible. It also +enjoys having reclaim pressure proportional to its overage when +above its effective low. The original high boundary, the hard limit, is defined as a strict limit that can not budge, even if the OOM killer has to be called. diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index c7ac2f3ac99f..a84a83f8881e 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -5302,6 +5302,10 @@ the unplug protocol never -- do not unplug even if version check succeeds + xen_legacy_crash [X86,XEN] + Crash from Xen panic notifier, without executing late + panic() code such as dumping handler. + xen_nopvspin [X86,XEN] Disables the ticketlock slowpath using Xen PV optimizations. diff --git a/Documentation/arm64/memory.rst b/Documentation/arm64/memory.rst index b040909e45f8..02e02175e6f5 100644 --- a/Documentation/arm64/memory.rst +++ b/Documentation/arm64/memory.rst @@ -154,11 +154,18 @@ return virtual addresses to userspace from a 48-bit range. Software can "opt-in" to receiving VAs from a 52-bit space by specifying an mmap hint parameter that is larger than 48-bit. + For example: - maybe_high_address = mmap(~0UL, size, prot, flags,...); + +.. code-block:: c + + maybe_high_address = mmap(~0UL, size, prot, flags,...); It is also possible to build a debug kernel that returns addresses from a 52-bit space by enabling the following kernel config options: + +.. code-block:: sh + CONFIG_EXPERT=y && CONFIG_ARM64_FORCE_52BIT=y Note that this option is only intended for debugging applications diff --git a/Documentation/core-api/index.rst b/Documentation/core-api/index.rst index fa16a0538dcb..ab0eae1c153a 100644 --- a/Documentation/core-api/index.rst +++ b/Documentation/core-api/index.rst @@ -38,6 +38,7 @@ Core utilities protection-keys ../RCU/index gcc-plugins + symbol-namespaces Interfaces for kernel debugging diff --git a/Documentation/core-api/memory-allocation.rst b/Documentation/core-api/memory-allocation.rst index 7744aa3bf2e0..939e3dfc86e9 100644 --- a/Documentation/core-api/memory-allocation.rst +++ b/Documentation/core-api/memory-allocation.rst @@ -98,6 +98,10 @@ limited. The actual limit depends on the hardware and the kernel configuration, but it is a good practice to use `kmalloc` for objects smaller than page size. +The address of a chunk allocated with `kmalloc` is aligned to at least +ARCH_KMALLOC_MINALIGN bytes. For sizes which are a power of two, the +alignment is also guaranteed to be at least the respective size. + For large allocations you can use :c:func:`vmalloc` and :c:func:`vzalloc`, or directly request pages from the page allocator. The memory allocated by `vmalloc` and related functions is diff --git a/Documentation/kbuild/namespaces.rst b/Documentation/core-api/symbol-namespaces.rst index 982ed7b568ac..982ed7b568ac 100644 --- a/Documentation/kbuild/namespaces.rst +++ b/Documentation/core-api/symbol-namespaces.rst diff --git a/Documentation/dev-tools/kselftest.rst b/Documentation/dev-tools/kselftest.rst index 25604904fa6e..ecdfdc9d4b03 100644 --- a/Documentation/dev-tools/kselftest.rst +++ b/Documentation/dev-tools/kselftest.rst @@ -89,6 +89,22 @@ To build, save output files in a separate directory with KBUILD_OUTPUT :: $ export KBUILD_OUTPUT=/tmp/kselftest; make TARGETS="size timers" kselftest +Additionally you can use the "SKIP_TARGETS" variable on the make command +line to specify one or more targets to exclude from the TARGETS list. + +To run all tests but a single subsystem:: + + $ make -C tools/testing/selftests SKIP_TARGETS=ptrace run_tests + +You can specify multiple tests to skip:: + + $ make SKIP_TARGETS="size timers" kselftest + +You can also specify a restricted list of tests to run together with a +dedicated skiplist:: + + $ make TARGETS="bpf breakpoints size timers" SKIP_TARGETS=bpf kselftest + See the top-level tools/testing/selftests/Makefile for the list of all possible targets. diff --git a/Documentation/process/coding-style.rst b/Documentation/process/coding-style.rst index f4a2198187f9..ada573b7d703 100644 --- a/Documentation/process/coding-style.rst +++ b/Documentation/process/coding-style.rst @@ -56,7 +56,7 @@ instead of ``double-indenting`` the ``case`` labels. E.g.: case 'K': case 'k': mem <<= 10; - /* fall through */ + fallthrough; default: break; } diff --git a/Documentation/process/deprecated.rst b/Documentation/process/deprecated.rst index 053b24a6dd38..179f2a5625a0 100644 --- a/Documentation/process/deprecated.rst +++ b/Documentation/process/deprecated.rst @@ -122,14 +122,27 @@ memory adjacent to the stack (when built without `CONFIG_VMAP_STACK=y`) Implicit switch case fall-through --------------------------------- -The C language allows switch cases to "fall through" when -a "break" statement is missing at the end of a case. This, -however, introduces ambiguity in the code, as it's not always -clear if the missing break is intentional or a bug. As there -have been a long list of flaws `due to missing "break" statements +The C language allows switch cases to "fall-through" when a "break" statement +is missing at the end of a case. This, however, introduces ambiguity in the +code, as it's not always clear if the missing break is intentional or a bug. + +As there have been a long list of flaws `due to missing "break" statements <https://cwe.mitre.org/data/definitions/484.html>`_, we no longer allow -"implicit fall-through". In order to identify an intentional fall-through -case, we have adopted the marking used by static analyzers: a comment -saying `/* Fall through */`. Once the C++17 `__attribute__((fallthrough))` -is more widely handled by C compilers, static analyzers, and IDEs, we can -switch to using that instead. +"implicit fall-through". + +In order to identify intentional fall-through cases, we have adopted a +pseudo-keyword macro 'fallthrough' which expands to gcc's extension +__attribute__((__fallthrough__)). `Statement Attributes +<https://gcc.gnu.org/onlinedocs/gcc/Statement-Attributes.html>`_ + +When the C17/C18 [[fallthrough]] syntax is more commonly supported by +C compilers, static analyzers, and IDEs, we can switch to using that syntax +for the macro pseudo-keyword. + +All switch/case blocks must end in one of: + + break; + fallthrough; + continue; + goto <label>; + return [expression]; diff --git a/MAINTAINERS b/MAINTAINERS index 55199ef7fa74..e9b0a4100a29 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2165,12 +2165,10 @@ F: arch/arm64/boot/dts/realtek/ F: Documentation/devicetree/bindings/arm/realtek.yaml ARM/RENESAS ARM64 ARCHITECTURE -M: Simon Horman <horms@verge.net.au> M: Geert Uytterhoeven <geert+renesas@glider.be> M: Magnus Damm <magnus.damm@gmail.com> L: linux-renesas-soc@vger.kernel.org Q: http://patchwork.kernel.org/project/linux-renesas-soc/list/ -T: git git://git.kernel.org/pub/scm/linux/kernel/git/horms/renesas.git next T: git git://git.kernel.org/pub/scm/linux/kernel/git/geert/renesas-devel.git next S: Supported F: arch/arm64/boot/dts/renesas/ @@ -2282,12 +2280,10 @@ S: Maintained F: drivers/media/platform/s5p-mfc/ ARM/SHMOBILE ARM ARCHITECTURE -M: Simon Horman <horms@verge.net.au> M: Geert Uytterhoeven <geert+renesas@glider.be> M: Magnus Damm <magnus.damm@gmail.com> L: linux-renesas-soc@vger.kernel.org Q: http://patchwork.kernel.org/project/linux-renesas-soc/list/ -T: git git://git.kernel.org/pub/scm/linux/kernel/git/horms/renesas.git next T: git git://git.kernel.org/pub/scm/linux/kernel/git/geert/renesas-devel.git next S: Supported F: arch/arm/boot/dts/emev2* @@ -9187,6 +9183,7 @@ M: Pavel Machek <pavel@ucw.cz> R: Dan Murphy <dmurphy@ti.com> L: linux-leds@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/j.anaszewski/linux-leds.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/pavel/linux-leds.git S: Maintained F: Documentation/devicetree/bindings/leds/ F: drivers/leds/ @@ -11547,6 +11544,7 @@ NSDEPS M: Matthias Maennich <maennich@google.com> S: Maintained F: scripts/nsdeps +F: Documentation/core-api/symbol-namespaces.rst NTB AMD DRIVER M: Shyam Sundar S K <Shyam-sundar.S-k@amd.com> @@ -599,7 +599,7 @@ endif # in addition to whatever we do anyway. # Just "make" or "make all" shall build modules as well -ifneq ($(filter all _all modules,$(MAKECMDGOALS)),) +ifneq ($(filter all _all modules nsdeps,$(MAKECMDGOALS)),) KBUILD_MODULES := 1 endif @@ -1217,9 +1217,8 @@ PHONY += kselftest kselftest: $(Q)$(MAKE) -C $(srctree)/tools/testing/selftests run_tests -PHONY += kselftest-clean -kselftest-clean: - $(Q)$(MAKE) -C $(srctree)/tools/testing/selftests clean +kselftest-%: FORCE + $(Q)$(MAKE) -C $(srctree)/tools/testing/selftests $* PHONY += kselftest-merge kselftest-merge: diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index b24df84a1d7a..043b0b18bf7e 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -98,6 +98,7 @@ config CRYPTO_AES_ARM_CE tristate "Accelerated AES using ARMv8 Crypto Extensions" depends on KERNEL_MODE_NEON select CRYPTO_BLKCIPHER + select CRYPTO_LIB_AES select CRYPTO_SIMD help Use an implementation of AES in CBC, CTR and XTS modes that uses diff --git a/arch/arm/crypto/aes-ce-core.S b/arch/arm/crypto/aes-ce-core.S index b978cdf133af..4d1707388d94 100644 --- a/arch/arm/crypto/aes-ce-core.S +++ b/arch/arm/crypto/aes-ce-core.S @@ -9,6 +9,7 @@ #include <asm/assembler.h> .text + .arch armv8-a .fpu crypto-neon-fp-armv8 .align 3 diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 41a9b4257b72..950a56b71ff0 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -110,7 +110,6 @@ config ARM64 select GENERIC_STRNLEN_USER select GENERIC_TIME_VSYSCALL select GENERIC_GETTIMEOFDAY - select GENERIC_COMPAT_VDSO if (!CPU_BIG_ENDIAN && COMPAT) select HANDLE_DOMAIN_IRQ select HARDIRQS_SW_RESEND select HAVE_PCI @@ -1159,7 +1158,7 @@ menuconfig COMPAT if COMPAT config KUSER_HELPERS - bool "Enable kuser helpers page for 32 bit applications" + bool "Enable kuser helpers page for 32-bit applications" default y help Warning: disabling this option may break 32-bit user programs. @@ -1185,6 +1184,18 @@ config KUSER_HELPERS Say N here only if you are absolutely certain that you do not need these helpers; otherwise, the safe option is to say Y. +config COMPAT_VDSO + bool "Enable vDSO for 32-bit applications" + depends on !CPU_BIG_ENDIAN && "$(CROSS_COMPILE_COMPAT)" != "" + select GENERIC_COMPAT_VDSO + default y + help + Place in the process address space of 32-bit applications an + ELF shared object providing fast implementations of gettimeofday + and clock_gettime. + + You must have a 32-bit build of glibc 2.22 or later for programs + to seamlessly take advantage of this. menuconfig ARMV8_DEPRECATED bool "Emulate deprecated/obsolete ARMv8 instructions" diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 84a3d502c5a5..2c0238ce0551 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -53,22 +53,6 @@ $(warning Detected assembler with broken .inst; disassembly will be unreliable) endif endif -ifeq ($(CONFIG_GENERIC_COMPAT_VDSO), y) - CROSS_COMPILE_COMPAT ?= $(CONFIG_CROSS_COMPILE_COMPAT_VDSO:"%"=%) - - ifeq ($(CONFIG_CC_IS_CLANG), y) - $(warning CROSS_COMPILE_COMPAT is clang, the compat vDSO will not be built) - else ifeq ($(strip $(CROSS_COMPILE_COMPAT)),) - $(warning CROSS_COMPILE_COMPAT not defined or empty, the compat vDSO will not be built) - else ifeq ($(shell which $(CROSS_COMPILE_COMPAT)gcc 2> /dev/null),) - $(error $(CROSS_COMPILE_COMPAT)gcc not found, check CROSS_COMPILE_COMPAT) - else - export CROSS_COMPILE_COMPAT - export CONFIG_COMPAT_VDSO := y - compat_vdso := -DCONFIG_COMPAT_VDSO=1 - endif -endif - KBUILD_CFLAGS += -mgeneral-regs-only $(lseinstr) $(brokengasinst) \ $(compat_vdso) $(cc_has_k_constraint) KBUILD_CFLAGS += -fno-asynchronous-unwind-tables diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h index c6bd87d2915b..574808b9df4c 100644 --- a/arch/arm64/include/asm/atomic_lse.h +++ b/arch/arm64/include/asm/atomic_lse.h @@ -321,7 +321,8 @@ static inline s64 __lse_atomic64_dec_if_positive(atomic64_t *v) } #define __CMPXCHG_CASE(w, sfx, name, sz, mb, cl...) \ -static inline u##sz __lse__cmpxchg_case_##name##sz(volatile void *ptr, \ +static __always_inline u##sz \ +__lse__cmpxchg_case_##name##sz(volatile void *ptr, \ u##sz old, \ u##sz new) \ { \ @@ -362,7 +363,8 @@ __CMPXCHG_CASE(x, , mb_, 64, al, "memory") #undef __CMPXCHG_CASE #define __CMPXCHG_DBL(name, mb, cl...) \ -static inline long __lse__cmpxchg_double##name(unsigned long old1, \ +static __always_inline long \ +__lse__cmpxchg_double##name(unsigned long old1, \ unsigned long old2, \ unsigned long new1, \ unsigned long new2, \ diff --git a/arch/arm64/include/asm/vdso/compat_barrier.h b/arch/arm64/include/asm/vdso/compat_barrier.h index fb60a88b5ed4..3fd8fd6d8fc2 100644 --- a/arch/arm64/include/asm/vdso/compat_barrier.h +++ b/arch/arm64/include/asm/vdso/compat_barrier.h @@ -20,7 +20,7 @@ #define dmb(option) __asm__ __volatile__ ("dmb " #option : : : "memory") -#if __LINUX_ARM_ARCH__ >= 8 +#if __LINUX_ARM_ARCH__ >= 8 && defined(CONFIG_AS_DMB_ISHLD) #define aarch32_smp_mb() dmb(ish) #define aarch32_smp_rmb() dmb(ishld) #define aarch32_smp_wmb() dmb(ishst) diff --git a/arch/arm64/include/asm/vdso_datapage.h b/arch/arm64/include/asm/vdso_datapage.h deleted file mode 100644 index 1f38bf330a6e..000000000000 --- a/arch/arm64/include/asm/vdso_datapage.h +++ /dev/null @@ -1,33 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2012 ARM Limited - */ -#ifndef __ASM_VDSO_DATAPAGE_H -#define __ASM_VDSO_DATAPAGE_H - -#ifndef __ASSEMBLY__ - -struct vdso_data { - __u64 cs_cycle_last; /* Timebase at clocksource init */ - __u64 raw_time_sec; /* Raw time */ - __u64 raw_time_nsec; - __u64 xtime_clock_sec; /* Kernel time */ - __u64 xtime_clock_nsec; - __u64 xtime_coarse_sec; /* Coarse time */ - __u64 xtime_coarse_nsec; - __u64 wtm_clock_sec; /* Wall to monotonic time */ - __u64 wtm_clock_nsec; - __u32 tb_seq_count; /* Timebase sequence counter */ - /* cs_* members must be adjacent and in this order (ldp accesses) */ - __u32 cs_mono_mult; /* NTP-adjusted clocksource multiplier */ - __u32 cs_shift; /* Clocksource shift (mono = raw) */ - __u32 cs_raw_mult; /* Raw clocksource multiplier */ - __u32 tz_minuteswest; /* Whacky timezone stuff */ - __u32 tz_dsttime; - __u32 use_syscall; - __u32 hrtimer_res; -}; - -#endif /* !__ASSEMBLY__ */ - -#endif /* __ASM_VDSO_DATAPAGE_H */ diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index 2ec09debc2bb..ca158be21f83 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -174,6 +174,9 @@ static void __init register_insn_emulation(struct insn_emulation_ops *ops) struct insn_emulation *insn; insn = kzalloc(sizeof(*insn), GFP_KERNEL); + if (!insn) + return; + insn->ops = ops; insn->min = INSN_UNDEF; @@ -233,6 +236,8 @@ static void __init register_insn_emulation_sysctl(void) insns_sysctl = kcalloc(nr_insn_emulated + 1, sizeof(*sysctl), GFP_KERNEL); + if (!insns_sysctl) + return; raw_spin_lock_irqsave(&insn_emulation_lock, flags); list_for_each_entry(insn, &insn_emulation, node) { diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 1e43ba5c79b7..f593f4cffc0d 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -128,8 +128,8 @@ static void install_bp_hardening_cb(bp_hardening_cb_t fn, int cpu, slot = -1; /* - * enable_smccc_arch_workaround_1() passes NULL for the hyp_vecs - * start/end if we're a guest. Skip the hyp-vectors work. + * detect_harden_bp_fw() passes NULL for the hyp_vecs start/end if + * we're a guest. Skip the hyp-vectors work. */ if (!hyp_vecs_start) { __this_cpu_write(bp_hardening_data.fn, fn); diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 9323bcc40a58..cabebf1a7976 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -136,6 +136,7 @@ static const struct arm64_ftr_bits ftr_id_aa64isar0[] = { static const struct arm64_ftr_bits ftr_id_aa64isar1[] = { ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_SB_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_FRINTTS_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH), FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_GPI_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH), diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 84a822748c84..e304fe04b098 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -775,6 +775,7 @@ el0_sync_compat: b.ge el0_dbg b el0_inv el0_svc_compat: + gic_prio_kentry_setup tmp=x1 mov x0, sp bl el0_svc_compat_handler b ret_to_user diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index 171773257974..06e56b470315 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -121,10 +121,16 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) /* * Ensure updated trampoline is visible to instruction - * fetch before we patch in the branch. + * fetch before we patch in the branch. Although the + * architecture doesn't require an IPI in this case, + * Neoverse-N1 erratum #1542419 does require one + * if the TLB maintenance in module_enable_ro() is + * skipped due to rodata_enabled. It doesn't seem worth + * it to make it conditional given that this is + * certainly not a fast-path. */ - __flush_icache_range((unsigned long)&dst[0], - (unsigned long)&dst[1]); + flush_icache_range((unsigned long)&dst[0], + (unsigned long)&dst[1]); } addr = (unsigned long)dst; #else /* CONFIG_ARM64_MODULE_PLTS */ diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index a47462def04b..1fb2819fc048 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -332,22 +332,27 @@ void arch_release_task_struct(struct task_struct *tsk) fpsimd_release_task(tsk); } -/* - * src and dst may temporarily have aliased sve_state after task_struct - * is copied. We cannot fix this properly here, because src may have - * live SVE state and dst's thread_info may not exist yet, so tweaking - * either src's or dst's TIF_SVE is not safe. - * - * The unaliasing is done in copy_thread() instead. This works because - * dst is not schedulable or traceable until both of these functions - * have been called. - */ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) { if (current->mm) fpsimd_preserve_current_state(); *dst = *src; + /* We rely on the above assignment to initialize dst's thread_flags: */ + BUILD_BUG_ON(!IS_ENABLED(CONFIG_THREAD_INFO_IN_TASK)); + + /* + * Detach src's sve_state (if any) from dst so that it does not + * get erroneously used or freed prematurely. dst's sve_state + * will be allocated on demand later on if dst uses SVE. + * For consistency, also clear TIF_SVE here: this could be done + * later in copy_process(), but to avoid tripping up future + * maintainers it is best not to leave TIF_SVE and sve_state in + * an inconsistent state, even temporarily. + */ + dst->thread.sve_state = NULL; + clear_tsk_thread_flag(dst, TIF_SVE); + return 0; } @@ -361,13 +366,6 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start, memset(&p->thread.cpu_context, 0, sizeof(struct cpu_context)); /* - * Unalias p->thread.sve_state (if any) from the parent task - * and disable discard SVE state for p: - */ - clear_tsk_thread_flag(p, TIF_SVE); - p->thread.sve_state = NULL; - - /* * In case p was allocated the same task_struct pointer as some * other recently-exited task, make sure p is disassociated from * any cpu that may have run that now-exited task recently. diff --git a/arch/arm64/kernel/vdso/gettimeofday.S b/arch/arm64/kernel/vdso/gettimeofday.S deleted file mode 100644 index e69de29bb2d1..000000000000 --- a/arch/arm64/kernel/vdso/gettimeofday.S +++ /dev/null diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile index 1fba0776ed40..76b327f88fbb 100644 --- a/arch/arm64/kernel/vdso32/Makefile +++ b/arch/arm64/kernel/vdso32/Makefile @@ -8,15 +8,21 @@ ARCH_REL_TYPE_ABS := R_ARM_JUMP_SLOT|R_ARM_GLOB_DAT|R_ARM_ABS32 include $(srctree)/lib/vdso/Makefile -COMPATCC := $(CROSS_COMPILE_COMPAT)gcc +# Same as cc-*option, but using CC_COMPAT instead of CC +ifeq ($(CONFIG_CC_IS_CLANG), y) +CC_COMPAT ?= $(CC) +else +CC_COMPAT ?= $(CROSS_COMPILE_COMPAT)gcc +endif -# Same as cc-*option, but using COMPATCC instead of CC cc32-option = $(call try-run,\ - $(COMPATCC) $(1) -c -x c /dev/null -o "$$TMP",$(1),$(2)) + $(CC_COMPAT) $(1) -c -x c /dev/null -o "$$TMP",$(1),$(2)) cc32-disable-warning = $(call try-run,\ - $(COMPATCC) -W$(strip $(1)) -c -x c /dev/null -o "$$TMP",-Wno-$(strip $(1))) + $(CC_COMPAT) -W$(strip $(1)) -c -x c /dev/null -o "$$TMP",-Wno-$(strip $(1))) cc32-ldoption = $(call try-run,\ - $(COMPATCC) $(1) -nostdlib -x c /dev/null -o "$$TMP",$(1),$(2)) + $(CC_COMPAT) $(1) -nostdlib -x c /dev/null -o "$$TMP",$(1),$(2)) +cc32-as-instr = $(call try-run,\ + printf "%b\n" "$(1)" | $(CC_COMPAT) $(VDSO_AFLAGS) -c -x assembler -o "$$TMP" -,$(2),$(3)) # We cannot use the global flags to compile the vDSO files, the main reason # being that the 32-bit compiler may be older than the main (64-bit) compiler @@ -25,22 +31,21 @@ cc32-ldoption = $(call try-run,\ # arm64 one. # As a result we set our own flags here. -# From top-level Makefile -# NOSTDINC_FLAGS -VDSO_CPPFLAGS := -nostdinc -isystem $(shell $(COMPATCC) -print-file-name=include) +# KBUILD_CPPFLAGS and NOSTDINC_FLAGS from top-level Makefile +VDSO_CPPFLAGS := -D__KERNEL__ -nostdinc -isystem $(shell $(CC_COMPAT) -print-file-name=include) VDSO_CPPFLAGS += $(LINUXINCLUDE) -VDSO_CPPFLAGS += $(KBUILD_CPPFLAGS) # Common C and assembly flags # From top-level Makefile VDSO_CAFLAGS := $(VDSO_CPPFLAGS) +ifneq ($(shell $(CC_COMPAT) --version 2>&1 | head -n 1 | grep clang),) +VDSO_CAFLAGS += --target=$(notdir $(CROSS_COMPILE_COMPAT:%-=%)) +endif + VDSO_CAFLAGS += $(call cc32-option,-fno-PIE) ifdef CONFIG_DEBUG_INFO VDSO_CAFLAGS += -g endif -ifeq ($(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-goto.sh $(COMPATCC)), y) -VDSO_CAFLAGS += -DCC_HAVE_ASM_GOTO -endif # From arm Makefile VDSO_CAFLAGS += $(call cc32-option,-fno-dwarf2-cfi-asm) @@ -55,6 +60,7 @@ endif VDSO_CAFLAGS += -fPIC -fno-builtin -fno-stack-protector VDSO_CAFLAGS += -DDISABLE_BRANCH_PROFILING + # Try to compile for ARMv8. If the compiler is too old and doesn't support it, # fall back to v7. There is no easy way to check for what architecture the code # is being compiled, so define a macro specifying that (see arch/arm/Makefile). @@ -91,6 +97,12 @@ VDSO_CFLAGS += -Wno-int-to-pointer-cast VDSO_AFLAGS := $(VDSO_CAFLAGS) VDSO_AFLAGS += -D__ASSEMBLY__ +# Check for binutils support for dmb ishld +dmbinstr := $(call cc32-as-instr,dmb ishld,-DCONFIG_AS_DMB_ISHLD=1) + +VDSO_CFLAGS += $(dmbinstr) +VDSO_AFLAGS += $(dmbinstr) + VDSO_LDFLAGS := $(VDSO_CPPFLAGS) # From arm vDSO Makefile VDSO_LDFLAGS += -Wl,-Bsymbolic -Wl,--no-undefined -Wl,-soname=linux-vdso.so.1 @@ -159,14 +171,14 @@ quiet_cmd_vdsold_and_vdso_check = LD32 $@ cmd_vdsold_and_vdso_check = $(cmd_vdsold); $(cmd_vdso_check) quiet_cmd_vdsold = LD32 $@ - cmd_vdsold = $(COMPATCC) -Wp,-MD,$(depfile) $(VDSO_LDFLAGS) \ + cmd_vdsold = $(CC_COMPAT) -Wp,-MD,$(depfile) $(VDSO_LDFLAGS) \ -Wl,-T $(filter %.lds,$^) $(filter %.o,$^) -o $@ quiet_cmd_vdsocc = CC32 $@ - cmd_vdsocc = $(COMPATCC) -Wp,-MD,$(depfile) $(VDSO_CFLAGS) -c -o $@ $< + cmd_vdsocc = $(CC_COMPAT) -Wp,-MD,$(depfile) $(VDSO_CFLAGS) -c -o $@ $< quiet_cmd_vdsocc_gettimeofday = CC32 $@ - cmd_vdsocc_gettimeofday = $(COMPATCC) -Wp,-MD,$(depfile) $(VDSO_CFLAGS) $(VDSO_CFLAGS_gettimeofday_o) -c -o $@ $< + cmd_vdsocc_gettimeofday = $(CC_COMPAT) -Wp,-MD,$(depfile) $(VDSO_CFLAGS) $(VDSO_CFLAGS_gettimeofday_o) -c -o $@ $< quiet_cmd_vdsoas = AS32 $@ - cmd_vdsoas = $(COMPATCC) -Wp,-MD,$(depfile) $(VDSO_AFLAGS) -c -o $@ $< + cmd_vdsoas = $(CC_COMPAT) -Wp,-MD,$(depfile) $(VDSO_AFLAGS) -c -o $@ $< quiet_cmd_vdsomunge = MUNGE $@ cmd_vdsomunge = $(obj)/$(munge) $< $@ diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 115d7a0e4b08..855f2a7954e6 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -113,6 +113,15 @@ static inline bool is_ttbr1_addr(unsigned long addr) return arch_kasan_reset_tag(addr) >= PAGE_OFFSET; } +static inline unsigned long mm_to_pgd_phys(struct mm_struct *mm) +{ + /* Either init_pg_dir or swapper_pg_dir */ + if (mm == &init_mm) + return __pa_symbol(mm->pgd); + + return (unsigned long)virt_to_phys(mm->pgd); +} + /* * Dump out the page tables associated with 'addr' in the currently active mm. */ @@ -141,7 +150,7 @@ static void show_pte(unsigned long addr) pr_alert("%s pgtable: %luk pages, %llu-bit VAs, pgdp=%016lx\n", mm == &init_mm ? "swapper" : "user", PAGE_SIZE / SZ_1K, - vabits_actual, (unsigned long)virt_to_phys(mm->pgd)); + vabits_actual, mm_to_pgd_phys(mm)); pgdp = pgd_offset(mm, addr); pgd = READ_ONCE(*pgdp); pr_alert("[%016lx] pgd=%016llx", addr, pgd_val(pgd)); @@ -266,7 +275,7 @@ static bool __kprobes is_spurious_el1_translation_fault(unsigned long addr, * If we got a different type of fault from the AT instruction, * treat the translation fault as spurious. */ - dfsc = FIELD_PREP(SYS_PAR_EL1_FST, par); + dfsc = FIELD_GET(SYS_PAR_EL1_FST, par); return (dfsc & ESR_ELx_FSC_TYPE) != ESR_ELx_FSC_FAULT; } diff --git a/arch/mips/fw/sni/sniprom.c b/arch/mips/fw/sni/sniprom.c index 8772617b64ce..80112f2298b6 100644 --- a/arch/mips/fw/sni/sniprom.c +++ b/arch/mips/fw/sni/sniprom.c @@ -43,7 +43,7 @@ /* O32 stack has to be 8-byte aligned. */ static u64 o32_stk[4096]; -#define O32_STK &o32_stk[sizeof(o32_stk)] +#define O32_STK (&o32_stk[ARRAY_SIZE(o32_stk)]) #define __PROM_O32(fun, arg) fun arg __asm__(#fun); \ __asm__(#fun " = call_o32") diff --git a/arch/mips/include/asm/cmpxchg.h b/arch/mips/include/asm/cmpxchg.h index 79bf34efbc04..f6136871561d 100644 --- a/arch/mips/include/asm/cmpxchg.h +++ b/arch/mips/include/asm/cmpxchg.h @@ -77,8 +77,8 @@ extern unsigned long __xchg_called_with_bad_pointer(void) extern unsigned long __xchg_small(volatile void *ptr, unsigned long val, unsigned int size); -static inline unsigned long __xchg(volatile void *ptr, unsigned long x, - int size) +static __always_inline +unsigned long __xchg(volatile void *ptr, unsigned long x, int size) { switch (size) { case 1: @@ -153,8 +153,9 @@ static inline unsigned long __xchg(volatile void *ptr, unsigned long x, extern unsigned long __cmpxchg_small(volatile void *ptr, unsigned long old, unsigned long new, unsigned int size); -static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, - unsigned long new, unsigned int size) +static __always_inline +unsigned long __cmpxchg(volatile void *ptr, unsigned long old, + unsigned long new, unsigned int size) { switch (size) { case 1: diff --git a/arch/mips/include/uapi/asm/hwcap.h b/arch/mips/include/uapi/asm/hwcap.h index a2aba4b059e6..1ade1daa4921 100644 --- a/arch/mips/include/uapi/asm/hwcap.h +++ b/arch/mips/include/uapi/asm/hwcap.h @@ -6,5 +6,16 @@ #define HWCAP_MIPS_R6 (1 << 0) #define HWCAP_MIPS_MSA (1 << 1) #define HWCAP_MIPS_CRC32 (1 << 2) +#define HWCAP_MIPS_MIPS16 (1 << 3) +#define HWCAP_MIPS_MDMX (1 << 4) +#define HWCAP_MIPS_MIPS3D (1 << 5) +#define HWCAP_MIPS_SMARTMIPS (1 << 6) +#define HWCAP_MIPS_DSP (1 << 7) +#define HWCAP_MIPS_DSP2 (1 << 8) +#define HWCAP_MIPS_DSP3 (1 << 9) +#define HWCAP_MIPS_MIPS16E2 (1 << 10) +#define HWCAP_LOONGSON_MMI (1 << 11) +#define HWCAP_LOONGSON_EXT (1 << 12) +#define HWCAP_LOONGSON_EXT2 (1 << 13) #endif /* _UAPI_ASM_HWCAP_H */ diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c index c2eb392597bf..f521cbf934e7 100644 --- a/arch/mips/kernel/cpu-probe.c +++ b/arch/mips/kernel/cpu-probe.c @@ -2180,6 +2180,39 @@ void cpu_probe(void) elf_hwcap |= HWCAP_MIPS_MSA; } + if (cpu_has_mips16) + elf_hwcap |= HWCAP_MIPS_MIPS16; + + if (cpu_has_mdmx) + elf_hwcap |= HWCAP_MIPS_MDMX; + + if (cpu_has_mips3d) + elf_hwcap |= HWCAP_MIPS_MIPS3D; + + if (cpu_has_smartmips) + elf_hwcap |= HWCAP_MIPS_SMARTMIPS; + + if (cpu_has_dsp) + elf_hwcap |= HWCAP_MIPS_DSP; + + if (cpu_has_dsp2) + elf_hwcap |= HWCAP_MIPS_DSP2; + + if (cpu_has_dsp3) + elf_hwcap |= HWCAP_MIPS_DSP3; + + if (cpu_has_mips16e2) + elf_hwcap |= HWCAP_MIPS_MIPS16E2; + + if (cpu_has_loongson_mmi) + elf_hwcap |= HWCAP_LOONGSON_MMI; + + if (cpu_has_loongson_ext) + elf_hwcap |= HWCAP_LOONGSON_EXT; + + if (cpu_has_loongson_ext2) + elf_hwcap |= HWCAP_LOONGSON_EXT2; + if (cpu_has_vz) cpu_probe_vz(c); diff --git a/arch/mips/loongson64/Platform b/arch/mips/loongson64/Platform index c1a4d4dc4665..9f79908f5063 100644 --- a/arch/mips/loongson64/Platform +++ b/arch/mips/loongson64/Platform @@ -66,6 +66,10 @@ else $(call cc-option,-march=mips64r2,-mips64r2 -U_MIPS_ISA -D_MIPS_ISA=_MIPS_ISA_MIPS64) endif +# Some -march= flags enable MMI instructions, and GCC complains about that +# support being enabled alongside -msoft-float. Thus explicitly disable MMI. +cflags-y += $(call cc-option,-mno-loongson-mmi) + # # Loongson Machines' Support # diff --git a/arch/mips/vdso/Makefile b/arch/mips/vdso/Makefile index 807f0f782f75..996a934ece7d 100644 --- a/arch/mips/vdso/Makefile +++ b/arch/mips/vdso/Makefile @@ -15,6 +15,7 @@ ccflags-vdso := \ $(filter -mmicromips,$(KBUILD_CFLAGS)) \ $(filter -march=%,$(KBUILD_CFLAGS)) \ $(filter -m%-float,$(KBUILD_CFLAGS)) \ + $(filter -mno-loongson-%,$(KBUILD_CFLAGS)) \ -D__VDSO__ ifdef CONFIG_CC_IS_CLANG diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h index 4ce795d30377..ca8db193ae38 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h @@ -35,6 +35,10 @@ static inline void radix__flush_all_lpid(unsigned int lpid) { WARN_ON(1); } +static inline void radix__flush_all_lpid_guest(unsigned int lpid) +{ + WARN_ON(1); +} #endif extern void radix__flush_hugetlb_tlb_range(struct vm_area_struct *vma, diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 74a9cfe84aee..faebcbb8c4db 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1921,6 +1921,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) mtspr SPRN_PCR, r6 18: /* Signal secondary CPUs to continue */ + li r0, 0 stb r0,VCORE_IN_GUEST(r5) 19: lis r8,0x7fff /* MAX_INT@h */ mtspr SPRN_HDEC,r8 diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 1d93e55a2de1..2dd452a047cd 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -761,6 +761,7 @@ static int spufs_init_fs_context(struct fs_context *fc) ctx->gid = current_gid(); ctx->mode = 0755; + fc->fs_private = ctx; fc->s_fs_info = sbi; fc->ops = &spufs_context_ops; return 0; diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index b53359258d99..f87a5c64e24d 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -1419,6 +1419,9 @@ void __init pseries_lpar_read_hblkrm_characteristics(void) unsigned char local_buffer[SPLPAR_TLB_BIC_MAXLENGTH]; int call_status, len, idx, bpsize; + if (!firmware_has_feature(FW_FEATURE_BLOCK_REMOVE)) + return; + spin_lock(&rtas_data_buf_lock); memset(rtas_data_buf, 0, RTAS_DATA_BUF_SIZE); call_status = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1, diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index 2d592da1e776..8ca479831142 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -273,12 +273,11 @@ restore_all: resume_kernel: REG_L s0, TASK_TI_PREEMPT_COUNT(tp) bnez s0, restore_all -need_resched: REG_L s0, TASK_TI_FLAGS(tp) andi s0, s0, _TIF_NEED_RESCHED beqz s0, restore_all call preempt_schedule_irq - j need_resched + j restore_all #endif work_pending: diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index 424eb72d56b1..93742df9067f 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -124,24 +124,24 @@ static inline unsigned long get_break_insn_length(unsigned long pc) asmlinkage void do_trap_break(struct pt_regs *regs) { -#ifdef CONFIG_GENERIC_BUG if (!user_mode(regs)) { enum bug_trap_type type; type = report_bug(regs->sepc, regs); switch (type) { - case BUG_TRAP_TYPE_NONE: - break; +#ifdef CONFIG_GENERIC_BUG case BUG_TRAP_TYPE_WARN: regs->sepc += get_break_insn_length(regs->sepc); - break; + return; case BUG_TRAP_TYPE_BUG: +#endif /* CONFIG_GENERIC_BUG */ + default: die(regs, "Kernel BUG"); } + } else { + force_sig_fault(SIGTRAP, TRAP_BRKPT, + (void __user *)(regs->sepc)); } -#endif /* CONFIG_GENERIC_BUG */ - - force_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)(regs->sepc)); } #ifdef CONFIG_GENERIC_BUG diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index bd2fd9a7821d..a470f1fa9f2a 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -83,7 +83,7 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n); __rc; \ }) -static inline int __put_user_fn(void *x, void __user *ptr, unsigned long size) +static __always_inline int __put_user_fn(void *x, void __user *ptr, unsigned long size) { unsigned long spec = 0x010000UL; int rc; @@ -113,7 +113,7 @@ static inline int __put_user_fn(void *x, void __user *ptr, unsigned long size) return rc; } -static inline int __get_user_fn(void *x, const void __user *ptr, unsigned long size) +static __always_inline int __get_user_fn(void *x, const void __user *ptr, unsigned long size) { unsigned long spec = 0x01UL; int rc; diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h index cff3f3f3bfe0..8348f7d69fd5 100644 --- a/arch/x86/include/asm/cpu_entry_area.h +++ b/arch/x86/include/asm/cpu_entry_area.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_CPU_ENTRY_AREA_H #define _ASM_X86_CPU_ENTRY_AREA_H diff --git a/arch/x86/include/asm/pti.h b/arch/x86/include/asm/pti.h index 5df09a0b80b8..07375b476c4f 100644 --- a/arch/x86/include/asm/pti.h +++ b/arch/x86/include/asm/pti.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_PTI_H #define _ASM_X86_PTI_H #ifndef __ASSEMBLY__ diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 35c225ede0e4..61d93f062a36 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -734,5 +734,28 @@ do { \ if (unlikely(__gu_err)) goto err_label; \ } while (0) +/* + * We want the unsafe accessors to always be inlined and use + * the error labels - thus the macro games. + */ +#define unsafe_copy_loop(dst, src, len, type, label) \ + while (len >= sizeof(type)) { \ + unsafe_put_user(*(type *)src,(type __user *)dst,label); \ + dst += sizeof(type); \ + src += sizeof(type); \ + len -= sizeof(type); \ + } + +#define unsafe_copy_to_user(_dst,_src,_len,label) \ +do { \ + char __user *__ucu_dst = (_dst); \ + const char *__ucu_src = (_src); \ + size_t __ucu_len = (_len); \ + unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u64, label); \ + unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u32, label); \ + unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u16, label); \ + unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u8, label); \ +} while (0) + #endif /* _ASM_X86_UACCESS_H */ diff --git a/arch/x86/kernel/process.h b/arch/x86/kernel/process.h index 320ab978fb1f..1d0797b2338a 100644 --- a/arch/x86/kernel/process.h +++ b/arch/x86/kernel/process.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ // // Code shared between 32 and 64 bit diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 750f46ad018a..205b1176084f 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -269,19 +269,41 @@ void xen_reboot(int reason) BUG(); } +static int reboot_reason = SHUTDOWN_reboot; +static bool xen_legacy_crash; void xen_emergency_restart(void) { - xen_reboot(SHUTDOWN_reboot); + xen_reboot(reboot_reason); } static int xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr) { - if (!kexec_crash_loaded()) - xen_reboot(SHUTDOWN_crash); + if (!kexec_crash_loaded()) { + if (xen_legacy_crash) + xen_reboot(SHUTDOWN_crash); + + reboot_reason = SHUTDOWN_crash; + + /* + * If panic_timeout==0 then we are supposed to wait forever. + * However, to preserve original dom0 behavior we have to drop + * into hypervisor. (domU behavior is controlled by its + * config file) + */ + if (panic_timeout == 0) + panic_timeout = -1; + } return NOTIFY_DONE; } +static int __init parse_xen_legacy_crash(char *arg) +{ + xen_legacy_crash = true; + return 0; +} +early_param("xen_legacy_crash", parse_xen_legacy_crash); + static struct notifier_block xen_panic_block = { .notifier_call = xen_panic_event, .priority = INT_MIN diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c index 61b635bc2a31..656460636ad3 100644 --- a/block/blk-rq-qos.c +++ b/block/blk-rq-qos.c @@ -160,24 +160,27 @@ bool rq_depth_calc_max_depth(struct rq_depth *rqd) return ret; } -void rq_depth_scale_up(struct rq_depth *rqd) +/* Returns true on success and false if scaling up wasn't possible */ +bool rq_depth_scale_up(struct rq_depth *rqd) { /* * Hit max in previous round, stop here */ if (rqd->scaled_max) - return; + return false; rqd->scale_step--; rqd->scaled_max = rq_depth_calc_max_depth(rqd); + return true; } /* * Scale rwb down. If 'hard_throttle' is set, do it quicker, since we - * had a latency violation. + * had a latency violation. Returns true on success and returns false if + * scaling down wasn't possible. */ -void rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle) +bool rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle) { /* * Stop scaling down when we've hit the limit. This also prevents @@ -185,7 +188,7 @@ void rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle) * keep up. */ if (rqd->max_depth == 1) - return; + return false; if (rqd->scale_step < 0 && hard_throttle) rqd->scale_step = 0; @@ -194,6 +197,7 @@ void rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle) rqd->scaled_max = false; rq_depth_calc_max_depth(rqd); + return true; } struct rq_qos_wait_data { diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h index 08a09dbe0f4b..e8cb68f6958a 100644 --- a/block/blk-rq-qos.h +++ b/block/blk-rq-qos.h @@ -130,8 +130,8 @@ void rq_qos_wait(struct rq_wait *rqw, void *private_data, acquire_inflight_cb_t *acquire_inflight_cb, cleanup_cb_t *cleanup_cb); bool rq_wait_inc_below(struct rq_wait *rq_wait, unsigned int limit); -void rq_depth_scale_up(struct rq_depth *rqd); -void rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle); +bool rq_depth_scale_up(struct rq_depth *rqd); +bool rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle); bool rq_depth_calc_max_depth(struct rq_depth *rqd); void __rq_qos_cleanup(struct rq_qos *rqos, struct bio *bio); diff --git a/block/blk-wbt.c b/block/blk-wbt.c index 8af553a0ba00..8641ba9793c5 100644 --- a/block/blk-wbt.c +++ b/block/blk-wbt.c @@ -308,7 +308,8 @@ static void calc_wb_limits(struct rq_wb *rwb) static void scale_up(struct rq_wb *rwb) { - rq_depth_scale_up(&rwb->rq_depth); + if (!rq_depth_scale_up(&rwb->rq_depth)) + return; calc_wb_limits(rwb); rwb->unknown_cnt = 0; rwb_wake_all(rwb); @@ -317,7 +318,8 @@ static void scale_up(struct rq_wb *rwb) static void scale_down(struct rq_wb *rwb, bool hard_throttle) { - rq_depth_scale_down(&rwb->rq_depth, hard_throttle); + if (!rq_depth_scale_down(&rwb->rq_depth, hard_throttle)) + return; calc_wb_limits(rwb); rwb->unknown_cnt = 0; rwb_trace_step(rwb, "scale down"); diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 76d0f9de767b..58e09ffe8b9c 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -4791,27 +4791,6 @@ void ata_scsi_hotplug(struct work_struct *work) return; } - /* - * XXX - UGLY HACK - * - * The block layer suspend/resume path is fundamentally broken due - * to freezable kthreads and workqueue and may deadlock if a block - * device gets removed while resume is in progress. I don't know - * what the solution is short of removing freezable kthreads and - * workqueues altogether. - * - * The following is an ugly hack to avoid kicking off device - * removal while freezer is active. This is a joke but does avoid - * this particular deadlock scenario. - * - * https://bugzilla.kernel.org/show_bug.cgi?id=62801 - * http://marc.info/?l=linux-kernel&m=138695698516487 - */ -#ifdef CONFIG_FREEZER - while (pm_freezing) - msleep(10); -#endif - DPRINTK("ENTER\n"); mutex_lock(&ap->scsi_scan_mutex); diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index ac07e8c94c79..478aa86fc1f2 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -248,8 +248,8 @@ static void nbd_put(struct nbd_device *nbd) if (refcount_dec_and_mutex_lock(&nbd->refs, &nbd_index_mutex)) { idr_remove(&nbd_index_idr, nbd->index); - mutex_unlock(&nbd_index_mutex); nbd_dev_remove(nbd); + mutex_unlock(&nbd_index_mutex); } } diff --git a/drivers/block/null_blk_zoned.c b/drivers/block/null_blk_zoned.c index eabc116832a7..3d7fdea872f8 100644 --- a/drivers/block/null_blk_zoned.c +++ b/drivers/block/null_blk_zoned.c @@ -142,8 +142,7 @@ static blk_status_t null_zone_reset(struct nullb_cmd *cmd, sector_t sector) zone->wp = zone->start; break; default: - cmd->error = BLK_STS_NOTSUPP; - break; + return BLK_STS_NOTSUPP; } return BLK_STS_OK; } diff --git a/drivers/gpio/gpio-eic-sprd.c b/drivers/gpio/gpio-eic-sprd.c index fe7a73f52329..bb287f35cf40 100644 --- a/drivers/gpio/gpio-eic-sprd.c +++ b/drivers/gpio/gpio-eic-sprd.c @@ -530,11 +530,12 @@ static void sprd_eic_handle_one_type(struct gpio_chip *chip) } for_each_set_bit(n, ®, SPRD_EIC_PER_BANK_NR) { - girq = irq_find_mapping(chip->irq.domain, - bank * SPRD_EIC_PER_BANK_NR + n); + u32 offset = bank * SPRD_EIC_PER_BANK_NR + n; + + girq = irq_find_mapping(chip->irq.domain, offset); generic_handle_irq(girq); - sprd_eic_toggle_trigger(chip, girq, n); + sprd_eic_toggle_trigger(chip, girq, offset); } } } diff --git a/drivers/gpio/gpio-max77620.c b/drivers/gpio/gpio-max77620.c index 47d05e357e61..faf86ea9c51a 100644 --- a/drivers/gpio/gpio-max77620.c +++ b/drivers/gpio/gpio-max77620.c @@ -192,13 +192,13 @@ static int max77620_gpio_set_debounce(struct max77620_gpio *mgpio, case 0: val = MAX77620_CNFG_GPIO_DBNC_None; break; - case 1 ... 8: + case 1000 ... 8000: val = MAX77620_CNFG_GPIO_DBNC_8ms; break; - case 9 ... 16: + case 9000 ... 16000: val = MAX77620_CNFG_GPIO_DBNC_16ms; break; - case 17 ... 32: + case 17000 ... 32000: val = MAX77620_CNFG_GPIO_DBNC_32ms; break; default: diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index 1eea2c6c2e1d..80ea49f570f4 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -317,7 +317,7 @@ struct gpio_desc *gpiod_get_from_of_node(struct device_node *node, transitory = flags & OF_GPIO_TRANSITORY; ret = gpiod_request(desc, label); - if (ret == -EBUSY && (flags & GPIOD_FLAGS_BIT_NONEXCLUSIVE)) + if (ret == -EBUSY && (dflags & GPIOD_FLAGS_BIT_NONEXCLUSIVE)) return desc; if (ret) return ERR_PTR(ret); diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index bdbc1649eafa..5833e4f380d6 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -3070,8 +3070,10 @@ int gpiod_direction_output(struct gpio_desc *desc, int value) if (!ret) goto set_output_value; /* Emulate open drain by not actively driving the line high */ - if (value) - return gpiod_direction_input(desc); + if (value) { + ret = gpiod_direction_input(desc); + goto set_output_flag; + } } else if (test_bit(FLAG_OPEN_SOURCE, &desc->flags)) { ret = gpio_set_config(gc, gpio_chip_hwgpio(desc), @@ -3079,8 +3081,10 @@ int gpiod_direction_output(struct gpio_desc *desc, int value) if (!ret) goto set_output_value; /* Emulate open source by not actively driving the line low */ - if (!value) - return gpiod_direction_input(desc); + if (!value) { + ret = gpiod_direction_input(desc); + goto set_output_flag; + } } else { gpio_set_config(gc, gpio_chip_hwgpio(desc), PIN_CONFIG_DRIVE_PUSH_PULL); @@ -3088,6 +3092,17 @@ int gpiod_direction_output(struct gpio_desc *desc, int value) set_output_value: return gpiod_direction_output_raw_commit(desc, value); + +set_output_flag: + /* + * When emulating open-source or open-drain functionalities by not + * actively driving the line (setting mode to input) we still need to + * set the IS_OUT flag or otherwise we won't be able to set the line + * value anymore. + */ + if (ret == 0) + set_bit(FLAG_IS_OUT, &desc->flags); + return ret; } EXPORT_SYMBOL_GPL(gpiod_direction_output); @@ -3448,8 +3463,6 @@ static void gpio_set_open_drain_value_commit(struct gpio_desc *desc, bool value) if (value) { ret = chip->direction_input(chip, offset); - if (!ret) - clear_bit(FLAG_IS_OUT, &desc->flags); } else { ret = chip->direction_output(chip, offset, 0); if (!ret) @@ -3479,8 +3492,6 @@ static void gpio_set_open_source_value_commit(struct gpio_desc *desc, bool value set_bit(FLAG_IS_OUT, &desc->flags); } else { ret = chip->direction_input(chip, offset); - if (!ret) - clear_bit(FLAG_IS_OUT, &desc->flags); } trace_gpio_direction(desc_to_gpio(desc), !value, ret); if (ret < 0) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index 7bcf86c61999..61e38e43ad1d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c @@ -270,7 +270,7 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data, r = amdgpu_bo_create_list_entry_array(&args->in, &info); if (r) - goto error_free; + return r; switch (args->in.operation) { case AMDGPU_BO_LIST_OP_CREATE: @@ -283,8 +283,7 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data, r = idr_alloc(&fpriv->bo_list_handles, list, 1, 0, GFP_KERNEL); mutex_unlock(&fpriv->bo_list_lock); if (r < 0) { - amdgpu_bo_list_put(list); - return r; + goto error_put_list; } handle = r; @@ -306,9 +305,8 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data, mutex_unlock(&fpriv->bo_list_lock); if (IS_ERR(old)) { - amdgpu_bo_list_put(list); r = PTR_ERR(old); - goto error_free; + goto error_put_list; } amdgpu_bo_list_put(old); @@ -325,8 +323,10 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data, return 0; +error_put_list: + amdgpu_bo_list_put(list); + error_free: - if (info) - kvfree(info); + kvfree(info); return r; } diff --git a/drivers/gpu/drm/bridge/tc358767.c b/drivers/gpu/drm/bridge/tc358767.c index cebc8e620820..8a8d605021f0 100644 --- a/drivers/gpu/drm/bridge/tc358767.c +++ b/drivers/gpu/drm/bridge/tc358767.c @@ -728,6 +728,8 @@ static int tc_set_video_mode(struct tc_data *tc, int lower_margin = mode->vsync_start - mode->vdisplay; int vsync_len = mode->vsync_end - mode->vsync_start; u32 dp0_syncval; + u32 bits_per_pixel = 24; + u32 in_bw, out_bw; /* * Recommended maximum number of symbols transferred in a transfer unit: @@ -735,7 +737,10 @@ static int tc_set_video_mode(struct tc_data *tc, * (output active video bandwidth in bytes)) * Must be less than tu_size. */ - max_tu_symbol = TU_SIZE_RECOMMENDED - 1; + + in_bw = mode->clock * bits_per_pixel / 8; + out_bw = tc->link.base.num_lanes * tc->link.base.rate; + max_tu_symbol = DIV_ROUND_UP(in_bw * TU_SIZE_RECOMMENDED, out_bw); dev_dbg(tc->dev, "set mode %dx%d\n", mode->hdisplay, mode->vdisplay); diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index ce05e805b08f..aa54bb22796d 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -3280,7 +3280,20 @@ static int skl_max_plane_width(const struct drm_framebuffer *fb, switch (fb->modifier) { case DRM_FORMAT_MOD_LINEAR: case I915_FORMAT_MOD_X_TILED: - return 4096; + /* + * Validated limit is 4k, but has 5k should + * work apart from the following features: + * - Ytile (already limited to 4k) + * - FP16 (already limited to 4k) + * - render compression (already limited to 4k) + * - KVMR sprite and cursor (don't care) + * - horizontal panning (TODO verify this) + * - pipe and plane scaling (TODO verify this) + */ + if (cpp == 8) + return 4096; + else + return 5120; case I915_FORMAT_MOD_Y_TILED_CCS: case I915_FORMAT_MOD_Yf_TILED_CCS: /* FIXME AUX plane? */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index 261c9bd83f51..91051e178021 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -245,11 +245,9 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf) wakeref = intel_runtime_pm_get(rpm); - srcu = intel_gt_reset_trylock(ggtt->vm.gt); - if (srcu < 0) { - ret = srcu; + ret = intel_gt_reset_trylock(ggtt->vm.gt, &srcu); + if (ret) goto err_rpm; - } ret = i915_mutex_lock_interruptible(dev); if (ret) @@ -318,7 +316,11 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf) intel_wakeref_auto(&i915->ggtt.userfault_wakeref, msecs_to_jiffies_timeout(CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND)); - i915_vma_set_ggtt_write(vma); + if (write) { + GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); + i915_vma_set_ggtt_write(vma); + obj->mm.dirty = true; + } err_fence: i915_vma_unpin_fence(vma); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c index 92e53c25424c..ad2a63dbcac2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c @@ -241,9 +241,6 @@ void i915_gem_resume(struct drm_i915_private *i915) mutex_lock(&i915->drm.struct_mutex); intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL); - i915_gem_restore_gtt_mappings(i915); - i915_gem_restore_fences(i915); - if (i915_gem_init_hw(i915)) goto err_wedged; diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index d3c6993f4f46..22aab8593abf 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -136,6 +136,20 @@ execlists_active(const struct intel_engine_execlists *execlists) return READ_ONCE(*execlists->active); } +static inline void +execlists_active_lock_bh(struct intel_engine_execlists *execlists) +{ + local_bh_disable(); /* prevent local softirq and lock recursion */ + tasklet_lock(&execlists->tasklet); +} + +static inline void +execlists_active_unlock_bh(struct intel_engine_execlists *execlists) +{ + tasklet_unlock(&execlists->tasklet); + local_bh_enable(); /* restore softirq, and kick ksoftirqd! */ +} + struct i915_request * execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 82630db0394b..4ce8626b140e 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1197,9 +1197,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, struct drm_printer *m) { struct drm_i915_private *dev_priv = engine->i915; - const struct intel_engine_execlists * const execlists = - &engine->execlists; - unsigned long flags; + struct intel_engine_execlists * const execlists = &engine->execlists; u64 addr; if (engine->id == RENDER_CLASS && IS_GEN_RANGE(dev_priv, 4, 7)) @@ -1281,7 +1279,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, idx, hws[idx * 2], hws[idx * 2 + 1]); } - spin_lock_irqsave(&engine->active.lock, flags); + execlists_active_lock_bh(execlists); for (port = execlists->active; (rq = *port); port++) { char hdr[80]; int len; @@ -1309,7 +1307,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, hwsp_seqno(rq)); print_request(m, rq, hdr); } - spin_unlock_irqrestore(&engine->active.lock, flags); + execlists_active_unlock_bh(execlists); } else if (INTEL_GEN(dev_priv) > 6) { drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n", ENGINE_READ(engine, RING_PP_DIR_BASE)); @@ -1440,8 +1438,8 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine) if (!intel_engine_supports_stats(engine)) return -ENODEV; - spin_lock_irqsave(&engine->active.lock, flags); - write_seqlock(&engine->stats.lock); + execlists_active_lock_bh(execlists); + write_seqlock_irqsave(&engine->stats.lock, flags); if (unlikely(engine->stats.enabled == ~0)) { err = -EBUSY; @@ -1469,8 +1467,8 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine) } unlock: - write_sequnlock(&engine->stats.lock); - spin_unlock_irqrestore(&engine->active.lock, flags); + write_sequnlock_irqrestore(&engine->stats.lock, flags); + execlists_active_unlock_bh(execlists); return err; } diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index d42584439f51..bdfcc7bdadbf 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -631,7 +631,6 @@ execlists_schedule_out(struct i915_request *rq) struct intel_engine_cs *cur, *old; trace_i915_request_out(rq); - GEM_BUG_ON(intel_context_inflight(ce) != rq->engine); old = READ_ONCE(ce->inflight); do @@ -797,6 +796,17 @@ static bool can_merge_rq(const struct i915_request *prev, GEM_BUG_ON(prev == next); GEM_BUG_ON(!assert_priority_queue(prev, next)); + /* + * We do not submit known completed requests. Therefore if the next + * request is already completed, we can pretend to merge it in + * with the previous context (and we will skip updating the ELSP + * and tracking). Thus hopefully keeping the ELSP full with active + * contexts, despite the best efforts of preempt-to-busy to confuse + * us. + */ + if (i915_request_completed(next)) + return true; + if (!can_merge_ctx(prev->hw_context, next->hw_context)) return false; @@ -893,7 +903,7 @@ static void virtual_xfer_breadcrumbs(struct virtual_engine *ve, static struct i915_request * last_active(const struct intel_engine_execlists *execlists) { - struct i915_request * const *last = execlists->active; + struct i915_request * const *last = READ_ONCE(execlists->active); while (*last && i915_request_completed(*last)) last++; @@ -1172,21 +1182,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine) continue; } - if (i915_request_completed(rq)) { - ve->request = NULL; - ve->base.execlists.queue_priority_hint = INT_MIN; - rb_erase_cached(rb, &execlists->virtual); - RB_CLEAR_NODE(rb); - - rq->engine = engine; - __i915_request_submit(rq); - - spin_unlock(&ve->base.active.lock); - - rb = rb_first_cached(&execlists->virtual); - continue; - } - if (last && !can_merge_rq(last, rq)) { spin_unlock(&ve->base.active.lock); return; /* leave this for another */ @@ -1237,11 +1232,23 @@ static void execlists_dequeue(struct intel_engine_cs *engine) GEM_BUG_ON(ve->siblings[0] != engine); } - __i915_request_submit(rq); - if (!i915_request_completed(rq)) { + if (__i915_request_submit(rq)) { submit = true; last = rq; } + + /* + * Hmm, we have a bunch of virtual engine requests, + * but the first one was already completed (thanks + * preempt-to-busy!). Keep looking at the veng queue + * until we have no more relevant requests (i.e. + * the normal submit queue has higher priority). + */ + if (!submit) { + spin_unlock(&ve->base.active.lock); + rb = rb_first_cached(&execlists->virtual); + continue; + } } spin_unlock(&ve->base.active.lock); @@ -1254,8 +1261,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) int i; priolist_for_each_request_consume(rq, rn, p, i) { - if (i915_request_completed(rq)) - goto skip; + bool merge = true; /* * Can we combine this request with the current port? @@ -1296,14 +1302,23 @@ static void execlists_dequeue(struct intel_engine_cs *engine) ctx_single_port_submission(rq->hw_context)) goto done; - *port = execlists_schedule_in(last, port - execlists->pending); - port++; + merge = false; } - last = rq; - submit = true; -skip: - __i915_request_submit(rq); + if (__i915_request_submit(rq)) { + if (!merge) { + *port = execlists_schedule_in(last, port - execlists->pending); + port++; + last = NULL; + } + + GEM_BUG_ON(last && + !can_merge_ctx(last->hw_context, + rq->hw_context)); + + submit = true; + last = rq; + } } rb_erase_cached(&p->node, &execlists->queue); @@ -1593,8 +1608,11 @@ static void process_csb(struct intel_engine_cs *engine) static void __execlists_submission_tasklet(struct intel_engine_cs *const engine) { lockdep_assert_held(&engine->active.lock); - if (!engine->execlists.pending[0]) + if (!engine->execlists.pending[0]) { + rcu_read_lock(); /* protect peeking at execlists->active */ execlists_dequeue(engine); + rcu_read_unlock(); + } } /* @@ -2399,10 +2417,14 @@ static void reset_csb_pointers(struct intel_engine_cs *engine) static struct i915_request *active_request(struct i915_request *rq) { - const struct list_head * const list = &rq->timeline->requests; const struct intel_context * const ce = rq->hw_context; struct i915_request *active = NULL; + struct list_head *list; + + if (!i915_request_is_active(rq)) /* unwound, but incomplete! */ + return rq; + list = &rq->timeline->requests; list_for_each_entry_from_reverse(rq, list, link) { if (i915_request_completed(rq)) break; @@ -2565,7 +2587,6 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) int i; priolist_for_each_request_consume(rq, rn, p, i) { - list_del_init(&rq->sched.link); __i915_request_submit(rq); dma_fence_set_error(&rq->fence, -EIO); i915_request_mark_complete(rq); @@ -3631,18 +3652,22 @@ static void virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal) { struct virtual_engine *ve = to_virtual_engine(rq->engine); + intel_engine_mask_t allowed, exec; struct ve_bond *bond; + allowed = ~to_request(signal)->engine->mask; + bond = virtual_find_bond(ve, to_request(signal)->engine); - if (bond) { - intel_engine_mask_t old, new, cmp; + if (bond) + allowed &= bond->sibling_mask; - cmp = READ_ONCE(rq->execution_mask); - do { - old = cmp; - new = cmp & bond->sibling_mask; - } while ((cmp = cmpxchg(&rq->execution_mask, old, new)) != old); - } + /* Restrict the bonded request to run on only the available engines */ + exec = READ_ONCE(rq->execution_mask); + while (!try_cmpxchg(&rq->execution_mask, &exec, exec & allowed)) + ; + + /* Prevent the master from being re-run on the bonded engines */ + to_request(signal)->execution_mask &= ~allowed; } struct intel_context * diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index b9d84d52e986..8cea42379dd7 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -42,11 +42,10 @@ static void engine_skip_context(struct i915_request *rq) struct intel_engine_cs *engine = rq->engine; struct i915_gem_context *hung_ctx = rq->gem_context; - lockdep_assert_held(&engine->active.lock); - if (!i915_request_is_active(rq)) return; + lockdep_assert_held(&engine->active.lock); list_for_each_entry_continue(rq, &engine->active.requests, sched.link) if (rq->gem_context == hung_ctx) i915_request_skip(rq, -EIO); @@ -123,7 +122,6 @@ void __i915_request_reset(struct i915_request *rq, bool guilty) rq->fence.seqno, yesno(guilty)); - lockdep_assert_held(&rq->engine->active.lock); GEM_BUG_ON(i915_request_completed(rq)); if (guilty) { @@ -1214,10 +1212,8 @@ out: intel_runtime_pm_put(>->i915->runtime_pm, wakeref); } -int intel_gt_reset_trylock(struct intel_gt *gt) +int intel_gt_reset_trylock(struct intel_gt *gt, int *srcu) { - int srcu; - might_lock(>->reset.backoff_srcu); might_sleep(); @@ -1232,10 +1228,10 @@ int intel_gt_reset_trylock(struct intel_gt *gt) rcu_read_lock(); } - srcu = srcu_read_lock(>->reset.backoff_srcu); + *srcu = srcu_read_lock(>->reset.backoff_srcu); rcu_read_unlock(); - return srcu; + return 0; } void intel_gt_reset_unlock(struct intel_gt *gt, int tag) diff --git a/drivers/gpu/drm/i915/gt/intel_reset.h b/drivers/gpu/drm/i915/gt/intel_reset.h index 37a987b17108..52c00199e069 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.h +++ b/drivers/gpu/drm/i915/gt/intel_reset.h @@ -38,7 +38,7 @@ int intel_engine_reset(struct intel_engine_cs *engine, void __i915_request_reset(struct i915_request *rq, bool guilty); -int __must_check intel_gt_reset_trylock(struct intel_gt *gt); +int __must_check intel_gt_reset_trylock(struct intel_gt *gt, int *srcu); void intel_gt_reset_unlock(struct intel_gt *gt, int tag); void intel_gt_set_wedged(struct intel_gt *gt); diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c index 601c16239fdf..bacaa7bb8c9a 100644 --- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c @@ -1573,7 +1573,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags) struct intel_engine_cs *engine = rq->engine; enum intel_engine_id id; const int num_engines = - IS_HSW_GT1(i915) ? RUNTIME_INFO(i915)->num_engines - 1 : 0; + IS_HASWELL(i915) ? RUNTIME_INFO(i915)->num_engines - 1 : 0; bool force_restore = false; int len; u32 *cs; diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 45481eb1fa3c..5f6ec2fd29a0 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -1063,6 +1063,9 @@ static void gen9_whitelist_build(struct i915_wa_list *w) /* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */ whitelist_reg(w, GEN8_HDC_CHICKEN1); + + /* WaSendPushConstantsFromMMIO:skl,bxt */ + whitelist_reg(w, COMMON_SLICE_CHICKEN2); } static void skl_whitelist_build(struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 020696726f9e..bb6f86c7067a 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1924,6 +1924,11 @@ static int i915_drm_resume(struct drm_device *dev) if (ret) DRM_ERROR("failed to re-enable GGTT\n"); + mutex_lock(&dev_priv->drm.struct_mutex); + i915_gem_restore_gtt_mappings(dev_priv); + i915_gem_restore_fences(dev_priv); + mutex_unlock(&dev_priv->drm.struct_mutex); + intel_csr_ucode_resume(dev_priv); i915_restore_state(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h index 167a7b56ed5b..6795f1daa3d5 100644 --- a/drivers/gpu/drm/i915/i915_gem.h +++ b/drivers/gpu/drm/i915/i915_gem.h @@ -77,6 +77,12 @@ struct drm_i915_private; #define I915_GEM_IDLE_TIMEOUT (HZ / 5) +static inline void tasklet_lock(struct tasklet_struct *t) +{ + while (!tasklet_trylock(t)) + cpu_relax(); +} + static inline void __tasklet_disable_sync_once(struct tasklet_struct *t) { if (!atomic_fetch_inc(&t->count)) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index a53777dd371c..1c5506822dc7 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -194,6 +194,27 @@ static void free_capture_list(struct i915_request *request) } } +static void remove_from_engine(struct i915_request *rq) +{ + struct intel_engine_cs *engine, *locked; + + /* + * Virtual engines complicate acquiring the engine timeline lock, + * as their rq->engine pointer is not stable until under that + * engine lock. The simple ploy we use is to take the lock then + * check that the rq still belongs to the newly locked engine. + */ + locked = READ_ONCE(rq->engine); + spin_lock(&locked->active.lock); + while (unlikely(locked != (engine = READ_ONCE(rq->engine)))) { + spin_unlock(&locked->active.lock); + spin_lock(&engine->active.lock); + locked = engine; + } + list_del(&rq->sched.link); + spin_unlock(&locked->active.lock); +} + static bool i915_request_retire(struct i915_request *rq) { struct i915_active_request *active, *next; @@ -259,9 +280,7 @@ static bool i915_request_retire(struct i915_request *rq) * request that we have removed from the HW and put back on a run * queue. */ - spin_lock(&rq->engine->active.lock); - list_del(&rq->sched.link); - spin_unlock(&rq->engine->active.lock); + remove_from_engine(rq); spin_lock(&rq->lock); i915_request_mark_complete(rq); @@ -358,9 +377,10 @@ __i915_request_await_execution(struct i915_request *rq, return 0; } -void __i915_request_submit(struct i915_request *request) +bool __i915_request_submit(struct i915_request *request) { struct intel_engine_cs *engine = request->engine; + bool result = false; GEM_TRACE("%s fence %llx:%lld, current %d\n", engine->name, @@ -370,6 +390,25 @@ void __i915_request_submit(struct i915_request *request) GEM_BUG_ON(!irqs_disabled()); lockdep_assert_held(&engine->active.lock); + /* + * With the advent of preempt-to-busy, we frequently encounter + * requests that we have unsubmitted from HW, but left running + * until the next ack and so have completed in the meantime. On + * resubmission of that completed request, we can skip + * updating the payload, and execlists can even skip submitting + * the request. + * + * We must remove the request from the caller's priority queue, + * and the caller must only call us when the request is in their + * priority queue, under the active.lock. This ensures that the + * request has *not* yet been retired and we can safely move + * the request into the engine->active.list where it will be + * dropped upon retiring. (Otherwise if resubmit a *retired* + * request, this would be a horrible use-after-free.) + */ + if (i915_request_completed(request)) + goto xfer; + if (i915_gem_context_is_banned(request->gem_context)) i915_request_skip(request, -EIO); @@ -393,13 +432,18 @@ void __i915_request_submit(struct i915_request *request) i915_sw_fence_signaled(&request->semaphore)) engine->saturated |= request->sched.semaphores; - /* We may be recursing from the signal callback of another i915 fence */ - spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); + engine->emit_fini_breadcrumb(request, + request->ring->vaddr + request->postfix); - list_move_tail(&request->sched.link, &engine->active.requests); + trace_i915_request_execute(request); + engine->serial++; + result = true; + +xfer: /* We may be recursing from the signal callback of another i915 fence */ + spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); - GEM_BUG_ON(test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)); - set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags); + if (!test_and_set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)) + list_move_tail(&request->sched.link, &engine->active.requests); if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) && !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags) && @@ -410,12 +454,7 @@ void __i915_request_submit(struct i915_request *request) spin_unlock(&request->lock); - engine->emit_fini_breadcrumb(request, - request->ring->vaddr + request->postfix); - - engine->serial++; - - trace_i915_request_execute(request); + return result; } void i915_request_submit(struct i915_request *request) diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 8ac6e1226a56..e4dd013761e8 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -292,7 +292,7 @@ int i915_request_await_execution(struct i915_request *rq, void i915_request_add(struct i915_request *rq); -void __i915_request_submit(struct i915_request *request); +bool __i915_request_submit(struct i915_request *request); void i915_request_submit(struct i915_request *request); void i915_request_skip(struct i915_request *request, int error); diff --git a/drivers/gpu/drm/i915/intel_pch.c b/drivers/gpu/drm/i915/intel_pch.c index fa864d8f2b73..15f8bff141f9 100644 --- a/drivers/gpu/drm/i915/intel_pch.c +++ b/drivers/gpu/drm/i915/intel_pch.c @@ -69,6 +69,7 @@ intel_pch_type(const struct drm_i915_private *dev_priv, unsigned short id) WARN_ON(!IS_CANNONLAKE(dev_priv) && !IS_COFFEELAKE(dev_priv)); return PCH_CNP; case INTEL_PCH_CMP_DEVICE_ID_TYPE: + case INTEL_PCH_CMP2_DEVICE_ID_TYPE: DRM_DEBUG_KMS("Found Comet Lake PCH (CMP)\n"); WARN_ON(!IS_COFFEELAKE(dev_priv)); /* CometPoint is CNP Compatible */ diff --git a/drivers/gpu/drm/i915/intel_pch.h b/drivers/gpu/drm/i915/intel_pch.h index e6a2d65f19c6..c29c81ec7971 100644 --- a/drivers/gpu/drm/i915/intel_pch.h +++ b/drivers/gpu/drm/i915/intel_pch.h @@ -41,6 +41,7 @@ enum intel_pch { #define INTEL_PCH_CNP_DEVICE_ID_TYPE 0xA300 #define INTEL_PCH_CNP_LP_DEVICE_ID_TYPE 0x9D80 #define INTEL_PCH_CMP_DEVICE_ID_TYPE 0x0280 +#define INTEL_PCH_CMP2_DEVICE_ID_TYPE 0x0680 #define INTEL_PCH_ICP_DEVICE_ID_TYPE 0x3480 #define INTEL_PCH_MCC_DEVICE_ID_TYPE 0x4B00 #define INTEL_PCH_MCC2_DEVICE_ID_TYPE 0x3880 diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c index bb6dd54a6ff3..37593831b539 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem.c @@ -118,6 +118,12 @@ static void pm_resume(struct drm_i915_private *i915) with_intel_runtime_pm(&i915->runtime_pm, wakeref) { intel_gt_sanitize(&i915->gt, false); i915_gem_sanitize(i915); + + mutex_lock(&i915->drm.struct_mutex); + i915_gem_restore_gtt_mappings(i915); + i915_gem_restore_fences(i915); + mutex_unlock(&i915->drm.struct_mutex); + i915_gem_resume(i915); } } diff --git a/drivers/gpu/drm/panel/panel-lg-lb035q02.c b/drivers/gpu/drm/panel/panel-lg-lb035q02.c index fc82a525b071..ee4379729a5b 100644 --- a/drivers/gpu/drm/panel/panel-lg-lb035q02.c +++ b/drivers/gpu/drm/panel/panel-lg-lb035q02.c @@ -220,9 +220,17 @@ static const struct of_device_id lb035q02_of_match[] = { MODULE_DEVICE_TABLE(of, lb035q02_of_match); +static const struct spi_device_id lb035q02_ids[] = { + { "lb035q02", 0 }, + { /* sentinel */ } +}; + +MODULE_DEVICE_TABLE(spi, lb035q02_ids); + static struct spi_driver lb035q02_driver = { .probe = lb035q02_probe, .remove = lb035q02_remove, + .id_table = lb035q02_ids, .driver = { .name = "panel-lg-lb035q02", .of_match_table = lb035q02_of_match, @@ -231,7 +239,6 @@ static struct spi_driver lb035q02_driver = { module_spi_driver(lb035q02_driver); -MODULE_ALIAS("spi:lgphilips,lb035q02"); MODULE_AUTHOR("Tomi Valkeinen <tomi.valkeinen@ti.com>"); MODULE_DESCRIPTION("LG.Philips LB035Q02 LCD Panel driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/gpu/drm/panel/panel-nec-nl8048hl11.c b/drivers/gpu/drm/panel/panel-nec-nl8048hl11.c index 299b217c83e1..20f17e46e65d 100644 --- a/drivers/gpu/drm/panel/panel-nec-nl8048hl11.c +++ b/drivers/gpu/drm/panel/panel-nec-nl8048hl11.c @@ -230,9 +230,17 @@ static const struct of_device_id nl8048_of_match[] = { MODULE_DEVICE_TABLE(of, nl8048_of_match); +static const struct spi_device_id nl8048_ids[] = { + { "nl8048hl11", 0 }, + { /* sentinel */ } +}; + +MODULE_DEVICE_TABLE(spi, nl8048_ids); + static struct spi_driver nl8048_driver = { .probe = nl8048_probe, .remove = nl8048_remove, + .id_table = nl8048_ids, .driver = { .name = "panel-nec-nl8048hl11", .pm = &nl8048_pm_ops, @@ -242,7 +250,6 @@ static struct spi_driver nl8048_driver = { module_spi_driver(nl8048_driver); -MODULE_ALIAS("spi:nec,nl8048hl11"); MODULE_AUTHOR("Erik Gilling <konkers@android.com>"); MODULE_DESCRIPTION("NEC-NL8048HL11 Driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/gpu/drm/panel/panel-sony-acx565akm.c b/drivers/gpu/drm/panel/panel-sony-acx565akm.c index 305259b58767..3d5b9c4f68d9 100644 --- a/drivers/gpu/drm/panel/panel-sony-acx565akm.c +++ b/drivers/gpu/drm/panel/panel-sony-acx565akm.c @@ -684,9 +684,17 @@ static const struct of_device_id acx565akm_of_match[] = { MODULE_DEVICE_TABLE(of, acx565akm_of_match); +static const struct spi_device_id acx565akm_ids[] = { + { "acx565akm", 0 }, + { /* sentinel */ } +}; + +MODULE_DEVICE_TABLE(spi, acx565akm_ids); + static struct spi_driver acx565akm_driver = { .probe = acx565akm_probe, .remove = acx565akm_remove, + .id_table = acx565akm_ids, .driver = { .name = "panel-sony-acx565akm", .of_match_table = acx565akm_of_match, @@ -695,7 +703,6 @@ static struct spi_driver acx565akm_driver = { module_spi_driver(acx565akm_driver); -MODULE_ALIAS("spi:sony,acx565akm"); MODULE_AUTHOR("Nokia Corporation"); MODULE_DESCRIPTION("Sony ACX565AKM LCD Panel Driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/gpu/drm/panel/panel-tpo-td028ttec1.c b/drivers/gpu/drm/panel/panel-tpo-td028ttec1.c index d7b2e34626ef..f2baff827f50 100644 --- a/drivers/gpu/drm/panel/panel-tpo-td028ttec1.c +++ b/drivers/gpu/drm/panel/panel-tpo-td028ttec1.c @@ -375,8 +375,7 @@ static const struct of_device_id td028ttec1_of_match[] = { MODULE_DEVICE_TABLE(of, td028ttec1_of_match); static const struct spi_device_id td028ttec1_ids[] = { - { "tpo,td028ttec1", 0}, - { "toppoly,td028ttec1", 0 }, + { "td028ttec1", 0 }, { /* sentinel */ } }; diff --git a/drivers/gpu/drm/panel/panel-tpo-td043mtea1.c b/drivers/gpu/drm/panel/panel-tpo-td043mtea1.c index 84370562910f..ba163c779084 100644 --- a/drivers/gpu/drm/panel/panel-tpo-td043mtea1.c +++ b/drivers/gpu/drm/panel/panel-tpo-td043mtea1.c @@ -491,9 +491,17 @@ static const struct of_device_id td043mtea1_of_match[] = { MODULE_DEVICE_TABLE(of, td043mtea1_of_match); +static const struct spi_device_id td043mtea1_ids[] = { + { "td043mtea1", 0 }, + { /* sentinel */ } +}; + +MODULE_DEVICE_TABLE(spi, td043mtea1_ids); + static struct spi_driver td043mtea1_driver = { .probe = td043mtea1_probe, .remove = td043mtea1_remove, + .id_table = td043mtea1_ids, .driver = { .name = "panel-tpo-td043mtea1", .pm = &td043mtea1_pm_ops, @@ -503,7 +511,6 @@ static struct spi_driver td043mtea1_driver = { module_spi_driver(td043mtea1_driver); -MODULE_ALIAS("spi:tpo,td043mtea1"); MODULE_AUTHOR("Gražvydas Ignotas <notasas@gmail.com>"); MODULE_DESCRIPTION("TPO TD043MTEA1 Panel Driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/gpu/drm/xen/xen_drm_front.c b/drivers/gpu/drm/xen/xen_drm_front.c index ba1828acd8c9..4be49c1aef51 100644 --- a/drivers/gpu/drm/xen/xen_drm_front.c +++ b/drivers/gpu/drm/xen/xen_drm_front.c @@ -718,17 +718,9 @@ static int xen_drv_probe(struct xenbus_device *xb_dev, struct device *dev = &xb_dev->dev; int ret; - /* - * The device is not spawn from a device tree, so arch_setup_dma_ops - * is not called, thus leaving the device with dummy DMA ops. - * This makes the device return error on PRIME buffer import, which - * is not correct: to fix this call of_dma_configure() with a NULL - * node to set default DMA ops. - */ - dev->coherent_dma_mask = DMA_BIT_MASK(32); - ret = of_dma_configure(dev, NULL, true); + ret = dma_coerce_mask_and_coherent(dev, DMA_BIT_MASK(64)); if (ret < 0) { - DRM_ERROR("Cannot setup DMA ops, ret %d", ret); + DRM_ERROR("Cannot setup DMA mask, ret %d", ret); return ret; } diff --git a/drivers/hid/hid-hyperv.c b/drivers/hid/hid-hyperv.c index cc5b09b87ab0..79a28fc91521 100644 --- a/drivers/hid/hid-hyperv.c +++ b/drivers/hid/hid-hyperv.c @@ -314,60 +314,24 @@ static void mousevsc_on_receive(struct hv_device *device, static void mousevsc_on_channel_callback(void *context) { - const int packet_size = 0x100; - int ret; struct hv_device *device = context; - u32 bytes_recvd; - u64 req_id; struct vmpacket_descriptor *desc; - unsigned char *buffer; - int bufferlen = packet_size; - - buffer = kmalloc(bufferlen, GFP_ATOMIC); - if (!buffer) - return; - - do { - ret = vmbus_recvpacket_raw(device->channel, buffer, - bufferlen, &bytes_recvd, &req_id); - - switch (ret) { - case 0: - if (bytes_recvd <= 0) { - kfree(buffer); - return; - } - desc = (struct vmpacket_descriptor *)buffer; - - switch (desc->type) { - case VM_PKT_COMP: - break; - - case VM_PKT_DATA_INBAND: - mousevsc_on_receive(device, desc); - break; - - default: - pr_err("unhandled packet type %d, tid %llx len %d\n", - desc->type, req_id, bytes_recvd); - break; - } + foreach_vmbus_pkt(desc, device->channel) { + switch (desc->type) { + case VM_PKT_COMP: break; - case -ENOBUFS: - kfree(buffer); - /* Handle large packet */ - bufferlen = bytes_recvd; - buffer = kmalloc(bytes_recvd, GFP_ATOMIC); - - if (!buffer) - return; + case VM_PKT_DATA_INBAND: + mousevsc_on_receive(device, desc); + break; + default: + pr_err("Unhandled packet type %d, tid %llx len %d\n", + desc->type, desc->trans_id, desc->len8 * 8); break; } - } while (1); - + } } static int mousevsc_connect_to_vsp(struct hv_device *device) diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 391f0b225c9a..53a60c81e220 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -912,6 +912,7 @@ static void vmbus_shutdown(struct device *child_device) drv->shutdown(dev); } +#ifdef CONFIG_PM_SLEEP /* * vmbus_suspend - Suspend a vmbus device */ @@ -949,6 +950,7 @@ static int vmbus_resume(struct device *child_device) return drv->resume(dev); } +#endif /* CONFIG_PM_SLEEP */ /* * vmbus_device_release - Final callback release of the vmbus child device @@ -1070,6 +1072,7 @@ msg_handled: vmbus_signal_eom(msg, message_type); } +#ifdef CONFIG_PM_SLEEP /* * Fake RESCIND_CHANNEL messages to clean up hv_sock channels by force for * hibernation, because hv_sock connections can not persist across hibernation. @@ -1105,6 +1108,7 @@ static void vmbus_force_channel_rescinded(struct vmbus_channel *channel) vmbus_connection.work_queue, &ctx->work); } +#endif /* CONFIG_PM_SLEEP */ /* * Direct callback for channels using other deferred processing @@ -2125,6 +2129,7 @@ acpi_walk_err: return ret_val; } +#ifdef CONFIG_PM_SLEEP static int vmbus_bus_suspend(struct device *dev) { struct vmbus_channel *channel, *sc; @@ -2247,6 +2252,7 @@ static int vmbus_bus_resume(struct device *dev) return 0; } +#endif /* CONFIG_PM_SLEEP */ static const struct acpi_device_id vmbus_acpi_device_ids[] = { {"VMBUS", 0}, diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index da10e6ccb43c..5920c0085d35 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -4399,6 +4399,7 @@ error2: error1: port_modify.set_port_cap_mask = 0; port_modify.clr_port_cap_mask = IB_PORT_CM_SUP; + kfree(port); while (--i) { if (!rdma_cap_ib_cm(ib_device, i)) continue; @@ -4407,6 +4408,7 @@ error1: ib_modify_port(ib_device, port->port_num, 0, &port_modify); ib_unregister_mad_agent(port->mad_agent); cm_remove_port_fs(port); + kfree(port); } free: kfree(cm_dev); @@ -4460,6 +4462,7 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data) spin_unlock_irq(&cm.state_lock); ib_unregister_mad_agent(cur_mad_agent); cm_remove_port_fs(port); + kfree(port); } kfree(cm_dev); diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 0e3cf3461999..d78f67623f24 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -2396,9 +2396,10 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, conn_id->cm_id.iw = NULL; cma_exch(conn_id, RDMA_CM_DESTROYING); mutex_unlock(&conn_id->handler_mutex); + mutex_unlock(&listen_id->handler_mutex); cma_deref_id(conn_id); rdma_destroy_id(&conn_id->id); - goto out; + return ret; } mutex_unlock(&conn_id->handler_mutex); diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 99c4a55545cf..2dd2cfe9b561 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -1987,8 +1987,6 @@ static int iw_query_port(struct ib_device *device, if (!netdev) return -ENODEV; - dev_put(netdev); - port_attr->max_mtu = IB_MTU_4096; port_attr->active_mtu = ib_mtu_int_to_enum(netdev->mtu); @@ -1996,19 +1994,22 @@ static int iw_query_port(struct ib_device *device, port_attr->state = IB_PORT_DOWN; port_attr->phys_state = IB_PORT_PHYS_STATE_DISABLED; } else { - inetdev = in_dev_get(netdev); + rcu_read_lock(); + inetdev = __in_dev_get_rcu(netdev); if (inetdev && inetdev->ifa_list) { port_attr->state = IB_PORT_ACTIVE; port_attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP; - in_dev_put(inetdev); } else { port_attr->state = IB_PORT_INIT; port_attr->phys_state = IB_PORT_PHYS_STATE_PORT_CONFIGURATION_TRAINING; } + + rcu_read_unlock(); } + dev_put(netdev); err = device->ops.query_port(device, port_num, port_attr); if (err) return err; diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 7a7474000100..65b36548bc17 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -1230,7 +1230,7 @@ static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh, msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) { ret = -ENOMEM; - goto err; + goto err_get; } nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, @@ -1787,10 +1787,6 @@ static int nldev_stat_del_doit(struct sk_buff *skb, struct nlmsghdr *nlh, cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]); qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]); - ret = rdma_counter_unbind_qpn(device, port, qpn, cntn); - if (ret) - goto err_unbind; - if (fill_nldev_handle(msg, device) || nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) || nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn) || @@ -1799,13 +1795,15 @@ static int nldev_stat_del_doit(struct sk_buff *skb, struct nlmsghdr *nlh, goto err_fill; } + ret = rdma_counter_unbind_qpn(device, port, qpn, cntn); + if (ret) + goto err_fill; + nlmsg_end(msg, nlh); ib_device_put(device); return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); err_fill: - rdma_counter_bind_qpn(device, port, qpn, cntn); -err_unbind: nlmsg_free(msg); err: ib_device_put(device); diff --git a/drivers/infiniband/core/security.c b/drivers/infiniband/core/security.c index 1ab423b19f77..6eb6d2717ca5 100644 --- a/drivers/infiniband/core/security.c +++ b/drivers/infiniband/core/security.c @@ -426,7 +426,7 @@ int ib_create_qp_security(struct ib_qp *qp, struct ib_device *dev) int ret; rdma_for_each_port (dev, i) { - is_ib = rdma_protocol_ib(dev, i++); + is_ib = rdma_protocol_ib(dev, i); if (is_ib) break; } diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index f67a30fda1ed..163ff7ba92b7 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -451,8 +451,10 @@ void ib_umem_odp_release(struct ib_umem_odp *umem_odp) * that the hardware will not attempt to access the MR any more. */ if (!umem_odp->is_implicit_odp) { + mutex_lock(&umem_odp->umem_mutex); ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp), ib_umem_end(umem_odp)); + mutex_unlock(&umem_odp->umem_mutex); kvfree(umem_odp->dma_list); kvfree(umem_odp->page_list); } @@ -719,6 +721,8 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt, u64 addr; struct ib_device *dev = umem_odp->umem.ibdev; + lockdep_assert_held(&umem_odp->umem_mutex); + virt = max_t(u64, virt, ib_umem_start(umem_odp)); bound = min_t(u64, bound, ib_umem_end(umem_odp)); /* Note that during the run of this function, the @@ -726,7 +730,6 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt, * faults from completion. We might be racing with other * invalidations, so we must make sure we free each page only * once. */ - mutex_lock(&umem_odp->umem_mutex); for (addr = virt; addr < bound; addr += BIT(umem_odp->page_shift)) { idx = (addr - ib_umem_start(umem_odp)) >> umem_odp->page_shift; if (umem_odp->page_list[idx]) { @@ -757,7 +760,6 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt, umem_odp->npages--; } } - mutex_unlock(&umem_odp->umem_mutex); } EXPORT_SYMBOL(ib_umem_odp_unmap_dma_pages); diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index a8b9548bd1a2..599340c1f0b8 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -242,10 +242,13 @@ static void set_ep_sin6_addrs(struct c4iw_ep *ep, } } -static int dump_qp(struct c4iw_qp *qp, struct c4iw_debugfs_data *qpd) +static int dump_qp(unsigned long id, struct c4iw_qp *qp, + struct c4iw_debugfs_data *qpd) { int space; int cc; + if (id != qp->wq.sq.qid) + return 0; space = qpd->bufsize - qpd->pos - 1; if (space == 0) @@ -350,7 +353,7 @@ static int qp_open(struct inode *inode, struct file *file) xa_lock_irq(&qpd->devp->qps); xa_for_each(&qpd->devp->qps, index, qp) - dump_qp(qp, qpd); + dump_qp(index, qp, qpd); xa_unlock_irq(&qpd->devp->qps); qpd->buf[qpd->pos++] = 0; diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index aa772ee0706f..35c284af574d 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -275,13 +275,17 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry, struct sk_buff *skb, struct c4iw_wr_wait *wr_waitp) { int err; - struct fw_ri_tpte tpt; + struct fw_ri_tpte *tpt; u32 stag_idx; static atomic_t key; if (c4iw_fatal_error(rdev)) return -EIO; + tpt = kmalloc(sizeof(*tpt), GFP_KERNEL); + if (!tpt) + return -ENOMEM; + stag_state = stag_state > 0; stag_idx = (*stag) >> 8; @@ -291,6 +295,7 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry, mutex_lock(&rdev->stats.lock); rdev->stats.stag.fail++; mutex_unlock(&rdev->stats.lock); + kfree(tpt); return -ENOMEM; } mutex_lock(&rdev->stats.lock); @@ -305,28 +310,28 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry, /* write TPT entry */ if (reset_tpt_entry) - memset(&tpt, 0, sizeof(tpt)); + memset(tpt, 0, sizeof(*tpt)); else { - tpt.valid_to_pdid = cpu_to_be32(FW_RI_TPTE_VALID_F | + tpt->valid_to_pdid = cpu_to_be32(FW_RI_TPTE_VALID_F | FW_RI_TPTE_STAGKEY_V((*stag & FW_RI_TPTE_STAGKEY_M)) | FW_RI_TPTE_STAGSTATE_V(stag_state) | FW_RI_TPTE_STAGTYPE_V(type) | FW_RI_TPTE_PDID_V(pdid)); - tpt.locread_to_qpid = cpu_to_be32(FW_RI_TPTE_PERM_V(perm) | + tpt->locread_to_qpid = cpu_to_be32(FW_RI_TPTE_PERM_V(perm) | (bind_enabled ? FW_RI_TPTE_MWBINDEN_F : 0) | FW_RI_TPTE_ADDRTYPE_V((zbva ? FW_RI_ZERO_BASED_TO : FW_RI_VA_BASED_TO))| FW_RI_TPTE_PS_V(page_size)); - tpt.nosnoop_pbladdr = !pbl_size ? 0 : cpu_to_be32( + tpt->nosnoop_pbladdr = !pbl_size ? 0 : cpu_to_be32( FW_RI_TPTE_PBLADDR_V(PBL_OFF(rdev, pbl_addr)>>3)); - tpt.len_lo = cpu_to_be32((u32)(len & 0xffffffffUL)); - tpt.va_hi = cpu_to_be32((u32)(to >> 32)); - tpt.va_lo_fbo = cpu_to_be32((u32)(to & 0xffffffffUL)); - tpt.dca_mwbcnt_pstag = cpu_to_be32(0); - tpt.len_hi = cpu_to_be32((u32)(len >> 32)); + tpt->len_lo = cpu_to_be32((u32)(len & 0xffffffffUL)); + tpt->va_hi = cpu_to_be32((u32)(to >> 32)); + tpt->va_lo_fbo = cpu_to_be32((u32)(to & 0xffffffffUL)); + tpt->dca_mwbcnt_pstag = cpu_to_be32(0); + tpt->len_hi = cpu_to_be32((u32)(len >> 32)); } err = write_adapter_mem(rdev, stag_idx + (rdev->lldi.vr->stag.start >> 5), - sizeof(tpt), &tpt, skb, wr_waitp); + sizeof(*tpt), tpt, skb, wr_waitp); if (reset_tpt_entry) { c4iw_put_resource(&rdev->resource.tpt_table, stag_idx); @@ -334,6 +339,7 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry, rdev->stats.stag.cur -= 32; mutex_unlock(&rdev->stats.lock); } + kfree(tpt); return err; } diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index eb9368be28c1..bbcac539777a 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -2737,15 +2737,11 @@ int c4iw_create_srq(struct ib_srq *ib_srq, struct ib_srq_init_attr *attrs, if (CHELSIO_CHIP_VERSION(rhp->rdev.lldi.adapter_type) > CHELSIO_T6) srq->flags = T4_SRQ_LIMIT_SUPPORT; - ret = xa_insert_irq(&rhp->qps, srq->wq.qid, srq, GFP_KERNEL); - if (ret) - goto err_free_queue; - if (udata) { srq_key_mm = kmalloc(sizeof(*srq_key_mm), GFP_KERNEL); if (!srq_key_mm) { ret = -ENOMEM; - goto err_remove_handle; + goto err_free_queue; } srq_db_key_mm = kmalloc(sizeof(*srq_db_key_mm), GFP_KERNEL); if (!srq_db_key_mm) { @@ -2789,8 +2785,6 @@ err_free_srq_db_key_mm: kfree(srq_db_key_mm); err_free_srq_key_mm: kfree(srq_key_mm); -err_remove_handle: - xa_erase_irq(&rhp->qps, srq->wq.qid); err_free_queue: free_srq_queue(srq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx, srq->wr_waitp); @@ -2813,8 +2807,6 @@ void c4iw_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) rhp = srq->rhp; pr_debug("%s id %d\n", __func__, srq->wq.qid); - - xa_erase_irq(&rhp->qps, srq->wq.qid); ucontext = rdma_udata_to_drv_context(udata, struct c4iw_ucontext, ibucontext); free_srq_queue(srq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx, diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index 2395fd4233a7..2ed7bfd5feea 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -1526,8 +1526,11 @@ int sdma_init(struct hfi1_devdata *dd, u8 port) } ret = rhashtable_init(tmp_sdma_rht, &sdma_rht_params); - if (ret < 0) + if (ret < 0) { + kfree(tmp_sdma_rht); goto bail; + } + dd->sdma_rht = tmp_sdma_rht; dd_dev_info(dd, "SDMA num_sdma: %u\n", dd->num_sdma); diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 8056930bbe2c..cd9ee1664a69 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -2773,6 +2773,10 @@ int i40iw_register_rdma_device(struct i40iw_device *iwdev) return -ENOMEM; iwibdev = iwdev->iwibdev; rdma_set_device_sysfs_group(&iwibdev->ibdev, &i40iw_attr_group); + ret = ib_device_set_netdev(&iwibdev->ibdev, iwdev->netdev, 1); + if (ret) + goto error; + ret = ib_register_device(&iwibdev->ibdev, "i40iw%d"); if (ret) goto error; diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 59022b744144..d609f4659afb 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -1298,29 +1298,6 @@ static int devx_handle_mkey_create(struct mlx5_ib_dev *dev, return 0; } -static void devx_free_indirect_mkey(struct rcu_head *rcu) -{ - kfree(container_of(rcu, struct devx_obj, devx_mr.rcu)); -} - -/* This function to delete from the radix tree needs to be called before - * destroying the underlying mkey. Otherwise a race might occur in case that - * other thread will get the same mkey before this one will be deleted, - * in that case it will fail via inserting to the tree its own data. - * - * Note: - * An error in the destroy is not expected unless there is some other indirect - * mkey which points to this one. In a kernel cleanup flow it will be just - * destroyed in the iterative destruction call. In a user flow, in case - * the application didn't close in the expected order it's its own problem, - * the mkey won't be part of the tree, in both cases the kernel is safe. - */ -static void devx_cleanup_mkey(struct devx_obj *obj) -{ - xa_erase(&obj->ib_dev->mdev->priv.mkey_table, - mlx5_base_mkey(obj->devx_mr.mmkey.key)); -} - static void devx_cleanup_subscription(struct mlx5_ib_dev *dev, struct devx_event_subscription *sub) { @@ -1362,8 +1339,16 @@ static int devx_obj_cleanup(struct ib_uobject *uobject, int ret; dev = mlx5_udata_to_mdev(&attrs->driver_udata); - if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) - devx_cleanup_mkey(obj); + if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) { + /* + * The pagefault_single_data_segment() does commands against + * the mmkey, we must wait for that to stop before freeing the + * mkey, as another allocation could get the same mkey #. + */ + xa_erase(&obj->ib_dev->mdev->priv.mkey_table, + mlx5_base_mkey(obj->devx_mr.mmkey.key)); + synchronize_srcu(&dev->mr_srcu); + } if (obj->flags & DEVX_OBJ_FLAGS_DCT) ret = mlx5_core_destroy_dct(obj->ib_dev->mdev, &obj->core_dct); @@ -1382,12 +1367,6 @@ static int devx_obj_cleanup(struct ib_uobject *uobject, devx_cleanup_subscription(dev, sub_entry); mutex_unlock(&devx_event_table->event_xa_lock); - if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) { - call_srcu(&dev->mr_srcu, &obj->devx_mr.rcu, - devx_free_indirect_mkey); - return ret; - } - kfree(obj); return ret; } @@ -1491,26 +1470,21 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)( &obj_id); WARN_ON(obj->dinlen > MLX5_MAX_DESTROY_INBOX_SIZE_DW * sizeof(u32)); - if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) { - err = devx_handle_mkey_indirect(obj, dev, cmd_in, cmd_out); - if (err) - goto obj_destroy; - } - err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT, cmd_out, cmd_out_len); if (err) - goto err_copy; + goto obj_destroy; if (opcode == MLX5_CMD_OP_CREATE_GENERAL_OBJECT) obj_type = MLX5_GET(general_obj_in_cmd_hdr, cmd_in, obj_type); - obj->obj_id = get_enc_obj_id(opcode | obj_type << 16, obj_id); + if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) { + err = devx_handle_mkey_indirect(obj, dev, cmd_in, cmd_out); + if (err) + goto obj_destroy; + } return 0; -err_copy: - if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) - devx_cleanup_mkey(obj); obj_destroy: if (obj->flags & DEVX_OBJ_FLAGS_DCT) mlx5_core_destroy_dct(obj->ib_dev->mdev, &obj->core_dct); diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 2ceaef3ea3fb..1a98ee2e01c4 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -606,7 +606,7 @@ struct mlx5_ib_mr { struct mlx5_ib_dev *dev; u32 out[MLX5_ST_SZ_DW(create_mkey_out)]; struct mlx5_core_sig_ctx *sig; - int live; + unsigned int live; void *descs_alloc; int access_flags; /* Needed for rereg MR */ @@ -639,7 +639,6 @@ struct mlx5_ib_mw { struct mlx5_ib_devx_mr { struct mlx5_core_mkey mmkey; int ndescs; - struct rcu_head rcu; }; struct mlx5_ib_umr_context { diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 1eff031ef048..630599311586 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -84,32 +84,6 @@ static bool use_umr_mtt_update(struct mlx5_ib_mr *mr, u64 start, u64 length) length + (start & (MLX5_ADAPTER_PAGE_SIZE - 1)); } -static void update_odp_mr(struct mlx5_ib_mr *mr) -{ - if (is_odp_mr(mr)) { - /* - * This barrier prevents the compiler from moving the - * setting of umem->odp_data->private to point to our - * MR, before reg_umr finished, to ensure that the MR - * initialization have finished before starting to - * handle invalidations. - */ - smp_wmb(); - to_ib_umem_odp(mr->umem)->private = mr; - /* - * Make sure we will see the new - * umem->odp_data->private value in the invalidation - * routines, before we can get page faults on the - * MR. Page faults can happen once we put the MR in - * the tree, below this line. Without the barrier, - * there can be a fault handling and an invalidation - * before umem->odp_data->private == mr is visible to - * the invalidation handler. - */ - smp_wmb(); - } -} - static void reg_mr_callback(int status, struct mlx5_async_work *context) { struct mlx5_ib_mr *mr = @@ -1346,8 +1320,6 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, mr->umem = umem; set_mr_fields(dev, mr, npages, length, access_flags); - update_odp_mr(mr); - if (use_umr) { int update_xlt_flags = MLX5_IB_UPD_XLT_ENABLE; @@ -1363,10 +1335,12 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, } } - if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { - mr->live = 1; + if (is_odp_mr(mr)) { + to_ib_umem_odp(mr->umem)->private = mr; atomic_set(&mr->num_pending_prefetch, 0); } + if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) + smp_store_release(&mr->live, 1); return &mr->ibmr; error: @@ -1441,6 +1415,9 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, if (!mr->umem) return -EINVAL; + if (is_odp_mr(mr)) + return -EOPNOTSUPP; + if (flags & IB_MR_REREG_TRANS) { addr = virt_addr; len = length; @@ -1486,8 +1463,6 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, } mr->allocated_from_cache = 0; - if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) - mr->live = 1; } else { /* * Send a UMR WQE @@ -1516,7 +1491,6 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, set_mr_fields(dev, mr, npages, len, access_flags); - update_odp_mr(mr); return 0; err: @@ -1607,15 +1581,16 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) /* Prevent new page faults and * prefetch requests from succeeding */ - mr->live = 0; + WRITE_ONCE(mr->live, 0); + + /* Wait for all running page-fault handlers to finish. */ + synchronize_srcu(&dev->mr_srcu); /* dequeue pending prefetch requests for the mr */ if (atomic_read(&mr->num_pending_prefetch)) flush_workqueue(system_unbound_wq); WARN_ON(atomic_read(&mr->num_pending_prefetch)); - /* Wait for all running page-fault handlers to finish. */ - synchronize_srcu(&dev->mr_srcu); /* Destroy all page mappings */ if (!umem_odp->is_implicit_odp) mlx5_ib_invalidate_range(umem_odp, @@ -1987,14 +1962,25 @@ free: int mlx5_ib_dealloc_mw(struct ib_mw *mw) { + struct mlx5_ib_dev *dev = to_mdev(mw->device); struct mlx5_ib_mw *mmw = to_mmw(mw); int err; - err = mlx5_core_destroy_mkey((to_mdev(mw->device))->mdev, - &mmw->mmkey); - if (!err) - kfree(mmw); - return err; + if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { + xa_erase(&dev->mdev->priv.mkey_table, + mlx5_base_mkey(mmw->mmkey.key)); + /* + * pagefault_single_data_segment() may be accessing mmw under + * SRCU if the user bound an ODP MR to this MW. + */ + synchronize_srcu(&dev->mr_srcu); + } + + err = mlx5_core_destroy_mkey(dev->mdev, &mmw->mmkey); + if (err) + return err; + kfree(mmw); + return 0; } int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 2e9b43061797..3f9478d19376 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -178,6 +178,29 @@ void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset, return; } + /* + * The locking here is pretty subtle. Ideally the implicit children + * list would be protected by the umem_mutex, however that is not + * possible. Instead this uses a weaker update-then-lock pattern: + * + * srcu_read_lock() + * <change children list> + * mutex_lock(umem_mutex) + * mlx5_ib_update_xlt() + * mutex_unlock(umem_mutex) + * destroy lkey + * + * ie any change the children list must be followed by the locked + * update_xlt before destroying. + * + * The umem_mutex provides the acquire/release semantic needed to make + * the children list visible to a racing thread. While SRCU is not + * technically required, using it gives consistent use of the SRCU + * locking around the children list. + */ + lockdep_assert_held(&to_ib_umem_odp(mr->umem)->umem_mutex); + lockdep_assert_held(&mr->dev->mr_srcu); + odp = odp_lookup(offset * MLX5_IMR_MTT_SIZE, nentries * MLX5_IMR_MTT_SIZE, mr); @@ -202,15 +225,22 @@ static void mr_leaf_free_action(struct work_struct *work) struct ib_umem_odp *odp = container_of(work, struct ib_umem_odp, work); int idx = ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT; struct mlx5_ib_mr *mr = odp->private, *imr = mr->parent; + struct ib_umem_odp *odp_imr = to_ib_umem_odp(imr->umem); + int srcu_key; mr->parent = NULL; synchronize_srcu(&mr->dev->mr_srcu); - ib_umem_odp_release(odp); - if (imr->live) + if (smp_load_acquire(&imr->live)) { + srcu_key = srcu_read_lock(&mr->dev->mr_srcu); + mutex_lock(&odp_imr->umem_mutex); mlx5_ib_update_xlt(imr, idx, 1, 0, MLX5_IB_UPD_XLT_INDIRECT | MLX5_IB_UPD_XLT_ATOMIC); + mutex_unlock(&odp_imr->umem_mutex); + srcu_read_unlock(&mr->dev->mr_srcu, srcu_key); + } + ib_umem_odp_release(odp); mlx5_mr_cache_free(mr->dev, mr); if (atomic_dec_and_test(&imr->num_leaf_free)) @@ -278,7 +308,6 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, idx - blk_start_idx + 1, 0, MLX5_IB_UPD_XLT_ZAP | MLX5_IB_UPD_XLT_ATOMIC); - mutex_unlock(&umem_odp->umem_mutex); /* * We are now sure that the device will not access the * memory. We can safely unmap it, and mark it as dirty if @@ -289,10 +318,12 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, if (unlikely(!umem_odp->npages && mr->parent && !umem_odp->dying)) { - WRITE_ONCE(umem_odp->dying, 1); + WRITE_ONCE(mr->live, 0); + umem_odp->dying = 1; atomic_inc(&mr->parent->num_leaf_free); schedule_work(&umem_odp->work); } + mutex_unlock(&umem_odp->umem_mutex); } void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev) @@ -429,8 +460,6 @@ static struct mlx5_ib_mr *implicit_mr_alloc(struct ib_pd *pd, mr->ibmr.lkey = mr->mmkey.key; mr->ibmr.rkey = mr->mmkey.key; - mr->live = 1; - mlx5_ib_dbg(dev, "key %x dev %p mr %p\n", mr->mmkey.key, dev->mdev, mr); @@ -484,6 +513,8 @@ next_mr: mtt->parent = mr; INIT_WORK(&odp->work, mr_leaf_free_action); + smp_store_release(&mtt->live, 1); + if (!nentries) start_idx = addr >> MLX5_IMR_MTT_SHIFT; nentries++; @@ -536,6 +567,7 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, init_waitqueue_head(&imr->q_leaf_free); atomic_set(&imr->num_leaf_free, 0); atomic_set(&imr->num_pending_prefetch, 0); + smp_store_release(&imr->live, 1); return imr; } @@ -555,15 +587,19 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr) if (mr->parent != imr) continue; + mutex_lock(&umem_odp->umem_mutex); ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp), ib_umem_end(umem_odp)); - if (umem_odp->dying) + if (umem_odp->dying) { + mutex_unlock(&umem_odp->umem_mutex); continue; + } - WRITE_ONCE(umem_odp->dying, 1); + umem_odp->dying = 1; atomic_inc(&imr->num_leaf_free); schedule_work(&umem_odp->work); + mutex_unlock(&umem_odp->umem_mutex); } up_read(&per_mm->umem_rwsem); @@ -773,7 +809,7 @@ next_mr: switch (mmkey->type) { case MLX5_MKEY_MR: mr = container_of(mmkey, struct mlx5_ib_mr, mmkey); - if (!mr->live || !mr->ibmr.pd) { + if (!smp_load_acquire(&mr->live) || !mr->ibmr.pd) { mlx5_ib_dbg(dev, "got dead MR\n"); ret = -EFAULT; goto srcu_unlock; @@ -1641,12 +1677,12 @@ static bool num_pending_prefetch_inc(struct ib_pd *pd, mr = container_of(mmkey, struct mlx5_ib_mr, mmkey); - if (mr->ibmr.pd != pd) { + if (!smp_load_acquire(&mr->live)) { ret = false; break; } - if (!mr->live) { + if (mr->ibmr.pd != pd) { ret = false; break; } diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c index 6cac0c88cf39..36cdfbdbd325 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c @@ -230,8 +230,6 @@ static void pvrdma_free_srq(struct pvrdma_dev *dev, struct pvrdma_srq *srq) pvrdma_page_dir_cleanup(dev, &srq->pdir); - kfree(srq); - atomic_dec(&dev->num_srqs); } diff --git a/drivers/infiniband/sw/siw/siw_qp.c b/drivers/infiniband/sw/siw/siw_qp.c index 430314c8abd9..52d402f39df9 100644 --- a/drivers/infiniband/sw/siw/siw_qp.c +++ b/drivers/infiniband/sw/siw/siw_qp.c @@ -182,12 +182,19 @@ void siw_qp_llp_close(struct siw_qp *qp) */ void siw_qp_llp_write_space(struct sock *sk) { - struct siw_cep *cep = sk_to_cep(sk); + struct siw_cep *cep; - cep->sk_write_space(sk); + read_lock(&sk->sk_callback_lock); + + cep = sk_to_cep(sk); + if (cep) { + cep->sk_write_space(sk); - if (!test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) - (void)siw_sq_start(cep->qp); + if (!test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) + (void)siw_sq_start(cep->qp); + } + + read_unlock(&sk->sk_callback_lock); } static int siw_qp_readq_init(struct siw_qp *qp, int irq_size, int orq_size) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c index 9231b39d18b2..c501bf2a0252 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c @@ -112,17 +112,11 @@ int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, u32 out[MLX5_ST_SZ_DW(destroy_mkey_out)] = {0}; u32 in[MLX5_ST_SZ_DW(destroy_mkey_in)] = {0}; struct xarray *mkeys = &dev->priv.mkey_table; - struct mlx5_core_mkey *deleted_mkey; unsigned long flags; xa_lock_irqsave(mkeys, flags); - deleted_mkey = __xa_erase(mkeys, mlx5_base_mkey(mkey->key)); + __xa_erase(mkeys, mlx5_base_mkey(mkey->key)); xa_unlock_irqrestore(mkeys, flags); - if (!deleted_mkey) { - mlx5_core_dbg(dev, "failed xarray delete of mkey 0x%x\n", - mlx5_base_mkey(mkey->key)); - return -ENOENT; - } MLX5_SET(destroy_mkey_in, in, opcode, MLX5_CMD_OP_DESTROY_MKEY); MLX5_SET(destroy_mkey_in, in, mkey_index, mlx5_mkey_to_idx(mkey->key)); diff --git a/drivers/s390/cio/cio.h b/drivers/s390/cio/cio.h index ba7d2480613b..dcdaba689b20 100644 --- a/drivers/s390/cio/cio.h +++ b/drivers/s390/cio/cio.h @@ -113,6 +113,7 @@ struct subchannel { enum sch_todo todo; struct work_struct todo_work; struct schib_config config; + u64 dma_mask; char *driver_override; /* Driver name to force a match */ } __attribute__ ((aligned(8))); diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c index 1fbfb0a93f5f..831850435c23 100644 --- a/drivers/s390/cio/css.c +++ b/drivers/s390/cio/css.c @@ -232,7 +232,12 @@ struct subchannel *css_alloc_subchannel(struct subchannel_id schid, * belong to a subchannel need to fit 31 bit width (e.g. ccw). */ sch->dev.coherent_dma_mask = DMA_BIT_MASK(31); - sch->dev.dma_mask = &sch->dev.coherent_dma_mask; + /* + * But we don't have such restrictions imposed on the stuff that + * is handled by the streaming API. + */ + sch->dma_mask = DMA_BIT_MASK(64); + sch->dev.dma_mask = &sch->dma_mask; return sch; err: diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index 131430bd48d9..0c6245fc7706 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -710,7 +710,7 @@ static struct ccw_device * io_subchannel_allocate_dev(struct subchannel *sch) if (!cdev->private) goto err_priv; cdev->dev.coherent_dma_mask = sch->dev.coherent_dma_mask; - cdev->dev.dma_mask = &cdev->dev.coherent_dma_mask; + cdev->dev.dma_mask = sch->dev.dma_mask; dma_pool = cio_gp_dma_create(&cdev->dev, 1); if (!dma_pool) goto err_dma_pool; diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index a446a7221e13..81401f386c9c 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -22,6 +22,7 @@ #define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt +#include <linux/dma-mapping.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/init.h> @@ -34,9 +35,6 @@ #include <linux/slab.h> #include <linux/highmem.h> #include <linux/refcount.h> -#ifdef CONFIG_XEN_GRANT_DMA_ALLOC -#include <linux/of_device.h> -#endif #include <xen/xen.h> #include <xen/grant_table.h> @@ -625,14 +623,7 @@ static int gntdev_open(struct inode *inode, struct file *flip) flip->private_data = priv; #ifdef CONFIG_XEN_GRANT_DMA_ALLOC priv->dma_dev = gntdev_miscdev.this_device; - - /* - * The device is not spawn from a device tree, so arch_setup_dma_ops - * is not called, thus leaving the device with dummy DMA ops. - * Fix this by calling of_dma_configure() with a NULL node to set - * default DMA ops. - */ - of_dma_configure(priv->dma_dev, NULL, true); + dma_coerce_mask_and_coherent(priv->dma_dev, DMA_BIT_MASK(64)); #endif pr_debug("priv %p\n", priv); diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 7ea6fb6a2e5d..49b381e104ef 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -1363,8 +1363,7 @@ static int gnttab_setup(void) if (xen_feature(XENFEAT_auto_translated_physmap) && gnttab_shared.addr == NULL) { gnttab_shared.addr = xen_auto_xlat_grant_frames.vaddr; if (gnttab_shared.addr == NULL) { - pr_warn("gnttab share frames (addr=0x%08lx) is not mapped!\n", - (unsigned long)xen_auto_xlat_grant_frames.vaddr); + pr_warn("gnttab share frames is not mapped!\n"); return -ENOMEM; } } diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 8fe4eb7e5045..27e5b269e729 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1591,7 +1591,6 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb, struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct btrfs_root *root = BTRFS_I(inode)->root; struct page **pages = NULL; - struct extent_state *cached_state = NULL; struct extent_changeset *data_reserved = NULL; u64 release_bytes = 0; u64 lockstart; @@ -1611,6 +1610,7 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb, return -ENOMEM; while (iov_iter_count(i) > 0) { + struct extent_state *cached_state = NULL; size_t offset = offset_in_page(pos); size_t sector_offset; size_t write_bytes = min(iov_iter_count(i), @@ -1758,9 +1758,20 @@ again: if (copied > 0) ret = btrfs_dirty_pages(inode, pages, dirty_pages, pos, copied, &cached_state); + + /* + * If we have not locked the extent range, because the range's + * start offset is >= i_size, we might still have a non-NULL + * cached extent state, acquired while marking the extent range + * as delalloc through btrfs_dirty_pages(). Therefore free any + * possible cached extent state to avoid a memory leak. + */ if (extents_locked) unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, &cached_state); + else + free_extent_state(cached_state); + btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes, true); if (ret) { diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a0546401bc0a..0f2754eaa05b 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6305,13 +6305,16 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, u32 sizes[2]; int nitems = name ? 2 : 1; unsigned long ptr; + unsigned int nofs_flag; int ret; path = btrfs_alloc_path(); if (!path) return ERR_PTR(-ENOMEM); + nofs_flag = memalloc_nofs_save(); inode = new_inode(fs_info->sb); + memalloc_nofs_restore(nofs_flag); if (!inode) { btrfs_free_path(path); return ERR_PTR(-ENOMEM); diff --git a/fs/btrfs/ref-verify.c b/fs/btrfs/ref-verify.c index e87cbdad02a3..b57f3618e58e 100644 --- a/fs/btrfs/ref-verify.c +++ b/fs/btrfs/ref-verify.c @@ -500,7 +500,7 @@ static int process_leaf(struct btrfs_root *root, struct btrfs_extent_data_ref *dref; struct btrfs_shared_data_ref *sref; u32 count; - int i = 0, tree_block_level = 0, ret; + int i = 0, tree_block_level = 0, ret = 0; struct btrfs_key key; int nritems = btrfs_header_nritems(leaf); diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index f3215028235c..123ac54af071 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -5085,7 +5085,7 @@ static int clone_range(struct send_ctx *sctx, struct btrfs_path *path; struct btrfs_key key; int ret; - u64 clone_src_i_size; + u64 clone_src_i_size = 0; /* * Prevent cloning from a zero offset with a length matching the sector diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 29b82a795522..8a6cc600bf18 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -2932,7 +2932,8 @@ out: * in the tree of log roots */ static int update_log_root(struct btrfs_trans_handle *trans, - struct btrfs_root *log) + struct btrfs_root *log, + struct btrfs_root_item *root_item) { struct btrfs_fs_info *fs_info = log->fs_info; int ret; @@ -2940,10 +2941,10 @@ static int update_log_root(struct btrfs_trans_handle *trans, if (log->log_transid == 1) { /* insert root item on the first sync */ ret = btrfs_insert_root(trans, fs_info->log_root_tree, - &log->root_key, &log->root_item); + &log->root_key, root_item); } else { ret = btrfs_update_root(trans, fs_info->log_root_tree, - &log->root_key, &log->root_item); + &log->root_key, root_item); } return ret; } @@ -3041,6 +3042,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_root *log = root->log_root; struct btrfs_root *log_root_tree = fs_info->log_root_tree; + struct btrfs_root_item new_root_item; int log_transid = 0; struct btrfs_log_ctx root_log_ctx; struct blk_plug plug; @@ -3104,18 +3106,26 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, goto out; } + /* + * We _must_ update under the root->log_mutex in order to make sure we + * have a consistent view of the log root we are trying to commit at + * this moment. + * + * We _must_ copy this into a local copy, because we are not holding the + * log_root_tree->log_mutex yet. This is important because when we + * commit the log_root_tree we must have a consistent view of the + * log_root_tree when we update the super block to point at the + * log_root_tree bytenr. If we update the log_root_tree here we'll race + * with the commit and possibly point at the new block which we may not + * have written out. + */ btrfs_set_root_node(&log->root_item, log->node); + memcpy(&new_root_item, &log->root_item, sizeof(new_root_item)); root->log_transid++; log->log_transid = root->log_transid; root->log_start_pid = 0; /* - * Update or create log root item under the root's log_mutex to prevent - * races with concurrent log syncs that can lead to failure to update - * log root item because it was not created yet. - */ - ret = update_log_root(trans, log); - /* * IO has been started, blocks of the log tree have WRITTEN flag set * in their headers. new modifications of the log will be written to * new positions. so it's safe to allow log writers to go in. @@ -3135,6 +3145,14 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, mutex_unlock(&log_root_tree->log_mutex); mutex_lock(&log_root_tree->log_mutex); + + /* + * Now we are safe to update the log_root_tree because we're under the + * log_mutex, and we're a current writer so we're holding the commit + * open until we drop the log_mutex. + */ + ret = update_log_root(trans, log, &new_root_item); + if (atomic_dec_and_test(&log_root_tree->log_writers)) { /* atomic_dec_and_test implies a barrier */ cond_wake_up_nomb(&log_root_tree->log_writer_wait); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index cdd7af424033..bdfe4493e43a 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -3845,7 +3845,11 @@ static int alloc_profile_is_valid(u64 flags, int extended) return !extended; /* "0" is valid for usual profiles */ /* true if exactly one bit set */ - return is_power_of_2(flags); + /* + * Don't use is_power_of_2(unsigned long) because it won't work + * for the single profile (1ULL << 48) on 32-bit CPUs. + */ + return flags != 0 && (flags & (flags - 1)) == 0; } static inline int balance_need_close(struct btrfs_fs_info *fs_info) diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 2e9c7f493f99..c049c7b3aa87 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -169,18 +169,26 @@ cifs_read_super(struct super_block *sb) else sb->s_maxbytes = MAX_NON_LFS; - /* BB FIXME fix time_gran to be larger for LANMAN sessions */ - sb->s_time_gran = 100; - - if (tcon->unix_ext) { - ts = cifs_NTtimeToUnix(0); + /* Some very old servers like DOS and OS/2 used 2 second granularity */ + if ((tcon->ses->server->vals->protocol_id == SMB10_PROT_ID) && + ((tcon->ses->capabilities & + tcon->ses->server->vals->cap_nt_find) == 0) && + !tcon->unix_ext) { + sb->s_time_gran = 1000000000; /* 1 second is max allowed gran */ + ts = cnvrtDosUnixTm(cpu_to_le16(SMB_DATE_MIN), 0, 0); sb->s_time_min = ts.tv_sec; - ts = cifs_NTtimeToUnix(cpu_to_le64(S64_MAX)); + ts = cnvrtDosUnixTm(cpu_to_le16(SMB_DATE_MAX), + cpu_to_le16(SMB_TIME_MAX), 0); sb->s_time_max = ts.tv_sec; } else { - ts = cnvrtDosUnixTm(cpu_to_le16(SMB_DATE_MIN), 0, 0); + /* + * Almost every server, including all SMB2+, uses DCE TIME + * ie 100 nanosecond units, since 1601. See MS-DTYP and MS-FSCC + */ + sb->s_time_gran = 100; + ts = cifs_NTtimeToUnix(0); sb->s_time_min = ts.tv_sec; - ts = cnvrtDosUnixTm(cpu_to_le16(SMB_DATE_MAX), cpu_to_le16(SMB_TIME_MAX), 0); + ts = cifs_NTtimeToUnix(cpu_to_le64(S64_MAX)); sb->s_time_max = ts.tv_sec; } diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 2e960e1049db..50dfd9049370 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -1210,7 +1210,7 @@ struct cifs_search_info { bool smallBuf:1; /* so we know which buf_release function to call */ }; -#define ACL_NO_MODE -1 +#define ACL_NO_MODE ((umode_t)(-1)) struct cifs_open_parms { struct cifs_tcon *tcon; struct cifs_sb_info *cifs_sb; diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 2850c3ce4391..a64dfa95a925 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -4264,7 +4264,7 @@ static int mount_get_conns(struct smb_vol *vol, struct cifs_sb_info *cifs_sb, server->ops->qfs_tcon(*xid, tcon); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RO_CACHE) { if (tcon->fsDevInfo.DeviceCharacteristics & - FILE_READ_ONLY_DEVICE) + cpu_to_le32(FILE_READ_ONLY_DEVICE)) cifs_dbg(VFS, "mounted to read only share\n"); else if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RW_CACHE) == 0) @@ -4445,7 +4445,7 @@ static int setup_dfs_tgt_conn(const char *path, int rc; struct dfs_info3_param ref = {0}; char *mdata = NULL, *fake_devname = NULL; - struct smb_vol fake_vol = {0}; + struct smb_vol fake_vol = {NULL}; cifs_dbg(FYI, "%s: dfs path: %s\n", __func__, path); diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index dd5ac841aefa..7ce689d31aa2 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -738,10 +738,16 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, static int cifs_d_revalidate(struct dentry *direntry, unsigned int flags) { + struct inode *inode; + if (flags & LOOKUP_RCU) return -ECHILD; if (d_really_is_positive(direntry)) { + inode = d_inode(direntry); + if ((flags & LOOKUP_REVAL) && !CIFS_CACHE_READ(CIFS_I(inode))) + CIFS_I(inode)->time = 0; /* force reval */ + if (cifs_revalidate_dentry(direntry)) return 0; else { @@ -752,7 +758,7 @@ cifs_d_revalidate(struct dentry *direntry, unsigned int flags) * attributes will have been updated by * cifs_revalidate_dentry(). */ - if (IS_AUTOMOUNT(d_inode(direntry)) && + if (IS_AUTOMOUNT(inode) && !(direntry->d_flags & DCACHE_NEED_AUTOMOUNT)) { spin_lock(&direntry->d_lock); direntry->d_flags |= DCACHE_NEED_AUTOMOUNT; diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 4b95700c507c..5ad15de2bb4f 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -253,6 +253,12 @@ cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb, rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb, xid, fid); + if (rc) { + server->ops->close(xid, tcon, fid); + if (rc == -ESTALE) + rc = -EOPENSTALE; + } + out: kfree(buf); return rc; @@ -1840,13 +1846,12 @@ struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode, { struct cifsFileInfo *open_file = NULL; struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb); - struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); /* only filter by fsuid on multiuser mounts */ if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER)) fsuid_only = false; - spin_lock(&tcon->open_file_lock); + spin_lock(&cifs_inode->open_file_lock); /* we could simply get the first_list_entry since write-only entries are always at the end of the list but since the first entry might have a close pending, we go through the whole list */ @@ -1858,7 +1863,7 @@ struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode, /* found a good file */ /* lock it so it will not be closed on us */ cifsFileInfo_get(open_file); - spin_unlock(&tcon->open_file_lock); + spin_unlock(&cifs_inode->open_file_lock); return open_file; } /* else might as well continue, and look for another, or simply have the caller reopen it @@ -1866,7 +1871,7 @@ struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode, } else /* write only file */ break; /* write only files are last so must be done */ } - spin_unlock(&tcon->open_file_lock); + spin_unlock(&cifs_inode->open_file_lock); return NULL; } @@ -1877,7 +1882,6 @@ cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, bool fsuid_only, { struct cifsFileInfo *open_file, *inv_file = NULL; struct cifs_sb_info *cifs_sb; - struct cifs_tcon *tcon; bool any_available = false; int rc = -EBADF; unsigned int refind = 0; @@ -1897,16 +1901,15 @@ cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, bool fsuid_only, } cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb); - tcon = cifs_sb_master_tcon(cifs_sb); /* only filter by fsuid on multiuser mounts */ if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER)) fsuid_only = false; - spin_lock(&tcon->open_file_lock); + spin_lock(&cifs_inode->open_file_lock); refind_writable: if (refind > MAX_REOPEN_ATT) { - spin_unlock(&tcon->open_file_lock); + spin_unlock(&cifs_inode->open_file_lock); return rc; } list_for_each_entry(open_file, &cifs_inode->openFileList, flist) { @@ -1918,7 +1921,7 @@ refind_writable: if (!open_file->invalidHandle) { /* found a good writable file */ cifsFileInfo_get(open_file); - spin_unlock(&tcon->open_file_lock); + spin_unlock(&cifs_inode->open_file_lock); *ret_file = open_file; return 0; } else { @@ -1938,7 +1941,7 @@ refind_writable: cifsFileInfo_get(inv_file); } - spin_unlock(&tcon->open_file_lock); + spin_unlock(&cifs_inode->open_file_lock); if (inv_file) { rc = cifs_reopen_file(inv_file, false); @@ -1953,7 +1956,7 @@ refind_writable: cifsFileInfo_put(inv_file); ++refind; inv_file = NULL; - spin_lock(&tcon->open_file_lock); + spin_lock(&cifs_inode->open_file_lock); goto refind_writable; } @@ -4461,17 +4464,15 @@ static int cifs_readpage(struct file *file, struct page *page) static int is_inode_writable(struct cifsInodeInfo *cifs_inode) { struct cifsFileInfo *open_file; - struct cifs_tcon *tcon = - cifs_sb_master_tcon(CIFS_SB(cifs_inode->vfs_inode.i_sb)); - spin_lock(&tcon->open_file_lock); + spin_lock(&cifs_inode->open_file_lock); list_for_each_entry(open_file, &cifs_inode->openFileList, flist) { if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) { - spin_unlock(&tcon->open_file_lock); + spin_unlock(&cifs_inode->open_file_lock); return 1; } } - spin_unlock(&tcon->open_file_lock); + spin_unlock(&cifs_inode->open_file_lock); return 0; } diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 3bae2e53f0b8..5dcc95b38310 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -414,6 +414,7 @@ int cifs_get_inode_info_unix(struct inode **pinode, /* if uniqueid is different, return error */ if (unlikely(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM && CIFS_I(*pinode)->uniqueid != fattr.cf_uniqueid)) { + CIFS_I(*pinode)->time = 0; /* force reval */ rc = -ESTALE; goto cgiiu_exit; } @@ -421,6 +422,7 @@ int cifs_get_inode_info_unix(struct inode **pinode, /* if filetype is different, return error */ if (unlikely(((*pinode)->i_mode & S_IFMT) != (fattr.cf_mode & S_IFMT))) { + CIFS_I(*pinode)->time = 0; /* force reval */ rc = -ESTALE; goto cgiiu_exit; } @@ -933,6 +935,7 @@ cifs_get_inode_info(struct inode **inode, const char *full_path, /* if uniqueid is different, return error */ if (unlikely(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM && CIFS_I(*inode)->uniqueid != fattr.cf_uniqueid)) { + CIFS_I(*inode)->time = 0; /* force reval */ rc = -ESTALE; goto cgii_exit; } @@ -940,6 +943,7 @@ cifs_get_inode_info(struct inode **inode, const char *full_path, /* if filetype is different, return error */ if (unlikely(((*inode)->i_mode & S_IFMT) != (fattr.cf_mode & S_IFMT))) { + CIFS_I(*inode)->time = 0; /* force reval */ rc = -ESTALE; goto cgii_exit; } diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c index 49c17ee18254..9b41436fb8db 100644 --- a/fs/cifs/netmisc.c +++ b/fs/cifs/netmisc.c @@ -117,10 +117,6 @@ static const struct smb_to_posix_error mapping_table_ERRSRV[] = { {0, 0} }; -static const struct smb_to_posix_error mapping_table_ERRHRD[] = { - {0, 0} -}; - /* * Convert a string containing text IPv4 or IPv6 address to binary form. * diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 85f9d614d968..05149862aea4 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -751,8 +751,8 @@ add_posix_context(struct kvec *iov, unsigned int *num_iovec, umode_t mode) unsigned int num = *num_iovec; iov[num].iov_base = create_posix_buf(mode); - if (mode == -1) - cifs_dbg(VFS, "illegal mode\n"); /* BB REMOVEME */ + if (mode == ACL_NO_MODE) + cifs_dbg(FYI, "illegal mode\n"); if (iov[num].iov_base == NULL) return -ENOMEM; iov[num].iov_len = sizeof(struct create_posix); @@ -2521,11 +2521,8 @@ SMB2_open_init(struct cifs_tcon *tcon, struct smb_rqst *rqst, __u8 *oplock, return rc; } - /* TODO: add handling for the mode on create */ - if (oparms->disposition == FILE_CREATE) - cifs_dbg(VFS, "mode is 0x%x\n", oparms->mode); /* BB REMOVEME */ - - if ((oparms->disposition == FILE_CREATE) && (oparms->mode != -1)) { + if ((oparms->disposition == FILE_CREATE) && + (oparms->mode != ACL_NO_MODE)) { if (n_iov > 2) { struct create_context *ccontext = (struct create_context *)iov[n_iov-1].iov_base; @@ -3217,7 +3214,8 @@ SMB2_notify_init(const unsigned int xid, struct smb_rqst *rqst, req->PersistentFileId = persistent_fid; req->VolatileFileId = volatile_fid; - req->OutputBufferLength = SMB2_MAX_BUFFER_SIZE - MAX_SMB2_HDR_SIZE; + req->OutputBufferLength = + cpu_to_le32(SMB2_MAX_BUFFER_SIZE - MAX_SMB2_HDR_SIZE); req->CompletionFilter = cpu_to_le32(completion_filter); if (watch_tree) req->Flags = cpu_to_le16(SMB2_WATCH_TREE); diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index da3a6d580808..71b2930b8e0b 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -150,6 +150,10 @@ extern int SMB2_ioctl_init(struct cifs_tcon *tcon, struct smb_rqst *rqst, bool is_fsctl, char *in_data, u32 indatalen, __u32 max_response_size); extern void SMB2_ioctl_free(struct smb_rqst *rqst); +extern int SMB2_change_notify(const unsigned int xid, struct cifs_tcon *tcon, + u64 persistent_fid, u64 volatile_fid, bool watch_tree, + u32 completion_filter); + extern int SMB2_close(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_file_id, u64 volatile_file_id); extern int SMB2_close_flags(const unsigned int xid, struct cifs_tcon *tcon, diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 8aaa7eec7b74..e88421d9a48d 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -164,8 +164,13 @@ static void finish_writeback_work(struct bdi_writeback *wb, if (work->auto_free) kfree(work); - if (done && atomic_dec_and_test(&done->cnt)) - wake_up_all(done->waitq); + if (done) { + wait_queue_head_t *waitq = done->waitq; + + /* @done can't be accessed after the following dec */ + if (atomic_dec_and_test(&done->cnt)) + wake_up_all(waitq); + } } static void wb_queue_work(struct bdi_writeback *wb, diff --git a/fs/io_uring.c b/fs/io_uring.c index 8a0381f1a43b..92972b5c1d00 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -591,14 +591,6 @@ static void io_cqring_add_event(struct io_ring_ctx *ctx, u64 user_data, io_cqring_ev_posted(ctx); } -static void io_ring_drop_ctx_refs(struct io_ring_ctx *ctx, unsigned refs) -{ - percpu_ref_put_many(&ctx->refs, refs); - - if (waitqueue_active(&ctx->wait)) - wake_up(&ctx->wait); -} - static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx, struct io_submit_state *state) { @@ -646,7 +638,7 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx, req->result = 0; return req; out: - io_ring_drop_ctx_refs(ctx, 1); + percpu_ref_put(&ctx->refs); return NULL; } @@ -654,7 +646,7 @@ static void io_free_req_many(struct io_ring_ctx *ctx, void **reqs, int *nr) { if (*nr) { kmem_cache_free_bulk(req_cachep, *nr, reqs); - io_ring_drop_ctx_refs(ctx, *nr); + percpu_ref_put_many(&ctx->refs, *nr); *nr = 0; } } @@ -663,7 +655,7 @@ static void __io_free_req(struct io_kiocb *req) { if (req->file && !(req->flags & REQ_F_FIXED_FILE)) fput(req->file); - io_ring_drop_ctx_refs(req->ctx, 1); + percpu_ref_put(&req->ctx->refs); kmem_cache_free(req_cachep, req); } @@ -2761,7 +2753,7 @@ out: if (link) io_queue_link_head(ctx, link, &link->submit, shadow_req, - block_for_last); + !block_for_last); if (statep) io_submit_state_end(statep); @@ -2920,8 +2912,12 @@ static void io_finish_async(struct io_ring_ctx *ctx) static void io_destruct_skb(struct sk_buff *skb) { struct io_ring_ctx *ctx = skb->sk->sk_user_data; + int i; + + for (i = 0; i < ARRAY_SIZE(ctx->sqo_wq); i++) + if (ctx->sqo_wq[i]) + flush_workqueue(ctx->sqo_wq[i]); - io_finish_async(ctx); unix_destruct_scm(skb); } @@ -3630,7 +3626,7 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, } } - io_ring_drop_ctx_refs(ctx, 1); + percpu_ref_put(&ctx->refs); out_fput: fdput(f); return submitted ? submitted : ret; diff --git a/fs/libfs.c b/fs/libfs.c index c9b2850c0f7c..540611b99b9a 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -89,58 +89,45 @@ int dcache_dir_close(struct inode *inode, struct file *file) EXPORT_SYMBOL(dcache_dir_close); /* parent is locked at least shared */ -static struct dentry *next_positive(struct dentry *parent, - struct list_head *from, - int count) +/* + * Returns an element of siblings' list. + * We are looking for <count>th positive after <p>; if + * found, dentry is grabbed and returned to caller. + * If no such element exists, NULL is returned. + */ +static struct dentry *scan_positives(struct dentry *cursor, + struct list_head *p, + loff_t count, + struct dentry *last) { - unsigned *seq = &parent->d_inode->i_dir_seq, n; - struct dentry *res; - struct list_head *p; - bool skipped; - int i; + struct dentry *dentry = cursor->d_parent, *found = NULL; -retry: - i = count; - skipped = false; - n = smp_load_acquire(seq) & ~1; - res = NULL; - rcu_read_lock(); - for (p = from->next; p != &parent->d_subdirs; p = p->next) { + spin_lock(&dentry->d_lock); + while ((p = p->next) != &dentry->d_subdirs) { struct dentry *d = list_entry(p, struct dentry, d_child); - if (!simple_positive(d)) { - skipped = true; - } else if (!--i) { - res = d; - break; + // we must at least skip cursors, to avoid livelocks + if (d->d_flags & DCACHE_DENTRY_CURSOR) + continue; + if (simple_positive(d) && !--count) { + spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED); + if (simple_positive(d)) + found = dget_dlock(d); + spin_unlock(&d->d_lock); + if (likely(found)) + break; + count = 1; + } + if (need_resched()) { + list_move(&cursor->d_child, p); + p = &cursor->d_child; + spin_unlock(&dentry->d_lock); + cond_resched(); + spin_lock(&dentry->d_lock); } } - rcu_read_unlock(); - if (skipped) { - smp_rmb(); - if (unlikely(*seq != n)) - goto retry; - } - return res; -} - -static void move_cursor(struct dentry *cursor, struct list_head *after) -{ - struct dentry *parent = cursor->d_parent; - unsigned n, *seq = &parent->d_inode->i_dir_seq; - spin_lock(&parent->d_lock); - for (;;) { - n = *seq; - if (!(n & 1) && cmpxchg(seq, n, n + 1) == n) - break; - cpu_relax(); - } - __list_del(cursor->d_child.prev, cursor->d_child.next); - if (after) - list_add(&cursor->d_child, after); - else - list_add_tail(&cursor->d_child, &parent->d_subdirs); - smp_store_release(seq, n + 2); - spin_unlock(&parent->d_lock); + spin_unlock(&dentry->d_lock); + dput(last); + return found; } loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence) @@ -158,17 +145,25 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence) return -EINVAL; } if (offset != file->f_pos) { + struct dentry *cursor = file->private_data; + struct dentry *to = NULL; + + inode_lock_shared(dentry->d_inode); + + if (offset > 2) + to = scan_positives(cursor, &dentry->d_subdirs, + offset - 2, NULL); + spin_lock(&dentry->d_lock); + if (to) + list_move(&cursor->d_child, &to->d_child); + else + list_del_init(&cursor->d_child); + spin_unlock(&dentry->d_lock); + dput(to); + file->f_pos = offset; - if (file->f_pos >= 2) { - struct dentry *cursor = file->private_data; - struct dentry *to; - loff_t n = file->f_pos - 2; - - inode_lock_shared(dentry->d_inode); - to = next_positive(dentry, &dentry->d_subdirs, n); - move_cursor(cursor, to ? &to->d_child : NULL); - inode_unlock_shared(dentry->d_inode); - } + + inode_unlock_shared(dentry->d_inode); } return offset; } @@ -190,25 +185,35 @@ int dcache_readdir(struct file *file, struct dir_context *ctx) { struct dentry *dentry = file->f_path.dentry; struct dentry *cursor = file->private_data; - struct list_head *p = &cursor->d_child; - struct dentry *next; - bool moved = false; + struct list_head *anchor = &dentry->d_subdirs; + struct dentry *next = NULL; + struct list_head *p; if (!dir_emit_dots(file, ctx)) return 0; if (ctx->pos == 2) - p = &dentry->d_subdirs; - while ((next = next_positive(dentry, p, 1)) != NULL) { + p = anchor; + else if (!list_empty(&cursor->d_child)) + p = &cursor->d_child; + else + return 0; + + while ((next = scan_positives(cursor, p, 1, next)) != NULL) { if (!dir_emit(ctx, next->d_name.name, next->d_name.len, d_inode(next)->i_ino, dt_type(d_inode(next)))) break; - moved = true; - p = &next->d_child; ctx->pos++; + p = &next->d_child; } - if (moved) - move_cursor(cursor, p); + spin_lock(&dentry->d_lock); + if (next) + list_move_tail(&cursor->d_child, &next->d_child); + else + list_del_init(&cursor->d_child); + spin_unlock(&dentry->d_lock); + dput(next); + return 0; } EXPORT_SYMBOL(dcache_readdir); diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 222d7115db71..040a50fd9bf3 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -64,13 +64,6 @@ static struct kmem_cache *nfs_direct_cachep; -/* - * This represents a set of asynchronous requests that we're waiting on - */ -struct nfs_direct_mirror { - ssize_t count; -}; - struct nfs_direct_req { struct kref kref; /* release manager */ @@ -84,9 +77,6 @@ struct nfs_direct_req { atomic_t io_count; /* i/os we're waiting for */ spinlock_t lock; /* protect completion state */ - struct nfs_direct_mirror mirrors[NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX]; - int mirror_count; - loff_t io_start; /* Start offset for I/O */ ssize_t count, /* bytes actually processed */ max_count, /* max expected count */ @@ -123,32 +113,42 @@ static inline int put_dreq(struct nfs_direct_req *dreq) } static void -nfs_direct_good_bytes(struct nfs_direct_req *dreq, struct nfs_pgio_header *hdr) +nfs_direct_handle_truncated(struct nfs_direct_req *dreq, + const struct nfs_pgio_header *hdr, + ssize_t dreq_len) { - int i; - ssize_t count; + if (!(test_bit(NFS_IOHDR_ERROR, &hdr->flags) || + test_bit(NFS_IOHDR_EOF, &hdr->flags))) + return; + if (dreq->max_count >= dreq_len) { + dreq->max_count = dreq_len; + if (dreq->count > dreq_len) + dreq->count = dreq_len; + + if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) + dreq->error = hdr->error; + else /* Clear outstanding error if this is EOF */ + dreq->error = 0; + } +} - WARN_ON_ONCE(dreq->count >= dreq->max_count); +static void +nfs_direct_count_bytes(struct nfs_direct_req *dreq, + const struct nfs_pgio_header *hdr) +{ + loff_t hdr_end = hdr->io_start + hdr->good_bytes; + ssize_t dreq_len = 0; - if (dreq->mirror_count == 1) { - dreq->mirrors[hdr->pgio_mirror_idx].count += hdr->good_bytes; - dreq->count += hdr->good_bytes; - } else { - /* mirrored writes */ - count = dreq->mirrors[hdr->pgio_mirror_idx].count; - if (count + dreq->io_start < hdr->io_start + hdr->good_bytes) { - count = hdr->io_start + hdr->good_bytes - dreq->io_start; - dreq->mirrors[hdr->pgio_mirror_idx].count = count; - } - /* update the dreq->count by finding the minimum agreed count from all - * mirrors */ - count = dreq->mirrors[0].count; + if (hdr_end > dreq->io_start) + dreq_len = hdr_end - dreq->io_start; - for (i = 1; i < dreq->mirror_count; i++) - count = min(count, dreq->mirrors[i].count); + nfs_direct_handle_truncated(dreq, hdr, dreq_len); - dreq->count = count; - } + if (dreq_len > dreq->max_count) + dreq_len = dreq->max_count; + + if (dreq->count < dreq_len) + dreq->count = dreq_len; } /* @@ -293,18 +293,6 @@ void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo, cinfo->completion_ops = &nfs_direct_commit_completion_ops; } -static inline void nfs_direct_setup_mirroring(struct nfs_direct_req *dreq, - struct nfs_pageio_descriptor *pgio, - struct nfs_page *req) -{ - int mirror_count = 1; - - if (pgio->pg_ops->pg_get_mirror_count) - mirror_count = pgio->pg_ops->pg_get_mirror_count(pgio, req); - - dreq->mirror_count = mirror_count; -} - static inline struct nfs_direct_req *nfs_direct_req_alloc(void) { struct nfs_direct_req *dreq; @@ -319,7 +307,6 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void) INIT_LIST_HEAD(&dreq->mds_cinfo.list); dreq->verf.committed = NFS_INVALID_STABLE_HOW; /* not set yet */ INIT_WORK(&dreq->work, nfs_direct_write_schedule_work); - dreq->mirror_count = 1; spin_lock_init(&dreq->lock); return dreq; @@ -402,20 +389,12 @@ static void nfs_direct_read_completion(struct nfs_pgio_header *hdr) struct nfs_direct_req *dreq = hdr->dreq; spin_lock(&dreq->lock); - if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) - dreq->error = hdr->error; - if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) { spin_unlock(&dreq->lock); goto out_put; } - if (hdr->good_bytes != 0) - nfs_direct_good_bytes(dreq, hdr); - - if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) - dreq->error = 0; - + nfs_direct_count_bytes(dreq, hdr); spin_unlock(&dreq->lock); while (!list_empty(&hdr->pages)) { @@ -646,29 +625,22 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) LIST_HEAD(reqs); struct nfs_commit_info cinfo; LIST_HEAD(failed); - int i; nfs_init_cinfo_from_dreq(&cinfo, dreq); nfs_direct_write_scan_commit_list(dreq->inode, &reqs, &cinfo); dreq->count = 0; + dreq->max_count = 0; + list_for_each_entry(req, &reqs, wb_list) + dreq->max_count += req->wb_bytes; dreq->verf.committed = NFS_INVALID_STABLE_HOW; nfs_clear_pnfs_ds_commit_verifiers(&dreq->ds_cinfo); - for (i = 0; i < dreq->mirror_count; i++) - dreq->mirrors[i].count = 0; get_dreq(dreq); nfs_pageio_init_write(&desc, dreq->inode, FLUSH_STABLE, false, &nfs_direct_write_completion_ops); desc.pg_dreq = dreq; - req = nfs_list_entry(reqs.next); - nfs_direct_setup_mirroring(dreq, &desc, req); - if (desc.pg_error < 0) { - list_splice_init(&reqs, &failed); - goto out_failed; - } - list_for_each_entry_safe(req, tmp, &reqs, wb_list) { /* Bump the transmission count */ req->wb_nio++; @@ -686,7 +658,6 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) } nfs_pageio_complete(&desc); -out_failed: while (!list_empty(&failed)) { req = nfs_list_entry(failed.next); nfs_list_remove_request(req); @@ -791,17 +762,13 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) nfs_init_cinfo_from_dreq(&cinfo, dreq); spin_lock(&dreq->lock); - - if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) - dreq->error = hdr->error; - if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) { spin_unlock(&dreq->lock); goto out_put; } + nfs_direct_count_bytes(dreq, hdr); if (hdr->good_bytes != 0) { - nfs_direct_good_bytes(dreq, hdr); if (nfs_write_need_commit(hdr)) { if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) request_commit = true; @@ -923,7 +890,6 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, break; } - nfs_direct_setup_mirroring(dreq, &desc, req); if (desc.pg_error < 0) { nfs_free_request(req); result = desc.pg_error; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 11eafcfc490b..ab8ca20fd579 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6106,6 +6106,7 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, status = nfs4_call_sync_custom(&task_setup_data); if (setclientid.sc_cred) { + kfree(clp->cl_acceptor); clp->cl_acceptor = rpcauth_stringify_acceptor(setclientid.sc_cred); put_rpccred(setclientid.sc_cred); } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 85ca49549b39..52cab65f91cf 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -786,7 +786,6 @@ static void nfs_inode_remove_request(struct nfs_page *req) struct nfs_inode *nfsi = NFS_I(inode); struct nfs_page *head; - atomic_long_dec(&nfsi->nrequests); if (nfs_page_group_sync_on_bit(req, PG_REMOVE)) { head = req->wb_head; @@ -799,8 +798,10 @@ static void nfs_inode_remove_request(struct nfs_page *req) spin_unlock(&mapping->private_lock); } - if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags)) + if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags)) { nfs_release_request(req); + atomic_long_dec(&nfsi->nrequests); + } } static void diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 8de1c9d644f6..9cd0a6815933 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -2049,7 +2049,8 @@ out_write_size: inode->i_mtime = inode->i_ctime = current_time(inode); di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec); di->i_mtime_nsec = di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); - ocfs2_update_inode_fsync_trans(handle, inode, 1); + if (handle) + ocfs2_update_inode_fsync_trans(handle, inode, 1); } if (handle) ocfs2_journal_dirty(handle, wc->w_di_bh); @@ -2146,13 +2147,30 @@ static int ocfs2_dio_wr_get_block(struct inode *inode, sector_t iblock, struct ocfs2_dio_write_ctxt *dwc = NULL; struct buffer_head *di_bh = NULL; u64 p_blkno; - loff_t pos = iblock << inode->i_sb->s_blocksize_bits; + unsigned int i_blkbits = inode->i_sb->s_blocksize_bits; + loff_t pos = iblock << i_blkbits; + sector_t endblk = (i_size_read(inode) - 1) >> i_blkbits; unsigned len, total_len = bh_result->b_size; int ret = 0, first_get_block = 0; len = osb->s_clustersize - (pos & (osb->s_clustersize - 1)); len = min(total_len, len); + /* + * bh_result->b_size is count in get_more_blocks according to write + * "pos" and "end", we need map twice to return different buffer state: + * 1. area in file size, not set NEW; + * 2. area out file size, set NEW. + * + * iblock endblk + * |--------|---------|---------|--------- + * |<-------area in file------->| + */ + + if ((iblock <= endblk) && + ((iblock + ((len - 1) >> i_blkbits)) > endblk)) + len = (endblk - iblock + 1) << i_blkbits; + mlog(0, "get block of %lu at %llu:%u req %u\n", inode->i_ino, pos, len, total_len); @@ -2236,6 +2254,9 @@ static int ocfs2_dio_wr_get_block(struct inode *inode, sector_t iblock, if (desc->c_needs_zero) set_buffer_new(bh_result); + if (iblock > endblk) + set_buffer_new(bh_result); + /* May sleep in end_io. It should not happen in a irq context. So defer * it to dio work queue. */ set_buffer_defer_completion(bh_result); diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index d6f7b299eb23..efeea208fdeb 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c @@ -283,7 +283,7 @@ static int ocfs2_info_scan_inode_alloc(struct ocfs2_super *osb, if (inode_alloc) inode_lock(inode_alloc); - if (o2info_coherent(&fi->ifi_req)) { + if (inode_alloc && o2info_coherent(&fi->ifi_req)) { status = ocfs2_inode_lock(inode_alloc, &bh, 0); if (status < 0) { mlog_errno(status); diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 90c830e3758e..d8507972ee13 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -1490,18 +1490,6 @@ static int ocfs2_xa_check_space(struct ocfs2_xa_loc *loc, return loc->xl_ops->xlo_check_space(loc, xi); } -static void ocfs2_xa_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash) -{ - loc->xl_ops->xlo_add_entry(loc, name_hash); - loc->xl_entry->xe_name_hash = cpu_to_le32(name_hash); - /* - * We can't leave the new entry's xe_name_offset at zero or - * add_namevalue() will go nuts. We set it to the size of our - * storage so that it can never be less than any other entry. - */ - loc->xl_entry->xe_name_offset = cpu_to_le16(loc->xl_size); -} - static void ocfs2_xa_add_namevalue(struct ocfs2_xa_loc *loc, struct ocfs2_xattr_info *xi) { @@ -2133,29 +2121,31 @@ static int ocfs2_xa_prepare_entry(struct ocfs2_xa_loc *loc, if (rc) goto out; - if (loc->xl_entry) { - if (ocfs2_xa_can_reuse_entry(loc, xi)) { - orig_value_size = loc->xl_entry->xe_value_size; - rc = ocfs2_xa_reuse_entry(loc, xi, ctxt); - if (rc) - goto out; - goto alloc_value; - } + if (!loc->xl_entry) { + rc = -EINVAL; + goto out; + } - if (!ocfs2_xattr_is_local(loc->xl_entry)) { - orig_clusters = ocfs2_xa_value_clusters(loc); - rc = ocfs2_xa_value_truncate(loc, 0, ctxt); - if (rc) { - mlog_errno(rc); - ocfs2_xa_cleanup_value_truncate(loc, - "overwriting", - orig_clusters); - goto out; - } + if (ocfs2_xa_can_reuse_entry(loc, xi)) { + orig_value_size = loc->xl_entry->xe_value_size; + rc = ocfs2_xa_reuse_entry(loc, xi, ctxt); + if (rc) + goto out; + goto alloc_value; + } + + if (!ocfs2_xattr_is_local(loc->xl_entry)) { + orig_clusters = ocfs2_xa_value_clusters(loc); + rc = ocfs2_xa_value_truncate(loc, 0, ctxt); + if (rc) { + mlog_errno(rc); + ocfs2_xa_cleanup_value_truncate(loc, + "overwriting", + orig_clusters); + goto out; } - ocfs2_xa_wipe_namevalue(loc); - } else - ocfs2_xa_add_entry(loc, name_hash); + } + ocfs2_xa_wipe_namevalue(loc); /* * If we get here, we have a blank entry. Fill it. We grow our diff --git a/fs/readdir.c b/fs/readdir.c index 19bea591c3f1..6e2623e57b2e 100644 --- a/fs/readdir.c +++ b/fs/readdir.c @@ -27,53 +27,13 @@ /* * Note the "unsafe_put_user() semantics: we goto a * label for errors. - * - * Also note how we use a "while()" loop here, even though - * only the biggest size needs to loop. The compiler (well, - * at least gcc) is smart enough to turn the smaller sizes - * into just if-statements, and this way we don't need to - * care whether 'u64' or 'u32' is the biggest size. - */ -#define unsafe_copy_loop(dst, src, len, type, label) \ - while (len >= sizeof(type)) { \ - unsafe_put_user(get_unaligned((type *)src), \ - (type __user *)dst, label); \ - dst += sizeof(type); \ - src += sizeof(type); \ - len -= sizeof(type); \ - } - -/* - * We avoid doing 64-bit copies on 32-bit architectures. They - * might be better, but the component names are mostly small, - * and the 64-bit cases can end up being much more complex and - * put much more register pressure on the code, so it's likely - * not worth the pain of unaligned accesses etc. - * - * So limit the copies to "unsigned long" size. I did verify - * that at least the x86-32 case is ok without this limiting, - * but I worry about random other legacy 32-bit cases that - * might not do as well. - */ -#define unsafe_copy_type(dst, src, len, type, label) do { \ - if (sizeof(type) <= sizeof(unsigned long)) \ - unsafe_copy_loop(dst, src, len, type, label); \ -} while (0) - -/* - * Copy the dirent name to user space, and NUL-terminate - * it. This should not be a function call, since we're doing - * the copy inside a "user_access_begin/end()" section. */ #define unsafe_copy_dirent_name(_dst, _src, _len, label) do { \ char __user *dst = (_dst); \ const char *src = (_src); \ size_t len = (_len); \ - unsafe_copy_type(dst, src, len, u64, label); \ - unsafe_copy_type(dst, src, len, u32, label); \ - unsafe_copy_type(dst, src, len, u16, label); \ - unsafe_copy_type(dst, src, len, u8, label); \ - unsafe_put_user(0, dst, label); \ + unsafe_put_user(0, dst+len, label); \ + unsafe_copy_to_user(dst, src, len, label); \ } while (0) diff --git a/fs/super.c b/fs/super.c index f627b7c53d2b..cfadab2cbf35 100644 --- a/fs/super.c +++ b/fs/super.c @@ -1300,6 +1300,7 @@ int get_tree_bdev(struct fs_context *fc, mutex_lock(&bdev->bd_fsfreeze_mutex); if (bdev->bd_fsfreeze_count > 0) { mutex_unlock(&bdev->bd_fsfreeze_mutex); + blkdev_put(bdev, mode); warnf(fc, "%pg: Can't mount, blockdev is frozen", bdev); return -EBUSY; } @@ -1308,8 +1309,10 @@ int get_tree_bdev(struct fs_context *fc, fc->sget_key = bdev; s = sget_fc(fc, test_bdev_super_fc, set_bdev_super_fc); mutex_unlock(&bdev->bd_fsfreeze_mutex); - if (IS_ERR(s)) + if (IS_ERR(s)) { + blkdev_put(bdev, mode); return PTR_ERR(s); + } if (s->s_root) { /* Don't summarily change the RO/RW state. */ diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c index 5de296b34ab1..14fbdf22b7e7 100644 --- a/fs/xfs/libxfs/xfs_ag.c +++ b/fs/xfs/libxfs/xfs_ag.c @@ -28,12 +28,11 @@ xfs_get_aghdr_buf( struct xfs_mount *mp, xfs_daddr_t blkno, size_t numblks, - int flags, const struct xfs_buf_ops *ops) { struct xfs_buf *bp; - bp = xfs_buf_get_uncached(mp->m_ddev_targp, numblks, flags); + bp = xfs_buf_get_uncached(mp->m_ddev_targp, numblks, 0); if (!bp) return NULL; @@ -345,7 +344,7 @@ xfs_ag_init_hdr( { struct xfs_buf *bp; - bp = xfs_get_aghdr_buf(mp, id->daddr, id->numblks, 0, ops); + bp = xfs_get_aghdr_buf(mp, id->daddr, id->numblks, ops); if (!bp) return -ENOMEM; diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index b9f019603d0b..f0089e862216 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -826,32 +826,17 @@ xfs_attr_shortform_to_leaf( sf = (xfs_attr_shortform_t *)tmpbuffer; xfs_idata_realloc(dp, -size, XFS_ATTR_FORK); - xfs_bmap_local_to_extents_empty(dp, XFS_ATTR_FORK); + xfs_bmap_local_to_extents_empty(args->trans, dp, XFS_ATTR_FORK); bp = NULL; error = xfs_da_grow_inode(args, &blkno); - if (error) { - /* - * If we hit an IO error middle of the transaction inside - * grow_inode(), we may have inconsistent data. Bail out. - */ - if (error == -EIO) - goto out; - xfs_idata_realloc(dp, size, XFS_ATTR_FORK); /* try to put */ - memcpy(ifp->if_u1.if_data, tmpbuffer, size); /* it back */ + if (error) goto out; - } ASSERT(blkno == 0); error = xfs_attr3_leaf_create(args, blkno, &bp); - if (error) { - /* xfs_attr3_leaf_create may not have instantiated a block */ - if (bp && (xfs_da_shrink_inode(args, 0, bp) != 0)) - goto out; - xfs_idata_realloc(dp, size, XFS_ATTR_FORK); /* try to put */ - memcpy(ifp->if_u1.if_data, tmpbuffer, size); /* it back */ + if (error) goto out; - } memset((char *)&nargs, 0, sizeof(nargs)); nargs.dp = dp; diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 4edc25a2ba80..02469d59c787 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -792,6 +792,7 @@ out_root_realloc: */ void xfs_bmap_local_to_extents_empty( + struct xfs_trans *tp, struct xfs_inode *ip, int whichfork) { @@ -808,6 +809,7 @@ xfs_bmap_local_to_extents_empty( ifp->if_u1.if_root = NULL; ifp->if_height = 0; XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS); + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); } @@ -840,7 +842,7 @@ xfs_bmap_local_to_extents( ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL); if (!ifp->if_bytes) { - xfs_bmap_local_to_extents_empty(ip, whichfork); + xfs_bmap_local_to_extents_empty(tp, ip, whichfork); flags = XFS_ILOG_CORE; goto done; } @@ -887,7 +889,7 @@ xfs_bmap_local_to_extents( /* account for the change in fork size */ xfs_idata_realloc(ip, -ifp->if_bytes, whichfork); - xfs_bmap_local_to_extents_empty(ip, whichfork); + xfs_bmap_local_to_extents_empty(tp, ip, whichfork); flags |= XFS_ILOG_CORE; ifp->if_u1.if_root = NULL; diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index 5bb446d80542..e2798c6f3a5f 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h @@ -182,7 +182,8 @@ void xfs_trim_extent(struct xfs_bmbt_irec *irec, xfs_fileoff_t bno, xfs_filblks_t len); int xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd); int xfs_bmap_set_attrforkoff(struct xfs_inode *ip, int size, int *version); -void xfs_bmap_local_to_extents_empty(struct xfs_inode *ip, int whichfork); +void xfs_bmap_local_to_extents_empty(struct xfs_trans *tp, + struct xfs_inode *ip, int whichfork); void __xfs_bmap_add_free(struct xfs_trans *tp, xfs_fsblock_t bno, xfs_filblks_t len, const struct xfs_owner_info *oinfo, bool skip_discard); diff --git a/fs/xfs/libxfs/xfs_dir2_block.c b/fs/xfs/libxfs/xfs_dir2_block.c index 9595ced393dc..49e4bc39e7bb 100644 --- a/fs/xfs/libxfs/xfs_dir2_block.c +++ b/fs/xfs/libxfs/xfs_dir2_block.c @@ -1096,7 +1096,7 @@ xfs_dir2_sf_to_block( memcpy(sfp, oldsfp, ifp->if_bytes); xfs_idata_realloc(dp, -ifp->if_bytes, XFS_DATA_FORK); - xfs_bmap_local_to_extents_empty(dp, XFS_DATA_FORK); + xfs_bmap_local_to_extents_empty(tp, dp, XFS_DATA_FORK); dp->i_d.di_size = 0; /* diff --git a/fs/xfs/scrub/refcount.c b/fs/xfs/scrub/refcount.c index 93b3793bc5b3..0cab11a5d390 100644 --- a/fs/xfs/scrub/refcount.c +++ b/fs/xfs/scrub/refcount.c @@ -341,7 +341,6 @@ xchk_refcountbt_rec( xfs_extlen_t len; xfs_nlink_t refcount; bool has_cowflag; - int error = 0; bno = be32_to_cpu(rec->refc.rc_startblock); len = be32_to_cpu(rec->refc.rc_blockcount); @@ -366,7 +365,7 @@ xchk_refcountbt_rec( xchk_refcountbt_xref(bs->sc, bno, len, refcount); - return error; + return 0; } /* Make sure we have as many refc blocks as the rmap says. */ diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 0910cb75b65d..4f443703065e 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -864,6 +864,7 @@ xfs_alloc_file_space( xfs_filblks_t allocatesize_fsb; xfs_extlen_t extsz, temp; xfs_fileoff_t startoffset_fsb; + xfs_fileoff_t endoffset_fsb; int nimaps; int quota_flag; int rt; @@ -891,7 +892,8 @@ xfs_alloc_file_space( imapp = &imaps[0]; nimaps = 1; startoffset_fsb = XFS_B_TO_FSBT(mp, offset); - allocatesize_fsb = XFS_B_TO_FSB(mp, count); + endoffset_fsb = XFS_B_TO_FSB(mp, offset + count); + allocatesize_fsb = endoffset_fsb - startoffset_fsb; /* * Allocate file space until done or until there is an error diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 21c243622a79..0abba171aa89 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -345,6 +345,15 @@ xfs_buf_allocate_memory( unsigned short page_count, i; xfs_off_t start, end; int error; + xfs_km_flags_t kmflag_mask = 0; + + /* + * assure zeroed buffer for non-read cases. + */ + if (!(flags & XBF_READ)) { + kmflag_mask |= KM_ZERO; + gfp_mask |= __GFP_ZERO; + } /* * for buffers that are contained within a single page, just allocate @@ -354,7 +363,8 @@ xfs_buf_allocate_memory( size = BBTOB(bp->b_length); if (size < PAGE_SIZE) { int align_mask = xfs_buftarg_dma_alignment(bp->b_target); - bp->b_addr = kmem_alloc_io(size, align_mask, KM_NOFS); + bp->b_addr = kmem_alloc_io(size, align_mask, + KM_NOFS | kmflag_mask); if (!bp->b_addr) { /* low memory - use alloc_page loop instead */ goto use_alloc_page; diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index a2beee9f74da..641d07f30a27 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -1443,7 +1443,7 @@ xlog_alloc_log( prev_iclog = iclog; iclog->ic_data = kmem_alloc_io(log->l_iclog_size, align_mask, - KM_MAYFAIL); + KM_MAYFAIL | KM_ZERO); if (!iclog->ic_data) goto out_free_iclog; #ifdef DEBUG diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 508319039dce..c1a514ffff55 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -127,7 +127,7 @@ xlog_alloc_buffer( if (nbblks > 1 && log->l_sectBBsize > 1) nbblks += log->l_sectBBsize; nbblks = round_up(nbblks, log->l_sectBBsize); - return kmem_alloc_io(BBTOB(nbblks), align_mask, KM_MAYFAIL); + return kmem_alloc_io(BBTOB(nbblks), align_mask, KM_MAYFAIL | KM_ZERO); } /* diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h index 6b318efd8a74..cdf016596659 100644 --- a/include/linux/compiler_attributes.h +++ b/include/linux/compiler_attributes.h @@ -40,6 +40,7 @@ # define __GCC4_has_attribute___noclone__ 1 # define __GCC4_has_attribute___nonstring__ 0 # define __GCC4_has_attribute___no_sanitize_address__ (__GNUC_MINOR__ >= 8) +# define __GCC4_has_attribute___fallthrough__ 0 #endif /* @@ -186,6 +187,22 @@ #endif /* + * Add the pseudo keyword 'fallthrough' so case statement blocks + * must end with any of these keywords: + * break; + * fallthrough; + * goto <label>; + * return [expression]; + * + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Statement-Attributes.html#Statement-Attributes + */ +#if __has_attribute(__fallthrough__) +# define fallthrough __attribute__((__fallthrough__)) +#else +# define fallthrough do {} while (0) /* fallthrough */ +#endif + +/* * Note the missing underscores. * * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-noinline-function-attribute diff --git a/include/linux/export.h b/include/linux/export.h index 95f55b7f83a0..621158ecd2e2 100644 --- a/include/linux/export.h +++ b/include/linux/export.h @@ -52,10 +52,10 @@ extern struct module __this_module; __ADDRESSABLE(sym) \ asm(" .section \"___ksymtab" sec "+" #sym "\", \"a\" \n" \ " .balign 4 \n" \ - "__ksymtab_" #sym NS_SEPARATOR #ns ": \n" \ + "__ksymtab_" #ns NS_SEPARATOR #sym ": \n" \ " .long " #sym "- . \n" \ " .long __kstrtab_" #sym "- . \n" \ - " .long __kstrtab_ns_" #sym "- . \n" \ + " .long __kstrtabns_" #sym "- . \n" \ " .previous \n") #define __KSYMTAB_ENTRY(sym, sec) \ @@ -76,10 +76,10 @@ struct kernel_symbol { #else #define __KSYMTAB_ENTRY_NS(sym, sec, ns) \ static const struct kernel_symbol __ksymtab_##sym##__##ns \ - asm("__ksymtab_" #sym NS_SEPARATOR #ns) \ + asm("__ksymtab_" #ns NS_SEPARATOR #sym) \ __attribute__((section("___ksymtab" sec "+" #sym), used)) \ __aligned(sizeof(void *)) \ - = { (unsigned long)&sym, __kstrtab_##sym, __kstrtab_ns_##sym } + = { (unsigned long)&sym, __kstrtab_##sym, __kstrtabns_##sym } #define __KSYMTAB_ENTRY(sym, sec) \ static const struct kernel_symbol __ksymtab_##sym \ @@ -112,7 +112,7 @@ struct kernel_symbol { /* For every exported symbol, place a struct in the __ksymtab section */ #define ___EXPORT_SYMBOL_NS(sym, sec, ns) \ ___export_symbol_common(sym, sec); \ - static const char __kstrtab_ns_##sym[] \ + static const char __kstrtabns_##sym[] \ __attribute__((section("__ksymtab_strings"), used, aligned(1))) \ = #ns; \ __KSYMTAB_ENTRY_NS(sym, sec, ns) diff --git a/include/linux/leds.h b/include/linux/leds.h index b8df71193329..efb309dba914 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -247,7 +247,7 @@ extern void led_set_brightness(struct led_classdev *led_cdev, /** * led_set_brightness_sync - set LED brightness synchronously * @led_cdev: the LED to set - * @brightness: the brightness to set it to + * @value: the brightness to set it to * * Set an LED's brightness immediately. This function will block * the caller for the time required for accessing device registers, @@ -301,8 +301,7 @@ extern void led_sysfs_enable(struct led_classdev *led_cdev); /** * led_compose_name - compose LED class device name * @dev: LED controller device object - * @child: child fwnode_handle describing a LED or a group of synchronized LEDs; - * it must be provided only for fwnode based LEDs + * @init_data: the LED class device initialization data * @led_classdev_name: composed LED class device name * * Create LED class device name basing on the provided init_data argument. diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 9b60863429cc..ae703ea3ef48 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -356,6 +356,19 @@ static inline bool mem_cgroup_disabled(void) return !cgroup_subsys_enabled(memory_cgrp_subsys); } +static inline unsigned long mem_cgroup_protection(struct mem_cgroup *memcg, + bool in_low_reclaim) +{ + if (mem_cgroup_disabled()) + return 0; + + if (in_low_reclaim) + return READ_ONCE(memcg->memory.emin); + + return max(READ_ONCE(memcg->memory.emin), + READ_ONCE(memcg->memory.elow)); +} + enum mem_cgroup_protection mem_cgroup_protected(struct mem_cgroup *root, struct mem_cgroup *memcg); @@ -537,6 +550,8 @@ void mem_cgroup_handle_over_high(void); unsigned long mem_cgroup_get_max(struct mem_cgroup *memcg); +unsigned long mem_cgroup_size(struct mem_cgroup *memcg); + void mem_cgroup_print_oom_context(struct mem_cgroup *memcg, struct task_struct *p); @@ -829,6 +844,12 @@ static inline void memcg_memory_event_mm(struct mm_struct *mm, { } +static inline unsigned long mem_cgroup_protection(struct mem_cgroup *memcg, + bool in_low_reclaim) +{ + return 0; +} + static inline enum mem_cgroup_protection mem_cgroup_protected( struct mem_cgroup *root, struct mem_cgroup *memcg) { @@ -968,6 +989,11 @@ static inline unsigned long mem_cgroup_get_max(struct mem_cgroup *memcg) return 0; } +static inline unsigned long mem_cgroup_size(struct mem_cgroup *memcg) +{ + return 0; +} + static inline void mem_cgroup_print_oom_context(struct mem_cgroup *memcg, struct task_struct *p) { @@ -1264,6 +1290,9 @@ void mem_cgroup_track_foreign_dirty_slowpath(struct page *page, static inline void mem_cgroup_track_foreign_dirty(struct page *page, struct bdi_writeback *wb) { + if (mem_cgroup_disabled()) + return; + if (unlikely(&page->mem_cgroup->css != wb->memcg_css)) mem_cgroup_track_foreign_dirty_slowpath(page, wb); } diff --git a/include/linux/slab.h b/include/linux/slab.h index ab2b98ad76e1..4d2a2fa55ed5 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -493,6 +493,10 @@ static __always_inline void *kmalloc_large(size_t size, gfp_t flags) * kmalloc is the normal method of allocating memory * for objects smaller than page size in the kernel. * + * The allocated object address is aligned to at least ARCH_KMALLOC_MINALIGN + * bytes. For @size of power of two bytes, the alignment is also guaranteed + * to be at least to the size. + * * The @flags argument may be one of the GFP flags defined at * include/linux/gfp.h and described at * :ref:`Documentation/core-api/mm-api.rst <mm-api-gfp-flags>` diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h index 7638dbe7bc50..a940de03808d 100644 --- a/include/linux/sunrpc/xprtsock.h +++ b/include/linux/sunrpc/xprtsock.h @@ -61,6 +61,7 @@ struct sock_xprt { struct mutex recv_mutex; struct sockaddr_storage srcaddr; unsigned short srcport; + int xprt_err; /* * UDP socket buffer size parameters diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index e47d0522a1f4..d4ee6e942562 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -355,8 +355,10 @@ extern long strnlen_unsafe_user(const void __user *unsafe_addr, long count); #ifndef user_access_begin #define user_access_begin(ptr,len) access_ok(ptr, len) #define user_access_end() do { } while (0) -#define unsafe_get_user(x, ptr, err) do { if (unlikely(__get_user(x, ptr))) goto err; } while (0) -#define unsafe_put_user(x, ptr, err) do { if (unlikely(__put_user(x, ptr))) goto err; } while (0) +#define unsafe_op_wrap(op, err) do { if (unlikely(op)) goto err; } while (0) +#define unsafe_get_user(x,p,e) unsafe_op_wrap(__get_user(x,p),e) +#define unsafe_put_user(x,p,e) unsafe_op_wrap(__put_user(x,p),e) +#define unsafe_copy_to_user(d,s,l,e) unsafe_op_wrap(__copy_to_user(d,s,l),e) static inline unsigned long user_access_save(void) { return 0UL; } static inline void user_access_restore(unsigned long flags) { } #endif diff --git a/kernel/fork.c b/kernel/fork.c index 1f6c45f6a734..bcdf53125210 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2925,7 +2925,7 @@ int sysctl_max_threads(struct ctl_table *table, int write, struct ctl_table t; int ret; int threads = max_threads; - int min = MIN_THREADS; + int min = 1; int max = MAX_THREADS; t = *table; @@ -2937,7 +2937,7 @@ int sysctl_max_threads(struct ctl_table *table, int write, if (ret || !write) return ret; - set_max_threads(threads); + max_threads = threads; return 0; } diff --git a/kernel/freezer.c b/kernel/freezer.c index c0738424bb43..dc520f01f99d 100644 --- a/kernel/freezer.c +++ b/kernel/freezer.c @@ -22,12 +22,6 @@ EXPORT_SYMBOL(system_freezing_cnt); bool pm_freezing; bool pm_nosig_freezing; -/* - * Temporary export for the deadlock workaround in ata_scsi_hotplug(). - * Remove once the hack becomes unnecessary. - */ -EXPORT_SYMBOL_GPL(pm_freezing); - /* protects freezing and frozen transitions */ static DEFINE_SPINLOCK(freezer_lock); diff --git a/kernel/panic.c b/kernel/panic.c index 47e8ebccc22b..f470a038b05b 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -180,6 +180,7 @@ void panic(const char *fmt, ...) * after setting panic_cpu) from invoking panic() again. */ local_irq_disable(); + preempt_disable_notrace(); /* * It's possible to come here directly from a panic-assertion and diff --git a/lib/vdso/Kconfig b/lib/vdso/Kconfig index cc00364bd2c2..9fe698ff62ec 100644 --- a/lib/vdso/Kconfig +++ b/lib/vdso/Kconfig @@ -24,13 +24,4 @@ config GENERIC_COMPAT_VDSO help This config option enables the compat VDSO layer. -config CROSS_COMPILE_COMPAT_VDSO - string "32 bit Toolchain prefix for compat vDSO" - default "" - depends on GENERIC_COMPAT_VDSO - help - Defines the cross-compiler prefix for compiling compat vDSO. - If a 64 bit compiler (i.e. x86_64) can compile the VDSO for - 32 bit, it does not need to define this parameter. - endif diff --git a/mm/backing-dev.c b/mm/backing-dev.c index d9daa3e422d0..c360f6a6c844 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -239,8 +239,8 @@ static int __init default_bdi_init(void) { int err; - bdi_wq = alloc_workqueue("writeback", WQ_MEM_RECLAIM | WQ_FREEZABLE | - WQ_UNBOUND | WQ_SYSFS, 0); + bdi_wq = alloc_workqueue("writeback", WQ_MEM_RECLAIM | WQ_UNBOUND | + WQ_SYSFS, 0); if (!bdi_wq) return -ENOMEM; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index c313c49074ca..bdac56009a38 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1567,6 +1567,11 @@ unsigned long mem_cgroup_get_max(struct mem_cgroup *memcg) return max; } +unsigned long mem_cgroup_size(struct mem_cgroup *memcg) +{ + return page_counter_read(&memcg->memory); +} + static bool mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask, int order) { diff --git a/mm/memremap.c b/mm/memremap.c index 32c79b51af86..68204912cc0a 100644 --- a/mm/memremap.c +++ b/mm/memremap.c @@ -13,8 +13,6 @@ #include <linux/xarray.h> static DEFINE_XARRAY(pgmap_array); -#define SECTION_MASK ~((1UL << PA_SECTION_SHIFT) - 1) -#define SECTION_SIZE (1UL << PA_SECTION_SHIFT) #ifdef CONFIG_DEV_PAGEMAP_OPS DEFINE_STATIC_KEY_FALSE(devmap_managed_key); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 15c2050c629b..c0b2e0306720 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1175,11 +1175,17 @@ static __always_inline bool free_pages_prepare(struct page *page, debug_check_no_obj_freed(page_address(page), PAGE_SIZE << order); } - arch_free_page(page, order); if (want_init_on_free()) kernel_init_free_pages(page, 1 << order); kernel_poison_pages(page, 1 << order, 0); + /* + * arch_free_page() can make the page's contents inaccessible. s390 + * does this. So nothing which can access the page's contents should + * happen after this. + */ + arch_free_page(page, order); + if (debug_pagealloc_enabled()) kernel_map_pages(page, 1 << order, 0); diff --git a/mm/shmem.c b/mm/shmem.c index cd570cc79c76..220be9fa2c41 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -3482,6 +3482,12 @@ static int shmem_parse_options(struct fs_context *fc, void *data) { char *options = data; + if (options) { + int err = security_sb_eat_lsm_opts(options, &fc->security); + if (err) + return err; + } + while (options != NULL) { char *this_char = options; for (;;) { diff --git a/mm/shuffle.c b/mm/shuffle.c index 3ce12481b1dc..b3fe97fd6654 100644 --- a/mm/shuffle.c +++ b/mm/shuffle.c @@ -33,7 +33,7 @@ __meminit void page_alloc_shuffle(enum mm_shuffle_ctl ctl) } static bool shuffle_param; -extern int shuffle_show(char *buffer, const struct kernel_param *kp) +static int shuffle_show(char *buffer, const struct kernel_param *kp) { return sprintf(buffer, "%c\n", test_bit(SHUFFLE_ENABLE, &shuffle_state) ? 'Y' : 'N'); diff --git a/mm/slab_common.c b/mm/slab_common.c index 6491c3a41805..c29f03adca91 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -1030,10 +1030,19 @@ void __init create_boot_cache(struct kmem_cache *s, const char *name, unsigned int useroffset, unsigned int usersize) { int err; + unsigned int align = ARCH_KMALLOC_MINALIGN; s->name = name; s->size = s->object_size = size; - s->align = calculate_alignment(flags, ARCH_KMALLOC_MINALIGN, size); + + /* + * For power of two sizes, guarantee natural alignment for kmalloc + * caches, regardless of SL*B debugging options. + */ + if (is_power_of_2(size)) + align = max(align, size); + s->align = calculate_alignment(flags, align, size); + s->useroffset = useroffset; s->usersize = usersize; @@ -1287,12 +1296,16 @@ void __init create_kmalloc_caches(slab_flags_t flags) */ void *kmalloc_order(size_t size, gfp_t flags, unsigned int order) { - void *ret; + void *ret = NULL; struct page *page; flags |= __GFP_COMP; page = alloc_pages(flags, order); - ret = page ? page_address(page) : NULL; + if (likely(page)) { + ret = page_address(page); + mod_node_page_state(page_pgdat(page), NR_SLAB_UNRECLAIMABLE, + 1 << order); + } ret = kasan_kmalloc_large(ret, size, flags); /* As ret might get tagged, call kmemleak hook after KASAN. */ kmemleak_alloc(ret, size, 1, flags); diff --git a/mm/slob.c b/mm/slob.c index cf377beab962..fa53e9f73893 100644 --- a/mm/slob.c +++ b/mm/slob.c @@ -190,7 +190,7 @@ static int slob_last(slob_t *s) static void *slob_new_pages(gfp_t gfp, int order, int node) { - void *page; + struct page *page; #ifdef CONFIG_NUMA if (node != NUMA_NO_NODE) @@ -202,14 +202,21 @@ static void *slob_new_pages(gfp_t gfp, int order, int node) if (!page) return NULL; + mod_node_page_state(page_pgdat(page), NR_SLAB_UNRECLAIMABLE, + 1 << order); return page_address(page); } static void slob_free_pages(void *b, int order) { + struct page *sp = virt_to_page(b); + if (current->reclaim_state) current->reclaim_state->reclaimed_slab += 1 << order; - free_pages((unsigned long)b, order); + + mod_node_page_state(page_pgdat(sp), NR_SLAB_UNRECLAIMABLE, + -(1 << order)); + __free_pages(sp, order); } /* @@ -217,6 +224,7 @@ static void slob_free_pages(void *b, int order) * @sp: Page to look in. * @size: Size of the allocation. * @align: Allocation alignment. + * @align_offset: Offset in the allocated block that will be aligned. * @page_removed_from_list: Return parameter. * * Tries to find a chunk of memory at least @size bytes big within @page. @@ -227,7 +235,7 @@ static void slob_free_pages(void *b, int order) * true (set to false otherwise). */ static void *slob_page_alloc(struct page *sp, size_t size, int align, - bool *page_removed_from_list) + int align_offset, bool *page_removed_from_list) { slob_t *prev, *cur, *aligned = NULL; int delta = 0, units = SLOB_UNITS(size); @@ -236,8 +244,17 @@ static void *slob_page_alloc(struct page *sp, size_t size, int align, for (prev = NULL, cur = sp->freelist; ; prev = cur, cur = slob_next(cur)) { slobidx_t avail = slob_units(cur); + /* + * 'aligned' will hold the address of the slob block so that the + * address 'aligned'+'align_offset' is aligned according to the + * 'align' parameter. This is for kmalloc() which prepends the + * allocated block with its size, so that the block itself is + * aligned when needed. + */ if (align) { - aligned = (slob_t *)ALIGN((unsigned long)cur, align); + aligned = (slob_t *) + (ALIGN((unsigned long)cur + align_offset, align) + - align_offset); delta = aligned - cur; } if (avail >= units + delta) { /* room enough? */ @@ -281,7 +298,8 @@ static void *slob_page_alloc(struct page *sp, size_t size, int align, /* * slob_alloc: entry point into the slob allocator. */ -static void *slob_alloc(size_t size, gfp_t gfp, int align, int node) +static void *slob_alloc(size_t size, gfp_t gfp, int align, int node, + int align_offset) { struct page *sp; struct list_head *slob_list; @@ -312,7 +330,7 @@ static void *slob_alloc(size_t size, gfp_t gfp, int align, int node) if (sp->units < SLOB_UNITS(size)) continue; - b = slob_page_alloc(sp, size, align, &page_removed_from_list); + b = slob_page_alloc(sp, size, align, align_offset, &page_removed_from_list); if (!b) continue; @@ -349,7 +367,7 @@ static void *slob_alloc(size_t size, gfp_t gfp, int align, int node) INIT_LIST_HEAD(&sp->slab_list); set_slob(b, SLOB_UNITS(PAGE_SIZE), b + SLOB_UNITS(PAGE_SIZE)); set_slob_page_free(sp, slob_list); - b = slob_page_alloc(sp, size, align, &_unused); + b = slob_page_alloc(sp, size, align, align_offset, &_unused); BUG_ON(!b); spin_unlock_irqrestore(&slob_lock, flags); } @@ -451,7 +469,7 @@ static __always_inline void * __do_kmalloc_node(size_t size, gfp_t gfp, int node, unsigned long caller) { unsigned int *m; - int align = max_t(size_t, ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN); + int minalign = max_t(size_t, ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN); void *ret; gfp &= gfp_allowed_mask; @@ -459,19 +477,28 @@ __do_kmalloc_node(size_t size, gfp_t gfp, int node, unsigned long caller) fs_reclaim_acquire(gfp); fs_reclaim_release(gfp); - if (size < PAGE_SIZE - align) { + if (size < PAGE_SIZE - minalign) { + int align = minalign; + + /* + * For power of two sizes, guarantee natural alignment for + * kmalloc()'d objects. + */ + if (is_power_of_2(size)) + align = max(minalign, (int) size); + if (!size) return ZERO_SIZE_PTR; - m = slob_alloc(size + align, gfp, align, node); + m = slob_alloc(size + minalign, gfp, align, node, minalign); if (!m) return NULL; *m = size; - ret = (void *)m + align; + ret = (void *)m + minalign; trace_kmalloc_node(caller, ret, - size, size + align, gfp, node); + size, size + minalign, gfp, node); } else { unsigned int order = get_order(size); @@ -521,8 +548,13 @@ void kfree(const void *block) int align = max_t(size_t, ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN); unsigned int *m = (unsigned int *)(block - align); slob_free(m, *m + align); - } else - __free_pages(sp, compound_order(sp)); + } else { + unsigned int order = compound_order(sp); + mod_node_page_state(page_pgdat(sp), NR_SLAB_UNRECLAIMABLE, + -(1 << order)); + __free_pages(sp, order); + + } } EXPORT_SYMBOL(kfree); @@ -567,7 +599,7 @@ static void *slob_alloc_node(struct kmem_cache *c, gfp_t flags, int node) fs_reclaim_release(flags); if (c->size < PAGE_SIZE) { - b = slob_alloc(c->size, flags, c->align, node); + b = slob_alloc(c->size, flags, c->align, node, 0); trace_kmem_cache_alloc_node(_RET_IP_, b, c->object_size, SLOB_UNITS(c->size) * SLOB_UNIT, flags, node); diff --git a/mm/slub.c b/mm/slub.c index 42c1b3af3c98..3d63ae320d31 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -3821,11 +3821,15 @@ static void *kmalloc_large_node(size_t size, gfp_t flags, int node) { struct page *page; void *ptr = NULL; + unsigned int order = get_order(size); flags |= __GFP_COMP; - page = alloc_pages_node(node, flags, get_order(size)); - if (page) + page = alloc_pages_node(node, flags, order); + if (page) { ptr = page_address(page); + mod_node_page_state(page_pgdat(page), NR_SLAB_UNRECLAIMABLE, + 1 << order); + } return kmalloc_large_node_hook(ptr, size, flags); } @@ -3951,9 +3955,13 @@ void kfree(const void *x) page = virt_to_head_page(x); if (unlikely(!PageSlab(page))) { + unsigned int order = compound_order(page); + BUG_ON(!PageCompound(page)); kfree_hook(object); - __free_pages(page, compound_order(page)); + mod_node_page_state(page_pgdat(page), NR_SLAB_UNRECLAIMABLE, + -(1 << order)); + __free_pages(page, order); return; } slab_free(page->slab_cache, page, object, NULL, 1, _RET_IP_); diff --git a/mm/sparse.c b/mm/sparse.c index bf32de9e666b..f6891c1992b1 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -219,7 +219,7 @@ static inline unsigned long first_present_section_nr(void) return next_present_section_nr(-1); } -void subsection_mask_set(unsigned long *map, unsigned long pfn, +static void subsection_mask_set(unsigned long *map, unsigned long pfn, unsigned long nr_pages) { int idx = subsection_map_index(pfn); diff --git a/mm/vmpressure.c b/mm/vmpressure.c index f3b50811497a..4bac22fe1aa2 100644 --- a/mm/vmpressure.c +++ b/mm/vmpressure.c @@ -355,6 +355,9 @@ void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg, int prio) * "hierarchy" or "local"). * * To be used as memcg event method. + * + * Return: 0 on success, -ENOMEM on memory failure or -EINVAL if @args could + * not be parsed. */ int vmpressure_register_event(struct mem_cgroup *memcg, struct eventfd_ctx *eventfd, const char *args) @@ -362,7 +365,7 @@ int vmpressure_register_event(struct mem_cgroup *memcg, struct vmpressure *vmpr = memcg_to_vmpressure(memcg); struct vmpressure_event *ev; enum vmpressure_modes mode = VMPRESSURE_NO_PASSTHROUGH; - enum vmpressure_levels level = -1; + enum vmpressure_levels level; char *spec, *spec_orig; char *token; int ret = 0; @@ -375,20 +378,18 @@ int vmpressure_register_event(struct mem_cgroup *memcg, /* Find required level */ token = strsep(&spec, ","); - level = match_string(vmpressure_str_levels, VMPRESSURE_NUM_LEVELS, token); - if (level < 0) { - ret = level; + ret = match_string(vmpressure_str_levels, VMPRESSURE_NUM_LEVELS, token); + if (ret < 0) goto out; - } + level = ret; /* Find optional mode */ token = strsep(&spec, ","); if (token) { - mode = match_string(vmpressure_str_modes, VMPRESSURE_NUM_MODES, token); - if (mode < 0) { - ret = mode; + ret = match_string(vmpressure_str_modes, VMPRESSURE_NUM_MODES, token); + if (ret < 0) goto out; - } + mode = ret; } ev = kzalloc(sizeof(*ev), GFP_KERNEL); @@ -404,6 +405,7 @@ int vmpressure_register_event(struct mem_cgroup *memcg, mutex_lock(&vmpr->events_lock); list_add(&ev->node, &vmpr->events); mutex_unlock(&vmpr->events_lock); + ret = 0; out: kfree(spec_orig); return ret; diff --git a/mm/vmscan.c b/mm/vmscan.c index e5d52d6a24af..c6659bb758a4 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2459,17 +2459,70 @@ out: *lru_pages = 0; for_each_evictable_lru(lru) { int file = is_file_lru(lru); - unsigned long size; + unsigned long lruvec_size; unsigned long scan; + unsigned long protection; + + lruvec_size = lruvec_lru_size(lruvec, lru, sc->reclaim_idx); + protection = mem_cgroup_protection(memcg, + sc->memcg_low_reclaim); + + if (protection) { + /* + * Scale a cgroup's reclaim pressure by proportioning + * its current usage to its memory.low or memory.min + * setting. + * + * This is important, as otherwise scanning aggression + * becomes extremely binary -- from nothing as we + * approach the memory protection threshold, to totally + * nominal as we exceed it. This results in requiring + * setting extremely liberal protection thresholds. It + * also means we simply get no protection at all if we + * set it too low, which is not ideal. + * + * If there is any protection in place, we reduce scan + * pressure by how much of the total memory used is + * within protection thresholds. + * + * There is one special case: in the first reclaim pass, + * we skip over all groups that are within their low + * protection. If that fails to reclaim enough pages to + * satisfy the reclaim goal, we come back and override + * the best-effort low protection. However, we still + * ideally want to honor how well-behaved groups are in + * that case instead of simply punishing them all + * equally. As such, we reclaim them based on how much + * memory they are using, reducing the scan pressure + * again by how much of the total memory used is under + * hard protection. + */ + unsigned long cgroup_size = mem_cgroup_size(memcg); + + /* Avoid TOCTOU with earlier protection check */ + cgroup_size = max(cgroup_size, protection); + + scan = lruvec_size - lruvec_size * protection / + cgroup_size; + + /* + * Minimally target SWAP_CLUSTER_MAX pages to keep + * reclaim moving forwards, avoiding decremeting + * sc->priority further than desirable. + */ + scan = max(scan, SWAP_CLUSTER_MAX); + } else { + scan = lruvec_size; + } + + scan >>= sc->priority; - size = lruvec_lru_size(lruvec, lru, sc->reclaim_idx); - scan = size >> sc->priority; /* * If the cgroup's already been deleted, make sure to * scrape out the remaining cache. */ if (!scan && !mem_cgroup_online(memcg)) - scan = min(size, SWAP_CLUSTER_MAX); + scan = min(lruvec_size, SWAP_CLUSTER_MAX); switch (scan_balance) { case SCAN_EQUAL: @@ -2489,7 +2542,7 @@ out: case SCAN_ANON: /* Scan one type exclusively */ if ((scan_balance == SCAN_FILE) != file) { - size = 0; + lruvec_size = 0; scan = 0; } break; @@ -2498,7 +2551,7 @@ out: BUG(); } - *lru_pages += size; + *lru_pages += lruvec_size; nr[lru] = scan; } } @@ -2742,6 +2795,13 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc) memcg_memory_event(memcg, MEMCG_LOW); break; case MEMCG_PROT_NONE: + /* + * All protection thresholds breached. We may + * still choose to vary the scan pressure + * applied based on by how much the cgroup in + * question has exceeded its protection + * thresholds (see get_scan_count). + */ break; } diff --git a/mm/z3fold.c b/mm/z3fold.c index 05bdf90646e7..6d3d3f698ebb 100644 --- a/mm/z3fold.c +++ b/mm/z3fold.c @@ -998,9 +998,11 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle) struct z3fold_header *zhdr; struct page *page; enum buddy bud; + bool page_claimed; zhdr = handle_to_z3fold_header(handle); page = virt_to_page(zhdr); + page_claimed = test_and_set_bit(PAGE_CLAIMED, &page->private); if (test_bit(PAGE_HEADLESS, &page->private)) { /* if a headless page is under reclaim, just leave. @@ -1008,7 +1010,7 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle) * has not been set before, we release this page * immediately so we don't care about its value any more. */ - if (!test_and_set_bit(PAGE_CLAIMED, &page->private)) { + if (!page_claimed) { spin_lock(&pool->lock); list_del(&page->lru); spin_unlock(&pool->lock); @@ -1044,13 +1046,15 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle) atomic64_dec(&pool->pages_nr); return; } - if (test_bit(PAGE_CLAIMED, &page->private)) { + if (page_claimed) { + /* the page has not been claimed by us */ z3fold_page_unlock(zhdr); return; } if (unlikely(PageIsolated(page)) || test_and_set_bit(NEEDS_COMPACTING, &page->private)) { z3fold_page_unlock(zhdr); + clear_bit(PAGE_CLAIMED, &page->private); return; } if (zhdr->cpu < 0 || !cpu_online(zhdr->cpu)) { @@ -1060,10 +1064,12 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle) zhdr->cpu = -1; kref_get(&zhdr->refcount); do_compact_page(zhdr, true); + clear_bit(PAGE_CLAIMED, &page->private); return; } kref_get(&zhdr->refcount); queue_work_on(zhdr->cpu, pool->compact_wq, &zhdr->work); + clear_bit(PAGE_CLAIMED, &page->private); z3fold_page_unlock(zhdr); } diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index e41ed2e0ae7d..48d63956a68c 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -2155,7 +2155,7 @@ static enum sctp_ierror sctp_verify_param(struct net *net, case SCTP_PARAM_SET_PRIMARY: if (ep->asconf_enable) break; - goto fallthrough; + goto unhandled; case SCTP_PARAM_HOST_NAME_ADDRESS: /* Tell the peer, we won't support this param. */ @@ -2166,11 +2166,11 @@ static enum sctp_ierror sctp_verify_param(struct net *net, case SCTP_PARAM_FWD_TSN_SUPPORT: if (ep->prsctp_enable) break; - goto fallthrough; + goto unhandled; case SCTP_PARAM_RANDOM: if (!ep->auth_enable) - goto fallthrough; + goto unhandled; /* SCTP-AUTH: Secion 6.1 * If the random number is not 32 byte long the association @@ -2187,7 +2187,7 @@ static enum sctp_ierror sctp_verify_param(struct net *net, case SCTP_PARAM_CHUNKS: if (!ep->auth_enable) - goto fallthrough; + goto unhandled; /* SCTP-AUTH: Section 3.2 * The CHUNKS parameter MUST be included once in the INIT or @@ -2203,7 +2203,7 @@ static enum sctp_ierror sctp_verify_param(struct net *net, case SCTP_PARAM_HMAC_ALGO: if (!ep->auth_enable) - goto fallthrough; + goto unhandled; hmacs = (struct sctp_hmac_algo_param *)param.p; n_elt = (ntohs(param.p->length) - @@ -2226,7 +2226,7 @@ static enum sctp_ierror sctp_verify_param(struct net *net, retval = SCTP_IERROR_ABORT; } break; -fallthrough: +unhandled: default: pr_debug("%s: unrecognized param:%d for chunk:%d\n", __func__, ntohs(param.p->type), cid); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 9ac88722fa83..70e52f567b2a 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1249,19 +1249,21 @@ static void xs_error_report(struct sock *sk) { struct sock_xprt *transport; struct rpc_xprt *xprt; - int err; read_lock_bh(&sk->sk_callback_lock); if (!(xprt = xprt_from_sock(sk))) goto out; transport = container_of(xprt, struct sock_xprt, xprt); - err = -sk->sk_err; - if (err == 0) + transport->xprt_err = -sk->sk_err; + if (transport->xprt_err == 0) goto out; dprintk("RPC: xs_error_report client %p, error=%d...\n", - xprt, -err); - trace_rpc_socket_error(xprt, sk->sk_socket, err); + xprt, -transport->xprt_err); + trace_rpc_socket_error(xprt, sk->sk_socket, transport->xprt_err); + + /* barrier ensures xprt_err is set before XPRT_SOCK_WAKE_ERROR */ + smp_mb__before_atomic(); xs_run_error_worker(transport, XPRT_SOCK_WAKE_ERROR); out: read_unlock_bh(&sk->sk_callback_lock); @@ -2476,7 +2478,6 @@ static void xs_wake_write(struct sock_xprt *transport) static void xs_wake_error(struct sock_xprt *transport) { int sockerr; - int sockerr_len = sizeof(sockerr); if (!test_bit(XPRT_SOCK_WAKE_ERROR, &transport->sock_state)) return; @@ -2485,9 +2486,7 @@ static void xs_wake_error(struct sock_xprt *transport) goto out; if (!test_and_clear_bit(XPRT_SOCK_WAKE_ERROR, &transport->sock_state)) goto out; - if (kernel_getsockopt(transport->sock, SOL_SOCKET, SO_ERROR, - (char *)&sockerr, &sockerr_len) != 0) - goto out; + sockerr = xchg(&transport->xprt_err, 0); if (sockerr < 0) xprt_wake_pending_tasks(&transport->xprt, sockerr); out: diff --git a/scripts/coccinelle/misc/add_namespace.cocci b/scripts/coccinelle/misc/add_namespace.cocci index c832bb6445a8..99e93a6c2e24 100644 --- a/scripts/coccinelle/misc/add_namespace.cocci +++ b/scripts/coccinelle/misc/add_namespace.cocci @@ -6,6 +6,8 @@ /// add a missing namespace tag to a module source file. /// +virtual report + @has_ns_import@ declarer name MODULE_IMPORT_NS; identifier virtual.ns; diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 442d5e2ad688..936d3ad23c83 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -166,7 +166,7 @@ struct symbol { struct module *module; unsigned int crc; int crc_valid; - const char *namespace; + char *namespace; unsigned int weak:1; unsigned int vmlinux:1; /* 1 if symbol is defined in vmlinux */ unsigned int kernel:1; /* 1 if symbol is from kernel @@ -348,20 +348,18 @@ static enum export export_from_sec(struct elf_info *elf, unsigned int sec) return export_unknown; } -static const char *sym_extract_namespace(const char **symname) +static char *sym_extract_namespace(const char **symname) { - size_t n; - char *dupsymname; + char *namespace = NULL; + char *ns_separator; - n = strcspn(*symname, "."); - if (n < strlen(*symname) - 1) { - dupsymname = NOFAIL(strdup(*symname)); - dupsymname[n] = '\0'; - *symname = dupsymname; - return dupsymname + n + 1; + ns_separator = strchr(*symname, '.'); + if (ns_separator) { + namespace = NOFAIL(strndup(*symname, ns_separator - *symname)); + *symname = ns_separator + 1; } - return NULL; + return namespace; } /** @@ -375,7 +373,6 @@ static struct symbol *sym_add_exported(const char *name, const char *namespace, if (!s) { s = new_symbol(name, mod, export); - s->namespace = namespace; } else { if (!s->preloaded) { warn("%s: '%s' exported twice. Previous export was in %s%s\n", @@ -386,6 +383,8 @@ static struct symbol *sym_add_exported(const char *name, const char *namespace, s->module = mod; } } + free(s->namespace); + s->namespace = namespace ? strdup(namespace) : NULL; s->preloaded = 0; s->vmlinux = is_vmlinux(mod->name); s->kernel = 0; @@ -672,7 +671,8 @@ static void handle_modversions(struct module *mod, struct elf_info *info, unsigned int crc; enum export export; bool is_crc = false; - const char *name, *namespace; + const char *name; + char *namespace; if ((!is_vmlinux(mod->name) || mod->is_dot_o) && strstarts(symname, "__ksymtab")) @@ -747,6 +747,7 @@ static void handle_modversions(struct module *mod, struct elf_info *info, name = symname + strlen("__ksymtab_"); namespace = sym_extract_namespace(&name); sym_add_exported(name, namespace, mod, export); + free(namespace); } if (strcmp(symname, "init_module") == 0) mod->has_init = 1; @@ -2195,7 +2196,7 @@ static int check_exports(struct module *mod) else basename = mod->name; - if (exp->namespace) { + if (exp->namespace && exp->namespace[0]) { add_namespace(&mod->required_namespaces, exp->namespace); diff --git a/scripts/nsdeps b/scripts/nsdeps index ac2b6031dd13..3754dac13b31 100644 --- a/scripts/nsdeps +++ b/scripts/nsdeps @@ -1,4 +1,4 @@ -#!/bin/bash +#!/bin/sh # SPDX-License-Identifier: GPL-2.0 # Linux kernel symbol namespace import generator # @@ -41,7 +41,7 @@ generate_deps() { for source_file in $mod_source_files; do sed '/MODULE_IMPORT_NS/Q' $source_file > ${source_file}.tmp offset=$(wc -l ${source_file}.tmp | awk '{print $1;}') - cat $source_file | grep MODULE_IMPORT_NS | sort -u >> ${source_file}.tmp + cat $source_file | grep MODULE_IMPORT_NS | LANG=C sort -u >> ${source_file}.tmp tail -n +$((offset +1)) ${source_file} | grep -v MODULE_IMPORT_NS >> ${source_file}.tmp if ! diff -q ${source_file} ${source_file}.tmp; then mv ${source_file}.tmp ${source_file} diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index 3a29e7c24ba9..a5813c7629c1 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -1946,7 +1946,14 @@ static int convert_context(struct context *oldc, struct context *newc, void *p) rc = string_to_context_struct(args->newp, NULL, s, newc, SECSID_NULL); if (rc == -EINVAL) { - /* Retain string representation for later mapping. */ + /* + * Retain string representation for later mapping. + * + * IMPORTANT: We need to copy the contents of oldc->str + * back into s again because string_to_context_struct() + * may have garbled it. + */ + memcpy(s, oldc->str, oldc->len); context_init(newc); newc->str = s; newc->len = oldc->len; diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index c3feccb99ff5..4cdbae6f4e61 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -63,6 +63,13 @@ TARGETS += zram TARGETS_HOTPLUG = cpu-hotplug TARGETS_HOTPLUG += memory-hotplug +# User can optionally provide a TARGETS skiplist. +SKIP_TARGETS ?= +ifneq ($(SKIP_TARGETS),) + TMP := $(filter-out $(SKIP_TARGETS), $(TARGETS)) + override TARGETS := $(TMP) +endif + # Clear LDFLAGS and MAKEFLAGS if called from main # Makefile to avoid test build failures when test # Makefile doesn't have explicit build rules. @@ -171,9 +178,12 @@ run_pstore_crash: # 1. output_dir=kernel_src # 2. a separate output directory is specified using O= KBUILD_OUTPUT # 3. a separate output directory is specified using KBUILD_OUTPUT +# Avoid conflict with INSTALL_PATH set by the main Makefile # -INSTALL_PATH ?= $(BUILD)/install -INSTALL_PATH := $(abspath $(INSTALL_PATH)) +KSFT_INSTALL_PATH ?= $(BUILD)/kselftest_install +KSFT_INSTALL_PATH := $(abspath $(KSFT_INSTALL_PATH)) +# Avoid changing the rest of the logic here and lib.mk. +INSTALL_PATH := $(KSFT_INSTALL_PATH) ALL_SCRIPT := $(INSTALL_PATH)/run_kselftest.sh install: all @@ -198,11 +208,16 @@ ifdef INSTALL_PATH echo " cat /dev/null > \$$logfile" >> $(ALL_SCRIPT) echo "fi" >> $(ALL_SCRIPT) + @# While building run_kselftest.sh skip also non-existent TARGET dirs: + @# they could be the result of a build failure and should NOT be + @# included in the generated runlist. for TARGET in $(TARGETS); do \ BUILD_TARGET=$$BUILD/$$TARGET; \ + [ ! -d $$INSTALL_PATH/$$TARGET ] && echo "Skipping non-existent dir: $$TARGET" && continue; \ echo "[ -w /dev/kmsg ] && echo \"kselftest: Running tests in $$TARGET\" >> /dev/kmsg" >> $(ALL_SCRIPT); \ echo "cd $$TARGET" >> $(ALL_SCRIPT); \ echo -n "run_many" >> $(ALL_SCRIPT); \ + echo -n "Emit Tests for $$TARGET\n"; \ $(MAKE) -s --no-print-directory OUTPUT=$$BUILD_TARGET -C $$TARGET emit_tests >> $(ALL_SCRIPT); \ echo "" >> $(ALL_SCRIPT); \ echo "cd \$$ROOT" >> $(ALL_SCRIPT); \ diff --git a/tools/testing/selftests/kselftest/runner.sh b/tools/testing/selftests/kselftest/runner.sh index 00c9020bdda8..84de7bc74f2c 100644 --- a/tools/testing/selftests/kselftest/runner.sh +++ b/tools/testing/selftests/kselftest/runner.sh @@ -3,9 +3,14 @@ # # Runs a set of tests in a given subdirectory. export skip_rc=4 +export timeout_rc=124 export logfile=/dev/stdout export per_test_logging= +# Defaults for "settings" file fields: +# "timeout" how many seconds to let each test run before failing. +export kselftest_default_timeout=45 + # There isn't a shell-agnostic way to find the path of a sourced file, # so we must rely on BASE_DIR being set to find other tools. if [ -z "$BASE_DIR" ]; then @@ -24,6 +29,16 @@ tap_prefix() fi } +tap_timeout() +{ + # Make sure tests will time out if utility is available. + if [ -x /usr/bin/timeout ] ; then + /usr/bin/timeout "$kselftest_timeout" "$1" + else + "$1" + fi +} + run_one() { DIR="$1" @@ -32,6 +47,18 @@ run_one() BASENAME_TEST=$(basename $TEST) + # Reset any "settings"-file variables. + export kselftest_timeout="$kselftest_default_timeout" + # Load per-test-directory kselftest "settings" file. + settings="$BASE_DIR/$DIR/settings" + if [ -r "$settings" ] ; then + while read line ; do + field=$(echo "$line" | cut -d= -f1) + value=$(echo "$line" | cut -d= -f2-) + eval "kselftest_$field"="$value" + done < "$settings" + fi + TEST_HDR_MSG="selftests: $DIR: $BASENAME_TEST" echo "# $TEST_HDR_MSG" if [ ! -x "$TEST" ]; then @@ -44,14 +71,17 @@ run_one() echo "not ok $test_num $TEST_HDR_MSG" else cd `dirname $TEST` > /dev/null - (((((./$BASENAME_TEST 2>&1; echo $? >&3) | + ((((( tap_timeout ./$BASENAME_TEST 2>&1; echo $? >&3) | tap_prefix >&4) 3>&1) | (read xs; exit $xs)) 4>>"$logfile" && echo "ok $test_num $TEST_HDR_MSG") || - (if [ $? -eq $skip_rc ]; then \ + (rc=$?; \ + if [ $rc -eq $skip_rc ]; then \ echo "not ok $test_num $TEST_HDR_MSG # SKIP" + elif [ $rc -eq $timeout_rc ]; then \ + echo "not ok $test_num $TEST_HDR_MSG # TIMEOUT" else - echo "not ok $test_num $TEST_HDR_MSG" + echo "not ok $test_num $TEST_HDR_MSG # exit=$rc" fi) cd - >/dev/null fi diff --git a/tools/testing/selftests/kselftest_install.sh b/tools/testing/selftests/kselftest_install.sh index ec304463883c..e2e1911d62d5 100755 --- a/tools/testing/selftests/kselftest_install.sh +++ b/tools/testing/selftests/kselftest_install.sh @@ -24,12 +24,12 @@ main() echo "$0: Installing in specified location - $install_loc ..." fi - install_dir=$install_loc/kselftest + install_dir=$install_loc/kselftest_install # Create install directory mkdir -p $install_dir # Build tests - INSTALL_PATH=$install_dir make install + KSFT_INSTALL_PATH=$install_dir make install } main "$@" diff --git a/tools/testing/selftests/powerpc/mm/tlbie_test.c b/tools/testing/selftests/powerpc/mm/tlbie_test.c index 9868a5ddd847..f85a0938ab25 100644 --- a/tools/testing/selftests/powerpc/mm/tlbie_test.c +++ b/tools/testing/selftests/powerpc/mm/tlbie_test.c @@ -636,7 +636,7 @@ int main(int argc, char *argv[]) nrthreads = strtoul(optarg, NULL, 10); break; case 'l': - strncpy(logdir, optarg, LOGDIR_NAME_SIZE); + strncpy(logdir, optarg, LOGDIR_NAME_SIZE - 1); break; case 't': run_time = strtoul(optarg, NULL, 10); diff --git a/tools/testing/selftests/rtc/settings b/tools/testing/selftests/rtc/settings new file mode 100644 index 000000000000..ba4d85f74cd6 --- /dev/null +++ b/tools/testing/selftests/rtc/settings @@ -0,0 +1 @@ +timeout=90 diff --git a/tools/testing/selftests/watchdog/watchdog-test.c b/tools/testing/selftests/watchdog/watchdog-test.c index afff120c7be6..f45e510500c0 100644 --- a/tools/testing/selftests/watchdog/watchdog-test.c +++ b/tools/testing/selftests/watchdog/watchdog-test.c @@ -19,7 +19,7 @@ int fd; const char v = 'V'; -static const char sopts[] = "bdehp:t:Tn:NLf:"; +static const char sopts[] = "bdehp:t:Tn:NLf:i"; static const struct option lopts[] = { {"bootstatus", no_argument, NULL, 'b'}, {"disable", no_argument, NULL, 'd'}, @@ -32,6 +32,7 @@ static const struct option lopts[] = { {"getpretimeout", no_argument, NULL, 'N'}, {"gettimeleft", no_argument, NULL, 'L'}, {"file", required_argument, NULL, 'f'}, + {"info", no_argument, NULL, 'i'}, {NULL, no_argument, NULL, 0x0} }; @@ -72,6 +73,7 @@ static void usage(char *progname) printf("Usage: %s [options]\n", progname); printf(" -f, --file\t\tOpen watchdog device file\n"); printf("\t\t\tDefault is /dev/watchdog\n"); + printf(" -i, --info\t\tShow watchdog_info\n"); printf(" -b, --bootstatus\tGet last boot status (Watchdog/POR)\n"); printf(" -d, --disable\t\tTurn off the watchdog timer\n"); printf(" -e, --enable\t\tTurn on the watchdog timer\n"); @@ -97,6 +99,7 @@ int main(int argc, char *argv[]) int c; int oneshot = 0; char *file = "/dev/watchdog"; + struct watchdog_info info; setbuf(stdout, NULL); @@ -118,6 +121,16 @@ int main(int argc, char *argv[]) exit(-1); } + /* + * Validate that `file` is a watchdog device + */ + ret = ioctl(fd, WDIOC_GETSUPPORT, &info); + if (ret) { + printf("WDIOC_GETSUPPORT error '%s'\n", strerror(errno)); + close(fd); + exit(ret); + } + optind = 0; while ((c = getopt_long(argc, argv, sopts, lopts, NULL)) != -1) { @@ -205,6 +218,18 @@ int main(int argc, char *argv[]) case 'f': /* Handled above */ break; + case 'i': + /* + * watchdog_info was obtained as part of file open + * validation. So we just show it here. + */ + oneshot = 1; + printf("watchdog_info:\n"); + printf(" identity:\t\t%s\n", info.identity); + printf(" firmware_version:\t%u\n", + info.firmware_version); + printf(" options:\t\t%08x\n", info.options); + break; default: usage(argv[0]); |