diff options
298 files changed, 3874 insertions, 1421 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 986e44387dad..2ba45caabada 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -653,6 +653,9 @@ cpuidle.off=1 [CPU_IDLE] disable the cpuidle sub-system + cpufreq.off=1 [CPU_FREQ] + disable the cpufreq sub-system + cpu_init_udelay=N [X86] Delay for N microsec between assert and de-assert of APIC INIT to start processors. This delay occurs @@ -1183,6 +1186,12 @@ functions that can be changed at run time by the set_graph_notrace file in the debugfs tracing directory. + ftrace_graph_max_depth=<uint> + [FTRACE] Used with the function graph tracer. This is + the max depth it will trace into a function. This value + can be changed at run time by the max_graph_depth file + in the tracefs tracing directory. default: 0 (no limit) + gamecon.map[2|3]= [HW,JOY] Multisystem joystick and NES/SNES/PSX pad support via parallel port (up to 5 devices per port) diff --git a/Documentation/dev-tools/kcov.rst b/Documentation/dev-tools/kcov.rst index 2c41b713841f..44886c91e112 100644 --- a/Documentation/dev-tools/kcov.rst +++ b/Documentation/dev-tools/kcov.rst @@ -10,7 +10,7 @@ Note that kcov does not aim to collect as much coverage as possible. It aims to collect more or less stable coverage that is function of syscall inputs. To achieve this goal it does not collect coverage in soft/hard interrupts and instrumentation of some inherently non-deterministic parts of kernel is -disbled (e.g. scheduler, locking). +disabled (e.g. scheduler, locking). Usage ----- diff --git a/Documentation/devicetree/bindings/regulator/ti-abb-regulator.txt b/Documentation/devicetree/bindings/regulator/ti-abb-regulator.txt index c3f6546ebac7..6a23ad9ac53a 100644 --- a/Documentation/devicetree/bindings/regulator/ti-abb-regulator.txt +++ b/Documentation/devicetree/bindings/regulator/ti-abb-regulator.txt @@ -45,7 +45,7 @@ Required Properties: Optional Properties: - reg-names: In addition to the required properties, the following are optional - "efuse-address" - Contains efuse base address used to pick up ABB info. - - "ldo-address" - Contains address of ABB LDO overide register address. + - "ldo-address" - Contains address of ABB LDO override register. "efuse-address" is required for this. - ti,ldovbb-vset-mask - Required if ldo-address is set, mask for LDO override register to provide override vset value. diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt index e4991fb1eedc..41ef9d8efe95 100644 --- a/Documentation/trace/kprobetrace.txt +++ b/Documentation/trace/kprobetrace.txt @@ -12,7 +12,7 @@ kprobes can probe (this means, all functions body except for __kprobes functions). Unlike the Tracepoint based event, this can be added and removed dynamically, on the fly. -To enable this feature, build your kernel with CONFIG_KPROBE_EVENT=y. +To enable this feature, build your kernel with CONFIG_KPROBE_EVENTS=y. Similar to the events tracer, this doesn't need to be activated via current_tracer. Instead of that, add probe points via diff --git a/Documentation/trace/uprobetracer.txt b/Documentation/trace/uprobetracer.txt index fa7b680ee8a0..bf526a7c5559 100644 --- a/Documentation/trace/uprobetracer.txt +++ b/Documentation/trace/uprobetracer.txt @@ -7,7 +7,7 @@ Overview -------- Uprobe based trace events are similar to kprobe based trace events. -To enable this feature, build your kernel with CONFIG_UPROBE_EVENT=y. +To enable this feature, build your kernel with CONFIG_UPROBE_EVENTS=y. Similar to the kprobe-event tracer, this doesn't need to be activated via current_tracer. Instead of that, add probe points via diff --git a/Documentation/vm/userfaultfd.txt b/Documentation/vm/userfaultfd.txt index 0e5543a920e5..bb2f945f87ab 100644 --- a/Documentation/vm/userfaultfd.txt +++ b/Documentation/vm/userfaultfd.txt @@ -172,10 +172,6 @@ the same read(2) protocol as for the page fault notifications. The manager has to explicitly enable these events by setting appropriate bits in uffdio_api.features passed to UFFDIO_API ioctl: -UFFD_FEATURE_EVENT_EXIT - enable notification about exit() of the -non-cooperative process. When the monitored process exits, the uffd -manager will get UFFD_EVENT_EXIT. - UFFD_FEATURE_EVENT_FORK - enable userfaultfd hooks for fork(). When this feature is enabled, the userfaultfd context of the parent process is duplicated into the newly created process. The manager receives diff --git a/arch/arc/include/asm/hugepage.h b/arch/arc/include/asm/hugepage.h index 317ff773e1ca..b18fcb606908 100644 --- a/arch/arc/include/asm/hugepage.h +++ b/arch/arc/include/asm/hugepage.h @@ -11,6 +11,7 @@ #define _ASM_ARC_HUGEPAGE_H #include <linux/types.h> +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopmd.h> static inline pte_t pmd_pte(pmd_t pmd) diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h index e94ca72b974e..ee22d40afef4 100644 --- a/arch/arc/include/asm/pgtable.h +++ b/arch/arc/include/asm/pgtable.h @@ -37,6 +37,7 @@ #include <asm/page.h> #include <asm/mmu.h> +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopmd.h> #include <linux/const.h> diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index a8d656d9aec7..1c462381c225 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -20,6 +20,7 @@ #else +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopud.h> #include <asm/memory.h> #include <asm/pgtable-hwdef.h> diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c index ce18c91b50a1..f0325d96b97a 100644 --- a/arch/arm/xen/mm.c +++ b/arch/arm/xen/mm.c @@ -198,6 +198,8 @@ static const struct dma_map_ops xen_swiotlb_dma_ops = { .unmap_page = xen_swiotlb_unmap_page, .dma_supported = xen_swiotlb_dma_supported, .set_dma_mask = xen_swiotlb_set_dma_mask, + .mmap = xen_swiotlb_dma_mmap, + .get_sgtable = xen_swiotlb_get_sgtable, }; int __init xen_mm_init(void) diff --git a/arch/arm64/include/asm/pgtable-types.h b/arch/arm64/include/asm/pgtable-types.h index 69b2fd41503c..345a072b5856 100644 --- a/arch/arm64/include/asm/pgtable-types.h +++ b/arch/arm64/include/asm/pgtable-types.h @@ -55,9 +55,13 @@ typedef struct { pteval_t pgprot; } pgprot_t; #define __pgprot(x) ((pgprot_t) { (x) } ) #if CONFIG_PGTABLE_LEVELS == 2 +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopmd.h> #elif CONFIG_PGTABLE_LEVELS == 3 +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopud.h> +#elif CONFIG_PGTABLE_LEVELS == 4 +#include <asm-generic/5level-fixup.h> #endif #endif /* __ASM_PGTABLE_TYPES_H */ diff --git a/arch/avr32/include/asm/pgtable-2level.h b/arch/avr32/include/asm/pgtable-2level.h index 425dd567b5b9..d5b1c63993ec 100644 --- a/arch/avr32/include/asm/pgtable-2level.h +++ b/arch/avr32/include/asm/pgtable-2level.h @@ -8,6 +8,7 @@ #ifndef __ASM_AVR32_PGTABLE_2LEVEL_H #define __ASM_AVR32_PGTABLE_2LEVEL_H +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopmd.h> /* diff --git a/arch/avr32/oprofile/backtrace.c b/arch/avr32/oprofile/backtrace.c index 75d9ad6f99cf..29cf2f191bfd 100644 --- a/arch/avr32/oprofile/backtrace.c +++ b/arch/avr32/oprofile/backtrace.c @@ -14,7 +14,7 @@ */ #include <linux/oprofile.h> -#include <linux/sched.h> +#include <linux/ptrace.h> #include <linux/uaccess.h> /* The first two words of each frame on the stack look like this if we have diff --git a/arch/cris/arch-v32/drivers/cryptocop.c b/arch/cris/arch-v32/drivers/cryptocop.c index ae6903d7fdbe..14970f11bbf2 100644 --- a/arch/cris/arch-v32/drivers/cryptocop.c +++ b/arch/cris/arch-v32/drivers/cryptocop.c @@ -2086,7 +2086,7 @@ static void cryptocop_job_queue_close(void) dma_in_cfg.en = regk_dma_no; REG_WR(dma, IN_DMA_INST, rw_cfg, dma_in_cfg); - /* Disble the cryptocop. */ + /* Disable the cryptocop. */ rw_cfg = REG_RD(strcop, regi_strcop, rw_cfg); rw_cfg.en = 0; REG_WR(strcop, regi_strcop, rw_cfg, rw_cfg); diff --git a/arch/cris/include/asm/pgtable.h b/arch/cris/include/asm/pgtable.h index 2a3210ba4c72..fa3a73004cc5 100644 --- a/arch/cris/include/asm/pgtable.h +++ b/arch/cris/include/asm/pgtable.h @@ -6,6 +6,7 @@ #define _CRIS_PGTABLE_H #include <asm/page.h> +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopmd.h> #ifndef __ASSEMBLY__ diff --git a/arch/frv/include/asm/pgtable.h b/arch/frv/include/asm/pgtable.h index a0513d463a1f..ab6e7e961b54 100644 --- a/arch/frv/include/asm/pgtable.h +++ b/arch/frv/include/asm/pgtable.h @@ -16,6 +16,7 @@ #ifndef _ASM_PGTABLE_H #define _ASM_PGTABLE_H +#include <asm-generic/5level-fixup.h> #include <asm/mem-layout.h> #include <asm/setup.h> #include <asm/processor.h> diff --git a/arch/h8300/include/asm/pgtable.h b/arch/h8300/include/asm/pgtable.h index 8341db67821d..7d265d28ba5e 100644 --- a/arch/h8300/include/asm/pgtable.h +++ b/arch/h8300/include/asm/pgtable.h @@ -1,5 +1,6 @@ #ifndef _H8300_PGTABLE_H #define _H8300_PGTABLE_H +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopud.h> #include <asm-generic/pgtable.h> #define pgtable_cache_init() do { } while (0) diff --git a/arch/h8300/kernel/ptrace_h.c b/arch/h8300/kernel/ptrace_h.c index fe3b5673baba..f5ff3b794c85 100644 --- a/arch/h8300/kernel/ptrace_h.c +++ b/arch/h8300/kernel/ptrace_h.c @@ -9,7 +9,7 @@ */ #include <linux/linkage.h> -#include <linux/sched.h> +#include <linux/sched/signal.h> #include <asm/ptrace.h> #define BREAKINST 0x5730 /* trapa #3 */ diff --git a/arch/hexagon/include/asm/pgtable.h b/arch/hexagon/include/asm/pgtable.h index 49eab8136ec3..24a9177fb897 100644 --- a/arch/hexagon/include/asm/pgtable.h +++ b/arch/hexagon/include/asm/pgtable.h @@ -26,6 +26,7 @@ */ #include <linux/swap.h> #include <asm/page.h> +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopmd.h> /* A handy thing to have if one has the RAM. Declared in head.S */ diff --git a/arch/ia64/include/asm/pgtable.h b/arch/ia64/include/asm/pgtable.h index 384794e665fc..6cc22c8d8923 100644 --- a/arch/ia64/include/asm/pgtable.h +++ b/arch/ia64/include/asm/pgtable.h @@ -587,8 +587,10 @@ extern struct page *zero_page_memmap_ptr; #if CONFIG_PGTABLE_LEVELS == 3 +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopud.h> #endif +#include <asm-generic/5level-fixup.h> #include <asm-generic/pgtable.h> #endif /* _ASM_IA64_PGTABLE_H */ diff --git a/arch/metag/include/asm/pgtable.h b/arch/metag/include/asm/pgtable.h index ffa3a3a2ecad..0c151e5af079 100644 --- a/arch/metag/include/asm/pgtable.h +++ b/arch/metag/include/asm/pgtable.h @@ -6,6 +6,7 @@ #define _METAG_PGTABLE_H #include <asm/pgtable-bits.h> +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopmd.h> /* Invalid regions on Meta: 0x00000000-0x001FFFFF and 0xFFFF0000-0xFFFFFFFF */ diff --git a/arch/microblaze/include/asm/page.h b/arch/microblaze/include/asm/page.h index fd850879854d..d506bb0893f9 100644 --- a/arch/microblaze/include/asm/page.h +++ b/arch/microblaze/include/asm/page.h @@ -95,7 +95,8 @@ typedef struct { unsigned long pgd; } pgd_t; # else /* CONFIG_MMU */ typedef struct { unsigned long ste[64]; } pmd_t; typedef struct { pmd_t pue[1]; } pud_t; -typedef struct { pud_t pge[1]; } pgd_t; +typedef struct { pud_t p4e[1]; } p4d_t; +typedef struct { p4d_t pge[1]; } pgd_t; # endif /* CONFIG_MMU */ # define pte_val(x) ((x).pte) diff --git a/arch/mips/cavium-octeon/cpu.c b/arch/mips/cavium-octeon/cpu.c index a5b427909b5c..036d56cc4591 100644 --- a/arch/mips/cavium-octeon/cpu.c +++ b/arch/mips/cavium-octeon/cpu.c @@ -10,7 +10,9 @@ #include <linux/irqflags.h> #include <linux/notifier.h> #include <linux/prefetch.h> +#include <linux/ptrace.h> #include <linux/sched.h> +#include <linux/sched/task_stack.h> #include <asm/cop2.h> #include <asm/current.h> diff --git a/arch/mips/cavium-octeon/crypto/octeon-crypto.c b/arch/mips/cavium-octeon/crypto/octeon-crypto.c index 4d22365844af..cfb4a146cf17 100644 --- a/arch/mips/cavium-octeon/crypto/octeon-crypto.c +++ b/arch/mips/cavium-octeon/crypto/octeon-crypto.c @@ -9,6 +9,7 @@ #include <asm/cop2.h> #include <linux/export.h> #include <linux/interrupt.h> +#include <linux/sched/task_stack.h> #include "octeon-crypto.h" diff --git a/arch/mips/cavium-octeon/smp.c b/arch/mips/cavium-octeon/smp.c index 4b94b7fbafa3..3de786545ded 100644 --- a/arch/mips/cavium-octeon/smp.c +++ b/arch/mips/cavium-octeon/smp.c @@ -12,6 +12,7 @@ #include <linux/kernel_stat.h> #include <linux/sched.h> #include <linux/sched/hotplug.h> +#include <linux/sched/task_stack.h> #include <linux/init.h> #include <linux/export.h> diff --git a/arch/mips/include/asm/fpu.h b/arch/mips/include/asm/fpu.h index 321752bcbab6..f94455f964ec 100644 --- a/arch/mips/include/asm/fpu.h +++ b/arch/mips/include/asm/fpu.h @@ -12,6 +12,7 @@ #include <linux/sched.h> #include <linux/sched/task_stack.h> +#include <linux/ptrace.h> #include <linux/thread_info.h> #include <linux/bitops.h> diff --git a/arch/mips/include/asm/pgtable-32.h b/arch/mips/include/asm/pgtable-32.h index d21f3da7bdb6..6f94bed571c4 100644 --- a/arch/mips/include/asm/pgtable-32.h +++ b/arch/mips/include/asm/pgtable-32.h @@ -16,6 +16,7 @@ #include <asm/cachectl.h> #include <asm/fixmap.h> +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopmd.h> extern int temp_tlb_entry; diff --git a/arch/mips/include/asm/pgtable-64.h b/arch/mips/include/asm/pgtable-64.h index 514cbc0a6a67..130a2a6c1531 100644 --- a/arch/mips/include/asm/pgtable-64.h +++ b/arch/mips/include/asm/pgtable-64.h @@ -17,6 +17,7 @@ #include <asm/cachectl.h> #include <asm/fixmap.h> +#define __ARCH_USE_5LEVEL_HACK #if defined(CONFIG_PAGE_SIZE_64KB) && !defined(CONFIG_MIPS_VA_BITS_48) #include <asm-generic/pgtable-nopmd.h> #else diff --git a/arch/mips/kernel/smp-bmips.c b/arch/mips/kernel/smp-bmips.c index 3daa2cae50b0..1b070a76fcdd 100644 --- a/arch/mips/kernel/smp-bmips.c +++ b/arch/mips/kernel/smp-bmips.c @@ -11,6 +11,7 @@ #include <linux/init.h> #include <linux/sched.h> #include <linux/sched/hotplug.h> +#include <linux/sched/task_stack.h> #include <linux/mm.h> #include <linux/delay.h> #include <linux/smp.h> diff --git a/arch/mips/kernel/smp-mt.c b/arch/mips/kernel/smp-mt.c index e077ea3e11fb..e398cbc3d776 100644 --- a/arch/mips/kernel/smp-mt.c +++ b/arch/mips/kernel/smp-mt.c @@ -23,6 +23,7 @@ #include <linux/interrupt.h> #include <linux/irqchip/mips-gic.h> #include <linux/compiler.h> +#include <linux/sched/task_stack.h> #include <linux/smp.h> #include <linux/atomic.h> diff --git a/arch/mips/loongson64/loongson-3/cop2-ex.c b/arch/mips/loongson64/loongson-3/cop2-ex.c index ea13764d0a03..621d6af5f6eb 100644 --- a/arch/mips/loongson64/loongson-3/cop2-ex.c +++ b/arch/mips/loongson64/loongson-3/cop2-ex.c @@ -13,6 +13,7 @@ #include <linux/init.h> #include <linux/sched.h> #include <linux/notifier.h> +#include <linux/ptrace.h> #include <asm/fpu.h> #include <asm/cop2.h> diff --git a/arch/mips/netlogic/common/smp.c b/arch/mips/netlogic/common/smp.c index 10d86d54880a..bddf1ef553a4 100644 --- a/arch/mips/netlogic/common/smp.c +++ b/arch/mips/netlogic/common/smp.c @@ -35,6 +35,7 @@ #include <linux/kernel.h> #include <linux/delay.h> #include <linux/init.h> +#include <linux/sched/task_stack.h> #include <linux/smp.h> #include <linux/irq.h> diff --git a/arch/mips/netlogic/xlp/cop2-ex.c b/arch/mips/netlogic/xlp/cop2-ex.c index 52bc5de42005..21e439b3db70 100644 --- a/arch/mips/netlogic/xlp/cop2-ex.c +++ b/arch/mips/netlogic/xlp/cop2-ex.c @@ -9,11 +9,14 @@ * Copyright (C) 2009 Wind River Systems, * written by Ralf Baechle <ralf@linux-mips.org> */ +#include <linux/capability.h> #include <linux/init.h> #include <linux/irqflags.h> #include <linux/notifier.h> #include <linux/prefetch.h> +#include <linux/ptrace.h> #include <linux/sched.h> +#include <linux/sched/task_stack.h> #include <asm/cop2.h> #include <asm/current.h> diff --git a/arch/mips/sgi-ip22/ip28-berr.c b/arch/mips/sgi-ip22/ip28-berr.c index 1f2a5bc4779e..75460e1e106b 100644 --- a/arch/mips/sgi-ip22/ip28-berr.c +++ b/arch/mips/sgi-ip22/ip28-berr.c @@ -9,6 +9,7 @@ #include <linux/kernel.h> #include <linux/sched.h> #include <linux/sched/debug.h> +#include <linux/sched/signal.h> #include <linux/seq_file.h> #include <asm/addrspace.h> diff --git a/arch/mips/sgi-ip27/ip27-berr.c b/arch/mips/sgi-ip27/ip27-berr.c index d12879eb2b1f..83efe03d5c60 100644 --- a/arch/mips/sgi-ip27/ip27-berr.c +++ b/arch/mips/sgi-ip27/ip27-berr.c @@ -12,7 +12,9 @@ #include <linux/signal.h> /* for SIGBUS */ #include <linux/sched.h> /* schow_regs(), force_sig() */ #include <linux/sched/debug.h> +#include <linux/sched/signal.h> +#include <asm/ptrace.h> #include <asm/sn/addrs.h> #include <asm/sn/arch.h> #include <asm/sn/sn0/hub.h> diff --git a/arch/mips/sgi-ip27/ip27-smp.c b/arch/mips/sgi-ip27/ip27-smp.c index f5ed45e8f442..4cd47d23d81a 100644 --- a/arch/mips/sgi-ip27/ip27-smp.c +++ b/arch/mips/sgi-ip27/ip27-smp.c @@ -8,10 +8,13 @@ */ #include <linux/init.h> #include <linux/sched.h> +#include <linux/sched/task_stack.h> #include <linux/topology.h> #include <linux/nodemask.h> + #include <asm/page.h> #include <asm/processor.h> +#include <asm/ptrace.h> #include <asm/sn/arch.h> #include <asm/sn/gda.h> #include <asm/sn/intr.h> diff --git a/arch/mips/sgi-ip32/ip32-berr.c b/arch/mips/sgi-ip32/ip32-berr.c index 57d8c7486fe6..c1f12a9cf305 100644 --- a/arch/mips/sgi-ip32/ip32-berr.c +++ b/arch/mips/sgi-ip32/ip32-berr.c @@ -11,6 +11,7 @@ #include <linux/kernel.h> #include <linux/sched.h> #include <linux/sched/debug.h> +#include <linux/sched/signal.h> #include <asm/traps.h> #include <linux/uaccess.h> #include <asm/addrspace.h> diff --git a/arch/mips/sgi-ip32/ip32-reset.c b/arch/mips/sgi-ip32/ip32-reset.c index 8bd415c8729f..b3b442def423 100644 --- a/arch/mips/sgi-ip32/ip32-reset.c +++ b/arch/mips/sgi-ip32/ip32-reset.c @@ -13,6 +13,7 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/sched.h> +#include <linux/sched/signal.h> #include <linux/notifier.h> #include <linux/delay.h> #include <linux/rtc/ds1685.h> diff --git a/arch/mn10300/include/asm/page.h b/arch/mn10300/include/asm/page.h index 3810a6f740fd..dfe730a5ede0 100644 --- a/arch/mn10300/include/asm/page.h +++ b/arch/mn10300/include/asm/page.h @@ -57,6 +57,7 @@ typedef struct page *pgtable_t; #define __pgd(x) ((pgd_t) { (x) }) #define __pgprot(x) ((pgprot_t) { (x) }) +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopmd.h> #endif /* !__ASSEMBLY__ */ diff --git a/arch/nios2/include/asm/pgtable.h b/arch/nios2/include/asm/pgtable.h index 298393c3cb42..db4f7d179220 100644 --- a/arch/nios2/include/asm/pgtable.h +++ b/arch/nios2/include/asm/pgtable.h @@ -22,6 +22,7 @@ #include <asm/tlbflush.h> #include <asm/pgtable-bits.h> +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopmd.h> #define FIRST_USER_ADDRESS 0UL diff --git a/arch/openrisc/include/asm/pgtable.h b/arch/openrisc/include/asm/pgtable.h index 3567aa7be555..ff97374ca069 100644 --- a/arch/openrisc/include/asm/pgtable.h +++ b/arch/openrisc/include/asm/pgtable.h @@ -25,6 +25,7 @@ #ifndef __ASM_OPENRISC_PGTABLE_H #define __ASM_OPENRISC_PGTABLE_H +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopmd.h> #ifndef __ASSEMBLY__ diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 494091762bd7..97a8bc8a095c 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -80,93 +80,99 @@ config ARCH_HAS_DMA_SET_COHERENT_MASK config PPC bool default y - select BUILDTIME_EXTABLE_SORT + # + # Please keep this list sorted alphabetically. + # + select ARCH_HAS_DEVMEM_IS_ALLOWED + select ARCH_HAS_DMA_SET_COHERENT_MASK + select ARCH_HAS_ELF_RANDOMIZE + select ARCH_HAS_GCOV_PROFILE_ALL + select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE + select ARCH_HAS_SG_CHAIN + select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST + select ARCH_HAS_UBSAN_SANITIZE_ALL + select ARCH_HAVE_NMI_SAFE_CMPXCHG select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_MIGHT_HAVE_PC_SERIO + select ARCH_SUPPORTS_ATOMIC_RMW + select ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT + select ARCH_USE_BUILTIN_BSWAP + select ARCH_USE_CMPXCHG_LOCKREF if PPC64 + select ARCH_WANT_IPC_PARSE_VERSION select BINFMT_ELF - select ARCH_HAS_ELF_RANDOMIZE - select OF - select OF_EARLY_FLATTREE - select OF_RESERVED_MEM - select HAVE_FTRACE_MCOUNT_RECORD + select BUILDTIME_EXTABLE_SORT + select CLONE_BACKWARDS + select DCACHE_WORD_ACCESS if PPC64 && CPU_LITTLE_ENDIAN + select EDAC_ATOMIC_SCRUB + select EDAC_SUPPORT + select GENERIC_ATOMIC64 if PPC32 + select GENERIC_CLOCKEVENTS + select GENERIC_CLOCKEVENTS_BROADCAST if SMP + select GENERIC_CMOS_UPDATE + select GENERIC_CPU_AUTOPROBE + select GENERIC_IRQ_SHOW + select GENERIC_IRQ_SHOW_LEVEL + select GENERIC_SMP_IDLE_THREAD + select GENERIC_STRNCPY_FROM_USER + select GENERIC_STRNLEN_USER + select GENERIC_TIME_VSYSCALL_OLD + select HAVE_ARCH_AUDITSYSCALL + select HAVE_ARCH_HARDENED_USERCOPY + select HAVE_ARCH_JUMP_LABEL + select HAVE_ARCH_KGDB + select HAVE_ARCH_SECCOMP_FILTER + select HAVE_ARCH_TRACEHOOK + select HAVE_CBPF_JIT if !PPC64 + select HAVE_CONTEXT_TRACKING if PPC64 + select HAVE_DEBUG_KMEMLEAK + select HAVE_DEBUG_STACKOVERFLOW + select HAVE_DMA_API_DEBUG select HAVE_DYNAMIC_FTRACE - select HAVE_DYNAMIC_FTRACE_WITH_REGS if MPROFILE_KERNEL - select HAVE_FUNCTION_TRACER + select HAVE_DYNAMIC_FTRACE_WITH_REGS if MPROFILE_KERNEL + select HAVE_EBPF_JIT if PPC64 + select HAVE_EFFICIENT_UNALIGNED_ACCESS if !(CPU_LITTLE_ENDIAN && POWER7_CPU) + select HAVE_FTRACE_MCOUNT_RECORD select HAVE_FUNCTION_GRAPH_TRACER + select HAVE_FUNCTION_TRACER select HAVE_GCC_PLUGINS - select SYSCTL_EXCEPTION_TRACE - select VIRT_TO_BUS if !PPC64 + select HAVE_GENERIC_RCU_GUP + select HAVE_HW_BREAKPOINT if PERF_EVENTS && (PPC_BOOK3S || PPC_8xx) select HAVE_IDE select HAVE_IOREMAP_PROT - select HAVE_EFFICIENT_UNALIGNED_ACCESS if !(CPU_LITTLE_ENDIAN && POWER7_CPU) + select HAVE_IRQ_EXIT_ON_IRQ_STACK + select HAVE_KERNEL_GZIP select HAVE_KPROBES - select HAVE_OPTPROBES if PPC64 - select HAVE_ARCH_KGDB select HAVE_KRETPROBES - select HAVE_ARCH_TRACEHOOK + select HAVE_LIVEPATCH if HAVE_DYNAMIC_FTRACE_WITH_REGS select HAVE_MEMBLOCK select HAVE_MEMBLOCK_NODE_MAP - select HAVE_DMA_API_DEBUG + select HAVE_MOD_ARCH_SPECIFIC + select HAVE_NMI if PERF_EVENTS select HAVE_OPROFILE - select HAVE_DEBUG_KMEMLEAK - select ARCH_HAS_SG_CHAIN - select GENERIC_ATOMIC64 if PPC32 + select HAVE_OPTPROBES if PPC64 select HAVE_PERF_EVENTS + select HAVE_PERF_EVENTS_NMI if PPC64 select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP + select HAVE_RCU_TABLE_FREE if SMP select HAVE_REGS_AND_STACK_ACCESS_API - select HAVE_HW_BREAKPOINT if PERF_EVENTS && (PPC_BOOK3S || PPC_8xx) - select ARCH_WANT_IPC_PARSE_VERSION - select SPARSE_IRQ + select HAVE_SYSCALL_TRACEPOINTS + select HAVE_VIRT_CPU_ACCOUNTING select IRQ_DOMAIN - select GENERIC_IRQ_SHOW - select GENERIC_IRQ_SHOW_LEVEL select IRQ_FORCED_THREADING - select HAVE_RCU_TABLE_FREE if SMP - select HAVE_SYSCALL_TRACEPOINTS - select HAVE_CBPF_JIT if !PPC64 - select HAVE_EBPF_JIT if PPC64 - select HAVE_ARCH_JUMP_LABEL - select ARCH_HAVE_NMI_SAFE_CMPXCHG - select ARCH_HAS_GCOV_PROFILE_ALL - select GENERIC_SMP_IDLE_THREAD - select GENERIC_CMOS_UPDATE - select GENERIC_TIME_VSYSCALL_OLD - select GENERIC_CLOCKEVENTS - select GENERIC_CLOCKEVENTS_BROADCAST if SMP - select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST - select GENERIC_STRNCPY_FROM_USER - select GENERIC_STRNLEN_USER - select HAVE_MOD_ARCH_SPECIFIC select MODULES_USE_ELF_RELA - select CLONE_BACKWARDS - select ARCH_USE_BUILTIN_BSWAP - select OLD_SIGSUSPEND - select OLD_SIGACTION if PPC32 - select HAVE_DEBUG_STACKOVERFLOW - select HAVE_IRQ_EXIT_ON_IRQ_STACK - select ARCH_USE_CMPXCHG_LOCKREF if PPC64 - select HAVE_ARCH_AUDITSYSCALL - select ARCH_SUPPORTS_ATOMIC_RMW - select DCACHE_WORD_ACCESS if PPC64 && CPU_LITTLE_ENDIAN select NO_BOOTMEM - select HAVE_GENERIC_RCU_GUP - select HAVE_PERF_EVENTS_NMI if PPC64 - select HAVE_NMI if PERF_EVENTS - select EDAC_SUPPORT - select EDAC_ATOMIC_SCRUB - select ARCH_HAS_DMA_SET_COHERENT_MASK - select ARCH_HAS_DEVMEM_IS_ALLOWED - select HAVE_ARCH_SECCOMP_FILTER - select ARCH_HAS_UBSAN_SANITIZE_ALL - select ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT - select HAVE_LIVEPATCH if HAVE_DYNAMIC_FTRACE_WITH_REGS - select GENERIC_CPU_AUTOPROBE - select HAVE_VIRT_CPU_ACCOUNTING - select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE - select HAVE_ARCH_HARDENED_USERCOPY - select HAVE_KERNEL_GZIP - select HAVE_CONTEXT_TRACKING if PPC64 + select OF + select OF_EARLY_FLATTREE + select OF_RESERVED_MEM + select OLD_SIGACTION if PPC32 + select OLD_SIGSUSPEND + select SPARSE_IRQ + select SYSCTL_EXCEPTION_TRACE + select VIRT_TO_BUS if !PPC64 + # + # Please keep this list sorted alphabetically. + # config GENERIC_CSUM def_bool n diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 31286fa7873c..19b0d1a81959 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -72,8 +72,15 @@ GNUTARGET := powerpc MULTIPLEWORD := -mmultiple endif -cflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mbig-endian) +ifdef CONFIG_PPC64 +cflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mabi=elfv1) +cflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mcall-aixdesc) +aflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mabi=elfv1) +aflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mabi=elfv2 +endif + cflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mlittle-endian +cflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mbig-endian) ifneq ($(cc-name),clang) cflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mno-strict-align endif @@ -113,7 +120,9 @@ ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y) CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv2,$(call cc-option,-mcall-aixdesc)) AFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv2) else +CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv1) CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mcall-aixdesc) +AFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv1) endif CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mcmodel=medium,$(call cc-option,-mminimal-toc)) CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mno-pointers-to-nested-functions) diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 012223638815..26ed228d4dc6 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -1,6 +1,7 @@ #ifndef _ASM_POWERPC_BOOK3S_32_PGTABLE_H #define _ASM_POWERPC_BOOK3S_32_PGTABLE_H +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopmd.h> #include <asm/book3s/32/hash.h> diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 1eeeb72c7015..8f4d41936e5a 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -1,9 +1,12 @@ #ifndef _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ #define _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ +#include <asm-generic/5level-fixup.h> + #ifndef __ASSEMBLY__ #include <linux/mmdebug.h> #endif + /* * Common bits between hash and Radix page table */ @@ -347,23 +350,58 @@ static inline int __ptep_test_and_clear_young(struct mm_struct *mm, __r; \ }) +static inline int __pte_write(pte_t pte) +{ + return !!(pte_raw(pte) & cpu_to_be64(_PAGE_WRITE)); +} + +#ifdef CONFIG_NUMA_BALANCING +#define pte_savedwrite pte_savedwrite +static inline bool pte_savedwrite(pte_t pte) +{ + /* + * Saved write ptes are prot none ptes that doesn't have + * privileged bit sit. We mark prot none as one which has + * present and pviliged bit set and RWX cleared. To mark + * protnone which used to have _PAGE_WRITE set we clear + * the privileged bit. + */ + return !(pte_raw(pte) & cpu_to_be64(_PAGE_RWX | _PAGE_PRIVILEGED)); +} +#else +#define pte_savedwrite pte_savedwrite +static inline bool pte_savedwrite(pte_t pte) +{ + return false; +} +#endif + +static inline int pte_write(pte_t pte) +{ + return __pte_write(pte) || pte_savedwrite(pte); +} + #define __HAVE_ARCH_PTEP_SET_WRPROTECT static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - if ((pte_raw(*ptep) & cpu_to_be64(_PAGE_WRITE)) == 0) - return; - - pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 0); + if (__pte_write(*ptep)) + pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 0); + else if (unlikely(pte_savedwrite(*ptep))) + pte_update(mm, addr, ptep, 0, _PAGE_PRIVILEGED, 0); } static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - if ((pte_raw(*ptep) & cpu_to_be64(_PAGE_WRITE)) == 0) - return; - - pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 1); + /* + * We should not find protnone for hugetlb, but this complete the + * interface. + */ + if (__pte_write(*ptep)) + pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 1); + else if (unlikely(pte_savedwrite(*ptep))) + pte_update(mm, addr, ptep, 0, _PAGE_PRIVILEGED, 1); } #define __HAVE_ARCH_PTEP_GET_AND_CLEAR @@ -397,11 +435,6 @@ static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_update(mm, addr, ptep, ~0UL, 0, 0); } -static inline int pte_write(pte_t pte) -{ - return !!(pte_raw(pte) & cpu_to_be64(_PAGE_WRITE)); -} - static inline int pte_dirty(pte_t pte) { return !!(pte_raw(pte) & cpu_to_be64(_PAGE_DIRTY)); @@ -465,19 +498,12 @@ static inline pte_t pte_clear_savedwrite(pte_t pte) VM_BUG_ON(!pte_protnone(pte)); return __pte(pte_val(pte) | _PAGE_PRIVILEGED); } - -#define pte_savedwrite pte_savedwrite -static inline bool pte_savedwrite(pte_t pte) +#else +#define pte_clear_savedwrite pte_clear_savedwrite +static inline pte_t pte_clear_savedwrite(pte_t pte) { - /* - * Saved write ptes are prot none ptes that doesn't have - * privileged bit sit. We mark prot none as one which has - * present and pviliged bit set and RWX cleared. To mark - * protnone which used to have _PAGE_WRITE set we clear - * the privileged bit. - */ - VM_BUG_ON(!pte_protnone(pte)); - return !(pte_raw(pte) & cpu_to_be64(_PAGE_RWX | _PAGE_PRIVILEGED)); + VM_WARN_ON(1); + return __pte(pte_val(pte) & ~_PAGE_WRITE); } #endif /* CONFIG_NUMA_BALANCING */ @@ -506,6 +532,8 @@ static inline unsigned long pte_pfn(pte_t pte) /* Generic modifiers for PTE bits */ static inline pte_t pte_wrprotect(pte_t pte) { + if (unlikely(pte_savedwrite(pte))) + return pte_clear_savedwrite(pte); return __pte(pte_val(pte) & ~_PAGE_WRITE); } @@ -926,6 +954,7 @@ static inline int pmd_protnone(pmd_t pmd) #define __HAVE_ARCH_PMD_WRITE #define pmd_write(pmd) pte_write(pmd_pte(pmd)) +#define __pmd_write(pmd) __pte_write(pmd_pte(pmd)) #define pmd_savedwrite(pmd) pte_savedwrite(pmd_pte(pmd)) #ifdef CONFIG_TRANSPARENT_HUGEPAGE @@ -982,11 +1011,10 @@ static inline int __pmdp_test_and_clear_young(struct mm_struct *mm, static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp) { - - if ((pmd_raw(*pmdp) & cpu_to_be64(_PAGE_WRITE)) == 0) - return; - - pmd_hugepage_update(mm, addr, pmdp, _PAGE_WRITE, 0); + if (__pmd_write((*pmdp))) + pmd_hugepage_update(mm, addr, pmdp, _PAGE_WRITE, 0); + else if (unlikely(pmd_savedwrite(*pmdp))) + pmd_hugepage_update(mm, addr, pmdp, 0, _PAGE_PRIVILEGED); } static inline int pmd_trans_huge(pmd_t pmd) diff --git a/arch/powerpc/include/asm/checksum.h b/arch/powerpc/include/asm/checksum.h index 4e63787dc3be..842124b199b5 100644 --- a/arch/powerpc/include/asm/checksum.h +++ b/arch/powerpc/include/asm/checksum.h @@ -112,7 +112,7 @@ static inline __wsum csum_add(__wsum csum, __wsum addend) #ifdef __powerpc64__ res += (__force u64)addend; - return (__force __wsum)((u32)res + (res >> 32)); + return (__force __wsum) from64to32(res); #else asm("addc %0,%0,%1;" "addze %0,%0;" diff --git a/arch/powerpc/include/asm/cpuidle.h b/arch/powerpc/include/asm/cpuidle.h index fd321eb423cb..155731557c9b 100644 --- a/arch/powerpc/include/asm/cpuidle.h +++ b/arch/powerpc/include/asm/cpuidle.h @@ -70,8 +70,8 @@ static inline void report_invalid_psscr_val(u64 psscr_val, int err) std r0,0(r1); \ ptesync; \ ld r0,0(r1); \ -1: cmpd cr0,r0,r0; \ - bne 1b; \ +236: cmpd cr0,r0,r0; \ + bne 236b; \ IDLE_INST; \ #define IDLE_STATE_ENTER_SEQ_NORET(IDLE_INST) \ diff --git a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h index 93b9b84568e8..09bde6e34f5d 100644 --- a/arch/powerpc/include/asm/elf.h +++ b/arch/powerpc/include/asm/elf.h @@ -144,8 +144,8 @@ extern int arch_setup_additional_pages(struct linux_binprm *bprm, #define ARCH_DLINFO_CACHE_GEOMETRY \ NEW_AUX_ENT(AT_L1I_CACHESIZE, ppc64_caches.l1i.size); \ NEW_AUX_ENT(AT_L1I_CACHEGEOMETRY, get_cache_geometry(l1i)); \ - NEW_AUX_ENT(AT_L1D_CACHESIZE, ppc64_caches.l1i.size); \ - NEW_AUX_ENT(AT_L1D_CACHEGEOMETRY, get_cache_geometry(l1i)); \ + NEW_AUX_ENT(AT_L1D_CACHESIZE, ppc64_caches.l1d.size); \ + NEW_AUX_ENT(AT_L1D_CACHEGEOMETRY, get_cache_geometry(l1d)); \ NEW_AUX_ENT(AT_L2_CACHESIZE, ppc64_caches.l2.size); \ NEW_AUX_ENT(AT_L2_CACHEGEOMETRY, get_cache_geometry(l2)); \ NEW_AUX_ENT(AT_L3_CACHESIZE, ppc64_caches.l3.size); \ diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index ba9921bf202e..5134ade2e850 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -1,6 +1,7 @@ #ifndef _ASM_POWERPC_NOHASH_32_PGTABLE_H #define _ASM_POWERPC_NOHASH_32_PGTABLE_H +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopmd.h> #ifndef __ASSEMBLY__ diff --git a/arch/powerpc/include/asm/nohash/64/pgtable-4k.h b/arch/powerpc/include/asm/nohash/64/pgtable-4k.h index d0db98793dd8..9f4de0a1035e 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable-4k.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable-4k.h @@ -1,5 +1,8 @@ #ifndef _ASM_POWERPC_NOHASH_64_PGTABLE_4K_H #define _ASM_POWERPC_NOHASH_64_PGTABLE_4K_H + +#include <asm-generic/5level-fixup.h> + /* * Entries per page directory level. The PTE level must use a 64b record * for each page table entry. The PMD and PGD level use a 32b record for diff --git a/arch/powerpc/include/asm/nohash/64/pgtable-64k.h b/arch/powerpc/include/asm/nohash/64/pgtable-64k.h index 55b28ef3409a..1facb584dd29 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable-64k.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable-64k.h @@ -1,6 +1,7 @@ #ifndef _ASM_POWERPC_NOHASH_64_PGTABLE_64K_H #define _ASM_POWERPC_NOHASH_64_PGTABLE_64K_H +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopud.h> diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h index 0cd8a3852763..e5805ad78e12 100644 --- a/arch/powerpc/include/asm/nohash/pgtable.h +++ b/arch/powerpc/include/asm/nohash/pgtable.h @@ -230,7 +230,7 @@ static inline int hugepd_ok(hugepd_t hpd) return ((hpd_val(hpd) & 0x4) != 0); #else /* We clear the top bit to indicate hugepd */ - return ((hpd_val(hpd) & PD_HUGE) == 0); + return (hpd_val(hpd) && (hpd_val(hpd) & PD_HUGE) == 0); #endif } diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index d99bd442aacb..e7d6d86563ee 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -284,6 +284,13 @@ #define PPC_INST_BRANCH_COND 0x40800000 #define PPC_INST_LBZCIX 0x7c0006aa #define PPC_INST_STBCIX 0x7c0007aa +#define PPC_INST_LWZX 0x7c00002e +#define PPC_INST_LFSX 0x7c00042e +#define PPC_INST_STFSX 0x7c00052e +#define PPC_INST_LFDX 0x7c0004ae +#define PPC_INST_STFDX 0x7c0005ae +#define PPC_INST_LVX 0x7c0000ce +#define PPC_INST_STVX 0x7c0001ce /* macros to insert fields into opcodes */ #define ___PPC_RA(a) (((a) & 0x1f) << 16) diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h index 4a90634e8322..35c00d7a0cf8 100644 --- a/arch/powerpc/include/asm/prom.h +++ b/arch/powerpc/include/asm/prom.h @@ -160,12 +160,18 @@ struct of_drconf_cell { #define OV5_PFO_HW_ENCR 0x1120 /* PFO Encryption Accelerator */ #define OV5_SUB_PROCESSORS 0x1501 /* 1,2,or 4 Sub-Processors supported */ #define OV5_XIVE_EXPLOIT 0x1701 /* XIVE exploitation supported */ -#define OV5_MMU_RADIX_300 0x1880 /* ISA v3.00 radix MMU supported */ -#define OV5_MMU_HASH_300 0x1840 /* ISA v3.00 hash MMU supported */ -#define OV5_MMU_SEGM_RADIX 0x1820 /* radix mode (no segmentation) */ -#define OV5_MMU_PROC_TBL 0x1810 /* hcall selects SLB or proc table */ -#define OV5_MMU_SLB 0x1800 /* always use SLB */ -#define OV5_MMU_GTSE 0x1808 /* Guest translation shootdown */ +/* MMU Base Architecture */ +#define OV5_MMU_SUPPORT 0x18C0 /* MMU Mode Support Mask */ +#define OV5_MMU_HASH 0x1800 /* Hash MMU Only */ +#define OV5_MMU_RADIX 0x1840 /* Radix MMU Only */ +#define OV5_MMU_EITHER 0x1880 /* Hash or Radix Supported */ +#define OV5_MMU_DYNAMIC 0x18C0 /* Hash or Radix Can Switch Later */ +#define OV5_NMMU 0x1820 /* Nest MMU Available */ +/* Hash Table Extensions */ +#define OV5_HASH_SEG_TBL 0x1980 /* In Memory Segment Tables Available */ +#define OV5_HASH_GTSE 0x1940 /* Guest Translation Shoot Down Avail */ +/* Radix Table Extensions */ +#define OV5_RADIX_GTSE 0x1A40 /* Guest Translation Shoot Down Avail */ /* Option Vector 6: IBM PAPR hints */ #define OV6_LINUX 0x02 /* Linux is our OS */ diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index 5f61cc0349c0..995728736677 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -276,19 +276,21 @@ power_enter_stop: */ andis. r4,r3,PSSCR_EC_ESL_MASK_SHIFTED clrldi r3,r3,60 /* r3 = Bits[60:63] = Requested Level (RL) */ - bne 1f + bne .Lhandle_esl_ec_set IDLE_STATE_ENTER_SEQ(PPC_STOP) li r3,0 /* Since we didn't lose state, return 0 */ b pnv_wakeup_noloss + +.Lhandle_esl_ec_set: /* * Check if the requested state is a deep idle state. */ -1: LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state) + LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state) ld r4,ADDROFF(pnv_first_deep_stop_state)(r5) cmpd r3,r4 - bge 2f + bge .Lhandle_deep_stop IDLE_STATE_ENTER_SEQ_NORET(PPC_STOP) -2: +.Lhandle_deep_stop: /* * Entering deep idle state. * Clear thread bit in PACA_CORE_IDLE_STATE, save SPRs to diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index a3944540fe0d..1c1b44ec7642 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -168,6 +168,14 @@ static unsigned long __initdata prom_tce_alloc_start; static unsigned long __initdata prom_tce_alloc_end; #endif +static bool __initdata prom_radix_disable; + +struct platform_support { + bool hash_mmu; + bool radix_mmu; + bool radix_gtse; +}; + /* Platforms codes are now obsolete in the kernel. Now only used within this * file and ultimately gone too. Feel free to change them if you need, they * are not shared with anything outside of this file anymore @@ -626,6 +634,12 @@ static void __init early_cmdline_parse(void) prom_memory_limit = ALIGN(prom_memory_limit, 0x1000000); #endif } + + opt = strstr(prom_cmd_line, "disable_radix"); + if (opt) { + prom_debug("Radix disabled from cmdline\n"); + prom_radix_disable = true; + } } #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) @@ -695,6 +709,8 @@ struct option_vector5 { u8 byte22; u8 intarch; u8 mmu; + u8 hash_ext; + u8 radix_ext; } __packed; struct option_vector6 { @@ -850,8 +866,9 @@ struct ibm_arch_vec __cacheline_aligned ibm_architecture_vec = { .reserved3 = 0, .subprocessors = 1, .intarch = 0, - .mmu = OV5_FEAT(OV5_MMU_RADIX_300) | OV5_FEAT(OV5_MMU_HASH_300) | - OV5_FEAT(OV5_MMU_PROC_TBL) | OV5_FEAT(OV5_MMU_GTSE), + .mmu = 0, + .hash_ext = 0, + .radix_ext = 0, }, /* option vector 6: IBM PAPR hints */ @@ -990,6 +1007,92 @@ static int __init prom_count_smt_threads(void) } +static void __init prom_parse_mmu_model(u8 val, + struct platform_support *support) +{ + switch (val) { + case OV5_FEAT(OV5_MMU_DYNAMIC): + case OV5_FEAT(OV5_MMU_EITHER): /* Either Available */ + prom_debug("MMU - either supported\n"); + support->radix_mmu = !prom_radix_disable; + support->hash_mmu = true; + break; + case OV5_FEAT(OV5_MMU_RADIX): /* Only Radix */ + prom_debug("MMU - radix only\n"); + if (prom_radix_disable) { + /* + * If we __have__ to do radix, we're better off ignoring + * the command line rather than not booting. + */ + prom_printf("WARNING: Ignoring cmdline option disable_radix\n"); + } + support->radix_mmu = true; + break; + case OV5_FEAT(OV5_MMU_HASH): + prom_debug("MMU - hash only\n"); + support->hash_mmu = true; + break; + default: + prom_debug("Unknown mmu support option: 0x%x\n", val); + break; + } +} + +static void __init prom_parse_platform_support(u8 index, u8 val, + struct platform_support *support) +{ + switch (index) { + case OV5_INDX(OV5_MMU_SUPPORT): /* MMU Model */ + prom_parse_mmu_model(val & OV5_FEAT(OV5_MMU_SUPPORT), support); + break; + case OV5_INDX(OV5_RADIX_GTSE): /* Radix Extensions */ + if (val & OV5_FEAT(OV5_RADIX_GTSE)) { + prom_debug("Radix - GTSE supported\n"); + support->radix_gtse = true; + } + break; + } +} + +static void __init prom_check_platform_support(void) +{ + struct platform_support supported = { + .hash_mmu = false, + .radix_mmu = false, + .radix_gtse = false + }; + int prop_len = prom_getproplen(prom.chosen, + "ibm,arch-vec-5-platform-support"); + if (prop_len > 1) { + int i; + u8 vec[prop_len]; + prom_debug("Found ibm,arch-vec-5-platform-support, len: %d\n", + prop_len); + prom_getprop(prom.chosen, "ibm,arch-vec-5-platform-support", + &vec, sizeof(vec)); + for (i = 0; i < prop_len; i += 2) { + prom_debug("%d: index = 0x%x val = 0x%x\n", i / 2 + , vec[i] + , vec[i + 1]); + prom_parse_platform_support(vec[i], vec[i + 1], + &supported); + } + } + + if (supported.radix_mmu && supported.radix_gtse) { + /* Radix preferred - but we require GTSE for now */ + prom_debug("Asking for radix with GTSE\n"); + ibm_architecture_vec.vec5.mmu = OV5_FEAT(OV5_MMU_RADIX); + ibm_architecture_vec.vec5.radix_ext = OV5_FEAT(OV5_RADIX_GTSE); + } else if (supported.hash_mmu) { + /* Default to hash mmu (if we can) */ + prom_debug("Asking for hash\n"); + ibm_architecture_vec.vec5.mmu = OV5_FEAT(OV5_MMU_HASH); + } else { + /* We're probably on a legacy hypervisor */ + prom_debug("Assuming legacy hash support\n"); + } +} static void __init prom_send_capabilities(void) { @@ -997,6 +1100,9 @@ static void __init prom_send_capabilities(void) prom_arg_t ret; u32 cores; + /* Check ibm,arch-vec-5-platform-support and fixup vec5 if required */ + prom_check_platform_support(); + root = call_prom("open", 1, 1, ADDR("/")); if (root != 0) { /* We need to tell the FW about the number of cores we support. @@ -2993,6 +3099,11 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, */ prom_check_initrd(r3, r4); + /* + * Do early parsing of command line + */ + early_cmdline_parse(); + #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) /* * On pSeries, inform the firmware about our capabilities @@ -3009,11 +3120,6 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, copy_and_flush(0, kbase, 0x100, 0); /* - * Do early parsing of command line - */ - early_cmdline_parse(); - - /* * Initialize memory management within prom_init */ prom_init_mem(); diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index adf2084f214b..9cfaa8b69b5f 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -408,7 +408,10 @@ static void init_cache_info(struct ppc_cache_info *info, u32 size, u32 lsize, info->line_size = lsize; info->block_size = bsize; info->log_block_size = __ilog2(bsize); - info->blocks_per_page = PAGE_SIZE / bsize; + if (bsize) + info->blocks_per_page = PAGE_SIZE / bsize; + else + info->blocks_per_page = 0; if (sets == 0) info->assoc = 0xffff; diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index f3158fb16de3..8c68145ba1bd 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -601,7 +601,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, hva, NULL, NULL); if (ptep) { pte = kvmppc_read_update_linux_pte(ptep, 1); - if (pte_write(pte)) + if (__pte_write(pte)) write_ok = 1; } local_irq_restore(flags); diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 6fca970373ee..ce6f2121fffe 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -256,7 +256,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, } pte = kvmppc_read_update_linux_pte(ptep, writing); if (pte_present(pte) && !pte_protnone(pte)) { - if (writing && !pte_write(pte)) + if (writing && !__pte_write(pte)) /* make the actual HPTE be read-only */ ptel = hpte_make_readonly(ptel); is_ci = pte_ci(pte); diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index 0e649d72fe8d..2b5e09020cfe 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -20,6 +20,7 @@ obj64-y += copypage_64.o copyuser_64.o usercopy_64.o mem_64.o hweight_64.o \ obj64-$(CONFIG_SMP) += locks.o obj64-$(CONFIG_ALTIVEC) += vmx-helper.o +obj64-$(CONFIG_KPROBES_SANITY_TEST) += test_emulate_step.o obj-y += checksum_$(BITS).o checksum_wrappers.o diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 846dba2c6360..9c542ec70c5b 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -1799,8 +1799,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) goto instr_done; case LARX: - if (regs->msr & MSR_LE) - return 0; if (op.ea & (size - 1)) break; /* can't handle misaligned */ if (!address_ok(regs, op.ea, size)) @@ -1823,8 +1821,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) goto ldst_done; case STCX: - if (regs->msr & MSR_LE) - return 0; if (op.ea & (size - 1)) break; /* can't handle misaligned */ if (!address_ok(regs, op.ea, size)) @@ -1849,8 +1845,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) goto ldst_done; case LOAD: - if (regs->msr & MSR_LE) - return 0; err = read_mem(®s->gpr[op.reg], op.ea, size, regs); if (!err) { if (op.type & SIGNEXT) @@ -1862,8 +1856,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) #ifdef CONFIG_PPC_FPU case LOAD_FP: - if (regs->msr & MSR_LE) - return 0; if (size == 4) err = do_fp_load(op.reg, do_lfs, op.ea, size, regs); else @@ -1872,15 +1864,11 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) #endif #ifdef CONFIG_ALTIVEC case LOAD_VMX: - if (regs->msr & MSR_LE) - return 0; err = do_vec_load(op.reg, do_lvx, op.ea & ~0xfUL, regs); goto ldst_done; #endif #ifdef CONFIG_VSX case LOAD_VSX: - if (regs->msr & MSR_LE) - return 0; err = do_vsx_load(op.reg, do_lxvd2x, op.ea, regs); goto ldst_done; #endif @@ -1903,8 +1891,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) goto instr_done; case STORE: - if (regs->msr & MSR_LE) - return 0; if ((op.type & UPDATE) && size == sizeof(long) && op.reg == 1 && op.update_reg == 1 && !(regs->msr & MSR_PR) && @@ -1917,8 +1903,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) #ifdef CONFIG_PPC_FPU case STORE_FP: - if (regs->msr & MSR_LE) - return 0; if (size == 4) err = do_fp_store(op.reg, do_stfs, op.ea, size, regs); else @@ -1927,15 +1911,11 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) #endif #ifdef CONFIG_ALTIVEC case STORE_VMX: - if (regs->msr & MSR_LE) - return 0; err = do_vec_store(op.reg, do_stvx, op.ea & ~0xfUL, regs); goto ldst_done; #endif #ifdef CONFIG_VSX case STORE_VSX: - if (regs->msr & MSR_LE) - return 0; err = do_vsx_store(op.reg, do_stxvd2x, op.ea, regs); goto ldst_done; #endif diff --git a/arch/powerpc/lib/test_emulate_step.c b/arch/powerpc/lib/test_emulate_step.c new file mode 100644 index 000000000000..2534c1447554 --- /dev/null +++ b/arch/powerpc/lib/test_emulate_step.c @@ -0,0 +1,434 @@ +/* + * Simple sanity test for emulate_step load/store instructions. + * + * Copyright IBM Corp. 2016 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#define pr_fmt(fmt) "emulate_step_test: " fmt + +#include <linux/ptrace.h> +#include <asm/sstep.h> +#include <asm/ppc-opcode.h> + +#define IMM_L(i) ((uintptr_t)(i) & 0xffff) + +/* + * Defined with TEST_ prefix so it does not conflict with other + * definitions. + */ +#define TEST_LD(r, base, i) (PPC_INST_LD | ___PPC_RT(r) | \ + ___PPC_RA(base) | IMM_L(i)) +#define TEST_LWZ(r, base, i) (PPC_INST_LWZ | ___PPC_RT(r) | \ + ___PPC_RA(base) | IMM_L(i)) +#define TEST_LWZX(t, a, b) (PPC_INST_LWZX | ___PPC_RT(t) | \ + ___PPC_RA(a) | ___PPC_RB(b)) +#define TEST_STD(r, base, i) (PPC_INST_STD | ___PPC_RS(r) | \ + ___PPC_RA(base) | ((i) & 0xfffc)) +#define TEST_LDARX(t, a, b, eh) (PPC_INST_LDARX | ___PPC_RT(t) | \ + ___PPC_RA(a) | ___PPC_RB(b) | \ + __PPC_EH(eh)) +#define TEST_STDCX(s, a, b) (PPC_INST_STDCX | ___PPC_RS(s) | \ + ___PPC_RA(a) | ___PPC_RB(b)) +#define TEST_LFSX(t, a, b) (PPC_INST_LFSX | ___PPC_RT(t) | \ + ___PPC_RA(a) | ___PPC_RB(b)) +#define TEST_STFSX(s, a, b) (PPC_INST_STFSX | ___PPC_RS(s) | \ + ___PPC_RA(a) | ___PPC_RB(b)) +#define TEST_LFDX(t, a, b) (PPC_INST_LFDX | ___PPC_RT(t) | \ + ___PPC_RA(a) | ___PPC_RB(b)) +#define TEST_STFDX(s, a, b) (PPC_INST_STFDX | ___PPC_RS(s) | \ + ___PPC_RA(a) | ___PPC_RB(b)) +#define TEST_LVX(t, a, b) (PPC_INST_LVX | ___PPC_RT(t) | \ + ___PPC_RA(a) | ___PPC_RB(b)) +#define TEST_STVX(s, a, b) (PPC_INST_STVX | ___PPC_RS(s) | \ + ___PPC_RA(a) | ___PPC_RB(b)) +#define TEST_LXVD2X(s, a, b) (PPC_INST_LXVD2X | VSX_XX1((s), R##a, R##b)) +#define TEST_STXVD2X(s, a, b) (PPC_INST_STXVD2X | VSX_XX1((s), R##a, R##b)) + + +static void __init init_pt_regs(struct pt_regs *regs) +{ + static unsigned long msr; + static bool msr_cached; + + memset(regs, 0, sizeof(struct pt_regs)); + + if (likely(msr_cached)) { + regs->msr = msr; + return; + } + + asm volatile("mfmsr %0" : "=r"(regs->msr)); + + regs->msr |= MSR_FP; + regs->msr |= MSR_VEC; + regs->msr |= MSR_VSX; + + msr = regs->msr; + msr_cached = true; +} + +static void __init show_result(char *ins, char *result) +{ + pr_info("%-14s : %s\n", ins, result); +} + +static void __init test_ld(void) +{ + struct pt_regs regs; + unsigned long a = 0x23; + int stepped = -1; + + init_pt_regs(®s); + regs.gpr[3] = (unsigned long) &a; + + /* ld r5, 0(r3) */ + stepped = emulate_step(®s, TEST_LD(5, 3, 0)); + + if (stepped == 1 && regs.gpr[5] == a) + show_result("ld", "PASS"); + else + show_result("ld", "FAIL"); +} + +static void __init test_lwz(void) +{ + struct pt_regs regs; + unsigned int a = 0x4545; + int stepped = -1; + + init_pt_regs(®s); + regs.gpr[3] = (unsigned long) &a; + + /* lwz r5, 0(r3) */ + stepped = emulate_step(®s, TEST_LWZ(5, 3, 0)); + + if (stepped == 1 && regs.gpr[5] == a) + show_result("lwz", "PASS"); + else + show_result("lwz", "FAIL"); +} + +static void __init test_lwzx(void) +{ + struct pt_regs regs; + unsigned int a[3] = {0x0, 0x0, 0x1234}; + int stepped = -1; + + init_pt_regs(®s); + regs.gpr[3] = (unsigned long) a; + regs.gpr[4] = 8; + regs.gpr[5] = 0x8765; + + /* lwzx r5, r3, r4 */ + stepped = emulate_step(®s, TEST_LWZX(5, 3, 4)); + if (stepped == 1 && regs.gpr[5] == a[2]) + show_result("lwzx", "PASS"); + else + show_result("lwzx", "FAIL"); +} + +static void __init test_std(void) +{ + struct pt_regs regs; + unsigned long a = 0x1234; + int stepped = -1; + + init_pt_regs(®s); + regs.gpr[3] = (unsigned long) &a; + regs.gpr[5] = 0x5678; + + /* std r5, 0(r3) */ + stepped = emulate_step(®s, TEST_STD(5, 3, 0)); + if (stepped == 1 || regs.gpr[5] == a) + show_result("std", "PASS"); + else + show_result("std", "FAIL"); +} + +static void __init test_ldarx_stdcx(void) +{ + struct pt_regs regs; + unsigned long a = 0x1234; + int stepped = -1; + unsigned long cr0_eq = 0x1 << 29; /* eq bit of CR0 */ + + init_pt_regs(®s); + asm volatile("mfcr %0" : "=r"(regs.ccr)); + + + /*** ldarx ***/ + + regs.gpr[3] = (unsigned long) &a; + regs.gpr[4] = 0; + regs.gpr[5] = 0x5678; + + /* ldarx r5, r3, r4, 0 */ + stepped = emulate_step(®s, TEST_LDARX(5, 3, 4, 0)); + + /* + * Don't touch 'a' here. Touching 'a' can do Load/store + * of 'a' which result in failure of subsequent stdcx. + * Instead, use hardcoded value for comparison. + */ + if (stepped <= 0 || regs.gpr[5] != 0x1234) { + show_result("ldarx / stdcx.", "FAIL (ldarx)"); + return; + } + + + /*** stdcx. ***/ + + regs.gpr[5] = 0x9ABC; + + /* stdcx. r5, r3, r4 */ + stepped = emulate_step(®s, TEST_STDCX(5, 3, 4)); + + /* + * Two possible scenarios that indicates successful emulation + * of stdcx. : + * 1. Reservation is active and store is performed. In this + * case cr0.eq bit will be set to 1. + * 2. Reservation is not active and store is not performed. + * In this case cr0.eq bit will be set to 0. + */ + if (stepped == 1 && ((regs.gpr[5] == a && (regs.ccr & cr0_eq)) + || (regs.gpr[5] != a && !(regs.ccr & cr0_eq)))) + show_result("ldarx / stdcx.", "PASS"); + else + show_result("ldarx / stdcx.", "FAIL (stdcx.)"); +} + +#ifdef CONFIG_PPC_FPU +static void __init test_lfsx_stfsx(void) +{ + struct pt_regs regs; + union { + float a; + int b; + } c; + int cached_b; + int stepped = -1; + + init_pt_regs(®s); + + + /*** lfsx ***/ + + c.a = 123.45; + cached_b = c.b; + + regs.gpr[3] = (unsigned long) &c.a; + regs.gpr[4] = 0; + + /* lfsx frt10, r3, r4 */ + stepped = emulate_step(®s, TEST_LFSX(10, 3, 4)); + + if (stepped == 1) + show_result("lfsx", "PASS"); + else + show_result("lfsx", "FAIL"); + + + /*** stfsx ***/ + + c.a = 678.91; + + /* stfsx frs10, r3, r4 */ + stepped = emulate_step(®s, TEST_STFSX(10, 3, 4)); + + if (stepped == 1 && c.b == cached_b) + show_result("stfsx", "PASS"); + else + show_result("stfsx", "FAIL"); +} + +static void __init test_lfdx_stfdx(void) +{ + struct pt_regs regs; + union { + double a; + long b; + } c; + long cached_b; + int stepped = -1; + + init_pt_regs(®s); + + + /*** lfdx ***/ + + c.a = 123456.78; + cached_b = c.b; + + regs.gpr[3] = (unsigned long) &c.a; + regs.gpr[4] = 0; + + /* lfdx frt10, r3, r4 */ + stepped = emulate_step(®s, TEST_LFDX(10, 3, 4)); + + if (stepped == 1) + show_result("lfdx", "PASS"); + else + show_result("lfdx", "FAIL"); + + + /*** stfdx ***/ + + c.a = 987654.32; + + /* stfdx frs10, r3, r4 */ + stepped = emulate_step(®s, TEST_STFDX(10, 3, 4)); + + if (stepped == 1 && c.b == cached_b) + show_result("stfdx", "PASS"); + else + show_result("stfdx", "FAIL"); +} +#else +static void __init test_lfsx_stfsx(void) +{ + show_result("lfsx", "SKIP (CONFIG_PPC_FPU is not set)"); + show_result("stfsx", "SKIP (CONFIG_PPC_FPU is not set)"); +} + +static void __init test_lfdx_stfdx(void) +{ + show_result("lfdx", "SKIP (CONFIG_PPC_FPU is not set)"); + show_result("stfdx", "SKIP (CONFIG_PPC_FPU is not set)"); +} +#endif /* CONFIG_PPC_FPU */ + +#ifdef CONFIG_ALTIVEC +static void __init test_lvx_stvx(void) +{ + struct pt_regs regs; + union { + vector128 a; + u32 b[4]; + } c; + u32 cached_b[4]; + int stepped = -1; + + init_pt_regs(®s); + + + /*** lvx ***/ + + cached_b[0] = c.b[0] = 923745; + cached_b[1] = c.b[1] = 2139478; + cached_b[2] = c.b[2] = 9012; + cached_b[3] = c.b[3] = 982134; + + regs.gpr[3] = (unsigned long) &c.a; + regs.gpr[4] = 0; + + /* lvx vrt10, r3, r4 */ + stepped = emulate_step(®s, TEST_LVX(10, 3, 4)); + + if (stepped == 1) + show_result("lvx", "PASS"); + else + show_result("lvx", "FAIL"); + + + /*** stvx ***/ + + c.b[0] = 4987513; + c.b[1] = 84313948; + c.b[2] = 71; + c.b[3] = 498532; + + /* stvx vrs10, r3, r4 */ + stepped = emulate_step(®s, TEST_STVX(10, 3, 4)); + + if (stepped == 1 && cached_b[0] == c.b[0] && cached_b[1] == c.b[1] && + cached_b[2] == c.b[2] && cached_b[3] == c.b[3]) + show_result("stvx", "PASS"); + else + show_result("stvx", "FAIL"); +} +#else +static void __init test_lvx_stvx(void) +{ + show_result("lvx", "SKIP (CONFIG_ALTIVEC is not set)"); + show_result("stvx", "SKIP (CONFIG_ALTIVEC is not set)"); +} +#endif /* CONFIG_ALTIVEC */ + +#ifdef CONFIG_VSX +static void __init test_lxvd2x_stxvd2x(void) +{ + struct pt_regs regs; + union { + vector128 a; + u32 b[4]; + } c; + u32 cached_b[4]; + int stepped = -1; + + init_pt_regs(®s); + + + /*** lxvd2x ***/ + + cached_b[0] = c.b[0] = 18233; + cached_b[1] = c.b[1] = 34863571; + cached_b[2] = c.b[2] = 834; + cached_b[3] = c.b[3] = 6138911; + + regs.gpr[3] = (unsigned long) &c.a; + regs.gpr[4] = 0; + + /* lxvd2x vsr39, r3, r4 */ + stepped = emulate_step(®s, TEST_LXVD2X(39, 3, 4)); + + if (stepped == 1) + show_result("lxvd2x", "PASS"); + else + show_result("lxvd2x", "FAIL"); + + + /*** stxvd2x ***/ + + c.b[0] = 21379463; + c.b[1] = 87; + c.b[2] = 374234; + c.b[3] = 4; + + /* stxvd2x vsr39, r3, r4 */ + stepped = emulate_step(®s, TEST_STXVD2X(39, 3, 4)); + + if (stepped == 1 && cached_b[0] == c.b[0] && cached_b[1] == c.b[1] && + cached_b[2] == c.b[2] && cached_b[3] == c.b[3]) + show_result("stxvd2x", "PASS"); + else + show_result("stxvd2x", "FAIL"); +} +#else +static void __init test_lxvd2x_stxvd2x(void) +{ + show_result("lxvd2x", "SKIP (CONFIG_VSX is not set)"); + show_result("stxvd2x", "SKIP (CONFIG_VSX is not set)"); +} +#endif /* CONFIG_VSX */ + +static int __init test_emulate_step(void) +{ + test_ld(); + test_lwz(); + test_lwzx(); + test_std(); + test_ldarx_stdcx(); + test_lfsx_stfsx(); + test_lfdx_stfdx(); + test_lvx_stvx(); + test_lxvd2x_stxvd2x(); + + return 0; +} +late_initcall(test_emulate_step); diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 6aa3b76aa0d6..9be992083d2a 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -356,18 +356,42 @@ static void early_check_vec5(void) unsigned long root, chosen; int size; const u8 *vec5; + u8 mmu_supported; root = of_get_flat_dt_root(); chosen = of_get_flat_dt_subnode_by_name(root, "chosen"); - if (chosen == -FDT_ERR_NOTFOUND) + if (chosen == -FDT_ERR_NOTFOUND) { + cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX; return; + } vec5 = of_get_flat_dt_prop(chosen, "ibm,architecture-vec-5", &size); - if (!vec5) + if (!vec5) { + cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX; return; - if (size <= OV5_INDX(OV5_MMU_RADIX_300) || - !(vec5[OV5_INDX(OV5_MMU_RADIX_300)] & OV5_FEAT(OV5_MMU_RADIX_300))) - /* Hypervisor doesn't support radix */ + } + if (size <= OV5_INDX(OV5_MMU_SUPPORT)) { cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX; + return; + } + + /* Check for supported configuration */ + mmu_supported = vec5[OV5_INDX(OV5_MMU_SUPPORT)] & + OV5_FEAT(OV5_MMU_SUPPORT); + if (mmu_supported == OV5_FEAT(OV5_MMU_RADIX)) { + /* Hypervisor only supports radix - check enabled && GTSE */ + if (!early_radix_enabled()) { + pr_warn("WARNING: Ignoring cmdline option disable_radix\n"); + } + if (!(vec5[OV5_INDX(OV5_RADIX_GTSE)] & + OV5_FEAT(OV5_RADIX_GTSE))) { + pr_warn("WARNING: Hypervisor doesn't support RADIX with GTSE\n"); + } + /* Do radix anyway - the hypervisor said we had to */ + cur_cpu_spec->mmu_features |= MMU_FTR_TYPE_RADIX; + } else if (mmu_supported == OV5_FEAT(OV5_MMU_HASH)) { + /* Hypervisor only supports hash - disable radix */ + cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX; + } } void __init mmu_early_init_devtree(void) @@ -383,7 +407,7 @@ void __init mmu_early_init_devtree(void) * even though the ibm,architecture-vec-5 property created by * skiboot doesn't have the necessary bits set. */ - if (early_radix_enabled() && !(mfmsr() & MSR_HV)) + if (!(mfmsr() & MSR_HV)) early_check_vec5(); if (early_radix_enabled()) diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index 2a590a98e652..c28165d8970b 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -186,6 +186,10 @@ static void __init radix_init_pgtable(void) */ register_process_table(__pa(process_tb), 0, PRTB_SIZE_SHIFT - 12); pr_info("Process table %p and radix root for kernel: %p\n", process_tb, init_mm.pgd); + asm volatile("ptesync" : : : "memory"); + asm volatile(PPC_TLBIE_5(%0,%1,2,1,1) : : + "r" (TLBIEL_INVAL_SET_LPID), "r" (0)); + asm volatile("eieio; tlbsync; ptesync" : : : "memory"); } static void __init radix_init_partition_table(void) diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index 6693f75e93d1..da8a0f7a035c 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -39,8 +39,8 @@ opal_tracepoint_refcount: BEGIN_FTR_SECTION; \ b 1f; \ END_FTR_SECTION(0, 1); \ - ld r12,opal_tracepoint_refcount@toc(r2); \ - cmpdi r12,0; \ + ld r11,opal_tracepoint_refcount@toc(r2); \ + cmpdi r11,0; \ bne- LABEL; \ 1: diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c index ada29eaed6e2..f523ac883150 100644 --- a/arch/powerpc/sysdev/axonram.c +++ b/arch/powerpc/sysdev/axonram.c @@ -274,7 +274,9 @@ failed: if (bank->disk->major > 0) unregister_blkdev(bank->disk->major, bank->disk->disk_name); - del_gendisk(bank->disk); + if (bank->disk->flags & GENHD_FL_UP) + del_gendisk(bank->disk); + put_disk(bank->disk); } device->dev.platform_data = NULL; if (bank->io_addr != 0) @@ -299,6 +301,7 @@ axon_ram_remove(struct platform_device *device) device_remove_file(&device->dev, &dev_attr_ecc); free_irq(bank->irq_id, device); del_gendisk(bank->disk); + put_disk(bank->disk); iounmap((void __iomem *) bank->io_addr); kfree(bank); diff --git a/arch/powerpc/sysdev/xics/icp-opal.c b/arch/powerpc/sysdev/xics/icp-opal.c index f9670eabfcfa..b53f80f0b4d8 100644 --- a/arch/powerpc/sysdev/xics/icp-opal.c +++ b/arch/powerpc/sysdev/xics/icp-opal.c @@ -91,6 +91,16 @@ static unsigned int icp_opal_get_irq(void) static void icp_opal_set_cpu_priority(unsigned char cppr) { + /* + * Here be dragons. The caller has asked to allow only IPI's and not + * external interrupts. But OPAL XIVE doesn't support that. So instead + * of allowing no interrupts allow all. That's still not right, but + * currently the only caller who does this is xics_migrate_irqs_away() + * and it works in that case. + */ + if (cppr >= DEFAULT_PRIORITY) + cppr = LOWEST_PRIORITY; + xics_set_base_cppr(cppr); opal_int_set_cppr(cppr); iosync(); diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c index 69d858e51ac7..23efe4e42172 100644 --- a/arch/powerpc/sysdev/xics/xics-common.c +++ b/arch/powerpc/sysdev/xics/xics-common.c @@ -20,6 +20,7 @@ #include <linux/of.h> #include <linux/slab.h> #include <linux/spinlock.h> +#include <linux/delay.h> #include <asm/prom.h> #include <asm/io.h> @@ -198,9 +199,6 @@ void xics_migrate_irqs_away(void) /* Remove ourselves from the global interrupt queue */ xics_set_cpu_giq(xics_default_distrib_server, 0); - /* Allow IPIs again... */ - icp_ops->set_priority(DEFAULT_PRIORITY); - for_each_irq_desc(virq, desc) { struct irq_chip *chip; long server; @@ -255,6 +253,19 @@ void xics_migrate_irqs_away(void) unlock: raw_spin_unlock_irqrestore(&desc->lock, flags); } + + /* Allow "sufficient" time to drop any inflight IRQ's */ + mdelay(5); + + /* + * Allow IPIs again. This is done at the very end, after migrating all + * interrupts, the expectation is that we'll only get woken up by an IPI + * interrupt beyond this point, but leave externals masked just to be + * safe. If we're using icp-opal this may actually allow all + * interrupts anyway, but that should be OK. + */ + icp_ops->set_priority(DEFAULT_PRIORITY); + } #endif /* CONFIG_HOTPLUG_CPU */ diff --git a/arch/s390/configs/default_defconfig b/arch/s390/configs/default_defconfig index 143b1e00b818..4b176fe83da4 100644 --- a/arch/s390/configs/default_defconfig +++ b/arch/s390/configs/default_defconfig @@ -609,7 +609,7 @@ CONFIG_SCHED_TRACER=y CONFIG_FTRACE_SYSCALLS=y CONFIG_STACK_TRACER=y CONFIG_BLK_DEV_IO_TRACE=y -CONFIG_UPROBE_EVENT=y +CONFIG_UPROBE_EVENTS=y CONFIG_FUNCTION_PROFILER=y CONFIG_HIST_TRIGGERS=y CONFIG_TRACE_ENUM_MAP_FILE=y diff --git a/arch/s390/configs/gcov_defconfig b/arch/s390/configs/gcov_defconfig index f05d2d6e1087..0de46cc397f6 100644 --- a/arch/s390/configs/gcov_defconfig +++ b/arch/s390/configs/gcov_defconfig @@ -560,7 +560,7 @@ CONFIG_SCHED_TRACER=y CONFIG_FTRACE_SYSCALLS=y CONFIG_STACK_TRACER=y CONFIG_BLK_DEV_IO_TRACE=y -CONFIG_UPROBE_EVENT=y +CONFIG_UPROBE_EVENTS=y CONFIG_FUNCTION_PROFILER=y CONFIG_HIST_TRIGGERS=y CONFIG_TRACE_ENUM_MAP_FILE=y diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig index 2358bf33c5ef..e167557b434c 100644 --- a/arch/s390/configs/performance_defconfig +++ b/arch/s390/configs/performance_defconfig @@ -558,7 +558,7 @@ CONFIG_SCHED_TRACER=y CONFIG_FTRACE_SYSCALLS=y CONFIG_STACK_TRACER=y CONFIG_BLK_DEV_IO_TRACE=y -CONFIG_UPROBE_EVENT=y +CONFIG_UPROBE_EVENTS=y CONFIG_FUNCTION_PROFILER=y CONFIG_HIST_TRIGGERS=y CONFIG_TRACE_ENUM_MAP_FILE=y diff --git a/arch/s390/defconfig b/arch/s390/defconfig index 68bfd09f1b02..97189dbaf34b 100644 --- a/arch/s390/defconfig +++ b/arch/s390/defconfig @@ -179,7 +179,7 @@ CONFIG_FTRACE_SYSCALLS=y CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP=y CONFIG_STACK_TRACER=y CONFIG_BLK_DEV_IO_TRACE=y -CONFIG_UPROBE_EVENT=y +CONFIG_UPROBE_EVENTS=y CONFIG_FUNCTION_PROFILER=y CONFIG_TRACE_ENUM_MAP_FILE=y CONFIG_KPROBES_SANITY_TEST=y diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 7ed1972b1920..93e37b12e882 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -24,6 +24,7 @@ * the S390 page table tree. */ #ifndef __ASSEMBLY__ +#include <asm-generic/5level-fixup.h> #include <linux/sched.h> #include <linux/mm_types.h> #include <linux/page-flags.h> diff --git a/arch/score/include/asm/pgtable.h b/arch/score/include/asm/pgtable.h index 0553e5cd5985..46ff8fd678a7 100644 --- a/arch/score/include/asm/pgtable.h +++ b/arch/score/include/asm/pgtable.h @@ -2,6 +2,7 @@ #define _ASM_SCORE_PGTABLE_H #include <linux/const.h> +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopmd.h> #include <asm/fixmap.h> diff --git a/arch/sh/boards/mach-cayman/setup.c b/arch/sh/boards/mach-cayman/setup.c index 340fd40b381d..9c292c27e0d7 100644 --- a/arch/sh/boards/mach-cayman/setup.c +++ b/arch/sh/boards/mach-cayman/setup.c @@ -128,7 +128,6 @@ static int __init smsc_superio_setup(void) SMSC_SUPERIO_WRITE_INDEXED(1, SMSC_PRIMARY_INT_INDEX); SMSC_SUPERIO_WRITE_INDEXED(12, SMSC_SECONDARY_INT_INDEX); -#ifdef CONFIG_IDE /* * Only IDE1 exists on the Cayman */ @@ -158,7 +157,6 @@ static int __init smsc_superio_setup(void) SMSC_SUPERIO_WRITE_INDEXED(0x01, 0xc5); /* GP45 = IDE1_IRQ */ SMSC_SUPERIO_WRITE_INDEXED(0x00, 0xc6); /* GP46 = nIOROP */ SMSC_SUPERIO_WRITE_INDEXED(0x00, 0xc7); /* GP47 = nIOWOP */ -#endif /* Exit the configuration state */ outb(SMSC_EXIT_CONFIG_KEY, SMSC_CONFIG_PORT_ADDR); diff --git a/arch/sh/include/asm/pgtable-2level.h b/arch/sh/include/asm/pgtable-2level.h index 19bd89db17e7..f75cf4387257 100644 --- a/arch/sh/include/asm/pgtable-2level.h +++ b/arch/sh/include/asm/pgtable-2level.h @@ -1,6 +1,7 @@ #ifndef __ASM_SH_PGTABLE_2LEVEL_H #define __ASM_SH_PGTABLE_2LEVEL_H +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopmd.h> /* diff --git a/arch/sh/include/asm/pgtable-3level.h b/arch/sh/include/asm/pgtable-3level.h index 249a985d9648..9b1e776eca31 100644 --- a/arch/sh/include/asm/pgtable-3level.h +++ b/arch/sh/include/asm/pgtable-3level.h @@ -1,6 +1,7 @@ #ifndef __ASM_SH_PGTABLE_3LEVEL_H #define __ASM_SH_PGTABLE_3LEVEL_H +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopud.h> /* diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index 56e49c8f770d..8a598528ec1f 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -12,6 +12,7 @@ * the SpitFire page tables. */ +#include <asm-generic/5level-fixup.h> #include <linux/compiler.h> #include <linux/const.h> #include <asm/types.h> diff --git a/arch/tile/include/asm/pgtable_32.h b/arch/tile/include/asm/pgtable_32.h index d26a42279036..5f8c615cb5e9 100644 --- a/arch/tile/include/asm/pgtable_32.h +++ b/arch/tile/include/asm/pgtable_32.h @@ -74,6 +74,7 @@ extern unsigned long VMALLOC_RESERVE /* = CONFIG_VMALLOC_RESERVE */; #define MAXMEM (_VMALLOC_START - PAGE_OFFSET) /* We have no pmd or pud since we are strictly a two-level page table */ +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopmd.h> static inline int pud_huge_page(pud_t pud) { return 0; } diff --git a/arch/tile/include/asm/pgtable_64.h b/arch/tile/include/asm/pgtable_64.h index e96cec52f6d8..96fe58b45118 100644 --- a/arch/tile/include/asm/pgtable_64.h +++ b/arch/tile/include/asm/pgtable_64.h @@ -59,6 +59,7 @@ #ifndef __ASSEMBLY__ /* We have no pud since we are a three-level page table. */ +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopud.h> /* diff --git a/arch/um/include/asm/pgtable-2level.h b/arch/um/include/asm/pgtable-2level.h index cfbe59752469..179c0ea87a0c 100644 --- a/arch/um/include/asm/pgtable-2level.h +++ b/arch/um/include/asm/pgtable-2level.h @@ -8,6 +8,7 @@ #ifndef __UM_PGTABLE_2LEVEL_H #define __UM_PGTABLE_2LEVEL_H +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopmd.h> /* PGDIR_SHIFT determines what a third-level page table entry can map */ diff --git a/arch/um/include/asm/pgtable-3level.h b/arch/um/include/asm/pgtable-3level.h index bae8523a162f..c4d876dfb9ac 100644 --- a/arch/um/include/asm/pgtable-3level.h +++ b/arch/um/include/asm/pgtable-3level.h @@ -7,6 +7,7 @@ #ifndef __UM_PGTABLE_3LEVEL_H #define __UM_PGTABLE_3LEVEL_H +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopud.h> /* PGDIR_SHIFT determines what a third-level page table entry can map */ diff --git a/arch/unicore32/include/asm/pgtable.h b/arch/unicore32/include/asm/pgtable.h index 818d0f5598e3..a4f2bef37e70 100644 --- a/arch/unicore32/include/asm/pgtable.h +++ b/arch/unicore32/include/asm/pgtable.h @@ -12,6 +12,7 @@ #ifndef __UNICORE_PGTABLE_H__ #define __UNICORE_PGTABLE_H__ +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopmd.h> #include <asm/cpu-single.h> diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index 7ef4a099defc..6205d3b81e6d 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -176,6 +176,7 @@ CONFIG_E1000E=y CONFIG_SKY2=y CONFIG_FORCEDETH=y CONFIG_8139TOO=y +CONFIG_R8169=y CONFIG_FDDI=y CONFIG_INPUT_POLLDEV=y # CONFIG_INPUT_MOUSEDEV_PSAUX is not set diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index afb222b63cae..c84584bb9402 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -604,7 +604,7 @@ amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, int idx, return &amd_f15_PMC20; } case AMD_EVENT_NB: - /* moved to perf_event_amd_uncore.c */ + /* moved to uncore.c */ return &emptyconstraint; default: return &emptyconstraint; diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index aff4b5b69d40..238ae3248ba5 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -1,5 +1,5 @@ /* - * perf_event_intel_cstate.c: support cstate residency counters + * Support cstate residency counters * * Copyright (C) 2015, Intel Corp. * Author: Kan Liang (kan.liang@intel.com) diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c index 22054ca49026..9d05c7e67f60 100644 --- a/arch/x86/events/intel/rapl.c +++ b/arch/x86/events/intel/rapl.c @@ -1,5 +1,5 @@ /* - * perf_event_intel_rapl.c: support Intel RAPL energy consumption counters + * Support Intel RAPL energy consumption counters * Copyright (C) 2013 Google, Inc., Stephane Eranian * * Intel RAPL interface is specified in the IA-32 Manual Vol3b diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h index ad986c1e29bc..df5989f27b1b 100644 --- a/arch/x86/events/intel/uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -360,7 +360,7 @@ extern struct list_head pci2phy_map_head; extern struct pci_extra_dev *uncore_extra_pci_dev; extern struct event_constraint uncore_constraint_empty; -/* perf_event_intel_uncore_snb.c */ +/* uncore_snb.c */ int snb_uncore_pci_init(void); int ivb_uncore_pci_init(void); int hsw_uncore_pci_init(void); @@ -371,7 +371,7 @@ void nhm_uncore_cpu_init(void); void skl_uncore_cpu_init(void); int snb_pci2phy_map_init(int devid); -/* perf_event_intel_uncore_snbep.c */ +/* uncore_snbep.c */ int snbep_uncore_pci_init(void); void snbep_uncore_cpu_init(void); int ivbep_uncore_pci_init(void); @@ -385,5 +385,5 @@ void knl_uncore_cpu_init(void); int skx_uncore_pci_init(void); void skx_uncore_cpu_init(void); -/* perf_event_intel_uncore_nhmex.c */ +/* uncore_nhmex.c */ void nhmex_uncore_cpu_init(void); diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index db64baf0e500..8bef70e7f3cc 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -158,13 +158,13 @@ void hyperv_init(void) clocksource_register_hz(&hyperv_cs_tsc, NSEC_PER_SEC/100); return; } +register_msr_cs: #endif /* * For 32 bit guests just use the MSR based mechanism for reading * the partition counter. */ -register_msr_cs: hyperv_cs = &hyperv_cs_msr; if (ms_hyperv.features & HV_X64_MSR_TIME_REF_COUNT_AVAILABLE) clocksource_register_hz(&hyperv_cs_msr, NSEC_PER_SEC/100); diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 4e7772387c6e..b04bb6dfed7f 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -289,7 +289,8 @@ #define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */ #define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */ #define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */ -#define X86_FEATURE_RDPID (16*32+ 22) /* RDPID instruction */ +#define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */ +#define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */ /* AMD-defined CPU features, CPUID level 0x80000007 (ebx), word 17 */ #define X86_FEATURE_OVERFLOW_RECOV (17*32+0) /* MCA overflow recovery support */ diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 8b4de22d6429..62484333673d 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -273,6 +273,8 @@ static inline pgdval_t pgd_flags(pgd_t pgd) } #if CONFIG_PGTABLE_LEVELS > 3 +#include <asm-generic/5level-fixup.h> + typedef struct { pudval_t pud; } pud_t; static inline pud_t native_make_pud(pmdval_t val) @@ -285,6 +287,7 @@ static inline pudval_t native_pud_val(pud_t pud) return pud.pud; } #else +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopud.h> static inline pudval_t native_pud_val(pud_t pud) @@ -306,6 +309,7 @@ static inline pmdval_t native_pmd_val(pmd_t pmd) return pmd.pmd; } #else +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopmd.h> static inline pmdval_t native_pmd_val(pmd_t pmd) diff --git a/arch/x86/include/asm/pkeys.h b/arch/x86/include/asm/pkeys.h index 34684adb6899..b3b09b98896d 100644 --- a/arch/x86/include/asm/pkeys.h +++ b/arch/x86/include/asm/pkeys.h @@ -46,6 +46,15 @@ extern int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey, static inline bool mm_pkey_is_allocated(struct mm_struct *mm, int pkey) { + /* + * "Allocated" pkeys are those that have been returned + * from pkey_alloc(). pkey 0 is special, and never + * returned from pkey_alloc(). + */ + if (pkey <= 0) + return false; + if (pkey >= arch_max_pkey()) + return false; return mm_pkey_allocation_map(mm) & (1U << pkey); } @@ -82,12 +91,6 @@ int mm_pkey_alloc(struct mm_struct *mm) static inline int mm_pkey_free(struct mm_struct *mm, int pkey) { - /* - * pkey 0 is special, always allocated and can never - * be freed. - */ - if (!pkey) - return -EINVAL; if (!mm_pkey_is_allocated(mm, pkey)) return -EINVAL; diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h index 5138dacf8bb8..07244ea16765 100644 --- a/arch/x86/include/uapi/asm/bootparam.h +++ b/arch/x86/include/uapi/asm/bootparam.h @@ -58,7 +58,7 @@ struct setup_header { __u32 header; __u16 version; __u32 realmode_swtch; - __u16 start_sys; + __u16 start_sys_seg; __u16 kernel_version; __u8 type_of_loader; __u8 loadflags; diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 4261b3282ad9..aee7deddabd0 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1610,24 +1610,15 @@ static inline void try_to_enable_x2apic(int remap_mode) { } static inline void __x2apic_enable(void) { } #endif /* !CONFIG_X86_X2APIC */ -static int __init try_to_enable_IR(void) -{ -#ifdef CONFIG_X86_IO_APIC - if (!x2apic_enabled() && skip_ioapic_setup) { - pr_info("Not enabling interrupt remapping due to skipped IO-APIC setup\n"); - return -1; - } -#endif - return irq_remapping_enable(); -} - void __init enable_IR_x2apic(void) { unsigned long flags; int ret, ir_stat; - if (skip_ioapic_setup) + if (skip_ioapic_setup) { + pr_info("Not enabling interrupt remapping due to skipped IO-APIC setup\n"); return; + } ir_stat = irq_remapping_prepare(); if (ir_stat < 0 && !x2apic_supported()) @@ -1645,7 +1636,7 @@ void __init enable_IR_x2apic(void) /* If irq_remapping_prepare() succeeded, try to enable it */ if (ir_stat >= 0) - ir_stat = try_to_enable_IR(); + ir_stat = irq_remapping_enable(); /* ir_stat contains the remap mode or an error code */ try_to_enable_x2apic(ir_stat); @@ -2062,10 +2053,10 @@ static int allocate_logical_cpuid(int apicid) /* Allocate a new cpuid. */ if (nr_logical_cpuids >= nr_cpu_ids) { - WARN_ONCE(1, "Only %d processors supported." + WARN_ONCE(1, "APIC: NR_CPUS/possible_cpus limit of %i reached. " "Processor %d/0x%x and the rest are ignored.\n", - nr_cpu_ids - 1, nr_logical_cpuids, apicid); - return -1; + nr_cpu_ids, nr_logical_cpuids, apicid); + return -EINVAL; } cpuid_to_apicid[nr_logical_cpuids] = apicid; diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 35a5d5dca2fa..c36140d788fe 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -556,10 +556,6 @@ static void early_init_amd(struct cpuinfo_x86 *c) if (c->x86_power & (1 << 8)) { set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); - if (check_tsc_unstable()) - clear_sched_clock_stable(); - } else { - clear_sched_clock_stable(); } /* Bit 12 of 8000_0007 edx is accumulated power mechanism. */ diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index adc0ebd8bed0..43955ee6715b 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -105,8 +105,6 @@ static void early_init_centaur(struct cpuinfo_x86 *c) #ifdef CONFIG_X86_64 set_cpu_cap(c, X86_FEATURE_SYSENTER32); #endif - - clear_sched_clock_stable(); } static void init_centaur(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index b11b38c3b0bd..58094a1f9e9d 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -88,7 +88,6 @@ static void default_init(struct cpuinfo_x86 *c) strcpy(c->x86_model_id, "386"); } #endif - clear_sched_clock_stable(); } static const struct cpu_dev default_cpu = { @@ -1077,8 +1076,6 @@ static void identify_cpu(struct cpuinfo_x86 *c) */ if (this_cpu->c_init) this_cpu->c_init(c); - else - clear_sched_clock_stable(); /* Disable the PN if appropriate */ squash_the_stupid_serial_number(c); diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index 0a3bc19de017..a70fd61095f8 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -185,7 +185,6 @@ static void early_init_cyrix(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_CYRIX_ARR); break; } - clear_sched_clock_stable(); } static void init_cyrix(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index fe0a615a051b..063197771b8d 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -162,10 +162,6 @@ static void early_init_intel(struct cpuinfo_x86 *c) if (c->x86_power & (1 << 8)) { set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); - if (check_tsc_unstable()) - clear_sched_clock_stable(); - } else { - clear_sched_clock_stable(); } /* Penwell and Cloverview have the TSC which doesn't sleep on S3 */ diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c index 0bbe0f3a039f..c05509d38b1f 100644 --- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c +++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c @@ -28,7 +28,6 @@ #include <linux/sched/signal.h> #include <linux/sched/task.h> #include <linux/slab.h> -#include <linux/cpu.h> #include <linux/task_work.h> #include <uapi/linux/magic.h> diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c index 8457b4978668..d77d07ab310b 100644 --- a/arch/x86/kernel/cpu/transmeta.c +++ b/arch/x86/kernel/cpu/transmeta.c @@ -16,8 +16,6 @@ static void early_init_transmeta(struct cpuinfo_x86 *c) if (xlvl >= 0x80860001) c->x86_capability[CPUID_8086_0001_EDX] = cpuid_edx(0x80860001); } - - clear_sched_clock_stable(); } static void init_transmeta(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index 891f4dad7b2c..22403a28caf5 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c @@ -30,7 +30,6 @@ #include <asm/hypervisor.h> #include <asm/timer.h> #include <asm/apic.h> -#include <asm/timer.h> #undef pr_fmt #define pr_fmt(fmt) "vmware: " fmt diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 8639bb2ae058..8f3d9cf26ff9 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -535,7 +535,7 @@ static void run_sync(void) { int enable_irqs = irqs_disabled(); - /* We may be called with interrupts disbled (on bootup). */ + /* We may be called with interrupts disabled (on bootup). */ if (enable_irqs) local_irq_enable(); on_each_cpu(do_sync_core, NULL, 1); diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index dc6ba5bda9fc..89ff7af2de50 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -354,7 +354,7 @@ static int hpet_resume(struct clock_event_device *evt, int timer) irq_domain_deactivate_irq(irq_get_irq_data(hdev->irq)); irq_domain_activate_irq(irq_get_irq_data(hdev->irq)); - disable_irq(hdev->irq); + disable_hardirq(hdev->irq); irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu)); enable_irq(hdev->irq); } diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c index bdb83e431d89..38b64587b31b 100644 --- a/arch/x86/kernel/kdebugfs.c +++ b/arch/x86/kernel/kdebugfs.c @@ -167,7 +167,7 @@ static int __init boot_params_kdebugfs_init(void) struct dentry *dbp, *version, *data; int error = -ENOMEM; - dbp = debugfs_create_dir("boot_params", NULL); + dbp = debugfs_create_dir("boot_params", arch_debugfs_dir); if (!dbp) return -ENOMEM; diff --git a/arch/x86/kernel/kprobes/common.h b/arch/x86/kernel/kprobes/common.h index c6ee63f927ab..d688826e5736 100644 --- a/arch/x86/kernel/kprobes/common.h +++ b/arch/x86/kernel/kprobes/common.h @@ -67,7 +67,7 @@ #endif /* Ensure if the instruction can be boostable */ -extern int can_boost(kprobe_opcode_t *instruction); +extern int can_boost(kprobe_opcode_t *instruction, void *addr); /* Recover instruction if given address is probed */ extern unsigned long recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr); diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 6384eb754a58..993fa4fe4f68 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -167,12 +167,12 @@ NOKPROBE_SYMBOL(skip_prefixes); * Returns non-zero if opcode is boostable. * RIP relative instructions are adjusted at copying time in 64 bits mode */ -int can_boost(kprobe_opcode_t *opcodes) +int can_boost(kprobe_opcode_t *opcodes, void *addr) { kprobe_opcode_t opcode; kprobe_opcode_t *orig_opcodes = opcodes; - if (search_exception_tables((unsigned long)opcodes)) + if (search_exception_tables((unsigned long)addr)) return 0; /* Page fault may occur on this address. */ retry: @@ -417,7 +417,7 @@ static int arch_copy_kprobe(struct kprobe *p) * __copy_instruction can modify the displacement of the instruction, * but it doesn't affect boostable check. */ - if (can_boost(p->ainsn.insn)) + if (can_boost(p->ainsn.insn, p->addr)) p->ainsn.boostable = 0; else p->ainsn.boostable = -1; diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index 3d1bee9d6a72..3e7c6e5a08ff 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -178,7 +178,7 @@ static int copy_optimized_instructions(u8 *dest, u8 *src) while (len < RELATIVEJUMP_SIZE) { ret = __copy_instruction(dest + len, src + len); - if (!ret || !can_boost(dest + len)) + if (!ret || !can_boost(dest + len, src + len)) return -EINVAL; len += ret; } diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index e244c19a2451..4194d6f9bb29 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -223,6 +223,22 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { DMI_MATCH(DMI_BOARD_NAME, "P4S800"), }, }, + { /* Handle problems with rebooting on ASUS EeeBook X205TA */ + .callback = set_acpi_reboot, + .ident = "ASUS EeeBook X205TA", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "X205TAW"), + }, + }, + { /* Handle problems with rebooting on ASUS EeeBook X205TAW */ + .callback = set_acpi_reboot, + .ident = "ASUS EeeBook X205TAW", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "X205TAW"), + }, + }, /* Certec */ { /* Handle problems with rebooting on Certec BPC600 */ diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 46bcda4cb1c2..4f7a9833d8e5 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -327,9 +327,16 @@ unsigned long long sched_clock(void) { return paravirt_sched_clock(); } + +static inline bool using_native_sched_clock(void) +{ + return pv_time_ops.sched_clock == native_sched_clock; +} #else unsigned long long sched_clock(void) __attribute__((alias("native_sched_clock"))); + +static inline bool using_native_sched_clock(void) { return true; } #endif int check_tsc_unstable(void) @@ -1112,8 +1119,10 @@ static void tsc_cs_mark_unstable(struct clocksource *cs) { if (tsc_unstable) return; + tsc_unstable = 1; - clear_sched_clock_stable(); + if (using_native_sched_clock()) + clear_sched_clock_stable(); disable_sched_clock_irqtime(); pr_info("Marking TSC unstable due to clocksource watchdog\n"); } @@ -1135,18 +1144,20 @@ static struct clocksource clocksource_tsc = { void mark_tsc_unstable(char *reason) { - if (!tsc_unstable) { - tsc_unstable = 1; + if (tsc_unstable) + return; + + tsc_unstable = 1; + if (using_native_sched_clock()) clear_sched_clock_stable(); - disable_sched_clock_irqtime(); - pr_info("Marking TSC unstable due to %s\n", reason); - /* Change only the rating, when not registered */ - if (clocksource_tsc.mult) - clocksource_mark_unstable(&clocksource_tsc); - else { - clocksource_tsc.flags |= CLOCK_SOURCE_UNSTABLE; - clocksource_tsc.rating = 0; - } + disable_sched_clock_irqtime(); + pr_info("Marking TSC unstable due to %s\n", reason); + /* Change only the rating, when not registered */ + if (clocksource_tsc.mult) { + clocksource_mark_unstable(&clocksource_tsc); + } else { + clocksource_tsc.flags |= CLOCK_SOURCE_UNSTABLE; + clocksource_tsc.rating = 0; } } diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c index 99c7805a9693..1f3b6ef105cd 100644 --- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c @@ -106,32 +106,35 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr) { struct dev_pagemap *pgmap = NULL; - int nr_start = *nr; - pte_t *ptep; + int nr_start = *nr, ret = 0; + pte_t *ptep, *ptem; - ptep = pte_offset_map(&pmd, addr); + /* + * Keep the original mapped PTE value (ptem) around since we + * might increment ptep off the end of the page when finishing + * our loop iteration. + */ + ptem = ptep = pte_offset_map(&pmd, addr); do { pte_t pte = gup_get_pte(ptep); struct page *page; /* Similar to the PMD case, NUMA hinting must take slow path */ - if (pte_protnone(pte)) { - pte_unmap(ptep); - return 0; - } + if (pte_protnone(pte)) + break; + + if (!pte_allows_gup(pte_val(pte), write)) + break; if (pte_devmap(pte)) { pgmap = get_dev_pagemap(pte_pfn(pte), pgmap); if (unlikely(!pgmap)) { undo_dev_pagemap(nr, nr_start, pages); - pte_unmap(ptep); - return 0; + break; } - } else if (!pte_allows_gup(pte_val(pte), write) || - pte_special(pte)) { - pte_unmap(ptep); - return 0; - } + } else if (pte_special(pte)) + break; + VM_BUG_ON(!pfn_valid(pte_pfn(pte))); page = pte_page(pte); get_page(page); @@ -141,9 +144,11 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, (*nr)++; } while (ptep++, addr += PAGE_SIZE, addr != end); - pte_unmap(ptep - 1); + if (addr == end) + ret = 1; + pte_unmap(ptem); - return 1; + return ret; } static inline void get_head_page_multiple(struct page *page, int nr) diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 0cb52ae0a8f0..190e718694b1 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -735,6 +735,15 @@ void pcibios_disable_device (struct pci_dev *dev) pcibios_disable_irq(dev); } +#ifdef CONFIG_ACPI_HOTPLUG_IOAPIC +void pcibios_release_device(struct pci_dev *dev) +{ + if (atomic_dec_return(&dev->enable_cnt) >= 0) + pcibios_disable_device(dev); + +} +#endif + int pci_ext_cfg_avail(void) { if (raw_pci_ext_ops) diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index e1fb269c87af..292ab0364a89 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -234,23 +234,14 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) return 1; for_each_pci_msi_entry(msidesc, dev) { - __pci_read_msi_msg(msidesc, &msg); - pirq = MSI_ADDR_EXT_DEST_ID(msg.address_hi) | - ((msg.address_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xff); - if (msg.data != XEN_PIRQ_MSI_DATA || - xen_irq_from_pirq(pirq) < 0) { - pirq = xen_allocate_pirq_msi(dev, msidesc); - if (pirq < 0) { - irq = -ENODEV; - goto error; - } - xen_msi_compose_msg(dev, pirq, &msg); - __pci_write_msi_msg(msidesc, &msg); - dev_dbg(&dev->dev, "xen: msi bound to pirq=%d\n", pirq); - } else { - dev_dbg(&dev->dev, - "xen: msi already bound to pirq=%d\n", pirq); + pirq = xen_allocate_pirq_msi(dev, msidesc); + if (pirq < 0) { + irq = -ENODEV; + goto error; } + xen_msi_compose_msg(dev, pirq, &msg); + __pci_write_msi_msg(msidesc, &msg); + dev_dbg(&dev->dev, "xen: msi bound to pirq=%d\n", pirq); irq = xen_bind_pirq_msi_to_irq(dev, msidesc, pirq, (type == PCI_CAP_ID_MSI) ? nvec : 1, (type == PCI_CAP_ID_MSIX) ? diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index 766d4d3529a1..f25982cdff90 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -1847,7 +1847,6 @@ static void pq_init(int node, int pnode) ops.write_payload_first(pnode, first); ops.write_payload_last(pnode, last); - ops.write_g_sw_ack(pnode, 0xffffUL); /* in effect, all msg_type's are set to MSG_NOOP */ memset(pqp, 0, sizeof(struct bau_pq_entry) * DEST_Q_SIZE); diff --git a/arch/x86/purgatory/purgatory.c b/arch/x86/purgatory/purgatory.c index 25e068ba3382..b6d5c8946e66 100644 --- a/arch/x86/purgatory/purgatory.c +++ b/arch/x86/purgatory/purgatory.c @@ -11,6 +11,7 @@ */ #include "sha256.h" +#include "purgatory.h" #include "../boot/string.h" struct sha_region { @@ -18,11 +19,11 @@ struct sha_region { unsigned long len; }; -unsigned long backup_dest = 0; -unsigned long backup_src = 0; -unsigned long backup_sz = 0; +static unsigned long backup_dest; +static unsigned long backup_src; +static unsigned long backup_sz; -u8 sha256_digest[SHA256_DIGEST_SIZE] = { 0 }; +static u8 sha256_digest[SHA256_DIGEST_SIZE] = { 0 }; struct sha_region sha_regions[16] = {}; @@ -39,7 +40,7 @@ static int copy_backup_region(void) return 0; } -int verify_sha256_digest(void) +static int verify_sha256_digest(void) { struct sha_region *ptr, *end; u8 digest[SHA256_DIGEST_SIZE]; diff --git a/arch/x86/purgatory/purgatory.h b/arch/x86/purgatory/purgatory.h new file mode 100644 index 000000000000..e2e365a6c192 --- /dev/null +++ b/arch/x86/purgatory/purgatory.h @@ -0,0 +1,8 @@ +#ifndef PURGATORY_H +#define PURGATORY_H + +#ifndef __ASSEMBLY__ +extern void purgatory(void); +#endif /* __ASSEMBLY__ */ + +#endif /* PURGATORY_H */ diff --git a/arch/x86/purgatory/setup-x86_64.S b/arch/x86/purgatory/setup-x86_64.S index fe3c91ba1bd0..f90e9dfa90bb 100644 --- a/arch/x86/purgatory/setup-x86_64.S +++ b/arch/x86/purgatory/setup-x86_64.S @@ -9,6 +9,7 @@ * This source code is licensed under the GNU General Public License, * Version 2. See the file COPYING for more details. */ +#include "purgatory.h" .text .globl purgatory_start diff --git a/arch/xtensa/include/asm/pgtable.h b/arch/xtensa/include/asm/pgtable.h index 8aa0e0d9cbb2..30dd5b2e4ad5 100644 --- a/arch/xtensa/include/asm/pgtable.h +++ b/arch/xtensa/include/asm/pgtable.h @@ -11,6 +11,7 @@ #ifndef _XTENSA_PGTABLE_H #define _XTENSA_PGTABLE_H +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopmd.h> #include <asm/page.h> #include <asm/kmem_layout.h> diff --git a/block/blk-core.c b/block/blk-core.c index 1086dac8724c..0eeb99ef654f 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -578,8 +578,6 @@ void blk_cleanup_queue(struct request_queue *q) q->queue_lock = &q->__queue_lock; spin_unlock_irq(lock); - put_disk_devt(q->disk_devt); - /* @q is and will stay empty, shutdown and put */ blk_put_queue(q); } @@ -2017,17 +2015,34 @@ blk_qc_t generic_make_request(struct bio *bio) struct request_queue *q = bdev_get_queue(bio->bi_bdev); if (likely(blk_queue_enter(q, false) == 0)) { + struct bio_list hold; + struct bio_list lower, same; + + /* Create a fresh bio_list for all subordinate requests */ + hold = bio_list_on_stack; + bio_list_init(&bio_list_on_stack); ret = q->make_request_fn(q, bio); blk_queue_exit(q); - bio = bio_list_pop(current->bio_list); + /* sort new bios into those for a lower level + * and those for the same level + */ + bio_list_init(&lower); + bio_list_init(&same); + while ((bio = bio_list_pop(&bio_list_on_stack)) != NULL) + if (q == bdev_get_queue(bio->bi_bdev)) + bio_list_add(&same, bio); + else + bio_list_add(&lower, bio); + /* now assemble so we handle the lowest level first */ + bio_list_merge(&bio_list_on_stack, &lower); + bio_list_merge(&bio_list_on_stack, &same); + bio_list_merge(&bio_list_on_stack, &hold); } else { - struct bio *bio_next = bio_list_pop(current->bio_list); - bio_io_error(bio); - bio = bio_next; } + bio = bio_list_pop(current->bio_list); } while (bio); current->bio_list = NULL; /* deactivate */ diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c index 295e69670c39..d745ab81033a 100644 --- a/block/blk-mq-sysfs.c +++ b/block/blk-mq-sysfs.c @@ -17,6 +17,15 @@ static void blk_mq_sysfs_release(struct kobject *kobj) { } +static void blk_mq_hw_sysfs_release(struct kobject *kobj) +{ + struct blk_mq_hw_ctx *hctx = container_of(kobj, struct blk_mq_hw_ctx, + kobj); + free_cpumask_var(hctx->cpumask); + kfree(hctx->ctxs); + kfree(hctx); +} + struct blk_mq_ctx_sysfs_entry { struct attribute attr; ssize_t (*show)(struct blk_mq_ctx *, char *); @@ -200,7 +209,7 @@ static struct kobj_type blk_mq_ctx_ktype = { static struct kobj_type blk_mq_hw_ktype = { .sysfs_ops = &blk_mq_hw_sysfs_ops, .default_attrs = default_hw_ctx_attrs, - .release = blk_mq_sysfs_release, + .release = blk_mq_hw_sysfs_release, }; static void blk_mq_unregister_hctx(struct blk_mq_hw_ctx *hctx) @@ -242,24 +251,15 @@ static int blk_mq_register_hctx(struct blk_mq_hw_ctx *hctx) static void __blk_mq_unregister_dev(struct device *dev, struct request_queue *q) { struct blk_mq_hw_ctx *hctx; - struct blk_mq_ctx *ctx; - int i, j; + int i; - queue_for_each_hw_ctx(q, hctx, i) { + queue_for_each_hw_ctx(q, hctx, i) blk_mq_unregister_hctx(hctx); - hctx_for_each_ctx(hctx, ctx, j) - kobject_put(&ctx->kobj); - - kobject_put(&hctx->kobj); - } - blk_mq_debugfs_unregister_hctxs(q); kobject_uevent(&q->mq_kobj, KOBJ_REMOVE); kobject_del(&q->mq_kobj); - kobject_put(&q->mq_kobj); - kobject_put(&dev->kobj); q->mq_sysfs_init_done = false; @@ -277,7 +277,19 @@ void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx) kobject_init(&hctx->kobj, &blk_mq_hw_ktype); } -static void blk_mq_sysfs_init(struct request_queue *q) +void blk_mq_sysfs_deinit(struct request_queue *q) +{ + struct blk_mq_ctx *ctx; + int cpu; + + for_each_possible_cpu(cpu) { + ctx = per_cpu_ptr(q->queue_ctx, cpu); + kobject_put(&ctx->kobj); + } + kobject_put(&q->mq_kobj); +} + +void blk_mq_sysfs_init(struct request_queue *q) { struct blk_mq_ctx *ctx; int cpu; @@ -297,8 +309,6 @@ int blk_mq_register_dev(struct device *dev, struct request_queue *q) blk_mq_disable_hotplug(); - blk_mq_sysfs_init(q); - ret = kobject_add(&q->mq_kobj, kobject_get(&dev->kobj), "%s", "mq"); if (ret < 0) goto out; diff --git a/block/blk-mq.c b/block/blk-mq.c index b2fd175e84d7..159187a28d66 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1955,16 +1955,6 @@ static void blk_mq_exit_hw_queues(struct request_queue *q, } } -static void blk_mq_free_hw_queues(struct request_queue *q, - struct blk_mq_tag_set *set) -{ - struct blk_mq_hw_ctx *hctx; - unsigned int i; - - queue_for_each_hw_ctx(q, hctx, i) - free_cpumask_var(hctx->cpumask); -} - static int blk_mq_init_hctx(struct request_queue *q, struct blk_mq_tag_set *set, struct blk_mq_hw_ctx *hctx, unsigned hctx_idx) @@ -2045,7 +2035,6 @@ static void blk_mq_init_cpu_queues(struct request_queue *q, struct blk_mq_ctx *__ctx = per_cpu_ptr(q->queue_ctx, i); struct blk_mq_hw_ctx *hctx; - memset(__ctx, 0, sizeof(*__ctx)); __ctx->cpu = i; spin_lock_init(&__ctx->lock); INIT_LIST_HEAD(&__ctx->rq_list); @@ -2257,15 +2246,19 @@ void blk_mq_release(struct request_queue *q) queue_for_each_hw_ctx(q, hctx, i) { if (!hctx) continue; - kfree(hctx->ctxs); - kfree(hctx); + kobject_put(&hctx->kobj); } q->mq_map = NULL; kfree(q->queue_hw_ctx); - /* ctx kobj stays in queue_ctx */ + /* + * release .mq_kobj and sw queue's kobject now because + * both share lifetime with request queue. + */ + blk_mq_sysfs_deinit(q); + free_percpu(q->queue_ctx); } @@ -2330,10 +2323,7 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set, if (hctx->tags) blk_mq_free_map_and_requests(set, j); blk_mq_exit_hctx(q, set, hctx, j); - free_cpumask_var(hctx->cpumask); kobject_put(&hctx->kobj); - kfree(hctx->ctxs); - kfree(hctx); hctxs[j] = NULL; } @@ -2352,6 +2342,9 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, if (!q->queue_ctx) goto err_exit; + /* init q->mq_kobj and sw queues' kobjects */ + blk_mq_sysfs_init(q); + q->queue_hw_ctx = kzalloc_node(nr_cpu_ids * sizeof(*(q->queue_hw_ctx)), GFP_KERNEL, set->numa_node); if (!q->queue_hw_ctx) @@ -2442,7 +2435,6 @@ void blk_mq_free_queue(struct request_queue *q) blk_mq_del_queue_tag_set(q); blk_mq_exit_hw_queues(q, set, set->nr_hw_queues); - blk_mq_free_hw_queues(q, set); } /* Basically redo blk_mq_init_queue with queue frozen */ diff --git a/block/blk-mq.h b/block/blk-mq.h index 088ced003c13..b79f9a7d8cf6 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -77,6 +77,8 @@ static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, /* * sysfs helpers */ +extern void blk_mq_sysfs_init(struct request_queue *q); +extern void blk_mq_sysfs_deinit(struct request_queue *q); extern int blk_mq_sysfs_register(struct request_queue *q); extern void blk_mq_sysfs_unregister(struct request_queue *q); extern void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx); diff --git a/block/genhd.c b/block/genhd.c index b26a5ea115d0..a9c516a8b37d 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -572,20 +572,6 @@ exit: disk_part_iter_exit(&piter); } -void put_disk_devt(struct disk_devt *disk_devt) -{ - if (disk_devt && atomic_dec_and_test(&disk_devt->count)) - disk_devt->release(disk_devt); -} -EXPORT_SYMBOL(put_disk_devt); - -void get_disk_devt(struct disk_devt *disk_devt) -{ - if (disk_devt) - atomic_inc(&disk_devt->count); -} -EXPORT_SYMBOL(get_disk_devt); - /** * device_add_disk - add partitioning information to kernel list * @parent: parent device for the disk @@ -626,13 +612,6 @@ void device_add_disk(struct device *parent, struct gendisk *disk) disk_alloc_events(disk); - /* - * Take a reference on the devt and assign it to queue since it - * must not be reallocated while the bdi is registered - */ - disk->queue->disk_devt = disk->disk_devt; - get_disk_devt(disk->disk_devt); - /* Register BDI before referencing it from bdev */ bdi = disk->queue->backing_dev_info; bdi_register_owner(bdi, disk_to_dev(disk)); @@ -681,12 +660,16 @@ void del_gendisk(struct gendisk *disk) disk->flags &= ~GENHD_FL_UP; sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi"); - /* - * Unregister bdi before releasing device numbers (as they can get - * reused and we'd get clashes in sysfs). - */ - bdi_unregister(disk->queue->backing_dev_info); - blk_unregister_queue(disk); + if (disk->queue) { + /* + * Unregister bdi before releasing device numbers (as they can + * get reused and we'd get clashes in sysfs). + */ + bdi_unregister(disk->queue->backing_dev_info); + blk_unregister_queue(disk); + } else { + WARN_ON(1); + } blk_unregister_region(disk_devt(disk), disk->minors); part_stat_set_all(&disk->part0, 0); diff --git a/block/sed-opal.c b/block/sed-opal.c index 1e18dca360fc..14035f826b5e 100644 --- a/block/sed-opal.c +++ b/block/sed-opal.c @@ -1023,7 +1023,6 @@ static int finalize_and_send(struct opal_dev *dev, cont_fn cont) static int gen_key(struct opal_dev *dev, void *data) { - const u8 *method; u8 uid[OPAL_UID_LENGTH]; int err = 0; @@ -1031,7 +1030,6 @@ static int gen_key(struct opal_dev *dev, void *data) set_comid(dev, dev->comid); memcpy(uid, dev->prev_data, min(sizeof(uid), dev->prev_d_len)); - method = opalmethod[OPAL_GENKEY]; kfree(dev->prev_data); dev->prev_data = NULL; @@ -1669,7 +1667,6 @@ static int add_user_to_lr(struct opal_dev *dev, void *data) static int lock_unlock_locking_range(struct opal_dev *dev, void *data) { u8 lr_buffer[OPAL_UID_LENGTH]; - const u8 *method; struct opal_lock_unlock *lkul = data; u8 read_locked = 1, write_locked = 1; int err = 0; @@ -1677,7 +1674,6 @@ static int lock_unlock_locking_range(struct opal_dev *dev, void *data) clear_opal_cmd(dev); set_comid(dev, dev->comid); - method = opalmethod[OPAL_SET]; if (build_locking_range(lr_buffer, sizeof(lr_buffer), lkul->session.opal_key.lr) < 0) return -ERANGE; @@ -1733,14 +1729,12 @@ static int lock_unlock_locking_range_sum(struct opal_dev *dev, void *data) { u8 lr_buffer[OPAL_UID_LENGTH]; u8 read_locked = 1, write_locked = 1; - const u8 *method; struct opal_lock_unlock *lkul = data; int ret; clear_opal_cmd(dev); set_comid(dev, dev->comid); - method = opalmethod[OPAL_SET]; if (build_locking_range(lr_buffer, sizeof(lr_buffer), lkul->session.opal_key.lr) < 0) return -ERANGE; @@ -2133,7 +2127,7 @@ static int opal_add_user_to_lr(struct opal_dev *dev, pr_err("Locking state was not RO or RW\n"); return -EINVAL; } - if (lk_unlk->session.who < OPAL_USER1 && + if (lk_unlk->session.who < OPAL_USER1 || lk_unlk->session.who > OPAL_USER9) { pr_err("Authority was not within the range of users: %d\n", lk_unlk->session.who); @@ -2316,7 +2310,7 @@ static int opal_activate_user(struct opal_dev *dev, int ret; /* We can't activate Admin1 it's active as manufactured */ - if (opal_session->who < OPAL_USER1 && + if (opal_session->who < OPAL_USER1 || opal_session->who > OPAL_USER9) { pr_err("Who was not a valid user: %d\n", opal_session->who); return -EINVAL; diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index 219b90bc0922..f15900132912 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -41,8 +41,10 @@ void acpi_gpe_apply_masked_gpes(void); void acpi_container_init(void); void acpi_memory_hotplug_init(void); #ifdef CONFIG_ACPI_HOTPLUG_IOAPIC +void pci_ioapic_remove(struct acpi_pci_root *root); int acpi_ioapic_remove(struct acpi_pci_root *root); #else +static inline void pci_ioapic_remove(struct acpi_pci_root *root) { return; } static inline int acpi_ioapic_remove(struct acpi_pci_root *root) { return 0; } #endif #ifdef CONFIG_ACPI_DOCK diff --git a/drivers/acpi/ioapic.c b/drivers/acpi/ioapic.c index 6d7ce6e12aaa..1120dfd625b8 100644 --- a/drivers/acpi/ioapic.c +++ b/drivers/acpi/ioapic.c @@ -206,24 +206,34 @@ int acpi_ioapic_add(acpi_handle root_handle) return ACPI_SUCCESS(status) && ACPI_SUCCESS(retval) ? 0 : -ENODEV; } -int acpi_ioapic_remove(struct acpi_pci_root *root) +void pci_ioapic_remove(struct acpi_pci_root *root) { - int retval = 0; struct acpi_pci_ioapic *ioapic, *tmp; mutex_lock(&ioapic_list_lock); list_for_each_entry_safe(ioapic, tmp, &ioapic_list, list) { if (root->device->handle != ioapic->root_handle) continue; - - if (acpi_unregister_ioapic(ioapic->handle, ioapic->gsi_base)) - retval = -EBUSY; - if (ioapic->pdev) { pci_release_region(ioapic->pdev, 0); pci_disable_device(ioapic->pdev); pci_dev_put(ioapic->pdev); } + } + mutex_unlock(&ioapic_list_lock); +} + +int acpi_ioapic_remove(struct acpi_pci_root *root) +{ + int retval = 0; + struct acpi_pci_ioapic *ioapic, *tmp; + + mutex_lock(&ioapic_list_lock); + list_for_each_entry_safe(ioapic, tmp, &ioapic_list, list) { + if (root->device->handle != ioapic->root_handle) + continue; + if (acpi_unregister_ioapic(ioapic->handle, ioapic->gsi_base)) + retval = -EBUSY; if (ioapic->res.flags && ioapic->res.parent) release_resource(&ioapic->res); list_del(&ioapic->list); diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c index bf601d4df8cf..919be0aa2578 100644 --- a/drivers/acpi/pci_root.c +++ b/drivers/acpi/pci_root.c @@ -648,12 +648,12 @@ static void acpi_pci_root_remove(struct acpi_device *device) pci_stop_root_bus(root->bus); - WARN_ON(acpi_ioapic_remove(root)); - + pci_ioapic_remove(root); device_set_run_wake(root->bus->bridge, false); pci_acpi_remove_bus_pm_notifier(device); pci_remove_root_bus(root->bus); + WARN_ON(acpi_ioapic_remove(root)); dmar_device_remove(device->handle); diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c index 10aed84244f5..939641d6e262 100644 --- a/drivers/block/paride/pcd.c +++ b/drivers/block/paride/pcd.c @@ -50,7 +50,7 @@ the slower the port i/o. In some cases, setting this to zero will speed up the device. (default -1) - major You may use this parameter to overide the + major You may use this parameter to override the default major number (46) that this driver will use. Be sure to change the device name as well. diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c index 644ba0888bd4..9cfd2e06a649 100644 --- a/drivers/block/paride/pd.c +++ b/drivers/block/paride/pd.c @@ -61,7 +61,7 @@ first drive found. - major You may use this parameter to overide the + major You may use this parameter to override the default major number (45) that this driver will use. Be sure to change the device name as well. diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c index ed93e8badf56..14c5d32f5d8b 100644 --- a/drivers/block/paride/pf.c +++ b/drivers/block/paride/pf.c @@ -59,7 +59,7 @@ the slower the port i/o. In some cases, setting this to zero will speed up the device. (default -1) - major You may use this parameter to overide the + major You may use this parameter to override the default major number (47) that this driver will use. Be sure to change the device name as well. diff --git a/drivers/block/paride/pg.c b/drivers/block/paride/pg.c index 5db955fe3a94..3b5882bfb736 100644 --- a/drivers/block/paride/pg.c +++ b/drivers/block/paride/pg.c @@ -84,7 +84,7 @@ the slower the port i/o. In some cases, setting this to zero will speed up the device. (default -1) - major You may use this parameter to overide the + major You may use this parameter to override the default major number (97) that this driver will use. Be sure to change the device name as well. diff --git a/drivers/block/paride/pt.c b/drivers/block/paride/pt.c index 61fc6824299a..e815312a00ad 100644 --- a/drivers/block/paride/pt.c +++ b/drivers/block/paride/pt.c @@ -61,7 +61,7 @@ the slower the port i/o. In some cases, setting this to zero will speed up the device. (default -1) - major You may use this parameter to overide the + major You may use this parameter to override the default major number (96) that this driver will use. Be sure to change the device name as well. diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index e27d89a36c34..dceb5edd1e54 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1189,6 +1189,8 @@ static int zram_add(void) blk_queue_io_min(zram->disk->queue, PAGE_SIZE); blk_queue_io_opt(zram->disk->queue, PAGE_SIZE); zram->disk->queue->limits.discard_granularity = PAGE_SIZE; + zram->disk->queue->limits.max_sectors = SECTORS_PER_PAGE; + zram->disk->queue->limits.chunk_sectors = 0; blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX); /* * zram_bio_discard() will clear all logical blocks if logical block diff --git a/drivers/char/nwbutton.c b/drivers/char/nwbutton.c index a5b1eb276c0b..e6d0d271c58c 100644 --- a/drivers/char/nwbutton.c +++ b/drivers/char/nwbutton.c @@ -6,7 +6,7 @@ #include <linux/module.h> #include <linux/kernel.h> -#include <linux/sched.h> +#include <linux/sched/signal.h> #include <linux/interrupt.h> #include <linux/time.h> #include <linux/timer.h> diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index a47543281864..38b9fdf854a4 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -2532,4 +2532,5 @@ static int __init cpufreq_core_init(void) return 0; } +module_param(off, int, 0444); core_initcall(cpufreq_core_init); diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index b1fbaa30ae04..3d37219a0dd7 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -377,6 +377,7 @@ static void intel_pstate_set_performance_limits(struct perf_limits *limits) intel_pstate_init_limits(limits); limits->min_perf_pct = 100; limits->min_perf = int_ext_tofp(1); + limits->min_sysfs_pct = 100; } static DEFINE_MUTEX(intel_pstate_driver_lock); @@ -968,11 +969,20 @@ static int intel_pstate_resume(struct cpufreq_policy *policy) } static void intel_pstate_update_policies(void) + __releases(&intel_pstate_limits_lock) + __acquires(&intel_pstate_limits_lock) { + struct perf_limits *saved_limits = limits; int cpu; + mutex_unlock(&intel_pstate_limits_lock); + for_each_possible_cpu(cpu) cpufreq_update_policy(cpu); + + mutex_lock(&intel_pstate_limits_lock); + + limits = saved_limits; } /************************** debugfs begin ************************/ @@ -1180,10 +1190,10 @@ static ssize_t store_no_turbo(struct kobject *a, struct attribute *b, limits->no_turbo = clamp_t(int, input, 0, 1); - mutex_unlock(&intel_pstate_limits_lock); - intel_pstate_update_policies(); + mutex_unlock(&intel_pstate_limits_lock); + mutex_unlock(&intel_pstate_driver_lock); return count; @@ -1217,10 +1227,10 @@ static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b, limits->max_perf_pct); limits->max_perf = div_ext_fp(limits->max_perf_pct, 100); - mutex_unlock(&intel_pstate_limits_lock); - intel_pstate_update_policies(); + mutex_unlock(&intel_pstate_limits_lock); + mutex_unlock(&intel_pstate_driver_lock); return count; @@ -1254,10 +1264,10 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b, limits->min_perf_pct); limits->min_perf = div_ext_fp(limits->min_perf_pct, 100); - mutex_unlock(&intel_pstate_limits_lock); - intel_pstate_update_policies(); + mutex_unlock(&intel_pstate_limits_lock); + mutex_unlock(&intel_pstate_driver_lock); return count; @@ -1874,13 +1884,11 @@ static int intel_pstate_prepare_request(struct cpudata *cpu, int pstate) intel_pstate_get_min_max(cpu, &min_perf, &max_perf); pstate = clamp_t(int, pstate, min_perf, max_perf); - trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu); return pstate; } static void intel_pstate_update_pstate(struct cpudata *cpu, int pstate) { - pstate = intel_pstate_prepare_request(cpu, pstate); if (pstate == cpu->pstate.current_pstate) return; @@ -1900,6 +1908,8 @@ static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu) update_turbo_state(); + target_pstate = intel_pstate_prepare_request(cpu, target_pstate); + trace_cpu_frequency(target_pstate * cpu->pstate.scaling, cpu->cpu); intel_pstate_update_pstate(cpu, target_pstate); sample = &cpu->sample; @@ -2132,16 +2142,11 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) mutex_lock(&intel_pstate_limits_lock); if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) { + pr_debug("set performance\n"); if (!perf_limits) { limits = &performance_limits; perf_limits = limits; } - if (policy->max >= policy->cpuinfo.max_freq && - !limits->no_turbo) { - pr_debug("set performance\n"); - intel_pstate_set_performance_limits(perf_limits); - goto out; - } } else { pr_debug("set powersave\n"); if (!perf_limits) { @@ -2152,7 +2157,7 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) } intel_pstate_update_perf_limits(policy, perf_limits); - out: + if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE) { /* * NOHZ_FULL CPUs need this as the governor callback may not @@ -2198,9 +2203,9 @@ static int intel_pstate_verify_policy(struct cpufreq_policy *policy) unsigned int max_freq, min_freq; max_freq = policy->cpuinfo.max_freq * - limits->max_sysfs_pct / 100; + perf_limits->max_sysfs_pct / 100; min_freq = policy->cpuinfo.max_freq * - limits->min_sysfs_pct / 100; + perf_limits->min_sysfs_pct / 100; cpufreq_verify_within_limits(policy, min_freq, max_freq); } @@ -2243,13 +2248,8 @@ static int __intel_pstate_cpu_init(struct cpufreq_policy *policy) cpu = all_cpu_data[policy->cpu]; - /* - * We need sane value in the cpu->perf_limits, so inherit from global - * perf_limits limits, which are seeded with values based on the - * CONFIG_CPU_FREQ_DEFAULT_GOV_*, during boot up. - */ if (per_cpu_limits) - memcpy(cpu->perf_limits, limits, sizeof(struct perf_limits)); + intel_pstate_init_limits(cpu->perf_limits); policy->min = cpu->pstate.min_pstate * cpu->pstate.scaling; policy->max = cpu->pstate.turbo_pstate * cpu->pstate.scaling; @@ -2301,7 +2301,6 @@ static struct cpufreq_driver intel_pstate = { static int intel_cpufreq_verify_policy(struct cpufreq_policy *policy) { struct cpudata *cpu = all_cpu_data[policy->cpu]; - struct perf_limits *perf_limits = limits; update_turbo_state(); policy->cpuinfo.max_freq = limits->turbo_disabled ? @@ -2309,15 +2308,6 @@ static int intel_cpufreq_verify_policy(struct cpufreq_policy *policy) cpufreq_verify_within_cpu_limits(policy); - if (per_cpu_limits) - perf_limits = cpu->perf_limits; - - mutex_lock(&intel_pstate_limits_lock); - - intel_pstate_update_perf_limits(policy, perf_limits); - - mutex_unlock(&intel_pstate_limits_lock); - return 0; } @@ -2370,6 +2360,7 @@ static int intel_cpufreq_target(struct cpufreq_policy *policy, wrmsrl_on_cpu(policy->cpu, MSR_IA32_PERF_CTL, pstate_funcs.get_val(cpu, target_pstate)); } + freqs.new = target_pstate * cpu->pstate.scaling; cpufreq_freq_transition_end(policy, &freqs, false); return 0; @@ -2383,8 +2374,9 @@ static unsigned int intel_cpufreq_fast_switch(struct cpufreq_policy *policy, target_freq = intel_cpufreq_turbo_update(cpu, policy, target_freq); target_pstate = DIV_ROUND_UP(target_freq, cpu->pstate.scaling); + target_pstate = intel_pstate_prepare_request(cpu, target_pstate); intel_pstate_update_pstate(cpu, target_pstate); - return target_freq; + return target_pstate * cpu->pstate.scaling; } static int intel_cpufreq_cpu_init(struct cpufreq_policy *policy) @@ -2437,8 +2429,11 @@ static int intel_pstate_register_driver(void) intel_pstate_init_limits(&powersave_limits); intel_pstate_set_performance_limits(&performance_limits); - limits = IS_ENABLED(CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE) ? - &performance_limits : &powersave_limits; + if (IS_ENABLED(CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE) && + intel_pstate_driver == &intel_pstate) + limits = &performance_limits; + else + limits = &powersave_limits; ret = cpufreq_register_driver(intel_pstate_driver); if (ret) { diff --git a/drivers/crypto/ux500/cryp/cryp.c b/drivers/crypto/ux500/cryp/cryp.c index 43a0c8a26ab0..00a16ab601cb 100644 --- a/drivers/crypto/ux500/cryp/cryp.c +++ b/drivers/crypto/ux500/cryp/cryp.c @@ -82,7 +82,7 @@ void cryp_activity(struct cryp_device_data *device_data, void cryp_flush_inoutfifo(struct cryp_device_data *device_data) { /* - * We always need to disble the hardware before trying to flush the + * We always need to disable the hardware before trying to flush the * FIFO. This is something that isn't written in the design * specification, but we have been informed by the hardware designers * that this must be done. diff --git a/drivers/firmware/efi/arm-runtime.c b/drivers/firmware/efi/arm-runtime.c index 349dc3e1e52e..974c5a31a005 100644 --- a/drivers/firmware/efi/arm-runtime.c +++ b/drivers/firmware/efi/arm-runtime.c @@ -65,6 +65,7 @@ static bool __init efi_virtmap_init(void) bool systab_found; efi_mm.pgd = pgd_alloc(&efi_mm); + mm_init_cpumask(&efi_mm); init_new_context(NULL, &efi_mm); systab_found = false; diff --git a/drivers/firmware/efi/libstub/secureboot.c b/drivers/firmware/efi/libstub/secureboot.c index 6def402bf569..5da36e56b36a 100644 --- a/drivers/firmware/efi/libstub/secureboot.c +++ b/drivers/firmware/efi/libstub/secureboot.c @@ -45,6 +45,8 @@ enum efi_secureboot_mode efi_get_secureboot(efi_system_table_t *sys_table_arg) size = sizeof(secboot); status = get_efi_var(efi_SecureBoot_name, &efi_variable_guid, NULL, &size, &secboot); + if (status == EFI_NOT_FOUND) + return efi_secureboot_mode_disabled; if (status != EFI_SUCCESS) goto out_efi_err; @@ -78,7 +80,5 @@ secure_boot_enabled: out_efi_err: pr_efi_err(sys_table_arg, "Could not determine UEFI Secure Boot status.\n"); - if (status == EFI_NOT_FOUND) - return efi_secureboot_mode_disabled; return efi_secureboot_mode_unknown; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 6abb238b25c9..4120b351a8e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2094,8 +2094,11 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon) } r = amdgpu_late_init(adev); - if (r) + if (r) { + if (fbcon) + console_unlock(); return r; + } /* pin cursors */ list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 75fc376ba735..f7adbace428a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -59,9 +59,10 @@ * - 3.7.0 - Add support for VCE clock list packet * - 3.8.0 - Add support raster config init in the kernel * - 3.9.0 - Add support for memory query info about VRAM and GTT. + * - 3.10.0 - Add support for new fences ioctl, new gem ioctl flags */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 9 +#define KMS_DRIVER_MINOR 10 #define KMS_DRIVER_PATCHLEVEL 0 int amdgpu_vram_limit = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 51d759463384..106cf83c2e6b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -202,6 +202,27 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, bool kernel = false; int r; + /* reject invalid gem flags */ + if (args->in.domain_flags & ~(AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | + AMDGPU_GEM_CREATE_NO_CPU_ACCESS | + AMDGPU_GEM_CREATE_CPU_GTT_USWC | + AMDGPU_GEM_CREATE_VRAM_CLEARED| + AMDGPU_GEM_CREATE_SHADOW | + AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) { + r = -EINVAL; + goto error_unlock; + } + /* reject invalid gem domains */ + if (args->in.domains & ~(AMDGPU_GEM_DOMAIN_CPU | + AMDGPU_GEM_DOMAIN_GTT | + AMDGPU_GEM_DOMAIN_VRAM | + AMDGPU_GEM_DOMAIN_GDS | + AMDGPU_GEM_DOMAIN_GWS | + AMDGPU_GEM_DOMAIN_OA)) { + r = -EINVAL; + goto error_unlock; + } + /* create a gem object to contain this object in */ if (args->in.domains & (AMDGPU_GEM_DOMAIN_GDS | AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA)) { diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index 31375bdde6f1..011800f621c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -788,7 +788,7 @@ static int sdma_v3_0_start(struct amdgpu_device *adev) } } - /* disble sdma engine before programing it */ + /* disable sdma engine before programing it */ sdma_v3_0_ctx_switch_enable(adev, false); sdma_v3_0_enable(adev, false); diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index c8baab9bee0d..ba58f1b11d1e 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -148,6 +148,9 @@ static const struct edid_quirk { /* Panel in Samsung NP700G7A-S01PL notebook reports 6bpc */ { "SEC", 0xd033, EDID_QUIRK_FORCE_8BPC }, + + /* Rotel RSX-1058 forwards sink's EDID but only does HDMI 1.1*/ + { "ETR", 13896, EDID_QUIRK_FORCE_8BPC }, }; /* diff --git a/drivers/gpu/drm/i915/gvt/cfg_space.c b/drivers/gpu/drm/i915/gvt/cfg_space.c index 4a6a2ed65732..b7d7721e72fa 100644 --- a/drivers/gpu/drm/i915/gvt/cfg_space.c +++ b/drivers/gpu/drm/i915/gvt/cfg_space.c @@ -41,6 +41,54 @@ enum { INTEL_GVT_PCI_BAR_MAX, }; +/* bitmap for writable bits (RW or RW1C bits, but cannot co-exist in one + * byte) byte by byte in standard pci configuration space. (not the full + * 256 bytes.) + */ +static const u8 pci_cfg_space_rw_bmp[PCI_INTERRUPT_LINE + 4] = { + [PCI_COMMAND] = 0xff, 0x07, + [PCI_STATUS] = 0x00, 0xf9, /* the only one RW1C byte */ + [PCI_CACHE_LINE_SIZE] = 0xff, + [PCI_BASE_ADDRESS_0 ... PCI_CARDBUS_CIS - 1] = 0xff, + [PCI_ROM_ADDRESS] = 0x01, 0xf8, 0xff, 0xff, + [PCI_INTERRUPT_LINE] = 0xff, +}; + +/** + * vgpu_pci_cfg_mem_write - write virtual cfg space memory + * + * Use this function to write virtual cfg space memory. + * For standard cfg space, only RW bits can be changed, + * and we emulates the RW1C behavior of PCI_STATUS register. + */ +static void vgpu_pci_cfg_mem_write(struct intel_vgpu *vgpu, unsigned int off, + u8 *src, unsigned int bytes) +{ + u8 *cfg_base = vgpu_cfg_space(vgpu); + u8 mask, new, old; + int i = 0; + + for (; i < bytes && (off + i < sizeof(pci_cfg_space_rw_bmp)); i++) { + mask = pci_cfg_space_rw_bmp[off + i]; + old = cfg_base[off + i]; + new = src[i] & mask; + + /** + * The PCI_STATUS high byte has RW1C bits, here + * emulates clear by writing 1 for these bits. + * Writing a 0b to RW1C bits has no effect. + */ + if (off + i == PCI_STATUS + 1) + new = (~new & old) & mask; + + cfg_base[off + i] = (old & ~mask) | new; + } + + /* For other configuration space directly copy as it is. */ + if (i < bytes) + memcpy(cfg_base + off + i, src + i, bytes - i); +} + /** * intel_vgpu_emulate_cfg_read - emulate vGPU configuration space read * @@ -123,7 +171,7 @@ static int emulate_pci_command_write(struct intel_vgpu *vgpu, u8 changed = old ^ new; int ret; - memcpy(vgpu_cfg_space(vgpu) + offset, p_data, bytes); + vgpu_pci_cfg_mem_write(vgpu, offset, p_data, bytes); if (!(changed & PCI_COMMAND_MEMORY)) return 0; @@ -237,6 +285,9 @@ int intel_vgpu_emulate_cfg_write(struct intel_vgpu *vgpu, unsigned int offset, { int ret; + if (vgpu->failsafe) + return 0; + if (WARN_ON(bytes > 4)) return -EINVAL; @@ -274,10 +325,10 @@ int intel_vgpu_emulate_cfg_write(struct intel_vgpu *vgpu, unsigned int offset, if (ret) return ret; - memcpy(vgpu_cfg_space(vgpu) + offset, p_data, bytes); + vgpu_pci_cfg_mem_write(vgpu, offset, p_data, bytes); break; default: - memcpy(vgpu_cfg_space(vgpu) + offset, p_data, bytes); + vgpu_pci_cfg_mem_write(vgpu, offset, p_data, bytes); break; } return 0; diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index b9c8e2407682..7ae6e2b241c8 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -668,7 +668,7 @@ static inline void print_opcode(u32 cmd, int ring_id) if (d_info == NULL) return; - gvt_err("opcode=0x%x %s sub_ops:", + gvt_dbg_cmd("opcode=0x%x %s sub_ops:", cmd >> (32 - d_info->op_len), d_info->name); for (i = 0; i < d_info->nr_sub_op; i++) @@ -693,23 +693,23 @@ static void parser_exec_state_dump(struct parser_exec_state *s) int cnt = 0; int i; - gvt_err(" vgpu%d RING%d: ring_start(%08lx) ring_end(%08lx)" + gvt_dbg_cmd(" vgpu%d RING%d: ring_start(%08lx) ring_end(%08lx)" " ring_head(%08lx) ring_tail(%08lx)\n", s->vgpu->id, s->ring_id, s->ring_start, s->ring_start + s->ring_size, s->ring_head, s->ring_tail); - gvt_err(" %s %s ip_gma(%08lx) ", + gvt_dbg_cmd(" %s %s ip_gma(%08lx) ", s->buf_type == RING_BUFFER_INSTRUCTION ? "RING_BUFFER" : "BATCH_BUFFER", s->buf_addr_type == GTT_BUFFER ? "GTT" : "PPGTT", s->ip_gma); if (s->ip_va == NULL) { - gvt_err(" ip_va(NULL)"); + gvt_dbg_cmd(" ip_va(NULL)"); return; } - gvt_err(" ip_va=%p: %08x %08x %08x %08x\n", + gvt_dbg_cmd(" ip_va=%p: %08x %08x %08x %08x\n", s->ip_va, cmd_val(s, 0), cmd_val(s, 1), cmd_val(s, 2), cmd_val(s, 3)); diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c index 6d8fde880c39..5419ae6ec633 100644 --- a/drivers/gpu/drm/i915/gvt/display.c +++ b/drivers/gpu/drm/i915/gvt/display.c @@ -83,44 +83,80 @@ static int pipe_is_enabled(struct intel_vgpu *vgpu, int pipe) return 0; } +static unsigned char virtual_dp_monitor_edid[GVT_EDID_NUM][EDID_SIZE] = { + { +/* EDID with 1024x768 as its resolution */ + /*Header*/ + 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, + /* Vendor & Product Identification */ + 0x22, 0xf0, 0x54, 0x29, 0x00, 0x00, 0x00, 0x00, 0x04, 0x17, + /* Version & Revision */ + 0x01, 0x04, + /* Basic Display Parameters & Features */ + 0xa5, 0x34, 0x20, 0x78, 0x23, + /* Color Characteristics */ + 0xfc, 0x81, 0xa4, 0x55, 0x4d, 0x9d, 0x25, 0x12, 0x50, 0x54, + /* Established Timings: maximum resolution is 1024x768 */ + 0x21, 0x08, 0x00, + /* Standard Timings. All invalid */ + 0x00, 0xc0, 0x00, 0xc0, 0x00, 0x40, 0x00, 0x80, 0x00, 0x00, + 0x00, 0x40, 0x00, 0x00, 0x00, 0x01, + /* 18 Byte Data Blocks 1: invalid */ + 0x00, 0x00, 0x80, 0xa0, 0x70, 0xb0, + 0x23, 0x40, 0x30, 0x20, 0x36, 0x00, 0x06, 0x44, 0x21, 0x00, 0x00, 0x1a, + /* 18 Byte Data Blocks 2: invalid */ + 0x00, 0x00, 0x00, 0xfd, 0x00, 0x18, 0x3c, 0x18, 0x50, 0x11, 0x00, 0x0a, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + /* 18 Byte Data Blocks 3: invalid */ + 0x00, 0x00, 0x00, 0xfc, 0x00, 0x48, + 0x50, 0x20, 0x5a, 0x52, 0x32, 0x34, 0x34, 0x30, 0x77, 0x0a, 0x20, 0x20, + /* 18 Byte Data Blocks 4: invalid */ + 0x00, 0x00, 0x00, 0xff, 0x00, 0x43, 0x4e, 0x34, 0x33, 0x30, 0x34, 0x30, + 0x44, 0x58, 0x51, 0x0a, 0x20, 0x20, + /* Extension Block Count */ + 0x00, + /* Checksum */ + 0xef, + }, + { /* EDID with 1920x1200 as its resolution */ -static unsigned char virtual_dp_monitor_edid[] = { - /*Header*/ - 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, - /* Vendor & Product Identification */ - 0x22, 0xf0, 0x54, 0x29, 0x00, 0x00, 0x00, 0x00, 0x04, 0x17, - /* Version & Revision */ - 0x01, 0x04, - /* Basic Display Parameters & Features */ - 0xa5, 0x34, 0x20, 0x78, 0x23, - /* Color Characteristics */ - 0xfc, 0x81, 0xa4, 0x55, 0x4d, 0x9d, 0x25, 0x12, 0x50, 0x54, - /* Established Timings: maximum resolution is 1024x768 */ - 0x21, 0x08, 0x00, - /* - * Standard Timings. - * below new resolutions can be supported: - * 1920x1080, 1280x720, 1280x960, 1280x1024, - * 1440x900, 1600x1200, 1680x1050 - */ - 0xd1, 0xc0, 0x81, 0xc0, 0x81, 0x40, 0x81, 0x80, 0x95, 0x00, - 0xa9, 0x40, 0xb3, 0x00, 0x01, 0x01, - /* 18 Byte Data Blocks 1: max resolution is 1920x1200 */ - 0x28, 0x3c, 0x80, 0xa0, 0x70, 0xb0, - 0x23, 0x40, 0x30, 0x20, 0x36, 0x00, 0x06, 0x44, 0x21, 0x00, 0x00, 0x1a, - /* 18 Byte Data Blocks 2: invalid */ - 0x00, 0x00, 0x00, 0xfd, 0x00, 0x18, 0x3c, 0x18, 0x50, 0x11, 0x00, 0x0a, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - /* 18 Byte Data Blocks 3: invalid */ - 0x00, 0x00, 0x00, 0xfc, 0x00, 0x48, - 0x50, 0x20, 0x5a, 0x52, 0x32, 0x34, 0x34, 0x30, 0x77, 0x0a, 0x20, 0x20, - /* 18 Byte Data Blocks 4: invalid */ - 0x00, 0x00, 0x00, 0xff, 0x00, 0x43, 0x4e, 0x34, 0x33, 0x30, 0x34, 0x30, - 0x44, 0x58, 0x51, 0x0a, 0x20, 0x20, - /* Extension Block Count */ - 0x00, - /* Checksum */ - 0x45, + /*Header*/ + 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, + /* Vendor & Product Identification */ + 0x22, 0xf0, 0x54, 0x29, 0x00, 0x00, 0x00, 0x00, 0x04, 0x17, + /* Version & Revision */ + 0x01, 0x04, + /* Basic Display Parameters & Features */ + 0xa5, 0x34, 0x20, 0x78, 0x23, + /* Color Characteristics */ + 0xfc, 0x81, 0xa4, 0x55, 0x4d, 0x9d, 0x25, 0x12, 0x50, 0x54, + /* Established Timings: maximum resolution is 1024x768 */ + 0x21, 0x08, 0x00, + /* + * Standard Timings. + * below new resolutions can be supported: + * 1920x1080, 1280x720, 1280x960, 1280x1024, + * 1440x900, 1600x1200, 1680x1050 + */ + 0xd1, 0xc0, 0x81, 0xc0, 0x81, 0x40, 0x81, 0x80, 0x95, 0x00, + 0xa9, 0x40, 0xb3, 0x00, 0x01, 0x01, + /* 18 Byte Data Blocks 1: max resolution is 1920x1200 */ + 0x28, 0x3c, 0x80, 0xa0, 0x70, 0xb0, + 0x23, 0x40, 0x30, 0x20, 0x36, 0x00, 0x06, 0x44, 0x21, 0x00, 0x00, 0x1a, + /* 18 Byte Data Blocks 2: invalid */ + 0x00, 0x00, 0x00, 0xfd, 0x00, 0x18, 0x3c, 0x18, 0x50, 0x11, 0x00, 0x0a, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + /* 18 Byte Data Blocks 3: invalid */ + 0x00, 0x00, 0x00, 0xfc, 0x00, 0x48, + 0x50, 0x20, 0x5a, 0x52, 0x32, 0x34, 0x34, 0x30, 0x77, 0x0a, 0x20, 0x20, + /* 18 Byte Data Blocks 4: invalid */ + 0x00, 0x00, 0x00, 0xff, 0x00, 0x43, 0x4e, 0x34, 0x33, 0x30, 0x34, 0x30, + 0x44, 0x58, 0x51, 0x0a, 0x20, 0x20, + /* Extension Block Count */ + 0x00, + /* Checksum */ + 0x45, + }, }; #define DPCD_HEADER_SIZE 0xb @@ -140,14 +176,20 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu) vgpu_vreg(vgpu, SDEISR) &= ~(SDE_PORTA_HOTPLUG_SPT | SDE_PORTE_HOTPLUG_SPT); - if (intel_vgpu_has_monitor_on_port(vgpu, PORT_B)) + if (intel_vgpu_has_monitor_on_port(vgpu, PORT_B)) { vgpu_vreg(vgpu, SDEISR) |= SDE_PORTB_HOTPLUG_CPT; + vgpu_vreg(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDIB_DETECTED; + } - if (intel_vgpu_has_monitor_on_port(vgpu, PORT_C)) + if (intel_vgpu_has_monitor_on_port(vgpu, PORT_C)) { vgpu_vreg(vgpu, SDEISR) |= SDE_PORTC_HOTPLUG_CPT; + vgpu_vreg(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDIC_DETECTED; + } - if (intel_vgpu_has_monitor_on_port(vgpu, PORT_D)) + if (intel_vgpu_has_monitor_on_port(vgpu, PORT_D)) { vgpu_vreg(vgpu, SDEISR) |= SDE_PORTD_HOTPLUG_CPT; + vgpu_vreg(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDID_DETECTED; + } if (IS_SKYLAKE(dev_priv) && intel_vgpu_has_monitor_on_port(vgpu, PORT_E)) { @@ -160,6 +202,8 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu) GEN8_PORT_DP_A_HOTPLUG; else vgpu_vreg(vgpu, SDEISR) |= SDE_PORTA_HOTPLUG_SPT; + + vgpu_vreg(vgpu, DDI_BUF_CTL(PORT_A)) |= DDI_INIT_DISPLAY_DETECTED; } } @@ -175,10 +219,13 @@ static void clean_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num) } static int setup_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num, - int type) + int type, unsigned int resolution) { struct intel_vgpu_port *port = intel_vgpu_port(vgpu, port_num); + if (WARN_ON(resolution >= GVT_EDID_NUM)) + return -EINVAL; + port->edid = kzalloc(sizeof(*(port->edid)), GFP_KERNEL); if (!port->edid) return -ENOMEM; @@ -189,7 +236,7 @@ static int setup_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num, return -ENOMEM; } - memcpy(port->edid->edid_block, virtual_dp_monitor_edid, + memcpy(port->edid->edid_block, virtual_dp_monitor_edid[resolution], EDID_SIZE); port->edid->data_valid = true; @@ -322,16 +369,18 @@ void intel_vgpu_clean_display(struct intel_vgpu *vgpu) * Zero on success, negative error code if failed. * */ -int intel_vgpu_init_display(struct intel_vgpu *vgpu) +int intel_vgpu_init_display(struct intel_vgpu *vgpu, u64 resolution) { struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; intel_vgpu_init_i2c_edid(vgpu); if (IS_SKYLAKE(dev_priv)) - return setup_virtual_dp_monitor(vgpu, PORT_D, GVT_DP_D); + return setup_virtual_dp_monitor(vgpu, PORT_D, GVT_DP_D, + resolution); else - return setup_virtual_dp_monitor(vgpu, PORT_B, GVT_DP_B); + return setup_virtual_dp_monitor(vgpu, PORT_B, GVT_DP_B, + resolution); } /** diff --git a/drivers/gpu/drm/i915/gvt/display.h b/drivers/gpu/drm/i915/gvt/display.h index 8b234ea961f6..d73de22102e2 100644 --- a/drivers/gpu/drm/i915/gvt/display.h +++ b/drivers/gpu/drm/i915/gvt/display.h @@ -154,10 +154,28 @@ struct intel_vgpu_port { int type; }; +enum intel_vgpu_edid { + GVT_EDID_1024_768, + GVT_EDID_1920_1200, + GVT_EDID_NUM, +}; + +static inline char *vgpu_edid_str(enum intel_vgpu_edid id) +{ + switch (id) { + case GVT_EDID_1024_768: + return "1024x768"; + case GVT_EDID_1920_1200: + return "1920x1200"; + default: + return ""; + } +} + void intel_gvt_emulate_vblank(struct intel_gvt *gvt); void intel_gvt_check_vblank_emulation(struct intel_gvt *gvt); -int intel_vgpu_init_display(struct intel_vgpu *vgpu); +int intel_vgpu_init_display(struct intel_vgpu *vgpu, u64 resolution); void intel_vgpu_reset_display(struct intel_vgpu *vgpu); void intel_vgpu_clean_display(struct intel_vgpu *vgpu); diff --git a/drivers/gpu/drm/i915/gvt/firmware.c b/drivers/gpu/drm/i915/gvt/firmware.c index 1cb29b2d7dc6..933a7c211a1c 100644 --- a/drivers/gpu/drm/i915/gvt/firmware.c +++ b/drivers/gpu/drm/i915/gvt/firmware.c @@ -80,7 +80,7 @@ static int expose_firmware_sysfs(struct intel_gvt *gvt) int ret; size = sizeof(*h) + info->mmio_size + info->cfg_space_size - 1; - firmware = vmalloc(size); + firmware = vzalloc(size); if (!firmware) return -ENOMEM; diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index 28c92346db0e..6a5ff23ded90 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -1825,11 +1825,8 @@ static int emulate_gtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off, gma = g_gtt_index << GTT_PAGE_SHIFT; /* the VM may configure the whole GM space when ballooning is used */ - if (WARN_ONCE(!vgpu_gmadr_is_valid(vgpu, gma), - "vgpu%d: found oob ggtt write, offset %x\n", - vgpu->id, off)) { + if (!vgpu_gmadr_is_valid(vgpu, gma)) return 0; - } ggtt_get_guest_entry(ggtt_mm, &e, g_gtt_index); @@ -2015,6 +2012,22 @@ int intel_vgpu_init_gtt(struct intel_vgpu *vgpu) return create_scratch_page_tree(vgpu); } +static void intel_vgpu_free_mm(struct intel_vgpu *vgpu, int type) +{ + struct list_head *pos, *n; + struct intel_vgpu_mm *mm; + + list_for_each_safe(pos, n, &vgpu->gtt.mm_list_head) { + mm = container_of(pos, struct intel_vgpu_mm, list); + if (mm->type == type) { + vgpu->gvt->gtt.mm_free_page_table(mm); + list_del(&mm->list); + list_del(&mm->lru_list); + kfree(mm); + } + } +} + /** * intel_vgpu_clean_gtt - clean up per-vGPU graphics memory virulization * @vgpu: a vGPU @@ -2027,19 +2040,11 @@ int intel_vgpu_init_gtt(struct intel_vgpu *vgpu) */ void intel_vgpu_clean_gtt(struct intel_vgpu *vgpu) { - struct list_head *pos, *n; - struct intel_vgpu_mm *mm; - ppgtt_free_all_shadow_page(vgpu); release_scratch_page_tree(vgpu); - list_for_each_safe(pos, n, &vgpu->gtt.mm_list_head) { - mm = container_of(pos, struct intel_vgpu_mm, list); - vgpu->gvt->gtt.mm_free_page_table(mm); - list_del(&mm->list); - list_del(&mm->lru_list); - kfree(mm); - } + intel_vgpu_free_mm(vgpu, INTEL_GVT_MM_PPGTT); + intel_vgpu_free_mm(vgpu, INTEL_GVT_MM_GGTT); } static void clean_spt_oos(struct intel_gvt *gvt) @@ -2322,6 +2327,13 @@ void intel_vgpu_reset_gtt(struct intel_vgpu *vgpu, bool dmlr) int i; ppgtt_free_all_shadow_page(vgpu); + + /* Shadow pages are only created when there is no page + * table tracking data, so remove page tracking data after + * removing the shadow pages. + */ + intel_vgpu_free_mm(vgpu, INTEL_GVT_MM_PPGTT); + if (!dmlr) return; diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index e227caf5859e..23791920ced1 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -143,6 +143,8 @@ struct intel_vgpu { int id; unsigned long handle; /* vGPU handle used by hypervisor MPT modules */ bool active; + bool pv_notified; + bool failsafe; bool resetting; void *sched_data; @@ -203,18 +205,18 @@ struct intel_gvt_firmware { }; struct intel_gvt_opregion { - void __iomem *opregion_va; + void *opregion_va; u32 opregion_pa; }; #define NR_MAX_INTEL_VGPU_TYPES 20 struct intel_vgpu_type { char name[16]; - unsigned int max_instance; unsigned int avail_instance; unsigned int low_gm_size; unsigned int high_gm_size; unsigned int fence; + enum intel_vgpu_edid resolution; }; struct intel_gvt { @@ -317,6 +319,7 @@ struct intel_vgpu_creation_params { __u64 low_gm_sz; /* in MB */ __u64 high_gm_sz; /* in MB */ __u64 fence_sz; + __u64 resolution; __s32 primary; __u64 vgpu_id; }; @@ -449,6 +452,11 @@ struct intel_gvt_ops { }; +enum { + GVT_FAILSAFE_UNSUPPORTED_GUEST, + GVT_FAILSAFE_INSUFFICIENT_RESOURCE, +}; + #include "mpt.h" #endif diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 1d450627ff65..8e43395c748a 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -121,6 +121,7 @@ static int new_mmio_info(struct intel_gvt *gvt, info->size = size; info->length = (i + 4) < end ? 4 : (end - i); info->addr_mask = addr_mask; + info->ro_mask = ro_mask; info->device = device; info->read = read ? read : intel_vgpu_default_mmio_read; info->write = write ? write : intel_vgpu_default_mmio_write; @@ -150,15 +151,44 @@ static int render_mmio_to_ring_id(struct intel_gvt *gvt, unsigned int reg) #define fence_num_to_offset(num) \ (num * 8 + i915_mmio_reg_offset(FENCE_REG_GEN6_LO(0))) + +static void enter_failsafe_mode(struct intel_vgpu *vgpu, int reason) +{ + switch (reason) { + case GVT_FAILSAFE_UNSUPPORTED_GUEST: + pr_err("Detected your guest driver doesn't support GVT-g.\n"); + break; + case GVT_FAILSAFE_INSUFFICIENT_RESOURCE: + pr_err("Graphics resource is not enough for the guest\n"); + default: + break; + } + pr_err("Now vgpu %d will enter failsafe mode.\n", vgpu->id); + vgpu->failsafe = true; +} + static int sanitize_fence_mmio_access(struct intel_vgpu *vgpu, unsigned int fence_num, void *p_data, unsigned int bytes) { if (fence_num >= vgpu_fence_sz(vgpu)) { - gvt_err("vgpu%d: found oob fence register access\n", - vgpu->id); - gvt_err("vgpu%d: total fence num %d access fence num %d\n", - vgpu->id, vgpu_fence_sz(vgpu), fence_num); + + /* When guest access oob fence regs without access + * pv_info first, we treat guest not supporting GVT, + * and we will let vgpu enter failsafe mode. + */ + if (!vgpu->pv_notified) + enter_failsafe_mode(vgpu, + GVT_FAILSAFE_UNSUPPORTED_GUEST); + + if (!vgpu->mmio.disable_warn_untrack) { + gvt_err("vgpu%d: found oob fence register access\n", + vgpu->id); + gvt_err("vgpu%d: total fence %d, access fence %d\n", + vgpu->id, vgpu_fence_sz(vgpu), + fence_num); + } memset(p_data, 0, bytes); + return -EINVAL; } return 0; } @@ -369,6 +399,74 @@ static int pipeconf_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, return 0; } +/* ascendingly sorted */ +static i915_reg_t force_nonpriv_white_list[] = { + GEN9_CS_DEBUG_MODE1, //_MMIO(0x20ec) + GEN9_CTX_PREEMPT_REG,//_MMIO(0x2248) + GEN8_CS_CHICKEN1,//_MMIO(0x2580) + _MMIO(0x2690), + _MMIO(0x2694), + _MMIO(0x2698), + _MMIO(0x4de0), + _MMIO(0x4de4), + _MMIO(0x4dfc), + GEN7_COMMON_SLICE_CHICKEN1,//_MMIO(0x7010) + _MMIO(0x7014), + HDC_CHICKEN0,//_MMIO(0x7300) + GEN8_HDC_CHICKEN1,//_MMIO(0x7304) + _MMIO(0x7700), + _MMIO(0x7704), + _MMIO(0x7708), + _MMIO(0x770c), + _MMIO(0xb110), + GEN8_L3SQCREG4,//_MMIO(0xb118) + _MMIO(0xe100), + _MMIO(0xe18c), + _MMIO(0xe48c), + _MMIO(0xe5f4), +}; + +/* a simple bsearch */ +static inline bool in_whitelist(unsigned int reg) +{ + int left = 0, right = ARRAY_SIZE(force_nonpriv_white_list); + i915_reg_t *array = force_nonpriv_white_list; + + while (left < right) { + int mid = (left + right)/2; + + if (reg > array[mid].reg) + left = mid + 1; + else if (reg < array[mid].reg) + right = mid; + else + return true; + } + return false; +} + +static int force_nonpriv_write(struct intel_vgpu *vgpu, + unsigned int offset, void *p_data, unsigned int bytes) +{ + u32 reg_nonpriv = *(u32 *)p_data; + int ret = -EINVAL; + + if ((bytes != 4) || ((offset & (bytes - 1)) != 0)) { + gvt_err("vgpu(%d) Invalid FORCE_NONPRIV offset %x(%dB)\n", + vgpu->id, offset, bytes); + return ret; + } + + if (in_whitelist(reg_nonpriv)) { + ret = intel_vgpu_default_mmio_write(vgpu, offset, p_data, + bytes); + } else { + gvt_err("vgpu(%d) Invalid FORCE_NONPRIV write %x\n", + vgpu->id, reg_nonpriv); + } + return ret; +} + static int ddi_buf_ctl_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes) { @@ -1001,6 +1099,7 @@ static int pvinfo_mmio_read(struct intel_vgpu *vgpu, unsigned int offset, if (invalid_read) gvt_err("invalid pvinfo read: [%x:%x] = %x\n", offset, bytes, *(u32 *)p_data); + vgpu->pv_notified = true; return 0; } @@ -1039,7 +1138,7 @@ static int send_display_ready_uevent(struct intel_vgpu *vgpu, int ready) char vmid_str[20]; char display_ready_str[20]; - snprintf(display_ready_str, 20, "GVT_DISPLAY_READY=%d\n", ready); + snprintf(display_ready_str, 20, "GVT_DISPLAY_READY=%d", ready); env[0] = display_ready_str; snprintf(vmid_str, 20, "VMID=%d", vgpu->id); @@ -1078,6 +1177,9 @@ static int pvinfo_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, case _vgtif_reg(execlist_context_descriptor_lo): case _vgtif_reg(execlist_context_descriptor_hi): break; + case _vgtif_reg(rsv5[0])..._vgtif_reg(rsv5[3]): + enter_failsafe_mode(vgpu, GVT_FAILSAFE_INSUFFICIENT_RESOURCE); + break; default: gvt_err("invalid pvinfo write offset %x bytes %x data %x\n", offset, bytes, data); @@ -1203,26 +1305,37 @@ static int mailbox_write(struct intel_vgpu *vgpu, unsigned int offset, u32 *data0 = &vgpu_vreg(vgpu, GEN6_PCODE_DATA); switch (cmd) { - case 0x6: - /** - * "Read memory latency" command on gen9. - * Below memory latency values are read - * from skylake platform. - */ - if (!*data0) - *data0 = 0x1e1a1100; - else - *data0 = 0x61514b3d; + case GEN9_PCODE_READ_MEM_LATENCY: + if (IS_SKYLAKE(vgpu->gvt->dev_priv)) { + /** + * "Read memory latency" command on gen9. + * Below memory latency values are read + * from skylake platform. + */ + if (!*data0) + *data0 = 0x1e1a1100; + else + *data0 = 0x61514b3d; + } + break; + case SKL_PCODE_CDCLK_CONTROL: + if (IS_SKYLAKE(vgpu->gvt->dev_priv)) + *data0 = SKL_CDCLK_READY_FOR_CHANGE; break; - case 0x5: + case GEN6_PCODE_READ_RC6VIDS: *data0 |= 0x1; break; } gvt_dbg_core("VM(%d) write %x to mailbox, return data0 %x\n", vgpu->id, value, *data0); - - value &= ~(1 << 31); + /** + * PCODE_READY clear means ready for pcode read/write, + * PCODE_ERROR_MASK clear means no error happened. In GVT-g we + * always emulate as pcode read/write success and ready for access + * anytime, since we don't touch real physical registers here. + */ + value &= ~(GEN6_PCODE_READY | GEN6_PCODE_ERROR_MASK); return intel_vgpu_default_mmio_write(vgpu, offset, &value, bytes); } @@ -1318,6 +1431,17 @@ static int ring_mode_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, bool enable_execlist; write_vreg(vgpu, offset, p_data, bytes); + + /* when PPGTT mode enabled, we will check if guest has called + * pvinfo, if not, we will treat this guest as non-gvtg-aware + * guest, and stop emulating its cfg space, mmio, gtt, etc. + */ + if (((data & _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)) || + (data & _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE))) + && !vgpu->pv_notified) { + enter_failsafe_mode(vgpu, GVT_FAILSAFE_UNSUPPORTED_GUEST); + return 0; + } if ((data & _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE)) || (data & _MASKED_BIT_DISABLE(GFX_RUN_LIST_ENABLE))) { enable_execlist = !!(data & GFX_RUN_LIST_ENABLE); @@ -1400,6 +1524,9 @@ static int ring_reset_ctl_write(struct intel_vgpu *vgpu, #define MMIO_GM(reg, d, r, w) \ MMIO_F(reg, 4, F_GMADR, 0xFFFFF000, 0, d, r, w) +#define MMIO_GM_RDR(reg, d, r, w) \ + MMIO_F(reg, 4, F_GMADR | F_CMD_ACCESS, 0xFFFFF000, 0, d, r, w) + #define MMIO_RO(reg, d, f, rm, r, w) \ MMIO_F(reg, 4, F_RO | f, 0, rm, d, r, w) @@ -1419,6 +1546,9 @@ static int ring_reset_ctl_write(struct intel_vgpu *vgpu, #define MMIO_RING_GM(prefix, d, r, w) \ MMIO_RING_F(prefix, 4, F_GMADR, 0xFFFF0000, 0, d, r, w) +#define MMIO_RING_GM_RDR(prefix, d, r, w) \ + MMIO_RING_F(prefix, 4, F_GMADR | F_CMD_ACCESS, 0xFFFF0000, 0, d, r, w) + #define MMIO_RING_RO(prefix, d, f, rm, r, w) \ MMIO_RING_F(prefix, 4, F_RO | f, 0, rm, d, r, w) @@ -1427,73 +1557,81 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) struct drm_i915_private *dev_priv = gvt->dev_priv; int ret; - MMIO_RING_DFH(RING_IMR, D_ALL, 0, NULL, intel_vgpu_reg_imr_handler); + MMIO_RING_DFH(RING_IMR, D_ALL, F_CMD_ACCESS, NULL, + intel_vgpu_reg_imr_handler); MMIO_DFH(SDEIMR, D_ALL, 0, NULL, intel_vgpu_reg_imr_handler); MMIO_DFH(SDEIER, D_ALL, 0, NULL, intel_vgpu_reg_ier_handler); MMIO_DFH(SDEIIR, D_ALL, 0, NULL, intel_vgpu_reg_iir_handler); MMIO_D(SDEISR, D_ALL); - MMIO_RING_D(RING_HWSTAM, D_ALL); + MMIO_RING_DFH(RING_HWSTAM, D_ALL, F_CMD_ACCESS, NULL, NULL); - MMIO_GM(RENDER_HWS_PGA_GEN7, D_ALL, NULL, NULL); - MMIO_GM(BSD_HWS_PGA_GEN7, D_ALL, NULL, NULL); - MMIO_GM(BLT_HWS_PGA_GEN7, D_ALL, NULL, NULL); - MMIO_GM(VEBOX_HWS_PGA_GEN7, D_ALL, NULL, NULL); + MMIO_GM_RDR(RENDER_HWS_PGA_GEN7, D_ALL, NULL, NULL); + MMIO_GM_RDR(BSD_HWS_PGA_GEN7, D_ALL, NULL, NULL); + MMIO_GM_RDR(BLT_HWS_PGA_GEN7, D_ALL, NULL, NULL); + MMIO_GM_RDR(VEBOX_HWS_PGA_GEN7, D_ALL, NULL, NULL); #define RING_REG(base) (base + 0x28) - MMIO_RING_D(RING_REG, D_ALL); + MMIO_RING_DFH(RING_REG, D_ALL, F_CMD_ACCESS, NULL, NULL); #undef RING_REG #define RING_REG(base) (base + 0x134) - MMIO_RING_D(RING_REG, D_ALL); + MMIO_RING_DFH(RING_REG, D_ALL, F_CMD_ACCESS, NULL, NULL); #undef RING_REG - MMIO_GM(0x2148, D_ALL, NULL, NULL); - MMIO_GM(CCID, D_ALL, NULL, NULL); - MMIO_GM(0x12198, D_ALL, NULL, NULL); + MMIO_GM_RDR(0x2148, D_ALL, NULL, NULL); + MMIO_GM_RDR(CCID, D_ALL, NULL, NULL); + MMIO_GM_RDR(0x12198, D_ALL, NULL, NULL); MMIO_D(GEN7_CXT_SIZE, D_ALL); - MMIO_RING_D(RING_TAIL, D_ALL); - MMIO_RING_D(RING_HEAD, D_ALL); - MMIO_RING_D(RING_CTL, D_ALL); - MMIO_RING_D(RING_ACTHD, D_ALL); - MMIO_RING_GM(RING_START, D_ALL, NULL, NULL); + MMIO_RING_DFH(RING_TAIL, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_RING_DFH(RING_HEAD, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_RING_DFH(RING_CTL, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_RING_DFH(RING_ACTHD, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_RING_GM_RDR(RING_START, D_ALL, NULL, NULL); /* RING MODE */ #define RING_REG(base) (base + 0x29c) - MMIO_RING_DFH(RING_REG, D_ALL, F_MODE_MASK, NULL, ring_mode_mmio_write); + MMIO_RING_DFH(RING_REG, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, + ring_mode_mmio_write); #undef RING_REG - MMIO_RING_DFH(RING_MI_MODE, D_ALL, F_MODE_MASK, NULL, NULL); - MMIO_RING_DFH(RING_INSTPM, D_ALL, F_MODE_MASK, NULL, NULL); + MMIO_RING_DFH(RING_MI_MODE, D_ALL, F_MODE_MASK | F_CMD_ACCESS, + NULL, NULL); + MMIO_RING_DFH(RING_INSTPM, D_ALL, F_MODE_MASK | F_CMD_ACCESS, + NULL, NULL); MMIO_RING_DFH(RING_TIMESTAMP, D_ALL, F_CMD_ACCESS, ring_timestamp_mmio_read, NULL); MMIO_RING_DFH(RING_TIMESTAMP_UDW, D_ALL, F_CMD_ACCESS, ring_timestamp_mmio_read, NULL); - MMIO_DFH(GEN7_GT_MODE, D_ALL, F_MODE_MASK, NULL, NULL); - MMIO_DFH(CACHE_MODE_0_GEN7, D_ALL, F_MODE_MASK, NULL, NULL); + MMIO_DFH(GEN7_GT_MODE, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(CACHE_MODE_0_GEN7, D_ALL, F_MODE_MASK | F_CMD_ACCESS, + NULL, NULL); MMIO_DFH(CACHE_MODE_1, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - - MMIO_DFH(0x20dc, D_ALL, F_MODE_MASK, NULL, NULL); - MMIO_DFH(_3D_CHICKEN3, D_ALL, F_MODE_MASK, NULL, NULL); - MMIO_DFH(0x2088, D_ALL, F_MODE_MASK, NULL, NULL); - MMIO_DFH(0x20e4, D_ALL, F_MODE_MASK, NULL, NULL); - MMIO_DFH(0x2470, D_ALL, F_MODE_MASK, NULL, NULL); - MMIO_D(GAM_ECOCHK, D_ALL); - MMIO_DFH(GEN7_COMMON_SLICE_CHICKEN1, D_ALL, F_MODE_MASK, NULL, NULL); + MMIO_DFH(CACHE_MODE_0, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x2124, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + + MMIO_DFH(0x20dc, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_3D_CHICKEN3, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x2088, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x20e4, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x2470, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(GAM_ECOCHK, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(GEN7_COMMON_SLICE_CHICKEN1, D_ALL, F_MODE_MASK | F_CMD_ACCESS, + NULL, NULL); MMIO_DFH(COMMON_SLICE_CHICKEN2, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - MMIO_D(0x9030, D_ALL); - MMIO_D(0x20a0, D_ALL); - MMIO_D(0x2420, D_ALL); - MMIO_D(0x2430, D_ALL); - MMIO_D(0x2434, D_ALL); - MMIO_D(0x2438, D_ALL); - MMIO_D(0x243c, D_ALL); - MMIO_DFH(0x7018, D_ALL, F_MODE_MASK, NULL, NULL); + MMIO_DFH(0x9030, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x20a0, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x2420, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x2430, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x2434, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x2438, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x243c, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x7018, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_DFH(HALF_SLICE_CHICKEN3, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0xe100, D_ALL, F_MODE_MASK, NULL, NULL); + MMIO_DFH(GEN7_HALF_SLICE_CHICKEN1, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); /* display */ MMIO_F(0x60220, 0x20, 0, 0, 0, D_ALL, NULL, NULL); @@ -2022,8 +2160,8 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_D(FORCEWAKE_ACK, D_ALL); MMIO_D(GEN6_GT_CORE_STATUS, D_ALL); MMIO_D(GEN6_GT_THREAD_STATUS_REG, D_ALL); - MMIO_D(GTFIFODBG, D_ALL); - MMIO_D(GTFIFOCTL, D_ALL); + MMIO_DFH(GTFIFODBG, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(GTFIFOCTL, D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_DH(FORCEWAKE_MT, D_PRE_SKL, NULL, mul_force_wake_write); MMIO_DH(FORCEWAKE_ACK_HSW, D_HSW | D_BDW, NULL, NULL); MMIO_D(ECOBUS, D_ALL); @@ -2080,7 +2218,7 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_F(0x4f000, 0x90, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_D(GEN6_PCODE_MAILBOX, D_PRE_SKL); + MMIO_D(GEN6_PCODE_MAILBOX, D_PRE_BDW); MMIO_D(GEN6_PCODE_DATA, D_ALL); MMIO_D(0x13812c, D_ALL); MMIO_DH(GEN7_ERR_INT, D_ALL, NULL, NULL); @@ -2159,36 +2297,35 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_D(0x1a054, D_ALL); MMIO_D(0x44070, D_ALL); - - MMIO_D(0x215c, D_HSW_PLUS); + MMIO_DFH(0x215c, D_HSW_PLUS, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(0x2178, D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(0x217c, D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(0x12178, D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(0x1217c, D_ALL, F_CMD_ACCESS, NULL, NULL); - MMIO_F(0x2290, 8, 0, 0, 0, D_HSW_PLUS, NULL, NULL); - MMIO_D(GEN7_OACONTROL, D_HSW); + MMIO_F(0x2290, 8, F_CMD_ACCESS, 0, 0, D_HSW_PLUS, NULL, NULL); + MMIO_DFH(GEN7_OACONTROL, D_HSW, F_CMD_ACCESS, NULL, NULL); MMIO_D(0x2b00, D_BDW_PLUS); MMIO_D(0x2360, D_BDW_PLUS); - MMIO_F(0x5200, 32, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(0x5240, 32, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(0x5280, 16, 0, 0, 0, D_ALL, NULL, NULL); + MMIO_F(0x5200, 32, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); + MMIO_F(0x5240, 32, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); + MMIO_F(0x5280, 16, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); MMIO_DFH(0x1c17c, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(0x1c178, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_D(BCS_SWCTRL, D_ALL); - - MMIO_F(HS_INVOCATION_COUNT, 8, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(DS_INVOCATION_COUNT, 8, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(IA_VERTICES_COUNT, 8, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(IA_PRIMITIVES_COUNT, 8, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(VS_INVOCATION_COUNT, 8, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(GS_INVOCATION_COUNT, 8, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(GS_PRIMITIVES_COUNT, 8, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(CL_INVOCATION_COUNT, 8, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(CL_PRIMITIVES_COUNT, 8, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(PS_INVOCATION_COUNT, 8, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(PS_DEPTH_COUNT, 8, 0, 0, 0, D_ALL, NULL, NULL); + MMIO_DFH(BCS_SWCTRL, D_ALL, F_CMD_ACCESS, NULL, NULL); + + MMIO_F(HS_INVOCATION_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); + MMIO_F(DS_INVOCATION_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); + MMIO_F(IA_VERTICES_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); + MMIO_F(IA_PRIMITIVES_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); + MMIO_F(VS_INVOCATION_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); + MMIO_F(GS_INVOCATION_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); + MMIO_F(GS_PRIMITIVES_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); + MMIO_F(CL_INVOCATION_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); + MMIO_F(CL_PRIMITIVES_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); + MMIO_F(PS_INVOCATION_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); + MMIO_F(PS_DEPTH_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); MMIO_DH(0x4260, D_BDW_PLUS, NULL, gvt_reg_tlb_control_handler); MMIO_DH(0x4264, D_BDW_PLUS, NULL, gvt_reg_tlb_control_handler); MMIO_DH(0x4268, D_BDW_PLUS, NULL, gvt_reg_tlb_control_handler); @@ -2196,6 +2333,17 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_DH(0x4270, D_BDW_PLUS, NULL, gvt_reg_tlb_control_handler); MMIO_DFH(0x4094, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(ARB_MODE, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + MMIO_RING_GM_RDR(RING_BBADDR, D_ALL, NULL, NULL); + MMIO_DFH(0x2220, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x12220, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x22220, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_RING_DFH(RING_SYNC_1, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_RING_DFH(RING_SYNC_0, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x22178, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x1a178, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x1a17c, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x2217c, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); return 0; } @@ -2204,7 +2352,7 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) struct drm_i915_private *dev_priv = gvt->dev_priv; int ret; - MMIO_DH(RING_IMR(GEN8_BSD2_RING_BASE), D_BDW_PLUS, NULL, + MMIO_DFH(RING_IMR(GEN8_BSD2_RING_BASE), D_BDW_PLUS, F_CMD_ACCESS, NULL, intel_vgpu_reg_imr_handler); MMIO_DH(GEN8_GT_IMR(0), D_BDW_PLUS, NULL, intel_vgpu_reg_imr_handler); @@ -2269,24 +2417,31 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) MMIO_DH(GEN8_MASTER_IRQ, D_BDW_PLUS, NULL, intel_vgpu_reg_master_irq_handler); - MMIO_D(RING_HWSTAM(GEN8_BSD2_RING_BASE), D_BDW_PLUS); - MMIO_D(0x1c134, D_BDW_PLUS); - - MMIO_D(RING_TAIL(GEN8_BSD2_RING_BASE), D_BDW_PLUS); - MMIO_D(RING_HEAD(GEN8_BSD2_RING_BASE), D_BDW_PLUS); - MMIO_GM(RING_START(GEN8_BSD2_RING_BASE), D_BDW_PLUS, NULL, NULL); - MMIO_D(RING_CTL(GEN8_BSD2_RING_BASE), D_BDW_PLUS); - MMIO_D(RING_ACTHD(GEN8_BSD2_RING_BASE), D_BDW_PLUS); - MMIO_D(RING_ACTHD_UDW(GEN8_BSD2_RING_BASE), D_BDW_PLUS); - MMIO_DFH(0x1c29c, D_BDW_PLUS, F_MODE_MASK, NULL, ring_mode_mmio_write); - MMIO_DFH(RING_MI_MODE(GEN8_BSD2_RING_BASE), D_BDW_PLUS, F_MODE_MASK, - NULL, NULL); - MMIO_DFH(RING_INSTPM(GEN8_BSD2_RING_BASE), D_BDW_PLUS, F_MODE_MASK, - NULL, NULL); + MMIO_DFH(RING_HWSTAM(GEN8_BSD2_RING_BASE), D_BDW_PLUS, + F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x1c134, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + + MMIO_DFH(RING_TAIL(GEN8_BSD2_RING_BASE), D_BDW_PLUS, F_CMD_ACCESS, + NULL, NULL); + MMIO_DFH(RING_HEAD(GEN8_BSD2_RING_BASE), D_BDW_PLUS, + F_CMD_ACCESS, NULL, NULL); + MMIO_GM_RDR(RING_START(GEN8_BSD2_RING_BASE), D_BDW_PLUS, NULL, NULL); + MMIO_DFH(RING_CTL(GEN8_BSD2_RING_BASE), D_BDW_PLUS, F_CMD_ACCESS, + NULL, NULL); + MMIO_DFH(RING_ACTHD(GEN8_BSD2_RING_BASE), D_BDW_PLUS, + F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(RING_ACTHD_UDW(GEN8_BSD2_RING_BASE), D_BDW_PLUS, + F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x1c29c, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, + ring_mode_mmio_write); + MMIO_DFH(RING_MI_MODE(GEN8_BSD2_RING_BASE), D_BDW_PLUS, + F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(RING_INSTPM(GEN8_BSD2_RING_BASE), D_BDW_PLUS, + F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_DFH(RING_TIMESTAMP(GEN8_BSD2_RING_BASE), D_BDW_PLUS, F_CMD_ACCESS, ring_timestamp_mmio_read, NULL); - MMIO_RING_D(RING_ACTHD_UDW, D_BDW_PLUS); + MMIO_RING_DFH(RING_ACTHD_UDW, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); #define RING_REG(base) (base + 0xd0) MMIO_RING_F(RING_REG, 4, F_RO, 0, @@ -2303,13 +2458,16 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) #undef RING_REG #define RING_REG(base) (base + 0x234) - MMIO_RING_F(RING_REG, 8, F_RO, 0, ~0, D_BDW_PLUS, NULL, NULL); - MMIO_F(RING_REG(GEN8_BSD2_RING_BASE), 4, F_RO, 0, ~0LL, D_BDW_PLUS, NULL, NULL); + MMIO_RING_F(RING_REG, 8, F_RO | F_CMD_ACCESS, 0, ~0, D_BDW_PLUS, + NULL, NULL); + MMIO_F(RING_REG(GEN8_BSD2_RING_BASE), 4, F_RO | F_CMD_ACCESS, 0, + ~0LL, D_BDW_PLUS, NULL, NULL); #undef RING_REG #define RING_REG(base) (base + 0x244) - MMIO_RING_D(RING_REG, D_BDW_PLUS); - MMIO_D(RING_REG(GEN8_BSD2_RING_BASE), D_BDW_PLUS); + MMIO_RING_DFH(RING_REG, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(RING_REG(GEN8_BSD2_RING_BASE), D_BDW_PLUS, F_CMD_ACCESS, + NULL, NULL); #undef RING_REG #define RING_REG(base) (base + 0x370) @@ -2331,6 +2489,8 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) MMIO_D(GEN7_MISCCPCTL, D_BDW_PLUS); MMIO_D(0x1c054, D_BDW_PLUS); + MMIO_DH(GEN6_PCODE_MAILBOX, D_BDW_PLUS, NULL, mailbox_write); + MMIO_D(GEN8_PRIVATE_PAT_LO, D_BDW_PLUS); MMIO_D(GEN8_PRIVATE_PAT_HI, D_BDW_PLUS); @@ -2341,14 +2501,14 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) MMIO_F(RING_REG(GEN8_BSD2_RING_BASE), 32, 0, 0, 0, D_BDW_PLUS, NULL, NULL); #undef RING_REG - MMIO_RING_GM(RING_HWS_PGA, D_BDW_PLUS, NULL, NULL); - MMIO_GM(0x1c080, D_BDW_PLUS, NULL, NULL); + MMIO_RING_GM_RDR(RING_HWS_PGA, D_BDW_PLUS, NULL, NULL); + MMIO_GM_RDR(RING_HWS_PGA(GEN8_BSD2_RING_BASE), D_BDW_PLUS, NULL, NULL); MMIO_DFH(HDC_CHICKEN0, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - MMIO_D(CHICKEN_PIPESL_1(PIPE_A), D_BDW); - MMIO_D(CHICKEN_PIPESL_1(PIPE_B), D_BDW); - MMIO_D(CHICKEN_PIPESL_1(PIPE_C), D_BDW); + MMIO_D(CHICKEN_PIPESL_1(PIPE_A), D_BDW_PLUS); + MMIO_D(CHICKEN_PIPESL_1(PIPE_B), D_BDW_PLUS); + MMIO_D(CHICKEN_PIPESL_1(PIPE_C), D_BDW_PLUS); MMIO_D(WM_MISC, D_BDW); MMIO_D(BDW_EDP_PSR_BASE, D_BDW); @@ -2362,27 +2522,31 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) MMIO_D(GEN8_EU_DISABLE1, D_BDW_PLUS); MMIO_D(GEN8_EU_DISABLE2, D_BDW_PLUS); - MMIO_D(0xfdc, D_BDW); - MMIO_DFH(GEN8_ROW_CHICKEN, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_D(GEN7_ROW_CHICKEN2, D_BDW_PLUS); - MMIO_D(GEN8_UCGCTL6, D_BDW_PLUS); + MMIO_D(0xfdc, D_BDW_PLUS); + MMIO_DFH(GEN8_ROW_CHICKEN, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, + NULL, NULL); + MMIO_DFH(GEN7_ROW_CHICKEN2, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, + NULL, NULL); + MMIO_DFH(GEN8_UCGCTL6, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_D(0xb1f0, D_BDW); - MMIO_D(0xb1c0, D_BDW); + MMIO_DFH(0xb1f0, D_BDW, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0xb1c0, D_BDW, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(GEN8_L3SQCREG4, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_D(0xb100, D_BDW); - MMIO_D(0xb10c, D_BDW); + MMIO_DFH(0xb100, D_BDW, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0xb10c, D_BDW, F_CMD_ACCESS, NULL, NULL); MMIO_D(0xb110, D_BDW); - MMIO_DFH(0x24d0, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x24d4, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x24d8, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x24dc, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_F(0x24d0, 48, F_CMD_ACCESS, 0, 0, D_BDW_PLUS, + NULL, force_nonpriv_write); + + MMIO_D(0x22040, D_BDW_PLUS); + MMIO_D(0x44484, D_BDW_PLUS); + MMIO_D(0x4448c, D_BDW_PLUS); - MMIO_D(0x83a4, D_BDW); + MMIO_DFH(0x83a4, D_BDW, F_CMD_ACCESS, NULL, NULL); MMIO_D(GEN8_L3_LRA_1_GPGPU, D_BDW_PLUS); - MMIO_D(0x8430, D_BDW); + MMIO_DFH(0x8430, D_BDW, F_CMD_ACCESS, NULL, NULL); MMIO_D(0x110000, D_BDW_PLUS); @@ -2394,10 +2558,19 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) MMIO_DFH(0xe194, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_DFH(0xe188, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_DFH(HALF_SLICE_CHICKEN2, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x2580, D_BDW_PLUS, F_MODE_MASK, NULL, NULL); - - MMIO_D(0x2248, D_BDW); - + MMIO_DFH(0x2580, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + + MMIO_DFH(0x2248, D_BDW, F_CMD_ACCESS, NULL, NULL); + + MMIO_DFH(0xe220, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0xe230, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0xe240, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0xe260, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0xe270, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0xe280, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0xe2a0, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0xe2b0, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0xe2c0, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); return 0; } @@ -2420,7 +2593,6 @@ static int init_skl_mmio_info(struct intel_gvt *gvt) MMIO_D(HSW_PWR_WELL_BIOS, D_SKL); MMIO_DH(HSW_PWR_WELL_DRIVER, D_SKL, NULL, skl_power_well_ctl_write); - MMIO_DH(GEN6_PCODE_MAILBOX, D_SKL, NULL, mailbox_write); MMIO_D(0xa210, D_SKL_PLUS); MMIO_D(GEN9_MEDIA_PG_IDLE_HYSTERESIS, D_SKL_PLUS); MMIO_D(GEN9_RENDER_PG_IDLE_HYSTERESIS, D_SKL_PLUS); @@ -2578,16 +2750,16 @@ static int init_skl_mmio_info(struct intel_gvt *gvt) MMIO_F(0xb020, 0x80, F_CMD_ACCESS, 0, 0, D_SKL, NULL, NULL); MMIO_D(0xd08, D_SKL); - MMIO_D(0x20e0, D_SKL); - MMIO_D(0x20ec, D_SKL); + MMIO_DFH(0x20e0, D_SKL, F_MODE_MASK, NULL, NULL); + MMIO_DFH(0x20ec, D_SKL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); /* TRTT */ - MMIO_D(0x4de0, D_SKL); - MMIO_D(0x4de4, D_SKL); - MMIO_D(0x4de8, D_SKL); - MMIO_D(0x4dec, D_SKL); - MMIO_D(0x4df0, D_SKL); - MMIO_DH(0x4df4, D_SKL, NULL, gen9_trtte_write); + MMIO_DFH(0x4de0, D_SKL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x4de4, D_SKL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x4de8, D_SKL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x4dec, D_SKL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x4df0, D_SKL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x4df4, D_SKL, F_CMD_ACCESS, NULL, gen9_trtte_write); MMIO_DH(0x4dfc, D_SKL, NULL, gen9_trtt_chicken_write); MMIO_D(0x45008, D_SKL); @@ -2611,7 +2783,7 @@ static int init_skl_mmio_info(struct intel_gvt *gvt) MMIO_D(0x65f08, D_SKL); MMIO_D(0x320f0, D_SKL); - MMIO_D(_REG_VCS2_EXCC, D_SKL); + MMIO_DFH(_REG_VCS2_EXCC, D_SKL, F_CMD_ACCESS, NULL, NULL); MMIO_D(0x70034, D_SKL); MMIO_D(0x71034, D_SKL); MMIO_D(0x72034, D_SKL); @@ -2624,6 +2796,9 @@ static int init_skl_mmio_info(struct intel_gvt *gvt) MMIO_D(_PLANE_KEYMSK_1(PIPE_C), D_SKL); MMIO_D(0x44500, D_SKL); + MMIO_DFH(GEN9_CSFE_CHICKEN1_RCS, D_SKL_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(GEN8_HDC_CHICKEN1, D_SKL, F_MODE_MASK | F_CMD_ACCESS, + NULL, NULL); return 0; } diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 0f7f5d97f582..84d801638ede 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -96,10 +96,10 @@ static int gvt_dma_map_iova(struct intel_vgpu *vgpu, kvm_pfn_t pfn, struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev; dma_addr_t daddr; - page = pfn_to_page(pfn); - if (is_error_page(page)) + if (unlikely(!pfn_valid(pfn))) return -EFAULT; + page = pfn_to_page(pfn); daddr = dma_map_page(dev, page, 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); if (dma_mapping_error(dev, daddr)) @@ -295,10 +295,10 @@ static ssize_t description_show(struct kobject *kobj, struct device *dev, return 0; return sprintf(buf, "low_gm_size: %dMB\nhigh_gm_size: %dMB\n" - "fence: %d\n", - BYTES_TO_MB(type->low_gm_size), - BYTES_TO_MB(type->high_gm_size), - type->fence); + "fence: %d\nresolution: %s\n", + BYTES_TO_MB(type->low_gm_size), + BYTES_TO_MB(type->high_gm_size), + type->fence, vgpu_edid_str(type->resolution)); } static MDEV_TYPE_ATTR_RO(available_instances); diff --git a/drivers/gpu/drm/i915/gvt/mmio.c b/drivers/gpu/drm/i915/gvt/mmio.c index 4df078bc5d04..60b698cb8365 100644 --- a/drivers/gpu/drm/i915/gvt/mmio.c +++ b/drivers/gpu/drm/i915/gvt/mmio.c @@ -57,6 +57,58 @@ int intel_vgpu_gpa_to_mmio_offset(struct intel_vgpu *vgpu, u64 gpa) (reg >= gvt->device_info.gtt_start_offset \ && reg < gvt->device_info.gtt_start_offset + gvt_ggtt_sz(gvt)) +static void failsafe_emulate_mmio_rw(struct intel_vgpu *vgpu, uint64_t pa, + void *p_data, unsigned int bytes, bool read) +{ + struct intel_gvt *gvt = NULL; + void *pt = NULL; + unsigned int offset = 0; + + if (!vgpu || !p_data) + return; + + gvt = vgpu->gvt; + mutex_lock(&gvt->lock); + offset = intel_vgpu_gpa_to_mmio_offset(vgpu, pa); + if (reg_is_mmio(gvt, offset)) { + if (read) + intel_vgpu_default_mmio_read(vgpu, offset, p_data, + bytes); + else + intel_vgpu_default_mmio_write(vgpu, offset, p_data, + bytes); + } else if (reg_is_gtt(gvt, offset) && + vgpu->gtt.ggtt_mm->virtual_page_table) { + offset -= gvt->device_info.gtt_start_offset; + pt = vgpu->gtt.ggtt_mm->virtual_page_table + offset; + if (read) + memcpy(p_data, pt, bytes); + else + memcpy(pt, p_data, bytes); + + } else if (atomic_read(&vgpu->gtt.n_write_protected_guest_page)) { + struct intel_vgpu_guest_page *gp; + + /* Since we enter the failsafe mode early during guest boot, + * guest may not have chance to set up its ppgtt table, so + * there should not be any wp pages for guest. Keep the wp + * related code here in case we need to handle it in furture. + */ + gp = intel_vgpu_find_guest_page(vgpu, pa >> PAGE_SHIFT); + if (gp) { + /* remove write protection to prevent furture traps */ + intel_vgpu_clean_guest_page(vgpu, gp); + if (read) + intel_gvt_hypervisor_read_gpa(vgpu, pa, + p_data, bytes); + else + intel_gvt_hypervisor_write_gpa(vgpu, pa, + p_data, bytes); + } + } + mutex_unlock(&gvt->lock); +} + /** * intel_vgpu_emulate_mmio_read - emulate MMIO read * @vgpu: a vGPU @@ -75,6 +127,11 @@ int intel_vgpu_emulate_mmio_read(struct intel_vgpu *vgpu, uint64_t pa, unsigned int offset = 0; int ret = -EINVAL; + + if (vgpu->failsafe) { + failsafe_emulate_mmio_rw(vgpu, pa, p_data, bytes, true); + return 0; + } mutex_lock(&gvt->lock); if (atomic_read(&vgpu->gtt.n_write_protected_guest_page)) { @@ -188,6 +245,11 @@ int intel_vgpu_emulate_mmio_write(struct intel_vgpu *vgpu, uint64_t pa, u32 old_vreg = 0, old_sreg = 0; int ret = -EINVAL; + if (vgpu->failsafe) { + failsafe_emulate_mmio_rw(vgpu, pa, p_data, bytes, false); + return 0; + } + mutex_lock(&gvt->lock); if (atomic_read(&vgpu->gtt.n_write_protected_guest_page)) { @@ -236,7 +298,7 @@ int intel_vgpu_emulate_mmio_write(struct intel_vgpu *vgpu, uint64_t pa, mmio = intel_gvt_find_mmio_info(gvt, rounddown(offset, 4)); if (!mmio && !vgpu->mmio.disable_warn_untrack) - gvt_err("vgpu%d: write untracked MMIO %x len %d val %x\n", + gvt_dbg_mmio("vgpu%d: write untracked MMIO %x len %d val %x\n", vgpu->id, offset, bytes, *(u32 *)p_data); if (!intel_gvt_mmio_is_unalign(gvt, offset)) { @@ -322,6 +384,8 @@ void intel_vgpu_reset_mmio(struct intel_vgpu *vgpu) /* set the bit 0:2(Core C-State ) to C0 */ vgpu_vreg(vgpu, GEN6_GT_CORE_STATUS) = 0; + + vgpu->mmio.disable_warn_untrack = false; } /** diff --git a/drivers/gpu/drm/i915/gvt/opregion.c b/drivers/gpu/drm/i915/gvt/opregion.c index d9fb41ab7119..5d1caf9daba9 100644 --- a/drivers/gpu/drm/i915/gvt/opregion.c +++ b/drivers/gpu/drm/i915/gvt/opregion.c @@ -27,7 +27,6 @@ static int init_vgpu_opregion(struct intel_vgpu *vgpu, u32 gpa) { - void __iomem *host_va = vgpu->gvt->opregion.opregion_va; u8 *buf; int i; @@ -43,8 +42,8 @@ static int init_vgpu_opregion(struct intel_vgpu *vgpu, u32 gpa) if (!vgpu_opregion(vgpu)->va) return -ENOMEM; - memcpy_fromio(vgpu_opregion(vgpu)->va, host_va, - INTEL_GVT_OPREGION_SIZE); + memcpy(vgpu_opregion(vgpu)->va, vgpu->gvt->opregion.opregion_va, + INTEL_GVT_OPREGION_SIZE); for (i = 0; i < INTEL_GVT_OPREGION_PAGES; i++) vgpu_opregion(vgpu)->gfn[i] = (gpa >> PAGE_SHIFT) + i; diff --git a/drivers/gpu/drm/i915/gvt/render.c b/drivers/gpu/drm/i915/gvt/render.c index 2b3a642284b6..73f052a4f424 100644 --- a/drivers/gpu/drm/i915/gvt/render.c +++ b/drivers/gpu/drm/i915/gvt/render.c @@ -53,6 +53,14 @@ static struct render_mmio gen8_render_mmio_list[] = { {RCS, _MMIO(0x24d4), 0, false}, {RCS, _MMIO(0x24d8), 0, false}, {RCS, _MMIO(0x24dc), 0, false}, + {RCS, _MMIO(0x24e0), 0, false}, + {RCS, _MMIO(0x24e4), 0, false}, + {RCS, _MMIO(0x24e8), 0, false}, + {RCS, _MMIO(0x24ec), 0, false}, + {RCS, _MMIO(0x24f0), 0, false}, + {RCS, _MMIO(0x24f4), 0, false}, + {RCS, _MMIO(0x24f8), 0, false}, + {RCS, _MMIO(0x24fc), 0, false}, {RCS, _MMIO(0x7004), 0xffff, true}, {RCS, _MMIO(0x7008), 0xffff, true}, {RCS, _MMIO(0x7000), 0xffff, true}, @@ -76,6 +84,14 @@ static struct render_mmio gen9_render_mmio_list[] = { {RCS, _MMIO(0x24d4), 0, false}, {RCS, _MMIO(0x24d8), 0, false}, {RCS, _MMIO(0x24dc), 0, false}, + {RCS, _MMIO(0x24e0), 0, false}, + {RCS, _MMIO(0x24e4), 0, false}, + {RCS, _MMIO(0x24e8), 0, false}, + {RCS, _MMIO(0x24ec), 0, false}, + {RCS, _MMIO(0x24f0), 0, false}, + {RCS, _MMIO(0x24f4), 0, false}, + {RCS, _MMIO(0x24f8), 0, false}, + {RCS, _MMIO(0x24fc), 0, false}, {RCS, _MMIO(0x7004), 0xffff, true}, {RCS, _MMIO(0x7008), 0xffff, true}, {RCS, _MMIO(0x7000), 0xffff, true}, diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index d6b6d0efdd1a..d3a56c949025 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -139,6 +139,9 @@ static int shadow_context_status_change(struct notifier_block *nb, struct intel_vgpu_workload *workload = scheduler->current_workload[req->engine->id]; + if (unlikely(!workload)) + return NOTIFY_OK; + switch (action) { case INTEL_CONTEXT_SCHEDULE_IN: intel_gvt_load_render_mmio(workload->vgpu, @@ -148,6 +151,15 @@ static int shadow_context_status_change(struct notifier_block *nb, case INTEL_CONTEXT_SCHEDULE_OUT: intel_gvt_restore_render_mmio(workload->vgpu, workload->ring_id); + /* If the status is -EINPROGRESS means this workload + * doesn't meet any issue during dispatching so when + * get the SCHEDULE_OUT set the status to be zero for + * good. If the status is NOT -EINPROGRESS means there + * is something wrong happened during dispatching and + * the status should not be set to zero + */ + if (workload->status == -EINPROGRESS) + workload->status = 0; atomic_set(&workload->shadow_ctx_active, 0); break; default: @@ -359,15 +371,23 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id) workload = scheduler->current_workload[ring_id]; vgpu = workload->vgpu; - if (!workload->status && !vgpu->resetting) { + /* For the workload w/ request, needs to wait for the context + * switch to make sure request is completed. + * For the workload w/o request, directly complete the workload. + */ + if (workload->req) { wait_event(workload->shadow_ctx_status_wq, !atomic_read(&workload->shadow_ctx_active)); - update_guest_context(workload); + i915_gem_request_put(fetch_and_zero(&workload->req)); + + if (!workload->status && !vgpu->resetting) { + update_guest_context(workload); - for_each_set_bit(event, workload->pending_events, - INTEL_GVT_EVENT_MAX) - intel_vgpu_trigger_virtual_event(vgpu, event); + for_each_set_bit(event, workload->pending_events, + INTEL_GVT_EVENT_MAX) + intel_vgpu_trigger_virtual_event(vgpu, event); + } } gvt_dbg_sched("ring id %d complete workload %p status %d\n", @@ -397,7 +417,6 @@ static int workload_thread(void *priv) int ring_id = p->ring_id; struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler; struct intel_vgpu_workload *workload = NULL; - long lret; int ret; bool need_force_wake = IS_SKYLAKE(gvt->dev_priv); DEFINE_WAIT_FUNC(wait, woken_wake_function); @@ -446,23 +465,24 @@ static int workload_thread(void *priv) gvt_dbg_sched("ring id %d wait workload %p\n", workload->ring_id, workload); - - lret = i915_wait_request(workload->req, +retry: + i915_wait_request(workload->req, 0, MAX_SCHEDULE_TIMEOUT); - if (lret < 0) { - workload->status = lret; - gvt_err("fail to wait workload, skip\n"); - } else { - workload->status = 0; + /* I915 has replay mechanism and a request will be replayed + * if there is i915 reset. So the seqno will be updated anyway. + * If the seqno is not updated yet after waiting, which means + * the replay may still be in progress and we can wait again. + */ + if (!i915_gem_request_completed(workload->req)) { + gvt_dbg_sched("workload %p not completed, wait again\n", + workload); + goto retry; } complete: gvt_dbg_sched("will complete workload %p, status: %d\n", workload, workload->status); - if (workload->req) - i915_gem_request_put(fetch_and_zero(&workload->req)); - complete_current_workload(gvt, ring_id); if (need_force_wake) diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index 95a97aa0051e..41cfa5ccae84 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -64,6 +64,20 @@ void populate_pvinfo_page(struct intel_vgpu *vgpu) WARN_ON(sizeof(struct vgt_if) != VGT_PVINFO_SIZE); } +static struct { + unsigned int low_mm; + unsigned int high_mm; + unsigned int fence; + enum intel_vgpu_edid edid; + char *name; +} vgpu_types[] = { +/* Fixed vGPU type table */ + { MB_TO_BYTES(64), MB_TO_BYTES(512), 4, GVT_EDID_1024_768, "8" }, + { MB_TO_BYTES(128), MB_TO_BYTES(512), 4, GVT_EDID_1920_1200, "4" }, + { MB_TO_BYTES(256), MB_TO_BYTES(1024), 4, GVT_EDID_1920_1200, "2" }, + { MB_TO_BYTES(512), MB_TO_BYTES(2048), 4, GVT_EDID_1920_1200, "1" }, +}; + /** * intel_gvt_init_vgpu_types - initialize vGPU type list * @gvt : GVT device @@ -78,9 +92,8 @@ int intel_gvt_init_vgpu_types(struct intel_gvt *gvt) unsigned int min_low; /* vGPU type name is defined as GVTg_Vx_y which contains - * physical GPU generation type and 'y' means maximum vGPU - * instances user can create on one physical GPU for this - * type. + * physical GPU generation type (e.g V4 as BDW server, V5 as + * SKL server). * * Depend on physical SKU resource, might see vGPU types like * GVTg_V4_8, GVTg_V4_4, GVTg_V4_2, etc. We can create @@ -92,7 +105,7 @@ int intel_gvt_init_vgpu_types(struct intel_gvt *gvt) */ low_avail = gvt_aperture_sz(gvt) - HOST_LOW_GM_SIZE; high_avail = gvt_hidden_sz(gvt) - HOST_HIGH_GM_SIZE; - num_types = 4; + num_types = sizeof(vgpu_types) / sizeof(vgpu_types[0]); gvt->types = kzalloc(num_types * sizeof(struct intel_vgpu_type), GFP_KERNEL); @@ -101,28 +114,29 @@ int intel_gvt_init_vgpu_types(struct intel_gvt *gvt) min_low = MB_TO_BYTES(32); for (i = 0; i < num_types; ++i) { - if (low_avail / min_low == 0) + if (low_avail / vgpu_types[i].low_mm == 0) break; - gvt->types[i].low_gm_size = min_low; - gvt->types[i].high_gm_size = max((min_low<<3), MB_TO_BYTES(384U)); - gvt->types[i].fence = 4; - gvt->types[i].max_instance = min(low_avail / min_low, - high_avail / gvt->types[i].high_gm_size); - gvt->types[i].avail_instance = gvt->types[i].max_instance; + + gvt->types[i].low_gm_size = vgpu_types[i].low_mm; + gvt->types[i].high_gm_size = vgpu_types[i].high_mm; + gvt->types[i].fence = vgpu_types[i].fence; + gvt->types[i].resolution = vgpu_types[i].edid; + gvt->types[i].avail_instance = min(low_avail / vgpu_types[i].low_mm, + high_avail / vgpu_types[i].high_mm); if (IS_GEN8(gvt->dev_priv)) - sprintf(gvt->types[i].name, "GVTg_V4_%u", - gvt->types[i].max_instance); + sprintf(gvt->types[i].name, "GVTg_V4_%s", + vgpu_types[i].name); else if (IS_GEN9(gvt->dev_priv)) - sprintf(gvt->types[i].name, "GVTg_V5_%u", - gvt->types[i].max_instance); + sprintf(gvt->types[i].name, "GVTg_V5_%s", + vgpu_types[i].name); - min_low <<= 1; - gvt_dbg_core("type[%d]: %s max %u avail %u low %u high %u fence %u\n", - i, gvt->types[i].name, gvt->types[i].max_instance, + gvt_dbg_core("type[%d]: %s avail %u low %u high %u fence %u res %s\n", + i, gvt->types[i].name, gvt->types[i].avail_instance, gvt->types[i].low_gm_size, - gvt->types[i].high_gm_size, gvt->types[i].fence); + gvt->types[i].high_gm_size, gvt->types[i].fence, + vgpu_edid_str(gvt->types[i].resolution)); } gvt->num_types = i; @@ -138,7 +152,7 @@ static void intel_gvt_update_vgpu_types(struct intel_gvt *gvt) { int i; unsigned int low_gm_avail, high_gm_avail, fence_avail; - unsigned int low_gm_min, high_gm_min, fence_min, total_min; + unsigned int low_gm_min, high_gm_min, fence_min; /* Need to depend on maxium hw resource size but keep on * static config for now. @@ -154,12 +168,11 @@ static void intel_gvt_update_vgpu_types(struct intel_gvt *gvt) low_gm_min = low_gm_avail / gvt->types[i].low_gm_size; high_gm_min = high_gm_avail / gvt->types[i].high_gm_size; fence_min = fence_avail / gvt->types[i].fence; - total_min = min(min(low_gm_min, high_gm_min), fence_min); - gvt->types[i].avail_instance = min(gvt->types[i].max_instance, - total_min); + gvt->types[i].avail_instance = min(min(low_gm_min, high_gm_min), + fence_min); - gvt_dbg_core("update type[%d]: %s max %u avail %u low %u high %u fence %u\n", - i, gvt->types[i].name, gvt->types[i].max_instance, + gvt_dbg_core("update type[%d]: %s avail %u low %u high %u fence %u\n", + i, gvt->types[i].name, gvt->types[i].avail_instance, gvt->types[i].low_gm_size, gvt->types[i].high_gm_size, gvt->types[i].fence); } @@ -248,7 +261,7 @@ static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt, if (ret) goto out_detach_hypervisor_vgpu; - ret = intel_vgpu_init_display(vgpu); + ret = intel_vgpu_init_display(vgpu, param->resolution); if (ret) goto out_clean_gtt; @@ -312,6 +325,7 @@ struct intel_vgpu *intel_gvt_create_vgpu(struct intel_gvt *gvt, param.low_gm_sz = type->low_gm_size; param.high_gm_sz = type->high_gm_size; param.fence_sz = type->fence; + param.resolution = type->resolution; /* XXX current param based on MB */ param.low_gm_sz = BYTES_TO_MB(param.low_gm_sz); @@ -387,8 +401,12 @@ void intel_gvt_reset_vgpu_locked(struct intel_vgpu *vgpu, bool dmlr, populate_pvinfo_page(vgpu); intel_vgpu_reset_display(vgpu); - if (dmlr) + if (dmlr) { intel_vgpu_reset_cfg_space(vgpu); + /* only reset the failsafe mode when dmlr reset */ + vgpu->failsafe = false; + vgpu->pv_notified = false; + } } vgpu->resetting = false; diff --git a/drivers/gpu/drm/mxsfb/mxsfb_crtc.c b/drivers/gpu/drm/mxsfb/mxsfb_crtc.c index e10a4eda4078..1144e0c9e894 100644 --- a/drivers/gpu/drm/mxsfb/mxsfb_crtc.c +++ b/drivers/gpu/drm/mxsfb/mxsfb_crtc.c @@ -65,13 +65,11 @@ static int mxsfb_set_pixel_fmt(struct mxsfb_drm_private *mxsfb) switch (format) { case DRM_FORMAT_RGB565: dev_dbg(drm->dev, "Setting up RGB565 mode\n"); - ctrl |= CTRL_SET_BUS_WIDTH(STMLCDIF_16BIT); ctrl |= CTRL_SET_WORD_LENGTH(0); ctrl1 |= CTRL1_SET_BYTE_PACKAGING(0xf); break; case DRM_FORMAT_XRGB8888: dev_dbg(drm->dev, "Setting up XRGB8888 mode\n"); - ctrl |= CTRL_SET_BUS_WIDTH(STMLCDIF_24BIT); ctrl |= CTRL_SET_WORD_LENGTH(3); /* Do not use packed pixels = one pixel per word instead. */ ctrl1 |= CTRL1_SET_BYTE_PACKAGING(0x7); @@ -87,6 +85,36 @@ static int mxsfb_set_pixel_fmt(struct mxsfb_drm_private *mxsfb) return 0; } +static void mxsfb_set_bus_fmt(struct mxsfb_drm_private *mxsfb) +{ + struct drm_crtc *crtc = &mxsfb->pipe.crtc; + struct drm_device *drm = crtc->dev; + u32 bus_format = MEDIA_BUS_FMT_RGB888_1X24; + u32 reg; + + reg = readl(mxsfb->base + LCDC_CTRL); + + if (mxsfb->connector.display_info.num_bus_formats) + bus_format = mxsfb->connector.display_info.bus_formats[0]; + + reg &= ~CTRL_BUS_WIDTH_MASK; + switch (bus_format) { + case MEDIA_BUS_FMT_RGB565_1X16: + reg |= CTRL_SET_BUS_WIDTH(STMLCDIF_16BIT); + break; + case MEDIA_BUS_FMT_RGB666_1X18: + reg |= CTRL_SET_BUS_WIDTH(STMLCDIF_18BIT); + break; + case MEDIA_BUS_FMT_RGB888_1X24: + reg |= CTRL_SET_BUS_WIDTH(STMLCDIF_24BIT); + break; + default: + dev_err(drm->dev, "Unknown media bus format %d\n", bus_format); + break; + } + writel(reg, mxsfb->base + LCDC_CTRL); +} + static void mxsfb_enable_controller(struct mxsfb_drm_private *mxsfb) { u32 reg; @@ -168,13 +196,22 @@ static void mxsfb_crtc_mode_set_nofb(struct mxsfb_drm_private *mxsfb) vdctrl0 |= VDCTRL0_HSYNC_ACT_HIGH; if (m->flags & DRM_MODE_FLAG_PVSYNC) vdctrl0 |= VDCTRL0_VSYNC_ACT_HIGH; - if (bus_flags & DRM_BUS_FLAG_DE_HIGH) + /* Make sure Data Enable is high active by default */ + if (!(bus_flags & DRM_BUS_FLAG_DE_LOW)) vdctrl0 |= VDCTRL0_ENABLE_ACT_HIGH; - if (bus_flags & DRM_BUS_FLAG_PIXDATA_NEGEDGE) + /* + * DRM_BUS_FLAG_PIXDATA_ defines are controller centric, + * controllers VDCTRL0_DOTCLK is display centric. + * Drive on positive edge -> display samples on falling edge + * DRM_BUS_FLAG_PIXDATA_POSEDGE -> VDCTRL0_DOTCLK_ACT_FALLING + */ + if (bus_flags & DRM_BUS_FLAG_PIXDATA_POSEDGE) vdctrl0 |= VDCTRL0_DOTCLK_ACT_FALLING; writel(vdctrl0, mxsfb->base + LCDC_VDCTRL0); + mxsfb_set_bus_fmt(mxsfb); + /* Frame length in lines. */ writel(m->crtc_vtotal, mxsfb->base + LCDC_VDCTRL1); @@ -184,8 +221,8 @@ static void mxsfb_crtc_mode_set_nofb(struct mxsfb_drm_private *mxsfb) VDCTRL2_SET_HSYNC_PERIOD(m->crtc_htotal), mxsfb->base + LCDC_VDCTRL2); - writel(SET_HOR_WAIT_CNT(m->crtc_hblank_end - m->crtc_hsync_end) | - SET_VERT_WAIT_CNT(m->crtc_vblank_end - m->crtc_vsync_end), + writel(SET_HOR_WAIT_CNT(m->crtc_htotal - m->crtc_hsync_start) | + SET_VERT_WAIT_CNT(m->crtc_vtotal - m->crtc_vsync_start), mxsfb->base + LCDC_VDCTRL3); writel(SET_DOTCLK_H_VALID_DATA_CNT(m->hdisplay), diff --git a/drivers/gpu/drm/mxsfb/mxsfb_drv.c b/drivers/gpu/drm/mxsfb/mxsfb_drv.c index cdfbe0284635..ff6d6a6f842e 100644 --- a/drivers/gpu/drm/mxsfb/mxsfb_drv.c +++ b/drivers/gpu/drm/mxsfb/mxsfb_drv.c @@ -102,14 +102,18 @@ static void mxsfb_pipe_enable(struct drm_simple_display_pipe *pipe, { struct mxsfb_drm_private *mxsfb = drm_pipe_to_mxsfb_drm_private(pipe); + drm_panel_prepare(mxsfb->panel); mxsfb_crtc_enable(mxsfb); + drm_panel_enable(mxsfb->panel); } static void mxsfb_pipe_disable(struct drm_simple_display_pipe *pipe) { struct mxsfb_drm_private *mxsfb = drm_pipe_to_mxsfb_drm_private(pipe); + drm_panel_disable(mxsfb->panel); mxsfb_crtc_disable(mxsfb); + drm_panel_unprepare(mxsfb->panel); } static void mxsfb_pipe_update(struct drm_simple_display_pipe *pipe, diff --git a/drivers/gpu/drm/mxsfb/mxsfb_out.c b/drivers/gpu/drm/mxsfb/mxsfb_out.c index fa8d17399407..b8e81422d4e2 100644 --- a/drivers/gpu/drm/mxsfb/mxsfb_out.c +++ b/drivers/gpu/drm/mxsfb/mxsfb_out.c @@ -112,6 +112,7 @@ static int mxsfb_attach_endpoint(struct drm_device *drm, int mxsfb_create_output(struct drm_device *drm) { + struct mxsfb_drm_private *mxsfb = drm->dev_private; struct device_node *ep_np = NULL; struct of_endpoint ep; int ret; @@ -127,5 +128,8 @@ int mxsfb_create_output(struct drm_device *drm) } } + if (!mxsfb->panel) + return -EPROBE_DEFER; + return 0; } diff --git a/drivers/gpu/drm/mxsfb/mxsfb_regs.h b/drivers/gpu/drm/mxsfb/mxsfb_regs.h index 31d62cd0d3d7..66a6ba9ec533 100644 --- a/drivers/gpu/drm/mxsfb/mxsfb_regs.h +++ b/drivers/gpu/drm/mxsfb/mxsfb_regs.h @@ -44,6 +44,7 @@ #define CTRL_DATA_SELECT (1 << 16) #define CTRL_SET_BUS_WIDTH(x) (((x) & 0x3) << 10) #define CTRL_GET_BUS_WIDTH(x) (((x) >> 10) & 0x3) +#define CTRL_BUS_WIDTH_MASK (0x3 << 10) #define CTRL_SET_WORD_LENGTH(x) (((x) & 0x3) << 8) #define CTRL_GET_WORD_LENGTH(x) (((x) >> 8) & 0x3) #define CTRL_MASTER (1 << 5) diff --git a/drivers/gpu/drm/rcar-du/rcar_du_vsp.c b/drivers/gpu/drm/rcar-du/rcar_du_vsp.c index b5bfbe50bd87..b0ff304ce3dc 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_vsp.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_vsp.c @@ -32,6 +32,10 @@ void rcar_du_vsp_enable(struct rcar_du_crtc *crtc) { const struct drm_display_mode *mode = &crtc->crtc.state->adjusted_mode; struct rcar_du_device *rcdu = crtc->group->dev; + struct vsp1_du_lif_config cfg = { + .width = mode->hdisplay, + .height = mode->vdisplay, + }; struct rcar_du_plane_state state = { .state = { .crtc = &crtc->crtc, @@ -66,12 +70,12 @@ void rcar_du_vsp_enable(struct rcar_du_crtc *crtc) */ crtc->group->need_restart = true; - vsp1_du_setup_lif(crtc->vsp->vsp, mode->hdisplay, mode->vdisplay); + vsp1_du_setup_lif(crtc->vsp->vsp, &cfg); } void rcar_du_vsp_disable(struct rcar_du_crtc *crtc) { - vsp1_du_setup_lif(crtc->vsp->vsp, 0, 0); + vsp1_du_setup_lif(crtc->vsp->vsp, NULL); } void rcar_du_vsp_atomic_begin(struct rcar_du_crtc *crtc) diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index 81a80c82f1bd..bd0d1988feb2 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -543,7 +543,7 @@ static int vmbus_close_internal(struct vmbus_channel *channel) /* * In case a device driver's probe() fails (e.g., * util_probe() -> vmbus_open() returns -ENOMEM) and the device is - * rescinded later (e.g., we dynamically disble an Integrated Service + * rescinded later (e.g., we dynamically disable an Integrated Service * in Hyper-V Manager), the driver's remove() invokes vmbus_close(): * here we should skip most of the below cleanup work. */ diff --git a/drivers/irqchip/irq-crossbar.c b/drivers/irqchip/irq-crossbar.c index 1eef56a89b1f..05bbf171df37 100644 --- a/drivers/irqchip/irq-crossbar.c +++ b/drivers/irqchip/irq-crossbar.c @@ -198,7 +198,8 @@ static const struct irq_domain_ops crossbar_domain_ops = { static int __init crossbar_of_init(struct device_node *node) { - int i, size, max = 0, reserved = 0, entry; + int i, size, reserved = 0; + u32 max = 0, entry; const __be32 *irqsr; int ret = -ENOMEM; diff --git a/drivers/isdn/hisax/st5481_b.c b/drivers/isdn/hisax/st5481_b.c index 409849165838..f64a36007800 100644 --- a/drivers/isdn/hisax/st5481_b.c +++ b/drivers/isdn/hisax/st5481_b.c @@ -239,7 +239,7 @@ static void st5481B_mode(struct st5481_bcs *bcs, int mode) } } } else { - // Disble B channel interrupts + // Disable B channel interrupts st5481_usb_device_ctrl_msg(adapter, FFMSK_B1+(bcs->channel * 2), 0, NULL, NULL); // Disable B channel FIFOs diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h index a126919ed102..5d13930f0f22 100644 --- a/drivers/md/bcache/util.h +++ b/drivers/md/bcache/util.h @@ -4,7 +4,6 @@ #include <linux/blkdev.h> #include <linux/errno.h> -#include <linux/blkdev.h> #include <linux/kernel.h> #include <linux/sched/clock.h> #include <linux/llist.h> diff --git a/drivers/media/dvb-frontends/drx39xyj/drx_driver.h b/drivers/media/dvb-frontends/drx39xyj/drx_driver.h index 7a681d8202c7..4442e478db72 100644 --- a/drivers/media/dvb-frontends/drx39xyj/drx_driver.h +++ b/drivers/media/dvb-frontends/drx39xyj/drx_driver.h @@ -256,8 +256,7 @@ int drxbsp_tuner_default_i2c_write_read(struct tuner_instance *tuner, * * The actual DAP implementation may be restricted to only one of the modes. * A compiler warning or error will be generated if the DAP implementation -* overides or cannot handle the mode defined below. -* +* overrides or cannot handle the mode defined below. */ #ifndef DRXDAP_SINGLE_MASTER #define DRXDAP_SINGLE_MASTER 1 @@ -272,7 +271,7 @@ int drxbsp_tuner_default_i2c_write_read(struct tuner_instance *tuner, * * This maximum size may be restricted by the actual DAP implementation. * A compiler warning or error will be generated if the DAP implementation -* overides or cannot handle the chunksize defined below. +* overrides or cannot handle the chunksize defined below. * * Beware that the DAP uses DRXDAP_MAX_WCHUNKSIZE to create a temporary data * buffer. Do not undefine or choose too large, unless your system is able to @@ -292,8 +291,7 @@ int drxbsp_tuner_default_i2c_write_read(struct tuner_instance *tuner, * * This maximum size may be restricted by the actual DAP implementation. * A compiler warning or error will be generated if the DAP implementation -* overides or cannot handle the chunksize defined below. -* +* overrides or cannot handle the chunksize defined below. */ #ifndef DRXDAP_MAX_RCHUNKSIZE #define DRXDAP_MAX_RCHUNKSIZE 60 diff --git a/drivers/media/platform/vsp1/vsp1_drm.c b/drivers/media/platform/vsp1/vsp1_drm.c index b4b583f7137a..b4c0f10fc3b0 100644 --- a/drivers/media/platform/vsp1/vsp1_drm.c +++ b/drivers/media/platform/vsp1/vsp1_drm.c @@ -54,12 +54,11 @@ EXPORT_SYMBOL_GPL(vsp1_du_init); /** * vsp1_du_setup_lif - Setup the output part of the VSP pipeline * @dev: the VSP device - * @width: output frame width in pixels - * @height: output frame height in pixels + * @cfg: the LIF configuration * - * Configure the output part of VSP DRM pipeline for the given frame @width and - * @height. This sets up formats on the BRU source pad, the WPF0 sink and source - * pads, and the LIF sink pad. + * Configure the output part of VSP DRM pipeline for the given frame @cfg.width + * and @cfg.height. This sets up formats on the BRU source pad, the WPF0 sink + * and source pads, and the LIF sink pad. * * As the media bus code on the BRU source pad is conditioned by the * configuration of the BRU sink 0 pad, we also set up the formats on all BRU @@ -69,8 +68,7 @@ EXPORT_SYMBOL_GPL(vsp1_du_init); * * Return 0 on success or a negative error code on failure. */ -int vsp1_du_setup_lif(struct device *dev, unsigned int width, - unsigned int height) +int vsp1_du_setup_lif(struct device *dev, const struct vsp1_du_lif_config *cfg) { struct vsp1_device *vsp1 = dev_get_drvdata(dev); struct vsp1_pipeline *pipe = &vsp1->drm->pipe; @@ -79,11 +77,8 @@ int vsp1_du_setup_lif(struct device *dev, unsigned int width, unsigned int i; int ret; - dev_dbg(vsp1->dev, "%s: configuring LIF with format %ux%u\n", - __func__, width, height); - - if (width == 0 || height == 0) { - /* Zero width or height means the CRTC is being disabled, stop + if (!cfg) { + /* NULL configuration means the CRTC is being disabled, stop * the pipeline and turn the light off. */ ret = vsp1_pipeline_stop(pipe); @@ -108,6 +103,9 @@ int vsp1_du_setup_lif(struct device *dev, unsigned int width, return 0; } + dev_dbg(vsp1->dev, "%s: configuring LIF with format %ux%u\n", + __func__, cfg->width, cfg->height); + /* Configure the format at the BRU sinks and propagate it through the * pipeline. */ @@ -117,8 +115,8 @@ int vsp1_du_setup_lif(struct device *dev, unsigned int width, for (i = 0; i < bru->entity.source_pad; ++i) { format.pad = i; - format.format.width = width; - format.format.height = height; + format.format.width = cfg->width; + format.format.height = cfg->height; format.format.code = MEDIA_BUS_FMT_ARGB8888_1X32; format.format.field = V4L2_FIELD_NONE; @@ -133,8 +131,8 @@ int vsp1_du_setup_lif(struct device *dev, unsigned int width, } format.pad = bru->entity.source_pad; - format.format.width = width; - format.format.height = height; + format.format.width = cfg->width; + format.format.height = cfg->height; format.format.code = MEDIA_BUS_FMT_ARGB8888_1X32; format.format.field = V4L2_FIELD_NONE; @@ -180,7 +178,8 @@ int vsp1_du_setup_lif(struct device *dev, unsigned int width, /* Verify that the format at the output of the pipeline matches the * requested frame size and media bus code. */ - if (format.format.width != width || format.format.height != height || + if (format.format.width != cfg->width || + format.format.height != cfg->height || format.format.code != MEDIA_BUS_FMT_ARGB8888_1X32) { dev_dbg(vsp1->dev, "%s: format mismatch\n", __func__); return -EPIPE; diff --git a/drivers/media/rc/lirc_dev.c b/drivers/media/rc/lirc_dev.c index 393dccaabdd0..1688893a65bb 100644 --- a/drivers/media/rc/lirc_dev.c +++ b/drivers/media/rc/lirc_dev.c @@ -436,6 +436,8 @@ int lirc_dev_fop_open(struct inode *inode, struct file *file) return -ERESTARTSYS; ir = irctls[iminor(inode)]; + mutex_unlock(&lirc_dev_lock); + if (!ir) { retval = -ENODEV; goto error; @@ -476,8 +478,6 @@ int lirc_dev_fop_open(struct inode *inode, struct file *file) } error: - mutex_unlock(&lirc_dev_lock); - nonseekable_open(inode, file); return retval; diff --git a/drivers/media/rc/nuvoton-cir.c b/drivers/media/rc/nuvoton-cir.c index b109f8246b96..ec4b25bd2ec2 100644 --- a/drivers/media/rc/nuvoton-cir.c +++ b/drivers/media/rc/nuvoton-cir.c @@ -176,12 +176,13 @@ static void nvt_write_wakeup_codes(struct rc_dev *dev, { u8 tolerance, config; struct nvt_dev *nvt = dev->priv; + unsigned long flags; int i; /* hardcode the tolerance to 10% */ tolerance = DIV_ROUND_UP(count, 10); - spin_lock(&nvt->lock); + spin_lock_irqsave(&nvt->lock, flags); nvt_clear_cir_wake_fifo(nvt); nvt_cir_wake_reg_write(nvt, count, CIR_WAKE_FIFO_CMP_DEEP); @@ -203,7 +204,7 @@ static void nvt_write_wakeup_codes(struct rc_dev *dev, nvt_cir_wake_reg_write(nvt, config, CIR_WAKE_IRCON); - spin_unlock(&nvt->lock); + spin_unlock_irqrestore(&nvt->lock, flags); } static ssize_t wakeup_data_show(struct device *dev, diff --git a/drivers/media/rc/rc-main.c b/drivers/media/rc/rc-main.c index 2424946740e6..d84533699668 100644 --- a/drivers/media/rc/rc-main.c +++ b/drivers/media/rc/rc-main.c @@ -1663,6 +1663,7 @@ static int rc_setup_rx_device(struct rc_dev *dev) { int rc; struct rc_map *rc_map; + u64 rc_type; if (!dev->map_name) return -EINVAL; @@ -1677,15 +1678,18 @@ static int rc_setup_rx_device(struct rc_dev *dev) if (rc) return rc; - if (dev->change_protocol) { - u64 rc_type = (1ll << rc_map->rc_type); + rc_type = BIT_ULL(rc_map->rc_type); + if (dev->change_protocol) { rc = dev->change_protocol(dev, &rc_type); if (rc < 0) goto out_table; dev->enabled_protocols = rc_type; } + if (dev->driver_type == RC_DRIVER_IR_RAW) + ir_raw_load_modules(&rc_type); + set_bit(EV_KEY, dev->input_dev->evbit); set_bit(EV_REP, dev->input_dev->evbit); set_bit(EV_MSC, dev->input_dev->evbit); @@ -1777,12 +1781,6 @@ int rc_register_device(struct rc_dev *dev) dev->input_name ?: "Unspecified device", path ?: "N/A"); kfree(path); - if (dev->driver_type != RC_DRIVER_IR_RAW_TX) { - rc = rc_setup_rx_device(dev); - if (rc) - goto out_dev; - } - if (dev->driver_type == RC_DRIVER_IR_RAW || dev->driver_type == RC_DRIVER_IR_RAW_TX) { if (!raw_init) { @@ -1791,7 +1789,13 @@ int rc_register_device(struct rc_dev *dev) } rc = ir_raw_event_register(dev); if (rc < 0) - goto out_rx; + goto out_dev; + } + + if (dev->driver_type != RC_DRIVER_IR_RAW_TX) { + rc = rc_setup_rx_device(dev); + if (rc) + goto out_raw; } /* Allow the RC sysfs nodes to be accessible */ @@ -1803,8 +1807,8 @@ int rc_register_device(struct rc_dev *dev) return 0; -out_rx: - rc_free_rx_device(dev); +out_raw: + ir_raw_event_unregister(dev); out_dev: device_del(&dev->dev); out_unlock: diff --git a/drivers/media/rc/serial_ir.c b/drivers/media/rc/serial_ir.c index 923fb2299553..41b54e40176c 100644 --- a/drivers/media/rc/serial_ir.c +++ b/drivers/media/rc/serial_ir.c @@ -487,10 +487,69 @@ static void serial_ir_timeout(unsigned long arg) ir_raw_event_handle(serial_ir.rcdev); } +/* Needed by serial_ir_probe() */ +static int serial_ir_tx(struct rc_dev *dev, unsigned int *txbuf, + unsigned int count); +static int serial_ir_tx_duty_cycle(struct rc_dev *dev, u32 cycle); +static int serial_ir_tx_carrier(struct rc_dev *dev, u32 carrier); +static int serial_ir_open(struct rc_dev *rcdev); +static void serial_ir_close(struct rc_dev *rcdev); + static int serial_ir_probe(struct platform_device *dev) { + struct rc_dev *rcdev; int i, nlow, nhigh, result; + rcdev = devm_rc_allocate_device(&dev->dev, RC_DRIVER_IR_RAW); + if (!rcdev) + return -ENOMEM; + + if (hardware[type].send_pulse && hardware[type].send_space) + rcdev->tx_ir = serial_ir_tx; + if (hardware[type].set_send_carrier) + rcdev->s_tx_carrier = serial_ir_tx_carrier; + if (hardware[type].set_duty_cycle) + rcdev->s_tx_duty_cycle = serial_ir_tx_duty_cycle; + + switch (type) { + case IR_HOMEBREW: + rcdev->input_name = "Serial IR type home-brew"; + break; + case IR_IRDEO: + rcdev->input_name = "Serial IR type IRdeo"; + break; + case IR_IRDEO_REMOTE: + rcdev->input_name = "Serial IR type IRdeo remote"; + break; + case IR_ANIMAX: + rcdev->input_name = "Serial IR type AnimaX"; + break; + case IR_IGOR: + rcdev->input_name = "Serial IR type IgorPlug"; + break; + } + + rcdev->input_phys = KBUILD_MODNAME "/input0"; + rcdev->input_id.bustype = BUS_HOST; + rcdev->input_id.vendor = 0x0001; + rcdev->input_id.product = 0x0001; + rcdev->input_id.version = 0x0100; + rcdev->open = serial_ir_open; + rcdev->close = serial_ir_close; + rcdev->dev.parent = &serial_ir.pdev->dev; + rcdev->allowed_protocols = RC_BIT_ALL_IR_DECODER; + rcdev->driver_name = KBUILD_MODNAME; + rcdev->map_name = RC_MAP_RC6_MCE; + rcdev->min_timeout = 1; + rcdev->timeout = IR_DEFAULT_TIMEOUT; + rcdev->max_timeout = 10 * IR_DEFAULT_TIMEOUT; + rcdev->rx_resolution = 250000; + + serial_ir.rcdev = rcdev; + + setup_timer(&serial_ir.timeout_timer, serial_ir_timeout, + (unsigned long)&serial_ir); + result = devm_request_irq(&dev->dev, irq, serial_ir_irq_handler, share_irq ? IRQF_SHARED : 0, KBUILD_MODNAME, &hardware); @@ -516,9 +575,6 @@ static int serial_ir_probe(struct platform_device *dev) return -EBUSY; } - setup_timer(&serial_ir.timeout_timer, serial_ir_timeout, - (unsigned long)&serial_ir); - result = hardware_init_port(); if (result < 0) return result; @@ -552,7 +608,8 @@ static int serial_ir_probe(struct platform_device *dev) sense ? "low" : "high"); dev_dbg(&dev->dev, "Interrupt %d, port %04x obtained\n", irq, io); - return 0; + + return devm_rc_register_device(&dev->dev, rcdev); } static int serial_ir_open(struct rc_dev *rcdev) @@ -723,7 +780,6 @@ static void serial_ir_exit(void) static int __init serial_ir_init_module(void) { - struct rc_dev *rcdev; int result; switch (type) { @@ -754,63 +810,9 @@ static int __init serial_ir_init_module(void) sense = !!sense; result = serial_ir_init(); - if (result) - return result; - - rcdev = devm_rc_allocate_device(&serial_ir.pdev->dev, RC_DRIVER_IR_RAW); - if (!rcdev) { - result = -ENOMEM; - goto serial_cleanup; - } - - if (hardware[type].send_pulse && hardware[type].send_space) - rcdev->tx_ir = serial_ir_tx; - if (hardware[type].set_send_carrier) - rcdev->s_tx_carrier = serial_ir_tx_carrier; - if (hardware[type].set_duty_cycle) - rcdev->s_tx_duty_cycle = serial_ir_tx_duty_cycle; - - switch (type) { - case IR_HOMEBREW: - rcdev->input_name = "Serial IR type home-brew"; - break; - case IR_IRDEO: - rcdev->input_name = "Serial IR type IRdeo"; - break; - case IR_IRDEO_REMOTE: - rcdev->input_name = "Serial IR type IRdeo remote"; - break; - case IR_ANIMAX: - rcdev->input_name = "Serial IR type AnimaX"; - break; - case IR_IGOR: - rcdev->input_name = "Serial IR type IgorPlug"; - break; - } - - rcdev->input_phys = KBUILD_MODNAME "/input0"; - rcdev->input_id.bustype = BUS_HOST; - rcdev->input_id.vendor = 0x0001; - rcdev->input_id.product = 0x0001; - rcdev->input_id.version = 0x0100; - rcdev->open = serial_ir_open; - rcdev->close = serial_ir_close; - rcdev->dev.parent = &serial_ir.pdev->dev; - rcdev->allowed_protocols = RC_BIT_ALL_IR_DECODER; - rcdev->driver_name = KBUILD_MODNAME; - rcdev->map_name = RC_MAP_RC6_MCE; - rcdev->min_timeout = 1; - rcdev->timeout = IR_DEFAULT_TIMEOUT; - rcdev->max_timeout = 10 * IR_DEFAULT_TIMEOUT; - rcdev->rx_resolution = 250000; - - serial_ir.rcdev = rcdev; - - result = rc_register_device(rcdev); - if (!result) return 0; -serial_cleanup: + serial_ir_exit(); return result; } @@ -818,7 +820,6 @@ serial_cleanup: static void __exit serial_ir_exit_module(void) { del_timer_sync(&serial_ir.timeout_timer); - rc_unregister_device(serial_ir.rcdev); serial_ir_exit(); } diff --git a/drivers/media/usb/dvb-usb/dw2102.c b/drivers/media/usb/dvb-usb/dw2102.c index 6ca502d834b4..4f42d57f81d9 100644 --- a/drivers/media/usb/dvb-usb/dw2102.c +++ b/drivers/media/usb/dvb-usb/dw2102.c @@ -68,6 +68,7 @@ struct dw2102_state { u8 initialized; u8 last_lock; + u8 data[MAX_XFER_SIZE + 4]; struct i2c_client *i2c_client_demod; struct i2c_client *i2c_client_tuner; @@ -661,62 +662,72 @@ static int su3000_i2c_transfer(struct i2c_adapter *adap, struct i2c_msg msg[], int num) { struct dvb_usb_device *d = i2c_get_adapdata(adap); - u8 obuf[0x40], ibuf[0x40]; + struct dw2102_state *state; if (!d) return -ENODEV; + + state = d->priv; + if (mutex_lock_interruptible(&d->i2c_mutex) < 0) return -EAGAIN; + if (mutex_lock_interruptible(&d->data_mutex) < 0) { + mutex_unlock(&d->i2c_mutex); + return -EAGAIN; + } switch (num) { case 1: switch (msg[0].addr) { case SU3000_STREAM_CTRL: - obuf[0] = msg[0].buf[0] + 0x36; - obuf[1] = 3; - obuf[2] = 0; - if (dvb_usb_generic_rw(d, obuf, 3, ibuf, 0, 0) < 0) + state->data[0] = msg[0].buf[0] + 0x36; + state->data[1] = 3; + state->data[2] = 0; + if (dvb_usb_generic_rw(d, state->data, 3, + state->data, 0, 0) < 0) err("i2c transfer failed."); break; case DW2102_RC_QUERY: - obuf[0] = 0x10; - if (dvb_usb_generic_rw(d, obuf, 1, ibuf, 2, 0) < 0) + state->data[0] = 0x10; + if (dvb_usb_generic_rw(d, state->data, 1, + state->data, 2, 0) < 0) err("i2c transfer failed."); - msg[0].buf[1] = ibuf[0]; - msg[0].buf[0] = ibuf[1]; + msg[0].buf[1] = state->data[0]; + msg[0].buf[0] = state->data[1]; break; default: /* always i2c write*/ - obuf[0] = 0x08; - obuf[1] = msg[0].addr; - obuf[2] = msg[0].len; + state->data[0] = 0x08; + state->data[1] = msg[0].addr; + state->data[2] = msg[0].len; - memcpy(&obuf[3], msg[0].buf, msg[0].len); + memcpy(&state->data[3], msg[0].buf, msg[0].len); - if (dvb_usb_generic_rw(d, obuf, msg[0].len + 3, - ibuf, 1, 0) < 0) + if (dvb_usb_generic_rw(d, state->data, msg[0].len + 3, + state->data, 1, 0) < 0) err("i2c transfer failed."); } break; case 2: /* always i2c read */ - obuf[0] = 0x09; - obuf[1] = msg[0].len; - obuf[2] = msg[1].len; - obuf[3] = msg[0].addr; - memcpy(&obuf[4], msg[0].buf, msg[0].len); - - if (dvb_usb_generic_rw(d, obuf, msg[0].len + 4, - ibuf, msg[1].len + 1, 0) < 0) + state->data[0] = 0x09; + state->data[1] = msg[0].len; + state->data[2] = msg[1].len; + state->data[3] = msg[0].addr; + memcpy(&state->data[4], msg[0].buf, msg[0].len); + + if (dvb_usb_generic_rw(d, state->data, msg[0].len + 4, + state->data, msg[1].len + 1, 0) < 0) err("i2c transfer failed."); - memcpy(msg[1].buf, &ibuf[1], msg[1].len); + memcpy(msg[1].buf, &state->data[1], msg[1].len); break; default: warn("more than 2 i2c messages at a time is not handled yet."); break; } + mutex_unlock(&d->data_mutex); mutex_unlock(&d->i2c_mutex); return num; } @@ -844,17 +855,23 @@ static int su3000_streaming_ctrl(struct dvb_usb_adapter *adap, int onoff) static int su3000_power_ctrl(struct dvb_usb_device *d, int i) { struct dw2102_state *state = (struct dw2102_state *)d->priv; - u8 obuf[] = {0xde, 0}; + int ret = 0; info("%s: %d, initialized %d", __func__, i, state->initialized); if (i && !state->initialized) { + mutex_lock(&d->data_mutex); + + state->data[0] = 0xde; + state->data[1] = 0; + state->initialized = 1; /* reset board */ - return dvb_usb_generic_rw(d, obuf, 2, NULL, 0, 0); + ret = dvb_usb_generic_rw(d, state->data, 2, NULL, 0, 0); + mutex_unlock(&d->data_mutex); } - return 0; + return ret; } static int su3000_read_mac_address(struct dvb_usb_device *d, u8 mac[6]) @@ -1309,49 +1326,57 @@ static int prof_7500_frontend_attach(struct dvb_usb_adapter *d) return 0; } -static int su3000_frontend_attach(struct dvb_usb_adapter *d) +static int su3000_frontend_attach(struct dvb_usb_adapter *adap) { - u8 obuf[3] = { 0xe, 0x80, 0 }; - u8 ibuf[] = { 0 }; + struct dvb_usb_device *d = adap->dev; + struct dw2102_state *state = d->priv; + + mutex_lock(&d->data_mutex); + + state->data[0] = 0xe; + state->data[1] = 0x80; + state->data[2] = 0; - if (dvb_usb_generic_rw(d->dev, obuf, 3, ibuf, 1, 0) < 0) + if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0) err("command 0x0e transfer failed."); - obuf[0] = 0xe; - obuf[1] = 0x02; - obuf[2] = 1; + state->data[0] = 0xe; + state->data[1] = 0x02; + state->data[2] = 1; - if (dvb_usb_generic_rw(d->dev, obuf, 3, ibuf, 1, 0) < 0) + if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0) err("command 0x0e transfer failed."); msleep(300); - obuf[0] = 0xe; - obuf[1] = 0x83; - obuf[2] = 0; + state->data[0] = 0xe; + state->data[1] = 0x83; + state->data[2] = 0; - if (dvb_usb_generic_rw(d->dev, obuf, 3, ibuf, 1, 0) < 0) + if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0) err("command 0x0e transfer failed."); - obuf[0] = 0xe; - obuf[1] = 0x83; - obuf[2] = 1; + state->data[0] = 0xe; + state->data[1] = 0x83; + state->data[2] = 1; - if (dvb_usb_generic_rw(d->dev, obuf, 3, ibuf, 1, 0) < 0) + if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0) err("command 0x0e transfer failed."); - obuf[0] = 0x51; + state->data[0] = 0x51; - if (dvb_usb_generic_rw(d->dev, obuf, 1, ibuf, 1, 0) < 0) + if (dvb_usb_generic_rw(d, state->data, 1, state->data, 1, 0) < 0) err("command 0x51 transfer failed."); - d->fe_adap[0].fe = dvb_attach(ds3000_attach, &su3000_ds3000_config, - &d->dev->i2c_adap); - if (d->fe_adap[0].fe == NULL) + mutex_unlock(&d->data_mutex); + + adap->fe_adap[0].fe = dvb_attach(ds3000_attach, &su3000_ds3000_config, + &d->i2c_adap); + if (adap->fe_adap[0].fe == NULL) return -EIO; - if (dvb_attach(ts2020_attach, d->fe_adap[0].fe, + if (dvb_attach(ts2020_attach, adap->fe_adap[0].fe, &dw2104_ts2020_config, - &d->dev->i2c_adap)) { + &d->i2c_adap)) { info("Attached DS3000/TS2020!"); return 0; } @@ -1360,47 +1385,55 @@ static int su3000_frontend_attach(struct dvb_usb_adapter *d) return -EIO; } -static int t220_frontend_attach(struct dvb_usb_adapter *d) +static int t220_frontend_attach(struct dvb_usb_adapter *adap) { - u8 obuf[3] = { 0xe, 0x87, 0 }; - u8 ibuf[] = { 0 }; + struct dvb_usb_device *d = adap->dev; + struct dw2102_state *state = d->priv; + + mutex_lock(&d->data_mutex); - if (dvb_usb_generic_rw(d->dev, obuf, 3, ibuf, 1, 0) < 0) + state->data[0] = 0xe; + state->data[1] = 0x87; + state->data[2] = 0x0; + + if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0) err("command 0x0e transfer failed."); - obuf[0] = 0xe; - obuf[1] = 0x86; - obuf[2] = 1; + state->data[0] = 0xe; + state->data[1] = 0x86; + state->data[2] = 1; - if (dvb_usb_generic_rw(d->dev, obuf, 3, ibuf, 1, 0) < 0) + if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0) err("command 0x0e transfer failed."); - obuf[0] = 0xe; - obuf[1] = 0x80; - obuf[2] = 0; + state->data[0] = 0xe; + state->data[1] = 0x80; + state->data[2] = 0; - if (dvb_usb_generic_rw(d->dev, obuf, 3, ibuf, 1, 0) < 0) + if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0) err("command 0x0e transfer failed."); msleep(50); - obuf[0] = 0xe; - obuf[1] = 0x80; - obuf[2] = 1; + state->data[0] = 0xe; + state->data[1] = 0x80; + state->data[2] = 1; - if (dvb_usb_generic_rw(d->dev, obuf, 3, ibuf, 1, 0) < 0) + if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0) err("command 0x0e transfer failed."); - obuf[0] = 0x51; + state->data[0] = 0x51; - if (dvb_usb_generic_rw(d->dev, obuf, 1, ibuf, 1, 0) < 0) + if (dvb_usb_generic_rw(d, state->data, 1, state->data, 1, 0) < 0) err("command 0x51 transfer failed."); - d->fe_adap[0].fe = dvb_attach(cxd2820r_attach, &cxd2820r_config, - &d->dev->i2c_adap, NULL); - if (d->fe_adap[0].fe != NULL) { - if (dvb_attach(tda18271_attach, d->fe_adap[0].fe, 0x60, - &d->dev->i2c_adap, &tda18271_config)) { + mutex_unlock(&d->data_mutex); + + adap->fe_adap[0].fe = dvb_attach(cxd2820r_attach, &cxd2820r_config, + &d->i2c_adap, NULL); + if (adap->fe_adap[0].fe != NULL) { + if (dvb_attach(tda18271_attach, adap->fe_adap[0].fe, 0x60, + &d->i2c_adap, &tda18271_config)) { info("Attached TDA18271HD/CXD2820R!"); return 0; } @@ -1410,23 +1443,30 @@ static int t220_frontend_attach(struct dvb_usb_adapter *d) return -EIO; } -static int m88rs2000_frontend_attach(struct dvb_usb_adapter *d) +static int m88rs2000_frontend_attach(struct dvb_usb_adapter *adap) { - u8 obuf[] = { 0x51 }; - u8 ibuf[] = { 0 }; + struct dvb_usb_device *d = adap->dev; + struct dw2102_state *state = d->priv; + + mutex_lock(&d->data_mutex); - if (dvb_usb_generic_rw(d->dev, obuf, 1, ibuf, 1, 0) < 0) + state->data[0] = 0x51; + + if (dvb_usb_generic_rw(d, state->data, 1, state->data, 1, 0) < 0) err("command 0x51 transfer failed."); - d->fe_adap[0].fe = dvb_attach(m88rs2000_attach, &s421_m88rs2000_config, - &d->dev->i2c_adap); + mutex_unlock(&d->data_mutex); - if (d->fe_adap[0].fe == NULL) + adap->fe_adap[0].fe = dvb_attach(m88rs2000_attach, + &s421_m88rs2000_config, + &d->i2c_adap); + + if (adap->fe_adap[0].fe == NULL) return -EIO; - if (dvb_attach(ts2020_attach, d->fe_adap[0].fe, + if (dvb_attach(ts2020_attach, adap->fe_adap[0].fe, &dw2104_ts2020_config, - &d->dev->i2c_adap)) { + &d->i2c_adap)) { info("Attached RS2000/TS2020!"); return 0; } @@ -1439,44 +1479,50 @@ static int tt_s2_4600_frontend_attach(struct dvb_usb_adapter *adap) { struct dvb_usb_device *d = adap->dev; struct dw2102_state *state = d->priv; - u8 obuf[3] = { 0xe, 0x80, 0 }; - u8 ibuf[] = { 0 }; struct i2c_adapter *i2c_adapter; struct i2c_client *client; struct i2c_board_info board_info; struct m88ds3103_platform_data m88ds3103_pdata = {}; struct ts2020_config ts2020_config = {}; - if (dvb_usb_generic_rw(d, obuf, 3, ibuf, 1, 0) < 0) + mutex_lock(&d->data_mutex); + + state->data[0] = 0xe; + state->data[1] = 0x80; + state->data[2] = 0x0; + + if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0) err("command 0x0e transfer failed."); - obuf[0] = 0xe; - obuf[1] = 0x02; - obuf[2] = 1; + state->data[0] = 0xe; + state->data[1] = 0x02; + state->data[2] = 1; - if (dvb_usb_generic_rw(d, obuf, 3, ibuf, 1, 0) < 0) + if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0) err("command 0x0e transfer failed."); msleep(300); - obuf[0] = 0xe; - obuf[1] = 0x83; - obuf[2] = 0; + state->data[0] = 0xe; + state->data[1] = 0x83; + state->data[2] = 0; - if (dvb_usb_generic_rw(d, obuf, 3, ibuf, 1, 0) < 0) + if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0) err("command 0x0e transfer failed."); - obuf[0] = 0xe; - obuf[1] = 0x83; - obuf[2] = 1; + state->data[0] = 0xe; + state->data[1] = 0x83; + state->data[2] = 1; - if (dvb_usb_generic_rw(d, obuf, 3, ibuf, 1, 0) < 0) + if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0) err("command 0x0e transfer failed."); - obuf[0] = 0x51; + state->data[0] = 0x51; - if (dvb_usb_generic_rw(d, obuf, 1, ibuf, 1, 0) < 0) + if (dvb_usb_generic_rw(d, state->data, 1, state->data, 1, 0) < 0) err("command 0x51 transfer failed."); + mutex_unlock(&d->data_mutex); + /* attach demod */ m88ds3103_pdata.clk = 27000000; m88ds3103_pdata.i2c_wr_max = 33; diff --git a/drivers/misc/sgi-gru/grufault.c b/drivers/misc/sgi-gru/grufault.c index 6fb773dbcd0c..93be82fc338a 100644 --- a/drivers/misc/sgi-gru/grufault.c +++ b/drivers/misc/sgi-gru/grufault.c @@ -219,15 +219,20 @@ static int atomic_pte_lookup(struct vm_area_struct *vma, unsigned long vaddr, int write, unsigned long *paddr, int *pageshift) { pgd_t *pgdp; - pmd_t *pmdp; + p4d_t *p4dp; pud_t *pudp; + pmd_t *pmdp; pte_t pte; pgdp = pgd_offset(vma->vm_mm, vaddr); if (unlikely(pgd_none(*pgdp))) goto err; - pudp = pud_offset(pgdp, vaddr); + p4dp = p4d_offset(pgdp, vaddr); + if (unlikely(p4d_none(*p4dp))) + goto err; + + pudp = pud_offset(p4dp, vaddr); if (unlikely(pud_none(*pudp))) goto err; diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c index 1ae872bfc3ba..747645c74134 100644 --- a/drivers/mtd/spi-nor/spi-nor.c +++ b/drivers/mtd/spi-nor/spi-nor.c @@ -186,7 +186,7 @@ static inline int write_enable(struct spi_nor *nor) } /* - * Send write disble instruction to the chip. + * Send write disable instruction to the chip. */ static inline int write_disable(struct spi_nor *nor) { diff --git a/drivers/net/ethernet/qlogic/qlge/qlge.h b/drivers/net/ethernet/qlogic/qlge/qlge.h index 6d31f92ef2b6..84ac50f92c9c 100644 --- a/drivers/net/ethernet/qlogic/qlge/qlge.h +++ b/drivers/net/ethernet/qlogic/qlge/qlge.h @@ -1162,8 +1162,8 @@ struct ob_mac_tso_iocb_rsp { struct ib_mac_iocb_rsp { u8 opcode; /* 0x20 */ u8 flags1; -#define IB_MAC_IOCB_RSP_OI 0x01 /* Overide intr delay */ -#define IB_MAC_IOCB_RSP_I 0x02 /* Disble Intr Generation */ +#define IB_MAC_IOCB_RSP_OI 0x01 /* Override intr delay */ +#define IB_MAC_IOCB_RSP_I 0x02 /* Disable Intr Generation */ #define IB_MAC_CSUM_ERR_MASK 0x1c /* A mask to use for csum errs */ #define IB_MAC_IOCB_RSP_TE 0x04 /* Checksum error */ #define IB_MAC_IOCB_RSP_NU 0x08 /* No checksum rcvd */ diff --git a/drivers/pci/dwc/pci-exynos.c b/drivers/pci/dwc/pci-exynos.c index 993b650ef275..44f774c12fb2 100644 --- a/drivers/pci/dwc/pci-exynos.c +++ b/drivers/pci/dwc/pci-exynos.c @@ -132,10 +132,6 @@ static int exynos5440_pcie_get_mem_resources(struct platform_device *pdev, struct device *dev = pci->dev; struct resource *res; - /* If using the PHY framework, doesn't need to get other resource */ - if (ep->using_phy) - return 0; - ep->mem_res = devm_kzalloc(dev, sizeof(*ep->mem_res), GFP_KERNEL); if (!ep->mem_res) return -ENOMEM; @@ -145,6 +141,10 @@ static int exynos5440_pcie_get_mem_resources(struct platform_device *pdev, if (IS_ERR(ep->mem_res->elbi_base)) return PTR_ERR(ep->mem_res->elbi_base); + /* If using the PHY framework, doesn't need to get other resource */ + if (ep->using_phy) + return 0; + res = platform_get_resource(pdev, IORESOURCE_MEM, 1); ep->mem_res->phy_base = devm_ioremap_resource(dev, res); if (IS_ERR(ep->mem_res->phy_base)) diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index 973472c23d89..1dfa10cc566b 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -478,7 +478,7 @@ static void aspm_calc_l1ss_info(struct pcie_link_state *link, static void pcie_aspm_cap_init(struct pcie_link_state *link, int blacklist) { - struct pci_dev *child, *parent = link->pdev; + struct pci_dev *child = link->downstream, *parent = link->pdev; struct pci_bus *linkbus = parent->subordinate; struct aspm_register_info upreg, dwreg; @@ -491,9 +491,7 @@ static void pcie_aspm_cap_init(struct pcie_link_state *link, int blacklist) /* Get upstream/downstream components' register state */ pcie_get_aspm_reg(parent, &upreg); - child = pci_function_0(linkbus); pcie_get_aspm_reg(child, &dwreg); - link->downstream = child; /* * If ASPM not supported, don't mess with the clocks and link, @@ -800,6 +798,7 @@ static struct pcie_link_state *alloc_pcie_link_state(struct pci_dev *pdev) INIT_LIST_HEAD(&link->children); INIT_LIST_HEAD(&link->link); link->pdev = pdev; + link->downstream = pci_function_0(pdev->subordinate); /* * Root Ports and PCI/PCI-X to PCIe Bridges are roots of PCIe diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index f754453fe754..673683660b5c 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -2174,6 +2174,7 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x005d, quirk_blacklist_vpd); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x005f, quirk_blacklist_vpd); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATTANSIC, PCI_ANY_ID, quirk_blacklist_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_QLOGIC, 0x2261, quirk_blacklist_vpd); /* * For Broadcom 5706, 5708, 5709 rev. A nics, any read beyond the diff --git a/drivers/scsi/aic7xxx/aic79xx_core.c b/drivers/scsi/aic7xxx/aic79xx_core.c index 109e2c99e6c1..95d8f25cbcca 100644 --- a/drivers/scsi/aic7xxx/aic79xx_core.c +++ b/drivers/scsi/aic7xxx/aic79xx_core.c @@ -6278,7 +6278,7 @@ ahd_reset(struct ahd_softc *ahd, int reinit) * does not disable its parity logic prior to * the start of the reset. This may cause a * parity error to be detected and thus a - * spurious SERR or PERR assertion. Disble + * spurious SERR or PERR assertion. Disable * PERR and SERR responses during the CHIPRST. */ mod_cmd = cmd & ~(PCIM_CMD_PERRESPEN|PCIM_CMD_SERRESPEN); diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index c7839f6c35cc..d277e8620e3e 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3075,23 +3075,6 @@ static void sd_probe_async(void *data, async_cookie_t cookie) put_device(&sdkp->dev); } -struct sd_devt { - int idx; - struct disk_devt disk_devt; -}; - -static void sd_devt_release(struct disk_devt *disk_devt) -{ - struct sd_devt *sd_devt = container_of(disk_devt, struct sd_devt, - disk_devt); - - spin_lock(&sd_index_lock); - ida_remove(&sd_index_ida, sd_devt->idx); - spin_unlock(&sd_index_lock); - - kfree(sd_devt); -} - /** * sd_probe - called during driver initialization and whenever a * new scsi device is attached to the system. It is called once @@ -3113,7 +3096,6 @@ static void sd_devt_release(struct disk_devt *disk_devt) static int sd_probe(struct device *dev) { struct scsi_device *sdp = to_scsi_device(dev); - struct sd_devt *sd_devt; struct scsi_disk *sdkp; struct gendisk *gd; int index; @@ -3139,13 +3121,9 @@ static int sd_probe(struct device *dev) if (!sdkp) goto out; - sd_devt = kzalloc(sizeof(*sd_devt), GFP_KERNEL); - if (!sd_devt) - goto out_free; - gd = alloc_disk(SD_MINORS); if (!gd) - goto out_free_devt; + goto out_free; do { if (!ida_pre_get(&sd_index_ida, GFP_KERNEL)) @@ -3161,11 +3139,6 @@ static int sd_probe(struct device *dev) goto out_put; } - atomic_set(&sd_devt->disk_devt.count, 1); - sd_devt->disk_devt.release = sd_devt_release; - sd_devt->idx = index; - gd->disk_devt = &sd_devt->disk_devt; - error = sd_format_disk_name("sd", index, gd->disk_name, DISK_NAME_LEN); if (error) { sdev_printk(KERN_WARNING, sdp, "SCSI disk (sd) name length exceeded.\n"); @@ -3205,12 +3178,11 @@ static int sd_probe(struct device *dev) return 0; out_free_index: - put_disk_devt(&sd_devt->disk_devt); - sd_devt = NULL; + spin_lock(&sd_index_lock); + ida_remove(&sd_index_ida, index); + spin_unlock(&sd_index_lock); out_put: put_disk(gd); - out_free_devt: - kfree(sd_devt); out_free: kfree(sdkp); out: @@ -3271,7 +3243,10 @@ static void scsi_disk_release(struct device *dev) struct scsi_disk *sdkp = to_scsi_disk(dev); struct gendisk *disk = sdkp->disk; - put_disk_devt(disk->disk_devt); + spin_lock(&sd_index_lock); + ida_remove(&sd_index_ida, sdkp->index); + spin_unlock(&sd_index_lock); + disk->private_data = NULL; put_disk(disk); put_device(&sdkp->device->sdev_gendev); diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c index a2615d64d07c..79a2d8fba6b6 100644 --- a/drivers/usb/gadget/legacy/inode.c +++ b/drivers/usb/gadget/legacy/inode.c @@ -84,8 +84,7 @@ static int ep_open(struct inode *, struct file *); /* /dev/gadget/$CHIP represents ep0 and the whole device */ enum ep0_state { - /* DISBLED is the initial state. - */ + /* DISABLED is the initial state. */ STATE_DEV_DISABLED = 0, /* Only one open() of /dev/gadget/$CHIP; only one file tracks diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 6d6c46000e56..50aee8b7718b 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -868,7 +868,7 @@ static void xhci_disable_port_wake_on_bits(struct xhci_hcd *xhci) spin_lock_irqsave(&xhci->lock, flags); - /* disble usb3 ports Wake bits*/ + /* disable usb3 ports Wake bits */ port_index = xhci->num_usb3_ports; port_array = xhci->usb3_ports; while (port_index--) { @@ -879,7 +879,7 @@ static void xhci_disable_port_wake_on_bits(struct xhci_hcd *xhci) writel(t2, port_array[port_index]); } - /* disble usb2 ports Wake bits*/ + /* disable usb2 ports Wake bits */ port_index = xhci->num_usb2_ports; port_array = xhci->usb2_ports; while (port_index--) { diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index f8afc6dcc29f..e8cef1ad0fe3 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -681,3 +681,50 @@ xen_swiotlb_set_dma_mask(struct device *dev, u64 dma_mask) return 0; } EXPORT_SYMBOL_GPL(xen_swiotlb_set_dma_mask); + +/* + * Create userspace mapping for the DMA-coherent memory. + * This function should be called with the pages from the current domain only, + * passing pages mapped from other domains would lead to memory corruption. + */ +int +xen_swiotlb_dma_mmap(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs) +{ +#if defined(CONFIG_ARM) || defined(CONFIG_ARM64) + if (__generic_dma_ops(dev)->mmap) + return __generic_dma_ops(dev)->mmap(dev, vma, cpu_addr, + dma_addr, size, attrs); +#endif + return dma_common_mmap(dev, vma, cpu_addr, dma_addr, size); +} +EXPORT_SYMBOL_GPL(xen_swiotlb_dma_mmap); + +/* + * This function should be called with the pages from the current domain only, + * passing pages mapped from other domains would lead to memory corruption. + */ +int +xen_swiotlb_get_sgtable(struct device *dev, struct sg_table *sgt, + void *cpu_addr, dma_addr_t handle, size_t size, + unsigned long attrs) +{ +#if defined(CONFIG_ARM) || defined(CONFIG_ARM64) + if (__generic_dma_ops(dev)->get_sgtable) { +#if 0 + /* + * This check verifies that the page belongs to the current domain and + * is not one mapped from another domain. + * This check is for debug only, and should not go to production build + */ + unsigned long bfn = PHYS_PFN(dma_to_phys(dev, handle)); + BUG_ON (!page_is_ram(bfn)); +#endif + return __generic_dma_ops(dev)->get_sgtable(dev, sgt, cpu_addr, + handle, size, attrs); + } +#endif + return dma_common_get_sgtable(dev, sgt, cpu_addr, handle, size); +} +EXPORT_SYMBOL_GPL(xen_swiotlb_get_sgtable); diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c b/drivers/xen/xenbus/xenbus_dev_frontend.c index 4d343eed08f5..1f4733b80c87 100644 --- a/drivers/xen/xenbus/xenbus_dev_frontend.c +++ b/drivers/xen/xenbus/xenbus_dev_frontend.c @@ -55,7 +55,6 @@ #include <linux/string.h> #include <linux/slab.h> #include <linux/miscdevice.h> -#include <linux/init.h> #include <xen/xenbus.h> #include <xen/xen.h> diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 338d2f73eb29..a2c05f2ada6d 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -1359,6 +1359,16 @@ out: return 0; } +static void fat_dummy_inode_init(struct inode *inode) +{ + /* Initialize this dummy inode to work as no-op. */ + MSDOS_I(inode)->mmu_private = 0; + MSDOS_I(inode)->i_start = 0; + MSDOS_I(inode)->i_logstart = 0; + MSDOS_I(inode)->i_attrs = 0; + MSDOS_I(inode)->i_pos = 0; +} + static int fat_read_root(struct inode *inode) { struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb); @@ -1803,12 +1813,13 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, fat_inode = new_inode(sb); if (!fat_inode) goto out_fail; - MSDOS_I(fat_inode)->i_pos = 0; + fat_dummy_inode_init(fat_inode); sbi->fat_inode = fat_inode; fsinfo_inode = new_inode(sb); if (!fsinfo_inode) goto out_fail; + fat_dummy_inode_init(fsinfo_inode); fsinfo_inode->i_ino = MSDOS_FSINFO_INO; sbi->fsinfo_inode = fsinfo_inode; insert_inode_hash(fsinfo_inode); diff --git a/fs/iomap.c b/fs/iomap.c index 3ca1a8e44135..141c3cd55a8b 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -846,7 +846,8 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, struct address_space *mapping = iocb->ki_filp->f_mapping; struct inode *inode = file_inode(iocb->ki_filp); size_t count = iov_iter_count(iter); - loff_t pos = iocb->ki_pos, end = iocb->ki_pos + count - 1, ret = 0; + loff_t pos = iocb->ki_pos, start = pos; + loff_t end = iocb->ki_pos + count - 1, ret = 0; unsigned int flags = IOMAP_DIRECT; struct blk_plug plug; struct iomap_dio *dio; @@ -887,12 +888,12 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, } if (mapping->nrpages) { - ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, end); + ret = filemap_write_and_wait_range(mapping, start, end); if (ret) goto out_free_dio; ret = invalidate_inode_pages2_range(mapping, - iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT); + start >> PAGE_SHIFT, end >> PAGE_SHIFT); WARN_ON_ONCE(ret); ret = 0; } @@ -941,6 +942,8 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, __set_current_state(TASK_RUNNING); } + ret = iomap_dio_complete(dio); + /* * Try again to invalidate clean pages which might have been cached by * non-direct readahead, or faulted in by get_user_pages() if the source @@ -949,12 +952,12 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, * this invalidation fails, tough, the write still worked... */ if (iov_iter_rw(iter) == WRITE && mapping->nrpages) { - ret = invalidate_inode_pages2_range(mapping, - iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT); - WARN_ON_ONCE(ret); + int err = invalidate_inode_pages2_range(mapping, + start >> PAGE_SHIFT, end >> PAGE_SHIFT); + WARN_ON_ONCE(err); } - return iomap_dio_complete(dio); + return ret; out_free_dio: kfree(dio); diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index 1953986ee6bc..6e610a205e15 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -12,7 +12,6 @@ #include <linux/slab.h> #include <linux/cred.h> #include <linux/xattr.h> -#include <linux/sched/signal.h> #include "overlayfs.h" #include "ovl_entry.h" diff --git a/fs/timerfd.c b/fs/timerfd.c index 384fa759a563..c543cdb5f8ed 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c @@ -400,9 +400,9 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags) clockid != CLOCK_BOOTTIME_ALARM)) return -EINVAL; - if (!capable(CAP_WAKE_ALARM) && - (clockid == CLOCK_REALTIME_ALARM || - clockid == CLOCK_BOOTTIME_ALARM)) + if ((clockid == CLOCK_REALTIME_ALARM || + clockid == CLOCK_BOOTTIME_ALARM) && + !capable(CAP_WAKE_ALARM)) return -EPERM; ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); @@ -449,7 +449,7 @@ static int do_timerfd_settime(int ufd, int flags, return ret; ctx = f.file->private_data; - if (!capable(CAP_WAKE_ALARM) && isalarm(ctx)) { + if (isalarm(ctx) && !capable(CAP_WAKE_ALARM)) { fdput(f); return -EPERM; } diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 973607df579d..1d227b0fcf49 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -138,8 +138,6 @@ out: * userfaultfd_ctx_get - Acquires a reference to the internal userfaultfd * context. * @ctx: [in] Pointer to the userfaultfd context. - * - * Returns: In case of success, returns not zero. */ static void userfaultfd_ctx_get(struct userfaultfd_ctx *ctx) { @@ -267,6 +265,7 @@ static inline bool userfaultfd_must_wait(struct userfaultfd_ctx *ctx, { struct mm_struct *mm = ctx->mm; pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pmd_t *pmd, _pmd; pte_t *pte; @@ -277,7 +276,10 @@ static inline bool userfaultfd_must_wait(struct userfaultfd_ctx *ctx, pgd = pgd_offset(mm, address); if (!pgd_present(*pgd)) goto out; - pud = pud_offset(pgd, address); + p4d = p4d_offset(pgd, address); + if (!p4d_present(*p4d)) + goto out; + pud = pud_offset(p4d, address); if (!pud_present(*pud)) goto out; pmd = pmd_offset(pud, address); @@ -490,7 +492,7 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason) * in such case. */ down_read(&mm->mmap_sem); - ret = 0; + ret = VM_FAULT_NOPAGE; } } @@ -527,10 +529,11 @@ out: return ret; } -static int userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, - struct userfaultfd_wait_queue *ewq) +static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, + struct userfaultfd_wait_queue *ewq) { - int ret = 0; + if (WARN_ON_ONCE(current->flags & PF_EXITING)) + goto out; ewq->ctx = ctx; init_waitqueue_entry(&ewq->wq, current); @@ -547,8 +550,16 @@ static int userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, break; if (ACCESS_ONCE(ctx->released) || fatal_signal_pending(current)) { - ret = -1; __remove_wait_queue(&ctx->event_wqh, &ewq->wq); + if (ewq->msg.event == UFFD_EVENT_FORK) { + struct userfaultfd_ctx *new; + + new = (struct userfaultfd_ctx *) + (unsigned long) + ewq->msg.arg.reserved.reserved1; + + userfaultfd_ctx_put(new); + } break; } @@ -566,9 +577,8 @@ static int userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, * ctx may go away after this if the userfault pseudo fd is * already released. */ - +out: userfaultfd_ctx_put(ctx); - return ret; } static void userfaultfd_event_complete(struct userfaultfd_ctx *ctx, @@ -626,7 +636,7 @@ int dup_userfaultfd(struct vm_area_struct *vma, struct list_head *fcs) return 0; } -static int dup_fctx(struct userfaultfd_fork_ctx *fctx) +static void dup_fctx(struct userfaultfd_fork_ctx *fctx) { struct userfaultfd_ctx *ctx = fctx->orig; struct userfaultfd_wait_queue ewq; @@ -636,17 +646,15 @@ static int dup_fctx(struct userfaultfd_fork_ctx *fctx) ewq.msg.event = UFFD_EVENT_FORK; ewq.msg.arg.reserved.reserved1 = (unsigned long)fctx->new; - return userfaultfd_event_wait_completion(ctx, &ewq); + userfaultfd_event_wait_completion(ctx, &ewq); } void dup_userfaultfd_complete(struct list_head *fcs) { - int ret = 0; struct userfaultfd_fork_ctx *fctx, *n; list_for_each_entry_safe(fctx, n, fcs, list) { - if (!ret) - ret = dup_fctx(fctx); + dup_fctx(fctx); list_del(&fctx->list); kfree(fctx); } @@ -689,8 +697,7 @@ void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *vm_ctx, userfaultfd_event_wait_completion(ctx, &ewq); } -void userfaultfd_remove(struct vm_area_struct *vma, - struct vm_area_struct **prev, +bool userfaultfd_remove(struct vm_area_struct *vma, unsigned long start, unsigned long end) { struct mm_struct *mm = vma->vm_mm; @@ -699,13 +706,11 @@ void userfaultfd_remove(struct vm_area_struct *vma, ctx = vma->vm_userfaultfd_ctx.ctx; if (!ctx || !(ctx->features & UFFD_FEATURE_EVENT_REMOVE)) - return; + return true; userfaultfd_ctx_get(ctx); up_read(&mm->mmap_sem); - *prev = NULL; /* We wait for ACK w/o the mmap semaphore */ - msg_init(&ewq.msg); ewq.msg.event = UFFD_EVENT_REMOVE; @@ -714,7 +719,7 @@ void userfaultfd_remove(struct vm_area_struct *vma, userfaultfd_event_wait_completion(ctx, &ewq); - down_read(&mm->mmap_sem); + return false; } static bool has_unmap_ctx(struct userfaultfd_ctx *ctx, struct list_head *unmaps, @@ -775,34 +780,6 @@ void userfaultfd_unmap_complete(struct mm_struct *mm, struct list_head *uf) } } -void userfaultfd_exit(struct mm_struct *mm) -{ - struct vm_area_struct *vma = mm->mmap; - - /* - * We can do the vma walk without locking because the caller - * (exit_mm) knows it now has exclusive access - */ - while (vma) { - struct userfaultfd_ctx *ctx = vma->vm_userfaultfd_ctx.ctx; - - if (ctx && (ctx->features & UFFD_FEATURE_EVENT_EXIT)) { - struct userfaultfd_wait_queue ewq; - - userfaultfd_ctx_get(ctx); - - msg_init(&ewq.msg); - ewq.msg.event = UFFD_EVENT_EXIT; - - userfaultfd_event_wait_completion(ctx, &ewq); - - ctx->features &= ~UFFD_FEATURE_EVENT_EXIT; - } - - vma = vma->vm_next; - } -} - static int userfaultfd_release(struct inode *inode, struct file *file) { struct userfaultfd_ctx *ctx = file->private_data; diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c index 2dfdc62f795e..70a5b55e0870 100644 --- a/fs/xfs/kmem.c +++ b/fs/xfs/kmem.c @@ -25,24 +25,6 @@ #include "kmem.h" #include "xfs_message.h" -/* - * Greedy allocation. May fail and may return vmalloced memory. - */ -void * -kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize) -{ - void *ptr; - size_t kmsize = maxsize; - - while (!(ptr = vzalloc(kmsize))) { - if ((kmsize >>= 1) <= minsize) - kmsize = minsize; - } - if (ptr) - *size = kmsize; - return ptr; -} - void * kmem_alloc(size_t size, xfs_km_flags_t flags) { diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h index 689f746224e7..f0fc84fcaac2 100644 --- a/fs/xfs/kmem.h +++ b/fs/xfs/kmem.h @@ -69,8 +69,6 @@ static inline void kmem_free(const void *ptr) } -extern void *kmem_zalloc_greedy(size_t *, size_t, size_t); - static inline void * kmem_zalloc(size_t size, xfs_km_flags_t flags) { diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index a9c66d47757a..9bd104f32908 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -763,8 +763,8 @@ xfs_bmap_extents_to_btree( args.type = XFS_ALLOCTYPE_START_BNO; args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino); } else if (dfops->dop_low) { -try_another_ag: args.type = XFS_ALLOCTYPE_START_BNO; +try_another_ag: args.fsbno = *firstblock; } else { args.type = XFS_ALLOCTYPE_NEAR_BNO; @@ -790,13 +790,17 @@ try_another_ag: if (xfs_sb_version_hasreflink(&cur->bc_mp->m_sb) && args.fsbno == NULLFSBLOCK && args.type == XFS_ALLOCTYPE_NEAR_BNO) { - dfops->dop_low = true; + args.type = XFS_ALLOCTYPE_FIRST_AG; goto try_another_ag; } + if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) { + xfs_iroot_realloc(ip, -1, whichfork); + xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); + return -ENOSPC; + } /* * Allocation can't fail, the space was reserved. */ - ASSERT(args.fsbno != NULLFSBLOCK); ASSERT(*firstblock == NULLFSBLOCK || args.agno >= XFS_FSB_TO_AGNO(mp, *firstblock)); *firstblock = cur->bc_private.b.firstblock = args.fsbno; @@ -4150,6 +4154,19 @@ xfs_bmapi_read( return 0; } +/* + * Add a delayed allocation extent to an inode. Blocks are reserved from the + * global pool and the extent inserted into the inode in-core extent tree. + * + * On entry, got refers to the first extent beyond the offset of the extent to + * allocate or eof is specified if no such extent exists. On return, got refers + * to the extent record that was inserted to the inode fork. + * + * Note that the allocated extent may have been merged with contiguous extents + * during insertion into the inode fork. Thus, got does not reflect the current + * state of the inode fork on return. If necessary, the caller can use lastx to + * look up the updated record in the inode fork. + */ int xfs_bmapi_reserve_delalloc( struct xfs_inode *ip, @@ -4236,13 +4253,8 @@ xfs_bmapi_reserve_delalloc( got->br_startblock = nullstartblock(indlen); got->br_blockcount = alen; got->br_state = XFS_EXT_NORM; - xfs_bmap_add_extent_hole_delay(ip, whichfork, lastx, got); - /* - * Update our extent pointer, given that xfs_bmap_add_extent_hole_delay - * might have merged it into one of the neighbouring ones. - */ - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *lastx), got); + xfs_bmap_add_extent_hole_delay(ip, whichfork, lastx, got); /* * Tag the inode if blocks were preallocated. Note that COW fork @@ -4254,10 +4266,6 @@ xfs_bmapi_reserve_delalloc( if (whichfork == XFS_COW_FORK && (prealloc || aoff < off || alen > len)) xfs_inode_set_cowblocks_tag(ip); - ASSERT(got->br_startoff <= aoff); - ASSERT(got->br_startoff + got->br_blockcount >= aoff + alen); - ASSERT(isnullstartblock(got->br_startblock)); - ASSERT(got->br_state == XFS_EXT_NORM); return 0; out_unreserve_blocks: diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c index f93072b58a58..fd55db479385 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.c +++ b/fs/xfs/libxfs/xfs_bmap_btree.c @@ -447,8 +447,8 @@ xfs_bmbt_alloc_block( if (args.fsbno == NULLFSBLOCK) { args.fsbno = be64_to_cpu(start->l); -try_another_ag: args.type = XFS_ALLOCTYPE_START_BNO; +try_another_ag: /* * Make sure there is sufficient room left in the AG to * complete a full tree split for an extent insert. If @@ -488,8 +488,8 @@ try_another_ag: if (xfs_sb_version_hasreflink(&cur->bc_mp->m_sb) && args.fsbno == NULLFSBLOCK && args.type == XFS_ALLOCTYPE_NEAR_BNO) { - cur->bc_private.b.dfops->dop_low = true; args.fsbno = cur->bc_private.b.firstblock; + args.type = XFS_ALLOCTYPE_FIRST_AG; goto try_another_ag; } @@ -506,7 +506,7 @@ try_another_ag: goto error0; cur->bc_private.b.dfops->dop_low = true; } - if (args.fsbno == NULLFSBLOCK) { + if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) { XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); *stat = 0; return 0; diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index bf65a9ea8642..61494295d92f 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -274,54 +274,49 @@ xfs_end_io( struct xfs_ioend *ioend = container_of(work, struct xfs_ioend, io_work); struct xfs_inode *ip = XFS_I(ioend->io_inode); + xfs_off_t offset = ioend->io_offset; + size_t size = ioend->io_size; int error = ioend->io_bio->bi_error; /* - * Set an error if the mount has shut down and proceed with end I/O - * processing so it can perform whatever cleanups are necessary. + * Just clean up the in-memory strutures if the fs has been shut down. */ - if (XFS_FORCED_SHUTDOWN(ip->i_mount)) + if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { error = -EIO; + goto done; + } /* - * For a CoW extent, we need to move the mapping from the CoW fork - * to the data fork. If instead an error happened, just dump the - * new blocks. + * Clean up any COW blocks on an I/O error. */ - if (ioend->io_type == XFS_IO_COW) { - if (error) - goto done; - if (ioend->io_bio->bi_error) { - error = xfs_reflink_cancel_cow_range(ip, - ioend->io_offset, ioend->io_size); - goto done; + if (unlikely(error)) { + switch (ioend->io_type) { + case XFS_IO_COW: + xfs_reflink_cancel_cow_range(ip, offset, size, true); + break; } - error = xfs_reflink_end_cow(ip, ioend->io_offset, - ioend->io_size); - if (error) - goto done; + + goto done; } /* - * For unwritten extents we need to issue transactions to convert a - * range to normal written extens after the data I/O has finished. - * Detecting and handling completion IO errors is done individually - * for each case as different cleanup operations need to be performed - * on error. + * Success: commit the COW or unwritten blocks if needed. */ - if (ioend->io_type == XFS_IO_UNWRITTEN) { - if (error) - goto done; - error = xfs_iomap_write_unwritten(ip, ioend->io_offset, - ioend->io_size); - } else if (ioend->io_append_trans) { - error = xfs_setfilesize_ioend(ioend, error); - } else { - ASSERT(!xfs_ioend_is_append(ioend) || - ioend->io_type == XFS_IO_COW); + switch (ioend->io_type) { + case XFS_IO_COW: + error = xfs_reflink_end_cow(ip, offset, size); + break; + case XFS_IO_UNWRITTEN: + error = xfs_iomap_write_unwritten(ip, offset, size); + break; + default: + ASSERT(!xfs_ioend_is_append(ioend) || ioend->io_append_trans); + break; } done: + if (ioend->io_append_trans) + error = xfs_setfilesize_ioend(ioend, error); xfs_destroy_ioend(ioend, error); } diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 7234b9748c36..3531f8f72fa5 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -1608,7 +1608,7 @@ xfs_inode_free_cowblocks( xfs_ilock(ip, XFS_IOLOCK_EXCL); xfs_ilock(ip, XFS_MMAPLOCK_EXCL); - ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF); + ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, false); xfs_iunlock(ip, XFS_MMAPLOCK_EXCL); xfs_iunlock(ip, XFS_IOLOCK_EXCL); diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index edfa6a55b064..7eaf1ef74e3c 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1615,7 +1615,7 @@ xfs_itruncate_extents( /* Remove all pending CoW reservations. */ error = xfs_reflink_cancel_cow_blocks(ip, &tp, first_unmap_block, - last_block); + last_block, true); if (error) goto out; diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 41662fb14e87..288ee5b840d7 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -630,6 +630,11 @@ retry: goto out_unlock; } + /* + * Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch + * them out if the write happens to fail. + */ + iomap->flags = IOMAP_F_NEW; trace_xfs_iomap_alloc(ip, offset, count, 0, &got); done: if (isnullstartblock(got.br_startblock)) @@ -1071,16 +1076,22 @@ xfs_file_iomap_end_delalloc( struct xfs_inode *ip, loff_t offset, loff_t length, - ssize_t written) + ssize_t written, + struct iomap *iomap) { struct xfs_mount *mp = ip->i_mount; xfs_fileoff_t start_fsb; xfs_fileoff_t end_fsb; int error = 0; - /* behave as if the write failed if drop writes is enabled */ - if (xfs_mp_drop_writes(mp)) + /* + * Behave as if the write failed if drop writes is enabled. Set the NEW + * flag to force delalloc cleanup. + */ + if (xfs_mp_drop_writes(mp)) { + iomap->flags |= IOMAP_F_NEW; written = 0; + } /* * start_fsb refers to the first unused block after a short write. If @@ -1094,14 +1105,14 @@ xfs_file_iomap_end_delalloc( end_fsb = XFS_B_TO_FSB(mp, offset + length); /* - * Trim back delalloc blocks if we didn't manage to write the whole - * range reserved. + * Trim delalloc blocks if they were allocated by this write and we + * didn't manage to write the whole range. * * We don't need to care about racing delalloc as we hold i_mutex * across the reserve/allocate/unreserve calls. If there are delalloc * blocks in the range, they are ours. */ - if (start_fsb < end_fsb) { + if ((iomap->flags & IOMAP_F_NEW) && start_fsb < end_fsb) { truncate_pagecache_range(VFS_I(ip), XFS_FSB_TO_B(mp, start_fsb), XFS_FSB_TO_B(mp, end_fsb) - 1); @@ -1131,7 +1142,7 @@ xfs_file_iomap_end( { if ((flags & IOMAP_WRITE) && iomap->type == IOMAP_DELALLOC) return xfs_file_iomap_end_delalloc(XFS_I(inode), offset, - length, written); + length, written, iomap); return 0; } diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index 66e881790c17..2a6d9b1558e0 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -361,7 +361,6 @@ xfs_bulkstat( xfs_agino_t agino; /* inode # in allocation group */ xfs_agnumber_t agno; /* allocation group number */ xfs_btree_cur_t *cur; /* btree cursor for ialloc btree */ - size_t irbsize; /* size of irec buffer in bytes */ xfs_inobt_rec_incore_t *irbuf; /* start of irec buffer */ int nirbuf; /* size of irbuf */ int ubcount; /* size of user's buffer */ @@ -388,11 +387,10 @@ xfs_bulkstat( *ubcountp = 0; *done = 0; - irbuf = kmem_zalloc_greedy(&irbsize, PAGE_SIZE, PAGE_SIZE * 4); + irbuf = kmem_zalloc_large(PAGE_SIZE * 4, KM_SLEEP); if (!irbuf) return -ENOMEM; - - nirbuf = irbsize / sizeof(*irbuf); + nirbuf = (PAGE_SIZE * 4) / sizeof(*irbuf); /* * Loop over the allocation groups, starting from the last diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 450bde68bb75..688ebff1f663 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -513,8 +513,7 @@ STATIC void xfs_set_inoalignment(xfs_mount_t *mp) { if (xfs_sb_version_hasalign(&mp->m_sb) && - mp->m_sb.sb_inoalignmt >= - XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size)) + mp->m_sb.sb_inoalignmt >= xfs_icluster_size_fsb(mp)) mp->m_inoalign_mask = mp->m_sb.sb_inoalignmt - 1; else mp->m_inoalign_mask = 0; diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index da6d08fb359c..4a84c5ea266d 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -548,14 +548,18 @@ xfs_reflink_trim_irec_to_next_cow( } /* - * Cancel all pending CoW reservations for some block range of an inode. + * Cancel CoW reservations for some block range of an inode. + * + * If cancel_real is true this function cancels all COW fork extents for the + * inode; if cancel_real is false, real extents are not cleared. */ int xfs_reflink_cancel_cow_blocks( struct xfs_inode *ip, struct xfs_trans **tpp, xfs_fileoff_t offset_fsb, - xfs_fileoff_t end_fsb) + xfs_fileoff_t end_fsb, + bool cancel_real) { struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); struct xfs_bmbt_irec got, del; @@ -579,7 +583,7 @@ xfs_reflink_cancel_cow_blocks( &idx, &got, &del); if (error) break; - } else { + } else if (del.br_state == XFS_EXT_UNWRITTEN || cancel_real) { xfs_trans_ijoin(*tpp, ip, 0); xfs_defer_init(&dfops, &firstfsb); @@ -621,13 +625,17 @@ xfs_reflink_cancel_cow_blocks( } /* - * Cancel all pending CoW reservations for some byte range of an inode. + * Cancel CoW reservations for some byte range of an inode. + * + * If cancel_real is true this function cancels all COW fork extents for the + * inode; if cancel_real is false, real extents are not cleared. */ int xfs_reflink_cancel_cow_range( struct xfs_inode *ip, xfs_off_t offset, - xfs_off_t count) + xfs_off_t count, + bool cancel_real) { struct xfs_trans *tp; xfs_fileoff_t offset_fsb; @@ -653,7 +661,8 @@ xfs_reflink_cancel_cow_range( xfs_trans_ijoin(tp, ip, 0); /* Scrape out the old CoW reservations */ - error = xfs_reflink_cancel_cow_blocks(ip, &tp, offset_fsb, end_fsb); + error = xfs_reflink_cancel_cow_blocks(ip, &tp, offset_fsb, end_fsb, + cancel_real); if (error) goto out_cancel; @@ -1450,7 +1459,7 @@ next: * We didn't find any shared blocks so turn off the reflink flag. * First, get rid of any leftover CoW mappings. */ - error = xfs_reflink_cancel_cow_blocks(ip, tpp, 0, NULLFILEOFF); + error = xfs_reflink_cancel_cow_blocks(ip, tpp, 0, NULLFILEOFF, true); if (error) return error; diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h index 33ac9b8db683..d29a7967f029 100644 --- a/fs/xfs/xfs_reflink.h +++ b/fs/xfs/xfs_reflink.h @@ -39,9 +39,9 @@ extern void xfs_reflink_trim_irec_to_next_cow(struct xfs_inode *ip, extern int xfs_reflink_cancel_cow_blocks(struct xfs_inode *ip, struct xfs_trans **tpp, xfs_fileoff_t offset_fsb, - xfs_fileoff_t end_fsb); + xfs_fileoff_t end_fsb, bool cancel_real); extern int xfs_reflink_cancel_cow_range(struct xfs_inode *ip, xfs_off_t offset, - xfs_off_t count); + xfs_off_t count, bool cancel_real); extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset, xfs_off_t count); extern int xfs_reflink_recover_cow(struct xfs_mount *mp); diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 890862f2447c..685c042a120f 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -953,7 +953,7 @@ xfs_fs_destroy_inode( XFS_STATS_INC(ip->i_mount, vn_remove); if (xfs_is_reflink_inode(ip)) { - error = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF); + error = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, true); if (error && !XFS_FORCED_SHUTDOWN(ip->i_mount)) xfs_warn(ip->i_mount, "Error %d while evicting CoW blocks for inode %llu.", diff --git a/include/asm-generic/4level-fixup.h b/include/asm-generic/4level-fixup.h index 5bdab6bffd23..928fd66b1271 100644 --- a/include/asm-generic/4level-fixup.h +++ b/include/asm-generic/4level-fixup.h @@ -15,7 +15,6 @@ ((unlikely(pgd_none(*(pud))) && __pmd_alloc(mm, pud, address))? \ NULL: pmd_offset(pud, address)) -#define pud_alloc(mm, pgd, address) (pgd) #define pud_offset(pgd, start) (pgd) #define pud_none(pud) 0 #define pud_bad(pud) 0 @@ -35,4 +34,6 @@ #undef pud_addr_end #define pud_addr_end(addr, end) (end) +#include <asm-generic/5level-fixup.h> + #endif diff --git a/include/asm-generic/5level-fixup.h b/include/asm-generic/5level-fixup.h new file mode 100644 index 000000000000..b5ca82dc4175 --- /dev/null +++ b/include/asm-generic/5level-fixup.h @@ -0,0 +1,41 @@ +#ifndef _5LEVEL_FIXUP_H +#define _5LEVEL_FIXUP_H + +#define __ARCH_HAS_5LEVEL_HACK +#define __PAGETABLE_P4D_FOLDED + +#define P4D_SHIFT PGDIR_SHIFT +#define P4D_SIZE PGDIR_SIZE +#define P4D_MASK PGDIR_MASK +#define PTRS_PER_P4D 1 + +#define p4d_t pgd_t + +#define pud_alloc(mm, p4d, address) \ + ((unlikely(pgd_none(*(p4d))) && __pud_alloc(mm, p4d, address)) ? \ + NULL : pud_offset(p4d, address)) + +#define p4d_alloc(mm, pgd, address) (pgd) +#define p4d_offset(pgd, start) (pgd) +#define p4d_none(p4d) 0 +#define p4d_bad(p4d) 0 +#define p4d_present(p4d) 1 +#define p4d_ERROR(p4d) do { } while (0) +#define p4d_clear(p4d) pgd_clear(p4d) +#define p4d_val(p4d) pgd_val(p4d) +#define p4d_populate(mm, p4d, pud) pgd_populate(mm, p4d, pud) +#define p4d_page(p4d) pgd_page(p4d) +#define p4d_page_vaddr(p4d) pgd_page_vaddr(p4d) + +#define __p4d(x) __pgd(x) +#define set_p4d(p4dp, p4d) set_pgd(p4dp, p4d) + +#undef p4d_free_tlb +#define p4d_free_tlb(tlb, x, addr) do { } while (0) +#define p4d_free(mm, x) do { } while (0) +#define __p4d_free_tlb(tlb, x, addr) do { } while (0) + +#undef p4d_addr_end +#define p4d_addr_end(addr, end) (end) + +#endif diff --git a/include/asm-generic/pgtable-nop4d-hack.h b/include/asm-generic/pgtable-nop4d-hack.h new file mode 100644 index 000000000000..752fb7511750 --- /dev/null +++ b/include/asm-generic/pgtable-nop4d-hack.h @@ -0,0 +1,62 @@ +#ifndef _PGTABLE_NOP4D_HACK_H +#define _PGTABLE_NOP4D_HACK_H + +#ifndef __ASSEMBLY__ +#include <asm-generic/5level-fixup.h> + +#define __PAGETABLE_PUD_FOLDED + +/* + * Having the pud type consist of a pgd gets the size right, and allows + * us to conceptually access the pgd entry that this pud is folded into + * without casting. + */ +typedef struct { pgd_t pgd; } pud_t; + +#define PUD_SHIFT PGDIR_SHIFT +#define PTRS_PER_PUD 1 +#define PUD_SIZE (1UL << PUD_SHIFT) +#define PUD_MASK (~(PUD_SIZE-1)) + +/* + * The "pgd_xxx()" functions here are trivial for a folded two-level + * setup: the pud is never bad, and a pud always exists (as it's folded + * into the pgd entry) + */ +static inline int pgd_none(pgd_t pgd) { return 0; } +static inline int pgd_bad(pgd_t pgd) { return 0; } +static inline int pgd_present(pgd_t pgd) { return 1; } +static inline void pgd_clear(pgd_t *pgd) { } +#define pud_ERROR(pud) (pgd_ERROR((pud).pgd)) + +#define pgd_populate(mm, pgd, pud) do { } while (0) +/* + * (puds are folded into pgds so this doesn't get actually called, + * but the define is needed for a generic inline function.) + */ +#define set_pgd(pgdptr, pgdval) set_pud((pud_t *)(pgdptr), (pud_t) { pgdval }) + +static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address) +{ + return (pud_t *)pgd; +} + +#define pud_val(x) (pgd_val((x).pgd)) +#define __pud(x) ((pud_t) { __pgd(x) }) + +#define pgd_page(pgd) (pud_page((pud_t){ pgd })) +#define pgd_page_vaddr(pgd) (pud_page_vaddr((pud_t){ pgd })) + +/* + * allocating and freeing a pud is trivial: the 1-entry pud is + * inside the pgd, so has no extra memory associated with it. + */ +#define pud_alloc_one(mm, address) NULL +#define pud_free(mm, x) do { } while (0) +#define __pud_free_tlb(tlb, x, a) do { } while (0) + +#undef pud_addr_end +#define pud_addr_end(addr, end) (end) + +#endif /* __ASSEMBLY__ */ +#endif /* _PGTABLE_NOP4D_HACK_H */ diff --git a/include/asm-generic/pgtable-nop4d.h b/include/asm-generic/pgtable-nop4d.h new file mode 100644 index 000000000000..de364ecb8df6 --- /dev/null +++ b/include/asm-generic/pgtable-nop4d.h @@ -0,0 +1,56 @@ +#ifndef _PGTABLE_NOP4D_H +#define _PGTABLE_NOP4D_H + +#ifndef __ASSEMBLY__ + +#define __PAGETABLE_P4D_FOLDED + +typedef struct { pgd_t pgd; } p4d_t; + +#define P4D_SHIFT PGDIR_SHIFT +#define PTRS_PER_P4D 1 +#define P4D_SIZE (1UL << P4D_SHIFT) +#define P4D_MASK (~(P4D_SIZE-1)) + +/* + * The "pgd_xxx()" functions here are trivial for a folded two-level + * setup: the p4d is never bad, and a p4d always exists (as it's folded + * into the pgd entry) + */ +static inline int pgd_none(pgd_t pgd) { return 0; } +static inline int pgd_bad(pgd_t pgd) { return 0; } +static inline int pgd_present(pgd_t pgd) { return 1; } +static inline void pgd_clear(pgd_t *pgd) { } +#define p4d_ERROR(p4d) (pgd_ERROR((p4d).pgd)) + +#define pgd_populate(mm, pgd, p4d) do { } while (0) +/* + * (p4ds are folded into pgds so this doesn't get actually called, + * but the define is needed for a generic inline function.) + */ +#define set_pgd(pgdptr, pgdval) set_p4d((p4d_t *)(pgdptr), (p4d_t) { pgdval }) + +static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address) +{ + return (p4d_t *)pgd; +} + +#define p4d_val(x) (pgd_val((x).pgd)) +#define __p4d(x) ((p4d_t) { __pgd(x) }) + +#define pgd_page(pgd) (p4d_page((p4d_t){ pgd })) +#define pgd_page_vaddr(pgd) (p4d_page_vaddr((p4d_t){ pgd })) + +/* + * allocating and freeing a p4d is trivial: the 1-entry p4d is + * inside the pgd, so has no extra memory associated with it. + */ +#define p4d_alloc_one(mm, address) NULL +#define p4d_free(mm, x) do { } while (0) +#define __p4d_free_tlb(tlb, x, a) do { } while (0) + +#undef p4d_addr_end +#define p4d_addr_end(addr, end) (end) + +#endif /* __ASSEMBLY__ */ +#endif /* _PGTABLE_NOP4D_H */ diff --git a/include/asm-generic/pgtable-nopud.h b/include/asm-generic/pgtable-nopud.h index 810431d8351b..c2b9b96d6268 100644 --- a/include/asm-generic/pgtable-nopud.h +++ b/include/asm-generic/pgtable-nopud.h @@ -3,52 +3,57 @@ #ifndef __ASSEMBLY__ +#ifdef __ARCH_USE_5LEVEL_HACK +#include <asm-generic/pgtable-nop4d-hack.h> +#else +#include <asm-generic/pgtable-nop4d.h> + #define __PAGETABLE_PUD_FOLDED /* - * Having the pud type consist of a pgd gets the size right, and allows - * us to conceptually access the pgd entry that this pud is folded into + * Having the pud type consist of a p4d gets the size right, and allows + * us to conceptually access the p4d entry that this pud is folded into * without casting. */ -typedef struct { pgd_t pgd; } pud_t; +typedef struct { p4d_t p4d; } pud_t; -#define PUD_SHIFT PGDIR_SHIFT +#define PUD_SHIFT P4D_SHIFT #define PTRS_PER_PUD 1 #define PUD_SIZE (1UL << PUD_SHIFT) #define PUD_MASK (~(PUD_SIZE-1)) /* - * The "pgd_xxx()" functions here are trivial for a folded two-level + * The "p4d_xxx()" functions here are trivial for a folded two-level * setup: the pud is never bad, and a pud always exists (as it's folded - * into the pgd entry) + * into the p4d entry) */ -static inline int pgd_none(pgd_t pgd) { return 0; } -static inline int pgd_bad(pgd_t pgd) { return 0; } -static inline int pgd_present(pgd_t pgd) { return 1; } -static inline void pgd_clear(pgd_t *pgd) { } -#define pud_ERROR(pud) (pgd_ERROR((pud).pgd)) +static inline int p4d_none(p4d_t p4d) { return 0; } +static inline int p4d_bad(p4d_t p4d) { return 0; } +static inline int p4d_present(p4d_t p4d) { return 1; } +static inline void p4d_clear(p4d_t *p4d) { } +#define pud_ERROR(pud) (p4d_ERROR((pud).p4d)) -#define pgd_populate(mm, pgd, pud) do { } while (0) +#define p4d_populate(mm, p4d, pud) do { } while (0) /* - * (puds are folded into pgds so this doesn't get actually called, + * (puds are folded into p4ds so this doesn't get actually called, * but the define is needed for a generic inline function.) */ -#define set_pgd(pgdptr, pgdval) set_pud((pud_t *)(pgdptr), (pud_t) { pgdval }) +#define set_p4d(p4dptr, p4dval) set_pud((pud_t *)(p4dptr), (pud_t) { p4dval }) -static inline pud_t * pud_offset(pgd_t * pgd, unsigned long address) +static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address) { - return (pud_t *)pgd; + return (pud_t *)p4d; } -#define pud_val(x) (pgd_val((x).pgd)) -#define __pud(x) ((pud_t) { __pgd(x) } ) +#define pud_val(x) (p4d_val((x).p4d)) +#define __pud(x) ((pud_t) { __p4d(x) }) -#define pgd_page(pgd) (pud_page((pud_t){ pgd })) -#define pgd_page_vaddr(pgd) (pud_page_vaddr((pud_t){ pgd })) +#define p4d_page(p4d) (pud_page((pud_t){ p4d })) +#define p4d_page_vaddr(p4d) (pud_page_vaddr((pud_t){ p4d })) /* * allocating and freeing a pud is trivial: the 1-entry pud is - * inside the pgd, so has no extra memory associated with it. + * inside the p4d, so has no extra memory associated with it. */ #define pud_alloc_one(mm, address) NULL #define pud_free(mm, x) do { } while (0) @@ -58,4 +63,5 @@ static inline pud_t * pud_offset(pgd_t * pgd, unsigned long address) #define pud_addr_end(addr, end) (end) #endif /* __ASSEMBLY__ */ +#endif /* !__ARCH_USE_5LEVEL_HACK */ #endif /* _PGTABLE_NOPUD_H */ diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index f4ca23b158b3..1fad160f35de 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -10,9 +10,9 @@ #include <linux/bug.h> #include <linux/errno.h> -#if 4 - defined(__PAGETABLE_PUD_FOLDED) - defined(__PAGETABLE_PMD_FOLDED) != \ - CONFIG_PGTABLE_LEVELS -#error CONFIG_PGTABLE_LEVELS is not consistent with __PAGETABLE_{PUD,PMD}_FOLDED +#if 5 - defined(__PAGETABLE_P4D_FOLDED) - defined(__PAGETABLE_PUD_FOLDED) - \ + defined(__PAGETABLE_PMD_FOLDED) != CONFIG_PGTABLE_LEVELS +#error CONFIG_PGTABLE_LEVELS is not consistent with __PAGETABLE_{P4D,PUD,PMD}_FOLDED #endif /* @@ -424,6 +424,13 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) (__boundary - 1 < (end) - 1)? __boundary: (end); \ }) +#ifndef p4d_addr_end +#define p4d_addr_end(addr, end) \ +({ unsigned long __boundary = ((addr) + P4D_SIZE) & P4D_MASK; \ + (__boundary - 1 < (end) - 1)? __boundary: (end); \ +}) +#endif + #ifndef pud_addr_end #define pud_addr_end(addr, end) \ ({ unsigned long __boundary = ((addr) + PUD_SIZE) & PUD_MASK; \ @@ -444,6 +451,7 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) * Do the tests inline, but report and clear the bad entry in mm/memory.c. */ void pgd_clear_bad(pgd_t *); +void p4d_clear_bad(p4d_t *); void pud_clear_bad(pud_t *); void pmd_clear_bad(pmd_t *); @@ -458,6 +466,17 @@ static inline int pgd_none_or_clear_bad(pgd_t *pgd) return 0; } +static inline int p4d_none_or_clear_bad(p4d_t *p4d) +{ + if (p4d_none(*p4d)) + return 1; + if (unlikely(p4d_bad(*p4d))) { + p4d_clear_bad(p4d); + return 1; + } + return 0; +} + static inline int pud_none_or_clear_bad(pud_t *pud) { if (pud_none(*pud)) @@ -844,11 +863,30 @@ static inline int pmd_protnone(pmd_t pmd) #endif /* CONFIG_MMU */ #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP + +#ifndef __PAGETABLE_P4D_FOLDED +int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot); +int p4d_clear_huge(p4d_t *p4d); +#else +static inline int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot) +{ + return 0; +} +static inline int p4d_clear_huge(p4d_t *p4d) +{ + return 0; +} +#endif /* !__PAGETABLE_P4D_FOLDED */ + int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot); int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot); int pud_clear_huge(pud_t *pud); int pmd_clear_huge(pmd_t *pmd); #else /* !CONFIG_HAVE_ARCH_HUGE_VMAP */ +static inline int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot) +{ + return 0; +} static inline int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot) { return 0; @@ -857,6 +895,10 @@ static inline int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot) { return 0; } +static inline int p4d_clear_huge(p4d_t *p4d) +{ + return 0; +} static inline int pud_clear_huge(pud_t *pud) { return 0; diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index 4329bc6ef04b..8afa4335e5b2 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -270,6 +270,12 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb, __pte_free_tlb(tlb, ptep, address); \ } while (0) +#define pmd_free_tlb(tlb, pmdp, address) \ + do { \ + __tlb_adjust_range(tlb, address, PAGE_SIZE); \ + __pmd_free_tlb(tlb, pmdp, address); \ + } while (0) + #ifndef __ARCH_HAS_4LEVEL_HACK #define pud_free_tlb(tlb, pudp, address) \ do { \ @@ -278,11 +284,13 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb, } while (0) #endif -#define pmd_free_tlb(tlb, pmdp, address) \ +#ifndef __ARCH_HAS_5LEVEL_HACK +#define p4d_free_tlb(tlb, pudp, address) \ do { \ - __tlb_adjust_range(tlb, address, PAGE_SIZE); \ - __pmd_free_tlb(tlb, pmdp, address); \ + __tlb_adjust_range(tlb, address, PAGE_SIZE); \ + __p4d_free_tlb(tlb, pudp, address); \ } while (0) +#endif #define tlb_migrate_finish(mm) do {} while (0) diff --git a/include/dt-bindings/sound/cs42l42.h b/include/dt-bindings/sound/cs42l42.h index 399a123aed58..db69d84ed7d1 100644 --- a/include/dt-bindings/sound/cs42l42.h +++ b/include/dt-bindings/sound/cs42l42.h @@ -20,7 +20,7 @@ #define CS42L42_HPOUT_LOAD_1NF 0 #define CS42L42_HPOUT_LOAD_10NF 1 -/* HPOUT Clamp to GND Overide */ +/* HPOUT Clamp to GND Override */ #define CS42L42_HPOUT_CLAMP_EN 0 #define CS42L42_HPOUT_CLAMP_DIS 1 diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 796016e63c1d..5a7da607ca04 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -435,7 +435,6 @@ struct request_queue { struct delayed_work delay_work; struct backing_dev_info *backing_dev_info; - struct disk_devt *disk_devt; /* * The queue owner gets to use this for whatever they like. diff --git a/include/linux/fs.h b/include/linux/fs.h index aad3fd0ff5f8..7251f7bb45e8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2678,7 +2678,7 @@ static const char * const kernel_read_file_str[] = { static inline const char *kernel_read_file_id_str(enum kernel_read_file_id id) { - if (id < 0 || id >= READING_MAX_ID) + if ((unsigned)id >= READING_MAX_ID) return kernel_read_file_str[READING_UNKNOWN]; return kernel_read_file_str[id]; diff --git a/include/linux/genhd.h b/include/linux/genhd.h index a999d281a2f1..76f39754e7b0 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -167,13 +167,6 @@ struct blk_integrity { }; #endif /* CONFIG_BLK_DEV_INTEGRITY */ -struct disk_devt { - atomic_t count; - void (*release)(struct disk_devt *disk_devt); -}; - -void put_disk_devt(struct disk_devt *disk_devt); -void get_disk_devt(struct disk_devt *disk_devt); struct gendisk { /* major, first_minor and minors are input parameters only, @@ -183,7 +176,6 @@ struct gendisk { int first_minor; int minors; /* maximum number of minors, =1 for * disks that can't be partitioned. */ - struct disk_devt *disk_devt; char disk_name[DISK_NAME_LEN]; /* name of major driver */ char *(*devnode)(struct gendisk *gd, umode_t *mode); diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 503099d8aada..b857fc8cc2ec 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -122,7 +122,7 @@ struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, struct page *follow_huge_pud(struct mm_struct *mm, unsigned long address, pud_t *pud, int flags); int pmd_huge(pmd_t pmd); -int pud_huge(pud_t pmd); +int pud_huge(pud_t pud); unsigned long hugetlb_change_protection(struct vm_area_struct *vma, unsigned long address, unsigned long end, pgprot_t newprot); @@ -197,6 +197,9 @@ static inline void __unmap_hugepage_range(struct mmu_gather *tlb, #ifndef pgd_huge #define pgd_huge(x) 0 #endif +#ifndef p4d_huge +#define p4d_huge(x) 0 +#endif #ifndef pgd_write static inline int pgd_write(pgd_t pgd) diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 8e06d758ee48..2afd74b9d844 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -90,6 +90,13 @@ extern bool static_key_initialized; struct static_key { atomic_t enabled; /* + * Note: + * To make anonymous unions work with old compilers, the static + * initialization of them requires brackets. This creates a dependency + * on the order of the struct with the initializers. If any fields + * are added, STATIC_KEY_INIT_TRUE and STATIC_KEY_INIT_FALSE may need + * to be modified. + * * bit 0 => 1 if key is initially true * 0 if initially false * bit 1 => 1 if points to struct static_key_mod @@ -166,10 +173,10 @@ extern void static_key_disable(struct static_key *key); */ #define STATIC_KEY_INIT_TRUE \ { .enabled = { 1 }, \ - .entries = (void *)JUMP_TYPE_TRUE } + { .entries = (void *)JUMP_TYPE_TRUE } } #define STATIC_KEY_INIT_FALSE \ { .enabled = { 0 }, \ - .entries = (void *)JUMP_TYPE_FALSE } + { .entries = (void *)JUMP_TYPE_FALSE } } #else /* !HAVE_JUMP_LABEL */ diff --git a/include/linux/kasan.h b/include/linux/kasan.h index ceb3fe78a0d3..1c823bef4c15 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -18,6 +18,7 @@ extern unsigned char kasan_zero_page[PAGE_SIZE]; extern pte_t kasan_zero_pte[PTRS_PER_PTE]; extern pmd_t kasan_zero_pmd[PTRS_PER_PMD]; extern pud_t kasan_zero_pud[PTRS_PER_PUD]; +extern p4d_t kasan_zero_p4d[PTRS_PER_P4D]; void kasan_populate_zero_shadow(const void *shadow_start, const void *shadow_end); diff --git a/include/linux/mm.h b/include/linux/mm.h index 0d65dd72c0f4..5f01c88f0800 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1560,14 +1560,24 @@ static inline pte_t *get_locked_pte(struct mm_struct *mm, unsigned long addr, return ptep; } +#ifdef __PAGETABLE_P4D_FOLDED +static inline int __p4d_alloc(struct mm_struct *mm, pgd_t *pgd, + unsigned long address) +{ + return 0; +} +#else +int __p4d_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address); +#endif + #ifdef __PAGETABLE_PUD_FOLDED -static inline int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, +static inline int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, unsigned long address) { return 0; } #else -int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address); +int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, unsigned long address); #endif #if defined(__PAGETABLE_PMD_FOLDED) || !defined(CONFIG_MMU) @@ -1619,11 +1629,22 @@ int __pte_alloc_kernel(pmd_t *pmd, unsigned long address); * Remove it when 4level-fixup.h has been removed. */ #if defined(CONFIG_MMU) && !defined(__ARCH_HAS_4LEVEL_HACK) -static inline pud_t *pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address) + +#ifndef __ARCH_HAS_5LEVEL_HACK +static inline p4d_t *p4d_alloc(struct mm_struct *mm, pgd_t *pgd, + unsigned long address) +{ + return (unlikely(pgd_none(*pgd)) && __p4d_alloc(mm, pgd, address)) ? + NULL : p4d_offset(pgd, address); +} + +static inline pud_t *pud_alloc(struct mm_struct *mm, p4d_t *p4d, + unsigned long address) { - return (unlikely(pgd_none(*pgd)) && __pud_alloc(mm, pgd, address))? - NULL: pud_offset(pgd, address); + return (unlikely(p4d_none(*p4d)) && __pud_alloc(mm, p4d, address)) ? + NULL : pud_offset(p4d, address); } +#endif /* !__ARCH_HAS_5LEVEL_HACK */ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) { @@ -2385,7 +2406,8 @@ void sparse_mem_maps_populate_node(struct page **map_map, struct page *sparse_mem_map_populate(unsigned long pnum, int nid); pgd_t *vmemmap_pgd_populate(unsigned long addr, int node); -pud_t *vmemmap_pud_populate(pgd_t *pgd, unsigned long addr, int node); +p4d_t *vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node); +pud_t *vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node); pmd_t *vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node); pte_t *vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node); void *vmemmap_alloc_block(unsigned long size, int node); diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h index ad3e5158e586..c9f795e9a2ee 100644 --- a/include/linux/regulator/machine.h +++ b/include/linux/regulator/machine.h @@ -65,7 +65,7 @@ struct regulator_state { int uV; /* suspend voltage */ unsigned int mode; /* suspend regulator operating mode */ int enabled; /* is regulator enabled in this suspend state */ - int disabled; /* is the regulator disbled in this suspend state */ + int disabled; /* is the regulator disabled in this suspend state */ }; /** diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index be765234c0a2..32354b4b4b2b 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -72,7 +72,7 @@ struct ucounts { struct hlist_node node; struct user_namespace *ns; kuid_t uid; - atomic_t count; + int count; atomic_t ucount[UCOUNT_COUNTS]; }; diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index 0468548acebf..48a3483dccb1 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h @@ -61,8 +61,7 @@ extern void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *, unsigned long from, unsigned long to, unsigned long len); -extern void userfaultfd_remove(struct vm_area_struct *vma, - struct vm_area_struct **prev, +extern bool userfaultfd_remove(struct vm_area_struct *vma, unsigned long start, unsigned long end); @@ -72,8 +71,6 @@ extern int userfaultfd_unmap_prep(struct vm_area_struct *vma, extern void userfaultfd_unmap_complete(struct mm_struct *mm, struct list_head *uf); -extern void userfaultfd_exit(struct mm_struct *mm); - #else /* CONFIG_USERFAULTFD */ /* mm helpers */ @@ -120,11 +117,11 @@ static inline void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *ctx, { } -static inline void userfaultfd_remove(struct vm_area_struct *vma, - struct vm_area_struct **prev, +static inline bool userfaultfd_remove(struct vm_area_struct *vma, unsigned long start, unsigned long end) { + return true; } static inline int userfaultfd_unmap_prep(struct vm_area_struct *vma, @@ -139,10 +136,6 @@ static inline void userfaultfd_unmap_complete(struct mm_struct *mm, { } -static inline void userfaultfd_exit(struct mm_struct *mm) -{ -} - #endif /* CONFIG_USERFAULTFD */ #endif /* _LINUX_USERFAULTFD_K_H */ diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index 6aa1b6cb5828..a80b7b59cf33 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -79,6 +79,9 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, THP_SPLIT_PAGE_FAILED, THP_DEFERRED_SPLIT_PAGE, THP_SPLIT_PMD, +#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD + THP_SPLIT_PUD, +#endif THP_ZERO_PAGE_ALLOC, THP_ZERO_PAGE_ALLOC_FAILED, #endif diff --git a/include/linux/wait.h b/include/linux/wait.h index aacb1282d19a..db076ca7f11d 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -620,30 +620,19 @@ do { \ __ret; \ }) +extern int do_wait_intr(wait_queue_head_t *, wait_queue_t *); +extern int do_wait_intr_irq(wait_queue_head_t *, wait_queue_t *); -#define __wait_event_interruptible_locked(wq, condition, exclusive, irq) \ +#define __wait_event_interruptible_locked(wq, condition, exclusive, fn) \ ({ \ - int __ret = 0; \ + int __ret; \ DEFINE_WAIT(__wait); \ if (exclusive) \ __wait.flags |= WQ_FLAG_EXCLUSIVE; \ do { \ - if (likely(list_empty(&__wait.task_list))) \ - __add_wait_queue_tail(&(wq), &__wait); \ - set_current_state(TASK_INTERRUPTIBLE); \ - if (signal_pending(current)) { \ - __ret = -ERESTARTSYS; \ + __ret = fn(&(wq), &__wait); \ + if (__ret) \ break; \ - } \ - if (irq) \ - spin_unlock_irq(&(wq).lock); \ - else \ - spin_unlock(&(wq).lock); \ - schedule(); \ - if (irq) \ - spin_lock_irq(&(wq).lock); \ - else \ - spin_lock(&(wq).lock); \ } while (!(condition)); \ __remove_wait_queue(&(wq), &__wait); \ __set_current_state(TASK_RUNNING); \ @@ -676,7 +665,7 @@ do { \ */ #define wait_event_interruptible_locked(wq, condition) \ ((condition) \ - ? 0 : __wait_event_interruptible_locked(wq, condition, 0, 0)) + ? 0 : __wait_event_interruptible_locked(wq, condition, 0, do_wait_intr)) /** * wait_event_interruptible_locked_irq - sleep until a condition gets true @@ -703,7 +692,7 @@ do { \ */ #define wait_event_interruptible_locked_irq(wq, condition) \ ((condition) \ - ? 0 : __wait_event_interruptible_locked(wq, condition, 0, 1)) + ? 0 : __wait_event_interruptible_locked(wq, condition, 0, do_wait_intr_irq)) /** * wait_event_interruptible_exclusive_locked - sleep exclusively until a condition gets true @@ -734,7 +723,7 @@ do { \ */ #define wait_event_interruptible_exclusive_locked(wq, condition) \ ((condition) \ - ? 0 : __wait_event_interruptible_locked(wq, condition, 1, 0)) + ? 0 : __wait_event_interruptible_locked(wq, condition, 1, do_wait_intr)) /** * wait_event_interruptible_exclusive_locked_irq - sleep until a condition gets true @@ -765,7 +754,7 @@ do { \ */ #define wait_event_interruptible_exclusive_locked_irq(wq, condition) \ ((condition) \ - ? 0 : __wait_event_interruptible_locked(wq, condition, 1, 1)) + ? 0 : __wait_event_interruptible_locked(wq, condition, 1, do_wait_intr_irq)) #define __wait_event_killable(wq, condition) \ diff --git a/include/media/vsp1.h b/include/media/vsp1.h index 458b400373d4..38aac554dbba 100644 --- a/include/media/vsp1.h +++ b/include/media/vsp1.h @@ -20,8 +20,17 @@ struct device; int vsp1_du_init(struct device *dev); -int vsp1_du_setup_lif(struct device *dev, unsigned int width, - unsigned int height); +/** + * struct vsp1_du_lif_config - VSP LIF configuration + * @width: output frame width + * @height: output frame height + */ +struct vsp1_du_lif_config { + unsigned int width; + unsigned int height; +}; + +int vsp1_du_setup_lif(struct device *dev, const struct vsp1_du_lif_config *cfg); struct vsp1_du_atomic_config { u32 pixelformat; diff --git a/include/net/irda/timer.h b/include/net/irda/timer.h index cb2615ccf761..d784f242cf7b 100644 --- a/include/net/irda/timer.h +++ b/include/net/irda/timer.h @@ -59,7 +59,7 @@ struct lap_cb; * Slot timer must never exceed 85 ms, and must always be at least 25 ms, * suggested to 75-85 msec by IrDA lite. This doesn't work with a lot of * devices, and other stackes uses a lot more, so it's best we do it as well - * (Note : this is the default value and sysctl overides it - Jean II) + * (Note : this is the default value and sysctl overrides it - Jean II) */ #define SLOT_TIMEOUT (90*HZ/1000) diff --git a/include/trace/events/syscalls.h b/include/trace/events/syscalls.h index 14e49c798135..b35533b94277 100644 --- a/include/trace/events/syscalls.h +++ b/include/trace/events/syscalls.h @@ -1,5 +1,6 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM raw_syscalls +#undef TRACE_INCLUDE_FILE #define TRACE_INCLUDE_FILE syscalls #if !defined(_TRACE_EVENTS_SYSCALLS_H) || defined(TRACE_HEADER_MULTI_READ) diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h index c055947c5c98..3b059530dac9 100644 --- a/include/uapi/linux/userfaultfd.h +++ b/include/uapi/linux/userfaultfd.h @@ -18,8 +18,7 @@ * means the userland is reading). */ #define UFFD_API ((__u64)0xAA) -#define UFFD_API_FEATURES (UFFD_FEATURE_EVENT_EXIT | \ - UFFD_FEATURE_EVENT_FORK | \ +#define UFFD_API_FEATURES (UFFD_FEATURE_EVENT_FORK | \ UFFD_FEATURE_EVENT_REMAP | \ UFFD_FEATURE_EVENT_REMOVE | \ UFFD_FEATURE_EVENT_UNMAP | \ @@ -113,7 +112,6 @@ struct uffd_msg { #define UFFD_EVENT_REMAP 0x14 #define UFFD_EVENT_REMOVE 0x15 #define UFFD_EVENT_UNMAP 0x16 -#define UFFD_EVENT_EXIT 0x17 /* flags for UFFD_EVENT_PAGEFAULT */ #define UFFD_PAGEFAULT_FLAG_WRITE (1<<0) /* If this was a write fault */ @@ -163,7 +161,6 @@ struct uffdio_api { #define UFFD_FEATURE_MISSING_HUGETLBFS (1<<4) #define UFFD_FEATURE_MISSING_SHMEM (1<<5) #define UFFD_FEATURE_EVENT_UNMAP (1<<6) -#define UFFD_FEATURE_EVENT_EXIT (1<<7) __u64 features; __u64 ioctls; diff --git a/include/xen/swiotlb-xen.h b/include/xen/swiotlb-xen.h index a0083be5d529..1f6d78f044b6 100644 --- a/include/xen/swiotlb-xen.h +++ b/include/xen/swiotlb-xen.h @@ -2,6 +2,7 @@ #define __LINUX_SWIOTLB_XEN_H #include <linux/dma-direction.h> +#include <linux/scatterlist.h> #include <linux/swiotlb.h> extern int xen_swiotlb_init(int verbose, bool early); @@ -55,4 +56,14 @@ xen_swiotlb_dma_supported(struct device *hwdev, u64 mask); extern int xen_swiotlb_set_dma_mask(struct device *dev, u64 dma_mask); + +extern int +xen_swiotlb_dma_mmap(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs); + +extern int +xen_swiotlb_get_sgtable(struct device *dev, struct sg_table *sgt, + void *cpu_addr, dma_addr_t handle, size_t size, + unsigned long attrs); #endif /* __LINUX_SWIOTLB_XEN_H */ diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 0125589c7428..48851327a15e 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -2669,7 +2669,7 @@ static bool css_visible(struct cgroup_subsys_state *css) * * Returns 0 on success, -errno on failure. On failure, csses which have * been processed already aren't cleaned up. The caller is responsible for - * cleaning up with cgroup_apply_control_disble(). + * cleaning up with cgroup_apply_control_disable(). */ static int cgroup_apply_control_enable(struct cgroup *cgrp) { diff --git a/kernel/events/core.c b/kernel/events/core.c index 6f41548f2e32..a17ed56c8ce1 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -998,7 +998,7 @@ list_update_cgroup_event(struct perf_event *event, */ #define PERF_CPU_HRTIMER (1000 / HZ) /* - * function must be called with interrupts disbled + * function must be called with interrupts disabled */ static enum hrtimer_restart perf_mux_hrtimer_handler(struct hrtimer *hr) { diff --git a/kernel/exit.c b/kernel/exit.c index e126ebf2400c..516acdb0e0ec 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -554,7 +554,6 @@ static void exit_mm(void) enter_lazy_tlb(mm, current); task_unlock(current); mm_update_next_owner(mm); - userfaultfd_exit(mm); mmput(mm); if (test_thread_flag(TIF_MEMDIE)) exit_oom_victim(); diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 12e38c213b70..a95e5d1f4a9c 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -3262,10 +3262,17 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, if (depth) { hlock = curr->held_locks + depth - 1; if (hlock->class_idx == class_idx && nest_lock) { - if (hlock->references) + if (hlock->references) { + /* + * Check: unsigned int references:12, overflow. + */ + if (DEBUG_LOCKS_WARN_ON(hlock->references == (1 << 12)-1)) + return 0; + hlock->references++; - else + } else { hlock->references = 2; + } return 1; } diff --git a/kernel/locking/test-ww_mutex.c b/kernel/locking/test-ww_mutex.c index da6c9a34f62f..6b7abb334ca6 100644 --- a/kernel/locking/test-ww_mutex.c +++ b/kernel/locking/test-ww_mutex.c @@ -50,7 +50,7 @@ static void test_mutex_work(struct work_struct *work) if (mtx->flags & TEST_MTX_TRY) { while (!ww_mutex_trylock(&mtx->mutex)) - cpu_relax(); + cond_resched(); } else { ww_mutex_lock(&mtx->mutex, NULL); } @@ -88,7 +88,7 @@ static int __test_mutex(unsigned int flags) ret = -EINVAL; break; } - cpu_relax(); + cond_resched(); } while (time_before(jiffies, timeout)); } else { ret = wait_for_completion_timeout(&mtx.done, TIMEOUT); @@ -627,7 +627,7 @@ static int __init test_ww_mutex_init(void) if (ret) return ret; - ret = stress(4096, hweight32(STRESS_ALL)*ncpus, 1<<12, STRESS_ALL); + ret = stress(4095, hweight32(STRESS_ALL)*ncpus, 1<<12, STRESS_ALL); if (ret) return ret; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 956383844116..3b31fc05a0f1 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3287,10 +3287,15 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) struct task_struct *p; /* - * Optimization: we know that if all tasks are in - * the fair class we can call that function directly: + * Optimization: we know that if all tasks are in the fair class we can + * call that function directly, but only if the @prev task wasn't of a + * higher scheduling class, because otherwise those loose the + * opportunity to pull in more work from other CPUs. */ - if (likely(rq->nr_running == rq->cfs.h_nr_running)) { + if (likely((prev->sched_class == &idle_sched_class || + prev->sched_class == &fair_sched_class) && + rq->nr_running == rq->cfs.h_nr_running)) { + p = fair_sched_class.pick_next_task(rq, prev, rf); if (unlikely(p == RETRY_TASK)) goto again; diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 8f8de3d4d6b7..cd7cd489f739 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -36,6 +36,7 @@ struct sugov_policy { u64 last_freq_update_time; s64 freq_update_delay_ns; unsigned int next_freq; + unsigned int cached_raw_freq; /* The next fields are only needed if fast switch cannot be used. */ struct irq_work irq_work; @@ -52,7 +53,6 @@ struct sugov_cpu { struct update_util_data update_util; struct sugov_policy *sg_policy; - unsigned int cached_raw_freq; unsigned long iowait_boost; unsigned long iowait_boost_max; u64 last_update; @@ -116,7 +116,7 @@ static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time, /** * get_next_freq - Compute a new frequency for a given cpufreq policy. - * @sg_cpu: schedutil cpu object to compute the new frequency for. + * @sg_policy: schedutil policy object to compute the new frequency for. * @util: Current CPU utilization. * @max: CPU capacity. * @@ -136,19 +136,18 @@ static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time, * next_freq (as calculated above) is returned, subject to policy min/max and * cpufreq driver limitations. */ -static unsigned int get_next_freq(struct sugov_cpu *sg_cpu, unsigned long util, - unsigned long max) +static unsigned int get_next_freq(struct sugov_policy *sg_policy, + unsigned long util, unsigned long max) { - struct sugov_policy *sg_policy = sg_cpu->sg_policy; struct cpufreq_policy *policy = sg_policy->policy; unsigned int freq = arch_scale_freq_invariant() ? policy->cpuinfo.max_freq : policy->cur; freq = (freq + (freq >> 2)) * util / max; - if (freq == sg_cpu->cached_raw_freq && sg_policy->next_freq != UINT_MAX) + if (freq == sg_policy->cached_raw_freq && sg_policy->next_freq != UINT_MAX) return sg_policy->next_freq; - sg_cpu->cached_raw_freq = freq; + sg_policy->cached_raw_freq = freq; return cpufreq_driver_resolve_freq(policy, freq); } @@ -213,7 +212,7 @@ static void sugov_update_single(struct update_util_data *hook, u64 time, } else { sugov_get_util(&util, &max); sugov_iowait_boost(sg_cpu, &util, &max); - next_f = get_next_freq(sg_cpu, util, max); + next_f = get_next_freq(sg_policy, util, max); } sugov_update_commit(sg_policy, time, next_f); } @@ -267,7 +266,7 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, sugov_iowait_boost(j_sg_cpu, &util, &max); } - return get_next_freq(sg_cpu, util, max); + return get_next_freq(sg_policy, util, max); } static void sugov_update_shared(struct update_util_data *hook, u64 time, @@ -580,6 +579,7 @@ static int sugov_start(struct cpufreq_policy *policy) sg_policy->next_freq = UINT_MAX; sg_policy->work_in_progress = false; sg_policy->need_freq_update = false; + sg_policy->cached_raw_freq = 0; for_each_cpu(cpu, policy->cpus) { struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu); @@ -590,7 +590,6 @@ static int sugov_start(struct cpufreq_policy *policy) sg_cpu->max = 0; sg_cpu->flags = SCHED_CPUFREQ_RT; sg_cpu->last_update = 0; - sg_cpu->cached_raw_freq = 0; sg_cpu->iowait_boost = 0; sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq; cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util, diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 3e88b35ac157..dea138964b91 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5799,7 +5799,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t * Due to large variance we need a large fuzz factor; hackbench in * particularly is sensitive here. */ - if ((avg_idle / 512) < avg_cost) + if (sched_feat(SIS_AVG_CPU) && (avg_idle / 512) < avg_cost) return -1; time = local_clock(); diff --git a/kernel/sched/features.h b/kernel/sched/features.h index 69631fa46c2f..1b3c8189b286 100644 --- a/kernel/sched/features.h +++ b/kernel/sched/features.h @@ -51,6 +51,11 @@ SCHED_FEAT(NONTASK_CAPACITY, true) */ SCHED_FEAT(TTWU_QUEUE, true) +/* + * When doing wakeups, attempt to limit superfluous scans of the LLC domain. + */ +SCHED_FEAT(SIS_AVG_CPU, false) + #ifdef HAVE_RT_PUSH_IPI /* * In order to avoid a thundering herd attack of CPUs that are diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c index 4d2ea6f25568..b8c84c6dee64 100644 --- a/kernel/sched/wait.c +++ b/kernel/sched/wait.c @@ -242,6 +242,45 @@ long prepare_to_wait_event(wait_queue_head_t *q, wait_queue_t *wait, int state) } EXPORT_SYMBOL(prepare_to_wait_event); +/* + * Note! These two wait functions are entered with the + * wait-queue lock held (and interrupts off in the _irq + * case), so there is no race with testing the wakeup + * condition in the caller before they add the wait + * entry to the wake queue. + */ +int do_wait_intr(wait_queue_head_t *wq, wait_queue_t *wait) +{ + if (likely(list_empty(&wait->task_list))) + __add_wait_queue_tail(wq, wait); + + set_current_state(TASK_INTERRUPTIBLE); + if (signal_pending(current)) + return -ERESTARTSYS; + + spin_unlock(&wq->lock); + schedule(); + spin_lock(&wq->lock); + return 0; +} +EXPORT_SYMBOL(do_wait_intr); + +int do_wait_intr_irq(wait_queue_head_t *wq, wait_queue_t *wait) +{ + if (likely(list_empty(&wait->task_list))) + __add_wait_queue_tail(wq, wait); + + set_current_state(TASK_INTERRUPTIBLE); + if (signal_pending(current)) + return -ERESTARTSYS; + + spin_unlock_irq(&wq->lock); + schedule(); + spin_lock_irq(&wq->lock); + return 0; +} +EXPORT_SYMBOL(do_wait_intr_irq); + /** * finish_wait - clean up after waiting in a queue * @q: waitqueue waited on diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c index 7906b3f0c41a..497719127bf9 100644 --- a/kernel/time/jiffies.c +++ b/kernel/time/jiffies.c @@ -125,7 +125,7 @@ int register_refined_jiffies(long cycles_per_second) shift_hz += cycles_per_tick/2; do_div(shift_hz, cycles_per_tick); /* Calculate nsec_per_tick using shift_hz */ - nsec_per_tick = (u64)TICK_NSEC << 8; + nsec_per_tick = (u64)NSEC_PER_SEC << 8; nsec_per_tick += (u32)shift_hz/2; do_div(nsec_per_tick, (u32)shift_hz); diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index d5038005eb5d..d4a06e714645 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -429,7 +429,7 @@ config BLK_DEV_IO_TRACE If unsure, say N. -config KPROBE_EVENT +config KPROBE_EVENTS depends on KPROBES depends on HAVE_REGS_AND_STACK_ACCESS_API bool "Enable kprobes-based dynamic events" @@ -447,7 +447,7 @@ config KPROBE_EVENT This option is also required by perf-probe subcommand of perf tools. If you want to use perf tools, this option is strongly recommended. -config UPROBE_EVENT +config UPROBE_EVENTS bool "Enable uprobes-based dynamic events" depends on ARCH_SUPPORTS_UPROBES depends on MMU @@ -466,7 +466,7 @@ config UPROBE_EVENT config BPF_EVENTS depends on BPF_SYSCALL - depends on (KPROBE_EVENT || UPROBE_EVENT) && PERF_EVENTS + depends on (KPROBE_EVENTS || UPROBE_EVENTS) && PERF_EVENTS bool default y help diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index e57980845549..90f2701d92a7 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -57,7 +57,7 @@ obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o obj-$(CONFIG_EVENT_TRACING) += trace_events_trigger.o obj-$(CONFIG_HIST_TRIGGERS) += trace_events_hist.o obj-$(CONFIG_BPF_EVENTS) += bpf_trace.o -obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o +obj-$(CONFIG_KPROBE_EVENTS) += trace_kprobe.o obj-$(CONFIG_TRACEPOINTS) += power-traces.o ifeq ($(CONFIG_PM),y) obj-$(CONFIG_TRACEPOINTS) += rpm-traces.o @@ -66,7 +66,7 @@ ifeq ($(CONFIG_TRACING),y) obj-$(CONFIG_KGDB_KDB) += trace_kdb.o endif obj-$(CONFIG_PROBE_EVENTS) += trace_probe.o -obj-$(CONFIG_UPROBE_EVENT) += trace_uprobe.o +obj-$(CONFIG_UPROBE_EVENTS) += trace_uprobe.o obj-$(CONFIG_TRACEPOINT_BENCHMARK) += trace_benchmark.o diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 0d1597c9ee30..b9691ee8f6c1 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -4416,16 +4416,24 @@ static int __init set_graph_notrace_function(char *str) } __setup("ftrace_graph_notrace=", set_graph_notrace_function); +static int __init set_graph_max_depth_function(char *str) +{ + if (!str) + return 0; + fgraph_max_depth = simple_strtoul(str, NULL, 0); + return 1; +} +__setup("ftrace_graph_max_depth=", set_graph_max_depth_function); + static void __init set_ftrace_early_graph(char *buf, int enable) { int ret; char *func; struct ftrace_hash *hash; - if (enable) - hash = ftrace_graph_hash; - else - hash = ftrace_graph_notrace_hash; + hash = alloc_ftrace_hash(FTRACE_HASH_DEFAULT_BITS); + if (WARN_ON(!hash)) + return; while (buf) { func = strsep(&buf, ","); @@ -4435,6 +4443,11 @@ static void __init set_ftrace_early_graph(char *buf, int enable) printk(KERN_DEBUG "ftrace: function %s not " "traceable\n", func); } + + if (enable) + ftrace_graph_hash = hash; + else + ftrace_graph_notrace_hash = hash; } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ @@ -5488,7 +5501,7 @@ static void ftrace_ops_assist_func(unsigned long ip, unsigned long parent_ip, * Normally the mcount trampoline will call the ops->func, but there * are times that it should not. For example, if the ops does not * have its own recursion protection, then it should call the - * ftrace_ops_recurs_func() instead. + * ftrace_ops_assist_func() instead. * * Returns the function that the trampoline should call for @ops. */ diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 707445ceb7ef..f35109514a01 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4341,22 +4341,22 @@ static const char readme_msg[] = "\t\t\t traces\n" #endif #endif /* CONFIG_STACK_TRACER */ -#ifdef CONFIG_KPROBE_EVENT +#ifdef CONFIG_KPROBE_EVENTS " kprobe_events\t\t- Add/remove/show the kernel dynamic events\n" "\t\t\t Write into this file to define/undefine new trace events.\n" #endif -#ifdef CONFIG_UPROBE_EVENT +#ifdef CONFIG_UPROBE_EVENTS " uprobe_events\t\t- Add/remove/show the userspace dynamic events\n" "\t\t\t Write into this file to define/undefine new trace events.\n" #endif -#if defined(CONFIG_KPROBE_EVENT) || defined(CONFIG_UPROBE_EVENT) +#if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) "\t accepts: event-definitions (one definition per line)\n" "\t Format: p|r[:[<group>/]<event>] <place> [<args>]\n" "\t -:[<group>/]<event>\n" -#ifdef CONFIG_KPROBE_EVENT +#ifdef CONFIG_KPROBE_EVENTS "\t place: [<module>:]<symbol>[+<offset>]|<memaddr>\n" #endif -#ifdef CONFIG_UPROBE_EVENT +#ifdef CONFIG_UPROBE_EVENTS "\t place: <path>:<offset>\n" #endif "\t args: <name>=fetcharg[:type]\n" diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 0c0ae54d44c6..903273c93e61 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -248,7 +248,7 @@ ASSIGN_FETCH_FUNC(file_offset, ftype), \ #define FETCH_TYPE_STRING 0 #define FETCH_TYPE_STRSIZE 1 -#ifdef CONFIG_KPROBE_EVENT +#ifdef CONFIG_KPROBE_EVENTS struct symbol_cache; unsigned long update_symbol_cache(struct symbol_cache *sc); void free_symbol_cache(struct symbol_cache *sc); @@ -278,7 +278,7 @@ alloc_symbol_cache(const char *sym, long offset) { return NULL; } -#endif /* CONFIG_KPROBE_EVENT */ +#endif /* CONFIG_KPROBE_EVENTS */ struct probe_arg { struct fetch_param fetch; diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index 1d68b5b7ad41..5fb1f2c87e6b 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -65,7 +65,7 @@ void stack_trace_print(void) } /* - * When arch-specific code overides this function, the following + * When arch-specific code overrides this function, the following * data should be filled up, assuming stack_trace_max_lock is held to * prevent concurrent updates. * stack_trace_index[] diff --git a/kernel/ucount.c b/kernel/ucount.c index 62630a40ab3a..b4eeee03934f 100644 --- a/kernel/ucount.c +++ b/kernel/ucount.c @@ -144,7 +144,7 @@ static struct ucounts *get_ucounts(struct user_namespace *ns, kuid_t uid) new->ns = ns; new->uid = uid; - atomic_set(&new->count, 0); + new->count = 0; spin_lock_irq(&ucounts_lock); ucounts = find_ucounts(ns, uid, hashent); @@ -155,8 +155,10 @@ static struct ucounts *get_ucounts(struct user_namespace *ns, kuid_t uid) ucounts = new; } } - if (!atomic_add_unless(&ucounts->count, 1, INT_MAX)) + if (ucounts->count == INT_MAX) ucounts = NULL; + else + ucounts->count += 1; spin_unlock_irq(&ucounts_lock); return ucounts; } @@ -165,13 +167,15 @@ static void put_ucounts(struct ucounts *ucounts) { unsigned long flags; - if (atomic_dec_and_test(&ucounts->count)) { - spin_lock_irqsave(&ucounts_lock, flags); + spin_lock_irqsave(&ucounts_lock, flags); + ucounts->count -= 1; + if (!ucounts->count) hlist_del_init(&ucounts->node); - spin_unlock_irqrestore(&ucounts_lock, flags); + else + ucounts = NULL; + spin_unlock_irqrestore(&ucounts_lock, flags); - kfree(ucounts); - } + kfree(ucounts); } static inline bool atomic_inc_below(atomic_t *v, int u) diff --git a/lib/ioremap.c b/lib/ioremap.c index a3e14ce92a56..4bb30206b942 100644 --- a/lib/ioremap.c +++ b/lib/ioremap.c @@ -14,6 +14,7 @@ #include <asm/pgtable.h> #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP +static int __read_mostly ioremap_p4d_capable; static int __read_mostly ioremap_pud_capable; static int __read_mostly ioremap_pmd_capable; static int __read_mostly ioremap_huge_disabled; @@ -35,6 +36,11 @@ void __init ioremap_huge_init(void) } } +static inline int ioremap_p4d_enabled(void) +{ + return ioremap_p4d_capable; +} + static inline int ioremap_pud_enabled(void) { return ioremap_pud_capable; @@ -46,6 +52,7 @@ static inline int ioremap_pmd_enabled(void) } #else /* !CONFIG_HAVE_ARCH_HUGE_VMAP */ +static inline int ioremap_p4d_enabled(void) { return 0; } static inline int ioremap_pud_enabled(void) { return 0; } static inline int ioremap_pmd_enabled(void) { return 0; } #endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */ @@ -94,14 +101,14 @@ static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr, return 0; } -static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr, +static inline int ioremap_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end, phys_addr_t phys_addr, pgprot_t prot) { pud_t *pud; unsigned long next; phys_addr -= addr; - pud = pud_alloc(&init_mm, pgd, addr); + pud = pud_alloc(&init_mm, p4d, addr); if (!pud) return -ENOMEM; do { @@ -120,6 +127,32 @@ static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr, return 0; } +static inline int ioremap_p4d_range(pgd_t *pgd, unsigned long addr, + unsigned long end, phys_addr_t phys_addr, pgprot_t prot) +{ + p4d_t *p4d; + unsigned long next; + + phys_addr -= addr; + p4d = p4d_alloc(&init_mm, pgd, addr); + if (!p4d) + return -ENOMEM; + do { + next = p4d_addr_end(addr, end); + + if (ioremap_p4d_enabled() && + ((next - addr) == P4D_SIZE) && + IS_ALIGNED(phys_addr + addr, P4D_SIZE)) { + if (p4d_set_huge(p4d, phys_addr + addr, prot)) + continue; + } + + if (ioremap_pud_range(p4d, addr, next, phys_addr + addr, prot)) + return -ENOMEM; + } while (p4d++, addr = next, addr != end); + return 0; +} + int ioremap_page_range(unsigned long addr, unsigned long end, phys_addr_t phys_addr, pgprot_t prot) { @@ -135,7 +168,7 @@ int ioremap_page_range(unsigned long addr, pgd = pgd_offset_k(addr); do { next = pgd_addr_end(addr, end); - err = ioremap_pud_range(pgd, addr, next, phys_addr+addr, prot); + err = ioremap_p4d_range(pgd, addr, next, phys_addr+addr, prot); if (err) break; } while (pgd++, addr = next, addr != end); diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 5ed506d648c4..691a9ad48497 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -2129,8 +2129,8 @@ int ida_pre_get(struct ida *ida, gfp_t gfp) struct ida_bitmap *bitmap = kmalloc(sizeof(*bitmap), gfp); if (!bitmap) return 0; - bitmap = this_cpu_cmpxchg(ida_bitmap, NULL, bitmap); - kfree(bitmap); + if (this_cpu_cmpxchg(ida_bitmap, NULL, bitmap)) + kfree(bitmap); } return 1; diff --git a/lib/refcount.c b/lib/refcount.c index 1d33366189d1..aa09ad3c30b0 100644 --- a/lib/refcount.c +++ b/lib/refcount.c @@ -58,7 +58,7 @@ bool refcount_add_not_zero(unsigned int i, refcount_t *r) val = old; } - WARN(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n"); + WARN_ONCE(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n"); return true; } @@ -66,7 +66,7 @@ EXPORT_SYMBOL_GPL(refcount_add_not_zero); void refcount_add(unsigned int i, refcount_t *r) { - WARN(!refcount_add_not_zero(i, r), "refcount_t: addition on 0; use-after-free.\n"); + WARN_ONCE(!refcount_add_not_zero(i, r), "refcount_t: addition on 0; use-after-free.\n"); } EXPORT_SYMBOL_GPL(refcount_add); @@ -97,7 +97,7 @@ bool refcount_inc_not_zero(refcount_t *r) val = old; } - WARN(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n"); + WARN_ONCE(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n"); return true; } @@ -111,7 +111,7 @@ EXPORT_SYMBOL_GPL(refcount_inc_not_zero); */ void refcount_inc(refcount_t *r) { - WARN(!refcount_inc_not_zero(r), "refcount_t: increment on 0; use-after-free.\n"); + WARN_ONCE(!refcount_inc_not_zero(r), "refcount_t: increment on 0; use-after-free.\n"); } EXPORT_SYMBOL_GPL(refcount_inc); @@ -125,7 +125,7 @@ bool refcount_sub_and_test(unsigned int i, refcount_t *r) new = val - i; if (new > val) { - WARN(new > val, "refcount_t: underflow; use-after-free.\n"); + WARN_ONCE(new > val, "refcount_t: underflow; use-after-free.\n"); return false; } @@ -164,7 +164,7 @@ EXPORT_SYMBOL_GPL(refcount_dec_and_test); void refcount_dec(refcount_t *r) { - WARN(refcount_dec_and_test(r), "refcount_t: decrement hit 0; leaking memory.\n"); + WARN_ONCE(refcount_dec_and_test(r), "refcount_t: decrement hit 0; leaking memory.\n"); } EXPORT_SYMBOL_GPL(refcount_dec); @@ -204,7 +204,7 @@ bool refcount_dec_not_one(refcount_t *r) new = val - 1; if (new > val) { - WARN(new > val, "refcount_t: underflow; use-after-free.\n"); + WARN_ONCE(new > val, "refcount_t: underflow; use-after-free.\n"); return true; } diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 6d861d090e9f..c6f2a37028c2 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -683,33 +683,26 @@ static int cgwb_bdi_init(struct backing_dev_info *bdi) static void cgwb_bdi_destroy(struct backing_dev_info *bdi) { struct radix_tree_iter iter; - struct rb_node *rbn; void **slot; WARN_ON(test_bit(WB_registered, &bdi->wb.state)); spin_lock_irq(&cgwb_lock); - radix_tree_for_each_slot(slot, &bdi->cgwb_tree, &iter, 0) cgwb_kill(*slot); - - while ((rbn = rb_first(&bdi->cgwb_congested_tree))) { - struct bdi_writeback_congested *congested = - rb_entry(rbn, struct bdi_writeback_congested, rb_node); - - rb_erase(rbn, &bdi->cgwb_congested_tree); - congested->bdi = NULL; /* mark @congested unlinked */ - } - spin_unlock_irq(&cgwb_lock); /* - * All cgwb's and their congested states must be shutdown and - * released before returning. Drain the usage counter to wait for - * all cgwb's and cgwb_congested's ever created on @bdi. + * All cgwb's must be shutdown and released before returning. Drain + * the usage counter to wait for all cgwb's ever created on @bdi. */ atomic_dec(&bdi->usage_cnt); wait_event(cgwb_release_wait, !atomic_read(&bdi->usage_cnt)); + /* + * Grab back our reference so that we hold it when @bdi gets + * re-registered. + */ + atomic_inc(&bdi->usage_cnt); } /** @@ -749,6 +742,21 @@ void wb_blkcg_offline(struct blkcg *blkcg) spin_unlock_irq(&cgwb_lock); } +static void cgwb_bdi_exit(struct backing_dev_info *bdi) +{ + struct rb_node *rbn; + + spin_lock_irq(&cgwb_lock); + while ((rbn = rb_first(&bdi->cgwb_congested_tree))) { + struct bdi_writeback_congested *congested = + rb_entry(rbn, struct bdi_writeback_congested, rb_node); + + rb_erase(rbn, &bdi->cgwb_congested_tree); + congested->bdi = NULL; /* mark @congested unlinked */ + } + spin_unlock_irq(&cgwb_lock); +} + #else /* CONFIG_CGROUP_WRITEBACK */ static int cgwb_bdi_init(struct backing_dev_info *bdi) @@ -769,7 +777,9 @@ static int cgwb_bdi_init(struct backing_dev_info *bdi) return 0; } -static void cgwb_bdi_destroy(struct backing_dev_info *bdi) +static void cgwb_bdi_destroy(struct backing_dev_info *bdi) { } + +static void cgwb_bdi_exit(struct backing_dev_info *bdi) { wb_congested_put(bdi->wb_congested); } @@ -857,6 +867,8 @@ int bdi_register_owner(struct backing_dev_info *bdi, struct device *owner) MINOR(owner->devt)); if (rc) return rc; + /* Leaking owner reference... */ + WARN_ON(bdi->owner); bdi->owner = owner; get_device(owner); return 0; @@ -898,6 +910,7 @@ static void bdi_exit(struct backing_dev_info *bdi) { WARN_ON_ONCE(bdi->dev); wb_exit(&bdi->wb); + cgwb_bdi_exit(bdi); } static void release_bdi(struct kref *ref) @@ -226,6 +226,7 @@ struct page *follow_page_mask(struct vm_area_struct *vma, unsigned int *page_mask) { pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pmd_t *pmd; spinlock_t *ptl; @@ -243,8 +244,13 @@ struct page *follow_page_mask(struct vm_area_struct *vma, pgd = pgd_offset(mm, address); if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) return no_page_table(vma, flags); - - pud = pud_offset(pgd, address); + p4d = p4d_offset(pgd, address); + if (p4d_none(*p4d)) + return no_page_table(vma, flags); + BUILD_BUG_ON(p4d_huge(*p4d)); + if (unlikely(p4d_bad(*p4d))) + return no_page_table(vma, flags); + pud = pud_offset(p4d, address); if (pud_none(*pud)) return no_page_table(vma, flags); if (pud_huge(*pud) && vma->vm_flags & VM_HUGETLB) { @@ -325,6 +331,7 @@ static int get_gate_page(struct mm_struct *mm, unsigned long address, struct page **page) { pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pmd_t *pmd; pte_t *pte; @@ -338,7 +345,9 @@ static int get_gate_page(struct mm_struct *mm, unsigned long address, else pgd = pgd_offset_gate(mm, address); BUG_ON(pgd_none(*pgd)); - pud = pud_offset(pgd, address); + p4d = p4d_offset(pgd, address); + BUG_ON(p4d_none(*p4d)); + pud = pud_offset(p4d, address); BUG_ON(pud_none(*pud)); pmd = pmd_offset(pud, address); if (pmd_none(*pmd)) @@ -1400,13 +1409,13 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, return 1; } -static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end, +static int gup_pud_range(p4d_t p4d, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr) { unsigned long next; pud_t *pudp; - pudp = pud_offset(&pgd, addr); + pudp = pud_offset(&p4d, addr); do { pud_t pud = READ_ONCE(*pudp); @@ -1428,6 +1437,31 @@ static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end, return 1; } +static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end, + int write, struct page **pages, int *nr) +{ + unsigned long next; + p4d_t *p4dp; + + p4dp = p4d_offset(&pgd, addr); + do { + p4d_t p4d = READ_ONCE(*p4dp); + + next = p4d_addr_end(addr, end); + if (p4d_none(p4d)) + return 0; + BUILD_BUG_ON(p4d_huge(p4d)); + if (unlikely(is_hugepd(__hugepd(p4d_val(p4d))))) { + if (!gup_huge_pd(__hugepd(p4d_val(p4d)), addr, + P4D_SHIFT, next, write, pages, nr)) + return 0; + } else if (!gup_p4d_range(p4d, addr, next, write, pages, nr)) + return 0; + } while (p4dp++, addr = next, addr != end); + + return 1; +} + /* * Like get_user_pages_fast() except it's IRQ-safe in that it won't fall back to * the regular GUP. It will only return non-negative values. @@ -1478,7 +1512,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write, if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr, PGDIR_SHIFT, next, write, pages, &nr)) break; - } else if (!gup_pud_range(pgd, addr, next, write, pages, &nr)) + } else if (!gup_p4d_range(pgd, addr, next, write, pages, &nr)) break; } while (pgdp++, addr = next, addr != end); local_irq_restore(flags); diff --git a/mm/huge_memory.c b/mm/huge_memory.c index d36b2af4d1bf..1ebc93e179f3 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1828,7 +1828,7 @@ static void __split_huge_pud_locked(struct vm_area_struct *vma, pud_t *pud, VM_BUG_ON_VMA(vma->vm_end < haddr + HPAGE_PUD_SIZE, vma); VM_BUG_ON(!pud_trans_huge(*pud) && !pud_devmap(*pud)); - count_vm_event(THP_SPLIT_PMD); + count_vm_event(THP_SPLIT_PUD); pudp_huge_clear_flush_notify(vma, haddr, pud); } @@ -2048,6 +2048,7 @@ void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address, bool freeze, struct page *page) { pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pmd_t *pmd; @@ -2055,7 +2056,11 @@ void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address, if (!pgd_present(*pgd)) return; - pud = pud_offset(pgd, address); + p4d = p4d_offset(pgd, address); + if (!p4d_present(*p4d)) + return; + + pud = pud_offset(p4d, address); if (!pud_present(*pud)) return; diff --git a/mm/hugetlb.c b/mm/hugetlb.c index a7aa811b7d14..3d0aab9ee80d 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -4555,7 +4555,8 @@ out: int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) { pgd_t *pgd = pgd_offset(mm, *addr); - pud_t *pud = pud_offset(pgd, *addr); + p4d_t *p4d = p4d_offset(pgd, *addr); + pud_t *pud = pud_offset(p4d, *addr); BUG_ON(page_count(virt_to_page(ptep)) == 0); if (page_count(virt_to_page(ptep)) == 1) @@ -4586,11 +4587,13 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz) { pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pte_t *pte = NULL; pgd = pgd_offset(mm, addr); - pud = pud_alloc(mm, pgd, addr); + p4d = p4d_offset(pgd, addr); + pud = pud_alloc(mm, p4d, addr); if (pud) { if (sz == PUD_SIZE) { pte = (pte_t *)pud; @@ -4610,18 +4613,22 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) { pgd_t *pgd; + p4d_t *p4d; pud_t *pud; - pmd_t *pmd = NULL; + pmd_t *pmd; pgd = pgd_offset(mm, addr); - if (pgd_present(*pgd)) { - pud = pud_offset(pgd, addr); - if (pud_present(*pud)) { - if (pud_huge(*pud)) - return (pte_t *)pud; - pmd = pmd_offset(pud, addr); - } - } + if (!pgd_present(*pgd)) + return NULL; + p4d = p4d_offset(pgd, addr); + if (!p4d_present(*p4d)) + return NULL; + pud = pud_offset(p4d, addr); + if (!pud_present(*pud)) + return NULL; + if (pud_huge(*pud)) + return (pte_t *)pud; + pmd = pmd_offset(pud, addr); return (pte_t *) pmd; } diff --git a/mm/kasan/kasan_init.c b/mm/kasan/kasan_init.c index 31238dad85fb..b96a5f773d88 100644 --- a/mm/kasan/kasan_init.c +++ b/mm/kasan/kasan_init.c @@ -30,6 +30,9 @@ */ unsigned char kasan_zero_page[PAGE_SIZE] __page_aligned_bss; +#if CONFIG_PGTABLE_LEVELS > 4 +p4d_t kasan_zero_p4d[PTRS_PER_P4D] __page_aligned_bss; +#endif #if CONFIG_PGTABLE_LEVELS > 3 pud_t kasan_zero_pud[PTRS_PER_PUD] __page_aligned_bss; #endif @@ -82,10 +85,10 @@ static void __init zero_pmd_populate(pud_t *pud, unsigned long addr, } while (pmd++, addr = next, addr != end); } -static void __init zero_pud_populate(pgd_t *pgd, unsigned long addr, +static void __init zero_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long end) { - pud_t *pud = pud_offset(pgd, addr); + pud_t *pud = pud_offset(p4d, addr); unsigned long next; do { @@ -107,6 +110,23 @@ static void __init zero_pud_populate(pgd_t *pgd, unsigned long addr, } while (pud++, addr = next, addr != end); } +static void __init zero_p4d_populate(pgd_t *pgd, unsigned long addr, + unsigned long end) +{ + p4d_t *p4d = p4d_offset(pgd, addr); + unsigned long next; + + do { + next = p4d_addr_end(addr, end); + + if (p4d_none(*p4d)) { + p4d_populate(&init_mm, p4d, + early_alloc(PAGE_SIZE, NUMA_NO_NODE)); + } + zero_pud_populate(p4d, addr, next); + } while (p4d++, addr = next, addr != end); +} + /** * kasan_populate_zero_shadow - populate shadow memory region with * kasan_zero_page @@ -125,6 +145,7 @@ void __init kasan_populate_zero_shadow(const void *shadow_start, next = pgd_addr_end(addr, end); if (IS_ALIGNED(addr, PGDIR_SIZE) && end - addr >= PGDIR_SIZE) { + p4d_t *p4d; pud_t *pud; pmd_t *pmd; @@ -135,9 +156,22 @@ void __init kasan_populate_zero_shadow(const void *shadow_start, * 3,2 - level page tables where we don't have * puds,pmds, so pgd_populate(), pud_populate() * is noops. + * + * The ifndef is required to avoid build breakage. + * + * With 5level-fixup.h, pgd_populate() is not nop and + * we reference kasan_zero_p4d. It's not defined + * unless 5-level paging enabled. + * + * The ifndef can be dropped once all KASAN-enabled + * architectures will switch to pgtable-nop4d.h. */ - pgd_populate(&init_mm, pgd, lm_alias(kasan_zero_pud)); - pud = pud_offset(pgd, addr); +#ifndef __ARCH_HAS_5LEVEL_HACK + pgd_populate(&init_mm, pgd, lm_alias(kasan_zero_p4d)); +#endif + p4d = p4d_offset(pgd, addr); + p4d_populate(&init_mm, p4d, lm_alias(kasan_zero_pud)); + pud = pud_offset(p4d, addr); pud_populate(&init_mm, pud, lm_alias(kasan_zero_pmd)); pmd = pmd_offset(pud, addr); pmd_populate_kernel(&init_mm, pmd, lm_alias(kasan_zero_pte)); @@ -148,6 +182,6 @@ void __init kasan_populate_zero_shadow(const void *shadow_start, pgd_populate(&init_mm, pgd, early_alloc(PAGE_SIZE, NUMA_NO_NODE)); } - zero_pud_populate(pgd, addr, next); + zero_p4d_populate(pgd, addr, next); } while (pgd++, addr = next, addr != end); } diff --git a/mm/kasan/quarantine.c b/mm/kasan/quarantine.c index 6f1ed1630873..3a8ddf8baf7d 100644 --- a/mm/kasan/quarantine.c +++ b/mm/kasan/quarantine.c @@ -25,6 +25,7 @@ #include <linux/printk.h> #include <linux/shrinker.h> #include <linux/slab.h> +#include <linux/srcu.h> #include <linux/string.h> #include <linux/types.h> @@ -103,6 +104,7 @@ static int quarantine_tail; /* Total size of all objects in global_quarantine across all batches. */ static unsigned long quarantine_size; static DEFINE_SPINLOCK(quarantine_lock); +DEFINE_STATIC_SRCU(remove_cache_srcu); /* Maximum size of the global queue. */ static unsigned long quarantine_max_size; @@ -173,17 +175,22 @@ void quarantine_put(struct kasan_free_meta *info, struct kmem_cache *cache) struct qlist_head *q; struct qlist_head temp = QLIST_INIT; + /* + * Note: irq must be disabled until after we move the batch to the + * global quarantine. Otherwise quarantine_remove_cache() can miss + * some objects belonging to the cache if they are in our local temp + * list. quarantine_remove_cache() executes on_each_cpu() at the + * beginning which ensures that it either sees the objects in per-cpu + * lists or in the global quarantine. + */ local_irq_save(flags); q = this_cpu_ptr(&cpu_quarantine); qlist_put(q, &info->quarantine_link, cache->size); - if (unlikely(q->bytes > QUARANTINE_PERCPU_SIZE)) + if (unlikely(q->bytes > QUARANTINE_PERCPU_SIZE)) { qlist_move_all(q, &temp); - local_irq_restore(flags); - - if (unlikely(!qlist_empty(&temp))) { - spin_lock_irqsave(&quarantine_lock, flags); + spin_lock(&quarantine_lock); WRITE_ONCE(quarantine_size, quarantine_size + temp.bytes); qlist_move_all(&temp, &global_quarantine[quarantine_tail]); if (global_quarantine[quarantine_tail].bytes >= @@ -196,20 +203,33 @@ void quarantine_put(struct kasan_free_meta *info, struct kmem_cache *cache) if (new_tail != quarantine_head) quarantine_tail = new_tail; } - spin_unlock_irqrestore(&quarantine_lock, flags); + spin_unlock(&quarantine_lock); } + + local_irq_restore(flags); } void quarantine_reduce(void) { size_t total_size, new_quarantine_size, percpu_quarantines; unsigned long flags; + int srcu_idx; struct qlist_head to_free = QLIST_INIT; if (likely(READ_ONCE(quarantine_size) <= READ_ONCE(quarantine_max_size))) return; + /* + * srcu critical section ensures that quarantine_remove_cache() + * will not miss objects belonging to the cache while they are in our + * local to_free list. srcu is chosen because (1) it gives us private + * grace period domain that does not interfere with anything else, + * and (2) it allows synchronize_srcu() to return without waiting + * if there are no pending read critical sections (which is the + * expected case). + */ + srcu_idx = srcu_read_lock(&remove_cache_srcu); spin_lock_irqsave(&quarantine_lock, flags); /* @@ -237,6 +257,7 @@ void quarantine_reduce(void) spin_unlock_irqrestore(&quarantine_lock, flags); qlist_free_all(&to_free, NULL); + srcu_read_unlock(&remove_cache_srcu, srcu_idx); } static void qlist_move_cache(struct qlist_head *from, @@ -280,12 +301,28 @@ void quarantine_remove_cache(struct kmem_cache *cache) unsigned long flags, i; struct qlist_head to_free = QLIST_INIT; + /* + * Must be careful to not miss any objects that are being moved from + * per-cpu list to the global quarantine in quarantine_put(), + * nor objects being freed in quarantine_reduce(). on_each_cpu() + * achieves the first goal, while synchronize_srcu() achieves the + * second. + */ on_each_cpu(per_cpu_remove_cache, cache, 1); spin_lock_irqsave(&quarantine_lock, flags); - for (i = 0; i < QUARANTINE_BATCHES; i++) + for (i = 0; i < QUARANTINE_BATCHES; i++) { + if (qlist_empty(&global_quarantine[i])) + continue; qlist_move_cache(&global_quarantine[i], &to_free, cache); + /* Scanning whole quarantine can take a while. */ + spin_unlock_irqrestore(&quarantine_lock, flags); + cond_resched(); + spin_lock_irqsave(&quarantine_lock, flags); + } spin_unlock_irqrestore(&quarantine_lock, flags); qlist_free_all(&to_free, cache); + + synchronize_srcu(&remove_cache_srcu); } diff --git a/mm/madvise.c b/mm/madvise.c index dc5927c812d3..7a2abf0127ae 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -513,7 +513,43 @@ static long madvise_dontneed(struct vm_area_struct *vma, if (!can_madv_dontneed_vma(vma)) return -EINVAL; - userfaultfd_remove(vma, prev, start, end); + if (!userfaultfd_remove(vma, start, end)) { + *prev = NULL; /* mmap_sem has been dropped, prev is stale */ + + down_read(¤t->mm->mmap_sem); + vma = find_vma(current->mm, start); + if (!vma) + return -ENOMEM; + if (start < vma->vm_start) { + /* + * This "vma" under revalidation is the one + * with the lowest vma->vm_start where start + * is also < vma->vm_end. If start < + * vma->vm_start it means an hole materialized + * in the user address space within the + * virtual range passed to MADV_DONTNEED. + */ + return -ENOMEM; + } + if (!can_madv_dontneed_vma(vma)) + return -EINVAL; + if (end > vma->vm_end) { + /* + * Don't fail if end > vma->vm_end. If the old + * vma was splitted while the mmap_sem was + * released the effect of the concurrent + * operation may not cause MADV_DONTNEED to + * have an undefined result. There may be an + * adjacent next vma that we'll walk + * next. userfaultfd_remove() will generate an + * UFFD_EVENT_REMOVE repetition on the + * end-vma->vm_end range, but the manager can + * handle a repetition fine. + */ + end = vma->vm_end; + } + VM_WARN_ON(start >= end); + } zap_page_range(vma, start, end - start); return 0; } @@ -554,8 +590,10 @@ static long madvise_remove(struct vm_area_struct *vma, * mmap_sem. */ get_file(f); - userfaultfd_remove(vma, prev, start, end); - up_read(¤t->mm->mmap_sem); + if (userfaultfd_remove(vma, start, end)) { + /* mmap_sem was not released by userfaultfd_remove() */ + up_read(¤t->mm->mmap_sem); + } error = vfs_fallocate(f, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, offset, end - start); diff --git a/mm/memblock.c b/mm/memblock.c index b64b47803e52..696f06d17c4e 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -1118,7 +1118,10 @@ unsigned long __init_memblock memblock_next_valid_pfn(unsigned long pfn, } } while (left < right); - return min(PHYS_PFN(type->regions[right].base), max_pfn); + if (right == type->cnt) + return max_pfn; + else + return min(PHYS_PFN(type->regions[right].base), max_pfn); } /** diff --git a/mm/memcontrol.c b/mm/memcontrol.c index c52ec893e241..2bd7541d7c11 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -466,6 +466,8 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page) struct mem_cgroup_tree_per_node *mctz; mctz = soft_limit_tree_from_page(page); + if (!mctz) + return; /* * Necessary to update all ancestors when hierarchy is used. * because their event counter is not touched. @@ -503,7 +505,8 @@ static void mem_cgroup_remove_from_trees(struct mem_cgroup *memcg) for_each_node(nid) { mz = mem_cgroup_nodeinfo(memcg, nid); mctz = soft_limit_tree_node(nid); - mem_cgroup_remove_exceeded(mz, mctz); + if (mctz) + mem_cgroup_remove_exceeded(mz, mctz); } } @@ -2558,7 +2561,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, * is empty. Do it lockless to prevent lock bouncing. Races * are acceptable as soft limit is best effort anyway. */ - if (RB_EMPTY_ROOT(&mctz->rb_root)) + if (!mctz || RB_EMPTY_ROOT(&mctz->rb_root)) return 0; /* @@ -4135,17 +4138,22 @@ static void free_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node) kfree(memcg->nodeinfo[node]); } -static void mem_cgroup_free(struct mem_cgroup *memcg) +static void __mem_cgroup_free(struct mem_cgroup *memcg) { int node; - memcg_wb_domain_exit(memcg); for_each_node(node) free_mem_cgroup_per_node_info(memcg, node); free_percpu(memcg->stat); kfree(memcg); } +static void mem_cgroup_free(struct mem_cgroup *memcg) +{ + memcg_wb_domain_exit(memcg); + __mem_cgroup_free(memcg); +} + static struct mem_cgroup *mem_cgroup_alloc(void) { struct mem_cgroup *memcg; @@ -4196,7 +4204,7 @@ static struct mem_cgroup *mem_cgroup_alloc(void) fail: if (memcg->id.id > 0) idr_remove(&mem_cgroup_idr, memcg->id.id); - mem_cgroup_free(memcg); + __mem_cgroup_free(memcg); return NULL; } diff --git a/mm/memory.c b/mm/memory.c index a97a4cec2e1f..235ba51b2fbf 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -445,7 +445,7 @@ static inline void free_pmd_range(struct mmu_gather *tlb, pud_t *pud, mm_dec_nr_pmds(tlb->mm); } -static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, +static inline void free_pud_range(struct mmu_gather *tlb, p4d_t *p4d, unsigned long addr, unsigned long end, unsigned long floor, unsigned long ceiling) { @@ -454,7 +454,7 @@ static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, unsigned long start; start = addr; - pud = pud_offset(pgd, addr); + pud = pud_offset(p4d, addr); do { next = pud_addr_end(addr, end); if (pud_none_or_clear_bad(pud)) @@ -462,6 +462,39 @@ static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, free_pmd_range(tlb, pud, addr, next, floor, ceiling); } while (pud++, addr = next, addr != end); + start &= P4D_MASK; + if (start < floor) + return; + if (ceiling) { + ceiling &= P4D_MASK; + if (!ceiling) + return; + } + if (end - 1 > ceiling - 1) + return; + + pud = pud_offset(p4d, start); + p4d_clear(p4d); + pud_free_tlb(tlb, pud, start); +} + +static inline void free_p4d_range(struct mmu_gather *tlb, pgd_t *pgd, + unsigned long addr, unsigned long end, + unsigned long floor, unsigned long ceiling) +{ + p4d_t *p4d; + unsigned long next; + unsigned long start; + + start = addr; + p4d = p4d_offset(pgd, addr); + do { + next = p4d_addr_end(addr, end); + if (p4d_none_or_clear_bad(p4d)) + continue; + free_pud_range(tlb, p4d, addr, next, floor, ceiling); + } while (p4d++, addr = next, addr != end); + start &= PGDIR_MASK; if (start < floor) return; @@ -473,9 +506,9 @@ static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, if (end - 1 > ceiling - 1) return; - pud = pud_offset(pgd, start); + p4d = p4d_offset(pgd, start); pgd_clear(pgd); - pud_free_tlb(tlb, pud, start); + p4d_free_tlb(tlb, p4d, start); } /* @@ -539,7 +572,7 @@ void free_pgd_range(struct mmu_gather *tlb, next = pgd_addr_end(addr, end); if (pgd_none_or_clear_bad(pgd)) continue; - free_pud_range(tlb, pgd, addr, next, floor, ceiling); + free_p4d_range(tlb, pgd, addr, next, floor, ceiling); } while (pgd++, addr = next, addr != end); } @@ -658,7 +691,8 @@ static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr, pte_t pte, struct page *page) { pgd_t *pgd = pgd_offset(vma->vm_mm, addr); - pud_t *pud = pud_offset(pgd, addr); + p4d_t *p4d = p4d_offset(pgd, addr); + pud_t *pud = pud_offset(p4d, addr); pmd_t *pmd = pmd_offset(pud, addr); struct address_space *mapping; pgoff_t index; @@ -1023,16 +1057,16 @@ static inline int copy_pmd_range(struct mm_struct *dst_mm, struct mm_struct *src } static inline int copy_pud_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, - pgd_t *dst_pgd, pgd_t *src_pgd, struct vm_area_struct *vma, + p4d_t *dst_p4d, p4d_t *src_p4d, struct vm_area_struct *vma, unsigned long addr, unsigned long end) { pud_t *src_pud, *dst_pud; unsigned long next; - dst_pud = pud_alloc(dst_mm, dst_pgd, addr); + dst_pud = pud_alloc(dst_mm, dst_p4d, addr); if (!dst_pud) return -ENOMEM; - src_pud = pud_offset(src_pgd, addr); + src_pud = pud_offset(src_p4d, addr); do { next = pud_addr_end(addr, end); if (pud_trans_huge(*src_pud) || pud_devmap(*src_pud)) { @@ -1056,6 +1090,28 @@ static inline int copy_pud_range(struct mm_struct *dst_mm, struct mm_struct *src return 0; } +static inline int copy_p4d_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, + pgd_t *dst_pgd, pgd_t *src_pgd, struct vm_area_struct *vma, + unsigned long addr, unsigned long end) +{ + p4d_t *src_p4d, *dst_p4d; + unsigned long next; + + dst_p4d = p4d_alloc(dst_mm, dst_pgd, addr); + if (!dst_p4d) + return -ENOMEM; + src_p4d = p4d_offset(src_pgd, addr); + do { + next = p4d_addr_end(addr, end); + if (p4d_none_or_clear_bad(src_p4d)) + continue; + if (copy_pud_range(dst_mm, src_mm, dst_p4d, src_p4d, + vma, addr, next)) + return -ENOMEM; + } while (dst_p4d++, src_p4d++, addr = next, addr != end); + return 0; +} + int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, struct vm_area_struct *vma) { @@ -1111,7 +1167,7 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, next = pgd_addr_end(addr, end); if (pgd_none_or_clear_bad(src_pgd)) continue; - if (unlikely(copy_pud_range(dst_mm, src_mm, dst_pgd, src_pgd, + if (unlikely(copy_p4d_range(dst_mm, src_mm, dst_pgd, src_pgd, vma, addr, next))) { ret = -ENOMEM; break; @@ -1267,14 +1323,14 @@ next: } static inline unsigned long zap_pud_range(struct mmu_gather *tlb, - struct vm_area_struct *vma, pgd_t *pgd, + struct vm_area_struct *vma, p4d_t *p4d, unsigned long addr, unsigned long end, struct zap_details *details) { pud_t *pud; unsigned long next; - pud = pud_offset(pgd, addr); + pud = pud_offset(p4d, addr); do { next = pud_addr_end(addr, end); if (pud_trans_huge(*pud) || pud_devmap(*pud)) { @@ -1295,6 +1351,25 @@ next: return addr; } +static inline unsigned long zap_p4d_range(struct mmu_gather *tlb, + struct vm_area_struct *vma, pgd_t *pgd, + unsigned long addr, unsigned long end, + struct zap_details *details) +{ + p4d_t *p4d; + unsigned long next; + + p4d = p4d_offset(pgd, addr); + do { + next = p4d_addr_end(addr, end); + if (p4d_none_or_clear_bad(p4d)) + continue; + next = zap_pud_range(tlb, vma, p4d, addr, next, details); + } while (p4d++, addr = next, addr != end); + + return addr; +} + void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long addr, unsigned long end, @@ -1310,7 +1385,7 @@ void unmap_page_range(struct mmu_gather *tlb, next = pgd_addr_end(addr, end); if (pgd_none_or_clear_bad(pgd)) continue; - next = zap_pud_range(tlb, vma, pgd, addr, next, details); + next = zap_p4d_range(tlb, vma, pgd, addr, next, details); } while (pgd++, addr = next, addr != end); tlb_end_vma(tlb, vma); } @@ -1465,16 +1540,24 @@ EXPORT_SYMBOL_GPL(zap_vma_ptes); pte_t *__get_locked_pte(struct mm_struct *mm, unsigned long addr, spinlock_t **ptl) { - pgd_t *pgd = pgd_offset(mm, addr); - pud_t *pud = pud_alloc(mm, pgd, addr); - if (pud) { - pmd_t *pmd = pmd_alloc(mm, pud, addr); - if (pmd) { - VM_BUG_ON(pmd_trans_huge(*pmd)); - return pte_alloc_map_lock(mm, pmd, addr, ptl); - } - } - return NULL; + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + + pgd = pgd_offset(mm, addr); + p4d = p4d_alloc(mm, pgd, addr); + if (!p4d) + return NULL; + pud = pud_alloc(mm, p4d, addr); + if (!pud) + return NULL; + pmd = pmd_alloc(mm, pud, addr); + if (!pmd) + return NULL; + + VM_BUG_ON(pmd_trans_huge(*pmd)); + return pte_alloc_map_lock(mm, pmd, addr, ptl); } /* @@ -1740,7 +1823,7 @@ static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud, return 0; } -static inline int remap_pud_range(struct mm_struct *mm, pgd_t *pgd, +static inline int remap_pud_range(struct mm_struct *mm, p4d_t *p4d, unsigned long addr, unsigned long end, unsigned long pfn, pgprot_t prot) { @@ -1748,7 +1831,7 @@ static inline int remap_pud_range(struct mm_struct *mm, pgd_t *pgd, unsigned long next; pfn -= addr >> PAGE_SHIFT; - pud = pud_alloc(mm, pgd, addr); + pud = pud_alloc(mm, p4d, addr); if (!pud) return -ENOMEM; do { @@ -1760,6 +1843,26 @@ static inline int remap_pud_range(struct mm_struct *mm, pgd_t *pgd, return 0; } +static inline int remap_p4d_range(struct mm_struct *mm, pgd_t *pgd, + unsigned long addr, unsigned long end, + unsigned long pfn, pgprot_t prot) +{ + p4d_t *p4d; + unsigned long next; + + pfn -= addr >> PAGE_SHIFT; + p4d = p4d_alloc(mm, pgd, addr); + if (!p4d) + return -ENOMEM; + do { + next = p4d_addr_end(addr, end); + if (remap_pud_range(mm, p4d, addr, next, + pfn + (addr >> PAGE_SHIFT), prot)) + return -ENOMEM; + } while (p4d++, addr = next, addr != end); + return 0; +} + /** * remap_pfn_range - remap kernel memory to userspace * @vma: user vma to map to @@ -1816,7 +1919,7 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr, flush_cache_range(vma, addr, end); do { next = pgd_addr_end(addr, end); - err = remap_pud_range(mm, pgd, addr, next, + err = remap_p4d_range(mm, pgd, addr, next, pfn + (addr >> PAGE_SHIFT), prot); if (err) break; @@ -1932,7 +2035,7 @@ static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud, return err; } -static int apply_to_pud_range(struct mm_struct *mm, pgd_t *pgd, +static int apply_to_pud_range(struct mm_struct *mm, p4d_t *p4d, unsigned long addr, unsigned long end, pte_fn_t fn, void *data) { @@ -1940,7 +2043,7 @@ static int apply_to_pud_range(struct mm_struct *mm, pgd_t *pgd, unsigned long next; int err; - pud = pud_alloc(mm, pgd, addr); + pud = pud_alloc(mm, p4d, addr); if (!pud) return -ENOMEM; do { @@ -1952,6 +2055,26 @@ static int apply_to_pud_range(struct mm_struct *mm, pgd_t *pgd, return err; } +static int apply_to_p4d_range(struct mm_struct *mm, pgd_t *pgd, + unsigned long addr, unsigned long end, + pte_fn_t fn, void *data) +{ + p4d_t *p4d; + unsigned long next; + int err; + + p4d = p4d_alloc(mm, pgd, addr); + if (!p4d) + return -ENOMEM; + do { + next = p4d_addr_end(addr, end); + err = apply_to_pud_range(mm, p4d, addr, next, fn, data); + if (err) + break; + } while (p4d++, addr = next, addr != end); + return err; +} + /* * Scan a region of virtual memory, filling in page tables as necessary * and calling a provided function on each leaf page table. @@ -1970,7 +2093,7 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr, pgd = pgd_offset(mm, addr); do { next = pgd_addr_end(addr, end); - err = apply_to_pud_range(mm, pgd, addr, next, fn, data); + err = apply_to_p4d_range(mm, pgd, addr, next, fn, data); if (err) break; } while (pgd++, addr = next, addr != end); @@ -3653,11 +3776,15 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address, }; struct mm_struct *mm = vma->vm_mm; pgd_t *pgd; + p4d_t *p4d; int ret; pgd = pgd_offset(mm, address); + p4d = p4d_alloc(mm, pgd, address); + if (!p4d) + return VM_FAULT_OOM; - vmf.pud = pud_alloc(mm, pgd, address); + vmf.pud = pud_alloc(mm, p4d, address); if (!vmf.pud) return VM_FAULT_OOM; if (pud_none(*vmf.pud) && transparent_hugepage_enabled(vma)) { @@ -3779,12 +3906,35 @@ int handle_mm_fault(struct vm_area_struct *vma, unsigned long address, } EXPORT_SYMBOL_GPL(handle_mm_fault); +#ifndef __PAGETABLE_P4D_FOLDED +/* + * Allocate p4d page table. + * We've already handled the fast-path in-line. + */ +int __p4d_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address) +{ + p4d_t *new = p4d_alloc_one(mm, address); + if (!new) + return -ENOMEM; + + smp_wmb(); /* See comment in __pte_alloc */ + + spin_lock(&mm->page_table_lock); + if (pgd_present(*pgd)) /* Another has populated it */ + p4d_free(mm, new); + else + pgd_populate(mm, pgd, new); + spin_unlock(&mm->page_table_lock); + return 0; +} +#endif /* __PAGETABLE_P4D_FOLDED */ + #ifndef __PAGETABLE_PUD_FOLDED /* * Allocate page upper directory. * We've already handled the fast-path in-line. */ -int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address) +int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, unsigned long address) { pud_t *new = pud_alloc_one(mm, address); if (!new) @@ -3793,10 +3943,17 @@ int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address) smp_wmb(); /* See comment in __pte_alloc */ spin_lock(&mm->page_table_lock); - if (pgd_present(*pgd)) /* Another has populated it */ +#ifndef __ARCH_HAS_5LEVEL_HACK + if (p4d_present(*p4d)) /* Another has populated it */ pud_free(mm, new); else - pgd_populate(mm, pgd, new); + p4d_populate(mm, p4d, new); +#else + if (pgd_present(*p4d)) /* Another has populated it */ + pud_free(mm, new); + else + pgd_populate(mm, p4d, new); +#endif /* __ARCH_HAS_5LEVEL_HACK */ spin_unlock(&mm->page_table_lock); return 0; } @@ -3839,6 +3996,7 @@ static int __follow_pte_pmd(struct mm_struct *mm, unsigned long address, pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp) { pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pmd_t *pmd; pte_t *ptep; @@ -3847,7 +4005,11 @@ static int __follow_pte_pmd(struct mm_struct *mm, unsigned long address, if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) goto out; - pud = pud_offset(pgd, address); + p4d = p4d_offset(pgd, address); + if (p4d_none(*p4d) || unlikely(p4d_bad(*p4d))) + goto out; + + pud = pud_offset(p4d, address); if (pud_none(*pud) || unlikely(pud_bad(*pud))) goto out; diff --git a/mm/mlock.c b/mm/mlock.c index 1050511f8b2b..0dd9ca18e19e 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -380,6 +380,7 @@ static unsigned long __munlock_pagevec_fill(struct pagevec *pvec, pte = get_locked_pte(vma->vm_mm, start, &ptl); /* Make sure we do not cross the page table boundary */ end = pgd_addr_end(start, end); + end = p4d_addr_end(start, end); end = pud_addr_end(start, end); end = pmd_addr_end(start, end); @@ -442,7 +443,7 @@ void munlock_vma_pages_range(struct vm_area_struct *vma, while (start < end) { struct page *page; - unsigned int page_mask; + unsigned int page_mask = 0; unsigned long page_increm; struct pagevec pvec; struct zone *zone; @@ -456,8 +457,7 @@ void munlock_vma_pages_range(struct vm_area_struct *vma, * suits munlock very well (and if somehow an abnormal page * has sneaked into the range, we won't oops here: great). */ - page = follow_page_mask(vma, start, FOLL_GET | FOLL_DUMP, - &page_mask); + page = follow_page(vma, start, FOLL_GET | FOLL_DUMP); if (page && !IS_ERR(page)) { if (PageTransTail(page)) { @@ -468,8 +468,8 @@ void munlock_vma_pages_range(struct vm_area_struct *vma, /* * Any THP page found by follow_page_mask() may * have gotten split before reaching - * munlock_vma_page(), so we need to recompute - * the page_mask here. + * munlock_vma_page(), so we need to compute + * the page_mask here instead. */ page_mask = munlock_vma_page(page); unlock_page(page); diff --git a/mm/mprotect.c b/mm/mprotect.c index 848e946b08e5..8edd0d576254 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -193,14 +193,14 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma, } static inline unsigned long change_pud_range(struct vm_area_struct *vma, - pgd_t *pgd, unsigned long addr, unsigned long end, + p4d_t *p4d, unsigned long addr, unsigned long end, pgprot_t newprot, int dirty_accountable, int prot_numa) { pud_t *pud; unsigned long next; unsigned long pages = 0; - pud = pud_offset(pgd, addr); + pud = pud_offset(p4d, addr); do { next = pud_addr_end(addr, end); if (pud_none_or_clear_bad(pud)) @@ -212,6 +212,26 @@ static inline unsigned long change_pud_range(struct vm_area_struct *vma, return pages; } +static inline unsigned long change_p4d_range(struct vm_area_struct *vma, + pgd_t *pgd, unsigned long addr, unsigned long end, + pgprot_t newprot, int dirty_accountable, int prot_numa) +{ + p4d_t *p4d; + unsigned long next; + unsigned long pages = 0; + + p4d = p4d_offset(pgd, addr); + do { + next = p4d_addr_end(addr, end); + if (p4d_none_or_clear_bad(p4d)) + continue; + pages += change_pud_range(vma, p4d, addr, next, newprot, + dirty_accountable, prot_numa); + } while (p4d++, addr = next, addr != end); + + return pages; +} + static unsigned long change_protection_range(struct vm_area_struct *vma, unsigned long addr, unsigned long end, pgprot_t newprot, int dirty_accountable, int prot_numa) @@ -230,7 +250,7 @@ static unsigned long change_protection_range(struct vm_area_struct *vma, next = pgd_addr_end(addr, end); if (pgd_none_or_clear_bad(pgd)) continue; - pages += change_pud_range(vma, pgd, addr, next, newprot, + pages += change_p4d_range(vma, pgd, addr, next, newprot, dirty_accountable, prot_numa); } while (pgd++, addr = next, addr != end); diff --git a/mm/mremap.c b/mm/mremap.c index 8233b0105c82..cd8a1b199ef9 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -32,6 +32,7 @@ static pmd_t *get_old_pmd(struct mm_struct *mm, unsigned long addr) { pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pmd_t *pmd; @@ -39,7 +40,11 @@ static pmd_t *get_old_pmd(struct mm_struct *mm, unsigned long addr) if (pgd_none_or_clear_bad(pgd)) return NULL; - pud = pud_offset(pgd, addr); + p4d = p4d_offset(pgd, addr); + if (p4d_none_or_clear_bad(p4d)) + return NULL; + + pud = pud_offset(p4d, addr); if (pud_none_or_clear_bad(pud)) return NULL; @@ -54,11 +59,15 @@ static pmd_t *alloc_new_pmd(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr) { pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pmd_t *pmd; pgd = pgd_offset(mm, addr); - pud = pud_alloc(mm, pgd, addr); + p4d = p4d_alloc(mm, pgd, addr); + if (!p4d) + return NULL; + pud = pud_alloc(mm, p4d, addr); if (!pud) return NULL; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index eaa64d2ffdc5..6cbde310abed 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -873,7 +873,8 @@ done_merging: higher_page = page + (combined_pfn - pfn); buddy_pfn = __find_buddy_pfn(combined_pfn, order + 1); higher_buddy = higher_page + (buddy_pfn - combined_pfn); - if (page_is_buddy(higher_page, higher_buddy, order + 1)) { + if (pfn_valid_within(buddy_pfn) && + page_is_buddy(higher_page, higher_buddy, order + 1)) { list_add_tail(&page->lru, &zone->free_area[order].free_list[migratetype]); goto out; diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c index a23001a22c15..c4c9def8ffea 100644 --- a/mm/page_vma_mapped.c +++ b/mm/page_vma_mapped.c @@ -104,6 +104,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) struct mm_struct *mm = pvmw->vma->vm_mm; struct page *page = pvmw->page; pgd_t *pgd; + p4d_t *p4d; pud_t *pud; /* The only possible pmd mapping has been handled on last iteration */ @@ -133,7 +134,10 @@ restart: pgd = pgd_offset(mm, pvmw->address); if (!pgd_present(*pgd)) return false; - pud = pud_offset(pgd, pvmw->address); + p4d = p4d_offset(pgd, pvmw->address); + if (!p4d_present(*p4d)) + return false; + pud = pud_offset(p4d, pvmw->address); if (!pud_present(*pud)) return false; pvmw->pmd = pmd_offset(pud, pvmw->address); diff --git a/mm/pagewalk.c b/mm/pagewalk.c index 03761577ae86..60f7856e508f 100644 --- a/mm/pagewalk.c +++ b/mm/pagewalk.c @@ -69,14 +69,14 @@ again: return err; } -static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end, +static int walk_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end, struct mm_walk *walk) { pud_t *pud; unsigned long next; int err = 0; - pud = pud_offset(pgd, addr); + pud = pud_offset(p4d, addr); do { again: next = pud_addr_end(addr, end); @@ -113,6 +113,32 @@ static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end, return err; } +static int walk_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end, + struct mm_walk *walk) +{ + p4d_t *p4d; + unsigned long next; + int err = 0; + + p4d = p4d_offset(pgd, addr); + do { + next = p4d_addr_end(addr, end); + if (p4d_none_or_clear_bad(p4d)) { + if (walk->pte_hole) + err = walk->pte_hole(addr, next, walk); + if (err) + break; + continue; + } + if (walk->pmd_entry || walk->pte_entry) + err = walk_pud_range(p4d, addr, next, walk); + if (err) + break; + } while (p4d++, addr = next, addr != end); + + return err; +} + static int walk_pgd_range(unsigned long addr, unsigned long end, struct mm_walk *walk) { @@ -131,7 +157,7 @@ static int walk_pgd_range(unsigned long addr, unsigned long end, continue; } if (walk->pmd_entry || walk->pte_entry) - err = walk_pud_range(pgd, addr, next, walk); + err = walk_p4d_range(pgd, addr, next, walk); if (err) break; } while (pgd++, addr = next, addr != end); diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c index 4ed5908c65b0..c99d9512a45b 100644 --- a/mm/pgtable-generic.c +++ b/mm/pgtable-generic.c @@ -22,6 +22,12 @@ void pgd_clear_bad(pgd_t *pgd) pgd_clear(pgd); } +void p4d_clear_bad(p4d_t *p4d) +{ + p4d_ERROR(*p4d); + p4d_clear(p4d); +} + void pud_clear_bad(pud_t *pud) { pud_ERROR(*pud); diff --git a/mm/rmap.c b/mm/rmap.c index 2da487d6cea8..49ed681ccc7b 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -684,6 +684,7 @@ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma) pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address) { pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pmd_t *pmd = NULL; pmd_t pmde; @@ -692,7 +693,11 @@ pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address) if (!pgd_present(*pgd)) goto out; - pud = pud_offset(pgd, address); + p4d = p4d_offset(pgd, address); + if (!p4d_present(*p4d)) + goto out; + + pud = pud_offset(p4d, address); if (!pud_present(*pud)) goto out; @@ -1316,12 +1321,6 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma, } while (page_vma_mapped_walk(&pvmw)) { - subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte); - address = pvmw.address; - - /* Unexpected PMD-mapped THP? */ - VM_BUG_ON_PAGE(!pvmw.pte, page); - /* * If the page is mlock()d, we cannot swap it out. * If it's recently referenced (perhaps page_referenced @@ -1345,6 +1344,13 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma, continue; } + /* Unexpected PMD-mapped THP? */ + VM_BUG_ON_PAGE(!pvmw.pte, page); + + subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte); + address = pvmw.address; + + if (!(flags & TTU_IGNORE_ACCESS)) { if (ptep_clear_flush_young_notify(vma, address, pvmw.pte)) { diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index 574c67b663fe..a56c3989f773 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c @@ -196,9 +196,9 @@ pmd_t * __meminit vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node) return pmd; } -pud_t * __meminit vmemmap_pud_populate(pgd_t *pgd, unsigned long addr, int node) +pud_t * __meminit vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node) { - pud_t *pud = pud_offset(pgd, addr); + pud_t *pud = pud_offset(p4d, addr); if (pud_none(*pud)) { void *p = vmemmap_alloc_block(PAGE_SIZE, node); if (!p) @@ -208,6 +208,18 @@ pud_t * __meminit vmemmap_pud_populate(pgd_t *pgd, unsigned long addr, int node) return pud; } +p4d_t * __meminit vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node) +{ + p4d_t *p4d = p4d_offset(pgd, addr); + if (p4d_none(*p4d)) { + void *p = vmemmap_alloc_block(PAGE_SIZE, node); + if (!p) + return NULL; + p4d_populate(&init_mm, p4d, p); + } + return p4d; +} + pgd_t * __meminit vmemmap_pgd_populate(unsigned long addr, int node) { pgd_t *pgd = pgd_offset_k(addr); @@ -225,6 +237,7 @@ int __meminit vmemmap_populate_basepages(unsigned long start, { unsigned long addr = start; pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pmd_t *pmd; pte_t *pte; @@ -233,7 +246,10 @@ int __meminit vmemmap_populate_basepages(unsigned long start, pgd = vmemmap_pgd_populate(addr, node); if (!pgd) return -ENOMEM; - pud = vmemmap_pud_populate(pgd, addr, node); + p4d = vmemmap_p4d_populate(pgd, addr, node); + if (!p4d) + return -ENOMEM; + pud = vmemmap_pud_populate(p4d, addr, node); if (!pud) return -ENOMEM; pmd = vmemmap_pmd_populate(pud, addr, node); diff --git a/mm/swapfile.c b/mm/swapfile.c index 521ef9b6064f..178130880b90 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1517,7 +1517,7 @@ static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud, return 0; } -static inline int unuse_pud_range(struct vm_area_struct *vma, pgd_t *pgd, +static inline int unuse_pud_range(struct vm_area_struct *vma, p4d_t *p4d, unsigned long addr, unsigned long end, swp_entry_t entry, struct page *page) { @@ -1525,7 +1525,7 @@ static inline int unuse_pud_range(struct vm_area_struct *vma, pgd_t *pgd, unsigned long next; int ret; - pud = pud_offset(pgd, addr); + pud = pud_offset(p4d, addr); do { next = pud_addr_end(addr, end); if (pud_none_or_clear_bad(pud)) @@ -1537,6 +1537,26 @@ static inline int unuse_pud_range(struct vm_area_struct *vma, pgd_t *pgd, return 0; } +static inline int unuse_p4d_range(struct vm_area_struct *vma, pgd_t *pgd, + unsigned long addr, unsigned long end, + swp_entry_t entry, struct page *page) +{ + p4d_t *p4d; + unsigned long next; + int ret; + + p4d = p4d_offset(pgd, addr); + do { + next = p4d_addr_end(addr, end); + if (p4d_none_or_clear_bad(p4d)) + continue; + ret = unuse_pud_range(vma, p4d, addr, next, entry, page); + if (ret) + return ret; + } while (p4d++, addr = next, addr != end); + return 0; +} + static int unuse_vma(struct vm_area_struct *vma, swp_entry_t entry, struct page *page) { @@ -1560,7 +1580,7 @@ static int unuse_vma(struct vm_area_struct *vma, next = pgd_addr_end(addr, end); if (pgd_none_or_clear_bad(pgd)) continue; - ret = unuse_pud_range(vma, pgd, addr, next, entry, page); + ret = unuse_p4d_range(vma, pgd, addr, next, entry, page); if (ret) return ret; } while (pgd++, addr = next, addr != end); diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index 479e631d43c2..8bcb501bce60 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -128,19 +128,22 @@ out_unlock: static pmd_t *mm_alloc_pmd(struct mm_struct *mm, unsigned long address) { pgd_t *pgd; + p4d_t *p4d; pud_t *pud; - pmd_t *pmd = NULL; pgd = pgd_offset(mm, address); - pud = pud_alloc(mm, pgd, address); - if (pud) - /* - * Note that we didn't run this because the pmd was - * missing, the *pmd may be already established and in - * turn it may also be a trans_huge_pmd. - */ - pmd = pmd_alloc(mm, pud, address); - return pmd; + p4d = p4d_alloc(mm, pgd, address); + if (!p4d) + return NULL; + pud = pud_alloc(mm, p4d, address); + if (!pud) + return NULL; + /* + * Note that we didn't run this because the pmd was + * missing, the *pmd may be already established and in + * turn it may also be a trans_huge_pmd. + */ + return pmd_alloc(mm, pud, address); } #ifdef CONFIG_HUGETLB_PAGE diff --git a/mm/vmalloc.c b/mm/vmalloc.c index b4024d688f38..0dd80222b20b 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -86,12 +86,12 @@ static void vunmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end) } while (pmd++, addr = next, addr != end); } -static void vunmap_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end) +static void vunmap_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end) { pud_t *pud; unsigned long next; - pud = pud_offset(pgd, addr); + pud = pud_offset(p4d, addr); do { next = pud_addr_end(addr, end); if (pud_clear_huge(pud)) @@ -102,6 +102,22 @@ static void vunmap_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end) } while (pud++, addr = next, addr != end); } +static void vunmap_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end) +{ + p4d_t *p4d; + unsigned long next; + + p4d = p4d_offset(pgd, addr); + do { + next = p4d_addr_end(addr, end); + if (p4d_clear_huge(p4d)) + continue; + if (p4d_none_or_clear_bad(p4d)) + continue; + vunmap_pud_range(p4d, addr, next); + } while (p4d++, addr = next, addr != end); +} + static void vunmap_page_range(unsigned long addr, unsigned long end) { pgd_t *pgd; @@ -113,7 +129,7 @@ static void vunmap_page_range(unsigned long addr, unsigned long end) next = pgd_addr_end(addr, end); if (pgd_none_or_clear_bad(pgd)) continue; - vunmap_pud_range(pgd, addr, next); + vunmap_p4d_range(pgd, addr, next); } while (pgd++, addr = next, addr != end); } @@ -160,13 +176,13 @@ static int vmap_pmd_range(pud_t *pud, unsigned long addr, return 0; } -static int vmap_pud_range(pgd_t *pgd, unsigned long addr, +static int vmap_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end, pgprot_t prot, struct page **pages, int *nr) { pud_t *pud; unsigned long next; - pud = pud_alloc(&init_mm, pgd, addr); + pud = pud_alloc(&init_mm, p4d, addr); if (!pud) return -ENOMEM; do { @@ -177,6 +193,23 @@ static int vmap_pud_range(pgd_t *pgd, unsigned long addr, return 0; } +static int vmap_p4d_range(pgd_t *pgd, unsigned long addr, + unsigned long end, pgprot_t prot, struct page **pages, int *nr) +{ + p4d_t *p4d; + unsigned long next; + + p4d = p4d_alloc(&init_mm, pgd, addr); + if (!p4d) + return -ENOMEM; + do { + next = p4d_addr_end(addr, end); + if (vmap_pud_range(p4d, addr, next, prot, pages, nr)) + return -ENOMEM; + } while (p4d++, addr = next, addr != end); + return 0; +} + /* * Set up page tables in kva (addr, end). The ptes shall have prot "prot", and * will have pfns corresponding to the "pages" array. @@ -196,7 +229,7 @@ static int vmap_page_range_noflush(unsigned long start, unsigned long end, pgd = pgd_offset_k(addr); do { next = pgd_addr_end(addr, end); - err = vmap_pud_range(pgd, addr, next, prot, pages, &nr); + err = vmap_p4d_range(pgd, addr, next, prot, pages, &nr); if (err) return err; } while (pgd++, addr = next, addr != end); @@ -237,6 +270,10 @@ struct page *vmalloc_to_page(const void *vmalloc_addr) unsigned long addr = (unsigned long) vmalloc_addr; struct page *page = NULL; pgd_t *pgd = pgd_offset_k(addr); + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + pte_t *ptep, pte; /* * XXX we might need to change this if we add VIRTUAL_BUG_ON for @@ -244,21 +281,23 @@ struct page *vmalloc_to_page(const void *vmalloc_addr) */ VIRTUAL_BUG_ON(!is_vmalloc_or_module_addr(vmalloc_addr)); - if (!pgd_none(*pgd)) { - pud_t *pud = pud_offset(pgd, addr); - if (!pud_none(*pud)) { - pmd_t *pmd = pmd_offset(pud, addr); - if (!pmd_none(*pmd)) { - pte_t *ptep, pte; - - ptep = pte_offset_map(pmd, addr); - pte = *ptep; - if (pte_present(pte)) - page = pte_page(pte); - pte_unmap(ptep); - } - } - } + if (pgd_none(*pgd)) + return NULL; + p4d = p4d_offset(pgd, addr); + if (p4d_none(*p4d)) + return NULL; + pud = pud_offset(p4d, addr); + if (pud_none(*pud)) + return NULL; + pmd = pmd_offset(pud, addr); + if (pmd_none(*pmd)) + return NULL; + + ptep = pte_offset_map(pmd, addr); + pte = *ptep; + if (pte_present(pte)) + page = pte_page(pte); + pte_unmap(ptep); return page; } EXPORT_SYMBOL(vmalloc_to_page); diff --git a/mm/vmstat.c b/mm/vmstat.c index 69f9aff39a2e..b1947f0cbee2 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1065,6 +1065,9 @@ const char * const vmstat_text[] = { "thp_split_page_failed", "thp_deferred_split_page", "thp_split_pmd", +#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD + "thp_split_pud", +#endif "thp_zero_page_alloc", "thp_zero_page_alloc_failed", #endif diff --git a/scripts/gcc-plugins/sancov_plugin.c b/scripts/gcc-plugins/sancov_plugin.c index 9b0b5cbc5b89..0f98634c20a0 100644 --- a/scripts/gcc-plugins/sancov_plugin.c +++ b/scripts/gcc-plugins/sancov_plugin.c @@ -133,7 +133,7 @@ __visible int plugin_init(struct plugin_name_args *plugin_info, struct plugin_gc #if BUILDING_GCC_VERSION < 6000 register_callback(plugin_name, PLUGIN_START_UNIT, &sancov_start_unit, NULL); register_callback(plugin_name, PLUGIN_REGISTER_GGC_ROOTS, NULL, (void *)>_ggc_r_gt_sancov); - register_callback(plugin_name, PLUGIN_PASS_MANAGER_SETUP, NULL, &sancov_plugin_pass_info); + register_callback(plugin_name, PLUGIN_PASS_MANAGER_SETUP, NULL, &sancov_pass_info); #endif return 0; diff --git a/scripts/module-common.lds b/scripts/module-common.lds index cf7e52e4781b..9b6e246a45d0 100644 --- a/scripts/module-common.lds +++ b/scripts/module-common.lds @@ -22,4 +22,6 @@ SECTIONS { . = ALIGN(8); .init_array 0 : { *(SORT(.init_array.*)) *(.init_array) } + + __jump_table 0 : ALIGN(8) { KEEP(*(__jump_table)) } } diff --git a/scripts/spelling.txt b/scripts/spelling.txt index 0458b037c8a1..0545f5a8cabe 100644 --- a/scripts/spelling.txt +++ b/scripts/spelling.txt @@ -372,6 +372,8 @@ disassocation||disassociation disapear||disappear disapeared||disappeared disappared||disappeared +disble||disable +disbled||disabled disconnet||disconnect discontinous||discontinuous dispertion||dispersion @@ -732,6 +734,7 @@ oustanding||outstanding overaall||overall overhread||overhead overlaping||overlapping +overide||override overrided||overridden overriden||overridden overun||overrun diff --git a/sound/soc/amd/acp-pcm-dma.c b/sound/soc/amd/acp-pcm-dma.c index ec1067a679da..08b1399d1da2 100644 --- a/sound/soc/amd/acp-pcm-dma.c +++ b/sound/soc/amd/acp-pcm-dma.c @@ -89,7 +89,7 @@ static void acp_reg_write(u32 val, void __iomem *acp_mmio, u32 reg) writel(val, acp_mmio + (reg * 4)); } -/* Configure a given dma channel parameters - enable/disble, +/* Configure a given dma channel parameters - enable/disable, * number of descriptors, priority */ static void config_acp_dma_channel(void __iomem *acp_mmio, u8 ch_num, diff --git a/tools/lguest/lguest.c b/tools/lguest/lguest.c index 11c8d9bc762e..5d19fdf80292 100644 --- a/tools/lguest/lguest.c +++ b/tools/lguest/lguest.c @@ -1387,7 +1387,7 @@ static bool pci_data_iowrite(u16 port, u32 mask, u32 val) /* Allow writing to any other BAR, or expansion ROM */ iowrite(portoff, val, mask, &d->config_words[reg]); return true; - /* We let them overide latency timer and cacheline size */ + /* We let them override latency timer and cacheline size */ } else if (&d->config_words[reg] == (void *)&d->config.cacheline_size) { /* Only let them change the first two fields. */ if (mask == 0xFFFFFFFF) diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index e2efddf10231..1f5300e56b44 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -132,7 +132,7 @@ else Q = @ endif -# Disable command line variables (CFLAGS) overide from top +# Disable command line variables (CFLAGS) override from top # level Makefile (perf), otherwise build Makefile will get # the same command line setup. MAKEOVERRIDES= diff --git a/tools/lib/traceevent/Makefile b/tools/lib/traceevent/Makefile index 47076b15eebe..9b8555ea3459 100644 --- a/tools/lib/traceevent/Makefile +++ b/tools/lib/traceevent/Makefile @@ -135,7 +135,7 @@ else Q = @ endif -# Disable command line variables (CFLAGS) overide from top +# Disable command line variables (CFLAGS) override from top # level Makefile (perf), otherwise build Makefile will get # the same command line setup. MAKEOVERRIDES= diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h index 66342804161c..0c03538df74c 100644 --- a/tools/lib/traceevent/event-parse.h +++ b/tools/lib/traceevent/event-parse.h @@ -140,7 +140,7 @@ struct pevent_plugin_option { * struct pevent_plugin_option PEVENT_PLUGIN_OPTIONS[] = { * { * .name = "option-name", - * .plugin_alias = "overide-file-name", (optional) + * .plugin_alias = "override-file-name", (optional) * .description = "description of option to show users", * }, * { diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index 4cfdbb5b6967..066086dd59a8 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -805,11 +805,20 @@ static struct rela *find_switch_table(struct objtool_file *file, insn->jump_dest->offset > orig_insn->offset)) break; + /* look for a relocation which references .rodata */ text_rela = find_rela_by_dest_range(insn->sec, insn->offset, insn->len); - if (text_rela && text_rela->sym == file->rodata->sym) - return find_rela_by_dest(file->rodata, - text_rela->addend); + if (!text_rela || text_rela->sym != file->rodata->sym) + continue; + + /* + * Make sure the .rodata address isn't associated with a + * symbol. gcc jump tables are anonymous data. + */ + if (find_symbol_containing(file->rodata, text_rela->addend)) + continue; + + return find_rela_by_dest(file->rodata, text_rela->addend); } return NULL; diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 0d7983ac63ef..d897702ce742 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -85,6 +85,18 @@ struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset) return NULL; } +struct symbol *find_symbol_containing(struct section *sec, unsigned long offset) +{ + struct symbol *sym; + + list_for_each_entry(sym, &sec->symbol_list, list) + if (sym->type != STT_SECTION && + offset >= sym->offset && offset < sym->offset + sym->len) + return sym; + + return NULL; +} + struct rela *find_rela_by_dest_range(struct section *sec, unsigned long offset, unsigned int len) { diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h index aa1ff6596684..731973e1a3f5 100644 --- a/tools/objtool/elf.h +++ b/tools/objtool/elf.h @@ -79,6 +79,7 @@ struct elf { struct elf *elf_open(const char *name); struct section *find_section_by_name(struct elf *elf, const char *name); struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset); +struct symbol *find_symbol_containing(struct section *sec, unsigned long offset); struct rela *find_rela_by_dest(struct section *sec, unsigned long offset); struct rela *find_rela_by_dest_range(struct section *sec, unsigned long offset, unsigned int len); diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c index 7913363bde5c..4f3c758d875d 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c @@ -31,7 +31,7 @@ #error Instruction buffer size too small #endif -/* Based on branch_type() from perf_event_intel_lbr.c */ +/* Based on branch_type() from arch/x86/events/intel/lbr.c */ static void intel_pt_insn_decoder(struct insn *insn, struct intel_pt_insn *intel_pt_insn) { diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl index 6e4eb2fc2d1e..0c8b61f8398e 100755 --- a/tools/testing/ktest/ktest.pl +++ b/tools/testing/ktest/ktest.pl @@ -1880,6 +1880,7 @@ sub get_grub_index { sub wait_for_input { my ($fp, $time) = @_; + my $start_time; my $rin; my $rout; my $nr; @@ -1895,17 +1896,22 @@ sub wait_for_input vec($rin, fileno($fp), 1) = 1; vec($rin, fileno(\*STDIN), 1) = 1; + $start_time = time; + while (1) { $nr = select($rout=$rin, undef, undef, $time); - if ($nr <= 0) { - return undef; - } + last if ($nr <= 0); # copy data from stdin to the console if (vec($rout, fileno(\*STDIN), 1) == 1) { - sysread(\*STDIN, $buf, 1000); - syswrite($fp, $buf, 1000); + $nr = sysread(\*STDIN, $buf, 1000); + syswrite($fp, $buf, $nr) if ($nr > 0); + } + + # The timeout is based on time waiting for the fp data + if (vec($rout, fileno($fp), 1) != 1) { + last if (defined($time) && (time - $start_time > $time)); next; } @@ -1917,12 +1923,11 @@ sub wait_for_input last if ($ch eq "\n"); } - if (!length($line)) { - return undef; - } + last if (!length($line)); return $line; } + return undef; } sub reboot_to { diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile index f11315bedefc..6a9480c03cbd 100644 --- a/tools/testing/radix-tree/Makefile +++ b/tools/testing/radix-tree/Makefile @@ -1,6 +1,7 @@ CFLAGS += -I. -I../../include -g -O2 -Wall -D_LGPL_SOURCE -fsanitize=address -LDFLAGS += -lpthread -lurcu +LDFLAGS += -fsanitize=address +LDLIBS+= -lpthread -lurcu TARGETS = main idr-test multiorder CORE_OFILES := radix-tree.o idr.o linux.o test.o find_bit.o OFILES = main.o $(CORE_OFILES) regression1.o regression2.o regression3.o \ @@ -10,23 +11,25 @@ ifndef SHIFT SHIFT=3 endif +ifeq ($(BUILD), 32) + CFLAGS += -m32 + LDFLAGS += -m32 +endif + targets: mapshift $(TARGETS) main: $(OFILES) - $(CC) $(CFLAGS) $(LDFLAGS) $^ -o main idr-test: idr-test.o $(CORE_OFILES) - $(CC) $(CFLAGS) $(LDFLAGS) $^ -o idr-test multiorder: multiorder.o $(CORE_OFILES) - $(CC) $(CFLAGS) $(LDFLAGS) $^ -o multiorder clean: $(RM) $(TARGETS) *.o radix-tree.c idr.c generated/map-shift.h vpath %.c ../../lib -$(OFILES): *.h */*.h generated/map-shift.h \ +$(OFILES): Makefile *.h */*.h generated/map-shift.h \ ../../include/linux/*.h \ ../../include/asm/*.h \ ../../../include/linux/radix-tree.h \ @@ -41,7 +44,7 @@ idr.c: ../../../lib/idr.c .PHONY: mapshift mapshift: - @if ! grep -qw $(SHIFT) generated/map-shift.h; then \ + @if ! grep -qws $(SHIFT) generated/map-shift.h; then \ echo "#define RADIX_TREE_MAP_SHIFT $(SHIFT)" > \ generated/map-shift.h; \ fi diff --git a/tools/testing/radix-tree/benchmark.c b/tools/testing/radix-tree/benchmark.c index 9b09ddfe462f..99c40f3ed133 100644 --- a/tools/testing/radix-tree/benchmark.c +++ b/tools/testing/radix-tree/benchmark.c @@ -17,6 +17,9 @@ #include <time.h> #include "test.h" +#define for_each_index(i, base, order) \ + for (i = base; i < base + (1 << order); i++) + #define NSEC_PER_SEC 1000000000L static long long benchmark_iter(struct radix_tree_root *root, bool tagged) @@ -57,27 +60,176 @@ again: return nsec; } +static void benchmark_insert(struct radix_tree_root *root, + unsigned long size, unsigned long step, int order) +{ + struct timespec start, finish; + unsigned long index; + long long nsec; + + clock_gettime(CLOCK_MONOTONIC, &start); + + for (index = 0 ; index < size ; index += step) + item_insert_order(root, index, order); + + clock_gettime(CLOCK_MONOTONIC, &finish); + + nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC + + (finish.tv_nsec - start.tv_nsec); + + printv(2, "Size: %8ld, step: %8ld, order: %d, insertion: %15lld ns\n", + size, step, order, nsec); +} + +static void benchmark_tagging(struct radix_tree_root *root, + unsigned long size, unsigned long step, int order) +{ + struct timespec start, finish; + unsigned long index; + long long nsec; + + clock_gettime(CLOCK_MONOTONIC, &start); + + for (index = 0 ; index < size ; index += step) + radix_tree_tag_set(root, index, 0); + + clock_gettime(CLOCK_MONOTONIC, &finish); + + nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC + + (finish.tv_nsec - start.tv_nsec); + + printv(2, "Size: %8ld, step: %8ld, order: %d, tagging: %17lld ns\n", + size, step, order, nsec); +} + +static void benchmark_delete(struct radix_tree_root *root, + unsigned long size, unsigned long step, int order) +{ + struct timespec start, finish; + unsigned long index, i; + long long nsec; + + clock_gettime(CLOCK_MONOTONIC, &start); + + for (index = 0 ; index < size ; index += step) + for_each_index(i, index, order) + item_delete(root, i); + + clock_gettime(CLOCK_MONOTONIC, &finish); + + nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC + + (finish.tv_nsec - start.tv_nsec); + + printv(2, "Size: %8ld, step: %8ld, order: %d, deletion: %16lld ns\n", + size, step, order, nsec); +} + static void benchmark_size(unsigned long size, unsigned long step, int order) { RADIX_TREE(tree, GFP_KERNEL); long long normal, tagged; - unsigned long index; - for (index = 0 ; index < size ; index += step) { - item_insert_order(&tree, index, order); - radix_tree_tag_set(&tree, index, 0); - } + benchmark_insert(&tree, size, step, order); + benchmark_tagging(&tree, size, step, order); tagged = benchmark_iter(&tree, true); normal = benchmark_iter(&tree, false); - printv(2, "Size %ld, step %6ld, order %d tagged %10lld ns, normal %10lld ns\n", - size, step, order, tagged, normal); + printv(2, "Size: %8ld, step: %8ld, order: %d, tagged iteration: %8lld ns\n", + size, step, order, tagged); + printv(2, "Size: %8ld, step: %8ld, order: %d, normal iteration: %8lld ns\n", + size, step, order, normal); + + benchmark_delete(&tree, size, step, order); item_kill_tree(&tree); rcu_barrier(); } +static long long __benchmark_split(unsigned long index, + int old_order, int new_order) +{ + struct timespec start, finish; + long long nsec; + RADIX_TREE(tree, GFP_ATOMIC); + + item_insert_order(&tree, index, old_order); + + clock_gettime(CLOCK_MONOTONIC, &start); + radix_tree_split(&tree, index, new_order); + clock_gettime(CLOCK_MONOTONIC, &finish); + nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC + + (finish.tv_nsec - start.tv_nsec); + + item_kill_tree(&tree); + + return nsec; + +} + +static void benchmark_split(unsigned long size, unsigned long step) +{ + int i, j, idx; + long long nsec = 0; + + + for (idx = 0; idx < size; idx += step) { + for (i = 3; i < 11; i++) { + for (j = 0; j < i; j++) { + nsec += __benchmark_split(idx, i, j); + } + } + } + + printv(2, "Size %8ld, step %8ld, split time %10lld ns\n", + size, step, nsec); + +} + +static long long __benchmark_join(unsigned long index, + unsigned order1, unsigned order2) +{ + unsigned long loc; + struct timespec start, finish; + long long nsec; + void *item, *item2 = item_create(index + 1, order1); + RADIX_TREE(tree, GFP_KERNEL); + + item_insert_order(&tree, index, order2); + item = radix_tree_lookup(&tree, index); + + clock_gettime(CLOCK_MONOTONIC, &start); + radix_tree_join(&tree, index + 1, order1, item2); + clock_gettime(CLOCK_MONOTONIC, &finish); + nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC + + (finish.tv_nsec - start.tv_nsec); + + loc = find_item(&tree, item); + if (loc == -1) + free(item); + + item_kill_tree(&tree); + + return nsec; +} + +static void benchmark_join(unsigned long step) +{ + int i, j, idx; + long long nsec = 0; + + for (idx = 0; idx < 1 << 10; idx += step) { + for (i = 1; i < 15; i++) { + for (j = 0; j < i; j++) { + nsec += __benchmark_join(idx, i, j); + } + } + } + + printv(2, "Size %8d, step %8ld, join time %10lld ns\n", + 1 << 10, step, nsec); +} + void benchmark(void) { unsigned long size[] = {1 << 10, 1 << 20, 0}; @@ -95,4 +247,11 @@ void benchmark(void) for (c = 0; size[c]; c++) for (s = 0; step[s]; s++) benchmark_size(size[c], step[s] << 9, 9); + + for (c = 0; size[c]; c++) + for (s = 0; step[s]; s++) + benchmark_split(size[c], step[s]); + + for (s = 0; step[s]; s++) + benchmark_join(step[s]); } diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c index a26098c6123d..30cd0b296f1a 100644 --- a/tools/testing/radix-tree/idr-test.c +++ b/tools/testing/radix-tree/idr-test.c @@ -153,6 +153,30 @@ void idr_nowait_test(void) idr_destroy(&idr); } +void idr_get_next_test(void) +{ + unsigned long i; + int nextid; + DEFINE_IDR(idr); + + int indices[] = {4, 7, 9, 15, 65, 128, 1000, 99999, 0}; + + for(i = 0; indices[i]; i++) { + struct item *item = item_create(indices[i], 0); + assert(idr_alloc(&idr, item, indices[i], indices[i+1], + GFP_KERNEL) == indices[i]); + } + + for(i = 0, nextid = 0; indices[i]; i++) { + idr_get_next(&idr, &nextid); + assert(nextid == indices[i]); + nextid++; + } + + idr_for_each(&idr, item_idr_free, &idr); + idr_destroy(&idr); +} + void idr_checks(void) { unsigned long i; @@ -202,6 +226,7 @@ void idr_checks(void) idr_alloc_test(); idr_null_test(); idr_nowait_test(); + idr_get_next_test(); } /* @@ -338,7 +363,7 @@ void ida_check_random(void) { DEFINE_IDA(ida); DECLARE_BITMAP(bitmap, 2048); - int id; + int id, err; unsigned int i; time_t s = time(NULL); @@ -352,8 +377,11 @@ void ida_check_random(void) ida_remove(&ida, bit); } else { __set_bit(bit, bitmap); - ida_pre_get(&ida, GFP_KERNEL); - assert(!ida_get_new_above(&ida, bit, &id)); + do { + ida_pre_get(&ida, GFP_KERNEL); + err = ida_get_new_above(&ida, bit, &id); + } while (err == -ENOMEM); + assert(!err); assert(id == bit); } } @@ -362,6 +390,24 @@ void ida_check_random(void) goto repeat; } +void ida_simple_get_remove_test(void) +{ + DEFINE_IDA(ida); + unsigned long i; + + for (i = 0; i < 10000; i++) { + assert(ida_simple_get(&ida, 0, 20000, GFP_KERNEL) == i); + } + assert(ida_simple_get(&ida, 5, 30, GFP_KERNEL) < 0); + + for (i = 0; i < 10000; i++) { + ida_simple_remove(&ida, i); + } + assert(ida_is_empty(&ida)); + + ida_destroy(&ida); +} + void ida_checks(void) { DEFINE_IDA(ida); @@ -428,15 +474,41 @@ void ida_checks(void) ida_check_max(); ida_check_conv(); ida_check_random(); + ida_simple_get_remove_test(); radix_tree_cpu_dead(1); } +static void *ida_random_fn(void *arg) +{ + rcu_register_thread(); + ida_check_random(); + rcu_unregister_thread(); + return NULL; +} + +void ida_thread_tests(void) +{ + pthread_t threads[10]; + int i; + + for (i = 0; i < ARRAY_SIZE(threads); i++) + if (pthread_create(&threads[i], NULL, ida_random_fn, NULL)) { + perror("creating ida thread"); + exit(1); + } + + while (i--) + pthread_join(threads[i], NULL); +} + int __weak main(void) { radix_tree_init(); idr_checks(); ida_checks(); + ida_thread_tests(); + radix_tree_cpu_dead(1); rcu_barrier(); if (nr_allocated) printf("nr_allocated = %d\n", nr_allocated); diff --git a/tools/testing/radix-tree/main.c b/tools/testing/radix-tree/main.c index b829127d5670..bc9a78449572 100644 --- a/tools/testing/radix-tree/main.c +++ b/tools/testing/radix-tree/main.c @@ -368,6 +368,7 @@ int main(int argc, char **argv) iteration_test(0, 10 + 90 * long_run); iteration_test(7, 10 + 90 * long_run); single_thread_tests(long_run); + ida_thread_tests(); /* Free any remaining preallocated nodes */ radix_tree_cpu_dead(0); diff --git a/tools/testing/radix-tree/tag_check.c b/tools/testing/radix-tree/tag_check.c index d4ff00989245..36dcf7d6945d 100644 --- a/tools/testing/radix-tree/tag_check.c +++ b/tools/testing/radix-tree/tag_check.c @@ -330,6 +330,34 @@ static void single_check(void) item_kill_tree(&tree); } +void radix_tree_clear_tags_test(void) +{ + unsigned long index; + struct radix_tree_node *node; + struct radix_tree_iter iter; + void **slot; + + RADIX_TREE(tree, GFP_KERNEL); + + item_insert(&tree, 0); + item_tag_set(&tree, 0, 0); + __radix_tree_lookup(&tree, 0, &node, &slot); + radix_tree_clear_tags(&tree, node, slot); + assert(item_tag_get(&tree, 0, 0) == 0); + + for (index = 0; index < 1000; index++) { + item_insert(&tree, index); + item_tag_set(&tree, index, 0); + } + + radix_tree_for_each_slot(slot, &tree, &iter, 0) { + radix_tree_clear_tags(&tree, iter.node, slot); + assert(item_tag_get(&tree, iter.index, 0) == 0); + } + + item_kill_tree(&tree); +} + void tag_check(void) { single_check(); @@ -347,4 +375,5 @@ void tag_check(void) thrash_tags(); rcu_barrier(); printv(2, "after thrash_tags: %d allocated\n", nr_allocated); + radix_tree_clear_tags_test(); } diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h index b30e11d9d271..0f8220cc6166 100644 --- a/tools/testing/radix-tree/test.h +++ b/tools/testing/radix-tree/test.h @@ -36,6 +36,7 @@ void iteration_test(unsigned order, unsigned duration); void benchmark(void); void idr_checks(void); void ida_checks(void); +void ida_thread_tests(void); struct item * item_tag_set(struct radix_tree_root *root, unsigned long index, int tag); diff --git a/tools/testing/selftests/powerpc/harness.c b/tools/testing/selftests/powerpc/harness.c index 248a820048df..66d31de60b9a 100644 --- a/tools/testing/selftests/powerpc/harness.c +++ b/tools/testing/selftests/powerpc/harness.c @@ -114,9 +114,11 @@ int test_harness(int (test_function)(void), char *name) rc = run_test(test_function, name); - if (rc == MAGIC_SKIP_RETURN_VALUE) + if (rc == MAGIC_SKIP_RETURN_VALUE) { test_skip(name); - else + /* so that skipped test is not marked as failed */ + rc = 0; + } else test_finish(name, rc); return rc; diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index 4cff7e7ddcc4..41642ba5e318 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -1,5 +1,9 @@ # Makefile for vm selftests +ifndef OUTPUT + OUTPUT := $(shell pwd) +endif + CFLAGS = -Wall -I ../../../../usr/include $(EXTRA_CFLAGS) LDLIBS = -lrt TEST_GEN_FILES = compaction_test diff --git a/tools/testing/selftests/x86/fsgsbase.c b/tools/testing/selftests/x86/fsgsbase.c index 5b2b4b3c634c..b4967d875236 100644 --- a/tools/testing/selftests/x86/fsgsbase.c +++ b/tools/testing/selftests/x86/fsgsbase.c @@ -245,7 +245,7 @@ void do_unexpected_base(void) long ret; asm volatile ("int $0x80" : "=a" (ret) : "a" (243), "b" (low_desc) - : "flags"); + : "r8", "r9", "r10", "r11"); memcpy(&desc, low_desc, sizeof(desc)); munmap(low_desc, sizeof(desc)); diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c index 4af47079cf04..f6121612e769 100644 --- a/tools/testing/selftests/x86/ldt_gdt.c +++ b/tools/testing/selftests/x86/ldt_gdt.c @@ -45,6 +45,12 @@ #define AR_DB (1 << 22) #define AR_G (1 << 23) +#ifdef __x86_64__ +# define INT80_CLOBBERS "r8", "r9", "r10", "r11" +#else +# define INT80_CLOBBERS +#endif + static int nerrs; /* Points to an array of 1024 ints, each holding its own index. */ @@ -588,7 +594,7 @@ static int invoke_set_thread_area(void) asm volatile ("int $0x80" : "=a" (ret), "+m" (low_user_desc) : "a" (243), "b" (low_user_desc) - : "flags"); + : INT80_CLOBBERS); return ret; } @@ -657,7 +663,7 @@ static void test_gdt_invalidation(void) "+a" (eax) : "m" (low_user_desc_clear), [arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear) - : "flags"); + : INT80_CLOBBERS); if (sel != 0) { result = "FAIL"; @@ -688,7 +694,7 @@ static void test_gdt_invalidation(void) "+a" (eax) : "m" (low_user_desc_clear), [arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear) - : "flags"); + : INT80_CLOBBERS); if (sel != 0) { result = "FAIL"; @@ -721,7 +727,7 @@ static void test_gdt_invalidation(void) "+a" (eax) : "m" (low_user_desc_clear), [arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear) - : "flags"); + : INT80_CLOBBERS); #ifdef __x86_64__ syscall(SYS_arch_prctl, ARCH_GET_FS, &new_base); @@ -774,7 +780,7 @@ static void test_gdt_invalidation(void) "+a" (eax) : "m" (low_user_desc_clear), [arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear) - : "flags"); + : INT80_CLOBBERS); #ifdef __x86_64__ syscall(SYS_arch_prctl, ARCH_GET_GS, &new_base); diff --git a/tools/testing/selftests/x86/ptrace_syscall.c b/tools/testing/selftests/x86/ptrace_syscall.c index b037ce9cf116..eaea92439708 100644 --- a/tools/testing/selftests/x86/ptrace_syscall.c +++ b/tools/testing/selftests/x86/ptrace_syscall.c @@ -58,7 +58,8 @@ static void do_full_int80(struct syscall_args32 *args) asm volatile ("int $0x80" : "+a" (args->nr), "+b" (args->arg0), "+c" (args->arg1), "+d" (args->arg2), - "+S" (args->arg3), "+D" (args->arg4), "+r" (bp)); + "+S" (args->arg3), "+D" (args->arg4), "+r" (bp) + : : "r8", "r9", "r10", "r11"); args->arg5 = bp; #else sys32_helper(args, int80_and_ret); diff --git a/tools/testing/selftests/x86/single_step_syscall.c b/tools/testing/selftests/x86/single_step_syscall.c index 50c26358e8b7..a48da95c18fd 100644 --- a/tools/testing/selftests/x86/single_step_syscall.c +++ b/tools/testing/selftests/x86/single_step_syscall.c @@ -56,9 +56,11 @@ static volatile sig_atomic_t sig_traps; #ifdef __x86_64__ # define REG_IP REG_RIP # define WIDTH "q" +# define INT80_CLOBBERS "r8", "r9", "r10", "r11" #else # define REG_IP REG_EIP # define WIDTH "l" +# define INT80_CLOBBERS #endif static unsigned long get_eflags(void) @@ -140,7 +142,8 @@ int main() printf("[RUN]\tSet TF and check int80\n"); set_eflags(get_eflags() | X86_EFLAGS_TF); - asm volatile ("int $0x80" : "=a" (tmp) : "a" (SYS_getpid)); + asm volatile ("int $0x80" : "=a" (tmp) : "a" (SYS_getpid) + : INT80_CLOBBERS); check_result(); /* |