Diffstat (limited to 'arch')
264 files changed, 6085 insertions, 2614 deletions
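A recurring conversion in the hunks below replaces the two-step demux idiom generic_handle_irq(irq_find_mapping(domain, hwirq)) with the combined generic_handle_domain_irq(domain, hwirq) helper, which does the reverse mapping internally and returns nonzero for an unmapped or unhandled hwirq (the sa1111, arc/mcip, pxa_cplds, s3c24xx and several MIPS changes all follow this shape). The sketch below illustrates the pattern only; it is not code from any one driver, and foo_irq_handler(), foo_readl(), struct foo and the FOO_* names are invented.

#include <linux/bitops.h>
#include <linux/irq.h>
#include <linux/irqdesc.h>
#include <linux/irqdomain.h>
#include <linux/printk.h>

/* Hypothetical chained handler showing the old and new demux styles. */
static void foo_irq_handler(struct irq_desc *desc)
{
	struct foo *chip = irq_desc_get_handler_data(desc);
	unsigned long pending = foo_readl(chip, FOO_IRQ_STATUS);
	unsigned int bit;

	for_each_set_bit(bit, &pending, FOO_NR_IRQS) {
		/* Old style: resolve the Linux IRQ, then handle it:
		 *	generic_handle_irq(irq_find_mapping(chip->domain, bit));
		 * New style: one call; a nonzero return means the hwirq had
		 * no mapping or could not be handled, which the MIPS hunks
		 * below turn into spurious_interrupt().
		 */
		if (generic_handle_domain_irq(chip->domain, bit))
			pr_warn("foo: unhandled hwirq %u\n", bit);
	}
}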
diff --git a/arch/Kconfig b/arch/Kconfig index 129df498a8e1..98db63496bab 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -1282,6 +1282,9 @@ config ARCH_SPLIT_ARG64 config ARCH_HAS_ELFCORE_COMPAT bool +config ARCH_HAS_PARANOID_L1D_FLUSH + bool + source "kernel/gcov/Kconfig" source "scripts/gcc-plugins/Kconfig" diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index d8f51eb8963b..b5bf68e74732 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -409,7 +409,7 @@ choice help Depending on the configuration, CPU can contain DSP registers (ACC0_GLO, ACC0_GHI, DSP_BFLY0, DSP_CTRL, DSP_FFT_CTRL). - Bellow is options describing how to handle these registers in + Below are options describing how to handle these registers in interrupt entry / exit and in context switch. config ARC_DSP_NONE diff --git a/arch/arc/include/asm/checksum.h b/arch/arc/include/asm/checksum.h index 69debd77cd04..0b485800a392 100644 --- a/arch/arc/include/asm/checksum.h +++ b/arch/arc/include/asm/checksum.h @@ -24,7 +24,7 @@ */ static inline __sum16 csum_fold(__wsum s) { - unsigned r = s << 16 | s >> 16; /* ror */ + unsigned int r = s << 16 | s >> 16; /* ror */ s = ~s; s -= r; return s >> 16; diff --git a/arch/arc/include/asm/perf_event.h b/arch/arc/include/asm/perf_event.h index 30b9ae511ea9..e1971d34ef30 100644 --- a/arch/arc/include/asm/perf_event.h +++ b/arch/arc/include/asm/perf_event.h @@ -123,7 +123,7 @@ static const char * const arc_pmu_ev_hw_map[] = { #define C(_x) PERF_COUNT_HW_CACHE_##_x #define CACHE_OP_UNSUPPORTED 0xffff -static const unsigned arc_pmu_cache_map[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { +static const unsigned int arc_pmu_cache_map[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [C(L1D)] = { [C(OP_READ)] = { [C(RESULT_ACCESS)] = PERF_COUNT_ARC_LDC, diff --git a/arch/arc/kernel/fpu.c b/arch/arc/kernel/fpu.c index c67c0f0f5f77..ec640219d989 100644 --- a/arch/arc/kernel/fpu.c +++ b/arch/arc/kernel/fpu.c @@ -57,23 +57,26 @@ void fpu_save_restore(struct task_struct *prev, struct task_struct *next) void fpu_init_task(struct pt_regs *regs) { + const unsigned int fwe = 0x80000000; + /* default rounding mode */ write_aux_reg(ARC_REG_FPU_CTRL, 0x100); - /* set "Write enable" to allow explicit write to exception flags */ - write_aux_reg(ARC_REG_FPU_STATUS, 0x80000000); + /* Initialize to zero: setting requires FWE be set */ + write_aux_reg(ARC_REG_FPU_STATUS, fwe); } void fpu_save_restore(struct task_struct *prev, struct task_struct *next) { struct arc_fpu *save = &prev->thread.fpu; struct arc_fpu *restore = &next->thread.fpu; + const unsigned int fwe = 0x80000000; save->ctrl = read_aux_reg(ARC_REG_FPU_CTRL); save->status = read_aux_reg(ARC_REG_FPU_STATUS); write_aux_reg(ARC_REG_FPU_CTRL, restore->ctrl); - write_aux_reg(ARC_REG_FPU_STATUS, restore->status); + write_aux_reg(ARC_REG_FPU_STATUS, (fwe | restore->status)); } #endif diff --git a/arch/arc/kernel/mcip.c b/arch/arc/kernel/mcip.c index abf9398cc333..f9fdb557c263 100644 --- a/arch/arc/kernel/mcip.c +++ b/arch/arc/kernel/mcip.c @@ -352,7 +352,7 @@ static void idu_cascade_isr(struct irq_desc *desc) irq_hw_number_t idu_hwirq = core_hwirq - FIRST_EXT_IRQ; chained_irq_enter(core_chip, desc); - generic_handle_irq(irq_find_mapping(idu_domain, idu_hwirq)); + generic_handle_domain_irq(idu_domain, idu_hwirq); chained_irq_exit(core_chip, desc); } diff --git a/arch/arc/kernel/unwind.c b/arch/arc/kernel/unwind.c index 47bab67f8649..9e28058cdba8 100644 --- a/arch/arc/kernel/unwind.c +++ b/arch/arc/kernel/unwind.c @@ -260,7 +260,7 @@ static void init_unwind_hdr(struct unwind_table 
*table, { const u8 *ptr; unsigned long tableSize = table->size, hdrSize; - unsigned n; + unsigned int n; const u32 *fde; struct { u8 version; @@ -462,7 +462,7 @@ static uleb128_t get_uleb128(const u8 **pcur, const u8 *end) { const u8 *cur = *pcur; uleb128_t value; - unsigned shift; + unsigned int shift; for (shift = 0, value = 0; cur < end; shift += 7) { if (shift + 7 > 8 * sizeof(value) @@ -483,7 +483,7 @@ static sleb128_t get_sleb128(const u8 **pcur, const u8 *end) { const u8 *cur = *pcur; sleb128_t value; - unsigned shift; + unsigned int shift; for (shift = 0, value = 0; cur < end; shift += 7) { if (shift + 7 > 8 * sizeof(value) @@ -609,7 +609,7 @@ static unsigned long read_pointer(const u8 **pLoc, const void *end, static signed fde_pointer_type(const u32 *cie) { const u8 *ptr = (const u8 *)(cie + 2); - unsigned version = *ptr; + unsigned int version = *ptr; if (*++ptr) { const char *aug; @@ -904,7 +904,7 @@ int arc_unwind(struct unwind_frame_info *frame) const u8 *ptr = NULL, *end = NULL; unsigned long pc = UNW_PC(frame) - frame->call_frame; unsigned long startLoc = 0, endLoc = 0, cfa; - unsigned i; + unsigned int i; signed ptrType = -1; uleb128_t retAddrReg = 0; const struct unwind_table *table; diff --git a/arch/arc/kernel/vmlinux.lds.S b/arch/arc/kernel/vmlinux.lds.S index e2146a8da195..529ae50f9fe2 100644 --- a/arch/arc/kernel/vmlinux.lds.S +++ b/arch/arc/kernel/vmlinux.lds.S @@ -88,6 +88,8 @@ SECTIONS CPUIDLE_TEXT LOCK_TEXT KPROBES_TEXT + IRQENTRY_TEXT + SOFTIRQENTRY_TEXT *(.fixup) *(.gnu.warning) } diff --git a/arch/arm/common/sa1111.c b/arch/arm/common/sa1111.c index ff5e0d04cb89..d17083c3fe2d 100644 --- a/arch/arm/common/sa1111.c +++ b/arch/arm/common/sa1111.c @@ -196,14 +196,6 @@ static int sa1111_map_irq(struct sa1111 *sachip, irq_hw_number_t hwirq) return irq_create_mapping(sachip->irqdomain, hwirq); } -static void sa1111_handle_irqdomain(struct irq_domain *irqdomain, int irq) -{ - struct irq_desc *d = irq_to_desc(irq_linear_revmap(irqdomain, irq)); - - if (d) - generic_handle_irq_desc(d); -} - /* * SA1111 interrupt support. 
Since clearing an IRQ while there are * active IRQs causes the interrupt output to pulse, the upper levels @@ -234,11 +226,11 @@ static void sa1111_irq_handler(struct irq_desc *desc) for (i = 0; stat0; i++, stat0 >>= 1) if (stat0 & 1) - sa1111_handle_irqdomain(irqdomain, i); + generic_handle_domain_irq(irqdomain, i); for (i = 32; stat1; i++, stat1 >>= 1) if (stat1 & 1) - sa1111_handle_irqdomain(irqdomain, i); + generic_handle_domain_irq(irqdomain, i); /* For level-based interrupts */ desc->irq_data.chip->irq_unmask(&desc->irq_data); diff --git a/arch/arm/configs/nhk8815_defconfig b/arch/arm/configs/nhk8815_defconfig index 3f35761dc9ff..23595fc5a29a 100644 --- a/arch/arm/configs/nhk8815_defconfig +++ b/arch/arm/configs/nhk8815_defconfig @@ -15,8 +15,6 @@ CONFIG_SLAB=y CONFIG_ARCH_NOMADIK=y CONFIG_MACH_NOMADIK_8815NHK=y CONFIG_AEABI=y -CONFIG_ZBOOT_ROM_TEXT=0x0 -CONFIG_ZBOOT_ROM_BSS=0x0 CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set @@ -52,9 +50,9 @@ CONFIG_MTD_BLOCK=y CONFIG_MTD_ONENAND=y CONFIG_MTD_ONENAND_VERIFY_WRITE=y CONFIG_MTD_ONENAND_GENERIC=y -CONFIG_MTD_NAND_ECC_SW_HAMMING_SMC=y CONFIG_MTD_RAW_NAND=y CONFIG_MTD_NAND_FSMC=y +CONFIG_MTD_NAND_ECC_SW_HAMMING_SMC=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_CRYPTOLOOP=y CONFIG_BLK_DEV_RAM=y @@ -97,6 +95,7 @@ CONFIG_REGULATOR=y CONFIG_DRM=y CONFIG_DRM_PANEL_TPO_TPG110=y CONFIG_DRM_PL111=y +CONFIG_FB=y CONFIG_BACKLIGHT_CLASS_DEVICE=y CONFIG_BACKLIGHT_PWM=y CONFIG_FRAMEBUFFER_CONSOLE=y @@ -136,9 +135,8 @@ CONFIG_NLS_ISO8859_15=y CONFIG_CRYPTO_MD5=y CONFIG_CRYPTO_SHA1=y CONFIG_CRYPTO_DES=y +# CONFIG_DEBUG_BUGVERBOSE is not set CONFIG_DEBUG_INFO=y -# CONFIG_ENABLE_MUST_CHECK is not set CONFIG_DEBUG_FS=y # CONFIG_SCHED_DEBUG is not set # CONFIG_DEBUG_PREEMPT is not set -# CONFIG_DEBUG_BUGVERBOSE is not set diff --git a/arch/arm/crypto/curve25519-glue.c b/arch/arm/crypto/curve25519-glue.c index 31eb75b6002f..9bdafd57888c 100644 --- a/arch/arm/crypto/curve25519-glue.c +++ b/arch/arm/crypto/curve25519-glue.c @@ -112,7 +112,7 @@ static struct kpp_alg curve25519_alg = { .max_size = curve25519_max_size, }; -static int __init mod_init(void) +static int __init arm_curve25519_init(void) { if (elf_hwcap & HWCAP_NEON) { static_branch_enable(&have_neon); @@ -122,14 +122,14 @@ static int __init mod_init(void) return 0; } -static void __exit mod_exit(void) +static void __exit arm_curve25519_exit(void) { if (IS_REACHABLE(CONFIG_CRYPTO_KPP) && elf_hwcap & HWCAP_NEON) crypto_unregister_kpp(&curve25519_alg); } -module_init(mod_init); -module_exit(mod_exit); +module_init(arm_curve25519_init); +module_exit(arm_curve25519_exit); MODULE_ALIAS_CRYPTO("curve25519"); MODULE_ALIAS_CRYPTO("curve25519-neon"); diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h index cfc9dfd70aad..f673e13e0f94 100644 --- a/arch/arm/include/asm/memory.h +++ b/arch/arm/include/asm/memory.h @@ -160,10 +160,11 @@ extern unsigned long vectors_base; /* * Physical start and end address of the kernel sections. These addresses are - * 2MB-aligned to match the section mappings placed over the kernel. + * 2MB-aligned to match the section mappings placed over the kernel. We use + * u64 so that LPAE mappings beyond the 32bit limit will work out as well. */ -extern u32 kernel_sec_start; -extern u32 kernel_sec_end; +extern u64 kernel_sec_start; +extern u64 kernel_sec_end; /* * Physical vs virtual RAM address space conversion. 
These are diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index 9eb0b4dbcc12..29070eb8df7d 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -49,7 +49,8 @@ /* * This needs to be assigned at runtime when the linker symbols are - * resolved. + * resolved. These are unsigned 64bit really, but in this assembly code + * We store them as 32bit. */ .pushsection .data .align 2 @@ -57,8 +58,10 @@ .globl kernel_sec_end kernel_sec_start: .long 0 + .long 0 kernel_sec_end: .long 0 + .long 0 .popsection .macro pgtbl, rd, phys @@ -250,7 +253,11 @@ __create_page_tables: add r0, r4, #KERNEL_OFFSET >> (SECTION_SHIFT - PMD_ORDER) ldr r6, =(_end - 1) adr_l r5, kernel_sec_start @ _pa(kernel_sec_start) - str r8, [r5] @ Save physical start of kernel +#ifdef CONFIG_CPU_ENDIAN_BE8 + str r8, [r5, #4] @ Save physical start of kernel (BE) +#else + str r8, [r5] @ Save physical start of kernel (LE) +#endif orr r3, r8, r7 @ Add the MMU flags add r6, r4, r6, lsr #(SECTION_SHIFT - PMD_ORDER) 1: str r3, [r0], #1 << PMD_ORDER @@ -259,7 +266,11 @@ __create_page_tables: bls 1b eor r3, r3, r7 @ Remove the MMU flags adr_l r5, kernel_sec_end @ _pa(kernel_sec_end) - str r3, [r5] @ Save physical end of kernel +#ifdef CONFIG_CPU_ENDIAN_BE8 + str r3, [r5, #4] @ Save physical end of kernel (BE) +#else + str r3, [r5] @ Save physical end of kernel (LE) +#endif #ifdef CONFIG_XIP_KERNEL /* diff --git a/arch/arm/mach-ixp4xx/include/mach/ixp4xx-regs.h b/arch/arm/mach-ixp4xx/include/mach/ixp4xx-regs.h index abb07f105515..74e63d4531aa 100644 --- a/arch/arm/mach-ixp4xx/include/mach/ixp4xx-regs.h +++ b/arch/arm/mach-ixp4xx/include/mach/ixp4xx-regs.h @@ -218,30 +218,30 @@ /* * PCI Control/Status Registers */ -#define IXP4XX_PCI_CSR(x) ((volatile u32 *)(IXP4XX_PCI_CFG_BASE_VIRT+(x))) - -#define PCI_NP_AD IXP4XX_PCI_CSR(PCI_NP_AD_OFFSET) -#define PCI_NP_CBE IXP4XX_PCI_CSR(PCI_NP_CBE_OFFSET) -#define PCI_NP_WDATA IXP4XX_PCI_CSR(PCI_NP_WDATA_OFFSET) -#define PCI_NP_RDATA IXP4XX_PCI_CSR(PCI_NP_RDATA_OFFSET) -#define PCI_CRP_AD_CBE IXP4XX_PCI_CSR(PCI_CRP_AD_CBE_OFFSET) -#define PCI_CRP_WDATA IXP4XX_PCI_CSR(PCI_CRP_WDATA_OFFSET) -#define PCI_CRP_RDATA IXP4XX_PCI_CSR(PCI_CRP_RDATA_OFFSET) -#define PCI_CSR IXP4XX_PCI_CSR(PCI_CSR_OFFSET) -#define PCI_ISR IXP4XX_PCI_CSR(PCI_ISR_OFFSET) -#define PCI_INTEN IXP4XX_PCI_CSR(PCI_INTEN_OFFSET) -#define PCI_DMACTRL IXP4XX_PCI_CSR(PCI_DMACTRL_OFFSET) -#define PCI_AHBMEMBASE IXP4XX_PCI_CSR(PCI_AHBMEMBASE_OFFSET) -#define PCI_AHBIOBASE IXP4XX_PCI_CSR(PCI_AHBIOBASE_OFFSET) -#define PCI_PCIMEMBASE IXP4XX_PCI_CSR(PCI_PCIMEMBASE_OFFSET) -#define PCI_AHBDOORBELL IXP4XX_PCI_CSR(PCI_AHBDOORBELL_OFFSET) -#define PCI_PCIDOORBELL IXP4XX_PCI_CSR(PCI_PCIDOORBELL_OFFSET) -#define PCI_ATPDMA0_AHBADDR IXP4XX_PCI_CSR(PCI_ATPDMA0_AHBADDR_OFFSET) -#define PCI_ATPDMA0_PCIADDR IXP4XX_PCI_CSR(PCI_ATPDMA0_PCIADDR_OFFSET) -#define PCI_ATPDMA0_LENADDR IXP4XX_PCI_CSR(PCI_ATPDMA0_LENADDR_OFFSET) -#define PCI_ATPDMA1_AHBADDR IXP4XX_PCI_CSR(PCI_ATPDMA1_AHBADDR_OFFSET) -#define PCI_ATPDMA1_PCIADDR IXP4XX_PCI_CSR(PCI_ATPDMA1_PCIADDR_OFFSET) -#define PCI_ATPDMA1_LENADDR IXP4XX_PCI_CSR(PCI_ATPDMA1_LENADDR_OFFSET) +#define _IXP4XX_PCI_CSR(x) ((volatile u32 *)(IXP4XX_PCI_CFG_BASE_VIRT+(x))) + +#define PCI_NP_AD _IXP4XX_PCI_CSR(PCI_NP_AD_OFFSET) +#define PCI_NP_CBE _IXP4XX_PCI_CSR(PCI_NP_CBE_OFFSET) +#define PCI_NP_WDATA _IXP4XX_PCI_CSR(PCI_NP_WDATA_OFFSET) +#define PCI_NP_RDATA _IXP4XX_PCI_CSR(PCI_NP_RDATA_OFFSET) +#define PCI_CRP_AD_CBE _IXP4XX_PCI_CSR(PCI_CRP_AD_CBE_OFFSET) +#define PCI_CRP_WDATA 
_IXP4XX_PCI_CSR(PCI_CRP_WDATA_OFFSET) +#define PCI_CRP_RDATA _IXP4XX_PCI_CSR(PCI_CRP_RDATA_OFFSET) +#define PCI_CSR _IXP4XX_PCI_CSR(PCI_CSR_OFFSET) +#define PCI_ISR _IXP4XX_PCI_CSR(PCI_ISR_OFFSET) +#define PCI_INTEN _IXP4XX_PCI_CSR(PCI_INTEN_OFFSET) +#define PCI_DMACTRL _IXP4XX_PCI_CSR(PCI_DMACTRL_OFFSET) +#define PCI_AHBMEMBASE _IXP4XX_PCI_CSR(PCI_AHBMEMBASE_OFFSET) +#define PCI_AHBIOBASE _IXP4XX_PCI_CSR(PCI_AHBIOBASE_OFFSET) +#define PCI_PCIMEMBASE _IXP4XX_PCI_CSR(PCI_PCIMEMBASE_OFFSET) +#define PCI_AHBDOORBELL _IXP4XX_PCI_CSR(PCI_AHBDOORBELL_OFFSET) +#define PCI_PCIDOORBELL _IXP4XX_PCI_CSR(PCI_PCIDOORBELL_OFFSET) +#define PCI_ATPDMA0_AHBADDR _IXP4XX_PCI_CSR(PCI_ATPDMA0_AHBADDR_OFFSET) +#define PCI_ATPDMA0_PCIADDR _IXP4XX_PCI_CSR(PCI_ATPDMA0_PCIADDR_OFFSET) +#define PCI_ATPDMA0_LENADDR _IXP4XX_PCI_CSR(PCI_ATPDMA0_LENADDR_OFFSET) +#define PCI_ATPDMA1_AHBADDR _IXP4XX_PCI_CSR(PCI_ATPDMA1_AHBADDR_OFFSET) +#define PCI_ATPDMA1_PCIADDR _IXP4XX_PCI_CSR(PCI_ATPDMA1_PCIADDR_OFFSET) +#define PCI_ATPDMA1_LENADDR _IXP4XX_PCI_CSR(PCI_ATPDMA1_LENADDR_OFFSET) /* * PCI register values and bit definitions diff --git a/arch/arm/mach-pxa/pxa_cplds_irqs.c b/arch/arm/mach-pxa/pxa_cplds_irqs.c index bddfc7cd5d40..eda5a47d7fbb 100644 --- a/arch/arm/mach-pxa/pxa_cplds_irqs.c +++ b/arch/arm/mach-pxa/pxa_cplds_irqs.c @@ -39,10 +39,8 @@ static irqreturn_t cplds_irq_handler(int in_irq, void *d) do { pending = readl(fpga->base + FPGA_IRQ_SET_CLR) & fpga->irq_mask; - for_each_set_bit(bit, &pending, CPLDS_NB_IRQ) { - generic_handle_irq(irq_find_mapping(fpga->irqdomain, - bit)); - } + for_each_set_bit(bit, &pending, CPLDS_NB_IRQ) + generic_handle_domain_irq(fpga->irqdomain, bit); } while (pending); return IRQ_HANDLED; diff --git a/arch/arm/mach-s3c/irq-s3c24xx.c b/arch/arm/mach-s3c/irq-s3c24xx.c index 0c631c14a817..3edc5f614eef 100644 --- a/arch/arm/mach-s3c/irq-s3c24xx.c +++ b/arch/arm/mach-s3c/irq-s3c24xx.c @@ -298,7 +298,7 @@ static void s3c_irq_demux(struct irq_desc *desc) struct s3c_irq_data *irq_data = irq_desc_get_chip_data(desc); struct s3c_irq_intc *intc = irq_data->intc; struct s3c_irq_intc *sub_intc = irq_data->sub_intc; - unsigned int n, offset, irq; + unsigned int n, offset; unsigned long src, msk; /* we're using individual domains for the non-dt case @@ -318,8 +318,7 @@ static void s3c_irq_demux(struct irq_desc *desc) while (src) { n = __ffs(src); src &= ~(1 << n); - irq = irq_find_mapping(sub_intc->domain, offset + n); - generic_handle_irq(irq); + generic_handle_domain_irq(sub_intc->domain, offset + n); } chained_irq_exit(chip, desc); diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 7583bda5ea7d..a4e006005107 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -1609,6 +1609,13 @@ static void __init early_paging_init(const struct machine_desc *mdesc) return; /* + * Offset the kernel section physical offsets so that the kernel + * mapping will work out later on. + */ + kernel_sec_start += offset; + kernel_sec_end += offset; + + /* * Get the address of the remap function in the 1:1 identity * mapping setup by the early page table assembly code. We * must get this prior to the pv update. 
The following barrier @@ -1716,7 +1723,7 @@ void __init paging_init(const struct machine_desc *mdesc) { void *zero_page; - pr_debug("physical kernel sections: 0x%08x-0x%08x\n", + pr_debug("physical kernel sections: 0x%08llx-0x%08llx\n", kernel_sec_start, kernel_sec_end); prepare_page_table(); diff --git a/arch/arm/mm/pv-fixup-asm.S b/arch/arm/mm/pv-fixup-asm.S index 5c5e1952000a..f8e11f7c7880 100644 --- a/arch/arm/mm/pv-fixup-asm.S +++ b/arch/arm/mm/pv-fixup-asm.S @@ -29,7 +29,7 @@ ENTRY(lpae_pgtables_remap_asm) ldr r6, =(_end - 1) add r7, r2, #0x1000 add r6, r7, r6, lsr #SECTION_SHIFT - L2_ORDER - add r7, r7, #PAGE_OFFSET >> (SECTION_SHIFT - L2_ORDER) + add r7, r7, #KERNEL_OFFSET >> (SECTION_SHIFT - L2_ORDER) 1: ldrd r4, r5, [r7] adds r4, r4, r0 adc r5, r5, r1 diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index fdcd54d39c1e..62c3c1d2190f 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -156,6 +156,7 @@ config ARM64 select HAVE_ARCH_KGDB select HAVE_ARCH_MMAP_RND_BITS select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT + select HAVE_ARCH_PFN_VALID select HAVE_ARCH_PREL32_RELOCATIONS select HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET select HAVE_ARCH_SECCOMP_FILTER diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 7b668db43261..1110d386f3b4 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -183,6 +183,8 @@ endif # We use MRPROPER_FILES and CLEAN_FILES now archclean: $(Q)$(MAKE) $(clean)=$(boot) + $(Q)$(MAKE) $(clean)=arch/arm64/kernel/vdso + $(Q)$(MAKE) $(clean)=arch/arm64/kernel/vdso32 ifeq ($(KBUILD_EXTMOD),) # We need to generate vdso-offsets.h before compiling certain files in kernel/. diff --git a/arch/arm64/boot/dts/qcom/msm8992-bullhead-rev-101.dts b/arch/arm64/boot/dts/qcom/msm8992-bullhead-rev-101.dts index 23cdcc9f7c72..1ccca83292ac 100644 --- a/arch/arm64/boot/dts/qcom/msm8992-bullhead-rev-101.dts +++ b/arch/arm64/boot/dts/qcom/msm8992-bullhead-rev-101.dts @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* Copyright (c) 2015, LGE Inc. All rights reserved. * Copyright (c) 2016, The Linux Foundation. All rights reserved. + * Copyright (c) 2021, Petr Vorel <petr.vorel@gmail.com> */ /dts-v1/; @@ -9,6 +10,9 @@ #include "pm8994.dtsi" #include "pmi8994.dtsi" +/* cont_splash_mem has different memory mapping */ +/delete-node/ &cont_splash_mem; + / { model = "LG Nexus 5X"; compatible = "lg,bullhead", "qcom,msm8992"; @@ -17,6 +21,9 @@ qcom,board-id = <0xb64 0>; qcom,pmic-id = <0x10009 0x1000A 0x0 0x0>; + /* Bullhead firmware doesn't support PSCI */ + /delete-node/ psci; + aliases { serial0 = &blsp1_uart2; }; @@ -38,6 +45,11 @@ ftrace-size = <0x10000>; pmsg-size = <0x20000>; }; + + cont_splash_mem: memory@3400000 { + reg = <0 0x03400000 0 0x1200000>; + no-map; + }; }; }; diff --git a/arch/arm64/boot/dts/qcom/msm8994-angler-rev-101.dts b/arch/arm64/boot/dts/qcom/msm8994-angler-rev-101.dts index ffe1a9bd8f70..c096b7758aa0 100644 --- a/arch/arm64/boot/dts/qcom/msm8994-angler-rev-101.dts +++ b/arch/arm64/boot/dts/qcom/msm8994-angler-rev-101.dts @@ -1,12 +1,16 @@ // SPDX-License-Identifier: GPL-2.0-only /* Copyright (c) 2015, Huawei Inc. All rights reserved. * Copyright (c) 2016, The Linux Foundation. All rights reserved. 
+ * Copyright (c) 2021, Petr Vorel <petr.vorel@gmail.com> */ /dts-v1/; #include "msm8994.dtsi" +/* Angler's firmware does not report where the memory is allocated */ +/delete-node/ &cont_splash_mem; + / { model = "Huawei Nexus 6P"; compatible = "huawei,angler", "qcom,msm8994"; diff --git a/arch/arm64/boot/dts/qcom/sc7280.dtsi b/arch/arm64/boot/dts/qcom/sc7280.dtsi index a8c274ad74c4..188c5768a55a 100644 --- a/arch/arm64/boot/dts/qcom/sc7280.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7280.dtsi @@ -200,7 +200,7 @@ &BIG_CPU_SLEEP_1 &CLUSTER_SLEEP_0>; next-level-cache = <&L2_700>; - qcom,freq-domain = <&cpufreq_hw 1>; + qcom,freq-domain = <&cpufreq_hw 2>; #cooling-cells = <2>; L2_700: l2-cache { compatible = "cache"; diff --git a/arch/arm64/boot/dts/qcom/sdm845-oneplus-common.dtsi b/arch/arm64/boot/dts/qcom/sdm845-oneplus-common.dtsi index 4d052e39b348..eb6b1d15293d 100644 --- a/arch/arm64/boot/dts/qcom/sdm845-oneplus-common.dtsi +++ b/arch/arm64/boot/dts/qcom/sdm845-oneplus-common.dtsi @@ -69,7 +69,7 @@ }; rmtfs_upper_guard: memory@f5d01000 { no-map; - reg = <0 0xf5d01000 0 0x2000>; + reg = <0 0xf5d01000 0 0x1000>; }; /* @@ -78,7 +78,7 @@ */ removed_region: memory@88f00000 { no-map; - reg = <0 0x88f00000 0 0x200000>; + reg = <0 0x88f00000 0 0x1c00000>; }; ramoops: ramoops@ac300000 { diff --git a/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts b/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts index c2a709a384e9..d7591a4621a2 100644 --- a/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts +++ b/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts @@ -700,7 +700,7 @@ left_spkr: wsa8810-left{ compatible = "sdw10217211000"; reg = <0 3>; - powerdown-gpios = <&wcdgpio 2 GPIO_ACTIVE_HIGH>; + powerdown-gpios = <&wcdgpio 1 GPIO_ACTIVE_HIGH>; #thermal-sensor-cells = <0>; sound-name-prefix = "SpkrLeft"; #sound-dai-cells = <0>; @@ -708,7 +708,7 @@ right_spkr: wsa8810-right{ compatible = "sdw10217211000"; - powerdown-gpios = <&wcdgpio 3 GPIO_ACTIVE_HIGH>; + powerdown-gpios = <&wcdgpio 2 GPIO_ACTIVE_HIGH>; reg = <0 4>; #thermal-sensor-cells = <0>; sound-name-prefix = "SpkrRight"; diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index b8eb0453123d..55f19450091b 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -51,7 +51,7 @@ config CRYPTO_SM4_ARM64_CE tristate "SM4 symmetric cipher (ARMv8.2 Crypto Extensions)" depends on KERNEL_MODE_NEON select CRYPTO_ALGAPI - select CRYPTO_SM4 + select CRYPTO_LIB_SM4 config CRYPTO_GHASH_ARM64_CE tristate "GHASH/AES-GCM using ARMv8 Crypto Extensions" diff --git a/arch/arm64/crypto/sm4-ce-glue.c b/arch/arm64/crypto/sm4-ce-glue.c index 2754c875d39c..9c93cfc4841b 100644 --- a/arch/arm64/crypto/sm4-ce-glue.c +++ b/arch/arm64/crypto/sm4-ce-glue.c @@ -17,12 +17,20 @@ MODULE_LICENSE("GPL v2"); asmlinkage void sm4_ce_do_crypt(const u32 *rk, void *out, const void *in); +static int sm4_ce_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int key_len) +{ + struct sm4_ctx *ctx = crypto_tfm_ctx(tfm); + + return sm4_expandkey(ctx, key, key_len); +} + static void sm4_ce_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { - const struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm); + const struct sm4_ctx *ctx = crypto_tfm_ctx(tfm); if (!crypto_simd_usable()) { - crypto_sm4_encrypt(tfm, out, in); + sm4_crypt_block(ctx->rkey_enc, out, in); } else { kernel_neon_begin(); sm4_ce_do_crypt(ctx->rkey_enc, out, in); @@ -32,10 +40,10 @@ static void sm4_ce_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) static void sm4_ce_decrypt(struct 
crypto_tfm *tfm, u8 *out, const u8 *in) { - const struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm); + const struct sm4_ctx *ctx = crypto_tfm_ctx(tfm); if (!crypto_simd_usable()) { - crypto_sm4_decrypt(tfm, out, in); + sm4_crypt_block(ctx->rkey_dec, out, in); } else { kernel_neon_begin(); sm4_ce_do_crypt(ctx->rkey_dec, out, in); @@ -49,12 +57,12 @@ static struct crypto_alg sm4_ce_alg = { .cra_priority = 200, .cra_flags = CRYPTO_ALG_TYPE_CIPHER, .cra_blocksize = SM4_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crypto_sm4_ctx), + .cra_ctxsize = sizeof(struct sm4_ctx), .cra_module = THIS_MODULE, .cra_u.cipher = { .cia_min_keysize = SM4_KEY_SIZE, .cia_max_keysize = SM4_KEY_SIZE, - .cia_setkey = crypto_sm4_set_key, + .cia_setkey = sm4_ce_setkey, .cia_encrypt = sm4_ce_encrypt, .cia_decrypt = sm4_ce_decrypt } diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index 21fa330f498d..b83fb24954b7 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -33,8 +33,7 @@ * EL2. */ .macro __init_el2_timers - mrs x0, cnthctl_el2 - orr x0, x0, #3 // Enable EL1 physical timers + mov x0, #3 // Enable EL1 physical timers msr cnthctl_el2, x0 msr cntvoff_el2, xzr // Clear virtual offset .endm diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h index 993a27ea6f54..f98c91bbd7c1 100644 --- a/arch/arm64/include/asm/page.h +++ b/arch/arm64/include/asm/page.h @@ -41,6 +41,7 @@ void tag_clear_highpage(struct page *to); typedef struct page *pgtable_t; +int pfn_valid(unsigned long pfn); int pfn_is_map_memory(unsigned long pfn); #include <asm/memory.h> diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index e9a2b8f27792..0ca72f5cda41 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -94,10 +94,14 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, kvm->arch.return_nisv_io_abort_to_user = true; break; case KVM_CAP_ARM_MTE: - if (!system_supports_mte() || kvm->created_vcpus) - return -EINVAL; - r = 0; - kvm->arch.mte_enabled = true; + mutex_lock(&kvm->lock); + if (!system_supports_mte() || kvm->created_vcpus) { + r = -EINVAL; + } else { + r = 0; + kvm->arch.mte_enabled = true; + } + mutex_unlock(&kvm->lock); break; default: r = -EINVAL; diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index d938ce95d3bd..a6ce991b1467 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -193,7 +193,7 @@ static bool range_is_memory(u64 start, u64 end) { struct kvm_mem_range r1, r2; - if (!find_mem_range(start, &r1) || !find_mem_range(end, &r2)) + if (!find_mem_range(start, &r1) || !find_mem_range(end - 1, &r2)) return false; if (r1.start != r2.start) return false; diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 8490ed2917ff..1fdb7bb7c198 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -219,6 +219,43 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) free_area_init(max_zone_pfns); } +int pfn_valid(unsigned long pfn) +{ + phys_addr_t addr = PFN_PHYS(pfn); + struct mem_section *ms; + + /* + * Ensure the upper PAGE_SHIFT bits are clear in the + * pfn. Else it might lead to false positives when + * some of the upper bits are set, but the lower bits + * match a valid pfn. 
+ */ + if (PHYS_PFN(addr) != pfn) + return 0; + + if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS) + return 0; + + ms = __pfn_to_section(pfn); + if (!valid_section(ms)) + return 0; + + /* + * ZONE_DEVICE memory does not have the memblock entries. + * memblock_is_map_memory() check for ZONE_DEVICE based + * addresses will always fail. Even the normal hotplugged + * memory will never have MEMBLOCK_NOMAP flag set in their + * memblock entries. Skip memblock search for all non early + * memory sections covering all of hotplug memory including + * both normal and ZONE_DEVICE based. + */ + if (!early_section(ms)) + return pfn_section_valid(ms, pfn); + + return memblock_is_memory(addr); +} +EXPORT_SYMBOL(pfn_valid); + int pfn_is_map_memory(unsigned long pfn) { phys_addr_t addr = PFN_PHYS(pfn); diff --git a/arch/m68k/Kconfig.cpu b/arch/m68k/Kconfig.cpu index 29e946394fdb..277d61a09463 100644 --- a/arch/m68k/Kconfig.cpu +++ b/arch/m68k/Kconfig.cpu @@ -26,6 +26,7 @@ config COLDFIRE bool "Coldfire CPU family support" select ARCH_HAVE_CUSTOM_GPIO_H select CPU_HAS_NO_BITFIELDS + select CPU_HAS_NO_CAS select CPU_HAS_NO_MULDIV64 select GENERIC_CSUM select GPIOLIB @@ -39,6 +40,7 @@ config M68000 bool depends on !MMU select CPU_HAS_NO_BITFIELDS + select CPU_HAS_NO_CAS select CPU_HAS_NO_MULDIV64 select CPU_HAS_NO_UNALIGNED select GENERIC_CSUM @@ -54,6 +56,7 @@ config M68000 config MCPU32 bool select CPU_HAS_NO_BITFIELDS + select CPU_HAS_NO_CAS select CPU_HAS_NO_UNALIGNED select CPU_NO_EFFICIENT_FFS help @@ -383,7 +386,7 @@ config ADVANCED config RMW_INSNS bool "Use read-modify-write instructions" - depends on ADVANCED + depends on ADVANCED && !CPU_HAS_NO_CAS help This allows to use certain instructions that work with indivisible read-modify-write bus cycles. While this is faster than the @@ -450,6 +453,9 @@ config M68K_L2_CACHE config CPU_HAS_NO_BITFIELDS bool +config CPU_HAS_NO_CAS + bool + config CPU_HAS_NO_MULDIV64 bool diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig index 0a2cacf7be08..5f536286f5fc 100644 --- a/arch/m68k/configs/amiga_defconfig +++ b/arch/m68k/configs/amiga_defconfig @@ -84,6 +84,7 @@ CONFIG_IPV6_ILA=m CONFIG_IPV6_VTI=m CONFIG_IPV6_GRE=m CONFIG_NETFILTER=y +CONFIG_NETFILTER_NETLINK_HOOK=m CONFIG_NF_CONNTRACK=m CONFIG_NF_CONNTRACK_ZONES=y # CONFIG_NF_CONNTRACK_PROCFS is not set @@ -323,7 +324,6 @@ CONFIG_CDROM_PKTCDVD=m CONFIG_ATA_OVER_ETH=m CONFIG_DUMMY_IRQ=m CONFIG_RAID_ATTRS=m -CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=m CONFIG_BLK_DEV_SR=y @@ -502,6 +502,7 @@ CONFIG_ROOT_NFS=y CONFIG_NFSD=m CONFIG_NFSD_V3=y CONFIG_CIFS=m +# CONFIG_CIFS_STATS2 is not set # CONFIG_CIFS_DEBUG is not set CONFIG_CODA_FS=m CONFIG_NLS_CODEPAGE_437=y @@ -616,6 +617,7 @@ CONFIG_PRIME_NUMBERS=m CONFIG_CRC32_SELFTEST=m CONFIG_CRC64=m CONFIG_XZ_DEC_TEST=m +CONFIG_GLOB_SELFTEST=m CONFIG_STRING_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y @@ -624,7 +626,6 @@ CONFIG_WW_MUTEX_SELFTEST=m CONFIG_EARLY_PRINTK=y CONFIG_KUNIT=m CONFIG_KUNIT_ALL_TESTS=m -CONFIG_TEST_LIST_SORT=m CONFIG_TEST_MIN_HEAP=m CONFIG_TEST_SORT=m CONFIG_TEST_DIV64=m @@ -636,6 +637,7 @@ CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_STRSCPY=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m +CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig index 4dc6dcfaf28a..d9568644051a 100644 --- a/arch/m68k/configs/apollo_defconfig +++ b/arch/m68k/configs/apollo_defconfig @@ -80,6 
+80,7 @@ CONFIG_IPV6_ILA=m CONFIG_IPV6_VTI=m CONFIG_IPV6_GRE=m CONFIG_NETFILTER=y +CONFIG_NETFILTER_NETLINK_HOOK=m CONFIG_NF_CONNTRACK=m CONFIG_NF_CONNTRACK_ZONES=y # CONFIG_NF_CONNTRACK_PROCFS is not set @@ -458,6 +459,7 @@ CONFIG_ROOT_NFS=y CONFIG_NFSD=m CONFIG_NFSD_V3=y CONFIG_CIFS=m +# CONFIG_CIFS_STATS2 is not set # CONFIG_CIFS_DEBUG is not set CONFIG_CODA_FS=m CONFIG_NLS_CODEPAGE_437=y @@ -580,7 +582,6 @@ CONFIG_WW_MUTEX_SELFTEST=m CONFIG_EARLY_PRINTK=y CONFIG_KUNIT=m CONFIG_KUNIT_ALL_TESTS=m -CONFIG_TEST_LIST_SORT=m CONFIG_TEST_MIN_HEAP=m CONFIG_TEST_SORT=m CONFIG_TEST_DIV64=m @@ -592,6 +593,7 @@ CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_STRSCPY=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m +CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig index 23d910a692ab..dbf1960c6669 100644 --- a/arch/m68k/configs/atari_defconfig +++ b/arch/m68k/configs/atari_defconfig @@ -87,6 +87,7 @@ CONFIG_IPV6_ILA=m CONFIG_IPV6_VTI=m CONFIG_IPV6_GRE=m CONFIG_NETFILTER=y +CONFIG_NETFILTER_NETLINK_HOOK=m CONFIG_NF_CONNTRACK=m CONFIG_NF_CONNTRACK_ZONES=y # CONFIG_NF_CONNTRACK_PROCFS is not set @@ -324,7 +325,6 @@ CONFIG_CDROM_PKTCDVD=m CONFIG_ATA_OVER_ETH=m CONFIG_DUMMY_IRQ=m CONFIG_RAID_ATTRS=m -CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=m CONFIG_BLK_DEV_SR=y @@ -480,6 +480,7 @@ CONFIG_ROOT_NFS=y CONFIG_NFSD=m CONFIG_NFSD_V3=y CONFIG_CIFS=m +# CONFIG_CIFS_STATS2 is not set # CONFIG_CIFS_DEBUG is not set CONFIG_CODA_FS=m CONFIG_NLS_CODEPAGE_437=y @@ -594,6 +595,7 @@ CONFIG_PRIME_NUMBERS=m CONFIG_CRC32_SELFTEST=m CONFIG_CRC64=m CONFIG_XZ_DEC_TEST=m +CONFIG_GLOB_SELFTEST=m CONFIG_STRING_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y @@ -602,7 +604,6 @@ CONFIG_WW_MUTEX_SELFTEST=m CONFIG_EARLY_PRINTK=y CONFIG_KUNIT=m CONFIG_KUNIT_ALL_TESTS=m -CONFIG_TEST_LIST_SORT=m CONFIG_TEST_MIN_HEAP=m CONFIG_TEST_SORT=m CONFIG_TEST_DIV64=m @@ -614,6 +615,7 @@ CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_STRSCPY=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m +CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig index 2c3f42833846..7620db3e33e7 100644 --- a/arch/m68k/configs/bvme6000_defconfig +++ b/arch/m68k/configs/bvme6000_defconfig @@ -77,6 +77,7 @@ CONFIG_IPV6_ILA=m CONFIG_IPV6_VTI=m CONFIG_IPV6_GRE=m CONFIG_NETFILTER=y +CONFIG_NETFILTER_NETLINK_HOOK=m CONFIG_NF_CONNTRACK=m CONFIG_NF_CONNTRACK_ZONES=y # CONFIG_NF_CONNTRACK_PROCFS is not set @@ -451,6 +452,7 @@ CONFIG_ROOT_NFS=y CONFIG_NFSD=m CONFIG_NFSD_V3=y CONFIG_CIFS=m +# CONFIG_CIFS_STATS2 is not set # CONFIG_CIFS_DEBUG is not set CONFIG_CODA_FS=m CONFIG_NLS_CODEPAGE_437=y @@ -573,7 +575,6 @@ CONFIG_WW_MUTEX_SELFTEST=m CONFIG_EARLY_PRINTK=y CONFIG_KUNIT=m CONFIG_KUNIT_ALL_TESTS=m -CONFIG_TEST_LIST_SORT=m CONFIG_TEST_MIN_HEAP=m CONFIG_TEST_SORT=m CONFIG_TEST_DIV64=m @@ -585,6 +586,7 @@ CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_STRSCPY=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m +CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig index 5b1898d4b249..113a02d47ebb 100644 --- a/arch/m68k/configs/hp300_defconfig +++ b/arch/m68k/configs/hp300_defconfig @@ -79,6 +79,7 @@ CONFIG_IPV6_ILA=m CONFIG_IPV6_VTI=m CONFIG_IPV6_GRE=m CONFIG_NETFILTER=y +CONFIG_NETFILTER_NETLINK_HOOK=m CONFIG_NF_CONNTRACK=m 
CONFIG_NF_CONNTRACK_ZONES=y # CONFIG_NF_CONNTRACK_PROCFS is not set @@ -460,6 +461,7 @@ CONFIG_ROOT_NFS=y CONFIG_NFSD=m CONFIG_NFSD_V3=y CONFIG_CIFS=m +# CONFIG_CIFS_STATS2 is not set # CONFIG_CIFS_DEBUG is not set CONFIG_CODA_FS=m CONFIG_NLS_CODEPAGE_437=y @@ -582,7 +584,6 @@ CONFIG_WW_MUTEX_SELFTEST=m CONFIG_EARLY_PRINTK=y CONFIG_KUNIT=m CONFIG_KUNIT_ALL_TESTS=m -CONFIG_TEST_LIST_SORT=m CONFIG_TEST_MIN_HEAP=m CONFIG_TEST_SORT=m CONFIG_TEST_DIV64=m @@ -594,6 +595,7 @@ CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_STRSCPY=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m +CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig index 9606ccd8dafa..a8e006e8da66 100644 --- a/arch/m68k/configs/mac_defconfig +++ b/arch/m68k/configs/mac_defconfig @@ -78,6 +78,7 @@ CONFIG_IPV6_ILA=m CONFIG_IPV6_VTI=m CONFIG_IPV6_GRE=m CONFIG_NETFILTER=y +CONFIG_NETFILTER_NETLINK_HOOK=m CONFIG_NF_CONNTRACK=m CONFIG_NF_CONNTRACK_ZONES=y # CONFIG_NF_CONNTRACK_PROCFS is not set @@ -315,7 +316,6 @@ CONFIG_CDROM_PKTCDVD=m CONFIG_ATA_OVER_ETH=m CONFIG_DUMMY_IRQ=m CONFIG_RAID_ATTRS=m -CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=m CONFIG_BLK_DEV_SR=y @@ -482,6 +482,7 @@ CONFIG_ROOT_NFS=y CONFIG_NFSD=m CONFIG_NFSD_V3=y CONFIG_CIFS=m +# CONFIG_CIFS_STATS2 is not set # CONFIG_CIFS_DEBUG is not set CONFIG_CODA_FS=m CONFIG_NLS_CODEPAGE_437=y @@ -596,6 +597,7 @@ CONFIG_PRIME_NUMBERS=m CONFIG_CRC32_SELFTEST=m CONFIG_CRC64=m CONFIG_XZ_DEC_TEST=m +CONFIG_GLOB_SELFTEST=m CONFIG_STRING_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y @@ -604,7 +606,6 @@ CONFIG_WW_MUTEX_SELFTEST=m CONFIG_EARLY_PRINTK=y CONFIG_KUNIT=m CONFIG_KUNIT_ALL_TESTS=m -CONFIG_TEST_LIST_SORT=m CONFIG_TEST_MIN_HEAP=m CONFIG_TEST_SORT=m CONFIG_TEST_DIV64=m @@ -616,6 +617,7 @@ CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_STRSCPY=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m +CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig index 3175ba5007e1..b6655907a1f3 100644 --- a/arch/m68k/configs/multi_defconfig +++ b/arch/m68k/configs/multi_defconfig @@ -98,6 +98,7 @@ CONFIG_IPV6_ILA=m CONFIG_IPV6_VTI=m CONFIG_IPV6_GRE=m CONFIG_NETFILTER=y +CONFIG_NETFILTER_NETLINK_HOOK=m CONFIG_NF_CONNTRACK=m CONFIG_NF_CONNTRACK_ZONES=y # CONFIG_NF_CONNTRACK_PROCFS is not set @@ -344,7 +345,6 @@ CONFIG_CDROM_PKTCDVD=m CONFIG_ATA_OVER_ETH=m CONFIG_DUMMY_IRQ=m CONFIG_RAID_ATTRS=m -CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=m CONFIG_BLK_DEV_SR=y @@ -567,6 +567,7 @@ CONFIG_ROOT_NFS=y CONFIG_NFSD=m CONFIG_NFSD_V3=y CONFIG_CIFS=m +# CONFIG_CIFS_STATS2 is not set # CONFIG_CIFS_DEBUG is not set CONFIG_CODA_FS=m CONFIG_NLS_CODEPAGE_437=y @@ -681,6 +682,7 @@ CONFIG_PRIME_NUMBERS=m CONFIG_CRC32_SELFTEST=m CONFIG_CRC64=m CONFIG_XZ_DEC_TEST=m +CONFIG_GLOB_SELFTEST=m CONFIG_STRING_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y @@ -689,7 +691,6 @@ CONFIG_WW_MUTEX_SELFTEST=m CONFIG_EARLY_PRINTK=y CONFIG_KUNIT=m CONFIG_KUNIT_ALL_TESTS=m -CONFIG_TEST_LIST_SORT=m CONFIG_TEST_MIN_HEAP=m CONFIG_TEST_SORT=m CONFIG_TEST_DIV64=m @@ -701,6 +702,7 @@ CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_STRSCPY=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m +CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig index 
793085f00c99..563ba47db8c6 100644 --- a/arch/m68k/configs/mvme147_defconfig +++ b/arch/m68k/configs/mvme147_defconfig @@ -76,6 +76,7 @@ CONFIG_IPV6_ILA=m CONFIG_IPV6_VTI=m CONFIG_IPV6_GRE=m CONFIG_NETFILTER=y +CONFIG_NETFILTER_NETLINK_HOOK=m CONFIG_NF_CONNTRACK=m CONFIG_NF_CONNTRACK_ZONES=y # CONFIG_NF_CONNTRACK_PROCFS is not set @@ -450,6 +451,7 @@ CONFIG_ROOT_NFS=y CONFIG_NFSD=m CONFIG_NFSD_V3=y CONFIG_CIFS=m +# CONFIG_CIFS_STATS2 is not set # CONFIG_CIFS_DEBUG is not set CONFIG_CODA_FS=m CONFIG_NLS_CODEPAGE_437=y @@ -572,7 +574,6 @@ CONFIG_WW_MUTEX_SELFTEST=m CONFIG_EARLY_PRINTK=y CONFIG_KUNIT=m CONFIG_KUNIT_ALL_TESTS=m -CONFIG_TEST_LIST_SORT=m CONFIG_TEST_MIN_HEAP=m CONFIG_TEST_SORT=m CONFIG_TEST_DIV64=m @@ -584,6 +585,7 @@ CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_STRSCPY=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m +CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig index 56fbac7943b2..9f1b44de4706 100644 --- a/arch/m68k/configs/mvme16x_defconfig +++ b/arch/m68k/configs/mvme16x_defconfig @@ -77,6 +77,7 @@ CONFIG_IPV6_ILA=m CONFIG_IPV6_VTI=m CONFIG_IPV6_GRE=m CONFIG_NETFILTER=y +CONFIG_NETFILTER_NETLINK_HOOK=m CONFIG_NF_CONNTRACK=m CONFIG_NF_CONNTRACK_ZONES=y # CONFIG_NF_CONNTRACK_PROCFS is not set @@ -451,6 +452,7 @@ CONFIG_ROOT_NFS=y CONFIG_NFSD=m CONFIG_NFSD_V3=y CONFIG_CIFS=m +# CONFIG_CIFS_STATS2 is not set # CONFIG_CIFS_DEBUG is not set CONFIG_CODA_FS=m CONFIG_NLS_CODEPAGE_437=y @@ -573,7 +575,6 @@ CONFIG_WW_MUTEX_SELFTEST=m CONFIG_EARLY_PRINTK=y CONFIG_KUNIT=m CONFIG_KUNIT_ALL_TESTS=m -CONFIG_TEST_LIST_SORT=m CONFIG_TEST_MIN_HEAP=m CONFIG_TEST_SORT=m CONFIG_TEST_DIV64=m @@ -585,6 +586,7 @@ CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_STRSCPY=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m +CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig index 0e15431b65e2..1993433d0840 100644 --- a/arch/m68k/configs/q40_defconfig +++ b/arch/m68k/configs/q40_defconfig @@ -78,6 +78,7 @@ CONFIG_IPV6_ILA=m CONFIG_IPV6_VTI=m CONFIG_IPV6_GRE=m CONFIG_NETFILTER=y +CONFIG_NETFILTER_NETLINK_HOOK=m CONFIG_NF_CONNTRACK=m CONFIG_NF_CONNTRACK_ZONES=y # CONFIG_NF_CONNTRACK_PROCFS is not set @@ -314,7 +315,6 @@ CONFIG_CDROM_PKTCDVD=m CONFIG_ATA_OVER_ETH=m CONFIG_DUMMY_IRQ=m CONFIG_RAID_ATTRS=m -CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=m CONFIG_BLK_DEV_SR=y @@ -469,6 +469,7 @@ CONFIG_ROOT_NFS=y CONFIG_NFSD=m CONFIG_NFSD_V3=y CONFIG_CIFS=m +# CONFIG_CIFS_STATS2 is not set # CONFIG_CIFS_DEBUG is not set CONFIG_CODA_FS=m CONFIG_NLS_CODEPAGE_437=y @@ -583,6 +584,7 @@ CONFIG_PRIME_NUMBERS=m CONFIG_CRC32_SELFTEST=m CONFIG_CRC64=m CONFIG_XZ_DEC_TEST=m +CONFIG_GLOB_SELFTEST=m CONFIG_STRING_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y @@ -591,7 +593,6 @@ CONFIG_WW_MUTEX_SELFTEST=m CONFIG_EARLY_PRINTK=y CONFIG_KUNIT=m CONFIG_KUNIT_ALL_TESTS=m -CONFIG_TEST_LIST_SORT=m CONFIG_TEST_MIN_HEAP=m CONFIG_TEST_SORT=m CONFIG_TEST_DIV64=m @@ -603,6 +604,7 @@ CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_STRSCPY=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m +CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig index 3490a05f29b8..56dbc63cef5b 100644 --- a/arch/m68k/configs/sun3_defconfig +++ b/arch/m68k/configs/sun3_defconfig @@ -74,6 +74,7 @@ CONFIG_IPV6_ILA=m 
CONFIG_IPV6_VTI=m CONFIG_IPV6_GRE=m CONFIG_NETFILTER=y +CONFIG_NETFILTER_NETLINK_HOOK=m CONFIG_NF_CONNTRACK=m CONFIG_NF_CONNTRACK_ZONES=y # CONFIG_NF_CONNTRACK_PROCFS is not set @@ -453,6 +454,7 @@ CONFIG_ROOT_NFS=y CONFIG_NFSD=m CONFIG_NFSD_V3=y CONFIG_CIFS=m +# CONFIG_CIFS_STATS2 is not set # CONFIG_CIFS_DEBUG is not set CONFIG_CODA_FS=m CONFIG_NLS_CODEPAGE_437=y @@ -574,7 +576,6 @@ CONFIG_TEST_LOCKUP=m CONFIG_WW_MUTEX_SELFTEST=m CONFIG_KUNIT=m CONFIG_KUNIT_ALL_TESTS=m -CONFIG_TEST_LIST_SORT=m CONFIG_TEST_MIN_HEAP=m CONFIG_TEST_SORT=m CONFIG_TEST_DIV64=m @@ -586,6 +587,7 @@ CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_STRSCPY=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m +CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig index 4e92c8c332fc..6bd1bba81ac3 100644 --- a/arch/m68k/configs/sun3x_defconfig +++ b/arch/m68k/configs/sun3x_defconfig @@ -74,6 +74,7 @@ CONFIG_IPV6_ILA=m CONFIG_IPV6_VTI=m CONFIG_IPV6_GRE=m CONFIG_NETFILTER=y +CONFIG_NETFILTER_NETLINK_HOOK=m CONFIG_NF_CONNTRACK=m CONFIG_NF_CONNTRACK_ZONES=y # CONFIG_NF_CONNTRACK_PROCFS is not set @@ -452,6 +453,7 @@ CONFIG_ROOT_NFS=y CONFIG_NFSD=m CONFIG_NFSD_V3=y CONFIG_CIFS=m +# CONFIG_CIFS_STATS2 is not set # CONFIG_CIFS_DEBUG is not set CONFIG_CODA_FS=m CONFIG_NLS_CODEPAGE_437=y @@ -574,7 +576,6 @@ CONFIG_WW_MUTEX_SELFTEST=m CONFIG_EARLY_PRINTK=y CONFIG_KUNIT=m CONFIG_KUNIT_ALL_TESTS=m -CONFIG_TEST_LIST_SORT=m CONFIG_TEST_MIN_HEAP=m CONFIG_TEST_SORT=m CONFIG_TEST_DIV64=m @@ -586,6 +587,7 @@ CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_STRSCPY=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m +CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m diff --git a/arch/m68k/emu/nfeth.c b/arch/m68k/emu/nfeth.c index d2875e32abfc..79e55421cfb1 100644 --- a/arch/m68k/emu/nfeth.c +++ b/arch/m68k/emu/nfeth.c @@ -254,8 +254,8 @@ static void __exit nfeth_cleanup(void) for (i = 0; i < MAX_UNIT; i++) { if (nfeth_dev[i]) { - unregister_netdev(nfeth_dev[0]); - free_netdev(nfeth_dev[0]); + unregister_netdev(nfeth_dev[i]); + free_netdev(nfeth_dev[i]); } } free_irq(nfEtherIRQ, nfeth_interrupt); diff --git a/arch/m68k/include/asm/atomic.h b/arch/m68k/include/asm/atomic.h index 8637bf8a2f65..cfba83d230fd 100644 --- a/arch/m68k/include/asm/atomic.h +++ b/arch/m68k/include/asm/atomic.h @@ -48,7 +48,7 @@ static inline int arch_atomic_##op##_return(int i, atomic_t *v) \ " casl %2,%1,%0\n" \ " jne 1b" \ : "+m" (*v), "=&d" (t), "=&d" (tmp) \ - : "g" (i), "2" (arch_atomic_read(v))); \ + : "di" (i), "2" (arch_atomic_read(v))); \ return t; \ } @@ -63,7 +63,7 @@ static inline int arch_atomic_fetch_##op(int i, atomic_t *v) \ " casl %2,%1,%0\n" \ " jne 1b" \ : "+m" (*v), "=&d" (t), "=&d" (tmp) \ - : "g" (i), "2" (arch_atomic_read(v))); \ + : "di" (i), "2" (arch_atomic_read(v))); \ return tmp; \ } diff --git a/arch/mips/ath25/ar2315.c b/arch/mips/ath25/ar2315.c index 9dbed7b5ea76..76e43a73ba1b 100644 --- a/arch/mips/ath25/ar2315.c +++ b/arch/mips/ath25/ar2315.c @@ -69,24 +69,24 @@ static void ar2315_misc_irq_handler(struct irq_desc *desc) { u32 pending = ar2315_rst_reg_read(AR2315_ISR) & ar2315_rst_reg_read(AR2315_IMR); - unsigned nr, misc_irq = 0; + unsigned nr; + int ret = 0; if (pending) { struct irq_domain *domain = irq_desc_get_handler_data(desc); nr = __ffs(pending); - misc_irq = irq_find_mapping(domain, nr); - } - if (misc_irq) { if (nr == AR2315_MISC_IRQ_GPIO) ar2315_rst_reg_write(AR2315_ISR, AR2315_ISR_GPIO); else if (nr == 
AR2315_MISC_IRQ_WATCHDOG) ar2315_rst_reg_write(AR2315_ISR, AR2315_ISR_WD); - generic_handle_irq(misc_irq); - } else { - spurious_interrupt(); + + ret = generic_handle_domain_irq(domain, nr); } + + if (!pending || ret) + spurious_interrupt(); } static void ar2315_misc_irq_unmask(struct irq_data *d) diff --git a/arch/mips/ath25/ar5312.c b/arch/mips/ath25/ar5312.c index 23c879f4b734..822b639dbd1e 100644 --- a/arch/mips/ath25/ar5312.c +++ b/arch/mips/ath25/ar5312.c @@ -73,22 +73,21 @@ static void ar5312_misc_irq_handler(struct irq_desc *desc) { u32 pending = ar5312_rst_reg_read(AR5312_ISR) & ar5312_rst_reg_read(AR5312_IMR); - unsigned nr, misc_irq = 0; + unsigned nr; + int ret = 0; if (pending) { struct irq_domain *domain = irq_desc_get_handler_data(desc); nr = __ffs(pending); - misc_irq = irq_find_mapping(domain, nr); - } - if (misc_irq) { - generic_handle_irq(misc_irq); + ret = generic_handle_domain_irq(domain, nr); if (nr == AR5312_MISC_IRQ_TIMER) ar5312_rst_reg_read(AR5312_TIMER); - } else { - spurious_interrupt(); } + + if (!pending || ret) + spurious_interrupt(); } /* Enable the specified AR5312_MISC_IRQ interrupt */ diff --git a/arch/mips/lantiq/irq.c b/arch/mips/lantiq/irq.c index acfbdc01b0ac..b732495f138a 100644 --- a/arch/mips/lantiq/irq.c +++ b/arch/mips/lantiq/irq.c @@ -300,7 +300,7 @@ static void ltq_hw_irq_handler(struct irq_desc *desc) */ irq = __fls(irq); hwirq = irq + MIPS_CPU_IRQ_CASCADE + (INT_NUM_IM_OFFSET * module); - generic_handle_irq(irq_linear_revmap(ltq_domain, hwirq)); + generic_handle_domain_irq(ltq_domain, hwirq); /* if this is a EBU irq, we need to ack it or get a deadlock */ if (irq == LTQ_ICU_EBU_IRQ && !module && LTQ_EBU_PCC_ISTAT != 0) diff --git a/arch/mips/pci/pci-ar2315.c b/arch/mips/pci/pci-ar2315.c index c1a655aee599..9a4bfb4e63e3 100644 --- a/arch/mips/pci/pci-ar2315.c +++ b/arch/mips/pci/pci-ar2315.c @@ -337,14 +337,12 @@ static void ar2315_pci_irq_handler(struct irq_desc *desc) struct ar2315_pci_ctrl *apc = irq_desc_get_handler_data(desc); u32 pending = ar2315_pci_reg_read(apc, AR2315_PCI_ISR) & ar2315_pci_reg_read(apc, AR2315_PCI_IMR); - unsigned pci_irq = 0; + int ret = 0; if (pending) - pci_irq = irq_find_mapping(apc->domain, __ffs(pending)); + ret = generic_handle_domain_irq(apc->domain, __ffs(pending)); - if (pci_irq) - generic_handle_irq(pci_irq); - else + if (!pending || ret) spurious_interrupt(); } diff --git a/arch/mips/pci/pci-rt3883.c b/arch/mips/pci/pci-rt3883.c index c48e23cf5b5e..d3c947fa2969 100644 --- a/arch/mips/pci/pci-rt3883.c +++ b/arch/mips/pci/pci-rt3883.c @@ -140,10 +140,9 @@ static void rt3883_pci_irq_handler(struct irq_desc *desc) } while (pending) { - unsigned irq, bit = __ffs(pending); + unsigned bit = __ffs(pending); - irq = irq_find_mapping(rpc->irq_domain, bit); - generic_handle_irq(irq); + generic_handle_domain_irq(rpc->irq_domain, bit); pending &= ~BIT(bit); } diff --git a/arch/mips/ralink/irq.c b/arch/mips/ralink/irq.c index 220ca0cd7945..fa353bc13947 100644 --- a/arch/mips/ralink/irq.c +++ b/arch/mips/ralink/irq.c @@ -100,7 +100,7 @@ static void ralink_intc_irq_handler(struct irq_desc *desc) if (pending) { struct irq_domain *domain = irq_desc_get_handler_data(desc); - generic_handle_irq(irq_find_mapping(domain, __ffs(pending))); + generic_handle_domain_irq(domain, __ffs(pending)); } else { spurious_interrupt(); } diff --git a/arch/mips/sgi-ip27/ip27-irq.c b/arch/mips/sgi-ip27/ip27-irq.c index 95c1bff1ab9f..a0dd3bd2b81b 100644 --- a/arch/mips/sgi-ip27/ip27-irq.c +++ b/arch/mips/sgi-ip27/ip27-irq.c @@ -190,7 +190,7 @@ 
static void ip27_do_irq_mask0(struct irq_desc *desc) unsigned long *mask = per_cpu(irq_enable_mask, cpu); struct irq_domain *domain; u64 pend0; - int irq; + int ret; /* copied from Irix intpend0() */ pend0 = LOCAL_HUB_L(PI_INT_PEND0); @@ -216,10 +216,8 @@ static void ip27_do_irq_mask0(struct irq_desc *desc) #endif { domain = irq_desc_get_handler_data(desc); - irq = irq_linear_revmap(domain, __ffs(pend0)); - if (irq) - generic_handle_irq(irq); - else + ret = generic_handle_domain_irq(domain, __ffs(pend0)); + if (ret) spurious_interrupt(); } @@ -232,7 +230,7 @@ static void ip27_do_irq_mask1(struct irq_desc *desc) unsigned long *mask = per_cpu(irq_enable_mask, cpu); struct irq_domain *domain; u64 pend1; - int irq; + int ret; /* copied from Irix intpend0() */ pend1 = LOCAL_HUB_L(PI_INT_PEND1); @@ -242,10 +240,8 @@ static void ip27_do_irq_mask1(struct irq_desc *desc) return; domain = irq_desc_get_handler_data(desc); - irq = irq_linear_revmap(domain, __ffs(pend1) + 64); - if (irq) - generic_handle_irq(irq); - else + ret = generic_handle_domain_irq(domain, __ffs(pend1) + 64); + if (ret) spurious_interrupt(); LOCAL_HUB_L(PI_INT_PEND1); diff --git a/arch/mips/sgi-ip30/ip30-irq.c b/arch/mips/sgi-ip30/ip30-irq.c index ba87704073c8..423c32cb66ed 100644 --- a/arch/mips/sgi-ip30/ip30-irq.c +++ b/arch/mips/sgi-ip30/ip30-irq.c @@ -99,7 +99,7 @@ static void ip30_normal_irq(struct irq_desc *desc) int cpu = smp_processor_id(); struct irq_domain *domain; u64 pend, mask; - int irq; + int ret; pend = heart_read(&heart_regs->isr); mask = (heart_read(&heart_regs->imr[cpu]) & @@ -130,10 +130,8 @@ static void ip30_normal_irq(struct irq_desc *desc) #endif { domain = irq_desc_get_handler_data(desc); - irq = irq_linear_revmap(domain, __ffs(pend)); - if (irq) - generic_handle_irq(irq); - else + ret = generic_handle_domain_irq(domain, __ffs(pend)); + if (ret) spurious_interrupt(); } } diff --git a/arch/nios2/kernel/irq.c b/arch/nios2/kernel/irq.c index c6a1a9f6ac42..6b7890e5f7af 100644 --- a/arch/nios2/kernel/irq.c +++ b/arch/nios2/kernel/irq.c @@ -19,11 +19,9 @@ static u32 ienable; asmlinkage void do_IRQ(int hwirq, struct pt_regs *regs) { struct pt_regs *oldregs = set_irq_regs(regs); - int irq; irq_enter(); - irq = irq_find_mapping(NULL, hwirq); - generic_handle_irq(irq); + generic_handle_domain_irq(NULL, hwirq); irq_exit(); set_irq_regs(oldregs); diff --git a/arch/parisc/include/asm/string.h b/arch/parisc/include/asm/string.h index 4a0c9dbd62fd..f6e1132f4e35 100644 --- a/arch/parisc/include/asm/string.h +++ b/arch/parisc/include/asm/string.h @@ -8,19 +8,4 @@ extern void * memset(void *, int, size_t); #define __HAVE_ARCH_MEMCPY void * memcpy(void * dest,const void *src,size_t count); -#define __HAVE_ARCH_STRLEN -extern size_t strlen(const char *s); - -#define __HAVE_ARCH_STRCPY -extern char *strcpy(char *dest, const char *src); - -#define __HAVE_ARCH_STRNCPY -extern char *strncpy(char *dest, const char *src, size_t count); - -#define __HAVE_ARCH_STRCAT -extern char *strcat(char *dest, const char *src); - -#define __HAVE_ARCH_MEMSET -extern void *memset(void *, int, size_t); - #endif diff --git a/arch/parisc/kernel/parisc_ksyms.c b/arch/parisc/kernel/parisc_ksyms.c index 8ed409ecec93..e8a6a751dfd8 100644 --- a/arch/parisc/kernel/parisc_ksyms.c +++ b/arch/parisc/kernel/parisc_ksyms.c @@ -17,10 +17,6 @@ #include <linux/string.h> EXPORT_SYMBOL(memset); -EXPORT_SYMBOL(strlen); -EXPORT_SYMBOL(strcpy); -EXPORT_SYMBOL(strncpy); -EXPORT_SYMBOL(strcat); #include <linux/atomic.h> EXPORT_SYMBOL(__xchg8); diff --git 
a/arch/parisc/lib/Makefile b/arch/parisc/lib/Makefile index 2d7a9974dbae..7b197667faf6 100644 --- a/arch/parisc/lib/Makefile +++ b/arch/parisc/lib/Makefile @@ -3,7 +3,7 @@ # Makefile for parisc-specific library files # -lib-y := lusercopy.o bitops.o checksum.o io.o memcpy.o \ - ucmpdi2.o delay.o string.o +lib-y := lusercopy.o bitops.o checksum.o io.o memset.o memcpy.o \ + ucmpdi2.o delay.o obj-y := iomap.o diff --git a/arch/parisc/lib/memset.c b/arch/parisc/lib/memset.c new file mode 100644 index 000000000000..133e4809859a --- /dev/null +++ b/arch/parisc/lib/memset.c @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#include <linux/types.h> +#include <asm/string.h> + +#define OPSIZ (BITS_PER_LONG/8) +typedef unsigned long op_t; + +void * +memset (void *dstpp, int sc, size_t len) +{ + unsigned int c = sc; + long int dstp = (long int) dstpp; + + if (len >= 8) + { + size_t xlen; + op_t cccc; + + cccc = (unsigned char) c; + cccc |= cccc << 8; + cccc |= cccc << 16; + if (OPSIZ > 4) + /* Do the shift in two steps to avoid warning if long has 32 bits. */ + cccc |= (cccc << 16) << 16; + + /* There are at least some bytes to set. + No need to test for LEN == 0 in this alignment loop. */ + while (dstp % OPSIZ != 0) + { + ((unsigned char *) dstp)[0] = c; + dstp += 1; + len -= 1; + } + + /* Write 8 `op_t' per iteration until less than 8 `op_t' remain. */ + xlen = len / (OPSIZ * 8); + while (xlen > 0) + { + ((op_t *) dstp)[0] = cccc; + ((op_t *) dstp)[1] = cccc; + ((op_t *) dstp)[2] = cccc; + ((op_t *) dstp)[3] = cccc; + ((op_t *) dstp)[4] = cccc; + ((op_t *) dstp)[5] = cccc; + ((op_t *) dstp)[6] = cccc; + ((op_t *) dstp)[7] = cccc; + dstp += 8 * OPSIZ; + xlen -= 1; + } + len %= OPSIZ * 8; + + /* Write 1 `op_t' per iteration until less than OPSIZ bytes remain. */ + xlen = len / OPSIZ; + while (xlen > 0) + { + ((op_t *) dstp)[0] = cccc; + dstp += OPSIZ; + xlen -= 1; + } + len %= OPSIZ; + } + + /* Write the last few bytes. 
*/ + while (len > 0) + { + ((unsigned char *) dstp)[0] = c; + dstp += 1; + len -= 1; + } + + return dstpp; +} diff --git a/arch/parisc/lib/string.S b/arch/parisc/lib/string.S deleted file mode 100644 index 4a64264427a6..000000000000 --- a/arch/parisc/lib/string.S +++ /dev/null @@ -1,136 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * PA-RISC assembly string functions - * - * Copyright (C) 2019 Helge Deller <deller@gmx.de> - */ - -#include <asm/assembly.h> -#include <linux/linkage.h> - - .section .text.hot - .level PA_ASM_LEVEL - - t0 = r20 - t1 = r21 - t2 = r22 - -ENTRY_CFI(strlen, frame=0,no_calls) - or,COND(<>) arg0,r0,ret0 - b,l,n .Lstrlen_null_ptr,r0 - depwi 0,31,2,ret0 - cmpb,COND(<>) arg0,ret0,.Lstrlen_not_aligned - ldw,ma 4(ret0),t0 - cmpib,tr 0,r0,.Lstrlen_loop - uxor,nbz r0,t0,r0 -.Lstrlen_not_aligned: - uaddcm arg0,ret0,t1 - shladd t1,3,r0,t1 - mtsar t1 - depwi -1,%sar,32,t0 - uxor,nbz r0,t0,r0 -.Lstrlen_loop: - b,l,n .Lstrlen_end_loop,r0 - ldw,ma 4(ret0),t0 - cmpib,tr 0,r0,.Lstrlen_loop - uxor,nbz r0,t0,r0 -.Lstrlen_end_loop: - extrw,u,<> t0,7,8,r0 - addib,tr,n -3,ret0,.Lstrlen_out - extrw,u,<> t0,15,8,r0 - addib,tr,n -2,ret0,.Lstrlen_out - extrw,u,<> t0,23,8,r0 - addi -1,ret0,ret0 -.Lstrlen_out: - bv r0(rp) - uaddcm ret0,arg0,ret0 -.Lstrlen_null_ptr: - bv,n r0(rp) -ENDPROC_CFI(strlen) - - -ENTRY_CFI(strcpy, frame=0,no_calls) - ldb 0(arg1),t0 - stb t0,0(arg0) - ldo 0(arg0),ret0 - ldo 1(arg1),t1 - cmpb,= r0,t0,2f - ldo 1(arg0),t2 -1: ldb 0(t1),arg1 - stb arg1,0(t2) - ldo 1(t1),t1 - cmpb,<> r0,arg1,1b - ldo 1(t2),t2 -2: bv,n r0(rp) -ENDPROC_CFI(strcpy) - - -ENTRY_CFI(strncpy, frame=0,no_calls) - ldb 0(arg1),t0 - stb t0,0(arg0) - ldo 1(arg1),t1 - ldo 0(arg0),ret0 - cmpb,= r0,t0,2f - ldo 1(arg0),arg1 -1: ldo -1(arg2),arg2 - cmpb,COND(=),n r0,arg2,2f - ldb 0(t1),arg0 - stb arg0,0(arg1) - ldo 1(t1),t1 - cmpb,<> r0,arg0,1b - ldo 1(arg1),arg1 -2: bv,n r0(rp) -ENDPROC_CFI(strncpy) - - -ENTRY_CFI(strcat, frame=0,no_calls) - ldb 0(arg0),t0 - cmpb,= t0,r0,2f - ldo 0(arg0),ret0 - ldo 1(arg0),arg0 -1: ldb 0(arg0),t1 - cmpb,<>,n r0,t1,1b - ldo 1(arg0),arg0 -2: ldb 0(arg1),t2 - stb t2,0(arg0) - ldo 1(arg0),arg0 - ldb 0(arg1),t0 - cmpb,<> r0,t0,2b - ldo 1(arg1),arg1 - bv,n r0(rp) -ENDPROC_CFI(strcat) - - -ENTRY_CFI(memset, frame=0,no_calls) - copy arg0,ret0 - cmpb,COND(=) r0,arg0,4f - copy arg0,t2 - cmpb,COND(=) r0,arg2,4f - ldo -1(arg2),arg3 - subi -1,arg3,t0 - subi 0,t0,t1 - cmpiclr,COND(>=) 0,t1,arg2 - ldo -1(t1),arg2 - extru arg2,31,2,arg0 -2: stb arg1,0(t2) - ldo 1(t2),t2 - addib,>= -1,arg0,2b - ldo -1(arg3),arg3 - cmpiclr,COND(<=) 4,arg2,r0 - b,l,n 4f,r0 -#ifdef CONFIG_64BIT - depd,* r0,63,2,arg2 -#else - depw r0,31,2,arg2 -#endif - ldo 1(t2),t2 -3: stb arg1,-1(t2) - stb arg1,0(t2) - stb arg1,1(t2) - stb arg1,2(t2) - addib,COND(>) -4,arg2,3b - ldo 4(t2),t2 -4: bv,n r0(rp) -ENDPROC_CFI(memset) - - .end diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h index 64201125a287..d4b145b279f6 100644 --- a/arch/powerpc/include/asm/book3s/32/kup.h +++ b/arch/powerpc/include/asm/book3s/32/kup.h @@ -4,6 +4,8 @@ #include <asm/bug.h> #include <asm/book3s/32/mmu-hash.h> +#include <asm/mmu.h> +#include <asm/synch.h> #ifndef __ASSEMBLY__ @@ -28,6 +30,15 @@ static inline void kuep_lock(void) return; update_user_segments(mfsr(0) | SR_NX); + /* + * This isync() shouldn't be necessary as the kernel is not excepted to + * run any instruction in userspace soon after the update of segments, + * but hash based cores (at least G3) seem to exhibit a random + * 
behaviour when the 'isync' is not there. 603 cores don't have this + * behaviour so don't do the 'isync' as it saves several CPU cycles. + */ + if (mmu_has_feature(MMU_FTR_HPTE_TABLE)) + isync(); /* Context sync required after mtsr() */ } static inline void kuep_unlock(void) @@ -36,6 +47,15 @@ static inline void kuep_unlock(void) return; update_user_segments(mfsr(0) & ~SR_NX); + /* + * This isync() shouldn't be necessary as a 'rfi' will soon be executed + * to return to userspace, but hash based cores (at least G3) seem to + * exhibit a random behaviour when the 'isync' is not there. 603 cores + * don't have this behaviour so don't do the 'isync' as it saves several + * CPU cycles. + */ + if (mmu_has_feature(MMU_FTR_HPTE_TABLE)) + isync(); /* Context sync required after mtsr() */ } #ifdef CONFIG_PPC_KUAP diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index d4bdf7d274ac..6b800d3e2681 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -583,6 +583,9 @@ DECLARE_INTERRUPT_HANDLER_NMI(hmi_exception_realmode); DECLARE_INTERRUPT_HANDLER_ASYNC(TAUException); +/* irq.c */ +DECLARE_INTERRUPT_HANDLER_ASYNC(do_IRQ); + void __noreturn unrecoverable_exception(struct pt_regs *regs); void replay_system_reset(void); diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h index 4982f3711fc3..2b3278534bc1 100644 --- a/arch/powerpc/include/asm/irq.h +++ b/arch/powerpc/include/asm/irq.h @@ -52,7 +52,7 @@ extern void *mcheckirq_ctx[NR_CPUS]; extern void *hardirq_ctx[NR_CPUS]; extern void *softirq_ctx[NR_CPUS]; -extern void do_IRQ(struct pt_regs *regs); +void __do_IRQ(struct pt_regs *regs); extern void __init init_IRQ(void); extern void __do_irq(struct pt_regs *regs); diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index 3e5d470a6155..14422e851494 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -70,6 +70,22 @@ struct pt_regs unsigned long __pad[4]; /* Maintain 16 byte interrupt stack alignment */ }; #endif +#if defined(CONFIG_PPC32) && defined(CONFIG_BOOKE) + struct { /* Must be a multiple of 16 bytes */ + unsigned long mas0; + unsigned long mas1; + unsigned long mas2; + unsigned long mas3; + unsigned long mas6; + unsigned long mas7; + unsigned long srr0; + unsigned long srr1; + unsigned long csrr0; + unsigned long csrr1; + unsigned long dsrr0; + unsigned long dsrr1; + }; +#endif }; #endif diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index a47eefa09bcb..5bee245d832b 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -309,24 +309,21 @@ int main(void) STACK_PT_REGS_OFFSET(STACK_REGS_IAMR, iamr); #endif -#if defined(CONFIG_PPC32) -#if defined(CONFIG_BOOKE) || defined(CONFIG_40x) - DEFINE(EXC_LVL_SIZE, STACK_EXC_LVL_FRAME_SIZE); - DEFINE(MAS0, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas0)); +#if defined(CONFIG_PPC32) && defined(CONFIG_BOOKE) + STACK_PT_REGS_OFFSET(MAS0, mas0); /* we overload MMUCR for 44x on MAS0 since they are mutually exclusive */ - DEFINE(MMUCR, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas0)); - DEFINE(MAS1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas1)); - DEFINE(MAS2, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas2)); - DEFINE(MAS3, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas3)); - DEFINE(MAS6, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas6)); - DEFINE(MAS7, 
STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas7)); - DEFINE(_SRR0, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, srr0)); - DEFINE(_SRR1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, srr1)); - DEFINE(_CSRR0, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, csrr0)); - DEFINE(_CSRR1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, csrr1)); - DEFINE(_DSRR0, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, dsrr0)); - DEFINE(_DSRR1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, dsrr1)); -#endif + STACK_PT_REGS_OFFSET(MMUCR, mas0); + STACK_PT_REGS_OFFSET(MAS1, mas1); + STACK_PT_REGS_OFFSET(MAS2, mas2); + STACK_PT_REGS_OFFSET(MAS3, mas3); + STACK_PT_REGS_OFFSET(MAS6, mas6); + STACK_PT_REGS_OFFSET(MAS7, mas7); + STACK_PT_REGS_OFFSET(_SRR0, srr0); + STACK_PT_REGS_OFFSET(_SRR1, srr1); + STACK_PT_REGS_OFFSET(_CSRR0, csrr0); + STACK_PT_REGS_OFFSET(_CSRR1, csrr1); + STACK_PT_REGS_OFFSET(_DSRR0, dsrr0); + STACK_PT_REGS_OFFSET(_DSRR1, dsrr1); #endif /* About the CPU features table */ diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 4aec59a77d4c..37859e62a8dc 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -812,7 +812,6 @@ __start_interrupts: * syscall register convention is in Documentation/powerpc/syscall64-abi.rst */ EXC_VIRT_BEGIN(system_call_vectored, 0x3000, 0x1000) -1: /* SCV 0 */ mr r9,r13 GET_PACA(r13) @@ -842,10 +841,12 @@ EXC_VIRT_BEGIN(system_call_vectored, 0x3000, 0x1000) b system_call_vectored_sigill #endif .endr -2: EXC_VIRT_END(system_call_vectored, 0x3000, 0x1000) -SOFT_MASK_TABLE(1b, 2b) // Treat scv vectors as soft-masked, see comment above. +// Treat scv vectors as soft-masked, see comment above. +// Use absolute values rather than labels here, so they don't get relocated, +// because this code runs unrelocated. +SOFT_MASK_TABLE(0xc000000000003000, 0xc000000000004000) #ifdef CONFIG_RELOCATABLE TRAMP_VIRT_BEGIN(system_call_vectored_tramp) diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index 764edd860ed4..68e5c0a7e99d 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -300,7 +300,7 @@ ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE) EXCEPTION_PROLOG_1 EXCEPTION_PROLOG_2 INTERRUPT_DATA_STORAGE DataAccess handle_dar_dsisr=1 prepare_transfer_to_handler - lwz r5, _DSISR(r11) + lwz r5, _DSISR(r1) andis. 
r0, r5, DSISR_DABRMATCH@h bne- 1f bl do_page_fault diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index 87b806e8eded..e5503420b6c6 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -168,20 +168,18 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) /* only on e500mc */ #define DBG_STACK_BASE dbgirq_ctx -#define EXC_LVL_FRAME_OVERHEAD (THREAD_SIZE - INT_FRAME_SIZE - EXC_LVL_SIZE) - #ifdef CONFIG_SMP #define BOOKE_LOAD_EXC_LEVEL_STACK(level) \ mfspr r8,SPRN_PIR; \ slwi r8,r8,2; \ addis r8,r8,level##_STACK_BASE@ha; \ lwz r8,level##_STACK_BASE@l(r8); \ - addi r8,r8,EXC_LVL_FRAME_OVERHEAD; + addi r8,r8,THREAD_SIZE - INT_FRAME_SIZE; #else #define BOOKE_LOAD_EXC_LEVEL_STACK(level) \ lis r8,level##_STACK_BASE@ha; \ lwz r8,level##_STACK_BASE@l(r8); \ - addi r8,r8,EXC_LVL_FRAME_OVERHEAD; + addi r8,r8,THREAD_SIZE - INT_FRAME_SIZE; #endif /* @@ -208,7 +206,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) mtmsr r11; \ mfspr r11,SPRN_SPRG_THREAD; /* if from user, start at top of */\ lwz r11, TASK_STACK - THREAD(r11); /* this thread's kernel stack */\ - addi r11,r11,EXC_LVL_FRAME_OVERHEAD; /* allocate stack frame */\ + addi r11,r11,THREAD_SIZE - INT_FRAME_SIZE; /* allocate stack frame */\ beq 1f; \ /* COMING FROM USER MODE */ \ stw r9,_CCR(r11); /* save CR */\ @@ -516,24 +514,5 @@ label: bl kernel_fp_unavailable_exception; \ b interrupt_return -#else /* __ASSEMBLY__ */ -struct exception_regs { - unsigned long mas0; - unsigned long mas1; - unsigned long mas2; - unsigned long mas3; - unsigned long mas6; - unsigned long mas7; - unsigned long srr0; - unsigned long srr1; - unsigned long csrr0; - unsigned long csrr1; - unsigned long dsrr0; - unsigned long dsrr1; -}; - -/* ensure this structure is always sized to a multiple of the stack alignment */ -#define STACK_EXC_LVL_FRAME_SIZE ALIGN(sizeof (struct exception_regs), 16) - #endif /* __ASSEMBLY__ */ #endif /* __HEAD_BOOKE_H__ */ diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 91e63eac4e8f..551b653228c4 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -750,7 +750,7 @@ void __do_irq(struct pt_regs *regs) trace_irq_exit(regs); } -DEFINE_INTERRUPT_HANDLER_ASYNC(do_IRQ) +void __do_IRQ(struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); void *cursp, *irqsp, *sirqsp; @@ -774,6 +774,11 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(do_IRQ) set_irq_regs(old_regs); } +DEFINE_INTERRUPT_HANDLER_ASYNC(do_IRQ) +{ + __do_IRQ(regs); +} + static void *__init alloc_vm_stack(void) { return __vmalloc_node(THREAD_SIZE, THREAD_ALIGN, THREADINFO_GFP, diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index cbc28d1a2e1b..7a7cd6bda53e 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -292,7 +292,8 @@ int kprobe_handler(struct pt_regs *regs) if (user_mode(regs)) return 0; - if (!(regs->msr & MSR_IR) || !(regs->msr & MSR_DR)) + if (!IS_ENABLED(CONFIG_BOOKE) && + (!(regs->msr & MSR_IR) || !(regs->msr & MSR_DR))) return 0; /* diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 5ff0e55d0db1..defecb3b1b15 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -1167,7 +1167,7 @@ static int __init topology_init(void) * CPU. For instance, the boot cpu might never be valid * for hotplugging. 
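 * smp_ops itself may also be left NULL on platforms that never install
 * SMP operations, hence the extra NULL check before dereferencing it below.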
*/ - if (smp_ops->cpu_offline_self) + if (smp_ops && smp_ops->cpu_offline_self) c->hotpluggable = 1; #endif diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index e45ce427bffb..c487ba5a6e11 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -586,7 +586,7 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(timer_interrupt) #if defined(CONFIG_PPC32) && defined(CONFIG_PPC_PMAC) if (atomic_read(&ppc_n_lost_interrupts) != 0) - do_IRQ(regs); + __do_IRQ(regs); #endif old_regs = set_irq_regs(regs); diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index dfbce527c98e..d56254f05e17 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -1104,7 +1104,7 @@ DEFINE_INTERRUPT_HANDLER(RunModeException) _exception(SIGTRAP, regs, TRAP_UNK, 0); } -DEFINE_INTERRUPT_HANDLER(single_step_exception) +static void __single_step_exception(struct pt_regs *regs) { clear_single_step(regs); clear_br_trace(regs); @@ -1121,6 +1121,11 @@ DEFINE_INTERRUPT_HANDLER(single_step_exception) _exception(SIGTRAP, regs, TRAP_TRACE, regs->nip); } +DEFINE_INTERRUPT_HANDLER(single_step_exception) +{ + __single_step_exception(regs); +} + /* * After we have successfully emulated an instruction, we have to * check if the instruction was being single-stepped, and if so, @@ -1130,7 +1135,7 @@ DEFINE_INTERRUPT_HANDLER(single_step_exception) static void emulate_single_step(struct pt_regs *regs) { if (single_stepping(regs)) - single_step_exception(regs); + __single_step_exception(regs); } static inline int __parse_fpscr(unsigned long fpscr) diff --git a/arch/powerpc/mm/pageattr.c b/arch/powerpc/mm/pageattr.c index 0876216ceee6..edea388e9d3f 100644 --- a/arch/powerpc/mm/pageattr.c +++ b/arch/powerpc/mm/pageattr.c @@ -18,16 +18,12 @@ /* * Updates the attributes of a page in three steps: * - * 1. invalidate the page table entry - * 2. flush the TLB - * 3. install the new entry with the updated attributes - * - * Invalidating the pte means there are situations where this will not work - * when in theory it should. - * For example: - * - removing write from page whilst it is being executed - * - setting a page read-only whilst it is being read by another CPU + * 1. take the page_table_lock + * 2. install the new entry with the updated attributes + * 3. flush the TLB * + * This sequence is safe against concurrent updates, and also allows updating the + * attributes of a page currently being executed or accessed. 
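+ * For example, a page can be made read-only while another CPU is still
+ * accessing it: the old PTE stays valid until the new value is installed,
+ * and the TLB flush at the end removes any stale translation.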
*/ static int change_page_attr(pte_t *ptep, unsigned long addr, void *data) { @@ -36,9 +32,7 @@ static int change_page_attr(pte_t *ptep, unsigned long addr, void *data) spin_lock(&init_mm.page_table_lock); - /* invalidate the PTE so it's safe to modify */ - pte = ptep_get_and_clear(&init_mm, addr, ptep); - flush_tlb_kernel_range(addr, addr + PAGE_SIZE); + pte = ptep_get(ptep); /* modify the PTE bits as desired, then apply */ switch (action) { @@ -59,11 +53,14 @@ static int change_page_attr(pte_t *ptep, unsigned long addr, void *data) break; } - set_pte_at(&init_mm, addr, ptep, pte); + pte_update(&init_mm, addr, ptep, ~0UL, pte_val(pte), 0); /* See ptesync comment in radix__set_pte_at() */ if (radix_enabled()) asm volatile("ptesync": : :"memory"); + + flush_tlb_kernel_range(addr, addr + PAGE_SIZE); + spin_unlock(&init_mm.page_table_lock); return 0; diff --git a/arch/powerpc/platforms/4xx/uic.c b/arch/powerpc/platforms/4xx/uic.c index 36fb66ce54cf..89e2587b1a59 100644 --- a/arch/powerpc/platforms/4xx/uic.c +++ b/arch/powerpc/platforms/4xx/uic.c @@ -198,7 +198,6 @@ static void uic_irq_cascade(struct irq_desc *desc) struct uic *uic = irq_desc_get_handler_data(desc); u32 msr; int src; - int subvirq; raw_spin_lock(&desc->lock); if (irqd_is_level_type(idata)) @@ -213,8 +212,7 @@ static void uic_irq_cascade(struct irq_desc *desc) src = 32 - ffs(msr); - subvirq = irq_linear_revmap(uic->irqhost, src); - generic_handle_irq(subvirq); + generic_handle_domain_irq(uic->irqhost, src); uic_irq_ret: raw_spin_lock(&desc->lock); diff --git a/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c b/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c index b2981634f1f8..ea46870e5d6e 100644 --- a/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c +++ b/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c @@ -81,11 +81,10 @@ static struct irq_chip cpld_pic = { .irq_unmask = cpld_unmask_irq, }; -static int +static unsigned int cpld_pic_get_irq(int offset, u8 ignore, u8 __iomem *statusp, u8 __iomem *maskp) { - int cpld_irq; u8 status = in_8(statusp); u8 mask = in_8(maskp); @@ -93,28 +92,26 @@ cpld_pic_get_irq(int offset, u8 ignore, u8 __iomem *statusp, status |= (ignore | mask); if (status == 0xff) - return 0; - - cpld_irq = ffz(status) + offset; + return ~0; - return irq_linear_revmap(cpld_pic_host, cpld_irq); + return ffz(status) + offset; } static void cpld_pic_cascade(struct irq_desc *desc) { - unsigned int irq; + unsigned int hwirq; - irq = cpld_pic_get_irq(0, PCI_IGNORE, &cpld_regs->pci_status, + hwirq = cpld_pic_get_irq(0, PCI_IGNORE, &cpld_regs->pci_status, &cpld_regs->pci_mask); - if (irq) { - generic_handle_irq(irq); + if (hwirq != ~0) { + generic_handle_domain_irq(cpld_pic_host, hwirq); return; } - irq = cpld_pic_get_irq(8, MISC_IGNORE, &cpld_regs->misc_status, + hwirq = cpld_pic_get_irq(8, MISC_IGNORE, &cpld_regs->misc_status, &cpld_regs->misc_mask); - if (irq) { - generic_handle_irq(irq); + if (hwirq != ~0) { + generic_handle_domain_irq(cpld_pic_host, hwirq); return; } } diff --git a/arch/powerpc/platforms/52xx/media5200.c b/arch/powerpc/platforms/52xx/media5200.c index efb8bdecbcc7..110c444f4bc7 100644 --- a/arch/powerpc/platforms/52xx/media5200.c +++ b/arch/powerpc/platforms/52xx/media5200.c @@ -78,7 +78,7 @@ static struct irq_chip media5200_irq_chip = { static void media5200_irq_cascade(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); - int sub_virq, val; + int val; u32 status, enable; /* Mask off the cascaded IRQ */ @@ -92,11 +92,10 @@ static void media5200_irq_cascade(struct irq_desc *desc) 
enable = in_be32(media5200_irq.regs + MEDIA5200_IRQ_STATUS); val = ffs((status & enable) >> MEDIA5200_IRQ_SHIFT); if (val) { - sub_virq = irq_linear_revmap(media5200_irq.irqhost, val - 1); - /* pr_debug("%s: virq=%i s=%.8x e=%.8x hwirq=%i subvirq=%i\n", - * __func__, virq, status, enable, val - 1, sub_virq); + generic_handle_domain_irq(media5200_irq.irqhost, val - 1); + /* pr_debug("%s: virq=%i s=%.8x e=%.8x hwirq=%i\n", + * __func__, virq, status, enable, val - 1); */ - generic_handle_irq(sub_virq); } /* Processing done; can reenable the cascade now */ diff --git a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c index 3823df235f25..f862b48b4824 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c @@ -190,14 +190,11 @@ static struct irq_chip mpc52xx_gpt_irq_chip = { static void mpc52xx_gpt_irq_cascade(struct irq_desc *desc) { struct mpc52xx_gpt_priv *gpt = irq_desc_get_handler_data(desc); - int sub_virq; u32 status; status = in_be32(&gpt->regs->status) & MPC52xx_GPT_STATUS_IRQMASK; - if (status) { - sub_virq = irq_linear_revmap(gpt->irqhost, 0); - generic_handle_irq(sub_virq); - } + if (status) + generic_handle_domain_irq(gpt->irqhost, 0); } static int mpc52xx_gpt_irq_map(struct irq_domain *h, unsigned int virq, diff --git a/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c b/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c index f82f75a6085c..285bfe19b798 100644 --- a/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c +++ b/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c @@ -91,10 +91,8 @@ static void pq2ads_pci_irq_demux(struct irq_desc *desc) break; for (bit = 0; pend != 0; ++bit, pend <<= 1) { - if (pend & 0x80000000) { - int virq = irq_linear_revmap(priv->host, bit); - generic_handle_irq(virq); - } + if (pend & 0x80000000) + generic_handle_domain_irq(priv->host, bit); } } } diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 6794145603de..a208997ade88 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -98,7 +98,7 @@ config PPC_BOOK3S_64 select PPC_HAVE_PMU_SUPPORT select HAVE_ARCH_TRANSPARENT_HUGEPAGE select ARCH_ENABLE_HUGEPAGE_MIGRATION if HUGETLB_PAGE && MIGRATION - select ARCH_ENABLE_PMD_SPLIT_PTLOCK + select ARCH_ENABLE_SPLIT_PMD_PTLOCK select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE select ARCH_SUPPORTS_HUGETLBFS select ARCH_SUPPORTS_NUMA_BALANCING diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c index c0ab62ba6f16..0873a7a20271 100644 --- a/arch/powerpc/platforms/cell/interrupt.c +++ b/arch/powerpc/platforms/cell/interrupt.c @@ -106,13 +106,9 @@ static void iic_ioexc_cascade(struct irq_desc *desc) out_be64(&node_iic->iic_is, ack); /* handle them */ for (cascade = 63; cascade >= 0; cascade--) - if (bits & (0x8000000000000000UL >> cascade)) { - unsigned int cirq = - irq_linear_revmap(iic_host, + if (bits & (0x8000000000000000UL >> cascade)) + generic_handle_domain_irq(iic_host, base | cascade); - if (cirq) - generic_handle_irq(cirq); - } /* post-ack level interrupts */ ack = bits & ~IIC_ISR_EDGE_MASK; if (ack) diff --git a/arch/powerpc/platforms/cell/spider-pic.c b/arch/powerpc/platforms/cell/spider-pic.c index 210785f59271..8af75867cb42 100644 --- a/arch/powerpc/platforms/cell/spider-pic.c +++ b/arch/powerpc/platforms/cell/spider-pic.c @@ -190,16 +190,11 @@ static void spider_irq_cascade(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); 
struct spider_pic *pic = irq_desc_get_handler_data(desc); - unsigned int cs, virq; + unsigned int cs; cs = in_be32(pic->regs + TIR_CS) >> 24; - if (cs == SPIDER_IRQ_INVALID) - virq = 0; - else - virq = irq_linear_revmap(pic->host, cs); - - if (virq) - generic_handle_irq(virq); + if (cs != SPIDER_IRQ_INVALID) + generic_handle_domain_irq(pic->host, cs); chip->irq_eoi(&desc->irq_data); } diff --git a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c index a1b7f79a8a15..15396333a90b 100644 --- a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c +++ b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c @@ -108,7 +108,6 @@ static const struct irq_domain_ops hlwd_irq_domain_ops = { static unsigned int __hlwd_pic_get_irq(struct irq_domain *h) { void __iomem *io_base = h->host_data; - int irq; u32 irq_status; irq_status = in_be32(io_base + HW_BROADWAY_ICR) & @@ -116,23 +115,22 @@ static unsigned int __hlwd_pic_get_irq(struct irq_domain *h) if (irq_status == 0) return 0; /* no more IRQs pending */ - irq = __ffs(irq_status); - return irq_linear_revmap(h, irq); + return __ffs(irq_status); } static void hlwd_pic_irq_cascade(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); struct irq_domain *irq_domain = irq_desc_get_handler_data(desc); - unsigned int virq; + unsigned int hwirq; raw_spin_lock(&desc->lock); chip->irq_mask(&desc->irq_data); /* IRQ_LEVEL */ raw_spin_unlock(&desc->lock); - virq = __hlwd_pic_get_irq(irq_domain); - if (virq) - generic_handle_irq(virq); + hwirq = __hlwd_pic_get_irq(irq_domain); + if (hwirq) + generic_handle_domain_irq(irq_domain, hwirq); else pr_err("spurious interrupt!\n"); @@ -190,7 +188,8 @@ static struct irq_domain *hlwd_pic_init(struct device_node *np) unsigned int hlwd_pic_get_irq(void) { - return __hlwd_pic_get_irq(hlwd_irq_host); + unsigned int hwirq = __hlwd_pic_get_irq(hlwd_irq_host); + return hwirq ? irq_linear_revmap(hlwd_irq_host, hwirq) : 0; } /* diff --git a/arch/powerpc/platforms/powernv/opal-irqchip.c b/arch/powerpc/platforms/powernv/opal-irqchip.c index c164419e254d..d55652b5f6fa 100644 --- a/arch/powerpc/platforms/powernv/opal-irqchip.c +++ b/arch/powerpc/platforms/powernv/opal-irqchip.c @@ -46,18 +46,15 @@ void opal_handle_events(void) e = READ_ONCE(last_outstanding_events) & opal_event_irqchip.mask; again: while (e) { - int virq, hwirq; + int hwirq; hwirq = fls64(e) - 1; e &= ~BIT_ULL(hwirq); local_irq_disable(); - virq = irq_find_mapping(opal_event_irqchip.domain, hwirq); - if (virq) { - irq_enter(); - generic_handle_irq(virq); - irq_exit(); - } + irq_enter(); + generic_handle_domain_irq(opal_event_irqchip.domain, hwirq); + irq_exit(); local_irq_enable(); cond_resched(); diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 6b0886668465..0dfaa6ab44cc 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -539,9 +539,10 @@ static void init_cpu_char_feature_flags(struct h_cpu_char_result *result) * H_CPU_BEHAV_FAVOUR_SECURITY_H could be set only if * H_CPU_BEHAV_FAVOUR_SECURITY is. 
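 * pseries_security_flavor is reset to 0 when security is not favoured,
 * so each of the three branches below sets it explicitly.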
*/ - if (!(result->behaviour & H_CPU_BEHAV_FAVOUR_SECURITY)) + if (!(result->behaviour & H_CPU_BEHAV_FAVOUR_SECURITY)) { security_ftr_clear(SEC_FTR_FAVOUR_SECURITY); - else if (result->behaviour & H_CPU_BEHAV_FAVOUR_SECURITY_H) + pseries_security_flavor = 0; + } else if (result->behaviour & H_CPU_BEHAV_FAVOUR_SECURITY_H) pseries_security_flavor = 1; else pseries_security_flavor = 2; diff --git a/arch/powerpc/sysdev/fsl_mpic_err.c b/arch/powerpc/sysdev/fsl_mpic_err.c index 5fa5fa215541..9a98bb212922 100644 --- a/arch/powerpc/sysdev/fsl_mpic_err.c +++ b/arch/powerpc/sysdev/fsl_mpic_err.c @@ -99,7 +99,6 @@ static irqreturn_t fsl_error_int_handler(int irq, void *data) struct mpic *mpic = (struct mpic *) data; u32 eisr, eimr; int errint; - unsigned int cascade_irq; eisr = mpic_fsl_err_read(mpic->err_regs, MPIC_ERR_INT_EISR); eimr = mpic_fsl_err_read(mpic->err_regs, MPIC_ERR_INT_EIMR); @@ -108,13 +107,11 @@ static irqreturn_t fsl_error_int_handler(int irq, void *data) return IRQ_NONE; while (eisr) { + int ret; errint = __builtin_clz(eisr); - cascade_irq = irq_linear_revmap(mpic->irqhost, - mpic->err_int_vecs[errint]); - WARN_ON(!cascade_irq); - if (cascade_irq) { - generic_handle_irq(cascade_irq); - } else { + ret = generic_handle_domain_irq(mpic->irqhost, + mpic->err_int_vecs[errint]); + if (WARN_ON(ret)) { eimr |= 1 << (31 - errint); mpic_fsl_err_write(mpic->err_regs, eimr); } diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c index 808e7118abfc..e6b06c3f8197 100644 --- a/arch/powerpc/sysdev/fsl_msi.c +++ b/arch/powerpc/sysdev/fsl_msi.c @@ -266,7 +266,6 @@ out_free: static irqreturn_t fsl_msi_cascade(int irq, void *data) { - unsigned int cascade_irq; struct fsl_msi *msi_data; int msir_index = -1; u32 msir_value = 0; @@ -279,9 +278,6 @@ static irqreturn_t fsl_msi_cascade(int irq, void *data) msir_index = cascade_data->index; - if (msir_index >= NR_MSI_REG_MAX) - cascade_irq = 0; - switch (msi_data->feature & FSL_PIC_IP_MASK) { case FSL_PIC_IP_MPIC: msir_value = fsl_msi_read(msi_data->msi_regs, @@ -305,15 +301,15 @@ static irqreturn_t fsl_msi_cascade(int irq, void *data) } while (msir_value) { + int err; intr_index = ffs(msir_value) - 1; - cascade_irq = irq_linear_revmap(msi_data->irqhost, + err = generic_handle_domain_irq(msi_data->irqhost, msi_hwirq(msi_data, msir_index, intr_index + have_shift)); - if (cascade_irq) { - generic_handle_irq(cascade_irq); + if (!err) ret = IRQ_HANDLED; - } + have_shift += intr_index + 1; msir_value = msir_value >> (intr_index + 1); } diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index dbdbbc2f1dc5..8183ca343675 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -67,6 +67,7 @@ static struct irq_domain *xive_irq_domain; static struct xive_ipi_desc { unsigned int irq; char name[16]; + atomic_t started; } *xive_ipis; /* @@ -1120,7 +1121,7 @@ static const struct irq_domain_ops xive_ipi_irq_domain_ops = { .alloc = xive_ipi_irq_domain_alloc, }; -static int __init xive_request_ipi(void) +static int __init xive_init_ipis(void) { struct fwnode_handle *fwnode; struct irq_domain *ipi_domain; @@ -1144,10 +1145,6 @@ static int __init xive_request_ipi(void) struct xive_ipi_desc *xid = &xive_ipis[node]; struct xive_ipi_alloc_info info = { node }; - /* Skip nodes without CPUs */ - if (cpumask_empty(cpumask_of_node(node))) - continue; - /* * Map one IPI interrupt per node for all cpus of that node. 
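 * The IPI itself is only requested later, the first time a CPU of the
 * node is brought up (see xive_request_ipi() further down).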
* Since the HW interrupt number doesn't have any meaning, @@ -1159,11 +1156,6 @@ static int __init xive_request_ipi(void) xid->irq = ret; snprintf(xid->name, sizeof(xid->name), "IPI-%d", node); - - ret = request_irq(xid->irq, xive_muxed_ipi_action, - IRQF_PERCPU | IRQF_NO_THREAD, xid->name, NULL); - - WARN(ret < 0, "Failed to request IPI %d: %d\n", xid->irq, ret); } return ret; @@ -1178,6 +1170,22 @@ out: return ret; } +static int xive_request_ipi(unsigned int cpu) +{ + struct xive_ipi_desc *xid = &xive_ipis[early_cpu_to_node(cpu)]; + int ret; + + if (atomic_inc_return(&xid->started) > 1) + return 0; + + ret = request_irq(xid->irq, xive_muxed_ipi_action, + IRQF_PERCPU | IRQF_NO_THREAD, + xid->name, NULL); + + WARN(ret < 0, "Failed to request IPI %d: %d\n", xid->irq, ret); + return ret; +} + static int xive_setup_cpu_ipi(unsigned int cpu) { unsigned int xive_ipi_irq = xive_ipi_cpu_to_irq(cpu); @@ -1192,6 +1200,9 @@ static int xive_setup_cpu_ipi(unsigned int cpu) if (xc->hw_ipi != XIVE_BAD_IRQ) return 0; + /* Register the IPI */ + xive_request_ipi(cpu); + /* Grab an IPI from the backend, this will populate xc->hw_ipi */ if (xive_ops->get_ipi(cpu, xc)) return -EIO; @@ -1231,6 +1242,8 @@ static void xive_cleanup_cpu_ipi(unsigned int cpu, struct xive_cpu *xc) if (xc->hw_ipi == XIVE_BAD_IRQ) return; + /* TODO: clear IPI mapping */ + /* Mask the IPI */ xive_do_source_set_mask(&xc->ipi_data, true); @@ -1253,7 +1266,7 @@ void __init xive_smp_probe(void) smp_ops->cause_ipi = xive_cause_ipi; /* Register the IPI */ - xive_request_ipi(); + xive_init_ipis(); /* Allocate and setup IPI for the boot CPU */ xive_setup_cpu_ipi(smp_processor_id()); diff --git a/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts b/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts index ec79944065c9..baea7d204639 100644 --- a/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts +++ b/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts @@ -14,6 +14,10 @@ model = "Microchip PolarFire-SoC Icicle Kit"; compatible = "microchip,mpfs-icicle-kit"; + aliases { + ethernet0 = &emac1; + }; + chosen { stdout-path = &serial0; }; diff --git a/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi b/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi index b9819570a7d1..9d2fbbc1f777 100644 --- a/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi +++ b/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi @@ -317,7 +317,7 @@ reg = <0x0 0x20112000 0x0 0x2000>; interrupt-parent = <&plic>; interrupts = <70 71 72 73>; - mac-address = [00 00 00 00 00 00]; + local-mac-address = [00 00 00 00 00 00]; clocks = <&clkcfg 5>, <&clkcfg 2>; status = "disabled"; clock-names = "pclk", "hclk"; diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index d3081e4d9600..3397ddac1a30 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -11,7 +11,7 @@ endif CFLAGS_syscall_table.o += $(call cc-option,-Wno-override-init,) ifdef CONFIG_KEXEC -AFLAGS_kexec_relocate.o := -mcmodel=medany -mno-relax +AFLAGS_kexec_relocate.o := -mcmodel=medany $(call cc-option,-mno-relax) endif extra-y += head.o diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c index 1a85305720e8..9c0511119bad 100644 --- a/arch/riscv/kernel/ptrace.c +++ b/arch/riscv/kernel/ptrace.c @@ -10,6 +10,7 @@ #include <asm/ptrace.h> #include <asm/syscall.h> #include <asm/thread_info.h> +#include <asm/switch_to.h> #include <linux/audit.h> #include <linux/ptrace.h> #include <linux/elf.h> @@ -56,6 +57,9 @@ static int riscv_fpr_get(struct 
task_struct *target, { struct __riscv_d_ext_state *fstate = &target->thread.fstate; + if (target == current) + fstate_save(current, task_pt_regs(current)); + membuf_write(&to, fstate, offsetof(struct __riscv_d_ext_state, fcsr)); membuf_store(&to, fstate->fcsr); return membuf_zero(&to, 4); // explicitly pad diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 18bd0e4bc36c..120b2f6f71bc 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -229,8 +229,8 @@ static void __init init_resources(void) } /* Clean-up any unused pre-allocated resources */ - mem_res_sz = (num_resources - res_idx + 1) * sizeof(*mem_res); - memblock_free(__pa(mem_res), mem_res_sz); + if (res_idx >= 0) + memblock_free(__pa(mem_res), (res_idx + 1) * sizeof(*mem_res)); return; error: diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 88134cc288d9..7cb4f391d106 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -197,7 +197,7 @@ static void __init setup_bootmem(void) * if end of dram is equal to maximum addressable memory. For 64-bit * kernel, this problem can't happen here as the end of the virtual * address space is occupied by the kernel mapping, so this check must - * be done in create_kernel_page_table. + * be done as soon as the kernel mapping base address is determined. */ max_mapped_addr = __pa(~(ulong)0); if (max_mapped_addr == (phys_ram_end - 1)) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index a0e2130f0100..92c0a1b4c528 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -138,6 +138,8 @@ config S390 select HAVE_ARCH_JUMP_LABEL_RELATIVE select HAVE_ARCH_KASAN select HAVE_ARCH_KASAN_VMALLOC + select HAVE_ARCH_KCSAN + select HAVE_ARCH_KFENCE select HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_SOFT_DIRTY diff --git a/arch/s390/Makefile b/arch/s390/Makefile index 1e3172877982..17dc4f1ac4fa 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -142,7 +142,8 @@ all: bzImage KBUILD_IMAGE := $(boot)/bzImage install: - $(Q)$(MAKE) $(build)=$(boot) $@ + sh -x $(srctree)/$(boot)/install.sh $(KERNELRELEASE) $(KBUILD_IMAGE) \ + System.map "$(INSTALL_PATH)" bzImage: vmlinux $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile index 41a64b8dce25..0ba646899131 100644 --- a/arch/s390/boot/Makefile +++ b/arch/s390/boot/Makefile @@ -7,6 +7,7 @@ KCOV_INSTRUMENT := n GCOV_PROFILE := n UBSAN_SANITIZE := n KASAN_SANITIZE := n +KCSAN_SANITIZE := n KBUILD_AFLAGS := $(KBUILD_AFLAGS_DECOMPRESSOR) KBUILD_CFLAGS := $(KBUILD_CFLAGS_DECOMPRESSOR) @@ -36,7 +37,7 @@ CFLAGS_sclp_early_core.o += -I$(srctree)/drivers/s390/char obj-y := head.o als.o startup.o mem_detect.o ipl_parm.o ipl_report.o obj-y += string.o ebcdic.o sclp_early_core.o mem.o ipl_vmparm.o cmdline.o -obj-y += version.o pgm_check_info.o ctype.o text_dma.o +obj-y += version.o pgm_check_info.o ctype.o obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE)) += uv.o obj-$(CONFIG_RELOCATABLE) += machine_kexec_reloc.o obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o @@ -69,7 +70,3 @@ $(obj)/compressed/vmlinux: $(obj)/startup.a FORCE $(obj)/startup.a: $(OBJECTS) FORCE $(call if_changed,ar) - -install: - sh -x $(srctree)/$(obj)/install.sh $(KERNELRELEASE) $(obj)/bzImage \ - System.map "$(INSTALL_PATH)" diff --git a/arch/s390/boot/boot.h b/arch/s390/boot/boot.h index ae04e1c93764..641ce0fc5c3e 100644 --- a/arch/s390/boot/boot.h +++ b/arch/s390/boot/boot.h @@ -2,14 +2,9 @@ #ifndef BOOT_BOOT_H #define 
BOOT_BOOT_H +#include <asm/extable.h> #include <linux/types.h> -#define BOOT_STACK_OFFSET 0x8000 - -#ifndef __ASSEMBLY__ - -#include <linux/compiler.h> - void startup_kernel(void); unsigned long detect_memory(void); bool is_ipl_block_dump(void); @@ -18,17 +13,22 @@ void setup_boot_command_line(void); void parse_boot_command_line(void); void verify_facilities(void); void print_missing_facilities(void); +void sclp_early_setup_buffer(void); void print_pgm_check_info(void); unsigned long get_random_base(unsigned long safe_addr); void __printf(1, 2) decompressor_printk(const char *fmt, ...); +/* Symbols defined by linker scripts */ extern const char kernel_version[]; extern unsigned long memory_limit; extern unsigned long vmalloc_size; extern int vmalloc_size_set; extern int kaslr_enabled; +extern char __boot_data_start[], __boot_data_end[]; +extern char __boot_data_preserved_start[], __boot_data_preserved_end[]; +extern char _decompressor_syms_start[], _decompressor_syms_end[]; +extern char _stack_start[], _stack_end[]; unsigned long read_ipl_report(unsigned long safe_offset); -#endif /* __ASSEMBLY__ */ #endif /* BOOT_BOOT_H */ diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile index e30d3fdbbc78..3b860061e84d 100644 --- a/arch/s390/boot/compressed/Makefile +++ b/arch/s390/boot/compressed/Makefile @@ -9,6 +9,7 @@ KCOV_INSTRUMENT := n GCOV_PROFILE := n UBSAN_SANITIZE := n KASAN_SANITIZE := n +KCSAN_SANITIZE := n obj-y := $(if $(CONFIG_KERNEL_UNCOMPRESSED),,decompressor.o) info.o obj-$(CONFIG_KERNEL_ZSTD) += clz_ctz.o diff --git a/arch/s390/boot/compressed/decompressor.c b/arch/s390/boot/compressed/decompressor.c index 37a4a8d33c6c..e27c2140d620 100644 --- a/arch/s390/boot/compressed/decompressor.c +++ b/arch/s390/boot/compressed/decompressor.c @@ -23,11 +23,6 @@ #define memmove memmove #define memzero(s, n) memset((s), 0, (n)) -/* Symbols defined by linker scripts */ -extern char _end[]; -extern unsigned char _compressed_start[]; -extern unsigned char _compressed_end[]; - #ifdef CONFIG_KERNEL_BZIP2 #define BOOT_HEAP_SIZE 0x400000 #elif CONFIG_KERNEL_ZSTD diff --git a/arch/s390/boot/compressed/decompressor.h b/arch/s390/boot/compressed/decompressor.h index 41f0ad97a4db..a59f75c5b049 100644 --- a/arch/s390/boot/compressed/decompressor.h +++ b/arch/s390/boot/compressed/decompressor.h @@ -26,7 +26,12 @@ struct vmlinux_info { unsigned long rela_dyn_end; }; +/* Symbols defined by linker scripts */ +extern char _end[]; +extern unsigned char _compressed_start[]; +extern unsigned char _compressed_end[]; extern char _vmlinux_info[]; + #define vmlinux (*(struct vmlinux_info *)_vmlinux_info) #endif /* BOOT_COMPRESSED_DECOMPRESSOR_H */ diff --git a/arch/s390/boot/compressed/vmlinux.lds.S b/arch/s390/boot/compressed/vmlinux.lds.S index 27a09c1c78f6..918e05137d4c 100644 --- a/arch/s390/boot/compressed/vmlinux.lds.S +++ b/arch/s390/boot/compressed/vmlinux.lds.S @@ -1,6 +1,9 @@ /* SPDX-License-Identifier: GPL-2.0 */ #include <asm-generic/vmlinux.lds.h> #include <asm/vmlinux.lds.h> +#include <asm/thread_info.h> +#include <asm/page.h> +#include <asm/sclp.h> OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390") OUTPUT_ARCH(s390:64-bit) @@ -34,27 +37,6 @@ SECTIONS *(.data.*) _edata = . ; } - /* - * .dma section for code, data, ex_table that need to stay below 2 GB, - * even when the kernel is relocate: above 2 GB. - */ - . = ALIGN(PAGE_SIZE); - _sdma = .; - .dma.text : { - _stext_dma = .; - *(.dma.text) - . = ALIGN(PAGE_SIZE); - _etext_dma = .; - } - . 
= ALIGN(16); - .dma.ex_table : { - _start_dma_ex_table = .; - KEEP(*(.dma.ex_table)) - _stop_dma_ex_table = .; - } - .dma.data : { *(.dma.data) } - . = ALIGN(PAGE_SIZE); - _edma = .; BOOT_DATA BOOT_DATA_PRESERVED @@ -69,6 +51,17 @@ SECTIONS *(.bss) *(.bss.*) *(COMMON) + /* + * Stacks for the decompressor + */ + . = ALIGN(PAGE_SIZE); + _dump_info_stack_start = .; + . += PAGE_SIZE; + _dump_info_stack_end = .; + . = ALIGN(PAGE_SIZE); + _stack_start = .; + . += BOOT_STACK_SIZE; + _stack_end = .; _ebss = .; } diff --git a/arch/s390/boot/head.S b/arch/s390/boot/head.S index 51693cfb65c2..40f4cff538b8 100644 --- a/arch/s390/boot/head.S +++ b/arch/s390/boot/head.S @@ -25,13 +25,15 @@ #include <linux/init.h> #include <linux/linkage.h> #include <asm/asm-offsets.h> -#include <asm/thread_info.h> #include <asm/page.h> #include <asm/ptrace.h> -#include "boot.h" +#include <asm/sclp.h> #define ARCH_OFFSET 4 +#define EP_OFFSET 0x10008 +#define EP_STRING "S390EP" + __HEAD #define IPL_BS 0x730 @@ -275,11 +277,11 @@ iplstart: .Lcpuid:.fill 8,1,0 # -# startup-code at 0x10000, running in absolute addressing mode +# normal startup-code, running in absolute addressing mode # this is called either by the ipl loader or directly by PSW restart # or linload or SALIPL # - .org 0x10000 + .org STARTUP_NORMAL_OFFSET SYM_CODE_START(startup) j startup_normal .org EP_OFFSET @@ -292,9 +294,9 @@ SYM_CODE_START(startup) .ascii EP_STRING .byte 0x00,0x01 # -# kdump startup-code at 0x10010, running in 64 bit absolute addressing mode +# kdump startup-code, running in 64 bit absolute addressing mode # - .org 0x10010 + .org STARTUP_KDUMP_OFFSET j startup_kdump SYM_CODE_END(startup) SYM_CODE_START_LOCAL(startup_normal) @@ -315,18 +317,16 @@ SYM_CODE_START_LOCAL(startup_normal) xc 0x300(256),0x300 xc 0xe00(256),0xe00 xc 0xf00(256),0xf00 - lctlg %c0,%c15,.Lctl-.LPG0(%r13) # load control registers stcke __LC_BOOT_CLOCK mvc __LC_LAST_UPDATE_CLOCK(8),__LC_BOOT_CLOCK+1 spt 6f-.LPG0(%r13) mvc __LC_LAST_UPDATE_TIMER(8),6f-.LPG0(%r13) - l %r15,.Lstack-.LPG0(%r13) + larl %r15,_stack_end-STACK_FRAME_OVERHEAD + brasl %r14,sclp_early_setup_buffer brasl %r14,verify_facilities brasl %r14,startup_kernel SYM_CODE_END(startup_normal) -.Lstack: - .long BOOT_STACK_OFFSET + BOOT_STACK_SIZE - STACK_FRAME_OVERHEAD .align 8 6: .long 0x7fffffff,0xffffffff .Lext_new_psw: @@ -335,35 +335,6 @@ SYM_CODE_END(startup_normal) .quad 0x0000000180000000,startup_pgm_check_handler .Lio_new_psw: .quad 0x0002000180000000,0x1f0 # disabled wait -.Lctl: .quad 0x04040000 # cr0: AFP registers & secondary space - .quad 0 # cr1: primary space segment table - .quad .Lduct # cr2: dispatchable unit control table - .quad 0 # cr3: instruction authorization - .quad 0xffff # cr4: instruction authorization - .quad .Lduct # cr5: primary-aste origin - .quad 0 # cr6: I/O interrupts - .quad 0 # cr7: secondary space segment table - .quad 0x0000000000008000 # cr8: access registers translation - .quad 0 # cr9: tracing off - .quad 0 # cr10: tracing off - .quad 0 # cr11: tracing off - .quad 0 # cr12: tracing off - .quad 0 # cr13: home space segment table - .quad 0xc0000000 # cr14: machine check handling off - .quad .Llinkage_stack # cr15: linkage stack operations - - .section .dma.data,"aw",@progbits -.Lduct: .long 0,.Laste,.Laste,0,.Lduald,0,0,0 - .long 0,0,0,0,0,0,0,0 -.Llinkage_stack: - .long 0,0,0x89000000,0,0,0,0x8a000000,0 - .align 64 -.Laste: .quad 0,0xffffffffffffffff,0,0,0,0,0,0 - .align 128 -.Lduald:.rept 8 - .long 0x80000000,0,0,0 # invalid access-list entries - .endr - .previous 
#include "head_kdump.S" @@ -386,15 +357,13 @@ SYM_CODE_START_LOCAL(startup_pgm_check_handler) oi __LC_RETURN_PSW+1,0x2 # set wait state bit larl %r9,.Lold_psw_disabled_wait stg %r9,__LC_PGM_NEW_PSW+8 - l %r15,.Ldump_info_stack-.Lold_psw_disabled_wait(%r9) + larl %r15,_dump_info_stack_end-STACK_FRAME_OVERHEAD brasl %r14,print_pgm_check_info .Lold_psw_disabled_wait: la %r8,4095 lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r8) lpswe __LC_RETURN_PSW # disabled wait SYM_CODE_END(startup_pgm_check_handler) -.Ldump_info_stack: - .long 0x5000 + PAGE_SIZE - STACK_FRAME_OVERHEAD # # params at 10400 (setup.h) @@ -415,7 +384,4 @@ SYM_DATA_START(parmarea) .org PARMAREA+__PARMAREA_SIZE SYM_DATA_END(parmarea) - .org EARLY_SCCB_OFFSET - .fill 4096 - .org HEAD_END diff --git a/arch/s390/boot/ipl_report.c b/arch/s390/boot/ipl_report.c index 0b4965573656..9b14045065b6 100644 --- a/arch/s390/boot/ipl_report.c +++ b/arch/s390/boot/ipl_report.c @@ -54,9 +54,9 @@ static unsigned long find_bootdata_space(struct ipl_rb_components *comps, * not overlap with any component or any certificate. */ repeat: - if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && INITRD_START && INITRD_SIZE && - intersects(INITRD_START, INITRD_SIZE, safe_addr, size)) - safe_addr = INITRD_START + INITRD_SIZE; + if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && initrd_data.start && initrd_data.size && + intersects(initrd_data.start, initrd_data.size, safe_addr, size)) + safe_addr = initrd_data.start + initrd_data.size; for_each_rb_entry(comp, comps) if (intersects(safe_addr, size, comp->addr, comp->len)) { safe_addr = comp->addr + comp->len; diff --git a/arch/s390/boot/kaslr.c b/arch/s390/boot/kaslr.c index 0dd48fbdbaa4..d8984462071f 100644 --- a/arch/s390/boot/kaslr.c +++ b/arch/s390/boot/kaslr.c @@ -186,9 +186,9 @@ unsigned long get_random_base(unsigned long safe_addr) */ memory_limit -= kasan_estimate_memory_needs(memory_limit); - if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && INITRD_START && INITRD_SIZE) { - if (safe_addr < INITRD_START + INITRD_SIZE) - safe_addr = INITRD_START + INITRD_SIZE; + if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && initrd_data.start && initrd_data.size) { + if (safe_addr < initrd_data.start + initrd_data.size) + safe_addr = initrd_data.start + initrd_data.size; } safe_addr = ALIGN(safe_addr, THREAD_SIZE); diff --git a/arch/s390/boot/mem_detect.c b/arch/s390/boot/mem_detect.c index 4e17adbde495..2f949cd9076b 100644 --- a/arch/s390/boot/mem_detect.c +++ b/arch/s390/boot/mem_detect.c @@ -1,6 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/errno.h> #include <linux/init.h> +#include <asm/setup.h> +#include <asm/processor.h> #include <asm/sclp.h> #include <asm/sections.h> #include <asm/mem_detect.h> @@ -24,9 +26,9 @@ static void *mem_detect_alloc_extended(void) { unsigned long offset = ALIGN(mem_safe_offset(), sizeof(u64)); - if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && INITRD_START && INITRD_SIZE && - INITRD_START < offset + ENTRIES_EXTENDED_MAX) - offset = ALIGN(INITRD_START + INITRD_SIZE, sizeof(u64)); + if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && initrd_data.start && initrd_data.size && + initrd_data.start < offset + ENTRIES_EXTENDED_MAX) + offset = ALIGN(initrd_data.start + initrd_data.size, sizeof(u64)); return (void *)offset; } diff --git a/arch/s390/boot/pgm_check_info.c b/arch/s390/boot/pgm_check_info.c index 3a46abed2549..209f6ae5a197 100644 --- a/arch/s390/boot/pgm_check_info.c +++ b/arch/s390/boot/pgm_check_info.c @@ -29,7 +29,6 @@ static char *symstart(char *p) return p + 1; } -extern char _decompressor_syms_start[], 
_decompressor_syms_end[]; static noinline char *findsym(unsigned long ip, unsigned short *off, unsigned short *len) { /* symbol entries are in a form "10000 c4 startup\0" */ @@ -126,8 +125,8 @@ out: static noinline void print_stacktrace(void) { - struct stack_info boot_stack = { STACK_TYPE_TASK, BOOT_STACK_OFFSET, - BOOT_STACK_OFFSET + BOOT_STACK_SIZE }; + struct stack_info boot_stack = { STACK_TYPE_TASK, (unsigned long)_stack_start, + (unsigned long)_stack_end }; unsigned long sp = S390_lowcore.gpregs_save_area[15]; bool first = true; diff --git a/arch/s390/boot/sclp_early_core.c b/arch/s390/boot/sclp_early_core.c index 5a19fd7020b5..6f30646afbd0 100644 --- a/arch/s390/boot/sclp_early_core.c +++ b/arch/s390/boot/sclp_early_core.c @@ -1,2 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 +#include "boot.h" #include "../../../drivers/s390/char/sclp_early_core.c" + +/* SCLP early buffer must stay page-aligned and below 2GB */ +static char __sclp_early_sccb[EXT_SCCB_READ_SCP] __aligned(PAGE_SIZE); + +void sclp_early_setup_buffer(void) +{ + sclp_early_set_buffer(&__sclp_early_sccb); +} diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index d0cf21641e3a..6dc8d0a53864 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -12,9 +12,8 @@ #include <asm/uv.h> #include "compressed/decompressor.h" #include "boot.h" +#include "uv.h" -extern char __boot_data_start[], __boot_data_end[]; -extern char __boot_data_preserved_start[], __boot_data_preserved_end[]; unsigned long __bootdata_preserved(__kaslr_offset); unsigned long __bootdata_preserved(VMALLOC_START); unsigned long __bootdata_preserved(VMALLOC_END); @@ -24,44 +23,11 @@ unsigned long __bootdata_preserved(MODULES_VADDR); unsigned long __bootdata_preserved(MODULES_END); unsigned long __bootdata(ident_map_size); int __bootdata(is_full_image) = 1; +struct initrd_data __bootdata(initrd_data); u64 __bootdata_preserved(stfle_fac_list[16]); u64 __bootdata_preserved(alt_stfle_fac_list[16]); - -/* - * Some code and data needs to stay below 2 GB, even when the kernel would be - * relocated above 2 GB, because it has to use 31 bit addresses. - * Such code and data is part of the .dma section, and its location is passed - * over to the decompressed / relocated kernel via the .boot.preserved.data - * section. 
- */ -extern char _sdma[], _edma[]; -extern char _stext_dma[], _etext_dma[]; -extern struct exception_table_entry _start_dma_ex_table[]; -extern struct exception_table_entry _stop_dma_ex_table[]; -unsigned long __bootdata_preserved(__sdma) = __pa(&_sdma); -unsigned long __bootdata_preserved(__edma) = __pa(&_edma); -unsigned long __bootdata_preserved(__stext_dma) = __pa(&_stext_dma); -unsigned long __bootdata_preserved(__etext_dma) = __pa(&_etext_dma); -struct exception_table_entry * - __bootdata_preserved(__start_dma_ex_table) = _start_dma_ex_table; -struct exception_table_entry * - __bootdata_preserved(__stop_dma_ex_table) = _stop_dma_ex_table; - -int _diag210_dma(struct diag210 *addr); -int _diag26c_dma(void *req, void *resp, enum diag26c_sc subcode); -int _diag14_dma(unsigned long rx, unsigned long ry1, unsigned long subcode); -void _diag0c_dma(struct hypfs_diag0c_entry *entry); -void _diag308_reset_dma(void); -struct diag_ops __bootdata_preserved(diag_dma_ops) = { - .diag210 = _diag210_dma, - .diag26c = _diag26c_dma, - .diag14 = _diag14_dma, - .diag0c = _diag0c_dma, - .diag308_reset = _diag308_reset_dma -}; -static struct diag210 _diag210_tmp_dma __section(".dma.data"); -struct diag210 *__bootdata_preserved(__diag210_tmp_dma) = &_diag210_tmp_dma; +struct oldmem_data __bootdata_preserved(oldmem_data); void error(char *x) { @@ -91,12 +57,12 @@ static void rescue_initrd(unsigned long addr) { if (!IS_ENABLED(CONFIG_BLK_DEV_INITRD)) return; - if (!INITRD_START || !INITRD_SIZE) + if (!initrd_data.start || !initrd_data.size) return; - if (addr <= INITRD_START) + if (addr <= initrd_data.start) return; - memmove((void *)addr, (void *)INITRD_START, INITRD_SIZE); - INITRD_START = addr; + memmove((void *)addr, (void *)initrd_data.start, initrd_data.size); + initrd_data.start = addr; } static void copy_bootdata(void) @@ -169,9 +135,9 @@ static void setup_ident_map_size(unsigned long max_physmem_end) ident_map_size = min(ident_map_size, 1UL << MAX_PHYSMEM_BITS); #ifdef CONFIG_CRASH_DUMP - if (OLDMEM_BASE) { + if (oldmem_data.start) { kaslr_enabled = 0; - ident_map_size = min(ident_map_size, OLDMEM_SIZE); + ident_map_size = min(ident_map_size, oldmem_data.size); } else if (ipl_block_valid && is_ipl_block_dump()) { kaslr_enabled = 0; if (!sclp_early_get_hsa_size(&hsa_size) && hsa_size) @@ -282,12 +248,28 @@ static void setup_vmalloc_size(void) vmalloc_size = max(size, vmalloc_size); } +static void offset_vmlinux_info(unsigned long offset) +{ + vmlinux.default_lma += offset; + *(unsigned long *)(&vmlinux.entry) += offset; + vmlinux.bootdata_off += offset; + vmlinux.bootdata_preserved_off += offset; + vmlinux.rela_dyn_start += offset; + vmlinux.rela_dyn_end += offset; + vmlinux.dynsym_start += offset; +} + void startup_kernel(void) { unsigned long random_lma; unsigned long safe_addr; void *img; + initrd_data.start = parmarea.initrd_start; + initrd_data.size = parmarea.initrd_size; + oldmem_data.start = parmarea.oldmem_base; + oldmem_data.size = parmarea.oldmem_size; + setup_lpp(); store_ipl_parmblock(); safe_addr = mem_safe_offset(); @@ -297,23 +279,17 @@ void startup_kernel(void) sclp_early_read_info(); setup_boot_command_line(); parse_boot_command_line(); + sanitize_prot_virt_host(); setup_ident_map_size(detect_memory()); setup_vmalloc_size(); setup_kernel_memory_layout(); - random_lma = __kaslr_offset = 0; if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_enabled) { random_lma = get_random_base(safe_addr); if (random_lma) { __kaslr_offset = random_lma - vmlinux.default_lma; img = (void 
*)vmlinux.default_lma; - vmlinux.default_lma += __kaslr_offset; - vmlinux.entry += __kaslr_offset; - vmlinux.bootdata_off += __kaslr_offset; - vmlinux.bootdata_preserved_off += __kaslr_offset; - vmlinux.rela_dyn_start += __kaslr_offset; - vmlinux.rela_dyn_end += __kaslr_offset; - vmlinux.dynsym_start += __kaslr_offset; + offset_vmlinux_info(__kaslr_offset); } } diff --git a/arch/s390/boot/uv.c b/arch/s390/boot/uv.c index f6b0c4f43c99..e6be155ab2e5 100644 --- a/arch/s390/boot/uv.c +++ b/arch/s390/boot/uv.c @@ -1,8 +1,12 @@ // SPDX-License-Identifier: GPL-2.0 #include <asm/uv.h> +#include <asm/boot_data.h> #include <asm/facility.h> #include <asm/sections.h> +#include "boot.h" +#include "uv.h" + /* will be used in arch/s390/kernel/uv.c */ #ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST int __bootdata_preserved(prot_virt_guest); @@ -47,26 +51,34 @@ void uv_query_info(void) } #if IS_ENABLED(CONFIG_KVM) -static bool has_uv_sec_stor_limit(void) +void adjust_to_uv_max(unsigned long *vmax) { - /* - * keep these conditions in line with setup_uv() - */ - if (!is_prot_virt_host()) - return false; + if (is_prot_virt_host() && uv_info.max_sec_stor_addr) + *vmax = min_t(unsigned long, *vmax, uv_info.max_sec_stor_addr); +} +static int is_prot_virt_host_capable(void) +{ + /* disable if no prot_virt=1 given on command-line */ + if (!is_prot_virt_host()) + return 0; + /* disable if protected guest virtualization is enabled */ if (is_prot_virt_guest()) - return false; - + return 0; + /* disable if no hardware support */ if (!test_facility(158)) - return false; - - return !!uv_info.max_sec_stor_addr; + return 0; + /* disable if kdump */ + if (oldmem_data.start) + return 0; + /* disable if stand-alone dump */ + if (ipl_block_valid && is_ipl_block_dump()) + return 0; + return 1; } -void adjust_to_uv_max(unsigned long *vmax) +void sanitize_prot_virt_host(void) { - if (has_uv_sec_stor_limit()) - *vmax = min_t(unsigned long, *vmax, uv_info.max_sec_stor_addr); + prot_virt_host = is_prot_virt_host_capable(); } #endif diff --git a/arch/s390/boot/uv.h b/arch/s390/boot/uv.h new file mode 100644 index 000000000000..690ce019af5a --- /dev/null +++ b/arch/s390/boot/uv.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef BOOT_UV_H +#define BOOT_UV_H + +#if IS_ENABLED(CONFIG_KVM) +void adjust_to_uv_max(unsigned long *vmax); +void sanitize_prot_virt_host(void); +#else +static inline void adjust_to_uv_max(unsigned long *vmax) {} +static inline void sanitize_prot_virt_host(void) {} +#endif + +#if defined(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) || IS_ENABLED(CONFIG_KVM) +void uv_query_info(void); +#else +static inline void uv_query_info(void) {} +#endif + +#endif /* BOOT_UV_H */ diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index b88184019af9..11ffc7c37ada 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -10,7 +10,6 @@ CONFIG_BPF_JIT=y CONFIG_BPF_JIT_ALWAYS_ON=y CONFIG_BPF_LSM=y CONFIG_PREEMPT=y -CONFIG_SCHED_CORE=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_BSD_PROCESS_ACCT_V3=y CONFIG_TASKSTATS=y @@ -75,7 +74,6 @@ CONFIG_MODULE_FORCE_UNLOAD=y CONFIG_MODVERSIONS=y CONFIG_MODULE_SRCVERSION_ALL=y CONFIG_MODULE_SIG_SHA256=y -CONFIG_BLK_DEV_INTEGRITY=y CONFIG_BLK_DEV_THROTTLING=y CONFIG_BLK_WBT=y CONFIG_BLK_CGROUP_IOLATENCY=y @@ -466,6 +464,7 @@ CONFIG_DM_FLAKEY=m CONFIG_DM_VERITY=m CONFIG_DM_VERITY_VERIFY_ROOTHASH_SIG=y CONFIG_DM_SWITCH=m +CONFIG_DM_INTEGRITY=m CONFIG_NETDEVICES=y CONFIG_BONDING=m CONFIG_DUMMY=m diff --git a/arch/s390/configs/defconfig 
b/arch/s390/configs/defconfig index 1667a3cdcf0a..e1642d2cba59 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -8,7 +8,6 @@ CONFIG_BPF_SYSCALL=y CONFIG_BPF_JIT=y CONFIG_BPF_JIT_ALWAYS_ON=y CONFIG_BPF_LSM=y -CONFIG_SCHED_CORE=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_BSD_PROCESS_ACCT_V3=y CONFIG_TASKSTATS=y diff --git a/arch/s390/hypfs/hypfs_diag0c.c b/arch/s390/hypfs/hypfs_diag0c.c index 6c43d2ba2079..9a2786079e3a 100644 --- a/arch/s390/hypfs/hypfs_diag0c.c +++ b/arch/s390/hypfs/hypfs_diag0c.c @@ -21,7 +21,7 @@ static void diag0c_fn(void *data) { diag_stat_inc(DIAG_STAT_X00C); - diag_dma_ops.diag0c(((void **) data)[smp_processor_id()]); + diag_amode31_ops.diag0c(((void **)data)[smp_processor_id()]); } /* @@ -33,12 +33,12 @@ static void *diag0c_store(unsigned int *count) unsigned int cpu_count, cpu, i; void **cpu_vec; - get_online_cpus(); + cpus_read_lock(); cpu_count = num_online_cpus(); cpu_vec = kmalloc_array(num_possible_cpus(), sizeof(*cpu_vec), GFP_KERNEL); if (!cpu_vec) - goto fail_put_online_cpus; + goto fail_unlock_cpus; /* Note: Diag 0c needs 8 byte alignment and real storage */ diag0c_data = kzalloc(struct_size(diag0c_data, entry, cpu_count), GFP_KERNEL | GFP_DMA); @@ -54,13 +54,13 @@ static void *diag0c_store(unsigned int *count) on_each_cpu(diag0c_fn, cpu_vec, 1); *count = cpu_count; kfree(cpu_vec); - put_online_cpus(); + cpus_read_unlock(); return diag0c_data; fail_kfree_cpu_vec: kfree(cpu_vec); -fail_put_online_cpus: - put_online_cpus(); +fail_unlock_cpus: + cpus_read_unlock(); return ERR_PTR(-ENOMEM); } diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h index f58c92f28701..1effac6a0152 100644 --- a/arch/s390/include/asm/cio.h +++ b/arch/s390/include/asm/cio.h @@ -5,7 +5,6 @@ #ifndef _ASM_S390_CIO_H_ #define _ASM_S390_CIO_H_ -#include <linux/spinlock.h> #include <linux/bitops.h> #include <linux/genalloc.h> #include <asm/types.h> diff --git a/arch/s390/include/asm/cpacf.h b/arch/s390/include/asm/cpacf.h index c0f3bfeddcbe..646b12981f20 100644 --- a/arch/s390/include/asm/cpacf.h +++ b/arch/s390/include/asm/cpacf.h @@ -173,17 +173,16 @@ typedef struct { unsigned char bytes[16]; } cpacf_mask_t; */ static __always_inline void __cpacf_query(unsigned int opcode, cpacf_mask_t *mask) { - register unsigned long r0 asm("0") = 0; /* query function */ - register unsigned long r1 asm("1") = (unsigned long) mask; - asm volatile( - " spm 0\n" /* pckmo doesn't change the cc */ + " lghi 0,0\n" /* query function */ + " lgr 1,%[mask]\n" + " spm 0\n" /* pckmo doesn't change the cc */ /* Parameter regs are ignored, but must be nonzero and unique */ "0: .insn rrf,%[opc] << 16,2,4,6,0\n" " brc 1,0b\n" /* handle partial completion */ : "=m" (*mask) - : [fc] "d" (r0), [pba] "a" (r1), [opc] "i" (opcode) - : "cc"); + : [mask] "d" ((unsigned long)mask), [opc] "i" (opcode) + : "cc", "0", "1"); } static __always_inline int __cpacf_check_opcode(unsigned int opcode) @@ -249,20 +248,22 @@ static __always_inline int cpacf_query_func(unsigned int opcode, unsigned int fu static inline int cpacf_km(unsigned long func, void *param, u8 *dest, const u8 *src, long src_len) { - register unsigned long r0 asm("0") = (unsigned long) func; - register unsigned long r1 asm("1") = (unsigned long) param; - register unsigned long r2 asm("2") = (unsigned long) src; - register unsigned long r3 asm("3") = (unsigned long) src_len; - register unsigned long r4 asm("4") = (unsigned long) dest; + union register_pair d, s; + d.even = (unsigned long)dest; + s.even = (unsigned long)src; + s.odd 
= (unsigned long)src_len; asm volatile( + " lgr 0,%[fc]\n" + " lgr 1,%[pba]\n" "0: .insn rre,%[opc] << 16,%[dst],%[src]\n" " brc 1,0b\n" /* handle partial completion */ - : [src] "+a" (r2), [len] "+d" (r3), [dst] "+a" (r4) - : [fc] "d" (r0), [pba] "a" (r1), [opc] "i" (CPACF_KM) - : "cc", "memory"); + : [src] "+&d" (s.pair), [dst] "+&d" (d.pair) + : [fc] "d" (func), [pba] "d" ((unsigned long)param), + [opc] "i" (CPACF_KM) + : "cc", "memory", "0", "1"); - return src_len - r3; + return src_len - s.odd; } /** @@ -279,20 +280,22 @@ static inline int cpacf_km(unsigned long func, void *param, static inline int cpacf_kmc(unsigned long func, void *param, u8 *dest, const u8 *src, long src_len) { - register unsigned long r0 asm("0") = (unsigned long) func; - register unsigned long r1 asm("1") = (unsigned long) param; - register unsigned long r2 asm("2") = (unsigned long) src; - register unsigned long r3 asm("3") = (unsigned long) src_len; - register unsigned long r4 asm("4") = (unsigned long) dest; + union register_pair d, s; + d.even = (unsigned long)dest; + s.even = (unsigned long)src; + s.odd = (unsigned long)src_len; asm volatile( + " lgr 0,%[fc]\n" + " lgr 1,%[pba]\n" "0: .insn rre,%[opc] << 16,%[dst],%[src]\n" " brc 1,0b\n" /* handle partial completion */ - : [src] "+a" (r2), [len] "+d" (r3), [dst] "+a" (r4) - : [fc] "d" (r0), [pba] "a" (r1), [opc] "i" (CPACF_KMC) - : "cc", "memory"); + : [src] "+&d" (s.pair), [dst] "+&d" (d.pair) + : [fc] "d" (func), [pba] "d" ((unsigned long)param), + [opc] "i" (CPACF_KMC) + : "cc", "memory", "0", "1"); - return src_len - r3; + return src_len - s.odd; } /** @@ -306,17 +309,19 @@ static inline int cpacf_kmc(unsigned long func, void *param, static inline void cpacf_kimd(unsigned long func, void *param, const u8 *src, long src_len) { - register unsigned long r0 asm("0") = (unsigned long) func; - register unsigned long r1 asm("1") = (unsigned long) param; - register unsigned long r2 asm("2") = (unsigned long) src; - register unsigned long r3 asm("3") = (unsigned long) src_len; + union register_pair s; + s.even = (unsigned long)src; + s.odd = (unsigned long)src_len; asm volatile( + " lgr 0,%[fc]\n" + " lgr 1,%[pba]\n" "0: .insn rre,%[opc] << 16,0,%[src]\n" " brc 1,0b\n" /* handle partial completion */ - : [src] "+a" (r2), [len] "+d" (r3) - : [fc] "d" (r0), [pba] "a" (r1), [opc] "i" (CPACF_KIMD) - : "cc", "memory"); + : [src] "+&d" (s.pair) + : [fc] "d" (func), [pba] "d" ((unsigned long)(param)), + [opc] "i" (CPACF_KIMD) + : "cc", "memory", "0", "1"); } /** @@ -329,17 +334,19 @@ static inline void cpacf_kimd(unsigned long func, void *param, static inline void cpacf_klmd(unsigned long func, void *param, const u8 *src, long src_len) { - register unsigned long r0 asm("0") = (unsigned long) func; - register unsigned long r1 asm("1") = (unsigned long) param; - register unsigned long r2 asm("2") = (unsigned long) src; - register unsigned long r3 asm("3") = (unsigned long) src_len; + union register_pair s; + s.even = (unsigned long)src; + s.odd = (unsigned long)src_len; asm volatile( + " lgr 0,%[fc]\n" + " lgr 1,%[pba]\n" "0: .insn rre,%[opc] << 16,0,%[src]\n" " brc 1,0b\n" /* handle partial completion */ - : [src] "+a" (r2), [len] "+d" (r3) - : [fc] "d" (r0), [pba] "a" (r1), [opc] "i" (CPACF_KLMD) - : "cc", "memory"); + : [src] "+&d" (s.pair) + : [fc] "d" (func), [pba] "d" ((unsigned long)param), + [opc] "i" (CPACF_KLMD) + : "cc", "memory", "0", "1"); } /** @@ -355,19 +362,21 @@ static inline void cpacf_klmd(unsigned long func, void *param, static inline int 
cpacf_kmac(unsigned long func, void *param, const u8 *src, long src_len) { - register unsigned long r0 asm("0") = (unsigned long) func; - register unsigned long r1 asm("1") = (unsigned long) param; - register unsigned long r2 asm("2") = (unsigned long) src; - register unsigned long r3 asm("3") = (unsigned long) src_len; + union register_pair s; + s.even = (unsigned long)src; + s.odd = (unsigned long)src_len; asm volatile( + " lgr 0,%[fc]\n" + " lgr 1,%[pba]\n" "0: .insn rre,%[opc] << 16,0,%[src]\n" " brc 1,0b\n" /* handle partial completion */ - : [src] "+a" (r2), [len] "+d" (r3) - : [fc] "d" (r0), [pba] "a" (r1), [opc] "i" (CPACF_KMAC) - : "cc", "memory"); + : [src] "+&d" (s.pair) + : [fc] "d" (func), [pba] "d" ((unsigned long)param), + [opc] "i" (CPACF_KMAC) + : "cc", "memory", "0", "1"); - return src_len - r3; + return src_len - s.odd; } /** @@ -385,22 +394,24 @@ static inline int cpacf_kmac(unsigned long func, void *param, static inline int cpacf_kmctr(unsigned long func, void *param, u8 *dest, const u8 *src, long src_len, u8 *counter) { - register unsigned long r0 asm("0") = (unsigned long) func; - register unsigned long r1 asm("1") = (unsigned long) param; - register unsigned long r2 asm("2") = (unsigned long) src; - register unsigned long r3 asm("3") = (unsigned long) src_len; - register unsigned long r4 asm("4") = (unsigned long) dest; - register unsigned long r6 asm("6") = (unsigned long) counter; + union register_pair d, s, c; + d.even = (unsigned long)dest; + s.even = (unsigned long)src; + s.odd = (unsigned long)src_len; + c.even = (unsigned long)counter; asm volatile( + " lgr 0,%[fc]\n" + " lgr 1,%[pba]\n" "0: .insn rrf,%[opc] << 16,%[dst],%[src],%[ctr],0\n" " brc 1,0b\n" /* handle partial completion */ - : [src] "+a" (r2), [len] "+d" (r3), - [dst] "+a" (r4), [ctr] "+a" (r6) - : [fc] "d" (r0), [pba] "a" (r1), [opc] "i" (CPACF_KMCTR) - : "cc", "memory"); + : [src] "+&d" (s.pair), [dst] "+&d" (d.pair), + [ctr] "+&d" (c.pair) + : [fc] "d" (func), [pba] "d" ((unsigned long)param), + [opc] "i" (CPACF_KMCTR) + : "cc", "memory", "0", "1"); - return src_len - r3; + return src_len - s.odd; } /** @@ -417,20 +428,21 @@ static inline void cpacf_prno(unsigned long func, void *param, u8 *dest, unsigned long dest_len, const u8 *seed, unsigned long seed_len) { - register unsigned long r0 asm("0") = (unsigned long) func; - register unsigned long r1 asm("1") = (unsigned long) param; - register unsigned long r2 asm("2") = (unsigned long) dest; - register unsigned long r3 asm("3") = (unsigned long) dest_len; - register unsigned long r4 asm("4") = (unsigned long) seed; - register unsigned long r5 asm("5") = (unsigned long) seed_len; + union register_pair d, s; + d.even = (unsigned long)dest; + d.odd = (unsigned long)dest_len; + s.even = (unsigned long)seed; + s.odd = (unsigned long)seed_len; asm volatile ( + " lgr 0,%[fc]\n" + " lgr 1,%[pba]\n" "0: .insn rre,%[opc] << 16,%[dst],%[seed]\n" " brc 1,0b\n" /* handle partial completion */ - : [dst] "+a" (r2), [dlen] "+d" (r3) - : [fc] "d" (r0), [pba] "a" (r1), - [seed] "a" (r4), [slen] "d" (r5), [opc] "i" (CPACF_PRNO) - : "cc", "memory"); + : [dst] "+&d" (d.pair) + : [fc] "d" (func), [pba] "d" ((unsigned long)param), + [seed] "d" (s.pair), [opc] "i" (CPACF_PRNO) + : "cc", "memory", "0", "1"); } /** @@ -443,19 +455,19 @@ static inline void cpacf_prno(unsigned long func, void *param, static inline void cpacf_trng(u8 *ucbuf, unsigned long ucbuf_len, u8 *cbuf, unsigned long cbuf_len) { - register unsigned long r0 asm("0") = (unsigned long) CPACF_PRNO_TRNG; 
- register unsigned long r2 asm("2") = (unsigned long) ucbuf; - register unsigned long r3 asm("3") = (unsigned long) ucbuf_len; - register unsigned long r4 asm("4") = (unsigned long) cbuf; - register unsigned long r5 asm("5") = (unsigned long) cbuf_len; + union register_pair u, c; + u.even = (unsigned long)ucbuf; + u.odd = (unsigned long)ucbuf_len; + c.even = (unsigned long)cbuf; + c.odd = (unsigned long)cbuf_len; asm volatile ( + " lghi 0,%[fc]\n" "0: .insn rre,%[opc] << 16,%[ucbuf],%[cbuf]\n" " brc 1,0b\n" /* handle partial completion */ - : [ucbuf] "+a" (r2), [ucbuflen] "+d" (r3), - [cbuf] "+a" (r4), [cbuflen] "+d" (r5) - : [fc] "d" (r0), [opc] "i" (CPACF_PRNO) - : "cc", "memory"); + : [ucbuf] "+&d" (u.pair), [cbuf] "+&d" (c.pair) + : [fc] "K" (CPACF_PRNO_TRNG), [opc] "i" (CPACF_PRNO) + : "cc", "memory", "0"); } /** @@ -466,15 +478,15 @@ static inline void cpacf_trng(u8 *ucbuf, unsigned long ucbuf_len, */ static inline void cpacf_pcc(unsigned long func, void *param) { - register unsigned long r0 asm("0") = (unsigned long) func; - register unsigned long r1 asm("1") = (unsigned long) param; - asm volatile( + " lgr 0,%[fc]\n" + " lgr 1,%[pba]\n" "0: .insn rre,%[opc] << 16,0,0\n" /* PCC opcode */ " brc 1,0b\n" /* handle partial completion */ : - : [fc] "d" (r0), [pba] "a" (r1), [opc] "i" (CPACF_PCC) - : "cc", "memory"); + : [fc] "d" (func), [pba] "d" ((unsigned long)param), + [opc] "i" (CPACF_PCC) + : "cc", "memory", "0", "1"); } /** @@ -487,14 +499,14 @@ static inline void cpacf_pcc(unsigned long func, void *param) */ static inline void cpacf_pckmo(long func, void *param) { - register unsigned long r0 asm("0") = (unsigned long) func; - register unsigned long r1 asm("1") = (unsigned long) param; - asm volatile( + " lgr 0,%[fc]\n" + " lgr 1,%[pba]\n" " .insn rre,%[opc] << 16,0,0\n" /* PCKMO opcode */ : - : [fc] "d" (r0), [pba] "a" (r1), [opc] "i" (CPACF_PCKMO) - : "cc", "memory"); + : [fc] "d" (func), [pba] "d" ((unsigned long)param), + [opc] "i" (CPACF_PCKMO) + : "cc", "memory", "0", "1"); } /** @@ -512,21 +524,23 @@ static inline void cpacf_kma(unsigned long func, void *param, u8 *dest, const u8 *src, unsigned long src_len, const u8 *aad, unsigned long aad_len) { - register unsigned long r0 asm("0") = (unsigned long) func; - register unsigned long r1 asm("1") = (unsigned long) param; - register unsigned long r2 asm("2") = (unsigned long) src; - register unsigned long r3 asm("3") = (unsigned long) src_len; - register unsigned long r4 asm("4") = (unsigned long) aad; - register unsigned long r5 asm("5") = (unsigned long) aad_len; - register unsigned long r6 asm("6") = (unsigned long) dest; + union register_pair d, s, a; + d.even = (unsigned long)dest; + s.even = (unsigned long)src; + s.odd = (unsigned long)src_len; + a.even = (unsigned long)aad; + a.odd = (unsigned long)aad_len; asm volatile( + " lgr 0,%[fc]\n" + " lgr 1,%[pba]\n" "0: .insn rrf,%[opc] << 16,%[dst],%[src],%[aad],0\n" " brc 1,0b\n" /* handle partial completion */ - : [dst] "+a" (r6), [src] "+a" (r2), [slen] "+d" (r3), - [aad] "+a" (r4), [alen] "+d" (r5) - : [fc] "d" (r0), [pba] "a" (r1), [opc] "i" (CPACF_KMA) - : "cc", "memory"); + : [dst] "+&d" (d.pair), [src] "+&d" (s.pair), + [aad] "+&d" (a.pair) + : [fc] "d" (func), [pba] "d" ((unsigned long)param), + [opc] "i" (CPACF_KMA) + : "cc", "memory", "0", "1"); } #endif /* _ASM_S390_CPACF_H */ diff --git a/arch/s390/include/asm/cpufeature.h b/arch/s390/include/asm/cpufeature.h index 1d007c6ede95..14cfd48d598e 100644 --- a/arch/s390/include/asm/cpufeature.h +++ 
b/arch/s390/include/asm/cpufeature.h @@ -23,7 +23,7 @@ #define MAX_ELF_HWCAP_FEATURES (8 * sizeof(elf_hwcap)) #define MAX_CPU_FEATURES MAX_ELF_HWCAP_FEATURES -#define cpu_feature(feat) ilog2(HWCAP_S390_ ## feat) +#define cpu_feature(feat) ilog2(HWCAP_ ## feat) int cpu_have_feature(unsigned int nr); diff --git a/arch/s390/include/asm/ctl_reg.h b/arch/s390/include/asm/ctl_reg.h index adc0179fa34e..04dc65f8901d 100644 --- a/arch/s390/include/asm/ctl_reg.h +++ b/arch/s390/include/asm/ctl_reg.h @@ -111,6 +111,23 @@ union ctlreg2 { }; }; +union ctlreg5 { + unsigned long val; + struct { + unsigned long : 33; + unsigned long pasteo: 25; + unsigned long : 6; + }; +}; + +union ctlreg15 { + unsigned long val; + struct { + unsigned long lsea : 61; + unsigned long : 3; + }; +}; + #define ctl_set_bit(cr, bit) smp_ctl_set_bit(cr, bit) #define ctl_clear_bit(cr, bit) smp_ctl_clear_bit(cr, bit) diff --git a/arch/s390/include/asm/debug.h b/arch/s390/include/asm/debug.h index c1b82bcc017c..19a55e1e3a0c 100644 --- a/arch/s390/include/asm/debug.h +++ b/arch/s390/include/asm/debug.h @@ -13,6 +13,7 @@ #include <linux/time.h> #include <linux/refcount.h> #include <linux/fs.h> +#include <linux/init.h> #define DEBUG_MAX_LEVEL 6 /* debug levels range from 0 to 6 */ #define DEBUG_OFF_LEVEL -1 /* level where debug is switched off */ @@ -391,38 +392,99 @@ int debug_register_view(debug_info_t *id, struct debug_view *view); int debug_unregister_view(debug_info_t *id, struct debug_view *view); +#ifndef MODULE + +/* + * Note: Initial page and area numbers must be fixed to allow static + * initialization. This enables very early tracing. Changes to these values + * must be reflected in __DEFINE_STATIC_AREA. + */ +#define EARLY_PAGES 8 +#define EARLY_AREAS 1 + +#define VNAME(var, suffix) __##var##_##suffix + /* - define the debug levels: - - 0 No debugging output to console or syslog - - 1 Log internal errors to syslog, ignore check conditions - - 2 Log internal errors and check conditions to syslog - - 3 Log internal errors to console, log check conditions to syslog - - 4 Log internal errors and check conditions to console - - 5 panic on internal errors, log check conditions to console - - 6 panic on both, internal errors and check conditions + * Define static areas for early trace data. During boot debug_register_static() + * will replace these with dynamically allocated areas to allow custom page and + * area sizes, and dynamic resizing. 
*/ +#define __DEFINE_STATIC_AREA(var) \ +static char VNAME(var, data)[EARLY_PAGES][PAGE_SIZE] __initdata; \ +static debug_entry_t *VNAME(var, pages)[EARLY_PAGES] __initdata = { \ + (debug_entry_t *)VNAME(var, data)[0], \ + (debug_entry_t *)VNAME(var, data)[1], \ + (debug_entry_t *)VNAME(var, data)[2], \ + (debug_entry_t *)VNAME(var, data)[3], \ + (debug_entry_t *)VNAME(var, data)[4], \ + (debug_entry_t *)VNAME(var, data)[5], \ + (debug_entry_t *)VNAME(var, data)[6], \ + (debug_entry_t *)VNAME(var, data)[7], \ +}; \ +static debug_entry_t **VNAME(var, areas)[EARLY_AREAS] __initdata = { \ + (debug_entry_t **)VNAME(var, pages), \ +}; \ +static int VNAME(var, active_pages)[EARLY_AREAS] __initdata; \ +static int VNAME(var, active_entries)[EARLY_AREAS] __initdata + +#define __DEBUG_INFO_INIT(var, _name, _buf_size) { \ + .next = NULL, \ + .prev = NULL, \ + .ref_count = REFCOUNT_INIT(1), \ + .lock = __SPIN_LOCK_UNLOCKED(var.lock), \ + .level = DEBUG_DEFAULT_LEVEL, \ + .nr_areas = EARLY_AREAS, \ + .pages_per_area = EARLY_PAGES, \ + .buf_size = (_buf_size), \ + .entry_size = sizeof(debug_entry_t) + (_buf_size), \ + .areas = VNAME(var, areas), \ + .active_area = 0, \ + .active_pages = VNAME(var, active_pages), \ + .active_entries = VNAME(var, active_entries), \ + .debugfs_root_entry = NULL, \ + .debugfs_entries = { NULL }, \ + .views = { NULL }, \ + .name = (_name), \ + .mode = 0600, \ +} + +#define __REGISTER_STATIC_DEBUG_INFO(var, name, pages, areas, view) \ +static int __init VNAME(var, reg)(void) \ +{ \ + debug_register_static(&var, (pages), (areas)); \ + debug_register_view(&var, (view)); \ + return 0; \ +} \ +arch_initcall(VNAME(var, reg)) + +/** + * DEFINE_STATIC_DEBUG_INFO - Define static debug_info_t + * + * @var: Name of debug_info_t variable + * @name: Name of debug log (e.g. used for debugfs entry) + * @pages_per_area: Number of pages per area + * @nr_areas: Number of debug areas + * @buf_size: Size of data area in each debug entry + * @view: Pointer to debug view struct + * + * Define a static debug_info_t for early tracing. The associated debugfs log + * is automatically registered with the specified debug view. + * + * Important: Users of this macro must not call any of the + * debug_register/_unregister() functions for this debug_info_t! + * + * Note: Tracing will start with a fixed number of initial pages and areas. + * The debug area will be changed to use the specified numbers during + * arch_initcall. + */ +#define DEFINE_STATIC_DEBUG_INFO(var, name, pages, nr_areas, buf_size, view) \ +__DEFINE_STATIC_AREA(var); \ +static debug_info_t __refdata var = \ + __DEBUG_INFO_INIT(var, (name), (buf_size)); \ +__REGISTER_STATIC_DEBUG_INFO(var, name, pages, nr_areas, view) + +void debug_register_static(debug_info_t *id, int pages_per_area, int nr_areas); -#ifndef DEBUG_LEVEL -#define DEBUG_LEVEL 4 -#endif - -#define INTERNAL_ERRMSG(x,y...) "E" __FILE__ "%d: " x, __LINE__, y -#define INTERNAL_WRNMSG(x,y...) "W" __FILE__ "%d: " x, __LINE__, y -#define INTERNAL_INFMSG(x,y...) "I" __FILE__ "%d: " x, __LINE__, y -#define INTERNAL_DEBMSG(x,y...) "D" __FILE__ "%d: " x, __LINE__, y - -#if DEBUG_LEVEL > 0 -#define PRINT_DEBUG(x...) printk(KERN_DEBUG PRINTK_HEADER x) -#define PRINT_INFO(x...) printk(KERN_INFO PRINTK_HEADER x) -#define PRINT_WARN(x...) printk(KERN_WARNING PRINTK_HEADER x) -#define PRINT_ERR(x...) printk(KERN_ERR PRINTK_HEADER x) -#define PRINT_FATAL(x...) panic(PRINTK_HEADER x) -#else -#define PRINT_DEBUG(x...) printk(KERN_DEBUG PRINTK_HEADER x) -#define PRINT_INFO(x...) 
printk(KERN_DEBUG PRINTK_HEADER x) -#define PRINT_WARN(x...) printk(KERN_DEBUG PRINTK_HEADER x) -#define PRINT_ERR(x...) printk(KERN_DEBUG PRINTK_HEADER x) -#define PRINT_FATAL(x...) printk(KERN_DEBUG PRINTK_HEADER x) -#endif /* DASD_DEBUG */ +#endif /* MODULE */ #endif /* DEBUG_H */ diff --git a/arch/s390/include/asm/diag.h b/arch/s390/include/asm/diag.h index ca8f85b53a90..b3a8cb4daed6 100644 --- a/arch/s390/include/asm/diag.h +++ b/arch/s390/include/asm/diag.h @@ -309,6 +309,10 @@ int diag26c(void *req, void *resp, enum diag26c_sc subcode); struct hypfs_diag0c_entry; +/* + * This structure must contain only pointers/references into + * the AMODE31 text section. + */ struct diag_ops { int (*diag210)(struct diag210 *addr); int (*diag26c)(void *req, void *resp, enum diag26c_sc subcode); @@ -317,6 +321,13 @@ struct diag_ops { void (*diag308_reset)(void); }; -extern struct diag_ops diag_dma_ops; -extern struct diag210 *__diag210_tmp_dma; +extern struct diag_ops diag_amode31_ops; +extern struct diag210 *__diag210_tmp_amode31; + +int _diag210_amode31(struct diag210 *addr); +int _diag26c_amode31(void *req, void *resp, enum diag26c_sc subcode); +int _diag14_amode31(unsigned long rx, unsigned long ry1, unsigned long subcode); +void _diag0c_amode31(struct hypfs_diag0c_entry *entry); +void _diag308_reset_amode31(void); + #endif /* _ASM_S390_DIAG_H */ diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h index bd00c94620d3..70a30ae258b7 100644 --- a/arch/s390/include/asm/elf.h +++ b/arch/s390/include/asm/elf.h @@ -91,29 +91,57 @@ /* Keep this the last entry. */ #define R_390_NUM 61 -/* Bits present in AT_HWCAP. */ -#define HWCAP_S390_ESAN3 1 -#define HWCAP_S390_ZARCH 2 -#define HWCAP_S390_STFLE 4 -#define HWCAP_S390_MSA 8 -#define HWCAP_S390_LDISP 16 -#define HWCAP_S390_EIMM 32 -#define HWCAP_S390_DFP 64 -#define HWCAP_S390_HPAGE 128 -#define HWCAP_S390_ETF3EH 256 -#define HWCAP_S390_HIGH_GPRS 512 -#define HWCAP_S390_TE 1024 -#define HWCAP_S390_VXRS 2048 -#define HWCAP_S390_VXRS_BCD 4096 -#define HWCAP_S390_VXRS_EXT 8192 -#define HWCAP_S390_GS 16384 -#define HWCAP_S390_VXRS_EXT2 32768 -#define HWCAP_S390_VXRS_PDE 65536 -#define HWCAP_S390_SORT 131072 -#define HWCAP_S390_DFLT 262144 +enum { + HWCAP_NR_ESAN3 = 0, + HWCAP_NR_ZARCH = 1, + HWCAP_NR_STFLE = 2, + HWCAP_NR_MSA = 3, + HWCAP_NR_LDISP = 4, + HWCAP_NR_EIMM = 5, + HWCAP_NR_DFP = 6, + HWCAP_NR_HPAGE = 7, + HWCAP_NR_ETF3EH = 8, + HWCAP_NR_HIGH_GPRS = 9, + HWCAP_NR_TE = 10, + HWCAP_NR_VXRS = 11, + HWCAP_NR_VXRS_BCD = 12, + HWCAP_NR_VXRS_EXT = 13, + HWCAP_NR_GS = 14, + HWCAP_NR_VXRS_EXT2 = 15, + HWCAP_NR_VXRS_PDE = 16, + HWCAP_NR_SORT = 17, + HWCAP_NR_DFLT = 18, + HWCAP_NR_VXRS_PDE2 = 19, + HWCAP_NR_NNPA = 20, + HWCAP_NR_PCI_MIO = 21, + HWCAP_NR_SIE = 22, + HWCAP_NR_MAX +}; -/* Internal bits, not exposed via elf */ -#define HWCAP_INT_SIE 1UL +/* Bits present in AT_HWCAP. 
*/ +#define HWCAP_ESAN3 BIT(HWCAP_NR_ESAN3) +#define HWCAP_ZARCH BIT(HWCAP_NR_ZARCH) +#define HWCAP_STFLE BIT(HWCAP_NR_STFLE) +#define HWCAP_MSA BIT(HWCAP_NR_MSA) +#define HWCAP_LDISP BIT(HWCAP_NR_LDISP) +#define HWCAP_EIMM BIT(HWCAP_NR_EIMM) +#define HWCAP_DFP BIT(HWCAP_NR_DFP) +#define HWCAP_HPAGE BIT(HWCAP_NR_HPAGE) +#define HWCAP_ETF3EH BIT(HWCAP_NR_ETF3EH) +#define HWCAP_HIGH_GPRS BIT(HWCAP_NR_HIGH_GPRS) +#define HWCAP_TE BIT(HWCAP_NR_TE) +#define HWCAP_VXRS BIT(HWCAP_NR_VXRS) +#define HWCAP_VXRS_BCD BIT(HWCAP_NR_VXRS_BCD) +#define HWCAP_VXRS_EXT BIT(HWCAP_NR_VXRS_EXT) +#define HWCAP_GS BIT(HWCAP_NR_GS) +#define HWCAP_VXRS_EXT2 BIT(HWCAP_NR_VXRS_EXT2) +#define HWCAP_VXRS_PDE BIT(HWCAP_NR_VXRS_PDE) +#define HWCAP_SORT BIT(HWCAP_NR_SORT) +#define HWCAP_DFLT BIT(HWCAP_NR_DFLT) +#define HWCAP_VXRS_PDE2 BIT(HWCAP_NR_VXRS_PDE2) +#define HWCAP_NNPA BIT(HWCAP_NR_NNPA) +#define HWCAP_PCI_MIO BIT(HWCAP_NR_PCI_MIO) +#define HWCAP_SIE BIT(HWCAP_NR_SIE) /* * These are used to set parameters in the core dumps. @@ -209,10 +237,6 @@ struct arch_elf_state { extern unsigned long elf_hwcap; #define ELF_HWCAP (elf_hwcap) -/* Internal hardware capabilities, not exposed via elf */ - -extern unsigned long int_hwcap; - /* This yields a string that ld.so will use to load implementation specific libraries for optimization. This is more specific in intent than poking at uname or /proc/cpuinfo. diff --git a/arch/s390/include/asm/extable.h b/arch/s390/include/asm/extable.h index 3beb294fd553..16dc57dd90b3 100644 --- a/arch/s390/include/asm/extable.h +++ b/arch/s390/include/asm/extable.h @@ -28,8 +28,8 @@ struct exception_table_entry long handler; }; -extern struct exception_table_entry *__start_dma_ex_table; -extern struct exception_table_entry *__stop_dma_ex_table; +extern struct exception_table_entry *__start_amode31_ex_table; +extern struct exception_table_entry *__stop_amode31_ex_table; const struct exception_table_entry *s390_search_extables(unsigned long addr); diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h index 345cbe982a8b..e8b460f39c58 100644 --- a/arch/s390/include/asm/ftrace.h +++ b/arch/s390/include/asm/ftrace.h @@ -18,7 +18,6 @@ void ftrace_caller(void); extern char ftrace_graph_caller_end; -extern unsigned long ftrace_plt; extern void *ftrace_func; struct dyn_arch_ftrace { }; @@ -31,10 +30,11 @@ struct dyn_arch_ftrace { }; struct module; struct dyn_ftrace; -/* - * Either -mhotpatch or -mnop-mcount is used - no explicit init is required - */ -static inline int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) { return 0; } + +bool ftrace_need_init_nop(void); +#define ftrace_need_init_nop ftrace_need_init_nop + +int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec); #define ftrace_init_nop ftrace_init_nop static inline unsigned long ftrace_call_adjust(unsigned long addr) @@ -42,42 +42,6 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) return addr; } -struct ftrace_insn { - u16 opc; - s32 disp; -} __packed; - -static inline void ftrace_generate_nop_insn(struct ftrace_insn *insn) -{ -#ifdef CONFIG_FUNCTION_TRACER - /* brcl 0,0 */ - insn->opc = 0xc004; - insn->disp = 0; -#endif -} - -static inline int is_ftrace_nop(struct ftrace_insn *insn) -{ -#ifdef CONFIG_FUNCTION_TRACER - if (insn->disp == 0) - return 1; -#endif - return 0; -} - -static inline void ftrace_generate_call_insn(struct ftrace_insn *insn, - unsigned long ip) -{ -#ifdef CONFIG_FUNCTION_TRACER - unsigned long target; - - /* brasl r0,ftrace_caller */ - target = 
is_module_addr((void *) ip) ? ftrace_plt : FTRACE_ADDR; - insn->opc = 0xc005; - insn->disp = (target - ip) / 2; -#endif -} - /* * Even though the system call numbers are identical for s390/s390x a * different system call table is used for compat tasks. This may lead diff --git a/arch/s390/include/asm/ftrace.lds.h b/arch/s390/include/asm/ftrace.lds.h new file mode 100644 index 000000000000..968adfd41240 --- /dev/null +++ b/arch/s390/include/asm/ftrace.lds.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef DIV_ROUND_UP +#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d)) +#endif + +#define SIZEOF_MCOUNT_LOC_ENTRY 8 +#define SIZEOF_FTRACE_HOTPATCH_TRAMPOLINE 24 +#define FTRACE_HOTPATCH_TRAMPOLINES_SIZE(n) \ + DIV_ROUND_UP(SIZEOF_FTRACE_HOTPATCH_TRAMPOLINE * (n), \ + SIZEOF_MCOUNT_LOC_ENTRY) + +#ifdef CONFIG_FUNCTION_TRACER +#define FTRACE_HOTPATCH_TRAMPOLINES_TEXT \ + . = ALIGN(8); \ + __ftrace_hotpatch_trampolines_start = .; \ + . = . + FTRACE_HOTPATCH_TRAMPOLINES_SIZE(__stop_mcount_loc - \ + __start_mcount_loc); \ + __ftrace_hotpatch_trampolines_end = .; +#else +#define FTRACE_HOTPATCH_TRAMPOLINES_TEXT +#endif diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h index a9e2c7295b35..3f8ee257f9aa 100644 --- a/arch/s390/include/asm/ipl.h +++ b/arch/s390/include/asm/ipl.h @@ -12,6 +12,7 @@ #include <asm/types.h> #include <asm/cio.h> #include <asm/setup.h> +#include <asm/page.h> #include <uapi/asm/ipl.h> struct ipl_parameter_block { diff --git a/arch/s390/include/asm/kfence.h b/arch/s390/include/asm/kfence.h new file mode 100644 index 000000000000..d55ba878378b --- /dev/null +++ b/arch/s390/include/asm/kfence.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _ASM_S390_KFENCE_H +#define _ASM_S390_KFENCE_H + +#include <linux/mm.h> +#include <linux/kfence.h> +#include <asm/set_memory.h> +#include <asm/page.h> + +void __kernel_map_pages(struct page *page, int numpages, int enable); + +static __always_inline bool arch_kfence_init_pool(void) +{ + return true; +} + +#define arch_kfence_test_address(addr) ((addr) & PAGE_MASK) + +/* + * Do not split the kfence pool into 4k mappings with arch_kfence_init_pool(), + * but earlier, while page table allocations still happen with memblock. + * The reason is that arch_kfence_init_pool() gets called when the system + * is still in a limbo state - disabling and enabling bottom halves is + * not yet allowed, but that is what our page_table_alloc() would do.
+ */ +static __always_inline void kfence_split_mapping(void) +{ +#ifdef CONFIG_KFENCE + unsigned long pool_pages = KFENCE_POOL_SIZE >> PAGE_SHIFT; + + set_memory_4k((unsigned long)__kfence_pool, pool_pages); +#endif +} + +static inline bool kfence_protect_page(unsigned long addr, bool protect) +{ + __kernel_map_pages(virt_to_page(addr), 1, !protect); + return true; +} + +#endif /* _ASM_S390_KFENCE_H */ diff --git a/arch/s390/include/asm/kvm_para.h b/arch/s390/include/asm/kvm_para.h index cbc7c3a68e4d..df73a052760c 100644 --- a/arch/s390/include/asm/kvm_para.h +++ b/arch/s390/include/asm/kvm_para.h @@ -24,162 +24,79 @@ #include <uapi/asm/kvm_para.h> #include <asm/diag.h> -static inline long __kvm_hypercall0(unsigned long nr) -{ - register unsigned long __nr asm("1") = nr; - register long __rc asm("2"); - - asm volatile ("diag 2,4,0x500\n" - : "=d" (__rc) : "d" (__nr): "memory", "cc"); - return __rc; -} - -static inline long kvm_hypercall0(unsigned long nr) -{ - diag_stat_inc(DIAG_STAT_X500); - return __kvm_hypercall0(nr); -} - -static inline long __kvm_hypercall1(unsigned long nr, unsigned long p1) -{ - register unsigned long __nr asm("1") = nr; - register unsigned long __p1 asm("2") = p1; - register long __rc asm("2"); - - asm volatile ("diag 2,4,0x500\n" - : "=d" (__rc) : "d" (__nr), "0" (__p1) : "memory", "cc"); - return __rc; -} - -static inline long kvm_hypercall1(unsigned long nr, unsigned long p1) -{ - diag_stat_inc(DIAG_STAT_X500); - return __kvm_hypercall1(nr, p1); -} - -static inline long __kvm_hypercall2(unsigned long nr, unsigned long p1, - unsigned long p2) -{ - register unsigned long __nr asm("1") = nr; - register unsigned long __p1 asm("2") = p1; - register unsigned long __p2 asm("3") = p2; - register long __rc asm("2"); - - asm volatile ("diag 2,4,0x500\n" - : "=d" (__rc) : "d" (__nr), "0" (__p1), "d" (__p2) - : "memory", "cc"); - return __rc; -} - -static inline long kvm_hypercall2(unsigned long nr, unsigned long p1, - unsigned long p2) -{ - diag_stat_inc(DIAG_STAT_X500); - return __kvm_hypercall2(nr, p1, p2); -} - -static inline long __kvm_hypercall3(unsigned long nr, unsigned long p1, - unsigned long p2, unsigned long p3) -{ - register unsigned long __nr asm("1") = nr; - register unsigned long __p1 asm("2") = p1; - register unsigned long __p2 asm("3") = p2; - register unsigned long __p3 asm("4") = p3; - register long __rc asm("2"); - - asm volatile ("diag 2,4,0x500\n" - : "=d" (__rc) : "d" (__nr), "0" (__p1), "d" (__p2), - "d" (__p3) : "memory", "cc"); - return __rc; -} - -static inline long kvm_hypercall3(unsigned long nr, unsigned long p1, - unsigned long p2, unsigned long p3) -{ - diag_stat_inc(DIAG_STAT_X500); - return __kvm_hypercall3(nr, p1, p2, p3); -} - -static inline long __kvm_hypercall4(unsigned long nr, unsigned long p1, - unsigned long p2, unsigned long p3, - unsigned long p4) -{ - register unsigned long __nr asm("1") = nr; - register unsigned long __p1 asm("2") = p1; - register unsigned long __p2 asm("3") = p2; - register unsigned long __p3 asm("4") = p3; - register unsigned long __p4 asm("5") = p4; - register long __rc asm("2"); - - asm volatile ("diag 2,4,0x500\n" - : "=d" (__rc) : "d" (__nr), "0" (__p1), "d" (__p2), - "d" (__p3), "d" (__p4) : "memory", "cc"); - return __rc; -} - -static inline long kvm_hypercall4(unsigned long nr, unsigned long p1, - unsigned long p2, unsigned long p3, - unsigned long p4) -{ - diag_stat_inc(DIAG_STAT_X500); - return __kvm_hypercall4(nr, p1, p2, p3, p4); -} - -static inline long __kvm_hypercall5(unsigned long nr, unsigned 
long p1, - unsigned long p2, unsigned long p3, - unsigned long p4, unsigned long p5) -{ - register unsigned long __nr asm("1") = nr; - register unsigned long __p1 asm("2") = p1; - register unsigned long __p2 asm("3") = p2; - register unsigned long __p3 asm("4") = p3; - register unsigned long __p4 asm("5") = p4; - register unsigned long __p5 asm("6") = p5; - register long __rc asm("2"); - - asm volatile ("diag 2,4,0x500\n" - : "=d" (__rc) : "d" (__nr), "0" (__p1), "d" (__p2), - "d" (__p3), "d" (__p4), "d" (__p5) : "memory", "cc"); - return __rc; -} - -static inline long kvm_hypercall5(unsigned long nr, unsigned long p1, - unsigned long p2, unsigned long p3, - unsigned long p4, unsigned long p5) -{ - diag_stat_inc(DIAG_STAT_X500); - return __kvm_hypercall5(nr, p1, p2, p3, p4, p5); -} - -static inline long __kvm_hypercall6(unsigned long nr, unsigned long p1, - unsigned long p2, unsigned long p3, - unsigned long p4, unsigned long p5, - unsigned long p6) -{ - register unsigned long __nr asm("1") = nr; - register unsigned long __p1 asm("2") = p1; - register unsigned long __p2 asm("3") = p2; - register unsigned long __p3 asm("4") = p3; - register unsigned long __p4 asm("5") = p4; - register unsigned long __p5 asm("6") = p5; - register unsigned long __p6 asm("7") = p6; - register long __rc asm("2"); - - asm volatile ("diag 2,4,0x500\n" - : "=d" (__rc) : "d" (__nr), "0" (__p1), "d" (__p2), - "d" (__p3), "d" (__p4), "d" (__p5), "d" (__p6) - : "memory", "cc"); - return __rc; -} - -static inline long kvm_hypercall6(unsigned long nr, unsigned long p1, - unsigned long p2, unsigned long p3, - unsigned long p4, unsigned long p5, - unsigned long p6) -{ - diag_stat_inc(DIAG_STAT_X500); - return __kvm_hypercall6(nr, p1, p2, p3, p4, p5, p6); -} +#define HYPERCALL_FMT_0 +#define HYPERCALL_FMT_1 , "0" (r2) +#define HYPERCALL_FMT_2 , "d" (r3) HYPERCALL_FMT_1 +#define HYPERCALL_FMT_3 , "d" (r4) HYPERCALL_FMT_2 +#define HYPERCALL_FMT_4 , "d" (r5) HYPERCALL_FMT_3 +#define HYPERCALL_FMT_5 , "d" (r6) HYPERCALL_FMT_4 +#define HYPERCALL_FMT_6 , "d" (r7) HYPERCALL_FMT_5 + +#define HYPERCALL_PARM_0 +#define HYPERCALL_PARM_1 , unsigned long arg1 +#define HYPERCALL_PARM_2 HYPERCALL_PARM_1, unsigned long arg2 +#define HYPERCALL_PARM_3 HYPERCALL_PARM_2, unsigned long arg3 +#define HYPERCALL_PARM_4 HYPERCALL_PARM_3, unsigned long arg4 +#define HYPERCALL_PARM_5 HYPERCALL_PARM_4, unsigned long arg5 +#define HYPERCALL_PARM_6 HYPERCALL_PARM_5, unsigned long arg6 + +#define HYPERCALL_REGS_0 +#define HYPERCALL_REGS_1 \ + register unsigned long r2 asm("2") = arg1 +#define HYPERCALL_REGS_2 \ + HYPERCALL_REGS_1; \ + register unsigned long r3 asm("3") = arg2 +#define HYPERCALL_REGS_3 \ + HYPERCALL_REGS_2; \ + register unsigned long r4 asm("4") = arg3 +#define HYPERCALL_REGS_4 \ + HYPERCALL_REGS_3; \ + register unsigned long r5 asm("5") = arg4 +#define HYPERCALL_REGS_5 \ + HYPERCALL_REGS_4; \ + register unsigned long r6 asm("6") = arg5 +#define HYPERCALL_REGS_6 \ + HYPERCALL_REGS_5; \ + register unsigned long r7 asm("7") = arg6 + +#define HYPERCALL_ARGS_0 +#define HYPERCALL_ARGS_1 , arg1 +#define HYPERCALL_ARGS_2 HYPERCALL_ARGS_1, arg2 +#define HYPERCALL_ARGS_3 HYPERCALL_ARGS_2, arg3 +#define HYPERCALL_ARGS_4 HYPERCALL_ARGS_3, arg4 +#define HYPERCALL_ARGS_5 HYPERCALL_ARGS_4, arg5 +#define HYPERCALL_ARGS_6 HYPERCALL_ARGS_5, arg6 + +#define GENERATE_KVM_HYPERCALL_FUNC(args) \ +static inline \ +long __kvm_hypercall##args(unsigned long nr HYPERCALL_PARM_##args) \ +{ \ + register unsigned long __nr asm("1") = nr; \ + register long __rc 
asm("2"); \ + HYPERCALL_REGS_##args; \ + \ + asm volatile ( \ + " diag 2,4,0x500\n" \ + : "=d" (__rc) \ + : "d" (__nr) HYPERCALL_FMT_##args \ + : "memory", "cc"); \ + return __rc; \ +} \ + \ +static inline \ +long kvm_hypercall##args(unsigned long nr HYPERCALL_PARM_##args) \ +{ \ + diag_stat_inc(DIAG_STAT_X500); \ + return __kvm_hypercall##args(nr HYPERCALL_ARGS_##args); \ +} + +GENERATE_KVM_HYPERCALL_FUNC(0) +GENERATE_KVM_HYPERCALL_FUNC(1) +GENERATE_KVM_HYPERCALL_FUNC(2) +GENERATE_KVM_HYPERCALL_FUNC(3) +GENERATE_KVM_HYPERCALL_FUNC(4) +GENERATE_KVM_HYPERCALL_FUNC(5) +GENERATE_KVM_HYPERCALL_FUNC(6) /* kvm on s390 is always paravirtualization enabled */ static inline int kvm_para_available(void) diff --git a/arch/s390/include/asm/linkage.h b/arch/s390/include/asm/linkage.h index 24e8fed150cf..1ffea75b8ebc 100644 --- a/arch/s390/include/asm/linkage.h +++ b/arch/s390/include/asm/linkage.h @@ -22,7 +22,7 @@ #define EX_TABLE(_fault, _target) \ __EX_TABLE(__ex_table, _fault, _target) -#define EX_TABLE_DMA(_fault, _target) \ - __EX_TABLE(.dma.ex_table, _fault, _target) +#define EX_TABLE_AMODE31(_fault, _target) \ + __EX_TABLE(.amode31.ex_table, _fault, _target) #endif diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 47bde5a20a41..11213c8bfca5 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -124,7 +124,8 @@ struct lowcore { /* Restart function and parameter. */ __u64 restart_fn; /* 0x0370 */ __u64 restart_data; /* 0x0378 */ - __u64 restart_source; /* 0x0380 */ + __u32 restart_source; /* 0x0380 */ + __u32 restart_flags; /* 0x0384 */ /* Address space pointer. */ __u64 kernel_asce; /* 0x0388 */ diff --git a/arch/s390/include/asm/module.h b/arch/s390/include/asm/module.h index e0a6d29846e2..9f1eea15872c 100644 --- a/arch/s390/include/asm/module.h +++ b/arch/s390/include/asm/module.h @@ -8,16 +8,14 @@ * This file contains the s390 architecture specific module code. */ -struct mod_arch_syminfo -{ +struct mod_arch_syminfo { unsigned long got_offset; unsigned long plt_offset; int got_initialized; int plt_initialized; }; -struct mod_arch_specific -{ +struct mod_arch_specific { /* Starting offset of got in the module core memory. */ unsigned long got_offset; /* Starting offset of plt in the module core memory. */ @@ -30,6 +28,14 @@ struct mod_arch_specific int nsyms; /* Additional symbol information (got and plt offsets). */ struct mod_arch_syminfo *syminfo; +#ifdef CONFIG_FUNCTION_TRACER + /* Start of memory reserved for ftrace hotpatch trampolines. */ + struct ftrace_hotpatch_trampoline *trampolines_start; + /* End of memory reserved for ftrace hotpatch trampolines. */ + struct ftrace_hotpatch_trampoline *trampolines_end; + /* Next unused ftrace hotpatch trampoline slot. 
*/ + struct ftrace_hotpatch_trampoline *next_trampoline; +#endif /* CONFIG_FUNCTION_TRACER */ }; #endif /* _ASM_S390_MODULE_H */ diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 3ba945c6b9dc..d98d17a36c7b 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -144,9 +144,6 @@ struct page; void arch_free_page(struct page *page, int order); void arch_alloc_page(struct page *page, int order); void arch_set_page_dat(struct page *page, int order); -void arch_set_page_nodat(struct page *page, int order); -int arch_test_page_nodat(struct page *page); -void arch_set_page_states(int make_stable); static inline int devmem_is_allowed(unsigned long pfn) { diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 5509b224c2ec..e4803ec51110 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -216,9 +216,10 @@ void zpci_remove_reserved_devices(void); int clp_setup_writeback_mio(void); int clp_scan_pci_devices(void); int clp_query_pci_fn(struct zpci_dev *zdev); -int clp_enable_fh(struct zpci_dev *, u8); -int clp_disable_fh(struct zpci_dev *); +int clp_enable_fh(struct zpci_dev *zdev, u32 *fh, u8 nr_dma_as); +int clp_disable_fh(struct zpci_dev *zdev, u32 *fh); int clp_get_state(u32 fid, enum zpci_state *state); +int clp_refresh_fh(u32 fid, u32 *fh); /* UID */ void update_uid_checking(bool new); @@ -271,6 +272,8 @@ struct zpci_dev *get_zdev_by_fid(u32); /* DMA */ int zpci_dma_init(void); void zpci_dma_exit(void); +int zpci_dma_init_device(struct zpci_dev *zdev); +int zpci_dma_exit_device(struct zpci_dev *zdev); /* IRQ */ int __init zpci_irq_init(void); diff --git a/arch/s390/include/asm/pci_dma.h b/arch/s390/include/asm/pci_dma.h index f62cd3ed2d44..3b8e89d4578a 100644 --- a/arch/s390/include/asm/pci_dma.h +++ b/arch/s390/include/asm/pci_dma.h @@ -182,8 +182,6 @@ static inline unsigned long *get_st_pto(unsigned long entry) } /* Prototypes */ -int zpci_dma_init_device(struct zpci_dev *); -void zpci_dma_exit_device(struct zpci_dev *); void dma_free_seg_table(unsigned long); unsigned long *dma_alloc_cpu_table(void); void dma_cleanup_tables(unsigned long *); diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index dcac7b2df72c..b61426c9ef17 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -67,15 +67,15 @@ extern unsigned long zero_page_mask; /* TODO: s390 cannot support io_remap_pfn_range... 
*/ #define pte_ERROR(e) \ - printk("%s:%d: bad pte %p.\n", __FILE__, __LINE__, (void *) pte_val(e)) + pr_err("%s:%d: bad pte %016lx.\n", __FILE__, __LINE__, pte_val(e)) #define pmd_ERROR(e) \ - printk("%s:%d: bad pmd %p.\n", __FILE__, __LINE__, (void *) pmd_val(e)) + pr_err("%s:%d: bad pmd %016lx.\n", __FILE__, __LINE__, pmd_val(e)) #define pud_ERROR(e) \ - printk("%s:%d: bad pud %p.\n", __FILE__, __LINE__, (void *) pud_val(e)) + pr_err("%s:%d: bad pud %016lx.\n", __FILE__, __LINE__, pud_val(e)) #define p4d_ERROR(e) \ - printk("%s:%d: bad p4d %p.\n", __FILE__, __LINE__, (void *) p4d_val(e)) + pr_err("%s:%d: bad p4d %016lx.\n", __FILE__, __LINE__, p4d_val(e)) #define pgd_ERROR(e) \ - printk("%s:%d: bad pgd %p.\n", __FILE__, __LINE__, (void *) pgd_val(e)) + pr_err("%s:%d: bad pgd %016lx.\n", __FILE__, __LINE__, pgd_val(e)) /* * The vmalloc and module area will always be on the topmost area of the diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index ddc7858bbce4..879b8e3f609c 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -26,6 +26,8 @@ #define _CIF_MCCK_GUEST BIT(CIF_MCCK_GUEST) #define _CIF_DEDICATED_CPU BIT(CIF_DEDICATED_CPU) +#define RESTART_FLAG_CTLREGS _AC(1 << 0, U) + #ifndef __ASSEMBLY__ #include <linux/cpumask.h> diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h index cb4f73c7228d..25b5dc34db75 100644 --- a/arch/s390/include/asm/qdio.h +++ b/arch/s390/include/asm/qdio.h @@ -291,16 +291,15 @@ struct qdio_ssqd_desc { typedef void qdio_handler_t(struct ccw_device *, unsigned int, int, int, int, unsigned long); -/* qdio errors reported to the upper-layer program */ +/* qdio errors reported through the queue handlers: */ #define QDIO_ERROR_ACTIVATE 0x0001 #define QDIO_ERROR_GET_BUF_STATE 0x0002 #define QDIO_ERROR_SET_BUF_STATE 0x0004 + +/* extra info for completed SBALs: */ #define QDIO_ERROR_SLSB_STATE 0x0100 #define QDIO_ERROR_SLSB_PENDING 0x0200 -#define QDIO_ERROR_FATAL 0x00ff -#define QDIO_ERROR_TEMPORARY 0xff00 - /* for qdio_cleanup */ #define QDIO_FLAG_CLEANUP_USING_CLEAR 0x01 #define QDIO_FLAG_CLEANUP_USING_HALT 0x02 @@ -312,8 +311,6 @@ typedef void qdio_handler_t(struct ccw_device *, unsigned int, int, * @qib_param_field_format: format for qib_parm_field * @qib_param_field: pointer to 128 bytes or NULL, if no param field * @qib_rflags: rflags to set - * @input_slib_elements: pointer to no_input_qs * 128 words of data or NULL - * @output_slib_elements: pointer to no_output_qs * 128 words of data or NULL * @no_input_qs: number of input queues * @no_output_qs: number of output queues * @input_handler: handler to be called for input queues @@ -330,27 +327,18 @@ struct qdio_initialize { unsigned int qib_param_field_format; unsigned char *qib_param_field; unsigned char qib_rflags; - unsigned long *input_slib_elements; - unsigned long *output_slib_elements; unsigned int no_input_qs; unsigned int no_output_qs; qdio_handler_t *input_handler; qdio_handler_t *output_handler; void (*irq_poll)(struct ccw_device *cdev, unsigned long data); - unsigned int scan_threshold; unsigned long int_parm; struct qdio_buffer ***input_sbal_addr_array; struct qdio_buffer ***output_sbal_addr_array; }; -#define QDIO_STATE_INACTIVE 0x00000002 /* after qdio_cleanup */ -#define QDIO_STATE_ESTABLISHED 0x00000004 /* after qdio_establish */ -#define QDIO_STATE_ACTIVE 0x00000008 /* after qdio_activate */ -#define QDIO_STATE_STOPPED 0x00000010 /* after queues went down */ - #define QDIO_FLAG_SYNC_INPUT 0x01 #define 
QDIO_FLAG_SYNC_OUTPUT 0x02 -#define QDIO_FLAG_PCI_OUT 0x10 int qdio_alloc_buffers(struct qdio_buffer **buf, unsigned int count); void qdio_free_buffers(struct qdio_buffer **buf, unsigned int count); @@ -367,7 +355,6 @@ extern int do_QDIO(struct ccw_device *cdev, unsigned int callflags, int q_nr, unsigned int bufnr, unsigned int count, struct qaob *aob); extern int qdio_start_irq(struct ccw_device *cdev); extern int qdio_stop_irq(struct ccw_device *cdev); -extern int qdio_get_next_buffers(struct ccw_device *, int, int *, int *); extern int qdio_inspect_queue(struct ccw_device *cdev, unsigned int nr, bool is_input, unsigned int *bufnr, unsigned int *error); diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index 5763769a39b6..e3ae937bef1c 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -8,8 +8,6 @@ #define _ASM_S390_SCLP_H #include <linux/types.h> -#include <asm/chpid.h> -#include <asm/cpu.h> #define SCLP_CHP_INFO_MASK_SIZE 32 #define EARLY_SCCB_SIZE PAGE_SIZE @@ -19,6 +17,10 @@ /* 24 + 16 * SCLP_MAX_CORES */ #define EXT_SCCB_READ_CPU (3 * PAGE_SIZE) +#ifndef __ASSEMBLY__ +#include <asm/chpid.h> +#include <asm/cpu.h> + struct sclp_chp_info { u8 recognized[SCLP_CHP_INFO_MASK_SIZE]; u8 standby[SCLP_CHP_INFO_MASK_SIZE]; @@ -113,6 +115,9 @@ struct zpci_report_error_header { u8 data[0]; /* Subsequent Data passed verbatim to SCLP ET 24 */ } __packed; +extern char *sclp_early_sccb; + +void sclp_early_set_buffer(void *sccb); int sclp_early_read_info(void); int sclp_early_read_storage_info(void); int sclp_early_get_core_info(struct sclp_core_info *info); @@ -147,4 +152,5 @@ static inline int sclp_get_core_info(struct sclp_core_info *info, int early) return _sclp_get_core_info(info); } +#endif /* __ASSEMBLY__ */ #endif /* _ASM_S390_SCLP_H */ diff --git a/arch/s390/include/asm/sections.h b/arch/s390/include/asm/sections.h index 0c2151451ba5..85881dd48022 100644 --- a/arch/s390/include/asm/sections.h +++ b/arch/s390/include/asm/sections.h @@ -35,7 +35,7 @@ static inline int arch_is_kernel_initmem_freed(unsigned long addr) */ #define __bootdata_preserved(var) __section(".boot.preserved.data." 
#var) var -extern unsigned long __sdma, __edma; -extern unsigned long __stext_dma, __etext_dma; +extern unsigned long __samode31, __eamode31; +extern unsigned long __stext_amode31, __etext_amode31; #endif diff --git a/arch/s390/include/asm/set_memory.h b/arch/s390/include/asm/set_memory.h index a22a5a81811c..950d87bd997a 100644 --- a/arch/s390/include/asm/set_memory.h +++ b/arch/s390/include/asm/set_memory.h @@ -10,6 +10,7 @@ extern struct mutex cpa_mutex; #define SET_MEMORY_RW 2UL #define SET_MEMORY_NX 4UL #define SET_MEMORY_X 8UL +#define SET_MEMORY_4K 16UL int __set_memory(unsigned long addr, int numpages, unsigned long flags); @@ -33,4 +34,9 @@ static inline int set_memory_x(unsigned long addr, int numpages) return __set_memory(addr, numpages, SET_MEMORY_X); } +static inline int set_memory_4k(unsigned long addr, int numpages) +{ + return __set_memory(addr, numpages, SET_MEMORY_4K); +} + #endif diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index 3a77aa96d092..b6606ffd85d8 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -10,11 +10,8 @@ #include <uapi/asm/setup.h> #include <linux/build_bug.h> -#define EP_OFFSET 0x10008 -#define EP_STRING "S390EP" #define PARMAREA 0x10400 -#define EARLY_SCCB_OFFSET 0x11000 -#define HEAD_END 0x12000 +#define HEAD_END 0x11000 /* * Machine features detected in early.c @@ -36,6 +33,7 @@ #define MACHINE_FLAG_NX BIT(15) #define MACHINE_FLAG_GS BIT(16) #define MACHINE_FLAG_SCC BIT(17) +#define MACHINE_FLAG_PCI_MIO BIT(18) #define LPP_MAGIC BIT(31) #define LPP_PID_MASK _AC(0xffffffff, UL) @@ -45,28 +43,11 @@ #define STARTUP_NORMAL_OFFSET 0x10000 #define STARTUP_KDUMP_OFFSET 0x10010 -/* Offsets to parameters in kernel/head.S */ - -#define IPL_DEVICE_OFFSET 0x10400 -#define INITRD_START_OFFSET 0x10408 -#define INITRD_SIZE_OFFSET 0x10410 -#define OLDMEM_BASE_OFFSET 0x10418 -#define OLDMEM_SIZE_OFFSET 0x10420 -#define KERNEL_VERSION_OFFSET 0x10428 -#define COMMAND_LINE_OFFSET 0x10480 - #ifndef __ASSEMBLY__ #include <asm/lowcore.h> #include <asm/types.h> -#define IPL_DEVICE (*(unsigned long *) (IPL_DEVICE_OFFSET)) -#define INITRD_START (*(unsigned long *) (INITRD_START_OFFSET)) -#define INITRD_SIZE (*(unsigned long *) (INITRD_SIZE_OFFSET)) -#define OLDMEM_BASE (*(unsigned long *) (OLDMEM_BASE_OFFSET)) -#define OLDMEM_SIZE (*(unsigned long *) (OLDMEM_SIZE_OFFSET)) -#define COMMAND_LINE ((char *) (COMMAND_LINE_OFFSET)) - struct parmarea { unsigned long ipl_device; /* 0x10400 */ unsigned long initrd_start; /* 0x10408 */ @@ -110,6 +91,7 @@ extern unsigned long mio_wb_bit_mask; #define MACHINE_HAS_NX (S390_lowcore.machine_flags & MACHINE_FLAG_NX) #define MACHINE_HAS_GS (S390_lowcore.machine_flags & MACHINE_FLAG_GS) #define MACHINE_HAS_SCC (S390_lowcore.machine_flags & MACHINE_FLAG_SCC) +#define MACHINE_HAS_PCI_MIO (S390_lowcore.machine_flags & MACHINE_FLAG_PCI_MIO) /* * Console mode. 
Override with conmode= @@ -161,20 +143,22 @@ static inline unsigned long kaslr_offset(void) extern int is_full_image; +struct initrd_data { + unsigned long start; + unsigned long size; +}; +extern struct initrd_data initrd_data; + +struct oldmem_data { + unsigned long start; + unsigned long size; +}; +extern struct oldmem_data oldmem_data; + static inline u32 gen_lpswe(unsigned long addr) { BUILD_BUG_ON(addr > 0xfff); return 0xb2b20000 | addr; } - -#else /* __ASSEMBLY__ */ - -#define IPL_DEVICE (IPL_DEVICE_OFFSET) -#define INITRD_START (INITRD_START_OFFSET) -#define INITRD_SIZE (INITRD_SIZE_OFFSET) -#define OLDMEM_BASE (OLDMEM_BASE_OFFSET) -#define OLDMEM_SIZE (OLDMEM_SIZE_OFFSET) -#define COMMAND_LINE (COMMAND_LINE_OFFSET) - #endif /* __ASSEMBLY__ */ #endif /* _ASM_S390_SETUP_H */ diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h index 9107e3dab68c..b3dd883699e7 100644 --- a/arch/s390/include/asm/syscall.h +++ b/arch/s390/include/asm/syscall.h @@ -104,4 +104,63 @@ static inline bool arch_syscall_is_vdso_sigreturn(struct pt_regs *regs) return false; } +#define SYSCALL_FMT_0 +#define SYSCALL_FMT_1 , "0" (r2) +#define SYSCALL_FMT_2 , "d" (r3) SYSCALL_FMT_1 +#define SYSCALL_FMT_3 , "d" (r4) SYSCALL_FMT_2 +#define SYSCALL_FMT_4 , "d" (r5) SYSCALL_FMT_3 +#define SYSCALL_FMT_5 , "d" (r6) SYSCALL_FMT_4 +#define SYSCALL_FMT_6 , "d" (r7) SYSCALL_FMT_5 + +#define SYSCALL_PARM_0 +#define SYSCALL_PARM_1 , long arg1 +#define SYSCALL_PARM_2 SYSCALL_PARM_1, long arg2 +#define SYSCALL_PARM_3 SYSCALL_PARM_2, long arg3 +#define SYSCALL_PARM_4 SYSCALL_PARM_3, long arg4 +#define SYSCALL_PARM_5 SYSCALL_PARM_4, long arg5 +#define SYSCALL_PARM_6 SYSCALL_PARM_5, long arg6 + +#define SYSCALL_REGS_0 +#define SYSCALL_REGS_1 \ + register long r2 asm("2") = arg1 +#define SYSCALL_REGS_2 \ + SYSCALL_REGS_1; \ + register long r3 asm("3") = arg2 +#define SYSCALL_REGS_3 \ + SYSCALL_REGS_2; \ + register long r4 asm("4") = arg3 +#define SYSCALL_REGS_4 \ + SYSCALL_REGS_3; \ + register long r5 asm("5") = arg4 +#define SYSCALL_REGS_5 \ + SYSCALL_REGS_4; \ + register long r6 asm("6") = arg5 +#define SYSCALL_REGS_6 \ + SYSCALL_REGS_5; \ + register long r7 asm("7") = arg6 + +#define GENERATE_SYSCALL_FUNC(nr) \ +static __always_inline \ +long syscall##nr(unsigned long syscall SYSCALL_PARM_##nr) \ +{ \ + register unsigned long r1 asm ("1") = syscall; \ + register long rc asm ("2"); \ + SYSCALL_REGS_##nr; \ + \ + asm volatile ( \ + " svc 0\n" \ + : "=d" (rc) \ + : "d" (r1) SYSCALL_FMT_##nr \ + : "memory"); \ + return rc; \ +} + +GENERATE_SYSCALL_FUNC(0) +GENERATE_SYSCALL_FUNC(1) +GENERATE_SYSCALL_FUNC(2) +GENERATE_SYSCALL_FUNC(3) +GENERATE_SYSCALL_FUNC(4) +GENERATE_SYSCALL_FUNC(5) +GENERATE_SYSCALL_FUNC(6) + #endif /* _ASM_SYSCALL_H */ diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h index 12c5f006c136..fe92a4caf5ec 100644 --- a/arch/s390/include/asm/uv.h +++ b/arch/s390/include/asm/uv.h @@ -356,11 +356,9 @@ int uv_convert_from_secure(unsigned long paddr); int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr); void setup_uv(void); -void adjust_to_uv_max(unsigned long *vmax); #else #define is_prot_virt_host() 0 static inline void setup_uv(void) {} -static inline void adjust_to_uv_max(unsigned long *vmax) {} static inline int uv_destroy_page(unsigned long paddr) { @@ -373,10 +371,4 @@ static inline int uv_convert_from_secure(unsigned long paddr) } #endif -#if defined(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) || IS_ENABLED(CONFIG_KVM) -void uv_query_info(void); -#else -static 
inline void uv_query_info(void) {} -#endif - #endif /* _ASM_S390_UV_H */ diff --git a/arch/s390/include/asm/vdso/gettimeofday.h b/arch/s390/include/asm/vdso/gettimeofday.h index d6465b22ffe3..db84942eb78f 100644 --- a/arch/s390/include/asm/vdso/gettimeofday.h +++ b/arch/s390/include/asm/vdso/gettimeofday.h @@ -6,6 +6,7 @@ #define VDSO_HAS_CLOCK_GETRES 1 +#include <asm/syscall.h> #include <asm/timex.h> #include <asm/unistd.h> #include <linux/compiler.h> @@ -35,35 +36,20 @@ static inline u64 __arch_get_hw_counter(s32 clock_mode, const struct vdso_data * static __always_inline long clock_gettime_fallback(clockid_t clkid, struct __kernel_timespec *ts) { - register unsigned long r1 __asm__("r1") = __NR_clock_gettime; - register unsigned long r2 __asm__("r2") = (unsigned long)clkid; - register void *r3 __asm__("r3") = ts; - - asm ("svc 0\n" : "+d" (r2) : "d" (r1), "d" (r3) : "cc", "memory"); - return r2; + return syscall2(__NR_clock_gettime, (long)clkid, (long)ts); } static __always_inline long gettimeofday_fallback(register struct __kernel_old_timeval *tv, register struct timezone *tz) { - register unsigned long r1 __asm__("r1") = __NR_gettimeofday; - register unsigned long r2 __asm__("r2") = (unsigned long)tv; - register void *r3 __asm__("r3") = tz; - - asm ("svc 0\n" : "+d" (r2) : "d" (r1), "d" (r3) : "cc", "memory"); - return r2; + return syscall2(__NR_gettimeofday, (long)tv, (long)tz); } static __always_inline long clock_getres_fallback(clockid_t clkid, struct __kernel_timespec *ts) { - register unsigned long r1 __asm__("r1") = __NR_clock_getres; - register unsigned long r2 __asm__("r2") = (unsigned long)clkid; - register void *r3 __asm__("r3") = ts; - - asm ("svc 0\n" : "+d" (r2) : "d" (r1), "d" (r3) : "cc", "memory"); - return r2; + return syscall2(__NR_clock_getres, (long)clkid, (long)ts); } #ifdef CONFIG_TIME_NS diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 4a44ba5a2d73..80f500ffb55c 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -40,7 +40,7 @@ obj-y += sysinfo.o lgr.o os_info.o machine_kexec.o obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o obj-y += entry.o reipl.o relocate_kernel.o kdebugfs.o alternative.o obj-y += nospec-branch.o ipl_vmparm.o machine_kexec_reloc.o unwind_bc.o -obj-y += smp.o +obj-y += smp.o text_amode31.o extra-y += head64.o vmlinux.lds diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 77ff2130cb04..b57da9338588 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -116,6 +116,7 @@ int main(void) OFFSET(__LC_RESTART_FN, lowcore, restart_fn); OFFSET(__LC_RESTART_DATA, lowcore, restart_data); OFFSET(__LC_RESTART_SOURCE, lowcore, restart_source); + OFFSET(__LC_RESTART_FLAGS, lowcore, restart_flags); OFFSET(__LC_KERNEL_ASCE, lowcore, kernel_asce); OFFSET(__LC_USER_ASCE, lowcore, user_asce); OFFSET(__LC_LPP, lowcore, lpp); @@ -152,5 +153,12 @@ int main(void) DEFINE(__KEXEC_SHA_REGION_SIZE, sizeof(struct kexec_sha_region)); /* sizeof kernel parameter area */ DEFINE(__PARMAREA_SIZE, sizeof(struct parmarea)); + /* kernel parameter area offsets */ + DEFINE(IPL_DEVICE, PARMAREA + offsetof(struct parmarea, ipl_device)); + DEFINE(INITRD_START, PARMAREA + offsetof(struct parmarea, initrd_start)); + DEFINE(INITRD_SIZE, PARMAREA + offsetof(struct parmarea, initrd_size)); + DEFINE(OLDMEM_BASE, PARMAREA + offsetof(struct parmarea, oldmem_base)); + DEFINE(OLDMEM_SIZE, PARMAREA + offsetof(struct parmarea, oldmem_size)); + DEFINE(COMMAND_LINE, 
PARMAREA + offsetof(struct parmarea, command_line)); return 0; } diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index 0e36dfc9ccd6..d72a6df058d7 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -140,7 +140,7 @@ int copy_oldmem_kernel(void *dst, void *src, size_t count) while (count) { from = __pa(src); - if (!OLDMEM_BASE && from < sclp.hsa_size) { + if (!oldmem_data.start && from < sclp.hsa_size) { /* Copy from zfcp/nvme dump HSA area */ len = min(count, sclp.hsa_size - from); rc = memcpy_hsa_kernel(dst, from, len); @@ -148,12 +148,12 @@ int copy_oldmem_kernel(void *dst, void *src, size_t count) return rc; } else { /* Check for swapped kdump oldmem areas */ - if (OLDMEM_BASE && from - OLDMEM_BASE < OLDMEM_SIZE) { - from -= OLDMEM_BASE; - len = min(count, OLDMEM_SIZE - from); - } else if (OLDMEM_BASE && from < OLDMEM_SIZE) { - len = min(count, OLDMEM_SIZE - from); - from += OLDMEM_BASE; + if (oldmem_data.start && from - oldmem_data.start < oldmem_data.size) { + from -= oldmem_data.start; + len = min(count, oldmem_data.size - from); + } else if (oldmem_data.start && from < oldmem_data.size) { + len = min(count, oldmem_data.size - from); + from += oldmem_data.start; } else { len = count; } @@ -183,7 +183,7 @@ static int copy_oldmem_user(void __user *dst, void *src, size_t count) while (count) { from = __pa(src); - if (!OLDMEM_BASE && from < sclp.hsa_size) { + if (!oldmem_data.start && from < sclp.hsa_size) { /* Copy from zfcp/nvme dump HSA area */ len = min(count, sclp.hsa_size - from); rc = memcpy_hsa_user(dst, from, len); @@ -191,12 +191,12 @@ static int copy_oldmem_user(void __user *dst, void *src, size_t count) return rc; } else { /* Check for swapped kdump oldmem areas */ - if (OLDMEM_BASE && from - OLDMEM_BASE < OLDMEM_SIZE) { - from -= OLDMEM_BASE; - len = min(count, OLDMEM_SIZE - from); - } else if (OLDMEM_BASE && from < OLDMEM_SIZE) { - len = min(count, OLDMEM_SIZE - from); - from += OLDMEM_BASE; + if (oldmem_data.start && from - oldmem_data.start < oldmem_data.size) { + from -= oldmem_data.start; + len = min(count, oldmem_data.size - from); + } else if (oldmem_data.start && from < oldmem_data.size) { + len = min(count, oldmem_data.size - from); + from += oldmem_data.start; } else { len = count; } @@ -243,10 +243,10 @@ static int remap_oldmem_pfn_range_kdump(struct vm_area_struct *vma, unsigned long size_old; int rc; - if (pfn < OLDMEM_SIZE >> PAGE_SHIFT) { - size_old = min(size, OLDMEM_SIZE - (pfn << PAGE_SHIFT)); + if (pfn < oldmem_data.size >> PAGE_SHIFT) { + size_old = min(size, oldmem_data.size - (pfn << PAGE_SHIFT)); rc = remap_pfn_range(vma, from, - pfn + (OLDMEM_BASE >> PAGE_SHIFT), + pfn + (oldmem_data.start >> PAGE_SHIFT), size_old, prot); if (rc || size == size_old) return rc; @@ -288,7 +288,7 @@ static int remap_oldmem_pfn_range_zfcpdump(struct vm_area_struct *vma, int remap_oldmem_pfn_range(struct vm_area_struct *vma, unsigned long from, unsigned long pfn, unsigned long size, pgprot_t prot) { - if (OLDMEM_BASE) + if (oldmem_data.start) return remap_oldmem_pfn_range_kdump(vma, from, pfn, size, prot); else return remap_oldmem_pfn_range_zfcpdump(vma, from, pfn, size, @@ -633,17 +633,17 @@ int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size) u64 hdr_off; /* If we are not in kdump or zfcp/nvme dump mode return */ - if (!OLDMEM_BASE && !is_ipl_type_dump()) + if (!oldmem_data.start && !is_ipl_type_dump()) return 0; /* If we cannot get HSA size for zfcp/nvme dump return error */ if (is_ipl_type_dump()
&& !sclp.hsa_size) return -ENODEV; /* For kdump, exclude previous crashkernel memory */ - if (OLDMEM_BASE) { - oldmem_region.base = OLDMEM_BASE; - oldmem_region.size = OLDMEM_SIZE; - oldmem_type.total_size = OLDMEM_SIZE; + if (oldmem_data.start) { + oldmem_region.base = oldmem_data.start; + oldmem_region.size = oldmem_data.size; + oldmem_type.total_size = oldmem_data.size; } mem_chunk_cnt = get_mem_chunk_cnt(); diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index 09b6c6402f9b..4331c7e6e1c0 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -24,6 +24,7 @@ #include <linux/export.h> #include <linux/init.h> #include <linux/fs.h> +#include <linux/minmax.h> #include <linux/debugfs.h> #include <asm/debug.h> @@ -92,6 +93,8 @@ static int debug_hex_ascii_format_fn(debug_info_t *id, struct debug_view *view, char *out_buf, const char *in_buf); static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view, char *out_buf, debug_sprintf_entry_t *curr_event); +static void debug_areas_swap(debug_info_t *a, debug_info_t *b); +static void debug_events_append(debug_info_t *dest, debug_info_t *src); /* globals */ @@ -311,24 +314,6 @@ static debug_info_t *debug_info_create(const char *name, int pages_per_area, goto out; rc->mode = mode & ~S_IFMT; - - /* create root directory */ - rc->debugfs_root_entry = debugfs_create_dir(rc->name, - debug_debugfs_root_entry); - - /* append new element to linked list */ - if (!debug_area_first) { - /* first element in list */ - debug_area_first = rc; - rc->prev = NULL; - } else { - /* append element to end of list */ - debug_area_last->next = rc; - rc->prev = debug_area_last; - } - debug_area_last = rc; - rc->next = NULL; - refcount_set(&rc->ref_count, 1); out: return rc; @@ -388,27 +373,10 @@ static void debug_info_get(debug_info_t *db_info) */ static void debug_info_put(debug_info_t *db_info) { - int i; - if (!db_info) return; - if (refcount_dec_and_test(&db_info->ref_count)) { - for (i = 0; i < DEBUG_MAX_VIEWS; i++) { - if (!db_info->views[i]) - continue; - debugfs_remove(db_info->debugfs_entries[i]); - } - debugfs_remove(db_info->debugfs_root_entry); - if (db_info == debug_area_first) - debug_area_first = db_info->next; - if (db_info == debug_area_last) - debug_area_last = db_info->prev; - if (db_info->prev) - db_info->prev->next = db_info->next; - if (db_info->next) - db_info->next->prev = db_info->prev; + if (refcount_dec_and_test(&db_info->ref_count)) debug_info_free(db_info); - } } /* @@ -632,6 +600,31 @@ static int debug_close(struct inode *inode, struct file *file) return 0; /* success */ } +/* Create debugfs entries and add to internal list. */ +static void _debug_register(debug_info_t *id) +{ + /* create root directory */ + id->debugfs_root_entry = debugfs_create_dir(id->name, + debug_debugfs_root_entry); + + /* append new element to linked list */ + if (!debug_area_first) { + /* first element in list */ + debug_area_first = id; + id->prev = NULL; + } else { + /* append element to end of list */ + debug_area_last->next = id; + id->prev = debug_area_last; + } + debug_area_last = id; + id->next = NULL; + + debug_register_view(id, &debug_level_view); + debug_register_view(id, &debug_flush_view); + debug_register_view(id, &debug_pages_view); +} + /** * debug_register_mode() - creates and initializes debug area. 
* @@ -661,19 +654,16 @@ debug_info_t *debug_register_mode(const char *name, int pages_per_area, if ((uid != 0) || (gid != 0)) pr_warn("Root becomes the owner of all s390dbf files in sysfs\n"); BUG_ON(!initialized); - mutex_lock(&debug_mutex); /* create new debug_info */ rc = debug_info_create(name, pages_per_area, nr_areas, buf_size, mode); - if (!rc) - goto out; - debug_register_view(rc, &debug_level_view); - debug_register_view(rc, &debug_flush_view); - debug_register_view(rc, &debug_pages_view); -out: - if (!rc) + if (rc) { + mutex_lock(&debug_mutex); + _debug_register(rc); + mutex_unlock(&debug_mutex); + } else { pr_err("Registering debug feature %s failed\n", name); - mutex_unlock(&debug_mutex); + } return rc; } EXPORT_SYMBOL(debug_register_mode); @@ -703,6 +693,82 @@ debug_info_t *debug_register(const char *name, int pages_per_area, EXPORT_SYMBOL(debug_register); /** + * debug_register_static() - registers a static debug area + * + * @id: Handle for static debug area + * @pages_per_area: Number of pages per area + * @nr_areas: Number of debug areas + * + * Register debug_info_t defined using DEFINE_STATIC_DEBUG_INFO. + * + * Note: This function is called automatically via an initcall generated by + * DEFINE_STATIC_DEBUG_INFO. + */ +void debug_register_static(debug_info_t *id, int pages_per_area, int nr_areas) +{ + unsigned long flags; + debug_info_t *copy; + + if (!initialized) { + pr_err("Tried to register debug feature %s too early\n", + id->name); + return; + } + + copy = debug_info_alloc("", pages_per_area, nr_areas, id->buf_size, + id->level, ALL_AREAS); + if (!copy) { + pr_err("Registering debug feature %s failed\n", id->name); + + /* Clear pointers to prevent tracing into released initdata. */ + spin_lock_irqsave(&id->lock, flags); + id->areas = NULL; + id->active_pages = NULL; + id->active_entries = NULL; + spin_unlock_irqrestore(&id->lock, flags); + + return; + } + + /* Replace static trace area with dynamic copy. */ + spin_lock_irqsave(&id->lock, flags); + debug_events_append(copy, id); + debug_areas_swap(id, copy); + spin_unlock_irqrestore(&id->lock, flags); + + /* Clear pointers to initdata and discard copy. */ + copy->areas = NULL; + copy->active_pages = NULL; + copy->active_entries = NULL; + debug_info_free(copy); + + mutex_lock(&debug_mutex); + _debug_register(id); + mutex_unlock(&debug_mutex); +} + +/* Remove debugfs entries and remove from internal list. */ +static void _debug_unregister(debug_info_t *id) +{ + int i; + + for (i = 0; i < DEBUG_MAX_VIEWS; i++) { + if (!id->views[i]) + continue; + debugfs_remove(id->debugfs_entries[i]); + } + debugfs_remove(id->debugfs_root_entry); + if (id == debug_area_first) + debug_area_first = id->next; + if (id == debug_area_last) + debug_area_last = id->prev; + if (id->prev) + id->prev->next = id->next; + if (id->next) + id->next->prev = id->prev; +} + +/** * debug_unregister() - give back debug area. 
* * @id: handle for debug log @@ -715,8 +781,10 @@ void debug_unregister(debug_info_t *id) if (!id) return; mutex_lock(&debug_mutex); - debug_info_put(id); + _debug_unregister(id); mutex_unlock(&debug_mutex); + + debug_info_put(id); } EXPORT_SYMBOL(debug_unregister); @@ -726,35 +794,28 @@ EXPORT_SYMBOL(debug_unregister); */ static int debug_set_size(debug_info_t *id, int nr_areas, int pages_per_area) { - debug_entry_t ***new_areas; + debug_info_t *new_id; unsigned long flags; - int rc = 0; if (!id || (nr_areas <= 0) || (pages_per_area < 0)) return -EINVAL; - if (pages_per_area > 0) { - new_areas = debug_areas_alloc(pages_per_area, nr_areas); - if (!new_areas) { - pr_info("Allocating memory for %i pages failed\n", - pages_per_area); - rc = -ENOMEM; - goto out; - } - } else { - new_areas = NULL; + + new_id = debug_info_alloc("", pages_per_area, nr_areas, id->buf_size, + id->level, ALL_AREAS); + if (!new_id) { + pr_info("Allocating memory for %i pages failed\n", + pages_per_area); + return -ENOMEM; } + spin_lock_irqsave(&id->lock, flags); - debug_areas_free(id); - id->areas = new_areas; - id->nr_areas = nr_areas; - id->pages_per_area = pages_per_area; - id->active_area = 0; - memset(id->active_entries, 0, sizeof(int)*id->nr_areas); - memset(id->active_pages, 0, sizeof(int)*id->nr_areas); + debug_events_append(new_id, id); + debug_areas_swap(new_id, id); + debug_info_free(new_id); spin_unlock_irqrestore(&id->lock, flags); pr_info("%s: set new size (%i pages)\n", id->name, pages_per_area); -out: - return rc; + + return 0; } /** @@ -772,16 +833,17 @@ void debug_set_level(debug_info_t *id, int new_level) if (!id) return; - spin_lock_irqsave(&id->lock, flags); + if (new_level == DEBUG_OFF_LEVEL) { - id->level = DEBUG_OFF_LEVEL; pr_info("%s: switched off\n", id->name); } else if ((new_level > DEBUG_MAX_LEVEL) || (new_level < 0)) { pr_info("%s: level %i is out of range (%i - %i)\n", id->name, new_level, 0, DEBUG_MAX_LEVEL); - } else { - id->level = new_level; + return; } + + spin_lock_irqsave(&id->lock, flags); + id->level = new_level; spin_unlock_irqrestore(&id->lock, flags); } EXPORT_SYMBOL(debug_set_level); @@ -821,6 +883,42 @@ static inline debug_entry_t *get_active_entry(debug_info_t *id) id->active_entries[id->active_area]); } +/* Swap debug areas of a and b. */ +static void debug_areas_swap(debug_info_t *a, debug_info_t *b) +{ + swap(a->nr_areas, b->nr_areas); + swap(a->pages_per_area, b->pages_per_area); + swap(a->areas, b->areas); + swap(a->active_area, b->active_area); + swap(a->active_pages, b->active_pages); + swap(a->active_entries, b->active_entries); +} + +/* Append all debug events in active area from source to destination log. */ +static void debug_events_append(debug_info_t *dest, debug_info_t *src) +{ + debug_entry_t *from, *to, *last; + + if (!src->areas || !dest->areas) + return; + + /* Loop over all entries in src, starting with oldest. */ + from = get_active_entry(src); + last = from; + do { + if (from->clock != 0LL) { + to = get_active_entry(dest); + memset(to, 0, dest->entry_size); + memcpy(to, from, min(src->entry_size, + dest->entry_size)); + proceed_active_entry(dest); + } + + proceed_active_entry(src); + from = get_active_entry(src); + } while (from != last); +} + /* * debug_finish_entry: * - set timestamp, caller address, cpu number etc. 
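debug_areas_swap() and debug_events_append() are what make the static early areas transparent to their users: once the dynamically sized copy exists, all entries traced into the __initdata pages are appended to it and the area pointers are swapped under the lock, so no early events are lost. A hypothetical built-in user of the new interface could look like this (the log name, sizes and traced value are invented; DEFINE_STATIC_DEBUG_INFO, debug_int_event() and debug_hex_ascii_view are the interfaces shown or referenced above):

#include <asm/debug.h>

/* 4 pages per area, 1 area, 16 bytes of data per entry, hex/ascii view */
DEFINE_STATIC_DEBUG_INFO(my_dbf, "my_early_log", 4, 1, 16,
			 &debug_hex_ascii_view);

static void trace_early_value(unsigned int val)
{
	/*
	 * Safe even before initcalls run: early entries land in the
	 * static __initdata pages and are carried over to the dynamic
	 * areas by debug_register_static().
	 */
	debug_int_event(&my_dbf, 3, val);
}

As the kerneldoc above notes, such a user must not call debug_register() or debug_unregister() itself; the generated arch_initcall resizes the areas and registers the debugfs entries.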
@@ -1111,16 +1209,17 @@ int debug_register_view(debug_info_t *id, struct debug_view *view) break; } if (i == DEBUG_MAX_VIEWS) { - pr_err("Registering view %s/%s would exceed the maximum " - "number of views %i\n", id->name, view->name, i); rc = -1; } else { id->views[i] = view; id->debugfs_entries[i] = pde; } spin_unlock_irqrestore(&id->lock, flags); - if (rc) + if (rc) { + pr_err("Registering view %s/%s would exceed the maximum " + "number of views %i\n", id->name, view->name, i); debugfs_remove(pde); + } out: return rc; } diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c index a3f47464c3f1..76a656b2146f 100644 --- a/arch/s390/kernel/diag.c +++ b/arch/s390/kernel/diag.c @@ -14,6 +14,7 @@ #include <asm/diag.h> #include <asm/trace/diag.h> #include <asm/sections.h> +#include "entry.h" struct diag_stat { unsigned int counter[NR_DIAG_STAT]; @@ -50,8 +51,16 @@ static const struct diag_desc diag_map[NR_DIAG_STAT] = { [DIAG_STAT_X500] = { .code = 0x500, .name = "Virtio Service" }, }; -struct diag_ops __bootdata_preserved(diag_dma_ops); -struct diag210 *__bootdata_preserved(__diag210_tmp_dma); +struct diag_ops __amode31_ref diag_amode31_ops = { + .diag210 = _diag210_amode31, + .diag26c = _diag26c_amode31, + .diag14 = _diag14_amode31, + .diag0c = _diag0c_amode31, + .diag308_reset = _diag308_reset_amode31 +}; + +static struct diag210 _diag210_tmp_amode31 __section(".amode31.data"); +struct diag210 __amode31_ref *__diag210_tmp_amode31 = &_diag210_tmp_amode31; static int show_diag_stat(struct seq_file *m, void *v) { @@ -59,7 +68,7 @@ static int show_diag_stat(struct seq_file *m, void *v) unsigned long n = (unsigned long) v - 1; int cpu, prec, tmp; - get_online_cpus(); + cpus_read_lock(); if (n == 0) { seq_puts(m, " "); @@ -78,7 +87,7 @@ static int show_diag_stat(struct seq_file *m, void *v) } seq_printf(m, " %s\n", diag_map[n-1].name); } - put_online_cpus(); + cpus_read_unlock(); return 0; } @@ -135,7 +144,7 @@ EXPORT_SYMBOL(diag_stat_inc_norecursion); int diag14(unsigned long rx, unsigned long ry1, unsigned long subcode) { diag_stat_inc(DIAG_STAT_X014); - return diag_dma_ops.diag14(rx, ry1, subcode); + return diag_amode31_ops.diag14(rx, ry1, subcode); } EXPORT_SYMBOL(diag14); @@ -172,12 +181,12 @@ int diag210(struct diag210 *addr) int ccode; spin_lock_irqsave(&diag210_lock, flags); - *__diag210_tmp_dma = *addr; + *__diag210_tmp_amode31 = *addr; diag_stat_inc(DIAG_STAT_X210); - ccode = diag_dma_ops.diag210(__diag210_tmp_dma); + ccode = diag_amode31_ops.diag210(__diag210_tmp_amode31); - *addr = *__diag210_tmp_dma; + *addr = *__diag210_tmp_amode31; spin_unlock_irqrestore(&diag210_lock, flags); return ccode; @@ -205,6 +214,6 @@ EXPORT_SYMBOL(diag224); int diag26c(void *req, void *resp, enum diag26c_sc subcode) { diag_stat_inc(DIAG_STAT_X26C); - return diag_dma_ops.diag26c(req, resp, subcode); + return diag_amode31_ops.diag26c(req, resp, subcode); } EXPORT_SYMBOL(diag26c); diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c index 5412efe328f8..ec5515423f17 100644 --- a/arch/s390/kernel/dis.c +++ b/arch/s390/kernel/dis.c @@ -312,10 +312,12 @@ static const unsigned char formats[][6] = { [INSTR_VRR_VV] = { V_8, V_12, 0, 0, 0, 0 }, [INSTR_VRR_VV0U] = { V_8, V_12, U4_32, 0, 0, 0 }, [INSTR_VRR_VV0U0U] = { V_8, V_12, U4_32, U4_24, 0, 0 }, + [INSTR_VRR_VV0U2] = { V_8, V_12, U4_24, 0, 0, 0 }, [INSTR_VRR_VV0UU2] = { V_8, V_12, U4_32, U4_28, 0, 0 }, [INSTR_VRR_VV0UUU] = { V_8, V_12, U4_32, U4_28, U4_24, 0 }, [INSTR_VRR_VVV] = { V_8, V_12, V_16, 0, 0, 0 }, [INSTR_VRR_VVV0U] = { V_8, V_12, V_16, 
U4_32, 0, 0 }, + [INSTR_VRR_VVV0U0] = { V_8, V_12, V_16, U4_24, 0, 0 }, [INSTR_VRR_VVV0U0U] = { V_8, V_12, V_16, U4_32, U4_24, 0 }, [INSTR_VRR_VVV0UU] = { V_8, V_12, V_16, U4_32, U4_28, 0 }, [INSTR_VRR_VVV0UUU] = { V_8, V_12, V_16, U4_32, U4_28, U4_24 }, diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index fb84e3fc1686..9857cb046726 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -236,6 +236,10 @@ static __init void detect_machine_facilities(void) clock_comparator_max = -1ULL >> 1; __ctl_set_bit(0, 53); } + if (IS_ENABLED(CONFIG_PCI) && test_facility(153)) { + S390_lowcore.machine_flags |= MACHINE_FLAG_PCI_MIO; + /* the control bit is set during PCI initialization */ + } } static inline void save_vector_registers(void) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 5a2f70cbd3a9..b9716a7e326d 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -624,12 +624,15 @@ ENTRY(mcck_int_handler) 4: j 4b ENDPROC(mcck_int_handler) -# -# PSW restart interrupt handler -# ENTRY(restart_int_handler) ALTERNATIVE "", ".insn s,0xb2800000,_LPP_OFFSET", 40 stg %r15,__LC_SAVE_AREA_RESTART + TSTMSK __LC_RESTART_FLAGS,RESTART_FLAG_CTLREGS,4 + jz 0f + la %r15,4095 + lctlg %c0,%c15,__LC_CREGS_SAVE_AREA-4095(%r15) +0: larl %r15,.Lstosm_tmp + stosm 0(%r15),0x04 # turn dat on, keep irqs off lg %r15,__LC_RESTART_STACK xc STACK_FRAME_OVERHEAD(__PT_SIZE,%r15),STACK_FRAME_OVERHEAD(%r15) stmg %r0,%r14,STACK_FRAME_OVERHEAD+__PT_R0(%r15) @@ -638,7 +641,7 @@ ENTRY(restart_int_handler) xc 0(STACK_FRAME_OVERHEAD,%r15),0(%r15) lg %r1,__LC_RESTART_FN # load fn, parm & source cpu lg %r2,__LC_RESTART_DATA - lg %r3,__LC_RESTART_SOURCE + lgf %r3,__LC_RESTART_SOURCE ltgr %r3,%r3 # test source cpu address jm 1f # negative -> skip source stop 0: sigp %r4,%r3,SIGP_SENSE # sigp sense to source cpu diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index 1ab33465382f..7f2696e8d511 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -28,10 +28,8 @@ void do_non_secure_storage_access(struct pt_regs *regs); void do_secure_storage_violation(struct pt_regs *regs); void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str); void kernel_stack_overflow(struct pt_regs * regs); -void do_signal(struct pt_regs *regs); void handle_signal32(struct ksignal *ksig, sigset_t *oldset, struct pt_regs *regs); -void do_notify_resume(struct pt_regs *regs); void __init init_IRQ(void); void do_io_irq(struct pt_regs *regs); @@ -64,4 +62,13 @@ void stack_free(unsigned long stack); extern char kprobes_insn_page[]; +extern char _samode31[], _eamode31[]; +extern char _stext_amode31[], _etext_amode31[]; +extern struct exception_table_entry _start_amode31_ex_table[]; +extern struct exception_table_entry _stop_amode31_ex_table[]; + +#define __amode31_data __section(".amode31.data") +#define __amode31_ref __section(".amode31.refs") +extern long _start_amode31_refs[], _end_amode31_refs[]; + #endif /* _ENTRY_H */ diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 2d8f595d9196..0a464d328467 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -18,8 +18,11 @@ #include <trace/syscall.h> #include <asm/asm-offsets.h> #include <asm/cacheflush.h> +#include <asm/ftrace.lds.h> +#include <asm/nospec-branch.h> #include <asm/set_memory.h> #include "entry.h" +#include "ftrace.h" /* * To generate function prologue either gcc's hotpatch feature (since gcc 4.8) @@ -41,7 +44,130 @@ */ void *ftrace_func 
__read_mostly = ftrace_stub; -unsigned long ftrace_plt; +struct ftrace_insn { + u16 opc; + s32 disp; +} __packed; + +asm( + " .align 16\n" + "ftrace_shared_hotpatch_trampoline_br:\n" + " lmg %r0,%r1,2(%r1)\n" + " br %r1\n" + "ftrace_shared_hotpatch_trampoline_br_end:\n" +); + +#ifdef CONFIG_EXPOLINE +asm( + " .align 16\n" + "ftrace_shared_hotpatch_trampoline_ex:\n" + " lmg %r0,%r1,2(%r1)\n" + " ex %r0," __stringify(__LC_BR_R1) "(%r0)\n" + " j .\n" + "ftrace_shared_hotpatch_trampoline_ex_end:\n" +); + +asm( + " .align 16\n" + "ftrace_shared_hotpatch_trampoline_exrl:\n" + " lmg %r0,%r1,2(%r1)\n" + " .insn ril,0xc60000000000,%r0,0f\n" /* exrl */ + " j .\n" + "0: br %r1\n" + "ftrace_shared_hotpatch_trampoline_exrl_end:\n" +); +#endif /* CONFIG_EXPOLINE */ + +#ifdef CONFIG_MODULES +static char *ftrace_plt; + +asm( + " .data\n" + "ftrace_plt_template:\n" + " basr %r1,%r0\n" + " lg %r1,0f-.(%r1)\n" + " br %r1\n" + "0: .quad ftrace_caller\n" + "ftrace_plt_template_end:\n" + " .previous\n" +); +#endif /* CONFIG_MODULES */ + +static const char *ftrace_shared_hotpatch_trampoline(const char **end) +{ + const char *tstart, *tend; + + tstart = ftrace_shared_hotpatch_trampoline_br; + tend = ftrace_shared_hotpatch_trampoline_br_end; +#ifdef CONFIG_EXPOLINE + if (!nospec_disable) { + tstart = ftrace_shared_hotpatch_trampoline_ex; + tend = ftrace_shared_hotpatch_trampoline_ex_end; + if (test_facility(35)) { /* exrl */ + tstart = ftrace_shared_hotpatch_trampoline_exrl; + tend = ftrace_shared_hotpatch_trampoline_exrl_end; + } + } +#endif /* CONFIG_EXPOLINE */ + if (end) + *end = tend; + return tstart; +} + +bool ftrace_need_init_nop(void) +{ + return ftrace_shared_hotpatch_trampoline(NULL); +} + +int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) +{ + static struct ftrace_hotpatch_trampoline *next_vmlinux_trampoline = + __ftrace_hotpatch_trampolines_start; + static const char orig[6] = { 0xc0, 0x04, 0x00, 0x00, 0x00, 0x00 }; + static struct ftrace_hotpatch_trampoline *trampoline; + struct ftrace_hotpatch_trampoline **next_trampoline; + struct ftrace_hotpatch_trampoline *trampolines_end; + struct ftrace_hotpatch_trampoline tmp; + struct ftrace_insn *insn; + const char *shared; + s32 disp; + + BUILD_BUG_ON(sizeof(struct ftrace_hotpatch_trampoline) != + SIZEOF_FTRACE_HOTPATCH_TRAMPOLINE); + + next_trampoline = &next_vmlinux_trampoline; + trampolines_end = __ftrace_hotpatch_trampolines_end; + shared = ftrace_shared_hotpatch_trampoline(NULL); +#ifdef CONFIG_MODULES + if (mod) { + next_trampoline = &mod->arch.next_trampoline; + trampolines_end = mod->arch.trampolines_end; + shared = ftrace_plt; + } +#endif + + if (WARN_ON_ONCE(*next_trampoline >= trampolines_end)) + return -ENOMEM; + trampoline = (*next_trampoline)++; + + /* Check for the compiler-generated fentry nop (brcl 0, .). */ + if (WARN_ON_ONCE(memcmp((const void *)rec->ip, &orig, sizeof(orig)))) + return -EINVAL; + + /* Generate the trampoline. */ + tmp.brasl_opc = 0xc015; /* brasl %r1, shared */ + tmp.brasl_disp = (shared - (const char *)&trampoline->brasl_opc) / 2; + tmp.interceptor = FTRACE_ADDR; + tmp.rest_of_intercepted_function = rec->ip + sizeof(struct ftrace_insn); + s390_kernel_write(trampoline, &tmp, sizeof(tmp)); + + /* Generate a jump to the trampoline. 
*/ + disp = ((char *)trampoline - (char *)rec->ip) / 2; + insn = (struct ftrace_insn *)rec->ip; + s390_kernel_write(&insn->disp, &disp, sizeof(disp)); + + return 0; +} int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr) @@ -49,11 +175,45 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, return 0; } +static void ftrace_generate_nop_insn(struct ftrace_insn *insn) +{ + /* brcl 0,0 */ + insn->opc = 0xc004; + insn->disp = 0; +} + +static void ftrace_generate_call_insn(struct ftrace_insn *insn, + unsigned long ip) +{ + unsigned long target; + + /* brasl r0,ftrace_caller */ + target = FTRACE_ADDR; +#ifdef CONFIG_MODULES + if (is_module_addr((void *)ip)) + target = (unsigned long)ftrace_plt; +#endif /* CONFIG_MODULES */ + insn->opc = 0xc005; + insn->disp = (target - ip) / 2; +} + +static void brcl_disable(void *brcl) +{ + u8 op = 0x04; /* set mask field to zero */ + + s390_kernel_write((char *)brcl + 1, &op, sizeof(op)); +} + int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) { struct ftrace_insn orig, new, old; + if (ftrace_shared_hotpatch_trampoline(NULL)) { + brcl_disable((void *)rec->ip); + return 0; + } + if (copy_from_kernel_nofault(&old, (void *) rec->ip, sizeof(old))) return -EFAULT; /* Replace ftrace call with a nop. */ @@ -67,10 +227,22 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, return 0; } +static void brcl_enable(void *brcl) +{ + u8 op = 0xf4; /* set mask field to all ones */ + + s390_kernel_write((char *)brcl + 1, &op, sizeof(op)); +} + int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { struct ftrace_insn orig, new, old; + if (ftrace_shared_hotpatch_trampoline(NULL)) { + brcl_enable((void *)rec->ip); + return 0; + } + if (copy_from_kernel_nofault(&old, (void *) rec->ip, sizeof(old))) return -EFAULT; /* Replace nop with an ftrace call. */ @@ -95,22 +267,44 @@ int __init ftrace_dyn_arch_init(void) return 0; } +void arch_ftrace_update_code(int command) +{ + if (ftrace_shared_hotpatch_trampoline(NULL)) + ftrace_modify_all_code(command); + else + ftrace_run_stop_machine(command); +} + +static void __ftrace_sync(void *dummy) +{ +} + +int ftrace_arch_code_modify_post_process(void) +{ + if (ftrace_shared_hotpatch_trampoline(NULL)) { + /* Send SIGP to the other CPUs, so they see the new code. 
*/ + smp_call_function(__ftrace_sync, NULL, 1); + } + return 0; +} + #ifdef CONFIG_MODULES static int __init ftrace_plt_init(void) { - unsigned int *ip; + const char *start, *end; - ftrace_plt = (unsigned long) module_alloc(PAGE_SIZE); + ftrace_plt = module_alloc(PAGE_SIZE); if (!ftrace_plt) panic("cannot allocate ftrace plt\n"); - ip = (unsigned int *) ftrace_plt; - ip[0] = 0x0d10e310; /* basr 1,0; lg 1,10(1); br 1 */ - ip[1] = 0x100a0004; - ip[2] = 0x07f10000; - ip[3] = FTRACE_ADDR >> 32; - ip[4] = FTRACE_ADDR & 0xffffffff; - set_memory_ro(ftrace_plt, 1); + + start = ftrace_shared_hotpatch_trampoline(&end); + if (!start) { + start = ftrace_plt_template; + end = ftrace_plt_template_end; + } + memcpy(ftrace_plt, start, end - start); + set_memory_ro((unsigned long)ftrace_plt, 1); return 0; } device_initcall(ftrace_plt_init); @@ -147,17 +341,13 @@ NOKPROBE_SYMBOL(prepare_ftrace_return); */ int ftrace_enable_ftrace_graph_caller(void) { - u8 op = 0x04; /* set mask field to zero */ - - s390_kernel_write(__va(ftrace_graph_caller)+1, &op, sizeof(op)); + brcl_disable(__va(ftrace_graph_caller)); return 0; } int ftrace_disable_ftrace_graph_caller(void) { - u8 op = 0xf4; /* set mask field to all ones */ - - s390_kernel_write(__va(ftrace_graph_caller)+1, &op, sizeof(op)); + brcl_enable(__va(ftrace_graph_caller)); return 0; } diff --git a/arch/s390/kernel/ftrace.h b/arch/s390/kernel/ftrace.h new file mode 100644 index 000000000000..69e416f4c6b0 --- /dev/null +++ b/arch/s390/kernel/ftrace.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _FTRACE_H +#define _FTRACE_H + +#include <asm/types.h> + +struct ftrace_hotpatch_trampoline { + u16 brasl_opc; + s32 brasl_disp; + s16: 16; + u64 rest_of_intercepted_function; + u64 interceptor; +} __packed; + +extern struct ftrace_hotpatch_trampoline __ftrace_hotpatch_trampolines_start[]; +extern struct ftrace_hotpatch_trampoline __ftrace_hotpatch_trampolines_end[]; +extern const char ftrace_shared_hotpatch_trampoline_br[]; +extern const char ftrace_shared_hotpatch_trampoline_br_end[]; +extern const char ftrace_shared_hotpatch_trampoline_ex[]; +extern const char ftrace_shared_hotpatch_trampoline_ex_end[]; +extern const char ftrace_shared_hotpatch_trampoline_exrl[]; +extern const char ftrace_shared_hotpatch_trampoline_exrl_end[]; +extern const char ftrace_plt_template[]; +extern const char ftrace_plt_template_end[]; + +#endif /* _FTRACE_H */ diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index 0c253886da78..114b5490ad8e 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -21,6 +21,7 @@ ENTRY(startup_continue) larl %r1,tod_clock_base mvc 0(16,%r1),__LC_BOOT_CLOCK larl %r13,.LPG1 # get base + lctlg %c0,%c15,.Lctl-.LPG1(%r13) # load control registers # # Setup stack # @@ -41,3 +42,19 @@ ENTRY(startup_continue) .align 16 .LPG1: .Ldw: .quad 0x0002000180000000,0x0000000000000000 +.Lctl: .quad 0x04040000 # cr0: AFP registers & secondary space + .quad 0 # cr1: primary space segment table + .quad 0 # cr2: dispatchable unit control table + .quad 0 # cr3: instruction authorization + .quad 0xffff # cr4: instruction authorization + .quad 0 # cr5: primary-aste origin + .quad 0 # cr6: I/O interrupts + .quad 0 # cr7: secondary space segment table + .quad 0x0000000000008000 # cr8: access registers translation + .quad 0 # cr9: tracing off + .quad 0 # cr10: tracing off + .quad 0 # cr11: tracing off + .quad 0 # cr12: tracing off + .quad 0 # cr13: home space segment table + .quad 0xc0000000 # cr14: machine check handling off + 
.quad 0 # cr15: linkage stack operations diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 50e2c21e0ec9..e2cc35775b99 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -179,8 +179,6 @@ static inline int __diag308(unsigned long subcode, void *addr) int diag308(unsigned long subcode, void *addr) { - if (IS_ENABLED(CONFIG_KASAN)) - __arch_local_irq_stosm(0x04); /* enable DAT */ diag_stat_inc(DIAG_STAT_X308); return __diag308(subcode, addr); } @@ -1843,7 +1841,6 @@ static struct kobj_attribute on_restart_attr = __ATTR_RW(on_restart); static void __do_restart(void *ignore) { - __arch_local_irq_stosm(0x04); /* enable DAT */ smp_send_stop(); #ifdef CONFIG_CRASH_DUMP crash_kexec(NULL); @@ -2082,7 +2079,7 @@ void s390_reset_system(void) /* Disable lowcore protection */ __ctl_clear_bit(0, 28); - diag_dma_ops.diag308_reset(); + diag_amode31_ops.diag308_reset(); } #ifdef CONFIG_KEXEC_FILE diff --git a/arch/s390/kernel/ipl_vmparm.c b/arch/s390/kernel/ipl_vmparm.c index af43535a976d..b5245fadcfb0 100644 --- a/arch/s390/kernel/ipl_vmparm.c +++ b/arch/s390/kernel/ipl_vmparm.c @@ -1,4 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 +#include <linux/minmax.h> +#include <linux/string.h> #include <asm/ebcdic.h> #include <asm/ipl.h> diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index 234d085257eb..3a3145c4a3ba 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -228,7 +228,7 @@ int show_interrupts(struct seq_file *p, void *v) int index = *(loff_t *) v; int cpu, irq; - get_online_cpus(); + cpus_read_lock(); if (index == 0) { seq_puts(p, " "); for_each_online_cpu(cpu) @@ -258,7 +258,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); } out: - put_online_cpus(); + cpus_read_unlock(); return 0; } diff --git a/arch/s390/kernel/jump_label.c b/arch/s390/kernel/jump_label.c index ab584e8e3527..9156653b56f6 100644 --- a/arch/s390/kernel/jump_label.c +++ b/arch/s390/kernel/jump_label.c @@ -36,7 +36,7 @@ static void jump_label_bug(struct jump_entry *entry, struct insn *expected, unsigned char *ipe = (unsigned char *)expected; unsigned char *ipn = (unsigned char *)new; - pr_emerg("Jump label code mismatch at %pS [%p]\n", ipc, ipc); + pr_emerg("Jump label code mismatch at %pS [%px]\n", ipc, ipc); pr_emerg("Found: %6ph\n", ipc); pr_emerg("Expected: %6ph\n", ipe); pr_emerg("New: %6ph\n", ipn); diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index 1005a6935fbe..0505e55a6297 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -224,8 +224,8 @@ void arch_crash_save_vmcoreinfo(void) VMCOREINFO_SYMBOL(lowcore_ptr); VMCOREINFO_SYMBOL(high_memory); VMCOREINFO_LENGTH(lowcore_ptr, NR_CPUS); - vmcoreinfo_append_str("SDMA=%lx\n", __sdma); - vmcoreinfo_append_str("EDMA=%lx\n", __edma); + vmcoreinfo_append_str("SAMODE31=%lx\n", __samode31); + vmcoreinfo_append_str("EAMODE31=%lx\n", __eamode31); vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset()); mem_assign_absolute(S390_lowcore.vmcore_info, paddr_vmcoreinfo_note()); } @@ -263,7 +263,6 @@ static void __do_machine_kexec(void *data) */ static void __machine_kexec(void *data) { - __arch_local_irq_stosm(0x04); /* enable DAT */ pfault_fini(); tracing_off(); debug_locks_off(); diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index 4055f1c49814..b01ba460b7ca 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -14,6 +14,7 @@ #include <linux/elf.h> #include <linux/vmalloc.h> #include <linux/fs.h> 
+#include <linux/ftrace.h> #include <linux/string.h> #include <linux/kernel.h> #include <linux/kasan.h> @@ -23,6 +24,8 @@ #include <asm/alternative.h> #include <asm/nospec-branch.h> #include <asm/facility.h> +#include <asm/ftrace.lds.h> +#include <asm/set_memory.h> #if 0 #define DEBUGP printk @@ -48,6 +51,13 @@ void *module_alloc(unsigned long size) return p; } +#ifdef CONFIG_FUNCTION_TRACER +void module_arch_cleanup(struct module *mod) +{ + module_memfree(mod->arch.trampolines_start); +} +#endif + void module_arch_freeing_init(struct module *mod) { if (is_livepatch_module(mod) && @@ -466,6 +476,30 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, write); } +#ifdef CONFIG_FUNCTION_TRACER +static int module_alloc_ftrace_hotpatch_trampolines(struct module *me, + const Elf_Shdr *s) +{ + char *start, *end; + int numpages; + size_t size; + + size = FTRACE_HOTPATCH_TRAMPOLINES_SIZE(s->sh_size); + numpages = DIV_ROUND_UP(size, PAGE_SIZE); + start = module_alloc(numpages * PAGE_SIZE); + if (!start) + return -ENOMEM; + set_memory_ro((unsigned long)start, numpages); + end = start + size; + + me->arch.trampolines_start = (struct ftrace_hotpatch_trampoline *)start; + me->arch.trampolines_end = (struct ftrace_hotpatch_trampoline *)end; + me->arch.next_trampoline = me->arch.trampolines_start; + + return 0; +} +#endif /* CONFIG_FUNCTION_TRACER */ + int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *me) @@ -473,6 +507,9 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *s; char *secstrings, *secname; void *aseg; +#ifdef CONFIG_FUNCTION_TRACER + int ret; +#endif if (IS_ENABLED(CONFIG_EXPOLINE) && !nospec_disable && me->arch.plt_size) { @@ -507,6 +544,14 @@ int module_finalize(const Elf_Ehdr *hdr, if (IS_ENABLED(CONFIG_EXPOLINE) && (str_has_prefix(secname, ".s390_return"))) nospec_revert(aseg, aseg + s->sh_size); + +#ifdef CONFIG_FUNCTION_TRACER + if (!strcmp(FTRACE_CALLSITE_SECTION, secname)) { + ret = module_alloc_ftrace_hotpatch_trampolines(me, s); + if (ret < 0) + return ret; + } +#endif /* CONFIG_FUNCTION_TRACER */ } jump_label_apply_nops(me); diff --git a/arch/s390/kernel/os_info.c b/arch/s390/kernel/os_info.c index 5a7420b23aa8..4bef35b79b93 100644 --- a/arch/s390/kernel/os_info.c +++ b/arch/s390/kernel/os_info.c @@ -121,7 +121,7 @@ static void os_info_old_init(void) if (os_info_init) return; - if (!OLDMEM_BASE) + if (!oldmem_data.start) goto fail; if (copy_oldmem_kernel(&addr, &S390_lowcore.os_info, sizeof(addr))) goto fail; diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index d7dc36ec0a60..2e3bb633acf6 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -1138,7 +1138,7 @@ static long cfset_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { int ret; - get_online_cpus(); + cpus_read_lock(); mutex_lock(&cfset_ctrset_mutex); switch (cmd) { case S390_HWCTR_START: @@ -1155,7 +1155,7 @@ static long cfset_ioctl(struct file *file, unsigned int cmd, unsigned long arg) break; } mutex_unlock(&cfset_ctrset_mutex); - put_online_cpus(); + cpus_read_unlock(); return ret; } diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 82df39b17bb5..d9d4a806979e 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -11,6 +11,7 @@ #include <linux/cpufeature.h> #include <linux/bitops.h> #include <linux/kernel.h> +#include <linux/random.h> #include <linux/sched/mm.h> #include <linux/init.h> #include <linux/seq_file.h> @@ -23,8 +24,12 @@ #include 
<asm/elf.h> #include <asm/lowcore.h> #include <asm/param.h> +#include <asm/sclp.h> #include <asm/smp.h> +unsigned long __read_mostly elf_hwcap; +char elf_platform[ELF_PLATFORM_SIZE]; + struct cpu_info { unsigned int cpu_mhz_dynamic; unsigned int cpu_mhz_static; @@ -113,15 +118,33 @@ static void show_facilities(struct seq_file *m) static void show_cpu_summary(struct seq_file *m, void *v) { static const char *hwcap_str[] = { - "esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp", - "edat", "etf3eh", "highgprs", "te", "vx", "vxd", "vxe", "gs", - "vxe2", "vxp", "sort", "dflt" - }; - static const char * const int_hwcap_str[] = { - "sie" + [HWCAP_NR_ESAN3] = "esan3", + [HWCAP_NR_ZARCH] = "zarch", + [HWCAP_NR_STFLE] = "stfle", + [HWCAP_NR_MSA] = "msa", + [HWCAP_NR_LDISP] = "ldisp", + [HWCAP_NR_EIMM] = "eimm", + [HWCAP_NR_DFP] = "dfp", + [HWCAP_NR_HPAGE] = "edat", + [HWCAP_NR_ETF3EH] = "etf3eh", + [HWCAP_NR_HIGH_GPRS] = "highgprs", + [HWCAP_NR_TE] = "te", + [HWCAP_NR_VXRS] = "vx", + [HWCAP_NR_VXRS_BCD] = "vxd", + [HWCAP_NR_VXRS_EXT] = "vxe", + [HWCAP_NR_GS] = "gs", + [HWCAP_NR_VXRS_EXT2] = "vxe2", + [HWCAP_NR_VXRS_PDE] = "vxp", + [HWCAP_NR_SORT] = "sort", + [HWCAP_NR_DFLT] = "dflt", + [HWCAP_NR_VXRS_PDE2] = "vxp2", + [HWCAP_NR_NNPA] = "nnpa", + [HWCAP_NR_PCI_MIO] = "pcimio", + [HWCAP_NR_SIE] = "sie", }; int i, cpu; + BUILD_BUG_ON(ARRAY_SIZE(hwcap_str) != HWCAP_NR_MAX); seq_printf(m, "vendor_id : IBM/S390\n" "# processors : %i\n" "bogomips per cpu: %lu.%02lu\n", @@ -132,9 +155,6 @@ static void show_cpu_summary(struct seq_file *m, void *v) for (i = 0; i < ARRAY_SIZE(hwcap_str); i++) if (hwcap_str[i] && (elf_hwcap & (1UL << i))) seq_printf(m, "%s ", hwcap_str[i]); - for (i = 0; i < ARRAY_SIZE(int_hwcap_str); i++) - if (int_hwcap_str[i] && (int_hwcap & (1UL << i))) - seq_printf(m, "%s ", int_hwcap_str[i]); seq_puts(m, "\n"); show_facilities(m); show_cacheinfo(m); @@ -149,6 +169,141 @@ static void show_cpu_summary(struct seq_file *m, void *v) } } +static int __init setup_hwcaps(void) +{ + /* instructions named N3, "backported" to esa-mode */ + if (test_facility(0)) + elf_hwcap |= HWCAP_ESAN3; + + /* z/Architecture mode active */ + elf_hwcap |= HWCAP_ZARCH; + + /* store-facility-list-extended */ + if (test_facility(7)) + elf_hwcap |= HWCAP_STFLE; + + /* message-security assist */ + if (test_facility(17)) + elf_hwcap |= HWCAP_MSA; + + /* long-displacement */ + if (test_facility(19)) + elf_hwcap |= HWCAP_LDISP; + + /* extended-immediate */ + if (test_facility(21)) + elf_hwcap |= HWCAP_EIMM; + + /* extended-translation facility 3 enhancement */ + if (test_facility(22) && test_facility(30)) + elf_hwcap |= HWCAP_ETF3EH; + + /* decimal floating point & perform floating point operation */ + if (test_facility(42) && test_facility(44)) + elf_hwcap |= HWCAP_DFP; + + /* huge page support */ + if (MACHINE_HAS_EDAT1) + elf_hwcap |= HWCAP_HPAGE; + + /* 64-bit register support for 31-bit processes */ + elf_hwcap |= HWCAP_HIGH_GPRS; + + /* transactional execution */ + if (MACHINE_HAS_TE) + elf_hwcap |= HWCAP_TE; + + /* + * Vector extension can be disabled with the "novx" parameter. + * Use MACHINE_HAS_VX instead of facility bit 129. 
+ */ + if (MACHINE_HAS_VX) { + elf_hwcap |= HWCAP_VXRS; + if (test_facility(134)) + elf_hwcap |= HWCAP_VXRS_BCD; + if (test_facility(135)) + elf_hwcap |= HWCAP_VXRS_EXT; + if (test_facility(148)) + elf_hwcap |= HWCAP_VXRS_EXT2; + if (test_facility(152)) + elf_hwcap |= HWCAP_VXRS_PDE; + if (test_facility(192)) + elf_hwcap |= HWCAP_VXRS_PDE2; + } + + if (test_facility(150)) + elf_hwcap |= HWCAP_SORT; + + if (test_facility(151)) + elf_hwcap |= HWCAP_DFLT; + + if (test_facility(165)) + elf_hwcap |= HWCAP_NNPA; + + /* guarded storage */ + if (MACHINE_HAS_GS) + elf_hwcap |= HWCAP_GS; + + if (MACHINE_HAS_PCI_MIO) + elf_hwcap |= HWCAP_PCI_MIO; + + /* virtualization support */ + if (sclp.has_sief2) + elf_hwcap |= HWCAP_SIE; + + return 0; +} +arch_initcall(setup_hwcaps); + +static int __init setup_elf_platform(void) +{ + struct cpuid cpu_id; + + get_cpu_id(&cpu_id); + add_device_randomness(&cpu_id, sizeof(cpu_id)); + switch (cpu_id.machine) { + case 0x2064: + case 0x2066: + default: /* Use "z900" as default for 64 bit kernels. */ + strcpy(elf_platform, "z900"); + break; + case 0x2084: + case 0x2086: + strcpy(elf_platform, "z990"); + break; + case 0x2094: + case 0x2096: + strcpy(elf_platform, "z9-109"); + break; + case 0x2097: + case 0x2098: + strcpy(elf_platform, "z10"); + break; + case 0x2817: + case 0x2818: + strcpy(elf_platform, "z196"); + break; + case 0x2827: + case 0x2828: + strcpy(elf_platform, "zEC12"); + break; + case 0x2964: + case 0x2965: + strcpy(elf_platform, "z13"); + break; + case 0x3906: + case 0x3907: + strcpy(elf_platform, "z14"); + break; + case 0x8561: + case 0x8562: + strcpy(elf_platform, "z15"); + break; + } + return 0; +} +arch_initcall(setup_elf_platform); + static void show_cpu_topology(struct seq_file *m, unsigned long n) { #ifdef CONFIG_SCHED_TOPOLOGY @@ -210,7 +365,7 @@ static inline void *c_update(loff_t *pos) static void *c_start(struct seq_file *m, loff_t *pos) { - get_online_cpus(); + cpus_read_lock(); return c_update(pos); } @@ -222,7 +377,7 @@ static void *c_next(struct seq_file *m, void *v, loff_t *pos) static void c_stop(struct seq_file *m, void *v) { - put_online_cpus(); + cpus_read_unlock(); } const struct seq_operations cpuinfo_op = { diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index ff0f9e838916..fe14beb338e5 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -89,27 +89,71 @@ EXPORT_SYMBOL(console_devno); unsigned int console_irq = -1; EXPORT_SYMBOL(console_irq); -unsigned long elf_hwcap __read_mostly = 0; -char elf_platform[ELF_PLATFORM_SIZE]; +/* + * Some code and data needs to stay below 2 GB, even when the kernel would be + * relocated above 2 GB, because it has to use 31 bit addresses. + * Such code and data is part of the .amode31 section. + */ +unsigned long __amode31_ref __samode31 = __pa(&_samode31); +unsigned long __amode31_ref __eamode31 = __pa(&_eamode31); +unsigned long __amode31_ref __stext_amode31 = __pa(&_stext_amode31); +unsigned long __amode31_ref __etext_amode31 = __pa(&_etext_amode31); +struct exception_table_entry __amode31_ref *__start_amode31_ex_table = _start_amode31_ex_table; +struct exception_table_entry __amode31_ref *__stop_amode31_ex_table = _stop_amode31_ex_table; + +/* + * Control registers CR2, CR5 and CR15 are initialized with addresses + * of tables that must be placed below 2G which is handled by the AMODE31 + * sections. 
+ * Because the AMODE31 sections are relocated below 2G at startup, + * the content of control registers CR2, CR5 and CR15 must be updated + * with new addresses after the relocation. The initial initialization of + * control registers occurs in head64.S and then gets updated again after AMODE31 + * relocation. We must access the relevant AMODE31 tables indirectly via + * pointers placed in the .amode31.refs linker section. Those pointers get + * updated automatically during AMODE31 relocation and always contain a valid + * address within AMODE31 sections. + */ + +static __amode31_data u32 __ctl_duct_amode31[16] __aligned(64); + +static __amode31_data u64 __ctl_aste_amode31[8] __aligned(64) = { + [1] = 0xffffffffffffffff +}; + +static __amode31_data u32 __ctl_duald_amode31[32] __aligned(128) = { + 0x80000000, 0, 0, 0, + 0x80000000, 0, 0, 0, + 0x80000000, 0, 0, 0, + 0x80000000, 0, 0, 0, + 0x80000000, 0, 0, 0, + 0x80000000, 0, 0, 0, + 0x80000000, 0, 0, 0, + 0x80000000, 0, 0, 0 +}; + +static __amode31_data u32 __ctl_linkage_stack_amode31[8] __aligned(64) = { + 0, 0, 0x89000000, 0, + 0, 0, 0x8a000000, 0 +}; -unsigned long int_hwcap = 0; +static u64 __amode31_ref *__ctl_aste = __ctl_aste_amode31; +static u32 __amode31_ref *__ctl_duald = __ctl_duald_amode31; +static u32 __amode31_ref *__ctl_linkage_stack = __ctl_linkage_stack_amode31; +static u32 __amode31_ref *__ctl_duct = __ctl_duct_amode31; int __bootdata(noexec_disabled); unsigned long __bootdata(ident_map_size); struct mem_detect_info __bootdata(mem_detect); +struct initrd_data __bootdata(initrd_data); -struct exception_table_entry *__bootdata_preserved(__start_dma_ex_table); -struct exception_table_entry *__bootdata_preserved(__stop_dma_ex_table); -unsigned long __bootdata_preserved(__stext_dma); -unsigned long __bootdata_preserved(__etext_dma); -unsigned long __bootdata_preserved(__sdma); -unsigned long __bootdata_preserved(__edma); unsigned long __bootdata_preserved(__kaslr_offset); unsigned int __bootdata_preserved(zlib_dfltcc_support); EXPORT_SYMBOL(zlib_dfltcc_support); u64 __bootdata_preserved(stfle_fac_list[16]); EXPORT_SYMBOL(stfle_fac_list); u64 __bootdata_preserved(alt_stfle_fac_list[16]); +struct oldmem_data __bootdata_preserved(oldmem_data); unsigned long VMALLOC_START; EXPORT_SYMBOL(VMALLOC_START); @@ -254,7 +298,7 @@ static void __init setup_zfcpdump(void) { if (!is_ipl_type_dump()) return; - if (OLDMEM_BASE) + if (oldmem_data.start) return; strcat(boot_command_line, " cio_ignore=all,!ipldev,!condev"); console_loglevel = 2; @@ -421,7 +465,7 @@ static void __init setup_lowcore_dat_off(void) lc->restart_stack = (unsigned long) restart_stack; lc->restart_fn = (unsigned long) do_restart; lc->restart_data = 0; - lc->restart_source = -1UL; + lc->restart_source = -1U; mcck_stack = (unsigned long)memblock_alloc(THREAD_SIZE, THREAD_SIZE); if (!mcck_stack) @@ -450,12 +494,19 @@ static void __init setup_lowcore_dat_off(void) static void __init setup_lowcore_dat_on(void) { + struct lowcore *lc = lowcore_ptr[0]; + __ctl_clear_bit(0, 28); S390_lowcore.external_new_psw.mask |= PSW_MASK_DAT; S390_lowcore.svc_new_psw.mask |= PSW_MASK_DAT; S390_lowcore.program_new_psw.mask |= PSW_MASK_DAT; S390_lowcore.io_new_psw.mask |= PSW_MASK_DAT; + __ctl_store(S390_lowcore.cregs_save_area, 0, 15); __ctl_set_bit(0, 28); + mem_assign_absolute(S390_lowcore.restart_flags, RESTART_FLAG_CTLREGS); + mem_assign_absolute(S390_lowcore.program_new_psw, lc->program_new_psw); + memcpy_absolute(&S390_lowcore.cregs_save_area, lc->cregs_save_area, + 
sizeof(S390_lowcore.cregs_save_area)); } static struct resource code_resource = { @@ -610,9 +661,9 @@ static void __init reserve_crashkernel(void) return; } - low = crash_base ?: OLDMEM_BASE; + low = crash_base ?: oldmem_data.start; high = low + crash_size; - if (low >= OLDMEM_BASE && high <= OLDMEM_BASE + OLDMEM_SIZE) { + if (low >= oldmem_data.start && high <= oldmem_data.start + oldmem_data.size) { /* The crashkernel fits into OLDMEM, reuse OLDMEM */ crash_base = low; } else { @@ -639,7 +690,7 @@ static void __init reserve_crashkernel(void) if (register_memory_notifier(&kdump_mem_nb)) return; - if (!OLDMEM_BASE && MACHINE_IS_VM) + if (!oldmem_data.start && MACHINE_IS_VM) diag10_range(PFN_DOWN(crash_base), PFN_DOWN(crash_size)); crashk_res.start = crash_base; crashk_res.end = crash_base + crash_size - 1; @@ -658,11 +709,11 @@ static void __init reserve_crashkernel(void) static void __init reserve_initrd(void) { #ifdef CONFIG_BLK_DEV_INITRD - if (!INITRD_START || !INITRD_SIZE) + if (!initrd_data.start || !initrd_data.size) return; - initrd_start = INITRD_START; - initrd_end = initrd_start + INITRD_SIZE; - memblock_reserve(INITRD_START, INITRD_SIZE); + initrd_start = initrd_data.start; + initrd_end = initrd_start + initrd_data.size; + memblock_reserve(initrd_data.start, initrd_data.size); #endif } @@ -732,10 +783,10 @@ static void __init memblock_add_mem_detect_info(void) static void __init check_initrd(void) { #ifdef CONFIG_BLK_DEV_INITRD - if (INITRD_START && INITRD_SIZE && - !memblock_is_region_memory(INITRD_START, INITRD_SIZE)) { + if (initrd_data.start && initrd_data.size && + !memblock_is_region_memory(initrd_data.start, initrd_data.size)) { pr_err("The initial RAM disk does not fit into the memory\n"); - memblock_free(INITRD_START, INITRD_SIZE); + memblock_free(initrd_data.start, initrd_data.size); initrd_start = initrd_end = 0; } #endif @@ -748,10 +799,10 @@ static void __init reserve_kernel(void) { unsigned long start_pfn = PFN_UP(__pa(_end)); - memblock_reserve(0, HEAD_END); + memblock_reserve(0, STARTUP_NORMAL_OFFSET); + memblock_reserve((unsigned long)sclp_early_sccb, EXT_SCCB_READ_SCP); memblock_reserve((unsigned long)_stext, PFN_PHYS(start_pfn) - (unsigned long)_stext); - memblock_reserve(__sdma, __edma - __sdma); } static void __init setup_memory(void) @@ -771,152 +822,52 @@ static void __init setup_memory(void) memblock_enforce_memory_limit(memblock_end_of_DRAM()); } -/* - * Setup hardware capabilities. - */ -static int __init setup_hwcaps(void) +static void __init relocate_amode31_section(void) { - static const int stfl_bits[6] = { 0, 2, 7, 17, 19, 21 }; - struct cpuid cpu_id; - int i; - - /* - * The store facility list bits numbers as found in the principles - * of operation are numbered with bit 1UL<<31 as number 0 to - * bit 1UL<<0 as number 31. - * Bit 0: instructions named N3, "backported" to esa-mode - * Bit 2: z/Architecture mode is active - * Bit 7: the store-facility-list-extended facility is installed - * Bit 17: the message-security assist is installed - * Bit 19: the long-displacement facility is installed - * Bit 21: the extended-immediate facility is installed - * Bit 22: extended-translation facility 3 is installed - * Bit 30: extended-translation facility 3 enhancement facility - * These get translated to: - * HWCAP_S390_ESAN3 bit 0, HWCAP_S390_ZARCH bit 1, - * HWCAP_S390_STFLE bit 2, HWCAP_S390_MSA bit 3, - * HWCAP_S390_LDISP bit 4, HWCAP_S390_EIMM bit 5 and - * HWCAP_S390_ETF3EH bit 8 (22 && 30). 
- */ - for (i = 0; i < 6; i++) - if (test_facility(stfl_bits[i])) - elf_hwcap |= 1UL << i; - - if (test_facility(22) && test_facility(30)) - elf_hwcap |= HWCAP_S390_ETF3EH; - - /* - * Check for additional facilities with store-facility-list-extended. - * stfle stores doublewords (8 byte) with bit 1ULL<<63 as bit 0 - * and 1ULL<<0 as bit 63. Bits 0-31 contain the same information - * as stored by stfl, bits 32-xxx contain additional facilities. - * How many facility words are stored depends on the number of - * doublewords passed to the instruction. The additional facilities - * are: - * Bit 42: decimal floating point facility is installed - * Bit 44: perform floating point operation facility is installed - * translated to: - * HWCAP_S390_DFP bit 6 (42 && 44). - */ - if ((elf_hwcap & (1UL << 2)) && test_facility(42) && test_facility(44)) - elf_hwcap |= HWCAP_S390_DFP; - - /* - * Huge page support HWCAP_S390_HPAGE is bit 7. - */ - if (MACHINE_HAS_EDAT1) - elf_hwcap |= HWCAP_S390_HPAGE; - - /* - * 64-bit register support for 31-bit processes - * HWCAP_S390_HIGH_GPRS is bit 9. - */ - elf_hwcap |= HWCAP_S390_HIGH_GPRS; - - /* - * Transactional execution support HWCAP_S390_TE is bit 10. - */ - if (MACHINE_HAS_TE) - elf_hwcap |= HWCAP_S390_TE; - - /* - * Vector extension HWCAP_S390_VXRS is bit 11. The Vector extension - * can be disabled with the "novx" parameter. Use MACHINE_HAS_VX - * instead of facility bit 129. - */ - if (MACHINE_HAS_VX) { - elf_hwcap |= HWCAP_S390_VXRS; - if (test_facility(134)) - elf_hwcap |= HWCAP_S390_VXRS_BCD; - if (test_facility(135)) - elf_hwcap |= HWCAP_S390_VXRS_EXT; - if (test_facility(148)) - elf_hwcap |= HWCAP_S390_VXRS_EXT2; - if (test_facility(152)) - elf_hwcap |= HWCAP_S390_VXRS_PDE; - } - if (test_facility(150)) - elf_hwcap |= HWCAP_S390_SORT; - if (test_facility(151)) - elf_hwcap |= HWCAP_S390_DFLT; - - /* - * Guarded storage support HWCAP_S390_GS is bit 12. - */ - if (MACHINE_HAS_GS) - elf_hwcap |= HWCAP_S390_GS; - - get_cpu_id(&cpu_id); - add_device_randomness(&cpu_id, sizeof(cpu_id)); - switch (cpu_id.machine) { - case 0x2064: - case 0x2066: - default: /* Use "z900" as default for 64 bit kernels. */ - strcpy(elf_platform, "z900"); - break; - case 0x2084: - case 0x2086: - strcpy(elf_platform, "z990"); - break; - case 0x2094: - case 0x2096: - strcpy(elf_platform, "z9-109"); - break; - case 0x2097: - case 0x2098: - strcpy(elf_platform, "z10"); - break; - case 0x2817: - case 0x2818: - strcpy(elf_platform, "z196"); - break; - case 0x2827: - case 0x2828: - strcpy(elf_platform, "zEC12"); - break; - case 0x2964: - case 0x2965: - strcpy(elf_platform, "z13"); - break; - case 0x3906: - case 0x3907: - strcpy(elf_platform, "z14"); - break; - case 0x8561: - case 0x8562: - strcpy(elf_platform, "z15"); - break; - } - - /* - * Virtualization support HWCAP_INT_SIE is bit 0. 
- */ - if (sclp.has_sief2) - int_hwcap |= HWCAP_INT_SIE; + unsigned long amode31_addr, amode31_size; + long amode31_offset; + long *ptr; + + /* Allocate a new AMODE31 capable memory region */ + amode31_size = __eamode31 - __samode31; + pr_info("Relocating AMODE31 section of size 0x%08lx\n", amode31_size); + amode31_addr = (unsigned long)memblock_alloc_low(amode31_size, PAGE_SIZE); + if (!amode31_addr) + panic("Failed to allocate memory for AMODE31 section\n"); + amode31_offset = amode31_addr - __samode31; + + /* Move original AMODE31 section to the new one */ + memmove((void *)amode31_addr, (void *)__samode31, amode31_size); + /* Zero out the old AMODE31 section to catch invalid accesses within it */ + memset((void *)__samode31, 0, amode31_size); + + /* Update all AMODE31 region references */ + for (ptr = _start_amode31_refs; ptr != _end_amode31_refs; ptr++) + *ptr += amode31_offset; +} - return 0; +/* This must be called after AMODE31 relocation */ +static void __init setup_cr(void) +{ + union ctlreg2 cr2; + union ctlreg5 cr5; + union ctlreg15 cr15; + + __ctl_duct[1] = (unsigned long)__ctl_aste; + __ctl_duct[2] = (unsigned long)__ctl_aste; + __ctl_duct[4] = (unsigned long)__ctl_duald; + + /* Update control registers CR2, CR5 and CR15 */ + __ctl_store(cr2.val, 2, 2); + __ctl_store(cr5.val, 5, 5); + __ctl_store(cr15.val, 15, 15); + cr2.ducto = (unsigned long)__ctl_duct >> 6; + cr5.pasteo = (unsigned long)__ctl_duct >> 6; + cr15.lsea = (unsigned long)__ctl_linkage_stack >> 3; + __ctl_load(cr2.val, 2, 2); + __ctl_load(cr5.val, 5, 5); + __ctl_load(cr15.val, 15, 15); } -arch_initcall(setup_hwcaps); /* * Add system information as device randomness @@ -1059,6 +1010,9 @@ void __init setup_arch(char **cmdline_p) free_mem_detect_info(); + relocate_amode31_section(); + setup_cr(); + setup_uv(); setup_memory_end(); setup_memory(); diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index 78ef53b29958..307f5d99514d 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -533,9 +533,3 @@ void arch_do_signal_or_restart(struct pt_regs *regs, bool has_signal) */ restore_saved_sigmask(); } - -void do_notify_resume(struct pt_regs *regs) -{ - tracehook_notify_resume(regs); - rseq_handle_notify_resume(NULL, regs); -} diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 8984711f72ed..2a991e43ead3 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -252,6 +252,7 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu) cpumask_set_cpu(cpu, &init_mm.context.cpu_attach_mask); cpumask_set_cpu(cpu, mm_cpumask(&init_mm)); lc->cpu_nr = cpu; + lc->restart_flags = RESTART_FLAG_CTLREGS; lc->spinlock_lockval = arch_spin_lockval(cpu); lc->spinlock_index = 0; lc->percpu_offset = __per_cpu_offset[cpu]; @@ -294,10 +295,10 @@ static void pcpu_start_fn(struct pcpu *pcpu, void (*func)(void *), void *data) cpu = pcpu - pcpu_devices; lc = lowcore_ptr[cpu]; - lc->restart_stack = lc->nodat_stack; + lc->restart_stack = lc->kernel_stack; lc->restart_fn = (unsigned long) func; lc->restart_data = (unsigned long) data; - lc->restart_source = -1UL; + lc->restart_source = -1U; pcpu_sigp_retry(pcpu, SIGP_RESTART, 0); } @@ -311,12 +312,12 @@ static void __pcpu_delegate(pcpu_delegate_fn *func, void *data) func(data); /* should not return */ } -static void __no_sanitize_address pcpu_delegate(struct pcpu *pcpu, - pcpu_delegate_fn *func, - void *data, unsigned long stack) +static void pcpu_delegate(struct pcpu *pcpu, + pcpu_delegate_fn *func, + void *data, unsigned long 
stack) { struct lowcore *lc = lowcore_ptr[pcpu - pcpu_devices]; - unsigned long source_cpu = stap(); + unsigned int source_cpu = stap(); __load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT); if (pcpu->address == source_cpu) { @@ -569,6 +570,9 @@ static void smp_ctl_bit_callback(void *info) __ctl_load(cregs, 0, 15); } +static DEFINE_SPINLOCK(ctl_lock); +static unsigned long ctlreg; + /* * Set a bit in a control register of all cpus */ @@ -576,6 +580,11 @@ void smp_ctl_set_bit(int cr, int bit) { struct ec_creg_mask_parms parms = { 1UL << bit, -1UL, cr }; + spin_lock(&ctl_lock); + memcpy_absolute(&ctlreg, &S390_lowcore.cregs_save_area[cr], sizeof(ctlreg)); + __set_bit(bit, &ctlreg); + memcpy_absolute(&S390_lowcore.cregs_save_area[cr], &ctlreg, sizeof(ctlreg)); + spin_unlock(&ctl_lock); on_each_cpu(smp_ctl_bit_callback, &parms, 1); } EXPORT_SYMBOL(smp_ctl_set_bit); @@ -587,6 +596,11 @@ void smp_ctl_clear_bit(int cr, int bit) { struct ec_creg_mask_parms parms = { 0, ~(1UL << bit), cr }; + spin_lock(&ctl_lock); + memcpy_absolute(&ctlreg, &S390_lowcore.cregs_save_area[cr], sizeof(ctlreg)); + __clear_bit(bit, &ctlreg); + memcpy_absolute(&S390_lowcore.cregs_save_area[cr], &ctlreg, sizeof(ctlreg)); + spin_unlock(&ctl_lock); on_each_cpu(smp_ctl_bit_callback, &parms, 1); } EXPORT_SYMBOL(smp_ctl_clear_bit); @@ -673,7 +687,7 @@ void __init smp_save_dump_cpus(void) unsigned long page; bool is_boot_cpu; - if (!(OLDMEM_BASE || is_ipl_type_dump())) + if (!(oldmem_data.start || is_ipl_type_dump())) /* No previous system present, normal boot. */ return; /* Allocate a page as dumping area for the store status sigps */ @@ -704,12 +718,12 @@ void __init smp_save_dump_cpus(void) * these registers an SCLP request is required which is * done by drivers/s390/char/zcore.c:init_cpu_info() */ - if (!is_boot_cpu || OLDMEM_BASE) + if (!is_boot_cpu || oldmem_data.start) /* Get the CPU registers */ smp_save_cpu_regs(sa, addr, is_boot_cpu, page); } memblock_free(page, PAGE_SIZE); - diag_dma_ops.diag308_reset(); + diag_amode31_ops.diag308_reset(); pcpu_set_smt(0); } #endif /* CONFIG_CRASH_DUMP */ @@ -793,7 +807,7 @@ static int __smp_rescan_cpus(struct sclp_core_info *info, bool early) u16 core_id; int nr, i; - get_online_cpus(); + cpus_read_lock(); mutex_lock(&smp_cpu_state_mutex); nr = 0; cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask); @@ -816,7 +830,7 @@ static int __smp_rescan_cpus(struct sclp_core_info *info, bool early) nr += smp_add_core(&info->core[i], &avail, configured, early); } mutex_unlock(&smp_cpu_state_mutex); - put_online_cpus(); + cpus_read_unlock(); return nr; } @@ -868,11 +882,19 @@ void __init smp_detect_cpus(void) memblock_free_early((unsigned long)info, sizeof(*info)); } -static void smp_init_secondary(void) +/* + * Activate a secondary processor. + */ +static void smp_start_secondary(void *cpuvoid) { int cpu = raw_smp_processor_id(); S390_lowcore.last_update_clock = get_tod_clock(); + S390_lowcore.restart_stack = (unsigned long)restart_stack; + S390_lowcore.restart_fn = (unsigned long)do_restart; + S390_lowcore.restart_data = 0; + S390_lowcore.restart_source = -1U; + S390_lowcore.restart_flags = 0; restore_access_regs(S390_lowcore.access_regs_save_area); cpu_init(); rcu_cpu_starting(cpu); @@ -892,20 +914,6 @@ static void smp_init_secondary(void) cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); } -/* - * Activate a secondary processor. 
- */ -static void __no_sanitize_address smp_start_secondary(void *cpuvoid) -{ - S390_lowcore.restart_stack = (unsigned long) restart_stack; - S390_lowcore.restart_fn = (unsigned long) do_restart; - S390_lowcore.restart_data = 0; - S390_lowcore.restart_source = -1UL; - __ctl_load(S390_lowcore.cregs_save_area, 0, 15); - __load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT); - call_on_stack_noreturn(smp_init_secondary, S390_lowcore.kernel_stack); -} - /* Upping and downing of CPUs */ int __cpu_up(unsigned int cpu, struct task_struct *tidle) { @@ -1055,7 +1063,7 @@ static ssize_t cpu_configure_store(struct device *dev, return -EINVAL; if (val != 0 && val != 1) return -EINVAL; - get_online_cpus(); + cpus_read_lock(); mutex_lock(&smp_cpu_state_mutex); rc = -EBUSY; /* disallow configuration changes of online cpus and cpu 0 */ @@ -1104,7 +1112,7 @@ static ssize_t cpu_configure_store(struct device *dev, } out: mutex_unlock(&smp_cpu_state_mutex); - put_online_cpus(); + cpus_read_unlock(); return rc ? rc : count; } static DEVICE_ATTR(configure, 0644, cpu_configure_show, cpu_configure_store); diff --git a/arch/s390/boot/text_dma.S b/arch/s390/kernel/text_amode31.S index 5ff5fee02801..868e4a604110 100644 --- a/arch/s390/boot/text_dma.S +++ b/arch/s390/kernel/text_amode31.S @@ -9,14 +9,14 @@ #include <asm/errno.h> #include <asm/sigp.h> - .section .dma.text,"ax" + .section .amode31.text,"ax" /* * Simplified version of expoline thunk. The normal thunks can not be used here, * because they might be more than 2 GB away, and not reachable by the relative * branch. No comdat, exrl, etc. optimizations used here, because it only * affects a few functions that are not performance-relevant. */ - .macro BR_EX_DMA_r14 + .macro BR_EX_AMODE31_r14 larl %r1,0f ex 0,0(%r1) j . @@ -24,9 +24,9 @@ .endm /* - * int _diag14_dma(unsigned long rx, unsigned long ry1, unsigned long subcode) + * int _diag14_amode31(unsigned long rx, unsigned long ry1, unsigned long subcode) */ -ENTRY(_diag14_dma) +ENTRY(_diag14_amode31) lgr %r1,%r2 lgr %r2,%r3 lgr %r3,%r4 @@ -39,14 +39,14 @@ ENTRY(_diag14_dma) .Ldiag14_fault: sam64 lgfr %r2,%r5 - BR_EX_DMA_r14 - EX_TABLE_DMA(.Ldiag14_ex, .Ldiag14_fault) -ENDPROC(_diag14_dma) + BR_EX_AMODE31_r14 + EX_TABLE_AMODE31(.Ldiag14_ex, .Ldiag14_fault) +ENDPROC(_diag14_amode31) /* - * int _diag210_dma(struct diag210 *addr) + * int _diag210_amode31(struct diag210 *addr) */ -ENTRY(_diag210_dma) +ENTRY(_diag210_amode31) lgr %r1,%r2 lhi %r2,-1 sam31 @@ -57,40 +57,40 @@ ENTRY(_diag210_dma) .Ldiag210_fault: sam64 lgfr %r2,%r2 - BR_EX_DMA_r14 - EX_TABLE_DMA(.Ldiag210_ex, .Ldiag210_fault) -ENDPROC(_diag210_dma) + BR_EX_AMODE31_r14 + EX_TABLE_AMODE31(.Ldiag210_ex, .Ldiag210_fault) +ENDPROC(_diag210_amode31) /* - * int _diag26c_dma(void *req, void *resp, enum diag26c_sc subcode) + * int _diag26c_amode31(void *req, void *resp, enum diag26c_sc subcode) */ -ENTRY(_diag26c_dma) +ENTRY(_diag26c_amode31) lghi %r5,-EOPNOTSUPP sam31 diag %r2,%r4,0x26c .Ldiag26c_ex: sam64 lgfr %r2,%r5 - BR_EX_DMA_r14 - EX_TABLE_DMA(.Ldiag26c_ex, .Ldiag26c_ex) -ENDPROC(_diag26c_dma) + BR_EX_AMODE31_r14 + EX_TABLE_AMODE31(.Ldiag26c_ex, .Ldiag26c_ex) +ENDPROC(_diag26c_amode31) /* - * void _diag0c_dma(struct hypfs_diag0c_entry *entry) + * void _diag0c_amode31(struct hypfs_diag0c_entry *entry) */ -ENTRY(_diag0c_dma) +ENTRY(_diag0c_amode31) sam31 diag %r2,%r2,0x0c sam64 - BR_EX_DMA_r14 -ENDPROC(_diag0c_dma) + BR_EX_AMODE31_r14 +ENDPROC(_diag0c_amode31) /* - * void _diag308_reset_dma(void) + * void _diag308_reset_amode31(void) * * Calls diag 308 subcode 1 
and continues execution */ -ENTRY(_diag308_reset_dma) +ENTRY(_diag308_reset_amode31) larl %r4,.Lctlregs # Save control registers stctg %c0,%c15,0(%r4) lg %r2,0(%r4) # Disable lowcore protection @@ -107,7 +107,7 @@ ENTRY(_diag308_reset_dma) larl %r4,.Lcontinue_psw # Save PSW flags epsw %r2,%r3 stm %r2,%r3,0(%r4) - larl %r4,restart_part2 # Setup restart PSW at absolute 0 + larl %r4,.Lrestart_part2 # Setup restart PSW at absolute 0 larl %r3,.Lrestart_diag308_psw og %r4,0(%r3) # Save PSW lghi %r3,0 @@ -115,7 +115,7 @@ ENTRY(_diag308_reset_dma) lghi %r1,1 lghi %r0,0 diag %r0,%r1,0x308 -restart_part2: +.Lrestart_part2: lhi %r0,0 # Load r0 with zero lhi %r1,2 # Use mode 2 = ESAME (dump) sigp %r1,%r0,SIGP_SET_ARCHITECTURE # Switch to ESAME mode @@ -127,19 +127,21 @@ restart_part2: larl %r4,.Lprefix # Restore prefix register spx 0(%r4) larl %r4,.Lcontinue_psw # Restore PSW flags + larl %r2,.Lcontinue + stg %r2,8(%r4) lpswe 0(%r4) .Lcontinue: - BR_EX_DMA_r14 -ENDPROC(_diag308_reset_dma) + BR_EX_AMODE31_r14 +ENDPROC(_diag308_reset_amode31) - .section .dma.data,"aw",@progbits + .section .amode31.data,"aw",@progbits .align 8 .Lrestart_diag308_psw: .long 0x00080000,0x80000000 .align 8 .Lcontinue_psw: - .quad 0,.Lcontinue + .quad 0,0 .align 8 .Lctlreg0: diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 26aa2614ee35..d2458a29618f 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -406,7 +406,7 @@ static ssize_t dispatching_store(struct device *dev, if (val != 0 && val != 1) return -EINVAL; rc = 0; - get_online_cpus(); + cpus_read_lock(); mutex_lock(&smp_cpu_state_mutex); if (cpu_management == val) goto out; @@ -417,7 +417,7 @@ static ssize_t dispatching_store(struct device *dev, topology_expect_change(); out: mutex_unlock(&smp_cpu_state_mutex); - put_online_cpus(); + cpus_read_unlock(); return rc ? 
rc : count; } static DEVICE_ATTR_RW(dispatching); diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 76947275fe8b..bcefc2173de4 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -291,7 +291,7 @@ static void __init test_monitor_call(void) void __init trap_init(void) { - sort_extable(__start_dma_ex_table, __stop_dma_ex_table); + sort_extable(__start_amode31_ex_table, __stop_amode31_ex_table); local_mcck_enable(); test_monitor_call(); } diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c index aeb0a15bcbb7..5a656c7b7a67 100644 --- a/arch/s390/kernel/uv.c +++ b/arch/s390/kernel/uv.c @@ -51,24 +51,9 @@ void __init setup_uv(void) { unsigned long uv_stor_base; - /* - * keep these conditions in line with has_uv_sec_stor_limit() - */ if (!is_prot_virt_host()) return; - if (is_prot_virt_guest()) { - prot_virt_host = 0; - pr_warn("Protected virtualization not available in protected guests."); - return; - } - - if (!test_facility(158)) { - prot_virt_host = 0; - pr_warn("Protected virtualization not supported by the hardware."); - return; - } - uv_stor_base = (unsigned long)memblock_alloc_try_nid( uv_info.uv_base_stor_len, SZ_1M, SZ_2G, MEMBLOCK_ALLOC_ACCESSIBLE, NUMA_NO_NODE); diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile index 3457dcf10396..e3e6ac5686df 100644 --- a/arch/s390/kernel/vdso32/Makefile +++ b/arch/s390/kernel/vdso32/Makefile @@ -36,6 +36,7 @@ CPPFLAGS_vdso32.lds += -P -C -U$(ARCH) GCOV_PROFILE := n UBSAN_SANITIZE := n KASAN_SANITIZE := n +KCSAN_SANITIZE := n # Force dependency (incbin is bad) $(obj)/vdso32_wrapper.o : $(obj)/vdso32.so diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile index 2a2092ce19f1..6568de236701 100644 --- a/arch/s390/kernel/vdso64/Makefile +++ b/arch/s390/kernel/vdso64/Makefile @@ -39,6 +39,7 @@ CPPFLAGS_vdso64.lds += -P -C -U$(ARCH) GCOV_PROFILE := n UBSAN_SANITIZE := n KASAN_SANITIZE := n +KCSAN_SANITIZE := n # Force dependency (incbin is bad) $(obj)/vdso64_wrapper.o : $(obj)/vdso64.so diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 4c0e19145cc6..63bdb9e1bfc1 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -5,6 +5,7 @@ #include <asm/thread_info.h> #include <asm/page.h> +#include <asm/ftrace.lds.h> /* * Put .bss..swapper_pg_dir as the first thing in .bss. This will @@ -46,6 +47,7 @@ SECTIONS KPROBES_TEXT IRQENTRY_TEXT SOFTIRQENTRY_TEXT + FTRACE_HOTPATCH_TRAMPOLINES_TEXT *(.text.*_indirect_*) *(.fixup) *(.gnu.warning) @@ -71,6 +73,13 @@ SECTIONS RW_DATA(0x100, PAGE_SIZE, THREAD_SIZE) BOOT_DATA_PRESERVED + . = ALIGN(8); + .amode31.refs : { + _start_amode31_refs = .; + *(.amode31.refs) + _end_amode31_refs = .; + } + _edata = .; /* End of data section */ /* will be freed after init */ @@ -136,6 +145,32 @@ SECTIONS BOOT_DATA + /* + * .amode31 section for code, data, ex_table that need to stay + * below 2 GB, even when the kernel is relocated above 2 GB. + */ + . = ALIGN(PAGE_SIZE); + _samode31 = .; + .amode31.text : { + _stext_amode31 = .; + *(.amode31.text) + *(.amode31.text.*_indirect_*) + . = ALIGN(PAGE_SIZE); + _etext_amode31 = .; + } + . = ALIGN(16); + .amode31.ex_table : { + _start_amode31_ex_table = .; + KEEP(*(.amode31.ex_table)) + _stop_amode31_ex_table = .; + } + . = ALIGN(PAGE_SIZE); + .amode31.data : { + *(.amode31.data) + } + . = ALIGN(PAGE_SIZE); + _eamode31 = .; + /* early.c uses stsi, which requires page aligned data. */ . 
= ALIGN(PAGE_SIZE); INIT_DATA_SECTION(0x100) diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c index f289afeb3f31..bccbf394ae7e 100644 --- a/arch/s390/lib/delay.c +++ b/arch/s390/lib/delay.c @@ -7,17 +7,10 @@ * Heiko Carstens <heiko.carstens@de.ibm.com>, */ -#include <linux/sched.h> +#include <linux/processor.h> #include <linux/delay.h> -#include <linux/timex.h> -#include <linux/export.h> -#include <linux/irqflags.h> -#include <linux/interrupt.h> -#include <linux/jump_label.h> -#include <linux/irq.h> -#include <asm/vtimer.h> #include <asm/div64.h> -#include <asm/idle.h> +#include <asm/timex.h> void __delay(unsigned long loops) { diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c index e40a30647d99..0b0c8c284953 100644 --- a/arch/s390/mm/dump_pagetables.c +++ b/arch/s390/mm/dump_pagetables.c @@ -4,6 +4,7 @@ #include <linux/seq_file.h> #include <linux/debugfs.h> #include <linux/mm.h> +#include <linux/kfence.h> #include <linux/kasan.h> #include <asm/ptdump.h> #include <asm/kasan.h> @@ -21,6 +22,10 @@ enum address_markers_idx { IDENTITY_BEFORE_END_NR, KERNEL_START_NR, KERNEL_END_NR, +#ifdef CONFIG_KFENCE + KFENCE_START_NR, + KFENCE_END_NR, +#endif IDENTITY_AFTER_NR, IDENTITY_AFTER_END_NR, #ifdef CONFIG_KASAN @@ -40,6 +45,10 @@ static struct addr_marker address_markers[] = { [IDENTITY_BEFORE_END_NR] = {(unsigned long)_stext, "Identity Mapping End"}, [KERNEL_START_NR] = {(unsigned long)_stext, "Kernel Image Start"}, [KERNEL_END_NR] = {(unsigned long)_end, "Kernel Image End"}, +#ifdef CONFIG_KFENCE + [KFENCE_START_NR] = {0, "KFence Pool Start"}, + [KFENCE_END_NR] = {0, "KFence Pool End"}, +#endif [IDENTITY_AFTER_NR] = {(unsigned long)_end, "Identity Mapping Start"}, [IDENTITY_AFTER_END_NR] = {0, "Identity Mapping End"}, #ifdef CONFIG_KASAN @@ -248,6 +257,9 @@ static void sort_address_markers(void) static int pt_dump_init(void) { +#ifdef CONFIG_KFENCE + unsigned long kfence_start = (unsigned long)__kfence_pool; +#endif /* * Figure out the maximum virtual address being accessible with the * kernel ASCE. 
We need this to keep the page table walker functions @@ -262,6 +274,10 @@ static int pt_dump_init(void) address_markers[VMEMMAP_END_NR].start_address = (unsigned long)vmemmap + vmemmap_size; address_markers[VMALLOC_NR].start_address = VMALLOC_START; address_markers[VMALLOC_END_NR].start_address = VMALLOC_END; +#ifdef CONFIG_KFENCE + address_markers[KFENCE_START_NR].start_address = kfence_start; + address_markers[KFENCE_END_NR].start_address = kfence_start + KFENCE_POOL_SIZE; +#endif sort_address_markers(); #ifdef CONFIG_PTDUMP_DEBUGFS debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, &ptdump_fops); diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index e33c43b38afe..212632d57db9 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -31,6 +31,7 @@ #include <linux/kprobes.h> #include <linux/uaccess.h> #include <linux/hugetlb.h> +#include <linux/kfence.h> #include <asm/asm-offsets.h> #include <asm/diag.h> #include <asm/gmap.h> @@ -230,8 +231,8 @@ const struct exception_table_entry *s390_search_extables(unsigned long addr) { const struct exception_table_entry *fixup; - fixup = search_extable(__start_dma_ex_table, - __stop_dma_ex_table - __start_dma_ex_table, + fixup = search_extable(__start_amode31_ex_table, + __stop_amode31_ex_table - __start_amode31_ex_table, addr); if (!fixup) fixup = search_exception_tables(addr); @@ -356,6 +357,7 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access) unsigned long address; unsigned int flags; vm_fault_t fault; + bool is_write; tsk = current; /* @@ -369,6 +371,8 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access) mm = tsk->mm; trans_exc_code = regs->int_parm_long; + address = trans_exc_code & __FAIL_ADDR_MASK; + is_write = (trans_exc_code & store_indication) == 0x400; /* * Verify that the fault happened in user space, that @@ -379,6 +383,8 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access) type = get_fault_type(regs); switch (type) { case KERNEL_FAULT: + if (kfence_handle_page_fault(address, is_write, regs)) + return 0; goto out; case USER_FAULT: case GMAP_FAULT: @@ -387,12 +393,11 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access) break; } - address = trans_exc_code & __FAIL_ADDR_MASK; perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); flags = FAULT_FLAG_DEFAULT; if (user_mode(regs)) flags |= FAULT_FLAG_USER; - if (access == VM_WRITE || (trans_exc_code & store_indication) == 0x400) + if (access == VM_WRITE || is_write) flags |= FAULT_FLAG_WRITE; mmap_read_lock(mm); diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 8ac710de1ab1..f3db3caa8447 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -34,6 +34,7 @@ #include <asm/processor.h> #include <linux/uaccess.h> #include <asm/pgalloc.h> +#include <asm/kfence.h> #include <asm/ptdump.h> #include <asm/dma.h> #include <asm/lowcore.h> @@ -200,7 +201,7 @@ void __init mem_init(void) high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); pv_init(); - + kfence_split_mapping(); /* Setup guest page hinting */ cmma_init(); diff --git a/arch/s390/mm/kasan_init.c b/arch/s390/mm/kasan_init.c index a0fdc6dc5f9d..3e4735168019 100644 --- a/arch/s390/mm/kasan_init.c +++ b/arch/s390/mm/kasan_init.c @@ -107,6 +107,9 @@ static void __init kasan_early_pgtable_populate(unsigned long address, sgt_prot &= ~_SEGMENT_ENTRY_NOEXEC; } + /* + * The first 1MB of 1:1 mapping is mapped with 4KB pages + */ while (address < end) { pg_dir = pgd_offset_k(address); if (pgd_none(*pg_dir)) { @@ -157,30 
+160,26 @@ static void __init kasan_early_pgtable_populate(unsigned long address, pm_dir = pmd_offset(pu_dir, address); if (pmd_none(*pm_dir)) { - if (mode == POPULATE_ZERO_SHADOW && - IS_ALIGNED(address, PMD_SIZE) && + if (IS_ALIGNED(address, PMD_SIZE) && end - address >= PMD_SIZE) { - pmd_populate(&init_mm, pm_dir, - kasan_early_shadow_pte); - address = (address + PMD_SIZE) & PMD_MASK; - continue; - } - /* the first megabyte of 1:1 is mapped with 4k pages */ - if (has_edat && address && end - address >= PMD_SIZE && - mode != POPULATE_ZERO_SHADOW) { - void *page; - - if (mode == POPULATE_ONE2ONE) { - page = (void *)address; - } else { - page = kasan_early_alloc_segment(); - memset(page, 0, _SEGMENT_SIZE); + if (mode == POPULATE_ZERO_SHADOW) { + pmd_populate(&init_mm, pm_dir, kasan_early_shadow_pte); + address = (address + PMD_SIZE) & PMD_MASK; + continue; + } else if (has_edat && address) { + void *page; + + if (mode == POPULATE_ONE2ONE) { + page = (void *)address; + } else { + page = kasan_early_alloc_segment(); + memset(page, 0, _SEGMENT_SIZE); + } + pmd_val(*pm_dir) = __pa(page) | sgt_prot; + address = (address + PMD_SIZE) & PMD_MASK; + continue; } - pmd_val(*pm_dir) = __pa(page) | sgt_prot; - address = (address + PMD_SIZE) & PMD_MASK; - continue; } - pt_dir = kasan_early_pte_alloc(); pmd_populate(&init_mm, pm_dir, pt_dir); } else if (pmd_large(*pm_dir)) { @@ -300,7 +299,7 @@ void __init kasan_early_init(void) pgalloc_low = round_up((unsigned long)_end, _SEGMENT_SIZE); if (IS_ENABLED(CONFIG_BLK_DEV_INITRD)) { initrd_end = - round_up(INITRD_START + INITRD_SIZE, _SEGMENT_SIZE); + round_up(initrd_data.start + initrd_data.size, _SEGMENT_SIZE); pgalloc_low = max(pgalloc_low, initrd_end); } diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c index a0f54bd5e98a..9663ce3625bc 100644 --- a/arch/s390/mm/maccess.c +++ b/arch/s390/mm/maccess.c @@ -228,7 +228,7 @@ void *xlate_dev_mem_ptr(phys_addr_t addr) void *bounce = (void *) addr; unsigned long size; - get_online_cpus(); + cpus_read_lock(); preempt_disable(); if (is_swapped(addr)) { size = PAGE_SIZE - (addr & ~PAGE_MASK); @@ -237,7 +237,7 @@ void *xlate_dev_mem_ptr(phys_addr_t addr) memcpy_absolute(bounce, (void *) addr, size); } preempt_enable(); - put_online_cpus(); + cpus_read_unlock(); return bounce; } diff --git a/arch/s390/mm/page-states.c b/arch/s390/mm/page-states.c index 68b153083a92..18a6381097a9 100644 --- a/arch/s390/mm/page-states.c +++ b/arch/s390/mm/page-states.c @@ -228,46 +228,3 @@ void arch_set_page_dat(struct page *page, int order) return; set_page_stable_dat(page, order); } - -void arch_set_page_nodat(struct page *page, int order) -{ - if (cmma_flag < 2) - return; - set_page_stable_nodat(page, order); -} - -int arch_test_page_nodat(struct page *page) -{ - unsigned char state; - - if (cmma_flag < 2) - return 0; - state = get_page_state(page); - return !!(state & 0x20); -} - -void arch_set_page_states(int make_stable) -{ - unsigned long flags, order, t; - struct list_head *l; - struct page *page; - struct zone *zone; - - if (!cmma_flag) - return; - if (make_stable) - drain_local_pages(NULL); - for_each_populated_zone(zone) { - spin_lock_irqsave(&zone->lock, flags); - for_each_migratetype_order(order, t) { - list_for_each(l, &zone->free_area[order].free_list[t]) { - page = list_entry(l, struct page, lru); - if (make_stable) - set_page_stable_dat(page, order); - else - set_page_unused(page, order); - } - } - spin_unlock_irqrestore(&zone->lock, flags); - } -} diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c 
index ed8e5b3575d5..fdc86c0e4e6c 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -8,6 +8,7 @@ #include <asm/cacheflush.h> #include <asm/facility.h> #include <asm/pgalloc.h> +#include <asm/kfence.h> #include <asm/page.h> #include <asm/set_memory.h> @@ -85,6 +86,8 @@ static int walk_pte_level(pmd_t *pmdp, unsigned long addr, unsigned long end, { pte_t *ptep, new; + if (flags == SET_MEMORY_4K) + return 0; ptep = pte_offset_kernel(pmdp, addr); do { new = *ptep; @@ -155,6 +158,7 @@ static int walk_pmd_level(pud_t *pudp, unsigned long addr, unsigned long end, unsigned long flags) { unsigned long next; + int need_split; pmd_t *pmdp; int rc = 0; @@ -164,7 +168,10 @@ static int walk_pmd_level(pud_t *pudp, unsigned long addr, unsigned long end, return -EINVAL; next = pmd_addr_end(addr, end); if (pmd_large(*pmdp)) { - if (addr & ~PMD_MASK || addr + PMD_SIZE > next) { + need_split = !!(flags & SET_MEMORY_4K); + need_split |= !!(addr & ~PMD_MASK); + need_split |= !!(addr + PMD_SIZE > next); + if (need_split) { rc = split_pmd_page(pmdp, addr); if (rc) return rc; @@ -232,6 +239,7 @@ static int walk_pud_level(p4d_t *p4d, unsigned long addr, unsigned long end, unsigned long flags) { unsigned long next; + int need_split; pud_t *pudp; int rc = 0; @@ -241,7 +249,10 @@ static int walk_pud_level(p4d_t *p4d, unsigned long addr, unsigned long end, return -EINVAL; next = pud_addr_end(addr, end); if (pud_large(*pudp)) { - if (addr & ~PUD_MASK || addr + PUD_SIZE > next) { + need_split = !!(flags & SET_MEMORY_4K); + need_split |= !!(addr & ~PUD_MASK); + need_split |= !!(addr + PUD_SIZE > next); + if (need_split) { rc = split_pud_page(pudp, addr); if (rc) break; @@ -316,7 +327,7 @@ int __set_memory(unsigned long addr, int numpages, unsigned long flags) return change_page_attr(addr, addr + numpages * PAGE_SIZE, flags); } -#ifdef CONFIG_DEBUG_PAGEALLOC +#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE) static void ipte_range(pte_t *pte, unsigned long address, int nr) { @@ -340,7 +351,7 @@ void __kernel_map_pages(struct page *page, int numpages, int enable) pte_t *pte; for (i = 0; i < numpages;) { - address = page_to_phys(page + i); + address = (unsigned long)page_to_virt(page + i); pte = virt_to_kpte(address); nr = (unsigned long)pte >> ilog2(sizeof(long)); nr = PTRS_PER_PTE - (nr & (PTRS_PER_PTE - 1)); diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 96897fab89dc..2b1c6d916cf9 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -581,7 +581,7 @@ void __init vmem_map_init(void) __set_memory((unsigned long)_sinittext, (unsigned long)(_einittext - _sinittext) >> PAGE_SHIFT, SET_MEMORY_RO | SET_MEMORY_X); - __set_memory(__stext_dma, (__etext_dma - __stext_dma) >> PAGE_SHIFT, + __set_memory(__stext_amode31, (__etext_amode31 - __stext_amode31) >> PAGE_SHIFT, SET_MEMORY_RO | SET_MEMORY_X); /* we need lowcore executable for our LPSWE instructions */ diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index b0993e05affe..e7e6788d75a8 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -113,13 +113,16 @@ int zpci_register_ioat(struct zpci_dev *zdev, u8 dmaas, { u64 req = ZPCI_CREATE_REQ(zdev->fh, dmaas, ZPCI_MOD_FC_REG_IOAT); struct zpci_fib fib = {0}; - u8 status; + u8 cc, status; WARN_ON_ONCE(iota & 0x3fff); fib.pba = base; fib.pal = limit; fib.iota = iota | ZPCI_IOTA_RTTO_FLAG; - return zpci_mod_fc(req, &fib, &status) ? 
-EIO : 0; + cc = zpci_mod_fc(req, &fib, &status); + if (cc) + zpci_dbg(3, "reg ioat fid:%x, cc:%d, status:%d\n", zdev->fid, cc, status); + return cc; } /* Modify PCI: Unregister I/O address translation parameters */ @@ -130,9 +133,9 @@ int zpci_unregister_ioat(struct zpci_dev *zdev, u8 dmaas) u8 cc, status; cc = zpci_mod_fc(req, &fib, &status); - if (cc == 3) /* Function already gone. */ - cc = 0; - return cc ? -EIO : 0; + if (cc) + zpci_dbg(3, "unreg ioat fid:%x, cc:%d, status:%d\n", zdev->fid, cc, status); + return cc; } /* Modify PCI: Set PCI function measurement parameters */ @@ -560,9 +563,12 @@ static void zpci_cleanup_bus_resources(struct zpci_dev *zdev) int pcibios_add_device(struct pci_dev *pdev) { + struct zpci_dev *zdev = to_zpci(pdev); struct resource *res; int i; + /* The pdev has a reference to the zdev via its bus */ + zpci_zdev_get(zdev); if (pdev->is_physfn) pdev->no_vf_scan = 1; @@ -582,7 +588,10 @@ int pcibios_add_device(struct pci_dev *pdev) void pcibios_release_device(struct pci_dev *pdev) { + struct zpci_dev *zdev = to_zpci(pdev); + zpci_unmap_resources(pdev); + zpci_zdev_put(zdev); } int pcibios_enable_device(struct pci_dev *pdev, int mask) @@ -653,32 +662,37 @@ void zpci_free_domain(int domain) int zpci_enable_device(struct zpci_dev *zdev) { - int rc; - - rc = clp_enable_fh(zdev, ZPCI_NR_DMA_SPACES); - if (rc) - goto out; - - rc = zpci_dma_init_device(zdev); - if (rc) - goto out_dma; + u32 fh = zdev->fh; + int rc = 0; - return 0; - -out_dma: - clp_disable_fh(zdev); -out: + if (clp_enable_fh(zdev, &fh, ZPCI_NR_DMA_SPACES)) + rc = -EIO; + else + zdev->fh = fh; return rc; } int zpci_disable_device(struct zpci_dev *zdev) { - zpci_dma_exit_device(zdev); - /* - * The zPCI function may already be disabled by the platform, this is - * detected in clp_disable_fh() which becomes a no-op. 
- */ - return clp_disable_fh(zdev); + u32 fh = zdev->fh; + int cc, rc = 0; + + cc = clp_disable_fh(zdev, &fh); + if (!cc) { + zdev->fh = fh; + } else if (cc == CLP_RC_SETPCIFN_ALRDY) { + pr_info("Disabling PCI function %08x had no effect as it was already disabled\n", + zdev->fid); + /* Function is already disabled - update handle */ + rc = clp_refresh_fh(zdev->fid, &fh); + if (!rc) { + zdev->fh = fh; + rc = -EINVAL; + } + } else { + rc = -EIO; + } + return rc; } /** @@ -788,6 +802,11 @@ int zpci_deconfigure_device(struct zpci_dev *zdev) if (zdev->zbus->bus) zpci_bus_remove_device(zdev, false); + if (zdev->dma_table) { + rc = zpci_dma_exit_device(zdev); + if (rc) + return rc; + } if (zdev_enabled(zdev)) { rc = zpci_disable_device(zdev); if (rc) @@ -811,6 +830,8 @@ void zpci_release_device(struct kref *kref) if (zdev->zbus->bus) zpci_bus_remove_device(zdev, false); + if (zdev->dma_table) + zpci_dma_exit_device(zdev); if (zdev_enabled(zdev)) zpci_disable_device(zdev); @@ -822,7 +843,8 @@ void zpci_release_device(struct kref *kref) case ZPCI_FN_STATE_STANDBY: if (zdev->has_hp_slot) zpci_exit_slot(zdev); - zpci_cleanup_bus_resources(zdev); + if (zdev->has_resources) + zpci_cleanup_bus_resources(zdev); zpci_bus_device_unregister(zdev); zpci_destroy_iommu(zdev); fallthrough; @@ -886,7 +908,6 @@ static void zpci_mem_exit(void) } static unsigned int s390_pci_probe __initdata = 1; -static unsigned int s390_pci_no_mio __initdata; unsigned int s390_pci_force_floating __initdata; static unsigned int s390_pci_initialized; @@ -897,7 +918,7 @@ char * __init pcibios_setup(char *str) return NULL; } if (!strcmp(str, "nomio")) { - s390_pci_no_mio = 1; + S390_lowcore.machine_flags &= ~MACHINE_FLAG_PCI_MIO; return NULL; } if (!strcmp(str, "force_floating")) { @@ -928,7 +949,7 @@ static int __init pci_base_init(void) return 0; } - if (test_facility(153) && !s390_pci_no_mio) { + if (MACHINE_HAS_PCI_MIO) { static_branch_enable(&have_mio); ctl_set_bit(2, 5); } diff --git a/arch/s390/pci/pci_bus.c b/arch/s390/pci/pci_bus.c index 9629f9779c79..5d77acbd1c87 100644 --- a/arch/s390/pci/pci_bus.c +++ b/arch/s390/pci/pci_bus.c @@ -49,6 +49,11 @@ static int zpci_bus_prepare_device(struct zpci_dev *zdev) rc = zpci_enable_device(zdev); if (rc) return rc; + rc = zpci_dma_init_device(zdev); + if (rc) { + zpci_disable_device(zdev); + return rc; + } } if (!zdev->has_resources) { @@ -343,11 +348,11 @@ static int zpci_bus_add_device(struct zpci_bus *zbus, struct zpci_dev *zdev) { int rc = -EINVAL; - zdev->zbus = zbus; if (zbus->function[zdev->devfn]) { pr_err("devfn %04x is already assigned\n", zdev->devfn); return rc; } + zdev->zbus = zbus; zbus->function[zdev->devfn] = zdev; zpci_nb_devices++; @@ -367,6 +372,7 @@ static int zpci_bus_add_device(struct zpci_bus *zbus, struct zpci_dev *zdev) error: zbus->function[zdev->devfn] = NULL; + zdev->zbus = NULL; zpci_nb_devices--; return rc; } diff --git a/arch/s390/pci/pci_bus.h b/arch/s390/pci/pci_bus.h index b877a97e6745..e359d2686178 100644 --- a/arch/s390/pci/pci_bus.h +++ b/arch/s390/pci/pci_bus.h @@ -22,6 +22,11 @@ static inline void zpci_zdev_put(struct zpci_dev *zdev) kref_put(&zdev->kref, zpci_release_device); } +static inline void zpci_zdev_get(struct zpci_dev *zdev) +{ + kref_get(&zdev->kref); +} + int zpci_alloc_domain(int domain); void zpci_free_domain(int domain); int zpci_setup_bus_resources(struct zpci_dev *zdev, diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index d3331596ddbe..51dc2215a2b7 100644 --- a/arch/s390/pci/pci_clp.c +++ 
b/arch/s390/pci/pci_clp.c @@ -212,17 +212,22 @@ out: return rc; } -static int clp_refresh_fh(u32 fid); -/* - * Enable/Disable a given PCI function and update its function handle if - * necessary +/** + * clp_set_pci_fn() - Execute a command on a PCI function + * @zdev: Function that will be affected + * @fh: Out parameter for updated function handle + * @nr_dma_as: DMA address space number + * @command: The command code to execute + * + * Returns: 0 on success, < 0 for Linux errors (e.g. -ENOMEM), and + * > 0 for non-success platform responses */ -static int clp_set_pci_fn(struct zpci_dev *zdev, u8 nr_dma_as, u8 command) +static int clp_set_pci_fn(struct zpci_dev *zdev, u32 *fh, u8 nr_dma_as, u8 command) { struct clp_req_rsp_set_pci *rrb; int rc, retries = 100; - u32 fid = zdev->fid; + *fh = 0; rrb = clp_alloc_block(GFP_KERNEL); if (!rrb) return -ENOMEM; @@ -245,17 +250,13 @@ static int clp_set_pci_fn(struct zpci_dev *zdev, u8 nr_dma_as, u8 command) } } while (rrb->response.hdr.rsp == CLP_RC_SETPCIFN_BUSY); - if (rc || rrb->response.hdr.rsp != CLP_RC_OK) { + if (!rc && rrb->response.hdr.rsp == CLP_RC_OK) { + *fh = rrb->response.fh; + } else { zpci_err("Set PCI FN:\n"); zpci_err_clp(rrb->response.hdr.rsp, rc); - } - - if (!rc && rrb->response.hdr.rsp == CLP_RC_OK) { - zdev->fh = rrb->response.fh; - } else if (!rc && rrb->response.hdr.rsp == CLP_RC_SETPCIFN_ALRDY && - rrb->response.fh == 0) { - /* Function is already in desired state - update handle */ - rc = clp_refresh_fh(fid); + if (!rc) + rc = rrb->response.hdr.rsp; } clp_free_block(rrb); return rc; @@ -295,35 +296,62 @@ int clp_setup_writeback_mio(void) return rc; } -int clp_enable_fh(struct zpci_dev *zdev, u8 nr_dma_as) +int clp_enable_fh(struct zpci_dev *zdev, u32 *fh, u8 nr_dma_as) { int rc; - rc = clp_set_pci_fn(zdev, nr_dma_as, CLP_SET_ENABLE_PCI_FN); - zpci_dbg(3, "ena fid:%x, fh:%x, rc:%d\n", zdev->fid, zdev->fh, rc); - if (rc) - goto out; - - if (zpci_use_mio(zdev)) { - rc = clp_set_pci_fn(zdev, nr_dma_as, CLP_SET_ENABLE_MIO); + rc = clp_set_pci_fn(zdev, fh, nr_dma_as, CLP_SET_ENABLE_PCI_FN); + zpci_dbg(3, "ena fid:%x, fh:%x, rc:%d\n", zdev->fid, *fh, rc); + if (!rc && zpci_use_mio(zdev)) { + rc = clp_set_pci_fn(zdev, fh, nr_dma_as, CLP_SET_ENABLE_MIO); zpci_dbg(3, "ena mio fid:%x, fh:%x, rc:%d\n", - zdev->fid, zdev->fh, rc); + zdev->fid, *fh, rc); if (rc) - clp_disable_fh(zdev); + clp_disable_fh(zdev, fh); } -out: return rc; } -int clp_disable_fh(struct zpci_dev *zdev) +int clp_disable_fh(struct zpci_dev *zdev, u32 *fh) { int rc; if (!zdev_enabled(zdev)) return 0; - rc = clp_set_pci_fn(zdev, 0, CLP_SET_DISABLE_PCI_FN); - zpci_dbg(3, "dis fid:%x, fh:%x, rc:%d\n", zdev->fid, zdev->fh, rc); + rc = clp_set_pci_fn(zdev, fh, 0, CLP_SET_DISABLE_PCI_FN); + zpci_dbg(3, "dis fid:%x, fh:%x, rc:%d\n", zdev->fid, *fh, rc); + return rc; +} + +static int clp_list_pci_req(struct clp_req_rsp_list_pci *rrb, + u64 *resume_token, int *nentries) +{ + int rc; + + memset(rrb, 0, sizeof(*rrb)); + rrb->request.hdr.len = sizeof(rrb->request); + rrb->request.hdr.cmd = CLP_LIST_PCI; + /* store as many entries as possible */ + rrb->response.hdr.len = CLP_BLK_SIZE - LIST_PCI_HDR_LEN; + rrb->request.resume_token = *resume_token; + + /* Get PCI function handle list */ + rc = clp_req(rrb, CLP_LPS_PCI); + if (rc || rrb->response.hdr.rsp != CLP_RC_OK) { + zpci_err("List PCI FN:\n"); + zpci_err_clp(rrb->response.hdr.rsp, rc); + return -EIO; + } + + update_uid_checking(rrb->response.uid_checking); + WARN_ON_ONCE(rrb->response.entry_size != + sizeof(struct 
clp_fh_list_entry)); + + *nentries = (rrb->response.hdr.len - LIST_PCI_HDR_LEN) / + rrb->response.entry_size; + *resume_token = rrb->response.resume_token; + return rc; } @@ -331,38 +359,40 @@ static int clp_list_pci(struct clp_req_rsp_list_pci *rrb, void *data, void (*cb)(struct clp_fh_list_entry *, void *)) { u64 resume_token = 0; - int entries, i, rc; + int nentries, i, rc; do { - memset(rrb, 0, sizeof(*rrb)); - rrb->request.hdr.len = sizeof(rrb->request); - rrb->request.hdr.cmd = CLP_LIST_PCI; - /* store as many entries as possible */ - rrb->response.hdr.len = CLP_BLK_SIZE - LIST_PCI_HDR_LEN; - rrb->request.resume_token = resume_token; - - /* Get PCI function handle list */ - rc = clp_req(rrb, CLP_LPS_PCI); - if (rc || rrb->response.hdr.rsp != CLP_RC_OK) { - zpci_err("List PCI FN:\n"); - zpci_err_clp(rrb->response.hdr.rsp, rc); - rc = -EIO; - goto out; - } + rc = clp_list_pci_req(rrb, &resume_token, &nentries); + if (rc) + return rc; + for (i = 0; i < nentries; i++) + cb(&rrb->response.fh_list[i], data); + } while (resume_token); - update_uid_checking(rrb->response.uid_checking); - WARN_ON_ONCE(rrb->response.entry_size != - sizeof(struct clp_fh_list_entry)); + return rc; +} - entries = (rrb->response.hdr.len - LIST_PCI_HDR_LEN) / - rrb->response.entry_size; +static int clp_find_pci(struct clp_req_rsp_list_pci *rrb, u32 fid, + struct clp_fh_list_entry *entry) +{ + struct clp_fh_list_entry *fh_list; + u64 resume_token = 0; + int nentries, i, rc; - resume_token = rrb->response.resume_token; - for (i = 0; i < entries; i++) - cb(&rrb->response.fh_list[i], data); + do { + rc = clp_list_pci_req(rrb, &resume_token, &nentries); + if (rc) + return rc; + for (i = 0; i < nentries; i++) { + fh_list = rrb->response.fh_list; + if (fh_list[i].fid == fid) { + *entry = fh_list[i]; + return 0; + } + } } while (resume_token); -out: - return rc; + + return -ENODEV; } static void __clp_add(struct clp_fh_list_entry *entry, void *data) @@ -392,67 +422,41 @@ int clp_scan_pci_devices(void) return rc; } -static void __clp_refresh_fh(struct clp_fh_list_entry *entry, void *data) -{ - struct zpci_dev *zdev; - u32 fid = *((u32 *)data); - - if (!entry->vendor_id || fid != entry->fid) - return; - - zdev = get_zdev_by_fid(fid); - if (!zdev) - return; - - zdev->fh = entry->fh; -} - /* - * Refresh the function handle of the function matching @fid + * Get the current function handle of the function matching @fid */ -static int clp_refresh_fh(u32 fid) +int clp_refresh_fh(u32 fid, u32 *fh) { struct clp_req_rsp_list_pci *rrb; + struct clp_fh_list_entry entry; int rc; rrb = clp_alloc_block(GFP_NOWAIT); if (!rrb) return -ENOMEM; - rc = clp_list_pci(rrb, &fid, __clp_refresh_fh); + rc = clp_find_pci(rrb, fid, &entry); + if (!rc) + *fh = entry.fh; clp_free_block(rrb); return rc; } -struct clp_state_data { - u32 fid; - enum zpci_state state; -}; - -static void __clp_get_state(struct clp_fh_list_entry *entry, void *data) -{ - struct clp_state_data *sd = data; - - if (entry->fid != sd->fid) - return; - - sd->state = entry->config_state; -} - int clp_get_state(u32 fid, enum zpci_state *state) { struct clp_req_rsp_list_pci *rrb; - struct clp_state_data sd = {fid, ZPCI_FN_STATE_RESERVED}; + struct clp_fh_list_entry entry; int rc; + *state = ZPCI_FN_STATE_RESERVED; rrb = clp_alloc_block(GFP_ATOMIC); if (!rrb) return -ENOMEM; - rc = clp_list_pci(rrb, &sd, __clp_get_state); + rc = clp_find_pci(rrb, fid, &entry); if (!rc) - *state = sd.state; + *state = entry.config_state; clp_free_block(rrb); return rc; diff --git 
a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index ebc9a49523aa..58f2f7abea96 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -590,10 +590,11 @@ int zpci_dma_init_device(struct zpci_dev *zdev) } } - rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, - (u64) zdev->dma_table); - if (rc) + if (zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, + (u64)zdev->dma_table)) { + rc = -EIO; goto free_bitmap; + } return 0; free_bitmap: @@ -608,17 +609,25 @@ out: return rc; } -void zpci_dma_exit_device(struct zpci_dev *zdev) +int zpci_dma_exit_device(struct zpci_dev *zdev) { + int cc = 0; + /* * At this point, if the device is part of an IOMMU domain, this would * be a strong hint towards a bug in the IOMMU API (common) code and/or * simultaneous access via IOMMU and DMA API. So let's issue a warning. */ WARN_ON(zdev->s390_domain); - - if (zpci_unregister_ioat(zdev, 0)) - return; + if (zdev_enabled(zdev)) + cc = zpci_unregister_ioat(zdev, 0); + /* + * cc == 3 indicates the function is gone already. This can happen + * if the function was deconfigured/disabled suddenly and we have not + * received a new handle yet. + */ + if (cc && cc != 3) + return -EIO; dma_cleanup_tables(zdev->dma_table); zdev->dma_table = NULL; @@ -626,8 +635,8 @@ void zpci_dma_exit_device(struct zpci_dev *zdev) zdev->iommu_bitmap = NULL; vfree(zdev->lazy_bitmap); zdev->lazy_bitmap = NULL; - zdev->next_bit = 0; + return 0; } static int __init dma_alloc_cpu_table_caches(void) diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index cd447b96b4b1..c856f80cb21b 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -84,7 +84,10 @@ static void zpci_event_hard_deconfigured(struct zpci_dev *zdev, u32 fh) /* Even though the device is already gone we still * need to free zPCI resources as part of the disable. 
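 * The DMA translation tables are torn down first, and the function
 * itself is disabled afterwards if it is still enabled.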
*/ - zpci_disable_device(zdev); + if (zdev->dma_table) + zpci_dma_exit_device(zdev); + if (zdev_enabled(zdev)) + zpci_disable_device(zdev); zdev->state = ZPCI_FN_STATE_STANDBY; } diff --git a/arch/s390/pci/pci_irq.c b/arch/s390/pci/pci_irq.c index 9c7de9089939..3823e159bf74 100644 --- a/arch/s390/pci/pci_irq.c +++ b/arch/s390/pci/pci_irq.c @@ -365,10 +365,6 @@ void arch_teardown_msi_irqs(struct pci_dev *pdev) for_each_pci_msi_entry(msi, pdev) { if (!msi->irq) continue; - if (msi->msi_attrib.is_msix) - __pci_msix_desc_mask_irq(msi, 1); - else - __pci_msi_desc_mask_irq(msi, 1, 1); irq_set_msi_desc(msi->irq, NULL); irq_free_desc(msi->irq); msi->msg.address_lo = 0; diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c index 6e2450c2b9c1..335c281811c7 100644 --- a/arch/s390/pci/pci_sysfs.c +++ b/arch/s390/pci/pci_sysfs.c @@ -82,13 +82,26 @@ static ssize_t recover_store(struct device *dev, struct device_attribute *attr, pci_lock_rescan_remove(); if (pci_dev_is_added(pdev)) { pci_stop_and_remove_bus_device(pdev); - ret = zpci_disable_device(zdev); - if (ret) - goto out; + if (zdev->dma_table) { + ret = zpci_dma_exit_device(zdev); + if (ret) + goto out; + } + + if (zdev_enabled(zdev)) { + ret = zpci_disable_device(zdev); + if (ret) + goto out; + } ret = zpci_enable_device(zdev); if (ret) goto out; + ret = zpci_dma_init_device(zdev); + if (ret) { + zpci_disable_device(zdev); + goto out; + } pci_rescan_bus(zdev->zbus->bus); } out: diff --git a/arch/s390/purgatory/Makefile b/arch/s390/purgatory/Makefile index 21c4ebe29b9a..360ada80d20c 100644 --- a/arch/s390/purgatory/Makefile +++ b/arch/s390/purgatory/Makefile @@ -19,6 +19,7 @@ KCOV_INSTRUMENT := n GCOV_PROFILE := n UBSAN_SANITIZE := n KASAN_SANITIZE := n +KCSAN_SANITIZE := n KBUILD_CFLAGS := -fno-strict-aliasing -Wall -Wstrict-prototypes KBUILD_CFLAGS += -Wno-pointer-sign -Wno-sign-compare diff --git a/arch/s390/tools/opcodes.txt b/arch/s390/tools/opcodes.txt index 0e207c46e8da..6db9820d104a 100644 --- a/arch/s390/tools/opcodes.txt +++ b/arch/s390/tools/opcodes.txt @@ -189,6 +189,8 @@ ad stosm SI_URD ae sigp RS_RRRD af mc SI_URD b1 lra RX_RRRD +b200 lbear S_RD +b201 stbear S_RD b202 stidp S_RD b204 sck S_RD b205 stck S_RD @@ -523,6 +525,7 @@ b931 clgfr RRE_RR b938 sortl RRE_RR b939 dfltcc RRF_R0RR2 b93a kdsa RRE_RR +b93b nnpa RRE_00 b93c ppno RRE_RR b93e kimd RRE_RR b93f klmd RRE_RR @@ -562,6 +565,7 @@ b987 dlgr RRE_RR b988 alcgr RRE_RR b989 slbgr RRE_RR b98a cspg RRE_RR +b98b rdp RRF_RURR2 b98d epsw RRE_RR b98e idte RRF_RURR2 b98f crdte RRF_RURR2 @@ -876,19 +880,32 @@ e63d vstrl VSI_URDV e63f vstrlr VRS_RRDV e649 vlip VRI_V0UU2 e650 vcvb VRR_RV0UU +e651 vclzdp VRR_VV0U2 e652 vcvbg VRR_RV0UU +e654 vupkzh VRR_VV0U2 +e655 vcnf VRR_VV0UU2 +e656 vclfnh VRR_VV0UU2 e658 vcvd VRI_VR0UU e659 vsrp VRI_VVUUU2 e65a vcvdg VRI_VR0UU e65b vpsop VRI_VVUUU2 +e65c vupkzl VRR_VV0U2 +e65d vcfn VRR_VV0UU2 +e65e vclfnl VRR_VV0UU2 e65f vtp VRR_0V +e670 vpkzr VRI_VVV0UU2 e671 vap VRI_VVV0UU2 +e672 vsrpr VRI_VVV0UU2 e673 vsp VRI_VVV0UU2 +e674 vschp VRR_VVV0U0U +e675 vcrnf VRR_VVV0UU e677 vcp VRR_0VV0U e678 vmp VRI_VVV0UU2 e679 vmsp VRI_VVV0UU2 e67a vdp VRI_VVV0UU2 e67b vrp VRI_VVV0UU2 +e67c vscshp VRR_VVV +e67d vcsph VRR_VVV0U0 e67e vsdp VRI_VVV0UU2 e700 vleb VRX_VRRDU e701 vleh VRX_VRRDU @@ -1081,6 +1098,7 @@ eb61 stric RSY_RDRU eb62 mric RSY_RDRU eb6a asi SIY_IRD eb6e alsi SIY_IRD +eb71 lpswey SIY_URD eb7a agsi SIY_IRD eb7e algsi SIY_IRD eb80 icmh RSY_RURD diff --git a/arch/sh/boards/mach-se/7343/irq.c b/arch/sh/boards/mach-se/7343/irq.c index 
1aedbfe32654..f9f3b14f70d5 100644 --- a/arch/sh/boards/mach-se/7343/irq.c +++ b/arch/sh/boards/mach-se/7343/irq.c @@ -38,7 +38,7 @@ static void se7343_irq_demux(struct irq_desc *desc) mask = ioread16(se7343_irq_regs + PA_CPLD_ST_REG); for_each_set_bit(bit, &mask, SE7343_FPGA_IRQ_NR) - generic_handle_irq(irq_linear_revmap(se7343_irq_domain, bit)); + generic_handle_domain_irq(se7343_irq_domain, bit); chip->irq_unmask(data); } diff --git a/arch/sh/boards/mach-se/7722/irq.c b/arch/sh/boards/mach-se/7722/irq.c index 6d34592767f8..efa96edd47dc 100644 --- a/arch/sh/boards/mach-se/7722/irq.c +++ b/arch/sh/boards/mach-se/7722/irq.c @@ -37,7 +37,7 @@ static void se7722_irq_demux(struct irq_desc *desc) mask = ioread16(se7722_irq_regs + IRQ01_STS_REG); for_each_set_bit(bit, &mask, SE7722_FPGA_IRQ_NR) - generic_handle_irq(irq_linear_revmap(se7722_irq_domain, bit)); + generic_handle_domain_irq(se7722_irq_domain, bit); chip->irq_unmask(data); } diff --git a/arch/sh/boards/mach-x3proto/gpio.c b/arch/sh/boards/mach-x3proto/gpio.c index efc992f641a6..f82d3a6a844a 100644 --- a/arch/sh/boards/mach-x3proto/gpio.c +++ b/arch/sh/boards/mach-x3proto/gpio.c @@ -68,7 +68,7 @@ static void x3proto_gpio_irq_handler(struct irq_desc *desc) mask = __raw_readw(KEYDETR); for_each_set_bit(pin, &mask, NR_BASEBOARD_GPIOS) - generic_handle_irq(irq_linear_revmap(x3proto_irq_domain, pin)); + generic_handle_domain_irq(x3proto_irq_domain, pin); chip->irq_unmask(data); } diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 88fb922c23a0..421fa9e38c60 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -119,6 +119,7 @@ config X86 select ARCH_WANT_HUGE_PMD_SHARE select ARCH_WANT_LD_ORPHAN_WARN select ARCH_WANTS_THP_SWAP if X86_64 + select ARCH_HAS_PARANOID_L1D_FLUSH select BUILDTIME_TABLE_SORT select CLKEVT_I8253 select CLOCKSOURCE_VALIDATE_LAST_CYCLE diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 307fd0000a83..d82d01490dd3 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -31,8 +31,8 @@ REALMODE_CFLAGS := -m16 -g -Os -DDISABLE_BRANCH_PROFILING \ REALMODE_CFLAGS += -ffreestanding REALMODE_CFLAGS += -fno-stack-protector -REALMODE_CFLAGS += $(call __cc-option, $(CC), $(REALMODE_CFLAGS), -Wno-address-of-packed-member) -REALMODE_CFLAGS += $(call __cc-option, $(CC), $(REALMODE_CFLAGS), $(cc_stack_align4)) +REALMODE_CFLAGS += -Wno-address-of-packed-member +REALMODE_CFLAGS += $(cc_stack_align4) REALMODE_CFLAGS += $(CLANG_FLAGS) export REALMODE_CFLAGS @@ -48,8 +48,7 @@ export BITS # # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53383 # -KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -KBUILD_CFLAGS += $(call cc-option,-mno-avx,) +KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx # Intel CET isn't enabled in the kernel KBUILD_CFLAGS += $(call cc-option,-fcf-protection=none) @@ -59,9 +58,8 @@ ifeq ($(CONFIG_X86_32),y) UTS_MACHINE := i386 CHECKFLAGS += -D__i386__ - biarch := $(call cc-option,-m32) - KBUILD_AFLAGS += $(biarch) - KBUILD_CFLAGS += $(biarch) + KBUILD_AFLAGS += -m32 + KBUILD_CFLAGS += -m32 KBUILD_CFLAGS += -msoft-float -mregparm=3 -freg-struct-return @@ -72,7 +70,7 @@ ifeq ($(CONFIG_X86_32),y) # Align the stack to the register width instead of using the default # alignment of 16 bytes. This reduces stack usage and the number of # alignment instructions. - KBUILD_CFLAGS += $(call cc-option,$(cc_stack_align4)) + KBUILD_CFLAGS += $(cc_stack_align4) # CPU-specific tuning. Anything which can be shared with UML should go here. 
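 # (arch/x86/Makefile_32.cpu holds the per-CPU -march/-mtune selection)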
include arch/x86/Makefile_32.cpu @@ -93,7 +91,6 @@ else UTS_MACHINE := x86_64 CHECKFLAGS += -D__x86_64__ - biarch := -m64 KBUILD_AFLAGS += -m64 KBUILD_CFLAGS += -m64 @@ -104,7 +101,7 @@ else KBUILD_CFLAGS += $(call cc-option,-falign-loops=1) # Don't autogenerate traditional x87 instructions - KBUILD_CFLAGS += $(call cc-option,-mno-80387) + KBUILD_CFLAGS += -mno-80387 KBUILD_CFLAGS += $(call cc-option,-mno-fp-ret-in-387) # By default gcc and clang use a stack alignment of 16 bytes for x86. @@ -114,20 +111,17 @@ else # default alignment which keep the stack *mis*aligned. # Furthermore an alignment to the register width reduces stack usage # and the number of alignment instructions. - KBUILD_CFLAGS += $(call cc-option,$(cc_stack_align8)) + KBUILD_CFLAGS += $(cc_stack_align8) # Use -mskip-rax-setup if supported. KBUILD_CFLAGS += $(call cc-option,-mskip-rax-setup) # FIXME - should be integrated in Makefile.cpu (Makefile_32.cpu) - cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8) - cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona) - - cflags-$(CONFIG_MCORE2) += \ - $(call cc-option,-march=core2,$(call cc-option,-mtune=generic)) - cflags-$(CONFIG_MATOM) += $(call cc-option,-march=atom) \ - $(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic)) - cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic) + cflags-$(CONFIG_MK8) += -march=k8 + cflags-$(CONFIG_MPSC) += -march=nocona + cflags-$(CONFIG_MCORE2) += -march=core2 + cflags-$(CONFIG_MATOM) += -march=atom + cflags-$(CONFIG_GENERIC_CPU) += -mtune=generic KBUILD_CFLAGS += $(cflags-y) KBUILD_CFLAGS += -mno-red-zone @@ -158,18 +152,6 @@ export CONFIG_X86_X32_ABI ifdef CONFIG_FUNCTION_GRAPH_TRACER ifndef CONFIG_HAVE_FENTRY ACCUMULATE_OUTGOING_ARGS := 1 - else - ifeq ($(call cc-option-yn, -mfentry), n) - ACCUMULATE_OUTGOING_ARGS := 1 - - # GCC ignores '-maccumulate-outgoing-args' when used with '-Os'. - # If '-Os' is enabled, disable it and print a warning. - ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE - undefine CONFIG_CC_OPTIMIZE_FOR_SIZE - $(warning Disabling CONFIG_CC_OPTIMIZE_FOR_SIZE. Your compiler does not have -mfentry so you cannot optimize for size with CONFIG_FUNCTION_GRAPH_TRACER.) - endif - - endif endif endif @@ -193,7 +175,7 @@ ifdef CONFIG_RETPOLINE # only been fixed starting from gcc stable version 8.4.0 and # onwards, but not for older ones. See gcc bug #86952. 
ifndef CONFIG_CC_IS_CLANG - KBUILD_CFLAGS += $(call cc-option,-fno-jump-tables) + KBUILD_CFLAGS += -fno-jump-tables endif endif @@ -275,9 +257,10 @@ endif $(BOOT_TARGETS): vmlinux $(Q)$(MAKE) $(build)=$(boot) $@ -PHONY += install bzlilo -install bzlilo: - $(Q)$(MAKE) $(build)=$(boot) $@ +PHONY += install +install: + $(CONFIG_SHELL) $(srctree)/$(boot)/install.sh $(KERNELRELEASE) \ + $(KBUILD_IMAGE) System.map "$(INSTALL_PATH)" PHONY += vdso_install vdso_install: diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index dfbc26a8e924..b5aecb524a8a 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -133,7 +133,7 @@ quiet_cmd_genimage = GENIMAGE $3 cmd_genimage = $(BASH) $(srctree)/$(src)/genimage.sh $2 $3 $(obj)/bzImage \ $(obj)/mtools.conf '$(FDARGS)' $(FDINITRD) -PHONY += bzdisk fdimage fdimage144 fdimage288 hdimage isoimage install +PHONY += bzdisk fdimage fdimage144 fdimage288 hdimage isoimage # This requires write access to /dev/fd0 # All images require syslinux to be installed; hdimage also requires @@ -156,8 +156,3 @@ hdimage: $(imgdeps) isoimage: $(imgdeps) $(call cmd,genimage,isoimage,$(obj)/image.iso) @$(kecho) 'Kernel: $(obj)/image.iso is ready' - -install: - $(CONFIG_SHELL) $(srctree)/$(src)/install.sh \ - $(KERNELRELEASE) $(obj)/bzImage \ - System.map "$(INSTALL_PATH)" diff --git a/arch/x86/boot/compressed/efi_thunk_64.S b/arch/x86/boot/compressed/efi_thunk_64.S index 95a223b3e56a..8bb92e9f4e97 100644 --- a/arch/x86/boot/compressed/efi_thunk_64.S +++ b/arch/x86/boot/compressed/efi_thunk_64.S @@ -5,9 +5,8 @@ * Early support for invoking 32-bit EFI services from a 64-bit kernel. * * Because this thunking occurs before ExitBootServices() we have to - * restore the firmware's 32-bit GDT before we make EFI service calls, - * since the firmware's 32-bit IDT is still currently installed and it - * needs to be able to service interrupts. + * restore the firmware's 32-bit GDT and IDT before we make EFI service + * calls. * * On the plus side, we don't have to worry about mangling 64-bit * addresses into 32-bits because we're executing with an identity @@ -39,7 +38,7 @@ SYM_FUNC_START(__efi64_thunk) /* * Convert x86-64 ABI params to i386 ABI */ - subq $32, %rsp + subq $64, %rsp movl %esi, 0x0(%rsp) movl %edx, 0x4(%rsp) movl %ecx, 0x8(%rsp) @@ -49,14 +48,19 @@ SYM_FUNC_START(__efi64_thunk) leaq 0x14(%rsp), %rbx sgdt (%rbx) + addq $16, %rbx + sidt (%rbx) + /* - * Switch to gdt with 32-bit segments. This is the firmware GDT - * that was installed when the kernel started executing. This - * pointer was saved at the EFI stub entry point in head_64.S. + * Switch to IDT and GDT with 32-bit segments. This is the firmware GDT + * and IDT that was installed when the kernel started executing. The + * pointers were saved at the EFI stub entry point in head_64.S. * * Pass the saved DS selector to the 32-bit code, and use far return to * restore the saved CS selector. */ + leaq efi32_boot_idt(%rip), %rax + lidt (%rax) leaq efi32_boot_gdt(%rip), %rax lgdt (%rax) @@ -67,7 +71,7 @@ SYM_FUNC_START(__efi64_thunk) pushq %rax lretq -1: addq $32, %rsp +1: addq $64, %rsp movq %rdi, %rax pop %rbx @@ -128,10 +132,13 @@ SYM_FUNC_START_LOCAL(efi_enter32) /* * Some firmware will return with interrupts enabled. Be sure to - * disable them before we switch GDTs. + * disable them before we switch GDTs and IDTs. 
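+ * An interrupt taken while only one of the two tables has been
+ * switched would be delivered through a stale descriptor table.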
*/ cli + lidtl (%ebx) + subl $16, %ebx + lgdtl (%ebx) movl %cr4, %eax @@ -166,6 +173,11 @@ SYM_DATA_START(efi32_boot_gdt) .quad 0 SYM_DATA_END(efi32_boot_gdt) +SYM_DATA_START(efi32_boot_idt) + .word 0 + .quad 0 +SYM_DATA_END(efi32_boot_idt) + SYM_DATA_START(efi32_boot_cs) .word 0 SYM_DATA_END(efi32_boot_cs) diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index a2347ded77ea..572c535cf45b 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -319,6 +319,9 @@ SYM_INNER_LABEL(efi32_pe_stub_entry, SYM_L_LOCAL) movw %cs, rva(efi32_boot_cs)(%ebp) movw %ds, rva(efi32_boot_ds)(%ebp) + /* Store firmware IDT descriptor */ + sidtl rva(efi32_boot_idt)(%ebp) + /* Disable paging */ movl %cr0, %eax btrl $X86_CR0_PG_BIT, %eax diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index e36690778497..67c3208b668a 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -668,7 +668,7 @@ static bool process_mem_region(struct mem_vector *region, if (slot_area_index == MAX_SLOT_AREA) { debug_putstr("Aborted e820/efi memmap scan when walking immovable regions(slot_areas full)!\n"); - return 1; + return true; } } #endif diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index d0959e7b809f..f307c93fc90a 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -88,6 +88,12 @@ nhpoly1305-avx2-y := nh-avx2-x86_64.o nhpoly1305-avx2-glue.o obj-$(CONFIG_CRYPTO_CURVE25519_X86) += curve25519-x86_64.o +obj-$(CONFIG_CRYPTO_SM4_AESNI_AVX_X86_64) += sm4-aesni-avx-x86_64.o +sm4-aesni-avx-x86_64-y := sm4-aesni-avx-asm_64.o sm4_aesni_avx_glue.o + +obj-$(CONFIG_CRYPTO_SM4_AESNI_AVX2_X86_64) += sm4-aesni-avx2-x86_64.o +sm4-aesni-avx2-x86_64-y := sm4-aesni-avx2-asm_64.o sm4_aesni_avx2_glue.o + quiet_cmd_perlasm = PERLASM $@ cmd_perlasm = $(PERL) $< > $@ $(obj)/%.S: $(src)/%.pl FORCE diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 2144e54a6c89..0fc961bef299 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -849,6 +849,8 @@ static int xts_crypt(struct skcipher_request *req, bool encrypt) return -EINVAL; err = skcipher_walk_virt(&walk, req, false); + if (!walk.nbytes) + return err; if (unlikely(tail > 0 && walk.nbytes < walk.total)) { int blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2; @@ -862,7 +864,10 @@ static int xts_crypt(struct skcipher_request *req, bool encrypt) skcipher_request_set_crypt(&subreq, req->src, req->dst, blocks * AES_BLOCK_SIZE, req->iv); req = &subreq; + err = skcipher_walk_virt(&walk, req, false); + if (err) + return err; } else { tail = 0; } diff --git a/arch/x86/crypto/sm4-aesni-avx-asm_64.S b/arch/x86/crypto/sm4-aesni-avx-asm_64.S new file mode 100644 index 000000000000..fa2c3f50aecb --- /dev/null +++ b/arch/x86/crypto/sm4-aesni-avx-asm_64.S @@ -0,0 +1,589 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * SM4 Cipher Algorithm, AES-NI/AVX optimized. + * as specified in + * https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html + * + * Copyright (C) 2018 Markku-Juhani O. Saarinen <mjos@iki.fi> + * Copyright (C) 2020 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * Copyright (c) 2021 Tianjia Zhang <tianjia.zhang@linux.alibaba.com> + */ + +/* Based on SM4 AES-NI work by libgcrypt and Markku-Juhani O. 
Saarinen at: + * https://github.com/mjosaarinen/sm4ni + */ + +#include <linux/linkage.h> +#include <asm/frame.h> + +#define rRIP (%rip) + +#define RX0 %xmm0 +#define RX1 %xmm1 +#define MASK_4BIT %xmm2 +#define RTMP0 %xmm3 +#define RTMP1 %xmm4 +#define RTMP2 %xmm5 +#define RTMP3 %xmm6 +#define RTMP4 %xmm7 + +#define RA0 %xmm8 +#define RA1 %xmm9 +#define RA2 %xmm10 +#define RA3 %xmm11 + +#define RB0 %xmm12 +#define RB1 %xmm13 +#define RB2 %xmm14 +#define RB3 %xmm15 + +#define RNOT %xmm0 +#define RBSWAP %xmm1 + + +/* Transpose four 32-bit words between 128-bit vectors. */ +#define transpose_4x4(x0, x1, x2, x3, t1, t2) \ + vpunpckhdq x1, x0, t2; \ + vpunpckldq x1, x0, x0; \ + \ + vpunpckldq x3, x2, t1; \ + vpunpckhdq x3, x2, x2; \ + \ + vpunpckhqdq t1, x0, x1; \ + vpunpcklqdq t1, x0, x0; \ + \ + vpunpckhqdq x2, t2, x3; \ + vpunpcklqdq x2, t2, x2; + +/* pre-SubByte transform. */ +#define transform_pre(x, lo_t, hi_t, mask4bit, tmp0) \ + vpand x, mask4bit, tmp0; \ + vpandn x, mask4bit, x; \ + vpsrld $4, x, x; \ + \ + vpshufb tmp0, lo_t, tmp0; \ + vpshufb x, hi_t, x; \ + vpxor tmp0, x, x; + +/* post-SubByte transform. Note: x has been XOR'ed with mask4bit by + * 'vaesenclast' instruction. + */ +#define transform_post(x, lo_t, hi_t, mask4bit, tmp0) \ + vpandn mask4bit, x, tmp0; \ + vpsrld $4, x, x; \ + vpand x, mask4bit, x; \ + \ + vpshufb tmp0, lo_t, tmp0; \ + vpshufb x, hi_t, x; \ + vpxor tmp0, x, x; + + +.section .rodata.cst164, "aM", @progbits, 164 +.align 16 + +/* + * Following four affine transform look-up tables are from work by + * Markku-Juhani O. Saarinen, at https://github.com/mjosaarinen/sm4ni + * + * These allow exposing SM4 S-Box from AES SubByte. + */ + +/* pre-SubByte affine transform, from SM4 field to AES field. */ +.Lpre_tf_lo_s: + .quad 0x9197E2E474720701, 0xC7C1B4B222245157 +.Lpre_tf_hi_s: + .quad 0xE240AB09EB49A200, 0xF052B91BF95BB012 + +/* post-SubByte affine transform, from AES field to SM4 field.
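+ * Together with the pre-SubByte transform above, this lets a single
+ * AESENCLAST instruction evaluate the SM4 S-Box.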
*/ +.Lpost_tf_lo_s: + .quad 0x5B67F2CEA19D0834, 0xEDD14478172BBE82 +.Lpost_tf_hi_s: + .quad 0xAE7201DD73AFDC00, 0x11CDBE62CC1063BF + +/* For isolating SubBytes from AESENCLAST, inverse shift row */ +.Linv_shift_row: + .byte 0x00, 0x0d, 0x0a, 0x07, 0x04, 0x01, 0x0e, 0x0b + .byte 0x08, 0x05, 0x02, 0x0f, 0x0c, 0x09, 0x06, 0x03 + +/* Inverse shift row + Rotate left by 8 bits on 32-bit words with vpshufb */ +.Linv_shift_row_rol_8: + .byte 0x07, 0x00, 0x0d, 0x0a, 0x0b, 0x04, 0x01, 0x0e + .byte 0x0f, 0x08, 0x05, 0x02, 0x03, 0x0c, 0x09, 0x06 + +/* Inverse shift row + Rotate left by 16 bits on 32-bit words with vpshufb */ +.Linv_shift_row_rol_16: + .byte 0x0a, 0x07, 0x00, 0x0d, 0x0e, 0x0b, 0x04, 0x01 + .byte 0x02, 0x0f, 0x08, 0x05, 0x06, 0x03, 0x0c, 0x09 + +/* Inverse shift row + Rotate left by 24 bits on 32-bit words with vpshufb */ +.Linv_shift_row_rol_24: + .byte 0x0d, 0x0a, 0x07, 0x00, 0x01, 0x0e, 0x0b, 0x04 + .byte 0x05, 0x02, 0x0f, 0x08, 0x09, 0x06, 0x03, 0x0c + +/* For CTR-mode IV byteswap */ +.Lbswap128_mask: + .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 + +/* For input word byte-swap */ +.Lbswap32_mask: + .byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 + +.align 4 +/* 4-bit mask */ +.L0f0f0f0f: + .long 0x0f0f0f0f + + +.text +.align 16 + +/* + * void sm4_aesni_avx_crypt4(const u32 *rk, u8 *dst, + * const u8 *src, int nblocks) + */ +.align 8 +SYM_FUNC_START(sm4_aesni_avx_crypt4) + /* input: + * %rdi: round key array, CTX + * %rsi: dst (1..4 blocks) + * %rdx: src (1..4 blocks) + * %rcx: num blocks (1..4) + */ + FRAME_BEGIN + + vmovdqu 0*16(%rdx), RA0; + vmovdqa RA0, RA1; + vmovdqa RA0, RA2; + vmovdqa RA0, RA3; + cmpq $2, %rcx; + jb .Lblk4_load_input_done; + vmovdqu 1*16(%rdx), RA1; + je .Lblk4_load_input_done; + vmovdqu 2*16(%rdx), RA2; + cmpq $3, %rcx; + je .Lblk4_load_input_done; + vmovdqu 3*16(%rdx), RA3; + +.Lblk4_load_input_done: + + vmovdqa .Lbswap32_mask rRIP, RTMP2; + vpshufb RTMP2, RA0, RA0; + vpshufb RTMP2, RA1, RA1; + vpshufb RTMP2, RA2, RA2; + vpshufb RTMP2, RA3, RA3; + + vbroadcastss .L0f0f0f0f rRIP, MASK_4BIT; + vmovdqa .Lpre_tf_lo_s rRIP, RTMP4; + vmovdqa .Lpre_tf_hi_s rRIP, RB0; + vmovdqa .Lpost_tf_lo_s rRIP, RB1; + vmovdqa .Lpost_tf_hi_s rRIP, RB2; + vmovdqa .Linv_shift_row rRIP, RB3; + vmovdqa .Linv_shift_row_rol_8 rRIP, RTMP2; + vmovdqa .Linv_shift_row_rol_16 rRIP, RTMP3; + transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1); + +#define ROUND(round, s0, s1, s2, s3) \ + vbroadcastss (4*(round))(%rdi), RX0; \ + vpxor s1, RX0, RX0; \ + vpxor s2, RX0, RX0; \ + vpxor s3, RX0, RX0; /* s1 ^ s2 ^ s3 ^ rk */ \ + \ + /* sbox, non-linear part */ \ + transform_pre(RX0, RTMP4, RB0, MASK_4BIT, RTMP0); \ + vaesenclast MASK_4BIT, RX0, RX0; \ + transform_post(RX0, RB1, RB2, MASK_4BIT, RTMP0); \ + \ + /* linear part */ \ + vpshufb RB3, RX0, RTMP0; \ + vpxor RTMP0, s0, s0; /* s0 ^ x */ \ + vpshufb RTMP2, RX0, RTMP1; \ + vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) */ \ + vpshufb RTMP3, RX0, RTMP1; \ + vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) ^ rol(x,16) */ \ + vpshufb .Linv_shift_row_rol_24 rRIP, RX0, RTMP1; \ + vpxor RTMP1, s0, s0; /* s0 ^ x ^ rol(x,24) */ \ + vpslld $2, RTMP0, RTMP1; \ + vpsrld $30, RTMP0, RTMP0; \ + vpxor RTMP0, s0, s0; \ + /* s0 ^ x ^ rol(x,2) ^ rol(x,10) ^ rol(x,18) ^ rol(x,24) */ \ + vpxor RTMP1, s0, s0; + + leaq (32*4)(%rdi), %rax; +.align 16 +.Lroundloop_blk4: + ROUND(0, RA0, RA1, RA2, RA3); + ROUND(1, RA1, RA2, RA3, RA0); + ROUND(2, RA2, RA3, RA0, RA1); + ROUND(3, RA3, RA0, RA1, RA2); + leaq (4*4)(%rdi), %rdi; + cmpq %rax, %rdi; + jne 
.Lroundloop_blk4; + +#undef ROUND + + vmovdqa .Lbswap128_mask rRIP, RTMP2; + + transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1); + vpshufb RTMP2, RA0, RA0; + vpshufb RTMP2, RA1, RA1; + vpshufb RTMP2, RA2, RA2; + vpshufb RTMP2, RA3, RA3; + + vmovdqu RA0, 0*16(%rsi); + cmpq $2, %rcx; + jb .Lblk4_store_output_done; + vmovdqu RA1, 1*16(%rsi); + je .Lblk4_store_output_done; + vmovdqu RA2, 2*16(%rsi); + cmpq $3, %rcx; + je .Lblk4_store_output_done; + vmovdqu RA3, 3*16(%rsi); + +.Lblk4_store_output_done: + vzeroall; + FRAME_END + ret; +SYM_FUNC_END(sm4_aesni_avx_crypt4) + +.align 8 +SYM_FUNC_START_LOCAL(__sm4_crypt_blk8) + /* input: + * %rdi: round key array, CTX + * RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: eight parallel + * plaintext blocks + * output: + * RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: eight parallel + * ciphertext blocks + */ + FRAME_BEGIN + + vmovdqa .Lbswap32_mask rRIP, RTMP2; + vpshufb RTMP2, RA0, RA0; + vpshufb RTMP2, RA1, RA1; + vpshufb RTMP2, RA2, RA2; + vpshufb RTMP2, RA3, RA3; + vpshufb RTMP2, RB0, RB0; + vpshufb RTMP2, RB1, RB1; + vpshufb RTMP2, RB2, RB2; + vpshufb RTMP2, RB3, RB3; + + vbroadcastss .L0f0f0f0f rRIP, MASK_4BIT; + transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1); + transpose_4x4(RB0, RB1, RB2, RB3, RTMP0, RTMP1); + +#define ROUND(round, s0, s1, s2, s3, r0, r1, r2, r3) \ + vbroadcastss (4*(round))(%rdi), RX0; \ + vmovdqa .Lpre_tf_lo_s rRIP, RTMP4; \ + vmovdqa .Lpre_tf_hi_s rRIP, RTMP1; \ + vmovdqa RX0, RX1; \ + vpxor s1, RX0, RX0; \ + vpxor s2, RX0, RX0; \ + vpxor s3, RX0, RX0; /* s1 ^ s2 ^ s3 ^ rk */ \ + vmovdqa .Lpost_tf_lo_s rRIP, RTMP2; \ + vmovdqa .Lpost_tf_hi_s rRIP, RTMP3; \ + vpxor r1, RX1, RX1; \ + vpxor r2, RX1, RX1; \ + vpxor r3, RX1, RX1; /* r1 ^ r2 ^ r3 ^ rk */ \ + \ + /* sbox, non-linear part */ \ + transform_pre(RX0, RTMP4, RTMP1, MASK_4BIT, RTMP0); \ + transform_pre(RX1, RTMP4, RTMP1, MASK_4BIT, RTMP0); \ + vmovdqa .Linv_shift_row rRIP, RTMP4; \ + vaesenclast MASK_4BIT, RX0, RX0; \ + vaesenclast MASK_4BIT, RX1, RX1; \ + transform_post(RX0, RTMP2, RTMP3, MASK_4BIT, RTMP0); \ + transform_post(RX1, RTMP2, RTMP3, MASK_4BIT, RTMP0); \ + \ + /* linear part */ \ + vpshufb RTMP4, RX0, RTMP0; \ + vpxor RTMP0, s0, s0; /* s0 ^ x */ \ + vpshufb RTMP4, RX1, RTMP2; \ + vmovdqa .Linv_shift_row_rol_8 rRIP, RTMP4; \ + vpxor RTMP2, r0, r0; /* r0 ^ x */ \ + vpshufb RTMP4, RX0, RTMP1; \ + vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) */ \ + vpshufb RTMP4, RX1, RTMP3; \ + vmovdqa .Linv_shift_row_rol_16 rRIP, RTMP4; \ + vpxor RTMP3, RTMP2, RTMP2; /* x ^ rol(x,8) */ \ + vpshufb RTMP4, RX0, RTMP1; \ + vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) ^ rol(x,16) */ \ + vpshufb RTMP4, RX1, RTMP3; \ + vmovdqa .Linv_shift_row_rol_24 rRIP, RTMP4; \ + vpxor RTMP3, RTMP2, RTMP2; /* x ^ rol(x,8) ^ rol(x,16) */ \ + vpshufb RTMP4, RX0, RTMP1; \ + vpxor RTMP1, s0, s0; /* s0 ^ x ^ rol(x,24) */ \ + /* s0 ^ x ^ rol(x,2) ^ rol(x,10) ^ rol(x,18) ^ rol(x,24) */ \ + vpslld $2, RTMP0, RTMP1; \ + vpsrld $30, RTMP0, RTMP0; \ + vpxor RTMP0, s0, s0; \ + vpxor RTMP1, s0, s0; \ + vpshufb RTMP4, RX1, RTMP3; \ + vpxor RTMP3, r0, r0; /* r0 ^ x ^ rol(x,24) */ \ + /* r0 ^ x ^ rol(x,2) ^ rol(x,10) ^ rol(x,18) ^ rol(x,24) */ \ + vpslld $2, RTMP2, RTMP3; \ + vpsrld $30, RTMP2, RTMP2; \ + vpxor RTMP2, r0, r0; \ + vpxor RTMP3, r0, r0; + + leaq (32*4)(%rdi), %rax; +.align 16 +.Lroundloop_blk8: + ROUND(0, RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3); + ROUND(1, RA1, RA2, RA3, RA0, RB1, RB2, RB3, RB0); + ROUND(2, RA2, RA3, RA0, RA1, RB2, RB3, RB0, RB1); + ROUND(3, RA3, RA0, RA1, RA2, RB3, RB0, RB1, RB2); + leaq (4*4)(%rdi), 
%rdi; + cmpq %rax, %rdi; + jne .Lroundloop_blk8; + +#undef ROUND + + vmovdqa .Lbswap128_mask rRIP, RTMP2; + + transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1); + transpose_4x4(RB0, RB1, RB2, RB3, RTMP0, RTMP1); + vpshufb RTMP2, RA0, RA0; + vpshufb RTMP2, RA1, RA1; + vpshufb RTMP2, RA2, RA2; + vpshufb RTMP2, RA3, RA3; + vpshufb RTMP2, RB0, RB0; + vpshufb RTMP2, RB1, RB1; + vpshufb RTMP2, RB2, RB2; + vpshufb RTMP2, RB3, RB3; + + FRAME_END + ret; +SYM_FUNC_END(__sm4_crypt_blk8) + +/* + * void sm4_aesni_avx_crypt8(const u32 *rk, u8 *dst, + * const u8 *src, int nblocks) + */ +.align 8 +SYM_FUNC_START(sm4_aesni_avx_crypt8) + /* input: + * %rdi: round key array, CTX + * %rsi: dst (1..8 blocks) + * %rdx: src (1..8 blocks) + * %rcx: num blocks (1..8) + */ + FRAME_BEGIN + + cmpq $5, %rcx; + jb sm4_aesni_avx_crypt4; + vmovdqu (0 * 16)(%rdx), RA0; + vmovdqu (1 * 16)(%rdx), RA1; + vmovdqu (2 * 16)(%rdx), RA2; + vmovdqu (3 * 16)(%rdx), RA3; + vmovdqu (4 * 16)(%rdx), RB0; + vmovdqa RB0, RB1; + vmovdqa RB0, RB2; + vmovdqa RB0, RB3; + je .Lblk8_load_input_done; + vmovdqu (5 * 16)(%rdx), RB1; + cmpq $7, %rcx; + jb .Lblk8_load_input_done; + vmovdqu (6 * 16)(%rdx), RB2; + je .Lblk8_load_input_done; + vmovdqu (7 * 16)(%rdx), RB3; + +.Lblk8_load_input_done: + call __sm4_crypt_blk8; + + cmpq $6, %rcx; + vmovdqu RA0, (0 * 16)(%rsi); + vmovdqu RA1, (1 * 16)(%rsi); + vmovdqu RA2, (2 * 16)(%rsi); + vmovdqu RA3, (3 * 16)(%rsi); + vmovdqu RB0, (4 * 16)(%rsi); + jb .Lblk8_store_output_done; + vmovdqu RB1, (5 * 16)(%rsi); + je .Lblk8_store_output_done; + vmovdqu RB2, (6 * 16)(%rsi); + cmpq $7, %rcx; + je .Lblk8_store_output_done; + vmovdqu RB3, (7 * 16)(%rsi); + +.Lblk8_store_output_done: + vzeroall; + FRAME_END + ret; +SYM_FUNC_END(sm4_aesni_avx_crypt8) + +/* + * void sm4_aesni_avx_ctr_enc_blk8(const u32 *rk, u8 *dst, + * const u8 *src, u8 *iv) + */ +.align 8 +SYM_FUNC_START(sm4_aesni_avx_ctr_enc_blk8) + /* input: + * %rdi: round key array, CTX + * %rsi: dst (8 blocks) + * %rdx: src (8 blocks) + * %rcx: iv (big endian, 128bit) + */ + FRAME_BEGIN + + /* load IV and byteswap */ + vmovdqu (%rcx), RA0; + + vmovdqa .Lbswap128_mask rRIP, RBSWAP; + vpshufb RBSWAP, RA0, RTMP0; /* be => le */ + + vpcmpeqd RNOT, RNOT, RNOT; + vpsrldq $8, RNOT, RNOT; /* low: -1, high: 0 */ + +#define inc_le128(x, minus_one, tmp) \ + vpcmpeqq minus_one, x, tmp; \ + vpsubq minus_one, x, x; \ + vpslldq $8, tmp, tmp; \ + vpsubq tmp, x, x; + + /* construct IVs */ + inc_le128(RTMP0, RNOT, RTMP2); /* +1 */ + vpshufb RBSWAP, RTMP0, RA1; + inc_le128(RTMP0, RNOT, RTMP2); /* +2 */ + vpshufb RBSWAP, RTMP0, RA2; + inc_le128(RTMP0, RNOT, RTMP2); /* +3 */ + vpshufb RBSWAP, RTMP0, RA3; + inc_le128(RTMP0, RNOT, RTMP2); /* +4 */ + vpshufb RBSWAP, RTMP0, RB0; + inc_le128(RTMP0, RNOT, RTMP2); /* +5 */ + vpshufb RBSWAP, RTMP0, RB1; + inc_le128(RTMP0, RNOT, RTMP2); /* +6 */ + vpshufb RBSWAP, RTMP0, RB2; + inc_le128(RTMP0, RNOT, RTMP2); /* +7 */ + vpshufb RBSWAP, RTMP0, RB3; + inc_le128(RTMP0, RNOT, RTMP2); /* +8 */ + vpshufb RBSWAP, RTMP0, RTMP1; + + /* store new IV */ + vmovdqu RTMP1, (%rcx); + + call __sm4_crypt_blk8; + + vpxor (0 * 16)(%rdx), RA0, RA0; + vpxor (1 * 16)(%rdx), RA1, RA1; + vpxor (2 * 16)(%rdx), RA2, RA2; + vpxor (3 * 16)(%rdx), RA3, RA3; + vpxor (4 * 16)(%rdx), RB0, RB0; + vpxor (5 * 16)(%rdx), RB1, RB1; + vpxor (6 * 16)(%rdx), RB2, RB2; + vpxor (7 * 16)(%rdx), RB3, RB3; + + vmovdqu RA0, (0 * 16)(%rsi); + vmovdqu RA1, (1 * 16)(%rsi); + vmovdqu RA2, (2 * 16)(%rsi); + vmovdqu RA3, (3 * 16)(%rsi); + vmovdqu RB0, (4 * 16)(%rsi); + vmovdqu RB1, (5 * 
16)(%rsi); + vmovdqu RB2, (6 * 16)(%rsi); + vmovdqu RB3, (7 * 16)(%rsi); + + vzeroall; + FRAME_END + ret; +SYM_FUNC_END(sm4_aesni_avx_ctr_enc_blk8) + +/* + * void sm4_aesni_avx_cbc_dec_blk8(const u32 *rk, u8 *dst, + * const u8 *src, u8 *iv) + */ +.align 8 +SYM_FUNC_START(sm4_aesni_avx_cbc_dec_blk8) + /* input: + * %rdi: round key array, CTX + * %rsi: dst (8 blocks) + * %rdx: src (8 blocks) + * %rcx: iv + */ + FRAME_BEGIN + + vmovdqu (0 * 16)(%rdx), RA0; + vmovdqu (1 * 16)(%rdx), RA1; + vmovdqu (2 * 16)(%rdx), RA2; + vmovdqu (3 * 16)(%rdx), RA3; + vmovdqu (4 * 16)(%rdx), RB0; + vmovdqu (5 * 16)(%rdx), RB1; + vmovdqu (6 * 16)(%rdx), RB2; + vmovdqu (7 * 16)(%rdx), RB3; + + call __sm4_crypt_blk8; + + vmovdqu (7 * 16)(%rdx), RNOT; + vpxor (%rcx), RA0, RA0; + vpxor (0 * 16)(%rdx), RA1, RA1; + vpxor (1 * 16)(%rdx), RA2, RA2; + vpxor (2 * 16)(%rdx), RA3, RA3; + vpxor (3 * 16)(%rdx), RB0, RB0; + vpxor (4 * 16)(%rdx), RB1, RB1; + vpxor (5 * 16)(%rdx), RB2, RB2; + vpxor (6 * 16)(%rdx), RB3, RB3; + vmovdqu RNOT, (%rcx); /* store new IV */ + + vmovdqu RA0, (0 * 16)(%rsi); + vmovdqu RA1, (1 * 16)(%rsi); + vmovdqu RA2, (2 * 16)(%rsi); + vmovdqu RA3, (3 * 16)(%rsi); + vmovdqu RB0, (4 * 16)(%rsi); + vmovdqu RB1, (5 * 16)(%rsi); + vmovdqu RB2, (6 * 16)(%rsi); + vmovdqu RB3, (7 * 16)(%rsi); + + vzeroall; + FRAME_END + ret; +SYM_FUNC_END(sm4_aesni_avx_cbc_dec_blk8) + +/* + * void sm4_aesni_avx_cfb_dec_blk8(const u32 *rk, u8 *dst, + * const u8 *src, u8 *iv) + */ +.align 8 +SYM_FUNC_START(sm4_aesni_avx_cfb_dec_blk8) + /* input: + * %rdi: round key array, CTX + * %rsi: dst (8 blocks) + * %rdx: src (8 blocks) + * %rcx: iv + */ + FRAME_BEGIN + + /* Load input */ + vmovdqu (%rcx), RA0; + vmovdqu 0 * 16(%rdx), RA1; + vmovdqu 1 * 16(%rdx), RA2; + vmovdqu 2 * 16(%rdx), RA3; + vmovdqu 3 * 16(%rdx), RB0; + vmovdqu 4 * 16(%rdx), RB1; + vmovdqu 5 * 16(%rdx), RB2; + vmovdqu 6 * 16(%rdx), RB3; + + /* Update IV */ + vmovdqu 7 * 16(%rdx), RNOT; + vmovdqu RNOT, (%rcx); + + call __sm4_crypt_blk8; + + vpxor (0 * 16)(%rdx), RA0, RA0; + vpxor (1 * 16)(%rdx), RA1, RA1; + vpxor (2 * 16)(%rdx), RA2, RA2; + vpxor (3 * 16)(%rdx), RA3, RA3; + vpxor (4 * 16)(%rdx), RB0, RB0; + vpxor (5 * 16)(%rdx), RB1, RB1; + vpxor (6 * 16)(%rdx), RB2, RB2; + vpxor (7 * 16)(%rdx), RB3, RB3; + + vmovdqu RA0, (0 * 16)(%rsi); + vmovdqu RA1, (1 * 16)(%rsi); + vmovdqu RA2, (2 * 16)(%rsi); + vmovdqu RA3, (3 * 16)(%rsi); + vmovdqu RB0, (4 * 16)(%rsi); + vmovdqu RB1, (5 * 16)(%rsi); + vmovdqu RB2, (6 * 16)(%rsi); + vmovdqu RB3, (7 * 16)(%rsi); + + vzeroall; + FRAME_END + ret; +SYM_FUNC_END(sm4_aesni_avx_cfb_dec_blk8) diff --git a/arch/x86/crypto/sm4-aesni-avx2-asm_64.S b/arch/x86/crypto/sm4-aesni-avx2-asm_64.S new file mode 100644 index 000000000000..d2ffd7f76ee2 --- /dev/null +++ b/arch/x86/crypto/sm4-aesni-avx2-asm_64.S @@ -0,0 +1,497 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * SM4 Cipher Algorithm, AES-NI/AVX2 optimized. + * as specified in + * https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html + * + * Copyright (C) 2018 Markku-Juhani O. Saarinen <mjos@iki.fi> + * Copyright (C) 2020 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * Copyright (c) 2021 Tianjia Zhang <tianjia.zhang@linux.alibaba.com> + */ + +/* Based on SM4 AES-NI work by libgcrypt and Markku-Juhani O. 
Saarinen at: + * https://github.com/mjosaarinen/sm4ni + */ + +#include <linux/linkage.h> +#include <asm/frame.h> + +#define rRIP (%rip) + +/* vector registers */ +#define RX0 %ymm0 +#define RX1 %ymm1 +#define MASK_4BIT %ymm2 +#define RTMP0 %ymm3 +#define RTMP1 %ymm4 +#define RTMP2 %ymm5 +#define RTMP3 %ymm6 +#define RTMP4 %ymm7 + +#define RA0 %ymm8 +#define RA1 %ymm9 +#define RA2 %ymm10 +#define RA3 %ymm11 + +#define RB0 %ymm12 +#define RB1 %ymm13 +#define RB2 %ymm14 +#define RB3 %ymm15 + +#define RNOT %ymm0 +#define RBSWAP %ymm1 + +#define RX0x %xmm0 +#define RX1x %xmm1 +#define MASK_4BITx %xmm2 + +#define RNOTx %xmm0 +#define RBSWAPx %xmm1 + +#define RTMP0x %xmm3 +#define RTMP1x %xmm4 +#define RTMP2x %xmm5 +#define RTMP3x %xmm6 +#define RTMP4x %xmm7 + + +/* helper macros */ + +/* Transpose four 32-bit words between 128-bit vector lanes. */ +#define transpose_4x4(x0, x1, x2, x3, t1, t2) \ + vpunpckhdq x1, x0, t2; \ + vpunpckldq x1, x0, x0; \ + \ + vpunpckldq x3, x2, t1; \ + vpunpckhdq x3, x2, x2; \ + \ + vpunpckhqdq t1, x0, x1; \ + vpunpcklqdq t1, x0, x0; \ + \ + vpunpckhqdq x2, t2, x3; \ + vpunpcklqdq x2, t2, x2; + +/* pre-SubByte transform. */ +#define transform_pre(x, lo_t, hi_t, mask4bit, tmp0) \ + vpand x, mask4bit, tmp0; \ + vpandn x, mask4bit, x; \ + vpsrld $4, x, x; \ + \ + vpshufb tmp0, lo_t, tmp0; \ + vpshufb x, hi_t, x; \ + vpxor tmp0, x, x; + +/* post-SubByte transform. Note: x has been XOR'ed with mask4bit by + * 'vaesenclast' instruction. */ +#define transform_post(x, lo_t, hi_t, mask4bit, tmp0) \ + vpandn mask4bit, x, tmp0; \ + vpsrld $4, x, x; \ + vpand x, mask4bit, x; \ + \ + vpshufb tmp0, lo_t, tmp0; \ + vpshufb x, hi_t, x; \ + vpxor tmp0, x, x; + + +.section .rodata.cst164, "aM", @progbits, 164 +.align 16 + +/* + * Following four affine transform look-up tables are from work by + * Markku-Juhani O. Saarinen, at https://github.com/mjosaarinen/sm4ni + * + * These allow exposing SM4 S-Box from AES SubByte. + */ + +/* pre-SubByte affine transform, from SM4 field to AES field. */ +.Lpre_tf_lo_s: + .quad 0x9197E2E474720701, 0xC7C1B4B222245157 +.Lpre_tf_hi_s: + .quad 0xE240AB09EB49A200, 0xF052B91BF95BB012 + +/* post-SubByte affine transform, from AES field to SM4 field.
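+ * As in the AVX version above, this pairs with the pre-SubByte
+ * transform so that AESENCLAST evaluates the SM4 S-Box.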
*/ +.Lpost_tf_lo_s: + .quad 0x5B67F2CEA19D0834, 0xEDD14478172BBE82 +.Lpost_tf_hi_s: + .quad 0xAE7201DD73AFDC00, 0x11CDBE62CC1063BF + +/* For isolating SubBytes from AESENCLAST, inverse shift row */ +.Linv_shift_row: + .byte 0x00, 0x0d, 0x0a, 0x07, 0x04, 0x01, 0x0e, 0x0b + .byte 0x08, 0x05, 0x02, 0x0f, 0x0c, 0x09, 0x06, 0x03 + +/* Inverse shift row + Rotate left by 8 bits on 32-bit words with vpshufb */ +.Linv_shift_row_rol_8: + .byte 0x07, 0x00, 0x0d, 0x0a, 0x0b, 0x04, 0x01, 0x0e + .byte 0x0f, 0x08, 0x05, 0x02, 0x03, 0x0c, 0x09, 0x06 + +/* Inverse shift row + Rotate left by 16 bits on 32-bit words with vpshufb */ +.Linv_shift_row_rol_16: + .byte 0x0a, 0x07, 0x00, 0x0d, 0x0e, 0x0b, 0x04, 0x01 + .byte 0x02, 0x0f, 0x08, 0x05, 0x06, 0x03, 0x0c, 0x09 + +/* Inverse shift row + Rotate left by 24 bits on 32-bit words with vpshufb */ +.Linv_shift_row_rol_24: + .byte 0x0d, 0x0a, 0x07, 0x00, 0x01, 0x0e, 0x0b, 0x04 + .byte 0x05, 0x02, 0x0f, 0x08, 0x09, 0x06, 0x03, 0x0c + +/* For CTR-mode IV byteswap */ +.Lbswap128_mask: + .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 + +/* For input word byte-swap */ +.Lbswap32_mask: + .byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 + +.align 4 +/* 4-bit mask */ +.L0f0f0f0f: + .long 0x0f0f0f0f + +.text +.align 16 + +.align 8 +SYM_FUNC_START_LOCAL(__sm4_crypt_blk16) + /* input: + * %rdi: round key array, CTX + * RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: sixteen parallel + * plaintext blocks + * output: + * RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: sixteen parallel + * ciphertext blocks + */ + FRAME_BEGIN + + vbroadcasti128 .Lbswap32_mask rRIP, RTMP2; + vpshufb RTMP2, RA0, RA0; + vpshufb RTMP2, RA1, RA1; + vpshufb RTMP2, RA2, RA2; + vpshufb RTMP2, RA3, RA3; + vpshufb RTMP2, RB0, RB0; + vpshufb RTMP2, RB1, RB1; + vpshufb RTMP2, RB2, RB2; + vpshufb RTMP2, RB3, RB3; + + vpbroadcastd .L0f0f0f0f rRIP, MASK_4BIT; + transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1); + transpose_4x4(RB0, RB1, RB2, RB3, RTMP0, RTMP1); + +#define ROUND(round, s0, s1, s2, s3, r0, r1, r2, r3) \ + vpbroadcastd (4*(round))(%rdi), RX0; \ + vbroadcasti128 .Lpre_tf_lo_s rRIP, RTMP4; \ + vbroadcasti128 .Lpre_tf_hi_s rRIP, RTMP1; \ + vmovdqa RX0, RX1; \ + vpxor s1, RX0, RX0; \ + vpxor s2, RX0, RX0; \ + vpxor s3, RX0, RX0; /* s1 ^ s2 ^ s3 ^ rk */ \ + vbroadcasti128 .Lpost_tf_lo_s rRIP, RTMP2; \ + vbroadcasti128 .Lpost_tf_hi_s rRIP, RTMP3; \ + vpxor r1, RX1, RX1; \ + vpxor r2, RX1, RX1; \ + vpxor r3, RX1, RX1; /* r1 ^ r2 ^ r3 ^ rk */ \ + \ + /* sbox, non-linear part */ \ + transform_pre(RX0, RTMP4, RTMP1, MASK_4BIT, RTMP0); \ + transform_pre(RX1, RTMP4, RTMP1, MASK_4BIT, RTMP0); \ + vextracti128 $1, RX0, RTMP4x; \ + vextracti128 $1, RX1, RTMP0x; \ + vaesenclast MASK_4BITx, RX0x, RX0x; \ + vaesenclast MASK_4BITx, RTMP4x, RTMP4x; \ + vaesenclast MASK_4BITx, RX1x, RX1x; \ + vaesenclast MASK_4BITx, RTMP0x, RTMP0x; \ + vinserti128 $1, RTMP4x, RX0, RX0; \ + vbroadcasti128 .Linv_shift_row rRIP, RTMP4; \ + vinserti128 $1, RTMP0x, RX1, RX1; \ + transform_post(RX0, RTMP2, RTMP3, MASK_4BIT, RTMP0); \ + transform_post(RX1, RTMP2, RTMP3, MASK_4BIT, RTMP0); \ + \ + /* linear part */ \ + vpshufb RTMP4, RX0, RTMP0; \ + vpxor RTMP0, s0, s0; /* s0 ^ x */ \ + vpshufb RTMP4, RX1, RTMP2; \ + vbroadcasti128 .Linv_shift_row_rol_8 rRIP, RTMP4; \ + vpxor RTMP2, r0, r0; /* r0 ^ x */ \ + vpshufb RTMP4, RX0, RTMP1; \ + vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) */ \ + vpshufb RTMP4, RX1, RTMP3; \ + vbroadcasti128 .Linv_shift_row_rol_16 rRIP, RTMP4; \ + vpxor RTMP3, RTMP2, RTMP2; /* x ^ rol(x,8) */ \ + vpshufb RTMP4, 
RX0, RTMP1; \ + vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) ^ rol(x,16) */ \ + vpshufb RTMP4, RX1, RTMP3; \ + vbroadcasti128 .Linv_shift_row_rol_24 rRIP, RTMP4; \ + vpxor RTMP3, RTMP2, RTMP2; /* x ^ rol(x,8) ^ rol(x,16) */ \ + vpshufb RTMP4, RX0, RTMP1; \ + vpxor RTMP1, s0, s0; /* s0 ^ x ^ rol(x,24) */ \ + vpslld $2, RTMP0, RTMP1; \ + vpsrld $30, RTMP0, RTMP0; \ + vpxor RTMP0, s0, s0; \ + /* s0 ^ x ^ rol(x,2) ^ rol(x,10) ^ rol(x,18) ^ rol(x,24) */ \ + vpxor RTMP1, s0, s0; \ + vpshufb RTMP4, RX1, RTMP3; \ + vpxor RTMP3, r0, r0; /* r0 ^ x ^ rol(x,24) */ \ + vpslld $2, RTMP2, RTMP3; \ + vpsrld $30, RTMP2, RTMP2; \ + vpxor RTMP2, r0, r0; \ + /* r0 ^ x ^ rol(x,2) ^ rol(x,10) ^ rol(x,18) ^ rol(x,24) */ \ + vpxor RTMP3, r0, r0; + + leaq (32*4)(%rdi), %rax; +.align 16 +.Lroundloop_blk8: + ROUND(0, RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3); + ROUND(1, RA1, RA2, RA3, RA0, RB1, RB2, RB3, RB0); + ROUND(2, RA2, RA3, RA0, RA1, RB2, RB3, RB0, RB1); + ROUND(3, RA3, RA0, RA1, RA2, RB3, RB0, RB1, RB2); + leaq (4*4)(%rdi), %rdi; + cmpq %rax, %rdi; + jne .Lroundloop_blk8; + +#undef ROUND + + vbroadcasti128 .Lbswap128_mask rRIP, RTMP2; + + transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1); + transpose_4x4(RB0, RB1, RB2, RB3, RTMP0, RTMP1); + vpshufb RTMP2, RA0, RA0; + vpshufb RTMP2, RA1, RA1; + vpshufb RTMP2, RA2, RA2; + vpshufb RTMP2, RA3, RA3; + vpshufb RTMP2, RB0, RB0; + vpshufb RTMP2, RB1, RB1; + vpshufb RTMP2, RB2, RB2; + vpshufb RTMP2, RB3, RB3; + + FRAME_END + ret; +SYM_FUNC_END(__sm4_crypt_blk16) + +#define inc_le128(x, minus_one, tmp) \ + vpcmpeqq minus_one, x, tmp; \ + vpsubq minus_one, x, x; \ + vpslldq $8, tmp, tmp; \ + vpsubq tmp, x, x; + +/* + * void sm4_aesni_avx2_ctr_enc_blk16(const u32 *rk, u8 *dst, + * const u8 *src, u8 *iv) + */ +.align 8 +SYM_FUNC_START(sm4_aesni_avx2_ctr_enc_blk16) + /* input: + * %rdi: round key array, CTX + * %rsi: dst (16 blocks) + * %rdx: src (16 blocks) + * %rcx: iv (big endian, 128bit) + */ + FRAME_BEGIN + + movq 8(%rcx), %rax; + bswapq %rax; + + vzeroupper; + + vbroadcasti128 .Lbswap128_mask rRIP, RTMP3; + vpcmpeqd RNOT, RNOT, RNOT; + vpsrldq $8, RNOT, RNOT; /* ab: -1:0 ; cd: -1:0 */ + vpaddq RNOT, RNOT, RTMP2; /* ab: -2:0 ; cd: -2:0 */ + + /* load IV and byteswap */ + vmovdqu (%rcx), RTMP4x; + vpshufb RTMP3x, RTMP4x, RTMP4x; + vmovdqa RTMP4x, RTMP0x; + inc_le128(RTMP4x, RNOTx, RTMP1x); + vinserti128 $1, RTMP4x, RTMP0, RTMP0; + vpshufb RTMP3, RTMP0, RA0; /* +1 ; +0 */ + + /* check need for handling 64-bit overflow and carry */ + cmpq $(0xffffffffffffffff - 16), %rax; + ja .Lhandle_ctr_carry; + + /* construct IVs */ + vpsubq RTMP2, RTMP0, RTMP0; /* +3 ; +2 */ + vpshufb RTMP3, RTMP0, RA1; + vpsubq RTMP2, RTMP0, RTMP0; /* +5 ; +4 */ + vpshufb RTMP3, RTMP0, RA2; + vpsubq RTMP2, RTMP0, RTMP0; /* +7 ; +6 */ + vpshufb RTMP3, RTMP0, RA3; + vpsubq RTMP2, RTMP0, RTMP0; /* +9 ; +8 */ + vpshufb RTMP3, RTMP0, RB0; + vpsubq RTMP2, RTMP0, RTMP0; /* +11 ; +10 */ + vpshufb RTMP3, RTMP0, RB1; + vpsubq RTMP2, RTMP0, RTMP0; /* +13 ; +12 */ + vpshufb RTMP3, RTMP0, RB2; + vpsubq RTMP2, RTMP0, RTMP0; /* +15 ; +14 */ + vpshufb RTMP3, RTMP0, RB3; + vpsubq RTMP2, RTMP0, RTMP0; /* +16 */ + vpshufb RTMP3x, RTMP0x, RTMP0x; + + jmp .Lctr_carry_done; + +.Lhandle_ctr_carry: + /* construct IVs */ + inc_le128(RTMP0, RNOT, RTMP1); + inc_le128(RTMP0, RNOT, RTMP1); + vpshufb RTMP3, RTMP0, RA1; /* +3 ; +2 */ + inc_le128(RTMP0, RNOT, RTMP1); + inc_le128(RTMP0, RNOT, RTMP1); + vpshufb RTMP3, RTMP0, RA2; /* +5 ; +4 */ + inc_le128(RTMP0, RNOT, RTMP1); + inc_le128(RTMP0, RNOT, RTMP1); + vpshufb RTMP3, RTMP0, RA3; 
/* +7 ; +6 */ + inc_le128(RTMP0, RNOT, RTMP1); + inc_le128(RTMP0, RNOT, RTMP1); + vpshufb RTMP3, RTMP0, RB0; /* +9 ; +8 */ + inc_le128(RTMP0, RNOT, RTMP1); + inc_le128(RTMP0, RNOT, RTMP1); + vpshufb RTMP3, RTMP0, RB1; /* +11 ; +10 */ + inc_le128(RTMP0, RNOT, RTMP1); + inc_le128(RTMP0, RNOT, RTMP1); + vpshufb RTMP3, RTMP0, RB2; /* +13 ; +12 */ + inc_le128(RTMP0, RNOT, RTMP1); + inc_le128(RTMP0, RNOT, RTMP1); + vpshufb RTMP3, RTMP0, RB3; /* +15 ; +14 */ + inc_le128(RTMP0, RNOT, RTMP1); + vextracti128 $1, RTMP0, RTMP0x; + vpshufb RTMP3x, RTMP0x, RTMP0x; /* +16 */ + +.align 4 +.Lctr_carry_done: + /* store new IV */ + vmovdqu RTMP0x, (%rcx); + + call __sm4_crypt_blk16; + + vpxor (0 * 32)(%rdx), RA0, RA0; + vpxor (1 * 32)(%rdx), RA1, RA1; + vpxor (2 * 32)(%rdx), RA2, RA2; + vpxor (3 * 32)(%rdx), RA3, RA3; + vpxor (4 * 32)(%rdx), RB0, RB0; + vpxor (5 * 32)(%rdx), RB1, RB1; + vpxor (6 * 32)(%rdx), RB2, RB2; + vpxor (7 * 32)(%rdx), RB3, RB3; + + vmovdqu RA0, (0 * 32)(%rsi); + vmovdqu RA1, (1 * 32)(%rsi); + vmovdqu RA2, (2 * 32)(%rsi); + vmovdqu RA3, (3 * 32)(%rsi); + vmovdqu RB0, (4 * 32)(%rsi); + vmovdqu RB1, (5 * 32)(%rsi); + vmovdqu RB2, (6 * 32)(%rsi); + vmovdqu RB3, (7 * 32)(%rsi); + + vzeroall; + FRAME_END + ret; +SYM_FUNC_END(sm4_aesni_avx2_ctr_enc_blk16) + +/* + * void sm4_aesni_avx2_cbc_dec_blk16(const u32 *rk, u8 *dst, + * const u8 *src, u8 *iv) + */ +.align 8 +SYM_FUNC_START(sm4_aesni_avx2_cbc_dec_blk16) + /* input: + * %rdi: round key array, CTX + * %rsi: dst (16 blocks) + * %rdx: src (16 blocks) + * %rcx: iv + */ + FRAME_BEGIN + + vzeroupper; + + vmovdqu (0 * 32)(%rdx), RA0; + vmovdqu (1 * 32)(%rdx), RA1; + vmovdqu (2 * 32)(%rdx), RA2; + vmovdqu (3 * 32)(%rdx), RA3; + vmovdqu (4 * 32)(%rdx), RB0; + vmovdqu (5 * 32)(%rdx), RB1; + vmovdqu (6 * 32)(%rdx), RB2; + vmovdqu (7 * 32)(%rdx), RB3; + + call __sm4_crypt_blk16; + + vmovdqu (%rcx), RNOTx; + vinserti128 $1, (%rdx), RNOT, RNOT; + vpxor RNOT, RA0, RA0; + vpxor (0 * 32 + 16)(%rdx), RA1, RA1; + vpxor (1 * 32 + 16)(%rdx), RA2, RA2; + vpxor (2 * 32 + 16)(%rdx), RA3, RA3; + vpxor (3 * 32 + 16)(%rdx), RB0, RB0; + vpxor (4 * 32 + 16)(%rdx), RB1, RB1; + vpxor (5 * 32 + 16)(%rdx), RB2, RB2; + vpxor (6 * 32 + 16)(%rdx), RB3, RB3; + vmovdqu (7 * 32 + 16)(%rdx), RNOTx; + vmovdqu RNOTx, (%rcx); /* store new IV */ + + vmovdqu RA0, (0 * 32)(%rsi); + vmovdqu RA1, (1 * 32)(%rsi); + vmovdqu RA2, (2 * 32)(%rsi); + vmovdqu RA3, (3 * 32)(%rsi); + vmovdqu RB0, (4 * 32)(%rsi); + vmovdqu RB1, (5 * 32)(%rsi); + vmovdqu RB2, (6 * 32)(%rsi); + vmovdqu RB3, (7 * 32)(%rsi); + + vzeroall; + FRAME_END + ret; +SYM_FUNC_END(sm4_aesni_avx2_cbc_dec_blk16) + +/* + * void sm4_aesni_avx2_cfb_dec_blk16(const u32 *rk, u8 *dst, + * const u8 *src, u8 *iv) + */ +.align 8 +SYM_FUNC_START(sm4_aesni_avx2_cfb_dec_blk16) + /* input: + * %rdi: round key array, CTX + * %rsi: dst (16 blocks) + * %rdx: src (16 blocks) + * %rcx: iv + */ + FRAME_BEGIN + + vzeroupper; + + /* Load input */ + vmovdqu (%rcx), RNOTx; + vinserti128 $1, (%rdx), RNOT, RA0; + vmovdqu (0 * 32 + 16)(%rdx), RA1; + vmovdqu (1 * 32 + 16)(%rdx), RA2; + vmovdqu (2 * 32 + 16)(%rdx), RA3; + vmovdqu (3 * 32 + 16)(%rdx), RB0; + vmovdqu (4 * 32 + 16)(%rdx), RB1; + vmovdqu (5 * 32 + 16)(%rdx), RB2; + vmovdqu (6 * 32 + 16)(%rdx), RB3; + + /* Update IV */ + vmovdqu (7 * 32 + 16)(%rdx), RNOTx; + vmovdqu RNOTx, (%rcx); + + call __sm4_crypt_blk16; + + vpxor (0 * 32)(%rdx), RA0, RA0; + vpxor (1 * 32)(%rdx), RA1, RA1; + vpxor (2 * 32)(%rdx), RA2, RA2; + vpxor (3 * 32)(%rdx), RA3, RA3; + vpxor (4 * 32)(%rdx), RB0, RB0; + vpxor (5 * 
32)(%rdx), RB1, RB1; + vpxor (6 * 32)(%rdx), RB2, RB2; + vpxor (7 * 32)(%rdx), RB3, RB3; + + vmovdqu RA0, (0 * 32)(%rsi); + vmovdqu RA1, (1 * 32)(%rsi); + vmovdqu RA2, (2 * 32)(%rsi); + vmovdqu RA3, (3 * 32)(%rsi); + vmovdqu RB0, (4 * 32)(%rsi); + vmovdqu RB1, (5 * 32)(%rsi); + vmovdqu RB2, (6 * 32)(%rsi); + vmovdqu RB3, (7 * 32)(%rsi); + + vzeroall; + FRAME_END + ret; +SYM_FUNC_END(sm4_aesni_avx2_cfb_dec_blk16) diff --git a/arch/x86/crypto/sm4-avx.h b/arch/x86/crypto/sm4-avx.h new file mode 100644 index 000000000000..1bceab7516aa --- /dev/null +++ b/arch/x86/crypto/sm4-avx.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef ASM_X86_SM4_AVX_H +#define ASM_X86_SM4_AVX_H + +#include <linux/types.h> +#include <crypto/sm4.h> + +typedef void (*sm4_crypt_func)(const u32 *rk, u8 *dst, const u8 *src, u8 *iv); + +int sm4_avx_ecb_encrypt(struct skcipher_request *req); +int sm4_avx_ecb_decrypt(struct skcipher_request *req); + +int sm4_cbc_encrypt(struct skcipher_request *req); +int sm4_avx_cbc_decrypt(struct skcipher_request *req, + unsigned int bsize, sm4_crypt_func func); + +int sm4_cfb_encrypt(struct skcipher_request *req); +int sm4_avx_cfb_decrypt(struct skcipher_request *req, + unsigned int bsize, sm4_crypt_func func); + +int sm4_avx_ctr_crypt(struct skcipher_request *req, + unsigned int bsize, sm4_crypt_func func); + +#endif diff --git a/arch/x86/crypto/sm4_aesni_avx2_glue.c b/arch/x86/crypto/sm4_aesni_avx2_glue.c new file mode 100644 index 000000000000..84bc718f49a3 --- /dev/null +++ b/arch/x86/crypto/sm4_aesni_avx2_glue.c @@ -0,0 +1,169 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * SM4 Cipher Algorithm, AES-NI/AVX2 optimized. + * as specified in + * https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html + * + * Copyright (c) 2021, Alibaba Group. 
+ * Copyright (c) 2021 Tianjia Zhang <tianjia.zhang@linux.alibaba.com> + */ + +#include <linux/module.h> +#include <linux/crypto.h> +#include <linux/kernel.h> +#include <asm/simd.h> +#include <crypto/internal/simd.h> +#include <crypto/internal/skcipher.h> +#include <crypto/sm4.h> +#include "sm4-avx.h" + +#define SM4_CRYPT16_BLOCK_SIZE (SM4_BLOCK_SIZE * 16) + +asmlinkage void sm4_aesni_avx2_ctr_enc_blk16(const u32 *rk, u8 *dst, + const u8 *src, u8 *iv); +asmlinkage void sm4_aesni_avx2_cbc_dec_blk16(const u32 *rk, u8 *dst, + const u8 *src, u8 *iv); +asmlinkage void sm4_aesni_avx2_cfb_dec_blk16(const u32 *rk, u8 *dst, + const u8 *src, u8 *iv); + +static int sm4_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int key_len) +{ + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + + return sm4_expandkey(ctx, key, key_len); +} + +static int cbc_decrypt(struct skcipher_request *req) +{ + return sm4_avx_cbc_decrypt(req, SM4_CRYPT16_BLOCK_SIZE, + sm4_aesni_avx2_cbc_dec_blk16); +} + + +static int cfb_decrypt(struct skcipher_request *req) +{ + return sm4_avx_cfb_decrypt(req, SM4_CRYPT16_BLOCK_SIZE, + sm4_aesni_avx2_cfb_dec_blk16); +} + +static int ctr_crypt(struct skcipher_request *req) +{ + return sm4_avx_ctr_crypt(req, SM4_CRYPT16_BLOCK_SIZE, + sm4_aesni_avx2_ctr_enc_blk16); +} + +static struct skcipher_alg sm4_aesni_avx2_skciphers[] = { + { + .base = { + .cra_name = "__ecb(sm4)", + .cra_driver_name = "__ecb-sm4-aesni-avx2", + .cra_priority = 500, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = SM4_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct sm4_ctx), + .cra_module = THIS_MODULE, + }, + .min_keysize = SM4_KEY_SIZE, + .max_keysize = SM4_KEY_SIZE, + .walksize = 16 * SM4_BLOCK_SIZE, + .setkey = sm4_skcipher_setkey, + .encrypt = sm4_avx_ecb_encrypt, + .decrypt = sm4_avx_ecb_decrypt, + }, { + .base = { + .cra_name = "__cbc(sm4)", + .cra_driver_name = "__cbc-sm4-aesni-avx2", + .cra_priority = 500, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = SM4_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct sm4_ctx), + .cra_module = THIS_MODULE, + }, + .min_keysize = SM4_KEY_SIZE, + .max_keysize = SM4_KEY_SIZE, + .ivsize = SM4_BLOCK_SIZE, + .walksize = 16 * SM4_BLOCK_SIZE, + .setkey = sm4_skcipher_setkey, + .encrypt = sm4_cbc_encrypt, + .decrypt = cbc_decrypt, + }, { + .base = { + .cra_name = "__cfb(sm4)", + .cra_driver_name = "__cfb-sm4-aesni-avx2", + .cra_priority = 500, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct sm4_ctx), + .cra_module = THIS_MODULE, + }, + .min_keysize = SM4_KEY_SIZE, + .max_keysize = SM4_KEY_SIZE, + .ivsize = SM4_BLOCK_SIZE, + .chunksize = SM4_BLOCK_SIZE, + .walksize = 16 * SM4_BLOCK_SIZE, + .setkey = sm4_skcipher_setkey, + .encrypt = sm4_cfb_encrypt, + .decrypt = cfb_decrypt, + }, { + .base = { + .cra_name = "__ctr(sm4)", + .cra_driver_name = "__ctr-sm4-aesni-avx2", + .cra_priority = 500, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct sm4_ctx), + .cra_module = THIS_MODULE, + }, + .min_keysize = SM4_KEY_SIZE, + .max_keysize = SM4_KEY_SIZE, + .ivsize = SM4_BLOCK_SIZE, + .chunksize = SM4_BLOCK_SIZE, + .walksize = 16 * SM4_BLOCK_SIZE, + .setkey = sm4_skcipher_setkey, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, + } +}; + +static struct simd_skcipher_alg * +simd_sm4_aesni_avx2_skciphers[ARRAY_SIZE(sm4_aesni_avx2_skciphers)]; + +static int __init sm4_init(void) +{ + const char *feature_name; + + if (!boot_cpu_has(X86_FEATURE_AVX) || + !boot_cpu_has(X86_FEATURE_AVX2) || + 
!boot_cpu_has(X86_FEATURE_AES) || + !boot_cpu_has(X86_FEATURE_OSXSAVE)) { + pr_info("AVX2 or AES-NI instructions are not detected.\n"); + return -ENODEV; + } + + if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, + &feature_name)) { + pr_info("CPU feature '%s' is not supported.\n", feature_name); + return -ENODEV; + } + + return simd_register_skciphers_compat(sm4_aesni_avx2_skciphers, + ARRAY_SIZE(sm4_aesni_avx2_skciphers), + simd_sm4_aesni_avx2_skciphers); +} + +static void __exit sm4_exit(void) +{ + simd_unregister_skciphers(sm4_aesni_avx2_skciphers, + ARRAY_SIZE(sm4_aesni_avx2_skciphers), + simd_sm4_aesni_avx2_skciphers); +} + +module_init(sm4_init); +module_exit(sm4_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Tianjia Zhang <tianjia.zhang@linux.alibaba.com>"); +MODULE_DESCRIPTION("SM4 Cipher Algorithm, AES-NI/AVX2 optimized"); +MODULE_ALIAS_CRYPTO("sm4"); +MODULE_ALIAS_CRYPTO("sm4-aesni-avx2"); diff --git a/arch/x86/crypto/sm4_aesni_avx_glue.c b/arch/x86/crypto/sm4_aesni_avx_glue.c new file mode 100644 index 000000000000..7800f77d68ad --- /dev/null +++ b/arch/x86/crypto/sm4_aesni_avx_glue.c @@ -0,0 +1,487 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * SM4 Cipher Algorithm, AES-NI/AVX optimized. + * as specified in + * https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html + * + * Copyright (c) 2021, Alibaba Group. + * Copyright (c) 2021 Tianjia Zhang <tianjia.zhang@linux.alibaba.com> + */ + +#include <linux/module.h> +#include <linux/crypto.h> +#include <linux/kernel.h> +#include <asm/simd.h> +#include <crypto/internal/simd.h> +#include <crypto/internal/skcipher.h> +#include <crypto/sm4.h> +#include "sm4-avx.h" + +#define SM4_CRYPT8_BLOCK_SIZE (SM4_BLOCK_SIZE * 8) + +asmlinkage void sm4_aesni_avx_crypt4(const u32 *rk, u8 *dst, + const u8 *src, int nblocks); +asmlinkage void sm4_aesni_avx_crypt8(const u32 *rk, u8 *dst, + const u8 *src, int nblocks); +asmlinkage void sm4_aesni_avx_ctr_enc_blk8(const u32 *rk, u8 *dst, + const u8 *src, u8 *iv); +asmlinkage void sm4_aesni_avx_cbc_dec_blk8(const u32 *rk, u8 *dst, + const u8 *src, u8 *iv); +asmlinkage void sm4_aesni_avx_cfb_dec_blk8(const u32 *rk, u8 *dst, + const u8 *src, u8 *iv); + +static int sm4_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int key_len) +{ + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + + return sm4_expandkey(ctx, key, key_len); +} + +static int ecb_do_crypt(struct skcipher_request *req, const u32 *rkey) +{ + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes) > 0) { + const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + + kernel_fpu_begin(); + while (nbytes >= SM4_CRYPT8_BLOCK_SIZE) { + sm4_aesni_avx_crypt8(rkey, dst, src, 8); + dst += SM4_CRYPT8_BLOCK_SIZE; + src += SM4_CRYPT8_BLOCK_SIZE; + nbytes -= SM4_CRYPT8_BLOCK_SIZE; + } + while (nbytes >= SM4_BLOCK_SIZE) { + unsigned int nblocks = min(nbytes >> 4, 4u); + sm4_aesni_avx_crypt4(rkey, dst, src, nblocks); + dst += nblocks * SM4_BLOCK_SIZE; + src += nblocks * SM4_BLOCK_SIZE; + nbytes -= nblocks * SM4_BLOCK_SIZE; + } + kernel_fpu_end(); + + err = skcipher_walk_done(&walk, nbytes); + } + + return err; +} + +int sm4_avx_ecb_encrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + + return ecb_do_crypt(req, ctx->rkey_enc); +} +EXPORT_SYMBOL_GPL(sm4_avx_ecb_encrypt); + +int sm4_avx_ecb_decrypt(struct 
skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + + return ecb_do_crypt(req, ctx->rkey_dec); +} +EXPORT_SYMBOL_GPL(sm4_avx_ecb_decrypt); + +int sm4_cbc_encrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes) > 0) { + const u8 *iv = walk.iv; + const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + + while (nbytes >= SM4_BLOCK_SIZE) { + crypto_xor_cpy(dst, src, iv, SM4_BLOCK_SIZE); + sm4_crypt_block(ctx->rkey_enc, dst, dst); + iv = dst; + src += SM4_BLOCK_SIZE; + dst += SM4_BLOCK_SIZE; + nbytes -= SM4_BLOCK_SIZE; + } + if (iv != walk.iv) + memcpy(walk.iv, iv, SM4_BLOCK_SIZE); + + err = skcipher_walk_done(&walk, nbytes); + } + + return err; +} +EXPORT_SYMBOL_GPL(sm4_cbc_encrypt); + +int sm4_avx_cbc_decrypt(struct skcipher_request *req, + unsigned int bsize, sm4_crypt_func func) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes) > 0) { + const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + + kernel_fpu_begin(); + + while (nbytes >= bsize) { + func(ctx->rkey_dec, dst, src, walk.iv); + dst += bsize; + src += bsize; + nbytes -= bsize; + } + + while (nbytes >= SM4_BLOCK_SIZE) { + u8 keystream[SM4_BLOCK_SIZE * 8]; + u8 iv[SM4_BLOCK_SIZE]; + unsigned int nblocks = min(nbytes >> 4, 8u); + int i; + + sm4_aesni_avx_crypt8(ctx->rkey_dec, keystream, + src, nblocks); + + src += ((int)nblocks - 2) * SM4_BLOCK_SIZE; + dst += (nblocks - 1) * SM4_BLOCK_SIZE; + memcpy(iv, src + SM4_BLOCK_SIZE, SM4_BLOCK_SIZE); + + for (i = nblocks - 1; i > 0; i--) { + crypto_xor_cpy(dst, src, + &keystream[i * SM4_BLOCK_SIZE], + SM4_BLOCK_SIZE); + src -= SM4_BLOCK_SIZE; + dst -= SM4_BLOCK_SIZE; + } + crypto_xor_cpy(dst, walk.iv, keystream, SM4_BLOCK_SIZE); + memcpy(walk.iv, iv, SM4_BLOCK_SIZE); + dst += nblocks * SM4_BLOCK_SIZE; + src += (nblocks + 1) * SM4_BLOCK_SIZE; + nbytes -= nblocks * SM4_BLOCK_SIZE; + } + + kernel_fpu_end(); + err = skcipher_walk_done(&walk, nbytes); + } + + return err; +} +EXPORT_SYMBOL_GPL(sm4_avx_cbc_decrypt); + +static int cbc_decrypt(struct skcipher_request *req) +{ + return sm4_avx_cbc_decrypt(req, SM4_CRYPT8_BLOCK_SIZE, + sm4_aesni_avx_cbc_dec_blk8); +} + +int sm4_cfb_encrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes) > 0) { + u8 keystream[SM4_BLOCK_SIZE]; + const u8 *iv = walk.iv; + const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + + while (nbytes >= SM4_BLOCK_SIZE) { + sm4_crypt_block(ctx->rkey_enc, keystream, iv); + crypto_xor_cpy(dst, src, keystream, SM4_BLOCK_SIZE); + iv = dst; + src += SM4_BLOCK_SIZE; + dst += SM4_BLOCK_SIZE; + nbytes -= SM4_BLOCK_SIZE; + } + if (iv != walk.iv) + memcpy(walk.iv, iv, SM4_BLOCK_SIZE); + + /* tail */ + if (walk.nbytes == walk.total && nbytes > 0) { + sm4_crypt_block(ctx->rkey_enc, keystream, walk.iv); + crypto_xor_cpy(dst, src, keystream, 
nbytes); + nbytes = 0; + } + + err = skcipher_walk_done(&walk, nbytes); + } + + return err; +} +EXPORT_SYMBOL_GPL(sm4_cfb_encrypt); + +int sm4_avx_cfb_decrypt(struct skcipher_request *req, + unsigned int bsize, sm4_crypt_func func) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes) > 0) { + const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + + kernel_fpu_begin(); + + while (nbytes >= bsize) { + func(ctx->rkey_enc, dst, src, walk.iv); + dst += bsize; + src += bsize; + nbytes -= bsize; + } + + while (nbytes >= SM4_BLOCK_SIZE) { + u8 keystream[SM4_BLOCK_SIZE * 8]; + unsigned int nblocks = min(nbytes >> 4, 8u); + + memcpy(keystream, walk.iv, SM4_BLOCK_SIZE); + if (nblocks > 1) + memcpy(&keystream[SM4_BLOCK_SIZE], src, + (nblocks - 1) * SM4_BLOCK_SIZE); + memcpy(walk.iv, src + (nblocks - 1) * SM4_BLOCK_SIZE, + SM4_BLOCK_SIZE); + + sm4_aesni_avx_crypt8(ctx->rkey_enc, keystream, + keystream, nblocks); + + crypto_xor_cpy(dst, src, keystream, + nblocks * SM4_BLOCK_SIZE); + dst += nblocks * SM4_BLOCK_SIZE; + src += nblocks * SM4_BLOCK_SIZE; + nbytes -= nblocks * SM4_BLOCK_SIZE; + } + + kernel_fpu_end(); + + /* tail */ + if (walk.nbytes == walk.total && nbytes > 0) { + u8 keystream[SM4_BLOCK_SIZE]; + + sm4_crypt_block(ctx->rkey_enc, keystream, walk.iv); + crypto_xor_cpy(dst, src, keystream, nbytes); + nbytes = 0; + } + + err = skcipher_walk_done(&walk, nbytes); + } + + return err; +} +EXPORT_SYMBOL_GPL(sm4_avx_cfb_decrypt); + +static int cfb_decrypt(struct skcipher_request *req) +{ + return sm4_avx_cfb_decrypt(req, SM4_CRYPT8_BLOCK_SIZE, + sm4_aesni_avx_cfb_dec_blk8); +} + +int sm4_avx_ctr_crypt(struct skcipher_request *req, + unsigned int bsize, sm4_crypt_func func) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes) > 0) { + const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + + kernel_fpu_begin(); + + while (nbytes >= bsize) { + func(ctx->rkey_enc, dst, src, walk.iv); + dst += bsize; + src += bsize; + nbytes -= bsize; + } + + while (nbytes >= SM4_BLOCK_SIZE) { + u8 keystream[SM4_BLOCK_SIZE * 8]; + unsigned int nblocks = min(nbytes >> 4, 8u); + int i; + + for (i = 0; i < nblocks; i++) { + memcpy(&keystream[i * SM4_BLOCK_SIZE], + walk.iv, SM4_BLOCK_SIZE); + crypto_inc(walk.iv, SM4_BLOCK_SIZE); + } + sm4_aesni_avx_crypt8(ctx->rkey_enc, keystream, + keystream, nblocks); + + crypto_xor_cpy(dst, src, keystream, + nblocks * SM4_BLOCK_SIZE); + dst += nblocks * SM4_BLOCK_SIZE; + src += nblocks * SM4_BLOCK_SIZE; + nbytes -= nblocks * SM4_BLOCK_SIZE; + } + + kernel_fpu_end(); + + /* tail */ + if (walk.nbytes == walk.total && nbytes > 0) { + u8 keystream[SM4_BLOCK_SIZE]; + + memcpy(keystream, walk.iv, SM4_BLOCK_SIZE); + crypto_inc(walk.iv, SM4_BLOCK_SIZE); + + sm4_crypt_block(ctx->rkey_enc, keystream, keystream); + + crypto_xor_cpy(dst, src, keystream, nbytes); + dst += nbytes; + src += nbytes; + nbytes = 0; + } + + err = skcipher_walk_done(&walk, nbytes); + } + + return err; +} +EXPORT_SYMBOL_GPL(sm4_avx_ctr_crypt); + +static int ctr_crypt(struct skcipher_request *req) +{ + return sm4_avx_ctr_crypt(req, SM4_CRYPT8_BLOCK_SIZE, + sm4_aesni_avx_ctr_enc_blk8); +} + 
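The subtlest part of the glue above is sm4_avx_cbc_decrypt(): the walk may run in place (src == dst), so the code decrypts a batch into a scratch keystream buffer, saves the last ciphertext block as the next IV before it can be clobbered, and only then XORs backwards from the last block to the first. Below is a minimal C model of that ordering; it is an editorial sketch rather than kernel code, and sm4_decrypt_block() is a hypothetical stand-in for the real block-cipher core.

#include <string.h>

#define BLK 16

/* Hypothetical one-block decryptor standing in for the SM4 core. */
void sm4_decrypt_block(const unsigned char *in, unsigned char *out);

/* In-place-safe bulk CBC decryption; nblocks <= 8 here, matching the
 * 8-block batches used by the glue code above. */
void cbc_dec_bulk(unsigned char *dst, const unsigned char *src,
		  unsigned int nblocks, unsigned char *iv)
{
	unsigned char tmp[8 * BLK], next_iv[BLK];
	unsigned int i, j;

	/* 1) Decrypt everything into scratch space first. */
	for (i = 0; i < nblocks; i++)
		sm4_decrypt_block(src + i * BLK, tmp + i * BLK);

	/* 2) Save the last ciphertext block before dst can clobber it. */
	memcpy(next_iv, src + (nblocks - 1) * BLK, BLK);

	/* 3) XOR backwards: block i needs ciphertext block i - 1, which a
	 * forward in-place pass would already have overwritten. */
	for (i = nblocks - 1; i > 0; i--)
		for (j = 0; j < BLK; j++)
			dst[i * BLK + j] = tmp[i * BLK + j] ^
					   src[(i - 1) * BLK + j];
	for (j = 0; j < BLK; j++)
		dst[j] = tmp[j] ^ iv[j];

	memcpy(iv, next_iv, BLK);
}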
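For context on how these internal ("__"-prefixed) algorithms are reached: once sm4_init() below registers them, simd_register_skciphers_compat() publishes wrappers under the plain names ("ctr(sm4)" and so on), which the crypto API resolves by priority. A hedged in-kernel usage sketch follows; sm4_ctr_demo() is a made-up caller, with error handling trimmed to the essentials.

#include <crypto/skcipher.h>
#include <crypto/sm4.h>
#include <linux/err.h>
#include <linux/scatterlist.h>

/* Hypothetical one-shot in-place CTR encryption of @len bytes in @buf
 * (@buf must not live on the stack, since it is mapped via scatterlist). */
static int sm4_ctr_demo(const u8 *key, u8 *iv, u8 *buf, unsigned int len)
{
	struct crypto_skcipher *tfm;
	struct skcipher_request *req;
	struct scatterlist sg;
	DECLARE_CRYPTO_WAIT(wait);
	int err;

	/* Resolves the highest-priority "ctr(sm4)" provider, e.g. the
	 * AVX2 driver (cra_priority 500) over the AVX one (400). */
	tfm = crypto_alloc_skcipher("ctr(sm4)", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_skcipher_setkey(tfm, key, SM4_KEY_SIZE);
	if (err)
		goto out_free_tfm;

	req = skcipher_request_alloc(tfm, GFP_KERNEL);
	if (!req) {
		err = -ENOMEM;
		goto out_free_tfm;
	}

	sg_init_one(&sg, buf, len);
	skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP,
				      crypto_req_done, &wait);
	skcipher_request_set_crypt(req, &sg, &sg, len, iv);
	err = crypto_wait_req(crypto_skcipher_encrypt(req), &wait);

	skcipher_request_free(req);
out_free_tfm:
	crypto_free_skcipher(tfm);
	return err;
}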
+static struct skcipher_alg sm4_aesni_avx_skciphers[] = { + { + .base = { + .cra_name = "__ecb(sm4)", + .cra_driver_name = "__ecb-sm4-aesni-avx", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = SM4_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct sm4_ctx), + .cra_module = THIS_MODULE, + }, + .min_keysize = SM4_KEY_SIZE, + .max_keysize = SM4_KEY_SIZE, + .walksize = 8 * SM4_BLOCK_SIZE, + .setkey = sm4_skcipher_setkey, + .encrypt = sm4_avx_ecb_encrypt, + .decrypt = sm4_avx_ecb_decrypt, + }, { + .base = { + .cra_name = "__cbc(sm4)", + .cra_driver_name = "__cbc-sm4-aesni-avx", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = SM4_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct sm4_ctx), + .cra_module = THIS_MODULE, + }, + .min_keysize = SM4_KEY_SIZE, + .max_keysize = SM4_KEY_SIZE, + .ivsize = SM4_BLOCK_SIZE, + .walksize = 8 * SM4_BLOCK_SIZE, + .setkey = sm4_skcipher_setkey, + .encrypt = sm4_cbc_encrypt, + .decrypt = cbc_decrypt, + }, { + .base = { + .cra_name = "__cfb(sm4)", + .cra_driver_name = "__cfb-sm4-aesni-avx", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct sm4_ctx), + .cra_module = THIS_MODULE, + }, + .min_keysize = SM4_KEY_SIZE, + .max_keysize = SM4_KEY_SIZE, + .ivsize = SM4_BLOCK_SIZE, + .chunksize = SM4_BLOCK_SIZE, + .walksize = 8 * SM4_BLOCK_SIZE, + .setkey = sm4_skcipher_setkey, + .encrypt = sm4_cfb_encrypt, + .decrypt = cfb_decrypt, + }, { + .base = { + .cra_name = "__ctr(sm4)", + .cra_driver_name = "__ctr-sm4-aesni-avx", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct sm4_ctx), + .cra_module = THIS_MODULE, + }, + .min_keysize = SM4_KEY_SIZE, + .max_keysize = SM4_KEY_SIZE, + .ivsize = SM4_BLOCK_SIZE, + .chunksize = SM4_BLOCK_SIZE, + .walksize = 8 * SM4_BLOCK_SIZE, + .setkey = sm4_skcipher_setkey, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, + } +}; + +static struct simd_skcipher_alg * +simd_sm4_aesni_avx_skciphers[ARRAY_SIZE(sm4_aesni_avx_skciphers)]; + +static int __init sm4_init(void) +{ + const char *feature_name; + + if (!boot_cpu_has(X86_FEATURE_AVX) || + !boot_cpu_has(X86_FEATURE_AES) || + !boot_cpu_has(X86_FEATURE_OSXSAVE)) { + pr_info("AVX or AES-NI instructions are not detected.\n"); + return -ENODEV; + } + + if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, + &feature_name)) { + pr_info("CPU feature '%s' is not supported.\n", feature_name); + return -ENODEV; + } + + return simd_register_skciphers_compat(sm4_aesni_avx_skciphers, + ARRAY_SIZE(sm4_aesni_avx_skciphers), + simd_sm4_aesni_avx_skciphers); +} + +static void __exit sm4_exit(void) +{ + simd_unregister_skciphers(sm4_aesni_avx_skciphers, + ARRAY_SIZE(sm4_aesni_avx_skciphers), + simd_sm4_aesni_avx_skciphers); +} + +module_init(sm4_init); +module_exit(sm4_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Tianjia Zhang <tianjia.zhang@linux.alibaba.com>"); +MODULE_DESCRIPTION("SM4 Cipher Algorithm, AES-NI/AVX optimized"); +MODULE_ALIAS_CRYPTO("sm4"); +MODULE_ALIAS_CRYPTO("sm4-aesni-avx"); diff --git a/arch/x86/events/Kconfig b/arch/x86/events/Kconfig index 39d9ded9e25a..d6cdfe631674 100644 --- a/arch/x86/events/Kconfig +++ b/arch/x86/events/Kconfig @@ -34,4 +34,14 @@ config PERF_EVENTS_AMD_POWER (CPUID Fn8000_0007_EDX[12]) interface to calculate the average power consumption on Family 15h processors. 
+config PERF_EVENTS_AMD_UNCORE + tristate "AMD Uncore performance events" + depends on PERF_EVENTS && CPU_SUP_AMD + default y + help + Include support for AMD uncore performance events for use with + e.g., perf stat -e amd_l3/.../,amd_df/.../. + + To compile this driver as a module, choose M here: the + module will be called 'amd-uncore'. endmenu diff --git a/arch/x86/events/amd/Makefile b/arch/x86/events/amd/Makefile index fe8795a67385..6cbe38d5fd9d 100644 --- a/arch/x86/events/amd/Makefile +++ b/arch/x86/events/amd/Makefile @@ -1,8 +1,9 @@ # SPDX-License-Identifier: GPL-2.0 -obj-$(CONFIG_CPU_SUP_AMD) += core.o uncore.o +obj-$(CONFIG_CPU_SUP_AMD) += core.o obj-$(CONFIG_PERF_EVENTS_AMD_POWER) += power.o obj-$(CONFIG_X86_LOCAL_APIC) += ibs.o +obj-$(CONFIG_PERF_EVENTS_AMD_UNCORE) += amd-uncore.o +amd-uncore-objs := uncore.o ifdef CONFIG_AMD_IOMMU obj-$(CONFIG_CPU_SUP_AMD) += iommu.o endif - diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c index 40669eac9d6d..9739019d4b67 100644 --- a/arch/x86/events/amd/ibs.c +++ b/arch/x86/events/amd/ibs.c @@ -26,6 +26,7 @@ static u32 ibs_caps; #include <linux/hardirq.h> #include <asm/nmi.h> +#include <asm/amd-ibs.h> #define IBS_FETCH_CONFIG_MASK (IBS_FETCH_RAND_EN | IBS_FETCH_MAX_CNT) #define IBS_OP_CONFIG_MASK IBS_OP_MAX_CNT @@ -90,6 +91,7 @@ struct perf_ibs { unsigned long offset_mask[1]; int offset_max; unsigned int fetch_count_reset_broken : 1; + unsigned int fetch_ignore_if_zero_rip : 1; struct cpu_perf_ibs __percpu *pcpu; struct attribute **format_attrs; @@ -99,15 +101,6 @@ struct perf_ibs { u64 (*get_count)(u64 config); }; -struct perf_ibs_data { - u32 size; - union { - u32 data[0]; /* data buffer starts here */ - u32 caps; - }; - u64 regs[MSR_AMD64_IBS_REG_COUNT_MAX]; -}; - static int perf_event_set_period(struct hw_perf_event *hwc, u64 min, u64 max, u64 *hw_period) { @@ -328,11 +321,14 @@ static int perf_ibs_set_period(struct perf_ibs *perf_ibs, static u64 get_ibs_fetch_count(u64 config) { - return (config & IBS_FETCH_CNT) >> 12; + union ibs_fetch_ctl fetch_ctl = (union ibs_fetch_ctl)config; + + return fetch_ctl.fetch_cnt << 4; } static u64 get_ibs_op_count(u64 config) { + union ibs_op_ctl op_ctl = (union ibs_op_ctl)config; u64 count = 0; /* @@ -340,12 +336,12 @@ static u64 get_ibs_op_count(u64 config) * and the lower 7 bits of CurCnt are randomized. * Otherwise CurCnt has the full 27-bit current counter value. 
*/ - if (config & IBS_OP_VAL) { - count = (config & IBS_OP_MAX_CNT) << 4; + if (op_ctl.op_val) { + count = op_ctl.opmaxcnt << 4; if (ibs_caps & IBS_CAPS_OPCNTEXT) - count += config & IBS_OP_MAX_CNT_EXT_MASK; + count += op_ctl.opmaxcnt_ext << 20; } else if (ibs_caps & IBS_CAPS_RDWROPCNT) { - count = (config & IBS_OP_CUR_CNT) >> 32; + count = op_ctl.opcurcnt; } return count; @@ -570,6 +566,7 @@ static struct perf_ibs perf_ibs_op = { .start = perf_ibs_start, .stop = perf_ibs_stop, .read = perf_ibs_read, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE, }, .msr = MSR_AMD64_IBSOPCTL, .config_mask = IBS_OP_CONFIG_MASK, @@ -672,6 +669,10 @@ fail: if (check_rip && (ibs_data.regs[2] & IBS_RIP_INVALID)) { regs.flags &= ~PERF_EFLAGS_EXACT; } else { + /* Workaround for erratum #1197 */ + if (perf_ibs->fetch_ignore_if_zero_rip && !(ibs_data.regs[1])) + goto out; + set_linear_ip(®s, ibs_data.regs[1]); regs.flags |= PERF_EFLAGS_EXACT; } @@ -769,6 +770,9 @@ static __init void perf_event_ibs_init(void) if (boot_cpu_data.x86 >= 0x16 && boot_cpu_data.x86 <= 0x18) perf_ibs_fetch.fetch_count_reset_broken = 1; + if (boot_cpu_data.x86 == 0x19 && boot_cpu_data.x86_model < 0x10) + perf_ibs_fetch.fetch_ignore_if_zero_rip = 1; + perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch"); if (ibs_caps & IBS_CAPS_OPCNT) { diff --git a/arch/x86/events/amd/power.c b/arch/x86/events/amd/power.c index 16a2369c586e..37d5b380516e 100644 --- a/arch/x86/events/amd/power.c +++ b/arch/x86/events/amd/power.c @@ -213,6 +213,7 @@ static struct pmu pmu_class = { .stop = pmu_event_stop, .read = pmu_event_read, .capabilities = PERF_PMU_CAP_NO_EXCLUDE, + .module = THIS_MODULE, }; static int power_cpu_exit(unsigned int cpu) diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c index 582c0ffb5e98..0d04414b97d2 100644 --- a/arch/x86/events/amd/uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -12,11 +12,11 @@ #include <linux/init.h> #include <linux/cpu.h> #include <linux/cpumask.h> +#include <linux/cpufeature.h> +#include <linux/smp.h> -#include <asm/cpufeature.h> #include <asm/perf_event.h> #include <asm/msr.h> -#include <asm/smp.h> #define NUM_COUNTERS_NB 4 #define NUM_COUNTERS_L2 4 @@ -347,6 +347,7 @@ static struct pmu amd_nb_pmu = { .stop = amd_uncore_stop, .read = amd_uncore_read, .capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT, + .module = THIS_MODULE, }; static struct pmu amd_llc_pmu = { @@ -360,6 +361,7 @@ static struct pmu amd_llc_pmu = { .stop = amd_uncore_stop, .read = amd_uncore_read, .capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT, + .module = THIS_MODULE, }; static struct amd_uncore *amd_uncore_alloc(unsigned int cpu) @@ -452,7 +454,7 @@ static int amd_uncore_cpu_starting(unsigned int cpu) if (amd_uncore_llc) { uncore = *per_cpu_ptr(amd_uncore_llc, cpu); - uncore->id = per_cpu(cpu_llc_id, cpu); + uncore->id = get_llc_id(cpu); uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_llc); *per_cpu_ptr(amd_uncore_llc, cpu) = uncore; @@ -659,12 +661,34 @@ fail_prep: fail_llc: if (boot_cpu_has(X86_FEATURE_PERFCTR_NB)) perf_pmu_unregister(&amd_nb_pmu); - if (amd_uncore_llc) - free_percpu(amd_uncore_llc); + free_percpu(amd_uncore_llc); fail_nb: - if (amd_uncore_nb) - free_percpu(amd_uncore_nb); + free_percpu(amd_uncore_nb); return ret; } -device_initcall(amd_uncore_init); + +static void __exit amd_uncore_exit(void) +{ + cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE); + cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING); + cpuhp_remove_state(CPUHP_PERF_X86_AMD_UNCORE_PREP); + 
+ if (boot_cpu_has(X86_FEATURE_PERFCTR_LLC)) { + perf_pmu_unregister(&amd_llc_pmu); + free_percpu(amd_uncore_llc); + amd_uncore_llc = NULL; + } + + if (boot_cpu_has(X86_FEATURE_PERFCTR_NB)) { + perf_pmu_unregister(&amd_nb_pmu); + free_percpu(amd_uncore_nb); + amd_uncore_nb = NULL; + } +} + +module_init(amd_uncore_init); +module_exit(amd_uncore_exit); + +MODULE_DESCRIPTION("AMD Uncore Driver"); +MODULE_LICENSE("GPL v2"); diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 3092fbf9dbe4..2a57dbed4894 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -1087,10 +1087,8 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) * validate an event group (assign == NULL) */ if (!unsched && assign) { - for (i = 0; i < n; i++) { - e = cpuc->event_list[i]; + for (i = 0; i < n; i++) static_call_cond(x86_pmu_commit_scheduling)(cpuc, i, assign[i]); - } } else { for (i = n0; i < n; i++) { e = cpuc->event_list[i]; diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index ac6fd2dabf6a..7011e87be6d0 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -5032,9 +5032,9 @@ static ssize_t freeze_on_smi_store(struct device *cdev, x86_pmu.attr_freeze_on_smi = val; - get_online_cpus(); + cpus_read_lock(); on_each_cpu(flip_smm_bit, &val, 1); - put_online_cpus(); + cpus_read_unlock(); done: mutex_unlock(&freeze_on_smi_mutex); @@ -5077,9 +5077,9 @@ static ssize_t set_sysctl_tfa(struct device *cdev, allow_tsx_force_abort = val; - get_online_cpus(); + cpus_read_lock(); on_each_cpu(update_tfa_sched, NULL, 1); - put_online_cpus(); + cpus_read_unlock(); return count; } diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c index 915847655c06..7f406c14715f 100644 --- a/arch/x86/events/intel/pt.c +++ b/arch/x86/events/intel/pt.c @@ -62,7 +62,7 @@ static struct pt_cap_desc { PT_CAP(single_range_output, 0, CPUID_ECX, BIT(2)), PT_CAP(output_subsys, 0, CPUID_ECX, BIT(3)), PT_CAP(payloads_lip, 0, CPUID_ECX, BIT(31)), - PT_CAP(num_address_ranges, 1, CPUID_EAX, 0x3), + PT_CAP(num_address_ranges, 1, CPUID_EAX, 0x7), PT_CAP(mtc_periods, 1, CPUID_EAX, 0xffff0000), PT_CAP(cycle_thresholds, 1, CPUID_EBX, 0xffff), PT_CAP(psb_periods, 1, CPUID_EBX, 0xffff0000), @@ -1708,7 +1708,7 @@ static __init int pt_init(void) if (!boot_cpu_has(X86_FEATURE_INTEL_PT)) return -ENODEV; - get_online_cpus(); + cpus_read_lock(); for_each_online_cpu(cpu) { u64 ctl; @@ -1716,7 +1716,7 @@ static __init int pt_init(void) if (!ret && (ctl & RTIT_CTL_TRACEEN)) prior_warn++; } - put_online_cpus(); + cpus_read_unlock(); if (prior_warn) { x86_add_exclusive(x86_lbr_exclusive_pt); diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 9bf4dbbc26e2..c72e368dd164 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -842,6 +842,18 @@ static const struct attribute_group uncore_pmu_attr_group = { .attrs = uncore_pmu_attrs, }; +void uncore_get_alias_name(char *pmu_name, struct intel_uncore_pmu *pmu) +{ + struct intel_uncore_type *type = pmu->type; + + if (type->num_boxes == 1) + sprintf(pmu_name, "uncore_type_%u", type->type_id); + else { + sprintf(pmu_name, "uncore_type_%u_%d", + type->type_id, type->box_ids[pmu->pmu_idx]); + } +} + static void uncore_get_pmu_name(struct intel_uncore_pmu *pmu) { struct intel_uncore_type *type = pmu->type; @@ -851,12 +863,7 @@ static void uncore_get_pmu_name(struct intel_uncore_pmu *pmu) * Use uncore_type_&typeid_&boxid as name. 
*/ if (!type->name) { - if (type->num_boxes == 1) - sprintf(pmu->name, "uncore_type_%u", type->type_id); - else { - sprintf(pmu->name, "uncore_type_%u_%d", - type->type_id, type->box_ids[pmu->pmu_idx]); - } + uncore_get_alias_name(pmu->name, pmu); return; } @@ -865,9 +872,13 @@ static void uncore_get_pmu_name(struct intel_uncore_pmu *pmu) sprintf(pmu->name, "uncore_%s", type->name); else sprintf(pmu->name, "uncore"); - } else - sprintf(pmu->name, "uncore_%s_%d", type->name, pmu->pmu_idx); - + } else { + /* + * Use the box ID from the discovery table if applicable. + */ + sprintf(pmu->name, "uncore_%s_%d", type->name, + type->box_ids ? type->box_ids[pmu->pmu_idx] : pmu->pmu_idx); + } } static int uncore_pmu_register(struct intel_uncore_pmu *pmu) @@ -1663,6 +1674,7 @@ struct intel_uncore_init_fun { void (*cpu_init)(void); int (*pci_init)(void); void (*mmio_init)(void); + bool use_discovery; }; static const struct intel_uncore_init_fun nhm_uncore_init __initconst = { @@ -1765,6 +1777,13 @@ static const struct intel_uncore_init_fun snr_uncore_init __initconst = { .mmio_init = snr_uncore_mmio_init, }; +static const struct intel_uncore_init_fun spr_uncore_init __initconst = { + .cpu_init = spr_uncore_cpu_init, + .pci_init = spr_uncore_pci_init, + .mmio_init = spr_uncore_mmio_init, + .use_discovery = true, +}; + static const struct intel_uncore_init_fun generic_uncore_init __initconst = { .cpu_init = intel_uncore_generic_uncore_cpu_init, .pci_init = intel_uncore_generic_uncore_pci_init, @@ -1809,6 +1828,7 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE, &rkl_uncore_init), X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &adl_uncore_init), X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &adl_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &spr_uncore_init), X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &snr_uncore_init), {}, }; @@ -1832,8 +1852,13 @@ static int __init intel_uncore_init(void) uncore_init = (struct intel_uncore_init_fun *)&generic_uncore_init; else return -ENODEV; - } else + } else { uncore_init = (struct intel_uncore_init_fun *)id->driver_data; + if (uncore_no_discover && uncore_init->use_discovery) + return -ENODEV; + if (uncore_init->use_discovery && !intel_uncore_has_discovery_tables()) + return -ENODEV; + } if (uncore_init->pci_init) { pret = uncore_init->pci_init(); diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h index 187d7287039c..b9687980aab6 100644 --- a/arch/x86/events/intel/uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -561,6 +561,7 @@ struct event_constraint * uncore_get_constraint(struct intel_uncore_box *box, struct perf_event *event); void uncore_put_constraint(struct intel_uncore_box *box, struct perf_event *event); u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx); +void uncore_get_alias_name(char *pmu_name, struct intel_uncore_pmu *pmu); extern struct intel_uncore_type *empty_uncore[]; extern struct intel_uncore_type **uncore_msr_uncores; @@ -608,6 +609,9 @@ void snr_uncore_mmio_init(void); int icx_uncore_pci_init(void); void icx_uncore_cpu_init(void); void icx_uncore_mmio_init(void); +int spr_uncore_pci_init(void); +void spr_uncore_cpu_init(void); +void spr_uncore_mmio_init(void); /* uncore_nhmex.c */ void nhmex_uncore_cpu_init(void); diff --git a/arch/x86/events/intel/uncore_discovery.c b/arch/x86/events/intel/uncore_discovery.c index aba9bff95413..3049c646fa20 100644 --- a/arch/x86/events/intel/uncore_discovery.c +++ b/arch/x86/events/intel/uncore_discovery.c 
@@ -337,17 +337,17 @@ static const struct attribute_group generic_uncore_format_group = { .attrs = generic_uncore_formats_attr, }; -static void intel_generic_uncore_msr_init_box(struct intel_uncore_box *box) +void intel_generic_uncore_msr_init_box(struct intel_uncore_box *box) { wrmsrl(uncore_msr_box_ctl(box), GENERIC_PMON_BOX_CTL_INT); } -static void intel_generic_uncore_msr_disable_box(struct intel_uncore_box *box) +void intel_generic_uncore_msr_disable_box(struct intel_uncore_box *box) { wrmsrl(uncore_msr_box_ctl(box), GENERIC_PMON_BOX_CTL_FRZ); } -static void intel_generic_uncore_msr_enable_box(struct intel_uncore_box *box) +void intel_generic_uncore_msr_enable_box(struct intel_uncore_box *box) { wrmsrl(uncore_msr_box_ctl(box), 0); } @@ -377,7 +377,7 @@ static struct intel_uncore_ops generic_uncore_msr_ops = { .read_counter = uncore_msr_read_counter, }; -static void intel_generic_uncore_pci_init_box(struct intel_uncore_box *box) +void intel_generic_uncore_pci_init_box(struct intel_uncore_box *box) { struct pci_dev *pdev = box->pci_dev; int box_ctl = uncore_pci_box_ctl(box); @@ -386,7 +386,7 @@ static void intel_generic_uncore_pci_init_box(struct intel_uncore_box *box) pci_write_config_dword(pdev, box_ctl, GENERIC_PMON_BOX_CTL_INT); } -static void intel_generic_uncore_pci_disable_box(struct intel_uncore_box *box) +void intel_generic_uncore_pci_disable_box(struct intel_uncore_box *box) { struct pci_dev *pdev = box->pci_dev; int box_ctl = uncore_pci_box_ctl(box); @@ -394,7 +394,7 @@ static void intel_generic_uncore_pci_disable_box(struct intel_uncore_box *box) pci_write_config_dword(pdev, box_ctl, GENERIC_PMON_BOX_CTL_FRZ); } -static void intel_generic_uncore_pci_enable_box(struct intel_uncore_box *box) +void intel_generic_uncore_pci_enable_box(struct intel_uncore_box *box) { struct pci_dev *pdev = box->pci_dev; int box_ctl = uncore_pci_box_ctl(box); @@ -411,8 +411,8 @@ static void intel_generic_uncore_pci_enable_event(struct intel_uncore_box *box, pci_write_config_dword(pdev, hwc->config_base, hwc->config); } -static void intel_generic_uncore_pci_disable_event(struct intel_uncore_box *box, - struct perf_event *event) +void intel_generic_uncore_pci_disable_event(struct intel_uncore_box *box, + struct perf_event *event) { struct pci_dev *pdev = box->pci_dev; struct hw_perf_event *hwc = &event->hw; @@ -420,8 +420,8 @@ static void intel_generic_uncore_pci_disable_event(struct intel_uncore_box *box, pci_write_config_dword(pdev, hwc->config_base, 0); } -static u64 intel_generic_uncore_pci_read_counter(struct intel_uncore_box *box, - struct perf_event *event) +u64 intel_generic_uncore_pci_read_counter(struct intel_uncore_box *box, + struct perf_event *event) { struct pci_dev *pdev = box->pci_dev; struct hw_perf_event *hwc = &event->hw; @@ -454,7 +454,7 @@ static unsigned int generic_uncore_mmio_box_ctl(struct intel_uncore_box *box) return type->box_ctls[box->dieid] + type->mmio_offsets[box->pmu->pmu_idx]; } -static void intel_generic_uncore_mmio_init_box(struct intel_uncore_box *box) +void intel_generic_uncore_mmio_init_box(struct intel_uncore_box *box) { unsigned int box_ctl = generic_uncore_mmio_box_ctl(box); struct intel_uncore_type *type = box->pmu->type; @@ -478,7 +478,7 @@ static void intel_generic_uncore_mmio_init_box(struct intel_uncore_box *box) writel(GENERIC_PMON_BOX_CTL_INT, box->io_addr); } -static void intel_generic_uncore_mmio_disable_box(struct intel_uncore_box *box) +void intel_generic_uncore_mmio_disable_box(struct intel_uncore_box *box) { if (!box->io_addr) return; @@ -486,7 
+486,7 @@ static void intel_generic_uncore_mmio_disable_box(struct intel_uncore_box *box) writel(GENERIC_PMON_BOX_CTL_FRZ, box->io_addr); } -static void intel_generic_uncore_mmio_enable_box(struct intel_uncore_box *box) +void intel_generic_uncore_mmio_enable_box(struct intel_uncore_box *box) { if (!box->io_addr) return; @@ -505,8 +505,8 @@ static void intel_generic_uncore_mmio_enable_event(struct intel_uncore_box *box, writel(hwc->config, box->io_addr + hwc->config_base); } -static void intel_generic_uncore_mmio_disable_event(struct intel_uncore_box *box, - struct perf_event *event) +void intel_generic_uncore_mmio_disable_event(struct intel_uncore_box *box, + struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; @@ -568,8 +568,8 @@ static bool uncore_update_uncore_type(enum uncore_access_type type_id, return true; } -static struct intel_uncore_type ** -intel_uncore_generic_init_uncores(enum uncore_access_type type_id) +struct intel_uncore_type ** +intel_uncore_generic_init_uncores(enum uncore_access_type type_id, int num_extra) { struct intel_uncore_discovery_type *type; struct intel_uncore_type **uncores; @@ -577,7 +577,7 @@ intel_uncore_generic_init_uncores(enum uncore_access_type type_id) struct rb_node *node; int i = 0; - uncores = kcalloc(num_discovered_types[type_id] + 1, + uncores = kcalloc(num_discovered_types[type_id] + num_extra + 1, sizeof(struct intel_uncore_type *), GFP_KERNEL); if (!uncores) return empty_uncore; @@ -606,17 +606,17 @@ intel_uncore_generic_init_uncores(enum uncore_access_type type_id) void intel_uncore_generic_uncore_cpu_init(void) { - uncore_msr_uncores = intel_uncore_generic_init_uncores(UNCORE_ACCESS_MSR); + uncore_msr_uncores = intel_uncore_generic_init_uncores(UNCORE_ACCESS_MSR, 0); } int intel_uncore_generic_uncore_pci_init(void) { - uncore_pci_uncores = intel_uncore_generic_init_uncores(UNCORE_ACCESS_PCI); + uncore_pci_uncores = intel_uncore_generic_init_uncores(UNCORE_ACCESS_PCI, 0); return 0; } void intel_uncore_generic_uncore_mmio_init(void) { - uncore_mmio_uncores = intel_uncore_generic_init_uncores(UNCORE_ACCESS_MMIO); + uncore_mmio_uncores = intel_uncore_generic_init_uncores(UNCORE_ACCESS_MMIO, 0); } diff --git a/arch/x86/events/intel/uncore_discovery.h b/arch/x86/events/intel/uncore_discovery.h index 1d652939a01c..7280c8a3c831 100644 --- a/arch/x86/events/intel/uncore_discovery.h +++ b/arch/x86/events/intel/uncore_discovery.h @@ -129,3 +129,24 @@ void intel_uncore_clear_discovery_tables(void); void intel_uncore_generic_uncore_cpu_init(void); int intel_uncore_generic_uncore_pci_init(void); void intel_uncore_generic_uncore_mmio_init(void); + +void intel_generic_uncore_msr_init_box(struct intel_uncore_box *box); +void intel_generic_uncore_msr_disable_box(struct intel_uncore_box *box); +void intel_generic_uncore_msr_enable_box(struct intel_uncore_box *box); + +void intel_generic_uncore_mmio_init_box(struct intel_uncore_box *box); +void intel_generic_uncore_mmio_disable_box(struct intel_uncore_box *box); +void intel_generic_uncore_mmio_enable_box(struct intel_uncore_box *box); +void intel_generic_uncore_mmio_disable_event(struct intel_uncore_box *box, + struct perf_event *event); + +void intel_generic_uncore_pci_init_box(struct intel_uncore_box *box); +void intel_generic_uncore_pci_disable_box(struct intel_uncore_box *box); +void intel_generic_uncore_pci_enable_box(struct intel_uncore_box *box); +void intel_generic_uncore_pci_disable_event(struct intel_uncore_box *box, + struct perf_event *event); +u64 
intel_generic_uncore_pci_read_counter(struct intel_uncore_box *box, + struct perf_event *event); + +struct intel_uncore_type ** +intel_uncore_generic_init_uncores(enum uncore_access_type type_id, int num_extra); diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 609c24aec71a..5ddc0f30db6f 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* SandyBridge-EP/IvyTown uncore support */ #include "uncore.h" +#include "uncore_discovery.h" /* SNB-EP pci bus to socket mapping */ #define SNBEP_CPUNODEID 0x40 @@ -454,6 +455,17 @@ #define ICX_NUMBER_IMC_CHN 2 #define ICX_IMC_MEM_STRIDE 0x4 +/* SPR */ +#define SPR_RAW_EVENT_MASK_EXT 0xffffff + +/* SPR CHA */ +#define SPR_CHA_PMON_CTL_TID_EN (1 << 16) +#define SPR_CHA_PMON_EVENT_MASK (SNBEP_PMON_RAW_EVENT_MASK | \ + SPR_CHA_PMON_CTL_TID_EN) +#define SPR_CHA_PMON_BOX_FILTER_TID 0x3ff + +#define SPR_C0_MSR_PMON_BOX_FILTER0 0x200e + DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7"); DEFINE_UNCORE_FORMAT_ATTR(event2, event, "config:0-6"); DEFINE_UNCORE_FORMAT_ATTR(event_ext, event, "config:0-7,21"); @@ -466,6 +478,7 @@ DEFINE_UNCORE_FORMAT_ATTR(umask_ext4, umask, "config:8-15,32-55"); DEFINE_UNCORE_FORMAT_ATTR(qor, qor, "config:16"); DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18"); DEFINE_UNCORE_FORMAT_ATTR(tid_en, tid_en, "config:19"); +DEFINE_UNCORE_FORMAT_ATTR(tid_en2, tid_en, "config:16"); DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23"); DEFINE_UNCORE_FORMAT_ATTR(thresh9, thresh, "config:24-35"); DEFINE_UNCORE_FORMAT_ATTR(thresh8, thresh, "config:24-31"); @@ -3838,26 +3851,32 @@ clear_attr_update: return ret; } -static int skx_iio_set_mapping(struct intel_uncore_type *type) -{ - return pmu_iio_set_mapping(type, &skx_iio_mapping_group); -} - -static void skx_iio_cleanup_mapping(struct intel_uncore_type *type) +static void +pmu_iio_cleanup_mapping(struct intel_uncore_type *type, struct attribute_group *ag) { - struct attribute **attr = skx_iio_mapping_group.attrs; + struct attribute **attr = ag->attrs; if (!attr) return; for (; *attr; attr++) kfree((*attr)->name); - kfree(attr_to_ext_attr(*skx_iio_mapping_group.attrs)); - kfree(skx_iio_mapping_group.attrs); - skx_iio_mapping_group.attrs = NULL; + kfree(attr_to_ext_attr(*ag->attrs)); + kfree(ag->attrs); + ag->attrs = NULL; kfree(type->topology); } +static int skx_iio_set_mapping(struct intel_uncore_type *type) +{ + return pmu_iio_set_mapping(type, &skx_iio_mapping_group); +} + +static void skx_iio_cleanup_mapping(struct intel_uncore_type *type) +{ + pmu_iio_cleanup_mapping(type, &skx_iio_mapping_group); +} + static struct intel_uncore_type skx_uncore_iio = { .name = "iio", .num_counters = 4, @@ -4501,6 +4520,11 @@ static int snr_iio_set_mapping(struct intel_uncore_type *type) return pmu_iio_set_mapping(type, &snr_iio_mapping_group); } +static void snr_iio_cleanup_mapping(struct intel_uncore_type *type) +{ + pmu_iio_cleanup_mapping(type, &snr_iio_mapping_group); +} + static struct intel_uncore_type snr_uncore_iio = { .name = "iio", .num_counters = 4, @@ -4517,7 +4541,7 @@ static struct intel_uncore_type snr_uncore_iio = { .attr_update = snr_iio_attr_update, .get_topology = snr_iio_get_topology, .set_mapping = snr_iio_set_mapping, - .cleanup_mapping = skx_iio_cleanup_mapping, + .cleanup_mapping = snr_iio_cleanup_mapping, }; static struct intel_uncore_type snr_uncore_irp = { @@ -4783,13 +4807,15 @@ int snr_uncore_pci_init(void) return 0; } -static struct pci_dev 
*snr_uncore_get_mc_dev(int id) +#define SNR_MC_DEVICE_ID 0x3451 + +static struct pci_dev *snr_uncore_get_mc_dev(unsigned int device, int id) { struct pci_dev *mc_dev = NULL; int pkg; while (1) { - mc_dev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3451, mc_dev); + mc_dev = pci_get_device(PCI_VENDOR_ID_INTEL, device, mc_dev); if (!mc_dev) break; pkg = uncore_pcibus_to_dieid(mc_dev->bus); @@ -4799,19 +4825,20 @@ static struct pci_dev *snr_uncore_get_mc_dev(int id) return mc_dev; } -static void __snr_uncore_mmio_init_box(struct intel_uncore_box *box, - unsigned int box_ctl, int mem_offset) +static int snr_uncore_mmio_map(struct intel_uncore_box *box, + unsigned int box_ctl, int mem_offset, + unsigned int device) { - struct pci_dev *pdev = snr_uncore_get_mc_dev(box->dieid); + struct pci_dev *pdev = snr_uncore_get_mc_dev(device, box->dieid); struct intel_uncore_type *type = box->pmu->type; resource_size_t addr; u32 pci_dword; if (!pdev) - return; + return -ENODEV; pci_read_config_dword(pdev, SNR_IMC_MMIO_BASE_OFFSET, &pci_dword); - addr = (pci_dword & SNR_IMC_MMIO_BASE_MASK) << 23; + addr = ((resource_size_t)pci_dword & SNR_IMC_MMIO_BASE_MASK) << 23; pci_read_config_dword(pdev, mem_offset, &pci_dword); addr |= (pci_dword & SNR_IMC_MMIO_MEM0_MASK) << 12; @@ -4821,16 +4848,25 @@ static void __snr_uncore_mmio_init_box(struct intel_uncore_box *box, box->io_addr = ioremap(addr, type->mmio_map_size); if (!box->io_addr) { pr_warn("perf uncore: Failed to ioremap for %s.\n", type->name); - return; + return -EINVAL; } - writel(IVBEP_PMON_BOX_CTL_INT, box->io_addr); + return 0; +} + +static void __snr_uncore_mmio_init_box(struct intel_uncore_box *box, + unsigned int box_ctl, int mem_offset, + unsigned int device) +{ + if (!snr_uncore_mmio_map(box, box_ctl, mem_offset, device)) + writel(IVBEP_PMON_BOX_CTL_INT, box->io_addr); } static void snr_uncore_mmio_init_box(struct intel_uncore_box *box) { __snr_uncore_mmio_init_box(box, uncore_mmio_box_ctl(box), - SNR_IMC_MMIO_MEM0_OFFSET); + SNR_IMC_MMIO_MEM0_OFFSET, + SNR_MC_DEVICE_ID); } static void snr_uncore_mmio_disable_box(struct intel_uncore_box *box) @@ -5092,6 +5128,11 @@ static int icx_iio_set_mapping(struct intel_uncore_type *type) return pmu_iio_set_mapping(type, &icx_iio_mapping_group); } +static void icx_iio_cleanup_mapping(struct intel_uncore_type *type) +{ + pmu_iio_cleanup_mapping(type, &icx_iio_mapping_group); +} + static struct intel_uncore_type icx_uncore_iio = { .name = "iio", .num_counters = 4, @@ -5109,7 +5150,7 @@ static struct intel_uncore_type icx_uncore_iio = { .attr_update = icx_iio_attr_update, .get_topology = icx_iio_get_topology, .set_mapping = icx_iio_set_mapping, - .cleanup_mapping = skx_iio_cleanup_mapping, + .cleanup_mapping = icx_iio_cleanup_mapping, }; static struct intel_uncore_type icx_uncore_irp = { @@ -5405,7 +5446,8 @@ static void icx_uncore_imc_init_box(struct intel_uncore_box *box) int mem_offset = (box->pmu->pmu_idx / ICX_NUMBER_IMC_CHN) * ICX_IMC_MEM_STRIDE + SNR_IMC_MMIO_MEM0_OFFSET; - __snr_uncore_mmio_init_box(box, box_ctl, mem_offset); + __snr_uncore_mmio_init_box(box, box_ctl, mem_offset, + SNR_MC_DEVICE_ID); } static struct intel_uncore_ops icx_uncore_mmio_ops = { @@ -5475,7 +5517,8 @@ static void icx_uncore_imc_freerunning_init_box(struct intel_uncore_box *box) int mem_offset = box->pmu->pmu_idx * ICX_IMC_MEM_STRIDE + SNR_IMC_MMIO_MEM0_OFFSET; - __snr_uncore_mmio_init_box(box, uncore_mmio_box_ctl(box), mem_offset); + snr_uncore_mmio_map(box, uncore_mmio_box_ctl(box), + mem_offset, SNR_MC_DEVICE_ID); } static struct 
intel_uncore_ops icx_uncore_imc_freerunning_ops = { @@ -5509,3 +5552,497 @@ void icx_uncore_mmio_init(void) } /* end of ICX uncore support */ + +/* SPR uncore support */ + +static void spr_uncore_msr_enable_event(struct intel_uncore_box *box, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + + if (reg1->idx != EXTRA_REG_NONE) + wrmsrl(reg1->reg, reg1->config); + + wrmsrl(hwc->config_base, hwc->config); +} + +static void spr_uncore_msr_disable_event(struct intel_uncore_box *box, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + + if (reg1->idx != EXTRA_REG_NONE) + wrmsrl(reg1->reg, 0); + + wrmsrl(hwc->config_base, 0); +} + +static int spr_cha_hw_config(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + bool tie_en = !!(event->hw.config & SPR_CHA_PMON_CTL_TID_EN); + struct intel_uncore_type *type = box->pmu->type; + + if (tie_en) { + reg1->reg = SPR_C0_MSR_PMON_BOX_FILTER0 + + HSWEP_CBO_MSR_OFFSET * type->box_ids[box->pmu->pmu_idx]; + reg1->config = event->attr.config1 & SPR_CHA_PMON_BOX_FILTER_TID; + reg1->idx = 0; + } + + return 0; +} + +static struct intel_uncore_ops spr_uncore_chabox_ops = { + .init_box = intel_generic_uncore_msr_init_box, + .disable_box = intel_generic_uncore_msr_disable_box, + .enable_box = intel_generic_uncore_msr_enable_box, + .disable_event = spr_uncore_msr_disable_event, + .enable_event = spr_uncore_msr_enable_event, + .read_counter = uncore_msr_read_counter, + .hw_config = spr_cha_hw_config, + .get_constraint = uncore_get_constraint, + .put_constraint = uncore_put_constraint, +}; + +static struct attribute *spr_uncore_cha_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask_ext4.attr, + &format_attr_tid_en2.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_thresh8.attr, + &format_attr_filter_tid5.attr, + NULL, +}; +static const struct attribute_group spr_uncore_chabox_format_group = { + .name = "format", + .attrs = spr_uncore_cha_formats_attr, +}; + +static ssize_t alias_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct intel_uncore_pmu *pmu = dev_to_uncore_pmu(dev); + char pmu_name[UNCORE_PMU_NAME_LEN]; + + uncore_get_alias_name(pmu_name, pmu); + return sysfs_emit(buf, "%s\n", pmu_name); +} + +static DEVICE_ATTR_RO(alias); + +static struct attribute *uncore_alias_attrs[] = { + &dev_attr_alias.attr, + NULL +}; + +ATTRIBUTE_GROUPS(uncore_alias); + +static struct intel_uncore_type spr_uncore_chabox = { + .name = "cha", + .event_mask = SPR_CHA_PMON_EVENT_MASK, + .event_mask_ext = SPR_RAW_EVENT_MASK_EXT, + .num_shared_regs = 1, + .ops = &spr_uncore_chabox_ops, + .format_group = &spr_uncore_chabox_format_group, + .attr_update = uncore_alias_groups, +}; + +static struct intel_uncore_type spr_uncore_iio = { + .name = "iio", + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .event_mask_ext = SNR_IIO_PMON_RAW_EVENT_MASK_EXT, + .format_group = &snr_uncore_iio_format_group, + .attr_update = uncore_alias_groups, +}; + +static struct attribute *spr_uncore_raw_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask_ext4.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_thresh8.attr, + NULL, +}; + +static const struct attribute_group spr_uncore_raw_format_group = { + .name = "format", + .attrs = spr_uncore_raw_formats_attr, +}; + +#define 
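/*
 * spr_cha_hw_config() above wires up the optional CHA TID filter: when the
 * user sets tid_en (bit 16 of config on SPR, hence the separate tid_en2
 * format attribute), the thread ID taken from config1 is written to the
 * per-box filter MSR at SPR_C0_MSR_PMON_BOX_FILTER0 +
 * HSWEP_CBO_MSR_OFFSET * box id. Illustrative userspace usage (the
 * event/umask encoding here is an assumption, not taken from this patch):
 *
 *	perf stat -a -e 'uncore_cha_0/event=0x35,umask=0x1,tid_en=1,config1=0x3/' -- sleep 1
 *
 * With tid_en clear, reg1->idx stays EXTRA_REG_NONE and the filter MSR is
 * never touched, so the event counts for all threads.
 */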
SPR_UNCORE_COMMON_FORMAT() \ + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, \ + .event_mask_ext = SPR_RAW_EVENT_MASK_EXT, \ + .format_group = &spr_uncore_raw_format_group, \ + .attr_update = uncore_alias_groups + +static struct intel_uncore_type spr_uncore_irp = { + SPR_UNCORE_COMMON_FORMAT(), + .name = "irp", + +}; + +static struct intel_uncore_type spr_uncore_m2pcie = { + SPR_UNCORE_COMMON_FORMAT(), + .name = "m2pcie", +}; + +static struct intel_uncore_type spr_uncore_pcu = { + .name = "pcu", + .attr_update = uncore_alias_groups, +}; + +static void spr_uncore_mmio_enable_event(struct intel_uncore_box *box, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + if (!box->io_addr) + return; + + if (uncore_pmc_fixed(hwc->idx)) + writel(SNBEP_PMON_CTL_EN, box->io_addr + hwc->config_base); + else + writel(hwc->config, box->io_addr + hwc->config_base); +} + +static struct intel_uncore_ops spr_uncore_mmio_ops = { + .init_box = intel_generic_uncore_mmio_init_box, + .exit_box = uncore_mmio_exit_box, + .disable_box = intel_generic_uncore_mmio_disable_box, + .enable_box = intel_generic_uncore_mmio_enable_box, + .disable_event = intel_generic_uncore_mmio_disable_event, + .enable_event = spr_uncore_mmio_enable_event, + .read_counter = uncore_mmio_read_counter, +}; + +static struct intel_uncore_type spr_uncore_imc = { + SPR_UNCORE_COMMON_FORMAT(), + .name = "imc", + .fixed_ctr_bits = 48, + .fixed_ctr = SNR_IMC_MMIO_PMON_FIXED_CTR, + .fixed_ctl = SNR_IMC_MMIO_PMON_FIXED_CTL, + .ops = &spr_uncore_mmio_ops, +}; + +static void spr_uncore_pci_enable_event(struct intel_uncore_box *box, + struct perf_event *event) +{ + struct pci_dev *pdev = box->pci_dev; + struct hw_perf_event *hwc = &event->hw; + + pci_write_config_dword(pdev, hwc->config_base + 4, (u32)(hwc->config >> 32)); + pci_write_config_dword(pdev, hwc->config_base, (u32)hwc->config); +} + +static struct intel_uncore_ops spr_uncore_pci_ops = { + .init_box = intel_generic_uncore_pci_init_box, + .disable_box = intel_generic_uncore_pci_disable_box, + .enable_box = intel_generic_uncore_pci_enable_box, + .disable_event = intel_generic_uncore_pci_disable_event, + .enable_event = spr_uncore_pci_enable_event, + .read_counter = intel_generic_uncore_pci_read_counter, +}; + +#define SPR_UNCORE_PCI_COMMON_FORMAT() \ + SPR_UNCORE_COMMON_FORMAT(), \ + .ops = &spr_uncore_pci_ops + +static struct intel_uncore_type spr_uncore_m2m = { + SPR_UNCORE_PCI_COMMON_FORMAT(), + .name = "m2m", +}; + +static struct intel_uncore_type spr_uncore_upi = { + SPR_UNCORE_PCI_COMMON_FORMAT(), + .name = "upi", +}; + +static struct intel_uncore_type spr_uncore_m3upi = { + SPR_UNCORE_PCI_COMMON_FORMAT(), + .name = "m3upi", +}; + +static struct intel_uncore_type spr_uncore_mdf = { + SPR_UNCORE_COMMON_FORMAT(), + .name = "mdf", +}; + +#define UNCORE_SPR_NUM_UNCORE_TYPES 12 +#define UNCORE_SPR_IIO 1 +#define UNCORE_SPR_IMC 6 + +static struct intel_uncore_type *spr_uncores[UNCORE_SPR_NUM_UNCORE_TYPES] = { + &spr_uncore_chabox, + &spr_uncore_iio, + &spr_uncore_irp, + &spr_uncore_m2pcie, + &spr_uncore_pcu, + NULL, + &spr_uncore_imc, + &spr_uncore_m2m, + &spr_uncore_upi, + &spr_uncore_m3upi, + NULL, + &spr_uncore_mdf, +}; + +enum perf_uncore_spr_iio_freerunning_type_id { + SPR_IIO_MSR_IOCLK, + SPR_IIO_MSR_BW_IN, + SPR_IIO_MSR_BW_OUT, + + SPR_IIO_FREERUNNING_TYPE_MAX, +}; + +static struct freerunning_counters spr_iio_freerunning[] = { + [SPR_IIO_MSR_IOCLK] = { 0x340e, 0x1, 0x10, 1, 48 }, + [SPR_IIO_MSR_BW_IN] = { 0x3800, 0x1, 0x10, 8, 48 }, + [SPR_IIO_MSR_BW_OUT] = { 0x3808, 
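/*
 * Each freerunning_counters entry in this table encodes, in order: the
 * counter base MSR, the stride between counters of one type, the per-box
 * offset, the number of counters, and the counter width. Assuming the same
 * layout the other platforms in this file use, BW_IN ({ 0x3800, 0x1, 0x10,
 * 8, 48 }) means IIO box n exposes port p's inbound-bandwidth counter as a
 * 48-bit value at MSR 0x3800 + p * 0x1 + n * 0x10.
 */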
0x1, 0x10, 8, 48 }, +}; + +static struct uncore_event_desc spr_uncore_iio_freerunning_events[] = { + /* Free-Running IIO CLOCKS Counter */ + INTEL_UNCORE_EVENT_DESC(ioclk, "event=0xff,umask=0x10"), + /* Free-Running IIO BANDWIDTH IN Counters */ + INTEL_UNCORE_EVENT_DESC(bw_in_port0, "event=0xff,umask=0x20"), + INTEL_UNCORE_EVENT_DESC(bw_in_port0.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port0.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port1, "event=0xff,umask=0x21"), + INTEL_UNCORE_EVENT_DESC(bw_in_port1.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port1.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port2, "event=0xff,umask=0x22"), + INTEL_UNCORE_EVENT_DESC(bw_in_port2.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port2.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port3, "event=0xff,umask=0x23"), + INTEL_UNCORE_EVENT_DESC(bw_in_port3.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port3.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port4, "event=0xff,umask=0x24"), + INTEL_UNCORE_EVENT_DESC(bw_in_port4.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port4.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port5, "event=0xff,umask=0x25"), + INTEL_UNCORE_EVENT_DESC(bw_in_port5.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port5.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port6, "event=0xff,umask=0x26"), + INTEL_UNCORE_EVENT_DESC(bw_in_port6.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port6.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port7, "event=0xff,umask=0x27"), + INTEL_UNCORE_EVENT_DESC(bw_in_port7.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port7.unit, "MiB"), + /* Free-Running IIO BANDWIDTH OUT Counters */ + INTEL_UNCORE_EVENT_DESC(bw_out_port0, "event=0xff,umask=0x30"), + INTEL_UNCORE_EVENT_DESC(bw_out_port0.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_out_port0.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_out_port1, "event=0xff,umask=0x31"), + INTEL_UNCORE_EVENT_DESC(bw_out_port1.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_out_port1.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_out_port2, "event=0xff,umask=0x32"), + INTEL_UNCORE_EVENT_DESC(bw_out_port2.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_out_port2.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_out_port3, "event=0xff,umask=0x33"), + INTEL_UNCORE_EVENT_DESC(bw_out_port3.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_out_port3.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_out_port4, "event=0xff,umask=0x34"), + INTEL_UNCORE_EVENT_DESC(bw_out_port4.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_out_port4.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_out_port5, "event=0xff,umask=0x35"), + INTEL_UNCORE_EVENT_DESC(bw_out_port5.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_out_port5.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_out_port6, "event=0xff,umask=0x36"), + INTEL_UNCORE_EVENT_DESC(bw_out_port6.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_out_port6.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_out_port7, "event=0xff,umask=0x37"), + INTEL_UNCORE_EVENT_DESC(bw_out_port7.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_out_port7.unit, "MiB"), + { /* end: all zeroes */ }, +}; + +static struct intel_uncore_type spr_uncore_iio_free_running = { + .name = "iio_free_running", + .num_counters = 17, + .num_freerunning_types = SPR_IIO_FREERUNNING_TYPE_MAX, + .freerunning = spr_iio_freerunning, + .ops = &skx_uncore_iio_freerunning_ops, + .event_descs = spr_uncore_iio_freerunning_events, 
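/*
 * The umask values above are not written to hardware; the generic
 * free-running helpers decode them as, roughly, type = (umask - 0x10) >> 4
 * and index = umask & 0xf. For example:
 *
 *	ioclk:        umask = 0x10 -> type 0 (SPR_IIO_MSR_IOCLK),  counter 0
 *	bw_in_port5:  umask = 0x25 -> type 1 (SPR_IIO_MSR_BW_IN),  port 5
 *	bw_out_port3: umask = 0x33 -> type 2 (SPR_IIO_MSR_BW_OUT), port 3
 *
 * The 3.814697266e-6 scale is 4 / 2^20: the counters tick in 4-byte units
 * and the reported unit is MiB.
 */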
+ .format_group = &skx_uncore_iio_freerunning_format_group, +}; + +enum perf_uncore_spr_imc_freerunning_type_id { + SPR_IMC_DCLK, + SPR_IMC_PQ_CYCLES, + + SPR_IMC_FREERUNNING_TYPE_MAX, +}; + +static struct freerunning_counters spr_imc_freerunning[] = { + [SPR_IMC_DCLK] = { 0x22b0, 0x0, 0, 1, 48 }, + [SPR_IMC_PQ_CYCLES] = { 0x2318, 0x8, 0, 2, 48 }, +}; + +static struct uncore_event_desc spr_uncore_imc_freerunning_events[] = { + INTEL_UNCORE_EVENT_DESC(dclk, "event=0xff,umask=0x10"), + + INTEL_UNCORE_EVENT_DESC(rpq_cycles, "event=0xff,umask=0x20"), + INTEL_UNCORE_EVENT_DESC(wpq_cycles, "event=0xff,umask=0x21"), + { /* end: all zeroes */ }, +}; + +#define SPR_MC_DEVICE_ID 0x3251 + +static void spr_uncore_imc_freerunning_init_box(struct intel_uncore_box *box) +{ + int mem_offset = box->pmu->pmu_idx * ICX_IMC_MEM_STRIDE + SNR_IMC_MMIO_MEM0_OFFSET; + + snr_uncore_mmio_map(box, uncore_mmio_box_ctl(box), + mem_offset, SPR_MC_DEVICE_ID); +} + +static struct intel_uncore_ops spr_uncore_imc_freerunning_ops = { + .init_box = spr_uncore_imc_freerunning_init_box, + .exit_box = uncore_mmio_exit_box, + .read_counter = uncore_mmio_read_counter, + .hw_config = uncore_freerunning_hw_config, +}; + +static struct intel_uncore_type spr_uncore_imc_free_running = { + .name = "imc_free_running", + .num_counters = 3, + .mmio_map_size = SNR_IMC_MMIO_SIZE, + .num_freerunning_types = SPR_IMC_FREERUNNING_TYPE_MAX, + .freerunning = spr_imc_freerunning, + .ops = &spr_uncore_imc_freerunning_ops, + .event_descs = spr_uncore_imc_freerunning_events, + .format_group = &skx_uncore_iio_freerunning_format_group, +}; + +#define UNCORE_SPR_MSR_EXTRA_UNCORES 1 +#define UNCORE_SPR_MMIO_EXTRA_UNCORES 1 + +static struct intel_uncore_type *spr_msr_uncores[UNCORE_SPR_MSR_EXTRA_UNCORES] = { + &spr_uncore_iio_free_running, +}; + +static struct intel_uncore_type *spr_mmio_uncores[UNCORE_SPR_MMIO_EXTRA_UNCORES] = { + &spr_uncore_imc_free_running, +}; + +static void uncore_type_customized_copy(struct intel_uncore_type *to_type, + struct intel_uncore_type *from_type) +{ + if (!to_type || !from_type) + return; + + if (from_type->name) + to_type->name = from_type->name; + if (from_type->fixed_ctr_bits) + to_type->fixed_ctr_bits = from_type->fixed_ctr_bits; + if (from_type->event_mask) + to_type->event_mask = from_type->event_mask; + if (from_type->event_mask_ext) + to_type->event_mask_ext = from_type->event_mask_ext; + if (from_type->fixed_ctr) + to_type->fixed_ctr = from_type->fixed_ctr; + if (from_type->fixed_ctl) + to_type->fixed_ctl = from_type->fixed_ctl; + if (from_type->fixed_ctr_bits) + to_type->fixed_ctr_bits = from_type->fixed_ctr_bits; + if (from_type->num_shared_regs) + to_type->num_shared_regs = from_type->num_shared_regs; + if (from_type->constraints) + to_type->constraints = from_type->constraints; + if (from_type->ops) + to_type->ops = from_type->ops; + if (from_type->event_descs) + to_type->event_descs = from_type->event_descs; + if (from_type->format_group) + to_type->format_group = from_type->format_group; + if (from_type->attr_update) + to_type->attr_update = from_type->attr_update; +} + +static struct intel_uncore_type ** +uncore_get_uncores(enum uncore_access_type type_id, int num_extra, + struct intel_uncore_type **extra) +{ + struct intel_uncore_type **types, **start_types; + int i; + + start_types = types = intel_uncore_generic_init_uncores(type_id, num_extra); + + /* Only copy the customized features */ + for (; *types; types++) { + if ((*types)->type_id >= UNCORE_SPR_NUM_UNCORE_TYPES) + continue; + 
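/*
 * The loop below overlays platform knowledge on top of the generic types
 * built from the firmware discovery tables: uncore_type_customized_copy()
 * only replaces members the SPR definitions explicitly set. For the CHA
 * (type id 0), for instance:
 *
 *	struct intel_uncore_type *t;	// discovered type with type_id 0
 *	uncore_type_customized_copy(t, spr_uncores[0]);
 *	// t->name is now "cha" and t->ops is &spr_uncore_chabox_ops, while
 *	// counter counts and register addresses still come from discovery.
 *
 * NULL entries in spr_uncores[] leave the discovered defaults untouched.
 */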
uncore_type_customized_copy(*types, spr_uncores[(*types)->type_id]); + } + + for (i = 0; i < num_extra; i++, types++) + *types = extra[i]; + + return start_types; +} + +static struct intel_uncore_type * +uncore_find_type_by_id(struct intel_uncore_type **types, int type_id) +{ + for (; *types; types++) { + if (type_id == (*types)->type_id) + return *types; + } + + return NULL; +} + +static int uncore_type_max_boxes(struct intel_uncore_type **types, + int type_id) +{ + struct intel_uncore_type *type; + int i, max = 0; + + type = uncore_find_type_by_id(types, type_id); + if (!type) + return 0; + + for (i = 0; i < type->num_boxes; i++) { + if (type->box_ids[i] > max) + max = type->box_ids[i]; + } + + return max + 1; +} + +void spr_uncore_cpu_init(void) +{ + uncore_msr_uncores = uncore_get_uncores(UNCORE_ACCESS_MSR, + UNCORE_SPR_MSR_EXTRA_UNCORES, + spr_msr_uncores); + + spr_uncore_iio_free_running.num_boxes = uncore_type_max_boxes(uncore_msr_uncores, UNCORE_SPR_IIO); +} + +int spr_uncore_pci_init(void) +{ + uncore_pci_uncores = uncore_get_uncores(UNCORE_ACCESS_PCI, 0, NULL); + return 0; +} + +void spr_uncore_mmio_init(void) +{ + int ret = snbep_pci2phy_map_init(0x3250, SKX_CPUNODEID, SKX_GIDNIDMAP, true); + + if (ret) + uncore_mmio_uncores = uncore_get_uncores(UNCORE_ACCESS_MMIO, 0, NULL); + else { + uncore_mmio_uncores = uncore_get_uncores(UNCORE_ACCESS_MMIO, + UNCORE_SPR_MMIO_EXTRA_UNCORES, + spr_mmio_uncores); + + spr_uncore_imc_free_running.num_boxes = uncore_type_max_boxes(uncore_mmio_uncores, UNCORE_SPR_IMC) / 2; + } +} + +/* end of SPR uncore support */ diff --git a/arch/x86/include/asm/amd-ibs.h b/arch/x86/include/asm/amd-ibs.h new file mode 100644 index 000000000000..46e1df45efc0 --- /dev/null +++ b/arch/x86/include/asm/amd-ibs.h @@ -0,0 +1,132 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * From PPR Vol 1 for AMD Family 19h Model 01h B1 + * 55898 Rev 0.35 - Feb 5, 2021 + */ + +#include <asm/msr-index.h> + +/* + * IBS Hardware MSRs + */ + +/* MSR 0xc0011030: IBS Fetch Control */ +union ibs_fetch_ctl { + __u64 val; + struct { + __u64 fetch_maxcnt:16,/* 0-15: instruction fetch max. count */ + fetch_cnt:16, /* 16-31: instruction fetch count */ + fetch_lat:16, /* 32-47: instruction fetch latency */ + fetch_en:1, /* 48: instruction fetch enable */ + fetch_val:1, /* 49: instruction fetch valid */ + fetch_comp:1, /* 50: instruction fetch complete */ + ic_miss:1, /* 51: i-cache miss */ + phy_addr_valid:1,/* 52: physical address valid */ + l1tlb_pgsz:2, /* 53-54: i-cache L1TLB page size + * (needs IbsPhyAddrValid) */ + l1tlb_miss:1, /* 55: i-cache fetch missed in L1TLB */ + l2tlb_miss:1, /* 56: i-cache fetch missed in L2TLB */ + rand_en:1, /* 57: random tagging enable */ + fetch_l2_miss:1,/* 58: L2 miss for sampled fetch + * (needs IbsFetchComp) */ + reserved:5; /* 59-63: reserved */ + }; +}; + +/* MSR 0xc0011033: IBS Execution Control */ +union ibs_op_ctl { + __u64 val; + struct { + __u64 opmaxcnt:16, /* 0-15: periodic op max. 
count */ + reserved0:1, /* 16: reserved */ + op_en:1, /* 17: op sampling enable */ + op_val:1, /* 18: op sample valid */ + cnt_ctl:1, /* 19: periodic op counter control */ + opmaxcnt_ext:7, /* 20-26: upper 7 bits of periodic op maximum count */ + reserved1:5, /* 27-31: reserved */ + opcurcnt:27, /* 32-58: periodic op counter current count */ + reserved2:5; /* 59-63: reserved */ + }; +}; + +/* MSR 0xc0011035: IBS Op Data */ +union ibs_op_data { + __u64 val; + struct { + __u64 comp_to_ret_ctr:16, /* 0-15: op completion to retire count */ + tag_to_ret_ctr:16, /* 16-31: op tag to retire count */ + reserved1:2, /* 32-33: reserved */ + op_return:1, /* 34: return op */ + op_brn_taken:1, /* 35: taken branch op */ + op_brn_misp:1, /* 36: mispredicted branch op */ + op_brn_ret:1, /* 37: branch op retired */ + op_rip_invalid:1, /* 38: RIP is invalid */ + op_brn_fuse:1, /* 39: fused branch op */ + op_microcode:1, /* 40: microcode op */ + reserved2:23; /* 41-63: reserved */ + }; +}; + +/* MSR 0xc0011036: IBS Op Data 2 */ +union ibs_op_data2 { + __u64 val; + struct { + __u64 data_src:3, /* 0-2: data source */ + reserved0:1, /* 3: reserved */ + rmt_node:1, /* 4: destination node */ + cache_hit_st:1, /* 5: cache hit state */ + reserved1:58; /* 6-63: reserved */ + }; +}; + +/* MSR 0xc0011037: IBS Op Data 3 */ +union ibs_op_data3 { + __u64 val; + struct { + __u64 ld_op:1, /* 0: load op */ + st_op:1, /* 1: store op */ + dc_l1tlb_miss:1, /* 2: data cache L1TLB miss */ + dc_l2tlb_miss:1, /* 3: data cache L2TLB miss */ + dc_l1tlb_hit_2m:1, /* 4: data cache L1TLB hit in 2M page */ + dc_l1tlb_hit_1g:1, /* 5: data cache L1TLB hit in 1G page */ + dc_l2tlb_hit_2m:1, /* 6: data cache L2TLB hit in 2M page */ + dc_miss:1, /* 7: data cache miss */ + dc_mis_acc:1, /* 8: misaligned access */ + reserved:4, /* 9-12: reserved */ + dc_wc_mem_acc:1, /* 13: write combining memory access */ + dc_uc_mem_acc:1, /* 14: uncacheable memory access */ + dc_locked_op:1, /* 15: locked operation */ + dc_miss_no_mab_alloc:1, /* 16: DC miss with no MAB allocated */ + dc_lin_addr_valid:1, /* 17: data cache linear address valid */ + dc_phy_addr_valid:1, /* 18: data cache physical address valid */ + dc_l2_tlb_hit_1g:1, /* 19: data cache L2TLB hit in 1GB page */ + l2_miss:1, /* 20: L2 cache miss */ + sw_pf:1, /* 21: software prefetch */ + op_mem_width:4, /* 22-25: load/store size in bytes */ + op_dc_miss_open_mem_reqs:6, /* 26-31: outstanding mem reqs on DC fill */ + dc_miss_lat:16, /* 32-47: data cache miss latency */ + tlb_refill_lat:16; /* 48-63: L1 TLB refill latency */ + }; +}; + +/* MSR 0xc001103c: IBS Fetch Control Extended */ +union ic_ibs_extd_ctl { + __u64 val; + struct { + __u64 itlb_refill_lat:16, /* 0-15: ITLB Refill latency for sampled fetch */ + reserved:48; /* 16-63: reserved */ + }; +}; + +/* + * IBS driver related + */ + +struct perf_ibs_data { + u32 size; + union { + u32 data[0]; /* data buffer starts here */ + u32 caps; + }; + u64 regs[MSR_AMD64_IBS_REG_COUNT_MAX]; +}; diff --git a/arch/x86/include/asm/kfence.h b/arch/x86/include/asm/kfence.h index 05b48b33baf0..ff5c7134a37a 100644 --- a/arch/x86/include/asm/kfence.h +++ b/arch/x86/include/asm/kfence.h @@ -8,6 +8,8 @@ #ifndef _ASM_X86_KFENCE_H #define _ASM_X86_KFENCE_H +#ifndef MODULE + #include <linux/bug.h> #include <linux/kfence.h> @@ -66,4 +68,6 @@ static inline bool kfence_protect_page(unsigned long addr, bool protect) return true; } +#endif /* !MODULE */ + #endif /* _ASM_X86_KFENCE_H */ diff --git a/arch/x86/include/asm/kvm_host.h 
b/arch/x86/include/asm/kvm_host.h index 974cbfb1eefe..af6ce8d4c86a 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1038,6 +1038,13 @@ struct kvm_arch { struct list_head lpage_disallowed_mmu_pages; struct kvm_page_track_notifier_node mmu_sp_tracker; struct kvm_page_track_notifier_head track_notifier_head; + /* + * Protects marking pages unsync during page faults, as TDP MMU page + * faults only take mmu_lock for read. For simplicity, the unsync + * pages lock is always taken when marking pages unsync regardless of + * whether mmu_lock is held for read or write. + */ + spinlock_t mmu_unsync_pages_lock; struct list_head assigned_dev_head; struct iommu_domain *iommu_domain; diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 0607ec4f5091..da9321548f6f 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -265,6 +265,7 @@ enum mcp_flags { MCP_TIMESTAMP = BIT(0), /* log time stamp */ MCP_UC = BIT(1), /* log uncorrected errors */ MCP_DONTLOG = BIT(2), /* only clear, don't log */ + MCP_QUEUE_LOG = BIT(3), /* only queue to genpool */ }; bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b); diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 3ad8c6d3cbb3..ec2d5c8c6694 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -252,6 +252,8 @@ DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb); DECLARE_STATIC_KEY_FALSE(mds_user_clear); DECLARE_STATIC_KEY_FALSE(mds_idle_clear); +DECLARE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush); + #include <asm/segment.h> /** diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index f3020c54e2cb..9ad2acaaae9b 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -136,6 +136,8 @@ struct cpuinfo_x86 { u16 logical_die_id; /* Index into per_cpu list: */ u16 cpu_index; + /* Is SMT active on this core? 
*/ + bool smt_active; u32 microcode; /* Address space bits used by the cache internally */ u8 x86_cache_bits; @@ -795,6 +797,8 @@ extern int set_tsc_mode(unsigned int val); DECLARE_PER_CPU(u64, msr_misc_features_shadow); +extern u16 get_llc_id(unsigned int cpu); + #ifdef CONFIG_CPU_SUP_AMD extern u32 amd_get_nodes_per_socket(void); extern u32 amd_get_highest_perf(void); diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index e322676039f4..b00dbc5fac2b 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -184,6 +184,8 @@ struct __attribute__ ((__packed__)) vmcb_control_area { #define V_IGN_TPR_SHIFT 20 #define V_IGN_TPR_MASK (1 << V_IGN_TPR_SHIFT) +#define V_IRQ_INJECTION_BITS_MASK (V_IRQ_MASK | V_INTR_PRIO_MASK | V_IGN_TPR_MASK) + #define V_INTR_MASKING_SHIFT 24 #define V_INTR_MASKING_MASK (1 << V_INTR_MASKING_SHIFT) diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index de406d93b515..cf132663c219 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -81,7 +81,7 @@ struct thread_info { #define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/ #define TIF_SSBD 5 /* Speculative store bypass disable */ #define TIF_SPEC_IB 9 /* Indirect branch speculation mitigation */ -#define TIF_SPEC_FORCE_UPDATE 10 /* Force speculation MSR update in context switch */ +#define TIF_SPEC_L1D_FLUSH 10 /* Flush L1D on mm switches (processes) */ #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ #define TIF_UPROBE 12 /* breakpointed or singlestepping */ #define TIF_PATCH_PENDING 13 /* pending live patching update */ @@ -93,6 +93,7 @@ struct thread_info { #define TIF_MEMDIE 20 /* is terminating due to OOM killer */ #define TIF_POLLING_NRFLAG 21 /* idle is polling for TIF_NEED_RESCHED */ #define TIF_IO_BITMAP 22 /* uses I/O bitmap */ +#define TIF_SPEC_FORCE_UPDATE 23 /* Force speculation MSR update in context switch */ #define TIF_FORCED_TF 24 /* true if TF in eflags artificially */ #define TIF_BLOCKSTEP 25 /* set when we want DEBUGCTLMSR_BTF */ #define TIF_LAZY_MMU_UPDATES 27 /* task is updating the mmu lazily */ @@ -104,7 +105,7 @@ struct thread_info { #define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) #define _TIF_SSBD (1 << TIF_SSBD) #define _TIF_SPEC_IB (1 << TIF_SPEC_IB) -#define _TIF_SPEC_FORCE_UPDATE (1 << TIF_SPEC_FORCE_UPDATE) +#define _TIF_SPEC_L1D_FLUSH (1 << TIF_SPEC_L1D_FLUSH) #define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY) #define _TIF_UPROBE (1 << TIF_UPROBE) #define _TIF_PATCH_PENDING (1 << TIF_PATCH_PENDING) @@ -115,6 +116,7 @@ struct thread_info { #define _TIF_SLD (1 << TIF_SLD) #define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) #define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) +#define _TIF_SPEC_FORCE_UPDATE (1 << TIF_SPEC_FORCE_UPDATE) #define _TIF_FORCED_TF (1 << TIF_FORCED_TF) #define _TIF_BLOCKSTEP (1 << TIF_BLOCKSTEP) #define _TIF_LAZY_MMU_UPDATES (1 << TIF_LAZY_MMU_UPDATES) diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index fa952eadbc2e..b587a9ee9cb2 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -83,7 +83,7 @@ struct tlb_state { /* Last user mm for optimizing IBPB */ union { struct mm_struct *last_user_mm; - unsigned long last_user_mm_ibpb; + unsigned long last_user_mm_spec; }; u16 loaded_mm_asid; diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 7846499c54ec..c1bb384935b0 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ 
b/arch/x86/kernel/apic/io_apic.c @@ -1986,7 +1986,8 @@ static struct irq_chip ioapic_chip __read_mostly = { .irq_set_affinity = ioapic_set_affinity, .irq_retrigger = irq_chip_retrigger_hierarchy, .irq_get_irqchip_state = ioapic_irq_get_chip_state, - .flags = IRQCHIP_SKIP_SET_WAKE, + .flags = IRQCHIP_SKIP_SET_WAKE | + IRQCHIP_AFFINITY_PRE_STARTUP, }; static struct irq_chip ioapic_ir_chip __read_mostly = { @@ -1999,7 +2000,8 @@ static struct irq_chip ioapic_ir_chip __read_mostly = { .irq_set_affinity = ioapic_set_affinity, .irq_retrigger = irq_chip_retrigger_hierarchy, .irq_get_irqchip_state = ioapic_irq_get_chip_state, - .flags = IRQCHIP_SKIP_SET_WAKE, + .flags = IRQCHIP_SKIP_SET_WAKE | + IRQCHIP_AFFINITY_PRE_STARTUP, }; static inline void init_IO_APIC_traps(void) diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c index 44ebe25e7703..dbacb9ec8843 100644 --- a/arch/x86/kernel/apic/msi.c +++ b/arch/x86/kernel/apic/msi.c @@ -58,11 +58,13 @@ msi_set_affinity(struct irq_data *irqd, const struct cpumask *mask, bool force) * The quirk bit is not set in this case. * - The new vector is the same as the old vector * - The old vector is MANAGED_IRQ_SHUTDOWN_VECTOR (interrupt starts up) + * - The interrupt is not yet started up * - The new destination CPU is the same as the old destination CPU */ if (!irqd_msi_nomask_quirk(irqd) || cfg->vector == old_cfg.vector || old_cfg.vector == MANAGED_IRQ_SHUTDOWN_VECTOR || + !irqd_is_started(irqd) || cfg->dest_apicid == old_cfg.dest_apicid) { irq_msi_update_msg(irqd, cfg); return ret; @@ -150,7 +152,8 @@ static struct irq_chip pci_msi_controller = { .irq_ack = irq_chip_ack_parent, .irq_retrigger = irq_chip_retrigger_hierarchy, .irq_set_affinity = msi_set_affinity, - .flags = IRQCHIP_SKIP_SET_WAKE, + .flags = IRQCHIP_SKIP_SET_WAKE | + IRQCHIP_AFFINITY_PRE_STARTUP, }; int pci_msi_prepare(struct irq_domain *domain, struct device *dev, int nvec, @@ -219,7 +222,8 @@ static struct irq_chip pci_msi_ir_controller = { .irq_mask = pci_msi_mask_irq, .irq_ack = irq_chip_ack_parent, .irq_retrigger = irq_chip_retrigger_hierarchy, - .flags = IRQCHIP_SKIP_SET_WAKE, + .flags = IRQCHIP_SKIP_SET_WAKE | + IRQCHIP_AFFINITY_PRE_STARTUP, }; static struct msi_domain_info pci_msi_ir_domain_info = { @@ -273,7 +277,8 @@ static struct irq_chip dmar_msi_controller = { .irq_retrigger = irq_chip_retrigger_hierarchy, .irq_compose_msi_msg = dmar_msi_compose_msg, .irq_write_msi_msg = dmar_msi_write_msg, - .flags = IRQCHIP_SKIP_SET_WAKE, + .flags = IRQCHIP_SKIP_SET_WAKE | + IRQCHIP_AFFINITY_PRE_STARTUP, }; static int dmar_msi_init(struct irq_domain *domain, diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index b7c003013d41..2131af9f2fa2 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -438,7 +438,7 @@ static void srat_detect_node(struct cpuinfo_x86 *c) node = numa_cpu_node(cpu); if (node == NUMA_NO_NODE) - node = per_cpu(cpu_llc_id, cpu); + node = get_llc_id(cpu); /* * On multi-fabric platform (e.g. Numascale NumaChip) a diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index d41b70fe4918..ecfca3bbcd96 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -43,6 +43,7 @@ static void __init mds_select_mitigation(void); static void __init mds_print_mitigation(void); static void __init taa_select_mitigation(void); static void __init srbds_select_mitigation(void); +static void __init l1d_flush_select_mitigation(void); /* The base value of the SPEC_CTRL MSR that always has to be preserved. 
*/ u64 x86_spec_ctrl_base; @@ -76,6 +77,13 @@ EXPORT_SYMBOL_GPL(mds_user_clear); DEFINE_STATIC_KEY_FALSE(mds_idle_clear); EXPORT_SYMBOL_GPL(mds_idle_clear); +/* + * Controls whether l1d flush based mitigations are enabled, + * based on hw features and admin setting via boot parameter + * defaults to false + */ +DEFINE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush); + void __init check_bugs(void) { identify_boot_cpu(); @@ -111,6 +119,7 @@ void __init check_bugs(void) mds_select_mitigation(); taa_select_mitigation(); srbds_select_mitigation(); + l1d_flush_select_mitigation(); /* * As MDS and TAA mitigations are inter-related, print MDS @@ -492,6 +501,34 @@ static int __init srbds_parse_cmdline(char *str) early_param("srbds", srbds_parse_cmdline); #undef pr_fmt +#define pr_fmt(fmt) "L1D Flush : " fmt + +enum l1d_flush_mitigations { + L1D_FLUSH_OFF = 0, + L1D_FLUSH_ON, +}; + +static enum l1d_flush_mitigations l1d_flush_mitigation __initdata = L1D_FLUSH_OFF; + +static void __init l1d_flush_select_mitigation(void) +{ + if (!l1d_flush_mitigation || !boot_cpu_has(X86_FEATURE_FLUSH_L1D)) + return; + + static_branch_enable(&switch_mm_cond_l1d_flush); + pr_info("Conditional flush on switch_mm() enabled\n"); +} + +static int __init l1d_flush_parse_cmdline(char *str) +{ + if (!strcmp(str, "on")) + l1d_flush_mitigation = L1D_FLUSH_ON; + + return 0; +} +early_param("l1d_flush", l1d_flush_parse_cmdline); + +#undef pr_fmt #define pr_fmt(fmt) "Spectre V1 : " fmt enum spectre_v1_mitigation { @@ -1215,6 +1252,24 @@ static void task_update_spec_tif(struct task_struct *tsk) speculation_ctrl_update_current(); } +static int l1d_flush_prctl_set(struct task_struct *task, unsigned long ctrl) +{ + + if (!static_branch_unlikely(&switch_mm_cond_l1d_flush)) + return -EPERM; + + switch (ctrl) { + case PR_SPEC_ENABLE: + set_ti_thread_flag(&task->thread_info, TIF_SPEC_L1D_FLUSH); + return 0; + case PR_SPEC_DISABLE: + clear_ti_thread_flag(&task->thread_info, TIF_SPEC_L1D_FLUSH); + return 0; + default: + return -ERANGE; + } +} + static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) { if (ssb_mode != SPEC_STORE_BYPASS_PRCTL && @@ -1324,6 +1379,8 @@ int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which, return ssb_prctl_set(task, ctrl); case PR_SPEC_INDIRECT_BRANCH: return ib_prctl_set(task, ctrl); + case PR_SPEC_L1D_FLUSH: + return l1d_flush_prctl_set(task, ctrl); default: return -ENODEV; } @@ -1340,6 +1397,17 @@ void arch_seccomp_spec_mitigate(struct task_struct *task) } #endif +static int l1d_flush_prctl_get(struct task_struct *task) +{ + if (!static_branch_unlikely(&switch_mm_cond_l1d_flush)) + return PR_SPEC_FORCE_DISABLE; + + if (test_ti_thread_flag(&task->thread_info, TIF_SPEC_L1D_FLUSH)) + return PR_SPEC_PRCTL | PR_SPEC_ENABLE; + else + return PR_SPEC_PRCTL | PR_SPEC_DISABLE; +} + static int ssb_prctl_get(struct task_struct *task) { switch (ssb_mode) { @@ -1390,6 +1458,8 @@ int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) return ssb_prctl_get(task); case PR_SPEC_INDIRECT_BRANCH: return ib_prctl_get(task); + case PR_SPEC_L1D_FLUSH: + return l1d_flush_prctl_get(task); default: return -ENODEV; } diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 64b805bd6a54..0f8885949e8c 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -79,6 +79,12 @@ EXPORT_SYMBOL(smp_num_siblings); /* Last level cache ID of each logical CPU */ DEFINE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id) = BAD_APICID; +u16 get_llc_id(unsigned int cpu) 
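/*
 * The PR_SPEC_L1D_FLUSH prctl added in the bugs.c hunk above is strictly
 * opt-in per task, and only works when the kernel was booted with
 * l1d_flush=on on a CPU with X86_FEATURE_FLUSH_L1D. Userspace sketch
 * (error handling omitted):
 *
 *	#include <sys/prctl.h>
 *
 *	// request an L1D flush whenever the CPU switches away from this mm
 *	prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_L1D_FLUSH, PR_SPEC_ENABLE, 0, 0);
 *
 * If the switch_mm_cond_l1d_flush static key is off, the set call fails
 * with EPERM and the get side reports PR_SPEC_FORCE_DISABLE.
 */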
+{ + return per_cpu(cpu_llc_id, cpu); +} +EXPORT_SYMBOL_GPL(get_llc_id); + /* correctly size the local cpu masks */ void __init setup_cpu_local_masks(void) { diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 22791aadc085..8cb7816d03b4 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -817,7 +817,10 @@ log_it: if (mca_cfg.dont_log_ce && !mce_usable_address(&m)) goto clear_it; - mce_log(&m); + if (flags & MCP_QUEUE_LOG) + mce_gen_pool_add(&m); + else + mce_log(&m); clear_it: /* @@ -1639,10 +1642,12 @@ static void __mcheck_cpu_init_generic(void) m_fl = MCP_DONTLOG; /* - * Log the machine checks left over from the previous reset. + * Log the machine checks left over from the previous reset. Log them + * only, do not start processing them. That will happen in mcheck_late_init() + * when all consumers have been registered on the notifier chain. */ bitmap_fill(all_banks, MAX_NR_BANKS); - machine_check_poll(MCP_UC | m_fl, &all_banks); + machine_check_poll(MCP_UC | MCP_QUEUE_LOG | m_fl, &all_banks); cr4_set_bits(X86_CR4_MCE); diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c index 4e86d97f9653..0bfc14041bbb 100644 --- a/arch/x86/kernel/cpu/mce/inject.c +++ b/arch/x86/kernel/cpu/mce/inject.c @@ -235,7 +235,7 @@ static void __maybe_unused raise_mce(struct mce *m) unsigned long start; int cpu; - get_online_cpus(); + cpus_read_lock(); cpumask_copy(mce_inject_cpumask, cpu_online_mask); cpumask_clear_cpu(get_cpu(), mce_inject_cpumask); for_each_online_cpu(cpu) { @@ -269,7 +269,7 @@ static void __maybe_unused raise_mce(struct mce *m) } raise_local(); put_cpu(); - put_online_cpus(); + cpus_read_unlock(); } else { preempt_disable(); raise_local(); @@ -529,7 +529,7 @@ static void do_inject(void) cpu = get_nbc_for_node(topology_die_id(cpu)); } - get_online_cpus(); + cpus_read_lock(); if (!cpu_online(cpu)) goto err; @@ -553,7 +553,7 @@ static void do_inject(void) } err: - put_online_cpus(); + cpus_read_unlock(); } diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 6a6318e9590c..efb69be41ab1 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -55,7 +55,7 @@ LIST_HEAD(microcode_cache); * All non cpu-hotplug-callback call sites use: * * - microcode_mutex to synchronize with each other; - * - get/put_online_cpus() to synchronize with + * - cpus_read_lock/unlock() to synchronize with * the cpu-hotplug-callback call sites. 
* * We guarantee that only a single cpu is being @@ -431,7 +431,7 @@ static ssize_t microcode_write(struct file *file, const char __user *buf, return ret; } - get_online_cpus(); + cpus_read_lock(); mutex_lock(&microcode_mutex); if (do_microcode_update(buf, len) == 0) @@ -441,7 +441,7 @@ static ssize_t microcode_write(struct file *file, const char __user *buf, perf_check_microcode(); mutex_unlock(&microcode_mutex); - put_online_cpus(); + cpus_read_unlock(); return ret; } @@ -629,7 +629,7 @@ static ssize_t reload_store(struct device *dev, if (val != 1) return size; - get_online_cpus(); + cpus_read_lock(); ret = check_online_cpus(); if (ret) @@ -644,7 +644,7 @@ static ssize_t reload_store(struct device *dev, mutex_unlock(&microcode_mutex); put: - put_online_cpus(); + cpus_read_unlock(); if (ret == 0) ret = size; @@ -853,14 +853,14 @@ static int __init microcode_init(void) if (IS_ERR(microcode_pdev)) return PTR_ERR(microcode_pdev); - get_online_cpus(); + cpus_read_lock(); mutex_lock(&microcode_mutex); error = subsys_interface_register(&mc_cpu_interface); if (!error) perf_check_microcode(); mutex_unlock(&microcode_mutex); - put_online_cpus(); + cpus_read_unlock(); if (error) goto out_pdev; @@ -892,13 +892,13 @@ static int __init microcode_init(void) &cpu_root_microcode_group); out_driver: - get_online_cpus(); + cpus_read_lock(); mutex_lock(&microcode_mutex); subsys_interface_unregister(&mc_cpu_interface); mutex_unlock(&microcode_mutex); - put_online_cpus(); + cpus_read_unlock(); out_pdev: platform_device_unregister(microcode_pdev); diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.c b/arch/x86/kernel/cpu/mtrr/mtrr.c index a76694bffe86..2746cac9d8a9 100644 --- a/arch/x86/kernel/cpu/mtrr/mtrr.c +++ b/arch/x86/kernel/cpu/mtrr/mtrr.c @@ -336,7 +336,7 @@ int mtrr_add_page(unsigned long base, unsigned long size, replace = -1; /* No CPU hotplug when we change MTRR entries */ - get_online_cpus(); + cpus_read_lock(); /* Search for existing MTRR */ mutex_lock(&mtrr_mutex); @@ -398,7 +398,7 @@ int mtrr_add_page(unsigned long base, unsigned long size, error = i; out: mutex_unlock(&mtrr_mutex); - put_online_cpus(); + cpus_read_unlock(); return error; } @@ -485,7 +485,7 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size) max = num_var_ranges; /* No CPU hotplug when we change MTRR entries */ - get_online_cpus(); + cpus_read_lock(); mutex_lock(&mtrr_mutex); if (reg < 0) { /* Search for existing MTRR */ @@ -520,7 +520,7 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size) error = reg; out: mutex_unlock(&mtrr_mutex); - put_online_cpus(); + cpus_read_unlock(); return error; } diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 23001ae03e82..4b8813bafffd 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -57,128 +57,57 @@ static void mba_wrmsr_amd(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r); -#define domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].domains) +#define domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].r_resctrl.domains) -struct rdt_resource rdt_resources_all[] = { +struct rdt_hw_resource rdt_resources_all[] = { [RDT_RESOURCE_L3] = { - .rid = RDT_RESOURCE_L3, - .name = "L3", - .domains = domain_init(RDT_RESOURCE_L3), - .msr_base = MSR_IA32_L3_CBM_BASE, - .msr_update = cat_wrmsr, - .cache_level = 3, - .cache = { - .min_cbm_bits = 1, - .cbm_idx_mult = 1, - .cbm_idx_offset = 0, - }, - .parse_ctrlval = parse_cbm, - .format_str = "%d=%0*x", - .fflags = RFTYPE_RES_CACHE, - }, - [RDT_RESOURCE_L3DATA] = - { 
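/*
 * From this hunk on, the resctrl rework splits each resource into an
 * architecture-private struct rdt_hw_resource that embeds the generic
 * struct rdt_resource as r_resctrl. MSR details (msr_base, msr_update,
 * num_closid, mon_scale) move into the wrapper; the filesystem-facing
 * fields stay generic. The conversion helper used throughout lives in
 * internal.h and is essentially:
 *
 *	static inline struct rdt_hw_resource *
 *	resctrl_to_arch_res(struct rdt_resource *r)
 *	{
 *		return container_of(r, struct rdt_hw_resource, r_resctrl);
 *	}
 */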
- .rid = RDT_RESOURCE_L3DATA, - .name = "L3DATA", - .domains = domain_init(RDT_RESOURCE_L3DATA), - .msr_base = MSR_IA32_L3_CBM_BASE, - .msr_update = cat_wrmsr, - .cache_level = 3, - .cache = { - .min_cbm_bits = 1, - .cbm_idx_mult = 2, - .cbm_idx_offset = 0, + .r_resctrl = { + .rid = RDT_RESOURCE_L3, + .name = "L3", + .cache_level = 3, + .cache = { + .min_cbm_bits = 1, + }, + .domains = domain_init(RDT_RESOURCE_L3), + .parse_ctrlval = parse_cbm, + .format_str = "%d=%0*x", + .fflags = RFTYPE_RES_CACHE, }, - .parse_ctrlval = parse_cbm, - .format_str = "%d=%0*x", - .fflags = RFTYPE_RES_CACHE, - }, - [RDT_RESOURCE_L3CODE] = - { - .rid = RDT_RESOURCE_L3CODE, - .name = "L3CODE", - .domains = domain_init(RDT_RESOURCE_L3CODE), .msr_base = MSR_IA32_L3_CBM_BASE, .msr_update = cat_wrmsr, - .cache_level = 3, - .cache = { - .min_cbm_bits = 1, - .cbm_idx_mult = 2, - .cbm_idx_offset = 1, - }, - .parse_ctrlval = parse_cbm, - .format_str = "%d=%0*x", - .fflags = RFTYPE_RES_CACHE, }, [RDT_RESOURCE_L2] = { - .rid = RDT_RESOURCE_L2, - .name = "L2", - .domains = domain_init(RDT_RESOURCE_L2), - .msr_base = MSR_IA32_L2_CBM_BASE, - .msr_update = cat_wrmsr, - .cache_level = 2, - .cache = { - .min_cbm_bits = 1, - .cbm_idx_mult = 1, - .cbm_idx_offset = 0, + .r_resctrl = { + .rid = RDT_RESOURCE_L2, + .name = "L2", + .cache_level = 2, + .cache = { + .min_cbm_bits = 1, + }, + .domains = domain_init(RDT_RESOURCE_L2), + .parse_ctrlval = parse_cbm, + .format_str = "%d=%0*x", + .fflags = RFTYPE_RES_CACHE, }, - .parse_ctrlval = parse_cbm, - .format_str = "%d=%0*x", - .fflags = RFTYPE_RES_CACHE, - }, - [RDT_RESOURCE_L2DATA] = - { - .rid = RDT_RESOURCE_L2DATA, - .name = "L2DATA", - .domains = domain_init(RDT_RESOURCE_L2DATA), .msr_base = MSR_IA32_L2_CBM_BASE, .msr_update = cat_wrmsr, - .cache_level = 2, - .cache = { - .min_cbm_bits = 1, - .cbm_idx_mult = 2, - .cbm_idx_offset = 0, - }, - .parse_ctrlval = parse_cbm, - .format_str = "%d=%0*x", - .fflags = RFTYPE_RES_CACHE, - }, - [RDT_RESOURCE_L2CODE] = - { - .rid = RDT_RESOURCE_L2CODE, - .name = "L2CODE", - .domains = domain_init(RDT_RESOURCE_L2CODE), - .msr_base = MSR_IA32_L2_CBM_BASE, - .msr_update = cat_wrmsr, - .cache_level = 2, - .cache = { - .min_cbm_bits = 1, - .cbm_idx_mult = 2, - .cbm_idx_offset = 1, - }, - .parse_ctrlval = parse_cbm, - .format_str = "%d=%0*x", - .fflags = RFTYPE_RES_CACHE, }, [RDT_RESOURCE_MBA] = { - .rid = RDT_RESOURCE_MBA, - .name = "MB", - .domains = domain_init(RDT_RESOURCE_MBA), - .cache_level = 3, - .parse_ctrlval = parse_bw, - .format_str = "%d=%*u", - .fflags = RFTYPE_RES_MB, + .r_resctrl = { + .rid = RDT_RESOURCE_MBA, + .name = "MB", + .cache_level = 3, + .domains = domain_init(RDT_RESOURCE_MBA), + .parse_ctrlval = parse_bw, + .format_str = "%d=%*u", + .fflags = RFTYPE_RES_MB, + }, }, }; -static unsigned int cbm_idx(struct rdt_resource *r, unsigned int closid) -{ - return closid * r->cache.cbm_idx_mult + r->cache.cbm_idx_offset; -} - /* * cache_alloc_hsw_probe() - Have to probe for Intel haswell server CPUs * as they do not have CPUID enumeration support for Cache allocation. 
@@ -199,7 +128,8 @@ static unsigned int cbm_idx(struct rdt_resource *r, unsigned int closid) */ static inline void cache_alloc_hsw_probe(void) { - struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3]; + struct rdt_hw_resource *hw_res = &rdt_resources_all[RDT_RESOURCE_L3]; + struct rdt_resource *r = &hw_res->r_resctrl; u32 l, h, max_cbm = BIT_MASK(20) - 1; if (wrmsr_safe(MSR_IA32_L3_CBM_BASE, max_cbm, 0)) @@ -211,7 +141,7 @@ static inline void cache_alloc_hsw_probe(void) if (l != max_cbm) return; - r->num_closid = 4; + hw_res->num_closid = 4; r->default_ctrl = max_cbm; r->cache.cbm_len = 20; r->cache.shareable_bits = 0xc0000; @@ -225,7 +155,7 @@ static inline void cache_alloc_hsw_probe(void) bool is_mba_sc(struct rdt_resource *r) { if (!r) - return rdt_resources_all[RDT_RESOURCE_MBA].membw.mba_sc; + return rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl.membw.mba_sc; return r->membw.mba_sc; } @@ -253,12 +183,13 @@ static inline bool rdt_get_mb_table(struct rdt_resource *r) static bool __get_mem_config_intel(struct rdt_resource *r) { + struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); union cpuid_0x10_3_eax eax; union cpuid_0x10_x_edx edx; u32 ebx, ecx, max_delay; cpuid_count(0x00000010, 3, &eax.full, &ebx, &ecx, &edx.full); - r->num_closid = edx.split.cos_max + 1; + hw_res->num_closid = edx.split.cos_max + 1; max_delay = eax.split.max_delay + 1; r->default_ctrl = MAX_MBA_BW; r->membw.arch_needs_linear = true; @@ -287,12 +218,13 @@ static bool __get_mem_config_intel(struct rdt_resource *r) static bool __rdt_get_mem_config_amd(struct rdt_resource *r) { + struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); union cpuid_0x10_3_eax eax; union cpuid_0x10_x_edx edx; u32 ebx, ecx; cpuid_count(0x80000020, 1, &eax.full, &ebx, &ecx, &edx.full); - r->num_closid = edx.split.cos_max + 1; + hw_res->num_closid = edx.split.cos_max + 1; r->default_ctrl = MAX_MBA_BW_AMD; /* AMD does not use delay */ @@ -317,12 +249,13 @@ static bool __rdt_get_mem_config_amd(struct rdt_resource *r) static void rdt_get_cache_alloc_cfg(int idx, struct rdt_resource *r) { + struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); union cpuid_0x10_1_eax eax; union cpuid_0x10_x_edx edx; u32 ebx, ecx; cpuid_count(0x00000010, idx, &eax.full, &ebx, &ecx, &edx.full); - r->num_closid = edx.split.cos_max + 1; + hw_res->num_closid = edx.split.cos_max + 1; r->cache.cbm_len = eax.split.cbm_len + 1; r->default_ctrl = BIT_MASK(eax.split.cbm_len + 1) - 1; r->cache.shareable_bits = ebx & r->default_ctrl; @@ -331,43 +264,35 @@ static void rdt_get_cache_alloc_cfg(int idx, struct rdt_resource *r) r->alloc_enabled = true; } -static void rdt_get_cdp_config(int level, int type) +static void rdt_get_cdp_config(int level) { - struct rdt_resource *r_l = &rdt_resources_all[level]; - struct rdt_resource *r = &rdt_resources_all[type]; - - r->num_closid = r_l->num_closid / 2; - r->cache.cbm_len = r_l->cache.cbm_len; - r->default_ctrl = r_l->default_ctrl; - r->cache.shareable_bits = r_l->cache.shareable_bits; - r->data_width = (r->cache.cbm_len + 3) / 4; - r->alloc_capable = true; /* * By default, CDP is disabled. CDP can be enabled by mount parameter * "cdp" during resctrl file system mount time. 
*/ - r->alloc_enabled = false; + rdt_resources_all[level].cdp_enabled = false; + rdt_resources_all[level].r_resctrl.cdp_capable = true; } static void rdt_get_cdp_l3_config(void) { - rdt_get_cdp_config(RDT_RESOURCE_L3, RDT_RESOURCE_L3DATA); - rdt_get_cdp_config(RDT_RESOURCE_L3, RDT_RESOURCE_L3CODE); + rdt_get_cdp_config(RDT_RESOURCE_L3); } static void rdt_get_cdp_l2_config(void) { - rdt_get_cdp_config(RDT_RESOURCE_L2, RDT_RESOURCE_L2DATA); - rdt_get_cdp_config(RDT_RESOURCE_L2, RDT_RESOURCE_L2CODE); + rdt_get_cdp_config(RDT_RESOURCE_L2); } static void mba_wrmsr_amd(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r) { unsigned int i; + struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d); + struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); for (i = m->low; i < m->high; i++) - wrmsrl(r->msr_base + i, d->ctrl_val[i]); + wrmsrl(hw_res->msr_base + i, hw_dom->ctrl_val[i]); } /* @@ -389,19 +314,23 @@ mba_wrmsr_intel(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r) { unsigned int i; + struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d); + struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); /* Write the delay values for mba. */ for (i = m->low; i < m->high; i++) - wrmsrl(r->msr_base + i, delay_bw_map(d->ctrl_val[i], r)); + wrmsrl(hw_res->msr_base + i, delay_bw_map(hw_dom->ctrl_val[i], r)); } static void cat_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r) { unsigned int i; + struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d); + struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); for (i = m->low; i < m->high; i++) - wrmsrl(r->msr_base + cbm_idx(r, i), d->ctrl_val[i]); + wrmsrl(hw_res->msr_base + i, hw_dom->ctrl_val[i]); } struct rdt_domain *get_domain_from_cpu(int cpu, struct rdt_resource *r) @@ -417,16 +346,22 @@ struct rdt_domain *get_domain_from_cpu(int cpu, struct rdt_resource *r) return NULL; } +u32 resctrl_arch_get_num_closid(struct rdt_resource *r) +{ + return resctrl_to_arch_res(r)->num_closid; +} + void rdt_ctrl_update(void *arg) { struct msr_param *m = arg; + struct rdt_hw_resource *hw_res = resctrl_to_arch_res(m->res); struct rdt_resource *r = m->res; int cpu = smp_processor_id(); struct rdt_domain *d; d = get_domain_from_cpu(cpu, r); if (d) { - r->msr_update(d, m, r); + hw_res->msr_update(d, m, r); return; } pr_warn_once("cpu %d not found in any domain for resource %s\n", @@ -468,6 +403,7 @@ struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id, void setup_default_ctrlval(struct rdt_resource *r, u32 *dc, u32 *dm) { + struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); int i; /* @@ -476,7 +412,7 @@ void setup_default_ctrlval(struct rdt_resource *r, u32 *dc, u32 *dm) * For Memory Allocation: Set b/w requested to 100% * and the bandwidth in MBps to U32_MAX */ - for (i = 0; i < r->num_closid; i++, dc++, dm++) { + for (i = 0; i < hw_res->num_closid; i++, dc++, dm++) { *dc = r->default_ctrl; *dm = MBA_MAX_MBPS; } @@ -484,26 +420,30 @@ void setup_default_ctrlval(struct rdt_resource *r, u32 *dc, u32 *dm) static int domain_setup_ctrlval(struct rdt_resource *r, struct rdt_domain *d) { + struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); + struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d); struct msr_param m; u32 *dc, *dm; - dc = kmalloc_array(r->num_closid, sizeof(*d->ctrl_val), GFP_KERNEL); + dc = kmalloc_array(hw_res->num_closid, sizeof(*hw_dom->ctrl_val), + GFP_KERNEL); if (!dc) return -ENOMEM; - dm = kmalloc_array(r->num_closid, sizeof(*d->mbps_val), GFP_KERNEL); + dm = 
kmalloc_array(hw_res->num_closid, sizeof(*hw_dom->mbps_val), + GFP_KERNEL); if (!dm) { kfree(dc); return -ENOMEM; } - d->ctrl_val = dc; - d->mbps_val = dm; + hw_dom->ctrl_val = dc; + hw_dom->mbps_val = dm; setup_default_ctrlval(r, dc, dm); m.low = 0; - m.high = r->num_closid; - r->msr_update(d, &m, r); + m.high = hw_res->num_closid; + hw_res->msr_update(d, &m, r); return 0; } @@ -560,6 +500,7 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r) { int id = get_cpu_cacheinfo_id(cpu, r->cache_level); struct list_head *add_pos = NULL; + struct rdt_hw_domain *hw_dom; struct rdt_domain *d; d = rdt_find_domain(r, id, &add_pos); @@ -575,10 +516,11 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r) return; } - d = kzalloc_node(sizeof(*d), GFP_KERNEL, cpu_to_node(cpu)); - if (!d) + hw_dom = kzalloc_node(sizeof(*hw_dom), GFP_KERNEL, cpu_to_node(cpu)); + if (!hw_dom) return; + d = &hw_dom->d_resctrl; d->id = id; cpumask_set_cpu(cpu, &d->cpu_mask); @@ -607,6 +549,7 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r) static void domain_remove_cpu(int cpu, struct rdt_resource *r) { int id = get_cpu_cacheinfo_id(cpu, r->cache_level); + struct rdt_hw_domain *hw_dom; struct rdt_domain *d; d = rdt_find_domain(r, id, NULL); @@ -614,6 +557,7 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r) pr_warn("Couldn't find cache id for CPU %d\n", cpu); return; } + hw_dom = resctrl_to_arch_dom(d); cpumask_clear_cpu(cpu, &d->cpu_mask); if (cpumask_empty(&d->cpu_mask)) { @@ -646,16 +590,16 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r) if (d->plr) d->plr->d = NULL; - kfree(d->ctrl_val); - kfree(d->mbps_val); + kfree(hw_dom->ctrl_val); + kfree(hw_dom->mbps_val); bitmap_free(d->rmid_busy_llc); kfree(d->mbm_total); kfree(d->mbm_local); - kfree(d); + kfree(hw_dom); return; } - if (r == &rdt_resources_all[RDT_RESOURCE_L3]) { + if (r == &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl) { if (is_mbm_enabled() && cpu == d->mbm_work_cpu) { cancel_delayed_work(&d->mbm_over); mbm_setup_overflow_handler(d, 0); @@ -732,13 +676,8 @@ static int resctrl_offline_cpu(unsigned int cpu) static __init void rdt_init_padding(void) { struct rdt_resource *r; - int cl; for_each_alloc_capable_rdt_resource(r) { - cl = strlen(r->name); - if (cl > max_name_width) - max_name_width = cl; - if (r->data_width > max_data_width) max_data_width = r->data_width; } @@ -827,19 +766,22 @@ static bool __init rdt_cpu_has(int flag) static __init bool get_mem_config(void) { + struct rdt_hw_resource *hw_res = &rdt_resources_all[RDT_RESOURCE_MBA]; + if (!rdt_cpu_has(X86_FEATURE_MBA)) return false; if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) - return __get_mem_config_intel(&rdt_resources_all[RDT_RESOURCE_MBA]); + return __get_mem_config_intel(&hw_res->r_resctrl); else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) - return __rdt_get_mem_config_amd(&rdt_resources_all[RDT_RESOURCE_MBA]); + return __rdt_get_mem_config_amd(&hw_res->r_resctrl); return false; } static __init bool get_rdt_alloc_resources(void) { + struct rdt_resource *r; bool ret = false; if (rdt_alloc_capable) @@ -849,14 +791,16 @@ static __init bool get_rdt_alloc_resources(void) return false; if (rdt_cpu_has(X86_FEATURE_CAT_L3)) { - rdt_get_cache_alloc_cfg(1, &rdt_resources_all[RDT_RESOURCE_L3]); + r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; + rdt_get_cache_alloc_cfg(1, r); if (rdt_cpu_has(X86_FEATURE_CDP_L3)) rdt_get_cdp_l3_config(); ret = true; } if (rdt_cpu_has(X86_FEATURE_CAT_L2)) { /* CPUID 0x10.2 fields are same format at 
0x10.1 */ - rdt_get_cache_alloc_cfg(2, &rdt_resources_all[RDT_RESOURCE_L2]); + r = &rdt_resources_all[RDT_RESOURCE_L2].r_resctrl; + rdt_get_cache_alloc_cfg(2, r); if (rdt_cpu_has(X86_FEATURE_CDP_L2)) rdt_get_cdp_l2_config(); ret = true; @@ -870,6 +814,8 @@ static __init bool get_rdt_alloc_resources(void) static __init bool get_rdt_mon_resources(void) { + struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; + if (rdt_cpu_has(X86_FEATURE_CQM_OCCUP_LLC)) rdt_mon_features |= (1 << QOS_L3_OCCUP_EVENT_ID); if (rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL)) @@ -880,7 +826,7 @@ static __init bool get_rdt_mon_resources(void) if (!rdt_mon_features) return false; - return !rdt_get_mon_l3_config(&rdt_resources_all[RDT_RESOURCE_L3]); + return !rdt_get_mon_l3_config(r); } static __init void __check_quirks_intel(void) @@ -918,42 +864,40 @@ static __init bool get_rdt_resources(void) static __init void rdt_init_res_defs_intel(void) { + struct rdt_hw_resource *hw_res; struct rdt_resource *r; for_each_rdt_resource(r) { + hw_res = resctrl_to_arch_res(r); + if (r->rid == RDT_RESOURCE_L3 || - r->rid == RDT_RESOURCE_L3DATA || - r->rid == RDT_RESOURCE_L3CODE || - r->rid == RDT_RESOURCE_L2 || - r->rid == RDT_RESOURCE_L2DATA || - r->rid == RDT_RESOURCE_L2CODE) { + r->rid == RDT_RESOURCE_L2) { r->cache.arch_has_sparse_bitmaps = false; r->cache.arch_has_empty_bitmaps = false; r->cache.arch_has_per_cpu_cfg = false; } else if (r->rid == RDT_RESOURCE_MBA) { - r->msr_base = MSR_IA32_MBA_THRTL_BASE; - r->msr_update = mba_wrmsr_intel; + hw_res->msr_base = MSR_IA32_MBA_THRTL_BASE; + hw_res->msr_update = mba_wrmsr_intel; } } } static __init void rdt_init_res_defs_amd(void) { + struct rdt_hw_resource *hw_res; struct rdt_resource *r; for_each_rdt_resource(r) { + hw_res = resctrl_to_arch_res(r); + if (r->rid == RDT_RESOURCE_L3 || - r->rid == RDT_RESOURCE_L3DATA || - r->rid == RDT_RESOURCE_L3CODE || - r->rid == RDT_RESOURCE_L2 || - r->rid == RDT_RESOURCE_L2DATA || - r->rid == RDT_RESOURCE_L2CODE) { + r->rid == RDT_RESOURCE_L2) { r->cache.arch_has_sparse_bitmaps = true; r->cache.arch_has_empty_bitmaps = true; r->cache.arch_has_per_cpu_cfg = true; } else if (r->rid == RDT_RESOURCE_MBA) { - r->msr_base = MSR_IA32_MBA_BW_BASE; - r->msr_update = mba_wrmsr_amd; + hw_res->msr_base = MSR_IA32_MBA_BW_BASE; + hw_res->msr_update = mba_wrmsr_amd; } } } diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c index c877642e8a14..87666275eed9 100644 --- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c +++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c @@ -57,20 +57,23 @@ static bool bw_validate(char *buf, unsigned long *data, struct rdt_resource *r) return true; } -int parse_bw(struct rdt_parse_data *data, struct rdt_resource *r, +int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s, struct rdt_domain *d) { + struct resctrl_staged_config *cfg; + struct rdt_resource *r = s->res; unsigned long bw_val; - if (d->have_new_ctrl) { + cfg = &d->staged_config[s->conf_type]; + if (cfg->have_new_ctrl) { rdt_last_cmd_printf("Duplicate domain %d\n", d->id); return -EINVAL; } if (!bw_validate(data->buf, &bw_val, r)) return -EINVAL; - d->new_ctrl = bw_val; - d->have_new_ctrl = true; + cfg->new_ctrl = bw_val; + cfg->have_new_ctrl = true; return 0; } @@ -125,13 +128,16 @@ static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r) * Read one cache bit mask (hex). Check that it is valid for the current * resource type. 
*/ -int parse_cbm(struct rdt_parse_data *data, struct rdt_resource *r, +int parse_cbm(struct rdt_parse_data *data, struct resctrl_schema *s, struct rdt_domain *d) { struct rdtgroup *rdtgrp = data->rdtgrp; + struct resctrl_staged_config *cfg; + struct rdt_resource *r = s->res; u32 cbm_val; - if (d->have_new_ctrl) { + cfg = &d->staged_config[s->conf_type]; + if (cfg->have_new_ctrl) { rdt_last_cmd_printf("Duplicate domain %d\n", d->id); return -EINVAL; } @@ -160,12 +166,12 @@ int parse_cbm(struct rdt_parse_data *data, struct rdt_resource *r, * The CBM may not overlap with the CBM of another closid if * either is exclusive. */ - if (rdtgroup_cbm_overlaps(r, d, cbm_val, rdtgrp->closid, true)) { + if (rdtgroup_cbm_overlaps(s, d, cbm_val, rdtgrp->closid, true)) { rdt_last_cmd_puts("Overlaps with exclusive group\n"); return -EINVAL; } - if (rdtgroup_cbm_overlaps(r, d, cbm_val, rdtgrp->closid, false)) { + if (rdtgroup_cbm_overlaps(s, d, cbm_val, rdtgrp->closid, false)) { if (rdtgrp->mode == RDT_MODE_EXCLUSIVE || rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { rdt_last_cmd_puts("Overlaps with other group\n"); @@ -173,8 +179,8 @@ int parse_cbm(struct rdt_parse_data *data, struct rdt_resource *r, } } - d->new_ctrl = cbm_val; - d->have_new_ctrl = true; + cfg->new_ctrl = cbm_val; + cfg->have_new_ctrl = true; return 0; } @@ -185,9 +191,12 @@ int parse_cbm(struct rdt_parse_data *data, struct rdt_resource *r, * separated by ";". The "id" is in decimal, and must match one of * the "id"s for this resource. */ -static int parse_line(char *line, struct rdt_resource *r, +static int parse_line(char *line, struct resctrl_schema *s, struct rdtgroup *rdtgrp) { + enum resctrl_conf_type t = s->conf_type; + struct resctrl_staged_config *cfg; + struct rdt_resource *r = s->res; struct rdt_parse_data data; char *dom = NULL, *id; struct rdt_domain *d; @@ -213,9 +222,10 @@ next: if (d->id == dom_id) { data.buf = dom; data.rdtgrp = rdtgrp; - if (r->parse_ctrlval(&data, r, d)) + if (r->parse_ctrlval(&data, s, d)) return -EINVAL; if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { + cfg = &d->staged_config[t]; /* * In pseudo-locking setup mode and just * parsed a valid CBM that should be @@ -224,9 +234,9 @@ next: * the required initialization for single * region and return. */ - rdtgrp->plr->r = r; + rdtgrp->plr->s = s; rdtgrp->plr->d = d; - rdtgrp->plr->cbm = d->new_ctrl; + rdtgrp->plr->cbm = cfg->new_ctrl; d->plr = rdtgrp->plr; return 0; } @@ -236,28 +246,72 @@ next: return -EINVAL; } -int update_domains(struct rdt_resource *r, int closid) +static u32 get_config_index(u32 closid, enum resctrl_conf_type type) { + switch (type) { + default: + case CDP_NONE: + return closid; + case CDP_CODE: + return closid * 2 + 1; + case CDP_DATA: + return closid * 2; + } +} + +static bool apply_config(struct rdt_hw_domain *hw_dom, + struct resctrl_staged_config *cfg, u32 idx, + cpumask_var_t cpu_mask, bool mba_sc) +{ + struct rdt_domain *dom = &hw_dom->d_resctrl; + u32 *dc = !mba_sc ? 
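/*
 * get_config_index() above is where CDP's closid doubling happens: with
 * code/data prioritization enabled, each resctrl closid consumes a pair of
 * hardware CLOSIDs. Worked example for closid 3:
 *
 *	CDP_NONE -> index 3 (MSR msr_base + 3)
 *	CDP_DATA -> index 6 (closid * 2)
 *	CDP_CODE -> index 7 (closid * 2 + 1)
 *
 * This replaces the per-resource cbm_idx_mult/cbm_idx_offset arithmetic
 * removed from core.c earlier in this patch.
 */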
hw_dom->ctrl_val : hw_dom->mbps_val; + + if (cfg->new_ctrl != dc[idx]) { + cpumask_set_cpu(cpumask_any(&dom->cpu_mask), cpu_mask); + dc[idx] = cfg->new_ctrl; + + return true; + } + + return false; +} + +int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid) +{ + struct resctrl_staged_config *cfg; + struct rdt_hw_domain *hw_dom; struct msr_param msr_param; + enum resctrl_conf_type t; cpumask_var_t cpu_mask; struct rdt_domain *d; bool mba_sc; - u32 *dc; int cpu; + u32 idx; if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL)) return -ENOMEM; - msr_param.low = closid; - msr_param.high = msr_param.low + 1; - msr_param.res = r; - mba_sc = is_mba_sc(r); + msr_param.res = NULL; list_for_each_entry(d, &r->domains, list) { - dc = !mba_sc ? d->ctrl_val : d->mbps_val; - if (d->have_new_ctrl && d->new_ctrl != dc[closid]) { - cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask); - dc[closid] = d->new_ctrl; + hw_dom = resctrl_to_arch_dom(d); + for (t = 0; t < CDP_NUM_TYPES; t++) { + cfg = &hw_dom->d_resctrl.staged_config[t]; + if (!cfg->have_new_ctrl) + continue; + + idx = get_config_index(closid, t); + if (!apply_config(hw_dom, cfg, idx, cpu_mask, mba_sc)) + continue; + + if (!msr_param.res) { + msr_param.low = idx; + msr_param.high = msr_param.low + 1; + msr_param.res = r; + } else { + msr_param.low = min(msr_param.low, idx); + msr_param.high = max(msr_param.high, idx + 1); + } } } @@ -284,11 +338,11 @@ done: static int rdtgroup_parse_resource(char *resname, char *tok, struct rdtgroup *rdtgrp) { - struct rdt_resource *r; + struct resctrl_schema *s; - for_each_alloc_enabled_rdt_resource(r) { - if (!strcmp(resname, r->name) && rdtgrp->closid < r->num_closid) - return parse_line(tok, r, rdtgrp); + list_for_each_entry(s, &resctrl_schema_all, list) { + if (!strcmp(resname, s->name) && rdtgrp->closid < s->num_closid) + return parse_line(tok, s, rdtgrp); } rdt_last_cmd_printf("Unknown or unsupported resource name '%s'\n", resname); return -EINVAL; @@ -297,6 +351,7 @@ static int rdtgroup_parse_resource(char *resname, char *tok, ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { + struct resctrl_schema *s; struct rdtgroup *rdtgrp; struct rdt_domain *dom; struct rdt_resource *r; @@ -327,9 +382,9 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of, goto out; } - for_each_alloc_enabled_rdt_resource(r) { - list_for_each_entry(dom, &r->domains, list) - dom->have_new_ctrl = false; + list_for_each_entry(s, &resctrl_schema_all, list) { + list_for_each_entry(dom, &s->res->domains, list) + memset(dom->staged_config, 0, sizeof(dom->staged_config)); } while ((tok = strsep(&buf, "\n")) != NULL) { @@ -349,8 +404,9 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of, goto out; } - for_each_alloc_enabled_rdt_resource(r) { - ret = update_domains(r, rdtgrp->closid); + list_for_each_entry(s, &resctrl_schema_all, list) { + r = s->res; + ret = resctrl_arch_update_domains(r, rdtgrp->closid); if (ret) goto out; } @@ -371,19 +427,31 @@ out: return ret ?: nbytes; } -static void show_doms(struct seq_file *s, struct rdt_resource *r, int closid) +u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_domain *d, + u32 closid, enum resctrl_conf_type type) +{ + struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d); + u32 idx = get_config_index(closid, type); + + if (!is_mba_sc(r)) + return hw_dom->ctrl_val[idx]; + return hw_dom->mbps_val[idx]; +} + +static void show_doms(struct seq_file *s, struct resctrl_schema *schema, int closid) { + struct 
rdt_resource *r = schema->res; struct rdt_domain *dom; bool sep = false; u32 ctrl_val; - seq_printf(s, "%*s:", max_name_width, r->name); + seq_printf(s, "%*s:", max_name_width, schema->name); list_for_each_entry(dom, &r->domains, list) { if (sep) seq_puts(s, ";"); - ctrl_val = (!is_mba_sc(r) ? dom->ctrl_val[closid] : - dom->mbps_val[closid]); + ctrl_val = resctrl_arch_get_config(r, dom, closid, + schema->conf_type); seq_printf(s, r->format_str, dom->id, max_data_width, ctrl_val); sep = true; @@ -394,16 +462,17 @@ static void show_doms(struct seq_file *s, struct rdt_resource *r, int closid) int rdtgroup_schemata_show(struct kernfs_open_file *of, struct seq_file *s, void *v) { + struct resctrl_schema *schema; struct rdtgroup *rdtgrp; - struct rdt_resource *r; int ret = 0; u32 closid; rdtgrp = rdtgroup_kn_lock_live(of->kn); if (rdtgrp) { if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { - for_each_alloc_enabled_rdt_resource(r) - seq_printf(s, "%s:uninitialized\n", r->name); + list_for_each_entry(schema, &resctrl_schema_all, list) { + seq_printf(s, "%s:uninitialized\n", schema->name); + } } else if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) { if (!rdtgrp->plr->d) { rdt_last_cmd_clear(); @@ -411,15 +480,15 @@ int rdtgroup_schemata_show(struct kernfs_open_file *of, ret = -ENODEV; } else { seq_printf(s, "%s:%d=%x\n", - rdtgrp->plr->r->name, + rdtgrp->plr->s->res->name, rdtgrp->plr->d->id, rdtgrp->plr->cbm); } } else { closid = rdtgrp->closid; - for_each_alloc_enabled_rdt_resource(r) { - if (closid < r->num_closid) - show_doms(s, r, closid); + list_for_each_entry(schema, &resctrl_schema_all, list) { + if (closid < schema->num_closid) + show_doms(s, schema, closid); } } } else { @@ -449,6 +518,7 @@ void mon_event_read(struct rmid_read *rr, struct rdt_resource *r, int rdtgroup_mondata_show(struct seq_file *m, void *arg) { struct kernfs_open_file *of = m->private; + struct rdt_hw_resource *hw_res; u32 resid, evtid, domid; struct rdtgroup *rdtgrp; struct rdt_resource *r; @@ -468,7 +538,8 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg) domid = md.u.domid; evtid = md.u.evtid; - r = &rdt_resources_all[resid]; + hw_res = &rdt_resources_all[resid]; + r = &hw_res->r_resctrl; d = rdt_find_domain(r, domid, NULL); if (IS_ERR_OR_NULL(d)) { ret = -ENOENT; @@ -482,7 +553,7 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg) else if (rr.val & RMID_VAL_UNAVAIL) seq_puts(m, "Unavailable\n"); else - seq_printf(m, "%llu\n", rr.val * r->mon_scale); + seq_printf(m, "%llu\n", rr.val * hw_res->mon_scale); out: rdtgroup_kn_unlock(of->kn); diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 6a5f60a37219..1d647188a43b 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -2,6 +2,7 @@ #ifndef _ASM_X86_RESCTRL_INTERNAL_H #define _ASM_X86_RESCTRL_INTERNAL_H +#include <linux/resctrl.h> #include <linux/sched.h> #include <linux/kernfs.h> #include <linux/fs_context.h> @@ -109,6 +110,7 @@ extern unsigned int resctrl_cqm_threshold; extern bool rdt_alloc_capable; extern bool rdt_mon_capable; extern unsigned int rdt_mon_features; +extern struct list_head resctrl_schema_all; enum rdt_group_type { RDTCTRL_GROUP = 0, @@ -161,8 +163,8 @@ struct mongroup { /** * struct pseudo_lock_region - pseudo-lock region information - * @r: RDT resource to which this pseudo-locked region - * belongs + * @s: Resctrl schema for the resource to which this + * pseudo-locked region belongs * @d: RDT domain to which this pseudo-locked region * belongs 
* @cbm: bitmask of the pseudo-locked region @@ -182,7 +184,7 @@ struct mongroup { * @pm_reqs: Power management QoS requests related to this region */ struct pseudo_lock_region { - struct rdt_resource *r; + struct resctrl_schema *s; struct rdt_domain *d; u32 cbm; wait_queue_head_t lock_thread_wq; @@ -303,44 +305,25 @@ struct mbm_state { }; /** - * struct rdt_domain - group of cpus sharing an RDT resource - * @list: all instances of this resource - * @id: unique id for this instance - * @cpu_mask: which cpus share this resource - * @rmid_busy_llc: - * bitmap of which limbo RMIDs are above threshold - * @mbm_total: saved state for MBM total bandwidth - * @mbm_local: saved state for MBM local bandwidth - * @mbm_over: worker to periodically read MBM h/w counters - * @cqm_limbo: worker to periodically read CQM h/w counters - * @mbm_work_cpu: - * worker cpu for MBM h/w counters - * @cqm_work_cpu: - * worker cpu for CQM h/w counters + * struct rdt_hw_domain - Arch private attributes of a set of CPUs that share + * a resource + * @d_resctrl: Properties exposed to the resctrl file system * @ctrl_val: array of cache or mem ctrl values (indexed by CLOSID) * @mbps_val: When mba_sc is enabled, this holds the bandwidth in MBps - * @new_ctrl: new ctrl value to be loaded - * @have_new_ctrl: did user provide new_ctrl for this domain - * @plr: pseudo-locked region (if any) associated with domain + * + * Members of this structure are accessed via helpers that provide abstraction. */ -struct rdt_domain { - struct list_head list; - int id; - struct cpumask cpu_mask; - unsigned long *rmid_busy_llc; - struct mbm_state *mbm_total; - struct mbm_state *mbm_local; - struct delayed_work mbm_over; - struct delayed_work cqm_limbo; - int mbm_work_cpu; - int cqm_work_cpu; +struct rdt_hw_domain { + struct rdt_domain d_resctrl; u32 *ctrl_val; u32 *mbps_val; - u32 new_ctrl; - bool have_new_ctrl; - struct pseudo_lock_region *plr; }; +static inline struct rdt_hw_domain *resctrl_to_arch_dom(struct rdt_domain *r) +{ + return container_of(r, struct rdt_hw_domain, d_resctrl); +} + /** * struct msr_param - set a range of MSRs from a domain * @res: The resource to use @@ -349,69 +332,8 @@ struct rdt_domain { */ struct msr_param { struct rdt_resource *res; - int low; - int high; -}; - -/** - * struct rdt_cache - Cache allocation related data - * @cbm_len: Length of the cache bit mask - * @min_cbm_bits: Minimum number of consecutive bits to be set - * @cbm_idx_mult: Multiplier of CBM index - * @cbm_idx_offset: Offset of CBM index. CBM index is computed by: - * closid * cbm_idx_multi + cbm_idx_offset - * in a cache bit mask - * @shareable_bits: Bitmask of shareable resource with other - * executing entities - * @arch_has_sparse_bitmaps: True if a bitmap like f00f is valid. - * @arch_has_empty_bitmaps: True if the '0' bitmap is valid. - * @arch_has_per_cpu_cfg: True if QOS_CFG register for this cache - * level has CPU scope. 
- */ -struct rdt_cache { - unsigned int cbm_len; - unsigned int min_cbm_bits; - unsigned int cbm_idx_mult; - unsigned int cbm_idx_offset; - unsigned int shareable_bits; - bool arch_has_sparse_bitmaps; - bool arch_has_empty_bitmaps; - bool arch_has_per_cpu_cfg; -}; - -/** - * enum membw_throttle_mode - System's memory bandwidth throttling mode - * @THREAD_THROTTLE_UNDEFINED: Not relevant to the system - * @THREAD_THROTTLE_MAX: Memory bandwidth is throttled at the core - * always using smallest bandwidth percentage - * assigned to threads, aka "max throttling" - * @THREAD_THROTTLE_PER_THREAD: Memory bandwidth is throttled at the thread - */ -enum membw_throttle_mode { - THREAD_THROTTLE_UNDEFINED = 0, - THREAD_THROTTLE_MAX, - THREAD_THROTTLE_PER_THREAD, -}; - -/** - * struct rdt_membw - Memory bandwidth allocation related data - * @min_bw: Minimum memory bandwidth percentage user can request - * @bw_gran: Granularity at which the memory bandwidth is allocated - * @delay_linear: True if memory B/W delay is in linear scale - * @arch_needs_linear: True if we can't configure non-linear resources - * @throttle_mode: Bandwidth throttling mode when threads request - * different memory bandwidths - * @mba_sc: True if MBA software controller(mba_sc) is enabled - * @mb_map: Mapping of memory B/W percentage to memory B/W delay - */ -struct rdt_membw { - u32 min_bw; - u32 bw_gran; - u32 delay_linear; - bool arch_needs_linear; - enum membw_throttle_mode throttle_mode; - bool mba_sc; - u32 *mb_map; + u32 low; + u32 high; }; static inline bool is_llc_occupancy_enabled(void) @@ -446,111 +368,103 @@ struct rdt_parse_data { }; /** - * struct rdt_resource - attributes of an RDT resource - * @rid: The index of the resource - * @alloc_enabled: Is allocation enabled on this machine - * @mon_enabled: Is monitoring enabled for this feature - * @alloc_capable: Is allocation available on this machine - * @mon_capable: Is monitor feature available on this machine - * @name: Name to use in "schemata" file - * @num_closid: Number of CLOSIDs available - * @cache_level: Which cache level defines scope of this resource - * @default_ctrl: Specifies default cache cbm or memory B/W percent. + * struct rdt_hw_resource - arch private attributes of a resctrl resource + * @r_resctrl: Attributes of the resource used directly by resctrl. + * @num_closid: Maximum number of closid this hardware can support, + * regardless of CDP. This is exposed via + * resctrl_arch_get_num_closid() to avoid confusion + * with struct resctrl_schema's property of the same name, + * which has been corrected for features like CDP. * @msr_base: Base MSR address for CBMs * @msr_update: Function pointer to update QOS MSRs - * @data_width: Character width of data when displaying - * @domains: All domains for this resource - * @cache: Cache allocation related data - * @membw: If the component has bandwidth controls, their properties. - * @format_str: Per resource format string to show domain value - * @parse_ctrlval: Per resource function pointer to parse control values - * @evt_list: List of monitoring events - * @num_rmid: Number of RMIDs available * @mon_scale: cqm counter * mon_scale = occupancy in bytes * @mbm_width: Monitor width, to detect and correct for overflow. - * @fflags: flags to choose base and info files + * @cdp_enabled: CDP state of this resource + * + * Members of this structure are either private to the architecture + * e.g. mbm_width, or accessed via helpers that provide abstraction. e.g. + * msr_update and msr_base. 
*/ -struct rdt_resource { - int rid; - bool alloc_enabled; - bool mon_enabled; - bool alloc_capable; - bool mon_capable; - char *name; - int num_closid; - int cache_level; - u32 default_ctrl; +struct rdt_hw_resource { + struct rdt_resource r_resctrl; + u32 num_closid; unsigned int msr_base; void (*msr_update) (struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r); - int data_width; - struct list_head domains; - struct rdt_cache cache; - struct rdt_membw membw; - const char *format_str; - int (*parse_ctrlval)(struct rdt_parse_data *data, - struct rdt_resource *r, - struct rdt_domain *d); - struct list_head evt_list; - int num_rmid; unsigned int mon_scale; unsigned int mbm_width; - unsigned long fflags; + bool cdp_enabled; }; -int parse_cbm(struct rdt_parse_data *data, struct rdt_resource *r, +static inline struct rdt_hw_resource *resctrl_to_arch_res(struct rdt_resource *r) +{ + return container_of(r, struct rdt_hw_resource, r_resctrl); +} + +int parse_cbm(struct rdt_parse_data *data, struct resctrl_schema *s, struct rdt_domain *d); -int parse_bw(struct rdt_parse_data *data, struct rdt_resource *r, +int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s, struct rdt_domain *d); extern struct mutex rdtgroup_mutex; -extern struct rdt_resource rdt_resources_all[]; +extern struct rdt_hw_resource rdt_resources_all[]; extern struct rdtgroup rdtgroup_default; DECLARE_STATIC_KEY_FALSE(rdt_alloc_enable_key); extern struct dentry *debugfs_resctrl; -enum { +enum resctrl_res_level { RDT_RESOURCE_L3, - RDT_RESOURCE_L3DATA, - RDT_RESOURCE_L3CODE, RDT_RESOURCE_L2, - RDT_RESOURCE_L2DATA, - RDT_RESOURCE_L2CODE, RDT_RESOURCE_MBA, /* Must be the last */ RDT_NUM_RESOURCES, }; +static inline struct rdt_resource *resctrl_inc(struct rdt_resource *res) +{ + struct rdt_hw_resource *hw_res = resctrl_to_arch_res(res); + + hw_res++; + return &hw_res->r_resctrl; +} + +static inline bool resctrl_arch_get_cdp_enabled(enum resctrl_res_level l) +{ + return rdt_resources_all[l].cdp_enabled; +} + +int resctrl_arch_set_cdp_enabled(enum resctrl_res_level l, bool enable); + +/* + * To return the common struct rdt_resource, which is contained in struct + * rdt_hw_resource, walk the resctrl member of struct rdt_hw_resource. 
+ */ #define for_each_rdt_resource(r) \ - for (r = rdt_resources_all; r < rdt_resources_all + RDT_NUM_RESOURCES;\ - r++) + for (r = &rdt_resources_all[0].r_resctrl; \ + r <= &rdt_resources_all[RDT_NUM_RESOURCES - 1].r_resctrl; \ + r = resctrl_inc(r)) #define for_each_capable_rdt_resource(r) \ - for (r = rdt_resources_all; r < rdt_resources_all + RDT_NUM_RESOURCES;\ - r++) \ + for_each_rdt_resource(r) \ if (r->alloc_capable || r->mon_capable) #define for_each_alloc_capable_rdt_resource(r) \ - for (r = rdt_resources_all; r < rdt_resources_all + RDT_NUM_RESOURCES;\ - r++) \ + for_each_rdt_resource(r) \ if (r->alloc_capable) #define for_each_mon_capable_rdt_resource(r) \ - for (r = rdt_resources_all; r < rdt_resources_all + RDT_NUM_RESOURCES;\ - r++) \ + for_each_rdt_resource(r) \ if (r->mon_capable) #define for_each_alloc_enabled_rdt_resource(r) \ - for (r = rdt_resources_all; r < rdt_resources_all + RDT_NUM_RESOURCES;\ - r++) \ + for_each_rdt_resource(r) \ if (r->alloc_enabled) #define for_each_mon_enabled_rdt_resource(r) \ - for (r = rdt_resources_all; r < rdt_resources_all + RDT_NUM_RESOURCES;\ - r++) \ + for_each_rdt_resource(r) \ if (r->mon_enabled) /* CPUID.(EAX=10H, ECX=ResID=1).EAX */ @@ -594,7 +508,7 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off); int rdtgroup_schemata_show(struct kernfs_open_file *of, struct seq_file *s, void *v); -bool rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d, +bool rdtgroup_cbm_overlaps(struct resctrl_schema *s, struct rdt_domain *d, unsigned long cbm, int closid, bool exclusive); unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, struct rdt_domain *d, unsigned long cbm); @@ -609,7 +523,6 @@ void rdt_pseudo_lock_release(void); int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp); void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp); struct rdt_domain *get_domain_from_cpu(int cpu, struct rdt_resource *r); -int update_domains(struct rdt_resource *r, int closid); int closids_supported(void); void closid_free(int closid); int alloc_rmid(void); diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index f07c10b87a87..c9f0f3d63f75 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -174,7 +174,7 @@ void __check_limbo(struct rdt_domain *d, bool force_free) struct rdt_resource *r; u32 crmid = 1, nrmid; - r = &rdt_resources_all[RDT_RESOURCE_L3]; + r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; /* * Skip RMID 0 and start from RMID 1 and check all the RMIDs that @@ -232,7 +232,7 @@ static void add_rmid_to_limbo(struct rmid_entry *entry) int cpu; u64 val; - r = &rdt_resources_all[RDT_RESOURCE_L3]; + r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; entry->busy = 0; cpu = get_cpu(); @@ -285,15 +285,15 @@ static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr, unsigned int width) return chunks >>= shift; } -static int __mon_event_count(u32 rmid, struct rmid_read *rr) +static u64 __mon_event_count(u32 rmid, struct rmid_read *rr) { + struct rdt_hw_resource *hw_res = resctrl_to_arch_res(rr->r); struct mbm_state *m; u64 chunks, tval; tval = __rmid_read(rmid, rr->evtid); if (tval & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL)) { - rr->val = tval; - return -EINVAL; + return tval; } switch (rr->evtid) { case QOS_L3_OCCUP_EVENT_ID: @@ -307,10 +307,10 @@ static int __mon_event_count(u32 rmid, struct rmid_read *rr) break; default: /* - * Code would never reach here because - * an invalid event id would fail the 
__rmid_read. + * Code would never reach here because an invalid + * event id would fail the __rmid_read. */ - return -EINVAL; + return RMID_VAL_ERROR; } if (rr->first) { @@ -319,7 +319,7 @@ static int __mon_event_count(u32 rmid, struct rmid_read *rr) return 0; } - chunks = mbm_overflow_count(m->prev_msr, tval, rr->r->mbm_width); + chunks = mbm_overflow_count(m->prev_msr, tval, hw_res->mbm_width); m->chunks += chunks; m->prev_msr = tval; @@ -334,7 +334,7 @@ static int __mon_event_count(u32 rmid, struct rmid_read *rr) */ static void mbm_bw_count(u32 rmid, struct rmid_read *rr) { - struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3]; + struct rdt_hw_resource *hw_res = resctrl_to_arch_res(rr->r); struct mbm_state *m = &rr->d->mbm_local[rmid]; u64 tval, cur_bw, chunks; @@ -342,8 +342,8 @@ static void mbm_bw_count(u32 rmid, struct rmid_read *rr) if (tval & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL)) return; - chunks = mbm_overflow_count(m->prev_bw_msr, tval, rr->r->mbm_width); - cur_bw = (get_corrected_mbm_count(rmid, chunks) * r->mon_scale) >> 20; + chunks = mbm_overflow_count(m->prev_bw_msr, tval, hw_res->mbm_width); + cur_bw = (get_corrected_mbm_count(rmid, chunks) * hw_res->mon_scale) >> 20; if (m->delta_comp) m->delta_bw = abs(cur_bw - m->prev_bw); @@ -361,23 +361,29 @@ void mon_event_count(void *info) struct rdtgroup *rdtgrp, *entry; struct rmid_read *rr = info; struct list_head *head; + u64 ret_val; rdtgrp = rr->rgrp; - if (__mon_event_count(rdtgrp->mon.rmid, rr)) - return; + ret_val = __mon_event_count(rdtgrp->mon.rmid, rr); /* - * For Ctrl groups read data from child monitor groups. + * For Ctrl groups read data from child monitor groups and + * add them together. Count events which are read successfully. + * Discard the rmid_read's reporting errors. */ head = &rdtgrp->mon.crdtgrp_list; if (rdtgrp->type == RDTCTRL_GROUP) { list_for_each_entry(entry, head, mon.crdtgrp_list) { - if (__mon_event_count(entry->mon.rmid, rr)) - return; + if (__mon_event_count(entry->mon.rmid, rr) == 0) + ret_val = 0; } } + + /* Report error if none of rmid_reads are successful */ + if (ret_val) + rr->val = ret_val; } /* @@ -416,6 +422,8 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm) { u32 closid, rmid, cur_msr, cur_msr_val, new_msr_val; struct mbm_state *pmbm_data, *cmbm_data; + struct rdt_hw_resource *hw_r_mba; + struct rdt_hw_domain *hw_dom_mba; u32 cur_bw, delta_bw, user_bw; struct rdt_resource *r_mba; struct rdt_domain *dom_mba; @@ -425,7 +433,8 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm) if (!is_mbm_local_enabled()) return; - r_mba = &rdt_resources_all[RDT_RESOURCE_MBA]; + hw_r_mba = &rdt_resources_all[RDT_RESOURCE_MBA]; + r_mba = &hw_r_mba->r_resctrl; closid = rgrp->closid; rmid = rgrp->mon.rmid; pmbm_data = &dom_mbm->mbm_local[rmid]; @@ -435,11 +444,16 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm) pr_warn_once("Failure to get domain for MBA update\n"); return; } + hw_dom_mba = resctrl_to_arch_dom(dom_mba); cur_bw = pmbm_data->prev_bw; - user_bw = dom_mba->mbps_val[closid]; + user_bw = resctrl_arch_get_config(r_mba, dom_mba, closid, CDP_NONE); delta_bw = pmbm_data->delta_bw; - cur_msr_val = dom_mba->ctrl_val[closid]; + /* + * resctrl_arch_get_config() chooses the mbps/ctrl value to return + * based on is_mba_sc(). For now, reach into the hw_dom. + */ + cur_msr_val = hw_dom_mba->ctrl_val[closid]; /* * For Ctrl groups read data from child monitor groups. 
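The reworked mon_event_count() above no longer bails out on the first failed read: ret_val starts as the parent's result from __mon_event_count(), and a single successful read from any rmid in the control group clears it, so rr->val only ends up carrying an error when every read failed. A minimal user-space sketch of that aggregation rule, with a toy reader standing in for __mon_event_count() (illustrative only, not the kernel code):

#include <stdint.h>
#include <stdio.h>

#define RMID_VAL_ERROR (1ULL << 63)

/* Toy stand-in for __mon_event_count(): rmid 2 fails, the rest succeed
 * and add their chunk count to *sum. */
static uint64_t mon_read(uint32_t rmid, uint64_t *sum)
{
	if (rmid == 2)
		return RMID_VAL_ERROR;
	*sum += 100;
	return 0;
}

int main(void)
{
	uint32_t rmids[] = { 2, 1, 3 };	/* parent fails, children recover */
	uint64_t sum = 0;
	uint64_t ret_val = mon_read(rmids[0], &sum);
	int i;

	/* One successful child read is enough to clear the error. */
	for (i = 1; i < 3; i++)
		if (mon_read(rmids[i], &sum) == 0)
			ret_val = 0;

	if (ret_val)
		printf("error: all reads failed\n");
	else
		printf("sum = %llu chunks\n", (unsigned long long)sum);
	return 0;
}

The key property is that a partially failing group still reports the sum of its successful reads rather than an error.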
@@ -474,9 +488,9 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm) return; } - cur_msr = r_mba->msr_base + closid; + cur_msr = hw_r_mba->msr_base + closid; wrmsrl(cur_msr, delay_bw_map(new_msr_val, r_mba)); - dom_mba->ctrl_val[closid] = new_msr_val; + hw_dom_mba->ctrl_val[closid] = new_msr_val; /* * Delta values are updated dynamically package wise for each @@ -538,7 +552,7 @@ void cqm_handle_limbo(struct work_struct *work) mutex_lock(&rdtgroup_mutex); - r = &rdt_resources_all[RDT_RESOURCE_L3]; + r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; d = container_of(work, struct rdt_domain, cqm_limbo.work); __check_limbo(d, false); @@ -574,7 +588,7 @@ void mbm_handle_overflow(struct work_struct *work) if (!static_branch_likely(&rdt_mon_enable_key)) goto out_unlock; - r = &rdt_resources_all[RDT_RESOURCE_L3]; + r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; d = container_of(work, struct rdt_domain, mbm_over.work); list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) { @@ -671,15 +685,16 @@ static void l3_mon_evt_init(struct rdt_resource *r) int rdt_get_mon_l3_config(struct rdt_resource *r) { unsigned int mbm_offset = boot_cpu_data.x86_cache_mbm_width_offset; + struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); unsigned int cl_size = boot_cpu_data.x86_cache_size; int ret; - r->mon_scale = boot_cpu_data.x86_cache_occ_scale; + hw_res->mon_scale = boot_cpu_data.x86_cache_occ_scale; r->num_rmid = boot_cpu_data.x86_cache_max_rmid + 1; - r->mbm_width = MBM_CNTR_WIDTH_BASE; + hw_res->mbm_width = MBM_CNTR_WIDTH_BASE; if (mbm_offset > 0 && mbm_offset <= MBM_CNTR_WIDTH_OFFSET_MAX) - r->mbm_width += mbm_offset; + hw_res->mbm_width += mbm_offset; else if (mbm_offset > MBM_CNTR_WIDTH_OFFSET_MAX) pr_warn("Ignoring impossible MBM counter offset\n"); @@ -693,7 +708,7 @@ int rdt_get_mon_l3_config(struct rdt_resource *r) resctrl_cqm_threshold = cl_size * 1024 / r->num_rmid; /* h/w works in units of "boot_cpu_data.x86_cache_occ_scale" */ - resctrl_cqm_threshold /= r->mon_scale; + resctrl_cqm_threshold /= hw_res->mon_scale; ret = dom_data_init(r); if (ret) diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c index 2207916cae65..db813f819ad6 100644 --- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c +++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c @@ -250,7 +250,7 @@ static void pseudo_lock_region_clear(struct pseudo_lock_region *plr) plr->line_size = 0; kfree(plr->kmem); plr->kmem = NULL; - plr->r = NULL; + plr->s = NULL; if (plr->d) plr->d->plr = NULL; plr->d = NULL; @@ -294,10 +294,10 @@ static int pseudo_lock_region_init(struct pseudo_lock_region *plr) ci = get_cpu_cacheinfo(plr->cpu); - plr->size = rdtgroup_cbm_to_size(plr->r, plr->d, plr->cbm); + plr->size = rdtgroup_cbm_to_size(plr->s->res, plr->d, plr->cbm); for (i = 0; i < ci->num_leaves; i++) { - if (ci->info_list[i].level == plr->r->cache_level) { + if (ci->info_list[i].level == plr->s->res->cache_level) { plr->line_size = ci->info_list[i].coherency_line_size; return 0; } @@ -688,8 +688,8 @@ int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp) * resource, the portion of cache used by it should be made * unavailable to all future allocations from both resources. 
*/ - if (rdt_resources_all[RDT_RESOURCE_L3DATA].alloc_enabled || - rdt_resources_all[RDT_RESOURCE_L2DATA].alloc_enabled) { + if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L3) || + resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L2)) { rdt_last_cmd_puts("CDP enabled\n"); return -EINVAL; } @@ -800,7 +800,7 @@ bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, unsigned long cbm unsigned long cbm_b; if (d->plr) { - cbm_len = d->plr->r->cache.cbm_len; + cbm_len = d->plr->s->res->cache.cbm_len; cbm_b = d->plr->cbm; if (bitmap_intersects(&cbm, &cbm_b, cbm_len)) return true; diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 01fd30e7829d..b57b3db9a6a7 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -39,6 +39,9 @@ static struct kernfs_root *rdt_root; struct rdtgroup rdtgroup_default; LIST_HEAD(rdt_all_groups); +/* list of entries for the schemata file */ +LIST_HEAD(resctrl_schema_all); + /* Kernel fs node for "info" directory under root */ static struct kernfs_node *kn_info; @@ -100,12 +103,12 @@ int closids_supported(void) static void closid_init(void) { - struct rdt_resource *r; - int rdt_min_closid = 32; + struct resctrl_schema *s; + u32 rdt_min_closid = 32; /* Compute rdt_min_closid across all resources */ - for_each_alloc_enabled_rdt_resource(r) - rdt_min_closid = min(rdt_min_closid, r->num_closid); + list_for_each_entry(s, &resctrl_schema_all, list) + rdt_min_closid = min(rdt_min_closid, s->num_closid); closid_free_map = BIT_MASK(rdt_min_closid) - 1; @@ -842,16 +845,17 @@ static int rdt_last_cmd_status_show(struct kernfs_open_file *of, static int rdt_num_closids_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct rdt_resource *r = of->kn->parent->priv; + struct resctrl_schema *s = of->kn->parent->priv; - seq_printf(seq, "%d\n", r->num_closid); + seq_printf(seq, "%u\n", s->num_closid); return 0; } static int rdt_default_ctrl_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct rdt_resource *r = of->kn->parent->priv; + struct resctrl_schema *s = of->kn->parent->priv; + struct rdt_resource *r = s->res; seq_printf(seq, "%x\n", r->default_ctrl); return 0; @@ -860,7 +864,8 @@ static int rdt_default_ctrl_show(struct kernfs_open_file *of, static int rdt_min_cbm_bits_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct rdt_resource *r = of->kn->parent->priv; + struct resctrl_schema *s = of->kn->parent->priv; + struct rdt_resource *r = s->res; seq_printf(seq, "%u\n", r->cache.min_cbm_bits); return 0; @@ -869,7 +874,8 @@ static int rdt_min_cbm_bits_show(struct kernfs_open_file *of, static int rdt_shareable_bits_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct rdt_resource *r = of->kn->parent->priv; + struct resctrl_schema *s = of->kn->parent->priv; + struct rdt_resource *r = s->res; seq_printf(seq, "%x\n", r->cache.shareable_bits); return 0; @@ -892,38 +898,40 @@ static int rdt_shareable_bits_show(struct kernfs_open_file *of, static int rdt_bit_usage_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct rdt_resource *r = of->kn->parent->priv; + struct resctrl_schema *s = of->kn->parent->priv; /* * Use unsigned long even though only 32 bits are used to ensure * test_bit() is used safely. 
*/ unsigned long sw_shareable = 0, hw_shareable = 0; unsigned long exclusive = 0, pseudo_locked = 0; + struct rdt_resource *r = s->res; struct rdt_domain *dom; int i, hwb, swb, excl, psl; enum rdtgrp_mode mode; bool sep = false; - u32 *ctrl; + u32 ctrl_val; mutex_lock(&rdtgroup_mutex); hw_shareable = r->cache.shareable_bits; list_for_each_entry(dom, &r->domains, list) { if (sep) seq_putc(seq, ';'); - ctrl = dom->ctrl_val; sw_shareable = 0; exclusive = 0; seq_printf(seq, "%d=", dom->id); - for (i = 0; i < closids_supported(); i++, ctrl++) { + for (i = 0; i < closids_supported(); i++) { if (!closid_allocated(i)) continue; + ctrl_val = resctrl_arch_get_config(r, dom, i, + s->conf_type); mode = rdtgroup_mode_by_closid(i); switch (mode) { case RDT_MODE_SHAREABLE: - sw_shareable |= *ctrl; + sw_shareable |= ctrl_val; break; case RDT_MODE_EXCLUSIVE: - exclusive |= *ctrl; + exclusive |= ctrl_val; break; case RDT_MODE_PSEUDO_LOCKSETUP: /* @@ -970,7 +978,8 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of, static int rdt_min_bw_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct rdt_resource *r = of->kn->parent->priv; + struct resctrl_schema *s = of->kn->parent->priv; + struct rdt_resource *r = s->res; seq_printf(seq, "%u\n", r->membw.min_bw); return 0; @@ -1001,7 +1010,8 @@ static int rdt_mon_features_show(struct kernfs_open_file *of, static int rdt_bw_gran_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct rdt_resource *r = of->kn->parent->priv; + struct resctrl_schema *s = of->kn->parent->priv; + struct rdt_resource *r = s->res; seq_printf(seq, "%u\n", r->membw.bw_gran); return 0; @@ -1010,7 +1020,8 @@ static int rdt_bw_gran_show(struct kernfs_open_file *of, static int rdt_delay_linear_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct rdt_resource *r = of->kn->parent->priv; + struct resctrl_schema *s = of->kn->parent->priv; + struct rdt_resource *r = s->res; seq_printf(seq, "%u\n", r->membw.delay_linear); return 0; @@ -1020,8 +1031,9 @@ static int max_threshold_occ_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { struct rdt_resource *r = of->kn->parent->priv; + struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); - seq_printf(seq, "%u\n", resctrl_cqm_threshold * r->mon_scale); + seq_printf(seq, "%u\n", resctrl_cqm_threshold * hw_res->mon_scale); return 0; } @@ -1029,7 +1041,8 @@ static int max_threshold_occ_show(struct kernfs_open_file *of, static int rdt_thread_throttle_mode_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct rdt_resource *r = of->kn->parent->priv; + struct resctrl_schema *s = of->kn->parent->priv; + struct rdt_resource *r = s->res; if (r->membw.throttle_mode == THREAD_THROTTLE_PER_THREAD) seq_puts(seq, "per-thread\n"); @@ -1042,7 +1055,7 @@ static int rdt_thread_throttle_mode_show(struct kernfs_open_file *of, static ssize_t max_threshold_occ_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { - struct rdt_resource *r = of->kn->parent->priv; + struct rdt_hw_resource *hw_res; unsigned int bytes; int ret; @@ -1053,7 +1066,8 @@ static ssize_t max_threshold_occ_write(struct kernfs_open_file *of, if (bytes > (boot_cpu_data.x86_cache_size * 1024)) return -EINVAL; - resctrl_cqm_threshold = bytes / r->mon_scale; + hw_res = resctrl_to_arch_res(of->kn->parent->priv); + resctrl_cqm_threshold = bytes / hw_res->mon_scale; return nbytes; } @@ -1078,76 +1092,17 @@ static int rdtgroup_mode_show(struct kernfs_open_file *of, return 0; } -/** - * 
rdt_cdp_peer_get - Retrieve CDP peer if it exists - * @r: RDT resource to which RDT domain @d belongs - * @d: Cache instance for which a CDP peer is requested - * @r_cdp: RDT resource that shares hardware with @r (RDT resource peer) - * Used to return the result. - * @d_cdp: RDT domain that shares hardware with @d (RDT domain peer) - * Used to return the result. - * - * RDT resources are managed independently and by extension the RDT domains - * (RDT resource instances) are managed independently also. The Code and - * Data Prioritization (CDP) RDT resources, while managed independently, - * could refer to the same underlying hardware. For example, - * RDT_RESOURCE_L2CODE and RDT_RESOURCE_L2DATA both refer to the L2 cache. - * - * When provided with an RDT resource @r and an instance of that RDT - * resource @d rdt_cdp_peer_get() will return if there is a peer RDT - * resource and the exact instance that shares the same hardware. - * - * Return: 0 if a CDP peer was found, <0 on error or if no CDP peer exists. - * If a CDP peer was found, @r_cdp will point to the peer RDT resource - * and @d_cdp will point to the peer RDT domain. - */ -static int rdt_cdp_peer_get(struct rdt_resource *r, struct rdt_domain *d, - struct rdt_resource **r_cdp, - struct rdt_domain **d_cdp) +static enum resctrl_conf_type resctrl_peer_type(enum resctrl_conf_type my_type) { - struct rdt_resource *_r_cdp = NULL; - struct rdt_domain *_d_cdp = NULL; - int ret = 0; - - switch (r->rid) { - case RDT_RESOURCE_L3DATA: - _r_cdp = &rdt_resources_all[RDT_RESOURCE_L3CODE]; - break; - case RDT_RESOURCE_L3CODE: - _r_cdp = &rdt_resources_all[RDT_RESOURCE_L3DATA]; - break; - case RDT_RESOURCE_L2DATA: - _r_cdp = &rdt_resources_all[RDT_RESOURCE_L2CODE]; - break; - case RDT_RESOURCE_L2CODE: - _r_cdp = &rdt_resources_all[RDT_RESOURCE_L2DATA]; - break; + switch (my_type) { + case CDP_CODE: + return CDP_DATA; + case CDP_DATA: + return CDP_CODE; default: - ret = -ENOENT; - goto out; - } - - /* - * When a new CPU comes online and CDP is enabled then the new - * RDT domains (if any) associated with both CDP RDT resources - * are added in the same CPU online routine while the - * rdtgroup_mutex is held. It should thus not happen for one - * RDT domain to exist and be associated with its RDT CDP - * resource but there is no RDT domain associated with the - * peer RDT CDP resource. Hence the WARN. - */ - _d_cdp = rdt_find_domain(_r_cdp, d->id, NULL); - if (WARN_ON(IS_ERR_OR_NULL(_d_cdp))) { - _r_cdp = NULL; - _d_cdp = NULL; - ret = -EINVAL; + case CDP_NONE: + return CDP_NONE; } - -out: - *r_cdp = _r_cdp; - *d_cdp = _d_cdp; - - return ret; } /** @@ -1171,11 +1126,11 @@ out: * Return: false if CBM does not overlap, true if it does. 
*/ static bool __rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d, - unsigned long cbm, int closid, bool exclusive) + unsigned long cbm, int closid, + enum resctrl_conf_type type, bool exclusive) { enum rdtgrp_mode mode; unsigned long ctrl_b; - u32 *ctrl; int i; /* Check for any overlap with regions used by hardware directly */ @@ -1186,9 +1141,8 @@ static bool __rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d } /* Check for overlap with other resource groups */ - ctrl = d->ctrl_val; - for (i = 0; i < closids_supported(); i++, ctrl++) { - ctrl_b = *ctrl; + for (i = 0; i < closids_supported(); i++) { + ctrl_b = resctrl_arch_get_config(r, d, i, type); mode = rdtgroup_mode_by_closid(i); if (closid_allocated(i) && i != closid && mode != RDT_MODE_PSEUDO_LOCKSETUP) { @@ -1208,7 +1162,7 @@ static bool __rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d /** * rdtgroup_cbm_overlaps - Does CBM overlap with other use of hardware - * @r: Resource to which domain instance @d belongs. + * @s: Schema for the resource to which domain instance @d belongs. * @d: The domain instance for which @closid is being tested. * @cbm: Capacity bitmask being tested. * @closid: Intended closid for @cbm. @@ -1226,19 +1180,19 @@ static bool __rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d * * Return: true if CBM overlap detected, false if there is no overlap */ -bool rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d, +bool rdtgroup_cbm_overlaps(struct resctrl_schema *s, struct rdt_domain *d, unsigned long cbm, int closid, bool exclusive) { - struct rdt_resource *r_cdp; - struct rdt_domain *d_cdp; + enum resctrl_conf_type peer_type = resctrl_peer_type(s->conf_type); + struct rdt_resource *r = s->res; - if (__rdtgroup_cbm_overlaps(r, d, cbm, closid, exclusive)) + if (__rdtgroup_cbm_overlaps(r, d, cbm, closid, s->conf_type, + exclusive)) return true; - if (rdt_cdp_peer_get(r, d, &r_cdp, &d_cdp) < 0) + if (!resctrl_arch_get_cdp_enabled(r->rid)) return false; - - return __rdtgroup_cbm_overlaps(r_cdp, d_cdp, cbm, closid, exclusive); + return __rdtgroup_cbm_overlaps(r, d, cbm, closid, peer_type, exclusive); } /** @@ -1256,17 +1210,21 @@ bool rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d, static bool rdtgroup_mode_test_exclusive(struct rdtgroup *rdtgrp) { int closid = rdtgrp->closid; + struct resctrl_schema *s; struct rdt_resource *r; bool has_cache = false; struct rdt_domain *d; + u32 ctrl; - for_each_alloc_enabled_rdt_resource(r) { + list_for_each_entry(s, &resctrl_schema_all, list) { + r = s->res; if (r->rid == RDT_RESOURCE_MBA) continue; has_cache = true; list_for_each_entry(d, &r->domains, list) { - if (rdtgroup_cbm_overlaps(r, d, d->ctrl_val[closid], - rdtgrp->closid, false)) { + ctrl = resctrl_arch_get_config(r, d, closid, + s->conf_type); + if (rdtgroup_cbm_overlaps(s, d, ctrl, closid, false)) { rdt_last_cmd_puts("Schemata overlaps\n"); return false; } @@ -1397,6 +1355,7 @@ unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, static int rdtgroup_size_show(struct kernfs_open_file *of, struct seq_file *s, void *v) { + struct resctrl_schema *schema; struct rdtgroup *rdtgrp; struct rdt_resource *r; struct rdt_domain *d; @@ -1418,8 +1377,8 @@ static int rdtgroup_size_show(struct kernfs_open_file *of, ret = -ENODEV; } else { seq_printf(s, "%*s:", max_name_width, - rdtgrp->plr->r->name); - size = rdtgroup_cbm_to_size(rdtgrp->plr->r, + rdtgrp->plr->s->name); + size = rdtgroup_cbm_to_size(rdtgrp->plr->s->res, 
rdtgrp->plr->d, rdtgrp->plr->cbm); seq_printf(s, "%d=%u\n", rdtgrp->plr->d->id, size); @@ -1427,18 +1386,19 @@ static int rdtgroup_size_show(struct kernfs_open_file *of, goto out; } - for_each_alloc_enabled_rdt_resource(r) { + list_for_each_entry(schema, &resctrl_schema_all, list) { + r = schema->res; sep = false; - seq_printf(s, "%*s:", max_name_width, r->name); + seq_printf(s, "%*s:", max_name_width, schema->name); list_for_each_entry(d, &r->domains, list) { if (sep) seq_putc(s, ';'); if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { size = 0; } else { - ctrl = (!is_mba_sc(r) ? - d->ctrl_val[rdtgrp->closid] : - d->mbps_val[rdtgrp->closid]); + ctrl = resctrl_arch_get_config(r, d, + rdtgrp->closid, + schema->conf_type); if (r->rid == RDT_RESOURCE_MBA) size = ctrl; else @@ -1757,14 +1717,14 @@ int rdtgroup_kn_mode_restore(struct rdtgroup *r, const char *name, return ret; } -static int rdtgroup_mkdir_info_resdir(struct rdt_resource *r, char *name, +static int rdtgroup_mkdir_info_resdir(void *priv, char *name, unsigned long fflags) { struct kernfs_node *kn_subdir; int ret; kn_subdir = kernfs_create_dir(kn_info, name, - kn_info->mode, r); + kn_info->mode, priv); if (IS_ERR(kn_subdir)) return PTR_ERR(kn_subdir); @@ -1781,6 +1741,7 @@ static int rdtgroup_mkdir_info_resdir(struct rdt_resource *r, char *name, static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn) { + struct resctrl_schema *s; struct rdt_resource *r; unsigned long fflags; char name[32]; @@ -1795,9 +1756,11 @@ static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn) if (ret) goto out_destroy; - for_each_alloc_enabled_rdt_resource(r) { + /* loop over enabled controls, these are all alloc_enabled */ + list_for_each_entry(s, &resctrl_schema_all, list) { + r = s->res; fflags = r->fflags | RF_CTRL_INFO; - ret = rdtgroup_mkdir_info_resdir(r, r->name, fflags); + ret = rdtgroup_mkdir_info_resdir(s, s->name, fflags); if (ret) goto out_destroy; } @@ -1867,7 +1830,7 @@ static void l2_qos_cfg_update(void *arg) static inline bool is_mba_linear(void) { - return rdt_resources_all[RDT_RESOURCE_MBA].membw.delay_linear; + return rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl.membw.delay_linear; } static int set_cache_qos_cfg(int level, bool enable) @@ -1888,7 +1851,7 @@ static int set_cache_qos_cfg(int level, bool enable) if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL)) return -ENOMEM; - r_l = &rdt_resources_all[level]; + r_l = &rdt_resources_all[level].r_resctrl; list_for_each_entry(d, &r_l->domains, list) { if (r_l->cache.arch_has_per_cpu_cfg) /* Pick all the CPUs in the domain instance */ @@ -1914,14 +1877,16 @@ static int set_cache_qos_cfg(int level, bool enable) /* Restore the qos cfg state when a domain comes online */ void rdt_domain_reconfigure_cdp(struct rdt_resource *r) { - if (!r->alloc_capable) + struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); + + if (!r->cdp_capable) return; - if (r == &rdt_resources_all[RDT_RESOURCE_L2DATA]) - l2_qos_cfg_update(&r->alloc_enabled); + if (r->rid == RDT_RESOURCE_L2) + l2_qos_cfg_update(&hw_res->cdp_enabled); - if (r == &rdt_resources_all[RDT_RESOURCE_L3DATA]) - l3_qos_cfg_update(&r->alloc_enabled); + if (r->rid == RDT_RESOURCE_L3) + l3_qos_cfg_update(&hw_res->cdp_enabled); } /* @@ -1932,7 +1897,8 @@ void rdt_domain_reconfigure_cdp(struct rdt_resource *r) */ static int set_mba_sc(bool mba_sc) { - struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA]; + struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl; + struct rdt_hw_domain *hw_dom; struct 
rdt_domain *d; if (!is_mbm_enabled() || !is_mba_linear() || @@ -1940,73 +1906,60 @@ static int set_mba_sc(bool mba_sc) return -EINVAL; r->membw.mba_sc = mba_sc; - list_for_each_entry(d, &r->domains, list) - setup_default_ctrlval(r, d->ctrl_val, d->mbps_val); + list_for_each_entry(d, &r->domains, list) { + hw_dom = resctrl_to_arch_dom(d); + setup_default_ctrlval(r, hw_dom->ctrl_val, hw_dom->mbps_val); + } return 0; } -static int cdp_enable(int level, int data_type, int code_type) +static int cdp_enable(int level) { - struct rdt_resource *r_ldata = &rdt_resources_all[data_type]; - struct rdt_resource *r_lcode = &rdt_resources_all[code_type]; - struct rdt_resource *r_l = &rdt_resources_all[level]; + struct rdt_resource *r_l = &rdt_resources_all[level].r_resctrl; int ret; - if (!r_l->alloc_capable || !r_ldata->alloc_capable || - !r_lcode->alloc_capable) + if (!r_l->alloc_capable) return -EINVAL; ret = set_cache_qos_cfg(level, true); - if (!ret) { - r_l->alloc_enabled = false; - r_ldata->alloc_enabled = true; - r_lcode->alloc_enabled = true; - } + if (!ret) + rdt_resources_all[level].cdp_enabled = true; + return ret; } -static int cdpl3_enable(void) +static void cdp_disable(int level) { - return cdp_enable(RDT_RESOURCE_L3, RDT_RESOURCE_L3DATA, - RDT_RESOURCE_L3CODE); -} + struct rdt_hw_resource *r_hw = &rdt_resources_all[level]; -static int cdpl2_enable(void) -{ - return cdp_enable(RDT_RESOURCE_L2, RDT_RESOURCE_L2DATA, - RDT_RESOURCE_L2CODE); + if (r_hw->cdp_enabled) { + set_cache_qos_cfg(level, false); + r_hw->cdp_enabled = false; + } } -static void cdp_disable(int level, int data_type, int code_type) +int resctrl_arch_set_cdp_enabled(enum resctrl_res_level l, bool enable) { - struct rdt_resource *r = &rdt_resources_all[level]; + struct rdt_hw_resource *hw_res = &rdt_resources_all[l]; - r->alloc_enabled = r->alloc_capable; + if (!hw_res->r_resctrl.cdp_capable) + return -EINVAL; - if (rdt_resources_all[data_type].alloc_enabled) { - rdt_resources_all[data_type].alloc_enabled = false; - rdt_resources_all[code_type].alloc_enabled = false; - set_cache_qos_cfg(level, false); - } -} + if (enable) + return cdp_enable(l); -static void cdpl3_disable(void) -{ - cdp_disable(RDT_RESOURCE_L3, RDT_RESOURCE_L3DATA, RDT_RESOURCE_L3CODE); -} + cdp_disable(l); -static void cdpl2_disable(void) -{ - cdp_disable(RDT_RESOURCE_L2, RDT_RESOURCE_L2DATA, RDT_RESOURCE_L2CODE); + return 0; } static void cdp_disable_all(void) { - if (rdt_resources_all[RDT_RESOURCE_L3DATA].alloc_enabled) - cdpl3_disable(); - if (rdt_resources_all[RDT_RESOURCE_L2DATA].alloc_enabled) - cdpl2_disable(); + if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L3)) + resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, false); + if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L2)) + resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, false); } /* @@ -2084,10 +2037,10 @@ static int rdt_enable_ctx(struct rdt_fs_context *ctx) int ret = 0; if (ctx->enable_cdpl2) - ret = cdpl2_enable(); + ret = resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, true); if (!ret && ctx->enable_cdpl3) - ret = cdpl3_enable(); + ret = resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, true); if (!ret && ctx->enable_mba_mbps) ret = set_mba_sc(true); @@ -2095,6 +2048,92 @@ static int rdt_enable_ctx(struct rdt_fs_context *ctx) return ret; } +static int schemata_list_add(struct rdt_resource *r, enum resctrl_conf_type type) +{ + struct resctrl_schema *s; + const char *suffix = ""; + int ret, cl; + + s = kzalloc(sizeof(*s), GFP_KERNEL); + if (!s) + return -ENOMEM; + + s->res = r; + s->num_closid = 
resctrl_arch_get_num_closid(r); + if (resctrl_arch_get_cdp_enabled(r->rid)) + s->num_closid /= 2; + + s->conf_type = type; + switch (type) { + case CDP_CODE: + suffix = "CODE"; + break; + case CDP_DATA: + suffix = "DATA"; + break; + case CDP_NONE: + suffix = ""; + break; + } + + ret = snprintf(s->name, sizeof(s->name), "%s%s", r->name, suffix); + if (ret >= sizeof(s->name)) { + kfree(s); + return -EINVAL; + } + + cl = strlen(s->name); + + /* + * If CDP is supported by this resource, but not enabled, + * include the suffix. This ensures the tabular format of the + * schemata file does not change between mounts of the filesystem. + */ + if (r->cdp_capable && !resctrl_arch_get_cdp_enabled(r->rid)) + cl += 4; + + if (cl > max_name_width) + max_name_width = cl; + + INIT_LIST_HEAD(&s->list); + list_add(&s->list, &resctrl_schema_all); + + return 0; +} + +static int schemata_list_create(void) +{ + struct rdt_resource *r; + int ret = 0; + + for_each_alloc_enabled_rdt_resource(r) { + if (resctrl_arch_get_cdp_enabled(r->rid)) { + ret = schemata_list_add(r, CDP_CODE); + if (ret) + break; + + ret = schemata_list_add(r, CDP_DATA); + } else { + ret = schemata_list_add(r, CDP_NONE); + } + + if (ret) + break; + } + + return ret; +} + +static void schemata_list_destroy(void) +{ + struct resctrl_schema *s, *tmp; + + list_for_each_entry_safe(s, tmp, &resctrl_schema_all, list) { + list_del(&s->list); + kfree(s); + } +} + static int rdt_get_tree(struct fs_context *fc) { struct rdt_fs_context *ctx = rdt_fc2context(fc); @@ -2116,11 +2155,17 @@ static int rdt_get_tree(struct fs_context *fc) if (ret < 0) goto out_cdp; + ret = schemata_list_create(); + if (ret) { + schemata_list_destroy(); + goto out_mba; + } + closid_init(); ret = rdtgroup_create_info_dir(rdtgroup_default.kn); if (ret < 0) - goto out_mba; + goto out_schemata_free; if (rdt_mon_capable) { ret = mongroup_create_dir(rdtgroup_default.kn, @@ -2153,7 +2198,7 @@ static int rdt_get_tree(struct fs_context *fc) static_branch_enable_cpuslocked(&rdt_enable_key); if (is_mbm_enabled()) { - r = &rdt_resources_all[RDT_RESOURCE_L3]; + r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; list_for_each_entry(dom, &r->domains, list) mbm_setup_overflow_handler(dom, MBM_OVERFLOW_INTERVAL); } @@ -2170,6 +2215,8 @@ out_mongrp: kernfs_remove(kn_mongrp); out_info: kernfs_remove(kn_info); +out_schemata_free: + schemata_list_destroy(); out_mba: if (ctx->enable_mba_mbps) set_mba_sc(false); @@ -2257,6 +2304,8 @@ static int rdt_init_fs_context(struct fs_context *fc) static int reset_all_ctrls(struct rdt_resource *r) { + struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); + struct rdt_hw_domain *hw_dom; struct msr_param msr_param; cpumask_var_t cpu_mask; struct rdt_domain *d; @@ -2267,7 +2316,7 @@ static int reset_all_ctrls(struct rdt_resource *r) msr_param.res = r; msr_param.low = 0; - msr_param.high = r->num_closid; + msr_param.high = hw_res->num_closid; /* * Disable resource control for this resource by setting all @@ -2275,10 +2324,11 @@ static int reset_all_ctrls(struct rdt_resource *r) * from each domain to update the MSRs below. */ list_for_each_entry(d, &r->domains, list) { + hw_dom = resctrl_to_arch_dom(d); cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask); - for (i = 0; i < r->num_closid; i++) - d->ctrl_val[i] = r->default_ctrl; + for (i = 0; i < hw_res->num_closid; i++) + hw_dom->ctrl_val[i] = r->default_ctrl; } cpu = get_cpu(); /* Update CBM on this cpu if it's in cpu_mask. 
*/ @@ -2408,6 +2458,7 @@ static void rdt_kill_sb(struct super_block *sb) rmdir_all_sub(); rdt_pseudo_lock_release(); rdtgroup_default.mode = RDT_MODE_SHAREABLE; + schemata_list_destroy(); static_branch_disable_cpuslocked(&rdt_alloc_enable_key); static_branch_disable_cpuslocked(&rdt_mon_enable_key); static_branch_disable_cpuslocked(&rdt_enable_key); @@ -2642,23 +2693,24 @@ static u32 cbm_ensure_valid(u32 _val, struct rdt_resource *r) * Set the RDT domain up to start off with all usable allocations. That is, * all shareable and unused bits. All-zero CBM is invalid. */ -static int __init_one_rdt_domain(struct rdt_domain *d, struct rdt_resource *r, +static int __init_one_rdt_domain(struct rdt_domain *d, struct resctrl_schema *s, u32 closid) { - struct rdt_resource *r_cdp = NULL; - struct rdt_domain *d_cdp = NULL; + enum resctrl_conf_type peer_type = resctrl_peer_type(s->conf_type); + enum resctrl_conf_type t = s->conf_type; + struct resctrl_staged_config *cfg; + struct rdt_resource *r = s->res; u32 used_b = 0, unused_b = 0; unsigned long tmp_cbm; enum rdtgrp_mode mode; - u32 peer_ctl, *ctrl; + u32 peer_ctl, ctrl_val; int i; - rdt_cdp_peer_get(r, d, &r_cdp, &d_cdp); - d->have_new_ctrl = false; - d->new_ctrl = r->cache.shareable_bits; + cfg = &d->staged_config[t]; + cfg->have_new_ctrl = false; + cfg->new_ctrl = r->cache.shareable_bits; used_b = r->cache.shareable_bits; - ctrl = d->ctrl_val; - for (i = 0; i < closids_supported(); i++, ctrl++) { + for (i = 0; i < closids_supported(); i++) { if (closid_allocated(i) && i != closid) { mode = rdtgroup_mode_by_closid(i); if (mode == RDT_MODE_PSEUDO_LOCKSETUP) @@ -2673,35 +2725,38 @@ static int __init_one_rdt_domain(struct rdt_domain *d, struct rdt_resource *r, * usage to ensure there is no overlap * with an exclusive group. */ - if (d_cdp) - peer_ctl = d_cdp->ctrl_val[i]; + if (resctrl_arch_get_cdp_enabled(r->rid)) + peer_ctl = resctrl_arch_get_config(r, d, i, + peer_type); else peer_ctl = 0; - used_b |= *ctrl | peer_ctl; + ctrl_val = resctrl_arch_get_config(r, d, i, + s->conf_type); + used_b |= ctrl_val | peer_ctl; if (mode == RDT_MODE_SHAREABLE) - d->new_ctrl |= *ctrl | peer_ctl; + cfg->new_ctrl |= ctrl_val | peer_ctl; } } if (d->plr && d->plr->cbm > 0) used_b |= d->plr->cbm; unused_b = used_b ^ (BIT_MASK(r->cache.cbm_len) - 1); unused_b &= BIT_MASK(r->cache.cbm_len) - 1; - d->new_ctrl |= unused_b; + cfg->new_ctrl |= unused_b; /* * Force the initial CBM to be valid, user can * modify the CBM based on system availability. */ - d->new_ctrl = cbm_ensure_valid(d->new_ctrl, r); + cfg->new_ctrl = cbm_ensure_valid(cfg->new_ctrl, r); /* * Assign the u32 CBM to an unsigned long to ensure that * bitmap_weight() does not access out-of-bound memory. */ - tmp_cbm = d->new_ctrl; + tmp_cbm = cfg->new_ctrl; if (bitmap_weight(&tmp_cbm, r->cache.cbm_len) < r->cache.min_cbm_bits) { - rdt_last_cmd_printf("No space on %s:%d\n", r->name, d->id); + rdt_last_cmd_printf("No space on %s:%d\n", s->name, d->id); return -ENOSPC; } - d->have_new_ctrl = true; + cfg->have_new_ctrl = true; return 0; } @@ -2716,13 +2771,13 @@ static int __init_one_rdt_domain(struct rdt_domain *d, struct rdt_resource *r, * If there are no more shareable bits available on any domain then * the entire allocation will fail. 
*/ -static int rdtgroup_init_cat(struct rdt_resource *r, u32 closid) +static int rdtgroup_init_cat(struct resctrl_schema *s, u32 closid) { struct rdt_domain *d; int ret; - list_for_each_entry(d, &r->domains, list) { - ret = __init_one_rdt_domain(d, r, closid); + list_for_each_entry(d, &s->res->domains, list) { + ret = __init_one_rdt_domain(d, s, closid); if (ret < 0) return ret; } @@ -2733,30 +2788,34 @@ static int rdtgroup_init_cat(struct rdt_resource *r, u32 closid) /* Initialize MBA resource with default values. */ static void rdtgroup_init_mba(struct rdt_resource *r) { + struct resctrl_staged_config *cfg; struct rdt_domain *d; list_for_each_entry(d, &r->domains, list) { - d->new_ctrl = is_mba_sc(r) ? MBA_MAX_MBPS : r->default_ctrl; - d->have_new_ctrl = true; + cfg = &d->staged_config[CDP_NONE]; + cfg->new_ctrl = is_mba_sc(r) ? MBA_MAX_MBPS : r->default_ctrl; + cfg->have_new_ctrl = true; } } /* Initialize the RDT group's allocations. */ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp) { + struct resctrl_schema *s; struct rdt_resource *r; int ret; - for_each_alloc_enabled_rdt_resource(r) { + list_for_each_entry(s, &resctrl_schema_all, list) { + r = s->res; if (r->rid == RDT_RESOURCE_MBA) { rdtgroup_init_mba(r); } else { - ret = rdtgroup_init_cat(r, rdtgrp->closid); + ret = rdtgroup_init_cat(s, rdtgrp->closid); if (ret < 0) return ret; } - ret = update_domains(r, rdtgrp->closid); + ret = resctrl_arch_update_domains(r, rdtgrp->closid); if (ret < 0) { rdt_last_cmd_puts("Failed to initialize allocations\n"); return ret; @@ -3124,13 +3183,13 @@ out: static int rdtgroup_show_options(struct seq_file *seq, struct kernfs_root *kf) { - if (rdt_resources_all[RDT_RESOURCE_L3DATA].alloc_enabled) + if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L3)) seq_puts(seq, ",cdp"); - if (rdt_resources_all[RDT_RESOURCE_L2DATA].alloc_enabled) + if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L2)) seq_puts(seq, ",cdpl2"); - if (is_mba_sc(&rdt_resources_all[RDT_RESOURCE_MBA])) + if (is_mba_sc(&rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl)) seq_puts(seq, ",mba_MBps"); return 0; diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 08651a4e6aa0..42fc41dd0e1f 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -508,7 +508,7 @@ static struct irq_chip hpet_msi_controller __ro_after_init = { .irq_set_affinity = msi_domain_set_affinity, .irq_retrigger = irq_chip_retrigger_hierarchy, .irq_write_msi_msg = hpet_msi_write_msg, - .flags = IRQCHIP_SKIP_SET_WAKE, + .flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_AFFINITY_PRE_STARTUP, }; static int hpet_msi_init(struct irq_domain *domain, diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 9320285a5e29..85f6e242b6b4 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -610,6 +610,9 @@ void set_cpu_sibling_map(int cpu) if (threads > __max_smt_threads) __max_smt_threads = threads; + for_each_cpu(i, topology_sibling_cpumask(cpu)) + cpu_data(i).smt_active = threads > 1; + /* * This needs a separate iteration over the cpus because we rely on all * topology_sibling_cpumask links to be set-up. 
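The smt_active flag introduced in set_cpu_sibling_map() above is maintained from both directions: the online path marks every sibling once a core has more than one thread, and remove_siblinginfo() in the next hunk clears it for any sibling whose mask collapses back to a single CPU. A toy model of the two transitions, using a plain bitmask in place of topology_sibling_cpumask (names here are illustrative, not kernel APIs):

#include <stdbool.h>
#include <stdio.h>

#define NR_CPUS 8

static unsigned int sibling_mask[NR_CPUS];	/* bit j set: cpu j is a sibling */
static bool smt_active[NR_CPUS];

static int weight(unsigned int mask)
{
	return __builtin_popcount(mask);	/* GCC/Clang builtin */
}

/* Online path: every CPU in the mask sees threads > 1. */
static void set_siblings(unsigned int mask)
{
	int threads = weight(mask);

	for (int i = 0; i < NR_CPUS; i++)
		if (mask & (1u << i)) {
			sibling_mask[i] = mask;
			smt_active[i] = threads > 1;
		}
}

/* Offline path: drop @cpu from each sibling's mask; a mask of
 * weight 1 means that sibling is no longer SMT-active. */
static void remove_sibling(unsigned int cpu)
{
	for (int i = 0; i < NR_CPUS; i++) {
		if (!(sibling_mask[i] & (1u << cpu)))
			continue;
		sibling_mask[i] &= ~(1u << cpu);
		if (weight(sibling_mask[i]) == 1)
			smt_active[i] = false;
	}
}

int main(void)
{
	set_siblings(0x3);			/* cpus 0 and 1 share a core */
	printf("smt_active[1]=%d\n", smt_active[1]);
	remove_sibling(0);			/* cpu 0 goes offline */
	printf("smt_active[1]=%d\n", smt_active[1]);
	return 0;
}

Running this prints smt_active[1]=1 once the pair comes up and smt_active[1]=0 after CPU 0 is removed, matching the hotplug behaviour the patch encodes.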
@@ -1552,8 +1555,13 @@ static void remove_siblinginfo(int cpu) for_each_cpu(sibling, topology_die_cpumask(cpu)) cpumask_clear_cpu(cpu, topology_die_cpumask(sibling)); - for_each_cpu(sibling, topology_sibling_cpumask(cpu)) + + for_each_cpu(sibling, topology_sibling_cpumask(cpu)) { cpumask_clear_cpu(cpu, topology_sibling_cpumask(sibling)); + if (cpumask_weight(topology_sibling_cpumask(sibling)) == 1) + cpu_data(sibling).smt_active = false; + } + for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) cpumask_clear_cpu(cpu, cpu_llc_shared_mask(sibling)); cpumask_clear(cpu_llc_shared_mask(cpu)); diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 739be5da3bca..fe03bd978761 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -208,30 +208,6 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) kvm_mmu_after_set_cpuid(vcpu); } -static int is_efer_nx(void) -{ - return host_efer & EFER_NX; -} - -static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu) -{ - int i; - struct kvm_cpuid_entry2 *e, *entry; - - entry = NULL; - for (i = 0; i < vcpu->arch.cpuid_nent; ++i) { - e = &vcpu->arch.cpuid_entries[i]; - if (e->function == 0x80000001) { - entry = e; - break; - } - } - if (entry && cpuid_entry_has(entry, X86_FEATURE_NX) && !is_efer_nx()) { - cpuid_entry_clear(entry, X86_FEATURE_NX); - printk(KERN_INFO "kvm: guest NX capability removed\n"); - } -} - int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *best; @@ -302,7 +278,6 @@ int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, vcpu->arch.cpuid_entries = e2; vcpu->arch.cpuid_nent = cpuid->nent; - cpuid_fix_nx_cap(vcpu); kvm_update_cpuid_runtime(vcpu); kvm_vcpu_after_set_cpuid(vcpu); @@ -401,7 +376,6 @@ static __always_inline void kvm_cpu_cap_mask(enum cpuid_leafs leaf, u32 mask) void kvm_set_cpu_caps(void) { - unsigned int f_nx = is_efer_nx() ? F(NX) : 0; #ifdef CONFIG_X86_64 unsigned int f_gbpages = F(GBPAGES); unsigned int f_lm = F(LM); @@ -515,7 +489,7 @@ void kvm_set_cpu_caps(void) F(CX8) | F(APIC) | 0 /* Reserved */ | F(SYSCALL) | F(MTRR) | F(PGE) | F(MCA) | F(CMOV) | F(PAT) | F(PSE36) | 0 /* Reserved */ | - f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) | + F(NX) | 0 /* Reserved */ | F(MMXEXT) | F(MMX) | F(FXSR) | F(FXSR_OPT) | f_gbpages | F(RDTSCP) | 0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW) ); diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 0b38f944c6b6..41d2a53c5dea 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1933,7 +1933,7 @@ ret_success: void kvm_hv_set_cpuid(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *entry; - struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); + struct kvm_vcpu_hv *hv_vcpu; entry = kvm_find_cpuid_entry(vcpu, HYPERV_CPUID_INTERFACE, 0); if (entry && entry->eax == HYPERV_CPUID_SIGNATURE_EAX) { diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index c4f4fa23320e..47b765270239 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -2535,6 +2535,7 @@ static void kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) int mmu_try_to_unsync_pages(struct kvm_vcpu *vcpu, gfn_t gfn, bool can_unsync) { struct kvm_mmu_page *sp; + bool locked = false; /* * Force write-protection if the page is being tracked. Note, the page @@ -2557,9 +2558,34 @@ int mmu_try_to_unsync_pages(struct kvm_vcpu *vcpu, gfn_t gfn, bool can_unsync) if (sp->unsync) continue; + /* + * TDP MMU page faults require an additional spinlock as they + * run with mmu_lock held for read, not write, and the unsync + * logic is not thread safe. 
Take the spinlock regardless of + * the MMU type to avoid extra conditionals/parameters; there's + * no meaningful penalty if mmu_lock is held for write. + */ + if (!locked) { + locked = true; + spin_lock(&vcpu->kvm->arch.mmu_unsync_pages_lock); + + /* + * Recheck after taking the spinlock; a different vCPU + * may have since marked the page unsync. A false + * positive on the unprotected check above is not + * possible as clearing sp->unsync _must_ hold mmu_lock + * for write, i.e. unsync cannot transition from 0->1 + * while this CPU holds mmu_lock for read (or write). + */ + if (READ_ONCE(sp->unsync)) + continue; + } + WARN_ON(sp->role.level != PG_LEVEL_4K); kvm_unsync_page(vcpu, sp); } + if (locked) + spin_unlock(&vcpu->kvm->arch.mmu_unsync_pages_lock); /* * We need to ensure that the marking of unsync pages is visible @@ -5537,6 +5563,8 @@ void kvm_mmu_init_vm(struct kvm *kvm) { struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker; + spin_lock_init(&kvm->arch.mmu_unsync_pages_lock); + if (!kvm_mmu_init_tdp_mmu(kvm)) /* * No smp_load/store wrappers needed here as we are in diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 0853370bd811..d80cb122b5f3 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -43,6 +43,7 @@ void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm) if (!kvm->arch.tdp_mmu_enabled) return; + WARN_ON(!list_empty(&kvm->arch.tdp_mmu_pages)); WARN_ON(!list_empty(&kvm->arch.tdp_mmu_roots)); /* @@ -81,8 +82,6 @@ static void tdp_mmu_free_sp_rcu_callback(struct rcu_head *head) void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root, bool shared) { - gfn_t max_gfn = 1ULL << (shadow_phys_bits - PAGE_SHIFT); - kvm_lockdep_assert_mmu_lock_held(kvm, shared); if (!refcount_dec_and_test(&root->tdp_mmu_root_count)) return; @@ -94,7 +93,7 @@ void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root, list_del_rcu(&root->link); spin_unlock(&kvm->arch.tdp_mmu_pages_lock); - zap_gfn_range(kvm, root, 0, max_gfn, false, false, shared); + zap_gfn_range(kvm, root, 0, -1ull, false, false, shared); call_rcu(&root->rcu_head, tdp_mmu_free_sp_rcu_callback); } @@ -724,13 +723,29 @@ static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root, gfn_t start, gfn_t end, bool can_yield, bool flush, bool shared) { + gfn_t max_gfn_host = 1ULL << (shadow_phys_bits - PAGE_SHIFT); + bool zap_all = (start == 0 && end >= max_gfn_host); struct tdp_iter iter; + /* + * No need to try to step down in the iterator when zapping all SPTEs; + * zapping the top-level non-leaf SPTEs will recurse on their children. + */ + int min_level = zap_all ? root->role.level : PG_LEVEL_4K; + + /* + * Bound the walk at host.MAXPHYADDR; guest accesses beyond that will + * hit a #PF(RSVD) and never get to an EPT Violation/Misconfig / #NPF, + * and so KVM will never install a SPTE for such addresses. + */ + end = min(end, max_gfn_host); + kvm_lockdep_assert_mmu_lock_held(kvm, shared); rcu_read_lock(); - tdp_root_for_each_pte(iter, root, start, end) { + for_each_tdp_pte_min_level(iter, root->spt, root->role.level, + min_level, start, end) { retry: if (can_yield && tdp_mmu_iter_cond_resched(kvm, &iter, flush, shared)) { @@ -744,9 +759,10 @@ retry: /* * If this is a non-last-level SPTE that covers a larger range * than should be zapped, continue, and zap the mappings at a - * lower level, except when zapping all SPTEs. 
*/ - if ((iter.gfn < start || + if (!zap_all && + (iter.gfn < start || iter.gfn + KVM_PAGES_PER_HPAGE(iter.level) > end) && !is_last_spte(iter.old_spte, iter.level)) continue; @@ -794,12 +810,11 @@ bool __kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, int as_id, gfn_t start, void kvm_tdp_mmu_zap_all(struct kvm *kvm) { - gfn_t max_gfn = 1ULL << (shadow_phys_bits - PAGE_SHIFT); bool flush = false; int i; for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) - flush = kvm_tdp_mmu_zap_gfn_range(kvm, i, 0, max_gfn, + flush = kvm_tdp_mmu_zap_gfn_range(kvm, i, 0, -1ull, flush, false); if (flush) @@ -838,7 +853,6 @@ static struct kvm_mmu_page *next_invalidated_root(struct kvm *kvm, */ void kvm_tdp_mmu_zap_invalidated_roots(struct kvm *kvm) { - gfn_t max_gfn = 1ULL << (shadow_phys_bits - PAGE_SHIFT); struct kvm_mmu_page *next_root; struct kvm_mmu_page *root; bool flush = false; @@ -854,8 +868,7 @@ void kvm_tdp_mmu_zap_invalidated_roots(struct kvm *kvm) rcu_read_unlock(); - flush = zap_gfn_range(kvm, root, 0, max_gfn, true, flush, - true); + flush = zap_gfn_range(kvm, root, 0, -1ull, true, flush, true); /* * Put the reference acquired in diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 61738ff8ef33..e5515477c30a 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -158,6 +158,9 @@ void recalc_intercepts(struct vcpu_svm *svm) /* If SMI is not intercepted, ignore guest SMI intercept as well */ if (!intercept_smi) vmcb_clr_intercept(c, INTERCEPT_SMI); + + vmcb_set_intercept(c, INTERCEPT_VMLOAD); + vmcb_set_intercept(c, INTERCEPT_VMSAVE); } static void copy_vmcb_control_area(struct vmcb_control_area *dst, @@ -503,7 +506,11 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12 static void nested_vmcb02_prepare_control(struct vcpu_svm *svm) { - const u32 mask = V_INTR_MASKING_MASK | V_GIF_ENABLE_MASK | V_GIF_MASK; + const u32 int_ctl_vmcb01_bits = + V_INTR_MASKING_MASK | V_GIF_MASK | V_GIF_ENABLE_MASK; + + const u32 int_ctl_vmcb12_bits = V_TPR_MASK | V_IRQ_INJECTION_BITS_MASK; + struct kvm_vcpu *vcpu = &svm->vcpu; /* @@ -535,8 +542,8 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm) vcpu->arch.l1_tsc_offset + svm->nested.ctl.tsc_offset; svm->vmcb->control.int_ctl = - (svm->nested.ctl.int_ctl & ~mask) | - (svm->vmcb01.ptr->control.int_ctl & mask); + (svm->nested.ctl.int_ctl & int_ctl_vmcb12_bits) | + (svm->vmcb01.ptr->control.int_ctl & int_ctl_vmcb01_bits); svm->vmcb->control.virt_ext = svm->nested.ctl.virt_ext; svm->vmcb->control.int_vector = svm->nested.ctl.int_vector; diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index e8ccab50ebf6..69639f9624f5 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1589,17 +1589,18 @@ static void svm_set_vintr(struct vcpu_svm *svm) static void svm_clear_vintr(struct vcpu_svm *svm) { - const u32 mask = V_TPR_MASK | V_GIF_ENABLE_MASK | V_GIF_MASK | V_INTR_MASKING_MASK; svm_clr_intercept(svm, INTERCEPT_VINTR); /* Drop int_ctl fields related to VINTR injection. 
*/ - svm->vmcb->control.int_ctl &= mask; + svm->vmcb->control.int_ctl &= ~V_IRQ_INJECTION_BITS_MASK; if (is_guest_mode(&svm->vcpu)) { - svm->vmcb01.ptr->control.int_ctl &= mask; + svm->vmcb01.ptr->control.int_ctl &= ~V_IRQ_INJECTION_BITS_MASK; WARN_ON((svm->vmcb->control.int_ctl & V_TPR_MASK) != (svm->nested.ctl.int_ctl & V_TPR_MASK)); - svm->vmcb->control.int_ctl |= svm->nested.ctl.int_ctl & ~mask; + + svm->vmcb->control.int_ctl |= svm->nested.ctl.int_ctl & + V_IRQ_INJECTION_BITS_MASK; } vmcb_mark_dirty(svm->vmcb, VMCB_INTR); diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 1a52134b0c42..b3f77d18eb5a 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -330,6 +330,31 @@ void nested_vmx_free_vcpu(struct kvm_vcpu *vcpu) vcpu_put(vcpu); } +#define EPTP_PA_MASK GENMASK_ULL(51, 12) + +static bool nested_ept_root_matches(hpa_t root_hpa, u64 root_eptp, u64 eptp) +{ + return VALID_PAGE(root_hpa) && + ((root_eptp & EPTP_PA_MASK) == (eptp & EPTP_PA_MASK)); +} + +static void nested_ept_invalidate_addr(struct kvm_vcpu *vcpu, gpa_t eptp, + gpa_t addr) +{ + uint i; + struct kvm_mmu_root_info *cached_root; + + WARN_ON_ONCE(!mmu_is_nested(vcpu)); + + for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) { + cached_root = &vcpu->arch.mmu->prev_roots[i]; + + if (nested_ept_root_matches(cached_root->hpa, cached_root->pgd, + eptp)) + vcpu->arch.mmu->invlpg(vcpu, addr, cached_root->hpa); + } +} + static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault) { @@ -342,10 +367,22 @@ static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu, vm_exit_reason = EXIT_REASON_PML_FULL; vmx->nested.pml_full = false; exit_qualification &= INTR_INFO_UNBLOCK_NMI; - } else if (fault->error_code & PFERR_RSVD_MASK) - vm_exit_reason = EXIT_REASON_EPT_MISCONFIG; - else - vm_exit_reason = EXIT_REASON_EPT_VIOLATION; + } else { + if (fault->error_code & PFERR_RSVD_MASK) + vm_exit_reason = EXIT_REASON_EPT_MISCONFIG; + else + vm_exit_reason = EXIT_REASON_EPT_VIOLATION; + + /* + * Although the caller (kvm_inject_emulated_page_fault) would + * have already synced the faulting address in the shadow EPT + * tables for the current EPTP12, we also need to sync it for + * any other cached EPTP02s based on the same EP4TA, since the + * TLB associates mappings to the EP4TA rather than the full EPTP. 
+ */ + nested_ept_invalidate_addr(vcpu, vmcs12->ept_pointer, + fault->address); + } nested_vmx_vmexit(vcpu, vm_exit_reason, 0, exit_qualification); vmcs12->guest_physical_address = fault->address; @@ -5325,14 +5362,6 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu) return nested_vmx_succeed(vcpu); } -#define EPTP_PA_MASK GENMASK_ULL(51, 12) - -static bool nested_ept_root_matches(hpa_t root_hpa, u64 root_eptp, u64 eptp) -{ - return VALID_PAGE(root_hpa) && - ((root_eptp & EPTP_PA_MASK) == (eptp & EPTP_PA_MASK)); -} - /* Emulate the INVEPT instruction */ static int handle_invept(struct kvm_vcpu *vcpu) { @@ -5826,7 +5855,8 @@ static bool nested_vmx_l0_wants_exit(struct kvm_vcpu *vcpu, if (is_nmi(intr_info)) return true; else if (is_page_fault(intr_info)) - return vcpu->arch.apf.host_apf_flags || !enable_ept; + return vcpu->arch.apf.host_apf_flags || + vmx_need_pf_intercept(vcpu); else if (is_debug(intr_info) && vcpu->guest_debug & (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index db88ed4f2121..17a1cb4b059d 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -522,7 +522,7 @@ static inline struct vmcs *alloc_vmcs(bool shadow) static inline bool vmx_has_waitpkg(struct vcpu_vmx *vmx) { - return vmx->secondary_exec_control & + return secondary_exec_controls_get(vmx) & SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE; } diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c index cd768dafca9e..933a2ebad471 100644 --- a/arch/x86/mm/mmio-mod.c +++ b/arch/x86/mm/mmio-mod.c @@ -376,12 +376,12 @@ static void enter_uniprocessor(void) goto out; } - get_online_cpus(); + cpus_read_lock(); cpumask_copy(downed_cpus, cpu_online_mask); cpumask_clear_cpu(cpumask_first(cpu_online_mask), downed_cpus); if (num_online_cpus() > 1) pr_notice("Disabling non-boot CPUs...\n"); - put_online_cpus(); + cpus_read_unlock(); for_each_cpu(cpu, downed_cpus) { err = remove_cpu(cpu); diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index cfe6b1e85fa6..59ba2968af1b 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -8,11 +8,13 @@ #include <linux/export.h> #include <linux/cpu.h> #include <linux/debugfs.h> +#include <linux/sched/smt.h> #include <asm/tlbflush.h> #include <asm/mmu_context.h> #include <asm/nospec-branch.h> #include <asm/cache.h> +#include <asm/cacheflush.h> #include <asm/apic.h> #include <asm/perf_event.h> @@ -43,10 +45,15 @@ */ /* - * Use bit 0 to mangle the TIF_SPEC_IB state into the mm pointer which is - * stored in cpu_tlb_state.last_user_mm_ibpb. + * Bits to mangle the TIF_SPEC_* state into the mm pointer which is + * stored in cpu_tlb_state.last_user_mm_spec. */ #define LAST_USER_MM_IBPB 0x1UL +#define LAST_USER_MM_L1D_FLUSH 0x2UL +#define LAST_USER_MM_SPEC_MASK (LAST_USER_MM_IBPB | LAST_USER_MM_L1D_FLUSH) + +/* Bits to set when tlbstate and flush are (re)initialized */ +#define LAST_USER_MM_INIT LAST_USER_MM_IBPB /* * The x86 feature is called PCID (Process Context IDentifier). It is similar @@ -317,20 +324,70 @@ void switch_mm(struct mm_struct *prev, struct mm_struct *next, local_irq_restore(flags); } -static unsigned long mm_mangle_tif_spec_ib(struct task_struct *next) +/* + * Invoked from return to user/guest by a task that opted in to L1D + * flushing but ended up running on an SMT-enabled core due to wrong + * affinity settings or CPU hotplug. This is part of the paranoid L1D flush + * contract which this task requested. 
+ */ +static void l1d_flush_force_sigbus(struct callback_head *ch) +{ + force_sig(SIGBUS); +} + +static void l1d_flush_evaluate(unsigned long prev_mm, unsigned long next_mm, + struct task_struct *next) +{ + /* Flush L1D if the outgoing task requests it */ + if (prev_mm & LAST_USER_MM_L1D_FLUSH) + wrmsrl(MSR_IA32_FLUSH_CMD, L1D_FLUSH); + + /* Check whether the incoming task opted in for L1D flush */ + if (likely(!(next_mm & LAST_USER_MM_L1D_FLUSH))) + return; + + /* + * Validate that it is not running on an SMT sibling as this would + * make the exercise pointless because the siblings share L1D. If + * it runs on an SMT sibling, notify it with SIGBUS on return to + * user/guest. + */ + if (this_cpu_read(cpu_info.smt_active)) { + clear_ti_thread_flag(&next->thread_info, TIF_SPEC_L1D_FLUSH); + next->l1d_flush_kill.func = l1d_flush_force_sigbus; + task_work_add(next, &next->l1d_flush_kill, TWA_RESUME); + } +} + +static unsigned long mm_mangle_tif_spec_bits(struct task_struct *next) { unsigned long next_tif = task_thread_info(next)->flags; - unsigned long ibpb = (next_tif >> TIF_SPEC_IB) & LAST_USER_MM_IBPB; + unsigned long spec_bits = (next_tif >> TIF_SPEC_IB) & LAST_USER_MM_SPEC_MASK; - return (unsigned long)next->mm | ibpb; + /* + * Ensure that the bit shift above works as expected and the two flags + * end up in bits 0 and 1. + */ + BUILD_BUG_ON(TIF_SPEC_L1D_FLUSH != TIF_SPEC_IB + 1); + + return (unsigned long)next->mm | spec_bits; } -static void cond_ibpb(struct task_struct *next) +static void cond_mitigation(struct task_struct *next) { + unsigned long prev_mm, next_mm; + if (!next || !next->mm) return; + next_mm = mm_mangle_tif_spec_bits(next); + prev_mm = this_cpu_read(cpu_tlbstate.last_user_mm_spec); + /* + * Avoid user/user BTB poisoning by flushing the branch predictor + * when switching between processes. This stops one process from + * doing Spectre-v2 attacks on another. + * * Both the conditional and the always IBPB mode use the mm * pointer to avoid the IBPB when switching between tasks of the * same process. Using the mm pointer instead of mm->context.ctx_id @@ -340,8 +397,6 @@ static void cond_ibpb(struct task_struct *next) * exposed data is not really interesting. */ if (static_branch_likely(&switch_mm_cond_ibpb)) { - unsigned long prev_mm, next_mm; - /* * This is a bit more complex than the always mode because * it has to handle two cases: @@ -371,20 +426,14 @@ static void cond_ibpb(struct task_struct *next) * Optimize this with reasonably small overhead for the * above cases. Mangle the TIF_SPEC_IB bit into the mm * pointer of the incoming task which is stored in - * cpu_tlbstate.last_user_mm_ibpb for comparison. - */ - next_mm = mm_mangle_tif_spec_ib(next); - prev_mm = this_cpu_read(cpu_tlbstate.last_user_mm_ibpb); - - /* + * cpu_tlbstate.last_user_mm_spec for comparison. + * * Issue IBPB only if the mm's are different and one or * both have the IBPB bit set. */ if (next_mm != prev_mm && (next_mm | prev_mm) & LAST_USER_MM_IBPB) indirect_branch_prediction_barrier(); - - this_cpu_write(cpu_tlbstate.last_user_mm_ibpb, next_mm); } if (static_branch_unlikely(&switch_mm_always_ibpb)) { /* * different context than the user space task which ran * last on this CPU. 
*/ - if (this_cpu_read(cpu_tlbstate.last_user_mm) != next->mm) { + if ((prev_mm & ~LAST_USER_MM_SPEC_MASK) != + (unsigned long)next->mm) indirect_branch_prediction_barrier(); - this_cpu_write(cpu_tlbstate.last_user_mm, next->mm); - } } + + if (static_branch_unlikely(&switch_mm_cond_l1d_flush)) { + /* + * Flush L1D when the outgoing task requested it and/or + * check whether the incoming task requested L1D flushing + * and ended up on an SMT sibling. + */ + if (unlikely((prev_mm | next_mm) & LAST_USER_MM_L1D_FLUSH)) + l1d_flush_evaluate(prev_mm, next_mm, next); + } + + this_cpu_write(cpu_tlbstate.last_user_mm_spec, next_mm); } #ifdef CONFIG_PERF_EVENTS @@ -531,11 +591,10 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, need_flush = true; } else { /* - * Avoid user/user BTB poisoning by flushing the branch - * predictor when switching between processes. This stops - * one process from doing Spectre-v2 attacks on another. + * Apply process to process speculation vulnerability + * mitigations if applicable. */ - cond_ibpb(tsk); + cond_mitigation(tsk); /* * Stop remote flushes for the previous mm. @@ -643,7 +702,7 @@ void initialize_tlbstate_and_flush(void) write_cr3(build_cr3(mm->pgd, 0)); /* Reinitialize tlbstate. */ - this_cpu_write(cpu_tlbstate.last_user_mm_ibpb, LAST_USER_MM_IBPB); + this_cpu_write(cpu_tlbstate.last_user_mm_spec, LAST_USER_MM_INIT); this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0); this_cpu_write(cpu_tlbstate.next_asid, 1); this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id); diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 3a070e7cdb8b..6665f8802098 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -58,19 +58,20 @@ static void msr_restore_context(struct saved_context *ctxt) } /** - * __save_processor_state - save CPU registers before creating a - * hibernation image and before restoring the memory state from it - * @ctxt - structure to store the registers contents in + * __save_processor_state() - Save CPU registers before creating a + * hibernation image and before restoring + * the memory state from it + * @ctxt: Structure to store the registers contents in. * - * NOTE: If there is a CPU register the modification of which by the - * boot kernel (ie. the kernel used for loading the hibernation image) - * might affect the operations of the restored target kernel (ie. the one - * saved in the hibernation image), then its contents must be saved by this - * function. In other words, if kernel A is hibernated and different - * kernel B is used for loading the hibernation image into memory, the - * kernel A's __save_processor_state() function must save all registers - * needed by kernel A, so that it can operate correctly after the resume - * regardless of what kernel B does in the meantime. + * NOTE: If there is a CPU register the modification of which by the + * boot kernel (ie. the kernel used for loading the hibernation image) + * might affect the operations of the restored target kernel (ie. the one + * saved in the hibernation image), then its contents must be saved by this + * function. In other words, if kernel A is hibernated and different + * kernel B is used for loading the hibernation image into memory, the + * kernel A's __save_processor_state() function must save all registers + * needed by kernel A, so that it can operate correctly after the resume + * regardless of what kernel B does in the meantime. 
*/ static void __save_processor_state(struct saved_context *ctxt) { @@ -181,9 +182,9 @@ static void fix_processor_context(void) } /** - * __restore_processor_state - restore the contents of CPU registers saved - * by __save_processor_state() - * @ctxt - structure to load the registers contents from + * __restore_processor_state() - Restore the contents of CPU registers saved + * by __save_processor_state() + * @ctxt: Structure to load the registers contents from. * * The asm code that gets us here will have restored a usable GDT, although * it will be pointing to the wrong alias. diff --git a/arch/x86/tools/chkobjdump.awk b/arch/x86/tools/chkobjdump.awk index fd1ab80be0de..a4cf678cf5c8 100644 --- a/arch/x86/tools/chkobjdump.awk +++ b/arch/x86/tools/chkobjdump.awk @@ -10,6 +10,7 @@ BEGIN { /^GNU objdump/ { verstr = "" + gsub(/\(.*\)/, ""); for (i = 3; i <= NF; i++) if (match($(i), "^[0-9]")) { verstr = $(i); diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c index 9ba700dc47de..27c82207d387 100644 --- a/arch/x86/tools/relocs.c +++ b/arch/x86/tools/relocs.c @@ -26,6 +26,9 @@ static struct relocs relocs32; #if ELF_BITS == 64 static struct relocs relocs32neg; static struct relocs relocs64; +#define FMT PRIu64 +#else +#define FMT PRIu32 #endif struct section { @@ -389,7 +392,7 @@ static void read_ehdr(FILE *fp) Elf_Shdr shdr; if (fseek(fp, ehdr.e_shoff, SEEK_SET) < 0) - die("Seek to %d failed: %s\n", ehdr.e_shoff, strerror(errno)); + die("Seek to %" FMT " failed: %s\n", ehdr.e_shoff, strerror(errno)); if (fread(&shdr, sizeof(shdr), 1, fp) != 1) die("Cannot read initial ELF section header: %s\n", strerror(errno)); @@ -412,17 +415,17 @@ static void read_shdrs(FILE *fp) secs = calloc(shnum, sizeof(struct section)); if (!secs) { - die("Unable to allocate %d section headers\n", + die("Unable to allocate %ld section headers\n", shnum); } if (fseek(fp, ehdr.e_shoff, SEEK_SET) < 0) { - die("Seek to %d failed: %s\n", - ehdr.e_shoff, strerror(errno)); + die("Seek to %" FMT " failed: %s\n", + ehdr.e_shoff, strerror(errno)); } for (i = 0; i < shnum; i++) { struct section *sec = &secs[i]; if (fread(&shdr, sizeof(shdr), 1, fp) != 1) - die("Cannot read ELF section headers %d/%d: %s\n", + die("Cannot read ELF section headers %d/%ld: %s\n", i, shnum, strerror(errno)); sec->shdr.sh_name = elf_word_to_cpu(shdr.sh_name); sec->shdr.sh_type = elf_word_to_cpu(shdr.sh_type); @@ -450,12 +453,12 @@ static void read_strtabs(FILE *fp) } sec->strtab = malloc(sec->shdr.sh_size); if (!sec->strtab) { - die("malloc of %d bytes for strtab failed\n", - sec->shdr.sh_size); + die("malloc of %" FMT " bytes for strtab failed\n", + sec->shdr.sh_size); } if (fseek(fp, sec->shdr.sh_offset, SEEK_SET) < 0) { - die("Seek to %d failed: %s\n", - sec->shdr.sh_offset, strerror(errno)); + die("Seek to %" FMT " failed: %s\n", + sec->shdr.sh_offset, strerror(errno)); } if (fread(sec->strtab, 1, sec->shdr.sh_size, fp) != sec->shdr.sh_size) { @@ -475,12 +478,12 @@ static void read_symtabs(FILE *fp) } sec->symtab = malloc(sec->shdr.sh_size); if (!sec->symtab) { - die("malloc of %d bytes for symtab failed\n", - sec->shdr.sh_size); + die("malloc of %" FMT " bytes for symtab failed\n", + sec->shdr.sh_size); } if (fseek(fp, sec->shdr.sh_offset, SEEK_SET) < 0) { - die("Seek to %d failed: %s\n", - sec->shdr.sh_offset, strerror(errno)); + die("Seek to %" FMT " failed: %s\n", + sec->shdr.sh_offset, strerror(errno)); } if (fread(sec->symtab, 1, sec->shdr.sh_size, fp) != sec->shdr.sh_size) { @@ -508,12 +511,12 @@ static void read_relocs(FILE *fp) 
} sec->reltab = malloc(sec->shdr.sh_size); if (!sec->reltab) { - die("malloc of %d bytes for relocs failed\n", - sec->shdr.sh_size); + die("malloc of %" FMT " bytes for relocs failed\n", + sec->shdr.sh_size); } if (fseek(fp, sec->shdr.sh_offset, SEEK_SET) < 0) { - die("Seek to %d failed: %s\n", - sec->shdr.sh_offset, strerror(errno)); + die("Seek to %" FMT " failed: %s\n", + sec->shdr.sh_offset, strerror(errno)); } if (fread(sec->reltab, 1, sec->shdr.sh_size, fp) != sec->shdr.sh_size) { diff --git a/arch/x86/tools/relocs.h b/arch/x86/tools/relocs.h index 43c83c0fd22c..4c49c82446eb 100644 --- a/arch/x86/tools/relocs.h +++ b/arch/x86/tools/relocs.h @@ -17,6 +17,7 @@ #include <regex.h> #include <tools/le_byteshift.h> +__attribute__((__format__(printf, 1, 2))) void die(char *fmt, ...) __attribute__((noreturn)); #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) diff --git a/arch/xtensa/kernel/irq.c b/arch/xtensa/kernel/irq.c index a48bf2d10ac2..764b54bef701 100644 --- a/arch/xtensa/kernel/irq.c +++ b/arch/xtensa/kernel/irq.c @@ -33,8 +33,6 @@ DECLARE_PER_CPU(unsigned long, nmi_count); asmlinkage void do_IRQ(int hwirq, struct pt_regs *regs) { - int irq = irq_find_mapping(NULL, hwirq); - #ifdef CONFIG_DEBUG_STACKOVERFLOW /* Debugging check for stack overflow: is there less than 1KB free? */ { @@ -48,7 +46,7 @@ asmlinkage void do_IRQ(int hwirq, struct pt_regs *regs) sp - sizeof(struct thread_info)); } #endif - generic_handle_irq(irq); + generic_handle_domain_irq(NULL, hwirq); } int arch_show_interrupts(struct seq_file *p, int prec) |
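A closing note on the two interrupt-handling conversions in this diff (idu_cascade_isr() in mcip.c and xtensa's do_IRQ() above): both collapse an irq_find_mapping() plus generic_handle_irq() pair into a single generic_handle_domain_irq() call, so hwirq translation and dispatch happen in one step and a failed lookup is rejected centrally rather than by every caller. A rough userspace model of that shape (toy types and names, not the kernel API):

#include <stdio.h>

#define NR_HWIRQ 4

typedef void (*irq_handler_t)(int virq);

/* Toy "IRQ domain": maps hardware IRQ numbers to virtual IRQs/handlers */
struct irq_domain {
	int virq[NR_HWIRQ];
	irq_handler_t handler[NR_HWIRQ];
};

/*
 * Fused translate-and-dispatch, loosely modelled on the shape of
 * generic_handle_domain_irq(): lookup and handler invocation live in
 * one place, so a missing mapping is handled once.
 */
static int handle_domain_irq(struct irq_domain *d, unsigned int hwirq)
{
	if (hwirq >= NR_HWIRQ || !d->handler[hwirq])
		return -1;	/* no mapping for this hwirq */
	d->handler[hwirq](d->virq[hwirq]);
	return 0;
}

static void ack(int virq)
{
	printf("handled virq %d\n", virq);
}

int main(void)
{
	struct irq_domain d = { .virq = { 10, 11 }, .handler = { ack, ack } };

	handle_domain_irq(&d, 1);	/* dispatches virq 11 */
	handle_domain_irq(&d, 3);	/* unmapped: rejected cleanly */
	return 0;
}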
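Similarly, the tlb.c rework earlier in this diff relies on a trick worth spelling out: mm_struct pointers are at least word-aligned, so their low bits are always zero, and the per-task TIF_SPEC_* state can be folded into the cached pointer, letting a single comparison cover both "same mm" and "flags changed". A hedged standalone model of that encoding (simplified tag values and a stand-in struct, not the kernel's types):

#include <assert.h>
#include <stdio.h>

/* Low-bit tags, in the spirit of LAST_USER_MM_IBPB / LAST_USER_MM_L1D_FLUSH */
#define TAG_IBPB	0x1UL
#define TAG_L1D_FLUSH	0x2UL
#define TAG_MASK	(TAG_IBPB | TAG_L1D_FLUSH)

struct mm { long dummy; };	/* stand-in for mm_struct */

/* Fold the tag bits into an aligned pointer's unused low bits */
static unsigned long mangle(struct mm *mm, unsigned long tags)
{
	assert(((unsigned long)mm & TAG_MASK) == 0);	/* alignment gives free bits */
	return (unsigned long)mm | tags;
}

int main(void)
{
	static struct mm a, b;
	unsigned long prev = mangle(&a, TAG_IBPB);
	unsigned long next = mangle(&a, TAG_IBPB);

	/* Same mm, same flags: one comparison says no barrier is needed */
	printf("switch a->a: ibpb=%d\n",
	       prev != next && ((prev | next) & TAG_IBPB) != 0);

	next = mangle(&b, TAG_IBPB | TAG_L1D_FLUSH);
	printf("switch a->b: ibpb=%d l1d=%d\n",
	       prev != next && ((prev | next) & TAG_IBPB) != 0,
	       ((prev | next) & TAG_L1D_FLUSH) != 0);
	return 0;
}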