Diffstat (limited to 'arch')
289 files changed, 5963 insertions, 4047 deletions
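The dominant change in this series converts the architecture-specific atomic64_t implementations (alpha, arc, arm, arm64) from long/long long to the kernel's fixed-width s64 type. Purely as an illustration of the type choice — not part of the patch, and with hypothetical names (example_atomic64_t, example_atomic64_read) — a minimal sketch:

#include <linux/compiler.h>	/* READ_ONCE() */
#include <linux/types.h>	/* s64 */

/*
 * Hypothetical illustration only: atomic64_t must be exactly 64 bits on
 * every architecture. "long" is 64-bit on alpha and arm64 but 32-bit on
 * arm and arc (which used "long long"), so the conversion settles on the
 * one fixed-width kernel type, s64. A plain READ_ONCE() stands in here
 * for the per-arch ldrexd/ldd sequences in the hunks below; it is NOT an
 * atomic 64-bit load on 32-bit targets.
 */
typedef struct {
	s64 counter;
} example_atomic64_t;

static inline s64 example_atomic64_read(const example_atomic64_t *v)
{
	return READ_ONCE(v->counter);
}

The patch body follows.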
diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h index 150a1c5d6a2c..2144530d1428 100644 --- a/arch/alpha/include/asm/atomic.h +++ b/arch/alpha/include/asm/atomic.h @@ -93,9 +93,9 @@ static inline int atomic_fetch_##op##_relaxed(int i, atomic_t *v) \ } #define ATOMIC64_OP(op, asm_op) \ -static __inline__ void atomic64_##op(long i, atomic64_t * v) \ +static __inline__ void atomic64_##op(s64 i, atomic64_t * v) \ { \ - unsigned long temp; \ + s64 temp; \ __asm__ __volatile__( \ "1: ldq_l %0,%1\n" \ " " #asm_op " %0,%2,%0\n" \ @@ -109,9 +109,9 @@ static __inline__ void atomic64_##op(long i, atomic64_t * v) \ } \ #define ATOMIC64_OP_RETURN(op, asm_op) \ -static __inline__ long atomic64_##op##_return_relaxed(long i, atomic64_t * v) \ +static __inline__ s64 atomic64_##op##_return_relaxed(s64 i, atomic64_t * v) \ { \ - long temp, result; \ + s64 temp, result; \ __asm__ __volatile__( \ "1: ldq_l %0,%1\n" \ " " #asm_op " %0,%3,%2\n" \ @@ -128,9 +128,9 @@ static __inline__ long atomic64_##op##_return_relaxed(long i, atomic64_t * v) \ } #define ATOMIC64_FETCH_OP(op, asm_op) \ -static __inline__ long atomic64_fetch_##op##_relaxed(long i, atomic64_t * v) \ +static __inline__ s64 atomic64_fetch_##op##_relaxed(s64 i, atomic64_t * v) \ { \ - long temp, result; \ + s64 temp, result; \ __asm__ __volatile__( \ "1: ldq_l %2,%1\n" \ " " #asm_op " %2,%3,%0\n" \ @@ -246,9 +246,9 @@ static __inline__ int atomic_fetch_add_unless(atomic_t *v, int a, int u) * Atomically adds @a to @v, so long as it was not @u. * Returns the old value of @v. */ -static __inline__ long atomic64_fetch_add_unless(atomic64_t *v, long a, long u) +static __inline__ s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u) { - long c, new, old; + s64 c, new, old; smp_mb(); __asm__ __volatile__( "1: ldq_l %[old],%[mem]\n" @@ -276,9 +276,9 @@ static __inline__ long atomic64_fetch_add_unless(atomic64_t *v, long a, long u) * The function returns the old value of *v minus 1, even if * the atomic variable, v, was not decremented. */ -static inline long atomic64_dec_if_positive(atomic64_t *v) +static inline s64 atomic64_dec_if_positive(atomic64_t *v) { - long old, tmp; + s64 old, tmp; smp_mb(); __asm__ __volatile__( "1: ldq_l %[old],%[mem]\n" diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c index d0dccae53ba9..5f90df30be20 100644 --- a/arch/alpha/kernel/smp.c +++ b/arch/alpha/kernel/smp.c @@ -614,8 +614,7 @@ void smp_imb(void) { /* Must wait other processors to flush their icache before continue. */ - if (on_each_cpu(ipi_imb, NULL, 1)) - printk(KERN_CRIT "smp_imb: timed out\n"); + on_each_cpu(ipi_imb, NULL, 1); } EXPORT_SYMBOL(smp_imb); @@ -630,9 +629,7 @@ flush_tlb_all(void) { /* Although we don't have any data to pass, we do want to synchronize with the other processors. 
*/ - if (on_each_cpu(ipi_flush_tlb_all, NULL, 1)) { - printk(KERN_CRIT "flush_tlb_all: timed out\n"); - } + on_each_cpu(ipi_flush_tlb_all, NULL, 1); } #define asn_locked() (cpu_data[smp_processor_id()].asn_lock) @@ -667,9 +664,7 @@ flush_tlb_mm(struct mm_struct *mm) } } - if (smp_call_function(ipi_flush_tlb_mm, mm, 1)) { - printk(KERN_CRIT "flush_tlb_mm: timed out\n"); - } + smp_call_function(ipi_flush_tlb_mm, mm, 1); preempt_enable(); } @@ -720,9 +715,7 @@ flush_tlb_page(struct vm_area_struct *vma, unsigned long addr) data.mm = mm; data.addr = addr; - if (smp_call_function(ipi_flush_tlb_page, &data, 1)) { - printk(KERN_CRIT "flush_tlb_page: timed out\n"); - } + smp_call_function(ipi_flush_tlb_page, &data, 1); preempt_enable(); } @@ -772,9 +765,7 @@ flush_icache_user_range(struct vm_area_struct *vma, struct page *page, } } - if (smp_call_function(ipi_flush_icache_page, mm, 1)) { - printk(KERN_CRIT "flush_icache_page: timed out\n"); - } + smp_call_function(ipi_flush_icache_page, mm, 1); preempt_enable(); } diff --git a/arch/alpha/oprofile/common.c b/arch/alpha/oprofile/common.c index 310a4ce1dccc..1b1259c7d7d1 100644 --- a/arch/alpha/oprofile/common.c +++ b/arch/alpha/oprofile/common.c @@ -65,7 +65,7 @@ op_axp_setup(void) model->reg_setup(®, ctr, &sys); /* Configure the registers on all cpus. */ - (void)smp_call_function(model->cpu_setup, ®, 1); + smp_call_function(model->cpu_setup, ®, 1); model->cpu_setup(®); return 0; } @@ -86,7 +86,7 @@ op_axp_cpu_start(void *dummy) static int op_axp_start(void) { - (void)smp_call_function(op_axp_cpu_start, NULL, 1); + smp_call_function(op_axp_cpu_start, NULL, 1); op_axp_cpu_start(NULL); return 0; } @@ -101,7 +101,7 @@ op_axp_cpu_stop(void *dummy) static void op_axp_stop(void) { - (void)smp_call_function(op_axp_cpu_stop, NULL, 1); + smp_call_function(op_axp_cpu_stop, NULL, 1); op_axp_cpu_stop(NULL); } diff --git a/arch/arc/Makefile b/arch/arc/Makefile index 480af1af9e63..03a0b19c92cd 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -5,6 +5,10 @@ KBUILD_DEFCONFIG := nsim_hs_defconfig +ifeq ($(CROSS_COMPILE),) +CROSS_COMPILE := $(call cc-cross-prefix, arc-linux- arceb-linux-) +endif + cflags-y += -fno-common -pipe -fno-builtin -mmedium-calls -D__linux__ cflags-$(CONFIG_ISA_ARCOMPACT) += -mA7 cflags-$(CONFIG_ISA_ARCV2) += -mcpu=hs38 diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h index 17cf1c657cb3..7298ce84762e 100644 --- a/arch/arc/include/asm/atomic.h +++ b/arch/arc/include/asm/atomic.h @@ -321,14 +321,14 @@ ATOMIC_OPS(xor, ^=, CTOP_INST_AXOR_DI_R2_R2_R3) */ typedef struct { - aligned_u64 counter; + s64 __aligned(8) counter; } atomic64_t; #define ATOMIC64_INIT(a) { (a) } -static inline long long atomic64_read(const atomic64_t *v) +static inline s64 atomic64_read(const atomic64_t *v) { - unsigned long long val; + s64 val; __asm__ __volatile__( " ldd %0, [%1] \n" @@ -338,7 +338,7 @@ static inline long long atomic64_read(const atomic64_t *v) return val; } -static inline void atomic64_set(atomic64_t *v, long long a) +static inline void atomic64_set(atomic64_t *v, s64 a) { /* * This could have been a simple assignment in "C" but would need @@ -359,9 +359,9 @@ static inline void atomic64_set(atomic64_t *v, long long a) } #define ATOMIC64_OP(op, op1, op2) \ -static inline void atomic64_##op(long long a, atomic64_t *v) \ +static inline void atomic64_##op(s64 a, atomic64_t *v) \ { \ - unsigned long long val; \ + s64 val; \ \ __asm__ __volatile__( \ "1: \n" \ @@ -372,13 +372,13 @@ static inline void atomic64_##op(long long a, 
atomic64_t *v) \ " bnz 1b \n" \ : "=&r"(val) \ : "r"(&v->counter), "ir"(a) \ - : "cc"); \ + : "cc"); \ } \ #define ATOMIC64_OP_RETURN(op, op1, op2) \ -static inline long long atomic64_##op##_return(long long a, atomic64_t *v) \ +static inline s64 atomic64_##op##_return(s64 a, atomic64_t *v) \ { \ - unsigned long long val; \ + s64 val; \ \ smp_mb(); \ \ @@ -399,9 +399,9 @@ static inline long long atomic64_##op##_return(long long a, atomic64_t *v) \ } #define ATOMIC64_FETCH_OP(op, op1, op2) \ -static inline long long atomic64_fetch_##op(long long a, atomic64_t *v) \ +static inline s64 atomic64_fetch_##op(s64 a, atomic64_t *v) \ { \ - unsigned long long val, orig; \ + s64 val, orig; \ \ smp_mb(); \ \ @@ -441,10 +441,10 @@ ATOMIC64_OPS(xor, xor, xor) #undef ATOMIC64_OP_RETURN #undef ATOMIC64_OP -static inline long long -atomic64_cmpxchg(atomic64_t *ptr, long long expected, long long new) +static inline s64 +atomic64_cmpxchg(atomic64_t *ptr, s64 expected, s64 new) { - long long prev; + s64 prev; smp_mb(); @@ -464,9 +464,9 @@ atomic64_cmpxchg(atomic64_t *ptr, long long expected, long long new) return prev; } -static inline long long atomic64_xchg(atomic64_t *ptr, long long new) +static inline s64 atomic64_xchg(atomic64_t *ptr, s64 new) { - long long prev; + s64 prev; smp_mb(); @@ -492,9 +492,9 @@ static inline long long atomic64_xchg(atomic64_t *ptr, long long new) * the atomic variable, v, was not decremented. */ -static inline long long atomic64_dec_if_positive(atomic64_t *v) +static inline s64 atomic64_dec_if_positive(atomic64_t *v) { - long long val; + s64 val; smp_mb(); @@ -525,10 +525,9 @@ static inline long long atomic64_dec_if_positive(atomic64_t *v) * Atomically adds @a to @v, if it was not @u. * Returns the old value of @v */ -static inline long long atomic64_fetch_add_unless(atomic64_t *v, long long a, - long long u) +static inline s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u) { - long long old, temp; + s64 old, temp; smp_mb(); diff --git a/arch/arc/plat-hsdk/platform.c b/arch/arc/plat-hsdk/platform.c index 6a91a742ab3d..7dd2dd335cf6 100644 --- a/arch/arc/plat-hsdk/platform.c +++ b/arch/arc/plat-hsdk/platform.c @@ -32,8 +32,6 @@ static void __init hsdk_init_per_cpu(unsigned int cpu) #define ARC_PERIPHERAL_BASE 0xf0000000 #define CREG_BASE (ARC_PERIPHERAL_BASE + 0x1000) -#define CREG_PAE (CREG_BASE + 0x180) -#define CREG_PAE_UPDATE (CREG_BASE + 0x194) #define SDIO_BASE (ARC_PERIPHERAL_BASE + 0xA000) #define SDIO_UHS_REG_EXT (SDIO_BASE + 0x108) @@ -99,20 +97,167 @@ static void __init hsdk_enable_gpio_intc_wire(void) iowrite32(GPIO_INT_CONNECTED_MASK, (void __iomem *) GPIO_INTEN); } -static void __init hsdk_init_early(void) +enum hsdk_axi_masters { + M_HS_CORE = 0, + M_HS_RTT, + M_AXI_TUN, + M_HDMI_VIDEO, + M_HDMI_AUDIO, + M_USB_HOST, + M_ETHERNET, + M_SDIO, + M_GPU, + M_DMAC_0, + M_DMAC_1, + M_DVFS +}; + +#define UPDATE_VAL 1 + +/* + * This is modified configuration of AXI bridge. Default settings + * are specified in "Table 111 CREG Address Decoder register reset values". + * + * AXI_M_m_SLV{0|1} - Slave Select register for master 'm'. 
+ * Possible slaves are: + * - 0 => no slave selected + * - 1 => DDR controller port #1 + * - 2 => SRAM controller + * - 3 => AXI tunnel + * - 4 => EBI controller + * - 5 => ROM controller + * - 6 => AXI2APB bridge + * - 7 => DDR controller port #2 + * - 8 => DDR controller port #3 + * - 9 => HS38x4 IOC + * - 10 => HS38x4 DMI + * AXI_M_m_OFFSET{0|1} - Addr Offset register for master 'm' + * + * Please read ARC HS Development IC Specification, section 17.2 for more + * information about apertures configuration. + * + * m master AXI_M_m_SLV0 AXI_M_m_SLV1 AXI_M_m_OFFSET0 AXI_M_m_OFFSET1 + * 0 HS (CBU) 0x11111111 0x63111111 0xFEDCBA98 0x0E543210 + * 1 HS (RTT) 0x77777777 0x77777777 0xFEDCBA98 0x76543210 + * 2 AXI Tunnel 0x88888888 0x88888888 0xFEDCBA98 0x76543210 + * 3 HDMI-VIDEO 0x77777777 0x77777777 0xFEDCBA98 0x76543210 + * 4 HDMI-ADUIO 0x77777777 0x77777777 0xFEDCBA98 0x76543210 + * 5 USB-HOST 0x77777777 0x77999999 0xFEDCBA98 0x76DCBA98 + * 6 ETHERNET 0x77777777 0x77999999 0xFEDCBA98 0x76DCBA98 + * 7 SDIO 0x77777777 0x77999999 0xFEDCBA98 0x76DCBA98 + * 8 GPU 0x77777777 0x77777777 0xFEDCBA98 0x76543210 + * 9 DMAC (port #1) 0x77777777 0x77777777 0xFEDCBA98 0x76543210 + * 10 DMAC (port #2) 0x77777777 0x77777777 0xFEDCBA98 0x76543210 + * 11 DVFS 0x00000000 0x60000000 0x00000000 0x00000000 + */ + +#define CREG_AXI_M_SLV0(m) ((void __iomem *)(CREG_BASE + 0x20 * (m))) +#define CREG_AXI_M_SLV1(m) ((void __iomem *)(CREG_BASE + 0x20 * (m) + 0x04)) +#define CREG_AXI_M_OFT0(m) ((void __iomem *)(CREG_BASE + 0x20 * (m) + 0x08)) +#define CREG_AXI_M_OFT1(m) ((void __iomem *)(CREG_BASE + 0x20 * (m) + 0x0C)) +#define CREG_AXI_M_UPDT(m) ((void __iomem *)(CREG_BASE + 0x20 * (m) + 0x14)) + +#define CREG_AXI_M_HS_CORE_BOOT ((void __iomem *)(CREG_BASE + 0x010)) + +#define CREG_PAE ((void __iomem *)(CREG_BASE + 0x180)) +#define CREG_PAE_UPDT ((void __iomem *)(CREG_BASE + 0x194)) + +static void __init hsdk_init_memory_bridge(void) { + u32 reg; + + /* + * M_HS_CORE has one unique register - BOOT. + * We need to clean boot mirror (BOOT[1:0]) bits in them to avoid first + * aperture to be masked by 'boot mirror'. 
+ */ + reg = readl(CREG_AXI_M_HS_CORE_BOOT) & (~0x3); + writel(reg, CREG_AXI_M_HS_CORE_BOOT); + writel(0x11111111, CREG_AXI_M_SLV0(M_HS_CORE)); + writel(0x63111111, CREG_AXI_M_SLV1(M_HS_CORE)); + writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_HS_CORE)); + writel(0x0E543210, CREG_AXI_M_OFT1(M_HS_CORE)); + writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_HS_CORE)); + + writel(0x77777777, CREG_AXI_M_SLV0(M_HS_RTT)); + writel(0x77777777, CREG_AXI_M_SLV1(M_HS_RTT)); + writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_HS_RTT)); + writel(0x76543210, CREG_AXI_M_OFT1(M_HS_RTT)); + writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_HS_RTT)); + + writel(0x88888888, CREG_AXI_M_SLV0(M_AXI_TUN)); + writel(0x88888888, CREG_AXI_M_SLV1(M_AXI_TUN)); + writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_AXI_TUN)); + writel(0x76543210, CREG_AXI_M_OFT1(M_AXI_TUN)); + writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_AXI_TUN)); + + writel(0x77777777, CREG_AXI_M_SLV0(M_HDMI_VIDEO)); + writel(0x77777777, CREG_AXI_M_SLV1(M_HDMI_VIDEO)); + writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_HDMI_VIDEO)); + writel(0x76543210, CREG_AXI_M_OFT1(M_HDMI_VIDEO)); + writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_HDMI_VIDEO)); + + writel(0x77777777, CREG_AXI_M_SLV0(M_HDMI_AUDIO)); + writel(0x77777777, CREG_AXI_M_SLV1(M_HDMI_AUDIO)); + writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_HDMI_AUDIO)); + writel(0x76543210, CREG_AXI_M_OFT1(M_HDMI_AUDIO)); + writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_HDMI_AUDIO)); + + writel(0x77777777, CREG_AXI_M_SLV0(M_USB_HOST)); + writel(0x77999999, CREG_AXI_M_SLV1(M_USB_HOST)); + writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_USB_HOST)); + writel(0x76DCBA98, CREG_AXI_M_OFT1(M_USB_HOST)); + writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_USB_HOST)); + + writel(0x77777777, CREG_AXI_M_SLV0(M_ETHERNET)); + writel(0x77999999, CREG_AXI_M_SLV1(M_ETHERNET)); + writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_ETHERNET)); + writel(0x76DCBA98, CREG_AXI_M_OFT1(M_ETHERNET)); + writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_ETHERNET)); + + writel(0x77777777, CREG_AXI_M_SLV0(M_SDIO)); + writel(0x77999999, CREG_AXI_M_SLV1(M_SDIO)); + writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_SDIO)); + writel(0x76DCBA98, CREG_AXI_M_OFT1(M_SDIO)); + writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_SDIO)); + + writel(0x77777777, CREG_AXI_M_SLV0(M_GPU)); + writel(0x77777777, CREG_AXI_M_SLV1(M_GPU)); + writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_GPU)); + writel(0x76543210, CREG_AXI_M_OFT1(M_GPU)); + writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_GPU)); + + writel(0x77777777, CREG_AXI_M_SLV0(M_DMAC_0)); + writel(0x77777777, CREG_AXI_M_SLV1(M_DMAC_0)); + writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_DMAC_0)); + writel(0x76543210, CREG_AXI_M_OFT1(M_DMAC_0)); + writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_DMAC_0)); + + writel(0x77777777, CREG_AXI_M_SLV0(M_DMAC_1)); + writel(0x77777777, CREG_AXI_M_SLV1(M_DMAC_1)); + writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_DMAC_1)); + writel(0x76543210, CREG_AXI_M_OFT1(M_DMAC_1)); + writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_DMAC_1)); + + writel(0x00000000, CREG_AXI_M_SLV0(M_DVFS)); + writel(0x60000000, CREG_AXI_M_SLV1(M_DVFS)); + writel(0x00000000, CREG_AXI_M_OFT0(M_DVFS)); + writel(0x00000000, CREG_AXI_M_OFT1(M_DVFS)); + writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_DVFS)); + /* * PAE remapping for DMA clients does not work due to an RTL bug, so * CREG_PAE register must be programmed to all zeroes, otherwise it * will cause problems with DMA to/from peripherals even if PAE40 is * not used. 
*/ + writel(0x00000000, CREG_PAE); + writel(UPDATE_VAL, CREG_PAE_UPDT); +} - /* Default is 1, which means "PAE offset = 4GByte" */ - writel_relaxed(0, (void __iomem *) CREG_PAE); - - /* Really apply settings made above */ - writel(1, (void __iomem *) CREG_PAE_UPDATE); +static void __init hsdk_init_early(void) +{ + hsdk_init_memory_bridge(); /* * Switch SDIO external ciu clock divider from default div-by-8 to diff --git a/arch/arm/boot/dts/armada-xp-98dx3236.dtsi b/arch/arm/boot/dts/armada-xp-98dx3236.dtsi index 59753470cd34..267d0c178e55 100644 --- a/arch/arm/boot/dts/armada-xp-98dx3236.dtsi +++ b/arch/arm/boot/dts/armada-xp-98dx3236.dtsi @@ -336,3 +336,11 @@ status = "disabled"; }; +&uart0 { + compatible = "marvell,armada-38x-uart"; +}; + +&uart1 { + compatible = "marvell,armada-38x-uart"; +}; + diff --git a/arch/arm/boot/dts/gemini-dlink-dir-685.dts b/arch/arm/boot/dts/gemini-dlink-dir-685.dts index cfbfbc91a1e1..3613f05f8a80 100644 --- a/arch/arm/boot/dts/gemini-dlink-dir-685.dts +++ b/arch/arm/boot/dts/gemini-dlink-dir-685.dts @@ -20,7 +20,7 @@ }; chosen { - bootargs = "console=ttyS0,19200n8 root=/dev/sda1 rw rootwait"; + bootargs = "console=ttyS0,19200n8 root=/dev/sda1 rw rootwait consoleblank=300"; stdout-path = "uart0:19200n8"; }; diff --git a/arch/arm/boot/dts/gemini-dlink-dns-313.dts b/arch/arm/boot/dts/gemini-dlink-dns-313.dts index b12504e10f0b..360642a02a48 100644 --- a/arch/arm/boot/dts/gemini-dlink-dns-313.dts +++ b/arch/arm/boot/dts/gemini-dlink-dns-313.dts @@ -11,7 +11,7 @@ / { model = "D-Link DNS-313 1-Bay Network Storage Enclosure"; - compatible = "dlink,dir-313", "cortina,gemini"; + compatible = "dlink,dns-313", "cortina,gemini"; #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arm/boot/dts/imx6ul.dtsi b/arch/arm/boot/dts/imx6ul.dtsi index bbf010c73336..a7f6d1d58e20 100644 --- a/arch/arm/boot/dts/imx6ul.dtsi +++ b/arch/arm/boot/dts/imx6ul.dtsi @@ -358,7 +358,7 @@ pwm1: pwm@2080000 { compatible = "fsl,imx6ul-pwm", "fsl,imx27-pwm"; reg = <0x02080000 0x4000>; - interrupts = <GIC_SPI 115 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_SPI 83 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clks IMX6UL_CLK_PWM1>, <&clks IMX6UL_CLK_PWM1>; clock-names = "ipg", "per"; @@ -369,7 +369,7 @@ pwm2: pwm@2084000 { compatible = "fsl,imx6ul-pwm", "fsl,imx27-pwm"; reg = <0x02084000 0x4000>; - interrupts = <GIC_SPI 116 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_SPI 84 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clks IMX6UL_CLK_PWM2>, <&clks IMX6UL_CLK_PWM2>; clock-names = "ipg", "per"; @@ -380,7 +380,7 @@ pwm3: pwm@2088000 { compatible = "fsl,imx6ul-pwm", "fsl,imx27-pwm"; reg = <0x02088000 0x4000>; - interrupts = <GIC_SPI 117 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_SPI 85 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clks IMX6UL_CLK_PWM3>, <&clks IMX6UL_CLK_PWM3>; clock-names = "ipg", "per"; @@ -391,7 +391,7 @@ pwm4: pwm@208c000 { compatible = "fsl,imx6ul-pwm", "fsl,imx27-pwm"; reg = <0x0208c000 0x4000>; - interrupts = <GIC_SPI 118 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_SPI 86 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clks IMX6UL_CLK_PWM4>, <&clks IMX6UL_CLK_PWM4>; clock-names = "ipg", "per"; diff --git a/arch/arm/boot/dts/meson8.dtsi b/arch/arm/boot/dts/meson8.dtsi index 7ef442462ea4..40c11b6b217a 100644 --- a/arch/arm/boot/dts/meson8.dtsi +++ b/arch/arm/boot/dts/meson8.dtsi @@ -248,8 +248,8 @@ <GIC_SPI 167 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 168 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 169 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 172 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 173 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 170 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 171 
IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 172 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 173 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 174 IRQ_TYPE_LEVEL_HIGH>, @@ -264,7 +264,6 @@ clocks = <&clkc CLKID_CLK81>, <&clkc CLKID_MALI>; clock-names = "bus", "core"; operating-points-v2 = <&gpu_opp_table>; - switch-delay = <0xffff>; }; }; }; /* end of / */ diff --git a/arch/arm/boot/dts/meson8b.dtsi b/arch/arm/boot/dts/meson8b.dtsi index 800cd65fc50a..ec67f49116d9 100644 --- a/arch/arm/boot/dts/meson8b.dtsi +++ b/arch/arm/boot/dts/meson8b.dtsi @@ -163,23 +163,23 @@ opp-255000000 { opp-hz = /bits/ 64 <255000000>; - opp-microvolt = <1150000>; + opp-microvolt = <1100000>; }; opp-364300000 { opp-hz = /bits/ 64 <364300000>; - opp-microvolt = <1150000>; + opp-microvolt = <1100000>; }; opp-425000000 { opp-hz = /bits/ 64 <425000000>; - opp-microvolt = <1150000>; + opp-microvolt = <1100000>; }; opp-510000000 { opp-hz = /bits/ 64 <510000000>; - opp-microvolt = <1150000>; + opp-microvolt = <1100000>; }; opp-637500000 { opp-hz = /bits/ 64 <637500000>; - opp-microvolt = <1150000>; + opp-microvolt = <1100000>; turbo-mode; }; }; @@ -229,7 +229,6 @@ clocks = <&clkc CLKID_CLK81>, <&clkc CLKID_MALI>; clock-names = "bus", "core"; operating-points-v2 = <&gpu_opp_table>; - switch-delay = <0xffff>; }; }; }; /* end of / */ diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c index 13e561737ca8..746e1fce777e 100644 --- a/arch/arm/common/bL_switcher.c +++ b/arch/arm/common/bL_switcher.c @@ -539,16 +539,14 @@ static void bL_switcher_trace_trigger_cpu(void *__always_unused info) int bL_switcher_trace_trigger(void) { - int ret; - preempt_disable(); bL_switcher_trace_trigger_cpu(NULL); - ret = smp_call_function(bL_switcher_trace_trigger_cpu, NULL, true); + smp_call_function(bL_switcher_trace_trigger_cpu, NULL, true); preempt_enable(); - return ret; + return 0; } EXPORT_SYMBOL_GPL(bL_switcher_trace_trigger); diff --git a/arch/arm/include/asm/arch_timer.h b/arch/arm/include/asm/arch_timer.h index 4b66ecd6be99..99175812d903 100644 --- a/arch/arm/include/asm/arch_timer.h +++ b/arch/arm/include/asm/arch_timer.h @@ -4,6 +4,7 @@ #include <asm/barrier.h> #include <asm/errno.h> +#include <asm/hwcap.h> #include <linux/clocksource.h> #include <linux/init.h> #include <linux/types.h> @@ -124,6 +125,15 @@ static inline void arch_timer_set_cntkctl(u32 cntkctl) isb(); } +static inline void arch_timer_set_evtstrm_feature(void) +{ + elf_hwcap |= HWCAP_EVTSTRM; +} + +static inline bool arch_timer_have_evtstrm_feature(void) +{ + return elf_hwcap & HWCAP_EVTSTRM; +} #endif #endif diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h index 50c3ac5f0809..75bb2c543e59 100644 --- a/arch/arm/include/asm/atomic.h +++ b/arch/arm/include/asm/atomic.h @@ -246,15 +246,15 @@ ATOMIC_OPS(xor, ^=, eor) #ifndef CONFIG_GENERIC_ATOMIC64 typedef struct { - long long counter; + s64 counter; } atomic64_t; #define ATOMIC64_INIT(i) { (i) } #ifdef CONFIG_ARM_LPAE -static inline long long atomic64_read(const atomic64_t *v) +static inline s64 atomic64_read(const atomic64_t *v) { - long long result; + s64 result; __asm__ __volatile__("@ atomic64_read\n" " ldrd %0, %H0, [%1]" @@ -265,7 +265,7 @@ static inline long long atomic64_read(const atomic64_t *v) return result; } -static inline void atomic64_set(atomic64_t *v, long long i) +static inline void atomic64_set(atomic64_t *v, s64 i) { __asm__ __volatile__("@ atomic64_set\n" " strd %2, %H2, [%1]" @@ -274,9 +274,9 @@ static inline void atomic64_set(atomic64_t *v, long long i) ); } #else -static inline long long 
atomic64_read(const atomic64_t *v) +static inline s64 atomic64_read(const atomic64_t *v) { - long long result; + s64 result; __asm__ __volatile__("@ atomic64_read\n" " ldrexd %0, %H0, [%1]" @@ -287,9 +287,9 @@ static inline long long atomic64_read(const atomic64_t *v) return result; } -static inline void atomic64_set(atomic64_t *v, long long i) +static inline void atomic64_set(atomic64_t *v, s64 i) { - long long tmp; + s64 tmp; prefetchw(&v->counter); __asm__ __volatile__("@ atomic64_set\n" @@ -304,9 +304,9 @@ static inline void atomic64_set(atomic64_t *v, long long i) #endif #define ATOMIC64_OP(op, op1, op2) \ -static inline void atomic64_##op(long long i, atomic64_t *v) \ +static inline void atomic64_##op(s64 i, atomic64_t *v) \ { \ - long long result; \ + s64 result; \ unsigned long tmp; \ \ prefetchw(&v->counter); \ @@ -323,10 +323,10 @@ static inline void atomic64_##op(long long i, atomic64_t *v) \ } \ #define ATOMIC64_OP_RETURN(op, op1, op2) \ -static inline long long \ -atomic64_##op##_return_relaxed(long long i, atomic64_t *v) \ +static inline s64 \ +atomic64_##op##_return_relaxed(s64 i, atomic64_t *v) \ { \ - long long result; \ + s64 result; \ unsigned long tmp; \ \ prefetchw(&v->counter); \ @@ -346,10 +346,10 @@ atomic64_##op##_return_relaxed(long long i, atomic64_t *v) \ } #define ATOMIC64_FETCH_OP(op, op1, op2) \ -static inline long long \ -atomic64_fetch_##op##_relaxed(long long i, atomic64_t *v) \ +static inline s64 \ +atomic64_fetch_##op##_relaxed(s64 i, atomic64_t *v) \ { \ - long long result, val; \ + s64 result, val; \ unsigned long tmp; \ \ prefetchw(&v->counter); \ @@ -403,10 +403,9 @@ ATOMIC64_OPS(xor, eor, eor) #undef ATOMIC64_OP_RETURN #undef ATOMIC64_OP -static inline long long -atomic64_cmpxchg_relaxed(atomic64_t *ptr, long long old, long long new) +static inline s64 atomic64_cmpxchg_relaxed(atomic64_t *ptr, s64 old, s64 new) { - long long oldval; + s64 oldval; unsigned long res; prefetchw(&ptr->counter); @@ -427,9 +426,9 @@ atomic64_cmpxchg_relaxed(atomic64_t *ptr, long long old, long long new) } #define atomic64_cmpxchg_relaxed atomic64_cmpxchg_relaxed -static inline long long atomic64_xchg_relaxed(atomic64_t *ptr, long long new) +static inline s64 atomic64_xchg_relaxed(atomic64_t *ptr, s64 new) { - long long result; + s64 result; unsigned long tmp; prefetchw(&ptr->counter); @@ -447,9 +446,9 @@ static inline long long atomic64_xchg_relaxed(atomic64_t *ptr, long long new) } #define atomic64_xchg_relaxed atomic64_xchg_relaxed -static inline long long atomic64_dec_if_positive(atomic64_t *v) +static inline s64 atomic64_dec_if_positive(atomic64_t *v) { - long long result; + s64 result; unsigned long tmp; smp_mb(); @@ -475,10 +474,9 @@ static inline long long atomic64_dec_if_positive(atomic64_t *v) } #define atomic64_dec_if_positive atomic64_dec_if_positive -static inline long long atomic64_fetch_add_unless(atomic64_t *v, long long a, - long long u) +static inline s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u) { - long long oldval, newval; + s64 oldval, newval; unsigned long tmp; smp_mb(); diff --git a/arch/arm/mach-davinci/board-da830-evm.c b/arch/arm/mach-davinci/board-da830-evm.c index 51a892702e27..a273ab25c668 100644 --- a/arch/arm/mach-davinci/board-da830-evm.c +++ b/arch/arm/mach-davinci/board-da830-evm.c @@ -61,6 +61,9 @@ static struct regulator_consumer_supply da830_evm_usb_supplies[] = { static struct regulator_init_data da830_evm_usb_vbus_data = { .consumer_supplies = da830_evm_usb_supplies, .num_consumer_supplies = 
ARRAY_SIZE(da830_evm_usb_supplies), + .constraints = { + .valid_ops_mask = REGULATOR_CHANGE_STATUS, + }, }; static struct fixed_voltage_config da830_evm_usb_vbus = { @@ -88,7 +91,7 @@ static struct gpiod_lookup_table da830_evm_usb_oc_gpio_lookup = { static struct gpiod_lookup_table da830_evm_usb_vbus_gpio_lookup = { .dev_id = "reg-fixed-voltage.0", .table = { - GPIO_LOOKUP("davinci_gpio", ON_BD_USB_DRV, "vbus", 0), + GPIO_LOOKUP("davinci_gpio", ON_BD_USB_DRV, NULL, 0), { } }, }; diff --git a/arch/arm/mach-davinci/board-omapl138-hawk.c b/arch/arm/mach-davinci/board-omapl138-hawk.c index db177a6a7e48..5390a8630cf0 100644 --- a/arch/arm/mach-davinci/board-omapl138-hawk.c +++ b/arch/arm/mach-davinci/board-omapl138-hawk.c @@ -306,6 +306,9 @@ static struct regulator_consumer_supply hawk_usb_supplies[] = { static struct regulator_init_data hawk_usb_vbus_data = { .consumer_supplies = hawk_usb_supplies, .num_consumer_supplies = ARRAY_SIZE(hawk_usb_supplies), + .constraints = { + .valid_ops_mask = REGULATOR_CHANGE_STATUS, + }, }; static struct fixed_voltage_config hawk_usb_vbus = { diff --git a/arch/arm/mach-omap2/prm3xxx.c b/arch/arm/mach-omap2/prm3xxx.c index fd4a3bf27993..1b442b128569 100644 --- a/arch/arm/mach-omap2/prm3xxx.c +++ b/arch/arm/mach-omap2/prm3xxx.c @@ -430,7 +430,7 @@ static void omap3_prm_reconfigure_io_chain(void) * registers, and omap3xxx_prm_reconfigure_io_chain() must be called. * No return value. */ -static void __init omap3xxx_prm_enable_io_wakeup(void) +static void omap3xxx_prm_enable_io_wakeup(void) { if (prm_features & PRM_HAS_IO_WAKEUP) omap2_prm_set_mod_reg_bits(OMAP3430_EN_IO_MASK, WKUP_MOD, diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 697ea0510729..c1734e444fb8 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -26,6 +26,7 @@ config ARM64 select ARCH_HAS_MEMBARRIER_SYNC_CORE select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_SETUP_DMA_OPS + select ARCH_HAS_SET_DIRECT_MAP select ARCH_HAS_SET_MEMORY select ARCH_HAS_STRICT_KERNEL_RWX select ARCH_HAS_STRICT_MODULE_RWX @@ -107,6 +108,8 @@ config ARM64 select GENERIC_STRNCPY_FROM_USER select GENERIC_STRNLEN_USER select GENERIC_TIME_VSYSCALL + select GENERIC_GETTIMEOFDAY + select GENERIC_COMPAT_VDSO if (!CPU_BIG_ENDIAN && COMPAT) select HANDLE_DOMAIN_IRQ select HARDIRQS_SW_RESEND select HAVE_PCI @@ -160,6 +163,7 @@ config ARM64 select HAVE_SYSCALL_TRACEPOINTS select HAVE_KPROBES select HAVE_KRETPROBES + select HAVE_GENERIC_VDSO select IOMMU_DMA if IOMMU_SUPPORT select IRQ_DOMAIN select IRQ_FORCED_THREADING @@ -260,7 +264,8 @@ config GENERIC_CALIBRATE_DELAY def_bool y config ZONE_DMA32 - def_bool y + bool "Support DMA32 zone" if EXPERT + default y config HAVE_GENERIC_GUP def_bool y @@ -933,7 +938,6 @@ config PARAVIRT config PARAVIRT_TIME_ACCOUNTING bool "Paravirtual steal time accounting" select PARAVIRT - default n help Select this option to enable fine granularity task steal time accounting. Time spent executing other tasks in parallel with @@ -1418,12 +1422,27 @@ config ARM64_SVE KVM in the same kernel image. config ARM64_MODULE_PLTS - bool + bool "Use PLTs to allow module memory to spill over into vmalloc area" + depends on MODULES select HAVE_MOD_ARCH_SPECIFIC + help + Allocate PLTs when loading modules so that jumps and calls whose + targets are too far away for their relative offsets to be encoded + in the instructions themselves can be bounced via veneers in the + module's PLT. This allows modules to be allocated in the generic + vmalloc area after the dedicated module memory area has been + exhausted. 
+ + When running with address space randomization (KASLR), the module + region itself may be too far away for ordinary relative jumps and + calls, and so in that case, module PLTs are required and cannot be + disabled. + + Specific errata workaround(s) might also force module PLTs to be + enabled (ARM64_ERRATUM_843419). config ARM64_PSEUDO_NMI bool "Support for NMI-like interrupts" - depends on BROKEN # 1556553607-46531-1-git-send-email-julien.thierry@arm.com select CONFIG_ARM_GIC_V3 help Adds support for mimicking Non-Maskable Interrupts through the use of @@ -1436,6 +1455,17 @@ config ARM64_PSEUDO_NMI If unsure, say N +if ARM64_PSEUDO_NMI +config ARM64_DEBUG_PRIORITY_MASKING + bool "Debug interrupt priority masking" + help + This adds runtime checks to functions enabling/disabling + interrupts when using priority masking. The additional checks verify + the validity of ICC_PMR_EL1 when calling concerned functions. + + If unsure, say N +endif + config RELOCATABLE bool help diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index e9d2e578cbe6..e3d3fd0a4268 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -49,10 +49,26 @@ $(warning Detected assembler with broken .inst; disassembly will be unreliable) endif endif -KBUILD_CFLAGS += -mgeneral-regs-only $(lseinstr) $(brokengasinst) +ifeq ($(CONFIG_GENERIC_COMPAT_VDSO), y) + CROSS_COMPILE_COMPAT ?= $(CONFIG_CROSS_COMPILE_COMPAT_VDSO:"%"=%) + + ifeq ($(CONFIG_CC_IS_CLANG), y) + $(warning CROSS_COMPILE_COMPAT is clang, the compat vDSO will not be built) + else ifeq ($(CROSS_COMPILE_COMPAT),) + $(warning CROSS_COMPILE_COMPAT not defined or empty, the compat vDSO will not be built) + else ifeq ($(shell which $(CROSS_COMPILE_COMPAT)gcc 2> /dev/null),) + $(error $(CROSS_COMPILE_COMPAT)gcc not found, check CROSS_COMPILE_COMPAT) + else + export CROSS_COMPILE_COMPAT + export CONFIG_COMPAT_VDSO := y + compat_vdso := -DCONFIG_COMPAT_VDSO=1 + endif +endif + +KBUILD_CFLAGS += -mgeneral-regs-only $(lseinstr) $(brokengasinst) $(compat_vdso) KBUILD_CFLAGS += -fno-asynchronous-unwind-tables KBUILD_CFLAGS += $(call cc-disable-warning, psabi) -KBUILD_AFLAGS += $(lseinstr) $(brokengasinst) +KBUILD_AFLAGS += $(lseinstr) $(brokengasinst) $(compat_vdso) KBUILD_CFLAGS += $(call cc-option,-mabi=lp64) KBUILD_AFLAGS += $(call cc-option,-mabi=lp64) @@ -164,6 +180,9 @@ ifeq ($(KBUILD_EXTMOD),) prepare: vdso_prepare vdso_prepare: prepare0 $(Q)$(MAKE) $(build)=arch/arm64/kernel/vdso include/generated/vdso-offsets.h + $(if $(CONFIG_COMPAT_VDSO),$(Q)$(MAKE) \ + $(build)=arch/arm64/kernel/vdso32 \ + include/generated/vdso32-offsets.h) endif define archhelp diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi index b04581249f0b..bf7f845447ed 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi @@ -28,7 +28,7 @@ enable-method = "psci"; clocks = <&clockgen 1 0>; next-level-cache = <&l2>; - cpu-idle-states = <&CPU_PH20>; + cpu-idle-states = <&CPU_PW20>; }; cpu1: cpu@1 { @@ -38,7 +38,7 @@ enable-method = "psci"; clocks = <&clockgen 1 0>; next-level-cache = <&l2>; - cpu-idle-states = <&CPU_PH20>; + cpu-idle-states = <&CPU_PW20>; }; l2: l2-cache { @@ -53,13 +53,13 @@ */ entry-method = "arm,psci"; - CPU_PH20: cpu-ph20 { - compatible = "arm,idle-state"; - idle-state-name = "PH20"; - arm,psci-suspend-param = <0x00010000>; - entry-latency-us = <1000>; - exit-latency-us = <1000>; - min-residency-us = <3000>; + CPU_PW20: cpu-pw20 { + compatible = 
"arm,idle-state"; + idle-state-name = "PW20"; + arm,psci-suspend-param = <0x0>; + entry-latency-us = <2000>; + exit-latency-us = <2000>; + min-residency-us = <6000>; }; }; diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 4d583514258c..dd827e64e5fe 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -68,6 +68,7 @@ CONFIG_KEXEC=y CONFIG_CRASH_DUMP=y CONFIG_XEN=y CONFIG_COMPAT=y +CONFIG_RANDOMIZE_BASE=y CONFIG_HIBERNATION=y CONFIG_WQ_POWER_EFFICIENT_DEFAULT=y CONFIG_ARM_CPUIDLE=y @@ -613,6 +614,7 @@ CONFIG_RTC_DRV_TEGRA=y CONFIG_RTC_DRV_IMX_SC=m CONFIG_RTC_DRV_XGENE=y CONFIG_DMADEVICES=y +CONFIG_FSL_EDMA=y CONFIG_DMA_BCM2835=m CONFIG_K3_DMA=y CONFIG_MV_XOR=y diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h index ada0bc480a1b..b263e239cb59 100644 --- a/arch/arm64/include/asm/acpi.h +++ b/arch/arm64/include/asm/acpi.h @@ -38,6 +38,9 @@ (!(entry) || (entry)->header.length < ACPI_MADT_GICC_MIN_LENGTH || \ (unsigned long)(entry) + (entry)->header.length > (end)) +#define ACPI_MADT_GICC_SPE (ACPI_OFFSET(struct acpi_madt_generic_interrupt, \ + spe_interrupt) + sizeof(u16)) + /* Basic configuration for ACPI */ #ifdef CONFIG_ACPI pgprot_t __acpi_get_mem_attribute(phys_addr_t addr); diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h index 2247908e55d6..79155a8cfe7c 100644 --- a/arch/arm64/include/asm/arch_gicv3.h +++ b/arch/arm64/include/asm/arch_gicv3.h @@ -152,7 +152,9 @@ static inline bool gic_prio_masking_enabled(void) static inline void gic_pmr_mask_irqs(void) { - BUILD_BUG_ON(GICD_INT_DEF_PRI <= GIC_PRIO_IRQOFF); + BUILD_BUG_ON(GICD_INT_DEF_PRI < (GIC_PRIO_IRQOFF | + GIC_PRIO_PSR_I_SET)); + BUILD_BUG_ON(GICD_INT_DEF_PRI >= GIC_PRIO_IRQON); gic_write_pmr(GIC_PRIO_IRQOFF); } diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h index 6756178c27db..7ae54d7d333a 100644 --- a/arch/arm64/include/asm/arch_timer.h +++ b/arch/arm64/include/asm/arch_timer.h @@ -9,6 +9,7 @@ #define __ASM_ARCH_TIMER_H #include <asm/barrier.h> +#include <asm/hwcap.h> #include <asm/sysreg.h> #include <linux/bug.h> @@ -229,4 +230,16 @@ static inline int arch_timer_arch_init(void) return 0; } +static inline void arch_timer_set_evtstrm_feature(void) +{ + cpu_set_named_feature(EVTSTRM); +#ifdef CONFIG_COMPAT + compat_elf_hwcap |= COMPAT_HWCAP_EVTSTRM; +#endif +} + +static inline bool arch_timer_have_evtstrm_feature(void) +{ + return cpu_have_named_feature(EVTSTRM); +} #endif diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h index 23c378606aed..c8c850bc3dfb 100644 --- a/arch/arm64/include/asm/atomic_ll_sc.h +++ b/arch/arm64/include/asm/atomic_ll_sc.h @@ -122,9 +122,9 @@ ATOMIC_OPS(xor, eor) #define ATOMIC64_OP(op, asm_op) \ __LL_SC_INLINE void \ -__LL_SC_PREFIX(arch_atomic64_##op(long i, atomic64_t *v)) \ +__LL_SC_PREFIX(arch_atomic64_##op(s64 i, atomic64_t *v)) \ { \ - long result; \ + s64 result; \ unsigned long tmp; \ \ asm volatile("// atomic64_" #op "\n" \ @@ -139,10 +139,10 @@ __LL_SC_PREFIX(arch_atomic64_##op(long i, atomic64_t *v)) \ __LL_SC_EXPORT(arch_atomic64_##op); #define ATOMIC64_OP_RETURN(name, mb, acq, rel, cl, op, asm_op) \ -__LL_SC_INLINE long \ -__LL_SC_PREFIX(arch_atomic64_##op##_return##name(long i, atomic64_t *v))\ +__LL_SC_INLINE s64 \ +__LL_SC_PREFIX(arch_atomic64_##op##_return##name(s64 i, atomic64_t *v))\ { \ - long result; \ + s64 result; \ unsigned long tmp; \ \ asm volatile("// atomic64_" #op "_return" #name "\n" 
\ @@ -161,10 +161,10 @@ __LL_SC_PREFIX(arch_atomic64_##op##_return##name(long i, atomic64_t *v))\ __LL_SC_EXPORT(arch_atomic64_##op##_return##name); #define ATOMIC64_FETCH_OP(name, mb, acq, rel, cl, op, asm_op) \ -__LL_SC_INLINE long \ -__LL_SC_PREFIX(arch_atomic64_fetch_##op##name(long i, atomic64_t *v)) \ +__LL_SC_INLINE s64 \ +__LL_SC_PREFIX(arch_atomic64_fetch_##op##name(s64 i, atomic64_t *v)) \ { \ - long result, val; \ + s64 result, val; \ unsigned long tmp; \ \ asm volatile("// atomic64_fetch_" #op #name "\n" \ @@ -214,10 +214,10 @@ ATOMIC64_OPS(xor, eor) #undef ATOMIC64_OP_RETURN #undef ATOMIC64_OP -__LL_SC_INLINE long +__LL_SC_INLINE s64 __LL_SC_PREFIX(arch_atomic64_dec_if_positive(atomic64_t *v)) { - long result; + s64 result; unsigned long tmp; asm volatile("// atomic64_dec_if_positive\n" diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h index 45e030d54332..69acb1c19a15 100644 --- a/arch/arm64/include/asm/atomic_lse.h +++ b/arch/arm64/include/asm/atomic_lse.h @@ -213,9 +213,9 @@ ATOMIC_FETCH_OP_SUB( , al, "memory") #define __LL_SC_ATOMIC64(op) __LL_SC_CALL(arch_atomic64_##op) #define ATOMIC64_OP(op, asm_op) \ -static inline void arch_atomic64_##op(long i, atomic64_t *v) \ +static inline void arch_atomic64_##op(s64 i, atomic64_t *v) \ { \ - register long x0 asm ("x0") = i; \ + register s64 x0 asm ("x0") = i; \ register atomic64_t *x1 asm ("x1") = v; \ \ asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(op), \ @@ -233,9 +233,9 @@ ATOMIC64_OP(add, stadd) #undef ATOMIC64_OP #define ATOMIC64_FETCH_OP(name, mb, op, asm_op, cl...) \ -static inline long arch_atomic64_fetch_##op##name(long i, atomic64_t *v)\ +static inline s64 arch_atomic64_fetch_##op##name(s64 i, atomic64_t *v) \ { \ - register long x0 asm ("x0") = i; \ + register s64 x0 asm ("x0") = i; \ register atomic64_t *x1 asm ("x1") = v; \ \ asm volatile(ARM64_LSE_ATOMIC_INSN( \ @@ -265,9 +265,9 @@ ATOMIC64_FETCH_OPS(add, ldadd) #undef ATOMIC64_FETCH_OPS #define ATOMIC64_OP_ADD_RETURN(name, mb, cl...) \ -static inline long arch_atomic64_add_return##name(long i, atomic64_t *v)\ +static inline s64 arch_atomic64_add_return##name(s64 i, atomic64_t *v) \ { \ - register long x0 asm ("x0") = i; \ + register s64 x0 asm ("x0") = i; \ register atomic64_t *x1 asm ("x1") = v; \ \ asm volatile(ARM64_LSE_ATOMIC_INSN( \ @@ -291,9 +291,9 @@ ATOMIC64_OP_ADD_RETURN( , al, "memory") #undef ATOMIC64_OP_ADD_RETURN -static inline void arch_atomic64_and(long i, atomic64_t *v) +static inline void arch_atomic64_and(s64 i, atomic64_t *v) { - register long x0 asm ("x0") = i; + register s64 x0 asm ("x0") = i; register atomic64_t *x1 asm ("x1") = v; asm volatile(ARM64_LSE_ATOMIC_INSN( @@ -309,9 +309,9 @@ static inline void arch_atomic64_and(long i, atomic64_t *v) } #define ATOMIC64_FETCH_OP_AND(name, mb, cl...) 
\ -static inline long arch_atomic64_fetch_and##name(long i, atomic64_t *v) \ +static inline s64 arch_atomic64_fetch_and##name(s64 i, atomic64_t *v) \ { \ - register long x0 asm ("x0") = i; \ + register s64 x0 asm ("x0") = i; \ register atomic64_t *x1 asm ("x1") = v; \ \ asm volatile(ARM64_LSE_ATOMIC_INSN( \ @@ -335,9 +335,9 @@ ATOMIC64_FETCH_OP_AND( , al, "memory") #undef ATOMIC64_FETCH_OP_AND -static inline void arch_atomic64_sub(long i, atomic64_t *v) +static inline void arch_atomic64_sub(s64 i, atomic64_t *v) { - register long x0 asm ("x0") = i; + register s64 x0 asm ("x0") = i; register atomic64_t *x1 asm ("x1") = v; asm volatile(ARM64_LSE_ATOMIC_INSN( @@ -353,9 +353,9 @@ static inline void arch_atomic64_sub(long i, atomic64_t *v) } #define ATOMIC64_OP_SUB_RETURN(name, mb, cl...) \ -static inline long arch_atomic64_sub_return##name(long i, atomic64_t *v)\ +static inline s64 arch_atomic64_sub_return##name(s64 i, atomic64_t *v) \ { \ - register long x0 asm ("x0") = i; \ + register s64 x0 asm ("x0") = i; \ register atomic64_t *x1 asm ("x1") = v; \ \ asm volatile(ARM64_LSE_ATOMIC_INSN( \ @@ -381,9 +381,9 @@ ATOMIC64_OP_SUB_RETURN( , al, "memory") #undef ATOMIC64_OP_SUB_RETURN #define ATOMIC64_FETCH_OP_SUB(name, mb, cl...) \ -static inline long arch_atomic64_fetch_sub##name(long i, atomic64_t *v) \ +static inline s64 arch_atomic64_fetch_sub##name(s64 i, atomic64_t *v) \ { \ - register long x0 asm ("x0") = i; \ + register s64 x0 asm ("x0") = i; \ register atomic64_t *x1 asm ("x1") = v; \ \ asm volatile(ARM64_LSE_ATOMIC_INSN( \ @@ -407,7 +407,7 @@ ATOMIC64_FETCH_OP_SUB( , al, "memory") #undef ATOMIC64_FETCH_OP_SUB -static inline long arch_atomic64_dec_if_positive(atomic64_t *v) +static inline s64 arch_atomic64_dec_if_positive(atomic64_t *v) { register long x0 asm ("x0") = (long)v; diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h index a05db636981a..64eeaa41e7ca 100644 --- a/arch/arm64/include/asm/cache.h +++ b/arch/arm64/include/asm/cache.h @@ -80,12 +80,15 @@ static inline u32 cache_type_cwg(void) #define __read_mostly __attribute__((__section__(".data..read_mostly"))) -static inline int cache_line_size(void) +static inline int cache_line_size_of_cpu(void) { u32 cwg = cache_type_cwg(); + return cwg ? 4 << cwg : ARCH_DMA_MINALIGN; } +int cache_line_size(void); + /* * Read the effective value of CTR_EL0. 
* diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index 1fe4467442aa..665c78e0665a 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -176,4 +176,7 @@ static inline void flush_cache_vunmap(unsigned long start, unsigned long end) int set_memory_valid(unsigned long addr, int numpages, int enable); +int set_direct_map_invalid_noflush(struct page *page); +int set_direct_map_default_noflush(struct page *page); + #endif diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 373799b7982f..3d8db50d9ae2 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -614,6 +614,12 @@ static inline bool system_uses_irq_prio_masking(void) cpus_have_const_cap(ARM64_HAS_IRQ_PRIO_MASKING); } +static inline bool system_has_prio_mask_debugging(void) +{ + return IS_ENABLED(CONFIG_ARM64_DEBUG_PRIORITY_MASKING) && + system_uses_irq_prio_masking(); +} + #define ARM64_SSBD_UNKNOWN -1 #define ARM64_SSBD_FORCE_DISABLE 0 #define ARM64_SSBD_KERNEL 1 diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h index 6dd8a8723525..987926ed535e 100644 --- a/arch/arm64/include/asm/daifflags.h +++ b/arch/arm64/include/asm/daifflags.h @@ -7,6 +7,7 @@ #include <linux/irqflags.h> +#include <asm/arch_gicv3.h> #include <asm/cpufeature.h> #define DAIF_PROCCTX 0 @@ -16,11 +17,20 @@ /* mask/save/unmask/restore all exceptions, including interrupts. */ static inline void local_daif_mask(void) { + WARN_ON(system_has_prio_mask_debugging() && + (read_sysreg_s(SYS_ICC_PMR_EL1) == (GIC_PRIO_IRQOFF | + GIC_PRIO_PSR_I_SET))); + asm volatile( "msr daifset, #0xf // local_daif_mask\n" : : : "memory"); + + /* Don't really care for a dsb here, we don't intend to enable IRQs */ + if (system_uses_irq_prio_masking()) + gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); + trace_hardirqs_off(); } @@ -32,7 +42,7 @@ static inline unsigned long local_daif_save(void) if (system_uses_irq_prio_masking()) { /* If IRQs are masked with PMR, reflect it in the flags */ - if (read_sysreg_s(SYS_ICC_PMR_EL1) <= GIC_PRIO_IRQOFF) + if (read_sysreg_s(SYS_ICC_PMR_EL1) != GIC_PRIO_IRQON) flags |= PSR_I_BIT; } @@ -45,39 +55,50 @@ static inline void local_daif_restore(unsigned long flags) { bool irq_disabled = flags & PSR_I_BIT; + WARN_ON(system_has_prio_mask_debugging() && + !(read_sysreg(daif) & PSR_I_BIT)); + if (!irq_disabled) { trace_hardirqs_on(); - if (system_uses_irq_prio_masking()) - arch_local_irq_enable(); - } else if (!(flags & PSR_A_BIT)) { - /* - * If interrupts are disabled but we can take - * asynchronous errors, we can take NMIs - */ if (system_uses_irq_prio_masking()) { - flags &= ~PSR_I_BIT; + gic_write_pmr(GIC_PRIO_IRQON); + dsb(sy); + } + } else if (system_uses_irq_prio_masking()) { + u64 pmr; + + if (!(flags & PSR_A_BIT)) { /* - * There has been concern that the write to daif - * might be reordered before this write to PMR. - * From the ARM ARM DDI 0487D.a, section D1.7.1 - * "Accessing PSTATE fields": - * Writes to the PSTATE fields have side-effects on - * various aspects of the PE operation. All of these - * side-effects are guaranteed: - * - Not to be visible to earlier instructions in - * the execution stream. - * - To be visible to later instructions in the - * execution stream - * - * Also, writes to PMR are self-synchronizing, so no - * interrupts with a lower priority than PMR is signaled - * to the PE after the write. 
- * - * So we don't need additional synchronization here. + * If interrupts are disabled but we can take + * asynchronous errors, we can take NMIs */ - arch_local_irq_disable(); + flags &= ~PSR_I_BIT; + pmr = GIC_PRIO_IRQOFF; + } else { + pmr = GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET; } + + /* + * There has been concern that the write to daif + * might be reordered before this write to PMR. + * From the ARM ARM DDI 0487D.a, section D1.7.1 + * "Accessing PSTATE fields": + * Writes to the PSTATE fields have side-effects on + * various aspects of the PE operation. All of these + * side-effects are guaranteed: + * - Not to be visible to earlier instructions in + * the execution stream. + * - To be visible to later instructions in the + * execution stream + * + * Also, writes to PMR are self-synchronizing, so no + * interrupts with a lower priority than PMR is signaled + * to the PE after the write. + * + * So we don't need additional synchronization here. + */ + gic_write_pmr(pmr); } write_sysreg(flags, daif); diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h index 325d9515c0f8..3c7037c6ba9b 100644 --- a/arch/arm64/include/asm/elf.h +++ b/arch/arm64/include/asm/elf.h @@ -202,7 +202,21 @@ typedef compat_elf_greg_t compat_elf_gregset_t[COMPAT_ELF_NGREG]; ({ \ set_thread_flag(TIF_32BIT); \ }) +#ifdef CONFIG_GENERIC_COMPAT_VDSO +#define COMPAT_ARCH_DLINFO \ +do { \ + /* \ + * Note that we use Elf64_Off instead of elf_addr_t because \ + * elf_addr_t in compat is defined as Elf32_Addr and casting \ + * current->mm->context.vdso to it triggers a cast warning of \ + * cast from pointer to integer of different size. \ + */ \ + NEW_AUX_ENT(AT_SYSINFO_EHDR, \ + (Elf64_Off)current->mm->context.vdso); \ +} while (0) +#else #define COMPAT_ARCH_DLINFO +#endif extern int aarch32_setup_additional_pages(struct linux_binprm *bprm, int uses_interp); #define compat_arch_setup_additional_pages \ diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 897029c8e9b5..b6a2c352f4c3 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -37,8 +37,6 @@ struct task_struct; extern void fpsimd_save_state(struct user_fpsimd_state *state); extern void fpsimd_load_state(struct user_fpsimd_state *state); -extern void fpsimd_save(void); - extern void fpsimd_thread_switch(struct task_struct *next); extern void fpsimd_flush_thread(void); @@ -52,8 +50,7 @@ extern void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *state, void *sve_state, unsigned int sve_vl); extern void fpsimd_flush_task_state(struct task_struct *target); -extern void fpsimd_flush_cpu_state(void); -extern void sve_flush_cpu_state(void); +extern void fpsimd_save_and_flush_cpu_state(void); /* Maximum VL that SVE VL-agnostic software can transparently support */ #define SVE_VL_ARCH_MAX 0x100 diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index e5d9420cd258..3d2f2472a36c 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -84,6 +84,8 @@ #define KERNEL_HWCAP_SVEBITPERM __khwcap2_feature(SVEBITPERM) #define KERNEL_HWCAP_SVESHA3 __khwcap2_feature(SVESHA3) #define KERNEL_HWCAP_SVESM4 __khwcap2_feature(SVESM4) +#define KERNEL_HWCAP_FLAGM2 __khwcap2_feature(FLAGM2) +#define KERNEL_HWCAP_FRINT __khwcap2_feature(FRINT) /* * This yields a mask that user programs can use to figure out what diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h index 66853fde60f9..7872f260c9ee 100644 --- 
a/arch/arm64/include/asm/irqflags.h +++ b/arch/arm64/include/asm/irqflags.h @@ -29,6 +29,12 @@ */ static inline void arch_local_irq_enable(void) { + if (system_has_prio_mask_debugging()) { + u32 pmr = read_sysreg_s(SYS_ICC_PMR_EL1); + + WARN_ON_ONCE(pmr != GIC_PRIO_IRQON && pmr != GIC_PRIO_IRQOFF); + } + asm volatile(ALTERNATIVE( "msr daifclr, #2 // arch_local_irq_enable\n" "nop", @@ -42,6 +48,12 @@ static inline void arch_local_irq_enable(void) static inline void arch_local_irq_disable(void) { + if (system_has_prio_mask_debugging()) { + u32 pmr = read_sysreg_s(SYS_ICC_PMR_EL1); + + WARN_ON_ONCE(pmr != GIC_PRIO_IRQON && pmr != GIC_PRIO_IRQOFF); + } + asm volatile(ALTERNATIVE( "msr daifset, #2 // arch_local_irq_disable", __msr_s(SYS_ICC_PMR_EL1, "%0"), @@ -56,43 +68,46 @@ static inline void arch_local_irq_disable(void) */ static inline unsigned long arch_local_save_flags(void) { - unsigned long daif_bits; unsigned long flags; - daif_bits = read_sysreg(daif); - - /* - * The asm is logically equivalent to: - * - * if (system_uses_irq_prio_masking()) - * flags = (daif_bits & PSR_I_BIT) ? - * GIC_PRIO_IRQOFF : - * read_sysreg_s(SYS_ICC_PMR_EL1); - * else - * flags = daif_bits; - */ asm volatile(ALTERNATIVE( - "mov %0, %1\n" - "nop\n" - "nop", - __mrs_s("%0", SYS_ICC_PMR_EL1) - "ands %1, %1, " __stringify(PSR_I_BIT) "\n" - "csel %0, %0, %2, eq", - ARM64_HAS_IRQ_PRIO_MASKING) - : "=&r" (flags), "+r" (daif_bits) - : "r" ((unsigned long) GIC_PRIO_IRQOFF) + "mrs %0, daif", + __mrs_s("%0", SYS_ICC_PMR_EL1), + ARM64_HAS_IRQ_PRIO_MASKING) + : "=&r" (flags) + : : "memory"); return flags; } +static inline int arch_irqs_disabled_flags(unsigned long flags) +{ + int res; + + asm volatile(ALTERNATIVE( + "and %w0, %w1, #" __stringify(PSR_I_BIT), + "eor %w0, %w1, #" __stringify(GIC_PRIO_IRQON), + ARM64_HAS_IRQ_PRIO_MASKING) + : "=&r" (res) + : "r" ((int) flags) + : "memory"); + + return res; +} + static inline unsigned long arch_local_irq_save(void) { unsigned long flags; flags = arch_local_save_flags(); - arch_local_irq_disable(); + /* + * There are too many states with IRQs disabled, just keep the current + * state if interrupts are already disabled/masked. + */ + if (!arch_irqs_disabled_flags(flags)) + arch_local_irq_disable(); return flags; } @@ -108,26 +123,10 @@ static inline void arch_local_irq_restore(unsigned long flags) __msr_s(SYS_ICC_PMR_EL1, "%0") "dsb sy", ARM64_HAS_IRQ_PRIO_MASKING) - : "+r" (flags) : + : "r" (flags) : "memory"); } -static inline int arch_irqs_disabled_flags(unsigned long flags) -{ - int res; - - asm volatile(ALTERNATIVE( - "and %w0, %w1, #" __stringify(PSR_I_BIT) "\n" - "nop", - "cmp %w1, #" __stringify(GIC_PRIO_IRQOFF) "\n" - "cset %w0, ls", - ARM64_HAS_IRQ_PRIO_MASKING) - : "=&r" (res) - : "r" ((int) flags) - : "memory"); - - return res; -} #endif #endif diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index c328191aa202..9f19c354b165 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -597,11 +597,12 @@ static inline void kvm_arm_vhe_guest_enter(void) * will not signal the CPU of interrupts of lower priority, and the * only way to get out will be via guest exceptions. * Naturally, we want to avoid this. + * + * local_daif_mask() already sets GIC_PRIO_PSR_I_SET, we just need a + * dsb to ensure the redistributor is forwards EL2 IRQs to the CPU. 
*/ - if (system_uses_irq_prio_masking()) { - gic_write_pmr(GIC_PRIO_IRQON); + if (system_uses_irq_prio_masking()) dsb(sy); - } } static inline void kvm_arm_vhe_guest_exit(void) diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index 30e5e67749e5..db92950bb1a0 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -115,7 +115,6 @@ * Level 2 descriptor (PMD). */ #define PMD_TYPE_MASK (_AT(pmdval_t, 3) << 0) -#define PMD_TYPE_FAULT (_AT(pmdval_t, 0) << 0) #define PMD_TYPE_TABLE (_AT(pmdval_t, 3) << 0) #define PMD_TYPE_SECT (_AT(pmdval_t, 1) << 0) #define PMD_TABLE_BIT (_AT(pmdval_t, 1) << 1) @@ -142,8 +141,8 @@ /* * Level 3 descriptor (PTE). */ +#define PTE_VALID (_AT(pteval_t, 1) << 0) #define PTE_TYPE_MASK (_AT(pteval_t, 3) << 0) -#define PTE_TYPE_FAULT (_AT(pteval_t, 0) << 0) #define PTE_TYPE_PAGE (_AT(pteval_t, 3) << 0) #define PTE_TABLE_BIT (_AT(pteval_t, 1) << 1) #define PTE_USER (_AT(pteval_t, 1) << 6) /* AP[1] */ diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index c81583be034b..f318258a14be 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -13,7 +13,6 @@ /* * Software defined PTE bits definition. */ -#define PTE_VALID (_AT(pteval_t, 1) << 0) #define PTE_WRITE (PTE_DBM) /* same as DBM (51) */ #define PTE_DIRTY (_AT(pteval_t, 1) << 55) #define PTE_SPECIAL (_AT(pteval_t, 1) << 56) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index fca26759081a..3052381baaeb 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -235,29 +235,42 @@ extern void __sync_icache_dcache(pte_t pteval); * * PTE_DIRTY || (PTE_WRITE && !PTE_RDONLY) */ -static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pte) + +static inline void __check_racy_pte_update(struct mm_struct *mm, pte_t *ptep, + pte_t pte) { pte_t old_pte; - if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte)) - __sync_icache_dcache(pte); + if (!IS_ENABLED(CONFIG_DEBUG_VM)) + return; + + old_pte = READ_ONCE(*ptep); + + if (!pte_valid(old_pte) || !pte_valid(pte)) + return; + if (mm != current->active_mm && atomic_read(&mm->mm_users) <= 1) + return; /* - * If the existing pte is valid, check for potential race with - * hardware updates of the pte (ptep_set_access_flags safely changes - * valid ptes without going through an invalid entry). + * Check for potential race with hardware updates of the pte + * (ptep_set_access_flags safely changes valid ptes without going + * through an invalid entry). 
*/ - old_pte = READ_ONCE(*ptep); - if (IS_ENABLED(CONFIG_DEBUG_VM) && pte_valid(old_pte) && pte_valid(pte) && - (mm == current->active_mm || atomic_read(&mm->mm_users) > 1)) { - VM_WARN_ONCE(!pte_young(pte), - "%s: racy access flag clearing: 0x%016llx -> 0x%016llx", - __func__, pte_val(old_pte), pte_val(pte)); - VM_WARN_ONCE(pte_write(old_pte) && !pte_dirty(pte), - "%s: racy dirty state clearing: 0x%016llx -> 0x%016llx", - __func__, pte_val(old_pte), pte_val(pte)); - } + VM_WARN_ONCE(!pte_young(pte), + "%s: racy access flag clearing: 0x%016llx -> 0x%016llx", + __func__, pte_val(old_pte), pte_val(pte)); + VM_WARN_ONCE(pte_write(old_pte) && !pte_dirty(pte), + "%s: racy dirty state clearing: 0x%016llx -> 0x%016llx", + __func__, pte_val(old_pte), pte_val(pte)); +} + +static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte) +{ + if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte)) + __sync_icache_dcache(pte); + + __check_racy_pte_update(mm, ptep, pte); set_pte(ptep, pte); } @@ -324,9 +337,14 @@ static inline pmd_t pte_pmd(pte_t pte) return __pmd(pte_val(pte)); } -static inline pgprot_t mk_sect_prot(pgprot_t prot) +static inline pgprot_t mk_pud_sect_prot(pgprot_t prot) +{ + return __pgprot((pgprot_val(prot) & ~PUD_TABLE_BIT) | PUD_TYPE_SECT); +} + +static inline pgprot_t mk_pmd_sect_prot(pgprot_t prot) { - return __pgprot(pgprot_val(prot) & ~PTE_TABLE_BIT); + return __pgprot((pgprot_val(prot) & ~PMD_TABLE_BIT) | PMD_TYPE_SECT); } #ifdef CONFIG_NUMA_BALANCING diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index dad858b6adc6..81693244f58d 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -24,9 +24,15 @@ * means masking more IRQs (or at least that the same IRQs remain masked). * * To mask interrupts, we clear the most significant bit of PMR. + * + * Some code sections either automatically switch back to PSR.I or explicitly + * require to not use priority masking. If bit GIC_PRIO_PSR_I_SET is included + * in the the priority mask, it indicates that PSR.I should be set and + * interrupt disabling temporarily does not rely on IRQ priorities. 
*/ -#define GIC_PRIO_IRQON 0xf0 -#define GIC_PRIO_IRQOFF (GIC_PRIO_IRQON & ~0x80) +#define GIC_PRIO_IRQON 0xc0 +#define GIC_PRIO_IRQOFF (GIC_PRIO_IRQON & ~0x80) +#define GIC_PRIO_PSR_I_SET (1 << 4) /* Additional SPSR bits not exposed in the UABI */ #define PSR_IL_BIT (1 << 20) diff --git a/arch/arm64/include/asm/signal32.h b/arch/arm64/include/asm/signal32.h index 0418c67f2b8b..bd43d1cf724b 100644 --- a/arch/arm64/include/asm/signal32.h +++ b/arch/arm64/include/asm/signal32.h @@ -9,6 +9,52 @@ #ifdef CONFIG_COMPAT #include <linux/compat.h> +struct compat_sigcontext { + /* We always set these two fields to 0 */ + compat_ulong_t trap_no; + compat_ulong_t error_code; + + compat_ulong_t oldmask; + compat_ulong_t arm_r0; + compat_ulong_t arm_r1; + compat_ulong_t arm_r2; + compat_ulong_t arm_r3; + compat_ulong_t arm_r4; + compat_ulong_t arm_r5; + compat_ulong_t arm_r6; + compat_ulong_t arm_r7; + compat_ulong_t arm_r8; + compat_ulong_t arm_r9; + compat_ulong_t arm_r10; + compat_ulong_t arm_fp; + compat_ulong_t arm_ip; + compat_ulong_t arm_sp; + compat_ulong_t arm_lr; + compat_ulong_t arm_pc; + compat_ulong_t arm_cpsr; + compat_ulong_t fault_address; +}; + +struct compat_ucontext { + compat_ulong_t uc_flags; + compat_uptr_t uc_link; + compat_stack_t uc_stack; + struct compat_sigcontext uc_mcontext; + compat_sigset_t uc_sigmask; + int __unused[32 - (sizeof(compat_sigset_t) / sizeof(int))]; + compat_ulong_t uc_regspace[128] __attribute__((__aligned__(8))); +}; + +struct compat_sigframe { + struct compat_ucontext uc; + compat_ulong_t retcode[2]; +}; + +struct compat_rt_sigframe { + struct compat_siginfo info; + struct compat_sigframe sig; +}; + int compat_setup_frame(int usig, struct ksignal *ksig, sigset_t *set, struct pt_regs *regs); int compat_setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set, diff --git a/arch/arm64/include/asm/simd.h b/arch/arm64/include/asm/simd.h index 7e245b9e03a5..7434844036d3 100644 --- a/arch/arm64/include/asm/simd.h +++ b/arch/arm64/include/asm/simd.h @@ -12,9 +12,9 @@ #include <linux/preempt.h> #include <linux/types.h> -#ifdef CONFIG_KERNEL_MODE_NEON +DECLARE_PER_CPU(bool, fpsimd_context_busy); -DECLARE_PER_CPU(bool, kernel_neon_busy); +#ifdef CONFIG_KERNEL_MODE_NEON /* * may_use_simd - whether it is allowable at this time to issue SIMD @@ -26,15 +26,15 @@ DECLARE_PER_CPU(bool, kernel_neon_busy); static __must_check inline bool may_use_simd(void) { /* - * kernel_neon_busy is only set while preemption is disabled, + * fpsimd_context_busy is only set while preemption is disabled, * and is clear whenever preemption is enabled. Since - * this_cpu_read() is atomic w.r.t. preemption, kernel_neon_busy + * this_cpu_read() is atomic w.r.t. preemption, fpsimd_context_busy * cannot change under our feet -- if it's set we cannot be * migrated, and if it's clear we cannot be migrated to a CPU * where it is set. */ return !in_irq() && !irqs_disabled() && !in_nmi() && - !this_cpu_read(kernel_neon_busy); + !this_cpu_read(fpsimd_context_busy); } #else /* ! 
CONFIG_KERNEL_MODE_NEON */ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index cd7f7ce1a56a..d0bd4ffcf2c4 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -549,6 +549,7 @@ /* id_aa64isar1 */ #define ID_AA64ISAR1_SB_SHIFT 36 +#define ID_AA64ISAR1_FRINTTS_SHIFT 32 #define ID_AA64ISAR1_GPI_SHIFT 28 #define ID_AA64ISAR1_GPA_SHIFT 24 #define ID_AA64ISAR1_LRCPC_SHIFT 20 diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index 2372e97db29c..180b34ec5965 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -65,6 +65,7 @@ void arch_release_task_struct(struct task_struct *tsk); * TIF_SYSCALL_TRACEPOINT - syscall tracepoint for ftrace * TIF_SYSCALL_AUDIT - syscall auditing * TIF_SECCOMP - syscall secure computing + * TIF_SYSCALL_EMU - syscall emulation active * TIF_SIGPENDING - signal pending * TIF_NEED_RESCHED - rescheduling necessary * TIF_NOTIFY_RESUME - callback before returning to user @@ -80,6 +81,7 @@ void arch_release_task_struct(struct task_struct *tsk); #define TIF_SYSCALL_AUDIT 9 #define TIF_SYSCALL_TRACEPOINT 10 #define TIF_SECCOMP 11 +#define TIF_SYSCALL_EMU 12 #define TIF_MEMDIE 18 /* is terminating due to OOM killer */ #define TIF_FREEZE 19 #define TIF_RESTORE_SIGMASK 20 @@ -98,6 +100,7 @@ void arch_release_task_struct(struct task_struct *tsk); #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) +#define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) #define _TIF_UPROBE (1 << TIF_UPROBE) #define _TIF_FSCHECK (1 << TIF_FSCHECK) #define _TIF_32BIT (1 << TIF_32BIT) @@ -109,7 +112,7 @@ void arch_release_task_struct(struct task_struct *tsk); #define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \ - _TIF_NOHZ) + _TIF_NOHZ | _TIF_SYSCALL_EMU) #define INIT_THREAD_INFO(tsk) \ { \ diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index c9f8dd421c5f..2a23614198f1 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -22,8 +22,13 @@ #define __NR_compat_exit 1 #define __NR_compat_read 3 #define __NR_compat_write 4 +#define __NR_compat_gettimeofday 78 #define __NR_compat_sigreturn 119 #define __NR_compat_rt_sigreturn 173 +#define __NR_compat_clock_getres 247 +#define __NR_compat_clock_gettime 263 +#define __NR_compat_clock_gettime64 403 +#define __NR_compat_clock_getres_time64 406 /* * The following SVCs are ARM private. diff --git a/arch/arm64/include/asm/vdso.h b/arch/arm64/include/asm/vdso.h index 1f94ec19903c..9c15e0a06301 100644 --- a/arch/arm64/include/asm/vdso.h +++ b/arch/arm64/include/asm/vdso.h @@ -17,6 +17,9 @@ #ifndef __ASSEMBLY__ #include <generated/vdso-offsets.h> +#ifdef CONFIG_COMPAT_VDSO +#include <generated/vdso32-offsets.h> +#endif #define VDSO_SYMBOL(base, name) \ ({ \ diff --git a/arch/arm64/include/asm/vdso/compat_barrier.h b/arch/arm64/include/asm/vdso/compat_barrier.h new file mode 100644 index 000000000000..fb60a88b5ed4 --- /dev/null +++ b/arch/arm64/include/asm/vdso/compat_barrier.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2018 ARM Limited + */ +#ifndef __COMPAT_BARRIER_H +#define __COMPAT_BARRIER_H + +#ifndef __ASSEMBLY__ +/* + * Warning: This code is meant to be used with + * ENABLE_COMPAT_VDSO only. 
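The simd.h hunk above renames the per-CPU flag but leaves the may_use_simd() contract untouched, so callers keep the usual begin/end pattern. A sketch of that pattern (crc32_neon() and crc32_scalar() are hypothetical placeholders):

	#include <linux/types.h>
	#include <asm/neon.h>
	#include <asm/simd.h>

	u32 do_crc(u32 crc, const u8 *data, size_t len)
	{
		if (may_use_simd()) {
			kernel_neon_begin();	/* claims fpsimd_context_busy */
			crc = crc32_neon(crc, data, len);
			kernel_neon_end();
		} else {
			crc = crc32_scalar(crc, data, len);	/* scalar fallback */
		}
		return crc;
	}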
+ */ +#ifndef ENABLE_COMPAT_VDSO +#error This header is meant to be used with ENABLE_COMPAT_VDSO only +#endif + +#ifdef dmb +#undef dmb +#endif + +#define dmb(option) __asm__ __volatile__ ("dmb " #option : : : "memory") + +#if __LINUX_ARM_ARCH__ >= 8 +#define aarch32_smp_mb() dmb(ish) +#define aarch32_smp_rmb() dmb(ishld) +#define aarch32_smp_wmb() dmb(ishst) +#else +#define aarch32_smp_mb() dmb(ish) +#define aarch32_smp_rmb() aarch32_smp_mb() +#define aarch32_smp_wmb() dmb(ishst) +#endif + + +#undef smp_mb +#undef smp_rmb +#undef smp_wmb + +#define smp_mb() aarch32_smp_mb() +#define smp_rmb() aarch32_smp_rmb() +#define smp_wmb() aarch32_smp_wmb() + +#endif /* !__ASSEMBLY__ */ + +#endif /* __COMPAT_BARRIER_H */ diff --git a/arch/arm64/include/asm/vdso/compat_gettimeofday.h b/arch/arm64/include/asm/vdso/compat_gettimeofday.h new file mode 100644 index 000000000000..f4812777f5c5 --- /dev/null +++ b/arch/arm64/include/asm/vdso/compat_gettimeofday.h @@ -0,0 +1,126 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2018 ARM Limited + */ +#ifndef __ASM_VDSO_GETTIMEOFDAY_H +#define __ASM_VDSO_GETTIMEOFDAY_H + +#ifndef __ASSEMBLY__ + +#include <asm/unistd.h> +#include <uapi/linux/time.h> + +#include <asm/vdso/compat_barrier.h> + +#define __VDSO_USE_SYSCALL ULLONG_MAX + +#define VDSO_HAS_CLOCK_GETRES 1 + +static __always_inline +int gettimeofday_fallback(struct __kernel_old_timeval *_tv, + struct timezone *_tz) +{ + register struct timezone *tz asm("r1") = _tz; + register struct __kernel_old_timeval *tv asm("r0") = _tv; + register long ret asm ("r0"); + register long nr asm("r7") = __NR_compat_gettimeofday; + + asm volatile( + " swi #0\n" + : "=r" (ret) + : "r" (tv), "r" (tz), "r" (nr) + : "memory"); + + return ret; +} + +static __always_inline +long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) +{ + register struct __kernel_timespec *ts asm("r1") = _ts; + register clockid_t clkid asm("r0") = _clkid; + register long ret asm ("r0"); + register long nr asm("r7") = __NR_compat_clock_gettime64; + + asm volatile( + " swi #0\n" + : "=r" (ret) + : "r" (clkid), "r" (ts), "r" (nr) + : "memory"); + + return ret; +} + +static __always_inline +int clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) +{ + register struct __kernel_timespec *ts asm("r1") = _ts; + register clockid_t clkid asm("r0") = _clkid; + register long ret asm ("r0"); + register long nr asm("r7") = __NR_compat_clock_getres_time64; + + /* The checks below are required for ABI consistency with arm */ + if ((_clkid >= MAX_CLOCKS) && (_ts == NULL)) + return -EINVAL; + + asm volatile( + " swi #0\n" + : "=r" (ret) + : "r" (clkid), "r" (ts), "r" (nr) + : "memory"); + + return ret; +} + +static __always_inline u64 __arch_get_hw_counter(s32 clock_mode) +{ + u64 res; + + /* + * clock_mode == 0 implies that vDSO are enabled otherwise + * fallback on syscall. + */ + if (clock_mode) + return __VDSO_USE_SYSCALL; + + /* + * This isb() is required to prevent that the counter value + * is speculated. + */ + isb(); + asm volatile("mrrc p15, 1, %Q0, %R0, c14" : "=r" (res)); + /* + * This isb() is required to prevent that the seq lock is + * speculated. + */ + isb(); + + return res; +} + +static __always_inline const struct vdso_data *__arch_get_vdso_data(void) +{ + const struct vdso_data *ret; + + /* + * This simply puts &_vdso_data into ret. 
The reason why we don't use + * `ret = _vdso_data` is that the compiler tends to optimise this in a + * very suboptimal way: instead of keeping &_vdso_data in a register, + * it goes through a relocation almost every time _vdso_data must be + * accessed (even in subfunctions). This is both time and space + * consuming: each relocation uses a word in the code section, and it + * has to be loaded at runtime. + * + * This trick hides the assignment from the compiler. Since it cannot + * track where the pointer comes from, it will only use one relocation + * where __arch_get_vdso_data() is called, and then keep the result in + * a register. + */ + asm volatile("mov %0, %1" : "=r"(ret) : "r"(_vdso_data)); + + return ret; +} + +#endif /* !__ASSEMBLY__ */ + +#endif /* __ASM_VDSO_GETTIMEOFDAY_H */ diff --git a/arch/arm64/include/asm/vdso/gettimeofday.h b/arch/arm64/include/asm/vdso/gettimeofday.h new file mode 100644 index 000000000000..b08f476b72b4 --- /dev/null +++ b/arch/arm64/include/asm/vdso/gettimeofday.h @@ -0,0 +1,103 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2018 ARM Limited + */ +#ifndef __ASM_VDSO_GETTIMEOFDAY_H +#define __ASM_VDSO_GETTIMEOFDAY_H + +#ifndef __ASSEMBLY__ + +#include <asm/unistd.h> +#include <uapi/linux/time.h> + +#define __VDSO_USE_SYSCALL ULLONG_MAX + +#define VDSO_HAS_CLOCK_GETRES 1 + +static __always_inline +int gettimeofday_fallback(struct __kernel_old_timeval *_tv, + struct timezone *_tz) +{ + register struct timezone *tz asm("x1") = _tz; + register struct __kernel_old_timeval *tv asm("x0") = _tv; + register long ret asm ("x0"); + register long nr asm("x8") = __NR_gettimeofday; + + asm volatile( + " svc #0\n" + : "=r" (ret) + : "r" (tv), "r" (tz), "r" (nr) + : "memory"); + + return ret; +} + +static __always_inline +long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) +{ + register struct __kernel_timespec *ts asm("x1") = _ts; + register clockid_t clkid asm("x0") = _clkid; + register long ret asm ("x0"); + register long nr asm("x8") = __NR_clock_gettime; + + asm volatile( + " svc #0\n" + : "=r" (ret) + : "r" (clkid), "r" (ts), "r" (nr) + : "memory"); + + return ret; +} + +static __always_inline +int clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) +{ + register struct __kernel_timespec *ts asm("x1") = _ts; + register clockid_t clkid asm("x0") = _clkid; + register long ret asm ("x0"); + register long nr asm("x8") = __NR_clock_getres; + + asm volatile( + " svc #0\n" + : "=r" (ret) + : "r" (clkid), "r" (ts), "r" (nr) + : "memory"); + + return ret; +} + +static __always_inline u64 __arch_get_hw_counter(s32 clock_mode) +{ + u64 res; + + /* + * clock_mode == 0 implies that vDSO are enabled otherwise + * fallback on syscall. + */ + if (clock_mode) + return __VDSO_USE_SYSCALL; + + /* + * This isb() is required to prevent that the counter value + * is speculated. 
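Both gettimeofday.h variants only supply hooks; the time computation itself lives in the generic vDSO library. A simplified rendering of the consumer loop in lib/vdso/gettimeofday.c (the function name here is invented, and normalisation of the nanosecond remainder plus the vd->mask application are omitted):

	#include <vdso/datapage.h>
	#include <vdso/helpers.h>

	static __always_inline int do_hres_sketch(const struct vdso_data *vd,
						  struct __kernel_timespec *ts)
	{
		const struct vdso_timestamp *vdso_ts = &vd->basetime[CLOCK_REALTIME];
		u64 cycles, ns;
		u32 seq;

		do {
			seq = vdso_read_begin(vd);
			cycles = __arch_get_hw_counter(vd->clock_mode);
			if (cycles == __VDSO_USE_SYSCALL)
				return -1;	/* caller issues the real syscall */
			ns = vdso_ts->nsec;
			ns += (cycles - vd->cycle_last) * vd->mult;
			ns >>= vd->shift;
		} while (unlikely(vdso_read_retry(vd, seq)));

		ts->tv_sec = vdso_ts->sec;
		ts->tv_nsec = ns;
		return 0;
	}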
+ */ + isb(); + asm volatile("mrs %0, cntvct_el0" : "=r" (res) :: "memory"); + /* + * This isb() is required to prevent that the seq lock is + * speculated. + */ + isb(); + + return res; +} + +static __always_inline +const struct vdso_data *__arch_get_vdso_data(void) +{ + return _vdso_data; +} + +#endif /* !__ASSEMBLY__ */ + +#endif /* __ASM_VDSO_GETTIMEOFDAY_H */ diff --git a/arch/arm64/include/asm/vdso/vsyscall.h b/arch/arm64/include/asm/vdso/vsyscall.h new file mode 100644 index 000000000000..0c731bfc7c8c --- /dev/null +++ b/arch/arm64/include/asm/vdso/vsyscall.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_VDSO_VSYSCALL_H +#define __ASM_VDSO_VSYSCALL_H + +#ifndef __ASSEMBLY__ + +#include <linux/timekeeper_internal.h> +#include <vdso/datapage.h> + +#define VDSO_PRECISION_MASK ~(0xFF00ULL<<48) + +extern struct vdso_data *vdso_data; + +/* + * Update the vDSO data page to keep in sync with kernel timekeeping. + */ +static __always_inline +struct vdso_data *__arm64_get_k_vdso_data(void) +{ + return vdso_data; +} +#define __arch_get_k_vdso_data __arm64_get_k_vdso_data + +static __always_inline +int __arm64_get_clock_mode(struct timekeeper *tk) +{ + u32 use_syscall = !tk->tkr_mono.clock->archdata.vdso_direct; + + return use_syscall; +} +#define __arch_get_clock_mode __arm64_get_clock_mode + +static __always_inline +int __arm64_use_vsyscall(struct vdso_data *vdata) +{ + return !vdata[CS_HRES_COARSE].clock_mode; +} +#define __arch_use_vsyscall __arm64_use_vsyscall + +static __always_inline +void __arm64_update_vsyscall(struct vdso_data *vdata, struct timekeeper *tk) +{ + vdata[CS_HRES_COARSE].mask = VDSO_PRECISION_MASK; + vdata[CS_RAW].mask = VDSO_PRECISION_MASK; +} +#define __arch_update_vsyscall __arm64_update_vsyscall + +/* The asm-generic header needs to be included after the definitions above */ +#include <asm-generic/vdso/vsyscall.h> + +#endif /* !__ASSEMBLY__ */ + +#endif /* __ASM_VDSO_VSYSCALL_H */ diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index 1a772b162191..a1e72886b30c 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -63,5 +63,7 @@ #define HWCAP2_SVEBITPERM (1 << 4) #define HWCAP2_SVESHA3 (1 << 5) #define HWCAP2_SVESM4 (1 << 6) +#define HWCAP2_FLAGM2 (1 << 7) +#define HWCAP2_FRINT (1 << 8) #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h index e932284993d4..7ed9294e2004 100644 --- a/arch/arm64/include/uapi/asm/ptrace.h +++ b/arch/arm64/include/uapi/asm/ptrace.h @@ -62,6 +62,9 @@ #define PSR_x 0x0000ff00 /* Extension */ #define PSR_c 0x000000ff /* Control */ +/* syscall emulation path in ptrace */ +#define PTRACE_SYSEMU 31 +#define PTRACE_SYSEMU_SINGLESTEP 32 #ifndef __ASSEMBLY__ diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 9e7dcb2c31c7..478491f07b4f 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -28,7 +28,10 @@ $(obj)/%.stub.o: $(obj)/%.o FORCE $(call if_changed,objcopy) obj-$(CONFIG_COMPAT) += sys32.o signal32.o \ - sigreturn32.o sys_compat.o + sys_compat.o +ifneq ($(CONFIG_COMPAT_VDSO), y) +obj-$(CONFIG_COMPAT) += sigreturn32.o +endif obj-$(CONFIG_KUSER_HELPERS) += kuser32.o obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o entry-ftrace.o obj-$(CONFIG_MODULES) += module.o @@ -62,6 +65,7 @@ obj-$(CONFIG_ARM64_SSBD) += ssbd.o obj-$(CONFIG_ARM64_PTR_AUTH) += pointer_auth.o obj-y += vdso/ probes/ +obj-$(CONFIG_COMPAT_VDSO) += vdso32/ head-y :=
head.o extra-y += $(head-y) vmlinux.lds diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c index 2804330c95dc..3a58e9db5cfe 100644 --- a/arch/arm64/kernel/acpi.c +++ b/arch/arm64/kernel/acpi.c @@ -152,10 +152,14 @@ static int __init acpi_fadt_sanity_check(void) */ if (table->revision < 5 || (table->revision == 5 && fadt->minor_revision < 1)) { - pr_err("Unsupported FADT revision %d.%d, should be 5.1+\n", + pr_err(FW_BUG "Unsupported FADT revision %d.%d, should be 5.1+\n", table->revision, fadt->minor_revision); - ret = -EINVAL; - goto out; + + if (!fadt->arm_boot_flags) { + ret = -EINVAL; + goto out; + } + pr_err("FADT has ARM boot flags set, assuming 5.1\n"); } if (!(fadt->flags & ACPI_FADT_HW_REDUCED)) { diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 02f08768c298..214685760e1c 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -18,9 +18,9 @@ #include <asm/fixmap.h> #include <asm/thread_info.h> #include <asm/memory.h> +#include <asm/signal32.h> #include <asm/smp_plat.h> #include <asm/suspend.h> -#include <asm/vdso_datapage.h> #include <linux/kbuild.h> #include <linux/arm-smccc.h> @@ -66,6 +66,11 @@ int main(void) DEFINE(S_STACKFRAME, offsetof(struct pt_regs, stackframe)); DEFINE(S_FRAME_SIZE, sizeof(struct pt_regs)); BLANK(); +#ifdef CONFIG_COMPAT + DEFINE(COMPAT_SIGFRAME_REGS_OFFSET, offsetof(struct compat_sigframe, uc.uc_mcontext.arm_r0)); + DEFINE(COMPAT_RT_SIGFRAME_REGS_OFFSET, offsetof(struct compat_rt_sigframe, sig.uc.uc_mcontext.arm_r0)); + BLANK(); +#endif DEFINE(MM_CONTEXT_ID, offsetof(struct mm_struct, context.id.counter)); BLANK(); DEFINE(VMA_VM_MM, offsetof(struct vm_area_struct, vm_mm)); @@ -80,33 +85,6 @@ int main(void) BLANK(); DEFINE(PREEMPT_DISABLE_OFFSET, PREEMPT_DISABLE_OFFSET); BLANK(); - DEFINE(CLOCK_REALTIME, CLOCK_REALTIME); - DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC); - DEFINE(CLOCK_MONOTONIC_RAW, CLOCK_MONOTONIC_RAW); - DEFINE(CLOCK_REALTIME_RES, offsetof(struct vdso_data, hrtimer_res)); - DEFINE(CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE); - DEFINE(CLOCK_MONOTONIC_COARSE,CLOCK_MONOTONIC_COARSE); - DEFINE(CLOCK_COARSE_RES, LOW_RES_NSEC); - DEFINE(NSEC_PER_SEC, NSEC_PER_SEC); - BLANK(); - DEFINE(VDSO_CS_CYCLE_LAST, offsetof(struct vdso_data, cs_cycle_last)); - DEFINE(VDSO_RAW_TIME_SEC, offsetof(struct vdso_data, raw_time_sec)); - DEFINE(VDSO_XTIME_CLK_SEC, offsetof(struct vdso_data, xtime_clock_sec)); - DEFINE(VDSO_XTIME_CRS_SEC, offsetof(struct vdso_data, xtime_coarse_sec)); - DEFINE(VDSO_XTIME_CRS_NSEC, offsetof(struct vdso_data, xtime_coarse_nsec)); - DEFINE(VDSO_WTM_CLK_SEC, offsetof(struct vdso_data, wtm_clock_sec)); - DEFINE(VDSO_TB_SEQ_COUNT, offsetof(struct vdso_data, tb_seq_count)); - DEFINE(VDSO_CS_MONO_MULT, offsetof(struct vdso_data, cs_mono_mult)); - DEFINE(VDSO_CS_SHIFT, offsetof(struct vdso_data, cs_shift)); - DEFINE(VDSO_TZ_MINWEST, offsetof(struct vdso_data, tz_minuteswest)); - DEFINE(VDSO_USE_SYSCALL, offsetof(struct vdso_data, use_syscall)); - BLANK(); - DEFINE(TVAL_TV_SEC, offsetof(struct timeval, tv_sec)); - DEFINE(TSPEC_TV_SEC, offsetof(struct timespec, tv_sec)); - BLANK(); - DEFINE(TZ_MINWEST, offsetof(struct timezone, tz_minuteswest)); - DEFINE(TZ_DSTTIME, offsetof(struct timezone, tz_dsttime)); - BLANK(); DEFINE(CPU_BOOT_STACK, offsetof(struct secondary_data, stack)); DEFINE(CPU_BOOT_TASK, offsetof(struct secondary_data, task)); BLANK(); diff --git a/arch/arm64/kernel/cacheinfo.c b/arch/arm64/kernel/cacheinfo.c index 880d79904d36..7fa6828bb488 100644 --- 
a/arch/arm64/kernel/cacheinfo.c +++ b/arch/arm64/kernel/cacheinfo.c @@ -17,6 +17,15 @@ #define CLIDR_CTYPE(clidr, level) \ (((clidr) & CLIDR_CTYPE_MASK(level)) >> CLIDR_CTYPE_SHIFT(level)) +int cache_line_size(void) +{ + if (coherency_max_size != 0) + return coherency_max_size; + + return cache_line_size_of_cpu(); +} +EXPORT_SYMBOL_GPL(cache_line_size); + static inline enum cache_type get_cache_type(int level) { u64 clidr; diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index aabdabf52fdb..f29f36a65175 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1184,14 +1184,14 @@ static struct undef_hook ssbs_emulation_hook = { static void cpu_enable_ssbs(const struct arm64_cpu_capabilities *__unused) { static bool undef_hook_registered = false; - static DEFINE_SPINLOCK(hook_lock); + static DEFINE_RAW_SPINLOCK(hook_lock); - spin_lock(&hook_lock); + raw_spin_lock(&hook_lock); if (!undef_hook_registered) { register_undef_hook(&ssbs_emulation_hook); undef_hook_registered = true; } - spin_unlock(&hook_lock); + raw_spin_unlock(&hook_lock); if (arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) { sysreg_clear_set(sctlr_el1, 0, SCTLR_ELx_DSSBS); @@ -1618,6 +1618,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_DP_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDDP), HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_FHM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDFHM), HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FLAGM), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_FLAGM2), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_FP), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FPHP), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_ASIMD), @@ -1629,6 +1630,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FCMA_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FCMA), HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_LRCPC), HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_ILRCPC), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FRINTTS_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FRINT), HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_SB_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SB), HWCAP_CAP(SYS_ID_AA64MMFR2_EL1, ID_AA64MMFR2_AT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_USCAT), #ifdef CONFIG_ARM64_SVE diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 0593665fc7b4..876055e37352 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -82,6 +82,8 @@ static const char *const hwcap_str[] = { "svebitperm", "svesha3", "svesm4", + "flagm2", + "frint", NULL }; diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 2df8d0a1d980..9cdc4592da3e 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -247,6 +247,7 @@ alternative_else_nop_endif /* * Registers that may be useful after this macro is invoked: * + * x20 - ICC_PMR_EL1 * x21 - aborted SP * x22 - aborted PC * x23 - aborted PSTATE @@ -424,6 +425,38 @@ tsk .req x28 // current thread_info irq_stack_exit .endm +#ifdef CONFIG_ARM64_PSEUDO_NMI + 
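With the HWCAP_CAP() entries and hwcap strings above wired up, userspace can probe the new instructions through the ELF auxiliary vector. A minimal consumer (FLAGM2 covers AXFLAG/XAFLAG, FRINT the FRINT32*/FRINT64* instructions):

	#include <stdio.h>
	#include <sys/auxv.h>
	#include <asm/hwcap.h>		/* HWCAP2_FLAGM2, HWCAP2_FRINT */

	int main(void)
	{
		unsigned long hwcap2 = getauxval(AT_HWCAP2);

		if (hwcap2 & HWCAP2_FRINT)
			puts("FRINT32/FRINT64 available");
		if (hwcap2 & HWCAP2_FLAGM2)
			puts("AXFLAG/XAFLAG available");
		return 0;
	}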
/* + * Set res to 0 if irqs were unmasked in interrupted context. + * Otherwise set res to non-0 value. + */ + .macro test_irqs_unmasked res:req, pmr:req +alternative_if ARM64_HAS_IRQ_PRIO_MASKING + sub \res, \pmr, #GIC_PRIO_IRQON +alternative_else + mov \res, xzr +alternative_endif + .endm +#endif + + .macro gic_prio_kentry_setup, tmp:req +#ifdef CONFIG_ARM64_PSEUDO_NMI + alternative_if ARM64_HAS_IRQ_PRIO_MASKING + mov \tmp, #(GIC_PRIO_PSR_I_SET | GIC_PRIO_IRQON) + msr_s SYS_ICC_PMR_EL1, \tmp + alternative_else_nop_endif +#endif + .endm + + .macro gic_prio_irq_setup, pmr:req, tmp:req +#ifdef CONFIG_ARM64_PSEUDO_NMI + alternative_if ARM64_HAS_IRQ_PRIO_MASKING + orr \tmp, \pmr, #GIC_PRIO_PSR_I_SET + msr_s SYS_ICC_PMR_EL1, \tmp + alternative_else_nop_endif +#endif + .endm + .text /* @@ -602,6 +635,7 @@ el1_dbg: cmp x24, #ESR_ELx_EC_BRK64 // if BRK64 cinc x24, x24, eq // set bit '0' tbz x24, #0, el1_inv // EL1 only + gic_prio_kentry_setup tmp=x3 mrs x0, far_el1 mov x2, sp // struct pt_regs bl do_debug_exception @@ -619,20 +653,18 @@ ENDPROC(el1_sync) .align 6 el1_irq: kernel_entry 1 + gic_prio_irq_setup pmr=x20, tmp=x1 enable_da_f -#ifdef CONFIG_TRACE_IRQFLAGS + #ifdef CONFIG_ARM64_PSEUDO_NMI -alternative_if ARM64_HAS_IRQ_PRIO_MASKING - ldr x20, [sp, #S_PMR_SAVE] -alternative_else - mov x20, #GIC_PRIO_IRQON -alternative_endif - cmp x20, #GIC_PRIO_IRQOFF - /* Irqs were disabled, don't trace */ - b.ls 1f + test_irqs_unmasked res=x0, pmr=x20 + cbz x0, 1f + bl asm_nmi_enter +1: #endif + +#ifdef CONFIG_TRACE_IRQFLAGS bl trace_hardirqs_off -1: #endif irq_handler @@ -651,14 +683,23 @@ alternative_else_nop_endif bl preempt_schedule_irq // irq en/disable is done inside 1: #endif -#ifdef CONFIG_TRACE_IRQFLAGS + #ifdef CONFIG_ARM64_PSEUDO_NMI /* - * if IRQs were disabled when we received the interrupt, we have an NMI - * and we are not re-enabling interrupt upon eret. Skip tracing. + * When using IRQ priority masking, we can get spurious interrupts while + * PMR is set to GIC_PRIO_IRQOFF. An NMI might also have occurred in a + * section with interrupts disabled. Skip tracing in those cases. 
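For readers less fluent in the alternatives framework, the test_irqs_unmasked macro above boils down to the following C logic (illustrative only, not part of the patch):

	#include <asm/cpufeature.h>
	#include <asm/ptrace.h>

	/* Result is 0 exactly when the interrupted context had IRQs unmasked. */
	static inline unsigned long test_irqs_unmasked_c(unsigned long pmr)
	{
		if (!system_uses_irq_prio_masking())
			return 0;		/* the alternative_else path: mov \res, xzr */
		return pmr - GIC_PRIO_IRQON;	/* 0 iff PMR == GIC_PRIO_IRQON */
	}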
*/ - cmp x20, #GIC_PRIO_IRQOFF - b.ls 1f + test_irqs_unmasked res=x0, pmr=x20 + cbz x0, 1f + bl asm_nmi_exit +1: +#endif + +#ifdef CONFIG_TRACE_IRQFLAGS +#ifdef CONFIG_ARM64_PSEUDO_NMI + test_irqs_unmasked res=x0, pmr=x20 + cbnz x0, 1f #endif bl trace_hardirqs_on 1: @@ -776,6 +817,7 @@ el0_ia: * Instruction abort handling */ mrs x26, far_el1 + gic_prio_kentry_setup tmp=x0 enable_da_f #ifdef CONFIG_TRACE_IRQFLAGS bl trace_hardirqs_off @@ -821,6 +863,7 @@ el0_sp_pc: * Stack or PC alignment exception handling */ mrs x26, far_el1 + gic_prio_kentry_setup tmp=x0 enable_da_f #ifdef CONFIG_TRACE_IRQFLAGS bl trace_hardirqs_off @@ -855,11 +898,12 @@ el0_dbg: * Debug exception handling */ tbnz x24, #0, el0_inv // EL0 only + gic_prio_kentry_setup tmp=x3 mrs x0, far_el1 mov x1, x25 mov x2, sp bl do_debug_exception - enable_daif + enable_da_f ct_user_exit b ret_to_user el0_inv: @@ -876,7 +920,9 @@ ENDPROC(el0_sync) el0_irq: kernel_entry 0 el0_irq_naked: + gic_prio_irq_setup pmr=x20, tmp=x0 enable_da_f + #ifdef CONFIG_TRACE_IRQFLAGS bl trace_hardirqs_off #endif @@ -898,6 +944,7 @@ ENDPROC(el0_irq) el1_error: kernel_entry 1 mrs x1, esr_el1 + gic_prio_kentry_setup tmp=x2 enable_dbg mov x0, sp bl do_serror @@ -908,10 +955,11 @@ el0_error: kernel_entry 0 el0_error_naked: mrs x1, esr_el1 + gic_prio_kentry_setup tmp=x2 enable_dbg mov x0, sp bl do_serror - enable_daif + enable_da_f ct_user_exit b ret_to_user ENDPROC(el0_error) @@ -932,6 +980,7 @@ work_pending: */ ret_to_user: disable_daif + gic_prio_kentry_setup tmp=x3 ldr x1, [tsk, #TSK_TI_FLAGS] and x2, x1, #_TIF_WORK_MASK cbnz x2, work_pending @@ -948,6 +997,7 @@ ENDPROC(ret_to_user) */ .align 6 el0_svc: + gic_prio_kentry_setup tmp=x1 mov x0, sp bl el0_svc_handler b ret_to_user diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 0cfcf5c237c5..eec4776ae5f0 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -82,7 +82,8 @@ * To prevent this from racing with the manipulation of the task's FPSIMD state * from task context and thereby corrupting the state, it is necessary to * protect any manipulation of a task's fpsimd_state or TIF_FOREIGN_FPSTATE - * flag with local_bh_disable() unless softirqs are already masked. + * flag with {, __}get_cpu_fpsimd_context(). This will still allow softirqs to + * run but prevent them to use FPSIMD. * * For a certain task, the sequence may look something like this: * - the task gets scheduled in; if both the task's fpsimd_cpu field @@ -145,6 +146,56 @@ extern void __percpu *efi_sve_state; #endif /* ! CONFIG_ARM64_SVE */ +DEFINE_PER_CPU(bool, fpsimd_context_busy); +EXPORT_PER_CPU_SYMBOL(fpsimd_context_busy); + +static void __get_cpu_fpsimd_context(void) +{ + bool busy = __this_cpu_xchg(fpsimd_context_busy, true); + + WARN_ON(busy); +} + +/* + * Claim ownership of the CPU FPSIMD context for use by the calling context. + * + * The caller may freely manipulate the FPSIMD context metadata until + * put_cpu_fpsimd_context() is called. + * + * The double-underscore version must only be called if you know the task + * can't be preempted. + */ +static void get_cpu_fpsimd_context(void) +{ + preempt_disable(); + __get_cpu_fpsimd_context(); +} + +static void __put_cpu_fpsimd_context(void) +{ + bool busy = __this_cpu_xchg(fpsimd_context_busy, false); + + WARN_ON(!busy); /* No matching get_cpu_fpsimd_context()? */ +} + +/* + * Release the CPU FPSIMD context. 
+ * + * Must be called from a context in which get_cpu_fpsimd_context() was + * previously called, with no call to put_cpu_fpsimd_context() in the + * meantime. + */ +static void put_cpu_fpsimd_context(void) +{ + __put_cpu_fpsimd_context(); + preempt_enable(); +} + +static bool have_cpu_fpsimd_context(void) +{ + return !preemptible() && __this_cpu_read(fpsimd_context_busy); +} + /* * Call __sve_free() directly only if you know task can't be scheduled * or preempted. @@ -215,12 +266,10 @@ static void sve_free(struct task_struct *task) * This function should be called only when the FPSIMD/SVE state in * thread_struct is known to be up to date, when preparing to enter * userspace. - * - * Softirqs (and preemption) must be disabled. */ static void task_fpsimd_load(void) { - WARN_ON(!in_softirq() && !irqs_disabled()); + WARN_ON(!have_cpu_fpsimd_context()); if (system_supports_sve() && test_thread_flag(TIF_SVE)) sve_load_state(sve_pffr(¤t->thread), @@ -233,16 +282,14 @@ static void task_fpsimd_load(void) /* * Ensure FPSIMD/SVE storage in memory for the loaded context is up to * date with respect to the CPU registers. - * - * Softirqs (and preemption) must be disabled. */ -void fpsimd_save(void) +static void fpsimd_save(void) { struct fpsimd_last_state_struct const *last = this_cpu_ptr(&fpsimd_last_state); /* set by fpsimd_bind_task_to_cpu() or fpsimd_bind_state_to_cpu() */ - WARN_ON(!in_softirq() && !irqs_disabled()); + WARN_ON(!have_cpu_fpsimd_context()); if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) { if (system_supports_sve() && test_thread_flag(TIF_SVE)) { @@ -364,7 +411,8 @@ static __uint128_t arm64_cpu_to_le128(__uint128_t x) * task->thread.sve_state. * * Task can be a non-runnable task, or current. In the latter case, - * softirqs (and preemption) must be disabled. + * the caller must have ownership of the cpu FPSIMD context before calling + * this function. * task->thread.sve_state must point to at least sve_state_size(task) * bytes of allocated kernel memory. * task->thread.uw.fpsimd_state must be up to date before calling this @@ -393,7 +441,8 @@ static void fpsimd_to_sve(struct task_struct *task) * task->thread.uw.fpsimd_state. * * Task can be a non-runnable task, or current. In the latter case, - * softirqs (and preemption) must be disabled. + * the caller must have ownership of the cpu FPSIMD context before calling + * this function. * task->thread.sve_state must point to at least sve_state_size(task) * bytes of allocated kernel memory. * task->thread.sve_state must be up to date before calling this function. @@ -557,7 +606,7 @@ int sve_set_vector_length(struct task_struct *task, * non-SVE thread. 
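The new helpers impose a claim/release discipline on every FPSIMD state manipulation; the sve_set_vector_length() hunk just below is the first converted call site. The bare shape, as it would appear inside fpsimd.c where the helpers are static:

	static void modify_current_fpsimd_state(void)
	{
		get_cpu_fpsimd_context();	/* preempt_disable() + mark regs busy */

		fpsimd_save();			/* registers -> thread_struct */
		/* ... manipulate current's FPSIMD/SVE state ... */

		put_cpu_fpsimd_context();	/* clear busy flag + preempt_enable() */
	}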
*/ if (task == current) { - local_bh_disable(); + get_cpu_fpsimd_context(); fpsimd_save(); } @@ -567,7 +616,7 @@ int sve_set_vector_length(struct task_struct *task, sve_to_fpsimd(task); if (task == current) - local_bh_enable(); + put_cpu_fpsimd_context(); /* * Force reallocation of task SVE state to the correct size @@ -880,7 +929,7 @@ asmlinkage void do_sve_acc(unsigned int esr, struct pt_regs *regs) sve_alloc(current); - local_bh_disable(); + get_cpu_fpsimd_context(); fpsimd_save(); @@ -891,7 +940,7 @@ asmlinkage void do_sve_acc(unsigned int esr, struct pt_regs *regs) if (test_and_set_thread_flag(TIF_SVE)) WARN_ON(1); /* SVE access shouldn't have trapped */ - local_bh_enable(); + put_cpu_fpsimd_context(); } /* @@ -935,6 +984,8 @@ void fpsimd_thread_switch(struct task_struct *next) if (!system_supports_fpsimd()) return; + __get_cpu_fpsimd_context(); + /* Save unsaved fpsimd state, if any: */ fpsimd_save(); @@ -949,6 +1000,8 @@ void fpsimd_thread_switch(struct task_struct *next) update_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE, wrong_task || wrong_cpu); + + __put_cpu_fpsimd_context(); } void fpsimd_flush_thread(void) @@ -958,7 +1011,7 @@ void fpsimd_flush_thread(void) if (!system_supports_fpsimd()) return; - local_bh_disable(); + get_cpu_fpsimd_context(); fpsimd_flush_task_state(current); memset(¤t->thread.uw.fpsimd_state, 0, @@ -999,7 +1052,7 @@ void fpsimd_flush_thread(void) current->thread.sve_vl_onexec = 0; } - local_bh_enable(); + put_cpu_fpsimd_context(); } /* @@ -1011,9 +1064,9 @@ void fpsimd_preserve_current_state(void) if (!system_supports_fpsimd()) return; - local_bh_disable(); + get_cpu_fpsimd_context(); fpsimd_save(); - local_bh_enable(); + put_cpu_fpsimd_context(); } /* @@ -1030,7 +1083,8 @@ void fpsimd_signal_preserve_current_state(void) /* * Associate current's FPSIMD context with this cpu - * Preemption must be disabled when calling this function. + * The caller must have ownership of the cpu FPSIMD context before calling + * this function. */ void fpsimd_bind_task_to_cpu(void) { @@ -1076,14 +1130,14 @@ void fpsimd_restore_current_state(void) if (!system_supports_fpsimd()) return; - local_bh_disable(); + get_cpu_fpsimd_context(); if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) { task_fpsimd_load(); fpsimd_bind_task_to_cpu(); } - local_bh_enable(); + put_cpu_fpsimd_context(); } /* @@ -1096,7 +1150,7 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state) if (!system_supports_fpsimd()) return; - local_bh_disable(); + get_cpu_fpsimd_context(); current->thread.uw.fpsimd_state = *state; if (system_supports_sve() && test_thread_flag(TIF_SVE)) @@ -1107,7 +1161,7 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state) clear_thread_flag(TIF_FOREIGN_FPSTATE); - local_bh_enable(); + put_cpu_fpsimd_context(); } /* @@ -1133,18 +1187,29 @@ void fpsimd_flush_task_state(struct task_struct *t) /* * Invalidate any task's FPSIMD state that is present on this cpu. - * This function must be called with softirqs disabled. + * The FPSIMD context should be acquired with get_cpu_fpsimd_context() + * before calling this function. */ -void fpsimd_flush_cpu_state(void) +static void fpsimd_flush_cpu_state(void) { __this_cpu_write(fpsimd_last_state.st, NULL); set_thread_flag(TIF_FOREIGN_FPSTATE); } -#ifdef CONFIG_KERNEL_MODE_NEON +/* + * Save the FPSIMD state to memory and invalidate cpu view. + * This function must be called with preemption disabled. 
+ */ +void fpsimd_save_and_flush_cpu_state(void) +{ + WARN_ON(preemptible()); + __get_cpu_fpsimd_context(); + fpsimd_save(); + fpsimd_flush_cpu_state(); + __put_cpu_fpsimd_context(); +} -DEFINE_PER_CPU(bool, kernel_neon_busy); -EXPORT_PER_CPU_SYMBOL(kernel_neon_busy); +#ifdef CONFIG_KERNEL_MODE_NEON /* * Kernel-side NEON support functions @@ -1170,19 +1235,13 @@ void kernel_neon_begin(void) BUG_ON(!may_use_simd()); - local_bh_disable(); - - __this_cpu_write(kernel_neon_busy, true); + get_cpu_fpsimd_context(); /* Save unsaved fpsimd state, if any: */ fpsimd_save(); /* Invalidate any task state remaining in the fpsimd regs: */ fpsimd_flush_cpu_state(); - - preempt_disable(); - - local_bh_enable(); } EXPORT_SYMBOL(kernel_neon_begin); @@ -1197,15 +1256,10 @@ EXPORT_SYMBOL(kernel_neon_begin); */ void kernel_neon_end(void) { - bool busy; - if (!system_supports_fpsimd()) return; - busy = __this_cpu_xchg(kernel_neon_busy, false); - WARN_ON(!busy); /* No matching kernel_neon_begin()? */ - - preempt_enable(); + put_cpu_fpsimd_context(); } EXPORT_SYMBOL(kernel_neon_end); @@ -1297,8 +1351,7 @@ static int fpsimd_cpu_pm_notifier(struct notifier_block *self, { switch (cmd) { case CPU_PM_ENTER: - fpsimd_save(); - fpsimd_flush_cpu_state(); + fpsimd_save_and_flush_cpu_state(); break; case CPU_PM_EXIT: break; diff --git a/arch/arm64/kernel/image.h b/arch/arm64/kernel/image.h index 04ca08086d35..2b85c0d6fa3d 100644 --- a/arch/arm64/kernel/image.h +++ b/arch/arm64/kernel/image.h @@ -67,7 +67,11 @@ #ifdef CONFIG_EFI -__efistub_stext_offset = stext - _text; +/* + * Use ABSOLUTE() to avoid ld.lld treating this as a relative symbol: + * https://github.com/ClangBuiltLinux/linux/issues/561 + */ +__efistub_stext_offset = ABSOLUTE(stext - _text); /* * The EFI stub has its own symbol namespace prefixed by __efistub_, to diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c index c70034fbd4ce..04a327ccf84d 100644 --- a/arch/arm64/kernel/irq.c +++ b/arch/arm64/kernel/irq.c @@ -16,8 +16,10 @@ #include <linux/smp.h> #include <linux/init.h> #include <linux/irqchip.h> +#include <linux/kprobes.h> #include <linux/seq_file.h> #include <linux/vmalloc.h> +#include <asm/daifflags.h> #include <asm/vmap_stack.h> unsigned long irq_err_count; @@ -64,4 +66,28 @@ void __init init_IRQ(void) irqchip_init(); if (!handle_arch_irq) panic("No interrupt controller found."); + + if (system_uses_irq_prio_masking()) { + /* + * Now that we have a stack for our IRQ handler, set + * the PMR/PSR pair to a consistent state. 
+ */ + WARN_ON(read_sysreg(daif) & PSR_A_BIT); + local_daif_restore(DAIF_PROCCTX_NOIRQ); + } +} + +/* + * Stubs to make nmi_enter/exit() code callable from ASM + */ +asmlinkage void notrace asm_nmi_enter(void) +{ + nmi_enter(); +} +NOKPROBE_SYMBOL(asm_nmi_enter); + +asmlinkage void notrace asm_nmi_exit(void) +{ + nmi_exit(); } +NOKPROBE_SYMBOL(asm_nmi_exit); diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index e23a68a5808f..46e643e30708 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -21,6 +21,7 @@ void *module_alloc(unsigned long size) { + u64 module_alloc_end = module_alloc_base + MODULES_VSIZE; gfp_t gfp_mask = GFP_KERNEL; void *p; @@ -28,9 +29,12 @@ void *module_alloc(unsigned long size) if (IS_ENABLED(CONFIG_ARM64_MODULE_PLTS)) gfp_mask |= __GFP_NOWARN; + if (IS_ENABLED(CONFIG_KASAN)) + /* don't exceed the static module region - see below */ + module_alloc_end = MODULES_END; + p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base, - module_alloc_base + MODULES_VSIZE, - gfp_mask, PAGE_KERNEL_EXEC, 0, + module_alloc_end, gfp_mask, PAGE_KERNEL, 0, NUMA_NO_NODE, __builtin_return_address(0)); if (!p && IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) && @@ -46,7 +50,7 @@ void *module_alloc(unsigned long size) */ p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base, module_alloc_base + SZ_2G, GFP_KERNEL, - PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, + PAGE_KERNEL, 0, NUMA_NO_NODE, __builtin_return_address(0)); if (p && (kasan_module_alloc(p, size) < 0)) { diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index 88ce502c8e6f..bd5dfffca272 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -122,8 +122,10 @@ void *alloc_insn_page(void) void *page; page = vmalloc_exec(PAGE_SIZE); - if (page) + if (page) { set_memory_ro((unsigned long)page, 1); + set_vm_flush_reset_perms(page); + } return page; } diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 9856395ccdb7..6a869d9f304f 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -83,7 +83,7 @@ static void __cpu_do_idle_irqprio(void) * be raised. */ pmr = gic_read_pmr(); - gic_write_pmr(GIC_PRIO_IRQON); + gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); __cpu_do_idle(); diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index da2441d7b066..3cf3b135027e 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -1808,8 +1808,12 @@ static void tracehook_report_syscall(struct pt_regs *regs, int syscall_trace_enter(struct pt_regs *regs) { - if (test_thread_flag(TIF_SYSCALL_TRACE)) + if (test_thread_flag(TIF_SYSCALL_TRACE) || + test_thread_flag(TIF_SYSCALL_EMU)) { tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER); + if (!in_syscall(regs) || test_thread_flag(TIF_SYSCALL_EMU)) + return -1; + } /* Do the secure computing after ptrace; failures should be fast. 
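PTRACE_SYSEMU stops the tracee at syscall entry without executing the call, so the tracer can emulate it. A minimal user-space driver using the request value 31 defined above (assumes the child already called PTRACE_TRACEME; error handling omitted):

	#include <sys/ptrace.h>
	#include <sys/wait.h>
	#include <unistd.h>

	static void emulate_syscalls(pid_t child)
	{
		int status;

		for (;;) {
			ptrace(PTRACE_SYSEMU, child, 0, 0);
			waitpid(child, &status, 0);
			if (WIFEXITED(status))
				break;
			/*
			 * Stopped at syscall entry; the syscall itself was
			 * suppressed. Fetch x8/x0-x5 with PTRACE_GETREGSET,
			 * emulate the call, write the result back into x0.
			 */
		}
	}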
*/ if (secure_computing(NULL) == -1) diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index 331d1e5acad4..12a585386c2f 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -18,42 +18,7 @@ #include <asm/traps.h> #include <linux/uaccess.h> #include <asm/unistd.h> - -struct compat_sigcontext { - /* We always set these two fields to 0 */ - compat_ulong_t trap_no; - compat_ulong_t error_code; - - compat_ulong_t oldmask; - compat_ulong_t arm_r0; - compat_ulong_t arm_r1; - compat_ulong_t arm_r2; - compat_ulong_t arm_r3; - compat_ulong_t arm_r4; - compat_ulong_t arm_r5; - compat_ulong_t arm_r6; - compat_ulong_t arm_r7; - compat_ulong_t arm_r8; - compat_ulong_t arm_r9; - compat_ulong_t arm_r10; - compat_ulong_t arm_fp; - compat_ulong_t arm_ip; - compat_ulong_t arm_sp; - compat_ulong_t arm_lr; - compat_ulong_t arm_pc; - compat_ulong_t arm_cpsr; - compat_ulong_t fault_address; -}; - -struct compat_ucontext { - compat_ulong_t uc_flags; - compat_uptr_t uc_link; - compat_stack_t uc_stack; - struct compat_sigcontext uc_mcontext; - compat_sigset_t uc_sigmask; - int __unused[32 - (sizeof (compat_sigset_t) / sizeof (int))]; - compat_ulong_t uc_regspace[128] __attribute__((__aligned__(8))); -}; +#include <asm/vdso.h> struct compat_vfp_sigframe { compat_ulong_t magic; @@ -81,16 +46,6 @@ struct compat_aux_sigframe { unsigned long end_magic; } __attribute__((__aligned__(8))); -struct compat_sigframe { - struct compat_ucontext uc; - compat_ulong_t retcode[2]; -}; - -struct compat_rt_sigframe { - struct compat_siginfo info; - struct compat_sigframe sig; -}; - #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) static inline int put_sigset_t(compat_sigset_t __user *uset, sigset_t *set) @@ -387,6 +342,30 @@ static void compat_setup_return(struct pt_regs *regs, struct k_sigaction *ka, retcode = ptr_to_compat(ka->sa.sa_restorer); } else { /* Set up sigreturn pointer */ +#ifdef CONFIG_COMPAT_VDSO + void *vdso_base = current->mm->context.vdso; + void *vdso_trampoline; + + if (ka->sa.sa_flags & SA_SIGINFO) { + if (thumb) { + vdso_trampoline = VDSO_SYMBOL(vdso_base, + compat_rt_sigreturn_thumb); + } else { + vdso_trampoline = VDSO_SYMBOL(vdso_base, + compat_rt_sigreturn_arm); + } + } else { + if (thumb) { + vdso_trampoline = VDSO_SYMBOL(vdso_base, + compat_sigreturn_thumb); + } else { + vdso_trampoline = VDSO_SYMBOL(vdso_base, + compat_sigreturn_arm); + } + } + + retcode = ptr_to_compat(vdso_trampoline) + thumb; +#else unsigned int idx = thumb << 1; if (ka->sa.sa_flags & SA_SIGINFO) @@ -394,6 +373,7 @@ static void compat_setup_return(struct pt_regs *regs, struct k_sigaction *ka, retcode = (unsigned long)current->mm->context.vdso + (idx << 2) + thumb; +#endif } regs->regs[0] = usig; diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S index 3e53ffa07994..f5b04dd8a710 100644 --- a/arch/arm64/kernel/sleep.S +++ b/arch/arm64/kernel/sleep.S @@ -27,7 +27,7 @@ * aff0 = mpidr_masked & 0xff; * aff1 = mpidr_masked & 0xff00; * aff2 = mpidr_masked & 0xff0000; - * aff2 = mpidr_masked & 0xff00000000; + * aff3 = mpidr_masked & 0xff00000000; * dst = (aff0 >> rs0 | aff1 >> rs1 | aff2 >> rs2 | aff3 >> rs3); *} * Input registers: rs0, rs1, rs2, rs3, mpidr, mask diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 6dcf9607d770..9286ee6749e8 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -181,11 +181,7 @@ static void init_gic_priority_masking(void) WARN_ON(!(cpuflags & PSR_I_BIT)); - gic_write_pmr(GIC_PRIO_IRQOFF); - - /* We can 
only unmask PSR.I if we can take aborts */ - if (!(cpuflags & PSR_A_BIT)) - write_sysreg(cpuflags & ~PSR_I_BIT, daif); + gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); } /* @@ -834,18 +830,23 @@ void arch_irq_work_raise(void) } #endif -/* - * ipi_cpu_stop - handle IPI from smp_send_stop() - */ -static void ipi_cpu_stop(unsigned int cpu) +static void local_cpu_stop(void) { - set_cpu_online(cpu, false); + set_cpu_online(smp_processor_id(), false); local_daif_mask(); sdei_mask_local_cpu(); + cpu_park_loop(); +} - while (1) - cpu_relax(); +/* + * We need to implement panic_smp_self_stop() for parallel panic() calls, so + * that cpu_online_mask gets correctly updated and smp_send_stop() can skip + * CPUs that have already stopped themselves. + */ +void panic_smp_self_stop(void) +{ + local_cpu_stop(); } #ifdef CONFIG_KEXEC_CORE @@ -898,7 +899,7 @@ void handle_IPI(int ipinr, struct pt_regs *regs) case IPI_CPU_STOP: irq_enter(); - ipi_cpu_stop(cpu); + local_cpu_stop(); irq_exit(); break; diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 985721a1264c..a835a1a53826 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -55,16 +55,19 @@ static void dump_backtrace_entry(unsigned long where) printk(" %pS\n", (void *)where); } -static void __dump_instr(const char *lvl, struct pt_regs *regs) +static void dump_kernel_instr(const char *lvl, struct pt_regs *regs) { unsigned long addr = instruction_pointer(regs); char str[sizeof("00000000 ") * 5 + 2 + 1], *p = str; int i; + if (user_mode(regs)) + return; + for (i = -4; i < 1; i++) { unsigned int val, bad; - bad = get_user(val, &((u32 *)addr)[i]); + bad = aarch64_insn_read(&((u32 *)addr)[i], &val); if (!bad) p += sprintf(p, i == 0 ? "(%08x) " : "%08x ", val); @@ -73,19 +76,8 @@ static void __dump_instr(const char *lvl, struct pt_regs *regs) break; } } - printk("%sCode: %s\n", lvl, str); -} -static void dump_instr(const char *lvl, struct pt_regs *regs) -{ - if (!user_mode(regs)) { - mm_segment_t fs = get_fs(); - set_fs(KERNEL_DS); - __dump_instr(lvl, regs); - set_fs(fs); - } else { - __dump_instr(lvl, regs); - } + printk("%sCode: %s\n", lvl, str); } void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) @@ -171,8 +163,7 @@ static int __die(const char *str, int err, struct pt_regs *regs) print_modules(); show_regs(regs); - if (!user_mode(regs)) - dump_instr(KERN_EMERG, regs); + dump_kernel_instr(KERN_EMERG, regs); return ret; } diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 663b166241d0..354b11e27c07 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -20,41 +20,212 @@ #include <linux/slab.h> #include <linux/timekeeper_internal.h> #include <linux/vmalloc.h> +#include <vdso/datapage.h> +#include <vdso/helpers.h> +#include <vdso/vsyscall.h> #include <asm/cacheflush.h> #include <asm/signal32.h> #include <asm/vdso.h> -#include <asm/vdso_datapage.h> extern char vdso_start[], vdso_end[]; -static unsigned long vdso_pages __ro_after_init; +#ifdef CONFIG_COMPAT_VDSO +extern char vdso32_start[], vdso32_end[]; +#endif /* CONFIG_COMPAT_VDSO */ + +/* vdso_lookup arch_index */ +enum arch_vdso_type { + ARM64_VDSO = 0, +#ifdef CONFIG_COMPAT_VDSO + ARM64_VDSO32 = 1, +#endif /* CONFIG_COMPAT_VDSO */ +}; +#ifdef CONFIG_COMPAT_VDSO +#define VDSO_TYPES (ARM64_VDSO32 + 1) +#else +#define VDSO_TYPES (ARM64_VDSO + 1) +#endif /* CONFIG_COMPAT_VDSO */ + +struct __vdso_abi { + const char *name; + const char *vdso_code_start; + const char *vdso_code_end; + unsigned long vdso_pages; + /* 
Data Mapping */ + struct vm_special_mapping *dm; + /* Code Mapping */ + struct vm_special_mapping *cm; +}; + +static struct __vdso_abi vdso_lookup[VDSO_TYPES] __ro_after_init = { + { + .name = "vdso", + .vdso_code_start = vdso_start, + .vdso_code_end = vdso_end, + }, +#ifdef CONFIG_COMPAT_VDSO + { + .name = "vdso32", + .vdso_code_start = vdso32_start, + .vdso_code_end = vdso32_end, + }, +#endif /* CONFIG_COMPAT_VDSO */ +}; /* * The vDSO data page. */ static union { - struct vdso_data data; + struct vdso_data data[CS_BASES]; u8 page[PAGE_SIZE]; } vdso_data_store __page_aligned_data; -struct vdso_data *vdso_data = &vdso_data_store.data; +struct vdso_data *vdso_data = vdso_data_store.data; + +static int __vdso_remap(enum arch_vdso_type arch_index, + const struct vm_special_mapping *sm, + struct vm_area_struct *new_vma) +{ + unsigned long new_size = new_vma->vm_end - new_vma->vm_start; + unsigned long vdso_size = vdso_lookup[arch_index].vdso_code_end - + vdso_lookup[arch_index].vdso_code_start; + + if (vdso_size != new_size) + return -EINVAL; + + current->mm->context.vdso = (void *)new_vma->vm_start; + + return 0; +} + +static int __vdso_init(enum arch_vdso_type arch_index) +{ + int i; + struct page **vdso_pagelist; + unsigned long pfn; + + if (memcmp(vdso_lookup[arch_index].vdso_code_start, "\177ELF", 4)) { + pr_err("vDSO is not a valid ELF object!\n"); + return -EINVAL; + } + + vdso_lookup[arch_index].vdso_pages = ( + vdso_lookup[arch_index].vdso_code_end - + vdso_lookup[arch_index].vdso_code_start) >> + PAGE_SHIFT; + + /* Allocate the vDSO pagelist, plus a page for the data. */ + vdso_pagelist = kcalloc(vdso_lookup[arch_index].vdso_pages + 1, + sizeof(struct page *), + GFP_KERNEL); + if (vdso_pagelist == NULL) + return -ENOMEM; + + /* Grab the vDSO data page. */ + vdso_pagelist[0] = phys_to_page(__pa_symbol(vdso_data)); + + + /* Grab the vDSO code pages. */ + pfn = sym_to_pfn(vdso_lookup[arch_index].vdso_code_start); + + for (i = 0; i < vdso_lookup[arch_index].vdso_pages; i++) + vdso_pagelist[i + 1] = pfn_to_page(pfn + i); + + vdso_lookup[arch_index].dm->pages = &vdso_pagelist[0]; + vdso_lookup[arch_index].cm->pages = &vdso_pagelist[1]; + + return 0; +} + +static int __setup_additional_pages(enum arch_vdso_type arch_index, + struct mm_struct *mm, + struct linux_binprm *bprm, + int uses_interp) +{ + unsigned long vdso_base, vdso_text_len, vdso_mapping_len; + void *ret; + + vdso_text_len = vdso_lookup[arch_index].vdso_pages << PAGE_SHIFT; + /* Be sure to map the data page */ + vdso_mapping_len = vdso_text_len + PAGE_SIZE; + + vdso_base = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0); + if (IS_ERR_VALUE(vdso_base)) { + ret = ERR_PTR(vdso_base); + goto up_fail; + } + + ret = _install_special_mapping(mm, vdso_base, PAGE_SIZE, + VM_READ|VM_MAYREAD, + vdso_lookup[arch_index].dm); + if (IS_ERR(ret)) + goto up_fail; + + vdso_base += PAGE_SIZE; + mm->context.vdso = (void *)vdso_base; + ret = _install_special_mapping(mm, vdso_base, vdso_text_len, + VM_READ|VM_EXEC| + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, + vdso_lookup[arch_index].cm); + if (IS_ERR(ret)) + goto up_fail; + + return 0; + +up_fail: + mm->context.vdso = NULL; + return PTR_ERR(ret); +} #ifdef CONFIG_COMPAT /* * Create and map the vectors page for AArch32 tasks. 
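Native and compat ABIs now both funnel through __setup_additional_pages(), which creates two adjacent mappings; that adjacency is what lets the vDSO reach its data with pure PC-relative addressing. The per-process layout, sketched:

	/*
	 * vdso_base              [vvar]  one page    VM_READ
	 * vdso_base + PAGE_SIZE  [vdso]  vdso_pages  VM_READ|VM_EXEC
	 *                         `-- mm->context.vdso points here
	 *
	 * The vDSO linker script provides _vdso_data one page below the
	 * start of the code, so __arch_get_vdso_data() needs no relocation.
	 */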
*/ +#ifdef CONFIG_COMPAT_VDSO +static int aarch32_vdso_mremap(const struct vm_special_mapping *sm, + struct vm_area_struct *new_vma) +{ + return __vdso_remap(ARM64_VDSO32, sm, new_vma); +} +#endif /* CONFIG_COMPAT_VDSO */ + +/* + * aarch32_vdso_pages: + * 0 - kuser helpers + * 1 - sigreturn code + * or (CONFIG_COMPAT_VDSO): + * 0 - kuser helpers + * 1 - vdso data + * 2 - vdso code + */ #define C_VECTORS 0 +#ifdef CONFIG_COMPAT_VDSO +#define C_VVAR 1 +#define C_VDSO 2 +#define C_PAGES (C_VDSO + 1) +#else #define C_SIGPAGE 1 #define C_PAGES (C_SIGPAGE + 1) +#endif /* CONFIG_COMPAT_VDSO */ static struct page *aarch32_vdso_pages[C_PAGES] __ro_after_init; -static const struct vm_special_mapping aarch32_vdso_spec[C_PAGES] = { +static struct vm_special_mapping aarch32_vdso_spec[C_PAGES] = { { .name = "[vectors]", /* ABI */ .pages = &aarch32_vdso_pages[C_VECTORS], }, +#ifdef CONFIG_COMPAT_VDSO + { + .name = "[vvar]", + }, + { + .name = "[vdso]", + .mremap = aarch32_vdso_mremap, + }, +#else { .name = "[sigpage]", /* ABI */ .pages = &aarch32_vdso_pages[C_SIGPAGE], }, +#endif /* CONFIG_COMPAT_VDSO */ }; static int aarch32_alloc_kuser_vdso_page(void) @@ -77,7 +248,33 @@ static int aarch32_alloc_kuser_vdso_page(void) return 0; } -static int __init aarch32_alloc_vdso_pages(void) +#ifdef CONFIG_COMPAT_VDSO +static int __aarch32_alloc_vdso_pages(void) +{ + int ret; + + vdso_lookup[ARM64_VDSO32].dm = &aarch32_vdso_spec[C_VVAR]; + vdso_lookup[ARM64_VDSO32].cm = &aarch32_vdso_spec[C_VDSO]; + + ret = __vdso_init(ARM64_VDSO32); + if (ret) + return ret; + + ret = aarch32_alloc_kuser_vdso_page(); + if (ret) { + unsigned long c_vvar = + (unsigned long)page_to_virt(aarch32_vdso_pages[C_VVAR]); + unsigned long c_vdso = + (unsigned long)page_to_virt(aarch32_vdso_pages[C_VDSO]); + + free_page(c_vvar); + free_page(c_vdso); + } + + return ret; +} +#else +static int __aarch32_alloc_vdso_pages(void) { extern char __aarch32_sigret_code_start[], __aarch32_sigret_code_end[]; int sigret_sz = __aarch32_sigret_code_end - __aarch32_sigret_code_start; @@ -98,6 +295,12 @@ static int __init aarch32_alloc_vdso_pages(void) return ret; } +#endif /* CONFIG_COMPAT_VDSO */ + +static int __init aarch32_alloc_vdso_pages(void) +{ + return __aarch32_alloc_vdso_pages(); +} arch_initcall(aarch32_alloc_vdso_pages); static int aarch32_kuser_helpers_setup(struct mm_struct *mm) @@ -119,6 +322,7 @@ static int aarch32_kuser_helpers_setup(struct mm_struct *mm) return PTR_ERR_OR_ZERO(ret); } +#ifndef CONFIG_COMPAT_VDSO static int aarch32_sigreturn_setup(struct mm_struct *mm) { unsigned long addr; @@ -146,6 +350,7 @@ static int aarch32_sigreturn_setup(struct mm_struct *mm) out: return PTR_ERR_OR_ZERO(ret); } +#endif /* !CONFIG_COMPAT_VDSO */ int aarch32_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) { @@ -159,7 +364,14 @@ int aarch32_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) if (ret) goto out; +#ifdef CONFIG_COMPAT_VDSO + ret = __setup_additional_pages(ARM64_VDSO32, + mm, + bprm, + uses_interp); +#else ret = aarch32_sigreturn_setup(mm); +#endif /* CONFIG_COMPAT_VDSO */ out: up_write(&mm->mmap_sem); @@ -170,18 +382,18 @@ out: static int vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma) { - unsigned long new_size = new_vma->vm_end - new_vma->vm_start; - unsigned long vdso_size = vdso_end - vdso_start; - - if (vdso_size != new_size) - return -EINVAL; - - current->mm->context.vdso = (void *)new_vma->vm_start; - - return 0; + return __vdso_remap(ARM64_VDSO, sm, new_vma); } -static 
struct vm_special_mapping vdso_spec[2] __ro_after_init = { +/* + * aarch64_vdso_pages: + * 0 - vvar + * 1 - vdso + */ +#define A_VVAR 0 +#define A_VDSO 1 +#define A_PAGES (A_VDSO + 1) +static struct vm_special_mapping vdso_spec[A_PAGES] __ro_after_init = { { .name = "[vvar]", }, @@ -193,37 +405,10 @@ static struct vm_special_mapping vdso_spec[2] __ro_after_init = { static int __init vdso_init(void) { - int i; - struct page **vdso_pagelist; - unsigned long pfn; - - if (memcmp(vdso_start, "\177ELF", 4)) { - pr_err("vDSO is not a valid ELF object!\n"); - return -EINVAL; - } - - vdso_pages = (vdso_end - vdso_start) >> PAGE_SHIFT; - - /* Allocate the vDSO pagelist, plus a page for the data. */ - vdso_pagelist = kcalloc(vdso_pages + 1, sizeof(struct page *), - GFP_KERNEL); - if (vdso_pagelist == NULL) - return -ENOMEM; - - /* Grab the vDSO data page. */ - vdso_pagelist[0] = phys_to_page(__pa_symbol(vdso_data)); - - - /* Grab the vDSO code pages. */ - pfn = sym_to_pfn(vdso_start); - - for (i = 0; i < vdso_pages; i++) - vdso_pagelist[i + 1] = pfn_to_page(pfn + i); + vdso_lookup[ARM64_VDSO].dm = &vdso_spec[A_VVAR]; + vdso_lookup[ARM64_VDSO].cm = &vdso_spec[A_VDSO]; - vdso_spec[0].pages = &vdso_pagelist[0]; - vdso_spec[1].pages = &vdso_pagelist[1]; - - return 0; + return __vdso_init(ARM64_VDSO); } arch_initcall(vdso_init); @@ -231,84 +416,17 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) { struct mm_struct *mm = current->mm; - unsigned long vdso_base, vdso_text_len, vdso_mapping_len; - void *ret; - - vdso_text_len = vdso_pages << PAGE_SHIFT; - /* Be sure to map the data page */ - vdso_mapping_len = vdso_text_len + PAGE_SIZE; + int ret; if (down_write_killable(&mm->mmap_sem)) return -EINTR; - vdso_base = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0); - if (IS_ERR_VALUE(vdso_base)) { - ret = ERR_PTR(vdso_base); - goto up_fail; - } - ret = _install_special_mapping(mm, vdso_base, PAGE_SIZE, - VM_READ|VM_MAYREAD, - &vdso_spec[0]); - if (IS_ERR(ret)) - goto up_fail; - - vdso_base += PAGE_SIZE; - mm->context.vdso = (void *)vdso_base; - ret = _install_special_mapping(mm, vdso_base, vdso_text_len, - VM_READ|VM_EXEC| - VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, - &vdso_spec[1]); - if (IS_ERR(ret)) - goto up_fail; + ret = __setup_additional_pages(ARM64_VDSO, + mm, + bprm, + uses_interp); up_write(&mm->mmap_sem); - return 0; - -up_fail: - mm->context.vdso = NULL; - up_write(&mm->mmap_sem); - return PTR_ERR(ret); -} -/* - * Update the vDSO data page to keep in sync with kernel timekeeping. 
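The update_vsyscall()/update_vsyscall_tz() implementations deleted below are not lost: the generic code in kernel/time/vsyscall.c takes over and calls back into the hooks defined in vsyscall.h earlier in this diff. Its shape, heavily simplified:

	void update_vsyscall(struct timekeeper *tk)	/* generic, simplified */
	{
		struct vdso_data *vdata = __arch_get_k_vdso_data();
		int mode = __arch_get_clock_mode(tk);

		vdata[CS_HRES_COARSE].clock_mode = mode;
		vdata[CS_RAW].clock_mode = mode;
		/* ... raise seq count, fill basetime[] for each clock base ... */
		__arch_update_vsyscall(vdata, tk);	/* arm64: apply precision mask */
		/* ... bump seq count again to close the write side ... */
	}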
- */ -void update_vsyscall(struct timekeeper *tk) -{ - u32 use_syscall = !tk->tkr_mono.clock->archdata.vdso_direct; - - ++vdso_data->tb_seq_count; - smp_wmb(); - - vdso_data->use_syscall = use_syscall; - vdso_data->xtime_coarse_sec = tk->xtime_sec; - vdso_data->xtime_coarse_nsec = tk->tkr_mono.xtime_nsec >> - tk->tkr_mono.shift; - vdso_data->wtm_clock_sec = tk->wall_to_monotonic.tv_sec; - vdso_data->wtm_clock_nsec = tk->wall_to_monotonic.tv_nsec; - - /* Read without the seqlock held by clock_getres() */ - WRITE_ONCE(vdso_data->hrtimer_res, hrtimer_resolution); - - if (!use_syscall) { - /* tkr_mono.cycle_last == tkr_raw.cycle_last */ - vdso_data->cs_cycle_last = tk->tkr_mono.cycle_last; - vdso_data->raw_time_sec = tk->raw_sec; - vdso_data->raw_time_nsec = tk->tkr_raw.xtime_nsec; - vdso_data->xtime_clock_sec = tk->xtime_sec; - vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec; - vdso_data->cs_mono_mult = tk->tkr_mono.mult; - vdso_data->cs_raw_mult = tk->tkr_raw.mult; - /* tkr_mono.shift == tkr_raw.shift */ - vdso_data->cs_shift = tk->tkr_mono.shift; - } - - smp_wmb(); - ++vdso_data->tb_seq_count; -} - -void update_vsyscall_tz(void) -{ - vdso_data->tz_minuteswest = sys_tz.tz_minuteswest; - vdso_data->tz_dsttime = sys_tz.tz_dsttime; + return ret; } diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index fa230ff09aa1..4ab863045188 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -6,7 +6,12 @@ # Heavily based on the vDSO Makefiles for other archs. # -obj-vdso := gettimeofday.o note.o sigreturn.o +# Absolute relocation type $(ARCH_REL_TYPE_ABS) needs to be defined before +# the inclusion of generic Makefile. +ARCH_REL_TYPE_ABS := R_AARCH64_JUMP_SLOT|R_AARCH64_GLOB_DAT|R_AARCH64_ABS64 +include $(srctree)/lib/vdso/Makefile + +obj-vdso := vgettimeofday.o note.o sigreturn.o # Build rules targets := $(obj-vdso) vdso.so vdso.so.dbg @@ -15,6 +20,31 @@ obj-vdso := $(addprefix $(obj)/, $(obj-vdso)) ldflags-y := -shared -nostdlib -soname=linux-vdso.so.1 --hash-style=sysv \ --build-id -n -T +ccflags-y := -fno-common -fno-builtin -fno-stack-protector -ffixed-x18 +ccflags-y += -DDISABLE_BRANCH_PROFILING + +VDSO_LDFLAGS := -Bsymbolic + +CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os +KBUILD_CFLAGS += $(DISABLE_LTO) +KASAN_SANITIZE := n +UBSAN_SANITIZE := n +OBJECT_FILES_NON_STANDARD := y +KCOV_INSTRUMENT := n + +ifeq ($(c-gettimeofday-y),) +CFLAGS_vgettimeofday.o = -O2 -mcmodel=tiny +else +CFLAGS_vgettimeofday.o = -O2 -mcmodel=tiny -include $(c-gettimeofday-y) +endif + +# Clang versions less than 8 do not support -mcmodel=tiny +ifeq ($(CONFIG_CC_IS_CLANG), y) + ifeq ($(shell test $(CONFIG_CLANG_VERSION) -lt 80000; echo $$?),0) + CFLAGS_REMOVE_vgettimeofday.o += -mcmodel=tiny + endif +endif + # Disable gcov profiling for VDSO code GCOV_PROFILE := n @@ -28,6 +58,7 @@ $(obj)/vdso.o : $(obj)/vdso.so # Link rule for the .so file, .lds has to be first $(obj)/vdso.so.dbg: $(obj)/vdso.lds $(obj-vdso) FORCE $(call if_changed,ld) + $(call if_changed,vdso_check) # Strip rule for the .so file $(obj)/%.so: OBJCOPYFLAGS := -S @@ -42,13 +73,9 @@ quiet_cmd_vdsosym = VDSOSYM $@ include/generated/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE $(call if_changed,vdsosym) -# Assembly rules for the .S files -$(obj-vdso): %.o: %.S FORCE - $(call if_changed_dep,vdsoas) - # Actual build commands -quiet_cmd_vdsoas = VDSOA $@ - cmd_vdsoas = $(CC) $(a_flags) -c -o $@ $< +quiet_cmd_vdsocc = VDSOCC $@ + cmd_vdsocc = $(CC) $(a_flags) $(c_flags) -c -o $@ $< # Install 
commands for the unstripped file quiet_cmd_vdso_install = INSTALL $@ diff --git a/arch/arm64/kernel/vdso/gettimeofday.S b/arch/arm64/kernel/vdso/gettimeofday.S index 80f780f56e0d..e69de29bb2d1 100644 --- a/arch/arm64/kernel/vdso/gettimeofday.S +++ b/arch/arm64/kernel/vdso/gettimeofday.S @@ -1,323 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Userspace implementations of gettimeofday() and friends. - * - * Copyright (C) 2012 ARM Limited - * - * Author: Will Deacon <will.deacon@arm.com> - */ - -#include <linux/linkage.h> -#include <asm/asm-offsets.h> -#include <asm/unistd.h> - -#define NSEC_PER_SEC_LO16 0xca00 -#define NSEC_PER_SEC_HI16 0x3b9a - -vdso_data .req x6 -seqcnt .req w7 -w_tmp .req w8 -x_tmp .req x8 - -/* - * Conventions for macro arguments: - * - An argument is write-only if its name starts with "res". - * - All other arguments are read-only, unless otherwise specified. - */ - - .macro seqcnt_acquire -9999: ldr seqcnt, [vdso_data, #VDSO_TB_SEQ_COUNT] - tbnz seqcnt, #0, 9999b - dmb ishld - .endm - - .macro seqcnt_check fail - dmb ishld - ldr w_tmp, [vdso_data, #VDSO_TB_SEQ_COUNT] - cmp w_tmp, seqcnt - b.ne \fail - .endm - - .macro syscall_check fail - ldr w_tmp, [vdso_data, #VDSO_USE_SYSCALL] - cbnz w_tmp, \fail - .endm - - .macro get_nsec_per_sec res - mov \res, #NSEC_PER_SEC_LO16 - movk \res, #NSEC_PER_SEC_HI16, lsl #16 - .endm - - /* - * Returns the clock delta, in nanoseconds left-shifted by the clock - * shift. - */ - .macro get_clock_shifted_nsec res, cycle_last, mult - /* Read the virtual counter. */ - isb - mrs x_tmp, cntvct_el0 - /* Calculate cycle delta and convert to ns. */ - sub \res, x_tmp, \cycle_last - /* We can only guarantee 56 bits of precision. */ - movn x_tmp, #0xff00, lsl #48 - and \res, x_tmp, \res - mul \res, \res, \mult - /* - * Fake address dependency from the value computed from the counter - * register to subsequent data page accesses so that the sequence - * locking also orders the read of the counter. - */ - and x_tmp, \res, xzr - add vdso_data, vdso_data, x_tmp - .endm - - /* - * Returns in res_{sec,nsec} the REALTIME timespec, based on the - * "wall time" (xtime) and the clock_mono delta. - */ - .macro get_ts_realtime res_sec, res_nsec, \ - clock_nsec, xtime_sec, xtime_nsec, nsec_to_sec - add \res_nsec, \clock_nsec, \xtime_nsec - udiv x_tmp, \res_nsec, \nsec_to_sec - add \res_sec, \xtime_sec, x_tmp - msub \res_nsec, x_tmp, \nsec_to_sec, \res_nsec - .endm - - /* - * Returns in res_{sec,nsec} the timespec based on the clock_raw delta, - * used for CLOCK_MONOTONIC_RAW. - */ - .macro get_ts_clock_raw res_sec, res_nsec, clock_nsec, nsec_to_sec - udiv \res_sec, \clock_nsec, \nsec_to_sec - msub \res_nsec, \res_sec, \nsec_to_sec, \clock_nsec - .endm - - /* sec and nsec are modified in place. */ - .macro add_ts sec, nsec, ts_sec, ts_nsec, nsec_to_sec - /* Add timespec. */ - add \sec, \sec, \ts_sec - add \nsec, \nsec, \ts_nsec - - /* Normalise the new timespec. */ - cmp \nsec, \nsec_to_sec - b.lt 9999f - sub \nsec, \nsec, \nsec_to_sec - add \sec, \sec, #1 -9999: - cmp \nsec, #0 - b.ge 9998f - add \nsec, \nsec, \nsec_to_sec - sub \sec, \sec, #1 -9998: - .endm - - .macro clock_gettime_return, shift=0 - .if \shift == 1 - lsr x11, x11, x12 - .endif - stp x10, x11, [x1, #TSPEC_TV_SEC] - mov x0, xzr - ret - .endm - - .macro jump_slot jumptable, index, label - .if (. 
- \jumptable) != 4 * (\index) - .error "Jump slot index mismatch" - .endif - b \label - .endm - - .text - -/* int __kernel_gettimeofday(struct timeval *tv, struct timezone *tz); */ -ENTRY(__kernel_gettimeofday) - .cfi_startproc - adr vdso_data, _vdso_data - /* If tv is NULL, skip to the timezone code. */ - cbz x0, 2f - - /* Compute the time of day. */ -1: seqcnt_acquire - syscall_check fail=4f - ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST] - /* w11 = cs_mono_mult, w12 = cs_shift */ - ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT] - ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC] - - get_nsec_per_sec res=x9 - lsl x9, x9, x12 - - get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11 - seqcnt_check fail=1b - get_ts_realtime res_sec=x10, res_nsec=x11, \ - clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9 - - /* Convert ns to us. */ - mov x13, #1000 - lsl x13, x13, x12 - udiv x11, x11, x13 - stp x10, x11, [x0, #TVAL_TV_SEC] -2: - /* If tz is NULL, return 0. */ - cbz x1, 3f - ldp w4, w5, [vdso_data, #VDSO_TZ_MINWEST] - stp w4, w5, [x1, #TZ_MINWEST] -3: - mov x0, xzr - ret -4: - /* Syscall fallback. */ - mov x8, #__NR_gettimeofday - svc #0 - ret - .cfi_endproc -ENDPROC(__kernel_gettimeofday) - -#define JUMPSLOT_MAX CLOCK_MONOTONIC_COARSE - -/* int __kernel_clock_gettime(clockid_t clock_id, struct timespec *tp); */ -ENTRY(__kernel_clock_gettime) - .cfi_startproc - cmp w0, #JUMPSLOT_MAX - b.hi syscall - adr vdso_data, _vdso_data - adr x_tmp, jumptable - add x_tmp, x_tmp, w0, uxtw #2 - br x_tmp - - ALIGN -jumptable: - jump_slot jumptable, CLOCK_REALTIME, realtime - jump_slot jumptable, CLOCK_MONOTONIC, monotonic - b syscall - b syscall - jump_slot jumptable, CLOCK_MONOTONIC_RAW, monotonic_raw - jump_slot jumptable, CLOCK_REALTIME_COARSE, realtime_coarse - jump_slot jumptable, CLOCK_MONOTONIC_COARSE, monotonic_coarse - - .if (. - jumptable) != 4 * (JUMPSLOT_MAX + 1) - .error "Wrong jumptable size" - .endif - - ALIGN -realtime: - seqcnt_acquire - syscall_check fail=syscall - ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST] - /* w11 = cs_mono_mult, w12 = cs_shift */ - ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT] - ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC] - - /* All computations are done with left-shifted nsecs. */ - get_nsec_per_sec res=x9 - lsl x9, x9, x12 - - get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11 - seqcnt_check fail=realtime - get_ts_realtime res_sec=x10, res_nsec=x11, \ - clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9 - clock_gettime_return, shift=1 - - ALIGN -monotonic: - seqcnt_acquire - syscall_check fail=syscall - ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST] - /* w11 = cs_mono_mult, w12 = cs_shift */ - ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT] - ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC] - ldp x3, x4, [vdso_data, #VDSO_WTM_CLK_SEC] - - /* All computations are done with left-shifted nsecs. 
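These macros keep every intermediate value in "left-shifted nanoseconds": the counter delta is multiplied by the clocksource mult, and the division into seconds uses NSEC_PER_SEC scaled up by the same shift, so precision is only dropped once at the end. The underlying identity is the standard clocksource conversion ns = (delta * mult) >> shift; a self-contained illustration with made-up constants:

    #include <stdint.h>
    #include <stdio.h>

    /* Worked example of the mult/shift fixed-point conversion the
     * assembly above performs. The constants are invented: they
     * model a 50 MHz counter, i.e. 20 ns per cycle. */
    int main(void)
    {
        uint64_t cycle_last = 1000000, now = 1050000;
        uint32_t mult = 20971520, shift = 20;    /* 20 << 20 */

        uint64_t delta = now - cycle_last;
        uint64_t ns = (delta * mult) >> shift;

        printf("%llu cycles -> %llu ns\n",
               (unsigned long long)delta, (unsigned long long)ns);
        return 0;    /* 50000 cycles -> 1000000 ns */
    }
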
*/ - lsl x4, x4, x12 - get_nsec_per_sec res=x9 - lsl x9, x9, x12 - - get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11 - seqcnt_check fail=monotonic - get_ts_realtime res_sec=x10, res_nsec=x11, \ - clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9 - - add_ts sec=x10, nsec=x11, ts_sec=x3, ts_nsec=x4, nsec_to_sec=x9 - clock_gettime_return, shift=1 - - ALIGN -monotonic_raw: - seqcnt_acquire - syscall_check fail=syscall - ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST] - /* w11 = cs_raw_mult, w12 = cs_shift */ - ldp w12, w11, [vdso_data, #VDSO_CS_SHIFT] - ldp x13, x14, [vdso_data, #VDSO_RAW_TIME_SEC] - - /* All computations are done with left-shifted nsecs. */ - get_nsec_per_sec res=x9 - lsl x9, x9, x12 - - get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11 - seqcnt_check fail=monotonic_raw - get_ts_clock_raw res_sec=x10, res_nsec=x11, \ - clock_nsec=x15, nsec_to_sec=x9 - - add_ts sec=x10, nsec=x11, ts_sec=x13, ts_nsec=x14, nsec_to_sec=x9 - clock_gettime_return, shift=1 - - ALIGN -realtime_coarse: - seqcnt_acquire - ldp x10, x11, [vdso_data, #VDSO_XTIME_CRS_SEC] - seqcnt_check fail=realtime_coarse - clock_gettime_return - - ALIGN -monotonic_coarse: - seqcnt_acquire - ldp x10, x11, [vdso_data, #VDSO_XTIME_CRS_SEC] - ldp x13, x14, [vdso_data, #VDSO_WTM_CLK_SEC] - seqcnt_check fail=monotonic_coarse - - /* Computations are done in (non-shifted) nsecs. */ - get_nsec_per_sec res=x9 - add_ts sec=x10, nsec=x11, ts_sec=x13, ts_nsec=x14, nsec_to_sec=x9 - clock_gettime_return - - ALIGN -syscall: /* Syscall fallback. */ - mov x8, #__NR_clock_gettime - svc #0 - ret - .cfi_endproc -ENDPROC(__kernel_clock_gettime) - -/* int __kernel_clock_getres(clockid_t clock_id, struct timespec *res); */ -ENTRY(__kernel_clock_getres) - .cfi_startproc - cmp w0, #CLOCK_REALTIME - ccmp w0, #CLOCK_MONOTONIC, #0x4, ne - ccmp w0, #CLOCK_MONOTONIC_RAW, #0x4, ne - b.ne 1f - - adr vdso_data, _vdso_data - ldr w2, [vdso_data, #CLOCK_REALTIME_RES] - b 2f -1: - cmp w0, #CLOCK_REALTIME_COARSE - ccmp w0, #CLOCK_MONOTONIC_COARSE, #0x4, ne - b.ne 4f - ldr x2, 5f -2: - cbz x1, 3f - stp xzr, x2, [x1] - -3: /* res == NULL. */ - mov w0, wzr - ret - -4: /* Syscall fallback. */ - mov x8, #__NR_clock_getres - svc #0 - ret -5: - .quad CLOCK_COARSE_RES - .cfi_endproc -ENDPROC(__kernel_clock_getres) diff --git a/arch/arm64/kernel/vdso/vgettimeofday.c b/arch/arm64/kernel/vdso/vgettimeofday.c new file mode 100644 index 000000000000..747635501a14 --- /dev/null +++ b/arch/arm64/kernel/vdso/vgettimeofday.c @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ARM64 userspace implementations of gettimeofday() and similar. 
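Before the new C file continues, one note on the protocol the deleted seqcnt_acquire/seqcnt_check macros implemented: it is an ordinary seqcount read loop. The reader spins while the count is odd (an update_vsyscall() update is in flight, hence the `tbnz seqcnt, #0` test above), copies the data, and retries if the count moved. A generic C rendering of the same reader, using C11 atomics rather than the kernel's primitives:

    #include <stdatomic.h>
    #include <stdint.h>

    struct snapshot { uint64_t sec, nsec; };

    /* Reader against a writer that increments *seq before and after
     * each update, so an odd value means "update in flight". The
     * plain copy of *data is formally racy in C11; the kernel uses
     * its own annotations for this, so treat it as a sketch. */
    static struct snapshot read_consistent(const _Atomic uint32_t *seq,
                                           const struct snapshot *data)
    {
        struct snapshot out;
        uint32_t start;

        do {
            while ((start = atomic_load_explicit(seq,
                            memory_order_acquire)) & 1)
                ;                              /* writer active, spin */
            out = *data;
            atomic_thread_fence(memory_order_acquire);
        } while (atomic_load_explicit(seq,
                                      memory_order_relaxed) != start);
        return out;
    }
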
+ * + * Copyright (C) 2018 ARM Limited + * + */ +#include <linux/time.h> +#include <linux/types.h> + +int __kernel_clock_gettime(clockid_t clock, + struct __kernel_timespec *ts) +{ + return __cvdso_clock_gettime(clock, ts); +} + +int __kernel_gettimeofday(struct __kernel_old_timeval *tv, + struct timezone *tz) +{ + return __cvdso_gettimeofday(tv, tz); +} + +int __kernel_clock_getres(clockid_t clock_id, + struct __kernel_timespec *res) +{ + return __cvdso_clock_getres(clock_id, res); +} diff --git a/arch/arm64/kernel/vdso32/.gitignore b/arch/arm64/kernel/vdso32/.gitignore new file mode 100644 index 000000000000..4fea950fa5ed --- /dev/null +++ b/arch/arm64/kernel/vdso32/.gitignore @@ -0,0 +1,2 @@ +vdso.lds +vdso.so.raw diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile new file mode 100644 index 000000000000..288c14d30b45 --- /dev/null +++ b/arch/arm64/kernel/vdso32/Makefile @@ -0,0 +1,186 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for vdso32 +# + +# Absolute relocation type $(ARCH_REL_TYPE_ABS) needs to be defined before +# the inclusion of generic Makefile. +ARCH_REL_TYPE_ABS := R_ARM_JUMP_SLOT|R_ARM_GLOB_DAT|R_ARM_ABS32 +include $(srctree)/lib/vdso/Makefile + +COMPATCC := $(CROSS_COMPILE_COMPAT)gcc + +# Same as cc-*option, but using COMPATCC instead of CC +cc32-option = $(call try-run,\ + $(COMPATCC) $(1) -c -x c /dev/null -o "$$TMP",$(1),$(2)) +cc32-disable-warning = $(call try-run,\ + $(COMPATCC) -W$(strip $(1)) -c -x c /dev/null -o "$$TMP",-Wno-$(strip $(1))) +cc32-ldoption = $(call try-run,\ + $(COMPATCC) $(1) -nostdlib -x c /dev/null -o "$$TMP",$(1),$(2)) + +# We cannot use the global flags to compile the vDSO files, the main reason +# being that the 32-bit compiler may be older than the main (64-bit) compiler +# and therefore may not understand flags set using $(cc-option ...). Besides, +# arch-specific options should be taken from the arm Makefile instead of the +# arm64 one. +# As a result we set our own flags here. + +# From top-level Makefile +# NOSTDINC_FLAGS +VDSO_CPPFLAGS := -nostdinc -isystem $(shell $(COMPATCC) -print-file-name=include) +VDSO_CPPFLAGS += $(LINUXINCLUDE) +VDSO_CPPFLAGS += $(KBUILD_CPPFLAGS) + +# Common C and assembly flags +# From top-level Makefile +VDSO_CAFLAGS := $(VDSO_CPPFLAGS) +VDSO_CAFLAGS += $(call cc32-option,-fno-PIE) +ifdef CONFIG_DEBUG_INFO +VDSO_CAFLAGS += -g +endif +ifeq ($(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-goto.sh $(COMPATCC)), y) +VDSO_CAFLAGS += -DCC_HAVE_ASM_GOTO +endif + +# From arm Makefile +VDSO_CAFLAGS += $(call cc32-option,-fno-dwarf2-cfi-asm) +VDSO_CAFLAGS += -mabi=aapcs-linux -mfloat-abi=soft +ifeq ($(CONFIG_CPU_BIG_ENDIAN), y) +VDSO_CAFLAGS += -mbig-endian +else +VDSO_CAFLAGS += -mlittle-endian +endif + +# From arm vDSO Makefile +VDSO_CAFLAGS += -fPIC -fno-builtin -fno-stack-protector +VDSO_CAFLAGS += -DDISABLE_BRANCH_PROFILING + +# Try to compile for ARMv8. If the compiler is too old and doesn't support it, +# fall back to v7. There is no easy way to check for what architecture the code +# is being compiled, so define a macro specifying that (see arch/arm/Makefile). 
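The __LINUX_ARM_ARCH__ define matters because shared arm code selects instruction sequences at preprocessing time. A hedged illustration of the kind of dispatch it enables; the real users live under arch/arm, and this exact macro body is made up:

    #if __LINUX_ARM_ARCH__ >= 7
    /* ARMv7/ARMv8 have a dedicated barrier instruction */
    #define mb()    __asm__ __volatile__("dmb ish" ::: "memory")
    #else
    /* older cores issue the barrier through CP15 */
    #define mb()    __asm__ __volatile__("mcr p15, 0, %0, c7, c10, 5" \
                                         :: "r" (0) : "memory")
    #endif
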
+VDSO_CAFLAGS += $(call cc32-option,-march=armv8-a -D__LINUX_ARM_ARCH__=8,\ + -march=armv7-a -D__LINUX_ARM_ARCH__=7) + +VDSO_CFLAGS := $(VDSO_CAFLAGS) +VDSO_CFLAGS += -DENABLE_COMPAT_VDSO=1 +# KBUILD_CFLAGS from top-level Makefile +VDSO_CFLAGS += -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \ + -fno-strict-aliasing -fno-common \ + -Werror-implicit-function-declaration \ + -Wno-format-security \ + -std=gnu89 +VDSO_CFLAGS += -O2 +# Some useful compiler-dependent flags from top-level Makefile +VDSO_CFLAGS += $(call cc32-option,-Wdeclaration-after-statement,) +VDSO_CFLAGS += $(call cc32-option,-Wno-pointer-sign) +VDSO_CFLAGS += $(call cc32-option,-fno-strict-overflow) +VDSO_CFLAGS += $(call cc32-option,-Werror=strict-prototypes) +VDSO_CFLAGS += $(call cc32-option,-Werror=date-time) +VDSO_CFLAGS += $(call cc32-option,-Werror=incompatible-pointer-types) + +# The 32-bit compiler does not provide 128-bit integers, which are used in +# some headers that are indirectly included from the vDSO code. +# This hack makes the compiler happy and should trigger a warning/error if +# variables of such type are referenced. +VDSO_CFLAGS += -D__uint128_t='void*' +# Silence some warnings coming from headers that operate on long's +# (on GCC 4.8 or older, there is unfortunately no way to silence this warning) +VDSO_CFLAGS += $(call cc32-disable-warning,shift-count-overflow) +VDSO_CFLAGS += -Wno-int-to-pointer-cast + +VDSO_AFLAGS := $(VDSO_CAFLAGS) +VDSO_AFLAGS += -D__ASSEMBLY__ + +VDSO_LDFLAGS := $(VDSO_CPPFLAGS) +# From arm vDSO Makefile +VDSO_LDFLAGS += -Wl,-Bsymbolic -Wl,--no-undefined -Wl,-soname=linux-vdso.so.1 +VDSO_LDFLAGS += -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096 +VDSO_LDFLAGS += -nostdlib -shared -mfloat-abi=soft +VDSO_LDFLAGS += $(call cc32-ldoption,-Wl$(comma)--hash-style=sysv) +VDSO_LDFLAGS += $(call cc32-ldoption,-Wl$(comma)--build-id) +VDSO_LDFLAGS += $(call cc32-ldoption,-fuse-ld=bfd) + + +# Borrow vdsomunge.c from the arm vDSO +# We have to use a relative path because scripts/Makefile.host prefixes +# $(hostprogs-y) with $(obj) +munge := ../../../arm/vdso/vdsomunge +hostprogs-y := $(munge) + +c-obj-vdso := note.o +c-obj-vdso-gettimeofday := vgettimeofday.o +asm-obj-vdso := sigreturn.o + +ifneq ($(c-gettimeofday-y),) +VDSO_CFLAGS_gettimeofday_o += -include $(c-gettimeofday-y) +endif + +VDSO_CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os + +# Build rules +targets := $(c-obj-vdso) $(c-obj-vdso-gettimeofday) $(asm-obj-vdso) vdso.so vdso.so.dbg vdso.so.raw +c-obj-vdso := $(addprefix $(obj)/, $(c-obj-vdso)) +c-obj-vdso-gettimeofday := $(addprefix $(obj)/, $(c-obj-vdso-gettimeofday)) +asm-obj-vdso := $(addprefix $(obj)/, $(asm-obj-vdso)) +obj-vdso := $(c-obj-vdso) $(c-obj-vdso-gettimeofday) $(asm-obj-vdso) + +obj-y += vdso.o +extra-y += vdso.lds +CPPFLAGS_vdso.lds += -P -C -U$(ARCH) + +# Force dependency (vdso.s includes vdso.so through incbin) +$(obj)/vdso.o: $(obj)/vdso.so + +include/generated/vdso32-offsets.h: $(obj)/vdso.so.dbg FORCE + $(call if_changed,vdsosym) + +# Strip rule for vdso.so +$(obj)/vdso.so: OBJCOPYFLAGS := -S +$(obj)/vdso.so: $(obj)/vdso.so.dbg FORCE + $(call if_changed,objcopy) + +$(obj)/vdso.so.dbg: $(obj)/vdso.so.raw $(obj)/$(munge) FORCE + $(call if_changed,vdsomunge) + +# Link rule for the .so file, .lds has to be first +$(obj)/vdso.so.raw: $(src)/vdso.lds $(obj-vdso) FORCE + $(call if_changed,vdsold) + $(call if_changed,vdso_check) + +# Compilation rules for the vDSO sources +$(c-obj-vdso): %.o: %.c FORCE + $(call if_changed_dep,vdsocc) 
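One flag above deserves a concrete illustration: -D__uint128_t='void*'. With the type rewritten to a pointer, headers that merely declare such variables still compile on the 32-bit target, while any code that actually computes with one fails loudly, which is exactly the tripwire the comment describes:

    #define __uint128_t void *    /* what the Makefile injects */

    __uint128_t stashed;          /* a bare declaration is fine */

    /* Arithmetic now draws the intended diagnostic; uncommenting
     *     __uint128_t twice(__uint128_t x) { return x * 2; }
     * fails because multiplication is not defined on void *. */
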
+$(c-obj-vdso-gettimeofday): %.o: %.c FORCE + $(call if_changed_dep,vdsocc_gettimeofday) +$(asm-obj-vdso): %.o: %.S FORCE + $(call if_changed_dep,vdsoas) + +# Actual build commands +quiet_cmd_vdsold = VDSOL $@ + cmd_vdsold = $(COMPATCC) -Wp,-MD,$(depfile) $(VDSO_LDFLAGS) \ + -Wl,-T $(filter %.lds,$^) $(filter %.o,$^) -o $@ +quiet_cmd_vdsocc = VDSOC $@ + cmd_vdsocc = $(COMPATCC) -Wp,-MD,$(depfile) $(VDSO_CFLAGS) -c -o $@ $< +quiet_cmd_vdsocc_gettimeofday = VDSOC_GTD $@ + cmd_vdsocc_gettimeofday = $(COMPATCC) -Wp,-MD,$(depfile) $(VDSO_CFLAGS) $(VDSO_CFLAGS_gettimeofday_o) -c -o $@ $< +quiet_cmd_vdsoas = VDSOA $@ + cmd_vdsoas = $(COMPATCC) -Wp,-MD,$(depfile) $(VDSO_AFLAGS) -c -o $@ $< + +quiet_cmd_vdsomunge = MUNGE $@ + cmd_vdsomunge = $(obj)/$(munge) $< $@ + +# Generate vDSO offsets using helper script (borrowed from the 64-bit vDSO) +gen-vdsosym := $(srctree)/$(src)/../vdso/gen_vdso_offsets.sh +quiet_cmd_vdsosym = VDSOSYM $@ +# The AArch64 nm should be able to read an AArch32 binary + cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ + +# Install commands for the unstripped file +quiet_cmd_vdso_install = INSTALL $@ + cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/vdso32.so + +vdso.so: $(obj)/vdso.so.dbg + @mkdir -p $(MODLIB)/vdso + $(call cmd,vdso_install) + +vdso_install: vdso.so diff --git a/arch/arm64/kernel/vdso32/note.c b/arch/arm64/kernel/vdso32/note.c new file mode 100644 index 000000000000..eff5bf9efb8b --- /dev/null +++ b/arch/arm64/kernel/vdso32/note.c @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2012-2018 ARM Limited + * + * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text. + * Here we can supply some information useful to userland. + */ + +#include <linux/uts.h> +#include <linux/version.h> +#include <linux/elfnote.h> +#include <linux/build-salt.h> + +ELFNOTE32("Linux", 0, LINUX_VERSION_CODE); +BUILD_SALT; diff --git a/arch/arm64/kernel/vdso32/sigreturn.S b/arch/arm64/kernel/vdso32/sigreturn.S new file mode 100644 index 000000000000..1a81277c2d09 --- /dev/null +++ b/arch/arm64/kernel/vdso32/sigreturn.S @@ -0,0 +1,62 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This file provides both A32 and T32 versions, in accordance with the + * arm sigreturn code. 
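The ELFNOTE32() in note.c above expands to a standard ELF note record: a fixed 12-byte header followed by the padded owner name and payload. A userspace peek at the header layout it emits:

    #include <elf.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* mirrors ELFNOTE32("Linux", 0, LINUX_VERSION_CODE) */
        Elf32_Nhdr hdr = {
            .n_namesz = sizeof("Linux"),     /* 6, padded to 8 */
            .n_descsz = sizeof(uint32_t),    /* the version word */
            .n_type   = 0,
        };

        printf("%zu-byte header, name %u, desc %u\n",
               sizeof(hdr), hdr.n_namesz, hdr.n_descsz);
        return 0;
    }
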
+ * + * Copyright (C) 2018 ARM Limited + */ + +#include <linux/linkage.h> +#include <asm/asm-offsets.h> +#include <asm/unistd.h> + +#define ARM_ENTRY(name) \ + ENTRY(name) + +#define ARM_ENDPROC(name) \ + .type name, %function; \ + END(name) + + .text + + .arm + .fnstart + .save {r0-r15} + .pad #COMPAT_SIGFRAME_REGS_OFFSET + nop +ARM_ENTRY(__kernel_sigreturn_arm) + mov r7, #__NR_compat_sigreturn + svc #0 + .fnend +ARM_ENDPROC(__kernel_sigreturn_arm) + + .fnstart + .save {r0-r15} + .pad #COMPAT_RT_SIGFRAME_REGS_OFFSET + nop +ARM_ENTRY(__kernel_rt_sigreturn_arm) + mov r7, #__NR_compat_rt_sigreturn + svc #0 + .fnend +ARM_ENDPROC(__kernel_rt_sigreturn_arm) + + .thumb + .fnstart + .save {r0-r15} + .pad #COMPAT_SIGFRAME_REGS_OFFSET + nop +ARM_ENTRY(__kernel_sigreturn_thumb) + mov r7, #__NR_compat_sigreturn + svc #0 + .fnend +ARM_ENDPROC(__kernel_sigreturn_thumb) + + .fnstart + .save {r0-r15} + .pad #COMPAT_RT_SIGFRAME_REGS_OFFSET + nop +ARM_ENTRY(__kernel_rt_sigreturn_thumb) + mov r7, #__NR_compat_rt_sigreturn + svc #0 + .fnend +ARM_ENDPROC(__kernel_rt_sigreturn_thumb) diff --git a/arch/arm64/kernel/vdso32/vdso.S b/arch/arm64/kernel/vdso32/vdso.S new file mode 100644 index 000000000000..e72ac7bc4c04 --- /dev/null +++ b/arch/arm64/kernel/vdso32/vdso.S @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2012 ARM Limited + */ + +#include <linux/init.h> +#include <linux/linkage.h> +#include <linux/const.h> +#include <asm/page.h> + + .globl vdso32_start, vdso32_end + .section .rodata + .balign PAGE_SIZE +vdso32_start: + .incbin "arch/arm64/kernel/vdso32/vdso.so" + .balign PAGE_SIZE +vdso32_end: + + .previous diff --git a/arch/arm64/kernel/vdso32/vdso.lds.S b/arch/arm64/kernel/vdso32/vdso.lds.S new file mode 100644 index 000000000000..a3944927eaeb --- /dev/null +++ b/arch/arm64/kernel/vdso32/vdso.lds.S @@ -0,0 +1,82 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Adapted from arm64 version. + * + * GNU linker script for the VDSO library. + * Heavily based on the vDSO linker scripts for other archs. + * + * Copyright (C) 2012-2018 ARM Limited + */ + +#include <linux/const.h> +#include <asm/page.h> +#include <asm/vdso.h> + +OUTPUT_FORMAT("elf32-littlearm", "elf32-bigarm", "elf32-littlearm") +OUTPUT_ARCH(arm) + +SECTIONS +{ + PROVIDE_HIDDEN(_vdso_data = . - PAGE_SIZE); + . = VDSO_LBASE + SIZEOF_HEADERS; + + .hash : { *(.hash) } :text + .gnu.hash : { *(.gnu.hash) } + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + + .note : { *(.note.*) } :text :note + + .dynamic : { *(.dynamic) } :text :dynamic + + .rodata : { *(.rodata*) } :text + + .text : { *(.text*) } :text =0xe7f001f2 + + .got : { *(.got) } + .rel.plt : { *(.rel.plt) } + + /DISCARD/ : { + *(.note.GNU-stack) + *(.data .data.* .gnu.linkonce.d.* .sdata*) + *(.bss .sbss .dynbss .dynsbss) + } +} + +/* + * We must supply the ELF program headers explicitly to get just one + * PT_LOAD segment, and set the flags explicitly to make segments read-only. 
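The FLAGS() values in the PHDRS block that follows are raw ELF segment permission bits (PF_X = 1, PF_W = 2, PF_R = 4), so FLAGS(5) yields a read+execute text segment and FLAGS(4) a read-only one, with no writable segment anywhere. A quick check against <elf.h>:

    #include <elf.h>
    #include <stdio.h>

    int main(void)
    {
        printf("PF_R|PF_X = %d, PF_R = %d\n", PF_R | PF_X, PF_R);
        return 0;    /* prints 5 and 4 */
    }
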
+ */ +PHDRS +{ + text PT_LOAD FLAGS(5) FILEHDR PHDRS; /* PF_R|PF_X */ + dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ + note PT_NOTE FLAGS(4); /* PF_R */ +} + +VERSION +{ + LINUX_2.6 { + global: + __vdso_clock_gettime; + __vdso_gettimeofday; + __vdso_clock_getres; + __kernel_sigreturn_arm; + __kernel_sigreturn_thumb; + __kernel_rt_sigreturn_arm; + __kernel_rt_sigreturn_thumb; + __vdso_clock_gettime64; + local: *; + }; +} + +/* + * Make the sigreturn code visible to the kernel. + */ +VDSO_compat_sigreturn_arm = __kernel_sigreturn_arm; +VDSO_compat_sigreturn_thumb = __kernel_sigreturn_thumb; +VDSO_compat_rt_sigreturn_arm = __kernel_rt_sigreturn_arm; +VDSO_compat_rt_sigreturn_thumb = __kernel_rt_sigreturn_thumb; diff --git a/arch/arm64/kernel/vdso32/vgettimeofday.c b/arch/arm64/kernel/vdso32/vgettimeofday.c new file mode 100644 index 000000000000..54fc1c2ce93f --- /dev/null +++ b/arch/arm64/kernel/vdso32/vgettimeofday.c @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ARM64 compat userspace implementations of gettimeofday() and similar. + * + * Copyright (C) 2018 ARM Limited + * + */ +#include <linux/time.h> +#include <linux/types.h> + +int __vdso_clock_gettime(clockid_t clock, + struct old_timespec32 *ts) +{ + /* The checks below are required for ABI consistency with arm */ + if ((u32)ts >= TASK_SIZE_32) + return -EFAULT; + + return __cvdso_clock_gettime32(clock, ts); +} + +int __vdso_clock_gettime64(clockid_t clock, + struct __kernel_timespec *ts) +{ + /* The checks below are required for ABI consistency with arm */ + if ((u32)ts >= TASK_SIZE_32) + return -EFAULT; + + return __cvdso_clock_gettime(clock, ts); +} + +int __vdso_gettimeofday(struct __kernel_old_timeval *tv, + struct timezone *tz) +{ + return __cvdso_gettimeofday(tv, tz); +} + +int __vdso_clock_getres(clockid_t clock_id, + struct old_timespec32 *res) +{ + /* The checks below are required for ABI consistency with arm */ + if ((u32)res >= TASK_SIZE_32) + return -EFAULT; + + return __cvdso_clock_getres_time32(clock_id, res); +} + +/* Avoid unresolved references emitted by GCC */ + +void __aeabi_unwind_cpp_pr0(void) +{ +} + +void __aeabi_unwind_cpp_pr1(void) +{ +} + +void __aeabi_unwind_cpp_pr2(void) +{ +} diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index 6e3c9c8b2df9..525010504f9d 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -112,9 +112,7 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu) if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) { u64 *guest_zcr = &vcpu->arch.ctxt.sys_regs[ZCR_EL1]; - /* Clean guest FP state to memory and invalidate cpu view */ - fpsimd_save(); - fpsimd_flush_cpu_state(); + fpsimd_save_and_flush_cpu_state(); if (guest_has_sve) *guest_zcr = read_sysreg_s(SYS_ZCR_EL12); diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index c2afa7982047..dfd626447482 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -208,7 +208,7 @@ out: #define vq_word(vq) (((vq) - SVE_VQ_MIN) / 64) #define vq_mask(vq) ((u64)1 << ((vq) - SVE_VQ_MIN) % 64) -#define vq_present(vqs, vq) ((vqs)[vq_word(vq)] & vq_mask(vq)) +#define vq_present(vqs, vq) (!!((vqs)[vq_word(vq)] & vq_mask(vq))) static int get_sve_vls(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) { diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index b0041812bca9..58f281b6ca4a 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -604,7 +604,7 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu) * Naturally, we want to avoid this. 
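Back in the guest.c hunk above, the added !! on vq_present() is load-bearing: the raw masked value is truthy but not equal to 1, so comparing it against a genuine boolean silently misfires for high bit positions. A short demonstration of that failure shape:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t raw = UINT64_C(1) << 40;    /* bit set in the word */
        bool avail = true;

        printf("raw == avail:   %d\n", raw == avail);      /* 0: mismatch! */
        printf("!!raw == avail: %d\n", !!raw == avail);    /* 1: correct   */
        return 0;
    }
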
*/ if (system_uses_irq_prio_masking()) { - gic_write_pmr(GIC_PRIO_IRQON); + gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); dsb(sy); } diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 5992eb9a9a08..1d17dbeafe76 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -80,10 +80,6 @@ static int __swiotlb_mmap_pfn(struct vm_area_struct *vma, static int __init arm64_dma_init(void) { - WARN_TAINT(ARCH_DMA_MINALIGN < cache_line_size(), - TAINT_CPU_OUT_OF_SPEC, - "ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (%d < %d)", - ARCH_DMA_MINALIGN, cache_line_size()); return dma_atomic_pool_init(GFP_DMA32, __pgprot(PROT_NORMAL_NC)); } arch_initcall(arm64_dma_init); @@ -461,6 +457,14 @@ static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, const struct iommu_ops *iommu, bool coherent) { + int cls = cache_line_size_of_cpu(); + + WARN_TAINT(!coherent && cls > ARCH_DMA_MINALIGN, + TAINT_CPU_OUT_OF_SPEC, + "%s %s: ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (%d < %d)", + dev_driver_string(dev), dev_name(dev), + ARCH_DMA_MINALIGN, cls); + dev->dma_coherent = coherent; __iommu_setup_dma_ops(dev, dma_base, size, iommu); diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 2d115016feb4..c8c61b1eb479 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -384,40 +384,31 @@ static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *re #define VM_FAULT_BADACCESS 0x020000 static vm_fault_t __do_page_fault(struct mm_struct *mm, unsigned long addr, - unsigned int mm_flags, unsigned long vm_flags, - struct task_struct *tsk) + unsigned int mm_flags, unsigned long vm_flags) { - struct vm_area_struct *vma; - vm_fault_t fault; + struct vm_area_struct *vma = find_vma(mm, addr); - vma = find_vma(mm, addr); - fault = VM_FAULT_BADMAP; if (unlikely(!vma)) - goto out; - if (unlikely(vma->vm_start > addr)) - goto check_stack; + return VM_FAULT_BADMAP; /* * Ok, we have a good vm_area for this memory access, so we can handle * it. */ -good_area: + if (unlikely(vma->vm_start > addr)) { + if (!(vma->vm_flags & VM_GROWSDOWN)) + return VM_FAULT_BADMAP; + if (expand_stack(vma, addr)) + return VM_FAULT_BADMAP; + } + /* * Check that the permissions on the VMA allow for the fault which * occurred. */ - if (!(vma->vm_flags & vm_flags)) { - fault = VM_FAULT_BADACCESS; - goto out; - } - + if (!(vma->vm_flags & vm_flags)) + return VM_FAULT_BADACCESS; return handle_mm_fault(vma, addr & PAGE_MASK, mm_flags); - -check_stack: - if (vma->vm_flags & VM_GROWSDOWN && !expand_stack(vma, addr)) - goto good_area; -out: - return fault; } static bool is_el0_instruction_abort(unsigned int esr) @@ -425,12 +416,20 @@ static bool is_el0_instruction_abort(unsigned int esr) return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_LOW; } +/* + * Note: not valid for EL1 DC IVAC, but we never use that such that it + * should fault. EL0 cannot issue DC IVAC (undef). 
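The predicate being introduced here keys off two ISS fields of the ESR: WnR ("write, not read") and CM ("cache maintenance"), because DC instructions report WnR=1 yet must not be treated as write faults. A standalone decode of the same shape; the bit positions (WnR = ISS bit 6, CM = ISS bit 8) are quoted from the ARMv8 ISS encoding, not from this patch:

    #include <stdbool.h>
    #include <stdint.h>

    #define ESR_WNR    (UINT32_C(1) << 6)    /* write, not read */
    #define ESR_CM     (UINT32_C(1) << 8)    /* cache maintenance op */

    static bool is_write_abort_sketch(uint32_t esr)
    {
        /* writes fault with WnR set; DC ops also set WnR but
         * additionally set CM, and are filtered out here */
        return (esr & ESR_WNR) && !(esr & ESR_CM);
    }
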
+ */ +static bool is_write_abort(unsigned int esr) +{ + return (esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM); +} + static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, struct pt_regs *regs) { const struct fault_info *inf; - struct task_struct *tsk; - struct mm_struct *mm; + struct mm_struct *mm = current->mm; vm_fault_t fault, major = 0; unsigned long vm_flags = VM_READ | VM_WRITE; unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; @@ -438,9 +437,6 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, if (notify_page_fault(regs, esr)) return 0; - tsk = current; - mm = tsk->mm; - /* * If we're in an interrupt or have no user context, we must not take * the fault. @@ -453,7 +449,8 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, if (is_el0_instruction_abort(esr)) { vm_flags = VM_EXEC; - } else if ((esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM)) { + mm_flags |= FAULT_FLAG_INSTRUCTION; + } else if (is_write_abort(esr)) { vm_flags = VM_WRITE; mm_flags |= FAULT_FLAG_WRITE; } @@ -492,12 +489,14 @@ retry: */ might_sleep(); #ifdef CONFIG_DEBUG_VM - if (!user_mode(regs) && !search_exception_tables(regs->pc)) + if (!user_mode(regs) && !search_exception_tables(regs->pc)) { + up_read(&mm->mmap_sem); goto no_context; + } #endif } - fault = __do_page_fault(mm, addr, mm_flags, vm_flags, tsk); + fault = __do_page_fault(mm, addr, mm_flags, vm_flags); major |= fault & VM_FAULT_MAJOR; if (fault & VM_FAULT_RETRY) { @@ -537,11 +536,11 @@ retry: * that point. */ if (major) { - tsk->maj_flt++; + current->maj_flt++; perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, addr); } else { - tsk->min_flt++; + current->min_flt++; perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, addr); } diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index f475e54fbc43..bbeb6a5a6ba6 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -228,7 +228,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, if (sz == PUD_SIZE) { ptep = (pte_t *)pudp; - } else if (sz == (PAGE_SIZE * CONT_PTES)) { + } else if (sz == (CONT_PTE_SIZE)) { pmdp = pmd_alloc(mm, pudp, addr); WARN_ON(addr & (sz - 1)); @@ -246,7 +246,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, ptep = huge_pmd_share(mm, addr, pudp); else ptep = (pte_t *)pmd_alloc(mm, pudp, addr); - } else if (sz == (PMD_SIZE * CONT_PMDS)) { + } else if (sz == (CONT_PMD_SIZE)) { pmdp = pmd_alloc(mm, pudp, addr); WARN_ON(addr & (sz - 1)); return (pte_t *)pmdp; @@ -454,9 +454,9 @@ static int __init hugetlbpage_init(void) #ifdef CONFIG_ARM64_4K_PAGES add_huge_page_size(PUD_SIZE); #endif - add_huge_page_size(PMD_SIZE * CONT_PMDS); + add_huge_page_size(CONT_PMD_SIZE); add_huge_page_size(PMD_SIZE); - add_huge_page_size(PAGE_SIZE * CONT_PTES); + add_huge_page_size(CONT_PTE_SIZE); return 0; } @@ -470,9 +470,9 @@ static __init int setup_hugepagesz(char *opt) #ifdef CONFIG_ARM64_4K_PAGES case PUD_SIZE: #endif - case PMD_SIZE * CONT_PMDS: + case CONT_PMD_SIZE: case PMD_SIZE: - case PAGE_SIZE * CONT_PTES: + case CONT_PTE_SIZE: add_huge_page_size(ps); return 1; } diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 749c9b269f08..f3c795278def 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -180,8 +180,9 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) { unsigned long max_zone_pfns[MAX_NR_ZONES] = {0}; - if (IS_ENABLED(CONFIG_ZONE_DMA32)) - max_zone_pfns[ZONE_DMA32] = PFN_DOWN(max_zone_dma_phys()); +#ifdef CONFIG_ZONE_DMA32 + 
max_zone_pfns[ZONE_DMA32] = PFN_DOWN(max_zone_dma_phys()); +#endif max_zone_pfns[ZONE_NORMAL] = max; free_area_init_nodes(max_zone_pfns); diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index e5ae8663f230..3645f29bd814 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -765,7 +765,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, return 0; } -#endif /* CONFIG_ARM64_64K_PAGES */ +#endif /* !ARM64_SWAPPER_USES_SECTION_MAPS */ void vmemmap_free(unsigned long start, unsigned long end, struct vmem_altmap *altmap) { @@ -960,32 +960,28 @@ int __init arch_ioremap_pmd_supported(void) int pud_set_huge(pud_t *pudp, phys_addr_t phys, pgprot_t prot) { - pgprot_t sect_prot = __pgprot(PUD_TYPE_SECT | - pgprot_val(mk_sect_prot(prot))); - pud_t new_pud = pfn_pud(__phys_to_pfn(phys), sect_prot); + pud_t new_pud = pfn_pud(__phys_to_pfn(phys), mk_pud_sect_prot(prot)); /* Only allow permission changes for now */ if (!pgattr_change_is_safe(READ_ONCE(pud_val(*pudp)), pud_val(new_pud))) return 0; - BUG_ON(phys & ~PUD_MASK); + VM_BUG_ON(phys & ~PUD_MASK); set_pud(pudp, new_pud); return 1; } int pmd_set_huge(pmd_t *pmdp, phys_addr_t phys, pgprot_t prot) { - pgprot_t sect_prot = __pgprot(PMD_TYPE_SECT | - pgprot_val(mk_sect_prot(prot))); - pmd_t new_pmd = pfn_pmd(__phys_to_pfn(phys), sect_prot); + pmd_t new_pmd = pfn_pmd(__phys_to_pfn(phys), mk_pmd_sect_prot(prot)); /* Only allow permission changes for now */ if (!pgattr_change_is_safe(READ_ONCE(pmd_val(*pmdp)), pmd_val(new_pmd))) return 0; - BUG_ON(phys & ~PMD_MASK); + VM_BUG_ON(phys & ~PMD_MASK); set_pmd(pmdp, new_pmd); return 1; } diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c index 47b057bfa803..fcdcf6cd7677 100644 --- a/arch/arm64/mm/pageattr.c +++ b/arch/arm64/mm/pageattr.c @@ -151,17 +151,48 @@ int set_memory_valid(unsigned long addr, int numpages, int enable) __pgprot(PTE_VALID)); } -#ifdef CONFIG_DEBUG_PAGEALLOC +int set_direct_map_invalid_noflush(struct page *page) +{ + struct page_change_data data = { + .set_mask = __pgprot(0), + .clear_mask = __pgprot(PTE_VALID), + }; + + if (!rodata_full) + return 0; + + return apply_to_page_range(&init_mm, + (unsigned long)page_address(page), + PAGE_SIZE, change_page_range, &data); +} + +int set_direct_map_default_noflush(struct page *page) +{ + struct page_change_data data = { + .set_mask = __pgprot(PTE_VALID | PTE_WRITE), + .clear_mask = __pgprot(PTE_RDONLY), + }; + + if (!rodata_full) + return 0; + + return apply_to_page_range(&init_mm, + (unsigned long)page_address(page), + PAGE_SIZE, change_page_range, &data); +} + void __kernel_map_pages(struct page *page, int numpages, int enable) { + if (!debug_pagealloc_enabled() && !rodata_full) + return; + set_memory_valid((unsigned long)page_address(page), numpages, enable); } -#ifdef CONFIG_HIBERNATION + /* - * When built with CONFIG_DEBUG_PAGEALLOC and CONFIG_HIBERNATION, this function - * is used to determine if a linear map page has been marked as not-valid by - * CONFIG_DEBUG_PAGEALLOC. Walk the page table and check the PTE_VALID bit. - * This is based on kern_addr_valid(), which almost does what we need. + * This function is used to determine if a linear map page has been marked as + * not-valid. Walk the page table and check the PTE_VALID bit. This is based + * on kern_addr_valid(), which almost does what we need. * * Because this is only called on the kernel linear map, p?d_sect() implies * p?d_present(). 
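The two set_direct_map_*_noflush() helpers added above give callers a way to take a page out of the kernel linear map and put it back; both degrade to no-ops unless rodata_full, and TLB maintenance is deliberately left to the caller. A hypothetical caller, sketched only to show the intended pairing (this consumer does not exist in the patch):

    /* Hypothetical usage sketch: hide a page from the linear map
     * around some sensitive work. Error handling is minimal and the
     * explicit flush is the caller's responsibility, as the _noflush
     * suffix signals. */
    static int with_page_unmapped(struct page *page,
                                  void (*work)(void *arg), void *arg)
    {
        unsigned long addr = (unsigned long)page_address(page);
        int err = set_direct_map_invalid_noflush(page);

        if (err)
            return err;
        flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
        work(arg);
        return set_direct_map_default_noflush(page);
    }
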
When debug_pagealloc is enabled, sections mappings are @@ -175,6 +206,9 @@ bool kernel_page_present(struct page *page) pte_t *ptep; unsigned long addr = (unsigned long)page_address(page); + if (!debug_pagealloc_enabled() && !rodata_full) + return true; + pgdp = pgd_offset_k(addr); if (pgd_none(READ_ONCE(*pgdp))) return false; @@ -196,5 +230,3 @@ bool kernel_page_present(struct page *page) ptep = pte_offset_kernel(pmdp, addr); return pte_valid(READ_ONCE(*ptep)); } -#endif /* CONFIG_HIBERNATION */ -#endif /* CONFIG_DEBUG_PAGEALLOC */ diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 87c568807925..f5b437f8a22b 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -970,7 +970,7 @@ void *bpf_jit_alloc_exec(unsigned long size) { return __vmalloc_node_range(size, PAGE_SIZE, BPF_JIT_REGION_START, BPF_JIT_REGION_END, GFP_KERNEL, - PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, + PAGE_KERNEL, 0, NUMA_NO_NODE, __builtin_return_address(0)); } diff --git a/arch/csky/kernel/signal.c b/arch/csky/kernel/signal.c index 04a43cfd4e09..d47a3381aad8 100644 --- a/arch/csky/kernel/signal.c +++ b/arch/csky/kernel/signal.c @@ -39,6 +39,11 @@ static int save_fpu_state(struct sigcontext __user *sc) #endif struct rt_sigframe { + /* + * pad[3] is compatible with the same struct defined in + * gcc/libgcc/config/csky/linux-unwind.h + */ + int pad[3]; struct siginfo info; struct ucontext uc; }; diff --git a/arch/ia64/include/asm/atomic.h b/arch/ia64/include/asm/atomic.h index 206530d0751b..50440f3ddc43 100644 --- a/arch/ia64/include/asm/atomic.h +++ b/arch/ia64/include/asm/atomic.h @@ -124,10 +124,10 @@ ATOMIC_FETCH_OP(xor, ^) #undef ATOMIC_OP #define ATOMIC64_OP(op, c_op) \ -static __inline__ long \ -ia64_atomic64_##op (__s64 i, atomic64_t *v) \ +static __inline__ s64 \ +ia64_atomic64_##op (s64 i, atomic64_t *v) \ { \ - __s64 old, new; \ + s64 old, new; \ CMPXCHG_BUGCHECK_DECL \ \ do { \ @@ -139,10 +139,10 @@ ia64_atomic64_##op (__s64 i, atomic64_t *v) \ } #define ATOMIC64_FETCH_OP(op, c_op) \ -static __inline__ long \ -ia64_atomic64_fetch_##op (__s64 i, atomic64_t *v) \ +static __inline__ s64 \ +ia64_atomic64_fetch_##op (s64 i, atomic64_t *v) \ { \ - __s64 old, new; \ + s64 old, new; \ CMPXCHG_BUGCHECK_DECL \ \ do { \ @@ -162,7 +162,7 @@ ATOMIC64_OPS(sub, -) #define atomic64_add_return(i,v) \ ({ \ - long __ia64_aar_i = (i); \ + s64 __ia64_aar_i = (i); \ __ia64_atomic_const(i) \ ? ia64_fetch_and_add(__ia64_aar_i, &(v)->counter) \ : ia64_atomic64_add(__ia64_aar_i, v); \ @@ -170,7 +170,7 @@ ATOMIC64_OPS(sub, -) #define atomic64_sub_return(i,v) \ ({ \ - long __ia64_asr_i = (i); \ + s64 __ia64_asr_i = (i); \ __ia64_atomic_const(i) \ ? ia64_fetch_and_add(-__ia64_asr_i, &(v)->counter) \ : ia64_atomic64_sub(__ia64_asr_i, v); \ @@ -178,7 +178,7 @@ ATOMIC64_OPS(sub, -) #define atomic64_fetch_add(i,v) \ ({ \ - long __ia64_aar_i = (i); \ + s64 __ia64_aar_i = (i); \ __ia64_atomic_const(i) \ ? ia64_fetchadd(__ia64_aar_i, &(v)->counter, acq) \ : ia64_atomic64_fetch_add(__ia64_aar_i, v); \ @@ -186,7 +186,7 @@ ATOMIC64_OPS(sub, -) #define atomic64_fetch_sub(i,v) \ ({ \ - long __ia64_asr_i = (i); \ + s64 __ia64_asr_i = (i); \ __ia64_atomic_const(i) \ ? 
ia64_fetchadd(-__ia64_asr_i, &(v)->counter, acq) \ : ia64_atomic64_fetch_sub(__ia64_asr_i, v); \ diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 58a6337c0690..7c52bd2695a2 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -6390,11 +6390,7 @@ pfm_install_alt_pmu_interrupt(pfm_intr_handler_desc_t *hdl) } /* save the current system wide pmu states */ - ret = on_each_cpu(pfm_alt_save_pmu_state, NULL, 1); - if (ret) { - DPRINT(("on_each_cpu() failed: %d\n", ret)); - goto cleanup_reserve; - } + on_each_cpu(pfm_alt_save_pmu_state, NULL, 1); /* officially change to the alternate interrupt handler */ pfm_alt_intr_handler = hdl; @@ -6421,7 +6417,6 @@ int pfm_remove_alt_pmu_interrupt(pfm_intr_handler_desc_t *hdl) { int i; - int ret; if (hdl == NULL) return -EINVAL; @@ -6435,10 +6430,7 @@ pfm_remove_alt_pmu_interrupt(pfm_intr_handler_desc_t *hdl) pfm_alt_intr_handler = NULL; - ret = on_each_cpu(pfm_alt_restore_pmu_state, NULL, 1); - if (ret) { - DPRINT(("on_each_cpu() failed: %d\n", ret)); - } + on_each_cpu(pfm_alt_restore_pmu_state, NULL, 1); for_each_online_cpu(i) { pfm_unreserve_session(NULL, 1, i); diff --git a/arch/ia64/kernel/uncached.c b/arch/ia64/kernel/uncached.c index edcdfc149311..16c6d377c502 100644 --- a/arch/ia64/kernel/uncached.c +++ b/arch/ia64/kernel/uncached.c @@ -121,8 +121,8 @@ static int uncached_add_chunk(struct uncached_pool *uc_pool, int nid) status = ia64_pal_prefetch_visibility(PAL_VISIBILITY_PHYSICAL); if (status == PAL_VISIBILITY_OK_REMOTE_NEEDED) { atomic_set(&uc_pool->status, 0); - status = smp_call_function(uncached_ipi_visibility, uc_pool, 1); - if (status || atomic_read(&uc_pool->status)) + smp_call_function(uncached_ipi_visibility, uc_pool, 1); + if (atomic_read(&uc_pool->status)) goto failed; } else if (status != PAL_VISIBILITY_OK) goto failed; @@ -143,8 +143,8 @@ static int uncached_add_chunk(struct uncached_pool *uc_pool, int nid) if (status != PAL_STATUS_SUCCESS) goto failed; atomic_set(&uc_pool->status, 0); - status = smp_call_function(uncached_ipi_mc_drain, uc_pool, 1); - if (status || atomic_read(&uc_pool->status)) + smp_call_function(uncached_ipi_mc_drain, uc_pool, 1); + if (atomic_read(&uc_pool->status)) goto failed; /* diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index 218e037ef901..00f5c98a5e05 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -3,10 +3,13 @@ config M68K bool default y select ARCH_32BIT_OFF_T + select ARCH_HAS_DMA_MMAP_PGPROT if MMU && !COLDFIRE + select ARCH_HAS_DMA_PREP_COHERENT select ARCH_HAS_SYNC_DMA_FOR_DEVICE if HAS_DMA select ARCH_MIGHT_HAVE_PC_PARPORT if ISA select ARCH_NO_COHERENT_DMA_MMAP if !MMU select ARCH_NO_PREEMPT if !COLDFIRE + select DMA_DIRECT_REMAP if HAS_DMA && MMU && !COLDFIRE select HAVE_IDE select HAVE_AOUT if MMU select HAVE_DEBUG_BUGVERBOSE diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig index fea392cfcf1b..04e0f211afb3 100644 --- a/arch/m68k/configs/amiga_defconfig +++ b/arch/m68k/configs/amiga_defconfig @@ -71,9 +71,6 @@ CONFIG_INET_AH=m CONFIG_INET_ESP=m CONFIG_INET_ESP_OFFLOAD=m CONFIG_INET_IPCOMP=m -CONFIG_INET_XFRM_MODE_TRANSPORT=m -CONFIG_INET_XFRM_MODE_TUNNEL=m -CONFIG_INET_XFRM_MODE_BEET=m CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m CONFIG_INET_RAW_DIAG=m @@ -205,7 +202,6 @@ CONFIG_IP_SET_HASH_NETNET=m CONFIG_IP_SET_HASH_NETPORT=m CONFIG_IP_SET_HASH_NETIFACE=m CONFIG_IP_SET_LIST_SET=m -CONFIG_NFT_CHAIN_ROUTE_IPV4=m CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y @@ -231,7 +227,6 @@ 
CONFIG_IP_NF_RAW=m CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m -CONFIG_NFT_CHAIN_ROUTE_IPV6=m CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_FIB_IPV6=m CONFIG_NF_FLOW_TABLE_IPV6=m @@ -308,7 +303,6 @@ CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set CONFIG_PSAMPLE=m CONFIG_NET_IFE=m -# CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y CONFIG_TEST_ASYNC_DRIVER_PROBE=m @@ -436,6 +430,8 @@ CONFIG_FB_AMIGA_OCS=y CONFIG_FB_AMIGA_ECS=y CONFIG_FB_AMIGA_AGA=y CONFIG_FB_FM2=y +# CONFIG_LCD_CLASS_DEVICE is not set +# CONFIG_BACKLIGHT_CLASS_DEVICE is not set CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y CONFIG_SOUND=m @@ -553,13 +549,14 @@ CONFIG_NLS_MAC_TURKISH=m CONFIG_DLM=m CONFIG_ENCRYPTED_KEYS=m CONFIG_HARDENED_USERCOPY=y -CONFIG_CRYPTO_RSA=m -CONFIG_CRYPTO_DH=m -CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_USER=m CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_TEST=m +CONFIG_CRYPTO_RSA=m +CONFIG_CRYPTO_DH=m +CONFIG_CRYPTO_ECDH=m +CONFIG_CRYPTO_ECRDSA=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m CONFIG_CRYPTO_AEGIS128L=m @@ -583,7 +580,6 @@ CONFIG_CRYPTO_RMD256=m CONFIG_CRYPTO_RMD320=m CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_SM3=m -CONFIG_CRYPTO_STREEBOG=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_AES_TI=m @@ -626,6 +622,7 @@ CONFIG_ATOMIC64_SELFTEST=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_STRING_HELPERS=m +CONFIG_TEST_STRSCPY=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_BITMAP=m diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig index 2474d267460e..c6abbb535878 100644 --- a/arch/m68k/configs/apollo_defconfig +++ b/arch/m68k/configs/apollo_defconfig @@ -67,9 +67,6 @@ CONFIG_INET_AH=m CONFIG_INET_ESP=m CONFIG_INET_ESP_OFFLOAD=m CONFIG_INET_IPCOMP=m -CONFIG_INET_XFRM_MODE_TRANSPORT=m -CONFIG_INET_XFRM_MODE_TUNNEL=m -CONFIG_INET_XFRM_MODE_BEET=m CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m CONFIG_INET_RAW_DIAG=m @@ -201,7 +198,6 @@ CONFIG_IP_SET_HASH_NETNET=m CONFIG_IP_SET_HASH_NETPORT=m CONFIG_IP_SET_HASH_NETIFACE=m CONFIG_IP_SET_LIST_SET=m -CONFIG_NFT_CHAIN_ROUTE_IPV4=m CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y @@ -227,7 +223,6 @@ CONFIG_IP_NF_RAW=m CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m -CONFIG_NFT_CHAIN_ROUTE_IPV6=m CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_FIB_IPV6=m CONFIG_NF_FLOW_TABLE_IPV6=m @@ -304,7 +299,6 @@ CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set CONFIG_PSAMPLE=m CONFIG_NET_IFE=m -# CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y CONFIG_TEST_ASYNC_DRIVER_PROBE=m @@ -397,6 +391,8 @@ CONFIG_PPS_CLIENT_LDISC=m CONFIG_PTP_1588_CLOCK=m # CONFIG_HWMON is not set CONFIG_FB=y +# CONFIG_LCD_CLASS_DEVICE is not set +# CONFIG_BACKLIGHT_CLASS_DEVICE is not set CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y # CONFIG_LOGO_LINUX_VGA16 is not set @@ -513,13 +509,14 @@ CONFIG_NLS_MAC_TURKISH=m CONFIG_DLM=m CONFIG_ENCRYPTED_KEYS=m CONFIG_HARDENED_USERCOPY=y -CONFIG_CRYPTO_RSA=m -CONFIG_CRYPTO_DH=m -CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_USER=m CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_TEST=m +CONFIG_CRYPTO_RSA=m +CONFIG_CRYPTO_DH=m +CONFIG_CRYPTO_ECDH=m +CONFIG_CRYPTO_ECRDSA=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m CONFIG_CRYPTO_AEGIS128L=m @@ -543,7 +540,6 @@ CONFIG_CRYPTO_RMD256=m CONFIG_CRYPTO_RMD320=m CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_SM3=m -CONFIG_CRYPTO_STREEBOG=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_AES_TI=m @@ -586,6 +582,7 @@ 
CONFIG_ATOMIC64_SELFTEST=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_STRING_HELPERS=m +CONFIG_TEST_STRSCPY=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_BITMAP=m diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig index 0fc7d2992fe0..06ae65bad177 100644 --- a/arch/m68k/configs/atari_defconfig +++ b/arch/m68k/configs/atari_defconfig @@ -74,9 +74,6 @@ CONFIG_INET_AH=m CONFIG_INET_ESP=m CONFIG_INET_ESP_OFFLOAD=m CONFIG_INET_IPCOMP=m -CONFIG_INET_XFRM_MODE_TRANSPORT=m -CONFIG_INET_XFRM_MODE_TUNNEL=m -CONFIG_INET_XFRM_MODE_BEET=m CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m CONFIG_INET_RAW_DIAG=m @@ -208,7 +205,6 @@ CONFIG_IP_SET_HASH_NETNET=m CONFIG_IP_SET_HASH_NETPORT=m CONFIG_IP_SET_HASH_NETIFACE=m CONFIG_IP_SET_LIST_SET=m -CONFIG_NFT_CHAIN_ROUTE_IPV4=m CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y @@ -234,7 +230,6 @@ CONFIG_IP_NF_RAW=m CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m -CONFIG_NFT_CHAIN_ROUTE_IPV6=m CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_FIB_IPV6=m CONFIG_NF_FLOW_TABLE_IPV6=m @@ -311,7 +306,6 @@ CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set CONFIG_PSAMPLE=m CONFIG_NET_IFE=m -# CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y CONFIG_TEST_ASYNC_DRIVER_PROBE=m @@ -421,6 +415,8 @@ CONFIG_PTP_1588_CLOCK=m # CONFIG_HWMON is not set CONFIG_FB=y CONFIG_FB_ATARI=y +# CONFIG_LCD_CLASS_DEVICE is not set +# CONFIG_BACKLIGHT_CLASS_DEVICE is not set CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y CONFIG_SOUND=m @@ -535,13 +531,14 @@ CONFIG_NLS_MAC_TURKISH=m CONFIG_DLM=m CONFIG_ENCRYPTED_KEYS=m CONFIG_HARDENED_USERCOPY=y -CONFIG_CRYPTO_RSA=m -CONFIG_CRYPTO_DH=m -CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_USER=m CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_TEST=m +CONFIG_CRYPTO_RSA=m +CONFIG_CRYPTO_DH=m +CONFIG_CRYPTO_ECDH=m +CONFIG_CRYPTO_ECRDSA=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m CONFIG_CRYPTO_AEGIS128L=m @@ -565,7 +562,6 @@ CONFIG_CRYPTO_RMD256=m CONFIG_CRYPTO_RMD320=m CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_SM3=m -CONFIG_CRYPTO_STREEBOG=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_AES_TI=m @@ -608,6 +604,7 @@ CONFIG_ATOMIC64_SELFTEST=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_STRING_HELPERS=m +CONFIG_TEST_STRSCPY=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_BITMAP=m diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig index 699df9fdf866..5616b94053b6 100644 --- a/arch/m68k/configs/bvme6000_defconfig +++ b/arch/m68k/configs/bvme6000_defconfig @@ -64,9 +64,6 @@ CONFIG_INET_AH=m CONFIG_INET_ESP=m CONFIG_INET_ESP_OFFLOAD=m CONFIG_INET_IPCOMP=m -CONFIG_INET_XFRM_MODE_TRANSPORT=m -CONFIG_INET_XFRM_MODE_TUNNEL=m -CONFIG_INET_XFRM_MODE_BEET=m CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m CONFIG_INET_RAW_DIAG=m @@ -198,7 +195,6 @@ CONFIG_IP_SET_HASH_NETNET=m CONFIG_IP_SET_HASH_NETPORT=m CONFIG_IP_SET_HASH_NETIFACE=m CONFIG_IP_SET_LIST_SET=m -CONFIG_NFT_CHAIN_ROUTE_IPV4=m CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y @@ -224,7 +220,6 @@ CONFIG_IP_NF_RAW=m CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m -CONFIG_NFT_CHAIN_ROUTE_IPV6=m CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_FIB_IPV6=m CONFIG_NF_FLOW_TABLE_IPV6=m @@ -301,7 +296,6 @@ CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set CONFIG_PSAMPLE=m CONFIG_NET_IFE=m -# CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y CONFIG_TEST_ASYNC_DRIVER_PROBE=m @@ -394,6 +388,8 @@ 
CONFIG_NTP_PPS=y CONFIG_PPS_CLIENT_LDISC=m CONFIG_PTP_1588_CLOCK=m # CONFIG_HWMON is not set +# CONFIG_LCD_CLASS_DEVICE is not set +# CONFIG_BACKLIGHT_CLASS_DEVICE is not set CONFIG_HID=m CONFIG_HIDRAW=y CONFIG_UHID=m @@ -506,13 +502,14 @@ CONFIG_NLS_MAC_TURKISH=m CONFIG_DLM=m CONFIG_ENCRYPTED_KEYS=m CONFIG_HARDENED_USERCOPY=y -CONFIG_CRYPTO_RSA=m -CONFIG_CRYPTO_DH=m -CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_USER=m CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_TEST=m +CONFIG_CRYPTO_RSA=m +CONFIG_CRYPTO_DH=m +CONFIG_CRYPTO_ECDH=m +CONFIG_CRYPTO_ECRDSA=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m CONFIG_CRYPTO_AEGIS128L=m @@ -536,7 +533,6 @@ CONFIG_CRYPTO_RMD256=m CONFIG_CRYPTO_RMD320=m CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_SM3=m -CONFIG_CRYPTO_STREEBOG=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_AES_TI=m @@ -579,6 +575,7 @@ CONFIG_ATOMIC64_SELFTEST=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_STRING_HELPERS=m +CONFIG_TEST_STRSCPY=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_BITMAP=m diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig index b50802255324..1106521f3b56 100644 --- a/arch/m68k/configs/hp300_defconfig +++ b/arch/m68k/configs/hp300_defconfig @@ -66,9 +66,6 @@ CONFIG_INET_AH=m CONFIG_INET_ESP=m CONFIG_INET_ESP_OFFLOAD=m CONFIG_INET_IPCOMP=m -CONFIG_INET_XFRM_MODE_TRANSPORT=m -CONFIG_INET_XFRM_MODE_TUNNEL=m -CONFIG_INET_XFRM_MODE_BEET=m CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m CONFIG_INET_RAW_DIAG=m @@ -200,7 +197,6 @@ CONFIG_IP_SET_HASH_NETNET=m CONFIG_IP_SET_HASH_NETPORT=m CONFIG_IP_SET_HASH_NETIFACE=m CONFIG_IP_SET_LIST_SET=m -CONFIG_NFT_CHAIN_ROUTE_IPV4=m CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y @@ -226,7 +222,6 @@ CONFIG_IP_NF_RAW=m CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m -CONFIG_NFT_CHAIN_ROUTE_IPV6=m CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_FIB_IPV6=m CONFIG_NF_FLOW_TABLE_IPV6=m @@ -303,7 +298,6 @@ CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set CONFIG_PSAMPLE=m CONFIG_NET_IFE=m -# CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y CONFIG_TEST_ASYNC_DRIVER_PROBE=m @@ -399,6 +393,8 @@ CONFIG_PPS_CLIENT_LDISC=m CONFIG_PTP_1588_CLOCK=m # CONFIG_HWMON is not set CONFIG_FB=y +# CONFIG_LCD_CLASS_DEVICE is not set +# CONFIG_BACKLIGHT_CLASS_DEVICE is not set CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y # CONFIG_LOGO_LINUX_MONO is not set @@ -515,13 +511,14 @@ CONFIG_NLS_MAC_TURKISH=m CONFIG_DLM=m CONFIG_ENCRYPTED_KEYS=m CONFIG_HARDENED_USERCOPY=y -CONFIG_CRYPTO_RSA=m -CONFIG_CRYPTO_DH=m -CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_USER=m CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_TEST=m +CONFIG_CRYPTO_RSA=m +CONFIG_CRYPTO_DH=m +CONFIG_CRYPTO_ECDH=m +CONFIG_CRYPTO_ECRDSA=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m CONFIG_CRYPTO_AEGIS128L=m @@ -545,7 +542,6 @@ CONFIG_CRYPTO_RMD256=m CONFIG_CRYPTO_RMD320=m CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_SM3=m -CONFIG_CRYPTO_STREEBOG=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_AES_TI=m @@ -588,6 +584,7 @@ CONFIG_ATOMIC64_SELFTEST=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_STRING_HELPERS=m +CONFIG_TEST_STRSCPY=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_BITMAP=m diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig index 04e7d70f6030..226c6c063cd4 100644 --- a/arch/m68k/configs/mac_defconfig +++ b/arch/m68k/configs/mac_defconfig @@ -65,9 +65,6 @@ CONFIG_INET_AH=m CONFIG_INET_ESP=m 
CONFIG_INET_ESP_OFFLOAD=m CONFIG_INET_IPCOMP=m -CONFIG_INET_XFRM_MODE_TRANSPORT=m -CONFIG_INET_XFRM_MODE_TUNNEL=m -CONFIG_INET_XFRM_MODE_BEET=m CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m CONFIG_INET_RAW_DIAG=m @@ -199,7 +196,6 @@ CONFIG_IP_SET_HASH_NETNET=m CONFIG_IP_SET_HASH_NETPORT=m CONFIG_IP_SET_HASH_NETIFACE=m CONFIG_IP_SET_LIST_SET=m -CONFIG_NFT_CHAIN_ROUTE_IPV4=m CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y @@ -225,7 +221,6 @@ CONFIG_IP_NF_RAW=m CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m -CONFIG_NFT_CHAIN_ROUTE_IPV6=m CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_FIB_IPV6=m CONFIG_NF_FLOW_TABLE_IPV6=m @@ -305,7 +300,6 @@ CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set CONFIG_PSAMPLE=m CONFIG_NET_IFE=m -# CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y CONFIG_TEST_ASYNC_DRIVER_PROBE=m @@ -423,6 +417,8 @@ CONFIG_PTP_1588_CLOCK=m CONFIG_FB=y CONFIG_FB_VALKYRIE=y CONFIG_FB_MAC=y +# CONFIG_LCD_CLASS_DEVICE is not set +# CONFIG_BACKLIGHT_CLASS_DEVICE is not set CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y CONFIG_HID=m @@ -537,13 +533,14 @@ CONFIG_NLS_MAC_TURKISH=m CONFIG_DLM=m CONFIG_ENCRYPTED_KEYS=m CONFIG_HARDENED_USERCOPY=y -CONFIG_CRYPTO_RSA=m -CONFIG_CRYPTO_DH=m -CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_USER=m CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_TEST=m +CONFIG_CRYPTO_RSA=m +CONFIG_CRYPTO_DH=m +CONFIG_CRYPTO_ECDH=m +CONFIG_CRYPTO_ECRDSA=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m CONFIG_CRYPTO_AEGIS128L=m @@ -567,7 +564,6 @@ CONFIG_CRYPTO_RMD256=m CONFIG_CRYPTO_RMD320=m CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_SM3=m -CONFIG_CRYPTO_STREEBOG=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_AES_TI=m @@ -610,6 +606,7 @@ CONFIG_ATOMIC64_SELFTEST=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_STRING_HELPERS=m +CONFIG_TEST_STRSCPY=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_BITMAP=m diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig index 5e1cc4c17852..39f603417928 100644 --- a/arch/m68k/configs/multi_defconfig +++ b/arch/m68k/configs/multi_defconfig @@ -85,9 +85,6 @@ CONFIG_INET_AH=m CONFIG_INET_ESP=m CONFIG_INET_ESP_OFFLOAD=m CONFIG_INET_IPCOMP=m -CONFIG_INET_XFRM_MODE_TRANSPORT=m -CONFIG_INET_XFRM_MODE_TUNNEL=m -CONFIG_INET_XFRM_MODE_BEET=m CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m CONFIG_INET_RAW_DIAG=m @@ -219,7 +216,6 @@ CONFIG_IP_SET_HASH_NETNET=m CONFIG_IP_SET_HASH_NETPORT=m CONFIG_IP_SET_HASH_NETIFACE=m CONFIG_IP_SET_LIST_SET=m -CONFIG_NFT_CHAIN_ROUTE_IPV4=m CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y @@ -245,7 +241,6 @@ CONFIG_IP_NF_RAW=m CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m -CONFIG_NFT_CHAIN_ROUTE_IPV6=m CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_FIB_IPV6=m CONFIG_NF_FLOW_TABLE_IPV6=m @@ -325,7 +320,6 @@ CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set CONFIG_PSAMPLE=m CONFIG_NET_IFE=m -# CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y CONFIG_TEST_ASYNC_DRIVER_PROBE=m @@ -499,6 +493,8 @@ CONFIG_FB_FM2=y CONFIG_FB_ATARI=y CONFIG_FB_VALKYRIE=y CONFIG_FB_MAC=y +# CONFIG_LCD_CLASS_DEVICE is not set +# CONFIG_BACKLIGHT_CLASS_DEVICE is not set CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y CONFIG_SOUND=m @@ -619,13 +615,14 @@ CONFIG_NLS_MAC_TURKISH=m CONFIG_DLM=m CONFIG_ENCRYPTED_KEYS=m CONFIG_HARDENED_USERCOPY=y -CONFIG_CRYPTO_RSA=m -CONFIG_CRYPTO_DH=m -CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_USER=m CONFIG_CRYPTO_CRYPTD=m 
CONFIG_CRYPTO_TEST=m +CONFIG_CRYPTO_RSA=m +CONFIG_CRYPTO_DH=m +CONFIG_CRYPTO_ECDH=m +CONFIG_CRYPTO_ECRDSA=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m CONFIG_CRYPTO_AEGIS128L=m @@ -649,7 +646,6 @@ CONFIG_CRYPTO_RMD256=m CONFIG_CRYPTO_RMD320=m CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_SM3=m -CONFIG_CRYPTO_STREEBOG=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_AES_TI=m @@ -692,6 +688,7 @@ CONFIG_ATOMIC64_SELFTEST=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_STRING_HELPERS=m +CONFIG_TEST_STRSCPY=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_BITMAP=m diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig index 170ac8792c2d..175a607f576c 100644 --- a/arch/m68k/configs/mvme147_defconfig +++ b/arch/m68k/configs/mvme147_defconfig @@ -63,9 +63,6 @@ CONFIG_INET_AH=m CONFIG_INET_ESP=m CONFIG_INET_ESP_OFFLOAD=m CONFIG_INET_IPCOMP=m -CONFIG_INET_XFRM_MODE_TRANSPORT=m -CONFIG_INET_XFRM_MODE_TUNNEL=m -CONFIG_INET_XFRM_MODE_BEET=m CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m CONFIG_INET_RAW_DIAG=m @@ -197,7 +194,6 @@ CONFIG_IP_SET_HASH_NETNET=m CONFIG_IP_SET_HASH_NETPORT=m CONFIG_IP_SET_HASH_NETIFACE=m CONFIG_IP_SET_LIST_SET=m -CONFIG_NFT_CHAIN_ROUTE_IPV4=m CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y @@ -223,7 +219,6 @@ CONFIG_IP_NF_RAW=m CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m -CONFIG_NFT_CHAIN_ROUTE_IPV6=m CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_FIB_IPV6=m CONFIG_NF_FLOW_TABLE_IPV6=m @@ -300,7 +295,6 @@ CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set CONFIG_PSAMPLE=m CONFIG_NET_IFE=m -# CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y CONFIG_TEST_ASYNC_DRIVER_PROBE=m @@ -393,6 +387,8 @@ CONFIG_NTP_PPS=y CONFIG_PPS_CLIENT_LDISC=m CONFIG_PTP_1588_CLOCK=m # CONFIG_HWMON is not set +# CONFIG_LCD_CLASS_DEVICE is not set +# CONFIG_BACKLIGHT_CLASS_DEVICE is not set CONFIG_HID=m CONFIG_HIDRAW=y CONFIG_UHID=m @@ -505,13 +501,14 @@ CONFIG_NLS_MAC_TURKISH=m CONFIG_DLM=m CONFIG_ENCRYPTED_KEYS=m CONFIG_HARDENED_USERCOPY=y -CONFIG_CRYPTO_RSA=m -CONFIG_CRYPTO_DH=m -CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_USER=m CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_TEST=m +CONFIG_CRYPTO_RSA=m +CONFIG_CRYPTO_DH=m +CONFIG_CRYPTO_ECDH=m +CONFIG_CRYPTO_ECRDSA=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m CONFIG_CRYPTO_AEGIS128L=m @@ -535,7 +532,6 @@ CONFIG_CRYPTO_RMD256=m CONFIG_CRYPTO_RMD320=m CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_SM3=m -CONFIG_CRYPTO_STREEBOG=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_AES_TI=m @@ -578,6 +574,7 @@ CONFIG_ATOMIC64_SELFTEST=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_STRING_HELPERS=m +CONFIG_TEST_STRSCPY=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_BITMAP=m diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig index d865592a423e..f41c34d3cdd0 100644 --- a/arch/m68k/configs/mvme16x_defconfig +++ b/arch/m68k/configs/mvme16x_defconfig @@ -64,9 +64,6 @@ CONFIG_INET_AH=m CONFIG_INET_ESP=m CONFIG_INET_ESP_OFFLOAD=m CONFIG_INET_IPCOMP=m -CONFIG_INET_XFRM_MODE_TRANSPORT=m -CONFIG_INET_XFRM_MODE_TUNNEL=m -CONFIG_INET_XFRM_MODE_BEET=m CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m CONFIG_INET_RAW_DIAG=m @@ -198,7 +195,6 @@ CONFIG_IP_SET_HASH_NETNET=m CONFIG_IP_SET_HASH_NETPORT=m CONFIG_IP_SET_HASH_NETIFACE=m CONFIG_IP_SET_LIST_SET=m -CONFIG_NFT_CHAIN_ROUTE_IPV4=m CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y @@ -224,7 +220,6 @@ 
CONFIG_IP_NF_RAW=m CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m -CONFIG_NFT_CHAIN_ROUTE_IPV6=m CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_FIB_IPV6=m CONFIG_NF_FLOW_TABLE_IPV6=m @@ -301,7 +296,6 @@ CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set CONFIG_PSAMPLE=m CONFIG_NET_IFE=m -# CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y CONFIG_TEST_ASYNC_DRIVER_PROBE=m @@ -394,6 +388,8 @@ CONFIG_NTP_PPS=y CONFIG_PPS_CLIENT_LDISC=m CONFIG_PTP_1588_CLOCK=m # CONFIG_HWMON is not set +# CONFIG_LCD_CLASS_DEVICE is not set +# CONFIG_BACKLIGHT_CLASS_DEVICE is not set CONFIG_HID=m CONFIG_HIDRAW=y CONFIG_UHID=m @@ -506,13 +502,14 @@ CONFIG_NLS_MAC_TURKISH=m CONFIG_DLM=m CONFIG_ENCRYPTED_KEYS=m CONFIG_HARDENED_USERCOPY=y -CONFIG_CRYPTO_RSA=m -CONFIG_CRYPTO_DH=m -CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_USER=m CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_TEST=m +CONFIG_CRYPTO_RSA=m +CONFIG_CRYPTO_DH=m +CONFIG_CRYPTO_ECDH=m +CONFIG_CRYPTO_ECRDSA=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m CONFIG_CRYPTO_AEGIS128L=m @@ -536,7 +533,6 @@ CONFIG_CRYPTO_RMD256=m CONFIG_CRYPTO_RMD320=m CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_SM3=m -CONFIG_CRYPTO_STREEBOG=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_AES_TI=m @@ -579,6 +575,7 @@ CONFIG_ATOMIC64_SELFTEST=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_STRING_HELPERS=m +CONFIG_TEST_STRSCPY=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_BITMAP=m diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig index 034a9de90484..c9d2cb0a1cf4 100644 --- a/arch/m68k/configs/q40_defconfig +++ b/arch/m68k/configs/q40_defconfig @@ -65,9 +65,6 @@ CONFIG_INET_AH=m CONFIG_INET_ESP=m CONFIG_INET_ESP_OFFLOAD=m CONFIG_INET_IPCOMP=m -CONFIG_INET_XFRM_MODE_TRANSPORT=m -CONFIG_INET_XFRM_MODE_TUNNEL=m -CONFIG_INET_XFRM_MODE_BEET=m CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m CONFIG_INET_RAW_DIAG=m @@ -199,7 +196,6 @@ CONFIG_IP_SET_HASH_NETNET=m CONFIG_IP_SET_HASH_NETPORT=m CONFIG_IP_SET_HASH_NETIFACE=m CONFIG_IP_SET_LIST_SET=m -CONFIG_NFT_CHAIN_ROUTE_IPV4=m CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y @@ -225,7 +221,6 @@ CONFIG_IP_NF_RAW=m CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m -CONFIG_NFT_CHAIN_ROUTE_IPV6=m CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_FIB_IPV6=m CONFIG_NF_FLOW_TABLE_IPV6=m @@ -302,7 +297,6 @@ CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set CONFIG_PSAMPLE=m CONFIG_NET_IFE=m -# CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y CONFIG_TEST_ASYNC_DRIVER_PROBE=m @@ -408,6 +402,8 @@ CONFIG_PPS_CLIENT_PARPORT=m CONFIG_PTP_1588_CLOCK=m # CONFIG_HWMON is not set CONFIG_FB=y +# CONFIG_LCD_CLASS_DEVICE is not set +# CONFIG_BACKLIGHT_CLASS_DEVICE is not set CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y CONFIG_SOUND=m @@ -524,13 +520,14 @@ CONFIG_NLS_MAC_TURKISH=m CONFIG_DLM=m CONFIG_ENCRYPTED_KEYS=m CONFIG_HARDENED_USERCOPY=y -CONFIG_CRYPTO_RSA=m -CONFIG_CRYPTO_DH=m -CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_USER=m CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_TEST=m +CONFIG_CRYPTO_RSA=m +CONFIG_CRYPTO_DH=m +CONFIG_CRYPTO_ECDH=m +CONFIG_CRYPTO_ECRDSA=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m CONFIG_CRYPTO_AEGIS128L=m @@ -554,7 +551,6 @@ CONFIG_CRYPTO_RMD256=m CONFIG_CRYPTO_RMD320=m CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_SM3=m -CONFIG_CRYPTO_STREEBOG=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_AES_TI=m @@ -597,6 +593,7 @@ CONFIG_ATOMIC64_SELFTEST=m 
CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_STRING_HELPERS=m +CONFIG_TEST_STRSCPY=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_BITMAP=m diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig index 49be0f9fcd8d..79a64fdd6bf0 100644 --- a/arch/m68k/configs/sun3_defconfig +++ b/arch/m68k/configs/sun3_defconfig @@ -61,9 +61,6 @@ CONFIG_INET_AH=m CONFIG_INET_ESP=m CONFIG_INET_ESP_OFFLOAD=m CONFIG_INET_IPCOMP=m -CONFIG_INET_XFRM_MODE_TRANSPORT=m -CONFIG_INET_XFRM_MODE_TUNNEL=m -CONFIG_INET_XFRM_MODE_BEET=m CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m CONFIG_INET_RAW_DIAG=m @@ -195,7 +192,6 @@ CONFIG_IP_SET_HASH_NETNET=m CONFIG_IP_SET_HASH_NETPORT=m CONFIG_IP_SET_HASH_NETIFACE=m CONFIG_IP_SET_LIST_SET=m -CONFIG_NFT_CHAIN_ROUTE_IPV4=m CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y @@ -221,7 +217,6 @@ CONFIG_IP_NF_RAW=m CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m -CONFIG_NFT_CHAIN_ROUTE_IPV6=m CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_FIB_IPV6=m CONFIG_NF_FLOW_TABLE_IPV6=m @@ -298,7 +293,6 @@ CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set CONFIG_PSAMPLE=m CONFIG_NET_IFE=m -# CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y CONFIG_TEST_ASYNC_DRIVER_PROBE=m @@ -394,6 +388,8 @@ CONFIG_PPS_CLIENT_LDISC=m CONFIG_PTP_1588_CLOCK=m # CONFIG_HWMON is not set CONFIG_FB=y +# CONFIG_LCD_CLASS_DEVICE is not set +# CONFIG_BACKLIGHT_CLASS_DEVICE is not set CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y CONFIG_HID=m @@ -508,13 +504,14 @@ CONFIG_NLS_MAC_TURKISH=m CONFIG_DLM=m CONFIG_ENCRYPTED_KEYS=m CONFIG_HARDENED_USERCOPY=y -CONFIG_CRYPTO_RSA=m -CONFIG_CRYPTO_DH=m -CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_USER=m CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_TEST=m +CONFIG_CRYPTO_RSA=m +CONFIG_CRYPTO_DH=m +CONFIG_CRYPTO_ECDH=m +CONFIG_CRYPTO_ECRDSA=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m CONFIG_CRYPTO_AEGIS128L=m @@ -538,7 +535,6 @@ CONFIG_CRYPTO_RMD256=m CONFIG_CRYPTO_RMD320=m CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_SM3=m -CONFIG_CRYPTO_STREEBOG=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_AES_TI=m @@ -581,6 +577,7 @@ CONFIG_ATOMIC64_SELFTEST=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_STRING_HELPERS=m +CONFIG_TEST_STRSCPY=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_BITMAP=m diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig index a71acf4a6004..e3402a5d165b 100644 --- a/arch/m68k/configs/sun3x_defconfig +++ b/arch/m68k/configs/sun3x_defconfig @@ -61,9 +61,6 @@ CONFIG_INET_AH=m CONFIG_INET_ESP=m CONFIG_INET_ESP_OFFLOAD=m CONFIG_INET_IPCOMP=m -CONFIG_INET_XFRM_MODE_TRANSPORT=m -CONFIG_INET_XFRM_MODE_TUNNEL=m -CONFIG_INET_XFRM_MODE_BEET=m CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m CONFIG_INET_RAW_DIAG=m @@ -195,7 +192,6 @@ CONFIG_IP_SET_HASH_NETNET=m CONFIG_IP_SET_HASH_NETPORT=m CONFIG_IP_SET_HASH_NETIFACE=m CONFIG_IP_SET_LIST_SET=m -CONFIG_NFT_CHAIN_ROUTE_IPV4=m CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y @@ -221,7 +217,6 @@ CONFIG_IP_NF_RAW=m CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m -CONFIG_NFT_CHAIN_ROUTE_IPV6=m CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_FIB_IPV6=m CONFIG_NF_FLOW_TABLE_IPV6=m @@ -298,7 +293,6 @@ CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set CONFIG_PSAMPLE=m CONFIG_NET_IFE=m -# CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y CONFIG_TEST_ASYNC_DRIVER_PROBE=m @@ -393,6 +387,8 @@ CONFIG_PPS_CLIENT_LDISC=m 
CONFIG_PTP_1588_CLOCK=m # CONFIG_HWMON is not set CONFIG_FB=y +# CONFIG_LCD_CLASS_DEVICE is not set +# CONFIG_BACKLIGHT_CLASS_DEVICE is not set CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y CONFIG_HID=m @@ -507,13 +503,14 @@ CONFIG_NLS_MAC_TURKISH=m CONFIG_DLM=m CONFIG_ENCRYPTED_KEYS=m CONFIG_HARDENED_USERCOPY=y -CONFIG_CRYPTO_RSA=m -CONFIG_CRYPTO_DH=m -CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_USER=m CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_TEST=m +CONFIG_CRYPTO_RSA=m +CONFIG_CRYPTO_DH=m +CONFIG_CRYPTO_ECDH=m +CONFIG_CRYPTO_ECRDSA=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m CONFIG_CRYPTO_AEGIS128L=m @@ -537,7 +534,6 @@ CONFIG_CRYPTO_RMD256=m CONFIG_CRYPTO_RMD320=m CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_SM3=m -CONFIG_CRYPTO_STREEBOG=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_AES_TI=m @@ -580,6 +576,7 @@ CONFIG_ATOMIC64_SELFTEST=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_STRING_HELPERS=m +CONFIG_TEST_STRSCPY=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_BITMAP=m diff --git a/arch/m68k/kernel/dma.c b/arch/m68k/kernel/dma.c index b4aa853051bd..30cd59caf037 100644 --- a/arch/m68k/kernel/dma.c +++ b/arch/m68k/kernel/dma.c @@ -18,57 +18,22 @@ #include <asm/pgalloc.h> #if defined(CONFIG_MMU) && !defined(CONFIG_COLDFIRE) - -void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, - gfp_t flag, unsigned long attrs) +void arch_dma_prep_coherent(struct page *page, size_t size) { - struct page *page, **map; - pgprot_t pgprot; - void *addr; - int i, order; - - pr_debug("dma_alloc_coherent: %d,%x\n", size, flag); - - size = PAGE_ALIGN(size); - order = get_order(size); - - page = alloc_pages(flag | __GFP_ZERO, order); - if (!page) - return NULL; - - *handle = page_to_phys(page); - map = kmalloc(sizeof(struct page *) << order, flag & ~__GFP_DMA); - if (!map) { - __free_pages(page, order); - return NULL; - } - split_page(page, order); - - order = 1 << order; - size >>= PAGE_SHIFT; - map[0] = page; - for (i = 1; i < size; i++) - map[i] = page + i; - for (; i < order; i++) - __free_page(page + i); - pgprot = __pgprot(_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_DIRTY); - if (CPU_IS_040_OR_060) - pgprot_val(pgprot) |= _PAGE_GLOBAL040 | _PAGE_NOCACHE_S; - else - pgprot_val(pgprot) |= _PAGE_NOCACHE030; - addr = vmap(map, size, VM_MAP, pgprot); - kfree(map); - - return addr; + cache_push(page_to_phys(page), size); } -void arch_dma_free(struct device *dev, size_t size, void *addr, - dma_addr_t handle, unsigned long attrs) +pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot, + unsigned long attrs) { - pr_debug("dma_free_coherent: %p, %x\n", addr, handle); - vfree(addr); + if (CPU_IS_040_OR_060) { + pgprot_val(prot) &= ~_PAGE_CACHE040; + pgprot_val(prot) |= _PAGE_GLOBAL040 | _PAGE_NOCACHE_S; + } else { + pgprot_val(prot) |= _PAGE_NOCACHE030; + } + return prot; } - #else #include <asm/cacheflush.h> diff --git a/arch/mips/Makefile b/arch/mips/Makefile index 8f4486c4415b..eceff9b75b22 100644 --- a/arch/mips/Makefile +++ b/arch/mips/Makefile @@ -17,6 +17,7 @@ archscripts: scripts_basic $(Q)$(MAKE) $(build)=arch/mips/boot/tools relocs KBUILD_DEFCONFIG := 32r2el_defconfig +KBUILD_DTBS := dtbs # # Select the object file format to substitute into the linker script. 
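The m68k change above drops the hand-rolled arch_dma_alloc()/arch_dma_free() in favour of the generic remapping allocator, keeping only two small hooks: arch_dma_prep_coherent() pushes dirty cache lines before a page is handed out as coherent memory, and arch_dma_mmap_pgprot() makes the mapping non-cacheable (with the 040/060 variant also setting the global bit). A minimal sketch of how a generic allocator would drive such hooks — the hook names mirror the ones above, but the surrounding allocator, types, and flag values are simplified stand-ins, not the kernel's actual implementation:

/* Illustrative only: a simplified model of the generic DMA path
 * that the m68k hooks above plug into. */
#include <stddef.h>
#include <stdio.h>

typedef unsigned long pgprot_t;
struct page { unsigned long pfn; };

#define PROT_CACHED	0x0
#define PROT_NOCACHE	0x1	/* stand-in for _PAGE_NOCACHE030 etc. */

/* Arch hook: clean caches so CPU and device agree on memory contents. */
static void arch_dma_prep_coherent(struct page *page, size_t size)
{
	printf("push caches for pfn %lu, %zu bytes\n", page->pfn, size);
}

/* Arch hook: strip cacheability from the buffer's page protection. */
static pgprot_t arch_dma_mmap_pgprot(pgprot_t prot)
{
	return prot | PROT_NOCACHE;
}

/* Sketch of the generic allocator calling the hooks in order. */
static void *dma_alloc_sketch(struct page *page, size_t size, pgprot_t *prot)
{
	arch_dma_prep_coherent(page, size);		/* 1: clean caches  */
	*prot = arch_dma_mmap_pgprot(PROT_CACHED);	/* 2: uncached prot */
	return page;					/* 3: map & return  */
}

int main(void)
{
	struct page p = { .pfn = 42 };
	pgprot_t prot;

	dma_alloc_sketch(&p, 4096, &prot);
	printf("prot = %#lx\n", prot);
	return 0;
}

The payoff of the conversion is that the page-map/vmap bookkeeping deleted above now lives once in common code, and the arch only states the two facts it actually knows: how to clean its caches and which pgprot bits mean "uncached".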
@@ -384,7 +385,7 @@ quiet_cmd_64 = OBJCOPY $@ vmlinux.64: vmlinux $(call cmd,64) -all: $(all-y) +all: $(all-y) $(KBUILD_DTBS) # boot $(boot-y): $(vmlinux-32) FORCE diff --git a/arch/mips/boot/compressed/Makefile b/arch/mips/boot/compressed/Makefile index 3c453a1f1ff1..172801ed35b8 100644 --- a/arch/mips/boot/compressed/Makefile +++ b/arch/mips/boot/compressed/Makefile @@ -78,6 +78,8 @@ OBJCOPYFLAGS_piggy.o := --add-section=.image=$(obj)/vmlinux.bin.z \ $(obj)/piggy.o: $(obj)/dummy.o $(obj)/vmlinux.bin.z FORCE $(call if_changed,objcopy) +HOSTCFLAGS_calc_vmlinuz_load_addr.o += $(LINUXINCLUDE) + # Calculate the load address of the compressed kernel image hostprogs-y := calc_vmlinuz_load_addr diff --git a/arch/mips/boot/compressed/calc_vmlinuz_load_addr.c b/arch/mips/boot/compressed/calc_vmlinuz_load_addr.c index 240f1d12df75..080b926d2623 100644 --- a/arch/mips/boot/compressed/calc_vmlinuz_load_addr.c +++ b/arch/mips/boot/compressed/calc_vmlinuz_load_addr.c @@ -9,7 +9,7 @@ #include <stdint.h> #include <stdio.h> #include <stdlib.h> -#include "../../../../include/linux/sizes.h" +#include <linux/sizes.h> int main(int argc, char *argv[]) { diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h index 94096299fc56..9a82dd11c0e9 100644 --- a/arch/mips/include/asm/atomic.h +++ b/arch/mips/include/asm/atomic.h @@ -254,10 +254,10 @@ static __inline__ int atomic_sub_if_positive(int i, atomic_t * v) #define atomic64_set(v, i) WRITE_ONCE((v)->counter, (i)) #define ATOMIC64_OP(op, c_op, asm_op) \ -static __inline__ void atomic64_##op(long i, atomic64_t * v) \ +static __inline__ void atomic64_##op(s64 i, atomic64_t * v) \ { \ if (kernel_uses_llsc) { \ - long temp; \ + s64 temp; \ \ loongson_llsc_mb(); \ __asm__ __volatile__( \ @@ -280,12 +280,12 @@ static __inline__ void atomic64_##op(long i, atomic64_t * v) \ } #define ATOMIC64_OP_RETURN(op, c_op, asm_op) \ -static __inline__ long atomic64_##op##_return_relaxed(long i, atomic64_t * v) \ +static __inline__ s64 atomic64_##op##_return_relaxed(s64 i, atomic64_t * v) \ { \ - long result; \ + s64 result; \ \ if (kernel_uses_llsc) { \ - long temp; \ + s64 temp; \ \ loongson_llsc_mb(); \ __asm__ __volatile__( \ @@ -314,12 +314,12 @@ static __inline__ long atomic64_##op##_return_relaxed(long i, atomic64_t * v) \ } #define ATOMIC64_FETCH_OP(op, c_op, asm_op) \ -static __inline__ long atomic64_fetch_##op##_relaxed(long i, atomic64_t * v) \ +static __inline__ s64 atomic64_fetch_##op##_relaxed(s64 i, atomic64_t * v) \ { \ - long result; \ + s64 result; \ \ if (kernel_uses_llsc) { \ - long temp; \ + s64 temp; \ \ loongson_llsc_mb(); \ __asm__ __volatile__( \ @@ -386,14 +386,14 @@ ATOMIC64_OPS(xor, ^=, xor) * Atomically test @v and subtract @i if @v is greater or equal than @i. * The function returns the old value of @v minus @i. 
*/ -static __inline__ long atomic64_sub_if_positive(long i, atomic64_t * v) +static __inline__ s64 atomic64_sub_if_positive(s64 i, atomic64_t * v) { - long result; + s64 result; smp_mb__before_llsc(); if (kernel_uses_llsc) { - long temp; + s64 temp; __asm__ __volatile__( " .set push \n" diff --git a/arch/mips/include/asm/mach-ath79/ar933x_uart.h b/arch/mips/include/asm/mach-ath79/ar933x_uart.h index b8f8af7dc47c..cacf3545e018 100644 --- a/arch/mips/include/asm/mach-ath79/ar933x_uart.h +++ b/arch/mips/include/asm/mach-ath79/ar933x_uart.h @@ -24,8 +24,8 @@ #define AR933X_UART_CS_PARITY_S 0 #define AR933X_UART_CS_PARITY_M 0x3 #define AR933X_UART_CS_PARITY_NONE 0 -#define AR933X_UART_CS_PARITY_ODD 1 -#define AR933X_UART_CS_PARITY_EVEN 2 +#define AR933X_UART_CS_PARITY_ODD 2 +#define AR933X_UART_CS_PARITY_EVEN 3 #define AR933X_UART_CS_IF_MODE_S 2 #define AR933X_UART_CS_IF_MODE_M 0x3 #define AR933X_UART_CS_IF_MODE_NONE 0 diff --git a/arch/mips/include/asm/mips-gic.h b/arch/mips/include/asm/mips-gic.h index 75a1cdee1331..084cac1c5ea2 100644 --- a/arch/mips/include/asm/mips-gic.h +++ b/arch/mips/include/asm/mips-gic.h @@ -311,6 +311,36 @@ static inline bool mips_gic_present(void) } /** + * mips_gic_vx_map_reg() - Return GIC_Vx_<intr>_MAP register offset + * @intr: A GIC local interrupt + * + * Determine the index of the GIC_VL_<intr>_MAP or GIC_VO_<intr>_MAP register + * within the block of GIC map registers. This is almost the same as the order + * of interrupts in the pending & mask registers, as used by enum + * mips_gic_local_interrupt, but moves the FDC interrupt & thus offsets the + * interrupts after it... + * + * Return: The map register index corresponding to @intr. + * + * The return value is suitable for use with the (read|write)_gic_v[lo]_map + * accessor functions. + */ +static inline unsigned int +mips_gic_vx_map_reg(enum mips_gic_local_interrupt intr) +{ + /* WD, Compare & Timer are 1:1 */ + if (intr <= GIC_LOCAL_INT_TIMER) + return intr; + + /* FDC moves to after Timer... 
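The recurring long → s64 conversions in the atomic64 hunks above (and the matching powerpc and riscv hunks further down) keep every architecture's prototypes in line with the generic s64-based atomic64_* declarations: C's long is only 32 bits on ILP32 targets, so s64 pins the operand width explicitly even on the LP64 architectures where long happens to be 64-bit already. A quick standalone illustration of the trap being closed — the s64 typedef stands in for the kernel's, and the sizes printed depend on the ABI you compile for:

/* Illustrative only: why atomic64_* prototypes use s64, not long.
 * On an LP64 build both are 8 bytes; on an ILP32 build long shrinks
 * to 4 bytes and would silently truncate 64-bit atomic operands. */
#include <stdint.h>
#include <stdio.h>

typedef int64_t s64;		/* stand-in for the kernel typedef */

typedef struct { s64 counter; } atomic64_t;

static s64 atomic64_read_sketch(const atomic64_t *v)
{
	return v->counter;	/* real code uses READ_ONCE()/asm */
}

int main(void)
{
	atomic64_t v = { .counter = INT64_C(1) << 40 };
	long via_long;

	printf("sizeof(long) = %zu, sizeof(s64) = %zu\n",
	       sizeof(long), sizeof(s64));
	/* With a 4-byte long, this assignment truncates the value: */
	via_long = (long)atomic64_read_sketch(&v);
	printf("s64 value %lld, via long %ld\n",
	       (long long)v.counter, via_long);
	return 0;
}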
*/ + if (intr == GIC_LOCAL_INT_FDC) + return GIC_LOCAL_INT_TIMER + 1; + + /* As a result everything else is offset by 1 */ + return intr + 1; +} + +/** * gic_get_c0_compare_int() - Return cp0 count/compare interrupt virq * * Determine the virq number to use for the coprocessor 0 count/compare diff --git a/arch/mips/mm/mmap.c b/arch/mips/mm/mmap.c index 50ee7213b432..d79f2b432318 100644 --- a/arch/mips/mm/mmap.c +++ b/arch/mips/mm/mmap.c @@ -203,7 +203,7 @@ unsigned long arch_randomize_brk(struct mm_struct *mm) bool __virt_addr_valid(const volatile void *kaddr) { - unsigned long vaddr = (unsigned long)vaddr; + unsigned long vaddr = (unsigned long)kaddr; if ((vaddr < PAGE_OFFSET) || (vaddr >= MAP_BASE)) return false; diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c index 65b6e85447b1..144ceb0fba88 100644 --- a/arch/mips/mm/tlbex.c +++ b/arch/mips/mm/tlbex.c @@ -391,6 +391,7 @@ static struct work_registers build_get_work_registers(u32 **p) static void build_restore_work_registers(u32 **p) { if (scratch_reg >= 0) { + uasm_i_ehb(p); UASM_i_MFC0(p, 1, c0_kscratch(), scratch_reg); return; } @@ -668,10 +669,12 @@ static void build_restore_pagemask(u32 **p, struct uasm_reloc **r, uasm_i_mtc0(p, 0, C0_PAGEMASK); uasm_il_b(p, r, lid); } - if (scratch_reg >= 0) + if (scratch_reg >= 0) { + uasm_i_ehb(p); UASM_i_MFC0(p, 1, c0_kscratch(), scratch_reg); - else + } else { UASM_i_LW(p, 1, scratchpad_offset(0), 0); + } } else { /* Reset default page size */ if (PM_DEFAULT_MASK >> 16) { @@ -938,10 +941,12 @@ build_get_pgd_vmalloc64(u32 **p, struct uasm_label **l, struct uasm_reloc **r, uasm_i_jr(p, ptr); if (mode == refill_scratch) { - if (scratch_reg >= 0) + if (scratch_reg >= 0) { + uasm_i_ehb(p); UASM_i_MFC0(p, 1, c0_kscratch(), scratch_reg); - else + } else { UASM_i_LW(p, 1, scratchpad_offset(0), 0); + } } else { uasm_i_nop(p); } @@ -1258,6 +1263,7 @@ build_fast_tlb_refill_handler (u32 **p, struct uasm_label **l, UASM_i_MTC0(p, odd, C0_ENTRYLO1); /* load it */ if (c0_scratch_reg >= 0) { + uasm_i_ehb(p); UASM_i_MFC0(p, scratch, c0_kscratch(), c0_scratch_reg); build_tlb_write_entry(p, l, r, tlb_random); uasm_l_leave(l, *p); @@ -1603,15 +1609,17 @@ static void build_setup_pgd(void) uasm_i_dinsm(&p, a0, 0, 29, 64 - 29); uasm_l_tlbl_goaround1(&l, p); UASM_i_SLL(&p, a0, a0, 11); - uasm_i_jr(&p, 31); UASM_i_MTC0(&p, a0, C0_CONTEXT); + uasm_i_jr(&p, 31); + uasm_i_ehb(&p); } else { /* PGD in c0_KScratch */ - uasm_i_jr(&p, 31); if (cpu_has_ldpte) UASM_i_MTC0(&p, a0, C0_PWBASE); else UASM_i_MTC0(&p, a0, c0_kscratch(), pgd_reg); + uasm_i_jr(&p, 31); + uasm_i_ehb(&p); } #else #ifdef CONFIG_SMP @@ -1625,13 +1633,16 @@ static void build_setup_pgd(void) UASM_i_LA_mostly(&p, a2, pgdc); UASM_i_SW(&p, a0, uasm_rel_lo(pgdc), a2); #endif /* SMP */ - uasm_i_jr(&p, 31); /* if pgd_reg is allocated, save PGD also to scratch register */ - if (pgd_reg != -1) + if (pgd_reg != -1) { UASM_i_MTC0(&p, a0, c0_kscratch(), pgd_reg); - else + uasm_i_jr(&p, 31); + uasm_i_ehb(&p); + } else { + uasm_i_jr(&p, 31); uasm_i_nop(&p); + } #endif if (p >= (u32 *)tlbmiss_handler_setup_pgd_end) panic("tlbmiss_handler_setup_pgd space exceeded"); diff --git a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c index f241ded9239b..1f0f29a289d3 100644 --- a/arch/parisc/kernel/module.c +++ b/arch/parisc/kernel/module.c @@ -786,6 +786,10 @@ int apply_relocate_add(Elf_Shdr *sechdrs, /* 32-bit PC relative address */ *loc = val - dot - 8 + addend; break; + case R_PARISC_PCREL64: + /* 64-bit PC relative address */ + *loc64 = val - dot - 8 + addend; 
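mips_gic_vx_map_reg() above encodes a small layout quirk: the map registers follow the pending/mask ordering except that FDC is pulled forward to sit right after Timer, shifting every later interrupt by one. A table-free way to spot-check that rule — the enum ordering below (WD, Compare, Timer, Perfctr, SWInt0, SWInt1, FDC) is my assumption about enum mips_gic_local_interrupt rather than something shown in the hunk, so verify it against the real header before relying on the exact indices:

/* Illustrative only: replicate the GIC_Vx_<intr>_MAP index rule from
 * mips_gic_vx_map_reg() above and assert the expected offsets. */
#include <assert.h>
#include <stdio.h>

enum gic_local_int { WD, COMPARE, TIMER, PERFCTR, SWINT0, SWINT1, FDC };

static unsigned int vx_map_reg(enum gic_local_int intr)
{
	if (intr <= TIMER)	/* WD, Compare, Timer map 1:1 */
		return intr;
	if (intr == FDC)	/* FDC is pulled in right after Timer */
		return TIMER + 1;
	return intr + 1;	/* everything after Timer shifts by one */
}

int main(void)
{
	assert(vx_map_reg(TIMER) == 2);
	assert(vx_map_reg(FDC) == 3);		/* not 7: moved after Timer */
	assert(vx_map_reg(PERFCTR) == 4);	/* shifted to make room */
	assert(vx_map_reg(SWINT1) == 6);
	printf("map indices check out\n");
	return 0;
}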
+ break; case R_PARISC_DIR64: /* 64-bit effective address */ *loc64 = val + addend; diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h index 52eafaf74054..31c231ea56b7 100644 --- a/arch/powerpc/include/asm/atomic.h +++ b/arch/powerpc/include/asm/atomic.h @@ -297,24 +297,24 @@ static __inline__ int atomic_dec_if_positive(atomic_t *v) #define ATOMIC64_INIT(i) { (i) } -static __inline__ long atomic64_read(const atomic64_t *v) +static __inline__ s64 atomic64_read(const atomic64_t *v) { - long t; + s64 t; __asm__ __volatile__("ld%U1%X1 %0,%1" : "=r"(t) : "m"(v->counter)); return t; } -static __inline__ void atomic64_set(atomic64_t *v, long i) +static __inline__ void atomic64_set(atomic64_t *v, s64 i) { __asm__ __volatile__("std%U0%X0 %1,%0" : "=m"(v->counter) : "r"(i)); } #define ATOMIC64_OP(op, asm_op) \ -static __inline__ void atomic64_##op(long a, atomic64_t *v) \ +static __inline__ void atomic64_##op(s64 a, atomic64_t *v) \ { \ - long t; \ + s64 t; \ \ __asm__ __volatile__( \ "1: ldarx %0,0,%3 # atomic64_" #op "\n" \ @@ -327,10 +327,10 @@ static __inline__ void atomic64_##op(long a, atomic64_t *v) \ } #define ATOMIC64_OP_RETURN_RELAXED(op, asm_op) \ -static inline long \ -atomic64_##op##_return_relaxed(long a, atomic64_t *v) \ +static inline s64 \ +atomic64_##op##_return_relaxed(s64 a, atomic64_t *v) \ { \ - long t; \ + s64 t; \ \ __asm__ __volatile__( \ "1: ldarx %0,0,%3 # atomic64_" #op "_return_relaxed\n" \ @@ -345,10 +345,10 @@ atomic64_##op##_return_relaxed(long a, atomic64_t *v) \ } #define ATOMIC64_FETCH_OP_RELAXED(op, asm_op) \ -static inline long \ -atomic64_fetch_##op##_relaxed(long a, atomic64_t *v) \ +static inline s64 \ +atomic64_fetch_##op##_relaxed(s64 a, atomic64_t *v) \ { \ - long res, t; \ + s64 res, t; \ \ __asm__ __volatile__( \ "1: ldarx %0,0,%4 # atomic64_fetch_" #op "_relaxed\n" \ @@ -396,7 +396,7 @@ ATOMIC64_OPS(xor, xor) static __inline__ void atomic64_inc(atomic64_t *v) { - long t; + s64 t; __asm__ __volatile__( "1: ldarx %0,0,%2 # atomic64_inc\n\ @@ -409,9 +409,9 @@ static __inline__ void atomic64_inc(atomic64_t *v) } #define atomic64_inc atomic64_inc -static __inline__ long atomic64_inc_return_relaxed(atomic64_t *v) +static __inline__ s64 atomic64_inc_return_relaxed(atomic64_t *v) { - long t; + s64 t; __asm__ __volatile__( "1: ldarx %0,0,%2 # atomic64_inc_return_relaxed\n" @@ -427,7 +427,7 @@ static __inline__ long atomic64_inc_return_relaxed(atomic64_t *v) static __inline__ void atomic64_dec(atomic64_t *v) { - long t; + s64 t; __asm__ __volatile__( "1: ldarx %0,0,%2 # atomic64_dec\n\ @@ -440,9 +440,9 @@ static __inline__ void atomic64_dec(atomic64_t *v) } #define atomic64_dec atomic64_dec -static __inline__ long atomic64_dec_return_relaxed(atomic64_t *v) +static __inline__ s64 atomic64_dec_return_relaxed(atomic64_t *v) { - long t; + s64 t; __asm__ __volatile__( "1: ldarx %0,0,%2 # atomic64_dec_return_relaxed\n" @@ -463,9 +463,9 @@ static __inline__ long atomic64_dec_return_relaxed(atomic64_t *v) * Atomically test *v and decrement if it is greater than 0. * The function returns the old value of *v minus 1. */ -static __inline__ long atomic64_dec_if_positive(atomic64_t *v) +static __inline__ s64 atomic64_dec_if_positive(atomic64_t *v) { - long t; + s64 t; __asm__ __volatile__( PPC_ATOMIC_ENTRY_BARRIER @@ -502,9 +502,9 @@ static __inline__ long atomic64_dec_if_positive(atomic64_t *v) * Atomically adds @a to @v, so long as it was not @u. * Returns the old value of @v. 
*/ -static __inline__ long atomic64_fetch_add_unless(atomic64_t *v, long a, long u) +static __inline__ s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u) { - long t; + s64 t; __asm__ __volatile__ ( PPC_ATOMIC_ENTRY_BARRIER @@ -534,7 +534,7 @@ static __inline__ long atomic64_fetch_add_unless(atomic64_t *v, long a, long u) */ static __inline__ int atomic64_inc_not_zero(atomic64_t *v) { - long t1, t2; + s64 t1, t2; __asm__ __volatile__ ( PPC_ATOMIC_ENTRY_BARRIER diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index ef573fe9873e..a9993e7a443b 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -346,8 +346,6 @@ static inline unsigned long __pack_fe01(unsigned int fpmode) #define spin_cpu_relax() barrier() -#define spin_cpu_yield() spin_cpu_relax() - #define spin_end() HMT_medium() #define spin_until_cond(cond) \ diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 6b86055e5251..73ba246ca11d 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -315,7 +315,7 @@ TRAMP_REAL_BEGIN(machine_check_common_early) mfspr r11,SPRN_DSISR /* Save DSISR */ std r11,_DSISR(r1) std r9,_CCR(r1) /* Save CR in stackframe */ - kuap_save_amr_and_lock r9, r10, cr1 + /* We don't touch AMR here, we never go to virtual mode */ /* Save r9 through r13 from EXMC save area to stack frame. */ EXCEPTION_PROLOG_COMMON_2(PACA_EXMC) mfmsr r11 /* get MSR value */ diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 684b0b315c32..8c92febf5f44 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -2521,7 +2521,6 @@ void ptrace_disable(struct task_struct *child) { /* make sure the single step bit is not set. */ user_disable_single_step(child); - clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); } #ifdef CONFIG_PPC_ADV_DEBUG_REGS diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index b824f4c69622..0ab4c72515c4 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -990,8 +990,7 @@ int rtas_ibm_suspend_me(u64 handle) /* Call function on all CPUs. One of us will make the * rtas call */ - if (on_each_cpu(rtas_percpu_suspend_me, &data, 0)) - atomic_set(&data.error, -EINVAL); + on_each_cpu(rtas_percpu_suspend_me, &data, 0); wait_for_completion(&done); diff --git a/arch/powerpc/mm/book3s64/mmu_context.c b/arch/powerpc/mm/book3s64/mmu_context.c index bb70391401f7..794404d50a85 100644 --- a/arch/powerpc/mm/book3s64/mmu_context.c +++ b/arch/powerpc/mm/book3s64/mmu_context.c @@ -50,20 +50,52 @@ EXPORT_SYMBOL_GPL(hash__alloc_context_id); void slb_setup_new_exec(void); +static int realloc_context_ids(mm_context_t *ctx) +{ + int i, id; + + /* + * id 0 (aka. ctx->id) is special, we always allocate a new one, even if + * there wasn't one allocated previously (which happens in the exec + * case where ctx is newly allocated). + * + * We have to be a bit careful here. We must keep the existing ids in + * the array, so that we can test if they're non-zero to decide if we + * need to allocate a new one. However in case of error we must free the + * ids we've allocated but *not* any of the existing ones (or risk a + * UAF). That's why we decrement i at the start of the error handling + * loop, to skip the id that we just tested but couldn't reallocate. 
+ */ + for (i = 0; i < ARRAY_SIZE(ctx->extended_id); i++) { + if (i == 0 || ctx->extended_id[i]) { + id = hash__alloc_context_id(); + if (id < 0) + goto error; + + ctx->extended_id[i] = id; + } + } + + /* The caller expects us to return id */ + return ctx->id; + +error: + for (i--; i >= 0; i--) { + if (ctx->extended_id[i]) + ida_free(&mmu_context_ida, ctx->extended_id[i]); + } + + return id; +} + static int hash__init_new_context(struct mm_struct *mm) { int index; - index = hash__alloc_context_id(); - if (index < 0) - return index; - mm->context.hash_context = kmalloc(sizeof(struct hash_mm_context), GFP_KERNEL); - if (!mm->context.hash_context) { - ida_free(&mmu_context_ida, index); + if (!mm->context.hash_context) return -ENOMEM; - } /* * The old code would re-promote on fork, we don't do that when using @@ -91,13 +123,20 @@ static int hash__init_new_context(struct mm_struct *mm) mm->context.hash_context->spt = kmalloc(sizeof(struct subpage_prot_table), GFP_KERNEL); if (!mm->context.hash_context->spt) { - ida_free(&mmu_context_ida, index); kfree(mm->context.hash_context); return -ENOMEM; } } #endif + } + index = realloc_context_ids(&mm->context); + if (index < 0) { +#ifdef CONFIG_PPC_SUBPAGE_PROT + kfree(mm->context.hash_context->spt); +#endif + kfree(mm->context.hash_context); + return index; } pkey_mm_init(mm); diff --git a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi index 3c06ee4b2b29..40983491b95f 100644 --- a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi +++ b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi @@ -163,6 +163,7 @@ interrupt-parent = <&plic0>; interrupts = <4>; clocks = <&prci PRCI_CLK_TLCLK>; + status = "disabled"; }; uart1: serial@10011000 { compatible = "sifive,fu540-c000-uart", "sifive,uart0"; @@ -170,6 +171,7 @@ interrupt-parent = <&plic0>; interrupts = <5>; clocks = <&prci PRCI_CLK_TLCLK>; + status = "disabled"; }; i2c0: i2c@10030000 { compatible = "sifive,fu540-c000-i2c", "sifive,i2c0"; @@ -181,6 +183,7 @@ reg-io-width = <1>; #address-cells = <1>; #size-cells = <0>; + status = "disabled"; }; qspi0: spi@10040000 { compatible = "sifive,fu540-c000-spi", "sifive,spi0"; @@ -191,6 +194,7 @@ clocks = <&prci PRCI_CLK_TLCLK>; #address-cells = <1>; #size-cells = <0>; + status = "disabled"; }; qspi1: spi@10041000 { compatible = "sifive,fu540-c000-spi", "sifive,spi0"; @@ -201,6 +205,7 @@ clocks = <&prci PRCI_CLK_TLCLK>; #address-cells = <1>; #size-cells = <0>; + status = "disabled"; }; qspi2: spi@10050000 { compatible = "sifive,fu540-c000-spi", "sifive,spi0"; @@ -210,6 +215,7 @@ clocks = <&prci PRCI_CLK_TLCLK>; #address-cells = <1>; #size-cells = <0>; + status = "disabled"; }; }; }; diff --git a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts index 4da88707e28f..0b55c53c08c7 100644 --- a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts +++ b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts @@ -42,7 +42,20 @@ }; }; +&uart0 { + status = "okay"; +}; + +&uart1 { + status = "okay"; +}; + +&i2c0 { + status = "okay"; +}; + &qspi0 { + status = "okay"; flash@0 { compatible = "issi,is25wp256", "jedec,spi-nor"; reg = <0>; diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index 4f02967e55de..04944fb4fa7a 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -69,6 +69,7 @@ CONFIG_VIRTIO_MMIO=y CONFIG_CLK_SIFIVE=y CONFIG_CLK_SIFIVE_FU540_PRCI=y CONFIG_SIFIVE_PLIC=y +CONFIG_SPI_SIFIVE=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y 
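The realloc_context_ids() comment above is worth internalizing: on failure you may free only the ids allocated in this pass, never the pre-existing ones still owned elsewhere, and the i-- before the unwind loop deliberately skips the slot that was tested but never got a fresh id. The same shape reduced to a standalone sketch — alloc_id()/free_id() are toy stand-ins, not kernel APIs:

/* Illustrative only: the allocate-several-ids-or-unwind pattern used
 * by realloc_context_ids() above, with a fake allocator.  Slot 0 is
 * always refreshed; other slots only if they were in use before. */
#include <stdio.h>

#define NSLOTS 4

static int next_id = 100, fail_after = 2;

static int alloc_id(void)
{
	if (fail_after-- == 0)
		return -1;		/* simulate allocation failure */
	return next_id++;
}

static void free_id(int id) { printf("freed id %d\n", id); }

static int realloc_ids(int ids[NSLOTS])
{
	int i, id;

	for (i = 0; i < NSLOTS; i++) {
		if (i == 0 || ids[i]) {	/* slot 0, or previously in use */
			id = alloc_id();
			if (id < 0)
				goto error;
			ids[i] = id;	/* fresh id replaces the old one */
		}
	}
	return ids[0];

error:
	/* i-- first: slot i was tested but never got a fresh id, so it
	 * still holds a value owned elsewhere; freeing it here would be
	 * exactly the use-after-free the comment above warns about. */
	for (i--; i >= 0; i--) {
		if (ids[i])
			free_id(ids[i]);
	}
	return id;
}

int main(void)
{
	int ids[NSLOTS] = { 1, 7, 0, 9 };	/* slots 1 and 3 in use */

	printf("realloc_ids() = %d\n", realloc_ids(ids));
	return 0;
}

With fail_after = 2 the third allocation (slot 3) fails, and the unwind frees only the fresh ids in slots 1 and 0, leaving slot 3's old value untouched — the behaviour the kernel comment demands.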
CONFIG_AUTOFS4_FS=y @@ -84,4 +85,8 @@ CONFIG_ROOT_NFS=y CONFIG_CRYPTO_USER_API_HASH=y CONFIG_CRYPTO_DEV_VIRTIO=y CONFIG_PRINTK_TIME=y +CONFIG_SPI=y +CONFIG_MMC_SPI=y +CONFIG_MMC=y +CONFIG_DEVTMPFS_MOUNT=y # CONFIG_RCU_TRACE is not set diff --git a/arch/riscv/include/asm/atomic.h b/arch/riscv/include/asm/atomic.h index 9038aeb900a6..96f95c9ebd97 100644 --- a/arch/riscv/include/asm/atomic.h +++ b/arch/riscv/include/asm/atomic.h @@ -38,11 +38,11 @@ static __always_inline void atomic_set(atomic_t *v, int i) #ifndef CONFIG_GENERIC_ATOMIC64 #define ATOMIC64_INIT(i) { (i) } -static __always_inline long atomic64_read(const atomic64_t *v) +static __always_inline s64 atomic64_read(const atomic64_t *v) { return READ_ONCE(v->counter); } -static __always_inline void atomic64_set(atomic64_t *v, long i) +static __always_inline void atomic64_set(atomic64_t *v, s64 i) { WRITE_ONCE(v->counter, i); } @@ -66,11 +66,11 @@ void atomic##prefix##_##op(c_type i, atomic##prefix##_t *v) \ #ifdef CONFIG_GENERIC_ATOMIC64 #define ATOMIC_OPS(op, asm_op, I) \ - ATOMIC_OP (op, asm_op, I, w, int, ) + ATOMIC_OP (op, asm_op, I, w, int, ) #else #define ATOMIC_OPS(op, asm_op, I) \ - ATOMIC_OP (op, asm_op, I, w, int, ) \ - ATOMIC_OP (op, asm_op, I, d, long, 64) + ATOMIC_OP (op, asm_op, I, w, int, ) \ + ATOMIC_OP (op, asm_op, I, d, s64, 64) #endif ATOMIC_OPS(add, add, i) @@ -127,14 +127,14 @@ c_type atomic##prefix##_##op##_return(c_type i, atomic##prefix##_t *v) \ #ifdef CONFIG_GENERIC_ATOMIC64 #define ATOMIC_OPS(op, asm_op, c_op, I) \ - ATOMIC_FETCH_OP( op, asm_op, I, w, int, ) \ - ATOMIC_OP_RETURN(op, asm_op, c_op, I, w, int, ) + ATOMIC_FETCH_OP( op, asm_op, I, w, int, ) \ + ATOMIC_OP_RETURN(op, asm_op, c_op, I, w, int, ) #else #define ATOMIC_OPS(op, asm_op, c_op, I) \ - ATOMIC_FETCH_OP( op, asm_op, I, w, int, ) \ - ATOMIC_OP_RETURN(op, asm_op, c_op, I, w, int, ) \ - ATOMIC_FETCH_OP( op, asm_op, I, d, long, 64) \ - ATOMIC_OP_RETURN(op, asm_op, c_op, I, d, long, 64) + ATOMIC_FETCH_OP( op, asm_op, I, w, int, ) \ + ATOMIC_OP_RETURN(op, asm_op, c_op, I, w, int, ) \ + ATOMIC_FETCH_OP( op, asm_op, I, d, s64, 64) \ + ATOMIC_OP_RETURN(op, asm_op, c_op, I, d, s64, 64) #endif ATOMIC_OPS(add, add, +, i) @@ -166,11 +166,11 @@ ATOMIC_OPS(sub, add, +, -i) #ifdef CONFIG_GENERIC_ATOMIC64 #define ATOMIC_OPS(op, asm_op, I) \ - ATOMIC_FETCH_OP(op, asm_op, I, w, int, ) + ATOMIC_FETCH_OP(op, asm_op, I, w, int, ) #else #define ATOMIC_OPS(op, asm_op, I) \ - ATOMIC_FETCH_OP(op, asm_op, I, w, int, ) \ - ATOMIC_FETCH_OP(op, asm_op, I, d, long, 64) + ATOMIC_FETCH_OP(op, asm_op, I, w, int, ) \ + ATOMIC_FETCH_OP(op, asm_op, I, d, s64, 64) #endif ATOMIC_OPS(and, and, i) @@ -219,9 +219,10 @@ static __always_inline int atomic_fetch_add_unless(atomic_t *v, int a, int u) #define atomic_fetch_add_unless atomic_fetch_add_unless #ifndef CONFIG_GENERIC_ATOMIC64 -static __always_inline long atomic64_fetch_add_unless(atomic64_t *v, long a, long u) +static __always_inline s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u) { - long prev, rc; + s64 prev; + long rc; __asm__ __volatile__ ( "0: lr.d %[p], %[c]\n" @@ -290,11 +291,11 @@ c_t atomic##prefix##_cmpxchg(atomic##prefix##_t *v, c_t o, c_t n) \ #ifdef CONFIG_GENERIC_ATOMIC64 #define ATOMIC_OPS() \ - ATOMIC_OP( int, , 4) + ATOMIC_OP(int, , 4) #else #define ATOMIC_OPS() \ - ATOMIC_OP( int, , 4) \ - ATOMIC_OP(long, 64, 8) + ATOMIC_OP(int, , 4) \ + ATOMIC_OP(s64, 64, 8) #endif ATOMIC_OPS() @@ -332,9 +333,10 @@ static __always_inline int atomic_sub_if_positive(atomic_t *v, int offset) #define 
atomic_dec_if_positive(v) atomic_sub_if_positive(v, 1) #ifndef CONFIG_GENERIC_ATOMIC64 -static __always_inline long atomic64_sub_if_positive(atomic64_t *v, int offset) +static __always_inline s64 atomic64_sub_if_positive(atomic64_t *v, s64 offset) { - long prev, rc; + s64 prev; + long rc; __asm__ __volatile__ ( "0: lr.d %[p], %[c]\n" diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c index 3e2708c626a8..f960c3f4ce47 100644 --- a/arch/riscv/mm/fault.c +++ b/arch/riscv/mm/fault.c @@ -272,9 +272,6 @@ vmalloc_fault: * entries, but in RISC-V, SFENCE.VMA specifies an * ordering constraint, not a cache flush; it is * necessary even after writing invalid entries. - * Relying on flush_tlb_fix_spurious_fault would - * suffice, but the extra traps reduce - * performance. So, eagerly SFENCE.VMA. */ local_flush_tlb_page(addr); diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 109243fdb6ec..fdb4246265a5 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -1,4 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 +config ARCH_HAS_MEM_ENCRYPT + def_bool y + config MMU def_bool y @@ -30,7 +33,7 @@ config GENERIC_BUG_RELATIVE_POINTERS def_bool y config GENERIC_LOCKBREAK - def_bool y if SMP && PREEMPT + def_bool y if PREEMPT config PGSTE def_bool y if KVM @@ -113,7 +116,6 @@ config S390 select DYNAMIC_FTRACE if FUNCTION_TRACER select GENERIC_CLOCKEVENTS select GENERIC_CPU_AUTOPROBE - select GENERIC_CPU_DEVICES if !SMP select GENERIC_CPU_VULNERABILITIES select GENERIC_FIND_FIRST_BIT select GENERIC_SMP_IDLE_THREAD @@ -187,6 +189,8 @@ config S390 select VIRT_CPU_ACCOUNTING select ARCH_HAS_SCALED_CPUTIME select HAVE_NMI + select SWIOTLB + select GENERIC_ALLOCATOR config SCHED_OMIT_FRAME_POINTER @@ -399,27 +403,10 @@ config SYSVIPC_COMPAT config SMP def_bool y - prompt "Symmetric multi-processing support" - ---help--- - This enables support for systems with more than one CPU. If you have - a system with only one CPU, like most personal computers, say N. If - you have a system with more than one CPU, say Y. - - If you say N here, the kernel will run on uni- and multiprocessor - machines, but will use only one CPU of a multiprocessor machine. If - you say Y here, the kernel will run on many, but not all, - uniprocessor machines. On a uniprocessor machine, the kernel - will run faster if you say N here. - - See also the SMP-HOWTO available at - <http://www.tldp.org/docs.html#howto>. - - Even if you don't know what to do here, say Y. config NR_CPUS int "Maximum number of CPUs (2-512)" range 2 512 - depends on SMP default "64" help This allows you to specify the maximum number of CPUs which this @@ -431,12 +418,6 @@ config NR_CPUS config HOTPLUG_CPU def_bool y - prompt "Support for hot-pluggable CPUs" - depends on SMP - help - Say Y here to be able to turn CPUs off and on. CPUs - can be controlled through /sys/devices/system/cpu/cpu#. - Say N if you want to disable CPU hotplug. # Some NUMA nodes have memory ranges that span # other nodes. Even though a pfn is valid and @@ -448,7 +429,7 @@ config NODES_SPAN_OTHER_NODES config NUMA bool "NUMA support" - depends on SMP && SCHED_TOPOLOGY + depends on SCHED_TOPOLOGY default n help Enable NUMA support @@ -523,7 +504,6 @@ config SCHED_DRAWER config SCHED_TOPOLOGY def_bool y prompt "Topology scheduler support" - depends on SMP select SCHED_SMT select SCHED_MC select SCHED_BOOK @@ -763,7 +743,7 @@ config PCI_NR_FUNCTIONS This allows you to specify the maximum number of PCI functions which this kernel will support. 
-endif # PCI +endif # PCI config HAS_IOMEM def_bool PCI @@ -829,16 +809,15 @@ menu "Dump support" config CRASH_DUMP bool "kernel crash dumps" - depends on SMP select KEXEC help Generate crash dump after being started by kexec. Crash dump kernels are loaded in the main kernel with kexec-tools into a specially reserved region and then later executed after a crash by kdump/kexec. - Refer to <file:Documentation/s390/zfcpdump.txt> for more details on this. + Refer to <file:Documentation/s390/zfcpdump.rst> for more details on this. This option also enables s390 zfcpdump. - See also <file:Documentation/s390/zfcpdump.txt> + See also <file:Documentation/s390/zfcpdump.rst> endmenu diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index b0920b35f87b..a6dc01a22048 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -88,6 +88,7 @@ CONFIG_HOTPLUG_PCI=y CONFIG_HOTPLUG_PCI_S390=y CONFIG_CHSC_SCH=y CONFIG_VFIO_AP=m +CONFIG_VFIO_CCW=m CONFIG_CRASH_DUMP=y CONFIG_BINFMT_MISC=m CONFIG_HIBERNATION=y @@ -498,6 +499,7 @@ CONFIG_VIRTIO_PCI=m CONFIG_VIRTIO_BALLOON=m CONFIG_VIRTIO_INPUT=y CONFIG_S390_AP_IOMMU=y +CONFIG_S390_CCW_IOMMU=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index c59b922cb6c5..e4bc40073003 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -1,21 +1,22 @@ CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y -CONFIG_USELIB=y CONFIG_AUDIT=y CONFIG_NO_HZ_IDLE=y CONFIG_HIGH_RES_TIMERS=y +CONFIG_BSD_PROCESS_ACCT=y +CONFIG_BSD_PROCESS_ACCT_V3=y CONFIG_TASKSTATS=y CONFIG_TASK_DELAY_ACCT=y CONFIG_TASK_XACCT=y CONFIG_TASK_IO_ACCOUNTING=y -# CONFIG_CPU_ISOLATION is not set CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y -CONFIG_CGROUPS=y +CONFIG_NUMA_BALANCING=y +# CONFIG_NUMA_BALANCING_DEFAULT_ENABLED is not set CONFIG_MEMCG=y CONFIG_MEMCG_SWAP=y CONFIG_BLK_CGROUP=y -CONFIG_CGROUP_SCHED=y +CONFIG_CFS_BANDWIDTH=y CONFIG_RT_GROUP_SCHED=y CONFIG_CGROUP_PIDS=y CONFIG_CGROUP_FREEZER=y @@ -26,98 +27,402 @@ CONFIG_CGROUP_CPUACCT=y CONFIG_CGROUP_PERF=y CONFIG_NAMESPACES=y CONFIG_USER_NS=y -CONFIG_CHECKPOINT_RESTORE=y +CONFIG_SCHED_AUTOGROUP=y CONFIG_BLK_DEV_INITRD=y CONFIG_EXPERT=y # CONFIG_SYSFS_SYSCALL is not set +CONFIG_CHECKPOINT_RESTORE=y CONFIG_BPF_SYSCALL=y CONFIG_USERFAULTFD=y # CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y -CONFIG_LIVEPATCH=y -CONFIG_NR_CPUS=256 -CONFIG_NUMA=y -CONFIG_HZ_100=y -CONFIG_KEXEC_FILE=y -CONFIG_KEXEC_VERIFY_SIG=y -CONFIG_CRASH_DUMP=y -CONFIG_HIBERNATION=y -CONFIG_PM_DEBUG=y -CONFIG_CMM=m -CONFIG_OPROFILE=y +CONFIG_OPROFILE=m CONFIG_KPROBES=y CONFIG_JUMP_LABEL=y -CONFIG_STATIC_KEYS_SELFTEST=y CONFIG_MODULES=y +CONFIG_MODULE_FORCE_LOAD=y CONFIG_MODULE_UNLOAD=y +CONFIG_MODULE_FORCE_UNLOAD=y +CONFIG_MODVERSIONS=y +CONFIG_MODULE_SRCVERSION_ALL=y +CONFIG_MODULE_SIG=y +CONFIG_MODULE_SIG_SHA256=y CONFIG_BLK_DEV_INTEGRITY=y +CONFIG_BLK_DEV_THROTTLING=y +CONFIG_BLK_WBT=y +CONFIG_BLK_WBT_SQ=y CONFIG_PARTITION_ADVANCED=y CONFIG_IBM_PARTITION=y +CONFIG_BSD_DISKLABEL=y +CONFIG_MINIX_SUBPARTITION=y +CONFIG_SOLARIS_X86_PARTITION=y +CONFIG_UNIXWARE_DISKLABEL=y +CONFIG_CFQ_GROUP_IOSCHED=y CONFIG_DEFAULT_DEADLINE=y -CONFIG_BINFMT_MISC=m +CONFIG_LIVEPATCH=y +CONFIG_TUNE_ZEC12=y +CONFIG_NR_CPUS=512 +CONFIG_NUMA=y +CONFIG_HZ_100=y +CONFIG_KEXEC_FILE=y +CONFIG_KEXEC_VERIFY_SIG=y +CONFIG_EXPOLINE=y +CONFIG_EXPOLINE_AUTO=y CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y CONFIG_KSM=y CONFIG_TRANSPARENT_HUGEPAGE=y 
CONFIG_CLEANCACHE=y CONFIG_FRONTSWAP=y +CONFIG_MEM_SOFT_DIRTY=y CONFIG_ZSWAP=y CONFIG_ZBUD=m CONFIG_ZSMALLOC=m CONFIG_ZSMALLOC_STAT=y +CONFIG_DEFERRED_STRUCT_PAGE_INIT=y CONFIG_IDLE_PAGE_TRACKING=y +CONFIG_PCI=y +CONFIG_HOTPLUG_PCI=y +CONFIG_HOTPLUG_PCI_S390=y +CONFIG_CHSC_SCH=y +CONFIG_VFIO_AP=m +CONFIG_VFIO_CCW=m +CONFIG_CRASH_DUMP=y +CONFIG_BINFMT_MISC=m +CONFIG_HIBERNATION=y +CONFIG_PM_DEBUG=y CONFIG_NET=y CONFIG_PACKET=y +CONFIG_PACKET_DIAG=m CONFIG_UNIX=y -CONFIG_NET_KEY=y +CONFIG_UNIX_DIAG=m +CONFIG_XFRM_USER=m +CONFIG_NET_KEY=m +CONFIG_SMC=m +CONFIG_SMC_DIAG=m CONFIG_INET=y CONFIG_IP_MULTICAST=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_ROUTE_MULTIPATH=y +CONFIG_IP_ROUTE_VERBOSE=y +CONFIG_NET_IPIP=m +CONFIG_NET_IPGRE_DEMUX=m +CONFIG_NET_IPGRE=m +CONFIG_NET_IPGRE_BROADCAST=y +CONFIG_IP_MROUTE=y +CONFIG_IP_MROUTE_MULTIPLE_TABLES=y +CONFIG_IP_PIMSM_V1=y +CONFIG_IP_PIMSM_V2=y +CONFIG_SYN_COOKIES=y +CONFIG_NET_IPVTI=m +CONFIG_INET_AH=m +CONFIG_INET_ESP=m +CONFIG_INET_IPCOMP=m +CONFIG_INET_XFRM_MODE_TRANSPORT=m +CONFIG_INET_XFRM_MODE_TUNNEL=m +CONFIG_INET_XFRM_MODE_BEET=m +CONFIG_INET_DIAG=m +CONFIG_INET_UDP_DIAG=m +CONFIG_TCP_CONG_ADVANCED=y +CONFIG_TCP_CONG_HSTCP=m +CONFIG_TCP_CONG_HYBLA=m +CONFIG_TCP_CONG_SCALABLE=m +CONFIG_TCP_CONG_LP=m +CONFIG_TCP_CONG_VENO=m +CONFIG_TCP_CONG_YEAH=m +CONFIG_TCP_CONG_ILLINOIS=m +CONFIG_IPV6_ROUTER_PREF=y +CONFIG_INET6_AH=m +CONFIG_INET6_ESP=m +CONFIG_INET6_IPCOMP=m +CONFIG_IPV6_MIP6=m +CONFIG_INET6_XFRM_MODE_TRANSPORT=m +CONFIG_INET6_XFRM_MODE_TUNNEL=m +CONFIG_INET6_XFRM_MODE_BEET=m +CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m +CONFIG_IPV6_VTI=m +CONFIG_IPV6_SIT=m +CONFIG_IPV6_GRE=m +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_IPV6_SUBTREES=y +CONFIG_NETFILTER=y +CONFIG_NF_CONNTRACK=m +CONFIG_NF_CONNTRACK_SECMARK=y +CONFIG_NF_CONNTRACK_EVENTS=y +CONFIG_NF_CONNTRACK_TIMEOUT=y +CONFIG_NF_CONNTRACK_TIMESTAMP=y +CONFIG_NF_CONNTRACK_AMANDA=m +CONFIG_NF_CONNTRACK_FTP=m +CONFIG_NF_CONNTRACK_H323=m +CONFIG_NF_CONNTRACK_IRC=m +CONFIG_NF_CONNTRACK_NETBIOS_NS=m +CONFIG_NF_CONNTRACK_SNMP=m +CONFIG_NF_CONNTRACK_PPTP=m +CONFIG_NF_CONNTRACK_SANE=m +CONFIG_NF_CONNTRACK_SIP=m +CONFIG_NF_CONNTRACK_TFTP=m +CONFIG_NF_CT_NETLINK=m +CONFIG_NF_CT_NETLINK_TIMEOUT=m +CONFIG_NF_TABLES=m +CONFIG_NFT_CT=m +CONFIG_NFT_COUNTER=m +CONFIG_NFT_LOG=m +CONFIG_NFT_LIMIT=m +CONFIG_NFT_NAT=m +CONFIG_NFT_COMPAT=m +CONFIG_NFT_HASH=m +CONFIG_NETFILTER_XT_SET=m +CONFIG_NETFILTER_XT_TARGET_AUDIT=m +CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m +CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m +CONFIG_NETFILTER_XT_TARGET_CONNMARK=m +CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m +CONFIG_NETFILTER_XT_TARGET_CT=m +CONFIG_NETFILTER_XT_TARGET_DSCP=m +CONFIG_NETFILTER_XT_TARGET_HMARK=m +CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m +CONFIG_NETFILTER_XT_TARGET_LOG=m +CONFIG_NETFILTER_XT_TARGET_MARK=m +CONFIG_NETFILTER_XT_TARGET_NFLOG=m +CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m +CONFIG_NETFILTER_XT_TARGET_TEE=m +CONFIG_NETFILTER_XT_TARGET_TPROXY=m +CONFIG_NETFILTER_XT_TARGET_TRACE=m +CONFIG_NETFILTER_XT_TARGET_SECMARK=m +CONFIG_NETFILTER_XT_TARGET_TCPMSS=m +CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m +CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=m +CONFIG_NETFILTER_XT_MATCH_BPF=m +CONFIG_NETFILTER_XT_MATCH_CLUSTER=m +CONFIG_NETFILTER_XT_MATCH_COMMENT=m +CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m +CONFIG_NETFILTER_XT_MATCH_CONNLABEL=m +CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m +CONFIG_NETFILTER_XT_MATCH_CONNMARK=m +CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m +CONFIG_NETFILTER_XT_MATCH_CPU=m +CONFIG_NETFILTER_XT_MATCH_DCCP=m 
+CONFIG_NETFILTER_XT_MATCH_DEVGROUP=m +CONFIG_NETFILTER_XT_MATCH_DSCP=m +CONFIG_NETFILTER_XT_MATCH_ESP=m +CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m +CONFIG_NETFILTER_XT_MATCH_HELPER=m +CONFIG_NETFILTER_XT_MATCH_IPRANGE=m +CONFIG_NETFILTER_XT_MATCH_IPVS=m +CONFIG_NETFILTER_XT_MATCH_LENGTH=m +CONFIG_NETFILTER_XT_MATCH_LIMIT=m +CONFIG_NETFILTER_XT_MATCH_MAC=m +CONFIG_NETFILTER_XT_MATCH_MARK=m +CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m +CONFIG_NETFILTER_XT_MATCH_NFACCT=m +CONFIG_NETFILTER_XT_MATCH_OSF=m +CONFIG_NETFILTER_XT_MATCH_OWNER=m +CONFIG_NETFILTER_XT_MATCH_POLICY=m +CONFIG_NETFILTER_XT_MATCH_PHYSDEV=m +CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m +CONFIG_NETFILTER_XT_MATCH_QUOTA=m +CONFIG_NETFILTER_XT_MATCH_RATEEST=m +CONFIG_NETFILTER_XT_MATCH_REALM=m +CONFIG_NETFILTER_XT_MATCH_RECENT=m +CONFIG_NETFILTER_XT_MATCH_STATE=m +CONFIG_NETFILTER_XT_MATCH_STATISTIC=m +CONFIG_NETFILTER_XT_MATCH_STRING=m +CONFIG_NETFILTER_XT_MATCH_TCPMSS=m +CONFIG_NETFILTER_XT_MATCH_TIME=m +CONFIG_NETFILTER_XT_MATCH_U32=m +CONFIG_IP_SET=m +CONFIG_IP_SET_BITMAP_IP=m +CONFIG_IP_SET_BITMAP_IPMAC=m +CONFIG_IP_SET_BITMAP_PORT=m +CONFIG_IP_SET_HASH_IP=m +CONFIG_IP_SET_HASH_IPPORT=m +CONFIG_IP_SET_HASH_IPPORTIP=m +CONFIG_IP_SET_HASH_IPPORTNET=m +CONFIG_IP_SET_HASH_NETPORTNET=m +CONFIG_IP_SET_HASH_NET=m +CONFIG_IP_SET_HASH_NETNET=m +CONFIG_IP_SET_HASH_NETPORT=m +CONFIG_IP_SET_HASH_NETIFACE=m +CONFIG_IP_SET_LIST_SET=m +CONFIG_IP_VS=m +CONFIG_IP_VS_PROTO_TCP=y +CONFIG_IP_VS_PROTO_UDP=y +CONFIG_IP_VS_PROTO_ESP=y +CONFIG_IP_VS_PROTO_AH=y +CONFIG_IP_VS_RR=m +CONFIG_IP_VS_WRR=m +CONFIG_IP_VS_LC=m +CONFIG_IP_VS_WLC=m +CONFIG_IP_VS_LBLC=m +CONFIG_IP_VS_LBLCR=m +CONFIG_IP_VS_DH=m +CONFIG_IP_VS_SH=m +CONFIG_IP_VS_SED=m +CONFIG_IP_VS_NQ=m +CONFIG_IP_VS_FTP=m +CONFIG_IP_VS_PE_SIP=m +CONFIG_NF_CONNTRACK_IPV4=m +CONFIG_NF_TABLES_IPV4=y +CONFIG_NFT_CHAIN_ROUTE_IPV4=m +CONFIG_NF_TABLES_ARP=y +CONFIG_NFT_CHAIN_NAT_IPV4=m +CONFIG_IP_NF_IPTABLES=m +CONFIG_IP_NF_MATCH_AH=m +CONFIG_IP_NF_MATCH_ECN=m +CONFIG_IP_NF_MATCH_RPFILTER=m +CONFIG_IP_NF_MATCH_TTL=m +CONFIG_IP_NF_FILTER=m +CONFIG_IP_NF_TARGET_REJECT=m +CONFIG_IP_NF_NAT=m +CONFIG_IP_NF_TARGET_MASQUERADE=m +CONFIG_IP_NF_MANGLE=m +CONFIG_IP_NF_TARGET_CLUSTERIP=m +CONFIG_IP_NF_TARGET_ECN=m +CONFIG_IP_NF_TARGET_TTL=m +CONFIG_IP_NF_RAW=m +CONFIG_IP_NF_SECURITY=m +CONFIG_IP_NF_ARPTABLES=m +CONFIG_IP_NF_ARPFILTER=m +CONFIG_IP_NF_ARP_MANGLE=m +CONFIG_NF_CONNTRACK_IPV6=m +CONFIG_NF_TABLES_IPV6=y +CONFIG_NFT_CHAIN_ROUTE_IPV6=m +CONFIG_NFT_CHAIN_NAT_IPV6=m +CONFIG_IP6_NF_IPTABLES=m +CONFIG_IP6_NF_MATCH_AH=m +CONFIG_IP6_NF_MATCH_EUI64=m +CONFIG_IP6_NF_MATCH_FRAG=m +CONFIG_IP6_NF_MATCH_OPTS=m +CONFIG_IP6_NF_MATCH_HL=m +CONFIG_IP6_NF_MATCH_IPV6HEADER=m +CONFIG_IP6_NF_MATCH_MH=m +CONFIG_IP6_NF_MATCH_RPFILTER=m +CONFIG_IP6_NF_MATCH_RT=m +CONFIG_IP6_NF_TARGET_HL=m +CONFIG_IP6_NF_FILTER=m +CONFIG_IP6_NF_TARGET_REJECT=m +CONFIG_IP6_NF_MANGLE=m +CONFIG_IP6_NF_RAW=m +CONFIG_IP6_NF_SECURITY=m +CONFIG_IP6_NF_NAT=m +CONFIG_IP6_NF_TARGET_MASQUERADE=m +CONFIG_NF_TABLES_BRIDGE=y +CONFIG_RDS=m +CONFIG_RDS_RDMA=m +CONFIG_RDS_TCP=m CONFIG_L2TP=m CONFIG_L2TP_DEBUGFS=m -CONFIG_VLAN_8021Q=y +CONFIG_L2TP_V3=y +CONFIG_L2TP_IP=m +CONFIG_L2TP_ETH=m +CONFIG_BRIDGE=m +CONFIG_VLAN_8021Q=m +CONFIG_VLAN_8021Q_GVRP=y CONFIG_NET_SCHED=y CONFIG_NET_SCH_CBQ=m +CONFIG_NET_SCH_HTB=m +CONFIG_NET_SCH_HFSC=m CONFIG_NET_SCH_PRIO=m +CONFIG_NET_SCH_MULTIQ=m CONFIG_NET_SCH_RED=m +CONFIG_NET_SCH_SFB=m CONFIG_NET_SCH_SFQ=m CONFIG_NET_SCH_TEQL=m CONFIG_NET_SCH_TBF=m CONFIG_NET_SCH_GRED=m CONFIG_NET_SCH_DSMARK=m +CONFIG_NET_SCH_NETEM=m +CONFIG_NET_SCH_DRR=m 
+CONFIG_NET_SCH_MQPRIO=m +CONFIG_NET_SCH_CHOKE=m +CONFIG_NET_SCH_QFQ=m +CONFIG_NET_SCH_CODEL=m +CONFIG_NET_SCH_FQ_CODEL=m +CONFIG_NET_SCH_INGRESS=m +CONFIG_NET_SCH_PLUG=m +CONFIG_NET_CLS_BASIC=m CONFIG_NET_CLS_TCINDEX=m CONFIG_NET_CLS_ROUTE4=m CONFIG_NET_CLS_FW=m CONFIG_NET_CLS_U32=m +CONFIG_CLS_U32_PERF=y CONFIG_CLS_U32_MARK=y CONFIG_NET_CLS_RSVP=m CONFIG_NET_CLS_RSVP6=m +CONFIG_NET_CLS_FLOW=m +CONFIG_NET_CLS_CGROUP=y +CONFIG_NET_CLS_BPF=m CONFIG_NET_CLS_ACT=y -CONFIG_NET_ACT_POLICE=y +CONFIG_NET_ACT_POLICE=m +CONFIG_NET_ACT_GACT=m +CONFIG_GACT_PROB=y +CONFIG_NET_ACT_MIRRED=m +CONFIG_NET_ACT_IPT=m +CONFIG_NET_ACT_NAT=m +CONFIG_NET_ACT_PEDIT=m +CONFIG_NET_ACT_SIMP=m +CONFIG_NET_ACT_SKBEDIT=m +CONFIG_NET_ACT_CSUM=m +CONFIG_DNS_RESOLVER=y +CONFIG_OPENVSWITCH=m +CONFIG_VSOCKETS=m +CONFIG_VIRTIO_VSOCKETS=m +CONFIG_NETLINK_DIAG=m +CONFIG_CGROUP_NET_PRIO=y CONFIG_BPF_JIT=y -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_NET_PKTGEN=m CONFIG_DEVTMPFS=y +CONFIG_DMA_CMA=y +CONFIG_CMA_SIZE_MBYTES=0 +CONFIG_CONNECTOR=y +CONFIG_ZRAM=m CONFIG_BLK_DEV_LOOP=m +CONFIG_BLK_DEV_CRYPTOLOOP=m +CONFIG_BLK_DEV_DRBD=m CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=32768 CONFIG_VIRTIO_BLK=y +CONFIG_BLK_DEV_RBD=m +CONFIG_BLK_DEV_NVME=m +CONFIG_ENCLOSURE_SERVICES=m +CONFIG_GENWQE=m +CONFIG_RAID_ATTRS=m CONFIG_SCSI=y -# CONFIG_SCSI_MQ_DEFAULT is not set CONFIG_BLK_DEV_SD=y -CONFIG_CHR_DEV_ST=y -CONFIG_BLK_DEV_SR=y -CONFIG_BLK_DEV_SR_VENDOR=y +CONFIG_CHR_DEV_ST=m +CONFIG_CHR_DEV_OSST=m +CONFIG_BLK_DEV_SR=m CONFIG_CHR_DEV_SG=y +CONFIG_CHR_DEV_SCH=m +CONFIG_SCSI_ENCLOSURE=m CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_LOGGING=y +CONFIG_SCSI_SPI_ATTRS=m CONFIG_SCSI_FC_ATTRS=y +CONFIG_SCSI_SAS_LIBSAS=m +CONFIG_SCSI_SRP_ATTRS=m +CONFIG_ISCSI_TCP=m +CONFIG_SCSI_DEBUG=m CONFIG_ZFCP=y -CONFIG_SCSI_VIRTIO=y +CONFIG_SCSI_VIRTIO=m +CONFIG_SCSI_DH=y +CONFIG_SCSI_DH_RDAC=m +CONFIG_SCSI_DH_HP_SW=m +CONFIG_SCSI_DH_EMC=m +CONFIG_SCSI_DH_ALUA=m +CONFIG_SCSI_OSD_INITIATOR=m +CONFIG_SCSI_OSD_ULD=m CONFIG_MD=y +CONFIG_BLK_DEV_MD=y CONFIG_MD_LINEAR=m CONFIG_MD_MULTIPATH=m -CONFIG_BLK_DEV_DM=y +CONFIG_MD_FAULTY=m +CONFIG_BLK_DEV_DM=m CONFIG_DM_CRYPT=m CONFIG_DM_SNAPSHOT=m +CONFIG_DM_THIN_PROVISIONING=m CONFIG_DM_MIRROR=m CONFIG_DM_LOG_USERSPACE=m CONFIG_DM_RAID=m @@ -125,71 +430,216 @@ CONFIG_DM_ZERO=m CONFIG_DM_MULTIPATH=m CONFIG_DM_MULTIPATH_QL=m CONFIG_DM_MULTIPATH_ST=m +CONFIG_DM_DELAY=m CONFIG_DM_UEVENT=y +CONFIG_DM_FLAKEY=m CONFIG_DM_VERITY=m CONFIG_DM_SWITCH=m CONFIG_NETDEVICES=y CONFIG_BONDING=m CONFIG_DUMMY=m CONFIG_EQUALIZER=m +CONFIG_IFB=m +CONFIG_MACVLAN=m +CONFIG_MACVTAP=m +CONFIG_VXLAN=m CONFIG_TUN=m -CONFIG_VIRTIO_NET=y -# CONFIG_NET_VENDOR_ALACRITECH is not set -# CONFIG_NET_VENDOR_AURORA is not set -# CONFIG_NET_VENDOR_CORTINA is not set -# CONFIG_NET_VENDOR_SOLARFLARE is not set -# CONFIG_NET_VENDOR_SOCIONEXT is not set -# CONFIG_NET_VENDOR_SYNOPSYS is not set -# CONFIG_INPUT is not set +CONFIG_VETH=m +CONFIG_VIRTIO_NET=m +CONFIG_NLMON=m +# CONFIG_NET_VENDOR_ARC is not set +# CONFIG_NET_VENDOR_CHELSIO is not set +# CONFIG_NET_VENDOR_INTEL is not set +# CONFIG_NET_VENDOR_MARVELL is not set +CONFIG_MLX4_EN=m +CONFIG_MLX5_CORE=m +CONFIG_MLX5_CORE_EN=y +# CONFIG_NET_VENDOR_NATSEMI is not set +CONFIG_PPP=m +CONFIG_PPP_BSDCOMP=m +CONFIG_PPP_DEFLATE=m +CONFIG_PPP_MPPE=m +CONFIG_PPPOE=m +CONFIG_PPTP=m +CONFIG_PPPOL2TP=m +CONFIG_PPP_ASYNC=m +CONFIG_PPP_SYNC_TTY=m +CONFIG_ISM=m +CONFIG_INPUT_EVDEV=y +# CONFIG_INPUT_KEYBOARD is not set +# CONFIG_INPUT_MOUSE is not set # CONFIG_SERIO is not set -# CONFIG_VT is not set 
-CONFIG_DEVKMEM=y +CONFIG_LEGACY_PTY_COUNT=0 +CONFIG_HW_RANDOM_VIRTIO=m CONFIG_RAW_DRIVER=m -CONFIG_VIRTIO_BALLOON=y +CONFIG_HANGCHECK_TIMER=m +CONFIG_TN3270_FS=y +# CONFIG_HWMON is not set +CONFIG_WATCHDOG=y +CONFIG_WATCHDOG_NOWAYOUT=y +CONFIG_SOFT_WATCHDOG=m +CONFIG_DIAG288_WATCHDOG=m +CONFIG_DRM=y +CONFIG_DRM_VIRTIO_GPU=y +CONFIG_FRAMEBUFFER_CONSOLE=y +# CONFIG_HID is not set +# CONFIG_USB_SUPPORT is not set +CONFIG_INFINIBAND=m +CONFIG_INFINIBAND_USER_ACCESS=m +CONFIG_MLX4_INFINIBAND=m +CONFIG_MLX5_INFINIBAND=m +CONFIG_VFIO=m +CONFIG_VFIO_PCI=m +CONFIG_VFIO_MDEV=m +CONFIG_VFIO_MDEV_DEVICE=m +CONFIG_VIRTIO_PCI=m +CONFIG_VIRTIO_BALLOON=m +CONFIG_VIRTIO_INPUT=y +CONFIG_S390_AP_IOMMU=y +CONFIG_S390_CCW_IOMMU=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y +CONFIG_JBD2_DEBUG=y +CONFIG_JFS_FS=m +CONFIG_JFS_POSIX_ACL=y +CONFIG_JFS_SECURITY=y +CONFIG_JFS_STATISTICS=y CONFIG_XFS_FS=y CONFIG_XFS_QUOTA=y CONFIG_XFS_POSIX_ACL=y CONFIG_XFS_RT=y +CONFIG_GFS2_FS=m +CONFIG_GFS2_FS_LOCKING_DLM=y +CONFIG_OCFS2_FS=m CONFIG_BTRFS_FS=y CONFIG_BTRFS_FS_POSIX_ACL=y +CONFIG_NILFS2_FS=m +CONFIG_FS_DAX=y +CONFIG_EXPORTFS_BLOCK_OPS=y +CONFIG_FS_ENCRYPTION=y CONFIG_FANOTIFY=y +CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y +CONFIG_QUOTA_NETLINK_INTERFACE=y +CONFIG_QFMT_V1=m +CONFIG_QFMT_V2=m +CONFIG_AUTOFS4_FS=m CONFIG_FUSE_FS=y +CONFIG_CUSE=m +CONFIG_OVERLAY_FS=m +CONFIG_FSCACHE=m +CONFIG_CACHEFILES=m +CONFIG_ISO9660_FS=y +CONFIG_JOLIET=y +CONFIG_ZISOFS=y +CONFIG_UDF_FS=m +CONFIG_MSDOS_FS=m +CONFIG_VFAT_FS=m +CONFIG_NTFS_FS=m +CONFIG_NTFS_RW=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y CONFIG_HUGETLBFS=y -# CONFIG_NETWORK_FILESYSTEMS is not set +CONFIG_CONFIGFS_FS=m +CONFIG_ECRYPT_FS=m +CONFIG_CRAMFS=m +CONFIG_SQUASHFS=m +CONFIG_SQUASHFS_XATTR=y +CONFIG_SQUASHFS_LZO=y +CONFIG_SQUASHFS_XZ=y +CONFIG_ROMFS_FS=m +CONFIG_NFS_FS=m +CONFIG_NFS_V3_ACL=y +CONFIG_NFS_V4=m +CONFIG_NFS_SWAP=y +CONFIG_NFSD=m +CONFIG_NFSD_V3_ACL=y +CONFIG_NFSD_V4=y +CONFIG_NFSD_V4_SECURITY_LABEL=y +CONFIG_CIFS=m +CONFIG_CIFS_STATS=y +CONFIG_CIFS_STATS2=y +CONFIG_CIFS_WEAK_PW_HASH=y +CONFIG_CIFS_UPCALL=y +CONFIG_CIFS_XATTR=y +CONFIG_CIFS_POSIX=y +# CONFIG_CIFS_DEBUG is not set +CONFIG_CIFS_DFS_UPCALL=y +CONFIG_NLS_DEFAULT="utf8" +CONFIG_NLS_CODEPAGE_437=m +CONFIG_NLS_CODEPAGE_850=m +CONFIG_NLS_ASCII=m +CONFIG_NLS_ISO8859_1=m +CONFIG_NLS_ISO8859_15=m +CONFIG_NLS_UTF8=m +CONFIG_DLM=m +CONFIG_PRINTK_TIME=y +CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF4=y +CONFIG_GDB_SCRIPTS=y +# CONFIG_ENABLE_MUST_CHECK is not set +CONFIG_FRAME_WARN=1024 +CONFIG_UNUSED_SYMBOLS=y +CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_MEMORY_INIT=y +CONFIG_PANIC_ON_OOPS=y +CONFIG_RCU_TORTURE_TEST=m +CONFIG_RCU_CPU_STALL_TIMEOUT=60 +CONFIG_LATENCYTOP=y +CONFIG_SCHED_TRACER=y +CONFIG_FTRACE_SYSCALLS=y +CONFIG_STACK_TRACER=y +CONFIG_BLK_DEV_IO_TRACE=y +CONFIG_FUNCTION_PROFILER=y +CONFIG_HIST_TRIGGERS=y +CONFIG_LKDTM=m +CONFIG_PERCPU_TEST=m +CONFIG_ATOMIC64_SELFTEST=y +CONFIG_TEST_BPF=m +CONFIG_BUG_ON_DATA_CORRUPTION=y +CONFIG_S390_PTDUMP=y +CONFIG_PERSISTENT_KEYRINGS=y +CONFIG_BIG_KEYS=y +CONFIG_ENCRYPTED_KEYS=m +CONFIG_SECURITY=y +CONFIG_SECURITY_NETWORK=y +CONFIG_SECURITY_SELINUX=y +CONFIG_SECURITY_SELINUX_BOOTPARAM=y +CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=0 +CONFIG_SECURITY_SELINUX_DISABLE=y +CONFIG_INTEGRITY_SIGNATURE=y +CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y +CONFIG_IMA=y +CONFIG_IMA_DEFAULT_HASH_SHA256=y +CONFIG_IMA_WRITE_POLICY=y +CONFIG_IMA_APPRAISE=y +CONFIG_CRYPTO_FIPS=y +CONFIG_CRYPTO_DH=m +CONFIG_CRYPTO_ECDH=m +CONFIG_CRYPTO_USER=m +# 
CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set +CONFIG_CRYPTO_PCRYPT=m CONFIG_CRYPTO_CRYPTD=m -CONFIG_CRYPTO_AUTHENC=m CONFIG_CRYPTO_TEST=m -CONFIG_CRYPTO_CCM=m -CONFIG_CRYPTO_GCM=m -CONFIG_CRYPTO_CBC=y -CONFIG_CRYPTO_CFB=m -CONFIG_CRYPTO_CTS=m +CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_LRW=m -CONFIG_CRYPTO_OFB=m CONFIG_CRYPTO_PCBC=m -CONFIG_CRYPTO_XTS=m -CONFIG_CRYPTO_CMAC=m +CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_CRC32=m -CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD128=m CONFIG_CRYPTO_RMD160=m CONFIG_CRYPTO_RMD256=m CONFIG_CRYPTO_RMD320=m -CONFIG_CRYPTO_SHA256=y CONFIG_CRYPTO_SHA512=m +CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m +CONFIG_CRYPTO_AES_TI=m CONFIG_CRYPTO_ANUBIS=m -CONFIG_CRYPTO_ARC4=m CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_CAMELLIA=m CONFIG_CRYPTO_CAST5=m @@ -199,16 +649,16 @@ CONFIG_CRYPTO_KHAZAD=m CONFIG_CRYPTO_SALSA20=m CONFIG_CRYPTO_SEED=m CONFIG_CRYPTO_SERPENT=m -CONFIG_CRYPTO_SM4=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m -CONFIG_CRYPTO_DEFLATE=m +CONFIG_CRYPTO_842=m CONFIG_CRYPTO_LZ4=m CONFIG_CRYPTO_LZ4HC=m CONFIG_CRYPTO_ANSI_CPRNG=m CONFIG_CRYPTO_USER_API_HASH=m CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m +CONFIG_CRYPTO_USER_API_AEAD=m CONFIG_ZCRYPT=m CONFIG_PKEY=m CONFIG_CRYPTO_PAES_S390=m @@ -217,38 +667,14 @@ CONFIG_CRYPTO_SHA256_S390=m CONFIG_CRYPTO_SHA512_S390=m CONFIG_CRYPTO_DES_S390=m CONFIG_CRYPTO_AES_S390=m +CONFIG_CRYPTO_GHASH_S390=m CONFIG_CRYPTO_CRC32_S390=y CONFIG_CRC7=m -# CONFIG_XZ_DEC_X86 is not set -# CONFIG_XZ_DEC_POWERPC is not set -# CONFIG_XZ_DEC_IA64 is not set -# CONFIG_XZ_DEC_ARM is not set -# CONFIG_XZ_DEC_ARMTHUMB is not set -# CONFIG_XZ_DEC_SPARC is not set -CONFIG_DEBUG_INFO=y -CONFIG_DEBUG_INFO_DWARF4=y -CONFIG_GDB_SCRIPTS=y -CONFIG_UNUSED_SYMBOLS=y -CONFIG_DEBUG_SECTION_MISMATCH=y -CONFIG_DEBUG_FORCE_WEAK_PER_CPU=y -CONFIG_MAGIC_SYSRQ=y -CONFIG_DEBUG_PAGEALLOC=y -CONFIG_DETECT_HUNG_TASK=y -CONFIG_PANIC_ON_OOPS=y -CONFIG_PROVE_LOCKING=y -CONFIG_LOCK_STAT=y -CONFIG_DEBUG_LOCKDEP=y -CONFIG_DEBUG_ATOMIC_SLEEP=y -CONFIG_DEBUG_LIST=y -CONFIG_DEBUG_SG=y -CONFIG_DEBUG_NOTIFIERS=y -CONFIG_RCU_CPU_STALL_TIMEOUT=60 -CONFIG_LATENCYTOP=y -CONFIG_SCHED_TRACER=y -CONFIG_FTRACE_SYSCALLS=y -CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP=y -CONFIG_STACK_TRACER=y -CONFIG_BLK_DEV_IO_TRACE=y -CONFIG_FUNCTION_PROFILER=y -# CONFIG_RUNTIME_TESTING_MENU is not set -CONFIG_S390_PTDUMP=y +CONFIG_CRC8=m +CONFIG_CORDIC=m +CONFIG_CMM=m +CONFIG_APPLDATA_BASE=y +CONFIG_KVM=m +CONFIG_KVM_S390_UCONTROL=y +CONFIG_VHOST_NET=m +CONFIG_VHOST_VSOCK=m diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig deleted file mode 100644 index 09aa5cb14873..000000000000 --- a/arch/s390/configs/performance_defconfig +++ /dev/null @@ -1,678 +0,0 @@ -CONFIG_SYSVIPC=y -CONFIG_POSIX_MQUEUE=y -CONFIG_AUDIT=y -CONFIG_NO_HZ_IDLE=y -CONFIG_HIGH_RES_TIMERS=y -CONFIG_BSD_PROCESS_ACCT=y -CONFIG_BSD_PROCESS_ACCT_V3=y -CONFIG_TASKSTATS=y -CONFIG_TASK_DELAY_ACCT=y -CONFIG_TASK_XACCT=y -CONFIG_TASK_IO_ACCOUNTING=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_NUMA_BALANCING=y -# CONFIG_NUMA_BALANCING_DEFAULT_ENABLED is not set -CONFIG_MEMCG=y -CONFIG_MEMCG_SWAP=y -CONFIG_BLK_CGROUP=y -CONFIG_CFS_BANDWIDTH=y -CONFIG_RT_GROUP_SCHED=y -CONFIG_CGROUP_PIDS=y -CONFIG_CGROUP_FREEZER=y -CONFIG_CGROUP_HUGETLB=y -CONFIG_CPUSETS=y -CONFIG_CGROUP_DEVICE=y -CONFIG_CGROUP_CPUACCT=y -CONFIG_CGROUP_PERF=y -CONFIG_NAMESPACES=y -CONFIG_USER_NS=y -CONFIG_SCHED_AUTOGROUP=y 
-CONFIG_BLK_DEV_INITRD=y -CONFIG_EXPERT=y -# CONFIG_SYSFS_SYSCALL is not set -CONFIG_CHECKPOINT_RESTORE=y -CONFIG_BPF_SYSCALL=y -CONFIG_USERFAULTFD=y -# CONFIG_COMPAT_BRK is not set -CONFIG_PROFILING=y -CONFIG_OPROFILE=m -CONFIG_KPROBES=y -CONFIG_JUMP_LABEL=y -CONFIG_MODULES=y -CONFIG_MODULE_FORCE_LOAD=y -CONFIG_MODULE_UNLOAD=y -CONFIG_MODULE_FORCE_UNLOAD=y -CONFIG_MODVERSIONS=y -CONFIG_MODULE_SRCVERSION_ALL=y -CONFIG_MODULE_SIG=y -CONFIG_MODULE_SIG_SHA256=y -CONFIG_BLK_DEV_INTEGRITY=y -CONFIG_BLK_DEV_THROTTLING=y -CONFIG_BLK_WBT=y -CONFIG_BLK_WBT_SQ=y -CONFIG_PARTITION_ADVANCED=y -CONFIG_IBM_PARTITION=y -CONFIG_BSD_DISKLABEL=y -CONFIG_MINIX_SUBPARTITION=y -CONFIG_SOLARIS_X86_PARTITION=y -CONFIG_UNIXWARE_DISKLABEL=y -CONFIG_CFQ_GROUP_IOSCHED=y -CONFIG_DEFAULT_DEADLINE=y -CONFIG_LIVEPATCH=y -CONFIG_TUNE_ZEC12=y -CONFIG_NR_CPUS=512 -CONFIG_NUMA=y -CONFIG_HZ_100=y -CONFIG_KEXEC_FILE=y -CONFIG_KEXEC_VERIFY_SIG=y -CONFIG_EXPOLINE=y -CONFIG_EXPOLINE_AUTO=y -CONFIG_MEMORY_HOTPLUG=y -CONFIG_MEMORY_HOTREMOVE=y -CONFIG_KSM=y -CONFIG_TRANSPARENT_HUGEPAGE=y -CONFIG_CLEANCACHE=y -CONFIG_FRONTSWAP=y -CONFIG_MEM_SOFT_DIRTY=y -CONFIG_ZSWAP=y -CONFIG_ZBUD=m -CONFIG_ZSMALLOC=m -CONFIG_ZSMALLOC_STAT=y -CONFIG_DEFERRED_STRUCT_PAGE_INIT=y -CONFIG_IDLE_PAGE_TRACKING=y -CONFIG_PCI=y -CONFIG_HOTPLUG_PCI=y -CONFIG_HOTPLUG_PCI_S390=y -CONFIG_CHSC_SCH=y -CONFIG_VFIO_AP=m -CONFIG_CRASH_DUMP=y -CONFIG_BINFMT_MISC=m -CONFIG_HIBERNATION=y -CONFIG_PM_DEBUG=y -CONFIG_NET=y -CONFIG_PACKET=y -CONFIG_PACKET_DIAG=m -CONFIG_UNIX=y -CONFIG_UNIX_DIAG=m -CONFIG_XFRM_USER=m -CONFIG_NET_KEY=m -CONFIG_SMC=m -CONFIG_SMC_DIAG=m -CONFIG_INET=y -CONFIG_IP_MULTICAST=y -CONFIG_IP_ADVANCED_ROUTER=y -CONFIG_IP_MULTIPLE_TABLES=y -CONFIG_IP_ROUTE_MULTIPATH=y -CONFIG_IP_ROUTE_VERBOSE=y -CONFIG_NET_IPIP=m -CONFIG_NET_IPGRE_DEMUX=m -CONFIG_NET_IPGRE=m -CONFIG_NET_IPGRE_BROADCAST=y -CONFIG_IP_MROUTE=y -CONFIG_IP_MROUTE_MULTIPLE_TABLES=y -CONFIG_IP_PIMSM_V1=y -CONFIG_IP_PIMSM_V2=y -CONFIG_SYN_COOKIES=y -CONFIG_NET_IPVTI=m -CONFIG_INET_AH=m -CONFIG_INET_ESP=m -CONFIG_INET_IPCOMP=m -CONFIG_INET_XFRM_MODE_TRANSPORT=m -CONFIG_INET_XFRM_MODE_TUNNEL=m -CONFIG_INET_XFRM_MODE_BEET=m -CONFIG_INET_DIAG=m -CONFIG_INET_UDP_DIAG=m -CONFIG_TCP_CONG_ADVANCED=y -CONFIG_TCP_CONG_HSTCP=m -CONFIG_TCP_CONG_HYBLA=m -CONFIG_TCP_CONG_SCALABLE=m -CONFIG_TCP_CONG_LP=m -CONFIG_TCP_CONG_VENO=m -CONFIG_TCP_CONG_YEAH=m -CONFIG_TCP_CONG_ILLINOIS=m -CONFIG_IPV6_ROUTER_PREF=y -CONFIG_INET6_AH=m -CONFIG_INET6_ESP=m -CONFIG_INET6_IPCOMP=m -CONFIG_IPV6_MIP6=m -CONFIG_INET6_XFRM_MODE_TRANSPORT=m -CONFIG_INET6_XFRM_MODE_TUNNEL=m -CONFIG_INET6_XFRM_MODE_BEET=m -CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m -CONFIG_IPV6_VTI=m -CONFIG_IPV6_SIT=m -CONFIG_IPV6_GRE=m -CONFIG_IPV6_MULTIPLE_TABLES=y -CONFIG_IPV6_SUBTREES=y -CONFIG_NETFILTER=y -CONFIG_NF_CONNTRACK=m -CONFIG_NF_CONNTRACK_SECMARK=y -CONFIG_NF_CONNTRACK_EVENTS=y -CONFIG_NF_CONNTRACK_TIMEOUT=y -CONFIG_NF_CONNTRACK_TIMESTAMP=y -CONFIG_NF_CONNTRACK_AMANDA=m -CONFIG_NF_CONNTRACK_FTP=m -CONFIG_NF_CONNTRACK_H323=m -CONFIG_NF_CONNTRACK_IRC=m -CONFIG_NF_CONNTRACK_NETBIOS_NS=m -CONFIG_NF_CONNTRACK_SNMP=m -CONFIG_NF_CONNTRACK_PPTP=m -CONFIG_NF_CONNTRACK_SANE=m -CONFIG_NF_CONNTRACK_SIP=m -CONFIG_NF_CONNTRACK_TFTP=m -CONFIG_NF_CT_NETLINK=m -CONFIG_NF_CT_NETLINK_TIMEOUT=m -CONFIG_NF_TABLES=m -CONFIG_NFT_CT=m -CONFIG_NFT_COUNTER=m -CONFIG_NFT_LOG=m -CONFIG_NFT_LIMIT=m -CONFIG_NFT_NAT=m -CONFIG_NFT_COMPAT=m -CONFIG_NFT_HASH=m -CONFIG_NETFILTER_XT_SET=m -CONFIG_NETFILTER_XT_TARGET_AUDIT=m -CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m 
-CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m -CONFIG_NETFILTER_XT_TARGET_CONNMARK=m -CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m -CONFIG_NETFILTER_XT_TARGET_CT=m -CONFIG_NETFILTER_XT_TARGET_DSCP=m -CONFIG_NETFILTER_XT_TARGET_HMARK=m -CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m -CONFIG_NETFILTER_XT_TARGET_LOG=m -CONFIG_NETFILTER_XT_TARGET_MARK=m -CONFIG_NETFILTER_XT_TARGET_NFLOG=m -CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m -CONFIG_NETFILTER_XT_TARGET_TEE=m -CONFIG_NETFILTER_XT_TARGET_TPROXY=m -CONFIG_NETFILTER_XT_TARGET_TRACE=m -CONFIG_NETFILTER_XT_TARGET_SECMARK=m -CONFIG_NETFILTER_XT_TARGET_TCPMSS=m -CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m -CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=m -CONFIG_NETFILTER_XT_MATCH_BPF=m -CONFIG_NETFILTER_XT_MATCH_CLUSTER=m -CONFIG_NETFILTER_XT_MATCH_COMMENT=m -CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m -CONFIG_NETFILTER_XT_MATCH_CONNLABEL=m -CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m -CONFIG_NETFILTER_XT_MATCH_CONNMARK=m -CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m -CONFIG_NETFILTER_XT_MATCH_CPU=m -CONFIG_NETFILTER_XT_MATCH_DCCP=m -CONFIG_NETFILTER_XT_MATCH_DEVGROUP=m -CONFIG_NETFILTER_XT_MATCH_DSCP=m -CONFIG_NETFILTER_XT_MATCH_ESP=m -CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m -CONFIG_NETFILTER_XT_MATCH_HELPER=m -CONFIG_NETFILTER_XT_MATCH_IPRANGE=m -CONFIG_NETFILTER_XT_MATCH_IPVS=m -CONFIG_NETFILTER_XT_MATCH_LENGTH=m -CONFIG_NETFILTER_XT_MATCH_LIMIT=m -CONFIG_NETFILTER_XT_MATCH_MAC=m -CONFIG_NETFILTER_XT_MATCH_MARK=m -CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m -CONFIG_NETFILTER_XT_MATCH_NFACCT=m -CONFIG_NETFILTER_XT_MATCH_OSF=m -CONFIG_NETFILTER_XT_MATCH_OWNER=m -CONFIG_NETFILTER_XT_MATCH_POLICY=m -CONFIG_NETFILTER_XT_MATCH_PHYSDEV=m -CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m -CONFIG_NETFILTER_XT_MATCH_QUOTA=m -CONFIG_NETFILTER_XT_MATCH_RATEEST=m -CONFIG_NETFILTER_XT_MATCH_REALM=m -CONFIG_NETFILTER_XT_MATCH_RECENT=m -CONFIG_NETFILTER_XT_MATCH_STATE=m -CONFIG_NETFILTER_XT_MATCH_STATISTIC=m -CONFIG_NETFILTER_XT_MATCH_STRING=m -CONFIG_NETFILTER_XT_MATCH_TCPMSS=m -CONFIG_NETFILTER_XT_MATCH_TIME=m -CONFIG_NETFILTER_XT_MATCH_U32=m -CONFIG_IP_SET=m -CONFIG_IP_SET_BITMAP_IP=m -CONFIG_IP_SET_BITMAP_IPMAC=m -CONFIG_IP_SET_BITMAP_PORT=m -CONFIG_IP_SET_HASH_IP=m -CONFIG_IP_SET_HASH_IPPORT=m -CONFIG_IP_SET_HASH_IPPORTIP=m -CONFIG_IP_SET_HASH_IPPORTNET=m -CONFIG_IP_SET_HASH_NETPORTNET=m -CONFIG_IP_SET_HASH_NET=m -CONFIG_IP_SET_HASH_NETNET=m -CONFIG_IP_SET_HASH_NETPORT=m -CONFIG_IP_SET_HASH_NETIFACE=m -CONFIG_IP_SET_LIST_SET=m -CONFIG_IP_VS=m -CONFIG_IP_VS_PROTO_TCP=y -CONFIG_IP_VS_PROTO_UDP=y -CONFIG_IP_VS_PROTO_ESP=y -CONFIG_IP_VS_PROTO_AH=y -CONFIG_IP_VS_RR=m -CONFIG_IP_VS_WRR=m -CONFIG_IP_VS_LC=m -CONFIG_IP_VS_WLC=m -CONFIG_IP_VS_LBLC=m -CONFIG_IP_VS_LBLCR=m -CONFIG_IP_VS_DH=m -CONFIG_IP_VS_SH=m -CONFIG_IP_VS_SED=m -CONFIG_IP_VS_NQ=m -CONFIG_IP_VS_FTP=m -CONFIG_IP_VS_PE_SIP=m -CONFIG_NF_CONNTRACK_IPV4=m -CONFIG_NF_TABLES_IPV4=y -CONFIG_NFT_CHAIN_ROUTE_IPV4=m -CONFIG_NF_TABLES_ARP=y -CONFIG_NFT_CHAIN_NAT_IPV4=m -CONFIG_IP_NF_IPTABLES=m -CONFIG_IP_NF_MATCH_AH=m -CONFIG_IP_NF_MATCH_ECN=m -CONFIG_IP_NF_MATCH_RPFILTER=m -CONFIG_IP_NF_MATCH_TTL=m -CONFIG_IP_NF_FILTER=m -CONFIG_IP_NF_TARGET_REJECT=m -CONFIG_IP_NF_NAT=m -CONFIG_IP_NF_TARGET_MASQUERADE=m -CONFIG_IP_NF_MANGLE=m -CONFIG_IP_NF_TARGET_CLUSTERIP=m -CONFIG_IP_NF_TARGET_ECN=m -CONFIG_IP_NF_TARGET_TTL=m -CONFIG_IP_NF_RAW=m -CONFIG_IP_NF_SECURITY=m -CONFIG_IP_NF_ARPTABLES=m -CONFIG_IP_NF_ARPFILTER=m -CONFIG_IP_NF_ARP_MANGLE=m -CONFIG_NF_CONNTRACK_IPV6=m -CONFIG_NF_TABLES_IPV6=y -CONFIG_NFT_CHAIN_ROUTE_IPV6=m -CONFIG_NFT_CHAIN_NAT_IPV6=m -CONFIG_IP6_NF_IPTABLES=m 
-CONFIG_IP6_NF_MATCH_AH=m -CONFIG_IP6_NF_MATCH_EUI64=m -CONFIG_IP6_NF_MATCH_FRAG=m -CONFIG_IP6_NF_MATCH_OPTS=m -CONFIG_IP6_NF_MATCH_HL=m -CONFIG_IP6_NF_MATCH_IPV6HEADER=m -CONFIG_IP6_NF_MATCH_MH=m -CONFIG_IP6_NF_MATCH_RPFILTER=m -CONFIG_IP6_NF_MATCH_RT=m -CONFIG_IP6_NF_TARGET_HL=m -CONFIG_IP6_NF_FILTER=m -CONFIG_IP6_NF_TARGET_REJECT=m -CONFIG_IP6_NF_MANGLE=m -CONFIG_IP6_NF_RAW=m -CONFIG_IP6_NF_SECURITY=m -CONFIG_IP6_NF_NAT=m -CONFIG_IP6_NF_TARGET_MASQUERADE=m -CONFIG_NF_TABLES_BRIDGE=y -CONFIG_RDS=m -CONFIG_RDS_RDMA=m -CONFIG_RDS_TCP=m -CONFIG_L2TP=m -CONFIG_L2TP_DEBUGFS=m -CONFIG_L2TP_V3=y -CONFIG_L2TP_IP=m -CONFIG_L2TP_ETH=m -CONFIG_BRIDGE=m -CONFIG_VLAN_8021Q=m -CONFIG_VLAN_8021Q_GVRP=y -CONFIG_NET_SCHED=y -CONFIG_NET_SCH_CBQ=m -CONFIG_NET_SCH_HTB=m -CONFIG_NET_SCH_HFSC=m -CONFIG_NET_SCH_PRIO=m -CONFIG_NET_SCH_MULTIQ=m -CONFIG_NET_SCH_RED=m -CONFIG_NET_SCH_SFB=m -CONFIG_NET_SCH_SFQ=m -CONFIG_NET_SCH_TEQL=m -CONFIG_NET_SCH_TBF=m -CONFIG_NET_SCH_GRED=m -CONFIG_NET_SCH_DSMARK=m -CONFIG_NET_SCH_NETEM=m -CONFIG_NET_SCH_DRR=m -CONFIG_NET_SCH_MQPRIO=m -CONFIG_NET_SCH_CHOKE=m -CONFIG_NET_SCH_QFQ=m -CONFIG_NET_SCH_CODEL=m -CONFIG_NET_SCH_FQ_CODEL=m -CONFIG_NET_SCH_INGRESS=m -CONFIG_NET_SCH_PLUG=m -CONFIG_NET_CLS_BASIC=m -CONFIG_NET_CLS_TCINDEX=m -CONFIG_NET_CLS_ROUTE4=m -CONFIG_NET_CLS_FW=m -CONFIG_NET_CLS_U32=m -CONFIG_CLS_U32_PERF=y -CONFIG_CLS_U32_MARK=y -CONFIG_NET_CLS_RSVP=m -CONFIG_NET_CLS_RSVP6=m -CONFIG_NET_CLS_FLOW=m -CONFIG_NET_CLS_CGROUP=y -CONFIG_NET_CLS_BPF=m -CONFIG_NET_CLS_ACT=y -CONFIG_NET_ACT_POLICE=m -CONFIG_NET_ACT_GACT=m -CONFIG_GACT_PROB=y -CONFIG_NET_ACT_MIRRED=m -CONFIG_NET_ACT_IPT=m -CONFIG_NET_ACT_NAT=m -CONFIG_NET_ACT_PEDIT=m -CONFIG_NET_ACT_SIMP=m -CONFIG_NET_ACT_SKBEDIT=m -CONFIG_NET_ACT_CSUM=m -CONFIG_DNS_RESOLVER=y -CONFIG_OPENVSWITCH=m -CONFIG_VSOCKETS=m -CONFIG_VIRTIO_VSOCKETS=m -CONFIG_NETLINK_DIAG=m -CONFIG_CGROUP_NET_PRIO=y -CONFIG_BPF_JIT=y -CONFIG_NET_PKTGEN=m -CONFIG_DEVTMPFS=y -CONFIG_DMA_CMA=y -CONFIG_CMA_SIZE_MBYTES=0 -CONFIG_CONNECTOR=y -CONFIG_ZRAM=m -CONFIG_BLK_DEV_LOOP=m -CONFIG_BLK_DEV_CRYPTOLOOP=m -CONFIG_BLK_DEV_DRBD=m -CONFIG_BLK_DEV_NBD=m -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_SIZE=32768 -CONFIG_VIRTIO_BLK=y -CONFIG_BLK_DEV_RBD=m -CONFIG_BLK_DEV_NVME=m -CONFIG_ENCLOSURE_SERVICES=m -CONFIG_GENWQE=m -CONFIG_RAID_ATTRS=m -CONFIG_SCSI=y -CONFIG_BLK_DEV_SD=y -CONFIG_CHR_DEV_ST=m -CONFIG_CHR_DEV_OSST=m -CONFIG_BLK_DEV_SR=m -CONFIG_CHR_DEV_SG=y -CONFIG_CHR_DEV_SCH=m -CONFIG_SCSI_ENCLOSURE=m -CONFIG_SCSI_CONSTANTS=y -CONFIG_SCSI_LOGGING=y -CONFIG_SCSI_SPI_ATTRS=m -CONFIG_SCSI_FC_ATTRS=y -CONFIG_SCSI_SAS_LIBSAS=m -CONFIG_SCSI_SRP_ATTRS=m -CONFIG_ISCSI_TCP=m -CONFIG_SCSI_DEBUG=m -CONFIG_ZFCP=y -CONFIG_SCSI_VIRTIO=m -CONFIG_SCSI_DH=y -CONFIG_SCSI_DH_RDAC=m -CONFIG_SCSI_DH_HP_SW=m -CONFIG_SCSI_DH_EMC=m -CONFIG_SCSI_DH_ALUA=m -CONFIG_SCSI_OSD_INITIATOR=m -CONFIG_SCSI_OSD_ULD=m -CONFIG_MD=y -CONFIG_BLK_DEV_MD=y -CONFIG_MD_LINEAR=m -CONFIG_MD_MULTIPATH=m -CONFIG_MD_FAULTY=m -CONFIG_BLK_DEV_DM=m -CONFIG_DM_CRYPT=m -CONFIG_DM_SNAPSHOT=m -CONFIG_DM_THIN_PROVISIONING=m -CONFIG_DM_MIRROR=m -CONFIG_DM_LOG_USERSPACE=m -CONFIG_DM_RAID=m -CONFIG_DM_ZERO=m -CONFIG_DM_MULTIPATH=m -CONFIG_DM_MULTIPATH_QL=m -CONFIG_DM_MULTIPATH_ST=m -CONFIG_DM_DELAY=m -CONFIG_DM_UEVENT=y -CONFIG_DM_FLAKEY=m -CONFIG_DM_VERITY=m -CONFIG_DM_SWITCH=m -CONFIG_NETDEVICES=y -CONFIG_BONDING=m -CONFIG_DUMMY=m -CONFIG_EQUALIZER=m -CONFIG_IFB=m -CONFIG_MACVLAN=m -CONFIG_MACVTAP=m -CONFIG_VXLAN=m -CONFIG_TUN=m -CONFIG_VETH=m -CONFIG_VIRTIO_NET=m -CONFIG_NLMON=m -# CONFIG_NET_VENDOR_ARC is not set -# 
CONFIG_NET_VENDOR_CHELSIO is not set -# CONFIG_NET_VENDOR_INTEL is not set -# CONFIG_NET_VENDOR_MARVELL is not set -CONFIG_MLX4_EN=m -CONFIG_MLX5_CORE=m -CONFIG_MLX5_CORE_EN=y -# CONFIG_NET_VENDOR_NATSEMI is not set -CONFIG_PPP=m -CONFIG_PPP_BSDCOMP=m -CONFIG_PPP_DEFLATE=m -CONFIG_PPP_MPPE=m -CONFIG_PPPOE=m -CONFIG_PPTP=m -CONFIG_PPPOL2TP=m -CONFIG_PPP_ASYNC=m -CONFIG_PPP_SYNC_TTY=m -CONFIG_ISM=m -CONFIG_INPUT_EVDEV=y -# CONFIG_INPUT_KEYBOARD is not set -# CONFIG_INPUT_MOUSE is not set -# CONFIG_SERIO is not set -CONFIG_LEGACY_PTY_COUNT=0 -CONFIG_HW_RANDOM_VIRTIO=m -CONFIG_RAW_DRIVER=m -CONFIG_HANGCHECK_TIMER=m -CONFIG_TN3270_FS=y -# CONFIG_HWMON is not set -CONFIG_WATCHDOG=y -CONFIG_WATCHDOG_NOWAYOUT=y -CONFIG_SOFT_WATCHDOG=m -CONFIG_DIAG288_WATCHDOG=m -CONFIG_DRM=y -CONFIG_DRM_VIRTIO_GPU=y -CONFIG_FRAMEBUFFER_CONSOLE=y -# CONFIG_HID is not set -# CONFIG_USB_SUPPORT is not set -CONFIG_INFINIBAND=m -CONFIG_INFINIBAND_USER_ACCESS=m -CONFIG_MLX4_INFINIBAND=m -CONFIG_MLX5_INFINIBAND=m -CONFIG_VFIO=m -CONFIG_VFIO_PCI=m -CONFIG_VFIO_MDEV=m -CONFIG_VFIO_MDEV_DEVICE=m -CONFIG_VIRTIO_PCI=m -CONFIG_VIRTIO_BALLOON=m -CONFIG_VIRTIO_INPUT=y -CONFIG_S390_AP_IOMMU=y -CONFIG_EXT4_FS=y -CONFIG_EXT4_FS_POSIX_ACL=y -CONFIG_EXT4_FS_SECURITY=y -CONFIG_JBD2_DEBUG=y -CONFIG_JFS_FS=m -CONFIG_JFS_POSIX_ACL=y -CONFIG_JFS_SECURITY=y -CONFIG_JFS_STATISTICS=y -CONFIG_XFS_FS=y -CONFIG_XFS_QUOTA=y -CONFIG_XFS_POSIX_ACL=y -CONFIG_XFS_RT=y -CONFIG_GFS2_FS=m -CONFIG_GFS2_FS_LOCKING_DLM=y -CONFIG_OCFS2_FS=m -CONFIG_BTRFS_FS=y -CONFIG_BTRFS_FS_POSIX_ACL=y -CONFIG_NILFS2_FS=m -CONFIG_FS_DAX=y -CONFIG_EXPORTFS_BLOCK_OPS=y -CONFIG_FS_ENCRYPTION=y -CONFIG_FANOTIFY=y -CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y -CONFIG_QUOTA_NETLINK_INTERFACE=y -CONFIG_QFMT_V1=m -CONFIG_QFMT_V2=m -CONFIG_AUTOFS4_FS=m -CONFIG_FUSE_FS=y -CONFIG_CUSE=m -CONFIG_OVERLAY_FS=m -CONFIG_FSCACHE=m -CONFIG_CACHEFILES=m -CONFIG_ISO9660_FS=y -CONFIG_JOLIET=y -CONFIG_ZISOFS=y -CONFIG_UDF_FS=m -CONFIG_MSDOS_FS=m -CONFIG_VFAT_FS=m -CONFIG_NTFS_FS=m -CONFIG_NTFS_RW=y -CONFIG_PROC_KCORE=y -CONFIG_TMPFS=y -CONFIG_TMPFS_POSIX_ACL=y -CONFIG_HUGETLBFS=y -CONFIG_CONFIGFS_FS=m -CONFIG_ECRYPT_FS=m -CONFIG_CRAMFS=m -CONFIG_SQUASHFS=m -CONFIG_SQUASHFS_XATTR=y -CONFIG_SQUASHFS_LZO=y -CONFIG_SQUASHFS_XZ=y -CONFIG_ROMFS_FS=m -CONFIG_NFS_FS=m -CONFIG_NFS_V3_ACL=y -CONFIG_NFS_V4=m -CONFIG_NFS_SWAP=y -CONFIG_NFSD=m -CONFIG_NFSD_V3_ACL=y -CONFIG_NFSD_V4=y -CONFIG_NFSD_V4_SECURITY_LABEL=y -CONFIG_CIFS=m -CONFIG_CIFS_STATS=y -CONFIG_CIFS_STATS2=y -CONFIG_CIFS_WEAK_PW_HASH=y -CONFIG_CIFS_UPCALL=y -CONFIG_CIFS_XATTR=y -CONFIG_CIFS_POSIX=y -# CONFIG_CIFS_DEBUG is not set -CONFIG_CIFS_DFS_UPCALL=y -CONFIG_NLS_DEFAULT="utf8" -CONFIG_NLS_CODEPAGE_437=m -CONFIG_NLS_CODEPAGE_850=m -CONFIG_NLS_ASCII=m -CONFIG_NLS_ISO8859_1=m -CONFIG_NLS_ISO8859_15=m -CONFIG_NLS_UTF8=m -CONFIG_DLM=m -CONFIG_PRINTK_TIME=y -CONFIG_DEBUG_INFO=y -CONFIG_DEBUG_INFO_DWARF4=y -CONFIG_GDB_SCRIPTS=y -# CONFIG_ENABLE_MUST_CHECK is not set -CONFIG_FRAME_WARN=1024 -CONFIG_UNUSED_SYMBOLS=y -CONFIG_MAGIC_SYSRQ=y -CONFIG_DEBUG_MEMORY_INIT=y -CONFIG_PANIC_ON_OOPS=y -CONFIG_RCU_TORTURE_TEST=m -CONFIG_RCU_CPU_STALL_TIMEOUT=60 -CONFIG_LATENCYTOP=y -CONFIG_SCHED_TRACER=y -CONFIG_FTRACE_SYSCALLS=y -CONFIG_STACK_TRACER=y -CONFIG_BLK_DEV_IO_TRACE=y -CONFIG_FUNCTION_PROFILER=y -CONFIG_HIST_TRIGGERS=y -CONFIG_LKDTM=m -CONFIG_PERCPU_TEST=m -CONFIG_ATOMIC64_SELFTEST=y -CONFIG_TEST_BPF=m -CONFIG_BUG_ON_DATA_CORRUPTION=y -CONFIG_S390_PTDUMP=y -CONFIG_PERSISTENT_KEYRINGS=y -CONFIG_BIG_KEYS=y -CONFIG_ENCRYPTED_KEYS=m -CONFIG_SECURITY=y 
-CONFIG_SECURITY_NETWORK=y -CONFIG_SECURITY_SELINUX=y -CONFIG_SECURITY_SELINUX_BOOTPARAM=y -CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=0 -CONFIG_SECURITY_SELINUX_DISABLE=y -CONFIG_INTEGRITY_SIGNATURE=y -CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y -CONFIG_IMA=y -CONFIG_IMA_DEFAULT_HASH_SHA256=y -CONFIG_IMA_WRITE_POLICY=y -CONFIG_IMA_APPRAISE=y -CONFIG_CRYPTO_FIPS=y -CONFIG_CRYPTO_DH=m -CONFIG_CRYPTO_ECDH=m -CONFIG_CRYPTO_USER=m -# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set -CONFIG_CRYPTO_PCRYPT=m -CONFIG_CRYPTO_CRYPTD=m -CONFIG_CRYPTO_TEST=m -CONFIG_CRYPTO_CHACHA20POLY1305=m -CONFIG_CRYPTO_LRW=m -CONFIG_CRYPTO_PCBC=m -CONFIG_CRYPTO_KEYWRAP=m -CONFIG_CRYPTO_XCBC=m -CONFIG_CRYPTO_VMAC=m -CONFIG_CRYPTO_CRC32=m -CONFIG_CRYPTO_MICHAEL_MIC=m -CONFIG_CRYPTO_RMD128=m -CONFIG_CRYPTO_RMD160=m -CONFIG_CRYPTO_RMD256=m -CONFIG_CRYPTO_RMD320=m -CONFIG_CRYPTO_SHA512=m -CONFIG_CRYPTO_SHA3=m -CONFIG_CRYPTO_TGR192=m -CONFIG_CRYPTO_WP512=m -CONFIG_CRYPTO_AES_TI=m -CONFIG_CRYPTO_ANUBIS=m -CONFIG_CRYPTO_BLOWFISH=m -CONFIG_CRYPTO_CAMELLIA=m -CONFIG_CRYPTO_CAST5=m -CONFIG_CRYPTO_CAST6=m -CONFIG_CRYPTO_FCRYPT=m -CONFIG_CRYPTO_KHAZAD=m -CONFIG_CRYPTO_SALSA20=m -CONFIG_CRYPTO_SEED=m -CONFIG_CRYPTO_SERPENT=m -CONFIG_CRYPTO_TEA=m -CONFIG_CRYPTO_TWOFISH=m -CONFIG_CRYPTO_842=m -CONFIG_CRYPTO_LZ4=m -CONFIG_CRYPTO_LZ4HC=m -CONFIG_CRYPTO_ANSI_CPRNG=m -CONFIG_CRYPTO_USER_API_HASH=m -CONFIG_CRYPTO_USER_API_SKCIPHER=m -CONFIG_CRYPTO_USER_API_RNG=m -CONFIG_CRYPTO_USER_API_AEAD=m -CONFIG_ZCRYPT=m -CONFIG_PKEY=m -CONFIG_CRYPTO_PAES_S390=m -CONFIG_CRYPTO_SHA1_S390=m -CONFIG_CRYPTO_SHA256_S390=m -CONFIG_CRYPTO_SHA512_S390=m -CONFIG_CRYPTO_DES_S390=m -CONFIG_CRYPTO_AES_S390=m -CONFIG_CRYPTO_GHASH_S390=m -CONFIG_CRYPTO_CRC32_S390=y -CONFIG_CRC7=m -CONFIG_CRC8=m -CONFIG_CORDIC=m -CONFIG_CMM=m -CONFIG_APPLDATA_BASE=y -CONFIG_KVM=m -CONFIG_KVM_S390_UCONTROL=y -CONFIG_VHOST_NET=m -CONFIG_VHOST_VSOCK=m diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index 7dc7f58c4287..d92bab844b73 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -24,7 +24,6 @@ CONFIG_CRASH_DUMP=y # CONFIG_SECCOMP is not set CONFIG_NET=y # CONFIG_IUCV is not set -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEVTMPFS=y CONFIG_BLK_DEV_RAM=y # CONFIG_BLK_DEV_XPRAM is not set diff --git a/arch/s390/crypto/ghash_s390.c b/arch/s390/crypto/ghash_s390.c index 86aed30fad3a..eeeb6a7737a4 100644 --- a/arch/s390/crypto/ghash_s390.c +++ b/arch/s390/crypto/ghash_s390.c @@ -137,7 +137,7 @@ static struct shash_alg ghash_alg = { static int __init ghash_mod_init(void) { if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_GHASH)) - return -EOPNOTSUPP; + return -ENODEV; return crypto_register_shash(&ghash_alg); } diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c index 12cca467af7d..d977643fa627 100644 --- a/arch/s390/crypto/prng.c +++ b/arch/s390/crypto/prng.c @@ -824,7 +824,7 @@ static int __init prng_init(void) /* check if the CPU has a PRNG */ if (!cpacf_query_func(CPACF_KMC, CPACF_KMC_PRNG)) - return -EOPNOTSUPP; + return -ENODEV; /* check if TRNG subfunction is available */ if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG)) @@ -837,7 +837,7 @@ static int __init prng_init(void) if (prng_mode == PRNG_MODE_SHA512) { pr_err("The prng module cannot " "start in SHA-512 mode\n"); - return -EOPNOTSUPP; + return -ENODEV; } prng_mode = PRNG_MODE_TDES; } else diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c index 009572e8276d..7c15542d3685 100644 --- a/arch/s390/crypto/sha1_s390.c +++ 
b/arch/s390/crypto/sha1_s390.c @@ -86,7 +86,7 @@ static struct shash_alg alg = { static int __init sha1_s390_init(void) { if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_SHA_1)) - return -EOPNOTSUPP; + return -ENODEV; return crypto_register_shash(&alg); } diff --git a/arch/s390/crypto/sha256_s390.c b/arch/s390/crypto/sha256_s390.c index 62833a1d8724..af7505148f80 100644 --- a/arch/s390/crypto/sha256_s390.c +++ b/arch/s390/crypto/sha256_s390.c @@ -117,7 +117,7 @@ static int __init sha256_s390_init(void) int ret; if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_SHA_256)) - return -EOPNOTSUPP; + return -ENODEV; ret = crypto_register_shash(&sha256_alg); if (ret < 0) goto out; diff --git a/arch/s390/crypto/sha512_s390.c b/arch/s390/crypto/sha512_s390.c index be589c340d15..ad29db085a18 100644 --- a/arch/s390/crypto/sha512_s390.c +++ b/arch/s390/crypto/sha512_s390.c @@ -127,7 +127,7 @@ static int __init init(void) int ret; if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_SHA_512)) - return -EOPNOTSUPP; + return -ENODEV; if ((ret = crypto_register_shash(&sha512_alg)) < 0) goto out; if ((ret = crypto_register_shash(&sha384_alg)) < 0) diff --git a/arch/s390/include/asm/airq.h b/arch/s390/include/asm/airq.h index c10d2ee2dfda..01936fdfaddb 100644 --- a/arch/s390/include/asm/airq.h +++ b/arch/s390/include/asm/airq.h @@ -11,6 +11,7 @@ #define _ASM_S390_AIRQ_H #include <linux/bit_spinlock.h> +#include <linux/dma-mapping.h> struct airq_struct { struct hlist_node list; /* Handler queueing. */ @@ -29,6 +30,7 @@ void unregister_adapter_interrupt(struct airq_struct *airq); /* Adapter interrupt bit vector */ struct airq_iv { unsigned long *vector; /* Adapter interrupt bit vector */ + dma_addr_t vector_dma; /* Adapter interrupt bit vector dma */ unsigned long *avail; /* Allocation bit mask for the bit vector */ unsigned long *bitlock; /* Lock bit mask for the bit vector */ unsigned long *ptr; /* Pointer associated with each bit */ diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h index fd20ab5d4cf7..491ad53a0d4e 100644 --- a/arch/s390/include/asm/atomic.h +++ b/arch/s390/include/asm/atomic.h @@ -84,9 +84,9 @@ static inline int atomic_cmpxchg(atomic_t *v, int old, int new) #define ATOMIC64_INIT(i) { (i) } -static inline long atomic64_read(const atomic64_t *v) +static inline s64 atomic64_read(const atomic64_t *v) { - long c; + s64 c; asm volatile( " lg %0,%1\n" @@ -94,49 +94,49 @@ static inline long atomic64_read(const atomic64_t *v) return c; } -static inline void atomic64_set(atomic64_t *v, long i) +static inline void atomic64_set(atomic64_t *v, s64 i) { asm volatile( " stg %1,%0\n" : "=Q" (v->counter) : "d" (i)); } -static inline long atomic64_add_return(long i, atomic64_t *v) +static inline s64 atomic64_add_return(s64 i, atomic64_t *v) { - return __atomic64_add_barrier(i, &v->counter) + i; + return __atomic64_add_barrier(i, (long *)&v->counter) + i; } -static inline long atomic64_fetch_add(long i, atomic64_t *v) +static inline s64 atomic64_fetch_add(s64 i, atomic64_t *v) { - return __atomic64_add_barrier(i, &v->counter); + return __atomic64_add_barrier(i, (long *)&v->counter); } -static inline void atomic64_add(long i, atomic64_t *v) +static inline void atomic64_add(s64 i, atomic64_t *v) { #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES if (__builtin_constant_p(i) && (i > -129) && (i < 128)) { - __atomic64_add_const(i, &v->counter); + __atomic64_add_const(i, (long *)&v->counter); return; } #endif - __atomic64_add(i, &v->counter); + __atomic64_add(i, (long *)&v->counter); } #define atomic64_xchg(v, 
new) (xchg(&((v)->counter), new))

-static inline long atomic64_cmpxchg(atomic64_t *v, long old, long new)
+static inline s64 atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new)
 {
-	return __atomic64_cmpxchg(&v->counter, old, new);
+	return __atomic64_cmpxchg((long *)&v->counter, old, new);
 }

 #define ATOMIC64_OPS(op)						\
-static inline void atomic64_##op(long i, atomic64_t *v)		\
+static inline void atomic64_##op(s64 i, atomic64_t *v)			\
 {									\
-	__atomic64_##op(i, &v->counter);				\
+	__atomic64_##op(i, (long *)&v->counter);			\
 }									\
-static inline long atomic64_fetch_##op(long i, atomic64_t *v)		\
+static inline s64 atomic64_fetch_##op(s64 i, atomic64_t *v)		\
 {									\
-	return __atomic64_##op##_barrier(i, &v->counter);		\
+	return __atomic64_##op##_barrier(i, (long *)&v->counter);	\
 }

 ATOMIC64_OPS(and)
@@ -145,8 +145,8 @@ ATOMIC64_OPS(xor)

 #undef ATOMIC64_OPS

-#define atomic64_sub_return(_i, _v)	atomic64_add_return(-(long)(_i), _v)
-#define atomic64_fetch_sub(_i, _v)	atomic64_fetch_add(-(long)(_i), _v)
-#define atomic64_sub(_i, _v)		atomic64_add(-(long)(_i), _v)
+#define atomic64_sub_return(_i, _v)	atomic64_add_return(-(s64)(_i), _v)
+#define atomic64_fetch_sub(_i, _v)	atomic64_fetch_add(-(s64)(_i), _v)
+#define atomic64_sub(_i, _v)		atomic64_add(-(s64)(_i), _v)

 #endif /* __ARCH_S390_ATOMIC__ */
diff --git a/arch/s390/include/asm/ccwdev.h b/arch/s390/include/asm/ccwdev.h
index a29dd430fb40..865ce1cb86d5 100644
--- a/arch/s390/include/asm/ccwdev.h
+++ b/arch/s390/include/asm/ccwdev.h
@@ -226,6 +226,10 @@ extern int ccw_device_enable_console(struct ccw_device *);
 extern void ccw_device_wait_idle(struct ccw_device *);
 extern int ccw_device_force_console(struct ccw_device *);

+extern void *ccw_device_dma_zalloc(struct ccw_device *cdev, size_t size);
+extern void ccw_device_dma_free(struct ccw_device *cdev,
+				void *cpu_addr, size_t size);
+
 int ccw_device_siosl(struct ccw_device *);

 extern void ccw_device_get_schid(struct ccw_device *, struct subchannel_id *);
diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h
index 1727180e8ca1..b5bfb3123cb1 100644
--- a/arch/s390/include/asm/cio.h
+++ b/arch/s390/include/asm/cio.h
@@ -7,6 +7,7 @@

 #include <linux/spinlock.h>
 #include <linux/bitops.h>
+#include <linux/genalloc.h>
 #include <asm/types.h>

 #define LPM_ANYPATH 0xff
@@ -264,6 +265,36 @@ struct ciw {
 #define CIW_TYPE_RNI	0x2	/* read node identifier */

 /*
+ * Node Descriptor as defined in SA22-7204, "Common I/O-Device Commands"
+ */
+
+#define ND_VALIDITY_VALID	0
+#define ND_VALIDITY_OUTDATED	1
+#define ND_VALIDITY_INVALID	2
+
+struct node_descriptor {
+	/* Flags. */
+	union {
+		struct {
+			u32 validity:3;
+			u32 reserved:5;
+		} __packed;
+		u8 byte0;
+	} __packed;
+
+	/* Node parameters. */
+	u32 params:24;
+
+	/* Node ID. */
+	char type[6];
+	char model[3];
+	char manufacturer[3];
+	char plant[2];
+	char seq[12];
+	u16 tag;
+} __packed;
+
+/*
  * Flags used as input parameters for do_IO()
  */
 #define DOIO_ALLOW_SUSPEND	0x0001 /* allow for channel prog. suspend */
@@ -328,6 +359,16 @@ static inline u8 pathmask_to_pos(u8 mask)
 void channel_subsystem_reinit(void);
 extern void css_schedule_reprobe(void);

+extern void *cio_dma_zalloc(size_t size);
+extern void cio_dma_free(void *cpu_addr, size_t size);
+extern struct device *cio_get_dma_css_dev(void);
+
+void *cio_gp_dma_zalloc(struct gen_pool *gp_dma, struct device *dma_dev,
+			size_t size);
+void cio_gp_dma_free(struct gen_pool *gp_dma, void *cpu_addr, size_t size);
+void cio_gp_dma_destroy(struct gen_pool *gp_dma, struct device *dma_dev);
+struct gen_pool *cio_gp_dma_create(struct device *dma_dev, int nr_pages);
+
 /* Function from drivers/s390/cio/chsc.c */
 int chsc_sstpc(void *page, unsigned int op, u16 ctrl, u64 *clock_delta);
 int chsc_sstpi(void *page, void *result, size_t size);
diff --git a/arch/s390/include/asm/ctl_reg.h b/arch/s390/include/asm/ctl_reg.h
index 3bda757317cf..0cf6b53587db 100644
--- a/arch/s390/include/asm/ctl_reg.h
+++ b/arch/s390/include/asm/ctl_reg.h
@@ -112,13 +112,8 @@ union ctlreg2 {
 	};
 };

-#ifdef CONFIG_SMP
-# define ctl_set_bit(cr, bit) smp_ctl_set_bit(cr, bit)
-# define ctl_clear_bit(cr, bit) smp_ctl_clear_bit(cr, bit)
-#else
-# define ctl_set_bit(cr, bit) __ctl_set_bit(cr, bit)
-# define ctl_clear_bit(cr, bit) __ctl_clear_bit(cr, bit)
-#endif
+#define ctl_set_bit(cr, bit) smp_ctl_set_bit(cr, bit)
+#define ctl_clear_bit(cr, bit) smp_ctl_clear_bit(cr, bit)

 #endif /* __ASSEMBLY__ */
 #endif /* __ASM_CTL_REG_H */
diff --git a/arch/s390/include/asm/debug.h b/arch/s390/include/asm/debug.h
index c305d39f5016..310134015541 100644
--- a/arch/s390/include/asm/debug.h
+++ b/arch/s390/include/asm/debug.h
@@ -107,13 +107,37 @@ void debug_unregister(debug_info_t *id);
 void debug_set_level(debug_info_t *id, int new_level);

 void debug_set_critical(void);
+
 void debug_stop_all(void);

+/**
+ * debug_level_enabled() - Returns true if debug events for the specified
+ *			   level would be logged. Otherwise returns false.
+ *
+ * @id:		handle for debug log
+ * @level:	debug level
+ *
+ * Return:
+ * - %true if level is less than or equal to the current debug level.
+ */
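
A minimal usage sketch of the s390 debug feature API documented in this hunk; illustrative only, not part of the patch. The log name "mydrv", the area sizes and the logged value are invented for the example:

	static debug_info_t *mydrv_dbf;

	static int __init mydrv_init(void)
	{
		/* 4 pages per area, 1 area, 64 bytes of data per entry */
		mydrv_dbf = debug_register("mydrv", 4, 1, 64);
		if (!mydrv_dbf)
			return -ENOMEM;
		debug_register_view(mydrv_dbf, &debug_sprintf_view);
		debug_set_level(mydrv_dbf, 3);
		/* skip argument preparation when the level is disabled */
		if (debug_level_enabled(mydrv_dbf, 3))
			debug_sprintf_event(mydrv_dbf, 3, "init rc=%d", 0);
		return 0;
	}

	static void __exit mydrv_exit(void)
	{
		debug_unregister(mydrv_dbf);
	}
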
 static inline bool debug_level_enabled(debug_info_t *id, int level)
 {
 	return level <= id->level;
 }

+/**
+ * debug_event() - writes binary debug entry to active debug area
+ *		   (if level <= actual debug level)
+ *
+ * @id:		handle for debug log
+ * @level:	debug level
+ * @data:	pointer to data for debug entry
+ * @length:	length of data in bytes
+ *
+ * Return:
+ * - Address of written debug entry
+ * - %NULL if error
+ */
 static inline debug_entry_t *debug_event(debug_info_t *id, int level,
 					 void *data, int length)
 {
@@ -122,6 +146,18 @@ static inline debug_entry_t *debug_event(debug_info_t *id, int level,
 	return debug_event_common(id, level, data, length);
 }

+/**
+ * debug_int_event() - writes unsigned integer debug entry to active debug area
+ *		       (if level <= actual debug level)
+ *
+ * @id:		handle for debug log
+ * @level:	debug level
+ * @tag:	integer value for debug entry
+ *
+ * Return:
+ * - Address of written debug entry
+ * - %NULL if error
+ */
 static inline debug_entry_t *debug_int_event(debug_info_t *id, int level,
 					     unsigned int tag)
 {
@@ -132,6 +168,18 @@ static inline debug_entry_t *debug_int_event(debug_info_t *id, int level,
 	return debug_event_common(id, level, &t, sizeof(unsigned int));
 }

+/**
+ * debug_long_event() - writes unsigned long debug entry to active debug area
+ *			(if level <= actual debug level)
+ *
+ * @id:		handle for debug log
+ * @level:	debug level
+ * @tag:	long integer value for debug entry
+ *
+ * Return:
+ * - Address of written debug entry
+ * - %NULL if error
+ */
 static inline debug_entry_t *debug_long_event(debug_info_t *id, int level,
 					      unsigned long tag)
 {
@@ -142,6 +190,18 @@ static inline debug_entry_t *debug_long_event(debug_info_t *id, int level,
 	return debug_event_common(id, level, &t, sizeof(unsigned long));
 }

+/**
+ * debug_text_event() - writes string debug entry in ascii format to active
+ *			debug area (if level <= actual debug level)
+ *
+ * @id:		handle for debug log
+ * @level:	debug level
+ * @txt:	string for debug entry
+ *
+ * Return:
+ * - Address of written debug entry
+ * - %NULL if error
+ */
 static inline debug_entry_t *debug_text_event(debug_info_t *id, int level,
 					      const char *txt)
 {
@@ -152,12 +212,28 @@ static inline debug_entry_t *debug_text_event(debug_info_t *id, int level,

 /*
  * IMPORTANT: Use "%s" in sprintf format strings with care! Only pointers are
- * stored in the s390dbf. See Documentation/s390/s390dbf.txt for more details!
+ * stored in the s390dbf. See Documentation/s390/s390dbf.rst for more details!
  */
 extern debug_entry_t *
 __debug_sprintf_event(debug_info_t *id, int level, char *string, ...)
 	__attribute__ ((format(printf, 3, 4)));

+/**
+ * debug_sprintf_event() - writes debug entry with format string
+ *			   and varargs (longs) to active debug area
+ *			   (if level <= actual debug level).
+ *
+ * @_id:	handle for debug log
+ * @_level:	debug level
+ * @_fmt:	format string for debug entry
+ * @...:	varargs used as in sprintf()
+ *
+ * Return:
+ * - Address of written debug entry
+ * - %NULL if error
+ *
+ * floats and long long datatypes cannot be used as varargs.
+ */
 #define debug_sprintf_event(_id, _level, _fmt, ...)			\
 ({									\
 	debug_entry_t *__ret;						\
@@ -172,6 +248,20 @@ __debug_sprintf_event(debug_info_t *id, int level, char *string, ...)
 	__ret;								\
 })

+/**
+ * debug_exception() - writes binary debug entry to active debug area
+ *		       (if level <= actual debug level)
+ *		       and switches to next debug area
+ *
+ * @id:		handle for debug log
+ * @level:	debug level
+ * @data:	pointer to data for debug entry
+ * @length:	length of data in bytes
+ *
+ * Return:
+ * - Address of written debug entry
+ * - %NULL if error
+ */
 static inline debug_entry_t *debug_exception(debug_info_t *id, int level,
 					     void *data, int length)
 {
@@ -180,6 +270,19 @@ static inline debug_entry_t *debug_exception(debug_info_t *id, int level,
 	return debug_exception_common(id, level, data, length);
 }

+/**
+ * debug_int_exception() - writes unsigned int debug entry to active debug area
+ *			   (if level <= actual debug level)
+ *			   and switches to next debug area
+ *
+ * @id:		handle for debug log
+ * @level:	debug level
+ * @tag:	integer value for debug entry
+ *
+ * Return:
+ * - Address of written debug entry
+ * - %NULL if error
+ */
 static inline debug_entry_t *debug_int_exception(debug_info_t *id, int level,
 						 unsigned int tag)
 {
@@ -190,6 +293,19 @@ static inline debug_entry_t *debug_int_exception(debug_info_t *id, int level,
 	return debug_exception_common(id, level, &t, sizeof(unsigned int));
 }

+/**
+ * debug_long_exception() - writes long debug entry to active debug area
+ *			    (if level <= actual debug level)
+ *			    and switches to next debug area
+ *
+ * @id:		handle for debug log
+ * @level:	debug level
+ * @tag:	long integer value for debug entry
+ *
+ * Return:
+ * - Address of written debug entry
+ * - %NULL if error
+ */
 static inline debug_entry_t *debug_long_exception (debug_info_t *id, int level,
 						   unsigned long tag)
 {
@@ -200,6 +316,20 @@ static inline debug_entry_t *debug_long_exception (debug_info_t *id, int level,
 	return debug_exception_common(id, level, &t, sizeof(unsigned long));
 }

+/**
+ * debug_text_exception() - writes string debug entry in ascii format to active
+ *			    debug area (if level <= actual debug level)
+ *			    and switches to next debug area
+ *
+ * @id:		handle for debug log
+ * @level:	debug level
+ * @txt:	string for debug entry
+ *
+ * Return:
+ * - Address of written debug entry
+ * - %NULL if error
+ */
 static inline debug_entry_t *debug_text_exception(debug_info_t *id, int level,
 						  const char *txt)
 {
@@ -210,12 +340,30 @@ static inline debug_entry_t *debug_text_exception(debug_info_t *id, int level,

 /*
  * IMPORTANT: Use "%s" in sprintf format strings with care! Only pointers are
- * stored in the s390dbf. See Documentation/s390/s390dbf.txt for more details!
+ * stored in the s390dbf. See Documentation/s390/s390dbf.rst for more details!
  */
 extern debug_entry_t *
 __debug_sprintf_exception(debug_info_t *id, int level, char *string, ...)
 	__attribute__ ((format(printf, 3, 4)));

+
+/**
+ * debug_sprintf_exception() - writes debug entry with format string and
+ *			       varargs (longs) to active debug area
+ *			       (if level <= actual debug level)
+ *			       and switches to next debug area.
+ *
+ * @_id:	handle for debug log
+ * @_level:	debug level
+ * @_fmt:	format string for debug entry
+ * @...:	varargs used as in sprintf()
+ *
+ * Return:
+ * - Address of written debug entry
+ * - %NULL if error
+ *
+ * floats and long long datatypes cannot be used as varargs.
+ */
 #define debug_sprintf_exception(_id, _level, _fmt, ...)			\
 ({									\
 	debug_entry_t *__ret;						\
@@ -231,6 +379,7 @@ __debug_sprintf_exception(debug_info_t *id, int level, char *string, ...)
}) int debug_register_view(debug_info_t *id, struct debug_view *view); + int debug_unregister_view(debug_info_t *id, struct debug_view *view); /* diff --git a/arch/s390/include/asm/facility.h b/arch/s390/include/asm/facility.h index e78cda94456b..68c476b20b57 100644 --- a/arch/s390/include/asm/facility.h +++ b/arch/s390/include/asm/facility.h @@ -59,6 +59,18 @@ static inline int test_facility(unsigned long nr) return __test_facility(nr, &S390_lowcore.stfle_fac_list); } +static inline unsigned long __stfle_asm(u64 *stfle_fac_list, int size) +{ + register unsigned long reg0 asm("0") = size - 1; + + asm volatile( + ".insn s,0xb2b00000,0(%1)" /* stfle */ + : "+d" (reg0) + : "a" (stfle_fac_list) + : "memory", "cc"); + return reg0; +} + /** * stfle - Store facility list extended * @stfle_fac_list: array where facility list can be stored @@ -75,13 +87,8 @@ static inline void __stfle(u64 *stfle_fac_list, int size) memcpy(stfle_fac_list, &S390_lowcore.stfl_fac_list, 4); if (S390_lowcore.stfl_fac_list & 0x01000000) { /* More facility bits available with stfle */ - register unsigned long reg0 asm("0") = size - 1; - - asm volatile(".insn s,0xb2b00000,0(%1)" /* stfle */ - : "+d" (reg0) - : "a" (stfle_fac_list) - : "memory", "cc"); - nr = (reg0 + 1) * 8; /* # bytes stored by stfle */ + nr = __stfle_asm(stfle_fac_list, size); + nr = min_t(unsigned long, (nr + 1) * 8, size * 8); } memset((char *) stfle_fac_list + nr, 0, size * 8 - nr); } diff --git a/arch/s390/include/asm/idals.h b/arch/s390/include/asm/idals.h index 15578fd762f6..6fb7aced104a 100644 --- a/arch/s390/include/asm/idals.h +++ b/arch/s390/include/asm/idals.h @@ -122,8 +122,7 @@ idal_buffer_alloc(size_t size, int page_order) nr_ptrs = (size + IDA_BLOCK_SIZE - 1) >> IDA_SIZE_LOG; nr_chunks = (4096 << page_order) >> IDA_SIZE_LOG; - ib = kmalloc(sizeof(struct idal_buffer) + nr_ptrs*sizeof(void *), - GFP_DMA | GFP_KERNEL); + ib = kmalloc(struct_size(ib, data, nr_ptrs), GFP_DMA | GFP_KERNEL); if (ib == NULL) return ERR_PTR(-ENOMEM); ib->size = size; diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 2b00a3ebee08..4a928e2c667b 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -18,6 +18,7 @@ #include <linux/kvm_host.h> #include <linux/kvm.h> #include <linux/seqlock.h> +#include <linux/module.h> #include <asm/debug.h> #include <asm/cpu.h> #include <asm/fpu/api.h> @@ -720,8 +721,14 @@ struct kvm_s390_cpu_model { unsigned short ibc; }; +struct kvm_s390_module_hook { + int (*hook)(struct kvm_vcpu *vcpu); + struct module *owner; +}; + struct kvm_s390_crypto { struct kvm_s390_crypto_cb *crycb; + struct kvm_s390_module_hook *pqap_hook; __u32 crycbd; __u8 aes_kw; __u8 dea_kw; diff --git a/arch/s390/include/asm/mem_encrypt.h b/arch/s390/include/asm/mem_encrypt.h new file mode 100644 index 000000000000..3eb018508190 --- /dev/null +++ b/arch/s390/include/asm/mem_encrypt.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef S390_MEM_ENCRYPT_H__ +#define S390_MEM_ENCRYPT_H__ + +#ifndef __ASSEMBLY__ + +#define sme_me_mask 0ULL + +static inline bool sme_active(void) { return false; } +extern bool sev_active(void); + +int set_memory_encrypted(unsigned long addr, int numpages); +int set_memory_decrypted(unsigned long addr, int numpages); + +#endif /* __ASSEMBLY__ */ + +#endif /* S390_MEM_ENCRYPT_H__ */ diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 305befd55326..a2399eff84ca 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ 
-194,6 +194,11 @@ int zpci_init_iommu(struct zpci_dev *zdev); void zpci_destroy_iommu(struct zpci_dev *zdev); #ifdef CONFIG_PCI +static inline bool zpci_use_mio(struct zpci_dev *zdev) +{ + return static_branch_likely(&have_mio) && zdev->mio_capable; +} + /* Error handling and recovery */ void zpci_event_error(void *); void zpci_event_availability(void *); diff --git a/arch/s390/include/asm/percpu.h b/arch/s390/include/asm/percpu.h index 0095ddb58ff6..50b4ce8cddfd 100644 --- a/arch/s390/include/asm/percpu.h +++ b/arch/s390/include/asm/percpu.h @@ -16,7 +16,7 @@ * per cpu area, use weak definitions to force the compiler to * generate external references. */ -#if defined(CONFIG_SMP) && defined(MODULE) +#if defined(MODULE) #define ARCH_NEEDS_WEAK_PER_CPU #endif diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index b0fcbc37b637..14883b1562e0 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -36,6 +36,7 @@ #ifndef __ASSEMBLY__ +#include <linux/cpumask.h> #include <linux/linkage.h> #include <linux/irqflags.h> #include <asm/cpu.h> @@ -221,12 +222,6 @@ static __no_kasan_or_inline unsigned short stap(void) return cpu_address; } -/* - * Give up the time slice of the virtual PU. - */ -#define cpu_relax_yield cpu_relax_yield -void cpu_relax_yield(void); - #define cpu_relax() barrier() #define ECAG_CACHE_ATTRIBUTE 0 diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h index 3907ead27ffa..b157a81fb977 100644 --- a/arch/s390/include/asm/smp.h +++ b/arch/s390/include/asm/smp.h @@ -9,9 +9,6 @@ #define __ASM_SMP_H #include <asm/sigp.h> - -#ifdef CONFIG_SMP - #include <asm/lowcore.h> #define raw_smp_processor_id() (S390_lowcore.cpu_nr) @@ -40,33 +37,6 @@ extern int smp_cpu_get_polarization(int cpu); extern void smp_fill_possible_mask(void); extern void smp_detect_cpus(void); -#else /* CONFIG_SMP */ - -#define smp_cpu_mtid 0 - -static inline void smp_call_ipl_cpu(void (*func)(void *), void *data) -{ - func(data); -} - -static inline void smp_call_online_cpu(void (*func)(void *), void *data) -{ - func(data); -} - -static inline void smp_emergency_stop(void) -{ -} - -static inline int smp_find_processor_id(u16 address) { return 0; } -static inline int smp_store_status(int cpu) { return 0; } -static inline int smp_vcpu_scheduled(int cpu) { return 1; } -static inline void smp_yield_cpu(int cpu) { } -static inline void smp_fill_possible_mask(void) { } -static inline void smp_detect_cpus(void) { } - -#endif /* CONFIG_SMP */ - static inline void smp_stop_cpu(void) { u16 pcpu = stap(); @@ -83,14 +53,9 @@ static inline int smp_get_base_cpu(int cpu) return cpu - (cpu % (smp_cpu_mtid + 1)); } -#ifdef CONFIG_HOTPLUG_CPU extern int smp_rescan_cpus(void); extern void __noreturn cpu_die(void); extern void __cpu_die(unsigned int cpu); extern int __cpu_disable(void); -#else -static inline int smp_rescan_cpus(void) { return 0; } -static inline void cpu_die(void) { } -#endif #endif /* __ASM_SMP_H */ diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h index 0a29588aa00b..c02bff33f6c7 100644 --- a/arch/s390/include/asm/spinlock.h +++ b/arch/s390/include/asm/spinlock.h @@ -20,11 +20,7 @@ extern int spin_retry; -#ifndef CONFIG_SMP -static inline bool arch_vcpu_is_preempted(int cpu) { return false; } -#else bool arch_vcpu_is_preempted(int cpu); -#endif #define vcpu_is_preempted arch_vcpu_is_preempted diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h index 8c840f0904f3..82703e03f35d 
100644 --- a/arch/s390/include/asm/tlbflush.h +++ b/arch/s390/include/asm/tlbflush.h @@ -32,7 +32,6 @@ static inline void __tlb_flush_idte(unsigned long asce) : : "a" (opt), "a" (asce) : "cc"); } -#ifdef CONFIG_SMP void smp_ptlb_all(void); /* @@ -83,22 +82,6 @@ static inline void __tlb_flush_kernel(void) else __tlb_flush_global(); } -#else -#define __tlb_flush_global() __tlb_flush_local() - -/* - * Flush TLB entries for a specific ASCE on all CPUs. - */ -static inline void __tlb_flush_mm(struct mm_struct *mm) -{ - __tlb_flush_local(); -} - -static inline void __tlb_flush_kernel(void) -{ - __tlb_flush_local(); -} -#endif static inline void __tlb_flush_mm_lazy(struct mm_struct * mm) { diff --git a/arch/s390/include/asm/unwind.h b/arch/s390/include/asm/unwind.h index 6eb2ef105d87..d827b5b9a32c 100644 --- a/arch/s390/include/asm/unwind.h +++ b/arch/s390/include/asm/unwind.h @@ -79,23 +79,4 @@ static inline void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size, void *orc, size_t orc_size) {} -#ifdef CONFIG_KASAN -/* - * This disables KASAN checking when reading a value from another task's stack, - * since the other task could be running on another CPU and could have poisoned - * the stack in the meantime. - */ -#define READ_ONCE_TASK_STACK(task, x) \ -({ \ - unsigned long val; \ - if (task == current) \ - val = READ_ONCE(x); \ - else \ - val = READ_ONCE_NOCHECK(x); \ - val; \ -}) -#else -#define READ_ONCE_TASK_STACK(task, x) READ_ONCE(x) -#endif - #endif /* _ASM_S390_UNWIND_H */ diff --git a/arch/s390/include/uapi/asm/runtime_instr.h b/arch/s390/include/uapi/asm/runtime_instr.h index 45c9ec984e6b..455da46e3193 100644 --- a/arch/s390/include/uapi/asm/runtime_instr.h +++ b/arch/s390/include/uapi/asm/runtime_instr.h @@ -57,7 +57,7 @@ struct runtime_instr_cb { __u64 sf; __u64 rsic; __u64 reserved8; -} __packed __aligned(8); +} __attribute__((__packed__, __aligned__(8))); static inline void load_runtime_instr_cb(struct runtime_instr_cb *cb) { diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index b0478d01a0c5..0f255b54b051 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -53,6 +53,7 @@ obj-y += sysinfo.o lgr.o os_info.o machine_kexec.o pgm_check.o obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o obj-y += entry.o reipl.o relocate_kernel.o kdebugfs.o alternative.o obj-y += nospec-branch.o ipl_vmparm.o machine_kexec_reloc.o unwind_bc.o +obj-y += smp.o extra-y += head64.o vmlinux.lds @@ -60,7 +61,6 @@ obj-$(CONFIG_SYSFS) += nospec-sysfs.o CFLAGS_REMOVE_nospec-branch.o += $(CC_FLAGS_EXPOLINE) obj-$(CONFIG_MODULES) += module.o -obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_SCHED_TOPOLOGY) += topology.o obj-$(CONFIG_HIBERNATION) += suspend.o swsusp.o obj-$(CONFIG_AUDIT) += audit.o diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index 0ebf08c3b35e..6d321f5f101d 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -647,11 +647,23 @@ static int debug_close(struct inode *inode, struct file *file) return 0; /* success */ } -/* - * debug_register_mode: - * - Creates and initializes debug area for the caller - * The mode parameter allows to specify access rights for the s390dbf files - * - Returns handle for debug area +/** + * debug_register_mode() - creates and initializes debug area. + * + * @name: Name of debug log (e.g. 
used for debugfs entry) + * @pages_per_area: Number of pages, which will be allocated per area + * @nr_areas: Number of debug areas + * @buf_size: Size of data area in each debug entry + * @mode: File mode for debugfs files. E.g. S_IRWXUGO + * @uid: User ID for debugfs files. Currently only 0 is supported. + * @gid: Group ID for debugfs files. Currently only 0 is supported. + * + * Return: + * - Handle for generated debug area + * - %NULL if register failed + * + * Allocates memory for a debug log. + * Must not be called within an interrupt handler. */ debug_info_t *debug_register_mode(const char *name, int pages_per_area, int nr_areas, int buf_size, umode_t mode, @@ -681,10 +693,21 @@ out: } EXPORT_SYMBOL(debug_register_mode); -/* - * debug_register: - * - creates and initializes debug area for the caller - * - returns handle for debug area +/** + * debug_register() - creates and initializes debug area with default file mode. + * + * @name: Name of debug log (e.g. used for debugfs entry) + * @pages_per_area: Number of pages, which will be allocated per area + * @nr_areas: Number of debug areas + * @buf_size: Size of data area in each debug entry + * + * Return: + * - Handle for generated debug area + * - %NULL if register failed + * + * Allocates memory for a debug log. + * The debugfs file mode access permissions are read and write for user. + * Must not be called within an interrupt handler. */ debug_info_t *debug_register(const char *name, int pages_per_area, int nr_areas, int buf_size) @@ -694,9 +717,13 @@ debug_info_t *debug_register(const char *name, int pages_per_area, } EXPORT_SYMBOL(debug_register); -/* - * debug_unregister: - * - give back debug area +/** + * debug_unregister() - give back debug area. + * + * @id: handle for debug log + * + * Return: + * none */ void debug_unregister(debug_info_t *id) { @@ -745,9 +772,14 @@ out: return rc; } -/* - * debug_set_level: - * - set actual debug level +/** + * debug_set_level() - Sets new actual debug level if new_level is valid. + * + * @id: handle for debug log + * @new_level: new debug level + * + * Return: + * none */ void debug_set_level(debug_info_t *id, int new_level) { @@ -873,6 +905,14 @@ static struct ctl_table s390dbf_dir_table[] = { static struct ctl_table_header *s390dbf_sysctl_header; +/** + * debug_stop_all() - stops the debug feature if stopping is allowed. + * + * Return: + * - none + * + * Currently used in case of a kernel oops. + */ void debug_stop_all(void) { if (debug_stoppable) @@ -880,6 +920,17 @@ void debug_stop_all(void) } EXPORT_SYMBOL(debug_stop_all); +/** + * debug_set_critical() - event/exception functions try lock instead of spin. + * + * Return: + * - none + * + * Currently used in case of stopping all CPUs but the current one. + * Once in this state, functions to write a debug entry for an + * event or exception no longer spin on the debug area lock, + * but only try to get it and fail if they do not get the lock. 
+ */ void debug_set_critical(void) { debug_critical = 1; @@ -1036,8 +1087,16 @@ debug_entry_t *__debug_sprintf_exception(debug_info_t *id, int level, char *stri } EXPORT_SYMBOL(__debug_sprintf_exception); -/* - * debug_register_view: +/** + * debug_register_view() - registers new debug view and creates debugfs + * dir entry + * + * @id: handle for debug log + * @view: pointer to debug view struct + * + * Return: + * - 0 : ok + * - < 0: Error */ int debug_register_view(debug_info_t *id, struct debug_view *view) { @@ -1077,8 +1136,16 @@ out: } EXPORT_SYMBOL(debug_register_view); -/* - * debug_unregister_view: +/** + * debug_unregister_view() - unregisters debug view and removes debugfs + * dir entry + * + * @id: handle for debug log + * @view: pointer to debug view struct + * + * Return: + * - 0 : ok + * - < 0: Error */ int debug_unregister_view(debug_info_t *id, struct debug_view *view) { diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c index b2c68fbf2634..7abe6ae261b4 100644 --- a/arch/s390/kernel/dis.c +++ b/arch/s390/kernel/dis.c @@ -242,6 +242,7 @@ static const unsigned char formats[][6] = { [INSTR_RRF_U0FF] = { F_24, U4_16, F_28, 0, 0, 0 }, [INSTR_RRF_U0RF] = { R_24, U4_16, F_28, 0, 0, 0 }, [INSTR_RRF_U0RR] = { R_24, R_28, U4_16, 0, 0, 0 }, + [INSTR_RRF_URR] = { R_24, R_28, U8_16, 0, 0, 0 }, [INSTR_RRF_UUFF] = { F_24, U4_16, F_28, U4_20, 0, 0 }, [INSTR_RRF_UUFR] = { F_24, U4_16, R_28, U4_20, 0, 0 }, [INSTR_RRF_UURF] = { R_24, U4_16, F_28, U4_20, 0, 0 }, @@ -306,7 +307,7 @@ static const unsigned char formats[][6] = { [INSTR_VRI_VVV0UU2] = { V_8, V_12, V_16, U8_28, U4_24, 0 }, [INSTR_VRR_0V] = { V_12, 0, 0, 0, 0, 0 }, [INSTR_VRR_0VV0U] = { V_12, V_16, U4_24, 0, 0, 0 }, - [INSTR_VRR_RV0U] = { R_8, V_12, U4_24, 0, 0, 0 }, + [INSTR_VRR_RV0UU] = { R_8, V_12, U4_24, U4_28, 0, 0 }, [INSTR_VRR_VRR] = { V_8, R_12, R_16, 0, 0, 0 }, [INSTR_VRR_VV] = { V_8, V_12, 0, 0, 0, 0 }, [INSTR_VRR_VV0U] = { V_8, V_12, U4_32, 0, 0, 0 }, @@ -326,10 +327,8 @@ static const unsigned char formats[][6] = { [INSTR_VRS_RVRDU] = { R_8, V_12, D_20, B_16, U4_32, 0 }, [INSTR_VRS_VRRD] = { V_8, R_12, D_20, B_16, 0, 0 }, [INSTR_VRS_VRRDU] = { V_8, R_12, D_20, B_16, U4_32, 0 }, - [INSTR_VRS_VVRD] = { V_8, V_12, D_20, B_16, 0, 0 }, [INSTR_VRS_VVRDU] = { V_8, V_12, D_20, B_16, U4_32, 0 }, [INSTR_VRV_VVXRDU] = { V_8, D_20, VX_12, B_16, U4_32, 0 }, - [INSTR_VRX_VRRD] = { V_8, D_20, X_12, B_16, 0, 0 }, [INSTR_VRX_VRRDU] = { V_8, D_20, X_12, B_16, U4_32, 0 }, [INSTR_VRX_VV] = { V_8, V_12, 0, 0, 0, 0 }, [INSTR_VSI_URDV] = { V_32, D_20, B_16, U8_8, 0, 0 }, diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index 9e87b68be21c..ac06c3949ab3 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -199,9 +199,7 @@ void die(struct pt_regs *regs, const char *str) #ifdef CONFIG_PREEMPT pr_cont("PREEMPT "); #endif -#ifdef CONFIG_SMP pr_cont("SMP "); -#endif if (debug_pagealloc_enabled()) pr_cont("DEBUG_PAGEALLOC"); pr_cont("\n"); diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 3f4d272577d3..270d1d145761 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -986,14 +986,12 @@ ENTRY(psw_idle) stg %r3,__SF_EMPTY(%r15) larl %r1,.Lpsw_idle_lpsw+4 stg %r1,__SF_EMPTY+8(%r15) -#ifdef CONFIG_SMP larl %r1,smp_cpu_mtid llgf %r1,0(%r1) ltgr %r1,%r1 jz .Lpsw_idle_stcctm .insn rsy,0xeb0000000017,%r1,5,__SF_EMPTY+16(%r15) .Lpsw_idle_stcctm: -#endif oi __LC_CPU_FLAGS+7,_CIF_ENABLED_WAIT BPON STCK __CLOCK_IDLE_ENTER(%r2) @@ -1468,7 +1466,6 @@ 
ENDPROC(cleanup_critical) mvc __CLOCK_IDLE_ENTER(8,%r2),__CLOCK_IDLE_EXIT(%r2) mvc __TIMER_IDLE_ENTER(8,%r2),__TIMER_IDLE_EXIT(%r2) 1: # calculate idle cycles -#ifdef CONFIG_SMP clg %r9,BASED(.Lcleanup_idle_insn) jl 3f larl %r1,smp_cpu_mtid @@ -1486,7 +1483,6 @@ ENDPROC(cleanup_critical) la %r3,8(%r3) la %r4,8(%r4) brct %r1,2b -#endif 3: # account system time going idle lg %r9,__LC_STEAL_TIMER alg %r9,__CLOCK_IDLE_ENTER(%r2) diff --git a/arch/s390/kernel/jump_label.c b/arch/s390/kernel/jump_label.c index 3f10b56bd5a3..ab584e8e3527 100644 --- a/arch/s390/kernel/jump_label.c +++ b/arch/s390/kernel/jump_label.c @@ -15,16 +15,11 @@ struct insn { s32 offset; } __packed; -struct insn_args { - struct jump_entry *entry; - enum jump_label_type type; -}; - static void jump_label_make_nop(struct jump_entry *entry, struct insn *insn) { - /* brcl 0,0 */ + /* brcl 0,offset */ insn->opcode = 0xc004; - insn->offset = 0; + insn->offset = (jump_entry_target(entry) - jump_entry_code(entry)) >> 1; } static void jump_label_make_branch(struct jump_entry *entry, struct insn *insn) @@ -77,23 +72,15 @@ static void __jump_label_transform(struct jump_entry *entry, s390_kernel_write(code, &new, sizeof(new)); } -static int __sm_arch_jump_label_transform(void *data) +static void __jump_label_sync(void *dummy) { - struct insn_args *args = data; - - __jump_label_transform(args->entry, args->type, 0); - return 0; } void arch_jump_label_transform(struct jump_entry *entry, enum jump_label_type type) { - struct insn_args args; - - args.entry = entry; - args.type = type; - - stop_machine_cpuslocked(__sm_arch_jump_label_transform, &args, NULL); + __jump_label_transform(entry, type, 0); + smp_call_function(__jump_label_sync, NULL, 1); } void arch_jump_label_transform_static(struct jump_entry *entry, diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index 8a1ae140c5e2..444a19125a81 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -141,7 +141,6 @@ static noinline void __machine_kdump(void *image) */ store_status(__do_machine_kdump, image); } -#endif static unsigned long do_start_kdump(unsigned long addr) { @@ -155,6 +154,8 @@ static unsigned long do_start_kdump(unsigned long addr) return rc; } +#endif /* CONFIG_CRASH_DUMP */ + /* * Check if kdump checksums are valid: We call purgatory with parameter "0" */ diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 5de13307b703..6ebc2117c66c 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -7,6 +7,7 @@ #define KMSG_COMPONENT "cpu" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#include <linux/stop_machine.h> #include <linux/cpufeature.h> #include <linux/bitops.h> #include <linux/kernel.h> @@ -31,6 +32,7 @@ struct cpu_info { }; static DEFINE_PER_CPU(struct cpu_info, cpu_info); +static DEFINE_PER_CPU(int, cpu_relax_retry); static bool machine_has_cpu_mhz; @@ -58,15 +60,20 @@ void s390_update_cpu_mhz(void) on_each_cpu(update_cpu_mhz, NULL, 0); } -void notrace cpu_relax_yield(void) +void notrace stop_machine_yield(const struct cpumask *cpumask) { - if (!smp_cpu_mtid && MACHINE_HAS_DIAG44) { - diag_stat_inc(DIAG_STAT_X044); - asm volatile("diag 0,0,0x44"); + int cpu, this_cpu; + + this_cpu = smp_processor_id(); + if (__this_cpu_inc_return(cpu_relax_retry) >= spin_retry) { + __this_cpu_write(cpu_relax_retry, 0); + cpu = cpumask_next_wrap(this_cpu, cpumask, this_cpu, false); + if (cpu >= nr_cpu_ids) + return; + if (arch_vcpu_is_preempted(cpu)) + smp_yield_cpu(cpu); } - 
barrier(); } -EXPORT_SYMBOL(cpu_relax_yield); /* * cpu_init - initializes state that is per-CPU. diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index f8544d517430..2b94b0ad3588 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -461,11 +461,9 @@ static void __init setup_lowcore_dat_off(void) mem_assign_absolute(S390_lowcore.restart_source, lc->restart_source); mem_assign_absolute(S390_lowcore.restart_psw, lc->restart_psw); -#ifdef CONFIG_SMP lc->spinlock_lockval = arch_spin_lockval(0); lc->spinlock_index = 0; arch_spin_lock_setup(0); -#endif lc->br_r1_trampoline = 0x07f1; /* br %r1 */ set_prefix((u32)(unsigned long) lc); diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 35fafa2b91a8..44974654cbd0 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -232,8 +232,6 @@ out: return -ENOMEM; } -#ifdef CONFIG_HOTPLUG_CPU - static void pcpu_free_lowcore(struct pcpu *pcpu) { unsigned long async_stack, nodat_stack, lowcore; @@ -253,8 +251,6 @@ static void pcpu_free_lowcore(struct pcpu *pcpu) free_pages(lowcore, LC_ORDER); } -#endif /* CONFIG_HOTPLUG_CPU */ - static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu) { struct lowcore *lc = pcpu->lowcore; @@ -418,7 +414,7 @@ void smp_yield_cpu(int cpu) diag_stat_inc_norecursion(DIAG_STAT_X09C); asm volatile("diag %0,0,0x9c" : : "d" (pcpu_devices[cpu].address)); - } else if (MACHINE_HAS_DIAG44) { + } else if (MACHINE_HAS_DIAG44 && !smp_cpu_mtid) { diag_stat_inc_norecursion(DIAG_STAT_X044); asm volatile("diag 0,0,0x44"); } @@ -895,8 +891,6 @@ static int __init _setup_possible_cpus(char *s) } early_param("possible_cpus", _setup_possible_cpus); -#ifdef CONFIG_HOTPLUG_CPU - int __cpu_disable(void) { unsigned long cregs[16]; @@ -937,8 +931,6 @@ void __noreturn cpu_die(void) for (;;) ; } -#endif /* CONFIG_HOTPLUG_CPU */ - void __init smp_fill_possible_mask(void) { unsigned int possible, sclp_max, cpu; @@ -996,7 +988,6 @@ int setup_profiling_timer(unsigned int multiplier) return 0; } -#ifdef CONFIG_HOTPLUG_CPU static ssize_t cpu_configure_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -1073,7 +1064,6 @@ out: return rc ? rc : count; } static DEVICE_ATTR(configure, 0644, cpu_configure_show, cpu_configure_store); -#endif /* CONFIG_HOTPLUG_CPU */ static ssize_t show_cpu_address(struct device *dev, struct device_attribute *attr, char *buf) @@ -1083,9 +1073,7 @@ static ssize_t show_cpu_address(struct device *dev, static DEVICE_ATTR(address, 0444, show_cpu_address, NULL); static struct attribute *cpu_common_attrs[] = { -#ifdef CONFIG_HOTPLUG_CPU &dev_attr_configure.attr, -#endif &dev_attr_address.attr, NULL, }; @@ -1144,15 +1132,11 @@ static int smp_add_present_cpu(int cpu) out_topology: sysfs_remove_group(&s->kobj, &cpu_common_attr_group); out_cpu: -#ifdef CONFIG_HOTPLUG_CPU unregister_cpu(c); -#endif out: return rc; } -#ifdef CONFIG_HOTPLUG_CPU - int __ref smp_rescan_cpus(void) { struct sclp_core_info *info; @@ -1188,17 +1172,14 @@ static ssize_t __ref rescan_store(struct device *dev, return rc ? 
rc : count; } static DEVICE_ATTR_WO(rescan); -#endif /* CONFIG_HOTPLUG_CPU */ static int __init s390_smp_init(void) { int cpu, rc = 0; -#ifdef CONFIG_HOTPLUG_CPU rc = device_create_file(cpu_subsys.dev_root, &dev_attr_rescan); if (rc) return rc; -#endif for_each_present_cpu(cpu) { rc = smp_add_present_cpu(cpu); if (rc) diff --git a/arch/s390/kernel/swsusp.S b/arch/s390/kernel/swsusp.S index 19a3c427801a..a7baf0b5f818 100644 --- a/arch/s390/kernel/swsusp.S +++ b/arch/s390/kernel/swsusp.S @@ -162,7 +162,6 @@ ENTRY(swsusp_arch_resume) larl %r1,__swsusp_reset_dma lg %r1,0(%r1) BASR_EX %r14,%r1 -#ifdef CONFIG_SMP larl %r1,smp_cpu_mt_shift icm %r1,15,0(%r1) jz smt_done @@ -172,7 +171,6 @@ smt_loop: brc 8,smt_done /* accepted */ brc 2,smt_loop /* busy, try again */ smt_done: -#endif larl %r1,.Lnew_pgm_check_psw lpswe 0(%r1) pgm_check_entry: diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 82e81a9f7112..4736b6ec0ad2 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -229,17 +229,11 @@ void vector_exception(struct pt_regs *regs) void data_exception(struct pt_regs *regs) { - int signal = 0; - save_fpu_regs(); if (current->thread.fpu.fpc & FPC_DXC_MASK) - signal = SIGFPE; - else - signal = SIGILL; - if (signal == SIGFPE) do_fp_trap(regs, current->thread.fpu.fpc); - else if (signal) - do_trap(regs, signal, ILL_ILLOPN, "data exception"); + else + do_trap(regs, SIGILL, ILL_ILLOPN, "data exception"); } void space_switch_exception(struct pt_regs *regs) diff --git a/arch/s390/kernel/unwind_bc.c b/arch/s390/kernel/unwind_bc.c index 57fd4e902f1f..3ce8a0808059 100644 --- a/arch/s390/kernel/unwind_bc.c +++ b/arch/s390/kernel/unwind_bc.c @@ -46,18 +46,18 @@ bool unwind_next_frame(struct unwind_state *state) regs = state->regs; if (unlikely(regs)) { - sp = READ_ONCE_TASK_STACK(state->task, regs->gprs[15]); + sp = READ_ONCE_NOCHECK(regs->gprs[15]); if (unlikely(outside_of_stack(state, sp))) { if (!update_stack_info(state, sp)) goto out_err; } sf = (struct stack_frame *) sp; - ip = READ_ONCE_TASK_STACK(state->task, sf->gprs[8]); + ip = READ_ONCE_NOCHECK(sf->gprs[8]); reliable = false; regs = NULL; } else { sf = (struct stack_frame *) state->sp; - sp = READ_ONCE_TASK_STACK(state->task, sf->back_chain); + sp = READ_ONCE_NOCHECK(sf->back_chain); if (likely(sp)) { /* Non-zero back-chain points to the previous frame */ if (unlikely(outside_of_stack(state, sp))) { @@ -65,7 +65,7 @@ bool unwind_next_frame(struct unwind_state *state) goto out_err; } sf = (struct stack_frame *) sp; - ip = READ_ONCE_TASK_STACK(state->task, sf->gprs[8]); + ip = READ_ONCE_NOCHECK(sf->gprs[8]); reliable = true; } else { /* No back-chain, look for a pt_regs structure */ @@ -73,9 +73,9 @@ bool unwind_next_frame(struct unwind_state *state) if (!on_stack(info, sp, sizeof(struct pt_regs))) goto out_stop; regs = (struct pt_regs *) sp; - if (user_mode(regs)) + if (READ_ONCE_NOCHECK(regs->psw.mask) & PSW_MASK_PSTATE) goto out_stop; - ip = READ_ONCE_TASK_STACK(state->task, regs->psw.addr); + ip = READ_ONCE_NOCHECK(regs->psw.addr); reliable = true; } } @@ -132,11 +132,11 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task, /* Get the instruction pointer from pt_regs or the stack frame */ if (regs) { - ip = READ_ONCE_TASK_STACK(state->task, regs->psw.addr); + ip = READ_ONCE_NOCHECK(regs->psw.addr); reliable = true; } else { sf = (struct stack_frame *) sp; - ip = READ_ONCE_TASK_STACK(state->task, sf->gprs[8]); + ip = READ_ONCE_NOCHECK(sf->gprs[8]); reliable = false; } diff --git 
a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 28ebd647784c..1c4113f0f2a8 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2461,6 +2461,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) set_kvm_facility(kvm->arch.model.fac_list, 147); } + if (css_general_characteristics.aiv && test_facility(65)) + set_kvm_facility(kvm->arch.model.fac_mask, 65); + kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid(); kvm->arch.model.ibc = sclp.ibc & 0x0fff; diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 8679bd74d337..ed52ffa8d5d4 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -27,6 +27,7 @@ #include <asm/io.h> #include <asm/ptrace.h> #include <asm/sclp.h> +#include <asm/ap.h> #include "gaccess.h" #include "kvm-s390.h" #include "trace.h" @@ -592,6 +593,89 @@ static int handle_io_inst(struct kvm_vcpu *vcpu) } } +/* + * handle_pqap: Handling pqap interception + * @vcpu: the vcpu that issued the pqap instruction + * + * We now support PQAP/AQIC instructions and we need to correctly + * answer the guest even if no dedicated driver's hook is available. + * + * The intercepting code calls a dedicated callback for this instruction + * if a driver did register one in the CRYPTO satellite of the + * SIE block. + * + * If no callback is available, the queues are not available: return this + * response code to the caller and set CC to 3. + * Else return the response code returned by the callback. + */ +static int handle_pqap(struct kvm_vcpu *vcpu) +{ + struct ap_queue_status status = {}; + unsigned long reg0; + int ret; + uint8_t fc; + + /* Verify that the AP instructions are available */ + if (!ap_instructions_available()) + return -EOPNOTSUPP; + /* Verify that the guest is allowed to use AP instructions */ + if (!(vcpu->arch.sie_block->eca & ECA_APIE)) + return -EOPNOTSUPP; + /* + * The only possibly intercepted functions when AP instructions are + * available for the guest are AQIC and TAPQ with the t bit set; + * since we do not set IC.3 (FIII), we currently will only intercept + * the AQIC function code. + */ + reg0 = vcpu->run->s.regs.gprs[0]; + fc = (reg0 >> 24) & 0xff; + if (WARN_ON_ONCE(fc != 0x03)) + return -EOPNOTSUPP; + + /* PQAP instruction is allowed for guest kernel only */ + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + + /* Common PQAP instruction specification exceptions */ + /* bits 41-47 must all be zeros */ + if (reg0 & 0x007f0000UL) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + /* APFT not installed and T bit set */ + if (!test_kvm_facility(vcpu->kvm, 15) && (reg0 & 0x00800000UL)) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + /* APXA not installed and APID greater than 64 or APQI greater than 16 */ + if (!(vcpu->kvm->arch.crypto.crycbd & 0x02) && (reg0 & 0x0000c0f0UL)) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + /* AQIC function code specific exception */ + /* facility 65 not present for AQIC function code */ + if (!test_kvm_facility(vcpu->kvm, 65)) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + /* + * Verify that the hook callback is registered, lock the owner + * and call the hook.
+ */ + if (vcpu->kvm->arch.crypto.pqap_hook) { + if (!try_module_get(vcpu->kvm->arch.crypto.pqap_hook->owner)) + return -EOPNOTSUPP; + ret = vcpu->kvm->arch.crypto.pqap_hook->hook(vcpu); + module_put(vcpu->kvm->arch.crypto.pqap_hook->owner); + if (!ret && vcpu->run->s.regs.gprs[1] & 0x00ff0000) + kvm_s390_set_psw_cc(vcpu, 3); + return ret; + } + /* + * A vfio_driver must register a hook. + * No hook means no driver to enable the SIE CRYCB and no queues. + * We send this response to the guest. + */ + status.response_code = 0x01; + memcpy(&vcpu->run->s.regs.gprs[1], &status, sizeof(status)); + kvm_s390_set_psw_cc(vcpu, 3); + return 0; +} + static int handle_stfl(struct kvm_vcpu *vcpu) { int rc; @@ -878,6 +962,8 @@ int kvm_s390_handle_b2(struct kvm_vcpu *vcpu) return handle_sthyi(vcpu); case 0x7d: return handle_stsi(vcpu); + case 0xaf: + return handle_pqap(vcpu); case 0xb1: return handle_stfl(vcpu); case 0xb2: diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile index 5418d10dc2a8..a1ec63abfb95 100644 --- a/arch/s390/lib/Makefile +++ b/arch/s390/lib/Makefile @@ -3,9 +3,8 @@ # Makefile for s390-specific library files. # -lib-y += delay.o string.o uaccess.o find.o +lib-y += delay.o string.o uaccess.o find.o spinlock.o obj-y += mem.o xor.o -lib-$(CONFIG_SMP) += spinlock.o lib-$(CONFIG_KPROBES) += probes.o lib-$(CONFIG_UPROBES) += probes.o diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 14d1eae9fe43..f0bee6af3960 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -18,6 +18,7 @@ #include <linux/mman.h> #include <linux/mm.h> #include <linux/swap.h> +#include <linux/swiotlb.h> #include <linux/smp.h> #include <linux/init.h> #include <linux/pagemap.h> @@ -29,6 +30,7 @@ #include <linux/export.h> #include <linux/cma.h> #include <linux/gfp.h> +#include <linux/dma-mapping.h> #include <asm/processor.h> #include <linux/uaccess.h> #include <asm/pgtable.h> @@ -42,6 +44,8 @@ #include <asm/sclp.h> #include <asm/set_memory.h> #include <asm/kasan.h> +#include <asm/dma-mapping.h> +#include <asm/uv.h> pgd_t swapper_pg_dir[PTRS_PER_PGD] __section(.bss..swapper_pg_dir); @@ -128,6 +132,47 @@ void mark_rodata_ro(void) pr_info("Write protected read-only-after-init data: %luk\n", size >> 10); } +int set_memory_encrypted(unsigned long addr, int numpages) +{ + int i; + + /* make specified pages unshared (swiotlb, dma_free) */ + for (i = 0; i < numpages; ++i) { + uv_remove_shared(addr); + addr += PAGE_SIZE; + } + return 0; +} + +int set_memory_decrypted(unsigned long addr, int numpages) +{ + int i; + /* make specified pages shared (swiotlb, dma_alloc) */ + for (i = 0; i < numpages; ++i) { + uv_set_shared(addr); + addr += PAGE_SIZE; + } + return 0; +} + +/* are we a protected virtualization guest?
*/ +bool sev_active(void) +{ + return is_prot_virt_guest(); +} + +/* protected virtualization */ +static void pv_init(void) +{ + if (!is_prot_virt_guest()) + return; + + /* make sure bounce buffers are shared */ + swiotlb_init(1); + swiotlb_update_mem_attributes(); + swiotlb_force = SWIOTLB_FORCE; +} + void __init mem_init(void) { cpumask_set_cpu(0, &init_mm.context.cpu_attach_mask); @@ -136,6 +181,8 @@ void __init mem_init(void) set_max_mapnr(max_low_pfn); high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); + pv_init(); + /* Setup guest page hinting */ cmma_init(); diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c index 818deeb1ebc3..1864a8bb9622 100644 --- a/arch/s390/mm/maccess.c +++ b/arch/s390/mm/maccess.c @@ -52,21 +52,22 @@ static notrace long s390_kernel_write_odd(void *dst, const void *src, size_t siz * Therefore we have a read-modify-write sequence: the function reads eight * bytes from destination at an eight byte boundary, modifies the bytes * requested and writes the result back in a loop. - * - * Note: this means that this function may not be called concurrently on - * several cpus with overlapping words, since this may potentially - * cause data corruption. */ +static DEFINE_SPINLOCK(s390_kernel_write_lock); + void notrace s390_kernel_write(void *dst, const void *src, size_t size) { + unsigned long flags; long copied; + spin_lock_irqsave(&s390_kernel_write_lock, flags); while (size) { copied = s390_kernel_write_odd(dst, src, size); dst += copied; src += copied; size -= copied; } + spin_unlock_irqrestore(&s390_kernel_write_lock, flags); } static int __memcpy_real(void *dest, void *src, size_t count) diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c index 687f2a4d3459..cbc718ba6d78 100644 --- a/arch/s390/mm/mmap.c +++ b/arch/s390/mm/mmap.c @@ -24,8 +24,6 @@ static unsigned long stack_maxrandom_size(void) { if (!(current->flags & PF_RANDOMIZE)) return 0; - if (current->personality & ADDR_NO_RANDOMIZE) - return 0; return STACK_RND_MASK << PAGE_SHIFT; } diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 86ca7f88fb22..b8a64cbb5dea 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -421,12 +421,12 @@ static void zpci_map_resources(struct pci_dev *pdev) if (!len) continue; - if (static_branch_likely(&have_mio)) + if (zpci_use_mio(zdev)) pdev->resource[i].start = (resource_size_t __force) zdev->bars[i].mio_wb; else - pdev->resource[i].start = - (resource_size_t __force) pci_iomap(pdev, i, 0); + pdev->resource[i].start = (resource_size_t __force) + pci_iomap_range_fh(pdev, i, 0, 0); pdev->resource[i].end = pdev->resource[i].start + len - 1; } @@ -444,18 +444,19 @@ static void zpci_map_resources(struct pci_dev *pdev) static void zpci_unmap_resources(struct pci_dev *pdev) { + struct zpci_dev *zdev = to_zpci(pdev); resource_size_t len; int i; - if (static_branch_likely(&have_mio)) + if (zpci_use_mio(zdev)) return; for (i = 0; i < PCI_BAR_COUNT; i++) { len = pci_resource_len(pdev, i); if (!len) continue; - pci_iounmap(pdev, (void __iomem __force *) - pdev->resource[i].start); + pci_iounmap_fh(pdev, (void __iomem __force *) + pdev->resource[i].start); } } @@ -528,7 +529,7 @@ static int zpci_setup_bus_resources(struct zpci_dev *zdev, if (zdev->bars[i].val & 4) flags |= IORESOURCE_MEM_64; - if (static_branch_likely(&have_mio)) + if (zpci_use_mio(zdev)) addr = (unsigned long) zdev->bars[i].mio_wb; else addr = ZPCI_ADDR(entry); diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index d03631dba7c2..9bdff4defef1 100644 --- 
a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -291,7 +291,7 @@ int clp_enable_fh(struct zpci_dev *zdev, u8 nr_dma_as) goto out; zdev->fh = fh; - if (zdev->mio_capable) { + if (zpci_use_mio(zdev)) { rc = clp_set_pci_fn(&fh, nr_dma_as, CLP_SET_ENABLE_MIO); zpci_dbg(3, "ena mio fid:%x, fh:%x, rc:%d\n", zdev->fid, fh, rc); if (rc) diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c index 6b48ca7760a7..3408c0df3ebf 100644 --- a/arch/s390/pci/pci_debug.c +++ b/arch/s390/pci/pci_debug.c @@ -74,7 +74,7 @@ static void pci_sw_counter_show(struct seq_file *m) int i; for (i = 0; i < ARRAY_SIZE(pci_sw_names); i++, counter++) - seq_printf(m, "%26s:\t%lu\n", pci_sw_names[i], + seq_printf(m, "%26s:\t%llu\n", pci_sw_names[i], atomic64_read(counter)); } diff --git a/arch/s390/purgatory/.gitignore b/arch/s390/purgatory/.gitignore index e9e66f178a6d..04a03433c720 100644 --- a/arch/s390/purgatory/.gitignore +++ b/arch/s390/purgatory/.gitignore @@ -1,2 +1,3 @@ -kexec-purgatory.c +purgatory +purgatory.lds purgatory.ro diff --git a/arch/s390/tools/Makefile b/arch/s390/tools/Makefile index 2342b84b3386..b5e35e8f999a 100644 --- a/arch/s390/tools/Makefile +++ b/arch/s390/tools/Makefile @@ -6,7 +6,6 @@ kapi := arch/$(ARCH)/include/generated/asm kapi-hdrs-y := $(kapi)/facility-defs.h $(kapi)/dis-defs.h -targets += $(addprefix ../../../,$(kapi-hdrs-y)) PHONY += kapi kapi: $(kapi-hdrs-y) @@ -14,11 +13,7 @@ kapi: $(kapi-hdrs-y) hostprogs-y += gen_facilities hostprogs-y += gen_opcode_table -HOSTCFLAGS_gen_facilities.o += -Wall $(LINUXINCLUDE) -HOSTCFLAGS_gen_opcode_table.o += -Wall $(LINUXINCLUDE) - -# Ensure output directory exists -_dummy := $(shell [ -d '$(kapi)' ] || mkdir -p '$(kapi)') +HOSTCFLAGS_gen_facilities.o += $(LINUXINCLUDE) filechk_facility-defs.h = $(obj)/gen_facilities diff --git a/arch/s390/tools/opcodes.txt b/arch/s390/tools/opcodes.txt index 64638b764d1c..46d8ed96cf06 100644 --- a/arch/s390/tools/opcodes.txt +++ b/arch/s390/tools/opcodes.txt @@ -520,6 +520,9 @@ b92e km RRE_RR b92f kmc RRE_RR b930 cgfr RRE_RR b931 clgfr RRE_RR +b938 sortl RRE_RR +b939 dfltcc RRF_R0RR2 +b93a kdsa RRE_RR b93c ppno RRE_RR b93e kimd RRE_RR b93f klmd RRE_RR @@ -538,8 +541,16 @@ b95a cxlgtr RRF_UUFR b95b cxlftr RRF_UUFR b960 cgrt RRF_U0RR b961 clgrt RRF_U0RR +b964 nngrk RRF_R0RR2 +b965 ocgrk RRF_R0RR2 +b966 nogrk RRF_R0RR2 +b967 nxgrk RRF_R0RR2 b972 crt RRF_U0RR b973 clrt RRF_U0RR +b974 nnrk RRF_R0RR2 +b975 ocrk RRF_R0RR2 +b976 nork RRF_R0RR2 +b977 nxrk RRF_R0RR2 b980 ngr RRE_RR b981 ogr RRE_RR b982 xgr RRE_RR @@ -573,6 +584,7 @@ b99f ssair RRE_R0 b9a0 clp RRF_U0RR b9a1 tpei RRE_RR b9a2 ptf RRE_R0 +b9a4 uvc RRF_URR b9aa lptea RRF_RURR2 b9ab essa RRF_U0RR b9ac irbm RRE_RR @@ -585,6 +597,7 @@ b9b3 cu42 RRE_RR b9bd trtre RRF_U0RR b9be srstu RRE_RR b9bf trte RRF_U0RR +b9c0 selhhhr RRF_RURR b9c8 ahhhr RRF_R0RR2 b9c9 shhhr RRF_R0RR2 b9ca alhhhr RRF_R0RR2 @@ -594,6 +607,9 @@ b9cf clhhr RRE_RR b9d0 pcistg RRE_RR b9d2 pcilg RRE_RR b9d3 rpcit RRE_RR +b9d4 pcistgi RRE_RR +b9d5 pciwb RRE_00 +b9d6 pcilgi RRE_RR b9d8 ahhlr RRF_R0RR2 b9d9 shhlr RRF_R0RR2 b9da alhhlr RRF_R0RR2 @@ -601,9 +617,11 @@ b9db slhhlr RRF_R0RR2 b9dd chlr RRE_RR b9df clhlr RRE_RR b9e0 locfhr RRF_U0RR -b9e1 popcnt RRE_RR +b9e1 popcnt RRF_U0RR b9e2 locgr RRF_U0RR +b9e3 selgr RRF_RURR b9e4 ngrk RRF_R0RR2 +b9e5 ncgrk RRF_R0RR2 b9e6 ogrk RRF_R0RR2 b9e7 xgrk RRF_R0RR2 b9e8 agrk RRF_R0RR2 @@ -612,8 +630,10 @@ b9ea algrk RRF_R0RR2 b9eb slgrk RRF_R0RR2 b9ec mgrk RRF_R0RR2 b9ed msgrkc RRF_R0RR2 +b9f0 selr RRF_RURR b9f2 locr RRF_U0RR b9f4 nrk RRF_R0RR2 
+b9f5 ncrk RRF_R0RR2 b9f6 ork RRF_R0RR2 b9f7 xrk RRF_R0RR2 b9f8 ark RRF_R0RR2 @@ -822,6 +842,7 @@ e3d4 stpcifc RXY_RRRD e500 lasp SSE_RDRD e501 tprot SSE_RDRD e502 strag SSE_RDRD +e50a mvcrl SSE_RDRD e50e mvcsk SSE_RDRD e50f mvcdk SSE_RDRD e544 mvhhi SIL_RDI @@ -835,6 +856,18 @@ e55c chsi SIL_RDI e55d clfhsi SIL_RDU e560 tbegin SIL_RDU e561 tbeginc SIL_RDU +e601 vlebrh VRX_VRRDU +e602 vlebrg VRX_VRRDU +e603 vlebrf VRX_VRRDU +e604 vllebrz VRX_VRRDU +e605 vlbrrep VRX_VRRDU +e606 vlbr VRX_VRRDU +e607 vler VRX_VRRDU +e609 vstebrh VRX_VRRDU +e60a vstebrg VRX_VRRDU +e60b vstebrf VRX_VRRDU +e60e vstbr VRX_VRRDU +e60f vster VRX_VRRDU e634 vpkz VSI_URDV e635 vlrl VSI_URDV e637 vlrlr VRS_RRDV @@ -842,8 +875,8 @@ e63c vupkz VSI_URDV e63d vstrl VSI_URDV e63f vstrlr VRS_RRDV e649 vlip VRI_V0UU2 -e650 vcvb VRR_RV0U -e652 vcvbg VRR_RV0U +e650 vcvb VRR_RV0UU +e652 vcvbg VRR_RV0UU e658 vcvd VRI_VR0UU e659 vsrp VRI_VVUUU2 e65a vcvdg VRI_VR0UU @@ -863,13 +896,13 @@ e702 vleg VRX_VRRDU e703 vlef VRX_VRRDU e704 vllez VRX_VRRDU e705 vlrep VRX_VRRDU -e706 vl VRX_VRRD +e706 vl VRX_VRRDU e707 vlbb VRX_VRRDU e708 vsteb VRX_VRRDU e709 vsteh VRX_VRRDU e70a vsteg VRX_VRRDU e70b vstef VRX_VRRDU -e70e vst VRX_VRRD +e70e vst VRX_VRRDU e712 vgeg VRV_VVXRDU e713 vgef VRV_VVXRDU e71a vsceg VRV_VVXRDU @@ -879,11 +912,11 @@ e722 vlvg VRS_VRRDU e727 lcbb RXE_RRRDU e730 vesl VRS_VVRDU e733 verll VRS_VVRDU -e736 vlm VRS_VVRD +e736 vlm VRS_VVRDU e737 vll VRS_VRRD e738 vesrl VRS_VVRDU e73a vesra VRS_VVRDU -e73e vstm VRS_VVRD +e73e vstm VRS_VVRDU e73f vstl VRS_VRRD e740 vleib VRI_V0IU e741 vleih VRI_V0IU @@ -932,7 +965,10 @@ e781 vfene VRR_VVV0U0U e782 vfae VRR_VVV0U0U e784 vpdi VRR_VVV0U e785 vbperm VRR_VVV +e786 vsld VRI_VVV0U +e787 vsrd VRI_VVV0U e78a vstrc VRR_VVVUU0V +e78b vstrs VRR_VVVUU0V e78c vperm VRR_VVV0V e78d vsel VRR_VVV0V e78e vfms VRR_VVVU0UV @@ -1060,6 +1096,7 @@ eb9b stamy RSY_AARD ebc0 tp RSL_R0RD ebd0 pcistb RSY_RRRD ebd1 sic RSY_RRRD +ebd4 pcistbi RSY_RRRD ebdc srak RSY_RRRD ebdd slak RSY_RRRD ebde srlk RSY_RRRD diff --git a/arch/sparc/include/asm/atomic_64.h b/arch/sparc/include/asm/atomic_64.h index 6963482c81d8..b60448397d4f 100644 --- a/arch/sparc/include/asm/atomic_64.h +++ b/arch/sparc/include/asm/atomic_64.h @@ -23,15 +23,15 @@ #define ATOMIC_OP(op) \ void atomic_##op(int, atomic_t *); \ -void atomic64_##op(long, atomic64_t *); +void atomic64_##op(s64, atomic64_t *); #define ATOMIC_OP_RETURN(op) \ int atomic_##op##_return(int, atomic_t *); \ -long atomic64_##op##_return(long, atomic64_t *); +s64 atomic64_##op##_return(s64, atomic64_t *); #define ATOMIC_FETCH_OP(op) \ int atomic_fetch_##op(int, atomic_t *); \ -long atomic64_fetch_##op(long, atomic64_t *); +s64 atomic64_fetch_##op(s64, atomic64_t *); #define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op) @@ -61,7 +61,7 @@ static inline int atomic_xchg(atomic_t *v, int new) ((__typeof__((v)->counter))cmpxchg(&((v)->counter), (o), (n))) #define atomic64_xchg(v, new) (xchg(&((v)->counter), new)) -long atomic64_dec_if_positive(atomic64_t *v); +s64 atomic64_dec_if_positive(atomic64_t *v); #define atomic64_dec_if_positive atomic64_dec_if_positive #endif /* !(__ARCH_SPARC64_ATOMIC__) */ diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 2bbbd4d1ba31..fbabf59692ff 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -17,6 +17,7 @@ config X86_32 select HAVE_DEBUG_STACKOVERFLOW select MODULES_USE_ELF_REL select OLD_SIGACTION + select GENERIC_VDSO_32 config X86_64 def_bool y @@ -121,6 +122,7 @@ config X86 select GENERIC_STRNCPY_FROM_USER 
select GENERIC_STRNLEN_USER select GENERIC_TIME_VSYSCALL + select GENERIC_GETTIMEOFDAY select HARDLOCKUP_CHECK_TIMESTAMP if X86_64 select HAVE_ACPI_APEI if ACPI select HAVE_ACPI_APEI_NMI if ACPI @@ -202,6 +204,7 @@ config X86 select HAVE_SYSCALL_TRACEPOINTS select HAVE_UNSTABLE_SCHED_CLOCK select HAVE_USER_RETURN_NOTIFIER + select HAVE_GENERIC_VDSO select HOTPLUG_SMT if SMP select IRQ_FORCED_THREADING select NEED_SG_DMA_LENGTH @@ -2285,7 +2288,7 @@ config COMPAT_VDSO choice prompt "vsyscall table for legacy applications" depends on X86_64 - default LEGACY_VSYSCALL_EMULATE + default LEGACY_VSYSCALL_XONLY help Legacy user code that does not know how to find the vDSO expects to be able to issue three syscalls by calling fixed addresses in @@ -2293,23 +2296,38 @@ choice it can be used to assist security vulnerability exploitation. This setting can be changed at boot time via the kernel command - line parameter vsyscall=[emulate|none]. + line parameter vsyscall=[emulate|xonly|none]. On a system with recent enough glibc (2.14 or newer) and no static binaries, you can say None without a performance penalty to improve security. - If unsure, select "Emulate". + If unsure, select "Emulate execution only". config LEGACY_VSYSCALL_EMULATE - bool "Emulate" + bool "Full emulation" help - The kernel traps and emulates calls into the fixed - vsyscall address mapping. This makes the mapping - non-executable, but it still contains known contents, - which could be used in certain rare security vulnerability - exploits. This configuration is recommended when userspace - still uses the vsyscall area. + The kernel traps and emulates calls into the fixed vsyscall + address mapping. This makes the mapping non-executable, but + it still contains readable known contents, which could be + used in certain rare security vulnerability exploits. This + configuration is recommended when using legacy userspace + that still uses vsyscalls along with legacy binary + instrumentation tools that require code to be readable. + + An example of this type of legacy userspace is running + Pin on an old binary that still uses vsyscalls. + + config LEGACY_VSYSCALL_XONLY + bool "Emulate execution only" + help + The kernel traps and emulates calls into the fixed vsyscall + address mapping and does not allow reads. This + configuration is recommended when userspace might use the + legacy vsyscall area but support for legacy binary + instrumentation of legacy code is not needed. It mitigates + certain uses of the vsyscall area as an ASLR-bypassing + buffer. config LEGACY_VSYSCALL_NONE bool "None" diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 6adce15268bd..8e29c991ba3e 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -480,3 +480,16 @@ config CPU_SUP_UMC_32 CPU might render the kernel unbootable. If unsure, say N. + +config CPU_SUP_ZHAOXIN + default y + bool "Support Zhaoxin processors" if PROCESSOR_SELECT + help + This enables detection, tunings and quirks for Zhaoxin processors + + You need this enabled if you want your kernel to run on a + Zhaoxin CPU. Disabling this option on other types of CPUs + makes the kernel a tiny bit smaller. Disabling it on a Zhaoxin + CPU might render the kernel unbootable. + + If unsure, say N. 
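As a concrete illustration of the legacy behaviour the vsyscall help text above describes: pre-vDSO binaries call gettimeofday() through a fixed address. A minimal userspace sketch (the function names are ours; 0xffffffffff600000 is the architectural x86-64 VSYSCALL_ADDR):

#include <stddef.h>
#include <sys/time.h>

typedef int (*vgtod_fn)(struct timeval *tv, struct timezone *tz);

static int legacy_gettimeofday(struct timeval *tv)
{
	vgtod_fn vgtod = (vgtod_fn)0xffffffffff600000UL;

	/*
	 * With vsyscall=emulate or vsyscall=xonly this call traps and is
	 * emulated by the kernel; with vsyscall=none it raises SIGSEGV.
	 */
	return vgtod(tv, NULL);
}

Reads of that page, as opposed to calls into it, are what the new XONLY mode refuses.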
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 2418804e66b4..536b574b6161 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -72,23 +72,18 @@ static long syscall_trace_enter(struct pt_regs *regs) struct thread_info *ti = current_thread_info(); unsigned long ret = 0; - bool emulated = false; u32 work; if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) BUG_ON(regs != task_pt_regs(current)); - work = READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY; + work = READ_ONCE(ti->flags); - if (unlikely(work & _TIF_SYSCALL_EMU)) - emulated = true; - - if ((emulated || (work & _TIF_SYSCALL_TRACE)) && - tracehook_report_syscall_entry(regs)) - return -1L; - - if (emulated) - return -1L; + if (work & (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU)) { + ret = tracehook_report_syscall_entry(regs); + if (ret || (work & _TIF_SYSCALL_EMU)) + return -1L; + } #ifdef CONFIG_SECCOMP /* diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 7b23431be5cb..44c6e6f54bf7 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -1104,6 +1104,30 @@ ENTRY(irq_entries_start) .endr END(irq_entries_start) +#ifdef CONFIG_X86_LOCAL_APIC + .align 8 +ENTRY(spurious_entries_start) + vector=FIRST_SYSTEM_VECTOR + .rept (NR_VECTORS - FIRST_SYSTEM_VECTOR) + pushl $(~vector+0x80) /* Note: always in signed byte range */ + vector=vector+1 + jmp common_spurious + .align 8 + .endr +END(spurious_entries_start) + +common_spurious: + ASM_CLAC + addl $-0x80, (%esp) /* Adjust vector into the [-256, -1] range */ + SAVE_ALL switch_stacks=1 + ENCODE_FRAME_POINTER + TRACE_IRQS_OFF + movl %esp, %eax + call smp_spurious_interrupt + jmp ret_from_intr +ENDPROC(common_spurious) +#endif + /* * the CPU automatically disables interrupts when executing an IRQ vector, * so IRQ-flags tracing has to follow that: diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 11aa3b2afa4d..15f0749d0a15 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -375,6 +375,18 @@ ENTRY(irq_entries_start) .endr END(irq_entries_start) + .align 8 +ENTRY(spurious_entries_start) + vector=FIRST_SYSTEM_VECTOR + .rept (NR_VECTORS - FIRST_SYSTEM_VECTOR) + UNWIND_HINT_IRET_REGS + pushq $(~vector+0x80) /* Note: always in signed byte range */ + jmp common_spurious + .align 8 + vector=vector+1 + .endr +END(spurious_entries_start) + .macro DEBUG_ENTRY_ASSERT_IRQS_OFF #ifdef CONFIG_DEBUG_ENTRY pushq %rax @@ -571,10 +583,20 @@ _ASM_NOKPROBE(interrupt_entry) /* Interrupt entry/exit. */ - /* - * The interrupt stubs push (~vector+0x80) onto the stack and - * then jump to common_interrupt. - */ +/* + * The interrupt stubs push (~vector+0x80) onto the stack and + * then jump to common_spurious/interrupt. + */ +common_spurious: + addq $-0x80, (%rsp) /* Adjust vector to [-256, -1] range */ + call interrupt_entry + UNWIND_HINT_REGS indirect=1 + call smp_spurious_interrupt /* rdi points to pt_regs */ + jmp ret_from_intr +END(common_spurious) +_ASM_NOKPROBE(common_spurious) + +/* common_interrupt is a hotpath. Align it */ .p2align CONFIG_X86_L1_CACHE_SHIFT common_interrupt: addq $-0x80, (%rsp) /* Adjust vector to [-256, -1] range */ @@ -1670,11 +1692,17 @@ nmi_restore: iretq END(nmi) +#ifndef CONFIG_IA32_EMULATION +/* + * This handles SYSCALL from 32-bit code. There is no way to program + * MSRs to fully disable 32-bit SYSCALL.
+ */ ENTRY(ignore_sysret) UNWIND_HINT_EMPTY mov $-ENOSYS, %eax sysret END(ignore_sysret) +#endif ENTRY(rewind_stack_do_exit) UNWIND_HINT_FUNC diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index 42fe42e82baf..39106111be86 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -3,6 +3,12 @@ # Building vDSO images for x86. # +# Absolute relocation type $(ARCH_REL_TYPE_ABS) needs to be defined before +# the inclusion of generic Makefile. +ARCH_REL_TYPE_ABS := R_X86_64_JUMP_SLOT|R_X86_64_GLOB_DAT|R_X86_64_RELATIVE| +ARCH_REL_TYPE_ABS += R_386_GLOB_DAT|R_386_JMP_SLOT|R_386_RELATIVE +include $(srctree)/lib/vdso/Makefile + KBUILD_CFLAGS += $(DISABLE_LTO) KASAN_SANITIZE := n UBSAN_SANITIZE := n @@ -51,6 +57,7 @@ VDSO_LDFLAGS_vdso.lds = -m elf_x86_64 -soname linux-vdso.so.1 --no-undefined \ $(obj)/vdso64.so.dbg: $(obj)/vdso.lds $(vobjs) FORCE $(call if_changed,vdso) + $(call if_changed,vdso_check) HOST_EXTRACFLAGS += -I$(srctree)/tools/include -I$(srctree)/include/uapi -I$(srctree)/arch/$(SUBARCH)/include/uapi hostprogs-y += vdso2c @@ -121,6 +128,7 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE $(obj)/vdsox32.so.dbg: $(obj)/vdsox32.lds $(vobjx32s) FORCE $(call if_changed,vdso) + $(call if_changed,vdso_check) CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds) VDSO_LDFLAGS_vdso32.lds = -m elf_i386 -soname linux-gate.so.1 @@ -160,6 +168,7 @@ $(obj)/vdso32.so.dbg: FORCE \ $(obj)/vdso32/system_call.o \ $(obj)/vdso32/sigreturn.o $(call if_changed,vdso) + $(call if_changed,vdso_check) # # The DSO images are built using a special linker script. diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c index 4aed41f638bb..d9ff616bb0f6 100644 --- a/arch/x86/entry/vdso/vclock_gettime.c +++ b/arch/x86/entry/vdso/vclock_gettime.c @@ -1,251 +1,85 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright 2006 Andi Kleen, SUSE Labs. - * * Fast user context implementation of clock_gettime, gettimeofday, and time. * + * Copyright 2006 Andi Kleen, SUSE Labs. + * Copyright 2019 ARM Limited + * * 32 Bit compat layer by Stefani Seibold <stefani@seibold.net> * sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany - * - * The code should have no internal unresolved relocations. - * Check with readelf after changing. 
*/ - -#include <uapi/linux/time.h> -#include <asm/vgtod.h> -#include <asm/vvar.h> -#include <asm/unistd.h> -#include <asm/msr.h> -#include <asm/pvclock.h> -#include <asm/mshyperv.h> -#include <linux/math64.h> #include <linux/time.h> #include <linux/kernel.h> +#include <linux/types.h> -#define gtod (&VVAR(vsyscall_gtod_data)) +#include "../../../../lib/vdso/gettimeofday.c" -extern int __vdso_clock_gettime(clockid_t clock, struct timespec *ts); -extern int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz); +extern int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz); extern time_t __vdso_time(time_t *t); -#ifdef CONFIG_PARAVIRT_CLOCK -extern u8 pvclock_page[PAGE_SIZE] - __attribute__((visibility("hidden"))); -#endif - -#ifdef CONFIG_HYPERV_TSCPAGE -extern u8 hvclock_page[PAGE_SIZE] - __attribute__((visibility("hidden"))); -#endif - -#ifndef BUILD_VDSO32 - -notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) +int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz) { - long ret; - asm ("syscall" : "=a" (ret), "=m" (*ts) : - "0" (__NR_clock_gettime), "D" (clock), "S" (ts) : - "rcx", "r11"); - return ret; + return __cvdso_gettimeofday(tv, tz); } -#else +int gettimeofday(struct __kernel_old_timeval *, struct timezone *) + __attribute__((weak, alias("__vdso_gettimeofday"))); -notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) +time_t __vdso_time(time_t *t) { - long ret; - - asm ( - "mov %%ebx, %%edx \n" - "mov %[clock], %%ebx \n" - "call __kernel_vsyscall \n" - "mov %%edx, %%ebx \n" - : "=a" (ret), "=m" (*ts) - : "0" (__NR_clock_gettime), [clock] "g" (clock), "c" (ts) - : "edx"); - return ret; + return __cvdso_time(t); } -#endif +time_t time(time_t *t) __attribute__((weak, alias("__vdso_time"))); -#ifdef CONFIG_PARAVIRT_CLOCK -static notrace const struct pvclock_vsyscall_time_info *get_pvti0(void) -{ - return (const struct pvclock_vsyscall_time_info *)&pvclock_page; -} -static notrace u64 vread_pvclock(void) -{ - const struct pvclock_vcpu_time_info *pvti = &get_pvti0()->pvti; - u32 version; - u64 ret; - - /* - * Note: The kernel and hypervisor must guarantee that cpu ID - * number maps 1:1 to per-CPU pvclock time info. - * - * Because the hypervisor is entirely unaware of guest userspace - * preemption, it cannot guarantee that per-CPU pvclock time - * info is updated if the underlying CPU changes or that that - * version is increased whenever underlying CPU changes. - * - * On KVM, we are guaranteed that pvti updates for any vCPU are - * atomic as seen by *all* vCPUs. This is an even stronger - * guarantee than we get with a normal seqlock. - * - * On Xen, we don't appear to have that guarantee, but Xen still - * supplies a valid seqlock using the version field. - * - * We only do pvclock vdso timing at all if - * PVCLOCK_TSC_STABLE_BIT is set, and we interpret that bit to - * mean that all vCPUs have matching pvti and that the TSC is - * synced, so we can just look at vCPU 0's pvti. 
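The 64-bit fallback deleted just above is worth seeing in isolation. A self-contained sketch of the same idea (the function name is ours; x86-64 only), with rcx and r11 in the clobber list because the syscall instruction overwrites them:

#include <sys/syscall.h>
#include <time.h>

static long clock_gettime_fallback(long clock, struct timespec *ts)
{
	long ret;

	asm volatile("syscall"
		     : "=a" (ret)
		     : "0" (SYS_clock_gettime), "D" (clock), "S" (ts)
		     : "rcx", "r11", "memory");
	return ret;
}

The generic __cvdso_* code that replaces this file still ends up in such a fallback whenever the clocksource cannot be read from userspace.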
- */ - - do { - version = pvclock_read_begin(pvti); - - if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT))) - return U64_MAX; - - ret = __pvclock_read_cycles(pvti, rdtsc_ordered()); - } while (pvclock_read_retry(pvti, version)); - - return ret; -} -#endif -#ifdef CONFIG_HYPERV_TSCPAGE -static notrace u64 vread_hvclock(void) -{ - const struct ms_hyperv_tsc_page *tsc_pg = - (const struct ms_hyperv_tsc_page *)&hvclock_page; +#if defined(CONFIG_X86_64) && !defined(BUILD_VDSO32_64) +/* both 64-bit and x32 use these */ +extern int __vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts); +extern int __vdso_clock_getres(clockid_t clock, struct __kernel_timespec *res); - return hv_read_tsc_page(tsc_pg); -} -#endif - -notrace static inline u64 vgetcyc(int mode) +int __vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts) { - if (mode == VCLOCK_TSC) - return (u64)rdtsc_ordered(); - - /* - * For any memory-mapped vclock type, we need to make sure that gcc - * doesn't cleverly hoist a load before the mode check. Otherwise we - * might end up touching the memory-mapped page even if the vclock in - * question isn't enabled, which will segfault. Hence the barriers. - */ -#ifdef CONFIG_PARAVIRT_CLOCK - if (mode == VCLOCK_PVCLOCK) { - barrier(); - return vread_pvclock(); - } -#endif -#ifdef CONFIG_HYPERV_TSCPAGE - if (mode == VCLOCK_HVCLOCK) { - barrier(); - return vread_hvclock(); - } -#endif - return U64_MAX; + return __cvdso_clock_gettime(clock, ts); } -notrace static int do_hres(clockid_t clk, struct timespec *ts) -{ - struct vgtod_ts *base = >od->basetime[clk]; - u64 cycles, last, sec, ns; - unsigned int seq; - - do { - seq = gtod_read_begin(gtod); - cycles = vgetcyc(gtod->vclock_mode); - ns = base->nsec; - last = gtod->cycle_last; - if (unlikely((s64)cycles < 0)) - return vdso_fallback_gettime(clk, ts); - if (cycles > last) - ns += (cycles - last) * gtod->mult; - ns >>= gtod->shift; - sec = base->sec; - } while (unlikely(gtod_read_retry(gtod, seq))); - - /* - * Do this outside the loop: a race inside the loop could result - * in __iter_div_u64_rem() being extremely slow. - */ - ts->tv_sec = sec + __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns); - ts->tv_nsec = ns; - - return 0; -} +int clock_gettime(clockid_t, struct __kernel_timespec *) + __attribute__((weak, alias("__vdso_clock_gettime"))); -notrace static void do_coarse(clockid_t clk, struct timespec *ts) +int __vdso_clock_getres(clockid_t clock, + struct __kernel_timespec *res) { - struct vgtod_ts *base = >od->basetime[clk]; - unsigned int seq; - - do { - seq = gtod_read_begin(gtod); - ts->tv_sec = base->sec; - ts->tv_nsec = base->nsec; - } while (unlikely(gtod_read_retry(gtod, seq))); + return __cvdso_clock_getres(clock, res); } +int clock_getres(clockid_t, struct __kernel_timespec *) + __attribute__((weak, alias("__vdso_clock_getres"))); -notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) +#else +/* i386 only */ +extern int __vdso_clock_gettime(clockid_t clock, struct old_timespec32 *ts); +extern int __vdso_clock_getres(clockid_t clock, struct old_timespec32 *res); + +int __vdso_clock_gettime(clockid_t clock, struct old_timespec32 *ts) { - unsigned int msk; - - /* Sort out negative (CPU/FD) and invalid clocks */ - if (unlikely((unsigned int) clock >= MAX_CLOCKS)) - return vdso_fallback_gettime(clock, ts); - - /* - * Convert the clockid to a bitmask and use it to check which - * clocks are handled in the VDSO directly. 
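The dispatch described here reduces to one shift and two mask tests. A standalone sketch (classify_clock and the VDSO_* macros are our names; the CLOCK_* ids are the uapi constants, and 32 stands in for the kernel's MAX_CLOCKS bound):

#include <time.h>

#define VDSO_HRES   ((1U << CLOCK_REALTIME) | (1U << CLOCK_MONOTONIC))
#define VDSO_COARSE ((1U << CLOCK_REALTIME_COARSE) | (1U << CLOCK_MONOTONIC_COARSE))

/* 0: high-resolution path, 1: coarse path, -1: syscall fallback */
static int classify_clock(clockid_t clock)
{
	unsigned int msk;

	/* one unsigned compare also rejects negative CPU/FD clock ids */
	if ((unsigned int)clock >= 32)
		return -1;
	msk = 1U << clock;
	if (msk & VDSO_HRES)
		return 0;
	if (msk & VDSO_COARSE)
		return 1;
	return -1;
}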
- */ - msk = 1U << clock; - if (likely(msk & VGTOD_HRES)) { - return do_hres(clock, ts); - } else if (msk & VGTOD_COARSE) { - do_coarse(clock, ts); - return 0; - } - return vdso_fallback_gettime(clock, ts); + return __cvdso_clock_gettime32(clock, ts); } -int clock_gettime(clockid_t, struct timespec *) +int clock_gettime(clockid_t, struct old_timespec32 *) __attribute__((weak, alias("__vdso_clock_gettime"))); -notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz) +int __vdso_clock_gettime64(clockid_t clock, struct __kernel_timespec *ts) { - if (likely(tv != NULL)) { - struct timespec *ts = (struct timespec *) tv; - - do_hres(CLOCK_REALTIME, ts); - tv->tv_usec /= 1000; - } - if (unlikely(tz != NULL)) { - tz->tz_minuteswest = gtod->tz_minuteswest; - tz->tz_dsttime = gtod->tz_dsttime; - } - - return 0; + return __cvdso_clock_gettime(clock, ts); } -int gettimeofday(struct timeval *, struct timezone *) - __attribute__((weak, alias("__vdso_gettimeofday"))); -/* - * This will break when the xtime seconds get inaccurate, but that is - * unlikely - */ -notrace time_t __vdso_time(time_t *t) -{ - /* This is atomic on x86 so we don't need any locks. */ - time_t result = READ_ONCE(gtod->basetime[CLOCK_REALTIME].sec); +int clock_gettime64(clockid_t, struct __kernel_timespec *) + __attribute__((weak, alias("__vdso_clock_gettime64"))); - if (t) - *t = result; - return result; +int __vdso_clock_getres(clockid_t clock, struct old_timespec32 *res) +{ + return __cvdso_clock_getres_time32(clock, res); } -time_t time(time_t *t) - __attribute__((weak, alias("__vdso_time"))); + +int clock_getres(clockid_t, struct old_timespec32 *) + __attribute__((weak, alias("__vdso_clock_getres"))); +#endif diff --git a/arch/x86/entry/vdso/vdso.lds.S b/arch/x86/entry/vdso/vdso.lds.S index d3a2dce4cfa9..36b644e16272 100644 --- a/arch/x86/entry/vdso/vdso.lds.S +++ b/arch/x86/entry/vdso/vdso.lds.S @@ -25,6 +25,8 @@ VERSION { __vdso_getcpu; time; __vdso_time; + clock_getres; + __vdso_clock_getres; local: *; }; } diff --git a/arch/x86/entry/vdso/vdso32/vdso32.lds.S b/arch/x86/entry/vdso/vdso32/vdso32.lds.S index 422764a81d32..c7720995ab1a 100644 --- a/arch/x86/entry/vdso/vdso32/vdso32.lds.S +++ b/arch/x86/entry/vdso/vdso32/vdso32.lds.S @@ -26,6 +26,8 @@ VERSION __vdso_clock_gettime; __vdso_gettimeofday; __vdso_time; + __vdso_clock_getres; + __vdso_clock_gettime64; }; LINUX_2.5 { diff --git a/arch/x86/entry/vdso/vdsox32.lds.S b/arch/x86/entry/vdso/vdsox32.lds.S index 05cd1c5c4a15..16a8050a4fb6 100644 --- a/arch/x86/entry/vdso/vdsox32.lds.S +++ b/arch/x86/entry/vdso/vdsox32.lds.S @@ -21,6 +21,7 @@ VERSION { __vdso_gettimeofday; __vdso_getcpu; __vdso_time; + __vdso_clock_getres; local: *; }; } diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index 8db1f594e8b1..349a61d8bf34 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -22,7 +22,7 @@ #include <asm/page.h> #include <asm/desc.h> #include <asm/cpufeature.h> -#include <asm/mshyperv.h> +#include <clocksource/hyperv_timer.h> #if defined(CONFIG_X86_64) unsigned int __read_mostly vdso64_enabled = 1; diff --git a/arch/x86/entry/vsyscall/Makefile b/arch/x86/entry/vsyscall/Makefile index 1ac4dd116c26..93c1b3e949a7 100644 --- a/arch/x86/entry/vsyscall/Makefile +++ b/arch/x86/entry/vsyscall/Makefile @@ -2,7 +2,5 @@ # # Makefile for the x86 low level vsyscall code # -obj-y := vsyscall_gtod.o - obj-$(CONFIG_X86_VSYSCALL_EMULATION) += vsyscall_64.o vsyscall_emu_64.o diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c 
b/arch/x86/entry/vsyscall/vsyscall_64.c index d9d81ad7a400..07003f3f1bfc 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -42,9 +42,11 @@ #define CREATE_TRACE_POINTS #include "vsyscall_trace.h" -static enum { EMULATE, NONE } vsyscall_mode = +static enum { EMULATE, XONLY, NONE } vsyscall_mode __ro_after_init = #ifdef CONFIG_LEGACY_VSYSCALL_NONE NONE; +#elif defined(CONFIG_LEGACY_VSYSCALL_XONLY) + XONLY; #else EMULATE; #endif @@ -54,6 +56,8 @@ static int __init vsyscall_setup(char *str) if (str) { if (!strcmp("emulate", str)) vsyscall_mode = EMULATE; + else if (!strcmp("xonly", str)) + vsyscall_mode = XONLY; else if (!strcmp("none", str)) vsyscall_mode = NONE; else @@ -113,7 +117,8 @@ static bool write_ok_or_segv(unsigned long ptr, size_t size) } } -bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) +bool emulate_vsyscall(unsigned long error_code, + struct pt_regs *regs, unsigned long address) { struct task_struct *tsk; unsigned long caller; @@ -122,6 +127,22 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) long ret; unsigned long orig_dx; + /* Write faults or kernel-privilege faults never get fixed up. */ + if ((error_code & (X86_PF_WRITE | X86_PF_USER)) != X86_PF_USER) + return false; + + if (!(error_code & X86_PF_INSTR)) { + /* Failed vsyscall read */ + if (vsyscall_mode == EMULATE) + return false; + + /* + * User code tried and failed to read the vsyscall page. + */ + warn_bad_vsyscall(KERN_INFO, regs, "vsyscall read attempt denied -- look up the vsyscall kernel parameter if you need a workaround"); + return false; + } + /* * No point in checking CS -- the only way to get here is a user mode * trap to a high address, which means that we're in 64-bit user code. @@ -284,7 +305,7 @@ static const char *gate_vma_name(struct vm_area_struct *vma) static const struct vm_operations_struct gate_vma_ops = { .name = gate_vma_name, }; -static struct vm_area_struct gate_vma = { +static struct vm_area_struct gate_vma __ro_after_init = { .vm_start = VSYSCALL_ADDR, .vm_end = VSYSCALL_ADDR + PAGE_SIZE, .vm_page_prot = PAGE_READONLY_EXEC, @@ -357,12 +378,20 @@ void __init map_vsyscall(void) extern char __vsyscall_page; unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page); - if (vsyscall_mode != NONE) { + /* + * For full emulation, the page needs to exist for real. In + * execute-only mode, there is no PTE at all backing the vsyscall + * page. + */ + if (vsyscall_mode == EMULATE) { __set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall, PAGE_KERNEL_VVAR); set_vsyscall_pgtable_user_bits(swapper_pg_dir); } + if (vsyscall_mode == XONLY) + gate_vma.vm_flags = VM_EXEC; + BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) != (unsigned long)VSYSCALL_ADDR); } diff --git a/arch/x86/entry/vsyscall/vsyscall_gtod.c b/arch/x86/entry/vsyscall/vsyscall_gtod.c deleted file mode 100644 index cfcdba082feb..000000000000 --- a/arch/x86/entry/vsyscall/vsyscall_gtod.c +++ /dev/null @@ -1,83 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE - * Copyright 2003 Andi Kleen, SuSE Labs. - * - * Modified for x86 32 bit architecture by - * Stefani Seibold <stefani@seibold.net> - * sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany - * - * Thanks to hpa@transmeta.com for some useful hint. - * Special thanks to Ingo Molnar for his early experience with - * a different vsyscall implementation for Linux/IA32 and for the name. 
- * - */ - -#include <linux/timekeeper_internal.h> -#include <asm/vgtod.h> -#include <asm/vvar.h> - -int vclocks_used __read_mostly; - -DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data); - -void update_vsyscall_tz(void) -{ - vsyscall_gtod_data.tz_minuteswest = sys_tz.tz_minuteswest; - vsyscall_gtod_data.tz_dsttime = sys_tz.tz_dsttime; -} - -void update_vsyscall(struct timekeeper *tk) -{ - int vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode; - struct vsyscall_gtod_data *vdata = &vsyscall_gtod_data; - struct vgtod_ts *base; - u64 nsec; - - /* Mark the new vclock used. */ - BUILD_BUG_ON(VCLOCK_MAX >= 32); - WRITE_ONCE(vclocks_used, READ_ONCE(vclocks_used) | (1 << vclock_mode)); - - gtod_write_begin(vdata); - - /* copy vsyscall data */ - vdata->vclock_mode = vclock_mode; - vdata->cycle_last = tk->tkr_mono.cycle_last; - vdata->mask = tk->tkr_mono.mask; - vdata->mult = tk->tkr_mono.mult; - vdata->shift = tk->tkr_mono.shift; - - base = &vdata->basetime[CLOCK_REALTIME]; - base->sec = tk->xtime_sec; - base->nsec = tk->tkr_mono.xtime_nsec; - - base = &vdata->basetime[CLOCK_TAI]; - base->sec = tk->xtime_sec + (s64)tk->tai_offset; - base->nsec = tk->tkr_mono.xtime_nsec; - - base = &vdata->basetime[CLOCK_MONOTONIC]; - base->sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec; - nsec = tk->tkr_mono.xtime_nsec; - nsec += ((u64)tk->wall_to_monotonic.tv_nsec << tk->tkr_mono.shift); - while (nsec >= (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) { - nsec -= ((u64)NSEC_PER_SEC) << tk->tkr_mono.shift; - base->sec++; - } - base->nsec = nsec; - - base = &vdata->basetime[CLOCK_REALTIME_COARSE]; - base->sec = tk->xtime_sec; - base->nsec = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift; - - base = &vdata->basetime[CLOCK_MONOTONIC_COARSE]; - base->sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec; - nsec = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift; - nsec += tk->wall_to_monotonic.tv_nsec; - while (nsec >= NSEC_PER_SEC) { - nsec -= NSEC_PER_SEC; - base->sec++; - } - base->nsec = nsec; - - gtod_write_end(vdata); -} diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index f315425d8468..ceb712b0a1c6 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -561,14 +561,14 @@ int x86_pmu_hw_config(struct perf_event *event) } /* sample_regs_user never support XMM registers */ - if (unlikely(event->attr.sample_regs_user & PEBS_XMM_REGS)) + if (unlikely(event->attr.sample_regs_user & PERF_REG_EXTENDED_MASK)) return -EINVAL; /* * Besides the general purpose registers, XMM registers may * be collected in PEBS on some platforms, e.g. Icelake */ - if (unlikely(event->attr.sample_regs_intr & PEBS_XMM_REGS)) { - if (x86_pmu.pebs_no_xmm_regs) + if (unlikely(event->attr.sample_regs_intr & PERF_REG_EXTENDED_MASK)) { + if (!(event->pmu->capabilities & PERF_PMU_CAP_EXTENDED_REGS)) return -EINVAL; if (!event->attr.precise_ip) @@ -2179,7 +2179,7 @@ static void x86_pmu_event_mapped(struct perf_event *event, struct mm_struct *mm) * For now, this can't happen because all callers hold mmap_sem * for write. If this changes, we'll need a different solution. 
*/ - lockdep_assert_held_exclusive(&mm->mmap_sem); + lockdep_assert_held_write(&mm->mmap_sem); if (atomic_inc_return(&mm->context.perf_rdpmc_allowed) == 1) on_each_cpu_mask(mm_cpumask(mm), refresh_pce, NULL, 1); @@ -2402,13 +2402,13 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re return; } - if (perf_hw_regs(regs)) { - if (perf_callchain_store(entry, regs->ip)) - return; + if (perf_callchain_store(entry, regs->ip)) + return; + + if (perf_hw_regs(regs)) unwind_start(&state, current, regs, NULL); - } else { + else unwind_start(&state, current, NULL, (void *)regs->sp); - } for (; !unwind_done(&state); unwind_next_frame(&state)) { addr = unwind_get_return_address(&state); diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 7acc526b4ad2..505c73dc6a73 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -987,7 +987,7 @@ static u64 pebs_update_adaptive_cfg(struct perf_event *event) pebs_data_cfg |= PEBS_DATACFG_GP; if ((sample_type & PERF_SAMPLE_REGS_INTR) && - (attr->sample_regs_intr & PEBS_XMM_REGS)) + (attr->sample_regs_intr & PERF_REG_EXTENDED_MASK)) pebs_data_cfg |= PEBS_DATACFG_XMMS; if (sample_type & PERF_SAMPLE_BRANCH_STACK) { @@ -1964,10 +1964,9 @@ void __init intel_ds_init(void) x86_pmu.bts = boot_cpu_has(X86_FEATURE_BTS); x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS); x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE; - if (x86_pmu.version <= 4) { + if (x86_pmu.version <= 4) x86_pmu.pebs_no_isolation = 1; - x86_pmu.pebs_no_xmm_regs = 1; - } + if (x86_pmu.pebs) { char pebs_type = x86_pmu.intel_cap.pebs_trap ? '+' : '-'; char *pebs_qual = ""; @@ -2020,9 +2019,9 @@ void __init intel_ds_init(void) PERF_SAMPLE_TIME; x86_pmu.flags |= PMU_FL_PEBS_ALL; pebs_qual = "-baseline"; + x86_get_pmu()->capabilities |= PERF_PMU_CAP_EXTENDED_REGS; } else { /* Only basic record supported */ - x86_pmu.pebs_no_xmm_regs = 1; x86_pmu.large_pebs_flags &= ~(PERF_SAMPLE_ADDR | PERF_SAMPLE_TIME | diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 9e3fbd47cb56..089bfcdf2f7f 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -1400,6 +1400,7 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = { X86_UNCORE_MODEL_MATCH(INTEL_FAM6_KABYLAKE_MOBILE, skl_uncore_init), X86_UNCORE_MODEL_MATCH(INTEL_FAM6_KABYLAKE_DESKTOP, skl_uncore_init), X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ICELAKE_MOBILE, icl_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ICELAKE_NNPI, icl_uncore_init), {}, }; diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index a6ac2f4f76fc..4e346856ee19 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -121,24 +121,6 @@ struct amd_nb { (1ULL << PERF_REG_X86_R14) | \ (1ULL << PERF_REG_X86_R15)) -#define PEBS_XMM_REGS \ - ((1ULL << PERF_REG_X86_XMM0) | \ - (1ULL << PERF_REG_X86_XMM1) | \ - (1ULL << PERF_REG_X86_XMM2) | \ - (1ULL << PERF_REG_X86_XMM3) | \ - (1ULL << PERF_REG_X86_XMM4) | \ - (1ULL << PERF_REG_X86_XMM5) | \ - (1ULL << PERF_REG_X86_XMM6) | \ - (1ULL << PERF_REG_X86_XMM7) | \ - (1ULL << PERF_REG_X86_XMM8) | \ - (1ULL << PERF_REG_X86_XMM9) | \ - (1ULL << PERF_REG_X86_XMM10) | \ - (1ULL << PERF_REG_X86_XMM11) | \ - (1ULL << PERF_REG_X86_XMM12) | \ - (1ULL << PERF_REG_X86_XMM13) | \ - (1ULL << PERF_REG_X86_XMM14) | \ - (1ULL << PERF_REG_X86_XMM15)) - /* * Per register state. 
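The OR-chain deleted above sets one bit per XMM register id; the PERF_REG_EXTENDED_MASK that replaces it throughout these hunks is both simpler and broader. A sketch of its plausible shape (the _SKETCH names are ours; 32 is PERF_REG_X86_XMM0 in the uapi header):

/* every sample-register bit at or above the first XMM id is "extended" */
#define PERF_REG_X86_XMM0_SKETCH	32
#define PERF_REG_EXTENDED_MASK_SKETCH \
	(~((1ULL << PERF_REG_X86_XMM0_SKETCH) - 1))

One AND against this mask replaces sixteen explicit register bits, and it keeps working if further extended registers are defined above XMM15.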
*/ @@ -668,8 +650,7 @@ struct x86_pmu { pebs_broken :1, pebs_prec_dist :1, pebs_no_tlb :1, - pebs_no_isolation :1, - pebs_no_xmm_regs :1; + pebs_no_isolation :1; int pebs_record_size; int pebs_buffer_size; int max_pebs_events; diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 1608050e9df9..0e033ef11a9f 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -17,64 +17,13 @@ #include <linux/version.h> #include <linux/vmalloc.h> #include <linux/mm.h> -#include <linux/clockchips.h> #include <linux/hyperv.h> #include <linux/slab.h> #include <linux/cpuhotplug.h> - -#ifdef CONFIG_HYPERV_TSCPAGE - -static struct ms_hyperv_tsc_page *tsc_pg; - -struct ms_hyperv_tsc_page *hv_get_tsc_page(void) -{ - return tsc_pg; -} -EXPORT_SYMBOL_GPL(hv_get_tsc_page); - -static u64 read_hv_clock_tsc(struct clocksource *arg) -{ - u64 current_tick = hv_read_tsc_page(tsc_pg); - - if (current_tick == U64_MAX) - rdmsrl(HV_X64_MSR_TIME_REF_COUNT, current_tick); - - return current_tick; -} - -static struct clocksource hyperv_cs_tsc = { - .name = "hyperv_clocksource_tsc_page", - .rating = 400, - .read = read_hv_clock_tsc, - .mask = CLOCKSOURCE_MASK(64), - .flags = CLOCK_SOURCE_IS_CONTINUOUS, -}; -#endif - -static u64 read_hv_clock_msr(struct clocksource *arg) -{ - u64 current_tick; - /* - * Read the partition counter to get the current tick count. This count - * is set to 0 when the partition is created and is incremented in - * 100 nanosecond units. - */ - rdmsrl(HV_X64_MSR_TIME_REF_COUNT, current_tick); - return current_tick; -} - -static struct clocksource hyperv_cs_msr = { - .name = "hyperv_clocksource_msr", - .rating = 400, - .read = read_hv_clock_msr, - .mask = CLOCKSOURCE_MASK(64), - .flags = CLOCK_SOURCE_IS_CONTINUOUS, -}; +#include <clocksource/hyperv_timer.h> void *hv_hypercall_pg; EXPORT_SYMBOL_GPL(hv_hypercall_pg); -struct clocksource *hyperv_cs; -EXPORT_SYMBOL_GPL(hyperv_cs); u32 *hv_vp_index; EXPORT_SYMBOL_GPL(hv_vp_index); @@ -343,42 +292,8 @@ void __init hyperv_init(void) x86_init.pci.arch_init = hv_pci_init; - /* - * Register Hyper-V specific clocksource. - */ -#ifdef CONFIG_HYPERV_TSCPAGE - if (ms_hyperv.features & HV_MSR_REFERENCE_TSC_AVAILABLE) { - union hv_x64_msr_hypercall_contents tsc_msr; - - tsc_pg = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL); - if (!tsc_pg) - goto register_msr_cs; - - hyperv_cs = &hyperv_cs_tsc; - - rdmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr.as_uint64); - - tsc_msr.enable = 1; - tsc_msr.guest_physical_address = vmalloc_to_pfn(tsc_pg); - - wrmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr.as_uint64); - - hyperv_cs_tsc.archdata.vclock_mode = VCLOCK_HVCLOCK; - - clocksource_register_hz(&hyperv_cs_tsc, NSEC_PER_SEC/100); - return; - } -register_msr_cs: -#endif - /* - * For 32 bit guests just use the MSR based mechanism for reading - * the partition counter. 
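For context on what the deleted TSC-page clocksource (now shared via clocksource/hyperv_timer.h) computes: the reference time is ((tsc * scale) >> 64) + offset in 100 ns units, re-read until the page's sequence number is stable. A hedged sketch; the struct models ms_hyperv_tsc_page and every name ending in _sketch is ours:

#include <linux/kernel.h>
#include <linux/math64.h>
#include <linux/types.h>
#include <asm/msr.h>

struct tsc_page_sketch {
	u32 tsc_sequence;	/* 0 means invalid: fall back to the MSR */
	u32 reserved;
	u64 tsc_scale;
	s64 tsc_offset;
};

static u64 read_tsc_page_sketch(const volatile struct tsc_page_sketch *pg)
{
	u32 seq;
	u64 scale, tsc;
	s64 offset;

	do {
		seq = pg->tsc_sequence;
		if (!seq)
			return U64_MAX;
		tsc = rdtsc_ordered();
		scale = pg->tsc_scale;
		offset = pg->tsc_offset;
	} while (pg->tsc_sequence != seq);

	/* 64x64 multiply, keep the high 64 bits: fixed-point scaling */
	return mul_u64_u64_shr(tsc, scale, 64) + offset;
}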
- */ - - hyperv_cs = &hyperv_cs_msr; - if (ms_hyperv.features & HV_MSR_TIME_REF_COUNT_AVAILABLE) - clocksource_register_hz(&hyperv_cs_msr, NSEC_PER_SEC/100); - + /* Register Hyper-V specific clocksource */ + hv_init_clocksource(); return; remove_cpuhp_state: diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 1340fa53b575..050e5f9ebf81 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -53,7 +53,7 @@ extern unsigned int apic_verbosity; extern int local_apic_timer_c2_ok; extern int disable_apic; -extern unsigned int lapic_timer_frequency; +extern unsigned int lapic_timer_period; extern enum apic_intr_mode_id apic_intr_mode; enum apic_intr_mode_id { @@ -155,7 +155,6 @@ static inline int apic_force_enable(unsigned long addr) extern int apic_force_enable(unsigned long addr); #endif -extern void apic_bsp_setup(bool upmode); extern void apic_ap_setup(void); /* @@ -175,6 +174,7 @@ extern void lapic_assign_system_vectors(void); extern void lapic_assign_legacy_vector(unsigned int isairq, bool replace); extern void lapic_online(void); extern void lapic_offline(void); +extern bool apic_needs_pit(void); #else /* !CONFIG_X86_LOCAL_APIC */ static inline void lapic_shutdown(void) { } @@ -188,6 +188,7 @@ static inline void init_bsp_APIC(void) { } static inline void apic_intr_mode_init(void) { } static inline void lapic_assign_system_vectors(void) { } static inline void lapic_assign_legacy_vector(unsigned int i, bool r) { } +static inline bool apic_needs_pit(void) { return true; } #endif /* !CONFIG_X86_LOCAL_APIC */ #ifdef CONFIG_X86_X2APIC diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index ea3d95275b43..115127c7ad28 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h @@ -54,7 +54,7 @@ static __always_inline void arch_atomic_add(int i, atomic_t *v) { asm volatile(LOCK_PREFIX "addl %1,%0" : "+m" (v->counter) - : "ir" (i)); + : "ir" (i) : "memory"); } /** @@ -68,7 +68,7 @@ static __always_inline void arch_atomic_sub(int i, atomic_t *v) { asm volatile(LOCK_PREFIX "subl %1,%0" : "+m" (v->counter) - : "ir" (i)); + : "ir" (i) : "memory"); } /** @@ -95,7 +95,7 @@ static __always_inline bool arch_atomic_sub_and_test(int i, atomic_t *v) static __always_inline void arch_atomic_inc(atomic_t *v) { asm volatile(LOCK_PREFIX "incl %0" - : "+m" (v->counter)); + : "+m" (v->counter) :: "memory"); } #define arch_atomic_inc arch_atomic_inc @@ -108,7 +108,7 @@ static __always_inline void arch_atomic_inc(atomic_t *v) static __always_inline void arch_atomic_dec(atomic_t *v) { asm volatile(LOCK_PREFIX "decl %0" - : "+m" (v->counter)); + : "+m" (v->counter) :: "memory"); } #define arch_atomic_dec arch_atomic_dec diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h index 6a5b0ec460da..52cfaecb13f9 100644 --- a/arch/x86/include/asm/atomic64_32.h +++ b/arch/x86/include/asm/atomic64_32.h @@ -9,7 +9,7 @@ /* An 64bit atomic type */ typedef struct { - u64 __aligned(8) counter; + s64 __aligned(8) counter; } atomic64_t; #define ATOMIC64_INIT(val) { (val) } @@ -71,8 +71,7 @@ ATOMIC64_DECL(add_unless); * the old value. */ -static inline long long arch_atomic64_cmpxchg(atomic64_t *v, long long o, - long long n) +static inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 o, s64 n) { return arch_cmpxchg64(&v->counter, o, n); } @@ -85,9 +84,9 @@ static inline long long arch_atomic64_cmpxchg(atomic64_t *v, long long o, * Atomically xchgs the value of @v to @n and returns * the old value. 
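A non-obvious point behind these 32-bit helpers: even atomic64_read() must go through cmpxchg8b, because no plain 64-bit load is atomic on 32-bit x86. The classic trick, sketched in C on top of the arch_cmpxchg64() visible above (the _sketch name is ours; the real read path does the equivalent in the cx8 assembly backend):

static inline s64 atomic64_read_sketch(atomic64_t *v)
{
	s64 guess = 0;

	/*
	 * cmpxchg returns the observed value either way: if the compare
	 * hits, it stores 0 over 0 (harmless); if it misses, nothing is
	 * written and we learn the current contents atomically.
	 */
	return arch_cmpxchg64(&v->counter, guess, guess);
}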
*/ -static inline long long arch_atomic64_xchg(atomic64_t *v, long long n) +static inline s64 arch_atomic64_xchg(atomic64_t *v, s64 n) { - long long o; + s64 o; unsigned high = (unsigned)(n >> 32); unsigned low = (unsigned)n; alternative_atomic64(xchg, "=&A" (o), @@ -103,7 +102,7 @@ static inline long long arch_atomic64_xchg(atomic64_t *v, long long n) * * Atomically sets the value of @v to @n. */ -static inline void arch_atomic64_set(atomic64_t *v, long long i) +static inline void arch_atomic64_set(atomic64_t *v, s64 i) { unsigned high = (unsigned)(i >> 32); unsigned low = (unsigned)i; @@ -118,9 +117,9 @@ static inline void arch_atomic64_set(atomic64_t *v, long long i) * * Atomically reads the value of @v and returns it. */ -static inline long long arch_atomic64_read(const atomic64_t *v) +static inline s64 arch_atomic64_read(const atomic64_t *v) { - long long r; + s64 r; alternative_atomic64(read, "=&A" (r), "c" (v) : "memory"); return r; } @@ -132,7 +131,7 @@ static inline long long arch_atomic64_read(const atomic64_t *v) * * Atomically adds @i to @v and returns @i + *@v */ -static inline long long arch_atomic64_add_return(long long i, atomic64_t *v) +static inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v) { alternative_atomic64(add_return, ASM_OUTPUT2("+A" (i), "+c" (v)), @@ -143,7 +142,7 @@ static inline long long arch_atomic64_add_return(long long i, atomic64_t *v) /* * Other variants with different arithmetic operators: */ -static inline long long arch_atomic64_sub_return(long long i, atomic64_t *v) +static inline s64 arch_atomic64_sub_return(s64 i, atomic64_t *v) { alternative_atomic64(sub_return, ASM_OUTPUT2("+A" (i), "+c" (v)), @@ -151,18 +150,18 @@ static inline long long arch_atomic64_sub_return(long long i, atomic64_t *v) return i; } -static inline long long arch_atomic64_inc_return(atomic64_t *v) +static inline s64 arch_atomic64_inc_return(atomic64_t *v) { - long long a; + s64 a; alternative_atomic64(inc_return, "=&A" (a), "S" (v) : "memory", "ecx"); return a; } #define arch_atomic64_inc_return arch_atomic64_inc_return -static inline long long arch_atomic64_dec_return(atomic64_t *v) +static inline s64 arch_atomic64_dec_return(atomic64_t *v) { - long long a; + s64 a; alternative_atomic64(dec_return, "=&A" (a), "S" (v) : "memory", "ecx"); return a; @@ -176,7 +175,7 @@ static inline long long arch_atomic64_dec_return(atomic64_t *v) * * Atomically adds @i to @v. */ -static inline long long arch_atomic64_add(long long i, atomic64_t *v) +static inline s64 arch_atomic64_add(s64 i, atomic64_t *v) { __alternative_atomic64(add, add_return, ASM_OUTPUT2("+A" (i), "+c" (v)), @@ -191,7 +190,7 @@ static inline long long arch_atomic64_add(long long i, atomic64_t *v) * * Atomically subtracts @i from @v. */ -static inline long long arch_atomic64_sub(long long i, atomic64_t *v) +static inline s64 arch_atomic64_sub(s64 i, atomic64_t *v) { __alternative_atomic64(sub, sub_return, ASM_OUTPUT2("+A" (i), "+c" (v)), @@ -234,8 +233,7 @@ static inline void arch_atomic64_dec(atomic64_t *v) * Atomically adds @a to @v, so long as it was not @u. * Returns non-zero if the add was done, zero otherwise. 
*/ -static inline int arch_atomic64_add_unless(atomic64_t *v, long long a, - long long u) +static inline int arch_atomic64_add_unless(atomic64_t *v, s64 a, s64 u) { unsigned low = (unsigned)u; unsigned high = (unsigned)(u >> 32); @@ -254,9 +252,9 @@ static inline int arch_atomic64_inc_not_zero(atomic64_t *v) } #define arch_atomic64_inc_not_zero arch_atomic64_inc_not_zero -static inline long long arch_atomic64_dec_if_positive(atomic64_t *v) +static inline s64 arch_atomic64_dec_if_positive(atomic64_t *v) { - long long r; + s64 r; alternative_atomic64(dec_if_positive, "=&A" (r), "S" (v) : "ecx", "memory"); return r; @@ -266,17 +264,17 @@ static inline long long arch_atomic64_dec_if_positive(atomic64_t *v) #undef alternative_atomic64 #undef __alternative_atomic64 -static inline void arch_atomic64_and(long long i, atomic64_t *v) +static inline void arch_atomic64_and(s64 i, atomic64_t *v) { - long long old, c = 0; + s64 old, c = 0; while ((old = arch_atomic64_cmpxchg(v, c, c & i)) != c) c = old; } -static inline long long arch_atomic64_fetch_and(long long i, atomic64_t *v) +static inline s64 arch_atomic64_fetch_and(s64 i, atomic64_t *v) { - long long old, c = 0; + s64 old, c = 0; while ((old = arch_atomic64_cmpxchg(v, c, c & i)) != c) c = old; @@ -284,17 +282,17 @@ static inline long long arch_atomic64_fetch_and(long long i, atomic64_t *v) return old; } -static inline void arch_atomic64_or(long long i, atomic64_t *v) +static inline void arch_atomic64_or(s64 i, atomic64_t *v) { - long long old, c = 0; + s64 old, c = 0; while ((old = arch_atomic64_cmpxchg(v, c, c | i)) != c) c = old; } -static inline long long arch_atomic64_fetch_or(long long i, atomic64_t *v) +static inline s64 arch_atomic64_fetch_or(s64 i, atomic64_t *v) { - long long old, c = 0; + s64 old, c = 0; while ((old = arch_atomic64_cmpxchg(v, c, c | i)) != c) c = old; @@ -302,17 +300,17 @@ static inline long long arch_atomic64_fetch_or(long long i, atomic64_t *v) return old; } -static inline void arch_atomic64_xor(long long i, atomic64_t *v) +static inline void arch_atomic64_xor(s64 i, atomic64_t *v) { - long long old, c = 0; + s64 old, c = 0; while ((old = arch_atomic64_cmpxchg(v, c, c ^ i)) != c) c = old; } -static inline long long arch_atomic64_fetch_xor(long long i, atomic64_t *v) +static inline s64 arch_atomic64_fetch_xor(s64 i, atomic64_t *v) { - long long old, c = 0; + s64 old, c = 0; while ((old = arch_atomic64_cmpxchg(v, c, c ^ i)) != c) c = old; @@ -320,9 +318,9 @@ static inline long long arch_atomic64_fetch_xor(long long i, atomic64_t *v) return old; } -static inline long long arch_atomic64_fetch_add(long long i, atomic64_t *v) +static inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v) { - long long old, c = 0; + s64 old, c = 0; while ((old = arch_atomic64_cmpxchg(v, c, c + i)) != c) c = old; diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h index dadc20adba21..95c6ceac66b9 100644 --- a/arch/x86/include/asm/atomic64_64.h +++ b/arch/x86/include/asm/atomic64_64.h @@ -17,7 +17,7 @@ * Atomically reads the value of @v. * Doesn't imply a read memory barrier. */ -static inline long arch_atomic64_read(const atomic64_t *v) +static inline s64 arch_atomic64_read(const atomic64_t *v) { return READ_ONCE((v)->counter); } @@ -29,7 +29,7 @@ static inline long arch_atomic64_read(const atomic64_t *v) * * Atomically sets the value of @v to @i. 
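The and/or/xor and fetch_add helpers above all instantiate one pattern. Written out once for reference (the _sketch name is ours; the operation here is AND):

static inline s64 atomic64_fetch_op_sketch(s64 i, atomic64_t *v)
{
	s64 old, c = 0;

	/*
	 * Guess that the counter holds c and try to install (c & i); on
	 * a miss, cmpxchg reports the real value, which becomes the next
	 * guess. On a hit we are done.
	 */
	while ((old = arch_atomic64_cmpxchg(v, c, c & i)) != c)
		c = old;

	return old;	/* the value seen before our update applied */
}

Starting from c = 0 is just a cheap first guess; correctness only needs the invariant that c is some previously observed value.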
*/ -static inline void arch_atomic64_set(atomic64_t *v, long i) +static inline void arch_atomic64_set(atomic64_t *v, s64 i) { WRITE_ONCE(v->counter, i); } @@ -41,11 +41,11 @@ static inline void arch_atomic64_set(atomic64_t *v, long i) * * Atomically adds @i to @v. */ -static __always_inline void arch_atomic64_add(long i, atomic64_t *v) +static __always_inline void arch_atomic64_add(s64 i, atomic64_t *v) { asm volatile(LOCK_PREFIX "addq %1,%0" : "=m" (v->counter) - : "er" (i), "m" (v->counter)); + : "er" (i), "m" (v->counter) : "memory"); } /** @@ -55,11 +55,11 @@ static __always_inline void arch_atomic64_add(long i, atomic64_t *v) * * Atomically subtracts @i from @v. */ -static inline void arch_atomic64_sub(long i, atomic64_t *v) +static inline void arch_atomic64_sub(s64 i, atomic64_t *v) { asm volatile(LOCK_PREFIX "subq %1,%0" : "=m" (v->counter) - : "er" (i), "m" (v->counter)); + : "er" (i), "m" (v->counter) : "memory"); } /** @@ -71,7 +71,7 @@ static inline void arch_atomic64_sub(long i, atomic64_t *v) * true if the result is zero, or false for all * other cases. */ -static inline bool arch_atomic64_sub_and_test(long i, atomic64_t *v) +static inline bool arch_atomic64_sub_and_test(s64 i, atomic64_t *v) { return GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, e, "er", i); } @@ -87,7 +87,7 @@ static __always_inline void arch_atomic64_inc(atomic64_t *v) { asm volatile(LOCK_PREFIX "incq %0" : "=m" (v->counter) - : "m" (v->counter)); + : "m" (v->counter) : "memory"); } #define arch_atomic64_inc arch_atomic64_inc @@ -101,7 +101,7 @@ static __always_inline void arch_atomic64_dec(atomic64_t *v) { asm volatile(LOCK_PREFIX "decq %0" : "=m" (v->counter) - : "m" (v->counter)); + : "m" (v->counter) : "memory"); } #define arch_atomic64_dec arch_atomic64_dec @@ -142,7 +142,7 @@ static inline bool arch_atomic64_inc_and_test(atomic64_t *v) * if the result is negative, or false when * result is greater than or equal to zero. 
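Besides the s64 conversion, note the "memory" clobber being added to the LOCK-prefixed asm above: it makes each atomic a compiler barrier as well, which is what allows the __smp_mb__{before,after}_atomic() definitions in barrier.h below to become empty. A compile-only sketch of the pattern, simplified from arch_atomic64_add() (x86-64 assumed; the "+m" constraint is a simplification of the real "=m"/"m" pair):

/* The "memory" clobber forbids the compiler from moving other memory
 * accesses across the locked instruction; the LOCK prefix orders them
 * at the CPU level. */
static inline void locked_add64_sketch(long long i, long long *v)
{
	asm volatile("lock addq %1,%0"
		     : "+m" (*v)
		     : "er" (i)
		     : "memory");
}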
*/ -static inline bool arch_atomic64_add_negative(long i, atomic64_t *v) +static inline bool arch_atomic64_add_negative(s64 i, atomic64_t *v) { return GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, s, "er", i); } @@ -155,43 +155,43 @@ static inline bool arch_atomic64_add_negative(long i, atomic64_t *v) * * Atomically adds @i to @v and returns @i + @v */ -static __always_inline long arch_atomic64_add_return(long i, atomic64_t *v) +static __always_inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v) { return i + xadd(&v->counter, i); } -static inline long arch_atomic64_sub_return(long i, atomic64_t *v) +static inline s64 arch_atomic64_sub_return(s64 i, atomic64_t *v) { return arch_atomic64_add_return(-i, v); } -static inline long arch_atomic64_fetch_add(long i, atomic64_t *v) +static inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v) { return xadd(&v->counter, i); } -static inline long arch_atomic64_fetch_sub(long i, atomic64_t *v) +static inline s64 arch_atomic64_fetch_sub(s64 i, atomic64_t *v) { return xadd(&v->counter, -i); } -static inline long arch_atomic64_cmpxchg(atomic64_t *v, long old, long new) +static inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new) { return arch_cmpxchg(&v->counter, old, new); } #define arch_atomic64_try_cmpxchg arch_atomic64_try_cmpxchg -static __always_inline bool arch_atomic64_try_cmpxchg(atomic64_t *v, s64 *old, long new) +static __always_inline bool arch_atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new) { return try_cmpxchg(&v->counter, old, new); } -static inline long arch_atomic64_xchg(atomic64_t *v, long new) +static inline s64 arch_atomic64_xchg(atomic64_t *v, s64 new) { return arch_xchg(&v->counter, new); } -static inline void arch_atomic64_and(long i, atomic64_t *v) +static inline void arch_atomic64_and(s64 i, atomic64_t *v) { asm volatile(LOCK_PREFIX "andq %1,%0" : "+m" (v->counter) @@ -199,7 +199,7 @@ static inline void arch_atomic64_and(long i, atomic64_t *v) : "memory"); } -static inline long arch_atomic64_fetch_and(long i, atomic64_t *v) +static inline s64 arch_atomic64_fetch_and(s64 i, atomic64_t *v) { s64 val = arch_atomic64_read(v); @@ -208,7 +208,7 @@ static inline long arch_atomic64_fetch_and(long i, atomic64_t *v) return val; } -static inline void arch_atomic64_or(long i, atomic64_t *v) +static inline void arch_atomic64_or(s64 i, atomic64_t *v) { asm volatile(LOCK_PREFIX "orq %1,%0" : "+m" (v->counter) @@ -216,7 +216,7 @@ static inline void arch_atomic64_or(long i, atomic64_t *v) : "memory"); } -static inline long arch_atomic64_fetch_or(long i, atomic64_t *v) +static inline s64 arch_atomic64_fetch_or(s64 i, atomic64_t *v) { s64 val = arch_atomic64_read(v); @@ -225,7 +225,7 @@ static inline long arch_atomic64_fetch_or(long i, atomic64_t *v) return val; } -static inline void arch_atomic64_xor(long i, atomic64_t *v) +static inline void arch_atomic64_xor(s64 i, atomic64_t *v) { asm volatile(LOCK_PREFIX "xorq %1,%0" : "+m" (v->counter) @@ -233,7 +233,7 @@ static inline void arch_atomic64_xor(long i, atomic64_t *v) : "memory"); } -static inline long arch_atomic64_fetch_xor(long i, atomic64_t *v) +static inline s64 arch_atomic64_fetch_xor(s64 i, atomic64_t *v) { s64 val = arch_atomic64_read(v); diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index 14de0432d288..84f848c2541a 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h @@ -80,8 +80,8 @@ do { \ }) /* Atomic operations are already serializing on x86 */ -#define __smp_mb__before_atomic() barrier() 
-#define __smp_mb__after_atomic() barrier() +#define __smp_mb__before_atomic() do { } while (0) +#define __smp_mb__after_atomic() do { } while (0) #include <asm-generic/barrier.h> diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 1d337c51f7e6..58acda503817 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -22,8 +22,8 @@ enum cpuid_leafs CPUID_LNX_3, CPUID_7_0_EBX, CPUID_D_1_EAX, - CPUID_F_0_EDX, - CPUID_F_1_EDX, + CPUID_LNX_4, + CPUID_7_1_EAX, CPUID_8000_0008_EBX, CPUID_6_EAX, CPUID_8000_000A_EDX, diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 75f27ee2c263..998c2cc08363 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -239,12 +239,14 @@ #define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation extensions */ #define X86_FEATURE_HLE ( 9*32+ 4) /* Hardware Lock Elision */ #define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */ +#define X86_FEATURE_FDP_EXCPTN_ONLY ( 9*32+ 6) /* "" FPU data pointer updated only on x87 exceptions */ #define X86_FEATURE_SMEP ( 9*32+ 7) /* Supervisor Mode Execution Protection */ #define X86_FEATURE_BMI2 ( 9*32+ 8) /* 2nd group bit manipulation extensions */ #define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB instructions */ #define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */ #define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */ #define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */ +#define X86_FEATURE_ZERO_FCS_FDS ( 9*32+13) /* "" Zero out FPU CS and FPU DS */ #define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */ #define X86_FEATURE_RDT_A ( 9*32+15) /* Resource Director Technology Allocation */ #define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */ @@ -269,13 +271,19 @@ #define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 instruction */ #define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS instructions */ -/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (EDX), word 11 */ -#define X86_FEATURE_CQM_LLC (11*32+ 1) /* LLC QoS if 1 */ +/* + * Extended auxiliary flags: Linux defined - for features scattered in various + * CPUID levels like 0xf, etc. + * + * Reuse free bits when adding new feature flags! 
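The renumbering works because each X86_FEATURE_* value is nothing more than word * 32 + bit into the per-CPU capability bitmap; moving CQM_LLC from the old hardware-defined word into Linux-defined word 11 only changes that arithmetic. A quick decoding sketch (the FEATURE_WORD/FEATURE_BIT helpers are illustrative, not kernel macros):

#include <stdio.h>

#define FEATURE_WORD(f)	((f) >> 5)	/* f / 32 */
#define FEATURE_BIT(f)	((f) & 31)	/* f % 32 */

int main(void)
{
	unsigned int cqm_llc     = 11 * 32 + 0;	/* X86_FEATURE_CQM_LLC after this change */
	unsigned int avx512_bf16 = 12 * 32 + 5;	/* X86_FEATURE_AVX512_BF16 */

	printf("CQM_LLC:     word %u, bit %u\n",
	       FEATURE_WORD(cqm_llc), FEATURE_BIT(cqm_llc));
	printf("AVX512_BF16: word %u, bit %u\n",
	       FEATURE_WORD(avx512_bf16), FEATURE_BIT(avx512_bf16));
	return 0;
}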
+ */ +#define X86_FEATURE_CQM_LLC (11*32+ 0) /* LLC QoS if 1 */ +#define X86_FEATURE_CQM_OCCUP_LLC (11*32+ 1) /* LLC occupancy monitoring */ +#define X86_FEATURE_CQM_MBM_TOTAL (11*32+ 2) /* LLC Total MBM monitoring */ +#define X86_FEATURE_CQM_MBM_LOCAL (11*32+ 3) /* LLC Local MBM monitoring */ -/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (EDX), word 12 */ -#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring */ -#define X86_FEATURE_CQM_MBM_TOTAL (12*32+ 1) /* LLC Total MBM monitoring */ -#define X86_FEATURE_CQM_MBM_LOCAL (12*32+ 2) /* LLC Local MBM monitoring */ +/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ +#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */ #define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */ @@ -322,6 +330,7 @@ #define X86_FEATURE_UMIP (16*32+ 2) /* User Mode Instruction Protection */ #define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */ #define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */ +#define X86_FEATURE_WAITPKG (16*32+ 5) /* UMONITOR/UMWAIT/TPAUSE Instructions */ #define X86_FEATURE_AVX512_VBMI2 (16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */ #define X86_FEATURE_GFNI (16*32+ 8) /* Galois Field New Instructions */ #define X86_FEATURE_VAES (16*32+ 9) /* Vector AES */ diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index 7e42b285c856..c6136d79f8c0 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -47,7 +47,6 @@ extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; extern void __init update_regset_xstate_info(unsigned int size, u64 xstate_mask); -void fpu__xstate_clear_all_cpu_caps(void); void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr); const void *get_xsave_field_ptr(int xfeature_nr); int using_compacted_format(void); diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h index 67385d56d4f4..6352dee37cda 100644 --- a/arch/x86/include/asm/hpet.h +++ b/arch/x86/include/asm/hpet.h @@ -75,16 +75,15 @@ extern unsigned int hpet_readl(unsigned int a); extern void force_hpet_resume(void); struct irq_data; -struct hpet_dev; +struct hpet_channel; struct irq_domain; extern void hpet_msi_unmask(struct irq_data *data); extern void hpet_msi_mask(struct irq_data *data); -extern void hpet_msi_write(struct hpet_dev *hdev, struct msi_msg *msg); -extern void hpet_msi_read(struct hpet_dev *hdev, struct msi_msg *msg); +extern void hpet_msi_write(struct hpet_channel *hc, struct msi_msg *msg); extern struct irq_domain *hpet_create_irq_domain(int hpet_id); extern int hpet_assign_irq(struct irq_domain *domain, - struct hpet_dev *dev, int dev_num); + struct hpet_channel *hc, int dev_num); #ifdef CONFIG_HPET_EMULATE_RTC diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 32e666e1231e..cbd97e22d2f3 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -150,8 +150,11 @@ extern char irq_entries_start[]; #define trace_irq_entries_start irq_entries_start #endif +extern char spurious_entries_start[]; + #define VECTOR_UNUSED NULL -#define VECTOR_RETRIGGERED ((void *)~0UL) +#define VECTOR_SHUTDOWN ((void *)~0UL) +#define VECTOR_RETRIGGERED ((void *)~1UL) typedef struct irq_desc* vector_irq_t[NR_VECTORS]; DECLARE_PER_CPU(vector_irq_t, vector_irq); diff --git a/arch/x86/include/asm/hyperv-tlfs.h 
b/arch/x86/include/asm/hyperv-tlfs.h index cdf44aa9a501..af78cd72b8f3 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -401,6 +401,12 @@ enum HV_GENERIC_SET_FORMAT { #define HV_STATUS_INVALID_CONNECTION_ID 18 #define HV_STATUS_INSUFFICIENT_BUFFERS 19 +/* + * The Hyper-V TimeRefCount register and the TSC + * page provide a guest VM clock with 100ns tick rate + */ +#define HV_CLOCK_HZ (NSEC_PER_SEC/100) + typedef struct _HV_REFERENCE_TSC_PAGE { __u32 tsc_sequence; __u32 res1; diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index 310118805f57..0278aa66ef62 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -56,6 +56,7 @@ #define INTEL_FAM6_ICELAKE_XEON_D 0x6C #define INTEL_FAM6_ICELAKE_DESKTOP 0x7D #define INTEL_FAM6_ICELAKE_MOBILE 0x7E +#define INTEL_FAM6_ICELAKE_NNPI 0x9D /* "Small Core" Processors (Atom) */ @@ -76,6 +77,7 @@ #define INTEL_FAM6_ATOM_GOLDMONT 0x5C /* Apollo Lake */ #define INTEL_FAM6_ATOM_GOLDMONT_X 0x5F /* Denverton */ #define INTEL_FAM6_ATOM_GOLDMONT_PLUS 0x7A /* Gemini Lake */ + #define INTEL_FAM6_ATOM_TREMONT_X 0x86 /* Jacobsville */ /* Xeon Phi */ diff --git a/arch/x86/include/asm/irq_regs.h b/arch/x86/include/asm/irq_regs.h index 8f3bee821e6c..187ce59aea28 100644 --- a/arch/x86/include/asm/irq_regs.h +++ b/arch/x86/include/asm/irq_regs.h @@ -16,7 +16,7 @@ DECLARE_PER_CPU(struct pt_regs *, irq_regs); static inline struct pt_regs *get_irq_regs(void) { - return this_cpu_read(irq_regs); + return __this_cpu_read(irq_regs); } static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs) @@ -24,7 +24,7 @@ static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs) struct pt_regs *old_regs; old_regs = get_irq_regs(); - this_cpu_write(irq_regs, new_regs); + __this_cpu_write(irq_regs, new_regs); return old_regs; } diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h index 65191ce8e1cf..06c3cc22a058 100644 --- a/arch/x86/include/asm/jump_label.h +++ b/arch/x86/include/asm/jump_label.h @@ -2,6 +2,8 @@ #ifndef _ASM_X86_JUMP_LABEL_H #define _ASM_X86_JUMP_LABEL_H +#define HAVE_JUMP_LABEL_BATCH + #define JUMP_LABEL_NOP_SIZE 5 #ifdef CONFIG_X86_64 diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index cc60e617931c..f4fa8a9d5d0b 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -105,6 +105,17 @@ static inline void vmbus_signal_eom(struct hv_message *msg, u32 old_msg_type) #define hv_get_crash_ctl(val) \ rdmsrl(HV_X64_MSR_CRASH_CTL, val) +#define hv_get_time_ref_count(val) \ + rdmsrl(HV_X64_MSR_TIME_REF_COUNT, val) + +#define hv_get_reference_tsc(val) \ + rdmsrl(HV_X64_MSR_REFERENCE_TSC, val) +#define hv_set_reference_tsc(val) \ + wrmsrl(HV_X64_MSR_REFERENCE_TSC, val) +#define hv_set_clocksource_vdso(val) \ + ((val).archdata.vclock_mode = VCLOCK_HVCLOCK) +#define hv_get_raw_timer() rdtsc_ordered() + void hyperv_callback_vector(void); void hyperv_reenlightenment_vector(void); #ifdef CONFIG_TRACING @@ -133,7 +144,6 @@ static inline void hv_disable_stimer0_percpu_irq(int irq) {} #if IS_ENABLED(CONFIG_HYPERV) -extern struct clocksource *hyperv_cs; extern void *hv_hypercall_pg; extern void __percpu **hyperv_pcpu_input_arg; @@ -387,73 +397,4 @@ static inline int hyperv_flush_guest_mapping_range(u64 as, } #endif /* CONFIG_HYPERV */ -#ifdef CONFIG_HYPERV_TSCPAGE -struct ms_hyperv_tsc_page *hv_get_tsc_page(void); -static inline u64 hv_read_tsc_page_tsc(const 
struct ms_hyperv_tsc_page *tsc_pg, - u64 *cur_tsc) -{ - u64 scale, offset; - u32 sequence; - - /* - * The protocol for reading Hyper-V TSC page is specified in Hypervisor - * Top-Level Functional Specification ver. 3.0 and above. To get the - * reference time we must do the following: - * - READ ReferenceTscSequence - * A special '0' value indicates the time source is unreliable and we - * need to use something else. The currently published specification - * versions (up to 4.0b) contain a mistake and wrongly claim '-1' - * instead of '0' as the special value, see commit c35b82ef0294. - * - ReferenceTime = - * ((RDTSC() * ReferenceTscScale) >> 64) + ReferenceTscOffset - * - READ ReferenceTscSequence again. In case its value has changed - * since our first reading we need to discard ReferenceTime and repeat - * the whole sequence as the hypervisor was updating the page in - * between. - */ - do { - sequence = READ_ONCE(tsc_pg->tsc_sequence); - if (!sequence) - return U64_MAX; - /* - * Make sure we read sequence before we read other values from - * TSC page. - */ - smp_rmb(); - - scale = READ_ONCE(tsc_pg->tsc_scale); - offset = READ_ONCE(tsc_pg->tsc_offset); - *cur_tsc = rdtsc_ordered(); - - /* - * Make sure we read sequence after we read all other values - * from TSC page. - */ - smp_rmb(); - - } while (READ_ONCE(tsc_pg->tsc_sequence) != sequence); - - return mul_u64_u64_shr(*cur_tsc, scale, 64) + offset; -} - -static inline u64 hv_read_tsc_page(const struct ms_hyperv_tsc_page *tsc_pg) -{ - u64 cur_tsc; - - return hv_read_tsc_page_tsc(tsc_pg, &cur_tsc); -} - -#else -static inline struct ms_hyperv_tsc_page *hv_get_tsc_page(void) -{ - return NULL; -} - -static inline u64 hv_read_tsc_page_tsc(const struct ms_hyperv_tsc_page *tsc_pg, - u64 *cur_tsc) -{ - BUG(); - return U64_MAX; -} -#endif #endif diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 979ef971cc78..6b4fc2788078 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -61,6 +61,15 @@ #define MSR_PLATFORM_INFO_CPUID_FAULT_BIT 31 #define MSR_PLATFORM_INFO_CPUID_FAULT BIT_ULL(MSR_PLATFORM_INFO_CPUID_FAULT_BIT) +#define MSR_IA32_UMWAIT_CONTROL 0xe1 +#define MSR_IA32_UMWAIT_CONTROL_C02_DISABLE BIT(0) +#define MSR_IA32_UMWAIT_CONTROL_RESERVED BIT(1) +/* + * The time field is bit[31:2], but representing a 32bit value with + * bit[1:0] zero. + */ +#define MSR_IA32_UMWAIT_CONTROL_TIME_MASK (~0x03U) + #define MSR_PKG_CST_CONFIG_CONTROL 0x000000e2 #define NHM_C3_AUTO_DEMOTE (1UL << 25) #define NHM_C1_AUTO_DEMOTE (1UL << 26) diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h index eb0f80ce8524..e28f8b723b5c 100644 --- a/arch/x86/include/asm/mwait.h +++ b/arch/x86/include/asm/mwait.h @@ -86,9 +86,9 @@ static inline void __mwaitx(unsigned long eax, unsigned long ebx, static inline void __sti_mwait(unsigned long eax, unsigned long ecx) { - mds_idle_clear_cpu_buffers(); - trace_hardirqs_on(); + + mds_idle_clear_cpu_buffers(); /* "mwait %eax, %ecx;" */ asm volatile("sti; .byte 0x0f, 0x01, 0xc9;" :: "a" (eax), "c" (ecx)); diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 1a19d11cfbbd..2278797c769d 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -87,7 +87,7 @@ * don't give an lvalue though). 
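Stepping back to the hv_read_tsc_page_tsc() helper being moved out of this header: it is the canonical seqcount-style lockless read: sample the sequence, read the payload, re-read the sequence, and retry if it moved. A reduced user-space analogue of the same retry shape (C11 atomics and GCC's __uint128_t standing in for READ_ONCE()/smp_rmb() and mul_u64_u64_shr(); all names here are illustrative):

#include <stdatomic.h>
#include <stdint.h>

struct tsc_page_sketch {
	_Atomic uint32_t sequence;	/* 0 means: do not use this clock */
	_Atomic uint64_t scale;
	_Atomic int64_t offset;
};

/* Retry-until-stable read, mirroring the helper above. */
static int read_clock_sketch(struct tsc_page_sketch *pg,
			     uint64_t tsc, uint64_t *out)
{
	uint32_t seq;
	uint64_t scale;
	int64_t offset;

	do {
		seq = atomic_load_explicit(&pg->sequence, memory_order_acquire);
		if (!seq)
			return -1;	/* unreliable, caller falls back */
		scale = atomic_load_explicit(&pg->scale, memory_order_relaxed);
		offset = atomic_load_explicit(&pg->offset, memory_order_relaxed);
		atomic_thread_fence(memory_order_acquire);	/* data before seq re-read */
	} while (atomic_load_explicit(&pg->sequence, memory_order_relaxed) != seq);

	/* The real code computes ((tsc * scale) >> 64) + offset in 128 bits. */
	*out = (uint64_t)(((__uint128_t)tsc * scale) >> 64) + (uint64_t)offset;
	return 0;
}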
*/ extern void __bad_percpu_size(void); -#define percpu_to_op(op, var, val) \ +#define percpu_to_op(qual, op, var, val) \ do { \ typedef typeof(var) pto_T__; \ if (0) { \ @@ -97,22 +97,22 @@ do { \ } \ switch (sizeof(var)) { \ case 1: \ - asm(op "b %1,"__percpu_arg(0) \ + asm qual (op "b %1,"__percpu_arg(0) \ : "+m" (var) \ : "qi" ((pto_T__)(val))); \ break; \ case 2: \ - asm(op "w %1,"__percpu_arg(0) \ + asm qual (op "w %1,"__percpu_arg(0) \ : "+m" (var) \ : "ri" ((pto_T__)(val))); \ break; \ case 4: \ - asm(op "l %1,"__percpu_arg(0) \ + asm qual (op "l %1,"__percpu_arg(0) \ : "+m" (var) \ : "ri" ((pto_T__)(val))); \ break; \ case 8: \ - asm(op "q %1,"__percpu_arg(0) \ + asm qual (op "q %1,"__percpu_arg(0) \ : "+m" (var) \ : "re" ((pto_T__)(val))); \ break; \ @@ -124,7 +124,7 @@ do { \ * Generate a percpu add to memory instruction and optimize code * if one is added or subtracted. */ -#define percpu_add_op(var, val) \ +#define percpu_add_op(qual, var, val) \ do { \ typedef typeof(var) pao_T__; \ const int pao_ID__ = (__builtin_constant_p(val) && \ @@ -138,41 +138,41 @@ do { \ switch (sizeof(var)) { \ case 1: \ if (pao_ID__ == 1) \ - asm("incb "__percpu_arg(0) : "+m" (var)); \ + asm qual ("incb "__percpu_arg(0) : "+m" (var)); \ else if (pao_ID__ == -1) \ - asm("decb "__percpu_arg(0) : "+m" (var)); \ + asm qual ("decb "__percpu_arg(0) : "+m" (var)); \ else \ - asm("addb %1, "__percpu_arg(0) \ + asm qual ("addb %1, "__percpu_arg(0) \ : "+m" (var) \ : "qi" ((pao_T__)(val))); \ break; \ case 2: \ if (pao_ID__ == 1) \ - asm("incw "__percpu_arg(0) : "+m" (var)); \ + asm qual ("incw "__percpu_arg(0) : "+m" (var)); \ else if (pao_ID__ == -1) \ - asm("decw "__percpu_arg(0) : "+m" (var)); \ + asm qual ("decw "__percpu_arg(0) : "+m" (var)); \ else \ - asm("addw %1, "__percpu_arg(0) \ + asm qual ("addw %1, "__percpu_arg(0) \ : "+m" (var) \ : "ri" ((pao_T__)(val))); \ break; \ case 4: \ if (pao_ID__ == 1) \ - asm("incl "__percpu_arg(0) : "+m" (var)); \ + asm qual ("incl "__percpu_arg(0) : "+m" (var)); \ else if (pao_ID__ == -1) \ - asm("decl "__percpu_arg(0) : "+m" (var)); \ + asm qual ("decl "__percpu_arg(0) : "+m" (var)); \ else \ - asm("addl %1, "__percpu_arg(0) \ + asm qual ("addl %1, "__percpu_arg(0) \ : "+m" (var) \ : "ri" ((pao_T__)(val))); \ break; \ case 8: \ if (pao_ID__ == 1) \ - asm("incq "__percpu_arg(0) : "+m" (var)); \ + asm qual ("incq "__percpu_arg(0) : "+m" (var)); \ else if (pao_ID__ == -1) \ - asm("decq "__percpu_arg(0) : "+m" (var)); \ + asm qual ("decq "__percpu_arg(0) : "+m" (var)); \ else \ - asm("addq %1, "__percpu_arg(0) \ + asm qual ("addq %1, "__percpu_arg(0) \ : "+m" (var) \ : "re" ((pao_T__)(val))); \ break; \ @@ -180,27 +180,27 @@ do { \ } \ } while (0) -#define percpu_from_op(op, var) \ +#define percpu_from_op(qual, op, var) \ ({ \ typeof(var) pfo_ret__; \ switch (sizeof(var)) { \ case 1: \ - asm volatile(op "b "__percpu_arg(1)",%0"\ + asm qual (op "b "__percpu_arg(1)",%0" \ : "=q" (pfo_ret__) \ : "m" (var)); \ break; \ case 2: \ - asm volatile(op "w "__percpu_arg(1)",%0"\ + asm qual (op "w "__percpu_arg(1)",%0" \ : "=r" (pfo_ret__) \ : "m" (var)); \ break; \ case 4: \ - asm volatile(op "l "__percpu_arg(1)",%0"\ + asm qual (op "l "__percpu_arg(1)",%0" \ : "=r" (pfo_ret__) \ : "m" (var)); \ break; \ case 8: \ - asm volatile(op "q "__percpu_arg(1)",%0"\ + asm qual (op "q "__percpu_arg(1)",%0" \ : "=r" (pfo_ret__) \ : "m" (var)); \ break; \ @@ -238,23 +238,23 @@ do { \ pfo_ret__; \ }) -#define percpu_unary_op(op, var) \ +#define percpu_unary_op(qual, op, var) \ ({ \ switch 
(sizeof(var)) { \ case 1: \ - asm(op "b "__percpu_arg(0) \ + asm qual (op "b "__percpu_arg(0) \ : "+m" (var)); \ break; \ case 2: \ - asm(op "w "__percpu_arg(0) \ + asm qual (op "w "__percpu_arg(0) \ : "+m" (var)); \ break; \ case 4: \ - asm(op "l "__percpu_arg(0) \ + asm qual (op "l "__percpu_arg(0) \ : "+m" (var)); \ break; \ case 8: \ - asm(op "q "__percpu_arg(0) \ + asm qual (op "q "__percpu_arg(0) \ : "+m" (var)); \ break; \ default: __bad_percpu_size(); \ @@ -264,27 +264,27 @@ do { \ /* * Add return operation */ -#define percpu_add_return_op(var, val) \ +#define percpu_add_return_op(qual, var, val) \ ({ \ typeof(var) paro_ret__ = val; \ switch (sizeof(var)) { \ case 1: \ - asm("xaddb %0, "__percpu_arg(1) \ + asm qual ("xaddb %0, "__percpu_arg(1) \ : "+q" (paro_ret__), "+m" (var) \ : : "memory"); \ break; \ case 2: \ - asm("xaddw %0, "__percpu_arg(1) \ + asm qual ("xaddw %0, "__percpu_arg(1) \ : "+r" (paro_ret__), "+m" (var) \ : : "memory"); \ break; \ case 4: \ - asm("xaddl %0, "__percpu_arg(1) \ + asm qual ("xaddl %0, "__percpu_arg(1) \ : "+r" (paro_ret__), "+m" (var) \ : : "memory"); \ break; \ case 8: \ - asm("xaddq %0, "__percpu_arg(1) \ + asm qual ("xaddq %0, "__percpu_arg(1) \ : "+re" (paro_ret__), "+m" (var) \ : : "memory"); \ break; \ @@ -299,13 +299,13 @@ do { \ * expensive due to the implied lock prefix. The processor cannot prefetch * cachelines if xchg is used. */ -#define percpu_xchg_op(var, nval) \ +#define percpu_xchg_op(qual, var, nval) \ ({ \ typeof(var) pxo_ret__; \ typeof(var) pxo_new__ = (nval); \ switch (sizeof(var)) { \ case 1: \ - asm("\n\tmov "__percpu_arg(1)",%%al" \ + asm qual ("\n\tmov "__percpu_arg(1)",%%al" \ "\n1:\tcmpxchgb %2, "__percpu_arg(1) \ "\n\tjnz 1b" \ : "=&a" (pxo_ret__), "+m" (var) \ @@ -313,7 +313,7 @@ do { \ : "memory"); \ break; \ case 2: \ - asm("\n\tmov "__percpu_arg(1)",%%ax" \ + asm qual ("\n\tmov "__percpu_arg(1)",%%ax" \ "\n1:\tcmpxchgw %2, "__percpu_arg(1) \ "\n\tjnz 1b" \ : "=&a" (pxo_ret__), "+m" (var) \ @@ -321,7 +321,7 @@ do { \ : "memory"); \ break; \ case 4: \ - asm("\n\tmov "__percpu_arg(1)",%%eax" \ + asm qual ("\n\tmov "__percpu_arg(1)",%%eax" \ "\n1:\tcmpxchgl %2, "__percpu_arg(1) \ "\n\tjnz 1b" \ : "=&a" (pxo_ret__), "+m" (var) \ @@ -329,7 +329,7 @@ do { \ : "memory"); \ break; \ case 8: \ - asm("\n\tmov "__percpu_arg(1)",%%rax" \ + asm qual ("\n\tmov "__percpu_arg(1)",%%rax" \ "\n1:\tcmpxchgq %2, "__percpu_arg(1) \ "\n\tjnz 1b" \ : "=&a" (pxo_ret__), "+m" (var) \ @@ -345,32 +345,32 @@ do { \ * cmpxchg has no such implied lock semantics as a result it is much * more efficient for cpu local operations. 
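What the new first parameter buys is visible in the expansions: the this_cpu_*() ops pass volatile, so the asm is a fixed point the compiler must emit in order, while the raw_cpu_*() ops pass nothing and let the optimizer merge or hoist accesses across them. A reduced, one-size sketch of the scheme (x86 %gs-based percpu addressing assumed; not the kernel macro itself):

#define pcpu_read4_sketch(qual, var)				\
({								\
	unsigned int ret__;					\
	asm qual ("movl %%gs:%1, %0"				\
		  : "=r" (ret__)				\
		  : "m" (var));					\
	ret__;							\
})

/* this_cpu flavour: volatile asm, never elided or reordered. */
#define this_read4_sketch(var)	pcpu_read4_sketch(volatile, var)
/* raw_cpu flavour: plain asm, CSE and hoisting are allowed.  */
#define raw_read4_sketch(var)	pcpu_read4_sketch(, var)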
*/ -#define percpu_cmpxchg_op(var, oval, nval) \ +#define percpu_cmpxchg_op(qual, var, oval, nval) \ ({ \ typeof(var) pco_ret__; \ typeof(var) pco_old__ = (oval); \ typeof(var) pco_new__ = (nval); \ switch (sizeof(var)) { \ case 1: \ - asm("cmpxchgb %2, "__percpu_arg(1) \ + asm qual ("cmpxchgb %2, "__percpu_arg(1) \ : "=a" (pco_ret__), "+m" (var) \ : "q" (pco_new__), "0" (pco_old__) \ : "memory"); \ break; \ case 2: \ - asm("cmpxchgw %2, "__percpu_arg(1) \ + asm qual ("cmpxchgw %2, "__percpu_arg(1) \ : "=a" (pco_ret__), "+m" (var) \ : "r" (pco_new__), "0" (pco_old__) \ : "memory"); \ break; \ case 4: \ - asm("cmpxchgl %2, "__percpu_arg(1) \ + asm qual ("cmpxchgl %2, "__percpu_arg(1) \ : "=a" (pco_ret__), "+m" (var) \ : "r" (pco_new__), "0" (pco_old__) \ : "memory"); \ break; \ case 8: \ - asm("cmpxchgq %2, "__percpu_arg(1) \ + asm qual ("cmpxchgq %2, "__percpu_arg(1) \ : "=a" (pco_ret__), "+m" (var) \ : "r" (pco_new__), "0" (pco_old__) \ : "memory"); \ @@ -391,58 +391,70 @@ do { \ */ #define this_cpu_read_stable(var) percpu_stable_op("mov", var) -#define raw_cpu_read_1(pcp) percpu_from_op("mov", pcp) -#define raw_cpu_read_2(pcp) percpu_from_op("mov", pcp) -#define raw_cpu_read_4(pcp) percpu_from_op("mov", pcp) - -#define raw_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val) -#define raw_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val) -#define raw_cpu_write_4(pcp, val) percpu_to_op("mov", (pcp), val) -#define raw_cpu_add_1(pcp, val) percpu_add_op((pcp), val) -#define raw_cpu_add_2(pcp, val) percpu_add_op((pcp), val) -#define raw_cpu_add_4(pcp, val) percpu_add_op((pcp), val) -#define raw_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val) -#define raw_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val) -#define raw_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val) -#define raw_cpu_or_1(pcp, val) percpu_to_op("or", (pcp), val) -#define raw_cpu_or_2(pcp, val) percpu_to_op("or", (pcp), val) -#define raw_cpu_or_4(pcp, val) percpu_to_op("or", (pcp), val) -#define raw_cpu_xchg_1(pcp, val) percpu_xchg_op(pcp, val) -#define raw_cpu_xchg_2(pcp, val) percpu_xchg_op(pcp, val) -#define raw_cpu_xchg_4(pcp, val) percpu_xchg_op(pcp, val) - -#define this_cpu_read_1(pcp) percpu_from_op("mov", pcp) -#define this_cpu_read_2(pcp) percpu_from_op("mov", pcp) -#define this_cpu_read_4(pcp) percpu_from_op("mov", pcp) -#define this_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val) -#define this_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val) -#define this_cpu_write_4(pcp, val) percpu_to_op("mov", (pcp), val) -#define this_cpu_add_1(pcp, val) percpu_add_op((pcp), val) -#define this_cpu_add_2(pcp, val) percpu_add_op((pcp), val) -#define this_cpu_add_4(pcp, val) percpu_add_op((pcp), val) -#define this_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val) -#define this_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val) -#define this_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val) -#define this_cpu_or_1(pcp, val) percpu_to_op("or", (pcp), val) -#define this_cpu_or_2(pcp, val) percpu_to_op("or", (pcp), val) -#define this_cpu_or_4(pcp, val) percpu_to_op("or", (pcp), val) -#define this_cpu_xchg_1(pcp, nval) percpu_xchg_op(pcp, nval) -#define this_cpu_xchg_2(pcp, nval) percpu_xchg_op(pcp, nval) -#define this_cpu_xchg_4(pcp, nval) percpu_xchg_op(pcp, nval) - -#define raw_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val) -#define raw_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val) -#define raw_cpu_add_return_4(pcp, val) percpu_add_return_op(pcp, val) -#define raw_cpu_cmpxchg_1(pcp, 
oval, nval) percpu_cmpxchg_op(pcp, oval, nval) -#define raw_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) -#define raw_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) - -#define this_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val) -#define this_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val) -#define this_cpu_add_return_4(pcp, val) percpu_add_return_op(pcp, val) -#define this_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) -#define this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) -#define this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) +#define raw_cpu_read_1(pcp) percpu_from_op(, "mov", pcp) +#define raw_cpu_read_2(pcp) percpu_from_op(, "mov", pcp) +#define raw_cpu_read_4(pcp) percpu_from_op(, "mov", pcp) + +#define raw_cpu_write_1(pcp, val) percpu_to_op(, "mov", (pcp), val) +#define raw_cpu_write_2(pcp, val) percpu_to_op(, "mov", (pcp), val) +#define raw_cpu_write_4(pcp, val) percpu_to_op(, "mov", (pcp), val) +#define raw_cpu_add_1(pcp, val) percpu_add_op(, (pcp), val) +#define raw_cpu_add_2(pcp, val) percpu_add_op(, (pcp), val) +#define raw_cpu_add_4(pcp, val) percpu_add_op(, (pcp), val) +#define raw_cpu_and_1(pcp, val) percpu_to_op(, "and", (pcp), val) +#define raw_cpu_and_2(pcp, val) percpu_to_op(, "and", (pcp), val) +#define raw_cpu_and_4(pcp, val) percpu_to_op(, "and", (pcp), val) +#define raw_cpu_or_1(pcp, val) percpu_to_op(, "or", (pcp), val) +#define raw_cpu_or_2(pcp, val) percpu_to_op(, "or", (pcp), val) +#define raw_cpu_or_4(pcp, val) percpu_to_op(, "or", (pcp), val) + +/* + * raw_cpu_xchg() can use a load-store since it is not required to be + * IRQ-safe. + */ +#define raw_percpu_xchg_op(var, nval) \ +({ \ + typeof(var) pxo_ret__ = raw_cpu_read(var); \ + raw_cpu_write(var, (nval)); \ + pxo_ret__; \ +}) + +#define raw_cpu_xchg_1(pcp, val) raw_percpu_xchg_op(pcp, val) +#define raw_cpu_xchg_2(pcp, val) raw_percpu_xchg_op(pcp, val) +#define raw_cpu_xchg_4(pcp, val) raw_percpu_xchg_op(pcp, val) + +#define this_cpu_read_1(pcp) percpu_from_op(volatile, "mov", pcp) +#define this_cpu_read_2(pcp) percpu_from_op(volatile, "mov", pcp) +#define this_cpu_read_4(pcp) percpu_from_op(volatile, "mov", pcp) +#define this_cpu_write_1(pcp, val) percpu_to_op(volatile, "mov", (pcp), val) +#define this_cpu_write_2(pcp, val) percpu_to_op(volatile, "mov", (pcp), val) +#define this_cpu_write_4(pcp, val) percpu_to_op(volatile, "mov", (pcp), val) +#define this_cpu_add_1(pcp, val) percpu_add_op(volatile, (pcp), val) +#define this_cpu_add_2(pcp, val) percpu_add_op(volatile, (pcp), val) +#define this_cpu_add_4(pcp, val) percpu_add_op(volatile, (pcp), val) +#define this_cpu_and_1(pcp, val) percpu_to_op(volatile, "and", (pcp), val) +#define this_cpu_and_2(pcp, val) percpu_to_op(volatile, "and", (pcp), val) +#define this_cpu_and_4(pcp, val) percpu_to_op(volatile, "and", (pcp), val) +#define this_cpu_or_1(pcp, val) percpu_to_op(volatile, "or", (pcp), val) +#define this_cpu_or_2(pcp, val) percpu_to_op(volatile, "or", (pcp), val) +#define this_cpu_or_4(pcp, val) percpu_to_op(volatile, "or", (pcp), val) +#define this_cpu_xchg_1(pcp, nval) percpu_xchg_op(volatile, pcp, nval) +#define this_cpu_xchg_2(pcp, nval) percpu_xchg_op(volatile, pcp, nval) +#define this_cpu_xchg_4(pcp, nval) percpu_xchg_op(volatile, pcp, nval) + +#define raw_cpu_add_return_1(pcp, val) percpu_add_return_op(, pcp, val) +#define raw_cpu_add_return_2(pcp, val) percpu_add_return_op(, pcp, val) +#define raw_cpu_add_return_4(pcp, 
val) percpu_add_return_op(, pcp, val) +#define raw_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(, pcp, oval, nval) +#define raw_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(, pcp, oval, nval) +#define raw_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(, pcp, oval, nval) + +#define this_cpu_add_return_1(pcp, val) percpu_add_return_op(volatile, pcp, val) +#define this_cpu_add_return_2(pcp, val) percpu_add_return_op(volatile, pcp, val) +#define this_cpu_add_return_4(pcp, val) percpu_add_return_op(volatile, pcp, val) +#define this_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(volatile, pcp, oval, nval) +#define this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(volatile, pcp, oval, nval) +#define this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(volatile, pcp, oval, nval) #ifdef CONFIG_X86_CMPXCHG64 #define percpu_cmpxchg8b_double(pcp1, pcp2, o1, o2, n1, n2) \ @@ -466,23 +478,23 @@ do { \ * 32 bit must fall back to generic operations. */ #ifdef CONFIG_X86_64 -#define raw_cpu_read_8(pcp) percpu_from_op("mov", pcp) -#define raw_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val) -#define raw_cpu_add_8(pcp, val) percpu_add_op((pcp), val) -#define raw_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) -#define raw_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) -#define raw_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val) -#define raw_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval) -#define raw_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) - -#define this_cpu_read_8(pcp) percpu_from_op("mov", pcp) -#define this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val) -#define this_cpu_add_8(pcp, val) percpu_add_op((pcp), val) -#define this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) -#define this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) -#define this_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val) -#define this_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval) -#define this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) +#define raw_cpu_read_8(pcp) percpu_from_op(, "mov", pcp) +#define raw_cpu_write_8(pcp, val) percpu_to_op(, "mov", (pcp), val) +#define raw_cpu_add_8(pcp, val) percpu_add_op(, (pcp), val) +#define raw_cpu_and_8(pcp, val) percpu_to_op(, "and", (pcp), val) +#define raw_cpu_or_8(pcp, val) percpu_to_op(, "or", (pcp), val) +#define raw_cpu_add_return_8(pcp, val) percpu_add_return_op(, pcp, val) +#define raw_cpu_xchg_8(pcp, nval) raw_percpu_xchg_op(pcp, nval) +#define raw_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(, pcp, oval, nval) + +#define this_cpu_read_8(pcp) percpu_from_op(volatile, "mov", pcp) +#define this_cpu_write_8(pcp, val) percpu_to_op(volatile, "mov", (pcp), val) +#define this_cpu_add_8(pcp, val) percpu_add_op(volatile, (pcp), val) +#define this_cpu_and_8(pcp, val) percpu_to_op(volatile, "and", (pcp), val) +#define this_cpu_or_8(pcp, val) percpu_to_op(volatile, "or", (pcp), val) +#define this_cpu_add_return_8(pcp, val) percpu_add_return_op(volatile, pcp, val) +#define this_cpu_xchg_8(pcp, nval) percpu_xchg_op(volatile, pcp, nval) +#define this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(volatile, pcp, oval, nval) /* * Pretty complex macro to generate cmpxchg16 instruction. 
The instruction diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index c34a35c78618..e57d2ca2ed87 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -144,7 +144,8 @@ enum cpuid_regs_idx { #define X86_VENDOR_TRANSMETA 7 #define X86_VENDOR_NSC 8 #define X86_VENDOR_HYGON 9 -#define X86_VENDOR_NUM 10 +#define X86_VENDOR_ZHAOXIN 10 +#define X86_VENDOR_NUM 11 #define X86_VENDOR_UNKNOWN 0xff diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h index b6033680d458..19b695ff2c68 100644 --- a/arch/x86/include/asm/pvclock.h +++ b/arch/x86/include/asm/pvclock.h @@ -2,7 +2,7 @@ #ifndef _ASM_X86_PVCLOCK_H #define _ASM_X86_PVCLOCK_H -#include <linux/clocksource.h> +#include <asm/clocksource.h> #include <asm/pvclock-abi.h> /* some helper functions for xen and kvm pv clock sources */ diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index da545df207b2..0d3fe060a44f 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -162,7 +162,8 @@ __visible void smp_call_function_single_interrupt(struct pt_regs *r); * from the initial startup. We map APIC_BASE very early in page_setup(), * so this is correct in the x86 case. */ -#define raw_smp_processor_id() (this_cpu_read(cpu_number)) +#define raw_smp_processor_id() this_cpu_read(cpu_number) +#define __smp_processor_id() __this_cpu_read(cpu_number) #ifdef CONFIG_X86_32 extern int safe_smp_processor_id(void); diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h index 880b5515b1d6..d83e9f771d86 100644 --- a/arch/x86/include/asm/text-patching.h +++ b/arch/x86/include/asm/text-patching.h @@ -18,6 +18,20 @@ static inline void apply_paravirt(struct paravirt_patch_site *start, #define __parainstructions_end NULL #endif +/* + * Currently, the max observed size in the kernel code is + * JUMP_LABEL_NOP_SIZE/RELATIVEJUMP_SIZE, which are 5. + * Raise it if needed. + */ +#define POKE_MAX_OPCODE_SIZE 5 + +struct text_poke_loc { + void *detour; + void *addr; + size_t len; + const char opcode[POKE_MAX_OPCODE_SIZE]; +}; + extern void text_poke_early(void *addr, const void *opcode, size_t len); /* @@ -38,6 +52,7 @@ extern void *text_poke(void *addr, const void *opcode, size_t len); extern void *text_poke_kgdb(void *addr, const void *opcode, size_t len); extern int poke_int3_handler(struct pt_regs *regs); extern void text_poke_bp(void *addr, const void *opcode, size_t len, void *handler); +extern void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries); extern int after_bootmem; extern __ro_after_init struct mm_struct *poking_mm; extern __ro_after_init unsigned long poking_addr; diff --git a/arch/x86/include/asm/time.h b/arch/x86/include/asm/time.h index cef818b16045..8ac563abb567 100644 --- a/arch/x86/include/asm/time.h +++ b/arch/x86/include/asm/time.h @@ -7,6 +7,7 @@ extern void hpet_time_init(void); extern void time_init(void); +extern bool pit_timer_init(void); extern struct clock_event_device *global_clock_event; diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h new file mode 100644 index 000000000000..ae91429129a6 --- /dev/null +++ b/arch/x86/include/asm/vdso/gettimeofday.h @@ -0,0 +1,261 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Fast user context implementation of clock_gettime, gettimeofday, and time. + * + * Copyright (C) 2019 ARM Limited. + * Copyright 2006 Andi Kleen, SUSE Labs. 
+ * 32 Bit compat layer by Stefani Seibold <stefani@seibold.net> + * sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany + */ +#ifndef __ASM_VDSO_GETTIMEOFDAY_H +#define __ASM_VDSO_GETTIMEOFDAY_H + +#ifndef __ASSEMBLY__ + +#include <uapi/linux/time.h> +#include <asm/vgtod.h> +#include <asm/vvar.h> +#include <asm/unistd.h> +#include <asm/msr.h> +#include <asm/pvclock.h> +#include <clocksource/hyperv_timer.h> + +#define __vdso_data (VVAR(_vdso_data)) + +#define VDSO_HAS_TIME 1 + +#define VDSO_HAS_CLOCK_GETRES 1 + +/* + * Declare the memory-mapped vclock data pages. These come from hypervisors. + * If we ever reintroduce something like direct access to an MMIO clock like + * the HPET again, it will go here as well. + * + * A load from any of these pages will segfault if the clock in question is + * disabled, so appropriate compiler barriers and checks need to be used + * to prevent stray loads. + * + * These declarations MUST NOT be const. The compiler will assume that + * an extern const variable has genuinely constant contents, and the + * resulting code won't work, since the whole point is that these pages + * change over time, possibly while we're accessing them. + */ + +#ifdef CONFIG_PARAVIRT_CLOCK +/* + * This is the vCPU 0 pvclock page. We only use pvclock from the vDSO + * if the hypervisor tells us that all vCPUs can get valid data from the + * vCPU 0 page. + */ +extern struct pvclock_vsyscall_time_info pvclock_page + __attribute__((visibility("hidden"))); +#endif + +#ifdef CONFIG_HYPERV_TSCPAGE +extern struct ms_hyperv_tsc_page hvclock_page + __attribute__((visibility("hidden"))); +#endif + +#ifndef BUILD_VDSO32 + +static __always_inline +long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) +{ + long ret; + + asm ("syscall" : "=a" (ret), "=m" (*_ts) : + "0" (__NR_clock_gettime), "D" (_clkid), "S" (_ts) : + "rcx", "r11"); + + return ret; +} + +static __always_inline +long gettimeofday_fallback(struct __kernel_old_timeval *_tv, + struct timezone *_tz) +{ + long ret; + + asm("syscall" : "=a" (ret) : + "0" (__NR_gettimeofday), "D" (_tv), "S" (_tz) : "memory"); + + return ret; +} + +static __always_inline +long clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) +{ + long ret; + + asm ("syscall" : "=a" (ret), "=m" (*_ts) : + "0" (__NR_clock_getres), "D" (_clkid), "S" (_ts) : + "rcx", "r11"); + + return ret; +} + +#else + +static __always_inline +long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) +{ + long ret; + + asm ( + "mov %%ebx, %%edx \n" + "mov %[clock], %%ebx \n" + "call __kernel_vsyscall \n" + "mov %%edx, %%ebx \n" + : "=a" (ret), "=m" (*_ts) + : "0" (__NR_clock_gettime64), [clock] "g" (_clkid), "c" (_ts) + : "edx"); + + return ret; +} + +static __always_inline +long gettimeofday_fallback(struct __kernel_old_timeval *_tv, + struct timezone *_tz) +{ + long ret; + + asm( + "mov %%ebx, %%edx \n" + "mov %2, %%ebx \n" + "call __kernel_vsyscall \n" + "mov %%edx, %%ebx \n" + : "=a" (ret) + : "0" (__NR_gettimeofday), "g" (_tv), "c" (_tz) + : "memory", "edx"); + + return ret; +} + +static __always_inline long +clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) +{ + long ret; + + asm ( + "mov %%ebx, %%edx \n" + "mov %[clock], %%ebx \n" + "call __kernel_vsyscall \n" + "mov %%edx, %%ebx \n" + : "=a" (ret), "=m" (*_ts) + : "0" (__NR_clock_getres_time64), [clock] "g" (_clkid), "c" (_ts) + : "edx"); + + return ret; +} + +#endif + +#ifdef CONFIG_PARAVIRT_CLOCK +static u64 vread_pvclock(void) +{ + const 
struct pvclock_vcpu_time_info *pvti = &pvclock_page.pvti; + u32 version; + u64 ret; + + /* + * Note: The kernel and hypervisor must guarantee that cpu ID + * number maps 1:1 to per-CPU pvclock time info. + * + * Because the hypervisor is entirely unaware of guest userspace + * preemption, it cannot guarantee that per-CPU pvclock time + * info is updated if the underlying CPU changes or that that + * version is increased whenever underlying CPU changes. + * + * On KVM, we are guaranteed that pvti updates for any vCPU are + * atomic as seen by *all* vCPUs. This is an even stronger + * guarantee than we get with a normal seqlock. + * + * On Xen, we don't appear to have that guarantee, but Xen still + * supplies a valid seqlock using the version field. + * + * We only do pvclock vdso timing at all if + * PVCLOCK_TSC_STABLE_BIT is set, and we interpret that bit to + * mean that all vCPUs have matching pvti and that the TSC is + * synced, so we can just look at vCPU 0's pvti. + */ + + do { + version = pvclock_read_begin(pvti); + + if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT))) + return U64_MAX; + + ret = __pvclock_read_cycles(pvti, rdtsc_ordered()); + } while (pvclock_read_retry(pvti, version)); + + return ret; +} +#endif + +#ifdef CONFIG_HYPERV_TSCPAGE +static u64 vread_hvclock(void) +{ + return hv_read_tsc_page(&hvclock_page); +} +#endif + +static inline u64 __arch_get_hw_counter(s32 clock_mode) +{ + if (clock_mode == VCLOCK_TSC) + return (u64)rdtsc_ordered(); + /* + * For any memory-mapped vclock type, we need to make sure that gcc + * doesn't cleverly hoist a load before the mode check. Otherwise we + * might end up touching the memory-mapped page even if the vclock in + * question isn't enabled, which will segfault. Hence the barriers. + */ +#ifdef CONFIG_PARAVIRT_CLOCK + if (clock_mode == VCLOCK_PVCLOCK) { + barrier(); + return vread_pvclock(); + } +#endif +#ifdef CONFIG_HYPERV_TSCPAGE + if (clock_mode == VCLOCK_HVCLOCK) { + barrier(); + return vread_hvclock(); + } +#endif + return U64_MAX; +} + +static __always_inline const struct vdso_data *__arch_get_vdso_data(void) +{ + return __vdso_data; +} + +/* + * x86 specific delta calculation. + * + * The regular implementation assumes that clocksource reads are globally + * monotonic. The TSC can be slightly off across sockets which can cause + * the regular delta calculation (@cycles - @last) to return a huge time + * jump. + * + * Therefore it needs to be verified that @cycles are greater than + * @last. If not then use @last, which is the base time of the current + * conversion period. + * + * This variant also removes the masking of the subtraction because the + * clocksource mask of all VDSO capable clocksources on x86 is U64_MAX + * which would result in a pointless operation. The compiler cannot + * optimize it away as the mask comes from the vdso data and is not compile + * time constant. 
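To put a number on the hazard described above: if this CPU's TSC is just two cycles behind the @last value another socket stored, the unclamped subtraction wraps. A stand-alone illustration (plain C, mult taken as 1):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t last = 1000000, cycles = 999998;	/* TSC 2 cycles behind @last */

	/* Unclamped: wraps to 2^64 - 2, a huge forward time jump. */
	printf("unclamped delta: %" PRIu64 "\n", cycles - last);

	/* Clamped as vdso_calc_delta() does: time simply does not advance. */
	printf("clamped delta:   %" PRIu64 "\n",
	       cycles > last ? cycles - last : 0);
	return 0;
}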
+ */ +static __always_inline +u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult) +{ + if (cycles > last) + return (cycles - last) * mult; + return 0; +} +#define vdso_calc_delta vdso_calc_delta + +#endif /* !__ASSEMBLY__ */ + +#endif /* __ASM_VDSO_GETTIMEOFDAY_H */ diff --git a/arch/x86/include/asm/vdso/vsyscall.h b/arch/x86/include/asm/vdso/vsyscall.h new file mode 100644 index 000000000000..0026ab2123ce --- /dev/null +++ b/arch/x86/include/asm/vdso/vsyscall.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_VDSO_VSYSCALL_H +#define __ASM_VDSO_VSYSCALL_H + +#ifndef __ASSEMBLY__ + +#include <linux/hrtimer.h> +#include <linux/timekeeper_internal.h> +#include <vdso/datapage.h> +#include <asm/vgtod.h> +#include <asm/vvar.h> + +int vclocks_used __read_mostly; + +DEFINE_VVAR(struct vdso_data, _vdso_data); +/* + * Update the vDSO data page to keep in sync with kernel timekeeping. + */ +static __always_inline +struct vdso_data *__x86_get_k_vdso_data(void) +{ + return _vdso_data; +} +#define __arch_get_k_vdso_data __x86_get_k_vdso_data + +static __always_inline +int __x86_get_clock_mode(struct timekeeper *tk) +{ + int vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode; + + /* Mark the new vclock used. */ + BUILD_BUG_ON(VCLOCK_MAX >= 32); + WRITE_ONCE(vclocks_used, READ_ONCE(vclocks_used) | (1 << vclock_mode)); + + return vclock_mode; +} +#define __arch_get_clock_mode __x86_get_clock_mode + +/* The asm-generic header needs to be included after the definitions above */ +#include <asm-generic/vdso/vsyscall.h> + +#endif /* !__ASSEMBLY__ */ + +#endif /* __ASM_VDSO_VSYSCALL_H */ diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h index 913a133f8e6f..a2638c6124ed 100644 --- a/arch/x86/include/asm/vgtod.h +++ b/arch/x86/include/asm/vgtod.h @@ -3,7 +3,9 @@ #define _ASM_X86_VGTOD_H #include <linux/compiler.h> -#include <linux/clocksource.h> +#include <asm/clocksource.h> +#include <vdso/datapage.h> +#include <vdso/helpers.h> #include <uapi/linux/time.h> @@ -13,81 +15,10 @@ typedef u64 gtod_long_t; typedef unsigned long gtod_long_t; #endif -/* - * There is one of these objects in the vvar page for each - * vDSO-accelerated clockid. For high-resolution clocks, this encodes - * the time corresponding to vsyscall_gtod_data.cycle_last. For coarse - * clocks, this encodes the actual time. - * - * To confuse the reader, for high-resolution clocks, nsec is left-shifted - * by vsyscall_gtod_data.shift. - */ -struct vgtod_ts { - u64 sec; - u64 nsec; -}; - -#define VGTOD_BASES (CLOCK_TAI + 1) -#define VGTOD_HRES (BIT(CLOCK_REALTIME) | BIT(CLOCK_MONOTONIC) | BIT(CLOCK_TAI)) -#define VGTOD_COARSE (BIT(CLOCK_REALTIME_COARSE) | BIT(CLOCK_MONOTONIC_COARSE)) - -/* - * vsyscall_gtod_data will be accessed by 32 and 64 bit code at the same time - * so be carefull by modifying this structure. 
- */ -struct vsyscall_gtod_data { - unsigned int seq; - - int vclock_mode; - u64 cycle_last; - u64 mask; - u32 mult; - u32 shift; - - struct vgtod_ts basetime[VGTOD_BASES]; - - int tz_minuteswest; - int tz_dsttime; -}; -extern struct vsyscall_gtod_data vsyscall_gtod_data; - extern int vclocks_used; static inline bool vclock_was_used(int vclock) { return READ_ONCE(vclocks_used) & (1 << vclock); } -static inline unsigned int gtod_read_begin(const struct vsyscall_gtod_data *s) -{ - unsigned int ret; - -repeat: - ret = READ_ONCE(s->seq); - if (unlikely(ret & 1)) { - cpu_relax(); - goto repeat; - } - smp_rmb(); - return ret; -} - -static inline int gtod_read_retry(const struct vsyscall_gtod_data *s, - unsigned int start) -{ - smp_rmb(); - return unlikely(s->seq != start); -} - -static inline void gtod_write_begin(struct vsyscall_gtod_data *s) -{ - ++s->seq; - smp_wmb(); -} - -static inline void gtod_write_end(struct vsyscall_gtod_data *s) -{ - smp_wmb(); - ++s->seq; -} - #endif /* _ASM_X86_VGTOD_H */ diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h index b986b2ca688a..ab60a71a8dcb 100644 --- a/arch/x86/include/asm/vsyscall.h +++ b/arch/x86/include/asm/vsyscall.h @@ -13,10 +13,12 @@ extern void set_vsyscall_pgtable_user_bits(pgd_t *root); * Called on instruction fetch fault in vsyscall page. * Returns true if handled. */ -extern bool emulate_vsyscall(struct pt_regs *regs, unsigned long address); +extern bool emulate_vsyscall(unsigned long error_code, + struct pt_regs *regs, unsigned long address); #else static inline void map_vsyscall(void) {} -static inline bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) +static inline bool emulate_vsyscall(unsigned long error_code, + struct pt_regs *regs, unsigned long address) { return false; } diff --git a/arch/x86/include/asm/vvar.h b/arch/x86/include/asm/vvar.h index e474f5c6e387..32f5d9a0b90e 100644 --- a/arch/x86/include/asm/vvar.h +++ b/arch/x86/include/asm/vvar.h @@ -32,19 +32,20 @@ extern char __vvar_page; #define DECLARE_VVAR(offset, type, name) \ - extern type vvar_ ## name __attribute__((visibility("hidden"))); + extern type vvar_ ## name[CS_BASES] \ + __attribute__((visibility("hidden"))); #define VVAR(name) (vvar_ ## name) #define DEFINE_VVAR(type, name) \ - type name \ + type name[CS_BASES] \ __attribute__((section(".vvar_" #name), aligned(16))) __visible #endif /* DECLARE_VVAR(offset, type, name) */ -DECLARE_VVAR(128, struct vsyscall_gtod_data, vsyscall_gtod_data) +DECLARE_VVAR(128, struct vdso_data, _vdso_data) #undef DECLARE_VVAR diff --git a/arch/x86/include/uapi/asm/perf_regs.h b/arch/x86/include/uapi/asm/perf_regs.h index ac67bbea10ca..7c9d2bb3833b 100644 --- a/arch/x86/include/uapi/asm/perf_regs.h +++ b/arch/x86/include/uapi/asm/perf_regs.h @@ -52,4 +52,7 @@ enum perf_event_x86_regs { /* These include both GPRs and XMMX registers */ PERF_REG_X86_XMM_MAX = PERF_REG_X86_XMM15 + 2, }; + +#define PERF_REG_EXTENDED_MASK (~((1ULL << PERF_REG_X86_XMM0) - 1)) + #endif /* _ASM_X86_PERF_REGS_H */ diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c index a5e5484988fd..caf2edccbad2 100644 --- a/arch/x86/kernel/acpi/cstate.c +++ b/arch/x86/kernel/acpi/cstate.c @@ -64,6 +64,21 @@ void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags, c->x86_stepping >= 0x0e)) flags->bm_check = 1; } + + if (c->x86_vendor == X86_VENDOR_ZHAOXIN) { + /* + * All Zhaoxin CPUs that support C3 share cache. 
+ * And caches should not be flushed by software while + * entering C3 type state. + */ + flags->bm_check = 1; + /* + * On all recent Zhaoxin platforms, ARB_DISABLE is a nop. + * So, set bm_control to zero to indicate that ARB_DISABLE + * is not required while entering C3 type state. + */ + flags->bm_control = 0; + } } EXPORT_SYMBOL(acpi_processor_power_init_bm_check); diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 390596b761e3..bd542f9b0953 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -14,6 +14,7 @@ #include <linux/kdebug.h> #include <linux/kprobes.h> #include <linux/mmu_context.h> +#include <linux/bsearch.h> #include <asm/text-patching.h> #include <asm/alternative.h> #include <asm/sections.h> @@ -848,81 +849,133 @@ static void do_sync_core(void *info) sync_core(); } -static bool bp_patching_in_progress; -static void *bp_int3_handler, *bp_int3_addr; +static struct bp_patching_desc { + struct text_poke_loc *vec; + int nr_entries; +} bp_patching; + +static int patch_cmp(const void *key, const void *elt) +{ + struct text_poke_loc *tp = (struct text_poke_loc *) elt; + + if (key < tp->addr) + return -1; + if (key > tp->addr) + return 1; + return 0; +} +NOKPROBE_SYMBOL(patch_cmp); int poke_int3_handler(struct pt_regs *regs) { + struct text_poke_loc *tp; + unsigned char int3 = 0xcc; + void *ip; + /* * Having observed our INT3 instruction, we now must observe - * bp_patching_in_progress. + * bp_patching.nr_entries. * - * in_progress = TRUE INT3 + * nr_entries != 0 INT3 * WMB RMB - * write INT3 if (in_progress) + * write INT3 if (nr_entries) * - * Idem for bp_int3_handler. + * Idem for other elements in bp_patching. */ smp_rmb(); - if (likely(!bp_patching_in_progress)) + if (likely(!bp_patching.nr_entries)) return 0; - if (user_mode(regs) || regs->ip != (unsigned long)bp_int3_addr) + if (user_mode(regs)) return 0; - /* set up the specified breakpoint handler */ - regs->ip = (unsigned long) bp_int3_handler; + /* + * Discount the sizeof(int3). See text_poke_bp_batch(). + */ + ip = (void *) regs->ip - sizeof(int3); + + /* + * Skip the binary search if there is a single member in the vector. + */ + if (unlikely(bp_patching.nr_entries > 1)) { + tp = bsearch(ip, bp_patching.vec, bp_patching.nr_entries, + sizeof(struct text_poke_loc), + patch_cmp); + if (!tp) + return 0; + } else { + tp = bp_patching.vec; + if (tp->addr != ip) + return 0; + } + + /* set up the specified breakpoint detour */ + regs->ip = (unsigned long) tp->detour; return 1; } NOKPROBE_SYMBOL(poke_int3_handler); /** - * text_poke_bp() -- update instructions on live kernel on SMP - * @addr: address to patch - * @opcode: opcode of new instruction - * @len: length to copy - * @handler: address to jump to when the temporary breakpoint is hit + * text_poke_bp_batch() -- update instructions on live kernel on SMP + * @tp: vector of instructions to patch + * @nr_entries: number of entries in the vector * * Modify multi-byte instruction by using int3 breakpoint on SMP. * We completely avoid stop_machine() here, and achieve the * synchronization using int3 breakpoint. 
* * The way it is done: - * - add a int3 trap to the address that will be patched + * - For each entry in the vector: + * - add a int3 trap to the address that will be patched * - sync cores - * - update all but the first byte of the patched range + * - For each entry in the vector: + * - update all but the first byte of the patched range * - sync cores - * - replace the first byte (int3) by the first byte of - * replacing opcode + * - For each entry in the vector: + * - replace the first byte (int3) by the first byte of + * replacing opcode * - sync cores */ -void text_poke_bp(void *addr, const void *opcode, size_t len, void *handler) +void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries) { + int patched_all_but_first = 0; unsigned char int3 = 0xcc; - - bp_int3_handler = handler; - bp_int3_addr = (u8 *)addr + sizeof(int3); - bp_patching_in_progress = true; + unsigned int i; lockdep_assert_held(&text_mutex); + bp_patching.vec = tp; + bp_patching.nr_entries = nr_entries; + /* * Corresponding read barrier in int3 notifier for making sure the - * in_progress and handler are correctly ordered wrt. patching. + * nr_entries and handler are correctly ordered wrt. patching. */ smp_wmb(); - text_poke(addr, &int3, sizeof(int3)); + /* + * First step: add a int3 trap to the address that will be patched. + */ + for (i = 0; i < nr_entries; i++) + text_poke(tp[i].addr, &int3, sizeof(int3)); on_each_cpu(do_sync_core, NULL, 1); - if (len - sizeof(int3) > 0) { - /* patch all but the first byte */ - text_poke((char *)addr + sizeof(int3), - (const char *) opcode + sizeof(int3), - len - sizeof(int3)); + /* + * Second step: update all but the first byte of the patched range. + */ + for (i = 0; i < nr_entries; i++) { + if (tp[i].len - sizeof(int3) > 0) { + text_poke((char *)tp[i].addr + sizeof(int3), + (const char *)tp[i].opcode + sizeof(int3), + tp[i].len - sizeof(int3)); + patched_all_but_first++; + } + } + + if (patched_all_but_first) { /* * According to Intel, this core syncing is very likely * not necessary and we'd be safe even without it. But @@ -931,14 +984,47 @@ void text_poke_bp(void *addr, const void *opcode, size_t len, void *handler) on_each_cpu(do_sync_core, NULL, 1); } - /* patch the first byte */ - text_poke(addr, opcode, sizeof(int3)); + /* + * Third step: replace the first byte (int3) by the first byte of + * replacing opcode. + */ + for (i = 0; i < nr_entries; i++) + text_poke(tp[i].addr, tp[i].opcode, sizeof(int3)); on_each_cpu(do_sync_core, NULL, 1); /* * sync_core() implies an smp_mb() and orders this store against * the writing of the new instruction. */ - bp_patching_in_progress = false; + bp_patching.vec = NULL; + bp_patching.nr_entries = 0; } +/** + * text_poke_bp() -- update instructions on live kernel on SMP + * @addr: address to patch + * @opcode: opcode of new instruction + * @len: length to copy + * @handler: address to jump to when the temporary breakpoint is hit + * + * Update a single instruction with the vector in the stack, avoiding + * dynamically allocated memory. This function should be used when it is + * not possible to allocate memory. 
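As a usage illustration (a hypothetical call site, not one taken from this patch): here is how a 5-byte NOP would be turned into a near jump on a live kernel. The caller only builds the opcode and names the detour; the int3 insertion, the tail/first-byte split and the core syncs all happen inside the helper:

/* Hypothetical call site (kernel context assumed): rewrite a 5-byte
 * NOP at 'addr' into JMP rel32 to 'target' while other CPUs may be
 * executing it. A CPU hitting the transient INT3 resumes at 'target',
 * which is also where the finished jump will go. */
static void patch_nop5_to_jmp_sketch(void *addr, void *target)
{
	unsigned char jmp[5];
	s32 rel = (s32)((unsigned long)target - ((unsigned long)addr + 5));

	jmp[0] = 0xe9;			/* JMP rel32 */
	memcpy(&jmp[1], &rel, sizeof(rel));

	text_poke_bp(addr, jmp, sizeof(jmp), target);
}

For the batch form, note that poke_int3_handler() looks addresses up with bsearch(), so callers of text_poke_bp_batch() are expected to hand in the vector sorted by addr.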
+ */ +void text_poke_bp(void *addr, const void *opcode, size_t len, void *handler) +{ + struct text_poke_loc tp = { + .detour = handler, + .addr = addr, + .len = len, + }; + + if (len > POKE_MAX_OPCODE_SIZE) { + WARN_ONCE(1, "len is larger than %d\n", POKE_MAX_OPCODE_SIZE); + return; + } + + memcpy((void *)tp.opcode, opcode, len); + + text_poke_bp_batch(&tp, 1); +} diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 177aa8ef2afa..1bd91cb7b320 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -195,7 +195,7 @@ static struct resource lapic_resource = { .flags = IORESOURCE_MEM | IORESOURCE_BUSY, }; -unsigned int lapic_timer_frequency = 0; +unsigned int lapic_timer_period = 0; static void apic_pm_activate(void); @@ -501,7 +501,7 @@ lapic_timer_set_periodic_oneshot(struct clock_event_device *evt, bool oneshot) if (evt->features & CLOCK_EVT_FEAT_DUMMY) return 0; - __setup_APIC_LVTT(lapic_timer_frequency, oneshot, 1); + __setup_APIC_LVTT(lapic_timer_period, oneshot, 1); return 0; } @@ -805,11 +805,11 @@ calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc) static int __init lapic_init_clockevent(void) { - if (!lapic_timer_frequency) + if (!lapic_timer_period) return -1; /* Calculate the scaled math multiplication factor */ - lapic_clockevent.mult = div_sc(lapic_timer_frequency/APIC_DIVISOR, + lapic_clockevent.mult = div_sc(lapic_timer_period/APIC_DIVISOR, TICK_NSEC, lapic_clockevent.shift); lapic_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF, &lapic_clockevent); @@ -821,6 +821,33 @@ static int __init lapic_init_clockevent(void) return 0; } +bool __init apic_needs_pit(void) +{ + /* + * If the frequencies are not known, PIT is required for both TSC + * and apic timer calibration. + */ + if (!tsc_khz || !cpu_khz) + return true; + + /* Is there an APIC at all? */ + if (!boot_cpu_has(X86_FEATURE_APIC)) + return true; + + /* Deadline timer is based on TSC so no further PIT action required */ + if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) + return false; + + /* APIC timer disabled? */ + if (disable_apic_timer) + return true; + /* + * The APIC timer frequency is known already, no PIT calibration + * required. If unknown, let the PIT be initialized. + */ + return lapic_timer_period == 0; +} + static int __init calibrate_APIC_clock(void) { struct clock_event_device *levt = this_cpu_ptr(&lapic_events); @@ -839,7 +866,7 @@ static int __init calibrate_APIC_clock(void) */ if (!lapic_init_clockevent()) { apic_printk(APIC_VERBOSE, "lapic timer already calibrated %d\n", - lapic_timer_frequency); + lapic_timer_period); /* * Direct calibration methods must have an always running * local APIC timer, no need for broadcast timer. @@ -884,13 +911,13 @@ static int __init calibrate_APIC_clock(void) pm_referenced = !calibrate_by_pmtimer(lapic_cal_pm2 - lapic_cal_pm1, &delta, &deltatsc); - lapic_timer_frequency = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS; + lapic_timer_period = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS; lapic_init_clockevent(); apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta); apic_printk(APIC_VERBOSE, "..... mult: %u\n", lapic_clockevent.mult); apic_printk(APIC_VERBOSE, "..... calibration result: %u\n", - lapic_timer_frequency); + lapic_timer_period); if (boot_cpu_has(X86_FEATURE_TSC)) { apic_printk(APIC_VERBOSE, "..... CPU clock speed is " @@ -901,13 +928,13 @@ static int __init calibrate_APIC_clock(void) apic_printk(APIC_VERBOSE, "..... 
host bus clock speed is " "%u.%04u MHz.\n", - lapic_timer_frequency / (1000000 / HZ), - lapic_timer_frequency % (1000000 / HZ)); + lapic_timer_period / (1000000 / HZ), + lapic_timer_period % (1000000 / HZ)); /* * Do a sanity check on the APIC calibration result */ - if (lapic_timer_frequency < (1000000 / HZ)) { + if (lapic_timer_period < (1000000 / HZ)) { local_irq_enable(); pr_warning("APIC frequency too slow, disabling apic timer\n"); return -1; @@ -1351,6 +1378,8 @@ void __init init_bsp_APIC(void) apic_write(APIC_LVT1, value); } +static void __init apic_bsp_setup(bool upmode); + /* Init the interrupt delivery mode for the BSP */ void __init apic_intr_mode_init(void) { @@ -1464,7 +1493,8 @@ static void apic_pending_intr_clear(void) if (queued) { if (boot_cpu_has(X86_FEATURE_TSC) && cpu_khz) { ntsc = rdtsc(); - max_loops = (cpu_khz << 10) - (ntsc - tsc); + max_loops = (long long)cpu_khz << 10; + max_loops -= ntsc - tsc; } else { max_loops--; } @@ -2040,21 +2070,32 @@ __visible void __irq_entry smp_spurious_interrupt(struct pt_regs *regs) entering_irq(); trace_spurious_apic_entry(vector); + inc_irq_stat(irq_spurious_count); + + /* + * If this is a spurious interrupt then do not acknowledge + */ + if (vector == SPURIOUS_APIC_VECTOR) { + /* See SDM vol 3 */ + pr_info("Spurious APIC interrupt (vector 0xFF) on CPU#%d, should never happen.\n", + smp_processor_id()); + goto out; + } + /* - * Check if this really is a spurious interrupt and ACK it - * if it is a vectored one. Just in case... - * Spurious interrupts should not be ACKed. + * If it is a vectored one, verify it's set in the ISR. If set, + * acknowledge it. */ v = apic_read(APIC_ISR + ((vector & ~0x1f) >> 1)); - if (v & (1 << (vector & 0x1f))) + if (v & (1 << (vector & 0x1f))) { + pr_info("Spurious interrupt (vector 0x%02x) on CPU#%d. Acked\n", + vector, smp_processor_id()); ack_APIC_irq(); - - inc_irq_stat(irq_spurious_count); - - /* see sw-dev-man vol 3, chapter 7.4.13.5 */ - pr_info("spurious APIC interrupt through vector %02x on CPU#%d, " - "should never happen.\n", vector, smp_processor_id()); - + } else { + pr_info("Spurious interrupt (vector 0x%02x) on CPU#%d. 
Not pending!\n",
+ vector, smp_processor_id());
+ }
+out:
 trace_spurious_apic_exit(vector);
 exiting_irq();
}
@@ -2415,11 +2456,8 @@ static void __init apic_bsp_up_setup(void)
/**
 * apic_bsp_setup - Setup function for local apic and io-apic
 * @upmode: Force UP mode (for APIC_init_uniprocessor)
- *
- * Returns:
- * apic_id of BSP APIC
 */
-void __init apic_bsp_setup(bool upmode)
+static void __init apic_bsp_setup(bool upmode)
{
 connect_bsp_APIC();
 if (upmode)
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index bf083c3f1d73..bbdca603f94a 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -78,7 +78,7 @@ flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector)
 int cpu = smp_processor_id();

 if (cpu < BITS_PER_LONG)
- clear_bit(cpu, &mask);
+ __clear_bit(cpu, &mask);

 _flat_send_IPI_mask(mask, vector);
}
@@ -92,7 +92,7 @@ static void flat_send_IPI_allbutself(int vector)
 unsigned long mask = cpumask_bits(cpu_online_mask)[0];

 if (cpu < BITS_PER_LONG)
- clear_bit(cpu, &mask);
+ __clear_bit(cpu, &mask);

 _flat_send_IPI_mask(mask, vector);
}
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 53aa234a6803..c7bb6c69f21c 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -58,6 +58,7 @@
 #include <asm/acpi.h>
 #include <asm/dma.h>
 #include <asm/timer.h>
+#include <asm/time.h>
 #include <asm/i8259.h>
 #include <asm/setup.h>
 #include <asm/irq_remapping.h>
@@ -1893,6 +1894,50 @@ static int ioapic_set_affinity(struct irq_data *irq_data,
 return ret;
}

+/*
+ * Interrupt shutdown masks the ioapic pin, but the interrupt might already
+ * be in flight and not yet serviced by the target CPU. That means
+ * __synchronize_hardirq() would return and claim that everything is calmed
+ * down. So free_irq() would proceed and deactivate the interrupt and free
+ * resources.
+ *
+ * Once the target CPU comes around to service it, it will find a cleared
+ * vector and complain. While the spurious interrupt is harmless, the full
+ * release of resources might prevent the interrupt from being acknowledged,
+ * which keeps the hardware in a weird state.
+ *
+ * Verify that the corresponding Remote-IRR bits are clear.
+ */
+static int ioapic_irq_get_chip_state(struct irq_data *irqd,
+ enum irqchip_irq_state which,
+ bool *state)
+{
+ struct mp_chip_data *mcd = irqd->chip_data;
+ struct IO_APIC_route_entry rentry;
+ struct irq_pin_list *p;
+
+ if (which != IRQCHIP_STATE_ACTIVE)
+ return -EINVAL;
+
+ *state = false;
+ raw_spin_lock(&ioapic_lock);
+ for_each_irq_pin(p, mcd->irq_2_pin) {
+ rentry = __ioapic_read_entry(p->apic, p->pin);
+ /*
+ * The remote IRR is only valid in level trigger mode. Its
+ * meaning is undefined for edge triggered interrupts and
+ * irrelevant because the IO-APIC treats them as fire and
+ * forget.
+ */ + if (rentry.irr && rentry.trigger) { + *state = true; + break; + } + } + raw_spin_unlock(&ioapic_lock); + return 0; +} + static struct irq_chip ioapic_chip __read_mostly = { .name = "IO-APIC", .irq_startup = startup_ioapic_irq, @@ -1902,6 +1947,7 @@ static struct irq_chip ioapic_chip __read_mostly = { .irq_eoi = ioapic_ack_level, .irq_set_affinity = ioapic_set_affinity, .irq_retrigger = irq_chip_retrigger_hierarchy, + .irq_get_irqchip_state = ioapic_irq_get_chip_state, .flags = IRQCHIP_SKIP_SET_WAKE, }; @@ -1914,6 +1960,7 @@ static struct irq_chip ioapic_ir_chip __read_mostly = { .irq_eoi = ioapic_ir_ack_level, .irq_set_affinity = ioapic_set_affinity, .irq_retrigger = irq_chip_retrigger_hierarchy, + .irq_get_irqchip_state = ioapic_irq_get_chip_state, .flags = IRQCHIP_SKIP_SET_WAKE, }; @@ -2083,6 +2130,9 @@ static inline void __init check_timer(void) unsigned long flags; int no_pin1 = 0; + if (!global_clock_event) + return; + local_irq_save(flags); /* diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c index dad0dd759de2..7f7533462474 100644 --- a/arch/x86/kernel/apic/msi.c +++ b/arch/x86/kernel/apic/msi.c @@ -370,14 +370,14 @@ struct irq_domain *hpet_create_irq_domain(int hpet_id) return d; } -int hpet_assign_irq(struct irq_domain *domain, struct hpet_dev *dev, +int hpet_assign_irq(struct irq_domain *domain, struct hpet_channel *hc, int dev_num) { struct irq_alloc_info info; init_irq_alloc_info(&info, NULL); info.type = X86_IRQ_ALLOC_TYPE_HPET; - info.hpet_data = dev; + info.hpet_data = hc; info.hpet_id = hpet_dev_id(domain); info.hpet_index = dev_num; diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index e7cb78aed644..fdacb864c3dd 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -340,7 +340,7 @@ static void clear_irq_vector(struct irq_data *irqd) trace_vector_clear(irqd->irq, vector, apicd->cpu, apicd->prev_vector, apicd->prev_cpu); - per_cpu(vector_irq, apicd->cpu)[vector] = VECTOR_UNUSED; + per_cpu(vector_irq, apicd->cpu)[vector] = VECTOR_SHUTDOWN; irq_matrix_free(vector_matrix, apicd->cpu, vector, managed); apicd->vector = 0; @@ -349,7 +349,7 @@ static void clear_irq_vector(struct irq_data *irqd) if (!vector) return; - per_cpu(vector_irq, apicd->prev_cpu)[vector] = VECTOR_UNUSED; + per_cpu(vector_irq, apicd->prev_cpu)[vector] = VECTOR_SHUTDOWN; irq_matrix_free(vector_matrix, apicd->prev_cpu, vector, managed); apicd->prev_vector = 0; apicd->move_in_progress = 0; diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 7685444a106b..609e499387a1 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -50,7 +50,7 @@ __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest) cpumask_copy(tmpmsk, mask); /* If IPI should not be sent to self, clear current CPU */ if (apic_dest != APIC_DEST_ALLINC) - cpumask_clear_cpu(smp_processor_id(), tmpmsk); + __cpumask_clear_cpu(smp_processor_id(), tmpmsk); /* Collapse cpus in a cluster so a single IPI per cluster is sent */ for_each_cpu(cpu, tmpmsk) { diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 5102bf7c8192..4b4eb06e117c 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -24,6 +24,7 @@ obj-y += match.o obj-y += bugs.o obj-y += aperfmperf.o obj-y += cpuid-deps.o +obj-y += umwait.o obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o @@ -38,6 +39,7 @@ 
obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o +obj-$(CONFIG_CPU_SUP_ZHAOXIN) += zhaoxin.o obj-$(CONFIG_X86_MCE) += mce/ obj-$(CONFIG_MTRR) += mtrr/ diff --git a/arch/x86/kernel/cpu/aperfmperf.c b/arch/x86/kernel/cpu/aperfmperf.c index e71a6ff8a67e..e2f319dc992d 100644 --- a/arch/x86/kernel/cpu/aperfmperf.c +++ b/arch/x86/kernel/cpu/aperfmperf.c @@ -13,6 +13,7 @@ #include <linux/percpu.h> #include <linux/cpufreq.h> #include <linux/smp.h> +#include <linux/sched/isolation.h> #include "cpu.h" @@ -85,6 +86,9 @@ unsigned int aperfmperf_get_khz(int cpu) if (!boot_cpu_has(X86_FEATURE_APERFMPERF)) return 0; + if (!housekeeping_cpu(cpu, HK_FLAG_MISC)) + return 0; + aperfmperf_snapshot_cpu(cpu, ktime_get(), true); return per_cpu(samples.khz, cpu); } @@ -101,9 +105,12 @@ void arch_freq_prepare_all(void) if (!boot_cpu_has(X86_FEATURE_APERFMPERF)) return; - for_each_online_cpu(cpu) + for_each_online_cpu(cpu) { + if (!housekeeping_cpu(cpu, HK_FLAG_MISC)) + continue; if (!aperfmperf_snapshot_cpu(cpu, now, false)) wait = true; + } if (wait) msleep(APERFMPERF_REFRESH_DELAY_MS); @@ -117,6 +124,9 @@ unsigned int arch_freq_get_on_cpu(int cpu) if (!boot_cpu_has(X86_FEATURE_APERFMPERF)) return 0; + if (!housekeeping_cpu(cpu, HK_FLAG_MISC)) + return 0; + if (aperfmperf_snapshot_cpu(cpu, ktime_get(), true)) return per_cpu(samples.khz, cpu); diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 03b4cc0ec3a7..66ca906aa790 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -836,6 +836,16 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) } /* + * If SSBD is controlled by the SPEC_CTRL MSR, then set the proper + * bit in the mask to allow guests to use the mitigation even in the + * case where the host does not enable it. + */ + if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) || + static_cpu_has(X86_FEATURE_AMD_SSBD)) { + x86_spec_ctrl_mask |= SPEC_CTRL_SSBD; + } + + /* * We have three CPU feature flags that are in play here: * - X86_BUG_SPEC_STORE_BYPASS - CPU is susceptible. * - X86_FEATURE_SSBD - CPU is able to turn off speculative store bypass @@ -852,7 +862,6 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) x86_amd_ssb_disable(); } else { x86_spec_ctrl_base |= SPEC_CTRL_SSBD; - x86_spec_ctrl_mask |= SPEC_CTRL_SSBD; wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); } } diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c index 395d46f78582..c7503be92f35 100644 --- a/arch/x86/kernel/cpu/cacheinfo.c +++ b/arch/x86/kernel/cpu/cacheinfo.c @@ -658,8 +658,7 @@ void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id) if (c->x86 < 0x17) { /* LLC is at the node level. */ per_cpu(cpu_llc_id, cpu) = node_id; - } else if (c->x86 == 0x17 && - c->x86_model >= 0 && c->x86_model <= 0x1F) { + } else if (c->x86 == 0x17 && c->x86_model <= 0x1F) { /* * LLC is at the core complex level. * Core complex ID is ApicId[3] for these processors. 
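A note on the cacheinfo.c hunk just above: x86_model is a u8, so the dropped c->x86_model >= 0 test could never be false. This is exactly the class of dead comparison that -Wtype-limits flags, as a tiny stand-alone sample (values hypothetical) shows:

#include <stdio.h>

int main(void)
{
    unsigned char model = 0x10;

    /* gcc -Wtype-limits: "comparison is always true due to
     * limited range of data type" for the first clause. */
    if (model >= 0 && model <= 0x1F)
        printf("family 17h, model %#x: LLC at core complex level\n",
               model);
    return 0;
}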
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 2c57fffebf9b..dad20bc891d5 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -801,6 +801,30 @@ static void init_speculation_control(struct cpuinfo_x86 *c) } } +static void init_cqm(struct cpuinfo_x86 *c) +{ + if (!cpu_has(c, X86_FEATURE_CQM_LLC)) { + c->x86_cache_max_rmid = -1; + c->x86_cache_occ_scale = -1; + return; + } + + /* will be overridden if occupancy monitoring exists */ + c->x86_cache_max_rmid = cpuid_ebx(0xf); + + if (cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC) || + cpu_has(c, X86_FEATURE_CQM_MBM_TOTAL) || + cpu_has(c, X86_FEATURE_CQM_MBM_LOCAL)) { + u32 eax, ebx, ecx, edx; + + /* QoS sub-leaf, EAX=0Fh, ECX=1 */ + cpuid_count(0xf, 1, &eax, &ebx, &ecx, &edx); + + c->x86_cache_max_rmid = ecx; + c->x86_cache_occ_scale = ebx; + } +} + void get_cpu_cap(struct cpuinfo_x86 *c) { u32 eax, ebx, ecx, edx; @@ -823,6 +847,12 @@ void get_cpu_cap(struct cpuinfo_x86 *c) c->x86_capability[CPUID_7_0_EBX] = ebx; c->x86_capability[CPUID_7_ECX] = ecx; c->x86_capability[CPUID_7_EDX] = edx; + + /* Check valid sub-leaf index before accessing it */ + if (eax >= 1) { + cpuid_count(0x00000007, 1, &eax, &ebx, &ecx, &edx); + c->x86_capability[CPUID_7_1_EAX] = eax; + } } /* Extended state features: level 0x0000000d */ @@ -832,33 +862,6 @@ void get_cpu_cap(struct cpuinfo_x86 *c) c->x86_capability[CPUID_D_1_EAX] = eax; } - /* Additional Intel-defined flags: level 0x0000000F */ - if (c->cpuid_level >= 0x0000000F) { - - /* QoS sub-leaf, EAX=0Fh, ECX=0 */ - cpuid_count(0x0000000F, 0, &eax, &ebx, &ecx, &edx); - c->x86_capability[CPUID_F_0_EDX] = edx; - - if (cpu_has(c, X86_FEATURE_CQM_LLC)) { - /* will be overridden if occupancy monitoring exists */ - c->x86_cache_max_rmid = ebx; - - /* QoS sub-leaf, EAX=0Fh, ECX=1 */ - cpuid_count(0x0000000F, 1, &eax, &ebx, &ecx, &edx); - c->x86_capability[CPUID_F_1_EDX] = edx; - - if ((cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC)) || - ((cpu_has(c, X86_FEATURE_CQM_MBM_TOTAL)) || - (cpu_has(c, X86_FEATURE_CQM_MBM_LOCAL)))) { - c->x86_cache_max_rmid = ecx; - c->x86_cache_occ_scale = ebx; - } - } else { - c->x86_cache_max_rmid = -1; - c->x86_cache_occ_scale = -1; - } - } - /* AMD-defined flags: level 0x80000001 */ eax = cpuid_eax(0x80000000); c->extended_cpuid_level = eax; @@ -889,6 +892,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c) init_scattered_cpuid_features(c); init_speculation_control(c); + init_cqm(c); /* * Clear/Set all flags overridden by options, after probe. diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c index 2c0bd38a44ab..b5353244749b 100644 --- a/arch/x86/kernel/cpu/cpuid-deps.c +++ b/arch/x86/kernel/cpu/cpuid-deps.c @@ -20,6 +20,7 @@ struct cpuid_dep { * but it's difficult to tell that to the init reference checker. 
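For reference, the leaf 0xF enumeration that init_cqm() performs above can be reproduced from user space, assuming a toolchain whose <cpuid.h> provides __get_cpuid_count(); the register meanings follow the hunk above, and all-zero output simply means the CPU lacks RDT monitoring:

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
    unsigned int eax, ebx, ecx, edx;

    /* Leaf 0xf, sub-leaf 0: EBX = max RMID across the hierarchy. */
    if (!__get_cpuid_count(0xf, 0, &eax, &ebx, &ecx, &edx))
        return 1;          /* CPUID level too low */
    printf("max RMID (any resource): %u\n", ebx);

    /* Leaf 0xf, sub-leaf 1: ECX = L3 max RMID, EBX = occupancy scale. */
    if (!__get_cpuid_count(0xf, 1, &eax, &ebx, &ecx, &edx))
        return 1;
    printf("L3 max RMID: %u, occupancy scale: %u bytes/unit\n", ecx, ebx);
    return 0;
}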
*/
static const struct cpuid_dep cpuid_deps[] = {
+ { X86_FEATURE_FXSR, X86_FEATURE_FPU },
 { X86_FEATURE_XSAVEOPT, X86_FEATURE_XSAVE },
 { X86_FEATURE_XSAVEC, X86_FEATURE_XSAVE },
 { X86_FEATURE_XSAVES, X86_FEATURE_XSAVE },
@@ -27,7 +28,11 @@ static const struct cpuid_dep cpuid_deps[] = {
 { X86_FEATURE_PKU, X86_FEATURE_XSAVE },
 { X86_FEATURE_MPX, X86_FEATURE_XSAVE },
 { X86_FEATURE_XGETBV1, X86_FEATURE_XSAVE },
+ { X86_FEATURE_CMOV, X86_FEATURE_FXSR },
+ { X86_FEATURE_MMX, X86_FEATURE_FXSR },
+ { X86_FEATURE_MMXEXT, X86_FEATURE_MMX },
 { X86_FEATURE_FXSR_OPT, X86_FEATURE_FXSR },
+ { X86_FEATURE_XSAVE, X86_FEATURE_FXSR },
 { X86_FEATURE_XMM, X86_FEATURE_FXSR },
 { X86_FEATURE_XMM2, X86_FEATURE_XMM },
 { X86_FEATURE_XMM3, X86_FEATURE_XMM2 },
@@ -59,6 +64,10 @@ static const struct cpuid_dep cpuid_deps[] = {
 { X86_FEATURE_AVX512_4VNNIW, X86_FEATURE_AVX512F },
 { X86_FEATURE_AVX512_4FMAPS, X86_FEATURE_AVX512F },
 { X86_FEATURE_AVX512_VPOPCNTDQ, X86_FEATURE_AVX512F },
+ { X86_FEATURE_CQM_OCCUP_LLC, X86_FEATURE_CQM_LLC },
+ { X86_FEATURE_CQM_MBM_TOTAL, X86_FEATURE_CQM_LLC },
+ { X86_FEATURE_CQM_MBM_LOCAL, X86_FEATURE_CQM_LLC },
+ { X86_FEATURE_AVX512_BF16, X86_FEATURE_AVX512VL },
 {}
};
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index f17c1a714779..8d6d92ebeb54 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -66,6 +66,32 @@ void check_mpx_erratum(struct cpuinfo_x86 *c)
 }
}

+/*
+ * Processors which have self-snooping capability can handle conflicting
+ * memory types across CPUs by snooping their own cache. However, there
+ * exist CPU models in which having conflicting memory types still leads to
+ * unpredictable behavior, machine check errors, or hangs. Clear this
+ * feature to prevent its use on machines with known errata.
+ */
+static void check_memory_type_self_snoop_errata(struct cpuinfo_x86 *c)
+{
+ switch (c->x86_model) {
+ case INTEL_FAM6_CORE_YONAH:
+ case INTEL_FAM6_CORE2_MEROM:
+ case INTEL_FAM6_CORE2_MEROM_L:
+ case INTEL_FAM6_CORE2_PENRYN:
+ case INTEL_FAM6_CORE2_DUNNINGTON:
+ case INTEL_FAM6_NEHALEM:
+ case INTEL_FAM6_NEHALEM_G:
+ case INTEL_FAM6_NEHALEM_EP:
+ case INTEL_FAM6_NEHALEM_EX:
+ case INTEL_FAM6_WESTMERE:
+ case INTEL_FAM6_WESTMERE_EP:
+ case INTEL_FAM6_SANDYBRIDGE:
+ setup_clear_cpu_cap(X86_FEATURE_SELFSNOOP);
+ }
+}
+
 static bool ring3mwait_disabled __read_mostly;

 static int __init ring3mwait_disable(char *__unused)
@@ -304,6 +330,7 @@ static void early_init_intel(struct cpuinfo_x86 *c)
 }

 check_mpx_erratum(c);
+ check_memory_type_self_snoop_errata(c);

 /*
 * Get the number of SMT siblings early from the extended topology
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index 785050af85e5..6ea7fdc82f3c 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -99,11 +99,6 @@ static struct smca_bank_name smca_names[] = {
 [SMCA_PCIE] = { "pcie", "PCI Express Unit" },
};

-static u32 smca_bank_addrs[MAX_NR_BANKS][NR_BLOCKS] __ro_after_init =
-{
- [0 ... MAX_NR_BANKS - 1] = { [0 ... NR_BLOCKS - 1] = -1 }
-};
-
 static const char *smca_get_name(enum smca_bank_types t)
{
 if (t >= N_SMCA_BANK_TYPES)
@@ -197,6 +192,9 @@ static char buf_mcatype[MAX_MCATYPE_NAME_LEN];
 static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks);
 static DEFINE_PER_CPU(unsigned int, bank_map); /* see which banks are on */

+/* Map of banks that have more than MCA_MISC0 available.
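The cpuid_deps[] additions above matter because clearing a feature has to cascade: disabling FXSR must take CMOV, MMX, XSAVE and everything stacked on top of them down too. A simplified, self-contained model of that table walk (abbreviated feature set; not the kernel's implementation):

#include <stdbool.h>
#include <stdio.h>

enum { FPU, FXSR, CMOV, MMX, XSAVE, NFEAT };

struct dep { int feature, depends; };

static const struct dep deps[] = {
    { FXSR, FPU }, { CMOV, FXSR }, { MMX, FXSR }, { XSAVE, FXSR },
};

static bool have[NFEAT] = { true, true, true, true, true };

static void clear_feature(int f)
{
    bool changed;
    size_t i;

    have[f] = false;
    /* Re-walk the table until a full pass clears nothing (chains). */
    do {
        changed = false;
        for (i = 0; i < sizeof(deps) / sizeof(deps[0]); i++) {
            if (!have[deps[i].depends] && have[deps[i].feature]) {
                have[deps[i].feature] = false;
                changed = true;
            }
        }
    } while (changed);
}

int main(void)
{
    clear_feature(FPU);     /* "no387": the whole stack falls */
    printf("FXSR:%d CMOV:%d MMX:%d XSAVE:%d\n",
           have[FXSR], have[CMOV], have[MMX], have[XSAVE]);
    return 0;
}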
*/ +static DEFINE_PER_CPU(u32, smca_misc_banks_map); + static void amd_threshold_interrupt(void); static void amd_deferred_error_interrupt(void); @@ -206,6 +204,28 @@ static void default_deferred_error_interrupt(void) } void (*deferred_error_int_vector)(void) = default_deferred_error_interrupt; +static void smca_set_misc_banks_map(unsigned int bank, unsigned int cpu) +{ + u32 low, high; + + /* + * For SMCA enabled processors, BLKPTR field of the first MISC register + * (MCx_MISC0) indicates presence of additional MISC regs set (MISC1-4). + */ + if (rdmsr_safe(MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high)) + return; + + if (!(low & MCI_CONFIG_MCAX)) + return; + + if (rdmsr_safe(MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high)) + return; + + if (low & MASK_BLKPTR_LO) + per_cpu(smca_misc_banks_map, cpu) |= BIT(bank); + +} + static void smca_configure(unsigned int bank, unsigned int cpu) { unsigned int i, hwid_mcatype; @@ -243,6 +263,8 @@ static void smca_configure(unsigned int bank, unsigned int cpu) wrmsr(smca_config, low, high); } + smca_set_misc_banks_map(bank, cpu); + /* Return early if this bank was already initialized. */ if (smca_banks[bank].hwid) return; @@ -453,50 +475,29 @@ static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c) wrmsr(MSR_CU_DEF_ERR, low, high); } -static u32 smca_get_block_address(unsigned int bank, unsigned int block) +static u32 smca_get_block_address(unsigned int bank, unsigned int block, + unsigned int cpu) { - u32 low, high; - u32 addr = 0; - - if (smca_get_bank_type(bank) == SMCA_RESERVED) - return addr; - if (!block) return MSR_AMD64_SMCA_MCx_MISC(bank); - /* Check our cache first: */ - if (smca_bank_addrs[bank][block] != -1) - return smca_bank_addrs[bank][block]; - - /* - * For SMCA enabled processors, BLKPTR field of the first MISC register - * (MCx_MISC0) indicates presence of additional MISC regs set (MISC1-4). 
- */ - if (rdmsr_safe(MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high)) - goto out; - - if (!(low & MCI_CONFIG_MCAX)) - goto out; - - if (!rdmsr_safe(MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high) && - (low & MASK_BLKPTR_LO)) - addr = MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1); + if (!(per_cpu(smca_misc_banks_map, cpu) & BIT(bank))) + return 0; -out: - smca_bank_addrs[bank][block] = addr; - return addr; + return MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1); } static u32 get_block_address(u32 current_addr, u32 low, u32 high, - unsigned int bank, unsigned int block) + unsigned int bank, unsigned int block, + unsigned int cpu) { u32 addr = 0, offset = 0; - if ((bank >= mca_cfg.banks) || (block >= NR_BLOCKS)) + if ((bank >= per_cpu(mce_num_banks, cpu)) || (block >= NR_BLOCKS)) return addr; if (mce_flags.smca) - return smca_get_block_address(bank, block); + return smca_get_block_address(bank, block, cpu); /* Fall back to method we used for older processors: */ switch (block) { @@ -624,18 +625,19 @@ void disable_err_thresholding(struct cpuinfo_x86 *c, unsigned int bank) /* cpu init entry point, called from mce.c with preempt off */ void mce_amd_feature_init(struct cpuinfo_x86 *c) { - u32 low = 0, high = 0, address = 0; unsigned int bank, block, cpu = smp_processor_id(); + u32 low = 0, high = 0, address = 0; int offset = -1; - for (bank = 0; bank < mca_cfg.banks; ++bank) { + + for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) { if (mce_flags.smca) smca_configure(bank, cpu); disable_err_thresholding(c, bank); for (block = 0; block < NR_BLOCKS; ++block) { - address = get_block_address(address, low, high, bank, block); + address = get_block_address(address, low, high, bank, block, cpu); if (!address) break; @@ -973,7 +975,7 @@ static void amd_deferred_error_interrupt(void) { unsigned int bank; - for (bank = 0; bank < mca_cfg.banks; ++bank) + for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) log_error_deferred(bank); } @@ -1014,7 +1016,7 @@ static void amd_threshold_interrupt(void) struct threshold_block *first_block = NULL, *block = NULL, *tmp = NULL; unsigned int bank, cpu = smp_processor_id(); - for (bank = 0; bank < mca_cfg.banks; ++bank) { + for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) { if (!(per_cpu(bank_map, cpu) & (1 << bank))) continue; @@ -1201,7 +1203,7 @@ static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank, u32 low, high; int err; - if ((bank >= mca_cfg.banks) || (block >= NR_BLOCKS)) + if ((bank >= per_cpu(mce_num_banks, cpu)) || (block >= NR_BLOCKS)) return 0; if (rdmsr_safe_on_cpu(cpu, address, &low, &high)) @@ -1252,7 +1254,7 @@ static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank, if (err) goto out_free; recurse: - address = get_block_address(address, low, high, bank, ++block); + address = get_block_address(address, low, high, bank, ++block, cpu); if (!address) return 0; @@ -1435,7 +1437,7 @@ int mce_threshold_remove_device(unsigned int cpu) { unsigned int bank; - for (bank = 0; bank < mca_cfg.banks; ++bank) { + for (bank = 0; bank < per_cpu(mce_num_banks, cpu); ++bank) { if (!(per_cpu(bank_map, cpu) & (1 << bank))) continue; threshold_remove_bank(cpu, bank); @@ -1456,14 +1458,14 @@ int mce_threshold_create_device(unsigned int cpu) if (bp) return 0; - bp = kcalloc(mca_cfg.banks, sizeof(struct threshold_bank *), + bp = kcalloc(per_cpu(mce_num_banks, cpu), sizeof(struct threshold_bank *), GFP_KERNEL); if (!bp) return -ENOMEM; per_cpu(threshold_banks, cpu) = bp; - for (bank = 0; bank < mca_cfg.banks; ++bank) { + for 
(bank = 0; bank < per_cpu(mce_num_banks, cpu); ++bank) { if (!(per_cpu(bank_map, cpu) & (1 << bank))) continue; err = threshold_create_bank(cpu, bank); diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 282916f3b8d8..066562a1ea20 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -65,7 +65,23 @@ static DEFINE_MUTEX(mce_sysfs_mutex); DEFINE_PER_CPU(unsigned, mce_exception_count); -struct mce_bank *mce_banks __read_mostly; +DEFINE_PER_CPU_READ_MOSTLY(unsigned int, mce_num_banks); + +struct mce_bank { + u64 ctl; /* subevents to enable */ + bool init; /* initialise bank? */ +}; +static DEFINE_PER_CPU_READ_MOSTLY(struct mce_bank[MAX_NR_BANKS], mce_banks_array); + +#define ATTR_LEN 16 +/* One object for each MCE bank, shared by all CPUs */ +struct mce_bank_dev { + struct device_attribute attr; /* device attribute */ + char attrname[ATTR_LEN]; /* attribute name */ + u8 bank; /* bank number */ +}; +static struct mce_bank_dev mce_bank_devs[MAX_NR_BANKS]; + struct mce_vendor_flags mce_flags __read_mostly; struct mca_config mca_cfg __read_mostly = { @@ -675,6 +691,7 @@ DEFINE_PER_CPU(unsigned, mce_poll_count); */ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b) { + struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array); bool error_seen = false; struct mce m; int i; @@ -686,7 +703,7 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b) if (flags & MCP_TIMESTAMP) m.tsc = rdtsc(); - for (i = 0; i < mca_cfg.banks; i++) { + for (i = 0; i < this_cpu_read(mce_num_banks); i++) { if (!mce_banks[i].ctl || !test_bit(i, *b)) continue; @@ -788,7 +805,7 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp, char *tmp; int i; - for (i = 0; i < mca_cfg.banks; i++) { + for (i = 0; i < this_cpu_read(mce_num_banks); i++) { m->status = mce_rdmsrl(msr_ops.status(i)); if (!(m->status & MCI_STATUS_VAL)) continue; @@ -1068,7 +1085,7 @@ static void mce_clear_state(unsigned long *toclear) { int i; - for (i = 0; i < mca_cfg.banks; i++) { + for (i = 0; i < this_cpu_read(mce_num_banks); i++) { if (test_bit(i, toclear)) mce_wrmsrl(msr_ops.status(i), 0); } @@ -1122,10 +1139,11 @@ static void __mc_scan_banks(struct mce *m, struct mce *final, unsigned long *toclear, unsigned long *valid_banks, int no_way_out, int *worst) { + struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array); struct mca_config *cfg = &mca_cfg; int severity, i; - for (i = 0; i < cfg->banks; i++) { + for (i = 0; i < this_cpu_read(mce_num_banks); i++) { __clear_bit(i, toclear); if (!test_bit(i, valid_banks)) continue; @@ -1463,27 +1481,29 @@ int mce_notify_irq(void) } EXPORT_SYMBOL_GPL(mce_notify_irq); -static int __mcheck_cpu_mce_banks_init(void) +static void __mcheck_cpu_mce_banks_init(void) { + struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array); + u8 n_banks = this_cpu_read(mce_num_banks); int i; - mce_banks = kcalloc(MAX_NR_BANKS, sizeof(struct mce_bank), GFP_KERNEL); - if (!mce_banks) - return -ENOMEM; - - for (i = 0; i < MAX_NR_BANKS; i++) { + for (i = 0; i < n_banks; i++) { struct mce_bank *b = &mce_banks[i]; + /* + * Init them all, __mcheck_cpu_apply_quirks() is going to apply + * the required vendor quirks before + * __mcheck_cpu_init_clear_banks() does the final bank setup. + */ b->ctl = -1ULL; b->init = 1; } - return 0; } /* * Initialize Machine Checks for a CPU. 
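The recurring mca_cfg.banks to this_cpu_read(mce_num_banks) conversion in these hunks is the heart of the series: the bank count and the bank array become per-CPU state, so CPUs with different bank counts no longer share one global. A compact user-space model of the resulting layout (CPU count, bank limit and MCG_CAP values hypothetical):

#include <stdio.h>

#define MAX_NR_BANKS 32
#define NR_CPUS      2

/* models DEFINE_PER_CPU_READ_MOSTLY(unsigned int, mce_num_banks) */
static unsigned int mce_num_banks[NR_CPUS];

struct mce_bank { unsigned long long ctl; int init; };
static struct mce_bank mce_banks_array[NR_CPUS][MAX_NR_BANKS];

static void cap_init(int cpu, unsigned int banks_from_mcg_cap)
{
    unsigned int b = banks_from_mcg_cap, i;

    if (b > MAX_NR_BANKS) {
        printf("CPU%d: Using only %u machine check banks out of %u\n",
               cpu, MAX_NR_BANKS, b);
        b = MAX_NR_BANKS;
    }
    mce_num_banks[cpu] = b;

    for (i = 0; i < b; i++) {
        mce_banks_array[cpu][i].ctl = ~0ull;
        mce_banks_array[cpu][i].init = 1;
    }
}

int main(void)
{
    cap_init(0, 10);        /* a small core */
    cap_init(1, 40);        /* clamped to MAX_NR_BANKS */
    printf("CPU0: %u banks, CPU1: %u banks\n",
           mce_num_banks[0], mce_num_banks[1]);
    return 0;
}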
*/ -static int __mcheck_cpu_cap_init(void) +static void __mcheck_cpu_cap_init(void) { u64 cap; u8 b; @@ -1491,16 +1511,16 @@ static int __mcheck_cpu_cap_init(void) rdmsrl(MSR_IA32_MCG_CAP, cap); b = cap & MCG_BANKCNT_MASK; - if (WARN_ON_ONCE(b > MAX_NR_BANKS)) + + if (b > MAX_NR_BANKS) { + pr_warn("CPU%d: Using only %u machine check banks out of %u\n", + smp_processor_id(), MAX_NR_BANKS, b); b = MAX_NR_BANKS; + } - mca_cfg.banks = max(mca_cfg.banks, b); + this_cpu_write(mce_num_banks, b); - if (!mce_banks) { - int err = __mcheck_cpu_mce_banks_init(); - if (err) - return err; - } + __mcheck_cpu_mce_banks_init(); /* Use accurate RIP reporting if available. */ if ((cap & MCG_EXT_P) && MCG_EXT_CNT(cap) >= 9) @@ -1508,8 +1528,6 @@ static int __mcheck_cpu_cap_init(void) if (cap & MCG_SER_P) mca_cfg.ser = 1; - - return 0; } static void __mcheck_cpu_init_generic(void) @@ -1536,9 +1554,10 @@ static void __mcheck_cpu_init_generic(void) static void __mcheck_cpu_init_clear_banks(void) { + struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array); int i; - for (i = 0; i < mca_cfg.banks; i++) { + for (i = 0; i < this_cpu_read(mce_num_banks); i++) { struct mce_bank *b = &mce_banks[i]; if (!b->init) @@ -1549,6 +1568,33 @@ static void __mcheck_cpu_init_clear_banks(void) } /* + * Do a final check to see if there are any unused/RAZ banks. + * + * This must be done after the banks have been initialized and any quirks have + * been applied. + * + * Do not call this from any user-initiated flows, e.g. CPU hotplug or sysfs. + * Otherwise, a user who disables a bank will not be able to re-enable it + * without a system reboot. + */ +static void __mcheck_cpu_check_banks(void) +{ + struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array); + u64 msrval; + int i; + + for (i = 0; i < this_cpu_read(mce_num_banks); i++) { + struct mce_bank *b = &mce_banks[i]; + + if (!b->init) + continue; + + rdmsrl(msr_ops.ctl(i), msrval); + b->init = !!msrval; + } +} + +/* * During IFU recovery Sandy Bridge -EP4S processors set the RIPV and * EIPV bits in MCG_STATUS to zero on the affected logical processor (SDM * Vol 3B Table 15-20). But this confuses both the code that determines @@ -1579,6 +1625,7 @@ static void quirk_sandybridge_ifu(int bank, struct mce *m, struct pt_regs *regs) /* Add per CPU specific workarounds here */ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) { + struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array); struct mca_config *cfg = &mca_cfg; if (c->x86_vendor == X86_VENDOR_UNKNOWN) { @@ -1588,7 +1635,7 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) /* This should be disabled by the BIOS, but isn't always */ if (c->x86_vendor == X86_VENDOR_AMD) { - if (c->x86 == 15 && cfg->banks > 4) { + if (c->x86 == 15 && this_cpu_read(mce_num_banks) > 4) { /* * disable GART TBL walk error reporting, which * trips off incorrectly with the IOMMU & 3ware @@ -1607,7 +1654,7 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) * Various K7s with broken bank 0 around. Always disable * by default. */ - if (c->x86 == 6 && cfg->banks > 0) + if (c->x86 == 6 && this_cpu_read(mce_num_banks) > 0) mce_banks[0].ctl = 0; /* @@ -1629,7 +1676,7 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) * valid event later, merely don't write CTL0. 
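__mcheck_cpu_check_banks() above is a read-back probe: after the quirks and the initial CTL write, a bank whose control MSR still reads as zero is treated as read-as-zero and its init flag is dropped, so it never shows up as a writable sysfs file. The pattern, modelled stand-alone (rdmsr_ctl() is a made-up stand-in for rdmsrl(msr_ops.ctl(i), ...)):

#include <stdio.h>
#include <stdint.h>

struct bank { uint64_t ctl; int init; };

/* Pretend bank 1 is a RAZ bank: writes do not stick. */
static uint64_t rdmsr_ctl(int i)
{
    return i == 1 ? 0 : ~0ull;
}

int main(void)
{
    struct bank banks[3] = { { ~0ull, 1 }, { ~0ull, 1 }, { ~0ull, 0 } };
    int i;

    for (i = 0; i < 3; i++) {
        if (!banks[i].init)
            continue;               /* a quirk already disabled it */
        banks[i].init = !!rdmsr_ctl(i);
    }
    for (i = 0; i < 3; i++)
        printf("bank%d init=%d\n", i, banks[i].init);
    return 0;
}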
*/ - if (c->x86 == 6 && c->x86_model < 0x1A && cfg->banks > 0) + if (c->x86 == 6 && c->x86_model < 0x1A && this_cpu_read(mce_num_banks) > 0) mce_banks[0].init = 0; /* @@ -1815,7 +1862,9 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c) if (!mce_available(c)) return; - if (__mcheck_cpu_cap_init() < 0 || __mcheck_cpu_apply_quirks(c) < 0) { + __mcheck_cpu_cap_init(); + + if (__mcheck_cpu_apply_quirks(c) < 0) { mca_cfg.disabled = 1; return; } @@ -1832,6 +1881,7 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c) __mcheck_cpu_init_generic(); __mcheck_cpu_init_vendor(c); __mcheck_cpu_init_clear_banks(); + __mcheck_cpu_check_banks(); __mcheck_cpu_setup_timer(); } @@ -1863,7 +1913,7 @@ static void __mce_disable_bank(void *arg) void mce_disable_bank(int bank) { - if (bank >= mca_cfg.banks) { + if (bank >= this_cpu_read(mce_num_banks)) { pr_warn(FW_BUG "Ignoring request to disable invalid MCA bank %d.\n", bank); @@ -1949,9 +1999,10 @@ int __init mcheck_init(void) */ static void mce_disable_error_reporting(void) { + struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array); int i; - for (i = 0; i < mca_cfg.banks; i++) { + for (i = 0; i < this_cpu_read(mce_num_banks); i++) { struct mce_bank *b = &mce_banks[i]; if (b->init) @@ -2051,26 +2102,47 @@ static struct bus_type mce_subsys = { DEFINE_PER_CPU(struct device *, mce_device); -static inline struct mce_bank *attr_to_bank(struct device_attribute *attr) +static inline struct mce_bank_dev *attr_to_bank(struct device_attribute *attr) { - return container_of(attr, struct mce_bank, attr); + return container_of(attr, struct mce_bank_dev, attr); } static ssize_t show_bank(struct device *s, struct device_attribute *attr, char *buf) { - return sprintf(buf, "%llx\n", attr_to_bank(attr)->ctl); + u8 bank = attr_to_bank(attr)->bank; + struct mce_bank *b; + + if (bank >= per_cpu(mce_num_banks, s->id)) + return -EINVAL; + + b = &per_cpu(mce_banks_array, s->id)[bank]; + + if (!b->init) + return -ENODEV; + + return sprintf(buf, "%llx\n", b->ctl); } static ssize_t set_bank(struct device *s, struct device_attribute *attr, const char *buf, size_t size) { + u8 bank = attr_to_bank(attr)->bank; + struct mce_bank *b; u64 new; if (kstrtou64(buf, 0, &new) < 0) return -EINVAL; - attr_to_bank(attr)->ctl = new; + if (bank >= per_cpu(mce_num_banks, s->id)) + return -EINVAL; + + b = &per_cpu(mce_banks_array, s->id)[bank]; + + if (!b->init) + return -ENODEV; + + b->ctl = new; mce_restart(); return size; @@ -2185,7 +2257,7 @@ static void mce_device_release(struct device *dev) kfree(dev); } -/* Per cpu device init. All of the cpus still share the same ctrl bank: */ +/* Per CPU device init. 
All of the CPUs still share the same bank device: */ static int mce_device_create(unsigned int cpu) { struct device *dev; @@ -2217,8 +2289,8 @@ static int mce_device_create(unsigned int cpu) if (err) goto error; } - for (j = 0; j < mca_cfg.banks; j++) { - err = device_create_file(dev, &mce_banks[j].attr); + for (j = 0; j < per_cpu(mce_num_banks, cpu); j++) { + err = device_create_file(dev, &mce_bank_devs[j].attr); if (err) goto error2; } @@ -2228,7 +2300,7 @@ static int mce_device_create(unsigned int cpu) return 0; error2: while (--j >= 0) - device_remove_file(dev, &mce_banks[j].attr); + device_remove_file(dev, &mce_bank_devs[j].attr); error: while (--i >= 0) device_remove_file(dev, mce_device_attrs[i]); @@ -2249,8 +2321,8 @@ static void mce_device_remove(unsigned int cpu) for (i = 0; mce_device_attrs[i]; i++) device_remove_file(dev, mce_device_attrs[i]); - for (i = 0; i < mca_cfg.banks; i++) - device_remove_file(dev, &mce_banks[i].attr); + for (i = 0; i < per_cpu(mce_num_banks, cpu); i++) + device_remove_file(dev, &mce_bank_devs[i].attr); device_unregister(dev); cpumask_clear_cpu(cpu, mce_device_initialized); @@ -2271,6 +2343,7 @@ static void mce_disable_cpu(void) static void mce_reenable_cpu(void) { + struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array); int i; if (!mce_available(raw_cpu_ptr(&cpu_info))) @@ -2278,7 +2351,7 @@ static void mce_reenable_cpu(void) if (!cpuhp_tasks_frozen) cmci_reenable(); - for (i = 0; i < mca_cfg.banks; i++) { + for (i = 0; i < this_cpu_read(mce_num_banks); i++) { struct mce_bank *b = &mce_banks[i]; if (b->init) @@ -2328,10 +2401,12 @@ static __init void mce_init_banks(void) { int i; - for (i = 0; i < mca_cfg.banks; i++) { - struct mce_bank *b = &mce_banks[i]; + for (i = 0; i < MAX_NR_BANKS; i++) { + struct mce_bank_dev *b = &mce_bank_devs[i]; struct device_attribute *a = &b->attr; + b->bank = i; + sysfs_attr_init(&a->attr); a->attr.name = b->attrname; snprintf(b->attrname, ATTR_LEN, "bank%d", i); @@ -2441,22 +2516,16 @@ static int fake_panic_set(void *data, u64 val) DEFINE_DEBUGFS_ATTRIBUTE(fake_panic_fops, fake_panic_get, fake_panic_set, "%llu\n"); -static int __init mcheck_debugfs_init(void) +static void __init mcheck_debugfs_init(void) { - struct dentry *dmce, *ffake_panic; + struct dentry *dmce; dmce = mce_get_debugfs_dir(); - if (!dmce) - return -ENOMEM; - ffake_panic = debugfs_create_file_unsafe("fake_panic", 0444, dmce, - NULL, &fake_panic_fops); - if (!ffake_panic) - return -ENOMEM; - - return 0; + debugfs_create_file_unsafe("fake_panic", 0444, dmce, NULL, + &fake_panic_fops); } #else -static int __init mcheck_debugfs_init(void) { return -EINVAL; } +static void __init mcheck_debugfs_init(void) { } #endif DEFINE_STATIC_KEY_FALSE(mcsafe_key); @@ -2464,8 +2533,6 @@ EXPORT_SYMBOL_GPL(mcsafe_key); static int __init mcheck_late_init(void) { - pr_info("Using %d MCE banks\n", mca_cfg.banks); - if (mca_cfg.recovery) static_branch_inc(&mcsafe_key); diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c index 5d108f70f315..1f30117b24ba 100644 --- a/arch/x86/kernel/cpu/mce/inject.c +++ b/arch/x86/kernel/cpu/mce/inject.c @@ -645,7 +645,6 @@ static const struct file_operations readme_fops = { static struct dfs_node { char *name; - struct dentry *d; const struct file_operations *fops; umode_t perm; } dfs_fls[] = { @@ -659,49 +658,23 @@ static struct dfs_node { { .name = "README", .fops = &readme_fops, .perm = S_IRUSR | S_IRGRP | S_IROTH }, }; -static int __init debugfs_init(void) +static void __init debugfs_init(void) { unsigned 
int i; dfs_inj = debugfs_create_dir("mce-inject", NULL); - if (!dfs_inj) - return -EINVAL; - - for (i = 0; i < ARRAY_SIZE(dfs_fls); i++) { - dfs_fls[i].d = debugfs_create_file(dfs_fls[i].name, - dfs_fls[i].perm, - dfs_inj, - &i_mce, - dfs_fls[i].fops); - - if (!dfs_fls[i].d) - goto err_dfs_add; - } - - return 0; - -err_dfs_add: - while (i-- > 0) - debugfs_remove(dfs_fls[i].d); - debugfs_remove(dfs_inj); - dfs_inj = NULL; - - return -ENODEV; + for (i = 0; i < ARRAY_SIZE(dfs_fls); i++) + debugfs_create_file(dfs_fls[i].name, dfs_fls[i].perm, dfs_inj, + &i_mce, dfs_fls[i].fops); } static int __init inject_init(void) { - int err; - if (!alloc_cpumask_var(&mce_inject_cpumask, GFP_KERNEL)) return -ENOMEM; - err = debugfs_init(); - if (err) { - free_cpumask_var(mce_inject_cpumask); - return err; - } + debugfs_init(); register_nmi_handler(NMI_LOCAL, mce_raise_notify, 0, "mce_notify"); mce_register_injector_chain(&inject_nb); diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h index a34b55baa7aa..43031db429d2 100644 --- a/arch/x86/kernel/cpu/mce/internal.h +++ b/arch/x86/kernel/cpu/mce/internal.h @@ -22,17 +22,8 @@ enum severity_level { extern struct blocking_notifier_head x86_mce_decoder_chain; -#define ATTR_LEN 16 #define INITIAL_CHECK_INTERVAL 5 * 60 /* 5 minutes */ -/* One object for each MCE bank, shared by all CPUs */ -struct mce_bank { - u64 ctl; /* subevents to enable */ - unsigned char init; /* initialise bank? */ - struct device_attribute attr; /* device attribute */ - char attrname[ATTR_LEN]; /* attribute name */ -}; - struct mce_evt_llist { struct llist_node llnode; struct mce mce; @@ -47,7 +38,6 @@ struct llist_node *mce_gen_pool_prepare_records(void); extern int (*mce_severity)(struct mce *a, int tolerant, char **msg, bool is_excp); struct dentry *mce_get_debugfs_dir(void); -extern struct mce_bank *mce_banks; extern mce_banks_t mce_banks_ce_disabled; #ifdef CONFIG_X86_MCE_INTEL @@ -128,7 +118,6 @@ struct mca_config { bios_cmci_threshold : 1, __reserved : 59; - u8 banks; s8 bootlog; int tolerant; int monarch_timeout; @@ -137,6 +126,7 @@ struct mca_config { }; extern struct mca_config mca_cfg; +DECLARE_PER_CPU_READ_MOSTLY(unsigned int, mce_num_banks); struct mce_vendor_flags { /* diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c index 2d33a26d257e..210f1f5db5f7 100644 --- a/arch/x86/kernel/cpu/mce/severity.c +++ b/arch/x86/kernel/cpu/mce/severity.c @@ -400,21 +400,13 @@ static const struct file_operations severities_coverage_fops = { static int __init severities_debugfs_init(void) { - struct dentry *dmce, *fsev; + struct dentry *dmce; dmce = mce_get_debugfs_dir(); - if (!dmce) - goto err_out; - - fsev = debugfs_create_file("severities-coverage", 0444, dmce, NULL, - &severities_coverage_fops); - if (!fsev) - goto err_out; + debugfs_create_file("severities-coverage", 0444, dmce, NULL, + &severities_coverage_fops); return 0; - -err_out: - return -ENOMEM; } late_initcall(severities_debugfs_init); #endif /* CONFIG_DEBUG_FS */ diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index a813987b5552..cb0fdcaf1415 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -789,13 +789,16 @@ static struct syscore_ops mc_syscore_ops = { .resume = mc_bp_resume, }; -static int mc_cpu_online(unsigned int cpu) +static int mc_cpu_starting(unsigned int cpu) { - struct device *dev; - - dev = get_cpu_device(cpu); microcode_update_cpu(cpu); pr_debug("CPU%d added\n", 
cpu);
+ return 0;
+}
+
+static int mc_cpu_online(unsigned int cpu)
+{
+ struct device *dev = get_cpu_device(cpu);

 if (sysfs_create_group(&dev->kobj, &mc_attr_group))
 pr_err("Failed to create group for CPU%d\n", cpu);
@@ -872,7 +875,9 @@ int __init microcode_init(void)
 goto out_ucode_group;

 register_syscore_ops(&mc_syscore_ops);
- cpuhp_setup_state_nocalls(CPUHP_AP_MICROCODE_LOADER, "x86/microcode:online",
+ cpuhp_setup_state_nocalls(CPUHP_AP_MICROCODE_LOADER, "x86/microcode:starting",
+ mc_cpu_starting, NULL);
+ cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "x86/microcode:online",
 mc_cpu_online, mc_cpu_down_prep);

 pr_info("Microcode Update Driver: v%s.", DRIVER_VERSION);
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 7df29f08871b..062f77279ce3 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -17,6 +17,7 @@
 #include <linux/irq.h>
 #include <linux/kexec.h>
 #include <linux/i8253.h>
+#include <linux/random.h>
 #include <asm/processor.h>
 #include <asm/hypervisor.h>
 #include <asm/hyperv-tlfs.h>
@@ -80,6 +81,7 @@ __visible void __irq_entry hv_stimer0_vector_handler(struct pt_regs *regs)
 inc_irq_stat(hyperv_stimer0_count);
 if (hv_stimer0_handler)
 hv_stimer0_handler();
+ add_interrupt_randomness(HYPERV_STIMER0_VECTOR, 0);
 ack_APIC_irq();

 exiting_irq();
@@ -89,7 +91,7 @@ __visible void __irq_entry hv_stimer0_vector_handler(struct pt_regs *regs)
 int hv_setup_stimer0_irq(int *irq, int *vector, void (*handler)(void))
{
 *vector = HYPERV_STIMER0_VECTOR;
- *irq = 0; /* Unused on x86/x64 */
+ *irq = -1; /* Unused on x86/x64 */
 hv_stimer0_handler = handler;
 return 0;
}
@@ -266,9 +268,9 @@ static void __init ms_hyperv_init_platform(void)
 rdmsrl(HV_X64_MSR_APIC_FREQUENCY, hv_lapic_frequency);
 hv_lapic_frequency = div_u64(hv_lapic_frequency, HZ);
- lapic_timer_frequency = hv_lapic_frequency;
+ lapic_timer_period = hv_lapic_frequency;
 pr_info("Hyper-V: LAPIC Timer Frequency: %#x\n",
- lapic_timer_frequency);
+ lapic_timer_period);
 }

 register_nmi_handler(NMI_UNKNOWN, hv_nmi_unknown, NMI_FLAG_FIRST,
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 9356c1c9024d..aa5c064a6a22 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -743,7 +743,15 @@ static void prepare_set(void) __acquires(set_atomicity_lock)
 /* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */
 cr0 = read_cr0() | X86_CR0_CD;
 write_cr0(cr0);
- wbinvd();
+
+ /*
+ * Cache flushing is the most time-consuming step when programming
+ * the MTRRs. Fortunately, as per the Intel Software Developer's
+ * Manual, we can skip it if the processor supports cache self-
+ * snooping.
+ */
+ if (!static_cpu_has(X86_FEATURE_SELFSNOOP))
+ wbinvd();

 /* Save value of CR4 and clear Page Global Enable (bit 7) */
 if (boot_cpu_has(X86_FEATURE_PGE)) {
@@ -760,7 +768,10 @@ static void prepare_set(void) __acquires(set_atomicity_lock)

 /* Disable MTRRs, and set the default type to uncached */
 mtrr_wrmsr(MSR_MTRRdefType, deftype_lo & ~0xcff, deftype_hi);
- wbinvd();
+
+ /* Again, only flush caches if we have to.
*/ + if (!static_cpu_has(X86_FEATURE_SELFSNOOP)) + wbinvd(); } static void post_set(void) __releases(set_atomicity_lock) diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 2131b8bbaad7..2f4824793798 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -796,8 +796,12 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { struct rdt_resource *r = of->kn->parent->priv; - u32 sw_shareable = 0, hw_shareable = 0; - u32 exclusive = 0, pseudo_locked = 0; + /* + * Use unsigned long even though only 32 bits are used to ensure + * test_bit() is used safely. + */ + unsigned long sw_shareable = 0, hw_shareable = 0; + unsigned long exclusive = 0, pseudo_locked = 0; struct rdt_domain *dom; int i, hwb, swb, excl, psl; enum rdtgrp_mode mode; @@ -842,10 +846,10 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of, } for (i = r->cache.cbm_len - 1; i >= 0; i--) { pseudo_locked = dom->plr ? dom->plr->cbm : 0; - hwb = test_bit(i, (unsigned long *)&hw_shareable); - swb = test_bit(i, (unsigned long *)&sw_shareable); - excl = test_bit(i, (unsigned long *)&exclusive); - psl = test_bit(i, (unsigned long *)&pseudo_locked); + hwb = test_bit(i, &hw_shareable); + swb = test_bit(i, &sw_shareable); + excl = test_bit(i, &exclusive); + psl = test_bit(i, &pseudo_locked); if (hwb && swb) seq_putc(seq, 'X'); else if (hwb && !swb) @@ -2486,26 +2490,19 @@ out_destroy: */ static void cbm_ensure_valid(u32 *_val, struct rdt_resource *r) { - /* - * Convert the u32 _val to an unsigned long required by all the bit - * operations within this function. No more than 32 bits of this - * converted value can be accessed because all bit operations are - * additionally provided with cbm_len that is initialized during - * hardware enumeration using five bits from the EAX register and - * thus never can exceed 32 bits. 
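The rdtgroup.c rewrite just below replaces a (unsigned long *)&u32 cast with a widened local copy: on 64-bit, the bitmap helpers read and write a full 8-byte word, so the cast let them touch four bytes beyond the value. A self-contained illustration of the copy-in/copy-out pattern, with open-coded scans standing in for find_first_bit()/find_next_zero_bit()/bitmap_clear():

#include <stdint.h>
#include <stdio.h>

static void cbm_ensure_valid(uint32_t *_val, unsigned int cbm_len)
{
    unsigned long val = *_val;      /* widen into a full word */
    unsigned long first_bit, zero_bit, i;

    if (!val)
        return;

    for (first_bit = 0; !(val & (1ul << first_bit)); first_bit++)
        ;
    for (zero_bit = first_bit;
         zero_bit < cbm_len && (val & (1ul << zero_bit)); zero_bit++)
        ;
    /* Clear everything above the first hole: region stays contiguous. */
    for (i = zero_bit; i < cbm_len; i++)
        val &= ~(1ul << i);

    *_val = (uint32_t)val;
}

int main(void)
{
    uint32_t cbm = 0xf0f0;          /* two separate runs of bits */
    cbm_ensure_valid(&cbm, 16);
    printf("%#x\n", cbm);           /* 0xf0: lowest run survives */
    return 0;
}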
- */ - unsigned long *val = (unsigned long *)_val; + unsigned long val = *_val; unsigned int cbm_len = r->cache.cbm_len; unsigned long first_bit, zero_bit; - if (*val == 0) + if (val == 0) return; - first_bit = find_first_bit(val, cbm_len); - zero_bit = find_next_zero_bit(val, cbm_len, first_bit); + first_bit = find_first_bit(&val, cbm_len); + zero_bit = find_next_zero_bit(&val, cbm_len, first_bit); /* Clear any remaining bits to ensure contiguous region */ - bitmap_clear(val, zero_bit, cbm_len - zero_bit); + bitmap_clear(&val, zero_bit, cbm_len - zero_bit); + *_val = (u32)val; } /* diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 94aa1c72ca98..adf9b71386ef 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -26,6 +26,10 @@ struct cpuid_bit { static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 }, { X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 }, + { X86_FEATURE_CQM_LLC, CPUID_EDX, 1, 0x0000000f, 0 }, + { X86_FEATURE_CQM_OCCUP_LLC, CPUID_EDX, 0, 0x0000000f, 1 }, + { X86_FEATURE_CQM_MBM_TOTAL, CPUID_EDX, 1, 0x0000000f, 1 }, + { X86_FEATURE_CQM_MBM_LOCAL, CPUID_EDX, 2, 0x0000000f, 1 }, { X86_FEATURE_CAT_L3, CPUID_EBX, 1, 0x00000010, 0 }, { X86_FEATURE_CAT_L2, CPUID_EBX, 2, 0x00000010, 0 }, { X86_FEATURE_CDP_L3, CPUID_ECX, 2, 0x00000010, 1 }, diff --git a/arch/x86/kernel/cpu/umwait.c b/arch/x86/kernel/cpu/umwait.c new file mode 100644 index 000000000000..6a204e7336c1 --- /dev/null +++ b/arch/x86/kernel/cpu/umwait.c @@ -0,0 +1,200 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/syscore_ops.h> +#include <linux/suspend.h> +#include <linux/cpu.h> + +#include <asm/msr.h> + +#define UMWAIT_C02_ENABLE 0 + +#define UMWAIT_CTRL_VAL(max_time, c02_disable) \ + (((max_time) & MSR_IA32_UMWAIT_CONTROL_TIME_MASK) | \ + ((c02_disable) & MSR_IA32_UMWAIT_CONTROL_C02_DISABLE)) + +/* + * Cache IA32_UMWAIT_CONTROL MSR. This is a systemwide control. By default, + * umwait max time is 100000 in TSC-quanta and C0.2 is enabled + */ +static u32 umwait_control_cached = UMWAIT_CTRL_VAL(100000, UMWAIT_C02_ENABLE); + +/* + * Serialize access to umwait_control_cached and IA32_UMWAIT_CONTROL MSR in + * the sysfs write functions. + */ +static DEFINE_MUTEX(umwait_lock); + +static void umwait_update_control_msr(void * unused) +{ + lockdep_assert_irqs_disabled(); + wrmsr(MSR_IA32_UMWAIT_CONTROL, READ_ONCE(umwait_control_cached), 0); +} + +/* + * The CPU hotplug callback sets the control MSR to the global control + * value. + * + * Disable interrupts so the read of umwait_control_cached and the WRMSR + * are protected against a concurrent sysfs write. Otherwise the sysfs + * write could update the cached value after it had been read on this CPU + * and issue the IPI before the old value had been written. The IPI would + * interrupt, write the new value and after return from IPI the previous + * value would be written by this CPU. + * + * With interrupts disabled the upcoming CPU either sees the new control + * value or the IPI is updating this CPU to the new control value after + * interrupts have been reenabled. + */ +static int umwait_cpu_online(unsigned int cpu) +{ + local_irq_disable(); + umwait_update_control_msr(NULL); + local_irq_enable(); + return 0; +} + +/* + * On resume, restore IA32_UMWAIT_CONTROL MSR on the boot processor which + * is the only active CPU at this time. The MSR is set up on the APs via the + * CPU hotplug callback. 
+ *
+ * This function is invoked on resume from suspend and hibernation. On
+ * resume from suspend the restore should not be required, but we neither
+ * trust the firmware nor does it matter if the same value is written
+ * again.
+ */
+static void umwait_syscore_resume(void)
+{
+ umwait_update_control_msr(NULL);
+}
+
+static struct syscore_ops umwait_syscore_ops = {
+ .resume = umwait_syscore_resume,
+};
+
+/* sysfs interface */
+
+/*
+ * When bit 0 in IA32_UMWAIT_CONTROL MSR is 1, C0.2 is disabled.
+ * Otherwise, C0.2 is enabled.
+ */
+static inline bool umwait_ctrl_c02_enabled(u32 ctrl)
+{
+ return !(ctrl & MSR_IA32_UMWAIT_CONTROL_C02_DISABLE);
+}
+
+static inline u32 umwait_ctrl_max_time(u32 ctrl)
+{
+ return ctrl & MSR_IA32_UMWAIT_CONTROL_TIME_MASK;
+}
+
+static inline void umwait_update_control(u32 maxtime, bool c02_enable)
+{
+ u32 ctrl = maxtime & MSR_IA32_UMWAIT_CONTROL_TIME_MASK;
+
+ if (!c02_enable)
+ ctrl |= MSR_IA32_UMWAIT_CONTROL_C02_DISABLE;
+
+ WRITE_ONCE(umwait_control_cached, ctrl);
+ /* Propagate to all CPUs */
+ on_each_cpu(umwait_update_control_msr, NULL, 1);
+}
+
+static ssize_t
+enable_c02_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ u32 ctrl = READ_ONCE(umwait_control_cached);
+
+ return sprintf(buf, "%d\n", umwait_ctrl_c02_enabled(ctrl));
+}
+
+static ssize_t enable_c02_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ bool c02_enable;
+ u32 ctrl;
+ int ret;
+
+ ret = kstrtobool(buf, &c02_enable);
+ if (ret)
+ return ret;
+
+ mutex_lock(&umwait_lock);
+
+ ctrl = READ_ONCE(umwait_control_cached);
+ if (c02_enable != umwait_ctrl_c02_enabled(ctrl))
+ umwait_update_control(ctrl, c02_enable);
+
+ mutex_unlock(&umwait_lock);
+
+ return count;
+}
+static DEVICE_ATTR_RW(enable_c02);
+
+static ssize_t
+max_time_show(struct device *kobj, struct device_attribute *attr, char *buf)
+{
+ u32 ctrl = READ_ONCE(umwait_control_cached);
+
+ return sprintf(buf, "%u\n", umwait_ctrl_max_time(ctrl));
+}
+
+static ssize_t max_time_store(struct device *kobj,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ u32 max_time, ctrl;
+ int ret;
+
+ ret = kstrtou32(buf, 0, &max_time);
+ if (ret)
+ return ret;
+
+ /* bits[1:0] must be zero */
+ if (max_time & ~MSR_IA32_UMWAIT_CONTROL_TIME_MASK)
+ return -EINVAL;
+
+ mutex_lock(&umwait_lock);
+
+ ctrl = READ_ONCE(umwait_control_cached);
+ if (max_time != umwait_ctrl_max_time(ctrl))
+ umwait_update_control(max_time, umwait_ctrl_c02_enabled(ctrl));
+
+ mutex_unlock(&umwait_lock);
+
+ return count;
+}
+static DEVICE_ATTR_RW(max_time);
+
+static struct attribute *umwait_attrs[] = {
+ &dev_attr_enable_c02.attr,
+ &dev_attr_max_time.attr,
+ NULL
+};
+
+static struct attribute_group umwait_attr_group = {
+ .attrs = umwait_attrs,
+ .name = "umwait_control",
+};
+
+static int __init umwait_init(void)
+{
+ struct device *dev;
+ int ret;
+
+ if (!boot_cpu_has(X86_FEATURE_WAITPKG))
+ return -ENODEV;
+
+ ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "umwait:online",
+ umwait_cpu_online, NULL);
+
+ register_syscore_ops(&umwait_syscore_ops);
+
+ /*
+ * Add umwait control interface. Ignore failure, so at least the
+ * default values are set up in case the machine manages to boot.
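The control word the helpers above pass around is just two fields packed into a u32. A quick stand-alone check of the composition, with TIME_MASK and C02_DISABLE mirroring the layout that the MSR_IA32_UMWAIT_CONTROL_* constants encode (bit 0 disables C0.2, bit 1 is reserved, bits 31:2 hold the max wait time):

#include <stdint.h>
#include <stdio.h>

#define TIME_MASK    (~0x03u)       /* bits [31:2] */
#define C02_DISABLE  0x01u          /* bit 0 */
#define CTRL_VAL(t, c02_dis) (((t) & TIME_MASK) | ((c02_dis) & C02_DISABLE))

int main(void)
{
    /* Default: max_time = 100000 TSC quanta, C0.2 enabled. */
    uint32_t ctrl = CTRL_VAL(100000, 0);

    printf("ctrl=%#x max_time=%u c02_enabled=%d\n",
           ctrl, ctrl & TIME_MASK, !(ctrl & C02_DISABLE));
    return 0;
}

Because the two low bits of a valid max_time are always zero, storing the disable flag in bit 0 costs nothing, which is why max_time_store() rejects any value with bits [1:0] set.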
+ */ + dev = cpu_subsys.dev_root; + return sysfs_create_group(&dev->kobj, &umwait_attr_group); +} +device_initcall(umwait_init); diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index 0eda91f8eeac..3c648476d4fb 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c @@ -157,7 +157,7 @@ static void __init vmware_platform_setup(void) #ifdef CONFIG_X86_LOCAL_APIC /* Skip lapic calibration since we know the bus frequency. */ - lapic_timer_frequency = ecx / HZ; + lapic_timer_period = ecx / HZ; pr_info("Host bus clock speed read from hypervisor : %u Hz\n", ecx); #endif diff --git a/arch/x86/kernel/cpu/zhaoxin.c b/arch/x86/kernel/cpu/zhaoxin.c new file mode 100644 index 000000000000..8e6f2f4b4afe --- /dev/null +++ b/arch/x86/kernel/cpu/zhaoxin.c @@ -0,0 +1,167 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/sched.h> +#include <linux/sched/clock.h> + +#include <asm/cpufeature.h> + +#include "cpu.h" + +#define MSR_ZHAOXIN_FCR57 0x00001257 + +#define ACE_PRESENT (1 << 6) +#define ACE_ENABLED (1 << 7) +#define ACE_FCR (1 << 7) /* MSR_ZHAOXIN_FCR */ + +#define RNG_PRESENT (1 << 2) +#define RNG_ENABLED (1 << 3) +#define RNG_ENABLE (1 << 8) /* MSR_ZHAOXIN_RNG */ + +#define X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW 0x00200000 +#define X86_VMX_FEATURE_PROC_CTLS_VNMI 0x00400000 +#define X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS 0x80000000 +#define X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC 0x00000001 +#define X86_VMX_FEATURE_PROC_CTLS2_EPT 0x00000002 +#define X86_VMX_FEATURE_PROC_CTLS2_VPID 0x00000020 + +static void init_zhaoxin_cap(struct cpuinfo_x86 *c) +{ + u32 lo, hi; + + /* Test for Extended Feature Flags presence */ + if (cpuid_eax(0xC0000000) >= 0xC0000001) { + u32 tmp = cpuid_edx(0xC0000001); + + /* Enable ACE unit, if present and disabled */ + if ((tmp & (ACE_PRESENT | ACE_ENABLED)) == ACE_PRESENT) { + rdmsr(MSR_ZHAOXIN_FCR57, lo, hi); + /* Enable ACE unit */ + lo |= ACE_FCR; + wrmsr(MSR_ZHAOXIN_FCR57, lo, hi); + pr_info("CPU: Enabled ACE h/w crypto\n"); + } + + /* Enable RNG unit, if present and disabled */ + if ((tmp & (RNG_PRESENT | RNG_ENABLED)) == RNG_PRESENT) { + rdmsr(MSR_ZHAOXIN_FCR57, lo, hi); + /* Enable RNG unit */ + lo |= RNG_ENABLE; + wrmsr(MSR_ZHAOXIN_FCR57, lo, hi); + pr_info("CPU: Enabled h/w RNG\n"); + } + + /* + * Store Extended Feature Flags as word 5 of the CPU + * capability bit array + */ + c->x86_capability[CPUID_C000_0001_EDX] = cpuid_edx(0xC0000001); + } + + if (c->x86 >= 0x6) + set_cpu_cap(c, X86_FEATURE_REP_GOOD); + + cpu_detect_cache_sizes(c); +} + +static void early_init_zhaoxin(struct cpuinfo_x86 *c) +{ + if (c->x86 >= 0x6) + set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); +#ifdef CONFIG_X86_64 + set_cpu_cap(c, X86_FEATURE_SYSENTER32); +#endif + if (c->x86_power & (1 << 8)) { + set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); + set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); + } + + if (c->cpuid_level >= 0x00000001) { + u32 eax, ebx, ecx, edx; + + cpuid(0x00000001, &eax, &ebx, &ecx, &edx); + /* + * If HTT (EDX[28]) is set EBX[16:23] contain the number of + * apicids which are reserved per package. Store the resulting + * shift value for the package management code. 
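The comment above (its code completes just below) derives x86_coreid_bits from the reserved APIC-ID count in EBX[23:16] via get_count_order(), i.e. a ceiling log2. A hypothetical worked example with a made-up EBX value:

#include <stdio.h>

/* Same result as the kernel's get_count_order(): ceil(log2(x)). */
static int get_count_order(unsigned int x)
{
    int order = 0;
    unsigned int v = 1;

    while (v < x) {
        v <<= 1;
        order++;
    }
    return order;
}

int main(void)
{
    unsigned int ebx = 0x00100800;              /* EBX[23:16] = 0x10 */
    unsigned int reserved = (ebx >> 16) & 0xff; /* 16 apicids */

    printf("coreid bits: %d\n", get_count_order(reserved));  /* 4 */
    return 0;
}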
+ */ + if (edx & (1U << 28)) + c->x86_coreid_bits = get_count_order((ebx >> 16) & 0xff); + } + +} + +static void zhaoxin_detect_vmx_virtcap(struct cpuinfo_x86 *c) +{ + u32 vmx_msr_low, vmx_msr_high, msr_ctl, msr_ctl2; + + rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, vmx_msr_low, vmx_msr_high); + msr_ctl = vmx_msr_high | vmx_msr_low; + + if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW) + set_cpu_cap(c, X86_FEATURE_TPR_SHADOW); + if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_VNMI) + set_cpu_cap(c, X86_FEATURE_VNMI); + if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS) { + rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2, + vmx_msr_low, vmx_msr_high); + msr_ctl2 = vmx_msr_high | vmx_msr_low; + if ((msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC) && + (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW)) + set_cpu_cap(c, X86_FEATURE_FLEXPRIORITY); + if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_EPT) + set_cpu_cap(c, X86_FEATURE_EPT); + if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VPID) + set_cpu_cap(c, X86_FEATURE_VPID); + } +} + +static void init_zhaoxin(struct cpuinfo_x86 *c) +{ + early_init_zhaoxin(c); + init_intel_cacheinfo(c); + detect_num_cpu_cores(c); +#ifdef CONFIG_X86_32 + detect_ht(c); +#endif + + if (c->cpuid_level > 9) { + unsigned int eax = cpuid_eax(10); + + /* + * Check for version and the number of counters + * Version(eax[7:0]) can't be 0; + * Counters(eax[15:8]) should be greater than 1; + */ + if ((eax & 0xff) && (((eax >> 8) & 0xff) > 1)) + set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); + } + + if (c->x86 >= 0x6) + init_zhaoxin_cap(c); +#ifdef CONFIG_X86_64 + set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); +#endif + + if (cpu_has(c, X86_FEATURE_VMX)) + zhaoxin_detect_vmx_virtcap(c); +} + +#ifdef CONFIG_X86_32 +static unsigned int +zhaoxin_size_cache(struct cpuinfo_x86 *c, unsigned int size) +{ + return size; +} +#endif + +static const struct cpu_dev zhaoxin_cpu_dev = { + .c_vendor = "zhaoxin", + .c_ident = { " Shanghai " }, + .c_early_init = early_init_zhaoxin, + .c_init = init_zhaoxin, +#ifdef CONFIG_X86_32 + .legacy_cache_size = zhaoxin_size_cache, +#endif + .c_x86_vendor = X86_VENDOR_ZHAOXIN, +}; + +cpu_dev_register(zhaoxin_cpu_dev); diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 649fbc3fcf9f..12c70840980e 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -43,18 +43,6 @@ static DEFINE_PER_CPU(bool, in_kernel_fpu); */ DEFINE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx); -static void kernel_fpu_disable(void) -{ - WARN_ON_FPU(this_cpu_read(in_kernel_fpu)); - this_cpu_write(in_kernel_fpu, true); -} - -static void kernel_fpu_enable(void) -{ - WARN_ON_FPU(!this_cpu_read(in_kernel_fpu)); - this_cpu_write(in_kernel_fpu, false); -} - static bool kernel_fpu_disabled(void) { return this_cpu_read(in_kernel_fpu); @@ -94,42 +82,33 @@ bool irq_fpu_usable(void) } EXPORT_SYMBOL(irq_fpu_usable); -static void __kernel_fpu_begin(void) +void kernel_fpu_begin(void) { - struct fpu *fpu = ¤t->thread.fpu; + preempt_disable(); WARN_ON_FPU(!irq_fpu_usable()); + WARN_ON_FPU(this_cpu_read(in_kernel_fpu)); - kernel_fpu_disable(); + this_cpu_write(in_kernel_fpu, true); - if (!(current->flags & PF_KTHREAD)) { - if (!test_thread_flag(TIF_NEED_FPU_LOAD)) { - set_thread_flag(TIF_NEED_FPU_LOAD); - /* - * Ignore return value -- we don't care if reg state - * is clobbered. 
-			 */
-			copy_fpregs_to_fpstate(fpu);
-		}
+	if (!(current->flags & PF_KTHREAD) &&
+	    !test_thread_flag(TIF_NEED_FPU_LOAD)) {
+		set_thread_flag(TIF_NEED_FPU_LOAD);
+		/*
+		 * Ignore return value -- we don't care if reg state
+		 * is clobbered.
+		 */
+		copy_fpregs_to_fpstate(&current->thread.fpu);
 	}
 	__cpu_invalidate_fpregs_state();
 }
-
-static void __kernel_fpu_end(void)
-{
-	kernel_fpu_enable();
-}
-
-void kernel_fpu_begin(void)
-{
-	preempt_disable();
-	__kernel_fpu_begin();
-}
 EXPORT_SYMBOL_GPL(kernel_fpu_begin);
 
 void kernel_fpu_end(void)
 {
-	__kernel_fpu_end();
+	WARN_ON_FPU(!this_cpu_read(in_kernel_fpu));
+
+	this_cpu_write(in_kernel_fpu, false);
 	preempt_enable();
 }
 EXPORT_SYMBOL_GPL(kernel_fpu_end);
@@ -155,7 +134,6 @@ void fpu__save(struct fpu *fpu)
 	trace_x86_fpu_after_save(fpu);
 	fpregs_unlock();
 }
-EXPORT_SYMBOL_GPL(fpu__save);
 
 /*
  * Legacy x87 fpstate state init:
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index ef0030e3fe6b..6ce7e0a23268 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -204,12 +204,6 @@ static void __init fpu__init_system_xstate_size_legacy(void)
 	 */
 	if (!boot_cpu_has(X86_FEATURE_FPU)) {
-		/*
-		 * Disable xsave as we do not support it if i387
-		 * emulation is enabled.
-		 */
-		setup_clear_cpu_cap(X86_FEATURE_XSAVE);
-		setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
 		fpu_kernel_xstate_size = sizeof(struct swregs_state);
 	} else {
 		if (boot_cpu_has(X86_FEATURE_FXSR))
@@ -252,17 +246,20 @@ static void __init fpu__init_parse_early_param(void)
 	char *argptr = arg;
 	int bit;
 
+#ifdef CONFIG_X86_32
 	if (cmdline_find_option_bool(boot_command_line, "no387"))
+#ifdef CONFIG_MATH_EMULATION
 		setup_clear_cpu_cap(X86_FEATURE_FPU);
+#else
+		pr_err("Option 'no387' requires CONFIG_MATH_EMULATION to be enabled.\n");
+#endif
 
-	if (cmdline_find_option_bool(boot_command_line, "nofxsr")) {
+	if (cmdline_find_option_bool(boot_command_line, "nofxsr"))
 		setup_clear_cpu_cap(X86_FEATURE_FXSR);
-		setup_clear_cpu_cap(X86_FEATURE_FXSR_OPT);
-		setup_clear_cpu_cap(X86_FEATURE_XMM);
-	}
+#endif
 
 	if (cmdline_find_option_bool(boot_command_line, "noxsave"))
-		fpu__xstate_clear_all_cpu_caps();
+		setup_clear_cpu_cap(X86_FEATURE_XSAVE);
 
 	if (cmdline_find_option_bool(boot_command_line, "noxsaveopt"))
 		setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 3c36dd1784db..7b4c52aa929f 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -68,15 +68,6 @@ static unsigned int xstate_comp_offsets[sizeof(xfeatures_mask)*8];
 unsigned int fpu_user_xstate_size;
 
 /*
- * Clear all of the X86_FEATURE_* bits that are unavailable
- * when the CPU has no XSAVE support.
- */
-void fpu__xstate_clear_all_cpu_caps(void)
-{
-	setup_clear_cpu_cap(X86_FEATURE_XSAVE);
-}
-
-/*
  * Return whether the system supports a given xfeature.
* * Also return the name of the (most advanced) feature that the caller requested: @@ -709,7 +700,7 @@ static void fpu__init_disable_system_xstate(void) { xfeatures_mask = 0; cr4_clear_bits(X86_CR4_OSXSAVE); - fpu__xstate_clear_all_cpu_caps(); + setup_clear_cpu_cap(X86_FEATURE_XSAVE); } /* diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 0927bb158ffc..76228525acd0 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -22,6 +22,7 @@ #include <linux/init.h> #include <linux/list.h> #include <linux/module.h> +#include <linux/memory.h> #include <trace/syscall.h> @@ -34,16 +35,25 @@ #ifdef CONFIG_DYNAMIC_FTRACE int ftrace_arch_code_modify_prepare(void) + __acquires(&text_mutex) { + /* + * Need to grab text_mutex to prevent a race from module loading + * and live kernel patching from changing the text permissions while + * ftrace has it set to "read/write". + */ + mutex_lock(&text_mutex); set_kernel_text_rw(); set_all_modules_text_rw(); return 0; } int ftrace_arch_code_modify_post_process(void) + __releases(&text_mutex) { set_all_modules_text_ro(); set_kernel_text_ro(); + mutex_unlock(&text_mutex); return 0; } diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 16b1cbd3a61e..29ffa495bd1c 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -184,24 +184,25 @@ unsigned long __head __startup_64(unsigned long physaddr, pgtable_flags = _KERNPG_TABLE_NOENC + sme_get_me_mask(); if (la57) { - p4d = fixup_pointer(early_dynamic_pgts[next_early_pgt++], physaddr); + p4d = fixup_pointer(early_dynamic_pgts[(*next_pgt_ptr)++], + physaddr); i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD; pgd[i + 0] = (pgdval_t)p4d + pgtable_flags; pgd[i + 1] = (pgdval_t)p4d + pgtable_flags; - i = (physaddr >> P4D_SHIFT) % PTRS_PER_P4D; - p4d[i + 0] = (pgdval_t)pud + pgtable_flags; - p4d[i + 1] = (pgdval_t)pud + pgtable_flags; + i = physaddr >> P4D_SHIFT; + p4d[(i + 0) % PTRS_PER_P4D] = (pgdval_t)pud + pgtable_flags; + p4d[(i + 1) % PTRS_PER_P4D] = (pgdval_t)pud + pgtable_flags; } else { i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD; pgd[i + 0] = (pgdval_t)pud + pgtable_flags; pgd[i + 1] = (pgdval_t)pud + pgtable_flags; } - i = (physaddr >> PUD_SHIFT) % PTRS_PER_PUD; - pud[i + 0] = (pudval_t)pmd + pgtable_flags; - pud[i + 1] = (pudval_t)pmd + pgtable_flags; + i = physaddr >> PUD_SHIFT; + pud[(i + 0) % PTRS_PER_PUD] = (pudval_t)pmd + pgtable_flags; + pud[(i + 1) % PTRS_PER_PUD] = (pudval_t)pmd + pgtable_flags; pmd_entry = __PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL; /* Filter out unsupported __PAGE_KERNEL_* bits: */ @@ -211,8 +212,9 @@ unsigned long __head __startup_64(unsigned long physaddr, pmd_entry += physaddr; for (i = 0; i < DIV_ROUND_UP(_end - _text, PMD_SIZE); i++) { - int idx = i + (physaddr >> PMD_SHIFT) % PTRS_PER_PMD; - pmd[idx] = pmd_entry + i * PMD_SIZE; + int idx = i + (physaddr >> PMD_SHIFT); + + pmd[idx % PTRS_PER_PMD] = pmd_entry + i * PMD_SIZE; } /* diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index a0573f2e7763..c43e96a938d0 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -1,32 +1,44 @@ // SPDX-License-Identifier: GPL-2.0-only -#include <linux/clocksource.h> #include <linux/clockchips.h> #include <linux/interrupt.h> -#include <linux/irq.h> #include <linux/export.h> #include <linux/delay.h> -#include <linux/errno.h> -#include <linux/i8253.h> -#include <linux/slab.h> #include <linux/hpet.h> -#include <linux/init.h> #include <linux/cpu.h> -#include <linux/pm.h> -#include <linux/io.h> +#include <linux/irq.h> 
-#include <asm/cpufeature.h> -#include <asm/irqdomain.h> -#include <asm/fixmap.h> #include <asm/hpet.h> #include <asm/time.h> -#define HPET_MASK CLOCKSOURCE_MASK(32) +#undef pr_fmt +#define pr_fmt(fmt) "hpet: " fmt -#define HPET_DEV_USED_BIT 2 -#define HPET_DEV_USED (1 << HPET_DEV_USED_BIT) -#define HPET_DEV_VALID 0x8 -#define HPET_DEV_FSB_CAP 0x1000 -#define HPET_DEV_PERI_CAP 0x2000 +enum hpet_mode { + HPET_MODE_UNUSED, + HPET_MODE_LEGACY, + HPET_MODE_CLOCKEVT, + HPET_MODE_DEVICE, +}; + +struct hpet_channel { + struct clock_event_device evt; + unsigned int num; + unsigned int cpu; + unsigned int irq; + unsigned int in_use; + enum hpet_mode mode; + unsigned int boot_cfg; + char name[10]; +}; + +struct hpet_base { + unsigned int nr_channels; + unsigned int nr_clockevents; + unsigned int boot_cfg; + struct hpet_channel *channels; +}; + +#define HPET_MASK CLOCKSOURCE_MASK(32) #define HPET_MIN_CYCLES 128 #define HPET_MIN_PROG_DELTA (HPET_MIN_CYCLES + (HPET_MIN_CYCLES >> 1)) @@ -39,22 +51,25 @@ u8 hpet_blockid; /* OS timer block num */ bool hpet_msi_disable; #ifdef CONFIG_PCI_MSI -static unsigned int hpet_num_timers; +static DEFINE_PER_CPU(struct hpet_channel *, cpu_hpet_channel); +static struct irq_domain *hpet_domain; #endif + static void __iomem *hpet_virt_address; -struct hpet_dev { - struct clock_event_device evt; - unsigned int num; - int cpu; - unsigned int irq; - unsigned int flags; - char name[10]; -}; +static struct hpet_base hpet_base; + +static bool hpet_legacy_int_enabled; +static unsigned long hpet_freq; -static inline struct hpet_dev *EVT_TO_HPET_DEV(struct clock_event_device *evtdev) +bool boot_hpet_disable; +bool hpet_force_user; +static bool hpet_verbose; + +static inline +struct hpet_channel *clockevent_to_channel(struct clock_event_device *evt) { - return container_of(evtdev, struct hpet_dev, evt); + return container_of(evt, struct hpet_channel, evt); } inline unsigned int hpet_readl(unsigned int a) @@ -67,10 +82,6 @@ static inline void hpet_writel(unsigned int d, unsigned int a) writel(d, hpet_virt_address + a); } -#ifdef CONFIG_X86_64 -#include <asm/pgtable.h> -#endif - static inline void hpet_set_mapping(void) { hpet_virt_address = ioremap_nocache(hpet_address, HPET_MMAP_SIZE); @@ -85,10 +96,6 @@ static inline void hpet_clear_mapping(void) /* * HPET command line enable / disable */ -bool boot_hpet_disable; -bool hpet_force_user; -static bool hpet_verbose; - static int __init hpet_setup(char *str) { while (str) { @@ -120,13 +127,8 @@ static inline int is_hpet_capable(void) return !boot_hpet_disable && hpet_address; } -/* - * HPET timer interrupt enable / disable - */ -static bool hpet_legacy_int_enabled; - /** - * is_hpet_enabled - check whether the hpet timer interrupt is enabled + * is_hpet_enabled - Check whether the legacy HPET timer interrupt is enabled */ int is_hpet_enabled(void) { @@ -136,32 +138,36 @@ EXPORT_SYMBOL_GPL(is_hpet_enabled); static void _hpet_print_config(const char *function, int line) { - u32 i, timers, l, h; - printk(KERN_INFO "hpet: %s(%d):\n", function, line); - l = hpet_readl(HPET_ID); - h = hpet_readl(HPET_PERIOD); - timers = ((l & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT) + 1; - printk(KERN_INFO "hpet: ID: 0x%x, PERIOD: 0x%x\n", l, h); - l = hpet_readl(HPET_CFG); - h = hpet_readl(HPET_STATUS); - printk(KERN_INFO "hpet: CFG: 0x%x, STATUS: 0x%x\n", l, h); + u32 i, id, period, cfg, status, channels, l, h; + + pr_info("%s(%d):\n", function, line); + + id = hpet_readl(HPET_ID); + period = hpet_readl(HPET_PERIOD); + pr_info("ID: 0x%x, PERIOD: 0x%x\n", 
id, period); + + cfg = hpet_readl(HPET_CFG); + status = hpet_readl(HPET_STATUS); + pr_info("CFG: 0x%x, STATUS: 0x%x\n", cfg, status); + l = hpet_readl(HPET_COUNTER); h = hpet_readl(HPET_COUNTER+4); - printk(KERN_INFO "hpet: COUNTER_l: 0x%x, COUNTER_h: 0x%x\n", l, h); + pr_info("COUNTER_l: 0x%x, COUNTER_h: 0x%x\n", l, h); + + channels = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT) + 1; - for (i = 0; i < timers; i++) { + for (i = 0; i < channels; i++) { l = hpet_readl(HPET_Tn_CFG(i)); h = hpet_readl(HPET_Tn_CFG(i)+4); - printk(KERN_INFO "hpet: T%d: CFG_l: 0x%x, CFG_h: 0x%x\n", - i, l, h); + pr_info("T%d: CFG_l: 0x%x, CFG_h: 0x%x\n", i, l, h); + l = hpet_readl(HPET_Tn_CMP(i)); h = hpet_readl(HPET_Tn_CMP(i)+4); - printk(KERN_INFO "hpet: T%d: CMP_l: 0x%x, CMP_h: 0x%x\n", - i, l, h); + pr_info("T%d: CMP_l: 0x%x, CMP_h: 0x%x\n", i, l, h); + l = hpet_readl(HPET_Tn_ROUTE(i)); h = hpet_readl(HPET_Tn_ROUTE(i)+4); - printk(KERN_INFO "hpet: T%d ROUTE_l: 0x%x, ROUTE_h: 0x%x\n", - i, l, h); + pr_info("T%d ROUTE_l: 0x%x, ROUTE_h: 0x%x\n", i, l, h); } } @@ -172,31 +178,20 @@ do { \ } while (0) /* - * When the hpet driver (/dev/hpet) is enabled, we need to reserve + * When the HPET driver (/dev/hpet) is enabled, we need to reserve * timer 0 and timer 1 in case of RTC emulation. */ #ifdef CONFIG_HPET -static void hpet_reserve_msi_timers(struct hpet_data *hd); - -static void hpet_reserve_platform_timers(unsigned int id) +static void __init hpet_reserve_platform_timers(void) { - struct hpet __iomem *hpet = hpet_virt_address; - struct hpet_timer __iomem *timer = &hpet->hpet_timers[2]; - unsigned int nrtimers, i; struct hpet_data hd; - - nrtimers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT) + 1; + unsigned int i; memset(&hd, 0, sizeof(hd)); hd.hd_phys_address = hpet_address; - hd.hd_address = hpet; - hd.hd_nirqs = nrtimers; - hpet_reserve_timer(&hd, 0); - -#ifdef CONFIG_HPET_EMULATE_RTC - hpet_reserve_timer(&hd, 1); -#endif + hd.hd_address = hpet_virt_address; + hd.hd_nirqs = hpet_base.nr_channels; /* * NOTE that hd_irq[] reflects IOAPIC input pins (LEGACY_8254 @@ -206,30 +201,52 @@ static void hpet_reserve_platform_timers(unsigned int id) hd.hd_irq[0] = HPET_LEGACY_8254; hd.hd_irq[1] = HPET_LEGACY_RTC; - for (i = 2; i < nrtimers; timer++, i++) { - hd.hd_irq[i] = (readl(&timer->hpet_config) & - Tn_INT_ROUTE_CNF_MASK) >> Tn_INT_ROUTE_CNF_SHIFT; - } + for (i = 0; i < hpet_base.nr_channels; i++) { + struct hpet_channel *hc = hpet_base.channels + i; + + if (i >= 2) + hd.hd_irq[i] = hc->irq; - hpet_reserve_msi_timers(&hd); + switch (hc->mode) { + case HPET_MODE_UNUSED: + case HPET_MODE_DEVICE: + hc->mode = HPET_MODE_DEVICE; + break; + case HPET_MODE_CLOCKEVT: + case HPET_MODE_LEGACY: + hpet_reserve_timer(&hd, hc->num); + break; + } + } hpet_alloc(&hd); +} +static void __init hpet_select_device_channel(void) +{ + int i; + + for (i = 0; i < hpet_base.nr_channels; i++) { + struct hpet_channel *hc = hpet_base.channels + i; + + /* Associate the first unused channel to /dev/hpet */ + if (hc->mode == HPET_MODE_UNUSED) { + hc->mode = HPET_MODE_DEVICE; + return; + } + } } + #else -static void hpet_reserve_platform_timers(unsigned int id) { } +static inline void hpet_reserve_platform_timers(void) { } +static inline void hpet_select_device_channel(void) {} #endif -/* - * Common hpet info - */ -static unsigned long hpet_freq; - -static struct clock_event_device hpet_clockevent; - +/* Common HPET functions */ static void hpet_stop_counter(void) { u32 cfg = hpet_readl(HPET_CFG); + cfg &= ~HPET_CFG_ENABLE; hpet_writel(cfg, 
HPET_CFG); } @@ -243,6 +260,7 @@ static void hpet_reset_counter(void) static void hpet_start_counter(void) { unsigned int cfg = hpet_readl(HPET_CFG); + cfg |= HPET_CFG_ENABLE; hpet_writel(cfg, HPET_CFG); } @@ -274,24 +292,9 @@ static void hpet_enable_legacy_int(void) hpet_legacy_int_enabled = true; } -static void hpet_legacy_clockevent_register(void) -{ - /* Start HPET legacy interrupts */ - hpet_enable_legacy_int(); - - /* - * Start hpet with the boot cpu mask and make it - * global after the IO_APIC has been initialized. - */ - hpet_clockevent.cpumask = cpumask_of(boot_cpu_data.cpu_index); - clockevents_config_and_register(&hpet_clockevent, hpet_freq, - HPET_MIN_PROG_DELTA, 0x7FFFFFFF); - global_clock_event = &hpet_clockevent; - printk(KERN_DEBUG "hpet clockevent registered\n"); -} - -static int hpet_set_periodic(struct clock_event_device *evt, int timer) +static int hpet_clkevt_set_state_periodic(struct clock_event_device *evt) { + unsigned int channel = clockevent_to_channel(evt)->num; unsigned int cfg, cmp, now; uint64_t delta; @@ -300,11 +303,11 @@ static int hpet_set_periodic(struct clock_event_device *evt, int timer) delta >>= evt->shift; now = hpet_readl(HPET_COUNTER); cmp = now + (unsigned int)delta; - cfg = hpet_readl(HPET_Tn_CFG(timer)); + cfg = hpet_readl(HPET_Tn_CFG(channel)); cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL | HPET_TN_32BIT; - hpet_writel(cfg, HPET_Tn_CFG(timer)); - hpet_writel(cmp, HPET_Tn_CMP(timer)); + hpet_writel(cfg, HPET_Tn_CFG(channel)); + hpet_writel(cmp, HPET_Tn_CMP(channel)); udelay(1); /* * HPET on AMD 81xx needs a second write (with HPET_TN_SETVAL @@ -313,52 +316,55 @@ static int hpet_set_periodic(struct clock_event_device *evt, int timer) * (See AMD-8111 HyperTransport I/O Hub Data Sheet, * Publication # 24674) */ - hpet_writel((unsigned int)delta, HPET_Tn_CMP(timer)); + hpet_writel((unsigned int)delta, HPET_Tn_CMP(channel)); hpet_start_counter(); hpet_print_config(); return 0; } -static int hpet_set_oneshot(struct clock_event_device *evt, int timer) +static int hpet_clkevt_set_state_oneshot(struct clock_event_device *evt) { + unsigned int channel = clockevent_to_channel(evt)->num; unsigned int cfg; - cfg = hpet_readl(HPET_Tn_CFG(timer)); + cfg = hpet_readl(HPET_Tn_CFG(channel)); cfg &= ~HPET_TN_PERIODIC; cfg |= HPET_TN_ENABLE | HPET_TN_32BIT; - hpet_writel(cfg, HPET_Tn_CFG(timer)); + hpet_writel(cfg, HPET_Tn_CFG(channel)); return 0; } -static int hpet_shutdown(struct clock_event_device *evt, int timer) +static int hpet_clkevt_set_state_shutdown(struct clock_event_device *evt) { + unsigned int channel = clockevent_to_channel(evt)->num; unsigned int cfg; - cfg = hpet_readl(HPET_Tn_CFG(timer)); + cfg = hpet_readl(HPET_Tn_CFG(channel)); cfg &= ~HPET_TN_ENABLE; - hpet_writel(cfg, HPET_Tn_CFG(timer)); + hpet_writel(cfg, HPET_Tn_CFG(channel)); return 0; } -static int hpet_resume(struct clock_event_device *evt) +static int hpet_clkevt_legacy_resume(struct clock_event_device *evt) { hpet_enable_legacy_int(); hpet_print_config(); return 0; } -static int hpet_next_event(unsigned long delta, - struct clock_event_device *evt, int timer) +static int +hpet_clkevt_set_next_event(unsigned long delta, struct clock_event_device *evt) { + unsigned int channel = clockevent_to_channel(evt)->num; u32 cnt; s32 res; cnt = hpet_readl(HPET_COUNTER); cnt += (u32) delta; - hpet_writel(cnt, HPET_Tn_CMP(timer)); + hpet_writel(cnt, HPET_Tn_CMP(channel)); /* * HPETs are a complete disaster. 
The compare register is
@@ -387,360 +393,250 @@ static int hpet_next_event(unsigned long delta,
 	return res < HPET_MIN_CYCLES ? -ETIME : 0;
 }
 
-static int hpet_legacy_shutdown(struct clock_event_device *evt)
+static void hpet_init_clockevent(struct hpet_channel *hc, unsigned int rating)
 {
-	return hpet_shutdown(evt, 0);
-}
+	struct clock_event_device *evt = &hc->evt;
 
-static int hpet_legacy_set_oneshot(struct clock_event_device *evt)
-{
-	return hpet_set_oneshot(evt, 0);
-}
+	evt->rating = rating;
+	evt->irq = hc->irq;
+	evt->name = hc->name;
+	evt->cpumask = cpumask_of(hc->cpu);
+	evt->set_state_oneshot = hpet_clkevt_set_state_oneshot;
+	evt->set_next_event = hpet_clkevt_set_next_event;
+	evt->set_state_shutdown = hpet_clkevt_set_state_shutdown;
 
-static int hpet_legacy_set_periodic(struct clock_event_device *evt)
-{
-	return hpet_set_periodic(evt, 0);
+	evt->features = CLOCK_EVT_FEAT_ONESHOT;
+	if (hc->boot_cfg & HPET_TN_PERIODIC) {
+		evt->features |= CLOCK_EVT_FEAT_PERIODIC;
+		evt->set_state_periodic = hpet_clkevt_set_state_periodic;
+	}
 }
 
-static int hpet_legacy_resume(struct clock_event_device *evt)
+static void __init hpet_legacy_clockevent_register(struct hpet_channel *hc)
 {
-	return hpet_resume(evt);
-}
+	/*
+	 * Start HPET with the boot CPU's cpumask and make it global after
+	 * the IO_APIC has been initialized.
+	 */
+	hc->cpu = boot_cpu_data.cpu_index;
+	strncpy(hc->name, "hpet", sizeof(hc->name));
+	hpet_init_clockevent(hc, 50);
 
-static int hpet_legacy_next_event(unsigned long delta,
-				  struct clock_event_device *evt)
-{
-	return hpet_next_event(delta, evt, 0);
-}
+	hc->evt.tick_resume = hpet_clkevt_legacy_resume;
 
-/*
- * The hpet clock event device
- */
-static struct clock_event_device hpet_clockevent = {
-	.name			= "hpet",
-	.features		= CLOCK_EVT_FEAT_PERIODIC |
-				  CLOCK_EVT_FEAT_ONESHOT,
-	.set_state_periodic	= hpet_legacy_set_periodic,
-	.set_state_oneshot	= hpet_legacy_set_oneshot,
-	.set_state_shutdown	= hpet_legacy_shutdown,
-	.tick_resume		= hpet_legacy_resume,
-	.set_next_event		= hpet_legacy_next_event,
-	.irq			= 0,
-	.rating			= 50,
-};
+	/*
+	 * Legacy horrors and sins from the past. HPET used periodic mode
+	 * unconditionally forever on the legacy channel 0. Removing the
+	 * below hack and using the conditional in hpet_init_clockevent()
+	 * makes at least Qemu and one hardware machine fail to boot.
+	 * There are two issues which cause the boot failure:
+	 *
+	 * #1 After the timer delivery test in IOAPIC and the IOAPIC setup
+	 *    the next interrupt is not delivered despite the HPET channel
+	 *    being programmed correctly. Reprogramming the HPET after
+	 *    switching to IOAPIC makes it work again. After fixing this,
+	 *    the next issue surfaces:
+	 *
+	 * #2 Due to the unconditional periodic mode availability the Local
+	 *    APIC timer calibration can hijack the global clockevents
+	 *    event handler without causing damage. Using oneshot at this
+	 *    stage makes it hang because the HPET does not get
+	 *    reprogrammed due to the handler hijacking. Duh, stupid me!
+	 *
+	 * Both issues require major surgery and especially kicking the
+	 * HPET again after enabling the IOAPIC results in really nasty
+	 * hackery. This 'assume periodic works' magic has survived since
+	 * HPET support got added, so it's questionable whether this should
+	 * be fixed. Both Qemu and the failing hardware machine support
+	 * periodic mode despite the fact that both don't advertise it in
+	 * the configuration register and both need that extra kick after
+	 * switching to IOAPIC. Seems to be a feature...
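+	 *
+	 * Hence channel 0 gets the periodic feature and the periodic
+	 * handler set unconditionally right below, regardless of what its
+	 * boot configuration advertised.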
+ */ + hc->evt.features |= CLOCK_EVT_FEAT_PERIODIC; + hc->evt.set_state_periodic = hpet_clkevt_set_state_periodic; + + /* Start HPET legacy interrupts */ + hpet_enable_legacy_int(); + + clockevents_config_and_register(&hc->evt, hpet_freq, + HPET_MIN_PROG_DELTA, 0x7FFFFFFF); + global_clock_event = &hc->evt; + pr_debug("Clockevent registered\n"); +} /* * HPET MSI Support */ #ifdef CONFIG_PCI_MSI -static DEFINE_PER_CPU(struct hpet_dev *, cpu_hpet_dev); -static struct hpet_dev *hpet_devs; -static struct irq_domain *hpet_domain; - void hpet_msi_unmask(struct irq_data *data) { - struct hpet_dev *hdev = irq_data_get_irq_handler_data(data); + struct hpet_channel *hc = irq_data_get_irq_handler_data(data); unsigned int cfg; - /* unmask it */ - cfg = hpet_readl(HPET_Tn_CFG(hdev->num)); + cfg = hpet_readl(HPET_Tn_CFG(hc->num)); cfg |= HPET_TN_ENABLE | HPET_TN_FSB; - hpet_writel(cfg, HPET_Tn_CFG(hdev->num)); + hpet_writel(cfg, HPET_Tn_CFG(hc->num)); } void hpet_msi_mask(struct irq_data *data) { - struct hpet_dev *hdev = irq_data_get_irq_handler_data(data); + struct hpet_channel *hc = irq_data_get_irq_handler_data(data); unsigned int cfg; - /* mask it */ - cfg = hpet_readl(HPET_Tn_CFG(hdev->num)); + cfg = hpet_readl(HPET_Tn_CFG(hc->num)); cfg &= ~(HPET_TN_ENABLE | HPET_TN_FSB); - hpet_writel(cfg, HPET_Tn_CFG(hdev->num)); -} - -void hpet_msi_write(struct hpet_dev *hdev, struct msi_msg *msg) -{ - hpet_writel(msg->data, HPET_Tn_ROUTE(hdev->num)); - hpet_writel(msg->address_lo, HPET_Tn_ROUTE(hdev->num) + 4); + hpet_writel(cfg, HPET_Tn_CFG(hc->num)); } -void hpet_msi_read(struct hpet_dev *hdev, struct msi_msg *msg) +void hpet_msi_write(struct hpet_channel *hc, struct msi_msg *msg) { - msg->data = hpet_readl(HPET_Tn_ROUTE(hdev->num)); - msg->address_lo = hpet_readl(HPET_Tn_ROUTE(hdev->num) + 4); - msg->address_hi = 0; + hpet_writel(msg->data, HPET_Tn_ROUTE(hc->num)); + hpet_writel(msg->address_lo, HPET_Tn_ROUTE(hc->num) + 4); } -static int hpet_msi_shutdown(struct clock_event_device *evt) +static int hpet_clkevt_msi_resume(struct clock_event_device *evt) { - struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt); - - return hpet_shutdown(evt, hdev->num); -} - -static int hpet_msi_set_oneshot(struct clock_event_device *evt) -{ - struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt); - - return hpet_set_oneshot(evt, hdev->num); -} - -static int hpet_msi_set_periodic(struct clock_event_device *evt) -{ - struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt); - - return hpet_set_periodic(evt, hdev->num); -} - -static int hpet_msi_resume(struct clock_event_device *evt) -{ - struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt); - struct irq_data *data = irq_get_irq_data(hdev->irq); + struct hpet_channel *hc = clockevent_to_channel(evt); + struct irq_data *data = irq_get_irq_data(hc->irq); struct msi_msg msg; /* Restore the MSI msg and unmask the interrupt */ irq_chip_compose_msi_msg(data, &msg); - hpet_msi_write(hdev, &msg); + hpet_msi_write(hc, &msg); hpet_msi_unmask(data); return 0; } -static int hpet_msi_next_event(unsigned long delta, - struct clock_event_device *evt) -{ - struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt); - return hpet_next_event(delta, evt, hdev->num); -} - -static irqreturn_t hpet_interrupt_handler(int irq, void *data) +static irqreturn_t hpet_msi_interrupt_handler(int irq, void *data) { - struct hpet_dev *dev = (struct hpet_dev *)data; - struct clock_event_device *hevt = &dev->evt; + struct hpet_channel *hc = data; + struct clock_event_device *evt = &hc->evt; - if (!hevt->event_handler) { - printk(KERN_INFO "Spurious HPET timer 
interrupt on HPET timer %d\n", - dev->num); + if (!evt->event_handler) { + pr_info("Spurious interrupt HPET channel %d\n", hc->num); return IRQ_HANDLED; } - hevt->event_handler(hevt); + evt->event_handler(evt); return IRQ_HANDLED; } -static int hpet_setup_irq(struct hpet_dev *dev) +static int hpet_setup_msi_irq(struct hpet_channel *hc) { - - if (request_irq(dev->irq, hpet_interrupt_handler, + if (request_irq(hc->irq, hpet_msi_interrupt_handler, IRQF_TIMER | IRQF_NOBALANCING, - dev->name, dev)) + hc->name, hc)) return -1; - disable_irq(dev->irq); - irq_set_affinity(dev->irq, cpumask_of(dev->cpu)); - enable_irq(dev->irq); + disable_irq(hc->irq); + irq_set_affinity(hc->irq, cpumask_of(hc->cpu)); + enable_irq(hc->irq); - printk(KERN_DEBUG "hpet: %s irq %d for MSI\n", - dev->name, dev->irq); + pr_debug("%s irq %u for MSI\n", hc->name, hc->irq); return 0; } -/* This should be called in specific @cpu */ -static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu) +/* Invoked from the hotplug callback on @cpu */ +static void init_one_hpet_msi_clockevent(struct hpet_channel *hc, int cpu) { - struct clock_event_device *evt = &hdev->evt; - - WARN_ON(cpu != smp_processor_id()); - if (!(hdev->flags & HPET_DEV_VALID)) - return; - - hdev->cpu = cpu; - per_cpu(cpu_hpet_dev, cpu) = hdev; - evt->name = hdev->name; - hpet_setup_irq(hdev); - evt->irq = hdev->irq; + struct clock_event_device *evt = &hc->evt; - evt->rating = 110; - evt->features = CLOCK_EVT_FEAT_ONESHOT; - if (hdev->flags & HPET_DEV_PERI_CAP) { - evt->features |= CLOCK_EVT_FEAT_PERIODIC; - evt->set_state_periodic = hpet_msi_set_periodic; - } + hc->cpu = cpu; + per_cpu(cpu_hpet_channel, cpu) = hc; + hpet_setup_msi_irq(hc); - evt->set_state_shutdown = hpet_msi_shutdown; - evt->set_state_oneshot = hpet_msi_set_oneshot; - evt->tick_resume = hpet_msi_resume; - evt->set_next_event = hpet_msi_next_event; - evt->cpumask = cpumask_of(hdev->cpu); + hpet_init_clockevent(hc, 110); + evt->tick_resume = hpet_clkevt_msi_resume; clockevents_config_and_register(evt, hpet_freq, HPET_MIN_PROG_DELTA, 0x7FFFFFFF); } -#ifdef CONFIG_HPET -/* Reserve at least one timer for userspace (/dev/hpet) */ -#define RESERVE_TIMERS 1 -#else -#define RESERVE_TIMERS 0 -#endif - -static void hpet_msi_capability_lookup(unsigned int start_timer) +static struct hpet_channel *hpet_get_unused_clockevent(void) { - unsigned int id; - unsigned int num_timers; - unsigned int num_timers_used = 0; - int i, irq; - - if (hpet_msi_disable) - return; - - if (boot_cpu_has(X86_FEATURE_ARAT)) - return; - id = hpet_readl(HPET_ID); - - num_timers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT); - num_timers++; /* Value read out starts from 0 */ - hpet_print_config(); - - hpet_domain = hpet_create_irq_domain(hpet_blockid); - if (!hpet_domain) - return; - - hpet_devs = kcalloc(num_timers, sizeof(struct hpet_dev), GFP_KERNEL); - if (!hpet_devs) - return; - - hpet_num_timers = num_timers; - - for (i = start_timer; i < num_timers - RESERVE_TIMERS; i++) { - struct hpet_dev *hdev = &hpet_devs[num_timers_used]; - unsigned int cfg = hpet_readl(HPET_Tn_CFG(i)); - - /* Only consider HPET timer with MSI support */ - if (!(cfg & HPET_TN_FSB_CAP)) - continue; + int i; - hdev->flags = 0; - if (cfg & HPET_TN_PERIODIC_CAP) - hdev->flags |= HPET_DEV_PERI_CAP; - sprintf(hdev->name, "hpet%d", i); - hdev->num = i; + for (i = 0; i < hpet_base.nr_channels; i++) { + struct hpet_channel *hc = hpet_base.channels + i; - irq = hpet_assign_irq(hpet_domain, hdev, hdev->num); - if (irq <= 0) + if (hc->mode != 
HPET_MODE_CLOCKEVT || hc->in_use)
 			continue;
-
-		hdev->irq = irq;
-		hdev->flags |= HPET_DEV_FSB_CAP;
-		hdev->flags |= HPET_DEV_VALID;
-		num_timers_used++;
-		if (num_timers_used == num_possible_cpus())
-			break;
+		hc->in_use = 1;
+		return hc;
 	}
-
-	printk(KERN_INFO "HPET: %d timers in total, %d timers will be used for per-cpu timer\n",
-		num_timers, num_timers_used);
+	return NULL;
 }
 
-#ifdef CONFIG_HPET
-static void hpet_reserve_msi_timers(struct hpet_data *hd)
+static int hpet_cpuhp_online(unsigned int cpu)
 {
-	int i;
-
-	if (!hpet_devs)
-		return;
+	struct hpet_channel *hc = hpet_get_unused_clockevent();
 
-	for (i = 0; i < hpet_num_timers; i++) {
-		struct hpet_dev *hdev = &hpet_devs[i];
+	if (hc)
+		init_one_hpet_msi_clockevent(hc, cpu);
+	return 0;
+}
 
-		if (!(hdev->flags & HPET_DEV_VALID))
-			continue;
+static int hpet_cpuhp_dead(unsigned int cpu)
+{
+	struct hpet_channel *hc = per_cpu(cpu_hpet_channel, cpu);
 
-		hd->hd_irq[hdev->num] = hdev->irq;
-		hpet_reserve_timer(hd, hdev->num);
-	}
+	if (!hc)
+		return 0;
+	free_irq(hc->irq, hc);
+	hc->in_use = 0;
+	per_cpu(cpu_hpet_channel, cpu) = NULL;
+	return 0;
 }
-#endif
 
-static struct hpet_dev *hpet_get_unused_timer(void)
+static void __init hpet_select_clockevents(void)
 {
-	int i;
+	unsigned int i;
 
-	if (!hpet_devs)
-		return NULL;
+	hpet_base.nr_clockevents = 0;
 
-	for (i = 0; i < hpet_num_timers; i++) {
-		struct hpet_dev *hdev = &hpet_devs[i];
+	/* No point if MSI is disabled or CPU has an Always Running APIC Timer */
+	if (hpet_msi_disable || boot_cpu_has(X86_FEATURE_ARAT))
+		return;
 
-		if (!(hdev->flags & HPET_DEV_VALID))
-			continue;
-		if (test_and_set_bit(HPET_DEV_USED_BIT,
-			(unsigned long *)&hdev->flags))
-			continue;
-		return hdev;
-	}
-	return NULL;
-}
+	hpet_print_config();
 
-struct hpet_work_struct {
-	struct delayed_work work;
-	struct completion complete;
-};
+	hpet_domain = hpet_create_irq_domain(hpet_blockid);
+	if (!hpet_domain)
+		return;
 
-static void hpet_work(struct work_struct *w)
-{
-	struct hpet_dev *hdev;
-	int cpu = smp_processor_id();
-	struct hpet_work_struct *hpet_work;
+	for (i = 0; i < hpet_base.nr_channels; i++) {
+		struct hpet_channel *hc = hpet_base.channels + i;
+		int irq;
 
-	hpet_work = container_of(w, struct hpet_work_struct, work.work);
+		if (hc->mode != HPET_MODE_UNUSED)
+			continue;
 
-	hdev = hpet_get_unused_timer();
-	if (hdev)
-		init_one_hpet_msi_clockevent(hdev, cpu);
+		/* Only consider HPET channels with MSI support */
+		if (!(hc->boot_cfg & HPET_TN_FSB_CAP))
+			continue;
 
-	complete(&hpet_work->complete);
-}
+		sprintf(hc->name, "hpet%d", i);
 
-static int hpet_cpuhp_online(unsigned int cpu)
-{
-	struct hpet_work_struct work;
-
-	INIT_DELAYED_WORK_ONSTACK(&work.work, hpet_work);
-	init_completion(&work.complete);
-	/* FIXME: add schedule_work_on() */
-	schedule_delayed_work_on(cpu, &work.work, 0);
-	wait_for_completion(&work.complete);
-	destroy_delayed_work_on_stack(&work.work);
-	return 0;
-}
+		irq = hpet_assign_irq(hpet_domain, hc, hc->num);
+		if (irq <= 0)
+			continue;
 
-static int hpet_cpuhp_dead(unsigned int cpu)
-{
-	struct hpet_dev *hdev = per_cpu(cpu_hpet_dev, cpu);
+		hc->irq = irq;
+		hc->mode = HPET_MODE_CLOCKEVT;
 
-	if (!hdev)
-		return 0;
-	free_irq(hdev->irq, hdev);
-	hdev->flags &= ~HPET_DEV_USED;
-	per_cpu(cpu_hpet_dev, cpu) = NULL;
-	return 0;
-}
-#else
+		if (++hpet_base.nr_clockevents == num_possible_cpus())
+			break;
+	}
 
-static void hpet_msi_capability_lookup(unsigned int start_timer)
-{
-	return;
+	pr_info("%d channels of %d reserved for per-cpu timers\n",
+		hpet_base.nr_clockevents, hpet_base.nr_channels);
 }
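The claim/release protocol implemented by hpet_get_unused_clockevent(), hpet_cpuhp_online() and hpet_cpuhp_dead() above is small enough to model in isolation. The following user-space sketch is not part of the patch and the *_model names are illustrative; it only mirrors the mode/in_use bookkeeping. Each onlining CPU claims the first free HPET_MODE_CLOCKEVT channel, and an offlining CPU clears in_use so the channel can be handed to the next CPU that comes up:

/* Standalone model; build with: cc -o chan_model chan_model.c */
#include <stdio.h>

enum mode_model { MODE_UNUSED_MODEL, MODE_CLOCKEVT_MODEL };

struct chan_model {
	enum mode_model mode;
	int in_use;
};

/* Channel 1 never became a clockevent (e.g. no HPET_TN_FSB_CAP) */
static struct chan_model chans_model[4] = {
	{ MODE_CLOCKEVT_MODEL, 0 }, { MODE_UNUSED_MODEL, 0 },
	{ MODE_CLOCKEVT_MODEL, 0 }, { MODE_CLOCKEVT_MODEL, 0 },
};

/* First-fit claim, mirroring hpet_get_unused_clockevent() */
static int claim_model(void)
{
	int i;

	for (i = 0; i < 4; i++) {
		if (chans_model[i].mode != MODE_CLOCKEVT_MODEL ||
		    chans_model[i].in_use)
			continue;
		chans_model[i].in_use = 1;
		return i;
	}
	return -1;	/* no channel left; the CPU keeps its other clockevents */
}

int main(void)
{
	int a = claim_model();		/* CPU comes online: channel 0 */
	int b = claim_model();		/* next CPU: skips channel 1, gets 2 */

	printf("claimed %d and %d\n", a, b);	/* claimed 0 and 2 */

	chans_model[a].in_use = 0;	/* CPU goes offline, as in hpet_cpuhp_dead() */
	printf("re-claimed %d\n", claim_model());	/* re-claimed 0 */
	return 0;
}

Note that running out of channels is not an error in the real code either: hpet_cpuhp_online() returns 0 whether or not a channel was found, and a CPU without one simply keeps using its other clockevent sources.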
-#ifdef CONFIG_HPET -static void hpet_reserve_msi_timers(struct hpet_data *hd) -{ - return; -} -#endif +#else + +static inline void hpet_select_clockevents(void) { } #define hpet_cpuhp_online NULL #define hpet_cpuhp_dead NULL @@ -754,10 +650,10 @@ static void hpet_reserve_msi_timers(struct hpet_data *hd) /* * Reading the HPET counter is a very slow operation. If a large number of * CPUs are trying to access the HPET counter simultaneously, it can cause - * massive delay and slow down system performance dramatically. This may + * massive delays and slow down system performance dramatically. This may * happen when HPET is the default clock source instead of TSC. For a * really large system with hundreds of CPUs, the slowdown may be so - * severe that it may actually crash the system because of a NMI watchdog + * severe, that it can actually crash the system because of a NMI watchdog * soft lockup, for example. * * If multiple CPUs are trying to access the HPET counter at the same time, @@ -766,10 +662,9 @@ static void hpet_reserve_msi_timers(struct hpet_data *hd) * * This special feature is only enabled on x86-64 systems. It is unlikely * that 32-bit x86 systems will have enough CPUs to require this feature - * with its associated locking overhead. And we also need 64-bit atomic - * read. + * with its associated locking overhead. We also need 64-bit atomic read. * - * The lock and the hpet value are stored together and can be read in a + * The lock and the HPET value are stored together and can be read in a * single atomic 64-bit read. It is explicitly assumed that arch_spinlock_t * is 32 bits in size. */ @@ -858,15 +753,40 @@ static struct clocksource clocksource_hpet = { .resume = hpet_resume_counter, }; -static int hpet_clocksource_register(void) +/* + * AMD SB700 based systems with spread spectrum enabled use a SMM based + * HPET emulation to provide proper frequency setting. + * + * On such systems the SMM code is initialized with the first HPET register + * access and takes some time to complete. During this time the config + * register reads 0xffffffff. We check for max 1000 loops whether the + * config register reads a non-0xffffffff value to make sure that the + * HPET is up and running before we proceed any further. + * + * A counting loop is safe, as the HPET access takes thousands of CPU cycles. + * + * On non-SB700 based machines this check is only done once and has no + * side effects. + */ +static bool __init hpet_cfg_working(void) { - u64 start, now; - u64 t1; + int i; + + for (i = 0; i < 1000; i++) { + if (hpet_readl(HPET_CFG) != 0xFFFFFFFF) + return true; + } + + pr_warn("Config register invalid. Disabling HPET\n"); + return false; +} + +static bool __init hpet_counting(void) +{ + u64 start, now, t1; - /* Start the counter */ hpet_restart_counter(); - /* Verify whether hpet counter works */ t1 = hpet_readl(HPET_COUNTER); start = rdtsc(); @@ -877,30 +797,24 @@ static int hpet_clocksource_register(void) * 1 GHz == 200us */ do { - rep_nop(); + if (t1 != hpet_readl(HPET_COUNTER)) + return true; now = rdtsc(); } while ((now - start) < 200000UL); - if (t1 == hpet_readl(HPET_COUNTER)) { - printk(KERN_WARNING - "HPET counter not counting. HPET disabled\n"); - return -ENODEV; - } - - clocksource_register_hz(&clocksource_hpet, (u32)hpet_freq); - return 0; + pr_warn("Counter not counting. HPET disabled\n"); + return false; } -static u32 *hpet_boot_cfg; - /** * hpet_enable - Try to setup the HPET timer. Returns 1 on success. 
*/ int __init hpet_enable(void) { - u32 hpet_period, cfg, id; + u32 hpet_period, cfg, id, irq; + unsigned int i, channels; + struct hpet_channel *hc; u64 freq; - unsigned int i, last; if (!is_hpet_capable()) return 0; @@ -909,40 +823,22 @@ int __init hpet_enable(void) if (!hpet_virt_address) return 0; + /* Validate that the config register is working */ + if (!hpet_cfg_working()) + goto out_nohpet; + + /* Validate that the counter is counting */ + if (!hpet_counting()) + goto out_nohpet; + /* * Read the period and check for a sane value: */ hpet_period = hpet_readl(HPET_PERIOD); - - /* - * AMD SB700 based systems with spread spectrum enabled use a - * SMM based HPET emulation to provide proper frequency - * setting. The SMM code is initialized with the first HPET - * register access and takes some time to complete. During - * this time the config register reads 0xffffffff. We check - * for max. 1000 loops whether the config register reads a non - * 0xffffffff value to make sure that HPET is up and running - * before we go further. A counting loop is safe, as the HPET - * access takes thousands of CPU cycles. On non SB700 based - * machines this check is only done once and has no side - * effects. - */ - for (i = 0; hpet_readl(HPET_CFG) == 0xFFFFFFFF; i++) { - if (i == 1000) { - printk(KERN_WARNING - "HPET config register value = 0xFFFFFFFF. " - "Disabling HPET\n"); - goto out_nohpet; - } - } - if (hpet_period < HPET_MIN_PERIOD || hpet_period > HPET_MAX_PERIOD) goto out_nohpet; - /* - * The period is a femto seconds value. Convert it to a - * frequency. - */ + /* The period is a femtoseconds value. Convert it to a frequency. */ freq = FSEC_PER_SEC; do_div(freq, hpet_period); hpet_freq = freq; @@ -954,72 +850,90 @@ int __init hpet_enable(void) id = hpet_readl(HPET_ID); hpet_print_config(); - last = (id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT; + /* This is the HPET channel number which is zero based */ + channels = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT) + 1; -#ifdef CONFIG_HPET_EMULATE_RTC /* * The legacy routing mode needs at least two channels, tick timer * and the rtc emulation channel. 
*/ - if (!last) + if (IS_ENABLED(CONFIG_HPET_EMULATE_RTC) && channels < 2) goto out_nohpet; -#endif + hc = kcalloc(channels, sizeof(*hc), GFP_KERNEL); + if (!hc) { + pr_warn("Disabling HPET.\n"); + goto out_nohpet; + } + hpet_base.channels = hc; + hpet_base.nr_channels = channels; + + /* Read, store and sanitize the global configuration */ cfg = hpet_readl(HPET_CFG); - hpet_boot_cfg = kmalloc_array(last + 2, sizeof(*hpet_boot_cfg), - GFP_KERNEL); - if (hpet_boot_cfg) - *hpet_boot_cfg = cfg; - else - pr_warn("HPET initial state will not be saved\n"); + hpet_base.boot_cfg = cfg; cfg &= ~(HPET_CFG_ENABLE | HPET_CFG_LEGACY); hpet_writel(cfg, HPET_CFG); if (cfg) - pr_warn("Unrecognized bits %#x set in global cfg\n", cfg); + pr_warn("Global config: Unknown bits %#x\n", cfg); + + /* Read, store and sanitize the per channel configuration */ + for (i = 0; i < channels; i++, hc++) { + hc->num = i; - for (i = 0; i <= last; ++i) { cfg = hpet_readl(HPET_Tn_CFG(i)); - if (hpet_boot_cfg) - hpet_boot_cfg[i + 1] = cfg; + hc->boot_cfg = cfg; + irq = (cfg & Tn_INT_ROUTE_CNF_MASK) >> Tn_INT_ROUTE_CNF_SHIFT; + hc->irq = irq; + cfg &= ~(HPET_TN_ENABLE | HPET_TN_LEVEL | HPET_TN_FSB); hpet_writel(cfg, HPET_Tn_CFG(i)); + cfg &= ~(HPET_TN_PERIODIC | HPET_TN_PERIODIC_CAP | HPET_TN_64BIT_CAP | HPET_TN_32BIT | HPET_TN_ROUTE | HPET_TN_FSB | HPET_TN_FSB_CAP); if (cfg) - pr_warn("Unrecognized bits %#x set in cfg#%u\n", - cfg, i); + pr_warn("Channel #%u config: Unknown bits %#x\n", i, cfg); } hpet_print_config(); - if (hpet_clocksource_register()) - goto out_nohpet; + clocksource_register_hz(&clocksource_hpet, (u32)hpet_freq); if (id & HPET_ID_LEGSUP) { - hpet_legacy_clockevent_register(); + hpet_legacy_clockevent_register(&hpet_base.channels[0]); + hpet_base.channels[0].mode = HPET_MODE_LEGACY; + if (IS_ENABLED(CONFIG_HPET_EMULATE_RTC)) + hpet_base.channels[1].mode = HPET_MODE_LEGACY; return 1; } return 0; out_nohpet: + kfree(hpet_base.channels); + hpet_base.channels = NULL; + hpet_base.nr_channels = 0; hpet_clear_mapping(); hpet_address = 0; return 0; } /* - * Needs to be late, as the reserve_timer code calls kalloc ! + * The late initialization runs after the PCI quirks have been invoked + * which might have detected a system on which the HPET can be enforced. + * + * Also, the MSI machinery is not working yet when the HPET is initialized + * early. * - * Not a problem on i386 as hpet_enable is called from late_time_init, - * but on x86_64 it is necessary ! 
+ * If the HPET is enabled, then: + * + * 1) Reserve one channel for /dev/hpet if CONFIG_HPET=y + * 2) Reserve up to num_possible_cpus() channels as per CPU clockevents + * 3) Setup /dev/hpet if CONFIG_HPET=y + * 4) Register hotplug callbacks when clockevents are available */ static __init int hpet_late_init(void) { int ret; - if (boot_hpet_disable) - return -ENODEV; - if (!hpet_address) { if (!force_hpet_address) return -ENODEV; @@ -1031,21 +945,14 @@ static __init int hpet_late_init(void) if (!hpet_virt_address) return -ENODEV; - if (hpet_readl(HPET_ID) & HPET_ID_LEGSUP) - hpet_msi_capability_lookup(2); - else - hpet_msi_capability_lookup(0); - - hpet_reserve_platform_timers(hpet_readl(HPET_ID)); + hpet_select_device_channel(); + hpet_select_clockevents(); + hpet_reserve_platform_timers(); hpet_print_config(); - if (hpet_msi_disable) + if (!hpet_base.nr_clockevents) return 0; - if (boot_cpu_has(X86_FEATURE_ARAT)) - return 0; - - /* This notifier should be called after workqueue is ready */ ret = cpuhp_setup_state(CPUHP_AP_X86_HPET_ONLINE, "x86/hpet:online", hpet_cpuhp_online, NULL); if (ret) @@ -1064,47 +971,47 @@ fs_initcall(hpet_late_init); void hpet_disable(void) { - if (is_hpet_capable() && hpet_virt_address) { - unsigned int cfg = hpet_readl(HPET_CFG), id, last; - - if (hpet_boot_cfg) - cfg = *hpet_boot_cfg; - else if (hpet_legacy_int_enabled) { - cfg &= ~HPET_CFG_LEGACY; - hpet_legacy_int_enabled = false; - } - cfg &= ~HPET_CFG_ENABLE; - hpet_writel(cfg, HPET_CFG); + unsigned int i; + u32 cfg; - if (!hpet_boot_cfg) - return; + if (!is_hpet_capable() || !hpet_virt_address) + return; - id = hpet_readl(HPET_ID); - last = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT); + /* Restore boot configuration with the enable bit cleared */ + cfg = hpet_base.boot_cfg; + cfg &= ~HPET_CFG_ENABLE; + hpet_writel(cfg, HPET_CFG); - for (id = 0; id <= last; ++id) - hpet_writel(hpet_boot_cfg[id + 1], HPET_Tn_CFG(id)); + /* Restore the channel boot configuration */ + for (i = 0; i < hpet_base.nr_channels; i++) + hpet_writel(hpet_base.channels[i].boot_cfg, HPET_Tn_CFG(i)); - if (*hpet_boot_cfg & HPET_CFG_ENABLE) - hpet_writel(*hpet_boot_cfg, HPET_CFG); - } + /* If the HPET was enabled at boot time, reenable it */ + if (hpet_base.boot_cfg & HPET_CFG_ENABLE) + hpet_writel(hpet_base.boot_cfg, HPET_CFG); } #ifdef CONFIG_HPET_EMULATE_RTC -/* HPET in LegacyReplacement Mode eats up RTC interrupt line. When, HPET +/* + * HPET in LegacyReplacement mode eats up the RTC interrupt line. When HPET * is enabled, we support RTC interrupt functionality in software. + * * RTC has 3 kinds of interrupts: - * 1) Update Interrupt - generate an interrupt, every sec, when RTC clock - * is updated - * 2) Alarm Interrupt - generate an interrupt at a specific time of day - * 3) Periodic Interrupt - generate periodic interrupt, with frequencies - * 2Hz-8192Hz (2Hz-64Hz for non-root user) (all freqs in powers of 2) - * (1) and (2) above are implemented using polling at a frequency of - * 64 Hz. The exact frequency is a tradeoff between accuracy and interrupt - * overhead. (DEFAULT_RTC_INT_FREQ) - * For (3), we use interrupts at 64Hz or user specified periodic - * frequency, whichever is higher. 
+ * + * 1) Update Interrupt - generate an interrupt, every second, when the + * RTC clock is updated + * 2) Alarm Interrupt - generate an interrupt at a specific time of day + * 3) Periodic Interrupt - generate periodic interrupt, with frequencies + * 2Hz-8192Hz (2Hz-64Hz for non-root user) (all frequencies in powers of 2) + * + * (1) and (2) above are implemented using polling at a frequency of 64 Hz: + * DEFAULT_RTC_INT_FREQ. + * + * The exact frequency is a tradeoff between accuracy and interrupt overhead. + * + * For (3), we use interrupts at 64 Hz, or the user specified periodic frequency, + * if it's higher. */ #include <linux/mc146818rtc.h> #include <linux/rtc.h> @@ -1125,7 +1032,7 @@ static unsigned long hpet_pie_limit; static rtc_irq_handler irq_handler; /* - * Check that the hpet counter c1 is ahead of the c2 + * Check that the HPET counter c1 is ahead of c2 */ static inline int hpet_cnt_ahead(u32 c1, u32 c2) { @@ -1163,8 +1070,8 @@ void hpet_unregister_irq_handler(rtc_irq_handler handler) EXPORT_SYMBOL_GPL(hpet_unregister_irq_handler); /* - * Timer 1 for RTC emulation. We use one shot mode, as periodic mode - * is not supported by all HPET implementations for timer 1. + * Channel 1 for RTC emulation. We use one shot mode, as periodic mode + * is not supported by all HPET implementations for channel 1. * * hpet_rtc_timer_init() is called when the rtc is initialized. */ @@ -1177,10 +1084,11 @@ int hpet_rtc_timer_init(void) return 0; if (!hpet_default_delta) { + struct clock_event_device *evt = &hpet_base.channels[0].evt; uint64_t clc; - clc = (uint64_t) hpet_clockevent.mult * NSEC_PER_SEC; - clc >>= hpet_clockevent.shift + DEFAULT_RTC_SHIFT; + clc = (uint64_t) evt->mult * NSEC_PER_SEC; + clc >>= evt->shift + DEFAULT_RTC_SHIFT; hpet_default_delta = clc; } @@ -1209,6 +1117,7 @@ EXPORT_SYMBOL_GPL(hpet_rtc_timer_init); static void hpet_disable_rtc_channel(void) { u32 cfg = hpet_readl(HPET_T1_CFG); + cfg &= ~HPET_TN_ENABLE; hpet_writel(cfg, HPET_T1_CFG); } @@ -1250,8 +1159,7 @@ int hpet_set_rtc_irq_bit(unsigned long bit_mask) } EXPORT_SYMBOL_GPL(hpet_set_rtc_irq_bit); -int hpet_set_alarm_time(unsigned char hrs, unsigned char min, - unsigned char sec) +int hpet_set_alarm_time(unsigned char hrs, unsigned char min, unsigned char sec) { if (!is_hpet_enabled()) return 0; @@ -1271,15 +1179,18 @@ int hpet_set_periodic_freq(unsigned long freq) if (!is_hpet_enabled()) return 0; - if (freq <= DEFAULT_RTC_INT_FREQ) + if (freq <= DEFAULT_RTC_INT_FREQ) { hpet_pie_limit = DEFAULT_RTC_INT_FREQ / freq; - else { - clc = (uint64_t) hpet_clockevent.mult * NSEC_PER_SEC; + } else { + struct clock_event_device *evt = &hpet_base.channels[0].evt; + + clc = (uint64_t) evt->mult * NSEC_PER_SEC; do_div(clc, freq); - clc >>= hpet_clockevent.shift; + clc >>= evt->shift; hpet_pie_delta = clc; hpet_pie_limit = 0; } + return 1; } EXPORT_SYMBOL_GPL(hpet_set_periodic_freq); @@ -1317,8 +1228,7 @@ static void hpet_rtc_timer_reinit(void) if (hpet_rtc_flags & RTC_PIE) hpet_pie_count += lost_ints; if (printk_ratelimit()) - printk(KERN_WARNING "hpet1: lost %d rtc interrupts\n", - lost_ints); + pr_warn("Lost %d RTC interrupts\n", lost_ints); } } @@ -1340,8 +1250,7 @@ irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id) hpet_prev_update_sec = curr_time.tm_sec; } - if (hpet_rtc_flags & RTC_PIE && - ++hpet_pie_count >= hpet_pie_limit) { + if (hpet_rtc_flags & RTC_PIE && ++hpet_pie_count >= hpet_pie_limit) { rtc_int_flag |= RTC_PF; hpet_pie_count = 0; } @@ -1350,7 +1259,7 @@ irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id) 
(curr_time.tm_sec == hpet_alarm_time.tm_sec) && (curr_time.tm_min == hpet_alarm_time.tm_min) && (curr_time.tm_hour == hpet_alarm_time.tm_hour)) - rtc_int_flag |= RTC_AF; + rtc_int_flag |= RTC_AF; if (rtc_int_flag) { rtc_int_flag |= (RTC_IRQF | (RTC_NUM_INTS << 8)); diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c index 0d307a657abb..2b7999a1a50a 100644 --- a/arch/x86/kernel/i8253.c +++ b/arch/x86/kernel/i8253.c @@ -8,6 +8,7 @@ #include <linux/timex.h> #include <linux/i8253.h> +#include <asm/apic.h> #include <asm/hpet.h> #include <asm/time.h> #include <asm/smp.h> @@ -18,10 +19,32 @@ */ struct clock_event_device *global_clock_event; -void __init setup_pit_timer(void) +/* + * Modern chipsets can disable the PIT clock which makes it unusable. It + * would be possible to enable the clock but the registers are chipset + * specific and not discoverable. Avoid the whack a mole game. + * + * These platforms have discoverable TSC/CPU frequencies but this also + * requires to know the local APIC timer frequency as it normally is + * calibrated against the PIT interrupt. + */ +static bool __init use_pit(void) +{ + if (!IS_ENABLED(CONFIG_X86_TSC) || !boot_cpu_has(X86_FEATURE_TSC)) + return true; + + /* This also returns true when APIC is disabled */ + return apic_needs_pit(); +} + +bool __init pit_timer_init(void) { + if (!use_pit()) + return false; + clockevent_i8253_init(true); global_clock_event = &i8253_clockevent; + return true; } #ifndef CONFIG_X86_64 diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index d2482bbbe3d0..87ef69a72c52 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -319,7 +319,8 @@ void __init idt_setup_apic_and_irq_gates(void) #ifdef CONFIG_X86_LOCAL_APIC for_each_clear_bit_from(i, system_vectors, NR_VECTORS) { set_bit(i, system_vectors); - set_intr_gate(i, spurious_interrupt); + entry = spurious_entries_start + 8 * (i - FIRST_SYSTEM_VECTOR); + set_intr_gate(i, entry); } #endif } diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 9b68b5b00ac9..cc496eb7a8d2 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -247,7 +247,7 @@ __visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs) if (!handle_irq(desc, regs)) { ack_APIC_irq(); - if (desc != VECTOR_RETRIGGERED) { + if (desc != VECTOR_RETRIGGERED && desc != VECTOR_SHUTDOWN) { pr_emerg_ratelimited("%s: %d.%d No irq handler for vector\n", __func__, smp_processor_id(), vector); diff --git a/arch/x86/kernel/jailhouse.c b/arch/x86/kernel/jailhouse.c index 1b2ee55a2dfb..ba95bc70460d 100644 --- a/arch/x86/kernel/jailhouse.c +++ b/arch/x86/kernel/jailhouse.c @@ -45,7 +45,7 @@ static void jailhouse_get_wallclock(struct timespec64 *now) static void __init jailhouse_timer_init(void) { - lapic_timer_frequency = setup_data.apic_khz * (1000 / HZ); + lapic_timer_period = setup_data.apic_khz * (1000 / HZ); } static unsigned long jailhouse_get_tsc(void) diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c index e631c358f7f4..044053235302 100644 --- a/arch/x86/kernel/jump_label.c +++ b/arch/x86/kernel/jump_label.c @@ -35,41 +35,43 @@ static void bug_at(unsigned char *ip, int line) BUG(); } -static void __ref __jump_label_transform(struct jump_entry *entry, - enum jump_label_type type, - int init) +static void __jump_label_set_jump_code(struct jump_entry *entry, + enum jump_label_type type, + union jump_code_union *code, + int init) { - union jump_code_union jmp; const unsigned char default_nop[] = { STATIC_KEY_INIT_NOP }; const unsigned char *ideal_nop = 
ideal_nops[NOP_ATOMIC5]; - const void *expect, *code; + const void *expect; int line; - jmp.jump = 0xe9; - jmp.offset = jump_entry_target(entry) - - (jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE); + code->jump = 0xe9; + code->offset = jump_entry_target(entry) - + (jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE); - if (type == JUMP_LABEL_JMP) { - if (init) { - expect = default_nop; line = __LINE__; - } else { - expect = ideal_nop; line = __LINE__; - } - - code = &jmp.code; + if (init) { + expect = default_nop; line = __LINE__; + } else if (type == JUMP_LABEL_JMP) { + expect = ideal_nop; line = __LINE__; } else { - if (init) { - expect = default_nop; line = __LINE__; - } else { - expect = &jmp.code; line = __LINE__; - } - - code = ideal_nop; + expect = code->code; line = __LINE__; } if (memcmp((void *)jump_entry_code(entry), expect, JUMP_LABEL_NOP_SIZE)) bug_at((void *)jump_entry_code(entry), line); + if (type == JUMP_LABEL_NOP) + memcpy(code, ideal_nop, JUMP_LABEL_NOP_SIZE); +} + +static void __ref __jump_label_transform(struct jump_entry *entry, + enum jump_label_type type, + int init) +{ + union jump_code_union code; + + __jump_label_set_jump_code(entry, type, &code, init); + /* * As long as only a single processor is running and the code is still * not marked as RO, text_poke_early() can be used; Checking that @@ -82,12 +84,12 @@ static void __ref __jump_label_transform(struct jump_entry *entry, * always nop being the 'currently valid' instruction */ if (init || system_state == SYSTEM_BOOTING) { - text_poke_early((void *)jump_entry_code(entry), code, + text_poke_early((void *)jump_entry_code(entry), &code, JUMP_LABEL_NOP_SIZE); return; } - text_poke_bp((void *)jump_entry_code(entry), code, JUMP_LABEL_NOP_SIZE, + text_poke_bp((void *)jump_entry_code(entry), &code, JUMP_LABEL_NOP_SIZE, (void *)jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE); } @@ -99,6 +101,75 @@ void arch_jump_label_transform(struct jump_entry *entry, mutex_unlock(&text_mutex); } +#define TP_VEC_MAX (PAGE_SIZE / sizeof(struct text_poke_loc)) +static struct text_poke_loc tp_vec[TP_VEC_MAX]; +static int tp_vec_nr; + +bool arch_jump_label_transform_queue(struct jump_entry *entry, + enum jump_label_type type) +{ + struct text_poke_loc *tp; + void *entry_code; + + if (system_state == SYSTEM_BOOTING) { + /* + * Fallback to the non-batching mode. + */ + arch_jump_label_transform(entry, type); + return true; + } + + /* + * No more space in the vector, tell upper layer to apply + * the queue before continuing. + */ + if (tp_vec_nr == TP_VEC_MAX) + return false; + + tp = &tp_vec[tp_vec_nr]; + + entry_code = (void *)jump_entry_code(entry); + + /* + * The INT3 handler will do a bsearch in the queue, so we need entries + * to be sorted. We can survive an unsorted list by rejecting the entry, + * forcing the generic jump_label code to apply the queue. Warning once, + * to raise the attention to the case of an unsorted entry that is + * better not happen, because, in the worst case we will perform in the + * same way as we do without batching - with some more overhead. 
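+	 *
+	 * In practice the jump entry tables are sorted by the generic
+	 * jump_label code at init time (jump_label_sort_entries()), so
+	 * this warning is not expected to trigger.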
+ */ + if (tp_vec_nr > 0) { + int prev = tp_vec_nr - 1; + struct text_poke_loc *prev_tp = &tp_vec[prev]; + + if (WARN_ON_ONCE(prev_tp->addr > entry_code)) + return false; + } + + __jump_label_set_jump_code(entry, type, + (union jump_code_union *) &tp->opcode, 0); + + tp->addr = entry_code; + tp->detour = entry_code + JUMP_LABEL_NOP_SIZE; + tp->len = JUMP_LABEL_NOP_SIZE; + + tp_vec_nr++; + + return true; +} + +void arch_jump_label_transform_apply(void) +{ + if (!tp_vec_nr) + return; + + mutex_lock(&text_mutex); + text_poke_bp_batch(tp_vec, tp_vec_nr); + mutex_unlock(&text_mutex); + + tp_vec_nr = 0; +} + static enum { JL_STATE_START, JL_STATE_NO_UPDATE, diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c index 07c30ee17425..bb7e1132290b 100644 --- a/arch/x86/kernel/perf_regs.c +++ b/arch/x86/kernel/perf_regs.c @@ -74,6 +74,9 @@ u64 perf_reg_value(struct pt_regs *regs, int idx) return regs_get_register(regs, pt_regs_offset[idx]); } +#define PERF_REG_X86_RESERVED (((1ULL << PERF_REG_X86_XMM0) - 1) & \ + ~((1ULL << PERF_REG_X86_MAX) - 1)) + #ifdef CONFIG_X86_32 #define REG_NOSUPPORT ((1ULL << PERF_REG_X86_R8) | \ (1ULL << PERF_REG_X86_R9) | \ @@ -86,7 +89,7 @@ u64 perf_reg_value(struct pt_regs *regs, int idx) int perf_reg_validate(u64 mask) { - if (!mask || (mask & REG_NOSUPPORT)) + if (!mask || (mask & (REG_NOSUPPORT | PERF_REG_X86_RESERVED))) return -EINVAL; return 0; @@ -112,7 +115,7 @@ void perf_get_regs_user(struct perf_regs *regs_user, int perf_reg_validate(u64 mask) { - if (!mask || (mask & REG_NOSUPPORT)) + if (!mask || (mask & (REG_NOSUPPORT | PERF_REG_X86_RESERVED))) return -EINVAL; return 0; diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index a166c960bc9e..ee9099061d01 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -25,6 +25,7 @@ #include <linux/rcupdate.h> #include <linux/export.h> #include <linux/context_tracking.h> +#include <linux/nospec.h> #include <linux/uaccess.h> #include <asm/pgtable.h> @@ -397,22 +398,12 @@ static int putreg(struct task_struct *child, case offsetof(struct user_regs_struct,fs_base): if (value >= TASK_SIZE_MAX) return -EIO; - /* - * When changing the FS base, use do_arch_prctl_64() - * to set the index to zero and to set the base - * as requested. - */ - if (child->thread.fsbase != value) - return do_arch_prctl_64(child, ARCH_SET_FS, value); + x86_fsbase_write_task(child, value); return 0; case offsetof(struct user_regs_struct,gs_base): - /* - * Exactly the same here as the %fs handling above. 
- */ if (value >= TASK_SIZE_MAX) return -EIO; - if (child->thread.gsbase != value) - return do_arch_prctl_64(child, ARCH_SET_GS, value); + x86_gsbase_write_task(child, value); return 0; #endif } @@ -645,7 +636,8 @@ static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n) unsigned long val = 0; if (n < HBP_NUM) { - struct perf_event *bp = thread->ptrace_bps[n]; + int index = array_index_nospec(n, HBP_NUM); + struct perf_event *bp = thread->ptrace_bps[index]; if (bp) val = bp->hw.info.address; @@ -747,9 +739,6 @@ static int ioperm_get(struct task_struct *target, void ptrace_disable(struct task_struct *child) { user_disable_single_step(child); -#ifdef TIF_SYSCALL_EMU - clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); -#endif } #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c index 0ff3e294d0e5..10125358b9c4 100644 --- a/arch/x86/kernel/pvclock.c +++ b/arch/x86/kernel/pvclock.c @@ -3,6 +3,7 @@ */ +#include <linux/clocksource.h> #include <linux/kernel.h> #include <linux/percpu.h> #include <linux/notifier.h> diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 4693e2f3a03e..96421f97e75c 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -144,7 +144,7 @@ void native_send_call_func_ipi(const struct cpumask *mask) } cpumask_copy(allbutself, cpu_online_mask); - cpumask_clear_cpu(smp_processor_id(), allbutself); + __cpumask_clear_cpu(smp_processor_id(), allbutself); if (cpumask_equal(mask, allbutself) && cpumask_equal(cpu_online_mask, cpu_callout_mask)) diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index 0e14f6c0d35e..07c0e960b3f3 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c @@ -82,8 +82,11 @@ static void __init setup_default_timer_irq(void) /* Default timer init function */ void __init hpet_time_init(void) { - if (!hpet_enable()) - setup_pit_timer(); + if (!hpet_enable()) { + if (!pit_timer_init()) + return; + } + setup_default_timer_irq(); } diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c index a5b802a12212..71d3fef1edc9 100644 --- a/arch/x86/kernel/tls.c +++ b/arch/x86/kernel/tls.c @@ -5,6 +5,7 @@ #include <linux/user.h> #include <linux/regset.h> #include <linux/syscalls.h> +#include <linux/nospec.h> #include <linux/uaccess.h> #include <asm/desc.h> @@ -220,6 +221,7 @@ int do_get_thread_area(struct task_struct *p, int idx, struct user_desc __user *u_info) { struct user_desc info; + int index; if (idx == -1 && get_user(idx, &u_info->entry_number)) return -EFAULT; @@ -227,8 +229,11 @@ int do_get_thread_area(struct task_struct *p, int idx, if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) return -EINVAL; - fill_user_desc(&info, idx, - &p->thread.tls_array[idx - GDT_ENTRY_TLS_MIN]); + index = idx - GDT_ENTRY_TLS_MIN; + index = array_index_nospec(index, + GDT_ENTRY_TLS_MAX - GDT_ENTRY_TLS_MIN + 1); + + fill_user_desc(&info, idx, &p->thread.tls_array[index]); if (copy_to_user(u_info, &info, sizeof(info))) return -EFAULT; diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 0b29e58f288e..59b57605e66c 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -632,31 +632,38 @@ unsigned long native_calibrate_tsc(void) crystal_khz = ecx_hz / 1000; - if (crystal_khz == 0) { - switch (boot_cpu_data.x86_model) { - case INTEL_FAM6_SKYLAKE_MOBILE: - case INTEL_FAM6_SKYLAKE_DESKTOP: - case INTEL_FAM6_KABYLAKE_MOBILE: - case INTEL_FAM6_KABYLAKE_DESKTOP: - crystal_khz = 24000; /* 24.0 MHz */ - break; - case 
INTEL_FAM6_ATOM_GOLDMONT_X:
-			crystal_khz = 25000;	/* 25.0 MHz */
-			break;
-		case INTEL_FAM6_ATOM_GOLDMONT:
-			crystal_khz = 19200;	/* 19.2 MHz */
-			break;
-		}
-	}
+	/*
+	 * Denverton SoCs don't report the crystal clock, and also don't
+	 * support CPUID.0x16 for the calculation below, so hardcode the
+	 * 25MHz crystal clock.
+	 */
+	if (crystal_khz == 0 &&
+	    boot_cpu_data.x86_model == INTEL_FAM6_ATOM_GOLDMONT_X)
+		crystal_khz = 25000;
 
-	if (crystal_khz == 0)
-		return 0;
 	/*
-	 * TSC frequency determined by CPUID is a "hardware reported"
+	 * TSC frequency reported directly by CPUID is a "hardware reported"
 	 * frequency and is the most accurate one we have so far. This
 	 * is considered a known frequency.
 	 */
-	setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
+	if (crystal_khz != 0)
+		setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
+
+	/*
+	 * Some Intel SoCs like Skylake and Kabylake don't report the crystal
+	 * clock, but we can easily calculate it to a high degree of accuracy
+	 * by considering the crystal ratio and the CPU speed.
+	 */
+	if (crystal_khz == 0 && boot_cpu_data.cpuid_level >= 0x16) {
+		unsigned int eax_base_mhz, ebx, ecx, edx;
+
+		cpuid(0x16, &eax_base_mhz, &ebx, &ecx, &edx);
+		crystal_khz = eax_base_mhz * 1000 *
+			eax_denominator / ebx_numerator;
+	}
+
+	if (crystal_khz == 0)
+		return 0;
 
 	/*
 	 * For Atom SoCs TSC is the only reliable clocksource.
@@ -665,6 +672,16 @@ unsigned long native_calibrate_tsc(void)
 	if (boot_cpu_data.x86_model == INTEL_FAM6_ATOM_GOLDMONT)
 		setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
 
+#ifdef CONFIG_X86_LOCAL_APIC
+	/*
+	 * The local APIC appears to be fed by the core crystal clock
+	 * (which sounds entirely sensible). We can set the global
+	 * lapic_timer_period here to avoid having to calibrate the APIC
+	 * timer later.
+	 */
+	lapic_timer_period = crystal_khz * 1000 / HZ;
+#endif
+
 	return crystal_khz * ebx_numerator / eax_denominator;
 }
diff --git a/arch/x86/kernel/tsc_msr.c b/arch/x86/kernel/tsc_msr.c
index 3d0e9aeea7c8..067858fe4db8 100644
--- a/arch/x86/kernel/tsc_msr.c
+++ b/arch/x86/kernel/tsc_msr.c
@@ -71,7 +71,7 @@ static const struct x86_cpu_id tsc_msr_cpu_ids[] = {
 /*
  * MSR-based CPU/TSC frequency discovery for certain CPUs.
  *
- * Set global "lapic_timer_frequency" to bus_clock_cycles/jiffy
+ * Set global "lapic_timer_period" to bus_clock_cycles/jiffy
  * Return processor base frequency in KHz, or 0 on failure.
  */
 unsigned long cpu_khz_from_msr(void)
@@ -104,7 +104,7 @@ unsigned long cpu_khz_from_msr(void)
 	res = freq * ratio;
 
 #ifdef CONFIG_X86_LOCAL_APIC
-	lapic_timer_frequency = (freq * 1000) / HZ;
+	lapic_timer_period = (freq * 1000) / HZ;
 #endif
 
 	/*
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index 33b66b5c5aec..72b997eaa1fc 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -82,9 +82,9 @@ static struct orc_entry *orc_find(unsigned long ip);
  * But they are copies of the ftrace entries that are static and
  * defined in ftrace_*.S, which do have orc entries.
  *
- * If the undwinder comes across a ftrace trampoline, then find the
+ * If the unwinder comes across a ftrace trampoline, then find the
  * ftrace function that was used to create it, and use that ftrace
- * function's orc entrie, as the placement of the return code in
+ * function's orc entry, as the placement of the return code in
  * the stack will be identical.
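 *
 * (Editor's note: in other words, resolve the trampoline back to the static
 * ftrace caller it was copied from and return that caller's ORC entry; the
 * details of that lookup are elided here.)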
*/ static struct orc_entry *orc_ftrace_find(unsigned long ip) @@ -128,6 +128,16 @@ static struct orc_entry null_orc_entry = { .type = ORC_TYPE_CALL }; +/* Fake frame pointer entry -- used as a fallback for generated code */ +static struct orc_entry orc_fp_entry = { + .type = ORC_TYPE_CALL, + .sp_reg = ORC_REG_BP, + .sp_offset = 16, + .bp_reg = ORC_REG_PREV_SP, + .bp_offset = -16, + .end = 0, +}; + static struct orc_entry *orc_find(unsigned long ip) { static struct orc_entry *orc; @@ -392,8 +402,16 @@ bool unwind_next_frame(struct unwind_state *state) * calls and calls to noreturn functions. */ orc = orc_find(state->signal ? state->ip : state->ip - 1); - if (!orc) - goto err; + if (!orc) { + /* + * As a fallback, try to assume this code uses a frame pointer. + * This is useful for generated code, like BPF, which ORC + * doesn't know about. This is just a guess, so the rest of + * the unwind is no longer considered reliable. + */ + orc = &orc_fp_entry; + state->error = true; + } /* End-of-stack check for kernel threads: */ if (orc->sp_reg == ORC_REG_UNDEFINED) { diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index 9a327d5b6d1f..d78a61408243 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -47,8 +47,6 @@ static const struct cpuid_reg reverse_cpuid[] = { [CPUID_8000_0001_ECX] = {0x80000001, 0, CPUID_ECX}, [CPUID_7_0_EBX] = { 7, 0, CPUID_EBX}, [CPUID_D_1_EAX] = { 0xd, 1, CPUID_EAX}, - [CPUID_F_0_EDX] = { 0xf, 0, CPUID_EDX}, - [CPUID_F_1_EDX] = { 0xf, 1, CPUID_EDX}, [CPUID_8000_0008_EBX] = {0x80000008, 0, CPUID_EBX}, [CPUID_6_EAX] = { 6, 0, CPUID_EAX}, [CPUID_8000_000A_EDX] = {0x8000000a, 0, CPUID_EDX}, diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index a21c440ff356..4dabc318adb8 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2339,7 +2339,7 @@ int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu) struct kvm_lapic *apic = vcpu->arch.apic; u32 ppr; - if (!apic_enabled(apic)) + if (!kvm_apic_hw_enabled(apic)) return -1; __apic_update_ppr(apic, &ppr); diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index 132d149494d6..ab73a9a639ae 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -261,10 +261,10 @@ static int kvm_pmu_rdpmc_vmware(struct kvm_vcpu *vcpu, unsigned idx, u64 *data) ctr_val = rdtsc(); break; case VMWARE_BACKDOOR_PMC_REAL_TIME: - ctr_val = ktime_get_boot_ns(); + ctr_val = ktime_get_boottime_ns(); break; case VMWARE_BACKDOOR_PMC_APPARENT_TIME: - ctr_val = ktime_get_boot_ns() + + ctr_val = ktime_get_boottime_ns() + vcpu->kvm->arch.kvmclock_offset; break; default: diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 5f9c1a200201..46af3a5e9209 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -5240,9 +5240,6 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu, vmx = to_vmx(vcpu); vmcs12 = get_vmcs12(vcpu); - if (nested_vmx_allowed(vcpu) && vmx->nested.enlightened_vmcs_enabled) - kvm_state.flags |= KVM_STATE_NESTED_EVMCS; - if (nested_vmx_allowed(vcpu) && (vmx->nested.vmxon || vmx->nested.smm.vmxon)) { kvm_state.hdr.vmx.vmxon_pa = vmx->nested.vmxon_ptr; @@ -5251,6 +5248,9 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu, if (vmx_has_valid_vmcs12(vcpu)) { kvm_state.size += sizeof(user_vmx_nested_state->vmcs12); + if (vmx->nested.hv_evmcs) + kvm_state.flags |= KVM_STATE_NESTED_EVMCS; + if (is_guest_mode(vcpu) && nested_cpu_has_shadow_vmcs(vmcs12) && vmcs12->vmcs_link_pointer != -1ull) @@ -5350,6 +5350,15 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, if 
(kvm_state->hdr.vmx.vmcs12_pa != -1ull)
 			return -EINVAL;
 
+		/*
+		 * KVM_STATE_NESTED_EVMCS used to signal that KVM should
+		 * enable the eVMCS capability on the vCPU. However, the code
+		 * was since changed such that the flag signals that vmcs12
+		 * should be copied into the eVMCS in guest memory.
+		 *
+		 * To preserve backwards compatibility, allow the user
+		 * to set this flag even when there is no VMXON region.
+		 */
 		if (kvm_state->flags & ~KVM_STATE_NESTED_EVMCS)
 			return -EINVAL;
 	} else {
@@ -5358,7 +5367,7 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
 
 		if (!page_address_valid(vcpu, kvm_state->hdr.vmx.vmxon_pa))
 			return -EINVAL;
-	}
+	}
 
 	if ((kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) &&
 	    (kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE))
@@ -5373,20 +5382,21 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
 	 * nor can VMLAUNCH/VMRESUME be pending.  Outside SMM, SMM flags
 	 * must be zero.
 	 */
-	if (is_smm(vcpu) ? kvm_state->flags : kvm_state->hdr.vmx.smm.flags)
+	if (is_smm(vcpu) ?
+		(kvm_state->flags &
+		 (KVM_STATE_NESTED_GUEST_MODE | KVM_STATE_NESTED_RUN_PENDING))
+		: kvm_state->hdr.vmx.smm.flags)
 		return -EINVAL;
 
 	if ((kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) &&
 	    !(kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON))
 		return -EINVAL;
 
-	vmx_leave_nested(vcpu);
-	if (kvm_state->flags & KVM_STATE_NESTED_EVMCS) {
-		if (!nested_vmx_allowed(vcpu))
+	if ((kvm_state->flags & KVM_STATE_NESTED_EVMCS) &&
+	    (!nested_vmx_allowed(vcpu) || !vmx->nested.enlightened_vmcs_enabled))
 			return -EINVAL;
 
-		nested_enable_evmcs(vcpu, NULL);
-	}
+	vmx_leave_nested(vcpu);
 
 	if (kvm_state->hdr.vmx.vmxon_pa == -1ull)
 		return 0;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9857992d4e58..63bb1ee8258e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -67,6 +67,7 @@
 #include <asm/mshyperv.h>
 #include <asm/hypervisor.h>
 #include <asm/intel_pt.h>
+#include <clocksource/hyperv_timer.h>
 
 #define CREATE_TRACE_POINTS
 #include "trace.h"
@@ -1554,7 +1555,7 @@ static int set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
 			vcpu->arch.tsc_always_catchup = 1;
 			return 0;
 		} else {
-			WARN(1, "user requested TSC rate below hardware speed\n");
+			pr_warn_ratelimited("user requested TSC rate below hardware speed\n");
 			return -1;
 		}
 	}
@@ -1564,8 +1565,8 @@ static int set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
 			user_tsc_khz, tsc_khz);
 
 	if (ratio == 0 || ratio >= kvm_max_tsc_scaling_ratio) {
-		WARN_ONCE(1, "Invalid TSC scaling ratio - virtual-tsc-khz=%u\n",
-			  user_tsc_khz);
+		pr_warn_ratelimited("Invalid TSC scaling ratio - virtual-tsc-khz=%u\n",
+				    user_tsc_khz);
 		return -1;
 	}
 
@@ -1728,7 +1729,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
 
 	raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
 	offset = kvm_compute_tsc_offset(vcpu, data);
-	ns = ktime_get_boot_ns();
+	ns = ktime_get_boottime_ns();
 	elapsed = ns - kvm->arch.last_tsc_nsec;
 
 	if (vcpu->arch.virtual_tsc_khz) {
@@ -2070,7 +2071,7 @@ u64 get_kvmclock_ns(struct kvm *kvm)
 	spin_lock(&ka->pvclock_gtod_sync_lock);
 	if (!ka->use_master_clock) {
 		spin_unlock(&ka->pvclock_gtod_sync_lock);
-		return ktime_get_boot_ns() + ka->kvmclock_offset;
+		return ktime_get_boottime_ns() + ka->kvmclock_offset;
 	}
 
 	hv_clock.tsc_timestamp = ka->master_cycle_now;
@@ -2086,7 +2087,7 @@ u64 get_kvmclock_ns(struct kvm *kvm)
 				   &hv_clock.tsc_to_system_mul);
 		ret = __pvclock_read_cycles(&hv_clock, rdtsc());
 	} else
-		ret = ktime_get_boot_ns() + ka->kvmclock_offset;
+		ret = ktime_get_boottime_ns() + ka->kvmclock_offset;
put_cpu();
@@ -2185,7 +2186,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
 	}
 	if (!use_master_clock) {
 		host_tsc = rdtsc();
-		kernel_ns = ktime_get_boot_ns();
+		kernel_ns = ktime_get_boottime_ns();
 	}
 
 	tsc_timestamp = kvm_read_l1_tsc(v, host_tsc);
@@ -9015,7 +9016,7 @@ int kvm_arch_hardware_enable(void)
 	 * before any KVM threads can be running.  Unfortunately, we can't
 	 * bring the TSCs fully up to date with real time, as we aren't yet far
 	 * enough into CPU bringup that we know how much real time has actually
-	 * elapsed; our helper function, ktime_get_boot_ns() will be using boot
+	 * elapsed; our helper function, ktime_get_boottime_ns(), will be using boot
 	 * variables that haven't been updated yet.
 	 *
 	 * So we simply find the maximum observed TSC above, then record the
@@ -9243,7 +9244,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	mutex_init(&kvm->arch.apic_map_lock);
 	spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock);
 
-	kvm->arch.kvmclock_offset = -ktime_get_boot_ns();
+	kvm->arch.kvmclock_offset = -ktime_get_boottime_ns();
 	pvclock_update_vm_gtod_copy(kvm);
 
 	kvm->arch.guest_can_read_msr_platform_info = true;
diff --git a/arch/x86/lib/cache-smp.c b/arch/x86/lib/cache-smp.c
index 1811fa4a1b1a..7c48ff4ae8d1 100644
--- a/arch/x86/lib/cache-smp.c
+++ b/arch/x86/lib/cache-smp.c
@@ -15,6 +15,7 @@ EXPORT_SYMBOL(wbinvd_on_cpu);
 
 int wbinvd_on_all_cpus(void)
 {
-	return on_each_cpu(__wbinvd, NULL, 1);
+	on_each_cpu(__wbinvd, NULL, 1);
+	return 0;
 }
 EXPORT_SYMBOL(wbinvd_on_all_cpus);
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 46df4c6aae46..58e4f1f00bbc 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -710,6 +710,10 @@ static void set_signal_archinfo(unsigned long address,
 	 * To avoid leaking information about the kernel page
 	 * table layout, pretend that user-mode accesses to
 	 * kernel addresses are always protection faults.
+	 *
+	 * NB: This means that failed vsyscalls with vsyscall=none
+	 * will have the PROT bit set.  This doesn't leak any
+	 * information and does not appear to cause any problems.
 	 */
 	if (address >= TASK_SIZE_MAX)
 		error_code |= X86_PF_PROT;
@@ -1369,16 +1373,18 @@ void do_user_addr_fault(struct pt_regs *regs,
 
 #ifdef CONFIG_X86_64
 	/*
-	 * Instruction fetch faults in the vsyscall page might need
-	 * emulation.  The vsyscall page is at a high address
-	 * (>PAGE_OFFSET), but is considered to be part of the user
-	 * address space.
+	 * Faults in the vsyscall page might need emulation.  The
+	 * vsyscall page is at a high address (>PAGE_OFFSET), but is
+	 * considered to be part of the user address space.
 	 *
 	 * The vsyscall page does not have a "real" VMA, so do this
 	 * emulation before we go searching for VMAs.
+	 *
+	 * PKRU never rejects instruction fetches, so we don't need
+	 * to consider the PF_PK bit.
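+	 *
+	 * (Editor's note: emulate_vsyscall() now takes hw_error_code, as the
+	 * hunk below shows, so the emulation path itself can decide based on
+	 * the access type instead of this caller pre-filtering on
+	 * X86_PF_INSTR.)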
*/
-	if ((hw_error_code & X86_PF_INSTR) && is_vsyscall_vaddr(address)) {
-		if (emulate_vsyscall(regs, address))
+	if (is_vsyscall_vaddr(address)) {
+		if (emulate_vsyscall(hw_error_code, regs, address))
 			return;
 	}
 #endif
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 693aaf28d5fe..0f01c7b1d217 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -671,23 +671,25 @@ static unsigned long __meminit
 phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end,
 	      unsigned long page_size_mask, bool init)
 {
-	unsigned long paddr_next, paddr_last = paddr_end;
-	unsigned long vaddr = (unsigned long)__va(paddr);
-	int i = p4d_index(vaddr);
+	unsigned long vaddr, vaddr_end, vaddr_next, paddr_next, paddr_last;
+
+	paddr_last = paddr_end;
+	vaddr = (unsigned long)__va(paddr);
+	vaddr_end = (unsigned long)__va(paddr_end);
 
 	if (!pgtable_l5_enabled())
 		return phys_pud_init((pud_t *) p4d_page, paddr, paddr_end,
 				     page_size_mask, init);
 
-	for (; i < PTRS_PER_P4D; i++, paddr = paddr_next) {
-		p4d_t *p4d;
+	for (; vaddr < vaddr_end; vaddr = vaddr_next) {
+		p4d_t *p4d = p4d_page + p4d_index(vaddr);
 		pud_t *pud;
 
-		vaddr = (unsigned long)__va(paddr);
-		p4d = p4d_page + p4d_index(vaddr);
-		paddr_next = (paddr & P4D_MASK) + P4D_SIZE;
+		vaddr_next = (vaddr & P4D_MASK) + P4D_SIZE;
+		paddr = __pa(vaddr);
 
 		if (paddr >= paddr_end) {
+			paddr_next = __pa(vaddr_next);
 			if (!after_bootmem &&
 			    !e820__mapped_any(paddr & P4D_MASK, paddr_next,
 					      E820_TYPE_RAM) &&
@@ -699,13 +701,13 @@ phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end,
 
 		if (!p4d_none(*p4d)) {
 			pud = pud_offset(p4d, 0);
-			paddr_last = phys_pud_init(pud, paddr, paddr_end,
-						   page_size_mask, init);
+			paddr_last = phys_pud_init(pud, paddr, __pa(vaddr_end),
+						   page_size_mask, init);
 			continue;
 		}
 
 		pud = alloc_low_page();
-		paddr_last = phys_pud_init(pud, paddr, paddr_end,
+		paddr_last = phys_pud_init(pud, paddr, __pa(vaddr_end),
 					   page_size_mask, init);
 
 		spin_lock(&init_mm.page_table_lock);
diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c
index 632b83885867..3b9fd679cea9 100644
--- a/arch/x86/platform/efi/quirks.c
+++ b/arch/x86/platform/efi/quirks.c
@@ -728,7 +728,7 @@ void efi_recover_from_page_fault(unsigned long phys_addr)
 	 * Address range 0x0000 - 0x0fff is always mapped in the efi_pgd, so
 	 * page faulting on these addresses isn't expected.
 	 */
-	if (phys_addr >= 0x0000 && phys_addr <= 0x0fff)
+	if (phys_addr <= 0x0fff)
 		return;
 
 	/*
diff --git a/arch/x86/ras/Kconfig b/arch/x86/ras/Kconfig
index a9c3db125222..9ad6842de4b4 100644
--- a/arch/x86/ras/Kconfig
+++ b/arch/x86/ras/Kconfig
@@ -11,3 +11,13 @@ config RAS_CEC
 	  Bear in mind that this is absolutely useless if your platform doesn't
 	  have ECC DIMMs and doesn't have DRAM ECC checking enabled in the BIOS.
+
+config RAS_CEC_DEBUG
+	bool "CEC debugging machinery"
+	default n
+	depends on RAS_CEC
+	help
+	  Add extra files to (debugfs)/ras/cec to test the correctable error
+	  collector feature. "pfn" is a writable file that allows the user to
+	  simulate an error in a particular page frame. "array" is a read-only
+	  file that dumps out the current state of all pages logged so far.
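
Editor's aside: the two array_index_nospec() conversions above (the ptrace
debug-register read and do_get_thread_area()) follow the same idiom. Below is
a minimal sketch of that idiom, assuming a kernel context with
<linux/nospec.h>; struct demo, DEMO_NR_SLOTS and demo_read_slot() are
hypothetical names invented for this illustration, not taken from the patch:

	#include <linux/nospec.h>

	#define DEMO_NR_SLOTS	8

	struct demo {
		unsigned long slot[DEMO_NR_SLOTS];
	};

	static unsigned long demo_read_slot(struct demo *d, int n)
	{
		int index;

		if (n < 0 || n >= DEMO_NR_SLOTS)
			return 0;

		/*
		 * The bounds check above can be bypassed speculatively on a
		 * mispredicted branch. array_index_nospec() clamps n to
		 * [0, DEMO_NR_SLOTS) even under speculation, so the load
		 * below cannot be used to leak out-of-bounds data.
		 */
		index = array_index_nospec(n, DEMO_NR_SLOTS);

		return d->slot[index];
	}

The clamp is branchless, which is the point: unlike the plain bounds check,
it leaves no window for the CPU to speculate past it.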