diff options
Diffstat (limited to 'arch')
437 files changed, 12631 insertions, 7836 deletions
diff --git a/arch/Kconfig b/arch/Kconfig index 057370a0ac4e..400b9e1b2f27 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -91,7 +91,7 @@ config STATIC_KEYS_SELFTEST config OPTPROBES def_bool y depends on KPROBES && HAVE_OPTPROBES - depends on !PREEMPT + select TASKS_RCU if PREEMPT config KPROBES_ON_FTRACE def_bool y diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h index 85867d3cea64..767bfdd42992 100644 --- a/arch/alpha/include/asm/atomic.h +++ b/arch/alpha/include/asm/atomic.h @@ -14,6 +14,15 @@ * than regular operations. */ +/* + * To ensure dependency ordering is preserved for the _relaxed and + * _release atomics, an smp_read_barrier_depends() is unconditionally + * inserted into the _relaxed variants, which are used to build the + * barriered versions. To avoid redundant back-to-back fences, we can + * define the _acquire and _fence versions explicitly. + */ +#define __atomic_op_acquire(op, args...) op##_relaxed(args) +#define __atomic_op_fence __atomic_op_release #define ATOMIC_INIT(i) { (i) } #define ATOMIC64_INIT(i) { (i) } @@ -61,6 +70,7 @@ static inline int atomic_##op##_return_relaxed(int i, atomic_t *v) \ ".previous" \ :"=&r" (temp), "=m" (v->counter), "=&r" (result) \ :"Ir" (i), "m" (v->counter) : "memory"); \ + smp_read_barrier_depends(); \ return result; \ } @@ -78,6 +88,7 @@ static inline int atomic_fetch_##op##_relaxed(int i, atomic_t *v) \ ".previous" \ :"=&r" (temp), "=m" (v->counter), "=&r" (result) \ :"Ir" (i), "m" (v->counter) : "memory"); \ + smp_read_barrier_depends(); \ return result; \ } @@ -112,6 +123,7 @@ static __inline__ long atomic64_##op##_return_relaxed(long i, atomic64_t * v) \ ".previous" \ :"=&r" (temp), "=m" (v->counter), "=&r" (result) \ :"Ir" (i), "m" (v->counter) : "memory"); \ + smp_read_barrier_depends(); \ return result; \ } @@ -129,6 +141,7 @@ static __inline__ long atomic64_fetch_##op##_relaxed(long i, atomic64_t * v) \ ".previous" \ :"=&r" (temp), "=m" (v->counter), "=&r" (result) \ :"Ir" (i), "m" (v->counter) : "memory"); \ + smp_read_barrier_depends(); \ return result; \ } diff --git a/arch/alpha/include/asm/rwsem.h b/arch/alpha/include/asm/rwsem.h index 3925f06afd6b..cf8fc8f9a2ed 100644 --- a/arch/alpha/include/asm/rwsem.h +++ b/arch/alpha/include/asm/rwsem.h @@ -22,7 +22,7 @@ #define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS #define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) -static inline void __down_read(struct rw_semaphore *sem) +static inline int ___down_read(struct rw_semaphore *sem) { long oldcount; #ifndef CONFIG_SMP @@ -42,10 +42,24 @@ static inline void __down_read(struct rw_semaphore *sem) :"=&r" (oldcount), "=m" (sem->count), "=&r" (temp) :"Ir" (RWSEM_ACTIVE_READ_BIAS), "m" (sem->count) : "memory"); #endif - if (unlikely(oldcount < 0)) + return (oldcount < 0); +} + +static inline void __down_read(struct rw_semaphore *sem) +{ + if (unlikely(___down_read(sem))) rwsem_down_read_failed(sem); } +static inline int __down_read_killable(struct rw_semaphore *sem) +{ + if (unlikely(___down_read(sem))) + if (IS_ERR(rwsem_down_read_failed_killable(sem))) + return -EINTR; + + return 0; +} + /* * trylock for reading -- returns 1 if successful, 0 if contention */ @@ -95,9 +109,10 @@ static inline void __down_write(struct rw_semaphore *sem) static inline int __down_write_killable(struct rw_semaphore *sem) { - if (unlikely(___down_write(sem))) + if (unlikely(___down_write(sem))) { if (IS_ERR(rwsem_down_write_failed_killable(sem))) return -EINTR; + } return 0; } diff --git a/arch/alpha/include/asm/spinlock.h b/arch/alpha/include/asm/spinlock.h index aa4304afbea6..1221cbb86a6f 100644 --- a/arch/alpha/include/asm/spinlock.h +++ b/arch/alpha/include/asm/spinlock.h @@ -14,7 +14,6 @@ * We make no fairness assumptions. They have a cost. */ -#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) #define arch_spin_is_locked(x) ((x)->lock != 0) static inline int arch_spin_value_unlocked(arch_spinlock_t lock) @@ -55,16 +54,6 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock) /***********************************************************/ -static inline int arch_read_can_lock(arch_rwlock_t *lock) -{ - return (lock->lock & 1) == 0; -} - -static inline int arch_write_can_lock(arch_rwlock_t *lock) -{ - return lock->lock == 0; -} - static inline void arch_read_lock(arch_rwlock_t *lock) { long regx; @@ -171,7 +160,4 @@ static inline void arch_write_unlock(arch_rwlock_t * lock) lock->lock = 0; } -#define arch_read_lock_flags(lock, flags) arch_read_lock(lock) -#define arch_write_lock_flags(lock, flags) arch_write_lock(lock) - #endif /* _ALPHA_SPINLOCK_H */ diff --git a/arch/arc/include/asm/spinlock.h b/arch/arc/include/asm/spinlock.h index 47efc8451b70..2ba04a7db621 100644 --- a/arch/arc/include/asm/spinlock.h +++ b/arch/arc/include/asm/spinlock.h @@ -14,7 +14,6 @@ #include <asm/barrier.h> #define arch_spin_is_locked(x) ((x)->slock != __ARCH_SPIN_LOCK_UNLOCKED__) -#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) #ifdef CONFIG_ARC_HAS_LLSC @@ -410,14 +409,4 @@ static inline void arch_write_unlock(arch_rwlock_t *rw) #endif -#define arch_read_can_lock(x) ((x)->counter > 0) -#define arch_write_can_lock(x) ((x)->counter == __ARCH_RW_LOCK_UNLOCKED__) - -#define arch_read_lock_flags(lock, flags) arch_read_lock(lock) -#define arch_write_lock_flags(lock, flags) arch_write_lock(lock) - -#define arch_spin_relax(lock) cpu_relax() -#define arch_read_relax(lock) cpu_relax() -#define arch_write_relax(lock) cpu_relax() - #endif /* __ASM_SPINLOCK_H */ diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c index 6df9d94a9537..efe8b4200a67 100644 --- a/arch/arc/kernel/smp.c +++ b/arch/arc/kernel/smp.c @@ -250,7 +250,7 @@ static void ipi_send_msg_one(int cpu, enum ipi_msg_type msg) * and read back old value */ do { - new = old = ACCESS_ONCE(*ipi_data_ptr); + new = old = READ_ONCE(*ipi_data_ptr); new |= 1U << msg; } while (cmpxchg(ipi_data_ptr, old, new) != old); diff --git a/arch/arm/common/locomo.c b/arch/arm/common/locomo.c index 6c7b06854fce..51936bde1eb2 100644 --- a/arch/arm/common/locomo.c +++ b/arch/arm/common/locomo.c @@ -826,28 +826,6 @@ static int locomo_match(struct device *_dev, struct device_driver *_drv) return dev->devid == drv->devid; } -static int locomo_bus_suspend(struct device *dev, pm_message_t state) -{ - struct locomo_dev *ldev = LOCOMO_DEV(dev); - struct locomo_driver *drv = LOCOMO_DRV(dev->driver); - int ret = 0; - - if (drv && drv->suspend) - ret = drv->suspend(ldev, state); - return ret; -} - -static int locomo_bus_resume(struct device *dev) -{ - struct locomo_dev *ldev = LOCOMO_DEV(dev); - struct locomo_driver *drv = LOCOMO_DRV(dev->driver); - int ret = 0; - - if (drv && drv->resume) - ret = drv->resume(ldev); - return ret; -} - static int locomo_bus_probe(struct device *dev) { struct locomo_dev *ldev = LOCOMO_DEV(dev); @@ -875,8 +853,6 @@ struct bus_type locomo_bus_type = { .match = locomo_match, .probe = locomo_bus_probe, .remove = locomo_bus_remove, - .suspend = locomo_bus_suspend, - .resume = locomo_bus_resume, }; int locomo_driver_register(struct locomo_driver *driver) diff --git a/arch/arm/include/asm/arch_gicv3.h b/arch/arm/include/asm/arch_gicv3.h index eee269321923..1070044f5c3f 100644 --- a/arch/arm/include/asm/arch_gicv3.h +++ b/arch/arm/include/asm/arch_gicv3.h @@ -196,6 +196,11 @@ static inline void gic_write_ctlr(u32 val) isb(); } +static inline u32 gic_read_ctlr(void) +{ + return read_sysreg(ICC_CTLR); +} + static inline void gic_write_grpen1(u32 val) { write_sysreg(val, ICC_IGRPEN1); diff --git a/arch/arm/include/asm/hardware/locomo.h b/arch/arm/include/asm/hardware/locomo.h index 74e51d6bd93f..f8712e3c29cf 100644 --- a/arch/arm/include/asm/hardware/locomo.h +++ b/arch/arm/include/asm/hardware/locomo.h @@ -189,8 +189,6 @@ struct locomo_driver { unsigned int devid; int (*probe)(struct locomo_dev *); int (*remove)(struct locomo_dev *); - int (*suspend)(struct locomo_dev *, pm_message_t); - int (*resume)(struct locomo_dev *); }; #define LOCOMO_DRV(_d) container_of((_d), struct locomo_driver, drv) diff --git a/arch/arm/include/asm/ptrace.h b/arch/arm/include/asm/ptrace.h index e9c9a117bd25..c7cdbb43ae7c 100644 --- a/arch/arm/include/asm/ptrace.h +++ b/arch/arm/include/asm/ptrace.h @@ -126,8 +126,7 @@ extern unsigned long profile_pc(struct pt_regs *regs); /* * kprobe-based event tracer support */ -#include <linux/stddef.h> -#include <linux/types.h> +#include <linux/compiler.h> #define MAX_REG_OFFSET (offsetof(struct pt_regs, ARM_ORIG_r0)) extern int regs_query_register_offset(const char *name); diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h index 25cb465c8538..099c78fcf62d 100644 --- a/arch/arm/include/asm/spinlock.h +++ b/arch/arm/include/asm/spinlock.h @@ -53,8 +53,6 @@ static inline void dsb_sev(void) * memory. */ -#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) - static inline void arch_spin_lock(arch_spinlock_t *lock) { unsigned long tmp; @@ -74,7 +72,7 @@ static inline void arch_spin_lock(arch_spinlock_t *lock) while (lockval.tickets.next != lockval.tickets.owner) { wfe(); - lockval.tickets.owner = ACCESS_ONCE(lock->tickets.owner); + lockval.tickets.owner = READ_ONCE(lock->tickets.owner); } smp_mb(); @@ -194,9 +192,6 @@ static inline void arch_write_unlock(arch_rwlock_t *rw) dsb_sev(); } -/* write_can_lock - would write_trylock() succeed? */ -#define arch_write_can_lock(x) (ACCESS_ONCE((x)->lock) == 0) - /* * Read locks are a bit more hairy: * - Exclusively load the lock value. @@ -274,14 +269,4 @@ static inline int arch_read_trylock(arch_rwlock_t *rw) } } -/* read_can_lock - would read_trylock() succeed? */ -#define arch_read_can_lock(x) (ACCESS_ONCE((x)->lock) < 0x80000000) - -#define arch_read_lock_flags(lock, flags) arch_read_lock(lock) -#define arch_write_lock_flags(lock, flags) arch_write_lock(lock) - -#define arch_spin_relax(lock) cpu_relax() -#define arch_read_relax(lock) cpu_relax() -#define arch_write_relax(lock) cpu_relax() - #endif /* __ASM_SPINLOCK_H */ diff --git a/arch/arm/include/asm/topology.h b/arch/arm/include/asm/topology.h index f59ab9bcbaf9..5d88d2f22b2c 100644 --- a/arch/arm/include/asm/topology.h +++ b/arch/arm/include/asm/topology.h @@ -25,6 +25,14 @@ void init_cpu_topology(void); void store_cpu_topology(unsigned int cpuid); const struct cpumask *cpu_coregroup_mask(int cpu); +#include <linux/arch_topology.h> + +/* Replace task scheduler's default frequency-invariant accounting */ +#define arch_scale_freq_capacity topology_get_freq_scale + +/* Replace task scheduler's default cpu-invariant accounting */ +#define arch_scale_cpu_capacity topology_get_cpu_scale + #else static inline void init_cpu_topology(void) { } diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 948c648fea00..0fcd82f01388 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -154,30 +154,26 @@ static void dump_mem(const char *lvl, const char *str, unsigned long bottom, set_fs(fs); } -static void dump_instr(const char *lvl, struct pt_regs *regs) +static void __dump_instr(const char *lvl, struct pt_regs *regs) { unsigned long addr = instruction_pointer(regs); const int thumb = thumb_mode(regs); const int width = thumb ? 4 : 8; - mm_segment_t fs; char str[sizeof("00000000 ") * 5 + 2 + 1], *p = str; int i; /* - * We need to switch to kernel mode so that we can use __get_user - * to safely read from kernel space. Note that we now dump the - * code first, just in case the backtrace kills us. + * Note that we now dump the code first, just in case the backtrace + * kills us. */ - fs = get_fs(); - set_fs(KERNEL_DS); for (i = -4; i < 1 + !!thumb; i++) { unsigned int val, bad; if (thumb) - bad = __get_user(val, &((u16 *)addr)[i]); + bad = get_user(val, &((u16 *)addr)[i]); else - bad = __get_user(val, &((u32 *)addr)[i]); + bad = get_user(val, &((u32 *)addr)[i]); if (!bad) p += sprintf(p, i == 0 ? "(%0*x) " : "%0*x ", @@ -188,8 +184,20 @@ static void dump_instr(const char *lvl, struct pt_regs *regs) } } printk("%sCode: %s\n", lvl, str); +} - set_fs(fs); +static void dump_instr(const char *lvl, struct pt_regs *regs) +{ + mm_segment_t fs; + + if (!user_mode(regs)) { + fs = get_fs(); + set_fs(KERNEL_DS); + __dump_instr(lvl, regs); + set_fs(fs); + } else { + __dump_instr(lvl, regs); + } } #ifdef CONFIG_ARM_UNWIND diff --git a/arch/arm/mach-footbridge/dc21285.c b/arch/arm/mach-footbridge/dc21285.c index 96a3d73ef4bf..e7b350f18f5f 100644 --- a/arch/arm/mach-footbridge/dc21285.c +++ b/arch/arm/mach-footbridge/dc21285.c @@ -136,19 +136,14 @@ struct pci_ops dc21285_ops = { static struct timer_list serr_timer; static struct timer_list perr_timer; -static void dc21285_enable_error(unsigned long __data) +static void dc21285_enable_error(struct timer_list *timer) { - switch (__data) { - case IRQ_PCI_SERR: - del_timer(&serr_timer); - break; - - case IRQ_PCI_PERR: - del_timer(&perr_timer); - break; - } + del_timer(timer); - enable_irq(__data); + if (timer == &serr_timer) + enable_irq(IRQ_PCI_SERR); + else if (timer == &perr_timer) + enable_irq(IRQ_PCI_PERR); } /* @@ -323,13 +318,8 @@ void __init dc21285_preinit(void) *CSR_PCICMD = (*CSR_PCICMD & 0xffff) | PCICMD_ERROR_BITS; } - init_timer(&serr_timer); - init_timer(&perr_timer); - - serr_timer.data = IRQ_PCI_SERR; - serr_timer.function = dc21285_enable_error; - perr_timer.data = IRQ_PCI_PERR; - perr_timer.function = dc21285_enable_error; + timer_setup(&serr_timer, dc21285_enable_error, 0); + timer_setup(&perr_timer, dc21285_enable_error, 0); /* * We don't care if these fail. diff --git a/arch/arm/mach-imx/mach-imx6q.c b/arch/arm/mach-imx/mach-imx6q.c index 45801b27ee5c..b5f89fdbbb4b 100644 --- a/arch/arm/mach-imx/mach-imx6q.c +++ b/arch/arm/mach-imx/mach-imx6q.c @@ -286,88 +286,6 @@ static void __init imx6q_init_machine(void) imx6q_axi_init(); } -#define OCOTP_CFG3 0x440 -#define OCOTP_CFG3_SPEED_SHIFT 16 -#define OCOTP_CFG3_SPEED_1P2GHZ 0x3 -#define OCOTP_CFG3_SPEED_996MHZ 0x2 -#define OCOTP_CFG3_SPEED_852MHZ 0x1 - -static void __init imx6q_opp_check_speed_grading(struct device *cpu_dev) -{ - struct device_node *np; - void __iomem *base; - u32 val; - - np = of_find_compatible_node(NULL, NULL, "fsl,imx6q-ocotp"); - if (!np) { - pr_warn("failed to find ocotp node\n"); - return; - } - - base = of_iomap(np, 0); - if (!base) { - pr_warn("failed to map ocotp\n"); - goto put_node; - } - - /* - * SPEED_GRADING[1:0] defines the max speed of ARM: - * 2b'11: 1200000000Hz; - * 2b'10: 996000000Hz; - * 2b'01: 852000000Hz; -- i.MX6Q Only, exclusive with 996MHz. - * 2b'00: 792000000Hz; - * We need to set the max speed of ARM according to fuse map. - */ - val = readl_relaxed(base + OCOTP_CFG3); - val >>= OCOTP_CFG3_SPEED_SHIFT; - val &= 0x3; - - if ((val != OCOTP_CFG3_SPEED_1P2GHZ) && cpu_is_imx6q()) - if (dev_pm_opp_disable(cpu_dev, 1200000000)) - pr_warn("failed to disable 1.2 GHz OPP\n"); - if (val < OCOTP_CFG3_SPEED_996MHZ) - if (dev_pm_opp_disable(cpu_dev, 996000000)) - pr_warn("failed to disable 996 MHz OPP\n"); - if (cpu_is_imx6q()) { - if (val != OCOTP_CFG3_SPEED_852MHZ) - if (dev_pm_opp_disable(cpu_dev, 852000000)) - pr_warn("failed to disable 852 MHz OPP\n"); - } - iounmap(base); -put_node: - of_node_put(np); -} - -static void __init imx6q_opp_init(void) -{ - struct device_node *np; - struct device *cpu_dev = get_cpu_device(0); - - if (!cpu_dev) { - pr_warn("failed to get cpu0 device\n"); - return; - } - np = of_node_get(cpu_dev->of_node); - if (!np) { - pr_warn("failed to find cpu0 node\n"); - return; - } - - if (dev_pm_opp_of_add_table(cpu_dev)) { - pr_warn("failed to init OPP table\n"); - goto put_node; - } - - imx6q_opp_check_speed_grading(cpu_dev); - -put_node: - of_node_put(np); -} - -static struct platform_device imx6q_cpufreq_pdev = { - .name = "imx6q-cpufreq", -}; - static void __init imx6q_init_late(void) { /* @@ -377,10 +295,8 @@ static void __init imx6q_init_late(void) if (imx_get_soc_revision() > IMX_CHIP_REVISION_1_1) imx6q_cpuidle_init(); - if (IS_ENABLED(CONFIG_ARM_IMX6Q_CPUFREQ)) { - imx6q_opp_init(); - platform_device_register(&imx6q_cpufreq_pdev); - } + if (IS_ENABLED(CONFIG_ARM_IMX6Q_CPUFREQ)) + platform_device_register_simple("imx6q-cpufreq", -1, NULL, 0); } static void __init imx6q_map_io(void) diff --git a/arch/arm/mach-ixp4xx/dsmg600-setup.c b/arch/arm/mach-ixp4xx/dsmg600-setup.c index db488ecc98b5..19839bba7f17 100644 --- a/arch/arm/mach-ixp4xx/dsmg600-setup.c +++ b/arch/arm/mach-ixp4xx/dsmg600-setup.c @@ -175,7 +175,7 @@ static int power_button_countdown; #define PBUTTON_HOLDDOWN_COUNT 4 /* 2 secs */ static void dsmg600_power_handler(unsigned long data); -static DEFINE_TIMER(dsmg600_power_timer, dsmg600_power_handler, 0, 0); +static DEFINE_TIMER(dsmg600_power_timer, dsmg600_power_handler); static void dsmg600_power_handler(unsigned long data) { diff --git a/arch/arm/mach-ixp4xx/nas100d-setup.c b/arch/arm/mach-ixp4xx/nas100d-setup.c index 1b8170d65c74..b6d731241317 100644 --- a/arch/arm/mach-ixp4xx/nas100d-setup.c +++ b/arch/arm/mach-ixp4xx/nas100d-setup.c @@ -198,7 +198,7 @@ static int power_button_countdown; #define PBUTTON_HOLDDOWN_COUNT 4 /* 2 secs */ static void nas100d_power_handler(unsigned long data); -static DEFINE_TIMER(nas100d_power_timer, nas100d_power_handler, 0, 0); +static DEFINE_TIMER(nas100d_power_timer, nas100d_power_handler); static void nas100d_power_handler(unsigned long data) { diff --git a/arch/arm/mach-pxa/lubbock.c b/arch/arm/mach-pxa/lubbock.c index d6159f8ef0c2..df45682e99a5 100644 --- a/arch/arm/mach-pxa/lubbock.c +++ b/arch/arm/mach-pxa/lubbock.c @@ -381,14 +381,11 @@ static struct pxafb_mach_info sharp_lm8v31 = { #define MMC_POLL_RATE msecs_to_jiffies(1000) -static void lubbock_mmc_poll(unsigned long); static irq_handler_t mmc_detect_int; +static void *mmc_detect_int_data; +static struct timer_list mmc_timer; -static struct timer_list mmc_timer = { - .function = lubbock_mmc_poll, -}; - -static void lubbock_mmc_poll(unsigned long data) +static void lubbock_mmc_poll(struct timer_list *unused) { unsigned long flags; @@ -401,7 +398,7 @@ static void lubbock_mmc_poll(unsigned long data) if (LUB_IRQ_SET_CLR & (1 << 0)) mod_timer(&mmc_timer, jiffies + MMC_POLL_RATE); else { - (void) mmc_detect_int(LUBBOCK_SD_IRQ, (void *)data); + (void) mmc_detect_int(LUBBOCK_SD_IRQ, mmc_detect_int_data); enable_irq(LUBBOCK_SD_IRQ); } } @@ -421,8 +418,8 @@ static int lubbock_mci_init(struct device *dev, { /* detect card insert/eject */ mmc_detect_int = detect_int; - init_timer(&mmc_timer); - mmc_timer.data = (unsigned long) data; + mmc_detect_int_data = data; + timer_setup(&mmc_timer, lubbock_mmc_poll, 0); return request_irq(LUBBOCK_SD_IRQ, lubbock_detect_int, 0, "lubbock-sd-detect", data); } diff --git a/arch/arm/mach-pxa/sharpsl_pm.c b/arch/arm/mach-pxa/sharpsl_pm.c index 249b7bd5fbc4..398ba9ba2632 100644 --- a/arch/arm/mach-pxa/sharpsl_pm.c +++ b/arch/arm/mach-pxa/sharpsl_pm.c @@ -341,7 +341,7 @@ static void sharpsl_charge_toggle(struct work_struct *private_) sharpsl_pm.charge_start_time = jiffies; } -static void sharpsl_ac_timer(unsigned long data) +static void sharpsl_ac_timer(struct timer_list *unused) { int acin = sharpsl_pm.machinfo->read_devdata(SHARPSL_STATUS_ACIN); @@ -366,7 +366,7 @@ static irqreturn_t sharpsl_ac_isr(int irq, void *dev_id) return IRQ_HANDLED; } -static void sharpsl_chrg_full_timer(unsigned long data) +static void sharpsl_chrg_full_timer(struct timer_list *unused) { dev_dbg(sharpsl_pm.dev, "Charge Full at time: %lx\n", jiffies); @@ -841,9 +841,9 @@ static int sharpsl_pm_probe(struct platform_device *pdev) sharpsl_pm.charge_mode = CHRG_OFF; sharpsl_pm.flags = 0; - setup_timer(&sharpsl_pm.ac_timer, sharpsl_ac_timer, 0UL); + timer_setup(&sharpsl_pm.ac_timer, sharpsl_ac_timer, 0); - setup_timer(&sharpsl_pm.chrg_full_timer, sharpsl_chrg_full_timer, 0UL); + timer_setup(&sharpsl_pm.chrg_full_timer, sharpsl_chrg_full_timer, 0); led_trigger_register_simple("sharpsl-charge", &sharpsl_charge_led_trigger); diff --git a/arch/arm/mach-pxa/stargate2.c b/arch/arm/mach-pxa/stargate2.c index 2d45d18b1a5e..6b7df6fd2448 100644 --- a/arch/arm/mach-pxa/stargate2.c +++ b/arch/arm/mach-pxa/stargate2.c @@ -29,6 +29,7 @@ #include <linux/platform_data/pcf857x.h> #include <linux/platform_data/at24.h> #include <linux/smc91x.h> +#include <linux/gpio/machine.h> #include <linux/gpio.h> #include <linux/leds.h> @@ -52,7 +53,6 @@ #include <linux/spi/spi.h> #include <linux/spi/pxa2xx_spi.h> #include <linux/mfd/da903x.h> -#include <linux/platform_data/sht15.h> #include "devices.h" #include "generic.h" @@ -137,17 +137,18 @@ static unsigned long sg2_im2_unified_pin_config[] __initdata = { GPIO10_GPIO, /* large basic connector pin 23 */ }; -static struct sht15_platform_data platform_data_sht15 = { - .gpio_data = 100, - .gpio_sck = 98, +static struct gpiod_lookup_table sht15_gpiod_table = { + .dev_id = "sht15", + .table = { + /* FIXME: should this have |GPIO_OPEN_DRAIN set? */ + GPIO_LOOKUP("gpio-pxa", 100, "data", GPIO_ACTIVE_HIGH), + GPIO_LOOKUP("gpio-pxa", 98, "clk", GPIO_ACTIVE_HIGH), + }, }; static struct platform_device sht15 = { .name = "sht15", .id = -1, - .dev = { - .platform_data = &platform_data_sht15, - }, }; static struct regulator_consumer_supply stargate2_sensor_3_con[] = { @@ -608,6 +609,7 @@ static void __init imote2_init(void) imote2_stargate2_init(); + gpiod_add_lookup_table(&sht15_gpiod_table); platform_add_devices(imote2_devices, ARRAY_SIZE(imote2_devices)); i2c_register_board_info(0, imote2_i2c_board_info, @@ -988,6 +990,7 @@ static void __init stargate2_init(void) imote2_stargate2_init(); + gpiod_add_lookup_table(&sht15_gpiod_table); platform_add_devices(ARRAY_AND_SIZE(stargate2_devices)); i2c_register_board_info(0, ARRAY_AND_SIZE(stargate2_i2c_board_info)); diff --git a/arch/arm/mach-shmobile/pm-rmobile.c b/arch/arm/mach-shmobile/pm-rmobile.c index 3a4ed4c33a68..e348bcfe389d 100644 --- a/arch/arm/mach-shmobile/pm-rmobile.c +++ b/arch/arm/mach-shmobile/pm-rmobile.c @@ -120,18 +120,12 @@ static int rmobile_pd_power_up(struct generic_pm_domain *genpd) return __rmobile_pd_power_up(to_rmobile_pd(genpd), true); } -static bool rmobile_pd_active_wakeup(struct device *dev) -{ - return true; -} - static void rmobile_init_pm_domain(struct rmobile_pm_domain *rmobile_pd) { struct generic_pm_domain *genpd = &rmobile_pd->genpd; struct dev_power_governor *gov = rmobile_pd->gov; - genpd->flags |= GENPD_FLAG_PM_CLK; - genpd->dev_ops.active_wakeup = rmobile_pd_active_wakeup; + genpd->flags |= GENPD_FLAG_PM_CLK | GENPD_FLAG_ACTIVE_WAKEUP; genpd->power_off = rmobile_pd_power_down; genpd->power_on = rmobile_pd_power_up; genpd->attach_dev = cpg_mstp_attach_dev; diff --git a/arch/arm/mach-tegra/cpuidle-tegra20.c b/arch/arm/mach-tegra/cpuidle-tegra20.c index 76e4c83cd5c8..3f24addd7972 100644 --- a/arch/arm/mach-tegra/cpuidle-tegra20.c +++ b/arch/arm/mach-tegra/cpuidle-tegra20.c @@ -179,7 +179,7 @@ static int tegra20_idle_lp2_coupled(struct cpuidle_device *dev, bool entered_lp2 = false; if (tegra_pending_sgi()) - ACCESS_ONCE(abort_flag) = true; + WRITE_ONCE(abort_flag, true); cpuidle_coupled_parallel_barrier(dev, &abort_barrier); diff --git a/arch/arm/probes/kprobes/test-core.c b/arch/arm/probes/kprobes/test-core.c index 1c98a87786ca..9ed0129bed3c 100644 --- a/arch/arm/probes/kprobes/test-core.c +++ b/arch/arm/probes/kprobes/test-core.c @@ -227,7 +227,6 @@ static bool test_regs_ok; static int test_func_instance; static int pre_handler_called; static int post_handler_called; -static int jprobe_func_called; static int kretprobe_handler_called; static int tests_failed; @@ -370,50 +369,6 @@ static int test_kprobe(long (*func)(long, long)) return 0; } -static void __kprobes jprobe_func(long r0, long r1) -{ - jprobe_func_called = test_func_instance; - if (r0 == FUNC_ARG1 && r1 == FUNC_ARG2) - test_regs_ok = true; - jprobe_return(); -} - -static struct jprobe the_jprobe = { - .entry = jprobe_func, -}; - -static int test_jprobe(long (*func)(long, long)) -{ - int ret; - - the_jprobe.kp.addr = (kprobe_opcode_t *)func; - ret = register_jprobe(&the_jprobe); - if (ret < 0) { - pr_err("FAIL: register_jprobe failed with %d\n", ret); - return ret; - } - - ret = call_test_func(func, true); - - unregister_jprobe(&the_jprobe); - the_jprobe.kp.flags = 0; /* Clear disable flag to allow reuse */ - - if (!ret) - return -EINVAL; - if (jprobe_func_called != test_func_instance) { - pr_err("FAIL: jprobe handler function not called\n"); - return -EINVAL; - } - if (!call_test_func(func, false)) - return -EINVAL; - if (jprobe_func_called == test_func_instance) { - pr_err("FAIL: probe called after unregistering\n"); - return -EINVAL; - } - - return 0; -} - static int __kprobes kretprobe_handler(struct kretprobe_instance *ri, struct pt_regs *regs) { @@ -451,7 +406,7 @@ static int test_kretprobe(long (*func)(long, long)) } if (!call_test_func(func, false)) return -EINVAL; - if (jprobe_func_called == test_func_instance) { + if (kretprobe_handler_called == test_func_instance) { pr_err("FAIL: kretprobe called after unregistering\n"); return -EINVAL; } @@ -468,18 +423,6 @@ static int run_api_tests(long (*func)(long, long)) if (ret < 0) return ret; - pr_info(" jprobe\n"); - ret = test_jprobe(func); -#if defined(CONFIG_THUMB2_KERNEL) && !defined(MODULE) - if (ret == -EINVAL) { - pr_err("FAIL: Known longtime bug with jprobe on Thumb kernels\n"); - tests_failed = ret; - ret = 0; - } -#endif - if (ret < 0) - return ret; - pr_info(" kretprobe\n"); ret = test_kretprobe(func); if (ret < 0) diff --git a/arch/arm/vdso/vgettimeofday.c b/arch/arm/vdso/vgettimeofday.c index 79214d5ff097..a9dd619c6c29 100644 --- a/arch/arm/vdso/vgettimeofday.c +++ b/arch/arm/vdso/vgettimeofday.c @@ -35,7 +35,7 @@ static notrace u32 __vdso_read_begin(const struct vdso_data *vdata) { u32 seq; repeat: - seq = ACCESS_ONCE(vdata->seq_count); + seq = READ_ONCE(vdata->seq_count); if (seq & 1) { cpu_relax(); goto repeat; diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 0df64a6a56d4..6205f521b648 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -22,7 +22,24 @@ config ARM64 select ARCH_HAS_STRICT_MODULE_RWX select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAVE_NMI_SAFE_CMPXCHG if ACPI_APEI_SEA + select ARCH_INLINE_READ_LOCK if !PREEMPT + select ARCH_INLINE_READ_LOCK_BH if !PREEMPT + select ARCH_INLINE_READ_LOCK_IRQ if !PREEMPT + select ARCH_INLINE_READ_LOCK_IRQSAVE if !PREEMPT + select ARCH_INLINE_READ_UNLOCK if !PREEMPT + select ARCH_INLINE_READ_UNLOCK_BH if !PREEMPT + select ARCH_INLINE_READ_UNLOCK_IRQ if !PREEMPT + select ARCH_INLINE_READ_UNLOCK_IRQRESTORE if !PREEMPT + select ARCH_INLINE_WRITE_LOCK if !PREEMPT + select ARCH_INLINE_WRITE_LOCK_BH if !PREEMPT + select ARCH_INLINE_WRITE_LOCK_IRQ if !PREEMPT + select ARCH_INLINE_WRITE_LOCK_IRQSAVE if !PREEMPT + select ARCH_INLINE_WRITE_UNLOCK if !PREEMPT + select ARCH_INLINE_WRITE_UNLOCK_BH if !PREEMPT + select ARCH_INLINE_WRITE_UNLOCK_IRQ if !PREEMPT + select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE if !PREEMPT select ARCH_USE_CMPXCHG_LOCKREF + select ARCH_USE_QUEUED_RWLOCKS select ARCH_SUPPORTS_MEMORY_FAILURE select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_SUPPORTS_NUMA_BALANCING @@ -539,6 +556,25 @@ config QCOM_QDF2400_ERRATUM_0065 If unsure, say Y. + +config SOCIONEXT_SYNQUACER_PREITS + bool "Socionext Synquacer: Workaround for GICv3 pre-ITS" + default y + help + Socionext Synquacer SoCs implement a separate h/w block to generate + MSI doorbell writes with non-zero values for the device ID. + + If unsure, say Y. + +config HISILICON_ERRATUM_161600802 + bool "Hip07 161600802: Erroneous redistributor VLPI base" + default y + help + The HiSilicon Hip07 SoC usees the wrong redistributor base + when issued ITS commands such as VMOVP and VMAPP, and requires + a 128kB offset to be applied to the target address in this commands. + + If unsure, say Y. endmenu diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms index 6b54ee8c1262..1d03ef54295a 100644 --- a/arch/arm64/Kconfig.platforms +++ b/arch/arm64/Kconfig.platforms @@ -161,6 +161,9 @@ config ARCH_SEATTLE config ARCH_SHMOBILE bool +config ARCH_SYNQUACER + bool "Socionext SynQuacer SoC Family" + config ARCH_RENESAS bool "Renesas SoC Platforms" select ARCH_SHMOBILE diff --git a/arch/arm64/boot/dts/mediatek/mt8173.dtsi b/arch/arm64/boot/dts/mediatek/mt8173.dtsi index b99a27372965..26396ef53bde 100644 --- a/arch/arm64/boot/dts/mediatek/mt8173.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8173.dtsi @@ -682,8 +682,7 @@ }; mmc0: mmc@11230000 { - compatible = "mediatek,mt8173-mmc", - "mediatek,mt8135-mmc"; + compatible = "mediatek,mt8173-mmc"; reg = <0 0x11230000 0 0x1000>; interrupts = <GIC_SPI 71 IRQ_TYPE_LEVEL_LOW>; clocks = <&pericfg CLK_PERI_MSDC30_0>, @@ -693,8 +692,7 @@ }; mmc1: mmc@11240000 { - compatible = "mediatek,mt8173-mmc", - "mediatek,mt8135-mmc"; + compatible = "mediatek,mt8173-mmc"; reg = <0 0x11240000 0 0x1000>; interrupts = <GIC_SPI 72 IRQ_TYPE_LEVEL_LOW>; clocks = <&pericfg CLK_PERI_MSDC30_1>, @@ -704,8 +702,7 @@ }; mmc2: mmc@11250000 { - compatible = "mediatek,mt8173-mmc", - "mediatek,mt8135-mmc"; + compatible = "mediatek,mt8173-mmc"; reg = <0 0x11250000 0 0x1000>; interrupts = <GIC_SPI 73 IRQ_TYPE_LEVEL_LOW>; clocks = <&pericfg CLK_PERI_MSDC30_2>, @@ -715,8 +712,7 @@ }; mmc3: mmc@11260000 { - compatible = "mediatek,mt8173-mmc", - "mediatek,mt8135-mmc"; + compatible = "mediatek,mt8173-mmc"; reg = <0 0x11260000 0 0x1000>; interrupts = <GIC_SPI 74 IRQ_TYPE_LEVEL_LOW>; clocks = <&pericfg CLK_PERI_MSDC30_3>, diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild index 2326e39d5892..e63d0a8312de 100644 --- a/arch/arm64/include/asm/Kbuild +++ b/arch/arm64/include/asm/Kbuild @@ -16,6 +16,7 @@ generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h generic-y += msi.h generic-y += preempt.h +generic-y += qrwlock.h generic-y += rwsem.h generic-y += segment.h generic-y += serial.h diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h index b7e3f74822da..9becba9ab392 100644 --- a/arch/arm64/include/asm/arch_gicv3.h +++ b/arch/arm64/include/asm/arch_gicv3.h @@ -87,6 +87,11 @@ static inline void gic_write_ctlr(u32 val) isb(); } +static inline u32 gic_read_ctlr(void) +{ + return read_sysreg_s(SYS_ICC_CTLR_EL1); +} + static inline void gic_write_grpen1(u32 val) { write_sysreg_s(val, SYS_ICC_IGRPEN1_EL1); diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h index 95ad7102b63c..fdb827c7832f 100644 --- a/arch/arm64/include/asm/spinlock.h +++ b/arch/arm64/include/asm/spinlock.h @@ -27,8 +27,6 @@ * instructions. */ -#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) - static inline void arch_spin_lock(arch_spinlock_t *lock) { unsigned int tmp; @@ -139,176 +137,7 @@ static inline int arch_spin_is_contended(arch_spinlock_t *lock) } #define arch_spin_is_contended arch_spin_is_contended -/* - * Write lock implementation. - * - * Write locks set bit 31. Unlocking, is done by writing 0 since the lock is - * exclusively held. - * - * The memory barriers are implicit with the load-acquire and store-release - * instructions. - */ - -static inline void arch_write_lock(arch_rwlock_t *rw) -{ - unsigned int tmp; - - asm volatile(ARM64_LSE_ATOMIC_INSN( - /* LL/SC */ - " sevl\n" - "1: wfe\n" - "2: ldaxr %w0, %1\n" - " cbnz %w0, 1b\n" - " stxr %w0, %w2, %1\n" - " cbnz %w0, 2b\n" - __nops(1), - /* LSE atomics */ - "1: mov %w0, wzr\n" - "2: casa %w0, %w2, %1\n" - " cbz %w0, 3f\n" - " ldxr %w0, %1\n" - " cbz %w0, 2b\n" - " wfe\n" - " b 1b\n" - "3:") - : "=&r" (tmp), "+Q" (rw->lock) - : "r" (0x80000000) - : "memory"); -} - -static inline int arch_write_trylock(arch_rwlock_t *rw) -{ - unsigned int tmp; - - asm volatile(ARM64_LSE_ATOMIC_INSN( - /* LL/SC */ - "1: ldaxr %w0, %1\n" - " cbnz %w0, 2f\n" - " stxr %w0, %w2, %1\n" - " cbnz %w0, 1b\n" - "2:", - /* LSE atomics */ - " mov %w0, wzr\n" - " casa %w0, %w2, %1\n" - __nops(2)) - : "=&r" (tmp), "+Q" (rw->lock) - : "r" (0x80000000) - : "memory"); - - return !tmp; -} - -static inline void arch_write_unlock(arch_rwlock_t *rw) -{ - asm volatile(ARM64_LSE_ATOMIC_INSN( - " stlr wzr, %0", - " swpl wzr, wzr, %0") - : "=Q" (rw->lock) :: "memory"); -} - -/* write_can_lock - would write_trylock() succeed? */ -#define arch_write_can_lock(x) ((x)->lock == 0) - -/* - * Read lock implementation. - * - * It exclusively loads the lock value, increments it and stores the new value - * back if positive and the CPU still exclusively owns the location. If the - * value is negative, the lock is already held. - * - * During unlocking there may be multiple active read locks but no write lock. - * - * The memory barriers are implicit with the load-acquire and store-release - * instructions. - * - * Note that in UNDEFINED cases, such as unlocking a lock twice, the LL/SC - * and LSE implementations may exhibit different behaviour (although this - * will have no effect on lockdep). - */ -static inline void arch_read_lock(arch_rwlock_t *rw) -{ - unsigned int tmp, tmp2; - - asm volatile( - " sevl\n" - ARM64_LSE_ATOMIC_INSN( - /* LL/SC */ - "1: wfe\n" - "2: ldaxr %w0, %2\n" - " add %w0, %w0, #1\n" - " tbnz %w0, #31, 1b\n" - " stxr %w1, %w0, %2\n" - " cbnz %w1, 2b\n" - __nops(1), - /* LSE atomics */ - "1: wfe\n" - "2: ldxr %w0, %2\n" - " adds %w1, %w0, #1\n" - " tbnz %w1, #31, 1b\n" - " casa %w0, %w1, %2\n" - " sbc %w0, %w1, %w0\n" - " cbnz %w0, 2b") - : "=&r" (tmp), "=&r" (tmp2), "+Q" (rw->lock) - : - : "cc", "memory"); -} - -static inline void arch_read_unlock(arch_rwlock_t *rw) -{ - unsigned int tmp, tmp2; - - asm volatile(ARM64_LSE_ATOMIC_INSN( - /* LL/SC */ - "1: ldxr %w0, %2\n" - " sub %w0, %w0, #1\n" - " stlxr %w1, %w0, %2\n" - " cbnz %w1, 1b", - /* LSE atomics */ - " movn %w0, #0\n" - " staddl %w0, %2\n" - __nops(2)) - : "=&r" (tmp), "=&r" (tmp2), "+Q" (rw->lock) - : - : "memory"); -} - -static inline int arch_read_trylock(arch_rwlock_t *rw) -{ - unsigned int tmp, tmp2; - - asm volatile(ARM64_LSE_ATOMIC_INSN( - /* LL/SC */ - " mov %w1, #1\n" - "1: ldaxr %w0, %2\n" - " add %w0, %w0, #1\n" - " tbnz %w0, #31, 2f\n" - " stxr %w1, %w0, %2\n" - " cbnz %w1, 1b\n" - "2:", - /* LSE atomics */ - " ldr %w0, %2\n" - " adds %w1, %w0, #1\n" - " tbnz %w1, #31, 1f\n" - " casa %w0, %w1, %2\n" - " sbc %w1, %w1, %w0\n" - __nops(1) - "1:") - : "=&r" (tmp), "=&r" (tmp2), "+Q" (rw->lock) - : - : "cc", "memory"); - - return !tmp2; -} - -/* read_can_lock - would read_trylock() succeed? */ -#define arch_read_can_lock(x) ((x)->lock < 0x80000000) - -#define arch_read_lock_flags(lock, flags) arch_read_lock(lock) -#define arch_write_lock_flags(lock, flags) arch_write_lock(lock) - -#define arch_spin_relax(lock) cpu_relax() -#define arch_read_relax(lock) cpu_relax() -#define arch_write_relax(lock) cpu_relax() +#include <asm/qrwlock.h> /* See include/linux/spinlock.h */ #define smp_mb__after_spinlock() smp_mb() diff --git a/arch/arm64/include/asm/spinlock_types.h b/arch/arm64/include/asm/spinlock_types.h index 55be59a35e3f..6b856012c51b 100644 --- a/arch/arm64/include/asm/spinlock_types.h +++ b/arch/arm64/include/asm/spinlock_types.h @@ -36,10 +36,6 @@ typedef struct { #define __ARCH_SPIN_LOCK_UNLOCKED { 0 , 0 } -typedef struct { - volatile unsigned int lock; -} arch_rwlock_t; - -#define __ARCH_RW_LOCK_UNLOCKED { 0 } +#include <asm-generic/qrwlock_types.h> #endif diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h index b3202284568b..c4f2d50491eb 100644 --- a/arch/arm64/include/asm/topology.h +++ b/arch/arm64/include/asm/topology.h @@ -33,6 +33,14 @@ int pcibus_to_node(struct pci_bus *bus); #endif /* CONFIG_NUMA */ +#include <linux/arch_topology.h> + +/* Replace task scheduler's default frequency-invariant accounting */ +#define arch_scale_freq_capacity topology_get_freq_scale + +/* Replace task scheduler's default cpu-invariant accounting */ +#define arch_scale_cpu_capacity topology_get_cpu_scale + #include <asm-generic/topology.h> #endif /* _ASM_ARM_TOPOLOGY_H */ diff --git a/arch/blackfin/include/asm/spinlock.h b/arch/blackfin/include/asm/spinlock.h index f6431439d15d..839d1441af3a 100644 --- a/arch/blackfin/include/asm/spinlock.h +++ b/arch/blackfin/include/asm/spinlock.h @@ -36,8 +36,6 @@ static inline void arch_spin_lock(arch_spinlock_t *lock) __raw_spin_lock_asm(&lock->lock); } -#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) - static inline int arch_spin_trylock(arch_spinlock_t *lock) { return __raw_spin_trylock_asm(&lock->lock); @@ -48,23 +46,11 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock) __raw_spin_unlock_asm(&lock->lock); } -static inline int arch_read_can_lock(arch_rwlock_t *rw) -{ - return __raw_uncached_fetch_asm(&rw->lock) > 0; -} - -static inline int arch_write_can_lock(arch_rwlock_t *rw) -{ - return __raw_uncached_fetch_asm(&rw->lock) == RW_LOCK_BIAS; -} - static inline void arch_read_lock(arch_rwlock_t *rw) { __raw_read_lock_asm(&rw->lock); } -#define arch_read_lock_flags(lock, flags) arch_read_lock(lock) - static inline int arch_read_trylock(arch_rwlock_t *rw) { return __raw_read_trylock_asm(&rw->lock); @@ -80,8 +66,6 @@ static inline void arch_write_lock(arch_rwlock_t *rw) __raw_write_lock_asm(&rw->lock); } -#define arch_write_lock_flags(lock, flags) arch_write_lock(lock) - static inline int arch_write_trylock(arch_rwlock_t *rw) { return __raw_write_trylock_asm(&rw->lock); @@ -92,10 +76,6 @@ static inline void arch_write_unlock(arch_rwlock_t *rw) __raw_write_unlock_asm(&rw->lock); } -#define arch_spin_relax(lock) cpu_relax() -#define arch_read_relax(lock) cpu_relax() -#define arch_write_relax(lock) cpu_relax() - #endif #endif /* !__BFIN_SPINLOCK_H */ diff --git a/arch/hexagon/include/asm/spinlock.h b/arch/hexagon/include/asm/spinlock.h index 53a8d5885887..48020863f53a 100644 --- a/arch/hexagon/include/asm/spinlock.h +++ b/arch/hexagon/include/asm/spinlock.h @@ -86,16 +86,6 @@ static inline int arch_read_trylock(arch_rwlock_t *lock) return temp; } -static inline int arch_read_can_lock(arch_rwlock_t *rwlock) -{ - return rwlock->lock == 0; -} - -static inline int arch_write_can_lock(arch_rwlock_t *rwlock) -{ - return rwlock->lock == 0; -} - /* Stuffs a -1 in the lock value? */ static inline void arch_write_lock(arch_rwlock_t *lock) { @@ -177,11 +167,6 @@ static inline unsigned int arch_spin_trylock(arch_spinlock_t *lock) /* * SMP spinlocks are intended to allow only a single CPU at the lock */ -#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) - #define arch_spin_is_locked(x) ((x)->lock != 0) -#define arch_read_lock_flags(lock, flags) arch_read_lock(lock) -#define arch_write_lock_flags(lock, flags) arch_write_lock(lock) - #endif diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 1efc444f5fa1..49583c5a5d44 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -47,7 +47,7 @@ config IA64 select ARCH_TASK_STRUCT_ALLOCATOR select ARCH_THREAD_STACK_ALLOCATOR select ARCH_CLOCKSOURCE_DATA - select GENERIC_TIME_VSYSCALL_OLD + select GENERIC_TIME_VSYSCALL select SYSCTL_ARCH_UNALIGN_NO_WARN select HAVE_MOD_ARCH_SPECIFIC select MODULES_USE_ELF_RELA diff --git a/arch/ia64/include/asm/rwsem.h b/arch/ia64/include/asm/rwsem.h index 7d6fceb3d567..917910607e0e 100644 --- a/arch/ia64/include/asm/rwsem.h +++ b/arch/ia64/include/asm/rwsem.h @@ -38,15 +38,31 @@ /* * lock for reading */ -static inline void -__down_read (struct rw_semaphore *sem) +static inline int +___down_read (struct rw_semaphore *sem) { long result = ia64_fetchadd8_acq((unsigned long *)&sem->count.counter, 1); - if (result < 0) + return (result < 0); +} + +static inline void +__down_read (struct rw_semaphore *sem) +{ + if (___down_read(sem)) rwsem_down_read_failed(sem); } +static inline int +__down_read_killable (struct rw_semaphore *sem) +{ + if (___down_read(sem)) + if (IS_ERR(rwsem_down_read_failed_killable(sem))) + return -EINTR; + + return 0; +} + /* * lock for writing */ @@ -73,9 +89,10 @@ __down_write (struct rw_semaphore *sem) static inline int __down_write_killable (struct rw_semaphore *sem) { - if (___down_write(sem)) + if (___down_write(sem)) { if (IS_ERR(rwsem_down_write_failed_killable(sem))) return -EINTR; + } return 0; } diff --git a/arch/ia64/include/asm/sn/bte.h b/arch/ia64/include/asm/sn/bte.h index cc6c4dbf53af..cd71ab5faf62 100644 --- a/arch/ia64/include/asm/sn/bte.h +++ b/arch/ia64/include/asm/sn/bte.h @@ -17,6 +17,8 @@ #include <asm/sn/types.h> #include <asm/sn/shub_mmr.h> +struct nodepda_s; + #define IBCT_NOTIFY (0x1UL << 4) #define IBCT_ZFIL_MODE (0x1UL << 0) @@ -210,7 +212,7 @@ struct bteinfo_s { */ extern bte_result_t bte_copy(u64, u64, u64, u64, void *); extern bte_result_t bte_unaligned_copy(u64, u64, u64, u64); -extern void bte_error_handler(unsigned long); +extern void bte_error_handler(struct nodepda_s *); #define bte_zero(dest, len, mode, notification) \ bte_copy(0, dest, len, ((mode) | BTE_ZERO_FILL), notification) diff --git a/arch/ia64/include/asm/spinlock.h b/arch/ia64/include/asm/spinlock.h index aa057abd948e..afd0b3121b4c 100644 --- a/arch/ia64/include/asm/spinlock.h +++ b/arch/ia64/include/asm/spinlock.h @@ -62,7 +62,7 @@ static __always_inline void __ticket_spin_lock(arch_spinlock_t *lock) static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock) { - int tmp = ACCESS_ONCE(lock->lock); + int tmp = READ_ONCE(lock->lock); if (!(((tmp >> TICKET_SHIFT) ^ tmp) & TICKET_MASK)) return ia64_cmpxchg(acq, &lock->lock, tmp, tmp + 1, sizeof (tmp)) == tmp; @@ -74,19 +74,19 @@ static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock) unsigned short *p = (unsigned short *)&lock->lock + 1, tmp; asm volatile ("ld2.bias %0=[%1]" : "=r"(tmp) : "r"(p)); - ACCESS_ONCE(*p) = (tmp + 2) & ~1; + WRITE_ONCE(*p, (tmp + 2) & ~1); } static inline int __ticket_spin_is_locked(arch_spinlock_t *lock) { - long tmp = ACCESS_ONCE(lock->lock); + long tmp = READ_ONCE(lock->lock); return !!(((tmp >> TICKET_SHIFT) ^ tmp) & TICKET_MASK); } static inline int __ticket_spin_is_contended(arch_spinlock_t *lock) { - long tmp = ACCESS_ONCE(lock->lock); + long tmp = READ_ONCE(lock->lock); return ((tmp - (tmp >> TICKET_SHIFT)) & TICKET_MASK) > 1; } @@ -127,9 +127,7 @@ static __always_inline void arch_spin_lock_flags(arch_spinlock_t *lock, { arch_spin_lock(lock); } - -#define arch_read_can_lock(rw) (*(volatile int *)(rw) >= 0) -#define arch_write_can_lock(rw) (*(volatile int *)(rw) == 0) +#define arch_spin_lock_flags arch_spin_lock_flags #ifdef ASM_SUPPORTED @@ -157,6 +155,7 @@ arch_read_lock_flags(arch_rwlock_t *lock, unsigned long flags) : "p6", "p7", "r2", "memory"); } +#define arch_read_lock_flags arch_read_lock_flags #define arch_read_lock(lock) arch_read_lock_flags(lock, 0) #else /* !ASM_SUPPORTED */ @@ -209,6 +208,7 @@ arch_write_lock_flags(arch_rwlock_t *lock, unsigned long flags) : "ar.ccv", "p6", "p7", "r2", "r29", "memory"); } +#define arch_write_lock_flags arch_write_lock_flags #define arch_write_lock(rw) arch_write_lock_flags(rw, 0) #define arch_write_trylock(rw) \ @@ -232,8 +232,6 @@ static inline void arch_write_unlock(arch_rwlock_t *x) #else /* !ASM_SUPPORTED */ -#define arch_write_lock_flags(l, flags) arch_write_lock(l) - #define arch_write_lock(l) \ ({ \ __u64 ia64_val, ia64_set_val = ia64_dep_mi(-1, 0, 31, 1); \ @@ -273,8 +271,4 @@ static inline int arch_read_trylock(arch_rwlock_t *x) return (u32)ia64_cmpxchg4_acq((__u32 *)(x), new.word, old.word) == old.word; } -#define arch_spin_relax(lock) cpu_relax() -#define arch_read_relax(lock) cpu_relax() -#define arch_write_relax(lock) cpu_relax() - #endif /* _ASM_IA64_SPINLOCK_H */ diff --git a/arch/ia64/kernel/asm-offsets.c b/arch/ia64/kernel/asm-offsets.c index b385ff2bf6ce..f7693f49c573 100644 --- a/arch/ia64/kernel/asm-offsets.c +++ b/arch/ia64/kernel/asm-offsets.c @@ -212,6 +212,8 @@ void foo(void) BLANK(); DEFINE(IA64_TIMESPEC_TV_NSEC_OFFSET, offsetof (struct timespec, tv_nsec)); + DEFINE(IA64_TIME_SN_SPEC_SNSEC_OFFSET, + offsetof (struct time_sn_spec, snsec)); DEFINE(CLONE_SETTLS_BIT, 19); #if CLONE_SETTLS != (1<<19) diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S index c0e7c9af2bb9..fe742ffafc7a 100644 --- a/arch/ia64/kernel/fsys.S +++ b/arch/ia64/kernel/fsys.S @@ -236,9 +236,9 @@ ENTRY(fsys_gettimeofday) MOV_FROM_ITC(p8, p6, r2, r10) // CPU_TIMER. 36 clocks latency!!! (p9) ld8 r2 = [r30] // MMIO_TIMER. Could also have latency issues.. (p13) ld8 r25 = [r19] // get itc_lastcycle value - ld8 r9 = [r22],IA64_TIMESPEC_TV_NSEC_OFFSET // tv_sec + ld8 r9 = [r22],IA64_TIME_SN_SPEC_SNSEC_OFFSET // sec ;; - ld8 r8 = [r22],-IA64_TIMESPEC_TV_NSEC_OFFSET // tv_nsec + ld8 r8 = [r22],-IA64_TIME_SN_SPEC_SNSEC_OFFSET // snsec (p13) sub r3 = r25,r2 // Diff needed before comparison (thanks davidm) ;; (p13) cmp.gt.unc p6,p7 = r3,r0 // check if it is less than last. p6,p7 cleared @@ -266,9 +266,9 @@ EX(.fail_efault, probe.w.fault r31, 3) mf ;; ld4 r10 = [r20] // gtod_lock.sequence - shr.u r2 = r2,r23 // shift by factor - ;; add r8 = r8,r2 // Add xtime.nsecs + ;; + shr.u r8 = r8,r23 // shift by factor cmp4.ne p7,p0 = r28,r10 (p7) br.cond.dpnt.few .time_redo // sequence number changed, redo // End critical section. diff --git a/arch/ia64/kernel/fsyscall_gtod_data.h b/arch/ia64/kernel/fsyscall_gtod_data.h index 0914c02a1eb0..cc2861445965 100644 --- a/arch/ia64/kernel/fsyscall_gtod_data.h +++ b/arch/ia64/kernel/fsyscall_gtod_data.h @@ -6,10 +6,16 @@ * fsyscall gettimeofday data */ +/* like timespec, but includes "shifted nanoseconds" */ +struct time_sn_spec { + u64 sec; + u64 snsec; +}; + struct fsyscall_gtod_data_t { seqcount_t seq; - struct timespec wall_time; - struct timespec monotonic_time; + struct time_sn_spec wall_time; + struct time_sn_spec monotonic_time; u64 clk_mask; u32 clk_mult; u32 clk_shift; diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index 555b11180156..6115464d5f03 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -1513,7 +1513,7 @@ ia64_mca_cmc_int_caller(int cmc_irq, void *arg) * */ static void -ia64_mca_cmc_poll (unsigned long dummy) +ia64_mca_cmc_poll (struct timer_list *unused) { /* Trigger a CMC interrupt cascade */ platform_send_ipi(cpumask_first(cpu_online_mask), IA64_CMCP_VECTOR, @@ -1590,7 +1590,7 @@ ia64_mca_cpe_int_caller(int cpe_irq, void *arg) * */ static void -ia64_mca_cpe_poll (unsigned long dummy) +ia64_mca_cpe_poll (struct timer_list *unused) { /* Trigger a CPE interrupt cascade */ platform_send_ipi(cpumask_first(cpu_online_mask), IA64_CPEP_VECTOR, @@ -2098,7 +2098,7 @@ ia64_mca_late_init(void) return 0; /* Setup the CMCI/P vector and handler */ - setup_timer(&cmc_poll_timer, ia64_mca_cmc_poll, 0UL); + timer_setup(&cmc_poll_timer, ia64_mca_cmc_poll, 0); /* Unmask/enable the vector */ cmc_polling_enabled = 0; @@ -2109,7 +2109,7 @@ ia64_mca_late_init(void) #ifdef CONFIG_ACPI /* Setup the CPEI/P vector and handler */ cpe_vector = acpi_request_vector(ACPI_INTERRUPT_CPEI); - setup_timer(&cpe_poll_timer, ia64_mca_cpe_poll, 0UL); + timer_setup(&cpe_poll_timer, ia64_mca_cpe_poll, 0); { unsigned int irq; diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c index 63dc9cdc95c5..52c404b08904 100644 --- a/arch/ia64/kernel/salinfo.c +++ b/arch/ia64/kernel/salinfo.c @@ -263,7 +263,7 @@ salinfo_timeout_check(struct salinfo_data *data) } static void -salinfo_timeout (unsigned long arg) +salinfo_timeout(struct timer_list *unused) { ia64_mlogbuf_dump(); salinfo_timeout_check(salinfo_data + SAL_INFO_TYPE_MCA); @@ -623,9 +623,8 @@ salinfo_init(void) *sdir++ = salinfo_dir; - init_timer(&salinfo_timer); + timer_setup(&salinfo_timer, salinfo_timeout, 0); salinfo_timer.expires = jiffies + SALINFO_TIMER_DELAY; - salinfo_timer.function = &salinfo_timeout; add_timer(&salinfo_timer); i = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "ia64/salinfo:online", diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index aa7be020a904..c6ecb97151a2 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -430,30 +430,32 @@ void update_vsyscall_tz(void) { } -void update_vsyscall_old(struct timespec *wall, struct timespec *wtm, - struct clocksource *c, u32 mult, u64 cycle_last) +void update_vsyscall(struct timekeeper *tk) { write_seqcount_begin(&fsyscall_gtod_data.seq); - /* copy fsyscall clock data */ - fsyscall_gtod_data.clk_mask = c->mask; - fsyscall_gtod_data.clk_mult = mult; - fsyscall_gtod_data.clk_shift = c->shift; - fsyscall_gtod_data.clk_fsys_mmio = c->archdata.fsys_mmio; - fsyscall_gtod_data.clk_cycle_last = cycle_last; - - /* copy kernel time structures */ - fsyscall_gtod_data.wall_time.tv_sec = wall->tv_sec; - fsyscall_gtod_data.wall_time.tv_nsec = wall->tv_nsec; - fsyscall_gtod_data.monotonic_time.tv_sec = wtm->tv_sec - + wall->tv_sec; - fsyscall_gtod_data.monotonic_time.tv_nsec = wtm->tv_nsec - + wall->tv_nsec; + /* copy vsyscall data */ + fsyscall_gtod_data.clk_mask = tk->tkr_mono.mask; + fsyscall_gtod_data.clk_mult = tk->tkr_mono.mult; + fsyscall_gtod_data.clk_shift = tk->tkr_mono.shift; + fsyscall_gtod_data.clk_fsys_mmio = tk->tkr_mono.clock->archdata.fsys_mmio; + fsyscall_gtod_data.clk_cycle_last = tk->tkr_mono.cycle_last; + + fsyscall_gtod_data.wall_time.sec = tk->xtime_sec; + fsyscall_gtod_data.wall_time.snsec = tk->tkr_mono.xtime_nsec; + + fsyscall_gtod_data.monotonic_time.sec = tk->xtime_sec + + tk->wall_to_monotonic.tv_sec; + fsyscall_gtod_data.monotonic_time.snsec = tk->tkr_mono.xtime_nsec + + ((u64)tk->wall_to_monotonic.tv_nsec + << tk->tkr_mono.shift); /* normalize */ - while (fsyscall_gtod_data.monotonic_time.tv_nsec >= NSEC_PER_SEC) { - fsyscall_gtod_data.monotonic_time.tv_nsec -= NSEC_PER_SEC; - fsyscall_gtod_data.monotonic_time.tv_sec++; + while (fsyscall_gtod_data.monotonic_time.snsec >= + (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) { + fsyscall_gtod_data.monotonic_time.snsec -= + ((u64)NSEC_PER_SEC) << tk->tkr_mono.shift; + fsyscall_gtod_data.monotonic_time.sec++; } write_seqcount_end(&fsyscall_gtod_data.seq); diff --git a/arch/ia64/sn/kernel/bte.c b/arch/ia64/sn/kernel/bte.c index b2eb48490754..9146192b86f5 100644 --- a/arch/ia64/sn/kernel/bte.c +++ b/arch/ia64/sn/kernel/bte.c @@ -219,7 +219,7 @@ retry_bteop: BTE_LNSTAT_LOAD(bte), *bte->most_rcnt_na) ); bte->bte_error_count++; bte->bh_error = IBLS_ERROR; - bte_error_handler((unsigned long)NODEPDA(bte->bte_cnode)); + bte_error_handler(NODEPDA(bte->bte_cnode)); *bte->most_rcnt_na = BTE_WORD_AVAILABLE; goto retry_bteop; } @@ -414,6 +414,12 @@ EXPORT_SYMBOL(bte_unaligned_copy); * Block Transfer Engine initialization functions. * ***********************************************************************/ +static void bte_recovery_timeout(struct timer_list *t) +{ + struct nodepda_s *nodepda = from_timer(nodepda, t, bte_recovery_timer); + + bte_error_handler(nodepda); +} /* * bte_init_node(nodepda, cnode) @@ -436,9 +442,7 @@ void bte_init_node(nodepda_t * mynodepda, cnodeid_t cnode) * will point at this one bte_recover structure to get the lock. */ spin_lock_init(&mynodepda->bte_recovery_lock); - init_timer(&mynodepda->bte_recovery_timer); - mynodepda->bte_recovery_timer.function = bte_error_handler; - mynodepda->bte_recovery_timer.data = (unsigned long)mynodepda; + timer_setup(&mynodepda->bte_recovery_timer, bte_recovery_timeout, 0); for (i = 0; i < BTES_PER_NODE; i++) { u64 *base_addr; diff --git a/arch/ia64/sn/kernel/bte_error.c b/arch/ia64/sn/kernel/bte_error.c index 4cb09f3f1efc..d92786c09b34 100644 --- a/arch/ia64/sn/kernel/bte_error.c +++ b/arch/ia64/sn/kernel/bte_error.c @@ -27,15 +27,12 @@ * transfers to be queued. */ -void bte_error_handler(unsigned long); - /* * Wait until all BTE related CRBs are completed * and then reset the interfaces. */ -int shub1_bte_error_handler(unsigned long _nodepda) +static int shub1_bte_error_handler(struct nodepda_s *err_nodepda) { - struct nodepda_s *err_nodepda = (struct nodepda_s *)_nodepda; struct timer_list *recovery_timer = &err_nodepda->bte_recovery_timer; nasid_t nasid; int i; @@ -131,9 +128,8 @@ int shub1_bte_error_handler(unsigned long _nodepda) * Wait until all BTE related CRBs are completed * and then reset the interfaces. */ -int shub2_bte_error_handler(unsigned long _nodepda) +static int shub2_bte_error_handler(struct nodepda_s *err_nodepda) { - struct nodepda_s *err_nodepda = (struct nodepda_s *)_nodepda; struct timer_list *recovery_timer = &err_nodepda->bte_recovery_timer; struct bteinfo_s *bte; nasid_t nasid; @@ -170,9 +166,8 @@ int shub2_bte_error_handler(unsigned long _nodepda) * Wait until all BTE related CRBs are completed * and then reset the interfaces. */ -void bte_error_handler(unsigned long _nodepda) +void bte_error_handler(struct nodepda_s *err_nodepda) { - struct nodepda_s *err_nodepda = (struct nodepda_s *)_nodepda; spinlock_t *recovery_lock = &err_nodepda->bte_recovery_lock; int i; unsigned long irq_flags; @@ -199,12 +194,12 @@ void bte_error_handler(unsigned long _nodepda) } if (is_shub1()) { - if (shub1_bte_error_handler(_nodepda)) { + if (shub1_bte_error_handler(err_nodepda)) { spin_unlock_irqrestore(recovery_lock, irq_flags); return; } } else { - if (shub2_bte_error_handler(_nodepda)) { + if (shub2_bte_error_handler(err_nodepda)) { spin_unlock_irqrestore(recovery_lock, irq_flags); return; } @@ -255,6 +250,6 @@ bte_crb_error_handler(cnodeid_t cnode, int btenum, BTE_PRINTK(("Got an error on cnode %d bte %d: HW error type 0x%x\n", bte->bte_cnode, bte->bte_num, ioe->ie_errortype)); - bte_error_handler((unsigned long) NODEPDA(cnode)); + bte_error_handler(NODEPDA(cnode)); } diff --git a/arch/ia64/sn/kernel/huberror.c b/arch/ia64/sn/kernel/huberror.c index f925dec2da92..97fa56dddf50 100644 --- a/arch/ia64/sn/kernel/huberror.c +++ b/arch/ia64/sn/kernel/huberror.c @@ -50,7 +50,7 @@ static irqreturn_t hub_eint_handler(int irq, void *arg) if ((int)ret_stuff.v0) panic("%s: Fatal TIO Error", __func__); } else - bte_error_handler((unsigned long)NODEPDA(nasid_to_cnodeid(nasid))); + bte_error_handler(NODEPDA(nasid_to_cnodeid(nasid))); return IRQ_HANDLED; } diff --git a/arch/ia64/sn/kernel/mca.c b/arch/ia64/sn/kernel/mca.c index 5b799d4deb74..bc3bd930c74c 100644 --- a/arch/ia64/sn/kernel/mca.c +++ b/arch/ia64/sn/kernel/mca.c @@ -72,7 +72,7 @@ static void sn_cpei_handler(int irq, void *devid, struct pt_regs *regs) ia64_sn_plat_cpei_handler(); } -static void sn_cpei_timer_handler(unsigned long dummy) +static void sn_cpei_timer_handler(struct timer_list *unused) { sn_cpei_handler(-1, NULL, NULL); mod_timer(&sn_cpei_timer, jiffies + CPEI_INTERVAL); @@ -80,9 +80,8 @@ static void sn_cpei_timer_handler(unsigned long dummy) void sn_init_cpei_timer(void) { - init_timer(&sn_cpei_timer); + timer_setup(&sn_cpei_timer, sn_cpei_timer_handler, 0); sn_cpei_timer.expires = jiffies + CPEI_INTERVAL; - sn_cpei_timer.function = sn_cpei_timer_handler; add_timer(&sn_cpei_timer); } diff --git a/arch/m32r/include/asm/spinlock.h b/arch/m32r/include/asm/spinlock.h index 604af84427ff..0189f410f8f5 100644 --- a/arch/m32r/include/asm/spinlock.h +++ b/arch/m32r/include/asm/spinlock.h @@ -29,7 +29,6 @@ */ #define arch_spin_is_locked(x) (*(volatile int *)(&(x)->slock) <= 0) -#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) /** * arch_spin_trylock - Try spin lock and return a result @@ -138,18 +137,6 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock) * semaphore.h for details. -ben */ -/** - * read_can_lock - would read_trylock() succeed? - * @lock: the rwlock in question. - */ -#define arch_read_can_lock(x) ((int)(x)->lock > 0) - -/** - * write_can_lock - would write_trylock() succeed? - * @lock: the rwlock in question. - */ -#define arch_write_can_lock(x) ((x)->lock == RW_LOCK_BIAS) - static inline void arch_read_lock(arch_rwlock_t *rw) { unsigned long tmp0, tmp1; @@ -318,11 +305,4 @@ static inline int arch_write_trylock(arch_rwlock_t *lock) return 0; } -#define arch_read_lock_flags(lock, flags) arch_read_lock(lock) -#define arch_write_lock_flags(lock, flags) arch_write_lock(lock) - -#define arch_spin_relax(lock) cpu_relax() -#define arch_read_relax(lock) cpu_relax() -#define arch_write_relax(lock) cpu_relax() - #endif /* _ASM_M32R_SPINLOCK_H */ diff --git a/arch/m68k/Kconfig.cpu b/arch/m68k/Kconfig.cpu index ff5f0896318b..21f00349af52 100644 --- a/arch/m68k/Kconfig.cpu +++ b/arch/m68k/Kconfig.cpu @@ -284,7 +284,7 @@ config M548x config M5441x bool "MCF5441x" - depends on !MMU + select MMU_COLDFIRE if MMU select GENERIC_CLOCKEVENTS select HAVE_CACHE_CB help diff --git a/arch/m68k/Kconfig.machine b/arch/m68k/Kconfig.machine index 5cd57b4d3615..64a641467736 100644 --- a/arch/m68k/Kconfig.machine +++ b/arch/m68k/Kconfig.machine @@ -266,6 +266,12 @@ config AMCORE help Support for the Sysam AMCORE open-hardware generic board. +config STMARK2 + bool "Sysam stmark2 board support" + depends on M5441x + help + Support for the Sysam stmark2 open-hardware generic board. + config FIREBEE bool "FireBee board support" depends on M547x diff --git a/arch/m68k/amiga/amisound.c b/arch/m68k/amiga/amisound.c index 90a60d758f8b..a23f48181fd6 100644 --- a/arch/m68k/amiga/amisound.c +++ b/arch/m68k/amiga/amisound.c @@ -66,7 +66,7 @@ void __init amiga_init_sound(void) } static void nosound( unsigned long ignored ); -static DEFINE_TIMER(sound_timer, nosound, 0, 0); +static DEFINE_TIMER(sound_timer, nosound); void amiga_mksound( unsigned int hz, unsigned int ticks ) { diff --git a/arch/m68k/coldfire/Makefile b/arch/m68k/coldfire/Makefile index f8cef9681416..573eabca1a3a 100644 --- a/arch/m68k/coldfire/Makefile +++ b/arch/m68k/coldfire/Makefile @@ -35,7 +35,8 @@ obj-$(CONFIG_NETtel) += nettel.o obj-$(CONFIG_CLEOPATRA) += nettel.o obj-$(CONFIG_FIREBEE) += firebee.o obj-$(CONFIG_MCF8390) += mcf8390.o -obj-$(CONFIG_AMCORE) += amcore.o +obj-$(CONFIG_AMCORE) += amcore.o +obj-$(CONFIG_STMARK2) += stmark2.o obj-$(CONFIG_PCI) += pci.o diff --git a/arch/m68k/coldfire/m5441x.c b/arch/m68k/coldfire/m5441x.c index 315d14b0dca0..55392af845fb 100644 --- a/arch/m68k/coldfire/m5441x.c +++ b/arch/m68k/coldfire/m5441x.c @@ -27,7 +27,7 @@ DEFINE_CLK(0, "intc.0", 18, MCF_CLK); DEFINE_CLK(0, "intc.1", 19, MCF_CLK); DEFINE_CLK(0, "intc.2", 20, MCF_CLK); DEFINE_CLK(0, "imx1-i2c.0", 22, MCF_CLK); -DEFINE_CLK(0, "mcfdspi.0", 23, MCF_CLK); +DEFINE_CLK(0, "fsl-dspi.0", 23, MCF_CLK); DEFINE_CLK(0, "mcfuart.0", 24, MCF_BUSCLK); DEFINE_CLK(0, "mcfuart.1", 25, MCF_BUSCLK); DEFINE_CLK(0, "mcfuart.2", 26, MCF_BUSCLK); @@ -140,6 +140,7 @@ static struct clk * const enable_clks[] __initconst = { &__clk_0_18, /* intc0 */ &__clk_0_19, /* intc0 */ &__clk_0_20, /* intc0 */ + &__clk_0_23, /* dspi.0 */ &__clk_0_24, /* uart0 */ &__clk_0_25, /* uart1 */ &__clk_0_26, /* uart2 */ diff --git a/arch/m68k/coldfire/m54xx.c b/arch/m68k/coldfire/m54xx.c index e53ffed13ba8..adad03ca6e11 100644 --- a/arch/m68k/coldfire/m54xx.c +++ b/arch/m68k/coldfire/m54xx.c @@ -96,10 +96,6 @@ static void mcf54xx_reset(void) void __init config_BSP(char *commandp, int size) { -#ifdef CONFIG_MMU - cf_bootmem_alloc(); - mmu_context_init(); -#endif mach_reset = mcf54xx_reset; mach_sched_init = hw_timer_init; m54xx_uarts_init(); diff --git a/arch/m68k/coldfire/stmark2.c b/arch/m68k/coldfire/stmark2.c new file mode 100644 index 000000000000..a8d2b3d172f9 --- /dev/null +++ b/arch/m68k/coldfire/stmark2.c @@ -0,0 +1,119 @@ +/* + * stmark2.c -- Support for Sysam AMCORE open board + * + * (C) Copyright 2017, Angelo Dureghello <angelo@sysam.it> + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file COPYING in the main directory of this archive + * for more details. + */ + +#include <linux/platform_device.h> +#include <linux/mtd/partitions.h> +#include <linux/spi/spi.h> +#include <linux/spi/spi-fsl-dspi.h> +#include <linux/spi/flash.h> +#include <asm/mcfsim.h> + +/* + * Partitioning of parallel NOR flash (39VF3201B) + */ +static struct mtd_partition stmark2_partitions[] = { + { + .name = "U-Boot (1024K)", + .size = 0x100000, + .offset = 0x0 + }, { + .name = "Kernel+initramfs (7168K)", + .size = 0x700000, + .offset = MTDPART_OFS_APPEND + }, { + .name = "Flash Free Space (8192K)", + .size = MTDPART_SIZ_FULL, + .offset = MTDPART_OFS_APPEND + } +}; + +static struct flash_platform_data stmark2_spi_flash_data = { + .name = "is25lp128", + .parts = stmark2_partitions, + .nr_parts = ARRAY_SIZE(stmark2_partitions), + .type = "is25lp128", +}; + +static struct spi_board_info stmark2_board_info[] __initdata = { + { + .modalias = "m25p80", + .max_speed_hz = 5000000, + .bus_num = 0, + .chip_select = 1, + .platform_data = &stmark2_spi_flash_data, + .mode = SPI_MODE_3, + } +}; + +/* SPI controller data, SPI (0) */ +static struct fsl_dspi_platform_data dspi_spi0_info = { + .cs_num = 4, + .bus_num = 0, + .sck_cs_delay = 100, + .cs_sck_delay = 100, +}; + +static struct resource dspi_spi0_resource[] = { + [0] = { + .start = MCFDSPI_BASE0, + .end = MCFDSPI_BASE0 + 0xFF, + .flags = IORESOURCE_MEM, + }, + [1] = { + .start = 12, + .end = 13, + .flags = IORESOURCE_DMA, + }, + [2] = { + .start = MCF_IRQ_DSPI0, + .end = MCF_IRQ_DSPI0, + .flags = IORESOURCE_IRQ, + }, +}; + +/* SPI controller, id = bus number */ +static struct platform_device dspi_spi0_device = { + .name = "fsl-dspi", + .id = 0, + .num_resources = ARRAY_SIZE(dspi_spi0_resource), + .resource = dspi_spi0_resource, + .dev = { + .platform_data = &dspi_spi0_info, + }, +}; + +static struct platform_device *stmark2_devices[] __initdata = { + &dspi_spi0_device, +}; + +/* + * Note: proper pin-mux setup is mandatory for proper SPI functionality. + */ +static int __init init_stmark2(void) +{ + /* DSPI0, all pins as DSPI, and using CS1 */ + __raw_writeb(0x80, MCFGPIO_PAR_DSPIOWL); + __raw_writeb(0xfc, MCFGPIO_PAR_DSPIOWH); + + /* Board gpio setup */ + __raw_writeb(0x00, MCFGPIO_PAR_BE); + __raw_writeb(0x00, MCFGPIO_PAR_FBCTL); + __raw_writeb(0x00, MCFGPIO_PAR_CS); + __raw_writeb(0x00, MCFGPIO_PAR_CANI2C); + + platform_add_devices(stmark2_devices, ARRAY_SIZE(stmark2_devices)); + + spi_register_board_info(stmark2_board_info, + ARRAY_SIZE(stmark2_board_info)); + + return 0; +} + +late_initcall(init_stmark2); diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig index 54191f6fc715..5b5fa9831b4d 100644 --- a/arch/m68k/configs/amiga_defconfig +++ b/arch/m68k/configs/amiga_defconfig @@ -123,6 +123,7 @@ CONFIG_NFT_HASH=m CONFIG_NFT_FIB_INET=m CONFIG_NFT_DUP_NETDEV=m CONFIG_NFT_FWD_NETDEV=m +CONFIG_NFT_FIB_NETDEV=m CONFIG_NETFILTER_XT_SET=m CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m @@ -302,6 +303,7 @@ CONFIG_MPLS=y CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m CONFIG_MPLS_IPTUNNEL=m +CONFIG_NET_NSH=m CONFIG_NET_L3_MASTER_DEV=y CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set @@ -400,6 +402,7 @@ CONFIG_ARIADNE=y # CONFIG_NET_VENDOR_CIRRUS is not set # CONFIG_NET_VENDOR_EZCHIP is not set # CONFIG_NET_VENDOR_HP is not set +# CONFIG_NET_VENDOR_HUAWEI is not set # CONFIG_NET_VENDOR_INTEL is not set # CONFIG_NET_VENDOR_MARVELL is not set # CONFIG_NET_VENDOR_MICREL is not set @@ -451,6 +454,7 @@ CONFIG_PPS_CLIENT_LDISC=m CONFIG_PPS_CLIENT_PARPORT=m CONFIG_PTP_1588_CLOCK=m # CONFIG_HWMON is not set +# CONFIG_RC_CORE is not set CONFIG_FB=y CONFIG_FB_CIRRUS=y CONFIG_FB_AMIGA=y @@ -607,12 +611,10 @@ CONFIG_CRYPTO_USER=m CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_MCRYPTD=m CONFIG_CRYPTO_TEST=m -CONFIG_CRYPTO_CCM=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_KEYWRAP=m -CONFIG_CRYPTO_CMAC=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_MICHAEL_MIC=m diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig index fb4663904428..72a7764b74ed 100644 --- a/arch/m68k/configs/apollo_defconfig +++ b/arch/m68k/configs/apollo_defconfig @@ -121,6 +121,7 @@ CONFIG_NFT_HASH=m CONFIG_NFT_FIB_INET=m CONFIG_NFT_DUP_NETDEV=m CONFIG_NFT_FWD_NETDEV=m +CONFIG_NFT_FIB_NETDEV=m CONFIG_NETFILTER_XT_SET=m CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m @@ -300,6 +301,7 @@ CONFIG_MPLS=y CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m CONFIG_MPLS_IPTUNNEL=m +CONFIG_NET_NSH=m CONFIG_NET_L3_MASTER_DEV=y CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set @@ -377,6 +379,7 @@ CONFIG_VETH=m # CONFIG_NET_CADENCE is not set # CONFIG_NET_VENDOR_BROADCOM is not set # CONFIG_NET_VENDOR_EZCHIP is not set +# CONFIG_NET_VENDOR_HUAWEI is not set # CONFIG_NET_VENDOR_INTEL is not set # CONFIG_NET_VENDOR_MARVELL is not set # CONFIG_NET_VENDOR_MICREL is not set @@ -419,6 +422,7 @@ CONFIG_NTP_PPS=y CONFIG_PPS_CLIENT_LDISC=m CONFIG_PTP_1588_CLOCK=m # CONFIG_HWMON is not set +# CONFIG_RC_CORE is not set CONFIG_FB=y CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y @@ -566,12 +570,10 @@ CONFIG_CRYPTO_USER=m CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_MCRYPTD=m CONFIG_CRYPTO_TEST=m -CONFIG_CRYPTO_CCM=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_KEYWRAP=m -CONFIG_CRYPTO_CMAC=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_MICHAEL_MIC=m diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig index 4ab393e86e52..884b43a2f0d9 100644 --- a/arch/m68k/configs/atari_defconfig +++ b/arch/m68k/configs/atari_defconfig @@ -121,6 +121,7 @@ CONFIG_NFT_HASH=m CONFIG_NFT_FIB_INET=m CONFIG_NFT_DUP_NETDEV=m CONFIG_NFT_FWD_NETDEV=m +CONFIG_NFT_FIB_NETDEV=m CONFIG_NETFILTER_XT_SET=m CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m @@ -300,6 +301,7 @@ CONFIG_MPLS=y CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m CONFIG_MPLS_IPTUNNEL=m +CONFIG_NET_NSH=m CONFIG_NET_L3_MASTER_DEV=y CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set @@ -387,6 +389,7 @@ CONFIG_ATARILANCE=y # CONFIG_NET_CADENCE is not set # CONFIG_NET_VENDOR_BROADCOM is not set # CONFIG_NET_VENDOR_EZCHIP is not set +# CONFIG_NET_VENDOR_HUAWEI is not set # CONFIG_NET_VENDOR_INTEL is not set # CONFIG_NET_VENDOR_MARVELL is not set # CONFIG_NET_VENDOR_MICREL is not set @@ -434,6 +437,7 @@ CONFIG_PPS_CLIENT_LDISC=m CONFIG_PPS_CLIENT_PARPORT=m CONFIG_PTP_1588_CLOCK=m # CONFIG_HWMON is not set +# CONFIG_RC_CORE is not set CONFIG_FB=y CONFIG_FB_ATARI=y CONFIG_FRAMEBUFFER_CONSOLE=y @@ -588,12 +592,10 @@ CONFIG_CRYPTO_USER=m CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_MCRYPTD=m CONFIG_CRYPTO_TEST=m -CONFIG_CRYPTO_CCM=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_KEYWRAP=m -CONFIG_CRYPTO_CMAC=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_MICHAEL_MIC=m diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig index 1dd8d697545b..fcfa60d31499 100644 --- a/arch/m68k/configs/bvme6000_defconfig +++ b/arch/m68k/configs/bvme6000_defconfig @@ -119,6 +119,7 @@ CONFIG_NFT_HASH=m CONFIG_NFT_FIB_INET=m CONFIG_NFT_DUP_NETDEV=m CONFIG_NFT_FWD_NETDEV=m +CONFIG_NFT_FIB_NETDEV=m CONFIG_NETFILTER_XT_SET=m CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m @@ -298,6 +299,7 @@ CONFIG_MPLS=y CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m CONFIG_MPLS_IPTUNNEL=m +CONFIG_NET_NSH=m CONFIG_NET_L3_MASTER_DEV=y CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set @@ -376,6 +378,7 @@ CONFIG_VETH=m # CONFIG_NET_CADENCE is not set # CONFIG_NET_VENDOR_BROADCOM is not set # CONFIG_NET_VENDOR_EZCHIP is not set +# CONFIG_NET_VENDOR_HUAWEI is not set CONFIG_BVME6000_NET=y # CONFIG_NET_VENDOR_MARVELL is not set # CONFIG_NET_VENDOR_MICREL is not set @@ -417,6 +420,7 @@ CONFIG_NTP_PPS=y CONFIG_PPS_CLIENT_LDISC=m CONFIG_PTP_1588_CLOCK=m # CONFIG_HWMON is not set +# CONFIG_RC_CORE is not set CONFIG_HID=m CONFIG_HIDRAW=y CONFIG_UHID=m @@ -558,12 +562,10 @@ CONFIG_CRYPTO_USER=m CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_MCRYPTD=m CONFIG_CRYPTO_TEST=m -CONFIG_CRYPTO_CCM=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_KEYWRAP=m -CONFIG_CRYPTO_CMAC=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_MICHAEL_MIC=m diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig index 02b39f50076e..9d597bbbbbfe 100644 --- a/arch/m68k/configs/hp300_defconfig +++ b/arch/m68k/configs/hp300_defconfig @@ -121,6 +121,7 @@ CONFIG_NFT_HASH=m CONFIG_NFT_FIB_INET=m CONFIG_NFT_DUP_NETDEV=m CONFIG_NFT_FWD_NETDEV=m +CONFIG_NFT_FIB_NETDEV=m CONFIG_NETFILTER_XT_SET=m CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m @@ -300,6 +301,7 @@ CONFIG_MPLS=y CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m CONFIG_MPLS_IPTUNNEL=m +CONFIG_NET_NSH=m CONFIG_NET_L3_MASTER_DEV=y CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set @@ -378,6 +380,7 @@ CONFIG_HPLANCE=y # CONFIG_NET_CADENCE is not set # CONFIG_NET_VENDOR_BROADCOM is not set # CONFIG_NET_VENDOR_EZCHIP is not set +# CONFIG_NET_VENDOR_HUAWEI is not set # CONFIG_NET_VENDOR_INTEL is not set # CONFIG_NET_VENDOR_MARVELL is not set # CONFIG_NET_VENDOR_MICREL is not set @@ -422,6 +425,7 @@ CONFIG_NTP_PPS=y CONFIG_PPS_CLIENT_LDISC=m CONFIG_PTP_1588_CLOCK=m # CONFIG_HWMON is not set +# CONFIG_RC_CORE is not set CONFIG_FB=y CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y @@ -568,12 +572,10 @@ CONFIG_CRYPTO_USER=m CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_MCRYPTD=m CONFIG_CRYPTO_TEST=m -CONFIG_CRYPTO_CCM=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_KEYWRAP=m -CONFIG_CRYPTO_CMAC=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_MICHAEL_MIC=m diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig index 044dcb2bf8fb..45da20d1286c 100644 --- a/arch/m68k/configs/mac_defconfig +++ b/arch/m68k/configs/mac_defconfig @@ -120,6 +120,7 @@ CONFIG_NFT_HASH=m CONFIG_NFT_FIB_INET=m CONFIG_NFT_DUP_NETDEV=m CONFIG_NFT_FWD_NETDEV=m +CONFIG_NFT_FIB_NETDEV=m CONFIG_NETFILTER_XT_SET=m CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m @@ -302,6 +303,7 @@ CONFIG_MPLS=y CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m CONFIG_MPLS_IPTUNNEL=m +CONFIG_NET_NSH=m CONFIG_NET_L3_MASTER_DEV=y CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set @@ -395,6 +397,7 @@ CONFIG_MACMACE=y # CONFIG_NET_VENDOR_BROADCOM is not set CONFIG_MAC89x0=y # CONFIG_NET_VENDOR_EZCHIP is not set +# CONFIG_NET_VENDOR_HUAWEI is not set # CONFIG_NET_VENDOR_INTEL is not set # CONFIG_NET_VENDOR_MARVELL is not set # CONFIG_NET_VENDOR_MICREL is not set @@ -444,6 +447,7 @@ CONFIG_NTP_PPS=y CONFIG_PPS_CLIENT_LDISC=m CONFIG_PTP_1588_CLOCK=m # CONFIG_HWMON is not set +# CONFIG_RC_CORE is not set CONFIG_FB=y CONFIG_FB_VALKYRIE=y CONFIG_FB_MAC=y @@ -590,12 +594,10 @@ CONFIG_CRYPTO_USER=m CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_MCRYPTD=m CONFIG_CRYPTO_TEST=m -CONFIG_CRYPTO_CCM=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_KEYWRAP=m -CONFIG_CRYPTO_CMAC=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_MICHAEL_MIC=m diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig index 3ad04682077a..fda880c10861 100644 --- a/arch/m68k/configs/multi_defconfig +++ b/arch/m68k/configs/multi_defconfig @@ -130,6 +130,7 @@ CONFIG_NFT_HASH=m CONFIG_NFT_FIB_INET=m CONFIG_NFT_DUP_NETDEV=m CONFIG_NFT_FWD_NETDEV=m +CONFIG_NFT_FIB_NETDEV=m CONFIG_NETFILTER_XT_SET=m CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m @@ -312,6 +313,7 @@ CONFIG_MPLS=y CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m CONFIG_MPLS_IPTUNNEL=m +CONFIG_NET_NSH=m CONFIG_NET_L3_MASTER_DEV=y CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set @@ -436,6 +438,7 @@ CONFIG_MACMACE=y CONFIG_MAC89x0=y # CONFIG_NET_VENDOR_EZCHIP is not set # CONFIG_NET_VENDOR_HP is not set +# CONFIG_NET_VENDOR_HUAWEI is not set CONFIG_BVME6000_NET=y CONFIG_MVME16x_NET=y # CONFIG_NET_VENDOR_MARVELL is not set @@ -501,6 +504,7 @@ CONFIG_PPS_CLIENT_LDISC=m CONFIG_PPS_CLIENT_PARPORT=m CONFIG_PTP_1588_CLOCK=m # CONFIG_HWMON is not set +# CONFIG_RC_CORE is not set CONFIG_FB=y CONFIG_FB_CIRRUS=y CONFIG_FB_AMIGA=y @@ -670,12 +674,10 @@ CONFIG_CRYPTO_USER=m CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_MCRYPTD=m CONFIG_CRYPTO_TEST=m -CONFIG_CRYPTO_CCM=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_KEYWRAP=m -CONFIG_CRYPTO_CMAC=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_MICHAEL_MIC=m diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig index dc2dd61948cd..7d5e4863efec 100644 --- a/arch/m68k/configs/mvme147_defconfig +++ b/arch/m68k/configs/mvme147_defconfig @@ -118,6 +118,7 @@ CONFIG_NFT_HASH=m CONFIG_NFT_FIB_INET=m CONFIG_NFT_DUP_NETDEV=m CONFIG_NFT_FWD_NETDEV=m +CONFIG_NFT_FIB_NETDEV=m CONFIG_NETFILTER_XT_SET=m CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m @@ -297,6 +298,7 @@ CONFIG_MPLS=y CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m CONFIG_MPLS_IPTUNNEL=m +CONFIG_NET_NSH=m CONFIG_NET_L3_MASTER_DEV=y CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set @@ -376,6 +378,7 @@ CONFIG_MVME147_NET=y # CONFIG_NET_CADENCE is not set # CONFIG_NET_VENDOR_BROADCOM is not set # CONFIG_NET_VENDOR_EZCHIP is not set +# CONFIG_NET_VENDOR_HUAWEI is not set # CONFIG_NET_VENDOR_INTEL is not set # CONFIG_NET_VENDOR_MARVELL is not set # CONFIG_NET_VENDOR_MICREL is not set @@ -417,6 +420,7 @@ CONFIG_NTP_PPS=y CONFIG_PPS_CLIENT_LDISC=m CONFIG_PTP_1588_CLOCK=m # CONFIG_HWMON is not set +# CONFIG_RC_CORE is not set CONFIG_HID=m CONFIG_HIDRAW=y CONFIG_UHID=m @@ -558,12 +562,10 @@ CONFIG_CRYPTO_USER=m CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_MCRYPTD=m CONFIG_CRYPTO_TEST=m -CONFIG_CRYPTO_CCM=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_KEYWRAP=m -CONFIG_CRYPTO_CMAC=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_MICHAEL_MIC=m diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig index 54e7b523fc3d..7763b71a9c49 100644 --- a/arch/m68k/configs/mvme16x_defconfig +++ b/arch/m68k/configs/mvme16x_defconfig @@ -119,6 +119,7 @@ CONFIG_NFT_HASH=m CONFIG_NFT_FIB_INET=m CONFIG_NFT_DUP_NETDEV=m CONFIG_NFT_FWD_NETDEV=m +CONFIG_NFT_FIB_NETDEV=m CONFIG_NETFILTER_XT_SET=m CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m @@ -298,6 +299,7 @@ CONFIG_MPLS=y CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m CONFIG_MPLS_IPTUNNEL=m +CONFIG_NET_NSH=m CONFIG_NET_L3_MASTER_DEV=y CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set @@ -376,6 +378,7 @@ CONFIG_VETH=m # CONFIG_NET_CADENCE is not set # CONFIG_NET_VENDOR_BROADCOM is not set # CONFIG_NET_VENDOR_EZCHIP is not set +# CONFIG_NET_VENDOR_HUAWEI is not set CONFIG_MVME16x_NET=y # CONFIG_NET_VENDOR_MARVELL is not set # CONFIG_NET_VENDOR_MICREL is not set @@ -417,6 +420,7 @@ CONFIG_NTP_PPS=y CONFIG_PPS_CLIENT_LDISC=m CONFIG_PTP_1588_CLOCK=m # CONFIG_HWMON is not set +# CONFIG_RC_CORE is not set CONFIG_HID=m CONFIG_HIDRAW=y CONFIG_UHID=m @@ -558,12 +562,10 @@ CONFIG_CRYPTO_USER=m CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_MCRYPTD=m CONFIG_CRYPTO_TEST=m -CONFIG_CRYPTO_CCM=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_KEYWRAP=m -CONFIG_CRYPTO_CMAC=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_MICHAEL_MIC=m diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig index d63d8a15f6db..17eaebfa3e19 100644 --- a/arch/m68k/configs/q40_defconfig +++ b/arch/m68k/configs/q40_defconfig @@ -119,6 +119,7 @@ CONFIG_NFT_HASH=m CONFIG_NFT_FIB_INET=m CONFIG_NFT_DUP_NETDEV=m CONFIG_NFT_FWD_NETDEV=m +CONFIG_NFT_FIB_NETDEV=m CONFIG_NETFILTER_XT_SET=m CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m @@ -298,6 +299,7 @@ CONFIG_MPLS=y CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m CONFIG_MPLS_IPTUNNEL=m +CONFIG_NET_NSH=m CONFIG_NET_L3_MASTER_DEV=y CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set @@ -386,6 +388,7 @@ CONFIG_VETH=m # CONFIG_NET_VENDOR_CIRRUS is not set # CONFIG_NET_VENDOR_EZCHIP is not set # CONFIG_NET_VENDOR_HP is not set +# CONFIG_NET_VENDOR_HUAWEI is not set # CONFIG_NET_VENDOR_INTEL is not set # CONFIG_NET_VENDOR_MARVELL is not set # CONFIG_NET_VENDOR_MICREL is not set @@ -434,6 +437,7 @@ CONFIG_PPS_CLIENT_LDISC=m CONFIG_PPS_CLIENT_PARPORT=m CONFIG_PTP_1588_CLOCK=m # CONFIG_HWMON is not set +# CONFIG_RC_CORE is not set CONFIG_FB=y CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y @@ -581,12 +585,10 @@ CONFIG_CRYPTO_USER=m CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_MCRYPTD=m CONFIG_CRYPTO_TEST=m -CONFIG_CRYPTO_CCM=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_KEYWRAP=m -CONFIG_CRYPTO_CMAC=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_MICHAEL_MIC=m diff --git a/arch/m68k/configs/stmark2_defconfig b/arch/m68k/configs/stmark2_defconfig new file mode 100644 index 000000000000..55e55dbc2fb6 --- /dev/null +++ b/arch/m68k/configs/stmark2_defconfig @@ -0,0 +1,92 @@ +CONFIG_LOCALVERSION="stmark2-001" +CONFIG_DEFAULT_HOSTNAME="stmark2" +CONFIG_SYSVIPC=y +# CONFIG_FHANDLE is not set +CONFIG_LOG_BUF_SHIFT=14 +CONFIG_NAMESPACES=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_INITRAMFS_SOURCE="../uClinux-dist/romfs" +# CONFIG_RD_BZIP2 is not set +# CONFIG_RD_LZMA is not set +# CONFIG_RD_XZ is not set +# CONFIG_RD_LZO is not set +# CONFIG_RD_LZ4 is not set +CONFIG_CC_OPTIMIZE_FOR_SIZE=y +# CONFIG_AIO is not set +# CONFIG_ADVISE_SYSCALLS is not set +# CONFIG_MEMBARRIER is not set +CONFIG_EMBEDDED=y +# CONFIG_VM_EVENT_COUNTERS is not set +# CONFIG_COMPAT_BRK is not set +# CONFIG_LBDAF is not set +# CONFIG_BLK_DEV_BSG is not set +CONFIG_BLK_CMDLINE_PARSER=y +# CONFIG_MMU is not set +CONFIG_M5441x=y +CONFIG_CLOCK_FREQ=240000000 +CONFIG_STMARK2=y +CONFIG_RAMBASE=0x40000000 +CONFIG_RAMSIZE=0x8000000 +CONFIG_VECTORBASE=0x40000000 +CONFIG_KERNELBASE=0x40001000 +CONFIG_BINFMT_FLAT=y +CONFIG_BINFMT_MISC=y +# CONFIG_UEVENT_HELPER is not set +CONFIG_DEVTMPFS=y +CONFIG_DEVTMPFS_MOUNT=y +CONFIG_FW_LOADER_USER_HELPER_FALLBACK=y +# CONFIG_ALLOW_DEV_COREDUMP is not set +CONFIG_MTD=y +CONFIG_MTD_CMDLINE_PARTS=y +CONFIG_MTD_BLOCK=y +CONFIG_MTD_CFI=y +CONFIG_MTD_JEDECPROBE=y +CONFIG_MTD_CFI_ADV_OPTIONS=y +CONFIG_MTD_CFI_LE_BYTE_SWAP=y +CONFIG_MTD_CFI_GEOMETRY=y +# CONFIG_MTD_CFI_I2 is not set +CONFIG_MTD_CFI_AMDSTD=y +CONFIG_MTD_CFI_STAA=y +CONFIG_MTD_ROM=y +CONFIG_MTD_COMPLEX_MAPPINGS=y +CONFIG_MTD_PLATRAM=y +CONFIG_MTD_M25P80=y +CONFIG_MTD_SPI_NOR=y +# CONFIG_INPUT_KEYBOARD is not set +# CONFIG_INPUT_MOUSE is not set +CONFIG_SERIO_LIBPS2=y +# CONFIG_UNIX98_PTYS is not set +# CONFIG_DEVMEM is not set +CONFIG_SERIAL_MCF=y +CONFIG_SERIAL_MCF_BAUDRATE=115200 +CONFIG_SERIAL_MCF_CONSOLE=y +# CONFIG_HW_RANDOM is not set +CONFIG_SPI=y +CONFIG_SPI_DEBUG=y +CONFIG_SPI_FSL_DSPI=y +CONFIG_DEBUG_GPIO=y +CONFIG_GPIO_SYSFS=y +CONFIG_GPIO_GENERIC_PLATFORM=y +# CONFIG_HWMON is not set +# CONFIG_RC_CORE is not set +# CONFIG_HID is not set +# CONFIG_USB_SUPPORT is not set +# CONFIG_FILE_LOCKING is not set +# CONFIG_DNOTIFY is not set +# CONFIG_INOTIFY_USER is not set +CONFIG_FSCACHE=y +# CONFIG_PROC_SYSCTL is not set +CONFIG_PRINTK_TIME=y +# CONFIG_ENABLE_WARN_DEPRECATED is not set +# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set +CONFIG_SLUB_DEBUG_ON=y +CONFIG_PANIC_ON_OOPS=y +# CONFIG_SCHED_DEBUG is not set +# CONFIG_DEBUG_BUGVERBOSE is not set +CONFIG_BOOTPARAM=y +CONFIG_BOOTPARAM_STRING="console=ttyS0,115200 root=/dev/ram0 rw rootfstype=ramfs rdinit=/bin/init devtmpfs.mount=1" +CONFIG_CRYPTO=y +# CONFIG_CRYPTO_ECHAINIV is not set +CONFIG_CRYPTO_ANSI_CPRNG=y +# CONFIG_CRYPTO_HW is not set +CONFIG_CRC16=y diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig index d0924c22f52a..d1cb7a04ae1d 100644 --- a/arch/m68k/configs/sun3_defconfig +++ b/arch/m68k/configs/sun3_defconfig @@ -116,6 +116,7 @@ CONFIG_NFT_HASH=m CONFIG_NFT_FIB_INET=m CONFIG_NFT_DUP_NETDEV=m CONFIG_NFT_FWD_NETDEV=m +CONFIG_NFT_FIB_NETDEV=m CONFIG_NETFILTER_XT_SET=m CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m @@ -295,6 +296,7 @@ CONFIG_MPLS=y CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m CONFIG_MPLS_IPTUNNEL=m +CONFIG_NET_NSH=m CONFIG_NET_L3_MASTER_DEV=y CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set @@ -373,6 +375,7 @@ CONFIG_SUN3LANCE=y # CONFIG_NET_VENDOR_ARC is not set # CONFIG_NET_CADENCE is not set # CONFIG_NET_VENDOR_EZCHIP is not set +# CONFIG_NET_VENDOR_HUAWEI is not set CONFIG_SUN3_82586=y # CONFIG_NET_VENDOR_MARVELL is not set # CONFIG_NET_VENDOR_MICREL is not set @@ -416,6 +419,7 @@ CONFIG_NTP_PPS=y CONFIG_PPS_CLIENT_LDISC=m CONFIG_PTP_1588_CLOCK=m # CONFIG_HWMON is not set +# CONFIG_RC_CORE is not set CONFIG_FB=y CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y @@ -559,12 +563,10 @@ CONFIG_CRYPTO_USER=m CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_MCRYPTD=m CONFIG_CRYPTO_TEST=m -CONFIG_CRYPTO_CCM=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_KEYWRAP=m -CONFIG_CRYPTO_CMAC=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_MICHAEL_MIC=m diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig index 3001ee1e5dc5..ea3a331c62d5 100644 --- a/arch/m68k/configs/sun3x_defconfig +++ b/arch/m68k/configs/sun3x_defconfig @@ -116,6 +116,7 @@ CONFIG_NFT_HASH=m CONFIG_NFT_FIB_INET=m CONFIG_NFT_DUP_NETDEV=m CONFIG_NFT_FWD_NETDEV=m +CONFIG_NFT_FIB_NETDEV=m CONFIG_NETFILTER_XT_SET=m CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m @@ -295,6 +296,7 @@ CONFIG_MPLS=y CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m CONFIG_MPLS_IPTUNNEL=m +CONFIG_NET_NSH=m CONFIG_NET_L3_MASTER_DEV=y CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set @@ -374,6 +376,7 @@ CONFIG_SUN3LANCE=y # CONFIG_NET_CADENCE is not set # CONFIG_NET_VENDOR_BROADCOM is not set # CONFIG_NET_VENDOR_EZCHIP is not set +# CONFIG_NET_VENDOR_HUAWEI is not set # CONFIG_NET_VENDOR_INTEL is not set # CONFIG_NET_VENDOR_MARVELL is not set # CONFIG_NET_VENDOR_MICREL is not set @@ -416,6 +419,7 @@ CONFIG_NTP_PPS=y CONFIG_PPS_CLIENT_LDISC=m CONFIG_PTP_1588_CLOCK=m # CONFIG_HWMON is not set +# CONFIG_RC_CORE is not set CONFIG_FB=y CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y @@ -560,12 +564,10 @@ CONFIG_CRYPTO_USER=m CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_MCRYPTD=m CONFIG_CRYPTO_TEST=m -CONFIG_CRYPTO_CCM=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_KEYWRAP=m -CONFIG_CRYPTO_CMAC=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_MICHAEL_MIC=m diff --git a/arch/m68k/include/asm/m5441xsim.h b/arch/m68k/include/asm/m5441xsim.h index 4e9095b9480a..c87556d5581c 100644 --- a/arch/m68k/include/asm/m5441xsim.h +++ b/arch/m68k/include/asm/m5441xsim.h @@ -278,4 +278,10 @@ #define MCFGPIO_IRQ_VECBASE (MCFINT_VECBASE - MCFGPIO_IRQ_MIN) #define MCFGPIO_PIN_MAX 87 +/* + * DSPI module. + */ +#define MCFDSPI_BASE0 0xfc05c000 +#define MCF_IRQ_DSPI0 (MCFINT0_VECBASE + MCFINT0_DSPI0) + #endif /* m5441xsim_h */ diff --git a/arch/m68k/include/asm/mac_iop.h b/arch/m68k/include/asm/mac_iop.h index 73dae2abeba3..32f1c79c818f 100644 --- a/arch/m68k/include/asm/mac_iop.h +++ b/arch/m68k/include/asm/mac_iop.h @@ -159,6 +159,7 @@ extern void iop_complete_message(struct iop_msg *); extern void iop_upload_code(uint, __u8 *, uint, __u16); extern void iop_download_code(uint, __u8 *, uint, __u16); extern __u8 *iop_compare_code(uint, __u8 *, uint, __u16); +extern void iop_ism_irq_poll(uint); extern void iop_register_interrupts(void); diff --git a/arch/m68k/include/asm/mcfmmu.h b/arch/m68k/include/asm/mcfmmu.h index 10f9930ec49a..283352ab0d5d 100644 --- a/arch/m68k/include/asm/mcfmmu.h +++ b/arch/m68k/include/asm/mcfmmu.h @@ -106,6 +106,7 @@ static inline void mmu_write(u32 a, u32 v) } void cf_bootmem_alloc(void); +void cf_mmu_context_init(void); int cf_tlb_miss(struct pt_regs *regs, int write, int dtlb, int extension_word); #endif diff --git a/arch/m68k/include/asm/mmu_context.h b/arch/m68k/include/asm/mmu_context.h index 836acea8f758..f5b1852b4663 100644 --- a/arch/m68k/include/asm/mmu_context.h +++ b/arch/m68k/include/asm/mmu_context.h @@ -92,7 +92,6 @@ static inline void activate_mm(struct mm_struct *active_mm, #define deactivate_mm(tsk, mm) do { } while (0) -extern void mmu_context_init(void); #define prepare_arch_switch(next) load_ksp_mmu(next) static inline void load_ksp_mmu(struct task_struct *task) diff --git a/arch/m68k/kernel/setup.c b/arch/m68k/kernel/setup.c index 854e09f403e7..19a92982629a 100644 --- a/arch/m68k/kernel/setup.c +++ b/arch/m68k/kernel/setup.c @@ -4,3 +4,8 @@ #else #include "setup_no.c" #endif + +#if IS_ENABLED(CONFIG_INPUT_M68K_BEEP) +void (*mach_beep)(unsigned int, unsigned int); +EXPORT_SYMBOL(mach_beep); +#endif diff --git a/arch/m68k/kernel/setup_mm.c b/arch/m68k/kernel/setup_mm.c index 657a9843ebfa..dd25bfc22fb4 100644 --- a/arch/m68k/kernel/setup_mm.c +++ b/arch/m68k/kernel/setup_mm.c @@ -106,10 +106,6 @@ EXPORT_SYMBOL(mach_heartbeat); #ifdef CONFIG_M68K_L2_CACHE void (*mach_l2_flush) (int); #endif -#if IS_ENABLED(CONFIG_INPUT_M68K_BEEP) -void (*mach_beep)(unsigned int, unsigned int); -EXPORT_SYMBOL(mach_beep); -#endif #if defined(CONFIG_ISA) && defined(MULTI_ISA) int isa_type; int isa_sex; @@ -344,6 +340,8 @@ void __init setup_arch(char **cmdline_p) #ifdef CONFIG_COLDFIRE case MACH_M54XX: case MACH_M5441X: + cf_bootmem_alloc(); + cf_mmu_context_init(); config_BSP(NULL, 0); break; #endif diff --git a/arch/m68k/mac/baboon.c b/arch/m68k/mac/baboon.c index 850f0dc284ca..c7ea6475ef9b 100644 --- a/arch/m68k/mac/baboon.c +++ b/arch/m68k/mac/baboon.c @@ -37,7 +37,7 @@ void __init baboon_init(void) baboon = (struct baboon *) BABOON_BASE; baboon_present = 1; - printk("Baboon detected at %p\n", baboon); + pr_debug("Baboon detected at %p\n", baboon); } /* diff --git a/arch/m68k/mac/config.c b/arch/m68k/mac/config.c index 22123f7e8f75..16cd5cea5207 100644 --- a/arch/m68k/mac/config.c +++ b/arch/m68k/mac/config.c @@ -898,8 +898,8 @@ static void __init mac_identify(void) mac_bi_data.id, mac_bi_data.cpuid, mac_bi_data.memsize); iop_init(); - via_init(); oss_init(); + via_init(); psc_init(); baboon_init(); diff --git a/arch/m68k/mac/iop.c b/arch/m68k/mac/iop.c index 4c1e606e7d03..9bfa17015768 100644 --- a/arch/m68k/mac/iop.c +++ b/arch/m68k/mac/iop.c @@ -273,10 +273,10 @@ void __init iop_init(void) int i; if (iop_scc_present) { - pr_info("IOP: detected SCC IOP at %p\n", iop_base[IOP_NUM_SCC]); + pr_debug("SCC IOP detected at %p\n", iop_base[IOP_NUM_SCC]); } if (iop_ism_present) { - pr_info("IOP: detected ISM IOP at %p\n", iop_base[IOP_NUM_ISM]); + pr_debug("ISM IOP detected at %p\n", iop_base[IOP_NUM_ISM]); iop_start(iop_base[IOP_NUM_ISM]); iop_alive(iop_base[IOP_NUM_ISM]); /* clears the alive flag */ } @@ -598,3 +598,12 @@ irqreturn_t iop_ism_irq(int irq, void *dev_id) } return IRQ_HANDLED; } + +void iop_ism_irq_poll(uint iop_num) +{ + unsigned long flags; + + local_irq_save(flags); + iop_ism_irq(0, (void *)iop_num); + local_irq_restore(flags); +} diff --git a/arch/m68k/mac/macboing.c b/arch/m68k/mac/macboing.c index fa2b9604fd24..d17668649641 100644 --- a/arch/m68k/mac/macboing.c +++ b/arch/m68k/mac/macboing.c @@ -57,7 +57,7 @@ static void ( *mac_special_bell )( unsigned int, unsigned int, unsigned int ); /* * our timer to start/continue/stop the bell */ -static DEFINE_TIMER(mac_sound_timer, mac_nosound, 0, 0); +static DEFINE_TIMER(mac_sound_timer, mac_nosound); /* * Sort of initialize the sound chip (called from mac_mksound on the first diff --git a/arch/m68k/mac/oss.c b/arch/m68k/mac/oss.c index 34c0993dc689..3f81892527ad 100644 --- a/arch/m68k/mac/oss.c +++ b/arch/m68k/mac/oss.c @@ -32,18 +32,18 @@ volatile struct mac_oss *oss; /* * Initialize the OSS - * - * The OSS "detection" code is actually in via_init() which is always called - * before us. Thus we can count on oss_present being valid on entry. */ void __init oss_init(void) { int i; - if (!oss_present) return; + if (macintosh_config->ident != MAC_MODEL_IIFX) + return; oss = (struct mac_oss *) OSS_BASE; + pr_debug("OSS detected at %p", oss); + oss_present = 1; /* Disable all interrupts. Unlike a VIA it looks like we */ /* do this by setting the source's interrupt level to zero. */ @@ -53,14 +53,6 @@ void __init oss_init(void) } /* - * Initialize OSS for Nubus access - */ - -void __init oss_nubus_init(void) -{ -} - -/* * Handle miscellaneous OSS interrupts. */ diff --git a/arch/m68k/mac/psc.c b/arch/m68k/mac/psc.c index 439a2a2e5874..8d547df4e16c 100644 --- a/arch/m68k/mac/psc.c +++ b/arch/m68k/mac/psc.c @@ -42,7 +42,7 @@ static void psc_debug_dump(void) return; for (i = 0x30 ; i < 0x70 ; i += 0x10) { - printk("PSC #%d: IFR = 0x%02X IER = 0x%02X\n", + printk(KERN_DEBUG "PSC #%d: IFR = 0x%02X IER = 0x%02X\n", i >> 4, (int) psc_read_byte(pIFRbase + i), (int) psc_read_byte(pIERbase + i)); @@ -59,14 +59,12 @@ static __init void psc_dma_die_die_die(void) { int i; - printk("Killing all PSC DMA channels..."); for (i = 0 ; i < 9 ; i++) { psc_write_word(PSC_CTL_BASE + (i << 4), 0x8800); psc_write_word(PSC_CTL_BASE + (i << 4), 0x1000); psc_write_word(PSC_CMD_BASE + (i << 5), 0x1100); psc_write_word(PSC_CMD_BASE + (i << 5) + 0x10, 0x1100); } - printk("done!\n"); } /* @@ -92,7 +90,7 @@ void __init psc_init(void) psc = (void *) PSC_BASE; - printk("PSC detected at %p\n", psc); + pr_debug("PSC detected at %p\n", psc); psc_dma_die_die_die(); diff --git a/arch/m68k/mac/via.c b/arch/m68k/mac/via.c index 9f59a662ace5..acdabbeecfd2 100644 --- a/arch/m68k/mac/via.c +++ b/arch/m68k/mac/via.c @@ -107,6 +107,7 @@ static int gIER,gIFR,gBufA,gBufB; static u8 nubus_disabled; void via_debug_dump(void); +static void via_nubus_init(void); /* * Initialize the VIAs @@ -114,29 +115,25 @@ void via_debug_dump(void); * First we figure out where they actually _are_ as well as what type of * VIA we have for VIA2 (it could be a real VIA or an RBV or even an OSS.) * Then we pretty much clear them out and disable all IRQ sources. - * - * Note: the OSS is actually "detected" here and not in oss_init(). It just - * seems more logical to do it here since via_init() needs to know - * these things anyways. */ void __init via_init(void) { - switch(macintosh_config->via_type) { + via1 = (void *)VIA1_BASE; + pr_debug("VIA1 detected at %p\n", via1); + + if (oss_present) { + via2 = NULL; + rbv_present = 0; + } else { + switch (macintosh_config->via_type) { /* IIci, IIsi, IIvx, IIvi (P6xx), LC series */ case MAC_VIA_IICI: - via1 = (void *) VIA1_BASE; - if (macintosh_config->ident == MAC_MODEL_IIFX) { - via2 = NULL; - rbv_present = 0; - oss_present = 1; - } else { - via2 = (void *) RBV_BASE; - rbv_present = 1; - oss_present = 0; - } + via2 = (void *)RBV_BASE; + pr_debug("VIA2 (RBV) detected at %p\n", via2); + rbv_present = 1; if (macintosh_config->ident == MAC_MODEL_LCIII) { rbv_clear = 0x00; } else { @@ -155,29 +152,19 @@ void __init via_init(void) case MAC_VIA_QUADRA: case MAC_VIA_II: - via1 = (void *) VIA1_BASE; via2 = (void *) VIA2_BASE; + pr_debug("VIA2 detected at %p\n", via2); rbv_present = 0; - oss_present = 0; rbv_clear = 0x00; gIER = vIER; gIFR = vIFR; gBufA = vBufA; gBufB = vBufB; break; + default: panic("UNKNOWN VIA TYPE"); - } - - printk(KERN_INFO "VIA1 at %p is a 6522 or clone\n", via1); - - printk(KERN_INFO "VIA2 at %p is ", via2); - if (rbv_present) { - printk("an RBV\n"); - } else if (oss_present) { - printk("an OSS\n"); - } else { - printk("a 6522 or clone\n"); + } } #ifdef DEBUG_VIA @@ -253,6 +240,8 @@ void __init via_init(void) via2[vACR] &= ~0x03; /* disable port A & B latches */ } + via_nubus_init(); + /* Everything below this point is VIA2 only... */ if (rbv_present) @@ -304,9 +293,9 @@ void via_debug_dump(void) (uint) via1[vDirA], (uint) via1[vDirB], (uint) via1[vACR]); printk(KERN_DEBUG " PCR = 0x%02X IFR = 0x%02X IER = 0x%02X\n", (uint) via1[vPCR], (uint) via1[vIFR], (uint) via1[vIER]); - if (oss_present) { - printk(KERN_DEBUG "VIA2: <OSS>\n"); - } else if (rbv_present) { + if (!via2) + return; + if (rbv_present) { printk(KERN_DEBUG "VIA2: IFR = 0x%02X IER = 0x%02X\n", (uint) via2[rIFR], (uint) via2[rIER]); printk(KERN_DEBUG " SIFR = 0x%02X SIER = 0x%02X\n", @@ -374,7 +363,7 @@ int via_get_cache_disable(void) * Initialize VIA2 for Nubus access */ -void __init via_nubus_init(void) +static void __init via_nubus_init(void) { /* unlock nubus transactions */ diff --git a/arch/m68k/mm/mcfmmu.c b/arch/m68k/mm/mcfmmu.c index 8d1408583cf4..2925d795d71a 100644 --- a/arch/m68k/mm/mcfmmu.c +++ b/arch/m68k/mm/mcfmmu.c @@ -170,7 +170,7 @@ void __init cf_bootmem_alloc(void) max_pfn = max_low_pfn = PFN_DOWN(_ramend); high_memory = (void *)_ramend; - m68k_virt_to_node_shift = fls(_ramend - _rambase - 1) - 6; + m68k_virt_to_node_shift = fls(_ramend - 1) - 6; module_fixup(NULL, __start_fixup, __stop_fixup); /* setup bootmem data */ @@ -184,7 +184,7 @@ void __init cf_bootmem_alloc(void) * Initialize the context management stuff. * The following was taken from arch/ppc/mmu_context.c */ -void __init mmu_context_init(void) +void __init cf_mmu_context_init(void) { /* * Some processors have too few contexts to reserve one for diff --git a/arch/metag/include/asm/spinlock.h b/arch/metag/include/asm/spinlock.h index 349938c35f2d..4497c232d9c1 100644 --- a/arch/metag/include/asm/spinlock.h +++ b/arch/metag/include/asm/spinlock.h @@ -16,13 +16,4 @@ * locked. */ -#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) - -#define arch_read_lock_flags(lock, flags) arch_read_lock(lock) -#define arch_write_lock_flags(lock, flags) arch_write_lock(lock) - -#define arch_spin_relax(lock) cpu_relax() -#define arch_read_relax(lock) cpu_relax() -#define arch_write_relax(lock) cpu_relax() - #endif /* __ASM_SPINLOCK_H */ diff --git a/arch/metag/include/asm/spinlock_lnkget.h b/arch/metag/include/asm/spinlock_lnkget.h index 029935560b7f..dfd780eab350 100644 --- a/arch/metag/include/asm/spinlock_lnkget.h +++ b/arch/metag/include/asm/spinlock_lnkget.h @@ -137,21 +137,6 @@ static inline void arch_write_unlock(arch_rwlock_t *rw) : "memory"); } -/* write_can_lock - would write_trylock() succeed? */ -static inline int arch_write_can_lock(arch_rwlock_t *rw) -{ - int ret; - - asm volatile ("LNKGETD %0, [%1]\n" - "CMP %0, #0\n" - "MOV %0, #1\n" - "XORNZ %0, %0, %0\n" - : "=&d" (ret) - : "da" (&rw->lock) - : "cc"); - return ret; -} - /* * Read locks are a bit more hairy: * - Exclusively load the lock value. @@ -225,26 +210,4 @@ static inline int arch_read_trylock(arch_rwlock_t *rw) return tmp; } -/* read_can_lock - would read_trylock() succeed? */ -static inline int arch_read_can_lock(arch_rwlock_t *rw) -{ - int tmp; - - asm volatile ("LNKGETD %0, [%1]\n" - "CMP %0, %2\n" - "MOV %0, #1\n" - "XORZ %0, %0, %0\n" - : "=&d" (tmp) - : "da" (&rw->lock), "bd" (0x80000000) - : "cc"); - return tmp; -} - -#define arch_read_lock_flags(lock, flags) arch_read_lock(lock) -#define arch_write_lock_flags(lock, flags) arch_write_lock(lock) - -#define arch_spin_relax(lock) cpu_relax() -#define arch_read_relax(lock) cpu_relax() -#define arch_write_relax(lock) cpu_relax() - #endif /* __ASM_SPINLOCK_LNKGET_H */ diff --git a/arch/metag/include/asm/spinlock_lock1.h b/arch/metag/include/asm/spinlock_lock1.h index 12de9862d190..c0bd81bbe18c 100644 --- a/arch/metag/include/asm/spinlock_lock1.h +++ b/arch/metag/include/asm/spinlock_lock1.h @@ -105,16 +105,6 @@ static inline void arch_write_unlock(arch_rwlock_t *rw) rw->lock = 0; } -/* write_can_lock - would write_trylock() succeed? */ -static inline int arch_write_can_lock(arch_rwlock_t *rw) -{ - unsigned int ret; - - barrier(); - ret = rw->lock; - return (ret == 0); -} - /* * Read locks are a bit more hairy: * - Exclusively load the lock value. @@ -172,14 +162,4 @@ static inline int arch_read_trylock(arch_rwlock_t *rw) return (ret < 0x80000000); } -/* read_can_lock - would read_trylock() succeed? */ -static inline int arch_read_can_lock(arch_rwlock_t *rw) -{ - unsigned int ret; - - barrier(); - ret = rw->lock; - return (ret < 0x80000000); -} - #endif /* __ASM_SPINLOCK_LOCK1_H */ diff --git a/arch/mips/ar7/platform.c b/arch/mips/ar7/platform.c index df7acea3747a..4674f1efbe7a 100644 --- a/arch/mips/ar7/platform.c +++ b/arch/mips/ar7/platform.c @@ -575,6 +575,7 @@ static int __init ar7_register_uarts(void) uart_port.type = PORT_AR7; uart_port.uartclk = clk_get_rate(bus_clk) / 2; uart_port.iotype = UPIO_MEM32; + uart_port.flags = UPF_FIXED_TYPE; uart_port.regshift = 2; uart_port.line = 0; @@ -653,6 +654,10 @@ static int __init ar7_register_devices(void) u32 val; int res; + res = ar7_gpio_init(); + if (res) + pr_warn("unable to register gpios: %d\n", res); + res = ar7_register_uarts(); if (res) pr_err("unable to setup uart(s): %d\n", res); diff --git a/arch/mips/ar7/prom.c b/arch/mips/ar7/prom.c index 4fd83336131a..dd53987a690f 100644 --- a/arch/mips/ar7/prom.c +++ b/arch/mips/ar7/prom.c @@ -246,8 +246,6 @@ void __init prom_init(void) ar7_init_cmdline(fw_arg0, (char **)fw_arg1); ar7_init_env((struct env_var *)fw_arg2); console_config(); - - ar7_gpio_init(); } #define PORT(offset) (KSEG1ADDR(AR7_REGS_UART0 + (offset * 4))) diff --git a/arch/mips/include/asm/spinlock.h b/arch/mips/include/asm/spinlock.h index a7d21da16b6a..ee81297d9117 100644 --- a/arch/mips/include/asm/spinlock.h +++ b/arch/mips/include/asm/spinlock.h @@ -13,11 +13,4 @@ #include <asm/qrwlock.h> #include <asm/qspinlock.h> -#define arch_read_lock_flags(lock, flags) arch_read_lock(lock) -#define arch_write_lock_flags(lock, flags) arch_write_lock(lock) - -#define arch_spin_relax(lock) cpu_relax() -#define arch_read_relax(lock) cpu_relax() -#define arch_write_relax(lock) cpu_relax() - #endif /* _ASM_SPINLOCK_H */ diff --git a/arch/mips/include/asm/vdso.h b/arch/mips/include/asm/vdso.h index b7cd6cf77b83..91bf0c2c265c 100644 --- a/arch/mips/include/asm/vdso.h +++ b/arch/mips/include/asm/vdso.h @@ -99,7 +99,7 @@ static inline u32 vdso_data_read_begin(const union mips_vdso_data *data) u32 seq; while (true) { - seq = ACCESS_ONCE(data->seq_count); + seq = READ_ONCE(data->seq_count); if (likely(!(seq & 1))) { /* Paired with smp_wmb() in vdso_data_write_*(). */ smp_rmb(); diff --git a/arch/mips/kernel/pm-cps.c b/arch/mips/kernel/pm-cps.c index 9dd624c2fe56..421e06dfee72 100644 --- a/arch/mips/kernel/pm-cps.c +++ b/arch/mips/kernel/pm-cps.c @@ -166,7 +166,7 @@ int cps_pm_enter_state(enum cps_pm_state state) nc_core_ready_count = nc_addr; /* Ensure ready_count is zero-initialised before the assembly runs */ - ACCESS_ONCE(*nc_core_ready_count) = 0; + WRITE_ONCE(*nc_core_ready_count, 0); coupled_barrier(&per_cpu(pm_barrier, core), online); /* Run the generated entry code */ diff --git a/arch/mips/kernel/smp-bmips.c b/arch/mips/kernel/smp-bmips.c index 406072e26752..87dcac2447c8 100644 --- a/arch/mips/kernel/smp-bmips.c +++ b/arch/mips/kernel/smp-bmips.c @@ -591,11 +591,11 @@ void __init bmips_cpu_setup(void) /* Flush and enable RAC */ cfg = __raw_readl(cbr + BMIPS_RAC_CONFIG); - __raw_writel(cfg | 0x100, BMIPS_RAC_CONFIG); + __raw_writel(cfg | 0x100, cbr + BMIPS_RAC_CONFIG); __raw_readl(cbr + BMIPS_RAC_CONFIG); cfg = __raw_readl(cbr + BMIPS_RAC_CONFIG); - __raw_writel(cfg | 0xf, BMIPS_RAC_CONFIG); + __raw_writel(cfg | 0xf, cbr + BMIPS_RAC_CONFIG); __raw_readl(cbr + BMIPS_RAC_CONFIG); cfg = __raw_readl(cbr + BMIPS_RAC_ADDRESS_RANGE); diff --git a/arch/mips/mti-malta/malta-display.c b/arch/mips/mti-malta/malta-display.c index d4f807191ecd..063de44675ce 100644 --- a/arch/mips/mti-malta/malta-display.c +++ b/arch/mips/mti-malta/malta-display.c @@ -36,10 +36,10 @@ void mips_display_message(const char *str) } } -static void scroll_display_message(unsigned long data); -static DEFINE_TIMER(mips_scroll_timer, scroll_display_message, HZ, 0); +static void scroll_display_message(unsigned long unused); +static DEFINE_TIMER(mips_scroll_timer, scroll_display_message); -static void scroll_display_message(unsigned long data) +static void scroll_display_message(unsigned long unused) { mips_display_message(&display_string[display_count++]); if (display_count == max_display_count) diff --git a/arch/mips/sgi-ip22/ip22-reset.c b/arch/mips/sgi-ip22/ip22-reset.c index 03a39ac5ead9..c374f3ceec38 100644 --- a/arch/mips/sgi-ip22/ip22-reset.c +++ b/arch/mips/sgi-ip22/ip22-reset.c @@ -38,6 +38,7 @@ #define PANIC_FREQ (HZ / 8) static struct timer_list power_timer, blink_timer, debounce_timer; +static unsigned long blink_timer_timeout; #define MACHINE_PANICED 1 #define MACHINE_SHUTTING_DOWN 2 @@ -81,21 +82,21 @@ static void __noreturn sgi_machine_halt(void) ArcEnterInteractiveMode(); } -static void power_timeout(unsigned long data) +static void power_timeout(struct timer_list *unused) { sgi_machine_power_off(); } -static void blink_timeout(unsigned long data) +static void blink_timeout(struct timer_list *unused) { /* XXX fix this for fullhouse */ sgi_ioc_reset ^= (SGIOC_RESET_LC0OFF|SGIOC_RESET_LC1OFF); sgioc->reset = sgi_ioc_reset; - mod_timer(&blink_timer, jiffies + data); + mod_timer(&blink_timer, jiffies + blink_timer_timeout); } -static void debounce(unsigned long data) +static void debounce(struct timer_list *unused) { del_timer(&debounce_timer); if (sgint->istat1 & SGINT_ISTAT1_PWR) { @@ -128,11 +129,10 @@ static inline void power_button(void) } machine_state |= MACHINE_SHUTTING_DOWN; - blink_timer.data = POWERDOWN_FREQ; - blink_timeout(POWERDOWN_FREQ); + blink_timer_timeout = POWERDOWN_FREQ; + blink_timeout(&blink_timer); - init_timer(&power_timer); - power_timer.function = power_timeout; + timer_setup(&power_timer, power_timeout, 0); power_timer.expires = jiffies + POWERDOWN_TIMEOUT * HZ; add_timer(&power_timer); } @@ -147,8 +147,7 @@ static irqreturn_t panel_int(int irq, void *dev_id) if (sgint->istat1 & SGINT_ISTAT1_PWR) { /* Wait until interrupt goes away */ disable_irq_nosync(SGI_PANEL_IRQ); - init_timer(&debounce_timer); - debounce_timer.function = debounce; + timer_setup(&debounce_timer, debounce, 0); debounce_timer.expires = jiffies + 5; add_timer(&debounce_timer); } @@ -171,8 +170,8 @@ static int panic_event(struct notifier_block *this, unsigned long event, return NOTIFY_DONE; machine_state |= MACHINE_PANICED; - blink_timer.data = PANIC_FREQ; - blink_timeout(PANIC_FREQ); + blink_timer_timeout = PANIC_FREQ; + blink_timeout(&blink_timer); return NOTIFY_DONE; } @@ -195,8 +194,7 @@ static int __init reboot_setup(void) return res; } - init_timer(&blink_timer); - blink_timer.function = blink_timeout; + timer_setup(&blink_timer, blink_timeout, 0); atomic_notifier_chain_register(&panic_notifier_list, &panic_block); return 0; diff --git a/arch/mips/sgi-ip32/ip32-reset.c b/arch/mips/sgi-ip32/ip32-reset.c index b3b442def423..20d8637340be 100644 --- a/arch/mips/sgi-ip32/ip32-reset.c +++ b/arch/mips/sgi-ip32/ip32-reset.c @@ -38,6 +38,7 @@ extern struct platform_device ip32_rtc_device; static struct timer_list power_timer, blink_timer; +static unsigned long blink_timer_timeout; static int has_panicked, shutting_down; static __noreturn void ip32_poweroff(void *data) @@ -71,11 +72,11 @@ static void ip32_machine_restart(char *cmd) unreachable(); } -static void blink_timeout(unsigned long data) +static void blink_timeout(struct timer_list *unused) { unsigned long led = mace->perif.ctrl.misc ^ MACEISA_LED_RED; mace->perif.ctrl.misc = led; - mod_timer(&blink_timer, jiffies + data); + mod_timer(&blink_timer, jiffies + blink_timer_timeout); } static void ip32_machine_halt(void) @@ -83,7 +84,7 @@ static void ip32_machine_halt(void) ip32_poweroff(&ip32_rtc_device); } -static void power_timeout(unsigned long data) +static void power_timeout(struct timer_list *unused) { ip32_poweroff(&ip32_rtc_device); } @@ -99,11 +100,10 @@ void ip32_prepare_poweroff(void) } shutting_down = 1; - blink_timer.data = POWERDOWN_FREQ; - blink_timeout(POWERDOWN_FREQ); + blink_timer_timeout = POWERDOWN_FREQ; + blink_timeout(&blink_timer); - init_timer(&power_timer); - power_timer.function = power_timeout; + timer_setup(&power_timer, power_timeout, 0); power_timer.expires = jiffies + POWERDOWN_TIMEOUT * HZ; add_timer(&power_timer); } @@ -121,8 +121,8 @@ static int panic_event(struct notifier_block *this, unsigned long event, led = mace->perif.ctrl.misc | MACEISA_LED_GREEN; mace->perif.ctrl.misc = led; - blink_timer.data = PANIC_FREQ; - blink_timeout(PANIC_FREQ); + blink_timer_timeout = PANIC_FREQ; + blink_timeout(&blink_timer); return NOTIFY_DONE; } @@ -143,8 +143,7 @@ static __init int ip32_reboot_setup(void) _machine_halt = ip32_machine_halt; pm_power_off = ip32_machine_halt; - init_timer(&blink_timer); - blink_timer.function = blink_timeout; + timer_setup(&blink_timer, blink_timeout, 0); atomic_notifier_chain_register(&panic_notifier_list, &panic_block); return 0; diff --git a/arch/mn10300/include/asm/spinlock.h b/arch/mn10300/include/asm/spinlock.h index fe413b41df6c..879cd0df53ba 100644 --- a/arch/mn10300/include/asm/spinlock.h +++ b/arch/mn10300/include/asm/spinlock.h @@ -84,6 +84,7 @@ static inline void arch_spin_lock_flags(arch_spinlock_t *lock, : "d" (flags), "a"(&lock->slock), "i"(EPSW_IE | MN10300_CLI_LEVEL) : "memory", "cc"); } +#define arch_spin_lock_flags arch_spin_lock_flags #ifdef __KERNEL__ @@ -98,18 +99,6 @@ static inline void arch_spin_lock_flags(arch_spinlock_t *lock, * read-locks. */ -/** - * read_can_lock - would read_trylock() succeed? - * @lock: the rwlock in question. - */ -#define arch_read_can_lock(x) ((int)(x)->lock > 0) - -/** - * write_can_lock - would write_trylock() succeed? - * @lock: the rwlock in question. - */ -#define arch_write_can_lock(x) ((x)->lock == RW_LOCK_BIAS) - /* * On mn10300, we implement read-write locks as a 32-bit counter * with the high bit (sign) being the "contended" bit. @@ -183,9 +172,6 @@ static inline int arch_write_trylock(arch_rwlock_t *lock) return 0; } -#define arch_read_lock_flags(lock, flags) arch_read_lock(lock) -#define arch_write_lock_flags(lock, flags) arch_write_lock(lock) - #define _raw_spin_relax(lock) cpu_relax() #define _raw_read_relax(lock) cpu_relax() #define _raw_write_relax(lock) cpu_relax() diff --git a/arch/mn10300/kernel/mn10300-serial.c b/arch/mn10300/kernel/mn10300-serial.c index 7ecf69879e2d..d7ef1232a82a 100644 --- a/arch/mn10300/kernel/mn10300-serial.c +++ b/arch/mn10300/kernel/mn10300-serial.c @@ -543,7 +543,7 @@ static void mn10300_serial_receive_interrupt(struct mn10300_serial_port *port) try_again: /* pull chars out of the hat */ - ix = ACCESS_ONCE(port->rx_outp); + ix = READ_ONCE(port->rx_outp); if (CIRC_CNT(port->rx_inp, ix, MNSC_BUFFER_SIZE) == 0) { if (push && !tport->low_latency) tty_flip_buffer_push(tport); @@ -1724,7 +1724,7 @@ static int mn10300_serial_poll_get_char(struct uart_port *_port) if (mn10300_serial_int_tbl[port->rx_irq].port != NULL) { do { /* pull chars out of the hat */ - ix = ACCESS_ONCE(port->rx_outp); + ix = READ_ONCE(port->rx_outp); if (CIRC_CNT(port->rx_inp, ix, MNSC_BUFFER_SIZE) == 0) return NO_POLL_CHAR; diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig index df2136ab1dcc..339df7324e9c 100644 --- a/arch/openrisc/Kconfig +++ b/arch/openrisc/Kconfig @@ -22,13 +22,19 @@ config OPENRISC select HAVE_UID16 select GENERIC_ATOMIC64 select GENERIC_CLOCKEVENTS + select GENERIC_CLOCKEVENTS_BROADCAST select GENERIC_STRNCPY_FROM_USER select GENERIC_STRNLEN_USER + select GENERIC_SMP_IDLE_THREAD select MODULES_USE_ELF_RELA select HAVE_DEBUG_STACKOVERFLOW select OR1K_PIC select CPU_NO_EFFICIENT_FFS if !OPENRISC_HAVE_INST_FF1 select NO_BOOTMEM + select ARCH_USE_QUEUED_SPINLOCKS + select ARCH_USE_QUEUED_RWLOCKS + select OMPIC if SMP + select ARCH_WANT_FRAME_POINTERS config CPU_BIG_ENDIAN def_bool y @@ -56,6 +62,12 @@ config TRACE_IRQFLAGS_SUPPORT config GENERIC_CSUM def_bool y +config STACKTRACE_SUPPORT + def_bool y + +config LOCKDEP_SUPPORT + def_bool y + source "init/Kconfig" source "kernel/Kconfig.freezer" @@ -73,6 +85,17 @@ config OR1K_1200 endchoice +config DCACHE_WRITETHROUGH + bool "Have write through data caches" + default n + help + Select this if your implementation features write through data caches. + Selecting 'N' here will allow the kernel to force flushing of data + caches at relevant times. Most OpenRISC implementations support write- + through data caches. + + If unsure say N here + config OPENRISC_BUILTIN_DTB string "Builtin DTB" default "" @@ -105,8 +128,19 @@ config OPENRISC_HAVE_INST_DIV endmenu config NR_CPUS - int - default "1" + int "Maximum number of CPUs (2-32)" + range 2 32 + depends on SMP + default "2" + +config SMP + bool "Symmetric Multi-Processing support" + help + This enables support for systems with more than one CPU. If you have + a system with only one CPU, say N. If you have a system with more + than one CPU, say Y. + + If you don't know what to do here, say N. source kernel/Kconfig.hz source kernel/Kconfig.preempt @@ -125,6 +159,17 @@ config OPENRISC_NO_SPR_SR_DSX Say N here if you know that your OpenRISC processor has SPR_SR_DSX bit implemented. Say Y if you are unsure. +config OPENRISC_HAVE_SHADOW_GPRS + bool "Support for shadow gpr files" if !SMP + default y if SMP + help + Say Y here if your OpenRISC processor features shadowed + register files. They will in such case be used as a + scratch reg storage on exception entry. + + On SMP systems, this feature is mandatory. + On a unicore system it's safe to say N here if you are unsure. + config CMDLINE string "Default kernel command string" default "" diff --git a/arch/openrisc/Makefile b/arch/openrisc/Makefile index 89076a66eee2..cf8802962864 100644 --- a/arch/openrisc/Makefile +++ b/arch/openrisc/Makefile @@ -25,6 +25,7 @@ LDFLAGS_vmlinux := LIBGCC := $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name) KBUILD_CFLAGS += -pipe -ffixed-r10 -D__linux__ +CHECKFLAGS += -mbig-endian ifeq ($(CONFIG_OPENRISC_HAVE_INST_MUL),y) KBUILD_CFLAGS += $(call cc-option,-mhard-mul) diff --git a/arch/openrisc/README.openrisc b/arch/openrisc/README.openrisc deleted file mode 100644 index 072069ab5100..000000000000 --- a/arch/openrisc/README.openrisc +++ /dev/null @@ -1,99 +0,0 @@ -OpenRISC Linux -============== - -This is a port of Linux to the OpenRISC class of microprocessors; the initial -target architecture, specifically, is the 32-bit OpenRISC 1000 family (or1k). - -For information about OpenRISC processors and ongoing development: - - website http://openrisc.io - -For more information about Linux on OpenRISC, please contact South Pole AB. - - email: info@southpole.se - - website: http://southpole.se - http://southpoleconsulting.com - ---------------------------------------------------------------------- - -Build instructions for OpenRISC toolchain and Linux -=================================================== - -In order to build and run Linux for OpenRISC, you'll need at least a basic -toolchain and, perhaps, the architectural simulator. Steps to get these bits -in place are outlined here. - -1) The toolchain can be obtained from openrisc.io. Instructions for building -a toolchain can be found at: - -https://github.com/openrisc/tutorials - -2) or1ksim (optional) - -or1ksim is the architectural simulator which will allow you to actually run -your OpenRISC Linux kernel if you don't have an OpenRISC processor at hand. - - git clone https://github.com/openrisc/or1ksim.git - - cd or1ksim - ./configure --prefix=$OPENRISC_PREFIX - make - make install - -3) Linux kernel - -Build the kernel as usual - - make ARCH=openrisc defconfig - make ARCH=openrisc - -4) Run in architectural simulator - -Grab the or1ksim platform configuration file (from the or1ksim source) and -together with your freshly built vmlinux, run your kernel with the following -incantation: - - sim -f arch/openrisc/or1ksim.cfg vmlinux - ---------------------------------------------------------------------- - -Terminology -=========== - -In the code, the following particles are used on symbols to limit the scope -to more or less specific processor implementations: - -openrisc: the OpenRISC class of processors -or1k: the OpenRISC 1000 family of processors -or1200: the OpenRISC 1200 processor - ---------------------------------------------------------------------- - -History -======== - -18. 11. 2003 Matjaz Breskvar (phoenix@bsemi.com) - initial port of linux to OpenRISC/or32 architecture. - all the core stuff is implemented and seams usable. - -08. 12. 2003 Matjaz Breskvar (phoenix@bsemi.com) - complete change of TLB miss handling. - rewrite of exceptions handling. - fully functional sash-3.6 in default initrd. - a much improved version with changes all around. - -10. 04. 2004 Matjaz Breskvar (phoenix@bsemi.com) - alot of bugfixes all over. - ethernet support, functional http and telnet servers. - running many standard linux apps. - -26. 06. 2004 Matjaz Breskvar (phoenix@bsemi.com) - port to 2.6.x - -30. 11. 2004 Matjaz Breskvar (phoenix@bsemi.com) - lots of bugfixes and enhancments. - added opencores framebuffer driver. - -09. 10. 2010 Jonas Bonn (jonas@southpole.se) - major rewrite to bring up to par with upstream Linux 2.6.36 diff --git a/arch/openrisc/TODO.openrisc b/arch/openrisc/TODO.openrisc deleted file mode 100644 index c43d4e1d14eb..000000000000 --- a/arch/openrisc/TODO.openrisc +++ /dev/null @@ -1,12 +0,0 @@ -The OpenRISC Linux port is fully functional and has been tracking upstream -since 2.6.35. There are, however, remaining items to be completed within -the coming months. Here's a list of known-to-be-less-than-stellar items -that are due for investigation shortly, i.e. our TODO list: - --- Implement the rest of the DMA API... dma_map_sg, etc. - --- Finish the renaming cleanup... there are references to or32 in the code - which was an older name for the architecture. The name we've settled on is - or1k and this change is slowly trickling through the stack. For the time - being, or32 is equivalent to or1k. - diff --git a/arch/openrisc/boot/dts/or1ksim.dts b/arch/openrisc/boot/dts/or1ksim.dts index 9f4b856da580..d8aa8309c9d3 100644 --- a/arch/openrisc/boot/dts/or1ksim.dts +++ b/arch/openrisc/boot/dts/or1ksim.dts @@ -6,8 +6,13 @@ #size-cells = <1>; interrupt-parent = <&pic>; + aliases { + uart0 = &serial0; + }; + chosen { - bootargs = "console=uart,mmio,0x90000000,115200"; + bootargs = "earlycon"; + stdout-path = "uart0:115200"; }; memory@0 { diff --git a/arch/openrisc/boot/dts/simple_smp.dts b/arch/openrisc/boot/dts/simple_smp.dts new file mode 100644 index 000000000000..defbb92714ec --- /dev/null +++ b/arch/openrisc/boot/dts/simple_smp.dts @@ -0,0 +1,63 @@ +/dts-v1/; +/ { + compatible = "opencores,or1ksim"; + #address-cells = <1>; + #size-cells = <1>; + interrupt-parent = <&pic>; + + aliases { + uart0 = &serial0; + }; + + chosen { + bootargs = "earlycon"; + stdout-path = "uart0:115200"; + }; + + memory@0 { + device_type = "memory"; + reg = <0x00000000 0x02000000>; + }; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + cpu@0 { + compatible = "opencores,or1200-rtlsvn481"; + reg = <0>; + clock-frequency = <20000000>; + }; + cpu@1 { + compatible = "opencores,or1200-rtlsvn481"; + reg = <1>; + clock-frequency = <20000000>; + }; + }; + + ompic: ompic@98000000 { + compatible = "openrisc,ompic"; + reg = <0x98000000 16>; + interrupt-controller; + #interrupt-cells = <0>; + interrupts = <1>; + }; + + /* + * OR1K PIC is built into CPU and accessed via special purpose + * registers. It is not addressable and, hence, has no 'reg' + * property. + */ + pic: pic { + compatible = "opencores,or1k-pic-level"; + #interrupt-cells = <1>; + interrupt-controller; + }; + + serial0: serial@90000000 { + compatible = "opencores,uart16550-rtlsvn105", "ns16550a"; + reg = <0x90000000 0x100>; + interrupts = <2>; + clock-frequency = <20000000>; + }; + +}; diff --git a/arch/openrisc/configs/simple_smp_defconfig b/arch/openrisc/configs/simple_smp_defconfig new file mode 100644 index 000000000000..b6e3c7e158e7 --- /dev/null +++ b/arch/openrisc/configs/simple_smp_defconfig @@ -0,0 +1,66 @@ +CONFIG_CROSS_COMPILE="or1k-linux-" +CONFIG_LOCALVERSION="-simple-smp" +CONFIG_NO_HZ=y +CONFIG_LOG_BUF_SHIFT=14 +CONFIG_BLK_DEV_INITRD=y +# CONFIG_RD_GZIP is not set +# CONFIG_RD_BZIP2 is not set +# CONFIG_RD_LZMA is not set +# CONFIG_RD_XZ is not set +# CONFIG_RD_LZO is not set +# CONFIG_RD_LZ4 is not set +CONFIG_EXPERT=y +# CONFIG_KALLSYMS is not set +# CONFIG_EPOLL is not set +# CONFIG_TIMERFD is not set +# CONFIG_EVENTFD is not set +# CONFIG_AIO is not set +# CONFIG_VM_EVENT_COUNTERS is not set +# CONFIG_COMPAT_BRK is not set +CONFIG_SLOB=y +CONFIG_MODULES=y +# CONFIG_BLOCK is not set +CONFIG_OPENRISC_BUILTIN_DTB="simple_smp" +CONFIG_SMP=y +CONFIG_HZ_100=y +CONFIG_OPENRISC_HAVE_SHADOW_GPRS=y +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_INET=y +# CONFIG_INET_XFRM_MODE_TRANSPORT is not set +# CONFIG_INET_XFRM_MODE_TUNNEL is not set +# CONFIG_INET_XFRM_MODE_BEET is not set +# CONFIG_INET_DIAG is not set +CONFIG_TCP_CONG_ADVANCED=y +# CONFIG_TCP_CONG_BIC is not set +# CONFIG_TCP_CONG_CUBIC is not set +# CONFIG_TCP_CONG_WESTWOOD is not set +# CONFIG_TCP_CONG_HTCP is not set +# CONFIG_IPV6 is not set +# CONFIG_WIRELESS is not set +CONFIG_DEVTMPFS=y +CONFIG_DEVTMPFS_MOUNT=y +# CONFIG_PREVENT_FIRMWARE_BUILD is not set +# CONFIG_FW_LOADER is not set +CONFIG_NETDEVICES=y +CONFIG_ETHOC=y +CONFIG_MICREL_PHY=y +# CONFIG_WLAN is not set +# CONFIG_INPUT is not set +# CONFIG_SERIO is not set +# CONFIG_VT is not set +# CONFIG_LEGACY_PTYS is not set +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_OF_PLATFORM=y +# CONFIG_HW_RANDOM is not set +# CONFIG_HWMON is not set +# CONFIG_USB_SUPPORT is not set +# CONFIG_DNOTIFY is not set +CONFIG_TMPFS=y +CONFIG_NFS_FS=y +CONFIG_XZ_DEC=y +# CONFIG_ENABLE_WARN_DEPRECATED is not set +# CONFIG_ENABLE_MUST_CHECK is not set +# CONFIG_RCU_TRACE is not set diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild index 5bea416a7792..6eb16719549e 100644 --- a/arch/openrisc/include/asm/Kbuild +++ b/arch/openrisc/include/asm/Kbuild @@ -1,7 +1,6 @@ generic-y += barrier.h generic-y += bug.h generic-y += bugs.h -generic-y += cacheflush.h generic-y += checksum.h generic-y += clkdev.h generic-y += current.h @@ -28,6 +27,10 @@ generic-y += module.h generic-y += pci.h generic-y += percpu.h generic-y += preempt.h +generic-y += qspinlock_types.h +generic-y += qspinlock.h +generic-y += qrwlock_types.h +generic-y += qrwlock.h generic-y += sections.h generic-y += segment.h generic-y += string.h diff --git a/arch/openrisc/include/asm/cacheflush.h b/arch/openrisc/include/asm/cacheflush.h new file mode 100644 index 000000000000..70f46fd7a074 --- /dev/null +++ b/arch/openrisc/include/asm/cacheflush.h @@ -0,0 +1,96 @@ +/* + * OpenRISC Linux + * + * Linux architectural port borrowing liberally from similar works of + * others. All original copyrights apply as per the original source + * declaration. + * + * OpenRISC implementation: + * Copyright (C) Jan Henrik Weinstock <jan.weinstock@rwth-aachen.de> + * et al. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef __ASM_CACHEFLUSH_H +#define __ASM_CACHEFLUSH_H + +#include <linux/mm.h> + +/* + * Helper function for flushing or invalidating entire pages from data + * and instruction caches. SMP needs a little extra work, since we need + * to flush the pages on all cpus. + */ +extern void local_dcache_page_flush(struct page *page); +extern void local_icache_page_inv(struct page *page); + +/* + * Data cache flushing always happen on the local cpu. Instruction cache + * invalidations need to be broadcasted to all other cpu in the system in + * case of SMP configurations. + */ +#ifndef CONFIG_SMP +#define dcache_page_flush(page) local_dcache_page_flush(page) +#define icache_page_inv(page) local_icache_page_inv(page) +#else /* CONFIG_SMP */ +#define dcache_page_flush(page) local_dcache_page_flush(page) +#define icache_page_inv(page) smp_icache_page_inv(page) +extern void smp_icache_page_inv(struct page *page); +#endif /* CONFIG_SMP */ + +/* + * Synchronizes caches. Whenever a cpu writes executable code to memory, this + * should be called to make sure the processor sees the newly written code. + */ +static inline void sync_icache_dcache(struct page *page) +{ + if (!IS_ENABLED(CONFIG_DCACHE_WRITETHROUGH)) + dcache_page_flush(page); + icache_page_inv(page); +} + +/* + * Pages with this bit set need not be flushed/invalidated, since + * they have not changed since last flush. New pages start with + * PG_arch_1 not set and are therefore dirty by default. + */ +#define PG_dc_clean PG_arch_1 + +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 +static inline void flush_dcache_page(struct page *page) +{ + clear_bit(PG_dc_clean, &page->flags); +} + +/* + * Other interfaces are not required since we do not have virtually + * indexed or tagged caches. So we can use the default here. + */ +#define flush_cache_all() do { } while (0) +#define flush_cache_mm(mm) do { } while (0) +#define flush_cache_dup_mm(mm) do { } while (0) +#define flush_cache_range(vma, start, end) do { } while (0) +#define flush_cache_page(vma, vmaddr, pfn) do { } while (0) +#define flush_dcache_mmap_lock(mapping) do { } while (0) +#define flush_dcache_mmap_unlock(mapping) do { } while (0) +#define flush_icache_range(start, end) do { } while (0) +#define flush_icache_page(vma, pg) do { } while (0) +#define flush_icache_user_range(vma, pg, adr, len) do { } while (0) +#define flush_cache_vmap(start, end) do { } while (0) +#define flush_cache_vunmap(start, end) do { } while (0) + +#define copy_to_user_page(vma, page, vaddr, dst, src, len) \ + do { \ + memcpy(dst, src, len); \ + if (vma->vm_flags & VM_EXEC) \ + sync_icache_dcache(page); \ + } while (0) + +#define copy_from_user_page(vma, page, vaddr, dst, src, len) \ + memcpy(dst, src, len) + +#endif /* __ASM_CACHEFLUSH_H */ diff --git a/arch/openrisc/include/asm/cmpxchg.h b/arch/openrisc/include/asm/cmpxchg.h index f0a5d8b844d6..d29f7db53906 100644 --- a/arch/openrisc/include/asm/cmpxchg.h +++ b/arch/openrisc/include/asm/cmpxchg.h @@ -1,32 +1,29 @@ /* + * 1,2 and 4 byte cmpxchg and xchg implementations for OpenRISC. + * * Copyright (C) 2014 Stefan Kristiansson <stefan.kristiansson@saunalahti.fi> + * Copyright (C) 2017 Stafford Horne <shorne@gmail.com> * * This file is licensed under the terms of the GNU General Public License * version 2. This program is licensed "as is" without any warranty of any * kind, whether express or implied. + * + * Note: + * The portable implementations of 1 and 2 byte xchg and cmpxchg using a 4 + * byte cmpxchg is sourced heavily from the sh and mips implementations. */ #ifndef __ASM_OPENRISC_CMPXCHG_H #define __ASM_OPENRISC_CMPXCHG_H #include <linux/types.h> - -/* - * This function doesn't exist, so you'll get a linker error - * if something tries to do an invalid cmpxchg(). - */ -extern void __cmpxchg_called_with_bad_pointer(void); +#include <linux/bitops.h> #define __HAVE_ARCH_CMPXCHG 1 -static inline unsigned long -__cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, int size) +static inline unsigned long cmpxchg_u32(volatile void *ptr, + unsigned long old, unsigned long new) { - if (size != 4) { - __cmpxchg_called_with_bad_pointer(); - return old; - } - __asm__ __volatile__( "1: l.lwa %0, 0(%1) \n" " l.sfeq %0, %2 \n" @@ -43,6 +40,97 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, int size) return old; } +static inline unsigned long xchg_u32(volatile void *ptr, + unsigned long val) +{ + __asm__ __volatile__( + "1: l.lwa %0, 0(%1) \n" + " l.swa 0(%1), %2 \n" + " l.bnf 1b \n" + " l.nop \n" + : "=&r"(val) + : "r"(ptr), "r"(val) + : "cc", "memory"); + + return val; +} + +static inline u32 cmpxchg_small(volatile void *ptr, u32 old, u32 new, + int size) +{ + int off = (unsigned long)ptr % sizeof(u32); + volatile u32 *p = ptr - off; +#ifdef __BIG_ENDIAN + int bitoff = (sizeof(u32) - size - off) * BITS_PER_BYTE; +#else + int bitoff = off * BITS_PER_BYTE; +#endif + u32 bitmask = ((0x1 << size * BITS_PER_BYTE) - 1) << bitoff; + u32 load32, old32, new32; + u32 ret; + + load32 = READ_ONCE(*p); + + while (true) { + ret = (load32 & bitmask) >> bitoff; + if (old != ret) + return ret; + + old32 = (load32 & ~bitmask) | (old << bitoff); + new32 = (load32 & ~bitmask) | (new << bitoff); + + /* Do 32 bit cmpxchg */ + load32 = cmpxchg_u32(p, old32, new32); + if (load32 == old32) + return old; + } +} + +/* xchg */ + +static inline u32 xchg_small(volatile void *ptr, u32 x, int size) +{ + int off = (unsigned long)ptr % sizeof(u32); + volatile u32 *p = ptr - off; +#ifdef __BIG_ENDIAN + int bitoff = (sizeof(u32) - size - off) * BITS_PER_BYTE; +#else + int bitoff = off * BITS_PER_BYTE; +#endif + u32 bitmask = ((0x1 << size * BITS_PER_BYTE) - 1) << bitoff; + u32 oldv, newv; + u32 ret; + + do { + oldv = READ_ONCE(*p); + ret = (oldv & bitmask) >> bitoff; + newv = (oldv & ~bitmask) | (x << bitoff); + } while (cmpxchg_u32(p, oldv, newv) != oldv); + + return ret; +} + +/* + * This function doesn't exist, so you'll get a linker error + * if something tries to do an invalid cmpxchg(). + */ +extern unsigned long __cmpxchg_called_with_bad_pointer(void) + __compiletime_error("Bad argument size for cmpxchg"); + +static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, + unsigned long new, int size) +{ + switch (size) { + case 1: + case 2: + return cmpxchg_small(ptr, old, new, size); + case 4: + return cmpxchg_u32(ptr, old, new); + default: + return __cmpxchg_called_with_bad_pointer(); + } +} + #define cmpxchg(ptr, o, n) \ ({ \ (__typeof__(*(ptr))) __cmpxchg((ptr), \ @@ -55,32 +143,27 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, int size) * This function doesn't exist, so you'll get a linker error if * something tries to do an invalidly-sized xchg(). */ -extern void __xchg_called_with_bad_pointer(void); +extern unsigned long __xchg_called_with_bad_pointer(void) + __compiletime_error("Bad argument size for xchg"); -static inline unsigned long __xchg(unsigned long val, volatile void *ptr, - int size) +static inline unsigned long __xchg(volatile void *ptr, unsigned long with, + int size) { - if (size != 4) { - __xchg_called_with_bad_pointer(); - return val; + switch (size) { + case 1: + case 2: + return xchg_small(ptr, with, size); + case 4: + return xchg_u32(ptr, with); + default: + return __xchg_called_with_bad_pointer(); } - - __asm__ __volatile__( - "1: l.lwa %0, 0(%1) \n" - " l.swa 0(%1), %2 \n" - " l.bnf 1b \n" - " l.nop \n" - : "=&r"(val) - : "r"(ptr), "r"(val) - : "cc", "memory"); - - return val; } #define xchg(ptr, with) \ ({ \ - (__typeof__(*(ptr))) __xchg((unsigned long)(with), \ - (ptr), \ + (__typeof__(*(ptr))) __xchg((ptr), \ + (unsigned long)(with), \ sizeof(*(ptr))); \ }) diff --git a/arch/openrisc/include/asm/cpuinfo.h b/arch/openrisc/include/asm/cpuinfo.h index ec10679d6429..4ea0a33eba6c 100644 --- a/arch/openrisc/include/asm/cpuinfo.h +++ b/arch/openrisc/include/asm/cpuinfo.h @@ -19,7 +19,7 @@ #ifndef __ASM_OPENRISC_CPUINFO_H #define __ASM_OPENRISC_CPUINFO_H -struct cpuinfo { +struct cpuinfo_or1k { u32 clock_frequency; u32 icache_size; @@ -29,8 +29,11 @@ struct cpuinfo { u32 dcache_size; u32 dcache_block_size; u32 dcache_ways; + + u16 coreid; }; -extern struct cpuinfo cpuinfo; +extern struct cpuinfo_or1k cpuinfo_or1k[NR_CPUS]; +extern void setup_cpuinfo(void); #endif /* __ASM_OPENRISC_CPUINFO_H */ diff --git a/arch/openrisc/include/asm/mmu_context.h b/arch/openrisc/include/asm/mmu_context.h index e94b814d2e3c..c380d8caf84f 100644 --- a/arch/openrisc/include/asm/mmu_context.h +++ b/arch/openrisc/include/asm/mmu_context.h @@ -34,7 +34,7 @@ extern void switch_mm(struct mm_struct *prev, struct mm_struct *next, * registers like cr3 on the i386 */ -extern volatile pgd_t *current_pgd; /* defined in arch/openrisc/mm/fault.c */ +extern volatile pgd_t *current_pgd[]; /* defined in arch/openrisc/mm/fault.c */ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { diff --git a/arch/openrisc/include/asm/pgtable.h b/arch/openrisc/include/asm/pgtable.h index 71a6f08de8f2..21c71303012f 100644 --- a/arch/openrisc/include/asm/pgtable.h +++ b/arch/openrisc/include/asm/pgtable.h @@ -94,7 +94,7 @@ extern void paging_init(void); * 64 MB of vmalloc area is comparable to what's available on other arches. */ -#define VMALLOC_START (PAGE_OFFSET-0x04000000) +#define VMALLOC_START (PAGE_OFFSET-0x04000000UL) #define VMALLOC_END (PAGE_OFFSET) #define VMALLOC_VMADDR(x) ((unsigned long)(x)) @@ -416,15 +416,19 @@ extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; /* defined in head.S */ struct vm_area_struct; -/* - * or32 doesn't have any external MMU info: the kernel page - * tables contain all the necessary information. - * - * Actually I am not sure on what this could be used for. - */ +static inline void update_tlb(struct vm_area_struct *vma, + unsigned long address, pte_t *pte) +{ +} + +extern void update_cache(struct vm_area_struct *vma, + unsigned long address, pte_t *pte); + static inline void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *pte) { + update_tlb(vma, address, pte); + update_cache(vma, address, pte); } /* __PHX__ FIXME, SWAP, this probably doesn't work */ diff --git a/arch/openrisc/include/asm/serial.h b/arch/openrisc/include/asm/serial.h index 270a45241639..cb5932f5447a 100644 --- a/arch/openrisc/include/asm/serial.h +++ b/arch/openrisc/include/asm/serial.h @@ -29,7 +29,7 @@ * it needs to be correct to get the early console working. */ -#define BASE_BAUD (cpuinfo.clock_frequency/16) +#define BASE_BAUD (cpuinfo_or1k[smp_processor_id()].clock_frequency/16) #endif /* __KERNEL__ */ diff --git a/arch/openrisc/include/asm/smp.h b/arch/openrisc/include/asm/smp.h new file mode 100644 index 000000000000..e21d2f12b5b6 --- /dev/null +++ b/arch/openrisc/include/asm/smp.h @@ -0,0 +1,26 @@ +/* + * Copyright (C) 2014 Stefan Kristiansson <stefan.kristiansson@saunalahti.fi> + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of any + * kind, whether express or implied. + */ + +#ifndef __ASM_OPENRISC_SMP_H +#define __ASM_OPENRISC_SMP_H + +#include <asm/spr.h> +#include <asm/spr_defs.h> + +#define raw_smp_processor_id() (current_thread_info()->cpu) +#define hard_smp_processor_id() mfspr(SPR_COREID) + +extern void smp_init_cpus(void); + +extern void arch_send_call_function_single_ipi(int cpu); +extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); + +extern void set_smp_cross_call(void (*)(const struct cpumask *, unsigned int)); +extern void handle_IPI(unsigned int ipi_msg); + +#endif /* __ASM_OPENRISC_SMP_H */ diff --git a/arch/openrisc/include/asm/spinlock.h b/arch/openrisc/include/asm/spinlock.h index fd00a3a24123..9b761e0e22c3 100644 --- a/arch/openrisc/include/asm/spinlock.h +++ b/arch/openrisc/include/asm/spinlock.h @@ -19,6 +19,16 @@ #ifndef __ASM_OPENRISC_SPINLOCK_H #define __ASM_OPENRISC_SPINLOCK_H -#error "or32 doesn't do SMP yet" +#include <asm/qspinlock.h> + +#include <asm/qrwlock.h> + +#define arch_read_lock_flags(lock, flags) arch_read_lock(lock) +#define arch_write_lock_flags(lock, flags) arch_write_lock(lock) + +#define arch_spin_relax(lock) cpu_relax() +#define arch_read_relax(lock) cpu_relax() +#define arch_write_relax(lock) cpu_relax() + #endif diff --git a/arch/openrisc/include/asm/spinlock_types.h b/arch/openrisc/include/asm/spinlock_types.h new file mode 100644 index 000000000000..7c6fb1208c88 --- /dev/null +++ b/arch/openrisc/include/asm/spinlock_types.h @@ -0,0 +1,7 @@ +#ifndef _ASM_OPENRISC_SPINLOCK_TYPES_H +#define _ASM_OPENRISC_SPINLOCK_TYPES_H + +#include <asm/qspinlock_types.h> +#include <asm/qrwlock_types.h> + +#endif /* _ASM_OPENRISC_SPINLOCK_TYPES_H */ diff --git a/arch/openrisc/include/asm/spr_defs.h b/arch/openrisc/include/asm/spr_defs.h index 367dac70326a..154b5a1ee579 100644 --- a/arch/openrisc/include/asm/spr_defs.h +++ b/arch/openrisc/include/asm/spr_defs.h @@ -51,6 +51,11 @@ #define SPR_ICCFGR (SPRGROUP_SYS + 6) #define SPR_DCFGR (SPRGROUP_SYS + 7) #define SPR_PCCFGR (SPRGROUP_SYS + 8) +#define SPR_VR2 (SPRGROUP_SYS + 9) +#define SPR_AVR (SPRGROUP_SYS + 10) +#define SPR_EVBAR (SPRGROUP_SYS + 11) +#define SPR_AECR (SPRGROUP_SYS + 12) +#define SPR_AESR (SPRGROUP_SYS + 13) #define SPR_NPC (SPRGROUP_SYS + 16) /* CZ 21/06/01 */ #define SPR_SR (SPRGROUP_SYS + 17) /* CZ 21/06/01 */ #define SPR_PPC (SPRGROUP_SYS + 18) /* CZ 21/06/01 */ @@ -61,6 +66,8 @@ #define SPR_EEAR_LAST (SPRGROUP_SYS + 63) #define SPR_ESR_BASE (SPRGROUP_SYS + 64) #define SPR_ESR_LAST (SPRGROUP_SYS + 79) +#define SPR_COREID (SPRGROUP_SYS + 128) +#define SPR_NUMCORES (SPRGROUP_SYS + 129) #define SPR_GPR_BASE (SPRGROUP_SYS + 1024) /* Data MMU group */ @@ -135,12 +142,19 @@ #define SPR_VR_CFG 0x00ff0000 /* Processor configuration */ #define SPR_VR_RES 0x0000ffc0 /* Reserved */ #define SPR_VR_REV 0x0000003f /* Processor revision */ +#define SPR_VR_UVRP 0x00000040 /* Updated Version Registers Present */ #define SPR_VR_VER_OFF 24 #define SPR_VR_CFG_OFF 16 #define SPR_VR_REV_OFF 0 /* + * Bit definitions for the Version Register 2 + */ +#define SPR_VR2_CPUID 0xff000000 /* Processor ID */ +#define SPR_VR2_VER 0x00ffffff /* Processor version */ + +/* * Bit definitions for the Unit Present Register * */ diff --git a/arch/openrisc/include/asm/thread_info.h b/arch/openrisc/include/asm/thread_info.h index 6e619a79a401..c229aa6bb502 100644 --- a/arch/openrisc/include/asm/thread_info.h +++ b/arch/openrisc/include/asm/thread_info.h @@ -74,7 +74,7 @@ struct thread_info { .task = &tsk, \ .flags = 0, \ .cpu = 0, \ - .preempt_count = 1, \ + .preempt_count = INIT_PREEMPT_COUNT, \ .addr_limit = KERNEL_DS, \ .ksp = 0, \ } diff --git a/arch/openrisc/include/asm/time.h b/arch/openrisc/include/asm/time.h new file mode 100644 index 000000000000..313ee975774b --- /dev/null +++ b/arch/openrisc/include/asm/time.h @@ -0,0 +1,23 @@ +/* + * OpenRISC timer API + * + * Copyright (C) 2017 by Stafford Horne (shorne@gmail.com) + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ +#ifndef __ASM_OR1K_TIME_H +#define __ASM_OR1K_TIME_H + +extern void openrisc_clockevent_init(void); + +extern void openrisc_timer_set(unsigned long count); +extern void openrisc_timer_set_next(unsigned long delta); + +#ifdef CONFIG_SMP +extern void synchronise_count_master(int cpu); +extern void synchronise_count_slave(int cpu); +#endif + +#endif /* __ASM_OR1K_TIME_H */ diff --git a/arch/openrisc/include/asm/tlbflush.h b/arch/openrisc/include/asm/tlbflush.h index 6a2accd6cb67..94227f0eaf6d 100644 --- a/arch/openrisc/include/asm/tlbflush.h +++ b/arch/openrisc/include/asm/tlbflush.h @@ -33,13 +33,26 @@ * - flush_tlb_page(vma, vmaddr) flushes one page * - flush_tlb_range(mm, start, end) flushes a range of pages */ +extern void local_flush_tlb_all(void); +extern void local_flush_tlb_mm(struct mm_struct *mm); +extern void local_flush_tlb_page(struct vm_area_struct *vma, + unsigned long addr); +extern void local_flush_tlb_range(struct vm_area_struct *vma, + unsigned long start, + unsigned long end); -void flush_tlb_all(void); -void flush_tlb_mm(struct mm_struct *mm); -void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr); -void flush_tlb_range(struct vm_area_struct *vma, - unsigned long start, - unsigned long end); +#ifndef CONFIG_SMP +#define flush_tlb_all local_flush_tlb_all +#define flush_tlb_mm local_flush_tlb_mm +#define flush_tlb_page local_flush_tlb_page +#define flush_tlb_range local_flush_tlb_range +#else +extern void flush_tlb_all(void); +extern void flush_tlb_mm(struct mm_struct *mm); +extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr); +extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end); +#endif static inline void flush_tlb(void) { diff --git a/arch/openrisc/include/asm/unwinder.h b/arch/openrisc/include/asm/unwinder.h new file mode 100644 index 000000000000..165ec6f02ab8 --- /dev/null +++ b/arch/openrisc/include/asm/unwinder.h @@ -0,0 +1,20 @@ +/* + * OpenRISC unwinder.h + * + * Architecture API for unwinding stacks. + * + * Copyright (C) 2017 Stafford Horne <shorne@gmail.com> + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of any + * kind, whether express or implied. + */ + +#ifndef __ASM_OPENRISC_UNWINDER_H +#define __ASM_OPENRISC_UNWINDER_H + +void unwind_stack(void *data, unsigned long *stack, + void (*trace)(void *data, unsigned long addr, + int reliable)); + +#endif /* __ASM_OPENRISC_UNWINDER_H */ diff --git a/arch/openrisc/kernel/Makefile b/arch/openrisc/kernel/Makefile index c4ea6cabad46..2d172e79f58d 100644 --- a/arch/openrisc/kernel/Makefile +++ b/arch/openrisc/kernel/Makefile @@ -7,8 +7,10 @@ extra-y := head.o vmlinux.lds obj-y := setup.o or32_ksyms.o process.o dma.o \ traps.o time.o irq.o entry.o ptrace.o signal.o \ - sys_call_table.o + sys_call_table.o unwinder.o +obj-$(CONFIG_SMP) += smp.o sync-timer.o +obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_OF) += prom.o diff --git a/arch/openrisc/kernel/dma.c b/arch/openrisc/kernel/dma.c index b10369b7e31b..a945f00011b4 100644 --- a/arch/openrisc/kernel/dma.c +++ b/arch/openrisc/kernel/dma.c @@ -32,6 +32,7 @@ page_set_nocache(pte_t *pte, unsigned long addr, unsigned long next, struct mm_walk *walk) { unsigned long cl; + struct cpuinfo_or1k *cpuinfo = &cpuinfo_or1k[smp_processor_id()]; pte_val(*pte) |= _PAGE_CI; @@ -42,7 +43,7 @@ page_set_nocache(pte_t *pte, unsigned long addr, flush_tlb_page(NULL, addr); /* Flush page out of dcache */ - for (cl = __pa(addr); cl < __pa(next); cl += cpuinfo.dcache_block_size) + for (cl = __pa(addr); cl < __pa(next); cl += cpuinfo->dcache_block_size) mtspr(SPR_DCBFR, cl); return 0; @@ -140,6 +141,7 @@ or1k_map_page(struct device *dev, struct page *page, { unsigned long cl; dma_addr_t addr = page_to_phys(page) + offset; + struct cpuinfo_or1k *cpuinfo = &cpuinfo_or1k[smp_processor_id()]; if (attrs & DMA_ATTR_SKIP_CPU_SYNC) return addr; @@ -148,13 +150,13 @@ or1k_map_page(struct device *dev, struct page *page, case DMA_TO_DEVICE: /* Flush the dcache for the requested range */ for (cl = addr; cl < addr + size; - cl += cpuinfo.dcache_block_size) + cl += cpuinfo->dcache_block_size) mtspr(SPR_DCBFR, cl); break; case DMA_FROM_DEVICE: /* Invalidate the dcache for the requested range */ for (cl = addr; cl < addr + size; - cl += cpuinfo.dcache_block_size) + cl += cpuinfo->dcache_block_size) mtspr(SPR_DCBIR, cl); break; default: @@ -213,9 +215,10 @@ or1k_sync_single_for_cpu(struct device *dev, { unsigned long cl; dma_addr_t addr = dma_handle; + struct cpuinfo_or1k *cpuinfo = &cpuinfo_or1k[smp_processor_id()]; /* Invalidate the dcache for the requested range */ - for (cl = addr; cl < addr + size; cl += cpuinfo.dcache_block_size) + for (cl = addr; cl < addr + size; cl += cpuinfo->dcache_block_size) mtspr(SPR_DCBIR, cl); } @@ -226,9 +229,10 @@ or1k_sync_single_for_device(struct device *dev, { unsigned long cl; dma_addr_t addr = dma_handle; + struct cpuinfo_or1k *cpuinfo = &cpuinfo_or1k[smp_processor_id()]; /* Flush the dcache for the requested range */ - for (cl = addr; cl < addr + size; cl += cpuinfo.dcache_block_size) + for (cl = addr; cl < addr + size; cl += cpuinfo->dcache_block_size) mtspr(SPR_DCBFR, cl); } diff --git a/arch/openrisc/kernel/entry.S b/arch/openrisc/kernel/entry.S index 1b7160c79646..690d55272ba6 100644 --- a/arch/openrisc/kernel/entry.S +++ b/arch/openrisc/kernel/entry.S @@ -42,6 +42,61 @@ /* =========================================================[ macros ]=== */ +#ifdef CONFIG_TRACE_IRQFLAGS +/* + * Trace irq on/off creating a stack frame. + */ +#define TRACE_IRQS_OP(trace_op) \ + l.sw -8(r1),r2 /* store frame pointer */ ;\ + l.sw -4(r1),r9 /* store return address */ ;\ + l.addi r2,r1,0 /* move sp to fp */ ;\ + l.jal trace_op ;\ + l.addi r1,r1,-8 ;\ + l.ori r1,r2,0 /* restore sp */ ;\ + l.lwz r9,-4(r1) /* restore return address */ ;\ + l.lwz r2,-8(r1) /* restore fp */ ;\ +/* + * Trace irq on/off and save registers we need that would otherwise be + * clobbered. + */ +#define TRACE_IRQS_SAVE(t1,trace_op) \ + l.sw -12(r1),t1 /* save extra reg */ ;\ + l.sw -8(r1),r2 /* store frame pointer */ ;\ + l.sw -4(r1),r9 /* store return address */ ;\ + l.addi r2,r1,0 /* move sp to fp */ ;\ + l.jal trace_op ;\ + l.addi r1,r1,-12 ;\ + l.ori r1,r2,0 /* restore sp */ ;\ + l.lwz r9,-4(r1) /* restore return address */ ;\ + l.lwz r2,-8(r1) /* restore fp */ ;\ + l.lwz t1,-12(r1) /* restore extra reg */ + +#define TRACE_IRQS_OFF TRACE_IRQS_OP(trace_hardirqs_off) +#define TRACE_IRQS_ON TRACE_IRQS_OP(trace_hardirqs_on) +#define TRACE_IRQS_ON_SYSCALL \ + TRACE_IRQS_SAVE(r10,trace_hardirqs_on) ;\ + l.lwz r3,PT_GPR3(r1) ;\ + l.lwz r4,PT_GPR4(r1) ;\ + l.lwz r5,PT_GPR5(r1) ;\ + l.lwz r6,PT_GPR6(r1) ;\ + l.lwz r7,PT_GPR7(r1) ;\ + l.lwz r8,PT_GPR8(r1) ;\ + l.lwz r11,PT_GPR11(r1) +#define TRACE_IRQS_OFF_ENTRY \ + l.lwz r5,PT_SR(r1) ;\ + l.andi r3,r5,(SPR_SR_IEE|SPR_SR_TEE) ;\ + l.sfeq r5,r0 /* skip trace if irqs were already off */;\ + l.bf 1f ;\ + l.nop ;\ + TRACE_IRQS_SAVE(r4,trace_hardirqs_off) ;\ +1: +#else +#define TRACE_IRQS_OFF +#define TRACE_IRQS_ON +#define TRACE_IRQS_OFF_ENTRY +#define TRACE_IRQS_ON_SYSCALL +#endif + /* * We need to disable interrupts at beginning of RESTORE_ALL * since interrupt might come in after we've loaded EPC return address @@ -124,6 +179,7 @@ handler: ;\ /* r30 already save */ ;\ /* l.sw PT_GPR30(r1),r30*/ ;\ l.sw PT_GPR31(r1),r31 ;\ + TRACE_IRQS_OFF_ENTRY ;\ /* Store -1 in orig_gpr11 for non-syscall exceptions */ ;\ l.addi r30,r0,-1 ;\ l.sw PT_ORIG_GPR11(r1),r30 @@ -557,9 +613,6 @@ _string_syscall_return: .align 4 ENTRY(_sys_call_handler) - /* syscalls run with interrupts enabled */ - ENABLE_INTERRUPTS(r29) // enable interrupts, r29 is temp - /* r1, EPCR, ESR a already saved */ l.sw PT_GPR2(r1),r2 /* r3-r8 must be saved because syscall restart relies @@ -597,6 +650,10 @@ ENTRY(_sys_call_handler) /* l.sw PT_GPR30(r1),r30 */ _syscall_check_trace_enter: + /* syscalls run with interrupts enabled */ + TRACE_IRQS_ON_SYSCALL + ENABLE_INTERRUPTS(r29) // enable interrupts, r29 is temp + /* If TIF_SYSCALL_TRACE is set, then we want to do syscall tracing */ l.lwz r30,TI_FLAGS(r10) l.andi r30,r30,_TIF_SYSCALL_TRACE @@ -657,6 +714,7 @@ _syscall_check_trace_leave: _syscall_check_work: /* Here we need to disable interrupts */ DISABLE_INTERRUPTS(r27,r29) + TRACE_IRQS_OFF l.lwz r30,TI_FLAGS(r10) l.andi r30,r30,_TIF_WORK_MASK l.sfne r30,r0 @@ -871,6 +929,7 @@ UNHANDLED_EXCEPTION(_vector_0x1f00,0x1f00) _resume_userspace: DISABLE_INTERRUPTS(r3,r4) + TRACE_IRQS_OFF l.lwz r4,TI_FLAGS(r10) l.andi r13,r4,_TIF_WORK_MASK l.sfeqi r13,0 @@ -909,6 +968,15 @@ _work_pending: l.lwz r8,PT_GPR8(r1) _restore_all: +#ifdef CONFIG_TRACE_IRQFLAGS + l.lwz r4,PT_SR(r1) + l.andi r3,r4,(SPR_SR_IEE|SPR_SR_TEE) + l.sfeq r3,r0 /* skip trace if irqs were off */ + l.bf skip_hardirqs_on + l.nop + TRACE_IRQS_ON +skip_hardirqs_on: +#endif RESTORE_ALL /* This returns to userspace code */ diff --git a/arch/openrisc/kernel/head.S b/arch/openrisc/kernel/head.S index 1e87913576e3..fb02b2a1d6f2 100644 --- a/arch/openrisc/kernel/head.S +++ b/arch/openrisc/kernel/head.S @@ -49,9 +49,31 @@ /* ============================================[ tmp store locations ]=== */ +#define SPR_SHADOW_GPR(x) ((x) + SPR_GPR_BASE + 32) + /* * emergency_print temporary stores */ +#ifdef CONFIG_OPENRISC_HAVE_SHADOW_GPRS +#define EMERGENCY_PRINT_STORE_GPR4 l.mtspr r0,r4,SPR_SHADOW_GPR(14) +#define EMERGENCY_PRINT_LOAD_GPR4 l.mfspr r4,r0,SPR_SHADOW_GPR(14) + +#define EMERGENCY_PRINT_STORE_GPR5 l.mtspr r0,r5,SPR_SHADOW_GPR(15) +#define EMERGENCY_PRINT_LOAD_GPR5 l.mfspr r5,r0,SPR_SHADOW_GPR(15) + +#define EMERGENCY_PRINT_STORE_GPR6 l.mtspr r0,r6,SPR_SHADOW_GPR(16) +#define EMERGENCY_PRINT_LOAD_GPR6 l.mfspr r6,r0,SPR_SHADOW_GPR(16) + +#define EMERGENCY_PRINT_STORE_GPR7 l.mtspr r0,r7,SPR_SHADOW_GPR(7) +#define EMERGENCY_PRINT_LOAD_GPR7 l.mfspr r7,r0,SPR_SHADOW_GPR(7) + +#define EMERGENCY_PRINT_STORE_GPR8 l.mtspr r0,r8,SPR_SHADOW_GPR(8) +#define EMERGENCY_PRINT_LOAD_GPR8 l.mfspr r8,r0,SPR_SHADOW_GPR(8) + +#define EMERGENCY_PRINT_STORE_GPR9 l.mtspr r0,r9,SPR_SHADOW_GPR(9) +#define EMERGENCY_PRINT_LOAD_GPR9 l.mfspr r9,r0,SPR_SHADOW_GPR(9) + +#else /* !CONFIG_OPENRISC_HAVE_SHADOW_GPRS */ #define EMERGENCY_PRINT_STORE_GPR4 l.sw 0x20(r0),r4 #define EMERGENCY_PRINT_LOAD_GPR4 l.lwz r4,0x20(r0) @@ -70,13 +92,28 @@ #define EMERGENCY_PRINT_STORE_GPR9 l.sw 0x34(r0),r9 #define EMERGENCY_PRINT_LOAD_GPR9 l.lwz r9,0x34(r0) +#endif /* * TLB miss handlers temorary stores */ -#define EXCEPTION_STORE_GPR9 l.sw 0x10(r0),r9 -#define EXCEPTION_LOAD_GPR9 l.lwz r9,0x10(r0) +#ifdef CONFIG_OPENRISC_HAVE_SHADOW_GPRS +#define EXCEPTION_STORE_GPR2 l.mtspr r0,r2,SPR_SHADOW_GPR(2) +#define EXCEPTION_LOAD_GPR2 l.mfspr r2,r0,SPR_SHADOW_GPR(2) + +#define EXCEPTION_STORE_GPR3 l.mtspr r0,r3,SPR_SHADOW_GPR(3) +#define EXCEPTION_LOAD_GPR3 l.mfspr r3,r0,SPR_SHADOW_GPR(3) + +#define EXCEPTION_STORE_GPR4 l.mtspr r0,r4,SPR_SHADOW_GPR(4) +#define EXCEPTION_LOAD_GPR4 l.mfspr r4,r0,SPR_SHADOW_GPR(4) + +#define EXCEPTION_STORE_GPR5 l.mtspr r0,r5,SPR_SHADOW_GPR(5) +#define EXCEPTION_LOAD_GPR5 l.mfspr r5,r0,SPR_SHADOW_GPR(5) + +#define EXCEPTION_STORE_GPR6 l.mtspr r0,r6,SPR_SHADOW_GPR(6) +#define EXCEPTION_LOAD_GPR6 l.mfspr r6,r0,SPR_SHADOW_GPR(6) +#else /* !CONFIG_OPENRISC_HAVE_SHADOW_GPRS */ #define EXCEPTION_STORE_GPR2 l.sw 0x64(r0),r2 #define EXCEPTION_LOAD_GPR2 l.lwz r2,0x64(r0) @@ -92,35 +129,67 @@ #define EXCEPTION_STORE_GPR6 l.sw 0x74(r0),r6 #define EXCEPTION_LOAD_GPR6 l.lwz r6,0x74(r0) +#endif /* * EXCEPTION_HANDLE temporary stores */ +#ifdef CONFIG_OPENRISC_HAVE_SHADOW_GPRS +#define EXCEPTION_T_STORE_GPR30 l.mtspr r0,r30,SPR_SHADOW_GPR(30) +#define EXCEPTION_T_LOAD_GPR30(reg) l.mfspr reg,r0,SPR_SHADOW_GPR(30) + +#define EXCEPTION_T_STORE_GPR10 l.mtspr r0,r10,SPR_SHADOW_GPR(10) +#define EXCEPTION_T_LOAD_GPR10(reg) l.mfspr reg,r0,SPR_SHADOW_GPR(10) + +#define EXCEPTION_T_STORE_SP l.mtspr r0,r1,SPR_SHADOW_GPR(1) +#define EXCEPTION_T_LOAD_SP(reg) l.mfspr reg,r0,SPR_SHADOW_GPR(1) + +#else /* !CONFIG_OPENRISC_HAVE_SHADOW_GPRS */ #define EXCEPTION_T_STORE_GPR30 l.sw 0x78(r0),r30 #define EXCEPTION_T_LOAD_GPR30(reg) l.lwz reg,0x78(r0) #define EXCEPTION_T_STORE_GPR10 l.sw 0x7c(r0),r10 #define EXCEPTION_T_LOAD_GPR10(reg) l.lwz reg,0x7c(r0) -#define EXCEPTION_T_STORE_SP l.sw 0x80(r0),r1 +#define EXCEPTION_T_STORE_SP l.sw 0x80(r0),r1 #define EXCEPTION_T_LOAD_SP(reg) l.lwz reg,0x80(r0) - -/* - * For UNHANLDED_EXCEPTION - */ - -#define EXCEPTION_T_STORE_GPR31 l.sw 0x84(r0),r31 -#define EXCEPTION_T_LOAD_GPR31(reg) l.lwz reg,0x84(r0) +#endif /* =========================================================[ macros ]=== */ - +#ifdef CONFIG_SMP +#define GET_CURRENT_PGD(reg,t1) \ + LOAD_SYMBOL_2_GPR(reg,current_pgd) ;\ + l.mfspr t1,r0,SPR_COREID ;\ + l.slli t1,t1,2 ;\ + l.add reg,reg,t1 ;\ + tophys (t1,reg) ;\ + l.lwz reg,0(t1) +#else #define GET_CURRENT_PGD(reg,t1) \ LOAD_SYMBOL_2_GPR(reg,current_pgd) ;\ tophys (t1,reg) ;\ l.lwz reg,0(t1) +#endif +/* Load r10 from current_thread_info_set - clobbers r1 and r30 */ +#ifdef CONFIG_SMP +#define GET_CURRENT_THREAD_INFO \ + LOAD_SYMBOL_2_GPR(r1,current_thread_info_set) ;\ + tophys (r30,r1) ;\ + l.mfspr r10,r0,SPR_COREID ;\ + l.slli r10,r10,2 ;\ + l.add r30,r30,r10 ;\ + /* r10: current_thread_info */ ;\ + l.lwz r10,0(r30) +#else +#define GET_CURRENT_THREAD_INFO \ + LOAD_SYMBOL_2_GPR(r1,current_thread_info_set) ;\ + tophys (r30,r1) ;\ + /* r10: current_thread_info */ ;\ + l.lwz r10,0(r30) +#endif /* * DSCR: this is a common hook for handling exceptions. it will save @@ -163,10 +232,7 @@ l.bnf 2f /* kernel_mode */ ;\ EXCEPTION_T_STORE_SP /* delay slot */ ;\ 1: /* user_mode: */ ;\ - LOAD_SYMBOL_2_GPR(r1,current_thread_info_set) ;\ - tophys (r30,r1) ;\ - /* r10: current_thread_info */ ;\ - l.lwz r10,0(r30) ;\ + GET_CURRENT_THREAD_INFO ;\ tophys (r30,r10) ;\ l.lwz r1,(TI_KSP)(r30) ;\ /* fall through */ ;\ @@ -226,7 +292,7 @@ * */ #define UNHANDLED_EXCEPTION(handler) \ - EXCEPTION_T_STORE_GPR31 ;\ + EXCEPTION_T_STORE_GPR30 ;\ EXCEPTION_T_STORE_GPR10 ;\ EXCEPTION_T_STORE_SP ;\ /* temporary store r3, r9 into r1, r10 */ ;\ @@ -255,35 +321,35 @@ /* r1: KSP, r10: current, r31: __pa(KSP) */ ;\ /* r12: temp, syscall indicator, r13 temp */ ;\ l.addi r1,r1,-(INT_FRAME_SIZE) ;\ - /* r1 is KSP, r31 is __pa(KSP) */ ;\ - tophys (r31,r1) ;\ - l.sw PT_GPR12(r31),r12 ;\ + /* r1 is KSP, r30 is __pa(KSP) */ ;\ + tophys (r30,r1) ;\ + l.sw PT_GPR12(r30),r12 ;\ l.mfspr r12,r0,SPR_EPCR_BASE ;\ - l.sw PT_PC(r31),r12 ;\ + l.sw PT_PC(r30),r12 ;\ l.mfspr r12,r0,SPR_ESR_BASE ;\ - l.sw PT_SR(r31),r12 ;\ + l.sw PT_SR(r30),r12 ;\ /* save r31 */ ;\ - EXCEPTION_T_LOAD_GPR31(r12) ;\ - l.sw PT_GPR31(r31),r12 ;\ + EXCEPTION_T_LOAD_GPR30(r12) ;\ + l.sw PT_GPR30(r30),r12 ;\ /* save r10 as was prior to exception */ ;\ EXCEPTION_T_LOAD_GPR10(r12) ;\ - l.sw PT_GPR10(r31),r12 ;\ + l.sw PT_GPR10(r30),r12 ;\ /* save PT_SP as was prior to exception */ ;\ EXCEPTION_T_LOAD_SP(r12) ;\ - l.sw PT_SP(r31),r12 ;\ - l.sw PT_GPR13(r31),r13 ;\ + l.sw PT_SP(r30),r12 ;\ + l.sw PT_GPR13(r30),r13 ;\ /* --> */ ;\ /* save exception r4, set r4 = EA */ ;\ - l.sw PT_GPR4(r31),r4 ;\ + l.sw PT_GPR4(r30),r4 ;\ l.mfspr r4,r0,SPR_EEAR_BASE ;\ /* r12 == 1 if we come from syscall */ ;\ CLEAR_GPR(r12) ;\ /* ----- play a MMU trick ----- */ ;\ - l.ori r31,r0,(EXCEPTION_SR) ;\ - l.mtspr r0,r31,SPR_ESR_BASE ;\ + l.ori r30,r0,(EXCEPTION_SR) ;\ + l.mtspr r0,r30,SPR_ESR_BASE ;\ /* r31: EA address of handler */ ;\ - LOAD_SYMBOL_2_GPR(r31,handler) ;\ - l.mtspr r0,r31,SPR_EPCR_BASE ;\ + LOAD_SYMBOL_2_GPR(r30,handler) ;\ + l.mtspr r0,r30,SPR_EPCR_BASE ;\ l.rfe /* =====================================================[ exceptions] === */ @@ -487,6 +553,12 @@ _start: CLEAR_GPR(r30) CLEAR_GPR(r31) +#ifdef CONFIG_SMP + l.mfspr r26,r0,SPR_COREID + l.sfeq r26,r0 + l.bnf secondary_wait + l.nop +#endif /* * set up initial ksp and current */ @@ -638,6 +710,100 @@ _flush_tlb: l.jr r9 l.nop +#ifdef CONFIG_SMP +secondary_wait: + /* Doze the cpu until we are asked to run */ + /* If we dont have power management skip doze */ + l.mfspr r25,r0,SPR_UPR + l.andi r25,r25,SPR_UPR_PMP + l.sfeq r25,r0 + l.bf secondary_check_release + l.nop + + /* Setup special secondary exception handler */ + LOAD_SYMBOL_2_GPR(r3, _secondary_evbar) + tophys(r25,r3) + l.mtspr r0,r25,SPR_EVBAR + + /* Enable Interrupts */ + l.mfspr r25,r0,SPR_SR + l.ori r25,r25,SPR_SR_IEE + l.mtspr r0,r25,SPR_SR + + /* Unmask interrupts interrupts */ + l.mfspr r25,r0,SPR_PICMR + l.ori r25,r25,0xffff + l.mtspr r0,r25,SPR_PICMR + + /* Doze */ + l.mfspr r25,r0,SPR_PMR + LOAD_SYMBOL_2_GPR(r3, SPR_PMR_DME) + l.or r25,r25,r3 + l.mtspr r0,r25,SPR_PMR + + /* Wakeup - Restore exception handler */ + l.mtspr r0,r0,SPR_EVBAR + +secondary_check_release: + /* + * Check if we actually got the release signal, if not go-back to + * sleep. + */ + l.mfspr r25,r0,SPR_COREID + LOAD_SYMBOL_2_GPR(r3, secondary_release) + tophys(r4, r3) + l.lwz r3,0(r4) + l.sfeq r25,r3 + l.bnf secondary_wait + l.nop + /* fall through to secondary_init */ + +secondary_init: + /* + * set up initial ksp and current + */ + LOAD_SYMBOL_2_GPR(r10, secondary_thread_info) + tophys (r30,r10) + l.lwz r10,0(r30) + l.addi r1,r10,THREAD_SIZE + tophys (r30,r10) + l.sw TI_KSP(r30),r1 + + l.jal _ic_enable + l.nop + + l.jal _dc_enable + l.nop + + l.jal _flush_tlb + l.nop + + /* + * enable dmmu & immu + */ + l.mfspr r30,r0,SPR_SR + l.movhi r28,hi(SPR_SR_DME | SPR_SR_IME) + l.ori r28,r28,lo(SPR_SR_DME | SPR_SR_IME) + l.or r30,r30,r28 + /* + * This is a bit tricky, we need to switch over from physical addresses + * to virtual addresses on the fly. + * To do that, we first set up ESR with the IME and DME bits set. + * Then EPCR is set to secondary_start and then a l.rfe is issued to + * "jump" to that. + */ + l.mtspr r0,r30,SPR_ESR_BASE + LOAD_SYMBOL_2_GPR(r30, secondary_start) + l.mtspr r0,r30,SPR_EPCR_BASE + l.rfe + +secondary_start: + LOAD_SYMBOL_2_GPR(r30, secondary_start_kernel) + l.jr r30 + l.nop + +#endif + /* ========================================[ cache ]=== */ /* alignment here so we don't change memory offsets with @@ -1533,6 +1699,17 @@ ENTRY(_early_uart_init) l.jr r9 l.nop + .align 0x1000 + .global _secondary_evbar +_secondary_evbar: + + .space 0x800 + /* Just disable interrupts and Return */ + l.ori r3,r0,SPR_SR_SM + l.mtspr r0,r3,SPR_ESR_BASE + l.rfe + + .section .rodata _string_unhandled_exception: .string "\n\rRunarunaround: Unhandled exception 0x\0" diff --git a/arch/openrisc/kernel/setup.c b/arch/openrisc/kernel/setup.c index dbf5ee95a0d5..9d28ab14d139 100644 --- a/arch/openrisc/kernel/setup.c +++ b/arch/openrisc/kernel/setup.c @@ -93,7 +93,7 @@ static void __init setup_memory(void) memblock_dump_all(); } -struct cpuinfo cpuinfo; +struct cpuinfo_or1k cpuinfo_or1k[NR_CPUS]; static void print_cpuinfo(void) { @@ -101,12 +101,13 @@ static void print_cpuinfo(void) unsigned long vr = mfspr(SPR_VR); unsigned int version; unsigned int revision; + struct cpuinfo_or1k *cpuinfo = &cpuinfo_or1k[smp_processor_id()]; version = (vr & SPR_VR_VER) >> 24; revision = (vr & SPR_VR_REV); printk(KERN_INFO "CPU: OpenRISC-%x (revision %d) @%d MHz\n", - version, revision, cpuinfo.clock_frequency / 1000000); + version, revision, cpuinfo->clock_frequency / 1000000); if (!(upr & SPR_UPR_UP)) { printk(KERN_INFO @@ -117,15 +118,15 @@ static void print_cpuinfo(void) if (upr & SPR_UPR_DCP) printk(KERN_INFO "-- dcache: %4d bytes total, %2d bytes/line, %d way(s)\n", - cpuinfo.dcache_size, cpuinfo.dcache_block_size, - cpuinfo.dcache_ways); + cpuinfo->dcache_size, cpuinfo->dcache_block_size, + cpuinfo->dcache_ways); else printk(KERN_INFO "-- dcache disabled\n"); if (upr & SPR_UPR_ICP) printk(KERN_INFO "-- icache: %4d bytes total, %2d bytes/line, %d way(s)\n", - cpuinfo.icache_size, cpuinfo.icache_block_size, - cpuinfo.icache_ways); + cpuinfo->icache_size, cpuinfo->icache_block_size, + cpuinfo->icache_ways); else printk(KERN_INFO "-- icache disabled\n"); @@ -153,38 +154,58 @@ static void print_cpuinfo(void) printk(KERN_INFO "-- custom unit(s)\n"); } +static struct device_node *setup_find_cpu_node(int cpu) +{ + u32 hwid; + struct device_node *cpun; + struct device_node *cpus = of_find_node_by_path("/cpus"); + + for_each_available_child_of_node(cpus, cpun) { + if (of_property_read_u32(cpun, "reg", &hwid)) + continue; + if (hwid == cpu) + return cpun; + } + + return NULL; +} + void __init setup_cpuinfo(void) { struct device_node *cpu; unsigned long iccfgr, dccfgr; unsigned long cache_set_size; + int cpu_id = smp_processor_id(); + struct cpuinfo_or1k *cpuinfo = &cpuinfo_or1k[cpu_id]; - cpu = of_find_compatible_node(NULL, NULL, "opencores,or1200-rtlsvn481"); + cpu = setup_find_cpu_node(cpu_id); if (!cpu) - panic("No compatible CPU found in device tree...\n"); + panic("Couldn't find CPU%d in device tree...\n", cpu_id); iccfgr = mfspr(SPR_ICCFGR); - cpuinfo.icache_ways = 1 << (iccfgr & SPR_ICCFGR_NCW); + cpuinfo->icache_ways = 1 << (iccfgr & SPR_ICCFGR_NCW); cache_set_size = 1 << ((iccfgr & SPR_ICCFGR_NCS) >> 3); - cpuinfo.icache_block_size = 16 << ((iccfgr & SPR_ICCFGR_CBS) >> 7); - cpuinfo.icache_size = - cache_set_size * cpuinfo.icache_ways * cpuinfo.icache_block_size; + cpuinfo->icache_block_size = 16 << ((iccfgr & SPR_ICCFGR_CBS) >> 7); + cpuinfo->icache_size = + cache_set_size * cpuinfo->icache_ways * cpuinfo->icache_block_size; dccfgr = mfspr(SPR_DCCFGR); - cpuinfo.dcache_ways = 1 << (dccfgr & SPR_DCCFGR_NCW); + cpuinfo->dcache_ways = 1 << (dccfgr & SPR_DCCFGR_NCW); cache_set_size = 1 << ((dccfgr & SPR_DCCFGR_NCS) >> 3); - cpuinfo.dcache_block_size = 16 << ((dccfgr & SPR_DCCFGR_CBS) >> 7); - cpuinfo.dcache_size = - cache_set_size * cpuinfo.dcache_ways * cpuinfo.dcache_block_size; + cpuinfo->dcache_block_size = 16 << ((dccfgr & SPR_DCCFGR_CBS) >> 7); + cpuinfo->dcache_size = + cache_set_size * cpuinfo->dcache_ways * cpuinfo->dcache_block_size; if (of_property_read_u32(cpu, "clock-frequency", - &cpuinfo.clock_frequency)) { + &cpuinfo->clock_frequency)) { printk(KERN_WARNING "Device tree missing CPU 'clock-frequency' parameter." "Assuming frequency 25MHZ" "This is probably not what you want."); } + cpuinfo->coreid = mfspr(SPR_COREID); + of_node_put(cpu); print_cpuinfo(); @@ -251,8 +272,8 @@ void __init detect_unit_config(unsigned long upr, unsigned long mask, void calibrate_delay(void) { const int *val; - struct device_node *cpu = NULL; - cpu = of_find_compatible_node(NULL, NULL, "opencores,or1200-rtlsvn481"); + struct device_node *cpu = setup_find_cpu_node(smp_processor_id()); + val = of_get_property(cpu, "clock-frequency", NULL); if (!val) panic("no cpu 'clock-frequency' parameter in device tree"); @@ -268,6 +289,10 @@ void __init setup_arch(char **cmdline_p) setup_cpuinfo(); +#ifdef CONFIG_SMP + smp_init_cpus(); +#endif + /* process 1's initial memory region is the kernel code/data */ init_mm.start_code = (unsigned long)_stext; init_mm.end_code = (unsigned long)_etext; @@ -302,54 +327,78 @@ void __init setup_arch(char **cmdline_p) static int show_cpuinfo(struct seq_file *m, void *v) { - unsigned long vr; - int version, revision; + unsigned int vr, cpucfgr; + unsigned int avr; + unsigned int version; + struct cpuinfo_or1k *cpuinfo = v; vr = mfspr(SPR_VR); - version = (vr & SPR_VR_VER) >> 24; - revision = vr & SPR_VR_REV; - - seq_printf(m, - "cpu\t\t: OpenRISC-%x\n" - "revision\t: %d\n" - "frequency\t: %ld\n" - "dcache size\t: %d bytes\n" - "dcache block size\t: %d bytes\n" - "dcache ways\t: %d\n" - "icache size\t: %d bytes\n" - "icache block size\t: %d bytes\n" - "icache ways\t: %d\n" - "immu\t\t: %d entries, %lu ways\n" - "dmmu\t\t: %d entries, %lu ways\n" - "bogomips\t: %lu.%02lu\n", - version, - revision, - loops_per_jiffy * HZ, - cpuinfo.dcache_size, - cpuinfo.dcache_block_size, - cpuinfo.dcache_ways, - cpuinfo.icache_size, - cpuinfo.icache_block_size, - cpuinfo.icache_ways, - 1 << ((mfspr(SPR_DMMUCFGR) & SPR_DMMUCFGR_NTS) >> 2), - 1 + (mfspr(SPR_DMMUCFGR) & SPR_DMMUCFGR_NTW), - 1 << ((mfspr(SPR_IMMUCFGR) & SPR_IMMUCFGR_NTS) >> 2), - 1 + (mfspr(SPR_IMMUCFGR) & SPR_IMMUCFGR_NTW), - (loops_per_jiffy * HZ) / 500000, - ((loops_per_jiffy * HZ) / 5000) % 100); + cpucfgr = mfspr(SPR_CPUCFGR); + +#ifdef CONFIG_SMP + seq_printf(m, "processor\t\t: %d\n", cpuinfo->coreid); +#endif + if (vr & SPR_VR_UVRP) { + vr = mfspr(SPR_VR2); + version = vr & SPR_VR2_VER; + avr = mfspr(SPR_AVR); + seq_printf(m, "cpu architecture\t: " + "OpenRISC 1000 (%d.%d-rev%d)\n", + (avr >> 24) & 0xff, + (avr >> 16) & 0xff, + (avr >> 8) & 0xff); + seq_printf(m, "cpu implementation id\t: 0x%x\n", + (vr & SPR_VR2_CPUID) >> 24); + seq_printf(m, "cpu version\t\t: 0x%x\n", version); + } else { + version = (vr & SPR_VR_VER) >> 24; + seq_printf(m, "cpu\t\t\t: OpenRISC-%x\n", version); + seq_printf(m, "revision\t\t: %d\n", vr & SPR_VR_REV); + } + seq_printf(m, "frequency\t\t: %ld\n", loops_per_jiffy * HZ); + seq_printf(m, "dcache size\t\t: %d bytes\n", cpuinfo->dcache_size); + seq_printf(m, "dcache block size\t: %d bytes\n", + cpuinfo->dcache_block_size); + seq_printf(m, "dcache ways\t\t: %d\n", cpuinfo->dcache_ways); + seq_printf(m, "icache size\t\t: %d bytes\n", cpuinfo->icache_size); + seq_printf(m, "icache block size\t: %d bytes\n", + cpuinfo->icache_block_size); + seq_printf(m, "icache ways\t\t: %d\n", cpuinfo->icache_ways); + seq_printf(m, "immu\t\t\t: %d entries, %lu ways\n", + 1 << ((mfspr(SPR_DMMUCFGR) & SPR_DMMUCFGR_NTS) >> 2), + 1 + (mfspr(SPR_DMMUCFGR) & SPR_DMMUCFGR_NTW)); + seq_printf(m, "dmmu\t\t\t: %d entries, %lu ways\n", + 1 << ((mfspr(SPR_IMMUCFGR) & SPR_IMMUCFGR_NTS) >> 2), + 1 + (mfspr(SPR_IMMUCFGR) & SPR_IMMUCFGR_NTW)); + seq_printf(m, "bogomips\t\t: %lu.%02lu\n", + (loops_per_jiffy * HZ) / 500000, + ((loops_per_jiffy * HZ) / 5000) % 100); + + seq_puts(m, "features\t\t: "); + seq_printf(m, "%s ", cpucfgr & SPR_CPUCFGR_OB32S ? "orbis32" : ""); + seq_printf(m, "%s ", cpucfgr & SPR_CPUCFGR_OB64S ? "orbis64" : ""); + seq_printf(m, "%s ", cpucfgr & SPR_CPUCFGR_OF32S ? "orfpx32" : ""); + seq_printf(m, "%s ", cpucfgr & SPR_CPUCFGR_OF64S ? "orfpx64" : ""); + seq_printf(m, "%s ", cpucfgr & SPR_CPUCFGR_OV64S ? "orvdx64" : ""); + seq_puts(m, "\n"); + + seq_puts(m, "\n"); + return 0; } -static void *c_start(struct seq_file *m, loff_t * pos) +static void *c_start(struct seq_file *m, loff_t *pos) { - /* We only have one CPU... */ - return *pos < 1 ? (void *)1 : NULL; + *pos = cpumask_next(*pos - 1, cpu_online_mask); + if ((*pos) < nr_cpu_ids) + return &cpuinfo_or1k[*pos]; + return NULL; } -static void *c_next(struct seq_file *m, void *v, loff_t * pos) +static void *c_next(struct seq_file *m, void *v, loff_t *pos) { - ++*pos; - return NULL; + (*pos)++; + return c_start(m, pos); } static void c_stop(struct seq_file *m, void *v) diff --git a/arch/openrisc/kernel/smp.c b/arch/openrisc/kernel/smp.c new file mode 100644 index 000000000000..7d518ee8bddc --- /dev/null +++ b/arch/openrisc/kernel/smp.c @@ -0,0 +1,259 @@ +/* + * Copyright (C) 2014 Stefan Kristiansson <stefan.kristiansson@saunalahti.fi> + * Copyright (C) 2017 Stafford Horne <shorne@gmail.com> + * + * Based on arm64 and arc implementations + * Copyright (C) 2013 ARM Ltd. + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of any + * kind, whether express or implied. + */ + +#include <linux/smp.h> +#include <linux/cpu.h> +#include <linux/sched.h> +#include <linux/irq.h> +#include <asm/cpuinfo.h> +#include <asm/mmu_context.h> +#include <asm/tlbflush.h> +#include <asm/cacheflush.h> +#include <asm/time.h> + +static void (*smp_cross_call)(const struct cpumask *, unsigned int); + +unsigned long secondary_release = -1; +struct thread_info *secondary_thread_info; + +enum ipi_msg_type { + IPI_WAKEUP, + IPI_RESCHEDULE, + IPI_CALL_FUNC, + IPI_CALL_FUNC_SINGLE, +}; + +static DEFINE_SPINLOCK(boot_lock); + +static void boot_secondary(unsigned int cpu, struct task_struct *idle) +{ + /* + * set synchronisation state between this boot processor + * and the secondary one + */ + spin_lock(&boot_lock); + + secondary_release = cpu; + smp_cross_call(cpumask_of(cpu), IPI_WAKEUP); + + /* + * now the secondary core is starting up let it run its + * calibrations, then wait for it to finish + */ + spin_unlock(&boot_lock); +} + +void __init smp_prepare_boot_cpu(void) +{ +} + +void __init smp_init_cpus(void) +{ + int i; + + for (i = 0; i < NR_CPUS; i++) + set_cpu_possible(i, true); +} + +void __init smp_prepare_cpus(unsigned int max_cpus) +{ + int i; + + /* + * Initialise the present map, which describes the set of CPUs + * actually populated at the present time. + */ + for (i = 0; i < max_cpus; i++) + set_cpu_present(i, true); +} + +void __init smp_cpus_done(unsigned int max_cpus) +{ +} + +static DECLARE_COMPLETION(cpu_running); + +int __cpu_up(unsigned int cpu, struct task_struct *idle) +{ + if (smp_cross_call == NULL) { + pr_warn("CPU%u: failed to start, IPI controller missing", + cpu); + return -EIO; + } + + secondary_thread_info = task_thread_info(idle); + current_pgd[cpu] = init_mm.pgd; + + boot_secondary(cpu, idle); + if (!wait_for_completion_timeout(&cpu_running, + msecs_to_jiffies(1000))) { + pr_crit("CPU%u: failed to start\n", cpu); + return -EIO; + } + synchronise_count_master(cpu); + + return 0; +} + +asmlinkage __init void secondary_start_kernel(void) +{ + struct mm_struct *mm = &init_mm; + unsigned int cpu = smp_processor_id(); + /* + * All kernel threads share the same mm context; grab a + * reference and switch to it. + */ + atomic_inc(&mm->mm_count); + current->active_mm = mm; + cpumask_set_cpu(cpu, mm_cpumask(mm)); + + pr_info("CPU%u: Booted secondary processor\n", cpu); + + setup_cpuinfo(); + openrisc_clockevent_init(); + + notify_cpu_starting(cpu); + + /* + * OK, now it's safe to let the boot CPU continue + */ + complete(&cpu_running); + + synchronise_count_slave(cpu); + set_cpu_online(cpu, true); + + local_irq_enable(); + + preempt_disable(); + /* + * OK, it's off to the idle thread for us + */ + cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); +} + +void handle_IPI(unsigned int ipi_msg) +{ + unsigned int cpu = smp_processor_id(); + + switch (ipi_msg) { + case IPI_WAKEUP: + break; + + case IPI_RESCHEDULE: + scheduler_ipi(); + break; + + case IPI_CALL_FUNC: + generic_smp_call_function_interrupt(); + break; + + case IPI_CALL_FUNC_SINGLE: + generic_smp_call_function_single_interrupt(); + break; + + default: + WARN(1, "CPU%u: Unknown IPI message 0x%x\n", cpu, ipi_msg); + break; + } +} + +void smp_send_reschedule(int cpu) +{ + smp_cross_call(cpumask_of(cpu), IPI_RESCHEDULE); +} + +static void stop_this_cpu(void *dummy) +{ + /* Remove this CPU */ + set_cpu_online(smp_processor_id(), false); + + local_irq_disable(); + /* CPU Doze */ + if (mfspr(SPR_UPR) & SPR_UPR_PMP) + mtspr(SPR_PMR, mfspr(SPR_PMR) | SPR_PMR_DME); + /* If that didn't work, infinite loop */ + while (1) + ; +} + +void smp_send_stop(void) +{ + smp_call_function(stop_this_cpu, NULL, 0); +} + +/* not supported, yet */ +int setup_profiling_timer(unsigned int multiplier) +{ + return -EINVAL; +} + +void __init set_smp_cross_call(void (*fn)(const struct cpumask *, unsigned int)) +{ + smp_cross_call = fn; +} + +void arch_send_call_function_single_ipi(int cpu) +{ + smp_cross_call(cpumask_of(cpu), IPI_CALL_FUNC_SINGLE); +} + +void arch_send_call_function_ipi_mask(const struct cpumask *mask) +{ + smp_cross_call(mask, IPI_CALL_FUNC); +} + +/* TLB flush operations - Performed on each CPU*/ +static inline void ipi_flush_tlb_all(void *ignored) +{ + local_flush_tlb_all(); +} + +void flush_tlb_all(void) +{ + on_each_cpu(ipi_flush_tlb_all, NULL, 1); +} + +/* + * FIXME: implement proper functionality instead of flush_tlb_all. + * *But*, as things currently stands, the local_tlb_flush_* functions will + * all boil down to local_tlb_flush_all anyway. + */ +void flush_tlb_mm(struct mm_struct *mm) +{ + on_each_cpu(ipi_flush_tlb_all, NULL, 1); +} + +void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr) +{ + on_each_cpu(ipi_flush_tlb_all, NULL, 1); +} + +void flush_tlb_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end) +{ + on_each_cpu(ipi_flush_tlb_all, NULL, 1); +} + +/* Instruction cache invalidate - performed on each cpu */ +static void ipi_icache_page_inv(void *arg) +{ + struct page *page = arg; + + local_icache_page_inv(page); +} + +void smp_icache_page_inv(struct page *page) +{ + on_each_cpu(ipi_icache_page_inv, page, 1); +} +EXPORT_SYMBOL(smp_icache_page_inv); diff --git a/arch/openrisc/kernel/stacktrace.c b/arch/openrisc/kernel/stacktrace.c new file mode 100644 index 000000000000..43f140a28bc7 --- /dev/null +++ b/arch/openrisc/kernel/stacktrace.c @@ -0,0 +1,86 @@ +/* + * Stack trace utility for OpenRISC + * + * Copyright (C) 2017 Stafford Horne <shorne@gmail.com> + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of any + * kind, whether express or implied. + * + * Losely based on work from sh and powerpc. + */ + +#include <linux/export.h> +#include <linux/sched.h> +#include <linux/sched/debug.h> +#include <linux/stacktrace.h> + +#include <asm/processor.h> +#include <asm/unwinder.h> + +/* + * Save stack-backtrace addresses into a stack_trace buffer. + */ +static void +save_stack_address(void *data, unsigned long addr, int reliable) +{ + struct stack_trace *trace = data; + + if (!reliable) + return; + + if (trace->skip > 0) { + trace->skip--; + return; + } + + if (trace->nr_entries < trace->max_entries) + trace->entries[trace->nr_entries++] = addr; +} + +void save_stack_trace(struct stack_trace *trace) +{ + unwind_stack(trace, (unsigned long *) &trace, save_stack_address); +} +EXPORT_SYMBOL_GPL(save_stack_trace); + +static void +save_stack_address_nosched(void *data, unsigned long addr, int reliable) +{ + struct stack_trace *trace = (struct stack_trace *)data; + + if (!reliable) + return; + + if (in_sched_functions(addr)) + return; + + if (trace->skip > 0) { + trace->skip--; + return; + } + + if (trace->nr_entries < trace->max_entries) + trace->entries[trace->nr_entries++] = addr; +} + +void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) +{ + unsigned long *sp = NULL; + + if (tsk == current) + sp = (unsigned long *) &sp; + else + sp = (unsigned long *) KSTK_ESP(tsk); + + unwind_stack(trace, sp, save_stack_address_nosched); +} +EXPORT_SYMBOL_GPL(save_stack_trace_tsk); + +void +save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace) +{ + unwind_stack(trace, (unsigned long *) regs->sp, + save_stack_address_nosched); +} +EXPORT_SYMBOL_GPL(save_stack_trace_regs); diff --git a/arch/openrisc/kernel/sync-timer.c b/arch/openrisc/kernel/sync-timer.c new file mode 100644 index 000000000000..ed8d835caca1 --- /dev/null +++ b/arch/openrisc/kernel/sync-timer.c @@ -0,0 +1,120 @@ +/* + * OR1K timer synchronisation + * + * Based on work from MIPS implementation. + * + * All CPUs will have their count registers synchronised to the CPU0 next time + * value. This can cause a small timewarp for CPU0. All other CPU's should + * not have done anything significant (but they may have had interrupts + * enabled briefly - prom_smp_finish() should not be responsible for enabling + * interrupts...) + */ + +#include <linux/kernel.h> +#include <linux/irqflags.h> +#include <linux/cpumask.h> + +#include <asm/time.h> +#include <asm/timex.h> +#include <linux/atomic.h> +#include <asm/barrier.h> + +#include <asm/spr.h> + +static unsigned int initcount; +static atomic_t count_count_start = ATOMIC_INIT(0); +static atomic_t count_count_stop = ATOMIC_INIT(0); + +#define COUNTON 100 +#define NR_LOOPS 3 + +void synchronise_count_master(int cpu) +{ + int i; + unsigned long flags; + + pr_info("Synchronize counters for CPU %u: ", cpu); + + local_irq_save(flags); + + /* + * We loop a few times to get a primed instruction cache, + * then the last pass is more or less synchronised and + * the master and slaves each set their cycle counters to a known + * value all at once. This reduces the chance of having random offsets + * between the processors, and guarantees that the maximum + * delay between the cycle counters is never bigger than + * the latency of information-passing (cachelines) between + * two CPUs. + */ + + for (i = 0; i < NR_LOOPS; i++) { + /* slaves loop on '!= 2' */ + while (atomic_read(&count_count_start) != 1) + mb(); + atomic_set(&count_count_stop, 0); + smp_wmb(); + + /* Let the slave writes its count register */ + atomic_inc(&count_count_start); + + /* Count will be initialised to current timer */ + if (i == 1) + initcount = get_cycles(); + + /* + * Everyone initialises count in the last loop: + */ + if (i == NR_LOOPS-1) + openrisc_timer_set(initcount); + + /* + * Wait for slave to leave the synchronization point: + */ + while (atomic_read(&count_count_stop) != 1) + mb(); + atomic_set(&count_count_start, 0); + smp_wmb(); + atomic_inc(&count_count_stop); + } + /* Arrange for an interrupt in a short while */ + openrisc_timer_set_next(COUNTON); + + local_irq_restore(flags); + + /* + * i386 code reported the skew here, but the + * count registers were almost certainly out of sync + * so no point in alarming people + */ + pr_cont("done.\n"); +} + +void synchronise_count_slave(int cpu) +{ + int i; + + /* + * Not every cpu is online at the time this gets called, + * so we first wait for the master to say everyone is ready + */ + + for (i = 0; i < NR_LOOPS; i++) { + atomic_inc(&count_count_start); + while (atomic_read(&count_count_start) != 2) + mb(); + + /* + * Everyone initialises count in the last loop: + */ + if (i == NR_LOOPS-1) + openrisc_timer_set(initcount); + + atomic_inc(&count_count_stop); + while (atomic_read(&count_count_stop) != 2) + mb(); + } + /* Arrange for an interrupt in a short while */ + openrisc_timer_set_next(COUNTON); +} +#undef NR_LOOPS diff --git a/arch/openrisc/kernel/time.c b/arch/openrisc/kernel/time.c index 687c11d048d7..6baecea27080 100644 --- a/arch/openrisc/kernel/time.c +++ b/arch/openrisc/kernel/time.c @@ -27,8 +27,14 @@ #include <asm/cpuinfo.h> -static int openrisc_timer_set_next_event(unsigned long delta, - struct clock_event_device *dev) +/* Test the timer ticks to count, used in sync routine */ +inline void openrisc_timer_set(unsigned long count) +{ + mtspr(SPR_TTCR, count); +} + +/* Set the timer to trigger in delta cycles */ +inline void openrisc_timer_set_next(unsigned long delta) { u32 c; @@ -44,7 +50,12 @@ static int openrisc_timer_set_next_event(unsigned long delta, * Keep timer in continuous mode always. */ mtspr(SPR_TTMR, SPR_TTMR_CR | SPR_TTMR_IE | c); +} +static int openrisc_timer_set_next_event(unsigned long delta, + struct clock_event_device *dev) +{ + openrisc_timer_set_next(delta); return 0; } @@ -53,13 +64,32 @@ static int openrisc_timer_set_next_event(unsigned long delta, * timers) we cannot enable the PERIODIC feature. The tick timer can run using * one-shot events, so no problem. */ +DEFINE_PER_CPU(struct clock_event_device, clockevent_openrisc_timer); -static struct clock_event_device clockevent_openrisc_timer = { - .name = "openrisc_timer_clockevent", - .features = CLOCK_EVT_FEAT_ONESHOT, - .rating = 300, - .set_next_event = openrisc_timer_set_next_event, -}; +void openrisc_clockevent_init(void) +{ + unsigned int cpu = smp_processor_id(); + struct clock_event_device *evt = + &per_cpu(clockevent_openrisc_timer, cpu); + struct cpuinfo_or1k *cpuinfo = &cpuinfo_or1k[cpu]; + + mtspr(SPR_TTMR, SPR_TTMR_CR); + +#ifdef CONFIG_SMP + evt->broadcast = tick_broadcast; +#endif + evt->name = "openrisc_timer_clockevent", + evt->features = CLOCK_EVT_FEAT_ONESHOT, + evt->rating = 300, + evt->set_next_event = openrisc_timer_set_next_event, + + evt->cpumask = cpumask_of(cpu); + + /* We only have 28 bits */ + clockevents_config_and_register(evt, cpuinfo->clock_frequency, + 100, 0x0fffffff); + +} static inline void timer_ack(void) { @@ -83,7 +113,9 @@ static inline void timer_ack(void) irqreturn_t __irq_entry timer_interrupt(struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); - struct clock_event_device *evt = &clockevent_openrisc_timer; + unsigned int cpu = smp_processor_id(); + struct clock_event_device *evt = + &per_cpu(clockevent_openrisc_timer, cpu); timer_ack(); @@ -99,24 +131,12 @@ irqreturn_t __irq_entry timer_interrupt(struct pt_regs *regs) return IRQ_HANDLED; } -static __init void openrisc_clockevent_init(void) -{ - clockevent_openrisc_timer.cpumask = cpumask_of(0); - - /* We only have 28 bits */ - clockevents_config_and_register(&clockevent_openrisc_timer, - cpuinfo.clock_frequency, - 100, 0x0fffffff); - -} - /** * Clocksource: Based on OpenRISC timer/counter * * This sets up the OpenRISC Tick Timer as a clock source. The tick timer * is 32 bits wide and runs at the CPU clock frequency. */ - static u64 openrisc_timer_read(struct clocksource *cs) { return (u64) mfspr(SPR_TTCR); @@ -132,7 +152,9 @@ static struct clocksource openrisc_timer = { static int __init openrisc_timer_init(void) { - if (clocksource_register_hz(&openrisc_timer, cpuinfo.clock_frequency)) + struct cpuinfo_or1k *cpuinfo = &cpuinfo_or1k[smp_processor_id()]; + + if (clocksource_register_hz(&openrisc_timer, cpuinfo->clock_frequency)) panic("failed to register clocksource"); /* Enable the incrementer: 'continuous' mode with interrupt disabled */ diff --git a/arch/openrisc/kernel/traps.c b/arch/openrisc/kernel/traps.c index 803e9e756f77..4085d72fa5ae 100644 --- a/arch/openrisc/kernel/traps.c +++ b/arch/openrisc/kernel/traps.c @@ -38,6 +38,7 @@ #include <asm/segment.h> #include <asm/io.h> #include <asm/pgtable.h> +#include <asm/unwinder.h> extern char _etext, _stext; @@ -45,61 +46,20 @@ int kstack_depth_to_print = 0x180; int lwa_flag; unsigned long __user *lwa_addr; -static inline int valid_stack_ptr(struct thread_info *tinfo, void *p) +void print_trace(void *data, unsigned long addr, int reliable) { - return p > (void *)tinfo && p < (void *)tinfo + THREAD_SIZE - 3; -} - -void show_trace(struct task_struct *task, unsigned long *stack) -{ - struct thread_info *context; - unsigned long addr; - - context = (struct thread_info *) - ((unsigned long)stack & (~(THREAD_SIZE - 1))); - - while (valid_stack_ptr(context, stack)) { - addr = *stack++; - if (__kernel_text_address(addr)) { - printk(" [<%08lx>]", addr); - print_symbol(" %s", addr); - printk("\n"); - } - } - printk(" =======================\n"); + pr_emerg("[<%p>] %s%pS\n", (void *) addr, reliable ? "" : "? ", + (void *) addr); } /* displays a short stack trace */ void show_stack(struct task_struct *task, unsigned long *esp) { - unsigned long addr, *stack; - int i; - if (esp == NULL) esp = (unsigned long *)&esp; - stack = esp; - - printk("Stack dump [0x%08lx]:\n", (unsigned long)esp); - for (i = 0; i < kstack_depth_to_print; i++) { - if (kstack_end(stack)) - break; - if (__get_user(addr, stack)) { - /* This message matches "failing address" marked - s390 in ksymoops, so lines containing it will - not be filtered out by ksymoops. */ - printk("Failing address 0x%lx\n", (unsigned long)stack); - break; - } - stack++; - - printk("sp + %02d: 0x%08lx\n", i * 4, addr); - } - printk("\n"); - - show_trace(task, esp); - - return; + pr_emerg("Call trace:\n"); + unwind_stack(NULL, esp, print_trace); } void show_trace_task(struct task_struct *tsk) @@ -115,7 +75,7 @@ void show_registers(struct pt_regs *regs) int in_kernel = 1; unsigned long esp; - esp = (unsigned long)(®s->sp); + esp = (unsigned long)(regs->sp); if (user_mode(regs)) in_kernel = 0; diff --git a/arch/openrisc/kernel/unwinder.c b/arch/openrisc/kernel/unwinder.c new file mode 100644 index 000000000000..8ae15c2c1845 --- /dev/null +++ b/arch/openrisc/kernel/unwinder.c @@ -0,0 +1,105 @@ +/* + * OpenRISC unwinder.c + * + * Reusable arch specific api for unwinding stacks. + * + * Copyright (C) 2017 Stafford Horne <shorne@gmail.com> + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of any + * kind, whether express or implied. + */ + +#include <linux/sched/task_stack.h> +#include <linux/kernel.h> + +#include <asm/unwinder.h> + +#ifdef CONFIG_FRAME_POINTER +struct or1k_frameinfo { + unsigned long *fp; + unsigned long ra; + unsigned long top; +}; + +/* + * Verify a frameinfo structure. The return address should be a valid text + * address. The frame pointer may be null if its the last frame, otherwise + * the frame pointer should point to a location in the stack after the the + * top of the next frame up. + */ +static inline int or1k_frameinfo_valid(struct or1k_frameinfo *frameinfo) +{ + return (frameinfo->fp == NULL || + (!kstack_end(frameinfo->fp) && + frameinfo->fp > &frameinfo->top)) && + __kernel_text_address(frameinfo->ra); +} + +/* + * Create a stack trace doing scanning which is frame pointer aware. We can + * get reliable stack traces by matching the previously found frame + * pointer with the top of the stack address every time we find a valid + * or1k_frameinfo. + * + * Ideally the stack parameter will be passed as FP, but it can not be + * guaranteed. Therefore we scan each address looking for the first sign + * of a return address. + * + * The OpenRISC stack frame looks something like the following. The + * location SP is held in r1 and location FP is held in r2 when frame pointers + * enabled. + * + * SP -> (top of stack) + * - (callee saved registers) + * - (local variables) + * FP-8 -> previous FP \ + * FP-4 -> return address |- or1k_frameinfo + * FP -> (previous top of stack) / + */ +void unwind_stack(void *data, unsigned long *stack, + void (*trace)(void *data, unsigned long addr, int reliable)) +{ + unsigned long *next_fp = NULL; + struct or1k_frameinfo *frameinfo = NULL; + int reliable = 0; + + while (!kstack_end(stack)) { + frameinfo = container_of(stack, + struct or1k_frameinfo, + top); + + if (__kernel_text_address(frameinfo->ra)) { + if (or1k_frameinfo_valid(frameinfo) && + (next_fp == NULL || + next_fp == &frameinfo->top)) { + reliable = 1; + next_fp = frameinfo->fp; + } else + reliable = 0; + + trace(data, frameinfo->ra, reliable); + } + stack++; + } +} + +#else /* CONFIG_FRAME_POINTER */ + +/* + * Create a stack trace by doing a simple scan treating all text addresses + * as return addresses. + */ +void unwind_stack(void *data, unsigned long *stack, + void (*trace)(void *data, unsigned long addr, int reliable)) +{ + unsigned long addr; + + while (!kstack_end(stack)) { + addr = *stack++; + if (__kernel_text_address(addr)) + trace(data, addr, 0); + } +} +#endif /* CONFIG_FRAME_POINTER */ + diff --git a/arch/openrisc/lib/delay.c b/arch/openrisc/lib/delay.c index 8b13fdf43ec6..a92bd621aa1f 100644 --- a/arch/openrisc/lib/delay.c +++ b/arch/openrisc/lib/delay.c @@ -25,7 +25,7 @@ int read_current_timer(unsigned long *timer_value) { - *timer_value = mfspr(SPR_TTCR); + *timer_value = get_cycles(); return 0; } diff --git a/arch/openrisc/mm/Makefile b/arch/openrisc/mm/Makefile index 324ba2634529..a31b2a42e966 100644 --- a/arch/openrisc/mm/Makefile +++ b/arch/openrisc/mm/Makefile @@ -2,4 +2,4 @@ # Makefile for the linux openrisc-specific parts of the memory manager. # -obj-y := fault.o tlb.o init.o ioremap.o +obj-y := fault.o cache.o tlb.o init.o ioremap.o diff --git a/arch/openrisc/mm/cache.c b/arch/openrisc/mm/cache.c new file mode 100644 index 000000000000..b747bf1fc1b6 --- /dev/null +++ b/arch/openrisc/mm/cache.c @@ -0,0 +1,61 @@ +/* + * OpenRISC cache.c + * + * Linux architectural port borrowing liberally from similar works of + * others. All original copyrights apply as per the original source + * declaration. + * + * Modifications for the OpenRISC architecture: + * Copyright (C) 2015 Jan Henrik Weinstock <jan.weinstock@rwth-aachen.de> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <asm/spr.h> +#include <asm/spr_defs.h> +#include <asm/cache.h> +#include <asm/cacheflush.h> +#include <asm/tlbflush.h> + +static void cache_loop(struct page *page, const unsigned int reg) +{ + unsigned long paddr = page_to_pfn(page) << PAGE_SHIFT; + unsigned long line = paddr & ~(L1_CACHE_BYTES - 1); + + while (line < paddr + PAGE_SIZE) { + mtspr(reg, line); + line += L1_CACHE_BYTES; + } +} + +void local_dcache_page_flush(struct page *page) +{ + cache_loop(page, SPR_DCBFR); +} +EXPORT_SYMBOL(local_dcache_page_flush); + +void local_icache_page_inv(struct page *page) +{ + cache_loop(page, SPR_ICBIR); +} +EXPORT_SYMBOL(local_icache_page_inv); + +void update_cache(struct vm_area_struct *vma, unsigned long address, + pte_t *pte) +{ + unsigned long pfn = pte_val(*pte) >> PAGE_SHIFT; + struct page *page = pfn_to_page(pfn); + int dirty = !test_and_set_bit(PG_dc_clean, &page->flags); + + /* + * Since icaches do not snoop for updated data on OpenRISC, we + * must write back and invalidate any dirty pages manually. We + * can skip data pages, since they will not end up in icaches. + */ + if ((vma->vm_flags & VM_EXEC) && dirty) + sync_icache_dcache(page); +} + diff --git a/arch/openrisc/mm/fault.c b/arch/openrisc/mm/fault.c index e310ab499385..d0021dfae20a 100644 --- a/arch/openrisc/mm/fault.c +++ b/arch/openrisc/mm/fault.c @@ -33,7 +33,7 @@ unsigned long pte_errors; /* updated by do_page_fault() */ /* __PHX__ :: - check the vmalloc_fault in do_page_fault() * - also look into include/asm-or32/mmu_context.h */ -volatile pgd_t *current_pgd; +volatile pgd_t *current_pgd[NR_CPUS]; extern void die(char *, struct pt_regs *, long); @@ -319,7 +319,7 @@ vmalloc_fault: phx_mmu("vmalloc_fault"); */ - pgd = (pgd_t *)current_pgd + offset; + pgd = (pgd_t *)current_pgd[smp_processor_id()] + offset; pgd_k = init_mm.pgd + offset; /* Since we're two-level, we don't need to do both diff --git a/arch/openrisc/mm/init.c b/arch/openrisc/mm/init.c index f67d82b9d22f..6972d5d6f23f 100644 --- a/arch/openrisc/mm/init.c +++ b/arch/openrisc/mm/init.c @@ -147,7 +147,7 @@ void __init paging_init(void) * (even if it is most probably not used until the next * switch_mm) */ - current_pgd = init_mm.pgd; + current_pgd[smp_processor_id()] = init_mm.pgd; end = (unsigned long)__va(max_low_pfn * PAGE_SIZE); diff --git a/arch/openrisc/mm/tlb.c b/arch/openrisc/mm/tlb.c index 683bd4d31c7c..6c253a2e86bc 100644 --- a/arch/openrisc/mm/tlb.c +++ b/arch/openrisc/mm/tlb.c @@ -49,7 +49,7 @@ * */ -void flush_tlb_all(void) +void local_flush_tlb_all(void) { int i; unsigned long num_tlb_sets; @@ -86,7 +86,7 @@ void flush_tlb_all(void) #define flush_itlb_page_no_eir(addr) \ mtspr_off(SPR_ITLBMR_BASE(0), ITLB_OFFSET(addr), 0); -void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr) +void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long addr) { if (have_dtlbeir) flush_dtlb_page_eir(addr); @@ -99,8 +99,8 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr) flush_itlb_page_no_eir(addr); } -void flush_tlb_range(struct vm_area_struct *vma, - unsigned long start, unsigned long end) +void local_flush_tlb_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end) { int addr; bool dtlbeir; @@ -129,13 +129,13 @@ void flush_tlb_range(struct vm_area_struct *vma, * This should be changed to loop over over mm and call flush_tlb_range. */ -void flush_tlb_mm(struct mm_struct *mm) +void local_flush_tlb_mm(struct mm_struct *mm) { /* Was seeing bugs with the mm struct passed to us. Scrapped most of this function. */ /* Several architctures do this */ - flush_tlb_all(); + local_flush_tlb_all(); } /* called in schedule() just before actually doing the switch_to */ @@ -149,14 +149,14 @@ void switch_mm(struct mm_struct *prev, struct mm_struct *next, * might be invalid at points where we still need to derefer * the pgd. */ - current_pgd = next->pgd; + current_pgd[smp_processor_id()] = next->pgd; /* We don't have context support implemented, so flush all * entries belonging to previous map */ if (prev != next) - flush_tlb_mm(prev); + local_flush_tlb_mm(prev); } diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h index bc54addd589f..88bae6676c9b 100644 --- a/arch/parisc/include/asm/atomic.h +++ b/arch/parisc/include/asm/atomic.h @@ -261,7 +261,7 @@ atomic64_set(atomic64_t *v, s64 i) static __inline__ s64 atomic64_read(const atomic64_t *v) { - return ACCESS_ONCE((v)->counter); + return READ_ONCE((v)->counter); } #define atomic64_inc(v) (atomic64_add( 1,(v))) diff --git a/arch/parisc/include/asm/spinlock.h b/arch/parisc/include/asm/spinlock.h index af03359e6ac5..6f84b6acc86e 100644 --- a/arch/parisc/include/asm/spinlock.h +++ b/arch/parisc/include/asm/spinlock.h @@ -32,6 +32,7 @@ static inline void arch_spin_lock_flags(arch_spinlock_t *x, cpu_relax(); mb(); } +#define arch_spin_lock_flags arch_spin_lock_flags static inline void arch_spin_unlock(arch_spinlock_t *x) { @@ -169,25 +170,4 @@ static __inline__ int arch_write_trylock(arch_rwlock_t *rw) return result; } -/* - * read_can_lock - would read_trylock() succeed? - * @lock: the rwlock in question. - */ -static __inline__ int arch_read_can_lock(arch_rwlock_t *rw) -{ - return rw->counter >= 0; -} - -/* - * write_can_lock - would write_trylock() succeed? - * @lock: the rwlock in question. - */ -static __inline__ int arch_write_can_lock(arch_rwlock_t *rw) -{ - return !rw->counter; -} - -#define arch_read_lock_flags(lock, flags) arch_read_lock(lock) -#define arch_write_lock_flags(lock, flags) arch_write_lock(lock) - #endif /* __ASM_SPINLOCK_H */ diff --git a/arch/parisc/kernel/pdc_cons.c b/arch/parisc/kernel/pdc_cons.c index 10a5ae9553fd..27a2dd616a7d 100644 --- a/arch/parisc/kernel/pdc_cons.c +++ b/arch/parisc/kernel/pdc_cons.c @@ -92,7 +92,7 @@ static int pdc_console_setup(struct console *co, char *options) #define PDC_CONS_POLL_DELAY (30 * HZ / 1000) static void pdc_console_poll(unsigned long unused); -static DEFINE_TIMER(pdc_console_timer, pdc_console_poll, 0, 0); +static DEFINE_TIMER(pdc_console_timer, pdc_console_poll); static struct tty_port tty_port; static int pdc_console_tty_open(struct tty_struct *tty, struct file *filp) diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h index edbe571bcc54..b9ebc3085fb7 100644 --- a/arch/powerpc/include/asm/spinlock.h +++ b/arch/powerpc/include/asm/spinlock.h @@ -161,6 +161,7 @@ void arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags) local_irq_restore(flags_dis); } } +#define arch_spin_lock_flags arch_spin_lock_flags static inline void arch_spin_unlock(arch_spinlock_t *lock) { @@ -181,9 +182,6 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock) * read-locks. */ -#define arch_read_can_lock(rw) ((rw)->lock >= 0) -#define arch_write_can_lock(rw) (!(rw)->lock) - #ifdef CONFIG_PPC64 #define __DO_SIGN_EXTEND "extsw %0,%0\n" #define WRLOCK_TOKEN LOCK_TOKEN /* it's negative */ @@ -302,9 +300,6 @@ static inline void arch_write_unlock(arch_rwlock_t *rw) rw->lock = 0; } -#define arch_read_lock_flags(lock, flags) arch_read_lock(lock) -#define arch_write_lock_flags(lock, flags) arch_write_lock(lock) - #define arch_spin_relax(lock) __spin_yield(lock) #define arch_read_relax(lock) __rw_yield(lock) #define arch_write_relax(lock) __rw_yield(lock) diff --git a/arch/powerpc/kernel/machine_kexec_file_64.c b/arch/powerpc/kernel/machine_kexec_file_64.c index 992c0d258e5d..e4395f937d63 100644 --- a/arch/powerpc/kernel/machine_kexec_file_64.c +++ b/arch/powerpc/kernel/machine_kexec_file_64.c @@ -91,11 +91,13 @@ int arch_kimage_file_post_load_cleanup(struct kimage *image) * and that value will be returned. If all free regions are visited without * func returning non-zero, then zero will be returned. */ -int arch_kexec_walk_mem(struct kexec_buf *kbuf, int (*func)(u64, u64, void *)) +int arch_kexec_walk_mem(struct kexec_buf *kbuf, + int (*func)(struct resource *, void *)) { int ret = 0; u64 i; phys_addr_t mstart, mend; + struct resource res = { }; if (kbuf->top_down) { for_each_free_mem_range_reverse(i, NUMA_NO_NODE, 0, @@ -105,7 +107,9 @@ int arch_kexec_walk_mem(struct kexec_buf *kbuf, int (*func)(u64, u64, void *)) * range while in kexec, end points to the last byte * in the range. */ - ret = func(mstart, mend - 1, kbuf); + res.start = mstart; + res.end = mend - 1; + ret = func(&res, kbuf); if (ret) break; } @@ -117,7 +121,9 @@ int arch_kexec_walk_mem(struct kexec_buf *kbuf, int (*func)(u64, u64, void *)) * range while in kexec, end points to the last byte * in the range. */ - ret = func(mstart, mend - 1, kbuf); + res.start = mstart; + res.end = mend - 1; + ret = func(&res, kbuf); if (ret) break; } diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 1643e9e53655..3f1c4fcbe0aa 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -78,7 +78,7 @@ static unsigned long lock_rtas(void) local_irq_save(flags); preempt_disable(); - arch_spin_lock_flags(&rtas.lock, flags); + arch_spin_lock(&rtas.lock); return flags; } diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index 57190f384f63..1d89163d67f2 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -262,9 +262,8 @@ static void wd_timer_reset(unsigned int cpu, struct timer_list *t) add_timer_on(t, cpu); } -static void wd_timer_fn(unsigned long data) +static void wd_timer_fn(struct timer_list *t) { - struct timer_list *t = this_cpu_ptr(&wd_timer); int cpu = smp_processor_id(); watchdog_timer_interrupt(cpu); @@ -288,7 +287,7 @@ static void start_watchdog_timer_on(unsigned int cpu) per_cpu(wd_timer_tb, cpu) = get_tb(); - setup_pinned_timer(t, wd_timer_fn, 0); + timer_setup(t, wd_timer_fn, TIMER_PINNED); wd_timer_reset(cpu, t); } diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 7c62967d672c..59247af5fd45 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -646,6 +646,16 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, hnow_v = hpte_new_to_old_v(hnow_v, hnow_r); hnow_r = hpte_new_to_old_r(hnow_r); } + + /* + * If the HPT is being resized, don't update the HPTE, + * instead let the guest retry after the resize operation is complete. + * The synchronization for hpte_setup_done test vs. set is provided + * by the HPTE lock. + */ + if (!kvm->arch.hpte_setup_done) + goto out_unlock; + if ((hnow_v & ~HPTE_V_HVLOCK) != hpte[0] || hnow_r != hpte[1] || rev->guest_rpte != hpte[2]) /* HPTE has been changed under us; let the guest retry */ diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 73bf1ebfa78f..8d43cf205d34 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -2705,11 +2705,14 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) * Hard-disable interrupts, and check resched flag and signals. * If we need to reschedule or deliver a signal, clean up * and return without going into the guest(s). + * If the hpte_setup_done flag has been cleared, don't go into the + * guest because that means a HPT resize operation is in progress. */ local_irq_disable(); hard_irq_disable(); if (lazy_irq_pending() || need_resched() || - recheck_signals(&core_info)) { + recheck_signals(&core_info) || + (!kvm_is_radix(vc->kvm) && !vc->kvm->arch.hpte_setup_done)) { local_irq_enable(); vc->vcore_state = VCORE_INACTIVE; /* Unlock all except the primary vcore */ @@ -3078,7 +3081,7 @@ out: static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) { - int n_ceded, i; + int n_ceded, i, r; struct kvmppc_vcore *vc; struct kvm_vcpu *v; @@ -3132,6 +3135,20 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE && !signal_pending(current)) { + /* See if the HPT and VRMA are ready to go */ + if (!kvm_is_radix(vcpu->kvm) && + !vcpu->kvm->arch.hpte_setup_done) { + spin_unlock(&vc->lock); + r = kvmppc_hv_setup_htab_rma(vcpu); + spin_lock(&vc->lock); + if (r) { + kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; + kvm_run->fail_entry.hardware_entry_failure_reason = 0; + vcpu->arch.ret = r; + break; + } + } + if (vc->vcore_state == VCORE_PREEMPT && vc->runner == NULL) kvmppc_vcore_end_preempt(vc); @@ -3249,13 +3266,6 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) /* Order vcpus_running vs. hpte_setup_done, see kvmppc_alloc_reset_hpt */ smp_mb(); - /* On the first time here, set up HTAB and VRMA */ - if (!kvm_is_radix(vcpu->kvm) && !vcpu->kvm->arch.hpte_setup_done) { - r = kvmppc_hv_setup_htab_rma(vcpu); - if (r) - goto out; - } - flush_all_to_thread(current); /* Save userspace EBB and other register values */ @@ -3303,7 +3313,6 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) } mtspr(SPRN_VRSAVE, user_vrsave); - out: vcpu->arch.state = KVMPPC_VCPU_NOTREADY; atomic_dec(&vcpu->kvm->arch.vcpus_running); return r; diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index a51df9ef529d..73016451f330 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -1452,7 +1452,7 @@ static void topology_schedule_update(void) schedule_work(&topology_work); } -static void topology_timer_fn(unsigned long ignored) +static void topology_timer_fn(struct timer_list *unused) { if (prrn_enabled && cpumask_weight(&cpu_associativity_changes_mask)) topology_schedule_update(); @@ -1462,14 +1462,11 @@ static void topology_timer_fn(unsigned long ignored) reset_topology_timer(); } } -static struct timer_list topology_timer = - TIMER_INITIALIZER(topology_timer_fn, 0, 0); +static struct timer_list topology_timer; static void reset_topology_timer(void) { - topology_timer.data = 0; - topology_timer.expires = jiffies + 60 * HZ; - mod_timer(&topology_timer, topology_timer.expires); + mod_timer(&topology_timer, jiffies + 60 * HZ); } #ifdef CONFIG_SMP @@ -1529,7 +1526,8 @@ int start_topology_update(void) prrn_enabled = 0; vphn_enabled = 1; setup_cpu_associativity_change_counters(); - init_timer_deferrable(&topology_timer); + timer_setup(&topology_timer, topology_timer_fn, + TIMER_DEFERRABLE); reset_topology_timer(); } } diff --git a/arch/powerpc/platforms/powernv/opal-msglog.c b/arch/powerpc/platforms/powernv/opal-msglog.c index 7a9cde0cfbd1..acd3206dfae3 100644 --- a/arch/powerpc/platforms/powernv/opal-msglog.c +++ b/arch/powerpc/platforms/powernv/opal-msglog.c @@ -43,7 +43,7 @@ ssize_t opal_msglog_copy(char *to, loff_t pos, size_t count) if (!opal_memcons) return -ENODEV; - out_pos = be32_to_cpu(ACCESS_ONCE(opal_memcons->out_pos)); + out_pos = be32_to_cpu(READ_ONCE(opal_memcons->out_pos)); /* Now we've read out_pos, put a barrier in before reading the new * data it points to in conbuf. */ diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index ae55e715cc74..863a62a6de3c 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -68,6 +68,7 @@ config S390 select ARCH_BINFMT_ELF_STATE select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_ELF_RANDOMIZE + select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA select ARCH_HAS_KCOV @@ -143,7 +144,6 @@ config S390 select HAVE_DYNAMIC_FTRACE select HAVE_DYNAMIC_FTRACE_WITH_REGS select HAVE_EFFICIENT_UNALIGNED_ACCESS - select HAVE_EXIT_THREAD select HAVE_FTRACE_MCOUNT_RECORD select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_TRACER @@ -538,6 +538,22 @@ config ARCH_RANDOM If unsure, say Y. +config ALTERNATIVES + def_bool y + prompt "Patch optimized instructions for running CPU type" + help + When enabled the kernel code is compiled with additional + alternative instructions blocks optimized for newer CPU types. + These alternative instructions blocks are patched at kernel boot + time when running CPU supports them. This mechanism is used to + optimize some critical code paths (i.e. spinlocks) for newer CPUs + even if kernel is build to support older machine generations. + + This mechanism could be disabled by appending "noaltinstr" + option to the kernel command line. + + If unsure, say Y. + endmenu menu "Memory setup" @@ -809,18 +825,6 @@ config PFAULT Everybody who wants to run Linux under VM != VM4.2 should select this option. -config SHARED_KERNEL - bool "VM shared kernel support" - depends on !JUMP_LABEL - help - Select this option, if you want to share the text segment of the - Linux kernel between different VM guests. This reduces memory - usage with lots of guests but greatly increases kernel size. - Also if a kernel was IPL'ed from a shared segment the kexec system - call will not work. - You should only select this option if you know what you are - doing and want to exploit this feature. - config CMM def_tristate n prompt "Cooperative memory management" @@ -930,17 +934,4 @@ config S390_GUEST Select this option if you want to run the kernel as a guest under the KVM hypervisor. -config S390_GUEST_OLD_TRANSPORT - def_bool y - prompt "Guest support for old s390 virtio transport (DEPRECATED)" - depends on S390_GUEST - help - Enable this option to add support for the old s390-virtio - transport (i.e. virtio devices NOT based on virtio-ccw). This - type of virtio devices is only available on the experimental - kuli userspace or with old (< 2.6) qemu. If you are running - with a modern version of qemu (which supports virtio-ccw since - 1.4 and uses it by default since version 2.4), you probably won't - need this. - endmenu diff --git a/arch/s390/Makefile b/arch/s390/Makefile index dac821cfcd43..6b3f41985f28 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -21,7 +21,7 @@ KBUILD_CFLAGS += -m64 KBUILD_AFLAGS += -m64 UTS_MACHINE := s390x STACK_SIZE := 16384 -CHECKFLAGS += -D__s390__ -D__s390x__ +CHECKFLAGS += -D__s390__ -D__s390x__ -mbig-endian export LD_BFD @@ -133,6 +133,7 @@ archclean: archprepare: $(Q)$(MAKE) $(build)=$(tools) include/generated/facilities.h + $(Q)$(MAKE) $(build)=$(tools) include/generated/dis.h # Don't use tabs in echo arguments define archhelp diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile index 3df10c989893..29e3dc99b916 100644 --- a/arch/s390/boot/compressed/Makefile +++ b/arch/s390/boot/compressed/Makefile @@ -12,7 +12,7 @@ targets += vmlinux.bin.xz vmlinux.bin.lzma vmlinux.bin.lzo vmlinux.bin.lz4 targets += misc.o piggy.o sizes.h head.o KBUILD_CFLAGS := -m64 -D__KERNEL__ -O2 -KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING +KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING -D__NO_FORTIFY KBUILD_CFLAGS += $(cflags-y) -fno-delete-null-pointer-checks -msoft-float KBUILD_CFLAGS += $(call cc-option,-mpacked-stack) KBUILD_CFLAGS += $(call cc-option,-ffreestanding) diff --git a/arch/s390/boot/compressed/misc.c b/arch/s390/boot/compressed/misc.c index 77633200f42c..cecf38b9ec82 100644 --- a/arch/s390/boot/compressed/misc.c +++ b/arch/s390/boot/compressed/misc.c @@ -170,9 +170,7 @@ unsigned long decompress_kernel(void) free_mem_ptr = (unsigned long) &_end; free_mem_end_ptr = free_mem_ptr + HEAP_SIZE; - puts("Uncompressing Linux... "); __decompress(input_data, input_len, NULL, NULL, output, 0, NULL, error); - puts("Ok, booting the kernel.\n"); return (unsigned long) output; } diff --git a/arch/s390/configs/default_defconfig b/arch/s390/configs/default_defconfig index 282072206df7..84eccc88c065 100644 --- a/arch/s390/configs/default_defconfig +++ b/arch/s390/configs/default_defconfig @@ -69,7 +69,6 @@ CONFIG_KSM=y CONFIG_TRANSPARENT_HUGEPAGE=y CONFIG_CLEANCACHE=y CONFIG_FRONTSWAP=y -CONFIG_CMA=y CONFIG_CMA_DEBUG=y CONFIG_CMA_DEBUGFS=y CONFIG_MEM_SOFT_DIRTY=y @@ -379,7 +378,6 @@ CONFIG_BLK_DEV_LOOP=m CONFIG_BLK_DEV_CRYPTOLOOP=m CONFIG_BLK_DEV_DRBD=m CONFIG_BLK_DEV_NBD=m -CONFIG_BLK_DEV_OSD=m CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=32768 CONFIG_BLK_DEV_RAM_DAX=y @@ -416,7 +414,6 @@ CONFIG_SCSI_OSD_ULD=m CONFIG_MD=y CONFIG_BLK_DEV_MD=y CONFIG_MD_LINEAR=m -CONFIG_MD_RAID0=m CONFIG_MD_MULTIPATH=m CONFIG_MD_FAULTY=m CONFIG_BLK_DEV_DM=m @@ -483,6 +480,8 @@ CONFIG_INFINIBAND=m CONFIG_INFINIBAND_USER_ACCESS=m CONFIG_MLX4_INFINIBAND=m CONFIG_MLX5_INFINIBAND=m +CONFIG_VFIO=m +CONFIG_VFIO_PCI=m CONFIG_VIRTIO_BALLOON=m CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y @@ -599,7 +598,6 @@ CONFIG_DETECT_HUNG_TASK=y CONFIG_WQ_WATCHDOG=y CONFIG_PANIC_ON_OOPS=y CONFIG_DEBUG_TIMEKEEPING=y -CONFIG_DEBUG_RT_MUTEXES=y CONFIG_DEBUG_WW_MUTEX_SLOWPATH=y CONFIG_PROVE_LOCKING=y CONFIG_LOCK_STAT=y @@ -629,10 +627,8 @@ CONFIG_SCHED_TRACER=y CONFIG_FTRACE_SYSCALLS=y CONFIG_STACK_TRACER=y CONFIG_BLK_DEV_IO_TRACE=y -CONFIG_UPROBE_EVENTS=y CONFIG_FUNCTION_PROFILER=y CONFIG_HIST_TRIGGERS=y -CONFIG_TRACE_ENUM_MAP_FILE=y CONFIG_LKDTM=m CONFIG_TEST_LIST_SORT=y CONFIG_TEST_SORT=y @@ -649,6 +645,7 @@ CONFIG_ENCRYPTED_KEYS=m CONFIG_SECURITY=y CONFIG_SECURITY_NETWORK=y CONFIG_HARDENED_USERCOPY=y +CONFIG_FORTIFY_SOURCE=y CONFIG_SECURITY_SELINUX=y CONFIG_SECURITY_SELINUX_BOOTPARAM=y CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=0 @@ -705,12 +702,12 @@ CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m CONFIG_ZCRYPT=m CONFIG_PKEY=m +CONFIG_CRYPTO_PAES_S390=m CONFIG_CRYPTO_SHA1_S390=m CONFIG_CRYPTO_SHA256_S390=m CONFIG_CRYPTO_SHA512_S390=m CONFIG_CRYPTO_DES_S390=m CONFIG_CRYPTO_AES_S390=m -CONFIG_CRYPTO_PAES_S390=m CONFIG_CRYPTO_GHASH_S390=m CONFIG_CRYPTO_CRC32_S390=y CONFIG_ASYMMETRIC_KEY_TYPE=y diff --git a/arch/s390/configs/gcov_defconfig b/arch/s390/configs/gcov_defconfig index 3c6b78189fbc..f7202358e6d7 100644 --- a/arch/s390/configs/gcov_defconfig +++ b/arch/s390/configs/gcov_defconfig @@ -70,7 +70,6 @@ CONFIG_KSM=y CONFIG_TRANSPARENT_HUGEPAGE=y CONFIG_CLEANCACHE=y CONFIG_FRONTSWAP=y -CONFIG_CMA=y CONFIG_MEM_SOFT_DIRTY=y CONFIG_ZSWAP=y CONFIG_ZBUD=m @@ -376,7 +375,6 @@ CONFIG_BLK_DEV_LOOP=m CONFIG_BLK_DEV_CRYPTOLOOP=m CONFIG_BLK_DEV_DRBD=m CONFIG_BLK_DEV_NBD=m -CONFIG_BLK_DEV_OSD=m CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=32768 CONFIG_BLK_DEV_RAM_DAX=y @@ -412,7 +410,6 @@ CONFIG_SCSI_OSD_ULD=m CONFIG_MD=y CONFIG_BLK_DEV_MD=y CONFIG_MD_LINEAR=m -CONFIG_MD_RAID0=m CONFIG_MD_MULTIPATH=m CONFIG_MD_FAULTY=m CONFIG_BLK_DEV_DM=m @@ -479,6 +476,8 @@ CONFIG_INFINIBAND=m CONFIG_INFINIBAND_USER_ACCESS=m CONFIG_MLX4_INFINIBAND=m CONFIG_MLX5_INFINIBAND=m +CONFIG_VFIO=m +CONFIG_VFIO_PCI=m CONFIG_VIRTIO_BALLOON=m CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y @@ -575,10 +574,8 @@ CONFIG_SCHED_TRACER=y CONFIG_FTRACE_SYSCALLS=y CONFIG_STACK_TRACER=y CONFIG_BLK_DEV_IO_TRACE=y -CONFIG_UPROBE_EVENTS=y CONFIG_FUNCTION_PROFILER=y CONFIG_HIST_TRIGGERS=y -CONFIG_TRACE_ENUM_MAP_FILE=y CONFIG_LKDTM=m CONFIG_PERCPU_TEST=m CONFIG_ATOMIC64_SELFTEST=y @@ -650,12 +647,12 @@ CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m CONFIG_ZCRYPT=m CONFIG_PKEY=m +CONFIG_CRYPTO_PAES_S390=m CONFIG_CRYPTO_SHA1_S390=m CONFIG_CRYPTO_SHA256_S390=m CONFIG_CRYPTO_SHA512_S390=m CONFIG_CRYPTO_DES_S390=m CONFIG_CRYPTO_AES_S390=m -CONFIG_CRYPTO_PAES_S390=m CONFIG_CRYPTO_GHASH_S390=m CONFIG_CRYPTO_CRC32_S390=y CONFIG_CRC7=m diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig index 653d72bcc007..03100fe74ea8 100644 --- a/arch/s390/configs/performance_defconfig +++ b/arch/s390/configs/performance_defconfig @@ -68,7 +68,6 @@ CONFIG_KSM=y CONFIG_TRANSPARENT_HUGEPAGE=y CONFIG_CLEANCACHE=y CONFIG_FRONTSWAP=y -CONFIG_CMA=y CONFIG_MEM_SOFT_DIRTY=y CONFIG_ZSWAP=y CONFIG_ZBUD=m @@ -374,7 +373,6 @@ CONFIG_BLK_DEV_LOOP=m CONFIG_BLK_DEV_CRYPTOLOOP=m CONFIG_BLK_DEV_DRBD=m CONFIG_BLK_DEV_NBD=m -CONFIG_BLK_DEV_OSD=m CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=32768 CONFIG_BLK_DEV_RAM_DAX=y @@ -410,7 +408,6 @@ CONFIG_SCSI_OSD_ULD=m CONFIG_MD=y CONFIG_BLK_DEV_MD=y CONFIG_MD_LINEAR=m -CONFIG_MD_RAID0=m CONFIG_MD_MULTIPATH=m CONFIG_MD_FAULTY=m CONFIG_BLK_DEV_DM=m @@ -477,6 +474,8 @@ CONFIG_INFINIBAND=m CONFIG_INFINIBAND_USER_ACCESS=m CONFIG_MLX4_INFINIBAND=m CONFIG_MLX5_INFINIBAND=m +CONFIG_VFIO=m +CONFIG_VFIO_PCI=m CONFIG_VIRTIO_BALLOON=m CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y @@ -573,10 +572,8 @@ CONFIG_SCHED_TRACER=y CONFIG_FTRACE_SYSCALLS=y CONFIG_STACK_TRACER=y CONFIG_BLK_DEV_IO_TRACE=y -CONFIG_UPROBE_EVENTS=y CONFIG_FUNCTION_PROFILER=y CONFIG_HIST_TRIGGERS=y -CONFIG_TRACE_ENUM_MAP_FILE=y CONFIG_LKDTM=m CONFIG_PERCPU_TEST=m CONFIG_ATOMIC64_SELFTEST=y @@ -648,12 +645,12 @@ CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m CONFIG_ZCRYPT=m CONFIG_PKEY=m +CONFIG_CRYPTO_PAES_S390=m CONFIG_CRYPTO_SHA1_S390=m CONFIG_CRYPTO_SHA256_S390=m CONFIG_CRYPTO_SHA512_S390=m CONFIG_CRYPTO_DES_S390=m CONFIG_CRYPTO_AES_S390=m -CONFIG_CRYPTO_PAES_S390=m CONFIG_CRYPTO_GHASH_S390=m CONFIG_CRYPTO_CRC32_S390=y CONFIG_CRC7=m diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c index 591cbdf615af..b48e20dd94e9 100644 --- a/arch/s390/crypto/aes_s390.c +++ b/arch/s390/crypto/aes_s390.c @@ -4,9 +4,11 @@ * s390 implementation of the AES Cipher Algorithm. * * s390 Version: - * Copyright IBM Corp. 2005, 2007 + * Copyright IBM Corp. 2005, 2017 * Author(s): Jan Glauber (jang@de.ibm.com) * Sebastian Siewior (sebastian@breakpoint.cc> SW-Fallback + * Patrick Steuer <patrick.steuer@de.ibm.com> + * Harald Freudenberger <freude@de.ibm.com> * * Derived from "crypto/aes_generic.c" * @@ -22,20 +24,25 @@ #include <crypto/aes.h> #include <crypto/algapi.h> +#include <crypto/ghash.h> +#include <crypto/internal/aead.h> #include <crypto/internal/skcipher.h> +#include <crypto/scatterwalk.h> #include <linux/err.h> #include <linux/module.h> #include <linux/cpufeature.h> #include <linux/init.h> #include <linux/spinlock.h> #include <linux/fips.h> +#include <linux/string.h> #include <crypto/xts.h> #include <asm/cpacf.h> static u8 *ctrblk; static DEFINE_SPINLOCK(ctrblk_lock); -static cpacf_mask_t km_functions, kmc_functions, kmctr_functions; +static cpacf_mask_t km_functions, kmc_functions, kmctr_functions, + kma_functions; struct s390_aes_ctx { u8 key[AES_MAX_KEY_SIZE]; @@ -55,6 +62,17 @@ struct s390_xts_ctx { struct crypto_skcipher *fallback; }; +struct gcm_sg_walk { + struct scatter_walk walk; + unsigned int walk_bytes; + u8 *walk_ptr; + unsigned int walk_bytes_remain; + u8 buf[AES_BLOCK_SIZE]; + unsigned int buf_bytes; + u8 *ptr; + unsigned int nbytes; +}; + static int setkey_fallback_cip(struct crypto_tfm *tfm, const u8 *in_key, unsigned int key_len) { @@ -771,6 +789,267 @@ static struct crypto_alg ctr_aes_alg = { } }; +static int gcm_aes_setkey(struct crypto_aead *tfm, const u8 *key, + unsigned int keylen) +{ + struct s390_aes_ctx *ctx = crypto_aead_ctx(tfm); + + switch (keylen) { + case AES_KEYSIZE_128: + ctx->fc = CPACF_KMA_GCM_AES_128; + break; + case AES_KEYSIZE_192: + ctx->fc = CPACF_KMA_GCM_AES_192; + break; + case AES_KEYSIZE_256: + ctx->fc = CPACF_KMA_GCM_AES_256; + break; + default: + return -EINVAL; + } + + memcpy(ctx->key, key, keylen); + ctx->key_len = keylen; + return 0; +} + +static int gcm_aes_setauthsize(struct crypto_aead *tfm, unsigned int authsize) +{ + switch (authsize) { + case 4: + case 8: + case 12: + case 13: + case 14: + case 15: + case 16: + break; + default: + return -EINVAL; + } + + return 0; +} + +static void gcm_sg_walk_start(struct gcm_sg_walk *gw, struct scatterlist *sg, + unsigned int len) +{ + memset(gw, 0, sizeof(*gw)); + gw->walk_bytes_remain = len; + scatterwalk_start(&gw->walk, sg); +} + +static int gcm_sg_walk_go(struct gcm_sg_walk *gw, unsigned int minbytesneeded) +{ + int n; + + /* minbytesneeded <= AES_BLOCK_SIZE */ + if (gw->buf_bytes && gw->buf_bytes >= minbytesneeded) { + gw->ptr = gw->buf; + gw->nbytes = gw->buf_bytes; + goto out; + } + + if (gw->walk_bytes_remain == 0) { + gw->ptr = NULL; + gw->nbytes = 0; + goto out; + } + + gw->walk_bytes = scatterwalk_clamp(&gw->walk, gw->walk_bytes_remain); + if (!gw->walk_bytes) { + scatterwalk_start(&gw->walk, sg_next(gw->walk.sg)); + gw->walk_bytes = scatterwalk_clamp(&gw->walk, + gw->walk_bytes_remain); + } + gw->walk_ptr = scatterwalk_map(&gw->walk); + + if (!gw->buf_bytes && gw->walk_bytes >= minbytesneeded) { + gw->ptr = gw->walk_ptr; + gw->nbytes = gw->walk_bytes; + goto out; + } + + while (1) { + n = min(gw->walk_bytes, AES_BLOCK_SIZE - gw->buf_bytes); + memcpy(gw->buf + gw->buf_bytes, gw->walk_ptr, n); + gw->buf_bytes += n; + gw->walk_bytes_remain -= n; + scatterwalk_unmap(&gw->walk); + scatterwalk_advance(&gw->walk, n); + scatterwalk_done(&gw->walk, 0, gw->walk_bytes_remain); + + if (gw->buf_bytes >= minbytesneeded) { + gw->ptr = gw->buf; + gw->nbytes = gw->buf_bytes; + goto out; + } + + gw->walk_bytes = scatterwalk_clamp(&gw->walk, + gw->walk_bytes_remain); + if (!gw->walk_bytes) { + scatterwalk_start(&gw->walk, sg_next(gw->walk.sg)); + gw->walk_bytes = scatterwalk_clamp(&gw->walk, + gw->walk_bytes_remain); + } + gw->walk_ptr = scatterwalk_map(&gw->walk); + } + +out: + return gw->nbytes; +} + +static void gcm_sg_walk_done(struct gcm_sg_walk *gw, unsigned int bytesdone) +{ + int n; + + if (gw->ptr == NULL) + return; + + if (gw->ptr == gw->buf) { + n = gw->buf_bytes - bytesdone; + if (n > 0) { + memmove(gw->buf, gw->buf + bytesdone, n); + gw->buf_bytes -= n; + } else + gw->buf_bytes = 0; + } else { + gw->walk_bytes_remain -= bytesdone; + scatterwalk_unmap(&gw->walk); + scatterwalk_advance(&gw->walk, bytesdone); + scatterwalk_done(&gw->walk, 0, gw->walk_bytes_remain); + } +} + +static int gcm_aes_crypt(struct aead_request *req, unsigned int flags) +{ + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct s390_aes_ctx *ctx = crypto_aead_ctx(tfm); + unsigned int ivsize = crypto_aead_ivsize(tfm); + unsigned int taglen = crypto_aead_authsize(tfm); + unsigned int aadlen = req->assoclen; + unsigned int pclen = req->cryptlen; + int ret = 0; + + unsigned int len, in_bytes, out_bytes, + min_bytes, bytes, aad_bytes, pc_bytes; + struct gcm_sg_walk gw_in, gw_out; + u8 tag[GHASH_DIGEST_SIZE]; + + struct { + u32 _[3]; /* reserved */ + u32 cv; /* Counter Value */ + u8 t[GHASH_DIGEST_SIZE];/* Tag */ + u8 h[AES_BLOCK_SIZE]; /* Hash-subkey */ + u64 taadl; /* Total AAD Length */ + u64 tpcl; /* Total Plain-/Cipher-text Length */ + u8 j0[GHASH_BLOCK_SIZE];/* initial counter value */ + u8 k[AES_MAX_KEY_SIZE]; /* Key */ + } param; + + /* + * encrypt + * req->src: aad||plaintext + * req->dst: aad||ciphertext||tag + * decrypt + * req->src: aad||ciphertext||tag + * req->dst: aad||plaintext, return 0 or -EBADMSG + * aad, plaintext and ciphertext may be empty. + */ + if (flags & CPACF_DECRYPT) + pclen -= taglen; + len = aadlen + pclen; + + memset(¶m, 0, sizeof(param)); + param.cv = 1; + param.taadl = aadlen * 8; + param.tpcl = pclen * 8; + memcpy(param.j0, req->iv, ivsize); + *(u32 *)(param.j0 + ivsize) = 1; + memcpy(param.k, ctx->key, ctx->key_len); + + gcm_sg_walk_start(&gw_in, req->src, len); + gcm_sg_walk_start(&gw_out, req->dst, len); + + do { + min_bytes = min_t(unsigned int, + aadlen > 0 ? aadlen : pclen, AES_BLOCK_SIZE); + in_bytes = gcm_sg_walk_go(&gw_in, min_bytes); + out_bytes = gcm_sg_walk_go(&gw_out, min_bytes); + bytes = min(in_bytes, out_bytes); + + if (aadlen + pclen <= bytes) { + aad_bytes = aadlen; + pc_bytes = pclen; + flags |= CPACF_KMA_LAAD | CPACF_KMA_LPC; + } else { + if (aadlen <= bytes) { + aad_bytes = aadlen; + pc_bytes = (bytes - aadlen) & + ~(AES_BLOCK_SIZE - 1); + flags |= CPACF_KMA_LAAD; + } else { + aad_bytes = bytes & ~(AES_BLOCK_SIZE - 1); + pc_bytes = 0; + } + } + + if (aad_bytes > 0) + memcpy(gw_out.ptr, gw_in.ptr, aad_bytes); + + cpacf_kma(ctx->fc | flags, ¶m, + gw_out.ptr + aad_bytes, + gw_in.ptr + aad_bytes, pc_bytes, + gw_in.ptr, aad_bytes); + + gcm_sg_walk_done(&gw_in, aad_bytes + pc_bytes); + gcm_sg_walk_done(&gw_out, aad_bytes + pc_bytes); + aadlen -= aad_bytes; + pclen -= pc_bytes; + } while (aadlen + pclen > 0); + + if (flags & CPACF_DECRYPT) { + scatterwalk_map_and_copy(tag, req->src, len, taglen, 0); + if (crypto_memneq(tag, param.t, taglen)) + ret = -EBADMSG; + } else + scatterwalk_map_and_copy(param.t, req->dst, len, taglen, 1); + + memzero_explicit(¶m, sizeof(param)); + return ret; +} + +static int gcm_aes_encrypt(struct aead_request *req) +{ + return gcm_aes_crypt(req, CPACF_ENCRYPT); +} + +static int gcm_aes_decrypt(struct aead_request *req) +{ + return gcm_aes_crypt(req, CPACF_DECRYPT); +} + +static struct aead_alg gcm_aes_aead = { + .setkey = gcm_aes_setkey, + .setauthsize = gcm_aes_setauthsize, + .encrypt = gcm_aes_encrypt, + .decrypt = gcm_aes_decrypt, + + .ivsize = GHASH_BLOCK_SIZE - sizeof(u32), + .maxauthsize = GHASH_DIGEST_SIZE, + .chunksize = AES_BLOCK_SIZE, + + .base = { + .cra_flags = CRYPTO_ALG_TYPE_AEAD, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct s390_aes_ctx), + .cra_priority = 900, + .cra_name = "gcm(aes)", + .cra_driver_name = "gcm-aes-s390", + .cra_module = THIS_MODULE, + }, +}; + static struct crypto_alg *aes_s390_algs_ptr[5]; static int aes_s390_algs_num; @@ -790,16 +1069,19 @@ static void aes_s390_fini(void) crypto_unregister_alg(aes_s390_algs_ptr[aes_s390_algs_num]); if (ctrblk) free_page((unsigned long) ctrblk); + + crypto_unregister_aead(&gcm_aes_aead); } static int __init aes_s390_init(void) { int ret; - /* Query available functions for KM, KMC and KMCTR */ + /* Query available functions for KM, KMC, KMCTR and KMA */ cpacf_query(CPACF_KM, &km_functions); cpacf_query(CPACF_KMC, &kmc_functions); cpacf_query(CPACF_KMCTR, &kmctr_functions); + cpacf_query(CPACF_KMA, &kma_functions); if (cpacf_test_func(&km_functions, CPACF_KM_AES_128) || cpacf_test_func(&km_functions, CPACF_KM_AES_192) || @@ -840,6 +1122,14 @@ static int __init aes_s390_init(void) goto out_err; } + if (cpacf_test_func(&kma_functions, CPACF_KMA_GCM_AES_128) || + cpacf_test_func(&kma_functions, CPACF_KMA_GCM_AES_192) || + cpacf_test_func(&kma_functions, CPACF_KMA_GCM_AES_256)) { + ret = crypto_register_aead(&gcm_aes_aead); + if (ret) + goto out_err; + } + return 0; out_err: aes_s390_fini(); diff --git a/arch/s390/defconfig b/arch/s390/defconfig index 20244a38c886..46a3178d8bc6 100644 --- a/arch/s390/defconfig +++ b/arch/s390/defconfig @@ -53,7 +53,6 @@ CONFIG_KSM=y CONFIG_TRANSPARENT_HUGEPAGE=y CONFIG_CLEANCACHE=y CONFIG_FRONTSWAP=y -CONFIG_CMA=y CONFIG_ZSWAP=y CONFIG_ZBUD=m CONFIG_ZSMALLOC=m @@ -163,7 +162,6 @@ CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_PAGEALLOC=y CONFIG_DETECT_HUNG_TASK=y CONFIG_PANIC_ON_OOPS=y -CONFIG_DEBUG_RT_MUTEXES=y CONFIG_PROVE_LOCKING=y CONFIG_LOCK_STAT=y CONFIG_DEBUG_LOCKDEP=y @@ -179,7 +177,6 @@ CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP=y CONFIG_STACK_TRACER=y CONFIG_BLK_DEV_IO_TRACE=y CONFIG_FUNCTION_PROFILER=y -CONFIG_TRACE_ENUM_MAP_FILE=y CONFIG_KPROBES_SANITY_TEST=y CONFIG_S390_PTDUMP=y CONFIG_CRYPTO_CRYPTD=m diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild index 6e2c9f7e47fa..41c211a4d8b1 100644 --- a/arch/s390/include/asm/Kbuild +++ b/arch/s390/include/asm/Kbuild @@ -15,6 +15,7 @@ generic-y += local64.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h generic-y += preempt.h +generic-y += rwsem.h generic-y += trace_clock.h generic-y += unaligned.h generic-y += word-at-a-time.h diff --git a/arch/s390/include/asm/alternative.h b/arch/s390/include/asm/alternative.h new file mode 100644 index 000000000000..6c268f6a51d3 --- /dev/null +++ b/arch/s390/include/asm/alternative.h @@ -0,0 +1,163 @@ +#ifndef _ASM_S390_ALTERNATIVE_H +#define _ASM_S390_ALTERNATIVE_H + +#ifndef __ASSEMBLY__ + +#include <linux/types.h> +#include <linux/stddef.h> +#include <linux/stringify.h> + +struct alt_instr { + s32 instr_offset; /* original instruction */ + s32 repl_offset; /* offset to replacement instruction */ + u16 facility; /* facility bit set for replacement */ + u8 instrlen; /* length of original instruction */ + u8 replacementlen; /* length of new instruction */ +} __packed; + +#ifdef CONFIG_ALTERNATIVES +extern void apply_alternative_instructions(void); +extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end); +#else +static inline void apply_alternative_instructions(void) {}; +static inline void apply_alternatives(struct alt_instr *start, + struct alt_instr *end) {}; +#endif +/* + * |661: |662: |6620 |663: + * +-----------+---------------------+ + * | oldinstr | oldinstr_padding | + * | +----------+----------+ + * | | | | + * | | >6 bytes |6/4/2 nops| + * | |6 bytes jg-----------> + * +-----------+---------------------+ + * ^^ static padding ^^ + * + * .altinstr_replacement section + * +---------------------+-----------+ + * |6641: |6651: + * | alternative instr 1 | + * +-----------+---------+- - - - - -+ + * |6642: |6652: | + * | alternative instr 2 | padding + * +---------------------+- - - - - -+ + * ^ runtime ^ + * + * .altinstructions section + * +---------------------------------+ + * | alt_instr entries for each | + * | alternative instr | + * +---------------------------------+ + */ + +#define b_altinstr(num) "664"#num +#define e_altinstr(num) "665"#num + +#define e_oldinstr_pad_end "663" +#define oldinstr_len "662b-661b" +#define oldinstr_total_len e_oldinstr_pad_end"b-661b" +#define altinstr_len(num) e_altinstr(num)"b-"b_altinstr(num)"b" +#define oldinstr_pad_len(num) \ + "-(((" altinstr_len(num) ")-(" oldinstr_len ")) > 0) * " \ + "((" altinstr_len(num) ")-(" oldinstr_len "))" + +#define INSTR_LEN_SANITY_CHECK(len) \ + ".if " len " > 254\n" \ + "\t.error \"cpu alternatives does not support instructions " \ + "blocks > 254 bytes\"\n" \ + ".endif\n" \ + ".if (" len ") %% 2\n" \ + "\t.error \"cpu alternatives instructions length is odd\"\n" \ + ".endif\n" + +#define OLDINSTR_PADDING(oldinstr, num) \ + ".if " oldinstr_pad_len(num) " > 6\n" \ + "\tjg " e_oldinstr_pad_end "f\n" \ + "6620:\n" \ + "\t.fill (" oldinstr_pad_len(num) " - (6620b-662b)) / 2, 2, 0x0700\n" \ + ".else\n" \ + "\t.fill " oldinstr_pad_len(num) " / 6, 6, 0xc0040000\n" \ + "\t.fill " oldinstr_pad_len(num) " %% 6 / 4, 4, 0x47000000\n" \ + "\t.fill " oldinstr_pad_len(num) " %% 6 %% 4 / 2, 2, 0x0700\n" \ + ".endif\n" + +#define OLDINSTR(oldinstr, num) \ + "661:\n\t" oldinstr "\n662:\n" \ + OLDINSTR_PADDING(oldinstr, num) \ + e_oldinstr_pad_end ":\n" \ + INSTR_LEN_SANITY_CHECK(oldinstr_len) + +#define OLDINSTR_2(oldinstr, num1, num2) \ + "661:\n\t" oldinstr "\n662:\n" \ + ".if " altinstr_len(num1) " < " altinstr_len(num2) "\n" \ + OLDINSTR_PADDING(oldinstr, num2) \ + ".else\n" \ + OLDINSTR_PADDING(oldinstr, num1) \ + ".endif\n" \ + e_oldinstr_pad_end ":\n" \ + INSTR_LEN_SANITY_CHECK(oldinstr_len) + +#define ALTINSTR_ENTRY(facility, num) \ + "\t.long 661b - .\n" /* old instruction */ \ + "\t.long " b_altinstr(num)"b - .\n" /* alt instruction */ \ + "\t.word " __stringify(facility) "\n" /* facility bit */ \ + "\t.byte " oldinstr_total_len "\n" /* source len */ \ + "\t.byte " altinstr_len(num) "\n" /* alt instruction len */ + +#define ALTINSTR_REPLACEMENT(altinstr, num) /* replacement */ \ + b_altinstr(num)":\n\t" altinstr "\n" e_altinstr(num) ":\n" \ + INSTR_LEN_SANITY_CHECK(altinstr_len(num)) + +#ifdef CONFIG_ALTERNATIVES +/* alternative assembly primitive: */ +#define ALTERNATIVE(oldinstr, altinstr, facility) \ + ".pushsection .altinstr_replacement, \"ax\"\n" \ + ALTINSTR_REPLACEMENT(altinstr, 1) \ + ".popsection\n" \ + OLDINSTR(oldinstr, 1) \ + ".pushsection .altinstructions,\"a\"\n" \ + ALTINSTR_ENTRY(facility, 1) \ + ".popsection\n" + +#define ALTERNATIVE_2(oldinstr, altinstr1, facility1, altinstr2, facility2)\ + ".pushsection .altinstr_replacement, \"ax\"\n" \ + ALTINSTR_REPLACEMENT(altinstr1, 1) \ + ALTINSTR_REPLACEMENT(altinstr2, 2) \ + ".popsection\n" \ + OLDINSTR_2(oldinstr, 1, 2) \ + ".pushsection .altinstructions,\"a\"\n" \ + ALTINSTR_ENTRY(facility1, 1) \ + ALTINSTR_ENTRY(facility2, 2) \ + ".popsection\n" +#else +/* Alternative instructions are disabled, let's put just oldinstr in */ +#define ALTERNATIVE(oldinstr, altinstr, facility) \ + oldinstr "\n" + +#define ALTERNATIVE_2(oldinstr, altinstr1, facility1, altinstr2, facility2) \ + oldinstr "\n" +#endif + +/* + * Alternative instructions for different CPU types or capabilities. + * + * This allows to use optimized instructions even on generic binary + * kernels. + * + * oldinstr is padded with jump and nops at compile time if altinstr is + * longer. altinstr is padded with jump and nops at run-time during patching. + * + * For non barrier like inlines please define new variants + * without volatile and memory clobber. + */ +#define alternative(oldinstr, altinstr, facility) \ + asm volatile(ALTERNATIVE(oldinstr, altinstr, facility) : : : "memory") + +#define alternative_2(oldinstr, altinstr1, facility1, altinstr2, facility2) \ + asm volatile(ALTERNATIVE_2(oldinstr, altinstr1, facility1, \ + altinstr2, facility2) ::: "memory") + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_S390_ALTERNATIVE_H */ diff --git a/arch/s390/include/asm/archrandom.h b/arch/s390/include/asm/archrandom.h index e9f7d7a57f99..09aed1095336 100644 --- a/arch/s390/include/asm/archrandom.h +++ b/arch/s390/include/asm/archrandom.h @@ -28,42 +28,42 @@ static void s390_arch_random_generate(u8 *buf, unsigned int nbytes) static inline bool arch_has_random(void) { - if (static_branch_likely(&s390_arch_random_available)) - return true; return false; } static inline bool arch_has_random_seed(void) { - return arch_has_random(); + if (static_branch_likely(&s390_arch_random_available)) + return true; + return false; } static inline bool arch_get_random_long(unsigned long *v) { - if (static_branch_likely(&s390_arch_random_available)) { - s390_arch_random_generate((u8 *)v, sizeof(*v)); - return true; - } return false; } static inline bool arch_get_random_int(unsigned int *v) { - if (static_branch_likely(&s390_arch_random_available)) { - s390_arch_random_generate((u8 *)v, sizeof(*v)); - return true; - } return false; } static inline bool arch_get_random_seed_long(unsigned long *v) { - return arch_get_random_long(v); + if (static_branch_likely(&s390_arch_random_available)) { + s390_arch_random_generate((u8 *)v, sizeof(*v)); + return true; + } + return false; } static inline bool arch_get_random_seed_int(unsigned int *v) { - return arch_get_random_int(v); + if (static_branch_likely(&s390_arch_random_available)) { + s390_arch_random_generate((u8 *)v, sizeof(*v)); + return true; + } + return false; } #endif /* CONFIG_ARCH_RANDOM */ diff --git a/arch/s390/include/asm/atomic_ops.h b/arch/s390/include/asm/atomic_ops.h index f479e4c0b87e..d3f09526ee19 100644 --- a/arch/s390/include/asm/atomic_ops.h +++ b/arch/s390/include/asm/atomic_ops.h @@ -40,19 +40,24 @@ __ATOMIC_OPS(__atomic64_xor, long, "laxg") #undef __ATOMIC_OPS #undef __ATOMIC_OP -static inline void __atomic_add_const(int val, int *ptr) -{ - asm volatile( - " asi %[ptr],%[val]\n" - : [ptr] "+Q" (*ptr) : [val] "i" (val) : "cc"); +#define __ATOMIC_CONST_OP(op_name, op_type, op_string, op_barrier) \ +static inline void op_name(op_type val, op_type *ptr) \ +{ \ + asm volatile( \ + op_string " %[ptr],%[val]\n" \ + op_barrier \ + : [ptr] "+Q" (*ptr) : [val] "i" (val) : "cc", "memory");\ } -static inline void __atomic64_add_const(long val, long *ptr) -{ - asm volatile( - " agsi %[ptr],%[val]\n" - : [ptr] "+Q" (*ptr) : [val] "i" (val) : "cc"); -} +#define __ATOMIC_CONST_OPS(op_name, op_type, op_string) \ + __ATOMIC_CONST_OP(op_name, op_type, op_string, "\n") \ + __ATOMIC_CONST_OP(op_name##_barrier, op_type, op_string, "bcr 14,0\n") + +__ATOMIC_CONST_OPS(__atomic_add_const, int, "asi") +__ATOMIC_CONST_OPS(__atomic64_add_const, long, "agsi") + +#undef __ATOMIC_CONST_OPS +#undef __ATOMIC_CONST_OP #else /* CONFIG_HAVE_MARCH_Z196_FEATURES */ @@ -108,6 +113,11 @@ __ATOMIC64_OPS(__atomic64_xor, "xgr") #undef __ATOMIC64_OPS +#define __atomic_add_const(val, ptr) __atomic_add(val, ptr) +#define __atomic_add_const_barrier(val, ptr) __atomic_add(val, ptr) +#define __atomic64_add_const(val, ptr) __atomic64_add(val, ptr) +#define __atomic64_add_const_barrier(val, ptr) __atomic64_add(val, ptr) + #endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */ static inline int __atomic_cmpxchg(int *ptr, int old, int new) diff --git a/arch/s390/include/asm/ccwgroup.h b/arch/s390/include/asm/ccwgroup.h index b00777ce93b4..99aa817dad32 100644 --- a/arch/s390/include/asm/ccwgroup.h +++ b/arch/s390/include/asm/ccwgroup.h @@ -42,6 +42,7 @@ struct ccwgroup_device { * @thaw: undo work done in @freeze * @restore: callback for restoring after hibernation * @driver: embedded driver structure + * @ccw_driver: supported ccw_driver (optional) */ struct ccwgroup_driver { int (*setup) (struct ccwgroup_device *); @@ -56,6 +57,7 @@ struct ccwgroup_driver { int (*restore)(struct ccwgroup_device *); struct device_driver driver; + struct ccw_driver *ccw_driver; }; extern int ccwgroup_driver_register (struct ccwgroup_driver *cdriver); diff --git a/arch/s390/include/asm/cpacf.h b/arch/s390/include/asm/cpacf.h index 056670ebba67..3cc52e37b4b2 100644 --- a/arch/s390/include/asm/cpacf.h +++ b/arch/s390/include/asm/cpacf.h @@ -2,7 +2,7 @@ /* * CP Assist for Cryptographic Functions (CPACF) * - * Copyright IBM Corp. 2003, 2016 + * Copyright IBM Corp. 2003, 2017 * Author(s): Thomas Spatzier * Jan Glauber * Harald Freudenberger (freude@de.ibm.com) @@ -134,6 +134,22 @@ #define CPACF_PRNO_TRNG_Q_R2C_RATIO 0x70 #define CPACF_PRNO_TRNG 0x72 +/* + * Function codes for the KMA (CIPHER MESSAGE WITH AUTHENTICATION) + * instruction + */ +#define CPACF_KMA_QUERY 0x00 +#define CPACF_KMA_GCM_AES_128 0x12 +#define CPACF_KMA_GCM_AES_192 0x13 +#define CPACF_KMA_GCM_AES_256 0x14 + +/* + * Flags for the KMA (CIPHER MESSAGE WITH AUTHENTICATION) instruction + */ +#define CPACF_KMA_LPC 0x100 /* Last-Plaintext/Ciphertext */ +#define CPACF_KMA_LAAD 0x200 /* Last-AAD */ +#define CPACF_KMA_HS 0x400 /* Hash-subkey Supplied */ + typedef struct { unsigned char bytes[16]; } cpacf_mask_t; /** @@ -179,6 +195,8 @@ static inline int __cpacf_check_opcode(unsigned int opcode) return test_facility(77); /* check for MSA4 */ case CPACF_PRNO: return test_facility(57); /* check for MSA5 */ + case CPACF_KMA: + return test_facility(146); /* check for MSA8 */ default: BUG(); } @@ -470,4 +488,36 @@ static inline void cpacf_pckmo(long func, void *param) : "cc", "memory"); } +/** + * cpacf_kma() - executes the KMA (CIPHER MESSAGE WITH AUTHENTICATION) + * instruction + * @func: the function code passed to KMA; see CPACF_KMA_xxx defines + * @param: address of parameter block; see POP for details on each func + * @dest: address of destination memory area + * @src: address of source memory area + * @src_len: length of src operand in bytes + * @aad: address of additional authenticated data memory area + * @aad_len: length of aad operand in bytes + */ +static inline void cpacf_kma(unsigned long func, void *param, u8 *dest, + const u8 *src, unsigned long src_len, + const u8 *aad, unsigned long aad_len) +{ + register unsigned long r0 asm("0") = (unsigned long) func; + register unsigned long r1 asm("1") = (unsigned long) param; + register unsigned long r2 asm("2") = (unsigned long) src; + register unsigned long r3 asm("3") = (unsigned long) src_len; + register unsigned long r4 asm("4") = (unsigned long) aad; + register unsigned long r5 asm("5") = (unsigned long) aad_len; + register unsigned long r6 asm("6") = (unsigned long) dest; + + asm volatile( + "0: .insn rrf,%[opc] << 16,%[dst],%[src],%[aad],0\n" + " brc 1,0b\n" /* handle partial completion */ + : [dst] "+a" (r6), [src] "+a" (r2), [slen] "+d" (r3), + [aad] "+a" (r4), [alen] "+d" (r5) + : [fc] "d" (r0), [pba] "a" (r1), [opc] "i" (CPACF_KMA) + : "cc", "memory"); +} + #endif /* _ASM_S390_CPACF_H */ diff --git a/arch/s390/include/asm/ctl_reg.h b/arch/s390/include/asm/ctl_reg.h index 93e0d72f6c94..99c93d0346f9 100644 --- a/arch/s390/include/asm/ctl_reg.h +++ b/arch/s390/include/asm/ctl_reg.h @@ -8,6 +8,18 @@ #ifndef __ASM_CTL_REG_H #define __ASM_CTL_REG_H +#include <linux/const.h> + +#define CR2_GUARDED_STORAGE _BITUL(63 - 59) + +#define CR14_CHANNEL_REPORT_SUBMASK _BITUL(63 - 35) +#define CR14_RECOVERY_SUBMASK _BITUL(63 - 36) +#define CR14_DEGRADATION_SUBMASK _BITUL(63 - 37) +#define CR14_EXTERNAL_DAMAGE_SUBMASK _BITUL(63 - 38) +#define CR14_WARNING_SUBMASK _BITUL(63 - 39) + +#ifndef __ASSEMBLY__ + #include <linux/bug.h> #define __ctl_load(array, low, high) do { \ @@ -55,7 +67,11 @@ void smp_ctl_clear_bit(int cr, int bit); union ctlreg0 { unsigned long val; struct { - unsigned long : 32; + unsigned long : 8; + unsigned long tcx : 1; /* Transactional-Execution control */ + unsigned long pifo : 1; /* Transactional-Execution Program- + Interruption-Filtering Override */ + unsigned long : 22; unsigned long : 3; unsigned long lap : 1; /* Low-address-protection control */ unsigned long : 4; @@ -71,6 +87,19 @@ union ctlreg0 { }; }; +union ctlreg2 { + unsigned long val; + struct { + unsigned long : 33; + unsigned long ducto : 25; + unsigned long : 1; + unsigned long gse : 1; + unsigned long : 1; + unsigned long tds : 1; + unsigned long tdc : 2; + }; +}; + #ifdef CONFIG_SMP # define ctl_set_bit(cr, bit) smp_ctl_set_bit(cr, bit) # define ctl_clear_bit(cr, bit) smp_ctl_clear_bit(cr, bit) @@ -79,4 +108,5 @@ union ctlreg0 { # define ctl_clear_bit(cr, bit) __ctl_clear_bit(cr, bit) #endif +#endif /* __ASSEMBLY__ */ #endif /* __ASM_CTL_REG_H */ diff --git a/arch/s390/include/asm/debug.h b/arch/s390/include/asm/debug.h index a4ed25dd3278..c305d39f5016 100644 --- a/arch/s390/include/asm/debug.h +++ b/arch/s390/include/asm/debug.h @@ -14,71 +14,71 @@ #include <linux/refcount.h> #include <uapi/asm/debug.h> -#define DEBUG_MAX_LEVEL 6 /* debug levels range from 0 to 6 */ -#define DEBUG_OFF_LEVEL -1 /* level where debug is switched off */ -#define DEBUG_FLUSH_ALL -1 /* parameter to flush all areas */ -#define DEBUG_MAX_VIEWS 10 /* max number of views in proc fs */ -#define DEBUG_MAX_NAME_LEN 64 /* max length for a debugfs file name */ -#define DEBUG_DEFAULT_LEVEL 3 /* initial debug level */ +#define DEBUG_MAX_LEVEL 6 /* debug levels range from 0 to 6 */ +#define DEBUG_OFF_LEVEL -1 /* level where debug is switched off */ +#define DEBUG_FLUSH_ALL -1 /* parameter to flush all areas */ +#define DEBUG_MAX_VIEWS 10 /* max number of views in proc fs */ +#define DEBUG_MAX_NAME_LEN 64 /* max length for a debugfs file name */ +#define DEBUG_DEFAULT_LEVEL 3 /* initial debug level */ #define DEBUG_DIR_ROOT "s390dbf" /* name of debug root directory in proc fs */ -#define DEBUG_DATA(entry) (char*)(entry + 1) /* data is stored behind */ - /* the entry information */ +#define DEBUG_DATA(entry) (char *)(entry + 1) /* data is stored behind */ + /* the entry information */ typedef struct __debug_entry debug_entry_t; struct debug_view; -typedef struct debug_info { - struct debug_info* next; - struct debug_info* prev; +typedef struct debug_info { + struct debug_info *next; + struct debug_info *prev; refcount_t ref_count; - spinlock_t lock; + spinlock_t lock; int level; int nr_areas; int pages_per_area; int buf_size; - int entry_size; - debug_entry_t*** areas; + int entry_size; + debug_entry_t ***areas; int active_area; int *active_pages; int *active_entries; - struct dentry* debugfs_root_entry; - struct dentry* debugfs_entries[DEBUG_MAX_VIEWS]; - struct debug_view* views[DEBUG_MAX_VIEWS]; + struct dentry *debugfs_root_entry; + struct dentry *debugfs_entries[DEBUG_MAX_VIEWS]; + struct debug_view *views[DEBUG_MAX_VIEWS]; char name[DEBUG_MAX_NAME_LEN]; umode_t mode; } debug_info_t; -typedef int (debug_header_proc_t) (debug_info_t* id, - struct debug_view* view, +typedef int (debug_header_proc_t) (debug_info_t *id, + struct debug_view *view, int area, - debug_entry_t* entry, - char* out_buf); - -typedef int (debug_format_proc_t) (debug_info_t* id, - struct debug_view* view, char* out_buf, - const char* in_buf); -typedef int (debug_prolog_proc_t) (debug_info_t* id, - struct debug_view* view, - char* out_buf); -typedef int (debug_input_proc_t) (debug_info_t* id, - struct debug_view* view, - struct file* file, + debug_entry_t *entry, + char *out_buf); + +typedef int (debug_format_proc_t) (debug_info_t *id, + struct debug_view *view, char *out_buf, + const char *in_buf); +typedef int (debug_prolog_proc_t) (debug_info_t *id, + struct debug_view *view, + char *out_buf); +typedef int (debug_input_proc_t) (debug_info_t *id, + struct debug_view *view, + struct file *file, const char __user *user_buf, - size_t in_buf_size, loff_t* offset); + size_t in_buf_size, loff_t *offset); + +int debug_dflt_header_fn(debug_info_t *id, struct debug_view *view, + int area, debug_entry_t *entry, char *out_buf); -int debug_dflt_header_fn(debug_info_t* id, struct debug_view* view, - int area, debug_entry_t* entry, char* out_buf); - struct debug_view { char name[DEBUG_MAX_NAME_LEN]; - debug_prolog_proc_t* prolog_proc; - debug_header_proc_t* header_proc; - debug_format_proc_t* format_proc; - debug_input_proc_t* input_proc; - void* private_data; + debug_prolog_proc_t *prolog_proc; + debug_header_proc_t *header_proc; + debug_format_proc_t *format_proc; + debug_input_proc_t *input_proc; + void *private_data; }; extern struct debug_view debug_hex_ascii_view; @@ -87,65 +87,67 @@ extern struct debug_view debug_sprintf_view; /* do NOT use the _common functions */ -debug_entry_t* debug_event_common(debug_info_t* id, int level, - const void* data, int length); +debug_entry_t *debug_event_common(debug_info_t *id, int level, + const void *data, int length); -debug_entry_t* debug_exception_common(debug_info_t* id, int level, - const void* data, int length); +debug_entry_t *debug_exception_common(debug_info_t *id, int level, + const void *data, int length); /* Debug Feature API: */ debug_info_t *debug_register(const char *name, int pages, int nr_areas, - int buf_size); + int buf_size); debug_info_t *debug_register_mode(const char *name, int pages, int nr_areas, int buf_size, umode_t mode, uid_t uid, gid_t gid); -void debug_unregister(debug_info_t* id); +void debug_unregister(debug_info_t *id); -void debug_set_level(debug_info_t* id, int new_level); +void debug_set_level(debug_info_t *id, int new_level); void debug_set_critical(void); void debug_stop_all(void); -static inline bool debug_level_enabled(debug_info_t* id, int level) +static inline bool debug_level_enabled(debug_info_t *id, int level) { return level <= id->level; } -static inline debug_entry_t* -debug_event(debug_info_t* id, int level, void* data, int length) +static inline debug_entry_t *debug_event(debug_info_t *id, int level, + void *data, int length) { if ((!id) || (level > id->level) || (id->pages_per_area == 0)) return NULL; - return debug_event_common(id,level,data,length); + return debug_event_common(id, level, data, length); } -static inline debug_entry_t* -debug_int_event(debug_info_t* id, int level, unsigned int tag) +static inline debug_entry_t *debug_int_event(debug_info_t *id, int level, + unsigned int tag) { - unsigned int t=tag; + unsigned int t = tag; + if ((!id) || (level > id->level) || (id->pages_per_area == 0)) return NULL; - return debug_event_common(id,level,&t,sizeof(unsigned int)); + return debug_event_common(id, level, &t, sizeof(unsigned int)); } -static inline debug_entry_t * -debug_long_event (debug_info_t* id, int level, unsigned long tag) +static inline debug_entry_t *debug_long_event(debug_info_t *id, int level, + unsigned long tag) { - unsigned long t=tag; + unsigned long t = tag; + if ((!id) || (level > id->level) || (id->pages_per_area == 0)) return NULL; - return debug_event_common(id,level,&t,sizeof(unsigned long)); + return debug_event_common(id, level, &t, sizeof(unsigned long)); } -static inline debug_entry_t* -debug_text_event(debug_info_t* id, int level, const char* txt) +static inline debug_entry_t *debug_text_event(debug_info_t *id, int level, + const char *txt) { if ((!id) || (level > id->level) || (id->pages_per_area == 0)) return NULL; - return debug_event_common(id,level,txt,strlen(txt)); + return debug_event_common(id, level, txt, strlen(txt)); } /* @@ -161,6 +163,7 @@ __debug_sprintf_event(debug_info_t *id, int level, char *string, ...) debug_entry_t *__ret; \ debug_info_t *__id = _id; \ int __level = _level; \ + \ if ((!__id) || (__level > __id->level)) \ __ret = NULL; \ else \ @@ -169,38 +172,40 @@ __debug_sprintf_event(debug_info_t *id, int level, char *string, ...) __ret; \ }) -static inline debug_entry_t* -debug_exception(debug_info_t* id, int level, void* data, int length) +static inline debug_entry_t *debug_exception(debug_info_t *id, int level, + void *data, int length) { if ((!id) || (level > id->level) || (id->pages_per_area == 0)) return NULL; - return debug_exception_common(id,level,data,length); + return debug_exception_common(id, level, data, length); } -static inline debug_entry_t* -debug_int_exception(debug_info_t* id, int level, unsigned int tag) +static inline debug_entry_t *debug_int_exception(debug_info_t *id, int level, + unsigned int tag) { - unsigned int t=tag; + unsigned int t = tag; + if ((!id) || (level > id->level) || (id->pages_per_area == 0)) return NULL; - return debug_exception_common(id,level,&t,sizeof(unsigned int)); + return debug_exception_common(id, level, &t, sizeof(unsigned int)); } -static inline debug_entry_t * -debug_long_exception (debug_info_t* id, int level, unsigned long tag) +static inline debug_entry_t *debug_long_exception (debug_info_t *id, int level, + unsigned long tag) { - unsigned long t=tag; + unsigned long t = tag; + if ((!id) || (level > id->level) || (id->pages_per_area == 0)) return NULL; - return debug_exception_common(id,level,&t,sizeof(unsigned long)); + return debug_exception_common(id, level, &t, sizeof(unsigned long)); } -static inline debug_entry_t* -debug_text_exception(debug_info_t* id, int level, const char* txt) +static inline debug_entry_t *debug_text_exception(debug_info_t *id, int level, + const char *txt) { if ((!id) || (level > id->level) || (id->pages_per_area == 0)) return NULL; - return debug_exception_common(id,level,txt,strlen(txt)); + return debug_exception_common(id, level, txt, strlen(txt)); } /* @@ -216,6 +221,7 @@ __debug_sprintf_exception(debug_info_t *id, int level, char *string, ...) debug_entry_t *__ret; \ debug_info_t *__id = _id; \ int __level = _level; \ + \ if ((!__id) || (__level > __id->level)) \ __ret = NULL; \ else \ @@ -224,13 +230,13 @@ __debug_sprintf_exception(debug_info_t *id, int level, char *string, ...) __ret; \ }) -int debug_register_view(debug_info_t* id, struct debug_view* view); -int debug_unregister_view(debug_info_t* id, struct debug_view* view); +int debug_register_view(debug_info_t *id, struct debug_view *view); +int debug_unregister_view(debug_info_t *id, struct debug_view *view); /* define the debug levels: - 0 No debugging output to console or syslog - - 1 Log internal errors to syslog, ignore check conditions + - 1 Log internal errors to syslog, ignore check conditions - 2 Log internal errors and check conditions to syslog - 3 Log internal errors to console, log check conditions to syslog - 4 Log internal errors and check conditions to console @@ -248,17 +254,17 @@ int debug_unregister_view(debug_info_t* id, struct debug_view* view); #define INTERNAL_DEBMSG(x,y...) "D" __FILE__ "%d: " x, __LINE__, y #if DEBUG_LEVEL > 0 -#define PRINT_DEBUG(x...) printk ( KERN_DEBUG PRINTK_HEADER x ) -#define PRINT_INFO(x...) printk ( KERN_INFO PRINTK_HEADER x ) -#define PRINT_WARN(x...) printk ( KERN_WARNING PRINTK_HEADER x ) -#define PRINT_ERR(x...) printk ( KERN_ERR PRINTK_HEADER x ) -#define PRINT_FATAL(x...) panic ( PRINTK_HEADER x ) +#define PRINT_DEBUG(x...) printk(KERN_DEBUG PRINTK_HEADER x) +#define PRINT_INFO(x...) printk(KERN_INFO PRINTK_HEADER x) +#define PRINT_WARN(x...) printk(KERN_WARNING PRINTK_HEADER x) +#define PRINT_ERR(x...) printk(KERN_ERR PRINTK_HEADER x) +#define PRINT_FATAL(x...) panic(PRINTK_HEADER x) #else -#define PRINT_DEBUG(x...) printk ( KERN_DEBUG PRINTK_HEADER x ) -#define PRINT_INFO(x...) printk ( KERN_DEBUG PRINTK_HEADER x ) -#define PRINT_WARN(x...) printk ( KERN_DEBUG PRINTK_HEADER x ) -#define PRINT_ERR(x...) printk ( KERN_DEBUG PRINTK_HEADER x ) -#define PRINT_FATAL(x...) printk ( KERN_DEBUG PRINTK_HEADER x ) -#endif /* DASD_DEBUG */ - -#endif /* DEBUG_H */ +#define PRINT_DEBUG(x...) printk(KERN_DEBUG PRINTK_HEADER x) +#define PRINT_INFO(x...) printk(KERN_DEBUG PRINTK_HEADER x) +#define PRINT_WARN(x...) printk(KERN_DEBUG PRINTK_HEADER x) +#define PRINT_ERR(x...) printk(KERN_DEBUG PRINTK_HEADER x) +#define PRINT_FATAL(x...) printk(KERN_DEBUG PRINTK_HEADER x) +#endif /* DASD_DEBUG */ + +#endif /* DEBUG_H */ diff --git a/arch/s390/include/asm/dis.h b/arch/s390/include/asm/dis.h index 78d1b2d725b9..b0480c60a8e1 100644 --- a/arch/s390/include/asm/dis.h +++ b/arch/s390/include/asm/dis.h @@ -9,32 +9,7 @@ #ifndef __ASM_S390_DIS_H__ #define __ASM_S390_DIS_H__ -/* Type of operand */ -#define OPERAND_GPR 0x1 /* Operand printed as %rx */ -#define OPERAND_FPR 0x2 /* Operand printed as %fx */ -#define OPERAND_AR 0x4 /* Operand printed as %ax */ -#define OPERAND_CR 0x8 /* Operand printed as %cx */ -#define OPERAND_VR 0x10 /* Operand printed as %vx */ -#define OPERAND_DISP 0x20 /* Operand printed as displacement */ -#define OPERAND_BASE 0x40 /* Operand printed as base register */ -#define OPERAND_INDEX 0x80 /* Operand printed as index register */ -#define OPERAND_PCREL 0x100 /* Operand printed as pc-relative symbol */ -#define OPERAND_SIGNED 0x200 /* Operand printed as signed value */ -#define OPERAND_LENGTH 0x400 /* Operand printed as length (+1) */ - - -struct s390_operand { - int bits; /* The number of bits in the operand. */ - int shift; /* The number of bits to shift. */ - int flags; /* One bit syntax flags. */ -}; - -struct s390_insn { - const char name[5]; - unsigned char opfrag; - unsigned char format; -}; - +#include <generated/dis.h> static inline int insn_length(unsigned char code) { @@ -45,7 +20,6 @@ struct pt_regs; void show_code(struct pt_regs *regs); void print_fn_code(unsigned char *code, unsigned long len); -int insn_to_mnemonic(unsigned char *instruction, char *buf, unsigned int len); struct s390_insn *find_insn(unsigned char *code); static inline int is_known_insn(unsigned char *code) diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h index 5a8d92758a58..186c7b5f5511 100644 --- a/arch/s390/include/asm/ipl.h +++ b/arch/s390/include/asm/ipl.h @@ -13,6 +13,8 @@ #include <asm/cio.h> #include <asm/setup.h> +#define NSS_NAME_SIZE 8 + #define IPL_PARMBLOCK_ORIGIN 0x2000 #define IPL_PARM_BLK_FCP_LEN (sizeof(struct ipl_list_hdr) + \ @@ -106,7 +108,6 @@ extern size_t append_ipl_scpdata(char *, size_t); enum { IPL_DEVNO_VALID = 1, IPL_PARMBLOCK_VALID = 2, - IPL_NSS_VALID = 4, }; enum ipl_type { diff --git a/arch/s390/include/asm/kprobes.h b/arch/s390/include/asm/kprobes.h index 28792ef82c83..921391f2341e 100644 --- a/arch/s390/include/asm/kprobes.h +++ b/arch/s390/include/asm/kprobes.h @@ -63,8 +63,6 @@ typedef u16 kprobe_opcode_t; #define kretprobe_blacklist_size 0 -#define KPROBE_SWAP_INST 0x10 - /* Architecture specific copy of original instruction */ struct arch_specific_insn { /* copy of original instruction */ diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 51375e766e90..fd006a272024 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -736,7 +736,6 @@ struct kvm_arch{ wait_queue_head_t ipte_wq; int ipte_lock_count; struct mutex ipte_mutex; - struct ratelimit_state sthyi_limit; spinlock_t start_stop_lock; struct sie_page2 *sie_page2; struct kvm_s390_cpu_model model; diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 917f7344cab6..9eb36a1592c7 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -134,8 +134,9 @@ struct lowcore { __u8 pad_0x03b4[0x03b8-0x03b4]; /* 0x03b4 */ __u64 gmap; /* 0x03b8 */ __u32 spinlock_lockval; /* 0x03c0 */ - __u32 fpu_flags; /* 0x03c4 */ - __u8 pad_0x03c8[0x0400-0x03c8]; /* 0x03c8 */ + __u32 spinlock_index; /* 0x03c4 */ + __u32 fpu_flags; /* 0x03c8 */ + __u8 pad_0x03cc[0x0400-0x03cc]; /* 0x03cc */ /* Per cpu primary space access list */ __u32 paste[16]; /* 0x0400 */ diff --git a/arch/s390/include/asm/nmi.h b/arch/s390/include/asm/nmi.h index c8a7beadd3d4..1e5dc4537bf2 100644 --- a/arch/s390/include/asm/nmi.h +++ b/arch/s390/include/asm/nmi.h @@ -26,12 +26,9 @@ #define MCCK_CODE_CPU_TIMER_VALID _BITUL(63 - 46) #define MCCK_CODE_PSW_MWP_VALID _BITUL(63 - 20) #define MCCK_CODE_PSW_IA_VALID _BITUL(63 - 23) - -#define MCCK_CR14_CR_PENDING_SUB_MASK (1 << 28) -#define MCCK_CR14_RECOVERY_SUB_MASK (1 << 27) -#define MCCK_CR14_DEGRAD_SUB_MASK (1 << 26) -#define MCCK_CR14_EXT_DAMAGE_SUB_MASK (1 << 25) -#define MCCK_CR14_WARN_SUB_MASK (1 << 24) +#define MCCK_CODE_CR_VALID _BITUL(63 - 29) +#define MCCK_CODE_GS_VALID _BITUL(63 - 36) +#define MCCK_CODE_FC_VALID _BITUL(63 - 43) #ifndef __ASSEMBLY__ @@ -87,6 +84,8 @@ union mci { #define MCESA_ORIGIN_MASK (~0x3ffUL) #define MCESA_LC_MASK (0xfUL) +#define MCESA_MIN_SIZE (1024) +#define MCESA_MAX_SIZE (2048) struct mcesa { u8 vector_save_area[1024]; @@ -95,8 +94,12 @@ struct mcesa { struct pt_regs; -extern void s390_handle_mcck(void); -extern void s390_do_machine_check(struct pt_regs *regs); +void nmi_alloc_boot_cpu(struct lowcore *lc); +int nmi_alloc_per_cpu(struct lowcore *lc); +void nmi_free_per_cpu(struct lowcore *lc); + +void s390_handle_mcck(void); +void s390_do_machine_check(struct pt_regs *regs); #endif /* __ASSEMBLY__ */ #endif /* _ASM_S390_NMI_H */ diff --git a/arch/s390/include/asm/pci_debug.h b/arch/s390/include/asm/pci_debug.h index 6c2c38060f8b..5dfe47588277 100644 --- a/arch/s390/include/asm/pci_debug.h +++ b/arch/s390/include/asm/pci_debug.h @@ -19,11 +19,7 @@ extern debug_info_t *pci_debug_err_id; static inline void zpci_err_hex(void *addr, int len) { - while (len > 0) { - debug_event(pci_debug_err_id, 0, (void *) addr, len); - len -= pci_debug_err_id->buf_size; - addr += pci_debug_err_id->buf_size; - } + debug_event(pci_debug_err_id, 0, addr, len); } #endif diff --git a/arch/s390/include/asm/pci_insn.h b/arch/s390/include/asm/pci_insn.h index 419e83fa4721..ba22a6ea51a1 100644 --- a/arch/s390/include/asm/pci_insn.h +++ b/arch/s390/include/asm/pci_insn.h @@ -82,6 +82,6 @@ int zpci_refresh_trans(u64 fn, u64 addr, u64 range); int zpci_load(u64 *data, u64 req, u64 offset); int zpci_store(u64 data, u64 req, u64 offset); int zpci_store_block(const u64 *data, u64 req, u64 offset); -void zpci_set_irq_ctrl(u16 ctl, char *unused, u8 isc); +int zpci_set_irq_ctrl(u16 ctl, char *unused, u8 isc); #endif diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index bbe99cb8219d..c7b4333d1de0 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -13,6 +13,7 @@ #define _S390_PGALLOC_H #include <linux/threads.h> +#include <linux/string.h> #include <linux/gfp.h> #include <linux/mm.h> @@ -28,24 +29,9 @@ void page_table_free_rcu(struct mmu_gather *, unsigned long *, unsigned long); void page_table_free_pgste(struct page *page); extern int page_table_allocate_pgste; -static inline void clear_table(unsigned long *s, unsigned long val, size_t n) -{ - struct addrtype { char _[256]; }; - int i; - - for (i = 0; i < n; i += 256) { - *s = val; - asm volatile( - "mvc 8(248,%[s]),0(%[s])\n" - : "+m" (*(struct addrtype *) s) - : [s] "a" (s)); - s += 256 / sizeof(long); - } -} - static inline void crst_table_init(unsigned long *crst, unsigned long entry) { - clear_table(crst, entry, _CRST_TABLE_SIZE); + memset64((u64 *)crst, entry, _CRST_ENTRIES); } static inline unsigned long pgd_entry_type(struct mm_struct *mm) diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 9cf92abe23c3..f25bfe888933 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -22,6 +22,7 @@ #define CIF_IGNORE_IRQ 5 /* ignore interrupt (for udelay) */ #define CIF_ENABLED_WAIT 6 /* in enabled wait state */ #define CIF_MCCK_GUEST 7 /* machine check happening in guest */ +#define CIF_DEDICATED_CPU 8 /* this CPU is dedicated */ #define _CIF_MCCK_PENDING _BITUL(CIF_MCCK_PENDING) #define _CIF_ASCE_PRIMARY _BITUL(CIF_ASCE_PRIMARY) @@ -31,6 +32,7 @@ #define _CIF_IGNORE_IRQ _BITUL(CIF_IGNORE_IRQ) #define _CIF_ENABLED_WAIT _BITUL(CIF_ENABLED_WAIT) #define _CIF_MCCK_GUEST _BITUL(CIF_MCCK_GUEST) +#define _CIF_DEDICATED_CPU _BITUL(CIF_DEDICATED_CPU) #ifndef __ASSEMBLY__ @@ -219,10 +221,10 @@ void show_registers(struct pt_regs *regs); void show_cacheinfo(struct seq_file *m); /* Free all resources held by a thread. */ -extern void release_thread(struct task_struct *); +static inline void release_thread(struct task_struct *tsk) { } -/* Free guarded storage control block for current */ -void exit_thread_gs(void); +/* Free guarded storage control block */ +void guarded_storage_release(struct task_struct *tsk); unsigned long get_wchan(struct task_struct *p); #define task_pt_regs(tsk) ((struct pt_regs *) \ diff --git a/arch/s390/include/asm/runtime_instr.h b/arch/s390/include/asm/runtime_instr.h index ea8896ba5afc..6b1540337ed6 100644 --- a/arch/s390/include/asm/runtime_instr.h +++ b/arch/s390/include/asm/runtime_instr.h @@ -6,55 +6,55 @@ #define S390_RUNTIME_INSTR_STOP 0x2 struct runtime_instr_cb { - __u64 buf_current; - __u64 buf_origin; - __u64 buf_limit; + __u64 rca; + __u64 roa; + __u64 rla; - __u32 valid : 1; - __u32 pstate : 1; - __u32 pstate_set_buf : 1; - __u32 home_space : 1; - __u32 altered : 1; - __u32 : 3; - __u32 pstate_sample : 1; - __u32 sstate_sample : 1; - __u32 pstate_collect : 1; - __u32 sstate_collect : 1; - __u32 : 1; - __u32 halted_int : 1; - __u32 int_requested : 1; - __u32 buffer_full_int : 1; + __u32 v : 1; + __u32 s : 1; + __u32 k : 1; + __u32 h : 1; + __u32 a : 1; + __u32 reserved1 : 3; + __u32 ps : 1; + __u32 qs : 1; + __u32 pc : 1; + __u32 qc : 1; + __u32 reserved2 : 1; + __u32 g : 1; + __u32 u : 1; + __u32 l : 1; __u32 key : 4; - __u32 : 9; + __u32 reserved3 : 8; + __u32 t : 1; __u32 rgs : 3; - __u32 mode : 4; - __u32 next : 1; + __u32 m : 4; + __u32 n : 1; __u32 mae : 1; - __u32 : 2; - __u32 call_type_br : 1; - __u32 return_type_br : 1; - __u32 other_type_br : 1; - __u32 bc_other_type : 1; - __u32 emit : 1; - __u32 tx_abort : 1; - __u32 : 2; - __u32 bp_xn : 1; - __u32 bp_xt : 1; - __u32 bp_ti : 1; - __u32 bp_ni : 1; - __u32 suppr_y : 1; - __u32 suppr_z : 1; + __u32 reserved4 : 2; + __u32 c : 1; + __u32 r : 1; + __u32 b : 1; + __u32 j : 1; + __u32 e : 1; + __u32 x : 1; + __u32 reserved5 : 2; + __u32 bpxn : 1; + __u32 bpxt : 1; + __u32 bpti : 1; + __u32 bpni : 1; + __u32 reserved6 : 2; - __u32 dc_miss_extra : 1; - __u32 lat_lev_ignore : 1; - __u32 ic_lat_lev : 4; - __u32 dc_lat_lev : 4; + __u32 d : 1; + __u32 f : 1; + __u32 ic : 4; + __u32 dc : 4; - __u64 reserved1; - __u64 scaling_factor; + __u64 reserved7; + __u64 sf; __u64 rsic; - __u64 reserved2; + __u64 reserved8; } __packed __aligned(8); extern struct runtime_instr_cb runtime_instr_empty_cb; @@ -86,6 +86,8 @@ static inline void restore_ri_cb(struct runtime_instr_cb *cb_next, load_runtime_instr_cb(&runtime_instr_empty_cb); } -void exit_thread_runtime_instr(void); +struct task_struct; + +void runtime_instr_release(struct task_struct *tsk); #endif /* _RUNTIME_INSTR_H */ diff --git a/arch/s390/include/asm/rwsem.h b/arch/s390/include/asm/rwsem.h deleted file mode 100644 index f731b7b518bd..000000000000 --- a/arch/s390/include/asm/rwsem.h +++ /dev/null @@ -1,211 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _S390_RWSEM_H -#define _S390_RWSEM_H - -/* - * S390 version - * Copyright IBM Corp. 2002 - * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) - * - * Based on asm-alpha/semaphore.h and asm-i386/rwsem.h - */ - -/* - * - * The MSW of the count is the negated number of active writers and waiting - * lockers, and the LSW is the total number of active locks - * - * The lock count is initialized to 0 (no active and no waiting lockers). - * - * When a writer subtracts WRITE_BIAS, it'll get 0xffff0001 for the case of an - * uncontended lock. This can be determined because XADD returns the old value. - * Readers increment by 1 and see a positive value when uncontended, negative - * if there are writers (and maybe) readers waiting (in which case it goes to - * sleep). - * - * The value of WAITING_BIAS supports up to 32766 waiting processes. This can - * be extended to 65534 by manually checking the whole MSW rather than relying - * on the S flag. - * - * The value of ACTIVE_BIAS supports up to 65535 active processes. - * - * This should be totally fair - if anything is waiting, a process that wants a - * lock will go to the back of the queue. When the currently active lock is - * released, if there's a writer at the front of the queue, then that and only - * that will be woken up; if there's a bunch of consecutive readers at the - * front, then they'll all be woken up, but no other readers will be. - */ - -#ifndef _LINUX_RWSEM_H -#error "please don't include asm/rwsem.h directly, use linux/rwsem.h instead" -#endif - -#define RWSEM_UNLOCKED_VALUE 0x0000000000000000L -#define RWSEM_ACTIVE_BIAS 0x0000000000000001L -#define RWSEM_ACTIVE_MASK 0x00000000ffffffffL -#define RWSEM_WAITING_BIAS (-0x0000000100000000L) -#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS -#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) - -/* - * lock for reading - */ -static inline void __down_read(struct rw_semaphore *sem) -{ - signed long old, new; - - asm volatile( - " lg %0,%2\n" - "0: lgr %1,%0\n" - " aghi %1,%4\n" - " csg %0,%1,%2\n" - " jl 0b" - : "=&d" (old), "=&d" (new), "=Q" (sem->count) - : "Q" (sem->count), "i" (RWSEM_ACTIVE_READ_BIAS) - : "cc", "memory"); - if (old < 0) - rwsem_down_read_failed(sem); -} - -/* - * trylock for reading -- returns 1 if successful, 0 if contention - */ -static inline int __down_read_trylock(struct rw_semaphore *sem) -{ - signed long old, new; - - asm volatile( - " lg %0,%2\n" - "0: ltgr %1,%0\n" - " jm 1f\n" - " aghi %1,%4\n" - " csg %0,%1,%2\n" - " jl 0b\n" - "1:" - : "=&d" (old), "=&d" (new), "=Q" (sem->count) - : "Q" (sem->count), "i" (RWSEM_ACTIVE_READ_BIAS) - : "cc", "memory"); - return old >= 0 ? 1 : 0; -} - -/* - * lock for writing - */ -static inline long ___down_write(struct rw_semaphore *sem) -{ - signed long old, new, tmp; - - tmp = RWSEM_ACTIVE_WRITE_BIAS; - asm volatile( - " lg %0,%2\n" - "0: lgr %1,%0\n" - " ag %1,%4\n" - " csg %0,%1,%2\n" - " jl 0b" - : "=&d" (old), "=&d" (new), "=Q" (sem->count) - : "Q" (sem->count), "m" (tmp) - : "cc", "memory"); - - return old; -} - -static inline void __down_write(struct rw_semaphore *sem) -{ - if (___down_write(sem)) - rwsem_down_write_failed(sem); -} - -static inline int __down_write_killable(struct rw_semaphore *sem) -{ - if (___down_write(sem)) - if (IS_ERR(rwsem_down_write_failed_killable(sem))) - return -EINTR; - - return 0; -} - -/* - * trylock for writing -- returns 1 if successful, 0 if contention - */ -static inline int __down_write_trylock(struct rw_semaphore *sem) -{ - signed long old; - - asm volatile( - " lg %0,%1\n" - "0: ltgr %0,%0\n" - " jnz 1f\n" - " csg %0,%3,%1\n" - " jl 0b\n" - "1:" - : "=&d" (old), "=Q" (sem->count) - : "Q" (sem->count), "d" (RWSEM_ACTIVE_WRITE_BIAS) - : "cc", "memory"); - return (old == RWSEM_UNLOCKED_VALUE) ? 1 : 0; -} - -/* - * unlock after reading - */ -static inline void __up_read(struct rw_semaphore *sem) -{ - signed long old, new; - - asm volatile( - " lg %0,%2\n" - "0: lgr %1,%0\n" - " aghi %1,%4\n" - " csg %0,%1,%2\n" - " jl 0b" - : "=&d" (old), "=&d" (new), "=Q" (sem->count) - : "Q" (sem->count), "i" (-RWSEM_ACTIVE_READ_BIAS) - : "cc", "memory"); - if (new < 0) - if ((new & RWSEM_ACTIVE_MASK) == 0) - rwsem_wake(sem); -} - -/* - * unlock after writing - */ -static inline void __up_write(struct rw_semaphore *sem) -{ - signed long old, new, tmp; - - tmp = -RWSEM_ACTIVE_WRITE_BIAS; - asm volatile( - " lg %0,%2\n" - "0: lgr %1,%0\n" - " ag %1,%4\n" - " csg %0,%1,%2\n" - " jl 0b" - : "=&d" (old), "=&d" (new), "=Q" (sem->count) - : "Q" (sem->count), "m" (tmp) - : "cc", "memory"); - if (new < 0) - if ((new & RWSEM_ACTIVE_MASK) == 0) - rwsem_wake(sem); -} - -/* - * downgrade write lock to read lock - */ -static inline void __downgrade_write(struct rw_semaphore *sem) -{ - signed long old, new, tmp; - - tmp = -RWSEM_WAITING_BIAS; - asm volatile( - " lg %0,%2\n" - "0: lgr %1,%0\n" - " ag %1,%4\n" - " csg %0,%1,%2\n" - " jl 0b" - : "=&d" (old), "=&d" (new), "=Q" (sem->count) - : "Q" (sem->count), "m" (tmp) - : "cc", "memory"); - if (new > 1) - rwsem_downgrade_wake(sem); -} - -#endif /* _S390_RWSEM_H */ diff --git a/arch/s390/include/asm/sections.h b/arch/s390/include/asm/sections.h index 0ac3e8166e85..54f81f8ed662 100644 --- a/arch/s390/include/asm/sections.h +++ b/arch/s390/include/asm/sections.h @@ -4,6 +4,6 @@ #include <asm-generic/sections.h> -extern char _eshared[], _ehead[]; +extern char _ehead[]; #endif diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index f2c2b7cd9099..8bc87dcb10eb 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -98,9 +98,6 @@ extern char vmpoff_cmd[]; #define SET_CONSOLE_VT220 do { console_mode = 4; } while (0) #define SET_CONSOLE_HVC do { console_mode = 5; } while (0) -#define NSS_NAME_SIZE 8 -extern char kernel_nss_name[]; - #ifdef CONFIG_PFAULT extern int pfault_init(void); extern void pfault_fini(void); diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h index babe83ed416c..3907ead27ffa 100644 --- a/arch/s390/include/asm/smp.h +++ b/arch/s390/include/asm/smp.h @@ -28,6 +28,7 @@ extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); extern void smp_call_online_cpu(void (*func)(void *), void *); extern void smp_call_ipl_cpu(void (*func)(void *), void *); +extern void smp_emergency_stop(void); extern int smp_find_processor_id(u16 address); extern int smp_store_status(int cpu); @@ -53,6 +54,10 @@ static inline void smp_call_online_cpu(void (*func)(void *), void *data) func(data); } +static inline void smp_emergency_stop(void) +{ +} + static inline int smp_find_processor_id(u16 address) { return 0; } static inline int smp_store_status(int cpu) { return 0; } static inline int smp_vcpu_scheduled(int cpu) { return 1; } diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h index f3f5e0155b10..0a29588aa00b 100644 --- a/arch/s390/include/asm/spinlock.h +++ b/arch/s390/include/asm/spinlock.h @@ -14,6 +14,7 @@ #include <asm/atomic_ops.h> #include <asm/barrier.h> #include <asm/processor.h> +#include <asm/alternative.h> #define SPINLOCK_LOCKVAL (S390_lowcore.spinlock_lockval) @@ -36,20 +37,16 @@ bool arch_vcpu_is_preempted(int cpu); * (the type definitions are in asm/spinlock_types.h) */ -void arch_lock_relax(int cpu); +void arch_spin_relax(arch_spinlock_t *lock); +#define arch_spin_relax arch_spin_relax void arch_spin_lock_wait(arch_spinlock_t *); int arch_spin_trylock_retry(arch_spinlock_t *); -void arch_spin_lock_wait_flags(arch_spinlock_t *, unsigned long flags); - -static inline void arch_spin_relax(arch_spinlock_t *lock) -{ - arch_lock_relax(lock->lock); -} +void arch_spin_lock_setup(int cpu); static inline u32 arch_spin_lockval(int cpu) { - return ~cpu; + return cpu + 1; } static inline int arch_spin_value_unlocked(arch_spinlock_t lock) @@ -65,8 +62,7 @@ static inline int arch_spin_is_locked(arch_spinlock_t *lp) static inline int arch_spin_trylock_once(arch_spinlock_t *lp) { barrier(); - return likely(arch_spin_value_unlocked(*lp) && - __atomic_cmpxchg_bool(&lp->lock, 0, SPINLOCK_LOCKVAL)); + return likely(__atomic_cmpxchg_bool(&lp->lock, 0, SPINLOCK_LOCKVAL)); } static inline void arch_spin_lock(arch_spinlock_t *lp) @@ -79,8 +75,9 @@ static inline void arch_spin_lock_flags(arch_spinlock_t *lp, unsigned long flags) { if (!arch_spin_trylock_once(lp)) - arch_spin_lock_wait_flags(lp, flags); + arch_spin_lock_wait(lp); } +#define arch_spin_lock_flags arch_spin_lock_flags static inline int arch_spin_trylock(arch_spinlock_t *lp) { @@ -93,11 +90,10 @@ static inline void arch_spin_unlock(arch_spinlock_t *lp) { typecheck(int, lp->lock); asm volatile( -#ifdef CONFIG_HAVE_MARCH_ZEC12_FEATURES - " .long 0xb2fa0070\n" /* NIAI 7 */ -#endif - " st %1,%0\n" - : "=Q" (lp->lock) : "d" (0) : "cc", "memory"); + ALTERNATIVE("", ".long 0xb2fa0070", 49) /* NIAI 7 */ + " sth %1,%0\n" + : "=Q" (((unsigned short *) &lp->lock)[1]) + : "d" (0) : "cc", "memory"); } /* @@ -111,168 +107,53 @@ static inline void arch_spin_unlock(arch_spinlock_t *lp) * read-locks. */ -/** - * read_can_lock - would read_trylock() succeed? - * @lock: the rwlock in question. - */ -#define arch_read_can_lock(x) ((int)(x)->lock >= 0) - -/** - * write_can_lock - would write_trylock() succeed? - * @lock: the rwlock in question. - */ -#define arch_write_can_lock(x) ((x)->lock == 0) - -extern int _raw_read_trylock_retry(arch_rwlock_t *lp); -extern int _raw_write_trylock_retry(arch_rwlock_t *lp); - -#define arch_read_lock_flags(lock, flags) arch_read_lock(lock) -#define arch_write_lock_flags(lock, flags) arch_write_lock(lock) - -static inline int arch_read_trylock_once(arch_rwlock_t *rw) -{ - int old = ACCESS_ONCE(rw->lock); - return likely(old >= 0 && - __atomic_cmpxchg_bool(&rw->lock, old, old + 1)); -} - -static inline int arch_write_trylock_once(arch_rwlock_t *rw) -{ - int old = ACCESS_ONCE(rw->lock); - return likely(old == 0 && - __atomic_cmpxchg_bool(&rw->lock, 0, 0x80000000)); -} - -#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES - -#define __RAW_OP_OR "lao" -#define __RAW_OP_AND "lan" -#define __RAW_OP_ADD "laa" - -#define __RAW_LOCK(ptr, op_val, op_string) \ -({ \ - int old_val; \ - \ - typecheck(int *, ptr); \ - asm volatile( \ - op_string " %0,%2,%1\n" \ - "bcr 14,0\n" \ - : "=d" (old_val), "+Q" (*ptr) \ - : "d" (op_val) \ - : "cc", "memory"); \ - old_val; \ -}) - -#define __RAW_UNLOCK(ptr, op_val, op_string) \ -({ \ - int old_val; \ - \ - typecheck(int *, ptr); \ - asm volatile( \ - op_string " %0,%2,%1\n" \ - : "=d" (old_val), "+Q" (*ptr) \ - : "d" (op_val) \ - : "cc", "memory"); \ - old_val; \ -}) +#define arch_read_relax(rw) barrier() +#define arch_write_relax(rw) barrier() -extern void _raw_read_lock_wait(arch_rwlock_t *lp); -extern void _raw_write_lock_wait(arch_rwlock_t *lp, int prev); +void arch_read_lock_wait(arch_rwlock_t *lp); +void arch_write_lock_wait(arch_rwlock_t *lp); static inline void arch_read_lock(arch_rwlock_t *rw) { int old; - old = __RAW_LOCK(&rw->lock, 1, __RAW_OP_ADD); - if (old < 0) - _raw_read_lock_wait(rw); + old = __atomic_add(1, &rw->cnts); + if (old & 0xffff0000) + arch_read_lock_wait(rw); } static inline void arch_read_unlock(arch_rwlock_t *rw) { - __RAW_UNLOCK(&rw->lock, -1, __RAW_OP_ADD); + __atomic_add_const_barrier(-1, &rw->cnts); } static inline void arch_write_lock(arch_rwlock_t *rw) { - int old; - - old = __RAW_LOCK(&rw->lock, 0x80000000, __RAW_OP_OR); - if (old != 0) - _raw_write_lock_wait(rw, old); - rw->owner = SPINLOCK_LOCKVAL; + if (!__atomic_cmpxchg_bool(&rw->cnts, 0, 0x30000)) + arch_write_lock_wait(rw); } static inline void arch_write_unlock(arch_rwlock_t *rw) { - rw->owner = 0; - __RAW_UNLOCK(&rw->lock, 0x7fffffff, __RAW_OP_AND); + __atomic_add_barrier(-0x30000, &rw->cnts); } -#else /* CONFIG_HAVE_MARCH_Z196_FEATURES */ - -extern void _raw_read_lock_wait(arch_rwlock_t *lp); -extern void _raw_write_lock_wait(arch_rwlock_t *lp); - -static inline void arch_read_lock(arch_rwlock_t *rw) -{ - if (!arch_read_trylock_once(rw)) - _raw_read_lock_wait(rw); -} -static inline void arch_read_unlock(arch_rwlock_t *rw) +static inline int arch_read_trylock(arch_rwlock_t *rw) { int old; - do { - old = ACCESS_ONCE(rw->lock); - } while (!__atomic_cmpxchg_bool(&rw->lock, old, old - 1)); -} - -static inline void arch_write_lock(arch_rwlock_t *rw) -{ - if (!arch_write_trylock_once(rw)) - _raw_write_lock_wait(rw); - rw->owner = SPINLOCK_LOCKVAL; -} - -static inline void arch_write_unlock(arch_rwlock_t *rw) -{ - typecheck(int, rw->lock); - - rw->owner = 0; - asm volatile( - "st %1,%0\n" - : "+Q" (rw->lock) - : "d" (0) - : "cc", "memory"); -} - -#endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */ - -static inline int arch_read_trylock(arch_rwlock_t *rw) -{ - if (!arch_read_trylock_once(rw)) - return _raw_read_trylock_retry(rw); - return 1; + old = READ_ONCE(rw->cnts); + return (!(old & 0xffff0000) && + __atomic_cmpxchg_bool(&rw->cnts, old, old + 1)); } static inline int arch_write_trylock(arch_rwlock_t *rw) { - if (!arch_write_trylock_once(rw) && !_raw_write_trylock_retry(rw)) - return 0; - rw->owner = SPINLOCK_LOCKVAL; - return 1; -} - -static inline void arch_read_relax(arch_rwlock_t *rw) -{ - arch_lock_relax(rw->owner); -} + int old; -static inline void arch_write_relax(arch_rwlock_t *rw) -{ - arch_lock_relax(rw->owner); + old = READ_ONCE(rw->cnts); + return !old && __atomic_cmpxchg_bool(&rw->cnts, 0, 0x30000); } #endif /* __ASM_SPINLOCK_H */ diff --git a/arch/s390/include/asm/spinlock_types.h b/arch/s390/include/asm/spinlock_types.h index 1861a0c5dd47..cfed272e4fd5 100644 --- a/arch/s390/include/asm/spinlock_types.h +++ b/arch/s390/include/asm/spinlock_types.h @@ -13,8 +13,8 @@ typedef struct { #define __ARCH_SPIN_LOCK_UNLOCKED { .lock = 0, } typedef struct { - int lock; - int owner; + int cnts; + arch_spinlock_t wait; } arch_rwlock_t; #define __ARCH_RW_LOCK_UNLOCKED { 0 } diff --git a/arch/s390/include/asm/string.h b/arch/s390/include/asm/string.h index 27ce494198f5..50f26fc9acb2 100644 --- a/arch/s390/include/asm/string.h +++ b/arch/s390/include/asm/string.h @@ -18,6 +18,9 @@ #define __HAVE_ARCH_MEMMOVE /* gcc builtin & arch function */ #define __HAVE_ARCH_MEMSCAN /* inline & arch function */ #define __HAVE_ARCH_MEMSET /* gcc builtin & arch function */ +#define __HAVE_ARCH_MEMSET16 /* arch function */ +#define __HAVE_ARCH_MEMSET32 /* arch function */ +#define __HAVE_ARCH_MEMSET64 /* arch function */ #define __HAVE_ARCH_STRCAT /* inline & arch function */ #define __HAVE_ARCH_STRCMP /* arch function */ #define __HAVE_ARCH_STRCPY /* inline & arch function */ @@ -31,17 +34,17 @@ #define __HAVE_ARCH_STRSTR /* arch function */ /* Prototypes for non-inlined arch strings functions. */ -extern int memcmp(const void *, const void *, size_t); -extern void *memcpy(void *, const void *, size_t); -extern void *memset(void *, int, size_t); -extern void *memmove(void *, const void *, size_t); -extern int strcmp(const char *,const char *); -extern size_t strlcat(char *, const char *, size_t); -extern size_t strlcpy(char *, const char *, size_t); -extern char *strncat(char *, const char *, size_t); -extern char *strncpy(char *, const char *, size_t); -extern char *strrchr(const char *, int); -extern char *strstr(const char *, const char *); +int memcmp(const void *s1, const void *s2, size_t n); +void *memcpy(void *dest, const void *src, size_t n); +void *memset(void *s, int c, size_t n); +void *memmove(void *dest, const void *src, size_t n); +int strcmp(const char *s1, const char *s2); +size_t strlcat(char *dest, const char *src, size_t n); +size_t strlcpy(char *dest, const char *src, size_t size); +char *strncat(char *dest, const char *src, size_t n); +char *strncpy(char *dest, const char *src, size_t n); +char *strrchr(const char *s, int c); +char *strstr(const char *s1, const char *s2); #undef __HAVE_ARCH_STRCHR #undef __HAVE_ARCH_STRNCHR @@ -50,7 +53,26 @@ extern char *strstr(const char *, const char *); #undef __HAVE_ARCH_STRSEP #undef __HAVE_ARCH_STRSPN -#if !defined(IN_ARCH_STRING_C) +void *__memset16(uint16_t *s, uint16_t v, size_t count); +void *__memset32(uint32_t *s, uint32_t v, size_t count); +void *__memset64(uint64_t *s, uint64_t v, size_t count); + +static inline void *memset16(uint16_t *s, uint16_t v, size_t count) +{ + return __memset16(s, v, count * sizeof(v)); +} + +static inline void *memset32(uint32_t *s, uint32_t v, size_t count) +{ + return __memset32(s, v, count * sizeof(v)); +} + +static inline void *memset64(uint64_t *s, uint64_t v, size_t count) +{ + return __memset64(s, v, count * sizeof(v)); +} + +#if !defined(IN_ARCH_STRING_C) && (!defined(CONFIG_FORTIFY_SOURCE) || defined(__NO_FORTIFY)) static inline void *memchr(const void * s, int c, size_t n) { diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h index c21fe1d57c00..ec7b476c1ac5 100644 --- a/arch/s390/include/asm/switch_to.h +++ b/arch/s390/include/asm/switch_to.h @@ -37,8 +37,8 @@ static inline void restore_access_regs(unsigned int *acrs) save_ri_cb(prev->thread.ri_cb); \ save_gs_cb(prev->thread.gs_cb); \ } \ + update_cr_regs(next); \ if (next->mm) { \ - update_cr_regs(next); \ set_cpu_flag(CIF_FPU); \ restore_access_regs(&next->thread.acrs[0]); \ restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb); \ diff --git a/arch/s390/include/asm/sysinfo.h b/arch/s390/include/asm/sysinfo.h index 2b498e58b914..a702cb9d4269 100644 --- a/arch/s390/include/asm/sysinfo.h +++ b/arch/s390/include/asm/sysinfo.h @@ -156,7 +156,8 @@ static inline unsigned char topology_mnest_limit(void) struct topology_core { unsigned char nl; unsigned char reserved0[3]; - unsigned char :6; + unsigned char :5; + unsigned char d:1; unsigned char pp:2; unsigned char reserved1; unsigned short origin; @@ -198,4 +199,5 @@ struct service_level { int register_service_level(struct service_level *); int unregister_service_level(struct service_level *); +int sthyi_fill(void *dst, u64 *rc); #endif /* __ASM_S390_SYSINFO_H */ diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h index 55de4eb73604..1807229b292f 100644 --- a/arch/s390/include/asm/topology.h +++ b/arch/s390/include/asm/topology.h @@ -17,6 +17,7 @@ struct cpu_topology_s390 { unsigned short book_id; unsigned short drawer_id; unsigned short node_id; + unsigned short dedicated : 1; cpumask_t thread_mask; cpumask_t core_mask; cpumask_t book_mask; @@ -35,6 +36,7 @@ extern cpumask_t cpus_with_topology; #define topology_book_cpumask(cpu) (&cpu_topology[cpu].book_mask) #define topology_drawer_id(cpu) (cpu_topology[cpu].drawer_id) #define topology_drawer_cpumask(cpu) (&cpu_topology[cpu].drawer_mask) +#define topology_cpu_dedicated(cpu) (cpu_topology[cpu].dedicated) #define mc_capable() 1 diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h index bb2ce72300b0..ae6261ef97d5 100644 --- a/arch/s390/include/asm/vdso.h +++ b/arch/s390/include/asm/vdso.h @@ -47,6 +47,7 @@ struct vdso_per_cpu_data { extern struct vdso_data *vdso_data; +void vdso_alloc_boot_cpu(struct lowcore *lowcore); int vdso_alloc_per_cpu(struct lowcore *lowcore); void vdso_free_per_cpu(struct lowcore *lowcore); diff --git a/arch/s390/include/uapi/asm/kvm_virtio.h b/arch/s390/include/uapi/asm/kvm_virtio.h deleted file mode 100644 index 73283677a132..000000000000 --- a/arch/s390/include/uapi/asm/kvm_virtio.h +++ /dev/null @@ -1,65 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * definition for virtio for kvm on s390 - * - * Copyright IBM Corp. 2008 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. - * - * Author(s): Christian Borntraeger <borntraeger@de.ibm.com> - */ - -#ifndef __KVM_S390_VIRTIO_H -#define __KVM_S390_VIRTIO_H - -#include <linux/types.h> - -struct kvm_device_desc { - /* The device type: console, network, disk etc. Type 0 terminates. */ - __u8 type; - /* The number of virtqueues (first in config array) */ - __u8 num_vq; - /* - * The number of bytes of feature bits. Multiply by 2: one for host - * features and one for guest acknowledgements. - */ - __u8 feature_len; - /* The number of bytes of the config array after virtqueues. */ - __u8 config_len; - /* A status byte, written by the Guest. */ - __u8 status; - __u8 config[0]; -}; - -/* - * This is how we expect the device configuration field for a virtqueue - * to be laid out in config space. - */ -struct kvm_vqconfig { - /* The token returned with an interrupt. Set by the guest */ - __u64 token; - /* The address of the virtio ring */ - __u64 address; - /* The number of entries in the virtio_ring */ - __u16 num; - -}; - -#define KVM_S390_VIRTIO_NOTIFY 0 -#define KVM_S390_VIRTIO_RESET 1 -#define KVM_S390_VIRTIO_SET_STATUS 2 - -/* The alignment to use between consumer and producer parts of vring. - * This is pagesize for historical reasons. */ -#define KVM_S390_VIRTIO_RING_ALIGN 4096 - - -/* These values are supposed to be in ext_params on an interrupt */ -#define VIRTIO_PARAM_MASK 0xff -#define VIRTIO_PARAM_VRING_INTERRUPT 0x0 -#define VIRTIO_PARAM_CONFIG_CHANGED 0x1 -#define VIRTIO_PARAM_DEV_ADD 0x2 - -#endif diff --git a/arch/s390/include/uapi/asm/sthyi.h b/arch/s390/include/uapi/asm/sthyi.h new file mode 100644 index 000000000000..ec113db4eb7e --- /dev/null +++ b/arch/s390/include/uapi/asm/sthyi.h @@ -0,0 +1,6 @@ +#ifndef _UAPI_ASM_STHYI_H +#define _UAPI_ASM_STHYI_H + +#define STHYI_FC_CP_IFL_CAP 0 + +#endif /* _UAPI_ASM_STHYI_H */ diff --git a/arch/s390/include/uapi/asm/unistd.h b/arch/s390/include/uapi/asm/unistd.h index b52bce8ee941..725120939051 100644 --- a/arch/s390/include/uapi/asm/unistd.h +++ b/arch/s390/include/uapi/asm/unistd.h @@ -316,7 +316,8 @@ #define __NR_pwritev2 377 #define __NR_s390_guarded_storage 378 #define __NR_statx 379 -#define NR_syscalls 380 +#define __NR_s390_sthyi 380 +#define NR_syscalls 381 /* * There are some system calls that are not present on 64 bit, some diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 4ce2d05929a7..83bc82001c06 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -34,6 +34,8 @@ AFLAGS_REMOVE_head.o += $(CC_FLAGS_MARCH) AFLAGS_head.o += -march=z900 endif +CFLAGS_als.o += -D__NO_FORTIFY + # # Passing null pointers is ok for smp code, since we access the lowcore here. # @@ -56,7 +58,7 @@ obj-y := traps.o time.o process.o base.o early.o setup.o idle.o vtime.o obj-y += processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o als.o obj-y += sysinfo.o jump_label.o lgr.o os_info.o machine_kexec.o pgm_check.o -obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o +obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o obj-y += entry.o reipl.o relocate_kernel.o kdebugfs.o extra-y += head.o head64.o vmlinux.lds @@ -75,6 +77,7 @@ obj-$(CONFIG_KPROBES) += kprobes.o obj-$(CONFIG_FUNCTION_TRACER) += mcount.o ftrace.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_UPROBES) += uprobes.o +obj-$(CONFIG_ALTERNATIVES) += alternative.o obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_cpum_cf.o perf_cpum_sf.o obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf_events.o diff --git a/arch/s390/kernel/alternative.c b/arch/s390/kernel/alternative.c new file mode 100644 index 000000000000..315986a06cf5 --- /dev/null +++ b/arch/s390/kernel/alternative.c @@ -0,0 +1,110 @@ +#include <linux/module.h> +#include <asm/alternative.h> +#include <asm/facility.h> + +#define MAX_PATCH_LEN (255 - 1) + +static int __initdata_or_module alt_instr_disabled; + +static int __init disable_alternative_instructions(char *str) +{ + alt_instr_disabled = 1; + return 0; +} + +early_param("noaltinstr", disable_alternative_instructions); + +struct brcl_insn { + u16 opc; + s32 disp; +} __packed; + +static u16 __initdata_or_module nop16 = 0x0700; +static u32 __initdata_or_module nop32 = 0x47000000; +static struct brcl_insn __initdata_or_module nop48 = { + 0xc004, 0 +}; + +static const void *nops[] __initdata_or_module = { + &nop16, + &nop32, + &nop48 +}; + +static void __init_or_module add_jump_padding(void *insns, unsigned int len) +{ + struct brcl_insn brcl = { + 0xc0f4, + len / 2 + }; + + memcpy(insns, &brcl, sizeof(brcl)); + insns += sizeof(brcl); + len -= sizeof(brcl); + + while (len > 0) { + memcpy(insns, &nop16, 2); + insns += 2; + len -= 2; + } +} + +static void __init_or_module add_padding(void *insns, unsigned int len) +{ + if (len > 6) + add_jump_padding(insns, len); + else if (len >= 2) + memcpy(insns, nops[len / 2 - 1], len); +} + +static void __init_or_module __apply_alternatives(struct alt_instr *start, + struct alt_instr *end) +{ + struct alt_instr *a; + u8 *instr, *replacement; + u8 insnbuf[MAX_PATCH_LEN]; + + /* + * The scan order should be from start to end. A later scanned + * alternative code can overwrite previously scanned alternative code. + */ + for (a = start; a < end; a++) { + int insnbuf_sz = 0; + + instr = (u8 *)&a->instr_offset + a->instr_offset; + replacement = (u8 *)&a->repl_offset + a->repl_offset; + + if (!test_facility(a->facility)) + continue; + + if (unlikely(a->instrlen % 2 || a->replacementlen % 2)) { + WARN_ONCE(1, "cpu alternatives instructions length is " + "odd, skipping patching\n"); + continue; + } + + memcpy(insnbuf, replacement, a->replacementlen); + insnbuf_sz = a->replacementlen; + + if (a->instrlen > a->replacementlen) { + add_padding(insnbuf + a->replacementlen, + a->instrlen - a->replacementlen); + insnbuf_sz += a->instrlen - a->replacementlen; + } + + s390_kernel_write(instr, insnbuf, insnbuf_sz); + } +} + +void __init_or_module apply_alternatives(struct alt_instr *start, + struct alt_instr *end) +{ + if (!alt_instr_disabled) + __apply_alternatives(start, end); +} + +extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; +void __init apply_alternative_instructions(void) +{ + apply_alternatives(__alt_instructions, __alt_instructions_end); +} diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 0e6d2b032484..33ec80df7ed4 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -14,6 +14,7 @@ #include <asm/vdso.h> #include <asm/pgtable.h> #include <asm/gmap.h> +#include <asm/nmi.h> /* * Make sure that the compiler is new enough. We want a compiler that @@ -159,6 +160,7 @@ int main(void) OFFSET(__LC_LAST_UPDATE_CLOCK, lowcore, last_update_clock); OFFSET(__LC_INT_CLOCK, lowcore, int_clock); OFFSET(__LC_MCCK_CLOCK, lowcore, mcck_clock); + OFFSET(__LC_CLOCK_COMPARATOR, lowcore, clock_comparator); OFFSET(__LC_BOOT_CLOCK, lowcore, boot_clock); OFFSET(__LC_CURRENT, lowcore, current_task); OFFSET(__LC_KERNEL_STACK, lowcore, kernel_stack); @@ -194,6 +196,9 @@ int main(void) OFFSET(__LC_CREGS_SAVE_AREA, lowcore, cregs_save_area); OFFSET(__LC_PGM_TDB, lowcore, pgm_tdb); BLANK(); + /* extended machine check save area */ + OFFSET(__MCESA_GS_SAVE_AREA, mcesa, guarded_storage_save_area); + BLANK(); /* gmap/sie offsets */ OFFSET(__GMAP_ASCE, gmap, asce); OFFSET(__SIE_PROG0C, kvm_s390_sie_block, prog0c); diff --git a/arch/s390/kernel/compat_wrapper.c b/arch/s390/kernel/compat_wrapper.c index d04918583971..11e9d8b5c1b0 100644 --- a/arch/s390/kernel/compat_wrapper.c +++ b/arch/s390/kernel/compat_wrapper.c @@ -181,3 +181,4 @@ COMPAT_SYSCALL_WRAP3(mlock2, unsigned long, start, size_t, len, int, flags); COMPAT_SYSCALL_WRAP6(copy_file_range, int, fd_in, loff_t __user *, off_in, int, fd_out, loff_t __user *, off_out, size_t, len, unsigned int, flags); COMPAT_SYSCALL_WRAP2(s390_guarded_storage, int, command, struct gs_cb *, gs_cb); COMPAT_SYSCALL_WRAP5(statx, int, dfd, const char __user *, path, unsigned, flags, unsigned, mask, struct statx __user *, buffer); +COMPAT_SYSCALL_WRAP4(s390_sthyi, unsigned long, code, void __user *, info, u64 __user *, rc, unsigned long, flags); diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index 05a9cf4ae9c2..58b9e127b615 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -5,7 +5,7 @@ * Copyright IBM Corp. 1999, 2012 * * Author(s): Michael Holzheu (holzheu@de.ibm.com), - * Holger Smolinski (Holger.Smolinski@de.ibm.com) + * Holger Smolinski (Holger.Smolinski@de.ibm.com) * * Bugreports to: <Linux390@de.ibm.com> */ @@ -37,69 +37,67 @@ typedef struct file_private_info { loff_t offset; /* offset of last read in file */ - int act_area; /* number of last formated area */ - int act_page; /* act page in given area */ - int act_entry; /* last formated entry (offset */ - /* relative to beginning of last */ - /* formated page) */ - size_t act_entry_offset; /* up to this offset we copied */ + int act_area; /* number of last formated area */ + int act_page; /* act page in given area */ + int act_entry; /* last formated entry (offset */ + /* relative to beginning of last */ + /* formated page) */ + size_t act_entry_offset; /* up to this offset we copied */ /* in last read the last formated */ /* entry to userland */ char temp_buf[2048]; /* buffer for output */ - debug_info_t *debug_info_org; /* original debug information */ + debug_info_t *debug_info_org; /* original debug information */ debug_info_t *debug_info_snap; /* snapshot of debug information */ struct debug_view *view; /* used view of debug info */ } file_private_info_t; -typedef struct -{ +typedef struct { char *string; - /* - * This assumes that all args are converted into longs - * on L/390 this is the case for all types of parameter - * except of floats, and long long (32 bit) + /* + * This assumes that all args are converted into longs + * on L/390 this is the case for all types of parameter + * except of floats, and long long (32 bit) * */ long args[0]; } debug_sprintf_entry_t; - /* internal function prototyes */ static int debug_init(void); static ssize_t debug_output(struct file *file, char __user *user_buf, - size_t user_len, loff_t * offset); + size_t user_len, loff_t *offset); static ssize_t debug_input(struct file *file, const char __user *user_buf, - size_t user_len, loff_t * offset); + size_t user_len, loff_t *offset); static int debug_open(struct inode *inode, struct file *file); static int debug_close(struct inode *inode, struct file *file); static debug_info_t *debug_info_create(const char *name, int pages_per_area, - int nr_areas, int buf_size, umode_t mode); + int nr_areas, int buf_size, umode_t mode); static void debug_info_get(debug_info_t *); static void debug_info_put(debug_info_t *); -static int debug_prolog_level_fn(debug_info_t * id, - struct debug_view *view, char *out_buf); -static int debug_input_level_fn(debug_info_t * id, struct debug_view *view, - struct file *file, const char __user *user_buf, - size_t user_buf_size, loff_t * offset); -static int debug_prolog_pages_fn(debug_info_t * id, - struct debug_view *view, char *out_buf); -static int debug_input_pages_fn(debug_info_t * id, struct debug_view *view, - struct file *file, const char __user *user_buf, - size_t user_buf_size, loff_t * offset); -static int debug_input_flush_fn(debug_info_t * id, struct debug_view *view, - struct file *file, const char __user *user_buf, - size_t user_buf_size, loff_t * offset); -static int debug_hex_ascii_format_fn(debug_info_t * id, struct debug_view *view, - char *out_buf, const char *in_buf); -static int debug_raw_format_fn(debug_info_t * id, - struct debug_view *view, char *out_buf, - const char *in_buf); -static int debug_raw_header_fn(debug_info_t * id, struct debug_view *view, - int area, debug_entry_t * entry, char *out_buf); - -static int debug_sprintf_format_fn(debug_info_t * id, struct debug_view *view, - char *out_buf, debug_sprintf_entry_t *curr_event); +static int debug_prolog_level_fn(debug_info_t *id, + struct debug_view *view, char *out_buf); +static int debug_input_level_fn(debug_info_t *id, struct debug_view *view, + struct file *file, const char __user *user_buf, + size_t user_buf_size, loff_t *offset); +static int debug_prolog_pages_fn(debug_info_t *id, + struct debug_view *view, char *out_buf); +static int debug_input_pages_fn(debug_info_t *id, struct debug_view *view, + struct file *file, const char __user *user_buf, + size_t user_buf_size, loff_t *offset); +static int debug_input_flush_fn(debug_info_t *id, struct debug_view *view, + struct file *file, const char __user *user_buf, + size_t user_buf_size, loff_t *offset); +static int debug_hex_ascii_format_fn(debug_info_t *id, struct debug_view *view, + char *out_buf, const char *in_buf); +static int debug_raw_format_fn(debug_info_t *id, + struct debug_view *view, char *out_buf, + const char *in_buf); +static int debug_raw_header_fn(debug_info_t *id, struct debug_view *view, + int area, debug_entry_t *entry, char *out_buf); + +static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view, + char *out_buf, debug_sprintf_entry_t *curr_event); /* globals */ @@ -142,19 +140,19 @@ static struct debug_view debug_pages_view = { }; static struct debug_view debug_flush_view = { - "flush", - NULL, - NULL, - NULL, - &debug_input_flush_fn, - NULL + "flush", + NULL, + NULL, + NULL, + &debug_input_flush_fn, + NULL }; struct debug_view debug_sprintf_view = { "sprintf", NULL, &debug_dflt_header_fn, - (debug_format_proc_t*)&debug_sprintf_format_fn, + (debug_format_proc_t *)&debug_sprintf_format_fn, NULL, NULL }; @@ -165,18 +163,18 @@ static unsigned int __used debug_feature_version = __DEBUG_FEATURE_VERSION; /* static globals */ -static debug_info_t *debug_area_first = NULL; -static debug_info_t *debug_area_last = NULL; +static debug_info_t *debug_area_first; +static debug_info_t *debug_area_last; static DEFINE_MUTEX(debug_mutex); static int initialized; static int debug_critical; static const struct file_operations debug_file_ops = { - .owner = THIS_MODULE, - .read = debug_output, - .write = debug_input, - .open = debug_open, + .owner = THIS_MODULE, + .read = debug_output, + .write = debug_input, + .open = debug_open, .release = debug_close, .llseek = no_llseek, }; @@ -191,29 +189,23 @@ static struct dentry *debug_debugfs_root_entry; * areas[areanumber][pagenumber][pageoffset] */ -static debug_entry_t*** -debug_areas_alloc(int pages_per_area, int nr_areas) +static debug_entry_t ***debug_areas_alloc(int pages_per_area, int nr_areas) { - debug_entry_t*** areas; - int i,j; + debug_entry_t ***areas; + int i, j; - areas = kmalloc(nr_areas * - sizeof(debug_entry_t**), - GFP_KERNEL); + areas = kmalloc(nr_areas * sizeof(debug_entry_t **), GFP_KERNEL); if (!areas) goto fail_malloc_areas; for (i = 0; i < nr_areas; i++) { - areas[i] = kmalloc(pages_per_area * - sizeof(debug_entry_t*),GFP_KERNEL); - if (!areas[i]) { + areas[i] = kmalloc(pages_per_area * sizeof(debug_entry_t *), GFP_KERNEL); + if (!areas[i]) goto fail_malloc_areas2; - } - for(j = 0; j < pages_per_area; j++) { + for (j = 0; j < pages_per_area; j++) { areas[i][j] = kzalloc(PAGE_SIZE, GFP_KERNEL); - if(!areas[i][j]) { - for(j--; j >=0 ; j--) { + if (!areas[i][j]) { + for (j--; j >= 0 ; j--) kfree(areas[i][j]); - } kfree(areas[i]); goto fail_malloc_areas2; } @@ -222,62 +214,55 @@ debug_areas_alloc(int pages_per_area, int nr_areas) return areas; fail_malloc_areas2: - for(i--; i >= 0; i--){ - for(j=0; j < pages_per_area;j++){ + for (i--; i >= 0; i--) { + for (j = 0; j < pages_per_area; j++) kfree(areas[i][j]); - } kfree(areas[i]); } kfree(areas); fail_malloc_areas: return NULL; - } - /* * debug_info_alloc * - alloc new debug-info */ - -static debug_info_t* -debug_info_alloc(const char *name, int pages_per_area, int nr_areas, - int buf_size, int level, int mode) +static debug_info_t *debug_info_alloc(const char *name, int pages_per_area, + int nr_areas, int buf_size, int level, + int mode) { - debug_info_t* rc; + debug_info_t *rc; /* alloc everything */ - rc = kmalloc(sizeof(debug_info_t), GFP_KERNEL); - if(!rc) + if (!rc) goto fail_malloc_rc; rc->active_entries = kcalloc(nr_areas, sizeof(int), GFP_KERNEL); - if(!rc->active_entries) + if (!rc->active_entries) goto fail_malloc_active_entries; rc->active_pages = kcalloc(nr_areas, sizeof(int), GFP_KERNEL); - if(!rc->active_pages) + if (!rc->active_pages) goto fail_malloc_active_pages; - if((mode == ALL_AREAS) && (pages_per_area != 0)){ + if ((mode == ALL_AREAS) && (pages_per_area != 0)) { rc->areas = debug_areas_alloc(pages_per_area, nr_areas); - if(!rc->areas) + if (!rc->areas) goto fail_malloc_areas; } else { rc->areas = NULL; } /* initialize members */ - spin_lock_init(&rc->lock); rc->pages_per_area = pages_per_area; - rc->nr_areas = nr_areas; + rc->nr_areas = nr_areas; rc->active_area = 0; - rc->level = level; - rc->buf_size = buf_size; - rc->entry_size = sizeof(debug_entry_t) + buf_size; + rc->level = level; + rc->buf_size = buf_size; + rc->entry_size = sizeof(debug_entry_t) + buf_size; strlcpy(rc->name, name, sizeof(rc->name)); memset(rc->views, 0, DEBUG_MAX_VIEWS * sizeof(struct debug_view *)); - memset(rc->debugfs_entries, 0 ,DEBUG_MAX_VIEWS * - sizeof(struct dentry*)); + memset(rc->debugfs_entries, 0, DEBUG_MAX_VIEWS * sizeof(struct dentry *)); refcount_set(&(rc->ref_count), 0); return rc; @@ -296,18 +281,15 @@ fail_malloc_rc: * debug_areas_free * - free all debug areas */ - -static void -debug_areas_free(debug_info_t* db_info) +static void debug_areas_free(debug_info_t *db_info) { - int i,j; + int i, j; - if(!db_info->areas) + if (!db_info->areas) return; for (i = 0; i < db_info->nr_areas; i++) { - for(j = 0; j < db_info->pages_per_area; j++) { + for (j = 0; j < db_info->pages_per_area; j++) kfree(db_info->areas[i][j]); - } kfree(db_info->areas[i]); } kfree(db_info->areas); @@ -318,9 +300,8 @@ debug_areas_free(debug_info_t* db_info) * debug_info_free * - free memory debug-info */ - -static void -debug_info_free(debug_info_t* db_info){ +static void debug_info_free(debug_info_t *db_info) +{ debug_areas_free(db_info); kfree(db_info->active_entries); kfree(db_info->active_pages); @@ -332,35 +313,34 @@ debug_info_free(debug_info_t* db_info){ * - create new debug-info */ -static debug_info_t* -debug_info_create(const char *name, int pages_per_area, int nr_areas, - int buf_size, umode_t mode) +static debug_info_t *debug_info_create(const char *name, int pages_per_area, + int nr_areas, int buf_size, umode_t mode) { - debug_info_t* rc; + debug_info_t *rc; - rc = debug_info_alloc(name, pages_per_area, nr_areas, buf_size, - DEBUG_DEFAULT_LEVEL, ALL_AREAS); - if(!rc) + rc = debug_info_alloc(name, pages_per_area, nr_areas, buf_size, + DEBUG_DEFAULT_LEVEL, ALL_AREAS); + if (!rc) goto out; rc->mode = mode & ~S_IFMT; /* create root directory */ - rc->debugfs_root_entry = debugfs_create_dir(rc->name, - debug_debugfs_root_entry); + rc->debugfs_root_entry = debugfs_create_dir(rc->name, + debug_debugfs_root_entry); /* append new element to linked list */ - if (!debug_area_first) { - /* first element in list */ - debug_area_first = rc; - rc->prev = NULL; - } else { - /* append element to end of list */ - debug_area_last->next = rc; - rc->prev = debug_area_last; - } - debug_area_last = rc; - rc->next = NULL; + if (!debug_area_first) { + /* first element in list */ + debug_area_first = rc; + rc->prev = NULL; + } else { + /* append element to end of list */ + debug_area_last->next = rc; + rc->prev = debug_area_last; + } + debug_area_last = rc; + rc->next = NULL; refcount_set(&rc->ref_count, 1); out: @@ -371,24 +351,22 @@ out: * debug_info_copy * - copy debug-info */ - -static debug_info_t* -debug_info_copy(debug_info_t* in, int mode) +static debug_info_t *debug_info_copy(debug_info_t *in, int mode) { - int i,j; - debug_info_t* rc; - unsigned long flags; + unsigned long flags; + debug_info_t *rc; + int i, j; /* get a consistent copy of the debug areas */ do { rc = debug_info_alloc(in->name, in->pages_per_area, in->nr_areas, in->buf_size, in->level, mode); spin_lock_irqsave(&in->lock, flags); - if(!rc) + if (!rc) goto out; /* has something changed in the meantime ? */ - if((rc->pages_per_area == in->pages_per_area) && - (rc->nr_areas == in->nr_areas)) { + if ((rc->pages_per_area == in->pages_per_area) && + (rc->nr_areas == in->nr_areas)) { break; } spin_unlock_irqrestore(&in->lock, flags); @@ -396,25 +374,22 @@ debug_info_copy(debug_info_t* in, int mode) } while (1); if (mode == NO_AREAS) - goto out; + goto out; - for(i = 0; i < in->nr_areas; i++){ - for(j = 0; j < in->pages_per_area; j++) { - memcpy(rc->areas[i][j], in->areas[i][j],PAGE_SIZE); - } - } + for (i = 0; i < in->nr_areas; i++) { + for (j = 0; j < in->pages_per_area; j++) + memcpy(rc->areas[i][j], in->areas[i][j], PAGE_SIZE); + } out: - spin_unlock_irqrestore(&in->lock, flags); - return rc; + spin_unlock_irqrestore(&in->lock, flags); + return rc; } /* * debug_info_get * - increments reference count for debug-info */ - -static void -debug_info_get(debug_info_t * db_info) +static void debug_info_get(debug_info_t *db_info) { if (db_info) refcount_inc(&db_info->ref_count); @@ -424,9 +399,7 @@ debug_info_get(debug_info_t * db_info) * debug_info_put: * - decreases reference count for debug-info and frees it if necessary */ - -static void -debug_info_put(debug_info_t *db_info) +static void debug_info_put(debug_info_t *db_info) { int i; @@ -439,12 +412,14 @@ debug_info_put(debug_info_t *db_info) debugfs_remove(db_info->debugfs_entries[i]); } debugfs_remove(db_info->debugfs_root_entry); - if(db_info == debug_area_first) + if (db_info == debug_area_first) debug_area_first = db_info->next; - if(db_info == debug_area_last) + if (db_info == debug_area_last) debug_area_last = db_info->prev; - if(db_info->prev) db_info->prev->next = db_info->next; - if(db_info->next) db_info->next->prev = db_info->prev; + if (db_info->prev) + db_info->prev->next = db_info->next; + if (db_info->next) + db_info->next->prev = db_info->prev; debug_info_free(db_info); } } @@ -453,71 +428,68 @@ debug_info_put(debug_info_t *db_info) * debug_format_entry: * - format one debug entry and return size of formated data */ - -static int -debug_format_entry(file_private_info_t *p_info) +static int debug_format_entry(file_private_info_t *p_info) { - debug_info_t *id_snap = p_info->debug_info_snap; + debug_info_t *id_snap = p_info->debug_info_snap; struct debug_view *view = p_info->view; debug_entry_t *act_entry; size_t len = 0; - if(p_info->act_entry == DEBUG_PROLOG_ENTRY){ + + if (p_info->act_entry == DEBUG_PROLOG_ENTRY) { /* print prolog */ - if (view->prolog_proc) - len += view->prolog_proc(id_snap,view,p_info->temp_buf); + if (view->prolog_proc) + len += view->prolog_proc(id_snap, view, p_info->temp_buf); goto out; } if (!id_snap->areas) /* this is true, if we have a prolog only view */ goto out; /* or if 'pages_per_area' is 0 */ - act_entry = (debug_entry_t *) ((char*)id_snap->areas[p_info->act_area] - [p_info->act_page] + p_info->act_entry); - + act_entry = (debug_entry_t *) ((char *)id_snap->areas[p_info->act_area] + [p_info->act_page] + p_info->act_entry); + if (act_entry->id.stck == 0LL) - goto out; /* empty entry */ + goto out; /* empty entry */ if (view->header_proc) len += view->header_proc(id_snap, view, p_info->act_area, - act_entry, p_info->temp_buf + len); + act_entry, p_info->temp_buf + len); if (view->format_proc) len += view->format_proc(id_snap, view, p_info->temp_buf + len, - DEBUG_DATA(act_entry)); + DEBUG_DATA(act_entry)); out: - return len; + return len; } /* * debug_next_entry: * - goto next entry in p_info */ - -static inline int -debug_next_entry(file_private_info_t *p_info) +static inline int debug_next_entry(file_private_info_t *p_info) { debug_info_t *id; id = p_info->debug_info_snap; - if(p_info->act_entry == DEBUG_PROLOG_ENTRY){ + if (p_info->act_entry == DEBUG_PROLOG_ENTRY) { p_info->act_entry = 0; p_info->act_page = 0; goto out; } - if(!id->areas) + if (!id->areas) return 1; p_info->act_entry += id->entry_size; /* switch to next page, if we reached the end of the page */ - if (p_info->act_entry > (PAGE_SIZE - id->entry_size)){ + if (p_info->act_entry > (PAGE_SIZE - id->entry_size)) { /* next page */ p_info->act_entry = 0; p_info->act_page += 1; - if((p_info->act_page % id->pages_per_area) == 0) { + if ((p_info->act_page % id->pages_per_area) == 0) { /* next area */ - p_info->act_area++; - p_info->act_page=0; + p_info->act_area++; + p_info->act_page = 0; } - if(p_info->act_area >= id->nr_areas) + if (p_info->act_area >= id->nr_areas) return 1; } out: - return 0; + return 0; } /* @@ -525,26 +497,24 @@ out: * - called for user read() * - copies formated debug entries to the user buffer */ - -static ssize_t -debug_output(struct file *file, /* file descriptor */ - char __user *user_buf, /* user buffer */ - size_t len, /* length of buffer */ - loff_t *offset) /* offset in the file */ +static ssize_t debug_output(struct file *file, /* file descriptor */ + char __user *user_buf, /* user buffer */ + size_t len, /* length of buffer */ + loff_t *offset) /* offset in the file */ { size_t count = 0; size_t entry_offset; file_private_info_t *p_info; - p_info = ((file_private_info_t *) file->private_data); - if (*offset != p_info->offset) + p_info = (file_private_info_t *) file->private_data; + if (*offset != p_info->offset) return -EPIPE; - if(p_info->act_area >= p_info->debug_info_snap->nr_areas) + if (p_info->act_area >= p_info->debug_info_snap->nr_areas) return 0; entry_offset = p_info->act_entry_offset; - while(count < len){ - int formatted_line_size; + while (count < len) { int formatted_line_residue; + int formatted_line_size; int user_buf_residue; size_t copy_size; @@ -552,21 +522,21 @@ debug_output(struct file *file, /* file descriptor */ formatted_line_residue = formatted_line_size - entry_offset; user_buf_residue = len-count; copy_size = min(user_buf_residue, formatted_line_residue); - if(copy_size){ + if (copy_size) { if (copy_to_user(user_buf + count, p_info->temp_buf - + entry_offset, copy_size)) + + entry_offset, copy_size)) return -EFAULT; count += copy_size; entry_offset += copy_size; } - if(copy_size == formatted_line_residue){ + if (copy_size == formatted_line_residue) { entry_offset = 0; - if(debug_next_entry(p_info)) + if (debug_next_entry(p_info)) goto out; } } out: - p_info->offset = *offset + count; + p_info->offset = *offset + count; p_info->act_entry_offset = entry_offset; *offset = p_info->offset; return count; @@ -577,24 +547,23 @@ out: * - called for user write() * - calls input function of view */ - -static ssize_t -debug_input(struct file *file, const char __user *user_buf, size_t length, - loff_t *offset) +static ssize_t debug_input(struct file *file, const char __user *user_buf, + size_t length, loff_t *offset) { - int rc = 0; file_private_info_t *p_info; + int rc = 0; mutex_lock(&debug_mutex); p_info = ((file_private_info_t *) file->private_data); - if (p_info->view->input_proc) + if (p_info->view->input_proc) { rc = p_info->view->input_proc(p_info->debug_info_org, p_info->view, file, user_buf, length, offset); - else + } else { rc = -EPERM; + } mutex_unlock(&debug_mutex); - return rc; /* number of input characters */ + return rc; /* number of input characters */ } /* @@ -603,13 +572,11 @@ debug_input(struct file *file, const char __user *user_buf, size_t length, * - copies formated output to private_data area of the file * handle */ - -static int -debug_open(struct inode *inode, struct file *file) +static int debug_open(struct inode *inode, struct file *file) { - int i, rc = 0; - file_private_info_t *p_info; debug_info_t *debug_info, *debug_info_snapshot; + file_private_info_t *p_info; + int i, rc = 0; mutex_lock(&debug_mutex); debug_info = file_inode(file)->i_private; @@ -617,10 +584,8 @@ debug_open(struct inode *inode, struct file *file) for (i = 0; i < DEBUG_MAX_VIEWS; i++) { if (!debug_info->views[i]) continue; - else if (debug_info->debugfs_entries[i] == - file->f_path.dentry) { - goto found; /* found view ! */ - } + else if (debug_info->debugfs_entries[i] == file->f_path.dentry) + goto found; /* found view ! */ } /* no entry found */ rc = -EINVAL; @@ -628,31 +593,28 @@ debug_open(struct inode *inode, struct file *file) found: - /* Make snapshot of current debug areas to get it consistent. */ + /* Make snapshot of current debug areas to get it consistent. */ /* To copy all the areas is only needed, if we have a view which */ /* formats the debug areas. */ - if(!debug_info->views[i]->format_proc && - !debug_info->views[i]->header_proc){ + if (!debug_info->views[i]->format_proc && !debug_info->views[i]->header_proc) debug_info_snapshot = debug_info_copy(debug_info, NO_AREAS); - } else { + else debug_info_snapshot = debug_info_copy(debug_info, ALL_AREAS); - } - if(!debug_info_snapshot){ + if (!debug_info_snapshot) { rc = -ENOMEM; goto out; } - p_info = kmalloc(sizeof(file_private_info_t), - GFP_KERNEL); - if(!p_info){ + p_info = kmalloc(sizeof(file_private_info_t), GFP_KERNEL); + if (!p_info) { debug_info_free(debug_info_snapshot); rc = -ENOMEM; goto out; } p_info->offset = 0; p_info->debug_info_snap = debug_info_snapshot; - p_info->debug_info_org = debug_info; + p_info->debug_info_org = debug_info; p_info->view = debug_info->views[i]; p_info->act_area = 0; p_info->act_page = 0; @@ -671,17 +633,16 @@ out: * - called for user close() * - deletes private_data area of the file handle */ - -static int -debug_close(struct inode *inode, struct file *file) +static int debug_close(struct inode *inode, struct file *file) { file_private_info_t *p_info; + p_info = (file_private_info_t *) file->private_data; - if(p_info->debug_info_snap) + if (p_info->debug_info_snap) debug_info_free(p_info->debug_info_snap); debug_info_put(p_info->debug_info_org); kfree(file->private_data); - return 0; /* success */ + return 0; /* success */ } /* @@ -690,7 +651,6 @@ debug_close(struct inode *inode, struct file *file) * The mode parameter allows to specify access rights for the s390dbf files * - Returns handle for debug area */ - debug_info_t *debug_register_mode(const char *name, int pages_per_area, int nr_areas, int buf_size, umode_t mode, uid_t uid, gid_t gid) @@ -704,18 +664,16 @@ debug_info_t *debug_register_mode(const char *name, int pages_per_area, BUG_ON(!initialized); mutex_lock(&debug_mutex); - /* create new debug_info */ - + /* create new debug_info */ rc = debug_info_create(name, pages_per_area, nr_areas, buf_size, mode); - if(!rc) + if (!rc) goto out; debug_register_view(rc, &debug_level_view); - debug_register_view(rc, &debug_flush_view); + debug_register_view(rc, &debug_flush_view); debug_register_view(rc, &debug_pages_view); out: - if (!rc){ + if (!rc) pr_err("Registering debug feature %s failed\n", name); - } mutex_unlock(&debug_mutex); return rc; } @@ -726,7 +684,6 @@ EXPORT_SYMBOL(debug_register_mode); * - creates and initializes debug area for the caller * - returns handle for debug area */ - debug_info_t *debug_register(const char *name, int pages_per_area, int nr_areas, int buf_size) { @@ -739,18 +696,13 @@ EXPORT_SYMBOL(debug_register); * debug_unregister: * - give back debug area */ - -void -debug_unregister(debug_info_t * id) +void debug_unregister(debug_info_t *id) { if (!id) - goto out; + return; mutex_lock(&debug_mutex); debug_info_put(id); mutex_unlock(&debug_mutex); - -out: - return; } EXPORT_SYMBOL(debug_unregister); @@ -758,18 +710,17 @@ EXPORT_SYMBOL(debug_unregister); * debug_set_size: * - set area size (number of pages) and number of areas */ -static int -debug_set_size(debug_info_t* id, int nr_areas, int pages_per_area) +static int debug_set_size(debug_info_t *id, int nr_areas, int pages_per_area) { + debug_entry_t ***new_areas; unsigned long flags; - debug_entry_t *** new_areas; - int rc=0; + int rc = 0; - if(!id || (nr_areas <= 0) || (pages_per_area < 0)) + if (!id || (nr_areas <= 0) || (pages_per_area < 0)) return -EINVAL; - if(pages_per_area > 0){ + if (pages_per_area > 0) { new_areas = debug_areas_alloc(pages_per_area, nr_areas); - if(!new_areas) { + if (!new_areas) { pr_info("Allocating memory for %i pages failed\n", pages_per_area); rc = -ENOMEM; @@ -778,16 +729,16 @@ debug_set_size(debug_info_t* id, int nr_areas, int pages_per_area) } else { new_areas = NULL; } - spin_lock_irqsave(&id->lock,flags); + spin_lock_irqsave(&id->lock, flags); debug_areas_free(id); id->areas = new_areas; id->nr_areas = nr_areas; id->pages_per_area = pages_per_area; id->active_area = 0; - memset(id->active_entries,0,sizeof(int)*id->nr_areas); + memset(id->active_entries, 0, sizeof(int)*id->nr_areas); memset(id->active_pages, 0, sizeof(int)*id->nr_areas); - spin_unlock_irqrestore(&id->lock,flags); - pr_info("%s: set new size (%i pages)\n" ,id->name, pages_per_area); + spin_unlock_irqrestore(&id->lock, flags); + pr_info("%s: set new size (%i pages)\n", id->name, pages_per_area); out: return rc; } @@ -796,24 +747,23 @@ out: * debug_set_level: * - set actual debug level */ - -void -debug_set_level(debug_info_t* id, int new_level) +void debug_set_level(debug_info_t *id, int new_level) { unsigned long flags; - if(!id) - return; - spin_lock_irqsave(&id->lock,flags); - if(new_level == DEBUG_OFF_LEVEL){ - id->level = DEBUG_OFF_LEVEL; - pr_info("%s: switched off\n",id->name); - } else if ((new_level > DEBUG_MAX_LEVEL) || (new_level < 0)) { + + if (!id) + return; + spin_lock_irqsave(&id->lock, flags); + if (new_level == DEBUG_OFF_LEVEL) { + id->level = DEBUG_OFF_LEVEL; + pr_info("%s: switched off\n", id->name); + } else if ((new_level > DEBUG_MAX_LEVEL) || (new_level < 0)) { pr_info("%s: level %i is out of range (%i - %i)\n", - id->name, new_level, 0, DEBUG_MAX_LEVEL); - } else { - id->level = new_level; - } - spin_unlock_irqrestore(&id->lock,flags); + id->name, new_level, 0, DEBUG_MAX_LEVEL); + } else { + id->level = new_level; + } + spin_unlock_irqrestore(&id->lock, flags); } EXPORT_SYMBOL(debug_set_level); @@ -821,12 +771,10 @@ EXPORT_SYMBOL(debug_set_level); * proceed_active_entry: * - set active entry to next in the ring buffer */ - -static inline void -proceed_active_entry(debug_info_t * id) +static inline void proceed_active_entry(debug_info_t *id) { if ((id->active_entries[id->active_area] += id->entry_size) - > (PAGE_SIZE - id->entry_size)){ + > (PAGE_SIZE - id->entry_size)) { id->active_entries[id->active_area] = 0; id->active_pages[id->active_area] = (id->active_pages[id->active_area] + 1) % @@ -838,9 +786,7 @@ proceed_active_entry(debug_info_t * id) * proceed_active_area: * - set active area to next in the ring buffer */ - -static inline void -proceed_active_area(debug_info_t * id) +static inline void proceed_active_area(debug_info_t *id) { id->active_area++; id->active_area = id->active_area % id->nr_areas; @@ -849,13 +795,11 @@ proceed_active_area(debug_info_t * id) /* * get_active_entry: */ - -static inline debug_entry_t* -get_active_entry(debug_info_t * id) +static inline debug_entry_t *get_active_entry(debug_info_t *id) { return (debug_entry_t *) (((char *) id->areas[id->active_area] - [id->active_pages[id->active_area]]) + - id->active_entries[id->active_area]); + [id->active_pages[id->active_area]]) + + id->active_entries[id->active_area]); } /* @@ -863,23 +807,22 @@ get_active_entry(debug_info_t * id) * - set timestamp, caller address, cpu number etc. */ -static inline void -debug_finish_entry(debug_info_t * id, debug_entry_t* active, int level, - int exception) +static inline void debug_finish_entry(debug_info_t *id, debug_entry_t *active, + int level, int exception) { active->id.stck = get_tod_clock_fast() - *(unsigned long long *) &tod_clock_base[1]; active->id.fields.cpuid = smp_processor_id(); active->caller = __builtin_return_address(0); active->id.fields.exception = exception; - active->id.fields.level = level; + active->id.fields.level = level; proceed_active_entry(id); - if(exception) + if (exception) proceed_active_area(id); } -static int debug_stoppable=1; -static int debug_active=1; +static int debug_stoppable = 1; +static int debug_active = 1; #define CTL_S390DBF_STOPPABLE 5678 #define CTL_S390DBF_ACTIVE 5679 @@ -889,9 +832,8 @@ static int debug_active=1; * always allow read, allow write only if debug_stoppable is set or * if debug_active is already off */ -static int -s390dbf_procactive(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) +static int s390dbf_procactive(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) { if (!write || debug_stoppable || !debug_active) return proc_dointvec(table, write, buffer, lenp, ppos); @@ -899,39 +841,37 @@ s390dbf_procactive(struct ctl_table *table, int write, return 0; } - static struct ctl_table s390dbf_table[] = { { - .procname = "debug_stoppable", + .procname = "debug_stoppable", .data = &debug_stoppable, .maxlen = sizeof(int), - .mode = S_IRUGO | S_IWUSR, - .proc_handler = proc_dointvec, + .mode = S_IRUGO | S_IWUSR, + .proc_handler = proc_dointvec, }, - { - .procname = "debug_active", + { + .procname = "debug_active", .data = &debug_active, .maxlen = sizeof(int), - .mode = S_IRUGO | S_IWUSR, - .proc_handler = s390dbf_procactive, + .mode = S_IRUGO | S_IWUSR, + .proc_handler = s390dbf_procactive, }, { } }; static struct ctl_table s390dbf_dir_table[] = { { - .procname = "s390dbf", - .maxlen = 0, - .mode = S_IRUGO | S_IXUGO, - .child = s390dbf_table, + .procname = "s390dbf", + .maxlen = 0, + .mode = S_IRUGO | S_IXUGO, + .child = s390dbf_table, }, { } }; static struct ctl_table_header *s390dbf_sysctl_header; -void -debug_stop_all(void) +void debug_stop_all(void) { if (debug_stoppable) debug_active = 0; @@ -947,26 +887,31 @@ void debug_set_critical(void) * debug_event_common: * - write debug entry with given size */ - -debug_entry_t* -debug_event_common(debug_info_t * id, int level, const void *buf, int len) +debug_entry_t *debug_event_common(debug_info_t *id, int level, const void *buf, + int len) { - unsigned long flags; debug_entry_t *active; + unsigned long flags; if (!debug_active || !id->areas) return NULL; if (debug_critical) { if (!spin_trylock_irqsave(&id->lock, flags)) return NULL; - } else + } else { spin_lock_irqsave(&id->lock, flags); - active = get_active_entry(id); - memset(DEBUG_DATA(active), 0, id->buf_size); - memcpy(DEBUG_DATA(active), buf, min(len, id->buf_size)); - debug_finish_entry(id, active, level, 0); - spin_unlock_irqrestore(&id->lock, flags); + } + do { + active = get_active_entry(id); + memcpy(DEBUG_DATA(active), buf, min(len, id->buf_size)); + if (len < id->buf_size) + memset((DEBUG_DATA(active)) + len, 0, id->buf_size - len); + debug_finish_entry(id, active, level, 0); + len -= id->buf_size; + buf += id->buf_size; + } while (len > 0); + spin_unlock_irqrestore(&id->lock, flags); return active; } EXPORT_SYMBOL(debug_event_common); @@ -975,26 +920,31 @@ EXPORT_SYMBOL(debug_event_common); * debug_exception_common: * - write debug entry with given size and switch to next debug area */ - -debug_entry_t -*debug_exception_common(debug_info_t * id, int level, const void *buf, int len) +debug_entry_t *debug_exception_common(debug_info_t *id, int level, + const void *buf, int len) { - unsigned long flags; debug_entry_t *active; + unsigned long flags; if (!debug_active || !id->areas) return NULL; if (debug_critical) { if (!spin_trylock_irqsave(&id->lock, flags)) return NULL; - } else + } else { spin_lock_irqsave(&id->lock, flags); - active = get_active_entry(id); - memset(DEBUG_DATA(active), 0, id->buf_size); - memcpy(DEBUG_DATA(active), buf, min(len, id->buf_size)); - debug_finish_entry(id, active, level, 1); - spin_unlock_irqrestore(&id->lock, flags); + } + do { + active = get_active_entry(id); + memcpy(DEBUG_DATA(active), buf, min(len, id->buf_size)); + if (len < id->buf_size) + memset((DEBUG_DATA(active)) + len, 0, id->buf_size - len); + debug_finish_entry(id, active, level, len <= id->buf_size); + len -= id->buf_size; + buf += id->buf_size; + } while (len > 0); + spin_unlock_irqrestore(&id->lock, flags); return active; } EXPORT_SYMBOL(debug_exception_common); @@ -1002,47 +952,44 @@ EXPORT_SYMBOL(debug_exception_common); /* * counts arguments in format string for sprintf view */ - -static inline int -debug_count_numargs(char *string) +static inline int debug_count_numargs(char *string) { - int numargs=0; + int numargs = 0; - while(*string) { - if(*string++=='%') + while (*string) { + if (*string++ == '%') numargs++; } - return(numargs); + return numargs; } /* * debug_sprintf_event: */ - -debug_entry_t* -__debug_sprintf_event(debug_info_t *id, int level, char *string, ...) +debug_entry_t *__debug_sprintf_event(debug_info_t *id, int level, char *string, ...) { - va_list ap; - int numargs,idx; - unsigned long flags; debug_sprintf_entry_t *curr_event; debug_entry_t *active; + unsigned long flags; + int numargs, idx; + va_list ap; if (!debug_active || !id->areas) return NULL; - numargs=debug_count_numargs(string); + numargs = debug_count_numargs(string); if (debug_critical) { if (!spin_trylock_irqsave(&id->lock, flags)) return NULL; - } else + } else { spin_lock_irqsave(&id->lock, flags); + } active = get_active_entry(id); - curr_event=(debug_sprintf_entry_t *) DEBUG_DATA(active); - va_start(ap,string); - curr_event->string=string; - for(idx=0;idx<min(numargs,(int)(id->buf_size / sizeof(long))-1);idx++) - curr_event->args[idx]=va_arg(ap,long); + curr_event = (debug_sprintf_entry_t *) DEBUG_DATA(active); + va_start(ap, string); + curr_event->string = string; + for (idx = 0; idx < min(numargs, (int)(id->buf_size / sizeof(long)) - 1); idx++) + curr_event->args[idx] = va_arg(ap, long); va_end(ap); debug_finish_entry(id, active, level, 0); spin_unlock_irqrestore(&id->lock, flags); @@ -1054,32 +1001,31 @@ EXPORT_SYMBOL(__debug_sprintf_event); /* * debug_sprintf_exception: */ - -debug_entry_t* -__debug_sprintf_exception(debug_info_t *id, int level, char *string, ...) +debug_entry_t *__debug_sprintf_exception(debug_info_t *id, int level, char *string, ...) { - va_list ap; - int numargs,idx; - unsigned long flags; debug_sprintf_entry_t *curr_event; debug_entry_t *active; + unsigned long flags; + int numargs, idx; + va_list ap; if (!debug_active || !id->areas) return NULL; - numargs=debug_count_numargs(string); + numargs = debug_count_numargs(string); if (debug_critical) { if (!spin_trylock_irqsave(&id->lock, flags)) return NULL; - } else + } else { spin_lock_irqsave(&id->lock, flags); + } active = get_active_entry(id); - curr_event=(debug_sprintf_entry_t *)DEBUG_DATA(active); - va_start(ap,string); - curr_event->string=string; - for(idx=0;idx<min(numargs,(int)(id->buf_size / sizeof(long))-1);idx++) - curr_event->args[idx]=va_arg(ap,long); + curr_event = (debug_sprintf_entry_t *)DEBUG_DATA(active); + va_start(ap, string); + curr_event->string = string; + for (idx = 0; idx < min(numargs, (int)(id->buf_size / sizeof(long)) - 1); idx++) + curr_event->args[idx] = va_arg(ap, long); va_end(ap); debug_finish_entry(id, active, level, 1); spin_unlock_irqrestore(&id->lock, flags); @@ -1091,15 +1037,13 @@ EXPORT_SYMBOL(__debug_sprintf_exception); /* * debug_register_view: */ - -int -debug_register_view(debug_info_t * id, struct debug_view *view) +int debug_register_view(debug_info_t *id, struct debug_view *view) { - int rc = 0; - int i; unsigned long flags; - umode_t mode; struct dentry *pde; + umode_t mode; + int rc = 0; + int i; if (!id) goto out; @@ -1109,10 +1053,10 @@ debug_register_view(debug_info_t * id, struct debug_view *view) if (!view->input_proc) mode &= ~(S_IWUSR | S_IWGRP | S_IWOTH); pde = debugfs_create_file(view->name, mode, id->debugfs_root_entry, - id , &debug_file_ops); - if (!pde){ + id, &debug_file_ops); + if (!pde) { pr_err("Registering view %s/%s failed due to out of " - "memory\n", id->name,view->name); + "memory\n", id->name, view->name); rc = -1; goto out; } @@ -1140,9 +1084,7 @@ EXPORT_SYMBOL(debug_register_view); /* * debug_unregister_view: */ - -int -debug_unregister_view(debug_info_t * id, struct debug_view *view) +int debug_unregister_view(debug_info_t *id, struct debug_view *view) { struct dentry *dentry = NULL; unsigned long flags; @@ -1155,9 +1097,9 @@ debug_unregister_view(debug_info_t * id, struct debug_view *view) if (id->views[i] == view) break; } - if (i == DEBUG_MAX_VIEWS) + if (i == DEBUG_MAX_VIEWS) { rc = -1; - else { + } else { dentry = id->debugfs_entries[i]; id->views[i] = NULL; id->debugfs_entries[i] = NULL; @@ -1169,10 +1111,10 @@ out: } EXPORT_SYMBOL(debug_unregister_view); -static inline char * -debug_get_user_string(const char __user *user_buf, size_t user_len) +static inline char *debug_get_user_string(const char __user *user_buf, + size_t user_len) { - char* buffer; + char *buffer; buffer = kmalloc(user_len + 1, GFP_KERNEL); if (!buffer) @@ -1186,19 +1128,17 @@ debug_get_user_string(const char __user *user_buf, size_t user_len) buffer[user_len - 1] = 0; else buffer[user_len] = 0; - return buffer; + return buffer; } -static inline int -debug_get_uint(char *buf) +static inline int debug_get_uint(char *buf) { int rc; buf = skip_spaces(buf); rc = simple_strtoul(buf, &buf, 10); - if(*buf){ + if (*buf) rc = -EINVAL; - } return rc; } @@ -1211,9 +1151,8 @@ debug_get_uint(char *buf) * prints out actual debug level */ -static int -debug_prolog_pages_fn(debug_info_t * id, - struct debug_view *view, char *out_buf) +static int debug_prolog_pages_fn(debug_info_t *id, struct debug_view *view, + char *out_buf) { return sprintf(out_buf, "%i\n", id->pages_per_area); } @@ -1222,32 +1161,31 @@ debug_prolog_pages_fn(debug_info_t * id, * reads new size (number of pages per debug area) */ -static int -debug_input_pages_fn(debug_info_t * id, struct debug_view *view, - struct file *file, const char __user *user_buf, - size_t user_len, loff_t * offset) +static int debug_input_pages_fn(debug_info_t *id, struct debug_view *view, + struct file *file, const char __user *user_buf, + size_t user_len, loff_t *offset) { + int rc, new_pages; char *str; - int rc,new_pages; if (user_len > 0x10000) - user_len = 0x10000; - if (*offset != 0){ + user_len = 0x10000; + if (*offset != 0) { rc = -EPIPE; goto out; } - str = debug_get_user_string(user_buf,user_len); - if(IS_ERR(str)){ + str = debug_get_user_string(user_buf, user_len); + if (IS_ERR(str)) { rc = PTR_ERR(str); goto out; } new_pages = debug_get_uint(str); - if(new_pages < 0){ + if (new_pages < 0) { rc = -EINVAL; goto free_str; } - rc = debug_set_size(id,id->nr_areas, new_pages); - if(rc != 0){ + rc = debug_set_size(id, id->nr_areas, new_pages); + if (rc != 0) { rc = -EINVAL; goto free_str; } @@ -1262,52 +1200,47 @@ out: /* * prints out actual debug level */ - -static int -debug_prolog_level_fn(debug_info_t * id, struct debug_view *view, char *out_buf) +static int debug_prolog_level_fn(debug_info_t *id, struct debug_view *view, + char *out_buf) { int rc = 0; - if(id->level == DEBUG_OFF_LEVEL) { - rc = sprintf(out_buf,"-\n"); - } - else { + if (id->level == DEBUG_OFF_LEVEL) + rc = sprintf(out_buf, "-\n"); + else rc = sprintf(out_buf, "%i\n", id->level); - } return rc; } /* * reads new debug level */ - -static int -debug_input_level_fn(debug_info_t * id, struct debug_view *view, - struct file *file, const char __user *user_buf, - size_t user_len, loff_t * offset) +static int debug_input_level_fn(debug_info_t *id, struct debug_view *view, + struct file *file, const char __user *user_buf, + size_t user_len, loff_t *offset) { + int rc, new_level; char *str; - int rc,new_level; if (user_len > 0x10000) - user_len = 0x10000; - if (*offset != 0){ + user_len = 0x10000; + if (*offset != 0) { rc = -EPIPE; goto out; } - str = debug_get_user_string(user_buf,user_len); - if(IS_ERR(str)){ + str = debug_get_user_string(user_buf, user_len); + if (IS_ERR(str)) { rc = PTR_ERR(str); goto out; } - if(str[0] == '-'){ + if (str[0] == '-') { debug_set_level(id, DEBUG_OFF_LEVEL); rc = user_len; goto free_str; } else { new_level = debug_get_uint(str); } - if(new_level < 0) { + if (new_level < 0) { pr_warn("%s is not a valid level for a debug feature\n", str); rc = -EINVAL; } else { @@ -1321,99 +1254,90 @@ out: return rc; /* number of input characters */ } - /* * flushes debug areas */ - -static void debug_flush(debug_info_t* id, int area) +static void debug_flush(debug_info_t *id, int area) { - unsigned long flags; - int i,j; - - if(!id || !id->areas) - return; - spin_lock_irqsave(&id->lock,flags); - if(area == DEBUG_FLUSH_ALL){ - id->active_area = 0; - memset(id->active_entries, 0, id->nr_areas * sizeof(int)); - for (i = 0; i < id->nr_areas; i++) { + unsigned long flags; + int i, j; + + if (!id || !id->areas) + return; + spin_lock_irqsave(&id->lock, flags); + if (area == DEBUG_FLUSH_ALL) { + id->active_area = 0; + memset(id->active_entries, 0, id->nr_areas * sizeof(int)); + for (i = 0; i < id->nr_areas; i++) { id->active_pages[i] = 0; - for(j = 0; j < id->pages_per_area; j++) { - memset(id->areas[i][j], 0, PAGE_SIZE); - } + for (j = 0; j < id->pages_per_area; j++) + memset(id->areas[i][j], 0, PAGE_SIZE); } - } else if(area >= 0 && area < id->nr_areas) { - id->active_entries[area] = 0; + } else if (area >= 0 && area < id->nr_areas) { + id->active_entries[area] = 0; id->active_pages[area] = 0; - for(i = 0; i < id->pages_per_area; i++) { - memset(id->areas[area][i],0,PAGE_SIZE); - } - } - spin_unlock_irqrestore(&id->lock,flags); + for (i = 0; i < id->pages_per_area; i++) + memset(id->areas[area][i], 0, PAGE_SIZE); + } + spin_unlock_irqrestore(&id->lock, flags); } /* - * view function: flushes debug areas + * view function: flushes debug areas */ - -static int -debug_input_flush_fn(debug_info_t * id, struct debug_view *view, - struct file *file, const char __user *user_buf, - size_t user_len, loff_t * offset) +static int debug_input_flush_fn(debug_info_t *id, struct debug_view *view, + struct file *file, const char __user *user_buf, + size_t user_len, loff_t *offset) { - char input_buf[1]; - int rc = user_len; + char input_buf[1]; + int rc = user_len; if (user_len > 0x10000) - user_len = 0x10000; - if (*offset != 0){ + user_len = 0x10000; + if (*offset != 0) { rc = -EPIPE; - goto out; + goto out; + } + if (copy_from_user(input_buf, user_buf, 1)) { + rc = -EFAULT; + goto out; + } + if (input_buf[0] == '-') { + debug_flush(id, DEBUG_FLUSH_ALL); + goto out; + } + if (isdigit(input_buf[0])) { + int area = ((int) input_buf[0] - (int) '0'); + + debug_flush(id, area); + goto out; } - if (copy_from_user(input_buf, user_buf, 1)){ - rc = -EFAULT; - goto out; - } - if(input_buf[0] == '-') { - debug_flush(id, DEBUG_FLUSH_ALL); - goto out; - } - if (isdigit(input_buf[0])) { - int area = ((int) input_buf[0] - (int) '0'); - debug_flush(id, area); - goto out; - } pr_info("Flushing debug data failed because %c is not a valid " "area\n", input_buf[0]); out: - *offset += user_len; - return rc; /* number of input characters */ + *offset += user_len; + return rc; /* number of input characters */ } /* * prints debug header in raw format */ - -static int -debug_raw_header_fn(debug_info_t * id, struct debug_view *view, - int area, debug_entry_t * entry, char *out_buf) +static int debug_raw_header_fn(debug_info_t *id, struct debug_view *view, + int area, debug_entry_t *entry, char *out_buf) { - int rc; + int rc; rc = sizeof(debug_entry_t); - memcpy(out_buf,entry,sizeof(debug_entry_t)); - return rc; + memcpy(out_buf, entry, sizeof(debug_entry_t)); + return rc; } /* * prints debug data in raw format */ - -static int -debug_raw_format_fn(debug_info_t * id, struct debug_view *view, +static int debug_raw_format_fn(debug_info_t *id, struct debug_view *view, char *out_buf, const char *in_buf) { int rc; @@ -1426,20 +1350,17 @@ debug_raw_format_fn(debug_info_t * id, struct debug_view *view, /* * prints debug data in hex/ascii format */ - -static int -debug_hex_ascii_format_fn(debug_info_t * id, struct debug_view *view, - char *out_buf, const char *in_buf) +static int debug_hex_ascii_format_fn(debug_info_t *id, struct debug_view *view, + char *out_buf, const char *in_buf) { int i, rc = 0; - for (i = 0; i < id->buf_size; i++) { - rc += sprintf(out_buf + rc, "%02x ", - ((unsigned char *) in_buf)[i]); - } + for (i = 0; i < id->buf_size; i++) + rc += sprintf(out_buf + rc, "%02x ", ((unsigned char *) in_buf)[i]); rc += sprintf(out_buf + rc, "| "); for (i = 0; i < id->buf_size; i++) { unsigned char c = in_buf[i]; + if (isascii(c) && isprint(c)) rc += sprintf(out_buf + rc, "%c", c); else @@ -1452,16 +1373,14 @@ debug_hex_ascii_format_fn(debug_info_t * id, struct debug_view *view, /* * prints header for debug entry */ - -int -debug_dflt_header_fn(debug_info_t * id, struct debug_view *view, - int area, debug_entry_t * entry, char *out_buf) +int debug_dflt_header_fn(debug_info_t *id, struct debug_view *view, + int area, debug_entry_t *entry, char *out_buf) { unsigned long base, sec, usec; - char *except_str; unsigned long caller; - int rc = 0; unsigned int level; + char *except_str; + int rc = 0; level = entry->id.fields.level; base = (*(unsigned long *) &tod_clock_base[0]) >> 4; @@ -1487,19 +1406,18 @@ EXPORT_SYMBOL(debug_dflt_header_fn); #define DEBUG_SPRINTF_MAX_ARGS 10 -static int -debug_sprintf_format_fn(debug_info_t * id, struct debug_view *view, - char *out_buf, debug_sprintf_entry_t *curr_event) +static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view, + char *out_buf, debug_sprintf_entry_t *curr_event) { - int num_longs, num_used_args = 0,i, rc = 0; + int num_longs, num_used_args = 0, i, rc = 0; int index[DEBUG_SPRINTF_MAX_ARGS]; /* count of longs fit into one entry */ - num_longs = id->buf_size / sizeof(long); + num_longs = id->buf_size / sizeof(long); - if(num_longs < 1) + if (num_longs < 1) goto out; /* bufsize of entry too small */ - if(num_longs == 1) { + if (num_longs == 1) { /* no args, we use only the string */ strcpy(out_buf, curr_event->string); rc = strlen(curr_event->string); @@ -1507,22 +1425,20 @@ debug_sprintf_format_fn(debug_info_t * id, struct debug_view *view, } /* number of arguments used for sprintf (without the format string) */ - num_used_args = min(DEBUG_SPRINTF_MAX_ARGS, (num_longs - 1)); + num_used_args = min(DEBUG_SPRINTF_MAX_ARGS, (num_longs - 1)); - memset(index,0, DEBUG_SPRINTF_MAX_ARGS * sizeof(int)); + memset(index, 0, DEBUG_SPRINTF_MAX_ARGS * sizeof(int)); - for(i = 0; i < num_used_args; i++) + for (i = 0; i < num_used_args; i++) index[i] = i; - rc = sprintf(out_buf, curr_event->string, curr_event->args[index[0]], - curr_event->args[index[1]], curr_event->args[index[2]], - curr_event->args[index[3]], curr_event->args[index[4]], - curr_event->args[index[5]], curr_event->args[index[6]], - curr_event->args[index[7]], curr_event->args[index[8]], - curr_event->args[index[9]]); - + rc = sprintf(out_buf, curr_event->string, curr_event->args[index[0]], + curr_event->args[index[1]], curr_event->args[index[2]], + curr_event->args[index[3]], curr_event->args[index[4]], + curr_event->args[index[5]], curr_event->args[index[6]], + curr_event->args[index[7]], curr_event->args[index[8]], + curr_event->args[index[9]]); out: - return rc; } diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c index f7e82302a71e..b811d3a8417d 100644 --- a/arch/s390/kernel/dis.c +++ b/arch/s390/kernel/dis.c @@ -21,52 +21,91 @@ #include <linux/reboot.h> #include <linux/kprobes.h> #include <linux/kdebug.h> - #include <linux/uaccess.h> +#include <linux/atomic.h> #include <asm/dis.h> #include <asm/io.h> -#include <linux/atomic.h> #include <asm/cpcmd.h> #include <asm/lowcore.h> #include <asm/debug.h> #include <asm/irq.h> +/* Type of operand */ +#define OPERAND_GPR 0x1 /* Operand printed as %rx */ +#define OPERAND_FPR 0x2 /* Operand printed as %fx */ +#define OPERAND_AR 0x4 /* Operand printed as %ax */ +#define OPERAND_CR 0x8 /* Operand printed as %cx */ +#define OPERAND_VR 0x10 /* Operand printed as %vx */ +#define OPERAND_DISP 0x20 /* Operand printed as displacement */ +#define OPERAND_BASE 0x40 /* Operand printed as base register */ +#define OPERAND_INDEX 0x80 /* Operand printed as index register */ +#define OPERAND_PCREL 0x100 /* Operand printed as pc-relative symbol */ +#define OPERAND_SIGNED 0x200 /* Operand printed as signed value */ +#define OPERAND_LENGTH 0x400 /* Operand printed as length (+1) */ + +struct s390_operand { + unsigned char bits; /* The number of bits in the operand. */ + unsigned char shift; /* The number of bits to shift. */ + unsigned short flags; /* One bit syntax flags. */ +}; + +struct s390_insn { + union { + const char name[5]; + struct { + unsigned char zero; + unsigned int offset; + } __packed; + }; + unsigned char opfrag; + unsigned char format; +}; + +struct s390_opcode_offset { + unsigned char opcode; + unsigned char mask; + unsigned char byte; + unsigned short offset; + unsigned short count; +} __packed; + enum { - UNUSED, /* Indicates the end of the operand list */ - R_8, /* GPR starting at position 8 */ - R_12, /* GPR starting at position 12 */ - R_16, /* GPR starting at position 16 */ - R_20, /* GPR starting at position 20 */ - R_24, /* GPR starting at position 24 */ - R_28, /* GPR starting at position 28 */ - R_32, /* GPR starting at position 32 */ - F_8, /* FPR starting at position 8 */ - F_12, /* FPR starting at position 12 */ - F_16, /* FPR starting at position 16 */ - F_20, /* FPR starting at position 16 */ - F_24, /* FPR starting at position 24 */ - F_28, /* FPR starting at position 28 */ - F_32, /* FPR starting at position 32 */ + UNUSED, A_8, /* Access reg. starting at position 8 */ A_12, /* Access reg. starting at position 12 */ A_24, /* Access reg. starting at position 24 */ A_28, /* Access reg. starting at position 28 */ - C_8, /* Control reg. starting at position 8 */ - C_12, /* Control reg. starting at position 12 */ - V_8, /* Vector reg. starting at position 8, extension bit at 36 */ - V_12, /* Vector reg. starting at position 12, extension bit at 37 */ - V_16, /* Vector reg. starting at position 16, extension bit at 38 */ - V_32, /* Vector reg. starting at position 32, extension bit at 39 */ - W_12, /* Vector reg. at bit 12, extension at bit 37, used as index */ B_16, /* Base register starting at position 16 */ B_32, /* Base register starting at position 32 */ - X_12, /* Index register starting at position 12 */ + C_8, /* Control reg. starting at position 8 */ + C_12, /* Control reg. starting at position 12 */ + D20_20, /* 20 bit displacement starting at 20 */ D_20, /* Displacement starting at position 20 */ D_36, /* Displacement starting at position 36 */ - D20_20, /* 20 bit displacement starting at 20 */ + F_8, /* FPR starting at position 8 */ + F_12, /* FPR starting at position 12 */ + F_16, /* FPR starting at position 16 */ + F_24, /* FPR starting at position 24 */ + F_28, /* FPR starting at position 28 */ + F_32, /* FPR starting at position 32 */ + I8_8, /* 8 bit signed value starting at 8 */ + I8_32, /* 8 bit signed value starting at 32 */ + I16_16, /* 16 bit signed value starting at 16 */ + I16_32, /* 16 bit signed value starting at 32 */ + I32_16, /* 32 bit signed value starting at 16 */ + J12_12, /* 12 bit PC relative offset at 12 */ + J16_16, /* 16 bit PC relative offset at 16 */ + J16_32, /* 16 bit PC relative offset at 32 */ + J24_24, /* 24 bit PC relative offset at 24 */ + J32_16, /* 32 bit PC relative offset at 16 */ L4_8, /* 4 bit length starting at position 8 */ L4_12, /* 4 bit length starting at position 12 */ L8_8, /* 8 bit length starting at position 8 */ + R_8, /* GPR starting at position 8 */ + R_12, /* GPR starting at position 12 */ + R_16, /* GPR starting at position 16 */ + R_24, /* GPR starting at position 24 */ + R_28, /* GPR starting at position 28 */ U4_8, /* 4 bit unsigned value starting at 8 */ U4_12, /* 4 bit unsigned value starting at 12 */ U4_16, /* 4 bit unsigned value starting at 16 */ @@ -78,1651 +117,226 @@ enum { U8_8, /* 8 bit unsigned value starting at 8 */ U8_16, /* 8 bit unsigned value starting at 16 */ U8_24, /* 8 bit unsigned value starting at 24 */ + U8_28, /* 8 bit unsigned value starting at 28 */ U8_32, /* 8 bit unsigned value starting at 32 */ - I8_8, /* 8 bit signed value starting at 8 */ - I8_16, /* 8 bit signed value starting at 16 */ - I8_24, /* 8 bit signed value starting at 24 */ - I8_32, /* 8 bit signed value starting at 32 */ - J12_12, /* PC relative offset at 12 */ - I16_16, /* 16 bit signed value starting at 16 */ - I16_32, /* 32 bit signed value starting at 16 */ - U16_16, /* 16 bit unsigned value starting at 16 */ - U16_32, /* 32 bit unsigned value starting at 16 */ - J16_16, /* PC relative jump offset at 16 */ - J16_32, /* PC relative offset at 16 */ - I24_24, /* 24 bit signed value starting at 24 */ - J32_16, /* PC relative long offset at 16 */ - I32_16, /* 32 bit signed value starting at 16 */ - U32_16, /* 32 bit unsigned value starting at 16 */ - M_16, /* 4 bit optional mask starting at 16 */ - M_20, /* 4 bit optional mask starting at 20 */ - M_24, /* 4 bit optional mask starting at 24 */ - M_28, /* 4 bit optional mask starting at 28 */ - M_32, /* 4 bit optional mask starting at 32 */ - RO_28, /* optional GPR starting at position 28 */ -}; - -/* - * Enumeration of the different instruction formats. - * For details consult the principles of operation. - */ -enum { - INSTR_INVALID, - INSTR_E, - INSTR_IE_UU, - INSTR_MII_UPI, - INSTR_RIE_R0IU, INSTR_RIE_R0UU, INSTR_RIE_RRP, INSTR_RIE_RRPU, - INSTR_RIE_RRUUU, INSTR_RIE_RUPI, INSTR_RIE_RUPU, INSTR_RIE_RRI0, - INSTR_RIL_RI, INSTR_RIL_RP, INSTR_RIL_RU, INSTR_RIL_UP, - INSTR_RIS_R0RDU, INSTR_RIS_R0UU, INSTR_RIS_RURDI, INSTR_RIS_RURDU, - INSTR_RI_RI, INSTR_RI_RP, INSTR_RI_RU, INSTR_RI_UP, - INSTR_RRE_00, INSTR_RRE_0R, INSTR_RRE_AA, INSTR_RRE_AR, INSTR_RRE_F0, - INSTR_RRE_FF, INSTR_RRE_FR, INSTR_RRE_R0, INSTR_RRE_RA, INSTR_RRE_RF, - INSTR_RRE_RR, INSTR_RRE_RR_OPT, - INSTR_RRF_0UFF, INSTR_RRF_F0FF, INSTR_RRF_F0FF2, INSTR_RRF_F0FR, - INSTR_RRF_FFRU, INSTR_RRF_FUFF, INSTR_RRF_FUFF2, INSTR_RRF_M0RR, - INSTR_RRF_R0RR, INSTR_RRF_R0RR2, INSTR_RRF_RMRR, INSTR_RRF_RURR, - INSTR_RRF_U0FF, INSTR_RRF_U0RF, INSTR_RRF_U0RR, INSTR_RRF_UUFF, - INSTR_RRF_UUFR, INSTR_RRF_UURF, - INSTR_RRR_F0FF, INSTR_RRS_RRRDU, - INSTR_RR_FF, INSTR_RR_R0, INSTR_RR_RR, INSTR_RR_U0, INSTR_RR_UR, - INSTR_RSE_CCRD, INSTR_RSE_RRRD, INSTR_RSE_RURD, - INSTR_RSI_RRP, - INSTR_RSL_LRDFU, INSTR_RSL_R0RD, - INSTR_RSY_AARD, INSTR_RSY_CCRD, INSTR_RSY_RRRD, INSTR_RSY_RURD, - INSTR_RSY_RDRM, INSTR_RSY_RMRD, - INSTR_RS_AARD, INSTR_RS_CCRD, INSTR_RS_R0RD, INSTR_RS_RRRD, - INSTR_RS_RURD, - INSTR_RXE_FRRD, INSTR_RXE_RRRD, INSTR_RXE_RRRDM, - INSTR_RXF_FRRDF, - INSTR_RXY_FRRD, INSTR_RXY_RRRD, INSTR_RXY_URRD, - INSTR_RX_FRRD, INSTR_RX_RRRD, INSTR_RX_URRD, - INSTR_SIL_RDI, INSTR_SIL_RDU, - INSTR_SIY_IRD, INSTR_SIY_URD, - INSTR_SI_URD, - INSTR_SMI_U0RDP, - INSTR_SSE_RDRD, - INSTR_SSF_RRDRD, INSTR_SSF_RRDRD2, - INSTR_SS_L0RDRD, INSTR_SS_LIRDRD, INSTR_SS_LLRDRD, INSTR_SS_RRRDRD, - INSTR_SS_RRRDRD2, INSTR_SS_RRRDRD3, - INSTR_S_00, INSTR_S_RD, - INSTR_VRI_V0IM, INSTR_VRI_V0I0, INSTR_VRI_V0IIM, INSTR_VRI_VVIM, - INSTR_VRI_VVV0IM, INSTR_VRI_VVV0I0, INSTR_VRI_VVIMM, - INSTR_VRR_VV00MMM, INSTR_VRR_VV000MM, INSTR_VRR_VV0000M, - INSTR_VRR_VV00000, INSTR_VRR_VVV0M0M, INSTR_VRR_VV00M0M, - INSTR_VRR_VVV000M, INSTR_VRR_VVV000V, INSTR_VRR_VVV0000, - INSTR_VRR_VVV0MMM, INSTR_VRR_VVV00MM, INSTR_VRR_VVVMM0V, - INSTR_VRR_VVVM0MV, INSTR_VRR_VVVM00V, INSTR_VRR_VRR0000, - INSTR_VRS_VVRDM, INSTR_VRS_VVRD0, INSTR_VRS_VRRDM, INSTR_VRS_VRRD0, - INSTR_VRS_RVRDM, - INSTR_VRV_VVRDM, INSTR_VRV_VWRDM, - INSTR_VRX_VRRDM, INSTR_VRX_VRRD0, + U12_16, /* 12 bit unsigned value starting at 16 */ + U16_16, /* 16 bit unsigned value starting at 16 */ + U16_32, /* 16 bit unsigned value starting at 32 */ + U32_16, /* 32 bit unsigned value starting at 16 */ + VX_12, /* Vector index register starting at position 12 */ + V_8, /* Vector reg. starting at position 8 */ + V_12, /* Vector reg. starting at position 12 */ + V_16, /* Vector reg. starting at position 16 */ + V_32, /* Vector reg. starting at position 32 */ + X_12, /* Index register starting at position 12 */ }; -static const struct s390_operand operands[] = -{ - [UNUSED] = { 0, 0, 0 }, - [R_8] = { 4, 8, OPERAND_GPR }, - [R_12] = { 4, 12, OPERAND_GPR }, - [R_16] = { 4, 16, OPERAND_GPR }, - [R_20] = { 4, 20, OPERAND_GPR }, - [R_24] = { 4, 24, OPERAND_GPR }, - [R_28] = { 4, 28, OPERAND_GPR }, - [R_32] = { 4, 32, OPERAND_GPR }, - [F_8] = { 4, 8, OPERAND_FPR }, - [F_12] = { 4, 12, OPERAND_FPR }, - [F_16] = { 4, 16, OPERAND_FPR }, - [F_20] = { 4, 16, OPERAND_FPR }, - [F_24] = { 4, 24, OPERAND_FPR }, - [F_28] = { 4, 28, OPERAND_FPR }, - [F_32] = { 4, 32, OPERAND_FPR }, +static const struct s390_operand operands[] = { + [UNUSED] = { 0, 0, 0 }, [A_8] = { 4, 8, OPERAND_AR }, [A_12] = { 4, 12, OPERAND_AR }, [A_24] = { 4, 24, OPERAND_AR }, [A_28] = { 4, 28, OPERAND_AR }, - [C_8] = { 4, 8, OPERAND_CR }, - [C_12] = { 4, 12, OPERAND_CR }, - [V_8] = { 4, 8, OPERAND_VR }, - [V_12] = { 4, 12, OPERAND_VR }, - [V_16] = { 4, 16, OPERAND_VR }, - [V_32] = { 4, 32, OPERAND_VR }, - [W_12] = { 4, 12, OPERAND_INDEX | OPERAND_VR }, [B_16] = { 4, 16, OPERAND_BASE | OPERAND_GPR }, [B_32] = { 4, 32, OPERAND_BASE | OPERAND_GPR }, - [X_12] = { 4, 12, OPERAND_INDEX | OPERAND_GPR }, + [C_8] = { 4, 8, OPERAND_CR }, + [C_12] = { 4, 12, OPERAND_CR }, + [D20_20] = { 20, 20, OPERAND_DISP | OPERAND_SIGNED }, [D_20] = { 12, 20, OPERAND_DISP }, [D_36] = { 12, 36, OPERAND_DISP }, - [D20_20] = { 20, 20, OPERAND_DISP | OPERAND_SIGNED }, + [F_8] = { 4, 8, OPERAND_FPR }, + [F_12] = { 4, 12, OPERAND_FPR }, + [F_16] = { 4, 16, OPERAND_FPR }, + [F_24] = { 4, 24, OPERAND_FPR }, + [F_28] = { 4, 28, OPERAND_FPR }, + [F_32] = { 4, 32, OPERAND_FPR }, + [I8_8] = { 8, 8, OPERAND_SIGNED }, + [I8_32] = { 8, 32, OPERAND_SIGNED }, + [I16_16] = { 16, 16, OPERAND_SIGNED }, + [I16_32] = { 16, 32, OPERAND_SIGNED }, + [I32_16] = { 32, 16, OPERAND_SIGNED }, + [J12_12] = { 12, 12, OPERAND_PCREL }, + [J16_16] = { 16, 16, OPERAND_PCREL }, + [J16_32] = { 16, 32, OPERAND_PCREL }, + [J24_24] = { 24, 24, OPERAND_PCREL }, + [J32_16] = { 32, 16, OPERAND_PCREL }, [L4_8] = { 4, 8, OPERAND_LENGTH }, - [L4_12] = { 4, 12, OPERAND_LENGTH }, + [L4_12] = { 4, 12, OPERAND_LENGTH }, [L8_8] = { 8, 8, OPERAND_LENGTH }, + [R_8] = { 4, 8, OPERAND_GPR }, + [R_12] = { 4, 12, OPERAND_GPR }, + [R_16] = { 4, 16, OPERAND_GPR }, + [R_24] = { 4, 24, OPERAND_GPR }, + [R_28] = { 4, 28, OPERAND_GPR }, [U4_8] = { 4, 8, 0 }, - [U4_12] = { 4, 12, 0 }, - [U4_16] = { 4, 16, 0 }, - [U4_20] = { 4, 20, 0 }, - [U4_24] = { 4, 24, 0 }, - [U4_28] = { 4, 28, 0 }, - [U4_32] = { 4, 32, 0 }, - [U4_36] = { 4, 36, 0 }, + [U4_12] = { 4, 12, 0 }, + [U4_16] = { 4, 16, 0 }, + [U4_20] = { 4, 20, 0 }, + [U4_24] = { 4, 24, 0 }, + [U4_28] = { 4, 28, 0 }, + [U4_32] = { 4, 32, 0 }, + [U4_36] = { 4, 36, 0 }, [U8_8] = { 8, 8, 0 }, - [U8_16] = { 8, 16, 0 }, - [U8_24] = { 8, 24, 0 }, - [U8_32] = { 8, 32, 0 }, - [J12_12] = { 12, 12, OPERAND_PCREL }, - [I8_8] = { 8, 8, OPERAND_SIGNED }, - [I8_16] = { 8, 16, OPERAND_SIGNED }, - [I8_24] = { 8, 24, OPERAND_SIGNED }, - [I8_32] = { 8, 32, OPERAND_SIGNED }, - [I16_32] = { 16, 32, OPERAND_SIGNED }, - [I16_16] = { 16, 16, OPERAND_SIGNED }, + [U8_16] = { 8, 16, 0 }, + [U8_24] = { 8, 24, 0 }, + [U8_28] = { 8, 28, 0 }, + [U8_32] = { 8, 32, 0 }, + [U12_16] = { 12, 16, 0 }, [U16_16] = { 16, 16, 0 }, [U16_32] = { 16, 32, 0 }, - [J16_16] = { 16, 16, OPERAND_PCREL }, - [J16_32] = { 16, 32, OPERAND_PCREL }, - [I24_24] = { 24, 24, OPERAND_SIGNED }, - [J32_16] = { 32, 16, OPERAND_PCREL }, - [I32_16] = { 32, 16, OPERAND_SIGNED }, [U32_16] = { 32, 16, 0 }, - [M_16] = { 4, 16, 0 }, - [M_20] = { 4, 20, 0 }, - [M_24] = { 4, 24, 0 }, - [M_28] = { 4, 28, 0 }, - [M_32] = { 4, 32, 0 }, - [RO_28] = { 4, 28, OPERAND_GPR } -}; - -static const unsigned char formats[][7] = { - [INSTR_E] = { 0xff, 0,0,0,0,0,0 }, - [INSTR_IE_UU] = { 0xff, U4_24,U4_28,0,0,0,0 }, - [INSTR_MII_UPI] = { 0xff, U4_8,J12_12,I24_24 }, - [INSTR_RIE_R0IU] = { 0xff, R_8,I16_16,U4_32,0,0,0 }, - [INSTR_RIE_R0UU] = { 0xff, R_8,U16_16,U4_32,0,0,0 }, - [INSTR_RIE_RRI0] = { 0xff, R_8,R_12,I16_16,0,0,0 }, - [INSTR_RIE_RRPU] = { 0xff, R_8,R_12,U4_32,J16_16,0,0 }, - [INSTR_RIE_RRP] = { 0xff, R_8,R_12,J16_16,0,0,0 }, - [INSTR_RIE_RRUUU] = { 0xff, R_8,R_12,U8_16,U8_24,U8_32,0 }, - [INSTR_RIE_RUPI] = { 0xff, R_8,I8_32,U4_12,J16_16,0,0 }, - [INSTR_RIE_RUPU] = { 0xff, R_8,U8_32,U4_12,J16_16,0,0 }, - [INSTR_RIL_RI] = { 0x0f, R_8,I32_16,0,0,0,0 }, - [INSTR_RIL_RP] = { 0x0f, R_8,J32_16,0,0,0,0 }, - [INSTR_RIL_RU] = { 0x0f, R_8,U32_16,0,0,0,0 }, - [INSTR_RIL_UP] = { 0x0f, U4_8,J32_16,0,0,0,0 }, - [INSTR_RIS_R0RDU] = { 0xff, R_8,U8_32,D_20,B_16,0,0 }, - [INSTR_RIS_RURDI] = { 0xff, R_8,I8_32,U4_12,D_20,B_16,0 }, - [INSTR_RIS_RURDU] = { 0xff, R_8,U8_32,U4_12,D_20,B_16,0 }, - [INSTR_RI_RI] = { 0x0f, R_8,I16_16,0,0,0,0 }, - [INSTR_RI_RP] = { 0x0f, R_8,J16_16,0,0,0,0 }, - [INSTR_RI_RU] = { 0x0f, R_8,U16_16,0,0,0,0 }, - [INSTR_RI_UP] = { 0x0f, U4_8,J16_16,0,0,0,0 }, - [INSTR_RRE_00] = { 0xff, 0,0,0,0,0,0 }, - [INSTR_RRE_0R] = { 0xff, R_28,0,0,0,0,0 }, - [INSTR_RRE_AA] = { 0xff, A_24,A_28,0,0,0,0 }, - [INSTR_RRE_AR] = { 0xff, A_24,R_28,0,0,0,0 }, - [INSTR_RRE_F0] = { 0xff, F_24,0,0,0,0,0 }, - [INSTR_RRE_FF] = { 0xff, F_24,F_28,0,0,0,0 }, - [INSTR_RRE_FR] = { 0xff, F_24,R_28,0,0,0,0 }, - [INSTR_RRE_R0] = { 0xff, R_24,0,0,0,0,0 }, - [INSTR_RRE_RA] = { 0xff, R_24,A_28,0,0,0,0 }, - [INSTR_RRE_RF] = { 0xff, R_24,F_28,0,0,0,0 }, - [INSTR_RRE_RR] = { 0xff, R_24,R_28,0,0,0,0 }, - [INSTR_RRE_RR_OPT]= { 0xff, R_24,RO_28,0,0,0,0 }, - [INSTR_RRF_0UFF] = { 0xff, F_24,F_28,U4_20,0,0,0 }, - [INSTR_RRF_F0FF2] = { 0xff, F_24,F_16,F_28,0,0,0 }, - [INSTR_RRF_F0FF] = { 0xff, F_16,F_24,F_28,0,0,0 }, - [INSTR_RRF_F0FR] = { 0xff, F_24,F_16,R_28,0,0,0 }, - [INSTR_RRF_FFRU] = { 0xff, F_24,F_16,R_28,U4_20,0,0 }, - [INSTR_RRF_FUFF] = { 0xff, F_24,F_16,F_28,U4_20,0,0 }, - [INSTR_RRF_FUFF2] = { 0xff, F_24,F_28,F_16,U4_20,0,0 }, - [INSTR_RRF_M0RR] = { 0xff, R_24,R_28,M_16,0,0,0 }, - [INSTR_RRF_R0RR] = { 0xff, R_24,R_16,R_28,0,0,0 }, - [INSTR_RRF_R0RR2] = { 0xff, R_24,R_28,R_16,0,0,0 }, - [INSTR_RRF_RMRR] = { 0xff, R_24,R_16,R_28,M_20,0,0 }, - [INSTR_RRF_RURR] = { 0xff, R_24,R_28,R_16,U4_20,0,0 }, - [INSTR_RRF_U0FF] = { 0xff, F_24,U4_16,F_28,0,0,0 }, - [INSTR_RRF_U0RF] = { 0xff, R_24,U4_16,F_28,0,0,0 }, - [INSTR_RRF_U0RR] = { 0xff, R_24,R_28,U4_16,0,0,0 }, - [INSTR_RRF_UUFF] = { 0xff, F_24,U4_16,F_28,U4_20,0,0 }, - [INSTR_RRF_UUFR] = { 0xff, F_24,U4_16,R_28,U4_20,0,0 }, - [INSTR_RRF_UURF] = { 0xff, R_24,U4_16,F_28,U4_20,0,0 }, - [INSTR_RRR_F0FF] = { 0xff, F_24,F_28,F_16,0,0,0 }, - [INSTR_RRS_RRRDU] = { 0xff, R_8,R_12,U4_32,D_20,B_16,0 }, - [INSTR_RR_FF] = { 0xff, F_8,F_12,0,0,0,0 }, - [INSTR_RR_R0] = { 0xff, R_8, 0,0,0,0,0 }, - [INSTR_RR_RR] = { 0xff, R_8,R_12,0,0,0,0 }, - [INSTR_RR_U0] = { 0xff, U8_8, 0,0,0,0,0 }, - [INSTR_RR_UR] = { 0xff, U4_8,R_12,0,0,0,0 }, - [INSTR_RSE_CCRD] = { 0xff, C_8,C_12,D_20,B_16,0,0 }, - [INSTR_RSE_RRRD] = { 0xff, R_8,R_12,D_20,B_16,0,0 }, - [INSTR_RSE_RURD] = { 0xff, R_8,U4_12,D_20,B_16,0,0 }, - [INSTR_RSI_RRP] = { 0xff, R_8,R_12,J16_16,0,0,0 }, - [INSTR_RSL_LRDFU] = { 0xff, F_32,D_20,L4_8,B_16,U4_36,0 }, - [INSTR_RSL_R0RD] = { 0xff, D_20,L4_8,B_16,0,0,0 }, - [INSTR_RSY_AARD] = { 0xff, A_8,A_12,D20_20,B_16,0,0 }, - [INSTR_RSY_CCRD] = { 0xff, C_8,C_12,D20_20,B_16,0,0 }, - [INSTR_RSY_RDRM] = { 0xff, R_8,D20_20,B_16,U4_12,0,0 }, - [INSTR_RSY_RMRD] = { 0xff, R_8,U4_12,D20_20,B_16,0,0 }, - [INSTR_RSY_RRRD] = { 0xff, R_8,R_12,D20_20,B_16,0,0 }, - [INSTR_RSY_RURD] = { 0xff, R_8,U4_12,D20_20,B_16,0,0 }, - [INSTR_RS_AARD] = { 0xff, A_8,A_12,D_20,B_16,0,0 }, - [INSTR_RS_CCRD] = { 0xff, C_8,C_12,D_20,B_16,0,0 }, - [INSTR_RS_R0RD] = { 0xff, R_8,D_20,B_16,0,0,0 }, - [INSTR_RS_RRRD] = { 0xff, R_8,R_12,D_20,B_16,0,0 }, - [INSTR_RS_RURD] = { 0xff, R_8,U4_12,D_20,B_16,0,0 }, - [INSTR_RXE_FRRD] = { 0xff, F_8,D_20,X_12,B_16,0,0 }, - [INSTR_RXE_RRRD] = { 0xff, R_8,D_20,X_12,B_16,0,0 }, - [INSTR_RXE_RRRDM] = { 0xff, R_8,D_20,X_12,B_16,M_32,0 }, - [INSTR_RXF_FRRDF] = { 0xff, F_32,F_8,D_20,X_12,B_16,0 }, - [INSTR_RXY_FRRD] = { 0xff, F_8,D20_20,X_12,B_16,0,0 }, - [INSTR_RXY_RRRD] = { 0xff, R_8,D20_20,X_12,B_16,0,0 }, - [INSTR_RXY_URRD] = { 0xff, U4_8,D20_20,X_12,B_16,0,0 }, - [INSTR_RX_FRRD] = { 0xff, F_8,D_20,X_12,B_16,0,0 }, - [INSTR_RX_RRRD] = { 0xff, R_8,D_20,X_12,B_16,0,0 }, - [INSTR_RX_URRD] = { 0xff, U4_8,D_20,X_12,B_16,0,0 }, - [INSTR_SIL_RDI] = { 0xff, D_20,B_16,I16_32,0,0,0 }, - [INSTR_SIL_RDU] = { 0xff, D_20,B_16,U16_32,0,0,0 }, - [INSTR_SIY_IRD] = { 0xff, D20_20,B_16,I8_8,0,0,0 }, - [INSTR_SIY_URD] = { 0xff, D20_20,B_16,U8_8,0,0,0 }, - [INSTR_SI_URD] = { 0xff, D_20,B_16,U8_8,0,0,0 }, - [INSTR_SMI_U0RDP] = { 0xff, U4_8,J16_32,D_20,B_16,0,0 }, - [INSTR_SSE_RDRD] = { 0xff, D_20,B_16,D_36,B_32,0,0 }, - [INSTR_SSF_RRDRD] = { 0x0f, D_20,B_16,D_36,B_32,R_8,0 }, - [INSTR_SSF_RRDRD2]= { 0x0f, R_8,D_20,B_16,D_36,B_32,0 }, - [INSTR_SS_L0RDRD] = { 0xff, D_20,L8_8,B_16,D_36,B_32,0 }, - [INSTR_SS_LIRDRD] = { 0xff, D_20,L4_8,B_16,D_36,B_32,U4_12 }, - [INSTR_SS_LLRDRD] = { 0xff, D_20,L4_8,B_16,D_36,L4_12,B_32 }, - [INSTR_SS_RRRDRD2]= { 0xff, R_8,D_20,B_16,R_12,D_36,B_32 }, - [INSTR_SS_RRRDRD3]= { 0xff, R_8,R_12,D_20,B_16,D_36,B_32 }, - [INSTR_SS_RRRDRD] = { 0xff, D_20,R_8,B_16,D_36,B_32,R_12 }, - [INSTR_S_00] = { 0xff, 0,0,0,0,0,0 }, - [INSTR_S_RD] = { 0xff, D_20,B_16,0,0,0,0 }, - [INSTR_VRI_V0IM] = { 0xff, V_8,I16_16,M_32,0,0,0 }, - [INSTR_VRI_V0I0] = { 0xff, V_8,I16_16,0,0,0,0 }, - [INSTR_VRI_V0IIM] = { 0xff, V_8,I8_16,I8_24,M_32,0,0 }, - [INSTR_VRI_VVIM] = { 0xff, V_8,I16_16,V_12,M_32,0,0 }, - [INSTR_VRI_VVV0IM]= { 0xff, V_8,V_12,V_16,I8_24,M_32,0 }, - [INSTR_VRI_VVV0I0]= { 0xff, V_8,V_12,V_16,I8_24,0,0 }, - [INSTR_VRI_VVIMM] = { 0xff, V_8,V_12,I16_16,M_32,M_28,0 }, - [INSTR_VRR_VV00MMM]={ 0xff, V_8,V_12,M_32,M_28,M_24,0 }, - [INSTR_VRR_VV000MM]={ 0xff, V_8,V_12,M_32,M_28,0,0 }, - [INSTR_VRR_VV0000M]={ 0xff, V_8,V_12,M_32,0,0,0 }, - [INSTR_VRR_VV00000]={ 0xff, V_8,V_12,0,0,0,0 }, - [INSTR_VRR_VVV0M0M]={ 0xff, V_8,V_12,V_16,M_32,M_24,0 }, - [INSTR_VRR_VV00M0M]={ 0xff, V_8,V_12,M_32,M_24,0,0 }, - [INSTR_VRR_VVV000M]={ 0xff, V_8,V_12,V_16,M_32,0,0 }, - [INSTR_VRR_VVV000V]={ 0xff, V_8,V_12,V_16,V_32,0,0 }, - [INSTR_VRR_VVV0000]={ 0xff, V_8,V_12,V_16,0,0,0 }, - [INSTR_VRR_VVV0MMM]={ 0xff, V_8,V_12,V_16,M_32,M_28,M_24 }, - [INSTR_VRR_VVV00MM]={ 0xff, V_8,V_12,V_16,M_32,M_28,0 }, - [INSTR_VRR_VVVMM0V]={ 0xff, V_8,V_12,V_16,V_32,M_20,M_24 }, - [INSTR_VRR_VVVM0MV]={ 0xff, V_8,V_12,V_16,V_32,M_28,M_20 }, - [INSTR_VRR_VVVM00V]={ 0xff, V_8,V_12,V_16,V_32,M_20,0 }, - [INSTR_VRR_VRR0000]={ 0xff, V_8,R_12,R_16,0,0,0 }, - [INSTR_VRS_VVRDM] = { 0xff, V_8,V_12,D_20,B_16,M_32,0 }, - [INSTR_VRS_VVRD0] = { 0xff, V_8,V_12,D_20,B_16,0,0 }, - [INSTR_VRS_VRRDM] = { 0xff, V_8,R_12,D_20,B_16,M_32,0 }, - [INSTR_VRS_VRRD0] = { 0xff, V_8,R_12,D_20,B_16,0,0 }, - [INSTR_VRS_RVRDM] = { 0xff, R_8,V_12,D_20,B_16,M_32,0 }, - [INSTR_VRV_VVRDM] = { 0xff, V_8,V_12,D_20,B_16,M_32,0 }, - [INSTR_VRV_VWRDM] = { 0xff, V_8,D_20,W_12,B_16,M_32,0 }, - [INSTR_VRX_VRRDM] = { 0xff, V_8,D_20,X_12,B_16,M_32,0 }, - [INSTR_VRX_VRRD0] = { 0xff, V_8,D_20,X_12,B_16,0,0 }, -}; - -enum { - LONG_INSN_ALGHSIK, - LONG_INSN_ALHHHR, - LONG_INSN_ALHHLR, - LONG_INSN_ALHSIK, - LONG_INSN_ALSIHN, - LONG_INSN_CDFBRA, - LONG_INSN_CDGBRA, - LONG_INSN_CDGTRA, - LONG_INSN_CDLFBR, - LONG_INSN_CDLFTR, - LONG_INSN_CDLGBR, - LONG_INSN_CDLGTR, - LONG_INSN_CEFBRA, - LONG_INSN_CEGBRA, - LONG_INSN_CELFBR, - LONG_INSN_CELGBR, - LONG_INSN_CFDBRA, - LONG_INSN_CFEBRA, - LONG_INSN_CFXBRA, - LONG_INSN_CGDBRA, - LONG_INSN_CGDTRA, - LONG_INSN_CGEBRA, - LONG_INSN_CGXBRA, - LONG_INSN_CGXTRA, - LONG_INSN_CLFDBR, - LONG_INSN_CLFDTR, - LONG_INSN_CLFEBR, - LONG_INSN_CLFHSI, - LONG_INSN_CLFXBR, - LONG_INSN_CLFXTR, - LONG_INSN_CLGDBR, - LONG_INSN_CLGDTR, - LONG_INSN_CLGEBR, - LONG_INSN_CLGFRL, - LONG_INSN_CLGHRL, - LONG_INSN_CLGHSI, - LONG_INSN_CLGXBR, - LONG_INSN_CLGXTR, - LONG_INSN_CLHHSI, - LONG_INSN_CXFBRA, - LONG_INSN_CXGBRA, - LONG_INSN_CXGTRA, - LONG_INSN_CXLFBR, - LONG_INSN_CXLFTR, - LONG_INSN_CXLGBR, - LONG_INSN_CXLGTR, - LONG_INSN_FIDBRA, - LONG_INSN_FIEBRA, - LONG_INSN_FIXBRA, - LONG_INSN_LDXBRA, - LONG_INSN_LEDBRA, - LONG_INSN_LEXBRA, - LONG_INSN_LLGFAT, - LONG_INSN_LLGFRL, - LONG_INSN_LLGHRL, - LONG_INSN_LLGTAT, - LONG_INSN_POPCNT, - LONG_INSN_RIEMIT, - LONG_INSN_RINEXT, - LONG_INSN_RISBGN, - LONG_INSN_RISBHG, - LONG_INSN_RISBLG, - LONG_INSN_SLHHHR, - LONG_INSN_SLHHLR, - LONG_INSN_TABORT, - LONG_INSN_TBEGIN, - LONG_INSN_TBEGINC, - LONG_INSN_PCISTG, - LONG_INSN_MPCIFC, - LONG_INSN_STPCIFC, - LONG_INSN_PCISTB, - LONG_INSN_VPOPCT, - LONG_INSN_VERLLV, - LONG_INSN_VESRAV, - LONG_INSN_VESRLV, - LONG_INSN_VSBCBI, - LONG_INSN_STCCTM -}; - -static char *long_insn_name[] = { - [LONG_INSN_ALGHSIK] = "alghsik", - [LONG_INSN_ALHHHR] = "alhhhr", - [LONG_INSN_ALHHLR] = "alhhlr", - [LONG_INSN_ALHSIK] = "alhsik", - [LONG_INSN_ALSIHN] = "alsihn", - [LONG_INSN_CDFBRA] = "cdfbra", - [LONG_INSN_CDGBRA] = "cdgbra", - [LONG_INSN_CDGTRA] = "cdgtra", - [LONG_INSN_CDLFBR] = "cdlfbr", - [LONG_INSN_CDLFTR] = "cdlftr", - [LONG_INSN_CDLGBR] = "cdlgbr", - [LONG_INSN_CDLGTR] = "cdlgtr", - [LONG_INSN_CEFBRA] = "cefbra", - [LONG_INSN_CEGBRA] = "cegbra", - [LONG_INSN_CELFBR] = "celfbr", - [LONG_INSN_CELGBR] = "celgbr", - [LONG_INSN_CFDBRA] = "cfdbra", - [LONG_INSN_CFEBRA] = "cfebra", - [LONG_INSN_CFXBRA] = "cfxbra", - [LONG_INSN_CGDBRA] = "cgdbra", - [LONG_INSN_CGDTRA] = "cgdtra", - [LONG_INSN_CGEBRA] = "cgebra", - [LONG_INSN_CGXBRA] = "cgxbra", - [LONG_INSN_CGXTRA] = "cgxtra", - [LONG_INSN_CLFDBR] = "clfdbr", - [LONG_INSN_CLFDTR] = "clfdtr", - [LONG_INSN_CLFEBR] = "clfebr", - [LONG_INSN_CLFHSI] = "clfhsi", - [LONG_INSN_CLFXBR] = "clfxbr", - [LONG_INSN_CLFXTR] = "clfxtr", - [LONG_INSN_CLGDBR] = "clgdbr", - [LONG_INSN_CLGDTR] = "clgdtr", - [LONG_INSN_CLGEBR] = "clgebr", - [LONG_INSN_CLGFRL] = "clgfrl", - [LONG_INSN_CLGHRL] = "clghrl", - [LONG_INSN_CLGHSI] = "clghsi", - [LONG_INSN_CLGXBR] = "clgxbr", - [LONG_INSN_CLGXTR] = "clgxtr", - [LONG_INSN_CLHHSI] = "clhhsi", - [LONG_INSN_CXFBRA] = "cxfbra", - [LONG_INSN_CXGBRA] = "cxgbra", - [LONG_INSN_CXGTRA] = "cxgtra", - [LONG_INSN_CXLFBR] = "cxlfbr", - [LONG_INSN_CXLFTR] = "cxlftr", - [LONG_INSN_CXLGBR] = "cxlgbr", - [LONG_INSN_CXLGTR] = "cxlgtr", - [LONG_INSN_FIDBRA] = "fidbra", - [LONG_INSN_FIEBRA] = "fiebra", - [LONG_INSN_FIXBRA] = "fixbra", - [LONG_INSN_LDXBRA] = "ldxbra", - [LONG_INSN_LEDBRA] = "ledbra", - [LONG_INSN_LEXBRA] = "lexbra", - [LONG_INSN_LLGFAT] = "llgfat", - [LONG_INSN_LLGFRL] = "llgfrl", - [LONG_INSN_LLGHRL] = "llghrl", - [LONG_INSN_LLGTAT] = "llgtat", - [LONG_INSN_POPCNT] = "popcnt", - [LONG_INSN_RIEMIT] = "riemit", - [LONG_INSN_RINEXT] = "rinext", - [LONG_INSN_RISBGN] = "risbgn", - [LONG_INSN_RISBHG] = "risbhg", - [LONG_INSN_RISBLG] = "risblg", - [LONG_INSN_SLHHHR] = "slhhhr", - [LONG_INSN_SLHHLR] = "slhhlr", - [LONG_INSN_TABORT] = "tabort", - [LONG_INSN_TBEGIN] = "tbegin", - [LONG_INSN_TBEGINC] = "tbeginc", - [LONG_INSN_PCISTG] = "pcistg", - [LONG_INSN_MPCIFC] = "mpcifc", - [LONG_INSN_STPCIFC] = "stpcifc", - [LONG_INSN_PCISTB] = "pcistb", - [LONG_INSN_VPOPCT] = "vpopct", - [LONG_INSN_VERLLV] = "verllv", - [LONG_INSN_VESRAV] = "vesrav", - [LONG_INSN_VESRLV] = "vesrlv", - [LONG_INSN_VSBCBI] = "vsbcbi", - [LONG_INSN_STCCTM] = "stcctm", -}; - -static struct s390_insn opcode[] = { - { "bprp", 0xc5, INSTR_MII_UPI }, - { "bpp", 0xc7, INSTR_SMI_U0RDP }, - { "trtr", 0xd0, INSTR_SS_L0RDRD }, - { "lmd", 0xef, INSTR_SS_RRRDRD3 }, - { "spm", 0x04, INSTR_RR_R0 }, - { "balr", 0x05, INSTR_RR_RR }, - { "bctr", 0x06, INSTR_RR_RR }, - { "bcr", 0x07, INSTR_RR_UR }, - { "svc", 0x0a, INSTR_RR_U0 }, - { "bsm", 0x0b, INSTR_RR_RR }, - { "bassm", 0x0c, INSTR_RR_RR }, - { "basr", 0x0d, INSTR_RR_RR }, - { "mvcl", 0x0e, INSTR_RR_RR }, - { "clcl", 0x0f, INSTR_RR_RR }, - { "lpr", 0x10, INSTR_RR_RR }, - { "lnr", 0x11, INSTR_RR_RR }, - { "ltr", 0x12, INSTR_RR_RR }, - { "lcr", 0x13, INSTR_RR_RR }, - { "nr", 0x14, INSTR_RR_RR }, - { "clr", 0x15, INSTR_RR_RR }, - { "or", 0x16, INSTR_RR_RR }, - { "xr", 0x17, INSTR_RR_RR }, - { "lr", 0x18, INSTR_RR_RR }, - { "cr", 0x19, INSTR_RR_RR }, - { "ar", 0x1a, INSTR_RR_RR }, - { "sr", 0x1b, INSTR_RR_RR }, - { "mr", 0x1c, INSTR_RR_RR }, - { "dr", 0x1d, INSTR_RR_RR }, - { "alr", 0x1e, INSTR_RR_RR }, - { "slr", 0x1f, INSTR_RR_RR }, - { "lpdr", 0x20, INSTR_RR_FF }, - { "lndr", 0x21, INSTR_RR_FF }, - { "ltdr", 0x22, INSTR_RR_FF }, - { "lcdr", 0x23, INSTR_RR_FF }, - { "hdr", 0x24, INSTR_RR_FF }, - { "ldxr", 0x25, INSTR_RR_FF }, - { "mxr", 0x26, INSTR_RR_FF }, - { "mxdr", 0x27, INSTR_RR_FF }, - { "ldr", 0x28, INSTR_RR_FF }, - { "cdr", 0x29, INSTR_RR_FF }, - { "adr", 0x2a, INSTR_RR_FF }, - { "sdr", 0x2b, INSTR_RR_FF }, - { "mdr", 0x2c, INSTR_RR_FF }, - { "ddr", 0x2d, INSTR_RR_FF }, - { "awr", 0x2e, INSTR_RR_FF }, - { "swr", 0x2f, INSTR_RR_FF }, - { "lper", 0x30, INSTR_RR_FF }, - { "lner", 0x31, INSTR_RR_FF }, - { "lter", 0x32, INSTR_RR_FF }, - { "lcer", 0x33, INSTR_RR_FF }, - { "her", 0x34, INSTR_RR_FF }, - { "ledr", 0x35, INSTR_RR_FF }, - { "axr", 0x36, INSTR_RR_FF }, - { "sxr", 0x37, INSTR_RR_FF }, - { "ler", 0x38, INSTR_RR_FF }, - { "cer", 0x39, INSTR_RR_FF }, - { "aer", 0x3a, INSTR_RR_FF }, - { "ser", 0x3b, INSTR_RR_FF }, - { "mder", 0x3c, INSTR_RR_FF }, - { "der", 0x3d, INSTR_RR_FF }, - { "aur", 0x3e, INSTR_RR_FF }, - { "sur", 0x3f, INSTR_RR_FF }, - { "sth", 0x40, INSTR_RX_RRRD }, - { "la", 0x41, INSTR_RX_RRRD }, - { "stc", 0x42, INSTR_RX_RRRD }, - { "ic", 0x43, INSTR_RX_RRRD }, - { "ex", 0x44, INSTR_RX_RRRD }, - { "bal", 0x45, INSTR_RX_RRRD }, - { "bct", 0x46, INSTR_RX_RRRD }, - { "bc", 0x47, INSTR_RX_URRD }, - { "lh", 0x48, INSTR_RX_RRRD }, - { "ch", 0x49, INSTR_RX_RRRD }, - { "ah", 0x4a, INSTR_RX_RRRD }, - { "sh", 0x4b, INSTR_RX_RRRD }, - { "mh", 0x4c, INSTR_RX_RRRD }, - { "bas", 0x4d, INSTR_RX_RRRD }, - { "cvd", 0x4e, INSTR_RX_RRRD }, - { "cvb", 0x4f, INSTR_RX_RRRD }, - { "st", 0x50, INSTR_RX_RRRD }, - { "lae", 0x51, INSTR_RX_RRRD }, - { "n", 0x54, INSTR_RX_RRRD }, - { "cl", 0x55, INSTR_RX_RRRD }, - { "o", 0x56, INSTR_RX_RRRD }, - { "x", 0x57, INSTR_RX_RRRD }, - { "l", 0x58, INSTR_RX_RRRD }, - { "c", 0x59, INSTR_RX_RRRD }, - { "a", 0x5a, INSTR_RX_RRRD }, - { "s", 0x5b, INSTR_RX_RRRD }, - { "m", 0x5c, INSTR_RX_RRRD }, - { "d", 0x5d, INSTR_RX_RRRD }, - { "al", 0x5e, INSTR_RX_RRRD }, - { "sl", 0x5f, INSTR_RX_RRRD }, - { "std", 0x60, INSTR_RX_FRRD }, - { "mxd", 0x67, INSTR_RX_FRRD }, - { "ld", 0x68, INSTR_RX_FRRD }, - { "cd", 0x69, INSTR_RX_FRRD }, - { "ad", 0x6a, INSTR_RX_FRRD }, - { "sd", 0x6b, INSTR_RX_FRRD }, - { "md", 0x6c, INSTR_RX_FRRD }, - { "dd", 0x6d, INSTR_RX_FRRD }, - { "aw", 0x6e, INSTR_RX_FRRD }, - { "sw", 0x6f, INSTR_RX_FRRD }, - { "ste", 0x70, INSTR_RX_FRRD }, - { "ms", 0x71, INSTR_RX_RRRD }, - { "le", 0x78, INSTR_RX_FRRD }, - { "ce", 0x79, INSTR_RX_FRRD }, - { "ae", 0x7a, INSTR_RX_FRRD }, - { "se", 0x7b, INSTR_RX_FRRD }, - { "mde", 0x7c, INSTR_RX_FRRD }, - { "de", 0x7d, INSTR_RX_FRRD }, - { "au", 0x7e, INSTR_RX_FRRD }, - { "su", 0x7f, INSTR_RX_FRRD }, - { "ssm", 0x80, INSTR_S_RD }, - { "lpsw", 0x82, INSTR_S_RD }, - { "diag", 0x83, INSTR_RS_RRRD }, - { "brxh", 0x84, INSTR_RSI_RRP }, - { "brxle", 0x85, INSTR_RSI_RRP }, - { "bxh", 0x86, INSTR_RS_RRRD }, - { "bxle", 0x87, INSTR_RS_RRRD }, - { "srl", 0x88, INSTR_RS_R0RD }, - { "sll", 0x89, INSTR_RS_R0RD }, - { "sra", 0x8a, INSTR_RS_R0RD }, - { "sla", 0x8b, INSTR_RS_R0RD }, - { "srdl", 0x8c, INSTR_RS_R0RD }, - { "sldl", 0x8d, INSTR_RS_R0RD }, - { "srda", 0x8e, INSTR_RS_R0RD }, - { "slda", 0x8f, INSTR_RS_R0RD }, - { "stm", 0x90, INSTR_RS_RRRD }, - { "tm", 0x91, INSTR_SI_URD }, - { "mvi", 0x92, INSTR_SI_URD }, - { "ts", 0x93, INSTR_S_RD }, - { "ni", 0x94, INSTR_SI_URD }, - { "cli", 0x95, INSTR_SI_URD }, - { "oi", 0x96, INSTR_SI_URD }, - { "xi", 0x97, INSTR_SI_URD }, - { "lm", 0x98, INSTR_RS_RRRD }, - { "trace", 0x99, INSTR_RS_RRRD }, - { "lam", 0x9a, INSTR_RS_AARD }, - { "stam", 0x9b, INSTR_RS_AARD }, - { "mvcle", 0xa8, INSTR_RS_RRRD }, - { "clcle", 0xa9, INSTR_RS_RRRD }, - { "stnsm", 0xac, INSTR_SI_URD }, - { "stosm", 0xad, INSTR_SI_URD }, - { "sigp", 0xae, INSTR_RS_RRRD }, - { "mc", 0xaf, INSTR_SI_URD }, - { "lra", 0xb1, INSTR_RX_RRRD }, - { "stctl", 0xb6, INSTR_RS_CCRD }, - { "lctl", 0xb7, INSTR_RS_CCRD }, - { "cs", 0xba, INSTR_RS_RRRD }, - { "cds", 0xbb, INSTR_RS_RRRD }, - { "clm", 0xbd, INSTR_RS_RURD }, - { "stcm", 0xbe, INSTR_RS_RURD }, - { "icm", 0xbf, INSTR_RS_RURD }, - { "mvn", 0xd1, INSTR_SS_L0RDRD }, - { "mvc", 0xd2, INSTR_SS_L0RDRD }, - { "mvz", 0xd3, INSTR_SS_L0RDRD }, - { "nc", 0xd4, INSTR_SS_L0RDRD }, - { "clc", 0xd5, INSTR_SS_L0RDRD }, - { "oc", 0xd6, INSTR_SS_L0RDRD }, - { "xc", 0xd7, INSTR_SS_L0RDRD }, - { "mvck", 0xd9, INSTR_SS_RRRDRD }, - { "mvcp", 0xda, INSTR_SS_RRRDRD }, - { "mvcs", 0xdb, INSTR_SS_RRRDRD }, - { "tr", 0xdc, INSTR_SS_L0RDRD }, - { "trt", 0xdd, INSTR_SS_L0RDRD }, - { "ed", 0xde, INSTR_SS_L0RDRD }, - { "edmk", 0xdf, INSTR_SS_L0RDRD }, - { "pku", 0xe1, INSTR_SS_L0RDRD }, - { "unpku", 0xe2, INSTR_SS_L0RDRD }, - { "mvcin", 0xe8, INSTR_SS_L0RDRD }, - { "pka", 0xe9, INSTR_SS_L0RDRD }, - { "unpka", 0xea, INSTR_SS_L0RDRD }, - { "plo", 0xee, INSTR_SS_RRRDRD2 }, - { "srp", 0xf0, INSTR_SS_LIRDRD }, - { "mvo", 0xf1, INSTR_SS_LLRDRD }, - { "pack", 0xf2, INSTR_SS_LLRDRD }, - { "unpk", 0xf3, INSTR_SS_LLRDRD }, - { "zap", 0xf8, INSTR_SS_LLRDRD }, - { "cp", 0xf9, INSTR_SS_LLRDRD }, - { "ap", 0xfa, INSTR_SS_LLRDRD }, - { "sp", 0xfb, INSTR_SS_LLRDRD }, - { "mp", 0xfc, INSTR_SS_LLRDRD }, - { "dp", 0xfd, INSTR_SS_LLRDRD }, - { "", 0, INSTR_INVALID } -}; - -static struct s390_insn opcode_01[] = { - { "ptff", 0x04, INSTR_E }, - { "pfpo", 0x0a, INSTR_E }, - { "sam64", 0x0e, INSTR_E }, - { "pr", 0x01, INSTR_E }, - { "upt", 0x02, INSTR_E }, - { "sckpf", 0x07, INSTR_E }, - { "tam", 0x0b, INSTR_E }, - { "sam24", 0x0c, INSTR_E }, - { "sam31", 0x0d, INSTR_E }, - { "trap2", 0xff, INSTR_E }, - { "", 0, INSTR_INVALID } -}; - -static struct s390_insn opcode_a5[] = { - { "iihh", 0x00, INSTR_RI_RU }, - { "iihl", 0x01, INSTR_RI_RU }, - { "iilh", 0x02, INSTR_RI_RU }, - { "iill", 0x03, INSTR_RI_RU }, - { "nihh", 0x04, INSTR_RI_RU }, - { "nihl", 0x05, INSTR_RI_RU }, - { "nilh", 0x06, INSTR_RI_RU }, - { "nill", 0x07, INSTR_RI_RU }, - { "oihh", 0x08, INSTR_RI_RU }, - { "oihl", 0x09, INSTR_RI_RU }, - { "oilh", 0x0a, INSTR_RI_RU }, - { "oill", 0x0b, INSTR_RI_RU }, - { "llihh", 0x0c, INSTR_RI_RU }, - { "llihl", 0x0d, INSTR_RI_RU }, - { "llilh", 0x0e, INSTR_RI_RU }, - { "llill", 0x0f, INSTR_RI_RU }, - { "", 0, INSTR_INVALID } -}; - -static struct s390_insn opcode_a7[] = { - { "tmhh", 0x02, INSTR_RI_RU }, - { "tmhl", 0x03, INSTR_RI_RU }, - { "brctg", 0x07, INSTR_RI_RP }, - { "lghi", 0x09, INSTR_RI_RI }, - { "aghi", 0x0b, INSTR_RI_RI }, - { "mghi", 0x0d, INSTR_RI_RI }, - { "cghi", 0x0f, INSTR_RI_RI }, - { "tmlh", 0x00, INSTR_RI_RU }, - { "tmll", 0x01, INSTR_RI_RU }, - { "brc", 0x04, INSTR_RI_UP }, - { "bras", 0x05, INSTR_RI_RP }, - { "brct", 0x06, INSTR_RI_RP }, - { "lhi", 0x08, INSTR_RI_RI }, - { "ahi", 0x0a, INSTR_RI_RI }, - { "mhi", 0x0c, INSTR_RI_RI }, - { "chi", 0x0e, INSTR_RI_RI }, - { "", 0, INSTR_INVALID } -}; - -static struct s390_insn opcode_aa[] = { - { { 0, LONG_INSN_RINEXT }, 0x00, INSTR_RI_RI }, - { "rion", 0x01, INSTR_RI_RI }, - { "tric", 0x02, INSTR_RI_RI }, - { "rioff", 0x03, INSTR_RI_RI }, - { { 0, LONG_INSN_RIEMIT }, 0x04, INSTR_RI_RI }, - { "", 0, INSTR_INVALID } -}; - -static struct s390_insn opcode_b2[] = { - { "stckf", 0x7c, INSTR_S_RD }, - { "lpp", 0x80, INSTR_S_RD }, - { "lcctl", 0x84, INSTR_S_RD }, - { "lpctl", 0x85, INSTR_S_RD }, - { "qsi", 0x86, INSTR_S_RD }, - { "lsctl", 0x87, INSTR_S_RD }, - { "qctri", 0x8e, INSTR_S_RD }, - { "stfle", 0xb0, INSTR_S_RD }, - { "lpswe", 0xb2, INSTR_S_RD }, - { "srnmb", 0xb8, INSTR_S_RD }, - { "srnmt", 0xb9, INSTR_S_RD }, - { "lfas", 0xbd, INSTR_S_RD }, - { "scctr", 0xe0, INSTR_RRE_RR }, - { "spctr", 0xe1, INSTR_RRE_RR }, - { "ecctr", 0xe4, INSTR_RRE_RR }, - { "epctr", 0xe5, INSTR_RRE_RR }, - { "ppa", 0xe8, INSTR_RRF_U0RR }, - { "etnd", 0xec, INSTR_RRE_R0 }, - { "ecpga", 0xed, INSTR_RRE_RR }, - { "tend", 0xf8, INSTR_S_00 }, - { "niai", 0xfa, INSTR_IE_UU }, - { { 0, LONG_INSN_TABORT }, 0xfc, INSTR_S_RD }, - { "stidp", 0x02, INSTR_S_RD }, - { "sck", 0x04, INSTR_S_RD }, - { "stck", 0x05, INSTR_S_RD }, - { "sckc", 0x06, INSTR_S_RD }, - { "stckc", 0x07, INSTR_S_RD }, - { "spt", 0x08, INSTR_S_RD }, - { "stpt", 0x09, INSTR_S_RD }, - { "spka", 0x0a, INSTR_S_RD }, - { "ipk", 0x0b, INSTR_S_00 }, - { "ptlb", 0x0d, INSTR_S_00 }, - { "spx", 0x10, INSTR_S_RD }, - { "stpx", 0x11, INSTR_S_RD }, - { "stap", 0x12, INSTR_S_RD }, - { "sie", 0x14, INSTR_S_RD }, - { "pc", 0x18, INSTR_S_RD }, - { "sac", 0x19, INSTR_S_RD }, - { "cfc", 0x1a, INSTR_S_RD }, - { "servc", 0x20, INSTR_RRE_RR }, - { "ipte", 0x21, INSTR_RRE_RR }, - { "ipm", 0x22, INSTR_RRE_R0 }, - { "ivsk", 0x23, INSTR_RRE_RR }, - { "iac", 0x24, INSTR_RRE_R0 }, - { "ssar", 0x25, INSTR_RRE_R0 }, - { "epar", 0x26, INSTR_RRE_R0 }, - { "esar", 0x27, INSTR_RRE_R0 }, - { "pt", 0x28, INSTR_RRE_RR }, - { "iske", 0x29, INSTR_RRE_RR }, - { "rrbe", 0x2a, INSTR_RRE_RR }, - { "sske", 0x2b, INSTR_RRF_M0RR }, - { "tb", 0x2c, INSTR_RRE_0R }, - { "dxr", 0x2d, INSTR_RRE_FF }, - { "pgin", 0x2e, INSTR_RRE_RR }, - { "pgout", 0x2f, INSTR_RRE_RR }, - { "csch", 0x30, INSTR_S_00 }, - { "hsch", 0x31, INSTR_S_00 }, - { "msch", 0x32, INSTR_S_RD }, - { "ssch", 0x33, INSTR_S_RD }, - { "stsch", 0x34, INSTR_S_RD }, - { "tsch", 0x35, INSTR_S_RD }, - { "tpi", 0x36, INSTR_S_RD }, - { "sal", 0x37, INSTR_S_00 }, - { "rsch", 0x38, INSTR_S_00 }, - { "stcrw", 0x39, INSTR_S_RD }, - { "stcps", 0x3a, INSTR_S_RD }, - { "rchp", 0x3b, INSTR_S_00 }, - { "schm", 0x3c, INSTR_S_00 }, - { "bakr", 0x40, INSTR_RRE_RR }, - { "cksm", 0x41, INSTR_RRE_RR }, - { "sqdr", 0x44, INSTR_RRE_FF }, - { "sqer", 0x45, INSTR_RRE_FF }, - { "stura", 0x46, INSTR_RRE_RR }, - { "msta", 0x47, INSTR_RRE_R0 }, - { "palb", 0x48, INSTR_RRE_00 }, - { "ereg", 0x49, INSTR_RRE_RR }, - { "esta", 0x4a, INSTR_RRE_RR }, - { "lura", 0x4b, INSTR_RRE_RR }, - { "tar", 0x4c, INSTR_RRE_AR }, - { "cpya", 0x4d, INSTR_RRE_AA }, - { "sar", 0x4e, INSTR_RRE_AR }, - { "ear", 0x4f, INSTR_RRE_RA }, - { "csp", 0x50, INSTR_RRE_RR }, - { "msr", 0x52, INSTR_RRE_RR }, - { "mvpg", 0x54, INSTR_RRE_RR }, - { "mvst", 0x55, INSTR_RRE_RR }, - { "cuse", 0x57, INSTR_RRE_RR }, - { "bsg", 0x58, INSTR_RRE_RR }, - { "bsa", 0x5a, INSTR_RRE_RR }, - { "clst", 0x5d, INSTR_RRE_RR }, - { "srst", 0x5e, INSTR_RRE_RR }, - { "cmpsc", 0x63, INSTR_RRE_RR }, - { "siga", 0x74, INSTR_S_RD }, - { "xsch", 0x76, INSTR_S_00 }, - { "rp", 0x77, INSTR_S_RD }, - { "stcke", 0x78, INSTR_S_RD }, - { "sacf", 0x79, INSTR_S_RD }, - { "stsi", 0x7d, INSTR_S_RD }, - { "srnm", 0x99, INSTR_S_RD }, - { "stfpc", 0x9c, INSTR_S_RD }, - { "lfpc", 0x9d, INSTR_S_RD }, - { "tre", 0xa5, INSTR_RRE_RR }, - { "cuutf", 0xa6, INSTR_RRF_M0RR }, - { "cutfu", 0xa7, INSTR_RRF_M0RR }, - { "stfl", 0xb1, INSTR_S_RD }, - { "trap4", 0xff, INSTR_S_RD }, - { "", 0, INSTR_INVALID } -}; - -static struct s390_insn opcode_b3[] = { - { "maylr", 0x38, INSTR_RRF_F0FF }, - { "mylr", 0x39, INSTR_RRF_F0FF }, - { "mayr", 0x3a, INSTR_RRF_F0FF }, - { "myr", 0x3b, INSTR_RRF_F0FF }, - { "mayhr", 0x3c, INSTR_RRF_F0FF }, - { "myhr", 0x3d, INSTR_RRF_F0FF }, - { "lpdfr", 0x70, INSTR_RRE_FF }, - { "lndfr", 0x71, INSTR_RRE_FF }, - { "cpsdr", 0x72, INSTR_RRF_F0FF2 }, - { "lcdfr", 0x73, INSTR_RRE_FF }, - { "sfasr", 0x85, INSTR_RRE_R0 }, - { { 0, LONG_INSN_CELFBR }, 0x90, INSTR_RRF_UUFR }, - { { 0, LONG_INSN_CDLFBR }, 0x91, INSTR_RRF_UUFR }, - { { 0, LONG_INSN_CXLFBR }, 0x92, INSTR_RRF_UURF }, - { { 0, LONG_INSN_CEFBRA }, 0x94, INSTR_RRF_UUFR }, - { { 0, LONG_INSN_CDFBRA }, 0x95, INSTR_RRF_UUFR }, - { { 0, LONG_INSN_CXFBRA }, 0x96, INSTR_RRF_UURF }, - { { 0, LONG_INSN_CFEBRA }, 0x98, INSTR_RRF_UURF }, - { { 0, LONG_INSN_CFDBRA }, 0x99, INSTR_RRF_UURF }, - { { 0, LONG_INSN_CFXBRA }, 0x9a, INSTR_RRF_UUFR }, - { { 0, LONG_INSN_CLFEBR }, 0x9c, INSTR_RRF_UURF }, - { { 0, LONG_INSN_CLFDBR }, 0x9d, INSTR_RRF_UURF }, - { { 0, LONG_INSN_CLFXBR }, 0x9e, INSTR_RRF_UUFR }, - { { 0, LONG_INSN_CELGBR }, 0xa0, INSTR_RRF_UUFR }, - { { 0, LONG_INSN_CDLGBR }, 0xa1, INSTR_RRF_UUFR }, - { { 0, LONG_INSN_CXLGBR }, 0xa2, INSTR_RRF_UURF }, - { { 0, LONG_INSN_CEGBRA }, 0xa4, INSTR_RRF_UUFR }, - { { 0, LONG_INSN_CDGBRA }, 0xa5, INSTR_RRF_UUFR }, - { { 0, LONG_INSN_CXGBRA }, 0xa6, INSTR_RRF_UURF }, - { { 0, LONG_INSN_CGEBRA }, 0xa8, INSTR_RRF_UURF }, - { { 0, LONG_INSN_CGDBRA }, 0xa9, INSTR_RRF_UURF }, - { { 0, LONG_INSN_CGXBRA }, 0xaa, INSTR_RRF_UUFR }, - { { 0, LONG_INSN_CLGEBR }, 0xac, INSTR_RRF_UURF }, - { { 0, LONG_INSN_CLGDBR }, 0xad, INSTR_RRF_UURF }, - { { 0, LONG_INSN_CLGXBR }, 0xae, INSTR_RRF_UUFR }, - { "ldgr", 0xc1, INSTR_RRE_FR }, - { "cegr", 0xc4, INSTR_RRE_FR }, - { "cdgr", 0xc5, INSTR_RRE_FR }, - { "cxgr", 0xc6, INSTR_RRE_FR }, - { "cger", 0xc8, INSTR_RRF_U0RF }, - { "cgdr", 0xc9, INSTR_RRF_U0RF }, - { "cgxr", 0xca, INSTR_RRF_U0RF }, - { "lgdr", 0xcd, INSTR_RRE_RF }, - { "mdtra", 0xd0, INSTR_RRF_FUFF2 }, - { "ddtra", 0xd1, INSTR_RRF_FUFF2 }, - { "adtra", 0xd2, INSTR_RRF_FUFF2 }, - { "sdtra", 0xd3, INSTR_RRF_FUFF2 }, - { "ldetr", 0xd4, INSTR_RRF_0UFF }, - { "ledtr", 0xd5, INSTR_RRF_UUFF }, - { "ltdtr", 0xd6, INSTR_RRE_FF }, - { "fidtr", 0xd7, INSTR_RRF_UUFF }, - { "mxtra", 0xd8, INSTR_RRF_FUFF2 }, - { "dxtra", 0xd9, INSTR_RRF_FUFF2 }, - { "axtra", 0xda, INSTR_RRF_FUFF2 }, - { "sxtra", 0xdb, INSTR_RRF_FUFF2 }, - { "lxdtr", 0xdc, INSTR_RRF_0UFF }, - { "ldxtr", 0xdd, INSTR_RRF_UUFF }, - { "ltxtr", 0xde, INSTR_RRE_FF }, - { "fixtr", 0xdf, INSTR_RRF_UUFF }, - { "kdtr", 0xe0, INSTR_RRE_FF }, - { { 0, LONG_INSN_CGDTRA }, 0xe1, INSTR_RRF_UURF }, - { "cudtr", 0xe2, INSTR_RRE_RF }, - { "csdtr", 0xe3, INSTR_RRE_RF }, - { "cdtr", 0xe4, INSTR_RRE_FF }, - { "eedtr", 0xe5, INSTR_RRE_RF }, - { "esdtr", 0xe7, INSTR_RRE_RF }, - { "kxtr", 0xe8, INSTR_RRE_FF }, - { { 0, LONG_INSN_CGXTRA }, 0xe9, INSTR_RRF_UUFR }, - { "cuxtr", 0xea, INSTR_RRE_RF }, - { "csxtr", 0xeb, INSTR_RRE_RF }, - { "cxtr", 0xec, INSTR_RRE_FF }, - { "eextr", 0xed, INSTR_RRE_RF }, - { "esxtr", 0xef, INSTR_RRE_RF }, - { { 0, LONG_INSN_CDGTRA }, 0xf1, INSTR_RRF_UUFR }, - { "cdutr", 0xf2, INSTR_RRE_FR }, - { "cdstr", 0xf3, INSTR_RRE_FR }, - { "cedtr", 0xf4, INSTR_RRE_FF }, - { "qadtr", 0xf5, INSTR_RRF_FUFF }, - { "iedtr", 0xf6, INSTR_RRF_F0FR }, - { "rrdtr", 0xf7, INSTR_RRF_FFRU }, - { { 0, LONG_INSN_CXGTRA }, 0xf9, INSTR_RRF_UURF }, - { "cxutr", 0xfa, INSTR_RRE_FR }, - { "cxstr", 0xfb, INSTR_RRE_FR }, - { "cextr", 0xfc, INSTR_RRE_FF }, - { "qaxtr", 0xfd, INSTR_RRF_FUFF }, - { "iextr", 0xfe, INSTR_RRF_F0FR }, - { "rrxtr", 0xff, INSTR_RRF_FFRU }, - { "lpebr", 0x00, INSTR_RRE_FF }, - { "lnebr", 0x01, INSTR_RRE_FF }, - { "ltebr", 0x02, INSTR_RRE_FF }, - { "lcebr", 0x03, INSTR_RRE_FF }, - { "ldebr", 0x04, INSTR_RRE_FF }, - { "lxdbr", 0x05, INSTR_RRE_FF }, - { "lxebr", 0x06, INSTR_RRE_FF }, - { "mxdbr", 0x07, INSTR_RRE_FF }, - { "kebr", 0x08, INSTR_RRE_FF }, - { "cebr", 0x09, INSTR_RRE_FF }, - { "aebr", 0x0a, INSTR_RRE_FF }, - { "sebr", 0x0b, INSTR_RRE_FF }, - { "mdebr", 0x0c, INSTR_RRE_FF }, - { "debr", 0x0d, INSTR_RRE_FF }, - { "maebr", 0x0e, INSTR_RRF_F0FF }, - { "msebr", 0x0f, INSTR_RRF_F0FF }, - { "lpdbr", 0x10, INSTR_RRE_FF }, - { "lndbr", 0x11, INSTR_RRE_FF }, - { "ltdbr", 0x12, INSTR_RRE_FF }, - { "lcdbr", 0x13, INSTR_RRE_FF }, - { "sqebr", 0x14, INSTR_RRE_FF }, - { "sqdbr", 0x15, INSTR_RRE_FF }, - { "sqxbr", 0x16, INSTR_RRE_FF }, - { "meebr", 0x17, INSTR_RRE_FF }, - { "kdbr", 0x18, INSTR_RRE_FF }, - { "cdbr", 0x19, INSTR_RRE_FF }, - { "adbr", 0x1a, INSTR_RRE_FF }, - { "sdbr", 0x1b, INSTR_RRE_FF }, - { "mdbr", 0x1c, INSTR_RRE_FF }, - { "ddbr", 0x1d, INSTR_RRE_FF }, - { "madbr", 0x1e, INSTR_RRF_F0FF }, - { "msdbr", 0x1f, INSTR_RRF_F0FF }, - { "lder", 0x24, INSTR_RRE_FF }, - { "lxdr", 0x25, INSTR_RRE_FF }, - { "lxer", 0x26, INSTR_RRE_FF }, - { "maer", 0x2e, INSTR_RRF_F0FF }, - { "mser", 0x2f, INSTR_RRF_F0FF }, - { "sqxr", 0x36, INSTR_RRE_FF }, - { "meer", 0x37, INSTR_RRE_FF }, - { "madr", 0x3e, INSTR_RRF_F0FF }, - { "msdr", 0x3f, INSTR_RRF_F0FF }, - { "lpxbr", 0x40, INSTR_RRE_FF }, - { "lnxbr", 0x41, INSTR_RRE_FF }, - { "ltxbr", 0x42, INSTR_RRE_FF }, - { "lcxbr", 0x43, INSTR_RRE_FF }, - { { 0, LONG_INSN_LEDBRA }, 0x44, INSTR_RRF_UUFF }, - { { 0, LONG_INSN_LDXBRA }, 0x45, INSTR_RRF_UUFF }, - { { 0, LONG_INSN_LEXBRA }, 0x46, INSTR_RRF_UUFF }, - { { 0, LONG_INSN_FIXBRA }, 0x47, INSTR_RRF_UUFF }, - { "kxbr", 0x48, INSTR_RRE_FF }, - { "cxbr", 0x49, INSTR_RRE_FF }, - { "axbr", 0x4a, INSTR_RRE_FF }, - { "sxbr", 0x4b, INSTR_RRE_FF }, - { "mxbr", 0x4c, INSTR_RRE_FF }, - { "dxbr", 0x4d, INSTR_RRE_FF }, - { "tbedr", 0x50, INSTR_RRF_U0FF }, - { "tbdr", 0x51, INSTR_RRF_U0FF }, - { "diebr", 0x53, INSTR_RRF_FUFF }, - { { 0, LONG_INSN_FIEBRA }, 0x57, INSTR_RRF_UUFF }, - { "thder", 0x58, INSTR_RRE_FF }, - { "thdr", 0x59, INSTR_RRE_FF }, - { "didbr", 0x5b, INSTR_RRF_FUFF }, - { { 0, LONG_INSN_FIDBRA }, 0x5f, INSTR_RRF_UUFF }, - { "lpxr", 0x60, INSTR_RRE_FF }, - { "lnxr", 0x61, INSTR_RRE_FF }, - { "ltxr", 0x62, INSTR_RRE_FF }, - { "lcxr", 0x63, INSTR_RRE_FF }, - { "lxr", 0x65, INSTR_RRE_FF }, - { "lexr", 0x66, INSTR_RRE_FF }, - { "fixr", 0x67, INSTR_RRE_FF }, - { "cxr", 0x69, INSTR_RRE_FF }, - { "lzer", 0x74, INSTR_RRE_F0 }, - { "lzdr", 0x75, INSTR_RRE_F0 }, - { "lzxr", 0x76, INSTR_RRE_F0 }, - { "fier", 0x77, INSTR_RRE_FF }, - { "fidr", 0x7f, INSTR_RRE_FF }, - { "sfpc", 0x84, INSTR_RRE_RR_OPT }, - { "efpc", 0x8c, INSTR_RRE_RR_OPT }, - { "cefbr", 0x94, INSTR_RRE_RF }, - { "cdfbr", 0x95, INSTR_RRE_RF }, - { "cxfbr", 0x96, INSTR_RRE_RF }, - { "cfebr", 0x98, INSTR_RRF_U0RF }, - { "cfdbr", 0x99, INSTR_RRF_U0RF }, - { "cfxbr", 0x9a, INSTR_RRF_U0RF }, - { "cefr", 0xb4, INSTR_RRE_FR }, - { "cdfr", 0xb5, INSTR_RRE_FR }, - { "cxfr", 0xb6, INSTR_RRE_FR }, - { "cfer", 0xb8, INSTR_RRF_U0RF }, - { "cfdr", 0xb9, INSTR_RRF_U0RF }, - { "cfxr", 0xba, INSTR_RRF_U0RF }, - { "", 0, INSTR_INVALID } -}; - -static struct s390_insn opcode_b9[] = { - { "lpgr", 0x00, INSTR_RRE_RR }, - { "lngr", 0x01, INSTR_RRE_RR }, - { "ltgr", 0x02, INSTR_RRE_RR }, - { "lcgr", 0x03, INSTR_RRE_RR }, - { "lgr", 0x04, INSTR_RRE_RR }, - { "lurag", 0x05, INSTR_RRE_RR }, - { "lgbr", 0x06, INSTR_RRE_RR }, - { "lghr", 0x07, INSTR_RRE_RR }, - { "agr", 0x08, INSTR_RRE_RR }, - { "sgr", 0x09, INSTR_RRE_RR }, - { "algr", 0x0a, INSTR_RRE_RR }, - { "slgr", 0x0b, INSTR_RRE_RR }, - { "msgr", 0x0c, INSTR_RRE_RR }, - { "dsgr", 0x0d, INSTR_RRE_RR }, - { "eregg", 0x0e, INSTR_RRE_RR }, - { "lrvgr", 0x0f, INSTR_RRE_RR }, - { "lpgfr", 0x10, INSTR_RRE_RR }, - { "lngfr", 0x11, INSTR_RRE_RR }, - { "ltgfr", 0x12, INSTR_RRE_RR }, - { "lcgfr", 0x13, INSTR_RRE_RR }, - { "lgfr", 0x14, INSTR_RRE_RR }, - { "llgfr", 0x16, INSTR_RRE_RR }, - { "llgtr", 0x17, INSTR_RRE_RR }, - { "agfr", 0x18, INSTR_RRE_RR }, - { "sgfr", 0x19, INSTR_RRE_RR }, - { "algfr", 0x1a, INSTR_RRE_RR }, - { "slgfr", 0x1b, INSTR_RRE_RR }, - { "msgfr", 0x1c, INSTR_RRE_RR }, - { "dsgfr", 0x1d, INSTR_RRE_RR }, - { "cgr", 0x20, INSTR_RRE_RR }, - { "clgr", 0x21, INSTR_RRE_RR }, - { "sturg", 0x25, INSTR_RRE_RR }, - { "lbr", 0x26, INSTR_RRE_RR }, - { "lhr", 0x27, INSTR_RRE_RR }, - { "cgfr", 0x30, INSTR_RRE_RR }, - { "clgfr", 0x31, INSTR_RRE_RR }, - { "cfdtr", 0x41, INSTR_RRF_UURF }, - { { 0, LONG_INSN_CLGDTR }, 0x42, INSTR_RRF_UURF }, - { { 0, LONG_INSN_CLFDTR }, 0x43, INSTR_RRF_UURF }, - { "bctgr", 0x46, INSTR_RRE_RR }, - { "cfxtr", 0x49, INSTR_RRF_UURF }, - { { 0, LONG_INSN_CLGXTR }, 0x4a, INSTR_RRF_UUFR }, - { { 0, LONG_INSN_CLFXTR }, 0x4b, INSTR_RRF_UUFR }, - { "cdftr", 0x51, INSTR_RRF_UUFR }, - { { 0, LONG_INSN_CDLGTR }, 0x52, INSTR_RRF_UUFR }, - { { 0, LONG_INSN_CDLFTR }, 0x53, INSTR_RRF_UUFR }, - { "cxftr", 0x59, INSTR_RRF_UURF }, - { { 0, LONG_INSN_CXLGTR }, 0x5a, INSTR_RRF_UURF }, - { { 0, LONG_INSN_CXLFTR }, 0x5b, INSTR_RRF_UUFR }, - { "cgrt", 0x60, INSTR_RRF_U0RR }, - { "clgrt", 0x61, INSTR_RRF_U0RR }, - { "crt", 0x72, INSTR_RRF_U0RR }, - { "clrt", 0x73, INSTR_RRF_U0RR }, - { "ngr", 0x80, INSTR_RRE_RR }, - { "ogr", 0x81, INSTR_RRE_RR }, - { "xgr", 0x82, INSTR_RRE_RR }, - { "flogr", 0x83, INSTR_RRE_RR }, - { "llgcr", 0x84, INSTR_RRE_RR }, - { "llghr", 0x85, INSTR_RRE_RR }, - { "mlgr", 0x86, INSTR_RRE_RR }, - { "dlgr", 0x87, INSTR_RRE_RR }, - { "alcgr", 0x88, INSTR_RRE_RR }, - { "slbgr", 0x89, INSTR_RRE_RR }, - { "cspg", 0x8a, INSTR_RRE_RR }, - { "idte", 0x8e, INSTR_RRF_R0RR }, - { "crdte", 0x8f, INSTR_RRF_RMRR }, - { "llcr", 0x94, INSTR_RRE_RR }, - { "llhr", 0x95, INSTR_RRE_RR }, - { "esea", 0x9d, INSTR_RRE_R0 }, - { "ptf", 0xa2, INSTR_RRE_R0 }, - { "lptea", 0xaa, INSTR_RRF_RURR }, - { "rrbm", 0xae, INSTR_RRE_RR }, - { "pfmf", 0xaf, INSTR_RRE_RR }, - { "cu14", 0xb0, INSTR_RRF_M0RR }, - { "cu24", 0xb1, INSTR_RRF_M0RR }, - { "cu41", 0xb2, INSTR_RRE_RR }, - { "cu42", 0xb3, INSTR_RRE_RR }, - { "trtre", 0xbd, INSTR_RRF_M0RR }, - { "srstu", 0xbe, INSTR_RRE_RR }, - { "trte", 0xbf, INSTR_RRF_M0RR }, - { "ahhhr", 0xc8, INSTR_RRF_R0RR2 }, - { "shhhr", 0xc9, INSTR_RRF_R0RR2 }, - { { 0, LONG_INSN_ALHHHR }, 0xca, INSTR_RRF_R0RR2 }, - { { 0, LONG_INSN_SLHHHR }, 0xcb, INSTR_RRF_R0RR2 }, - { "chhr", 0xcd, INSTR_RRE_RR }, - { "clhhr", 0xcf, INSTR_RRE_RR }, - { { 0, LONG_INSN_PCISTG }, 0xd0, INSTR_RRE_RR }, - { "pcilg", 0xd2, INSTR_RRE_RR }, - { "rpcit", 0xd3, INSTR_RRE_RR }, - { "ahhlr", 0xd8, INSTR_RRF_R0RR2 }, - { "shhlr", 0xd9, INSTR_RRF_R0RR2 }, - { { 0, LONG_INSN_ALHHLR }, 0xda, INSTR_RRF_R0RR2 }, - { { 0, LONG_INSN_SLHHLR }, 0xdb, INSTR_RRF_R0RR2 }, - { "chlr", 0xdd, INSTR_RRE_RR }, - { "clhlr", 0xdf, INSTR_RRE_RR }, - { { 0, LONG_INSN_POPCNT }, 0xe1, INSTR_RRE_RR }, - { "locgr", 0xe2, INSTR_RRF_M0RR }, - { "ngrk", 0xe4, INSTR_RRF_R0RR2 }, - { "ogrk", 0xe6, INSTR_RRF_R0RR2 }, - { "xgrk", 0xe7, INSTR_RRF_R0RR2 }, - { "agrk", 0xe8, INSTR_RRF_R0RR2 }, - { "sgrk", 0xe9, INSTR_RRF_R0RR2 }, - { "algrk", 0xea, INSTR_RRF_R0RR2 }, - { "slgrk", 0xeb, INSTR_RRF_R0RR2 }, - { "locr", 0xf2, INSTR_RRF_M0RR }, - { "nrk", 0xf4, INSTR_RRF_R0RR2 }, - { "ork", 0xf6, INSTR_RRF_R0RR2 }, - { "xrk", 0xf7, INSTR_RRF_R0RR2 }, - { "ark", 0xf8, INSTR_RRF_R0RR2 }, - { "srk", 0xf9, INSTR_RRF_R0RR2 }, - { "alrk", 0xfa, INSTR_RRF_R0RR2 }, - { "slrk", 0xfb, INSTR_RRF_R0RR2 }, - { "kmac", 0x1e, INSTR_RRE_RR }, - { "lrvr", 0x1f, INSTR_RRE_RR }, - { "km", 0x2e, INSTR_RRE_RR }, - { "kmc", 0x2f, INSTR_RRE_RR }, - { "kimd", 0x3e, INSTR_RRE_RR }, - { "klmd", 0x3f, INSTR_RRE_RR }, - { "epsw", 0x8d, INSTR_RRE_RR }, - { "trtt", 0x90, INSTR_RRF_M0RR }, - { "trto", 0x91, INSTR_RRF_M0RR }, - { "trot", 0x92, INSTR_RRF_M0RR }, - { "troo", 0x93, INSTR_RRF_M0RR }, - { "mlr", 0x96, INSTR_RRE_RR }, - { "dlr", 0x97, INSTR_RRE_RR }, - { "alcr", 0x98, INSTR_RRE_RR }, - { "slbr", 0x99, INSTR_RRE_RR }, - { "", 0, INSTR_INVALID } -}; - -static struct s390_insn opcode_c0[] = { - { "lgfi", 0x01, INSTR_RIL_RI }, - { "xihf", 0x06, INSTR_RIL_RU }, - { "xilf", 0x07, INSTR_RIL_RU }, - { "iihf", 0x08, INSTR_RIL_RU }, - { "iilf", 0x09, INSTR_RIL_RU }, - { "nihf", 0x0a, INSTR_RIL_RU }, - { "nilf", 0x0b, INSTR_RIL_RU }, - { "oihf", 0x0c, INSTR_RIL_RU }, - { "oilf", 0x0d, INSTR_RIL_RU }, - { "llihf", 0x0e, INSTR_RIL_RU }, - { "llilf", 0x0f, INSTR_RIL_RU }, - { "larl", 0x00, INSTR_RIL_RP }, - { "brcl", 0x04, INSTR_RIL_UP }, - { "brasl", 0x05, INSTR_RIL_RP }, - { "", 0, INSTR_INVALID } -}; - -static struct s390_insn opcode_c2[] = { - { "msgfi", 0x00, INSTR_RIL_RI }, - { "msfi", 0x01, INSTR_RIL_RI }, - { "slgfi", 0x04, INSTR_RIL_RU }, - { "slfi", 0x05, INSTR_RIL_RU }, - { "agfi", 0x08, INSTR_RIL_RI }, - { "afi", 0x09, INSTR_RIL_RI }, - { "algfi", 0x0a, INSTR_RIL_RU }, - { "alfi", 0x0b, INSTR_RIL_RU }, - { "cgfi", 0x0c, INSTR_RIL_RI }, - { "cfi", 0x0d, INSTR_RIL_RI }, - { "clgfi", 0x0e, INSTR_RIL_RU }, - { "clfi", 0x0f, INSTR_RIL_RU }, - { "", 0, INSTR_INVALID } -}; - -static struct s390_insn opcode_c4[] = { - { "llhrl", 0x02, INSTR_RIL_RP }, - { "lghrl", 0x04, INSTR_RIL_RP }, - { "lhrl", 0x05, INSTR_RIL_RP }, - { { 0, LONG_INSN_LLGHRL }, 0x06, INSTR_RIL_RP }, - { "sthrl", 0x07, INSTR_RIL_RP }, - { "lgrl", 0x08, INSTR_RIL_RP }, - { "stgrl", 0x0b, INSTR_RIL_RP }, - { "lgfrl", 0x0c, INSTR_RIL_RP }, - { "lrl", 0x0d, INSTR_RIL_RP }, - { { 0, LONG_INSN_LLGFRL }, 0x0e, INSTR_RIL_RP }, - { "strl", 0x0f, INSTR_RIL_RP }, - { "", 0, INSTR_INVALID } -}; - -static struct s390_insn opcode_c6[] = { - { "exrl", 0x00, INSTR_RIL_RP }, - { "pfdrl", 0x02, INSTR_RIL_UP }, - { "cghrl", 0x04, INSTR_RIL_RP }, - { "chrl", 0x05, INSTR_RIL_RP }, - { { 0, LONG_INSN_CLGHRL }, 0x06, INSTR_RIL_RP }, - { "clhrl", 0x07, INSTR_RIL_RP }, - { "cgrl", 0x08, INSTR_RIL_RP }, - { "clgrl", 0x0a, INSTR_RIL_RP }, - { "cgfrl", 0x0c, INSTR_RIL_RP }, - { "crl", 0x0d, INSTR_RIL_RP }, - { { 0, LONG_INSN_CLGFRL }, 0x0e, INSTR_RIL_RP }, - { "clrl", 0x0f, INSTR_RIL_RP }, - { "", 0, INSTR_INVALID } -}; - -static struct s390_insn opcode_c8[] = { - { "mvcos", 0x00, INSTR_SSF_RRDRD }, - { "ectg", 0x01, INSTR_SSF_RRDRD }, - { "csst", 0x02, INSTR_SSF_RRDRD }, - { "lpd", 0x04, INSTR_SSF_RRDRD2 }, - { "lpdg", 0x05, INSTR_SSF_RRDRD2 }, - { "", 0, INSTR_INVALID } -}; - -static struct s390_insn opcode_cc[] = { - { "brcth", 0x06, INSTR_RIL_RP }, - { "aih", 0x08, INSTR_RIL_RI }, - { "alsih", 0x0a, INSTR_RIL_RI }, - { { 0, LONG_INSN_ALSIHN }, 0x0b, INSTR_RIL_RI }, - { "cih", 0x0d, INSTR_RIL_RI }, - { "clih", 0x0f, INSTR_RIL_RI }, - { "", 0, INSTR_INVALID } -}; - -static struct s390_insn opcode_e3[] = { - { "ltg", 0x02, INSTR_RXY_RRRD }, - { "lrag", 0x03, INSTR_RXY_RRRD }, - { "lg", 0x04, INSTR_RXY_RRRD }, - { "cvby", 0x06, INSTR_RXY_RRRD }, - { "ag", 0x08, INSTR_RXY_RRRD }, - { "sg", 0x09, INSTR_RXY_RRRD }, - { "alg", 0x0a, INSTR_RXY_RRRD }, - { "slg", 0x0b, INSTR_RXY_RRRD }, - { "msg", 0x0c, INSTR_RXY_RRRD }, - { "dsg", 0x0d, INSTR_RXY_RRRD }, - { "cvbg", 0x0e, INSTR_RXY_RRRD }, - { "lrvg", 0x0f, INSTR_RXY_RRRD }, - { "lt", 0x12, INSTR_RXY_RRRD }, - { "lray", 0x13, INSTR_RXY_RRRD }, - { "lgf", 0x14, INSTR_RXY_RRRD }, - { "lgh", 0x15, INSTR_RXY_RRRD }, - { "llgf", 0x16, INSTR_RXY_RRRD }, - { "llgt", 0x17, INSTR_RXY_RRRD }, - { "agf", 0x18, INSTR_RXY_RRRD }, - { "sgf", 0x19, INSTR_RXY_RRRD }, - { "algf", 0x1a, INSTR_RXY_RRRD }, - { "slgf", 0x1b, INSTR_RXY_RRRD }, - { "msgf", 0x1c, INSTR_RXY_RRRD }, - { "dsgf", 0x1d, INSTR_RXY_RRRD }, - { "cg", 0x20, INSTR_RXY_RRRD }, - { "clg", 0x21, INSTR_RXY_RRRD }, - { "stg", 0x24, INSTR_RXY_RRRD }, - { "ntstg", 0x25, INSTR_RXY_RRRD }, - { "cvdy", 0x26, INSTR_RXY_RRRD }, - { "cvdg", 0x2e, INSTR_RXY_RRRD }, - { "strvg", 0x2f, INSTR_RXY_RRRD }, - { "cgf", 0x30, INSTR_RXY_RRRD }, - { "clgf", 0x31, INSTR_RXY_RRRD }, - { "ltgf", 0x32, INSTR_RXY_RRRD }, - { "cgh", 0x34, INSTR_RXY_RRRD }, - { "pfd", 0x36, INSTR_RXY_URRD }, - { "strvh", 0x3f, INSTR_RXY_RRRD }, - { "bctg", 0x46, INSTR_RXY_RRRD }, - { "sty", 0x50, INSTR_RXY_RRRD }, - { "msy", 0x51, INSTR_RXY_RRRD }, - { "ny", 0x54, INSTR_RXY_RRRD }, - { "cly", 0x55, INSTR_RXY_RRRD }, - { "oy", 0x56, INSTR_RXY_RRRD }, - { "xy", 0x57, INSTR_RXY_RRRD }, - { "ly", 0x58, INSTR_RXY_RRRD }, - { "cy", 0x59, INSTR_RXY_RRRD }, - { "ay", 0x5a, INSTR_RXY_RRRD }, - { "sy", 0x5b, INSTR_RXY_RRRD }, - { "mfy", 0x5c, INSTR_RXY_RRRD }, - { "aly", 0x5e, INSTR_RXY_RRRD }, - { "sly", 0x5f, INSTR_RXY_RRRD }, - { "sthy", 0x70, INSTR_RXY_RRRD }, - { "lay", 0x71, INSTR_RXY_RRRD }, - { "stcy", 0x72, INSTR_RXY_RRRD }, - { "icy", 0x73, INSTR_RXY_RRRD }, - { "laey", 0x75, INSTR_RXY_RRRD }, - { "lb", 0x76, INSTR_RXY_RRRD }, - { "lgb", 0x77, INSTR_RXY_RRRD }, - { "lhy", 0x78, INSTR_RXY_RRRD }, - { "chy", 0x79, INSTR_RXY_RRRD }, - { "ahy", 0x7a, INSTR_RXY_RRRD }, - { "shy", 0x7b, INSTR_RXY_RRRD }, - { "mhy", 0x7c, INSTR_RXY_RRRD }, - { "ng", 0x80, INSTR_RXY_RRRD }, - { "og", 0x81, INSTR_RXY_RRRD }, - { "xg", 0x82, INSTR_RXY_RRRD }, - { "lgat", 0x85, INSTR_RXY_RRRD }, - { "mlg", 0x86, INSTR_RXY_RRRD }, - { "dlg", 0x87, INSTR_RXY_RRRD }, - { "alcg", 0x88, INSTR_RXY_RRRD }, - { "slbg", 0x89, INSTR_RXY_RRRD }, - { "stpq", 0x8e, INSTR_RXY_RRRD }, - { "lpq", 0x8f, INSTR_RXY_RRRD }, - { "llgc", 0x90, INSTR_RXY_RRRD }, - { "llgh", 0x91, INSTR_RXY_RRRD }, - { "llc", 0x94, INSTR_RXY_RRRD }, - { "llh", 0x95, INSTR_RXY_RRRD }, - { { 0, LONG_INSN_LLGTAT }, 0x9c, INSTR_RXY_RRRD }, - { { 0, LONG_INSN_LLGFAT }, 0x9d, INSTR_RXY_RRRD }, - { "lat", 0x9f, INSTR_RXY_RRRD }, - { "lbh", 0xc0, INSTR_RXY_RRRD }, - { "llch", 0xc2, INSTR_RXY_RRRD }, - { "stch", 0xc3, INSTR_RXY_RRRD }, - { "lhh", 0xc4, INSTR_RXY_RRRD }, - { "llhh", 0xc6, INSTR_RXY_RRRD }, - { "sthh", 0xc7, INSTR_RXY_RRRD }, - { "lfhat", 0xc8, INSTR_RXY_RRRD }, - { "lfh", 0xca, INSTR_RXY_RRRD }, - { "stfh", 0xcb, INSTR_RXY_RRRD }, - { "chf", 0xcd, INSTR_RXY_RRRD }, - { "clhf", 0xcf, INSTR_RXY_RRRD }, - { { 0, LONG_INSN_MPCIFC }, 0xd0, INSTR_RXY_RRRD }, - { { 0, LONG_INSN_STPCIFC }, 0xd4, INSTR_RXY_RRRD }, - { "lrv", 0x1e, INSTR_RXY_RRRD }, - { "lrvh", 0x1f, INSTR_RXY_RRRD }, - { "strv", 0x3e, INSTR_RXY_RRRD }, - { "ml", 0x96, INSTR_RXY_RRRD }, - { "dl", 0x97, INSTR_RXY_RRRD }, - { "alc", 0x98, INSTR_RXY_RRRD }, - { "slb", 0x99, INSTR_RXY_RRRD }, - { "", 0, INSTR_INVALID } -}; - -static struct s390_insn opcode_e5[] = { - { "strag", 0x02, INSTR_SSE_RDRD }, - { "mvhhi", 0x44, INSTR_SIL_RDI }, - { "mvghi", 0x48, INSTR_SIL_RDI }, - { "mvhi", 0x4c, INSTR_SIL_RDI }, - { "chhsi", 0x54, INSTR_SIL_RDI }, - { { 0, LONG_INSN_CLHHSI }, 0x55, INSTR_SIL_RDU }, - { "cghsi", 0x58, INSTR_SIL_RDI }, - { { 0, LONG_INSN_CLGHSI }, 0x59, INSTR_SIL_RDU }, - { "chsi", 0x5c, INSTR_SIL_RDI }, - { { 0, LONG_INSN_CLFHSI }, 0x5d, INSTR_SIL_RDU }, - { { 0, LONG_INSN_TBEGIN }, 0x60, INSTR_SIL_RDU }, - { { 0, LONG_INSN_TBEGINC }, 0x61, INSTR_SIL_RDU }, - { "lasp", 0x00, INSTR_SSE_RDRD }, - { "tprot", 0x01, INSTR_SSE_RDRD }, - { "mvcsk", 0x0e, INSTR_SSE_RDRD }, - { "mvcdk", 0x0f, INSTR_SSE_RDRD }, - { "", 0, INSTR_INVALID } -}; - -static struct s390_insn opcode_e7[] = { - { "lcbb", 0x27, INSTR_RXE_RRRDM }, - { "vgef", 0x13, INSTR_VRV_VVRDM }, - { "vgeg", 0x12, INSTR_VRV_VVRDM }, - { "vgbm", 0x44, INSTR_VRI_V0I0 }, - { "vgm", 0x46, INSTR_VRI_V0IIM }, - { "vl", 0x06, INSTR_VRX_VRRD0 }, - { "vlr", 0x56, INSTR_VRR_VV00000 }, - { "vlrp", 0x05, INSTR_VRX_VRRDM }, - { "vleb", 0x00, INSTR_VRX_VRRDM }, - { "vleh", 0x01, INSTR_VRX_VRRDM }, - { "vlef", 0x03, INSTR_VRX_VRRDM }, - { "vleg", 0x02, INSTR_VRX_VRRDM }, - { "vleib", 0x40, INSTR_VRI_V0IM }, - { "vleih", 0x41, INSTR_VRI_V0IM }, - { "vleif", 0x43, INSTR_VRI_V0IM }, - { "vleig", 0x42, INSTR_VRI_V0IM }, - { "vlgv", 0x21, INSTR_VRS_RVRDM }, - { "vllez", 0x04, INSTR_VRX_VRRDM }, - { "vlm", 0x36, INSTR_VRS_VVRD0 }, - { "vlbb", 0x07, INSTR_VRX_VRRDM }, - { "vlvg", 0x22, INSTR_VRS_VRRDM }, - { "vlvgp", 0x62, INSTR_VRR_VRR0000 }, - { "vll", 0x37, INSTR_VRS_VRRD0 }, - { "vmrh", 0x61, INSTR_VRR_VVV000M }, - { "vmrl", 0x60, INSTR_VRR_VVV000M }, - { "vpk", 0x94, INSTR_VRR_VVV000M }, - { "vpks", 0x97, INSTR_VRR_VVV0M0M }, - { "vpkls", 0x95, INSTR_VRR_VVV0M0M }, - { "vperm", 0x8c, INSTR_VRR_VVV000V }, - { "vpdi", 0x84, INSTR_VRR_VVV000M }, - { "vrep", 0x4d, INSTR_VRI_VVIM }, - { "vrepi", 0x45, INSTR_VRI_V0IM }, - { "vscef", 0x1b, INSTR_VRV_VWRDM }, - { "vsceg", 0x1a, INSTR_VRV_VWRDM }, - { "vsel", 0x8d, INSTR_VRR_VVV000V }, - { "vseg", 0x5f, INSTR_VRR_VV0000M }, - { "vst", 0x0e, INSTR_VRX_VRRD0 }, - { "vsteb", 0x08, INSTR_VRX_VRRDM }, - { "vsteh", 0x09, INSTR_VRX_VRRDM }, - { "vstef", 0x0b, INSTR_VRX_VRRDM }, - { "vsteg", 0x0a, INSTR_VRX_VRRDM }, - { "vstm", 0x3e, INSTR_VRS_VVRD0 }, - { "vstl", 0x3f, INSTR_VRS_VRRD0 }, - { "vuph", 0xd7, INSTR_VRR_VV0000M }, - { "vuplh", 0xd5, INSTR_VRR_VV0000M }, - { "vupl", 0xd6, INSTR_VRR_VV0000M }, - { "vupll", 0xd4, INSTR_VRR_VV0000M }, - { "va", 0xf3, INSTR_VRR_VVV000M }, - { "vacc", 0xf1, INSTR_VRR_VVV000M }, - { "vac", 0xbb, INSTR_VRR_VVVM00V }, - { "vaccc", 0xb9, INSTR_VRR_VVVM00V }, - { "vn", 0x68, INSTR_VRR_VVV0000 }, - { "vnc", 0x69, INSTR_VRR_VVV0000 }, - { "vavg", 0xf2, INSTR_VRR_VVV000M }, - { "vavgl", 0xf0, INSTR_VRR_VVV000M }, - { "vcksm", 0x66, INSTR_VRR_VVV0000 }, - { "vec", 0xdb, INSTR_VRR_VV0000M }, - { "vecl", 0xd9, INSTR_VRR_VV0000M }, - { "vceq", 0xf8, INSTR_VRR_VVV0M0M }, - { "vch", 0xfb, INSTR_VRR_VVV0M0M }, - { "vchl", 0xf9, INSTR_VRR_VVV0M0M }, - { "vclz", 0x53, INSTR_VRR_VV0000M }, - { "vctz", 0x52, INSTR_VRR_VV0000M }, - { "vx", 0x6d, INSTR_VRR_VVV0000 }, - { "vgfm", 0xb4, INSTR_VRR_VVV000M }, - { "vgfma", 0xbc, INSTR_VRR_VVVM00V }, - { "vlc", 0xde, INSTR_VRR_VV0000M }, - { "vlp", 0xdf, INSTR_VRR_VV0000M }, - { "vmx", 0xff, INSTR_VRR_VVV000M }, - { "vmxl", 0xfd, INSTR_VRR_VVV000M }, - { "vmn", 0xfe, INSTR_VRR_VVV000M }, - { "vmnl", 0xfc, INSTR_VRR_VVV000M }, - { "vmal", 0xaa, INSTR_VRR_VVVM00V }, - { "vmae", 0xae, INSTR_VRR_VVVM00V }, - { "vmale", 0xac, INSTR_VRR_VVVM00V }, - { "vmah", 0xab, INSTR_VRR_VVVM00V }, - { "vmalh", 0xa9, INSTR_VRR_VVVM00V }, - { "vmao", 0xaf, INSTR_VRR_VVVM00V }, - { "vmalo", 0xad, INSTR_VRR_VVVM00V }, - { "vmh", 0xa3, INSTR_VRR_VVV000M }, - { "vmlh", 0xa1, INSTR_VRR_VVV000M }, - { "vml", 0xa2, INSTR_VRR_VVV000M }, - { "vme", 0xa6, INSTR_VRR_VVV000M }, - { "vmle", 0xa4, INSTR_VRR_VVV000M }, - { "vmo", 0xa7, INSTR_VRR_VVV000M }, - { "vmlo", 0xa5, INSTR_VRR_VVV000M }, - { "vno", 0x6b, INSTR_VRR_VVV0000 }, - { "vo", 0x6a, INSTR_VRR_VVV0000 }, - { { 0, LONG_INSN_VPOPCT }, 0x50, INSTR_VRR_VV0000M }, - { { 0, LONG_INSN_VERLLV }, 0x73, INSTR_VRR_VVV000M }, - { "verll", 0x33, INSTR_VRS_VVRDM }, - { "verim", 0x72, INSTR_VRI_VVV0IM }, - { "veslv", 0x70, INSTR_VRR_VVV000M }, - { "vesl", 0x30, INSTR_VRS_VVRDM }, - { { 0, LONG_INSN_VESRAV }, 0x7a, INSTR_VRR_VVV000M }, - { "vesra", 0x3a, INSTR_VRS_VVRDM }, - { { 0, LONG_INSN_VESRLV }, 0x78, INSTR_VRR_VVV000M }, - { "vesrl", 0x38, INSTR_VRS_VVRDM }, - { "vsl", 0x74, INSTR_VRR_VVV0000 }, - { "vslb", 0x75, INSTR_VRR_VVV0000 }, - { "vsldb", 0x77, INSTR_VRI_VVV0I0 }, - { "vsra", 0x7e, INSTR_VRR_VVV0000 }, - { "vsrab", 0x7f, INSTR_VRR_VVV0000 }, - { "vsrl", 0x7c, INSTR_VRR_VVV0000 }, - { "vsrlb", 0x7d, INSTR_VRR_VVV0000 }, - { "vs", 0xf7, INSTR_VRR_VVV000M }, - { "vscb", 0xf5, INSTR_VRR_VVV000M }, - { "vsb", 0xbf, INSTR_VRR_VVVM00V }, - { { 0, LONG_INSN_VSBCBI }, 0xbd, INSTR_VRR_VVVM00V }, - { "vsumg", 0x65, INSTR_VRR_VVV000M }, - { "vsumq", 0x67, INSTR_VRR_VVV000M }, - { "vsum", 0x64, INSTR_VRR_VVV000M }, - { "vtm", 0xd8, INSTR_VRR_VV00000 }, - { "vfae", 0x82, INSTR_VRR_VVV0M0M }, - { "vfee", 0x80, INSTR_VRR_VVV0M0M }, - { "vfene", 0x81, INSTR_VRR_VVV0M0M }, - { "vistr", 0x5c, INSTR_VRR_VV00M0M }, - { "vstrc", 0x8a, INSTR_VRR_VVVMM0V }, - { "vfa", 0xe3, INSTR_VRR_VVV00MM }, - { "wfc", 0xcb, INSTR_VRR_VV000MM }, - { "wfk", 0xca, INSTR_VRR_VV000MM }, - { "vfce", 0xe8, INSTR_VRR_VVV0MMM }, - { "vfch", 0xeb, INSTR_VRR_VVV0MMM }, - { "vfche", 0xea, INSTR_VRR_VVV0MMM }, - { "vcdg", 0xc3, INSTR_VRR_VV00MMM }, - { "vcdlg", 0xc1, INSTR_VRR_VV00MMM }, - { "vcgd", 0xc2, INSTR_VRR_VV00MMM }, - { "vclgd", 0xc0, INSTR_VRR_VV00MMM }, - { "vfd", 0xe5, INSTR_VRR_VVV00MM }, - { "vfi", 0xc7, INSTR_VRR_VV00MMM }, - { "vlde", 0xc4, INSTR_VRR_VV000MM }, - { "vled", 0xc5, INSTR_VRR_VV00MMM }, - { "vfm", 0xe7, INSTR_VRR_VVV00MM }, - { "vfma", 0x8f, INSTR_VRR_VVVM0MV }, - { "vfms", 0x8e, INSTR_VRR_VVVM0MV }, - { "vfpso", 0xcc, INSTR_VRR_VV00MMM }, - { "vfsq", 0xce, INSTR_VRR_VV000MM }, - { "vfs", 0xe2, INSTR_VRR_VVV00MM }, - { "vftci", 0x4a, INSTR_VRI_VVIMM }, -}; - -static struct s390_insn opcode_eb[] = { - { "lmg", 0x04, INSTR_RSY_RRRD }, - { "srag", 0x0a, INSTR_RSY_RRRD }, - { "slag", 0x0b, INSTR_RSY_RRRD }, - { "srlg", 0x0c, INSTR_RSY_RRRD }, - { "sllg", 0x0d, INSTR_RSY_RRRD }, - { "tracg", 0x0f, INSTR_RSY_RRRD }, - { "csy", 0x14, INSTR_RSY_RRRD }, - { "rllg", 0x1c, INSTR_RSY_RRRD }, - { "clmh", 0x20, INSTR_RSY_RURD }, - { "clmy", 0x21, INSTR_RSY_RURD }, - { "clt", 0x23, INSTR_RSY_RURD }, - { "stmg", 0x24, INSTR_RSY_RRRD }, - { "stctg", 0x25, INSTR_RSY_CCRD }, - { "stmh", 0x26, INSTR_RSY_RRRD }, - { "clgt", 0x2b, INSTR_RSY_RURD }, - { "stcmh", 0x2c, INSTR_RSY_RURD }, - { "stcmy", 0x2d, INSTR_RSY_RURD }, - { "lctlg", 0x2f, INSTR_RSY_CCRD }, - { "csg", 0x30, INSTR_RSY_RRRD }, - { "cdsy", 0x31, INSTR_RSY_RRRD }, - { "cdsg", 0x3e, INSTR_RSY_RRRD }, - { "bxhg", 0x44, INSTR_RSY_RRRD }, - { "bxleg", 0x45, INSTR_RSY_RRRD }, - { "ecag", 0x4c, INSTR_RSY_RRRD }, - { "tmy", 0x51, INSTR_SIY_URD }, - { "mviy", 0x52, INSTR_SIY_URD }, - { "niy", 0x54, INSTR_SIY_URD }, - { "cliy", 0x55, INSTR_SIY_URD }, - { "oiy", 0x56, INSTR_SIY_URD }, - { "xiy", 0x57, INSTR_SIY_URD }, - { "asi", 0x6a, INSTR_SIY_IRD }, - { "alsi", 0x6e, INSTR_SIY_IRD }, - { "agsi", 0x7a, INSTR_SIY_IRD }, - { "algsi", 0x7e, INSTR_SIY_IRD }, - { "icmh", 0x80, INSTR_RSY_RURD }, - { "icmy", 0x81, INSTR_RSY_RURD }, - { "clclu", 0x8f, INSTR_RSY_RRRD }, - { "stmy", 0x90, INSTR_RSY_RRRD }, - { "lmh", 0x96, INSTR_RSY_RRRD }, - { "lmy", 0x98, INSTR_RSY_RRRD }, - { "lamy", 0x9a, INSTR_RSY_AARD }, - { "stamy", 0x9b, INSTR_RSY_AARD }, - { { 0, LONG_INSN_PCISTB }, 0xd0, INSTR_RSY_RRRD }, - { "sic", 0xd1, INSTR_RSY_RRRD }, - { "srak", 0xdc, INSTR_RSY_RRRD }, - { "slak", 0xdd, INSTR_RSY_RRRD }, - { "srlk", 0xde, INSTR_RSY_RRRD }, - { "sllk", 0xdf, INSTR_RSY_RRRD }, - { "locg", 0xe2, INSTR_RSY_RDRM }, - { "stocg", 0xe3, INSTR_RSY_RDRM }, - { "lang", 0xe4, INSTR_RSY_RRRD }, - { "laog", 0xe6, INSTR_RSY_RRRD }, - { "laxg", 0xe7, INSTR_RSY_RRRD }, - { "laag", 0xe8, INSTR_RSY_RRRD }, - { "laalg", 0xea, INSTR_RSY_RRRD }, - { "loc", 0xf2, INSTR_RSY_RDRM }, - { "stoc", 0xf3, INSTR_RSY_RDRM }, - { "lan", 0xf4, INSTR_RSY_RRRD }, - { "lao", 0xf6, INSTR_RSY_RRRD }, - { "lax", 0xf7, INSTR_RSY_RRRD }, - { "laa", 0xf8, INSTR_RSY_RRRD }, - { "laal", 0xfa, INSTR_RSY_RRRD }, - { "lric", 0x60, INSTR_RSY_RDRM }, - { "stric", 0x61, INSTR_RSY_RDRM }, - { "mric", 0x62, INSTR_RSY_RDRM }, - { { 0, LONG_INSN_STCCTM }, 0x17, INSTR_RSY_RMRD }, - { "rll", 0x1d, INSTR_RSY_RRRD }, - { "mvclu", 0x8e, INSTR_RSY_RRRD }, - { "tp", 0xc0, INSTR_RSL_R0RD }, - { "", 0, INSTR_INVALID } + [VX_12] = { 4, 12, OPERAND_INDEX | OPERAND_VR }, + [V_8] = { 4, 8, OPERAND_VR }, + [V_12] = { 4, 12, OPERAND_VR }, + [V_16] = { 4, 16, OPERAND_VR }, + [V_32] = { 4, 32, OPERAND_VR }, + [X_12] = { 4, 12, OPERAND_INDEX | OPERAND_GPR }, }; -static struct s390_insn opcode_ec[] = { - { "brxhg", 0x44, INSTR_RIE_RRP }, - { "brxlg", 0x45, INSTR_RIE_RRP }, - { { 0, LONG_INSN_RISBLG }, 0x51, INSTR_RIE_RRUUU }, - { "rnsbg", 0x54, INSTR_RIE_RRUUU }, - { "risbg", 0x55, INSTR_RIE_RRUUU }, - { "rosbg", 0x56, INSTR_RIE_RRUUU }, - { "rxsbg", 0x57, INSTR_RIE_RRUUU }, - { { 0, LONG_INSN_RISBGN }, 0x59, INSTR_RIE_RRUUU }, - { { 0, LONG_INSN_RISBHG }, 0x5D, INSTR_RIE_RRUUU }, - { "cgrj", 0x64, INSTR_RIE_RRPU }, - { "clgrj", 0x65, INSTR_RIE_RRPU }, - { "cgit", 0x70, INSTR_RIE_R0IU }, - { "clgit", 0x71, INSTR_RIE_R0UU }, - { "cit", 0x72, INSTR_RIE_R0IU }, - { "clfit", 0x73, INSTR_RIE_R0UU }, - { "crj", 0x76, INSTR_RIE_RRPU }, - { "clrj", 0x77, INSTR_RIE_RRPU }, - { "cgij", 0x7c, INSTR_RIE_RUPI }, - { "clgij", 0x7d, INSTR_RIE_RUPU }, - { "cij", 0x7e, INSTR_RIE_RUPI }, - { "clij", 0x7f, INSTR_RIE_RUPU }, - { "ahik", 0xd8, INSTR_RIE_RRI0 }, - { "aghik", 0xd9, INSTR_RIE_RRI0 }, - { { 0, LONG_INSN_ALHSIK }, 0xda, INSTR_RIE_RRI0 }, - { { 0, LONG_INSN_ALGHSIK }, 0xdb, INSTR_RIE_RRI0 }, - { "cgrb", 0xe4, INSTR_RRS_RRRDU }, - { "clgrb", 0xe5, INSTR_RRS_RRRDU }, - { "crb", 0xf6, INSTR_RRS_RRRDU }, - { "clrb", 0xf7, INSTR_RRS_RRRDU }, - { "cgib", 0xfc, INSTR_RIS_RURDI }, - { "clgib", 0xfd, INSTR_RIS_RURDU }, - { "cib", 0xfe, INSTR_RIS_RURDI }, - { "clib", 0xff, INSTR_RIS_RURDU }, - { "", 0, INSTR_INVALID } +static const unsigned char formats[][6] = { + [INSTR_E] = { 0, 0, 0, 0, 0, 0 }, + [INSTR_IE_UU] = { U4_24, U4_28, 0, 0, 0, 0 }, + [INSTR_MII_UPP] = { U4_8, J12_12, J24_24 }, + [INSTR_RIE_R0IU] = { R_8, I16_16, U4_32, 0, 0, 0 }, + [INSTR_RIE_R0UU] = { R_8, U16_16, U4_32, 0, 0, 0 }, + [INSTR_RIE_RRI0] = { R_8, R_12, I16_16, 0, 0, 0 }, + [INSTR_RIE_RRP] = { R_8, R_12, J16_16, 0, 0, 0 }, + [INSTR_RIE_RRPU] = { R_8, R_12, U4_32, J16_16, 0, 0 }, + [INSTR_RIE_RRUUU] = { R_8, R_12, U8_16, U8_24, U8_32, 0 }, + [INSTR_RIE_RUI0] = { R_8, I16_16, U4_12, 0, 0, 0 }, + [INSTR_RIE_RUPI] = { R_8, I8_32, U4_12, J16_16, 0, 0 }, + [INSTR_RIE_RUPU] = { R_8, U8_32, U4_12, J16_16, 0, 0 }, + [INSTR_RIL_RI] = { R_8, I32_16, 0, 0, 0, 0 }, + [INSTR_RIL_RP] = { R_8, J32_16, 0, 0, 0, 0 }, + [INSTR_RIL_RU] = { R_8, U32_16, 0, 0, 0, 0 }, + [INSTR_RIL_UP] = { U4_8, J32_16, 0, 0, 0, 0 }, + [INSTR_RIS_RURDI] = { R_8, I8_32, U4_12, D_20, B_16, 0 }, + [INSTR_RIS_RURDU] = { R_8, U8_32, U4_12, D_20, B_16, 0 }, + [INSTR_RI_RI] = { R_8, I16_16, 0, 0, 0, 0 }, + [INSTR_RI_RP] = { R_8, J16_16, 0, 0, 0, 0 }, + [INSTR_RI_RU] = { R_8, U16_16, 0, 0, 0, 0 }, + [INSTR_RI_UP] = { U4_8, J16_16, 0, 0, 0, 0 }, + [INSTR_RRE_00] = { 0, 0, 0, 0, 0, 0 }, + [INSTR_RRE_AA] = { A_24, A_28, 0, 0, 0, 0 }, + [INSTR_RRE_AR] = { A_24, R_28, 0, 0, 0, 0 }, + [INSTR_RRE_F0] = { F_24, 0, 0, 0, 0, 0 }, + [INSTR_RRE_FF] = { F_24, F_28, 0, 0, 0, 0 }, + [INSTR_RRE_FR] = { F_24, R_28, 0, 0, 0, 0 }, + [INSTR_RRE_R0] = { R_24, 0, 0, 0, 0, 0 }, + [INSTR_RRE_RA] = { R_24, A_28, 0, 0, 0, 0 }, + [INSTR_RRE_RF] = { R_24, F_28, 0, 0, 0, 0 }, + [INSTR_RRE_RR] = { R_24, R_28, 0, 0, 0, 0 }, + [INSTR_RRF_0UFF] = { F_24, F_28, U4_20, 0, 0, 0 }, + [INSTR_RRF_0URF] = { R_24, F_28, U4_20, 0, 0, 0 }, + [INSTR_RRF_F0FF] = { F_16, F_24, F_28, 0, 0, 0 }, + [INSTR_RRF_F0FF2] = { F_24, F_16, F_28, 0, 0, 0 }, + [INSTR_RRF_F0FR] = { F_24, F_16, R_28, 0, 0, 0 }, + [INSTR_RRF_FFRU] = { F_24, F_16, R_28, U4_20, 0, 0 }, + [INSTR_RRF_FUFF] = { F_24, F_16, F_28, U4_20, 0, 0 }, + [INSTR_RRF_FUFF2] = { F_24, F_28, F_16, U4_20, 0, 0 }, + [INSTR_RRF_R0RR] = { R_24, R_16, R_28, 0, 0, 0 }, + [INSTR_RRF_R0RR2] = { R_24, R_28, R_16, 0, 0, 0 }, + [INSTR_RRF_RURR] = { R_24, R_28, R_16, U4_20, 0, 0 }, + [INSTR_RRF_RURR2] = { R_24, R_16, R_28, U4_20, 0, 0 }, + [INSTR_RRF_U0FF] = { F_24, U4_16, F_28, 0, 0, 0 }, + [INSTR_RRF_U0RF] = { R_24, U4_16, F_28, 0, 0, 0 }, + [INSTR_RRF_U0RR] = { R_24, R_28, U4_16, 0, 0, 0 }, + [INSTR_RRF_UUFF] = { F_24, U4_16, F_28, U4_20, 0, 0 }, + [INSTR_RRF_UUFR] = { F_24, U4_16, R_28, U4_20, 0, 0 }, + [INSTR_RRF_UURF] = { R_24, U4_16, F_28, U4_20, 0, 0 }, + [INSTR_RRS_RRRDU] = { R_8, R_12, U4_32, D_20, B_16 }, + [INSTR_RR_FF] = { F_8, F_12, 0, 0, 0, 0 }, + [INSTR_RR_R0] = { R_8, 0, 0, 0, 0, 0 }, + [INSTR_RR_RR] = { R_8, R_12, 0, 0, 0, 0 }, + [INSTR_RR_U0] = { U8_8, 0, 0, 0, 0, 0 }, + [INSTR_RR_UR] = { U4_8, R_12, 0, 0, 0, 0 }, + [INSTR_RSI_RRP] = { R_8, R_12, J16_16, 0, 0, 0 }, + [INSTR_RSL_LRDFU] = { F_32, D_20, L8_8, B_16, U4_36, 0 }, + [INSTR_RSL_R0RD] = { D_20, L4_8, B_16, 0, 0, 0 }, + [INSTR_RSY_AARD] = { A_8, A_12, D20_20, B_16, 0, 0 }, + [INSTR_RSY_CCRD] = { C_8, C_12, D20_20, B_16, 0, 0 }, + [INSTR_RSY_RDRU] = { R_8, D20_20, B_16, U4_12, 0, 0 }, + [INSTR_RSY_RRRD] = { R_8, R_12, D20_20, B_16, 0, 0 }, + [INSTR_RSY_RURD] = { R_8, U4_12, D20_20, B_16, 0, 0 }, + [INSTR_RSY_RURD2] = { R_8, D20_20, B_16, U4_12, 0, 0 }, + [INSTR_RS_AARD] = { A_8, A_12, D_20, B_16, 0, 0 }, + [INSTR_RS_CCRD] = { C_8, C_12, D_20, B_16, 0, 0 }, + [INSTR_RS_R0RD] = { R_8, D_20, B_16, 0, 0, 0 }, + [INSTR_RS_RRRD] = { R_8, R_12, D_20, B_16, 0, 0 }, + [INSTR_RS_RURD] = { R_8, U4_12, D_20, B_16, 0, 0 }, + [INSTR_RXE_FRRD] = { F_8, D_20, X_12, B_16, 0, 0 }, + [INSTR_RXE_RRRDU] = { R_8, D_20, X_12, B_16, U4_32, 0 }, + [INSTR_RXF_FRRDF] = { F_32, F_8, D_20, X_12, B_16, 0 }, + [INSTR_RXY_FRRD] = { F_8, D20_20, X_12, B_16, 0, 0 }, + [INSTR_RXY_RRRD] = { R_8, D20_20, X_12, B_16, 0, 0 }, + [INSTR_RXY_URRD] = { U4_8, D20_20, X_12, B_16, 0, 0 }, + [INSTR_RX_FRRD] = { F_8, D_20, X_12, B_16, 0, 0 }, + [INSTR_RX_RRRD] = { R_8, D_20, X_12, B_16, 0, 0 }, + [INSTR_RX_URRD] = { U4_8, D_20, X_12, B_16, 0, 0 }, + [INSTR_SIL_RDI] = { D_20, B_16, I16_32, 0, 0, 0 }, + [INSTR_SIL_RDU] = { D_20, B_16, U16_32, 0, 0, 0 }, + [INSTR_SIY_IRD] = { D20_20, B_16, I8_8, 0, 0, 0 }, + [INSTR_SIY_URD] = { D20_20, B_16, U8_8, 0, 0, 0 }, + [INSTR_SI_RD] = { D_20, B_16, 0, 0, 0, 0 }, + [INSTR_SI_URD] = { D_20, B_16, U8_8, 0, 0, 0 }, + [INSTR_SMI_U0RDP] = { U4_8, J16_32, D_20, B_16, 0, 0 }, + [INSTR_SSE_RDRD] = { D_20, B_16, D_36, B_32, 0, 0 }, + [INSTR_SSF_RRDRD] = { D_20, B_16, D_36, B_32, R_8, 0 }, + [INSTR_SSF_RRDRD2] = { R_8, D_20, B_16, D_36, B_32, 0 }, + [INSTR_SS_L0RDRD] = { D_20, L8_8, B_16, D_36, B_32, 0 }, + [INSTR_SS_L2RDRD] = { D_20, B_16, D_36, L8_8, B_32, 0 }, + [INSTR_SS_LIRDRD] = { D_20, L4_8, B_16, D_36, B_32, U4_12 }, + [INSTR_SS_LLRDRD] = { D_20, L4_8, B_16, D_36, L4_12, B_32 }, + [INSTR_SS_RRRDRD] = { D_20, R_8, B_16, D_36, B_32, R_12 }, + [INSTR_SS_RRRDRD2] = { R_8, D_20, B_16, R_12, D_36, B_32 }, + [INSTR_SS_RRRDRD3] = { R_8, R_12, D_20, B_16, D_36, B_32 }, + [INSTR_S_00] = { 0, 0, 0, 0, 0, 0 }, + [INSTR_S_RD] = { D_20, B_16, 0, 0, 0, 0 }, + [INSTR_VRI_V0IU] = { V_8, I16_16, U4_32, 0, 0, 0 }, + [INSTR_VRI_V0U] = { V_8, U16_16, 0, 0, 0, 0 }, + [INSTR_VRI_V0UU2] = { V_8, U16_16, U4_32, 0, 0, 0 }, + [INSTR_VRI_V0UUU] = { V_8, U8_16, U8_24, U4_32, 0, 0 }, + [INSTR_VRI_VR0UU] = { V_8, R_12, U8_28, U4_24, 0, 0 }, + [INSTR_VRI_VVUU] = { V_8, V_12, U16_16, U4_32, 0, 0 }, + [INSTR_VRI_VVUUU] = { V_8, V_12, U12_16, U4_32, U4_28, 0 }, + [INSTR_VRI_VVUUU2] = { V_8, V_12, U8_28, U8_16, U4_24, 0 }, + [INSTR_VRI_VVV0U] = { V_8, V_12, V_16, U8_24, 0, 0 }, + [INSTR_VRI_VVV0UU] = { V_8, V_12, V_16, U8_24, U4_32, 0 }, + [INSTR_VRI_VVV0UU2] = { V_8, V_12, V_16, U8_28, U4_24, 0 }, + [INSTR_VRR_0V] = { V_12, 0, 0, 0, 0, 0 }, + [INSTR_VRR_0VV0U] = { V_12, V_16, U4_24, 0, 0, 0 }, + [INSTR_VRR_RV0U] = { R_8, V_12, U4_24, 0, 0, 0 }, + [INSTR_VRR_VRR] = { V_8, R_12, R_16, 0, 0, 0 }, + [INSTR_VRR_VV] = { V_8, V_12, 0, 0, 0, 0 }, + [INSTR_VRR_VV0U] = { V_8, V_12, U4_32, 0, 0, 0 }, + [INSTR_VRR_VV0U0U] = { V_8, V_12, U4_32, U4_24, 0, 0 }, + [INSTR_VRR_VV0UU2] = { V_8, V_12, U4_32, U4_28, 0, 0 }, + [INSTR_VRR_VV0UUU] = { V_8, V_12, U4_32, U4_28, U4_24, 0 }, + [INSTR_VRR_VVV] = { V_8, V_12, V_16, 0, 0, 0 }, + [INSTR_VRR_VVV0U] = { V_8, V_12, V_16, U4_32, 0, 0 }, + [INSTR_VRR_VVV0U0U] = { V_8, V_12, V_16, U4_32, U4_24, 0 }, + [INSTR_VRR_VVV0UU] = { V_8, V_12, V_16, U4_32, U4_28, 0 }, + [INSTR_VRR_VVV0UUU] = { V_8, V_12, V_16, U4_32, U4_28, U4_24 }, + [INSTR_VRR_VVV0V] = { V_8, V_12, V_16, V_32, 0, 0 }, + [INSTR_VRR_VVVU0UV] = { V_8, V_12, V_16, V_32, U4_28, U4_20 }, + [INSTR_VRR_VVVU0V] = { V_8, V_12, V_16, V_32, U4_20, 0 }, + [INSTR_VRR_VVVUU0V] = { V_8, V_12, V_16, V_32, U4_20, U4_24 }, + [INSTR_VRS_RRDV] = { V_32, R_12, D_20, B_16, 0, 0 }, + [INSTR_VRS_RVRDU] = { R_8, V_12, D_20, B_16, U4_32, 0 }, + [INSTR_VRS_VRRD] = { V_8, R_12, D_20, B_16, 0, 0 }, + [INSTR_VRS_VRRDU] = { V_8, R_12, D_20, B_16, U4_32, 0 }, + [INSTR_VRS_VVRD] = { V_8, V_12, D_20, B_16, 0, 0 }, + [INSTR_VRS_VVRDU] = { V_8, V_12, D_20, B_16, U4_32, 0 }, + [INSTR_VRV_VVXRDU] = { V_8, D_20, VX_12, B_16, U4_32, 0 }, + [INSTR_VRX_VRRD] = { V_8, D_20, X_12, B_16, 0, 0 }, + [INSTR_VRX_VRRDU] = { V_8, D_20, X_12, B_16, U4_32, 0 }, + [INSTR_VRX_VV] = { V_8, V_12, 0, 0, 0, 0 }, + [INSTR_VSI_URDV] = { V_32, D_20, B_16, U8_8, 0, 0 }, }; -static struct s390_insn opcode_ed[] = { - { "mayl", 0x38, INSTR_RXF_FRRDF }, - { "myl", 0x39, INSTR_RXF_FRRDF }, - { "may", 0x3a, INSTR_RXF_FRRDF }, - { "my", 0x3b, INSTR_RXF_FRRDF }, - { "mayh", 0x3c, INSTR_RXF_FRRDF }, - { "myh", 0x3d, INSTR_RXF_FRRDF }, - { "sldt", 0x40, INSTR_RXF_FRRDF }, - { "srdt", 0x41, INSTR_RXF_FRRDF }, - { "slxt", 0x48, INSTR_RXF_FRRDF }, - { "srxt", 0x49, INSTR_RXF_FRRDF }, - { "tdcet", 0x50, INSTR_RXE_FRRD }, - { "tdget", 0x51, INSTR_RXE_FRRD }, - { "tdcdt", 0x54, INSTR_RXE_FRRD }, - { "tdgdt", 0x55, INSTR_RXE_FRRD }, - { "tdcxt", 0x58, INSTR_RXE_FRRD }, - { "tdgxt", 0x59, INSTR_RXE_FRRD }, - { "ley", 0x64, INSTR_RXY_FRRD }, - { "ldy", 0x65, INSTR_RXY_FRRD }, - { "stey", 0x66, INSTR_RXY_FRRD }, - { "stdy", 0x67, INSTR_RXY_FRRD }, - { "czdt", 0xa8, INSTR_RSL_LRDFU }, - { "czxt", 0xa9, INSTR_RSL_LRDFU }, - { "cdzt", 0xaa, INSTR_RSL_LRDFU }, - { "cxzt", 0xab, INSTR_RSL_LRDFU }, - { "ldeb", 0x04, INSTR_RXE_FRRD }, - { "lxdb", 0x05, INSTR_RXE_FRRD }, - { "lxeb", 0x06, INSTR_RXE_FRRD }, - { "mxdb", 0x07, INSTR_RXE_FRRD }, - { "keb", 0x08, INSTR_RXE_FRRD }, - { "ceb", 0x09, INSTR_RXE_FRRD }, - { "aeb", 0x0a, INSTR_RXE_FRRD }, - { "seb", 0x0b, INSTR_RXE_FRRD }, - { "mdeb", 0x0c, INSTR_RXE_FRRD }, - { "deb", 0x0d, INSTR_RXE_FRRD }, - { "maeb", 0x0e, INSTR_RXF_FRRDF }, - { "mseb", 0x0f, INSTR_RXF_FRRDF }, - { "tceb", 0x10, INSTR_RXE_FRRD }, - { "tcdb", 0x11, INSTR_RXE_FRRD }, - { "tcxb", 0x12, INSTR_RXE_FRRD }, - { "sqeb", 0x14, INSTR_RXE_FRRD }, - { "sqdb", 0x15, INSTR_RXE_FRRD }, - { "meeb", 0x17, INSTR_RXE_FRRD }, - { "kdb", 0x18, INSTR_RXE_FRRD }, - { "cdb", 0x19, INSTR_RXE_FRRD }, - { "adb", 0x1a, INSTR_RXE_FRRD }, - { "sdb", 0x1b, INSTR_RXE_FRRD }, - { "mdb", 0x1c, INSTR_RXE_FRRD }, - { "ddb", 0x1d, INSTR_RXE_FRRD }, - { "madb", 0x1e, INSTR_RXF_FRRDF }, - { "msdb", 0x1f, INSTR_RXF_FRRDF }, - { "lde", 0x24, INSTR_RXE_FRRD }, - { "lxd", 0x25, INSTR_RXE_FRRD }, - { "lxe", 0x26, INSTR_RXE_FRRD }, - { "mae", 0x2e, INSTR_RXF_FRRDF }, - { "mse", 0x2f, INSTR_RXF_FRRDF }, - { "sqe", 0x34, INSTR_RXE_FRRD }, - { "sqd", 0x35, INSTR_RXE_FRRD }, - { "mee", 0x37, INSTR_RXE_FRRD }, - { "mad", 0x3e, INSTR_RXF_FRRDF }, - { "msd", 0x3f, INSTR_RXF_FRRDF }, - { "", 0, INSTR_INVALID } -}; +static char long_insn_name[][7] = LONG_INSN_INITIALIZER; +static struct s390_insn opcode[] = OPCODE_TABLE_INITIALIZER; +static struct s390_opcode_offset opcode_offset[] = OPCODE_OFFSET_INITIALIZER; /* Extracts an operand value from an instruction. */ static unsigned int extract_operand(unsigned char *code, @@ -1777,114 +391,27 @@ static unsigned int extract_operand(unsigned char *code, struct s390_insn *find_insn(unsigned char *code) { - unsigned char opfrag = code[1]; - unsigned char opmask; - struct s390_insn *table; + struct s390_opcode_offset *entry; + struct s390_insn *insn; + unsigned char opfrag; + int i; - switch (code[0]) { - case 0x01: - table = opcode_01; - break; - case 0xa5: - table = opcode_a5; - break; - case 0xa7: - table = opcode_a7; - break; - case 0xaa: - table = opcode_aa; - break; - case 0xb2: - table = opcode_b2; - break; - case 0xb3: - table = opcode_b3; - break; - case 0xb9: - table = opcode_b9; - break; - case 0xc0: - table = opcode_c0; - break; - case 0xc2: - table = opcode_c2; - break; - case 0xc4: - table = opcode_c4; - break; - case 0xc6: - table = opcode_c6; - break; - case 0xc8: - table = opcode_c8; - break; - case 0xcc: - table = opcode_cc; - break; - case 0xe3: - table = opcode_e3; - opfrag = code[5]; - break; - case 0xe5: - table = opcode_e5; - break; - case 0xe7: - table = opcode_e7; - opfrag = code[5]; - break; - case 0xeb: - table = opcode_eb; - opfrag = code[5]; - break; - case 0xec: - table = opcode_ec; - opfrag = code[5]; - break; - case 0xed: - table = opcode_ed; - opfrag = code[5]; - break; - default: - table = opcode; - opfrag = code[0]; - break; - } - while (table->format != INSTR_INVALID) { - opmask = formats[table->format][0]; - if (table->opfrag == (opfrag & opmask)) - return table; - table++; + for (i = 0; i < ARRAY_SIZE(opcode_offset); i++) { + entry = &opcode_offset[i]; + if (entry->opcode == code[0] || entry->opcode == 0) + break; } - return NULL; -} -/** - * insn_to_mnemonic - decode an s390 instruction - * @instruction: instruction to decode - * @buf: buffer to fill with mnemonic - * @len: length of buffer - * - * Decode the instruction at @instruction and store the corresponding - * mnemonic into @buf of length @len. - * @buf is left unchanged if the instruction could not be decoded. - * Returns: - * %0 on success, %-ENOENT if the instruction was not found. - */ -int insn_to_mnemonic(unsigned char *instruction, char *buf, unsigned int len) -{ - struct s390_insn *insn; + opfrag = *(code + entry->byte) & entry->mask; - insn = find_insn(instruction); - if (!insn) - return -ENOENT; - if (insn->name[0] == '\0') - snprintf(buf, len, "%s", - long_insn_name[(int) insn->name[1]]); - else - snprintf(buf, len, "%.5s", insn->name); - return 0; + insn = &opcode[entry->offset]; + for (i = 0; i < entry->count; i++) { + if (insn->opfrag == opfrag) + return insn; + insn++; + } + return NULL; } -EXPORT_SYMBOL_GPL(insn_to_mnemonic); static int print_insn(char *buffer, unsigned char *code, unsigned long addr) { @@ -1899,14 +426,14 @@ static int print_insn(char *buffer, unsigned char *code, unsigned long addr) ptr = buffer; insn = find_insn(code); if (insn) { - if (insn->name[0] == '\0') - ptr += sprintf(ptr, "%s\t", - long_insn_name[(int) insn->name[1]]); + if (insn->zero == 0) + ptr += sprintf(ptr, "%.7s\t", + long_insn_name[insn->offset]); else ptr += sprintf(ptr, "%.5s\t", insn->name); /* Extract the operands. */ separator = 0; - for (ops = formats[insn->format] + 1, i = 0; + for (ops = formats[insn->format], i = 0; *ops != 0 && i < 6; ops++, i++) { operand = operands + *ops; value = extract_operand(code, operand); diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index b945448b9eae..497a92047591 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -31,14 +31,6 @@ #include <asm/facility.h> #include "entry.h" -/* - * Create a Kernel NSS if the SAVESYS= parameter is defined - */ -#define DEFSYS_CMD_SIZE 128 -#define SAVESYS_CMD_SIZE 32 - -char kernel_nss_name[NSS_NAME_SIZE + 1]; - static void __init setup_boot_command_line(void); /* @@ -59,134 +51,6 @@ static void __init reset_tod_clock(void) S390_lowcore.last_update_clock = TOD_UNIX_EPOCH; } -#ifdef CONFIG_SHARED_KERNEL -int __init savesys_ipl_nss(char *cmd, const int cmdlen); - -asm( - " .section .init.text,\"ax\",@progbits\n" - " .align 4\n" - " .type savesys_ipl_nss, @function\n" - "savesys_ipl_nss:\n" - " stmg 6,15,48(15)\n" - " lgr 14,3\n" - " sam31\n" - " diag 2,14,0x8\n" - " sam64\n" - " lgr 2,14\n" - " lmg 6,15,48(15)\n" - " br 14\n" - " .size savesys_ipl_nss, .-savesys_ipl_nss\n" - " .previous\n"); - -static __initdata char upper_command_line[COMMAND_LINE_SIZE]; - -static noinline __init void create_kernel_nss(void) -{ - unsigned int i, stext_pfn, eshared_pfn, end_pfn, min_size; -#ifdef CONFIG_BLK_DEV_INITRD - unsigned int sinitrd_pfn, einitrd_pfn; -#endif - int response; - int hlen; - size_t len; - char *savesys_ptr; - char defsys_cmd[DEFSYS_CMD_SIZE]; - char savesys_cmd[SAVESYS_CMD_SIZE]; - - /* Do nothing if we are not running under VM */ - if (!MACHINE_IS_VM) - return; - - /* Convert COMMAND_LINE to upper case */ - for (i = 0; i < strlen(boot_command_line); i++) - upper_command_line[i] = toupper(boot_command_line[i]); - - savesys_ptr = strstr(upper_command_line, "SAVESYS="); - - if (!savesys_ptr) - return; - - savesys_ptr += 8; /* Point to the beginning of the NSS name */ - for (i = 0; i < NSS_NAME_SIZE; i++) { - if (savesys_ptr[i] == ' ' || savesys_ptr[i] == '\0') - break; - kernel_nss_name[i] = savesys_ptr[i]; - } - - stext_pfn = PFN_DOWN(__pa(&_stext)); - eshared_pfn = PFN_DOWN(__pa(&_eshared)); - end_pfn = PFN_UP(__pa(&_end)); - min_size = end_pfn << 2; - - hlen = snprintf(defsys_cmd, DEFSYS_CMD_SIZE, - "DEFSYS %s 00000-%.5X EW %.5X-%.5X SR %.5X-%.5X", - kernel_nss_name, stext_pfn - 1, stext_pfn, - eshared_pfn - 1, eshared_pfn, end_pfn); - -#ifdef CONFIG_BLK_DEV_INITRD - if (INITRD_START && INITRD_SIZE) { - sinitrd_pfn = PFN_DOWN(__pa(INITRD_START)); - einitrd_pfn = PFN_UP(__pa(INITRD_START + INITRD_SIZE)); - min_size = einitrd_pfn << 2; - hlen += snprintf(defsys_cmd + hlen, DEFSYS_CMD_SIZE - hlen, - " EW %.5X-%.5X", sinitrd_pfn, einitrd_pfn); - } -#endif - - snprintf(defsys_cmd + hlen, DEFSYS_CMD_SIZE - hlen, - " EW MINSIZE=%.7iK PARMREGS=0-13", min_size); - defsys_cmd[DEFSYS_CMD_SIZE - 1] = '\0'; - snprintf(savesys_cmd, SAVESYS_CMD_SIZE, "SAVESYS %s \n IPL %s", - kernel_nss_name, kernel_nss_name); - savesys_cmd[SAVESYS_CMD_SIZE - 1] = '\0'; - - __cpcmd(defsys_cmd, NULL, 0, &response); - - if (response != 0) { - pr_err("Defining the Linux kernel NSS failed with rc=%d\n", - response); - kernel_nss_name[0] = '\0'; - return; - } - - len = strlen(savesys_cmd); - ASCEBC(savesys_cmd, len); - response = savesys_ipl_nss(savesys_cmd, len); - - /* On success: response is equal to the command size, - * max SAVESYS_CMD_SIZE - * On error: response contains the numeric portion of cp error message. - * for SAVESYS it will be >= 263 - * for missing privilege class, it will be 1 - */ - if (response > SAVESYS_CMD_SIZE || response == 1) { - pr_err("Saving the Linux kernel NSS failed with rc=%d\n", - response); - kernel_nss_name[0] = '\0'; - return; - } - - /* re-initialize cputime accounting. */ - get_tod_clock_ext(tod_clock_base); - S390_lowcore.last_update_clock = *(__u64 *) &tod_clock_base[1]; - S390_lowcore.last_update_timer = 0x7fffffffffffffffULL; - S390_lowcore.user_timer = 0; - S390_lowcore.system_timer = 0; - asm volatile("SPT 0(%0)" : : "a" (&S390_lowcore.last_update_timer)); - - /* re-setup boot command line with new ipl vm parms */ - ipl_update_parameters(); - setup_boot_command_line(); - - ipl_flags = IPL_NSS_VALID; -} - -#else /* CONFIG_SHARED_KERNEL */ - -static inline void create_kernel_nss(void) { } - -#endif /* CONFIG_SHARED_KERNEL */ - /* * Clear bss memory */ @@ -375,8 +239,10 @@ static __init void detect_machine_facilities(void) S390_lowcore.machine_flags |= MACHINE_FLAG_IDTE; if (test_facility(40)) S390_lowcore.machine_flags |= MACHINE_FLAG_LPP; - if (test_facility(50) && test_facility(73)) + if (test_facility(50) && test_facility(73)) { S390_lowcore.machine_flags |= MACHINE_FLAG_TE; + __ctl_set_bit(0, 55); + } if (test_facility(51)) S390_lowcore.machine_flags |= MACHINE_FLAG_TLB_LC; if (test_facility(129)) { @@ -549,10 +415,6 @@ static void __init setup_boot_command_line(void) append_to_cmdline(append_ipl_scpdata); } -/* - * Save ipl parameters, clear bss memory, initialize storage keys - * and create a kernel NSS at startup if the SAVESYS= parm is defined - */ void __init startup_init(void) { reset_tod_clock(); @@ -569,7 +431,6 @@ void __init startup_init(void) setup_arch_string(); ipl_update_parameters(); setup_boot_command_line(); - create_kernel_nss(); detect_diag9c(); detect_diag44(); detect_machine_facilities(); diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 7c6904d616d8..f498d201f98d 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -13,6 +13,7 @@ #include <linux/linkage.h> #include <asm/processor.h> #include <asm/cache.h> +#include <asm/ctl_reg.h> #include <asm/errno.h> #include <asm/ptrace.h> #include <asm/thread_info.h> @@ -952,15 +953,56 @@ load_fpu_regs: */ ENTRY(mcck_int_handler) STCK __LC_MCCK_CLOCK - la %r1,4095 # revalidate r1 - spt __LC_CPU_TIMER_SAVE_AREA-4095(%r1) # revalidate cpu timer - lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)# revalidate gprs + la %r1,4095 # validate r1 + spt __LC_CPU_TIMER_SAVE_AREA-4095(%r1) # validate cpu timer + sckc __LC_CLOCK_COMPARATOR # validate comparator + lam %a0,%a15,__LC_AREGS_SAVE_AREA-4095(%r1) # validate acrs + lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)# validate gprs lg %r12,__LC_CURRENT larl %r13,cleanup_critical lmg %r8,%r9,__LC_MCK_OLD_PSW TSTMSK __LC_MCCK_CODE,MCCK_CODE_SYSTEM_DAMAGE jo .Lmcck_panic # yes -> rest of mcck code invalid - lghi %r14,__LC_CPU_TIMER_SAVE_AREA + TSTMSK __LC_MCCK_CODE,MCCK_CODE_CR_VALID + jno .Lmcck_panic # control registers invalid -> panic + la %r14,4095 + lctlg %c0,%c15,__LC_CREGS_SAVE_AREA-4095(%r14) # validate ctl regs + ptlb + lg %r11,__LC_MCESAD-4095(%r14) # extended machine check save area + nill %r11,0xfc00 # MCESA_ORIGIN_MASK + TSTMSK __LC_CREGS_SAVE_AREA+16-4095(%r14),CR2_GUARDED_STORAGE + jno 0f + TSTMSK __LC_MCCK_CODE,MCCK_CODE_GS_VALID + jno 0f + .insn rxy,0xe3000000004d,0,__MCESA_GS_SAVE_AREA(%r11) # LGSC +0: l %r14,__LC_FP_CREG_SAVE_AREA-4095(%r14) + TSTMSK __LC_MCCK_CODE,MCCK_CODE_FC_VALID + jo 0f + sr %r14,%r14 +0: sfpc %r14 + TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_VX + jo 0f + lghi %r14,__LC_FPREGS_SAVE_AREA + ld %f0,0(%r14) + ld %f1,8(%r14) + ld %f2,16(%r14) + ld %f3,24(%r14) + ld %f4,32(%r14) + ld %f5,40(%r14) + ld %f6,48(%r14) + ld %f7,56(%r14) + ld %f8,64(%r14) + ld %f9,72(%r14) + ld %f10,80(%r14) + ld %f11,88(%r14) + ld %f12,96(%r14) + ld %f13,104(%r14) + ld %f14,112(%r14) + ld %f15,120(%r14) + j 1f +0: VLM %v0,%v15,0,%r11 + VLM %v16,%v31,256,%r11 +1: lghi %r14,__LC_CPU_TIMER_SAVE_AREA mvc __LC_MCCK_ENTER_TIMER(8),0(%r14) TSTMSK __LC_MCCK_CODE,MCCK_CODE_CPU_TIMER_VALID jo 3f @@ -976,9 +1018,13 @@ ENTRY(mcck_int_handler) la %r14,__LC_LAST_UPDATE_TIMER 2: spt 0(%r14) mvc __LC_MCCK_ENTER_TIMER(8),0(%r14) -3: TSTMSK __LC_MCCK_CODE,(MCCK_CODE_PSW_MWP_VALID|MCCK_CODE_PSW_IA_VALID) - jno .Lmcck_panic # no -> skip cleanup critical - SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+64,__LC_MCCK_ENTER_TIMER +3: TSTMSK __LC_MCCK_CODE,MCCK_CODE_PSW_MWP_VALID + jno .Lmcck_panic + tmhh %r8,0x0001 # interrupting from user ? + jnz 4f + TSTMSK __LC_MCCK_CODE,MCCK_CODE_PSW_IA_VALID + jno .Lmcck_panic +4: SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+64,__LC_MCCK_ENTER_TIMER .Lmcck_skip: lghi %r14,__LC_GPREGS_SAVE_AREA+64 stmg %r0,%r7,__PT_R0(%r11) diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index 905bde782490..e87758f8fbdc 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -78,6 +78,7 @@ long sys_s390_runtime_instr(int command, int signum); long sys_s390_guarded_storage(int command, struct gs_cb __user *); long sys_s390_pci_mmio_write(unsigned long, const void __user *, size_t); long sys_s390_pci_mmio_read(unsigned long, void __user *, size_t); +long sys_s390_sthyi(unsigned long function_code, void __user *buffer, u64 __user *return_code, unsigned long flags); DECLARE_PER_CPU(u64, mt_cycles[8]); diff --git a/arch/s390/kernel/guarded_storage.c b/arch/s390/kernel/guarded_storage.c index bff39b66c9ff..d14dd1c2e524 100644 --- a/arch/s390/kernel/guarded_storage.c +++ b/arch/s390/kernel/guarded_storage.c @@ -12,11 +12,10 @@ #include <asm/guarded_storage.h> #include "entry.h" -void exit_thread_gs(void) +void guarded_storage_release(struct task_struct *tsk) { - kfree(current->thread.gs_cb); - kfree(current->thread.gs_bc_cb); - current->thread.gs_cb = current->thread.gs_bc_cb = NULL; + kfree(tsk->thread.gs_cb); + kfree(tsk->thread.gs_bc_cb); } static int gs_enable(void) diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 8e622bb52f7a..310e59e6eb4b 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -279,8 +279,6 @@ static __init enum ipl_type get_ipl_type(void) { struct ipl_parameter_block *ipl = IPL_PARMBLOCK_START; - if (ipl_flags & IPL_NSS_VALID) - return IPL_TYPE_NSS; if (!(ipl_flags & IPL_DEVNO_VALID)) return IPL_TYPE_UNKNOWN; if (!(ipl_flags & IPL_PARMBLOCK_VALID)) @@ -533,22 +531,6 @@ static struct attribute_group ipl_ccw_attr_group_lpar = { .attrs = ipl_ccw_attrs_lpar }; -/* NSS ipl device attributes */ - -DEFINE_IPL_ATTR_RO(ipl_nss, name, "%s\n", kernel_nss_name); - -static struct attribute *ipl_nss_attrs[] = { - &sys_ipl_type_attr.attr, - &sys_ipl_nss_name_attr.attr, - &sys_ipl_ccw_loadparm_attr.attr, - &sys_ipl_vm_parm_attr.attr, - NULL, -}; - -static struct attribute_group ipl_nss_attr_group = { - .attrs = ipl_nss_attrs, -}; - /* UNKNOWN ipl device attributes */ static struct attribute *ipl_unknown_attrs[] = { @@ -598,9 +580,6 @@ static int __init ipl_init(void) case IPL_TYPE_FCP_DUMP: rc = sysfs_create_group(&ipl_kset->kobj, &ipl_fcp_attr_group); break; - case IPL_TYPE_NSS: - rc = sysfs_create_group(&ipl_kset->kobj, &ipl_nss_attr_group); - break; default: rc = sysfs_create_group(&ipl_kset->kobj, &ipl_unknown_attr_group); @@ -1172,18 +1151,6 @@ static int __init reipl_nss_init(void) return rc; reipl_block_ccw_init(reipl_block_nss); - if (ipl_info.type == IPL_TYPE_NSS) { - memset(reipl_block_nss->ipl_info.ccw.nss_name, - ' ', NSS_NAME_SIZE); - memcpy(reipl_block_nss->ipl_info.ccw.nss_name, - kernel_nss_name, strlen(kernel_nss_name)); - ASCEBC(reipl_block_nss->ipl_info.ccw.nss_name, NSS_NAME_SIZE); - reipl_block_nss->ipl_info.ccw.vm_flags |= - DIAG308_VM_FLAGS_NSS_VALID; - - reipl_block_ccw_fill_parms(reipl_block_nss); - } - reipl_capabilities |= IPL_TYPE_NSS; return 0; } @@ -1971,9 +1938,6 @@ void __init setup_ipl(void) ipl_info.data.fcp.lun = IPL_PARMBLOCK_START->ipl_info.fcp.lun; break; case IPL_TYPE_NSS: - strncpy(ipl_info.data.nss.name, kernel_nss_name, - sizeof(ipl_info.data.nss.name)); - break; case IPL_TYPE_UNKNOWN: /* We have no info to copy */ break; diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index 6842e4501e2e..1a6521af1751 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -161,8 +161,6 @@ struct swap_insn_args { static int swap_instruction(void *data) { - struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - unsigned long status = kcb->kprobe_status; struct swap_insn_args *args = data; struct ftrace_insn new_insn, *insn; struct kprobe *p = args->p; @@ -185,9 +183,7 @@ static int swap_instruction(void *data) ftrace_generate_nop_insn(&new_insn); } skip_ftrace: - kcb->kprobe_status = KPROBE_SWAP_INST; s390_kernel_write(p->addr, &new_insn, len); - kcb->kprobe_status = status; return 0; } NOKPROBE_SYMBOL(swap_instruction); @@ -574,9 +570,6 @@ static int kprobe_trap_handler(struct pt_regs *regs, int trapnr) const struct exception_table_entry *entry; switch(kcb->kprobe_status) { - case KPROBE_SWAP_INST: - /* We are here because the instruction replacement failed */ - return 0; case KPROBE_HIT_SS: case KPROBE_REENTER: /* diff --git a/arch/s390/kernel/lgr.c b/arch/s390/kernel/lgr.c index ae7dff110054..bf9622f0e6b1 100644 --- a/arch/s390/kernel/lgr.c +++ b/arch/s390/kernel/lgr.c @@ -153,14 +153,13 @@ static void lgr_timer_set(void); /* * LGR timer callback */ -static void lgr_timer_fn(unsigned long ignored) +static void lgr_timer_fn(struct timer_list *unused) { lgr_info_log(); lgr_timer_set(); } -static struct timer_list lgr_timer = - TIMER_DEFERRED_INITIALIZER(lgr_timer_fn, 0, 0); +static struct timer_list lgr_timer; /* * Setup next LGR timer @@ -181,6 +180,7 @@ static int __init lgr_init(void) debug_register_view(lgr_dbf, &debug_hex_ascii_view); lgr_info_get(&lgr_info_last); debug_event(lgr_dbf, 1, &lgr_info_last, sizeof(lgr_info_last)); + timer_setup(&lgr_timer, lgr_timer_fn, TIMER_DEFERRABLE); lgr_timer_set(); return 0; } diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index b0ba2c26b45e..a80050bbe2e4 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -106,7 +106,7 @@ static void __do_machine_kdump(void *image) static noinline void __machine_kdump(void *image) { struct mcesa *mcesa; - unsigned long cr2_old, cr2_new; + union ctlreg2 cr2_old, cr2_new; int this_cpu, cpu; lgr_info_log(); @@ -123,11 +123,12 @@ static noinline void __machine_kdump(void *image) if (MACHINE_HAS_VX) save_vx_regs((__vector128 *) mcesa->vector_save_area); if (MACHINE_HAS_GS) { - __ctl_store(cr2_old, 2, 2); - cr2_new = cr2_old | (1UL << 4); - __ctl_load(cr2_new, 2, 2); + __ctl_store(cr2_old.val, 2, 2); + cr2_new = cr2_old; + cr2_new.gse = 1; + __ctl_load(cr2_new.val, 2, 2); save_gs_cb((struct gs_cb *) mcesa->guarded_storage_save_area); - __ctl_load(cr2_old, 2, 2); + __ctl_load(cr2_old.val, 2, 2); } /* * To create a good backchain for this CPU in the dump store_status @@ -145,7 +146,7 @@ static noinline void __machine_kdump(void *image) /* * Check if kdump checksums are valid: We call purgatory with parameter "0" */ -static int kdump_csum_valid(struct kimage *image) +static bool kdump_csum_valid(struct kimage *image) { #ifdef CONFIG_CRASH_DUMP int (*start_kdump)(int) = (void *)image->start; @@ -154,9 +155,9 @@ static int kdump_csum_valid(struct kimage *image) __arch_local_irq_stnsm(0xfb); /* disable DAT */ rc = start_kdump(0); __arch_local_irq_stosm(0x04); /* enable DAT */ - return rc ? 0 : -EINVAL; + return rc == 0; #else - return -EINVAL; + return false; #endif } @@ -219,10 +220,6 @@ int machine_kexec_prepare(struct kimage *image) { void *reboot_code_buffer; - /* Can't replace kernel image since it is read-only. */ - if (ipl_flags & IPL_NSS_VALID) - return -EOPNOTSUPP; - if (image->type == KEXEC_TYPE_CRASH) return machine_kexec_prepare_kdump(); @@ -269,6 +266,7 @@ static void __do_machine_kexec(void *data) s390_reset_system(); data_mover = (relocate_kernel_t) page_to_phys(image->control_code_page); + __arch_local_irq_stnsm(0xfb); /* disable DAT - avoid no-execute */ /* Call the moving routine */ (*data_mover)(&image->head, image->start); diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index 1a27f307a920..6d9f73bb4142 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -31,6 +31,7 @@ #include <linux/kernel.h> #include <linux/moduleloader.h> #include <linux/bug.h> +#include <asm/alternative.h> #if 0 #define DEBUGP printk @@ -429,6 +430,22 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *me) { + const Elf_Shdr *s; + char *secstrings; + + if (IS_ENABLED(CONFIG_ALTERNATIVES)) { + secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; + for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { + if (!strcmp(".altinstructions", + secstrings + s->sh_name)) { + /* patch .altinstructions */ + void *aseg = (void *)s->sh_addr; + + apply_alternatives(aseg, aseg + s->sh_size); + } + } + } + jump_label_apply_nops(me); return 0; } diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index 31d03a84126c..3f3cda41f32a 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -12,6 +12,9 @@ #include <linux/init.h> #include <linux/errno.h> #include <linux/hardirq.h> +#include <linux/log2.h> +#include <linux/kprobes.h> +#include <linux/slab.h> #include <linux/time.h> #include <linux/module.h> #include <linux/sched/signal.h> @@ -37,13 +40,94 @@ struct mcck_struct { }; static DEFINE_PER_CPU(struct mcck_struct, cpu_mcck); +static struct kmem_cache *mcesa_cache; +static unsigned long mcesa_origin_lc; -static void s390_handle_damage(void) +static inline int nmi_needs_mcesa(void) { - smp_send_stop(); + return MACHINE_HAS_VX || MACHINE_HAS_GS; +} + +static inline unsigned long nmi_get_mcesa_size(void) +{ + if (MACHINE_HAS_GS) + return MCESA_MAX_SIZE; + return MCESA_MIN_SIZE; +} + +/* + * The initial machine check extended save area for the boot CPU. + * It will be replaced by nmi_init() with an allocated structure. + * The structure is required for machine check happening early in + * the boot process. + */ +static struct mcesa boot_mcesa __initdata __aligned(MCESA_MAX_SIZE); + +void __init nmi_alloc_boot_cpu(struct lowcore *lc) +{ + if (!nmi_needs_mcesa()) + return; + lc->mcesad = (unsigned long) &boot_mcesa; + if (MACHINE_HAS_GS) + lc->mcesad |= ilog2(MCESA_MAX_SIZE); +} + +static int __init nmi_init(void) +{ + unsigned long origin, cr0, size; + + if (!nmi_needs_mcesa()) + return 0; + size = nmi_get_mcesa_size(); + if (size > MCESA_MIN_SIZE) + mcesa_origin_lc = ilog2(size); + /* create slab cache for the machine-check-extended-save-areas */ + mcesa_cache = kmem_cache_create("nmi_save_areas", size, size, 0, NULL); + if (!mcesa_cache) + panic("Couldn't create nmi save area cache"); + origin = (unsigned long) kmem_cache_alloc(mcesa_cache, GFP_KERNEL); + if (!origin) + panic("Couldn't allocate nmi save area"); + /* The pointer is stored with mcesa_bits ORed in */ + kmemleak_not_leak((void *) origin); + __ctl_store(cr0, 0, 0); + __ctl_clear_bit(0, 28); /* disable lowcore protection */ + /* Replace boot_mcesa on the boot CPU */ + S390_lowcore.mcesad = origin | mcesa_origin_lc; + __ctl_load(cr0, 0, 0); + return 0; +} +early_initcall(nmi_init); + +int nmi_alloc_per_cpu(struct lowcore *lc) +{ + unsigned long origin; + + if (!nmi_needs_mcesa()) + return 0; + origin = (unsigned long) kmem_cache_alloc(mcesa_cache, GFP_KERNEL); + if (!origin) + return -ENOMEM; + /* The pointer is stored with mcesa_bits ORed in */ + kmemleak_not_leak((void *) origin); + lc->mcesad = origin | mcesa_origin_lc; + return 0; +} + +void nmi_free_per_cpu(struct lowcore *lc) +{ + if (!nmi_needs_mcesa()) + return; + kmem_cache_free(mcesa_cache, (void *)(lc->mcesad & MCESA_ORIGIN_MASK)); +} + +static notrace void s390_handle_damage(void) +{ + smp_emergency_stop(); disabled_wait((unsigned long) __builtin_return_address(0)); while (1); } +NOKPROBE_SYMBOL(s390_handle_damage); /* * Main machine check handler function. Will be called with interrupts enabled @@ -100,18 +184,16 @@ void s390_handle_mcck(void) EXPORT_SYMBOL_GPL(s390_handle_mcck); /* - * returns 0 if all registers could be validated + * returns 0 if all required registers are available * returns 1 otherwise */ -static int notrace s390_validate_registers(union mci mci, int umode) +static int notrace s390_check_registers(union mci mci, int umode) { + union ctlreg2 cr2; int kill_task; - u64 zero; void *fpt_save_area; - struct mcesa *mcesa; kill_task = 0; - zero = 0; if (!mci.gr) { /* @@ -122,18 +204,13 @@ static int notrace s390_validate_registers(union mci mci, int umode) s390_handle_damage(); kill_task = 1; } - /* Validate control registers */ + /* Check control registers */ if (!mci.cr) { /* * Control registers have unknown contents. * Can't recover and therefore stopping machine. */ s390_handle_damage(); - } else { - asm volatile( - " lctlg 0,15,0(%0)\n" - " ptlb\n" - : : "a" (&S390_lowcore.cregs_save_area) : "memory"); } if (!mci.fp) { /* @@ -141,7 +218,6 @@ static int notrace s390_validate_registers(union mci mci, int umode) * kernel currently uses floating point registers the * system is stopped. If the process has its floating * pointer registers loaded it is terminated. - * Otherwise just revalidate the registers. */ if (S390_lowcore.fpu_flags & KERNEL_VXR_V0V7) s390_handle_damage(); @@ -155,72 +231,29 @@ static int notrace s390_validate_registers(union mci mci, int umode) * If the kernel currently uses the floating pointer * registers and needs the FPC register the system is * stopped. If the process has its floating pointer - * registers loaded it is terminated. Otherwiese the - * FPC is just revalidated. + * registers loaded it is terminated. */ if (S390_lowcore.fpu_flags & KERNEL_FPC) s390_handle_damage(); - asm volatile("lfpc %0" : : "Q" (zero)); if (!test_cpu_flag(CIF_FPU)) kill_task = 1; - } else { - asm volatile("lfpc %0" - : : "Q" (S390_lowcore.fpt_creg_save_area)); } - mcesa = (struct mcesa *)(S390_lowcore.mcesad & MCESA_ORIGIN_MASK); - if (!MACHINE_HAS_VX) { - /* Validate floating point registers */ - asm volatile( - " ld 0,0(%0)\n" - " ld 1,8(%0)\n" - " ld 2,16(%0)\n" - " ld 3,24(%0)\n" - " ld 4,32(%0)\n" - " ld 5,40(%0)\n" - " ld 6,48(%0)\n" - " ld 7,56(%0)\n" - " ld 8,64(%0)\n" - " ld 9,72(%0)\n" - " ld 10,80(%0)\n" - " ld 11,88(%0)\n" - " ld 12,96(%0)\n" - " ld 13,104(%0)\n" - " ld 14,112(%0)\n" - " ld 15,120(%0)\n" - : : "a" (fpt_save_area) : "memory"); - } else { - /* Validate vector registers */ - union ctlreg0 cr0; - + if (MACHINE_HAS_VX) { if (!mci.vr) { /* * Vector registers can't be restored. If the kernel * currently uses vector registers the system is * stopped. If the process has its vector registers - * loaded it is terminated. Otherwise just revalidate - * the registers. + * loaded it is terminated. */ if (S390_lowcore.fpu_flags & KERNEL_VXR) s390_handle_damage(); if (!test_cpu_flag(CIF_FPU)) kill_task = 1; } - cr0.val = S390_lowcore.cregs_save_area[0]; - cr0.afp = cr0.vx = 1; - __ctl_load(cr0.val, 0, 0); - asm volatile( - " la 1,%0\n" - " .word 0xe70f,0x1000,0x0036\n" /* vlm 0,15,0(1) */ - " .word 0xe70f,0x1100,0x0c36\n" /* vlm 16,31,256(1) */ - : : "Q" (*(struct vx_array *) mcesa->vector_save_area) - : "1"); - __ctl_load(S390_lowcore.cregs_save_area[0], 0, 0); } - /* Validate access registers */ - asm volatile( - " lam 0,15,0(%0)" - : : "a" (&S390_lowcore.access_regs_save_area)); + /* Check if access registers are valid */ if (!mci.ar) { /* * Access registers have unknown contents. @@ -228,53 +261,41 @@ static int notrace s390_validate_registers(union mci mci, int umode) */ kill_task = 1; } - /* Validate guarded storage registers */ - if (MACHINE_HAS_GS && (S390_lowcore.cregs_save_area[2] & (1UL << 4))) { - if (!mci.gs) + /* Check guarded storage registers */ + cr2.val = S390_lowcore.cregs_save_area[2]; + if (cr2.gse) { + if (!mci.gs) { /* * Guarded storage register can't be restored and * the current processes uses guarded storage. * It has to be terminated. */ kill_task = 1; - else - load_gs_cb((struct gs_cb *) - mcesa->guarded_storage_save_area); + } } - /* - * We don't even try to validate the TOD register, since we simply - * can't write something sensible into that register. - */ - /* - * See if we can validate the TOD programmable register with its - * old contents (should be zero) otherwise set it to zero. - */ - if (!mci.pr) - asm volatile( - " sr 0,0\n" - " sckpf" - : : : "0", "cc"); - else - asm volatile( - " l 0,%0\n" - " sckpf" - : : "Q" (S390_lowcore.tod_progreg_save_area) - : "0", "cc"); - /* Validate clock comparator register */ - set_clock_comparator(S390_lowcore.clock_comparator); /* Check if old PSW is valid */ - if (!mci.wp) + if (!mci.wp) { /* * Can't tell if we come from user or kernel mode * -> stopping machine. */ s390_handle_damage(); + } + /* Check for invalid kernel instruction address */ + if (!mci.ia && !umode) { + /* + * The instruction address got lost while running + * in the kernel -> stopping machine. + */ + s390_handle_damage(); + } if (!mci.ms || !mci.pm || !mci.ia) kill_task = 1; return kill_task; } +NOKPROBE_SYMBOL(s390_check_registers); /* * Backup the guest's machine check info to its description block @@ -300,6 +321,7 @@ static void notrace s390_backup_mcck_info(struct pt_regs *regs) mcck_backup->failing_storage_address = S390_lowcore.failing_storage_address; } +NOKPROBE_SYMBOL(s390_backup_mcck_info); #define MAX_IPD_COUNT 29 #define MAX_IPD_TIME (5 * 60 * USEC_PER_SEC) /* 5 minutes */ @@ -372,7 +394,7 @@ void notrace s390_do_machine_check(struct pt_regs *regs) s390_handle_damage(); } } - if (s390_validate_registers(mci, user_mode(regs))) { + if (s390_check_registers(mci, user_mode(regs))) { /* * Couldn't restore all register contents for the * user space process -> mark task for termination. @@ -443,6 +465,7 @@ void notrace s390_do_machine_check(struct pt_regs *regs) clear_cpu_flag(CIF_MCCK_GUEST); nmi_exit(); } +NOKPROBE_SYMBOL(s390_do_machine_check); static int __init machine_check_init(void) { diff --git a/arch/s390/kernel/perf_cpum_cf_events.c b/arch/s390/kernel/perf_cpum_cf_events.c index 08bfa17ba0a0..94f90cefbffc 100644 --- a/arch/s390/kernel/perf_cpum_cf_events.c +++ b/arch/s390/kernel/perf_cpum_cf_events.c @@ -10,34 +10,42 @@ /* BEGIN: CPUM_CF COUNTER DEFINITIONS =================================== */ -CPUMF_EVENT_ATTR(cf, CPU_CYCLES, 0x0000); -CPUMF_EVENT_ATTR(cf, INSTRUCTIONS, 0x0001); -CPUMF_EVENT_ATTR(cf, L1I_DIR_WRITES, 0x0002); -CPUMF_EVENT_ATTR(cf, L1I_PENALTY_CYCLES, 0x0003); -CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_CPU_CYCLES, 0x0020); -CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_INSTRUCTIONS, 0x0021); -CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_L1I_DIR_WRITES, 0x0022); -CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_L1I_PENALTY_CYCLES, 0x0023); -CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_L1D_DIR_WRITES, 0x0024); -CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_L1D_PENALTY_CYCLES, 0x0025); -CPUMF_EVENT_ATTR(cf, L1D_DIR_WRITES, 0x0004); -CPUMF_EVENT_ATTR(cf, L1D_PENALTY_CYCLES, 0x0005); -CPUMF_EVENT_ATTR(cf, PRNG_FUNCTIONS, 0x0040); -CPUMF_EVENT_ATTR(cf, PRNG_CYCLES, 0x0041); -CPUMF_EVENT_ATTR(cf, PRNG_BLOCKED_FUNCTIONS, 0x0042); -CPUMF_EVENT_ATTR(cf, PRNG_BLOCKED_CYCLES, 0x0043); -CPUMF_EVENT_ATTR(cf, SHA_FUNCTIONS, 0x0044); -CPUMF_EVENT_ATTR(cf, SHA_CYCLES, 0x0045); -CPUMF_EVENT_ATTR(cf, SHA_BLOCKED_FUNCTIONS, 0x0046); -CPUMF_EVENT_ATTR(cf, SHA_BLOCKED_CYCLES, 0x0047); -CPUMF_EVENT_ATTR(cf, DEA_FUNCTIONS, 0x0048); -CPUMF_EVENT_ATTR(cf, DEA_CYCLES, 0x0049); -CPUMF_EVENT_ATTR(cf, DEA_BLOCKED_FUNCTIONS, 0x004a); -CPUMF_EVENT_ATTR(cf, DEA_BLOCKED_CYCLES, 0x004b); -CPUMF_EVENT_ATTR(cf, AES_FUNCTIONS, 0x004c); -CPUMF_EVENT_ATTR(cf, AES_CYCLES, 0x004d); -CPUMF_EVENT_ATTR(cf, AES_BLOCKED_FUNCTIONS, 0x004e); -CPUMF_EVENT_ATTR(cf, AES_BLOCKED_CYCLES, 0x004f); +CPUMF_EVENT_ATTR(cf_fvn1, CPU_CYCLES, 0x0000); +CPUMF_EVENT_ATTR(cf_fvn1, INSTRUCTIONS, 0x0001); +CPUMF_EVENT_ATTR(cf_fvn1, L1I_DIR_WRITES, 0x0002); +CPUMF_EVENT_ATTR(cf_fvn1, L1I_PENALTY_CYCLES, 0x0003); +CPUMF_EVENT_ATTR(cf_fvn1, PROBLEM_STATE_CPU_CYCLES, 0x0020); +CPUMF_EVENT_ATTR(cf_fvn1, PROBLEM_STATE_INSTRUCTIONS, 0x0021); +CPUMF_EVENT_ATTR(cf_fvn1, PROBLEM_STATE_L1I_DIR_WRITES, 0x0022); +CPUMF_EVENT_ATTR(cf_fvn1, PROBLEM_STATE_L1I_PENALTY_CYCLES, 0x0023); +CPUMF_EVENT_ATTR(cf_fvn1, PROBLEM_STATE_L1D_DIR_WRITES, 0x0024); +CPUMF_EVENT_ATTR(cf_fvn1, PROBLEM_STATE_L1D_PENALTY_CYCLES, 0x0025); +CPUMF_EVENT_ATTR(cf_fvn1, L1D_DIR_WRITES, 0x0004); +CPUMF_EVENT_ATTR(cf_fvn1, L1D_PENALTY_CYCLES, 0x0005); +CPUMF_EVENT_ATTR(cf_fvn3, CPU_CYCLES, 0x0000); +CPUMF_EVENT_ATTR(cf_fvn3, INSTRUCTIONS, 0x0001); +CPUMF_EVENT_ATTR(cf_fvn3, L1I_DIR_WRITES, 0x0002); +CPUMF_EVENT_ATTR(cf_fvn3, L1I_PENALTY_CYCLES, 0x0003); +CPUMF_EVENT_ATTR(cf_fvn3, PROBLEM_STATE_CPU_CYCLES, 0x0020); +CPUMF_EVENT_ATTR(cf_fvn3, PROBLEM_STATE_INSTRUCTIONS, 0x0021); +CPUMF_EVENT_ATTR(cf_fvn3, L1D_DIR_WRITES, 0x0004); +CPUMF_EVENT_ATTR(cf_fvn3, L1D_PENALTY_CYCLES, 0x0005); +CPUMF_EVENT_ATTR(cf_svn_generic, PRNG_FUNCTIONS, 0x0040); +CPUMF_EVENT_ATTR(cf_svn_generic, PRNG_CYCLES, 0x0041); +CPUMF_EVENT_ATTR(cf_svn_generic, PRNG_BLOCKED_FUNCTIONS, 0x0042); +CPUMF_EVENT_ATTR(cf_svn_generic, PRNG_BLOCKED_CYCLES, 0x0043); +CPUMF_EVENT_ATTR(cf_svn_generic, SHA_FUNCTIONS, 0x0044); +CPUMF_EVENT_ATTR(cf_svn_generic, SHA_CYCLES, 0x0045); +CPUMF_EVENT_ATTR(cf_svn_generic, SHA_BLOCKED_FUNCTIONS, 0x0046); +CPUMF_EVENT_ATTR(cf_svn_generic, SHA_BLOCKED_CYCLES, 0x0047); +CPUMF_EVENT_ATTR(cf_svn_generic, DEA_FUNCTIONS, 0x0048); +CPUMF_EVENT_ATTR(cf_svn_generic, DEA_CYCLES, 0x0049); +CPUMF_EVENT_ATTR(cf_svn_generic, DEA_BLOCKED_FUNCTIONS, 0x004a); +CPUMF_EVENT_ATTR(cf_svn_generic, DEA_BLOCKED_CYCLES, 0x004b); +CPUMF_EVENT_ATTR(cf_svn_generic, AES_FUNCTIONS, 0x004c); +CPUMF_EVENT_ATTR(cf_svn_generic, AES_CYCLES, 0x004d); +CPUMF_EVENT_ATTR(cf_svn_generic, AES_BLOCKED_FUNCTIONS, 0x004e); +CPUMF_EVENT_ATTR(cf_svn_generic, AES_BLOCKED_CYCLES, 0x004f); CPUMF_EVENT_ATTR(cf_z10, L1I_L2_SOURCED_WRITES, 0x0080); CPUMF_EVENT_ATTR(cf_z10, L1D_L2_SOURCED_WRITES, 0x0081); CPUMF_EVENT_ATTR(cf_z10, L1I_L3_LOCAL_WRITES, 0x0082); @@ -171,36 +179,105 @@ CPUMF_EVENT_ATTR(cf_z13, TX_C_TABORT_NO_SPECIAL, 0x00db); CPUMF_EVENT_ATTR(cf_z13, TX_C_TABORT_SPECIAL, 0x00dc); CPUMF_EVENT_ATTR(cf_z13, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0); CPUMF_EVENT_ATTR(cf_z13, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1); +CPUMF_EVENT_ATTR(cf_z14, L1D_WRITES_RO_EXCL, 0x0080); +CPUMF_EVENT_ATTR(cf_z14, DTLB2_WRITES, 0x0081); +CPUMF_EVENT_ATTR(cf_z14, DTLB2_MISSES, 0x0082); +CPUMF_EVENT_ATTR(cf_z14, DTLB2_HPAGE_WRITES, 0x0083); +CPUMF_EVENT_ATTR(cf_z14, DTLB2_GPAGE_WRITES, 0x0084); +CPUMF_EVENT_ATTR(cf_z14, L1D_L2D_SOURCED_WRITES, 0x0085); +CPUMF_EVENT_ATTR(cf_z14, ITLB2_WRITES, 0x0086); +CPUMF_EVENT_ATTR(cf_z14, ITLB2_MISSES, 0x0087); +CPUMF_EVENT_ATTR(cf_z14, L1I_L2I_SOURCED_WRITES, 0x0088); +CPUMF_EVENT_ATTR(cf_z14, TLB2_PTE_WRITES, 0x0089); +CPUMF_EVENT_ATTR(cf_z14, TLB2_CRSTE_WRITES, 0x008a); +CPUMF_EVENT_ATTR(cf_z14, TLB2_ENGINES_BUSY, 0x008b); +CPUMF_EVENT_ATTR(cf_z14, TX_C_TEND, 0x008c); +CPUMF_EVENT_ATTR(cf_z14, TX_NC_TEND, 0x008d); +CPUMF_EVENT_ATTR(cf_z14, L1C_TLB2_MISSES, 0x008f); +CPUMF_EVENT_ATTR(cf_z14, L1D_ONCHIP_L3_SOURCED_WRITES, 0x0090); +CPUMF_EVENT_ATTR(cf_z14, L1D_ONCHIP_MEMORY_SOURCED_WRITES, 0x0091); +CPUMF_EVENT_ATTR(cf_z14, L1D_ONCHIP_L3_SOURCED_WRITES_IV, 0x0092); +CPUMF_EVENT_ATTR(cf_z14, L1D_ONCLUSTER_L3_SOURCED_WRITES, 0x0093); +CPUMF_EVENT_ATTR(cf_z14, L1D_ONCLUSTER_MEMORY_SOURCED_WRITES, 0x0094); +CPUMF_EVENT_ATTR(cf_z14, L1D_ONCLUSTER_L3_SOURCED_WRITES_IV, 0x0095); +CPUMF_EVENT_ATTR(cf_z14, L1D_OFFCLUSTER_L3_SOURCED_WRITES, 0x0096); +CPUMF_EVENT_ATTR(cf_z14, L1D_OFFCLUSTER_MEMORY_SOURCED_WRITES, 0x0097); +CPUMF_EVENT_ATTR(cf_z14, L1D_OFFCLUSTER_L3_SOURCED_WRITES_IV, 0x0098); +CPUMF_EVENT_ATTR(cf_z14, L1D_OFFDRAWER_L3_SOURCED_WRITES, 0x0099); +CPUMF_EVENT_ATTR(cf_z14, L1D_OFFDRAWER_MEMORY_SOURCED_WRITES, 0x009a); +CPUMF_EVENT_ATTR(cf_z14, L1D_OFFDRAWER_L3_SOURCED_WRITES_IV, 0x009b); +CPUMF_EVENT_ATTR(cf_z14, L1D_ONDRAWER_L4_SOURCED_WRITES, 0x009c); +CPUMF_EVENT_ATTR(cf_z14, L1D_OFFDRAWER_L4_SOURCED_WRITES, 0x009d); +CPUMF_EVENT_ATTR(cf_z14, L1D_ONCHIP_L3_SOURCED_WRITES_RO, 0x009e); +CPUMF_EVENT_ATTR(cf_z14, L1I_ONCHIP_L3_SOURCED_WRITES, 0x00a2); +CPUMF_EVENT_ATTR(cf_z14, L1I_ONCHIP_MEMORY_SOURCED_WRITES, 0x00a3); +CPUMF_EVENT_ATTR(cf_z14, L1I_ONCHIP_L3_SOURCED_WRITES_IV, 0x00a4); +CPUMF_EVENT_ATTR(cf_z14, L1I_ONCLUSTER_L3_SOURCED_WRITES, 0x00a5); +CPUMF_EVENT_ATTR(cf_z14, L1I_ONCLUSTER_MEMORY_SOURCED_WRITES, 0x00a6); +CPUMF_EVENT_ATTR(cf_z14, L1I_ONCLUSTER_L3_SOURCED_WRITES_IV, 0x00a7); +CPUMF_EVENT_ATTR(cf_z14, L1I_OFFCLUSTER_L3_SOURCED_WRITES, 0x00a8); +CPUMF_EVENT_ATTR(cf_z14, L1I_OFFCLUSTER_MEMORY_SOURCED_WRITES, 0x00a9); +CPUMF_EVENT_ATTR(cf_z14, L1I_OFFCLUSTER_L3_SOURCED_WRITES_IV, 0x00aa); +CPUMF_EVENT_ATTR(cf_z14, L1I_OFFDRAWER_L3_SOURCED_WRITES, 0x00ab); +CPUMF_EVENT_ATTR(cf_z14, L1I_OFFDRAWER_MEMORY_SOURCED_WRITES, 0x00ac); +CPUMF_EVENT_ATTR(cf_z14, L1I_OFFDRAWER_L3_SOURCED_WRITES_IV, 0x00ad); +CPUMF_EVENT_ATTR(cf_z14, L1I_ONDRAWER_L4_SOURCED_WRITES, 0x00ae); +CPUMF_EVENT_ATTR(cf_z14, L1I_OFFDRAWER_L4_SOURCED_WRITES, 0x00af); +CPUMF_EVENT_ATTR(cf_z14, BCD_DFP_EXECUTION_SLOTS, 0x00e0); +CPUMF_EVENT_ATTR(cf_z14, VX_BCD_EXECUTION_SLOTS, 0x00e1); +CPUMF_EVENT_ATTR(cf_z14, DECIMAL_INSTRUCTIONS, 0x00e2); +CPUMF_EVENT_ATTR(cf_z14, LAST_HOST_TRANSLATIONS, 0x00e9); +CPUMF_EVENT_ATTR(cf_z14, TX_NC_TABORT, 0x00f3); +CPUMF_EVENT_ATTR(cf_z14, TX_C_TABORT_NO_SPECIAL, 0x00f4); +CPUMF_EVENT_ATTR(cf_z14, TX_C_TABORT_SPECIAL, 0x00f5); +CPUMF_EVENT_ATTR(cf_z14, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0); +CPUMF_EVENT_ATTR(cf_z14, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1); -static struct attribute *cpumcf_pmu_event_attr[] __initdata = { - CPUMF_EVENT_PTR(cf, CPU_CYCLES), - CPUMF_EVENT_PTR(cf, INSTRUCTIONS), - CPUMF_EVENT_PTR(cf, L1I_DIR_WRITES), - CPUMF_EVENT_PTR(cf, L1I_PENALTY_CYCLES), - CPUMF_EVENT_PTR(cf, PROBLEM_STATE_CPU_CYCLES), - CPUMF_EVENT_PTR(cf, PROBLEM_STATE_INSTRUCTIONS), - CPUMF_EVENT_PTR(cf, PROBLEM_STATE_L1I_DIR_WRITES), - CPUMF_EVENT_PTR(cf, PROBLEM_STATE_L1I_PENALTY_CYCLES), - CPUMF_EVENT_PTR(cf, PROBLEM_STATE_L1D_DIR_WRITES), - CPUMF_EVENT_PTR(cf, PROBLEM_STATE_L1D_PENALTY_CYCLES), - CPUMF_EVENT_PTR(cf, L1D_DIR_WRITES), - CPUMF_EVENT_PTR(cf, L1D_PENALTY_CYCLES), - CPUMF_EVENT_PTR(cf, PRNG_FUNCTIONS), - CPUMF_EVENT_PTR(cf, PRNG_CYCLES), - CPUMF_EVENT_PTR(cf, PRNG_BLOCKED_FUNCTIONS), - CPUMF_EVENT_PTR(cf, PRNG_BLOCKED_CYCLES), - CPUMF_EVENT_PTR(cf, SHA_FUNCTIONS), - CPUMF_EVENT_PTR(cf, SHA_CYCLES), - CPUMF_EVENT_PTR(cf, SHA_BLOCKED_FUNCTIONS), - CPUMF_EVENT_PTR(cf, SHA_BLOCKED_CYCLES), - CPUMF_EVENT_PTR(cf, DEA_FUNCTIONS), - CPUMF_EVENT_PTR(cf, DEA_CYCLES), - CPUMF_EVENT_PTR(cf, DEA_BLOCKED_FUNCTIONS), - CPUMF_EVENT_PTR(cf, DEA_BLOCKED_CYCLES), - CPUMF_EVENT_PTR(cf, AES_FUNCTIONS), - CPUMF_EVENT_PTR(cf, AES_CYCLES), - CPUMF_EVENT_PTR(cf, AES_BLOCKED_FUNCTIONS), - CPUMF_EVENT_PTR(cf, AES_BLOCKED_CYCLES), +static struct attribute *cpumcf_fvn1_pmu_event_attr[] __initdata = { + CPUMF_EVENT_PTR(cf_fvn1, CPU_CYCLES), + CPUMF_EVENT_PTR(cf_fvn1, INSTRUCTIONS), + CPUMF_EVENT_PTR(cf_fvn1, L1I_DIR_WRITES), + CPUMF_EVENT_PTR(cf_fvn1, L1I_PENALTY_CYCLES), + CPUMF_EVENT_PTR(cf_fvn1, PROBLEM_STATE_CPU_CYCLES), + CPUMF_EVENT_PTR(cf_fvn1, PROBLEM_STATE_INSTRUCTIONS), + CPUMF_EVENT_PTR(cf_fvn1, PROBLEM_STATE_L1I_DIR_WRITES), + CPUMF_EVENT_PTR(cf_fvn1, PROBLEM_STATE_L1I_PENALTY_CYCLES), + CPUMF_EVENT_PTR(cf_fvn1, PROBLEM_STATE_L1D_DIR_WRITES), + CPUMF_EVENT_PTR(cf_fvn1, PROBLEM_STATE_L1D_PENALTY_CYCLES), + CPUMF_EVENT_PTR(cf_fvn1, L1D_DIR_WRITES), + CPUMF_EVENT_PTR(cf_fvn1, L1D_PENALTY_CYCLES), + NULL, +}; + +static struct attribute *cpumcf_fvn3_pmu_event_attr[] __initdata = { + CPUMF_EVENT_PTR(cf_fvn3, CPU_CYCLES), + CPUMF_EVENT_PTR(cf_fvn3, INSTRUCTIONS), + CPUMF_EVENT_PTR(cf_fvn3, L1I_DIR_WRITES), + CPUMF_EVENT_PTR(cf_fvn3, L1I_PENALTY_CYCLES), + CPUMF_EVENT_PTR(cf_fvn3, PROBLEM_STATE_CPU_CYCLES), + CPUMF_EVENT_PTR(cf_fvn3, PROBLEM_STATE_INSTRUCTIONS), + CPUMF_EVENT_PTR(cf_fvn3, L1D_DIR_WRITES), + CPUMF_EVENT_PTR(cf_fvn3, L1D_PENALTY_CYCLES), + NULL, +}; + +static struct attribute *cpumcf_svn_generic_pmu_event_attr[] __initdata = { + CPUMF_EVENT_PTR(cf_svn_generic, PRNG_FUNCTIONS), + CPUMF_EVENT_PTR(cf_svn_generic, PRNG_CYCLES), + CPUMF_EVENT_PTR(cf_svn_generic, PRNG_BLOCKED_FUNCTIONS), + CPUMF_EVENT_PTR(cf_svn_generic, PRNG_BLOCKED_CYCLES), + CPUMF_EVENT_PTR(cf_svn_generic, SHA_FUNCTIONS), + CPUMF_EVENT_PTR(cf_svn_generic, SHA_CYCLES), + CPUMF_EVENT_PTR(cf_svn_generic, SHA_BLOCKED_FUNCTIONS), + CPUMF_EVENT_PTR(cf_svn_generic, SHA_BLOCKED_CYCLES), + CPUMF_EVENT_PTR(cf_svn_generic, DEA_FUNCTIONS), + CPUMF_EVENT_PTR(cf_svn_generic, DEA_CYCLES), + CPUMF_EVENT_PTR(cf_svn_generic, DEA_BLOCKED_FUNCTIONS), + CPUMF_EVENT_PTR(cf_svn_generic, DEA_BLOCKED_CYCLES), + CPUMF_EVENT_PTR(cf_svn_generic, AES_FUNCTIONS), + CPUMF_EVENT_PTR(cf_svn_generic, AES_CYCLES), + CPUMF_EVENT_PTR(cf_svn_generic, AES_BLOCKED_FUNCTIONS), + CPUMF_EVENT_PTR(cf_svn_generic, AES_BLOCKED_CYCLES), NULL, }; @@ -353,6 +430,63 @@ static struct attribute *cpumcf_z13_pmu_event_attr[] __initdata = { NULL, }; +static struct attribute *cpumcf_z14_pmu_event_attr[] __initdata = { + CPUMF_EVENT_PTR(cf_z14, L1D_WRITES_RO_EXCL), + CPUMF_EVENT_PTR(cf_z14, DTLB2_WRITES), + CPUMF_EVENT_PTR(cf_z14, DTLB2_MISSES), + CPUMF_EVENT_PTR(cf_z14, DTLB2_HPAGE_WRITES), + CPUMF_EVENT_PTR(cf_z14, DTLB2_GPAGE_WRITES), + CPUMF_EVENT_PTR(cf_z14, L1D_L2D_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z14, ITLB2_WRITES), + CPUMF_EVENT_PTR(cf_z14, ITLB2_MISSES), + CPUMF_EVENT_PTR(cf_z14, L1I_L2I_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z14, TLB2_PTE_WRITES), + CPUMF_EVENT_PTR(cf_z14, TLB2_CRSTE_WRITES), + CPUMF_EVENT_PTR(cf_z14, TLB2_ENGINES_BUSY), + CPUMF_EVENT_PTR(cf_z14, TX_C_TEND), + CPUMF_EVENT_PTR(cf_z14, TX_NC_TEND), + CPUMF_EVENT_PTR(cf_z14, L1C_TLB2_MISSES), + CPUMF_EVENT_PTR(cf_z14, L1D_ONCHIP_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z14, L1D_ONCHIP_MEMORY_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z14, L1D_ONCHIP_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z14, L1D_ONCLUSTER_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z14, L1D_ONCLUSTER_MEMORY_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z14, L1D_ONCLUSTER_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z14, L1D_OFFCLUSTER_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z14, L1D_OFFCLUSTER_MEMORY_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z14, L1D_OFFCLUSTER_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z14, L1D_OFFDRAWER_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z14, L1D_OFFDRAWER_MEMORY_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z14, L1D_OFFDRAWER_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z14, L1D_ONDRAWER_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z14, L1D_OFFDRAWER_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z14, L1D_ONCHIP_L3_SOURCED_WRITES_RO), + CPUMF_EVENT_PTR(cf_z14, L1I_ONCHIP_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z14, L1I_ONCHIP_MEMORY_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z14, L1I_ONCHIP_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z14, L1I_ONCLUSTER_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z14, L1I_ONCLUSTER_MEMORY_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z14, L1I_ONCLUSTER_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z14, L1I_OFFCLUSTER_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z14, L1I_OFFCLUSTER_MEMORY_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z14, L1I_OFFCLUSTER_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z14, L1I_OFFDRAWER_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z14, L1I_OFFDRAWER_MEMORY_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z14, L1I_OFFDRAWER_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z14, L1I_ONDRAWER_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z14, L1I_OFFDRAWER_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z14, BCD_DFP_EXECUTION_SLOTS), + CPUMF_EVENT_PTR(cf_z14, VX_BCD_EXECUTION_SLOTS), + CPUMF_EVENT_PTR(cf_z14, DECIMAL_INSTRUCTIONS), + CPUMF_EVENT_PTR(cf_z14, LAST_HOST_TRANSLATIONS), + CPUMF_EVENT_PTR(cf_z14, TX_NC_TABORT), + CPUMF_EVENT_PTR(cf_z14, TX_C_TABORT_NO_SPECIAL), + CPUMF_EVENT_PTR(cf_z14, TX_C_TABORT_SPECIAL), + CPUMF_EVENT_PTR(cf_z14, MT_DIAG_CYCLES_ONE_THR_ACTIVE), + CPUMF_EVENT_PTR(cf_z14, MT_DIAG_CYCLES_TWO_THR_ACTIVE), + NULL, +}; + /* END: CPUM_CF COUNTER DEFINITIONS ===================================== */ static struct attribute_group cpumcf_pmu_events_group = { @@ -379,7 +513,8 @@ static const struct attribute_group *cpumcf_pmu_attr_groups[] = { static __init struct attribute **merge_attr(struct attribute **a, - struct attribute **b) + struct attribute **b, + struct attribute **c) { struct attribute **new; int j, i; @@ -388,6 +523,8 @@ static __init struct attribute **merge_attr(struct attribute **a, ; for (i = 0; b[i]; i++) j++; + for (i = 0; c[i]; i++) + j++; j++; new = kmalloc(sizeof(struct attribute *) * j, GFP_KERNEL); @@ -398,6 +535,8 @@ static __init struct attribute **merge_attr(struct attribute **a, new[j++] = a[i]; for (i = 0; b[i]; i++) new[j++] = b[i]; + for (i = 0; c[i]; i++) + new[j++] = c[i]; new[j] = NULL; return new; @@ -405,10 +544,26 @@ static __init struct attribute **merge_attr(struct attribute **a, __init const struct attribute_group **cpumf_cf_event_group(void) { - struct attribute **combined, **model; + struct attribute **combined, **model, **cfvn, **csvn; struct attribute *none[] = { NULL }; + struct cpumf_ctr_info ci; struct cpuid cpu_id; + /* Determine generic counters set(s) */ + qctri(&ci); + switch (ci.cfvn) { + case 1: + cfvn = cpumcf_fvn1_pmu_event_attr; + break; + case 3: + cfvn = cpumcf_fvn3_pmu_event_attr; + break; + default: + cfvn = none; + } + csvn = cpumcf_svn_generic_pmu_event_attr; + + /* Determine model-specific counter set(s) */ get_cpu_id(&cpu_id); switch (cpu_id.machine) { case 0x2097: @@ -427,12 +582,15 @@ __init const struct attribute_group **cpumf_cf_event_group(void) case 0x2965: model = cpumcf_z13_pmu_event_attr; break; + case 0x3906: + model = cpumcf_z14_pmu_event_attr; + break; default: model = none; break; } - combined = merge_attr(cpumcf_pmu_event_attr, model); + combined = merge_attr(cfvn, csvn, model); if (combined) cpumcf_pmu_events_group.attrs = combined; return cpumcf_pmu_attr_groups; diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 7e1e40323b78..bd4bbf61aaf3 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -823,12 +823,8 @@ static int cpumsf_pmu_event_init(struct perf_event *event) } /* Check online status of the CPU to which the event is pinned */ - if (event->cpu >= 0) { - if ((unsigned int)event->cpu >= nr_cpumask_bits) + if (event->cpu >= 0 && !cpu_online(event->cpu)) return -ENODEV; - if (!cpu_online(event->cpu)) - return -ENODEV; - } /* Force reset of idle/hv excludes regardless of what the * user requested. diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index a4a84fb08046..70576a2f69cf 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -44,27 +44,14 @@ asmlinkage void ret_from_fork(void) asm ("ret_from_fork"); extern void kernel_thread_starter(void); -/* - * Free current thread data structures etc.. - */ -void exit_thread(struct task_struct *tsk) -{ - if (tsk == current) { - exit_thread_runtime_instr(); - exit_thread_gs(); - } -} - void flush_thread(void) { } -void release_thread(struct task_struct *dead_task) -{ -} - void arch_release_task_struct(struct task_struct *tsk) { + runtime_instr_release(tsk); + guarded_storage_release(tsk); } int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) @@ -100,6 +87,7 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long new_stackp, memset(&p->thread.per_user, 0, sizeof(p->thread.per_user)); memset(&p->thread.per_event, 0, sizeof(p->thread.per_event)); clear_tsk_thread_flag(p, TIF_SINGLE_STEP); + p->thread.per_flags = 0; /* Initialize per thread user and system timer values */ p->thread.user_timer = 0; p->thread.guest_timer = 0; diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 1427d60ce628..26c0523c1488 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -31,6 +31,9 @@ #include <linux/uaccess.h> #include <asm/unistd.h> #include <asm/switch_to.h> +#include <asm/runtime_instr.h> +#include <asm/facility.h> + #include "entry.h" #ifdef CONFIG_COMPAT @@ -45,42 +48,42 @@ void update_cr_regs(struct task_struct *task) struct pt_regs *regs = task_pt_regs(task); struct thread_struct *thread = &task->thread; struct per_regs old, new; - unsigned long cr0_old, cr0_new; - unsigned long cr2_old, cr2_new; + union ctlreg0 cr0_old, cr0_new; + union ctlreg2 cr2_old, cr2_new; int cr0_changed, cr2_changed; - __ctl_store(cr0_old, 0, 0); - __ctl_store(cr2_old, 2, 2); + __ctl_store(cr0_old.val, 0, 0); + __ctl_store(cr2_old.val, 2, 2); cr0_new = cr0_old; cr2_new = cr2_old; /* Take care of the enable/disable of transactional execution. */ if (MACHINE_HAS_TE) { /* Set or clear transaction execution TXC bit 8. */ - cr0_new |= (1UL << 55); + cr0_new.tcx = 1; if (task->thread.per_flags & PER_FLAG_NO_TE) - cr0_new &= ~(1UL << 55); + cr0_new.tcx = 0; /* Set or clear transaction execution TDC bits 62 and 63. */ - cr2_new &= ~3UL; + cr2_new.tdc = 0; if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND) { if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND_TEND) - cr2_new |= 1UL; + cr2_new.tdc = 1; else - cr2_new |= 2UL; + cr2_new.tdc = 2; } } /* Take care of enable/disable of guarded storage. */ if (MACHINE_HAS_GS) { - cr2_new &= ~(1UL << 4); + cr2_new.gse = 0; if (task->thread.gs_cb) - cr2_new |= (1UL << 4); + cr2_new.gse = 1; } /* Load control register 0/2 iff changed */ - cr0_changed = cr0_new != cr0_old; - cr2_changed = cr2_new != cr2_old; + cr0_changed = cr0_new.val != cr0_old.val; + cr2_changed = cr2_new.val != cr2_old.val; if (cr0_changed) - __ctl_load(cr0_new, 0, 0); + __ctl_load(cr0_new.val, 0, 0); if (cr2_changed) - __ctl_load(cr2_new, 2, 2); + __ctl_load(cr2_new.val, 2, 2); /* Copy user specified PER registers */ new.control = thread->per_user.control; new.start = thread->per_user.start; @@ -1172,26 +1175,37 @@ static int s390_gs_cb_set(struct task_struct *target, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf) { - struct gs_cb *data = target->thread.gs_cb; + struct gs_cb gs_cb = { }, *data = NULL; int rc; if (!MACHINE_HAS_GS) return -ENODEV; - if (!data) { + if (!target->thread.gs_cb) { data = kzalloc(sizeof(*data), GFP_KERNEL); if (!data) return -ENOMEM; - data->gsd = 25; - target->thread.gs_cb = data; - if (target == current) - __ctl_set_bit(2, 4); - } else if (target == current) { - save_gs_cb(data); } + if (!target->thread.gs_cb) + gs_cb.gsd = 25; + else if (target == current) + save_gs_cb(&gs_cb); + else + gs_cb = *target->thread.gs_cb; rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - data, 0, sizeof(struct gs_cb)); - if (target == current) - restore_gs_cb(data); + &gs_cb, 0, sizeof(gs_cb)); + if (rc) { + kfree(data); + return -EFAULT; + } + preempt_disable(); + if (!target->thread.gs_cb) + target->thread.gs_cb = data; + *target->thread.gs_cb = gs_cb; + if (target == current) { + __ctl_set_bit(2, 4); + restore_gs_cb(target->thread.gs_cb); + } + preempt_enable(); return rc; } @@ -1229,6 +1243,96 @@ static int s390_gs_bc_set(struct task_struct *target, data, 0, sizeof(struct gs_cb)); } +static bool is_ri_cb_valid(struct runtime_instr_cb *cb) +{ + return (cb->rca & 0x1f) == 0 && + (cb->roa & 0xfff) == 0 && + (cb->rla & 0xfff) == 0xfff && + cb->s == 1 && + cb->k == 1 && + cb->h == 0 && + cb->reserved1 == 0 && + cb->ps == 1 && + cb->qs == 0 && + cb->pc == 1 && + cb->qc == 0 && + cb->reserved2 == 0 && + cb->key == PAGE_DEFAULT_KEY && + cb->reserved3 == 0 && + cb->reserved4 == 0 && + cb->reserved5 == 0 && + cb->reserved6 == 0 && + cb->reserved7 == 0 && + cb->reserved8 == 0 && + cb->rla >= cb->roa && + cb->rca >= cb->roa && + cb->rca <= cb->rla+1 && + cb->m < 3; +} + +static int s390_runtime_instr_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + struct runtime_instr_cb *data = target->thread.ri_cb; + + if (!test_facility(64)) + return -ENODEV; + if (!data) + return -ENODATA; + + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, + data, 0, sizeof(struct runtime_instr_cb)); +} + +static int s390_runtime_instr_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + struct runtime_instr_cb ri_cb = { }, *data = NULL; + int rc; + + if (!test_facility(64)) + return -ENODEV; + + if (!target->thread.ri_cb) { + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + } + + if (target->thread.ri_cb) { + if (target == current) + store_runtime_instr_cb(&ri_cb); + else + ri_cb = *target->thread.ri_cb; + } + + rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &ri_cb, 0, sizeof(struct runtime_instr_cb)); + if (rc) { + kfree(data); + return -EFAULT; + } + + if (!is_ri_cb_valid(&ri_cb)) { + kfree(data); + return -EINVAL; + } + + preempt_disable(); + if (!target->thread.ri_cb) + target->thread.ri_cb = data; + *target->thread.ri_cb = ri_cb; + if (target == current) + load_runtime_instr_cb(target->thread.ri_cb); + preempt_enable(); + + return 0; +} + static const struct user_regset s390_regsets[] = { { .core_note_type = NT_PRSTATUS, @@ -1302,6 +1406,14 @@ static const struct user_regset s390_regsets[] = { .get = s390_gs_bc_get, .set = s390_gs_bc_set, }, + { + .core_note_type = NT_S390_RI_CB, + .n = sizeof(struct runtime_instr_cb) / sizeof(__u64), + .size = sizeof(__u64), + .align = sizeof(__u64), + .get = s390_runtime_instr_get, + .set = s390_runtime_instr_set, + }, }; static const struct user_regset_view user_s390_view = { @@ -1538,6 +1650,14 @@ static const struct user_regset s390_compat_regsets[] = { .get = s390_gs_cb_get, .set = s390_gs_cb_set, }, + { + .core_note_type = NT_S390_RI_CB, + .n = sizeof(struct runtime_instr_cb) / sizeof(__u64), + .size = sizeof(__u64), + .align = sizeof(__u64), + .get = s390_runtime_instr_get, + .set = s390_runtime_instr_set, + }, }; static const struct user_regset_view user_s390_compat_view = { diff --git a/arch/s390/kernel/relocate_kernel.S b/arch/s390/kernel/relocate_kernel.S index ca37e5d5b40c..9c2c96da23d0 100644 --- a/arch/s390/kernel/relocate_kernel.S +++ b/arch/s390/kernel/relocate_kernel.S @@ -29,7 +29,6 @@ ENTRY(relocate_kernel) basr %r13,0 # base address .base: - stnsm sys_msk-.base(%r13),0xfb # disable DAT stctg %c0,%c15,ctlregs-.base(%r13) stmg %r0,%r15,gprregs-.base(%r13) lghi %r0,3 @@ -103,8 +102,6 @@ ENTRY(relocate_kernel) .align 8 load_psw: .long 0x00080000,0x80000000 - sys_msk: - .quad 0 ctlregs: .rept 16 .quad 0 diff --git a/arch/s390/kernel/runtime_instr.c b/arch/s390/kernel/runtime_instr.c index 32aefb215e59..09f5bf0d5c0c 100644 --- a/arch/s390/kernel/runtime_instr.c +++ b/arch/s390/kernel/runtime_instr.c @@ -21,11 +21,24 @@ /* empty control block to disable RI by loading it */ struct runtime_instr_cb runtime_instr_empty_cb; +void runtime_instr_release(struct task_struct *tsk) +{ + kfree(tsk->thread.ri_cb); +} + static void disable_runtime_instr(void) { - struct pt_regs *regs = task_pt_regs(current); + struct task_struct *task = current; + struct pt_regs *regs; + if (!task->thread.ri_cb) + return; + regs = task_pt_regs(task); + preempt_disable(); load_runtime_instr_cb(&runtime_instr_empty_cb); + kfree(task->thread.ri_cb); + task->thread.ri_cb = NULL; + preempt_enable(); /* * Make sure the RI bit is deleted from the PSW. If the user did not @@ -37,24 +50,13 @@ static void disable_runtime_instr(void) static void init_runtime_instr_cb(struct runtime_instr_cb *cb) { - cb->buf_limit = 0xfff; - cb->pstate = 1; - cb->pstate_set_buf = 1; - cb->pstate_sample = 1; - cb->pstate_collect = 1; + cb->rla = 0xfff; + cb->s = 1; + cb->k = 1; + cb->ps = 1; + cb->pc = 1; cb->key = PAGE_DEFAULT_KEY; - cb->valid = 1; -} - -void exit_thread_runtime_instr(void) -{ - struct task_struct *task = current; - - if (!task->thread.ri_cb) - return; - disable_runtime_instr(); - kfree(task->thread.ri_cb); - task->thread.ri_cb = NULL; + cb->v = 1; } SYSCALL_DEFINE1(s390_runtime_instr, int, command) @@ -65,9 +67,7 @@ SYSCALL_DEFINE1(s390_runtime_instr, int, command) return -EOPNOTSUPP; if (command == S390_RUNTIME_INSTR_STOP) { - preempt_disable(); - exit_thread_runtime_instr(); - preempt_enable(); + disable_runtime_instr(); return 0; } diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 164a1e16b53e..090053cf279b 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -55,17 +55,18 @@ #include <asm/mmu_context.h> #include <asm/cpcmd.h> #include <asm/lowcore.h> +#include <asm/nmi.h> #include <asm/irq.h> #include <asm/page.h> #include <asm/ptrace.h> #include <asm/sections.h> #include <asm/ebcdic.h> -#include <asm/kvm_virtio.h> #include <asm/diag.h> #include <asm/os_info.h> #include <asm/sclp.h> #include <asm/sysinfo.h> #include <asm/numa.h> +#include <asm/alternative.h> #include "entry.h" /* @@ -339,16 +340,8 @@ static void __init setup_lowcore(void) lc->stfl_fac_list = S390_lowcore.stfl_fac_list; memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list, MAX_FACILITY_BIT/8); - if (MACHINE_HAS_VX || MACHINE_HAS_GS) { - unsigned long bits, size; - - bits = MACHINE_HAS_GS ? 11 : 10; - size = 1UL << bits; - lc->mcesad = (__u64) memblock_virt_alloc(size, size); - if (MACHINE_HAS_GS) - lc->mcesad |= bits; - } - lc->vdso_per_cpu_data = (unsigned long) &lc->paste[0]; + nmi_alloc_boot_cpu(lc); + vdso_alloc_boot_cpu(lc); lc->sync_enter_timer = S390_lowcore.sync_enter_timer; lc->async_enter_timer = S390_lowcore.async_enter_timer; lc->exit_timer = S390_lowcore.exit_timer; @@ -380,6 +373,8 @@ static void __init setup_lowcore(void) #ifdef CONFIG_SMP lc->spinlock_lockval = arch_spin_lockval(0); + lc->spinlock_index = 0; + arch_spin_lock_setup(0); #endif set_prefix((u32)(unsigned long) lc); @@ -764,7 +759,7 @@ static int __init setup_hwcaps(void) /* * Transactional execution support HWCAP_S390_TE is bit 10. */ - if (test_facility(50) && test_facility(73)) + if (MACHINE_HAS_TE) elf_hwcap |= HWCAP_S390_TE; /* @@ -955,6 +950,8 @@ void __init setup_arch(char **cmdline_p) conmode_default(); set_preferred_console(); + apply_alternative_instructions(); + /* Setup zfcpdump support */ setup_zfcpdump(); diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 092c4154abd7..cd4334e80b64 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -37,6 +37,7 @@ #include <linux/sched/task_stack.h> #include <linux/crash_dump.h> #include <linux/memblock.h> +#include <linux/kprobes.h> #include <asm/asm-offsets.h> #include <asm/diag.h> #include <asm/switch_to.h> @@ -81,8 +82,6 @@ struct pcpu { static u8 boot_core_type; static struct pcpu pcpu_devices[NR_CPUS]; -static struct kmem_cache *pcpu_mcesa_cache; - unsigned int smp_cpu_mt_shift; EXPORT_SYMBOL(smp_cpu_mt_shift); @@ -193,10 +192,8 @@ static void pcpu_ec_call(struct pcpu *pcpu, int ec_bit) static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) { unsigned long async_stack, panic_stack; - unsigned long mcesa_origin, mcesa_bits; struct lowcore *lc; - mcesa_origin = mcesa_bits = 0; if (pcpu != &pcpu_devices[0]) { pcpu->lowcore = (struct lowcore *) __get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER); @@ -204,39 +201,30 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) panic_stack = __get_free_page(GFP_KERNEL); if (!pcpu->lowcore || !panic_stack || !async_stack) goto out; - if (MACHINE_HAS_VX || MACHINE_HAS_GS) { - mcesa_origin = (unsigned long) - kmem_cache_alloc(pcpu_mcesa_cache, GFP_KERNEL); - if (!mcesa_origin) - goto out; - /* The pointer is stored with mcesa_bits ORed in */ - kmemleak_not_leak((void *) mcesa_origin); - mcesa_bits = MACHINE_HAS_GS ? 11 : 0; - } } else { async_stack = pcpu->lowcore->async_stack - ASYNC_FRAME_OFFSET; panic_stack = pcpu->lowcore->panic_stack - PANIC_FRAME_OFFSET; - mcesa_origin = pcpu->lowcore->mcesad & MCESA_ORIGIN_MASK; - mcesa_bits = pcpu->lowcore->mcesad & MCESA_LC_MASK; } lc = pcpu->lowcore; memcpy(lc, &S390_lowcore, 512); memset((char *) lc + 512, 0, sizeof(*lc) - 512); lc->async_stack = async_stack + ASYNC_FRAME_OFFSET; lc->panic_stack = panic_stack + PANIC_FRAME_OFFSET; - lc->mcesad = mcesa_origin | mcesa_bits; lc->cpu_nr = cpu; lc->spinlock_lockval = arch_spin_lockval(cpu); - if (vdso_alloc_per_cpu(lc)) + lc->spinlock_index = 0; + if (nmi_alloc_per_cpu(lc)) goto out; + if (vdso_alloc_per_cpu(lc)) + goto out_mcesa; lowcore_ptr[cpu] = lc; pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, (u32)(unsigned long) lc); return 0; + +out_mcesa: + nmi_free_per_cpu(lc); out: if (pcpu != &pcpu_devices[0]) { - if (mcesa_origin) - kmem_cache_free(pcpu_mcesa_cache, - (void *) mcesa_origin); free_page(panic_stack); free_pages(async_stack, ASYNC_ORDER); free_pages((unsigned long) pcpu->lowcore, LC_ORDER); @@ -248,17 +236,12 @@ out: static void pcpu_free_lowcore(struct pcpu *pcpu) { - unsigned long mcesa_origin; - pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, 0); lowcore_ptr[pcpu - pcpu_devices] = NULL; vdso_free_per_cpu(pcpu->lowcore); + nmi_free_per_cpu(pcpu->lowcore); if (pcpu == &pcpu_devices[0]) return; - if (MACHINE_HAS_VX || MACHINE_HAS_GS) { - mcesa_origin = pcpu->lowcore->mcesad & MCESA_ORIGIN_MASK; - kmem_cache_free(pcpu_mcesa_cache, (void *) mcesa_origin); - } free_page(pcpu->lowcore->panic_stack-PANIC_FRAME_OFFSET); free_pages(pcpu->lowcore->async_stack-ASYNC_FRAME_OFFSET, ASYNC_ORDER); free_pages((unsigned long) pcpu->lowcore, LC_ORDER); @@ -274,6 +257,7 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu) cpumask_set_cpu(cpu, mm_cpumask(&init_mm)); lc->cpu_nr = cpu; lc->spinlock_lockval = arch_spin_lockval(cpu); + lc->spinlock_index = 0; lc->percpu_offset = __per_cpu_offset[cpu]; lc->kernel_asce = S390_lowcore.kernel_asce; lc->machine_flags = S390_lowcore.machine_flags; @@ -282,6 +266,7 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu) save_access_regs((unsigned int *) lc->access_regs_save_area); memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list, MAX_FACILITY_BIT/8); + arch_spin_lock_setup(cpu); } static void pcpu_attach_task(struct pcpu *pcpu, struct task_struct *tsk) @@ -423,13 +408,17 @@ void smp_yield_cpu(int cpu) * Send cpus emergency shutdown signal. This gives the cpus the * opportunity to complete outstanding interrupts. */ -static void smp_emergency_stop(cpumask_t *cpumask) +void notrace smp_emergency_stop(void) { + cpumask_t cpumask; u64 end; int cpu; + cpumask_copy(&cpumask, cpu_online_mask); + cpumask_clear_cpu(smp_processor_id(), &cpumask); + end = get_tod_clock() + (1000000UL << 12); - for_each_cpu(cpu, cpumask) { + for_each_cpu(cpu, &cpumask) { struct pcpu *pcpu = pcpu_devices + cpu; set_bit(ec_stop_cpu, &pcpu->ec_mask); while (__pcpu_sigp(pcpu->address, SIGP_EMERGENCY_SIGNAL, @@ -438,21 +427,21 @@ static void smp_emergency_stop(cpumask_t *cpumask) cpu_relax(); } while (get_tod_clock() < end) { - for_each_cpu(cpu, cpumask) + for_each_cpu(cpu, &cpumask) if (pcpu_stopped(pcpu_devices + cpu)) - cpumask_clear_cpu(cpu, cpumask); - if (cpumask_empty(cpumask)) + cpumask_clear_cpu(cpu, &cpumask); + if (cpumask_empty(&cpumask)) break; cpu_relax(); } } +NOKPROBE_SYMBOL(smp_emergency_stop); /* * Stop all cpus but the current one. */ void smp_send_stop(void) { - cpumask_t cpumask; int cpu; /* Disable all interrupts/machine checks */ @@ -460,17 +449,16 @@ void smp_send_stop(void) trace_hardirqs_off(); debug_set_critical(); - cpumask_copy(&cpumask, cpu_online_mask); - cpumask_clear_cpu(smp_processor_id(), &cpumask); if (oops_in_progress) - smp_emergency_stop(&cpumask); + smp_emergency_stop(); /* stop all processors */ - for_each_cpu(cpu, &cpumask) { - struct pcpu *pcpu = pcpu_devices + cpu; - pcpu_sigp_retry(pcpu, SIGP_STOP, 0); - while (!pcpu_stopped(pcpu)) + for_each_online_cpu(cpu) { + if (cpu == smp_processor_id()) + continue; + pcpu_sigp_retry(pcpu_devices + cpu, SIGP_STOP, 0); + while (!pcpu_stopped(pcpu_devices + cpu)) cpu_relax(); } } @@ -804,6 +792,8 @@ void __init smp_detect_cpus(void) */ static void smp_start_secondary(void *cpuvoid) { + int cpu = smp_processor_id(); + S390_lowcore.last_update_clock = get_tod_clock(); S390_lowcore.restart_stack = (unsigned long) restart_stack; S390_lowcore.restart_fn = (unsigned long) do_restart; @@ -817,8 +807,12 @@ static void smp_start_secondary(void *cpuvoid) init_cpu_timer(); vtime_init(); pfault_init(); - notify_cpu_starting(smp_processor_id()); - set_cpu_online(smp_processor_id(), true); + notify_cpu_starting(cpu); + if (topology_cpu_dedicated(cpu)) + set_cpu_flag(CIF_DEDICATED_CPU); + else + clear_cpu_flag(CIF_DEDICATED_CPU); + set_cpu_online(cpu, true); inc_irq_stat(CPU_RST); local_irq_enable(); cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); @@ -927,22 +921,12 @@ void __init smp_fill_possible_mask(void) void __init smp_prepare_cpus(unsigned int max_cpus) { - unsigned long size; - /* request the 0x1201 emergency signal external interrupt */ if (register_external_irq(EXT_IRQ_EMERGENCY_SIG, do_ext_call_interrupt)) panic("Couldn't request external interrupt 0x1201"); /* request the 0x1202 external call external interrupt */ if (register_external_irq(EXT_IRQ_EXTERNAL_CALL, do_ext_call_interrupt)) panic("Couldn't request external interrupt 0x1202"); - /* create slab cache for the machine-check-extended-save-areas */ - if (MACHINE_HAS_VX || MACHINE_HAS_GS) { - size = 1UL << (MACHINE_HAS_GS ? 11 : 10); - pcpu_mcesa_cache = kmem_cache_create("nmi_save_areas", - size, size, 0, NULL); - if (!pcpu_mcesa_cache) - panic("Couldn't create nmi save area cache"); - } } void __init smp_prepare_boot_cpu(void) @@ -965,6 +949,7 @@ void __init smp_setup_processor_id(void) pcpu_devices[0].address = stap(); S390_lowcore.cpu_nr = 0; S390_lowcore.spinlock_lockval = arch_spin_lockval(0); + S390_lowcore.spinlock_index = 0; } /* diff --git a/arch/s390/kvm/sthyi.c b/arch/s390/kernel/sthyi.c index 395926b8c1ed..12981e197f01 100644 --- a/arch/s390/kvm/sthyi.c +++ b/arch/s390/kernel/sthyi.c @@ -8,22 +8,19 @@ * Copyright IBM Corp. 2016 * Author(s): Janosch Frank <frankja@linux.vnet.ibm.com> */ -#include <linux/kvm_host.h> #include <linux/errno.h> #include <linux/pagemap.h> #include <linux/vmalloc.h> -#include <linux/ratelimit.h> - -#include <asm/kvm_host.h> +#include <linux/syscalls.h> +#include <linux/mutex.h> #include <asm/asm-offsets.h> #include <asm/sclp.h> #include <asm/diag.h> #include <asm/sysinfo.h> #include <asm/ebcdic.h> - -#include "kvm-s390.h" -#include "gaccess.h" -#include "trace.h" +#include <asm/facility.h> +#include <asm/sthyi.h> +#include "entry.h" #define DED_WEIGHT 0xffff /* @@ -144,6 +141,21 @@ struct lpar_cpu_inf { struct cpu_inf ifl; }; +/* + * STHYI requires extensive locking in the higher hypervisors + * and is very computational/memory expensive. Therefore we + * cache the retrieved data whose valid period is 1s. + */ +#define CACHE_VALID_JIFFIES HZ + +struct sthyi_info { + void *info; + unsigned long end; +}; + +static DEFINE_MUTEX(sthyi_mutex); +static struct sthyi_info sthyi_cache; + static inline u64 cpu_id(u8 ctidx, void *diag224_buf) { return *((u64 *)(diag224_buf + (ctidx + 1) * DIAG204_CPU_NAME_LEN)); @@ -382,88 +394,124 @@ out: vfree(diag204_buf); } -static int sthyi(u64 vaddr) +static int sthyi(u64 vaddr, u64 *rc) { register u64 code asm("0") = 0; register u64 addr asm("2") = vaddr; + register u64 rcode asm("3"); int cc; asm volatile( ".insn rre,0xB2560000,%[code],%[addr]\n" "ipm %[cc]\n" "srl %[cc],28\n" - : [cc] "=d" (cc) + : [cc] "=d" (cc), "=d" (rcode) : [code] "d" (code), [addr] "a" (addr) - : "3", "memory", "cc"); + : "memory", "cc"); + *rc = rcode; return cc; } -int handle_sthyi(struct kvm_vcpu *vcpu) +static int fill_dst(void *dst, u64 *rc) { - int reg1, reg2, r = 0; - u64 code, addr, cc = 0; - struct sthyi_sctns *sctns = NULL; - - if (!test_kvm_facility(vcpu->kvm, 74)) - return kvm_s390_inject_program_int(vcpu, PGM_OPERATION); + struct sthyi_sctns *sctns = (struct sthyi_sctns *)dst; /* - * STHYI requires extensive locking in the higher hypervisors - * and is very computational/memory expensive. Therefore we - * ratelimit the executions per VM. + * If the facility is on, we don't want to emulate the instruction. + * We ask the hypervisor to provide the data. */ - if (!__ratelimit(&vcpu->kvm->arch.sthyi_limit)) { - kvm_s390_retry_instr(vcpu); + if (test_facility(74)) + return sthyi((u64)dst, rc); + + fill_hdr(sctns); + fill_stsi(sctns); + fill_diag(sctns); + *rc = 0; + return 0; +} + +static int sthyi_init_cache(void) +{ + if (sthyi_cache.info) return 0; - } + sthyi_cache.info = (void *)get_zeroed_page(GFP_KERNEL); + if (!sthyi_cache.info) + return -ENOMEM; + sthyi_cache.end = jiffies - 1; /* expired */ + return 0; +} - kvm_s390_get_regs_rre(vcpu, ®1, ®2); - code = vcpu->run->s.regs.gprs[reg1]; - addr = vcpu->run->s.regs.gprs[reg2]; +static int sthyi_update_cache(u64 *rc) +{ + int r; - vcpu->stat.instruction_sthyi++; - VCPU_EVENT(vcpu, 3, "STHYI: fc: %llu addr: 0x%016llx", code, addr); - trace_kvm_s390_handle_sthyi(vcpu, code, addr); + memset(sthyi_cache.info, 0, PAGE_SIZE); + r = fill_dst(sthyi_cache.info, rc); + if (r) + return r; + sthyi_cache.end = jiffies + CACHE_VALID_JIFFIES; + return r; +} - if (reg1 == reg2 || reg1 & 1 || reg2 & 1) - return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); +/* + * sthyi_fill - Fill page with data returned by the STHYI instruction + * + * @dst: Pointer to zeroed page + * @rc: Pointer for storing the return code of the instruction + * + * Fills the destination with system information returned by the STHYI + * instruction. The data is generated by emulation or execution of STHYI, + * if available. The return value is the condition code that would be + * returned, the rc parameter is the return code which is passed in + * register R2 + 1. + */ +int sthyi_fill(void *dst, u64 *rc) +{ + int r; - if (code & 0xffff) { - cc = 3; + mutex_lock(&sthyi_mutex); + r = sthyi_init_cache(); + if (r) goto out; - } - if (addr & ~PAGE_MASK) - return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + if (time_is_before_jiffies(sthyi_cache.end)) { + /* cache expired */ + r = sthyi_update_cache(rc); + if (r) + goto out; + } + *rc = 0; + memcpy(dst, sthyi_cache.info, PAGE_SIZE); +out: + mutex_unlock(&sthyi_mutex); + return r; +} +EXPORT_SYMBOL_GPL(sthyi_fill); - sctns = (void *)get_zeroed_page(GFP_KERNEL); - if (!sctns) +SYSCALL_DEFINE4(s390_sthyi, unsigned long, function_code, void __user *, buffer, + u64 __user *, return_code, unsigned long, flags) +{ + u64 sthyi_rc; + void *info; + int r; + + if (flags) + return -EINVAL; + if (function_code != STHYI_FC_CP_IFL_CAP) + return -EOPNOTSUPP; + info = (void *)get_zeroed_page(GFP_KERNEL); + if (!info) return -ENOMEM; - - /* - * If we are a guest, we don't want to emulate an emulated - * instruction. We ask the hypervisor to provide the data. - */ - if (test_facility(74)) { - cc = sthyi((u64)sctns); + r = sthyi_fill(info, &sthyi_rc); + if (r < 0) + goto out; + if (return_code && put_user(sthyi_rc, return_code)) { + r = -EFAULT; goto out; } - - fill_hdr(sctns); - fill_stsi(sctns); - fill_diag(sctns); - + if (copy_to_user(buffer, info, PAGE_SIZE)) + r = -EFAULT; out: - if (!cc) { - r = write_guest(vcpu, addr, reg2, sctns, PAGE_SIZE); - if (r) { - free_page((unsigned long)sctns); - return kvm_s390_inject_prog_cond(vcpu, r); - } - } - - free_page((unsigned long)sctns); - vcpu->run->s.regs.gprs[reg2 + 1] = cc ? 4 : 0; - kvm_s390_set_psw_cc(vcpu, cc); + free_page((unsigned long)info); return r; } diff --git a/arch/s390/kernel/suspend.c b/arch/s390/kernel/suspend.c index a8af9c825628..ce329c876d8c 100644 --- a/arch/s390/kernel/suspend.c +++ b/arch/s390/kernel/suspend.c @@ -153,7 +153,7 @@ int pfn_is_nosave(unsigned long pfn) { unsigned long nosave_begin_pfn = PFN_DOWN(__pa(&__nosave_begin)); unsigned long nosave_end_pfn = PFN_DOWN(__pa(&__nosave_end)); - unsigned long eshared_pfn = PFN_DOWN(__pa(&_eshared)) - 1; + unsigned long end_rodata_pfn = PFN_DOWN(__pa(&__end_rodata)) - 1; unsigned long stext_pfn = PFN_DOWN(__pa(&_stext)); /* Always save lowcore pages (LC protection might be enabled). */ @@ -161,9 +161,9 @@ int pfn_is_nosave(unsigned long pfn) return 0; if (pfn >= nosave_begin_pfn && pfn < nosave_end_pfn) return 1; - /* Skip memory holes and read-only pages (NSS, DCSS, ...). */ - if (pfn >= stext_pfn && pfn <= eshared_pfn) - return ipl_info.type == IPL_TYPE_NSS ? 1 : 0; + /* Skip memory holes and read-only pages (DCSS, ...). */ + if (pfn >= stext_pfn && pfn <= end_rodata_pfn) + return 0; if (tprot(PFN_PHYS(pfn))) return 1; return 0; diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index d39f121e67a9..308a7b63348b 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -389,3 +389,4 @@ SYSCALL(sys_preadv2,compat_sys_preadv2) SYSCALL(sys_pwritev2,compat_sys_pwritev2) SYSCALL(sys_s390_guarded_storage,compat_sys_s390_guarded_storage) /* 378 */ SYSCALL(sys_statx,compat_sys_statx) +SYSCALL(sys_s390_sthyi,compat_sys_s390_sthyi) diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index ed0bdd220e1a..f9b393d4a078 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -133,6 +133,7 @@ static void add_cpus_to_mask(struct topology_core *tl_core, topo->socket_id = socket->id; topo->core_id = rcore; topo->thread_id = lcpu + i; + topo->dedicated = tl_core->d; cpumask_set_cpu(lcpu + i, &drawer->mask); cpumask_set_cpu(lcpu + i, &book->mask); cpumask_set_cpu(lcpu + i, &socket->mask); @@ -273,6 +274,14 @@ void store_topology(struct sysinfo_15_1_x *info) stsi(info, 15, 1, topology_mnest_limit()); } +static void __arch_update_dedicated_flag(void *arg) +{ + if (topology_cpu_dedicated(smp_processor_id())) + set_cpu_flag(CIF_DEDICATED_CPU); + else + clear_cpu_flag(CIF_DEDICATED_CPU); +} + static int __arch_update_cpu_topology(void) { struct sysinfo_15_1_x *info = tl_info; @@ -298,6 +307,7 @@ int arch_update_cpu_topology(void) int cpu, rc; rc = __arch_update_cpu_topology(); + on_each_cpu(__arch_update_dedicated_flag, NULL, 0); for_each_online_cpu(cpu) { dev = get_cpu_device(cpu); kobject_uevent(&dev->kobj, KOBJ_CHANGE); @@ -320,15 +330,14 @@ static void topology_flush_work(void) flush_work(&topology_work); } -static void topology_timer_fn(unsigned long ignored) +static void topology_timer_fn(struct timer_list *unused) { if (ptf(PTF_CHECK)) topology_schedule_update(); set_topology_timer(); } -static struct timer_list topology_timer = - TIMER_DEFERRED_INITIALIZER(topology_timer_fn, 0, 0); +static struct timer_list topology_timer; static atomic_t topology_poll = ATOMIC_INIT(0); @@ -435,9 +444,39 @@ static struct attribute_group topology_cpu_attr_group = { .attrs = topology_cpu_attrs, }; +static ssize_t cpu_dedicated_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int cpu = dev->id; + ssize_t count; + + mutex_lock(&smp_cpu_state_mutex); + count = sprintf(buf, "%d\n", topology_cpu_dedicated(cpu)); + mutex_unlock(&smp_cpu_state_mutex); + return count; +} +static DEVICE_ATTR(dedicated, 0444, cpu_dedicated_show, NULL); + +static struct attribute *topology_extra_cpu_attrs[] = { + &dev_attr_dedicated.attr, + NULL, +}; + +static struct attribute_group topology_extra_cpu_attr_group = { + .attrs = topology_extra_cpu_attrs, +}; + int topology_cpu_init(struct cpu *cpu) { - return sysfs_create_group(&cpu->dev.kobj, &topology_cpu_attr_group); + int rc; + + rc = sysfs_create_group(&cpu->dev.kobj, &topology_cpu_attr_group); + if (rc || !MACHINE_HAS_TOPOLOGY) + return rc; + rc = sysfs_create_group(&cpu->dev.kobj, &topology_extra_cpu_attr_group); + if (rc) + sysfs_remove_group(&cpu->dev.kobj, &topology_cpu_attr_group); + return rc; } static const struct cpumask *cpu_thread_mask(int cpu) @@ -509,6 +548,7 @@ void __init topology_init_early(void) alloc_masks(info, &drawer_info, 3); out: __arch_update_cpu_topology(); + __arch_update_dedicated_flag(NULL); } static inline int topology_get_mode(int enabled) @@ -597,6 +637,7 @@ static struct ctl_table topology_dir_table[] = { static int __init topology_init(void) { + timer_setup(&topology_timer, topology_timer_fn, TIMER_DEFERRABLE); if (MACHINE_HAS_TOPOLOGY) set_topology_timer(); else diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index eacda05b45d7..0520854a4dab 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -140,6 +140,20 @@ static void __init vdso_init_data(struct vdso_data *vd) */ #define SEGMENT_ORDER 2 +/* + * The initial vdso_data structure for the boot CPU. Eventually + * it is replaced with a properly allocated structure in vdso_init. + * This is necessary because a valid S390_lowcore.vdso_per_cpu_data + * pointer is required to be able to return from an interrupt or + * program check. See the exit paths in entry.S. + */ +struct vdso_data boot_vdso_data __initdata; + +void __init vdso_alloc_boot_cpu(struct lowcore *lowcore) +{ + lowcore->vdso_per_cpu_data = (unsigned long) &boot_vdso_data; +} + int vdso_alloc_per_cpu(struct lowcore *lowcore) { unsigned long segment_table, page_table, page_frame; @@ -166,10 +180,8 @@ int vdso_alloc_per_cpu(struct lowcore *lowcore) vd->node_id = cpu_to_node(vd->cpu_nr); /* Set up access register mode page table */ - clear_table((unsigned long *) segment_table, _SEGMENT_ENTRY_EMPTY, - PAGE_SIZE << SEGMENT_ORDER); - clear_table((unsigned long *) page_table, _PAGE_INVALID, - 256*sizeof(unsigned long)); + memset64((u64 *)segment_table, _SEGMENT_ENTRY_EMPTY, _CRST_ENTRIES); + memset64((u64 *)page_table, _PAGE_INVALID, PTRS_PER_PTE); *(unsigned long *) segment_table = _SEGMENT_ENTRY + page_table; *(unsigned long *) page_table = _PAGE_PROTECT + page_frame; diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 96a713a470e7..a049ff005f03 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -60,12 +60,7 @@ SECTIONS RO_DATA_SECTION(PAGE_SIZE) -#ifdef CONFIG_SHARED_KERNEL - . = ALIGN(0x100000); /* VM shared segments are 1MB aligned */ -#endif - . = ALIGN(PAGE_SIZE); - _eshared = .; /* End of shareable data */ _sdata = .; /* Start of data section */ . = ALIGN(PAGE_SIZE); @@ -105,6 +100,29 @@ SECTIONS EXIT_DATA } + /* + * struct alt_inst entries. From the header (alternative.h): + * "Alternative instructions for different CPU types or capabilities" + * Think locking instructions on spinlocks. + * Note, that it is a part of __init region. + */ + . = ALIGN(8); + .altinstructions : { + __alt_instructions = .; + *(.altinstructions) + __alt_instructions_end = .; + } + + /* + * And here are the replacement instructions. The linker sticks + * them as binary blobs. The .altinstructions has enough data to + * get the address and the length of them to patch the kernel safely. + * Note, that it is a part of __init region. + */ + .altinstr_replacement : { + *(.altinstr_replacement) + } + /* early.c uses stsi, which requires page aligned data. */ . = ALIGN(PAGE_SIZE); INIT_DATA_SECTION(0x100) diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile index 09a9e6dfc09f..6048b1c6e580 100644 --- a/arch/s390/kvm/Makefile +++ b/arch/s390/kvm/Makefile @@ -12,6 +12,6 @@ common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o $(KVM)/async_pf.o $(KVM)/irqch ccflags-y := -Ivirt/kvm -Iarch/s390/kvm kvm-objs := $(common-objs) kvm-s390.o intercept.o interrupt.o priv.o sigp.o -kvm-objs += diag.o gaccess.o guestdbg.o sthyi.o vsie.o +kvm-objs += diag.o gaccess.o guestdbg.o vsie.o obj-$(CONFIG_KVM) += kvm.o diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index a4752bf6b526..8fe034beb623 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -18,6 +18,7 @@ #include <asm/kvm_host.h> #include <asm/asm-offsets.h> #include <asm/irq.h> +#include <asm/sysinfo.h> #include "kvm-s390.h" #include "gaccess.h" @@ -360,6 +361,61 @@ static int handle_partial_execution(struct kvm_vcpu *vcpu) return -EOPNOTSUPP; } +/* + * Handle the sthyi instruction that provides the guest with system + * information, like current CPU resources available at each level of + * the machine. + */ +int handle_sthyi(struct kvm_vcpu *vcpu) +{ + int reg1, reg2, r = 0; + u64 code, addr, cc = 0, rc = 0; + struct sthyi_sctns *sctns = NULL; + + if (!test_kvm_facility(vcpu->kvm, 74)) + return kvm_s390_inject_program_int(vcpu, PGM_OPERATION); + + kvm_s390_get_regs_rre(vcpu, ®1, ®2); + code = vcpu->run->s.regs.gprs[reg1]; + addr = vcpu->run->s.regs.gprs[reg2]; + + vcpu->stat.instruction_sthyi++; + VCPU_EVENT(vcpu, 3, "STHYI: fc: %llu addr: 0x%016llx", code, addr); + trace_kvm_s390_handle_sthyi(vcpu, code, addr); + + if (reg1 == reg2 || reg1 & 1 || reg2 & 1) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + if (code & 0xffff) { + cc = 3; + rc = 4; + goto out; + } + + if (addr & ~PAGE_MASK) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + sctns = (void *)get_zeroed_page(GFP_KERNEL); + if (!sctns) + return -ENOMEM; + + cc = sthyi_fill(sctns, &rc); + +out: + if (!cc) { + r = write_guest(vcpu, addr, reg2, sctns, PAGE_SIZE); + if (r) { + free_page((unsigned long)sctns); + return kvm_s390_inject_prog_cond(vcpu, r); + } + } + + free_page((unsigned long)sctns); + vcpu->run->s.regs.gprs[reg2 + 1] = rc; + kvm_s390_set_psw_cc(vcpu, cc); + return r; +} + static int handle_operexc(struct kvm_vcpu *vcpu) { psw_t oldpsw, newpsw; diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index a832ad031cee..329b2843fee2 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -2483,11 +2483,11 @@ void kvm_s390_reinject_machine_check(struct kvm_vcpu *vcpu, mci.val = mcck_info->mcic; if (mci.sr) - cr14 |= MCCK_CR14_RECOVERY_SUB_MASK; + cr14 |= CR14_RECOVERY_SUBMASK; if (mci.dg) - cr14 |= MCCK_CR14_DEGRAD_SUB_MASK; + cr14 |= CR14_DEGRADATION_SUBMASK; if (mci.w) - cr14 |= MCCK_CR14_WARN_SUB_MASK; + cr14 |= CR14_WARNING_SUBMASK; mchk = mci.ck ? &inti.mchk : &irq.u.mchk; mchk->cr14 = cr14; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 40d0a1a97889..4bc70afe0a10 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1884,8 +1884,6 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) rc = -ENOMEM; - ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500); - kvm->arch.use_esca = 0; /* start with basic SCA */ if (!sclp.has_64bscao) alloc_flags |= GFP_DMA; @@ -3283,7 +3281,7 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) */ if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) && test_kvm_facility(vcpu->kvm, 64) && - riccb->valid && + riccb->v && !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) { VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)"); vcpu->arch.sie_block->ecb3 |= ECB3_RI; diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 9f8fdd7b2311..10d65dfbc306 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -242,6 +242,8 @@ static inline void kvm_s390_retry_instr(struct kvm_vcpu *vcpu) kvm_s390_rewind_psw(vcpu, kvm_s390_get_ilen(vcpu)); } +int handle_sthyi(struct kvm_vcpu *vcpu); + /* implemented in priv.c */ int is_valid_psw(psw_t *psw); int kvm_s390_handle_aa(struct kvm_vcpu *vcpu); @@ -268,9 +270,6 @@ void kvm_s390_vsie_destroy(struct kvm *kvm); int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu); int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu); -/* implemented in sthyi.c */ -int handle_sthyi(struct kvm_vcpu *vcpu); - /* implemented in kvm-s390.c */ void kvm_s390_set_tod_clock_ext(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod); diff --git a/arch/s390/lib/mem.S b/arch/s390/lib/mem.S index d66751397e72..495c9c4bacc7 100644 --- a/arch/s390/lib/mem.S +++ b/arch/s390/lib/mem.S @@ -79,21 +79,25 @@ ENTRY(memset) ex %r4,0(%r3) br %r14 .Lmemset_fill: - stc %r3,0(%r2) cghi %r4,1 lgr %r1,%r2 - ber %r14 + je .Lmemset_fill_exit aghi %r4,-2 - srlg %r3,%r4,8 - ltgr %r3,%r3 + srlg %r5,%r4,8 + ltgr %r5,%r5 jz .Lmemset_fill_remainder .Lmemset_fill_loop: - mvc 1(256,%r1),0(%r1) + stc %r3,0(%r1) + mvc 1(255,%r1),0(%r1) la %r1,256(%r1) - brctg %r3,.Lmemset_fill_loop + brctg %r5,.Lmemset_fill_loop .Lmemset_fill_remainder: - larl %r3,.Lmemset_mvc - ex %r4,0(%r3) + stc %r3,0(%r1) + larl %r5,.Lmemset_mvc + ex %r4,0(%r5) + br %r14 +.Lmemset_fill_exit: + stc %r3,0(%r1) br %r14 .Lmemset_xc: xc 0(1,%r1),0(%r1) @@ -127,3 +131,47 @@ ENTRY(memcpy) .Lmemcpy_mvc: mvc 0(1,%r1),0(%r3) EXPORT_SYMBOL(memcpy) + +/* + * __memset16/32/64 + * + * void *__memset16(uint16_t *s, uint16_t v, size_t count) + * void *__memset32(uint32_t *s, uint32_t v, size_t count) + * void *__memset64(uint64_t *s, uint64_t v, size_t count) + */ +.macro __MEMSET bits,bytes,insn +ENTRY(__memset\bits) + ltgr %r4,%r4 + bzr %r14 + cghi %r4,\bytes + je .L__memset_exit\bits + aghi %r4,-(\bytes+1) + srlg %r5,%r4,8 + ltgr %r5,%r5 + lgr %r1,%r2 + jz .L__memset_remainder\bits +.L__memset_loop\bits: + \insn %r3,0(%r1) + mvc \bytes(256-\bytes,%r1),0(%r1) + la %r1,256(%r1) + brctg %r5,.L__memset_loop\bits +.L__memset_remainder\bits: + \insn %r3,0(%r1) + larl %r5,.L__memset_mvc\bits + ex %r4,0(%r5) + br %r14 +.L__memset_exit\bits: + \insn %r3,0(%r2) + br %r14 +.L__memset_mvc\bits: + mvc \bytes(1,%r1),0(%r1) +.endm + +__MEMSET 16,2,sth +EXPORT_SYMBOL(__memset16) + +__MEMSET 32,4,st +EXPORT_SYMBOL(__memset32) + +__MEMSET 64,8,stg +EXPORT_SYMBOL(__memset64) diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c index 1dc85f552f48..84c0faeaf7ea 100644 --- a/arch/s390/lib/spinlock.c +++ b/arch/s390/lib/spinlock.c @@ -9,8 +9,11 @@ #include <linux/types.h> #include <linux/export.h> #include <linux/spinlock.h> +#include <linux/jiffies.h> #include <linux/init.h> #include <linux/smp.h> +#include <linux/percpu.h> +#include <asm/alternative.h> #include <asm/io.h> int spin_retry = -1; @@ -33,14 +36,46 @@ static int __init spin_retry_setup(char *str) } __setup("spin_retry=", spin_retry_setup); +struct spin_wait { + struct spin_wait *next, *prev; + int node_id; +} __aligned(32); + +static DEFINE_PER_CPU_ALIGNED(struct spin_wait, spin_wait[4]); + +#define _Q_LOCK_CPU_OFFSET 0 +#define _Q_LOCK_STEAL_OFFSET 16 +#define _Q_TAIL_IDX_OFFSET 18 +#define _Q_TAIL_CPU_OFFSET 20 + +#define _Q_LOCK_CPU_MASK 0x0000ffff +#define _Q_LOCK_STEAL_ADD 0x00010000 +#define _Q_LOCK_STEAL_MASK 0x00030000 +#define _Q_TAIL_IDX_MASK 0x000c0000 +#define _Q_TAIL_CPU_MASK 0xfff00000 + +#define _Q_LOCK_MASK (_Q_LOCK_CPU_MASK | _Q_LOCK_STEAL_MASK) +#define _Q_TAIL_MASK (_Q_TAIL_IDX_MASK | _Q_TAIL_CPU_MASK) + +void arch_spin_lock_setup(int cpu) +{ + struct spin_wait *node; + int ix; + + node = per_cpu_ptr(&spin_wait[0], cpu); + for (ix = 0; ix < 4; ix++, node++) { + memset(node, 0, sizeof(*node)); + node->node_id = ((cpu + 1) << _Q_TAIL_CPU_OFFSET) + + (ix << _Q_TAIL_IDX_OFFSET); + } +} + static inline int arch_load_niai4(int *lock) { int owner; asm volatile( -#ifdef CONFIG_HAVE_MARCH_ZEC12_FEATURES - " .long 0xb2fa0040\n" /* NIAI 4 */ -#endif + ALTERNATIVE("", ".long 0xb2fa0040", 49) /* NIAI 4 */ " l %0,%1\n" : "=d" (owner) : "Q" (*lock) : "memory"); return owner; @@ -51,9 +86,7 @@ static inline int arch_cmpxchg_niai8(int *lock, int old, int new) int expected = old; asm volatile( -#ifdef CONFIG_HAVE_MARCH_ZEC12_FEATURES - " .long 0xb2fa0080\n" /* NIAI 8 */ -#endif + ALTERNATIVE("", ".long 0xb2fa0080", 49) /* NIAI 8 */ " cs %0,%3,%1\n" : "=d" (old), "=Q" (*lock) : "0" (old), "d" (new), "Q" (*lock) @@ -61,75 +94,160 @@ static inline int arch_cmpxchg_niai8(int *lock, int old, int new) return expected == old; } -void arch_spin_lock_wait(arch_spinlock_t *lp) +static inline struct spin_wait *arch_spin_decode_tail(int lock) { - int cpu = SPINLOCK_LOCKVAL; - int owner, count; + int ix, cpu; + + ix = (lock & _Q_TAIL_IDX_MASK) >> _Q_TAIL_IDX_OFFSET; + cpu = (lock & _Q_TAIL_CPU_MASK) >> _Q_TAIL_CPU_OFFSET; + return per_cpu_ptr(&spin_wait[ix], cpu - 1); +} + +static inline int arch_spin_yield_target(int lock, struct spin_wait *node) +{ + if (lock & _Q_LOCK_CPU_MASK) + return lock & _Q_LOCK_CPU_MASK; + if (node == NULL || node->prev == NULL) + return 0; /* 0 -> no target cpu */ + while (node->prev) + node = node->prev; + return node->node_id >> _Q_TAIL_CPU_OFFSET; +} + +static inline void arch_spin_lock_queued(arch_spinlock_t *lp) +{ + struct spin_wait *node, *next; + int lockval, ix, node_id, tail_id, old, new, owner, count; + + ix = S390_lowcore.spinlock_index++; + barrier(); + lockval = SPINLOCK_LOCKVAL; /* cpu + 1 */ + node = this_cpu_ptr(&spin_wait[ix]); + node->prev = node->next = NULL; + node_id = node->node_id; + + /* Enqueue the node for this CPU in the spinlock wait queue */ + while (1) { + old = READ_ONCE(lp->lock); + if ((old & _Q_LOCK_CPU_MASK) == 0 && + (old & _Q_LOCK_STEAL_MASK) != _Q_LOCK_STEAL_MASK) { + /* + * The lock is free but there may be waiters. + * With no waiters simply take the lock, if there + * are waiters try to steal the lock. The lock may + * be stolen three times before the next queued + * waiter will get the lock. + */ + new = (old ? (old + _Q_LOCK_STEAL_ADD) : 0) | lockval; + if (__atomic_cmpxchg_bool(&lp->lock, old, new)) + /* Got the lock */ + goto out; + /* lock passing in progress */ + continue; + } + /* Make the node of this CPU the new tail. */ + new = node_id | (old & _Q_LOCK_MASK); + if (__atomic_cmpxchg_bool(&lp->lock, old, new)) + break; + } + /* Set the 'next' pointer of the tail node in the queue */ + tail_id = old & _Q_TAIL_MASK; + if (tail_id != 0) { + node->prev = arch_spin_decode_tail(tail_id); + WRITE_ONCE(node->prev->next, node); + } /* Pass the virtual CPU to the lock holder if it is not running */ - owner = arch_load_niai4(&lp->lock); - if (owner && arch_vcpu_is_preempted(~owner)) - smp_yield_cpu(~owner); + owner = arch_spin_yield_target(old, node); + if (owner && arch_vcpu_is_preempted(owner - 1)) + smp_yield_cpu(owner - 1); + /* Spin on the CPU local node->prev pointer */ + if (tail_id != 0) { + count = spin_retry; + while (READ_ONCE(node->prev) != NULL) { + if (count-- >= 0) + continue; + count = spin_retry; + /* Query running state of lock holder again. */ + owner = arch_spin_yield_target(old, node); + if (owner && arch_vcpu_is_preempted(owner - 1)) + smp_yield_cpu(owner - 1); + } + } + + /* Spin on the lock value in the spinlock_t */ count = spin_retry; while (1) { - owner = arch_load_niai4(&lp->lock); - /* Try to get the lock if it is free. */ + old = READ_ONCE(lp->lock); + owner = old & _Q_LOCK_CPU_MASK; if (!owner) { - if (arch_cmpxchg_niai8(&lp->lock, 0, cpu)) - return; + tail_id = old & _Q_TAIL_MASK; + new = ((tail_id != node_id) ? tail_id : 0) | lockval; + if (__atomic_cmpxchg_bool(&lp->lock, old, new)) + /* Got the lock */ + break; continue; } if (count-- >= 0) continue; count = spin_retry; - /* - * For multiple layers of hypervisors, e.g. z/VM + LPAR - * yield the CPU unconditionally. For LPAR rely on the - * sense running status. - */ - if (!MACHINE_IS_LPAR || arch_vcpu_is_preempted(~owner)) - smp_yield_cpu(~owner); + if (!MACHINE_IS_LPAR || arch_vcpu_is_preempted(owner - 1)) + smp_yield_cpu(owner - 1); } + + /* Pass lock_spin job to next CPU in the queue */ + if (node_id && tail_id != node_id) { + /* Wait until the next CPU has set up the 'next' pointer */ + while ((next = READ_ONCE(node->next)) == NULL) + ; + next->prev = NULL; + } + + out: + S390_lowcore.spinlock_index--; } -EXPORT_SYMBOL(arch_spin_lock_wait); -void arch_spin_lock_wait_flags(arch_spinlock_t *lp, unsigned long flags) +static inline void arch_spin_lock_classic(arch_spinlock_t *lp) { - int cpu = SPINLOCK_LOCKVAL; - int owner, count; + int lockval, old, new, owner, count; - local_irq_restore(flags); + lockval = SPINLOCK_LOCKVAL; /* cpu + 1 */ /* Pass the virtual CPU to the lock holder if it is not running */ - owner = arch_load_niai4(&lp->lock); - if (owner && arch_vcpu_is_preempted(~owner)) - smp_yield_cpu(~owner); + owner = arch_spin_yield_target(READ_ONCE(lp->lock), NULL); + if (owner && arch_vcpu_is_preempted(owner - 1)) + smp_yield_cpu(owner - 1); count = spin_retry; while (1) { - owner = arch_load_niai4(&lp->lock); + old = arch_load_niai4(&lp->lock); + owner = old & _Q_LOCK_CPU_MASK; /* Try to get the lock if it is free. */ if (!owner) { - local_irq_disable(); - if (arch_cmpxchg_niai8(&lp->lock, 0, cpu)) - return; - local_irq_restore(flags); + new = (old & _Q_TAIL_MASK) | lockval; + if (arch_cmpxchg_niai8(&lp->lock, old, new)) + /* Got the lock */ + return; continue; } if (count-- >= 0) continue; count = spin_retry; - /* - * For multiple layers of hypervisors, e.g. z/VM + LPAR - * yield the CPU unconditionally. For LPAR rely on the - * sense running status. - */ - if (!MACHINE_IS_LPAR || arch_vcpu_is_preempted(~owner)) - smp_yield_cpu(~owner); + if (!MACHINE_IS_LPAR || arch_vcpu_is_preempted(owner - 1)) + smp_yield_cpu(owner - 1); } } -EXPORT_SYMBOL(arch_spin_lock_wait_flags); + +void arch_spin_lock_wait(arch_spinlock_t *lp) +{ + /* Use classic spinlocks + niai if the steal time is >= 10% */ + if (test_cpu_flag(CIF_DEDICATED_CPU)) + arch_spin_lock_queued(lp); + else + arch_spin_lock_classic(lp); +} +EXPORT_SYMBOL(arch_spin_lock_wait); int arch_spin_trylock_retry(arch_spinlock_t *lp) { @@ -148,126 +266,59 @@ int arch_spin_trylock_retry(arch_spinlock_t *lp) } EXPORT_SYMBOL(arch_spin_trylock_retry); -void _raw_read_lock_wait(arch_rwlock_t *rw) +void arch_read_lock_wait(arch_rwlock_t *rw) { - int count = spin_retry; - int owner, old; - -#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES - __RAW_LOCK(&rw->lock, -1, __RAW_OP_ADD); -#endif - owner = 0; - while (1) { - if (count-- <= 0) { - if (owner && arch_vcpu_is_preempted(~owner)) - smp_yield_cpu(~owner); - count = spin_retry; - } - old = ACCESS_ONCE(rw->lock); - owner = ACCESS_ONCE(rw->owner); - if (old < 0) - continue; - if (__atomic_cmpxchg_bool(&rw->lock, old, old + 1)) - return; + if (unlikely(in_interrupt())) { + while (READ_ONCE(rw->cnts) & 0x10000) + barrier(); + return; } + + /* Remove this reader again to allow recursive read locking */ + __atomic_add_const(-1, &rw->cnts); + /* Put the reader into the wait queue */ + arch_spin_lock(&rw->wait); + /* Now add this reader to the count value again */ + __atomic_add_const(1, &rw->cnts); + /* Loop until the writer is done */ + while (READ_ONCE(rw->cnts) & 0x10000) + barrier(); + arch_spin_unlock(&rw->wait); } -EXPORT_SYMBOL(_raw_read_lock_wait); +EXPORT_SYMBOL(arch_read_lock_wait); -int _raw_read_trylock_retry(arch_rwlock_t *rw) +void arch_write_lock_wait(arch_rwlock_t *rw) { - int count = spin_retry; int old; - while (count-- > 0) { - old = ACCESS_ONCE(rw->lock); - if (old < 0) - continue; - if (__atomic_cmpxchg_bool(&rw->lock, old, old + 1)) - return 1; - } - return 0; -} -EXPORT_SYMBOL(_raw_read_trylock_retry); - -#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES + /* Add this CPU to the write waiters */ + __atomic_add(0x20000, &rw->cnts); -void _raw_write_lock_wait(arch_rwlock_t *rw, int prev) -{ - int count = spin_retry; - int owner, old; + /* Put the writer into the wait queue */ + arch_spin_lock(&rw->wait); - owner = 0; while (1) { - if (count-- <= 0) { - if (owner && arch_vcpu_is_preempted(~owner)) - smp_yield_cpu(~owner); - count = spin_retry; - } - old = ACCESS_ONCE(rw->lock); - owner = ACCESS_ONCE(rw->owner); - smp_mb(); - if (old >= 0) { - prev = __RAW_LOCK(&rw->lock, 0x80000000, __RAW_OP_OR); - old = prev; - } - if ((old & 0x7fffffff) == 0 && prev >= 0) + old = READ_ONCE(rw->cnts); + if ((old & 0x1ffff) == 0 && + __atomic_cmpxchg_bool(&rw->cnts, old, old | 0x10000)) + /* Got the lock */ break; + barrier(); } -} -EXPORT_SYMBOL(_raw_write_lock_wait); - -#else /* CONFIG_HAVE_MARCH_Z196_FEATURES */ - -void _raw_write_lock_wait(arch_rwlock_t *rw) -{ - int count = spin_retry; - int owner, old, prev; - prev = 0x80000000; - owner = 0; - while (1) { - if (count-- <= 0) { - if (owner && arch_vcpu_is_preempted(~owner)) - smp_yield_cpu(~owner); - count = spin_retry; - } - old = ACCESS_ONCE(rw->lock); - owner = ACCESS_ONCE(rw->owner); - if (old >= 0 && - __atomic_cmpxchg_bool(&rw->lock, old, old | 0x80000000)) - prev = old; - else - smp_mb(); - if ((old & 0x7fffffff) == 0 && prev >= 0) - break; - } + arch_spin_unlock(&rw->wait); } -EXPORT_SYMBOL(_raw_write_lock_wait); - -#endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */ +EXPORT_SYMBOL(arch_write_lock_wait); -int _raw_write_trylock_retry(arch_rwlock_t *rw) +void arch_spin_relax(arch_spinlock_t *lp) { - int count = spin_retry; - int old; + int cpu; - while (count-- > 0) { - old = ACCESS_ONCE(rw->lock); - if (old) - continue; - if (__atomic_cmpxchg_bool(&rw->lock, 0, 0x80000000)) - return 1; - } - return 0; -} -EXPORT_SYMBOL(_raw_write_trylock_retry); - -void arch_lock_relax(int cpu) -{ + cpu = READ_ONCE(lp->lock) & _Q_LOCK_CPU_MASK; if (!cpu) return; - if (MACHINE_IS_LPAR && !arch_vcpu_is_preempted(~cpu)) + if (MACHINE_IS_LPAR && !arch_vcpu_is_preempted(cpu - 1)) return; - smp_yield_cpu(~cpu); + smp_yield_cpu(cpu - 1); } -EXPORT_SYMBOL(arch_lock_relax); +EXPORT_SYMBOL(arch_spin_relax); diff --git a/arch/s390/lib/string.c b/arch/s390/lib/string.c index dbf2fdad2724..a10e11f7a5f7 100644 --- a/arch/s390/lib/string.c +++ b/arch/s390/lib/string.c @@ -56,7 +56,7 @@ EXPORT_SYMBOL(strlen); * * returns the minimum of the length of @s and @n */ -size_t strnlen(const char * s, size_t n) +size_t strnlen(const char *s, size_t n) { return __strnend(s, n) - s; } @@ -195,14 +195,14 @@ EXPORT_SYMBOL(strncat); /** * strcmp - Compare two strings - * @cs: One string - * @ct: Another string + * @s1: One string + * @s2: Another string * - * returns 0 if @cs and @ct are equal, - * < 0 if @cs is less than @ct - * > 0 if @cs is greater than @ct + * returns 0 if @s1 and @s2 are equal, + * < 0 if @s1 is less than @s2 + * > 0 if @s1 is greater than @s2 */ -int strcmp(const char *cs, const char *ct) +int strcmp(const char *s1, const char *s2) { register int r0 asm("0") = 0; int ret = 0; @@ -214,7 +214,7 @@ int strcmp(const char *cs, const char *ct) " ic %1,0(%3)\n" " sr %0,%1\n" "1:" - : "+d" (ret), "+d" (r0), "+a" (cs), "+a" (ct) + : "+d" (ret), "+d" (r0), "+a" (s1), "+a" (s2) : : "cc", "memory"); return ret; } @@ -225,7 +225,7 @@ EXPORT_SYMBOL(strcmp); * @s: The string to be searched * @c: The character to search for */ -char * strrchr(const char * s, int c) +char *strrchr(const char *s, int c) { size_t len = __strend(s) - s; @@ -261,7 +261,7 @@ static inline int clcle(const char *s1, unsigned long l1, * @s1: The string to be searched * @s2: The string to search for */ -char * strstr(const char * s1,const char * s2) +char *strstr(const char *s1, const char *s2) { int l1, l2; @@ -307,15 +307,15 @@ EXPORT_SYMBOL(memchr); /** * memcmp - Compare two areas of memory - * @cs: One area of memory - * @ct: Another area of memory + * @s1: One area of memory + * @s2: Another area of memory * @count: The size of the area. */ -int memcmp(const void *cs, const void *ct, size_t n) +int memcmp(const void *s1, const void *s2, size_t n) { int ret; - ret = clcle(cs, n, ct, n); + ret = clcle(s1, n, s2, n); if (ret) ret = ret == 1 ? -1 : 1; return ret; diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c index 829c63dbc81a..2dbdcd85b68f 100644 --- a/arch/s390/mm/cmm.c +++ b/arch/s390/mm/cmm.c @@ -56,7 +56,7 @@ static DEFINE_SPINLOCK(cmm_lock); static struct task_struct *cmm_thread_ptr; static DECLARE_WAIT_QUEUE_HEAD(cmm_thread_wait); -static DEFINE_TIMER(cmm_timer, NULL, 0, 0); +static DEFINE_TIMER(cmm_timer, NULL); static void cmm_timer_fn(unsigned long); static void cmm_set_timer(void); diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 41ba9bd53e48..817c9e16e83e 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -145,8 +145,8 @@ void __init mem_init(void) void free_initmem(void) { - __set_memory((unsigned long) _sinittext, - (_einittext - _sinittext) >> PAGE_SHIFT, + __set_memory((unsigned long)_sinittext, + (unsigned long)(_einittext - _sinittext) >> PAGE_SHIFT, SET_MEMORY_RW | SET_MEMORY_NX); free_initmem_default(POISON_FREE_INITMEM); } diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index cc2faffa7d6e..4ad4c4f77b4d 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -159,13 +159,13 @@ static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits) struct page *page_table_alloc_pgste(struct mm_struct *mm) { struct page *page; - unsigned long *table; + u64 *table; page = alloc_page(GFP_KERNEL); if (page) { - table = (unsigned long *) page_to_phys(page); - clear_table(table, _PAGE_INVALID, PAGE_SIZE/2); - clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2); + table = (u64 *)page_to_phys(page); + memset64(table, _PAGE_INVALID, PTRS_PER_PTE); + memset64(table + PTRS_PER_PTE, 0, PTRS_PER_PTE); } return page; } @@ -222,12 +222,12 @@ unsigned long *page_table_alloc(struct mm_struct *mm) if (mm_alloc_pgste(mm)) { /* Return 4K page table with PGSTEs */ atomic_set(&page->_mapcount, 3); - clear_table(table, _PAGE_INVALID, PAGE_SIZE/2); - clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2); + memset64((u64 *)table, _PAGE_INVALID, PTRS_PER_PTE); + memset64((u64 *)table + PTRS_PER_PTE, 0, PTRS_PER_PTE); } else { /* Return the first 2K fragment of the page */ atomic_set(&page->_mapcount, 1); - clear_table(table, _PAGE_INVALID, PAGE_SIZE); + memset64((u64 *)table, _PAGE_INVALID, 2 * PTRS_PER_PTE); spin_lock_bh(&mm->context.lock); list_add(&page->lru, &mm->context.pgtable_list); spin_unlock_bh(&mm->context.lock); diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index f2ada0bc08e6..3316d463fc29 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -60,7 +60,7 @@ pte_t __ref *vmem_pte_alloc(void) pte = (pte_t *) memblock_alloc(size, size); if (!pte) return NULL; - clear_table((unsigned long *) pte, _PAGE_INVALID, size); + memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE); return pte; } @@ -403,17 +403,17 @@ void __init vmem_map_init(void) for_each_memblock(memory, reg) vmem_add_mem(reg->base, reg->size); - __set_memory((unsigned long) _stext, - (_etext - _stext) >> PAGE_SHIFT, + __set_memory((unsigned long)_stext, + (unsigned long)(_etext - _stext) >> PAGE_SHIFT, SET_MEMORY_RO | SET_MEMORY_X); - __set_memory((unsigned long) _etext, - (_eshared - _etext) >> PAGE_SHIFT, + __set_memory((unsigned long)_etext, + (unsigned long)(__end_rodata - _etext) >> PAGE_SHIFT, SET_MEMORY_RO); - __set_memory((unsigned long) _sinittext, - (_einittext - _sinittext) >> PAGE_SHIFT, + __set_memory((unsigned long)_sinittext, + (unsigned long)(_einittext - _sinittext) >> PAGE_SHIFT, SET_MEMORY_RO | SET_MEMORY_X); pr_info("Write protected kernel read-only data: %luk\n", - (_eshared - _stext) >> 10); + (unsigned long)(__end_rodata - _stext) >> 10); } /* diff --git a/arch/s390/net/bpf_jit.h b/arch/s390/net/bpf_jit.h index 7fa55ccffe48..5e1e5133132d 100644 --- a/arch/s390/net/bpf_jit.h +++ b/arch/s390/net/bpf_jit.h @@ -53,10 +53,13 @@ extern u8 sk_load_word[], sk_load_half[], sk_load_byte[]; * * We get 160 bytes stack space from calling function, but only use * 12 * 8 byte for old backchain, r15..r6, and tail_call_cnt. + * + * The stack size used by the BPF program ("BPF stack" above) is passed + * via "aux->stack_depth". */ -#define STK_SPACE (MAX_BPF_STACK + 8 + 8 + 4 + 4 + 160) +#define STK_SPACE_ADD (8 + 8 + 4 + 4 + 160) #define STK_160_UNUSED (160 - 12 * 8) -#define STK_OFF (STK_SPACE - STK_160_UNUSED) +#define STK_OFF (STK_SPACE_ADD - STK_160_UNUSED) #define STK_OFF_TMP 160 /* Offset of tmp buffer on stack */ #define STK_OFF_HLEN 168 /* Offset of SKB header length on stack */ #define STK_OFF_SKBP 176 /* Offset of SKB pointer on stack */ diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index b15cd2f0320f..e81c16838b90 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -320,12 +320,12 @@ static void save_regs(struct bpf_jit *jit, u32 rs, u32 re) /* * Restore registers from "rs" (register start) to "re" (register end) on stack */ -static void restore_regs(struct bpf_jit *jit, u32 rs, u32 re) +static void restore_regs(struct bpf_jit *jit, u32 rs, u32 re, u32 stack_depth) { u32 off = STK_OFF_R6 + (rs - 6) * 8; if (jit->seen & SEEN_STACK) - off += STK_OFF; + off += STK_OFF + stack_depth; if (rs == re) /* lg %rs,off(%r15) */ @@ -369,7 +369,7 @@ static int get_end(struct bpf_jit *jit, int start) * Save and restore clobbered registers (6-15) on stack. * We save/restore registers in chunks with gap >= 2 registers. */ -static void save_restore_regs(struct bpf_jit *jit, int op) +static void save_restore_regs(struct bpf_jit *jit, int op, u32 stack_depth) { int re = 6, rs; @@ -382,7 +382,7 @@ static void save_restore_regs(struct bpf_jit *jit, int op) if (op == REGS_SAVE) save_regs(jit, rs, re); else - restore_regs(jit, rs, re); + restore_regs(jit, rs, re, stack_depth); re++; } while (re <= 15); } @@ -414,7 +414,7 @@ static void emit_load_skb_data_hlen(struct bpf_jit *jit) * Save registers and create stack frame if necessary. * See stack frame layout desription in "bpf_jit.h"! */ -static void bpf_jit_prologue(struct bpf_jit *jit) +static void bpf_jit_prologue(struct bpf_jit *jit, u32 stack_depth) { if (jit->seen & SEEN_TAIL_CALL) { /* xc STK_OFF_TCCNT(4,%r15),STK_OFF_TCCNT(%r15) */ @@ -427,7 +427,7 @@ static void bpf_jit_prologue(struct bpf_jit *jit) /* Tail calls have to skip above initialization */ jit->tail_call_start = jit->prg; /* Save registers */ - save_restore_regs(jit, REGS_SAVE); + save_restore_regs(jit, REGS_SAVE, stack_depth); /* Setup literal pool */ if (jit->seen & SEEN_LITERAL) { /* basr %r13,0 */ @@ -442,7 +442,7 @@ static void bpf_jit_prologue(struct bpf_jit *jit) /* la %bfp,STK_160_UNUSED(%r15) (BPF frame pointer) */ EMIT4_DISP(0x41000000, BPF_REG_FP, REG_15, STK_160_UNUSED); /* aghi %r15,-STK_OFF */ - EMIT4_IMM(0xa70b0000, REG_15, -STK_OFF); + EMIT4_IMM(0xa70b0000, REG_15, -(STK_OFF + stack_depth)); if (jit->seen & SEEN_FUNC) /* stg %w1,152(%r15) (backchain) */ EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0, @@ -459,7 +459,7 @@ static void bpf_jit_prologue(struct bpf_jit *jit) /* * Function epilogue */ -static void bpf_jit_epilogue(struct bpf_jit *jit) +static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth) { /* Return 0 */ if (jit->seen & SEEN_RET0) { @@ -471,7 +471,7 @@ static void bpf_jit_epilogue(struct bpf_jit *jit) /* Load exit code: lgr %r2,%b0 */ EMIT4(0xb9040000, REG_2, BPF_REG_0); /* Restore registers */ - save_restore_regs(jit, REGS_RESTORE); + save_restore_regs(jit, REGS_RESTORE, stack_depth); /* br %r14 */ _EMIT2(0x07fe); } @@ -1019,7 +1019,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i */ if (jit->seen & SEEN_STACK) - off = STK_OFF_TCCNT + STK_OFF; + off = STK_OFF_TCCNT + STK_OFF + fp->aux->stack_depth; else off = STK_OFF_TCCNT; /* lhi %w0,1 */ @@ -1047,7 +1047,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i /* * Restore registers before calling function */ - save_restore_regs(jit, REGS_RESTORE); + save_restore_regs(jit, REGS_RESTORE, fp->aux->stack_depth); /* * goto *(prog->bpf_func + tail_call_start); @@ -1273,7 +1273,7 @@ static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp) jit->lit = jit->lit_start; jit->prg = 0; - bpf_jit_prologue(jit); + bpf_jit_prologue(jit, fp->aux->stack_depth); for (i = 0; i < fp->len; i += insn_count) { insn_count = bpf_jit_insn(jit, fp, i); if (insn_count < 0) @@ -1281,7 +1281,7 @@ static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp) /* Next instruction address */ jit->addrs[i + insn_count] = jit->prg; } - bpf_jit_epilogue(jit); + bpf_jit_epilogue(jit, fp->aux->stack_depth); jit->lit_start = jit->prg; jit->size = jit->lit; diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index a25d95a6612d..0fe649c0d542 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -368,7 +368,8 @@ static void zpci_irq_handler(struct airq_struct *airq) /* End of second scan with interrupts on. */ break; /* First scan complete, reenable interrupts. */ - zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC); + if (zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC)) + break; si = 0; continue; } @@ -956,7 +957,7 @@ static int __init pci_base_init(void) if (!s390_pci_probe) return 0; - if (!test_facility(69) || !test_facility(71) || !test_facility(72)) + if (!test_facility(69) || !test_facility(71)) return 0; rc = zpci_debug_init(); diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c index ea34086c8674..81b840bc6e4e 100644 --- a/arch/s390/pci/pci_insn.c +++ b/arch/s390/pci/pci_insn.c @@ -7,6 +7,7 @@ #include <linux/export.h> #include <linux/errno.h> #include <linux/delay.h> +#include <asm/facility.h> #include <asm/pci_insn.h> #include <asm/pci_debug.h> #include <asm/processor.h> @@ -91,11 +92,14 @@ int zpci_refresh_trans(u64 fn, u64 addr, u64 range) } /* Set Interruption Controls */ -void zpci_set_irq_ctrl(u16 ctl, char *unused, u8 isc) +int zpci_set_irq_ctrl(u16 ctl, char *unused, u8 isc) { + if (!test_facility(72)) + return -EIO; asm volatile ( " .insn rsy,0xeb00000000d1,%[ctl],%[isc],%[u]\n" : : [ctl] "d" (ctl), [isc] "d" (isc << 27), [u] "Q" (*unused)); + return 0; } /* PCI Load */ diff --git a/arch/s390/tools/Makefile b/arch/s390/tools/Makefile index d54c149fbb6b..2ebf2872cc16 100644 --- a/arch/s390/tools/Makefile +++ b/arch/s390/tools/Makefile @@ -4,11 +4,21 @@ # hostprogs-y += gen_facilities +hostprogs-y += gen_opcode_table + HOSTCFLAGS_gen_facilities.o += -Wall $(LINUXINCLUDE) +HOSTCFLAGS_gen_opcode_table.o += -Wall $(LINUXINCLUDE) define filechk_facilities.h $(obj)/gen_facilities endef +define filechk_dis.h + ( $(obj)/gen_opcode_table < $(srctree)/arch/$(ARCH)/tools/opcodes.txt ) +endef + include/generated/facilities.h: $(obj)/gen_facilities FORCE $(call filechk,facilities.h) + +include/generated/dis.h: $(obj)/gen_opcode_table FORCE + $(call filechk,dis.h,__FUN) diff --git a/arch/s390/tools/gen_opcode_table.c b/arch/s390/tools/gen_opcode_table.c new file mode 100644 index 000000000000..01d4c5a4bfe9 --- /dev/null +++ b/arch/s390/tools/gen_opcode_table.c @@ -0,0 +1,336 @@ +/* + * Generate opcode table initializers for the in-kernel disassembler. + * + * Copyright IBM Corp. 2017 + * + */ + +#include <stdlib.h> +#include <string.h> +#include <ctype.h> +#include <stdio.h> + +#define STRING_SIZE_MAX 20 + +struct insn_type { + unsigned char byte; + unsigned char mask; + char **format; +}; + +struct insn { + struct insn_type *type; + char opcode[STRING_SIZE_MAX]; + char name[STRING_SIZE_MAX]; + char upper[STRING_SIZE_MAX]; + char format[STRING_SIZE_MAX]; + unsigned int name_len; +}; + +struct insn_group { + struct insn_type *type; + int offset; + int count; + char opcode[2]; +}; + +struct insn_format { + char *format; + int type; +}; + +struct gen_opcode { + struct insn *insn; + int nr; + struct insn_group *group; + int nr_groups; +}; + +/* + * Table of instruction format types. Each opcode is defined with at + * least one byte (two nibbles), three nibbles, or two bytes (four + * nibbles). + * The byte member of each instruction format type entry defines + * within which byte of an instruction the third (and fourth) nibble + * of an opcode can be found. The mask member is the and-mask that + * needs to be applied on this byte in order to get the third (and + * fourth) nibble of the opcode. + * The format array defines all instruction formats (as defined in the + * Principles of Operation) which have the same position of the opcode + * nibbles. + * A special case are instruction formats with 1-byte opcodes. In this + * case the byte member always is zero, so that the mask is applied on + * the (only) byte that contains the opcode. + */ +static struct insn_type insn_type_table[] = { + { + .byte = 0, + .mask = 0xff, + .format = (char *[]) { + "MII", + "RR", + "RS", + "RSI", + "RX", + "SI", + "SMI", + "SS", + NULL, + }, + }, + { + .byte = 1, + .mask = 0x0f, + .format = (char *[]) { + "RI", + "RIL", + "SSF", + NULL, + }, + }, + { + .byte = 1, + .mask = 0xff, + .format = (char *[]) { + "E", + "IE", + "RRE", + "RRF", + "RRR", + "S", + "SIL", + "SSE", + NULL, + }, + }, + { + .byte = 5, + .mask = 0xff, + .format = (char *[]) { + "RIE", + "RIS", + "RRS", + "RSE", + "RSL", + "RSY", + "RXE", + "RXF", + "RXY", + "SIY", + "VRI", + "VRR", + "VRS", + "VRV", + "VRX", + "VSI", + NULL, + }, + }, +}; + +static struct insn_type *insn_format_to_type(char *format) +{ + char tmp[STRING_SIZE_MAX]; + char *base_format, **ptr; + int i; + + strcpy(tmp, format); + base_format = tmp; + base_format = strsep(&base_format, "_"); + for (i = 0; i < sizeof(insn_type_table) / sizeof(insn_type_table[0]); i++) { + ptr = insn_type_table[i].format; + while (*ptr) { + if (!strcmp(base_format, *ptr)) + return &insn_type_table[i]; + ptr++; + } + } + exit(EXIT_FAILURE); +} + +static void read_instructions(struct gen_opcode *desc) +{ + struct insn insn; + int rc, i; + + while (1) { + rc = scanf("%s %s %s", insn.opcode, insn.name, insn.format); + if (rc == EOF) + break; + if (rc != 3) + exit(EXIT_FAILURE); + insn.type = insn_format_to_type(insn.format); + insn.name_len = strlen(insn.name); + for (i = 0; i <= insn.name_len; i++) + insn.upper[i] = toupper((unsigned char)insn.name[i]); + desc->nr++; + desc->insn = realloc(desc->insn, desc->nr * sizeof(*desc->insn)); + if (!desc->insn) + exit(EXIT_FAILURE); + desc->insn[desc->nr - 1] = insn; + } +} + +static int cmpformat(const void *a, const void *b) +{ + return strcmp(((struct insn *)a)->format, ((struct insn *)b)->format); +} + +static void print_formats(struct gen_opcode *desc) +{ + char *format; + int i, count; + + qsort(desc->insn, desc->nr, sizeof(*desc->insn), cmpformat); + format = ""; + count = 0; + printf("enum {\n"); + for (i = 0; i < desc->nr; i++) { + if (!strcmp(format, desc->insn[i].format)) + continue; + count++; + format = desc->insn[i].format; + printf("\tINSTR_%s,\n", format); + } + printf("}; /* %d */\n\n", count); +} + +static int cmp_long_insn(const void *a, const void *b) +{ + return strcmp(((struct insn *)a)->name, ((struct insn *)b)->name); +} + +static void print_long_insn(struct gen_opcode *desc) +{ + struct insn *insn; + int i, count; + + qsort(desc->insn, desc->nr, sizeof(*desc->insn), cmp_long_insn); + count = 0; + printf("enum {\n"); + for (i = 0; i < desc->nr; i++) { + insn = &desc->insn[i]; + if (insn->name_len < 6) + continue; + printf("\tLONG_INSN_%s,\n", insn->upper); + count++; + } + printf("}; /* %d */\n\n", count); + + printf("#define LONG_INSN_INITIALIZER { \\\n"); + for (i = 0; i < desc->nr; i++) { + insn = &desc->insn[i]; + if (insn->name_len < 6) + continue; + printf("\t[LONG_INSN_%s] = \"%s\", \\\n", insn->upper, insn->name); + } + printf("}\n\n"); +} + +static void print_opcode(struct insn *insn, int nr) +{ + char *opcode; + + opcode = insn->opcode; + if (insn->type->byte != 0) + opcode += 2; + printf("\t[%4d] = { .opfrag = 0x%s, .format = INSTR_%s, ", nr, opcode, insn->format); + if (insn->name_len < 6) + printf(".name = \"%s\" ", insn->name); + else + printf(".offset = LONG_INSN_%s ", insn->upper); + printf("}, \\\n"); +} + +static void add_to_group(struct gen_opcode *desc, struct insn *insn, int offset) +{ + struct insn_group *group; + + group = desc->group ? &desc->group[desc->nr_groups - 1] : NULL; + if (group && (!strncmp(group->opcode, insn->opcode, 2) || group->type->byte == 0)) { + group->count++; + return; + } + desc->nr_groups++; + desc->group = realloc(desc->group, desc->nr_groups * sizeof(*desc->group)); + if (!desc->group) + exit(EXIT_FAILURE); + group = &desc->group[desc->nr_groups - 1]; + strncpy(group->opcode, insn->opcode, 2); + group->type = insn->type; + group->offset = offset; + group->count = 1; +} + +static int cmpopcode(const void *a, const void *b) +{ + return strcmp(((struct insn *)a)->opcode, ((struct insn *)b)->opcode); +} + +static void print_opcode_table(struct gen_opcode *desc) +{ + char opcode[2] = ""; + struct insn *insn; + int i, offset; + + qsort(desc->insn, desc->nr, sizeof(*desc->insn), cmpopcode); + printf("#define OPCODE_TABLE_INITIALIZER { \\\n"); + offset = 0; + for (i = 0; i < desc->nr; i++) { + insn = &desc->insn[i]; + if (insn->type->byte == 0) + continue; + add_to_group(desc, insn, offset); + if (strncmp(opcode, insn->opcode, 2)) { + strncpy(opcode, insn->opcode, 2); + printf("\t/* %.2s */ \\\n", opcode); + } + print_opcode(insn, offset); + offset++; + } + printf("\t/* 1-byte opcode instructions */ \\\n"); + for (i = 0; i < desc->nr; i++) { + insn = &desc->insn[i]; + if (insn->type->byte != 0) + continue; + add_to_group(desc, insn, offset); + print_opcode(insn, offset); + offset++; + } + printf("}\n\n"); +} + +static void print_opcode_table_offsets(struct gen_opcode *desc) +{ + struct insn_group *group; + int i; + + printf("#define OPCODE_OFFSET_INITIALIZER { \\\n"); + for (i = 0; i < desc->nr_groups; i++) { + group = &desc->group[i]; + printf("\t{ .opcode = 0x%.2s, .mask = 0x%02x, .byte = %d, .offset = %d, .count = %d }, \\\n", + group->opcode, group->type->mask, group->type->byte, group->offset, group->count); + } + printf("}\n\n"); +} + +int main(int argc, char **argv) +{ + struct gen_opcode _desc = { 0 }; + struct gen_opcode *desc = &_desc; + + read_instructions(desc); + printf("#ifndef __S390_GENERATED_DIS_H__\n"); + printf("#define __S390_GENERATED_DIS_H__\n"); + printf("/*\n"); + printf(" * DO NOT MODIFY.\n"); + printf(" *\n"); + printf(" * This file was generated by %s\n", __FILE__); + printf(" */\n\n"); + print_formats(desc); + print_long_insn(desc); + print_opcode_table(desc); + print_opcode_table_offsets(desc); + printf("#endif\n"); + exit(EXIT_SUCCESS); +} diff --git a/arch/s390/tools/opcodes.txt b/arch/s390/tools/opcodes.txt new file mode 100644 index 000000000000..1cbed82cd17b --- /dev/null +++ b/arch/s390/tools/opcodes.txt @@ -0,0 +1,1183 @@ +0101 pr E +0102 upt E +0104 ptff E +0107 sckpf E +010a pfpo E +010b tam E +010c sam24 E +010d sam31 E +010e sam64 E +01ff trap2 E +04 spm RR_R0 +05 balr RR_RR +06 bctr RR_RR +07 bcr RR_UR +0a svc RR_U0 +0b bsm RR_RR +0c bassm RR_RR +0d basr RR_RR +0e mvcl RR_RR +0f clcl RR_RR +10 lpr RR_RR +11 lnr RR_RR +12 ltr RR_RR +13 lcr RR_RR +14 nr RR_RR +15 clr RR_RR +16 or RR_RR +17 xr RR_RR +18 lr RR_RR +19 cr RR_RR +1a ar RR_RR +1b sr RR_RR +1c mr RR_RR +1d dr RR_RR +1e alr RR_RR +1f slr RR_RR +20 lpdr RR_FF +21 lndr RR_FF +22 ltdr RR_FF +23 lcdr RR_FF +24 hdr RR_FF +25 ldxr RR_FF +26 mxr RR_FF +27 mxdr RR_FF +28 ldr RR_FF +29 cdr RR_FF +2a adr RR_FF +2b sdr RR_FF +2c mdr RR_FF +2d ddr RR_FF +2e awr RR_FF +2f swr RR_FF +30 lper RR_FF +31 lner RR_FF +32 lter RR_FF +33 lcer RR_FF +34 her RR_FF +35 ledr RR_FF +36 axr RR_FF +37 sxr RR_FF +38 ler RR_FF +39 cer RR_FF +3a aer RR_FF +3b ser RR_FF +3c mder RR_FF +3d der RR_FF +3e aur RR_FF +3f sur RR_FF +40 sth RX_RRRD +41 la RX_RRRD +42 stc RX_RRRD +43 ic RX_RRRD +44 ex RX_RRRD +45 bal RX_RRRD +46 bct RX_RRRD +47 bc RX_URRD +48 lh RX_RRRD +49 ch RX_RRRD +4a ah RX_RRRD +4b sh RX_RRRD +4c mh RX_RRRD +4d bas RX_RRRD +4e cvd RX_RRRD +4f cvb RX_RRRD +50 st RX_RRRD +51 lae RX_RRRD +54 n RX_RRRD +55 cl RX_RRRD +56 o RX_RRRD +57 x RX_RRRD +58 l RX_RRRD +59 c RX_RRRD +5a a RX_RRRD +5b s RX_RRRD +5c m RX_RRRD +5d d RX_RRRD +5e al RX_RRRD +5f sl RX_RRRD +60 std RX_FRRD +67 mxd RX_FRRD +68 ld RX_FRRD +69 cd RX_FRRD +6a ad RX_FRRD +6b sd RX_FRRD +6c md RX_FRRD +6d dd RX_FRRD +6e aw RX_FRRD +6f sw RX_FRRD +70 ste RX_FRRD +71 ms RX_RRRD +78 le RX_FRRD +79 ce RX_FRRD +7a ae RX_FRRD +7b se RX_FRRD +7c mde RX_FRRD +7d de RX_FRRD +7e au RX_FRRD +7f su RX_FRRD +80 ssm SI_RD +82 lpsw SI_RD +83 diag RS_RRRD +84 brxh RSI_RRP +85 brxle RSI_RRP +86 bxh RS_RRRD +87 bxle RS_RRRD +88 srl RS_R0RD +89 sll RS_R0RD +8a sra RS_R0RD +8b sla RS_R0RD +8c srdl RS_R0RD +8d sldl RS_R0RD +8e srda RS_R0RD +8f slda RS_R0RD +90 stm RS_RRRD +91 tm SI_URD +92 mvi SI_URD +93 ts SI_RD +94 ni SI_URD +95 cli SI_URD +96 oi SI_URD +97 xi SI_URD +98 lm RS_RRRD +99 trace RS_RRRD +9a lam RS_AARD +9b stam RS_AARD +a50 iihh RI_RU +a51 iihl RI_RU +a52 iilh RI_RU +a53 iill RI_RU +a54 nihh RI_RU +a55 nihl RI_RU +a56 nilh RI_RU +a57 nill RI_RU +a58 oihh RI_RU +a59 oihl RI_RU +a5a oilh RI_RU +a5b oill RI_RU +a5c llihh RI_RU +a5d llihl RI_RU +a5e llilh RI_RU +a5f llill RI_RU +a70 tmlh RI_RU +a71 tmll RI_RU +a72 tmhh RI_RU +a73 tmhl RI_RU +a74 brc RI_UP +a75 bras RI_RP +a76 brct RI_RP +a77 brctg RI_RP +a78 lhi RI_RI +a79 lghi RI_RI +a7a ahi RI_RI +a7b aghi RI_RI +a7c mhi RI_RI +a7d mghi RI_RI +a7e chi RI_RI +a7f cghi RI_RI +a8 mvcle RS_RRRD +a9 clcle RS_RRRD +aa0 rinext RI_RI +aa1 rion RI_RI +aa2 tric RI_RI +aa3 rioff RI_RI +aa4 riemit RI_RI +ac stnsm SI_URD +ad stosm SI_URD +ae sigp RS_RRRD +af mc SI_URD +b1 lra RX_RRRD +b202 stidp S_RD +b204 sck S_RD +b205 stck S_RD +b206 sckc S_RD +b207 stckc S_RD +b208 spt S_RD +b209 stpt S_RD +b20a spka S_RD +b20b ipk S_00 +b20d ptlb S_00 +b210 spx S_RD +b211 stpx S_RD +b212 stap S_RD +b214 sie S_RD +b218 pc S_RD +b219 sac S_RD +b21a cfc S_RD +b220 servc RRE_RR +b221 ipte RRF_RURR +b222 ipm RRE_R0 +b223 ivsk RRE_RR +b224 iac RRE_R0 +b225 ssar RRE_R0 +b226 epar RRE_R0 +b227 esar RRE_R0 +b228 pt RRE_RR +b229 iske RRE_RR +b22a rrbe RRE_RR +b22b sske RRF_U0RR +b22c tb RRE_RR +b22d dxr RRE_FF +b22e pgin RRE_RR +b22f pgout RRE_RR +b230 csch S_00 +b231 hsch S_00 +b232 msch S_RD +b233 ssch S_RD +b234 stsch S_RD +b235 tsch S_RD +b236 tpi S_RD +b237 sal S_00 +b238 rsch S_00 +b239 stcrw S_RD +b23a stcps S_RD +b23b rchp S_00 +b23c schm S_00 +b240 bakr RRE_RR +b241 cksm RRE_RR +b244 sqdr RRE_FF +b245 sqer RRE_FF +b246 stura RRE_RR +b247 msta RRE_R0 +b248 palb RRE_00 +b249 ereg RRE_RR +b24a esta RRE_RR +b24b lura RRE_RR +b24c tar RRE_AR +b24d cpya RRE_AA +b24e sar RRE_AR +b24f ear RRE_RA +b250 csp RRE_RR +b252 msr RRE_RR +b254 mvpg RRE_RR +b255 mvst RRE_RR +b256 sthyi RRE_RR +b257 cuse RRE_RR +b258 bsg RRE_RR +b25a bsa RRE_RR +b25d clst RRE_RR +b25e srst RRE_RR +b263 cmpsc RRE_RR +b274 siga S_RD +b276 xsch S_00 +b277 rp S_RD +b278 stcke S_RD +b279 sacf S_RD +b27c stckf S_RD +b27d stsi S_RD +b280 lpp S_RD +b284 lcctl S_RD +b285 lpctl S_RD +b286 qsi S_RD +b287 lsctl S_RD +b28e qctri S_RD +b299 srnm S_RD +b29c stfpc S_RD +b29d lfpc S_RD +b2a5 tre RRE_RR +b2a6 cu21 RRF_U0RR +b2a7 cu12 RRF_U0RR +b2b0 stfle S_RD +b2b1 stfl S_RD +b2b2 lpswe S_RD +b2b8 srnmb S_RD +b2b9 srnmt S_RD +b2bd lfas S_RD +b2e0 scctr RRE_RR +b2e1 spctr RRE_RR +b2e4 ecctr RRE_RR +b2e5 epctr RRE_RR +b2e8 ppa RRF_U0RR +b2ec etnd RRE_R0 +b2ed ecpga RRE_RR +b2f8 tend S_00 +b2fa niai IE_UU +b2fc tabort S_RD +b2ff trap4 S_RD +b300 lpebr RRE_FF +b301 lnebr RRE_FF +b302 ltebr RRE_FF +b303 lcebr RRE_FF +b304 ldebr RRE_FF +b305 lxdbr RRE_FF +b306 lxebr RRE_FF +b307 mxdbr RRE_FF +b308 kebr RRE_FF +b309 cebr RRE_FF +b30a aebr RRE_FF +b30b sebr RRE_FF +b30c mdebr RRE_FF +b30d debr RRE_FF +b30e maebr RRF_F0FF +b30f msebr RRF_F0FF +b310 lpdbr RRE_FF +b311 lndbr RRE_FF +b312 ltdbr RRE_FF +b313 lcdbr RRE_FF +b314 sqebr RRE_FF +b315 sqdbr RRE_FF +b316 sqxbr RRE_FF +b317 meebr RRE_FF +b318 kdbr RRE_FF +b319 cdbr RRE_FF +b31a adbr RRE_FF +b31b sdbr RRE_FF +b31c mdbr RRE_FF +b31d ddbr RRE_FF +b31e madbr RRF_F0FF +b31f msdbr RRF_F0FF +b324 lder RRE_FF +b325 lxdr RRE_FF +b326 lxer RRE_FF +b32e maer RRF_F0FF +b32f mser RRF_F0FF +b336 sqxr RRE_FF +b337 meer RRE_FF +b338 maylr RRF_F0FF +b339 mylr RRF_F0FF +b33a mayr RRF_F0FF +b33b myr RRF_F0FF +b33c mayhr RRF_F0FF +b33d myhr RRF_F0FF +b33e madr RRF_F0FF +b33f msdr RRF_F0FF +b340 lpxbr RRE_FF +b341 lnxbr RRE_FF +b342 ltxbr RRE_FF +b343 lcxbr RRE_FF +b344 ledbra RRF_UUFF +b345 ldxbra RRF_UUFF +b346 lexbra RRF_UUFF +b347 fixbra RRF_UUFF +b348 kxbr RRE_FF +b349 cxbr RRE_FF +b34a axbr RRE_FF +b34b sxbr RRE_FF +b34c mxbr RRE_FF +b34d dxbr RRE_FF +b350 tbedr RRF_U0FF +b351 tbdr RRF_U0FF +b353 diebr RRF_FUFF +b357 fiebra RRF_UUFF +b358 thder RRE_FF +b359 thdr RRE_FF +b35b didbr RRF_FUFF +b35f fidbra RRF_UUFF +b360 lpxr RRE_FF +b361 lnxr RRE_FF +b362 ltxr RRE_FF +b363 lcxr RRE_FF +b365 lxr RRE_FF +b366 lexr RRE_FF +b367 fixr RRE_FF +b369 cxr RRE_FF +b370 lpdfr RRE_FF +b371 lndfr RRE_FF +b372 cpsdr RRF_F0FF2 +b373 lcdfr RRE_FF +b374 lzer RRE_F0 +b375 lzdr RRE_F0 +b376 lzxr RRE_F0 +b377 fier RRE_FF +b37f fidr RRE_FF +b384 sfpc RRE_RR +b385 sfasr RRE_R0 +b38c efpc RRE_RR +b390 celfbr RRF_UUFR +b391 cdlfbr RRF_UUFR +b392 cxlfbr RRF_UUFR +b394 cefbra RRF_UUFR +b395 cdfbra RRF_UUFR +b396 cxfbra RRF_UUFR +b398 cfebra RRF_UURF +b399 cfdbra RRF_UURF +b39a cfxbra RRF_UURF +b39c clfebr RRF_UURF +b39d clfdbr RRF_UURF +b39e clfxbr RRF_UURF +b3a0 celgbr RRF_UUFR +b3a1 cdlgbr RRF_UUFR +b3a2 cxlgbr RRF_UUFR +b3a4 cegbra RRF_UUFR +b3a5 cdgbra RRF_UUFR +b3a6 cxgbra RRF_UUFR +b3a8 cgebra RRF_UURF +b3a9 cgdbra RRF_UURF +b3aa cgxbra RRF_UURF +b3ac clgebr RRF_UURF +b3ad clgdbr RRF_UURF +b3ae clgxbr RRF_UURF +b3b4 cefr RRE_FR +b3b5 cdfr RRE_FR +b3b6 cxfr RRE_FR +b3b8 cfer RRF_U0RF +b3b9 cfdr RRF_U0RF +b3ba cfxr RRF_U0RF +b3c1 ldgr RRE_FR +b3c4 cegr RRE_FR +b3c5 cdgr RRE_FR +b3c6 cxgr RRE_FR +b3c8 cger RRF_U0RF +b3c9 cgdr RRF_U0RF +b3ca cgxr RRF_U0RF +b3cd lgdr RRE_RF +b3d0 mdtra RRF_FUFF2 +b3d1 ddtra RRF_FUFF2 +b3d2 adtra RRF_FUFF2 +b3d3 sdtra RRF_FUFF2 +b3d4 ldetr RRF_0UFF +b3d5 ledtr RRF_UUFF +b3d6 ltdtr RRE_FF +b3d7 fidtr RRF_UUFF +b3d8 mxtra RRF_FUFF2 +b3d9 dxtra RRF_FUFF2 +b3da axtra RRF_FUFF2 +b3db sxtra RRF_FUFF2 +b3dc lxdtr RRF_0UFF +b3dd ldxtr RRF_UUFF +b3de ltxtr RRE_FF +b3df fixtr RRF_UUFF +b3e0 kdtr RRE_FF +b3e1 cgdtra RRF_UURF +b3e2 cudtr RRE_RF +b3e3 csdtr RRF_0URF +b3e4 cdtr RRE_FF +b3e5 eedtr RRE_RF +b3e7 esdtr RRE_RF +b3e8 kxtr RRE_FF +b3e9 cgxtra RRF_UURF +b3ea cuxtr RRE_RF +b3eb csxtr RRF_0URF +b3ec cxtr RRE_FF +b3ed eextr RRE_RF +b3ef esxtr RRE_RF +b3f1 cdgtra RRF_UUFR +b3f2 cdutr RRE_FR +b3f3 cdstr RRE_FR +b3f4 cedtr RRE_FF +b3f5 qadtr RRF_FUFF +b3f6 iedtr RRF_F0FR +b3f7 rrdtr RRF_FFRU +b3f9 cxgtra RRF_UUFR +b3fa cxutr RRE_FR +b3fb cxstr RRE_FR +b3fc cextr RRE_FF +b3fd qaxtr RRF_FUFF +b3fe iextr RRF_F0FR +b3ff rrxtr RRF_FFRU +b6 stctl RS_CCRD +b7 lctl RS_CCRD +b900 lpgr RRE_RR +b901 lngr RRE_RR +b902 ltgr RRE_RR +b903 lcgr RRE_RR +b904 lgr RRE_RR +b905 lurag RRE_RR +b906 lgbr RRE_RR +b907 lghr RRE_RR +b908 agr RRE_RR +b909 sgr RRE_RR +b90a algr RRE_RR +b90b slgr RRE_RR +b90c msgr RRE_RR +b90d dsgr RRE_RR +b90e eregg RRE_RR +b90f lrvgr RRE_RR +b910 lpgfr RRE_RR +b911 lngfr RRE_RR +b912 ltgfr RRE_RR +b913 lcgfr RRE_RR +b914 lgfr RRE_RR +b916 llgfr RRE_RR +b917 llgtr RRE_RR +b918 agfr RRE_RR +b919 sgfr RRE_RR +b91a algfr RRE_RR +b91b slgfr RRE_RR +b91c msgfr RRE_RR +b91d dsgfr RRE_RR +b91e kmac RRE_RR +b91f lrvr RRE_RR +b920 cgr RRE_RR +b921 clgr RRE_RR +b925 sturg RRE_RR +b926 lbr RRE_RR +b927 lhr RRE_RR +b928 pckmo RRE_00 +b929 kma RRF_R0RR +b92a kmf RRE_RR +b92b kmo RRE_RR +b92c pcc RRE_00 +b92d kmctr RRF_R0RR +b92e km RRE_RR +b92f kmc RRE_RR +b930 cgfr RRE_RR +b931 clgfr RRE_RR +b93c ppno RRE_RR +b93e kimd RRE_RR +b93f klmd RRE_RR +b941 cfdtr RRF_UURF +b942 clgdtr RRF_UURF +b943 clfdtr RRF_UURF +b946 bctgr RRE_RR +b949 cfxtr RRF_UURF +b94a clgxtr RRF_UURF +b94b clfxtr RRF_UURF +b951 cdftr RRF_UUFR +b952 cdlgtr RRF_UUFR +b953 cdlftr RRF_UUFR +b959 cxftr RRF_UUFR +b95a cxlgtr RRF_UUFR +b95b cxlftr RRF_UUFR +b960 cgrt RRF_U0RR +b961 clgrt RRF_U0RR +b972 crt RRF_U0RR +b973 clrt RRF_U0RR +b980 ngr RRE_RR +b981 ogr RRE_RR +b982 xgr RRE_RR +b983 flogr RRE_RR +b984 llgcr RRE_RR +b985 llghr RRE_RR +b986 mlgr RRE_RR +b987 dlgr RRE_RR +b988 alcgr RRE_RR +b989 slbgr RRE_RR +b98a cspg RRE_RR +b98d epsw RRE_RR +b98e idte RRF_RURR2 +b98f crdte RRF_RURR2 +b990 trtt RRF_U0RR +b991 trto RRF_U0RR +b992 trot RRF_U0RR +b993 troo RRF_U0RR +b994 llcr RRE_RR +b995 llhr RRE_RR +b996 mlr RRE_RR +b997 dlr RRE_RR +b998 alcr RRE_RR +b999 slbr RRE_RR +b99a epair RRE_R0 +b99b esair RRE_R0 +b99d esea RRE_R0 +b99e pti RRE_RR +b99f ssair RRE_R0 +b9a1 tpei RRE_RR +b9a2 ptf RRE_R0 +b9aa lptea RRF_RURR2 +b9ac irbm RRE_RR +b9ae rrbm RRE_RR +b9af pfmf RRE_RR +b9b0 cu14 RRF_U0RR +b9b1 cu24 RRF_U0RR +b9b2 cu41 RRE_RR +b9b3 cu42 RRE_RR +b9bd trtre RRF_U0RR +b9be srstu RRE_RR +b9bf trte RRF_U0RR +b9c8 ahhhr RRF_R0RR2 +b9c9 shhhr RRF_R0RR2 +b9ca alhhhr RRF_R0RR2 +b9cb slhhhr RRF_R0RR2 +b9cd chhr RRE_RR +b9cf clhhr RRE_RR +b9d0 pcistg RRE_RR +b9d2 pcilg RRE_RR +b9d3 rpcit RRE_RR +b9d8 ahhlr RRF_R0RR2 +b9d9 shhlr RRF_R0RR2 +b9da alhhlr RRF_R0RR2 +b9db slhhlr RRF_R0RR2 +b9dd chlr RRE_RR +b9df clhlr RRE_RR +b9e0 locfhr RRF_U0RR +b9e1 popcnt RRE_RR +b9e2 locgr RRF_U0RR +b9e4 ngrk RRF_R0RR2 +b9e6 ogrk RRF_R0RR2 +b9e7 xgrk RRF_R0RR2 +b9e8 agrk RRF_R0RR2 +b9e9 sgrk RRF_R0RR2 +b9ea algrk RRF_R0RR2 +b9eb slgrk RRF_R0RR2 +b9ec mgrk RRF_R0RR2 +b9ed msgrkc RRF_R0RR2 +b9f2 locr RRF_U0RR +b9f4 nrk RRF_R0RR2 +b9f6 ork RRF_R0RR2 +b9f7 xrk RRF_R0RR2 +b9f8 ark RRF_R0RR2 +b9f9 srk RRF_R0RR2 +b9fa alrk RRF_R0RR2 +b9fb slrk RRF_R0RR2 +b9fd msrkc RRF_R0RR2 +ba cs RS_RRRD +bb cds RS_RRRD +bd clm RS_RURD +be stcm RS_RURD +bf icm RS_RURD +c00 larl RIL_RP +c01 lgfi RIL_RI +c04 brcl RIL_UP +c05 brasl RIL_RP +c06 xihf RIL_RU +c07 xilf RIL_RU +c08 iihf RIL_RU +c09 iilf RIL_RU +c0a nihf RIL_RU +c0b nilf RIL_RU +c0c oihf RIL_RU +c0d oilf RIL_RU +c0e llihf RIL_RU +c0f llilf RIL_RU +c20 msgfi RIL_RI +c21 msfi RIL_RI +c24 slgfi RIL_RU +c25 slfi RIL_RU +c28 agfi RIL_RI +c29 afi RIL_RI +c2a algfi RIL_RU +c2b alfi RIL_RU +c2c cgfi RIL_RI +c2d cfi RIL_RI +c2e clgfi RIL_RU +c2f clfi RIL_RU +c42 llhrl RIL_RP +c44 lghrl RIL_RP +c45 lhrl RIL_RP +c46 llghrl RIL_RP +c47 sthrl RIL_RP +c48 lgrl RIL_RP +c4b stgrl RIL_RP +c4c lgfrl RIL_RP +c4d lrl RIL_RP +c4e llgfrl RIL_RP +c4f strl RIL_RP +c5 bprp MII_UPP +c60 exrl RIL_RP +c62 pfdrl RIL_UP +c64 cghrl RIL_RP +c65 chrl RIL_RP +c66 clghrl RIL_RP +c67 clhrl RIL_RP +c68 cgrl RIL_RP +c6a clgrl RIL_RP +c6c cgfrl RIL_RP +c6d crl RIL_RP +c6e clgfrl RIL_RP +c6f clrl RIL_RP +c7 bpp SMI_U0RDP +c80 mvcos SSF_RRDRD +c81 ectg SSF_RRDRD +c82 csst SSF_RRDRD +c84 lpd SSF_RRDRD2 +c85 lpdg SSF_RRDRD2 +cc6 brcth RIL_RP +cc8 aih RIL_RI +cca alsih RIL_RI +ccb alsihn RIL_RI +ccd cih RIL_RI +ccf clih RIL_RU +d0 trtr SS_L0RDRD +d1 mvn SS_L0RDRD +d2 mvc SS_L0RDRD +d3 mvz SS_L0RDRD +d4 nc SS_L0RDRD +d5 clc SS_L0RDRD +d6 oc SS_L0RDRD +d7 xc SS_L0RDRD +d9 mvck SS_RRRDRD +da mvcp SS_RRRDRD +db mvcs SS_RRRDRD +dc tr SS_L0RDRD +dd trt SS_L0RDRD +de ed SS_L0RDRD +df edmk SS_L0RDRD +e1 pku SS_L2RDRD +e2 unpku SS_L0RDRD +e302 ltg RXY_RRRD +e303 lrag RXY_RRRD +e304 lg RXY_RRRD +e306 cvby RXY_RRRD +e308 ag RXY_RRRD +e309 sg RXY_RRRD +e30a alg RXY_RRRD +e30b slg RXY_RRRD +e30c msg RXY_RRRD +e30d dsg RXY_RRRD +e30e cvbg RXY_RRRD +e30f lrvg RXY_RRRD +e312 lt RXY_RRRD +e313 lray RXY_RRRD +e314 lgf RXY_RRRD +e315 lgh RXY_RRRD +e316 llgf RXY_RRRD +e317 llgt RXY_RRRD +e318 agf RXY_RRRD +e319 sgf RXY_RRRD +e31a algf RXY_RRRD +e31b slgf RXY_RRRD +e31c msgf RXY_RRRD +e31d dsgf RXY_RRRD +e31e lrv RXY_RRRD +e31f lrvh RXY_RRRD +e320 cg RXY_RRRD +e321 clg RXY_RRRD +e324 stg RXY_RRRD +e325 ntstg RXY_RRRD +e326 cvdy RXY_RRRD +e32a lzrg RXY_RRRD +e32e cvdg RXY_RRRD +e32f strvg RXY_RRRD +e330 cgf RXY_RRRD +e331 clgf RXY_RRRD +e332 ltgf RXY_RRRD +e334 cgh RXY_RRRD +e336 pfd RXY_URRD +e338 agh RXY_RRRD +e339 sgh RXY_RRRD +e33a llzrgf RXY_RRRD +e33b lzrf RXY_RRRD +e33c mgh RXY_RRRD +e33e strv RXY_RRRD +e33f strvh RXY_RRRD +e346 bctg RXY_RRRD +e347 bic RXY_URRD +e348 llgfsg RXY_RRRD +e349 stgsc RXY_RRRD +e34c lgg RXY_RRRD +e34d lgsc RXY_RRRD +e350 sty RXY_RRRD +e351 msy RXY_RRRD +e353 msc RXY_RRRD +e354 ny RXY_RRRD +e355 cly RXY_RRRD +e356 oy RXY_RRRD +e357 xy RXY_RRRD +e358 ly RXY_RRRD +e359 cy RXY_RRRD +e35a ay RXY_RRRD +e35b sy RXY_RRRD +e35c mfy RXY_RRRD +e35e aly RXY_RRRD +e35f sly RXY_RRRD +e370 sthy RXY_RRRD +e371 lay RXY_RRRD +e372 stcy RXY_RRRD +e373 icy RXY_RRRD +e375 laey RXY_RRRD +e376 lb RXY_RRRD +e377 lgb RXY_RRRD +e378 lhy RXY_RRRD +e379 chy RXY_RRRD +e37a ahy RXY_RRRD +e37b shy RXY_RRRD +e37c mhy RXY_RRRD +e380 ng RXY_RRRD +e381 og RXY_RRRD +e382 xg RXY_RRRD +e383 msgc RXY_RRRD +e384 mg RXY_RRRD +e385 lgat RXY_RRRD +e386 mlg RXY_RRRD +e387 dlg RXY_RRRD +e388 alcg RXY_RRRD +e389 slbg RXY_RRRD +e38e stpq RXY_RRRD +e38f lpq RXY_RRRD +e390 llgc RXY_RRRD +e391 llgh RXY_RRRD +e394 llc RXY_RRRD +e395 llh RXY_RRRD +e396 ml RXY_RRRD +e397 dl RXY_RRRD +e398 alc RXY_RRRD +e399 slb RXY_RRRD +e39c llgtat RXY_RRRD +e39d llgfat RXY_RRRD +e39f lat RXY_RRRD +e3c0 lbh RXY_RRRD +e3c2 llch RXY_RRRD +e3c3 stch RXY_RRRD +e3c4 lhh RXY_RRRD +e3c6 llhh RXY_RRRD +e3c7 sthh RXY_RRRD +e3c8 lfhat RXY_RRRD +e3ca lfh RXY_RRRD +e3cb stfh RXY_RRRD +e3cd chf RXY_RRRD +e3cf clhf RXY_RRRD +e3d0 mpcifc RXY_RRRD +e3d4 stpcifc RXY_RRRD +e500 lasp SSE_RDRD +e501 tprot SSE_RDRD +e502 strag SSE_RDRD +e50e mvcsk SSE_RDRD +e50f mvcdk SSE_RDRD +e544 mvhhi SIL_RDI +e548 mvghi SIL_RDI +e54c mvhi SIL_RDI +e554 chhsi SIL_RDI +e555 clhhsi SIL_RDU +e558 cghsi SIL_RDI +e559 clghsi SIL_RDU +e55c chsi SIL_RDI +e55d clfhsi SIL_RDU +e560 tbegin SIL_RDU +e561 tbeginc SIL_RDU +e634 vpkz VSI_URDV +e635 vlrl VSI_URDV +e637 vlrlr VRS_RRDV +e63c vupkz VSI_URDV +e63d vstrl VSI_URDV +e63f vstrlr VRS_RRDV +e649 vlip VRI_V0UU2 +e650 vcvb VRR_RV0U +e652 vcvbg VRR_RV0U +e658 vcvd VRI_VR0UU +e659 vsrp VRI_VVUUU2 +e65a vcvdg VRI_VR0UU +e65b vpsop VRI_VVUUU2 +e65f vtp VRR_0V +e671 vap VRI_VVV0UU2 +e673 vsp VRI_VVV0UU2 +e677 vcp VRR_0VV0U +e678 vmp VRI_VVV0UU2 +e679 vmsp VRI_VVV0UU2 +e67a vdp VRI_VVV0UU2 +e67b vrp VRI_VVV0UU2 +e67e vsdp VRI_VVV0UU2 +e700 vleb VRX_VRRDU +e701 vleh VRX_VRRDU +e702 vleg VRX_VRRDU +e703 vlef VRX_VRRDU +e704 vllez VRX_VRRDU +e705 vlrep VRX_VRRDU +e706 vl VRX_VRRD +e707 vlbb VRX_VRRDU +e708 vsteb VRX_VRRDU +e709 vsteh VRX_VRRDU +e70a vsteg VRX_VRRDU +e70b vstef VRX_VRRDU +e70e vst VRX_VRRD +e712 vgeg VRV_VVXRDU +e713 vgef VRV_VVXRDU +e71a vsceg VRV_VVXRDU +e71b vscef VRV_VVXRDU +e721 vlgv VRS_RVRDU +e722 vlvg VRS_VRRDU +e727 lcbb RXE_RRRDU +e730 vesl VRS_VVRDU +e733 verll VRS_VVRDU +e736 vlm VRS_VVRD +e737 vll VRS_VRRD +e738 vesrl VRS_VVRDU +e73a vesra VRS_VVRDU +e73e vstm VRS_VVRD +e73f vstl VRS_VRRD +e740 vleib VRI_V0IU +e741 vleih VRI_V0IU +e742 vleig VRI_V0IU +e743 vleif VRI_V0IU +e744 vgbm VRI_V0U +e745 vrepi VRI_V0IU +e746 vgm VRI_V0UUU +e74a vftci VRI_VVUUU +e74d vrep VRI_VVUU +e750 vpopct VRR_VV0U +e752 vctz VRR_VV0U +e753 vclz VRR_VV0U +e756 vlr VRX_VV +e75c vistr VRR_VV0U0U +e75f vseg VRR_VV0U +e760 vmrl VRR_VVV0U +e761 vmrh VRR_VVV0U +e762 vlvgp VRR_VRR +e764 vsum VRR_VVV0U +e765 vsumg VRR_VVV0U +e766 vcksm VRR_VVV +e767 vsumq VRR_VVV0U +e768 vn VRR_VVV +e769 vnc VRR_VVV +e76a vo VRR_VVV +e76b vno VRR_VVV +e76c vnx VRR_VVV +e76d vx VRR_VVV +e76e vnn VRR_VVV +e76f voc VRR_VVV +e770 veslv VRR_VVV0U +e772 verim VRI_VVV0UU +e773 verllv VRR_VVV0U +e774 vsl VRR_VVV +e775 vslb VRR_VVV +e777 vsldb VRI_VVV0U +e778 vesrlv VRR_VVV0U +e77a vesrav VRR_VVV0U +e77c vsrl VRR_VVV +e77d vsrlb VRR_VVV +e77e vsra VRR_VVV +e77f vsrab VRR_VVV +e780 vfee VRR_VVV0U0U +e781 vfene VRR_VVV0U0U +e782 vfae VRR_VVV0U0U +e784 vpdi VRR_VVV0U +e785 vbperm VRR_VVV +e78a vstrc VRR_VVVUU0V +e78c vperm VRR_VVV0V +e78d vsel VRR_VVV0V +e78e vfms VRR_VVVU0UV +e78f vfma VRR_VVVU0UV +e794 vpk VRR_VVV0U +e795 vpkls VRR_VVV0U0U +e797 vpks VRR_VVV0U0U +e79e vfnms VRR_VVVU0UV +e79f vfnma VRR_VVVU0UV +e7a1 vmlh VRR_VVV0U +e7a2 vml VRR_VVV0U +e7a3 vmh VRR_VVV0U +e7a4 vmle VRR_VVV0U +e7a5 vmlo VRR_VVV0U +e7a6 vme VRR_VVV0U +e7a7 vmo VRR_VVV0U +e7a9 vmalh VRR_VVVU0V +e7aa vmal VRR_VVVU0V +e7ab vmah VRR_VVVU0V +e7ac vmale VRR_VVVU0V +e7ad vmalo VRR_VVVU0V +e7ae vmae VRR_VVVU0V +e7af vmao VRR_VVVU0V +e7b4 vgfm VRR_VVV0U +e7b8 vmsl VRR_VVVUU0V +e7b9 vaccc VRR_VVVU0V +e7bb vac VRR_VVVU0V +e7bc vgfma VRR_VVVU0V +e7bd vsbcbi VRR_VVVU0V +e7bf vsbi VRR_VVVU0V +e7c0 vclgd VRR_VV0UUU +e7c1 vcdlg VRR_VV0UUU +e7c2 vcgd VRR_VV0UUU +e7c3 vcdg VRR_VV0UUU +e7c4 vlde VRR_VV0UU2 +e7c5 vled VRR_VV0UUU +e7c7 vfi VRR_VV0UUU +e7ca wfk VRR_VV0UU2 +e7cb wfc VRR_VV0UU2 +e7cc vfpso VRR_VV0UUU +e7ce vfsq VRR_VV0UU2 +e7d4 vupll VRR_VV0U +e7d5 vuplh VRR_VV0U +e7d6 vupl VRR_VV0U +e7d7 vuph VRR_VV0U +e7d8 vtm VRR_VV +e7d9 vecl VRR_VV0U +e7db vec VRR_VV0U +e7de vlc VRR_VV0U +e7df vlp VRR_VV0U +e7e2 vfs VRR_VVV0UU +e7e3 vfa VRR_VVV0UU +e7e5 vfd VRR_VVV0UU +e7e7 vfm VRR_VVV0UU +e7e8 vfce VRR_VVV0UUU +e7ea vfche VRR_VVV0UUU +e7eb vfch VRR_VVV0UUU +e7ee vfmin VRR_VVV0UUU +e7ef vfmax VRR_VVV0UUU +e7f0 vavgl VRR_VVV0U +e7f1 vacc VRR_VVV0U +e7f2 vavg VRR_VVV0U +e7f3 va VRR_VVV0U +e7f5 vscbi VRR_VVV0U +e7f7 vs VRR_VVV0U +e7f8 vceq VRR_VVV0U0U +e7f9 vchl VRR_VVV0U0U +e7fb vch VRR_VVV0U0U +e7fc vmnl VRR_VVV0U +e7fd vmxl VRR_VVV0U +e7fe vmn VRR_VVV0U +e7ff vmx VRR_VVV0U +e8 mvcin SS_L0RDRD +e9 pka SS_L2RDRD +ea unpka SS_L0RDRD +eb04 lmg RSY_RRRD +eb0a srag RSY_RRRD +eb0b slag RSY_RRRD +eb0c srlg RSY_RRRD +eb0d sllg RSY_RRRD +eb0f tracg RSY_RRRD +eb14 csy RSY_RRRD +eb17 stcctm RSY_RURD +eb1c rllg RSY_RRRD +eb1d rll RSY_RRRD +eb20 clmh RSY_RURD +eb21 clmy RSY_RURD +eb23 clt RSY_RURD +eb24 stmg RSY_RRRD +eb25 stctg RSY_CCRD +eb26 stmh RSY_RRRD +eb2b clgt RSY_RURD +eb2c stcmh RSY_RURD +eb2d stcmy RSY_RURD +eb2f lctlg RSY_CCRD +eb30 csg RSY_RRRD +eb31 cdsy RSY_RRRD +eb3e cdsg RSY_RRRD +eb44 bxhg RSY_RRRD +eb45 bxleg RSY_RRRD +eb4c ecag RSY_RRRD +eb51 tmy SIY_URD +eb52 mviy SIY_URD +eb54 niy SIY_URD +eb55 cliy SIY_URD +eb56 oiy SIY_URD +eb57 xiy SIY_URD +eb60 lric RSY_RDRU +eb61 stric RSY_RDRU +eb62 mric RSY_RDRU +eb6a asi SIY_IRD +eb6e alsi SIY_IRD +eb7a agsi SIY_IRD +eb7e algsi SIY_IRD +eb80 icmh RSY_RURD +eb81 icmy RSY_RURD +eb8e mvclu RSY_RRRD +eb8f clclu RSY_RRRD +eb90 stmy RSY_RRRD +eb96 lmh RSY_RRRD +eb98 lmy RSY_RRRD +eb9a lamy RSY_AARD +eb9b stamy RSY_AARD +ebc0 tp RSL_R0RD +ebd0 pcistb RSY_RRRD +ebd1 sic RSY_RRRD +ebdc srak RSY_RRRD +ebdd slak RSY_RRRD +ebde srlk RSY_RRRD +ebdf sllk RSY_RRRD +ebe0 locfh RSY_RURD2 +ebe1 stocfh RSY_RURD2 +ebe2 locg RSY_RURD2 +ebe3 stocg RSY_RURD2 +ebe4 lang RSY_RRRD +ebe6 laog RSY_RRRD +ebe7 laxg RSY_RRRD +ebe8 laag RSY_RRRD +ebea laalg RSY_RRRD +ebf2 loc RSY_RURD2 +ebf3 stoc RSY_RURD2 +ebf4 lan RSY_RRRD +ebf6 lao RSY_RRRD +ebf7 lax RSY_RRRD +ebf8 laa RSY_RRRD +ebfa laal RSY_RRRD +ec42 lochi RIE_RUI0 +ec44 brxhg RIE_RRP +ec45 brxlg RIE_RRP +ec46 locghi RIE_RUI0 +ec4e lochhi RIE_RUI0 +ec51 risblg RIE_RRUUU +ec54 rnsbg RIE_RRUUU +ec55 risbg RIE_RRUUU +ec56 rosbg RIE_RRUUU +ec57 rxsbg RIE_RRUUU +ec59 risbgn RIE_RRUUU +ec5d risbhg RIE_RRUUU +ec64 cgrj RIE_RRPU +ec65 clgrj RIE_RRPU +ec70 cgit RIE_R0IU +ec71 clgit RIE_R0UU +ec72 cit RIE_R0IU +ec73 clfit RIE_R0UU +ec76 crj RIE_RRPU +ec77 clrj RIE_RRPU +ec7c cgij RIE_RUPI +ec7d clgij RIE_RUPU +ec7e cij RIE_RUPI +ec7f clij RIE_RUPU +ecd8 ahik RIE_RRI0 +ecd9 aghik RIE_RRI0 +ecda alhsik RIE_RRI0 +ecdb alghsik RIE_RRI0 +ece4 cgrb RRS_RRRDU +ece5 clgrb RRS_RRRDU +ecf6 crb RRS_RRRDU +ecf7 clrb RRS_RRRDU +ecfc cgib RIS_RURDI +ecfd clgib RIS_RURDU +ecfe cib RIS_RURDI +ecff clib RIS_RURDU +ed04 ldeb RXE_FRRD +ed05 lxdb RXE_FRRD +ed06 lxeb RXE_FRRD +ed07 mxdb RXE_FRRD +ed08 keb RXE_FRRD +ed09 ceb RXE_FRRD +ed0a aeb RXE_FRRD +ed0b seb RXE_FRRD +ed0c mdeb RXE_FRRD +ed0d deb RXE_FRRD +ed0e maeb RXF_FRRDF +ed0f mseb RXF_FRRDF +ed10 tceb RXE_FRRD +ed11 tcdb RXE_FRRD +ed12 tcxb RXE_FRRD +ed14 sqeb RXE_FRRD +ed15 sqdb RXE_FRRD +ed17 meeb RXE_FRRD +ed18 kdb RXE_FRRD +ed19 cdb RXE_FRRD +ed1a adb RXE_FRRD +ed1b sdb RXE_FRRD +ed1c mdb RXE_FRRD +ed1d ddb RXE_FRRD +ed1e madb RXF_FRRDF +ed1f msdb RXF_FRRDF +ed24 lde RXE_FRRD +ed25 lxd RXE_FRRD +ed26 lxe RXE_FRRD +ed2e mae RXF_FRRDF +ed2f mse RXF_FRRDF +ed34 sqe RXE_FRRD +ed35 sqd RXE_FRRD +ed37 mee RXE_FRRD +ed38 mayl RXF_FRRDF +ed39 myl RXF_FRRDF +ed3a may RXF_FRRDF +ed3b my RXF_FRRDF +ed3c mayh RXF_FRRDF +ed3d myh RXF_FRRDF +ed3e mad RXF_FRRDF +ed3f msd RXF_FRRDF +ed40 sldt RXF_FRRDF +ed41 srdt RXF_FRRDF +ed48 slxt RXF_FRRDF +ed49 srxt RXF_FRRDF +ed50 tdcet RXE_FRRD +ed51 tdget RXE_FRRD +ed54 tdcdt RXE_FRRD +ed55 tdgdt RXE_FRRD +ed58 tdcxt RXE_FRRD +ed59 tdgxt RXE_FRRD +ed64 ley RXY_FRRD +ed65 ldy RXY_FRRD +ed66 stey RXY_FRRD +ed67 stdy RXY_FRRD +eda8 czdt RSL_LRDFU +eda9 czxt RSL_LRDFU +edaa cdzt RSL_LRDFU +edab cxzt RSL_LRDFU +edac cpdt RSL_LRDFU +edad cpxt RSL_LRDFU +edae cdpt RSL_LRDFU +edaf cxpt RSL_LRDFU +ee plo SS_RRRDRD2 +ef lmd SS_RRRDRD3 +f0 srp SS_LIRDRD +f1 mvo SS_LLRDRD +f2 pack SS_LLRDRD +f3 unpk SS_LLRDRD +f8 zap SS_LLRDRD +f9 cp SS_LLRDRD +fa ap SS_LLRDRD +fb sp SS_LLRDRD +fc mp SS_LLRDRD +fd dp SS_LLRDRD diff --git a/arch/sh/include/asm/spinlock-cas.h b/arch/sh/include/asm/spinlock-cas.h index 5ed7dbbd94ff..270ee4d3e25b 100644 --- a/arch/sh/include/asm/spinlock-cas.h +++ b/arch/sh/include/asm/spinlock-cas.h @@ -27,7 +27,6 @@ static inline unsigned __sl_cas(volatile unsigned *p, unsigned old, unsigned new */ #define arch_spin_is_locked(x) ((x)->lock <= 0) -#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) static inline void arch_spin_lock(arch_spinlock_t *lock) { @@ -53,18 +52,6 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock) * read-locks. */ -/** - * read_can_lock - would read_trylock() succeed? - * @lock: the rwlock in question. - */ -#define arch_read_can_lock(x) ((x)->lock > 0) - -/** - * write_can_lock - would write_trylock() succeed? - * @lock: the rwlock in question. - */ -#define arch_write_can_lock(x) ((x)->lock == RW_LOCK_BIAS) - static inline void arch_read_lock(arch_rwlock_t *rw) { unsigned old; @@ -102,11 +89,4 @@ static inline int arch_write_trylock(arch_rwlock_t *rw) return __sl_cas(&rw->lock, RW_LOCK_BIAS, 0) == RW_LOCK_BIAS; } -#define arch_read_lock_flags(lock, flags) arch_read_lock(lock) -#define arch_write_lock_flags(lock, flags) arch_write_lock(lock) - -#define arch_spin_relax(lock) cpu_relax() -#define arch_read_relax(lock) cpu_relax() -#define arch_write_relax(lock) cpu_relax() - #endif /* __ASM_SH_SPINLOCK_CAS_H */ diff --git a/arch/sh/include/asm/spinlock-llsc.h b/arch/sh/include/asm/spinlock-llsc.h index f77263aae760..715595de286a 100644 --- a/arch/sh/include/asm/spinlock-llsc.h +++ b/arch/sh/include/asm/spinlock-llsc.h @@ -19,7 +19,6 @@ */ #define arch_spin_is_locked(x) ((x)->lock <= 0) -#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) /* * Simple spin lock operations. There are two variants, one clears IRQ's @@ -89,18 +88,6 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock) * read-locks. */ -/** - * read_can_lock - would read_trylock() succeed? - * @lock: the rwlock in question. - */ -#define arch_read_can_lock(x) ((x)->lock > 0) - -/** - * write_can_lock - would write_trylock() succeed? - * @lock: the rwlock in question. - */ -#define arch_write_can_lock(x) ((x)->lock == RW_LOCK_BIAS) - static inline void arch_read_lock(arch_rwlock_t *rw) { unsigned long tmp; @@ -209,11 +196,4 @@ static inline int arch_write_trylock(arch_rwlock_t *rw) return (oldval > (RW_LOCK_BIAS - 1)); } -#define arch_read_lock_flags(lock, flags) arch_read_lock(lock) -#define arch_write_lock_flags(lock, flags) arch_write_lock(lock) - -#define arch_spin_relax(lock) cpu_relax() -#define arch_read_relax(lock) cpu_relax() -#define arch_write_relax(lock) cpu_relax() - #endif /* __ASM_SH_SPINLOCK_LLSC_H */ diff --git a/arch/sparc/include/asm/atomic_32.h b/arch/sparc/include/asm/atomic_32.h index 0c3b3b4a9963..d13ce517f4b9 100644 --- a/arch/sparc/include/asm/atomic_32.h +++ b/arch/sparc/include/asm/atomic_32.h @@ -32,7 +32,7 @@ void atomic_set(atomic_t *, int); #define atomic_set_release(v, i) atomic_set((v), (i)) -#define atomic_read(v) ACCESS_ONCE((v)->counter) +#define atomic_read(v) READ_ONCE((v)->counter) #define atomic_add(i, v) ((void)atomic_add_return( (int)(i), (v))) #define atomic_sub(i, v) ((void)atomic_add_return(-(int)(i), (v))) diff --git a/arch/sparc/include/asm/ptrace.h b/arch/sparc/include/asm/ptrace.h index 6a339a78f4f4..71dd82b43cc5 100644 --- a/arch/sparc/include/asm/ptrace.h +++ b/arch/sparc/include/asm/ptrace.h @@ -7,6 +7,7 @@ #if defined(__sparc__) && defined(__arch64__) #ifndef __ASSEMBLY__ +#include <linux/compiler.h> #include <linux/threads.h> #include <asm/switch_to.h> diff --git a/arch/sparc/include/asm/spinlock_32.h b/arch/sparc/include/asm/spinlock_32.h index 26f00ac2b470..bc5aa6f61676 100644 --- a/arch/sparc/include/asm/spinlock_32.h +++ b/arch/sparc/include/asm/spinlock_32.h @@ -183,17 +183,6 @@ static inline int __arch_read_trylock(arch_rwlock_t *rw) res; \ }) -#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) -#define arch_read_lock_flags(rw, flags) arch_read_lock(rw) -#define arch_write_lock_flags(rw, flags) arch_write_lock(rw) - -#define arch_spin_relax(lock) cpu_relax() -#define arch_read_relax(lock) cpu_relax() -#define arch_write_relax(lock) cpu_relax() - -#define arch_read_can_lock(rw) (!((rw)->lock & 0xff)) -#define arch_write_can_lock(rw) (!(rw)->lock) - #endif /* !(__ASSEMBLY__) */ #endif /* __SPARC_SPINLOCK_H */ diff --git a/arch/sparc/include/asm/spinlock_64.h b/arch/sparc/include/asm/spinlock_64.h index 4822a7e94a30..7fc82a233f49 100644 --- a/arch/sparc/include/asm/spinlock_64.h +++ b/arch/sparc/include/asm/spinlock_64.h @@ -14,13 +14,6 @@ #include <asm/qrwlock.h> #include <asm/qspinlock.h> -#define arch_read_lock_flags(p, f) arch_read_lock(p) -#define arch_write_lock_flags(p, f) arch_write_lock(p) - -#define arch_spin_relax(lock) cpu_relax() -#define arch_read_relax(lock) cpu_relax() -#define arch_write_relax(lock) cpu_relax() - #endif /* !(__ASSEMBLY__) */ #endif /* !(__SPARC64_SPINLOCK_H) */ diff --git a/arch/sparc/kernel/led.c b/arch/sparc/kernel/led.c index e278bf52963b..519f5ba7ed7e 100644 --- a/arch/sparc/kernel/led.c +++ b/arch/sparc/kernel/led.c @@ -31,19 +31,20 @@ static inline void led_toggle(void) } static struct timer_list led_blink_timer; +static unsigned long led_blink_timer_timeout; -static void led_blink(unsigned long timeout) +static void led_blink(struct timer_list *unused) { + unsigned long timeout = led_blink_timer_timeout; + led_toggle(); /* reschedule */ if (!timeout) { /* blink according to load */ led_blink_timer.expires = jiffies + ((1 + (avenrun[0] >> FSHIFT)) * HZ); - led_blink_timer.data = 0; } else { /* blink at user specified interval */ led_blink_timer.expires = jiffies + (timeout * HZ); - led_blink_timer.data = timeout; } add_timer(&led_blink_timer); } @@ -88,9 +89,11 @@ static ssize_t led_proc_write(struct file *file, const char __user *buffer, } else if (!strcmp(buf, "toggle")) { led_toggle(); } else if ((*buf > '0') && (*buf <= '9')) { - led_blink(simple_strtoul(buf, NULL, 10)); + led_blink_timer_timeout = simple_strtoul(buf, NULL, 10); + led_blink(&led_blink_timer); } else if (!strcmp(buf, "load")) { - led_blink(0); + led_blink_timer_timeout = 0; + led_blink(&led_blink_timer); } else { auxio_set_led(AUXIO_LED_OFF); } @@ -115,8 +118,7 @@ static struct proc_dir_entry *led; static int __init led_init(void) { - init_timer(&led_blink_timer); - led_blink_timer.function = led_blink; + timer_setup(&led_blink_timer, led_blink, 0); led = proc_create("led", 0, NULL, &led_proc_fops); if (!led) diff --git a/arch/tile/gxio/dma_queue.c b/arch/tile/gxio/dma_queue.c index baa60357f8ba..b7ba577d82ca 100644 --- a/arch/tile/gxio/dma_queue.c +++ b/arch/tile/gxio/dma_queue.c @@ -163,14 +163,14 @@ int __gxio_dma_queue_is_complete(__gxio_dma_queue_t *dma_queue, int64_t completion_slot, int update) { if (update) { - if (ACCESS_ONCE(dma_queue->hw_complete_count) > + if (READ_ONCE(dma_queue->hw_complete_count) > completion_slot) return 1; __gxio_dma_queue_update_credits(dma_queue); } - return ACCESS_ONCE(dma_queue->hw_complete_count) > completion_slot; + return READ_ONCE(dma_queue->hw_complete_count) > completion_slot; } EXPORT_SYMBOL_GPL(__gxio_dma_queue_is_complete); diff --git a/arch/tile/include/asm/spinlock_32.h b/arch/tile/include/asm/spinlock_32.h index cba8ba9b8da6..fb5313d77315 100644 --- a/arch/tile/include/asm/spinlock_32.h +++ b/arch/tile/include/asm/spinlock_32.h @@ -51,9 +51,6 @@ static inline int arch_spin_is_locked(arch_spinlock_t *lock) void arch_spin_lock(arch_spinlock_t *lock); -/* We cannot take an interrupt after getting a ticket, so don't enable them. */ -#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) - int arch_spin_trylock(arch_spinlock_t *lock); static inline void arch_spin_unlock(arch_spinlock_t *lock) @@ -80,22 +77,6 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock) #define _RD_COUNT_WIDTH 8 /** - * arch_read_can_lock() - would read_trylock() succeed? - */ -static inline int arch_read_can_lock(arch_rwlock_t *rwlock) -{ - return (rwlock->lock << _RD_COUNT_WIDTH) == 0; -} - -/** - * arch_write_can_lock() - would write_trylock() succeed? - */ -static inline int arch_write_can_lock(arch_rwlock_t *rwlock) -{ - return rwlock->lock == 0; -} - -/** * arch_read_lock() - acquire a read lock. */ void arch_read_lock(arch_rwlock_t *rwlock); @@ -125,7 +106,4 @@ void arch_read_unlock(arch_rwlock_t *rwlock); */ void arch_write_unlock(arch_rwlock_t *rwlock); -#define arch_read_lock_flags(lock, flags) arch_read_lock(lock) -#define arch_write_lock_flags(lock, flags) arch_write_lock(lock) - #endif /* _ASM_TILE_SPINLOCK_32_H */ diff --git a/arch/tile/include/asm/spinlock_64.h b/arch/tile/include/asm/spinlock_64.h index 9a2c2d605752..5b616ef642a8 100644 --- a/arch/tile/include/asm/spinlock_64.h +++ b/arch/tile/include/asm/spinlock_64.h @@ -75,9 +75,6 @@ static inline void arch_spin_lock(arch_spinlock_t *lock) /* Try to get the lock, and return whether we succeeded. */ int arch_spin_trylock(arch_spinlock_t *lock); -/* We cannot take an interrupt after getting a ticket, so don't enable them. */ -#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) - /* * Read-write spinlocks, allowing multiple readers * but only one writer. @@ -93,24 +90,6 @@ static inline int arch_write_val_locked(int val) return val < 0; /* Optimize "val & __WRITE_LOCK_BIT". */ } -/** - * read_can_lock - would read_trylock() succeed? - * @lock: the rwlock in question. - */ -static inline int arch_read_can_lock(arch_rwlock_t *rw) -{ - return !arch_write_val_locked(rw->lock); -} - -/** - * write_can_lock - would write_trylock() succeed? - * @lock: the rwlock in question. - */ -static inline int arch_write_can_lock(arch_rwlock_t *rw) -{ - return rw->lock == 0; -} - extern void __read_lock_failed(arch_rwlock_t *rw); static inline void arch_read_lock(arch_rwlock_t *rw) @@ -156,7 +135,4 @@ static inline int arch_write_trylock(arch_rwlock_t *rw) return 0; } -#define arch_read_lock_flags(lock, flags) arch_read_lock(lock) -#define arch_write_lock_flags(lock, flags) arch_write_lock(lock) - #endif /* _ASM_TILE_SPINLOCK_64_H */ diff --git a/arch/tile/include/gxio/dma_queue.h b/arch/tile/include/gxio/dma_queue.h index b9e45e37649e..c8fd47edba30 100644 --- a/arch/tile/include/gxio/dma_queue.h +++ b/arch/tile/include/gxio/dma_queue.h @@ -121,7 +121,7 @@ static inline int64_t __gxio_dma_queue_reserve(__gxio_dma_queue_t *dma_queue, * if the result is LESS than "hw_complete_count". */ uint64_t complete; - complete = ACCESS_ONCE(dma_queue->hw_complete_count); + complete = READ_ONCE(dma_queue->hw_complete_count); slot |= (complete & 0xffffffffff000000); if (slot < complete) slot += 0x1000000; diff --git a/arch/tile/kernel/ptrace.c b/arch/tile/kernel/ptrace.c index e1a078e6828e..d516d61751c2 100644 --- a/arch/tile/kernel/ptrace.c +++ b/arch/tile/kernel/ptrace.c @@ -255,7 +255,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, int do_syscall_trace_enter(struct pt_regs *regs) { - u32 work = ACCESS_ONCE(current_thread_info()->flags); + u32 work = READ_ONCE(current_thread_info()->flags); if ((work & _TIF_SYSCALL_TRACE) && tracehook_report_syscall_entry(regs)) { diff --git a/arch/um/include/shared/init.h b/arch/um/include/shared/init.h index 390572daa40d..b3f5865a92c9 100644 --- a/arch/um/include/shared/init.h +++ b/arch/um/include/shared/init.h @@ -41,7 +41,7 @@ typedef int (*initcall_t)(void); typedef void (*exitcall_t)(void); -#include <linux/compiler.h> +#include <linux/compiler_types.h> /* These are for everybody (although not all archs will actually discard it in modules) */ diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 2fdb23313dd5..f08977d82ca0 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -56,7 +56,7 @@ config X86 select ARCH_HAS_KCOV if X86_64 select ARCH_HAS_PMEM_API if X86_64 # Causing hangs/crashes, see the commit that added this change for details. - select ARCH_HAS_REFCOUNT if BROKEN + select ARCH_HAS_REFCOUNT select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64 select ARCH_HAS_SET_MEMORY select ARCH_HAS_SG_CHAIN @@ -93,8 +93,10 @@ config X86 select GENERIC_FIND_FIRST_BIT select GENERIC_IOMAP select GENERIC_IRQ_EFFECTIVE_AFF_MASK if SMP + select GENERIC_IRQ_MATRIX_ALLOCATOR if X86_LOCAL_APIC select GENERIC_IRQ_MIGRATION if SMP select GENERIC_IRQ_PROBE + select GENERIC_IRQ_RESERVATION_MODE select GENERIC_IRQ_SHOW select GENERIC_PENDING_IRQ if SMP select GENERIC_SMP_IDLE_THREAD @@ -171,7 +173,7 @@ config X86 select HAVE_PERF_USER_STACK_DUMP select HAVE_RCU_TABLE_FREE select HAVE_REGS_AND_STACK_ACCESS_API - select HAVE_RELIABLE_STACKTRACE if X86_64 && FRAME_POINTER_UNWINDER && STACK_VALIDATION + select HAVE_RELIABLE_STACKTRACE if X86_64 && UNWINDER_FRAME_POINTER && STACK_VALIDATION select HAVE_STACK_VALIDATION if X86_64 select HAVE_SYSCALL_TRACEPOINTS select HAVE_UNSTABLE_SCHED_CLOCK @@ -303,7 +305,6 @@ config ARCH_SUPPORTS_DEBUG_PAGEALLOC config KASAN_SHADOW_OFFSET hex depends on KASAN - default 0xdff8000000000000 if X86_5LEVEL default 0xdffffc0000000000 config HAVE_INTEL_TXT @@ -1803,6 +1804,16 @@ config X86_SMAP If unsure, say Y. +config X86_INTEL_UMIP + def_bool n + depends on CPU_SUP_INTEL + prompt "Intel User Mode Instruction Prevention" if EXPERT + ---help--- + The User Mode Instruction Prevention (UMIP) is a security + feature in newer Intel processors. If enabled, a general + protection fault is issued if the instructions SGDT, SLDT, + SIDT, SMSW and STR are executed in user mode. + config X86_INTEL_MPX prompt "Intel MPX (Memory Protection Extensions)" def_bool n diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 90b123056f4b..6293a8768a91 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -359,28 +359,14 @@ config PUNIT_ATOM_DEBUG choice prompt "Choose kernel unwinder" - default FRAME_POINTER_UNWINDER + default UNWINDER_ORC if X86_64 + default UNWINDER_FRAME_POINTER if X86_32 ---help--- This determines which method will be used for unwinding kernel stack traces for panics, oopses, bugs, warnings, perf, /proc/<pid>/stack, livepatch, lockdep, and more. -config FRAME_POINTER_UNWINDER - bool "Frame pointer unwinder" - select FRAME_POINTER - ---help--- - This option enables the frame pointer unwinder for unwinding kernel - stack traces. - - The unwinder itself is fast and it uses less RAM than the ORC - unwinder, but the kernel text size will grow by ~3% and the kernel's - overall performance will degrade by roughly 5-10%. - - This option is recommended if you want to use the livepatch - consistency model, as this is currently the only way to get a - reliable stack trace (CONFIG_HAVE_RELIABLE_STACKTRACE). - -config ORC_UNWINDER +config UNWINDER_ORC bool "ORC unwinder" depends on X86_64 select STACK_VALIDATION @@ -396,7 +382,22 @@ config ORC_UNWINDER Enabling this option will increase the kernel's runtime memory usage by roughly 2-4MB, depending on your kernel config. -config GUESS_UNWINDER +config UNWINDER_FRAME_POINTER + bool "Frame pointer unwinder" + select FRAME_POINTER + ---help--- + This option enables the frame pointer unwinder for unwinding kernel + stack traces. + + The unwinder itself is fast and it uses less RAM than the ORC + unwinder, but the kernel text size will grow by ~3% and the kernel's + overall performance will degrade by roughly 5-10%. + + This option is recommended if you want to use the livepatch + consistency model, as this is currently the only way to get a + reliable stack trace (CONFIG_HAVE_RELIABLE_STACKTRACE). + +config UNWINDER_GUESS bool "Guess unwinder" depends on EXPERT ---help--- @@ -411,7 +412,7 @@ config GUESS_UNWINDER endchoice config FRAME_POINTER - depends on !ORC_UNWINDER && !GUESS_UNWINDER + depends on !UNWINDER_ORC && !UNWINDER_GUESS bool endmenu diff --git a/arch/x86/boot/.gitignore b/arch/x86/boot/.gitignore index e3cf9f682be5..09d25dd09307 100644 --- a/arch/x86/boot/.gitignore +++ b/arch/x86/boot/.gitignore @@ -7,3 +7,6 @@ zoffset.h setup setup.bin setup.elf +fdimage +mtools.conf +image.iso diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index d88a2fddba8c..9b5adae9cc40 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -123,63 +123,26 @@ image_cmdline = default linux $(FDARGS) $(if $(FDINITRD),initrd=initrd.img,) $(obj)/mtools.conf: $(src)/mtools.conf.in sed -e 's|@OBJ@|$(obj)|g' < $< > $@ +quiet_cmd_genimage = GENIMAGE $3 +cmd_genimage = sh $(srctree)/$(src)/genimage.sh $2 $3 $(obj)/bzImage \ + $(obj)/mtools.conf '$(image_cmdline)' $(FDINITRD) + # This requires write access to /dev/fd0 bzdisk: $(obj)/bzImage $(obj)/mtools.conf - MTOOLSRC=$(obj)/mtools.conf mformat a: ; sync - syslinux /dev/fd0 ; sync - echo '$(image_cmdline)' | \ - MTOOLSRC=$(src)/mtools.conf mcopy - a:syslinux.cfg - if [ -f '$(FDINITRD)' ] ; then \ - MTOOLSRC=$(obj)/mtools.conf mcopy '$(FDINITRD)' a:initrd.img ; \ - fi - MTOOLSRC=$(obj)/mtools.conf mcopy $(obj)/bzImage a:linux ; sync + $(call cmd,genimage,bzdisk,/dev/fd0) # These require being root or having syslinux 2.02 or higher installed fdimage fdimage144: $(obj)/bzImage $(obj)/mtools.conf - dd if=/dev/zero of=$(obj)/fdimage bs=1024 count=1440 - MTOOLSRC=$(obj)/mtools.conf mformat v: ; sync - syslinux $(obj)/fdimage ; sync - echo '$(image_cmdline)' | \ - MTOOLSRC=$(obj)/mtools.conf mcopy - v:syslinux.cfg - if [ -f '$(FDINITRD)' ] ; then \ - MTOOLSRC=$(obj)/mtools.conf mcopy '$(FDINITRD)' v:initrd.img ; \ - fi - MTOOLSRC=$(obj)/mtools.conf mcopy $(obj)/bzImage v:linux ; sync + $(call cmd,genimage,fdimage144,$(obj)/fdimage) + @$(kecho) 'Kernel: $(obj)/fdimage is ready' fdimage288: $(obj)/bzImage $(obj)/mtools.conf - dd if=/dev/zero of=$(obj)/fdimage bs=1024 count=2880 - MTOOLSRC=$(obj)/mtools.conf mformat w: ; sync - syslinux $(obj)/fdimage ; sync - echo '$(image_cmdline)' | \ - MTOOLSRC=$(obj)/mtools.conf mcopy - w:syslinux.cfg - if [ -f '$(FDINITRD)' ] ; then \ - MTOOLSRC=$(obj)/mtools.conf mcopy '$(FDINITRD)' w:initrd.img ; \ - fi - MTOOLSRC=$(obj)/mtools.conf mcopy $(obj)/bzImage w:linux ; sync + $(call cmd,genimage,fdimage288,$(obj)/fdimage) + @$(kecho) 'Kernel: $(obj)/fdimage is ready' isoimage: $(obj)/bzImage - -rm -rf $(obj)/isoimage - mkdir $(obj)/isoimage - for i in lib lib64 share end ; do \ - if [ -f /usr/$$i/syslinux/isolinux.bin ] ; then \ - cp /usr/$$i/syslinux/isolinux.bin $(obj)/isoimage ; \ - if [ -f /usr/$$i/syslinux/ldlinux.c32 ]; then \ - cp /usr/$$i/syslinux/ldlinux.c32 $(obj)/isoimage ; \ - fi ; \ - break ; \ - fi ; \ - if [ $$i = end ] ; then exit 1 ; fi ; \ - done - cp $(obj)/bzImage $(obj)/isoimage/linux - echo '$(image_cmdline)' > $(obj)/isoimage/isolinux.cfg - if [ -f '$(FDINITRD)' ] ; then \ - cp '$(FDINITRD)' $(obj)/isoimage/initrd.img ; \ - fi - mkisofs -J -r -o $(obj)/image.iso -b isolinux.bin -c boot.cat \ - -no-emul-boot -boot-load-size 4 -boot-info-table \ - $(obj)/isoimage - isohybrid $(obj)/image.iso 2>/dev/null || true - rm -rf $(obj)/isoimage + $(call cmd,genimage,isoimage,$(obj)/image.iso) + @$(kecho) 'Kernel: $(obj)/image.iso is ready' bzlilo: $(obj)/bzImage if [ -f $(INSTALL_PATH)/vmlinuz ]; then mv $(INSTALL_PATH)/vmlinuz $(INSTALL_PATH)/vmlinuz.old; fi diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 4b7575b00563..1e9c322e973a 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -36,6 +36,7 @@ KBUILD_CFLAGS += -mno-mmx -mno-sse KBUILD_CFLAGS += $(call cc-option,-ffreestanding) KBUILD_CFLAGS += $(call cc-option,-fno-stack-protector) KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member) +KBUILD_CFLAGS += $(call cc-disable-warning, gnu) KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__ GCOV_PROFILE := n @@ -78,6 +79,7 @@ vmlinux-objs-$(CONFIG_EARLY_PRINTK) += $(obj)/early_serial_console.o vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/kaslr.o ifdef CONFIG_X86_64 vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/pagetable.o + vmlinux-objs-y += $(obj)/mem_encrypt.o endif $(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index beb255b66447..20919b4f3133 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -131,6 +131,19 @@ ENTRY(startup_32) /* * Build early 4G boot pagetable */ + /* + * If SEV is active then set the encryption mask in the page tables. + * This will insure that when the kernel is copied and decompressed + * it will be done so encrypted. + */ + call get_sev_encryption_bit + xorl %edx, %edx + testl %eax, %eax + jz 1f + subl $32, %eax /* Encryption bit is always above bit 31 */ + bts %eax, %edx /* Set encryption mask for page tables */ +1: + /* Initialize Page tables to 0 */ leal pgtable(%ebx), %edi xorl %eax, %eax @@ -141,12 +154,14 @@ ENTRY(startup_32) leal pgtable + 0(%ebx), %edi leal 0x1007 (%edi), %eax movl %eax, 0(%edi) + addl %edx, 4(%edi) /* Build Level 3 */ leal pgtable + 0x1000(%ebx), %edi leal 0x1007(%edi), %eax movl $4, %ecx 1: movl %eax, 0x00(%edi) + addl %edx, 0x04(%edi) addl $0x00001000, %eax addl $8, %edi decl %ecx @@ -157,6 +172,7 @@ ENTRY(startup_32) movl $0x00000183, %eax movl $2048, %ecx 1: movl %eax, 0(%edi) + addl %edx, 4(%edi) addl $0x00200000, %eax addl $8, %edi decl %ecx diff --git a/arch/x86/boot/compressed/mem_encrypt.S b/arch/x86/boot/compressed/mem_encrypt.S new file mode 100644 index 000000000000..54f5f6625a73 --- /dev/null +++ b/arch/x86/boot/compressed/mem_encrypt.S @@ -0,0 +1,120 @@ +/* + * AMD Memory Encryption Support + * + * Copyright (C) 2017 Advanced Micro Devices, Inc. + * + * Author: Tom Lendacky <thomas.lendacky@amd.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/linkage.h> + +#include <asm/processor-flags.h> +#include <asm/msr.h> +#include <asm/asm-offsets.h> + + .text + .code32 +ENTRY(get_sev_encryption_bit) + xor %eax, %eax + +#ifdef CONFIG_AMD_MEM_ENCRYPT + push %ebx + push %ecx + push %edx + push %edi + + /* + * RIP-relative addressing is needed to access the encryption bit + * variable. Since we are running in 32-bit mode we need this call/pop + * sequence to get the proper relative addressing. + */ + call 1f +1: popl %edi + subl $1b, %edi + + movl enc_bit(%edi), %eax + cmpl $0, %eax + jge .Lsev_exit + + /* Check if running under a hypervisor */ + movl $1, %eax + cpuid + bt $31, %ecx /* Check the hypervisor bit */ + jnc .Lno_sev + + movl $0x80000000, %eax /* CPUID to check the highest leaf */ + cpuid + cmpl $0x8000001f, %eax /* See if 0x8000001f is available */ + jb .Lno_sev + + /* + * Check for the SEV feature: + * CPUID Fn8000_001F[EAX] - Bit 1 + * CPUID Fn8000_001F[EBX] - Bits 5:0 + * Pagetable bit position used to indicate encryption + */ + movl $0x8000001f, %eax + cpuid + bt $1, %eax /* Check if SEV is available */ + jnc .Lno_sev + + movl $MSR_AMD64_SEV, %ecx /* Read the SEV MSR */ + rdmsr + bt $MSR_AMD64_SEV_ENABLED_BIT, %eax /* Check if SEV is active */ + jnc .Lno_sev + + movl %ebx, %eax + andl $0x3f, %eax /* Return the encryption bit location */ + movl %eax, enc_bit(%edi) + jmp .Lsev_exit + +.Lno_sev: + xor %eax, %eax + movl %eax, enc_bit(%edi) + +.Lsev_exit: + pop %edi + pop %edx + pop %ecx + pop %ebx + +#endif /* CONFIG_AMD_MEM_ENCRYPT */ + + ret +ENDPROC(get_sev_encryption_bit) + + .code64 +ENTRY(get_sev_encryption_mask) + xor %rax, %rax + +#ifdef CONFIG_AMD_MEM_ENCRYPT + push %rbp + push %rdx + + movq %rsp, %rbp /* Save current stack pointer */ + + call get_sev_encryption_bit /* Get the encryption bit position */ + testl %eax, %eax + jz .Lno_sev_mask + + xor %rdx, %rdx + bts %rax, %rdx /* Create the encryption mask */ + mov %rdx, %rax /* ... and return it */ + +.Lno_sev_mask: + movq %rbp, %rsp /* Restore original stack pointer */ + + pop %rdx + pop %rbp +#endif + + ret +ENDPROC(get_sev_encryption_mask) + + .data +enc_bit: + .int 0xffffffff diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index 32d4ec2e0243..9d323dc6b159 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -109,4 +109,6 @@ static inline void console_init(void) { } #endif +unsigned long get_sev_encryption_mask(void); + #endif diff --git a/arch/x86/boot/compressed/pagetable.c b/arch/x86/boot/compressed/pagetable.c index 972319ff5b01..d5364ca2e3f9 100644 --- a/arch/x86/boot/compressed/pagetable.c +++ b/arch/x86/boot/compressed/pagetable.c @@ -77,16 +77,18 @@ static unsigned long top_level_pgt; * Mapping information structure passed to kernel_ident_mapping_init(). * Due to relocation, pointers must be assigned at run time not build time. */ -static struct x86_mapping_info mapping_info = { - .page_flag = __PAGE_KERNEL_LARGE_EXEC, -}; +static struct x86_mapping_info mapping_info; /* Locates and clears a region for a new top level page table. */ void initialize_identity_maps(void) { + unsigned long sev_me_mask = get_sev_encryption_mask(); + /* Init mapping_info with run-time function/buffer pointers. */ mapping_info.alloc_pgt_page = alloc_pgt_page; mapping_info.context = &pgt_data; + mapping_info.page_flag = __PAGE_KERNEL_LARGE_EXEC | sev_me_mask; + mapping_info.kernpg_flag = _KERNPG_TABLE | sev_me_mask; /* * It should be impossible for this not to already be true, diff --git a/arch/x86/boot/genimage.sh b/arch/x86/boot/genimage.sh new file mode 100644 index 000000000000..49f4970f693b --- /dev/null +++ b/arch/x86/boot/genimage.sh @@ -0,0 +1,124 @@ +#!/bin/sh +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# +# Copyright (C) 2017 by Changbin Du <changbin.du@intel.com> +# +# Adapted from code in arch/x86/boot/Makefile by H. Peter Anvin and others +# +# "make fdimage/fdimage144/fdimage288/isoimage" script for x86 architecture +# +# Arguments: +# $1 - fdimage format +# $2 - target image file +# $3 - kernel bzImage file +# $4 - mtool configuration file +# $5 - kernel cmdline +# $6 - inird image file +# + +# Use "make V=1" to debug this script +case "${KBUILD_VERBOSE}" in +*1*) + set -x + ;; +esac + +verify () { + if [ ! -f "$1" ]; then + echo "" 1>&2 + echo " *** Missing file: $1" 1>&2 + echo "" 1>&2 + exit 1 + fi +} + + +export MTOOLSRC=$4 +FIMAGE=$2 +FBZIMAGE=$3 +KCMDLINE=$5 +FDINITRD=$6 + +# Make sure the files actually exist +verify "$FBZIMAGE" +verify "$MTOOLSRC" + +genbzdisk() { + mformat a: + syslinux $FIMAGE + echo "$KCMDLINE" | mcopy - a:syslinux.cfg + if [ -f "$FDINITRD" ] ; then + mcopy "$FDINITRD" a:initrd.img + fi + mcopy $FBZIMAGE a:linux +} + +genfdimage144() { + dd if=/dev/zero of=$FIMAGE bs=1024 count=1440 2> /dev/null + mformat v: + syslinux $FIMAGE + echo "$KCMDLINE" | mcopy - v:syslinux.cfg + if [ -f "$FDINITRD" ] ; then + mcopy "$FDINITRD" v:initrd.img + fi + mcopy $FBZIMAGE v:linux +} + +genfdimage288() { + dd if=/dev/zero of=$FIMAGE bs=1024 count=2880 2> /dev/null + mformat w: + syslinux $FIMAGE + echo "$KCMDLINE" | mcopy - W:syslinux.cfg + if [ -f "$FDINITRD" ] ; then + mcopy "$FDINITRD" w:initrd.img + fi + mcopy $FBZIMAGE w:linux +} + +genisoimage() { + tmp_dir=`dirname $FIMAGE`/isoimage + rm -rf $tmp_dir + mkdir $tmp_dir + for i in lib lib64 share end ; do + for j in syslinux ISOLINUX ; do + if [ -f /usr/$i/$j/isolinux.bin ] ; then + isolinux=/usr/$i/$j/isolinux.bin + cp $isolinux $tmp_dir + fi + done + for j in syslinux syslinux/modules/bios ; do + if [ -f /usr/$i/$j/ldlinux.c32 ]; then + ldlinux=/usr/$i/$j/ldlinux.c32 + cp $ldlinux $tmp_dir + fi + done + if [ -n "$isolinux" -a -n "$ldlinux" ] ; then + break + fi + if [ $i = end -a -z "$isolinux" ] ; then + echo 'Need an isolinux.bin file, please install syslinux/isolinux.' + exit 1 + fi + done + cp $FBZIMAGE $tmp_dir/linux + echo "$KCMDLINE" > $tmp_dir/isolinux.cfg + if [ -f "$FDINITRD" ] ; then + cp "$FDINITRD" $tmp_dir/initrd.img + fi + mkisofs -J -r -input-charset=utf-8 -quiet -o $FIMAGE -b isolinux.bin \ + -c boot.cat -no-emul-boot -boot-load-size 4 -boot-info-table \ + $tmp_dir + isohybrid $FIMAGE 2>/dev/null || true + rm -rf $tmp_dir +} + +case $1 in + bzdisk) genbzdisk;; + fdimage144) genfdimage144;; + fdimage288) genfdimage288;; + isoimage) genisoimage;; + *) echo 'Unknown image format'; exit 1; +esac diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index 9c7ea597eee6..850b8762e889 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -17,7 +17,6 @@ */ #include <asm/segment.h> -#include <generated/utsrelease.h> #include <asm/boot.h> #include <asm/page_types.h> #include <asm/setup.h> diff --git a/arch/x86/boot/video-vga.c b/arch/x86/boot/video-vga.c index 45bc9402aa49..a14c5178d4ba 100644 --- a/arch/x86/boot/video-vga.c +++ b/arch/x86/boot/video-vga.c @@ -241,9 +241,9 @@ static int vga_probe(void) vga_modes, }; static int mode_count[] = { - sizeof(cga_modes)/sizeof(struct mode_info), - sizeof(ega_modes)/sizeof(struct mode_info), - sizeof(vga_modes)/sizeof(struct mode_info), + ARRAY_SIZE(cga_modes), + ARRAY_SIZE(ega_modes), + ARRAY_SIZE(vga_modes), }; struct biosregs ireg, oreg; diff --git a/arch/x86/configs/tiny.config b/arch/x86/configs/tiny.config index 550cd5012b73..66c9e2aab16c 100644 --- a/arch/x86/configs/tiny.config +++ b/arch/x86/configs/tiny.config @@ -1,5 +1,5 @@ CONFIG_NOHIGHMEM=y # CONFIG_HIGHMEM4G is not set # CONFIG_HIGHMEM64G is not set -CONFIG_GUESS_UNWINDER=y -# CONFIG_FRAME_POINTER_UNWINDER is not set +CONFIG_UNWINDER_GUESS=y +# CONFIG_UNWINDER_FRAME_POINTER is not set diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index 4a4b16e56d35..e32fc1f274d8 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -299,6 +299,7 @@ CONFIG_DEBUG_STACKOVERFLOW=y # CONFIG_DEBUG_RODATA_TEST is not set CONFIG_DEBUG_BOOT_PARAMS=y CONFIG_OPTIMIZE_INLINING=y +CONFIG_UNWINDER_ORC=y CONFIG_SECURITY=y CONFIG_SECURITY_NETWORK=y CONFIG_SECURITY_SELINUX=y diff --git a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S index 93b945597ecf..7cfba738f104 100644 --- a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S +++ b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S @@ -157,8 +157,8 @@ LABEL skip_ %I .endr # Find min length - vmovdqa _lens+0*16(state), %xmm0 - vmovdqa _lens+1*16(state), %xmm1 + vmovdqu _lens+0*16(state), %xmm0 + vmovdqu _lens+1*16(state), %xmm1 vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A} vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C} @@ -178,8 +178,8 @@ LABEL skip_ %I vpsubd %xmm2, %xmm0, %xmm0 vpsubd %xmm2, %xmm1, %xmm1 - vmovdqa %xmm0, _lens+0*16(state) - vmovdqa %xmm1, _lens+1*16(state) + vmovdqu %xmm0, _lens+0*16(state) + vmovdqu %xmm1, _lens+1*16(state) # "state" and "args" are the same address, arg1 # len is arg2 @@ -235,8 +235,8 @@ ENTRY(sha1_mb_mgr_get_comp_job_avx2) jc .return_null # Find min length - vmovdqa _lens(state), %xmm0 - vmovdqa _lens+1*16(state), %xmm1 + vmovdqu _lens(state), %xmm0 + vmovdqu _lens+1*16(state), %xmm1 vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A} vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C} diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S index 8fe6338bcc84..16c4ccb1f154 100644 --- a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S +++ b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S @@ -155,8 +155,8 @@ LABEL skip_ %I .endr # Find min length - vmovdqa _lens+0*16(state), %xmm0 - vmovdqa _lens+1*16(state), %xmm1 + vmovdqu _lens+0*16(state), %xmm0 + vmovdqu _lens+1*16(state), %xmm1 vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A} vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C} @@ -176,8 +176,8 @@ LABEL skip_ %I vpsubd %xmm2, %xmm0, %xmm0 vpsubd %xmm2, %xmm1, %xmm1 - vmovdqa %xmm0, _lens+0*16(state) - vmovdqa %xmm1, _lens+1*16(state) + vmovdqu %xmm0, _lens+0*16(state) + vmovdqu %xmm1, _lens+1*16(state) # "state" and "args" are the same address, arg1 # len is arg2 @@ -234,8 +234,8 @@ ENTRY(sha256_mb_mgr_get_comp_job_avx2) jc .return_null # Find min length - vmovdqa _lens(state), %xmm0 - vmovdqa _lens+1*16(state), %xmm1 + vmovdqu _lens(state), %xmm0 + vmovdqu _lens+1*16(state), %xmm1 vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A} vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C} diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index 6e160031cfea..3fd8bc560fae 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -142,56 +142,25 @@ For 32-bit we have the following conventions - kernel is built with UNWIND_HINT_REGS offset=\offset .endm - .macro RESTORE_EXTRA_REGS offset=0 - movq 0*8+\offset(%rsp), %r15 - movq 1*8+\offset(%rsp), %r14 - movq 2*8+\offset(%rsp), %r13 - movq 3*8+\offset(%rsp), %r12 - movq 4*8+\offset(%rsp), %rbp - movq 5*8+\offset(%rsp), %rbx - UNWIND_HINT_REGS offset=\offset extra=0 - .endm - - .macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1 - .if \rstor_r11 - movq 6*8(%rsp), %r11 - .endif - .if \rstor_r8910 - movq 7*8(%rsp), %r10 - movq 8*8(%rsp), %r9 - movq 9*8(%rsp), %r8 - .endif - .if \rstor_rax - movq 10*8(%rsp), %rax - .endif - .if \rstor_rcx - movq 11*8(%rsp), %rcx - .endif - .if \rstor_rdx - movq 12*8(%rsp), %rdx - .endif - movq 13*8(%rsp), %rsi - movq 14*8(%rsp), %rdi - UNWIND_HINT_IRET_REGS offset=16*8 - .endm - .macro RESTORE_C_REGS - RESTORE_C_REGS_HELPER 1,1,1,1,1 - .endm - .macro RESTORE_C_REGS_EXCEPT_RAX - RESTORE_C_REGS_HELPER 0,1,1,1,1 - .endm - .macro RESTORE_C_REGS_EXCEPT_RCX - RESTORE_C_REGS_HELPER 1,0,1,1,1 - .endm - .macro RESTORE_C_REGS_EXCEPT_R11 - RESTORE_C_REGS_HELPER 1,1,0,1,1 - .endm - .macro RESTORE_C_REGS_EXCEPT_RCX_R11 - RESTORE_C_REGS_HELPER 1,0,0,1,1 - .endm - - .macro REMOVE_PT_GPREGS_FROM_STACK addskip=0 - subq $-(15*8+\addskip), %rsp + .macro POP_EXTRA_REGS + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbp + popq %rbx + .endm + + .macro POP_C_REGS + popq %r11 + popq %r10 + popq %r9 + popq %r8 + popq %rax + popq %rcx + popq %rdx + popq %rsi + popq %rdi .endm .macro icebp diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 03505ffbe1b6..d7d3cc24baf4 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -75,7 +75,7 @@ static long syscall_trace_enter(struct pt_regs *regs) if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) BUG_ON(regs != task_pt_regs(current)); - work = ACCESS_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY; + work = READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY; if (unlikely(work & _TIF_SYSCALL_EMU)) emulated = true; @@ -186,9 +186,7 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs) addr_limit_user_check(); - if (IS_ENABLED(CONFIG_PROVE_LOCKING) && WARN_ON(!irqs_disabled())) - local_irq_disable(); - + lockdep_assert_irqs_disabled(); lockdep_sys_exit(); cached_flags = READ_ONCE(ti->flags); diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index bcfc5668dcb2..a2b30ec69497 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -221,10 +221,9 @@ entry_SYSCALL_64_fastpath: TRACE_IRQS_ON /* user mode is traced as IRQs on */ movq RIP(%rsp), %rcx movq EFLAGS(%rsp), %r11 - RESTORE_C_REGS_EXCEPT_RCX_R11 - movq RSP(%rsp), %rsp + addq $6*8, %rsp /* skip extra regs -- they were preserved */ UNWIND_HINT_EMPTY - USERGS_SYSRET64 + jmp .Lpop_c_regs_except_rcx_r11_and_sysret 1: /* @@ -246,17 +245,18 @@ entry_SYSCALL64_slow_path: call do_syscall_64 /* returns with IRQs disabled */ return_from_SYSCALL_64: - RESTORE_EXTRA_REGS TRACE_IRQS_IRETQ /* we're about to change IF */ /* * Try to use SYSRET instead of IRET if we're returning to - * a completely clean 64-bit userspace context. + * a completely clean 64-bit userspace context. If we're not, + * go to the slow exit path. */ movq RCX(%rsp), %rcx movq RIP(%rsp), %r11 - cmpq %rcx, %r11 /* RCX == RIP */ - jne opportunistic_sysret_failed + + cmpq %rcx, %r11 /* SYSRET requires RCX == RIP */ + jne swapgs_restore_regs_and_return_to_usermode /* * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP @@ -274,14 +274,14 @@ return_from_SYSCALL_64: /* If this changed %rcx, it was not canonical */ cmpq %rcx, %r11 - jne opportunistic_sysret_failed + jne swapgs_restore_regs_and_return_to_usermode cmpq $__USER_CS, CS(%rsp) /* CS must match SYSRET */ - jne opportunistic_sysret_failed + jne swapgs_restore_regs_and_return_to_usermode movq R11(%rsp), %r11 cmpq %r11, EFLAGS(%rsp) /* R11 == RFLAGS */ - jne opportunistic_sysret_failed + jne swapgs_restore_regs_and_return_to_usermode /* * SYSCALL clears RF when it saves RFLAGS in R11 and SYSRET cannot @@ -302,12 +302,12 @@ return_from_SYSCALL_64: * would never get past 'stuck_here'. */ testq $(X86_EFLAGS_RF|X86_EFLAGS_TF), %r11 - jnz opportunistic_sysret_failed + jnz swapgs_restore_regs_and_return_to_usermode /* nothing to check for RSP */ cmpq $__USER_DS, SS(%rsp) /* SS must match SYSRET */ - jne opportunistic_sysret_failed + jne swapgs_restore_regs_and_return_to_usermode /* * We win! This label is here just for ease of understanding @@ -315,14 +315,20 @@ return_from_SYSCALL_64: */ syscall_return_via_sysret: /* rcx and r11 are already restored (see code above) */ - RESTORE_C_REGS_EXCEPT_RCX_R11 - movq RSP(%rsp), %rsp UNWIND_HINT_EMPTY + POP_EXTRA_REGS +.Lpop_c_regs_except_rcx_r11_and_sysret: + popq %rsi /* skip r11 */ + popq %r10 + popq %r9 + popq %r8 + popq %rax + popq %rsi /* skip rcx */ + popq %rdx + popq %rsi + popq %rdi + movq RSP-ORIG_RAX(%rsp), %rsp USERGS_SYSRET64 - -opportunistic_sysret_failed: - SWAPGS - jmp restore_c_regs_and_iret END(entry_SYSCALL_64) ENTRY(stub_ptregs_64) @@ -423,8 +429,7 @@ ENTRY(ret_from_fork) movq %rsp, %rdi call syscall_return_slowpath /* returns with IRQs disabled */ TRACE_IRQS_ON /* user mode is traced as IRQS on */ - SWAPGS - jmp restore_regs_and_iret + jmp swapgs_restore_regs_and_return_to_usermode 1: /* kernel thread */ @@ -612,8 +617,21 @@ GLOBAL(retint_user) mov %rsp,%rdi call prepare_exit_to_usermode TRACE_IRQS_IRETQ + +GLOBAL(swapgs_restore_regs_and_return_to_usermode) +#ifdef CONFIG_DEBUG_ENTRY + /* Assert that pt_regs indicates user mode. */ + testb $3, CS(%rsp) + jnz 1f + ud2 +1: +#endif SWAPGS - jmp restore_regs_and_iret + POP_EXTRA_REGS + POP_C_REGS + addq $8, %rsp /* skip regs->orig_ax */ + INTERRUPT_RETURN + /* Returning to kernel space */ retint_kernel: @@ -633,15 +651,17 @@ retint_kernel: */ TRACE_IRQS_IRETQ -/* - * At this label, code paths which return to kernel and to user, - * which come from interrupts/exception and from syscalls, merge. - */ -GLOBAL(restore_regs_and_iret) - RESTORE_EXTRA_REGS -restore_c_regs_and_iret: - RESTORE_C_REGS - REMOVE_PT_GPREGS_FROM_STACK 8 +GLOBAL(restore_regs_and_return_to_kernel) +#ifdef CONFIG_DEBUG_ENTRY + /* Assert that pt_regs indicates kernel mode. */ + testb $3, CS(%rsp) + jz 1f + ud2 +1: +#endif + POP_EXTRA_REGS + POP_C_REGS + addq $8, %rsp /* skip regs->orig_ax */ INTERRUPT_RETURN ENTRY(native_iret) @@ -818,7 +838,7 @@ ENTRY(\sym) ASM_CLAC - .ifeq \has_error_code + .if \has_error_code == 0 pushq $-1 /* ORIG_RAX: no syscall to restart */ .endif @@ -1059,6 +1079,7 @@ idtentry int3 do_int3 has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK idtentry stack_segment do_stack_segment has_error_code=1 #ifdef CONFIG_XEN +idtentry xennmi do_nmi has_error_code=0 idtentry xendebug do_debug has_error_code=0 idtentry xenint3 do_int3 has_error_code=0 #endif @@ -1112,17 +1133,14 @@ ENTRY(paranoid_exit) DISABLE_INTERRUPTS(CLBR_ANY) TRACE_IRQS_OFF_DEBUG testl %ebx, %ebx /* swapgs needed? */ - jnz paranoid_exit_no_swapgs + jnz .Lparanoid_exit_no_swapgs TRACE_IRQS_IRETQ SWAPGS_UNSAFE_STACK - jmp paranoid_exit_restore -paranoid_exit_no_swapgs: + jmp .Lparanoid_exit_restore +.Lparanoid_exit_no_swapgs: TRACE_IRQS_IRETQ_DEBUG -paranoid_exit_restore: - RESTORE_EXTRA_REGS - RESTORE_C_REGS - REMOVE_PT_GPREGS_FROM_STACK 8 - INTERRUPT_RETURN +.Lparanoid_exit_restore: + jmp restore_regs_and_return_to_kernel END(paranoid_exit) /* @@ -1223,10 +1241,13 @@ ENTRY(error_exit) jmp retint_user END(error_exit) -/* Runs on exception stack */ -/* XXX: broken on Xen PV */ +/* + * Runs on exception stack. Xen PV does not go through this path at all, + * so we can use real assembly here. + */ ENTRY(nmi) UNWIND_HINT_IRET_REGS + /* * We allow breakpoints in NMIs. If a breakpoint occurs, then * the iretq it performs will take us out of NMI context. @@ -1284,7 +1305,7 @@ ENTRY(nmi) * stacks lest we corrupt the "NMI executing" variable. */ - SWAPGS_UNSAFE_STACK + swapgs cld movq %rsp, %rdx movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp @@ -1328,8 +1349,7 @@ ENTRY(nmi) * Return back to user mode. We must *not* do the normal exit * work, because we don't want to enable interrupts. */ - SWAPGS - jmp restore_regs_and_iret + jmp swapgs_restore_regs_and_return_to_usermode .Lnmi_from_kernel: /* @@ -1450,7 +1470,7 @@ nested_nmi_out: popq %rdx /* We are returning to kernel mode, so this cannot result in a fault. */ - INTERRUPT_RETURN + iretq first_nmi: /* Restore rdx. */ @@ -1481,7 +1501,7 @@ first_nmi: pushfq /* RFLAGS */ pushq $__KERNEL_CS /* CS */ pushq $1f /* RIP */ - INTERRUPT_RETURN /* continues at repeat_nmi below */ + iretq /* continues at repeat_nmi below */ UNWIND_HINT_IRET_REGS 1: #endif @@ -1544,29 +1564,34 @@ end_repeat_nmi: nmi_swapgs: SWAPGS_UNSAFE_STACK nmi_restore: - RESTORE_EXTRA_REGS - RESTORE_C_REGS + POP_EXTRA_REGS + POP_C_REGS - /* Point RSP at the "iret" frame. */ - REMOVE_PT_GPREGS_FROM_STACK 6*8 + /* + * Skip orig_ax and the "outermost" frame to point RSP at the "iret" + * at the "iret" frame. + */ + addq $6*8, %rsp /* * Clear "NMI executing". Set DF first so that we can easily * distinguish the remaining code between here and IRET from - * the SYSCALL entry and exit paths. On a native kernel, we - * could just inspect RIP, but, on paravirt kernels, - * INTERRUPT_RETURN can translate into a jump into a - * hypercall page. + * the SYSCALL entry and exit paths. + * + * We arguably should just inspect RIP instead, but I (Andy) wrote + * this code when I had the misapprehension that Xen PV supported + * NMIs, and Xen PV would break that approach. */ std movq $0, 5*8(%rsp) /* clear "NMI executing" */ /* - * INTERRUPT_RETURN reads the "iret" frame and exits the NMI - * stack in a single instruction. We are returning to kernel - * mode, so this cannot result in a fault. + * iretq reads the "iret" frame and exits the NMI stack in a + * single instruction. We are returning to kernel mode, so this + * cannot result in a fault. Similarly, we don't need to worry + * about espfix64 on the way back to kernel mode. */ - INTERRUPT_RETURN + iretq END(nmi) ENTRY(ignore_sysret) diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index b5c7a56ed256..568e130d932c 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -337,8 +337,7 @@ ENTRY(entry_INT80_compat) /* Go back to user mode. */ TRACE_IRQS_ON - SWAPGS - jmp restore_regs_and_iret + jmp swapgs_restore_regs_and_return_to_usermode END(entry_INT80_compat) ENTRY(stub32_clone) diff --git a/arch/x86/entry/syscalls/Makefile b/arch/x86/entry/syscalls/Makefile index 331f1dca5085..6fb9b57ed5ba 100644 --- a/arch/x86/entry/syscalls/Makefile +++ b/arch/x86/entry/syscalls/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 -out := $(obj)/../../include/generated/asm -uapi := $(obj)/../../include/generated/uapi/asm +out := arch/$(SRCARCH)/include/generated/asm +uapi := arch/$(SRCARCH)/include/generated/uapi/asm # Create output directory if not already present _dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)') \ diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c index fa8dbfcf7ed3..11b13c4b43d5 100644 --- a/arch/x86/entry/vdso/vclock_gettime.c +++ b/arch/x86/entry/vdso/vclock_gettime.c @@ -318,7 +318,7 @@ int gettimeofday(struct timeval *, struct timezone *) notrace time_t __vdso_time(time_t *t) { /* This is atomic on x86 so we don't need any locks. */ - time_t result = ACCESS_ONCE(gtod->wall_time_sec); + time_t result = READ_ONCE(gtod->wall_time_sec); if (t) *t = result; diff --git a/arch/x86/entry/vdso/vdso2c.c b/arch/x86/entry/vdso/vdso2c.c index 0780a443a53b..4674f58581a1 100644 --- a/arch/x86/entry/vdso/vdso2c.c +++ b/arch/x86/entry/vdso/vdso2c.c @@ -65,6 +65,7 @@ #include <linux/elf.h> #include <linux/types.h> +#include <linux/kernel.h> const char *outfilename; @@ -151,7 +152,7 @@ extern void bad_put_le(void); PLE(x, val, 64, PLE(x, val, 32, PLE(x, val, 16, LAST_PLE(x, val)))) -#define NSYMS (sizeof(required_syms) / sizeof(required_syms[0])) +#define NSYMS ARRAY_SIZE(required_syms) #define BITSFUNC3(name, bits, suffix) name##bits##suffix #define BITSFUNC2(name, bits, suffix) BITSFUNC3(name, bits, suffix) diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index 1911310959f8..d63053142b16 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -114,10 +114,11 @@ static int vvar_fault(const struct vm_special_mapping *sm, struct pvclock_vsyscall_time_info *pvti = pvclock_pvti_cpu0_va(); if (pvti && vclock_was_used(VCLOCK_PVCLOCK)) { - ret = vm_insert_pfn( + ret = vm_insert_pfn_prot( vma, vmf->address, - __pa(pvti) >> PAGE_SHIFT); + __pa(pvti) >> PAGE_SHIFT, + pgprot_decrypted(vma->vm_page_prot)); } } else if (sym_offset == image->sym_hvclock_page) { struct ms_hyperv_tsc_page *tsc_pg = hv_get_tsc_page(); diff --git a/arch/x86/events/amd/iommu.c b/arch/x86/events/amd/iommu.c index 3641e24fdac5..38b5d41b0c37 100644 --- a/arch/x86/events/amd/iommu.c +++ b/arch/x86/events/amd/iommu.c @@ -405,7 +405,7 @@ const struct attribute_group *amd_iommu_attr_groups[] = { NULL, }; -static struct pmu iommu_pmu = { +static const struct pmu iommu_pmu __initconst = { .event_init = perf_iommu_event_init, .add = perf_iommu_add, .del = perf_iommu_del, diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 80534d3c2480..140d33288e78 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -2118,7 +2118,7 @@ static int x86_pmu_event_init(struct perf_event *event) event->destroy(event); } - if (ACCESS_ONCE(x86_pmu.attr_rdpmc)) + if (READ_ONCE(x86_pmu.attr_rdpmc)) event->hw.flags |= PERF_X86_EVENT_RDPMC_ALLOWED; return err; @@ -2371,7 +2371,7 @@ static unsigned long get_segment_base(unsigned int segment) struct ldt_struct *ldt; /* IRQs are off, so this synchronizes with smp_store_release */ - ldt = lockless_dereference(current->active_mm->context.ldt); + ldt = READ_ONCE(current->active_mm->context.ldt); if (!ldt || idx >= ldt->nr_entries) return 0; diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 9fb9a1f1e47b..43445da30cea 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2958,6 +2958,10 @@ static unsigned long intel_pmu_free_running_flags(struct perf_event *event) if (event->attr.use_clockid) flags &= ~PERF_SAMPLE_TIME; + if (!event->attr.exclude_kernel) + flags &= ~PERF_SAMPLE_REGS_USER; + if (event->attr.sample_regs_user & ~PEBS_REGS) + flags &= ~(PERF_SAMPLE_REGS_USER | PERF_SAMPLE_REGS_INTR); return flags; } diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 4196f81ec0e1..f7aaadf9331f 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -85,13 +85,15 @@ struct amd_nb { * Flags PEBS can handle without an PMI. * * TID can only be handled by flushing at context switch. + * REGS_USER can be handled for events limited to ring 3. * */ #define PEBS_FREERUNNING_FLAGS \ (PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_ADDR | \ PERF_SAMPLE_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_STREAM_ID | \ PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_IDENTIFIER | \ - PERF_SAMPLE_TRANSACTION | PERF_SAMPLE_PHYS_ADDR) + PERF_SAMPLE_TRANSACTION | PERF_SAMPLE_PHYS_ADDR | \ + PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER) /* * A debug store configuration. @@ -110,6 +112,26 @@ struct debug_store { u64 pebs_event_reset[MAX_PEBS_EVENTS]; }; +#define PEBS_REGS \ + (PERF_REG_X86_AX | \ + PERF_REG_X86_BX | \ + PERF_REG_X86_CX | \ + PERF_REG_X86_DX | \ + PERF_REG_X86_DI | \ + PERF_REG_X86_SI | \ + PERF_REG_X86_SP | \ + PERF_REG_X86_BP | \ + PERF_REG_X86_IP | \ + PERF_REG_X86_FLAGS | \ + PERF_REG_X86_R8 | \ + PERF_REG_X86_R9 | \ + PERF_REG_X86_R10 | \ + PERF_REG_X86_R11 | \ + PERF_REG_X86_R12 | \ + PERF_REG_X86_R13 | \ + PERF_REG_X86_R14 | \ + PERF_REG_X86_R15) + /* * Per register state. */ diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index a5db63f728a2..a0b86cf486e0 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -113,7 +113,7 @@ void hyperv_init(void) u64 guest_id; union hv_x64_msr_hypercall_contents hypercall_msr; - if (x86_hyper != &x86_hyper_ms_hyperv) + if (x86_hyper_type != X86_HYPER_MS_HYPERV) return; /* Allocate percpu VP index */ diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 5f01671c68f2..a9e57f08bfa6 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -53,6 +53,15 @@ extern int local_apic_timer_c2_ok; extern int disable_apic; extern unsigned int lapic_timer_frequency; +extern enum apic_intr_mode_id apic_intr_mode; +enum apic_intr_mode_id { + APIC_PIC, + APIC_VIRTUAL_WIRE, + APIC_VIRTUAL_WIRE_NO_CONFIG, + APIC_SYMMETRIC_IO, + APIC_SYMMETRIC_IO_NO_ROUTING +}; + #ifdef CONFIG_SMP extern void __inquire_remote_apic(int apicid); #else /* CONFIG_SMP */ @@ -127,14 +136,13 @@ extern void disconnect_bsp_APIC(int virt_wire_setup); extern void disable_local_APIC(void); extern void lapic_shutdown(void); extern void sync_Arb_IDs(void); -extern void init_bsp_APIC(void); +extern void apic_intr_mode_init(void); extern void setup_local_APIC(void); extern void init_apic_mappings(void); void register_lapic_address(unsigned long address); extern void setup_boot_APIC_clock(void); extern void setup_secondary_APIC_clock(void); extern void lapic_update_tsc_freq(void); -extern int APIC_init_uniprocessor(void); #ifdef CONFIG_X86_64 static inline int apic_force_enable(unsigned long addr) @@ -145,7 +153,7 @@ static inline int apic_force_enable(unsigned long addr) extern int apic_force_enable(unsigned long addr); #endif -extern int apic_bsp_setup(bool upmode); +extern void apic_bsp_setup(bool upmode); extern void apic_ap_setup(void); /* @@ -161,6 +169,10 @@ static inline int apic_is_clustered_box(void) #endif extern int setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask); +extern void lapic_assign_system_vectors(void); +extern void lapic_assign_legacy_vector(unsigned int isairq, bool replace); +extern void lapic_online(void); +extern void lapic_offline(void); #else /* !CONFIG_X86_LOCAL_APIC */ static inline void lapic_shutdown(void) { } @@ -170,6 +182,9 @@ static inline void disable_local_APIC(void) { } # define setup_boot_APIC_clock x86_init_noop # define setup_secondary_APIC_clock x86_init_noop static inline void lapic_update_tsc_freq(void) { } +static inline void apic_intr_mode_init(void) { } +static inline void lapic_assign_system_vectors(void) { } +static inline void lapic_assign_legacy_vector(unsigned int i, bool r) { } #endif /* !CONFIG_X86_LOCAL_APIC */ #ifdef CONFIG_X86_X2APIC @@ -265,73 +280,63 @@ struct irq_data; * James Cleverdon. */ struct apic { - char *name; - - int (*probe)(void); - int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id); - int (*apic_id_valid)(int apicid); - int (*apic_id_registered)(void); - - u32 irq_delivery_mode; - u32 irq_dest_mode; - - const struct cpumask *(*target_cpus)(void); - - int disable_esr; - - int dest_logical; - unsigned long (*check_apicid_used)(physid_mask_t *map, int apicid); - - void (*vector_allocation_domain)(int cpu, struct cpumask *retmask, - const struct cpumask *mask); - void (*init_apic_ldr)(void); - - void (*ioapic_phys_id_map)(physid_mask_t *phys_map, physid_mask_t *retmap); - - void (*setup_apic_routing)(void); - int (*cpu_present_to_apicid)(int mps_cpu); - void (*apicid_to_cpu_present)(int phys_apicid, physid_mask_t *retmap); - int (*check_phys_apicid_present)(int phys_apicid); - int (*phys_pkg_id)(int cpuid_apic, int index_msb); - - unsigned int (*get_apic_id)(unsigned long x); - /* Can't be NULL on 64-bit */ - unsigned long (*set_apic_id)(unsigned int id); - - int (*cpu_mask_to_apicid)(const struct cpumask *cpumask, - struct irq_data *irqdata, - unsigned int *apicid); - - /* ipi */ - void (*send_IPI)(int cpu, int vector); - void (*send_IPI_mask)(const struct cpumask *mask, int vector); - void (*send_IPI_mask_allbutself)(const struct cpumask *mask, - int vector); - void (*send_IPI_allbutself)(int vector); - void (*send_IPI_all)(int vector); - void (*send_IPI_self)(int vector); + /* Hotpath functions first */ + void (*eoi_write)(u32 reg, u32 v); + void (*native_eoi_write)(u32 reg, u32 v); + void (*write)(u32 reg, u32 v); + u32 (*read)(u32 reg); + + /* IPI related functions */ + void (*wait_icr_idle)(void); + u32 (*safe_wait_icr_idle)(void); + + void (*send_IPI)(int cpu, int vector); + void (*send_IPI_mask)(const struct cpumask *mask, int vector); + void (*send_IPI_mask_allbutself)(const struct cpumask *msk, int vec); + void (*send_IPI_allbutself)(int vector); + void (*send_IPI_all)(int vector); + void (*send_IPI_self)(int vector); + + /* dest_logical is used by the IPI functions */ + u32 dest_logical; + u32 disable_esr; + u32 irq_delivery_mode; + u32 irq_dest_mode; + + /* Functions and data related to vector allocation */ + void (*vector_allocation_domain)(int cpu, struct cpumask *retmask, + const struct cpumask *mask); + int (*cpu_mask_to_apicid)(const struct cpumask *cpumask, + struct irq_data *irqdata, + unsigned int *apicid); + u32 (*calc_dest_apicid)(unsigned int cpu); + + /* ICR related functions */ + u64 (*icr_read)(void); + void (*icr_write)(u32 low, u32 high); + + /* Probe, setup and smpboot functions */ + int (*probe)(void); + int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id); + int (*apic_id_valid)(int apicid); + int (*apic_id_registered)(void); + + bool (*check_apicid_used)(physid_mask_t *map, int apicid); + void (*init_apic_ldr)(void); + void (*ioapic_phys_id_map)(physid_mask_t *phys_map, physid_mask_t *retmap); + void (*setup_apic_routing)(void); + int (*cpu_present_to_apicid)(int mps_cpu); + void (*apicid_to_cpu_present)(int phys_apicid, physid_mask_t *retmap); + int (*check_phys_apicid_present)(int phys_apicid); + int (*phys_pkg_id)(int cpuid_apic, int index_msb); + + u32 (*get_apic_id)(unsigned long x); + u32 (*set_apic_id)(unsigned int id); /* wakeup_secondary_cpu */ - int (*wakeup_secondary_cpu)(int apicid, unsigned long start_eip); + int (*wakeup_secondary_cpu)(int apicid, unsigned long start_eip); - void (*inquire_remote_apic)(int apicid); - - /* apic ops */ - u32 (*read)(u32 reg); - void (*write)(u32 reg, u32 v); - /* - * ->eoi_write() has the same signature as ->write(). - * - * Drivers can support both ->eoi_write() and ->write() by passing the same - * callback value. Kernel can override ->eoi_write() and fall back - * on write for EOI. - */ - void (*eoi_write)(u32 reg, u32 v); - void (*native_eoi_write)(u32 reg, u32 v); - u64 (*icr_read)(void); - void (*icr_write)(u32 low, u32 high); - void (*wait_icr_idle)(void); - u32 (*safe_wait_icr_idle)(void); + void (*inquire_remote_apic)(int apicid); #ifdef CONFIG_X86_32 /* @@ -346,6 +351,7 @@ struct apic { */ int (*x86_32_early_logical_apicid)(int cpu); #endif + char *name; }; /* @@ -380,6 +386,7 @@ extern struct apic *__apicdrivers[], *__apicdrivers_end[]; */ #ifdef CONFIG_SMP extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip); +extern int lapic_can_unplug_cpu(void); #endif #ifdef CONFIG_X86_LOCAL_APIC @@ -463,84 +470,33 @@ static inline unsigned default_get_apic_id(unsigned long x) extern void apic_send_IPI_self(int vector); DECLARE_PER_CPU(int, x2apic_extra_bits); - -extern int default_cpu_present_to_apicid(int mps_cpu); -extern int default_check_phys_apicid_present(int phys_apicid); #endif extern void generic_bigsmp_probe(void); - #ifdef CONFIG_X86_LOCAL_APIC #include <asm/smp.h> #define APIC_DFR_VALUE (APIC_DFR_FLAT) -static inline const struct cpumask *default_target_cpus(void) -{ -#ifdef CONFIG_SMP - return cpu_online_mask; -#else - return cpumask_of(0); -#endif -} - -static inline const struct cpumask *online_target_cpus(void) -{ - return cpu_online_mask; -} - DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid); +extern struct apic apic_noop; static inline unsigned int read_apic_id(void) { - unsigned int reg; - - reg = apic_read(APIC_ID); + unsigned int reg = apic_read(APIC_ID); return apic->get_apic_id(reg); } -static inline int default_apic_id_valid(int apicid) -{ - return (apicid < 255); -} - +extern int default_apic_id_valid(int apicid); extern int default_acpi_madt_oem_check(char *, char *); - extern void default_setup_apic_routing(void); -extern struct apic apic_noop; - -#ifdef CONFIG_X86_32 - -static inline int noop_x86_32_early_logical_apicid(int cpu) -{ - return BAD_APICID; -} - -/* - * Set up the logical destination ID. - * - * Intel recommends to set DFR, LDR and TPR before enabling - * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel - * document number 292116). So here it goes... - */ -extern void default_init_apic_ldr(void); - -static inline int default_apic_id_registered(void) -{ - return physid_isset(read_apic_id(), phys_cpu_present_map); -} - -static inline int default_phys_pkg_id(int cpuid_apic, int index_msb) -{ - return cpuid_apic >> index_msb; -} - -#endif +extern u32 apic_default_calc_apicid(unsigned int cpu); +extern u32 apic_flat_calc_apicid(unsigned int cpu); extern int flat_cpu_mask_to_apicid(const struct cpumask *cpumask, struct irq_data *irqdata, @@ -548,71 +504,17 @@ extern int flat_cpu_mask_to_apicid(const struct cpumask *cpumask, extern int default_cpu_mask_to_apicid(const struct cpumask *cpumask, struct irq_data *irqdata, unsigned int *apicid); - -static inline void -flat_vector_allocation_domain(int cpu, struct cpumask *retmask, - const struct cpumask *mask) -{ - /* Careful. Some cpus do not strictly honor the set of cpus - * specified in the interrupt destination when using lowest - * priority interrupt delivery mode. - * - * In particular there was a hyperthreading cpu observed to - * deliver interrupts to the wrong hyperthread when only one - * hyperthread was specified in the interrupt desitination. - */ - cpumask_clear(retmask); - cpumask_bits(retmask)[0] = APIC_ALL_CPUS; -} - -static inline void -default_vector_allocation_domain(int cpu, struct cpumask *retmask, - const struct cpumask *mask) -{ - cpumask_copy(retmask, cpumask_of(cpu)); -} - -static inline unsigned long default_check_apicid_used(physid_mask_t *map, int apicid) -{ - return physid_isset(apicid, *map); -} - -static inline void default_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap) -{ - *retmap = *phys_map; -} - -static inline int __default_cpu_present_to_apicid(int mps_cpu) -{ - if (mps_cpu < nr_cpu_ids && cpu_present(mps_cpu)) - return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu); - else - return BAD_APICID; -} - -static inline int -__default_check_phys_apicid_present(int phys_apicid) -{ - return physid_isset(phys_apicid, phys_cpu_present_map); -} - -#ifdef CONFIG_X86_32 -static inline int default_cpu_present_to_apicid(int mps_cpu) -{ - return __default_cpu_present_to_apicid(mps_cpu); -} - -static inline int -default_check_phys_apicid_present(int phys_apicid) -{ - return __default_check_phys_apicid_present(phys_apicid); -} -#else +extern bool default_check_apicid_used(physid_mask_t *map, int apicid); +extern void flat_vector_allocation_domain(int cpu, struct cpumask *retmask, + const struct cpumask *mask); +extern void default_vector_allocation_domain(int cpu, struct cpumask *retmask, + const struct cpumask *mask); +extern void default_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap); extern int default_cpu_present_to_apicid(int mps_cpu); extern int default_check_phys_apicid_present(int phys_apicid); -#endif #endif /* CONFIG_X86_LOCAL_APIC */ + extern void irq_enter(void); extern void irq_exit(void); diff --git a/arch/x86/include/asm/archrandom.h b/arch/x86/include/asm/archrandom.h index 5b0579abb398..3ac991d81e74 100644 --- a/arch/x86/include/asm/archrandom.h +++ b/arch/x86/include/asm/archrandom.h @@ -45,7 +45,7 @@ static inline bool rdrand_long(unsigned long *v) bool ok; unsigned int retry = RDRAND_RETRY_LOOPS; do { - asm volatile(RDRAND_LONG "\n\t" + asm volatile(RDRAND_LONG CC_SET(c) : CC_OUT(c) (ok), "=a" (*v)); if (ok) @@ -59,7 +59,7 @@ static inline bool rdrand_int(unsigned int *v) bool ok; unsigned int retry = RDRAND_RETRY_LOOPS; do { - asm volatile(RDRAND_INT "\n\t" + asm volatile(RDRAND_INT CC_SET(c) : CC_OUT(c) (ok), "=a" (*v)); if (ok) @@ -71,7 +71,7 @@ static inline bool rdrand_int(unsigned int *v) static inline bool rdseed_long(unsigned long *v) { bool ok; - asm volatile(RDSEED_LONG "\n\t" + asm volatile(RDSEED_LONG CC_SET(c) : CC_OUT(c) (ok), "=a" (*v)); return ok; @@ -80,7 +80,7 @@ static inline bool rdseed_long(unsigned long *v) static inline bool rdseed_int(unsigned int *v) { bool ok; - asm volatile(RDSEED_INT "\n\t" + asm volatile(RDSEED_INT CC_SET(c) : CC_OUT(c) (ok), "=a" (*v)); return ok; diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index 01727dbc294a..7fb336210e1b 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h @@ -12,11 +12,11 @@ */ #ifdef CONFIG_X86_32 -#define mb() asm volatile(ALTERNATIVE("lock; addl $0,0(%%esp)", "mfence", \ +#define mb() asm volatile(ALTERNATIVE("lock; addl $0,-4(%%esp)", "mfence", \ X86_FEATURE_XMM2) ::: "memory", "cc") -#define rmb() asm volatile(ALTERNATIVE("lock; addl $0,0(%%esp)", "lfence", \ +#define rmb() asm volatile(ALTERNATIVE("lock; addl $0,-4(%%esp)", "lfence", \ X86_FEATURE_XMM2) ::: "memory", "cc") -#define wmb() asm volatile(ALTERNATIVE("lock; addl $0,0(%%esp)", "sfence", \ +#define wmb() asm volatile(ALTERNATIVE("lock; addl $0,-4(%%esp)", "sfence", \ X86_FEATURE_XMM2) ::: "memory", "cc") #else #define mb() asm volatile("mfence":::"memory") @@ -31,7 +31,11 @@ #endif #define dma_wmb() barrier() -#define __smp_mb() mb() +#ifdef CONFIG_X86_32 +#define __smp_mb() asm volatile("lock; addl $0,-4(%%esp)" ::: "memory", "cc") +#else +#define __smp_mb() asm volatile("lock; addl $0,-4(%%rsp)" ::: "memory", "cc") +#endif #define __smp_rmb() dma_rmb() #define __smp_wmb() barrier() #define __smp_store_mb(var, value) do { (void)xchg(&var, value); } while (0) diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index 2bcf47314959..3fa039855b8f 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -143,7 +143,7 @@ static __always_inline void __clear_bit(long nr, volatile unsigned long *addr) static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr) { bool negative; - asm volatile(LOCK_PREFIX "andb %2,%1\n\t" + asm volatile(LOCK_PREFIX "andb %2,%1" CC_SET(s) : CC_OUT(s) (negative), ADDR : "ir" ((char) ~(1 << nr)) : "memory"); @@ -246,7 +246,7 @@ static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long * { bool oldbit; - asm("bts %2,%1\n\t" + asm("bts %2,%1" CC_SET(c) : CC_OUT(c) (oldbit), ADDR : "Ir" (nr)); @@ -286,7 +286,7 @@ static __always_inline bool __test_and_clear_bit(long nr, volatile unsigned long { bool oldbit; - asm volatile("btr %2,%1\n\t" + asm volatile("btr %2,%1" CC_SET(c) : CC_OUT(c) (oldbit), ADDR : "Ir" (nr)); @@ -298,7 +298,7 @@ static __always_inline bool __test_and_change_bit(long nr, volatile unsigned lon { bool oldbit; - asm volatile("btc %2,%1\n\t" + asm volatile("btc %2,%1" CC_SET(c) : CC_OUT(c) (oldbit), ADDR : "Ir" (nr) : "memory"); @@ -329,7 +329,7 @@ static __always_inline bool variable_test_bit(long nr, volatile const unsigned l { bool oldbit; - asm volatile("bt %2,%1\n\t" + asm volatile("bt %2,%1" CC_SET(c) : CC_OUT(c) (oldbit) : "m" (*(unsigned long *)addr), "Ir" (nr)); diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h index 9eef9cc64c68..a600a6cda9ec 100644 --- a/arch/x86/include/asm/compat.h +++ b/arch/x86/include/asm/compat.h @@ -7,6 +7,7 @@ */ #include <linux/types.h> #include <linux/sched.h> +#include <linux/sched/task_stack.h> #include <asm/processor.h> #include <asm/user32.h> #include <asm/unistd.h> diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 0dfa68438e80..bf6a76202a77 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -126,11 +126,10 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; #define boot_cpu_has(bit) cpu_has(&boot_cpu_data, bit) #define set_cpu_cap(c, bit) set_bit(bit, (unsigned long *)((c)->x86_capability)) -#define clear_cpu_cap(c, bit) clear_bit(bit, (unsigned long *)((c)->x86_capability)) -#define setup_clear_cpu_cap(bit) do { \ - clear_cpu_cap(&boot_cpu_data, bit); \ - set_bit(bit, (unsigned long *)cpu_caps_cleared); \ -} while (0) + +extern void setup_clear_cpu_cap(unsigned int bit); +extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit); + #define setup_force_cpu_cap(bit) do { \ set_cpu_cap(&boot_cpu_data, bit); \ set_bit(bit, (unsigned long *)cpu_caps_set); \ diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 793690fbda36..c0b0e9e8aa66 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -13,173 +13,176 @@ /* * Defines x86 CPU feature bits */ -#define NCAPINTS 18 /* N 32-bit words worth of info */ -#define NBUGINTS 1 /* N 32-bit bug flags */ +#define NCAPINTS 18 /* N 32-bit words worth of info */ +#define NBUGINTS 1 /* N 32-bit bug flags */ /* * Note: If the comment begins with a quoted string, that string is used * in /proc/cpuinfo instead of the macro name. If the string is "", * this feature bit is not displayed in /proc/cpuinfo at all. + * + * When adding new features here that depend on other features, + * please update the table in kernel/cpu/cpuid-deps.c as well. */ -/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */ -#define X86_FEATURE_FPU ( 0*32+ 0) /* Onboard FPU */ -#define X86_FEATURE_VME ( 0*32+ 1) /* Virtual Mode Extensions */ -#define X86_FEATURE_DE ( 0*32+ 2) /* Debugging Extensions */ -#define X86_FEATURE_PSE ( 0*32+ 3) /* Page Size Extensions */ -#define X86_FEATURE_TSC ( 0*32+ 4) /* Time Stamp Counter */ -#define X86_FEATURE_MSR ( 0*32+ 5) /* Model-Specific Registers */ -#define X86_FEATURE_PAE ( 0*32+ 6) /* Physical Address Extensions */ -#define X86_FEATURE_MCE ( 0*32+ 7) /* Machine Check Exception */ -#define X86_FEATURE_CX8 ( 0*32+ 8) /* CMPXCHG8 instruction */ -#define X86_FEATURE_APIC ( 0*32+ 9) /* Onboard APIC */ -#define X86_FEATURE_SEP ( 0*32+11) /* SYSENTER/SYSEXIT */ -#define X86_FEATURE_MTRR ( 0*32+12) /* Memory Type Range Registers */ -#define X86_FEATURE_PGE ( 0*32+13) /* Page Global Enable */ -#define X86_FEATURE_MCA ( 0*32+14) /* Machine Check Architecture */ -#define X86_FEATURE_CMOV ( 0*32+15) /* CMOV instructions */ - /* (plus FCMOVcc, FCOMI with FPU) */ -#define X86_FEATURE_PAT ( 0*32+16) /* Page Attribute Table */ -#define X86_FEATURE_PSE36 ( 0*32+17) /* 36-bit PSEs */ -#define X86_FEATURE_PN ( 0*32+18) /* Processor serial number */ -#define X86_FEATURE_CLFLUSH ( 0*32+19) /* CLFLUSH instruction */ -#define X86_FEATURE_DS ( 0*32+21) /* "dts" Debug Store */ -#define X86_FEATURE_ACPI ( 0*32+22) /* ACPI via MSR */ -#define X86_FEATURE_MMX ( 0*32+23) /* Multimedia Extensions */ -#define X86_FEATURE_FXSR ( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */ -#define X86_FEATURE_XMM ( 0*32+25) /* "sse" */ -#define X86_FEATURE_XMM2 ( 0*32+26) /* "sse2" */ -#define X86_FEATURE_SELFSNOOP ( 0*32+27) /* "ss" CPU self snoop */ -#define X86_FEATURE_HT ( 0*32+28) /* Hyper-Threading */ -#define X86_FEATURE_ACC ( 0*32+29) /* "tm" Automatic clock control */ -#define X86_FEATURE_IA64 ( 0*32+30) /* IA-64 processor */ -#define X86_FEATURE_PBE ( 0*32+31) /* Pending Break Enable */ +/* Intel-defined CPU features, CPUID level 0x00000001 (EDX), word 0 */ +#define X86_FEATURE_FPU ( 0*32+ 0) /* Onboard FPU */ +#define X86_FEATURE_VME ( 0*32+ 1) /* Virtual Mode Extensions */ +#define X86_FEATURE_DE ( 0*32+ 2) /* Debugging Extensions */ +#define X86_FEATURE_PSE ( 0*32+ 3) /* Page Size Extensions */ +#define X86_FEATURE_TSC ( 0*32+ 4) /* Time Stamp Counter */ +#define X86_FEATURE_MSR ( 0*32+ 5) /* Model-Specific Registers */ +#define X86_FEATURE_PAE ( 0*32+ 6) /* Physical Address Extensions */ +#define X86_FEATURE_MCE ( 0*32+ 7) /* Machine Check Exception */ +#define X86_FEATURE_CX8 ( 0*32+ 8) /* CMPXCHG8 instruction */ +#define X86_FEATURE_APIC ( 0*32+ 9) /* Onboard APIC */ +#define X86_FEATURE_SEP ( 0*32+11) /* SYSENTER/SYSEXIT */ +#define X86_FEATURE_MTRR ( 0*32+12) /* Memory Type Range Registers */ +#define X86_FEATURE_PGE ( 0*32+13) /* Page Global Enable */ +#define X86_FEATURE_MCA ( 0*32+14) /* Machine Check Architecture */ +#define X86_FEATURE_CMOV ( 0*32+15) /* CMOV instructions (plus FCMOVcc, FCOMI with FPU) */ +#define X86_FEATURE_PAT ( 0*32+16) /* Page Attribute Table */ +#define X86_FEATURE_PSE36 ( 0*32+17) /* 36-bit PSEs */ +#define X86_FEATURE_PN ( 0*32+18) /* Processor serial number */ +#define X86_FEATURE_CLFLUSH ( 0*32+19) /* CLFLUSH instruction */ +#define X86_FEATURE_DS ( 0*32+21) /* "dts" Debug Store */ +#define X86_FEATURE_ACPI ( 0*32+22) /* ACPI via MSR */ +#define X86_FEATURE_MMX ( 0*32+23) /* Multimedia Extensions */ +#define X86_FEATURE_FXSR ( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */ +#define X86_FEATURE_XMM ( 0*32+25) /* "sse" */ +#define X86_FEATURE_XMM2 ( 0*32+26) /* "sse2" */ +#define X86_FEATURE_SELFSNOOP ( 0*32+27) /* "ss" CPU self snoop */ +#define X86_FEATURE_HT ( 0*32+28) /* Hyper-Threading */ +#define X86_FEATURE_ACC ( 0*32+29) /* "tm" Automatic clock control */ +#define X86_FEATURE_IA64 ( 0*32+30) /* IA-64 processor */ +#define X86_FEATURE_PBE ( 0*32+31) /* Pending Break Enable */ /* AMD-defined CPU features, CPUID level 0x80000001, word 1 */ /* Don't duplicate feature flags which are redundant with Intel! */ -#define X86_FEATURE_SYSCALL ( 1*32+11) /* SYSCALL/SYSRET */ -#define X86_FEATURE_MP ( 1*32+19) /* MP Capable. */ -#define X86_FEATURE_NX ( 1*32+20) /* Execute Disable */ -#define X86_FEATURE_MMXEXT ( 1*32+22) /* AMD MMX extensions */ -#define X86_FEATURE_FXSR_OPT ( 1*32+25) /* FXSAVE/FXRSTOR optimizations */ -#define X86_FEATURE_GBPAGES ( 1*32+26) /* "pdpe1gb" GB pages */ -#define X86_FEATURE_RDTSCP ( 1*32+27) /* RDTSCP */ -#define X86_FEATURE_LM ( 1*32+29) /* Long Mode (x86-64) */ -#define X86_FEATURE_3DNOWEXT ( 1*32+30) /* AMD 3DNow! extensions */ -#define X86_FEATURE_3DNOW ( 1*32+31) /* 3DNow! */ +#define X86_FEATURE_SYSCALL ( 1*32+11) /* SYSCALL/SYSRET */ +#define X86_FEATURE_MP ( 1*32+19) /* MP Capable */ +#define X86_FEATURE_NX ( 1*32+20) /* Execute Disable */ +#define X86_FEATURE_MMXEXT ( 1*32+22) /* AMD MMX extensions */ +#define X86_FEATURE_FXSR_OPT ( 1*32+25) /* FXSAVE/FXRSTOR optimizations */ +#define X86_FEATURE_GBPAGES ( 1*32+26) /* "pdpe1gb" GB pages */ +#define X86_FEATURE_RDTSCP ( 1*32+27) /* RDTSCP */ +#define X86_FEATURE_LM ( 1*32+29) /* Long Mode (x86-64, 64-bit support) */ +#define X86_FEATURE_3DNOWEXT ( 1*32+30) /* AMD 3DNow extensions */ +#define X86_FEATURE_3DNOW ( 1*32+31) /* 3DNow */ /* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */ -#define X86_FEATURE_RECOVERY ( 2*32+ 0) /* CPU in recovery mode */ -#define X86_FEATURE_LONGRUN ( 2*32+ 1) /* Longrun power control */ -#define X86_FEATURE_LRTI ( 2*32+ 3) /* LongRun table interface */ +#define X86_FEATURE_RECOVERY ( 2*32+ 0) /* CPU in recovery mode */ +#define X86_FEATURE_LONGRUN ( 2*32+ 1) /* Longrun power control */ +#define X86_FEATURE_LRTI ( 2*32+ 3) /* LongRun table interface */ /* Other features, Linux-defined mapping, word 3 */ /* This range is used for feature bits which conflict or are synthesized */ -#define X86_FEATURE_CXMMX ( 3*32+ 0) /* Cyrix MMX extensions */ -#define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */ -#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */ -#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */ -/* cpu types for specific tunings: */ -#define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */ -#define X86_FEATURE_K7 ( 3*32+ 5) /* "" Athlon */ -#define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */ -#define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */ -#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */ -#define X86_FEATURE_UP ( 3*32+ 9) /* smp kernel running on up */ -#define X86_FEATURE_ART ( 3*32+10) /* Platform has always running timer (ART) */ -#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */ -#define X86_FEATURE_PEBS ( 3*32+12) /* Precise-Event Based Sampling */ -#define X86_FEATURE_BTS ( 3*32+13) /* Branch Trace Store */ -#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in ia32 userspace */ -#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in ia32 userspace */ -#define X86_FEATURE_REP_GOOD ( 3*32+16) /* rep microcode works well */ -#define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" Mfence synchronizes RDTSC */ -#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" Lfence synchronizes RDTSC */ -#define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */ -#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */ -#define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */ -#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* cpu topology enum extensions */ -#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */ -#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */ -#define X86_FEATURE_CPUID ( 3*32+25) /* CPU has CPUID instruction itself */ -#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* has extended APICID (8 bits) */ -#define X86_FEATURE_AMD_DCM ( 3*32+27) /* multi-node processor */ -#define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */ -#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */ -#define X86_FEATURE_TSC_KNOWN_FREQ ( 3*32+31) /* TSC has known frequency */ +#define X86_FEATURE_CXMMX ( 3*32+ 0) /* Cyrix MMX extensions */ +#define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */ +#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */ +#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */ + +/* CPU types for specific tunings: */ +#define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */ +#define X86_FEATURE_K7 ( 3*32+ 5) /* "" Athlon */ +#define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */ +#define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */ +#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */ +#define X86_FEATURE_UP ( 3*32+ 9) /* SMP kernel running on UP */ +#define X86_FEATURE_ART ( 3*32+10) /* Always running timer (ART) */ +#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */ +#define X86_FEATURE_PEBS ( 3*32+12) /* Precise-Event Based Sampling */ +#define X86_FEATURE_BTS ( 3*32+13) /* Branch Trace Store */ +#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in IA32 userspace */ +#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */ +#define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */ +#define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" MFENCE synchronizes RDTSC */ +#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" LFENCE synchronizes RDTSC */ +#define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */ +#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */ +#define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */ +#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* CPU topology enum extensions */ +#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */ +#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */ +#define X86_FEATURE_CPUID ( 3*32+25) /* CPU has CPUID instruction itself */ +#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* Extended APICID (8 bits) */ +#define X86_FEATURE_AMD_DCM ( 3*32+27) /* AMD multi-node processor */ +#define X86_FEATURE_APERFMPERF ( 3*32+28) /* P-State hardware coordination feedback capability (APERF/MPERF MSRs) */ +#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */ +#define X86_FEATURE_TSC_KNOWN_FREQ ( 3*32+31) /* TSC has known frequency */ -/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ -#define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */ -#define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* PCLMULQDQ instruction */ -#define X86_FEATURE_DTES64 ( 4*32+ 2) /* 64-bit Debug Store */ -#define X86_FEATURE_MWAIT ( 4*32+ 3) /* "monitor" Monitor/Mwait support */ -#define X86_FEATURE_DSCPL ( 4*32+ 4) /* "ds_cpl" CPL Qual. Debug Store */ -#define X86_FEATURE_VMX ( 4*32+ 5) /* Hardware virtualization */ -#define X86_FEATURE_SMX ( 4*32+ 6) /* Safer mode */ -#define X86_FEATURE_EST ( 4*32+ 7) /* Enhanced SpeedStep */ -#define X86_FEATURE_TM2 ( 4*32+ 8) /* Thermal Monitor 2 */ -#define X86_FEATURE_SSSE3 ( 4*32+ 9) /* Supplemental SSE-3 */ -#define X86_FEATURE_CID ( 4*32+10) /* Context ID */ -#define X86_FEATURE_SDBG ( 4*32+11) /* Silicon Debug */ -#define X86_FEATURE_FMA ( 4*32+12) /* Fused multiply-add */ -#define X86_FEATURE_CX16 ( 4*32+13) /* CMPXCHG16B */ -#define X86_FEATURE_XTPR ( 4*32+14) /* Send Task Priority Messages */ -#define X86_FEATURE_PDCM ( 4*32+15) /* Performance Capabilities */ -#define X86_FEATURE_PCID ( 4*32+17) /* Process Context Identifiers */ -#define X86_FEATURE_DCA ( 4*32+18) /* Direct Cache Access */ -#define X86_FEATURE_XMM4_1 ( 4*32+19) /* "sse4_1" SSE-4.1 */ -#define X86_FEATURE_XMM4_2 ( 4*32+20) /* "sse4_2" SSE-4.2 */ -#define X86_FEATURE_X2APIC ( 4*32+21) /* x2APIC */ -#define X86_FEATURE_MOVBE ( 4*32+22) /* MOVBE instruction */ -#define X86_FEATURE_POPCNT ( 4*32+23) /* POPCNT instruction */ -#define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* Tsc deadline timer */ -#define X86_FEATURE_AES ( 4*32+25) /* AES instructions */ -#define X86_FEATURE_XSAVE ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */ -#define X86_FEATURE_OSXSAVE ( 4*32+27) /* "" XSAVE enabled in the OS */ -#define X86_FEATURE_AVX ( 4*32+28) /* Advanced Vector Extensions */ -#define X86_FEATURE_F16C ( 4*32+29) /* 16-bit fp conversions */ -#define X86_FEATURE_RDRAND ( 4*32+30) /* The RDRAND instruction */ -#define X86_FEATURE_HYPERVISOR ( 4*32+31) /* Running on a hypervisor */ +/* Intel-defined CPU features, CPUID level 0x00000001 (ECX), word 4 */ +#define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */ +#define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* PCLMULQDQ instruction */ +#define X86_FEATURE_DTES64 ( 4*32+ 2) /* 64-bit Debug Store */ +#define X86_FEATURE_MWAIT ( 4*32+ 3) /* "monitor" MONITOR/MWAIT support */ +#define X86_FEATURE_DSCPL ( 4*32+ 4) /* "ds_cpl" CPL-qualified (filtered) Debug Store */ +#define X86_FEATURE_VMX ( 4*32+ 5) /* Hardware virtualization */ +#define X86_FEATURE_SMX ( 4*32+ 6) /* Safer Mode eXtensions */ +#define X86_FEATURE_EST ( 4*32+ 7) /* Enhanced SpeedStep */ +#define X86_FEATURE_TM2 ( 4*32+ 8) /* Thermal Monitor 2 */ +#define X86_FEATURE_SSSE3 ( 4*32+ 9) /* Supplemental SSE-3 */ +#define X86_FEATURE_CID ( 4*32+10) /* Context ID */ +#define X86_FEATURE_SDBG ( 4*32+11) /* Silicon Debug */ +#define X86_FEATURE_FMA ( 4*32+12) /* Fused multiply-add */ +#define X86_FEATURE_CX16 ( 4*32+13) /* CMPXCHG16B instruction */ +#define X86_FEATURE_XTPR ( 4*32+14) /* Send Task Priority Messages */ +#define X86_FEATURE_PDCM ( 4*32+15) /* Perf/Debug Capabilities MSR */ +#define X86_FEATURE_PCID ( 4*32+17) /* Process Context Identifiers */ +#define X86_FEATURE_DCA ( 4*32+18) /* Direct Cache Access */ +#define X86_FEATURE_XMM4_1 ( 4*32+19) /* "sse4_1" SSE-4.1 */ +#define X86_FEATURE_XMM4_2 ( 4*32+20) /* "sse4_2" SSE-4.2 */ +#define X86_FEATURE_X2APIC ( 4*32+21) /* X2APIC */ +#define X86_FEATURE_MOVBE ( 4*32+22) /* MOVBE instruction */ +#define X86_FEATURE_POPCNT ( 4*32+23) /* POPCNT instruction */ +#define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* TSC deadline timer */ +#define X86_FEATURE_AES ( 4*32+25) /* AES instructions */ +#define X86_FEATURE_XSAVE ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV instructions */ +#define X86_FEATURE_OSXSAVE ( 4*32+27) /* "" XSAVE instruction enabled in the OS */ +#define X86_FEATURE_AVX ( 4*32+28) /* Advanced Vector Extensions */ +#define X86_FEATURE_F16C ( 4*32+29) /* 16-bit FP conversions */ +#define X86_FEATURE_RDRAND ( 4*32+30) /* RDRAND instruction */ +#define X86_FEATURE_HYPERVISOR ( 4*32+31) /* Running on a hypervisor */ /* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */ -#define X86_FEATURE_XSTORE ( 5*32+ 2) /* "rng" RNG present (xstore) */ -#define X86_FEATURE_XSTORE_EN ( 5*32+ 3) /* "rng_en" RNG enabled */ -#define X86_FEATURE_XCRYPT ( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */ -#define X86_FEATURE_XCRYPT_EN ( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */ -#define X86_FEATURE_ACE2 ( 5*32+ 8) /* Advanced Cryptography Engine v2 */ -#define X86_FEATURE_ACE2_EN ( 5*32+ 9) /* ACE v2 enabled */ -#define X86_FEATURE_PHE ( 5*32+10) /* PadLock Hash Engine */ -#define X86_FEATURE_PHE_EN ( 5*32+11) /* PHE enabled */ -#define X86_FEATURE_PMM ( 5*32+12) /* PadLock Montgomery Multiplier */ -#define X86_FEATURE_PMM_EN ( 5*32+13) /* PMM enabled */ +#define X86_FEATURE_XSTORE ( 5*32+ 2) /* "rng" RNG present (xstore) */ +#define X86_FEATURE_XSTORE_EN ( 5*32+ 3) /* "rng_en" RNG enabled */ +#define X86_FEATURE_XCRYPT ( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */ +#define X86_FEATURE_XCRYPT_EN ( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */ +#define X86_FEATURE_ACE2 ( 5*32+ 8) /* Advanced Cryptography Engine v2 */ +#define X86_FEATURE_ACE2_EN ( 5*32+ 9) /* ACE v2 enabled */ +#define X86_FEATURE_PHE ( 5*32+10) /* PadLock Hash Engine */ +#define X86_FEATURE_PHE_EN ( 5*32+11) /* PHE enabled */ +#define X86_FEATURE_PMM ( 5*32+12) /* PadLock Montgomery Multiplier */ +#define X86_FEATURE_PMM_EN ( 5*32+13) /* PMM enabled */ -/* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */ -#define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* LAHF/SAHF in long mode */ -#define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* If yes HyperThreading not valid */ -#define X86_FEATURE_SVM ( 6*32+ 2) /* Secure virtual machine */ -#define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* Extended APIC space */ -#define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* CR8 in 32-bit mode */ -#define X86_FEATURE_ABM ( 6*32+ 5) /* Advanced bit manipulation */ -#define X86_FEATURE_SSE4A ( 6*32+ 6) /* SSE-4A */ -#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* Misaligned SSE mode */ -#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* 3DNow prefetch instructions */ -#define X86_FEATURE_OSVW ( 6*32+ 9) /* OS Visible Workaround */ -#define X86_FEATURE_IBS ( 6*32+10) /* Instruction Based Sampling */ -#define X86_FEATURE_XOP ( 6*32+11) /* extended AVX instructions */ -#define X86_FEATURE_SKINIT ( 6*32+12) /* SKINIT/STGI instructions */ -#define X86_FEATURE_WDT ( 6*32+13) /* Watchdog timer */ -#define X86_FEATURE_LWP ( 6*32+15) /* Light Weight Profiling */ -#define X86_FEATURE_FMA4 ( 6*32+16) /* 4 operands MAC instructions */ -#define X86_FEATURE_TCE ( 6*32+17) /* translation cache extension */ -#define X86_FEATURE_NODEID_MSR ( 6*32+19) /* NodeId MSR */ -#define X86_FEATURE_TBM ( 6*32+21) /* trailing bit manipulations */ -#define X86_FEATURE_TOPOEXT ( 6*32+22) /* topology extensions CPUID leafs */ -#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* core performance counter extensions */ -#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */ -#define X86_FEATURE_BPEXT (6*32+26) /* data breakpoint extension */ -#define X86_FEATURE_PTSC ( 6*32+27) /* performance time-stamp counter */ -#define X86_FEATURE_PERFCTR_LLC ( 6*32+28) /* Last Level Cache performance counter extensions */ -#define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX) */ +/* More extended AMD flags: CPUID level 0x80000001, ECX, word 6 */ +#define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* LAHF/SAHF in long mode */ +#define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* If yes HyperThreading not valid */ +#define X86_FEATURE_SVM ( 6*32+ 2) /* Secure Virtual Machine */ +#define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* Extended APIC space */ +#define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* CR8 in 32-bit mode */ +#define X86_FEATURE_ABM ( 6*32+ 5) /* Advanced bit manipulation */ +#define X86_FEATURE_SSE4A ( 6*32+ 6) /* SSE-4A */ +#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* Misaligned SSE mode */ +#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* 3DNow prefetch instructions */ +#define X86_FEATURE_OSVW ( 6*32+ 9) /* OS Visible Workaround */ +#define X86_FEATURE_IBS ( 6*32+10) /* Instruction Based Sampling */ +#define X86_FEATURE_XOP ( 6*32+11) /* extended AVX instructions */ +#define X86_FEATURE_SKINIT ( 6*32+12) /* SKINIT/STGI instructions */ +#define X86_FEATURE_WDT ( 6*32+13) /* Watchdog timer */ +#define X86_FEATURE_LWP ( 6*32+15) /* Light Weight Profiling */ +#define X86_FEATURE_FMA4 ( 6*32+16) /* 4 operands MAC instructions */ +#define X86_FEATURE_TCE ( 6*32+17) /* Translation Cache Extension */ +#define X86_FEATURE_NODEID_MSR ( 6*32+19) /* NodeId MSR */ +#define X86_FEATURE_TBM ( 6*32+21) /* Trailing Bit Manipulations */ +#define X86_FEATURE_TOPOEXT ( 6*32+22) /* Topology extensions CPUID leafs */ +#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* Core performance counter extensions */ +#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */ +#define X86_FEATURE_BPEXT ( 6*32+26) /* Data breakpoint extension */ +#define X86_FEATURE_PTSC ( 6*32+27) /* Performance time-stamp counter */ +#define X86_FEATURE_PERFCTR_LLC ( 6*32+28) /* Last Level Cache performance counter extensions */ +#define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX instructions) */ /* * Auxiliary flags: Linux defined - For features scattered in various @@ -187,146 +190,154 @@ * * Reuse free bits when adding new feature flags! */ -#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT */ -#define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* Intel CPUID faulting */ -#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */ -#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */ -#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */ -#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */ -#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */ +#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT instructions */ +#define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* Intel CPUID faulting */ +#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */ +#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */ +#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */ +#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */ +#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */ -#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ -#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ -#define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */ +#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ +#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ +#define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */ -#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ -#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ -#define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */ -#define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */ +#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ +#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ +#define X86_FEATURE_AVX512_4VNNIW ( 7*32+16) /* AVX-512 Neural Network Instructions */ +#define X86_FEATURE_AVX512_4FMAPS ( 7*32+17) /* AVX-512 Multiply Accumulation Single precision */ -#define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */ +#define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */ /* Virtualization flags: Linux defined, word 8 */ -#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ -#define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */ -#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 2) /* Intel FlexPriority */ -#define X86_FEATURE_EPT ( 8*32+ 3) /* Intel Extended Page Table */ -#define X86_FEATURE_VPID ( 8*32+ 4) /* Intel Virtual Processor ID */ +#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ +#define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */ +#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 2) /* Intel FlexPriority */ +#define X86_FEATURE_EPT ( 8*32+ 3) /* Intel Extended Page Table */ +#define X86_FEATURE_VPID ( 8*32+ 4) /* Intel Virtual Processor ID */ -#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer vmmcall to vmcall */ -#define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */ +#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer VMMCALL to VMCALL */ +#define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */ -/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */ -#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/ -#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* TSC adjustment MSR 0x3b */ -#define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation extensions */ -#define X86_FEATURE_HLE ( 9*32+ 4) /* Hardware Lock Elision */ -#define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */ -#define X86_FEATURE_SMEP ( 9*32+ 7) /* Supervisor Mode Execution Protection */ -#define X86_FEATURE_BMI2 ( 9*32+ 8) /* 2nd group bit manipulation extensions */ -#define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB */ -#define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */ -#define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */ -#define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */ -#define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */ -#define X86_FEATURE_RDT_A ( 9*32+15) /* Resource Director Technology Allocation */ -#define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */ -#define X86_FEATURE_AVX512DQ ( 9*32+17) /* AVX-512 DQ (Double/Quad granular) Instructions */ -#define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */ -#define X86_FEATURE_ADX ( 9*32+19) /* The ADCX and ADOX instructions */ -#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */ -#define X86_FEATURE_AVX512IFMA ( 9*32+21) /* AVX-512 Integer Fused Multiply-Add instructions */ -#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */ -#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */ -#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */ -#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */ -#define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */ -#define X86_FEATURE_SHA_NI ( 9*32+29) /* SHA1/SHA256 Instruction Extensions */ -#define X86_FEATURE_AVX512BW ( 9*32+30) /* AVX-512 BW (Byte/Word granular) Instructions */ -#define X86_FEATURE_AVX512VL ( 9*32+31) /* AVX-512 VL (128/256 Vector Length) Extensions */ +/* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */ +#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/ +#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* TSC adjustment MSR 0x3B */ +#define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation extensions */ +#define X86_FEATURE_HLE ( 9*32+ 4) /* Hardware Lock Elision */ +#define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */ +#define X86_FEATURE_SMEP ( 9*32+ 7) /* Supervisor Mode Execution Protection */ +#define X86_FEATURE_BMI2 ( 9*32+ 8) /* 2nd group bit manipulation extensions */ +#define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB instructions */ +#define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */ +#define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */ +#define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */ +#define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */ +#define X86_FEATURE_RDT_A ( 9*32+15) /* Resource Director Technology Allocation */ +#define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */ +#define X86_FEATURE_AVX512DQ ( 9*32+17) /* AVX-512 DQ (Double/Quad granular) Instructions */ +#define X86_FEATURE_RDSEED ( 9*32+18) /* RDSEED instruction */ +#define X86_FEATURE_ADX ( 9*32+19) /* ADCX and ADOX instructions */ +#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */ +#define X86_FEATURE_AVX512IFMA ( 9*32+21) /* AVX-512 Integer Fused Multiply-Add instructions */ +#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */ +#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */ +#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */ +#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */ +#define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */ +#define X86_FEATURE_SHA_NI ( 9*32+29) /* SHA1/SHA256 Instruction Extensions */ +#define X86_FEATURE_AVX512BW ( 9*32+30) /* AVX-512 BW (Byte/Word granular) Instructions */ +#define X86_FEATURE_AVX512VL ( 9*32+31) /* AVX-512 VL (128/256 Vector Length) Extensions */ -/* Extended state features, CPUID level 0x0000000d:1 (eax), word 10 */ -#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT */ -#define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC */ -#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 */ -#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS */ +/* Extended state features, CPUID level 0x0000000d:1 (EAX), word 10 */ +#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT instruction */ +#define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC instruction */ +#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 instruction */ +#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS instructions */ -/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (edx), word 11 */ -#define X86_FEATURE_CQM_LLC (11*32+ 1) /* LLC QoS if 1 */ +/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (EDX), word 11 */ +#define X86_FEATURE_CQM_LLC (11*32+ 1) /* LLC QoS if 1 */ -/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (edx), word 12 */ -#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring if 1 */ -#define X86_FEATURE_CQM_MBM_TOTAL (12*32+ 1) /* LLC Total MBM monitoring */ -#define X86_FEATURE_CQM_MBM_LOCAL (12*32+ 2) /* LLC Local MBM monitoring */ +/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (EDX), word 12 */ +#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring */ +#define X86_FEATURE_CQM_MBM_TOTAL (12*32+ 1) /* LLC Total MBM monitoring */ +#define X86_FEATURE_CQM_MBM_LOCAL (12*32+ 2) /* LLC Local MBM monitoring */ -/* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */ -#define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */ -#define X86_FEATURE_IRPERF (13*32+1) /* Instructions Retired Count */ +/* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */ +#define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */ +#define X86_FEATURE_IRPERF (13*32+ 1) /* Instructions Retired Count */ -/* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */ -#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ -#define X86_FEATURE_IDA (14*32+ 1) /* Intel Dynamic Acceleration */ -#define X86_FEATURE_ARAT (14*32+ 2) /* Always Running APIC Timer */ -#define X86_FEATURE_PLN (14*32+ 4) /* Intel Power Limit Notification */ -#define X86_FEATURE_PTS (14*32+ 6) /* Intel Package Thermal Status */ -#define X86_FEATURE_HWP (14*32+ 7) /* Intel Hardware P-states */ -#define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* HWP Notification */ -#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */ -#define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */ -#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */ +/* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */ +#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ +#define X86_FEATURE_IDA (14*32+ 1) /* Intel Dynamic Acceleration */ +#define X86_FEATURE_ARAT (14*32+ 2) /* Always Running APIC Timer */ +#define X86_FEATURE_PLN (14*32+ 4) /* Intel Power Limit Notification */ +#define X86_FEATURE_PTS (14*32+ 6) /* Intel Package Thermal Status */ +#define X86_FEATURE_HWP (14*32+ 7) /* Intel Hardware P-states */ +#define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* HWP Notification */ +#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */ +#define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */ +#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */ -/* AMD SVM Feature Identification, CPUID level 0x8000000a (edx), word 15 */ -#define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */ -#define X86_FEATURE_LBRV (15*32+ 1) /* LBR Virtualization support */ -#define X86_FEATURE_SVML (15*32+ 2) /* "svm_lock" SVM locking MSR */ -#define X86_FEATURE_NRIPS (15*32+ 3) /* "nrip_save" SVM next_rip save */ -#define X86_FEATURE_TSCRATEMSR (15*32+ 4) /* "tsc_scale" TSC scaling support */ -#define X86_FEATURE_VMCBCLEAN (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */ -#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* flush-by-ASID support */ -#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */ -#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */ -#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */ -#define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */ -#define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */ -#define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */ +/* AMD SVM Feature Identification, CPUID level 0x8000000a (EDX), word 15 */ +#define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */ +#define X86_FEATURE_LBRV (15*32+ 1) /* LBR Virtualization support */ +#define X86_FEATURE_SVML (15*32+ 2) /* "svm_lock" SVM locking MSR */ +#define X86_FEATURE_NRIPS (15*32+ 3) /* "nrip_save" SVM next_rip save */ +#define X86_FEATURE_TSCRATEMSR (15*32+ 4) /* "tsc_scale" TSC scaling support */ +#define X86_FEATURE_VMCBCLEAN (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */ +#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* flush-by-ASID support */ +#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */ +#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */ +#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */ +#define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */ +#define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */ +#define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */ -/* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */ -#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/ -#define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */ -#define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */ -#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */ -#define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */ -#define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */ +/* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */ +#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/ +#define X86_FEATURE_UMIP (16*32+ 2) /* User Mode Instruction Protection */ +#define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */ +#define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */ +#define X86_FEATURE_AVX512_VBMI2 (16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */ +#define X86_FEATURE_GFNI (16*32+ 8) /* Galois Field New Instructions */ +#define X86_FEATURE_VAES (16*32+ 9) /* Vector AES */ +#define X86_FEATURE_VPCLMULQDQ (16*32+10) /* Carry-Less Multiplication Double Quadword */ +#define X86_FEATURE_AVX512_VNNI (16*32+11) /* Vector Neural Network Instructions */ +#define X86_FEATURE_AVX512_BITALG (16*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB instructions */ +#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */ +#define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */ +#define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */ -/* AMD-defined CPU features, CPUID level 0x80000007 (ebx), word 17 */ -#define X86_FEATURE_OVERFLOW_RECOV (17*32+0) /* MCA overflow recovery support */ -#define X86_FEATURE_SUCCOR (17*32+1) /* Uncorrectable error containment and recovery */ -#define X86_FEATURE_SMCA (17*32+3) /* Scalable MCA */ +/* AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 */ +#define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* MCA overflow recovery support */ +#define X86_FEATURE_SUCCOR (17*32+ 1) /* Uncorrectable error containment and recovery */ +#define X86_FEATURE_SMCA (17*32+ 3) /* Scalable MCA */ /* * BUG word(s) */ -#define X86_BUG(x) (NCAPINTS*32 + (x)) +#define X86_BUG(x) (NCAPINTS*32 + (x)) -#define X86_BUG_F00F X86_BUG(0) /* Intel F00F */ -#define X86_BUG_FDIV X86_BUG(1) /* FPU FDIV */ -#define X86_BUG_COMA X86_BUG(2) /* Cyrix 6x86 coma */ -#define X86_BUG_AMD_TLB_MMATCH X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */ -#define X86_BUG_AMD_APIC_C1E X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */ -#define X86_BUG_11AP X86_BUG(5) /* Bad local APIC aka 11AP */ -#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */ -#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */ -#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */ +#define X86_BUG_F00F X86_BUG(0) /* Intel F00F */ +#define X86_BUG_FDIV X86_BUG(1) /* FPU FDIV */ +#define X86_BUG_COMA X86_BUG(2) /* Cyrix 6x86 coma */ +#define X86_BUG_AMD_TLB_MMATCH X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */ +#define X86_BUG_AMD_APIC_C1E X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */ +#define X86_BUG_11AP X86_BUG(5) /* Bad local APIC aka 11AP */ +#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */ +#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */ +#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */ #ifdef CONFIG_X86_32 /* * 64-bit kernels don't use X86_BUG_ESPFIX. Make the define conditional * to avoid confusion. */ -#define X86_BUG_ESPFIX X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */ +#define X86_BUG_ESPFIX X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */ #endif -#define X86_BUG_NULL_SEG X86_BUG(10) /* Nulling a selector preserves the base */ -#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */ -#define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */ -#define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */ +#define X86_BUG_NULL_SEG X86_BUG(10) /* Nulling a selector preserves the base */ +#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */ +#define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */ +#define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */ + #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index 0a3e808b9123..4011cb03ef08 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -393,7 +393,7 @@ static inline void set_desc_limit(struct desc_struct *desc, unsigned long limit) void update_intr_gate(unsigned int n, const void *addr); void alloc_intr_gate(unsigned int n, const void *addr); -extern unsigned long used_vectors[]; +extern unsigned long system_vectors[]; #ifdef CONFIG_X86_64 DECLARE_PER_CPU(u32, debug_idt_ctr); diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h index c10c9128f54e..14d6d5007314 100644 --- a/arch/x86/include/asm/disabled-features.h +++ b/arch/x86/include/asm/disabled-features.h @@ -16,6 +16,12 @@ # define DISABLE_MPX (1<<(X86_FEATURE_MPX & 31)) #endif +#ifdef CONFIG_X86_INTEL_UMIP +# define DISABLE_UMIP 0 +#else +# define DISABLE_UMIP (1<<(X86_FEATURE_UMIP & 31)) +#endif + #ifdef CONFIG_X86_64 # define DISABLE_VME (1<<(X86_FEATURE_VME & 31)) # define DISABLE_K6_MTRR (1<<(X86_FEATURE_K6_MTRR & 31)) @@ -63,7 +69,7 @@ #define DISABLED_MASK13 0 #define DISABLED_MASK14 0 #define DISABLED_MASK15 0 -#define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57) +#define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57|DISABLE_UMIP) #define DISABLED_MASK17 0 #define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18) diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index c1a125e47ff3..3a091cea36c5 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -253,7 +253,7 @@ extern int force_personality32; * space open for things that want to use the area for 32-bit pointers. */ #define ELF_ET_DYN_BASE (mmap_is_ia32() ? 0x000400000UL : \ - (TASK_SIZE / 3 * 2)) + (DEFAULT_MAP_WINDOW / 3 * 2)) /* This yields a mask that user programs can use to figure out what instruction set this CPU supports. This could be done in user space, diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 8ec99a55e6b9..b80e46733909 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -16,6 +16,8 @@ #include <asm/irq_vectors.h> +#define IRQ_MATRIX_BITS NR_VECTORS + #ifndef __ASSEMBLY__ #include <linux/percpu.h> @@ -123,15 +125,13 @@ struct irq_alloc_info { struct irq_cfg { unsigned int dest_apicid; - u8 vector; - u8 old_vector; + unsigned int vector; }; extern struct irq_cfg *irq_cfg(unsigned int irq); extern struct irq_cfg *irqd_cfg(struct irq_data *irq_data); extern void lock_vector_lock(void); extern void unlock_vector_lock(void); -extern void setup_vector_irq(int cpu); #ifdef CONFIG_SMP extern void send_cleanup_vector(struct irq_cfg *); extern void irq_complete_move(struct irq_cfg *cfg); diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h index 0ead9dbb9130..1b0a5abcd8ae 100644 --- a/arch/x86/include/asm/hypervisor.h +++ b/arch/x86/include/asm/hypervisor.h @@ -23,11 +23,22 @@ #ifdef CONFIG_HYPERVISOR_GUEST #include <asm/kvm_para.h> +#include <asm/x86_init.h> #include <asm/xen/hypervisor.h> /* * x86 hypervisor information */ + +enum x86_hypervisor_type { + X86_HYPER_NATIVE = 0, + X86_HYPER_VMWARE, + X86_HYPER_MS_HYPERV, + X86_HYPER_XEN_PV, + X86_HYPER_XEN_HVM, + X86_HYPER_KVM, +}; + struct hypervisor_x86 { /* Hypervisor name */ const char *name; @@ -35,40 +46,19 @@ struct hypervisor_x86 { /* Detection routine */ uint32_t (*detect)(void); - /* Platform setup (run once per boot) */ - void (*init_platform)(void); - - /* X2APIC detection (run once per boot) */ - bool (*x2apic_available)(void); + /* Hypervisor type */ + enum x86_hypervisor_type type; - /* pin current vcpu to specified physical cpu (run rarely) */ - void (*pin_vcpu)(int); + /* init time callbacks */ + struct x86_hyper_init init; - /* called during init_mem_mapping() to setup early mappings. */ - void (*init_mem_mapping)(void); + /* runtime callbacks */ + struct x86_hyper_runtime runtime; }; -extern const struct hypervisor_x86 *x86_hyper; - -/* Recognized hypervisors */ -extern const struct hypervisor_x86 x86_hyper_vmware; -extern const struct hypervisor_x86 x86_hyper_ms_hyperv; -extern const struct hypervisor_x86 x86_hyper_xen_pv; -extern const struct hypervisor_x86 x86_hyper_xen_hvm; -extern const struct hypervisor_x86 x86_hyper_kvm; - +extern enum x86_hypervisor_type x86_hyper_type; extern void init_hypervisor_platform(void); -extern bool hypervisor_x2apic_available(void); -extern void hypervisor_pin_vcpu(int cpu); - -static inline void hypervisor_init_mem_mapping(void) -{ - if (x86_hyper && x86_hyper->init_mem_mapping) - x86_hyper->init_mem_mapping(); -} #else static inline void init_hypervisor_platform(void) { } -static inline bool hypervisor_x2apic_available(void) { return false; } -static inline void hypervisor_init_mem_mapping(void) { } #endif /* CONFIG_HYPERVISOR_GUEST */ #endif /* _ASM_X86_HYPERVISOR_H */ diff --git a/arch/x86/include/asm/inat.h b/arch/x86/include/asm/inat.h index 02aff0867211..1c78580e58be 100644 --- a/arch/x86/include/asm/inat.h +++ b/arch/x86/include/asm/inat.h @@ -97,6 +97,16 @@ #define INAT_MAKE_GROUP(grp) ((grp << INAT_GRP_OFFS) | INAT_MODRM) #define INAT_MAKE_IMM(imm) (imm << INAT_IMM_OFFS) +/* Identifiers for segment registers */ +#define INAT_SEG_REG_IGNORE 0 +#define INAT_SEG_REG_DEFAULT 1 +#define INAT_SEG_REG_CS 2 +#define INAT_SEG_REG_SS 3 +#define INAT_SEG_REG_DS 4 +#define INAT_SEG_REG_ES 5 +#define INAT_SEG_REG_FS 6 +#define INAT_SEG_REG_GS 7 + /* Attribute search APIs */ extern insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode); extern int inat_get_last_prefix_id(insn_byte_t last_pfx); diff --git a/arch/x86/include/asm/insn-eval.h b/arch/x86/include/asm/insn-eval.h new file mode 100644 index 000000000000..e1d3b4ce8a92 --- /dev/null +++ b/arch/x86/include/asm/insn-eval.h @@ -0,0 +1,23 @@ +#ifndef _ASM_X86_INSN_EVAL_H +#define _ASM_X86_INSN_EVAL_H +/* + * A collection of utility functions for x86 instruction analysis to be + * used in a kernel context. Useful when, for instance, making sense + * of the registers indicated by operands. + */ + +#include <linux/compiler.h> +#include <linux/bug.h> +#include <linux/err.h> +#include <asm/ptrace.h> + +#define INSN_CODE_SEG_ADDR_SZ(params) ((params >> 4) & 0xf) +#define INSN_CODE_SEG_OPND_SZ(params) (params & 0xf) +#define INSN_CODE_SEG_PARAMS(oper_sz, addr_sz) (oper_sz | (addr_sz << 4)) + +void __user *insn_get_addr_ref(struct insn *insn, struct pt_regs *regs); +int insn_get_modrm_rm_off(struct insn *insn, struct pt_regs *regs); +unsigned long insn_get_seg_base(struct pt_regs *regs, int seg_reg_idx); +char insn_get_code_seg_params(struct pt_regs *regs); + +#endif /* _ASM_X86_INSN_EVAL_H */ diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index 11398d55aefa..93ae8aee1780 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -266,6 +266,21 @@ static inline void slow_down_io(void) #endif +#ifdef CONFIG_AMD_MEM_ENCRYPT +#include <linux/jump_label.h> + +extern struct static_key_false sev_enable_key; +static inline bool sev_key_active(void) +{ + return static_branch_unlikely(&sev_enable_key); +} + +#else /* !CONFIG_AMD_MEM_ENCRYPT */ + +static inline bool sev_key_active(void) { return false; } + +#endif /* CONFIG_AMD_MEM_ENCRYPT */ + #define BUILDIO(bwl, bw, type) \ static inline void out##bwl(unsigned type value, int port) \ { \ @@ -296,14 +311,34 @@ static inline unsigned type in##bwl##_p(int port) \ \ static inline void outs##bwl(int port, const void *addr, unsigned long count) \ { \ - asm volatile("rep; outs" #bwl \ - : "+S"(addr), "+c"(count) : "d"(port) : "memory"); \ + if (sev_key_active()) { \ + unsigned type *value = (unsigned type *)addr; \ + while (count) { \ + out##bwl(*value, port); \ + value++; \ + count--; \ + } \ + } else { \ + asm volatile("rep; outs" #bwl \ + : "+S"(addr), "+c"(count) \ + : "d"(port) : "memory"); \ + } \ } \ \ static inline void ins##bwl(int port, void *addr, unsigned long count) \ { \ - asm volatile("rep; ins" #bwl \ - : "+D"(addr), "+c"(count) : "d"(port) : "memory"); \ + if (sev_key_active()) { \ + unsigned type *value = (unsigned type *)addr; \ + while (count) { \ + *value = in##bwl(port); \ + value++; \ + count--; \ + } \ + } else { \ + asm volatile("rep; ins" #bwl \ + : "+D"(addr), "+c"(count) \ + : "d"(port) : "memory"); \ + } \ } BUILDIO(b, b, char) diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 5c27e146a166..a8834dd546cd 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -193,7 +193,6 @@ static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) extern void setup_IO_APIC(void); extern void enable_IO_APIC(void); extern void disable_IO_APIC(void); -extern void setup_ioapic_dest(void); extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin); extern void print_IO_APICs(void); #else /* !CONFIG_X86_IO_APIC */ @@ -233,7 +232,6 @@ static inline void io_apic_init_mappings(void) { } static inline void setup_IO_APIC(void) { } static inline void enable_IO_APIC(void) { } -static inline void setup_ioapic_dest(void) { } #endif diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index d8632f8fa17d..2395bb794c7b 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h @@ -26,11 +26,7 @@ extern void irq_ctx_init(int cpu); struct irq_desc; -#ifdef CONFIG_HOTPLUG_CPU -#include <linux/cpumask.h> -extern int check_irq_vectors_for_cpu_disable(void); extern void fixup_irqs(void); -#endif #ifdef CONFIG_HAVE_KVM extern void kvm_set_posted_intr_wakeup_handler(void (*handler)(void)); diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index c20ffca8fef1..67421f649cfa 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -102,12 +102,8 @@ #define POSTED_INTR_NESTED_VECTOR 0xf0 #endif -/* - * Local APIC timer IRQ vector is on a different priority level, - * to work around the 'lost local interrupt if more than 2 IRQ - * sources per level' errata. - */ -#define LOCAL_TIMER_VECTOR 0xef +#define MANAGED_IRQ_SHUTDOWN_VECTOR 0xef +#define LOCAL_TIMER_VECTOR 0xee #define NR_VECTORS 256 diff --git a/arch/x86/include/asm/irqdomain.h b/arch/x86/include/asm/irqdomain.h index 423e112c1e8f..f695cc6b8e1f 100644 --- a/arch/x86/include/asm/irqdomain.h +++ b/arch/x86/include/asm/irqdomain.h @@ -9,6 +9,7 @@ enum { /* Allocate contiguous CPU vectors */ X86_IRQ_ALLOC_CONTIGUOUS_VECTORS = 0x1, + X86_IRQ_ALLOC_LEGACY = 0x2, }; extern struct irq_domain *x86_vector_domain; @@ -42,8 +43,8 @@ extern int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq, unsigned int nr_irqs, void *arg); extern void mp_irqdomain_free(struct irq_domain *domain, unsigned int virq, unsigned int nr_irqs); -extern void mp_irqdomain_activate(struct irq_domain *domain, - struct irq_data *irq_data); +extern int mp_irqdomain_activate(struct irq_domain *domain, + struct irq_data *irq_data, bool early); extern void mp_irqdomain_deactivate(struct irq_domain *domain, struct irq_data *irq_data); extern int mp_irqdomain_ioapic_idx(struct irq_domain *domain); diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h index 6cf65437b5e5..9f2e3102e0bb 100644 --- a/arch/x86/include/asm/kprobes.h +++ b/arch/x86/include/asm/kprobes.h @@ -58,8 +58,8 @@ extern __visible kprobe_opcode_t optprobe_template_call[]; extern __visible kprobe_opcode_t optprobe_template_end[]; #define MAX_OPTIMIZED_LENGTH (MAX_INSN_SIZE + RELATIVE_ADDR_SIZE) #define MAX_OPTINSN_SIZE \ - (((unsigned long)&optprobe_template_end - \ - (unsigned long)&optprobe_template_entry) + \ + (((unsigned long)optprobe_template_end - \ + (unsigned long)optprobe_template_entry) + \ MAX_OPTIMIZED_LENGTH + RELATIVEJUMP_SIZE) extern const int kretprobe_blacklist_size; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index c73e493adf07..9d7d856b2d89 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1419,7 +1419,7 @@ static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {} static inline int kvm_cpu_get_apicid(int mps_cpu) { #ifdef CONFIG_X86_LOCAL_APIC - return __default_cpu_present_to_apicid(mps_cpu); + return default_cpu_present_to_apicid(mps_cpu); #else WARN_ON_ONCE(1); return BAD_APICID; diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index c373e44049b1..7b407dda2bd7 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -88,7 +88,6 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1, #ifdef CONFIG_KVM_GUEST bool kvm_para_available(void); unsigned int kvm_arch_para_features(void); -void __init kvm_guest_init(void); void kvm_async_pf_task_wait(u32 token, int interrupt_kernel); void kvm_async_pf_task_wake(u32 token); u32 kvm_read_and_reset_pf_reason(void); @@ -103,7 +102,6 @@ static inline void kvm_spinlock_init(void) #endif /* CONFIG_PARAVIRT_SPINLOCKS */ #else /* CONFIG_KVM_GUEST */ -#define kvm_guest_init() do {} while (0) #define kvm_async_pf_task_wait(T, I) do {} while(0) #define kvm_async_pf_task_wake(T) do {} while(0) diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h index 6a77c63540f7..c9459a4c3c68 100644 --- a/arch/x86/include/asm/mem_encrypt.h +++ b/arch/x86/include/asm/mem_encrypt.h @@ -42,11 +42,17 @@ void __init sme_early_init(void); void __init sme_encrypt_kernel(void); void __init sme_enable(struct boot_params *bp); +int __init early_set_memory_decrypted(unsigned long vaddr, unsigned long size); +int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size); + /* Architecture __weak replacement functions */ void __init mem_encrypt_init(void); void swiotlb_set_mem_attributes(void *vaddr, unsigned long size); +bool sme_active(void); +bool sev_active(void); + #else /* !CONFIG_AMD_MEM_ENCRYPT */ #define sme_me_mask 0ULL @@ -64,6 +70,14 @@ static inline void __init sme_early_init(void) { } static inline void __init sme_encrypt_kernel(void) { } static inline void __init sme_enable(struct boot_params *bp) { } +static inline bool sme_active(void) { return false; } +static inline bool sev_active(void) { return false; } + +static inline int __init +early_set_memory_decrypted(unsigned long vaddr, unsigned long size) { return 0; } +static inline int __init +early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0; } + #endif /* CONFIG_AMD_MEM_ENCRYPT */ /* diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 6699fc441644..6d16d15d09a0 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -73,8 +73,8 @@ static inline void load_mm_ldt(struct mm_struct *mm) #ifdef CONFIG_MODIFY_LDT_SYSCALL struct ldt_struct *ldt; - /* lockless_dereference synchronizes with smp_store_release */ - ldt = lockless_dereference(mm->context.ldt); + /* READ_ONCE synchronizes with smp_store_release */ + ldt = READ_ONCE(mm->context.ldt); /* * Any change to mm->context.ldt is followed by an IPI to all diff --git a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h index 8546fafa21a9..7948a17febb4 100644 --- a/arch/x86/include/asm/module.h +++ b/arch/x86/include/asm/module.h @@ -6,7 +6,7 @@ #include <asm/orc_types.h> struct mod_arch_specific { -#ifdef CONFIG_ORC_UNWINDER +#ifdef CONFIG_UNWINDER_ORC unsigned int num_orcs; int *orc_unwind_ip; struct orc_entry *orc_unwind; diff --git a/arch/x86/include/asm/mpspec_def.h b/arch/x86/include/asm/mpspec_def.h index 9492893aec52..a6bec8028480 100644 --- a/arch/x86/include/asm/mpspec_def.h +++ b/arch/x86/include/asm/mpspec_def.h @@ -59,7 +59,7 @@ struct mpc_table { #define MP_TRANSLATION 192 #define CPU_ENABLED 1 /* Processor is available */ -#define CPU_BOOTPROCESSOR 2 /* Processor is the BP */ +#define CPU_BOOTPROCESSOR 2 /* Processor is the boot CPU */ #define CPU_STEPPING_MASK 0x000F #define CPU_MODEL_MASK 0x00F0 diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index ab022618a50a..34c4922bbc3f 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -324,6 +324,9 @@ #define MSR_AMD64_IBSBRTARGET 0xc001103b #define MSR_AMD64_IBSOPDATA4 0xc001103d #define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */ +#define MSR_AMD64_SEV 0xc0010131 +#define MSR_AMD64_SEV_ENABLED_BIT 0 +#define MSR_AMD64_SEV_ENABLED BIT_ULL(MSR_AMD64_SEV_ENABLED_BIT) /* Fam 17h MSRs */ #define MSR_F17H_IRPERF 0xc00000e9 diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index fd81228e8037..283efcaac8af 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -16,10 +16,9 @@ #include <linux/cpumask.h> #include <asm/frame.h> -static inline void load_sp0(struct tss_struct *tss, - struct thread_struct *thread) +static inline void load_sp0(unsigned long sp0) { - PVOP_VCALL2(pv_cpu_ops.load_sp0, tss, thread); + PVOP_VCALL1(pv_cpu_ops.load_sp0, sp0); } /* The paravirtualized CPUID instruction. */ diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 10cc3b9709fe..6ec54d01972d 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -134,7 +134,7 @@ struct pv_cpu_ops { void (*alloc_ldt)(struct desc_struct *ldt, unsigned entries); void (*free_ldt)(struct desc_struct *ldt, unsigned entries); - void (*load_sp0)(struct tss_struct *tss, struct thread_struct *t); + void (*load_sp0)(unsigned long sp0); void (*set_iopl_mask)(unsigned mask); diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 377f1ffd18be..ba3c523aaf16 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -526,7 +526,7 @@ static inline bool x86_this_cpu_variable_test_bit(int nr, { bool oldbit; - asm volatile("bt "__percpu_arg(2)",%1\n\t" + asm volatile("bt "__percpu_arg(2)",%1" CC_SET(c) : CC_OUT(c) (oldbit) : "m" (*(unsigned long __percpu *)addr), "Ir" (nr)); diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 59df7b47a434..9e9b05fc4860 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -200,10 +200,9 @@ enum page_cache_mode { #define _PAGE_ENC (_AT(pteval_t, sme_me_mask)) -#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \ - _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_ENC) #define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | \ _PAGE_DIRTY | _PAGE_ENC) +#define _PAGE_TABLE (_KERNPG_TABLE | _PAGE_USER) #define __PAGE_KERNEL_ENC (__PAGE_KERNEL | _PAGE_ENC) #define __PAGE_KERNEL_ENC_WP (__PAGE_KERNEL_WP | _PAGE_ENC) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index bdac19ab2488..2db7cf720b04 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -431,7 +431,9 @@ typedef struct { struct thread_struct { /* Cached TLS descriptors: */ struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES]; +#ifdef CONFIG_X86_32 unsigned long sp0; +#endif unsigned long sp; #ifdef CONFIG_X86_32 unsigned long sysenter_cs; @@ -518,16 +520,9 @@ static inline void native_set_iopl_mask(unsigned mask) } static inline void -native_load_sp0(struct tss_struct *tss, struct thread_struct *thread) +native_load_sp0(unsigned long sp0) { - tss->x86_tss.sp0 = thread->sp0; -#ifdef CONFIG_X86_32 - /* Only happens when SEP is enabled, no need to test "SEP"arately: */ - if (unlikely(tss->x86_tss.ss1 != thread->sysenter_cs)) { - tss->x86_tss.ss1 = thread->sysenter_cs; - wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0); - } -#endif + this_cpu_write(cpu_tss.x86_tss.sp0, sp0); } static inline void native_swapgs(void) @@ -547,15 +542,20 @@ static inline unsigned long current_top_of_stack(void) #endif } +static inline bool on_thread_stack(void) +{ + return (unsigned long)(current_top_of_stack() - + current_stack_pointer) < THREAD_SIZE; +} + #ifdef CONFIG_PARAVIRT #include <asm/paravirt.h> #else #define __cpuid native_cpuid -static inline void load_sp0(struct tss_struct *tss, - struct thread_struct *thread) +static inline void load_sp0(unsigned long sp0) { - native_load_sp0(tss, thread); + native_load_sp0(sp0); } #define set_iopl_mask native_set_iopl_mask @@ -804,6 +804,15 @@ static inline void spin_lock_prefetch(const void *x) #define TOP_OF_INIT_STACK ((unsigned long)&init_stack + sizeof(init_stack) - \ TOP_OF_KERNEL_STACK_PADDING) +#define task_top_of_stack(task) ((unsigned long)(task_pt_regs(task) + 1)) + +#define task_pt_regs(task) \ +({ \ + unsigned long __ptr = (unsigned long)task_stack_page(task); \ + __ptr += THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING; \ + ((struct pt_regs *)__ptr) - 1; \ +}) + #ifdef CONFIG_X86_32 /* * User space process size: 3GB (default). @@ -823,23 +832,6 @@ static inline void spin_lock_prefetch(const void *x) .addr_limit = KERNEL_DS, \ } -/* - * TOP_OF_KERNEL_STACK_PADDING reserves 8 bytes on top of the ring0 stack. - * This is necessary to guarantee that the entire "struct pt_regs" - * is accessible even if the CPU haven't stored the SS/ESP registers - * on the stack (interrupt gate does not save these registers - * when switching to the same priv ring). - * Therefore beware: accessing the ss/esp fields of the - * "struct pt_regs" is possible, but they may contain the - * completely wrong values. - */ -#define task_pt_regs(task) \ -({ \ - unsigned long __ptr = (unsigned long)task_stack_page(task); \ - __ptr += THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING; \ - ((struct pt_regs *)__ptr) - 1; \ -}) - #define KSTK_ESP(task) (task_pt_regs(task)->sp) #else @@ -873,11 +865,9 @@ static inline void spin_lock_prefetch(const void *x) #define STACK_TOP_MAX TASK_SIZE_MAX #define INIT_THREAD { \ - .sp0 = TOP_OF_INIT_STACK, \ .addr_limit = KERNEL_DS, \ } -#define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1) extern unsigned long KSTK_ESP(struct task_struct *task); #endif /* CONFIG_X86_64 */ diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index c0e3c45cf6ab..14131dd06b29 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -136,9 +136,9 @@ static inline int v8086_mode(struct pt_regs *regs) #endif } -#ifdef CONFIG_X86_64 static inline bool user_64bit_mode(struct pt_regs *regs) { +#ifdef CONFIG_X86_64 #ifndef CONFIG_PARAVIRT /* * On non-paravirt systems, this is the only long mode CPL 3 @@ -149,8 +149,12 @@ static inline bool user_64bit_mode(struct pt_regs *regs) /* Headers are too twisted for this to go in paravirt.h. */ return regs->cs == __USER_CS || regs->cs == pv_info.extra_user_64bit_cs; #endif +#else /* !CONFIG_X86_64 */ + return false; +#endif } +#ifdef CONFIG_X86_64 #define current_user_stack_pointer() current_pt_regs()->sp #define compat_user_stack_pointer() current_pt_regs()->sp #endif diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h index 9982dd96f093..5e16b5d40d32 100644 --- a/arch/x86/include/asm/qspinlock.h +++ b/arch/x86/include/asm/qspinlock.h @@ -2,6 +2,7 @@ #ifndef _ASM_X86_QSPINLOCK_H #define _ASM_X86_QSPINLOCK_H +#include <linux/jump_label.h> #include <asm/cpufeature.h> #include <asm-generic/qspinlock_types.h> #include <asm/paravirt.h> @@ -47,10 +48,14 @@ static inline void queued_spin_unlock(struct qspinlock *lock) #endif #ifdef CONFIG_PARAVIRT +DECLARE_STATIC_KEY_TRUE(virt_spin_lock_key); + +void native_pv_lock_init(void) __init; + #define virt_spin_lock virt_spin_lock static inline bool virt_spin_lock(struct qspinlock *lock) { - if (!static_cpu_has(X86_FEATURE_HYPERVISOR)) + if (!static_branch_likely(&virt_spin_lock_key)) return false; /* @@ -66,6 +71,10 @@ static inline bool virt_spin_lock(struct qspinlock *lock) return true; } +#else +static inline void native_pv_lock_init(void) +{ +} #endif /* CONFIG_PARAVIRT */ #include <asm-generic/qspinlock.h> diff --git a/arch/x86/include/asm/refcount.h b/arch/x86/include/asm/refcount.h index ff871210b9f2..4e44250e7d0d 100644 --- a/arch/x86/include/asm/refcount.h +++ b/arch/x86/include/asm/refcount.h @@ -15,7 +15,7 @@ * back to the regular execution flow in .text. */ #define _REFCOUNT_EXCEPTION \ - ".pushsection .text.unlikely\n" \ + ".pushsection .text..refcount\n" \ "111:\tlea %[counter], %%" _ASM_CX "\n" \ "112:\t" ASM_UD0 "\n" \ ASM_UNREACHABLE \ diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h index d8f3a6ae9f6c..f91c365e57c3 100644 --- a/arch/x86/include/asm/rmwcc.h +++ b/arch/x86/include/asm/rmwcc.h @@ -29,7 +29,7 @@ cc_label: \ #define __GEN_RMWcc(fullop, var, cc, clobbers, ...) \ do { \ bool c; \ - asm volatile (fullop ";" CC_SET(cc) \ + asm volatile (fullop CC_SET(cc) \ : [counter] "+m" (var), CC_OUT(cc) (c) \ : __VA_ARGS__ : clobbers); \ return c; \ diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h index 4d38d85a16ad..4c25cf6caefa 100644 --- a/arch/x86/include/asm/rwsem.h +++ b/arch/x86/include/asm/rwsem.h @@ -61,18 +61,33 @@ /* * lock for reading */ +#define ____down_read(sem, slow_path) \ +({ \ + struct rw_semaphore* ret; \ + asm volatile("# beginning down_read\n\t" \ + LOCK_PREFIX _ASM_INC "(%[sem])\n\t" \ + /* adds 0x00000001 */ \ + " jns 1f\n" \ + " call " slow_path "\n" \ + "1:\n\t" \ + "# ending down_read\n\t" \ + : "+m" (sem->count), "=a" (ret), \ + ASM_CALL_CONSTRAINT \ + : [sem] "a" (sem) \ + : "memory", "cc"); \ + ret; \ +}) + static inline void __down_read(struct rw_semaphore *sem) { - asm volatile("# beginning down_read\n\t" - LOCK_PREFIX _ASM_INC "(%1)\n\t" - /* adds 0x00000001 */ - " jns 1f\n" - " call call_rwsem_down_read_failed\n" - "1:\n\t" - "# ending down_read\n\t" - : "+m" (sem->count) - : "a" (sem) - : "memory", "cc"); + ____down_read(sem, "call_rwsem_down_read_failed"); +} + +static inline int __down_read_killable(struct rw_semaphore *sem) +{ + if (IS_ERR(____down_read(sem, "call_rwsem_down_read_failed_killable"))) + return -EINTR; + return 0; } /* @@ -82,17 +97,18 @@ static inline bool __down_read_trylock(struct rw_semaphore *sem) { long result, tmp; asm volatile("# beginning __down_read_trylock\n\t" - " mov %0,%1\n\t" + " mov %[count],%[result]\n\t" "1:\n\t" - " mov %1,%2\n\t" - " add %3,%2\n\t" + " mov %[result],%[tmp]\n\t" + " add %[inc],%[tmp]\n\t" " jle 2f\n\t" - LOCK_PREFIX " cmpxchg %2,%0\n\t" + LOCK_PREFIX " cmpxchg %[tmp],%[count]\n\t" " jnz 1b\n\t" "2:\n\t" "# ending __down_read_trylock\n\t" - : "+m" (sem->count), "=&a" (result), "=&r" (tmp) - : "i" (RWSEM_ACTIVE_READ_BIAS) + : [count] "+m" (sem->count), [result] "=&a" (result), + [tmp] "=&r" (tmp) + : [inc] "i" (RWSEM_ACTIVE_READ_BIAS) : "memory", "cc"); return result >= 0; } @@ -106,7 +122,7 @@ static inline bool __down_read_trylock(struct rw_semaphore *sem) struct rw_semaphore* ret; \ \ asm volatile("# beginning down_write\n\t" \ - LOCK_PREFIX " xadd %1,(%4)\n\t" \ + LOCK_PREFIX " xadd %[tmp],(%[sem])\n\t" \ /* adds 0xffff0001, returns the old value */ \ " test " __ASM_SEL(%w1,%k1) "," __ASM_SEL(%w1,%k1) "\n\t" \ /* was the active mask 0 before? */\ @@ -114,9 +130,9 @@ static inline bool __down_read_trylock(struct rw_semaphore *sem) " call " slow_path "\n" \ "1:\n" \ "# ending down_write" \ - : "+m" (sem->count), "=d" (tmp), \ + : "+m" (sem->count), [tmp] "=d" (tmp), \ "=a" (ret), ASM_CALL_CONSTRAINT \ - : "a" (sem), "1" (RWSEM_ACTIVE_WRITE_BIAS) \ + : [sem] "a" (sem), "[tmp]" (RWSEM_ACTIVE_WRITE_BIAS) \ : "memory", "cc"); \ ret; \ }) @@ -142,21 +158,21 @@ static inline bool __down_write_trylock(struct rw_semaphore *sem) bool result; long tmp0, tmp1; asm volatile("# beginning __down_write_trylock\n\t" - " mov %0,%1\n\t" + " mov %[count],%[tmp0]\n\t" "1:\n\t" " test " __ASM_SEL(%w1,%k1) "," __ASM_SEL(%w1,%k1) "\n\t" /* was the active mask 0 before? */ " jnz 2f\n\t" - " mov %1,%2\n\t" - " add %4,%2\n\t" - LOCK_PREFIX " cmpxchg %2,%0\n\t" + " mov %[tmp0],%[tmp1]\n\t" + " add %[inc],%[tmp1]\n\t" + LOCK_PREFIX " cmpxchg %[tmp1],%[count]\n\t" " jnz 1b\n\t" "2:\n\t" CC_SET(e) "# ending __down_write_trylock\n\t" - : "+m" (sem->count), "=&a" (tmp0), "=&r" (tmp1), - CC_OUT(e) (result) - : "er" (RWSEM_ACTIVE_WRITE_BIAS) + : [count] "+m" (sem->count), [tmp0] "=&a" (tmp0), + [tmp1] "=&r" (tmp1), CC_OUT(e) (result) + : [inc] "er" (RWSEM_ACTIVE_WRITE_BIAS) : "memory"); return result; } @@ -168,14 +184,14 @@ static inline void __up_read(struct rw_semaphore *sem) { long tmp; asm volatile("# beginning __up_read\n\t" - LOCK_PREFIX " xadd %1,(%2)\n\t" + LOCK_PREFIX " xadd %[tmp],(%[sem])\n\t" /* subtracts 1, returns the old value */ " jns 1f\n\t" " call call_rwsem_wake\n" /* expects old value in %edx */ "1:\n" "# ending __up_read\n" - : "+m" (sem->count), "=d" (tmp) - : "a" (sem), "1" (-RWSEM_ACTIVE_READ_BIAS) + : "+m" (sem->count), [tmp] "=d" (tmp) + : [sem] "a" (sem), "[tmp]" (-RWSEM_ACTIVE_READ_BIAS) : "memory", "cc"); } @@ -186,14 +202,14 @@ static inline void __up_write(struct rw_semaphore *sem) { long tmp; asm volatile("# beginning __up_write\n\t" - LOCK_PREFIX " xadd %1,(%2)\n\t" + LOCK_PREFIX " xadd %[tmp],(%[sem])\n\t" /* subtracts 0xffff0001, returns the old value */ " jns 1f\n\t" " call call_rwsem_wake\n" /* expects old value in %edx */ "1:\n\t" "# ending __up_write\n" - : "+m" (sem->count), "=d" (tmp) - : "a" (sem), "1" (-RWSEM_ACTIVE_WRITE_BIAS) + : "+m" (sem->count), [tmp] "=d" (tmp) + : [sem] "a" (sem), "[tmp]" (-RWSEM_ACTIVE_WRITE_BIAS) : "memory", "cc"); } @@ -203,7 +219,7 @@ static inline void __up_write(struct rw_semaphore *sem) static inline void __downgrade_write(struct rw_semaphore *sem) { asm volatile("# beginning __downgrade_write\n\t" - LOCK_PREFIX _ASM_ADD "%2,(%1)\n\t" + LOCK_PREFIX _ASM_ADD "%[inc],(%[sem])\n\t" /* * transitions 0xZZZZ0001 -> 0xYYYY0001 (i386) * 0xZZZZZZZZ00000001 -> 0xYYYYYYYY00000001 (x86_64) @@ -213,7 +229,7 @@ static inline void __downgrade_write(struct rw_semaphore *sem) "1:\n\t" "# ending __downgrade_write\n" : "+m" (sem->count) - : "a" (sem), "er" (-RWSEM_WAITING_BIAS) + : [sem] "a" (sem), [inc] "er" (-RWSEM_WAITING_BIAS) : "memory", "cc"); } diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h index b34625796eb2..5b6bc7016c22 100644 --- a/arch/x86/include/asm/spinlock.h +++ b/arch/x86/include/asm/spinlock.h @@ -42,11 +42,4 @@ #include <asm/qrwlock.h> -#define arch_read_lock_flags(lock, flags) arch_read_lock(lock) -#define arch_write_lock_flags(lock, flags) arch_write_lock(lock) - -#define arch_spin_relax(lock) cpu_relax() -#define arch_read_relax(lock) cpu_relax() -#define arch_write_relax(lock) cpu_relax() - #endif /* _ASM_X86_SPINLOCK_H */ diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h index 899084b70412..8c6bd6863db9 100644 --- a/arch/x86/include/asm/switch_to.h +++ b/arch/x86/include/asm/switch_to.h @@ -2,6 +2,8 @@ #ifndef _ASM_X86_SWITCH_TO_H #define _ASM_X86_SWITCH_TO_H +#include <linux/sched/task_stack.h> + struct task_struct; /* one of the stranger aspects of C forward declarations */ struct task_struct *__switch_to_asm(struct task_struct *prev, @@ -73,4 +75,26 @@ do { \ ((last) = __switch_to_asm((prev), (next))); \ } while (0) +#ifdef CONFIG_X86_32 +static inline void refresh_sysenter_cs(struct thread_struct *thread) +{ + /* Only happens when SEP is enabled, no need to test "SEP"arately: */ + if (unlikely(this_cpu_read(cpu_tss.x86_tss.ss1) == thread->sysenter_cs)) + return; + + this_cpu_write(cpu_tss.x86_tss.ss1, thread->sysenter_cs); + wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0); +} +#endif + +/* This is used when switching tasks or entering/exiting vm86 mode. */ +static inline void update_sp0(struct task_struct *task) +{ +#ifdef CONFIG_X86_32 + load_sp0(task->thread.sp0); +#else + load_sp0(task_top_of_stack(task)); +#endif +} + #endif /* _ASM_X86_SWITCH_TO_H */ diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index 91dfcafe27a6..bad25bb80679 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h @@ -21,7 +21,7 @@ asmlinkage long sys_ioperm(unsigned long, unsigned long, int); asmlinkage long sys_iopl(unsigned int); /* kernel/ldt.c */ -asmlinkage int sys_modify_ldt(int, void __user *, unsigned long); +asmlinkage long sys_modify_ldt(int, void __user *, unsigned long); /* kernel/signal.c */ asmlinkage long sys_rt_sigreturn(void); diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h index 47457ab975fd..7365dd4acffb 100644 --- a/arch/x86/include/asm/timer.h +++ b/arch/x86/include/asm/timer.h @@ -9,7 +9,7 @@ #define TICK_SIZE (tick_nsec / 1000) unsigned long long native_sched_clock(void); -extern int recalibrate_cpu_khz(void); +extern void recalibrate_cpu_khz(void); extern int no_timer_check; diff --git a/arch/x86/include/asm/trace/fpu.h b/arch/x86/include/asm/trace/fpu.h index fa60398bbc3a..069c04be1507 100644 --- a/arch/x86/include/asm/trace/fpu.h +++ b/arch/x86/include/asm/trace/fpu.h @@ -34,11 +34,6 @@ DECLARE_EVENT_CLASS(x86_fpu, ) ); -DEFINE_EVENT(x86_fpu, x86_fpu_state, - TP_PROTO(struct fpu *fpu), - TP_ARGS(fpu) -); - DEFINE_EVENT(x86_fpu, x86_fpu_before_save, TP_PROTO(struct fpu *fpu), TP_ARGS(fpu) @@ -74,11 +69,6 @@ DEFINE_EVENT(x86_fpu, x86_fpu_activate_state, TP_ARGS(fpu) ); -DEFINE_EVENT(x86_fpu, x86_fpu_deactivate_state, - TP_PROTO(struct fpu *fpu), - TP_ARGS(fpu) -); - DEFINE_EVENT(x86_fpu, x86_fpu_init_state, TP_PROTO(struct fpu *fpu), TP_ARGS(fpu) diff --git a/arch/x86/include/asm/trace/irq_vectors.h b/arch/x86/include/asm/trace/irq_vectors.h index 8eb139ed1a03..84b9ec0c1bc0 100644 --- a/arch/x86/include/asm/trace/irq_vectors.h +++ b/arch/x86/include/asm/trace/irq_vectors.h @@ -138,6 +138,254 @@ DEFINE_IRQ_VECTOR_EVENT(deferred_error_apic); DEFINE_IRQ_VECTOR_EVENT(thermal_apic); #endif +TRACE_EVENT(vector_config, + + TP_PROTO(unsigned int irq, unsigned int vector, + unsigned int cpu, unsigned int apicdest), + + TP_ARGS(irq, vector, cpu, apicdest), + + TP_STRUCT__entry( + __field( unsigned int, irq ) + __field( unsigned int, vector ) + __field( unsigned int, cpu ) + __field( unsigned int, apicdest ) + ), + + TP_fast_assign( + __entry->irq = irq; + __entry->vector = vector; + __entry->cpu = cpu; + __entry->apicdest = apicdest; + ), + + TP_printk("irq=%u vector=%u cpu=%u apicdest=0x%08x", + __entry->irq, __entry->vector, __entry->cpu, + __entry->apicdest) +); + +DECLARE_EVENT_CLASS(vector_mod, + + TP_PROTO(unsigned int irq, unsigned int vector, + unsigned int cpu, unsigned int prev_vector, + unsigned int prev_cpu), + + TP_ARGS(irq, vector, cpu, prev_vector, prev_cpu), + + TP_STRUCT__entry( + __field( unsigned int, irq ) + __field( unsigned int, vector ) + __field( unsigned int, cpu ) + __field( unsigned int, prev_vector ) + __field( unsigned int, prev_cpu ) + ), + + TP_fast_assign( + __entry->irq = irq; + __entry->vector = vector; + __entry->cpu = cpu; + __entry->prev_vector = prev_vector; + __entry->prev_cpu = prev_cpu; + + ), + + TP_printk("irq=%u vector=%u cpu=%u prev_vector=%u prev_cpu=%u", + __entry->irq, __entry->vector, __entry->cpu, + __entry->prev_vector, __entry->prev_cpu) +); + +#define DEFINE_IRQ_VECTOR_MOD_EVENT(name) \ +DEFINE_EVENT_FN(vector_mod, name, \ + TP_PROTO(unsigned int irq, unsigned int vector, \ + unsigned int cpu, unsigned int prev_vector, \ + unsigned int prev_cpu), \ + TP_ARGS(irq, vector, cpu, prev_vector, prev_cpu), NULL, NULL); \ + +DEFINE_IRQ_VECTOR_MOD_EVENT(vector_update); +DEFINE_IRQ_VECTOR_MOD_EVENT(vector_clear); + +DECLARE_EVENT_CLASS(vector_reserve, + + TP_PROTO(unsigned int irq, int ret), + + TP_ARGS(irq, ret), + + TP_STRUCT__entry( + __field( unsigned int, irq ) + __field( int, ret ) + ), + + TP_fast_assign( + __entry->irq = irq; + __entry->ret = ret; + ), + + TP_printk("irq=%u ret=%d", __entry->irq, __entry->ret) +); + +#define DEFINE_IRQ_VECTOR_RESERVE_EVENT(name) \ +DEFINE_EVENT_FN(vector_reserve, name, \ + TP_PROTO(unsigned int irq, int ret), \ + TP_ARGS(irq, ret), NULL, NULL); \ + +DEFINE_IRQ_VECTOR_RESERVE_EVENT(vector_reserve_managed); +DEFINE_IRQ_VECTOR_RESERVE_EVENT(vector_reserve); + +TRACE_EVENT(vector_alloc, + + TP_PROTO(unsigned int irq, unsigned int vector, bool reserved, + int ret), + + TP_ARGS(irq, vector, ret, reserved), + + TP_STRUCT__entry( + __field( unsigned int, irq ) + __field( unsigned int, vector ) + __field( bool, reserved ) + __field( int, ret ) + ), + + TP_fast_assign( + __entry->irq = irq; + __entry->vector = ret < 0 ? 0 : vector; + __entry->reserved = reserved; + __entry->ret = ret > 0 ? 0 : ret; + ), + + TP_printk("irq=%u vector=%u reserved=%d ret=%d", + __entry->irq, __entry->vector, + __entry->reserved, __entry->ret) +); + +TRACE_EVENT(vector_alloc_managed, + + TP_PROTO(unsigned int irq, unsigned int vector, + int ret), + + TP_ARGS(irq, vector, ret), + + TP_STRUCT__entry( + __field( unsigned int, irq ) + __field( unsigned int, vector ) + __field( int, ret ) + ), + + TP_fast_assign( + __entry->irq = irq; + __entry->vector = ret < 0 ? 0 : vector; + __entry->ret = ret > 0 ? 0 : ret; + ), + + TP_printk("irq=%u vector=%u ret=%d", + __entry->irq, __entry->vector, __entry->ret) +); + +DECLARE_EVENT_CLASS(vector_activate, + + TP_PROTO(unsigned int irq, bool is_managed, bool can_reserve, + bool early), + + TP_ARGS(irq, is_managed, can_reserve, early), + + TP_STRUCT__entry( + __field( unsigned int, irq ) + __field( bool, is_managed ) + __field( bool, can_reserve ) + __field( bool, early ) + ), + + TP_fast_assign( + __entry->irq = irq; + __entry->is_managed = is_managed; + __entry->can_reserve = can_reserve; + __entry->early = early; + ), + + TP_printk("irq=%u is_managed=%d can_reserve=%d early=%d", + __entry->irq, __entry->is_managed, __entry->can_reserve, + __entry->early) +); + +#define DEFINE_IRQ_VECTOR_ACTIVATE_EVENT(name) \ +DEFINE_EVENT_FN(vector_activate, name, \ + TP_PROTO(unsigned int irq, bool is_managed, \ + bool can_reserve, bool early), \ + TP_ARGS(irq, is_managed, can_reserve, early), NULL, NULL); \ + +DEFINE_IRQ_VECTOR_ACTIVATE_EVENT(vector_activate); +DEFINE_IRQ_VECTOR_ACTIVATE_EVENT(vector_deactivate); + +TRACE_EVENT(vector_teardown, + + TP_PROTO(unsigned int irq, bool is_managed, bool has_reserved), + + TP_ARGS(irq, is_managed, has_reserved), + + TP_STRUCT__entry( + __field( unsigned int, irq ) + __field( bool, is_managed ) + __field( bool, has_reserved ) + ), + + TP_fast_assign( + __entry->irq = irq; + __entry->is_managed = is_managed; + __entry->has_reserved = has_reserved; + ), + + TP_printk("irq=%u is_managed=%d has_reserved=%d", + __entry->irq, __entry->is_managed, __entry->has_reserved) +); + +TRACE_EVENT(vector_setup, + + TP_PROTO(unsigned int irq, bool is_legacy, int ret), + + TP_ARGS(irq, is_legacy, ret), + + TP_STRUCT__entry( + __field( unsigned int, irq ) + __field( bool, is_legacy ) + __field( int, ret ) + ), + + TP_fast_assign( + __entry->irq = irq; + __entry->is_legacy = is_legacy; + __entry->ret = ret; + ), + + TP_printk("irq=%u is_legacy=%d ret=%d", + __entry->irq, __entry->is_legacy, __entry->ret) +); + +TRACE_EVENT(vector_free_moved, + + TP_PROTO(unsigned int irq, unsigned int cpu, unsigned int vector, + bool is_managed), + + TP_ARGS(irq, cpu, vector, is_managed), + + TP_STRUCT__entry( + __field( unsigned int, irq ) + __field( unsigned int, cpu ) + __field( unsigned int, vector ) + __field( bool, is_managed ) + ), + + TP_fast_assign( + __entry->irq = irq; + __entry->cpu = cpu; + __entry->vector = vector; + __entry->is_managed = is_managed; + ), + + TP_printk("irq=%u cpu=%u vector=%u is_managed=%d", + __entry->irq, __entry->cpu, __entry->vector, + __entry->is_managed) +); + + #endif /* CONFIG_X86_LOCAL_APIC */ #undef TRACE_INCLUDE_PATH diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index b0cced97a6ce..1fadd310ff68 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -38,9 +38,9 @@ asmlinkage void simd_coprocessor_error(void); #if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV) asmlinkage void xen_divide_error(void); +asmlinkage void xen_xennmi(void); asmlinkage void xen_xendebug(void); asmlinkage void xen_xenint3(void); -asmlinkage void xen_nmi(void); asmlinkage void xen_overflow(void); asmlinkage void xen_bounds(void); asmlinkage void xen_invalid_op(void); @@ -145,4 +145,22 @@ enum { X86_TRAP_IRET = 32, /* 32, IRET Exception */ }; +/* + * Page fault error code bits: + * + * bit 0 == 0: no page found 1: protection fault + * bit 1 == 0: read access 1: write access + * bit 2 == 0: kernel-mode access 1: user-mode access + * bit 3 == 1: use of reserved bit detected + * bit 4 == 1: fault was an instruction fetch + * bit 5 == 1: protection keys block access + */ +enum x86_pf_error_code { + X86_PF_PROT = 1 << 0, + X86_PF_WRITE = 1 << 1, + X86_PF_USER = 1 << 2, + X86_PF_RSVD = 1 << 3, + X86_PF_INSTR = 1 << 4, + X86_PF_PK = 1 << 5, +}; #endif /* _ASM_X86_TRAPS_H */ diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h index 8da0efb13544..cf5d53c3f9ea 100644 --- a/arch/x86/include/asm/tsc.h +++ b/arch/x86/include/asm/tsc.h @@ -32,15 +32,22 @@ static inline cycles_t get_cycles(void) extern struct system_counterval_t convert_art_to_tsc(u64 art); +extern void tsc_early_delay_calibrate(void); extern void tsc_init(void); extern void mark_tsc_unstable(char *reason); extern int unsynchronized_tsc(void); extern int check_tsc_unstable(void); +extern void mark_tsc_async_resets(char *reason); extern unsigned long native_calibrate_cpu(void); extern unsigned long native_calibrate_tsc(void); extern unsigned long long native_sched_clock_from_tsc(u64 tsc); extern int tsc_clocksource_reliable; +#ifdef CONFIG_X86_TSC +extern bool tsc_async_resets; +#else +# define tsc_async_resets false +#endif /* * Boot-time check whether the TSCs are synchronized across diff --git a/arch/x86/include/asm/umip.h b/arch/x86/include/asm/umip.h new file mode 100644 index 000000000000..db43f2a0d92c --- /dev/null +++ b/arch/x86/include/asm/umip.h @@ -0,0 +1,12 @@ +#ifndef _ASM_X86_UMIP_H +#define _ASM_X86_UMIP_H + +#include <linux/types.h> +#include <asm/ptrace.h> + +#ifdef CONFIG_X86_INTEL_UMIP +bool fixup_umip_exception(struct pt_regs *regs); +#else +static inline bool fixup_umip_exception(struct pt_regs *regs) { return false; } +#endif /* CONFIG_X86_INTEL_UMIP */ +#endif /* _ASM_X86_UMIP_H */ diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h index 87adc0d38c4a..e9cc6fe1fc6f 100644 --- a/arch/x86/include/asm/unwind.h +++ b/arch/x86/include/asm/unwind.h @@ -13,11 +13,11 @@ struct unwind_state { struct task_struct *task; int graph_idx; bool error; -#if defined(CONFIG_ORC_UNWINDER) +#if defined(CONFIG_UNWINDER_ORC) bool signal, full_regs; unsigned long sp, bp, ip; struct pt_regs *regs; -#elif defined(CONFIG_FRAME_POINTER_UNWINDER) +#elif defined(CONFIG_UNWINDER_FRAME_POINTER) bool got_irq; unsigned long *bp, *orig_sp, ip; struct pt_regs *regs; @@ -51,7 +51,7 @@ void unwind_start(struct unwind_state *state, struct task_struct *task, __unwind_start(state, task, regs, first_frame); } -#if defined(CONFIG_ORC_UNWINDER) || defined(CONFIG_FRAME_POINTER_UNWINDER) +#if defined(CONFIG_UNWINDER_ORC) || defined(CONFIG_UNWINDER_FRAME_POINTER) static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state) { if (unwind_done(state)) @@ -66,7 +66,7 @@ static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state) } #endif -#ifdef CONFIG_ORC_UNWINDER +#ifdef CONFIG_UNWINDER_ORC void unwind_init(void); void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size, void *orc, size_t orc_size); diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index 9cffb44a3cf5..036e26d63d9a 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h @@ -776,23 +776,36 @@ static inline int uv_num_possible_blades(void) extern void uv_nmi_setup(void); extern void uv_nmi_setup_hubless(void); +/* BIOS/Kernel flags exchange MMR */ +#define UVH_BIOS_KERNEL_MMR UVH_SCRATCH5 +#define UVH_BIOS_KERNEL_MMR_ALIAS UVH_SCRATCH5_ALIAS +#define UVH_BIOS_KERNEL_MMR_ALIAS_2 UVH_SCRATCH5_ALIAS_2 + +/* TSC sync valid, set by BIOS */ +#define UVH_TSC_SYNC_MMR UVH_BIOS_KERNEL_MMR +#define UVH_TSC_SYNC_SHIFT 10 +#define UVH_TSC_SYNC_SHIFT_UV2K 16 /* UV2/3k have different bits */ +#define UVH_TSC_SYNC_MASK 3 /* 0011 */ +#define UVH_TSC_SYNC_VALID 3 /* 0011 */ +#define UVH_TSC_SYNC_INVALID 2 /* 0010 */ + /* BMC sets a bit this MMR non-zero before sending an NMI */ -#define UVH_NMI_MMR UVH_SCRATCH5 -#define UVH_NMI_MMR_CLEAR UVH_SCRATCH5_ALIAS +#define UVH_NMI_MMR UVH_BIOS_KERNEL_MMR +#define UVH_NMI_MMR_CLEAR UVH_BIOS_KERNEL_MMR_ALIAS #define UVH_NMI_MMR_SHIFT 63 -#define UVH_NMI_MMR_TYPE "SCRATCH5" +#define UVH_NMI_MMR_TYPE "SCRATCH5" /* Newer SMM NMI handler, not present in all systems */ #define UVH_NMI_MMRX UVH_EVENT_OCCURRED0 #define UVH_NMI_MMRX_CLEAR UVH_EVENT_OCCURRED0_ALIAS #define UVH_NMI_MMRX_SHIFT UVH_EVENT_OCCURRED0_EXTIO_INT0_SHFT -#define UVH_NMI_MMRX_TYPE "EXTIO_INT0" +#define UVH_NMI_MMRX_TYPE "EXTIO_INT0" /* Non-zero indicates newer SMM NMI handler present */ #define UVH_NMI_MMRX_SUPPORTED UVH_EXTIO_INT0_BROADCAST /* Indicates to BIOS that we want to use the newer SMM NMI handler */ -#define UVH_NMI_MMRX_REQ UVH_SCRATCH5_ALIAS_2 +#define UVH_NMI_MMRX_REQ UVH_BIOS_KERNEL_MMR_ALIAS_2 #define UVH_NMI_MMRX_REQ_SHIFT 62 struct uv_hub_nmi_s { diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h index 52250681f68c..fb856c9f0449 100644 --- a/arch/x86/include/asm/vgtod.h +++ b/arch/x86/include/asm/vgtod.h @@ -49,7 +49,7 @@ static inline unsigned gtod_read_begin(const struct vsyscall_gtod_data *s) unsigned ret; repeat: - ret = ACCESS_ONCE(s->seq); + ret = READ_ONCE(s->seq); if (unlikely(ret & 1)) { cpu_relax(); goto repeat; diff --git a/arch/x86/include/asm/x2apic.h b/arch/x86/include/asm/x2apic.h deleted file mode 100644 index 78ccf28d17db..000000000000 --- a/arch/x86/include/asm/x2apic.h +++ /dev/null @@ -1,50 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Common bits for X2APIC cluster/physical modes. - */ - -#ifndef _ASM_X86_X2APIC_H -#define _ASM_X86_X2APIC_H - -#include <asm/apic.h> -#include <asm/ipi.h> -#include <linux/cpumask.h> - -static int x2apic_apic_id_valid(int apicid) -{ - return 1; -} - -static int x2apic_apic_id_registered(void) -{ - return 1; -} - -static void -__x2apic_send_IPI_dest(unsigned int apicid, int vector, unsigned int dest) -{ - unsigned long cfg = __prepare_ICR(0, vector, dest); - native_x2apic_icr_write(cfg, apicid); -} - -static unsigned int x2apic_get_apic_id(unsigned long id) -{ - return id; -} - -static unsigned long x2apic_set_apic_id(unsigned int id) -{ - return id; -} - -static int x2apic_phys_pkg_id(int initial_apicid, int index_msb) -{ - return initial_apicid >> index_msb; -} - -static void x2apic_send_IPI_self(int vector) -{ - apic_write(APIC_SELF_IPI, vector); -} - -#endif /* _ASM_X86_X2APIC_H */ diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 8a1ebf9540dd..aa4747569e23 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -51,11 +51,13 @@ struct x86_init_resources { * are set up. * @intr_init: interrupt init code * @trap_init: platform specific trap setup + * @intr_mode_init: interrupt delivery mode setup */ struct x86_init_irqs { void (*pre_vector_init)(void); void (*intr_init)(void); void (*trap_init)(void); + void (*intr_mode_init)(void); }; /** @@ -115,6 +117,20 @@ struct x86_init_pci { }; /** + * struct x86_hyper_init - x86 hypervisor init functions + * @init_platform: platform setup + * @guest_late_init: guest late init + * @x2apic_available: X2APIC detection + * @init_mem_mapping: setup early mappings during init_mem_mapping() + */ +struct x86_hyper_init { + void (*init_platform)(void); + void (*guest_late_init)(void); + bool (*x2apic_available)(void); + void (*init_mem_mapping)(void); +}; + +/** * struct x86_init_ops - functions for platform specific setup * */ @@ -127,6 +143,7 @@ struct x86_init_ops { struct x86_init_timers timers; struct x86_init_iommu iommu; struct x86_init_pci pci; + struct x86_hyper_init hyper; }; /** @@ -195,11 +212,21 @@ enum x86_legacy_i8042_state { struct x86_legacy_features { enum x86_legacy_i8042_state i8042; int rtc; + int no_vga; int reserve_bios_regions; struct x86_legacy_devices devices; }; /** + * struct x86_hyper_runtime - x86 hypervisor specific runtime callbacks + * + * @pin_vcpu: pin current vcpu to specified physical cpu (run rarely) + */ +struct x86_hyper_runtime { + void (*pin_vcpu)(int cpu); +}; + +/** * struct x86_platform_ops - platform specific runtime functions * @calibrate_cpu: calibrate CPU * @calibrate_tsc: calibrate TSC, if different from CPU @@ -218,6 +245,7 @@ struct x86_legacy_features { * possible in x86_early_init_platform_quirks() by * only using the current x86_hardware_subarch * semantics. + * @hyper: x86 hypervisor specific runtime callbacks */ struct x86_platform_ops { unsigned long (*calibrate_cpu)(void); @@ -233,6 +261,7 @@ struct x86_platform_ops { void (*apic_post_init)(void); struct x86_legacy_features legacy; void (*set_legacy_features)(void); + struct x86_hyper_runtime hyper; }; struct pci_dev; diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h index 554aa8f24f91..09cc06483bed 100644 --- a/arch/x86/include/uapi/asm/kvm_para.h +++ b/arch/x86/include/uapi/asm/kvm_para.h @@ -110,5 +110,4 @@ struct kvm_vcpu_pv_apf_data { #define KVM_PV_EOI_ENABLED KVM_PV_EOI_MASK #define KVM_PV_EOI_DISABLED 0x0 - #endif /* _UAPI_ASM_X86_KVM_PARA_H */ diff --git a/arch/x86/include/uapi/asm/processor-flags.h b/arch/x86/include/uapi/asm/processor-flags.h index 6f3355399665..7e1e730396ae 100644 --- a/arch/x86/include/uapi/asm/processor-flags.h +++ b/arch/x86/include/uapi/asm/processor-flags.h @@ -105,6 +105,8 @@ #define X86_CR4_OSFXSR _BITUL(X86_CR4_OSFXSR_BIT) #define X86_CR4_OSXMMEXCPT_BIT 10 /* enable unmasked SSE exceptions */ #define X86_CR4_OSXMMEXCPT _BITUL(X86_CR4_OSXMMEXCPT_BIT) +#define X86_CR4_UMIP_BIT 11 /* enable UMIP support */ +#define X86_CR4_UMIP _BITUL(X86_CR4_UMIP_BIT) #define X86_CR4_LA57_BIT 12 /* enable 5-level page tables */ #define X86_CR4_LA57 _BITUL(X86_CR4_LA57_BIT) #define X86_CR4_VMXE_BIT 13 /* enable VMX virtualization */ @@ -152,5 +154,8 @@ #define CX86_ARR_BASE 0xc4 #define CX86_RCR_BASE 0xdc +#define CR0_STATE (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \ + X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \ + X86_CR0_PG) #endif /* _UAPI_ASM_X86_PROCESSOR_FLAGS_H */ diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 5f70044340ff..81bb565f4497 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -25,9 +25,9 @@ endif KASAN_SANITIZE_head$(BITS).o := n KASAN_SANITIZE_dumpstack.o := n KASAN_SANITIZE_dumpstack_$(BITS).o := n -KASAN_SANITIZE_stacktrace.o := n +KASAN_SANITIZE_stacktrace.o := n +KASAN_SANITIZE_paravirt.o := n -OBJECT_FILES_NON_STANDARD_head_$(BITS).o := y OBJECT_FILES_NON_STANDARD_relocate_kernel_$(BITS).o := y OBJECT_FILES_NON_STANDARD_ftrace_$(BITS).o := y OBJECT_FILES_NON_STANDARD_test_nx.o := y @@ -127,10 +127,11 @@ obj-$(CONFIG_EFI) += sysfb_efi.o obj-$(CONFIG_PERF_EVENTS) += perf_regs.o obj-$(CONFIG_TRACING) += tracepoint.o obj-$(CONFIG_SCHED_MC_PRIO) += itmt.o +obj-$(CONFIG_X86_INTEL_UMIP) += umip.o -obj-$(CONFIG_ORC_UNWINDER) += unwind_orc.o -obj-$(CONFIG_FRAME_POINTER_UNWINDER) += unwind_frame.o -obj-$(CONFIG_GUESS_UNWINDER) += unwind_guess.o +obj-$(CONFIG_UNWINDER_ORC) += unwind_orc.o +obj-$(CONFIG_UNWINDER_FRAME_POINTER) += unwind_frame.o +obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o ### # 64 bit specific files diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 079535e53e2a..ef9e02e614d0 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -961,6 +961,11 @@ static int __init acpi_parse_fadt(struct acpi_table_header *table) x86_platform.legacy.rtc = 0; } + if (acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_VGA) { + pr_debug("ACPI: probing for VGA not safe\n"); + x86_platform.legacy.no_vga = 1; + } + #ifdef CONFIG_X86_PM_TIMER /* detect the location of the ACPI PM Timer */ if (acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID) { diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 3344d3382e91..dbaf14d69ebd 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -442,7 +442,6 @@ static void alternatives_smp_lock(const s32 *start, const s32 *end, { const s32 *poff; - mutex_lock(&text_mutex); for (poff = start; poff < end; poff++) { u8 *ptr = (u8 *)poff + *poff; @@ -452,7 +451,6 @@ static void alternatives_smp_lock(const s32 *start, const s32 *end, if (*ptr == 0x3e) text_poke(ptr, ((unsigned char []){0xf0}), 1); } - mutex_unlock(&text_mutex); } static void alternatives_smp_unlock(const s32 *start, const s32 *end, @@ -460,7 +458,6 @@ static void alternatives_smp_unlock(const s32 *start, const s32 *end, { const s32 *poff; - mutex_lock(&text_mutex); for (poff = start; poff < end; poff++) { u8 *ptr = (u8 *)poff + *poff; @@ -470,7 +467,6 @@ static void alternatives_smp_unlock(const s32 *start, const s32 *end, if (*ptr == 0xf0) text_poke(ptr, ((unsigned char []){0x3E}), 1); } - mutex_unlock(&text_mutex); } struct smp_alt_module { @@ -489,8 +485,7 @@ struct smp_alt_module { struct list_head next; }; static LIST_HEAD(smp_alt_modules); -static DEFINE_MUTEX(smp_alt); -static bool uniproc_patched = false; /* protected by smp_alt */ +static bool uniproc_patched = false; /* protected by text_mutex */ void __init_or_module alternatives_smp_module_add(struct module *mod, char *name, @@ -499,7 +494,7 @@ void __init_or_module alternatives_smp_module_add(struct module *mod, { struct smp_alt_module *smp; - mutex_lock(&smp_alt); + mutex_lock(&text_mutex); if (!uniproc_patched) goto unlock; @@ -526,14 +521,14 @@ void __init_or_module alternatives_smp_module_add(struct module *mod, smp_unlock: alternatives_smp_unlock(locks, locks_end, text, text_end); unlock: - mutex_unlock(&smp_alt); + mutex_unlock(&text_mutex); } void __init_or_module alternatives_smp_module_del(struct module *mod) { struct smp_alt_module *item; - mutex_lock(&smp_alt); + mutex_lock(&text_mutex); list_for_each_entry(item, &smp_alt_modules, next) { if (mod != item->mod) continue; @@ -541,7 +536,7 @@ void __init_or_module alternatives_smp_module_del(struct module *mod) kfree(item); break; } - mutex_unlock(&smp_alt); + mutex_unlock(&text_mutex); } void alternatives_enable_smp(void) @@ -551,7 +546,7 @@ void alternatives_enable_smp(void) /* Why bother if there are no other CPUs? */ BUG_ON(num_possible_cpus() == 1); - mutex_lock(&smp_alt); + mutex_lock(&text_mutex); if (uniproc_patched) { pr_info("switching to SMP code\n"); @@ -563,10 +558,13 @@ void alternatives_enable_smp(void) mod->text, mod->text_end); uniproc_patched = false; } - mutex_unlock(&smp_alt); + mutex_unlock(&text_mutex); } -/* Return 1 if the address range is reserved for smp-alternatives */ +/* + * Return 1 if the address range is reserved for SMP-alternatives. + * Must hold text_mutex. + */ int alternatives_text_reserved(void *start, void *end) { struct smp_alt_module *mod; @@ -574,6 +572,8 @@ int alternatives_text_reserved(void *start, void *end) u8 *text_start = start; u8 *text_end = end; + lockdep_assert_held(&text_mutex); + list_for_each_entry(mod, &smp_alt_modules, next) { if (mod->text > text_end || mod->text_end < text_start) continue; diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile index 2fb7309c6900..a9e08924927e 100644 --- a/arch/x86/kernel/apic/Makefile +++ b/arch/x86/kernel/apic/Makefile @@ -7,7 +7,7 @@ # In particualr, smp_apic_timer_interrupt() is called in random places. KCOV_INSTRUMENT := n -obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o ipi.o vector.o +obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_common.o apic_noop.o ipi.o vector.o obj-y += hw_nmi.o obj-$(CONFIG_X86_IO_APIC) += io_apic.o diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index ff891772c9f8..6e272f3ea984 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -211,11 +211,7 @@ static inline int lapic_get_version(void) */ static inline int lapic_is_integrated(void) { -#ifdef CONFIG_X86_64 - return 1; -#else return APIC_INTEGRATED(lapic_get_version()); -#endif } /* @@ -298,14 +294,11 @@ int get_physical_broadcast(void) */ int lapic_get_maxlvt(void) { - unsigned int v; - - v = apic_read(APIC_LVR); /* * - we always have APIC integrated on 64bit mode * - 82489DXs do not report # of LVT entries */ - return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2; + return lapic_is_integrated() ? GET_APIC_MAXLVT(apic_read(APIC_LVR)) : 2; } /* @@ -1229,53 +1222,100 @@ void __init sync_Arb_IDs(void) APIC_INT_LEVELTRIG | APIC_DM_INIT); } -/* - * An initial setup of the virtual wire mode. - */ -void __init init_bsp_APIC(void) +enum apic_intr_mode_id apic_intr_mode; + +static int __init apic_intr_mode_select(void) { - unsigned int value; + /* Check kernel option */ + if (disable_apic) { + pr_info("APIC disabled via kernel command line\n"); + return APIC_PIC; + } - /* - * Don't do the setup now if we have a SMP BIOS as the - * through-I/O-APIC virtual wire mode might be active. - */ - if (smp_found_config || !boot_cpu_has(X86_FEATURE_APIC)) - return; + /* Check BIOS */ +#ifdef CONFIG_X86_64 + /* On 64-bit, the APIC must be integrated, Check local APIC only */ + if (!boot_cpu_has(X86_FEATURE_APIC)) { + disable_apic = 1; + pr_info("APIC disabled by BIOS\n"); + return APIC_PIC; + } +#else + /* On 32-bit, the APIC may be integrated APIC or 82489DX */ - /* - * Do not trust the local APIC being empty at bootup. - */ - clear_local_APIC(); + /* Neither 82489DX nor integrated APIC ? */ + if (!boot_cpu_has(X86_FEATURE_APIC) && !smp_found_config) { + disable_apic = 1; + return APIC_PIC; + } - /* - * Enable APIC. - */ - value = apic_read(APIC_SPIV); - value &= ~APIC_VECTOR_MASK; - value |= APIC_SPIV_APIC_ENABLED; + /* If the BIOS pretends there is an integrated APIC ? */ + if (!boot_cpu_has(X86_FEATURE_APIC) && + APIC_INTEGRATED(boot_cpu_apic_version)) { + disable_apic = 1; + pr_err(FW_BUG "Local APIC %d not detected, force emulation\n", + boot_cpu_physical_apicid); + return APIC_PIC; + } +#endif -#ifdef CONFIG_X86_32 - /* This bit is reserved on P4/Xeon and should be cleared */ - if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && - (boot_cpu_data.x86 == 15)) - value &= ~APIC_SPIV_FOCUS_DISABLED; - else + /* Check MP table or ACPI MADT configuration */ + if (!smp_found_config) { + disable_ioapic_support(); + if (!acpi_lapic) { + pr_info("APIC: ACPI MADT or MP tables are not detected\n"); + return APIC_VIRTUAL_WIRE_NO_CONFIG; + } + return APIC_VIRTUAL_WIRE; + } + +#ifdef CONFIG_SMP + /* If SMP should be disabled, then really disable it! */ + if (!setup_max_cpus) { + pr_info("APIC: SMP mode deactivated\n"); + return APIC_SYMMETRIC_IO_NO_ROUTING; + } + + if (read_apic_id() != boot_cpu_physical_apicid) { + panic("Boot APIC ID in local APIC unexpected (%d vs %d)", + read_apic_id(), boot_cpu_physical_apicid); + /* Or can we switch back to PIC here? */ + } #endif - value |= APIC_SPIV_FOCUS_DISABLED; - value |= SPURIOUS_APIC_VECTOR; - apic_write(APIC_SPIV, value); - /* - * Set up the virtual wire mode. - */ - apic_write(APIC_LVT0, APIC_DM_EXTINT); - value = APIC_DM_NMI; - if (!lapic_is_integrated()) /* 82489DX */ - value |= APIC_LVT_LEVEL_TRIGGER; - if (apic_extnmi == APIC_EXTNMI_NONE) - value |= APIC_LVT_MASKED; - apic_write(APIC_LVT1, value); + return APIC_SYMMETRIC_IO; +} + +/* Init the interrupt delivery mode for the BSP */ +void __init apic_intr_mode_init(void) +{ + bool upmode = IS_ENABLED(CONFIG_UP_LATE_INIT); + + apic_intr_mode = apic_intr_mode_select(); + + switch (apic_intr_mode) { + case APIC_PIC: + pr_info("APIC: Keep in PIC mode(8259)\n"); + return; + case APIC_VIRTUAL_WIRE: + pr_info("APIC: Switch to virtual wire mode setup\n"); + default_setup_apic_routing(); + break; + case APIC_VIRTUAL_WIRE_NO_CONFIG: + pr_info("APIC: Switch to virtual wire mode setup with no configuration\n"); + upmode = true; + default_setup_apic_routing(); + break; + case APIC_SYMMETRIC_IO: + pr_info("APIC: Switch to symmetric I/O mode setup\n"); + default_setup_apic_routing(); + break; + case APIC_SYMMETRIC_IO_NO_ROUTING: + pr_info("APIC: Switch to symmetric I/O mode setup in no SMP routine\n"); + break; + } + + apic_bsp_setup(upmode); } static void lapic_setup_esr(void) @@ -1473,7 +1513,7 @@ void setup_local_APIC(void) /* * Set up LVT0, LVT1: * - * set up through-local-APIC on the BP's LINT0. This is not + * set up through-local-APIC on the boot CPU's LINT0. This is not * strictly necessary in pure symmetric-IO mode, but sometimes * we delegate interrupts to the 8259A. */ @@ -1499,7 +1539,9 @@ void setup_local_APIC(void) value = APIC_DM_NMI; else value = APIC_DM_NMI | APIC_LVT_MASKED; - if (!lapic_is_integrated()) /* 82489DX */ + + /* Is 82489DX ? */ + if (!lapic_is_integrated()) value |= APIC_LVT_LEVEL_TRIGGER; apic_write(APIC_LVT1, value); @@ -1645,7 +1687,7 @@ static __init void try_to_enable_x2apic(int remap_mode) * under KVM */ if (max_physical_apicid > 255 || - !hypervisor_x2apic_available()) { + !x86_init.hyper.x2apic_available()) { pr_info("x2apic: IRQ remapping doesn't support X2APIC mode\n"); x2apic_disable(); return; @@ -1885,8 +1927,8 @@ void __init init_apic_mappings(void) * yeah -- we lie about apic_version * in case if apic was disabled via boot option * but it's not a problem for SMP compiled kernel - * since smp_sanity_check is prepared for such a case - * and disable smp mode + * since apic_intr_mode_select is prepared for such + * a case and disable smp mode */ boot_cpu_apic_version = GET_APIC_VERSION(apic_read(APIC_LVR)); } @@ -2242,44 +2284,6 @@ int hard_smp_processor_id(void) return read_apic_id(); } -void default_init_apic_ldr(void) -{ - unsigned long val; - - apic_write(APIC_DFR, APIC_DFR_VALUE); - val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; - val |= SET_APIC_LOGICAL_ID(1UL << smp_processor_id()); - apic_write(APIC_LDR, val); -} - -int default_cpu_mask_to_apicid(const struct cpumask *mask, - struct irq_data *irqdata, - unsigned int *apicid) -{ - unsigned int cpu = cpumask_first(mask); - - if (cpu >= nr_cpu_ids) - return -EINVAL; - *apicid = per_cpu(x86_cpu_to_apicid, cpu); - irq_data_update_effective_affinity(irqdata, cpumask_of(cpu)); - return 0; -} - -int flat_cpu_mask_to_apicid(const struct cpumask *mask, - struct irq_data *irqdata, - unsigned int *apicid) - -{ - struct cpumask *effmsk = irq_data_get_effective_affinity_mask(irqdata); - unsigned long cpu_mask = cpumask_bits(mask)[0] & APIC_ALL_CPUS; - - if (!cpu_mask) - return -EINVAL; - *apicid = (unsigned int)cpu_mask; - cpumask_bits(effmsk)[0] = cpu_mask; - return 0; -} - /* * Override the generic EOI implementation with an optimized version. * Only called during early boot when only one CPU is active and with @@ -2322,72 +2326,27 @@ static void __init apic_bsp_up_setup(void) * Returns: * apic_id of BSP APIC */ -int __init apic_bsp_setup(bool upmode) +void __init apic_bsp_setup(bool upmode) { - int id; - connect_bsp_APIC(); if (upmode) apic_bsp_up_setup(); setup_local_APIC(); - if (x2apic_mode) - id = apic_read(APIC_LDR); - else - id = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR)); - enable_IO_APIC(); end_local_APIC_setup(); irq_remap_enable_fault_handling(); setup_IO_APIC(); - /* Setup local timer */ - x86_init.timers.setup_percpu_clockev(); - return id; -} - -/* - * This initializes the IO-APIC and APIC hardware if this is - * a UP kernel. - */ -int __init APIC_init_uniprocessor(void) -{ - if (disable_apic) { - pr_info("Apic disabled\n"); - return -1; - } -#ifdef CONFIG_X86_64 - if (!boot_cpu_has(X86_FEATURE_APIC)) { - disable_apic = 1; - pr_info("Apic disabled by BIOS\n"); - return -1; - } -#else - if (!smp_found_config && !boot_cpu_has(X86_FEATURE_APIC)) - return -1; - - /* - * Complain if the BIOS pretends there is one. - */ - if (!boot_cpu_has(X86_FEATURE_APIC) && - APIC_INTEGRATED(boot_cpu_apic_version)) { - pr_err("BIOS bug, local APIC 0x%x not detected!...\n", - boot_cpu_physical_apicid); - return -1; - } -#endif - - if (!smp_found_config) - disable_ioapic_support(); - - default_setup_apic_routing(); - apic_bsp_setup(true); - return 0; } #ifdef CONFIG_UP_LATE_INIT void __init up_late_init(void) { - APIC_init_uniprocessor(); + if (apic_intr_mode == APIC_PIC) + return; + + /* Setup local timer */ + x86_init.timers.setup_percpu_clockev(); } #endif diff --git a/arch/x86/kernel/apic/apic_common.c b/arch/x86/kernel/apic/apic_common.c new file mode 100644 index 000000000000..a360801779ae --- /dev/null +++ b/arch/x86/kernel/apic/apic_common.c @@ -0,0 +1,46 @@ +/* + * Common functions shared between the various APIC flavours + * + * SPDX-License-Identifier: GPL-2.0 + */ +#include <linux/irq.h> +#include <asm/apic.h> + +u32 apic_default_calc_apicid(unsigned int cpu) +{ + return per_cpu(x86_cpu_to_apicid, cpu); +} + +u32 apic_flat_calc_apicid(unsigned int cpu) +{ + return 1U << cpu; +} + +bool default_check_apicid_used(physid_mask_t *map, int apicid) +{ + return physid_isset(apicid, *map); +} + +void default_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap) +{ + *retmap = *phys_map; +} + +int default_cpu_present_to_apicid(int mps_cpu) +{ + if (mps_cpu < nr_cpu_ids && cpu_present(mps_cpu)) + return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu); + else + return BAD_APICID; +} +EXPORT_SYMBOL_GPL(default_cpu_present_to_apicid); + +int default_check_phys_apicid_present(int phys_apicid) +{ + return physid_isset(phys_apicid, phys_cpu_present_map); +} + +int default_apic_id_valid(int apicid) +{ + return (apicid < 255); +} diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index dedd5a41ba48..aa85690e9b64 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -119,7 +119,7 @@ static unsigned int flat_get_apic_id(unsigned long x) return (x >> 24) & 0xFF; } -static unsigned long set_apic_id(unsigned int id) +static u32 set_apic_id(unsigned int id) { return (id & 0xFF) << 24; } @@ -154,12 +154,10 @@ static struct apic apic_flat __ro_after_init = { .irq_delivery_mode = dest_LowestPrio, .irq_dest_mode = 1, /* logical */ - .target_cpus = online_target_cpus, .disable_esr = 0, .dest_logical = APIC_DEST_LOGICAL, .check_apicid_used = NULL, - .vector_allocation_domain = flat_vector_allocation_domain, .init_apic_ldr = flat_init_apic_ldr, .ioapic_phys_id_map = NULL, @@ -172,7 +170,7 @@ static struct apic apic_flat __ro_after_init = { .get_apic_id = flat_get_apic_id, .set_apic_id = set_apic_id, - .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, + .calc_dest_apicid = apic_flat_calc_apicid, .send_IPI = default_send_IPI_single, .send_IPI_mask = flat_send_IPI_mask, @@ -249,12 +247,10 @@ static struct apic apic_physflat __ro_after_init = { .irq_delivery_mode = dest_Fixed, .irq_dest_mode = 0, /* physical */ - .target_cpus = online_target_cpus, .disable_esr = 0, .dest_logical = 0, .check_apicid_used = NULL, - .vector_allocation_domain = default_vector_allocation_domain, /* not needed, but shouldn't hurt: */ .init_apic_ldr = flat_init_apic_ldr, @@ -268,7 +264,7 @@ static struct apic apic_physflat __ro_after_init = { .get_apic_id = flat_get_apic_id, .set_apic_id = set_apic_id, - .cpu_mask_to_apicid = default_cpu_mask_to_apicid, + .calc_dest_apicid = apic_default_calc_apicid, .send_IPI = default_send_IPI_single_phys, .send_IPI_mask = default_send_IPI_mask_sequence_phys, diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index c8d211277315..7b659c4480c9 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c @@ -84,20 +84,6 @@ static int noop_apic_id_registered(void) return physid_isset(0, phys_cpu_present_map); } -static const struct cpumask *noop_target_cpus(void) -{ - /* only BSP here */ - return cpumask_of(0); -} - -static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask, - const struct cpumask *mask) -{ - if (cpu != 0) - pr_warning("APIC: Vector allocated for non-BSP cpu\n"); - cpumask_copy(retmask, cpumask_of(cpu)); -} - static u32 noop_apic_read(u32 reg) { WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_APIC) && !disable_apic); @@ -109,6 +95,13 @@ static void noop_apic_write(u32 reg, u32 v) WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_APIC) && !disable_apic); } +#ifdef CONFIG_X86_32 +static int noop_x86_32_early_logical_apicid(int cpu) +{ + return BAD_APICID; +} +#endif + struct apic apic_noop __ro_after_init = { .name = "noop", .probe = noop_probe, @@ -121,12 +114,10 @@ struct apic apic_noop __ro_after_init = { /* logical delivery broadcast to all CPUs: */ .irq_dest_mode = 1, - .target_cpus = noop_target_cpus, .disable_esr = 0, .dest_logical = APIC_DEST_LOGICAL, .check_apicid_used = default_check_apicid_used, - .vector_allocation_domain = noop_vector_allocation_domain, .init_apic_ldr = noop_init_apic_ldr, .ioapic_phys_id_map = default_ioapic_phys_id_map, @@ -142,7 +133,7 @@ struct apic apic_noop __ro_after_init = { .get_apic_id = noop_get_apic_id, .set_apic_id = NULL, - .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, + .calc_dest_apicid = apic_flat_calc_apicid, .send_IPI = noop_send_IPI, .send_IPI_mask = noop_send_IPI_mask, diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index 2fda912219a6..134e04506ab4 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c @@ -38,7 +38,7 @@ static unsigned int numachip1_get_apic_id(unsigned long x) return id; } -static unsigned long numachip1_set_apic_id(unsigned int id) +static u32 numachip1_set_apic_id(unsigned int id) { return (id & 0xff) << 24; } @@ -51,7 +51,7 @@ static unsigned int numachip2_get_apic_id(unsigned long x) return ((mcfg >> (28 - 8)) & 0xfff00) | (x >> 24); } -static unsigned long numachip2_set_apic_id(unsigned int id) +static u32 numachip2_set_apic_id(unsigned int id) { return id << 24; } @@ -249,12 +249,10 @@ static const struct apic apic_numachip1 __refconst = { .irq_delivery_mode = dest_Fixed, .irq_dest_mode = 0, /* physical */ - .target_cpus = online_target_cpus, .disable_esr = 0, .dest_logical = 0, .check_apicid_used = NULL, - .vector_allocation_domain = default_vector_allocation_domain, .init_apic_ldr = flat_init_apic_ldr, .ioapic_phys_id_map = NULL, @@ -267,7 +265,7 @@ static const struct apic apic_numachip1 __refconst = { .get_apic_id = numachip1_get_apic_id, .set_apic_id = numachip1_set_apic_id, - .cpu_mask_to_apicid = default_cpu_mask_to_apicid, + .calc_dest_apicid = apic_default_calc_apicid, .send_IPI = numachip_send_IPI_one, .send_IPI_mask = numachip_send_IPI_mask, @@ -300,12 +298,10 @@ static const struct apic apic_numachip2 __refconst = { .irq_delivery_mode = dest_Fixed, .irq_dest_mode = 0, /* physical */ - .target_cpus = online_target_cpus, .disable_esr = 0, .dest_logical = 0, .check_apicid_used = NULL, - .vector_allocation_domain = default_vector_allocation_domain, .init_apic_ldr = flat_init_apic_ldr, .ioapic_phys_id_map = NULL, @@ -318,7 +314,7 @@ static const struct apic apic_numachip2 __refconst = { .get_apic_id = numachip2_get_apic_id, .set_apic_id = numachip2_set_apic_id, - .cpu_mask_to_apicid = default_cpu_mask_to_apicid, + .calc_dest_apicid = apic_default_calc_apicid, .send_IPI = numachip_send_IPI_one, .send_IPI_mask = numachip_send_IPI_mask, diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index e12fbcfc9571..afee386ff711 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c @@ -27,9 +27,9 @@ static int bigsmp_apic_id_registered(void) return 1; } -static unsigned long bigsmp_check_apicid_used(physid_mask_t *map, int apicid) +static bool bigsmp_check_apicid_used(physid_mask_t *map, int apicid) { - return 0; + return false; } static int bigsmp_early_logical_apicid(int cpu) @@ -155,12 +155,10 @@ static struct apic apic_bigsmp __ro_after_init = { /* phys delivery to target CPU: */ .irq_dest_mode = 0, - .target_cpus = default_target_cpus, .disable_esr = 1, .dest_logical = 0, .check_apicid_used = bigsmp_check_apicid_used, - .vector_allocation_domain = default_vector_allocation_domain, .init_apic_ldr = bigsmp_init_apic_ldr, .ioapic_phys_id_map = bigsmp_ioapic_phys_id_map, @@ -173,7 +171,7 @@ static struct apic apic_bigsmp __ro_after_init = { .get_apic_id = bigsmp_get_apic_id, .set_apic_id = NULL, - .cpu_mask_to_apicid = default_cpu_mask_to_apicid, + .calc_dest_apicid = apic_default_calc_apicid, .send_IPI = default_send_IPI_single_phys, .send_IPI_mask = default_send_IPI_mask_sequence_phys, diff --git a/arch/x86/kernel/apic/htirq.c b/arch/x86/kernel/apic/htirq.c index 56ccf9346b08..b07075dce8b7 100644 --- a/arch/x86/kernel/apic/htirq.c +++ b/arch/x86/kernel/apic/htirq.c @@ -112,8 +112,8 @@ static void htirq_domain_free(struct irq_domain *domain, unsigned int virq, irq_domain_free_irqs_top(domain, virq, nr_irqs); } -static void htirq_domain_activate(struct irq_domain *domain, - struct irq_data *irq_data) +static int htirq_domain_activate(struct irq_domain *domain, + struct irq_data *irq_data, bool early) { struct ht_irq_msg msg; struct irq_cfg *cfg = irqd_cfg(irq_data); @@ -132,6 +132,7 @@ static void htirq_domain_activate(struct irq_domain *domain, HT_IRQ_LOW_MT_ARBITRATED) | HT_IRQ_LOW_IRQ_MASKED; write_ht_irq_msg(irq_data->irq, &msg); + return 0; } static void htirq_domain_deactivate(struct irq_domain *domain, diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 3b89b27945ff..201579dc5242 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1014,6 +1014,7 @@ static int alloc_isa_irq_from_domain(struct irq_domain *domain, info->ioapic_pin)) return -ENOMEM; } else { + info->flags |= X86_IRQ_ALLOC_LEGACY; irq = __irq_domain_alloc_irqs(domain, irq, 1, node, info, true, NULL); if (irq >= 0) { @@ -1586,6 +1587,43 @@ static int __init notimercheck(char *s) } __setup("no_timer_check", notimercheck); +static void __init delay_with_tsc(void) +{ + unsigned long long start, now; + unsigned long end = jiffies + 4; + + start = rdtsc(); + + /* + * We don't know the TSC frequency yet, but waiting for + * 40000000000/HZ TSC cycles is safe: + * 4 GHz == 10 jiffies + * 1 GHz == 40 jiffies + */ + do { + rep_nop(); + now = rdtsc(); + } while ((now - start) < 40000000000UL / HZ && + time_before_eq(jiffies, end)); +} + +static void __init delay_without_tsc(void) +{ + unsigned long end = jiffies + 4; + int band = 1; + + /* + * We don't know any frequency yet, but waiting for + * 40940000000/HZ cycles is safe: + * 4 GHz == 10 jiffies + * 1 GHz == 40 jiffies + * 1 << 1 + 1 << 2 +...+ 1 << 11 = 4094 + */ + do { + __delay(((1U << band++) * 10000000UL) / HZ); + } while (band < 12 && time_before_eq(jiffies, end)); +} + /* * There is a nasty bug in some older SMP boards, their mptable lies * about the timer IRQ. We do the following to work around the situation: @@ -1604,8 +1642,12 @@ static int __init timer_irq_works(void) local_save_flags(flags); local_irq_enable(); - /* Let ten ticks pass... */ - mdelay((10 * 1000) / HZ); + + if (boot_cpu_has(X86_FEATURE_TSC)) + delay_with_tsc(); + else + delay_without_tsc(); + local_irq_restore(flags); /* @@ -1821,26 +1863,36 @@ static void ioapic_ir_ack_level(struct irq_data *irq_data) eoi_ioapic_pin(data->entry.vector, data); } +static void ioapic_configure_entry(struct irq_data *irqd) +{ + struct mp_chip_data *mpd = irqd->chip_data; + struct irq_cfg *cfg = irqd_cfg(irqd); + struct irq_pin_list *entry; + + /* + * Only update when the parent is the vector domain, don't touch it + * if the parent is the remapping domain. Check the installed + * ioapic chip to verify that. + */ + if (irqd->chip == &ioapic_chip) { + mpd->entry.dest = cfg->dest_apicid; + mpd->entry.vector = cfg->vector; + } + for_each_irq_pin(entry, mpd->irq_2_pin) + __ioapic_write_entry(entry->apic, entry->pin, mpd->entry); +} + static int ioapic_set_affinity(struct irq_data *irq_data, const struct cpumask *mask, bool force) { struct irq_data *parent = irq_data->parent_data; - struct mp_chip_data *data = irq_data->chip_data; - struct irq_pin_list *entry; - struct irq_cfg *cfg; unsigned long flags; int ret; ret = parent->chip->irq_set_affinity(parent, mask, force); raw_spin_lock_irqsave(&ioapic_lock, flags); - if (ret >= 0 && ret != IRQ_SET_MASK_OK_DONE) { - cfg = irqd_cfg(irq_data); - data->entry.dest = cfg->dest_apicid; - data->entry.vector = cfg->vector; - for_each_irq_pin(entry, data->irq_2_pin) - __ioapic_write_entry(entry->apic, entry->pin, - data->entry); - } + if (ret >= 0 && ret != IRQ_SET_MASK_OK_DONE) + ioapic_configure_entry(irq_data); raw_spin_unlock_irqrestore(&ioapic_lock, flags); return ret; @@ -2097,7 +2149,7 @@ static inline void __init check_timer(void) unmask_ioapic_irq(irq_get_irq_data(0)); } irq_domain_deactivate_irq(irq_data); - irq_domain_activate_irq(irq_data); + irq_domain_activate_irq(irq_data, false); if (timer_irq_works()) { if (disable_timer_pin_1 > 0) clear_IO_APIC_pin(0, pin1); @@ -2119,7 +2171,7 @@ static inline void __init check_timer(void) */ replace_pin_at_irq_node(data, node, apic1, pin1, apic2, pin2); irq_domain_deactivate_irq(irq_data); - irq_domain_activate_irq(irq_data); + irq_domain_activate_irq(irq_data, false); legacy_pic->unmask(0); if (timer_irq_works()) { apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); @@ -2513,52 +2565,9 @@ int acpi_get_override_irq(u32 gsi, int *trigger, int *polarity) } /* - * This function currently is only a helper for the i386 smp boot process where - * we need to reprogram the ioredtbls to cater for the cpus which have come online - * so mask in all cases should simply be apic->target_cpus() + * This function updates target affinity of IOAPIC interrupts to include + * the CPUs which came online during SMP bringup. */ -#ifdef CONFIG_SMP -void __init setup_ioapic_dest(void) -{ - int pin, ioapic, irq, irq_entry; - const struct cpumask *mask; - struct irq_desc *desc; - struct irq_data *idata; - struct irq_chip *chip; - - if (skip_ioapic_setup == 1) - return; - - for_each_ioapic_pin(ioapic, pin) { - irq_entry = find_irq_entry(ioapic, pin, mp_INT); - if (irq_entry == -1) - continue; - - irq = pin_2_irq(irq_entry, ioapic, pin, 0); - if (irq < 0 || !mp_init_irq_at_boot(ioapic, irq)) - continue; - - desc = irq_to_desc(irq); - raw_spin_lock_irq(&desc->lock); - idata = irq_desc_get_irq_data(desc); - - /* - * Honour affinities which have been set in early boot - */ - if (!irqd_can_balance(idata) || irqd_affinity_was_set(idata)) - mask = irq_data_get_affinity_mask(idata); - else - mask = apic->target_cpus(); - - chip = irq_data_get_irq_chip(idata); - /* Might be lapic_chip for irq 0 */ - if (chip->irq_set_affinity) - chip->irq_set_affinity(idata, mask, false); - raw_spin_unlock_irq(&desc->lock); - } -} -#endif - #define IOAPIC_RESOURCE_NAME_SIZE 11 static struct resource *ioapic_resources; @@ -2978,17 +2987,15 @@ void mp_irqdomain_free(struct irq_domain *domain, unsigned int virq, irq_domain_free_irqs_top(domain, virq, nr_irqs); } -void mp_irqdomain_activate(struct irq_domain *domain, - struct irq_data *irq_data) +int mp_irqdomain_activate(struct irq_domain *domain, + struct irq_data *irq_data, bool early) { unsigned long flags; - struct irq_pin_list *entry; - struct mp_chip_data *data = irq_data->chip_data; raw_spin_lock_irqsave(&ioapic_lock, flags); - for_each_irq_pin(entry, data->irq_2_pin) - __ioapic_write_entry(entry->apic, entry->pin, data->entry); + ioapic_configure_entry(irq_data); raw_spin_unlock_irqrestore(&ioapic_lock, flags); + return 0; } void mp_irqdomain_deactivate(struct irq_domain *domain, diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index 63287659adb6..fa22017de806 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -66,6 +66,31 @@ static void setup_apic_flat_routing(void) #endif } +static int default_apic_id_registered(void) +{ + return physid_isset(read_apic_id(), phys_cpu_present_map); +} + +/* + * Set up the logical destination ID. Intel recommends to set DFR, LDR and + * TPR before enabling an APIC. See e.g. "AP-388 82489DX User's Manual" + * (Intel document number 292116). + */ +static void default_init_apic_ldr(void) +{ + unsigned long val; + + apic_write(APIC_DFR, APIC_DFR_VALUE); + val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; + val |= SET_APIC_LOGICAL_ID(1UL << smp_processor_id()); + apic_write(APIC_LDR, val); +} + +static int default_phys_pkg_id(int cpuid_apic, int index_msb) +{ + return cpuid_apic >> index_msb; +} + /* should be called last. */ static int probe_default(void) { @@ -84,12 +109,10 @@ static struct apic apic_default __ro_after_init = { /* logical delivery broadcast to all CPUs: */ .irq_dest_mode = 1, - .target_cpus = default_target_cpus, .disable_esr = 0, .dest_logical = APIC_DEST_LOGICAL, .check_apicid_used = default_check_apicid_used, - .vector_allocation_domain = flat_vector_allocation_domain, .init_apic_ldr = default_init_apic_ldr, .ioapic_phys_id_map = default_ioapic_phys_id_map, @@ -102,7 +125,7 @@ static struct apic apic_default __ro_after_init = { .get_apic_id = default_get_apic_id, .set_apic_id = NULL, - .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, + .calc_dest_apicid = apic_flat_calc_apicid, .send_IPI = default_send_IPI_single, .send_IPI_mask = default_send_IPI_mask_logical, diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 88c214e75a6b..05c85e693a5d 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -11,6 +11,7 @@ * published by the Free Software Foundation. */ #include <linux/interrupt.h> +#include <linux/seq_file.h> #include <linux/init.h> #include <linux/compiler.h> #include <linux/slab.h> @@ -21,20 +22,30 @@ #include <asm/desc.h> #include <asm/irq_remapping.h> +#include <asm/trace/irq_vectors.h> + struct apic_chip_data { - struct irq_cfg cfg; - cpumask_var_t domain; - cpumask_var_t old_domain; - u8 move_in_progress : 1; + struct irq_cfg hw_irq_cfg; + unsigned int vector; + unsigned int prev_vector; + unsigned int cpu; + unsigned int prev_cpu; + unsigned int irq; + struct hlist_node clist; + unsigned int move_in_progress : 1, + is_managed : 1, + can_reserve : 1, + has_reserved : 1; }; struct irq_domain *x86_vector_domain; EXPORT_SYMBOL_GPL(x86_vector_domain); static DEFINE_RAW_SPINLOCK(vector_lock); -static cpumask_var_t vector_cpumask, vector_searchmask, searched_cpumask; +static cpumask_var_t vector_searchmask; static struct irq_chip lapic_controller; -#ifdef CONFIG_X86_IO_APIC -static struct apic_chip_data *legacy_irq_data[NR_IRQS_LEGACY]; +static struct irq_matrix *vector_matrix; +#ifdef CONFIG_SMP +static DEFINE_PER_CPU(struct hlist_head, cleanup_list); #endif void lock_vector_lock(void) @@ -50,22 +61,37 @@ void unlock_vector_lock(void) raw_spin_unlock(&vector_lock); } -static struct apic_chip_data *apic_chip_data(struct irq_data *irq_data) +void init_irq_alloc_info(struct irq_alloc_info *info, + const struct cpumask *mask) +{ + memset(info, 0, sizeof(*info)); + info->mask = mask; +} + +void copy_irq_alloc_info(struct irq_alloc_info *dst, struct irq_alloc_info *src) { - if (!irq_data) + if (src) + *dst = *src; + else + memset(dst, 0, sizeof(*dst)); +} + +static struct apic_chip_data *apic_chip_data(struct irq_data *irqd) +{ + if (!irqd) return NULL; - while (irq_data->parent_data) - irq_data = irq_data->parent_data; + while (irqd->parent_data) + irqd = irqd->parent_data; - return irq_data->chip_data; + return irqd->chip_data; } -struct irq_cfg *irqd_cfg(struct irq_data *irq_data) +struct irq_cfg *irqd_cfg(struct irq_data *irqd) { - struct apic_chip_data *data = apic_chip_data(irq_data); + struct apic_chip_data *apicd = apic_chip_data(irqd); - return data ? &data->cfg : NULL; + return apicd ? &apicd->hw_irq_cfg : NULL; } EXPORT_SYMBOL_GPL(irqd_cfg); @@ -76,270 +102,395 @@ struct irq_cfg *irq_cfg(unsigned int irq) static struct apic_chip_data *alloc_apic_chip_data(int node) { - struct apic_chip_data *data; + struct apic_chip_data *apicd; - data = kzalloc_node(sizeof(*data), GFP_KERNEL, node); - if (!data) - return NULL; - if (!zalloc_cpumask_var_node(&data->domain, GFP_KERNEL, node)) - goto out_data; - if (!zalloc_cpumask_var_node(&data->old_domain, GFP_KERNEL, node)) - goto out_domain; - return data; -out_domain: - free_cpumask_var(data->domain); -out_data: - kfree(data); - return NULL; -} - -static void free_apic_chip_data(struct apic_chip_data *data) -{ - if (data) { - free_cpumask_var(data->domain); - free_cpumask_var(data->old_domain); - kfree(data); + apicd = kzalloc_node(sizeof(*apicd), GFP_KERNEL, node); + if (apicd) + INIT_HLIST_NODE(&apicd->clist); + return apicd; +} + +static void free_apic_chip_data(struct apic_chip_data *apicd) +{ + kfree(apicd); +} + +static void apic_update_irq_cfg(struct irq_data *irqd, unsigned int vector, + unsigned int cpu) +{ + struct apic_chip_data *apicd = apic_chip_data(irqd); + + lockdep_assert_held(&vector_lock); + + apicd->hw_irq_cfg.vector = vector; + apicd->hw_irq_cfg.dest_apicid = apic->calc_dest_apicid(cpu); + irq_data_update_effective_affinity(irqd, cpumask_of(cpu)); + trace_vector_config(irqd->irq, vector, cpu, + apicd->hw_irq_cfg.dest_apicid); +} + +static void apic_update_vector(struct irq_data *irqd, unsigned int newvec, + unsigned int newcpu) +{ + struct apic_chip_data *apicd = apic_chip_data(irqd); + struct irq_desc *desc = irq_data_to_desc(irqd); + + lockdep_assert_held(&vector_lock); + + trace_vector_update(irqd->irq, newvec, newcpu, apicd->vector, + apicd->cpu); + + /* Setup the vector move, if required */ + if (apicd->vector && cpu_online(apicd->cpu)) { + apicd->move_in_progress = true; + apicd->prev_vector = apicd->vector; + apicd->prev_cpu = apicd->cpu; + } else { + apicd->prev_vector = 0; } + + apicd->vector = newvec; + apicd->cpu = newcpu; + BUG_ON(!IS_ERR_OR_NULL(per_cpu(vector_irq, newcpu)[newvec])); + per_cpu(vector_irq, newcpu)[newvec] = desc; } -static int __assign_irq_vector(int irq, struct apic_chip_data *d, - const struct cpumask *mask, - struct irq_data *irqdata) +static void vector_assign_managed_shutdown(struct irq_data *irqd) { - /* - * NOTE! The local APIC isn't very good at handling - * multiple interrupts at the same interrupt level. - * As the interrupt level is determined by taking the - * vector number and shifting that right by 4, we - * want to spread these out a bit so that they don't - * all fall in the same interrupt level. - * - * Also, we've got to be careful not to trash gate - * 0x80, because int 0x80 is hm, kind of importantish. ;) - */ - static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START; - static int current_offset = VECTOR_OFFSET_START % 16; - int cpu, vector; + unsigned int cpu = cpumask_first(cpu_online_mask); - /* - * If there is still a move in progress or the previous move has not - * been cleaned up completely, tell the caller to come back later. - */ - if (d->move_in_progress || - cpumask_intersects(d->old_domain, cpu_online_mask)) - return -EBUSY; + apic_update_irq_cfg(irqd, MANAGED_IRQ_SHUTDOWN_VECTOR, cpu); +} - /* Only try and allocate irqs on cpus that are present */ - cpumask_clear(d->old_domain); - cpumask_clear(searched_cpumask); - cpu = cpumask_first_and(mask, cpu_online_mask); - while (cpu < nr_cpu_ids) { - int new_cpu, offset; +static int reserve_managed_vector(struct irq_data *irqd) +{ + const struct cpumask *affmsk = irq_data_get_affinity_mask(irqd); + struct apic_chip_data *apicd = apic_chip_data(irqd); + unsigned long flags; + int ret; - /* Get the possible target cpus for @mask/@cpu from the apic */ - apic->vector_allocation_domain(cpu, vector_cpumask, mask); + raw_spin_lock_irqsave(&vector_lock, flags); + apicd->is_managed = true; + ret = irq_matrix_reserve_managed(vector_matrix, affmsk); + raw_spin_unlock_irqrestore(&vector_lock, flags); + trace_vector_reserve_managed(irqd->irq, ret); + return ret; +} - /* - * Clear the offline cpus from @vector_cpumask for searching - * and verify whether the result overlaps with @mask. If true, - * then the call to apic->cpu_mask_to_apicid() will - * succeed as well. If not, no point in trying to find a - * vector in this mask. - */ - cpumask_and(vector_searchmask, vector_cpumask, cpu_online_mask); - if (!cpumask_intersects(vector_searchmask, mask)) - goto next_cpu; - - if (cpumask_subset(vector_cpumask, d->domain)) { - if (cpumask_equal(vector_cpumask, d->domain)) - goto success; - /* - * Mark the cpus which are not longer in the mask for - * cleanup. - */ - cpumask_andnot(d->old_domain, d->domain, vector_cpumask); - vector = d->cfg.vector; - goto update; - } +static void reserve_irq_vector_locked(struct irq_data *irqd) +{ + struct apic_chip_data *apicd = apic_chip_data(irqd); - vector = current_vector; - offset = current_offset; -next: - vector += 16; - if (vector >= FIRST_SYSTEM_VECTOR) { - offset = (offset + 1) % 16; - vector = FIRST_EXTERNAL_VECTOR + offset; - } + irq_matrix_reserve(vector_matrix); + apicd->can_reserve = true; + apicd->has_reserved = true; + trace_vector_reserve(irqd->irq, 0); + vector_assign_managed_shutdown(irqd); +} - /* If the search wrapped around, try the next cpu */ - if (unlikely(current_vector == vector)) - goto next_cpu; +static int reserve_irq_vector(struct irq_data *irqd) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&vector_lock, flags); + reserve_irq_vector_locked(irqd); + raw_spin_unlock_irqrestore(&vector_lock, flags); + return 0; +} - if (test_bit(vector, used_vectors)) - goto next; +static int allocate_vector(struct irq_data *irqd, const struct cpumask *dest) +{ + struct apic_chip_data *apicd = apic_chip_data(irqd); + bool resvd = apicd->has_reserved; + unsigned int cpu = apicd->cpu; + int vector = apicd->vector; - for_each_cpu(new_cpu, vector_searchmask) { - if (!IS_ERR_OR_NULL(per_cpu(vector_irq, new_cpu)[vector])) - goto next; - } - /* Found one! */ - current_vector = vector; - current_offset = offset; - /* Schedule the old vector for cleanup on all cpus */ - if (d->cfg.vector) - cpumask_copy(d->old_domain, d->domain); - for_each_cpu(new_cpu, vector_searchmask) - per_cpu(vector_irq, new_cpu)[vector] = irq_to_desc(irq); - goto update; - -next_cpu: - /* - * We exclude the current @vector_cpumask from the requested - * @mask and try again with the next online cpu in the - * result. We cannot modify @mask, so we use @vector_cpumask - * as a temporary buffer here as it will be reassigned when - * calling apic->vector_allocation_domain() above. - */ - cpumask_or(searched_cpumask, searched_cpumask, vector_cpumask); - cpumask_andnot(vector_cpumask, mask, searched_cpumask); - cpu = cpumask_first_and(vector_cpumask, cpu_online_mask); - continue; - } - return -ENOSPC; + lockdep_assert_held(&vector_lock); -update: /* - * Exclude offline cpus from the cleanup mask and set the - * move_in_progress flag when the result is not empty. + * If the current target CPU is online and in the new requested + * affinity mask, there is no point in moving the interrupt from + * one CPU to another. */ - cpumask_and(d->old_domain, d->old_domain, cpu_online_mask); - d->move_in_progress = !cpumask_empty(d->old_domain); - d->cfg.old_vector = d->move_in_progress ? d->cfg.vector : 0; - d->cfg.vector = vector; - cpumask_copy(d->domain, vector_cpumask); -success: - /* - * Cache destination APIC IDs into cfg->dest_apicid. This cannot fail - * as we already established, that mask & d->domain & cpu_online_mask - * is not empty. - * - * vector_searchmask is a subset of d->domain and has the offline - * cpus masked out. - */ - cpumask_and(vector_searchmask, vector_searchmask, mask); - BUG_ON(apic->cpu_mask_to_apicid(vector_searchmask, irqdata, - &d->cfg.dest_apicid)); + if (vector && cpu_online(cpu) && cpumask_test_cpu(cpu, dest)) + return 0; + + vector = irq_matrix_alloc(vector_matrix, dest, resvd, &cpu); + if (vector > 0) + apic_update_vector(irqd, vector, cpu); + trace_vector_alloc(irqd->irq, vector, resvd, vector); + return vector; +} + +static int assign_vector_locked(struct irq_data *irqd, + const struct cpumask *dest) +{ + struct apic_chip_data *apicd = apic_chip_data(irqd); + int vector = allocate_vector(irqd, dest); + + if (vector < 0) + return vector; + + apic_update_irq_cfg(irqd, apicd->vector, apicd->cpu); return 0; } -static int assign_irq_vector(int irq, struct apic_chip_data *data, - const struct cpumask *mask, - struct irq_data *irqdata) +static int assign_irq_vector(struct irq_data *irqd, const struct cpumask *dest) { - int err; unsigned long flags; + int ret; raw_spin_lock_irqsave(&vector_lock, flags); - err = __assign_irq_vector(irq, data, mask, irqdata); + cpumask_and(vector_searchmask, dest, cpu_online_mask); + ret = assign_vector_locked(irqd, vector_searchmask); raw_spin_unlock_irqrestore(&vector_lock, flags); - return err; + return ret; } -static int assign_irq_vector_policy(int irq, int node, - struct apic_chip_data *data, - struct irq_alloc_info *info, - struct irq_data *irqdata) +static int assign_irq_vector_any_locked(struct irq_data *irqd) { - if (info && info->mask) - return assign_irq_vector(irq, data, info->mask, irqdata); - if (node != NUMA_NO_NODE && - assign_irq_vector(irq, data, cpumask_of_node(node), irqdata) == 0) + /* Get the affinity mask - either irq_default_affinity or (user) set */ + const struct cpumask *affmsk = irq_data_get_affinity_mask(irqd); + int node = irq_data_get_node(irqd); + + if (node == NUMA_NO_NODE) + goto all; + /* Try the intersection of @affmsk and node mask */ + cpumask_and(vector_searchmask, cpumask_of_node(node), affmsk); + if (!assign_vector_locked(irqd, vector_searchmask)) + return 0; + /* Try the node mask */ + if (!assign_vector_locked(irqd, cpumask_of_node(node))) return 0; - return assign_irq_vector(irq, data, apic->target_cpus(), irqdata); +all: + /* Try the full affinity mask */ + cpumask_and(vector_searchmask, affmsk, cpu_online_mask); + if (!assign_vector_locked(irqd, vector_searchmask)) + return 0; + /* Try the full online mask */ + return assign_vector_locked(irqd, cpu_online_mask); +} + +static int +assign_irq_vector_policy(struct irq_data *irqd, struct irq_alloc_info *info) +{ + if (irqd_affinity_is_managed(irqd)) + return reserve_managed_vector(irqd); + if (info->mask) + return assign_irq_vector(irqd, info->mask); + /* + * Make only a global reservation with no guarantee. A real vector + * is associated at activation time. + */ + return reserve_irq_vector(irqd); } -static void clear_irq_vector(int irq, struct apic_chip_data *data) +static int +assign_managed_vector(struct irq_data *irqd, const struct cpumask *dest) { - struct irq_desc *desc; - int cpu, vector; + const struct cpumask *affmsk = irq_data_get_affinity_mask(irqd); + struct apic_chip_data *apicd = apic_chip_data(irqd); + int vector, cpu; - if (!data->cfg.vector) + cpumask_and(vector_searchmask, vector_searchmask, affmsk); + cpu = cpumask_first(vector_searchmask); + if (cpu >= nr_cpu_ids) + return -EINVAL; + /* set_affinity might call here for nothing */ + if (apicd->vector && cpumask_test_cpu(apicd->cpu, vector_searchmask)) + return 0; + vector = irq_matrix_alloc_managed(vector_matrix, cpu); + trace_vector_alloc_managed(irqd->irq, vector, vector); + if (vector < 0) + return vector; + apic_update_vector(irqd, vector, cpu); + apic_update_irq_cfg(irqd, vector, cpu); + return 0; +} + +static void clear_irq_vector(struct irq_data *irqd) +{ + struct apic_chip_data *apicd = apic_chip_data(irqd); + bool managed = irqd_affinity_is_managed(irqd); + unsigned int vector = apicd->vector; + + lockdep_assert_held(&vector_lock); + + if (!vector) return; - vector = data->cfg.vector; - for_each_cpu_and(cpu, data->domain, cpu_online_mask) - per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED; + trace_vector_clear(irqd->irq, vector, apicd->cpu, apicd->prev_vector, + apicd->prev_cpu); - data->cfg.vector = 0; - cpumask_clear(data->domain); + per_cpu(vector_irq, apicd->cpu)[vector] = VECTOR_UNUSED; + irq_matrix_free(vector_matrix, apicd->cpu, vector, managed); + apicd->vector = 0; - /* - * If move is in progress or the old_domain mask is not empty, - * i.e. the cleanup IPI has not been processed yet, we need to remove - * the old references to desc from all cpus vector tables. - */ - if (!data->move_in_progress && cpumask_empty(data->old_domain)) + /* Clean up move in progress */ + vector = apicd->prev_vector; + if (!vector) return; - desc = irq_to_desc(irq); - for_each_cpu_and(cpu, data->old_domain, cpu_online_mask) { - for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; - vector++) { - if (per_cpu(vector_irq, cpu)[vector] != desc) - continue; - per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED; - break; - } + per_cpu(vector_irq, apicd->prev_cpu)[vector] = VECTOR_UNUSED; + irq_matrix_free(vector_matrix, apicd->prev_cpu, vector, managed); + apicd->prev_vector = 0; + apicd->move_in_progress = 0; + hlist_del_init(&apicd->clist); +} + +static void x86_vector_deactivate(struct irq_domain *dom, struct irq_data *irqd) +{ + struct apic_chip_data *apicd = apic_chip_data(irqd); + unsigned long flags; + + trace_vector_deactivate(irqd->irq, apicd->is_managed, + apicd->can_reserve, false); + + /* Regular fixed assigned interrupt */ + if (!apicd->is_managed && !apicd->can_reserve) + return; + /* If the interrupt has a global reservation, nothing to do */ + if (apicd->has_reserved) + return; + + raw_spin_lock_irqsave(&vector_lock, flags); + clear_irq_vector(irqd); + if (apicd->can_reserve) + reserve_irq_vector_locked(irqd); + else + vector_assign_managed_shutdown(irqd); + raw_spin_unlock_irqrestore(&vector_lock, flags); +} + +static int activate_reserved(struct irq_data *irqd) +{ + struct apic_chip_data *apicd = apic_chip_data(irqd); + int ret; + + ret = assign_irq_vector_any_locked(irqd); + if (!ret) + apicd->has_reserved = false; + return ret; +} + +static int activate_managed(struct irq_data *irqd) +{ + const struct cpumask *dest = irq_data_get_affinity_mask(irqd); + int ret; + + cpumask_and(vector_searchmask, dest, cpu_online_mask); + if (WARN_ON_ONCE(cpumask_empty(vector_searchmask))) { + /* Something in the core code broke! Survive gracefully */ + pr_err("Managed startup for irq %u, but no CPU\n", irqd->irq); + return EINVAL; + } + + ret = assign_managed_vector(irqd, vector_searchmask); + /* + * This should not happen. The vector reservation got buggered. Handle + * it gracefully. + */ + if (WARN_ON_ONCE(ret < 0)) { + pr_err("Managed startup irq %u, no vector available\n", + irqd->irq); } - data->move_in_progress = 0; + return ret; } -void init_irq_alloc_info(struct irq_alloc_info *info, - const struct cpumask *mask) +static int x86_vector_activate(struct irq_domain *dom, struct irq_data *irqd, + bool early) { - memset(info, 0, sizeof(*info)); - info->mask = mask; + struct apic_chip_data *apicd = apic_chip_data(irqd); + unsigned long flags; + int ret = 0; + + trace_vector_activate(irqd->irq, apicd->is_managed, + apicd->can_reserve, early); + + /* Nothing to do for fixed assigned vectors */ + if (!apicd->can_reserve && !apicd->is_managed) + return 0; + + raw_spin_lock_irqsave(&vector_lock, flags); + if (early || irqd_is_managed_and_shutdown(irqd)) + vector_assign_managed_shutdown(irqd); + else if (apicd->is_managed) + ret = activate_managed(irqd); + else if (apicd->has_reserved) + ret = activate_reserved(irqd); + raw_spin_unlock_irqrestore(&vector_lock, flags); + return ret; } -void copy_irq_alloc_info(struct irq_alloc_info *dst, struct irq_alloc_info *src) +static void vector_free_reserved_and_managed(struct irq_data *irqd) { - if (src) - *dst = *src; - else - memset(dst, 0, sizeof(*dst)); + const struct cpumask *dest = irq_data_get_affinity_mask(irqd); + struct apic_chip_data *apicd = apic_chip_data(irqd); + + trace_vector_teardown(irqd->irq, apicd->is_managed, + apicd->has_reserved); + + if (apicd->has_reserved) + irq_matrix_remove_reserved(vector_matrix); + if (apicd->is_managed) + irq_matrix_remove_managed(vector_matrix, dest); } static void x86_vector_free_irqs(struct irq_domain *domain, unsigned int virq, unsigned int nr_irqs) { - struct apic_chip_data *apic_data; - struct irq_data *irq_data; + struct apic_chip_data *apicd; + struct irq_data *irqd; unsigned long flags; int i; for (i = 0; i < nr_irqs; i++) { - irq_data = irq_domain_get_irq_data(x86_vector_domain, virq + i); - if (irq_data && irq_data->chip_data) { + irqd = irq_domain_get_irq_data(x86_vector_domain, virq + i); + if (irqd && irqd->chip_data) { raw_spin_lock_irqsave(&vector_lock, flags); - clear_irq_vector(virq + i, irq_data->chip_data); - apic_data = irq_data->chip_data; - irq_domain_reset_irq_data(irq_data); + clear_irq_vector(irqd); + vector_free_reserved_and_managed(irqd); + apicd = irqd->chip_data; + irq_domain_reset_irq_data(irqd); raw_spin_unlock_irqrestore(&vector_lock, flags); - free_apic_chip_data(apic_data); -#ifdef CONFIG_X86_IO_APIC - if (virq + i < nr_legacy_irqs()) - legacy_irq_data[virq + i] = NULL; -#endif + free_apic_chip_data(apicd); } } } +static bool vector_configure_legacy(unsigned int virq, struct irq_data *irqd, + struct apic_chip_data *apicd) +{ + unsigned long flags; + bool realloc = false; + + apicd->vector = ISA_IRQ_VECTOR(virq); + apicd->cpu = 0; + + raw_spin_lock_irqsave(&vector_lock, flags); + /* + * If the interrupt is activated, then it must stay at this vector + * position. That's usually the timer interrupt (0). + */ + if (irqd_is_activated(irqd)) { + trace_vector_setup(virq, true, 0); + apic_update_irq_cfg(irqd, apicd->vector, apicd->cpu); + } else { + /* Release the vector */ + apicd->can_reserve = true; + clear_irq_vector(irqd); + realloc = true; + } + raw_spin_unlock_irqrestore(&vector_lock, flags); + return realloc; +} + static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq, unsigned int nr_irqs, void *arg) { struct irq_alloc_info *info = arg; - struct apic_chip_data *data; - struct irq_data *irq_data; + struct apic_chip_data *apicd; + struct irq_data *irqd; int i, err, node; if (disable_apic) @@ -350,34 +501,37 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq, return -ENOSYS; for (i = 0; i < nr_irqs; i++) { - irq_data = irq_domain_get_irq_data(domain, virq + i); - BUG_ON(!irq_data); - node = irq_data_get_node(irq_data); -#ifdef CONFIG_X86_IO_APIC - if (virq + i < nr_legacy_irqs() && legacy_irq_data[virq + i]) - data = legacy_irq_data[virq + i]; - else -#endif - data = alloc_apic_chip_data(node); - if (!data) { + irqd = irq_domain_get_irq_data(domain, virq + i); + BUG_ON(!irqd); + node = irq_data_get_node(irqd); + WARN_ON_ONCE(irqd->chip_data); + apicd = alloc_apic_chip_data(node); + if (!apicd) { err = -ENOMEM; goto error; } - irq_data->chip = &lapic_controller; - irq_data->chip_data = data; - irq_data->hwirq = virq + i; - err = assign_irq_vector_policy(virq + i, node, data, info, - irq_data); - if (err) - goto error; + apicd->irq = virq + i; + irqd->chip = &lapic_controller; + irqd->chip_data = apicd; + irqd->hwirq = virq + i; + irqd_set_single_target(irqd); /* - * If the apic destination mode is physical, then the - * effective affinity is restricted to a single target - * CPU. Mark the interrupt accordingly. + * Legacy vectors are already assigned when the IOAPIC + * takes them over. They stay on the same vector. This is + * required for check_timer() to work correctly as it might + * switch back to legacy mode. Only update the hardware + * config. */ - if (!apic->irq_dest_mode) - irqd_set_single_target(irq_data); + if (info->flags & X86_IRQ_ALLOC_LEGACY) { + if (!vector_configure_legacy(virq + i, irqd, apicd)) + continue; + } + + err = assign_irq_vector_policy(irqd, info); + trace_vector_setup(virq + i, false, err); + if (err) + goto error; } return 0; @@ -387,9 +541,56 @@ error: return err; } +#ifdef CONFIG_GENERIC_IRQ_DEBUGFS +void x86_vector_debug_show(struct seq_file *m, struct irq_domain *d, + struct irq_data *irqd, int ind) +{ + unsigned int cpu, vector, prev_cpu, prev_vector; + struct apic_chip_data *apicd; + unsigned long flags; + int irq; + + if (!irqd) { + irq_matrix_debug_show(m, vector_matrix, ind); + return; + } + + irq = irqd->irq; + if (irq < nr_legacy_irqs() && !test_bit(irq, &io_apic_irqs)) { + seq_printf(m, "%*sVector: %5d\n", ind, "", ISA_IRQ_VECTOR(irq)); + seq_printf(m, "%*sTarget: Legacy PIC all CPUs\n", ind, ""); + return; + } + + apicd = irqd->chip_data; + if (!apicd) { + seq_printf(m, "%*sVector: Not assigned\n", ind, ""); + return; + } + + raw_spin_lock_irqsave(&vector_lock, flags); + cpu = apicd->cpu; + vector = apicd->vector; + prev_cpu = apicd->prev_cpu; + prev_vector = apicd->prev_vector; + raw_spin_unlock_irqrestore(&vector_lock, flags); + seq_printf(m, "%*sVector: %5u\n", ind, "", vector); + seq_printf(m, "%*sTarget: %5u\n", ind, "", cpu); + if (prev_vector) { + seq_printf(m, "%*sPrevious vector: %5u\n", ind, "", prev_vector); + seq_printf(m, "%*sPrevious target: %5u\n", ind, "", prev_cpu); + } +} +#endif + static const struct irq_domain_ops x86_vector_domain_ops = { - .alloc = x86_vector_alloc_irqs, - .free = x86_vector_free_irqs, + .alloc = x86_vector_alloc_irqs, + .free = x86_vector_free_irqs, + .activate = x86_vector_activate, + .deactivate = x86_vector_deactivate, +#ifdef CONFIG_GENERIC_IRQ_DEBUGFS + .debug_show = x86_vector_debug_show, +#endif }; int __init arch_probe_nr_irqs(void) @@ -419,35 +620,40 @@ int __init arch_probe_nr_irqs(void) return legacy_pic->probe(); } -#ifdef CONFIG_X86_IO_APIC -static void __init init_legacy_irqs(void) +void lapic_assign_legacy_vector(unsigned int irq, bool replace) { - int i, node = cpu_to_node(0); - struct apic_chip_data *data; - /* - * For legacy IRQ's, start with assigning irq0 to irq15 to - * ISA_IRQ_VECTOR(i) for all cpu's. + * Use assign system here so it wont get accounted as allocated + * and moveable in the cpu hotplug check and it prevents managed + * irq reservation from touching it. */ - for (i = 0; i < nr_legacy_irqs(); i++) { - data = legacy_irq_data[i] = alloc_apic_chip_data(node); - BUG_ON(!data); + irq_matrix_assign_system(vector_matrix, ISA_IRQ_VECTOR(irq), replace); +} + +void __init lapic_assign_system_vectors(void) +{ + unsigned int i, vector = 0; - data->cfg.vector = ISA_IRQ_VECTOR(i); - cpumask_setall(data->domain); - irq_set_chip_data(i, data); + for_each_set_bit_from(vector, system_vectors, NR_VECTORS) + irq_matrix_assign_system(vector_matrix, vector, false); + + if (nr_legacy_irqs() > 1) + lapic_assign_legacy_vector(PIC_CASCADE_IR, false); + + /* System vectors are reserved, online it */ + irq_matrix_online(vector_matrix); + + /* Mark the preallocated legacy interrupts */ + for (i = 0; i < nr_legacy_irqs(); i++) { + if (i != PIC_CASCADE_IR) + irq_matrix_assign(vector_matrix, ISA_IRQ_VECTOR(i)); } } -#else -static inline void init_legacy_irqs(void) { } -#endif int __init arch_early_irq_init(void) { struct fwnode_handle *fn; - init_legacy_irqs(); - fn = irq_domain_alloc_named_fwnode("VECTOR"); BUG_ON(!fn); x86_vector_domain = irq_domain_create_tree(fn, &x86_vector_domain_ops, @@ -459,100 +665,115 @@ int __init arch_early_irq_init(void) arch_init_msi_domain(x86_vector_domain); arch_init_htirq_domain(x86_vector_domain); - BUG_ON(!alloc_cpumask_var(&vector_cpumask, GFP_KERNEL)); BUG_ON(!alloc_cpumask_var(&vector_searchmask, GFP_KERNEL)); - BUG_ON(!alloc_cpumask_var(&searched_cpumask, GFP_KERNEL)); + + /* + * Allocate the vector matrix allocator data structure and limit the + * search area. + */ + vector_matrix = irq_alloc_matrix(NR_VECTORS, FIRST_EXTERNAL_VECTOR, + FIRST_SYSTEM_VECTOR); + BUG_ON(!vector_matrix); return arch_early_ioapic_init(); } -/* Initialize vector_irq on a new cpu */ -static void __setup_vector_irq(int cpu) +#ifdef CONFIG_SMP + +static struct irq_desc *__setup_vector_irq(int vector) { - struct apic_chip_data *data; - struct irq_desc *desc; - int irq, vector; + int isairq = vector - ISA_IRQ_VECTOR(0); + + /* Check whether the irq is in the legacy space */ + if (isairq < 0 || isairq >= nr_legacy_irqs()) + return VECTOR_UNUSED; + /* Check whether the irq is handled by the IOAPIC */ + if (test_bit(isairq, &io_apic_irqs)) + return VECTOR_UNUSED; + return irq_to_desc(isairq); +} - /* Mark the inuse vectors */ - for_each_irq_desc(irq, desc) { - struct irq_data *idata = irq_desc_get_irq_data(desc); +/* Online the local APIC infrastructure and initialize the vectors */ +void lapic_online(void) +{ + unsigned int vector; - data = apic_chip_data(idata); - if (!data || !cpumask_test_cpu(cpu, data->domain)) - continue; - vector = data->cfg.vector; - per_cpu(vector_irq, cpu)[vector] = desc; - } - /* Mark the free vectors */ - for (vector = 0; vector < NR_VECTORS; ++vector) { - desc = per_cpu(vector_irq, cpu)[vector]; - if (IS_ERR_OR_NULL(desc)) - continue; + lockdep_assert_held(&vector_lock); - data = apic_chip_data(irq_desc_get_irq_data(desc)); - if (!cpumask_test_cpu(cpu, data->domain)) - per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED; - } + /* Online the vector matrix array for this CPU */ + irq_matrix_online(vector_matrix); + + /* + * The interrupt affinity logic never targets interrupts to offline + * CPUs. The exception are the legacy PIC interrupts. In general + * they are only targeted to CPU0, but depending on the platform + * they can be distributed to any online CPU in hardware. The + * kernel has no influence on that. So all active legacy vectors + * must be installed on all CPUs. All non legacy interrupts can be + * cleared. + */ + for (vector = 0; vector < NR_VECTORS; vector++) + this_cpu_write(vector_irq[vector], __setup_vector_irq(vector)); } -/* - * Setup the vector to irq mappings. Must be called with vector_lock held. - */ -void setup_vector_irq(int cpu) +void lapic_offline(void) { - int irq; + lock_vector_lock(); + irq_matrix_offline(vector_matrix); + unlock_vector_lock(); +} + +static int apic_set_affinity(struct irq_data *irqd, + const struct cpumask *dest, bool force) +{ + struct apic_chip_data *apicd = apic_chip_data(irqd); + int err; - lockdep_assert_held(&vector_lock); /* - * On most of the platforms, legacy PIC delivers the interrupts on the - * boot cpu. But there are certain platforms where PIC interrupts are - * delivered to multiple cpu's. If the legacy IRQ is handled by the - * legacy PIC, for the new cpu that is coming online, setup the static - * legacy vector to irq mapping: + * Core code can call here for inactive interrupts. For inactive + * interrupts which use managed or reservation mode there is no + * point in going through the vector assignment right now as the + * activation will assign a vector which fits the destination + * cpumask. Let the core code store the destination mask and be + * done with it. */ - for (irq = 0; irq < nr_legacy_irqs(); irq++) - per_cpu(vector_irq, cpu)[ISA_IRQ_VECTOR(irq)] = irq_to_desc(irq); + if (!irqd_is_activated(irqd) && + (apicd->is_managed || apicd->can_reserve)) + return IRQ_SET_MASK_OK; - __setup_vector_irq(cpu); + raw_spin_lock(&vector_lock); + cpumask_and(vector_searchmask, dest, cpu_online_mask); + if (irqd_affinity_is_managed(irqd)) + err = assign_managed_vector(irqd, vector_searchmask); + else + err = assign_vector_locked(irqd, vector_searchmask); + raw_spin_unlock(&vector_lock); + return err ? err : IRQ_SET_MASK_OK; } -static int apic_retrigger_irq(struct irq_data *irq_data) +#else +# define apic_set_affinity NULL +#endif + +static int apic_retrigger_irq(struct irq_data *irqd) { - struct apic_chip_data *data = apic_chip_data(irq_data); + struct apic_chip_data *apicd = apic_chip_data(irqd); unsigned long flags; - int cpu; raw_spin_lock_irqsave(&vector_lock, flags); - cpu = cpumask_first_and(data->domain, cpu_online_mask); - apic->send_IPI_mask(cpumask_of(cpu), data->cfg.vector); + apic->send_IPI(apicd->cpu, apicd->vector); raw_spin_unlock_irqrestore(&vector_lock, flags); return 1; } -void apic_ack_edge(struct irq_data *data) +void apic_ack_edge(struct irq_data *irqd) { - irq_complete_move(irqd_cfg(data)); - irq_move_irq(data); + irq_complete_move(irqd_cfg(irqd)); + irq_move_irq(irqd); ack_APIC_irq(); } -static int apic_set_affinity(struct irq_data *irq_data, - const struct cpumask *dest, bool force) -{ - struct apic_chip_data *data = irq_data->chip_data; - int err, irq = irq_data->irq; - - if (!IS_ENABLED(CONFIG_SMP)) - return -EPERM; - - if (!cpumask_intersects(dest, cpu_online_mask)) - return -EINVAL; - - err = assign_irq_vector(irq, data, dest, irq_data); - return err ? err : IRQ_SET_MASK_OK; -} - static struct irq_chip lapic_controller = { .name = "APIC", .irq_ack = apic_ack_edge, @@ -561,115 +782,98 @@ static struct irq_chip lapic_controller = { }; #ifdef CONFIG_SMP -static void __send_cleanup_vector(struct apic_chip_data *data) -{ - raw_spin_lock(&vector_lock); - cpumask_and(data->old_domain, data->old_domain, cpu_online_mask); - data->move_in_progress = 0; - if (!cpumask_empty(data->old_domain)) - apic->send_IPI_mask(data->old_domain, IRQ_MOVE_CLEANUP_VECTOR); - raw_spin_unlock(&vector_lock); -} -void send_cleanup_vector(struct irq_cfg *cfg) +static void free_moved_vector(struct apic_chip_data *apicd) { - struct apic_chip_data *data; + unsigned int vector = apicd->prev_vector; + unsigned int cpu = apicd->prev_cpu; + bool managed = apicd->is_managed; - data = container_of(cfg, struct apic_chip_data, cfg); - if (data->move_in_progress) - __send_cleanup_vector(data); + /* + * This should never happen. Managed interrupts are not + * migrated except on CPU down, which does not involve the + * cleanup vector. But try to keep the accounting correct + * nevertheless. + */ + WARN_ON_ONCE(managed); + + trace_vector_free_moved(apicd->irq, cpu, vector, managed); + irq_matrix_free(vector_matrix, cpu, vector, managed); + per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED; + hlist_del_init(&apicd->clist); + apicd->prev_vector = 0; + apicd->move_in_progress = 0; } asmlinkage __visible void __irq_entry smp_irq_move_cleanup_interrupt(void) { - unsigned vector, me; + struct hlist_head *clhead = this_cpu_ptr(&cleanup_list); + struct apic_chip_data *apicd; + struct hlist_node *tmp; entering_ack_irq(); - /* Prevent vectors vanishing under us */ raw_spin_lock(&vector_lock); - me = smp_processor_id(); - for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { - struct apic_chip_data *data; - struct irq_desc *desc; - unsigned int irr; - - retry: - desc = __this_cpu_read(vector_irq[vector]); - if (IS_ERR_OR_NULL(desc)) - continue; - - if (!raw_spin_trylock(&desc->lock)) { - raw_spin_unlock(&vector_lock); - cpu_relax(); - raw_spin_lock(&vector_lock); - goto retry; - } - - data = apic_chip_data(irq_desc_get_irq_data(desc)); - if (!data) - goto unlock; + hlist_for_each_entry_safe(apicd, tmp, clhead, clist) { + unsigned int irr, vector = apicd->prev_vector; /* - * Nothing to cleanup if irq migration is in progress - * or this cpu is not set in the cleanup mask. - */ - if (data->move_in_progress || - !cpumask_test_cpu(me, data->old_domain)) - goto unlock; - - /* - * We have two cases to handle here: - * 1) vector is unchanged but the target mask got reduced - * 2) vector and the target mask has changed - * - * #1 is obvious, but in #2 we have two vectors with the same - * irq descriptor: the old and the new vector. So we need to - * make sure that we only cleanup the old vector. The new - * vector has the current @vector number in the config and - * this cpu is part of the target mask. We better leave that - * one alone. - */ - if (vector == data->cfg.vector && - cpumask_test_cpu(me, data->domain)) - goto unlock; - - irr = apic_read(APIC_IRR + (vector / 32 * 0x10)); - /* - * Check if the vector that needs to be cleanedup is - * registered at the cpu's IRR. If so, then this is not - * the best time to clean it up. Lets clean it up in the + * Paranoia: Check if the vector that needs to be cleaned + * up is registered at the APICs IRR. If so, then this is + * not the best time to clean it up. Clean it up in the * next attempt by sending another IRQ_MOVE_CLEANUP_VECTOR - * to myself. + * to this CPU. IRQ_MOVE_CLEANUP_VECTOR is the lowest + * priority external vector, so on return from this + * interrupt the device interrupt will happen first. */ - if (irr & (1 << (vector % 32))) { + irr = apic_read(APIC_IRR + (vector / 32 * 0x10)); + if (irr & (1U << (vector % 32))) { apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR); - goto unlock; + continue; } - __this_cpu_write(vector_irq[vector], VECTOR_UNUSED); - cpumask_clear_cpu(me, data->old_domain); -unlock: - raw_spin_unlock(&desc->lock); + free_moved_vector(apicd); } raw_spin_unlock(&vector_lock); - exiting_irq(); } +static void __send_cleanup_vector(struct apic_chip_data *apicd) +{ + unsigned int cpu; + + raw_spin_lock(&vector_lock); + apicd->move_in_progress = 0; + cpu = apicd->prev_cpu; + if (cpu_online(cpu)) { + hlist_add_head(&apicd->clist, per_cpu_ptr(&cleanup_list, cpu)); + apic->send_IPI(cpu, IRQ_MOVE_CLEANUP_VECTOR); + } else { + apicd->prev_vector = 0; + } + raw_spin_unlock(&vector_lock); +} + +void send_cleanup_vector(struct irq_cfg *cfg) +{ + struct apic_chip_data *apicd; + + apicd = container_of(cfg, struct apic_chip_data, hw_irq_cfg); + if (apicd->move_in_progress) + __send_cleanup_vector(apicd); +} + static void __irq_complete_move(struct irq_cfg *cfg, unsigned vector) { - unsigned me; - struct apic_chip_data *data; + struct apic_chip_data *apicd; - data = container_of(cfg, struct apic_chip_data, cfg); - if (likely(!data->move_in_progress)) + apicd = container_of(cfg, struct apic_chip_data, hw_irq_cfg); + if (likely(!apicd->move_in_progress)) return; - me = smp_processor_id(); - if (vector == data->cfg.vector && cpumask_test_cpu(me, data->domain)) - __send_cleanup_vector(data); + if (vector == apicd->vector && apicd->cpu == smp_processor_id()) + __send_cleanup_vector(apicd); } void irq_complete_move(struct irq_cfg *cfg) @@ -682,10 +886,9 @@ void irq_complete_move(struct irq_cfg *cfg) */ void irq_force_complete_move(struct irq_desc *desc) { - struct irq_data *irqdata; - struct apic_chip_data *data; - struct irq_cfg *cfg; - unsigned int cpu; + struct apic_chip_data *apicd; + struct irq_data *irqd; + unsigned int vector; /* * The function is called for all descriptors regardless of which @@ -696,43 +899,31 @@ void irq_force_complete_move(struct irq_desc *desc) * Check first that the chip_data is what we expect * (apic_chip_data) before touching it any further. */ - irqdata = irq_domain_get_irq_data(x86_vector_domain, - irq_desc_get_irq(desc)); - if (!irqdata) + irqd = irq_domain_get_irq_data(x86_vector_domain, + irq_desc_get_irq(desc)); + if (!irqd) return; - data = apic_chip_data(irqdata); - cfg = data ? &data->cfg : NULL; + raw_spin_lock(&vector_lock); + apicd = apic_chip_data(irqd); + if (!apicd) + goto unlock; - if (!cfg) - return; + /* + * If prev_vector is empty, no action required. + */ + vector = apicd->prev_vector; + if (!vector) + goto unlock; /* - * This is tricky. If the cleanup of @data->old_domain has not been + * This is tricky. If the cleanup of the old vector has not been * done yet, then the following setaffinity call will fail with * -EBUSY. This can leave the interrupt in a stale state. * * All CPUs are stuck in stop machine with interrupts disabled so * calling __irq_complete_move() would be completely pointless. - */ - raw_spin_lock(&vector_lock); - /* - * Clean out all offline cpus (including the outgoing one) from the - * old_domain mask. - */ - cpumask_and(data->old_domain, data->old_domain, cpu_online_mask); - - /* - * If move_in_progress is cleared and the old_domain mask is empty, - * then there is nothing to cleanup. fixup_irqs() will take care of - * the stale vectors on the outgoing cpu. - */ - if (!data->move_in_progress && cpumask_empty(data->old_domain)) { - raw_spin_unlock(&vector_lock); - return; - } - - /* + * * 1) The interrupt is in move_in_progress state. That means that we * have not seen an interrupt since the io_apic was reprogrammed to * the new vector. @@ -740,7 +931,7 @@ void irq_force_complete_move(struct irq_desc *desc) * 2) The interrupt has fired on the new vector, but the cleanup IPIs * have not been processed yet. */ - if (data->move_in_progress) { + if (apicd->move_in_progress) { /* * In theory there is a race: * @@ -774,21 +965,43 @@ void irq_force_complete_move(struct irq_desc *desc) * area arises. */ pr_warn("IRQ fixup: irq %d move in progress, old vector %d\n", - irqdata->irq, cfg->old_vector); + irqd->irq, vector); } - /* - * If old_domain is not empty, then other cpus still have the irq - * descriptor set in their vector array. Clean it up. - */ - for_each_cpu(cpu, data->old_domain) - per_cpu(vector_irq, cpu)[cfg->old_vector] = VECTOR_UNUSED; + free_moved_vector(apicd); +unlock: + raw_spin_unlock(&vector_lock); +} + +#ifdef CONFIG_HOTPLUG_CPU +/* + * Note, this is not accurate accounting, but at least good enough to + * prevent that the actual interrupt move will run out of vectors. + */ +int lapic_can_unplug_cpu(void) +{ + unsigned int rsvd, avl, tomove, cpu = smp_processor_id(); + int ret = 0; - /* Cleanup the left overs of the (half finished) move */ - cpumask_clear(data->old_domain); - data->move_in_progress = 0; + raw_spin_lock(&vector_lock); + tomove = irq_matrix_allocated(vector_matrix); + avl = irq_matrix_available(vector_matrix, true); + if (avl < tomove) { + pr_warn("CPU %u has %u vectors, %u available. Cannot disable CPU\n", + cpu, tomove, avl); + ret = -ENOSPC; + goto out; + } + rsvd = irq_matrix_reserved(vector_matrix); + if (avl < rsvd) { + pr_warn("Reserved vectors %u > available %u. IRQ request may fail\n", + rsvd, avl); + } +out: raw_spin_unlock(&vector_lock); + return ret; } -#endif +#endif /* HOTPLUG_CPU */ +#endif /* SMP */ static void __init print_APIC_field(int base) { diff --git a/arch/x86/kernel/apic/x2apic.h b/arch/x86/kernel/apic/x2apic.h new file mode 100644 index 000000000000..b107de381cb5 --- /dev/null +++ b/arch/x86/kernel/apic/x2apic.h @@ -0,0 +1,9 @@ +/* Common bits for X2APIC cluster/physical modes. */ + +int x2apic_apic_id_valid(int apicid); +int x2apic_apic_id_registered(void); +void __x2apic_send_IPI_dest(unsigned int apicid, int vector, unsigned int dest); +unsigned int x2apic_get_apic_id(unsigned long id); +u32 x2apic_set_apic_id(unsigned int id); +int x2apic_phys_pkg_id(int initial_apicid, int index_msb); +void x2apic_send_IPI_self(int vector); diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index e216cf3d64d2..622f13ca8a94 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -9,22 +9,24 @@ #include <linux/cpu.h> #include <asm/smp.h> -#include <asm/x2apic.h> +#include "x2apic.h" + +struct cluster_mask { + unsigned int clusterid; + int node; + struct cpumask mask; +}; static DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid); -static DEFINE_PER_CPU(cpumask_var_t, cpus_in_cluster); static DEFINE_PER_CPU(cpumask_var_t, ipi_mask); +static DEFINE_PER_CPU(struct cluster_mask *, cluster_masks); +static struct cluster_mask *cluster_hotplug_mask; static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { return x2apic_enabled(); } -static inline u32 x2apic_cluster(int cpu) -{ - return per_cpu(x86_cpu_to_logical_apicid, cpu) >> 16; -} - static void x2apic_send_IPI(int cpu, int vector) { u32 dest = per_cpu(x86_cpu_to_logical_apicid, cpu); @@ -36,49 +38,34 @@ static void x2apic_send_IPI(int cpu, int vector) static void __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest) { - struct cpumask *cpus_in_cluster_ptr; - struct cpumask *ipi_mask_ptr; - unsigned int cpu, this_cpu; + unsigned int cpu, clustercpu; + struct cpumask *tmpmsk; unsigned long flags; u32 dest; x2apic_wrmsr_fence(); - local_irq_save(flags); - this_cpu = smp_processor_id(); + tmpmsk = this_cpu_cpumask_var_ptr(ipi_mask); + cpumask_copy(tmpmsk, mask); + /* If IPI should not be sent to self, clear current CPU */ + if (apic_dest != APIC_DEST_ALLINC) + cpumask_clear_cpu(smp_processor_id(), tmpmsk); - /* - * We are to modify mask, so we need an own copy - * and be sure it's manipulated with irq off. - */ - ipi_mask_ptr = this_cpu_cpumask_var_ptr(ipi_mask); - cpumask_copy(ipi_mask_ptr, mask); - - /* - * The idea is to send one IPI per cluster. - */ - for_each_cpu(cpu, ipi_mask_ptr) { - unsigned long i; + /* Collapse cpus in a cluster so a single IPI per cluster is sent */ + for_each_cpu(cpu, tmpmsk) { + struct cluster_mask *cmsk = per_cpu(cluster_masks, cpu); - cpus_in_cluster_ptr = per_cpu(cpus_in_cluster, cpu); dest = 0; - - /* Collect cpus in cluster. */ - for_each_cpu_and(i, ipi_mask_ptr, cpus_in_cluster_ptr) { - if (apic_dest == APIC_DEST_ALLINC || i != this_cpu) - dest |= per_cpu(x86_cpu_to_logical_apicid, i); - } + for_each_cpu_and(clustercpu, tmpmsk, &cmsk->mask) + dest |= per_cpu(x86_cpu_to_logical_apicid, clustercpu); if (!dest) continue; __x2apic_send_IPI_dest(dest, vector, apic->dest_logical); - /* - * Cluster sibling cpus should be discared now so - * we would not send IPI them second time. - */ - cpumask_andnot(ipi_mask_ptr, ipi_mask_ptr, cpus_in_cluster_ptr); + /* Remove cluster CPUs from tmpmask */ + cpumask_andnot(tmpmsk, tmpmsk, &cmsk->mask); } local_irq_restore(flags); @@ -105,125 +92,90 @@ static void x2apic_send_IPI_all(int vector) __x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLINC); } -static int -x2apic_cpu_mask_to_apicid(const struct cpumask *mask, struct irq_data *irqdata, - unsigned int *apicid) +static u32 x2apic_calc_apicid(unsigned int cpu) { - struct cpumask *effmsk = irq_data_get_effective_affinity_mask(irqdata); - unsigned int cpu; - u32 dest = 0; - u16 cluster; - - cpu = cpumask_first(mask); - if (cpu >= nr_cpu_ids) - return -EINVAL; - - dest = per_cpu(x86_cpu_to_logical_apicid, cpu); - cluster = x2apic_cluster(cpu); - - cpumask_clear(effmsk); - for_each_cpu(cpu, mask) { - if (cluster != x2apic_cluster(cpu)) - continue; - dest |= per_cpu(x86_cpu_to_logical_apicid, cpu); - cpumask_set_cpu(cpu, effmsk); - } - - *apicid = dest; - return 0; + return per_cpu(x86_cpu_to_logical_apicid, cpu); } static void init_x2apic_ldr(void) { - unsigned int this_cpu = smp_processor_id(); + struct cluster_mask *cmsk = this_cpu_read(cluster_masks); + u32 cluster, apicid = apic_read(APIC_LDR); unsigned int cpu; - per_cpu(x86_cpu_to_logical_apicid, this_cpu) = apic_read(APIC_LDR); + this_cpu_write(x86_cpu_to_logical_apicid, apicid); + + if (cmsk) + goto update; - cpumask_set_cpu(this_cpu, per_cpu(cpus_in_cluster, this_cpu)); + cluster = apicid >> 16; for_each_online_cpu(cpu) { - if (x2apic_cluster(this_cpu) != x2apic_cluster(cpu)) - continue; - cpumask_set_cpu(this_cpu, per_cpu(cpus_in_cluster, cpu)); - cpumask_set_cpu(cpu, per_cpu(cpus_in_cluster, this_cpu)); + cmsk = per_cpu(cluster_masks, cpu); + /* Matching cluster found. Link and update it. */ + if (cmsk && cmsk->clusterid == cluster) + goto update; } + cmsk = cluster_hotplug_mask; + cluster_hotplug_mask = NULL; +update: + this_cpu_write(cluster_masks, cmsk); + cpumask_set_cpu(smp_processor_id(), &cmsk->mask); } -/* - * At CPU state changes, update the x2apic cluster sibling info. - */ -static int x2apic_prepare_cpu(unsigned int cpu) +static int alloc_clustermask(unsigned int cpu, int node) { - if (!zalloc_cpumask_var(&per_cpu(cpus_in_cluster, cpu), GFP_KERNEL)) - return -ENOMEM; + if (per_cpu(cluster_masks, cpu)) + return 0; + /* + * If a hotplug spare mask exists, check whether it's on the right + * node. If not, free it and allocate a new one. + */ + if (cluster_hotplug_mask) { + if (cluster_hotplug_mask->node == node) + return 0; + kfree(cluster_hotplug_mask); + } - if (!zalloc_cpumask_var(&per_cpu(ipi_mask, cpu), GFP_KERNEL)) { - free_cpumask_var(per_cpu(cpus_in_cluster, cpu)); + cluster_hotplug_mask = kzalloc_node(sizeof(*cluster_hotplug_mask), + GFP_KERNEL, node); + if (!cluster_hotplug_mask) return -ENOMEM; - } + cluster_hotplug_mask->node = node; + return 0; +} +static int x2apic_prepare_cpu(unsigned int cpu) +{ + if (alloc_clustermask(cpu, cpu_to_node(cpu)) < 0) + return -ENOMEM; + if (!zalloc_cpumask_var(&per_cpu(ipi_mask, cpu), GFP_KERNEL)) + return -ENOMEM; return 0; } -static int x2apic_dead_cpu(unsigned int this_cpu) +static int x2apic_dead_cpu(unsigned int dead_cpu) { - int cpu; + struct cluster_mask *cmsk = per_cpu(cluster_masks, dead_cpu); - for_each_online_cpu(cpu) { - if (x2apic_cluster(this_cpu) != x2apic_cluster(cpu)) - continue; - cpumask_clear_cpu(this_cpu, per_cpu(cpus_in_cluster, cpu)); - cpumask_clear_cpu(cpu, per_cpu(cpus_in_cluster, this_cpu)); - } - free_cpumask_var(per_cpu(cpus_in_cluster, this_cpu)); - free_cpumask_var(per_cpu(ipi_mask, this_cpu)); + cpumask_clear_cpu(dead_cpu, &cmsk->mask); + free_cpumask_var(per_cpu(ipi_mask, dead_cpu)); return 0; } static int x2apic_cluster_probe(void) { - int cpu = smp_processor_id(); - int ret; - if (!x2apic_mode) return 0; - ret = cpuhp_setup_state(CPUHP_X2APIC_PREPARE, "x86/x2apic:prepare", - x2apic_prepare_cpu, x2apic_dead_cpu); - if (ret < 0) { + if (cpuhp_setup_state(CPUHP_X2APIC_PREPARE, "x86/x2apic:prepare", + x2apic_prepare_cpu, x2apic_dead_cpu) < 0) { pr_err("Failed to register X2APIC_PREPARE\n"); return 0; } - cpumask_set_cpu(cpu, per_cpu(cpus_in_cluster, cpu)); + init_x2apic_ldr(); return 1; } -static const struct cpumask *x2apic_cluster_target_cpus(void) -{ - return cpu_all_mask; -} - -/* - * Each x2apic cluster is an allocation domain. - */ -static void cluster_vector_allocation_domain(int cpu, struct cpumask *retmask, - const struct cpumask *mask) -{ - /* - * To minimize vector pressure, default case of boot, device bringup - * etc will use a single cpu for the interrupt destination. - * - * On explicit migration requests coming from irqbalance etc, - * interrupts will be routed to the x2apic cluster (cluster-id - * derived from the first cpu in the mask) members specified - * in the mask. - */ - if (mask == x2apic_cluster_target_cpus()) - cpumask_copy(retmask, cpumask_of(cpu)); - else - cpumask_and(retmask, mask, per_cpu(cpus_in_cluster, cpu)); -} - static struct apic apic_x2apic_cluster __ro_after_init = { .name = "cluster x2apic", @@ -235,12 +187,10 @@ static struct apic apic_x2apic_cluster __ro_after_init = { .irq_delivery_mode = dest_LowestPrio, .irq_dest_mode = 1, /* logical */ - .target_cpus = x2apic_cluster_target_cpus, .disable_esr = 0, .dest_logical = APIC_DEST_LOGICAL, .check_apicid_used = NULL, - .vector_allocation_domain = cluster_vector_allocation_domain, .init_apic_ldr = init_x2apic_ldr, .ioapic_phys_id_map = NULL, @@ -253,7 +203,7 @@ static struct apic apic_x2apic_cluster __ro_after_init = { .get_apic_id = x2apic_get_apic_id, .set_apic_id = x2apic_set_apic_id, - .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, + .calc_dest_apicid = x2apic_calc_apicid, .send_IPI = x2apic_send_IPI, .send_IPI_mask = x2apic_send_IPI_mask, diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index b94d35320f85..f8d9d69994e6 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -7,7 +7,8 @@ #include <linux/dmar.h> #include <asm/smp.h> -#include <asm/x2apic.h> +#include <asm/ipi.h> +#include "x2apic.h" int x2apic_phys; @@ -99,6 +100,43 @@ static int x2apic_phys_probe(void) return apic == &apic_x2apic_phys; } +/* Common x2apic functions, also used by x2apic_cluster */ +int x2apic_apic_id_valid(int apicid) +{ + return 1; +} + +int x2apic_apic_id_registered(void) +{ + return 1; +} + +void __x2apic_send_IPI_dest(unsigned int apicid, int vector, unsigned int dest) +{ + unsigned long cfg = __prepare_ICR(0, vector, dest); + native_x2apic_icr_write(cfg, apicid); +} + +unsigned int x2apic_get_apic_id(unsigned long id) +{ + return id; +} + +u32 x2apic_set_apic_id(unsigned int id) +{ + return id; +} + +int x2apic_phys_pkg_id(int initial_apicid, int index_msb) +{ + return initial_apicid >> index_msb; +} + +void x2apic_send_IPI_self(int vector) +{ + apic_write(APIC_SELF_IPI, vector); +} + static struct apic apic_x2apic_phys __ro_after_init = { .name = "physical x2apic", @@ -110,12 +148,10 @@ static struct apic apic_x2apic_phys __ro_after_init = { .irq_delivery_mode = dest_Fixed, .irq_dest_mode = 0, /* physical */ - .target_cpus = online_target_cpus, .disable_esr = 0, .dest_logical = 0, .check_apicid_used = NULL, - .vector_allocation_domain = default_vector_allocation_domain, .init_apic_ldr = init_x2apic_ldr, .ioapic_phys_id_map = NULL, @@ -128,7 +164,7 @@ static struct apic apic_x2apic_phys __ro_after_init = { .get_apic_id = x2apic_get_apic_id, .set_apic_id = x2apic_set_apic_id, - .cpu_mask_to_apicid = default_cpu_mask_to_apicid, + .calc_dest_apicid = apic_default_calc_apicid, .send_IPI = x2apic_send_IPI, .send_IPI_mask = x2apic_send_IPI_mask, diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 0d57bb9079c9..e1b8e8bf6b3c 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -154,6 +154,48 @@ static int __init early_get_pnodeid(void) return pnode; } +static void __init uv_tsc_check_sync(void) +{ + u64 mmr; + int sync_state; + int mmr_shift; + char *state; + bool valid; + + /* Accommodate different UV arch BIOSes */ + mmr = uv_early_read_mmr(UVH_TSC_SYNC_MMR); + mmr_shift = + is_uv1_hub() ? 0 : + is_uv2_hub() ? UVH_TSC_SYNC_SHIFT_UV2K : UVH_TSC_SYNC_SHIFT; + if (mmr_shift) + sync_state = (mmr >> mmr_shift) & UVH_TSC_SYNC_MASK; + else + sync_state = 0; + + switch (sync_state) { + case UVH_TSC_SYNC_VALID: + state = "in sync"; + valid = true; + break; + + case UVH_TSC_SYNC_INVALID: + state = "unstable"; + valid = false; + break; + default: + state = "unknown: assuming valid"; + valid = true; + break; + } + pr_info("UV: TSC sync state from BIOS:0%d(%s)\n", sync_state, state); + + /* Mark flag that says TSC != 0 is valid for socket 0 */ + if (valid) + mark_tsc_async_resets("UV BIOS"); + else + mark_tsc_unstable("UV BIOS"); +} + /* [Copied from arch/x86/kernel/cpu/topology.c:detect_extended_topology()] */ #define SMT_LEVEL 0 /* Leaf 0xb SMT level */ @@ -288,6 +330,7 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) } pr_info("UV: OEM IDs %s/%s, System/HUB Types %d/%d, uv_apic %d\n", oem_id, oem_table_id, uv_system_type, uv_min_hub_revision_id, uv_apic); + uv_tsc_check_sync(); return uv_apic; @@ -525,16 +568,9 @@ static void uv_init_apic_ldr(void) { } -static int -uv_cpu_mask_to_apicid(const struct cpumask *mask, struct irq_data *irqdata, - unsigned int *apicid) +static u32 apic_uv_calc_apicid(unsigned int cpu) { - int ret = default_cpu_mask_to_apicid(mask, irqdata, apicid); - - if (!ret) - *apicid |= uv_apicid_hibits; - - return ret; + return apic_default_calc_apicid(cpu) | uv_apicid_hibits; } static unsigned int x2apic_get_apic_id(unsigned long x) @@ -547,7 +583,7 @@ static unsigned int x2apic_get_apic_id(unsigned long x) return id; } -static unsigned long set_apic_id(unsigned int id) +static u32 set_apic_id(unsigned int id) { /* CHECKME: Do we need to mask out the xapic extra bits? */ return id; @@ -584,12 +620,10 @@ static struct apic apic_x2apic_uv_x __ro_after_init = { .irq_delivery_mode = dest_Fixed, .irq_dest_mode = 0, /* Physical */ - .target_cpus = online_target_cpus, .disable_esr = 0, .dest_logical = APIC_DEST_LOGICAL, .check_apicid_used = NULL, - .vector_allocation_domain = default_vector_allocation_domain, .init_apic_ldr = uv_init_apic_ldr, .ioapic_phys_id_map = NULL, @@ -602,7 +636,7 @@ static struct apic apic_x2apic_uv_x __ro_after_init = { .get_apic_id = x2apic_get_apic_id, .set_apic_id = set_apic_id, - .cpu_mask_to_apicid = uv_cpu_mask_to_apicid, + .calc_dest_apicid = apic_uv_calc_apicid, .send_IPI = uv_send_IPI_one, .send_IPI_mask = uv_send_IPI_mask, @@ -920,9 +954,8 @@ static __init void uv_rtc_init(void) /* * percpu heartbeat timer */ -static void uv_heartbeat(unsigned long ignored) +static void uv_heartbeat(struct timer_list *timer) { - struct timer_list *timer = &uv_scir_info->timer; unsigned char bits = uv_scir_info->state; /* Flip heartbeat bit: */ @@ -947,7 +980,7 @@ static int uv_heartbeat_enable(unsigned int cpu) struct timer_list *timer = &uv_cpu_scir_info(cpu)->timer; uv_set_cpu_scir_bits(cpu, SCIR_CPU_HEARTBEAT|SCIR_CPU_ACTIVITY); - setup_pinned_timer(timer, uv_heartbeat, cpu); + timer_setup(timer, uv_heartbeat, TIMER_PINNED); timer->expires = jiffies + SCIR_CPU_HB_INTERVAL; add_timer_on(timer, cpu); uv_cpu_scir_info(cpu)->enabled = 1; diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 236999c54edc..90cb82dbba57 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -22,7 +22,8 @@ obj-y += common.o obj-y += rdrand.o obj-y += match.o obj-y += bugs.o -obj-y += aperfmperf.o +obj-$(CONFIG_CPU_FREQ) += aperfmperf.o +obj-y += cpuid-deps.o obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index c9176bae7fd8..13ae9e5eec2f 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -329,6 +329,28 @@ static __always_inline void setup_smap(struct cpuinfo_x86 *c) } } +static __always_inline void setup_umip(struct cpuinfo_x86 *c) +{ + /* Check the boot processor, plus build option for UMIP. */ + if (!cpu_feature_enabled(X86_FEATURE_UMIP)) + goto out; + + /* Check the current processor's cpuid bits. */ + if (!cpu_has(c, X86_FEATURE_UMIP)) + goto out; + + cr4_set_bits(X86_CR4_UMIP); + + return; + +out: + /* + * Make sure UMIP is disabled in case it was enabled in a + * previous boot (e.g., via kexec). + */ + cr4_clear_bits(X86_CR4_UMIP); +} + /* * Protection Keys are not available in 32-bit mode. */ @@ -863,8 +885,8 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) * cache alignment. * The others are not touched to avoid unwanted side effects. * - * WARNING: this function is only called on the BP. Don't add code here - * that is supposed to run on all CPUs. + * WARNING: this function is only called on the boot CPU. Don't add code + * here that is supposed to run on all CPUs. */ static void __init early_identify_cpu(struct cpuinfo_x86 *c) { @@ -1147,9 +1169,10 @@ static void identify_cpu(struct cpuinfo_x86 *c) /* Disable the PN if appropriate */ squash_the_stupid_serial_number(c); - /* Set up SMEP/SMAP */ + /* Set up SMEP/SMAP/UMIP */ setup_smep(c); setup_smap(c); + setup_umip(c); /* * The vendor-specific functions might have changed features. @@ -1301,18 +1324,16 @@ void print_cpu_info(struct cpuinfo_x86 *c) pr_cont(")\n"); } -static __init int setup_disablecpuid(char *arg) +/* + * clearcpuid= was already parsed in fpu__init_parse_early_param. + * But we need to keep a dummy __setup around otherwise it would + * show up as an environment variable for init. + */ +static __init int setup_clearcpuid(char *arg) { - int bit; - - if (get_option(&arg, &bit) && bit >= 0 && bit < NCAPINTS * 32) - setup_clear_cpu_cap(bit); - else - return 0; - return 1; } -__setup("clearcpuid=", setup_disablecpuid); +__setup("clearcpuid=", setup_clearcpuid); #ifdef CONFIG_X86_64 DEFINE_PER_CPU_FIRST(union irq_stack_union, @@ -1572,9 +1593,13 @@ void cpu_init(void) initialize_tlbstate_and_flush(); enter_lazy_tlb(&init_mm, me); - load_sp0(t, ¤t->thread); + /* + * Initialize the TSS. Don't bother initializing sp0, as the initial + * task never enters user mode. + */ set_tss_desc(cpu, t); load_TR_desc(); + load_mm_ldt(&init_mm); clear_all_debug_regs(); @@ -1596,7 +1621,6 @@ void cpu_init(void) int cpu = smp_processor_id(); struct task_struct *curr = current; struct tss_struct *t = &per_cpu(cpu_tss, cpu); - struct thread_struct *thread = &curr->thread; wait_for_master_cpu(cpu); @@ -1627,9 +1651,13 @@ void cpu_init(void) initialize_tlbstate_and_flush(); enter_lazy_tlb(&init_mm, curr); - load_sp0(t, thread); + /* + * Initialize the TSS. Don't bother initializing sp0, as the initial + * task never enters user mode. + */ set_tss_desc(cpu, t); load_TR_desc(); + load_mm_ldt(&init_mm); t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c new file mode 100644 index 000000000000..904b0a3c4e53 --- /dev/null +++ b/arch/x86/kernel/cpu/cpuid-deps.c @@ -0,0 +1,121 @@ +/* Declare dependencies between CPUIDs */ +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <asm/cpufeature.h> + +struct cpuid_dep { + unsigned int feature; + unsigned int depends; +}; + +/* + * Table of CPUID features that depend on others. + * + * This only includes dependencies that can be usefully disabled, not + * features part of the base set (like FPU). + * + * Note this all is not __init / __initdata because it can be + * called from cpu hotplug. It shouldn't do anything in this case, + * but it's difficult to tell that to the init reference checker. + */ +const static struct cpuid_dep cpuid_deps[] = { + { X86_FEATURE_XSAVEOPT, X86_FEATURE_XSAVE }, + { X86_FEATURE_XSAVEC, X86_FEATURE_XSAVE }, + { X86_FEATURE_XSAVES, X86_FEATURE_XSAVE }, + { X86_FEATURE_AVX, X86_FEATURE_XSAVE }, + { X86_FEATURE_PKU, X86_FEATURE_XSAVE }, + { X86_FEATURE_MPX, X86_FEATURE_XSAVE }, + { X86_FEATURE_XGETBV1, X86_FEATURE_XSAVE }, + { X86_FEATURE_FXSR_OPT, X86_FEATURE_FXSR }, + { X86_FEATURE_XMM, X86_FEATURE_FXSR }, + { X86_FEATURE_XMM2, X86_FEATURE_XMM }, + { X86_FEATURE_XMM3, X86_FEATURE_XMM2 }, + { X86_FEATURE_XMM4_1, X86_FEATURE_XMM2 }, + { X86_FEATURE_XMM4_2, X86_FEATURE_XMM2 }, + { X86_FEATURE_XMM3, X86_FEATURE_XMM2 }, + { X86_FEATURE_PCLMULQDQ, X86_FEATURE_XMM2 }, + { X86_FEATURE_SSSE3, X86_FEATURE_XMM2, }, + { X86_FEATURE_F16C, X86_FEATURE_XMM2, }, + { X86_FEATURE_AES, X86_FEATURE_XMM2 }, + { X86_FEATURE_SHA_NI, X86_FEATURE_XMM2 }, + { X86_FEATURE_FMA, X86_FEATURE_AVX }, + { X86_FEATURE_AVX2, X86_FEATURE_AVX, }, + { X86_FEATURE_AVX512F, X86_FEATURE_AVX, }, + { X86_FEATURE_AVX512IFMA, X86_FEATURE_AVX512F }, + { X86_FEATURE_AVX512PF, X86_FEATURE_AVX512F }, + { X86_FEATURE_AVX512ER, X86_FEATURE_AVX512F }, + { X86_FEATURE_AVX512CD, X86_FEATURE_AVX512F }, + { X86_FEATURE_AVX512DQ, X86_FEATURE_AVX512F }, + { X86_FEATURE_AVX512BW, X86_FEATURE_AVX512F }, + { X86_FEATURE_AVX512VL, X86_FEATURE_AVX512F }, + { X86_FEATURE_AVX512VBMI, X86_FEATURE_AVX512F }, + { X86_FEATURE_AVX512_VBMI2, X86_FEATURE_AVX512VL }, + { X86_FEATURE_GFNI, X86_FEATURE_AVX512VL }, + { X86_FEATURE_VAES, X86_FEATURE_AVX512VL }, + { X86_FEATURE_VPCLMULQDQ, X86_FEATURE_AVX512VL }, + { X86_FEATURE_AVX512_VNNI, X86_FEATURE_AVX512VL }, + { X86_FEATURE_AVX512_BITALG, X86_FEATURE_AVX512VL }, + { X86_FEATURE_AVX512_4VNNIW, X86_FEATURE_AVX512F }, + { X86_FEATURE_AVX512_4FMAPS, X86_FEATURE_AVX512F }, + { X86_FEATURE_AVX512_VPOPCNTDQ, X86_FEATURE_AVX512F }, + {} +}; + +static inline void clear_feature(struct cpuinfo_x86 *c, unsigned int feature) +{ + /* + * Note: This could use the non atomic __*_bit() variants, but the + * rest of the cpufeature code uses atomics as well, so keep it for + * consistency. Cleanup all of it separately. + */ + if (!c) { + clear_cpu_cap(&boot_cpu_data, feature); + set_bit(feature, (unsigned long *)cpu_caps_cleared); + } else { + clear_bit(feature, (unsigned long *)c->x86_capability); + } +} + +/* Take the capabilities and the BUG bits into account */ +#define MAX_FEATURE_BITS ((NCAPINTS + NBUGINTS) * sizeof(u32) * 8) + +static void do_clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int feature) +{ + DECLARE_BITMAP(disable, MAX_FEATURE_BITS); + const struct cpuid_dep *d; + bool changed; + + if (WARN_ON(feature >= MAX_FEATURE_BITS)) + return; + + clear_feature(c, feature); + + /* Collect all features to disable, handling dependencies */ + memset(disable, 0, sizeof(disable)); + __set_bit(feature, disable); + + /* Loop until we get a stable state. */ + do { + changed = false; + for (d = cpuid_deps; d->feature; d++) { + if (!test_bit(d->depends, disable)) + continue; + if (__test_and_set_bit(d->feature, disable)) + continue; + + changed = true; + clear_feature(c, d->feature); + } + } while (changed); +} + +void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int feature) +{ + do_clear_cpu_cap(c, feature); +} + +void setup_clear_cpu_cap(unsigned int feature) +{ + do_clear_cpu_cap(NULL, feature); +} diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c index 4fa90006ac68..bea8d3e24f50 100644 --- a/arch/x86/kernel/cpu/hypervisor.c +++ b/arch/x86/kernel/cpu/hypervisor.c @@ -26,6 +26,12 @@ #include <asm/processor.h> #include <asm/hypervisor.h> +extern const struct hypervisor_x86 x86_hyper_vmware; +extern const struct hypervisor_x86 x86_hyper_ms_hyperv; +extern const struct hypervisor_x86 x86_hyper_xen_pv; +extern const struct hypervisor_x86 x86_hyper_xen_hvm; +extern const struct hypervisor_x86 x86_hyper_kvm; + static const __initconst struct hypervisor_x86 * const hypervisors[] = { #ifdef CONFIG_XEN_PV @@ -41,54 +47,52 @@ static const __initconst struct hypervisor_x86 * const hypervisors[] = #endif }; -const struct hypervisor_x86 *x86_hyper; -EXPORT_SYMBOL(x86_hyper); +enum x86_hypervisor_type x86_hyper_type; +EXPORT_SYMBOL(x86_hyper_type); -static inline void __init +static inline const struct hypervisor_x86 * __init detect_hypervisor_vendor(void) { - const struct hypervisor_x86 *h, * const *p; + const struct hypervisor_x86 *h = NULL, * const *p; uint32_t pri, max_pri = 0; for (p = hypervisors; p < hypervisors + ARRAY_SIZE(hypervisors); p++) { - h = *p; - pri = h->detect(); - if (pri != 0 && pri > max_pri) { + pri = (*p)->detect(); + if (pri > max_pri) { max_pri = pri; - x86_hyper = h; + h = *p; } } - if (max_pri) - pr_info("Hypervisor detected: %s\n", x86_hyper->name); + if (h) + pr_info("Hypervisor detected: %s\n", h->name); + + return h; } -void __init init_hypervisor_platform(void) +static void __init copy_array(const void *src, void *target, unsigned int size) { + unsigned int i, n = size / sizeof(void *); + const void * const *from = (const void * const *)src; + const void **to = (const void **)target; - detect_hypervisor_vendor(); - - if (!x86_hyper) - return; - - if (x86_hyper->init_platform) - x86_hyper->init_platform(); + for (i = 0; i < n; i++) + if (from[i]) + to[i] = from[i]; } -bool __init hypervisor_x2apic_available(void) +void __init init_hypervisor_platform(void) { - return x86_hyper && - x86_hyper->x2apic_available && - x86_hyper->x2apic_available(); -} + const struct hypervisor_x86 *h; -void hypervisor_pin_vcpu(int cpu) -{ - if (!x86_hyper) + h = detect_hypervisor_vendor(); + + if (!h) return; - if (x86_hyper->pin_vcpu) - x86_hyper->pin_vcpu(cpu); - else - WARN_ONCE(1, "vcpu pinning requested but not supported!\n"); + copy_array(&h->init, &x86_init.hyper, sizeof(h->init)); + copy_array(&h->runtime, &x86_platform.hyper, sizeof(h->runtime)); + + x86_hyper_type = h->type; + x86_init.hyper.init_platform(); } diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c index cd5fc61ba450..88dcf8479013 100644 --- a/arch/x86/kernel/cpu/intel_rdt.c +++ b/arch/x86/kernel/cpu/intel_rdt.c @@ -267,6 +267,7 @@ static void rdt_get_cdp_l3_config(int type) r->num_closid = r_l3->num_closid / 2; r->cache.cbm_len = r_l3->cache.cbm_len; r->default_ctrl = r_l3->default_ctrl; + r->cache.shareable_bits = r_l3->cache.shareable_bits; r->data_width = (r->cache.cbm_len + 3) / 4; r->alloc_capable = true; /* diff --git a/arch/x86/kernel/cpu/intel_rdt.h b/arch/x86/kernel/cpu/intel_rdt.h index a43a72d8e88e..3397244984f5 100644 --- a/arch/x86/kernel/cpu/intel_rdt.h +++ b/arch/x86/kernel/cpu/intel_rdt.h @@ -127,12 +127,15 @@ struct rdtgroup { #define RFTYPE_BASE BIT(1) #define RF_CTRLSHIFT 4 #define RF_MONSHIFT 5 +#define RF_TOPSHIFT 6 #define RFTYPE_CTRL BIT(RF_CTRLSHIFT) #define RFTYPE_MON BIT(RF_MONSHIFT) +#define RFTYPE_TOP BIT(RF_TOPSHIFT) #define RFTYPE_RES_CACHE BIT(8) #define RFTYPE_RES_MB BIT(9) #define RF_CTRL_INFO (RFTYPE_INFO | RFTYPE_CTRL) #define RF_MON_INFO (RFTYPE_INFO | RFTYPE_MON) +#define RF_TOP_INFO (RFTYPE_INFO | RFTYPE_TOP) #define RF_CTRL_BASE (RFTYPE_BASE | RFTYPE_CTRL) /* List of all resource groups */ @@ -409,6 +412,10 @@ union cpuid_0x10_x_edx { unsigned int full; }; +void rdt_last_cmd_clear(void); +void rdt_last_cmd_puts(const char *s); +void rdt_last_cmd_printf(const char *fmt, ...); + void rdt_ctrl_update(void *arg); struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn); void rdtgroup_kn_unlock(struct kernfs_node *kn); diff --git a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c index f6ea94f8954a..23e1d5c249c6 100644 --- a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c +++ b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c @@ -42,15 +42,22 @@ static bool bw_validate(char *buf, unsigned long *data, struct rdt_resource *r) /* * Only linear delay values is supported for current Intel SKUs. */ - if (!r->membw.delay_linear) + if (!r->membw.delay_linear) { + rdt_last_cmd_puts("No support for non-linear MB domains\n"); return false; + } ret = kstrtoul(buf, 10, &bw); - if (ret) + if (ret) { + rdt_last_cmd_printf("Non-decimal digit in MB value %s\n", buf); return false; + } - if (bw < r->membw.min_bw || bw > r->default_ctrl) + if (bw < r->membw.min_bw || bw > r->default_ctrl) { + rdt_last_cmd_printf("MB value %ld out of range [%d,%d]\n", bw, + r->membw.min_bw, r->default_ctrl); return false; + } *data = roundup(bw, (unsigned long)r->membw.bw_gran); return true; @@ -60,8 +67,10 @@ int parse_bw(char *buf, struct rdt_resource *r, struct rdt_domain *d) { unsigned long data; - if (d->have_new_ctrl) + if (d->have_new_ctrl) { + rdt_last_cmd_printf("duplicate domain %d\n", d->id); return -EINVAL; + } if (!bw_validate(buf, &data, r)) return -EINVAL; @@ -84,20 +93,29 @@ static bool cbm_validate(char *buf, unsigned long *data, struct rdt_resource *r) int ret; ret = kstrtoul(buf, 16, &val); - if (ret) + if (ret) { + rdt_last_cmd_printf("non-hex character in mask %s\n", buf); return false; + } - if (val == 0 || val > r->default_ctrl) + if (val == 0 || val > r->default_ctrl) { + rdt_last_cmd_puts("mask out of range\n"); return false; + } first_bit = find_first_bit(&val, cbm_len); zero_bit = find_next_zero_bit(&val, cbm_len, first_bit); - if (find_next_bit(&val, cbm_len, zero_bit) < cbm_len) + if (find_next_bit(&val, cbm_len, zero_bit) < cbm_len) { + rdt_last_cmd_printf("mask %lx has non-consecutive 1-bits\n", val); return false; + } - if ((zero_bit - first_bit) < r->cache.min_cbm_bits) + if ((zero_bit - first_bit) < r->cache.min_cbm_bits) { + rdt_last_cmd_printf("Need at least %d bits in mask\n", + r->cache.min_cbm_bits); return false; + } *data = val; return true; @@ -111,8 +129,10 @@ int parse_cbm(char *buf, struct rdt_resource *r, struct rdt_domain *d) { unsigned long data; - if (d->have_new_ctrl) + if (d->have_new_ctrl) { + rdt_last_cmd_printf("duplicate domain %d\n", d->id); return -EINVAL; + } if(!cbm_validate(buf, &data, r)) return -EINVAL; @@ -139,8 +159,10 @@ next: return 0; dom = strsep(&line, ";"); id = strsep(&dom, "="); - if (!dom || kstrtoul(id, 10, &dom_id)) + if (!dom || kstrtoul(id, 10, &dom_id)) { + rdt_last_cmd_puts("Missing '=' or non-numeric domain\n"); return -EINVAL; + } dom = strim(dom); list_for_each_entry(d, &r->domains, list) { if (d->id == dom_id) { @@ -196,6 +218,7 @@ static int rdtgroup_parse_resource(char *resname, char *tok, int closid) if (!strcmp(resname, r->name) && closid < r->num_closid) return parse_line(tok, r); } + rdt_last_cmd_printf("unknown/unsupported resource name '%s'\n", resname); return -EINVAL; } @@ -218,6 +241,7 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of, rdtgroup_kn_unlock(of->kn); return -ENOENT; } + rdt_last_cmd_clear(); closid = rdtgrp->closid; @@ -229,6 +253,12 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of, while ((tok = strsep(&buf, "\n")) != NULL) { resname = strim(strsep(&tok, ":")); if (!tok) { + rdt_last_cmd_puts("Missing ':'\n"); + ret = -EINVAL; + goto out; + } + if (tok[0] == '\0') { + rdt_last_cmd_printf("Missing '%s' value\n", resname); ret = -EINVAL; goto out; } diff --git a/arch/x86/kernel/cpu/intel_rdt_monitor.c b/arch/x86/kernel/cpu/intel_rdt_monitor.c index 30827510094b..681450eee428 100644 --- a/arch/x86/kernel/cpu/intel_rdt_monitor.c +++ b/arch/x86/kernel/cpu/intel_rdt_monitor.c @@ -51,7 +51,7 @@ static LIST_HEAD(rmid_free_lru); * may have a occupancy value > intel_cqm_threshold. User can change * the threshold occupancy value. */ -unsigned int rmid_limbo_count; +static unsigned int rmid_limbo_count; /** * @rmid_entry - The entry in the limbo and free lists. diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c index a869d4a073c5..64c5ff97ee0d 100644 --- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c +++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c @@ -24,6 +24,7 @@ #include <linux/fs.h> #include <linux/sysfs.h> #include <linux/kernfs.h> +#include <linux/seq_buf.h> #include <linux/seq_file.h> #include <linux/sched/signal.h> #include <linux/sched/task.h> @@ -51,6 +52,31 @@ static struct kernfs_node *kn_mongrp; /* Kernel fs node for "mon_data" directory under root */ static struct kernfs_node *kn_mondata; +static struct seq_buf last_cmd_status; +static char last_cmd_status_buf[512]; + +void rdt_last_cmd_clear(void) +{ + lockdep_assert_held(&rdtgroup_mutex); + seq_buf_clear(&last_cmd_status); +} + +void rdt_last_cmd_puts(const char *s) +{ + lockdep_assert_held(&rdtgroup_mutex); + seq_buf_puts(&last_cmd_status, s); +} + +void rdt_last_cmd_printf(const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + lockdep_assert_held(&rdtgroup_mutex); + seq_buf_vprintf(&last_cmd_status, fmt, ap); + va_end(ap); +} + /* * Trivial allocator for CLOSIDs. Since h/w only supports a small number, * we can keep a bitmap of free CLOSIDs in a single integer. @@ -238,8 +264,10 @@ static int cpus_mon_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask, /* Check whether cpus belong to parent ctrl group */ cpumask_andnot(tmpmask, newmask, &prgrp->cpu_mask); - if (cpumask_weight(tmpmask)) + if (cpumask_weight(tmpmask)) { + rdt_last_cmd_puts("can only add CPUs to mongroup that belong to parent\n"); return -EINVAL; + } /* Check whether cpus are dropped from this group */ cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask); @@ -291,8 +319,10 @@ static int cpus_ctrl_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask, cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask); if (cpumask_weight(tmpmask)) { /* Can't drop from default group */ - if (rdtgrp == &rdtgroup_default) + if (rdtgrp == &rdtgroup_default) { + rdt_last_cmd_puts("Can't drop CPUs from default group\n"); return -EINVAL; + } /* Give any dropped cpus to rdtgroup_default */ cpumask_or(&rdtgroup_default.cpu_mask, @@ -357,8 +387,10 @@ static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of, } rdtgrp = rdtgroup_kn_lock_live(of->kn); + rdt_last_cmd_clear(); if (!rdtgrp) { ret = -ENOENT; + rdt_last_cmd_puts("directory was removed\n"); goto unlock; } @@ -367,13 +399,16 @@ static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of, else ret = cpumask_parse(buf, newmask); - if (ret) + if (ret) { + rdt_last_cmd_puts("bad cpu list/mask\n"); goto unlock; + } /* check that user didn't specify any offline cpus */ cpumask_andnot(tmpmask, newmask, cpu_online_mask); if (cpumask_weight(tmpmask)) { ret = -EINVAL; + rdt_last_cmd_puts("can only assign online cpus\n"); goto unlock; } @@ -452,6 +487,7 @@ static int __rdtgroup_move_task(struct task_struct *tsk, */ atomic_dec(&rdtgrp->waitcount); kfree(callback); + rdt_last_cmd_puts("task exited\n"); } else { /* * For ctrl_mon groups move both closid and rmid. @@ -462,10 +498,12 @@ static int __rdtgroup_move_task(struct task_struct *tsk, tsk->closid = rdtgrp->closid; tsk->rmid = rdtgrp->mon.rmid; } else if (rdtgrp->type == RDTMON_GROUP) { - if (rdtgrp->mon.parent->closid == tsk->closid) + if (rdtgrp->mon.parent->closid == tsk->closid) { tsk->rmid = rdtgrp->mon.rmid; - else + } else { + rdt_last_cmd_puts("Can't move task to different control group\n"); ret = -EINVAL; + } } } return ret; @@ -484,8 +522,10 @@ static int rdtgroup_task_write_permission(struct task_struct *task, */ if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) && !uid_eq(cred->euid, tcred->uid) && - !uid_eq(cred->euid, tcred->suid)) + !uid_eq(cred->euid, tcred->suid)) { + rdt_last_cmd_printf("No permission to move task %d\n", task->pid); ret = -EPERM; + } put_cred(tcred); return ret; @@ -502,6 +542,7 @@ static int rdtgroup_move_task(pid_t pid, struct rdtgroup *rdtgrp, tsk = find_task_by_vpid(pid); if (!tsk) { rcu_read_unlock(); + rdt_last_cmd_printf("No task %d\n", pid); return -ESRCH; } } else { @@ -529,6 +570,7 @@ static ssize_t rdtgroup_tasks_write(struct kernfs_open_file *of, if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0) return -EINVAL; rdtgrp = rdtgroup_kn_lock_live(of->kn); + rdt_last_cmd_clear(); if (rdtgrp) ret = rdtgroup_move_task(pid, rdtgrp, of); @@ -569,6 +611,21 @@ static int rdtgroup_tasks_show(struct kernfs_open_file *of, return ret; } +static int rdt_last_cmd_status_show(struct kernfs_open_file *of, + struct seq_file *seq, void *v) +{ + int len; + + mutex_lock(&rdtgroup_mutex); + len = seq_buf_used(&last_cmd_status); + if (len) + seq_printf(seq, "%.*s", len, last_cmd_status_buf); + else + seq_puts(seq, "ok\n"); + mutex_unlock(&rdtgroup_mutex); + return 0; +} + static int rdt_num_closids_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { @@ -686,6 +743,13 @@ static ssize_t max_threshold_occ_write(struct kernfs_open_file *of, /* rdtgroup information files for one cache resource. */ static struct rftype res_common_files[] = { { + .name = "last_cmd_status", + .mode = 0444, + .kf_ops = &rdtgroup_kf_single_ops, + .seq_show = rdt_last_cmd_status_show, + .fflags = RF_TOP_INFO, + }, + { .name = "num_closids", .mode = 0444, .kf_ops = &rdtgroup_kf_single_ops, @@ -855,6 +919,10 @@ static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn) return PTR_ERR(kn_info); kernfs_get(kn_info); + ret = rdtgroup_add_files(kn_info, RF_TOP_INFO); + if (ret) + goto out_destroy; + for_each_alloc_enabled_rdt_resource(r) { fflags = r->fflags | RF_CTRL_INFO; ret = rdtgroup_mkdir_info_resdir(r, r->name, fflags); @@ -1081,6 +1149,7 @@ static struct dentry *rdt_mount(struct file_system_type *fs_type, struct dentry *dentry; int ret; + cpus_read_lock(); mutex_lock(&rdtgroup_mutex); /* * resctrl file system can only be mounted once. @@ -1130,12 +1199,12 @@ static struct dentry *rdt_mount(struct file_system_type *fs_type, goto out_mondata; if (rdt_alloc_capable) - static_branch_enable(&rdt_alloc_enable_key); + static_branch_enable_cpuslocked(&rdt_alloc_enable_key); if (rdt_mon_capable) - static_branch_enable(&rdt_mon_enable_key); + static_branch_enable_cpuslocked(&rdt_mon_enable_key); if (rdt_alloc_capable || rdt_mon_capable) - static_branch_enable(&rdt_enable_key); + static_branch_enable_cpuslocked(&rdt_enable_key); if (is_mbm_enabled()) { r = &rdt_resources_all[RDT_RESOURCE_L3]; @@ -1156,7 +1225,9 @@ out_info: out_cdp: cdp_disable(); out: + rdt_last_cmd_clear(); mutex_unlock(&rdtgroup_mutex); + cpus_read_unlock(); return dentry; } @@ -1295,9 +1366,7 @@ static void rmdir_all_sub(void) kfree(rdtgrp); } /* Notify online CPUs to update per cpu storage and PQR_ASSOC MSR */ - get_online_cpus(); update_closid_rmid(cpu_online_mask, &rdtgroup_default); - put_online_cpus(); kernfs_remove(kn_info); kernfs_remove(kn_mongrp); @@ -1308,6 +1377,7 @@ static void rdt_kill_sb(struct super_block *sb) { struct rdt_resource *r; + cpus_read_lock(); mutex_lock(&rdtgroup_mutex); /*Put everything back to default values. */ @@ -1315,11 +1385,12 @@ static void rdt_kill_sb(struct super_block *sb) reset_all_ctrls(r); cdp_disable(); rmdir_all_sub(); - static_branch_disable(&rdt_alloc_enable_key); - static_branch_disable(&rdt_mon_enable_key); - static_branch_disable(&rdt_enable_key); + static_branch_disable_cpuslocked(&rdt_alloc_enable_key); + static_branch_disable_cpuslocked(&rdt_mon_enable_key); + static_branch_disable_cpuslocked(&rdt_enable_key); kernfs_kill_sb(sb); mutex_unlock(&rdtgroup_mutex); + cpus_read_unlock(); } static struct file_system_type rdt_fs_type = { @@ -1524,8 +1595,10 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn, int ret; prdtgrp = rdtgroup_kn_lock_live(prgrp_kn); + rdt_last_cmd_clear(); if (!prdtgrp) { ret = -ENODEV; + rdt_last_cmd_puts("directory was removed\n"); goto out_unlock; } @@ -1533,6 +1606,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn, rdtgrp = kzalloc(sizeof(*rdtgrp), GFP_KERNEL); if (!rdtgrp) { ret = -ENOSPC; + rdt_last_cmd_puts("kernel out of memory\n"); goto out_unlock; } *r = rdtgrp; @@ -1544,6 +1618,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn, kn = kernfs_create_dir(parent_kn, name, mode, rdtgrp); if (IS_ERR(kn)) { ret = PTR_ERR(kn); + rdt_last_cmd_puts("kernfs create error\n"); goto out_free_rgrp; } rdtgrp->kn = kn; @@ -1557,24 +1632,31 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn, kernfs_get(kn); ret = rdtgroup_kn_set_ugid(kn); - if (ret) + if (ret) { + rdt_last_cmd_puts("kernfs perm error\n"); goto out_destroy; + } - files = RFTYPE_BASE | RFTYPE_CTRL; files = RFTYPE_BASE | BIT(RF_CTRLSHIFT + rtype); ret = rdtgroup_add_files(kn, files); - if (ret) + if (ret) { + rdt_last_cmd_puts("kernfs fill error\n"); goto out_destroy; + } if (rdt_mon_capable) { ret = alloc_rmid(); - if (ret < 0) + if (ret < 0) { + rdt_last_cmd_puts("out of RMIDs\n"); goto out_destroy; + } rdtgrp->mon.rmid = ret; ret = mkdir_mondata_all(kn, rdtgrp, &rdtgrp->mon.mon_data_kn); - if (ret) + if (ret) { + rdt_last_cmd_puts("kernfs subdir error\n"); goto out_idfree; + } } kernfs_activate(kn); @@ -1652,8 +1734,10 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn, kn = rdtgrp->kn; ret = closid_alloc(); - if (ret < 0) + if (ret < 0) { + rdt_last_cmd_puts("out of CLOSIDs\n"); goto out_common_fail; + } closid = ret; rdtgrp->closid = closid; @@ -1665,8 +1749,10 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn, * of tasks and cpus to monitor. */ ret = mongroup_create_dir(kn, NULL, "mon_groups", NULL); - if (ret) + if (ret) { + rdt_last_cmd_puts("kernfs subdir error\n"); goto out_id_free; + } } goto out_unlock; @@ -1902,6 +1988,9 @@ int __init rdtgroup_init(void) { int ret = 0; + seq_buf_init(&last_cmd_status, last_cmd_status_buf, + sizeof(last_cmd_status_buf)); + ret = rdtgroup_setup_root(); if (ret) return ret; diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c index 87cc9ab7a13c..4ca632a06e0b 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-severity.c +++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c @@ -204,7 +204,7 @@ static int error_context(struct mce *m) return IN_KERNEL; } -static int mce_severity_amd_smca(struct mce *m, int err_ctx) +static int mce_severity_amd_smca(struct mce *m, enum context err_ctx) { u32 addr = MSR_AMD64_SMCA_MCx_CONFIG(m->bank); u32 low, high; @@ -245,6 +245,9 @@ static int mce_severity_amd(struct mce *m, int tolerant, char **msg, bool is_exc if (m->status & MCI_STATUS_UC) { + if (ctx == IN_KERNEL) + return MCE_PANIC_SEVERITY; + /* * On older systems where overflow_recov flag is not present, we * should simply panic if an error overflow occurs. If @@ -255,10 +258,6 @@ static int mce_severity_amd(struct mce *m, int tolerant, char **msg, bool is_exc if (mce_flags.smca) return mce_severity_amd_smca(m, ctx); - /* software can try to contain */ - if (!(m->mcgstatus & MCG_STATUS_RIPV) && (ctx == IN_KERNEL)) - return MCE_PANIC_SEVERITY; - /* kill current process */ return MCE_AR_SEVERITY; } else { diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 3b413065c613..b1d616d08eee 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1367,13 +1367,12 @@ static void __start_timer(struct timer_list *t, unsigned long interval) local_irq_restore(flags); } -static void mce_timer_fn(unsigned long data) +static void mce_timer_fn(struct timer_list *t) { - struct timer_list *t = this_cpu_ptr(&mce_timer); - int cpu = smp_processor_id(); + struct timer_list *cpu_t = this_cpu_ptr(&mce_timer); unsigned long iv; - WARN_ON(cpu != data); + WARN_ON(cpu_t != t); iv = __this_cpu_read(mce_next_interval); @@ -1763,17 +1762,15 @@ static void mce_start_timer(struct timer_list *t) static void __mcheck_cpu_setup_timer(void) { struct timer_list *t = this_cpu_ptr(&mce_timer); - unsigned int cpu = smp_processor_id(); - setup_pinned_timer(t, mce_timer_fn, cpu); + timer_setup(t, mce_timer_fn, TIMER_PINNED); } static void __mcheck_cpu_init_timer(void) { struct timer_list *t = this_cpu_ptr(&mce_timer); - unsigned int cpu = smp_processor_id(); - setup_pinned_timer(t, mce_timer_fn, cpu); + timer_setup(t, mce_timer_fn, TIMER_PINNED); mce_start_timer(t); } diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 236324e83a3a..85eb5fc180c8 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -254,9 +254,9 @@ static void __init ms_hyperv_init_platform(void) #endif } -const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = { +const __initconst struct hypervisor_x86 x86_hyper_ms_hyperv = { .name = "Microsoft Hyper-V", .detect = ms_hyperv_platform, - .init_platform = ms_hyperv_init_platform, + .type = X86_HYPER_MS_HYPERV, + .init.init_platform = ms_hyperv_init_platform, }; -EXPORT_SYMBOL(x86_hyper_ms_hyperv); diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index 4378a729b933..6b7e17bf0b71 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c @@ -78,11 +78,9 @@ static int show_cpuinfo(struct seq_file *m, void *v) seq_printf(m, "microcode\t: 0x%x\n", c->microcode); if (cpu_has(c, X86_FEATURE_TSC)) { - unsigned int freq = arch_freq_get_on_cpu(cpu); + unsigned int freq = cpufreq_quick_get(cpu); if (!freq) - freq = cpufreq_quick_get(cpu); - if (!freq) freq = cpu_khz; seq_printf(m, "cpu MHz\t\t: %u.%03u\n", freq / 1000, (freq % 1000)); diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index 40ed26852ebd..8e005329648b 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c @@ -205,10 +205,10 @@ static bool __init vmware_legacy_x2apic_available(void) (eax & (1 << VMWARE_PORT_CMD_LEGACY_X2APIC)) != 0; } -const __refconst struct hypervisor_x86 x86_hyper_vmware = { +const __initconst struct hypervisor_x86 x86_hyper_vmware = { .name = "VMware", .detect = vmware_platform, - .init_platform = vmware_platform_setup, - .x2apic_available = vmware_legacy_x2apic_available, + .type = X86_HYPER_VMWARE, + .init.init_platform = vmware_platform_setup, + .init.x2apic_available = vmware_legacy_x2apic_available, }; -EXPORT_SYMBOL(x86_hyper_vmware); diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index 44404e2307bb..10e74d4778a1 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -209,7 +209,7 @@ void native_machine_crash_shutdown(struct pt_regs *regs) } #ifdef CONFIG_KEXEC_FILE -static int get_nr_ram_ranges_callback(u64 start, u64 end, void *arg) +static int get_nr_ram_ranges_callback(struct resource *res, void *arg) { unsigned int *nr_ranges = arg; @@ -342,7 +342,7 @@ static int elf_header_exclude_ranges(struct crash_elf_data *ced, return ret; } -static int prepare_elf64_ram_headers_callback(u64 start, u64 end, void *arg) +static int prepare_elf64_ram_headers_callback(struct resource *res, void *arg) { struct crash_elf_data *ced = arg; Elf64_Ehdr *ehdr; @@ -355,7 +355,7 @@ static int prepare_elf64_ram_headers_callback(u64 start, u64 end, void *arg) ehdr = ced->ehdr; /* Exclude unwanted mem ranges */ - ret = elf_header_exclude_ranges(ced, start, end); + ret = elf_header_exclude_ranges(ced, res->start, res->end); if (ret) return ret; @@ -518,14 +518,14 @@ static int add_e820_entry(struct boot_params *params, struct e820_entry *entry) return 0; } -static int memmap_entry_callback(u64 start, u64 end, void *arg) +static int memmap_entry_callback(struct resource *res, void *arg) { struct crash_memmap_data *cmd = arg; struct boot_params *params = cmd->params; struct e820_entry ei; - ei.addr = start; - ei.size = end - start + 1; + ei.addr = res->start; + ei.size = resource_size(res); ei.type = cmd->type; add_e820_entry(params, &ei); @@ -619,12 +619,12 @@ out: return ret; } -static int determine_backup_region(u64 start, u64 end, void *arg) +static int determine_backup_region(struct resource *res, void *arg) { struct kimage *image = arg; - image->arch.backup_src_start = start; - image->arch.backup_src_sz = end - start + 1; + image->arch.backup_src_start = res->start; + image->arch.backup_src_sz = resource_size(res); /* Expecting only one range for backup region */ return 1; diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c index 9c4e7ba6870c..7d7715dde901 100644 --- a/arch/x86/kernel/espfix_64.c +++ b/arch/x86/kernel/espfix_64.c @@ -155,14 +155,14 @@ void init_espfix_ap(int cpu) page = cpu/ESPFIX_STACKS_PER_PAGE; /* Did another CPU already set this up? */ - stack_page = ACCESS_ONCE(espfix_pages[page]); + stack_page = READ_ONCE(espfix_pages[page]); if (likely(stack_page)) goto done; mutex_lock(&espfix_init_mutex); /* Did we race on the lock? */ - stack_page = ACCESS_ONCE(espfix_pages[page]); + stack_page = READ_ONCE(espfix_pages[page]); if (stack_page) goto unlock_done; @@ -200,7 +200,7 @@ void init_espfix_ap(int cpu) set_pte(&pte_p[n*PTE_STRIDE], pte); /* Job is done for this CPU and any CPU which shares this page */ - ACCESS_ONCE(espfix_pages[page]) = stack_page; + WRITE_ONCE(espfix_pages[page], stack_page); unlock_done: mutex_unlock(&espfix_init_mutex); diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 7affb7e3d9a5..6abd83572b01 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -249,6 +249,10 @@ static void __init fpu__init_system_ctx_switch(void) */ static void __init fpu__init_parse_early_param(void) { + char arg[32]; + char *argptr = arg; + int bit; + if (cmdline_find_option_bool(boot_command_line, "no387")) setup_clear_cpu_cap(X86_FEATURE_FPU); @@ -266,6 +270,13 @@ static void __init fpu__init_parse_early_param(void) if (cmdline_find_option_bool(boot_command_line, "noxsaves")) setup_clear_cpu_cap(X86_FEATURE_XSAVES); + + if (cmdline_find_option(boot_command_line, "clearcpuid", arg, + sizeof(arg)) && + get_option(&argptr, &bit) && + bit >= 0 && + bit < NCAPINTS * 32) + setup_clear_cpu_cap(bit); } /* diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index f1d5476c9022..87a57b7642d3 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -15,6 +15,7 @@ #include <asm/fpu/xstate.h> #include <asm/tlbflush.h> +#include <asm/cpufeature.h> /* * Although we spell it out in here, the Processor Trace @@ -36,6 +37,19 @@ static const char *xfeature_names[] = "unknown xstate feature" , }; +static short xsave_cpuid_features[] __initdata = { + X86_FEATURE_FPU, + X86_FEATURE_XMM, + X86_FEATURE_AVX, + X86_FEATURE_MPX, + X86_FEATURE_MPX, + X86_FEATURE_AVX512F, + X86_FEATURE_AVX512F, + X86_FEATURE_AVX512F, + X86_FEATURE_INTEL_PT, + X86_FEATURE_PKU, +}; + /* * Mask of xstate features supported by the CPU and the kernel: */ @@ -59,26 +73,6 @@ unsigned int fpu_user_xstate_size; void fpu__xstate_clear_all_cpu_caps(void) { setup_clear_cpu_cap(X86_FEATURE_XSAVE); - setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); - setup_clear_cpu_cap(X86_FEATURE_XSAVEC); - setup_clear_cpu_cap(X86_FEATURE_XSAVES); - setup_clear_cpu_cap(X86_FEATURE_AVX); - setup_clear_cpu_cap(X86_FEATURE_AVX2); - setup_clear_cpu_cap(X86_FEATURE_AVX512F); - setup_clear_cpu_cap(X86_FEATURE_AVX512IFMA); - setup_clear_cpu_cap(X86_FEATURE_AVX512PF); - setup_clear_cpu_cap(X86_FEATURE_AVX512ER); - setup_clear_cpu_cap(X86_FEATURE_AVX512CD); - setup_clear_cpu_cap(X86_FEATURE_AVX512DQ); - setup_clear_cpu_cap(X86_FEATURE_AVX512BW); - setup_clear_cpu_cap(X86_FEATURE_AVX512VL); - setup_clear_cpu_cap(X86_FEATURE_MPX); - setup_clear_cpu_cap(X86_FEATURE_XGETBV1); - setup_clear_cpu_cap(X86_FEATURE_AVX512VBMI); - setup_clear_cpu_cap(X86_FEATURE_PKU); - setup_clear_cpu_cap(X86_FEATURE_AVX512_4VNNIW); - setup_clear_cpu_cap(X86_FEATURE_AVX512_4FMAPS); - setup_clear_cpu_cap(X86_FEATURE_AVX512_VPOPCNTDQ); } /* @@ -726,6 +720,7 @@ void __init fpu__init_system_xstate(void) unsigned int eax, ebx, ecx, edx; static int on_boot_cpu __initdata = 1; int err; + int i; WARN_ON_FPU(!on_boot_cpu); on_boot_cpu = 0; @@ -759,6 +754,14 @@ void __init fpu__init_system_xstate(void) goto out_disable; } + /* + * Clear XSAVE features that are disabled in the normal CPUID. + */ + for (i = 0; i < ARRAY_SIZE(xsave_cpuid_features); i++) { + if (!boot_cpu_has(xsave_cpuid_features[i])) + xfeatures_mask &= ~BIT(i); + } + xfeatures_mask &= fpu__get_supported_xfeatures_mask(); /* Enable xstate instructions to be able to continue with initialization: */ diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index f1d528bb66a6..c29020907886 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -212,9 +212,6 @@ ENTRY(startup_32_smp) #endif .Ldefault_entry: -#define CR0_STATE (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \ - X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \ - X86_CR0_PG) movl $(CR0_STATE & ~X86_CR0_PG),%eax movl %eax,%cr0 @@ -402,7 +399,7 @@ ENTRY(early_idt_handler_array) # 24(%rsp) error code i = 0 .rept NUM_EXCEPTION_VECTORS - .ifeq (EXCEPTION_ERRCODE_MASK >> i) & 1 + .if ((EXCEPTION_ERRCODE_MASK >> i) & 1) == 0 pushl $0 # Dummy error code, to make stack frame uniform .endif pushl $i # 20(%esp) Vector number diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 6dde3f3fc1f8..7dca675fe78d 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -38,11 +38,12 @@ * */ -#define p4d_index(x) (((x) >> P4D_SHIFT) & (PTRS_PER_P4D-1)) #define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1)) +#if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH) PGD_PAGE_OFFSET = pgd_index(__PAGE_OFFSET_BASE) PGD_START_KERNEL = pgd_index(__START_KERNEL_map) +#endif L3_START_KERNEL = pud_index(__START_KERNEL_map) .text @@ -50,6 +51,7 @@ L3_START_KERNEL = pud_index(__START_KERNEL_map) .code64 .globl startup_64 startup_64: + UNWIND_HINT_EMPTY /* * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0, * and someone has loaded an identity mapped page table @@ -89,6 +91,7 @@ startup_64: addq $(early_top_pgt - __START_KERNEL_map), %rax jmp 1f ENTRY(secondary_startup_64) + UNWIND_HINT_EMPTY /* * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0, * and someone has loaded a mapped page table. @@ -133,6 +136,7 @@ ENTRY(secondary_startup_64) movq $1f, %rax jmp *%rax 1: + UNWIND_HINT_EMPTY /* Check if nx is implemented */ movl $0x80000001, %eax @@ -150,9 +154,6 @@ ENTRY(secondary_startup_64) 1: wrmsr /* Make changes effective */ /* Setup cr0 */ -#define CR0_STATE (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \ - X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \ - X86_CR0_PG) movl $CR0_STATE, %eax /* Make changes effective */ movq %rax, %cr0 @@ -235,7 +236,7 @@ ENTRY(secondary_startup_64) pushq %rax # target address in negative space lretq .Lafter_lret: -ENDPROC(secondary_startup_64) +END(secondary_startup_64) #include "verify_cpu.S" @@ -247,6 +248,7 @@ ENDPROC(secondary_startup_64) */ ENTRY(start_cpu0) movq initial_stack(%rip), %rsp + UNWIND_HINT_EMPTY jmp .Ljump_to_C_code ENDPROC(start_cpu0) #endif @@ -266,26 +268,24 @@ ENDPROC(start_cpu0) .quad init_thread_union + THREAD_SIZE - SIZEOF_PTREGS __FINITDATA -bad_address: - jmp bad_address - __INIT ENTRY(early_idt_handler_array) - # 104(%rsp) %rflags - # 96(%rsp) %cs - # 88(%rsp) %rip - # 80(%rsp) error code i = 0 .rept NUM_EXCEPTION_VECTORS - .ifeq (EXCEPTION_ERRCODE_MASK >> i) & 1 - pushq $0 # Dummy error code, to make stack frame uniform + .if ((EXCEPTION_ERRCODE_MASK >> i) & 1) == 0 + UNWIND_HINT_IRET_REGS + pushq $0 # Dummy error code, to make stack frame uniform + .else + UNWIND_HINT_IRET_REGS offset=8 .endif pushq $i # 72(%rsp) Vector number jmp early_idt_handler_common + UNWIND_HINT_IRET_REGS i = i + 1 .fill early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc .endr -ENDPROC(early_idt_handler_array) + UNWIND_HINT_IRET_REGS offset=16 +END(early_idt_handler_array) early_idt_handler_common: /* @@ -313,6 +313,7 @@ early_idt_handler_common: pushq %r13 /* pt_regs->r13 */ pushq %r14 /* pt_regs->r14 */ pushq %r15 /* pt_regs->r15 */ + UNWIND_HINT_REGS cmpq $14,%rsi /* Page fault? */ jnz 10f @@ -327,8 +328,8 @@ early_idt_handler_common: 20: decl early_recursion_flag(%rip) - jmp restore_regs_and_iret -ENDPROC(early_idt_handler_common) + jmp restore_regs_and_return_to_kernel +END(early_idt_handler_common) __INITDATA @@ -362,10 +363,7 @@ NEXT_PAGE(early_dynamic_pgts) .data -#ifndef CONFIG_XEN -NEXT_PAGE(init_top_pgt) - .fill 512,8,0 -#else +#if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH) NEXT_PAGE(init_top_pgt) .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC .org init_top_pgt + PGD_PAGE_OFFSET*8, 0 @@ -382,6 +380,9 @@ NEXT_PAGE(level2_ident_pgt) * Don't set NX because code runs from these pages. */ PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD) +#else +NEXT_PAGE(init_top_pgt) + .fill 512,8,0 #endif #ifdef CONFIG_X86_5LEVEL @@ -435,7 +436,7 @@ ENTRY(phys_base) EXPORT_SYMBOL(phys_base) #include "../../x86/xen/xen-head.S" - + __PAGE_ALIGNED_BSS NEXT_PAGE(empty_zero_page) .skip PAGE_SIZE diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index 8f5cb2c7060c..86c4439f9d74 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c @@ -114,6 +114,7 @@ static void make_8259A_irq(unsigned int irq) io_apic_irqs &= ~(1<<irq); irq_set_chip_and_handler(irq, &i8259A_chip, handle_level_irq); enable_irq(irq); + lapic_assign_legacy_vector(irq, true); } /* diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index 6107ee1cb8d5..d985cef3984f 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -92,8 +92,6 @@ static const __initdata struct idt_data def_idts[] = { INTG(X86_TRAP_DF, double_fault), #endif INTG(X86_TRAP_DB, debug), - INTG(X86_TRAP_NMI, nmi), - INTG(X86_TRAP_BP, int3), #ifdef CONFIG_X86_MCE INTG(X86_TRAP_MC, &machine_check), @@ -225,7 +223,7 @@ idt_setup_from_table(gate_desc *idt, const struct idt_data *t, int size, bool sy idt_init_desc(&desc, t); write_idt_entry(idt, t->vector, &desc); if (sys) - set_bit(t->vector, used_vectors); + set_bit(t->vector, system_vectors); } } @@ -313,14 +311,14 @@ void __init idt_setup_apic_and_irq_gates(void) idt_setup_from_table(idt_table, apic_idts, ARRAY_SIZE(apic_idts), true); - for_each_clear_bit_from(i, used_vectors, FIRST_SYSTEM_VECTOR) { + for_each_clear_bit_from(i, system_vectors, FIRST_SYSTEM_VECTOR) { entry = irq_entries_start + 8 * (i - FIRST_EXTERNAL_VECTOR); set_intr_gate(i, entry); } - for_each_clear_bit_from(i, used_vectors, NR_VECTORS) { + for_each_clear_bit_from(i, system_vectors, NR_VECTORS) { #ifdef CONFIG_X86_LOCAL_APIC - set_bit(i, used_vectors); + set_bit(i, system_vectors); set_intr_gate(i, spurious_interrupt); #else entry = irq_entries_start + 8 * (i - FIRST_EXTERNAL_VECTOR); @@ -358,7 +356,7 @@ void idt_invalidate(void *addr) void __init update_intr_gate(unsigned int n, const void *addr) { - if (WARN_ON_ONCE(!test_bit(n, used_vectors))) + if (WARN_ON_ONCE(!test_bit(n, system_vectors))) return; set_intr_gate(n, addr); } @@ -366,6 +364,6 @@ void __init update_intr_gate(unsigned int n, const void *addr) void alloc_intr_gate(unsigned int n, const void *addr) { BUG_ON(n < FIRST_SYSTEM_VECTOR); - if (!test_and_set_bit(n, used_vectors)) + if (!test_and_set_bit(n, system_vectors)) set_intr_gate(n, addr); } diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 52089c043160..49cfd9fe7589 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -134,7 +134,7 @@ int arch_show_interrupts(struct seq_file *p, int prec) seq_puts(p, " Machine check polls\n"); #endif #if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN) - if (test_bit(HYPERVISOR_CALLBACK_VECTOR, used_vectors)) { + if (test_bit(HYPERVISOR_CALLBACK_VECTOR, system_vectors)) { seq_printf(p, "%*s: ", prec, "HYP"); for_each_online_cpu(j) seq_printf(p, "%10u ", @@ -333,105 +333,6 @@ __visible void smp_kvm_posted_intr_nested_ipi(struct pt_regs *regs) #ifdef CONFIG_HOTPLUG_CPU - -/* These two declarations are only used in check_irq_vectors_for_cpu_disable() - * below, which is protected by stop_machine(). Putting them on the stack - * results in a stack frame overflow. Dynamically allocating could result in a - * failure so declare these two cpumasks as global. - */ -static struct cpumask affinity_new, online_new; - -/* - * This cpu is going to be removed and its vectors migrated to the remaining - * online cpus. Check to see if there are enough vectors in the remaining cpus. - * This function is protected by stop_machine(). - */ -int check_irq_vectors_for_cpu_disable(void) -{ - unsigned int this_cpu, vector, this_count, count; - struct irq_desc *desc; - struct irq_data *data; - int cpu; - - this_cpu = smp_processor_id(); - cpumask_copy(&online_new, cpu_online_mask); - cpumask_clear_cpu(this_cpu, &online_new); - - this_count = 0; - for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { - desc = __this_cpu_read(vector_irq[vector]); - if (IS_ERR_OR_NULL(desc)) - continue; - /* - * Protect against concurrent action removal, affinity - * changes etc. - */ - raw_spin_lock(&desc->lock); - data = irq_desc_get_irq_data(desc); - cpumask_copy(&affinity_new, - irq_data_get_affinity_mask(data)); - cpumask_clear_cpu(this_cpu, &affinity_new); - - /* Do not count inactive or per-cpu irqs. */ - if (!irq_desc_has_action(desc) || irqd_is_per_cpu(data)) { - raw_spin_unlock(&desc->lock); - continue; - } - - raw_spin_unlock(&desc->lock); - /* - * A single irq may be mapped to multiple cpu's - * vector_irq[] (for example IOAPIC cluster mode). In - * this case we have two possibilities: - * - * 1) the resulting affinity mask is empty; that is - * this the down'd cpu is the last cpu in the irq's - * affinity mask, or - * - * 2) the resulting affinity mask is no longer a - * subset of the online cpus but the affinity mask is - * not zero; that is the down'd cpu is the last online - * cpu in a user set affinity mask. - */ - if (cpumask_empty(&affinity_new) || - !cpumask_subset(&affinity_new, &online_new)) - this_count++; - } - /* No need to check any further. */ - if (!this_count) - return 0; - - count = 0; - for_each_online_cpu(cpu) { - if (cpu == this_cpu) - continue; - /* - * We scan from FIRST_EXTERNAL_VECTOR to first system - * vector. If the vector is marked in the used vectors - * bitmap or an irq is assigned to it, we don't count - * it as available. - * - * As this is an inaccurate snapshot anyway, we can do - * this w/o holding vector_lock. - */ - for (vector = FIRST_EXTERNAL_VECTOR; - vector < FIRST_SYSTEM_VECTOR; vector++) { - if (!test_bit(vector, used_vectors) && - IS_ERR_OR_NULL(per_cpu(vector_irq, cpu)[vector])) { - if (++count == this_count) - return 0; - } - } - } - - if (count < this_count) { - pr_warn("CPU %d disable failed: CPU has %u vectors assigned and there are only %u available.\n", - this_cpu, this_count, count); - return -ERANGE; - } - return 0; -} - /* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ void fixup_irqs(void) { diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 1e4094eba15e..8da3e909e967 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -61,9 +61,6 @@ void __init init_ISA_irqs(void) struct irq_chip *chip = legacy_pic->chip; int i; -#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC) - init_bsp_APIC(); -#endif legacy_pic->init(0); for (i = 0; i < nr_legacy_irqs(); i++) @@ -94,6 +91,7 @@ void __init native_init_IRQ(void) x86_init.irqs.pre_vector_init(); idt_setup_apic_and_irq_gates(); + lapic_assign_system_vectors(); if (!acpi_ioapic && !of_ioapic && nr_legacy_irqs()) setup_irq(2, &irq2); diff --git a/arch/x86/kernel/kprobes/common.h b/arch/x86/kernel/kprobes/common.h index 615105cf7d58..ae38dccf0c8f 100644 --- a/arch/x86/kernel/kprobes/common.h +++ b/arch/x86/kernel/kprobes/common.h @@ -85,11 +85,11 @@ extern unsigned long recover_probed_instruction(kprobe_opcode_t *buf, * Copy an instruction and adjust the displacement if the instruction * uses the %rip-relative addressing mode. */ -extern int __copy_instruction(u8 *dest, u8 *src, struct insn *insn); +extern int __copy_instruction(u8 *dest, u8 *src, u8 *real, struct insn *insn); /* Generate a relative-jump/call instruction */ -extern void synthesize_reljump(void *from, void *to); -extern void synthesize_relcall(void *from, void *to); +extern void synthesize_reljump(void *dest, void *from, void *to); +extern void synthesize_relcall(void *dest, void *from, void *to); #ifdef CONFIG_OPTPROBES extern int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter); diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 0742491cbb73..bd36f3c33cd0 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -119,29 +119,29 @@ struct kretprobe_blackpoint kretprobe_blacklist[] = { const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist); static nokprobe_inline void -__synthesize_relative_insn(void *from, void *to, u8 op) +__synthesize_relative_insn(void *dest, void *from, void *to, u8 op) { struct __arch_relative_insn { u8 op; s32 raddr; } __packed *insn; - insn = (struct __arch_relative_insn *)from; + insn = (struct __arch_relative_insn *)dest; insn->raddr = (s32)((long)(to) - ((long)(from) + 5)); insn->op = op; } /* Insert a jump instruction at address 'from', which jumps to address 'to'.*/ -void synthesize_reljump(void *from, void *to) +void synthesize_reljump(void *dest, void *from, void *to) { - __synthesize_relative_insn(from, to, RELATIVEJUMP_OPCODE); + __synthesize_relative_insn(dest, from, to, RELATIVEJUMP_OPCODE); } NOKPROBE_SYMBOL(synthesize_reljump); /* Insert a call instruction at address 'from', which calls address 'to'.*/ -void synthesize_relcall(void *from, void *to) +void synthesize_relcall(void *dest, void *from, void *to) { - __synthesize_relative_insn(from, to, RELATIVECALL_OPCODE); + __synthesize_relative_insn(dest, from, to, RELATIVECALL_OPCODE); } NOKPROBE_SYMBOL(synthesize_relcall); @@ -346,10 +346,11 @@ static int is_IF_modifier(kprobe_opcode_t *insn) /* * Copy an instruction with recovering modified instruction by kprobes * and adjust the displacement if the instruction uses the %rip-relative - * addressing mode. + * addressing mode. Note that since @real will be the final place of copied + * instruction, displacement must be adjust by @real, not @dest. * This returns the length of copied instruction, or 0 if it has an error. */ -int __copy_instruction(u8 *dest, u8 *src, struct insn *insn) +int __copy_instruction(u8 *dest, u8 *src, u8 *real, struct insn *insn) { kprobe_opcode_t buf[MAX_INSN_SIZE]; unsigned long recovered_insn = @@ -387,11 +388,11 @@ int __copy_instruction(u8 *dest, u8 *src, struct insn *insn) * have given. */ newdisp = (u8 *) src + (s64) insn->displacement.value - - (u8 *) dest; + - (u8 *) real; if ((s64) (s32) newdisp != newdisp) { pr_err("Kprobes error: new displacement does not fit into s32 (%llx)\n", newdisp); pr_err("\tSrc: %p, Dest: %p, old disp: %x\n", - src, dest, insn->displacement.value); + src, real, insn->displacement.value); return 0; } disp = (u8 *) dest + insn_offset_displacement(insn); @@ -402,20 +403,38 @@ int __copy_instruction(u8 *dest, u8 *src, struct insn *insn) } /* Prepare reljump right after instruction to boost */ -static void prepare_boost(struct kprobe *p, struct insn *insn) +static int prepare_boost(kprobe_opcode_t *buf, struct kprobe *p, + struct insn *insn) { + int len = insn->length; + if (can_boost(insn, p->addr) && - MAX_INSN_SIZE - insn->length >= RELATIVEJUMP_SIZE) { + MAX_INSN_SIZE - len >= RELATIVEJUMP_SIZE) { /* * These instructions can be executed directly if it * jumps back to correct address. */ - synthesize_reljump(p->ainsn.insn + insn->length, + synthesize_reljump(buf + len, p->ainsn.insn + len, p->addr + insn->length); + len += RELATIVEJUMP_SIZE; p->ainsn.boostable = true; } else { p->ainsn.boostable = false; } + + return len; +} + +/* Make page to RO mode when allocate it */ +void *alloc_insn_page(void) +{ + void *page; + + page = module_alloc(PAGE_SIZE); + if (page) + set_memory_ro((unsigned long)page & PAGE_MASK, 1); + + return page; } /* Recover page to RW mode before releasing it */ @@ -429,12 +448,11 @@ void free_insn_page(void *page) static int arch_copy_kprobe(struct kprobe *p) { struct insn insn; + kprobe_opcode_t buf[MAX_INSN_SIZE]; int len; - set_memory_rw((unsigned long)p->ainsn.insn & PAGE_MASK, 1); - /* Copy an instruction with recovering if other optprobe modifies it.*/ - len = __copy_instruction(p->ainsn.insn, p->addr, &insn); + len = __copy_instruction(buf, p->addr, p->ainsn.insn, &insn); if (!len) return -EINVAL; @@ -442,15 +460,16 @@ static int arch_copy_kprobe(struct kprobe *p) * __copy_instruction can modify the displacement of the instruction, * but it doesn't affect boostable check. */ - prepare_boost(p, &insn); - - set_memory_ro((unsigned long)p->ainsn.insn & PAGE_MASK, 1); + len = prepare_boost(buf, p, &insn); /* Check whether the instruction modifies Interrupt Flag or not */ - p->ainsn.if_modifier = is_IF_modifier(p->ainsn.insn); + p->ainsn.if_modifier = is_IF_modifier(buf); /* Also, displacement change doesn't affect the first byte */ - p->opcode = p->ainsn.insn[0]; + p->opcode = buf[0]; + + /* OK, write back the instruction(s) into ROX insn buffer */ + text_poke(p->ainsn.insn, buf, len); return 0; } diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c index 041f7b6dfa0f..8dc0161cec8f 100644 --- a/arch/x86/kernel/kprobes/ftrace.c +++ b/arch/x86/kernel/kprobes/ftrace.c @@ -26,7 +26,7 @@ #include "common.h" static nokprobe_inline -int __skip_singlestep(struct kprobe *p, struct pt_regs *regs, +void __skip_singlestep(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb, unsigned long orig_ip) { /* @@ -41,33 +41,31 @@ int __skip_singlestep(struct kprobe *p, struct pt_regs *regs, __this_cpu_write(current_kprobe, NULL); if (orig_ip) regs->ip = orig_ip; - return 1; } int skip_singlestep(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) { - if (kprobe_ftrace(p)) - return __skip_singlestep(p, regs, kcb, 0); - else - return 0; + if (kprobe_ftrace(p)) { + __skip_singlestep(p, regs, kcb, 0); + preempt_enable_no_resched(); + return 1; + } + return 0; } NOKPROBE_SYMBOL(skip_singlestep); -/* Ftrace callback handler for kprobes */ +/* Ftrace callback handler for kprobes -- called under preepmt disabed */ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *ops, struct pt_regs *regs) { struct kprobe *p; struct kprobe_ctlblk *kcb; - unsigned long flags; - - /* Disable irq for emulating a breakpoint and avoiding preempt */ - local_irq_save(flags); + /* Preempt is disabled by ftrace */ p = get_kprobe((kprobe_opcode_t *)ip); if (unlikely(!p) || kprobe_disabled(p)) - goto end; + return; kcb = get_kprobe_ctlblk(); if (kprobe_running()) { @@ -77,17 +75,19 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, /* Kprobe handler expects regs->ip = ip + 1 as breakpoint hit */ regs->ip = ip + sizeof(kprobe_opcode_t); + /* To emulate trap based kprobes, preempt_disable here */ + preempt_disable(); __this_cpu_write(current_kprobe, p); kcb->kprobe_status = KPROBE_HIT_ACTIVE; - if (!p->pre_handler || !p->pre_handler(p, regs)) + if (!p->pre_handler || !p->pre_handler(p, regs)) { __skip_singlestep(p, regs, kcb, orig_ip); + preempt_enable_no_resched(); + } /* * If pre_handler returns !0, it sets regs->ip and - * resets current kprobe. + * resets current kprobe, and keep preempt count +1. */ } -end: - local_irq_restore(flags); } NOKPROBE_SYMBOL(kprobe_ftrace_handler); diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index 4f98aad38237..e941136e24d8 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -142,11 +142,11 @@ void optprobe_template_func(void); STACK_FRAME_NON_STANDARD(optprobe_template_func); #define TMPL_MOVE_IDX \ - ((long)&optprobe_template_val - (long)&optprobe_template_entry) + ((long)optprobe_template_val - (long)optprobe_template_entry) #define TMPL_CALL_IDX \ - ((long)&optprobe_template_call - (long)&optprobe_template_entry) + ((long)optprobe_template_call - (long)optprobe_template_entry) #define TMPL_END_IDX \ - ((long)&optprobe_template_end - (long)&optprobe_template_entry) + ((long)optprobe_template_end - (long)optprobe_template_entry) #define INT3_SIZE sizeof(kprobe_opcode_t) @@ -154,17 +154,15 @@ STACK_FRAME_NON_STANDARD(optprobe_template_func); static void optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs) { - struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - unsigned long flags; - /* This is possible if op is under delayed unoptimizing */ if (kprobe_disabled(&op->kp)) return; - local_irq_save(flags); + preempt_disable(); if (kprobe_running()) { kprobes_inc_nmissed_count(&op->kp); } else { + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); /* Save skipped registers */ #ifdef CONFIG_X86_64 regs->cs = __KERNEL_CS; @@ -180,17 +178,17 @@ optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs) opt_pre_handler(&op->kp, regs); __this_cpu_write(current_kprobe, NULL); } - local_irq_restore(flags); + preempt_enable_no_resched(); } NOKPROBE_SYMBOL(optimized_callback); -static int copy_optimized_instructions(u8 *dest, u8 *src) +static int copy_optimized_instructions(u8 *dest, u8 *src, u8 *real) { struct insn insn; int len = 0, ret; while (len < RELATIVEJUMP_SIZE) { - ret = __copy_instruction(dest + len, src + len, &insn); + ret = __copy_instruction(dest + len, src + len, real, &insn); if (!ret || !can_boost(&insn, src + len)) return -EINVAL; len += ret; @@ -343,57 +341,66 @@ void arch_remove_optimized_kprobe(struct optimized_kprobe *op) int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *__unused) { - u8 *buf; - int ret; + u8 *buf = NULL, *slot; + int ret, len; long rel; if (!can_optimize((unsigned long)op->kp.addr)) return -EILSEQ; - op->optinsn.insn = get_optinsn_slot(); - if (!op->optinsn.insn) + buf = kzalloc(MAX_OPTINSN_SIZE, GFP_KERNEL); + if (!buf) return -ENOMEM; + op->optinsn.insn = slot = get_optinsn_slot(); + if (!slot) { + ret = -ENOMEM; + goto out; + } + /* * Verify if the address gap is in 2GB range, because this uses * a relative jump. */ - rel = (long)op->optinsn.insn - (long)op->kp.addr + RELATIVEJUMP_SIZE; + rel = (long)slot - (long)op->kp.addr + RELATIVEJUMP_SIZE; if (abs(rel) > 0x7fffffff) { - __arch_remove_optimized_kprobe(op, 0); - return -ERANGE; + ret = -ERANGE; + goto err; } - buf = (u8 *)op->optinsn.insn; - set_memory_rw((unsigned long)buf & PAGE_MASK, 1); + /* Copy arch-dep-instance from template */ + memcpy(buf, optprobe_template_entry, TMPL_END_IDX); /* Copy instructions into the out-of-line buffer */ - ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr); - if (ret < 0) { - __arch_remove_optimized_kprobe(op, 0); - return ret; - } + ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr, + slot + TMPL_END_IDX); + if (ret < 0) + goto err; op->optinsn.size = ret; - - /* Copy arch-dep-instance from template */ - memcpy(buf, &optprobe_template_entry, TMPL_END_IDX); + len = TMPL_END_IDX + op->optinsn.size; /* Set probe information */ synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op); /* Set probe function call */ - synthesize_relcall(buf + TMPL_CALL_IDX, optimized_callback); + synthesize_relcall(buf + TMPL_CALL_IDX, + slot + TMPL_CALL_IDX, optimized_callback); /* Set returning jmp instruction at the tail of out-of-line buffer */ - synthesize_reljump(buf + TMPL_END_IDX + op->optinsn.size, + synthesize_reljump(buf + len, slot + len, (u8 *)op->kp.addr + op->optinsn.size); - - set_memory_ro((unsigned long)buf & PAGE_MASK, 1); - - flush_icache_range((unsigned long) buf, - (unsigned long) buf + TMPL_END_IDX + - op->optinsn.size + RELATIVEJUMP_SIZE); - return 0; + len += RELATIVEJUMP_SIZE; + + /* We have to use text_poke for instuction buffer because it is RO */ + text_poke(slot, buf, len); + ret = 0; +out: + kfree(buf); + return ret; + +err: + __arch_remove_optimized_kprobe(op, 0); + goto out; } /* diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 8bb9594d0761..b40ffbf156c1 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -75,8 +75,8 @@ static int parse_no_kvmclock_vsyscall(char *arg) early_param("no-kvmclock-vsyscall", parse_no_kvmclock_vsyscall); -static DEFINE_PER_CPU(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64); -static DEFINE_PER_CPU(struct kvm_steal_time, steal_time) __aligned(64); +static DEFINE_PER_CPU_DECRYPTED(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64); +static DEFINE_PER_CPU_DECRYPTED(struct kvm_steal_time, steal_time) __aligned(64); static int has_steal_clock = 0; /* @@ -312,7 +312,7 @@ static void kvm_register_steal_time(void) cpu, (unsigned long long) slow_virt_to_phys(st)); } -static DEFINE_PER_CPU(unsigned long, kvm_apic_eoi) = KVM_PV_EOI_DISABLED; +static DEFINE_PER_CPU_DECRYPTED(unsigned long, kvm_apic_eoi) = KVM_PV_EOI_DISABLED; static notrace void kvm_guest_apic_eoi_write(u32 reg, u32 val) { @@ -426,9 +426,42 @@ void kvm_disable_steal_time(void) wrmsr(MSR_KVM_STEAL_TIME, 0, 0); } +static inline void __set_percpu_decrypted(void *ptr, unsigned long size) +{ + early_set_memory_decrypted((unsigned long) ptr, size); +} + +/* + * Iterate through all possible CPUs and map the memory region pointed + * by apf_reason, steal_time and kvm_apic_eoi as decrypted at once. + * + * Note: we iterate through all possible CPUs to ensure that CPUs + * hotplugged will have their per-cpu variable already mapped as + * decrypted. + */ +static void __init sev_map_percpu_data(void) +{ + int cpu; + + if (!sev_active()) + return; + + for_each_possible_cpu(cpu) { + __set_percpu_decrypted(&per_cpu(apf_reason, cpu), sizeof(apf_reason)); + __set_percpu_decrypted(&per_cpu(steal_time, cpu), sizeof(steal_time)); + __set_percpu_decrypted(&per_cpu(kvm_apic_eoi, cpu), sizeof(kvm_apic_eoi)); + } +} + #ifdef CONFIG_SMP static void __init kvm_smp_prepare_boot_cpu(void) { + /* + * Map the per-cpu variables as decrypted before kvm_guest_cpu_init() + * shares the guest physical address with the hypervisor. + */ + sev_map_percpu_data(); + kvm_guest_cpu_init(); native_smp_prepare_boot_cpu(); kvm_spinlock_init(); @@ -465,7 +498,7 @@ static void __init kvm_apf_trap_init(void) update_intr_gate(X86_TRAP_PF, async_page_fault); } -void __init kvm_guest_init(void) +static void __init kvm_guest_init(void) { int i; @@ -496,6 +529,7 @@ void __init kvm_guest_init(void) kvm_cpu_online, kvm_cpu_down_prepare) < 0) pr_err("kvm_guest: Failed to install cpu hotplug callbacks\n"); #else + sev_map_percpu_data(); kvm_guest_cpu_init(); #endif @@ -544,12 +578,13 @@ static uint32_t __init kvm_detect(void) return kvm_cpuid_base(); } -const struct hypervisor_x86 x86_hyper_kvm __refconst = { +const __initconst struct hypervisor_x86 x86_hyper_kvm = { .name = "KVM", .detect = kvm_detect, - .x2apic_available = kvm_para_available, + .type = X86_HYPER_KVM, + .init.guest_late_init = kvm_guest_init, + .init.x2apic_available = kvm_para_available, }; -EXPORT_SYMBOL_GPL(x86_hyper_kvm); static __init int activate_jump_labels(void) { diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 5b609e28ce3f..77b492c2d658 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -27,6 +27,7 @@ #include <linux/sched.h> #include <linux/sched/clock.h> +#include <asm/mem_encrypt.h> #include <asm/x86_init.h> #include <asm/reboot.h> #include <asm/kvmclock.h> @@ -45,7 +46,7 @@ early_param("no-kvmclock", parse_no_kvmclock); /* The hypervisor will put information about time periodically here */ static struct pvclock_vsyscall_time_info *hv_clock; -static struct pvclock_wall_clock wall_clock; +static struct pvclock_wall_clock *wall_clock; struct pvclock_vsyscall_time_info *pvclock_pvti_cpu0_va(void) { @@ -64,15 +65,15 @@ static void kvm_get_wallclock(struct timespec *now) int low, high; int cpu; - low = (int)__pa_symbol(&wall_clock); - high = ((u64)__pa_symbol(&wall_clock) >> 32); + low = (int)slow_virt_to_phys(wall_clock); + high = ((u64)slow_virt_to_phys(wall_clock) >> 32); native_write_msr(msr_kvm_wall_clock, low, high); cpu = get_cpu(); vcpu_time = &hv_clock[cpu].pvti; - pvclock_read_wallclock(&wall_clock, vcpu_time, now); + pvclock_read_wallclock(wall_clock, vcpu_time, now); put_cpu(); } @@ -249,11 +250,39 @@ static void kvm_shutdown(void) native_machine_shutdown(); } +static phys_addr_t __init kvm_memblock_alloc(phys_addr_t size, + phys_addr_t align) +{ + phys_addr_t mem; + + mem = memblock_alloc(size, align); + if (!mem) + return 0; + + if (sev_active()) { + if (early_set_memory_decrypted((unsigned long)__va(mem), size)) + goto e_free; + } + + return mem; +e_free: + memblock_free(mem, size); + return 0; +} + +static void __init kvm_memblock_free(phys_addr_t addr, phys_addr_t size) +{ + if (sev_active()) + early_set_memory_encrypted((unsigned long)__va(addr), size); + + memblock_free(addr, size); +} + void __init kvmclock_init(void) { struct pvclock_vcpu_time_info *vcpu_time; - unsigned long mem; - int size, cpu; + unsigned long mem, mem_wall_clock; + int size, cpu, wall_clock_size; u8 flags; size = PAGE_ALIGN(sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS); @@ -267,21 +296,35 @@ void __init kvmclock_init(void) } else if (!(kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE))) return; - printk(KERN_INFO "kvm-clock: Using msrs %x and %x", - msr_kvm_system_time, msr_kvm_wall_clock); + wall_clock_size = PAGE_ALIGN(sizeof(struct pvclock_wall_clock)); + mem_wall_clock = kvm_memblock_alloc(wall_clock_size, PAGE_SIZE); + if (!mem_wall_clock) + return; - mem = memblock_alloc(size, PAGE_SIZE); - if (!mem) + wall_clock = __va(mem_wall_clock); + memset(wall_clock, 0, wall_clock_size); + + mem = kvm_memblock_alloc(size, PAGE_SIZE); + if (!mem) { + kvm_memblock_free(mem_wall_clock, wall_clock_size); + wall_clock = NULL; return; + } + hv_clock = __va(mem); memset(hv_clock, 0, size); if (kvm_register_clock("primary cpu clock")) { hv_clock = NULL; - memblock_free(mem, size); + kvm_memblock_free(mem, size); + kvm_memblock_free(mem_wall_clock, wall_clock_size); + wall_clock = NULL; return; } + printk(KERN_INFO "kvm-clock: Using msrs %x and %x", + msr_kvm_system_time, msr_kvm_wall_clock); + if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT)) pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT); diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index 4d17bacf4030..1c1eae961340 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -13,6 +13,7 @@ #include <linux/string.h> #include <linux/mm.h> #include <linux/smp.h> +#include <linux/syscalls.h> #include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/uaccess.h> @@ -102,7 +103,7 @@ static void finalize_ldt_struct(struct ldt_struct *ldt) static void install_ldt(struct mm_struct *current_mm, struct ldt_struct *ldt) { - /* Synchronizes with lockless_dereference in load_mm_ldt. */ + /* Synchronizes with READ_ONCE in load_mm_ldt. */ smp_store_release(¤t_mm->context.ldt, ldt); /* Activate the LDT for all CPUs using current_mm. */ @@ -295,8 +296,8 @@ out: return error; } -asmlinkage int sys_modify_ldt(int func, void __user *ptr, - unsigned long bytecount) +SYSCALL_DEFINE3(modify_ldt, int , func , void __user * , ptr , + unsigned long , bytecount) { int ret = -ENOSYS; @@ -314,5 +315,14 @@ asmlinkage int sys_modify_ldt(int func, void __user *ptr, ret = write_ldt(ptr, bytecount, 0); break; } - return ret; + /* + * The SYSCALL_DEFINE() macros give us an 'unsigned long' + * return type, but tht ABI for sys_modify_ldt() expects + * 'int'. This cast gives us an int-sized value in %rax + * for the return code. The 'unsigned' is necessary so + * the compiler does not try to sign-extend the negative + * return codes into the high half of the register when + * taking the value from int->long. + */ + return (unsigned int)ret; } diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 35aafc95e4b8..18bc9b51ac9b 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -105,7 +105,7 @@ static void nmi_max_handler(struct irq_work *w) { struct nmiaction *a = container_of(w, struct nmiaction, irq_work); int remainder_ns, decimal_msecs; - u64 whole_msecs = ACCESS_ONCE(a->max_duration); + u64 whole_msecs = READ_ONCE(a->max_duration); remainder_ns = do_div(whole_msecs, (1000 * 1000)); decimal_msecs = remainder_ns / 1000; diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 19a3e8f961c7..041096bdef86 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -115,8 +115,18 @@ unsigned paravirt_patch_jmp(void *insnbuf, const void *target, return 5; } -/* Neat trick to map patch type back to the call within the - * corresponding structure. */ +DEFINE_STATIC_KEY_TRUE(virt_spin_lock_key); + +void __init native_pv_lock_init(void) +{ + if (!static_cpu_has(X86_FEATURE_HYPERVISOR)) + static_branch_disable(&virt_spin_lock_key); +} + +/* + * Neat trick to map patch type back to the call within the + * corresponding structure. + */ static void *get_call_destination(u8 type) { struct paravirt_patch_template tmpl = { diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 5286a4a92cf7..35c461f21815 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -898,10 +898,9 @@ static void calioc2_dump_error_regs(struct iommu_table *tbl) PHB_ROOT_COMPLEX_STATUS); } -static void calgary_watchdog(unsigned long data) +static void calgary_watchdog(struct timer_list *t) { - struct pci_dev *dev = (struct pci_dev *)data; - struct iommu_table *tbl = pci_iommu(dev->bus); + struct iommu_table *tbl = from_timer(tbl, t, watchdog_timer); void __iomem *bbar = tbl->bbar; u32 val32; void __iomem *target; @@ -1016,8 +1015,7 @@ static void __init calgary_enable_translation(struct pci_dev *dev) writel(cpu_to_be32(val32), target); readl(target); /* flush */ - setup_timer(&tbl->watchdog_timer, &calgary_watchdog, - (unsigned long)dev); + timer_setup(&tbl->watchdog_timer, calgary_watchdog, 0); mod_timer(&tbl->watchdog_timer, jiffies); } diff --git a/arch/x86/kernel/pmem.c b/arch/x86/kernel/pmem.c index 3fe690067802..6b07faaa1579 100644 --- a/arch/x86/kernel/pmem.c +++ b/arch/x86/kernel/pmem.c @@ -7,7 +7,7 @@ #include <linux/init.h> #include <linux/ioport.h> -static int found(u64 start, u64 end, void *data) +static int found(struct resource *res, void *data) { return 1; } diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index c67685337c5a..97fb3e5737f5 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -49,7 +49,13 @@ */ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = { .x86_tss = { - .sp0 = TOP_OF_INIT_STACK, + /* + * .sp0 is only used when entering ring 0 from a lower + * privilege level. Since the init task never runs anything + * but ring 0 code, there is no need for a valid value here. + * Poison it. + */ + .sp0 = (1UL << (BITS_PER_LONG-1)) + 1, #ifdef CONFIG_X86_32 .ss0 = __KERNEL_DS, .ss1 = __KERNEL_CS, diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 11966251cd42..45bf0c5f93e1 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -284,9 +284,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) /* * Reload esp0 and cpu_current_top_of_stack. This changes - * current_thread_info(). + * current_thread_info(). Refresh the SYSENTER configuration in + * case prev or next is vm86. */ - load_sp0(tss, next); + update_sp0(next_p); + refresh_sysenter_cs(next); this_cpu_write(cpu_current_top_of_stack, (unsigned long)task_stack_page(next_p) + THREAD_SIZE); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 302e7b2572d1..eeeb34f85c25 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -274,7 +274,6 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp, struct inactive_task_frame *frame; struct task_struct *me = current; - p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE; childregs = task_pt_regs(p); fork_frame = container_of(childregs, struct fork_frame, regs); frame = &fork_frame->frame; @@ -464,8 +463,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) */ this_cpu_write(current_task, next_p); - /* Reload esp0 and ss1. This changes current_thread_info(). */ - load_sp0(tss, next); + /* Reload sp0. */ + update_sp0(next_p); /* * Now maybe reload the debug registers and handle I/O bitmaps diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 0957dd73d127..8af2e8d0c0a1 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -136,18 +136,6 @@ RESERVE_BRK(dmi_alloc, 65536); static __initdata unsigned long _brk_start = (unsigned long)__brk_base; unsigned long _brk_end = (unsigned long)__brk_base; -#ifdef CONFIG_X86_64 -int default_cpu_present_to_apicid(int mps_cpu) -{ - return __default_cpu_present_to_apicid(mps_cpu); -} - -int default_check_phys_apicid_present(int phys_apicid) -{ - return __default_check_phys_apicid_present(phys_apicid); -} -#endif - struct boot_params boot_params; /* @@ -380,9 +368,11 @@ static void __init reserve_initrd(void) * If SME is active, this memory will be marked encrypted by the * kernel when it is accessed (including relocation). However, the * ramdisk image was loaded decrypted by the bootloader, so make - * sure that it is encrypted before accessing it. + * sure that it is encrypted before accessing it. For SEV the + * ramdisk will already be encrypted, so only do this for SME. */ - sme_early_encrypt(ramdisk_image, ramdisk_end - ramdisk_image); + if (sme_active()) + sme_early_encrypt(ramdisk_image, ramdisk_end - ramdisk_image); initrd_start = 0; @@ -822,26 +812,6 @@ dump_kernel_offset(struct notifier_block *self, unsigned long v, void *p) return 0; } -static void __init simple_udelay_calibration(void) -{ - unsigned int tsc_khz, cpu_khz; - unsigned long lpj; - - if (!boot_cpu_has(X86_FEATURE_TSC)) - return; - - cpu_khz = x86_platform.calibrate_cpu(); - tsc_khz = x86_platform.calibrate_tsc(); - - tsc_khz = tsc_khz ? : cpu_khz; - if (!tsc_khz) - return; - - lpj = tsc_khz * 1000; - do_div(lpj, HZ); - loops_per_jiffy = lpj; -} - /* * Determine if we were loaded by an EFI loader. If so, then we have also been * passed the efi memmap, systab, etc., so we should use these data structures @@ -1045,12 +1015,10 @@ void __init setup_arch(char **cmdline_p) /* * VMware detection requires dmi to be available, so this - * needs to be done after dmi_scan_machine, for the BP. + * needs to be done after dmi_scan_machine(), for the boot CPU. */ init_hypervisor_platform(); - simple_udelay_calibration(); - x86_init.resources.probe_roms(); /* after parse_early_param, so could debug it */ @@ -1135,9 +1103,6 @@ void __init setup_arch(char **cmdline_p) memblock_set_current_limit(ISA_END_ADDRESS); e820__memblock_setup(); - if (!early_xdbc_setup_hardware()) - early_xdbc_register_console(); - reserve_bios_regions(); if (efi_enabled(EFI_MEMMAP)) { @@ -1243,6 +1208,10 @@ void __init setup_arch(char **cmdline_p) kvmclock_init(); #endif + tsc_early_delay_calibrate(); + if (!early_xdbc_setup_hardware()) + early_xdbc_register_console(); + x86_init.paging.pagetable_init(); kasan_init(); @@ -1294,7 +1263,7 @@ void __init setup_arch(char **cmdline_p) io_apic_init_mappings(); - kvm_guest_init(); + x86_init.hyper.guest_late_init(); e820__reserve_resources(); e820__register_nosave_regions(max_low_pfn); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index ad59edd84de7..5f59e6bee123 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -77,6 +77,7 @@ #include <asm/i8259.h> #include <asm/realmode.h> #include <asm/misc.h> +#include <asm/qspinlock.h> /* Number of siblings per CPU package */ int smp_num_siblings = 1; @@ -194,6 +195,12 @@ static void smp_callin(void) smp_store_cpu_info(cpuid); /* + * The topology information must be up to date before + * calibrate_delay() and notify_cpu_starting(). + */ + set_cpu_sibling_map(raw_smp_processor_id()); + + /* * Get our bogomips. * Update loops_per_jiffy in cpu_data. Previous call to * smp_store_cpu_info() stored a value that is close but not as @@ -203,11 +210,6 @@ static void smp_callin(void) cpu_data(cpuid).loops_per_jiffy = loops_per_jiffy; pr_debug("Stack at about %p\n", &cpuid); - /* - * This must be done before setting cpu_online_mask - * or calling notify_cpu_starting. - */ - set_cpu_sibling_map(raw_smp_processor_id()); wmb(); notify_cpu_starting(cpuid); @@ -249,19 +251,19 @@ static void notrace start_secondary(void *unused) /* otherwise gcc will move up smp_processor_id before the cpu_init */ barrier(); /* - * Check TSC synchronization with the BP: + * Check TSC synchronization with the boot CPU: */ check_tsc_sync_target(); /* - * Lock vector_lock and initialize the vectors on this cpu - * before setting the cpu online. We must set it online with - * vector_lock held to prevent a concurrent setup/teardown - * from seeing a half valid vector space. + * Lock vector_lock, set CPU online and bring the vector + * allocator online. Online must be set with vector_lock held + * to prevent a concurrent irq setup/teardown from seeing a + * half valid vector space. */ lock_vector_lock(); - setup_vector_irq(smp_processor_id()); set_cpu_online(smp_processor_id(), true); + lapic_online(); unlock_vector_lock(); cpu_set_state_online(smp_processor_id()); x86_platform.nmi_init(); @@ -961,8 +963,7 @@ void common_cpu_up(unsigned int cpu, struct task_struct *idle) #ifdef CONFIG_X86_32 /* Stack for startup_32 can be just as for start_secondary onwards */ irq_ctx_init(cpu); - per_cpu(cpu_current_top_of_stack, cpu) = - (unsigned long)task_stack_page(idle) + THREAD_SIZE; + per_cpu(cpu_current_top_of_stack, cpu) = task_top_of_stack(idle); #else initial_gs = per_cpu_offset(cpu); #endif @@ -1094,7 +1095,7 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle) unsigned long flags; int err, ret = 0; - WARN_ON(irqs_disabled()); + lockdep_assert_irqs_enabled(); pr_debug("++++++++++++++++++++=_---CPU UP %u\n", cpu); @@ -1190,17 +1191,10 @@ static __init void disable_smp(void) cpumask_set_cpu(0, topology_core_cpumask(0)); } -enum { - SMP_OK, - SMP_NO_CONFIG, - SMP_NO_APIC, - SMP_FORCE_UP, -}; - /* * Various sanity checks. */ -static int __init smp_sanity_check(unsigned max_cpus) +static void __init smp_sanity_check(void) { preempt_disable(); @@ -1238,16 +1232,6 @@ static int __init smp_sanity_check(unsigned max_cpus) } /* - * If we couldn't find an SMP configuration at boot time, - * get out of here now! - */ - if (!smp_found_config && !acpi_lapic) { - preempt_enable(); - pr_notice("SMP motherboard not detected\n"); - return SMP_NO_CONFIG; - } - - /* * Should not be necessary because the MP table should list the boot * CPU too, but we do it for the sake of robustness anyway. */ @@ -1257,29 +1241,6 @@ static int __init smp_sanity_check(unsigned max_cpus) physid_set(hard_smp_processor_id(), phys_cpu_present_map); } preempt_enable(); - - /* - * If we couldn't find a local APIC, then get out of here now! - */ - if (APIC_INTEGRATED(boot_cpu_apic_version) && - !boot_cpu_has(X86_FEATURE_APIC)) { - if (!disable_apic) { - pr_err("BIOS bug, local APIC #%d not detected!...\n", - boot_cpu_physical_apicid); - pr_err("... forcing use of dummy APIC emulation (tell your hw vendor)\n"); - } - return SMP_NO_APIC; - } - - /* - * If SMP should be disabled, then really disable it! - */ - if (!max_cpus) { - pr_info("SMP mode deactivated\n"); - return SMP_FORCE_UP; - } - - return SMP_OK; } static void __init smp_cpu_index_default(void) @@ -1294,9 +1255,18 @@ static void __init smp_cpu_index_default(void) } } +static void __init smp_get_logical_apicid(void) +{ + if (x2apic_mode) + cpu0_logical_apicid = apic_read(APIC_LDR); + else + cpu0_logical_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR)); +} + /* - * Prepare for SMP bootup. The MP table or ACPI has been read - * earlier. Just do some sanity checking here and enable APIC mode. + * Prepare for SMP bootup. + * @max_cpus: configured maximum number of CPUs, It is a legacy parameter + * for common interface support. */ void __init native_smp_prepare_cpus(unsigned int max_cpus) { @@ -1328,35 +1298,33 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) set_cpu_sibling_map(0); - switch (smp_sanity_check(max_cpus)) { - case SMP_NO_CONFIG: - disable_smp(); - if (APIC_init_uniprocessor()) - pr_notice("Local APIC not detected. Using dummy APIC emulation.\n"); - return; - case SMP_NO_APIC: + smp_sanity_check(); + + switch (apic_intr_mode) { + case APIC_PIC: + case APIC_VIRTUAL_WIRE_NO_CONFIG: disable_smp(); return; - case SMP_FORCE_UP: + case APIC_SYMMETRIC_IO_NO_ROUTING: disable_smp(); - apic_bsp_setup(false); + /* Setup local timer */ + x86_init.timers.setup_percpu_clockev(); return; - case SMP_OK: + case APIC_VIRTUAL_WIRE: + case APIC_SYMMETRIC_IO: break; } - if (read_apic_id() != boot_cpu_physical_apicid) { - panic("Boot APIC ID in local APIC unexpected (%d vs %d)", - read_apic_id(), boot_cpu_physical_apicid); - /* Or can we switch back to PIC here? */ - } + /* Setup local timer */ + x86_init.timers.setup_percpu_clockev(); - default_setup_apic_routing(); - cpu0_logical_apicid = apic_bsp_setup(false); + smp_get_logical_apicid(); pr_info("CPU0: "); print_cpu_info(&cpu_data(0)); + native_pv_lock_init(); + uv_system_init(); set_mtrr_aps_delayed_init(); @@ -1395,7 +1363,6 @@ void __init native_smp_cpus_done(unsigned int max_cpus) nmi_selftest(); impress_friends(); - setup_ioapic_dest(); mtrr_aps_init(); } @@ -1554,13 +1521,14 @@ void cpu_disable_common(void) remove_cpu_from_maps(cpu); unlock_vector_lock(); fixup_irqs(); + lapic_offline(); } int native_cpu_disable(void) { int ret; - ret = check_irq_vectors_for_cpu_disable(); + ret = lapic_can_unplug_cpu(); if (ret) return ret; diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index 8dabd7bf1673..77835bc021c7 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c @@ -30,7 +30,7 @@ static int save_stack_address(struct stack_trace *trace, unsigned long addr, return 0; } -static void __save_stack_trace(struct stack_trace *trace, +static void noinline __save_stack_trace(struct stack_trace *trace, struct task_struct *task, struct pt_regs *regs, bool nosched) { @@ -56,6 +56,7 @@ static void __save_stack_trace(struct stack_trace *trace, */ void save_stack_trace(struct stack_trace *trace) { + trace->skip++; __save_stack_trace(trace, current, NULL, false); } EXPORT_SYMBOL_GPL(save_stack_trace); @@ -70,6 +71,8 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) if (!try_get_task_stack(tsk)) return; + if (tsk == current) + trace->skip++; __save_stack_trace(trace, tsk, NULL, true); put_task_stack(tsk); @@ -88,8 +91,9 @@ EXPORT_SYMBOL_GPL(save_stack_trace_tsk); } \ }) -static int __save_stack_trace_reliable(struct stack_trace *trace, - struct task_struct *task) +static int __always_inline +__save_stack_trace_reliable(struct stack_trace *trace, + struct task_struct *task) { struct unwind_state state; struct pt_regs *regs; diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index 879af864d99a..749d189f8cd4 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c @@ -85,6 +85,11 @@ void __init hpet_time_init(void) static __init void x86_late_time_init(void) { x86_init.timers.timer_init(); + /* + * After PIT/HPET timers init, select and setup + * the final interrupt mode for delivering IRQs. + */ + x86_init.irqs.intr_mode_init(); tsc_init(); } diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 67db4f43309e..b7b0f74a2150 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -60,6 +60,7 @@ #include <asm/trace/mpx.h> #include <asm/mpx.h> #include <asm/vm86.h> +#include <asm/umip.h> #ifdef CONFIG_X86_64 #include <asm/x86_init.h> @@ -71,7 +72,7 @@ #include <asm/proto.h> #endif -DECLARE_BITMAP(used_vectors, NR_VECTORS); +DECLARE_BITMAP(system_vectors, NR_VECTORS); static inline void cond_local_irq_enable(struct pt_regs *regs) { @@ -141,8 +142,7 @@ void ist_begin_non_atomic(struct pt_regs *regs) * will catch asm bugs and any attempt to use ist_preempt_enable * from double_fault. */ - BUG_ON((unsigned long)(current_top_of_stack() - - current_stack_pointer) >= THREAD_SIZE); + BUG_ON(!on_thread_stack()); preempt_enable_no_resched(); } @@ -209,9 +209,6 @@ do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str, if (fixup_exception(regs, trapnr)) return 0; - if (fixup_bug(regs, trapnr)) - return 0; - tsk->thread.error_code = error_code; tsk->thread.trap_nr = trapnr; die(str, regs, error_code); @@ -292,6 +289,13 @@ static void do_error_trap(struct pt_regs *regs, long error_code, char *str, RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); + /* + * WARN*()s end up here; fix them up before we call the + * notifier chain. + */ + if (!user_mode(regs) && fixup_bug(regs, trapnr)) + return; + if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) != NOTIFY_STOP) { cond_local_irq_enable(regs); @@ -514,6 +518,11 @@ do_general_protection(struct pt_regs *regs, long error_code) RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); cond_local_irq_enable(regs); + if (static_cpu_has(X86_FEATURE_UMIP)) { + if (user_mode(regs) && fixup_umip_exception(regs)) + return; + } + if (v8086_mode(regs)) { local_irq_enable(); handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code); diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 796d96bb0821..8ea117f8142e 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -112,7 +112,7 @@ static void cyc2ns_data_init(struct cyc2ns_data *data) data->cyc2ns_offset = 0; } -static void cyc2ns_init(int cpu) +static void __init cyc2ns_init(int cpu) { struct cyc2ns *c2n = &per_cpu(cyc2ns, cpu); @@ -812,13 +812,13 @@ unsigned long native_calibrate_cpu(void) return tsc_pit_min; } -int recalibrate_cpu_khz(void) +void recalibrate_cpu_khz(void) { #ifndef CONFIG_SMP unsigned long cpu_khz_old = cpu_khz; if (!boot_cpu_has(X86_FEATURE_TSC)) - return -ENODEV; + return; cpu_khz = x86_platform.calibrate_cpu(); tsc_khz = x86_platform.calibrate_tsc(); @@ -828,10 +828,6 @@ int recalibrate_cpu_khz(void) cpu_khz = tsc_khz; cpu_data(0).loops_per_jiffy = cpufreq_scale(cpu_data(0).loops_per_jiffy, cpu_khz_old, cpu_khz); - - return 0; -#else - return -ENODEV; #endif } @@ -959,17 +955,21 @@ core_initcall(cpufreq_register_tsc_scaling); /* * If ART is present detect the numerator:denominator to convert to TSC */ -static void detect_art(void) +static void __init detect_art(void) { unsigned int unused[2]; if (boot_cpu_data.cpuid_level < ART_CPUID_LEAF) return; - /* Don't enable ART in a VM, non-stop TSC and TSC_ADJUST required */ + /* + * Don't enable ART in a VM, non-stop TSC and TSC_ADJUST required, + * and the TSC counter resets must not occur asynchronously. + */ if (boot_cpu_has(X86_FEATURE_HYPERVISOR) || !boot_cpu_has(X86_FEATURE_NONSTOP_TSC) || - !boot_cpu_has(X86_FEATURE_TSC_ADJUST)) + !boot_cpu_has(X86_FEATURE_TSC_ADJUST) || + tsc_async_resets) return; cpuid(ART_CPUID_LEAF, &art_to_tsc_denominator, @@ -1263,6 +1263,25 @@ static int __init init_tsc_clocksource(void) */ device_initcall(init_tsc_clocksource); +void __init tsc_early_delay_calibrate(void) +{ + unsigned long lpj; + + if (!boot_cpu_has(X86_FEATURE_TSC)) + return; + + cpu_khz = x86_platform.calibrate_cpu(); + tsc_khz = x86_platform.calibrate_tsc(); + + tsc_khz = tsc_khz ? : cpu_khz; + if (!tsc_khz) + return; + + lpj = tsc_khz * 1000; + do_div(lpj, HZ); + loops_per_jiffy = lpj; +} + void __init tsc_init(void) { u64 lpj, cyc; @@ -1346,12 +1365,10 @@ void __init tsc_init(void) unsigned long calibrate_delay_is_known(void) { int sibling, cpu = smp_processor_id(); - struct cpumask *mask = topology_core_cpumask(cpu); - - if (!tsc_disabled && !cpu_has(&cpu_data(cpu), X86_FEATURE_CONSTANT_TSC)) - return 0; + int constant_tsc = cpu_has(&cpu_data(cpu), X86_FEATURE_CONSTANT_TSC); + const struct cpumask *mask = topology_core_cpumask(cpu); - if (!mask) + if (tsc_disabled || !constant_tsc || !mask) return 0; sibling = cpumask_any_but(mask, cpu); diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c index e76a9881306b..ec534f978867 100644 --- a/arch/x86/kernel/tsc_sync.c +++ b/arch/x86/kernel/tsc_sync.c @@ -31,6 +31,20 @@ struct tsc_adjust { static DEFINE_PER_CPU(struct tsc_adjust, tsc_adjust); +/* + * TSC's on different sockets may be reset asynchronously. + * This may cause the TSC ADJUST value on socket 0 to be NOT 0. + */ +bool __read_mostly tsc_async_resets; + +void mark_tsc_async_resets(char *reason) +{ + if (tsc_async_resets) + return; + tsc_async_resets = true; + pr_info("tsc: Marking TSC async resets true due to %s\n", reason); +} + void tsc_verify_tsc_adjust(bool resume) { struct tsc_adjust *adj = this_cpu_ptr(&tsc_adjust); @@ -39,6 +53,10 @@ void tsc_verify_tsc_adjust(bool resume) if (!boot_cpu_has(X86_FEATURE_TSC_ADJUST)) return; + /* Skip unnecessary error messages if TSC already unstable */ + if (check_tsc_unstable()) + return; + /* Rate limit the MSR check */ if (!resume && time_before(jiffies, adj->nextcheck)) return; @@ -72,12 +90,22 @@ static void tsc_sanitize_first_cpu(struct tsc_adjust *cur, s64 bootval, * non zero. We don't do that on non boot cpus because physical * hotplug should have set the ADJUST register to a value > 0 so * the TSC is in sync with the already running cpus. + * + * Also don't force the ADJUST value to zero if that is a valid value + * for socket 0 as determined by the system arch. This is required + * when multiple sockets are reset asynchronously with each other + * and socket 0 may not have an TSC ADJUST value of 0. */ if (bootcpu && bootval != 0) { - pr_warn(FW_BUG "TSC ADJUST: CPU%u: %lld force to 0\n", cpu, - bootval); - wrmsrl(MSR_IA32_TSC_ADJUST, 0); - bootval = 0; + if (likely(!tsc_async_resets)) { + pr_warn(FW_BUG "TSC ADJUST: CPU%u: %lld force to 0\n", + cpu, bootval); + wrmsrl(MSR_IA32_TSC_ADJUST, 0); + bootval = 0; + } else { + pr_info("TSC ADJUST: CPU%u: %lld NOT forced to 0\n", + cpu, bootval); + } } cur->adjusted = bootval; } @@ -91,6 +119,10 @@ bool __init tsc_store_and_check_tsc_adjust(bool bootcpu) if (!boot_cpu_has(X86_FEATURE_TSC_ADJUST)) return false; + /* Skip unnecessary error messages if TSC already unstable */ + if (check_tsc_unstable()) + return false; + rdmsrl(MSR_IA32_TSC_ADJUST, bootval); cur->bootval = bootval; cur->nextcheck = jiffies + HZ; @@ -119,6 +151,13 @@ bool tsc_store_and_check_tsc_adjust(bool bootcpu) cur->warned = false; /* + * If a non-zero TSC value for socket 0 may be valid then the default + * adjusted value cannot assumed to be zero either. + */ + if (tsc_async_resets) + cur->adjusted = bootval; + + /* * Check whether this CPU is the first in a package to come up. In * this case do not check the boot value against another package * because the new package might have been physically hotplugged, @@ -139,10 +178,9 @@ bool tsc_store_and_check_tsc_adjust(bool bootcpu) * Compare the boot value and complain if it differs in the * package. */ - if (bootval != ref->bootval) { - pr_warn(FW_BUG "TSC ADJUST differs: Reference CPU%u: %lld CPU%u: %lld\n", - refcpu, ref->bootval, cpu, bootval); - } + if (bootval != ref->bootval) + printk_once(FW_BUG "TSC ADJUST differs within socket(s), fixing all errors\n"); + /* * The TSC_ADJUST values in a package must be the same. If the boot * value on this newly upcoming CPU differs from the adjustment @@ -150,8 +188,6 @@ bool tsc_store_and_check_tsc_adjust(bool bootcpu) * adjusted value. */ if (bootval != ref->adjusted) { - pr_warn("TSC ADJUST synchronize: Reference CPU%u: %lld CPU%u: %lld\n", - refcpu, ref->adjusted, cpu, bootval); cur->adjusted = ref->adjusted; wrmsrl(MSR_IA32_TSC_ADJUST, ref->adjusted); } diff --git a/arch/x86/kernel/umip.c b/arch/x86/kernel/umip.c new file mode 100644 index 000000000000..6ba82be68cff --- /dev/null +++ b/arch/x86/kernel/umip.c @@ -0,0 +1,366 @@ +/* + * umip.c Emulation for instruction protected by the Intel User-Mode + * Instruction Prevention feature + * + * Copyright (c) 2017, Intel Corporation. + * Ricardo Neri <ricardo.neri-calderon@linux.intel.com> + */ + +#include <linux/uaccess.h> +#include <asm/umip.h> +#include <asm/traps.h> +#include <asm/insn.h> +#include <asm/insn-eval.h> +#include <linux/ratelimit.h> + +#undef pr_fmt +#define pr_fmt(fmt) "umip: " fmt + +/** DOC: Emulation for User-Mode Instruction Prevention (UMIP) + * + * The feature User-Mode Instruction Prevention present in recent Intel + * processor prevents a group of instructions (sgdt, sidt, sldt, smsw, and str) + * from being executed with CPL > 0. Otherwise, a general protection fault is + * issued. + * + * Rather than relaying to the user space the general protection fault caused by + * the UMIP-protected instructions (in the form of a SIGSEGV signal), it can be + * trapped and emulate the result of such instructions to provide dummy values. + * This allows to both conserve the current kernel behavior and not reveal the + * system resources that UMIP intends to protect (i.e., the locations of the + * global descriptor and interrupt descriptor tables, the segment selectors of + * the local descriptor table, the value of the task state register and the + * contents of the CR0 register). + * + * This emulation is needed because certain applications (e.g., WineHQ and + * DOSEMU2) rely on this subset of instructions to function. + * + * The instructions protected by UMIP can be split in two groups. Those which + * return a kernel memory address (sgdt and sidt) and those which return a + * value (sldt, str and smsw). + * + * For the instructions that return a kernel memory address, applications + * such as WineHQ rely on the result being located in the kernel memory space, + * not the actual location of the table. The result is emulated as a hard-coded + * value that, lies close to the top of the kernel memory. The limit for the GDT + * and the IDT are set to zero. + * + * Given that sldt and str are not commonly used in programs that run on WineHQ + * or DOSEMU2, they are not emulated. + * + * The instruction smsw is emulated to return the value that the register CR0 + * has at boot time as set in the head_32. + * + * Also, emulation is provided only for 32-bit processes; 64-bit processes + * that attempt to use the instructions that UMIP protects will receive the + * SIGSEGV signal issued as a consequence of the general protection fault. + * + * Care is taken to appropriately emulate the results when segmentation is + * used. That is, rather than relying on USER_DS and USER_CS, the function + * insn_get_addr_ref() inspects the segment descriptor pointed by the + * registers in pt_regs. This ensures that we correctly obtain the segment + * base address and the address and operand sizes even if the user space + * application uses a local descriptor table. + */ + +#define UMIP_DUMMY_GDT_BASE 0xfffe0000 +#define UMIP_DUMMY_IDT_BASE 0xffff0000 + +/* + * The SGDT and SIDT instructions store the contents of the global descriptor + * table and interrupt table registers, respectively. The destination is a + * memory operand of X+2 bytes. X bytes are used to store the base address of + * the table and 2 bytes are used to store the limit. In 32-bit processes, the + * only processes for which emulation is provided, X has a value of 4. + */ +#define UMIP_GDT_IDT_BASE_SIZE 4 +#define UMIP_GDT_IDT_LIMIT_SIZE 2 + +#define UMIP_INST_SGDT 0 /* 0F 01 /0 */ +#define UMIP_INST_SIDT 1 /* 0F 01 /1 */ +#define UMIP_INST_SMSW 3 /* 0F 01 /4 */ + +/** + * identify_insn() - Identify a UMIP-protected instruction + * @insn: Instruction structure with opcode and ModRM byte. + * + * From the opcode and ModRM.reg in @insn identify, if any, a UMIP-protected + * instruction that can be emulated. + * + * Returns: + * + * On success, a constant identifying a specific UMIP-protected instruction that + * can be emulated. + * + * -EINVAL on error or when not an UMIP-protected instruction that can be + * emulated. + */ +static int identify_insn(struct insn *insn) +{ + /* By getting modrm we also get the opcode. */ + insn_get_modrm(insn); + + if (!insn->modrm.nbytes) + return -EINVAL; + + /* All the instructions of interest start with 0x0f. */ + if (insn->opcode.bytes[0] != 0xf) + return -EINVAL; + + if (insn->opcode.bytes[1] == 0x1) { + switch (X86_MODRM_REG(insn->modrm.value)) { + case 0: + return UMIP_INST_SGDT; + case 1: + return UMIP_INST_SIDT; + case 4: + return UMIP_INST_SMSW; + default: + return -EINVAL; + } + } + + /* SLDT AND STR are not emulated */ + return -EINVAL; +} + +/** + * emulate_umip_insn() - Emulate UMIP instructions and return dummy values + * @insn: Instruction structure with operands + * @umip_inst: A constant indicating the instruction to emulate + * @data: Buffer into which the dummy result is stored + * @data_size: Size of the emulated result + * + * Emulate an instruction protected by UMIP and provide a dummy result. The + * result of the emulation is saved in @data. The size of the results depends + * on both the instruction and type of operand (register vs memory address). + * The size of the result is updated in @data_size. Caller is responsible + * of providing a @data buffer of at least UMIP_GDT_IDT_BASE_SIZE + + * UMIP_GDT_IDT_LIMIT_SIZE bytes. + * + * Returns: + * + * 0 on success, -EINVAL on error while emulating. + */ +static int emulate_umip_insn(struct insn *insn, int umip_inst, + unsigned char *data, int *data_size) +{ + unsigned long dummy_base_addr, dummy_value; + unsigned short dummy_limit = 0; + + if (!data || !data_size || !insn) + return -EINVAL; + /* + * These two instructions return the base address and limit of the + * global and interrupt descriptor table, respectively. According to the + * Intel Software Development manual, the base address can be 24-bit, + * 32-bit or 64-bit. Limit is always 16-bit. If the operand size is + * 16-bit, the returned value of the base address is supposed to be a + * zero-extended 24-byte number. However, it seems that a 32-byte number + * is always returned irrespective of the operand size. + */ + if (umip_inst == UMIP_INST_SGDT || umip_inst == UMIP_INST_SIDT) { + /* SGDT and SIDT do not use registers operands. */ + if (X86_MODRM_MOD(insn->modrm.value) == 3) + return -EINVAL; + + if (umip_inst == UMIP_INST_SGDT) + dummy_base_addr = UMIP_DUMMY_GDT_BASE; + else + dummy_base_addr = UMIP_DUMMY_IDT_BASE; + + *data_size = UMIP_GDT_IDT_LIMIT_SIZE + UMIP_GDT_IDT_BASE_SIZE; + + memcpy(data + 2, &dummy_base_addr, UMIP_GDT_IDT_BASE_SIZE); + memcpy(data, &dummy_limit, UMIP_GDT_IDT_LIMIT_SIZE); + + } else if (umip_inst == UMIP_INST_SMSW) { + dummy_value = CR0_STATE; + + /* + * Even though the CR0 register has 4 bytes, the number + * of bytes to be copied in the result buffer is determined + * by whether the operand is a register or a memory location. + * If operand is a register, return as many bytes as the operand + * size. If operand is memory, return only the two least + * siginificant bytes of CR0. + */ + if (X86_MODRM_MOD(insn->modrm.value) == 3) + *data_size = insn->opnd_bytes; + else + *data_size = 2; + + memcpy(data, &dummy_value, *data_size); + /* STR and SLDT are not emulated */ + } else { + return -EINVAL; + } + + return 0; +} + +/** + * force_sig_info_umip_fault() - Force a SIGSEGV with SEGV_MAPERR + * @addr: Address that caused the signal + * @regs: Register set containing the instruction pointer + * + * Force a SIGSEGV signal with SEGV_MAPERR as the error code. This function is + * intended to be used to provide a segmentation fault when the result of the + * UMIP emulation could not be copied to the user space memory. + * + * Returns: none + */ +static void force_sig_info_umip_fault(void __user *addr, struct pt_regs *regs) +{ + siginfo_t info; + struct task_struct *tsk = current; + + tsk->thread.cr2 = (unsigned long)addr; + tsk->thread.error_code = X86_PF_USER | X86_PF_WRITE; + tsk->thread.trap_nr = X86_TRAP_PF; + + info.si_signo = SIGSEGV; + info.si_errno = 0; + info.si_code = SEGV_MAPERR; + info.si_addr = addr; + force_sig_info(SIGSEGV, &info, tsk); + + if (!(show_unhandled_signals && unhandled_signal(tsk, SIGSEGV))) + return; + + pr_err_ratelimited("%s[%d] umip emulation segfault ip:%lx sp:%lx error:%x in %lx\n", + tsk->comm, task_pid_nr(tsk), regs->ip, + regs->sp, X86_PF_USER | X86_PF_WRITE, + regs->ip); +} + +/** + * fixup_umip_exception() - Fixup a general protection fault caused by UMIP + * @regs: Registers as saved when entering the #GP handler + * + * The instructions sgdt, sidt, str, smsw, sldt cause a general protection + * fault if executed with CPL > 0 (i.e., from user space). If the offending + * user-space process is not in long mode, this function fixes the exception + * up and provides dummy results for sgdt, sidt and smsw; str and sldt are not + * fixed up. Also long mode user-space processes are not fixed up. + * + * If operands are memory addresses, results are copied to user-space memory as + * indicated by the instruction pointed by eIP using the registers indicated in + * the instruction operands. If operands are registers, results are copied into + * the context that was saved when entering kernel mode. + * + * Returns: + * + * True if emulation was successful; false if not. + */ +bool fixup_umip_exception(struct pt_regs *regs) +{ + int not_copied, nr_copied, reg_offset, dummy_data_size, umip_inst; + unsigned long seg_base = 0, *reg_addr; + /* 10 bytes is the maximum size of the result of UMIP instructions */ + unsigned char dummy_data[10] = { 0 }; + unsigned char buf[MAX_INSN_SIZE]; + void __user *uaddr; + struct insn insn; + char seg_defs; + + if (!regs) + return false; + + /* Do not emulate 64-bit processes. */ + if (user_64bit_mode(regs)) + return false; + + /* + * If not in user-space long mode, a custom code segment could be in + * use. This is true in protected mode (if the process defined a local + * descriptor table), or virtual-8086 mode. In most of the cases + * seg_base will be zero as in USER_CS. + */ + if (!user_64bit_mode(regs)) + seg_base = insn_get_seg_base(regs, INAT_SEG_REG_CS); + + if (seg_base == -1L) + return false; + + not_copied = copy_from_user(buf, (void __user *)(seg_base + regs->ip), + sizeof(buf)); + nr_copied = sizeof(buf) - not_copied; + + /* + * The copy_from_user above could have failed if user code is protected + * by a memory protection key. Give up on emulation in such a case. + * Should we issue a page fault? + */ + if (!nr_copied) + return false; + + insn_init(&insn, buf, nr_copied, user_64bit_mode(regs)); + + /* + * Override the default operand and address sizes with what is specified + * in the code segment descriptor. The instruction decoder only sets + * the address size it to either 4 or 8 address bytes and does nothing + * for the operand bytes. This OK for most of the cases, but we could + * have special cases where, for instance, a 16-bit code segment + * descriptor is used. + * If there is an address override prefix, the instruction decoder + * correctly updates these values, even for 16-bit defaults. + */ + seg_defs = insn_get_code_seg_params(regs); + if (seg_defs == -EINVAL) + return false; + + insn.addr_bytes = INSN_CODE_SEG_ADDR_SZ(seg_defs); + insn.opnd_bytes = INSN_CODE_SEG_OPND_SZ(seg_defs); + + insn_get_length(&insn); + if (nr_copied < insn.length) + return false; + + umip_inst = identify_insn(&insn); + if (umip_inst < 0) + return false; + + if (emulate_umip_insn(&insn, umip_inst, dummy_data, &dummy_data_size)) + return false; + + /* + * If operand is a register, write result to the copy of the register + * value that was pushed to the stack when entering into kernel mode. + * Upon exit, the value we write will be restored to the actual hardware + * register. + */ + if (X86_MODRM_MOD(insn.modrm.value) == 3) { + reg_offset = insn_get_modrm_rm_off(&insn, regs); + + /* + * Negative values are usually errors. In memory addressing, + * the exception is -EDOM. Since we expect a register operand, + * all negative values are errors. + */ + if (reg_offset < 0) + return false; + + reg_addr = (unsigned long *)((unsigned long)regs + reg_offset); + memcpy(reg_addr, dummy_data, dummy_data_size); + } else { + uaddr = insn_get_addr_ref(&insn, regs); + if ((unsigned long)uaddr == -1L) + return false; + + nr_copied = copy_to_user(uaddr, dummy_data, dummy_data_size); + if (nr_copied > 0) { + /* + * If copy fails, send a signal and tell caller that + * fault was fixed up. + */ + force_sig_info_umip_fault(uaddr, regs); + return true; + } + } + + /* increase IP to let the program keep going */ + regs->ip += insn.length; + return true; +} diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index b95007e7c1b3..a3f973b2c97a 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -279,7 +279,7 @@ static bool deref_stack_reg(struct unwind_state *state, unsigned long addr, if (!stack_access_ok(state, addr, sizeof(long))) return false; - *val = READ_ONCE_TASK_STACK(state->task, *(unsigned long *)addr); + *val = READ_ONCE_NOCHECK(*(unsigned long *)addr); return true; } diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 495c776de4b4..a3755d293a48 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -271,12 +271,15 @@ static bool is_prefix_bad(struct insn *insn) int i; for (i = 0; i < insn->prefixes.nbytes; i++) { - switch (insn->prefixes.bytes[i]) { - case 0x26: /* INAT_PFX_ES */ - case 0x2E: /* INAT_PFX_CS */ - case 0x36: /* INAT_PFX_DS */ - case 0x3E: /* INAT_PFX_SS */ - case 0xF0: /* INAT_PFX_LOCK */ + insn_attr_t attr; + + attr = inat_get_opcode_attribute(insn->prefixes.bytes[i]); + switch (attr) { + case INAT_MAKE_PREFIX(INAT_PFX_ES): + case INAT_MAKE_PREFIX(INAT_PFX_CS): + case INAT_MAKE_PREFIX(INAT_PFX_DS): + case INAT_MAKE_PREFIX(INAT_PFX_SS): + case INAT_MAKE_PREFIX(INAT_PFX_LOCK): return true; } } diff --git a/arch/x86/kernel/verify_cpu.S b/arch/x86/kernel/verify_cpu.S index 014ea59aa153..3d3c2f71f617 100644 --- a/arch/x86/kernel/verify_cpu.S +++ b/arch/x86/kernel/verify_cpu.S @@ -33,7 +33,7 @@ #include <asm/cpufeatures.h> #include <asm/msr-index.h> -verify_cpu: +ENTRY(verify_cpu) pushf # Save caller passed flags push $0 # Kill any dangerous flags popf @@ -139,3 +139,4 @@ verify_cpu: popf # Restore caller passed flags xorl %eax, %eax ret +ENDPROC(verify_cpu) diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 68244742ecb0..5edb27f1a2c4 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -55,6 +55,7 @@ #include <asm/irq.h> #include <asm/traps.h> #include <asm/vm86.h> +#include <asm/switch_to.h> /* * Known problems: @@ -94,7 +95,6 @@ void save_v86_state(struct kernel_vm86_regs *regs, int retval) { - struct tss_struct *tss; struct task_struct *tsk = current; struct vm86plus_struct __user *user; struct vm86 *vm86 = current->thread.vm86; @@ -146,12 +146,13 @@ void save_v86_state(struct kernel_vm86_regs *regs, int retval) do_exit(SIGSEGV); } - tss = &per_cpu(cpu_tss, get_cpu()); + preempt_disable(); tsk->thread.sp0 = vm86->saved_sp0; tsk->thread.sysenter_cs = __KERNEL_CS; - load_sp0(tss, &tsk->thread); + update_sp0(tsk); + refresh_sysenter_cs(&tsk->thread); vm86->saved_sp0 = 0; - put_cpu(); + preempt_enable(); memcpy(®s->pt, &vm86->regs32, sizeof(struct pt_regs)); @@ -237,7 +238,6 @@ SYSCALL_DEFINE2(vm86, unsigned long, cmd, unsigned long, arg) static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus) { - struct tss_struct *tss; struct task_struct *tsk = current; struct vm86 *vm86 = tsk->thread.vm86; struct kernel_vm86_regs vm86regs; @@ -365,15 +365,17 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus) vm86->saved_sp0 = tsk->thread.sp0; lazy_save_gs(vm86->regs32.gs); - tss = &per_cpu(cpu_tss, get_cpu()); /* make room for real-mode segments */ + preempt_disable(); tsk->thread.sp0 += 16; - if (static_cpu_has(X86_FEATURE_SEP)) + if (static_cpu_has(X86_FEATURE_SEP)) { tsk->thread.sysenter_cs = 0; + refresh_sysenter_cs(&tsk->thread); + } - load_sp0(tss, &tsk->thread); - put_cpu(); + update_sp0(tsk); + preempt_enable(); if (vm86->flags & VM86_SCREEN_BITMAP) mark_screen_rdonly(tsk->mm); diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c index b034b1b14b9c..44685fb2a192 100644 --- a/arch/x86/kernel/vsmp_64.c +++ b/arch/x86/kernel/vsmp_64.c @@ -26,9 +26,6 @@ #define TOPOLOGY_REGISTER_OFFSET 0x10 -/* Flag below is initialized once during vSMP PCI initialization. */ -static int irq_routing_comply = 1; - #if defined CONFIG_PCI && defined CONFIG_PARAVIRT /* * Interrupt control on vSMPowered systems: @@ -105,9 +102,6 @@ static void __init set_vsmp_pv_ops(void) if (cap & ctl & BIT(8)) { ctl &= ~BIT(8); - /* Interrupt routing set to ignore */ - irq_routing_comply = 0; - #ifdef CONFIG_PROC_FS /* Don't let users change irq affinity via procfs */ no_irq_affinity = 1; @@ -211,23 +205,10 @@ static int apicid_phys_pkg_id(int initial_apic_id, int index_msb) return hard_smp_processor_id() >> index_msb; } -/* - * In vSMP, all cpus should be capable of handling interrupts, regardless of - * the APIC used. - */ -static void fill_vector_allocation_domain(int cpu, struct cpumask *retmask, - const struct cpumask *mask) -{ - cpumask_setall(retmask); -} - static void vsmp_apic_post_init(void) { /* need to update phys_pkg_id */ apic->phys_pkg_id = apicid_phys_pkg_id; - - if (!irq_routing_comply) - apic->vector_allocation_domain = fill_vector_allocation_domain; } void __init vsmp_init(void) diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index a088b2c47f73..1151ccd72ce9 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -28,6 +28,8 @@ void x86_init_noop(void) { } void __init x86_init_uint_noop(unsigned int unused) { } int __init iommu_init_noop(void) { return 0; } void iommu_shutdown_noop(void) { } +bool __init bool_x86_init_noop(void) { return false; } +void x86_op_int_noop(int cpu) { } /* * The platform setup functions are preset with the default functions @@ -55,6 +57,7 @@ struct x86_init_ops x86_init __initdata = { .pre_vector_init = init_ISA_irqs, .intr_init = native_init_IRQ, .trap_init = x86_init_noop, + .intr_mode_init = apic_intr_mode_init }, .oem = { @@ -81,6 +84,13 @@ struct x86_init_ops x86_init __initdata = { .init_irq = x86_default_pci_init_irq, .fixup_irqs = x86_default_pci_fixup_irqs, }, + + .hyper = { + .init_platform = x86_init_noop, + .guest_late_init = x86_init_noop, + .x2apic_available = bool_x86_init_noop, + .init_mem_mapping = x86_init_noop, + }, }; struct x86_cpuinit_ops x86_cpuinit = { @@ -101,6 +111,7 @@ struct x86_platform_ops x86_platform __ro_after_init = { .get_nmi_reason = default_get_nmi_reason, .save_sched_clock_state = tsc_save_sched_clock_state, .restore_sched_clock_state = tsc_restore_sched_clock_state, + .hyper.pin_vcpu = x86_op_int_noop, }; EXPORT_SYMBOL_GPL(x86_platform); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 7a69cf053711..a119b361b8b7 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -443,7 +443,7 @@ static u64 __update_clear_spte_slow(u64 *sptep, u64 spte) static u64 __get_spte_lockless(u64 *sptep) { - return ACCESS_ONCE(*sptep); + return READ_ONCE(*sptep); } #else union split_spte { @@ -4819,7 +4819,7 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, * If we don't have indirect shadow pages, it means no page is * write-protected, so we can exit simply. */ - if (!ACCESS_ONCE(vcpu->kvm->arch.indirect_shadow_pages)) + if (!READ_ONCE(vcpu->kvm->arch.indirect_shadow_pages)) return; remote_flush = local_flush = false; diff --git a/arch/x86/kvm/page_track.c b/arch/x86/kvm/page_track.c index ea67dc876316..01c1371f39f8 100644 --- a/arch/x86/kvm/page_track.c +++ b/arch/x86/kvm/page_track.c @@ -157,7 +157,7 @@ bool kvm_page_track_is_active(struct kvm_vcpu *vcpu, gfn_t gfn, return false; index = gfn_to_index(gfn, slot->base_gfn, PT_PAGE_TABLE_LEVEL); - return !!ACCESS_ONCE(slot->arch.gfn_track[mode][index]); + return !!READ_ONCE(slot->arch.gfn_track[mode][index]); } void kvm_page_track_cleanup(struct kvm *kvm) diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 457f681ef379..7b181b61170e 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -24,7 +24,7 @@ lib-y := delay.o misc.o cmdline.o cpu.o lib-y += usercopy_$(BITS).o usercopy.o getuser.o putuser.o lib-y += memcpy_$(BITS).o lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o -lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o +lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o insn-eval.o lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c new file mode 100644 index 000000000000..35625d279458 --- /dev/null +++ b/arch/x86/lib/insn-eval.c @@ -0,0 +1,1364 @@ +/* + * Utility functions for x86 operand and address decoding + * + * Copyright (C) Intel Corporation 2017 + */ +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/ratelimit.h> +#include <linux/mmu_context.h> +#include <asm/desc_defs.h> +#include <asm/desc.h> +#include <asm/inat.h> +#include <asm/insn.h> +#include <asm/insn-eval.h> +#include <asm/ldt.h> +#include <asm/vm86.h> + +#undef pr_fmt +#define pr_fmt(fmt) "insn: " fmt + +enum reg_type { + REG_TYPE_RM = 0, + REG_TYPE_INDEX, + REG_TYPE_BASE, +}; + +/** + * is_string_insn() - Determine if instruction is a string instruction + * @insn: Instruction containing the opcode to inspect + * + * Returns: + * + * true if the instruction, determined by the opcode, is any of the + * string instructions as defined in the Intel Software Development manual. + * False otherwise. + */ +static bool is_string_insn(struct insn *insn) +{ + insn_get_opcode(insn); + + /* All string instructions have a 1-byte opcode. */ + if (insn->opcode.nbytes != 1) + return false; + + switch (insn->opcode.bytes[0]) { + case 0x6c ... 0x6f: /* INS, OUTS */ + case 0xa4 ... 0xa7: /* MOVS, CMPS */ + case 0xaa ... 0xaf: /* STOS, LODS, SCAS */ + return true; + default: + return false; + } +} + +/** + * get_seg_reg_override_idx() - obtain segment register override index + * @insn: Valid instruction with segment override prefixes + * + * Inspect the instruction prefixes in @insn and find segment overrides, if any. + * + * Returns: + * + * A constant identifying the segment register to use, among CS, SS, DS, + * ES, FS, or GS. INAT_SEG_REG_DEFAULT is returned if no segment override + * prefixes were found. + * + * -EINVAL in case of error. + */ +static int get_seg_reg_override_idx(struct insn *insn) +{ + int idx = INAT_SEG_REG_DEFAULT; + int num_overrides = 0, i; + + insn_get_prefixes(insn); + + /* Look for any segment override prefixes. */ + for (i = 0; i < insn->prefixes.nbytes; i++) { + insn_attr_t attr; + + attr = inat_get_opcode_attribute(insn->prefixes.bytes[i]); + switch (attr) { + case INAT_MAKE_PREFIX(INAT_PFX_CS): + idx = INAT_SEG_REG_CS; + num_overrides++; + break; + case INAT_MAKE_PREFIX(INAT_PFX_SS): + idx = INAT_SEG_REG_SS; + num_overrides++; + break; + case INAT_MAKE_PREFIX(INAT_PFX_DS): + idx = INAT_SEG_REG_DS; + num_overrides++; + break; + case INAT_MAKE_PREFIX(INAT_PFX_ES): + idx = INAT_SEG_REG_ES; + num_overrides++; + break; + case INAT_MAKE_PREFIX(INAT_PFX_FS): + idx = INAT_SEG_REG_FS; + num_overrides++; + break; + case INAT_MAKE_PREFIX(INAT_PFX_GS): + idx = INAT_SEG_REG_GS; + num_overrides++; + break; + /* No default action needed. */ + } + } + + /* More than one segment override prefix leads to undefined behavior. */ + if (num_overrides > 1) + return -EINVAL; + + return idx; +} + +/** + * check_seg_overrides() - check if segment override prefixes are allowed + * @insn: Valid instruction with segment override prefixes + * @regoff: Operand offset, in pt_regs, for which the check is performed + * + * For a particular register used in register-indirect addressing, determine if + * segment override prefixes can be used. Specifically, no overrides are allowed + * for rDI if used with a string instruction. + * + * Returns: + * + * True if segment override prefixes can be used with the register indicated + * in @regoff. False if otherwise. + */ +static bool check_seg_overrides(struct insn *insn, int regoff) +{ + if (regoff == offsetof(struct pt_regs, di) && is_string_insn(insn)) + return false; + + return true; +} + +/** + * resolve_default_seg() - resolve default segment register index for an operand + * @insn: Instruction with opcode and address size. Must be valid. + * @regs: Register values as seen when entering kernel mode + * @off: Operand offset, in pt_regs, for which resolution is needed + * + * Resolve the default segment register index associated with the instruction + * operand register indicated by @off. Such index is resolved based on defaults + * described in the Intel Software Development Manual. + * + * Returns: + * + * If in protected mode, a constant identifying the segment register to use, + * among CS, SS, ES or DS. If in long mode, INAT_SEG_REG_IGNORE. + * + * -EINVAL in case of error. + */ +static int resolve_default_seg(struct insn *insn, struct pt_regs *regs, int off) +{ + if (user_64bit_mode(regs)) + return INAT_SEG_REG_IGNORE; + /* + * Resolve the default segment register as described in Section 3.7.4 + * of the Intel Software Development Manual Vol. 1: + * + * + DS for all references involving r[ABCD]X, and rSI. + * + If used in a string instruction, ES for rDI. Otherwise, DS. + * + AX, CX and DX are not valid register operands in 16-bit address + * encodings but are valid for 32-bit and 64-bit encodings. + * + -EDOM is reserved to identify for cases in which no register + * is used (i.e., displacement-only addressing). Use DS. + * + SS for rSP or rBP. + * + CS for rIP. + */ + + switch (off) { + case offsetof(struct pt_regs, ax): + case offsetof(struct pt_regs, cx): + case offsetof(struct pt_regs, dx): + /* Need insn to verify address size. */ + if (insn->addr_bytes == 2) + return -EINVAL; + + case -EDOM: + case offsetof(struct pt_regs, bx): + case offsetof(struct pt_regs, si): + return INAT_SEG_REG_DS; + + case offsetof(struct pt_regs, di): + if (is_string_insn(insn)) + return INAT_SEG_REG_ES; + return INAT_SEG_REG_DS; + + case offsetof(struct pt_regs, bp): + case offsetof(struct pt_regs, sp): + return INAT_SEG_REG_SS; + + case offsetof(struct pt_regs, ip): + return INAT_SEG_REG_CS; + + default: + return -EINVAL; + } +} + +/** + * resolve_seg_reg() - obtain segment register index + * @insn: Instruction with operands + * @regs: Register values as seen when entering kernel mode + * @regoff: Operand offset, in pt_regs, used to deterimine segment register + * + * Determine the segment register associated with the operands and, if + * applicable, prefixes and the instruction pointed by @insn. + * + * The segment register associated to an operand used in register-indirect + * addressing depends on: + * + * a) Whether running in long mode (in such a case segments are ignored, except + * if FS or GS are used). + * + * b) Whether segment override prefixes can be used. Certain instructions and + * registers do not allow override prefixes. + * + * c) Whether segment overrides prefixes are found in the instruction prefixes. + * + * d) If there are not segment override prefixes or they cannot be used, the + * default segment register associated with the operand register is used. + * + * The function checks first if segment override prefixes can be used with the + * operand indicated by @regoff. If allowed, obtain such overridden segment + * register index. Lastly, if not prefixes were found or cannot be used, resolve + * the segment register index to use based on the defaults described in the + * Intel documentation. In long mode, all segment register indexes will be + * ignored, except if overrides were found for FS or GS. All these operations + * are done using helper functions. + * + * The operand register, @regoff, is represented as the offset from the base of + * pt_regs. + * + * As stated, the main use of this function is to determine the segment register + * index based on the instruction, its operands and prefixes. Hence, @insn + * must be valid. However, if @regoff indicates rIP, we don't need to inspect + * @insn at all as in this case CS is used in all cases. This case is checked + * before proceeding further. + * + * Please note that this function does not return the value in the segment + * register (i.e., the segment selector) but our defined index. The segment + * selector needs to be obtained using get_segment_selector() and passing the + * segment register index resolved by this function. + * + * Returns: + * + * An index identifying the segment register to use, among CS, SS, DS, + * ES, FS, or GS. INAT_SEG_REG_IGNORE is returned if running in long mode. + * + * -EINVAL in case of error. + */ +static int resolve_seg_reg(struct insn *insn, struct pt_regs *regs, int regoff) +{ + int idx; + + /* + * In the unlikely event of having to resolve the segment register + * index for rIP, do it first. Segment override prefixes should not + * be used. Hence, it is not necessary to inspect the instruction, + * which may be invalid at this point. + */ + if (regoff == offsetof(struct pt_regs, ip)) { + if (user_64bit_mode(regs)) + return INAT_SEG_REG_IGNORE; + else + return INAT_SEG_REG_CS; + } + + if (!insn) + return -EINVAL; + + if (!check_seg_overrides(insn, regoff)) + return resolve_default_seg(insn, regs, regoff); + + idx = get_seg_reg_override_idx(insn); + if (idx < 0) + return idx; + + if (idx == INAT_SEG_REG_DEFAULT) + return resolve_default_seg(insn, regs, regoff); + + /* + * In long mode, segment override prefixes are ignored, except for + * overrides for FS and GS. + */ + if (user_64bit_mode(regs)) { + if (idx != INAT_SEG_REG_FS && + idx != INAT_SEG_REG_GS) + idx = INAT_SEG_REG_IGNORE; + } + + return idx; +} + +/** + * get_segment_selector() - obtain segment selector + * @regs: Register values as seen when entering kernel mode + * @seg_reg_idx: Segment register index to use + * + * Obtain the segment selector from any of the CS, SS, DS, ES, FS, GS segment + * registers. In CONFIG_X86_32, the segment is obtained from either pt_regs or + * kernel_vm86_regs as applicable. In CONFIG_X86_64, CS and SS are obtained + * from pt_regs. DS, ES, FS and GS are obtained by reading the actual CPU + * registers. This done for only for completeness as in CONFIG_X86_64 segment + * registers are ignored. + * + * Returns: + * + * Value of the segment selector, including null when running in + * long mode. + * + * -EINVAL on error. + */ +static short get_segment_selector(struct pt_regs *regs, int seg_reg_idx) +{ +#ifdef CONFIG_X86_64 + unsigned short sel; + + switch (seg_reg_idx) { + case INAT_SEG_REG_IGNORE: + return 0; + case INAT_SEG_REG_CS: + return (unsigned short)(regs->cs & 0xffff); + case INAT_SEG_REG_SS: + return (unsigned short)(regs->ss & 0xffff); + case INAT_SEG_REG_DS: + savesegment(ds, sel); + return sel; + case INAT_SEG_REG_ES: + savesegment(es, sel); + return sel; + case INAT_SEG_REG_FS: + savesegment(fs, sel); + return sel; + case INAT_SEG_REG_GS: + savesegment(gs, sel); + return sel; + default: + return -EINVAL; + } +#else /* CONFIG_X86_32 */ + struct kernel_vm86_regs *vm86regs = (struct kernel_vm86_regs *)regs; + + if (v8086_mode(regs)) { + switch (seg_reg_idx) { + case INAT_SEG_REG_CS: + return (unsigned short)(regs->cs & 0xffff); + case INAT_SEG_REG_SS: + return (unsigned short)(regs->ss & 0xffff); + case INAT_SEG_REG_DS: + return vm86regs->ds; + case INAT_SEG_REG_ES: + return vm86regs->es; + case INAT_SEG_REG_FS: + return vm86regs->fs; + case INAT_SEG_REG_GS: + return vm86regs->gs; + case INAT_SEG_REG_IGNORE: + /* fall through */ + default: + return -EINVAL; + } + } + + switch (seg_reg_idx) { + case INAT_SEG_REG_CS: + return (unsigned short)(regs->cs & 0xffff); + case INAT_SEG_REG_SS: + return (unsigned short)(regs->ss & 0xffff); + case INAT_SEG_REG_DS: + return (unsigned short)(regs->ds & 0xffff); + case INAT_SEG_REG_ES: + return (unsigned short)(regs->es & 0xffff); + case INAT_SEG_REG_FS: + return (unsigned short)(regs->fs & 0xffff); + case INAT_SEG_REG_GS: + /* + * GS may or may not be in regs as per CONFIG_X86_32_LAZY_GS. + * The macro below takes care of both cases. + */ + return get_user_gs(regs); + case INAT_SEG_REG_IGNORE: + /* fall through */ + default: + return -EINVAL; + } +#endif /* CONFIG_X86_64 */ +} + +static int get_reg_offset(struct insn *insn, struct pt_regs *regs, + enum reg_type type) +{ + int regno = 0; + + static const int regoff[] = { + offsetof(struct pt_regs, ax), + offsetof(struct pt_regs, cx), + offsetof(struct pt_regs, dx), + offsetof(struct pt_regs, bx), + offsetof(struct pt_regs, sp), + offsetof(struct pt_regs, bp), + offsetof(struct pt_regs, si), + offsetof(struct pt_regs, di), +#ifdef CONFIG_X86_64 + offsetof(struct pt_regs, r8), + offsetof(struct pt_regs, r9), + offsetof(struct pt_regs, r10), + offsetof(struct pt_regs, r11), + offsetof(struct pt_regs, r12), + offsetof(struct pt_regs, r13), + offsetof(struct pt_regs, r14), + offsetof(struct pt_regs, r15), +#endif + }; + int nr_registers = ARRAY_SIZE(regoff); + /* + * Don't possibly decode a 32-bit instructions as + * reading a 64-bit-only register. + */ + if (IS_ENABLED(CONFIG_X86_64) && !insn->x86_64) + nr_registers -= 8; + + switch (type) { + case REG_TYPE_RM: + regno = X86_MODRM_RM(insn->modrm.value); + + /* + * ModRM.mod == 0 and ModRM.rm == 5 means a 32-bit displacement + * follows the ModRM byte. + */ + if (!X86_MODRM_MOD(insn->modrm.value) && regno == 5) + return -EDOM; + + if (X86_REX_B(insn->rex_prefix.value)) + regno += 8; + break; + + case REG_TYPE_INDEX: + regno = X86_SIB_INDEX(insn->sib.value); + if (X86_REX_X(insn->rex_prefix.value)) + regno += 8; + + /* + * If ModRM.mod != 3 and SIB.index = 4 the scale*index + * portion of the address computation is null. This is + * true only if REX.X is 0. In such a case, the SIB index + * is used in the address computation. + */ + if (X86_MODRM_MOD(insn->modrm.value) != 3 && regno == 4) + return -EDOM; + break; + + case REG_TYPE_BASE: + regno = X86_SIB_BASE(insn->sib.value); + /* + * If ModRM.mod is 0 and SIB.base == 5, the base of the + * register-indirect addressing is 0. In this case, a + * 32-bit displacement follows the SIB byte. + */ + if (!X86_MODRM_MOD(insn->modrm.value) && regno == 5) + return -EDOM; + + if (X86_REX_B(insn->rex_prefix.value)) + regno += 8; + break; + + default: + pr_err_ratelimited("invalid register type: %d\n", type); + return -EINVAL; + } + + if (regno >= nr_registers) { + WARN_ONCE(1, "decoded an instruction with an invalid register"); + return -EINVAL; + } + return regoff[regno]; +} + +/** + * get_reg_offset_16() - Obtain offset of register indicated by instruction + * @insn: Instruction containing ModRM byte + * @regs: Register values as seen when entering kernel mode + * @offs1: Offset of the first operand register + * @offs2: Offset of the second opeand register, if applicable + * + * Obtain the offset, in pt_regs, of the registers indicated by the ModRM byte + * in @insn. This function is to be used with 16-bit address encodings. The + * @offs1 and @offs2 will be written with the offset of the two registers + * indicated by the instruction. In cases where any of the registers is not + * referenced by the instruction, the value will be set to -EDOM. + * + * Returns: + * + * 0 on success, -EINVAL on error. + */ +static int get_reg_offset_16(struct insn *insn, struct pt_regs *regs, + int *offs1, int *offs2) +{ + /* + * 16-bit addressing can use one or two registers. Specifics of + * encodings are given in Table 2-1. "16-Bit Addressing Forms with the + * ModR/M Byte" of the Intel Software Development Manual. + */ + static const int regoff1[] = { + offsetof(struct pt_regs, bx), + offsetof(struct pt_regs, bx), + offsetof(struct pt_regs, bp), + offsetof(struct pt_regs, bp), + offsetof(struct pt_regs, si), + offsetof(struct pt_regs, di), + offsetof(struct pt_regs, bp), + offsetof(struct pt_regs, bx), + }; + + static const int regoff2[] = { + offsetof(struct pt_regs, si), + offsetof(struct pt_regs, di), + offsetof(struct pt_regs, si), + offsetof(struct pt_regs, di), + -EDOM, + -EDOM, + -EDOM, + -EDOM, + }; + + if (!offs1 || !offs2) + return -EINVAL; + + /* Operand is a register, use the generic function. */ + if (X86_MODRM_MOD(insn->modrm.value) == 3) { + *offs1 = insn_get_modrm_rm_off(insn, regs); + *offs2 = -EDOM; + return 0; + } + + *offs1 = regoff1[X86_MODRM_RM(insn->modrm.value)]; + *offs2 = regoff2[X86_MODRM_RM(insn->modrm.value)]; + + /* + * If ModRM.mod is 0 and ModRM.rm is 110b, then we use displacement- + * only addressing. This means that no registers are involved in + * computing the effective address. Thus, ensure that the first + * register offset is invalild. The second register offset is already + * invalid under the aforementioned conditions. + */ + if ((X86_MODRM_MOD(insn->modrm.value) == 0) && + (X86_MODRM_RM(insn->modrm.value) == 6)) + *offs1 = -EDOM; + + return 0; +} + +/** + * get_desc() - Obtain pointer to a segment descriptor + * @sel: Segment selector + * + * Given a segment selector, obtain a pointer to the segment descriptor. + * Both global and local descriptor tables are supported. + * + * Returns: + * + * Pointer to segment descriptor on success. + * + * NULL on error. + */ +static struct desc_struct *get_desc(unsigned short sel) +{ + struct desc_ptr gdt_desc = {0, 0}; + unsigned long desc_base; + +#ifdef CONFIG_MODIFY_LDT_SYSCALL + if ((sel & SEGMENT_TI_MASK) == SEGMENT_LDT) { + struct desc_struct *desc = NULL; + struct ldt_struct *ldt; + + /* Bits [15:3] contain the index of the desired entry. */ + sel >>= 3; + + mutex_lock(¤t->active_mm->context.lock); + ldt = current->active_mm->context.ldt; + if (ldt && sel < ldt->nr_entries) + desc = &ldt->entries[sel]; + + mutex_unlock(¤t->active_mm->context.lock); + + return desc; + } +#endif + native_store_gdt(&gdt_desc); + + /* + * Segment descriptors have a size of 8 bytes. Thus, the index is + * multiplied by 8 to obtain the memory offset of the desired descriptor + * from the base of the GDT. As bits [15:3] of the segment selector + * contain the index, it can be regarded as multiplied by 8 already. + * All that remains is to clear bits [2:0]. + */ + desc_base = sel & ~(SEGMENT_RPL_MASK | SEGMENT_TI_MASK); + + if (desc_base > gdt_desc.size) + return NULL; + + return (struct desc_struct *)(gdt_desc.address + desc_base); +} + +/** + * insn_get_seg_base() - Obtain base address of segment descriptor. + * @regs: Register values as seen when entering kernel mode + * @seg_reg_idx: Index of the segment register pointing to seg descriptor + * + * Obtain the base address of the segment as indicated by the segment descriptor + * pointed by the segment selector. The segment selector is obtained from the + * input segment register index @seg_reg_idx. + * + * Returns: + * + * In protected mode, base address of the segment. Zero in long mode, + * except when FS or GS are used. In virtual-8086 mode, the segment + * selector shifted 4 bits to the right. + * + * -1L in case of error. + */ +unsigned long insn_get_seg_base(struct pt_regs *regs, int seg_reg_idx) +{ + struct desc_struct *desc; + short sel; + + sel = get_segment_selector(regs, seg_reg_idx); + if (sel < 0) + return -1L; + + if (v8086_mode(regs)) + /* + * Base is simply the segment selector shifted 4 + * bits to the right. + */ + return (unsigned long)(sel << 4); + + if (user_64bit_mode(regs)) { + /* + * Only FS or GS will have a base address, the rest of + * the segments' bases are forced to 0. + */ + unsigned long base; + + if (seg_reg_idx == INAT_SEG_REG_FS) + rdmsrl(MSR_FS_BASE, base); + else if (seg_reg_idx == INAT_SEG_REG_GS) + /* + * swapgs was called at the kernel entry point. Thus, + * MSR_KERNEL_GS_BASE will have the user-space GS base. + */ + rdmsrl(MSR_KERNEL_GS_BASE, base); + else + base = 0; + return base; + } + + /* In protected mode the segment selector cannot be null. */ + if (!sel) + return -1L; + + desc = get_desc(sel); + if (!desc) + return -1L; + + return get_desc_base(desc); +} + +/** + * get_seg_limit() - Obtain the limit of a segment descriptor + * @regs: Register values as seen when entering kernel mode + * @seg_reg_idx: Index of the segment register pointing to seg descriptor + * + * Obtain the limit of the segment as indicated by the segment descriptor + * pointed by the segment selector. The segment selector is obtained from the + * input segment register index @seg_reg_idx. + * + * Returns: + * + * In protected mode, the limit of the segment descriptor in bytes. + * In long mode and virtual-8086 mode, segment limits are not enforced. Thus, + * limit is returned as -1L to imply a limit-less segment. + * + * Zero is returned on error. + */ +static unsigned long get_seg_limit(struct pt_regs *regs, int seg_reg_idx) +{ + struct desc_struct *desc; + unsigned long limit; + short sel; + + sel = get_segment_selector(regs, seg_reg_idx); + if (sel < 0) + return 0; + + if (user_64bit_mode(regs) || v8086_mode(regs)) + return -1L; + + if (!sel) + return 0; + + desc = get_desc(sel); + if (!desc) + return 0; + + /* + * If the granularity bit is set, the limit is given in multiples + * of 4096. This also means that the 12 least significant bits are + * not tested when checking the segment limits. In practice, + * this means that the segment ends in (limit << 12) + 0xfff. + */ + limit = get_desc_limit(desc); + if (desc->g) + limit = (limit << 12) + 0xfff; + + return limit; +} + +/** + * insn_get_code_seg_params() - Obtain code segment parameters + * @regs: Structure with register values as seen when entering kernel mode + * + * Obtain address and operand sizes of the code segment. It is obtained from the + * selector contained in the CS register in regs. In protected mode, the default + * address is determined by inspecting the L and D bits of the segment + * descriptor. In virtual-8086 mode, the default is always two bytes for both + * address and operand sizes. + * + * Returns: + * + * A signed 8-bit value containing the default parameters on success. + * + * -EINVAL on error. + */ +char insn_get_code_seg_params(struct pt_regs *regs) +{ + struct desc_struct *desc; + short sel; + + if (v8086_mode(regs)) + /* Address and operand size are both 16-bit. */ + return INSN_CODE_SEG_PARAMS(2, 2); + + sel = get_segment_selector(regs, INAT_SEG_REG_CS); + if (sel < 0) + return sel; + + desc = get_desc(sel); + if (!desc) + return -EINVAL; + + /* + * The most significant byte of the Type field of the segment descriptor + * determines whether a segment contains data or code. If this is a data + * segment, return error. + */ + if (!(desc->type & BIT(3))) + return -EINVAL; + + switch ((desc->l << 1) | desc->d) { + case 0: /* + * Legacy mode. CS.L=0, CS.D=0. Address and operand size are + * both 16-bit. + */ + return INSN_CODE_SEG_PARAMS(2, 2); + case 1: /* + * Legacy mode. CS.L=0, CS.D=1. Address and operand size are + * both 32-bit. + */ + return INSN_CODE_SEG_PARAMS(4, 4); + case 2: /* + * IA-32e 64-bit mode. CS.L=1, CS.D=0. Address size is 64-bit; + * operand size is 32-bit. + */ + return INSN_CODE_SEG_PARAMS(4, 8); + case 3: /* Invalid setting. CS.L=1, CS.D=1 */ + /* fall through */ + default: + return -EINVAL; + } +} + +/** + * insn_get_modrm_rm_off() - Obtain register in r/m part of the ModRM byte + * @insn: Instruction containing the ModRM byte + * @regs: Register values as seen when entering kernel mode + * + * Returns: + * + * The register indicated by the r/m part of the ModRM byte. The + * register is obtained as an offset from the base of pt_regs. In specific + * cases, the returned value can be -EDOM to indicate that the particular value + * of ModRM does not refer to a register and shall be ignored. + */ +int insn_get_modrm_rm_off(struct insn *insn, struct pt_regs *regs) +{ + return get_reg_offset(insn, regs, REG_TYPE_RM); +} + +/** + * get_seg_base_limit() - obtain base address and limit of a segment + * @insn: Instruction. Must be valid. + * @regs: Register values as seen when entering kernel mode + * @regoff: Operand offset, in pt_regs, used to resolve segment descriptor + * @base: Obtained segment base + * @limit: Obtained segment limit + * + * Obtain the base address and limit of the segment associated with the operand + * @regoff and, if any or allowed, override prefixes in @insn. This function is + * different from insn_get_seg_base() as the latter does not resolve the segment + * associated with the instruction operand. If a limit is not needed (e.g., + * when running in long mode), @limit can be NULL. + * + * Returns: + * + * 0 on success. @base and @limit will contain the base address and of the + * resolved segment, respectively. + * + * -EINVAL on error. + */ +static int get_seg_base_limit(struct insn *insn, struct pt_regs *regs, + int regoff, unsigned long *base, + unsigned long *limit) +{ + int seg_reg_idx; + + if (!base) + return -EINVAL; + + seg_reg_idx = resolve_seg_reg(insn, regs, regoff); + if (seg_reg_idx < 0) + return seg_reg_idx; + + *base = insn_get_seg_base(regs, seg_reg_idx); + if (*base == -1L) + return -EINVAL; + + if (!limit) + return 0; + + *limit = get_seg_limit(regs, seg_reg_idx); + if (!(*limit)) + return -EINVAL; + + return 0; +} + +/** + * get_eff_addr_reg() - Obtain effective address from register operand + * @insn: Instruction. Must be valid. + * @regs: Register values as seen when entering kernel mode + * @regoff: Obtained operand offset, in pt_regs, with the effective address + * @eff_addr: Obtained effective address + * + * Obtain the effective address stored in the register operand as indicated by + * the ModRM byte. This function is to be used only with register addressing + * (i.e., ModRM.mod is 3). The effective address is saved in @eff_addr. The + * register operand, as an offset from the base of pt_regs, is saved in @regoff; + * such offset can then be used to resolve the segment associated with the + * operand. This function can be used with any of the supported address sizes + * in x86. + * + * Returns: + * + * 0 on success. @eff_addr will have the effective address stored in the + * operand indicated by ModRM. @regoff will have such operand as an offset from + * the base of pt_regs. + * + * -EINVAL on error. + */ +static int get_eff_addr_reg(struct insn *insn, struct pt_regs *regs, + int *regoff, long *eff_addr) +{ + insn_get_modrm(insn); + + if (!insn->modrm.nbytes) + return -EINVAL; + + if (X86_MODRM_MOD(insn->modrm.value) != 3) + return -EINVAL; + + *regoff = get_reg_offset(insn, regs, REG_TYPE_RM); + if (*regoff < 0) + return -EINVAL; + + /* Ignore bytes that are outside the address size. */ + if (insn->addr_bytes == 2) + *eff_addr = regs_get_register(regs, *regoff) & 0xffff; + else if (insn->addr_bytes == 4) + *eff_addr = regs_get_register(regs, *regoff) & 0xffffffff; + else /* 64-bit address */ + *eff_addr = regs_get_register(regs, *regoff); + + return 0; +} + +/** + * get_eff_addr_modrm() - Obtain referenced effective address via ModRM + * @insn: Instruction. Must be valid. + * @regs: Register values as seen when entering kernel mode + * @regoff: Obtained operand offset, in pt_regs, associated with segment + * @eff_addr: Obtained effective address + * + * Obtain the effective address referenced by the ModRM byte of @insn. After + * identifying the registers involved in the register-indirect memory reference, + * its value is obtained from the operands in @regs. The computed address is + * stored @eff_addr. Also, the register operand that indicates the associated + * segment is stored in @regoff, this parameter can later be used to determine + * such segment. + * + * Returns: + * + * 0 on success. @eff_addr will have the referenced effective address. @regoff + * will have a register, as an offset from the base of pt_regs, that can be used + * to resolve the associated segment. + * + * -EINVAL on error. + */ +static int get_eff_addr_modrm(struct insn *insn, struct pt_regs *regs, + int *regoff, long *eff_addr) +{ + long tmp; + + if (insn->addr_bytes != 8 && insn->addr_bytes != 4) + return -EINVAL; + + insn_get_modrm(insn); + + if (!insn->modrm.nbytes) + return -EINVAL; + + if (X86_MODRM_MOD(insn->modrm.value) > 2) + return -EINVAL; + + *regoff = get_reg_offset(insn, regs, REG_TYPE_RM); + + /* + * -EDOM means that we must ignore the address_offset. In such a case, + * in 64-bit mode the effective address relative to the rIP of the + * following instruction. + */ + if (*regoff == -EDOM) { + if (user_64bit_mode(regs)) + tmp = regs->ip + insn->length; + else + tmp = 0; + } else if (*regoff < 0) { + return -EINVAL; + } else { + tmp = regs_get_register(regs, *regoff); + } + + if (insn->addr_bytes == 4) { + int addr32 = (int)(tmp & 0xffffffff) + insn->displacement.value; + + *eff_addr = addr32 & 0xffffffff; + } else { + *eff_addr = tmp + insn->displacement.value; + } + + return 0; +} + +/** + * get_eff_addr_modrm_16() - Obtain referenced effective address via ModRM + * @insn: Instruction. Must be valid. + * @regs: Register values as seen when entering kernel mode + * @regoff: Obtained operand offset, in pt_regs, associated with segment + * @eff_addr: Obtained effective address + * + * Obtain the 16-bit effective address referenced by the ModRM byte of @insn. + * After identifying the registers involved in the register-indirect memory + * reference, its value is obtained from the operands in @regs. The computed + * address is stored @eff_addr. Also, the register operand that indicates + * the associated segment is stored in @regoff, this parameter can later be used + * to determine such segment. + * + * Returns: + * + * 0 on success. @eff_addr will have the referenced effective address. @regoff + * will have a register, as an offset from the base of pt_regs, that can be used + * to resolve the associated segment. + * + * -EINVAL on error. + */ +static int get_eff_addr_modrm_16(struct insn *insn, struct pt_regs *regs, + int *regoff, short *eff_addr) +{ + int addr_offset1, addr_offset2, ret; + short addr1 = 0, addr2 = 0, displacement; + + if (insn->addr_bytes != 2) + return -EINVAL; + + insn_get_modrm(insn); + + if (!insn->modrm.nbytes) + return -EINVAL; + + if (X86_MODRM_MOD(insn->modrm.value) > 2) + return -EINVAL; + + ret = get_reg_offset_16(insn, regs, &addr_offset1, &addr_offset2); + if (ret < 0) + return -EINVAL; + + /* + * Don't fail on invalid offset values. They might be invalid because + * they cannot be used for this particular value of ModRM. Instead, use + * them in the computation only if they contain a valid value. + */ + if (addr_offset1 != -EDOM) + addr1 = regs_get_register(regs, addr_offset1) & 0xffff; + + if (addr_offset2 != -EDOM) + addr2 = regs_get_register(regs, addr_offset2) & 0xffff; + + displacement = insn->displacement.value & 0xffff; + *eff_addr = addr1 + addr2 + displacement; + + /* + * The first operand register could indicate to use of either SS or DS + * registers to obtain the segment selector. The second operand + * register can only indicate the use of DS. Thus, the first operand + * will be used to obtain the segment selector. + */ + *regoff = addr_offset1; + + return 0; +} + +/** + * get_eff_addr_sib() - Obtain referenced effective address via SIB + * @insn: Instruction. Must be valid. + * @regs: Register values as seen when entering kernel mode + * @regoff: Obtained operand offset, in pt_regs, associated with segment + * @eff_addr: Obtained effective address + * + * Obtain the effective address referenced by the SIB byte of @insn. After + * identifying the registers involved in the indexed, register-indirect memory + * reference, its value is obtained from the operands in @regs. The computed + * address is stored @eff_addr. Also, the register operand that indicates the + * associated segment is stored in @regoff, this parameter can later be used to + * determine such segment. + * + * Returns: + * + * 0 on success. @eff_addr will have the referenced effective address. + * @base_offset will have a register, as an offset from the base of pt_regs, + * that can be used to resolve the associated segment. + * + * -EINVAL on error. + */ +static int get_eff_addr_sib(struct insn *insn, struct pt_regs *regs, + int *base_offset, long *eff_addr) +{ + long base, indx; + int indx_offset; + + if (insn->addr_bytes != 8 && insn->addr_bytes != 4) + return -EINVAL; + + insn_get_modrm(insn); + + if (!insn->modrm.nbytes) + return -EINVAL; + + if (X86_MODRM_MOD(insn->modrm.value) > 2) + return -EINVAL; + + insn_get_sib(insn); + + if (!insn->sib.nbytes) + return -EINVAL; + + *base_offset = get_reg_offset(insn, regs, REG_TYPE_BASE); + indx_offset = get_reg_offset(insn, regs, REG_TYPE_INDEX); + + /* + * Negative values in the base and index offset means an error when + * decoding the SIB byte. Except -EDOM, which means that the registers + * should not be used in the address computation. + */ + if (*base_offset == -EDOM) + base = 0; + else if (*base_offset < 0) + return -EINVAL; + else + base = regs_get_register(regs, *base_offset); + + if (indx_offset == -EDOM) + indx = 0; + else if (indx_offset < 0) + return -EINVAL; + else + indx = regs_get_register(regs, indx_offset); + + if (insn->addr_bytes == 4) { + int addr32, base32, idx32; + + base32 = base & 0xffffffff; + idx32 = indx & 0xffffffff; + + addr32 = base32 + idx32 * (1 << X86_SIB_SCALE(insn->sib.value)); + addr32 += insn->displacement.value; + + *eff_addr = addr32 & 0xffffffff; + } else { + *eff_addr = base + indx * (1 << X86_SIB_SCALE(insn->sib.value)); + *eff_addr += insn->displacement.value; + } + + return 0; +} + +/** + * get_addr_ref_16() - Obtain the 16-bit address referred by instruction + * @insn: Instruction containing ModRM byte and displacement + * @regs: Register values as seen when entering kernel mode + * + * This function is to be used with 16-bit address encodings. Obtain the memory + * address referred by the instruction's ModRM and displacement bytes. Also, the + * segment used as base is determined by either any segment override prefixes in + * @insn or the default segment of the registers involved in the address + * computation. In protected mode, segment limits are enforced. + * + * Returns: + * + * Linear address referenced by the instruction operands on success. + * + * -1L on error. + */ +static void __user *get_addr_ref_16(struct insn *insn, struct pt_regs *regs) +{ + unsigned long linear_addr = -1L, seg_base, seg_limit; + int ret, regoff; + short eff_addr; + long tmp; + + insn_get_modrm(insn); + insn_get_displacement(insn); + + if (insn->addr_bytes != 2) + goto out; + + if (X86_MODRM_MOD(insn->modrm.value) == 3) { + ret = get_eff_addr_reg(insn, regs, ®off, &tmp); + if (ret) + goto out; + + eff_addr = tmp; + } else { + ret = get_eff_addr_modrm_16(insn, regs, ®off, &eff_addr); + if (ret) + goto out; + } + + ret = get_seg_base_limit(insn, regs, regoff, &seg_base, &seg_limit); + if (ret) + goto out; + + /* + * Before computing the linear address, make sure the effective address + * is within the limits of the segment. In virtual-8086 mode, segment + * limits are not enforced. In such a case, the segment limit is -1L to + * reflect this fact. + */ + if ((unsigned long)(eff_addr & 0xffff) > seg_limit) + goto out; + + linear_addr = (unsigned long)(eff_addr & 0xffff) + seg_base; + + /* Limit linear address to 20 bits */ + if (v8086_mode(regs)) + linear_addr &= 0xfffff; + +out: + return (void __user *)linear_addr; +} + +/** + * get_addr_ref_32() - Obtain a 32-bit linear address + * @insn: Instruction with ModRM, SIB bytes and displacement + * @regs: Register values as seen when entering kernel mode + * + * This function is to be used with 32-bit address encodings to obtain the + * linear memory address referred by the instruction's ModRM, SIB, + * displacement bytes and segment base address, as applicable. If in protected + * mode, segment limits are enforced. + * + * Returns: + * + * Linear address referenced by instruction and registers on success. + * + * -1L on error. + */ +static void __user *get_addr_ref_32(struct insn *insn, struct pt_regs *regs) +{ + unsigned long linear_addr = -1L, seg_base, seg_limit; + int eff_addr, regoff; + long tmp; + int ret; + + if (insn->addr_bytes != 4) + goto out; + + if (X86_MODRM_MOD(insn->modrm.value) == 3) { + ret = get_eff_addr_reg(insn, regs, ®off, &tmp); + if (ret) + goto out; + + eff_addr = tmp; + + } else { + if (insn->sib.nbytes) { + ret = get_eff_addr_sib(insn, regs, ®off, &tmp); + if (ret) + goto out; + + eff_addr = tmp; + } else { + ret = get_eff_addr_modrm(insn, regs, ®off, &tmp); + if (ret) + goto out; + + eff_addr = tmp; + } + } + + ret = get_seg_base_limit(insn, regs, regoff, &seg_base, &seg_limit); + if (ret) + goto out; + + /* + * In protected mode, before computing the linear address, make sure + * the effective address is within the limits of the segment. + * 32-bit addresses can be used in long and virtual-8086 modes if an + * address override prefix is used. In such cases, segment limits are + * not enforced. When in virtual-8086 mode, the segment limit is -1L + * to reflect this situation. + * + * After computed, the effective address is treated as an unsigned + * quantity. + */ + if (!user_64bit_mode(regs) && ((unsigned int)eff_addr > seg_limit)) + goto out; + + /* + * Even though 32-bit address encodings are allowed in virtual-8086 + * mode, the address range is still limited to [0x-0xffff]. + */ + if (v8086_mode(regs) && (eff_addr & ~0xffff)) + goto out; + + /* + * Data type long could be 64 bits in size. Ensure that our 32-bit + * effective address is not sign-extended when computing the linear + * address. + */ + linear_addr = (unsigned long)(eff_addr & 0xffffffff) + seg_base; + + /* Limit linear address to 20 bits */ + if (v8086_mode(regs)) + linear_addr &= 0xfffff; + +out: + return (void __user *)linear_addr; +} + +/** + * get_addr_ref_64() - Obtain a 64-bit linear address + * @insn: Instruction struct with ModRM and SIB bytes and displacement + * @regs: Structure with register values as seen when entering kernel mode + * + * This function is to be used with 64-bit address encodings to obtain the + * linear memory address referred by the instruction's ModRM, SIB, + * displacement bytes and segment base address, as applicable. + * + * Returns: + * + * Linear address referenced by instruction and registers on success. + * + * -1L on error. + */ +#ifndef CONFIG_X86_64 +static void __user *get_addr_ref_64(struct insn *insn, struct pt_regs *regs) +{ + return (void __user *)-1L; +} +#else +static void __user *get_addr_ref_64(struct insn *insn, struct pt_regs *regs) +{ + unsigned long linear_addr = -1L, seg_base; + int regoff, ret; + long eff_addr; + + if (insn->addr_bytes != 8) + goto out; + + if (X86_MODRM_MOD(insn->modrm.value) == 3) { + ret = get_eff_addr_reg(insn, regs, ®off, &eff_addr); + if (ret) + goto out; + + } else { + if (insn->sib.nbytes) { + ret = get_eff_addr_sib(insn, regs, ®off, &eff_addr); + if (ret) + goto out; + } else { + ret = get_eff_addr_modrm(insn, regs, ®off, &eff_addr); + if (ret) + goto out; + } + + } + + ret = get_seg_base_limit(insn, regs, regoff, &seg_base, NULL); + if (ret) + goto out; + + linear_addr = (unsigned long)eff_addr + seg_base; + +out: + return (void __user *)linear_addr; +} +#endif /* CONFIG_X86_64 */ + +/** + * insn_get_addr_ref() - Obtain the linear address referred by instruction + * @insn: Instruction structure containing ModRM byte and displacement + * @regs: Structure with register values as seen when entering kernel mode + * + * Obtain the linear address referred by the instruction's ModRM, SIB and + * displacement bytes, and segment base, as applicable. In protected mode, + * segment limits are enforced. + * + * Returns: + * + * Linear address referenced by instruction and registers on success. + * + * -1L on error. + */ +void __user *insn_get_addr_ref(struct insn *insn, struct pt_regs *regs) +{ + if (!insn || !regs) + return (void __user *)-1L; + + switch (insn->addr_bytes) { + case 2: + return get_addr_ref_16(insn, regs); + case 4: + return get_addr_ref_32(insn, regs); + case 8: + return get_addr_ref_64(insn, regs); + default: + return (void __user *)-1L; + } +} diff --git a/arch/x86/lib/rwsem.S b/arch/x86/lib/rwsem.S index bf2c6074efd2..dc2ab6ea6768 100644 --- a/arch/x86/lib/rwsem.S +++ b/arch/x86/lib/rwsem.S @@ -98,6 +98,18 @@ ENTRY(call_rwsem_down_read_failed) ret ENDPROC(call_rwsem_down_read_failed) +ENTRY(call_rwsem_down_read_failed_killable) + FRAME_BEGIN + save_common_regs + __ASM_SIZE(push,) %__ASM_REG(dx) + movq %rax,%rdi + call rwsem_down_read_failed_killable + __ASM_SIZE(pop,) %__ASM_REG(dx) + restore_common_regs + FRAME_END + ret +ENDPROC(call_rwsem_down_read_failed_killable) + ENTRY(call_rwsem_down_write_failed) FRAME_BEGIN save_common_regs diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index c3521e2be396..3321b446b66c 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -67,12 +67,17 @@ bool ex_handler_refcount(const struct exception_table_entry *fixup, * wrapped around) will be set. Additionally, seeing the refcount * reach 0 will set ZF (Zero Flag: result was zero). In each of * these cases we want a report, since it's a boundary condition. - * + * The SF case is not reported since it indicates post-boundary + * manipulations below zero or above INT_MAX. And if none of the + * flags are set, something has gone very wrong, so report it. */ if (regs->flags & (X86_EFLAGS_OF | X86_EFLAGS_ZF)) { bool zero = regs->flags & X86_EFLAGS_ZF; refcount_error_report(regs, zero ? "hit zero" : "overflow"); + } else if ((regs->flags & X86_EFLAGS_SF) == 0) { + /* Report if none of OF, ZF, nor SF are set. */ + refcount_error_report(regs, "unexpected saturation"); } return true; diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index b0ff378650a9..3109ba6c6ede 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -30,26 +30,6 @@ #include <asm/trace/exceptions.h> /* - * Page fault error code bits: - * - * bit 0 == 0: no page found 1: protection fault - * bit 1 == 0: read access 1: write access - * bit 2 == 0: kernel-mode access 1: user-mode access - * bit 3 == 1: use of reserved bit detected - * bit 4 == 1: fault was an instruction fetch - * bit 5 == 1: protection keys block access - */ -enum x86_pf_error_code { - - PF_PROT = 1 << 0, - PF_WRITE = 1 << 1, - PF_USER = 1 << 2, - PF_RSVD = 1 << 3, - PF_INSTR = 1 << 4, - PF_PK = 1 << 5, -}; - -/* * Returns 0 if mmiotrace is disabled, or if the fault is not * handled by mmiotrace: */ @@ -150,7 +130,7 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr) * If it was a exec (instruction fetch) fault on NX page, then * do not ignore the fault: */ - if (error_code & PF_INSTR) + if (error_code & X86_PF_INSTR) return 0; instr = (void *)convert_ip_to_linear(current, regs); @@ -180,7 +160,7 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr) * siginfo so userspace can discover which protection key was set * on the PTE. * - * If we get here, we know that the hardware signaled a PF_PK + * If we get here, we know that the hardware signaled a X86_PF_PK * fault and that there was a VMA once we got in the fault * handler. It does *not* guarantee that the VMA we find here * was the one that we faulted on. @@ -205,7 +185,7 @@ static void fill_sig_info_pkey(int si_code, siginfo_t *info, u32 *pkey) /* * force_sig_info_fault() is called from a number of * contexts, some of which have a VMA and some of which - * do not. The PF_PK handing happens after we have a + * do not. The X86_PF_PK handing happens after we have a * valid VMA, so we should never reach this without a * valid VMA. */ @@ -698,7 +678,7 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code, if (!oops_may_print()) return; - if (error_code & PF_INSTR) { + if (error_code & X86_PF_INSTR) { unsigned int level; pgd_t *pgd; pte_t *pte; @@ -780,7 +760,7 @@ no_context(struct pt_regs *regs, unsigned long error_code, */ if (current->thread.sig_on_uaccess_err && signal) { tsk->thread.trap_nr = X86_TRAP_PF; - tsk->thread.error_code = error_code | PF_USER; + tsk->thread.error_code = error_code | X86_PF_USER; tsk->thread.cr2 = address; /* XXX: hwpoison faults will set the wrong code. */ @@ -898,7 +878,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, struct task_struct *tsk = current; /* User mode accesses just cause a SIGSEGV */ - if (error_code & PF_USER) { + if (error_code & X86_PF_USER) { /* * It's possible to have interrupts off here: */ @@ -919,7 +899,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, * Instruction fetch faults in the vsyscall page might need * emulation. */ - if (unlikely((error_code & PF_INSTR) && + if (unlikely((error_code & X86_PF_INSTR) && ((address & ~0xfff) == VSYSCALL_ADDR))) { if (emulate_vsyscall(regs, address)) return; @@ -932,7 +912,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, * are always protection faults. */ if (address >= TASK_SIZE_MAX) - error_code |= PF_PROT; + error_code |= X86_PF_PROT; if (likely(show_unhandled_signals)) show_signal_msg(regs, error_code, address, tsk); @@ -993,11 +973,11 @@ static inline bool bad_area_access_from_pkeys(unsigned long error_code, if (!boot_cpu_has(X86_FEATURE_OSPKE)) return false; - if (error_code & PF_PK) + if (error_code & X86_PF_PK) return true; /* this checks permission keys on the VMA: */ - if (!arch_vma_access_permitted(vma, (error_code & PF_WRITE), - (error_code & PF_INSTR), foreign)) + if (!arch_vma_access_permitted(vma, (error_code & X86_PF_WRITE), + (error_code & X86_PF_INSTR), foreign)) return true; return false; } @@ -1025,7 +1005,7 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address, int code = BUS_ADRERR; /* Kernel mode? Handle exceptions or die: */ - if (!(error_code & PF_USER)) { + if (!(error_code & X86_PF_USER)) { no_context(regs, error_code, address, SIGBUS, BUS_ADRERR); return; } @@ -1053,14 +1033,14 @@ static noinline void mm_fault_error(struct pt_regs *regs, unsigned long error_code, unsigned long address, u32 *pkey, unsigned int fault) { - if (fatal_signal_pending(current) && !(error_code & PF_USER)) { + if (fatal_signal_pending(current) && !(error_code & X86_PF_USER)) { no_context(regs, error_code, address, 0, 0); return; } if (fault & VM_FAULT_OOM) { /* Kernel mode? Handle exceptions or die: */ - if (!(error_code & PF_USER)) { + if (!(error_code & X86_PF_USER)) { no_context(regs, error_code, address, SIGSEGV, SEGV_MAPERR); return; @@ -1085,16 +1065,16 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code, static int spurious_fault_check(unsigned long error_code, pte_t *pte) { - if ((error_code & PF_WRITE) && !pte_write(*pte)) + if ((error_code & X86_PF_WRITE) && !pte_write(*pte)) return 0; - if ((error_code & PF_INSTR) && !pte_exec(*pte)) + if ((error_code & X86_PF_INSTR) && !pte_exec(*pte)) return 0; /* * Note: We do not do lazy flushing on protection key - * changes, so no spurious fault will ever set PF_PK. + * changes, so no spurious fault will ever set X86_PF_PK. */ - if ((error_code & PF_PK)) + if ((error_code & X86_PF_PK)) return 1; return 1; @@ -1140,8 +1120,8 @@ spurious_fault(unsigned long error_code, unsigned long address) * change, so user accesses are not expected to cause spurious * faults. */ - if (error_code != (PF_WRITE | PF_PROT) - && error_code != (PF_INSTR | PF_PROT)) + if (error_code != (X86_PF_WRITE | X86_PF_PROT) && + error_code != (X86_PF_INSTR | X86_PF_PROT)) return 0; pgd = init_mm.pgd + pgd_index(address); @@ -1201,19 +1181,19 @@ access_error(unsigned long error_code, struct vm_area_struct *vma) * always an unconditional error and can never result in * a follow-up action to resolve the fault, like a COW. */ - if (error_code & PF_PK) + if (error_code & X86_PF_PK) return 1; /* * Make sure to check the VMA so that we do not perform - * faults just to hit a PF_PK as soon as we fill in a + * faults just to hit a X86_PF_PK as soon as we fill in a * page. */ - if (!arch_vma_access_permitted(vma, (error_code & PF_WRITE), - (error_code & PF_INSTR), foreign)) + if (!arch_vma_access_permitted(vma, (error_code & X86_PF_WRITE), + (error_code & X86_PF_INSTR), foreign)) return 1; - if (error_code & PF_WRITE) { + if (error_code & X86_PF_WRITE) { /* write, present and write, not present: */ if (unlikely(!(vma->vm_flags & VM_WRITE))) return 1; @@ -1221,7 +1201,7 @@ access_error(unsigned long error_code, struct vm_area_struct *vma) } /* read, present: */ - if (unlikely(error_code & PF_PROT)) + if (unlikely(error_code & X86_PF_PROT)) return 1; /* read, not present: */ @@ -1244,7 +1224,7 @@ static inline bool smap_violation(int error_code, struct pt_regs *regs) if (!static_cpu_has(X86_FEATURE_SMAP)) return false; - if (error_code & PF_USER) + if (error_code & X86_PF_USER) return false; if (!user_mode(regs) && (regs->flags & X86_EFLAGS_AC)) @@ -1297,7 +1277,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code, * protection error (error_code & 9) == 0. */ if (unlikely(fault_in_kernel_space(address))) { - if (!(error_code & (PF_RSVD | PF_USER | PF_PROT))) { + if (!(error_code & (X86_PF_RSVD | X86_PF_USER | X86_PF_PROT))) { if (vmalloc_fault(address) >= 0) return; @@ -1325,7 +1305,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code, if (unlikely(kprobes_fault(regs))) return; - if (unlikely(error_code & PF_RSVD)) + if (unlikely(error_code & X86_PF_RSVD)) pgtable_bad(regs, error_code, address); if (unlikely(smap_violation(error_code, regs))) { @@ -1351,7 +1331,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code, */ if (user_mode(regs)) { local_irq_enable(); - error_code |= PF_USER; + error_code |= X86_PF_USER; flags |= FAULT_FLAG_USER; } else { if (regs->flags & X86_EFLAGS_IF) @@ -1360,9 +1340,9 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code, perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); - if (error_code & PF_WRITE) + if (error_code & X86_PF_WRITE) flags |= FAULT_FLAG_WRITE; - if (error_code & PF_INSTR) + if (error_code & X86_PF_INSTR) flags |= FAULT_FLAG_INSTRUCTION; /* @@ -1382,7 +1362,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code, * space check, thus avoiding the deadlock: */ if (unlikely(!down_read_trylock(&mm->mmap_sem))) { - if ((error_code & PF_USER) == 0 && + if (!(error_code & X86_PF_USER) && !search_exception_tables(regs->ip)) { bad_area_nosemaphore(regs, error_code, address, NULL); return; @@ -1409,7 +1389,7 @@ retry: bad_area(regs, error_code, address); return; } - if (error_code & PF_USER) { + if (error_code & X86_PF_USER) { /* * Accessing the stack below %sp is always a bug. * The large cushion allows instructions like enter diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index af5c1ed21d43..a22c2b95e513 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -671,7 +671,7 @@ void __init init_mem_mapping(void) load_cr3(swapper_pg_dir); __flush_tlb_all(); - hypervisor_init_mem_mapping(); + x86_init.hyper.init_mem_mapping(); early_memtest(0, max_pfn_mapped << PAGE_SHIFT); } diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 048fbe8fc274..adcea90a2046 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1426,16 +1426,16 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) #if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_HAVE_BOOTMEM_INFO_NODE) void register_page_bootmem_memmap(unsigned long section_nr, - struct page *start_page, unsigned long size) + struct page *start_page, unsigned long nr_pages) { unsigned long addr = (unsigned long)start_page; - unsigned long end = (unsigned long)(start_page + size); + unsigned long end = (unsigned long)(start_page + nr_pages); unsigned long next; pgd_t *pgd; p4d_t *p4d; pud_t *pud; pmd_t *pmd; - unsigned int nr_pages; + unsigned int nr_pmd_pages; struct page *page; for (; addr < end; addr = next) { @@ -1482,9 +1482,9 @@ void register_page_bootmem_memmap(unsigned long section_nr, if (pmd_none(*pmd)) continue; - nr_pages = 1 << (get_order(PMD_SIZE)); + nr_pmd_pages = 1 << get_order(PMD_SIZE); page = pmd_page(*pmd); - while (nr_pages--) + while (nr_pmd_pages--) get_page_bootmem(section_nr, page++, SECTION_INFO); } diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 34f0e1847dd6..6e4573b1da34 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -27,6 +27,11 @@ #include "physaddr.h" +struct ioremap_mem_flags { + bool system_ram; + bool desc_other; +}; + /* * Fix up the linear direct mapping of the kernel to avoid cache attribute * conflicts. @@ -56,17 +61,59 @@ int ioremap_change_attr(unsigned long vaddr, unsigned long size, return err; } -static int __ioremap_check_ram(unsigned long start_pfn, unsigned long nr_pages, - void *arg) +static bool __ioremap_check_ram(struct resource *res) { + unsigned long start_pfn, stop_pfn; unsigned long i; - for (i = 0; i < nr_pages; ++i) - if (pfn_valid(start_pfn + i) && - !PageReserved(pfn_to_page(start_pfn + i))) - return 1; + if ((res->flags & IORESOURCE_SYSTEM_RAM) != IORESOURCE_SYSTEM_RAM) + return false; - return 0; + start_pfn = (res->start + PAGE_SIZE - 1) >> PAGE_SHIFT; + stop_pfn = (res->end + 1) >> PAGE_SHIFT; + if (stop_pfn > start_pfn) { + for (i = 0; i < (stop_pfn - start_pfn); ++i) + if (pfn_valid(start_pfn + i) && + !PageReserved(pfn_to_page(start_pfn + i))) + return true; + } + + return false; +} + +static int __ioremap_check_desc_other(struct resource *res) +{ + return (res->desc != IORES_DESC_NONE); +} + +static int __ioremap_res_check(struct resource *res, void *arg) +{ + struct ioremap_mem_flags *flags = arg; + + if (!flags->system_ram) + flags->system_ram = __ioremap_check_ram(res); + + if (!flags->desc_other) + flags->desc_other = __ioremap_check_desc_other(res); + + return flags->system_ram && flags->desc_other; +} + +/* + * To avoid multiple resource walks, this function walks resources marked as + * IORESOURCE_MEM and IORESOURCE_BUSY and looking for system RAM and/or a + * resource described not as IORES_DESC_NONE (e.g. IORES_DESC_ACPI_TABLES). + */ +static void __ioremap_check_mem(resource_size_t addr, unsigned long size, + struct ioremap_mem_flags *flags) +{ + u64 start, end; + + start = (u64)addr; + end = start + size - 1; + memset(flags, 0, sizeof(*flags)); + + walk_mem_res(start, end, flags, __ioremap_res_check); } /* @@ -87,9 +134,10 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, unsigned long size, enum page_cache_mode pcm, void *caller) { unsigned long offset, vaddr; - resource_size_t pfn, last_pfn, last_addr; + resource_size_t last_addr; const resource_size_t unaligned_phys_addr = phys_addr; const unsigned long unaligned_size = size; + struct ioremap_mem_flags mem_flags; struct vm_struct *area; enum page_cache_mode new_pcm; pgprot_t prot; @@ -108,13 +156,12 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, return NULL; } + __ioremap_check_mem(phys_addr, size, &mem_flags); + /* * Don't allow anybody to remap normal RAM that we're using.. */ - pfn = phys_addr >> PAGE_SHIFT; - last_pfn = last_addr >> PAGE_SHIFT; - if (walk_system_ram_range(pfn, last_pfn - pfn + 1, NULL, - __ioremap_check_ram) == 1) { + if (mem_flags.system_ram) { WARN_ONCE(1, "ioremap on RAM at %pa - %pa\n", &phys_addr, &last_addr); return NULL; @@ -146,7 +193,15 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, pcm = new_pcm; } + /* + * If the page being mapped is in memory and SEV is active then + * make sure the memory encryption attribute is enabled in the + * resulting mapping. + */ prot = PAGE_KERNEL_IO; + if (sev_active() && mem_flags.desc_other) + prot = pgprot_encrypted(prot); + switch (pcm) { case _PAGE_CACHE_MODE_UC: default: @@ -422,6 +477,9 @@ void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr) * areas should be mapped decrypted. And since the encryption key can * change across reboots, persistent memory should also be mapped * decrypted. + * + * If SEV is active, that implies that BIOS/UEFI also ran encrypted so + * only persistent memory should be mapped decrypted. */ static bool memremap_should_map_decrypted(resource_size_t phys_addr, unsigned long size) @@ -458,6 +516,11 @@ static bool memremap_should_map_decrypted(resource_size_t phys_addr, case E820_TYPE_ACPI: case E820_TYPE_NVS: case E820_TYPE_UNUSABLE: + /* For SEV, these areas are encrypted */ + if (sev_active()) + break; + /* Fallthrough */ + case E820_TYPE_PRAM: return true; default: @@ -581,7 +644,7 @@ static bool __init early_memremap_is_setup_data(resource_size_t phys_addr, bool arch_memremap_can_ram_remap(resource_size_t phys_addr, unsigned long size, unsigned long flags) { - if (!sme_active()) + if (!mem_encrypt_active()) return true; if (flags & MEMREMAP_ENC) @@ -590,12 +653,13 @@ bool arch_memremap_can_ram_remap(resource_size_t phys_addr, unsigned long size, if (flags & MEMREMAP_DEC) return false; - if (memremap_is_setup_data(phys_addr, size) || - memremap_is_efi_data(phys_addr, size) || - memremap_should_map_decrypted(phys_addr, size)) - return false; + if (sme_active()) { + if (memremap_is_setup_data(phys_addr, size) || + memremap_is_efi_data(phys_addr, size)) + return false; + } - return true; + return !memremap_should_map_decrypted(phys_addr, size); } /* @@ -608,17 +672,24 @@ pgprot_t __init early_memremap_pgprot_adjust(resource_size_t phys_addr, unsigned long size, pgprot_t prot) { - if (!sme_active()) + bool encrypted_prot; + + if (!mem_encrypt_active()) return prot; - if (early_memremap_is_setup_data(phys_addr, size) || - memremap_is_efi_data(phys_addr, size) || - memremap_should_map_decrypted(phys_addr, size)) - prot = pgprot_decrypted(prot); - else - prot = pgprot_encrypted(prot); + encrypted_prot = true; + + if (sme_active()) { + if (early_memremap_is_setup_data(phys_addr, size) || + memremap_is_efi_data(phys_addr, size)) + encrypted_prot = false; + } + + if (encrypted_prot && memremap_should_map_decrypted(phys_addr, size)) + encrypted_prot = false; - return prot; + return encrypted_prot ? pgprot_encrypted(prot) + : pgprot_decrypted(prot); } bool phys_mem_access_encrypted(unsigned long phys_addr, unsigned long size) diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 8f5be3eb40dd..2b60dc6e64b1 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -16,6 +16,8 @@ extern struct range pfn_mapped[E820_MAX_ENTRIES]; +static p4d_t tmp_p4d_table[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE); + static int __init map_range(struct range *range) { unsigned long start; @@ -31,8 +33,10 @@ static void __init clear_pgds(unsigned long start, unsigned long end) { pgd_t *pgd; + /* See comment in kasan_init() */ + unsigned long pgd_end = end & PGDIR_MASK; - for (; start < end; start += PGDIR_SIZE) { + for (; start < pgd_end; start += PGDIR_SIZE) { pgd = pgd_offset_k(start); /* * With folded p4d, pgd_clear() is nop, use p4d_clear() @@ -43,29 +47,61 @@ static void __init clear_pgds(unsigned long start, else pgd_clear(pgd); } + + pgd = pgd_offset_k(start); + for (; start < end; start += P4D_SIZE) + p4d_clear(p4d_offset(pgd, start)); +} + +static inline p4d_t *early_p4d_offset(pgd_t *pgd, unsigned long addr) +{ + unsigned long p4d; + + if (!IS_ENABLED(CONFIG_X86_5LEVEL)) + return (p4d_t *)pgd; + + p4d = __pa_nodebug(pgd_val(*pgd)) & PTE_PFN_MASK; + p4d += __START_KERNEL_map - phys_base; + return (p4d_t *)p4d + p4d_index(addr); +} + +static void __init kasan_early_p4d_populate(pgd_t *pgd, + unsigned long addr, + unsigned long end) +{ + pgd_t pgd_entry; + p4d_t *p4d, p4d_entry; + unsigned long next; + + if (pgd_none(*pgd)) { + pgd_entry = __pgd(_KERNPG_TABLE | __pa_nodebug(kasan_zero_p4d)); + set_pgd(pgd, pgd_entry); + } + + p4d = early_p4d_offset(pgd, addr); + do { + next = p4d_addr_end(addr, end); + + if (!p4d_none(*p4d)) + continue; + + p4d_entry = __p4d(_KERNPG_TABLE | __pa_nodebug(kasan_zero_pud)); + set_p4d(p4d, p4d_entry); + } while (p4d++, addr = next, addr != end && p4d_none(*p4d)); } static void __init kasan_map_early_shadow(pgd_t *pgd) { - int i; - unsigned long start = KASAN_SHADOW_START; + /* See comment in kasan_init() */ + unsigned long addr = KASAN_SHADOW_START & PGDIR_MASK; unsigned long end = KASAN_SHADOW_END; + unsigned long next; - for (i = pgd_index(start); start < end; i++) { - switch (CONFIG_PGTABLE_LEVELS) { - case 4: - pgd[i] = __pgd(__pa_nodebug(kasan_zero_pud) | - _KERNPG_TABLE); - break; - case 5: - pgd[i] = __pgd(__pa_nodebug(kasan_zero_p4d) | - _KERNPG_TABLE); - break; - default: - BUILD_BUG(); - } - start += PGDIR_SIZE; - } + pgd += pgd_index(addr); + do { + next = pgd_addr_end(addr, end); + kasan_early_p4d_populate(pgd, addr, next); + } while (pgd++, addr = next, addr != end); } #ifdef CONFIG_KASAN_INLINE @@ -102,7 +138,7 @@ void __init kasan_early_init(void) for (i = 0; i < PTRS_PER_PUD; i++) kasan_zero_pud[i] = __pud(pud_val); - for (i = 0; CONFIG_PGTABLE_LEVELS >= 5 && i < PTRS_PER_P4D; i++) + for (i = 0; IS_ENABLED(CONFIG_X86_5LEVEL) && i < PTRS_PER_P4D; i++) kasan_zero_p4d[i] = __p4d(p4d_val); kasan_map_early_shadow(early_top_pgt); @@ -118,12 +154,35 @@ void __init kasan_init(void) #endif memcpy(early_top_pgt, init_top_pgt, sizeof(early_top_pgt)); + + /* + * We use the same shadow offset for 4- and 5-level paging to + * facilitate boot-time switching between paging modes. + * As result in 5-level paging mode KASAN_SHADOW_START and + * KASAN_SHADOW_END are not aligned to PGD boundary. + * + * KASAN_SHADOW_START doesn't share PGD with anything else. + * We claim whole PGD entry to make things easier. + * + * KASAN_SHADOW_END lands in the last PGD entry and it collides with + * bunch of things like kernel code, modules, EFI mapping, etc. + * We need to take extra steps to not overwrite them. + */ + if (IS_ENABLED(CONFIG_X86_5LEVEL)) { + void *ptr; + + ptr = (void *)pgd_page_vaddr(*pgd_offset_k(KASAN_SHADOW_END)); + memcpy(tmp_p4d_table, (void *)ptr, sizeof(tmp_p4d_table)); + set_pgd(&early_top_pgt[pgd_index(KASAN_SHADOW_END)], + __pgd(__pa(tmp_p4d_table) | _KERNPG_TABLE)); + } + load_cr3(early_top_pgt); __flush_tlb_all(); - clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END); + clear_pgds(KASAN_SHADOW_START & PGDIR_MASK, KASAN_SHADOW_END); - kasan_populate_zero_shadow((void *)KASAN_SHADOW_START, + kasan_populate_zero_shadow((void *)(KASAN_SHADOW_START & PGDIR_MASK), kasan_mem_to_shadow((void *)PAGE_OFFSET)); for (i = 0; i < E820_MAX_ENTRIES; i++) { diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c index 16c5f37933a2..d9a9e9fc75dd 100644 --- a/arch/x86/mm/mem_encrypt.c +++ b/arch/x86/mm/mem_encrypt.c @@ -30,6 +30,8 @@ #include <asm/msr.h> #include <asm/cmdline.h> +#include "mm_internal.h" + static char sme_cmdline_arg[] __initdata = "mem_encrypt"; static char sme_cmdline_on[] __initdata = "on"; static char sme_cmdline_off[] __initdata = "off"; @@ -40,7 +42,11 @@ static char sme_cmdline_off[] __initdata = "off"; * section is later cleared. */ u64 sme_me_mask __section(.data) = 0; -EXPORT_SYMBOL_GPL(sme_me_mask); +EXPORT_SYMBOL(sme_me_mask); +DEFINE_STATIC_KEY_FALSE(sev_enable_key); +EXPORT_SYMBOL_GPL(sev_enable_key); + +static bool sev_enabled __section(.data); /* Buffer used for early in-place encryption by BSP, no locking needed */ static char sme_early_buffer[PAGE_SIZE] __aligned(PAGE_SIZE); @@ -63,7 +69,6 @@ static void __init __sme_early_enc_dec(resource_size_t paddr, if (!sme_me_mask) return; - local_flush_tlb(); wbinvd(); /* @@ -190,8 +195,238 @@ void __init sme_early_init(void) /* Update the protection map with memory encryption mask */ for (i = 0; i < ARRAY_SIZE(protection_map); i++) protection_map[i] = pgprot_encrypted(protection_map[i]); + + if (sev_active()) + swiotlb_force = SWIOTLB_FORCE; +} + +static void *sev_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, + gfp_t gfp, unsigned long attrs) +{ + unsigned long dma_mask; + unsigned int order; + struct page *page; + void *vaddr = NULL; + + dma_mask = dma_alloc_coherent_mask(dev, gfp); + order = get_order(size); + + /* + * Memory will be memset to zero after marking decrypted, so don't + * bother clearing it before. + */ + gfp &= ~__GFP_ZERO; + + page = alloc_pages_node(dev_to_node(dev), gfp, order); + if (page) { + dma_addr_t addr; + + /* + * Since we will be clearing the encryption bit, check the + * mask with it already cleared. + */ + addr = __sme_clr(phys_to_dma(dev, page_to_phys(page))); + if ((addr + size) > dma_mask) { + __free_pages(page, get_order(size)); + } else { + vaddr = page_address(page); + *dma_handle = addr; + } + } + + if (!vaddr) + vaddr = swiotlb_alloc_coherent(dev, size, dma_handle, gfp); + + if (!vaddr) + return NULL; + + /* Clear the SME encryption bit for DMA use if not swiotlb area */ + if (!is_swiotlb_buffer(dma_to_phys(dev, *dma_handle))) { + set_memory_decrypted((unsigned long)vaddr, 1 << order); + memset(vaddr, 0, PAGE_SIZE << order); + *dma_handle = __sme_clr(*dma_handle); + } + + return vaddr; } +static void sev_free(struct device *dev, size_t size, void *vaddr, + dma_addr_t dma_handle, unsigned long attrs) +{ + /* Set the SME encryption bit for re-use if not swiotlb area */ + if (!is_swiotlb_buffer(dma_to_phys(dev, dma_handle))) + set_memory_encrypted((unsigned long)vaddr, + 1 << get_order(size)); + + swiotlb_free_coherent(dev, size, vaddr, dma_handle); +} + +static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc) +{ + pgprot_t old_prot, new_prot; + unsigned long pfn, pa, size; + pte_t new_pte; + + switch (level) { + case PG_LEVEL_4K: + pfn = pte_pfn(*kpte); + old_prot = pte_pgprot(*kpte); + break; + case PG_LEVEL_2M: + pfn = pmd_pfn(*(pmd_t *)kpte); + old_prot = pmd_pgprot(*(pmd_t *)kpte); + break; + case PG_LEVEL_1G: + pfn = pud_pfn(*(pud_t *)kpte); + old_prot = pud_pgprot(*(pud_t *)kpte); + break; + default: + return; + } + + new_prot = old_prot; + if (enc) + pgprot_val(new_prot) |= _PAGE_ENC; + else + pgprot_val(new_prot) &= ~_PAGE_ENC; + + /* If prot is same then do nothing. */ + if (pgprot_val(old_prot) == pgprot_val(new_prot)) + return; + + pa = pfn << page_level_shift(level); + size = page_level_size(level); + + /* + * We are going to perform in-place en-/decryption and change the + * physical page attribute from C=1 to C=0 or vice versa. Flush the + * caches to ensure that data gets accessed with the correct C-bit. + */ + clflush_cache_range(__va(pa), size); + + /* Encrypt/decrypt the contents in-place */ + if (enc) + sme_early_encrypt(pa, size); + else + sme_early_decrypt(pa, size); + + /* Change the page encryption mask. */ + new_pte = pfn_pte(pfn, new_prot); + set_pte_atomic(kpte, new_pte); +} + +static int __init early_set_memory_enc_dec(unsigned long vaddr, + unsigned long size, bool enc) +{ + unsigned long vaddr_end, vaddr_next; + unsigned long psize, pmask; + int split_page_size_mask; + int level, ret; + pte_t *kpte; + + vaddr_next = vaddr; + vaddr_end = vaddr + size; + + for (; vaddr < vaddr_end; vaddr = vaddr_next) { + kpte = lookup_address(vaddr, &level); + if (!kpte || pte_none(*kpte)) { + ret = 1; + goto out; + } + + if (level == PG_LEVEL_4K) { + __set_clr_pte_enc(kpte, level, enc); + vaddr_next = (vaddr & PAGE_MASK) + PAGE_SIZE; + continue; + } + + psize = page_level_size(level); + pmask = page_level_mask(level); + + /* + * Check whether we can change the large page in one go. + * We request a split when the address is not aligned and + * the number of pages to set/clear encryption bit is smaller + * than the number of pages in the large page. + */ + if (vaddr == (vaddr & pmask) && + ((vaddr_end - vaddr) >= psize)) { + __set_clr_pte_enc(kpte, level, enc); + vaddr_next = (vaddr & pmask) + psize; + continue; + } + + /* + * The virtual address is part of a larger page, create the next + * level page table mapping (4K or 2M). If it is part of a 2M + * page then we request a split of the large page into 4K + * chunks. A 1GB large page is split into 2M pages, resp. + */ + if (level == PG_LEVEL_2M) + split_page_size_mask = 0; + else + split_page_size_mask = 1 << PG_LEVEL_2M; + + kernel_physical_mapping_init(__pa(vaddr & pmask), + __pa((vaddr_end & pmask) + psize), + split_page_size_mask); + } + + ret = 0; + +out: + __flush_tlb_all(); + return ret; +} + +int __init early_set_memory_decrypted(unsigned long vaddr, unsigned long size) +{ + return early_set_memory_enc_dec(vaddr, size, false); +} + +int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size) +{ + return early_set_memory_enc_dec(vaddr, size, true); +} + +/* + * SME and SEV are very similar but they are not the same, so there are + * times that the kernel will need to distinguish between SME and SEV. The + * sme_active() and sev_active() functions are used for this. When a + * distinction isn't needed, the mem_encrypt_active() function can be used. + * + * The trampoline code is a good example for this requirement. Before + * paging is activated, SME will access all memory as decrypted, but SEV + * will access all memory as encrypted. So, when APs are being brought + * up under SME the trampoline area cannot be encrypted, whereas under SEV + * the trampoline area must be encrypted. + */ +bool sme_active(void) +{ + return sme_me_mask && !sev_enabled; +} +EXPORT_SYMBOL_GPL(sme_active); + +bool sev_active(void) +{ + return sme_me_mask && sev_enabled; +} +EXPORT_SYMBOL_GPL(sev_active); + +static const struct dma_map_ops sev_dma_ops = { + .alloc = sev_alloc, + .free = sev_free, + .map_page = swiotlb_map_page, + .unmap_page = swiotlb_unmap_page, + .map_sg = swiotlb_map_sg_attrs, + .unmap_sg = swiotlb_unmap_sg_attrs, + .sync_single_for_cpu = swiotlb_sync_single_for_cpu, + .sync_single_for_device = swiotlb_sync_single_for_device, + .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu, + .sync_sg_for_device = swiotlb_sync_sg_for_device, + .mapping_error = swiotlb_dma_mapping_error, +}; + /* Architecture __weak replacement functions */ void __init mem_encrypt_init(void) { @@ -201,7 +436,23 @@ void __init mem_encrypt_init(void) /* Call into SWIOTLB to update the SWIOTLB DMA buffers */ swiotlb_update_mem_attributes(); - pr_info("AMD Secure Memory Encryption (SME) active\n"); + /* + * With SEV, DMA operations cannot use encryption. New DMA ops + * are required in order to mark the DMA areas as decrypted or + * to use bounce buffers. + */ + if (sev_active()) + dma_ops = &sev_dma_ops; + + /* + * With SEV, we need to unroll the rep string I/O instructions. + */ + if (sev_active()) + static_branch_enable(&sev_enable_key); + + pr_info("AMD %s active\n", + sev_active() ? "Secure Encrypted Virtualization (SEV)" + : "Secure Memory Encryption (SME)"); } void swiotlb_set_mem_attributes(void *vaddr, unsigned long size) @@ -529,37 +780,63 @@ void __init __nostackprotector sme_enable(struct boot_params *bp) { const char *cmdline_ptr, *cmdline_arg, *cmdline_on, *cmdline_off; unsigned int eax, ebx, ecx, edx; + unsigned long feature_mask; bool active_by_default; unsigned long me_mask; char buffer[16]; u64 msr; - /* Check for the SME support leaf */ + /* Check for the SME/SEV support leaf */ eax = 0x80000000; ecx = 0; native_cpuid(&eax, &ebx, &ecx, &edx); if (eax < 0x8000001f) return; +#define AMD_SME_BIT BIT(0) +#define AMD_SEV_BIT BIT(1) /* - * Check for the SME feature: - * CPUID Fn8000_001F[EAX] - Bit 0 - * Secure Memory Encryption support - * CPUID Fn8000_001F[EBX] - Bits 5:0 - * Pagetable bit position used to indicate encryption + * Set the feature mask (SME or SEV) based on whether we are + * running under a hypervisor. + */ + eax = 1; + ecx = 0; + native_cpuid(&eax, &ebx, &ecx, &edx); + feature_mask = (ecx & BIT(31)) ? AMD_SEV_BIT : AMD_SME_BIT; + + /* + * Check for the SME/SEV feature: + * CPUID Fn8000_001F[EAX] + * - Bit 0 - Secure Memory Encryption support + * - Bit 1 - Secure Encrypted Virtualization support + * CPUID Fn8000_001F[EBX] + * - Bits 5:0 - Pagetable bit position used to indicate encryption */ eax = 0x8000001f; ecx = 0; native_cpuid(&eax, &ebx, &ecx, &edx); - if (!(eax & 1)) + if (!(eax & feature_mask)) return; me_mask = 1UL << (ebx & 0x3f); - /* Check if SME is enabled */ - msr = __rdmsr(MSR_K8_SYSCFG); - if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT)) + /* Check if memory encryption is enabled */ + if (feature_mask == AMD_SME_BIT) { + /* For SME, check the SYSCFG MSR */ + msr = __rdmsr(MSR_K8_SYSCFG); + if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT)) + return; + } else { + /* For SEV, check the SEV MSR */ + msr = __rdmsr(MSR_AMD64_SEV); + if (!(msr & MSR_AMD64_SEV_ENABLED)) + return; + + /* SEV state cannot be controlled by a command line option */ + sme_me_mask = me_mask; + sev_enabled = true; return; + } /* * Fixups have not been applied to phys_base yet and we're running diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c index 7eb06701a935..e500949bae24 100644 --- a/arch/x86/mm/mpx.c +++ b/arch/x86/mm/mpx.c @@ -13,6 +13,7 @@ #include <linux/sched/sysctl.h> #include <asm/insn.h> +#include <asm/insn-eval.h> #include <asm/mman.h> #include <asm/mmu_context.h> #include <asm/mpx.h> @@ -61,123 +62,6 @@ static unsigned long mpx_mmap(unsigned long len) return addr; } -enum reg_type { - REG_TYPE_RM = 0, - REG_TYPE_INDEX, - REG_TYPE_BASE, -}; - -static int get_reg_offset(struct insn *insn, struct pt_regs *regs, - enum reg_type type) -{ - int regno = 0; - - static const int regoff[] = { - offsetof(struct pt_regs, ax), - offsetof(struct pt_regs, cx), - offsetof(struct pt_regs, dx), - offsetof(struct pt_regs, bx), - offsetof(struct pt_regs, sp), - offsetof(struct pt_regs, bp), - offsetof(struct pt_regs, si), - offsetof(struct pt_regs, di), -#ifdef CONFIG_X86_64 - offsetof(struct pt_regs, r8), - offsetof(struct pt_regs, r9), - offsetof(struct pt_regs, r10), - offsetof(struct pt_regs, r11), - offsetof(struct pt_regs, r12), - offsetof(struct pt_regs, r13), - offsetof(struct pt_regs, r14), - offsetof(struct pt_regs, r15), -#endif - }; - int nr_registers = ARRAY_SIZE(regoff); - /* - * Don't possibly decode a 32-bit instructions as - * reading a 64-bit-only register. - */ - if (IS_ENABLED(CONFIG_X86_64) && !insn->x86_64) - nr_registers -= 8; - - switch (type) { - case REG_TYPE_RM: - regno = X86_MODRM_RM(insn->modrm.value); - if (X86_REX_B(insn->rex_prefix.value)) - regno += 8; - break; - - case REG_TYPE_INDEX: - regno = X86_SIB_INDEX(insn->sib.value); - if (X86_REX_X(insn->rex_prefix.value)) - regno += 8; - break; - - case REG_TYPE_BASE: - regno = X86_SIB_BASE(insn->sib.value); - if (X86_REX_B(insn->rex_prefix.value)) - regno += 8; - break; - - default: - pr_err("invalid register type"); - BUG(); - break; - } - - if (regno >= nr_registers) { - WARN_ONCE(1, "decoded an instruction with an invalid register"); - return -EINVAL; - } - return regoff[regno]; -} - -/* - * return the address being referenced be instruction - * for rm=3 returning the content of the rm reg - * for rm!=3 calculates the address using SIB and Disp - */ -static void __user *mpx_get_addr_ref(struct insn *insn, struct pt_regs *regs) -{ - unsigned long addr, base, indx; - int addr_offset, base_offset, indx_offset; - insn_byte_t sib; - - insn_get_modrm(insn); - insn_get_sib(insn); - sib = insn->sib.value; - - if (X86_MODRM_MOD(insn->modrm.value) == 3) { - addr_offset = get_reg_offset(insn, regs, REG_TYPE_RM); - if (addr_offset < 0) - goto out_err; - addr = regs_get_register(regs, addr_offset); - } else { - if (insn->sib.nbytes) { - base_offset = get_reg_offset(insn, regs, REG_TYPE_BASE); - if (base_offset < 0) - goto out_err; - - indx_offset = get_reg_offset(insn, regs, REG_TYPE_INDEX); - if (indx_offset < 0) - goto out_err; - - base = regs_get_register(regs, base_offset); - indx = regs_get_register(regs, indx_offset); - addr = base + indx * (1 << X86_SIB_SCALE(sib)); - } else { - addr_offset = get_reg_offset(insn, regs, REG_TYPE_RM); - if (addr_offset < 0) - goto out_err; - addr = regs_get_register(regs, addr_offset); - } - addr += insn->displacement.value; - } - return (void __user *)addr; -out_err: - return (void __user *)-1; -} - static int mpx_insn_decode(struct insn *insn, struct pt_regs *regs) { @@ -290,7 +174,7 @@ siginfo_t *mpx_generate_siginfo(struct pt_regs *regs) info->si_signo = SIGSEGV; info->si_errno = 0; info->si_code = SEGV_BNDERR; - info->si_addr = mpx_get_addr_ref(&insn, regs); + info->si_addr = insn_get_addr_ref(&insn, regs); /* * We were not able to extract an address from the instruction, * probably because there was something invalid in it. diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index dfb7d657cf43..3fe68483463c 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -1781,8 +1781,8 @@ static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc) unsigned long start; int ret; - /* Nothing to do if the SME is not active */ - if (!sme_active()) + /* Nothing to do if memory encryption is not active */ + if (!mem_encrypt_active()) return 0; /* Should not be working on unaligned addresses */ diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index 350f7096baac..7913b6921959 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c @@ -212,8 +212,8 @@ static void arch_perfmon_setup_counters(void) eax.full = cpuid_eax(0xa); /* Workaround for BIOS bugs in 6/15. Taken from perfmon2 */ - if (eax.split.version_id == 0 && __this_cpu_read(cpu_info.x86) == 6 && - __this_cpu_read(cpu_info.x86_model) == 15) { + if (eax.split.version_id == 0 && boot_cpu_data.x86 == 6 && + boot_cpu_data.x86_model == 15) { eax.split.version_id = 2; eax.split.num_counters = 2; eax.split.bit_width = 40; diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 20fb31579b69..9e4ee5b04b2d 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -33,6 +33,7 @@ #include <linux/reboot.h> #include <linux/slab.h> #include <linux/ucs2_string.h> +#include <linux/mem_encrypt.h> #include <asm/setup.h> #include <asm/page.h> @@ -370,7 +371,11 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) * as trim_bios_range() will reserve the first page and isolate it away * from memory allocators anyway. */ - if (kernel_map_pages_in_pgd(pgd, 0x0, 0x0, 1, _PAGE_RW)) { + pf = _PAGE_RW; + if (sev_active()) + pf |= _PAGE_ENC; + + if (kernel_map_pages_in_pgd(pgd, 0x0, 0x0, 1, pf)) { pr_err("Failed to create 1:1 mapping for the first page!\n"); return 1; } @@ -413,6 +418,9 @@ static void __init __map_region(efi_memory_desc_t *md, u64 va) if (!(md->attribute & EFI_MEMORY_WB)) flags |= _PAGE_PCD; + if (sev_active()) + flags |= _PAGE_ENC; + pfn = md->phys_addr >> PAGE_SHIFT; if (kernel_map_pages_in_pgd(pgd, pfn, va, md->num_pages, flags)) pr_warn("Error mapping PA 0x%llx -> VA 0x%llx!\n", @@ -539,6 +547,9 @@ static int __init efi_update_mem_attr(struct mm_struct *mm, efi_memory_desc_t *m if (!(md->attribute & EFI_MEMORY_RO)) pf |= _PAGE_RW; + if (sev_active()) + pf |= _PAGE_ENC; + return efi_update_mappings(md, pf); } @@ -590,6 +601,9 @@ void __init efi_runtime_update_mappings(void) (md->type != EFI_RUNTIME_SERVICES_CODE)) pf |= _PAGE_RW; + if (sev_active()) + pf |= _PAGE_ENC; + efi_update_mappings(md, pf); } } diff --git a/arch/x86/platform/intel-mid/device_libs/platform_gpio_keys.c b/arch/x86/platform/intel-mid/device_libs/platform_gpio_keys.c index 74283875c7e8..e639e3116acf 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_gpio_keys.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_gpio_keys.c @@ -62,10 +62,9 @@ static struct platform_device pb_device = { static int __init pb_keys_init(void) { struct gpio_keys_button *gb = gpio_button; - int i, num, good = 0; + int i, good = 0; - num = sizeof(gpio_button) / sizeof(struct gpio_keys_button); - for (i = 0; i < num; i++) { + for (i = 0; i < ARRAY_SIZE(gpio_button); i++) { gb[i].gpio = get_gpio_by_name(gb[i].desc); pr_debug("info[%2d]: name = %s, gpio = %d\n", i, gb[i].desc, gb[i].gpio); diff --git a/arch/x86/platform/uv/uv_irq.c b/arch/x86/platform/uv/uv_irq.c index 03fc397335b7..5f6fd860820a 100644 --- a/arch/x86/platform/uv/uv_irq.c +++ b/arch/x86/platform/uv/uv_irq.c @@ -127,10 +127,11 @@ static void uv_domain_free(struct irq_domain *domain, unsigned int virq, * Re-target the irq to the specified CPU and enable the specified MMR located * on the specified blade to allow the sending of MSIs to the specified CPU. */ -static void uv_domain_activate(struct irq_domain *domain, - struct irq_data *irq_data) +static int uv_domain_activate(struct irq_domain *domain, + struct irq_data *irq_data, bool early) { uv_program_mmr(irqd_cfg(irq_data), irq_data->chip_data); + return 0; } /* diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c index ed84d3917a59..d10105825d57 100644 --- a/arch/x86/realmode/init.c +++ b/arch/x86/realmode/init.c @@ -64,9 +64,10 @@ static void __init setup_real_mode(void) /* * If SME is active, the trampoline area will need to be in * decrypted memory in order to bring up other processors - * successfully. + * successfully. This is not needed for SEV. */ - set_memory_decrypted((unsigned long)base, size >> PAGE_SHIFT); + if (sme_active()) + set_memory_decrypted((unsigned long)base, size >> PAGE_SHIFT); memcpy(base, real_mode_blob, size); diff --git a/arch/x86/um/ldt.c b/arch/x86/um/ldt.c index 836a1eb5df43..3ee234b6234d 100644 --- a/arch/x86/um/ldt.c +++ b/arch/x86/um/ldt.c @@ -6,6 +6,7 @@ #include <linux/mm.h> #include <linux/sched.h> #include <linux/slab.h> +#include <linux/syscalls.h> #include <linux/uaccess.h> #include <asm/unistd.h> #include <os.h> @@ -369,7 +370,9 @@ void free_ldt(struct mm_context *mm) mm->arch.ldt.entry_count = 0; } -int sys_modify_ldt(int func, void __user *ptr, unsigned long bytecount) +SYSCALL_DEFINE3(modify_ldt, int , func , void __user * , ptr , + unsigned long , bytecount) { - return do_modify_ldt_skas(func, ptr, bytecount); + /* See non-um modify_ldt() for why we do this cast */ + return (unsigned int)do_modify_ldt_skas(func, ptr, bytecount); } diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c index 30434b8708f2..6b830d4cb4c8 100644 --- a/arch/x86/xen/apic.c +++ b/arch/x86/xen/apic.c @@ -31,7 +31,7 @@ static unsigned int xen_io_apic_read(unsigned apic, unsigned reg) return 0xfd; } -static unsigned long xen_set_apic_id(unsigned int x) +static u32 xen_set_apic_id(unsigned int x) { WARN_ON(1); return x; @@ -161,12 +161,10 @@ static struct apic xen_pv_apic = { /* .irq_delivery_mode - used in native_compose_msi_msg only */ /* .irq_dest_mode - used in native_compose_msi_msg only */ - .target_cpus = default_target_cpus, .disable_esr = 0, /* .dest_logical - default_send_IPI_ use it but we use our own. */ .check_apicid_used = default_check_apicid_used, /* Used on 32-bit */ - .vector_allocation_domain = flat_vector_allocation_domain, .init_apic_ldr = xen_noop, /* setup_local_APIC calls it */ .ioapic_phys_id_map = default_ioapic_phys_id_map, /* Used on 32-bit */ @@ -179,7 +177,7 @@ static struct apic xen_pv_apic = { .get_apic_id = xen_get_apic_id, .set_apic_id = xen_set_apic_id, /* Can be NULL on 32-bit. */ - .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, + .calc_dest_apicid = apic_flat_calc_apicid, #ifdef CONFIG_SMP .send_IPI_mask = xen_send_IPI_mask, diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c index de503c225ae1..826898701045 100644 --- a/arch/x86/xen/enlighten_hvm.c +++ b/arch/x86/xen/enlighten_hvm.c @@ -1,3 +1,4 @@ +#include <linux/acpi.h> #include <linux/cpu.h> #include <linux/kexec.h> #include <linux/memblock.h> @@ -188,8 +189,6 @@ static void __init xen_hvm_guest_init(void) xen_hvm_init_time_ops(); xen_hvm_init_mmu_ops(); - if (xen_pvh_domain()) - machine_ops.emergency_restart = xen_emergency_restart; #ifdef CONFIG_KEXEC_CORE machine_ops.shutdown = xen_hvm_shutdown; machine_ops.crash_shutdown = xen_hvm_crash_shutdown; @@ -226,12 +225,33 @@ static uint32_t __init xen_platform_hvm(void) return xen_cpuid_base(); } -const struct hypervisor_x86 x86_hyper_xen_hvm = { +static __init void xen_hvm_guest_late_init(void) +{ +#ifdef CONFIG_XEN_PVH + /* Test for PVH domain (PVH boot path taken overrides ACPI flags). */ + if (!xen_pvh && + (x86_platform.legacy.rtc || !x86_platform.legacy.no_vga)) + return; + + /* PVH detected. */ + xen_pvh = true; + + /* Make sure we don't fall back to (default) ACPI_IRQ_MODEL_PIC. */ + if (!nr_ioapics && acpi_irq_model == ACPI_IRQ_MODEL_PIC) + acpi_irq_model = ACPI_IRQ_MODEL_PLATFORM; + + machine_ops.emergency_restart = xen_emergency_restart; + pv_info.name = "Xen PVH"; +#endif +} + +const __initconst struct hypervisor_x86 x86_hyper_xen_hvm = { .name = "Xen HVM", .detect = xen_platform_hvm, - .init_platform = xen_hvm_guest_init, - .pin_vcpu = xen_pin_vcpu, - .x2apic_available = xen_x2apic_para_available, - .init_mem_mapping = xen_hvm_init_mem_mapping, + .type = X86_HYPER_XEN_HVM, + .init.init_platform = xen_hvm_guest_init, + .init.x2apic_available = xen_x2apic_para_available, + .init.init_mem_mapping = xen_hvm_init_mem_mapping, + .init.guest_late_init = xen_hvm_guest_late_init, + .runtime.pin_vcpu = xen_pin_vcpu, }; -EXPORT_SYMBOL(x86_hyper_xen_hvm); diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index d4396e27b1fb..5b2b3f3f6531 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -601,7 +601,7 @@ static struct trap_array_entry trap_array[] = { #ifdef CONFIG_X86_MCE { machine_check, xen_machine_check, true }, #endif - { nmi, xen_nmi, true }, + { nmi, xen_xennmi, true }, { overflow, xen_overflow, false }, #ifdef CONFIG_IA32_EMULATION { entry_INT80_compat, xen_entry_INT80_compat, false }, @@ -811,15 +811,14 @@ static void __init xen_write_gdt_entry_boot(struct desc_struct *dt, int entry, } } -static void xen_load_sp0(struct tss_struct *tss, - struct thread_struct *thread) +static void xen_load_sp0(unsigned long sp0) { struct multicall_space mcs; mcs = xen_mc_entry(0); - MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->sp0); + MULTI_stack_switch(mcs.mc, __KERNEL_DS, sp0); xen_mc_issue(PARAVIRT_LAZY_CPU); - tss->x86_tss.sp0 = thread->sp0; + this_cpu_write(cpu_tss.x86_tss.sp0, sp0); } void xen_set_iopl_mask(unsigned mask) @@ -1231,6 +1230,7 @@ asmlinkage __visible void __init xen_start_kernel(void) x86_platform.get_nmi_reason = xen_get_nmi_reason; x86_init.resources.memory_setup = xen_memory_setup; + x86_init.irqs.intr_mode_init = x86_init_noop; x86_init.oem.arch_setup = xen_arch_setup; x86_init.oem.banner = xen_banner; @@ -1460,9 +1460,9 @@ static uint32_t __init xen_platform_pv(void) return 0; } -const struct hypervisor_x86 x86_hyper_xen_pv = { +const __initconst struct hypervisor_x86 x86_hyper_xen_pv = { .name = "Xen PV", .detect = xen_platform_pv, - .pin_vcpu = xen_pin_vcpu, + .type = X86_HYPER_XEN_PV, + .runtime.pin_vcpu = xen_pin_vcpu, }; -EXPORT_SYMBOL(x86_hyper_xen_pv); diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c index 7bd3ee08393e..436c4f003e17 100644 --- a/arch/x86/xen/enlighten_pvh.c +++ b/arch/x86/xen/enlighten_pvh.c @@ -25,13 +25,6 @@ struct boot_params pvh_bootparams __attribute__((section(".data"))); struct hvm_start_info pvh_start_info; unsigned int pvh_start_info_sz = sizeof(pvh_start_info); -static void xen_pvh_arch_setup(void) -{ - /* Make sure we don't fall back to (default) ACPI_IRQ_MODEL_PIC. */ - if (nr_ioapics == 0) - acpi_irq_model = ACPI_IRQ_MODEL_PLATFORM; -} - static void __init init_pvh_bootparams(void) { struct xen_memory_map memmap; @@ -102,6 +95,4 @@ void __init xen_prepare_pvh(void) wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32)); init_pvh_bootparams(); - - x86_init.oem.arch_setup = xen_pvh_arch_setup; } diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index 71495f1a86d7..2ccdaba31a07 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -449,7 +449,7 @@ __visible pmd_t xen_make_pmd(pmdval_t pmd) } PV_CALLEE_SAVE_REGS_THUNK(xen_make_pmd); -#if CONFIG_PGTABLE_LEVELS == 4 +#ifdef CONFIG_X86_64 __visible pudval_t xen_pud_val(pud_t pud) { return pte_mfn_to_pfn(pud.pud); @@ -538,7 +538,7 @@ static void xen_set_p4d(p4d_t *ptr, p4d_t val) xen_mc_issue(PARAVIRT_LAZY_MMU); } -#endif /* CONFIG_PGTABLE_LEVELS == 4 */ +#endif /* CONFIG_X86_64 */ static int xen_pmd_walk(struct mm_struct *mm, pmd_t *pmd, int (*func)(struct mm_struct *mm, struct page *, enum pt_level), @@ -580,21 +580,17 @@ static int xen_p4d_walk(struct mm_struct *mm, p4d_t *p4d, int (*func)(struct mm_struct *mm, struct page *, enum pt_level), bool last, unsigned long limit) { - int i, nr, flush = 0; + int flush = 0; + pud_t *pud; - nr = last ? p4d_index(limit) + 1 : PTRS_PER_P4D; - for (i = 0; i < nr; i++) { - pud_t *pud; - if (p4d_none(p4d[i])) - continue; + if (p4d_none(*p4d)) + return flush; - pud = pud_offset(&p4d[i], 0); - if (PTRS_PER_PUD > 1) - flush |= (*func)(mm, virt_to_page(pud), PT_PUD); - flush |= xen_pud_walk(mm, pud, func, - last && i == nr - 1, limit); - } + pud = pud_offset(p4d, 0); + if (PTRS_PER_PUD > 1) + flush |= (*func)(mm, virt_to_page(pud), PT_PUD); + flush |= xen_pud_walk(mm, pud, func, last, limit); return flush; } @@ -644,8 +640,6 @@ static int __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd, continue; p4d = p4d_offset(&pgd[i], 0); - if (PTRS_PER_P4D > 1) - flush |= (*func)(mm, virt_to_page(p4d), PT_P4D); flush |= xen_p4d_walk(mm, p4d, func, i == nr - 1, limit); } @@ -1176,22 +1170,14 @@ static void __init xen_cleanmfnmap(unsigned long vaddr) { pgd_t *pgd; p4d_t *p4d; - unsigned int i; bool unpin; unpin = (vaddr == 2 * PGDIR_SIZE); vaddr &= PMD_MASK; pgd = pgd_offset_k(vaddr); p4d = p4d_offset(pgd, 0); - for (i = 0; i < PTRS_PER_P4D; i++) { - if (p4d_none(p4d[i])) - continue; - xen_cleanmfnmap_p4d(p4d + i, unpin); - } - if (IS_ENABLED(CONFIG_X86_5LEVEL)) { - set_pgd(pgd, __pgd(0)); - xen_cleanmfnmap_free_pgtbl(p4d, unpin); - } + if (!p4d_none(*p4d)) + xen_cleanmfnmap_p4d(p4d, unpin); } static void __init xen_pagetable_p2m_free(void) @@ -1692,7 +1678,7 @@ static void xen_release_pmd(unsigned long pfn) xen_release_ptpage(pfn, PT_PMD); } -#if CONFIG_PGTABLE_LEVELS >= 4 +#ifdef CONFIG_X86_64 static void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn) { xen_alloc_ptpage(mm, pfn, PT_PUD); @@ -2029,13 +2015,12 @@ static phys_addr_t __init xen_early_virt_to_phys(unsigned long vaddr) */ void __init xen_relocate_p2m(void) { - phys_addr_t size, new_area, pt_phys, pmd_phys, pud_phys, p4d_phys; + phys_addr_t size, new_area, pt_phys, pmd_phys, pud_phys; unsigned long p2m_pfn, p2m_pfn_end, n_frames, pfn, pfn_end; - int n_pte, n_pt, n_pmd, n_pud, n_p4d, idx_pte, idx_pt, idx_pmd, idx_pud, idx_p4d; + int n_pte, n_pt, n_pmd, n_pud, idx_pte, idx_pt, idx_pmd, idx_pud; pte_t *pt; pmd_t *pmd; pud_t *pud; - p4d_t *p4d = NULL; pgd_t *pgd; unsigned long *new_p2m; int save_pud; @@ -2045,11 +2030,7 @@ void __init xen_relocate_p2m(void) n_pt = roundup(size, PMD_SIZE) >> PMD_SHIFT; n_pmd = roundup(size, PUD_SIZE) >> PUD_SHIFT; n_pud = roundup(size, P4D_SIZE) >> P4D_SHIFT; - if (PTRS_PER_P4D > 1) - n_p4d = roundup(size, PGDIR_SIZE) >> PGDIR_SHIFT; - else - n_p4d = 0; - n_frames = n_pte + n_pt + n_pmd + n_pud + n_p4d; + n_frames = n_pte + n_pt + n_pmd + n_pud; new_area = xen_find_free_area(PFN_PHYS(n_frames)); if (!new_area) { @@ -2065,76 +2046,56 @@ void __init xen_relocate_p2m(void) * To avoid any possible virtual address collision, just use * 2 * PUD_SIZE for the new area. */ - p4d_phys = new_area; - pud_phys = p4d_phys + PFN_PHYS(n_p4d); + pud_phys = new_area; pmd_phys = pud_phys + PFN_PHYS(n_pud); pt_phys = pmd_phys + PFN_PHYS(n_pmd); p2m_pfn = PFN_DOWN(pt_phys) + n_pt; pgd = __va(read_cr3_pa()); new_p2m = (unsigned long *)(2 * PGDIR_SIZE); - idx_p4d = 0; save_pud = n_pud; - do { - if (n_p4d > 0) { - p4d = early_memremap(p4d_phys, PAGE_SIZE); - clear_page(p4d); - n_pud = min(save_pud, PTRS_PER_P4D); - } - for (idx_pud = 0; idx_pud < n_pud; idx_pud++) { - pud = early_memremap(pud_phys, PAGE_SIZE); - clear_page(pud); - for (idx_pmd = 0; idx_pmd < min(n_pmd, PTRS_PER_PUD); - idx_pmd++) { - pmd = early_memremap(pmd_phys, PAGE_SIZE); - clear_page(pmd); - for (idx_pt = 0; idx_pt < min(n_pt, PTRS_PER_PMD); - idx_pt++) { - pt = early_memremap(pt_phys, PAGE_SIZE); - clear_page(pt); - for (idx_pte = 0; - idx_pte < min(n_pte, PTRS_PER_PTE); - idx_pte++) { - set_pte(pt + idx_pte, - pfn_pte(p2m_pfn, PAGE_KERNEL)); - p2m_pfn++; - } - n_pte -= PTRS_PER_PTE; - early_memunmap(pt, PAGE_SIZE); - make_lowmem_page_readonly(__va(pt_phys)); - pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, - PFN_DOWN(pt_phys)); - set_pmd(pmd + idx_pt, - __pmd(_PAGE_TABLE | pt_phys)); - pt_phys += PAGE_SIZE; + for (idx_pud = 0; idx_pud < n_pud; idx_pud++) { + pud = early_memremap(pud_phys, PAGE_SIZE); + clear_page(pud); + for (idx_pmd = 0; idx_pmd < min(n_pmd, PTRS_PER_PUD); + idx_pmd++) { + pmd = early_memremap(pmd_phys, PAGE_SIZE); + clear_page(pmd); + for (idx_pt = 0; idx_pt < min(n_pt, PTRS_PER_PMD); + idx_pt++) { + pt = early_memremap(pt_phys, PAGE_SIZE); + clear_page(pt); + for (idx_pte = 0; + idx_pte < min(n_pte, PTRS_PER_PTE); + idx_pte++) { + set_pte(pt + idx_pte, + pfn_pte(p2m_pfn, PAGE_KERNEL)); + p2m_pfn++; } - n_pt -= PTRS_PER_PMD; - early_memunmap(pmd, PAGE_SIZE); - make_lowmem_page_readonly(__va(pmd_phys)); - pin_pagetable_pfn(MMUEXT_PIN_L2_TABLE, - PFN_DOWN(pmd_phys)); - set_pud(pud + idx_pmd, __pud(_PAGE_TABLE | pmd_phys)); - pmd_phys += PAGE_SIZE; + n_pte -= PTRS_PER_PTE; + early_memunmap(pt, PAGE_SIZE); + make_lowmem_page_readonly(__va(pt_phys)); + pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, + PFN_DOWN(pt_phys)); + set_pmd(pmd + idx_pt, + __pmd(_PAGE_TABLE | pt_phys)); + pt_phys += PAGE_SIZE; } - n_pmd -= PTRS_PER_PUD; - early_memunmap(pud, PAGE_SIZE); - make_lowmem_page_readonly(__va(pud_phys)); - pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(pud_phys)); - if (n_p4d > 0) - set_p4d(p4d + idx_pud, __p4d(_PAGE_TABLE | pud_phys)); - else - set_pgd(pgd + 2 + idx_pud, __pgd(_PAGE_TABLE | pud_phys)); - pud_phys += PAGE_SIZE; - } - if (n_p4d > 0) { - save_pud -= PTRS_PER_P4D; - early_memunmap(p4d, PAGE_SIZE); - make_lowmem_page_readonly(__va(p4d_phys)); - pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, PFN_DOWN(p4d_phys)); - set_pgd(pgd + 2 + idx_p4d, __pgd(_PAGE_TABLE | p4d_phys)); - p4d_phys += PAGE_SIZE; + n_pt -= PTRS_PER_PMD; + early_memunmap(pmd, PAGE_SIZE); + make_lowmem_page_readonly(__va(pmd_phys)); + pin_pagetable_pfn(MMUEXT_PIN_L2_TABLE, + PFN_DOWN(pmd_phys)); + set_pud(pud + idx_pmd, __pud(_PAGE_TABLE | pmd_phys)); + pmd_phys += PAGE_SIZE; } - } while (++idx_p4d < n_p4d); + n_pmd -= PTRS_PER_PUD; + early_memunmap(pud, PAGE_SIZE); + make_lowmem_page_readonly(__va(pud_phys)); + pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(pud_phys)); + set_pgd(pgd + 2 + idx_pud, __pgd(_PAGE_TABLE | pud_phys)); + pud_phys += PAGE_SIZE; + } /* Now copy the old p2m info to the new area. */ memcpy(new_p2m, xen_p2m_addr, size); @@ -2361,7 +2322,7 @@ static void __init xen_post_allocator_init(void) pv_mmu_ops.set_pte = xen_set_pte; pv_mmu_ops.set_pmd = xen_set_pmd; pv_mmu_ops.set_pud = xen_set_pud; -#if CONFIG_PGTABLE_LEVELS >= 4 +#ifdef CONFIG_X86_64 pv_mmu_ops.set_p4d = xen_set_p4d; #endif @@ -2371,7 +2332,7 @@ static void __init xen_post_allocator_init(void) pv_mmu_ops.alloc_pmd = xen_alloc_pmd; pv_mmu_ops.release_pte = xen_release_pte; pv_mmu_ops.release_pmd = xen_release_pmd; -#if CONFIG_PGTABLE_LEVELS >= 4 +#ifdef CONFIG_X86_64 pv_mmu_ops.alloc_pud = xen_alloc_pud; pv_mmu_ops.release_pud = xen_release_pud; #endif @@ -2435,14 +2396,14 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = { .make_pmd = PV_CALLEE_SAVE(xen_make_pmd), .pmd_val = PV_CALLEE_SAVE(xen_pmd_val), -#if CONFIG_PGTABLE_LEVELS >= 4 +#ifdef CONFIG_X86_64 .pud_val = PV_CALLEE_SAVE(xen_pud_val), .make_pud = PV_CALLEE_SAVE(xen_make_pud), .set_p4d = xen_set_p4d_hyper, .alloc_pud = xen_alloc_pmd_init, .release_pud = xen_release_pmd_init, -#endif /* CONFIG_PGTABLE_LEVELS == 4 */ +#endif /* CONFIG_X86_64 */ .activate_mm = xen_activate_mm, .dup_mmap = xen_dup_mmap, diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 6083ba462f35..13b4f19b9131 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -547,7 +547,7 @@ int xen_alloc_p2m_entry(unsigned long pfn) if (p2m_top_mfn && pfn < MAX_P2M_PFN) { topidx = p2m_top_index(pfn); top_mfn_p = &p2m_top_mfn[topidx]; - mid_mfn = ACCESS_ONCE(p2m_top_mfn_p[topidx]); + mid_mfn = READ_ONCE(p2m_top_mfn_p[topidx]); BUG_ON(virt_to_mfn(mid_mfn) != *top_mfn_p); diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index 05f91ce9b55e..c0c756c76afe 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -14,6 +14,7 @@ * single-threaded. */ #include <linux/sched.h> +#include <linux/sched/task_stack.h> #include <linux/err.h> #include <linux/slab.h> #include <linux/smp.h> @@ -294,12 +295,19 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) #endif memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt)); + /* + * Bring up the CPU in cpu_bringup_and_idle() with the stack + * pointing just below where pt_regs would be if it were a normal + * kernel entry. + */ ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle; ctxt->flags = VGCF_IN_KERNEL; ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */ ctxt->user_regs.ds = __USER_DS; ctxt->user_regs.es = __USER_DS; ctxt->user_regs.ss = __KERNEL_DS; + ctxt->user_regs.cs = __KERNEL_CS; + ctxt->user_regs.esp = (unsigned long)task_pt_regs(idle); xen_copy_trap_info(ctxt->trap_ctxt); @@ -314,8 +322,13 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) ctxt->gdt_frames[0] = gdt_mfn; ctxt->gdt_ents = GDT_ENTRIES; + /* + * Set SS:SP that Xen will use when entering guest kernel mode + * from guest user mode. Subsequent calls to load_sp0() can + * change this value. + */ ctxt->kernel_ss = __KERNEL_DS; - ctxt->kernel_sp = idle->thread.sp0; + ctxt->kernel_sp = task_top_of_stack(idle); #ifdef CONFIG_X86_32 ctxt->event_callback_cs = __KERNEL_CS; @@ -327,10 +340,8 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) (unsigned long)xen_hypervisor_callback; ctxt->failsafe_callback_eip = (unsigned long)xen_failsafe_callback; - ctxt->user_regs.cs = __KERNEL_CS; per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir); - ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs); ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_gfn(swapper_pg_dir)); if (HYPERVISOR_vcpu_op(VCPUOP_initialise, xen_vcpu_nr(cpu), ctxt)) BUG(); diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index 08324c64005d..02f3445a2b5f 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c @@ -11,6 +11,7 @@ #include <linux/slab.h> #include <asm/paravirt.h> +#include <asm/qspinlock.h> #include <xen/interface/xen.h> #include <xen/events.h> @@ -81,8 +82,11 @@ void xen_init_lock_cpu(int cpu) int irq; char *name; - if (!xen_pvspin) + if (!xen_pvspin) { + if (cpu == 0) + static_branch_disable(&virt_spin_lock_key); return; + } WARN(per_cpu(lock_kicker_irq, cpu) >= 0, "spinlock on CPU%d exists on IRQ%d!\n", cpu, per_cpu(lock_kicker_irq, cpu)); diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index c98a48c861fd..8a10c9a9e2b5 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S @@ -30,7 +30,7 @@ xen_pv_trap debug xen_pv_trap xendebug xen_pv_trap int3 xen_pv_trap xenint3 -xen_pv_trap nmi +xen_pv_trap xennmi xen_pv_trap overflow xen_pv_trap bounds xen_pv_trap invalid_op diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index b5b8d7f43557..497cc55a0c16 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -10,6 +10,7 @@ #include <asm/boot.h> #include <asm/asm.h> #include <asm/page_types.h> +#include <asm/unwind_hints.h> #include <xen/interface/elfnote.h> #include <xen/interface/features.h> @@ -20,6 +21,7 @@ #ifdef CONFIG_XEN_PV __INIT ENTRY(startup_xen) + UNWIND_HINT_EMPTY cld /* Clear .bss */ @@ -34,21 +36,24 @@ ENTRY(startup_xen) mov $init_thread_union+THREAD_SIZE, %_ASM_SP jmp xen_start_kernel - +END(startup_xen) __FINIT #endif .pushsection .text .balign PAGE_SIZE ENTRY(hypercall_page) - .skip PAGE_SIZE + .rept (PAGE_SIZE / 32) + UNWIND_HINT_EMPTY + .skip 32 + .endr #define HYPERCALL(n) \ .equ xen_hypercall_##n, hypercall_page + __HYPERVISOR_##n * 32; \ .type xen_hypercall_##n, @function; .size xen_hypercall_##n, 32 #include <asm/xen-hypercalls.h> #undef HYPERCALL - +END(hypercall_page) .popsection ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "linux") diff --git a/arch/xtensa/include/asm/spinlock.h b/arch/xtensa/include/asm/spinlock.h index 3bb49681ee24..c6e1290dcbb7 100644 --- a/arch/xtensa/include/asm/spinlock.h +++ b/arch/xtensa/include/asm/spinlock.h @@ -33,8 +33,6 @@ #define arch_spin_is_locked(x) ((x)->slock != 0) -#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) - static inline void arch_spin_lock(arch_spinlock_t *lock) { unsigned long tmp; @@ -97,8 +95,6 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock) * 0x80000000 one writer owns the rwlock, no other writers, no readers */ -#define arch_write_can_lock(x) ((x)->lock == 0) - static inline void arch_write_lock(arch_rwlock_t *rw) { unsigned long tmp; @@ -200,7 +196,4 @@ static inline void arch_read_unlock(arch_rwlock_t *rw) : "memory"); } -#define arch_read_lock_flags(lock, flags) arch_read_lock(lock) -#define arch_write_lock_flags(lock, flags) arch_write_lock(lock) - #endif /* _XTENSA_SPINLOCK_H */ diff --git a/arch/xtensa/platforms/iss/console.c b/arch/xtensa/platforms/iss/console.c index 0140a22551c8..464c2684c4f1 100644 --- a/arch/xtensa/platforms/iss/console.c +++ b/arch/xtensa/platforms/iss/console.c @@ -47,15 +47,14 @@ static char *serial_name = "ISS serial driver"; * initialization for the tty structure. */ -static void rs_poll(unsigned long); +static void rs_poll(struct timer_list *); static int rs_open(struct tty_struct *tty, struct file * filp) { tty->port = &serial_port; spin_lock_bh(&timer_lock); if (tty->count == 1) { - setup_timer(&serial_timer, rs_poll, - (unsigned long)&serial_port); + timer_setup(&serial_timer, rs_poll, 0); mod_timer(&serial_timer, jiffies + SERIAL_TIMER_VALUE); } spin_unlock_bh(&timer_lock); @@ -92,9 +91,9 @@ static int rs_write(struct tty_struct * tty, return count; } -static void rs_poll(unsigned long priv) +static void rs_poll(struct timer_list *unused) { - struct tty_port *port = (struct tty_port *)priv; + struct tty_port *port = &serial_port; int i = 0; int rd = 1; unsigned char c; diff --git a/arch/xtensa/platforms/iss/network.c b/arch/xtensa/platforms/iss/network.c index 66a5d15a9e0e..6363b18e5b8c 100644 --- a/arch/xtensa/platforms/iss/network.c +++ b/arch/xtensa/platforms/iss/network.c @@ -349,9 +349,9 @@ static int iss_net_poll(void) } -static void iss_net_timer(unsigned long priv) +static void iss_net_timer(struct timer_list *t) { - struct iss_net_private *lp = (struct iss_net_private *)priv; + struct iss_net_private *lp = from_timer(lp, t, timer); iss_net_poll(); spin_lock(&lp->lock); @@ -386,10 +386,8 @@ static int iss_net_open(struct net_device *dev) spin_unlock_bh(&opened_lock); spin_lock_bh(&lp->lock); - init_timer(&lp->timer); + timer_setup(&lp->timer, iss_net_timer, 0); lp->timer_val = ISS_NET_TIMER_VALUE; - lp->timer.data = (unsigned long) lp; - lp->timer.function = iss_net_timer; mod_timer(&lp->timer, jiffies + lp->timer_val); out: @@ -482,7 +480,7 @@ static int iss_net_change_mtu(struct net_device *dev, int new_mtu) return -EINVAL; } -void iss_net_user_timer_expire(unsigned long _conn) +void iss_net_user_timer_expire(struct timer_list *unused) { } @@ -582,8 +580,7 @@ static int iss_net_configure(int index, char *init) return 1; } - init_timer(&lp->tl); - lp->tl.function = iss_net_user_timer_expire; + timer_setup(&lp->tl, iss_net_user_timer_expire, 0); return 0; diff --git a/arch/xtensa/platforms/xtfpga/lcd.c b/arch/xtensa/platforms/xtfpga/lcd.c index 4dc0c1b43f4b..2f7eb66c23ec 100644 --- a/arch/xtensa/platforms/xtfpga/lcd.c +++ b/arch/xtensa/platforms/xtfpga/lcd.c @@ -34,23 +34,23 @@ static void lcd_put_byte(u8 *addr, u8 data) { #ifdef CONFIG_XTFPGA_LCD_8BIT_ACCESS - ACCESS_ONCE(*addr) = data; + WRITE_ONCE(*addr, data); #else - ACCESS_ONCE(*addr) = data & 0xf0; - ACCESS_ONCE(*addr) = (data << 4) & 0xf0; + WRITE_ONCE(*addr, data & 0xf0); + WRITE_ONCE(*addr, (data << 4) & 0xf0); #endif } static int __init lcd_init(void) { - ACCESS_ONCE(*LCD_INSTR_ADDR) = LCD_DISPLAY_MODE8BIT; + WRITE_ONCE(*LCD_INSTR_ADDR, LCD_DISPLAY_MODE8BIT); mdelay(5); - ACCESS_ONCE(*LCD_INSTR_ADDR) = LCD_DISPLAY_MODE8BIT; + WRITE_ONCE(*LCD_INSTR_ADDR, LCD_DISPLAY_MODE8BIT); udelay(200); - ACCESS_ONCE(*LCD_INSTR_ADDR) = LCD_DISPLAY_MODE8BIT; + WRITE_ONCE(*LCD_INSTR_ADDR, LCD_DISPLAY_MODE8BIT); udelay(50); #ifndef CONFIG_XTFPGA_LCD_8BIT_ACCESS - ACCESS_ONCE(*LCD_INSTR_ADDR) = LCD_DISPLAY_MODE4BIT; + WRITE_ONCE(*LCD_INSTR_ADDR, LCD_DISPLAY_MODE4BIT); udelay(50); lcd_put_byte(LCD_INSTR_ADDR, LCD_DISPLAY_MODE4BIT); udelay(50); |