diff options
Diffstat (limited to 'arch')
331 files changed, 7753 insertions, 4061 deletions
diff --git a/arch/alpha/include/asm/processor.h b/arch/alpha/include/asm/processor.h index 6cb7fe85c4b5..b4cf03690394 100644 --- a/arch/alpha/include/asm/processor.h +++ b/arch/alpha/include/asm/processor.h @@ -57,6 +57,7 @@ unsigned long get_wchan(struct task_struct *p); ((tsk) == current ? rdusp() : task_thread_info(tsk)->pcb.usp) #define cpu_relax() barrier() +#define cpu_relax_lowlatency() cpu_relax() #define ARCH_HAS_PREFETCH #define ARCH_HAS_PREFETCHW diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h index d99f9b37cd15..82588f3ba77f 100644 --- a/arch/arc/include/asm/processor.h +++ b/arch/arc/include/asm/processor.h @@ -62,6 +62,8 @@ unsigned long thread_saved_pc(struct task_struct *t); #define cpu_relax() do { } while (0) #endif +#define cpu_relax_lowlatency() cpu_relax() + #define copy_segments(tsk, mm) do { } while (0) #define release_segments(mm) do { } while (0) diff --git a/arch/arc/kernel/perf_event.c b/arch/arc/kernel/perf_event.c index 63177e4cb66d..b9a5685a990e 100644 --- a/arch/arc/kernel/perf_event.c +++ b/arch/arc/kernel/perf_event.c @@ -99,10 +99,6 @@ static int arc_pmu_event_init(struct perf_event *event) struct hw_perf_event *hwc = &event->hw; int ret; - /* ARC 700 PMU does not support sampling events */ - if (is_sampling_event(event)) - return -ENOENT; - switch (event->attr.type) { case PERF_TYPE_HARDWARE: if (event->attr.config >= PERF_COUNT_HW_MAX) @@ -298,6 +294,9 @@ static int arc_pmu_device_probe(struct platform_device *pdev) .read = arc_pmu_read, }; + /* ARC 700 PMU does not support sampling events */ + arc_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; + ret = perf_pmu_register(&arc_pmu->pmu, pdev->name, PERF_TYPE_RAW); return ret; diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 245058b3b0ef..290f02ee0157 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -6,6 +6,7 @@ config ARM select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAVE_CUSTOM_GPIO_H select ARCH_MIGHT_HAVE_PC_PARPORT + select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_CMPXCHG_LOCKREF select ARCH_WANT_IPC_PARSE_VERSION @@ -312,7 +313,7 @@ config ARCH_MULTIPLATFORM config ARCH_INTEGRATOR bool "ARM Ltd. Integrator family" select ARM_AMBA - select ARM_PATCH_PHYS_VIRT + select ARM_PATCH_PHYS_VIRT if MMU select AUTO_ZRELADDR select COMMON_CLK select COMMON_CLK_VERSATILE @@ -658,7 +659,7 @@ config ARCH_MSM config ARCH_SHMOBILE_LEGACY bool "Renesas ARM SoCs (non-multiplatform)" select ARCH_SHMOBILE - select ARM_PATCH_PHYS_VIRT + select ARM_PATCH_PHYS_VIRT if MMU select CLKDEV_LOOKUP select GENERIC_CLOCKEVENTS select HAVE_ARM_SCU if SMP diff --git a/arch/arm/boot/dts/at91sam9n12.dtsi b/arch/arm/boot/dts/at91sam9n12.dtsi index 287795985e32..b84bac5bada4 100644 --- a/arch/arm/boot/dts/at91sam9n12.dtsi +++ b/arch/arm/boot/dts/at91sam9n12.dtsi @@ -925,7 +925,7 @@ compatible = "atmel,at91rm9200-ohci", "usb-ohci"; reg = <0x00500000 0x00100000>; interrupts = <22 IRQ_TYPE_LEVEL_HIGH 2>; - clocks = <&usb>, <&uhphs_clk>, <&udphs_clk>, + clocks = <&usb>, <&uhphs_clk>, <&uhphs_clk>, <&uhpck>; clock-names = "usb_clk", "ohci_clk", "hclk", "uhpck"; status = "disabled"; diff --git a/arch/arm/boot/dts/at91sam9x5.dtsi b/arch/arm/boot/dts/at91sam9x5.dtsi index 2ebc42140ea6..2c0d6ea3ab41 100644 --- a/arch/arm/boot/dts/at91sam9x5.dtsi +++ b/arch/arm/boot/dts/at91sam9x5.dtsi @@ -1124,6 +1124,7 @@ compatible = "atmel,at91sam9rl-pwm"; reg = <0xf8034000 0x300>; interrupts = <18 IRQ_TYPE_LEVEL_HIGH 4>; + clocks = <&pwm_clk>; #pwm-cells = <3>; status = "disabled"; }; @@ -1155,8 +1156,7 @@ compatible = "atmel,at91rm9200-ohci", "usb-ohci"; reg = <0x00600000 0x100000>; interrupts = <22 IRQ_TYPE_LEVEL_HIGH 2>; - clocks = <&usb>, <&uhphs_clk>, <&udphs_clk>, - <&uhpck>; + clocks = <&usb>, <&uhphs_clk>, <&uhphs_clk>, <&uhpck>; clock-names = "usb_clk", "ohci_clk", "hclk", "uhpck"; status = "disabled"; }; diff --git a/arch/arm/boot/dts/exynos4210.dtsi b/arch/arm/boot/dts/exynos4210.dtsi index ee3001f38821..97ea7a9b1f62 100644 --- a/arch/arm/boot/dts/exynos4210.dtsi +++ b/arch/arm/boot/dts/exynos4210.dtsi @@ -31,6 +31,16 @@ pinctrl2 = &pinctrl_2; }; + pmu_system_controller: system-controller@10020000 { + clock-names = "clkout0", "clkout1", "clkout2", "clkout3", + "clkout4", "clkout8", "clkout9"; + clocks = <&clock CLK_OUT_DMC>, <&clock CLK_OUT_TOP>, + <&clock CLK_OUT_LEFTBUS>, <&clock CLK_OUT_RIGHTBUS>, + <&clock CLK_OUT_CPU>, <&clock CLK_XXTI>, + <&clock CLK_XUSBXTI>; + #clock-cells = <1>; + }; + sysram@02020000 { compatible = "mmio-sram"; reg = <0x02020000 0x20000>; diff --git a/arch/arm/boot/dts/exynos4x12.dtsi b/arch/arm/boot/dts/exynos4x12.dtsi index c5a943df1cd7..de1f9c77b589 100644 --- a/arch/arm/boot/dts/exynos4x12.dtsi +++ b/arch/arm/boot/dts/exynos4x12.dtsi @@ -139,6 +139,13 @@ pmu_system_controller: system-controller@10020000 { compatible = "samsung,exynos4212-pmu", "syscon"; + clock-names = "clkout0", "clkout1", "clkout2", "clkout3", + "clkout4", "clkout8", "clkout9"; + clocks = <&clock CLK_OUT_DMC>, <&clock CLK_OUT_TOP>, + <&clock CLK_OUT_LEFTBUS>, <&clock CLK_OUT_RIGHTBUS>, + <&clock CLK_OUT_CPU>, <&clock CLK_XXTI>, + <&clock CLK_XUSBXTI>; + #clock-cells = <1>; }; g2d@10800000 { diff --git a/arch/arm/boot/dts/exynos5250.dtsi b/arch/arm/boot/dts/exynos5250.dtsi index 834fb5a5306f..492e1eff37bd 100644 --- a/arch/arm/boot/dts/exynos5250.dtsi +++ b/arch/arm/boot/dts/exynos5250.dtsi @@ -191,6 +191,9 @@ pmu_system_controller: system-controller@10040000 { compatible = "samsung,exynos5250-pmu", "syscon"; reg = <0x10040000 0x5000>; + clock-names = "clkout16"; + clocks = <&clock CLK_FIN_PLL>; + #clock-cells = <1>; }; sysreg_system_controller: syscon@10050000 { diff --git a/arch/arm/boot/dts/exynos5420.dtsi b/arch/arm/boot/dts/exynos5420.dtsi index 15957227ffda..a40a5c2b5a4f 100644 --- a/arch/arm/boot/dts/exynos5420.dtsi +++ b/arch/arm/boot/dts/exynos5420.dtsi @@ -727,6 +727,9 @@ pmu_system_controller: system-controller@10040000 { compatible = "samsung,exynos5420-pmu", "syscon"; reg = <0x10040000 0x5000>; + clock-names = "clkout16"; + clocks = <&clock CLK_FIN_PLL>; + #clock-cells = <1>; }; sysreg_system_controller: syscon@10050000 { diff --git a/arch/arm/boot/dts/hi3620.dtsi b/arch/arm/boot/dts/hi3620.dtsi index ab1116d086be..83a5b8685bd9 100644 --- a/arch/arm/boot/dts/hi3620.dtsi +++ b/arch/arm/boot/dts/hi3620.dtsi @@ -73,7 +73,7 @@ L2: l2-cache { compatible = "arm,pl310-cache"; - reg = <0xfc10000 0x100000>; + reg = <0x100000 0x100000>; interrupts = <0 15 4>; cache-unified; cache-level = <2>; diff --git a/arch/arm/boot/dts/omap3-n900.dts b/arch/arm/boot/dts/omap3-n900.dts index 1fe45d1f75ec..b15f1a77d684 100644 --- a/arch/arm/boot/dts/omap3-n900.dts +++ b/arch/arm/boot/dts/omap3-n900.dts @@ -353,7 +353,7 @@ }; twl_power: power { - compatible = "ti,twl4030-power-n900", "ti,twl4030-power-idle-osc-off"; + compatible = "ti,twl4030-power-n900"; ti,use_poweroff; }; }; diff --git a/arch/arm/boot/dts/r8a7791.dtsi b/arch/arm/boot/dts/r8a7791.dtsi index 8d7ffaeff6e0..79f68acfd5d4 100644 --- a/arch/arm/boot/dts/r8a7791.dtsi +++ b/arch/arm/boot/dts/r8a7791.dtsi @@ -540,9 +540,9 @@ #clock-cells = <0>; clock-output-names = "sd1"; }; - sd2_clk: sd3_clk@e615007c { + sd2_clk: sd3_clk@e615026c { compatible = "renesas,r8a7791-div6-clock", "renesas,cpg-div6-clock"; - reg = <0 0xe615007c 0 4>; + reg = <0 0xe615026c 0 4>; clocks = <&pll1_div2_clk>; #clock-cells = <0>; clock-output-names = "sd2"; diff --git a/arch/arm/boot/dts/ste-nomadik-s8815.dts b/arch/arm/boot/dts/ste-nomadik-s8815.dts index f557feb997f4..90d8b6c7a205 100644 --- a/arch/arm/boot/dts/ste-nomadik-s8815.dts +++ b/arch/arm/boot/dts/ste-nomadik-s8815.dts @@ -4,7 +4,7 @@ */ /dts-v1/; -/include/ "ste-nomadik-stn8815.dtsi" +#include "ste-nomadik-stn8815.dtsi" / { model = "Calao Systems USB-S8815"; diff --git a/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi b/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi index d316c955bd5f..dbcf521b017f 100644 --- a/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi +++ b/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi @@ -1,7 +1,9 @@ /* * Device Tree for the ST-Ericsson Nomadik 8815 STn8815 SoC */ -/include/ "skeleton.dtsi" + +#include <dt-bindings/gpio/gpio.h> +#include "skeleton.dtsi" / { #address-cells = <1>; @@ -842,8 +844,7 @@ bus-width = <4>; cap-mmc-highspeed; cap-sd-highspeed; - cd-gpios = <&gpio3 15 0x1>; - cd-inverted; + cd-gpios = <&gpio3 15 GPIO_ACTIVE_LOW>; pinctrl-names = "default"; pinctrl-0 = <&mmcsd_default_mux>, <&mmcsd_default_mode>; vmmc-supply = <&vmmc_regulator>; diff --git a/arch/arm/crypto/aesbs-glue.c b/arch/arm/crypto/aesbs-glue.c index 4522366da759..15468fbbdea3 100644 --- a/arch/arm/crypto/aesbs-glue.c +++ b/arch/arm/crypto/aesbs-glue.c @@ -137,7 +137,7 @@ static int aesbs_cbc_encrypt(struct blkcipher_desc *desc, dst += AES_BLOCK_SIZE; } while (--blocks); } - err = blkcipher_walk_done(desc, &walk, 0); + err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE); } return err; } @@ -158,7 +158,7 @@ static int aesbs_cbc_decrypt(struct blkcipher_desc *desc, bsaes_cbc_encrypt(walk.src.virt.addr, walk.dst.virt.addr, walk.nbytes, &ctx->dec, walk.iv); kernel_neon_end(); - err = blkcipher_walk_done(desc, &walk, 0); + err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE); } while (walk.nbytes) { u32 blocks = walk.nbytes / AES_BLOCK_SIZE; @@ -182,7 +182,7 @@ static int aesbs_cbc_decrypt(struct blkcipher_desc *desc, dst += AES_BLOCK_SIZE; src += AES_BLOCK_SIZE; } while (--blocks); - err = blkcipher_walk_done(desc, &walk, 0); + err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE); } return err; } @@ -268,7 +268,7 @@ static int aesbs_xts_encrypt(struct blkcipher_desc *desc, bsaes_xts_encrypt(walk.src.virt.addr, walk.dst.virt.addr, walk.nbytes, &ctx->enc, walk.iv); kernel_neon_end(); - err = blkcipher_walk_done(desc, &walk, 0); + err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE); } return err; } @@ -292,7 +292,7 @@ static int aesbs_xts_decrypt(struct blkcipher_desc *desc, bsaes_xts_decrypt(walk.src.virt.addr, walk.dst.virt.addr, walk.nbytes, &ctx->dec, walk.iv); kernel_neon_end(); - err = blkcipher_walk_done(desc, &walk, 0); + err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE); } return err; } diff --git a/arch/arm/include/asm/mach/arch.h b/arch/arm/include/asm/mach/arch.h index 060a75e99263..0406cb3f1af7 100644 --- a/arch/arm/include/asm/mach/arch.h +++ b/arch/arm/include/asm/mach/arch.h @@ -50,6 +50,7 @@ struct machine_desc { struct smp_operations *smp; /* SMP operations */ bool (*smp_init)(void); void (*fixup)(struct tag *, char **); + void (*dt_fixup)(void); void (*init_meminfo)(void); void (*reserve)(void);/* reserve mem blocks */ void (*map_io)(void);/* IO mapping function */ diff --git a/arch/arm/include/asm/processor.h b/arch/arm/include/asm/processor.h index c3d5fc124a05..8a1e8e995dae 100644 --- a/arch/arm/include/asm/processor.h +++ b/arch/arm/include/asm/processor.h @@ -82,6 +82,8 @@ unsigned long get_wchan(struct task_struct *p); #define cpu_relax() barrier() #endif +#define cpu_relax_lowlatency() cpu_relax() + #define task_pt_regs(p) \ ((struct pt_regs *)(THREAD_START_SP + task_stack_page(p)) - 1) diff --git a/arch/arm/kernel/devtree.c b/arch/arm/kernel/devtree.c index e94a157ddff1..11c54de9f8cf 100644 --- a/arch/arm/kernel/devtree.c +++ b/arch/arm/kernel/devtree.c @@ -212,7 +212,7 @@ const struct machine_desc * __init setup_machine_fdt(unsigned int dt_phys) mdesc_best = &__mach_desc_GENERIC_DT; #endif - if (!dt_phys || !early_init_dt_scan(phys_to_virt(dt_phys))) + if (!dt_phys || !early_init_dt_verify(phys_to_virt(dt_phys))) return NULL; mdesc = of_flat_dt_match_machine(mdesc_best, arch_get_next_mach); @@ -237,6 +237,12 @@ const struct machine_desc * __init setup_machine_fdt(unsigned int dt_phys) dump_machine_table(); /* does not return */ } + /* We really don't want to do this, but sometimes firmware provides buggy data */ + if (mdesc->dt_fixup) + mdesc->dt_fixup(); + + early_init_dt_scan_nodes(); + /* Change machine number to match the mdesc we're using */ __machine_arch_type = mdesc->nr; diff --git a/arch/arm/kernel/iwmmxt.S b/arch/arm/kernel/iwmmxt.S index a5599cfc43cb..2b32978ae905 100644 --- a/arch/arm/kernel/iwmmxt.S +++ b/arch/arm/kernel/iwmmxt.S @@ -94,13 +94,19 @@ ENTRY(iwmmxt_task_enable) mrc p15, 0, r2, c2, c0, 0 mov r2, r2 @ cpwait + bl concan_save - teq r1, #0 @ test for last ownership - mov lr, r9 @ normal exit from exception - beq concan_load @ no owner, skip save +#ifdef CONFIG_PREEMPT_COUNT + get_thread_info r10 +#endif +4: dec_preempt_count r10, r3 + mov pc, r9 @ normal exit from exception concan_save: + teq r1, #0 @ test for last ownership + beq concan_load @ no owner, skip save + tmrc r2, wCon @ CUP? wCx @@ -138,7 +144,7 @@ concan_dump: wstrd wR15, [r1, #MMX_WR15] 2: teq r0, #0 @ anything to load? - beq 3f + moveq pc, lr @ if not, return concan_load: @@ -171,14 +177,9 @@ concan_load: @ clear CUP/MUP (only if r1 != 0) teq r1, #0 mov r2, #0 - beq 3f - tmcr wCon, r2 + moveq pc, lr -3: -#ifdef CONFIG_PREEMPT_COUNT - get_thread_info r10 -#endif -4: dec_preempt_count r10, r3 + tmcr wCon, r2 mov pc, lr /* diff --git a/arch/arm/kernel/kgdb.c b/arch/arm/kernel/kgdb.c index 778c2f7024ff..a74b53c1b7df 100644 --- a/arch/arm/kernel/kgdb.c +++ b/arch/arm/kernel/kgdb.c @@ -160,12 +160,16 @@ static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned int instr) static struct undef_hook kgdb_brkpt_hook = { .instr_mask = 0xffffffff, .instr_val = KGDB_BREAKINST, + .cpsr_mask = MODE_MASK, + .cpsr_val = SVC_MODE, .fn = kgdb_brk_fn }; static struct undef_hook kgdb_compiled_brkpt_hook = { .instr_mask = 0xffffffff, .instr_val = KGDB_COMPILED_BREAK, + .cpsr_mask = MODE_MASK, + .cpsr_val = SVC_MODE, .fn = kgdb_compiled_brk_fn }; diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c index 9d853189028b..e35d880f9773 100644 --- a/arch/arm/kernel/topology.c +++ b/arch/arm/kernel/topology.c @@ -275,7 +275,7 @@ void store_cpu_topology(unsigned int cpuid) cpu_topology[cpuid].socket_id, mpidr); } -static inline const int cpu_corepower_flags(void) +static inline int cpu_corepower_flags(void) { return SD_SHARE_PKG_RESOURCES | SD_SHARE_POWERDOMAIN; } diff --git a/arch/arm/mach-exynos/exynos.c b/arch/arm/mach-exynos/exynos.c index 46d893fcbe85..66c9b9614f3c 100644 --- a/arch/arm/mach-exynos/exynos.c +++ b/arch/arm/mach-exynos/exynos.c @@ -335,6 +335,15 @@ static void __init exynos_reserve(void) #endif } +static void __init exynos_dt_fixup(void) +{ + /* + * Some versions of uboot pass garbage entries in the memory node, + * use the old CONFIG_ARM_NR_BANKS + */ + of_fdt_limit_memory(8); +} + DT_MACHINE_START(EXYNOS_DT, "SAMSUNG EXYNOS (Flattened Device Tree)") /* Maintainer: Thomas Abraham <thomas.abraham@linaro.org> */ /* Maintainer: Kukjin Kim <kgene.kim@samsung.com> */ @@ -348,4 +357,5 @@ DT_MACHINE_START(EXYNOS_DT, "SAMSUNG EXYNOS (Flattened Device Tree)") .dt_compat = exynos_dt_compat, .restart = exynos_restart, .reserve = exynos_reserve, + .dt_fixup = exynos_dt_fixup, MACHINE_END diff --git a/arch/arm/mach-exynos/hotplug.c b/arch/arm/mach-exynos/hotplug.c index 8a134d019cb3..920a4baa53cd 100644 --- a/arch/arm/mach-exynos/hotplug.c +++ b/arch/arm/mach-exynos/hotplug.c @@ -40,15 +40,17 @@ static inline void cpu_leave_lowpower(void) static inline void platform_do_lowpower(unsigned int cpu, int *spurious) { + u32 mpidr = cpu_logical_map(cpu); + u32 core_id = MPIDR_AFFINITY_LEVEL(mpidr, 0); + for (;;) { - /* make cpu1 to be turned off at next WFI command */ - if (cpu == 1) - exynos_cpu_power_down(cpu); + /* Turn the CPU off on next WFI instruction. */ + exynos_cpu_power_down(core_id); wfi(); - if (pen_release == cpu_logical_map(cpu)) { + if (pen_release == core_id) { /* * OK, proper wakeup, we're done */ diff --git a/arch/arm/mach-exynos/platsmp.c b/arch/arm/mach-exynos/platsmp.c index 1c8d31e39520..50b9aad5e27b 100644 --- a/arch/arm/mach-exynos/platsmp.c +++ b/arch/arm/mach-exynos/platsmp.c @@ -90,7 +90,8 @@ static void exynos_secondary_init(unsigned int cpu) static int exynos_boot_secondary(unsigned int cpu, struct task_struct *idle) { unsigned long timeout; - unsigned long phys_cpu = cpu_logical_map(cpu); + u32 mpidr = cpu_logical_map(cpu); + u32 core_id = MPIDR_AFFINITY_LEVEL(mpidr, 0); int ret = -ENOSYS; /* @@ -104,17 +105,18 @@ static int exynos_boot_secondary(unsigned int cpu, struct task_struct *idle) * the holding pen - release it, then wait for it to flag * that it has been released by resetting pen_release. * - * Note that "pen_release" is the hardware CPU ID, whereas + * Note that "pen_release" is the hardware CPU core ID, whereas * "cpu" is Linux's internal ID. */ - write_pen_release(phys_cpu); + write_pen_release(core_id); - if (!exynos_cpu_power_state(cpu)) { - exynos_cpu_power_up(cpu); + if (!exynos_cpu_power_state(core_id)) { + exynos_cpu_power_up(core_id); timeout = 10; /* wait max 10 ms until cpu1 is on */ - while (exynos_cpu_power_state(cpu) != S5P_CORE_LOCAL_PWR_EN) { + while (exynos_cpu_power_state(core_id) + != S5P_CORE_LOCAL_PWR_EN) { if (timeout-- == 0) break; @@ -145,20 +147,20 @@ static int exynos_boot_secondary(unsigned int cpu, struct task_struct *idle) * Try to set boot address using firmware first * and fall back to boot register if it fails. */ - ret = call_firmware_op(set_cpu_boot_addr, phys_cpu, boot_addr); + ret = call_firmware_op(set_cpu_boot_addr, core_id, boot_addr); if (ret && ret != -ENOSYS) goto fail; if (ret == -ENOSYS) { - void __iomem *boot_reg = cpu_boot_reg(phys_cpu); + void __iomem *boot_reg = cpu_boot_reg(core_id); if (IS_ERR(boot_reg)) { ret = PTR_ERR(boot_reg); goto fail; } - __raw_writel(boot_addr, cpu_boot_reg(phys_cpu)); + __raw_writel(boot_addr, cpu_boot_reg(core_id)); } - call_firmware_op(cpu_boot, phys_cpu); + call_firmware_op(cpu_boot, core_id); arch_send_wakeup_ipi_mask(cpumask_of(cpu)); @@ -227,22 +229,24 @@ static void __init exynos_smp_prepare_cpus(unsigned int max_cpus) * boot register if it fails. */ for (i = 1; i < max_cpus; ++i) { - unsigned long phys_cpu; unsigned long boot_addr; + u32 mpidr; + u32 core_id; int ret; - phys_cpu = cpu_logical_map(i); + mpidr = cpu_logical_map(i); + core_id = MPIDR_AFFINITY_LEVEL(mpidr, 0); boot_addr = virt_to_phys(exynos4_secondary_startup); - ret = call_firmware_op(set_cpu_boot_addr, phys_cpu, boot_addr); + ret = call_firmware_op(set_cpu_boot_addr, core_id, boot_addr); if (ret && ret != -ENOSYS) break; if (ret == -ENOSYS) { - void __iomem *boot_reg = cpu_boot_reg(phys_cpu); + void __iomem *boot_reg = cpu_boot_reg(core_id); if (IS_ERR(boot_reg)) break; - __raw_writel(boot_addr, cpu_boot_reg(phys_cpu)); + __raw_writel(boot_addr, cpu_boot_reg(core_id)); } } } diff --git a/arch/arm/mach-imx/clk-imx6q.c b/arch/arm/mach-imx/clk-imx6q.c index 8e795dea02ec..8556c787e59c 100644 --- a/arch/arm/mach-imx/clk-imx6q.c +++ b/arch/arm/mach-imx/clk-imx6q.c @@ -70,7 +70,7 @@ static const char *cko_sels[] = { "cko1", "cko2", }; static const char *lvds_sels[] = { "dummy", "dummy", "dummy", "dummy", "dummy", "dummy", "pll4_audio", "pll5_video", "pll8_mlb", "enet_ref", - "pcie_ref", "sata_ref", + "pcie_ref_125m", "sata_ref_100m", }; enum mx6q_clks { @@ -491,7 +491,7 @@ static void __init imx6q_clocks_init(struct device_node *ccm_node) /* All existing boards with PCIe use LVDS1 */ if (IS_ENABLED(CONFIG_PCI_IMX6)) - clk_set_parent(clk[lvds1_sel], clk[sata_ref]); + clk_set_parent(clk[lvds1_sel], clk[sata_ref_100m]); /* Set initial power mode */ imx6q_set_lpm(WAIT_CLOCKED); diff --git a/arch/arm/mach-mvebu/coherency.c b/arch/arm/mach-mvebu/coherency.c index 477202fd39cc..2bdc3233abe2 100644 --- a/arch/arm/mach-mvebu/coherency.c +++ b/arch/arm/mach-mvebu/coherency.c @@ -292,6 +292,10 @@ static struct notifier_block mvebu_hwcc_nb = { .notifier_call = mvebu_hwcc_notifier, }; +static struct notifier_block mvebu_hwcc_pci_nb = { + .notifier_call = mvebu_hwcc_notifier, +}; + static void __init armada_370_coherency_init(struct device_node *np) { struct resource res; @@ -427,7 +431,7 @@ static int __init coherency_pci_init(void) { if (coherency_available()) bus_register_notifier(&pci_bus_type, - &mvebu_hwcc_nb); + &mvebu_hwcc_pci_nb); return 0; } diff --git a/arch/arm/mach-mvebu/headsmp-a9.S b/arch/arm/mach-mvebu/headsmp-a9.S index 5925366bc03c..da5bb292b91c 100644 --- a/arch/arm/mach-mvebu/headsmp-a9.S +++ b/arch/arm/mach-mvebu/headsmp-a9.S @@ -15,6 +15,8 @@ #include <linux/linkage.h> #include <linux/init.h> +#include <asm/assembler.h> + __CPUINIT #define CPU_RESUME_ADDR_REG 0xf10182d4 @@ -22,13 +24,18 @@ .global armada_375_smp_cpu1_enable_code_end armada_375_smp_cpu1_enable_code_start: - ldr r0, [pc, #4] +ARM_BE8(setend be) + adr r0, 1f + ldr r0, [r0] ldr r1, [r0] +ARM_BE8(rev r1, r1) mov pc, r1 +1: .word CPU_RESUME_ADDR_REG armada_375_smp_cpu1_enable_code_end: ENTRY(mvebu_cortex_a9_secondary_startup) +ARM_BE8(setend be) bl v7_invalidate_l1 b secondary_startup ENDPROC(mvebu_cortex_a9_secondary_startup) diff --git a/arch/arm/mach-mvebu/pmsu.c b/arch/arm/mach-mvebu/pmsu.c index a1d407c0febe..25aa8237d668 100644 --- a/arch/arm/mach-mvebu/pmsu.c +++ b/arch/arm/mach-mvebu/pmsu.c @@ -201,12 +201,12 @@ static noinline int do_armada_370_xp_cpu_suspend(unsigned long deepidle) /* Test the CR_C bit and set it if it was cleared */ asm volatile( - "mrc p15, 0, %0, c1, c0, 0 \n\t" - "tst %0, #(1 << 2) \n\t" - "orreq %0, %0, #(1 << 2) \n\t" - "mcreq p15, 0, %0, c1, c0, 0 \n\t" + "mrc p15, 0, r0, c1, c0, 0 \n\t" + "tst r0, #(1 << 2) \n\t" + "orreq r0, r0, #(1 << 2) \n\t" + "mcreq p15, 0, r0, c1, c0, 0 \n\t" "isb " - : : "r" (0)); + : : : "r0"); pr_warn("Failed to suspend the system\n"); diff --git a/arch/arm/mach-omap2/gpmc-nand.c b/arch/arm/mach-omap2/gpmc-nand.c index 17cd39360afe..93914d220069 100644 --- a/arch/arm/mach-omap2/gpmc-nand.c +++ b/arch/arm/mach-omap2/gpmc-nand.c @@ -50,6 +50,16 @@ static bool gpmc_hwecc_bch_capable(enum omap_ecc ecc_opt) soc_is_omap54xx() || soc_is_dra7xx()) return 1; + if (ecc_opt == OMAP_ECC_BCH4_CODE_HW_DETECTION_SW || + ecc_opt == OMAP_ECC_BCH8_CODE_HW_DETECTION_SW) { + if (cpu_is_omap24xx()) + return 0; + else if (cpu_is_omap3630() && (GET_OMAP_REVISION() == 0)) + return 0; + else + return 1; + } + /* OMAP3xxx do not have ELM engine, so cannot support ECC schemes * which require H/W based ECC error detection */ if ((cpu_is_omap34xx() || cpu_is_omap3630()) && @@ -57,14 +67,6 @@ static bool gpmc_hwecc_bch_capable(enum omap_ecc ecc_opt) (ecc_opt == OMAP_ECC_BCH8_CODE_HW))) return 0; - /* - * For now, assume 4-bit mode is only supported on OMAP3630 ES1.x, x>=1 - * and AM33xx derivates. Other chips may be added if confirmed to work. - */ - if ((ecc_opt == OMAP_ECC_BCH4_CODE_HW_DETECTION_SW) && - (!cpu_is_omap3630() || (GET_OMAP_REVISION() == 0))) - return 0; - /* legacy platforms support only HAM1 (1-bit Hamming) ECC scheme */ if (ecc_opt == OMAP_ECC_HAM1_CODE_HW) return 1; diff --git a/arch/arm/mach-omap2/omap4-common.c b/arch/arm/mach-omap2/omap4-common.c index 539e8106eb96..a0fe747634c1 100644 --- a/arch/arm/mach-omap2/omap4-common.c +++ b/arch/arm/mach-omap2/omap4-common.c @@ -168,6 +168,10 @@ static void omap4_l2c310_write_sec(unsigned long val, unsigned reg) smc_op = OMAP4_MON_L2X0_PREFETCH_INDEX; break; + case L310_POWER_CTRL: + pr_info_once("OMAP L2C310: ROM does not support power control setting\n"); + return; + default: WARN_ONCE(1, "OMAP L2C310: ignoring write to reg 0x%x\n", reg); return; diff --git a/arch/arm/mach-rockchip/Kconfig b/arch/arm/mach-rockchip/Kconfig index 1caee6d548b8..e4564c259ed1 100644 --- a/arch/arm/mach-rockchip/Kconfig +++ b/arch/arm/mach-rockchip/Kconfig @@ -2,6 +2,7 @@ config ARCH_ROCKCHIP bool "Rockchip RK2928 and RK3xxx SOCs" if ARCH_MULTI_V7 select PINCTRL select PINCTRL_ROCKCHIP + select ARCH_HAS_RESET_CONTROLLER select ARCH_REQUIRE_GPIOLIB select ARM_GIC select CACHE_L2X0 diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 4c88935654ca..1f88db06b133 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -461,12 +461,21 @@ void __init dma_contiguous_remap(void) map.type = MT_MEMORY_DMA_READY; /* - * Clear previous low-memory mapping + * Clear previous low-memory mapping to ensure that the + * TLB does not see any conflicting entries, then flush + * the TLB of the old entries before creating new mappings. + * + * This ensures that any speculatively loaded TLB entries + * (even though they may be rare) can not cause any problems, + * and ensures that this code is architecturally compliant. */ for (addr = __phys_to_virt(start); addr < __phys_to_virt(end); addr += PMD_SIZE) pmd_clear(pmd_off_k(addr)); + flush_tlb_kernel_range(__phys_to_virt(start), + __phys_to_virt(end)); + iotable_init(&map, 1); } } diff --git a/arch/arm/mm/idmap.c b/arch/arm/mm/idmap.c index 8e0e52eb76b5..c447ec70e868 100644 --- a/arch/arm/mm/idmap.c +++ b/arch/arm/mm/idmap.c @@ -9,6 +9,11 @@ #include <asm/sections.h> #include <asm/system_info.h> +/* + * Note: accesses outside of the kernel image and the identity map area + * are not supported on any CPU using the idmap tables as its current + * page tables. + */ pgd_t *idmap_pgd; phys_addr_t (*arch_virt_to_idmap) (unsigned long x); @@ -25,6 +30,13 @@ static void idmap_add_pmd(pud_t *pud, unsigned long addr, unsigned long end, pr_warning("Failed to allocate identity pmd.\n"); return; } + /* + * Copy the original PMD to ensure that the PMD entries for + * the kernel image are preserved. + */ + if (!pud_none(*pud)) + memcpy(pmd, pmd_offset(pud, 0), + PTRS_PER_PMD * sizeof(pmd_t)); pud_populate(&init_mm, pud, pmd); pmd += pmd_index(addr); } else diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index ab14b79b03f0..6e3ba8d112a2 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -1406,8 +1406,8 @@ void __init early_paging_init(const struct machine_desc *mdesc, return; /* remap kernel code and data */ - map_start = init_mm.start_code; - map_end = init_mm.brk; + map_start = init_mm.start_code & PMD_MASK; + map_end = ALIGN(init_mm.brk, PMD_SIZE); /* get a handle on things... */ pgd0 = pgd_offset_k(0); @@ -1442,7 +1442,7 @@ void __init early_paging_init(const struct machine_desc *mdesc, } /* remap pmds for kernel mapping */ - phys = __pa(map_start) & PMD_MASK; + phys = __pa(map_start); do { *pmdk++ = __pmd(phys | pmdprot); phys += PMD_SIZE; diff --git a/arch/arm/xen/grant-table.c b/arch/arm/xen/grant-table.c index 859a9bb002d5..91cf08ba1e95 100644 --- a/arch/arm/xen/grant-table.c +++ b/arch/arm/xen/grant-table.c @@ -51,3 +51,8 @@ int arch_gnttab_map_status(uint64_t *frames, unsigned long nr_gframes, { return -ENOSYS; } + +int arch_gnttab_init(unsigned long nr_shared, unsigned long nr_status) +{ + return 0; +} diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index a474de346be6..b0f9c9db9590 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1,15 +1,17 @@ config ARM64 def_bool y select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE - select ARCH_HAS_OPP select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_USE_CMPXCHG_LOCKREF + select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_WANT_OPTIONAL_GPIOLIB select ARCH_WANT_COMPAT_IPC_PARSE_VERSION select ARCH_WANT_FRAME_POINTERS select ARM_AMBA select ARM_ARCH_TIMER select ARM_GIC + select AUDIT_ARCH_COMPAT_GENERIC + select ARM_GIC_V3 select BUILDTIME_EXTABLE_SORT select CLONE_BACKWARDS select COMMON_CLK @@ -28,10 +30,12 @@ config ARM64 select GENERIC_STRNLEN_USER select GENERIC_TIME_VSYSCALL select HARDIRQS_SW_RESEND + select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_JUMP_LABEL select HAVE_ARCH_KGDB select HAVE_ARCH_TRACEHOOK select HAVE_C_RECORDMCOUNT + select HAVE_CC_STACKPROTECTOR select HAVE_DEBUG_BUGVERBOSE select HAVE_DEBUG_KMEMLEAK select HAVE_DMA_API_DEBUG @@ -62,6 +66,7 @@ config ARM64 select RTC_LIB select SPARSE_IRQ select SYSCTL_EXCEPTION_TRACE + select HAVE_CONTEXT_TRACKING help ARM 64-bit (AArch64) Linux support. @@ -154,14 +159,63 @@ endmenu menu "Kernel Features" +choice + prompt "Page size" + default ARM64_4K_PAGES + help + Page size (translation granule) configuration. + +config ARM64_4K_PAGES + bool "4KB" + help + This feature enables 4KB pages support. + config ARM64_64K_PAGES - bool "Enable 64KB pages support" + bool "64KB" help This feature enables 64KB pages support (4KB by default) allowing only two levels of page tables and faster TLB look-up. AArch32 emulation is not available when this feature is enabled. +endchoice + +choice + prompt "Virtual address space size" + default ARM64_VA_BITS_39 if ARM64_4K_PAGES + default ARM64_VA_BITS_42 if ARM64_64K_PAGES + help + Allows choosing one of multiple possible virtual address + space sizes. The level of translation table is determined by + a combination of page size and virtual address space size. + +config ARM64_VA_BITS_39 + bool "39-bit" + depends on ARM64_4K_PAGES + +config ARM64_VA_BITS_42 + bool "42-bit" + depends on ARM64_64K_PAGES + +config ARM64_VA_BITS_48 + bool "48-bit" + depends on BROKEN + +endchoice + +config ARM64_VA_BITS + int + default 39 if ARM64_VA_BITS_39 + default 42 if ARM64_VA_BITS_42 + default 48 if ARM64_VA_BITS_48 + +config ARM64_PGTABLE_LEVELS + int + default 2 if ARM64_64K_PAGES && ARM64_VA_BITS_42 + default 3 if ARM64_64K_PAGES && ARM64_VA_BITS_48 + default 3 if ARM64_4K_PAGES && ARM64_VA_BITS_39 + default 4 if ARM64_4K_PAGES && ARM64_VA_BITS_48 + config CPU_BIG_ENDIAN bool "Build big-endian kernel" help @@ -293,12 +347,18 @@ config CMDLINE_FORCE This is useful if you cannot or don't want to change the command-line options your boot loader passes to the kernel. +config EFI_STUB + bool + config EFI bool "UEFI runtime support" depends on OF && !CPU_BIG_ENDIAN select LIBFDT select UCS2_STRING select EFI_PARAMS_FROM_FDT + select EFI_RUNTIME_WRAPPERS + select EFI_STUB + select EFI_ARMSTUB default y help This option provides support for runtime services provided diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug index 1c1b75629842..4ee8e90b7a45 100644 --- a/arch/arm64/Kconfig.debug +++ b/arch/arm64/Kconfig.debug @@ -28,4 +28,19 @@ config PID_IN_CONTEXTIDR instructions during context switch. Say Y here only if you are planning to use hardware trace tools with this kernel. +config ARM64_RANDOMIZE_TEXT_OFFSET + bool "Randomize TEXT_OFFSET at build time" + help + Say Y here if you want the image load offset (AKA TEXT_OFFSET) + of the kernel to be randomized at build-time. When selected, + this option will cause TEXT_OFFSET to be randomized upon any + build of the kernel, and the offset will be reflected in the + text_offset field of the resulting Image. This can be used to + fuzz-test bootloaders which respect text_offset. + + This option is intended for bootloader and/or kernel testing + only. Bootloaders must make no assumptions regarding the value + of TEXT_OFFSET and platforms must not require a specific + value. + endmenu diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 8185a913c5ed..57833546bf00 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -38,7 +38,11 @@ CHECKFLAGS += -D__aarch64__ head-y := arch/arm64/kernel/head.o # The byte offset of the kernel image in RAM from the start of RAM. +ifeq ($(CONFIG_ARM64_RANDOMIZE_TEXT_OFFSET), y) +TEXT_OFFSET := $(shell awk 'BEGIN {srand(); printf "0x%04x0\n", int(65535 * rand())}') +else TEXT_OFFSET := 0x00080000 +endif export TEXT_OFFSET GZFLAGS @@ -48,6 +52,7 @@ core-$(CONFIG_XEN) += arch/arm64/xen/ core-$(CONFIG_CRYPTO) += arch/arm64/crypto/ libs-y := arch/arm64/lib/ $(libs-y) libs-y += $(LIBGCC) +libs-$(CONFIG_EFI_STUB) += drivers/firmware/efi/libstub/ # Default target when executing plain make KBUILD_IMAGE := Image.gz diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 3421f316f5dc..1e52b741d806 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -52,8 +52,11 @@ CONFIG_IP_PNP_BOOTP=y # CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set # CONFIG_WIRELESS is not set +CONFIG_NET_9P=y +CONFIG_NET_9P_VIRTIO=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEVTMPFS=y +CONFIG_DEVTMPFS_MOUNT=y CONFIG_DMA_CMA=y CONFIG_BLK_DEV_LOOP=y CONFIG_VIRTIO_BLK=y @@ -65,6 +68,7 @@ CONFIG_PATA_PLATFORM=y CONFIG_PATA_OF_PLATFORM=y CONFIG_NETDEVICES=y CONFIG_TUN=y +CONFIG_VIRTIO_NET=y CONFIG_SMC91X=y CONFIG_SMSC911X=y # CONFIG_WLAN is not set @@ -76,6 +80,7 @@ CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_AMBA_PL011=y CONFIG_SERIAL_AMBA_PL011_CONSOLE=y CONFIG_SERIAL_OF_PLATFORM=y +CONFIG_VIRTIO_CONSOLE=y # CONFIG_HW_RANDOM is not set # CONFIG_HWMON is not set CONFIG_REGULATOR=y @@ -90,6 +95,7 @@ CONFIG_USB_ISP1760_HCD=y CONFIG_USB_STORAGE=y CONFIG_MMC=y CONFIG_MMC_ARMMMCI=y +CONFIG_VIRTIO_BALLOON=y CONFIG_VIRTIO_MMIO=y # CONFIG_IOMMU_SUPPORT is not set CONFIG_EXT2_FS=y @@ -107,6 +113,7 @@ CONFIG_HUGETLBFS=y # CONFIG_MISC_FILESYSTEMS is not set CONFIG_NFS_FS=y CONFIG_ROOT_NFS=y +CONFIG_9P_FS=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y CONFIG_VIRTUALIZATION=y diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile index 2070a56ecc46..a3f935fde975 100644 --- a/arch/arm64/crypto/Makefile +++ b/arch/arm64/crypto/Makefile @@ -35,4 +35,4 @@ AFLAGS_aes-neon.o := -DINTERLEAVE=4 CFLAGS_aes-glue-ce.o := -DUSE_V8_CRYPTO_EXTENSIONS $(obj)/aes-glue-%.o: $(src)/aes-glue.c FORCE - $(call if_changed_dep,cc_o_c) + $(call if_changed_rule,cc_o_c) diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c index 60f2f4c12256..79cd911ef88c 100644 --- a/arch/arm64/crypto/aes-glue.c +++ b/arch/arm64/crypto/aes-glue.c @@ -106,7 +106,7 @@ static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr, (u8 *)ctx->key_enc, rounds, blocks, first); - err = blkcipher_walk_done(desc, &walk, 0); + err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE); } kernel_neon_end(); return err; @@ -128,7 +128,7 @@ static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr, (u8 *)ctx->key_dec, rounds, blocks, first); - err = blkcipher_walk_done(desc, &walk, 0); + err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE); } kernel_neon_end(); return err; @@ -151,7 +151,7 @@ static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr, (u8 *)ctx->key_enc, rounds, blocks, walk.iv, first); - err = blkcipher_walk_done(desc, &walk, 0); + err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE); } kernel_neon_end(); return err; @@ -174,7 +174,7 @@ static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr, (u8 *)ctx->key_dec, rounds, blocks, walk.iv, first); - err = blkcipher_walk_done(desc, &walk, 0); + err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE); } kernel_neon_end(); return err; @@ -243,7 +243,7 @@ static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr, (u8 *)ctx->key1.key_enc, rounds, blocks, (u8 *)ctx->key2.key_enc, walk.iv, first); - err = blkcipher_walk_done(desc, &walk, 0); + err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE); } kernel_neon_end(); @@ -267,7 +267,7 @@ static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr, (u8 *)ctx->key1.key_dec, rounds, blocks, (u8 *)ctx->key2.key_enc, walk.iv, first); - err = blkcipher_walk_done(desc, &walk, 0); + err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE); } kernel_neon_end(); diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index a5176cf32dad..f2defe1c380c 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -138,19 +138,10 @@ static inline void __flush_icache_all(void) #define flush_icache_page(vma,page) do { } while (0) /* - * flush_cache_vmap() is used when creating mappings (eg, via vmap, - * vmalloc, ioremap etc) in kernel space for pages. On non-VIPT - * caches, since the direct-mappings of these pages may contain cached - * data, we need to do a full cache flush to ensure that writebacks - * don't corrupt data placed into these pages via the new mappings. + * Not required on AArch64 (PIPT or VIPT non-aliasing D-cache). */ static inline void flush_cache_vmap(unsigned long start, unsigned long end) { - /* - * set_pte_at() called from vmap_pte_range() does not - * have a DSB after cleaning the cache line. - */ - dsb(ish); } static inline void flush_cache_vunmap(unsigned long start, unsigned long end) diff --git a/arch/arm64/include/asm/cachetype.h b/arch/arm64/include/asm/cachetype.h index 4b23e758d5e0..7a2e0762cb40 100644 --- a/arch/arm64/include/asm/cachetype.h +++ b/arch/arm64/include/asm/cachetype.h @@ -30,10 +30,14 @@ #ifndef __ASSEMBLY__ -static inline u32 icache_policy(void) -{ - return (read_cpuid_cachetype() >> CTR_L1IP_SHIFT) & CTR_L1IP_MASK; -} +#include <linux/bitops.h> + +#define CTR_L1IP(ctr) (((ctr) >> CTR_L1IP_SHIFT) & CTR_L1IP_MASK) + +#define ICACHEF_ALIASING BIT(0) +#define ICACHEF_AIVIVT BIT(1) + +extern unsigned long __icache_flags; /* * Whilst the D-side always behaves as PIPT on AArch64, aliasing is @@ -41,12 +45,12 @@ static inline u32 icache_policy(void) */ static inline int icache_is_aliasing(void) { - return icache_policy() != ICACHE_POLICY_PIPT; + return test_bit(ICACHEF_ALIASING, &__icache_flags); } static inline int icache_is_aivivt(void) { - return icache_policy() == ICACHE_POLICY_AIVIVT; + return test_bit(ICACHEF_AIVIVT, &__icache_flags); } static inline u32 cache_type_cwg(void) diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h new file mode 100644 index 000000000000..056443086019 --- /dev/null +++ b/arch/arm64/include/asm/cpu.h @@ -0,0 +1,59 @@ +/* + * Copyright (C) 2014 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_CPU_H +#define __ASM_CPU_H + +#include <linux/cpu.h> +#include <linux/init.h> +#include <linux/percpu.h> + +/* + * Records attributes of an individual CPU. + */ +struct cpuinfo_arm64 { + struct cpu cpu; + u32 reg_ctr; + u32 reg_cntfrq; + u32 reg_dczid; + u32 reg_midr; + + u64 reg_id_aa64isar0; + u64 reg_id_aa64isar1; + u64 reg_id_aa64mmfr0; + u64 reg_id_aa64mmfr1; + u64 reg_id_aa64pfr0; + u64 reg_id_aa64pfr1; + + u32 reg_id_isar0; + u32 reg_id_isar1; + u32 reg_id_isar2; + u32 reg_id_isar3; + u32 reg_id_isar4; + u32 reg_id_isar5; + u32 reg_id_mmfr0; + u32 reg_id_mmfr1; + u32 reg_id_mmfr2; + u32 reg_id_mmfr3; + u32 reg_id_pfr0; + u32 reg_id_pfr1; +}; + +DECLARE_PER_CPU(struct cpuinfo_arm64, cpu_data); + +void cpuinfo_store_cpu(void); +void __init cpuinfo_store_boot_cpu(void); + +#endif /* __ASM_CPU_H */ diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 27f54a7cc81b..379d0b874328 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -18,6 +18,8 @@ #define INVALID_HWID ULONG_MAX +#define MPIDR_UP_BITMASK (0x1 << 30) +#define MPIDR_MT_BITMASK (0x1 << 24) #define MPIDR_HWID_BITMASK 0xff00ffffff #define MPIDR_LEVEL_BITS_SHIFT 3 @@ -36,15 +38,34 @@ __val; \ }) +#define MIDR_REVISION_MASK 0xf +#define MIDR_REVISION(midr) ((midr) & MIDR_REVISION_MASK) +#define MIDR_PARTNUM_SHIFT 4 +#define MIDR_PARTNUM_MASK (0xfff << MIDR_PARTNUM_SHIFT) +#define MIDR_PARTNUM(midr) \ + (((midr) & MIDR_PARTNUM_MASK) >> MIDR_PARTNUM_SHIFT) +#define MIDR_ARCHITECTURE_SHIFT 16 +#define MIDR_ARCHITECTURE_MASK (0xf << MIDR_ARCHITECTURE_SHIFT) +#define MIDR_ARCHITECTURE(midr) \ + (((midr) & MIDR_ARCHITECTURE_MASK) >> MIDR_ARCHITECTURE_SHIFT) +#define MIDR_VARIANT_SHIFT 20 +#define MIDR_VARIANT_MASK (0xf << MIDR_VARIANT_SHIFT) +#define MIDR_VARIANT(midr) \ + (((midr) & MIDR_VARIANT_MASK) >> MIDR_VARIANT_SHIFT) +#define MIDR_IMPLEMENTOR_SHIFT 24 +#define MIDR_IMPLEMENTOR_MASK (0xff << MIDR_IMPLEMENTOR_SHIFT) +#define MIDR_IMPLEMENTOR(midr) \ + (((midr) & MIDR_IMPLEMENTOR_MASK) >> MIDR_IMPLEMENTOR_SHIFT) + #define ARM_CPU_IMP_ARM 0x41 #define ARM_CPU_IMP_APM 0x50 -#define ARM_CPU_PART_AEM_V8 0xD0F0 -#define ARM_CPU_PART_FOUNDATION 0xD000 -#define ARM_CPU_PART_CORTEX_A53 0xD030 -#define ARM_CPU_PART_CORTEX_A57 0xD070 +#define ARM_CPU_PART_AEM_V8 0xD0F +#define ARM_CPU_PART_FOUNDATION 0xD00 +#define ARM_CPU_PART_CORTEX_A57 0xD07 +#define ARM_CPU_PART_CORTEX_A53 0xD03 -#define APM_CPU_PART_POTENZA 0x0000 +#define APM_CPU_PART_POTENZA 0x000 #ifndef __ASSEMBLY__ @@ -65,12 +86,12 @@ static inline u64 __attribute_const__ read_cpuid_mpidr(void) static inline unsigned int __attribute_const__ read_cpuid_implementor(void) { - return (read_cpuid_id() & 0xFF000000) >> 24; + return MIDR_IMPLEMENTOR(read_cpuid_id()); } static inline unsigned int __attribute_const__ read_cpuid_part_number(void) { - return (read_cpuid_id() & 0xFFF0); + return MIDR_PARTNUM(read_cpuid_id()); } static inline u32 __attribute_const__ read_cpuid_cachetype(void) diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index 5a46c4e7f539..a34fd3b12e2b 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -2,6 +2,7 @@ #define _ASM_EFI_H #include <asm/io.h> +#include <asm/neon.h> #ifdef CONFIG_EFI extern void efi_init(void); @@ -11,4 +12,36 @@ extern void efi_idmap_init(void); #define efi_idmap_init() #endif +#define efi_call_virt(f, ...) \ +({ \ + efi_##f##_t *__f = efi.systab->runtime->f; \ + efi_status_t __s; \ + \ + kernel_neon_begin(); \ + __s = __f(__VA_ARGS__); \ + kernel_neon_end(); \ + __s; \ +}) + +#define __efi_call_virt(f, ...) \ +({ \ + efi_##f##_t *__f = efi.systab->runtime->f; \ + \ + kernel_neon_begin(); \ + __f(__VA_ARGS__); \ + kernel_neon_end(); \ +}) + +/* arch specific definitions used by the stub code */ + +/* + * AArch64 requires the DTB to be 8-byte aligned in the first 512MiB from + * start of kernel and may not cross a 2MiB boundary. We set alignment to + * 2MiB so we know it won't cross a 2MiB boundary. + */ +#define EFI_FDT_ALIGN SZ_2M /* used by allocate_new_fdt_and_exit_boot() */ +#define MAX_FDT_OFFSET SZ_512M + +#define efi_call_early(f, ...) sys_table_arg->boottime->f(__VA_ARGS__) + #endif /* _ASM_EFI_H */ diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h index 768414d55e64..007618b8188c 100644 --- a/arch/arm64/include/asm/fpsimdmacros.h +++ b/arch/arm64/include/asm/fpsimdmacros.h @@ -40,6 +40,19 @@ str w\tmpnr, [\state, #16 * 2 + 4] .endm +.macro fpsimd_restore_fpcr state, tmp + /* + * Writes to fpcr may be self-synchronising, so avoid restoring + * the register if it hasn't changed. + */ + mrs \tmp, fpcr + cmp \tmp, \state + b.eq 9999f + msr fpcr, \state +9999: +.endm + +/* Clobbers \state */ .macro fpsimd_restore state, tmpnr ldp q0, q1, [\state, #16 * 0] ldp q2, q3, [\state, #16 * 2] @@ -60,7 +73,7 @@ ldr w\tmpnr, [\state, #16 * 2] msr fpsr, x\tmpnr ldr w\tmpnr, [\state, #16 * 2 + 4] - msr fpcr, x\tmpnr + fpsimd_restore_fpcr x\tmpnr, \state .endm .altmacro @@ -84,7 +97,7 @@ .macro fpsimd_restore_partial state, tmpnr1, tmpnr2 ldp w\tmpnr1, w\tmpnr2, [\state] msr fpsr, x\tmpnr1 - msr fpcr, x\tmpnr2 + fpsimd_restore_fpcr x\tmpnr2, x\tmpnr1 adr x\tmpnr1, 0f ldr w\tmpnr2, [\state, #8] add \state, \state, x\tmpnr2, lsl #4 diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 902eb708804a..ccc7087d3c4e 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -41,11 +41,7 @@ * The module space lives between the addresses given by TASK_SIZE * and PAGE_OFFSET - it must be within 128MB of the kernel text. */ -#ifdef CONFIG_ARM64_64K_PAGES -#define VA_BITS (42) -#else -#define VA_BITS (39) -#endif +#define VA_BITS (CONFIG_ARM64_VA_BITS) #define PAGE_OFFSET (UL(0xffffffffffffffff) << (VA_BITS - 1)) #define MODULES_END (PAGE_OFFSET) #define MODULES_VADDR (MODULES_END - SZ_64M) diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h index 46bf66628b6a..7a3f462133b0 100644 --- a/arch/arm64/include/asm/page.h +++ b/arch/arm64/include/asm/page.h @@ -31,14 +31,26 @@ /* We do define AT_SYSINFO_EHDR but don't use the gate mechanism */ #define __HAVE_ARCH_GATE_AREA 1 -#ifndef __ASSEMBLY__ - +/* + * The idmap and swapper page tables need some space reserved in the kernel + * image. Both require pgd, pud (4 levels only) and pmd tables to (section) + * map the kernel. With the 64K page configuration, swapper and idmap need to + * map to pte level. The swapper also maps the FDT (see __create_page_tables + * for more information). + */ #ifdef CONFIG_ARM64_64K_PAGES -#include <asm/pgtable-2level-types.h> +#define SWAPPER_PGTABLE_LEVELS (CONFIG_ARM64_PGTABLE_LEVELS) #else -#include <asm/pgtable-3level-types.h> +#define SWAPPER_PGTABLE_LEVELS (CONFIG_ARM64_PGTABLE_LEVELS - 1) #endif +#define SWAPPER_DIR_SIZE (SWAPPER_PGTABLE_LEVELS * PAGE_SIZE) +#define IDMAP_DIR_SIZE (SWAPPER_DIR_SIZE) + +#ifndef __ASSEMBLY__ + +#include <asm/pgtable-types.h> + extern void __cpu_clear_user_page(void *p, unsigned long user); extern void __cpu_copy_user_page(void *to, const void *from, unsigned long user); diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h index 9bea6e74a001..d5bed02073d6 100644 --- a/arch/arm64/include/asm/pgalloc.h +++ b/arch/arm64/include/asm/pgalloc.h @@ -26,7 +26,7 @@ #define check_pgt_cache() do { } while (0) -#ifndef CONFIG_ARM64_64K_PAGES +#if CONFIG_ARM64_PGTABLE_LEVELS > 2 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) { @@ -44,7 +44,27 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) set_pud(pud, __pud(__pa(pmd) | PMD_TYPE_TABLE)); } -#endif /* CONFIG_ARM64_64K_PAGES */ +#endif /* CONFIG_ARM64_PGTABLE_LEVELS > 2 */ + +#if CONFIG_ARM64_PGTABLE_LEVELS > 3 + +static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) +{ + return (pud_t *)get_zeroed_page(GFP_KERNEL | __GFP_REPEAT); +} + +static inline void pud_free(struct mm_struct *mm, pud_t *pud) +{ + BUG_ON((unsigned long)pud & (PAGE_SIZE-1)); + free_page((unsigned long)pud); +} + +static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) +{ + set_pgd(pgd, __pgd(__pa(pud) | PUD_TYPE_TABLE)); +} + +#endif /* CONFIG_ARM64_PGTABLE_LEVELS > 3 */ extern pgd_t *pgd_alloc(struct mm_struct *mm); extern void pgd_free(struct mm_struct *mm, pgd_t *pgd); diff --git a/arch/arm64/include/asm/pgtable-2level-hwdef.h b/arch/arm64/include/asm/pgtable-2level-hwdef.h deleted file mode 100644 index 2593b490c56a..000000000000 --- a/arch/arm64/include/asm/pgtable-2level-hwdef.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (C) 2012 ARM Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ -#ifndef __ASM_PGTABLE_2LEVEL_HWDEF_H -#define __ASM_PGTABLE_2LEVEL_HWDEF_H - -/* - * With LPAE and 64KB pages, there are 2 levels of page tables. Each level has - * 8192 entries of 8 bytes each, occupying a 64KB page. Levels 0 and 1 are not - * used. The 2nd level table (PGD for Linux) can cover a range of 4TB, each - * entry representing 512MB. The user and kernel address spaces are limited to - * 4TB in the 64KB page configuration. - */ -#define PTRS_PER_PTE 8192 -#define PTRS_PER_PGD 8192 - -/* - * PGDIR_SHIFT determines the size a top-level page table entry can map. - */ -#define PGDIR_SHIFT 29 -#define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT) -#define PGDIR_MASK (~(PGDIR_SIZE-1)) - -/* - * section address mask and size definitions. - */ -#define SECTION_SHIFT 29 -#define SECTION_SIZE (_AC(1, UL) << SECTION_SHIFT) -#define SECTION_MASK (~(SECTION_SIZE-1)) - -#endif diff --git a/arch/arm64/include/asm/pgtable-2level-types.h b/arch/arm64/include/asm/pgtable-2level-types.h deleted file mode 100644 index 5f101e63dfc1..000000000000 --- a/arch/arm64/include/asm/pgtable-2level-types.h +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright (C) 2012 ARM Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ -#ifndef __ASM_PGTABLE_2LEVEL_TYPES_H -#define __ASM_PGTABLE_2LEVEL_TYPES_H - -#include <asm/types.h> - -typedef u64 pteval_t; -typedef u64 pgdval_t; -typedef pgdval_t pmdval_t; - -#undef STRICT_MM_TYPECHECKS - -#ifdef STRICT_MM_TYPECHECKS - -/* - * These are used to make use of C type-checking.. - */ -typedef struct { pteval_t pte; } pte_t; -typedef struct { pgdval_t pgd; } pgd_t; -typedef struct { pteval_t pgprot; } pgprot_t; - -#define pte_val(x) ((x).pte) -#define pgd_val(x) ((x).pgd) -#define pgprot_val(x) ((x).pgprot) - -#define __pte(x) ((pte_t) { (x) } ) -#define __pgd(x) ((pgd_t) { (x) } ) -#define __pgprot(x) ((pgprot_t) { (x) } ) - -#else /* !STRICT_MM_TYPECHECKS */ - -typedef pteval_t pte_t; -typedef pgdval_t pgd_t; -typedef pteval_t pgprot_t; - -#define pte_val(x) (x) -#define pgd_val(x) (x) -#define pgprot_val(x) (x) - -#define __pte(x) (x) -#define __pgd(x) (x) -#define __pgprot(x) (x) - -#endif /* STRICT_MM_TYPECHECKS */ - -#include <asm-generic/pgtable-nopmd.h> - -#endif /* __ASM_PGTABLE_2LEVEL_TYPES_H */ diff --git a/arch/arm64/include/asm/pgtable-3level-hwdef.h b/arch/arm64/include/asm/pgtable-3level-hwdef.h deleted file mode 100644 index 3dbf941d7767..000000000000 --- a/arch/arm64/include/asm/pgtable-3level-hwdef.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright (C) 2012 ARM Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ -#ifndef __ASM_PGTABLE_3LEVEL_HWDEF_H -#define __ASM_PGTABLE_3LEVEL_HWDEF_H - -/* - * With LPAE and 4KB pages, there are 3 levels of page tables. Each level has - * 512 entries of 8 bytes each, occupying a 4K page. The first level table - * covers a range of 512GB, each entry representing 1GB. The user and kernel - * address spaces are limited to 512GB each. - */ -#define PTRS_PER_PTE 512 -#define PTRS_PER_PMD 512 -#define PTRS_PER_PGD 512 - -/* - * PGDIR_SHIFT determines the size a top-level page table entry can map. - */ -#define PGDIR_SHIFT 30 -#define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT) -#define PGDIR_MASK (~(PGDIR_SIZE-1)) - -/* - * PMD_SHIFT determines the size a middle-level page table entry can map. - */ -#define PMD_SHIFT 21 -#define PMD_SIZE (_AC(1, UL) << PMD_SHIFT) -#define PMD_MASK (~(PMD_SIZE-1)) - -/* - * section address mask and size definitions. - */ -#define SECTION_SHIFT 21 -#define SECTION_SIZE (_AC(1, UL) << SECTION_SHIFT) -#define SECTION_MASK (~(SECTION_SIZE-1)) - -#endif diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index 955e8c5f0afb..88174e0bfafe 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -16,18 +16,50 @@ #ifndef __ASM_PGTABLE_HWDEF_H #define __ASM_PGTABLE_HWDEF_H -#ifdef CONFIG_ARM64_64K_PAGES -#include <asm/pgtable-2level-hwdef.h> -#else -#include <asm/pgtable-3level-hwdef.h> +#define PTRS_PER_PTE (1 << (PAGE_SHIFT - 3)) + +/* + * PMD_SHIFT determines the size a level 2 page table entry can map. + */ +#if CONFIG_ARM64_PGTABLE_LEVELS > 2 +#define PMD_SHIFT ((PAGE_SHIFT - 3) * 2 + 3) +#define PMD_SIZE (_AC(1, UL) << PMD_SHIFT) +#define PMD_MASK (~(PMD_SIZE-1)) +#define PTRS_PER_PMD PTRS_PER_PTE +#endif + +/* + * PUD_SHIFT determines the size a level 1 page table entry can map. + */ +#if CONFIG_ARM64_PGTABLE_LEVELS > 3 +#define PUD_SHIFT ((PAGE_SHIFT - 3) * 3 + 3) +#define PUD_SIZE (_AC(1, UL) << PUD_SHIFT) +#define PUD_MASK (~(PUD_SIZE-1)) +#define PTRS_PER_PUD PTRS_PER_PTE #endif /* + * PGDIR_SHIFT determines the size a top-level page table entry can map + * (depending on the configuration, this level can be 0, 1 or 2). + */ +#define PGDIR_SHIFT ((PAGE_SHIFT - 3) * CONFIG_ARM64_PGTABLE_LEVELS + 3) +#define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT) +#define PGDIR_MASK (~(PGDIR_SIZE-1)) +#define PTRS_PER_PGD (1 << (VA_BITS - PGDIR_SHIFT)) + +/* + * Section address mask and size definitions. + */ +#define SECTION_SHIFT PMD_SHIFT +#define SECTION_SIZE (_AC(1, UL) << SECTION_SHIFT) +#define SECTION_MASK (~(SECTION_SIZE-1)) + +/* * Hardware page table definitions. * * Level 1 descriptor (PUD). */ - +#define PUD_TYPE_TABLE (_AT(pudval_t, 3) << 0) #define PUD_TABLE_BIT (_AT(pgdval_t, 1) << 1) #define PUD_TYPE_MASK (_AT(pgdval_t, 3) << 0) #define PUD_TYPE_SECT (_AT(pgdval_t, 1) << 0) diff --git a/arch/arm64/include/asm/pgtable-3level-types.h b/arch/arm64/include/asm/pgtable-types.h index 4e94424938a4..ca9df80af896 100644 --- a/arch/arm64/include/asm/pgtable-3level-types.h +++ b/arch/arm64/include/asm/pgtable-types.h @@ -1,7 +1,10 @@ /* - * Copyright (C) 2012 ARM Ltd. + * Page table types definitions. * - * This program is free software; you can redistribute it and/or modify + * Copyright (C) 2014 ARM Ltd. + * Author: Catalin Marinas <catalin.marinas@arm.com> + * + * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. * @@ -13,13 +16,15 @@ * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ -#ifndef __ASM_PGTABLE_3LEVEL_TYPES_H -#define __ASM_PGTABLE_3LEVEL_TYPES_H + +#ifndef __ASM_PGTABLE_TYPES_H +#define __ASM_PGTABLE_TYPES_H #include <asm/types.h> typedef u64 pteval_t; typedef u64 pmdval_t; +typedef u64 pudval_t; typedef u64 pgdval_t; #undef STRICT_MM_TYPECHECKS @@ -30,39 +35,61 @@ typedef u64 pgdval_t; * These are used to make use of C type-checking.. */ typedef struct { pteval_t pte; } pte_t; +#define pte_val(x) ((x).pte) +#define __pte(x) ((pte_t) { (x) } ) + +#if CONFIG_ARM64_PGTABLE_LEVELS > 2 typedef struct { pmdval_t pmd; } pmd_t; -typedef struct { pgdval_t pgd; } pgd_t; -typedef struct { pteval_t pgprot; } pgprot_t; +#define pmd_val(x) ((x).pmd) +#define __pmd(x) ((pmd_t) { (x) } ) +#endif -#define pte_val(x) ((x).pte) -#define pmd_val(x) ((x).pmd) -#define pgd_val(x) ((x).pgd) -#define pgprot_val(x) ((x).pgprot) +#if CONFIG_ARM64_PGTABLE_LEVELS > 3 +typedef struct { pudval_t pud; } pud_t; +#define pud_val(x) ((x).pud) +#define __pud(x) ((pud_t) { (x) } ) +#endif -#define __pte(x) ((pte_t) { (x) } ) -#define __pmd(x) ((pmd_t) { (x) } ) +typedef struct { pgdval_t pgd; } pgd_t; +#define pgd_val(x) ((x).pgd) #define __pgd(x) ((pgd_t) { (x) } ) -#define __pgprot(x) ((pgprot_t) { (x) } ) + +typedef struct { pteval_t pgprot; } pgprot_t; +#define pgprot_val(x) ((x).pgprot) +#define __pgprot(x) ((pgprot_t) { (x) } ) #else /* !STRICT_MM_TYPECHECKS */ typedef pteval_t pte_t; -typedef pmdval_t pmd_t; -typedef pgdval_t pgd_t; -typedef pteval_t pgprot_t; - #define pte_val(x) (x) -#define pmd_val(x) (x) -#define pgd_val(x) (x) -#define pgprot_val(x) (x) - #define __pte(x) (x) + +#if CONFIG_ARM64_PGTABLE_LEVELS > 2 +typedef pmdval_t pmd_t; +#define pmd_val(x) (x) #define __pmd(x) (x) +#endif + +#if CONFIG_ARM64_PGTABLE_LEVELS > 3 +typedef pudval_t pud_t; +#define pud_val(x) (x) +#define __pud(x) (x) +#endif + +typedef pgdval_t pgd_t; +#define pgd_val(x) (x) #define __pgd(x) (x) + +typedef pteval_t pgprot_t; +#define pgprot_val(x) (x) #define __pgprot(x) (x) -#endif /* STRICT_MM_TYPECHECKS */ +#endif /* STRICT_MM_TYPECHECKS */ +#if CONFIG_ARM64_PGTABLE_LEVELS == 2 +#include <asm-generic/pgtable-nopmd.h> +#elif CONFIG_ARM64_PGTABLE_LEVELS == 3 #include <asm-generic/pgtable-nopud.h> +#endif -#endif /* __ASM_PGTABLE_3LEVEL_TYPES_H */ +#endif /* __ASM_PGTABLE_TYPES_H */ diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index e0ccceb317d9..ffe1ba0506d1 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -33,9 +33,16 @@ /* * VMALLOC and SPARSEMEM_VMEMMAP ranges. + * + * VMEMAP_SIZE: allows the whole VA space to be covered by a struct page array + * (rounded up to PUD_SIZE). + * VMALLOC_START: beginning of the kernel VA space + * VMALLOC_END: extends to the available space below vmmemmap, PCI I/O space, + * fixed mappings and modules */ +#define VMEMMAP_SIZE ALIGN((1UL << (VA_BITS - PAGE_SHIFT)) * sizeof(struct page), PUD_SIZE) #define VMALLOC_START (UL(0xffffffffffffffff) << VA_BITS) -#define VMALLOC_END (PAGE_OFFSET - UL(0x400000000) - SZ_64K) +#define VMALLOC_END (PAGE_OFFSET - PUD_SIZE - VMEMMAP_SIZE - SZ_64K) #define vmemmap ((struct page *)(VMALLOC_END + SZ_64K)) @@ -44,14 +51,9 @@ #ifndef __ASSEMBLY__ extern void __pte_error(const char *file, int line, unsigned long val); extern void __pmd_error(const char *file, int line, unsigned long val); +extern void __pud_error(const char *file, int line, unsigned long val); extern void __pgd_error(const char *file, int line, unsigned long val); -#define pte_ERROR(pte) __pte_error(__FILE__, __LINE__, pte_val(pte)) -#ifndef CONFIG_ARM64_64K_PAGES -#define pmd_ERROR(pmd) __pmd_error(__FILE__, __LINE__, pmd_val(pmd)) -#endif -#define pgd_ERROR(pgd) __pgd_error(__FILE__, __LINE__, pgd_val(pgd)) - #ifdef CONFIG_SMP #define PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED) #define PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S) @@ -112,6 +114,8 @@ extern void __pgd_error(const char *file, int line, unsigned long val); extern struct page *empty_zero_page; #define ZERO_PAGE(vaddr) (empty_zero_page) +#define pte_ERROR(pte) __pte_error(__FILE__, __LINE__, pte_val(pte)) + #define pte_pfn(pte) ((pte_val(pte) & PHYS_MASK) >> PAGE_SHIFT) #define pfn_pte(pfn,prot) (__pte(((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))) @@ -119,6 +123,10 @@ extern struct page *empty_zero_page; #define pte_none(pte) (!pte_val(pte)) #define pte_clear(mm,addr,ptep) set_pte(ptep, __pte(0)) #define pte_page(pte) (pfn_to_page(pte_pfn(pte))) + +/* Find an entry in the third-level page table. */ +#define pte_index(addr) (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) + #define pte_offset_kernel(dir,addr) (pmd_page_vaddr(*(dir)) + pte_index(addr)) #define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr)) @@ -138,6 +146,8 @@ extern struct page *empty_zero_page; #define pte_valid_user(pte) \ ((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER)) +#define pte_valid_not_user(pte) \ + ((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID) static inline pte_t pte_wrprotect(pte_t pte) { @@ -184,6 +194,15 @@ static inline pte_t pte_mkspecial(pte_t pte) static inline void set_pte(pte_t *ptep, pte_t pte) { *ptep = pte; + + /* + * Only if the new pte is valid and kernel, otherwise TLB maintenance + * or update_mmu_cache() have the necessary barriers. + */ + if (pte_valid_not_user(pte)) { + dsb(ishst); + isb(); + } } extern void __sync_icache_dcache(pte_t pteval, unsigned long addr); @@ -303,6 +322,7 @@ static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) { *pmdp = pmd; dsb(ishst); + isb(); } static inline void pmd_clear(pmd_t *pmdp) @@ -323,7 +343,9 @@ static inline pte_t *pmd_page_vaddr(pmd_t pmd) */ #define mk_pte(page,prot) pfn_pte(page_to_pfn(page),prot) -#ifndef CONFIG_ARM64_64K_PAGES +#if CONFIG_ARM64_PGTABLE_LEVELS > 2 + +#define pmd_ERROR(pmd) __pmd_error(__FILE__, __LINE__, pmd_val(pmd)) #define pud_none(pud) (!pud_val(pud)) #define pud_bad(pud) (!(pud_val(pud) & 2)) @@ -333,6 +355,7 @@ static inline void set_pud(pud_t *pudp, pud_t pud) { *pudp = pud; dsb(ishst); + isb(); } static inline void pud_clear(pud_t *pudp) @@ -345,7 +368,51 @@ static inline pmd_t *pud_page_vaddr(pud_t pud) return __va(pud_val(pud) & PHYS_MASK & (s32)PAGE_MASK); } -#endif /* CONFIG_ARM64_64K_PAGES */ +/* Find an entry in the second-level page table. */ +#define pmd_index(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)) + +static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr) +{ + return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(addr); +} + +#endif /* CONFIG_ARM64_PGTABLE_LEVELS > 2 */ + +#if CONFIG_ARM64_PGTABLE_LEVELS > 3 + +#define pud_ERROR(pud) __pud_error(__FILE__, __LINE__, pud_val(pud)) + +#define pgd_none(pgd) (!pgd_val(pgd)) +#define pgd_bad(pgd) (!(pgd_val(pgd) & 2)) +#define pgd_present(pgd) (pgd_val(pgd)) + +static inline void set_pgd(pgd_t *pgdp, pgd_t pgd) +{ + *pgdp = pgd; + dsb(ishst); +} + +static inline void pgd_clear(pgd_t *pgdp) +{ + set_pgd(pgdp, __pgd(0)); +} + +static inline pud_t *pgd_page_vaddr(pgd_t pgd) +{ + return __va(pgd_val(pgd) & PHYS_MASK & (s32)PAGE_MASK); +} + +/* Find an entry in the frst-level page table. */ +#define pud_index(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) + +static inline pud_t *pud_offset(pgd_t *pgd, unsigned long addr) +{ + return (pud_t *)pgd_page_vaddr(*pgd) + pud_index(addr); +} + +#endif /* CONFIG_ARM64_PGTABLE_LEVELS > 3 */ + +#define pgd_ERROR(pgd) __pgd_error(__FILE__, __LINE__, pgd_val(pgd)) /* to find an entry in a page-table-directory */ #define pgd_index(addr) (((addr) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)) @@ -355,18 +422,6 @@ static inline pmd_t *pud_page_vaddr(pud_t pud) /* to find an entry in a kernel page-table-directory */ #define pgd_offset_k(addr) pgd_offset(&init_mm, addr) -/* Find an entry in the second-level page table.. */ -#ifndef CONFIG_ARM64_64K_PAGES -#define pmd_index(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)) -static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr) -{ - return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(addr); -} -#endif - -/* Find an entry in the third-level page table.. */ -#define pte_index(addr) (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) - static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { const pteval_t mask = PTE_USER | PTE_PXN | PTE_UXN | PTE_RDONLY | @@ -383,9 +438,6 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; extern pgd_t idmap_pg_dir[PTRS_PER_PGD]; -#define SWAPPER_DIR_SIZE (3 * PAGE_SIZE) -#define IDMAP_DIR_SIZE (2 * PAGE_SIZE) - /* * Encode and decode a swap entry: * bits 0-1: present (must be zero) diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 34de2a8f7d93..3df21feeabdd 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -129,6 +129,7 @@ extern void release_thread(struct task_struct *); unsigned long get_wchan(struct task_struct *p); #define cpu_relax() barrier() +#define cpu_relax_lowlatency() cpu_relax() /* Thread switching */ extern struct task_struct *cpu_switch_to(struct task_struct *prev, @@ -137,8 +138,8 @@ extern struct task_struct *cpu_switch_to(struct task_struct *prev, #define task_pt_regs(p) \ ((struct pt_regs *)(THREAD_START_SP + task_stack_page(p)) - 1) -#define KSTK_EIP(tsk) task_pt_regs(tsk)->pc -#define KSTK_ESP(tsk) task_pt_regs(tsk)->sp +#define KSTK_EIP(tsk) ((unsigned long)task_pt_regs(tsk)->pc) +#define KSTK_ESP(tsk) ((unsigned long)task_pt_regs(tsk)->sp) /* * Prefetching support diff --git a/arch/arm64/include/asm/stackprotector.h b/arch/arm64/include/asm/stackprotector.h new file mode 100644 index 000000000000..fe5e287dc56b --- /dev/null +++ b/arch/arm64/include/asm/stackprotector.h @@ -0,0 +1,38 @@ +/* + * GCC stack protector support. + * + * Stack protector works by putting predefined pattern at the start of + * the stack frame and verifying that it hasn't been overwritten when + * returning from the function. The pattern is called stack canary + * and gcc expects it to be defined by a global variable called + * "__stack_chk_guard" on ARM. This unfortunately means that on SMP + * we cannot have a different canary value per task. + */ + +#ifndef __ASM_STACKPROTECTOR_H +#define __ASM_STACKPROTECTOR_H + +#include <linux/random.h> +#include <linux/version.h> + +extern unsigned long __stack_chk_guard; + +/* + * Initialize the stackprotector canary value. + * + * NOTE: this must only be called from functions that never return, + * and it must always be inlined. + */ +static __always_inline void boot_init_stack_canary(void) +{ + unsigned long canary; + + /* Try to get a semi random initial value. */ + get_random_bytes(&canary, sizeof(canary)); + canary ^= LINUX_VERSION_CODE; + + current->stack_canary = canary; + __stack_chk_guard = current->stack_canary; +} + +#endif /* _ASM_STACKPROTECTOR_H */ diff --git a/arch/arm64/include/asm/syscall.h b/arch/arm64/include/asm/syscall.h index 383771eb0b87..709a574468f0 100644 --- a/arch/arm64/include/asm/syscall.h +++ b/arch/arm64/include/asm/syscall.h @@ -16,6 +16,8 @@ #ifndef __ASM_SYSCALL_H #define __ASM_SYSCALL_H +#include <uapi/linux/audit.h> +#include <linux/compat.h> #include <linux/err.h> extern const void *sys_call_table[]; @@ -105,4 +107,16 @@ static inline void syscall_set_arguments(struct task_struct *task, memcpy(®s->regs[i], args, n * sizeof(args[0])); } +/* + * We don't care about endianness (__AUDIT_ARCH_LE bit) here because + * AArch64 has the same system calls both on little- and big- endian. + */ +static inline int syscall_get_arch(void) +{ + if (is_compat_task()) + return AUDIT_ARCH_ARM; + + return AUDIT_ARCH_AARCH64; +} + #endif /* __ASM_SYSCALL_H */ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h new file mode 100644 index 000000000000..5c89df0acbcb --- /dev/null +++ b/arch/arm64/include/asm/sysreg.h @@ -0,0 +1,60 @@ +/* + * Macros for accessing system registers with older binutils. + * + * Copyright (C) 2014 ARM Ltd. + * Author: Catalin Marinas <catalin.marinas@arm.com> + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef __ASM_SYSREG_H +#define __ASM_SYSREG_H + +#define sys_reg(op0, op1, crn, crm, op2) \ + ((((op0)-2)<<19)|((op1)<<16)|((crn)<<12)|((crm)<<8)|((op2)<<5)) + +#ifdef __ASSEMBLY__ + + .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30 + .equ __reg_num_x\num, \num + .endr + .equ __reg_num_xzr, 31 + + .macro mrs_s, rt, sreg + .inst 0xd5300000|(\sreg)|(__reg_num_\rt) + .endm + + .macro msr_s, sreg, rt + .inst 0xd5100000|(\sreg)|(__reg_num_\rt) + .endm + +#else + +asm( +" .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30\n" +" .equ __reg_num_x\\num, \\num\n" +" .endr\n" +" .equ __reg_num_xzr, 31\n" +"\n" +" .macro mrs_s, rt, sreg\n" +" .inst 0xd5300000|(\\sreg)|(__reg_num_\\rt)\n" +" .endm\n" +"\n" +" .macro msr_s, sreg, rt\n" +" .inst 0xd5100000|(\\sreg)|(__reg_num_\\rt)\n" +" .endm\n" +); + +#endif + +#endif /* __ASM_SYSREG_H */ diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index e40b6d06d515..45108d802f5e 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -103,6 +103,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_NEED_RESCHED 1 #define TIF_NOTIFY_RESUME 2 /* callback before returning to user */ #define TIF_FOREIGN_FPSTATE 3 /* CPU's FP state is not current's */ +#define TIF_NOHZ 7 #define TIF_SYSCALL_TRACE 8 #define TIF_SYSCALL_AUDIT 9 #define TIF_SYSCALL_TRACEPOINT 10 @@ -118,6 +119,7 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) #define _TIF_FOREIGN_FPSTATE (1 << TIF_FOREIGN_FPSTATE) +#define _TIF_NOHZ (1 << TIF_NOHZ) #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) @@ -128,7 +130,8 @@ static inline struct thread_info *current_thread_info(void) _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE) #define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ - _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP) + _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \ + _TIF_NOHZ) #endif /* __KERNEL__ */ #endif /* __ASM_THREAD_INFO_H */ diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h index 80e2c08900d6..62731ef9749a 100644 --- a/arch/arm64/include/asm/tlb.h +++ b/arch/arm64/include/asm/tlb.h @@ -91,7 +91,7 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, tlb_remove_page(tlb, pte); } -#ifndef CONFIG_ARM64_64K_PAGES +#if CONFIG_ARM64_PGTABLE_LEVELS > 2 static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp, unsigned long addr) { @@ -100,6 +100,15 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp, } #endif +#if CONFIG_ARM64_PGTABLE_LEVELS > 3 +static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pudp, + unsigned long addr) +{ + tlb_add_flush(tlb, addr); + tlb_remove_page(tlb, virt_to_page(pudp)); +} +#endif + static inline void __tlb_remove_pmd_tlb_entry(struct mmu_gather *tlb, pmd_t *pmdp, unsigned long address) { diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index b9349c4513ea..73f0ce570fb3 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -98,8 +98,8 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, dsb(ish); } -static inline void flush_tlb_range(struct vm_area_struct *vma, - unsigned long start, unsigned long end) +static inline void __flush_tlb_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end) { unsigned long asid = (unsigned long)ASID(vma->vm_mm) << 48; unsigned long addr; @@ -112,7 +112,7 @@ static inline void flush_tlb_range(struct vm_area_struct *vma, dsb(ish); } -static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end) +static inline void __flush_tlb_kernel_range(unsigned long start, unsigned long end) { unsigned long addr; start >>= 12; @@ -122,6 +122,30 @@ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12)) asm("tlbi vaae1is, %0" : : "r"(addr)); dsb(ish); + isb(); +} + +/* + * This is meant to avoid soft lock-ups on large TLB flushing ranges and not + * necessarily a performance improvement. + */ +#define MAX_TLB_RANGE (1024UL << PAGE_SHIFT) + +static inline void flush_tlb_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end) +{ + if ((end - start) <= MAX_TLB_RANGE) + __flush_tlb_range(vma, start, end); + else + flush_tlb_mm(vma->vm_mm); +} + +static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end) +{ + if ((end - start) <= MAX_TLB_RANGE) + __flush_tlb_kernel_range(start, end); + else + flush_tlb_all(); } /* @@ -131,8 +155,8 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { /* - * set_pte() does not have a DSB, so make sure that the page table - * write is visible. + * set_pte() does not have a DSB for user mappings, so make sure that + * the page table write is visible. */ dsb(ishst); } diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index e5f47df00c24..4bc95d27e063 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -26,7 +26,24 @@ #define __ARCH_WANT_COMPAT_SYS_SENDFILE #define __ARCH_WANT_SYS_FORK #define __ARCH_WANT_SYS_VFORK + +/* + * Compat syscall numbers used by the AArch64 kernel. + */ +#define __NR_compat_restart_syscall 0 +#define __NR_compat_sigreturn 119 +#define __NR_compat_rt_sigreturn 173 + +/* + * The following SVCs are ARM private. + */ +#define __ARM_NR_COMPAT_BASE 0x0f0000 +#define __ARM_NR_compat_cacheflush (__ARM_NR_COMPAT_BASE+2) +#define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE+5) + +#define __NR_compat_syscalls 383 #endif + #define __ARCH_WANT_SYS_CLONE #include <uapi/asm/unistd.h> diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index c8d8fc17bd5a..e242600c4046 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -21,403 +21,769 @@ #define __SYSCALL(x, y) #endif -__SYSCALL(0, sys_restart_syscall) -__SYSCALL(1, sys_exit) -__SYSCALL(2, sys_fork) -__SYSCALL(3, sys_read) -__SYSCALL(4, sys_write) -__SYSCALL(5, compat_sys_open) -__SYSCALL(6, sys_close) -__SYSCALL(7, sys_ni_syscall) /* 7 was sys_waitpid */ -__SYSCALL(8, sys_creat) -__SYSCALL(9, sys_link) -__SYSCALL(10, sys_unlink) -__SYSCALL(11, compat_sys_execve) -__SYSCALL(12, sys_chdir) -__SYSCALL(13, sys_ni_syscall) /* 13 was sys_time */ -__SYSCALL(14, sys_mknod) -__SYSCALL(15, sys_chmod) -__SYSCALL(16, sys_lchown16) -__SYSCALL(17, sys_ni_syscall) /* 17 was sys_break */ -__SYSCALL(18, sys_ni_syscall) /* 18 was sys_stat */ -__SYSCALL(19, compat_sys_lseek) -__SYSCALL(20, sys_getpid) -__SYSCALL(21, compat_sys_mount) -__SYSCALL(22, sys_ni_syscall) /* 22 was sys_umount */ -__SYSCALL(23, sys_setuid16) -__SYSCALL(24, sys_getuid16) -__SYSCALL(25, sys_ni_syscall) /* 25 was sys_stime */ -__SYSCALL(26, compat_sys_ptrace) -__SYSCALL(27, sys_ni_syscall) /* 27 was sys_alarm */ -__SYSCALL(28, sys_ni_syscall) /* 28 was sys_fstat */ -__SYSCALL(29, sys_pause) -__SYSCALL(30, sys_ni_syscall) /* 30 was sys_utime */ -__SYSCALL(31, sys_ni_syscall) /* 31 was sys_stty */ -__SYSCALL(32, sys_ni_syscall) /* 32 was sys_gtty */ -__SYSCALL(33, sys_access) -__SYSCALL(34, sys_nice) -__SYSCALL(35, sys_ni_syscall) /* 35 was sys_ftime */ -__SYSCALL(36, sys_sync) -__SYSCALL(37, sys_kill) -__SYSCALL(38, sys_rename) -__SYSCALL(39, sys_mkdir) -__SYSCALL(40, sys_rmdir) -__SYSCALL(41, sys_dup) -__SYSCALL(42, sys_pipe) -__SYSCALL(43, compat_sys_times) -__SYSCALL(44, sys_ni_syscall) /* 44 was sys_prof */ -__SYSCALL(45, sys_brk) -__SYSCALL(46, sys_setgid16) -__SYSCALL(47, sys_getgid16) -__SYSCALL(48, sys_ni_syscall) /* 48 was sys_signal */ -__SYSCALL(49, sys_geteuid16) -__SYSCALL(50, sys_getegid16) -__SYSCALL(51, sys_acct) -__SYSCALL(52, sys_umount) -__SYSCALL(53, sys_ni_syscall) /* 53 was sys_lock */ -__SYSCALL(54, compat_sys_ioctl) -__SYSCALL(55, compat_sys_fcntl) -__SYSCALL(56, sys_ni_syscall) /* 56 was sys_mpx */ -__SYSCALL(57, sys_setpgid) -__SYSCALL(58, sys_ni_syscall) /* 58 was sys_ulimit */ -__SYSCALL(59, sys_ni_syscall) /* 59 was sys_olduname */ -__SYSCALL(60, sys_umask) -__SYSCALL(61, sys_chroot) -__SYSCALL(62, compat_sys_ustat) -__SYSCALL(63, sys_dup2) -__SYSCALL(64, sys_getppid) -__SYSCALL(65, sys_getpgrp) -__SYSCALL(66, sys_setsid) -__SYSCALL(67, compat_sys_sigaction) -__SYSCALL(68, sys_ni_syscall) /* 68 was sys_sgetmask */ -__SYSCALL(69, sys_ni_syscall) /* 69 was sys_ssetmask */ -__SYSCALL(70, sys_setreuid16) -__SYSCALL(71, sys_setregid16) -__SYSCALL(72, sys_sigsuspend) -__SYSCALL(73, compat_sys_sigpending) -__SYSCALL(74, sys_sethostname) -__SYSCALL(75, compat_sys_setrlimit) -__SYSCALL(76, sys_ni_syscall) /* 76 was compat_sys_getrlimit */ -__SYSCALL(77, compat_sys_getrusage) -__SYSCALL(78, compat_sys_gettimeofday) -__SYSCALL(79, compat_sys_settimeofday) -__SYSCALL(80, sys_getgroups16) -__SYSCALL(81, sys_setgroups16) -__SYSCALL(82, sys_ni_syscall) /* 82 was compat_sys_select */ -__SYSCALL(83, sys_symlink) -__SYSCALL(84, sys_ni_syscall) /* 84 was sys_lstat */ -__SYSCALL(85, sys_readlink) -__SYSCALL(86, sys_uselib) -__SYSCALL(87, sys_swapon) -__SYSCALL(88, sys_reboot) -__SYSCALL(89, sys_ni_syscall) /* 89 was sys_readdir */ -__SYSCALL(90, sys_ni_syscall) /* 90 was sys_mmap */ -__SYSCALL(91, sys_munmap) -__SYSCALL(92, compat_sys_truncate) -__SYSCALL(93, compat_sys_ftruncate) -__SYSCALL(94, sys_fchmod) -__SYSCALL(95, sys_fchown16) -__SYSCALL(96, sys_getpriority) -__SYSCALL(97, sys_setpriority) -__SYSCALL(98, sys_ni_syscall) /* 98 was sys_profil */ -__SYSCALL(99, compat_sys_statfs) -__SYSCALL(100, compat_sys_fstatfs) -__SYSCALL(101, sys_ni_syscall) /* 101 was sys_ioperm */ -__SYSCALL(102, sys_ni_syscall) /* 102 was sys_socketcall */ -__SYSCALL(103, sys_syslog) -__SYSCALL(104, compat_sys_setitimer) -__SYSCALL(105, compat_sys_getitimer) -__SYSCALL(106, compat_sys_newstat) -__SYSCALL(107, compat_sys_newlstat) -__SYSCALL(108, compat_sys_newfstat) -__SYSCALL(109, sys_ni_syscall) /* 109 was sys_uname */ -__SYSCALL(110, sys_ni_syscall) /* 110 was sys_iopl */ -__SYSCALL(111, sys_vhangup) -__SYSCALL(112, sys_ni_syscall) /* 112 was sys_idle */ -__SYSCALL(113, sys_ni_syscall) /* 113 was sys_syscall */ -__SYSCALL(114, compat_sys_wait4) -__SYSCALL(115, sys_swapoff) -__SYSCALL(116, compat_sys_sysinfo) -__SYSCALL(117, sys_ni_syscall) /* 117 was sys_ipc */ -__SYSCALL(118, sys_fsync) -__SYSCALL(119, compat_sys_sigreturn_wrapper) -__SYSCALL(120, sys_clone) -__SYSCALL(121, sys_setdomainname) -__SYSCALL(122, sys_newuname) -__SYSCALL(123, sys_ni_syscall) /* 123 was sys_modify_ldt */ -__SYSCALL(124, compat_sys_adjtimex) -__SYSCALL(125, sys_mprotect) -__SYSCALL(126, compat_sys_sigprocmask) -__SYSCALL(127, sys_ni_syscall) /* 127 was sys_create_module */ -__SYSCALL(128, sys_init_module) -__SYSCALL(129, sys_delete_module) -__SYSCALL(130, sys_ni_syscall) /* 130 was sys_get_kernel_syms */ -__SYSCALL(131, sys_quotactl) -__SYSCALL(132, sys_getpgid) -__SYSCALL(133, sys_fchdir) -__SYSCALL(134, sys_bdflush) -__SYSCALL(135, sys_sysfs) -__SYSCALL(136, sys_personality) -__SYSCALL(137, sys_ni_syscall) /* 137 was sys_afs_syscall */ -__SYSCALL(138, sys_setfsuid16) -__SYSCALL(139, sys_setfsgid16) -__SYSCALL(140, sys_llseek) -__SYSCALL(141, compat_sys_getdents) -__SYSCALL(142, compat_sys_select) -__SYSCALL(143, sys_flock) -__SYSCALL(144, sys_msync) -__SYSCALL(145, compat_sys_readv) -__SYSCALL(146, compat_sys_writev) -__SYSCALL(147, sys_getsid) -__SYSCALL(148, sys_fdatasync) -__SYSCALL(149, compat_sys_sysctl) -__SYSCALL(150, sys_mlock) -__SYSCALL(151, sys_munlock) -__SYSCALL(152, sys_mlockall) -__SYSCALL(153, sys_munlockall) -__SYSCALL(154, sys_sched_setparam) -__SYSCALL(155, sys_sched_getparam) -__SYSCALL(156, sys_sched_setscheduler) -__SYSCALL(157, sys_sched_getscheduler) -__SYSCALL(158, sys_sched_yield) -__SYSCALL(159, sys_sched_get_priority_max) -__SYSCALL(160, sys_sched_get_priority_min) -__SYSCALL(161, compat_sys_sched_rr_get_interval) -__SYSCALL(162, compat_sys_nanosleep) -__SYSCALL(163, sys_mremap) -__SYSCALL(164, sys_setresuid16) -__SYSCALL(165, sys_getresuid16) -__SYSCALL(166, sys_ni_syscall) /* 166 was sys_vm86 */ -__SYSCALL(167, sys_ni_syscall) /* 167 was sys_query_module */ -__SYSCALL(168, sys_poll) -__SYSCALL(169, sys_ni_syscall) -__SYSCALL(170, sys_setresgid16) -__SYSCALL(171, sys_getresgid16) -__SYSCALL(172, sys_prctl) -__SYSCALL(173, compat_sys_rt_sigreturn_wrapper) -__SYSCALL(174, compat_sys_rt_sigaction) -__SYSCALL(175, compat_sys_rt_sigprocmask) -__SYSCALL(176, compat_sys_rt_sigpending) -__SYSCALL(177, compat_sys_rt_sigtimedwait) -__SYSCALL(178, compat_sys_rt_sigqueueinfo) -__SYSCALL(179, compat_sys_rt_sigsuspend) -__SYSCALL(180, compat_sys_pread64_wrapper) -__SYSCALL(181, compat_sys_pwrite64_wrapper) -__SYSCALL(182, sys_chown16) -__SYSCALL(183, sys_getcwd) -__SYSCALL(184, sys_capget) -__SYSCALL(185, sys_capset) -__SYSCALL(186, compat_sys_sigaltstack) -__SYSCALL(187, compat_sys_sendfile) -__SYSCALL(188, sys_ni_syscall) /* 188 reserved */ -__SYSCALL(189, sys_ni_syscall) /* 189 reserved */ -__SYSCALL(190, sys_vfork) -__SYSCALL(191, compat_sys_getrlimit) /* SuS compliant getrlimit */ -__SYSCALL(192, sys_mmap_pgoff) -__SYSCALL(193, compat_sys_truncate64_wrapper) -__SYSCALL(194, compat_sys_ftruncate64_wrapper) -__SYSCALL(195, sys_stat64) -__SYSCALL(196, sys_lstat64) -__SYSCALL(197, sys_fstat64) -__SYSCALL(198, sys_lchown) -__SYSCALL(199, sys_getuid) -__SYSCALL(200, sys_getgid) -__SYSCALL(201, sys_geteuid) -__SYSCALL(202, sys_getegid) -__SYSCALL(203, sys_setreuid) -__SYSCALL(204, sys_setregid) -__SYSCALL(205, sys_getgroups) -__SYSCALL(206, sys_setgroups) -__SYSCALL(207, sys_fchown) -__SYSCALL(208, sys_setresuid) -__SYSCALL(209, sys_getresuid) -__SYSCALL(210, sys_setresgid) -__SYSCALL(211, sys_getresgid) -__SYSCALL(212, sys_chown) -__SYSCALL(213, sys_setuid) -__SYSCALL(214, sys_setgid) -__SYSCALL(215, sys_setfsuid) -__SYSCALL(216, sys_setfsgid) -__SYSCALL(217, compat_sys_getdents64) -__SYSCALL(218, sys_pivot_root) -__SYSCALL(219, sys_mincore) -__SYSCALL(220, sys_madvise) -__SYSCALL(221, compat_sys_fcntl64) -__SYSCALL(222, sys_ni_syscall) /* 222 for tux */ -__SYSCALL(223, sys_ni_syscall) /* 223 is unused */ -__SYSCALL(224, sys_gettid) -__SYSCALL(225, compat_sys_readahead_wrapper) -__SYSCALL(226, sys_setxattr) -__SYSCALL(227, sys_lsetxattr) -__SYSCALL(228, sys_fsetxattr) -__SYSCALL(229, sys_getxattr) -__SYSCALL(230, sys_lgetxattr) -__SYSCALL(231, sys_fgetxattr) -__SYSCALL(232, sys_listxattr) -__SYSCALL(233, sys_llistxattr) -__SYSCALL(234, sys_flistxattr) -__SYSCALL(235, sys_removexattr) -__SYSCALL(236, sys_lremovexattr) -__SYSCALL(237, sys_fremovexattr) -__SYSCALL(238, sys_tkill) -__SYSCALL(239, sys_sendfile64) -__SYSCALL(240, compat_sys_futex) -__SYSCALL(241, compat_sys_sched_setaffinity) -__SYSCALL(242, compat_sys_sched_getaffinity) -__SYSCALL(243, compat_sys_io_setup) -__SYSCALL(244, sys_io_destroy) -__SYSCALL(245, compat_sys_io_getevents) -__SYSCALL(246, compat_sys_io_submit) -__SYSCALL(247, sys_io_cancel) -__SYSCALL(248, sys_exit_group) -__SYSCALL(249, compat_sys_lookup_dcookie) -__SYSCALL(250, sys_epoll_create) -__SYSCALL(251, sys_epoll_ctl) -__SYSCALL(252, sys_epoll_wait) -__SYSCALL(253, sys_remap_file_pages) -__SYSCALL(254, sys_ni_syscall) /* 254 for set_thread_area */ -__SYSCALL(255, sys_ni_syscall) /* 255 for get_thread_area */ -__SYSCALL(256, sys_set_tid_address) -__SYSCALL(257, compat_sys_timer_create) -__SYSCALL(258, compat_sys_timer_settime) -__SYSCALL(259, compat_sys_timer_gettime) -__SYSCALL(260, sys_timer_getoverrun) -__SYSCALL(261, sys_timer_delete) -__SYSCALL(262, compat_sys_clock_settime) -__SYSCALL(263, compat_sys_clock_gettime) -__SYSCALL(264, compat_sys_clock_getres) -__SYSCALL(265, compat_sys_clock_nanosleep) -__SYSCALL(266, compat_sys_statfs64_wrapper) -__SYSCALL(267, compat_sys_fstatfs64_wrapper) -__SYSCALL(268, sys_tgkill) -__SYSCALL(269, compat_sys_utimes) -__SYSCALL(270, compat_sys_fadvise64_64_wrapper) -__SYSCALL(271, sys_pciconfig_iobase) -__SYSCALL(272, sys_pciconfig_read) -__SYSCALL(273, sys_pciconfig_write) -__SYSCALL(274, compat_sys_mq_open) -__SYSCALL(275, sys_mq_unlink) -__SYSCALL(276, compat_sys_mq_timedsend) -__SYSCALL(277, compat_sys_mq_timedreceive) -__SYSCALL(278, compat_sys_mq_notify) -__SYSCALL(279, compat_sys_mq_getsetattr) -__SYSCALL(280, compat_sys_waitid) -__SYSCALL(281, sys_socket) -__SYSCALL(282, sys_bind) -__SYSCALL(283, sys_connect) -__SYSCALL(284, sys_listen) -__SYSCALL(285, sys_accept) -__SYSCALL(286, sys_getsockname) -__SYSCALL(287, sys_getpeername) -__SYSCALL(288, sys_socketpair) -__SYSCALL(289, sys_send) -__SYSCALL(290, sys_sendto) -__SYSCALL(291, compat_sys_recv) -__SYSCALL(292, compat_sys_recvfrom) -__SYSCALL(293, sys_shutdown) -__SYSCALL(294, compat_sys_setsockopt) -__SYSCALL(295, compat_sys_getsockopt) -__SYSCALL(296, compat_sys_sendmsg) -__SYSCALL(297, compat_sys_recvmsg) -__SYSCALL(298, sys_semop) -__SYSCALL(299, sys_semget) -__SYSCALL(300, compat_sys_semctl) -__SYSCALL(301, compat_sys_msgsnd) -__SYSCALL(302, compat_sys_msgrcv) -__SYSCALL(303, sys_msgget) -__SYSCALL(304, compat_sys_msgctl) -__SYSCALL(305, compat_sys_shmat) -__SYSCALL(306, sys_shmdt) -__SYSCALL(307, sys_shmget) -__SYSCALL(308, compat_sys_shmctl) -__SYSCALL(309, sys_add_key) -__SYSCALL(310, sys_request_key) -__SYSCALL(311, compat_sys_keyctl) -__SYSCALL(312, compat_sys_semtimedop) -__SYSCALL(313, sys_ni_syscall) -__SYSCALL(314, sys_ioprio_set) -__SYSCALL(315, sys_ioprio_get) -__SYSCALL(316, sys_inotify_init) -__SYSCALL(317, sys_inotify_add_watch) -__SYSCALL(318, sys_inotify_rm_watch) -__SYSCALL(319, compat_sys_mbind) -__SYSCALL(320, compat_sys_get_mempolicy) -__SYSCALL(321, compat_sys_set_mempolicy) -__SYSCALL(322, compat_sys_openat) -__SYSCALL(323, sys_mkdirat) -__SYSCALL(324, sys_mknodat) -__SYSCALL(325, sys_fchownat) -__SYSCALL(326, compat_sys_futimesat) -__SYSCALL(327, sys_fstatat64) -__SYSCALL(328, sys_unlinkat) -__SYSCALL(329, sys_renameat) -__SYSCALL(330, sys_linkat) -__SYSCALL(331, sys_symlinkat) -__SYSCALL(332, sys_readlinkat) -__SYSCALL(333, sys_fchmodat) -__SYSCALL(334, sys_faccessat) -__SYSCALL(335, compat_sys_pselect6) -__SYSCALL(336, compat_sys_ppoll) -__SYSCALL(337, sys_unshare) -__SYSCALL(338, compat_sys_set_robust_list) -__SYSCALL(339, compat_sys_get_robust_list) -__SYSCALL(340, sys_splice) -__SYSCALL(341, compat_sys_sync_file_range2_wrapper) -__SYSCALL(342, sys_tee) -__SYSCALL(343, compat_sys_vmsplice) -__SYSCALL(344, compat_sys_move_pages) -__SYSCALL(345, sys_getcpu) -__SYSCALL(346, compat_sys_epoll_pwait) -__SYSCALL(347, compat_sys_kexec_load) -__SYSCALL(348, compat_sys_utimensat) -__SYSCALL(349, compat_sys_signalfd) -__SYSCALL(350, sys_timerfd_create) -__SYSCALL(351, sys_eventfd) -__SYSCALL(352, compat_sys_fallocate_wrapper) -__SYSCALL(353, compat_sys_timerfd_settime) -__SYSCALL(354, compat_sys_timerfd_gettime) -__SYSCALL(355, compat_sys_signalfd4) -__SYSCALL(356, sys_eventfd2) -__SYSCALL(357, sys_epoll_create1) -__SYSCALL(358, sys_dup3) -__SYSCALL(359, sys_pipe2) -__SYSCALL(360, sys_inotify_init1) -__SYSCALL(361, compat_sys_preadv) -__SYSCALL(362, compat_sys_pwritev) -__SYSCALL(363, compat_sys_rt_tgsigqueueinfo) -__SYSCALL(364, sys_perf_event_open) -__SYSCALL(365, compat_sys_recvmmsg) -__SYSCALL(366, sys_accept4) -__SYSCALL(367, sys_fanotify_init) -__SYSCALL(368, compat_sys_fanotify_mark) -__SYSCALL(369, sys_prlimit64) -__SYSCALL(370, sys_name_to_handle_at) -__SYSCALL(371, compat_sys_open_by_handle_at) -__SYSCALL(372, compat_sys_clock_adjtime) -__SYSCALL(373, sys_syncfs) -__SYSCALL(374, compat_sys_sendmmsg) -__SYSCALL(375, sys_setns) -__SYSCALL(376, compat_sys_process_vm_readv) -__SYSCALL(377, compat_sys_process_vm_writev) -__SYSCALL(378, sys_kcmp) -__SYSCALL(379, sys_finit_module) -__SYSCALL(380, sys_sched_setattr) -__SYSCALL(381, sys_sched_getattr) -__SYSCALL(382, sys_renameat2) - -#define __NR_compat_syscalls 383 - -/* - * Compat syscall numbers used by the AArch64 kernel. - */ -#define __NR_compat_restart_syscall 0 -#define __NR_compat_sigreturn 119 -#define __NR_compat_rt_sigreturn 173 - - -/* - * The following SVCs are ARM private. - */ -#define __ARM_NR_COMPAT_BASE 0x0f0000 -#define __ARM_NR_compat_cacheflush (__ARM_NR_COMPAT_BASE+2) -#define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE+5) +#define __NR_restart_syscall 0 +__SYSCALL(__NR_restart_syscall, sys_restart_syscall) +#define __NR_exit 1 +__SYSCALL(__NR_exit, sys_exit) +#define __NR_fork 2 +__SYSCALL(__NR_fork, sys_fork) +#define __NR_read 3 +__SYSCALL(__NR_read, sys_read) +#define __NR_write 4 +__SYSCALL(__NR_write, sys_write) +#define __NR_open 5 +__SYSCALL(__NR_open, compat_sys_open) +#define __NR_close 6 +__SYSCALL(__NR_close, sys_close) + /* 7 was sys_waitpid */ +__SYSCALL(7, sys_ni_syscall) +#define __NR_creat 8 +__SYSCALL(__NR_creat, sys_creat) +#define __NR_link 9 +__SYSCALL(__NR_link, sys_link) +#define __NR_unlink 10 +__SYSCALL(__NR_unlink, sys_unlink) +#define __NR_execve 11 +__SYSCALL(__NR_execve, compat_sys_execve) +#define __NR_chdir 12 +__SYSCALL(__NR_chdir, sys_chdir) + /* 13 was sys_time */ +__SYSCALL(13, sys_ni_syscall) +#define __NR_mknod 14 +__SYSCALL(__NR_mknod, sys_mknod) +#define __NR_chmod 15 +__SYSCALL(__NR_chmod, sys_chmod) +#define __NR_lchown 16 +__SYSCALL(__NR_lchown, sys_lchown16) + /* 17 was sys_break */ +__SYSCALL(17, sys_ni_syscall) + /* 18 was sys_stat */ +__SYSCALL(18, sys_ni_syscall) +#define __NR_lseek 19 +__SYSCALL(__NR_lseek, compat_sys_lseek) +#define __NR_getpid 20 +__SYSCALL(__NR_getpid, sys_getpid) +#define __NR_mount 21 +__SYSCALL(__NR_mount, compat_sys_mount) + /* 22 was sys_umount */ +__SYSCALL(22, sys_ni_syscall) +#define __NR_setuid 23 +__SYSCALL(__NR_setuid, sys_setuid16) +#define __NR_getuid 24 +__SYSCALL(__NR_getuid, sys_getuid16) + /* 25 was sys_stime */ +__SYSCALL(25, sys_ni_syscall) +#define __NR_ptrace 26 +__SYSCALL(__NR_ptrace, compat_sys_ptrace) + /* 27 was sys_alarm */ +__SYSCALL(27, sys_ni_syscall) + /* 28 was sys_fstat */ +__SYSCALL(28, sys_ni_syscall) +#define __NR_pause 29 +__SYSCALL(__NR_pause, sys_pause) + /* 30 was sys_utime */ +__SYSCALL(30, sys_ni_syscall) + /* 31 was sys_stty */ +__SYSCALL(31, sys_ni_syscall) + /* 32 was sys_gtty */ +__SYSCALL(32, sys_ni_syscall) +#define __NR_access 33 +__SYSCALL(__NR_access, sys_access) +#define __NR_nice 34 +__SYSCALL(__NR_nice, sys_nice) + /* 35 was sys_ftime */ +__SYSCALL(35, sys_ni_syscall) +#define __NR_sync 36 +__SYSCALL(__NR_sync, sys_sync) +#define __NR_kill 37 +__SYSCALL(__NR_kill, sys_kill) +#define __NR_rename 38 +__SYSCALL(__NR_rename, sys_rename) +#define __NR_mkdir 39 +__SYSCALL(__NR_mkdir, sys_mkdir) +#define __NR_rmdir 40 +__SYSCALL(__NR_rmdir, sys_rmdir) +#define __NR_dup 41 +__SYSCALL(__NR_dup, sys_dup) +#define __NR_pipe 42 +__SYSCALL(__NR_pipe, sys_pipe) +#define __NR_times 43 +__SYSCALL(__NR_times, compat_sys_times) + /* 44 was sys_prof */ +__SYSCALL(44, sys_ni_syscall) +#define __NR_brk 45 +__SYSCALL(__NR_brk, sys_brk) +#define __NR_setgid 46 +__SYSCALL(__NR_setgid, sys_setgid16) +#define __NR_getgid 47 +__SYSCALL(__NR_getgid, sys_getgid16) + /* 48 was sys_signal */ +__SYSCALL(48, sys_ni_syscall) +#define __NR_geteuid 49 +__SYSCALL(__NR_geteuid, sys_geteuid16) +#define __NR_getegid 50 +__SYSCALL(__NR_getegid, sys_getegid16) +#define __NR_acct 51 +__SYSCALL(__NR_acct, sys_acct) +#define __NR_umount2 52 +__SYSCALL(__NR_umount2, sys_umount) + /* 53 was sys_lock */ +__SYSCALL(53, sys_ni_syscall) +#define __NR_ioctl 54 +__SYSCALL(__NR_ioctl, compat_sys_ioctl) +#define __NR_fcntl 55 +__SYSCALL(__NR_fcntl, compat_sys_fcntl) + /* 56 was sys_mpx */ +__SYSCALL(56, sys_ni_syscall) +#define __NR_setpgid 57 +__SYSCALL(__NR_setpgid, sys_setpgid) + /* 58 was sys_ulimit */ +__SYSCALL(58, sys_ni_syscall) + /* 59 was sys_olduname */ +__SYSCALL(59, sys_ni_syscall) +#define __NR_umask 60 +__SYSCALL(__NR_umask, sys_umask) +#define __NR_chroot 61 +__SYSCALL(__NR_chroot, sys_chroot) +#define __NR_ustat 62 +__SYSCALL(__NR_ustat, compat_sys_ustat) +#define __NR_dup2 63 +__SYSCALL(__NR_dup2, sys_dup2) +#define __NR_getppid 64 +__SYSCALL(__NR_getppid, sys_getppid) +#define __NR_getpgrp 65 +__SYSCALL(__NR_getpgrp, sys_getpgrp) +#define __NR_setsid 66 +__SYSCALL(__NR_setsid, sys_setsid) +#define __NR_sigaction 67 +__SYSCALL(__NR_sigaction, compat_sys_sigaction) + /* 68 was sys_sgetmask */ +__SYSCALL(68, sys_ni_syscall) + /* 69 was sys_ssetmask */ +__SYSCALL(69, sys_ni_syscall) +#define __NR_setreuid 70 +__SYSCALL(__NR_setreuid, sys_setreuid16) +#define __NR_setregid 71 +__SYSCALL(__NR_setregid, sys_setregid16) +#define __NR_sigsuspend 72 +__SYSCALL(__NR_sigsuspend, sys_sigsuspend) +#define __NR_sigpending 73 +__SYSCALL(__NR_sigpending, compat_sys_sigpending) +#define __NR_sethostname 74 +__SYSCALL(__NR_sethostname, sys_sethostname) +#define __NR_setrlimit 75 +__SYSCALL(__NR_setrlimit, compat_sys_setrlimit) + /* 76 was compat_sys_getrlimit */ +__SYSCALL(76, sys_ni_syscall) +#define __NR_getrusage 77 +__SYSCALL(__NR_getrusage, compat_sys_getrusage) +#define __NR_gettimeofday 78 +__SYSCALL(__NR_gettimeofday, compat_sys_gettimeofday) +#define __NR_settimeofday 79 +__SYSCALL(__NR_settimeofday, compat_sys_settimeofday) +#define __NR_getgroups 80 +__SYSCALL(__NR_getgroups, sys_getgroups16) +#define __NR_setgroups 81 +__SYSCALL(__NR_setgroups, sys_setgroups16) + /* 82 was compat_sys_select */ +__SYSCALL(82, sys_ni_syscall) +#define __NR_symlink 83 +__SYSCALL(__NR_symlink, sys_symlink) + /* 84 was sys_lstat */ +__SYSCALL(84, sys_ni_syscall) +#define __NR_readlink 85 +__SYSCALL(__NR_readlink, sys_readlink) +#define __NR_uselib 86 +__SYSCALL(__NR_uselib, sys_uselib) +#define __NR_swapon 87 +__SYSCALL(__NR_swapon, sys_swapon) +#define __NR_reboot 88 +__SYSCALL(__NR_reboot, sys_reboot) + /* 89 was sys_readdir */ +__SYSCALL(89, sys_ni_syscall) + /* 90 was sys_mmap */ +__SYSCALL(90, sys_ni_syscall) +#define __NR_munmap 91 +__SYSCALL(__NR_munmap, sys_munmap) +#define __NR_truncate 92 +__SYSCALL(__NR_truncate, compat_sys_truncate) +#define __NR_ftruncate 93 +__SYSCALL(__NR_ftruncate, compat_sys_ftruncate) +#define __NR_fchmod 94 +__SYSCALL(__NR_fchmod, sys_fchmod) +#define __NR_fchown 95 +__SYSCALL(__NR_fchown, sys_fchown16) +#define __NR_getpriority 96 +__SYSCALL(__NR_getpriority, sys_getpriority) +#define __NR_setpriority 97 +__SYSCALL(__NR_setpriority, sys_setpriority) + /* 98 was sys_profil */ +__SYSCALL(98, sys_ni_syscall) +#define __NR_statfs 99 +__SYSCALL(__NR_statfs, compat_sys_statfs) +#define __NR_fstatfs 100 +__SYSCALL(__NR_fstatfs, compat_sys_fstatfs) + /* 101 was sys_ioperm */ +__SYSCALL(101, sys_ni_syscall) + /* 102 was sys_socketcall */ +__SYSCALL(102, sys_ni_syscall) +#define __NR_syslog 103 +__SYSCALL(__NR_syslog, sys_syslog) +#define __NR_setitimer 104 +__SYSCALL(__NR_setitimer, compat_sys_setitimer) +#define __NR_getitimer 105 +__SYSCALL(__NR_getitimer, compat_sys_getitimer) +#define __NR_stat 106 +__SYSCALL(__NR_stat, compat_sys_newstat) +#define __NR_lstat 107 +__SYSCALL(__NR_lstat, compat_sys_newlstat) +#define __NR_fstat 108 +__SYSCALL(__NR_fstat, compat_sys_newfstat) + /* 109 was sys_uname */ +__SYSCALL(109, sys_ni_syscall) + /* 110 was sys_iopl */ +__SYSCALL(110, sys_ni_syscall) +#define __NR_vhangup 111 +__SYSCALL(__NR_vhangup, sys_vhangup) + /* 112 was sys_idle */ +__SYSCALL(112, sys_ni_syscall) + /* 113 was sys_syscall */ +__SYSCALL(113, sys_ni_syscall) +#define __NR_wait4 114 +__SYSCALL(__NR_wait4, compat_sys_wait4) +#define __NR_swapoff 115 +__SYSCALL(__NR_swapoff, sys_swapoff) +#define __NR_sysinfo 116 +__SYSCALL(__NR_sysinfo, compat_sys_sysinfo) + /* 117 was sys_ipc */ +__SYSCALL(117, sys_ni_syscall) +#define __NR_fsync 118 +__SYSCALL(__NR_fsync, sys_fsync) +#define __NR_sigreturn 119 +__SYSCALL(__NR_sigreturn, compat_sys_sigreturn_wrapper) +#define __NR_clone 120 +__SYSCALL(__NR_clone, sys_clone) +#define __NR_setdomainname 121 +__SYSCALL(__NR_setdomainname, sys_setdomainname) +#define __NR_uname 122 +__SYSCALL(__NR_uname, sys_newuname) + /* 123 was sys_modify_ldt */ +__SYSCALL(123, sys_ni_syscall) +#define __NR_adjtimex 124 +__SYSCALL(__NR_adjtimex, compat_sys_adjtimex) +#define __NR_mprotect 125 +__SYSCALL(__NR_mprotect, sys_mprotect) +#define __NR_sigprocmask 126 +__SYSCALL(__NR_sigprocmask, compat_sys_sigprocmask) + /* 127 was sys_create_module */ +__SYSCALL(127, sys_ni_syscall) +#define __NR_init_module 128 +__SYSCALL(__NR_init_module, sys_init_module) +#define __NR_delete_module 129 +__SYSCALL(__NR_delete_module, sys_delete_module) + /* 130 was sys_get_kernel_syms */ +__SYSCALL(130, sys_ni_syscall) +#define __NR_quotactl 131 +__SYSCALL(__NR_quotactl, sys_quotactl) +#define __NR_getpgid 132 +__SYSCALL(__NR_getpgid, sys_getpgid) +#define __NR_fchdir 133 +__SYSCALL(__NR_fchdir, sys_fchdir) +#define __NR_bdflush 134 +__SYSCALL(__NR_bdflush, sys_bdflush) +#define __NR_sysfs 135 +__SYSCALL(__NR_sysfs, sys_sysfs) +#define __NR_personality 136 +__SYSCALL(__NR_personality, sys_personality) + /* 137 was sys_afs_syscall */ +__SYSCALL(137, sys_ni_syscall) +#define __NR_setfsuid 138 +__SYSCALL(__NR_setfsuid, sys_setfsuid16) +#define __NR_setfsgid 139 +__SYSCALL(__NR_setfsgid, sys_setfsgid16) +#define __NR__llseek 140 +__SYSCALL(__NR__llseek, sys_llseek) +#define __NR_getdents 141 +__SYSCALL(__NR_getdents, compat_sys_getdents) +#define __NR__newselect 142 +__SYSCALL(__NR__newselect, compat_sys_select) +#define __NR_flock 143 +__SYSCALL(__NR_flock, sys_flock) +#define __NR_msync 144 +__SYSCALL(__NR_msync, sys_msync) +#define __NR_readv 145 +__SYSCALL(__NR_readv, compat_sys_readv) +#define __NR_writev 146 +__SYSCALL(__NR_writev, compat_sys_writev) +#define __NR_getsid 147 +__SYSCALL(__NR_getsid, sys_getsid) +#define __NR_fdatasync 148 +__SYSCALL(__NR_fdatasync, sys_fdatasync) +#define __NR__sysctl 149 +__SYSCALL(__NR__sysctl, compat_sys_sysctl) +#define __NR_mlock 150 +__SYSCALL(__NR_mlock, sys_mlock) +#define __NR_munlock 151 +__SYSCALL(__NR_munlock, sys_munlock) +#define __NR_mlockall 152 +__SYSCALL(__NR_mlockall, sys_mlockall) +#define __NR_munlockall 153 +__SYSCALL(__NR_munlockall, sys_munlockall) +#define __NR_sched_setparam 154 +__SYSCALL(__NR_sched_setparam, sys_sched_setparam) +#define __NR_sched_getparam 155 +__SYSCALL(__NR_sched_getparam, sys_sched_getparam) +#define __NR_sched_setscheduler 156 +__SYSCALL(__NR_sched_setscheduler, sys_sched_setscheduler) +#define __NR_sched_getscheduler 157 +__SYSCALL(__NR_sched_getscheduler, sys_sched_getscheduler) +#define __NR_sched_yield 158 +__SYSCALL(__NR_sched_yield, sys_sched_yield) +#define __NR_sched_get_priority_max 159 +__SYSCALL(__NR_sched_get_priority_max, sys_sched_get_priority_max) +#define __NR_sched_get_priority_min 160 +__SYSCALL(__NR_sched_get_priority_min, sys_sched_get_priority_min) +#define __NR_sched_rr_get_interval 161 +__SYSCALL(__NR_sched_rr_get_interval, compat_sys_sched_rr_get_interval) +#define __NR_nanosleep 162 +__SYSCALL(__NR_nanosleep, compat_sys_nanosleep) +#define __NR_mremap 163 +__SYSCALL(__NR_mremap, sys_mremap) +#define __NR_setresuid 164 +__SYSCALL(__NR_setresuid, sys_setresuid16) +#define __NR_getresuid 165 +__SYSCALL(__NR_getresuid, sys_getresuid16) + /* 166 was sys_vm86 */ +__SYSCALL(166, sys_ni_syscall) + /* 167 was sys_query_module */ +__SYSCALL(167, sys_ni_syscall) +#define __NR_poll 168 +__SYSCALL(__NR_poll, sys_poll) +#define __NR_nfsservctl 169 +__SYSCALL(__NR_nfsservctl, sys_ni_syscall) +#define __NR_setresgid 170 +__SYSCALL(__NR_setresgid, sys_setresgid16) +#define __NR_getresgid 171 +__SYSCALL(__NR_getresgid, sys_getresgid16) +#define __NR_prctl 172 +__SYSCALL(__NR_prctl, sys_prctl) +#define __NR_rt_sigreturn 173 +__SYSCALL(__NR_rt_sigreturn, compat_sys_rt_sigreturn_wrapper) +#define __NR_rt_sigaction 174 +__SYSCALL(__NR_rt_sigaction, compat_sys_rt_sigaction) +#define __NR_rt_sigprocmask 175 +__SYSCALL(__NR_rt_sigprocmask, compat_sys_rt_sigprocmask) +#define __NR_rt_sigpending 176 +__SYSCALL(__NR_rt_sigpending, compat_sys_rt_sigpending) +#define __NR_rt_sigtimedwait 177 +__SYSCALL(__NR_rt_sigtimedwait, compat_sys_rt_sigtimedwait) +#define __NR_rt_sigqueueinfo 178 +__SYSCALL(__NR_rt_sigqueueinfo, compat_sys_rt_sigqueueinfo) +#define __NR_rt_sigsuspend 179 +__SYSCALL(__NR_rt_sigsuspend, compat_sys_rt_sigsuspend) +#define __NR_pread64 180 +__SYSCALL(__NR_pread64, compat_sys_pread64_wrapper) +#define __NR_pwrite64 181 +__SYSCALL(__NR_pwrite64, compat_sys_pwrite64_wrapper) +#define __NR_chown 182 +__SYSCALL(__NR_chown, sys_chown16) +#define __NR_getcwd 183 +__SYSCALL(__NR_getcwd, sys_getcwd) +#define __NR_capget 184 +__SYSCALL(__NR_capget, sys_capget) +#define __NR_capset 185 +__SYSCALL(__NR_capset, sys_capset) +#define __NR_sigaltstack 186 +__SYSCALL(__NR_sigaltstack, compat_sys_sigaltstack) +#define __NR_sendfile 187 +__SYSCALL(__NR_sendfile, compat_sys_sendfile) + /* 188 reserved */ +__SYSCALL(188, sys_ni_syscall) + /* 189 reserved */ +__SYSCALL(189, sys_ni_syscall) +#define __NR_vfork 190 +__SYSCALL(__NR_vfork, sys_vfork) +#define __NR_ugetrlimit 191 /* SuS compliant getrlimit */ +__SYSCALL(__NR_ugetrlimit, compat_sys_getrlimit) /* SuS compliant getrlimit */ +#define __NR_mmap2 192 +__SYSCALL(__NR_mmap2, sys_mmap_pgoff) +#define __NR_truncate64 193 +__SYSCALL(__NR_truncate64, compat_sys_truncate64_wrapper) +#define __NR_ftruncate64 194 +__SYSCALL(__NR_ftruncate64, compat_sys_ftruncate64_wrapper) +#define __NR_stat64 195 +__SYSCALL(__NR_stat64, sys_stat64) +#define __NR_lstat64 196 +__SYSCALL(__NR_lstat64, sys_lstat64) +#define __NR_fstat64 197 +__SYSCALL(__NR_fstat64, sys_fstat64) +#define __NR_lchown32 198 +__SYSCALL(__NR_lchown32, sys_lchown) +#define __NR_getuid32 199 +__SYSCALL(__NR_getuid32, sys_getuid) +#define __NR_getgid32 200 +__SYSCALL(__NR_getgid32, sys_getgid) +#define __NR_geteuid32 201 +__SYSCALL(__NR_geteuid32, sys_geteuid) +#define __NR_getegid32 202 +__SYSCALL(__NR_getegid32, sys_getegid) +#define __NR_setreuid32 203 +__SYSCALL(__NR_setreuid32, sys_setreuid) +#define __NR_setregid32 204 +__SYSCALL(__NR_setregid32, sys_setregid) +#define __NR_getgroups32 205 +__SYSCALL(__NR_getgroups32, sys_getgroups) +#define __NR_setgroups32 206 +__SYSCALL(__NR_setgroups32, sys_setgroups) +#define __NR_fchown32 207 +__SYSCALL(__NR_fchown32, sys_fchown) +#define __NR_setresuid32 208 +__SYSCALL(__NR_setresuid32, sys_setresuid) +#define __NR_getresuid32 209 +__SYSCALL(__NR_getresuid32, sys_getresuid) +#define __NR_setresgid32 210 +__SYSCALL(__NR_setresgid32, sys_setresgid) +#define __NR_getresgid32 211 +__SYSCALL(__NR_getresgid32, sys_getresgid) +#define __NR_chown32 212 +__SYSCALL(__NR_chown32, sys_chown) +#define __NR_setuid32 213 +__SYSCALL(__NR_setuid32, sys_setuid) +#define __NR_setgid32 214 +__SYSCALL(__NR_setgid32, sys_setgid) +#define __NR_setfsuid32 215 +__SYSCALL(__NR_setfsuid32, sys_setfsuid) +#define __NR_setfsgid32 216 +__SYSCALL(__NR_setfsgid32, sys_setfsgid) +#define __NR_getdents64 217 +__SYSCALL(__NR_getdents64, compat_sys_getdents64) +#define __NR_pivot_root 218 +__SYSCALL(__NR_pivot_root, sys_pivot_root) +#define __NR_mincore 219 +__SYSCALL(__NR_mincore, sys_mincore) +#define __NR_madvise 220 +__SYSCALL(__NR_madvise, sys_madvise) +#define __NR_fcntl64 221 +__SYSCALL(__NR_fcntl64, compat_sys_fcntl64) + /* 222 for tux */ +__SYSCALL(222, sys_ni_syscall) + /* 223 is unused */ +__SYSCALL(223, sys_ni_syscall) +#define __NR_gettid 224 +__SYSCALL(__NR_gettid, sys_gettid) +#define __NR_readahead 225 +__SYSCALL(__NR_readahead, compat_sys_readahead_wrapper) +#define __NR_setxattr 226 +__SYSCALL(__NR_setxattr, sys_setxattr) +#define __NR_lsetxattr 227 +__SYSCALL(__NR_lsetxattr, sys_lsetxattr) +#define __NR_fsetxattr 228 +__SYSCALL(__NR_fsetxattr, sys_fsetxattr) +#define __NR_getxattr 229 +__SYSCALL(__NR_getxattr, sys_getxattr) +#define __NR_lgetxattr 230 +__SYSCALL(__NR_lgetxattr, sys_lgetxattr) +#define __NR_fgetxattr 231 +__SYSCALL(__NR_fgetxattr, sys_fgetxattr) +#define __NR_listxattr 232 +__SYSCALL(__NR_listxattr, sys_listxattr) +#define __NR_llistxattr 233 +__SYSCALL(__NR_llistxattr, sys_llistxattr) +#define __NR_flistxattr 234 +__SYSCALL(__NR_flistxattr, sys_flistxattr) +#define __NR_removexattr 235 +__SYSCALL(__NR_removexattr, sys_removexattr) +#define __NR_lremovexattr 236 +__SYSCALL(__NR_lremovexattr, sys_lremovexattr) +#define __NR_fremovexattr 237 +__SYSCALL(__NR_fremovexattr, sys_fremovexattr) +#define __NR_tkill 238 +__SYSCALL(__NR_tkill, sys_tkill) +#define __NR_sendfile64 239 +__SYSCALL(__NR_sendfile64, sys_sendfile64) +#define __NR_futex 240 +__SYSCALL(__NR_futex, compat_sys_futex) +#define __NR_sched_setaffinity 241 +__SYSCALL(__NR_sched_setaffinity, compat_sys_sched_setaffinity) +#define __NR_sched_getaffinity 242 +__SYSCALL(__NR_sched_getaffinity, compat_sys_sched_getaffinity) +#define __NR_io_setup 243 +__SYSCALL(__NR_io_setup, compat_sys_io_setup) +#define __NR_io_destroy 244 +__SYSCALL(__NR_io_destroy, sys_io_destroy) +#define __NR_io_getevents 245 +__SYSCALL(__NR_io_getevents, compat_sys_io_getevents) +#define __NR_io_submit 246 +__SYSCALL(__NR_io_submit, compat_sys_io_submit) +#define __NR_io_cancel 247 +__SYSCALL(__NR_io_cancel, sys_io_cancel) +#define __NR_exit_group 248 +__SYSCALL(__NR_exit_group, sys_exit_group) +#define __NR_lookup_dcookie 249 +__SYSCALL(__NR_lookup_dcookie, compat_sys_lookup_dcookie) +#define __NR_epoll_create 250 +__SYSCALL(__NR_epoll_create, sys_epoll_create) +#define __NR_epoll_ctl 251 +__SYSCALL(__NR_epoll_ctl, sys_epoll_ctl) +#define __NR_epoll_wait 252 +__SYSCALL(__NR_epoll_wait, sys_epoll_wait) +#define __NR_remap_file_pages 253 +__SYSCALL(__NR_remap_file_pages, sys_remap_file_pages) + /* 254 for set_thread_area */ +__SYSCALL(254, sys_ni_syscall) + /* 255 for get_thread_area */ +__SYSCALL(255, sys_ni_syscall) +#define __NR_set_tid_address 256 +__SYSCALL(__NR_set_tid_address, sys_set_tid_address) +#define __NR_timer_create 257 +__SYSCALL(__NR_timer_create, compat_sys_timer_create) +#define __NR_timer_settime 258 +__SYSCALL(__NR_timer_settime, compat_sys_timer_settime) +#define __NR_timer_gettime 259 +__SYSCALL(__NR_timer_gettime, compat_sys_timer_gettime) +#define __NR_timer_getoverrun 260 +__SYSCALL(__NR_timer_getoverrun, sys_timer_getoverrun) +#define __NR_timer_delete 261 +__SYSCALL(__NR_timer_delete, sys_timer_delete) +#define __NR_clock_settime 262 +__SYSCALL(__NR_clock_settime, compat_sys_clock_settime) +#define __NR_clock_gettime 263 +__SYSCALL(__NR_clock_gettime, compat_sys_clock_gettime) +#define __NR_clock_getres 264 +__SYSCALL(__NR_clock_getres, compat_sys_clock_getres) +#define __NR_clock_nanosleep 265 +__SYSCALL(__NR_clock_nanosleep, compat_sys_clock_nanosleep) +#define __NR_statfs64 266 +__SYSCALL(__NR_statfs64, compat_sys_statfs64_wrapper) +#define __NR_fstatfs64 267 +__SYSCALL(__NR_fstatfs64, compat_sys_fstatfs64_wrapper) +#define __NR_tgkill 268 +__SYSCALL(__NR_tgkill, sys_tgkill) +#define __NR_utimes 269 +__SYSCALL(__NR_utimes, compat_sys_utimes) +#define __NR_arm_fadvise64_64 270 +__SYSCALL(__NR_arm_fadvise64_64, compat_sys_fadvise64_64_wrapper) +#define __NR_pciconfig_iobase 271 +__SYSCALL(__NR_pciconfig_iobase, sys_pciconfig_iobase) +#define __NR_pciconfig_read 272 +__SYSCALL(__NR_pciconfig_read, sys_pciconfig_read) +#define __NR_pciconfig_write 273 +__SYSCALL(__NR_pciconfig_write, sys_pciconfig_write) +#define __NR_mq_open 274 +__SYSCALL(__NR_mq_open, compat_sys_mq_open) +#define __NR_mq_unlink 275 +__SYSCALL(__NR_mq_unlink, sys_mq_unlink) +#define __NR_mq_timedsend 276 +__SYSCALL(__NR_mq_timedsend, compat_sys_mq_timedsend) +#define __NR_mq_timedreceive 277 +__SYSCALL(__NR_mq_timedreceive, compat_sys_mq_timedreceive) +#define __NR_mq_notify 278 +__SYSCALL(__NR_mq_notify, compat_sys_mq_notify) +#define __NR_mq_getsetattr 279 +__SYSCALL(__NR_mq_getsetattr, compat_sys_mq_getsetattr) +#define __NR_waitid 280 +__SYSCALL(__NR_waitid, compat_sys_waitid) +#define __NR_socket 281 +__SYSCALL(__NR_socket, sys_socket) +#define __NR_bind 282 +__SYSCALL(__NR_bind, sys_bind) +#define __NR_connect 283 +__SYSCALL(__NR_connect, sys_connect) +#define __NR_listen 284 +__SYSCALL(__NR_listen, sys_listen) +#define __NR_accept 285 +__SYSCALL(__NR_accept, sys_accept) +#define __NR_getsockname 286 +__SYSCALL(__NR_getsockname, sys_getsockname) +#define __NR_getpeername 287 +__SYSCALL(__NR_getpeername, sys_getpeername) +#define __NR_socketpair 288 +__SYSCALL(__NR_socketpair, sys_socketpair) +#define __NR_send 289 +__SYSCALL(__NR_send, sys_send) +#define __NR_sendto 290 +__SYSCALL(__NR_sendto, sys_sendto) +#define __NR_recv 291 +__SYSCALL(__NR_recv, compat_sys_recv) +#define __NR_recvfrom 292 +__SYSCALL(__NR_recvfrom, compat_sys_recvfrom) +#define __NR_shutdown 293 +__SYSCALL(__NR_shutdown, sys_shutdown) +#define __NR_setsockopt 294 +__SYSCALL(__NR_setsockopt, compat_sys_setsockopt) +#define __NR_getsockopt 295 +__SYSCALL(__NR_getsockopt, compat_sys_getsockopt) +#define __NR_sendmsg 296 +__SYSCALL(__NR_sendmsg, compat_sys_sendmsg) +#define __NR_recvmsg 297 +__SYSCALL(__NR_recvmsg, compat_sys_recvmsg) +#define __NR_semop 298 +__SYSCALL(__NR_semop, sys_semop) +#define __NR_semget 299 +__SYSCALL(__NR_semget, sys_semget) +#define __NR_semctl 300 +__SYSCALL(__NR_semctl, compat_sys_semctl) +#define __NR_msgsnd 301 +__SYSCALL(__NR_msgsnd, compat_sys_msgsnd) +#define __NR_msgrcv 302 +__SYSCALL(__NR_msgrcv, compat_sys_msgrcv) +#define __NR_msgget 303 +__SYSCALL(__NR_msgget, sys_msgget) +#define __NR_msgctl 304 +__SYSCALL(__NR_msgctl, compat_sys_msgctl) +#define __NR_shmat 305 +__SYSCALL(__NR_shmat, compat_sys_shmat) +#define __NR_shmdt 306 +__SYSCALL(__NR_shmdt, sys_shmdt) +#define __NR_shmget 307 +__SYSCALL(__NR_shmget, sys_shmget) +#define __NR_shmctl 308 +__SYSCALL(__NR_shmctl, compat_sys_shmctl) +#define __NR_add_key 309 +__SYSCALL(__NR_add_key, sys_add_key) +#define __NR_request_key 310 +__SYSCALL(__NR_request_key, sys_request_key) +#define __NR_keyctl 311 +__SYSCALL(__NR_keyctl, compat_sys_keyctl) +#define __NR_semtimedop 312 +__SYSCALL(__NR_semtimedop, compat_sys_semtimedop) +#define __NR_vserver 313 +__SYSCALL(__NR_vserver, sys_ni_syscall) +#define __NR_ioprio_set 314 +__SYSCALL(__NR_ioprio_set, sys_ioprio_set) +#define __NR_ioprio_get 315 +__SYSCALL(__NR_ioprio_get, sys_ioprio_get) +#define __NR_inotify_init 316 +__SYSCALL(__NR_inotify_init, sys_inotify_init) +#define __NR_inotify_add_watch 317 +__SYSCALL(__NR_inotify_add_watch, sys_inotify_add_watch) +#define __NR_inotify_rm_watch 318 +__SYSCALL(__NR_inotify_rm_watch, sys_inotify_rm_watch) +#define __NR_mbind 319 +__SYSCALL(__NR_mbind, compat_sys_mbind) +#define __NR_get_mempolicy 320 +__SYSCALL(__NR_get_mempolicy, compat_sys_get_mempolicy) +#define __NR_set_mempolicy 321 +__SYSCALL(__NR_set_mempolicy, compat_sys_set_mempolicy) +#define __NR_openat 322 +__SYSCALL(__NR_openat, compat_sys_openat) +#define __NR_mkdirat 323 +__SYSCALL(__NR_mkdirat, sys_mkdirat) +#define __NR_mknodat 324 +__SYSCALL(__NR_mknodat, sys_mknodat) +#define __NR_fchownat 325 +__SYSCALL(__NR_fchownat, sys_fchownat) +#define __NR_futimesat 326 +__SYSCALL(__NR_futimesat, compat_sys_futimesat) +#define __NR_fstatat64 327 +__SYSCALL(__NR_fstatat64, sys_fstatat64) +#define __NR_unlinkat 328 +__SYSCALL(__NR_unlinkat, sys_unlinkat) +#define __NR_renameat 329 +__SYSCALL(__NR_renameat, sys_renameat) +#define __NR_linkat 330 +__SYSCALL(__NR_linkat, sys_linkat) +#define __NR_symlinkat 331 +__SYSCALL(__NR_symlinkat, sys_symlinkat) +#define __NR_readlinkat 332 +__SYSCALL(__NR_readlinkat, sys_readlinkat) +#define __NR_fchmodat 333 +__SYSCALL(__NR_fchmodat, sys_fchmodat) +#define __NR_faccessat 334 +__SYSCALL(__NR_faccessat, sys_faccessat) +#define __NR_pselect6 335 +__SYSCALL(__NR_pselect6, compat_sys_pselect6) +#define __NR_ppoll 336 +__SYSCALL(__NR_ppoll, compat_sys_ppoll) +#define __NR_unshare 337 +__SYSCALL(__NR_unshare, sys_unshare) +#define __NR_set_robust_list 338 +__SYSCALL(__NR_set_robust_list, compat_sys_set_robust_list) +#define __NR_get_robust_list 339 +__SYSCALL(__NR_get_robust_list, compat_sys_get_robust_list) +#define __NR_splice 340 +__SYSCALL(__NR_splice, sys_splice) +#define __NR_sync_file_range2 341 +__SYSCALL(__NR_sync_file_range2, compat_sys_sync_file_range2_wrapper) +#define __NR_tee 342 +__SYSCALL(__NR_tee, sys_tee) +#define __NR_vmsplice 343 +__SYSCALL(__NR_vmsplice, compat_sys_vmsplice) +#define __NR_move_pages 344 +__SYSCALL(__NR_move_pages, compat_sys_move_pages) +#define __NR_getcpu 345 +__SYSCALL(__NR_getcpu, sys_getcpu) +#define __NR_epoll_pwait 346 +__SYSCALL(__NR_epoll_pwait, compat_sys_epoll_pwait) +#define __NR_kexec_load 347 +__SYSCALL(__NR_kexec_load, compat_sys_kexec_load) +#define __NR_utimensat 348 +__SYSCALL(__NR_utimensat, compat_sys_utimensat) +#define __NR_signalfd 349 +__SYSCALL(__NR_signalfd, compat_sys_signalfd) +#define __NR_timerfd_create 350 +__SYSCALL(__NR_timerfd_create, sys_timerfd_create) +#define __NR_eventfd 351 +__SYSCALL(__NR_eventfd, sys_eventfd) +#define __NR_fallocate 352 +__SYSCALL(__NR_fallocate, compat_sys_fallocate_wrapper) +#define __NR_timerfd_settime 353 +__SYSCALL(__NR_timerfd_settime, compat_sys_timerfd_settime) +#define __NR_timerfd_gettime 354 +__SYSCALL(__NR_timerfd_gettime, compat_sys_timerfd_gettime) +#define __NR_signalfd4 355 +__SYSCALL(__NR_signalfd4, compat_sys_signalfd4) +#define __NR_eventfd2 356 +__SYSCALL(__NR_eventfd2, sys_eventfd2) +#define __NR_epoll_create1 357 +__SYSCALL(__NR_epoll_create1, sys_epoll_create1) +#define __NR_dup3 358 +__SYSCALL(__NR_dup3, sys_dup3) +#define __NR_pipe2 359 +__SYSCALL(__NR_pipe2, sys_pipe2) +#define __NR_inotify_init1 360 +__SYSCALL(__NR_inotify_init1, sys_inotify_init1) +#define __NR_preadv 361 +__SYSCALL(__NR_preadv, compat_sys_preadv) +#define __NR_pwritev 362 +__SYSCALL(__NR_pwritev, compat_sys_pwritev) +#define __NR_rt_tgsigqueueinfo 363 +__SYSCALL(__NR_rt_tgsigqueueinfo, compat_sys_rt_tgsigqueueinfo) +#define __NR_perf_event_open 364 +__SYSCALL(__NR_perf_event_open, sys_perf_event_open) +#define __NR_recvmmsg 365 +__SYSCALL(__NR_recvmmsg, compat_sys_recvmmsg) +#define __NR_accept4 366 +__SYSCALL(__NR_accept4, sys_accept4) +#define __NR_fanotify_init 367 +__SYSCALL(__NR_fanotify_init, sys_fanotify_init) +#define __NR_fanotify_mark 368 +__SYSCALL(__NR_fanotify_mark, compat_sys_fanotify_mark) +#define __NR_prlimit64 369 +__SYSCALL(__NR_prlimit64, sys_prlimit64) +#define __NR_name_to_handle_at 370 +__SYSCALL(__NR_name_to_handle_at, sys_name_to_handle_at) +#define __NR_open_by_handle_at 371 +__SYSCALL(__NR_open_by_handle_at, compat_sys_open_by_handle_at) +#define __NR_clock_adjtime 372 +__SYSCALL(__NR_clock_adjtime, compat_sys_clock_adjtime) +#define __NR_syncfs 373 +__SYSCALL(__NR_syncfs, sys_syncfs) +#define __NR_sendmmsg 374 +__SYSCALL(__NR_sendmmsg, compat_sys_sendmmsg) +#define __NR_setns 375 +__SYSCALL(__NR_setns, sys_setns) +#define __NR_process_vm_readv 376 +__SYSCALL(__NR_process_vm_readv, compat_sys_process_vm_readv) +#define __NR_process_vm_writev 377 +__SYSCALL(__NR_process_vm_writev, compat_sys_process_vm_writev) +#define __NR_kcmp 378 +__SYSCALL(__NR_kcmp, sys_kcmp) +#define __NR_finit_module 379 +__SYSCALL(__NR_finit_module, sys_finit_module) +#define __NR_sched_setattr 380 +__SYSCALL(__NR_sched_setattr, sys_sched_setattr) +#define __NR_sched_getattr 381 +__SYSCALL(__NR_sched_getattr, sys_sched_getattr) +#define __NR_renameat2 382 +__SYSCALL(__NR_renameat2, sys_renameat2) diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index cdaedad3afe5..df7ef8768fc2 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -4,8 +4,7 @@ CPPFLAGS_vmlinux.lds := -DTEXT_OFFSET=$(TEXT_OFFSET) AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET) -CFLAGS_efi-stub.o := -DTEXT_OFFSET=$(TEXT_OFFSET) \ - -I$(src)/../../../scripts/dtc/libfdt +CFLAGS_efi-stub.o := -DTEXT_OFFSET=$(TEXT_OFFSET) CFLAGS_REMOVE_ftrace.o = -pg CFLAGS_REMOVE_insn.o = -pg @@ -15,7 +14,8 @@ CFLAGS_REMOVE_return_address.o = -pg arm64-obj-y := cputable.o debug-monitors.o entry.o irq.o fpsimd.o \ entry-fpsimd.o process.o ptrace.o setup.o signal.o \ sys.o stacktrace.o time.o traps.o io.o vdso.o \ - hyp-stub.o psci.o cpu_ops.o insn.o return_address.o + hyp-stub.o psci.o cpu_ops.o insn.o return_address.o \ + cpuinfo.o arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \ sys_compat.o diff --git a/arch/arm64/kernel/cpu_ops.c b/arch/arm64/kernel/cpu_ops.c index d62d12fb36c8..cce952440c64 100644 --- a/arch/arm64/kernel/cpu_ops.c +++ b/arch/arm64/kernel/cpu_ops.c @@ -30,8 +30,8 @@ const struct cpu_operations *cpu_ops[NR_CPUS]; static const struct cpu_operations *supported_cpu_ops[] __initconst = { #ifdef CONFIG_SMP &smp_spin_table_ops, - &cpu_psci_ops, #endif + &cpu_psci_ops, NULL, }; diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c new file mode 100644 index 000000000000..f798f66634af --- /dev/null +++ b/arch/arm64/kernel/cpuinfo.c @@ -0,0 +1,192 @@ +/* + * Record and handle CPU attributes. + * + * Copyright (C) 2014 ARM Ltd. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#include <asm/arch_timer.h> +#include <asm/cachetype.h> +#include <asm/cpu.h> +#include <asm/cputype.h> + +#include <linux/bitops.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/printk.h> +#include <linux/smp.h> + +/* + * In case the boot CPU is hotpluggable, we record its initial state and + * current state separately. Certain system registers may contain different + * values depending on configuration at or after reset. + */ +DEFINE_PER_CPU(struct cpuinfo_arm64, cpu_data); +static struct cpuinfo_arm64 boot_cpu_data; + +static char *icache_policy_str[] = { + [ICACHE_POLICY_RESERVED] = "RESERVED/UNKNOWN", + [ICACHE_POLICY_AIVIVT] = "AIVIVT", + [ICACHE_POLICY_VIPT] = "VIPT", + [ICACHE_POLICY_PIPT] = "PIPT", +}; + +unsigned long __icache_flags; + +static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info) +{ + unsigned int cpu = smp_processor_id(); + u32 l1ip = CTR_L1IP(info->reg_ctr); + + if (l1ip != ICACHE_POLICY_PIPT) + set_bit(ICACHEF_ALIASING, &__icache_flags); + if (l1ip == ICACHE_POLICY_AIVIVT); + set_bit(ICACHEF_AIVIVT, &__icache_flags); + + pr_info("Detected %s I-cache on CPU%d\n", icache_policy_str[l1ip], cpu); +} + +static int check_reg_mask(char *name, u64 mask, u64 boot, u64 cur, int cpu) +{ + if ((boot & mask) == (cur & mask)) + return 0; + + pr_warn("SANITY CHECK: Unexpected variation in %s. Boot CPU: %#016lx, CPU%d: %#016lx\n", + name, (unsigned long)boot, cpu, (unsigned long)cur); + + return 1; +} + +#define CHECK_MASK(field, mask, boot, cur, cpu) \ + check_reg_mask(#field, mask, (boot)->reg_ ## field, (cur)->reg_ ## field, cpu) + +#define CHECK(field, boot, cur, cpu) \ + CHECK_MASK(field, ~0ULL, boot, cur, cpu) + +/* + * Verify that CPUs don't have unexpected differences that will cause problems. + */ +static void cpuinfo_sanity_check(struct cpuinfo_arm64 *cur) +{ + unsigned int cpu = smp_processor_id(); + struct cpuinfo_arm64 *boot = &boot_cpu_data; + unsigned int diff = 0; + + /* + * The kernel can handle differing I-cache policies, but otherwise + * caches should look identical. Userspace JITs will make use of + * *minLine. + */ + diff |= CHECK_MASK(ctr, 0xffff3fff, boot, cur, cpu); + + /* + * Userspace may perform DC ZVA instructions. Mismatched block sizes + * could result in too much or too little memory being zeroed if a + * process is preempted and migrated between CPUs. + */ + diff |= CHECK(dczid, boot, cur, cpu); + + /* If different, timekeeping will be broken (especially with KVM) */ + diff |= CHECK(cntfrq, boot, cur, cpu); + + /* + * Even in big.LITTLE, processors should be identical instruction-set + * wise. + */ + diff |= CHECK(id_aa64isar0, boot, cur, cpu); + diff |= CHECK(id_aa64isar1, boot, cur, cpu); + + /* + * Differing PARange support is fine as long as all peripherals and + * memory are mapped within the minimum PARange of all CPUs. + * Linux should not care about secure memory. + * ID_AA64MMFR1 is currently RES0. + */ + diff |= CHECK_MASK(id_aa64mmfr0, 0xffffffffffff0ff0, boot, cur, cpu); + diff |= CHECK(id_aa64mmfr1, boot, cur, cpu); + + /* + * EL3 is not our concern. + * ID_AA64PFR1 is currently RES0. + */ + diff |= CHECK_MASK(id_aa64pfr0, 0xffffffffffff0fff, boot, cur, cpu); + diff |= CHECK(id_aa64pfr1, boot, cur, cpu); + + /* + * If we have AArch32, we care about 32-bit features for compat. These + * registers should be RES0 otherwise. + */ + diff |= CHECK(id_isar0, boot, cur, cpu); + diff |= CHECK(id_isar1, boot, cur, cpu); + diff |= CHECK(id_isar2, boot, cur, cpu); + diff |= CHECK(id_isar3, boot, cur, cpu); + diff |= CHECK(id_isar4, boot, cur, cpu); + diff |= CHECK(id_isar5, boot, cur, cpu); + diff |= CHECK(id_mmfr0, boot, cur, cpu); + diff |= CHECK(id_mmfr1, boot, cur, cpu); + diff |= CHECK(id_mmfr2, boot, cur, cpu); + diff |= CHECK(id_mmfr3, boot, cur, cpu); + diff |= CHECK(id_pfr0, boot, cur, cpu); + diff |= CHECK(id_pfr1, boot, cur, cpu); + + /* + * Mismatched CPU features are a recipe for disaster. Don't even + * pretend to support them. + */ + WARN_TAINT_ONCE(diff, TAINT_CPU_OUT_OF_SPEC, + "Unsupported CPU feature variation."); +} + +static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) +{ + info->reg_cntfrq = arch_timer_get_cntfrq(); + info->reg_ctr = read_cpuid_cachetype(); + info->reg_dczid = read_cpuid(DCZID_EL0); + info->reg_midr = read_cpuid_id(); + + info->reg_id_aa64isar0 = read_cpuid(ID_AA64ISAR0_EL1); + info->reg_id_aa64isar1 = read_cpuid(ID_AA64ISAR1_EL1); + info->reg_id_aa64mmfr0 = read_cpuid(ID_AA64MMFR0_EL1); + info->reg_id_aa64mmfr1 = read_cpuid(ID_AA64MMFR1_EL1); + info->reg_id_aa64pfr0 = read_cpuid(ID_AA64PFR0_EL1); + info->reg_id_aa64pfr1 = read_cpuid(ID_AA64PFR1_EL1); + + info->reg_id_isar0 = read_cpuid(ID_ISAR0_EL1); + info->reg_id_isar1 = read_cpuid(ID_ISAR1_EL1); + info->reg_id_isar2 = read_cpuid(ID_ISAR2_EL1); + info->reg_id_isar3 = read_cpuid(ID_ISAR3_EL1); + info->reg_id_isar4 = read_cpuid(ID_ISAR4_EL1); + info->reg_id_isar5 = read_cpuid(ID_ISAR5_EL1); + info->reg_id_mmfr0 = read_cpuid(ID_MMFR0_EL1); + info->reg_id_mmfr1 = read_cpuid(ID_MMFR1_EL1); + info->reg_id_mmfr2 = read_cpuid(ID_MMFR2_EL1); + info->reg_id_mmfr3 = read_cpuid(ID_MMFR3_EL1); + info->reg_id_pfr0 = read_cpuid(ID_PFR0_EL1); + info->reg_id_pfr1 = read_cpuid(ID_PFR1_EL1); + + cpuinfo_detect_icache_policy(info); +} + +void cpuinfo_store_cpu(void) +{ + struct cpuinfo_arm64 *info = this_cpu_ptr(&cpu_data); + __cpuinfo_store_cpu(info); + cpuinfo_sanity_check(info); +} + +void __init cpuinfo_store_boot_cpu(void) +{ + struct cpuinfo_arm64 *info = &per_cpu(cpu_data, 0); + __cpuinfo_store_cpu(info); + + boot_cpu_data = *info; +} diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index a7fb874b595e..fe5b94078d82 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -315,20 +315,20 @@ static int brk_handler(unsigned long addr, unsigned int esr, { siginfo_t info; - if (call_break_hook(regs, esr) == DBG_HOOK_HANDLED) - return 0; + if (user_mode(regs)) { + info = (siginfo_t) { + .si_signo = SIGTRAP, + .si_errno = 0, + .si_code = TRAP_BRKPT, + .si_addr = (void __user *)instruction_pointer(regs), + }; - if (!user_mode(regs)) + force_sig_info(SIGTRAP, &info, current); + } else if (call_break_hook(regs, esr) != DBG_HOOK_HANDLED) { + pr_warning("Unexpected kernel BRK exception at EL1\n"); return -EFAULT; + } - info = (siginfo_t) { - .si_signo = SIGTRAP, - .si_errno = 0, - .si_code = TRAP_BRKPT, - .si_addr = (void __user *)instruction_pointer(regs), - }; - - force_sig_info(SIGTRAP, &info, current); return 0; } diff --git a/arch/arm64/kernel/efi-stub.c b/arch/arm64/kernel/efi-stub.c index 60e98a639ac5..1317fef8dde9 100644 --- a/arch/arm64/kernel/efi-stub.c +++ b/arch/arm64/kernel/efi-stub.c @@ -10,48 +10,16 @@ * */ #include <linux/efi.h> -#include <linux/libfdt.h> +#include <asm/efi.h> #include <asm/sections.h> -#include <generated/compile.h> -#include <generated/utsrelease.h> -/* - * AArch64 requires the DTB to be 8-byte aligned in the first 512MiB from - * start of kernel and may not cross a 2MiB boundary. We set alignment to - * 2MiB so we know it won't cross a 2MiB boundary. - */ -#define EFI_FDT_ALIGN SZ_2M /* used by allocate_new_fdt_and_exit_boot() */ -#define MAX_FDT_OFFSET SZ_512M - -#define efi_call_early(f, ...) sys_table_arg->boottime->f(__VA_ARGS__) - -static void efi_char16_printk(efi_system_table_t *sys_table_arg, - efi_char16_t *str); - -static efi_status_t efi_open_volume(efi_system_table_t *sys_table, - void *__image, void **__fh); -static efi_status_t efi_file_close(void *handle); - -static efi_status_t -efi_file_read(void *handle, unsigned long *size, void *addr); - -static efi_status_t -efi_file_size(efi_system_table_t *sys_table, void *__fh, - efi_char16_t *filename_16, void **handle, u64 *file_sz); - -/* Include shared EFI stub code */ -#include "../../../drivers/firmware/efi/efi-stub-helper.c" -#include "../../../drivers/firmware/efi/fdt.c" -#include "../../../drivers/firmware/efi/arm-stub.c" - - -static efi_status_t handle_kernel_image(efi_system_table_t *sys_table, - unsigned long *image_addr, - unsigned long *image_size, - unsigned long *reserve_addr, - unsigned long *reserve_size, - unsigned long dram_base, - efi_loaded_image_t *image) +efi_status_t handle_kernel_image(efi_system_table_t *sys_table, + unsigned long *image_addr, + unsigned long *image_size, + unsigned long *reserve_addr, + unsigned long *reserve_size, + unsigned long dram_base, + efi_loaded_image_t *image) { efi_status_t status; unsigned long kernel_size, kernel_memsize = 0; @@ -71,7 +39,7 @@ static efi_status_t handle_kernel_image(efi_system_table_t *sys_table, if (*image_addr != (dram_base + TEXT_OFFSET)) { pr_efi_err(sys_table, "Failed to alloc kernel memory\n"); efi_free(sys_table, kernel_memsize, *image_addr); - return EFI_ERROR; + return EFI_LOAD_ERROR; } *image_size = kernel_memsize; } diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index 14db1f6e8d7f..e72f3100958f 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -414,13 +414,24 @@ static int __init arm64_enter_virtual_mode(void) for_each_efi_memory_desc(&memmap, md) { if (!(md->attribute & EFI_MEMORY_RUNTIME)) continue; - if (remap_region(md, &virt_md)) - ++count; + if (!remap_region(md, &virt_md)) + goto err_unmap; + ++count; } efi.systab = (__force void *)efi_lookup_mapped_addr(efi_system_table); - if (efi.systab) - set_bit(EFI_SYSTEM_TABLES, &efi.flags); + if (!efi.systab) { + /* + * If we have no virtual mapping for the System Table at this + * point, the memory map doesn't cover the physical offset where + * it resides. This means the System Table will be inaccessible + * to Runtime Services themselves once the virtual mapping is + * installed. + */ + pr_err("Failed to remap EFI System Table -- buggy firmware?\n"); + goto err_unmap; + } + set_bit(EFI_SYSTEM_TABLES, &efi.flags); local_irq_save(flags); cpu_switch_mm(idmap_pg_dir, &init_mm); @@ -449,21 +460,18 @@ static int __init arm64_enter_virtual_mode(void) /* Set up runtime services function pointers */ runtime = efi.systab->runtime; - efi.get_time = runtime->get_time; - efi.set_time = runtime->set_time; - efi.get_wakeup_time = runtime->get_wakeup_time; - efi.set_wakeup_time = runtime->set_wakeup_time; - efi.get_variable = runtime->get_variable; - efi.get_next_variable = runtime->get_next_variable; - efi.set_variable = runtime->set_variable; - efi.query_variable_info = runtime->query_variable_info; - efi.update_capsule = runtime->update_capsule; - efi.query_capsule_caps = runtime->query_capsule_caps; - efi.get_next_high_mono_count = runtime->get_next_high_mono_count; - efi.reset_system = runtime->reset_system; - + efi_native_runtime_setup(); set_bit(EFI_RUNTIME_SERVICES, &efi.flags); return 0; + +err_unmap: + /* unmap all mappings that succeeded: there are 'count' of those */ + for (virt_md = virtmap; count--; virt_md += memmap.desc_size) { + md = virt_md; + iounmap((__force void __iomem *)md->virt_addr); + } + kfree(virtmap); + return -1; } early_initcall(arm64_enter_virtual_mode); diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S index d358ccacfc00..c44a82f146b1 100644 --- a/arch/arm64/kernel/entry-fpsimd.S +++ b/arch/arm64/kernel/entry-fpsimd.S @@ -52,7 +52,7 @@ ENDPROC(fpsimd_load_state) ENTRY(fpsimd_save_partial_state) fpsimd_save_partial x0, 1, 8, 9 ret -ENDPROC(fpsimd_load_partial_state) +ENDPROC(fpsimd_save_partial_state) /* * Load the bottom n FP registers. diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S index aa5f9fcbf9ee..38e704e597f7 100644 --- a/arch/arm64/kernel/entry-ftrace.S +++ b/arch/arm64/kernel/entry-ftrace.S @@ -96,11 +96,6 @@ * - ftrace_graph_caller to set up an exit hook */ ENTRY(_mcount) -#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST - ldr x0, =ftrace_trace_stop - ldr x0, [x0] // if ftrace_trace_stop - ret // return; -#endif mcount_enter ldr x0, =ftrace_trace_function diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 9ce04ba6bcb0..f0b5e5120a87 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -27,7 +27,32 @@ #include <asm/esr.h> #include <asm/thread_info.h> #include <asm/unistd.h> -#include <asm/unistd32.h> + +/* + * Context tracking subsystem. Used to instrument transitions + * between user and kernel mode. + */ + .macro ct_user_exit, syscall = 0 +#ifdef CONFIG_CONTEXT_TRACKING + bl context_tracking_user_exit + .if \syscall == 1 + /* + * Save/restore needed during syscalls. Restore syscall arguments from + * the values already saved on stack during kernel_entry. + */ + ldp x0, x1, [sp] + ldp x2, x3, [sp, #S_X2] + ldp x4, x5, [sp, #S_X4] + ldp x6, x7, [sp, #S_X6] + .endif +#endif + .endm + + .macro ct_user_enter +#ifdef CONFIG_CONTEXT_TRACKING + bl context_tracking_user_enter +#endif + .endm /* * Bad Abort numbers @@ -91,6 +116,7 @@ .macro kernel_exit, el, ret = 0 ldp x21, x22, [sp, #S_PC] // load ELR, SPSR .if \el == 0 + ct_user_enter ldr x23, [sp, #S_SP] // load return stack pointer .endif .if \ret @@ -353,7 +379,6 @@ el0_sync: lsr x24, x25, #ESR_EL1_EC_SHIFT // exception class cmp x24, #ESR_EL1_EC_SVC64 // SVC in 64-bit state b.eq el0_svc - adr lr, ret_to_user cmp x24, #ESR_EL1_EC_DABT_EL0 // data abort in EL0 b.eq el0_da cmp x24, #ESR_EL1_EC_IABT_EL0 // instruction abort in EL0 @@ -382,7 +407,6 @@ el0_sync_compat: lsr x24, x25, #ESR_EL1_EC_SHIFT // exception class cmp x24, #ESR_EL1_EC_SVC32 // SVC in 32-bit state b.eq el0_svc_compat - adr lr, ret_to_user cmp x24, #ESR_EL1_EC_DABT_EL0 // data abort in EL0 b.eq el0_da cmp x24, #ESR_EL1_EC_IABT_EL0 // instruction abort in EL0 @@ -425,48 +449,59 @@ el0_da: /* * Data abort handling */ - mrs x0, far_el1 - bic x0, x0, #(0xff << 56) + mrs x26, far_el1 // enable interrupts before calling the main handler enable_dbg_and_irq + ct_user_exit + bic x0, x26, #(0xff << 56) mov x1, x25 mov x2, sp + adr lr, ret_to_user b do_mem_abort el0_ia: /* * Instruction abort handling */ - mrs x0, far_el1 + mrs x26, far_el1 // enable interrupts before calling the main handler enable_dbg_and_irq + ct_user_exit + mov x0, x26 orr x1, x25, #1 << 24 // use reserved ISS bit for instruction aborts mov x2, sp + adr lr, ret_to_user b do_mem_abort el0_fpsimd_acc: /* * Floating Point or Advanced SIMD access */ enable_dbg + ct_user_exit mov x0, x25 mov x1, sp + adr lr, ret_to_user b do_fpsimd_acc el0_fpsimd_exc: /* * Floating Point or Advanced SIMD exception */ enable_dbg + ct_user_exit mov x0, x25 mov x1, sp + adr lr, ret_to_user b do_fpsimd_exc el0_sp_pc: /* * Stack or PC alignment exception handling */ - mrs x0, far_el1 + mrs x26, far_el1 // enable interrupts before calling the main handler enable_dbg_and_irq + mov x0, x26 mov x1, x25 mov x2, sp + adr lr, ret_to_user b do_sp_pc_abort el0_undef: /* @@ -474,7 +509,9 @@ el0_undef: */ // enable interrupts before calling the main handler enable_dbg_and_irq + ct_user_exit mov x0, sp + adr lr, ret_to_user b do_undefinstr el0_dbg: /* @@ -486,12 +523,15 @@ el0_dbg: mov x2, sp bl do_debug_exception enable_dbg + ct_user_exit b ret_to_user el0_inv: enable_dbg + ct_user_exit mov x0, sp mov x1, #BAD_SYNC mrs x2, esr_el1 + adr lr, ret_to_user b bad_mode ENDPROC(el0_sync) @@ -504,6 +544,7 @@ el0_irq_naked: bl trace_hardirqs_off #endif + ct_user_exit irq_handler #ifdef CONFIG_TRACE_IRQFLAGS @@ -608,6 +649,7 @@ el0_svc: el0_svc_naked: // compat entry point stp x0, scno, [sp, #S_ORIG_X0] // save the original x0 and syscall number enable_dbg_and_irq + ct_user_exit 1 ldr x16, [tsk, #TI_FLAGS] // check for syscall hooks tst x16, #_TIF_SYSCALL_WORK diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index a2c1195abb7f..144f10567f82 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -22,6 +22,7 @@ #include <linux/linkage.h> #include <linux/init.h> +#include <linux/irqchip/arm-gic-v3.h> #include <asm/assembler.h> #include <asm/ptrace.h> @@ -35,37 +36,31 @@ #include <asm/page.h> #include <asm/virt.h> -/* - * swapper_pg_dir is the virtual address of the initial page table. We place - * the page tables 3 * PAGE_SIZE below KERNEL_RAM_VADDR. The idmap_pg_dir has - * 2 pages and is placed below swapper_pg_dir. - */ #define KERNEL_RAM_VADDR (PAGE_OFFSET + TEXT_OFFSET) -#if (KERNEL_RAM_VADDR & 0xfffff) != 0x80000 -#error KERNEL_RAM_VADDR must start at 0xXXX80000 +#if (TEXT_OFFSET & 0xf) != 0 +#error TEXT_OFFSET must be at least 16B aligned +#elif (PAGE_OFFSET & 0xfffff) != 0 +#error PAGE_OFFSET must be at least 2MB aligned +#elif TEXT_OFFSET > 0xfffff +#error TEXT_OFFSET must be less than 2MB #endif -#define SWAPPER_DIR_SIZE (3 * PAGE_SIZE) -#define IDMAP_DIR_SIZE (2 * PAGE_SIZE) - - .globl swapper_pg_dir - .equ swapper_pg_dir, KERNEL_RAM_VADDR - SWAPPER_DIR_SIZE - - .globl idmap_pg_dir - .equ idmap_pg_dir, swapper_pg_dir - IDMAP_DIR_SIZE - - .macro pgtbl, ttb0, ttb1, phys - add \ttb1, \phys, #TEXT_OFFSET - SWAPPER_DIR_SIZE - sub \ttb0, \ttb1, #IDMAP_DIR_SIZE + .macro pgtbl, ttb0, ttb1, virt_to_phys + ldr \ttb1, =swapper_pg_dir + ldr \ttb0, =idmap_pg_dir + add \ttb1, \ttb1, \virt_to_phys + add \ttb0, \ttb0, \virt_to_phys .endm #ifdef CONFIG_ARM64_64K_PAGES #define BLOCK_SHIFT PAGE_SHIFT #define BLOCK_SIZE PAGE_SIZE +#define TABLE_SHIFT PMD_SHIFT #else #define BLOCK_SHIFT SECTION_SHIFT #define BLOCK_SIZE SECTION_SIZE +#define TABLE_SHIFT PUD_SHIFT #endif #define KERNEL_START KERNEL_RAM_VADDR @@ -120,9 +115,9 @@ efi_head: b stext // branch to kernel start, magic .long 0 // reserved #endif - .quad TEXT_OFFSET // Image load offset from start of RAM - .quad 0 // reserved - .quad 0 // reserved + .quad _kernel_offset_le // Image load offset from start of RAM, little-endian + .quad _kernel_size_le // Effective size of kernel image, little-endian + .quad _kernel_flags_le // Informative flags, little-endian .quad 0 // reserved .quad 0 // reserved .quad 0 // reserved @@ -295,6 +290,23 @@ CPU_LE( bic x0, x0, #(3 << 24) ) // Clear the EE and E0E bits for EL1 msr cnthctl_el2, x0 msr cntvoff_el2, xzr // Clear virtual offset +#ifdef CONFIG_ARM_GIC_V3 + /* GICv3 system register access */ + mrs x0, id_aa64pfr0_el1 + ubfx x0, x0, #24, #4 + cmp x0, #1 + b.ne 3f + + mrs_s x0, ICC_SRE_EL2 + orr x0, x0, #ICC_SRE_EL2_SRE // Set ICC_SRE_EL2.SRE==1 + orr x0, x0, #ICC_SRE_EL2_ENABLE // Set ICC_SRE_EL2.Enable==1 + msr_s ICC_SRE_EL2, x0 + isb // Make sure SRE is now set + msr_s ICH_HCR_EL2, xzr // Reset ICC_HCR_EL2 to defaults + +3: +#endif + /* Populate ID registers. */ mrs x0, midr_el1 mrs x1, mpidr_el1 @@ -413,7 +425,7 @@ ENTRY(secondary_startup) mov x23, x0 // x23=current cpu_table cbz x23, __error_p // invalid processor (x23=0)? - pgtbl x25, x26, x24 // x25=TTBR0, x26=TTBR1 + pgtbl x25, x26, x28 // x25=TTBR0, x26=TTBR1 ldr x12, [x23, #CPU_INFO_SETUP] add x12, x12, x28 // __virt_to_phys blr x12 // initialise processor @@ -455,8 +467,13 @@ ENDPROC(__enable_mmu) * x27 = *virtual* address to jump to upon completion * * other registers depend on the function called upon completion + * + * We align the entire function to the smallest power of two larger than it to + * ensure it fits within a single block map entry. Otherwise were PHYS_OFFSET + * close to the end of a 512MB or 1GB block we might require an additional + * table to map the entire function. */ - .align 6 + .align 4 __turn_mmu_on: msr sctlr_el1, x0 isb @@ -479,17 +496,38 @@ ENDPROC(__calc_phys_offset) .quad PAGE_OFFSET /* - * Macro to populate the PGD for the corresponding block entry in the next - * level (tbl) for the given virtual address. + * Macro to create a table entry to the next page. + * + * tbl: page table address + * virt: virtual address + * shift: #imm page table shift + * ptrs: #imm pointers per table page + * + * Preserves: virt + * Corrupts: tmp1, tmp2 + * Returns: tbl -> next level table page address + */ + .macro create_table_entry, tbl, virt, shift, ptrs, tmp1, tmp2 + lsr \tmp1, \virt, #\shift + and \tmp1, \tmp1, #\ptrs - 1 // table index + add \tmp2, \tbl, #PAGE_SIZE + orr \tmp2, \tmp2, #PMD_TYPE_TABLE // address of next table and entry type + str \tmp2, [\tbl, \tmp1, lsl #3] + add \tbl, \tbl, #PAGE_SIZE // next level table page + .endm + +/* + * Macro to populate the PGD (and possibily PUD) for the corresponding + * block entry in the next level (tbl) for the given virtual address. * - * Preserves: pgd, tbl, virt + * Preserves: tbl, next, virt * Corrupts: tmp1, tmp2 */ - .macro create_pgd_entry, pgd, tbl, virt, tmp1, tmp2 - lsr \tmp1, \virt, #PGDIR_SHIFT - and \tmp1, \tmp1, #PTRS_PER_PGD - 1 // PGD index - orr \tmp2, \tbl, #3 // PGD entry table type - str \tmp2, [\pgd, \tmp1, lsl #3] + .macro create_pgd_entry, tbl, virt, tmp1, tmp2 + create_table_entry \tbl, \virt, PGDIR_SHIFT, PTRS_PER_PGD, \tmp1, \tmp2 +#if SWAPPER_PGTABLE_LEVELS == 3 + create_table_entry \tbl, \virt, TABLE_SHIFT, PTRS_PER_PTE, \tmp1, \tmp2 +#endif .endm /* @@ -522,7 +560,7 @@ ENDPROC(__calc_phys_offset) * - pgd entry for fixed mappings (TTBR1) */ __create_page_tables: - pgtbl x25, x26, x24 // idmap_pg_dir and swapper_pg_dir addresses + pgtbl x25, x26, x28 // idmap_pg_dir and swapper_pg_dir addresses mov x27, lr /* @@ -550,10 +588,10 @@ __create_page_tables: /* * Create the identity mapping. */ - add x0, x25, #PAGE_SIZE // section table address + mov x0, x25 // idmap_pg_dir ldr x3, =KERNEL_START add x3, x3, x28 // __pa(KERNEL_START) - create_pgd_entry x25, x0, x3, x5, x6 + create_pgd_entry x0, x3, x5, x6 ldr x6, =KERNEL_END mov x5, x3 // __pa(KERNEL_START) add x6, x6, x28 // __pa(KERNEL_END) @@ -562,9 +600,9 @@ __create_page_tables: /* * Map the kernel image (starting with PHYS_OFFSET). */ - add x0, x26, #PAGE_SIZE // section table address + mov x0, x26 // swapper_pg_dir mov x5, #PAGE_OFFSET - create_pgd_entry x26, x0, x5, x3, x6 + create_pgd_entry x0, x5, x3, x6 ldr x6, =KERNEL_END mov x3, x24 // phys offset create_block_map x0, x7, x3, x5, x6 @@ -586,13 +624,6 @@ __create_page_tables: create_block_map x0, x7, x3, x5, x6 1: /* - * Create the pgd entry for the fixed mappings. - */ - ldr x5, =FIXADDR_TOP // Fixed mapping virtual address - add x0, x26, #2 * PAGE_SIZE // section table address - create_pgd_entry x26, x0, x5, x6, x7 - - /* * Since the page tables have been populated with non-cacheable * accesses (MMU disabled), invalidate the idmap and swapper page * tables again to remove any speculatively loaded cache lines. @@ -611,7 +642,7 @@ ENDPROC(__create_page_tables) __switch_data: .quad __mmap_switched .quad __bss_start // x6 - .quad _end // x7 + .quad __bss_stop // x7 .quad processor_id // x4 .quad __fdt_pointer // x5 .quad memstart_addr // x6 diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S index 0959611d9ff1..a272f335c289 100644 --- a/arch/arm64/kernel/hyp-stub.S +++ b/arch/arm64/kernel/hyp-stub.S @@ -19,6 +19,7 @@ #include <linux/init.h> #include <linux/linkage.h> +#include <linux/irqchip/arm-gic-v3.h> #include <asm/assembler.h> #include <asm/ptrace.h> diff --git a/arch/arm64/kernel/image.h b/arch/arm64/kernel/image.h new file mode 100644 index 000000000000..8fae0756e175 --- /dev/null +++ b/arch/arm64/kernel/image.h @@ -0,0 +1,62 @@ +/* + * Linker script macros to generate Image header fields. + * + * Copyright (C) 2014 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_IMAGE_H +#define __ASM_IMAGE_H + +#ifndef LINKER_SCRIPT +#error This file should only be included in vmlinux.lds.S +#endif + +/* + * There aren't any ELF relocations we can use to endian-swap values known only + * at link time (e.g. the subtraction of two symbol addresses), so we must get + * the linker to endian-swap certain values before emitting them. + */ +#ifdef CONFIG_CPU_BIG_ENDIAN +#define DATA_LE64(data) \ + ((((data) & 0x00000000000000ff) << 56) | \ + (((data) & 0x000000000000ff00) << 40) | \ + (((data) & 0x0000000000ff0000) << 24) | \ + (((data) & 0x00000000ff000000) << 8) | \ + (((data) & 0x000000ff00000000) >> 8) | \ + (((data) & 0x0000ff0000000000) >> 24) | \ + (((data) & 0x00ff000000000000) >> 40) | \ + (((data) & 0xff00000000000000) >> 56)) +#else +#define DATA_LE64(data) ((data) & 0xffffffffffffffff) +#endif + +#ifdef CONFIG_CPU_BIG_ENDIAN +#define __HEAD_FLAG_BE 1 +#else +#define __HEAD_FLAG_BE 0 +#endif + +#define __HEAD_FLAGS (__HEAD_FLAG_BE << 0) + +/* + * These will output as part of the Image header, which should be little-endian + * regardless of the endianness of the kernel. While constant values could be + * endian swapped in head.S, all are done here for consistency. + */ +#define HEAD_SYMBOLS \ + _kernel_size_le = DATA_LE64(_end - _text); \ + _kernel_offset_le = DATA_LE64(TEXT_OFFSET); \ + _kernel_flags_le = DATA_LE64(__HEAD_FLAGS); + +#endif /* __ASM_IMAGE_H */ diff --git a/arch/arm64/kernel/kuser32.S b/arch/arm64/kernel/kuser32.S index 7787208e8cc6..997e6b27ff6a 100644 --- a/arch/arm64/kernel/kuser32.S +++ b/arch/arm64/kernel/kuser32.S @@ -28,7 +28,7 @@ * See Documentation/arm/kernel_user_helpers.txt for formal definitions. */ -#include <asm/unistd32.h> +#include <asm/unistd.h> .align 5 .globl __kuser_helper_start diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 43b7c34f92cb..1309d64aa926 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -51,6 +51,12 @@ #include <asm/processor.h> #include <asm/stacktrace.h> +#ifdef CONFIG_CC_STACKPROTECTOR +#include <linux/stackprotector.h> +unsigned long __stack_chk_guard __read_mostly; +EXPORT_SYMBOL(__stack_chk_guard); +#endif + static void setup_restart(void) { /* diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index 9e9798f91172..553954771a67 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -235,7 +235,7 @@ static void psci_sys_poweroff(void) * PSCI Function IDs for v0.2+ are well defined so use * standard values. */ -static int psci_0_2_init(struct device_node *np) +static int __init psci_0_2_init(struct device_node *np) { int err, ver; @@ -296,7 +296,7 @@ out_put_node: /* * PSCI < v0.2 get PSCI Function IDs via DT. */ -static int psci_0_1_init(struct device_node *np) +static int __init psci_0_1_init(struct device_node *np) { u32 id; int err; @@ -434,9 +434,11 @@ static int cpu_psci_cpu_kill(unsigned int cpu) return 0; } #endif +#endif const struct cpu_operations cpu_psci_ops = { .name = "psci", +#ifdef CONFIG_SMP .cpu_init = cpu_psci_cpu_init, .cpu_prepare = cpu_psci_cpu_prepare, .cpu_boot = cpu_psci_cpu_boot, @@ -445,6 +447,6 @@ const struct cpu_operations cpu_psci_ops = { .cpu_die = cpu_psci_cpu_die, .cpu_kill = cpu_psci_cpu_kill, #endif +#endif }; -#endif diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 9fde010c945f..0310811bd77d 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -19,6 +19,7 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ +#include <linux/audit.h> #include <linux/compat.h> #include <linux/kernel.h> #include <linux/sched.h> @@ -39,6 +40,7 @@ #include <asm/compat.h> #include <asm/debug-monitors.h> #include <asm/pgtable.h> +#include <asm/syscall.h> #include <asm/traps.h> #include <asm/system_misc.h> @@ -1113,11 +1115,20 @@ asmlinkage int syscall_trace_enter(struct pt_regs *regs) if (test_thread_flag(TIF_SYSCALL_TRACEPOINT)) trace_sys_enter(regs, regs->syscallno); +#ifdef CONFIG_AUDITSYSCALL + audit_syscall_entry(syscall_get_arch(), regs->syscallno, + regs->orig_x0, regs->regs[1], regs->regs[2], regs->regs[3]); +#endif + return regs->syscallno; } asmlinkage void syscall_trace_exit(struct pt_regs *regs) { +#ifdef CONFIG_AUDITSYSCALL + audit_syscall_exit(regs); +#endif + if (test_thread_flag(TIF_SYSCALL_TRACEPOINT)) trace_sys_exit(regs, regs_return_value(regs)); diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 46d1125571f6..f6f0ccf35ae6 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -45,6 +45,7 @@ #include <linux/efi.h> #include <asm/fixmap.h> +#include <asm/cpu.h> #include <asm/cputype.h> #include <asm/elf.h> #include <asm/cputable.h> @@ -77,7 +78,6 @@ unsigned int compat_elf_hwcap2 __read_mostly; #endif static const char *cpu_name; -static const char *machine_name; phys_addr_t __fdt_pointer __initdata; /* @@ -219,6 +219,8 @@ static void __init setup_processor(void) sprintf(init_utsname()->machine, ELF_PLATFORM); elf_hwcap = 0; + cpuinfo_store_boot_cpu(); + /* * Check for sane CTR_EL0.CWG value. */ @@ -307,8 +309,6 @@ static void __init setup_machine_fdt(phys_addr_t dt_phys) while (true) cpu_relax(); } - - machine_name = of_flat_dt_get_machine_name(); } /* @@ -417,14 +417,12 @@ static int __init arm64_device_init(void) } arch_initcall_sync(arm64_device_init); -static DEFINE_PER_CPU(struct cpu, cpu_data); - static int __init topology_init(void) { int i; for_each_possible_cpu(i) { - struct cpu *cpu = &per_cpu(cpu_data, i); + struct cpu *cpu = &per_cpu(cpu_data.cpu, i); cpu->hotpluggable = 1; register_cpu(cpu, i); } @@ -449,10 +447,21 @@ static int c_show(struct seq_file *m, void *v) { int i; - seq_printf(m, "Processor\t: %s rev %d (%s)\n", - cpu_name, read_cpuid_id() & 15, ELF_PLATFORM); + /* + * Dump out the common processor features in a single line. Userspace + * should read the hwcaps with getauxval(AT_HWCAP) rather than + * attempting to parse this. + */ + seq_puts(m, "features\t:"); + for (i = 0; hwcap_str[i]; i++) + if (elf_hwcap & (1 << i)) + seq_printf(m, " %s", hwcap_str[i]); + seq_puts(m, "\n\n"); for_each_online_cpu(i) { + struct cpuinfo_arm64 *cpuinfo = &per_cpu(cpu_data, i); + u32 midr = cpuinfo->reg_midr; + /* * glibc reads /proc/cpuinfo to determine the number of * online processors, looking for lines beginning with @@ -461,25 +470,13 @@ static int c_show(struct seq_file *m, void *v) #ifdef CONFIG_SMP seq_printf(m, "processor\t: %d\n", i); #endif + seq_printf(m, "implementer\t: 0x%02x\n", + MIDR_IMPLEMENTOR(midr)); + seq_printf(m, "variant\t\t: 0x%x\n", MIDR_VARIANT(midr)); + seq_printf(m, "partnum\t\t: 0x%03x\n", MIDR_PARTNUM(midr)); + seq_printf(m, "revision\t: 0x%x\n\n", MIDR_REVISION(midr)); } - /* dump out the processor features */ - seq_puts(m, "Features\t: "); - - for (i = 0; hwcap_str[i]; i++) - if (elf_hwcap & (1 << i)) - seq_printf(m, "%s ", hwcap_str[i]); - - seq_printf(m, "\nCPU implementer\t: 0x%02x\n", read_cpuid_id() >> 24); - seq_printf(m, "CPU architecture: AArch64\n"); - seq_printf(m, "CPU variant\t: 0x%x\n", (read_cpuid_id() >> 20) & 15); - seq_printf(m, "CPU part\t: 0x%03x\n", (read_cpuid_id() >> 4) & 0xfff); - seq_printf(m, "CPU revision\t: %d\n", read_cpuid_id() & 15); - - seq_puts(m, "\n"); - - seq_printf(m, "Hardware\t: %s\n", machine_name); - return 0; } diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index 3491c638f172..c5ee208321c3 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -27,7 +27,7 @@ #include <asm/fpsimd.h> #include <asm/signal32.h> #include <asm/uaccess.h> -#include <asm/unistd32.h> +#include <asm/unistd.h> struct compat_sigcontext { /* We always set these two fields to 0 */ diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 40f38f46c8e0..3e2f5ebbf63e 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -39,6 +39,7 @@ #include <asm/atomic.h> #include <asm/cacheflush.h> +#include <asm/cpu.h> #include <asm/cputype.h> #include <asm/cpu_ops.h> #include <asm/mmu_context.h> @@ -155,6 +156,11 @@ asmlinkage void secondary_start_kernel(void) cpu_ops[cpu]->cpu_postboot(); /* + * Log the CPU info before it is marked online and might get read. + */ + cpuinfo_store_cpu(); + + /* * Enable GIC and timers. */ notify_cpu_starting(cpu); diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index 1fa9ce4afd8f..55a99b9a97e0 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -119,7 +119,7 @@ int cpu_suspend(unsigned long arg) extern struct sleep_save_sp sleep_save_sp; extern phys_addr_t sleep_idmap_phys; -static int cpu_suspend_init(void) +static int __init cpu_suspend_init(void) { void *ctx_ptr; diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c index 26e9c4eeaba8..de2b0226e06d 100644 --- a/arch/arm64/kernel/sys_compat.c +++ b/arch/arm64/kernel/sys_compat.c @@ -26,7 +26,7 @@ #include <linux/uaccess.h> #include <asm/cacheflush.h> -#include <asm/unistd32.h> +#include <asm/unistd.h> static inline void do_compat_cache_op(unsigned long start, unsigned long end, int flags) diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index 43514f905916..b6ee26b0939a 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -20,6 +20,7 @@ #include <linux/of.h> #include <linux/sched.h> +#include <asm/cputype.h> #include <asm/topology.h> static int __init get_cpu_for_node(struct device_node *node) @@ -188,13 +189,9 @@ static int __init parse_dt_topology(void) * Check that all cores are in the topology; the SMP code will * only mark cores described in the DT as possible. */ - for_each_possible_cpu(cpu) { - if (cpu_topology[cpu].cluster_id == -1) { - pr_err("CPU%d: No topology information specified\n", - cpu); + for_each_possible_cpu(cpu) + if (cpu_topology[cpu].cluster_id == -1) ret = -EINVAL; - } - } out_map: of_node_put(map); @@ -219,14 +216,6 @@ static void update_siblings_masks(unsigned int cpuid) struct cpu_topology *cpu_topo, *cpuid_topo = &cpu_topology[cpuid]; int cpu; - if (cpuid_topo->cluster_id == -1) { - /* - * DT does not contain topology information for this cpu. - */ - pr_debug("CPU%u: No topology information configured\n", cpuid); - return; - } - /* update core and thread sibling masks */ for_each_possible_cpu(cpu) { cpu_topo = &cpu_topology[cpu]; @@ -249,6 +238,36 @@ static void update_siblings_masks(unsigned int cpuid) void store_cpu_topology(unsigned int cpuid) { + struct cpu_topology *cpuid_topo = &cpu_topology[cpuid]; + u64 mpidr; + + if (cpuid_topo->cluster_id != -1) + goto topology_populated; + + mpidr = read_cpuid_mpidr(); + + /* Uniprocessor systems can rely on default topology values */ + if (mpidr & MPIDR_UP_BITMASK) + return; + + /* Create cpu topology mapping based on MPIDR. */ + if (mpidr & MPIDR_MT_BITMASK) { + /* Multiprocessor system : Multi-threads per core */ + cpuid_topo->thread_id = MPIDR_AFFINITY_LEVEL(mpidr, 0); + cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL(mpidr, 1); + cpuid_topo->cluster_id = MPIDR_AFFINITY_LEVEL(mpidr, 2); + } else { + /* Multiprocessor system : Single-thread per core */ + cpuid_topo->thread_id = -1; + cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL(mpidr, 0); + cpuid_topo->cluster_id = MPIDR_AFFINITY_LEVEL(mpidr, 1); + } + + pr_debug("CPU%u: cluster %d core %d thread %d mpidr %#016llx\n", + cpuid, cpuid_topo->cluster_id, cpuid_topo->core_id, + cpuid_topo->thread_id, mpidr); + +topology_populated: update_siblings_masks(cpuid); } diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index c43cfa9b8304..02cd3f023e9a 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -156,7 +156,7 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) frame.pc = thread_saved_pc(tsk); } - printk("Call trace:\n"); + pr_emerg("Call trace:\n"); while (1) { unsigned long where = frame.pc; int ret; @@ -331,17 +331,22 @@ asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr) void __pte_error(const char *file, int line, unsigned long val) { - printk("%s:%d: bad pte %016lx.\n", file, line, val); + pr_crit("%s:%d: bad pte %016lx.\n", file, line, val); } void __pmd_error(const char *file, int line, unsigned long val) { - printk("%s:%d: bad pmd %016lx.\n", file, line, val); + pr_crit("%s:%d: bad pmd %016lx.\n", file, line, val); +} + +void __pud_error(const char *file, int line, unsigned long val) +{ + pr_crit("%s:%d: bad pud %016lx.\n", file, line, val); } void __pgd_error(const char *file, int line, unsigned long val) { - printk("%s:%d: bad pgd %016lx.\n", file, line, val); + pr_crit("%s:%d: bad pgd %016lx.\n", file, line, val); } void __init trap_init(void) diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 50384fec56c4..24f2e8c62479 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -88,22 +88,29 @@ int aarch32_setup_vectors_page(struct linux_binprm *bprm, int uses_interp) { struct mm_struct *mm = current->mm; unsigned long addr = AARCH32_VECTORS_BASE; - int ret; + static struct vm_special_mapping spec = { + .name = "[vectors]", + .pages = vectors_page, + + }; + void *ret; down_write(&mm->mmap_sem); current->mm->context.vdso = (void *)addr; /* Map vectors page at the high address. */ - ret = install_special_mapping(mm, addr, PAGE_SIZE, - VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC, - vectors_page); + ret = _install_special_mapping(mm, addr, PAGE_SIZE, + VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC, + &spec); up_write(&mm->mmap_sem); - return ret; + return PTR_ERR_OR_ZERO(ret); } #endif /* CONFIG_COMPAT */ +static struct vm_special_mapping vdso_spec[2]; + static int __init vdso_init(void) { int i; @@ -114,8 +121,8 @@ static int __init vdso_init(void) } vdso_pages = (&vdso_end - &vdso_start) >> PAGE_SHIFT; - pr_info("vdso: %ld pages (%ld code, %ld data) at base %p\n", - vdso_pages + 1, vdso_pages, 1L, &vdso_start); + pr_info("vdso: %ld pages (%ld code @ %p, %ld data @ %p)\n", + vdso_pages + 1, vdso_pages, &vdso_start, 1L, vdso_data); /* Allocate the vDSO pagelist, plus a page for the data. */ vdso_pagelist = kcalloc(vdso_pages + 1, sizeof(struct page *), @@ -123,12 +130,23 @@ static int __init vdso_init(void) if (vdso_pagelist == NULL) return -ENOMEM; + /* Grab the vDSO data page. */ + vdso_pagelist[0] = virt_to_page(vdso_data); + /* Grab the vDSO code pages. */ for (i = 0; i < vdso_pages; i++) - vdso_pagelist[i] = virt_to_page(&vdso_start + i * PAGE_SIZE); + vdso_pagelist[i + 1] = virt_to_page(&vdso_start + i * PAGE_SIZE); - /* Grab the vDSO data page. */ - vdso_pagelist[i] = virt_to_page(vdso_data); + /* Populate the special mapping structures */ + vdso_spec[0] = (struct vm_special_mapping) { + .name = "[vvar]", + .pages = vdso_pagelist, + }; + + vdso_spec[1] = (struct vm_special_mapping) { + .name = "[vdso]", + .pages = &vdso_pagelist[1], + }; return 0; } @@ -138,52 +156,42 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) { struct mm_struct *mm = current->mm; - unsigned long vdso_base, vdso_mapping_len; - int ret; + unsigned long vdso_base, vdso_text_len, vdso_mapping_len; + void *ret; + vdso_text_len = vdso_pages << PAGE_SHIFT; /* Be sure to map the data page */ - vdso_mapping_len = (vdso_pages + 1) << PAGE_SHIFT; + vdso_mapping_len = vdso_text_len + PAGE_SIZE; down_write(&mm->mmap_sem); vdso_base = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0); if (IS_ERR_VALUE(vdso_base)) { - ret = vdso_base; + ret = ERR_PTR(vdso_base); goto up_fail; } - mm->context.vdso = (void *)vdso_base; - - ret = install_special_mapping(mm, vdso_base, vdso_mapping_len, - VM_READ|VM_EXEC| - VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, - vdso_pagelist); - if (ret) { - mm->context.vdso = NULL; + ret = _install_special_mapping(mm, vdso_base, PAGE_SIZE, + VM_READ|VM_MAYREAD, + &vdso_spec[0]); + if (IS_ERR(ret)) goto up_fail; - } -up_fail: - up_write(&mm->mmap_sem); + vdso_base += PAGE_SIZE; + mm->context.vdso = (void *)vdso_base; + ret = _install_special_mapping(mm, vdso_base, vdso_text_len, + VM_READ|VM_EXEC| + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, + &vdso_spec[1]); + if (IS_ERR(ret)) + goto up_fail; - return ret; -} -const char *arch_vma_name(struct vm_area_struct *vma) -{ - /* - * We can re-use the vdso pointer in mm_context_t for identifying - * the vectors page for compat applications. The vDSO will always - * sit above TASK_UNMAPPED_BASE and so we don't need to worry about - * it conflicting with the vectors base. - */ - if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso) { -#ifdef CONFIG_COMPAT - if (vma->vm_start == AARCH32_VECTORS_BASE) - return "[vectors]"; -#endif - return "[vdso]"; - } + up_write(&mm->mmap_sem); + return 0; - return NULL; +up_fail: + mm->context.vdso = NULL; + up_write(&mm->mmap_sem); + return PTR_ERR(ret); } /* diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index 6d20b7d162d8..ff3bddea482d 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -43,13 +43,13 @@ $(obj)/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE $(call if_changed,vdsosym) # Assembly rules for the .S files -$(obj-vdso): %.o: %.S +$(obj-vdso): %.o: %.S FORCE $(call if_changed_dep,vdsoas) # Actual build commands -quiet_cmd_vdsold = VDSOL $@ +quiet_cmd_vdsold = VDSOL $@ cmd_vdsold = $(CC) $(c_flags) -Wl,-n -Wl,-T $^ -o $@ -quiet_cmd_vdsoas = VDSOA $@ +quiet_cmd_vdsoas = VDSOA $@ cmd_vdsoas = $(CC) $(a_flags) -c -o $@ $< # Install commands for the unstripped file diff --git a/arch/arm64/kernel/vdso/vdso.lds.S b/arch/arm64/kernel/vdso/vdso.lds.S index 8154b8d1c826..beca249bc2f3 100644 --- a/arch/arm64/kernel/vdso/vdso.lds.S +++ b/arch/arm64/kernel/vdso/vdso.lds.S @@ -28,6 +28,7 @@ OUTPUT_ARCH(aarch64) SECTIONS { + PROVIDE(_vdso_data = . - PAGE_SIZE); . = VDSO_LBASE + SIZEOF_HEADERS; .hash : { *(.hash) } :text @@ -57,9 +58,6 @@ SECTIONS _end = .; PROVIDE(end = .); - . = ALIGN(PAGE_SIZE); - PROVIDE(_vdso_data = .); - /DISCARD/ : { *(.note.GNU-stack) *(.data .data.* .gnu.linkonce.d.* .sdata*) diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index f1e6d5c032e1..97f0c0429dfa 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -9,6 +9,8 @@ #include <asm/memory.h> #include <asm/page.h> +#include "image.h" + #define ARM_EXIT_KEEP(x) #define ARM_EXIT_DISCARD(x) x @@ -104,9 +106,18 @@ SECTIONS _edata = .; BSS_SECTION(0, 0, 0) + + . = ALIGN(PAGE_SIZE); + idmap_pg_dir = .; + . += IDMAP_DIR_SIZE; + swapper_pg_dir = .; + . += SWAPPER_DIR_SIZE; + _end = .; STABS_DEBUG + + HEAD_SYMBOLS } /* @@ -114,3 +125,8 @@ SECTIONS */ ASSERT(((__hyp_idmap_text_start + PAGE_SIZE) > __hyp_idmap_text_end), "HYP init code too big") + +/* + * If padding is applied before .head.text, virt<->phys conversions will fail. + */ +ASSERT(_text == (PAGE_OFFSET + TEXT_OFFSET), "HEAD is misaligned") diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index bcc965e2cce1..41cb6d3d6075 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -62,6 +62,7 @@ void show_pte(struct mm_struct *mm, unsigned long addr) break; pud = pud_offset(pgd, addr); + printk(", *pud=%016llx", pud_val(*pud)); if (pud_none(*pud) || pud_bad(*pud)) break; diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index f43db8a69262..5b4526ee3a01 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -33,6 +33,7 @@ #include <linux/dma-mapping.h> #include <linux/dma-contiguous.h> +#include <asm/fixmap.h> #include <asm/sections.h> #include <asm/setup.h> #include <asm/sizes.h> @@ -60,6 +61,17 @@ static int __init early_initrd(char *p) early_param("initrd", early_initrd); #endif +/* + * Return the maximum physical address for ZONE_DMA (DMA_BIT_MASK(32)). It + * currently assumes that for memory starting above 4G, 32-bit devices will + * use a DMA offset. + */ +static phys_addr_t max_zone_dma_phys(void) +{ + phys_addr_t offset = memblock_start_of_DRAM() & GENMASK_ULL(63, 32); + return min(offset + (1ULL << 32), memblock_end_of_DRAM()); +} + static void __init zone_sizes_init(unsigned long min, unsigned long max) { struct memblock_region *reg; @@ -70,9 +82,7 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) /* 4GB maximum for 32-bit only capable devices */ if (IS_ENABLED(CONFIG_ZONE_DMA)) { - unsigned long max_dma_phys = - (unsigned long)(dma_to_phys(NULL, DMA_BIT_MASK(32)) + 1); - max_dma = max(min, min(max, max_dma_phys >> PAGE_SHIFT)); + max_dma = PFN_DOWN(max_zone_dma_phys()); zone_size[ZONE_DMA] = max_dma - min; } zone_size[ZONE_NORMAL] = max - max_dma; @@ -128,25 +138,21 @@ void __init arm64_memblock_init(void) { phys_addr_t dma_phys_limit = 0; - /* Register the kernel text, kernel data and initrd with memblock */ + /* + * Register the kernel text, kernel data, initrd, and initial + * pagetables with memblock. + */ memblock_reserve(__pa(_text), _end - _text); #ifdef CONFIG_BLK_DEV_INITRD if (initrd_start) memblock_reserve(__virt_to_phys(initrd_start), initrd_end - initrd_start); #endif - /* - * Reserve the page tables. These are already in use, - * and can only be in node 0. - */ - memblock_reserve(__pa(swapper_pg_dir), SWAPPER_DIR_SIZE); - memblock_reserve(__pa(idmap_pg_dir), IDMAP_DIR_SIZE); - early_init_fdt_scan_reserved_mem(); /* 4GB maximum for 32-bit only capable devices */ if (IS_ENABLED(CONFIG_ZONE_DMA)) - dma_phys_limit = dma_to_phys(NULL, DMA_BIT_MASK(32)) + 1; + dma_phys_limit = max_zone_dma_phys(); dma_contiguous_reserve(dma_phys_limit); memblock_allow_resize(); @@ -260,26 +266,33 @@ void __init mem_init(void) #define MLK(b, t) b, t, ((t) - (b)) >> 10 #define MLM(b, t) b, t, ((t) - (b)) >> 20 +#define MLG(b, t) b, t, ((t) - (b)) >> 30 #define MLK_ROUNDUP(b, t) b, t, DIV_ROUND_UP(((t) - (b)), SZ_1K) pr_notice("Virtual kernel memory layout:\n" - " vmalloc : 0x%16lx - 0x%16lx (%6ld MB)\n" + " vmalloc : 0x%16lx - 0x%16lx (%6ld GB)\n" #ifdef CONFIG_SPARSEMEM_VMEMMAP - " vmemmap : 0x%16lx - 0x%16lx (%6ld MB)\n" + " vmemmap : 0x%16lx - 0x%16lx (%6ld GB maximum)\n" + " 0x%16lx - 0x%16lx (%6ld MB actual)\n" #endif + " PCI I/O : 0x%16lx - 0x%16lx (%6ld MB)\n" + " fixed : 0x%16lx - 0x%16lx (%6ld KB)\n" " modules : 0x%16lx - 0x%16lx (%6ld MB)\n" " memory : 0x%16lx - 0x%16lx (%6ld MB)\n" - " .init : 0x%p" " - 0x%p" " (%6ld kB)\n" - " .text : 0x%p" " - 0x%p" " (%6ld kB)\n" - " .data : 0x%p" " - 0x%p" " (%6ld kB)\n", - MLM(VMALLOC_START, VMALLOC_END), + " .init : 0x%p" " - 0x%p" " (%6ld KB)\n" + " .text : 0x%p" " - 0x%p" " (%6ld KB)\n" + " .data : 0x%p" " - 0x%p" " (%6ld KB)\n", + MLG(VMALLOC_START, VMALLOC_END), #ifdef CONFIG_SPARSEMEM_VMEMMAP + MLG((unsigned long)vmemmap, + (unsigned long)vmemmap + VMEMMAP_SIZE), MLM((unsigned long)virt_to_page(PAGE_OFFSET), (unsigned long)virt_to_page(high_memory)), #endif + MLM((unsigned long)PCI_IOBASE, (unsigned long)PCI_IOBASE + SZ_16M), + MLK(FIXADDR_START, FIXADDR_TOP), MLM(MODULES_VADDR, MODULES_END), MLM(PAGE_OFFSET, (unsigned long)high_memory), - MLK_ROUNDUP(__init_begin, __init_end), MLK_ROUNDUP(_text, _etext), MLK_ROUNDUP(_sdata, _edata)); diff --git a/arch/arm64/mm/ioremap.c b/arch/arm64/mm/ioremap.c index 7ec328392ae0..fa324bd5a5c4 100644 --- a/arch/arm64/mm/ioremap.c +++ b/arch/arm64/mm/ioremap.c @@ -103,19 +103,28 @@ void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size) } EXPORT_SYMBOL(ioremap_cache); -#ifndef CONFIG_ARM64_64K_PAGES static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss; +#if CONFIG_ARM64_PGTABLE_LEVELS > 2 +static pte_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss; +#endif +#if CONFIG_ARM64_PGTABLE_LEVELS > 3 +static pte_t bm_pud[PTRS_PER_PUD] __page_aligned_bss; #endif -static inline pmd_t * __init early_ioremap_pmd(unsigned long addr) +static inline pud_t * __init early_ioremap_pud(unsigned long addr) { pgd_t *pgd; - pud_t *pud; pgd = pgd_offset_k(addr); BUG_ON(pgd_none(*pgd) || pgd_bad(*pgd)); - pud = pud_offset(pgd, addr); + return pud_offset(pgd, addr); +} + +static inline pmd_t * __init early_ioremap_pmd(unsigned long addr) +{ + pud_t *pud = early_ioremap_pud(addr); + BUG_ON(pud_none(*pud) || pud_bad(*pud)); return pmd_offset(pud, addr); @@ -132,13 +141,18 @@ static inline pte_t * __init early_ioremap_pte(unsigned long addr) void __init early_ioremap_init(void) { + pgd_t *pgd; + pud_t *pud; pmd_t *pmd; + unsigned long addr = fix_to_virt(FIX_BTMAP_BEGIN); - pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)); -#ifndef CONFIG_ARM64_64K_PAGES - /* need to populate pmd for 4k pagesize only */ + pgd = pgd_offset_k(addr); + pgd_populate(&init_mm, pgd, bm_pud); + pud = pud_offset(pgd, addr); + pud_populate(&init_mm, pud, bm_pmd); + pmd = pmd_offset(pud, addr); pmd_populate_kernel(&init_mm, pmd, bm_pte); -#endif + /* * The boot-ioremap range spans multiple pmds, for which * we are not prepared: diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index c43f1dd19489..c55567283cde 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -32,6 +32,7 @@ #include <asm/setup.h> #include <asm/sizes.h> #include <asm/tlb.h> +#include <asm/memblock.h> #include <asm/mmu_context.h> #include "mm.h" @@ -204,9 +205,16 @@ static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end, unsigned long phys, int map_io) { - pud_t *pud = pud_offset(pgd, addr); + pud_t *pud; unsigned long next; + if (pgd_none(*pgd)) { + pud = early_alloc(PTRS_PER_PUD * sizeof(pud_t)); + pgd_populate(&init_mm, pgd, pud); + } + BUG_ON(pgd_bad(*pgd)); + + pud = pud_offset(pgd, addr); do { next = pud_addr_end(addr, end); @@ -290,10 +298,10 @@ static void __init map_mem(void) * memory addressable from the initial direct kernel mapping. * * The initial direct kernel mapping, located at swapper_pg_dir, - * gives us PGDIR_SIZE memory starting from PHYS_OFFSET (which must be + * gives us PUD_SIZE memory starting from PHYS_OFFSET (which must be * aligned to 2MB as per Documentation/arm64/booting.txt). */ - limit = PHYS_OFFSET + PGDIR_SIZE; + limit = PHYS_OFFSET + PUD_SIZE; memblock_set_current_limit(limit); /* map all the memory banks */ diff --git a/arch/avr32/include/asm/processor.h b/arch/avr32/include/asm/processor.h index 972adcc1e8f4..941593c7d9f3 100644 --- a/arch/avr32/include/asm/processor.h +++ b/arch/avr32/include/asm/processor.h @@ -92,6 +92,7 @@ extern struct avr32_cpuinfo boot_cpu_data; #define TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 3)) #define cpu_relax() barrier() +#define cpu_relax_lowlatency() cpu_relax() #define cpu_sync_pipeline() asm volatile("sub pc, -2" : : : "memory") struct cpu_context { diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig index f81e7b989fff..ed30699cc635 100644 --- a/arch/blackfin/Kconfig +++ b/arch/blackfin/Kconfig @@ -18,7 +18,6 @@ config BLACKFIN select HAVE_FTRACE_MCOUNT_RECORD select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_TRACER - select HAVE_FUNCTION_TRACE_MCOUNT_TEST select HAVE_IDE select HAVE_KERNEL_GZIP if RAMKERNEL select HAVE_KERNEL_BZIP2 if RAMKERNEL diff --git a/arch/blackfin/configs/BF609-EZKIT_defconfig b/arch/blackfin/configs/BF609-EZKIT_defconfig index a7e9bfd84183..fcec5ce71392 100644 --- a/arch/blackfin/configs/BF609-EZKIT_defconfig +++ b/arch/blackfin/configs/BF609-EZKIT_defconfig @@ -102,7 +102,7 @@ CONFIG_I2C_CHARDEV=y CONFIG_I2C_BLACKFIN_TWI=y CONFIG_I2C_BLACKFIN_TWI_CLK_KHZ=100 CONFIG_SPI=y -CONFIG_SPI_BFIN_V3=y +CONFIG_SPI_ADI_V3=y CONFIG_GPIOLIB=y CONFIG_GPIO_SYSFS=y # CONFIG_HWMON is not set diff --git a/arch/blackfin/include/asm/processor.h b/arch/blackfin/include/asm/processor.h index d0e72e9475a6..7acd46653df3 100644 --- a/arch/blackfin/include/asm/processor.h +++ b/arch/blackfin/include/asm/processor.h @@ -99,7 +99,7 @@ unsigned long get_wchan(struct task_struct *p); #define KSTK_ESP(tsk) ((tsk) == current ? rdusp() : (tsk)->thread.usp) #define cpu_relax() smp_mb() - +#define cpu_relax_lowlatency() cpu_relax() /* Get the Silicon Revision of the chip */ static inline uint32_t __pure bfin_revid(void) diff --git a/arch/blackfin/kernel/ftrace-entry.S b/arch/blackfin/kernel/ftrace-entry.S index 7eed00bbd26d..28d059540424 100644 --- a/arch/blackfin/kernel/ftrace-entry.S +++ b/arch/blackfin/kernel/ftrace-entry.S @@ -33,15 +33,6 @@ ENDPROC(__mcount) * function will be waiting there. mmmm pie. */ ENTRY(_ftrace_caller) -# ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST - /* optional micro optimization: return if stopped */ - p1.l = _function_trace_stop; - p1.h = _function_trace_stop; - r3 = [p1]; - cc = r3 == 0; - if ! cc jump _ftrace_stub (bp); -# endif - /* save first/second/third function arg and the return register */ [--sp] = r2; [--sp] = r0; @@ -83,15 +74,6 @@ ENDPROC(_ftrace_caller) /* See documentation for _ftrace_caller */ ENTRY(__mcount) -# ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST - /* optional micro optimization: return if stopped */ - p1.l = _function_trace_stop; - p1.h = _function_trace_stop; - r3 = [p1]; - cc = r3 == 0; - if ! cc jump _ftrace_stub (bp); -# endif - /* save third function arg early so we can do testing below */ [--sp] = r2; diff --git a/arch/blackfin/kernel/perf_event.c b/arch/blackfin/kernel/perf_event.c index 974e55496db3..ea2032013cc2 100644 --- a/arch/blackfin/kernel/perf_event.c +++ b/arch/blackfin/kernel/perf_event.c @@ -389,14 +389,6 @@ static int bfin_pmu_event_init(struct perf_event *event) if (attr->exclude_hv || attr->exclude_idle) return -EPERM; - /* - * All of the on-chip counters are "limited", in that they have - * no interrupts, and are therefore unable to do sampling without - * further work and timer assistance. - */ - if (hwc->sample_period) - return -EINVAL; - ret = 0; switch (attr->type) { case PERF_TYPE_RAW: @@ -490,6 +482,13 @@ static int __init bfin_pmu_init(void) { int ret; + /* + * All of the on-chip counters are "limited", in that they have + * no interrupts, and are therefore unable to do sampling without + * further work and timer assistance. + */ + pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; + ret = perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW); if (!ret) perf_cpu_notifier(bfin_pmu_notifier); diff --git a/arch/blackfin/kernel/vmlinux.lds.S b/arch/blackfin/kernel/vmlinux.lds.S index ba35864b2b74..c9eec84aa258 100644 --- a/arch/blackfin/kernel/vmlinux.lds.S +++ b/arch/blackfin/kernel/vmlinux.lds.S @@ -145,7 +145,7 @@ SECTIONS .text_l1 L1_CODE_START : AT(LOADADDR(.exit.data) + SIZEOF(.exit.data)) #else - .init.data : AT(__data_lma + __data_len) + .init.data : AT(__data_lma + __data_len + 32) { __sinitdata = .; INIT_DATA diff --git a/arch/blackfin/mach-bf533/boards/blackstamp.c b/arch/blackfin/mach-bf533/boards/blackstamp.c index 63b0e4fe760c..0ccf0cf4daaf 100644 --- a/arch/blackfin/mach-bf533/boards/blackstamp.c +++ b/arch/blackfin/mach-bf533/boards/blackstamp.c @@ -20,6 +20,7 @@ #include <linux/spi/spi.h> #include <linux/spi/flash.h> #include <linux/irq.h> +#include <linux/gpio.h> #include <linux/i2c.h> #include <asm/dma.h> #include <asm/bfin5xx_spi.h> diff --git a/arch/blackfin/mach-bf537/boards/cm_bf537e.c b/arch/blackfin/mach-bf537/boards/cm_bf537e.c index c65c6dbda3da..1e7290ef3525 100644 --- a/arch/blackfin/mach-bf537/boards/cm_bf537e.c +++ b/arch/blackfin/mach-bf537/boards/cm_bf537e.c @@ -21,6 +21,7 @@ #endif #include <linux/ata_platform.h> #include <linux/irq.h> +#include <linux/gpio.h> #include <asm/dma.h> #include <asm/bfin5xx_spi.h> #include <asm/portmux.h> diff --git a/arch/blackfin/mach-bf537/boards/cm_bf537u.c b/arch/blackfin/mach-bf537/boards/cm_bf537u.c index af58454b4bff..c7495dc74690 100644 --- a/arch/blackfin/mach-bf537/boards/cm_bf537u.c +++ b/arch/blackfin/mach-bf537/boards/cm_bf537u.c @@ -21,6 +21,7 @@ #endif #include <linux/ata_platform.h> #include <linux/irq.h> +#include <linux/gpio.h> #include <asm/dma.h> #include <asm/bfin5xx_spi.h> #include <asm/portmux.h> diff --git a/arch/blackfin/mach-bf537/boards/tcm_bf537.c b/arch/blackfin/mach-bf537/boards/tcm_bf537.c index a0211225748d..6b988ad653d8 100644 --- a/arch/blackfin/mach-bf537/boards/tcm_bf537.c +++ b/arch/blackfin/mach-bf537/boards/tcm_bf537.c @@ -21,6 +21,7 @@ #endif #include <linux/ata_platform.h> #include <linux/irq.h> +#include <linux/gpio.h> #include <asm/dma.h> #include <asm/bfin5xx_spi.h> #include <asm/portmux.h> diff --git a/arch/blackfin/mach-bf548/boards/ezkit.c b/arch/blackfin/mach-bf548/boards/ezkit.c index 90138e6112c1..1fe7ff286619 100644 --- a/arch/blackfin/mach-bf548/boards/ezkit.c +++ b/arch/blackfin/mach-bf548/boards/ezkit.c @@ -2118,7 +2118,7 @@ static struct pinctrl_map __initdata bfin_pinmux_map[] = { PIN_MAP_MUX_GROUP_DEFAULT("bfin-rotary", "pinctrl-adi2.0", NULL, "rotary"), PIN_MAP_MUX_GROUP_DEFAULT("bfin_can.0", "pinctrl-adi2.0", NULL, "can0"), PIN_MAP_MUX_GROUP_DEFAULT("bfin_can.1", "pinctrl-adi2.0", NULL, "can1"), - PIN_MAP_MUX_GROUP_DEFAULT("bf54x-lq043", "pinctrl-adi2.0", NULL, "ppi0_24b"), + PIN_MAP_MUX_GROUP_DEFAULT("bf54x-lq043", "pinctrl-adi2.0", "ppi0_24bgrp", "ppi0"), PIN_MAP_MUX_GROUP_DEFAULT("bfin-i2s.0", "pinctrl-adi2.0", NULL, "sport0"), PIN_MAP_MUX_GROUP_DEFAULT("bfin-tdm.0", "pinctrl-adi2.0", NULL, "sport0"), PIN_MAP_MUX_GROUP_DEFAULT("bfin-ac97.0", "pinctrl-adi2.0", NULL, "sport0"), @@ -2140,7 +2140,9 @@ static struct pinctrl_map __initdata bfin_pinmux_map[] = { PIN_MAP_MUX_GROUP_DEFAULT("pata-bf54x", "pinctrl-adi2.0", NULL, "atapi_alter"), #endif PIN_MAP_MUX_GROUP_DEFAULT("bf5xx-nand.0", "pinctrl-adi2.0", NULL, "nfc0"), - PIN_MAP_MUX_GROUP_DEFAULT("bf54x-keys", "pinctrl-adi2.0", NULL, "keys_4x4"), + PIN_MAP_MUX_GROUP_DEFAULT("bf54x-keys", "pinctrl-adi2.0", "keys_4x4grp", "keys"), + PIN_MAP_MUX_GROUP("bf54x-keys", "4bit", "pinctrl-adi2.0", "keys_4x4grp", "keys"), + PIN_MAP_MUX_GROUP("bf54x-keys", "8bit", "pinctrl-adi2.0", "keys_8x8grp", "keys"), }; static int __init ezkit_init(void) diff --git a/arch/blackfin/mach-bf561/boards/acvilon.c b/arch/blackfin/mach-bf561/boards/acvilon.c index 430b16d5ccb1..6ab951534d79 100644 --- a/arch/blackfin/mach-bf561/boards/acvilon.c +++ b/arch/blackfin/mach-bf561/boards/acvilon.c @@ -44,6 +44,7 @@ #include <linux/spi/flash.h> #include <linux/irq.h> #include <linux/interrupt.h> +#include <linux/gpio.h> #include <linux/jiffies.h> #include <linux/i2c-pca-platform.h> #include <linux/delay.h> diff --git a/arch/blackfin/mach-bf561/boards/cm_bf561.c b/arch/blackfin/mach-bf561/boards/cm_bf561.c index 9f777df4cacc..e862f7823e68 100644 --- a/arch/blackfin/mach-bf561/boards/cm_bf561.c +++ b/arch/blackfin/mach-bf561/boards/cm_bf561.c @@ -18,6 +18,7 @@ #endif #include <linux/ata_platform.h> #include <linux/irq.h> +#include <linux/gpio.h> #include <asm/dma.h> #include <asm/bfin5xx_spi.h> #include <asm/portmux.h> diff --git a/arch/blackfin/mach-bf561/boards/ezkit.c b/arch/blackfin/mach-bf561/boards/ezkit.c index 88dee43e7abe..2de71e8c104b 100644 --- a/arch/blackfin/mach-bf561/boards/ezkit.c +++ b/arch/blackfin/mach-bf561/boards/ezkit.c @@ -14,6 +14,7 @@ #include <linux/spi/spi.h> #include <linux/irq.h> #include <linux/interrupt.h> +#include <linux/gpio.h> #include <linux/delay.h> #include <asm/dma.h> #include <asm/bfin5xx_spi.h> diff --git a/arch/blackfin/mach-bf609/boards/ezkit.c b/arch/blackfin/mach-bf609/boards/ezkit.c index 1ba4600de69f..e2c0b024ce88 100644 --- a/arch/blackfin/mach-bf609/boards/ezkit.c +++ b/arch/blackfin/mach-bf609/boards/ezkit.c @@ -698,8 +698,6 @@ int bf609_nor_flash_init(struct platform_device *pdev) { #define CONFIG_SMC_GCTL_VAL 0x00000010 - if (!devm_pinctrl_get_select_default(&pdev->dev)) - return -EBUSY; bfin_write32(SMC_GCTL, CONFIG_SMC_GCTL_VAL); bfin_write32(SMC_B0CTL, 0x01002011); bfin_write32(SMC_B0TIM, 0x08170977); @@ -709,7 +707,6 @@ int bf609_nor_flash_init(struct platform_device *pdev) void bf609_nor_flash_exit(struct platform_device *pdev) { - devm_pinctrl_put(pdev->dev.pins->p); bfin_write32(SMC_GCTL, 0); } @@ -2058,15 +2055,14 @@ static struct pinctrl_map __initdata bfin_pinmux_map[] = { PIN_MAP_MUX_GROUP_DEFAULT("bfin-rotary", "pinctrl-adi2.0", NULL, "rotary"), PIN_MAP_MUX_GROUP_DEFAULT("bfin_can.0", "pinctrl-adi2.0", NULL, "can0"), PIN_MAP_MUX_GROUP_DEFAULT("physmap-flash.0", "pinctrl-adi2.0", NULL, "smc0"), - PIN_MAP_MUX_GROUP_DEFAULT("bf609_nl8048.2", "pinctrl-adi2.0", NULL, "ppi2_16b"), - PIN_MAP_MUX_GROUP_DEFAULT("bfin_display.0", "pinctrl-adi2.0", NULL, "ppi0_16b"), -#if IS_ENABLED(CONFIG_VIDEO_MT9M114) - PIN_MAP_MUX_GROUP_DEFAULT("bfin_capture.0", "pinctrl-adi2.0", NULL, "ppi0_8b"), -#elif IS_ENABLED(CONFIG_VIDEO_VS6624) - PIN_MAP_MUX_GROUP_DEFAULT("bfin_capture.0", "pinctrl-adi2.0", NULL, "ppi0_16b"), -#else - PIN_MAP_MUX_GROUP_DEFAULT("bfin_capture.0", "pinctrl-adi2.0", NULL, "ppi0_24b"), -#endif + PIN_MAP_MUX_GROUP_DEFAULT("bf609_nl8048.2", "pinctrl-adi2.0", "ppi2_16bgrp", "ppi2"), + PIN_MAP_MUX_GROUP("bfin_display.0", "8bit", "pinctrl-adi2.0", "ppi2_8bgrp", "ppi2"), + PIN_MAP_MUX_GROUP_DEFAULT("bfin_display.0", "pinctrl-adi2.0", "ppi2_16bgrp", "ppi2"), + PIN_MAP_MUX_GROUP("bfin_display.0", "16bit", "pinctrl-adi2.0", "ppi2_16bgrp", "ppi2"), + PIN_MAP_MUX_GROUP("bfin_capture.0", "8bit", "pinctrl-adi2.0", "ppi0_8bgrp", "ppi0"), + PIN_MAP_MUX_GROUP_DEFAULT("bfin_capture.0", "pinctrl-adi2.0", "ppi0_16bgrp", "ppi0"), + PIN_MAP_MUX_GROUP("bfin_capture.0", "16bit", "pinctrl-adi2.0", "ppi0_16bgrp", "ppi0"), + PIN_MAP_MUX_GROUP("bfin_capture.0", "24bit", "pinctrl-adi2.0", "ppi0_24bgrp", "ppi0"), PIN_MAP_MUX_GROUP_DEFAULT("bfin-i2s.0", "pinctrl-adi2.0", NULL, "sport0"), PIN_MAP_MUX_GROUP_DEFAULT("bfin-tdm.0", "pinctrl-adi2.0", NULL, "sport0"), PIN_MAP_MUX_GROUP_DEFAULT("bfin-i2s.1", "pinctrl-adi2.0", NULL, "sport1"), diff --git a/arch/blackfin/mach-bf609/include/mach/pm.h b/arch/blackfin/mach-bf609/include/mach/pm.h index 3ca0fb965636..a1efd936dd30 100644 --- a/arch/blackfin/mach-bf609/include/mach/pm.h +++ b/arch/blackfin/mach-bf609/include/mach/pm.h @@ -10,6 +10,7 @@ #define __MACH_BF609_PM_H__ #include <linux/suspend.h> +#include <linux/platform_device.h> extern int bfin609_pm_enter(suspend_state_t state); extern int bf609_pm_prepare(void); @@ -19,6 +20,6 @@ void bf609_hibernate(void); void bfin_sec_raise_irq(unsigned int sid); void coreb_enable(void); -int bf609_nor_flash_init(void); -void bf609_nor_flash_exit(void); +int bf609_nor_flash_init(struct platform_device *pdev); +void bf609_nor_flash_exit(struct platform_device *pdev); #endif diff --git a/arch/blackfin/mach-bf609/pm.c b/arch/blackfin/mach-bf609/pm.c index 0cdd6955c7be..b1bfcf434d16 100644 --- a/arch/blackfin/mach-bf609/pm.c +++ b/arch/blackfin/mach-bf609/pm.c @@ -291,13 +291,13 @@ static struct bfin_cpu_pm_fns bf609_cpu_pm = { #if defined(CONFIG_MTD_PHYSMAP) || defined(CONFIG_MTD_PHYSMAP_MODULE) static int smc_pm_syscore_suspend(void) { - bf609_nor_flash_exit(); + bf609_nor_flash_exit(NULL); return 0; } static void smc_pm_syscore_resume(void) { - bf609_nor_flash_init(); + bf609_nor_flash_init(NULL); } static struct syscore_ops smc_pm_syscore_ops = { diff --git a/arch/blackfin/mach-common/ints-priority.c b/arch/blackfin/mach-common/ints-priority.c index 867b7cef204c..1f94784eab6d 100644 --- a/arch/blackfin/mach-common/ints-priority.c +++ b/arch/blackfin/mach-common/ints-priority.c @@ -1208,8 +1208,6 @@ int __init init_arch_irq(void) bfin_sec_set_priority(CONFIG_SEC_IRQ_PRIORITY_LEVELS, sec_int_priority); - bfin_sec_set_priority(CONFIG_SEC_IRQ_PRIORITY_LEVELS, sec_int_priority); - /* Enable interrupts IVG7-15 */ bfin_irq_flags |= IMASK_IVG15 | IMASK_IVG14 | IMASK_IVG13 | IMASK_IVG12 | IMASK_IVG11 | diff --git a/arch/c6x/include/asm/processor.h b/arch/c6x/include/asm/processor.h index b9eb3da7f278..f2ef31be2f8b 100644 --- a/arch/c6x/include/asm/processor.h +++ b/arch/c6x/include/asm/processor.h @@ -121,6 +121,7 @@ extern unsigned long get_wchan(struct task_struct *p); #define KSTK_ESP(task) (task_pt_regs(task)->sp) #define cpu_relax() do { } while (0) +#define cpu_relax_lowlatency() cpu_relax() extern const struct seq_operations cpuinfo_op; diff --git a/arch/cris/include/asm/processor.h b/arch/cris/include/asm/processor.h index 15b815df29c1..862126b58116 100644 --- a/arch/cris/include/asm/processor.h +++ b/arch/cris/include/asm/processor.h @@ -63,6 +63,7 @@ static inline void release_thread(struct task_struct *dead_task) #define init_stack (init_thread_union.stack) #define cpu_relax() barrier() +#define cpu_relax_lowlatency() cpu_relax() void default_idle(void); diff --git a/arch/hexagon/include/asm/processor.h b/arch/hexagon/include/asm/processor.h index 45a825402f63..d8501137c8d0 100644 --- a/arch/hexagon/include/asm/processor.h +++ b/arch/hexagon/include/asm/processor.h @@ -56,6 +56,7 @@ struct thread_struct { } #define cpu_relax() __vmyield() +#define cpu_relax_lowlatency() cpu_relax() /* * Decides where the kernel will search for a free chunk of vm space during diff --git a/arch/ia64/include/asm/io.h b/arch/ia64/include/asm/io.h index 0d2bcb37ec35..bee0acd52f7e 100644 --- a/arch/ia64/include/asm/io.h +++ b/arch/ia64/include/asm/io.h @@ -426,6 +426,7 @@ extern void iounmap (volatile void __iomem *addr); extern void __iomem * early_ioremap (unsigned long phys_addr, unsigned long size); #define early_memremap(phys_addr, size) early_ioremap(phys_addr, size) extern void early_iounmap (volatile void __iomem *addr, unsigned long size); +#define early_memunmap(addr, size) early_iounmap(addr, size) static inline void __iomem * ioremap_cache (unsigned long phys_addr, unsigned long size) { return ioremap(phys_addr, size); diff --git a/arch/ia64/include/asm/processor.h b/arch/ia64/include/asm/processor.h index efd1b927ccb7..c7367130ab14 100644 --- a/arch/ia64/include/asm/processor.h +++ b/arch/ia64/include/asm/processor.h @@ -548,6 +548,7 @@ ia64_eoi (void) } #define cpu_relax() ia64_hint(ia64_hint_pause) +#define cpu_relax_lowlatency() cpu_relax() static inline int ia64_get_irr(unsigned int vector) diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index 55d4ba47a907..deed6fa96bb0 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -662,7 +662,7 @@ void machine_restart (char *restart_cmd) { (void) notify_die(DIE_MACHINE_RESTART, restart_cmd, NULL, 0, 0, 0); - (*efi.reset_system)(EFI_RESET_WARM, 0, 0, NULL); + efi_reboot(REBOOT_WARM, NULL); } void diff --git a/arch/ia64/pci/fixup.c b/arch/ia64/pci/fixup.c index 1fe9aa5068ea..ec73b2cf912a 100644 --- a/arch/ia64/pci/fixup.c +++ b/arch/ia64/pci/fixup.c @@ -6,6 +6,7 @@ #include <linux/pci.h> #include <linux/init.h> #include <linux/vgaarb.h> +#include <linux/screen_info.h> #include <asm/machvec.h> @@ -37,6 +38,27 @@ static void pci_fixup_video(struct pci_dev *pdev) return; /* Maybe, this machine supports legacy memory map. */ + if (!vga_default_device()) { + resource_size_t start, end; + int i; + + /* Does firmware framebuffer belong to us? */ + for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { + if (!(pci_resource_flags(pdev, i) & IORESOURCE_MEM)) + continue; + + start = pci_resource_start(pdev, i); + end = pci_resource_end(pdev, i); + + if (!start || !end) + continue; + + if (screen_info.lfb_base >= start && + (screen_info.lfb_base + screen_info.lfb_size) < end) + vga_set_default_device(pdev); + } + } + /* Is VGA routed to us? */ bus = pdev->bus; while (bus) { diff --git a/arch/ia64/sn/kernel/bte.c b/arch/ia64/sn/kernel/bte.c index cad775a1a157..b2eb48490754 100644 --- a/arch/ia64/sn/kernel/bte.c +++ b/arch/ia64/sn/kernel/bte.c @@ -114,7 +114,7 @@ bte_result_t bte_copy(u64 src, u64 dest, u64 len, u64 mode, void *notification) if (mode & BTE_USE_ANY) { nasid_to_try[1] = my_nasid; } else { - nasid_to_try[1] = (int)NULL; + nasid_to_try[1] = 0; } } else { /* try local then remote */ @@ -122,7 +122,7 @@ bte_result_t bte_copy(u64 src, u64 dest, u64 len, u64 mode, void *notification) if (mode & BTE_USE_ANY) { nasid_to_try[1] = NASID_GET(dest); } else { - nasid_to_try[1] = (int)NULL; + nasid_to_try[1] = 0; } } diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c index 53b01b8e2f19..36182c84363c 100644 --- a/arch/ia64/sn/kernel/setup.c +++ b/arch/ia64/sn/kernel/setup.c @@ -579,7 +579,7 @@ void sn_cpu_init(void) (sn_prom_type == 1) ? "real" : "fake"); } - memset(pda, 0, sizeof(pda)); + memset(pda, 0, sizeof(*pda)); if (ia64_sn_get_sn_info(0, &sn_hub_info->shub2, &sn_hub_info->nasid_bitmask, &sn_hub_info->nasid_shift, diff --git a/arch/m32r/include/asm/processor.h b/arch/m32r/include/asm/processor.h index 5767367550c6..9f8fd9bef70f 100644 --- a/arch/m32r/include/asm/processor.h +++ b/arch/m32r/include/asm/processor.h @@ -133,5 +133,6 @@ unsigned long get_wchan(struct task_struct *p); #define KSTK_ESP(tsk) ((tsk)->thread.sp) #define cpu_relax() barrier() +#define cpu_relax_lowlatency() cpu_relax() #endif /* _ASM_M32R_PROCESSOR_H */ diff --git a/arch/m68k/include/asm/processor.h b/arch/m68k/include/asm/processor.h index b0768a657920..20dda1d4b860 100644 --- a/arch/m68k/include/asm/processor.h +++ b/arch/m68k/include/asm/processor.h @@ -176,5 +176,6 @@ unsigned long get_wchan(struct task_struct *p); #define task_pt_regs(tsk) ((struct pt_regs *) ((tsk)->thread.esp0)) #define cpu_relax() barrier() +#define cpu_relax_lowlatency() cpu_relax() #endif diff --git a/arch/m68k/include/asm/sun3_pgalloc.h b/arch/m68k/include/asm/sun3_pgalloc.h index f868506e3350..0931388de47f 100644 --- a/arch/m68k/include/asm/sun3_pgalloc.h +++ b/arch/m68k/include/asm/sun3_pgalloc.h @@ -12,10 +12,6 @@ #include <asm/tlb.h> -/* FIXME - when we get this compiling */ -/* erm, now that it's compiling, what do we do with it? */ -#define _KERNPG_TABLE 0 - extern const char bad_pmd_string[]; #define pmd_alloc_one(mm,address) ({ BUG(); ((pmd_t *)2); }) diff --git a/arch/metag/Kconfig b/arch/metag/Kconfig index 499b7610eaaf..0b389a81c43a 100644 --- a/arch/metag/Kconfig +++ b/arch/metag/Kconfig @@ -13,7 +13,6 @@ config METAG select HAVE_DYNAMIC_FTRACE select HAVE_FTRACE_MCOUNT_RECORD select HAVE_FUNCTION_TRACER - select HAVE_FUNCTION_TRACE_MCOUNT_TEST select HAVE_KERNEL_BZIP2 select HAVE_KERNEL_GZIP select HAVE_KERNEL_LZO diff --git a/arch/metag/include/asm/processor.h b/arch/metag/include/asm/processor.h index a8a37477c66e..881071c07942 100644 --- a/arch/metag/include/asm/processor.h +++ b/arch/metag/include/asm/processor.h @@ -155,6 +155,7 @@ unsigned long get_wchan(struct task_struct *p); #define user_stack_pointer(regs) ((regs)->ctx.AX[0].U0) #define cpu_relax() barrier() +#define cpu_relax_lowlatency() cpu_relax() extern void setup_priv(void); diff --git a/arch/metag/kernel/ftrace_stub.S b/arch/metag/kernel/ftrace_stub.S index e70bff745bdd..3acc288217c0 100644 --- a/arch/metag/kernel/ftrace_stub.S +++ b/arch/metag/kernel/ftrace_stub.S @@ -16,13 +16,6 @@ _mcount_wrapper: .global _ftrace_caller .type _ftrace_caller,function _ftrace_caller: - MOVT D0Re0,#HI(_function_trace_stop) - ADD D0Re0,D0Re0,#LO(_function_trace_stop) - GETD D0Re0,[D0Re0] - CMP D0Re0,#0 - BEQ $Lcall_stub - MOV PC,D0.4 -$Lcall_stub: MSETL [A0StP], D0Ar6, D0Ar4, D0Ar2, D0.4 MOV D1Ar1, D0.4 MOV D0Ar2, D1RtP @@ -42,13 +35,6 @@ _ftrace_call: .global _mcount_wrapper .type _mcount_wrapper,function _mcount_wrapper: - MOVT D0Re0,#HI(_function_trace_stop) - ADD D0Re0,D0Re0,#LO(_function_trace_stop) - GETD D0Re0,[D0Re0] - CMP D0Re0,#0 - BEQ $Lcall_mcount - MOV PC,D0.4 -$Lcall_mcount: MSETL [A0StP], D0Ar6, D0Ar4, D0Ar2, D0.4 MOV D1Ar1, D0.4 MOV D0Ar2, D1RtP diff --git a/arch/metag/kernel/perf/perf_event.c b/arch/metag/kernel/perf/perf_event.c index 5cc4d4dcf3cf..02c08737f6aa 100644 --- a/arch/metag/kernel/perf/perf_event.c +++ b/arch/metag/kernel/perf/perf_event.c @@ -568,16 +568,6 @@ static int _hw_perf_event_init(struct perf_event *event) return -EINVAL; /* - * Early cores have "limited" counters - they have no overflow - * interrupts - and so are unable to do sampling without extra work - * and timer assistance. - */ - if (metag_pmu->max_period == 0) { - if (hwc->sample_period) - return -EINVAL; - } - - /* * Don't assign an index until the event is placed into the hardware. * -1 signifies that we're still deciding where to put it. On SMP * systems each core has its own set of counters, so we can't do any @@ -866,6 +856,15 @@ static int __init init_hw_perf_events(void) pr_info("enabled with %s PMU driver, %d counters available\n", metag_pmu->name, metag_pmu->max_events); + /* + * Early cores have "limited" counters - they have no overflow + * interrupts - and so are unable to do sampling without extra work + * and timer assistance. + */ + if (metag_pmu->max_period == 0) { + metag_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; + } + /* Initialise the active events and reservation mutex */ atomic_set(&metag_pmu->active_events, 0); mutex_init(&metag_pmu->reserve_mutex); diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index 9ae08541e30d..40e1c1dd0e24 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -22,7 +22,6 @@ config MICROBLAZE select HAVE_DYNAMIC_FTRACE select HAVE_FTRACE_MCOUNT_RECORD select HAVE_FUNCTION_GRAPH_TRACER - select HAVE_FUNCTION_TRACE_MCOUNT_TEST select HAVE_FUNCTION_TRACER select HAVE_MEMBLOCK select HAVE_MEMBLOCK_NODE_MAP diff --git a/arch/microblaze/include/asm/processor.h b/arch/microblaze/include/asm/processor.h index 9d31b057c355..497a988d79c2 100644 --- a/arch/microblaze/include/asm/processor.h +++ b/arch/microblaze/include/asm/processor.h @@ -22,6 +22,7 @@ extern const struct seq_operations cpuinfo_op; # define cpu_relax() barrier() +# define cpu_relax_lowlatency() cpu_relax() #define task_pt_regs(tsk) \ (((struct pt_regs *)(THREAD_SIZE + task_stack_page(tsk))) - 1) diff --git a/arch/microblaze/kernel/ftrace.c b/arch/microblaze/kernel/ftrace.c index bbcd2533766c..fc7b48a52cd5 100644 --- a/arch/microblaze/kernel/ftrace.c +++ b/arch/microblaze/kernel/ftrace.c @@ -27,6 +27,9 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) unsigned long return_hooker = (unsigned long) &return_to_handler; + if (unlikely(ftrace_graph_is_dead())) + return; + if (unlikely(atomic_read(¤t->tracing_graph_pause))) return; diff --git a/arch/microblaze/kernel/mcount.S b/arch/microblaze/kernel/mcount.S index fc1e1322ce4c..fed9da5de8c4 100644 --- a/arch/microblaze/kernel/mcount.S +++ b/arch/microblaze/kernel/mcount.S @@ -91,11 +91,6 @@ ENTRY(ftrace_caller) #endif /* CONFIG_DYNAMIC_FTRACE */ SAVE_REGS swi r15, r1, 0; - /* MS: HAVE_FUNCTION_TRACE_MCOUNT_TEST begin of checking */ - lwi r5, r0, function_trace_stop; - bneid r5, end; - nop; - /* MS: HAVE_FUNCTION_TRACE_MCOUNT_TEST end of checking */ #ifdef CONFIG_FUNCTION_GRAPH_TRACER #ifndef CONFIG_DYNAMIC_FTRACE lwi r5, r0, ftrace_graph_return; diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 4e238e6e661c..10f270bd3e25 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -15,7 +15,6 @@ config MIPS select HAVE_BPF_JIT if !CPU_MICROMIPS select ARCH_HAVE_CUSTOM_GPIO_H select HAVE_FUNCTION_TRACER - select HAVE_FUNCTION_TRACE_MCOUNT_TEST select HAVE_DYNAMIC_FTRACE select HAVE_FTRACE_MCOUNT_RECORD select HAVE_C_RECORDMCOUNT diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index b0aa95565752..7a3fc67bd7f9 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -359,13 +359,17 @@ enum emulation_result { #define MIPS3_PG_FRAME 0x3fffffc0 #define VPN2_MASK 0xffffe000 -#define TLB_IS_GLOBAL(x) (((x).tlb_lo0 & MIPS3_PG_G) && \ +#define TLB_IS_GLOBAL(x) (((x).tlb_lo0 & MIPS3_PG_G) && \ ((x).tlb_lo1 & MIPS3_PG_G)) #define TLB_VPN2(x) ((x).tlb_hi & VPN2_MASK) #define TLB_ASID(x) ((x).tlb_hi & ASID_MASK) -#define TLB_IS_VALID(x, va) (((va) & (1 << PAGE_SHIFT)) \ - ? ((x).tlb_lo1 & MIPS3_PG_V) \ +#define TLB_IS_VALID(x, va) (((va) & (1 << PAGE_SHIFT)) \ + ? ((x).tlb_lo1 & MIPS3_PG_V) \ : ((x).tlb_lo0 & MIPS3_PG_V)) +#define TLB_HI_VPN2_HIT(x, y) ((TLB_VPN2(x) & ~(x).tlb_mask) == \ + ((y) & VPN2_MASK & ~(x).tlb_mask)) +#define TLB_HI_ASID_HIT(x, y) (TLB_IS_GLOBAL(x) || \ + TLB_ASID(x) == ((y) & ASID_MASK)) struct kvm_mips_tlb { long tlb_mask; @@ -760,7 +764,7 @@ extern int kvm_mips_trans_mtc0(uint32_t inst, uint32_t *opc, struct kvm_vcpu *vcpu); /* Misc */ -extern int kvm_mips_dump_stats(struct kvm_vcpu *vcpu); +extern void kvm_mips_dump_stats(struct kvm_vcpu *vcpu); extern unsigned long kvm_mips_get_ramsize(struct kvm *kvm); diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h index ad70cba8daff..d5098bc554f4 100644 --- a/arch/mips/include/asm/processor.h +++ b/arch/mips/include/asm/processor.h @@ -367,6 +367,7 @@ unsigned long get_wchan(struct task_struct *p); #define KSTK_STATUS(tsk) (task_pt_regs(tsk)->cp0_status) #define cpu_relax() barrier() +#define cpu_relax_lowlatency() cpu_relax() /* * Return_address is a replacement for __builtin_return_address(count) diff --git a/arch/mips/include/asm/r4kcache.h b/arch/mips/include/asm/r4kcache.h index 0b8bd28a0df1..4520adc8699b 100644 --- a/arch/mips/include/asm/r4kcache.h +++ b/arch/mips/include/asm/r4kcache.h @@ -19,6 +19,9 @@ #include <asm/mipsmtregs.h> #include <asm/uaccess.h> /* for segment_eq() */ +extern void (*r4k_blast_dcache)(void); +extern void (*r4k_blast_icache)(void); + /* * This macro return a properly sign-extended address suitable as base address * for indexed cache operations. Two issues here: diff --git a/arch/mips/kernel/ftrace.c b/arch/mips/kernel/ftrace.c index 60e7e5e45af1..8b6538750fe1 100644 --- a/arch/mips/kernel/ftrace.c +++ b/arch/mips/kernel/ftrace.c @@ -302,6 +302,9 @@ void prepare_ftrace_return(unsigned long *parent_ra_addr, unsigned long self_ra, &return_to_handler; int faulted, insns; + if (unlikely(ftrace_graph_is_dead())) + return; + if (unlikely(atomic_read(¤t->tracing_graph_pause))) return; diff --git a/arch/mips/kernel/mcount.S b/arch/mips/kernel/mcount.S index 539b6294b613..00940d1d5c4f 100644 --- a/arch/mips/kernel/mcount.S +++ b/arch/mips/kernel/mcount.S @@ -74,10 +74,6 @@ _mcount: #endif /* When tracing is activated, it calls ftrace_caller+8 (aka here) */ - lw t1, function_trace_stop - bnez t1, ftrace_stub - nop - MCOUNT_SAVE_REGS #ifdef KBUILD_MCOUNT_RA_ADDRESS PTR_S MCOUNT_RA_ADDRESS_REG, PT_R12(sp) @@ -105,9 +101,6 @@ ftrace_stub: #else /* ! CONFIG_DYNAMIC_FTRACE */ NESTED(_mcount, PT_SIZE, ra) - lw t1, function_trace_stop - bnez t1, ftrace_stub - nop PTR_LA t1, ftrace_stub PTR_L t2, ftrace_trace_function /* Prepare t2 for (1) */ bne t1, t2, static_trace diff --git a/arch/mips/kvm/Makefile b/arch/mips/kvm/Makefile index 78d87bbc99db..401fe027c261 100644 --- a/arch/mips/kvm/Makefile +++ b/arch/mips/kvm/Makefile @@ -5,9 +5,9 @@ common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o) EXTRA_CFLAGS += -Ivirt/kvm -Iarch/mips/kvm -kvm-objs := $(common-objs) kvm_mips.o kvm_mips_emul.o kvm_locore.o \ - kvm_mips_int.o kvm_mips_stats.o kvm_mips_commpage.o \ - kvm_mips_dyntrans.o kvm_trap_emul.o +kvm-objs := $(common-objs) mips.o emulate.o locore.o \ + interrupt.o stats.o commpage.o \ + dyntrans.o trap_emul.o obj-$(CONFIG_KVM) += kvm.o -obj-y += kvm_cb.o kvm_tlb.o +obj-y += callback.o tlb.o diff --git a/arch/mips/kvm/kvm_cb.c b/arch/mips/kvm/callback.c index 313c2e37b978..313c2e37b978 100644 --- a/arch/mips/kvm/kvm_cb.c +++ b/arch/mips/kvm/callback.c diff --git a/arch/mips/kvm/commpage.c b/arch/mips/kvm/commpage.c new file mode 100644 index 000000000000..2d6e976d1add --- /dev/null +++ b/arch/mips/kvm/commpage.c @@ -0,0 +1,33 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * commpage, currently used for Virtual COP0 registers. + * Mapped into the guest kernel @ 0x0. + * + * Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. + * Authors: Sanjay Lal <sanjayl@kymasys.com> + */ + +#include <linux/errno.h> +#include <linux/err.h> +#include <linux/module.h> +#include <linux/vmalloc.h> +#include <linux/fs.h> +#include <linux/bootmem.h> +#include <asm/page.h> +#include <asm/cacheflush.h> +#include <asm/mmu_context.h> + +#include <linux/kvm_host.h> + +#include "commpage.h" + +void kvm_mips_commpage_init(struct kvm_vcpu *vcpu) +{ + struct kvm_mips_commpage *page = vcpu->arch.kseg0_commpage; + + /* Specific init values for fields */ + vcpu->arch.cop0 = &page->cop0; +} diff --git a/arch/mips/kvm/commpage.h b/arch/mips/kvm/commpage.h new file mode 100644 index 000000000000..08c5fa2bbc0f --- /dev/null +++ b/arch/mips/kvm/commpage.h @@ -0,0 +1,24 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * KVM/MIPS: commpage: mapped into get kernel space + * + * Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. + * Authors: Sanjay Lal <sanjayl@kymasys.com> + */ + +#ifndef __KVM_MIPS_COMMPAGE_H__ +#define __KVM_MIPS_COMMPAGE_H__ + +struct kvm_mips_commpage { + /* COP0 state is mapped into Guest kernel via commpage */ + struct mips_coproc cop0; +}; + +#define KVM_MIPS_COMM_EIDI_OFFSET 0x0 + +extern void kvm_mips_commpage_init(struct kvm_vcpu *vcpu); + +#endif /* __KVM_MIPS_COMMPAGE_H__ */ diff --git a/arch/mips/kvm/kvm_mips_dyntrans.c b/arch/mips/kvm/dyntrans.c index b80e41d858fd..521121bdebff 100644 --- a/arch/mips/kvm/kvm_mips_dyntrans.c +++ b/arch/mips/kvm/dyntrans.c @@ -1,13 +1,13 @@ /* -* This file is subject to the terms and conditions of the GNU General Public -* License. See the file "COPYING" in the main directory of this archive -* for more details. -* -* KVM/MIPS: Binary Patching for privileged instructions, reduces traps. -* -* Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. -* Authors: Sanjay Lal <sanjayl@kymasys.com> -*/ + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * KVM/MIPS: Binary Patching for privileged instructions, reduces traps. + * + * Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. + * Authors: Sanjay Lal <sanjayl@kymasys.com> + */ #include <linux/errno.h> #include <linux/err.h> @@ -18,7 +18,7 @@ #include <linux/bootmem.h> #include <asm/cacheflush.h> -#include "kvm_mips_comm.h" +#include "commpage.h" #define SYNCI_TEMPLATE 0x041f0000 #define SYNCI_BASE(x) (((x) >> 21) & 0x1f) @@ -28,9 +28,8 @@ #define CLEAR_TEMPLATE 0x00000020 #define SW_TEMPLATE 0xac000000 -int -kvm_mips_trans_cache_index(uint32_t inst, uint32_t *opc, - struct kvm_vcpu *vcpu) +int kvm_mips_trans_cache_index(uint32_t inst, uint32_t *opc, + struct kvm_vcpu *vcpu) { int result = 0; unsigned long kseg0_opc; @@ -47,12 +46,11 @@ kvm_mips_trans_cache_index(uint32_t inst, uint32_t *opc, } /* - * Address based CACHE instructions are transformed into synci(s). A little heavy - * for just D-cache invalidates, but avoids an expensive trap + * Address based CACHE instructions are transformed into synci(s). A little + * heavy for just D-cache invalidates, but avoids an expensive trap */ -int -kvm_mips_trans_cache_va(uint32_t inst, uint32_t *opc, - struct kvm_vcpu *vcpu) +int kvm_mips_trans_cache_va(uint32_t inst, uint32_t *opc, + struct kvm_vcpu *vcpu) { int result = 0; unsigned long kseg0_opc; @@ -72,8 +70,7 @@ kvm_mips_trans_cache_va(uint32_t inst, uint32_t *opc, return result; } -int -kvm_mips_trans_mfc0(uint32_t inst, uint32_t *opc, struct kvm_vcpu *vcpu) +int kvm_mips_trans_mfc0(uint32_t inst, uint32_t *opc, struct kvm_vcpu *vcpu) { int32_t rt, rd, sel; uint32_t mfc0_inst; @@ -115,8 +112,7 @@ kvm_mips_trans_mfc0(uint32_t inst, uint32_t *opc, struct kvm_vcpu *vcpu) return 0; } -int -kvm_mips_trans_mtc0(uint32_t inst, uint32_t *opc, struct kvm_vcpu *vcpu) +int kvm_mips_trans_mtc0(uint32_t inst, uint32_t *opc, struct kvm_vcpu *vcpu) { int32_t rt, rd, sel; uint32_t mtc0_inst = SW_TEMPLATE; diff --git a/arch/mips/kvm/kvm_mips_emul.c b/arch/mips/kvm/emulate.c index 8d4840090082..fb3e8dfd1ff6 100644 --- a/arch/mips/kvm/kvm_mips_emul.c +++ b/arch/mips/kvm/emulate.c @@ -1,13 +1,13 @@ /* -* This file is subject to the terms and conditions of the GNU General Public -* License. See the file "COPYING" in the main directory of this archive -* for more details. -* -* KVM/MIPS: Instruction/Exception emulation -* -* Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. -* Authors: Sanjay Lal <sanjayl@kymasys.com> -*/ + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * KVM/MIPS: Instruction/Exception emulation + * + * Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. + * Authors: Sanjay Lal <sanjayl@kymasys.com> + */ #include <linux/errno.h> #include <linux/err.h> @@ -29,9 +29,9 @@ #include <asm/r4kcache.h> #define CONFIG_MIPS_MT -#include "kvm_mips_opcode.h" -#include "kvm_mips_int.h" -#include "kvm_mips_comm.h" +#include "opcode.h" +#include "interrupt.h" +#include "commpage.h" #include "trace.h" @@ -51,18 +51,14 @@ unsigned long kvm_compute_return_epc(struct kvm_vcpu *vcpu, if (epc & 3) goto unaligned; - /* - * Read the instruction - */ + /* Read the instruction */ insn.word = kvm_get_inst((uint32_t *) epc, vcpu); if (insn.word == KVM_INVALID_INST) return KVM_INVALID_INST; switch (insn.i_format.opcode) { - /* - * jr and jalr are in r_format format. - */ + /* jr and jalr are in r_format format. */ case spec_op: switch (insn.r_format.func) { case jalr_op: @@ -124,18 +120,16 @@ unsigned long kvm_compute_return_epc(struct kvm_vcpu *vcpu, dspcontrol = rddsp(0x01); - if (dspcontrol >= 32) { + if (dspcontrol >= 32) epc = epc + 4 + (insn.i_format.simmediate << 2); - } else + else epc += 8; nextpc = epc; break; } break; - /* - * These are unconditional and in j_format. - */ + /* These are unconditional and in j_format. */ case jal_op: arch->gprs[31] = instpc + 8; case j_op: @@ -146,9 +140,7 @@ unsigned long kvm_compute_return_epc(struct kvm_vcpu *vcpu, nextpc = epc; break; - /* - * These are conditional and in i_format. - */ + /* These are conditional and in i_format. */ case beq_op: case beql_op: if (arch->gprs[insn.i_format.rs] == @@ -189,22 +181,20 @@ unsigned long kvm_compute_return_epc(struct kvm_vcpu *vcpu, nextpc = epc; break; - /* - * And now the FPA/cp1 branch instructions. - */ + /* And now the FPA/cp1 branch instructions. */ case cop1_op: - printk("%s: unsupported cop1_op\n", __func__); + kvm_err("%s: unsupported cop1_op\n", __func__); break; } return nextpc; unaligned: - printk("%s: unaligned epc\n", __func__); + kvm_err("%s: unaligned epc\n", __func__); return nextpc; sigill: - printk("%s: DSP branch but not DSP ASE\n", __func__); + kvm_err("%s: DSP branch but not DSP ASE\n", __func__); return nextpc; } @@ -219,7 +209,8 @@ enum emulation_result update_pc(struct kvm_vcpu *vcpu, uint32_t cause) er = EMULATE_FAIL; } else { vcpu->arch.pc = branch_pc; - kvm_debug("BD update_pc(): New PC: %#lx\n", vcpu->arch.pc); + kvm_debug("BD update_pc(): New PC: %#lx\n", + vcpu->arch.pc); } } else vcpu->arch.pc += 4; @@ -240,6 +231,7 @@ enum emulation_result update_pc(struct kvm_vcpu *vcpu, uint32_t cause) static inline int kvm_mips_count_disabled(struct kvm_vcpu *vcpu) { struct mips_coproc *cop0 = vcpu->arch.cop0; + return (vcpu->arch.count_ctl & KVM_REG_MIPS_COUNT_CTL_DC) || (kvm_read_c0_guest_cause(cop0) & CAUSEF_DC); } @@ -392,7 +384,6 @@ static ktime_t kvm_mips_freeze_hrtimer(struct kvm_vcpu *vcpu, return now; } - /** * kvm_mips_resume_hrtimer() - Resume hrtimer, updating expiry. * @vcpu: Virtual CPU. @@ -760,8 +751,8 @@ enum emulation_result kvm_mips_emul_eret(struct kvm_vcpu *vcpu) kvm_clear_c0_guest_status(cop0, ST0_ERL); vcpu->arch.pc = kvm_read_c0_guest_errorepc(cop0); } else { - printk("[%#lx] ERET when MIPS_SR_EXL|MIPS_SR_ERL == 0\n", - vcpu->arch.pc); + kvm_err("[%#lx] ERET when MIPS_SR_EXL|MIPS_SR_ERL == 0\n", + vcpu->arch.pc); er = EMULATE_FAIL; } @@ -770,8 +761,6 @@ enum emulation_result kvm_mips_emul_eret(struct kvm_vcpu *vcpu) enum emulation_result kvm_mips_emul_wait(struct kvm_vcpu *vcpu) { - enum emulation_result er = EMULATE_DONE; - kvm_debug("[%#lx] !!!WAIT!!! (%#lx)\n", vcpu->arch.pc, vcpu->arch.pending_exceptions); @@ -781,8 +770,9 @@ enum emulation_result kvm_mips_emul_wait(struct kvm_vcpu *vcpu) vcpu->arch.wait = 1; kvm_vcpu_block(vcpu); - /* We we are runnable, then definitely go off to user space to check if any - * I/O interrupts are pending. + /* + * We we are runnable, then definitely go off to user space to + * check if any I/O interrupts are pending. */ if (kvm_check_request(KVM_REQ_UNHALT, vcpu)) { clear_bit(KVM_REQ_UNHALT, &vcpu->requests); @@ -790,20 +780,20 @@ enum emulation_result kvm_mips_emul_wait(struct kvm_vcpu *vcpu) } } - return er; + return EMULATE_DONE; } -/* XXXKYMA: Linux doesn't seem to use TLBR, return EMULATE_FAIL for now so that we can catch - * this, if things ever change +/* + * XXXKYMA: Linux doesn't seem to use TLBR, return EMULATE_FAIL for now so that + * we can catch this, if things ever change */ enum emulation_result kvm_mips_emul_tlbr(struct kvm_vcpu *vcpu) { struct mips_coproc *cop0 = vcpu->arch.cop0; - enum emulation_result er = EMULATE_FAIL; uint32_t pc = vcpu->arch.pc; - printk("[%#x] COP0_TLBR [%ld]\n", pc, kvm_read_c0_guest_index(cop0)); - return er; + kvm_err("[%#x] COP0_TLBR [%ld]\n", pc, kvm_read_c0_guest_index(cop0)); + return EMULATE_FAIL; } /* Write Guest TLB Entry @ Index */ @@ -811,88 +801,76 @@ enum emulation_result kvm_mips_emul_tlbwi(struct kvm_vcpu *vcpu) { struct mips_coproc *cop0 = vcpu->arch.cop0; int index = kvm_read_c0_guest_index(cop0); - enum emulation_result er = EMULATE_DONE; struct kvm_mips_tlb *tlb = NULL; uint32_t pc = vcpu->arch.pc; if (index < 0 || index >= KVM_MIPS_GUEST_TLB_SIZE) { - printk("%s: illegal index: %d\n", __func__, index); - printk - ("[%#x] COP0_TLBWI [%d] (entryhi: %#lx, entrylo0: %#lx entrylo1: %#lx, mask: %#lx)\n", - pc, index, kvm_read_c0_guest_entryhi(cop0), - kvm_read_c0_guest_entrylo0(cop0), - kvm_read_c0_guest_entrylo1(cop0), - kvm_read_c0_guest_pagemask(cop0)); + kvm_debug("%s: illegal index: %d\n", __func__, index); + kvm_debug("[%#x] COP0_TLBWI [%d] (entryhi: %#lx, entrylo0: %#lx entrylo1: %#lx, mask: %#lx)\n", + pc, index, kvm_read_c0_guest_entryhi(cop0), + kvm_read_c0_guest_entrylo0(cop0), + kvm_read_c0_guest_entrylo1(cop0), + kvm_read_c0_guest_pagemask(cop0)); index = (index & ~0x80000000) % KVM_MIPS_GUEST_TLB_SIZE; } tlb = &vcpu->arch.guest_tlb[index]; -#if 1 - /* Probe the shadow host TLB for the entry being overwritten, if one matches, invalidate it */ + /* + * Probe the shadow host TLB for the entry being overwritten, if one + * matches, invalidate it + */ kvm_mips_host_tlb_inv(vcpu, tlb->tlb_hi); -#endif tlb->tlb_mask = kvm_read_c0_guest_pagemask(cop0); tlb->tlb_hi = kvm_read_c0_guest_entryhi(cop0); tlb->tlb_lo0 = kvm_read_c0_guest_entrylo0(cop0); tlb->tlb_lo1 = kvm_read_c0_guest_entrylo1(cop0); - kvm_debug - ("[%#x] COP0_TLBWI [%d] (entryhi: %#lx, entrylo0: %#lx entrylo1: %#lx, mask: %#lx)\n", - pc, index, kvm_read_c0_guest_entryhi(cop0), - kvm_read_c0_guest_entrylo0(cop0), kvm_read_c0_guest_entrylo1(cop0), - kvm_read_c0_guest_pagemask(cop0)); + kvm_debug("[%#x] COP0_TLBWI [%d] (entryhi: %#lx, entrylo0: %#lx entrylo1: %#lx, mask: %#lx)\n", + pc, index, kvm_read_c0_guest_entryhi(cop0), + kvm_read_c0_guest_entrylo0(cop0), + kvm_read_c0_guest_entrylo1(cop0), + kvm_read_c0_guest_pagemask(cop0)); - return er; + return EMULATE_DONE; } /* Write Guest TLB Entry @ Random Index */ enum emulation_result kvm_mips_emul_tlbwr(struct kvm_vcpu *vcpu) { struct mips_coproc *cop0 = vcpu->arch.cop0; - enum emulation_result er = EMULATE_DONE; struct kvm_mips_tlb *tlb = NULL; uint32_t pc = vcpu->arch.pc; int index; -#if 1 get_random_bytes(&index, sizeof(index)); index &= (KVM_MIPS_GUEST_TLB_SIZE - 1); -#else - index = jiffies % KVM_MIPS_GUEST_TLB_SIZE; -#endif - - if (index < 0 || index >= KVM_MIPS_GUEST_TLB_SIZE) { - printk("%s: illegal index: %d\n", __func__, index); - return EMULATE_FAIL; - } tlb = &vcpu->arch.guest_tlb[index]; -#if 1 - /* Probe the shadow host TLB for the entry being overwritten, if one matches, invalidate it */ + /* + * Probe the shadow host TLB for the entry being overwritten, if one + * matches, invalidate it + */ kvm_mips_host_tlb_inv(vcpu, tlb->tlb_hi); -#endif tlb->tlb_mask = kvm_read_c0_guest_pagemask(cop0); tlb->tlb_hi = kvm_read_c0_guest_entryhi(cop0); tlb->tlb_lo0 = kvm_read_c0_guest_entrylo0(cop0); tlb->tlb_lo1 = kvm_read_c0_guest_entrylo1(cop0); - kvm_debug - ("[%#x] COP0_TLBWR[%d] (entryhi: %#lx, entrylo0: %#lx entrylo1: %#lx)\n", - pc, index, kvm_read_c0_guest_entryhi(cop0), - kvm_read_c0_guest_entrylo0(cop0), - kvm_read_c0_guest_entrylo1(cop0)); + kvm_debug("[%#x] COP0_TLBWR[%d] (entryhi: %#lx, entrylo0: %#lx entrylo1: %#lx)\n", + pc, index, kvm_read_c0_guest_entryhi(cop0), + kvm_read_c0_guest_entrylo0(cop0), + kvm_read_c0_guest_entrylo1(cop0)); - return er; + return EMULATE_DONE; } enum emulation_result kvm_mips_emul_tlbp(struct kvm_vcpu *vcpu) { struct mips_coproc *cop0 = vcpu->arch.cop0; long entryhi = kvm_read_c0_guest_entryhi(cop0); - enum emulation_result er = EMULATE_DONE; uint32_t pc = vcpu->arch.pc; int index = -1; @@ -903,12 +881,12 @@ enum emulation_result kvm_mips_emul_tlbp(struct kvm_vcpu *vcpu) kvm_debug("[%#x] COP0_TLBP (entryhi: %#lx), index: %d\n", pc, entryhi, index); - return er; + return EMULATE_DONE; } -enum emulation_result -kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc, uint32_t cause, - struct kvm_run *run, struct kvm_vcpu *vcpu) +enum emulation_result kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc, + uint32_t cause, struct kvm_run *run, + struct kvm_vcpu *vcpu) { struct mips_coproc *cop0 = vcpu->arch.cop0; enum emulation_result er = EMULATE_DONE; @@ -922,9 +900,8 @@ kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc, uint32_t cause, */ curr_pc = vcpu->arch.pc; er = update_pc(vcpu, cause); - if (er == EMULATE_FAIL) { + if (er == EMULATE_FAIL) return er; - } copz = (inst >> 21) & 0x1f; rt = (inst >> 16) & 0x1f; @@ -949,7 +926,7 @@ kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc, uint32_t cause, er = kvm_mips_emul_tlbp(vcpu); break; case rfe_op: - printk("!!!COP0_RFE!!!\n"); + kvm_err("!!!COP0_RFE!!!\n"); break; case eret_op: er = kvm_mips_emul_eret(vcpu); @@ -973,8 +950,7 @@ kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc, uint32_t cause, #ifdef CONFIG_KVM_MIPS_DYN_TRANS kvm_mips_trans_mfc0(inst, opc, vcpu); #endif - } - else { + } else { vcpu->arch.gprs[rt] = cop0->reg[rd][sel]; #ifdef CONFIG_KVM_MIPS_DYN_TRANS @@ -999,8 +975,8 @@ kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc, uint32_t cause, if ((rd == MIPS_CP0_TLB_INDEX) && (vcpu->arch.gprs[rt] >= KVM_MIPS_GUEST_TLB_SIZE)) { - printk("Invalid TLB Index: %ld", - vcpu->arch.gprs[rt]); + kvm_err("Invalid TLB Index: %ld", + vcpu->arch.gprs[rt]); er = EMULATE_FAIL; break; } @@ -1010,21 +986,19 @@ kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc, uint32_t cause, kvm_change_c0_guest_ebase(cop0, ~(C0_EBASE_CORE_MASK), vcpu->arch.gprs[rt]); - printk("MTCz, cop0->reg[EBASE]: %#lx\n", - kvm_read_c0_guest_ebase(cop0)); + kvm_err("MTCz, cop0->reg[EBASE]: %#lx\n", + kvm_read_c0_guest_ebase(cop0)); } else if (rd == MIPS_CP0_TLB_HI && sel == 0) { uint32_t nasid = - vcpu->arch.gprs[rt] & ASID_MASK; - if ((KSEGX(vcpu->arch.gprs[rt]) != CKSEG0) - && + vcpu->arch.gprs[rt] & ASID_MASK; + if ((KSEGX(vcpu->arch.gprs[rt]) != CKSEG0) && ((kvm_read_c0_guest_entryhi(cop0) & ASID_MASK) != nasid)) { - - kvm_debug - ("MTCz, change ASID from %#lx to %#lx\n", - kvm_read_c0_guest_entryhi(cop0) & - ASID_MASK, - vcpu->arch.gprs[rt] & ASID_MASK); + kvm_debug("MTCz, change ASID from %#lx to %#lx\n", + kvm_read_c0_guest_entryhi(cop0) + & ASID_MASK, + vcpu->arch.gprs[rt] + & ASID_MASK); /* Blow away the shadow host TLBs */ kvm_mips_flush_host_tlb(1); @@ -1049,7 +1023,10 @@ kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc, uint32_t cause, } else if ((rd == MIPS_CP0_STATUS) && (sel == 0)) { kvm_write_c0_guest_status(cop0, vcpu->arch.gprs[rt]); - /* Make sure that CU1 and NMI bits are never set */ + /* + * Make sure that CU1 and NMI bits are + * never set + */ kvm_clear_c0_guest_status(cop0, (ST0_CU1 | ST0_NMI)); @@ -1058,6 +1035,7 @@ kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc, uint32_t cause, #endif } else if ((rd == MIPS_CP0_CAUSE) && (sel == 0)) { uint32_t old_cause, new_cause; + old_cause = kvm_read_c0_guest_cause(cop0); new_cause = vcpu->arch.gprs[rt]; /* Update R/W bits */ @@ -1082,9 +1060,8 @@ kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc, uint32_t cause, break; case dmtc_op: - printk - ("!!!!!!![%#lx]dmtc_op: rt: %d, rd: %d, sel: %d!!!!!!\n", - vcpu->arch.pc, rt, rd, sel); + kvm_err("!!!!!!![%#lx]dmtc_op: rt: %d, rd: %d, sel: %d!!!!!!\n", + vcpu->arch.pc, rt, rd, sel); er = EMULATE_FAIL; break; @@ -1115,7 +1092,10 @@ kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc, uint32_t cause, cop0->reg[MIPS_CP0_STATUS][2] & 0xf; uint32_t pss = (cop0->reg[MIPS_CP0_STATUS][2] >> 6) & 0xf; - /* We don't support any shadow register sets, so SRSCtl[PSS] == SRSCtl[CSS] = 0 */ + /* + * We don't support any shadow register sets, so + * SRSCtl[PSS] == SRSCtl[CSS] = 0 + */ if (css || pss) { er = EMULATE_FAIL; break; @@ -1126,21 +1106,17 @@ kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc, uint32_t cause, } break; default: - printk - ("[%#lx]MachEmulateCP0: unsupported COP0, copz: 0x%x\n", - vcpu->arch.pc, copz); + kvm_err("[%#lx]MachEmulateCP0: unsupported COP0, copz: 0x%x\n", + vcpu->arch.pc, copz); er = EMULATE_FAIL; break; } } done: - /* - * Rollback PC only if emulation was unsuccessful - */ - if (er == EMULATE_FAIL) { + /* Rollback PC only if emulation was unsuccessful */ + if (er == EMULATE_FAIL) vcpu->arch.pc = curr_pc; - } dont_update_pc: /* @@ -1152,9 +1128,9 @@ dont_update_pc: return er; } -enum emulation_result -kvm_mips_emulate_store(uint32_t inst, uint32_t cause, - struct kvm_run *run, struct kvm_vcpu *vcpu) +enum emulation_result kvm_mips_emulate_store(uint32_t inst, uint32_t cause, + struct kvm_run *run, + struct kvm_vcpu *vcpu) { enum emulation_result er = EMULATE_DO_MMIO; int32_t op, base, rt, offset; @@ -1252,24 +1228,21 @@ kvm_mips_emulate_store(uint32_t inst, uint32_t cause, break; default: - printk("Store not yet supported"); + kvm_err("Store not yet supported"); er = EMULATE_FAIL; break; } - /* - * Rollback PC if emulation was unsuccessful - */ - if (er == EMULATE_FAIL) { + /* Rollback PC if emulation was unsuccessful */ + if (er == EMULATE_FAIL) vcpu->arch.pc = curr_pc; - } return er; } -enum emulation_result -kvm_mips_emulate_load(uint32_t inst, uint32_t cause, - struct kvm_run *run, struct kvm_vcpu *vcpu) +enum emulation_result kvm_mips_emulate_load(uint32_t inst, uint32_t cause, + struct kvm_run *run, + struct kvm_vcpu *vcpu) { enum emulation_result er = EMULATE_DO_MMIO; int32_t op, base, rt, offset; @@ -1364,7 +1337,7 @@ kvm_mips_emulate_load(uint32_t inst, uint32_t cause, break; default: - printk("Load not yet supported"); + kvm_err("Load not yet supported"); er = EMULATE_FAIL; break; } @@ -1383,7 +1356,7 @@ int kvm_mips_sync_icache(unsigned long va, struct kvm_vcpu *vcpu) gfn = va >> PAGE_SHIFT; if (gfn >= kvm->arch.guest_pmap_npages) { - printk("%s: Invalid gfn: %#llx\n", __func__, gfn); + kvm_err("%s: Invalid gfn: %#llx\n", __func__, gfn); kvm_mips_dump_host_tlbs(); kvm_arch_vcpu_dump_regs(vcpu); return -1; @@ -1391,7 +1364,8 @@ int kvm_mips_sync_icache(unsigned long va, struct kvm_vcpu *vcpu) pfn = kvm->arch.guest_pmap[gfn]; pa = (pfn << PAGE_SHIFT) | offset; - printk("%s: va: %#lx, unmapped: %#x\n", __func__, va, CKSEG0ADDR(pa)); + kvm_debug("%s: va: %#lx, unmapped: %#x\n", __func__, va, + CKSEG0ADDR(pa)); local_flush_icache_range(CKSEG0ADDR(pa), 32); return 0; @@ -1410,13 +1384,12 @@ int kvm_mips_sync_icache(unsigned long va, struct kvm_vcpu *vcpu) #define MIPS_CACHE_DCACHE 0x1 #define MIPS_CACHE_SEC 0x3 -enum emulation_result -kvm_mips_emulate_cache(uint32_t inst, uint32_t *opc, uint32_t cause, - struct kvm_run *run, struct kvm_vcpu *vcpu) +enum emulation_result kvm_mips_emulate_cache(uint32_t inst, uint32_t *opc, + uint32_t cause, + struct kvm_run *run, + struct kvm_vcpu *vcpu) { struct mips_coproc *cop0 = vcpu->arch.cop0; - extern void (*r4k_blast_dcache) (void); - extern void (*r4k_blast_icache) (void); enum emulation_result er = EMULATE_DONE; int32_t offset, cache, op_inst, op, base; struct kvm_vcpu_arch *arch = &vcpu->arch; @@ -1443,22 +1416,23 @@ kvm_mips_emulate_cache(uint32_t inst, uint32_t *opc, uint32_t cause, kvm_debug("CACHE (cache: %#x, op: %#x, base[%d]: %#lx, offset: %#x\n", cache, op, base, arch->gprs[base], offset); - /* Treat INDEX_INV as a nop, basically issued by Linux on startup to invalidate - * the caches entirely by stepping through all the ways/indexes + /* + * Treat INDEX_INV as a nop, basically issued by Linux on startup to + * invalidate the caches entirely by stepping through all the + * ways/indexes */ if (op == MIPS_CACHE_OP_INDEX_INV) { - kvm_debug - ("@ %#lx/%#lx CACHE (cache: %#x, op: %#x, base[%d]: %#lx, offset: %#x\n", - vcpu->arch.pc, vcpu->arch.gprs[31], cache, op, base, - arch->gprs[base], offset); + kvm_debug("@ %#lx/%#lx CACHE (cache: %#x, op: %#x, base[%d]: %#lx, offset: %#x\n", + vcpu->arch.pc, vcpu->arch.gprs[31], cache, op, base, + arch->gprs[base], offset); if (cache == MIPS_CACHE_DCACHE) r4k_blast_dcache(); else if (cache == MIPS_CACHE_ICACHE) r4k_blast_icache(); else { - printk("%s: unsupported CACHE INDEX operation\n", - __func__); + kvm_err("%s: unsupported CACHE INDEX operation\n", + __func__); return EMULATE_FAIL; } @@ -1470,21 +1444,19 @@ kvm_mips_emulate_cache(uint32_t inst, uint32_t *opc, uint32_t cause, preempt_disable(); if (KVM_GUEST_KSEGX(va) == KVM_GUEST_KSEG0) { - - if (kvm_mips_host_tlb_lookup(vcpu, va) < 0) { + if (kvm_mips_host_tlb_lookup(vcpu, va) < 0) kvm_mips_handle_kseg0_tlb_fault(va, vcpu); - } } else if ((KVM_GUEST_KSEGX(va) < KVM_GUEST_KSEG0) || KVM_GUEST_KSEGX(va) == KVM_GUEST_KSEG23) { int index; /* If an entry already exists then skip */ - if (kvm_mips_host_tlb_lookup(vcpu, va) >= 0) { + if (kvm_mips_host_tlb_lookup(vcpu, va) >= 0) goto skip_fault; - } - /* If address not in the guest TLB, then give the guest a fault, the - * resulting handler will do the right thing + /* + * If address not in the guest TLB, then give the guest a fault, + * the resulting handler will do the right thing */ index = kvm_mips_guest_tlb_lookup(vcpu, (va & VPN2_MASK) | (kvm_read_c0_guest_entryhi @@ -1499,23 +1471,28 @@ kvm_mips_emulate_cache(uint32_t inst, uint32_t *opc, uint32_t cause, goto dont_update_pc; } else { struct kvm_mips_tlb *tlb = &vcpu->arch.guest_tlb[index]; - /* Check if the entry is valid, if not then setup a TLB invalid exception to the guest */ + /* + * Check if the entry is valid, if not then setup a TLB + * invalid exception to the guest + */ if (!TLB_IS_VALID(*tlb, va)) { er = kvm_mips_emulate_tlbinv_ld(cause, NULL, run, vcpu); preempt_enable(); goto dont_update_pc; } else { - /* We fault an entry from the guest tlb to the shadow host TLB */ + /* + * We fault an entry from the guest tlb to the + * shadow host TLB + */ kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb, NULL, NULL); } } } else { - printk - ("INVALID CACHE INDEX/ADDRESS (cache: %#x, op: %#x, base[%d]: %#lx, offset: %#x\n", - cache, op, base, arch->gprs[base], offset); + kvm_err("INVALID CACHE INDEX/ADDRESS (cache: %#x, op: %#x, base[%d]: %#lx, offset: %#x\n", + cache, op, base, arch->gprs[base], offset); er = EMULATE_FAIL; preempt_enable(); goto dont_update_pc; @@ -1530,7 +1507,10 @@ skip_fault: flush_dcache_line(va); #ifdef CONFIG_KVM_MIPS_DYN_TRANS - /* Replace the CACHE instruction, with a SYNCI, not the same, but avoids a trap */ + /* + * Replace the CACHE instruction, with a SYNCI, not the same, + * but avoids a trap + */ kvm_mips_trans_cache_va(inst, opc, vcpu); #endif } else if (op == MIPS_CACHE_OP_HIT_INV && cache == MIPS_CACHE_ICACHE) { @@ -1542,9 +1522,8 @@ skip_fault: kvm_mips_trans_cache_va(inst, opc, vcpu); #endif } else { - printk - ("NO-OP CACHE (cache: %#x, op: %#x, base[%d]: %#lx, offset: %#x\n", - cache, op, base, arch->gprs[base], offset); + kvm_err("NO-OP CACHE (cache: %#x, op: %#x, base[%d]: %#lx, offset: %#x\n", + cache, op, base, arch->gprs[base], offset); er = EMULATE_FAIL; preempt_enable(); goto dont_update_pc; @@ -1552,28 +1531,23 @@ skip_fault: preempt_enable(); - dont_update_pc: - /* - * Rollback PC - */ +dont_update_pc: + /* Rollback PC */ vcpu->arch.pc = curr_pc; - done: +done: return er; } -enum emulation_result -kvm_mips_emulate_inst(unsigned long cause, uint32_t *opc, - struct kvm_run *run, struct kvm_vcpu *vcpu) +enum emulation_result kvm_mips_emulate_inst(unsigned long cause, uint32_t *opc, + struct kvm_run *run, + struct kvm_vcpu *vcpu) { enum emulation_result er = EMULATE_DONE; uint32_t inst; - /* - * Fetch the instruction. - */ - if (cause & CAUSEF_BD) { + /* Fetch the instruction. */ + if (cause & CAUSEF_BD) opc += 1; - } inst = kvm_get_inst(opc, vcpu); @@ -1601,8 +1575,8 @@ kvm_mips_emulate_inst(unsigned long cause, uint32_t *opc, break; default: - printk("Instruction emulation not supported (%p/%#x)\n", opc, - inst); + kvm_err("Instruction emulation not supported (%p/%#x)\n", opc, + inst); kvm_arch_vcpu_dump_regs(vcpu); er = EMULATE_FAIL; break; @@ -1611,9 +1585,10 @@ kvm_mips_emulate_inst(unsigned long cause, uint32_t *opc, return er; } -enum emulation_result -kvm_mips_emulate_syscall(unsigned long cause, uint32_t *opc, - struct kvm_run *run, struct kvm_vcpu *vcpu) +enum emulation_result kvm_mips_emulate_syscall(unsigned long cause, + uint32_t *opc, + struct kvm_run *run, + struct kvm_vcpu *vcpu) { struct mips_coproc *cop0 = vcpu->arch.cop0; struct kvm_vcpu_arch *arch = &vcpu->arch; @@ -1638,20 +1613,20 @@ kvm_mips_emulate_syscall(unsigned long cause, uint32_t *opc, arch->pc = KVM_GUEST_KSEG0 + 0x180; } else { - printk("Trying to deliver SYSCALL when EXL is already set\n"); + kvm_err("Trying to deliver SYSCALL when EXL is already set\n"); er = EMULATE_FAIL; } return er; } -enum emulation_result -kvm_mips_emulate_tlbmiss_ld(unsigned long cause, uint32_t *opc, - struct kvm_run *run, struct kvm_vcpu *vcpu) +enum emulation_result kvm_mips_emulate_tlbmiss_ld(unsigned long cause, + uint32_t *opc, + struct kvm_run *run, + struct kvm_vcpu *vcpu) { struct mips_coproc *cop0 = vcpu->arch.cop0; struct kvm_vcpu_arch *arch = &vcpu->arch; - enum emulation_result er = EMULATE_DONE; unsigned long entryhi = (vcpu->arch. host_cp0_badvaddr & VPN2_MASK) | (kvm_read_c0_guest_entryhi(cop0) & ASID_MASK); @@ -1688,16 +1663,16 @@ kvm_mips_emulate_tlbmiss_ld(unsigned long cause, uint32_t *opc, /* Blow away the shadow host TLBs */ kvm_mips_flush_host_tlb(1); - return er; + return EMULATE_DONE; } -enum emulation_result -kvm_mips_emulate_tlbinv_ld(unsigned long cause, uint32_t *opc, - struct kvm_run *run, struct kvm_vcpu *vcpu) +enum emulation_result kvm_mips_emulate_tlbinv_ld(unsigned long cause, + uint32_t *opc, + struct kvm_run *run, + struct kvm_vcpu *vcpu) { struct mips_coproc *cop0 = vcpu->arch.cop0; struct kvm_vcpu_arch *arch = &vcpu->arch; - enum emulation_result er = EMULATE_DONE; unsigned long entryhi = (vcpu->arch.host_cp0_badvaddr & VPN2_MASK) | (kvm_read_c0_guest_entryhi(cop0) & ASID_MASK); @@ -1734,16 +1709,16 @@ kvm_mips_emulate_tlbinv_ld(unsigned long cause, uint32_t *opc, /* Blow away the shadow host TLBs */ kvm_mips_flush_host_tlb(1); - return er; + return EMULATE_DONE; } -enum emulation_result -kvm_mips_emulate_tlbmiss_st(unsigned long cause, uint32_t *opc, - struct kvm_run *run, struct kvm_vcpu *vcpu) +enum emulation_result kvm_mips_emulate_tlbmiss_st(unsigned long cause, + uint32_t *opc, + struct kvm_run *run, + struct kvm_vcpu *vcpu) { struct mips_coproc *cop0 = vcpu->arch.cop0; struct kvm_vcpu_arch *arch = &vcpu->arch; - enum emulation_result er = EMULATE_DONE; unsigned long entryhi = (vcpu->arch.host_cp0_badvaddr & VPN2_MASK) | (kvm_read_c0_guest_entryhi(cop0) & ASID_MASK); @@ -1778,16 +1753,16 @@ kvm_mips_emulate_tlbmiss_st(unsigned long cause, uint32_t *opc, /* Blow away the shadow host TLBs */ kvm_mips_flush_host_tlb(1); - return er; + return EMULATE_DONE; } -enum emulation_result -kvm_mips_emulate_tlbinv_st(unsigned long cause, uint32_t *opc, - struct kvm_run *run, struct kvm_vcpu *vcpu) +enum emulation_result kvm_mips_emulate_tlbinv_st(unsigned long cause, + uint32_t *opc, + struct kvm_run *run, + struct kvm_vcpu *vcpu) { struct mips_coproc *cop0 = vcpu->arch.cop0; struct kvm_vcpu_arch *arch = &vcpu->arch; - enum emulation_result er = EMULATE_DONE; unsigned long entryhi = (vcpu->arch.host_cp0_badvaddr & VPN2_MASK) | (kvm_read_c0_guest_entryhi(cop0) & ASID_MASK); @@ -1822,13 +1797,13 @@ kvm_mips_emulate_tlbinv_st(unsigned long cause, uint32_t *opc, /* Blow away the shadow host TLBs */ kvm_mips_flush_host_tlb(1); - return er; + return EMULATE_DONE; } /* TLBMOD: store into address matching TLB with Dirty bit off */ -enum emulation_result -kvm_mips_handle_tlbmod(unsigned long cause, uint32_t *opc, - struct kvm_run *run, struct kvm_vcpu *vcpu) +enum emulation_result kvm_mips_handle_tlbmod(unsigned long cause, uint32_t *opc, + struct kvm_run *run, + struct kvm_vcpu *vcpu) { enum emulation_result er = EMULATE_DONE; #ifdef DEBUG @@ -1837,9 +1812,7 @@ kvm_mips_handle_tlbmod(unsigned long cause, uint32_t *opc, (kvm_read_c0_guest_entryhi(cop0) & ASID_MASK); int index; - /* - * If address not in the guest TLB, then we are in trouble - */ + /* If address not in the guest TLB, then we are in trouble */ index = kvm_mips_guest_tlb_lookup(vcpu, entryhi); if (index < 0) { /* XXXKYMA Invalidate and retry */ @@ -1856,15 +1829,15 @@ kvm_mips_handle_tlbmod(unsigned long cause, uint32_t *opc, return er; } -enum emulation_result -kvm_mips_emulate_tlbmod(unsigned long cause, uint32_t *opc, - struct kvm_run *run, struct kvm_vcpu *vcpu) +enum emulation_result kvm_mips_emulate_tlbmod(unsigned long cause, + uint32_t *opc, + struct kvm_run *run, + struct kvm_vcpu *vcpu) { struct mips_coproc *cop0 = vcpu->arch.cop0; unsigned long entryhi = (vcpu->arch.host_cp0_badvaddr & VPN2_MASK) | (kvm_read_c0_guest_entryhi(cop0) & ASID_MASK); struct kvm_vcpu_arch *arch = &vcpu->arch; - enum emulation_result er = EMULATE_DONE; if ((kvm_read_c0_guest_status(cop0) & ST0_EXL) == 0) { /* save old pc */ @@ -1895,16 +1868,16 @@ kvm_mips_emulate_tlbmod(unsigned long cause, uint32_t *opc, /* Blow away the shadow host TLBs */ kvm_mips_flush_host_tlb(1); - return er; + return EMULATE_DONE; } -enum emulation_result -kvm_mips_emulate_fpu_exc(unsigned long cause, uint32_t *opc, - struct kvm_run *run, struct kvm_vcpu *vcpu) +enum emulation_result kvm_mips_emulate_fpu_exc(unsigned long cause, + uint32_t *opc, + struct kvm_run *run, + struct kvm_vcpu *vcpu) { struct mips_coproc *cop0 = vcpu->arch.cop0; struct kvm_vcpu_arch *arch = &vcpu->arch; - enum emulation_result er = EMULATE_DONE; if ((kvm_read_c0_guest_status(cop0) & ST0_EXL) == 0) { /* save old pc */ @@ -1924,12 +1897,13 @@ kvm_mips_emulate_fpu_exc(unsigned long cause, uint32_t *opc, (T_COP_UNUSABLE << CAUSEB_EXCCODE)); kvm_change_c0_guest_cause(cop0, (CAUSEF_CE), (0x1 << CAUSEB_CE)); - return er; + return EMULATE_DONE; } -enum emulation_result -kvm_mips_emulate_ri_exc(unsigned long cause, uint32_t *opc, - struct kvm_run *run, struct kvm_vcpu *vcpu) +enum emulation_result kvm_mips_emulate_ri_exc(unsigned long cause, + uint32_t *opc, + struct kvm_run *run, + struct kvm_vcpu *vcpu) { struct mips_coproc *cop0 = vcpu->arch.cop0; struct kvm_vcpu_arch *arch = &vcpu->arch; @@ -1961,9 +1935,10 @@ kvm_mips_emulate_ri_exc(unsigned long cause, uint32_t *opc, return er; } -enum emulation_result -kvm_mips_emulate_bp_exc(unsigned long cause, uint32_t *opc, - struct kvm_run *run, struct kvm_vcpu *vcpu) +enum emulation_result kvm_mips_emulate_bp_exc(unsigned long cause, + uint32_t *opc, + struct kvm_run *run, + struct kvm_vcpu *vcpu) { struct mips_coproc *cop0 = vcpu->arch.cop0; struct kvm_vcpu_arch *arch = &vcpu->arch; @@ -1988,16 +1963,14 @@ kvm_mips_emulate_bp_exc(unsigned long cause, uint32_t *opc, arch->pc = KVM_GUEST_KSEG0 + 0x180; } else { - printk("Trying to deliver BP when EXL is already set\n"); + kvm_err("Trying to deliver BP when EXL is already set\n"); er = EMULATE_FAIL; } return er; } -/* - * ll/sc, rdhwr, sync emulation - */ +/* ll/sc, rdhwr, sync emulation */ #define OPCODE 0xfc000000 #define BASE 0x03e00000 @@ -2012,9 +1985,9 @@ kvm_mips_emulate_bp_exc(unsigned long cause, uint32_t *opc, #define SYNC 0x0000000f #define RDHWR 0x0000003b -enum emulation_result -kvm_mips_handle_ri(unsigned long cause, uint32_t *opc, - struct kvm_run *run, struct kvm_vcpu *vcpu) +enum emulation_result kvm_mips_handle_ri(unsigned long cause, uint32_t *opc, + struct kvm_run *run, + struct kvm_vcpu *vcpu) { struct mips_coproc *cop0 = vcpu->arch.cop0; struct kvm_vcpu_arch *arch = &vcpu->arch; @@ -2031,16 +2004,14 @@ kvm_mips_handle_ri(unsigned long cause, uint32_t *opc, if (er == EMULATE_FAIL) return er; - /* - * Fetch the instruction. - */ + /* Fetch the instruction. */ if (cause & CAUSEF_BD) opc += 1; inst = kvm_get_inst(opc, vcpu); if (inst == KVM_INVALID_INST) { - printk("%s: Cannot get inst @ %p\n", __func__, opc); + kvm_err("%s: Cannot get inst @ %p\n", __func__, opc); return EMULATE_FAIL; } @@ -2099,15 +2070,15 @@ emulate_ri: return kvm_mips_emulate_ri_exc(cause, opc, run, vcpu); } -enum emulation_result -kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu, struct kvm_run *run) +enum emulation_result kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu, + struct kvm_run *run) { unsigned long *gpr = &vcpu->arch.gprs[vcpu->arch.io_gpr]; enum emulation_result er = EMULATE_DONE; unsigned long curr_pc; if (run->mmio.len > sizeof(*gpr)) { - printk("Bad MMIO length: %d", run->mmio.len); + kvm_err("Bad MMIO length: %d", run->mmio.len); er = EMULATE_FAIL; goto done; } @@ -2142,18 +2113,18 @@ kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu, struct kvm_run *run) } if (vcpu->arch.pending_load_cause & CAUSEF_BD) - kvm_debug - ("[%#lx] Completing %d byte BD Load to gpr %d (0x%08lx) type %d\n", - vcpu->arch.pc, run->mmio.len, vcpu->arch.io_gpr, *gpr, - vcpu->mmio_needed); + kvm_debug("[%#lx] Completing %d byte BD Load to gpr %d (0x%08lx) type %d\n", + vcpu->arch.pc, run->mmio.len, vcpu->arch.io_gpr, *gpr, + vcpu->mmio_needed); done: return er; } -static enum emulation_result -kvm_mips_emulate_exc(unsigned long cause, uint32_t *opc, - struct kvm_run *run, struct kvm_vcpu *vcpu) +static enum emulation_result kvm_mips_emulate_exc(unsigned long cause, + uint32_t *opc, + struct kvm_run *run, + struct kvm_vcpu *vcpu) { uint32_t exccode = (cause >> CAUSEB_EXCCODE) & 0x1f; struct mips_coproc *cop0 = vcpu->arch.cop0; @@ -2181,16 +2152,17 @@ kvm_mips_emulate_exc(unsigned long cause, uint32_t *opc, exccode, kvm_read_c0_guest_epc(cop0), kvm_read_c0_guest_badvaddr(cop0)); } else { - printk("Trying to deliver EXC when EXL is already set\n"); + kvm_err("Trying to deliver EXC when EXL is already set\n"); er = EMULATE_FAIL; } return er; } -enum emulation_result -kvm_mips_check_privilege(unsigned long cause, uint32_t *opc, - struct kvm_run *run, struct kvm_vcpu *vcpu) +enum emulation_result kvm_mips_check_privilege(unsigned long cause, + uint32_t *opc, + struct kvm_run *run, + struct kvm_vcpu *vcpu) { enum emulation_result er = EMULATE_DONE; uint32_t exccode = (cause >> CAUSEB_EXCCODE) & 0x1f; @@ -2215,10 +2187,13 @@ kvm_mips_check_privilege(unsigned long cause, uint32_t *opc, break; case T_TLB_LD_MISS: - /* We we are accessing Guest kernel space, then send an address error exception to the guest */ + /* + * We we are accessing Guest kernel space, then send an + * address error exception to the guest + */ if (badvaddr >= (unsigned long) KVM_GUEST_KSEG0) { - printk("%s: LD MISS @ %#lx\n", __func__, - badvaddr); + kvm_debug("%s: LD MISS @ %#lx\n", __func__, + badvaddr); cause &= ~0xff; cause |= (T_ADDR_ERR_LD << CAUSEB_EXCCODE); er = EMULATE_PRIV_FAIL; @@ -2226,10 +2201,13 @@ kvm_mips_check_privilege(unsigned long cause, uint32_t *opc, break; case T_TLB_ST_MISS: - /* We we are accessing Guest kernel space, then send an address error exception to the guest */ + /* + * We we are accessing Guest kernel space, then send an + * address error exception to the guest + */ if (badvaddr >= (unsigned long) KVM_GUEST_KSEG0) { - printk("%s: ST MISS @ %#lx\n", __func__, - badvaddr); + kvm_debug("%s: ST MISS @ %#lx\n", __func__, + badvaddr); cause &= ~0xff; cause |= (T_ADDR_ERR_ST << CAUSEB_EXCCODE); er = EMULATE_PRIV_FAIL; @@ -2237,8 +2215,8 @@ kvm_mips_check_privilege(unsigned long cause, uint32_t *opc, break; case T_ADDR_ERR_ST: - printk("%s: address error ST @ %#lx\n", __func__, - badvaddr); + kvm_debug("%s: address error ST @ %#lx\n", __func__, + badvaddr); if ((badvaddr & PAGE_MASK) == KVM_GUEST_COMMPAGE_ADDR) { cause &= ~0xff; cause |= (T_TLB_ST_MISS << CAUSEB_EXCCODE); @@ -2246,8 +2224,8 @@ kvm_mips_check_privilege(unsigned long cause, uint32_t *opc, er = EMULATE_PRIV_FAIL; break; case T_ADDR_ERR_LD: - printk("%s: address error LD @ %#lx\n", __func__, - badvaddr); + kvm_debug("%s: address error LD @ %#lx\n", __func__, + badvaddr); if ((badvaddr & PAGE_MASK) == KVM_GUEST_COMMPAGE_ADDR) { cause &= ~0xff; cause |= (T_TLB_LD_MISS << CAUSEB_EXCCODE); @@ -2260,21 +2238,23 @@ kvm_mips_check_privilege(unsigned long cause, uint32_t *opc, } } - if (er == EMULATE_PRIV_FAIL) { + if (er == EMULATE_PRIV_FAIL) kvm_mips_emulate_exc(cause, opc, run, vcpu); - } + return er; } -/* User Address (UA) fault, this could happen if +/* + * User Address (UA) fault, this could happen if * (1) TLB entry not present/valid in both Guest and shadow host TLBs, in this * case we pass on the fault to the guest kernel and let it handle it. * (2) TLB entry is present in the Guest TLB but not in the shadow, in this * case we inject the TLB from the Guest TLB into the shadow host TLB */ -enum emulation_result -kvm_mips_handle_tlbmiss(unsigned long cause, uint32_t *opc, - struct kvm_run *run, struct kvm_vcpu *vcpu) +enum emulation_result kvm_mips_handle_tlbmiss(unsigned long cause, + uint32_t *opc, + struct kvm_run *run, + struct kvm_vcpu *vcpu) { enum emulation_result er = EMULATE_DONE; uint32_t exccode = (cause >> CAUSEB_EXCCODE) & 0x1f; @@ -2284,10 +2264,11 @@ kvm_mips_handle_tlbmiss(unsigned long cause, uint32_t *opc, kvm_debug("kvm_mips_handle_tlbmiss: badvaddr: %#lx, entryhi: %#lx\n", vcpu->arch.host_cp0_badvaddr, vcpu->arch.host_cp0_entryhi); - /* KVM would not have got the exception if this entry was valid in the shadow host TLB - * Check the Guest TLB, if the entry is not there then send the guest an - * exception. The guest exc handler should then inject an entry into the - * guest TLB + /* + * KVM would not have got the exception if this entry was valid in the + * shadow host TLB. Check the Guest TLB, if the entry is not there then + * send the guest an exception. The guest exc handler should then inject + * an entry into the guest TLB. */ index = kvm_mips_guest_tlb_lookup(vcpu, (va & VPN2_MASK) | @@ -2299,13 +2280,17 @@ kvm_mips_handle_tlbmiss(unsigned long cause, uint32_t *opc, } else if (exccode == T_TLB_ST_MISS) { er = kvm_mips_emulate_tlbmiss_st(cause, opc, run, vcpu); } else { - printk("%s: invalid exc code: %d\n", __func__, exccode); + kvm_err("%s: invalid exc code: %d\n", __func__, + exccode); er = EMULATE_FAIL; } } else { struct kvm_mips_tlb *tlb = &vcpu->arch.guest_tlb[index]; - /* Check if the entry is valid, if not then setup a TLB invalid exception to the guest */ + /* + * Check if the entry is valid, if not then setup a TLB invalid + * exception to the guest + */ if (!TLB_IS_VALID(*tlb, va)) { if (exccode == T_TLB_LD_MISS) { er = kvm_mips_emulate_tlbinv_ld(cause, opc, run, @@ -2314,15 +2299,17 @@ kvm_mips_handle_tlbmiss(unsigned long cause, uint32_t *opc, er = kvm_mips_emulate_tlbinv_st(cause, opc, run, vcpu); } else { - printk("%s: invalid exc code: %d\n", __func__, - exccode); + kvm_err("%s: invalid exc code: %d\n", __func__, + exccode); er = EMULATE_FAIL; } } else { - kvm_debug - ("Injecting hi: %#lx, lo0: %#lx, lo1: %#lx into shadow host TLB\n", - tlb->tlb_hi, tlb->tlb_lo0, tlb->tlb_lo1); - /* OK we have a Guest TLB entry, now inject it into the shadow host TLB */ + kvm_debug("Injecting hi: %#lx, lo0: %#lx, lo1: %#lx into shadow host TLB\n", + tlb->tlb_hi, tlb->tlb_lo0, tlb->tlb_lo1); + /* + * OK we have a Guest TLB entry, now inject it into the + * shadow host TLB + */ kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb, NULL, NULL); } diff --git a/arch/mips/kvm/kvm_mips_int.c b/arch/mips/kvm/interrupt.c index 1e5de16afe29..9b4445940c2b 100644 --- a/arch/mips/kvm/kvm_mips_int.c +++ b/arch/mips/kvm/interrupt.c @@ -1,13 +1,13 @@ /* -* This file is subject to the terms and conditions of the GNU General Public -* License. See the file "COPYING" in the main directory of this archive -* for more details. -* -* KVM/MIPS: Interrupt delivery -* -* Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. -* Authors: Sanjay Lal <sanjayl@kymasys.com> -*/ + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * KVM/MIPS: Interrupt delivery + * + * Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. + * Authors: Sanjay Lal <sanjayl@kymasys.com> + */ #include <linux/errno.h> #include <linux/err.h> @@ -20,7 +20,7 @@ #include <linux/kvm_host.h> -#include "kvm_mips_int.h" +#include "interrupt.h" void kvm_mips_queue_irq(struct kvm_vcpu *vcpu, uint32_t priority) { @@ -34,7 +34,8 @@ void kvm_mips_dequeue_irq(struct kvm_vcpu *vcpu, uint32_t priority) void kvm_mips_queue_timer_int_cb(struct kvm_vcpu *vcpu) { - /* Cause bits to reflect the pending timer interrupt, + /* + * Cause bits to reflect the pending timer interrupt, * the EXC code will be set when we are actually * delivering the interrupt: */ @@ -51,12 +52,13 @@ void kvm_mips_dequeue_timer_int_cb(struct kvm_vcpu *vcpu) kvm_mips_dequeue_irq(vcpu, MIPS_EXC_INT_TIMER); } -void -kvm_mips_queue_io_int_cb(struct kvm_vcpu *vcpu, struct kvm_mips_interrupt *irq) +void kvm_mips_queue_io_int_cb(struct kvm_vcpu *vcpu, + struct kvm_mips_interrupt *irq) { int intr = (int)irq->irq; - /* Cause bits to reflect the pending IO interrupt, + /* + * Cause bits to reflect the pending IO interrupt, * the EXC code will be set when we are actually * delivering the interrupt: */ @@ -83,11 +85,11 @@ kvm_mips_queue_io_int_cb(struct kvm_vcpu *vcpu, struct kvm_mips_interrupt *irq) } -void -kvm_mips_dequeue_io_int_cb(struct kvm_vcpu *vcpu, - struct kvm_mips_interrupt *irq) +void kvm_mips_dequeue_io_int_cb(struct kvm_vcpu *vcpu, + struct kvm_mips_interrupt *irq) { int intr = (int)irq->irq; + switch (intr) { case -2: kvm_clear_c0_guest_cause(vcpu->arch.cop0, (C_IRQ0)); @@ -111,9 +113,8 @@ kvm_mips_dequeue_io_int_cb(struct kvm_vcpu *vcpu, } /* Deliver the interrupt of the corresponding priority, if possible. */ -int -kvm_mips_irq_deliver_cb(struct kvm_vcpu *vcpu, unsigned int priority, - uint32_t cause) +int kvm_mips_irq_deliver_cb(struct kvm_vcpu *vcpu, unsigned int priority, + uint32_t cause) { int allowed = 0; uint32_t exccode; @@ -164,7 +165,6 @@ kvm_mips_irq_deliver_cb(struct kvm_vcpu *vcpu, unsigned int priority, /* Are we allowed to deliver the interrupt ??? */ if (allowed) { - if ((kvm_read_c0_guest_status(cop0) & ST0_EXL) == 0) { /* save old pc */ kvm_write_c0_guest_epc(cop0, arch->pc); @@ -195,9 +195,8 @@ kvm_mips_irq_deliver_cb(struct kvm_vcpu *vcpu, unsigned int priority, return allowed; } -int -kvm_mips_irq_clear_cb(struct kvm_vcpu *vcpu, unsigned int priority, - uint32_t cause) +int kvm_mips_irq_clear_cb(struct kvm_vcpu *vcpu, unsigned int priority, + uint32_t cause) { return 1; } diff --git a/arch/mips/kvm/kvm_mips_int.h b/arch/mips/kvm/interrupt.h index 20da7d29eede..4ab4bdfad703 100644 --- a/arch/mips/kvm/kvm_mips_int.h +++ b/arch/mips/kvm/interrupt.h @@ -1,14 +1,15 @@ /* -* This file is subject to the terms and conditions of the GNU General Public -* License. See the file "COPYING" in the main directory of this archive -* for more details. -* -* KVM/MIPS: Interrupts -* Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. -* Authors: Sanjay Lal <sanjayl@kymasys.com> -*/ + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * KVM/MIPS: Interrupts + * Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. + * Authors: Sanjay Lal <sanjayl@kymasys.com> + */ -/* MIPS Exception Priorities, exceptions (including interrupts) are queued up +/* + * MIPS Exception Priorities, exceptions (including interrupts) are queued up * for the guest in the order specified by their priorities */ @@ -27,6 +28,9 @@ #define MIPS_EXC_MAX 12 /* XXXSL More to follow */ +extern char mips32_exception[], mips32_exceptionEnd[]; +extern char mips32_GuestException[], mips32_GuestExceptionEnd[]; + #define C_TI (_ULCAST_(1) << 30) #define KVM_MIPS_IRQ_DELIVER_ALL_AT_ONCE (0) diff --git a/arch/mips/kvm/kvm_mips_comm.h b/arch/mips/kvm/kvm_mips_comm.h deleted file mode 100644 index a4a8c85cc8f7..000000000000 --- a/arch/mips/kvm/kvm_mips_comm.h +++ /dev/null @@ -1,23 +0,0 @@ -/* -* This file is subject to the terms and conditions of the GNU General Public -* License. See the file "COPYING" in the main directory of this archive -* for more details. -* -* KVM/MIPS: commpage: mapped into get kernel space -* -* Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. -* Authors: Sanjay Lal <sanjayl@kymasys.com> -*/ - -#ifndef __KVM_MIPS_COMMPAGE_H__ -#define __KVM_MIPS_COMMPAGE_H__ - -struct kvm_mips_commpage { - struct mips_coproc cop0; /* COP0 state is mapped into Guest kernel via commpage */ -}; - -#define KVM_MIPS_COMM_EIDI_OFFSET 0x0 - -extern void kvm_mips_commpage_init(struct kvm_vcpu *vcpu); - -#endif /* __KVM_MIPS_COMMPAGE_H__ */ diff --git a/arch/mips/kvm/kvm_mips_commpage.c b/arch/mips/kvm/kvm_mips_commpage.c deleted file mode 100644 index 3873b1ecc40f..000000000000 --- a/arch/mips/kvm/kvm_mips_commpage.c +++ /dev/null @@ -1,37 +0,0 @@ -/* -* This file is subject to the terms and conditions of the GNU General Public -* License. See the file "COPYING" in the main directory of this archive -* for more details. -* -* commpage, currently used for Virtual COP0 registers. -* Mapped into the guest kernel @ 0x0. -* -* Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. -* Authors: Sanjay Lal <sanjayl@kymasys.com> -*/ - -#include <linux/errno.h> -#include <linux/err.h> -#include <linux/module.h> -#include <linux/vmalloc.h> -#include <linux/fs.h> -#include <linux/bootmem.h> -#include <asm/page.h> -#include <asm/cacheflush.h> -#include <asm/mmu_context.h> - -#include <linux/kvm_host.h> - -#include "kvm_mips_comm.h" - -void kvm_mips_commpage_init(struct kvm_vcpu *vcpu) -{ - struct kvm_mips_commpage *page = vcpu->arch.kseg0_commpage; - memset(page, 0, sizeof(struct kvm_mips_commpage)); - - /* Specific init values for fields */ - vcpu->arch.cop0 = &page->cop0; - memset(vcpu->arch.cop0, 0, sizeof(struct mips_coproc)); - - return; -} diff --git a/arch/mips/kvm/kvm_mips_opcode.h b/arch/mips/kvm/kvm_mips_opcode.h deleted file mode 100644 index 86d3b4cc348b..000000000000 --- a/arch/mips/kvm/kvm_mips_opcode.h +++ /dev/null @@ -1,24 +0,0 @@ -/* -* This file is subject to the terms and conditions of the GNU General Public -* License. See the file "COPYING" in the main directory of this archive -* for more details. -* -* Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. -* Authors: Sanjay Lal <sanjayl@kymasys.com> -*/ - -/* - * Define opcode values not defined in <asm/isnt.h> - */ - -#ifndef __KVM_MIPS_OPCODE_H__ -#define __KVM_MIPS_OPCODE_H__ - -/* COP0 Ops */ -#define mfmcz_op 0x0b /* 01011 */ -#define wrpgpr_op 0x0e /* 01110 */ - -/* COP0 opcodes (only if COP0 and CO=1): */ -#define wait_op 0x20 /* 100000 */ - -#endif /* __KVM_MIPS_OPCODE_H__ */ diff --git a/arch/mips/kvm/kvm_locore.S b/arch/mips/kvm/locore.S index 033ac343e72c..d7279c03c517 100644 --- a/arch/mips/kvm/kvm_locore.S +++ b/arch/mips/kvm/locore.S @@ -16,7 +16,6 @@ #include <asm/stackframe.h> #include <asm/asm-offsets.h> - #define _C_LABEL(x) x #define MIPSX(name) mips32_ ## name #define CALLFRAME_SIZ 32 @@ -91,7 +90,10 @@ FEXPORT(__kvm_mips_vcpu_run) LONG_S $24, PT_R24(k1) LONG_S $25, PT_R25(k1) - /* XXXKYMA k0/k1 not saved, not being used if we got here through an ioctl() */ + /* + * XXXKYMA k0/k1 not saved, not being used if we got here through + * an ioctl() + */ LONG_S $28, PT_R28(k1) LONG_S $29, PT_R29(k1) @@ -132,7 +134,10 @@ FEXPORT(__kvm_mips_vcpu_run) /* Save the kernel gp as well */ LONG_S gp, VCPU_HOST_GP(k1) - /* Setup status register for running the guest in UM, interrupts are disabled */ + /* + * Setup status register for running the guest in UM, interrupts + * are disabled + */ li k0, (ST0_EXL | KSU_USER | ST0_BEV) mtc0 k0, CP0_STATUS ehb @@ -152,7 +157,6 @@ FEXPORT(__kvm_mips_vcpu_run) mtc0 k0, CP0_STATUS ehb - /* Set Guest EPC */ LONG_L t0, VCPU_PC(k1) mtc0 t0, CP0_EPC @@ -165,7 +169,7 @@ FEXPORT(__kvm_mips_load_asid) INT_ADDIU t1, k1, VCPU_GUEST_KERNEL_ASID /* (BD) */ INT_ADDIU t1, k1, VCPU_GUEST_USER_ASID /* else user */ 1: - /* t1: contains the base of the ASID array, need to get the cpu id */ + /* t1: contains the base of the ASID array, need to get the cpu id */ LONG_L t2, TI_CPU($28) /* smp_processor_id */ INT_SLL t2, t2, 2 /* x4 */ REG_ADDU t3, t1, t2 @@ -229,9 +233,7 @@ FEXPORT(__kvm_mips_load_k0k1) eret VECTOR(MIPSX(exception), unknown) -/* - * Find out what mode we came from and jump to the proper handler. - */ +/* Find out what mode we came from and jump to the proper handler. */ mtc0 k0, CP0_ERROREPC #01: Save guest k0 ehb #02: @@ -239,7 +241,8 @@ VECTOR(MIPSX(exception), unknown) INT_SRL k0, k0, 10 #03: Get rid of CPUNum INT_SLL k0, k0, 10 #04 LONG_S k1, 0x3000(k0) #05: Save k1 @ offset 0x3000 - INT_ADDIU k0, k0, 0x2000 #06: Exception handler is installed @ offset 0x2000 + INT_ADDIU k0, k0, 0x2000 #06: Exception handler is + # installed @ offset 0x2000 j k0 #07: jump to the function nop #08: branch delay slot VECTOR_END(MIPSX(exceptionEnd)) @@ -248,7 +251,6 @@ VECTOR_END(MIPSX(exceptionEnd)) /* * Generic Guest exception handler. We end up here when the guest * does something that causes a trap to kernel mode. - * */ NESTED (MIPSX(GuestException), CALLFRAME_SIZ, ra) /* Get the VCPU pointer from DDTATA_LO */ @@ -290,9 +292,7 @@ NESTED (MIPSX(GuestException), CALLFRAME_SIZ, ra) LONG_S $30, VCPU_R30(k1) LONG_S $31, VCPU_R31(k1) - /* We need to save hi/lo and restore them on - * the way out - */ + /* We need to save hi/lo and restore them on the way out */ mfhi t0 LONG_S t0, VCPU_HI(k1) @@ -321,8 +321,10 @@ NESTED (MIPSX(GuestException), CALLFRAME_SIZ, ra) /* Save pointer to run in s0, will be saved by the compiler */ move s0, a0 - /* Save Host level EPC, BadVaddr and Cause to VCPU, useful to - * process the exception */ + /* + * Save Host level EPC, BadVaddr and Cause to VCPU, useful to + * process the exception + */ mfc0 k0,CP0_EPC LONG_S k0, VCPU_PC(k1) @@ -351,7 +353,6 @@ NESTED (MIPSX(GuestException), CALLFRAME_SIZ, ra) LONG_L k0, VCPU_HOST_EBASE(k1) mtc0 k0,CP0_EBASE - /* Now that the new EBASE has been loaded, unset BEV and KSU_USER */ .set at and v0, v0, ~(ST0_EXL | KSU_USER | ST0_IE) @@ -369,7 +370,8 @@ NESTED (MIPSX(GuestException), CALLFRAME_SIZ, ra) /* Saved host state */ INT_ADDIU sp, sp, -PT_SIZE - /* XXXKYMA do we need to load the host ASID, maybe not because the + /* + * XXXKYMA do we need to load the host ASID, maybe not because the * kernel entries are marked GLOBAL, need to verify */ @@ -383,9 +385,11 @@ NESTED (MIPSX(GuestException), CALLFRAME_SIZ, ra) /* Jump to handler */ FEXPORT(__kvm_mips_jump_to_handler) - /* XXXKYMA: not sure if this is safe, how large is the stack?? + /* + * XXXKYMA: not sure if this is safe, how large is the stack?? * Now jump to the kvm_mips_handle_exit() to see if we can deal - * with this in the kernel */ + * with this in the kernel + */ PTR_LA t9, kvm_mips_handle_exit jalr.hb t9 INT_ADDIU sp, sp, -CALLFRAME_SIZ /* BD Slot */ @@ -394,7 +398,8 @@ FEXPORT(__kvm_mips_jump_to_handler) di ehb - /* XXXKYMA: k0/k1 could have been blown away if we processed + /* + * XXXKYMA: k0/k1 could have been blown away if we processed * an exception while we were handling the exception from the * guest, reload k1 */ @@ -402,7 +407,8 @@ FEXPORT(__kvm_mips_jump_to_handler) move k1, s1 INT_ADDIU k1, k1, VCPU_HOST_ARCH - /* Check return value, should tell us if we are returning to the + /* + * Check return value, should tell us if we are returning to the * host (handle I/O etc)or resuming the guest */ andi t0, v0, RESUME_HOST @@ -521,8 +527,10 @@ __kvm_mips_return_to_host: LONG_L $0, PT_R0(k1) LONG_L $1, PT_R1(k1) - /* r2/v0 is the return code, shift it down by 2 (arithmetic) - * to recover the err code */ + /* + * r2/v0 is the return code, shift it down by 2 (arithmetic) + * to recover the err code + */ INT_SRA k0, v0, 2 move $2, k0 @@ -566,7 +574,6 @@ __kvm_mips_return_to_host: PTR_LI k0, 0x2000000F mtc0 k0, CP0_HWRENA - /* Restore RA, which is the address we will return to */ LONG_L ra, PT_R31(k1) j ra diff --git a/arch/mips/kvm/kvm_mips.c b/arch/mips/kvm/mips.c index f3c56a182fd8..4fda672cb58e 100644 --- a/arch/mips/kvm/kvm_mips.c +++ b/arch/mips/kvm/mips.c @@ -7,7 +7,7 @@ * * Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. * Authors: Sanjay Lal <sanjayl@kymasys.com> -*/ + */ #include <linux/errno.h> #include <linux/err.h> @@ -21,8 +21,8 @@ #include <linux/kvm_host.h> -#include "kvm_mips_int.h" -#include "kvm_mips_comm.h" +#include "interrupt.h" +#include "commpage.h" #define CREATE_TRACE_POINTS #include "trace.h" @@ -31,38 +31,41 @@ #define VECTORSPACING 0x100 /* for EI/VI mode */ #endif -#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU +#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x) struct kvm_stats_debugfs_item debugfs_entries[] = { - { "wait", VCPU_STAT(wait_exits) }, - { "cache", VCPU_STAT(cache_exits) }, - { "signal", VCPU_STAT(signal_exits) }, - { "interrupt", VCPU_STAT(int_exits) }, - { "cop_unsuable", VCPU_STAT(cop_unusable_exits) }, - { "tlbmod", VCPU_STAT(tlbmod_exits) }, - { "tlbmiss_ld", VCPU_STAT(tlbmiss_ld_exits) }, - { "tlbmiss_st", VCPU_STAT(tlbmiss_st_exits) }, - { "addrerr_st", VCPU_STAT(addrerr_st_exits) }, - { "addrerr_ld", VCPU_STAT(addrerr_ld_exits) }, - { "syscall", VCPU_STAT(syscall_exits) }, - { "resvd_inst", VCPU_STAT(resvd_inst_exits) }, - { "break_inst", VCPU_STAT(break_inst_exits) }, - { "flush_dcache", VCPU_STAT(flush_dcache_exits) }, - { "halt_wakeup", VCPU_STAT(halt_wakeup) }, + { "wait", VCPU_STAT(wait_exits), KVM_STAT_VCPU }, + { "cache", VCPU_STAT(cache_exits), KVM_STAT_VCPU }, + { "signal", VCPU_STAT(signal_exits), KVM_STAT_VCPU }, + { "interrupt", VCPU_STAT(int_exits), KVM_STAT_VCPU }, + { "cop_unsuable", VCPU_STAT(cop_unusable_exits), KVM_STAT_VCPU }, + { "tlbmod", VCPU_STAT(tlbmod_exits), KVM_STAT_VCPU }, + { "tlbmiss_ld", VCPU_STAT(tlbmiss_ld_exits), KVM_STAT_VCPU }, + { "tlbmiss_st", VCPU_STAT(tlbmiss_st_exits), KVM_STAT_VCPU }, + { "addrerr_st", VCPU_STAT(addrerr_st_exits), KVM_STAT_VCPU }, + { "addrerr_ld", VCPU_STAT(addrerr_ld_exits), KVM_STAT_VCPU }, + { "syscall", VCPU_STAT(syscall_exits), KVM_STAT_VCPU }, + { "resvd_inst", VCPU_STAT(resvd_inst_exits), KVM_STAT_VCPU }, + { "break_inst", VCPU_STAT(break_inst_exits), KVM_STAT_VCPU }, + { "flush_dcache", VCPU_STAT(flush_dcache_exits), KVM_STAT_VCPU }, + { "halt_wakeup", VCPU_STAT(halt_wakeup), KVM_STAT_VCPU }, {NULL} }; static int kvm_mips_reset_vcpu(struct kvm_vcpu *vcpu) { int i; + for_each_possible_cpu(i) { vcpu->arch.guest_kernel_asid[i] = 0; vcpu->arch.guest_user_asid[i] = 0; } + return 0; } -/* XXXKYMA: We are simulatoring a processor that has the WII bit set in Config7, so we - * are "runnable" if interrupts are pending +/* + * XXXKYMA: We are simulatoring a processor that has the WII bit set in + * Config7, so we are "runnable" if interrupts are pending */ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) { @@ -94,16 +97,17 @@ void kvm_arch_hardware_unsetup(void) void kvm_arch_check_processor_compat(void *rtn) { - int *r = (int *)rtn; - *r = 0; - return; + *(int *)rtn = 0; } static void kvm_mips_init_tlbs(struct kvm *kvm) { unsigned long wired; - /* Add a wired entry to the TLB, it is used to map the commpage to the Guest kernel */ + /* + * Add a wired entry to the TLB, it is used to map the commpage to + * the Guest kernel + */ wired = read_c0_wired(); write_c0_wired(wired + 1); mtc0_tlbw_hazard(); @@ -130,7 +134,6 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) on_each_cpu(kvm_mips_init_vm_percpu, kvm, 1); } - return 0; } @@ -185,8 +188,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm) } } -long -kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) +long kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, + unsigned long arg) { return -ENOIOCTLCMD; } @@ -207,20 +210,20 @@ void kvm_arch_memslots_updated(struct kvm *kvm) } int kvm_arch_prepare_memory_region(struct kvm *kvm, - struct kvm_memory_slot *memslot, - struct kvm_userspace_memory_region *mem, - enum kvm_mr_change change) + struct kvm_memory_slot *memslot, + struct kvm_userspace_memory_region *mem, + enum kvm_mr_change change) { return 0; } void kvm_arch_commit_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem, - const struct kvm_memory_slot *old, - enum kvm_mr_change change) + struct kvm_userspace_memory_region *mem, + const struct kvm_memory_slot *old, + enum kvm_mr_change change) { unsigned long npages = 0; - int i, err = 0; + int i; kvm_debug("%s: kvm: %p slot: %d, GPA: %llx, size: %llx, QVA: %llx\n", __func__, kvm, mem->slot, mem->guest_phys_addr, @@ -238,21 +241,17 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, if (!kvm->arch.guest_pmap) { kvm_err("Failed to allocate guest PMAP"); - err = -ENOMEM; - goto out; + return; } kvm_debug("Allocated space for Guest PMAP Table (%ld pages) @ %p\n", npages, kvm->arch.guest_pmap); /* Now setup the page table */ - for (i = 0; i < npages; i++) { + for (i = 0; i < npages; i++) kvm->arch.guest_pmap[i] = KVM_INVALID_PAGE; - } } } -out: - return; } void kvm_arch_flush_shadow_all(struct kvm *kvm) @@ -270,8 +269,6 @@ void kvm_arch_flush_shadow(struct kvm *kvm) struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) { - extern char mips32_exception[], mips32_exceptionEnd[]; - extern char mips32_GuestException[], mips32_GuestExceptionEnd[]; int err, size, offset; void *gebase; int i; @@ -290,14 +287,14 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) kvm_debug("kvm @ %p: create cpu %d at %p\n", kvm, id, vcpu); - /* Allocate space for host mode exception handlers that handle + /* + * Allocate space for host mode exception handlers that handle * guest mode exits */ - if (cpu_has_veic || cpu_has_vint) { + if (cpu_has_veic || cpu_has_vint) size = 0x200 + VECTORSPACING * 64; - } else { + else size = 0x4000; - } /* Save Linux EBASE */ vcpu->arch.host_ebase = (void *)read_c0_ebase(); @@ -345,7 +342,10 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) local_flush_icache_range((unsigned long)gebase, (unsigned long)gebase + ALIGN(size, PAGE_SIZE)); - /* Allocate comm page for guest kernel, a TLB will be reserved for mapping GVA @ 0xFFFF8000 to this page */ + /* + * Allocate comm page for guest kernel, a TLB will be reserved for + * mapping GVA @ 0xFFFF8000 to this page + */ vcpu->arch.kseg0_commpage = kzalloc(PAGE_SIZE << 1, GFP_KERNEL); if (!vcpu->arch.kseg0_commpage) { @@ -392,9 +392,8 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) kvm_arch_vcpu_free(vcpu); } -int -kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, - struct kvm_guest_debug *dbg) +int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, + struct kvm_guest_debug *dbg) { return -ENOIOCTLCMD; } @@ -431,8 +430,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) return r; } -int -kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_mips_interrupt *irq) +int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, + struct kvm_mips_interrupt *irq) { int intr = (int)irq->irq; struct kvm_vcpu *dvcpu = NULL; @@ -459,23 +458,20 @@ kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_mips_interrupt *irq) dvcpu->arch.wait = 0; - if (waitqueue_active(&dvcpu->wq)) { + if (waitqueue_active(&dvcpu->wq)) wake_up_interruptible(&dvcpu->wq); - } return 0; } -int -kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, - struct kvm_mp_state *mp_state) +int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, + struct kvm_mp_state *mp_state) { return -ENOIOCTLCMD; } -int -kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, - struct kvm_mp_state *mp_state) +int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, + struct kvm_mp_state *mp_state) { return -ENOIOCTLCMD; } @@ -632,10 +628,12 @@ static int kvm_mips_get_reg(struct kvm_vcpu *vcpu, } if ((reg->id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U64) { u64 __user *uaddr64 = (u64 __user *)(long)reg->addr; + return put_user(v, uaddr64); } else if ((reg->id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U32) { u32 __user *uaddr32 = (u32 __user *)(long)reg->addr; u32 v32 = (u32)v; + return put_user(v32, uaddr32); } else { return -EINVAL; @@ -728,8 +726,8 @@ static int kvm_mips_set_reg(struct kvm_vcpu *vcpu, return 0; } -long -kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) +long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, + unsigned long arg) { struct kvm_vcpu *vcpu = filp->private_data; void __user *argp = (void __user *)arg; @@ -739,6 +737,7 @@ kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) case KVM_SET_ONE_REG: case KVM_GET_ONE_REG: { struct kvm_one_reg reg; + if (copy_from_user(®, argp, sizeof(reg))) return -EFAULT; if (ioctl == KVM_SET_ONE_REG) @@ -773,6 +772,7 @@ kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) case KVM_INTERRUPT: { struct kvm_mips_interrupt irq; + r = -EFAULT; if (copy_from_user(&irq, argp, sizeof(irq))) goto out; @@ -791,9 +791,7 @@ out: return r; } -/* - * Get (and clear) the dirty memory log for a memory slot. - */ +/* Get (and clear) the dirty memory log for a memory slot. */ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) { struct kvm_memory_slot *memslot; @@ -815,8 +813,8 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) ga = memslot->base_gfn << PAGE_SHIFT; ga_end = ga + (memslot->npages << PAGE_SHIFT); - printk("%s: dirty, ga: %#lx, ga_end %#lx\n", __func__, ga, - ga_end); + kvm_info("%s: dirty, ga: %#lx, ga_end %#lx\n", __func__, ga, + ga_end); n = kvm_dirty_bitmap_bytes(memslot); memset(memslot->dirty_bitmap, 0, n); @@ -843,16 +841,12 @@ long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) int kvm_arch_init(void *opaque) { - int ret; - if (kvm_mips_callbacks) { kvm_err("kvm: module already exists\n"); return -EEXIST; } - ret = kvm_mips_emulation_init(&kvm_mips_callbacks); - - return ret; + return kvm_mips_emulation_init(&kvm_mips_callbacks); } void kvm_arch_exit(void) @@ -860,14 +854,14 @@ void kvm_arch_exit(void) kvm_mips_callbacks = NULL; } -int -kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) { return -ENOIOCTLCMD; } -int -kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) { return -ENOIOCTLCMD; } @@ -923,24 +917,25 @@ int kvm_arch_vcpu_dump_regs(struct kvm_vcpu *vcpu) if (!vcpu) return -1; - printk("VCPU Register Dump:\n"); - printk("\tpc = 0x%08lx\n", vcpu->arch.pc); - printk("\texceptions: %08lx\n", vcpu->arch.pending_exceptions); + kvm_debug("VCPU Register Dump:\n"); + kvm_debug("\tpc = 0x%08lx\n", vcpu->arch.pc); + kvm_debug("\texceptions: %08lx\n", vcpu->arch.pending_exceptions); for (i = 0; i < 32; i += 4) { - printk("\tgpr%02d: %08lx %08lx %08lx %08lx\n", i, + kvm_debug("\tgpr%02d: %08lx %08lx %08lx %08lx\n", i, vcpu->arch.gprs[i], vcpu->arch.gprs[i + 1], vcpu->arch.gprs[i + 2], vcpu->arch.gprs[i + 3]); } - printk("\thi: 0x%08lx\n", vcpu->arch.hi); - printk("\tlo: 0x%08lx\n", vcpu->arch.lo); + kvm_debug("\thi: 0x%08lx\n", vcpu->arch.hi); + kvm_debug("\tlo: 0x%08lx\n", vcpu->arch.lo); cop0 = vcpu->arch.cop0; - printk("\tStatus: 0x%08lx, Cause: 0x%08lx\n", - kvm_read_c0_guest_status(cop0), kvm_read_c0_guest_cause(cop0)); + kvm_debug("\tStatus: 0x%08lx, Cause: 0x%08lx\n", + kvm_read_c0_guest_status(cop0), + kvm_read_c0_guest_cause(cop0)); - printk("\tEPC: 0x%08lx\n", kvm_read_c0_guest_epc(cop0)); + kvm_debug("\tEPC: 0x%08lx\n", kvm_read_c0_guest_epc(cop0)); return 0; } @@ -980,14 +975,11 @@ static void kvm_mips_comparecount_func(unsigned long data) kvm_mips_callbacks->queue_timer_int(vcpu); vcpu->arch.wait = 0; - if (waitqueue_active(&vcpu->wq)) { + if (waitqueue_active(&vcpu->wq)) wake_up_interruptible(&vcpu->wq); - } } -/* - * low level hrtimer wake routine. - */ +/* low level hrtimer wake routine */ static enum hrtimer_restart kvm_mips_comparecount_wakeup(struct hrtimer *timer) { struct kvm_vcpu *vcpu; @@ -1008,11 +1000,10 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) { - return; } -int -kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, struct kvm_translation *tr) +int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, + struct kvm_translation *tr) { return 0; } @@ -1023,8 +1014,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) return kvm_mips_callbacks->vcpu_setup(vcpu); } -static -void kvm_mips_set_c0_status(void) +static void kvm_mips_set_c0_status(void) { uint32_t status = read_c0_status(); @@ -1054,7 +1044,10 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) run->exit_reason = KVM_EXIT_UNKNOWN; run->ready_for_interrupt_injection = 1; - /* Set the appropriate status bits based on host CPU features, before we hit the scheduler */ + /* + * Set the appropriate status bits based on host CPU features, + * before we hit the scheduler + */ kvm_mips_set_c0_status(); local_irq_enable(); @@ -1062,7 +1055,8 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) kvm_debug("kvm_mips_handle_exit: cause: %#x, PC: %p, kvm_run: %p, kvm_vcpu: %p\n", cause, opc, run, vcpu); - /* Do a privilege check, if in UM most of these exit conditions end up + /* + * Do a privilege check, if in UM most of these exit conditions end up * causing an exception to be delivered to the Guest Kernel */ er = kvm_mips_check_privilege(cause, opc, run, vcpu); @@ -1081,9 +1075,8 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) ++vcpu->stat.int_exits; trace_kvm_exit(vcpu, INT_EXITS); - if (need_resched()) { + if (need_resched()) cond_resched(); - } ret = RESUME_GUEST; break; @@ -1095,9 +1088,8 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) trace_kvm_exit(vcpu, COP_UNUSABLE_EXITS); ret = kvm_mips_callbacks->handle_cop_unusable(vcpu); /* XXXKYMA: Might need to return to user space */ - if (run->exit_reason == KVM_EXIT_IRQ_WINDOW_OPEN) { + if (run->exit_reason == KVM_EXIT_IRQ_WINDOW_OPEN) ret = RESUME_HOST; - } break; case T_TLB_MOD: @@ -1107,10 +1099,9 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) break; case T_TLB_ST_MISS: - kvm_debug - ("TLB ST fault: cause %#x, status %#lx, PC: %p, BadVaddr: %#lx\n", - cause, kvm_read_c0_guest_status(vcpu->arch.cop0), opc, - badvaddr); + kvm_debug("TLB ST fault: cause %#x, status %#lx, PC: %p, BadVaddr: %#lx\n", + cause, kvm_read_c0_guest_status(vcpu->arch.cop0), opc, + badvaddr); ++vcpu->stat.tlbmiss_st_exits; trace_kvm_exit(vcpu, TLBMISS_ST_EXITS); @@ -1157,10 +1148,9 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) break; default: - kvm_err - ("Exception Code: %d, not yet handled, @ PC: %p, inst: 0x%08x BadVaddr: %#lx Status: %#lx\n", - exccode, opc, kvm_get_inst(opc, vcpu), badvaddr, - kvm_read_c0_guest_status(vcpu->arch.cop0)); + kvm_err("Exception Code: %d, not yet handled, @ PC: %p, inst: 0x%08x BadVaddr: %#lx Status: %#lx\n", + exccode, opc, kvm_get_inst(opc, vcpu), badvaddr, + kvm_read_c0_guest_status(vcpu->arch.cop0)); kvm_arch_vcpu_dump_regs(vcpu); run->exit_reason = KVM_EXIT_INTERNAL_ERROR; ret = RESUME_HOST; @@ -1175,7 +1165,7 @@ skip_emul: kvm_mips_deliver_interrupts(vcpu, cause); if (!(ret & RESUME_HOST)) { - /* Only check for signals if not already exiting to userspace */ + /* Only check for signals if not already exiting to userspace */ if (signal_pending(current)) { run->exit_reason = KVM_EXIT_INTR; ret = (-EINTR << 2) | RESUME_HOST; @@ -1196,11 +1186,13 @@ int __init kvm_mips_init(void) if (ret) return ret; - /* On MIPS, kernel modules are executed from "mapped space", which requires TLBs. - * The TLB handling code is statically linked with the rest of the kernel (kvm_tlb.c) - * to avoid the possibility of double faulting. The issue is that the TLB code - * references routines that are part of the the KVM module, - * which are only available once the module is loaded. + /* + * On MIPS, kernel modules are executed from "mapped space", which + * requires TLBs. The TLB handling code is statically linked with + * the rest of the kernel (tlb.c) to avoid the possibility of + * double faulting. The issue is that the TLB code references + * routines that are part of the the KVM module, which are only + * available once the module is loaded. */ kvm_mips_gfn_to_pfn = gfn_to_pfn; kvm_mips_release_pfn_clean = kvm_release_pfn_clean; diff --git a/arch/mips/kvm/opcode.h b/arch/mips/kvm/opcode.h new file mode 100644 index 000000000000..03a6ae84c7df --- /dev/null +++ b/arch/mips/kvm/opcode.h @@ -0,0 +1,22 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. + * Authors: Sanjay Lal <sanjayl@kymasys.com> + */ + +/* Define opcode values not defined in <asm/isnt.h> */ + +#ifndef __KVM_MIPS_OPCODE_H__ +#define __KVM_MIPS_OPCODE_H__ + +/* COP0 Ops */ +#define mfmcz_op 0x0b /* 01011 */ +#define wrpgpr_op 0x0e /* 01110 */ + +/* COP0 opcodes (only if COP0 and CO=1): */ +#define wait_op 0x20 /* 100000 */ + +#endif /* __KVM_MIPS_OPCODE_H__ */ diff --git a/arch/mips/kvm/kvm_mips_stats.c b/arch/mips/kvm/stats.c index 075904bcac1b..a74d6024c5ad 100644 --- a/arch/mips/kvm/kvm_mips_stats.c +++ b/arch/mips/kvm/stats.c @@ -1,13 +1,13 @@ /* -* This file is subject to the terms and conditions of the GNU General Public -* License. See the file "COPYING" in the main directory of this archive -* for more details. -* -* KVM/MIPS: COP0 access histogram -* -* Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. -* Authors: Sanjay Lal <sanjayl@kymasys.com> -*/ + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * KVM/MIPS: COP0 access histogram + * + * Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. + * Authors: Sanjay Lal <sanjayl@kymasys.com> + */ #include <linux/kvm_host.h> @@ -63,20 +63,18 @@ char *kvm_cop0_str[N_MIPS_COPROC_REGS] = { "DESAVE" }; -int kvm_mips_dump_stats(struct kvm_vcpu *vcpu) +void kvm_mips_dump_stats(struct kvm_vcpu *vcpu) { #ifdef CONFIG_KVM_MIPS_DEBUG_COP0_COUNTERS int i, j; - printk("\nKVM VCPU[%d] COP0 Access Profile:\n", vcpu->vcpu_id); + kvm_info("\nKVM VCPU[%d] COP0 Access Profile:\n", vcpu->vcpu_id); for (i = 0; i < N_MIPS_COPROC_REGS; i++) { for (j = 0; j < N_MIPS_COPROC_SEL; j++) { if (vcpu->arch.cop0->stat[i][j]) - printk("%s[%d]: %lu\n", kvm_cop0_str[i], j, - vcpu->arch.cop0->stat[i][j]); + kvm_info("%s[%d]: %lu\n", kvm_cop0_str[i], j, + vcpu->arch.cop0->stat[i][j]); } } #endif - - return 0; } diff --git a/arch/mips/kvm/kvm_tlb.c b/arch/mips/kvm/tlb.c index 8a5a700ad8de..bbcd82242059 100644 --- a/arch/mips/kvm/kvm_tlb.c +++ b/arch/mips/kvm/tlb.c @@ -1,14 +1,14 @@ /* -* This file is subject to the terms and conditions of the GNU General Public -* License. See the file "COPYING" in the main directory of this archive -* for more details. -* -* KVM/MIPS TLB handling, this file is part of the Linux host kernel so that -* TLB handlers run from KSEG0 -* -* Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. -* Authors: Sanjay Lal <sanjayl@kymasys.com> -*/ + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * KVM/MIPS TLB handling, this file is part of the Linux host kernel so that + * TLB handlers run from KSEG0 + * + * Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. + * Authors: Sanjay Lal <sanjayl@kymasys.com> + */ #include <linux/sched.h> #include <linux/smp.h> @@ -18,7 +18,6 @@ #include <linux/kvm_host.h> #include <linux/srcu.h> - #include <asm/cpu.h> #include <asm/bootinfo.h> #include <asm/mmu_context.h> @@ -39,13 +38,13 @@ atomic_t kvm_mips_instance; EXPORT_SYMBOL(kvm_mips_instance); /* These function pointers are initialized once the KVM module is loaded */ -pfn_t(*kvm_mips_gfn_to_pfn) (struct kvm *kvm, gfn_t gfn); +pfn_t (*kvm_mips_gfn_to_pfn)(struct kvm *kvm, gfn_t gfn); EXPORT_SYMBOL(kvm_mips_gfn_to_pfn); -void (*kvm_mips_release_pfn_clean) (pfn_t pfn); +void (*kvm_mips_release_pfn_clean)(pfn_t pfn); EXPORT_SYMBOL(kvm_mips_release_pfn_clean); -bool(*kvm_mips_is_error_pfn) (pfn_t pfn); +bool (*kvm_mips_is_error_pfn)(pfn_t pfn); EXPORT_SYMBOL(kvm_mips_is_error_pfn); uint32_t kvm_mips_get_kernel_asid(struct kvm_vcpu *vcpu) @@ -53,21 +52,17 @@ uint32_t kvm_mips_get_kernel_asid(struct kvm_vcpu *vcpu) return vcpu->arch.guest_kernel_asid[smp_processor_id()] & ASID_MASK; } - uint32_t kvm_mips_get_user_asid(struct kvm_vcpu *vcpu) { return vcpu->arch.guest_user_asid[smp_processor_id()] & ASID_MASK; } -inline uint32_t kvm_mips_get_commpage_asid (struct kvm_vcpu *vcpu) +inline uint32_t kvm_mips_get_commpage_asid(struct kvm_vcpu *vcpu) { return vcpu->kvm->arch.commpage_tlb; } - -/* - * Structure defining an tlb entry data set. - */ +/* Structure defining an tlb entry data set. */ void kvm_mips_dump_host_tlbs(void) { @@ -82,8 +77,8 @@ void kvm_mips_dump_host_tlbs(void) old_entryhi = read_c0_entryhi(); old_pagemask = read_c0_pagemask(); - printk("HOST TLBs:\n"); - printk("ASID: %#lx\n", read_c0_entryhi() & ASID_MASK); + kvm_info("HOST TLBs:\n"); + kvm_info("ASID: %#lx\n", read_c0_entryhi() & ASID_MASK); for (i = 0; i < current_cpu_data.tlbsize; i++) { write_c0_index(i); @@ -97,25 +92,26 @@ void kvm_mips_dump_host_tlbs(void) tlb.tlb_lo1 = read_c0_entrylo1(); tlb.tlb_mask = read_c0_pagemask(); - printk("TLB%c%3d Hi 0x%08lx ", - (tlb.tlb_lo0 | tlb.tlb_lo1) & MIPS3_PG_V ? ' ' : '*', - i, tlb.tlb_hi); - printk("Lo0=0x%09" PRIx64 " %c%c attr %lx ", - (uint64_t) mips3_tlbpfn_to_paddr(tlb.tlb_lo0), - (tlb.tlb_lo0 & MIPS3_PG_D) ? 'D' : ' ', - (tlb.tlb_lo0 & MIPS3_PG_G) ? 'G' : ' ', - (tlb.tlb_lo0 >> 3) & 7); - printk("Lo1=0x%09" PRIx64 " %c%c attr %lx sz=%lx\n", - (uint64_t) mips3_tlbpfn_to_paddr(tlb.tlb_lo1), - (tlb.tlb_lo1 & MIPS3_PG_D) ? 'D' : ' ', - (tlb.tlb_lo1 & MIPS3_PG_G) ? 'G' : ' ', - (tlb.tlb_lo1 >> 3) & 7, tlb.tlb_mask); + kvm_info("TLB%c%3d Hi 0x%08lx ", + (tlb.tlb_lo0 | tlb.tlb_lo1) & MIPS3_PG_V ? ' ' : '*', + i, tlb.tlb_hi); + kvm_info("Lo0=0x%09" PRIx64 " %c%c attr %lx ", + (uint64_t) mips3_tlbpfn_to_paddr(tlb.tlb_lo0), + (tlb.tlb_lo0 & MIPS3_PG_D) ? 'D' : ' ', + (tlb.tlb_lo0 & MIPS3_PG_G) ? 'G' : ' ', + (tlb.tlb_lo0 >> 3) & 7); + kvm_info("Lo1=0x%09" PRIx64 " %c%c attr %lx sz=%lx\n", + (uint64_t) mips3_tlbpfn_to_paddr(tlb.tlb_lo1), + (tlb.tlb_lo1 & MIPS3_PG_D) ? 'D' : ' ', + (tlb.tlb_lo1 & MIPS3_PG_G) ? 'G' : ' ', + (tlb.tlb_lo1 >> 3) & 7, tlb.tlb_mask); } write_c0_entryhi(old_entryhi); write_c0_pagemask(old_pagemask); mtc0_tlbw_hazard(); local_irq_restore(flags); } +EXPORT_SYMBOL(kvm_mips_dump_host_tlbs); void kvm_mips_dump_guest_tlbs(struct kvm_vcpu *vcpu) { @@ -123,26 +119,27 @@ void kvm_mips_dump_guest_tlbs(struct kvm_vcpu *vcpu) struct kvm_mips_tlb tlb; int i; - printk("Guest TLBs:\n"); - printk("Guest EntryHi: %#lx\n", kvm_read_c0_guest_entryhi(cop0)); + kvm_info("Guest TLBs:\n"); + kvm_info("Guest EntryHi: %#lx\n", kvm_read_c0_guest_entryhi(cop0)); for (i = 0; i < KVM_MIPS_GUEST_TLB_SIZE; i++) { tlb = vcpu->arch.guest_tlb[i]; - printk("TLB%c%3d Hi 0x%08lx ", - (tlb.tlb_lo0 | tlb.tlb_lo1) & MIPS3_PG_V ? ' ' : '*', - i, tlb.tlb_hi); - printk("Lo0=0x%09" PRIx64 " %c%c attr %lx ", - (uint64_t) mips3_tlbpfn_to_paddr(tlb.tlb_lo0), - (tlb.tlb_lo0 & MIPS3_PG_D) ? 'D' : ' ', - (tlb.tlb_lo0 & MIPS3_PG_G) ? 'G' : ' ', - (tlb.tlb_lo0 >> 3) & 7); - printk("Lo1=0x%09" PRIx64 " %c%c attr %lx sz=%lx\n", - (uint64_t) mips3_tlbpfn_to_paddr(tlb.tlb_lo1), - (tlb.tlb_lo1 & MIPS3_PG_D) ? 'D' : ' ', - (tlb.tlb_lo1 & MIPS3_PG_G) ? 'G' : ' ', - (tlb.tlb_lo1 >> 3) & 7, tlb.tlb_mask); + kvm_info("TLB%c%3d Hi 0x%08lx ", + (tlb.tlb_lo0 | tlb.tlb_lo1) & MIPS3_PG_V ? ' ' : '*', + i, tlb.tlb_hi); + kvm_info("Lo0=0x%09" PRIx64 " %c%c attr %lx ", + (uint64_t) mips3_tlbpfn_to_paddr(tlb.tlb_lo0), + (tlb.tlb_lo0 & MIPS3_PG_D) ? 'D' : ' ', + (tlb.tlb_lo0 & MIPS3_PG_G) ? 'G' : ' ', + (tlb.tlb_lo0 >> 3) & 7); + kvm_info("Lo1=0x%09" PRIx64 " %c%c attr %lx sz=%lx\n", + (uint64_t) mips3_tlbpfn_to_paddr(tlb.tlb_lo1), + (tlb.tlb_lo1 & MIPS3_PG_D) ? 'D' : ' ', + (tlb.tlb_lo1 & MIPS3_PG_G) ? 'G' : ' ', + (tlb.tlb_lo1 >> 3) & 7, tlb.tlb_mask); } } +EXPORT_SYMBOL(kvm_mips_dump_guest_tlbs); static int kvm_mips_map_page(struct kvm *kvm, gfn_t gfn) { @@ -152,7 +149,7 @@ static int kvm_mips_map_page(struct kvm *kvm, gfn_t gfn) if (kvm->arch.guest_pmap[gfn] != KVM_INVALID_PAGE) return 0; - srcu_idx = srcu_read_lock(&kvm->srcu); + srcu_idx = srcu_read_lock(&kvm->srcu); pfn = kvm_mips_gfn_to_pfn(kvm, gfn); if (kvm_mips_is_error_pfn(pfn)) { @@ -169,7 +166,7 @@ out: /* Translate guest KSEG0 addresses to Host PA */ unsigned long kvm_mips_translate_guest_kseg0_to_hpa(struct kvm_vcpu *vcpu, - unsigned long gva) + unsigned long gva) { gfn_t gfn; uint32_t offset = gva & ~PAGE_MASK; @@ -194,20 +191,20 @@ unsigned long kvm_mips_translate_guest_kseg0_to_hpa(struct kvm_vcpu *vcpu, return (kvm->arch.guest_pmap[gfn] << PAGE_SHIFT) + offset; } +EXPORT_SYMBOL(kvm_mips_translate_guest_kseg0_to_hpa); /* XXXKYMA: Must be called with interrupts disabled */ /* set flush_dcache_mask == 0 if no dcache flush required */ -int -kvm_mips_host_tlb_write(struct kvm_vcpu *vcpu, unsigned long entryhi, - unsigned long entrylo0, unsigned long entrylo1, int flush_dcache_mask) +int kvm_mips_host_tlb_write(struct kvm_vcpu *vcpu, unsigned long entryhi, + unsigned long entrylo0, unsigned long entrylo1, + int flush_dcache_mask) { unsigned long flags; unsigned long old_entryhi; - volatile int idx; + int idx; local_irq_save(flags); - old_entryhi = read_c0_entryhi(); write_c0_entryhi(entryhi); mtc0_tlbw_hazard(); @@ -240,12 +237,14 @@ kvm_mips_host_tlb_write(struct kvm_vcpu *vcpu, unsigned long entryhi, if (flush_dcache_mask) { if (entrylo0 & MIPS3_PG_V) { ++vcpu->stat.flush_dcache_exits; - flush_data_cache_page((entryhi & VPN2_MASK) & ~flush_dcache_mask); + flush_data_cache_page((entryhi & VPN2_MASK) & + ~flush_dcache_mask); } if (entrylo1 & MIPS3_PG_V) { ++vcpu->stat.flush_dcache_exits; - flush_data_cache_page(((entryhi & VPN2_MASK) & ~flush_dcache_mask) | - (0x1 << PAGE_SHIFT)); + flush_data_cache_page(((entryhi & VPN2_MASK) & + ~flush_dcache_mask) | + (0x1 << PAGE_SHIFT)); } } @@ -257,10 +256,9 @@ kvm_mips_host_tlb_write(struct kvm_vcpu *vcpu, unsigned long entryhi, return 0; } - /* XXXKYMA: Must be called with interrupts disabled */ int kvm_mips_handle_kseg0_tlb_fault(unsigned long badvaddr, - struct kvm_vcpu *vcpu) + struct kvm_vcpu *vcpu) { gfn_t gfn; pfn_t pfn0, pfn1; @@ -270,7 +268,6 @@ int kvm_mips_handle_kseg0_tlb_fault(unsigned long badvaddr, struct kvm *kvm = vcpu->kvm; const int flush_dcache_mask = 0; - if (KVM_GUEST_KSEGX(badvaddr) != KVM_GUEST_KSEG0) { kvm_err("%s: Invalid BadVaddr: %#lx\n", __func__, badvaddr); kvm_mips_dump_host_tlbs(); @@ -302,14 +299,15 @@ int kvm_mips_handle_kseg0_tlb_fault(unsigned long badvaddr, } entryhi = (vaddr | kvm_mips_get_kernel_asid(vcpu)); - entrylo0 = mips3_paddr_to_tlbpfn(pfn0 << PAGE_SHIFT) | (0x3 << 3) | (1 << 2) | - (0x1 << 1); - entrylo1 = mips3_paddr_to_tlbpfn(pfn1 << PAGE_SHIFT) | (0x3 << 3) | (1 << 2) | - (0x1 << 1); + entrylo0 = mips3_paddr_to_tlbpfn(pfn0 << PAGE_SHIFT) | (0x3 << 3) | + (1 << 2) | (0x1 << 1); + entrylo1 = mips3_paddr_to_tlbpfn(pfn1 << PAGE_SHIFT) | (0x3 << 3) | + (1 << 2) | (0x1 << 1); return kvm_mips_host_tlb_write(vcpu, entryhi, entrylo0, entrylo1, flush_dcache_mask); } +EXPORT_SYMBOL(kvm_mips_handle_kseg0_tlb_fault); int kvm_mips_handle_commpage_tlb_fault(unsigned long badvaddr, struct kvm_vcpu *vcpu) @@ -318,11 +316,10 @@ int kvm_mips_handle_commpage_tlb_fault(unsigned long badvaddr, unsigned long flags, old_entryhi = 0, vaddr = 0; unsigned long entrylo0 = 0, entrylo1 = 0; - pfn0 = CPHYSADDR(vcpu->arch.kseg0_commpage) >> PAGE_SHIFT; pfn1 = 0; - entrylo0 = mips3_paddr_to_tlbpfn(pfn0 << PAGE_SHIFT) | (0x3 << 3) | (1 << 2) | - (0x1 << 1); + entrylo0 = mips3_paddr_to_tlbpfn(pfn0 << PAGE_SHIFT) | (0x3 << 3) | + (1 << 2) | (0x1 << 1); entrylo1 = 0; local_irq_save(flags); @@ -341,9 +338,9 @@ int kvm_mips_handle_commpage_tlb_fault(unsigned long badvaddr, mtc0_tlbw_hazard(); tlbw_use_hazard(); - kvm_debug ("@ %#lx idx: %2d [entryhi(R): %#lx] entrylo0 (R): 0x%08lx, entrylo1(R): 0x%08lx\n", - vcpu->arch.pc, read_c0_index(), read_c0_entryhi(), - read_c0_entrylo0(), read_c0_entrylo1()); + kvm_debug("@ %#lx idx: %2d [entryhi(R): %#lx] entrylo0 (R): 0x%08lx, entrylo1(R): 0x%08lx\n", + vcpu->arch.pc, read_c0_index(), read_c0_entryhi(), + read_c0_entrylo0(), read_c0_entrylo1()); /* Restore old ASID */ write_c0_entryhi(old_entryhi); @@ -353,28 +350,33 @@ int kvm_mips_handle_commpage_tlb_fault(unsigned long badvaddr, return 0; } +EXPORT_SYMBOL(kvm_mips_handle_commpage_tlb_fault); -int -kvm_mips_handle_mapped_seg_tlb_fault(struct kvm_vcpu *vcpu, - struct kvm_mips_tlb *tlb, unsigned long *hpa0, unsigned long *hpa1) +int kvm_mips_handle_mapped_seg_tlb_fault(struct kvm_vcpu *vcpu, + struct kvm_mips_tlb *tlb, + unsigned long *hpa0, + unsigned long *hpa1) { unsigned long entryhi = 0, entrylo0 = 0, entrylo1 = 0; struct kvm *kvm = vcpu->kvm; pfn_t pfn0, pfn1; - if ((tlb->tlb_hi & VPN2_MASK) == 0) { pfn0 = 0; pfn1 = 0; } else { - if (kvm_mips_map_page(kvm, mips3_tlbpfn_to_paddr(tlb->tlb_lo0) >> PAGE_SHIFT) < 0) + if (kvm_mips_map_page(kvm, mips3_tlbpfn_to_paddr(tlb->tlb_lo0) + >> PAGE_SHIFT) < 0) return -1; - if (kvm_mips_map_page(kvm, mips3_tlbpfn_to_paddr(tlb->tlb_lo1) >> PAGE_SHIFT) < 0) + if (kvm_mips_map_page(kvm, mips3_tlbpfn_to_paddr(tlb->tlb_lo1) + >> PAGE_SHIFT) < 0) return -1; - pfn0 = kvm->arch.guest_pmap[mips3_tlbpfn_to_paddr(tlb->tlb_lo0) >> PAGE_SHIFT]; - pfn1 = kvm->arch.guest_pmap[mips3_tlbpfn_to_paddr(tlb->tlb_lo1) >> PAGE_SHIFT]; + pfn0 = kvm->arch.guest_pmap[mips3_tlbpfn_to_paddr(tlb->tlb_lo0) + >> PAGE_SHIFT]; + pfn1 = kvm->arch.guest_pmap[mips3_tlbpfn_to_paddr(tlb->tlb_lo1) + >> PAGE_SHIFT]; } if (hpa0) @@ -385,11 +387,12 @@ kvm_mips_handle_mapped_seg_tlb_fault(struct kvm_vcpu *vcpu, /* Get attributes from the Guest TLB */ entryhi = (tlb->tlb_hi & VPN2_MASK) | (KVM_GUEST_KERNEL_MODE(vcpu) ? - kvm_mips_get_kernel_asid(vcpu) : kvm_mips_get_user_asid(vcpu)); + kvm_mips_get_kernel_asid(vcpu) : + kvm_mips_get_user_asid(vcpu)); entrylo0 = mips3_paddr_to_tlbpfn(pfn0 << PAGE_SHIFT) | (0x3 << 3) | - (tlb->tlb_lo0 & MIPS3_PG_D) | (tlb->tlb_lo0 & MIPS3_PG_V); + (tlb->tlb_lo0 & MIPS3_PG_D) | (tlb->tlb_lo0 & MIPS3_PG_V); entrylo1 = mips3_paddr_to_tlbpfn(pfn1 << PAGE_SHIFT) | (0x3 << 3) | - (tlb->tlb_lo1 & MIPS3_PG_D) | (tlb->tlb_lo1 & MIPS3_PG_V); + (tlb->tlb_lo1 & MIPS3_PG_D) | (tlb->tlb_lo1 & MIPS3_PG_V); kvm_debug("@ %#lx tlb_lo0: 0x%08lx tlb_lo1: 0x%08lx\n", vcpu->arch.pc, tlb->tlb_lo0, tlb->tlb_lo1); @@ -397,6 +400,7 @@ kvm_mips_handle_mapped_seg_tlb_fault(struct kvm_vcpu *vcpu, return kvm_mips_host_tlb_write(vcpu, entryhi, entrylo0, entrylo1, tlb->tlb_mask); } +EXPORT_SYMBOL(kvm_mips_handle_mapped_seg_tlb_fault); int kvm_mips_guest_tlb_lookup(struct kvm_vcpu *vcpu, unsigned long entryhi) { @@ -404,10 +408,9 @@ int kvm_mips_guest_tlb_lookup(struct kvm_vcpu *vcpu, unsigned long entryhi) int index = -1; struct kvm_mips_tlb *tlb = vcpu->arch.guest_tlb; - for (i = 0; i < KVM_MIPS_GUEST_TLB_SIZE; i++) { - if (((TLB_VPN2(tlb[i]) & ~tlb[i].tlb_mask) == ((entryhi & VPN2_MASK) & ~tlb[i].tlb_mask)) && - (TLB_IS_GLOBAL(tlb[i]) || (TLB_ASID(tlb[i]) == (entryhi & ASID_MASK)))) { + if (TLB_HI_VPN2_HIT(tlb[i], entryhi) && + TLB_HI_ASID_HIT(tlb[i], entryhi)) { index = i; break; } @@ -418,21 +421,23 @@ int kvm_mips_guest_tlb_lookup(struct kvm_vcpu *vcpu, unsigned long entryhi) return index; } +EXPORT_SYMBOL(kvm_mips_guest_tlb_lookup); int kvm_mips_host_tlb_lookup(struct kvm_vcpu *vcpu, unsigned long vaddr) { unsigned long old_entryhi, flags; - volatile int idx; - + int idx; local_irq_save(flags); old_entryhi = read_c0_entryhi(); if (KVM_GUEST_KERNEL_MODE(vcpu)) - write_c0_entryhi((vaddr & VPN2_MASK) | kvm_mips_get_kernel_asid(vcpu)); + write_c0_entryhi((vaddr & VPN2_MASK) | + kvm_mips_get_kernel_asid(vcpu)); else { - write_c0_entryhi((vaddr & VPN2_MASK) | kvm_mips_get_user_asid(vcpu)); + write_c0_entryhi((vaddr & VPN2_MASK) | + kvm_mips_get_user_asid(vcpu)); } mtc0_tlbw_hazard(); @@ -452,6 +457,7 @@ int kvm_mips_host_tlb_lookup(struct kvm_vcpu *vcpu, unsigned long vaddr) return idx; } +EXPORT_SYMBOL(kvm_mips_host_tlb_lookup); int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long va) { @@ -460,7 +466,6 @@ int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long va) local_irq_save(flags); - old_entryhi = read_c0_entryhi(); write_c0_entryhi((va & VPN2_MASK) | kvm_mips_get_user_asid(vcpu)); @@ -499,8 +504,9 @@ int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long va) return 0; } +EXPORT_SYMBOL(kvm_mips_host_tlb_inv); -/* XXXKYMA: Fix Guest USER/KERNEL no longer share the same ASID*/ +/* XXXKYMA: Fix Guest USER/KERNEL no longer share the same ASID */ int kvm_mips_host_tlb_inv_index(struct kvm_vcpu *vcpu, int index) { unsigned long flags, old_entryhi; @@ -510,7 +516,6 @@ int kvm_mips_host_tlb_inv_index(struct kvm_vcpu *vcpu, int index) local_irq_save(flags); - old_entryhi = read_c0_entryhi(); write_c0_entryhi(UNIQUE_ENTRYHI(index)); @@ -546,7 +551,6 @@ void kvm_mips_flush_host_tlb(int skip_kseg0) int entry = 0; int maxentry = current_cpu_data.tlbsize; - local_irq_save(flags); old_entryhi = read_c0_entryhi(); @@ -554,7 +558,6 @@ void kvm_mips_flush_host_tlb(int skip_kseg0) /* Blast 'em all away. */ for (entry = 0; entry < maxentry; entry++) { - write_c0_index(entry); mtc0_tlbw_hazard(); @@ -565,9 +568,8 @@ void kvm_mips_flush_host_tlb(int skip_kseg0) entryhi = read_c0_entryhi(); /* Don't blow away guest kernel entries */ - if (KVM_GUEST_KSEGX(entryhi) == KVM_GUEST_KSEG0) { + if (KVM_GUEST_KSEGX(entryhi) == KVM_GUEST_KSEG0) continue; - } } /* Make sure all entries differ. */ @@ -591,17 +593,17 @@ void kvm_mips_flush_host_tlb(int skip_kseg0) local_irq_restore(flags); } +EXPORT_SYMBOL(kvm_mips_flush_host_tlb); -void -kvm_get_new_mmu_context(struct mm_struct *mm, unsigned long cpu, - struct kvm_vcpu *vcpu) +void kvm_get_new_mmu_context(struct mm_struct *mm, unsigned long cpu, + struct kvm_vcpu *vcpu) { unsigned long asid = asid_cache(cpu); - if (!((asid += ASID_INC) & ASID_MASK)) { - if (cpu_has_vtag_icache) { + asid += ASID_INC; + if (!(asid & ASID_MASK)) { + if (cpu_has_vtag_icache) flush_icache_all(); - } kvm_local_flush_tlb_all(); /* start new asid cycle */ @@ -639,6 +641,7 @@ void kvm_local_flush_tlb_all(void) local_irq_restore(flags); } +EXPORT_SYMBOL(kvm_local_flush_tlb_all); /** * kvm_mips_migrate_count() - Migrate timer. @@ -699,7 +702,10 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) } if (!newasid) { - /* If we preempted while the guest was executing, then reload the pre-empted ASID */ + /* + * If we preempted while the guest was executing, then reload + * the pre-empted ASID + */ if (current->flags & PF_VCPU) { write_c0_entryhi(vcpu->arch. preempt_entryhi & ASID_MASK); @@ -708,9 +714,10 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) } else { /* New ASIDs were allocated for the VM */ - /* Were we in guest context? If so then the pre-empted ASID is no longer - * valid, we need to set it to what it should be based on the mode of - * the Guest (Kernel/User) + /* + * Were we in guest context? If so then the pre-empted ASID is + * no longer valid, we need to set it to what it should be based + * on the mode of the Guest (Kernel/User) */ if (current->flags & PF_VCPU) { if (KVM_GUEST_KERNEL_MODE(vcpu)) @@ -728,6 +735,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) local_irq_restore(flags); } +EXPORT_SYMBOL(kvm_arch_vcpu_load); /* ASID can change if another task is scheduled during preemption */ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) @@ -739,7 +747,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) cpu = smp_processor_id(); - vcpu->arch.preempt_entryhi = read_c0_entryhi(); vcpu->arch.last_sched_cpu = cpu; @@ -754,11 +761,12 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) local_irq_restore(flags); } +EXPORT_SYMBOL(kvm_arch_vcpu_put); uint32_t kvm_get_inst(uint32_t *opc, struct kvm_vcpu *vcpu) { struct mips_coproc *cop0 = vcpu->arch.cop0; - unsigned long paddr, flags; + unsigned long paddr, flags, vpn2, asid; uint32_t inst; int index; @@ -769,16 +777,12 @@ uint32_t kvm_get_inst(uint32_t *opc, struct kvm_vcpu *vcpu) if (index >= 0) { inst = *(opc); } else { - index = - kvm_mips_guest_tlb_lookup(vcpu, - ((unsigned long) opc & VPN2_MASK) - | - (kvm_read_c0_guest_entryhi - (cop0) & ASID_MASK)); + vpn2 = (unsigned long) opc & VPN2_MASK; + asid = kvm_read_c0_guest_entryhi(cop0) & ASID_MASK; + index = kvm_mips_guest_tlb_lookup(vcpu, vpn2 | asid); if (index < 0) { - kvm_err - ("%s: get_user_failed for %p, vcpu: %p, ASID: %#lx\n", - __func__, opc, vcpu, read_c0_entryhi()); + kvm_err("%s: get_user_failed for %p, vcpu: %p, ASID: %#lx\n", + __func__, opc, vcpu, read_c0_entryhi()); kvm_mips_dump_host_tlbs(); local_irq_restore(flags); return KVM_INVALID_INST; @@ -793,7 +797,7 @@ uint32_t kvm_get_inst(uint32_t *opc, struct kvm_vcpu *vcpu) } else if (KVM_GUEST_KSEGX(opc) == KVM_GUEST_KSEG0) { paddr = kvm_mips_translate_guest_kseg0_to_hpa(vcpu, - (unsigned long) opc); + (unsigned long) opc); inst = *(uint32_t *) CKSEG0ADDR(paddr); } else { kvm_err("%s: illegal address: %p\n", __func__, opc); @@ -802,18 +806,4 @@ uint32_t kvm_get_inst(uint32_t *opc, struct kvm_vcpu *vcpu) return inst; } - -EXPORT_SYMBOL(kvm_local_flush_tlb_all); -EXPORT_SYMBOL(kvm_mips_handle_mapped_seg_tlb_fault); -EXPORT_SYMBOL(kvm_mips_handle_commpage_tlb_fault); -EXPORT_SYMBOL(kvm_mips_dump_host_tlbs); -EXPORT_SYMBOL(kvm_mips_handle_kseg0_tlb_fault); -EXPORT_SYMBOL(kvm_mips_host_tlb_lookup); -EXPORT_SYMBOL(kvm_mips_flush_host_tlb); -EXPORT_SYMBOL(kvm_mips_guest_tlb_lookup); -EXPORT_SYMBOL(kvm_mips_host_tlb_inv); -EXPORT_SYMBOL(kvm_mips_translate_guest_kseg0_to_hpa); -EXPORT_SYMBOL(kvm_mips_dump_guest_tlbs); EXPORT_SYMBOL(kvm_get_inst); -EXPORT_SYMBOL(kvm_arch_vcpu_load); -EXPORT_SYMBOL(kvm_arch_vcpu_put); diff --git a/arch/mips/kvm/trace.h b/arch/mips/kvm/trace.h index bc9e0f406c08..c1388d40663b 100644 --- a/arch/mips/kvm/trace.h +++ b/arch/mips/kvm/trace.h @@ -1,11 +1,11 @@ /* -* This file is subject to the terms and conditions of the GNU General Public -* License. See the file "COPYING" in the main directory of this archive -* for more details. -* -* Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. -* Authors: Sanjay Lal <sanjayl@kymasys.com> -*/ + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. + * Authors: Sanjay Lal <sanjayl@kymasys.com> + */ #if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_KVM_H @@ -17,9 +17,7 @@ #define TRACE_INCLUDE_PATH . #define TRACE_INCLUDE_FILE trace -/* - * Tracepoints for VM eists - */ +/* Tracepoints for VM eists */ extern char *kvm_mips_exit_types_str[MAX_KVM_MIPS_EXIT_TYPES]; TRACE_EVENT(kvm_exit, diff --git a/arch/mips/kvm/kvm_trap_emul.c b/arch/mips/kvm/trap_emul.c index 693f952b2fbb..fd7257b70e65 100644 --- a/arch/mips/kvm/kvm_trap_emul.c +++ b/arch/mips/kvm/trap_emul.c @@ -1,13 +1,13 @@ /* -* This file is subject to the terms and conditions of the GNU General Public -* License. See the file "COPYING" in the main directory of this archive -* for more details. -* -* KVM/MIPS: Deliver/Emulate exceptions to the guest kernel -* -* Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. -* Authors: Sanjay Lal <sanjayl@kymasys.com> -*/ + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * KVM/MIPS: Deliver/Emulate exceptions to the guest kernel + * + * Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. + * Authors: Sanjay Lal <sanjayl@kymasys.com> + */ #include <linux/errno.h> #include <linux/err.h> @@ -16,8 +16,8 @@ #include <linux/kvm_host.h> -#include "kvm_mips_opcode.h" -#include "kvm_mips_int.h" +#include "opcode.h" +#include "interrupt.h" static gpa_t kvm_trap_emul_gva_to_gpa_cb(gva_t gva) { @@ -27,7 +27,7 @@ static gpa_t kvm_trap_emul_gva_to_gpa_cb(gva_t gva) if ((kseg == CKSEG0) || (kseg == CKSEG1)) gpa = CPHYSADDR(gva); else { - printk("%s: cannot find GPA for GVA: %#lx\n", __func__, gva); + kvm_err("%s: cannot find GPA for GVA: %#lx\n", __func__, gva); kvm_mips_dump_host_tlbs(); gpa = KVM_INVALID_ADDR; } @@ -37,7 +37,6 @@ static gpa_t kvm_trap_emul_gva_to_gpa_cb(gva_t gva) return gpa; } - static int kvm_trap_emul_handle_cop_unusable(struct kvm_vcpu *vcpu) { struct kvm_run *run = vcpu->run; @@ -46,9 +45,9 @@ static int kvm_trap_emul_handle_cop_unusable(struct kvm_vcpu *vcpu) enum emulation_result er = EMULATE_DONE; int ret = RESUME_GUEST; - if (((cause & CAUSEF_CE) >> CAUSEB_CE) == 1) { + if (((cause & CAUSEF_CE) >> CAUSEB_CE) == 1) er = kvm_mips_emulate_fpu_exc(cause, opc, run, vcpu); - } else + else er = kvm_mips_emulate_inst(cause, opc, run, vcpu); switch (er) { @@ -83,9 +82,8 @@ static int kvm_trap_emul_handle_tlb_mod(struct kvm_vcpu *vcpu) if (KVM_GUEST_KSEGX(badvaddr) < KVM_GUEST_KSEG0 || KVM_GUEST_KSEGX(badvaddr) == KVM_GUEST_KSEG23) { - kvm_debug - ("USER/KSEG23 ADDR TLB MOD fault: cause %#lx, PC: %p, BadVaddr: %#lx\n", - cause, opc, badvaddr); + kvm_debug("USER/KSEG23 ADDR TLB MOD fault: cause %#lx, PC: %p, BadVaddr: %#lx\n", + cause, opc, badvaddr); er = kvm_mips_handle_tlbmod(cause, opc, run, vcpu); if (er == EMULATE_DONE) @@ -95,20 +93,20 @@ static int kvm_trap_emul_handle_tlb_mod(struct kvm_vcpu *vcpu) ret = RESUME_HOST; } } else if (KVM_GUEST_KSEGX(badvaddr) == KVM_GUEST_KSEG0) { - /* XXXKYMA: The guest kernel does not expect to get this fault when we are not - * using HIGHMEM. Need to address this in a HIGHMEM kernel + /* + * XXXKYMA: The guest kernel does not expect to get this fault + * when we are not using HIGHMEM. Need to address this in a + * HIGHMEM kernel */ - printk - ("TLB MOD fault not handled, cause %#lx, PC: %p, BadVaddr: %#lx\n", - cause, opc, badvaddr); + kvm_err("TLB MOD fault not handled, cause %#lx, PC: %p, BadVaddr: %#lx\n", + cause, opc, badvaddr); kvm_mips_dump_host_tlbs(); kvm_arch_vcpu_dump_regs(vcpu); run->exit_reason = KVM_EXIT_INTERNAL_ERROR; ret = RESUME_HOST; } else { - printk - ("Illegal TLB Mod fault address , cause %#lx, PC: %p, BadVaddr: %#lx\n", - cause, opc, badvaddr); + kvm_err("Illegal TLB Mod fault address , cause %#lx, PC: %p, BadVaddr: %#lx\n", + cause, opc, badvaddr); kvm_mips_dump_host_tlbs(); kvm_arch_vcpu_dump_regs(vcpu); run->exit_reason = KVM_EXIT_INTERNAL_ERROR; @@ -134,9 +132,8 @@ static int kvm_trap_emul_handle_tlb_st_miss(struct kvm_vcpu *vcpu) } } else if (KVM_GUEST_KSEGX(badvaddr) < KVM_GUEST_KSEG0 || KVM_GUEST_KSEGX(badvaddr) == KVM_GUEST_KSEG23) { - kvm_debug - ("USER ADDR TLB LD fault: cause %#lx, PC: %p, BadVaddr: %#lx\n", - cause, opc, badvaddr); + kvm_debug("USER ADDR TLB LD fault: cause %#lx, PC: %p, BadVaddr: %#lx\n", + cause, opc, badvaddr); er = kvm_mips_handle_tlbmiss(cause, opc, run, vcpu); if (er == EMULATE_DONE) ret = RESUME_GUEST; @@ -145,8 +142,9 @@ static int kvm_trap_emul_handle_tlb_st_miss(struct kvm_vcpu *vcpu) ret = RESUME_HOST; } } else if (KVM_GUEST_KSEGX(badvaddr) == KVM_GUEST_KSEG0) { - /* All KSEG0 faults are handled by KVM, as the guest kernel does not - * expect to ever get them + /* + * All KSEG0 faults are handled by KVM, as the guest kernel does + * not expect to ever get them */ if (kvm_mips_handle_kseg0_tlb_fault (vcpu->arch.host_cp0_badvaddr, vcpu) < 0) { @@ -154,9 +152,8 @@ static int kvm_trap_emul_handle_tlb_st_miss(struct kvm_vcpu *vcpu) ret = RESUME_HOST; } } else { - kvm_err - ("Illegal TLB LD fault address , cause %#lx, PC: %p, BadVaddr: %#lx\n", - cause, opc, badvaddr); + kvm_err("Illegal TLB LD fault address , cause %#lx, PC: %p, BadVaddr: %#lx\n", + cause, opc, badvaddr); kvm_mips_dump_host_tlbs(); kvm_arch_vcpu_dump_regs(vcpu); run->exit_reason = KVM_EXIT_INTERNAL_ERROR; @@ -185,11 +182,14 @@ static int kvm_trap_emul_handle_tlb_ld_miss(struct kvm_vcpu *vcpu) kvm_debug("USER ADDR TLB ST fault: PC: %#lx, BadVaddr: %#lx\n", vcpu->arch.pc, badvaddr); - /* User Address (UA) fault, this could happen if - * (1) TLB entry not present/valid in both Guest and shadow host TLBs, in this - * case we pass on the fault to the guest kernel and let it handle it. - * (2) TLB entry is present in the Guest TLB but not in the shadow, in this - * case we inject the TLB from the Guest TLB into the shadow host TLB + /* + * User Address (UA) fault, this could happen if + * (1) TLB entry not present/valid in both Guest and shadow host + * TLBs, in this case we pass on the fault to the guest + * kernel and let it handle it. + * (2) TLB entry is present in the Guest TLB but not in the + * shadow, in this case we inject the TLB from the Guest TLB + * into the shadow host TLB */ er = kvm_mips_handle_tlbmiss(cause, opc, run, vcpu); @@ -206,9 +206,8 @@ static int kvm_trap_emul_handle_tlb_ld_miss(struct kvm_vcpu *vcpu) ret = RESUME_HOST; } } else { - printk - ("Illegal TLB ST fault address , cause %#lx, PC: %p, BadVaddr: %#lx\n", - cause, opc, badvaddr); + kvm_err("Illegal TLB ST fault address , cause %#lx, PC: %p, BadVaddr: %#lx\n", + cause, opc, badvaddr); kvm_mips_dump_host_tlbs(); kvm_arch_vcpu_dump_regs(vcpu); run->exit_reason = KVM_EXIT_INTERNAL_ERROR; @@ -231,7 +230,7 @@ static int kvm_trap_emul_handle_addr_err_st(struct kvm_vcpu *vcpu) kvm_debug("Emulate Store to MMIO space\n"); er = kvm_mips_emulate_inst(cause, opc, run, vcpu); if (er == EMULATE_FAIL) { - printk("Emulate Store to MMIO space failed\n"); + kvm_err("Emulate Store to MMIO space failed\n"); run->exit_reason = KVM_EXIT_INTERNAL_ERROR; ret = RESUME_HOST; } else { @@ -239,9 +238,8 @@ static int kvm_trap_emul_handle_addr_err_st(struct kvm_vcpu *vcpu) ret = RESUME_HOST; } } else { - printk - ("Address Error (STORE): cause %#lx, PC: %p, BadVaddr: %#lx\n", - cause, opc, badvaddr); + kvm_err("Address Error (STORE): cause %#lx, PC: %p, BadVaddr: %#lx\n", + cause, opc, badvaddr); run->exit_reason = KVM_EXIT_INTERNAL_ERROR; ret = RESUME_HOST; } @@ -261,7 +259,7 @@ static int kvm_trap_emul_handle_addr_err_ld(struct kvm_vcpu *vcpu) kvm_debug("Emulate Load from MMIO space @ %#lx\n", badvaddr); er = kvm_mips_emulate_inst(cause, opc, run, vcpu); if (er == EMULATE_FAIL) { - printk("Emulate Load from MMIO space failed\n"); + kvm_err("Emulate Load from MMIO space failed\n"); run->exit_reason = KVM_EXIT_INTERNAL_ERROR; ret = RESUME_HOST; } else { @@ -269,9 +267,8 @@ static int kvm_trap_emul_handle_addr_err_ld(struct kvm_vcpu *vcpu) ret = RESUME_HOST; } } else { - printk - ("Address Error (LOAD): cause %#lx, PC: %p, BadVaddr: %#lx\n", - cause, opc, badvaddr); + kvm_err("Address Error (LOAD): cause %#lx, PC: %p, BadVaddr: %#lx\n", + cause, opc, badvaddr); run->exit_reason = KVM_EXIT_INTERNAL_ERROR; ret = RESUME_HOST; er = EMULATE_FAIL; @@ -349,9 +346,9 @@ static int kvm_trap_emul_vcpu_setup(struct kvm_vcpu *vcpu) uint32_t config1; int vcpu_id = vcpu->vcpu_id; - /* Arch specific stuff, set up config registers properly so that the - * guest will come up as expected, for now we simulate a - * MIPS 24kc + /* + * Arch specific stuff, set up config registers properly so that the + * guest will come up as expected, for now we simulate a MIPS 24kc */ kvm_write_c0_guest_prid(cop0, 0x00019300); kvm_write_c0_guest_config(cop0, @@ -373,14 +370,15 @@ static int kvm_trap_emul_vcpu_setup(struct kvm_vcpu *vcpu) kvm_write_c0_guest_config2(cop0, MIPS_CONFIG2); /* MIPS_CONFIG2 | (read_c0_config2() & 0xfff) */ - kvm_write_c0_guest_config3(cop0, - MIPS_CONFIG3 | (0 << CP0C3_VInt) | (1 << - CP0C3_ULRI)); + kvm_write_c0_guest_config3(cop0, MIPS_CONFIG3 | (0 << CP0C3_VInt) | + (1 << CP0C3_ULRI)); /* Set Wait IE/IXMT Ignore in Config7, IAR, AR */ kvm_write_c0_guest_config7(cop0, (MIPS_CONF7_WII) | (1 << 10)); - /* Setup IntCtl defaults, compatibilty mode for timer interrupts (HW5) */ + /* + * Setup IntCtl defaults, compatibilty mode for timer interrupts (HW5) + */ kvm_write_c0_guest_intctl(cop0, 0xFC000000); /* Put in vcpu id as CPUNum into Ebase Reg to handle SMP Guests */ diff --git a/arch/mn10300/include/asm/processor.h b/arch/mn10300/include/asm/processor.h index 8b80b19d0c8a..769d5ed8e992 100644 --- a/arch/mn10300/include/asm/processor.h +++ b/arch/mn10300/include/asm/processor.h @@ -68,7 +68,9 @@ extern struct mn10300_cpuinfo cpu_data[]; extern void identify_cpu(struct mn10300_cpuinfo *); extern void print_cpu_info(struct mn10300_cpuinfo *); extern void dodgy_tsc(void); + #define cpu_relax() barrier() +#define cpu_relax_lowlatency() cpu_relax() /* * User space process size: 1.75GB (default). diff --git a/arch/openrisc/include/asm/processor.h b/arch/openrisc/include/asm/processor.h index cab746fa9e87..4d235e3d2534 100644 --- a/arch/openrisc/include/asm/processor.h +++ b/arch/openrisc/include/asm/processor.h @@ -101,6 +101,7 @@ extern unsigned long thread_saved_pc(struct task_struct *t); #define init_stack (init_thread_union.stack) #define cpu_relax() barrier() +#define cpu_relax_lowlatency() cpu_relax() #endif /* __ASSEMBLY__ */ #endif /* __ASM_OPENRISC_PROCESSOR_H */ diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 108d48e652af..6e75e2030927 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -6,7 +6,6 @@ config PARISC select HAVE_OPROFILE select HAVE_FUNCTION_TRACER if 64BIT select HAVE_FUNCTION_GRAPH_TRACER if 64BIT - select HAVE_FUNCTION_TRACE_MCOUNT_TEST if 64BIT select ARCH_WANT_FRAME_POINTERS select RTC_CLASS select RTC_DRV_GENERIC diff --git a/arch/parisc/include/asm/processor.h b/arch/parisc/include/asm/processor.h index d951c9681ab3..689a8ade3606 100644 --- a/arch/parisc/include/asm/processor.h +++ b/arch/parisc/include/asm/processor.h @@ -338,6 +338,7 @@ extern unsigned long get_wchan(struct task_struct *p); #define KSTK_ESP(tsk) ((tsk)->thread.regs.gr[30]) #define cpu_relax() barrier() +#define cpu_relax_lowlatency() cpu_relax() /* Used as a macro to identify the combined VIPT/PIPT cached * CPUs which require a guarantee of coherency (no inequivalent diff --git a/arch/parisc/include/uapi/asm/signal.h b/arch/parisc/include/uapi/asm/signal.h index a2fa297196bc..f5645d6a89f2 100644 --- a/arch/parisc/include/uapi/asm/signal.h +++ b/arch/parisc/include/uapi/asm/signal.h @@ -69,8 +69,6 @@ #define SA_NOMASK SA_NODEFER #define SA_ONESHOT SA_RESETHAND -#define SA_RESTORER 0x04000000 /* obsolete -- ignored */ - #define MINSIGSTKSZ 2048 #define SIGSTKSZ 8192 diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c index 5beb97bafbb1..559d400f9385 100644 --- a/arch/parisc/kernel/ftrace.c +++ b/arch/parisc/kernel/ftrace.c @@ -112,6 +112,9 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) unsigned long long calltime; struct ftrace_graph_ent trace; + if (unlikely(ftrace_graph_is_dead())) + return; + if (unlikely(atomic_read(¤t->tracing_graph_pause))) return; @@ -152,9 +155,6 @@ void ftrace_function_trampoline(unsigned long parent, { extern ftrace_func_t ftrace_trace_function; - if (function_trace_stop) - return; - if (ftrace_trace_function != ftrace_stub) { ftrace_trace_function(parent, self_addr); return; diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index ae085ad0fba0..0bef864264c0 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -728,7 +728,6 @@ static void __init pagetable_init(void) #endif empty_zero_page = alloc_bootmem_pages(PAGE_SIZE); - memset(empty_zero_page, 0, PAGE_SIZE); } static void __init gateway_init(void) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index fefe7c8bf05f..80b94b0add1f 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -145,6 +145,7 @@ config PPC select HAVE_IRQ_EXIT_ON_IRQ_STACK select ARCH_USE_CMPXCHG_LOCKREF if PPC64 select HAVE_ARCH_AUDITSYSCALL + select ARCH_SUPPORTS_ATOMIC_RMW config GENERIC_CSUM def_bool CPU_LITTLE_ENDIAN diff --git a/arch/powerpc/boot/dts/fsl/qoriq-sec6.0-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-sec6.0-0.dtsi index f75b4f820c3c..7d4a6a2354f4 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-sec6.0-0.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-sec6.0-0.dtsi @@ -32,7 +32,8 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ - compatible = "fsl,sec-v6.0"; + compatible = "fsl,sec-v6.0", "fsl,sec-v5.0", + "fsl,sec-v4.0"; fsl,sec-era = <6>; #address-cells = <1>; #size-cells = <1>; diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index bc2347774f0a..0fdd7eece6d9 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -447,6 +447,7 @@ extern const char *powerpc_base_platform; CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_DAWR | \ CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP) #define CPU_FTRS_POWER8E (CPU_FTRS_POWER8 | CPU_FTR_PMAO_BUG) +#define CPU_FTRS_POWER8_DD1 (CPU_FTRS_POWER8 & ~CPU_FTR_DBELL) #define CPU_FTRS_CELL (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \ diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index fddb72b48ce9..d645428a65a4 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -198,8 +198,10 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r, return rb; } -static inline unsigned long hpte_page_size(unsigned long h, unsigned long l) +static inline unsigned long __hpte_page_size(unsigned long h, unsigned long l, + bool is_base_size) { + int size, a_psize; /* Look at the 8 bit LP value */ unsigned int lp = (l >> LP_SHIFT) & ((1 << LP_BITS) - 1); @@ -214,14 +216,27 @@ static inline unsigned long hpte_page_size(unsigned long h, unsigned long l) continue; a_psize = __hpte_actual_psize(lp, size); - if (a_psize != -1) + if (a_psize != -1) { + if (is_base_size) + return 1ul << mmu_psize_defs[size].shift; return 1ul << mmu_psize_defs[a_psize].shift; + } } } return 0; } +static inline unsigned long hpte_page_size(unsigned long h, unsigned long l) +{ + return __hpte_page_size(h, l, 0); +} + +static inline unsigned long hpte_base_page_size(unsigned long h, unsigned long l) +{ + return __hpte_page_size(h, l, 1); +} + static inline unsigned long hpte_rpn(unsigned long ptel, unsigned long psize) { return ((ptel & HPTE_R_RPN) & ~(psize - 1)) >> PAGE_SHIFT; diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h index 807014dde821..c2b4dcf23d03 100644 --- a/arch/powerpc/include/asm/mmu-hash64.h +++ b/arch/powerpc/include/asm/mmu-hash64.h @@ -22,6 +22,7 @@ */ #include <asm/pgtable-ppc64.h> #include <asm/bug.h> +#include <asm/processor.h> /* * Segment table @@ -496,7 +497,7 @@ extern void slb_set_size(u16 size); */ struct subpage_prot_table { unsigned long maxaddr; /* only addresses < this are protected */ - unsigned int **protptrs[2]; + unsigned int **protptrs[(TASK_SIZE_USER64 >> 43)]; unsigned int *low_prot[4]; }; diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 9ea266eae33e..7e4612528546 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -277,6 +277,8 @@ n: .globl n; \ n: +#define _GLOBAL_TOC(name) _GLOBAL(name) + #define _KPROBE(n) \ .section ".kprobes.text","a"; \ .globl n; \ diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 6d59072e13a7..dda7ac4c80bd 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -400,6 +400,8 @@ static inline unsigned long __pack_fe01(unsigned int fpmode) #define cpu_relax() barrier() #endif +#define cpu_relax_lowlatency() cpu_relax() + /* Check that a certain kernel stack pointer is valid in task_struct p */ int validate_sp(unsigned long sp, struct task_struct *p, unsigned long nbytes); diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 965291b4c2fa..0c157642c2a1 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -527,6 +527,26 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check_early = __machine_check_early_realmode_p8, .platform = "power8", }, + { /* Power8 DD1: Does not support doorbell IPIs */ + .pvr_mask = 0xffffff00, + .pvr_value = 0x004d0100, + .cpu_name = "POWER8 (raw)", + .cpu_features = CPU_FTRS_POWER8_DD1, + .cpu_user_features = COMMON_USER_POWER8, + .cpu_user_features2 = COMMON_USER2_POWER8, + .mmu_features = MMU_FTRS_POWER8, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 6, + .pmc_type = PPC_PMC_IBM, + .oprofile_cpu_type = "ppc64/power8", + .oprofile_type = PPC_OPROFILE_INVALID, + .cpu_setup = __setup_cpu_power8, + .cpu_restore = __restore_cpu_power8, + .flush_tlb = __flush_tlb_power8, + .machine_check_early = __machine_check_early_realmode_p8, + .platform = "power8", + }, { /* Power8 */ .pvr_mask = 0xffff0000, .pvr_value = 0x004d0000, diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c index d178834fe508..390311c0f03d 100644 --- a/arch/powerpc/kernel/ftrace.c +++ b/arch/powerpc/kernel/ftrace.c @@ -525,6 +525,9 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) struct ftrace_graph_ent trace; unsigned long return_hooker = (unsigned long)&return_to_handler; + if (unlikely(ftrace_graph_is_dead())) + return; + if (unlikely(atomic_read(¤t->tracing_graph_pause))) return; diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index b49c72fd7f16..b2814e23e1ed 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -123,21 +123,12 @@ resource_size_t pcibios_window_alignment(struct pci_bus *bus, void pcibios_reset_secondary_bus(struct pci_dev *dev) { - u16 ctrl; - if (ppc_md.pcibios_reset_secondary_bus) { ppc_md.pcibios_reset_secondary_bus(dev); return; } - pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &ctrl); - ctrl |= PCI_BRIDGE_CTL_BUS_RESET; - pci_write_config_word(dev, PCI_BRIDGE_CONTROL, ctrl); - msleep(2); - - ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET; - pci_write_config_word(dev, PCI_BRIDGE_CONTROL, ctrl); - ssleep(1); + pci_reset_secondary_bus(dev); } static resource_size_t pcibios_io_size(const struct pci_controller *hose) diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c index 658e89d2025b..db2b482af658 100644 --- a/arch/powerpc/kernel/rtas_flash.c +++ b/arch/powerpc/kernel/rtas_flash.c @@ -611,17 +611,19 @@ static void rtas_flash_firmware(int reboot_type) for (f = flist; f; f = next) { /* Translate data addrs to absolute */ for (i = 0; i < f->num_blocks; i++) { - f->blocks[i].data = (char *)__pa(f->blocks[i].data); + f->blocks[i].data = (char *)cpu_to_be64(__pa(f->blocks[i].data)); image_size += f->blocks[i].length; + f->blocks[i].length = cpu_to_be64(f->blocks[i].length); } next = f->next; /* Don't translate NULL pointer for last entry */ if (f->next) - f->next = (struct flash_block_list *)__pa(f->next); + f->next = (struct flash_block_list *)cpu_to_be64(__pa(f->next)); else f->next = NULL; /* make num_blocks into the version/length field */ f->num_blocks = (FLASH_BLOCK_LIST_VERSION << 56) | ((f->num_blocks+1)*16); + f->num_blocks = cpu_to_be64(f->num_blocks); } printk(KERN_ALERT "FLASH: flash image is %ld bytes\n", image_size); diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 51a3ff78838a..1007fb802e6b 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -747,7 +747,7 @@ int setup_profiling_timer(unsigned int multiplier) #ifdef CONFIG_SCHED_SMT /* cpumask of CPUs with asymetric SMT dependancy */ -static const int powerpc_smt_flags(void) +static int powerpc_smt_flags(void) { int flags = SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES; diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 80561074078d..68468d695f12 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -1562,7 +1562,7 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf, goto out; } if (!rma_setup && is_vrma_hpte(v)) { - unsigned long psize = hpte_page_size(v, r); + unsigned long psize = hpte_base_page_size(v, r); unsigned long senc = slb_pgsize_encoding(psize); unsigned long lpcr; diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 6e6224318c36..5a24d3c2b6b8 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -814,13 +814,10 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v, r = hpte[i+1]; /* - * Check the HPTE again, including large page size - * Since we don't currently allow any MPSS (mixed - * page-size segment) page sizes, it is sufficient - * to check against the actual page size. + * Check the HPTE again, including base page size */ if ((v & valid) && (v & mask) == val && - hpte_page_size(v, r) == (1ul << pshift)) + hpte_base_page_size(v, r) == (1ul << pshift)) /* Return with the HPTE still locked */ return (hash << 3) + (i >> 1); diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 868347ef09fd..558a67df8126 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -48,7 +48,7 @@ * * LR = return address to continue at after eventually re-enabling MMU */ -_GLOBAL(kvmppc_hv_entry_trampoline) +_GLOBAL_TOC(kvmppc_hv_entry_trampoline) mflr r0 std r0, PPC_LR_STKOFF(r1) stdu r1, -112(r1) diff --git a/arch/powerpc/kvm/book3s_interrupts.S b/arch/powerpc/kvm/book3s_interrupts.S index e2c29e381dc7..d044b8b7c69d 100644 --- a/arch/powerpc/kvm/book3s_interrupts.S +++ b/arch/powerpc/kvm/book3s_interrupts.S @@ -25,7 +25,11 @@ #include <asm/exception-64s.h> #if defined(CONFIG_PPC_BOOK3S_64) +#if defined(_CALL_ELF) && _CALL_ELF == 2 +#define FUNC(name) name +#else #define FUNC(name) GLUE(.,name) +#endif #define GET_SHADOW_VCPU(reg) addi reg, r13, PACA_SVCPU #elif defined(CONFIG_PPC_BOOK3S_32) diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S index 9eec675220e6..16c4d88ba27d 100644 --- a/arch/powerpc/kvm/book3s_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_rmhandlers.S @@ -36,7 +36,11 @@ #if defined(CONFIG_PPC_BOOK3S_64) +#if defined(_CALL_ELF) && _CALL_ELF == 2 +#define FUNC(name) name +#else #define FUNC(name) GLUE(.,name) +#endif #elif defined(CONFIG_PPC_BOOK3S_32) @@ -146,7 +150,7 @@ kvmppc_handler_skip_ins: * On entry, r4 contains the guest shadow MSR * MSR.EE has to be 0 when calling this function */ -_GLOBAL(kvmppc_entry_trampoline) +_GLOBAL_TOC(kvmppc_entry_trampoline) mfmsr r5 LOAD_REG_ADDR(r7, kvmppc_handler_trampoline_enter) toreal(r7) diff --git a/arch/powerpc/kvm/book3s_rtas.c b/arch/powerpc/kvm/book3s_rtas.c index edb14ba992b3..ef27fbd5d9c5 100644 --- a/arch/powerpc/kvm/book3s_rtas.c +++ b/arch/powerpc/kvm/book3s_rtas.c @@ -23,20 +23,20 @@ static void kvm_rtas_set_xive(struct kvm_vcpu *vcpu, struct rtas_args *args) u32 irq, server, priority; int rc; - if (args->nargs != 3 || args->nret != 1) { + if (be32_to_cpu(args->nargs) != 3 || be32_to_cpu(args->nret) != 1) { rc = -3; goto out; } - irq = args->args[0]; - server = args->args[1]; - priority = args->args[2]; + irq = be32_to_cpu(args->args[0]); + server = be32_to_cpu(args->args[1]); + priority = be32_to_cpu(args->args[2]); rc = kvmppc_xics_set_xive(vcpu->kvm, irq, server, priority); if (rc) rc = -3; out: - args->rets[0] = rc; + args->rets[0] = cpu_to_be32(rc); } static void kvm_rtas_get_xive(struct kvm_vcpu *vcpu, struct rtas_args *args) @@ -44,12 +44,12 @@ static void kvm_rtas_get_xive(struct kvm_vcpu *vcpu, struct rtas_args *args) u32 irq, server, priority; int rc; - if (args->nargs != 1 || args->nret != 3) { + if (be32_to_cpu(args->nargs) != 1 || be32_to_cpu(args->nret) != 3) { rc = -3; goto out; } - irq = args->args[0]; + irq = be32_to_cpu(args->args[0]); server = priority = 0; rc = kvmppc_xics_get_xive(vcpu->kvm, irq, &server, &priority); @@ -58,10 +58,10 @@ static void kvm_rtas_get_xive(struct kvm_vcpu *vcpu, struct rtas_args *args) goto out; } - args->rets[1] = server; - args->rets[2] = priority; + args->rets[1] = cpu_to_be32(server); + args->rets[2] = cpu_to_be32(priority); out: - args->rets[0] = rc; + args->rets[0] = cpu_to_be32(rc); } static void kvm_rtas_int_off(struct kvm_vcpu *vcpu, struct rtas_args *args) @@ -69,18 +69,18 @@ static void kvm_rtas_int_off(struct kvm_vcpu *vcpu, struct rtas_args *args) u32 irq; int rc; - if (args->nargs != 1 || args->nret != 1) { + if (be32_to_cpu(args->nargs) != 1 || be32_to_cpu(args->nret) != 1) { rc = -3; goto out; } - irq = args->args[0]; + irq = be32_to_cpu(args->args[0]); rc = kvmppc_xics_int_off(vcpu->kvm, irq); if (rc) rc = -3; out: - args->rets[0] = rc; + args->rets[0] = cpu_to_be32(rc); } static void kvm_rtas_int_on(struct kvm_vcpu *vcpu, struct rtas_args *args) @@ -88,18 +88,18 @@ static void kvm_rtas_int_on(struct kvm_vcpu *vcpu, struct rtas_args *args) u32 irq; int rc; - if (args->nargs != 1 || args->nret != 1) { + if (be32_to_cpu(args->nargs) != 1 || be32_to_cpu(args->nret) != 1) { rc = -3; goto out; } - irq = args->args[0]; + irq = be32_to_cpu(args->args[0]); rc = kvmppc_xics_int_on(vcpu->kvm, irq); if (rc) rc = -3; out: - args->rets[0] = rc; + args->rets[0] = cpu_to_be32(rc); } #endif /* CONFIG_KVM_XICS */ @@ -205,32 +205,6 @@ int kvm_vm_ioctl_rtas_define_token(struct kvm *kvm, void __user *argp) return rc; } -static void kvmppc_rtas_swap_endian_in(struct rtas_args *args) -{ -#ifdef __LITTLE_ENDIAN__ - int i; - - args->token = be32_to_cpu(args->token); - args->nargs = be32_to_cpu(args->nargs); - args->nret = be32_to_cpu(args->nret); - for (i = 0; i < args->nargs; i++) - args->args[i] = be32_to_cpu(args->args[i]); -#endif -} - -static void kvmppc_rtas_swap_endian_out(struct rtas_args *args) -{ -#ifdef __LITTLE_ENDIAN__ - int i; - - for (i = 0; i < args->nret; i++) - args->args[i] = cpu_to_be32(args->args[i]); - args->token = cpu_to_be32(args->token); - args->nargs = cpu_to_be32(args->nargs); - args->nret = cpu_to_be32(args->nret); -#endif -} - int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu) { struct rtas_token_definition *d; @@ -249,8 +223,6 @@ int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu) if (rc) goto fail; - kvmppc_rtas_swap_endian_in(&args); - /* * args->rets is a pointer into args->args. Now that we've * copied args we need to fix it up to point into our copy, @@ -258,13 +230,13 @@ int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu) * value so we can restore it on the way out. */ orig_rets = args.rets; - args.rets = &args.args[args.nargs]; + args.rets = &args.args[be32_to_cpu(args.nargs)]; mutex_lock(&vcpu->kvm->lock); rc = -ENOENT; list_for_each_entry(d, &vcpu->kvm->arch.rtas_tokens, list) { - if (d->token == args.token) { + if (d->token == be32_to_cpu(args.token)) { d->handler->handler(vcpu, &args); rc = 0; break; @@ -275,7 +247,6 @@ int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu) if (rc == 0) { args.rets = orig_rets; - kvmppc_rtas_swap_endian_out(&args); rc = kvm_write_guest(vcpu->kvm, args_phys, &args, sizeof(args)); if (rc) goto fail; diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index dd2cc03f406f..86903d3f5a03 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -473,7 +473,8 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, if (printk_ratelimit()) pr_err("%s: pte not present: gfn %lx, pfn %lx\n", __func__, (long)gfn, pfn); - return -EINVAL; + ret = -EINVAL; + goto out; } kvmppc_e500_ref_setup(ref, gtlbe, pfn, wimg); diff --git a/arch/powerpc/lib/mem_64.S b/arch/powerpc/lib/mem_64.S index 0738f96befbf..43435c6892fb 100644 --- a/arch/powerpc/lib/mem_64.S +++ b/arch/powerpc/lib/mem_64.S @@ -77,7 +77,7 @@ _GLOBAL(memset) stb r4,0(r6) blr -_GLOBAL(memmove) +_GLOBAL_TOC(memmove) cmplw 0,r3,r4 bgt backwards_memcpy b memcpy diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 412dd46dd0b7..5c09f365c842 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -1198,7 +1198,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) sh = regs->gpr[rb] & 0x3f; ival = (signed int) regs->gpr[rd]; regs->gpr[ra] = ival >> (sh < 32 ? sh : 31); - if (ival < 0 && (sh >= 32 || (ival & ((1 << sh) - 1)) != 0)) + if (ival < 0 && (sh >= 32 || (ival & ((1ul << sh) - 1)) != 0)) regs->xer |= XER_CA; else regs->xer &= ~XER_CA; @@ -1208,7 +1208,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) sh = rb; ival = (signed int) regs->gpr[rd]; regs->gpr[ra] = ival >> sh; - if (ival < 0 && (ival & ((1 << sh) - 1)) != 0) + if (ival < 0 && (ival & ((1ul << sh) - 1)) != 0) regs->xer |= XER_CA; else regs->xer &= ~XER_CA; @@ -1216,7 +1216,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) #ifdef __powerpc64__ case 27: /* sld */ - sh = regs->gpr[rd] & 0x7f; + sh = regs->gpr[rb] & 0x7f; if (sh < 64) regs->gpr[ra] = regs->gpr[rd] << sh; else @@ -1235,7 +1235,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) sh = regs->gpr[rb] & 0x7f; ival = (signed long int) regs->gpr[rd]; regs->gpr[ra] = ival >> (sh < 64 ? sh : 63); - if (ival < 0 && (sh >= 64 || (ival & ((1 << sh) - 1)) != 0)) + if (ival < 0 && (sh >= 64 || (ival & ((1ul << sh) - 1)) != 0)) regs->xer |= XER_CA; else regs->xer &= ~XER_CA; @@ -1246,7 +1246,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) sh = rb | ((instr & 2) << 4); ival = (signed long int) regs->gpr[rd]; regs->gpr[ra] = ival >> sh; - if (ival < 0 && (ival & ((1 << sh) - 1)) != 0) + if (ival < 0 && (ival & ((1ul << sh) - 1)) != 0) regs->xer |= XER_CA; else regs->xer &= ~XER_CA; diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 6dcdadefd8d0..82e82cadcde5 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -390,12 +390,16 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image, case BPF_ANC | SKF_AD_VLAN_TAG: case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT: BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2); + BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000); + PPC_LHZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, vlan_tci)); - if (code == (BPF_ANC | SKF_AD_VLAN_TAG)) - PPC_ANDI(r_A, r_A, VLAN_VID_MASK); - else + if (code == (BPF_ANC | SKF_AD_VLAN_TAG)) { + PPC_ANDI(r_A, r_A, ~VLAN_TAG_PRESENT); + } else { PPC_ANDI(r_A, r_A, VLAN_TAG_PRESENT); + PPC_SRWI(r_A, r_A, 12); + } break; case BPF_ANC | SKF_AD_QUEUE: BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 6b0641c3f03f..fe52db2eea6a 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -1307,6 +1307,9 @@ static void power_pmu_enable(struct pmu *pmu) out_enable: pmao_restore_workaround(ebb); + if (ppmu->flags & PPMU_ARCH_207S) + mtspr(SPRN_MMCR2, 0); + mmcr0 = ebb_switch_in(ebb, cpuhw->mmcr[0]); mb(); @@ -1315,9 +1318,6 @@ static void power_pmu_enable(struct pmu *pmu) write_mmcr0(cpuhw, mmcr0); - if (ppmu->flags & PPMU_ARCH_207S) - mtspr(SPRN_MMCR2, 0); - /* * Enable instruction sampling if necessary */ diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index e0766b82e165..66d0f179650f 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -387,8 +387,7 @@ static int h_24x7_event_init(struct perf_event *event) event->attr.exclude_hv || event->attr.exclude_idle || event->attr.exclude_host || - event->attr.exclude_guest || - is_sampling_event(event)) /* no sampling */ + event->attr.exclude_guest) return -EINVAL; /* no branch sampling */ @@ -513,6 +512,9 @@ static int hv_24x7_init(void) if (!hv_page_cache) return -ENOMEM; + /* sampling not supported */ + h_24x7_pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; + r = perf_pmu_register(&h_24x7_pmu, h_24x7_pmu.name, -1); if (r) return r; diff --git a/arch/powerpc/perf/hv-gpci.c b/arch/powerpc/perf/hv-gpci.c index c9d399a2df82..15fc76c93022 100644 --- a/arch/powerpc/perf/hv-gpci.c +++ b/arch/powerpc/perf/hv-gpci.c @@ -210,8 +210,7 @@ static int h_gpci_event_init(struct perf_event *event) event->attr.exclude_hv || event->attr.exclude_idle || event->attr.exclude_host || - event->attr.exclude_guest || - is_sampling_event(event)) /* no sampling */ + event->attr.exclude_guest) return -EINVAL; /* no branch sampling */ @@ -284,6 +283,9 @@ static int hv_gpci_init(void) return -ENODEV; } + /* sampling not supported */ + h_gpci_pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; + r = perf_pmu_register(&h_gpci_pmu, h_gpci_pmu.name, -1); if (r) return r; diff --git a/arch/powerpc/platforms/powernv/opal-elog.c b/arch/powerpc/platforms/powernv/opal-elog.c index 10268c41d830..0ad533b617f7 100644 --- a/arch/powerpc/platforms/powernv/opal-elog.c +++ b/arch/powerpc/platforms/powernv/opal-elog.c @@ -249,7 +249,7 @@ static void elog_work_fn(struct work_struct *work) rc = opal_get_elog_size(&id, &size, &type); if (rc != OPAL_SUCCESS) { - pr_err("ELOG: Opal log read failed\n"); + pr_err("ELOG: OPAL log info read failed\n"); return; } @@ -257,7 +257,7 @@ static void elog_work_fn(struct work_struct *work) log_id = be64_to_cpu(id); elog_type = be64_to_cpu(type); - BUG_ON(elog_size > OPAL_MAX_ERRLOG_SIZE); + WARN_ON(elog_size > OPAL_MAX_ERRLOG_SIZE); if (elog_size >= OPAL_MAX_ERRLOG_SIZE) elog_size = OPAL_MAX_ERRLOG_SIZE; diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index 022b38e6a80b..2d0b4d68a40a 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -86,6 +86,7 @@ static struct device_node *dlpar_parse_cc_node(struct cc_workarea *ccwa, } of_node_set_flag(dn, OF_DYNAMIC); + of_node_init(dn); return dn; } diff --git a/arch/powerpc/platforms/pseries/reconfig.c b/arch/powerpc/platforms/pseries/reconfig.c index 0435bb65d0aa..1c0a60d98867 100644 --- a/arch/powerpc/platforms/pseries/reconfig.c +++ b/arch/powerpc/platforms/pseries/reconfig.c @@ -69,6 +69,7 @@ static int pSeries_reconfig_add_node(const char *path, struct property *proplist np->properties = proplist; of_node_set_flag(np, OF_DYNAMIC); + of_node_init(np); np->parent = derive_parent(path); if (IS_ERR(np->parent)) { diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index bb63499fc5d3..f5af5f6ef0f4 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -116,7 +116,6 @@ config S390 select HAVE_FTRACE_MCOUNT_RECORD select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_TRACER - select HAVE_FUNCTION_TRACE_MCOUNT_TEST select HAVE_FUTEX_CMPXCHG if FUTEX select HAVE_KERNEL_BZIP2 select HAVE_KERNEL_GZIP diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 4181d7baabba..773bef7614d8 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -305,7 +305,6 @@ struct kvm_s390_local_interrupt { struct list_head list; atomic_t active; struct kvm_s390_float_interrupt *float_int; - int timer_due; /* event indicator for waitqueue below */ wait_queue_head_t *wq; atomic_t *cpuflags; unsigned int action_bits; @@ -367,7 +366,6 @@ struct kvm_vcpu_arch { s390_fp_regs guest_fpregs; struct kvm_s390_local_interrupt local_int; struct hrtimer ckc_timer; - struct tasklet_struct tasklet; struct kvm_s390_pgm_info pgm; union { struct cpuid cpu_id; @@ -418,6 +416,7 @@ struct kvm_arch{ int css_support; int use_irqchip; int use_cmma; + int user_cpu_state_ctrl; struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS]; wait_queue_head_t ipte_wq; spinlock_t start_stop_lock; diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 6f02d452bbee..e568fc8a7250 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -217,7 +217,7 @@ static inline void cpu_relax(void) barrier(); } -#define arch_mutex_cpu_relax() barrier() +#define cpu_relax_lowlatency() barrier() static inline void psw_set_key(unsigned int key) { diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h index df38c70cd59e..18ea9e3f8142 100644 --- a/arch/s390/include/asm/switch_to.h +++ b/arch/s390/include/asm/switch_to.h @@ -51,8 +51,8 @@ static inline int restore_fp_ctl(u32 *fpc) return 0; asm volatile( - "0: lfpc %1\n" - " la %0,0\n" + " lfpc %1\n" + "0: la %0,0\n" "1:\n" EX_TABLE(0b,1b) : "=d" (rc) : "Q" (*fpc), "0" (-EINVAL)); diff --git a/arch/s390/include/uapi/asm/Kbuild b/arch/s390/include/uapi/asm/Kbuild index 736637363d31..08fe6dad9026 100644 --- a/arch/s390/include/uapi/asm/Kbuild +++ b/arch/s390/include/uapi/asm/Kbuild @@ -16,6 +16,7 @@ header-y += ioctls.h header-y += ipcbuf.h header-y += kvm.h header-y += kvm_para.h +header-y += kvm_perf.h header-y += kvm_virtio.h header-y += mman.h header-y += monwriter.h diff --git a/arch/s390/include/uapi/asm/kvm_perf.h b/arch/s390/include/uapi/asm/kvm_perf.h new file mode 100644 index 000000000000..397282727e21 --- /dev/null +++ b/arch/s390/include/uapi/asm/kvm_perf.h @@ -0,0 +1,25 @@ +/* + * Definitions for perf-kvm on s390 + * + * Copyright 2014 IBM Corp. + * Author(s): Alexander Yarygin <yarygin@linux.vnet.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + */ + +#ifndef __LINUX_KVM_PERF_S390_H +#define __LINUX_KVM_PERF_S390_H + +#include <asm/sie.h> + +#define DECODE_STR_LEN 40 + +#define VCPU_ID "id" + +#define KVM_ENTRY_TRACE "kvm:kvm_s390_sie_enter" +#define KVM_EXIT_TRACE "kvm:kvm_s390_sie_exit" +#define KVM_EXIT_REASON "icptcode" + +#endif diff --git a/arch/s390/include/uapi/asm/sie.h b/arch/s390/include/uapi/asm/sie.h index 5d9cc19462c4..d4096fdfc6ab 100644 --- a/arch/s390/include/uapi/asm/sie.h +++ b/arch/s390/include/uapi/asm/sie.h @@ -108,6 +108,7 @@ exit_code_ipa0(0xB2, 0x17, "STETR"), \ exit_code_ipa0(0xB2, 0x18, "PC"), \ exit_code_ipa0(0xB2, 0x20, "SERVC"), \ + exit_code_ipa0(0xB2, 0x21, "IPTE"), \ exit_code_ipa0(0xB2, 0x28, "PT"), \ exit_code_ipa0(0xB2, 0x29, "ISKE"), \ exit_code_ipa0(0xB2, 0x2a, "RRBE"), \ diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S index 7ba7d6784510..e88d35d74950 100644 --- a/arch/s390/kernel/head.S +++ b/arch/s390/kernel/head.S @@ -437,11 +437,11 @@ ENTRY(startup_kdump) #if defined(CONFIG_64BIT) #if defined(CONFIG_MARCH_ZEC12) - .long 3, 0xc100efea, 0xf46ce800, 0x00400000 + .long 3, 0xc100eff2, 0xf46ce800, 0x00400000 #elif defined(CONFIG_MARCH_Z196) - .long 2, 0xc100efea, 0xf46c0000 + .long 2, 0xc100eff2, 0xf46c0000 #elif defined(CONFIG_MARCH_Z10) - .long 2, 0xc100efea, 0xf0680000 + .long 2, 0xc100eff2, 0xf0680000 #elif defined(CONFIG_MARCH_Z9_109) .long 1, 0xc100efc2 #elif defined(CONFIG_MARCH_Z990) diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S index 08dcf21cb8df..433c6dbfa442 100644 --- a/arch/s390/kernel/mcount.S +++ b/arch/s390/kernel/mcount.S @@ -21,13 +21,9 @@ ENTRY(_mcount) ENTRY(ftrace_caller) #endif stm %r2,%r5,16(%r15) - bras %r1,2f + bras %r1,1f 0: .long ftrace_trace_function -1: .long function_trace_stop -2: l %r2,1b-0b(%r1) - icm %r2,0xf,0(%r2) - jnz 3f - st %r14,56(%r15) +1: st %r14,56(%r15) lr %r0,%r15 ahi %r15,-96 l %r3,100(%r15) @@ -50,7 +46,7 @@ ENTRY(ftrace_graph_caller) #endif ahi %r15,96 l %r14,56(%r15) -3: lm %r2,%r5,16(%r15) + lm %r2,%r5,16(%r15) br %r14 #ifdef CONFIG_FUNCTION_GRAPH_TRACER diff --git a/arch/s390/kernel/mcount64.S b/arch/s390/kernel/mcount64.S index 1c52eae3396a..c67a8bf0fd9a 100644 --- a/arch/s390/kernel/mcount64.S +++ b/arch/s390/kernel/mcount64.S @@ -20,9 +20,6 @@ ENTRY(_mcount) ENTRY(ftrace_caller) #endif - larl %r1,function_trace_stop - icm %r1,0xf,0(%r1) - bnzr %r14 stmg %r2,%r5,32(%r15) stg %r14,112(%r15) lgr %r1,%r15 diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index ea75d011a6fc..d3194de7ae1e 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -411,12 +411,6 @@ static int cpumf_pmu_event_init(struct perf_event *event) case PERF_TYPE_HARDWARE: case PERF_TYPE_HW_CACHE: case PERF_TYPE_RAW: - /* The CPU measurement counter facility does not have overflow - * interrupts to do sampling. Sampling must be provided by - * external means, for example, by timers. - */ - if (is_sampling_event(event)) - return -ENOENT; err = __hw_perf_event_init(event); break; default: @@ -681,6 +675,12 @@ static int __init cpumf_pmu_init(void) goto out; } + /* The CPU measurement counter facility does not have overflow + * interrupts to do sampling. Sampling must be provided by + * external means, for example, by timers. + */ + cpumf_pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; + cpumf_pmu.attr_groups = cpumf_cf_event_group(); rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", PERF_TYPE_RAW); if (rc) { diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 2d716734b5b1..5dc7ad9e2fbf 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -334,9 +334,14 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data) unsigned long mask = PSW_MASK_USER; mask |= is_ri_task(child) ? PSW_MASK_RI : 0; - if ((data & ~mask) != PSW_USER_BITS) + if ((data ^ PSW_USER_BITS) & ~mask) + /* Invalid psw mask. */ + return -EINVAL; + if ((data & PSW_MASK_ASC) == PSW_ASC_HOME) + /* Invalid address-space-control bits */ return -EINVAL; if ((data & PSW_MASK_EA) && !(data & PSW_MASK_BA)) + /* Invalid addressing mode bits */ return -EINVAL; } *(addr_t *)((addr_t) &task_pt_regs(child)->psw + addr) = data; @@ -672,9 +677,12 @@ static int __poke_user_compat(struct task_struct *child, mask |= is_ri_task(child) ? PSW32_MASK_RI : 0; /* Build a 64 bit psw mask from 31 bit mask. */ - if ((tmp & ~mask) != PSW32_USER_BITS) + if ((tmp ^ PSW32_USER_BITS) & ~mask) /* Invalid psw mask. */ return -EINVAL; + if ((data & PSW32_MASK_ASC) == PSW32_ASC_HOME) + /* Invalid address-space-control bits */ + return -EINVAL; regs->psw.mask = (regs->psw.mask & ~PSW_MASK_USER) | (regs->psw.mask & PSW_MASK_BA) | (__u64)(tmp & mask) << 32; diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 0161675878a2..59bd8f991b98 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -176,7 +176,8 @@ static int __diag_ipl_functions(struct kvm_vcpu *vcpu) return -EOPNOTSUPP; } - kvm_s390_vcpu_stop(vcpu); + if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) + kvm_s390_vcpu_stop(vcpu); vcpu->run->s390_reset_flags |= KVM_S390_RESET_SUBSYSTEM; vcpu->run->s390_reset_flags |= KVM_S390_RESET_IPL; vcpu->run->s390_reset_flags |= KVM_S390_RESET_CPU_INIT; diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index a0b586c1913c..eaf46291d361 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -56,32 +56,26 @@ static int handle_noop(struct kvm_vcpu *vcpu) static int handle_stop(struct kvm_vcpu *vcpu) { int rc = 0; + unsigned int action_bits; vcpu->stat.exit_stop_request++; - spin_lock_bh(&vcpu->arch.local_int.lock); - trace_kvm_s390_stop_request(vcpu->arch.local_int.action_bits); - if (vcpu->arch.local_int.action_bits & ACTION_STOP_ON_STOP) { - kvm_s390_vcpu_stop(vcpu); - vcpu->arch.local_int.action_bits &= ~ACTION_STOP_ON_STOP; - VCPU_EVENT(vcpu, 3, "%s", "cpu stopped"); - rc = -EOPNOTSUPP; - } + action_bits = vcpu->arch.local_int.action_bits; - if (vcpu->arch.local_int.action_bits & ACTION_STORE_ON_STOP) { - vcpu->arch.local_int.action_bits &= ~ACTION_STORE_ON_STOP; - /* store status must be called unlocked. Since local_int.lock - * only protects local_int.* and not guest memory we can give - * up the lock here */ - spin_unlock_bh(&vcpu->arch.local_int.lock); + if (!(action_bits & ACTION_STOP_ON_STOP)) + return 0; + + if (action_bits & ACTION_STORE_ON_STOP) { rc = kvm_s390_vcpu_store_status(vcpu, KVM_S390_STORE_STATUS_NOADDR); - if (rc >= 0) - rc = -EOPNOTSUPP; - } else - spin_unlock_bh(&vcpu->arch.local_int.lock); - return rc; + if (rc) + return rc; + } + + if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) + kvm_s390_vcpu_stop(vcpu); + return -EOPNOTSUPP; } static int handle_validity(struct kvm_vcpu *vcpu) diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 90c8de22a2a0..92528a0bdda6 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -158,6 +158,9 @@ static void __reset_intercept_indicators(struct kvm_vcpu *vcpu) LCTL_CR10 | LCTL_CR11); vcpu->arch.sie_block->ictl |= (ICTL_STCTL | ICTL_PINT); } + + if (vcpu->arch.local_int.action_bits & ACTION_STOP_ON_STOP) + atomic_set_mask(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags); } static void __set_cpuflag(struct kvm_vcpu *vcpu, u32 flag) @@ -544,13 +547,13 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu) int rc = 0; if (atomic_read(&li->active)) { - spin_lock_bh(&li->lock); + spin_lock(&li->lock); list_for_each_entry(inti, &li->list, list) if (__interrupt_is_deliverable(vcpu, inti)) { rc = 1; break; } - spin_unlock_bh(&li->lock); + spin_unlock(&li->lock); } if ((!rc) && atomic_read(&fi->active)) { @@ -585,88 +588,56 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) int kvm_s390_handle_wait(struct kvm_vcpu *vcpu) { u64 now, sltime; - DECLARE_WAITQUEUE(wait, current); vcpu->stat.exit_wait_state++; - if (kvm_cpu_has_interrupt(vcpu)) - return 0; - __set_cpu_idle(vcpu); - spin_lock_bh(&vcpu->arch.local_int.lock); - vcpu->arch.local_int.timer_due = 0; - spin_unlock_bh(&vcpu->arch.local_int.lock); + /* fast path */ + if (kvm_cpu_has_pending_timer(vcpu) || kvm_arch_vcpu_runnable(vcpu)) + return 0; if (psw_interrupts_disabled(vcpu)) { VCPU_EVENT(vcpu, 3, "%s", "disabled wait"); - __unset_cpu_idle(vcpu); return -EOPNOTSUPP; /* disabled wait */ } + __set_cpu_idle(vcpu); if (!ckc_interrupts_enabled(vcpu)) { VCPU_EVENT(vcpu, 3, "%s", "enabled wait w/o timer"); goto no_timer; } now = get_tod_clock_fast() + vcpu->arch.sie_block->epoch; - if (vcpu->arch.sie_block->ckc < now) { - __unset_cpu_idle(vcpu); - return 0; - } - sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now); - hrtimer_start(&vcpu->arch.ckc_timer, ktime_set (0, sltime) , HRTIMER_MODE_REL); VCPU_EVENT(vcpu, 5, "enabled wait via clock comparator: %llx ns", sltime); no_timer: srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); - spin_lock(&vcpu->arch.local_int.float_int->lock); - spin_lock_bh(&vcpu->arch.local_int.lock); - add_wait_queue(&vcpu->wq, &wait); - while (list_empty(&vcpu->arch.local_int.list) && - list_empty(&vcpu->arch.local_int.float_int->list) && - (!vcpu->arch.local_int.timer_due) && - !signal_pending(current) && - !kvm_s390_si_ext_call_pending(vcpu)) { - set_current_state(TASK_INTERRUPTIBLE); - spin_unlock_bh(&vcpu->arch.local_int.lock); - spin_unlock(&vcpu->arch.local_int.float_int->lock); - schedule(); - spin_lock(&vcpu->arch.local_int.float_int->lock); - spin_lock_bh(&vcpu->arch.local_int.lock); - } + kvm_vcpu_block(vcpu); __unset_cpu_idle(vcpu); - __set_current_state(TASK_RUNNING); - remove_wait_queue(&vcpu->wq, &wait); - spin_unlock_bh(&vcpu->arch.local_int.lock); - spin_unlock(&vcpu->arch.local_int.float_int->lock); vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); hrtimer_try_to_cancel(&vcpu->arch.ckc_timer); return 0; } -void kvm_s390_tasklet(unsigned long parm) +void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu) { - struct kvm_vcpu *vcpu = (struct kvm_vcpu *) parm; - - spin_lock(&vcpu->arch.local_int.lock); - vcpu->arch.local_int.timer_due = 1; - if (waitqueue_active(&vcpu->wq)) + if (waitqueue_active(&vcpu->wq)) { + /* + * The vcpu gave up the cpu voluntarily, mark it as a good + * yield-candidate. + */ + vcpu->preempted = true; wake_up_interruptible(&vcpu->wq); - spin_unlock(&vcpu->arch.local_int.lock); + } } -/* - * low level hrtimer wake routine. Because this runs in hardirq context - * we schedule a tasklet to do the real work. - */ enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer) { struct kvm_vcpu *vcpu; vcpu = container_of(timer, struct kvm_vcpu, arch.ckc_timer); - vcpu->preempted = true; - tasklet_schedule(&vcpu->arch.tasklet); + kvm_s390_vcpu_wakeup(vcpu); return HRTIMER_NORESTART; } @@ -676,13 +647,13 @@ void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu) struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; struct kvm_s390_interrupt_info *n, *inti = NULL; - spin_lock_bh(&li->lock); + spin_lock(&li->lock); list_for_each_entry_safe(inti, n, &li->list, list) { list_del(&inti->list); kfree(inti); } atomic_set(&li->active, 0); - spin_unlock_bh(&li->lock); + spin_unlock(&li->lock); /* clear pending external calls set by sigp interpretation facility */ atomic_clear_mask(CPUSTAT_ECALL_PEND, &vcpu->arch.sie_block->cpuflags); @@ -701,7 +672,7 @@ void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) if (atomic_read(&li->active)) { do { deliver = 0; - spin_lock_bh(&li->lock); + spin_lock(&li->lock); list_for_each_entry_safe(inti, n, &li->list, list) { if (__interrupt_is_deliverable(vcpu, inti)) { list_del(&inti->list); @@ -712,7 +683,7 @@ void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) } if (list_empty(&li->list)) atomic_set(&li->active, 0); - spin_unlock_bh(&li->lock); + spin_unlock(&li->lock); if (deliver) { __do_deliver_interrupt(vcpu, inti); kfree(inti); @@ -758,7 +729,7 @@ void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu) if (atomic_read(&li->active)) { do { deliver = 0; - spin_lock_bh(&li->lock); + spin_lock(&li->lock); list_for_each_entry_safe(inti, n, &li->list, list) { if ((inti->type == KVM_S390_MCHK) && __interrupt_is_deliverable(vcpu, inti)) { @@ -770,7 +741,7 @@ void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu) } if (list_empty(&li->list)) atomic_set(&li->active, 0); - spin_unlock_bh(&li->lock); + spin_unlock(&li->lock); if (deliver) { __do_deliver_interrupt(vcpu, inti); kfree(inti); @@ -817,11 +788,11 @@ int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code) VCPU_EVENT(vcpu, 3, "inject: program check %d (from kernel)", code); trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, inti->type, code, 0, 1); - spin_lock_bh(&li->lock); + spin_lock(&li->lock); list_add(&inti->list, &li->list); atomic_set(&li->active, 1); BUG_ON(waitqueue_active(li->wq)); - spin_unlock_bh(&li->lock); + spin_unlock(&li->lock); return 0; } @@ -842,11 +813,11 @@ int kvm_s390_inject_prog_irq(struct kvm_vcpu *vcpu, inti->type = KVM_S390_PROGRAM_INT; memcpy(&inti->pgm, pgm_info, sizeof(inti->pgm)); - spin_lock_bh(&li->lock); + spin_lock(&li->lock); list_add(&inti->list, &li->list); atomic_set(&li->active, 1); BUG_ON(waitqueue_active(li->wq)); - spin_unlock_bh(&li->lock); + spin_unlock(&li->lock); return 0; } @@ -934,12 +905,10 @@ static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti) } dst_vcpu = kvm_get_vcpu(kvm, sigcpu); li = &dst_vcpu->arch.local_int; - spin_lock_bh(&li->lock); + spin_lock(&li->lock); atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); - if (waitqueue_active(li->wq)) - wake_up_interruptible(li->wq); - kvm_get_vcpu(kvm, sigcpu)->preempted = true; - spin_unlock_bh(&li->lock); + spin_unlock(&li->lock); + kvm_s390_vcpu_wakeup(kvm_get_vcpu(kvm, sigcpu)); unlock_fi: spin_unlock(&fi->lock); mutex_unlock(&kvm->lock); @@ -1081,7 +1050,7 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, mutex_lock(&vcpu->kvm->lock); li = &vcpu->arch.local_int; - spin_lock_bh(&li->lock); + spin_lock(&li->lock); if (inti->type == KVM_S390_PROGRAM_INT) list_add(&inti->list, &li->list); else @@ -1090,11 +1059,9 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, if (inti->type == KVM_S390_SIGP_STOP) li->action_bits |= ACTION_STOP_ON_STOP; atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); - if (waitqueue_active(&vcpu->wq)) - wake_up_interruptible(&vcpu->wq); - vcpu->preempted = true; - spin_unlock_bh(&li->lock); + spin_unlock(&li->lock); mutex_unlock(&vcpu->kvm->lock); + kvm_s390_vcpu_wakeup(vcpu); return 0; } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 2f3e14fe91a4..339b34a02fb8 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -166,7 +166,9 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_IOEVENTFD: case KVM_CAP_DEVICE_CTRL: case KVM_CAP_ENABLE_CAP_VM: + case KVM_CAP_S390_IRQCHIP: case KVM_CAP_VM_ATTRIBUTES: + case KVM_CAP_MP_STATE: r = 1; break; case KVM_CAP_NR_VCPUS: @@ -595,7 +597,8 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->pp = 0; vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; kvm_clear_async_pf_completion_queue(vcpu); - kvm_s390_vcpu_stop(vcpu); + if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) + kvm_s390_vcpu_stop(vcpu); kvm_s390_clear_local_irqs(vcpu); } @@ -647,8 +650,6 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) return rc; } hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); - tasklet_init(&vcpu->arch.tasklet, kvm_s390_tasklet, - (unsigned long) vcpu); vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup; get_cpu_id(&vcpu->arch.cpu_id); vcpu->arch.cpu_id.version = 0xff; @@ -926,7 +927,7 @@ static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw) { int rc = 0; - if (!(atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_STOPPED)) + if (!is_vcpu_stopped(vcpu)) rc = -EBUSY; else { vcpu->run->psw_mask = psw.mask; @@ -980,13 +981,34 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { - return -EINVAL; /* not implemented yet */ + /* CHECK_STOP and LOAD are not supported yet */ + return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED : + KVM_MP_STATE_OPERATING; } int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { - return -EINVAL; /* not implemented yet */ + int rc = 0; + + /* user space knows about this interface - let it control the state */ + vcpu->kvm->arch.user_cpu_state_ctrl = 1; + + switch (mp_state->mp_state) { + case KVM_MP_STATE_STOPPED: + kvm_s390_vcpu_stop(vcpu); + break; + case KVM_MP_STATE_OPERATING: + kvm_s390_vcpu_start(vcpu); + break; + case KVM_MP_STATE_LOAD: + case KVM_MP_STATE_CHECK_STOP: + /* fall through - CHECK_STOP and LOAD are not supported yet */ + default: + rc = -ENXIO; + } + + return rc; } bool kvm_s390_cmma_enabled(struct kvm *kvm) @@ -1045,6 +1067,9 @@ retry: goto retry; } + /* nothing to do, just clear the request */ + clear_bit(KVM_REQ_UNHALT, &vcpu->requests); + return 0; } @@ -1284,7 +1309,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) if (vcpu->sigset_active) sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); - kvm_s390_vcpu_start(vcpu); + if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) { + kvm_s390_vcpu_start(vcpu); + } else if (is_vcpu_stopped(vcpu)) { + pr_err_ratelimited("kvm-s390: can't run stopped vcpu %d\n", + vcpu->vcpu_id); + return -EINVAL; + } switch (kvm_run->exit_reason) { case KVM_EXIT_S390_SIEIC: @@ -1413,11 +1444,6 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) return kvm_s390_store_status_unloaded(vcpu, addr); } -static inline int is_vcpu_stopped(struct kvm_vcpu *vcpu) -{ - return atomic_read(&(vcpu)->arch.sie_block->cpuflags) & CPUSTAT_STOPPED; -} - static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu) { kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu); @@ -1451,7 +1477,7 @@ void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu) trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1); /* Only one cpu at a time may enter/leave the STOPPED state. */ - spin_lock_bh(&vcpu->kvm->arch.start_stop_lock); + spin_lock(&vcpu->kvm->arch.start_stop_lock); online_vcpus = atomic_read(&vcpu->kvm->online_vcpus); for (i = 0; i < online_vcpus; i++) { @@ -1477,7 +1503,7 @@ void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu) * Let's play safe and flush the VCPU at startup. */ vcpu->arch.sie_block->ihcpu = 0xffff; - spin_unlock_bh(&vcpu->kvm->arch.start_stop_lock); + spin_unlock(&vcpu->kvm->arch.start_stop_lock); return; } @@ -1491,10 +1517,18 @@ void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu) trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0); /* Only one cpu at a time may enter/leave the STOPPED state. */ - spin_lock_bh(&vcpu->kvm->arch.start_stop_lock); + spin_lock(&vcpu->kvm->arch.start_stop_lock); online_vcpus = atomic_read(&vcpu->kvm->online_vcpus); + /* Need to lock access to action_bits to avoid a SIGP race condition */ + spin_lock(&vcpu->arch.local_int.lock); atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags); + + /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */ + vcpu->arch.local_int.action_bits &= + ~(ACTION_STOP_ON_STOP | ACTION_STORE_ON_STOP); + spin_unlock(&vcpu->arch.local_int.lock); + __disable_ibs_on_vcpu(vcpu); for (i = 0; i < online_vcpus; i++) { @@ -1512,7 +1546,7 @@ void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu) __enable_ibs_on_vcpu(started_vcpu); } - spin_unlock_bh(&vcpu->kvm->arch.start_stop_lock); + spin_unlock(&vcpu->kvm->arch.start_stop_lock); return; } diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index a8655ed31616..3862fa2cefe0 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -45,9 +45,9 @@ do { \ d_args); \ } while (0) -static inline int __cpu_is_stopped(struct kvm_vcpu *vcpu) +static inline int is_vcpu_stopped(struct kvm_vcpu *vcpu) { - return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_STOP_INT; + return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_STOPPED; } static inline int kvm_is_ucontrol(struct kvm *kvm) @@ -129,9 +129,15 @@ static inline void kvm_s390_set_psw_cc(struct kvm_vcpu *vcpu, unsigned long cc) vcpu->arch.sie_block->gpsw.mask |= cc << 44; } +/* are cpu states controlled by user space */ +static inline int kvm_s390_user_cpu_state_ctrl(struct kvm *kvm) +{ + return kvm->arch.user_cpu_state_ctrl != 0; +} + int kvm_s390_handle_wait(struct kvm_vcpu *vcpu); +void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu); enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer); -void kvm_s390_tasklet(unsigned long parm); void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu); void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu); void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu); diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index 43079a48cc98..cf243ba3d50f 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -125,8 +125,9 @@ static int __sigp_external_call(struct kvm_vcpu *vcpu, u16 cpu_addr) return rc ? rc : SIGP_CC_ORDER_CODE_ACCEPTED; } -static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action) +static int __inject_sigp_stop(struct kvm_vcpu *dst_vcpu, int action) { + struct kvm_s390_local_interrupt *li = &dst_vcpu->arch.local_int; struct kvm_s390_interrupt_info *inti; int rc = SIGP_CC_ORDER_CODE_ACCEPTED; @@ -135,7 +136,13 @@ static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action) return -ENOMEM; inti->type = KVM_S390_SIGP_STOP; - spin_lock_bh(&li->lock); + spin_lock(&li->lock); + if (li->action_bits & ACTION_STOP_ON_STOP) { + /* another SIGP STOP is pending */ + kfree(inti); + rc = SIGP_CC_BUSY; + goto out; + } if ((atomic_read(li->cpuflags) & CPUSTAT_STOPPED)) { kfree(inti); if ((action & ACTION_STORE_ON_STOP) != 0) @@ -144,19 +151,17 @@ static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action) } list_add_tail(&inti->list, &li->list); atomic_set(&li->active, 1); - atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags); li->action_bits |= action; - if (waitqueue_active(li->wq)) - wake_up_interruptible(li->wq); + atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags); + kvm_s390_vcpu_wakeup(dst_vcpu); out: - spin_unlock_bh(&li->lock); + spin_unlock(&li->lock); return rc; } static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int action) { - struct kvm_s390_local_interrupt *li; struct kvm_vcpu *dst_vcpu = NULL; int rc; @@ -166,9 +171,8 @@ static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int action) dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); if (!dst_vcpu) return SIGP_CC_NOT_OPERATIONAL; - li = &dst_vcpu->arch.local_int; - rc = __inject_sigp_stop(li, action); + rc = __inject_sigp_stop(dst_vcpu, action); VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", cpu_addr); @@ -238,7 +242,7 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address, if (!inti) return SIGP_CC_BUSY; - spin_lock_bh(&li->lock); + spin_lock(&li->lock); /* cpu must be in stopped state */ if (!(atomic_read(li->cpuflags) & CPUSTAT_STOPPED)) { *reg &= 0xffffffff00000000UL; @@ -253,13 +257,12 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address, list_add_tail(&inti->list, &li->list); atomic_set(&li->active, 1); - if (waitqueue_active(li->wq)) - wake_up_interruptible(li->wq); + kvm_s390_vcpu_wakeup(dst_vcpu); rc = SIGP_CC_ORDER_CODE_ACCEPTED; VCPU_EVENT(vcpu, 4, "set prefix of cpu %02x to %x", cpu_addr, address); out_li: - spin_unlock_bh(&li->lock); + spin_unlock(&li->lock); return rc; } @@ -275,9 +278,9 @@ static int __sigp_store_status_at_addr(struct kvm_vcpu *vcpu, u16 cpu_id, if (!dst_vcpu) return SIGP_CC_NOT_OPERATIONAL; - spin_lock_bh(&dst_vcpu->arch.local_int.lock); + spin_lock(&dst_vcpu->arch.local_int.lock); flags = atomic_read(dst_vcpu->arch.local_int.cpuflags); - spin_unlock_bh(&dst_vcpu->arch.local_int.lock); + spin_unlock(&dst_vcpu->arch.local_int.lock); if (!(flags & CPUSTAT_STOPPED)) { *reg &= 0xffffffff00000000UL; *reg |= SIGP_STATUS_INCORRECT_STATE; @@ -338,10 +341,10 @@ static int sigp_check_callable(struct kvm_vcpu *vcpu, u16 cpu_addr) if (!dst_vcpu) return SIGP_CC_NOT_OPERATIONAL; li = &dst_vcpu->arch.local_int; - spin_lock_bh(&li->lock); + spin_lock(&li->lock); if (li->action_bits & ACTION_STOP_ON_STOP) rc = SIGP_CC_BUSY; - spin_unlock_bh(&li->lock); + spin_unlock(&li->lock); return rc; } @@ -461,12 +464,7 @@ int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu) dest_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); BUG_ON(dest_vcpu == NULL); - spin_lock_bh(&dest_vcpu->arch.local_int.lock); - if (waitqueue_active(&dest_vcpu->wq)) - wake_up_interruptible(&dest_vcpu->wq); - dest_vcpu->preempted = true; - spin_unlock_bh(&dest_vcpu->arch.local_int.lock); - + kvm_s390_vcpu_wakeup(dest_vcpu); kvm_s390_set_psw_cc(vcpu, SIGP_CC_ORDER_CODE_ACCEPTED); return 0; } diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 9ddc51eeb8d6..30de42730b2f 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -48,13 +48,10 @@ static LIST_HEAD(zpci_list); static DEFINE_SPINLOCK(zpci_list_lock); -static void zpci_enable_irq(struct irq_data *data); -static void zpci_disable_irq(struct irq_data *data); - static struct irq_chip zpci_irq_chip = { .name = "zPCI", - .irq_unmask = zpci_enable_irq, - .irq_mask = zpci_disable_irq, + .irq_unmask = unmask_msi_irq, + .irq_mask = mask_msi_irq, }; static DECLARE_BITMAP(zpci_domain, ZPCI_NR_DEVICES); @@ -244,43 +241,6 @@ static int zpci_cfg_store(struct zpci_dev *zdev, int offset, u32 val, u8 len) return rc; } -static int zpci_msi_set_mask_bits(struct msi_desc *msi, u32 mask, u32 flag) -{ - int offset, pos; - u32 mask_bits; - - if (msi->msi_attrib.is_msix) { - offset = msi->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE + - PCI_MSIX_ENTRY_VECTOR_CTRL; - msi->masked = readl(msi->mask_base + offset); - writel(flag, msi->mask_base + offset); - } else if (msi->msi_attrib.maskbit) { - pos = (long) msi->mask_base; - pci_read_config_dword(msi->dev, pos, &mask_bits); - mask_bits &= ~(mask); - mask_bits |= flag & mask; - pci_write_config_dword(msi->dev, pos, mask_bits); - } else - return 0; - - msi->msi_attrib.maskbit = !!flag; - return 1; -} - -static void zpci_enable_irq(struct irq_data *data) -{ - struct msi_desc *msi = irq_get_msi_desc(data->irq); - - zpci_msi_set_mask_bits(msi, 1, 0); -} - -static void zpci_disable_irq(struct irq_data *data) -{ - struct msi_desc *msi = irq_get_msi_desc(data->irq); - - zpci_msi_set_mask_bits(msi, 1, 1); -} - void pcibios_fixup_bus(struct pci_bus *bus) { } @@ -487,7 +447,10 @@ void arch_teardown_msi_irqs(struct pci_dev *pdev) /* Release MSI interrupts */ list_for_each_entry(msi, &pdev->msi_list, list) { - zpci_msi_set_mask_bits(msi, 1, 1); + if (msi->msi_attrib.is_msix) + default_msix_mask_irq(msi, 1); + else + default_msi_mask_irq(msi, 1, 1); irq_set_msi_desc(msi->irq, NULL); irq_free_desc(msi->irq); msi->msg.address_lo = 0; diff --git a/arch/score/include/asm/processor.h b/arch/score/include/asm/processor.h index d9a922d8711b..851f441991d2 100644 --- a/arch/score/include/asm/processor.h +++ b/arch/score/include/asm/processor.h @@ -24,6 +24,7 @@ extern unsigned long get_wchan(struct task_struct *p); #define current_text_addr() ({ __label__ _l; _l: &&_l; }) #define cpu_relax() barrier() +#define cpu_relax_lowlatency() cpu_relax() #define release_thread(thread) do {} while (0) /* diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 834b67c4db5a..aa2df3eaeb29 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -57,7 +57,6 @@ config SUPERH32 select HAVE_FUNCTION_TRACER select HAVE_FTRACE_MCOUNT_RECORD select HAVE_DYNAMIC_FTRACE - select HAVE_FUNCTION_TRACE_MCOUNT_TEST select HAVE_FTRACE_NMI_ENTER if DYNAMIC_FTRACE select ARCH_WANT_IPC_PARSE_VERSION select HAVE_FUNCTION_GRAPH_TRACER diff --git a/arch/sh/Makefile b/arch/sh/Makefile index d4d16e4be07c..bf5b3f5f4962 100644 --- a/arch/sh/Makefile +++ b/arch/sh/Makefile @@ -32,7 +32,8 @@ endif cflags-$(CONFIG_CPU_SH2) := $(call cc-option,-m2,) cflags-$(CONFIG_CPU_SH2A) += $(call cc-option,-m2a,) \ - $(call cc-option,-m2a-nofpu,) + $(call cc-option,-m2a-nofpu,) \ + $(call cc-option,-m4-nofpu,) cflags-$(CONFIG_CPU_SH3) := $(call cc-option,-m3,) cflags-$(CONFIG_CPU_SH4) := $(call cc-option,-m4,) \ $(call cc-option,-mno-implicit-fp,-m4-nofpu) diff --git a/arch/sh/include/asm/processor.h b/arch/sh/include/asm/processor.h index 5448f9bbf4ab..1506897648aa 100644 --- a/arch/sh/include/asm/processor.h +++ b/arch/sh/include/asm/processor.h @@ -97,6 +97,7 @@ extern struct sh_cpuinfo cpu_data[]; #define cpu_sleep() __asm__ __volatile__ ("sleep" : : : "memory") #define cpu_relax() barrier() +#define cpu_relax_lowlatency() cpu_relax() void default_idle(void); void stop_this_cpu(void *); diff --git a/arch/sh/kernel/ftrace.c b/arch/sh/kernel/ftrace.c index 3c74f53db6db..079d70e6d74b 100644 --- a/arch/sh/kernel/ftrace.c +++ b/arch/sh/kernel/ftrace.c @@ -344,6 +344,9 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) struct ftrace_graph_ent trace; unsigned long return_hooker = (unsigned long)&return_to_handler; + if (unlikely(ftrace_graph_is_dead())) + return; + if (unlikely(atomic_read(¤t->tracing_graph_pause))) return; diff --git a/arch/sh/kernel/perf_event.c b/arch/sh/kernel/perf_event.c index 02331672b6db..7cfd7f153966 100644 --- a/arch/sh/kernel/perf_event.c +++ b/arch/sh/kernel/perf_event.c @@ -129,14 +129,6 @@ static int __hw_perf_event_init(struct perf_event *event) return -ENODEV; /* - * All of the on-chip counters are "limited", in that they have - * no interrupts, and are therefore unable to do sampling without - * further work and timer assistance. - */ - if (hwc->sample_period) - return -EINVAL; - - /* * See if we need to reserve the counter. * * If no events are currently in use, then we have to take a @@ -392,6 +384,13 @@ int register_sh_pmu(struct sh_pmu *_pmu) pr_info("Performance Events: %s support registered\n", _pmu->name); + /* + * All of the on-chip counters are "limited", in that they have + * no interrupts, and are therefore unable to do sampling without + * further work and timer assistance. + */ + pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; + WARN_ON(_pmu->num_events > MAX_HWEVENTS); perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW); diff --git a/arch/sh/lib/mcount.S b/arch/sh/lib/mcount.S index 52aa2011d753..7a8572f9d58b 100644 --- a/arch/sh/lib/mcount.S +++ b/arch/sh/lib/mcount.S @@ -92,13 +92,6 @@ mcount: rts nop #else -#ifndef CONFIG_DYNAMIC_FTRACE - mov.l .Lfunction_trace_stop, r0 - mov.l @r0, r0 - tst r0, r0 - bf ftrace_stub -#endif - MCOUNT_ENTER() #ifdef CONFIG_DYNAMIC_FTRACE @@ -174,11 +167,6 @@ ftrace_graph_call: .globl ftrace_caller ftrace_caller: - mov.l .Lfunction_trace_stop, r0 - mov.l @r0, r0 - tst r0, r0 - bf ftrace_stub - MCOUNT_ENTER() .globl ftrace_call @@ -196,8 +184,6 @@ ftrace_call: #endif /* CONFIG_DYNAMIC_FTRACE */ .align 2 -.Lfunction_trace_stop: - .long function_trace_stop /* * NOTE: From here on the locations of the .Lftrace_stub label and @@ -217,12 +203,7 @@ ftrace_stub: #ifdef CONFIG_FUNCTION_GRAPH_TRACER .globl ftrace_graph_caller ftrace_graph_caller: - mov.l 2f, r0 - mov.l @r0, r0 - tst r0, r0 - bt 1f - - mov.l 3f, r1 + mov.l 2f, r1 jmp @r1 nop 1: @@ -242,8 +223,7 @@ ftrace_graph_caller: MCOUNT_LEAVE() .align 2 -2: .long function_trace_stop -3: .long skip_trace +2: .long skip_trace .Lprepare_ftrace_return: .long prepare_ftrace_return diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 29f2e988c56a..4692c90936f1 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -55,7 +55,6 @@ config SPARC64 select HAVE_FUNCTION_TRACER select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_GRAPH_FP_TEST - select HAVE_FUNCTION_TRACE_MCOUNT_TEST select HAVE_KRETPROBES select HAVE_KPROBES select HAVE_RCU_TABLE_FREE if SMP @@ -78,6 +77,7 @@ config SPARC64 select HAVE_C_RECORDMCOUNT select NO_BOOTMEM select HAVE_ARCH_AUDITSYSCALL + select ARCH_SUPPORTS_ATOMIC_RMW config ARCH_DEFCONFIG string diff --git a/arch/sparc/include/asm/processor_32.h b/arch/sparc/include/asm/processor_32.h index a564817bbc2e..812fd08f3e62 100644 --- a/arch/sparc/include/asm/processor_32.h +++ b/arch/sparc/include/asm/processor_32.h @@ -119,6 +119,8 @@ extern struct task_struct *last_task_used_math; int do_mathemu(struct pt_regs *regs, struct task_struct *fpt); #define cpu_relax() barrier() +#define cpu_relax_lowlatency() cpu_relax() + extern void (*sparc_idle)(void); #endif diff --git a/arch/sparc/include/asm/processor_64.h b/arch/sparc/include/asm/processor_64.h index 7028fe1a7c04..6924bdefe148 100644 --- a/arch/sparc/include/asm/processor_64.h +++ b/arch/sparc/include/asm/processor_64.h @@ -216,6 +216,7 @@ unsigned long get_wchan(struct task_struct *task); "nop\n\t" \ ".previous" \ ::: "memory") +#define cpu_relax_lowlatency() cpu_relax() /* Prefetch support. This is tuned for UltraSPARC-III and later. * UltraSPARC-I will treat these as nops, and UltraSPARC-II has diff --git a/arch/sparc/include/uapi/asm/unistd.h b/arch/sparc/include/uapi/asm/unistd.h index b73274fb961a..42f2bca1d338 100644 --- a/arch/sparc/include/uapi/asm/unistd.h +++ b/arch/sparc/include/uapi/asm/unistd.h @@ -410,8 +410,9 @@ #define __NR_finit_module 342 #define __NR_sched_setattr 343 #define __NR_sched_getattr 344 +#define __NR_renameat2 345 -#define NR_syscalls 345 +#define NR_syscalls 346 /* Bitmask values returned from kern_features system call. */ #define KERN_FEATURE_MIXED_MODE_STACK 0x00000001 diff --git a/arch/sparc/kernel/sys32.S b/arch/sparc/kernel/sys32.S index d066eb18650c..f834224208ed 100644 --- a/arch/sparc/kernel/sys32.S +++ b/arch/sparc/kernel/sys32.S @@ -48,6 +48,7 @@ SIGN1(sys32_futex, compat_sys_futex, %o1) SIGN1(sys32_recvfrom, compat_sys_recvfrom, %o0) SIGN1(sys32_recvmsg, compat_sys_recvmsg, %o0) SIGN1(sys32_sendmsg, compat_sys_sendmsg, %o0) +SIGN2(sys32_renameat2, sys_renameat2, %o0, %o2) .globl sys32_mmap2 sys32_mmap2: diff --git a/arch/sparc/kernel/systbls_32.S b/arch/sparc/kernel/systbls_32.S index 151ace8766cc..85fe9b1087cd 100644 --- a/arch/sparc/kernel/systbls_32.S +++ b/arch/sparc/kernel/systbls_32.S @@ -86,3 +86,4 @@ sys_call_table: /*330*/ .long sys_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, sys_open_by_handle_at, sys_clock_adjtime /*335*/ .long sys_syncfs, sys_sendmmsg, sys_setns, sys_process_vm_readv, sys_process_vm_writev /*340*/ .long sys_ni_syscall, sys_kcmp, sys_finit_module, sys_sched_setattr, sys_sched_getattr +/*345*/ .long sys_renameat2 diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S index 4bd4e2bb26cf..33ecba2826ea 100644 --- a/arch/sparc/kernel/systbls_64.S +++ b/arch/sparc/kernel/systbls_64.S @@ -87,6 +87,7 @@ sys_call_table32: /*330*/ .word compat_sys_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, compat_sys_open_by_handle_at, compat_sys_clock_adjtime .word sys_syncfs, compat_sys_sendmmsg, sys_setns, compat_sys_process_vm_readv, compat_sys_process_vm_writev /*340*/ .word sys_kern_features, sys_kcmp, sys_finit_module, sys_sched_setattr, sys_sched_getattr + .word sys32_renameat2 #endif /* CONFIG_COMPAT */ @@ -165,3 +166,4 @@ sys_call_table: /*330*/ .word sys_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, sys_open_by_handle_at, sys_clock_adjtime .word sys_syncfs, sys_sendmmsg, sys_setns, sys_process_vm_readv, sys_process_vm_writev /*340*/ .word sys_kern_features, sys_kcmp, sys_finit_module, sys_sched_setattr, sys_sched_getattr + .word sys_renameat2 diff --git a/arch/sparc/lib/mcount.S b/arch/sparc/lib/mcount.S index 3ad6cbdc2163..0b0ed4d34219 100644 --- a/arch/sparc/lib/mcount.S +++ b/arch/sparc/lib/mcount.S @@ -24,10 +24,7 @@ mcount: #ifdef CONFIG_DYNAMIC_FTRACE /* Do nothing, the retl/nop below is all we need. */ #else - sethi %hi(function_trace_stop), %g1 - lduw [%g1 + %lo(function_trace_stop)], %g2 - brnz,pn %g2, 2f - sethi %hi(ftrace_trace_function), %g1 + sethi %hi(ftrace_trace_function), %g1 sethi %hi(ftrace_stub), %g2 ldx [%g1 + %lo(ftrace_trace_function)], %g1 or %g2, %lo(ftrace_stub), %g2 @@ -80,11 +77,8 @@ ftrace_stub: .globl ftrace_caller .type ftrace_caller,#function ftrace_caller: - sethi %hi(function_trace_stop), %g1 mov %i7, %g2 - lduw [%g1 + %lo(function_trace_stop)], %g1 - brnz,pn %g1, ftrace_stub - mov %fp, %g3 + mov %fp, %g3 save %sp, -176, %sp mov %g2, %o1 mov %g2, %l0 diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig index 4f3006b600e3..7fcd492adbfc 100644 --- a/arch/tile/Kconfig +++ b/arch/tile/Kconfig @@ -128,7 +128,6 @@ config TILEGX select SPARSE_IRQ select GENERIC_IRQ_LEGACY_ALLOC_HWIRQ select HAVE_FUNCTION_TRACER - select HAVE_FUNCTION_TRACE_MCOUNT_TEST select HAVE_FUNCTION_GRAPH_TRACER select HAVE_DYNAMIC_FTRACE select HAVE_FTRACE_MCOUNT_RECORD diff --git a/arch/tile/include/asm/processor.h b/arch/tile/include/asm/processor.h index 42323636c459..dd4f9f17e30a 100644 --- a/arch/tile/include/asm/processor.h +++ b/arch/tile/include/asm/processor.h @@ -266,6 +266,8 @@ static inline void cpu_relax(void) barrier(); } +#define cpu_relax_lowlatency() cpu_relax() + /* Info on this processor (see fs/proc/cpuinfo.c) */ struct seq_operations; extern const struct seq_operations cpuinfo_op; diff --git a/arch/tile/kernel/mcount_64.S b/arch/tile/kernel/mcount_64.S index 70d7bb0c4d8f..3c2b8d5e1d1a 100644 --- a/arch/tile/kernel/mcount_64.S +++ b/arch/tile/kernel/mcount_64.S @@ -77,15 +77,6 @@ STD_ENDPROC(__mcount) .align 64 STD_ENTRY(ftrace_caller) - moveli r11, hw2_last(function_trace_stop) - { shl16insli r11, r11, hw1(function_trace_stop); move r12, lr } - { shl16insli r11, r11, hw0(function_trace_stop); move lr, r10 } - ld r11, r11 - beqz r11, 1f - jrp r12 - -1: - { move r10, lr; move lr, r12 } MCOUNT_SAVE_REGS /* arg1: self return address */ @@ -119,15 +110,6 @@ STD_ENDPROC(ftrace_caller) .align 64 STD_ENTRY(__mcount) - moveli r11, hw2_last(function_trace_stop) - { shl16insli r11, r11, hw1(function_trace_stop); move r12, lr } - { shl16insli r11, r11, hw0(function_trace_stop); move lr, r10 } - ld r11, r11 - beqz r11, 1f - jrp r12 - -1: - { move r10, lr; move lr, r12 } { moveli r11, hw2_last(ftrace_trace_function) moveli r13, hw2_last(ftrace_stub) diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c index 9472079471bb..f1b3eb14b855 100644 --- a/arch/um/kernel/tlb.c +++ b/arch/um/kernel/tlb.c @@ -12,6 +12,7 @@ #include <mem_user.h> #include <os.h> #include <skas.h> +#include <kern_util.h> struct host_vm_change { struct host_vm_op { @@ -124,6 +125,9 @@ static int add_munmap(unsigned long addr, unsigned long len, struct host_vm_op *last; int ret = 0; + if ((addr >= STUB_START) && (addr < STUB_END)) + return -EINVAL; + if (hvc->index != 0) { last = &hvc->ops[hvc->index - 1]; if ((last->type == MUNMAP) && @@ -283,8 +287,11 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr, /* This is not an else because ret is modified above */ if (ret) { printk(KERN_ERR "fix_range_common: failed, killing current " - "process\n"); + "process: %d\n", task_tgid_vnr(current)); + /* We are under mmap_sem, release it such that current can terminate */ + up_write(¤t->mm->mmap_sem); force_sig(SIGKILL, current); + do_signal(); } } diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c index 974b87474a99..5678c3571e7c 100644 --- a/arch/um/kernel/trap.c +++ b/arch/um/kernel/trap.c @@ -206,7 +206,7 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user, int is_write = FAULT_WRITE(fi); unsigned long address = FAULT_ADDRESS(fi); - if (regs) + if (!is_user && regs) current->thread.segv_regs = container_of(regs, struct pt_regs, regs); if (!is_user && (address >= start_vm) && (address < end_vm)) { diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c index d531879a4617..908579f2b0ab 100644 --- a/arch/um/os-Linux/skas/process.c +++ b/arch/um/os-Linux/skas/process.c @@ -54,7 +54,7 @@ static int ptrace_dump_regs(int pid) void wait_stub_done(int pid) { - int n, status, err, bad_stop = 0; + int n, status, err; while (1) { CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED | __WALL)); @@ -74,8 +74,6 @@ void wait_stub_done(int pid) if (((1 << WSTOPSIG(status)) & STUB_DONE_MASK) != 0) return; - else - bad_stop = 1; bad_wait: err = ptrace_dump_regs(pid); @@ -85,10 +83,7 @@ bad_wait: printk(UM_KERN_ERR "wait_stub_done : failed to wait for SIGTRAP, " "pid = %d, n = %d, errno = %d, status = 0x%x\n", pid, n, errno, status); - if (bad_stop) - kill(pid, SIGKILL); - else - fatal_sigsegv(); + fatal_sigsegv(); } extern unsigned long current_stub_stack(void); diff --git a/arch/unicore32/include/asm/processor.h b/arch/unicore32/include/asm/processor.h index 4eaa42167667..8d21b7adf26b 100644 --- a/arch/unicore32/include/asm/processor.h +++ b/arch/unicore32/include/asm/processor.h @@ -71,6 +71,7 @@ extern void release_thread(struct task_struct *); unsigned long get_wchan(struct task_struct *p); #define cpu_relax() barrier() +#define cpu_relax_lowlatency() cpu_relax() #define task_pt_regs(p) \ ((struct pt_regs *)(THREAD_START_SP + task_stack_page(p)) - 1) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 6295a2182a76..86216a55eb59 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -54,7 +54,6 @@ config X86 select HAVE_FUNCTION_TRACER select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_GRAPH_FP_TEST - select HAVE_FUNCTION_TRACE_MCOUNT_TEST select HAVE_SYSCALL_TRACEPOINTS select SYSCTL_EXCEPTION_TRACE select HAVE_KVM @@ -131,6 +130,7 @@ config X86 select HAVE_CC_STACKPROTECTOR select GENERIC_CPU_AUTOPROBE select HAVE_ARCH_AUDITSYSCALL + select ARCH_SUPPORTS_ATOMIC_RMW config INSTRUCTION_DECODER def_bool y @@ -1522,6 +1522,7 @@ config EFI bool "EFI runtime service support" depends on ACPI select UCS2_STRING + select EFI_RUNTIME_WRAPPERS ---help--- This enables the kernel to use EFI runtime services that are available (such as the EFI variable services). diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 33f71b01fd22..c65fd9650467 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -15,12 +15,9 @@ endif # that way we can complain to the user if the CPU is insufficient. # # The -m16 option is supported by GCC >= 4.9 and clang >= 3.5. For -# older versions of GCC, we need to play evil and unreliable tricks to -# attempt to ensure that our asm(".code16gcc") is first in the asm -# output. -CODE16GCC_CFLAGS := -m32 -include $(srctree)/arch/x86/boot/code16gcc.h \ - $(call cc-option, -fno-toplevel-reorder,\ - $(call cc-option, -fno-unit-at-a-time)) +# older versions of GCC, include an *assembly* header to make sure that +# gcc doesn't play any games behind our back. +CODE16GCC_CFLAGS := -m32 -Wa,$(srctree)/arch/x86/boot/code16gcc.h M16_CFLAGS := $(call cc-option, -m16, $(CODE16GCC_CFLAGS)) REALMODE_CFLAGS := $(M16_CFLAGS) -g -Os -D__KERNEL__ \ diff --git a/arch/x86/boot/code16gcc.h b/arch/x86/boot/code16gcc.h index d93e48010b61..5ff426535397 100644 --- a/arch/x86/boot/code16gcc.h +++ b/arch/x86/boot/code16gcc.h @@ -1,15 +1,11 @@ -/* - * code16gcc.h - * - * This file is -include'd when compiling 16-bit C code. - * Note: this asm() needs to be emitted before gcc emits any code. - * Depending on gcc version, this requires -fno-unit-at-a-time or - * -fno-toplevel-reorder. - * - * Hopefully gcc will eventually have a real -m16 option so we can - * drop this hack long term. - */ +# +# code16gcc.h +# +# This file is added to the assembler via -Wa when compiling 16-bit C code. +# This is done this way instead via asm() to make sure gcc does not reorder +# things around us. +# +# gcc 4.9+ has a real -m16 option so we can drop this hack long term. +# -#ifndef __ASSEMBLY__ -asm(".code16gcc"); -#endif + .code16gcc diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 0fcd9133790c..7a801a310e37 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -33,7 +33,8 @@ VMLINUX_OBJS = $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \ $(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone ifeq ($(CONFIG_EFI_STUB), y) - VMLINUX_OBJS += $(obj)/eboot.o $(obj)/efi_stub_$(BITS).o + VMLINUX_OBJS += $(obj)/eboot.o $(obj)/efi_stub_$(BITS).o \ + $(objtree)/drivers/firmware/efi/libstub/lib.a endif $(obj)/vmlinux: $(VMLINUX_OBJS) FORCE diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index 0331d765c2bb..f277184e2ac1 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -19,10 +19,7 @@ static efi_system_table_t *sys_table; -static struct efi_config *efi_early; - -#define efi_call_early(f, ...) \ - efi_early->call(efi_early->f, __VA_ARGS__); +struct efi_config *efi_early; #define BOOT_SERVICES(bits) \ static void setup_boot_services##bits(struct efi_config *c) \ @@ -48,8 +45,7 @@ static void setup_boot_services##bits(struct efi_config *c) \ BOOT_SERVICES(32); BOOT_SERVICES(64); -static void efi_printk(efi_system_table_t *, char *); -static void efi_char16_printk(efi_system_table_t *, efi_char16_t *); +void efi_char16_printk(efi_system_table_t *, efi_char16_t *); static efi_status_t __file_size32(void *__fh, efi_char16_t *filename_16, @@ -156,7 +152,7 @@ grow: return status; } -static efi_status_t +efi_status_t efi_file_size(efi_system_table_t *sys_table, void *__fh, efi_char16_t *filename_16, void **handle, u64 *file_sz) { @@ -166,7 +162,7 @@ efi_file_size(efi_system_table_t *sys_table, void *__fh, return __file_size32(__fh, filename_16, handle, file_sz); } -static inline efi_status_t +efi_status_t efi_file_read(void *handle, unsigned long *size, void *addr) { unsigned long func; @@ -184,7 +180,7 @@ efi_file_read(void *handle, unsigned long *size, void *addr) } } -static inline efi_status_t efi_file_close(void *handle) +efi_status_t efi_file_close(void *handle) { if (efi_early->is64) { efi_file_handle_64_t *fh = handle; @@ -249,7 +245,7 @@ static inline efi_status_t __open_volume64(void *__image, void **__fh) return status; } -static inline efi_status_t +efi_status_t efi_open_volume(efi_system_table_t *sys_table, void *__image, void **__fh) { if (efi_early->is64) @@ -258,7 +254,7 @@ efi_open_volume(efi_system_table_t *sys_table, void *__image, void **__fh) return __open_volume32(__image, __fh); } -static void efi_char16_printk(efi_system_table_t *table, efi_char16_t *str) +void efi_char16_printk(efi_system_table_t *table, efi_char16_t *str) { unsigned long output_string; size_t offset; @@ -284,8 +280,6 @@ static void efi_char16_printk(efi_system_table_t *table, efi_char16_t *str) } } -#include "../../../../drivers/firmware/efi/efi-stub-helper.c" - static void find_bits(unsigned long mask, u8 *pos, u8 *size) { u8 first, len; @@ -1038,6 +1032,7 @@ struct boot_params *make_boot_params(struct efi_config *c) int i; unsigned long ramdisk_addr; unsigned long ramdisk_size; + unsigned long initrd_addr_max; efi_early = c; sys_table = (efi_system_table_t *)(unsigned long)efi_early->table; @@ -1100,14 +1095,21 @@ struct boot_params *make_boot_params(struct efi_config *c) memset(sdt, 0, sizeof(*sdt)); + if (hdr->xloadflags & XLF_CAN_BE_LOADED_ABOVE_4G) + initrd_addr_max = -1UL; + else + initrd_addr_max = hdr->initrd_addr_max; + status = handle_cmdline_files(sys_table, image, (char *)(unsigned long)hdr->cmd_line_ptr, - "initrd=", hdr->initrd_addr_max, + "initrd=", initrd_addr_max, &ramdisk_addr, &ramdisk_size); if (status != EFI_SUCCESS) goto fail2; - hdr->ramdisk_image = ramdisk_addr; - hdr->ramdisk_size = ramdisk_size; + hdr->ramdisk_image = ramdisk_addr & 0xffffffff; + hdr->ramdisk_size = ramdisk_size & 0xffffffff; + boot_params->ext_ramdisk_image = (u64)ramdisk_addr >> 32; + boot_params->ext_ramdisk_size = (u64)ramdisk_size >> 32; return boot_params; fail2: @@ -1374,7 +1376,10 @@ struct boot_params *efi_main(struct efi_config *c, setup_graphics(boot_params); - setup_efi_pci(boot_params); + status = setup_efi_pci(boot_params); + if (status != EFI_SUCCESS) { + efi_printk(sys_table, "setup_efi_pci() failed!\n"); + } status = efi_call_early(allocate_pool, EFI_LOADER_DATA, sizeof(*gdt), (void **)&gdt); @@ -1401,16 +1406,20 @@ struct boot_params *efi_main(struct efi_config *c, hdr->init_size, hdr->init_size, hdr->pref_address, hdr->kernel_alignment); - if (status != EFI_SUCCESS) + if (status != EFI_SUCCESS) { + efi_printk(sys_table, "efi_relocate_kernel() failed!\n"); goto fail; + } hdr->pref_address = hdr->code32_start; hdr->code32_start = bzimage_addr; } status = exit_boot(boot_params, handle, is64); - if (status != EFI_SUCCESS) + if (status != EFI_SUCCESS) { + efi_printk(sys_table, "exit_boot() failed!\n"); goto fail; + } memset((char *)gdt->address, 0x0, gdt->size); desc = (struct desc_struct *)gdt->address; @@ -1470,5 +1479,6 @@ struct boot_params *efi_main(struct efi_config *c, return boot_params; fail: + efi_printk(sys_table, "efi_main() failed!\n"); return NULL; } diff --git a/arch/x86/boot/compressed/eboot.h b/arch/x86/boot/compressed/eboot.h index c88c31ecad12..d487e727f1ec 100644 --- a/arch/x86/boot/compressed/eboot.h +++ b/arch/x86/boot/compressed/eboot.h @@ -103,20 +103,4 @@ struct efi_uga_draw_protocol { void *blt; }; -struct efi_config { - u64 image_handle; - u64 table; - u64 allocate_pool; - u64 allocate_pages; - u64 get_memory_map; - u64 free_pool; - u64 free_pages; - u64 locate_handle; - u64 handle_protocol; - u64 exit_boot_services; - u64 text_output; - efi_status_t (*call)(unsigned long, ...); - bool is64; -} __packed; - #endif /* BOOT_COMPRESSED_EBOOT_H */ diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index 84c223479e3c..16ef02596db2 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -91,10 +91,9 @@ bs_die: .section ".bsdata", "a" bugger_off_msg: - .ascii "Direct floppy boot is not supported. " - .ascii "Use a boot loader program instead.\r\n" + .ascii "Use a boot loader.\r\n" .ascii "\n" - .ascii "Remove disk and press any key to reboot ...\r\n" + .ascii "Remove disk and press any key to reboot...\r\n" .byte 0 #ifdef CONFIG_EFI_STUB @@ -108,7 +107,7 @@ coff_header: #else .word 0x8664 # x86-64 #endif - .word 3 # nr_sections + .word 4 # nr_sections .long 0 # TimeDateStamp .long 0 # PointerToSymbolTable .long 1 # NumberOfSymbols @@ -155,7 +154,7 @@ extra_header_fields: #else .quad 0 # ImageBase #endif - .long 0x20 # SectionAlignment + .long CONFIG_PHYSICAL_ALIGN # SectionAlignment .long 0x20 # FileAlignment .word 0 # MajorOperatingSystemVersion .word 0 # MinorOperatingSystemVersion @@ -250,6 +249,25 @@ section_table: .word 0 # NumberOfLineNumbers .long 0x60500020 # Characteristics (section flags) + # + # The offset & size fields are filled in by build.c. + # + .ascii ".bss" + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .long 0 + .long 0x0 + .long 0 # Size of initialized data + # on disk + .long 0x0 + .long 0 # PointerToRelocations + .long 0 # PointerToLineNumbers + .word 0 # NumberOfRelocations + .word 0 # NumberOfLineNumbers + .long 0xc8000080 # Characteristics (section flags) + #endif /* CONFIG_EFI_STUB */ # Kernel attributes; used by setup. This is part 1 of the diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c index 1a2f2121cada..a7661c430cd9 100644 --- a/arch/x86/boot/tools/build.c +++ b/arch/x86/boot/tools/build.c @@ -143,7 +143,7 @@ static void usage(void) #ifdef CONFIG_EFI_STUB -static void update_pecoff_section_header(char *section_name, u32 offset, u32 size) +static void update_pecoff_section_header_fields(char *section_name, u32 vma, u32 size, u32 datasz, u32 offset) { unsigned int pe_header; unsigned short num_sections; @@ -164,10 +164,10 @@ static void update_pecoff_section_header(char *section_name, u32 offset, u32 siz put_unaligned_le32(size, section + 0x8); /* section header vma field */ - put_unaligned_le32(offset, section + 0xc); + put_unaligned_le32(vma, section + 0xc); /* section header 'size of initialised data' field */ - put_unaligned_le32(size, section + 0x10); + put_unaligned_le32(datasz, section + 0x10); /* section header 'file offset' field */ put_unaligned_le32(offset, section + 0x14); @@ -179,6 +179,11 @@ static void update_pecoff_section_header(char *section_name, u32 offset, u32 siz } } +static void update_pecoff_section_header(char *section_name, u32 offset, u32 size) +{ + update_pecoff_section_header_fields(section_name, offset, size, size, offset); +} + static void update_pecoff_setup_and_reloc(unsigned int size) { u32 setup_offset = 0x200; @@ -203,9 +208,6 @@ static void update_pecoff_text(unsigned int text_start, unsigned int file_sz) pe_header = get_unaligned_le32(&buf[0x3c]); - /* Size of image */ - put_unaligned_le32(file_sz, &buf[pe_header + 0x50]); - /* * Size of code: Subtract the size of the first sector (512 bytes) * which includes the header. @@ -220,6 +222,22 @@ static void update_pecoff_text(unsigned int text_start, unsigned int file_sz) update_pecoff_section_header(".text", text_start, text_sz); } +static void update_pecoff_bss(unsigned int file_sz, unsigned int init_sz) +{ + unsigned int pe_header; + unsigned int bss_sz = init_sz - file_sz; + + pe_header = get_unaligned_le32(&buf[0x3c]); + + /* Size of uninitialized data */ + put_unaligned_le32(bss_sz, &buf[pe_header + 0x24]); + + /* Size of image */ + put_unaligned_le32(init_sz, &buf[pe_header + 0x50]); + + update_pecoff_section_header_fields(".bss", file_sz, bss_sz, 0, 0); +} + static int reserve_pecoff_reloc_section(int c) { /* Reserve 0x20 bytes for .reloc section */ @@ -259,6 +277,8 @@ static void efi_stub_entry_update(void) static inline void update_pecoff_setup_and_reloc(unsigned int size) {} static inline void update_pecoff_text(unsigned int text_start, unsigned int file_sz) {} +static inline void update_pecoff_bss(unsigned int file_sz, + unsigned int init_sz) {} static inline void efi_stub_defaults(void) {} static inline void efi_stub_entry_update(void) {} @@ -310,7 +330,7 @@ static void parse_zoffset(char *fname) int main(int argc, char ** argv) { - unsigned int i, sz, setup_sectors; + unsigned int i, sz, setup_sectors, init_sz; int c; u32 sys_size; struct stat sb; @@ -376,7 +396,9 @@ int main(int argc, char ** argv) buf[0x1f1] = setup_sectors-1; put_unaligned_le32(sys_size, &buf[0x1f4]); - update_pecoff_text(setup_sectors * 512, sz + i + ((sys_size * 16) - sz)); + update_pecoff_text(setup_sectors * 512, i + (sys_size * 16)); + init_sz = get_unaligned_le32(&buf[0x260]); + update_pecoff_bss(i + (sys_size * 16), init_sz); efi_stub_entry_update(); diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index 61d6e281898b..d551165a3159 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -14,6 +14,7 @@ obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o obj-$(CONFIG_CRYPTO_SERPENT_SSE2_586) += serpent-sse2-i586.o obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o +obj-$(CONFIG_CRYPTO_DES3_EDE_X86_64) += des3_ede-x86_64.o obj-$(CONFIG_CRYPTO_CAMELLIA_X86_64) += camellia-x86_64.o obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o @@ -52,6 +53,7 @@ salsa20-i586-y := salsa20-i586-asm_32.o salsa20_glue.o serpent-sse2-i586-y := serpent-sse2-i586-asm_32.o serpent_sse2_glue.o aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o +des3_ede-x86_64-y := des3_ede-asm_64.o des3_ede_glue.o camellia-x86_64-y := camellia-x86_64-asm_64.o camellia_glue.o blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o @@ -76,7 +78,7 @@ ifeq ($(avx2_supported),yes) endif aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o -aesni-intel-$(CONFIG_64BIT) += aesni-intel_avx-x86_64.o +aesni-intel-$(CONFIG_64BIT) += aesni-intel_avx-x86_64.o aes_ctrby8_avx-x86_64.o ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o ifeq ($(avx2_supported),yes) diff --git a/arch/x86/crypto/aes_ctrby8_avx-x86_64.S b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S new file mode 100644 index 000000000000..f091f122ed24 --- /dev/null +++ b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S @@ -0,0 +1,546 @@ +/* + * Implement AES CTR mode by8 optimization with AVX instructions. (x86_64) + * + * This is AES128/192/256 CTR mode optimization implementation. It requires + * the support of Intel(R) AESNI and AVX instructions. + * + * This work was inspired by the AES CTR mode optimization published + * in Intel Optimized IPSEC Cryptograhpic library. + * Additional information on it can be found at: + * http://downloadcenter.intel.com/Detail_Desc.aspx?agr=Y&DwnldID=22972 + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2014 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Contact Information: + * James Guilford <james.guilford@intel.com> + * Sean Gulley <sean.m.gulley@intel.com> + * Chandramouli Narayanan <mouli@linux.intel.com> + * + * BSD LICENSE + * + * Copyright(c) 2014 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include <linux/linkage.h> +#include <asm/inst.h> + +#define CONCAT(a,b) a##b +#define VMOVDQ vmovdqu + +#define xdata0 %xmm0 +#define xdata1 %xmm1 +#define xdata2 %xmm2 +#define xdata3 %xmm3 +#define xdata4 %xmm4 +#define xdata5 %xmm5 +#define xdata6 %xmm6 +#define xdata7 %xmm7 +#define xcounter %xmm8 +#define xbyteswap %xmm9 +#define xkey0 %xmm10 +#define xkey3 %xmm11 +#define xkey6 %xmm12 +#define xkey9 %xmm13 +#define xkey4 %xmm11 +#define xkey8 %xmm12 +#define xkey12 %xmm13 +#define xkeyA %xmm14 +#define xkeyB %xmm15 + +#define p_in %rdi +#define p_iv %rsi +#define p_keys %rdx +#define p_out %rcx +#define num_bytes %r8 + +#define tmp %r10 +#define DDQ(i) CONCAT(ddq_add_,i) +#define XMM(i) CONCAT(%xmm, i) +#define DDQ_DATA 0 +#define XDATA 1 +#define KEY_128 1 +#define KEY_192 2 +#define KEY_256 3 + +.section .rodata +.align 16 + +byteswap_const: + .octa 0x000102030405060708090A0B0C0D0E0F +ddq_add_1: + .octa 0x00000000000000000000000000000001 +ddq_add_2: + .octa 0x00000000000000000000000000000002 +ddq_add_3: + .octa 0x00000000000000000000000000000003 +ddq_add_4: + .octa 0x00000000000000000000000000000004 +ddq_add_5: + .octa 0x00000000000000000000000000000005 +ddq_add_6: + .octa 0x00000000000000000000000000000006 +ddq_add_7: + .octa 0x00000000000000000000000000000007 +ddq_add_8: + .octa 0x00000000000000000000000000000008 + +.text + +/* generate a unique variable for ddq_add_x */ + +.macro setddq n + var_ddq_add = DDQ(\n) +.endm + +/* generate a unique variable for xmm register */ +.macro setxdata n + var_xdata = XMM(\n) +.endm + +/* club the numeric 'id' to the symbol 'name' */ + +.macro club name, id +.altmacro + .if \name == DDQ_DATA + setddq %\id + .elseif \name == XDATA + setxdata %\id + .endif +.noaltmacro +.endm + +/* + * do_aes num_in_par load_keys key_len + * This increments p_in, but not p_out + */ +.macro do_aes b, k, key_len + .set by, \b + .set load_keys, \k + .set klen, \key_len + + .if (load_keys) + vmovdqa 0*16(p_keys), xkey0 + .endif + + vpshufb xbyteswap, xcounter, xdata0 + + .set i, 1 + .rept (by - 1) + club DDQ_DATA, i + club XDATA, i + vpaddd var_ddq_add(%rip), xcounter, var_xdata + vpshufb xbyteswap, var_xdata, var_xdata + .set i, (i +1) + .endr + + vmovdqa 1*16(p_keys), xkeyA + + vpxor xkey0, xdata0, xdata0 + club DDQ_DATA, by + vpaddd var_ddq_add(%rip), xcounter, xcounter + + .set i, 1 + .rept (by - 1) + club XDATA, i + vpxor xkey0, var_xdata, var_xdata + .set i, (i +1) + .endr + + vmovdqa 2*16(p_keys), xkeyB + + .set i, 0 + .rept by + club XDATA, i + vaesenc xkeyA, var_xdata, var_xdata /* key 1 */ + .set i, (i +1) + .endr + + .if (klen == KEY_128) + .if (load_keys) + vmovdqa 3*16(p_keys), xkeyA + .endif + .else + vmovdqa 3*16(p_keys), xkeyA + .endif + + .set i, 0 + .rept by + club XDATA, i + vaesenc xkeyB, var_xdata, var_xdata /* key 2 */ + .set i, (i +1) + .endr + + add $(16*by), p_in + + .if (klen == KEY_128) + vmovdqa 4*16(p_keys), xkey4 + .else + .if (load_keys) + vmovdqa 4*16(p_keys), xkey4 + .endif + .endif + + .set i, 0 + .rept by + club XDATA, i + vaesenc xkeyA, var_xdata, var_xdata /* key 3 */ + .set i, (i +1) + .endr + + vmovdqa 5*16(p_keys), xkeyA + + .set i, 0 + .rept by + club XDATA, i + vaesenc xkey4, var_xdata, var_xdata /* key 4 */ + .set i, (i +1) + .endr + + .if (klen == KEY_128) + .if (load_keys) + vmovdqa 6*16(p_keys), xkeyB + .endif + .else + vmovdqa 6*16(p_keys), xkeyB + .endif + + .set i, 0 + .rept by + club XDATA, i + vaesenc xkeyA, var_xdata, var_xdata /* key 5 */ + .set i, (i +1) + .endr + + vmovdqa 7*16(p_keys), xkeyA + + .set i, 0 + .rept by + club XDATA, i + vaesenc xkeyB, var_xdata, var_xdata /* key 6 */ + .set i, (i +1) + .endr + + .if (klen == KEY_128) + vmovdqa 8*16(p_keys), xkey8 + .else + .if (load_keys) + vmovdqa 8*16(p_keys), xkey8 + .endif + .endif + + .set i, 0 + .rept by + club XDATA, i + vaesenc xkeyA, var_xdata, var_xdata /* key 7 */ + .set i, (i +1) + .endr + + .if (klen == KEY_128) + .if (load_keys) + vmovdqa 9*16(p_keys), xkeyA + .endif + .else + vmovdqa 9*16(p_keys), xkeyA + .endif + + .set i, 0 + .rept by + club XDATA, i + vaesenc xkey8, var_xdata, var_xdata /* key 8 */ + .set i, (i +1) + .endr + + vmovdqa 10*16(p_keys), xkeyB + + .set i, 0 + .rept by + club XDATA, i + vaesenc xkeyA, var_xdata, var_xdata /* key 9 */ + .set i, (i +1) + .endr + + .if (klen != KEY_128) + vmovdqa 11*16(p_keys), xkeyA + .endif + + .set i, 0 + .rept by + club XDATA, i + /* key 10 */ + .if (klen == KEY_128) + vaesenclast xkeyB, var_xdata, var_xdata + .else + vaesenc xkeyB, var_xdata, var_xdata + .endif + .set i, (i +1) + .endr + + .if (klen != KEY_128) + .if (load_keys) + vmovdqa 12*16(p_keys), xkey12 + .endif + + .set i, 0 + .rept by + club XDATA, i + vaesenc xkeyA, var_xdata, var_xdata /* key 11 */ + .set i, (i +1) + .endr + + .if (klen == KEY_256) + vmovdqa 13*16(p_keys), xkeyA + .endif + + .set i, 0 + .rept by + club XDATA, i + .if (klen == KEY_256) + /* key 12 */ + vaesenc xkey12, var_xdata, var_xdata + .else + vaesenclast xkey12, var_xdata, var_xdata + .endif + .set i, (i +1) + .endr + + .if (klen == KEY_256) + vmovdqa 14*16(p_keys), xkeyB + + .set i, 0 + .rept by + club XDATA, i + /* key 13 */ + vaesenc xkeyA, var_xdata, var_xdata + .set i, (i +1) + .endr + + .set i, 0 + .rept by + club XDATA, i + /* key 14 */ + vaesenclast xkeyB, var_xdata, var_xdata + .set i, (i +1) + .endr + .endif + .endif + + .set i, 0 + .rept (by / 2) + .set j, (i+1) + VMOVDQ (i*16 - 16*by)(p_in), xkeyA + VMOVDQ (j*16 - 16*by)(p_in), xkeyB + club XDATA, i + vpxor xkeyA, var_xdata, var_xdata + club XDATA, j + vpxor xkeyB, var_xdata, var_xdata + .set i, (i+2) + .endr + + .if (i < by) + VMOVDQ (i*16 - 16*by)(p_in), xkeyA + club XDATA, i + vpxor xkeyA, var_xdata, var_xdata + .endif + + .set i, 0 + .rept by + club XDATA, i + VMOVDQ var_xdata, i*16(p_out) + .set i, (i+1) + .endr +.endm + +.macro do_aes_load val, key_len + do_aes \val, 1, \key_len +.endm + +.macro do_aes_noload val, key_len + do_aes \val, 0, \key_len +.endm + +/* main body of aes ctr load */ + +.macro do_aes_ctrmain key_len + + cmp $16, num_bytes + jb .Ldo_return2\key_len + + vmovdqa byteswap_const(%rip), xbyteswap + vmovdqu (p_iv), xcounter + vpshufb xbyteswap, xcounter, xcounter + + mov num_bytes, tmp + and $(7*16), tmp + jz .Lmult_of_8_blks\key_len + + /* 1 <= tmp <= 7 */ + cmp $(4*16), tmp + jg .Lgt4\key_len + je .Leq4\key_len + +.Llt4\key_len: + cmp $(2*16), tmp + jg .Leq3\key_len + je .Leq2\key_len + +.Leq1\key_len: + do_aes_load 1, \key_len + add $(1*16), p_out + and $(~7*16), num_bytes + jz .Ldo_return2\key_len + jmp .Lmain_loop2\key_len + +.Leq2\key_len: + do_aes_load 2, \key_len + add $(2*16), p_out + and $(~7*16), num_bytes + jz .Ldo_return2\key_len + jmp .Lmain_loop2\key_len + + +.Leq3\key_len: + do_aes_load 3, \key_len + add $(3*16), p_out + and $(~7*16), num_bytes + jz .Ldo_return2\key_len + jmp .Lmain_loop2\key_len + +.Leq4\key_len: + do_aes_load 4, \key_len + add $(4*16), p_out + and $(~7*16), num_bytes + jz .Ldo_return2\key_len + jmp .Lmain_loop2\key_len + +.Lgt4\key_len: + cmp $(6*16), tmp + jg .Leq7\key_len + je .Leq6\key_len + +.Leq5\key_len: + do_aes_load 5, \key_len + add $(5*16), p_out + and $(~7*16), num_bytes + jz .Ldo_return2\key_len + jmp .Lmain_loop2\key_len + +.Leq6\key_len: + do_aes_load 6, \key_len + add $(6*16), p_out + and $(~7*16), num_bytes + jz .Ldo_return2\key_len + jmp .Lmain_loop2\key_len + +.Leq7\key_len: + do_aes_load 7, \key_len + add $(7*16), p_out + and $(~7*16), num_bytes + jz .Ldo_return2\key_len + jmp .Lmain_loop2\key_len + +.Lmult_of_8_blks\key_len: + .if (\key_len != KEY_128) + vmovdqa 0*16(p_keys), xkey0 + vmovdqa 4*16(p_keys), xkey4 + vmovdqa 8*16(p_keys), xkey8 + vmovdqa 12*16(p_keys), xkey12 + .else + vmovdqa 0*16(p_keys), xkey0 + vmovdqa 3*16(p_keys), xkey4 + vmovdqa 6*16(p_keys), xkey8 + vmovdqa 9*16(p_keys), xkey12 + .endif +.align 16 +.Lmain_loop2\key_len: + /* num_bytes is a multiple of 8 and >0 */ + do_aes_noload 8, \key_len + add $(8*16), p_out + sub $(8*16), num_bytes + jne .Lmain_loop2\key_len + +.Ldo_return2\key_len: + /* return updated IV */ + vpshufb xbyteswap, xcounter, xcounter + vmovdqu xcounter, (p_iv) + ret +.endm + +/* + * routine to do AES128 CTR enc/decrypt "by8" + * XMM registers are clobbered. + * Saving/restoring must be done at a higher level + * aes_ctr_enc_128_avx_by8(void *in, void *iv, void *keys, void *out, + * unsigned int num_bytes) + */ +ENTRY(aes_ctr_enc_128_avx_by8) + /* call the aes main loop */ + do_aes_ctrmain KEY_128 + +ENDPROC(aes_ctr_enc_128_avx_by8) + +/* + * routine to do AES192 CTR enc/decrypt "by8" + * XMM registers are clobbered. + * Saving/restoring must be done at a higher level + * aes_ctr_enc_192_avx_by8(void *in, void *iv, void *keys, void *out, + * unsigned int num_bytes) + */ +ENTRY(aes_ctr_enc_192_avx_by8) + /* call the aes main loop */ + do_aes_ctrmain KEY_192 + +ENDPROC(aes_ctr_enc_192_avx_by8) + +/* + * routine to do AES256 CTR enc/decrypt "by8" + * XMM registers are clobbered. + * Saving/restoring must be done at a higher level + * aes_ctr_enc_256_avx_by8(void *in, void *iv, void *keys, void *out, + * unsigned int num_bytes) + */ +ENTRY(aes_ctr_enc_256_avx_by8) + /* call the aes main loop */ + do_aes_ctrmain KEY_256 + +ENDPROC(aes_ctr_enc_256_avx_by8) diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 948ad0e77741..888950f29fd9 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -105,6 +105,9 @@ void crypto_fpu_exit(void); #define AVX_GEN4_OPTSIZE 4096 #ifdef CONFIG_X86_64 + +static void (*aesni_ctr_enc_tfm)(struct crypto_aes_ctx *ctx, u8 *out, + const u8 *in, unsigned int len, u8 *iv); asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in, unsigned int len, u8 *iv); @@ -155,6 +158,12 @@ asmlinkage void aesni_gcm_dec(void *ctx, u8 *out, #ifdef CONFIG_AS_AVX +asmlinkage void aes_ctr_enc_128_avx_by8(const u8 *in, u8 *iv, + void *keys, u8 *out, unsigned int num_bytes); +asmlinkage void aes_ctr_enc_192_avx_by8(const u8 *in, u8 *iv, + void *keys, u8 *out, unsigned int num_bytes); +asmlinkage void aes_ctr_enc_256_avx_by8(const u8 *in, u8 *iv, + void *keys, u8 *out, unsigned int num_bytes); /* * asmlinkage void aesni_gcm_precomp_avx_gen2() * gcm_data *my_ctx_data, context data @@ -472,6 +481,25 @@ static void ctr_crypt_final(struct crypto_aes_ctx *ctx, crypto_inc(ctrblk, AES_BLOCK_SIZE); } +#ifdef CONFIG_AS_AVX +static void aesni_ctr_enc_avx_tfm(struct crypto_aes_ctx *ctx, u8 *out, + const u8 *in, unsigned int len, u8 *iv) +{ + /* + * based on key length, override with the by8 version + * of ctr mode encryption/decryption for improved performance + * aes_set_key_common() ensures that key length is one of + * {128,192,256} + */ + if (ctx->key_length == AES_KEYSIZE_128) + aes_ctr_enc_128_avx_by8(in, iv, (void *)ctx, out, len); + else if (ctx->key_length == AES_KEYSIZE_192) + aes_ctr_enc_192_avx_by8(in, iv, (void *)ctx, out, len); + else + aes_ctr_enc_256_avx_by8(in, iv, (void *)ctx, out, len); +} +#endif + static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) @@ -486,8 +514,8 @@ static int ctr_crypt(struct blkcipher_desc *desc, kernel_fpu_begin(); while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) { - aesni_ctr_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr, - nbytes & AES_BLOCK_MASK, walk.iv); + aesni_ctr_enc_tfm(ctx, walk.dst.virt.addr, walk.src.virt.addr, + nbytes & AES_BLOCK_MASK, walk.iv); nbytes &= AES_BLOCK_SIZE - 1; err = blkcipher_walk_done(desc, &walk, nbytes); } @@ -1493,6 +1521,14 @@ static int __init aesni_init(void) aesni_gcm_enc_tfm = aesni_gcm_enc; aesni_gcm_dec_tfm = aesni_gcm_dec; } + aesni_ctr_enc_tfm = aesni_ctr_enc; +#ifdef CONFIG_AS_AVX + if (cpu_has_avx) { + /* optimize performance of ctr mode encryption transform */ + aesni_ctr_enc_tfm = aesni_ctr_enc_avx_tfm; + pr_info("AES CTR mode by8 optimization enabled\n"); + } +#endif #endif err = crypto_fpu_init(); diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S index dbc4339b5417..26d49ebae040 100644 --- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S +++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S @@ -72,6 +72,7 @@ # unsigned int crc_pcl(u8 *buffer, int len, unsigned int crc_init); +.text ENTRY(crc_pcl) #define bufp %rdi #define bufp_dw %edi @@ -216,15 +217,11 @@ LABEL crc_ %i ## 4) Combine three results: ################################################################ - lea (K_table-16)(%rip), bufp # first entry is for idx 1 + lea (K_table-8)(%rip), bufp # first entry is for idx 1 shlq $3, %rax # rax *= 8 - subq %rax, tmp # tmp -= rax*8 - shlq $1, %rax - subq %rax, tmp # tmp -= rax*16 - # (total tmp -= rax*24) - addq %rax, bufp - - movdqa (bufp), %xmm0 # 2 consts: K1:K2 + pmovzxdq (bufp,%rax), %xmm0 # 2 consts: K1:K2 + leal (%eax,%eax,2), %eax # rax *= 3 (total *24) + subq %rax, tmp # tmp -= rax*24 movq crc_init, %xmm1 # CRC for block 1 PCLMULQDQ 0x00,%xmm0,%xmm1 # Multiply by K2 @@ -238,9 +235,9 @@ LABEL crc_ %i mov crc2, crc_init crc32 %rax, crc_init -################################################################ -## 5) Check for end: -################################################################ + ################################################################ + ## 5) Check for end: + ################################################################ LABEL crc_ 0 mov tmp, len @@ -331,136 +328,136 @@ ENDPROC(crc_pcl) ################################################################ ## PCLMULQDQ tables - ## Table is 128 entries x 2 quad words each + ## Table is 128 entries x 2 words (8 bytes) each ################################################################ -.data -.align 64 +.section .rotata, "a", %progbits +.align 8 K_table: - .quad 0x14cd00bd6,0x105ec76f0 - .quad 0x0ba4fc28e,0x14cd00bd6 - .quad 0x1d82c63da,0x0f20c0dfe - .quad 0x09e4addf8,0x0ba4fc28e - .quad 0x039d3b296,0x1384aa63a - .quad 0x102f9b8a2,0x1d82c63da - .quad 0x14237f5e6,0x01c291d04 - .quad 0x00d3b6092,0x09e4addf8 - .quad 0x0c96cfdc0,0x0740eef02 - .quad 0x18266e456,0x039d3b296 - .quad 0x0daece73e,0x0083a6eec - .quad 0x0ab7aff2a,0x102f9b8a2 - .quad 0x1248ea574,0x1c1733996 - .quad 0x083348832,0x14237f5e6 - .quad 0x12c743124,0x02ad91c30 - .quad 0x0b9e02b86,0x00d3b6092 - .quad 0x018b33a4e,0x06992cea2 - .quad 0x1b331e26a,0x0c96cfdc0 - .quad 0x17d35ba46,0x07e908048 - .quad 0x1bf2e8b8a,0x18266e456 - .quad 0x1a3e0968a,0x11ed1f9d8 - .quad 0x0ce7f39f4,0x0daece73e - .quad 0x061d82e56,0x0f1d0f55e - .quad 0x0d270f1a2,0x0ab7aff2a - .quad 0x1c3f5f66c,0x0a87ab8a8 - .quad 0x12ed0daac,0x1248ea574 - .quad 0x065863b64,0x08462d800 - .quad 0x11eef4f8e,0x083348832 - .quad 0x1ee54f54c,0x071d111a8 - .quad 0x0b3e32c28,0x12c743124 - .quad 0x0064f7f26,0x0ffd852c6 - .quad 0x0dd7e3b0c,0x0b9e02b86 - .quad 0x0f285651c,0x0dcb17aa4 - .quad 0x010746f3c,0x018b33a4e - .quad 0x1c24afea4,0x0f37c5aee - .quad 0x0271d9844,0x1b331e26a - .quad 0x08e766a0c,0x06051d5a2 - .quad 0x093a5f730,0x17d35ba46 - .quad 0x06cb08e5c,0x11d5ca20e - .quad 0x06b749fb2,0x1bf2e8b8a - .quad 0x1167f94f2,0x021f3d99c - .quad 0x0cec3662e,0x1a3e0968a - .quad 0x19329634a,0x08f158014 - .quad 0x0e6fc4e6a,0x0ce7f39f4 - .quad 0x08227bb8a,0x1a5e82106 - .quad 0x0b0cd4768,0x061d82e56 - .quad 0x13c2b89c4,0x188815ab2 - .quad 0x0d7a4825c,0x0d270f1a2 - .quad 0x10f5ff2ba,0x105405f3e - .quad 0x00167d312,0x1c3f5f66c - .quad 0x0f6076544,0x0e9adf796 - .quad 0x026f6a60a,0x12ed0daac - .quad 0x1a2adb74e,0x096638b34 - .quad 0x19d34af3a,0x065863b64 - .quad 0x049c3cc9c,0x1e50585a0 - .quad 0x068bce87a,0x11eef4f8e - .quad 0x1524fa6c6,0x19f1c69dc - .quad 0x16cba8aca,0x1ee54f54c - .quad 0x042d98888,0x12913343e - .quad 0x1329d9f7e,0x0b3e32c28 - .quad 0x1b1c69528,0x088f25a3a - .quad 0x02178513a,0x0064f7f26 - .quad 0x0e0ac139e,0x04e36f0b0 - .quad 0x0170076fa,0x0dd7e3b0c - .quad 0x141a1a2e2,0x0bd6f81f8 - .quad 0x16ad828b4,0x0f285651c - .quad 0x041d17b64,0x19425cbba - .quad 0x1fae1cc66,0x010746f3c - .quad 0x1a75b4b00,0x18db37e8a - .quad 0x0f872e54c,0x1c24afea4 - .quad 0x01e41e9fc,0x04c144932 - .quad 0x086d8e4d2,0x0271d9844 - .quad 0x160f7af7a,0x052148f02 - .quad 0x05bb8f1bc,0x08e766a0c - .quad 0x0a90fd27a,0x0a3c6f37a - .quad 0x0b3af077a,0x093a5f730 - .quad 0x04984d782,0x1d22c238e - .quad 0x0ca6ef3ac,0x06cb08e5c - .quad 0x0234e0b26,0x063ded06a - .quad 0x1d88abd4a,0x06b749fb2 - .quad 0x04597456a,0x04d56973c - .quad 0x0e9e28eb4,0x1167f94f2 - .quad 0x07b3ff57a,0x19385bf2e - .quad 0x0c9c8b782,0x0cec3662e - .quad 0x13a9cba9e,0x0e417f38a - .quad 0x093e106a4,0x19329634a - .quad 0x167001a9c,0x14e727980 - .quad 0x1ddffc5d4,0x0e6fc4e6a - .quad 0x00df04680,0x0d104b8fc - .quad 0x02342001e,0x08227bb8a - .quad 0x00a2a8d7e,0x05b397730 - .quad 0x168763fa6,0x0b0cd4768 - .quad 0x1ed5a407a,0x0e78eb416 - .quad 0x0d2c3ed1a,0x13c2b89c4 - .quad 0x0995a5724,0x1641378f0 - .quad 0x19b1afbc4,0x0d7a4825c - .quad 0x109ffedc0,0x08d96551c - .quad 0x0f2271e60,0x10f5ff2ba - .quad 0x00b0bf8ca,0x00bf80dd2 - .quad 0x123888b7a,0x00167d312 - .quad 0x1e888f7dc,0x18dcddd1c - .quad 0x002ee03b2,0x0f6076544 - .quad 0x183e8d8fe,0x06a45d2b2 - .quad 0x133d7a042,0x026f6a60a - .quad 0x116b0f50c,0x1dd3e10e8 - .quad 0x05fabe670,0x1a2adb74e - .quad 0x130004488,0x0de87806c - .quad 0x000bcf5f6,0x19d34af3a - .quad 0x18f0c7078,0x014338754 - .quad 0x017f27698,0x049c3cc9c - .quad 0x058ca5f00,0x15e3e77ee - .quad 0x1af900c24,0x068bce87a - .quad 0x0b5cfca28,0x0dd07448e - .quad 0x0ded288f8,0x1524fa6c6 - .quad 0x059f229bc,0x1d8048348 - .quad 0x06d390dec,0x16cba8aca - .quad 0x037170390,0x0a3e3e02c - .quad 0x06353c1cc,0x042d98888 - .quad 0x0c4584f5c,0x0d73c7bea - .quad 0x1f16a3418,0x1329d9f7e - .quad 0x0531377e2,0x185137662 - .quad 0x1d8d9ca7c,0x1b1c69528 - .quad 0x0b25b29f2,0x18a08b5bc - .quad 0x19fb2a8b0,0x02178513a - .quad 0x1a08fe6ac,0x1da758ae0 - .quad 0x045cddf4e,0x0e0ac139e - .quad 0x1a91647f2,0x169cf9eb0 - .quad 0x1a0f717c4,0x0170076fa + .long 0x493c7d27, 0x00000001 + .long 0xba4fc28e, 0x493c7d27 + .long 0xddc0152b, 0xf20c0dfe + .long 0x9e4addf8, 0xba4fc28e + .long 0x39d3b296, 0x3da6d0cb + .long 0x0715ce53, 0xddc0152b + .long 0x47db8317, 0x1c291d04 + .long 0x0d3b6092, 0x9e4addf8 + .long 0xc96cfdc0, 0x740eef02 + .long 0x878a92a7, 0x39d3b296 + .long 0xdaece73e, 0x083a6eec + .long 0xab7aff2a, 0x0715ce53 + .long 0x2162d385, 0xc49f4f67 + .long 0x83348832, 0x47db8317 + .long 0x299847d5, 0x2ad91c30 + .long 0xb9e02b86, 0x0d3b6092 + .long 0x18b33a4e, 0x6992cea2 + .long 0xb6dd949b, 0xc96cfdc0 + .long 0x78d9ccb7, 0x7e908048 + .long 0xbac2fd7b, 0x878a92a7 + .long 0xa60ce07b, 0x1b3d8f29 + .long 0xce7f39f4, 0xdaece73e + .long 0x61d82e56, 0xf1d0f55e + .long 0xd270f1a2, 0xab7aff2a + .long 0xc619809d, 0xa87ab8a8 + .long 0x2b3cac5d, 0x2162d385 + .long 0x65863b64, 0x8462d800 + .long 0x1b03397f, 0x83348832 + .long 0xebb883bd, 0x71d111a8 + .long 0xb3e32c28, 0x299847d5 + .long 0x064f7f26, 0xffd852c6 + .long 0xdd7e3b0c, 0xb9e02b86 + .long 0xf285651c, 0xdcb17aa4 + .long 0x10746f3c, 0x18b33a4e + .long 0xc7a68855, 0xf37c5aee + .long 0x271d9844, 0xb6dd949b + .long 0x8e766a0c, 0x6051d5a2 + .long 0x93a5f730, 0x78d9ccb7 + .long 0x6cb08e5c, 0x18b0d4ff + .long 0x6b749fb2, 0xbac2fd7b + .long 0x1393e203, 0x21f3d99c + .long 0xcec3662e, 0xa60ce07b + .long 0x96c515bb, 0x8f158014 + .long 0xe6fc4e6a, 0xce7f39f4 + .long 0x8227bb8a, 0xa00457f7 + .long 0xb0cd4768, 0x61d82e56 + .long 0x39c7ff35, 0x8d6d2c43 + .long 0xd7a4825c, 0xd270f1a2 + .long 0x0ab3844b, 0x00ac29cf + .long 0x0167d312, 0xc619809d + .long 0xf6076544, 0xe9adf796 + .long 0x26f6a60a, 0x2b3cac5d + .long 0xa741c1bf, 0x96638b34 + .long 0x98d8d9cb, 0x65863b64 + .long 0x49c3cc9c, 0xe0e9f351 + .long 0x68bce87a, 0x1b03397f + .long 0x57a3d037, 0x9af01f2d + .long 0x6956fc3b, 0xebb883bd + .long 0x42d98888, 0x2cff42cf + .long 0x3771e98f, 0xb3e32c28 + .long 0xb42ae3d9, 0x88f25a3a + .long 0x2178513a, 0x064f7f26 + .long 0xe0ac139e, 0x4e36f0b0 + .long 0x170076fa, 0xdd7e3b0c + .long 0x444dd413, 0xbd6f81f8 + .long 0x6f345e45, 0xf285651c + .long 0x41d17b64, 0x91c9bd4b + .long 0xff0dba97, 0x10746f3c + .long 0xa2b73df1, 0x885f087b + .long 0xf872e54c, 0xc7a68855 + .long 0x1e41e9fc, 0x4c144932 + .long 0x86d8e4d2, 0x271d9844 + .long 0x651bd98b, 0x52148f02 + .long 0x5bb8f1bc, 0x8e766a0c + .long 0xa90fd27a, 0xa3c6f37a + .long 0xb3af077a, 0x93a5f730 + .long 0x4984d782, 0xd7c0557f + .long 0xca6ef3ac, 0x6cb08e5c + .long 0x234e0b26, 0x63ded06a + .long 0xdd66cbbb, 0x6b749fb2 + .long 0x4597456a, 0x4d56973c + .long 0xe9e28eb4, 0x1393e203 + .long 0x7b3ff57a, 0x9669c9df + .long 0xc9c8b782, 0xcec3662e + .long 0x3f70cc6f, 0xe417f38a + .long 0x93e106a4, 0x96c515bb + .long 0x62ec6c6d, 0x4b9e0f71 + .long 0xd813b325, 0xe6fc4e6a + .long 0x0df04680, 0xd104b8fc + .long 0x2342001e, 0x8227bb8a + .long 0x0a2a8d7e, 0x5b397730 + .long 0x6d9a4957, 0xb0cd4768 + .long 0xe8b6368b, 0xe78eb416 + .long 0xd2c3ed1a, 0x39c7ff35 + .long 0x995a5724, 0x61ff0e01 + .long 0x9ef68d35, 0xd7a4825c + .long 0x0c139b31, 0x8d96551c + .long 0xf2271e60, 0x0ab3844b + .long 0x0b0bf8ca, 0x0bf80dd2 + .long 0x2664fd8b, 0x0167d312 + .long 0xed64812d, 0x8821abed + .long 0x02ee03b2, 0xf6076544 + .long 0x8604ae0f, 0x6a45d2b2 + .long 0x363bd6b3, 0x26f6a60a + .long 0x135c83fd, 0xd8d26619 + .long 0x5fabe670, 0xa741c1bf + .long 0x35ec3279, 0xde87806c + .long 0x00bcf5f6, 0x98d8d9cb + .long 0x8ae00689, 0x14338754 + .long 0x17f27698, 0x49c3cc9c + .long 0x58ca5f00, 0x5bd2011f + .long 0xaa7c7ad5, 0x68bce87a + .long 0xb5cfca28, 0xdd07448e + .long 0xded288f8, 0x57a3d037 + .long 0x59f229bc, 0xdde8f5b9 + .long 0x6d390dec, 0x6956fc3b + .long 0x37170390, 0xa3e3e02c + .long 0x6353c1cc, 0x42d98888 + .long 0xc4584f5c, 0xd73c7bea + .long 0xf48642e9, 0x3771e98f + .long 0x531377e2, 0x80ff0093 + .long 0xdd35bc8d, 0xb42ae3d9 + .long 0xb25b29f2, 0x8fe4c34d + .long 0x9a5ede41, 0x2178513a + .long 0xa563905d, 0xdf99fc11 + .long 0x45cddf4e, 0xe0ac139e + .long 0xacfa3103, 0x6c23e841 + .long 0xa51b6135, 0x170076fa diff --git a/arch/x86/crypto/des3_ede-asm_64.S b/arch/x86/crypto/des3_ede-asm_64.S new file mode 100644 index 000000000000..038f6ae87c5e --- /dev/null +++ b/arch/x86/crypto/des3_ede-asm_64.S @@ -0,0 +1,805 @@ +/* + * des3_ede-asm_64.S - x86-64 assembly implementation of 3DES cipher + * + * Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/linkage.h> + +.file "des3_ede-asm_64.S" +.text + +#define s1 .L_s1 +#define s2 ((s1) + (64*8)) +#define s3 ((s2) + (64*8)) +#define s4 ((s3) + (64*8)) +#define s5 ((s4) + (64*8)) +#define s6 ((s5) + (64*8)) +#define s7 ((s6) + (64*8)) +#define s8 ((s7) + (64*8)) + +/* register macros */ +#define CTX %rdi + +#define RL0 %r8 +#define RL1 %r9 +#define RL2 %r10 + +#define RL0d %r8d +#define RL1d %r9d +#define RL2d %r10d + +#define RR0 %r11 +#define RR1 %r12 +#define RR2 %r13 + +#define RR0d %r11d +#define RR1d %r12d +#define RR2d %r13d + +#define RW0 %rax +#define RW1 %rbx +#define RW2 %rcx + +#define RW0d %eax +#define RW1d %ebx +#define RW2d %ecx + +#define RW0bl %al +#define RW1bl %bl +#define RW2bl %cl + +#define RW0bh %ah +#define RW1bh %bh +#define RW2bh %ch + +#define RT0 %r15 +#define RT1 %rbp +#define RT2 %r14 +#define RT3 %rdx + +#define RT0d %r15d +#define RT1d %ebp +#define RT2d %r14d +#define RT3d %edx + +/*********************************************************************** + * 1-way 3DES + ***********************************************************************/ +#define do_permutation(a, b, offset, mask) \ + movl a, RT0d; \ + shrl $(offset), RT0d; \ + xorl b, RT0d; \ + andl $(mask), RT0d; \ + xorl RT0d, b; \ + shll $(offset), RT0d; \ + xorl RT0d, a; + +#define expand_to_64bits(val, mask) \ + movl val##d, RT0d; \ + rorl $4, RT0d; \ + shlq $32, RT0; \ + orq RT0, val; \ + andq mask, val; + +#define compress_to_64bits(val) \ + movq val, RT0; \ + shrq $32, RT0; \ + roll $4, RT0d; \ + orl RT0d, val##d; + +#define initial_permutation(left, right) \ + do_permutation(left##d, right##d, 4, 0x0f0f0f0f); \ + do_permutation(left##d, right##d, 16, 0x0000ffff); \ + do_permutation(right##d, left##d, 2, 0x33333333); \ + do_permutation(right##d, left##d, 8, 0x00ff00ff); \ + movabs $0x3f3f3f3f3f3f3f3f, RT3; \ + movl left##d, RW0d; \ + roll $1, right##d; \ + xorl right##d, RW0d; \ + andl $0xaaaaaaaa, RW0d; \ + xorl RW0d, left##d; \ + xorl RW0d, right##d; \ + roll $1, left##d; \ + expand_to_64bits(right, RT3); \ + expand_to_64bits(left, RT3); + +#define final_permutation(left, right) \ + compress_to_64bits(right); \ + compress_to_64bits(left); \ + movl right##d, RW0d; \ + rorl $1, left##d; \ + xorl left##d, RW0d; \ + andl $0xaaaaaaaa, RW0d; \ + xorl RW0d, right##d; \ + xorl RW0d, left##d; \ + rorl $1, right##d; \ + do_permutation(right##d, left##d, 8, 0x00ff00ff); \ + do_permutation(right##d, left##d, 2, 0x33333333); \ + do_permutation(left##d, right##d, 16, 0x0000ffff); \ + do_permutation(left##d, right##d, 4, 0x0f0f0f0f); + +#define round1(n, from, to, load_next_key) \ + xorq from, RW0; \ + \ + movzbl RW0bl, RT0d; \ + movzbl RW0bh, RT1d; \ + shrq $16, RW0; \ + movzbl RW0bl, RT2d; \ + movzbl RW0bh, RT3d; \ + shrq $16, RW0; \ + movq s8(, RT0, 8), RT0; \ + xorq s6(, RT1, 8), to; \ + movzbl RW0bl, RL1d; \ + movzbl RW0bh, RT1d; \ + shrl $16, RW0d; \ + xorq s4(, RT2, 8), RT0; \ + xorq s2(, RT3, 8), to; \ + movzbl RW0bl, RT2d; \ + movzbl RW0bh, RT3d; \ + xorq s7(, RL1, 8), RT0; \ + xorq s5(, RT1, 8), to; \ + xorq s3(, RT2, 8), RT0; \ + load_next_key(n, RW0); \ + xorq RT0, to; \ + xorq s1(, RT3, 8), to; \ + +#define load_next_key(n, RWx) \ + movq (((n) + 1) * 8)(CTX), RWx; + +#define dummy2(a, b) /*_*/ + +#define read_block(io, left, right) \ + movl (io), left##d; \ + movl 4(io), right##d; \ + bswapl left##d; \ + bswapl right##d; + +#define write_block(io, left, right) \ + bswapl left##d; \ + bswapl right##d; \ + movl left##d, (io); \ + movl right##d, 4(io); + +ENTRY(des3_ede_x86_64_crypt_blk) + /* input: + * %rdi: round keys, CTX + * %rsi: dst + * %rdx: src + */ + pushq %rbp; + pushq %rbx; + pushq %r12; + pushq %r13; + pushq %r14; + pushq %r15; + + read_block(%rdx, RL0, RR0); + initial_permutation(RL0, RR0); + + movq (CTX), RW0; + + round1(0, RR0, RL0, load_next_key); + round1(1, RL0, RR0, load_next_key); + round1(2, RR0, RL0, load_next_key); + round1(3, RL0, RR0, load_next_key); + round1(4, RR0, RL0, load_next_key); + round1(5, RL0, RR0, load_next_key); + round1(6, RR0, RL0, load_next_key); + round1(7, RL0, RR0, load_next_key); + round1(8, RR0, RL0, load_next_key); + round1(9, RL0, RR0, load_next_key); + round1(10, RR0, RL0, load_next_key); + round1(11, RL0, RR0, load_next_key); + round1(12, RR0, RL0, load_next_key); + round1(13, RL0, RR0, load_next_key); + round1(14, RR0, RL0, load_next_key); + round1(15, RL0, RR0, load_next_key); + + round1(16+0, RL0, RR0, load_next_key); + round1(16+1, RR0, RL0, load_next_key); + round1(16+2, RL0, RR0, load_next_key); + round1(16+3, RR0, RL0, load_next_key); + round1(16+4, RL0, RR0, load_next_key); + round1(16+5, RR0, RL0, load_next_key); + round1(16+6, RL0, RR0, load_next_key); + round1(16+7, RR0, RL0, load_next_key); + round1(16+8, RL0, RR0, load_next_key); + round1(16+9, RR0, RL0, load_next_key); + round1(16+10, RL0, RR0, load_next_key); + round1(16+11, RR0, RL0, load_next_key); + round1(16+12, RL0, RR0, load_next_key); + round1(16+13, RR0, RL0, load_next_key); + round1(16+14, RL0, RR0, load_next_key); + round1(16+15, RR0, RL0, load_next_key); + + round1(32+0, RR0, RL0, load_next_key); + round1(32+1, RL0, RR0, load_next_key); + round1(32+2, RR0, RL0, load_next_key); + round1(32+3, RL0, RR0, load_next_key); + round1(32+4, RR0, RL0, load_next_key); + round1(32+5, RL0, RR0, load_next_key); + round1(32+6, RR0, RL0, load_next_key); + round1(32+7, RL0, RR0, load_next_key); + round1(32+8, RR0, RL0, load_next_key); + round1(32+9, RL0, RR0, load_next_key); + round1(32+10, RR0, RL0, load_next_key); + round1(32+11, RL0, RR0, load_next_key); + round1(32+12, RR0, RL0, load_next_key); + round1(32+13, RL0, RR0, load_next_key); + round1(32+14, RR0, RL0, load_next_key); + round1(32+15, RL0, RR0, dummy2); + + final_permutation(RR0, RL0); + write_block(%rsi, RR0, RL0); + + popq %r15; + popq %r14; + popq %r13; + popq %r12; + popq %rbx; + popq %rbp; + + ret; +ENDPROC(des3_ede_x86_64_crypt_blk) + +/*********************************************************************** + * 3-way 3DES + ***********************************************************************/ +#define expand_to_64bits(val, mask) \ + movl val##d, RT0d; \ + rorl $4, RT0d; \ + shlq $32, RT0; \ + orq RT0, val; \ + andq mask, val; + +#define compress_to_64bits(val) \ + movq val, RT0; \ + shrq $32, RT0; \ + roll $4, RT0d; \ + orl RT0d, val##d; + +#define initial_permutation3(left, right) \ + do_permutation(left##0d, right##0d, 4, 0x0f0f0f0f); \ + do_permutation(left##0d, right##0d, 16, 0x0000ffff); \ + do_permutation(left##1d, right##1d, 4, 0x0f0f0f0f); \ + do_permutation(left##1d, right##1d, 16, 0x0000ffff); \ + do_permutation(left##2d, right##2d, 4, 0x0f0f0f0f); \ + do_permutation(left##2d, right##2d, 16, 0x0000ffff); \ + \ + do_permutation(right##0d, left##0d, 2, 0x33333333); \ + do_permutation(right##0d, left##0d, 8, 0x00ff00ff); \ + do_permutation(right##1d, left##1d, 2, 0x33333333); \ + do_permutation(right##1d, left##1d, 8, 0x00ff00ff); \ + do_permutation(right##2d, left##2d, 2, 0x33333333); \ + do_permutation(right##2d, left##2d, 8, 0x00ff00ff); \ + \ + movabs $0x3f3f3f3f3f3f3f3f, RT3; \ + \ + movl left##0d, RW0d; \ + roll $1, right##0d; \ + xorl right##0d, RW0d; \ + andl $0xaaaaaaaa, RW0d; \ + xorl RW0d, left##0d; \ + xorl RW0d, right##0d; \ + roll $1, left##0d; \ + expand_to_64bits(right##0, RT3); \ + expand_to_64bits(left##0, RT3); \ + movl left##1d, RW1d; \ + roll $1, right##1d; \ + xorl right##1d, RW1d; \ + andl $0xaaaaaaaa, RW1d; \ + xorl RW1d, left##1d; \ + xorl RW1d, right##1d; \ + roll $1, left##1d; \ + expand_to_64bits(right##1, RT3); \ + expand_to_64bits(left##1, RT3); \ + movl left##2d, RW2d; \ + roll $1, right##2d; \ + xorl right##2d, RW2d; \ + andl $0xaaaaaaaa, RW2d; \ + xorl RW2d, left##2d; \ + xorl RW2d, right##2d; \ + roll $1, left##2d; \ + expand_to_64bits(right##2, RT3); \ + expand_to_64bits(left##2, RT3); + +#define final_permutation3(left, right) \ + compress_to_64bits(right##0); \ + compress_to_64bits(left##0); \ + movl right##0d, RW0d; \ + rorl $1, left##0d; \ + xorl left##0d, RW0d; \ + andl $0xaaaaaaaa, RW0d; \ + xorl RW0d, right##0d; \ + xorl RW0d, left##0d; \ + rorl $1, right##0d; \ + compress_to_64bits(right##1); \ + compress_to_64bits(left##1); \ + movl right##1d, RW1d; \ + rorl $1, left##1d; \ + xorl left##1d, RW1d; \ + andl $0xaaaaaaaa, RW1d; \ + xorl RW1d, right##1d; \ + xorl RW1d, left##1d; \ + rorl $1, right##1d; \ + compress_to_64bits(right##2); \ + compress_to_64bits(left##2); \ + movl right##2d, RW2d; \ + rorl $1, left##2d; \ + xorl left##2d, RW2d; \ + andl $0xaaaaaaaa, RW2d; \ + xorl RW2d, right##2d; \ + xorl RW2d, left##2d; \ + rorl $1, right##2d; \ + \ + do_permutation(right##0d, left##0d, 8, 0x00ff00ff); \ + do_permutation(right##0d, left##0d, 2, 0x33333333); \ + do_permutation(right##1d, left##1d, 8, 0x00ff00ff); \ + do_permutation(right##1d, left##1d, 2, 0x33333333); \ + do_permutation(right##2d, left##2d, 8, 0x00ff00ff); \ + do_permutation(right##2d, left##2d, 2, 0x33333333); \ + \ + do_permutation(left##0d, right##0d, 16, 0x0000ffff); \ + do_permutation(left##0d, right##0d, 4, 0x0f0f0f0f); \ + do_permutation(left##1d, right##1d, 16, 0x0000ffff); \ + do_permutation(left##1d, right##1d, 4, 0x0f0f0f0f); \ + do_permutation(left##2d, right##2d, 16, 0x0000ffff); \ + do_permutation(left##2d, right##2d, 4, 0x0f0f0f0f); + +#define round3(n, from, to, load_next_key, do_movq) \ + xorq from##0, RW0; \ + movzbl RW0bl, RT3d; \ + movzbl RW0bh, RT1d; \ + shrq $16, RW0; \ + xorq s8(, RT3, 8), to##0; \ + xorq s6(, RT1, 8), to##0; \ + movzbl RW0bl, RT3d; \ + movzbl RW0bh, RT1d; \ + shrq $16, RW0; \ + xorq s4(, RT3, 8), to##0; \ + xorq s2(, RT1, 8), to##0; \ + movzbl RW0bl, RT3d; \ + movzbl RW0bh, RT1d; \ + shrl $16, RW0d; \ + xorq s7(, RT3, 8), to##0; \ + xorq s5(, RT1, 8), to##0; \ + movzbl RW0bl, RT3d; \ + movzbl RW0bh, RT1d; \ + load_next_key(n, RW0); \ + xorq s3(, RT3, 8), to##0; \ + xorq s1(, RT1, 8), to##0; \ + xorq from##1, RW1; \ + movzbl RW1bl, RT3d; \ + movzbl RW1bh, RT1d; \ + shrq $16, RW1; \ + xorq s8(, RT3, 8), to##1; \ + xorq s6(, RT1, 8), to##1; \ + movzbl RW1bl, RT3d; \ + movzbl RW1bh, RT1d; \ + shrq $16, RW1; \ + xorq s4(, RT3, 8), to##1; \ + xorq s2(, RT1, 8), to##1; \ + movzbl RW1bl, RT3d; \ + movzbl RW1bh, RT1d; \ + shrl $16, RW1d; \ + xorq s7(, RT3, 8), to##1; \ + xorq s5(, RT1, 8), to##1; \ + movzbl RW1bl, RT3d; \ + movzbl RW1bh, RT1d; \ + do_movq(RW0, RW1); \ + xorq s3(, RT3, 8), to##1; \ + xorq s1(, RT1, 8), to##1; \ + xorq from##2, RW2; \ + movzbl RW2bl, RT3d; \ + movzbl RW2bh, RT1d; \ + shrq $16, RW2; \ + xorq s8(, RT3, 8), to##2; \ + xorq s6(, RT1, 8), to##2; \ + movzbl RW2bl, RT3d; \ + movzbl RW2bh, RT1d; \ + shrq $16, RW2; \ + xorq s4(, RT3, 8), to##2; \ + xorq s2(, RT1, 8), to##2; \ + movzbl RW2bl, RT3d; \ + movzbl RW2bh, RT1d; \ + shrl $16, RW2d; \ + xorq s7(, RT3, 8), to##2; \ + xorq s5(, RT1, 8), to##2; \ + movzbl RW2bl, RT3d; \ + movzbl RW2bh, RT1d; \ + do_movq(RW0, RW2); \ + xorq s3(, RT3, 8), to##2; \ + xorq s1(, RT1, 8), to##2; + +#define __movq(src, dst) \ + movq src, dst; + +ENTRY(des3_ede_x86_64_crypt_blk_3way) + /* input: + * %rdi: ctx, round keys + * %rsi: dst (3 blocks) + * %rdx: src (3 blocks) + */ + + pushq %rbp; + pushq %rbx; + pushq %r12; + pushq %r13; + pushq %r14; + pushq %r15; + + /* load input */ + movl 0 * 4(%rdx), RL0d; + movl 1 * 4(%rdx), RR0d; + movl 2 * 4(%rdx), RL1d; + movl 3 * 4(%rdx), RR1d; + movl 4 * 4(%rdx), RL2d; + movl 5 * 4(%rdx), RR2d; + + bswapl RL0d; + bswapl RR0d; + bswapl RL1d; + bswapl RR1d; + bswapl RL2d; + bswapl RR2d; + + initial_permutation3(RL, RR); + + movq 0(CTX), RW0; + movq RW0, RW1; + movq RW0, RW2; + + round3(0, RR, RL, load_next_key, __movq); + round3(1, RL, RR, load_next_key, __movq); + round3(2, RR, RL, load_next_key, __movq); + round3(3, RL, RR, load_next_key, __movq); + round3(4, RR, RL, load_next_key, __movq); + round3(5, RL, RR, load_next_key, __movq); + round3(6, RR, RL, load_next_key, __movq); + round3(7, RL, RR, load_next_key, __movq); + round3(8, RR, RL, load_next_key, __movq); + round3(9, RL, RR, load_next_key, __movq); + round3(10, RR, RL, load_next_key, __movq); + round3(11, RL, RR, load_next_key, __movq); + round3(12, RR, RL, load_next_key, __movq); + round3(13, RL, RR, load_next_key, __movq); + round3(14, RR, RL, load_next_key, __movq); + round3(15, RL, RR, load_next_key, __movq); + + round3(16+0, RL, RR, load_next_key, __movq); + round3(16+1, RR, RL, load_next_key, __movq); + round3(16+2, RL, RR, load_next_key, __movq); + round3(16+3, RR, RL, load_next_key, __movq); + round3(16+4, RL, RR, load_next_key, __movq); + round3(16+5, RR, RL, load_next_key, __movq); + round3(16+6, RL, RR, load_next_key, __movq); + round3(16+7, RR, RL, load_next_key, __movq); + round3(16+8, RL, RR, load_next_key, __movq); + round3(16+9, RR, RL, load_next_key, __movq); + round3(16+10, RL, RR, load_next_key, __movq); + round3(16+11, RR, RL, load_next_key, __movq); + round3(16+12, RL, RR, load_next_key, __movq); + round3(16+13, RR, RL, load_next_key, __movq); + round3(16+14, RL, RR, load_next_key, __movq); + round3(16+15, RR, RL, load_next_key, __movq); + + round3(32+0, RR, RL, load_next_key, __movq); + round3(32+1, RL, RR, load_next_key, __movq); + round3(32+2, RR, RL, load_next_key, __movq); + round3(32+3, RL, RR, load_next_key, __movq); + round3(32+4, RR, RL, load_next_key, __movq); + round3(32+5, RL, RR, load_next_key, __movq); + round3(32+6, RR, RL, load_next_key, __movq); + round3(32+7, RL, RR, load_next_key, __movq); + round3(32+8, RR, RL, load_next_key, __movq); + round3(32+9, RL, RR, load_next_key, __movq); + round3(32+10, RR, RL, load_next_key, __movq); + round3(32+11, RL, RR, load_next_key, __movq); + round3(32+12, RR, RL, load_next_key, __movq); + round3(32+13, RL, RR, load_next_key, __movq); + round3(32+14, RR, RL, load_next_key, __movq); + round3(32+15, RL, RR, dummy2, dummy2); + + final_permutation3(RR, RL); + + bswapl RR0d; + bswapl RL0d; + bswapl RR1d; + bswapl RL1d; + bswapl RR2d; + bswapl RL2d; + + movl RR0d, 0 * 4(%rsi); + movl RL0d, 1 * 4(%rsi); + movl RR1d, 2 * 4(%rsi); + movl RL1d, 3 * 4(%rsi); + movl RR2d, 4 * 4(%rsi); + movl RL2d, 5 * 4(%rsi); + + popq %r15; + popq %r14; + popq %r13; + popq %r12; + popq %rbx; + popq %rbp; + + ret; +ENDPROC(des3_ede_x86_64_crypt_blk_3way) + +.data +.align 16 +.L_s1: + .quad 0x0010100001010400, 0x0000000000000000 + .quad 0x0000100000010000, 0x0010100001010404 + .quad 0x0010100001010004, 0x0000100000010404 + .quad 0x0000000000000004, 0x0000100000010000 + .quad 0x0000000000000400, 0x0010100001010400 + .quad 0x0010100001010404, 0x0000000000000400 + .quad 0x0010000001000404, 0x0010100001010004 + .quad 0x0010000001000000, 0x0000000000000004 + .quad 0x0000000000000404, 0x0010000001000400 + .quad 0x0010000001000400, 0x0000100000010400 + .quad 0x0000100000010400, 0x0010100001010000 + .quad 0x0010100001010000, 0x0010000001000404 + .quad 0x0000100000010004, 0x0010000001000004 + .quad 0x0010000001000004, 0x0000100000010004 + .quad 0x0000000000000000, 0x0000000000000404 + .quad 0x0000100000010404, 0x0010000001000000 + .quad 0x0000100000010000, 0x0010100001010404 + .quad 0x0000000000000004, 0x0010100001010000 + .quad 0x0010100001010400, 0x0010000001000000 + .quad 0x0010000001000000, 0x0000000000000400 + .quad 0x0010100001010004, 0x0000100000010000 + .quad 0x0000100000010400, 0x0010000001000004 + .quad 0x0000000000000400, 0x0000000000000004 + .quad 0x0010000001000404, 0x0000100000010404 + .quad 0x0010100001010404, 0x0000100000010004 + .quad 0x0010100001010000, 0x0010000001000404 + .quad 0x0010000001000004, 0x0000000000000404 + .quad 0x0000100000010404, 0x0010100001010400 + .quad 0x0000000000000404, 0x0010000001000400 + .quad 0x0010000001000400, 0x0000000000000000 + .quad 0x0000100000010004, 0x0000100000010400 + .quad 0x0000000000000000, 0x0010100001010004 +.L_s2: + .quad 0x0801080200100020, 0x0800080000000000 + .quad 0x0000080000000000, 0x0001080200100020 + .quad 0x0001000000100000, 0x0000000200000020 + .quad 0x0801000200100020, 0x0800080200000020 + .quad 0x0800000200000020, 0x0801080200100020 + .quad 0x0801080000100000, 0x0800000000000000 + .quad 0x0800080000000000, 0x0001000000100000 + .quad 0x0000000200000020, 0x0801000200100020 + .quad 0x0001080000100000, 0x0001000200100020 + .quad 0x0800080200000020, 0x0000000000000000 + .quad 0x0800000000000000, 0x0000080000000000 + .quad 0x0001080200100020, 0x0801000000100000 + .quad 0x0001000200100020, 0x0800000200000020 + .quad 0x0000000000000000, 0x0001080000100000 + .quad 0x0000080200000020, 0x0801080000100000 + .quad 0x0801000000100000, 0x0000080200000020 + .quad 0x0000000000000000, 0x0001080200100020 + .quad 0x0801000200100020, 0x0001000000100000 + .quad 0x0800080200000020, 0x0801000000100000 + .quad 0x0801080000100000, 0x0000080000000000 + .quad 0x0801000000100000, 0x0800080000000000 + .quad 0x0000000200000020, 0x0801080200100020 + .quad 0x0001080200100020, 0x0000000200000020 + .quad 0x0000080000000000, 0x0800000000000000 + .quad 0x0000080200000020, 0x0801080000100000 + .quad 0x0001000000100000, 0x0800000200000020 + .quad 0x0001000200100020, 0x0800080200000020 + .quad 0x0800000200000020, 0x0001000200100020 + .quad 0x0001080000100000, 0x0000000000000000 + .quad 0x0800080000000000, 0x0000080200000020 + .quad 0x0800000000000000, 0x0801000200100020 + .quad 0x0801080200100020, 0x0001080000100000 +.L_s3: + .quad 0x0000002000000208, 0x0000202008020200 + .quad 0x0000000000000000, 0x0000200008020008 + .quad 0x0000002008000200, 0x0000000000000000 + .quad 0x0000202000020208, 0x0000002008000200 + .quad 0x0000200000020008, 0x0000000008000008 + .quad 0x0000000008000008, 0x0000200000020000 + .quad 0x0000202008020208, 0x0000200000020008 + .quad 0x0000200008020000, 0x0000002000000208 + .quad 0x0000000008000000, 0x0000000000000008 + .quad 0x0000202008020200, 0x0000002000000200 + .quad 0x0000202000020200, 0x0000200008020000 + .quad 0x0000200008020008, 0x0000202000020208 + .quad 0x0000002008000208, 0x0000202000020200 + .quad 0x0000200000020000, 0x0000002008000208 + .quad 0x0000000000000008, 0x0000202008020208 + .quad 0x0000002000000200, 0x0000000008000000 + .quad 0x0000202008020200, 0x0000000008000000 + .quad 0x0000200000020008, 0x0000002000000208 + .quad 0x0000200000020000, 0x0000202008020200 + .quad 0x0000002008000200, 0x0000000000000000 + .quad 0x0000002000000200, 0x0000200000020008 + .quad 0x0000202008020208, 0x0000002008000200 + .quad 0x0000000008000008, 0x0000002000000200 + .quad 0x0000000000000000, 0x0000200008020008 + .quad 0x0000002008000208, 0x0000200000020000 + .quad 0x0000000008000000, 0x0000202008020208 + .quad 0x0000000000000008, 0x0000202000020208 + .quad 0x0000202000020200, 0x0000000008000008 + .quad 0x0000200008020000, 0x0000002008000208 + .quad 0x0000002000000208, 0x0000200008020000 + .quad 0x0000202000020208, 0x0000000000000008 + .quad 0x0000200008020008, 0x0000202000020200 +.L_s4: + .quad 0x1008020000002001, 0x1000020800002001 + .quad 0x1000020800002001, 0x0000000800000000 + .quad 0x0008020800002000, 0x1008000800000001 + .quad 0x1008000000000001, 0x1000020000002001 + .quad 0x0000000000000000, 0x0008020000002000 + .quad 0x0008020000002000, 0x1008020800002001 + .quad 0x1000000800000001, 0x0000000000000000 + .quad 0x0008000800000000, 0x1008000000000001 + .quad 0x1000000000000001, 0x0000020000002000 + .quad 0x0008000000000000, 0x1008020000002001 + .quad 0x0000000800000000, 0x0008000000000000 + .quad 0x1000020000002001, 0x0000020800002000 + .quad 0x1008000800000001, 0x1000000000000001 + .quad 0x0000020800002000, 0x0008000800000000 + .quad 0x0000020000002000, 0x0008020800002000 + .quad 0x1008020800002001, 0x1000000800000001 + .quad 0x0008000800000000, 0x1008000000000001 + .quad 0x0008020000002000, 0x1008020800002001 + .quad 0x1000000800000001, 0x0000000000000000 + .quad 0x0000000000000000, 0x0008020000002000 + .quad 0x0000020800002000, 0x0008000800000000 + .quad 0x1008000800000001, 0x1000000000000001 + .quad 0x1008020000002001, 0x1000020800002001 + .quad 0x1000020800002001, 0x0000000800000000 + .quad 0x1008020800002001, 0x1000000800000001 + .quad 0x1000000000000001, 0x0000020000002000 + .quad 0x1008000000000001, 0x1000020000002001 + .quad 0x0008020800002000, 0x1008000800000001 + .quad 0x1000020000002001, 0x0000020800002000 + .quad 0x0008000000000000, 0x1008020000002001 + .quad 0x0000000800000000, 0x0008000000000000 + .quad 0x0000020000002000, 0x0008020800002000 +.L_s5: + .quad 0x0000001000000100, 0x0020001002080100 + .quad 0x0020000002080000, 0x0420001002000100 + .quad 0x0000000000080000, 0x0000001000000100 + .quad 0x0400000000000000, 0x0020000002080000 + .quad 0x0400001000080100, 0x0000000000080000 + .quad 0x0020001002000100, 0x0400001000080100 + .quad 0x0420001002000100, 0x0420000002080000 + .quad 0x0000001000080100, 0x0400000000000000 + .quad 0x0020000002000000, 0x0400000000080000 + .quad 0x0400000000080000, 0x0000000000000000 + .quad 0x0400001000000100, 0x0420001002080100 + .quad 0x0420001002080100, 0x0020001002000100 + .quad 0x0420000002080000, 0x0400001000000100 + .quad 0x0000000000000000, 0x0420000002000000 + .quad 0x0020001002080100, 0x0020000002000000 + .quad 0x0420000002000000, 0x0000001000080100 + .quad 0x0000000000080000, 0x0420001002000100 + .quad 0x0000001000000100, 0x0020000002000000 + .quad 0x0400000000000000, 0x0020000002080000 + .quad 0x0420001002000100, 0x0400001000080100 + .quad 0x0020001002000100, 0x0400000000000000 + .quad 0x0420000002080000, 0x0020001002080100 + .quad 0x0400001000080100, 0x0000001000000100 + .quad 0x0020000002000000, 0x0420000002080000 + .quad 0x0420001002080100, 0x0000001000080100 + .quad 0x0420000002000000, 0x0420001002080100 + .quad 0x0020000002080000, 0x0000000000000000 + .quad 0x0400000000080000, 0x0420000002000000 + .quad 0x0000001000080100, 0x0020001002000100 + .quad 0x0400001000000100, 0x0000000000080000 + .quad 0x0000000000000000, 0x0400000000080000 + .quad 0x0020001002080100, 0x0400001000000100 +.L_s6: + .quad 0x0200000120000010, 0x0204000020000000 + .quad 0x0000040000000000, 0x0204040120000010 + .quad 0x0204000020000000, 0x0000000100000010 + .quad 0x0204040120000010, 0x0004000000000000 + .quad 0x0200040020000000, 0x0004040100000010 + .quad 0x0004000000000000, 0x0200000120000010 + .quad 0x0004000100000010, 0x0200040020000000 + .quad 0x0200000020000000, 0x0000040100000010 + .quad 0x0000000000000000, 0x0004000100000010 + .quad 0x0200040120000010, 0x0000040000000000 + .quad 0x0004040000000000, 0x0200040120000010 + .quad 0x0000000100000010, 0x0204000120000010 + .quad 0x0204000120000010, 0x0000000000000000 + .quad 0x0004040100000010, 0x0204040020000000 + .quad 0x0000040100000010, 0x0004040000000000 + .quad 0x0204040020000000, 0x0200000020000000 + .quad 0x0200040020000000, 0x0000000100000010 + .quad 0x0204000120000010, 0x0004040000000000 + .quad 0x0204040120000010, 0x0004000000000000 + .quad 0x0000040100000010, 0x0200000120000010 + .quad 0x0004000000000000, 0x0200040020000000 + .quad 0x0200000020000000, 0x0000040100000010 + .quad 0x0200000120000010, 0x0204040120000010 + .quad 0x0004040000000000, 0x0204000020000000 + .quad 0x0004040100000010, 0x0204040020000000 + .quad 0x0000000000000000, 0x0204000120000010 + .quad 0x0000000100000010, 0x0000040000000000 + .quad 0x0204000020000000, 0x0004040100000010 + .quad 0x0000040000000000, 0x0004000100000010 + .quad 0x0200040120000010, 0x0000000000000000 + .quad 0x0204040020000000, 0x0200000020000000 + .quad 0x0004000100000010, 0x0200040120000010 +.L_s7: + .quad 0x0002000000200000, 0x2002000004200002 + .quad 0x2000000004000802, 0x0000000000000000 + .quad 0x0000000000000800, 0x2000000004000802 + .quad 0x2002000000200802, 0x0002000004200800 + .quad 0x2002000004200802, 0x0002000000200000 + .quad 0x0000000000000000, 0x2000000004000002 + .quad 0x2000000000000002, 0x0000000004000000 + .quad 0x2002000004200002, 0x2000000000000802 + .quad 0x0000000004000800, 0x2002000000200802 + .quad 0x2002000000200002, 0x0000000004000800 + .quad 0x2000000004000002, 0x0002000004200000 + .quad 0x0002000004200800, 0x2002000000200002 + .quad 0x0002000004200000, 0x0000000000000800 + .quad 0x2000000000000802, 0x2002000004200802 + .quad 0x0002000000200800, 0x2000000000000002 + .quad 0x0000000004000000, 0x0002000000200800 + .quad 0x0000000004000000, 0x0002000000200800 + .quad 0x0002000000200000, 0x2000000004000802 + .quad 0x2000000004000802, 0x2002000004200002 + .quad 0x2002000004200002, 0x2000000000000002 + .quad 0x2002000000200002, 0x0000000004000000 + .quad 0x0000000004000800, 0x0002000000200000 + .quad 0x0002000004200800, 0x2000000000000802 + .quad 0x2002000000200802, 0x0002000004200800 + .quad 0x2000000000000802, 0x2000000004000002 + .quad 0x2002000004200802, 0x0002000004200000 + .quad 0x0002000000200800, 0x0000000000000000 + .quad 0x2000000000000002, 0x2002000004200802 + .quad 0x0000000000000000, 0x2002000000200802 + .quad 0x0002000004200000, 0x0000000000000800 + .quad 0x2000000004000002, 0x0000000004000800 + .quad 0x0000000000000800, 0x2002000000200002 +.L_s8: + .quad 0x0100010410001000, 0x0000010000001000 + .quad 0x0000000000040000, 0x0100010410041000 + .quad 0x0100000010000000, 0x0100010410001000 + .quad 0x0000000400000000, 0x0100000010000000 + .quad 0x0000000400040000, 0x0100000010040000 + .quad 0x0100010410041000, 0x0000010000041000 + .quad 0x0100010010041000, 0x0000010400041000 + .quad 0x0000010000001000, 0x0000000400000000 + .quad 0x0100000010040000, 0x0100000410000000 + .quad 0x0100010010001000, 0x0000010400001000 + .quad 0x0000010000041000, 0x0000000400040000 + .quad 0x0100000410040000, 0x0100010010041000 + .quad 0x0000010400001000, 0x0000000000000000 + .quad 0x0000000000000000, 0x0100000410040000 + .quad 0x0100000410000000, 0x0100010010001000 + .quad 0x0000010400041000, 0x0000000000040000 + .quad 0x0000010400041000, 0x0000000000040000 + .quad 0x0100010010041000, 0x0000010000001000 + .quad 0x0000000400000000, 0x0100000410040000 + .quad 0x0000010000001000, 0x0000010400041000 + .quad 0x0100010010001000, 0x0000000400000000 + .quad 0x0100000410000000, 0x0100000010040000 + .quad 0x0100000410040000, 0x0100000010000000 + .quad 0x0000000000040000, 0x0100010410001000 + .quad 0x0000000000000000, 0x0100010410041000 + .quad 0x0000000400040000, 0x0100000410000000 + .quad 0x0100000010040000, 0x0100010010001000 + .quad 0x0100010410001000, 0x0000000000000000 + .quad 0x0100010410041000, 0x0000010000041000 + .quad 0x0000010000041000, 0x0000010400001000 + .quad 0x0000010400001000, 0x0000000400040000 + .quad 0x0100000010000000, 0x0100010010041000 diff --git a/arch/x86/crypto/des3_ede_glue.c b/arch/x86/crypto/des3_ede_glue.c new file mode 100644 index 000000000000..0e9c0668fe4e --- /dev/null +++ b/arch/x86/crypto/des3_ede_glue.c @@ -0,0 +1,509 @@ +/* + * Glue Code for assembler optimized version of 3DES + * + * Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> + * + * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by: + * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au> + * CTR part based on code (crypto/ctr.c) by: + * (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include <asm/processor.h> +#include <crypto/des.h> +#include <linux/crypto.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/types.h> +#include <crypto/algapi.h> + +struct des3_ede_x86_ctx { + u32 enc_expkey[DES3_EDE_EXPKEY_WORDS]; + u32 dec_expkey[DES3_EDE_EXPKEY_WORDS]; +}; + +/* regular block cipher functions */ +asmlinkage void des3_ede_x86_64_crypt_blk(const u32 *expkey, u8 *dst, + const u8 *src); + +/* 3-way parallel cipher functions */ +asmlinkage void des3_ede_x86_64_crypt_blk_3way(const u32 *expkey, u8 *dst, + const u8 *src); + +static inline void des3_ede_enc_blk(struct des3_ede_x86_ctx *ctx, u8 *dst, + const u8 *src) +{ + u32 *enc_ctx = ctx->enc_expkey; + + des3_ede_x86_64_crypt_blk(enc_ctx, dst, src); +} + +static inline void des3_ede_dec_blk(struct des3_ede_x86_ctx *ctx, u8 *dst, + const u8 *src) +{ + u32 *dec_ctx = ctx->dec_expkey; + + des3_ede_x86_64_crypt_blk(dec_ctx, dst, src); +} + +static inline void des3_ede_enc_blk_3way(struct des3_ede_x86_ctx *ctx, u8 *dst, + const u8 *src) +{ + u32 *enc_ctx = ctx->enc_expkey; + + des3_ede_x86_64_crypt_blk_3way(enc_ctx, dst, src); +} + +static inline void des3_ede_dec_blk_3way(struct des3_ede_x86_ctx *ctx, u8 *dst, + const u8 *src) +{ + u32 *dec_ctx = ctx->dec_expkey; + + des3_ede_x86_64_crypt_blk_3way(dec_ctx, dst, src); +} + +static void des3_ede_x86_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + des3_ede_enc_blk(crypto_tfm_ctx(tfm), dst, src); +} + +static void des3_ede_x86_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + des3_ede_dec_blk(crypto_tfm_ctx(tfm), dst, src); +} + +static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, + const u32 *expkey) +{ + unsigned int bsize = DES3_EDE_BLOCK_SIZE; + unsigned int nbytes; + int err; + + err = blkcipher_walk_virt(desc, walk); + + while ((nbytes = walk->nbytes)) { + u8 *wsrc = walk->src.virt.addr; + u8 *wdst = walk->dst.virt.addr; + + /* Process four block batch */ + if (nbytes >= bsize * 3) { + do { + des3_ede_x86_64_crypt_blk_3way(expkey, wdst, + wsrc); + + wsrc += bsize * 3; + wdst += bsize * 3; + nbytes -= bsize * 3; + } while (nbytes >= bsize * 3); + + if (nbytes < bsize) + goto done; + } + + /* Handle leftovers */ + do { + des3_ede_x86_64_crypt_blk(expkey, wdst, wsrc); + + wsrc += bsize; + wdst += bsize; + nbytes -= bsize; + } while (nbytes >= bsize); + +done: + err = blkcipher_walk_done(desc, walk, nbytes); + } + + return err; +} + +static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct des3_ede_x86_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ecb_crypt(desc, &walk, ctx->enc_expkey); +} + +static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct des3_ede_x86_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ecb_crypt(desc, &walk, ctx->dec_expkey); +} + +static unsigned int __cbc_encrypt(struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + struct des3_ede_x86_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + unsigned int bsize = DES3_EDE_BLOCK_SIZE; + unsigned int nbytes = walk->nbytes; + u64 *src = (u64 *)walk->src.virt.addr; + u64 *dst = (u64 *)walk->dst.virt.addr; + u64 *iv = (u64 *)walk->iv; + + do { + *dst = *src ^ *iv; + des3_ede_enc_blk(ctx, (u8 *)dst, (u8 *)dst); + iv = dst; + + src += 1; + dst += 1; + nbytes -= bsize; + } while (nbytes >= bsize); + + *(u64 *)walk->iv = *iv; + return nbytes; +} + +static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + while ((nbytes = walk.nbytes)) { + nbytes = __cbc_encrypt(desc, &walk); + err = blkcipher_walk_done(desc, &walk, nbytes); + } + + return err; +} + +static unsigned int __cbc_decrypt(struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + struct des3_ede_x86_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + unsigned int bsize = DES3_EDE_BLOCK_SIZE; + unsigned int nbytes = walk->nbytes; + u64 *src = (u64 *)walk->src.virt.addr; + u64 *dst = (u64 *)walk->dst.virt.addr; + u64 ivs[3 - 1]; + u64 last_iv; + + /* Start of the last block. */ + src += nbytes / bsize - 1; + dst += nbytes / bsize - 1; + + last_iv = *src; + + /* Process four block batch */ + if (nbytes >= bsize * 3) { + do { + nbytes -= bsize * 3 - bsize; + src -= 3 - 1; + dst -= 3 - 1; + + ivs[0] = src[0]; + ivs[1] = src[1]; + + des3_ede_dec_blk_3way(ctx, (u8 *)dst, (u8 *)src); + + dst[1] ^= ivs[0]; + dst[2] ^= ivs[1]; + + nbytes -= bsize; + if (nbytes < bsize) + goto done; + + *dst ^= *(src - 1); + src -= 1; + dst -= 1; + } while (nbytes >= bsize * 3); + } + + /* Handle leftovers */ + for (;;) { + des3_ede_dec_blk(ctx, (u8 *)dst, (u8 *)src); + + nbytes -= bsize; + if (nbytes < bsize) + break; + + *dst ^= *(src - 1); + src -= 1; + dst -= 1; + } + +done: + *dst ^= *(u64 *)walk->iv; + *(u64 *)walk->iv = last_iv; + + return nbytes; +} + +static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + while ((nbytes = walk.nbytes)) { + nbytes = __cbc_decrypt(desc, &walk); + err = blkcipher_walk_done(desc, &walk, nbytes); + } + + return err; +} + +static void ctr_crypt_final(struct des3_ede_x86_ctx *ctx, + struct blkcipher_walk *walk) +{ + u8 *ctrblk = walk->iv; + u8 keystream[DES3_EDE_BLOCK_SIZE]; + u8 *src = walk->src.virt.addr; + u8 *dst = walk->dst.virt.addr; + unsigned int nbytes = walk->nbytes; + + des3_ede_enc_blk(ctx, keystream, ctrblk); + crypto_xor(keystream, src, nbytes); + memcpy(dst, keystream, nbytes); + + crypto_inc(ctrblk, DES3_EDE_BLOCK_SIZE); +} + +static unsigned int __ctr_crypt(struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + struct des3_ede_x86_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + unsigned int bsize = DES3_EDE_BLOCK_SIZE; + unsigned int nbytes = walk->nbytes; + __be64 *src = (__be64 *)walk->src.virt.addr; + __be64 *dst = (__be64 *)walk->dst.virt.addr; + u64 ctrblk = be64_to_cpu(*(__be64 *)walk->iv); + __be64 ctrblocks[3]; + + /* Process four block batch */ + if (nbytes >= bsize * 3) { + do { + /* create ctrblks for parallel encrypt */ + ctrblocks[0] = cpu_to_be64(ctrblk++); + ctrblocks[1] = cpu_to_be64(ctrblk++); + ctrblocks[2] = cpu_to_be64(ctrblk++); + + des3_ede_enc_blk_3way(ctx, (u8 *)ctrblocks, + (u8 *)ctrblocks); + + dst[0] = src[0] ^ ctrblocks[0]; + dst[1] = src[1] ^ ctrblocks[1]; + dst[2] = src[2] ^ ctrblocks[2]; + + src += 3; + dst += 3; + } while ((nbytes -= bsize * 3) >= bsize * 3); + + if (nbytes < bsize) + goto done; + } + + /* Handle leftovers */ + do { + ctrblocks[0] = cpu_to_be64(ctrblk++); + + des3_ede_enc_blk(ctx, (u8 *)ctrblocks, (u8 *)ctrblocks); + + dst[0] = src[0] ^ ctrblocks[0]; + + src += 1; + dst += 1; + } while ((nbytes -= bsize) >= bsize); + +done: + *(__be64 *)walk->iv = cpu_to_be64(ctrblk); + return nbytes; +} + +static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt_block(desc, &walk, DES3_EDE_BLOCK_SIZE); + + while ((nbytes = walk.nbytes) >= DES3_EDE_BLOCK_SIZE) { + nbytes = __ctr_crypt(desc, &walk); + err = blkcipher_walk_done(desc, &walk, nbytes); + } + + if (walk.nbytes) { + ctr_crypt_final(crypto_blkcipher_ctx(desc->tfm), &walk); + err = blkcipher_walk_done(desc, &walk, 0); + } + + return err; +} + +static int des3_ede_x86_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int keylen) +{ + struct des3_ede_x86_ctx *ctx = crypto_tfm_ctx(tfm); + u32 i, j, tmp; + int err; + + /* Generate encryption context using generic implementation. */ + err = __des3_ede_setkey(ctx->enc_expkey, &tfm->crt_flags, key, keylen); + if (err < 0) + return err; + + /* Fix encryption context for this implementation and form decryption + * context. */ + j = DES3_EDE_EXPKEY_WORDS - 2; + for (i = 0; i < DES3_EDE_EXPKEY_WORDS; i += 2, j -= 2) { + tmp = ror32(ctx->enc_expkey[i + 1], 4); + ctx->enc_expkey[i + 1] = tmp; + + ctx->dec_expkey[j + 0] = ctx->enc_expkey[i + 0]; + ctx->dec_expkey[j + 1] = tmp; + } + + return 0; +} + +static struct crypto_alg des3_ede_algs[4] = { { + .cra_name = "des3_ede", + .cra_driver_name = "des3_ede-asm", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_CIPHER, + .cra_blocksize = DES3_EDE_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct des3_ede_x86_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_u = { + .cipher = { + .cia_min_keysize = DES3_EDE_KEY_SIZE, + .cia_max_keysize = DES3_EDE_KEY_SIZE, + .cia_setkey = des3_ede_x86_setkey, + .cia_encrypt = des3_ede_x86_encrypt, + .cia_decrypt = des3_ede_x86_decrypt, + } + } +}, { + .cra_name = "ecb(des3_ede)", + .cra_driver_name = "ecb-des3_ede-asm", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = DES3_EDE_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct des3_ede_x86_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_u = { + .blkcipher = { + .min_keysize = DES3_EDE_KEY_SIZE, + .max_keysize = DES3_EDE_KEY_SIZE, + .setkey = des3_ede_x86_setkey, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, + }, +}, { + .cra_name = "cbc(des3_ede)", + .cra_driver_name = "cbc-des3_ede-asm", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = DES3_EDE_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct des3_ede_x86_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_u = { + .blkcipher = { + .min_keysize = DES3_EDE_KEY_SIZE, + .max_keysize = DES3_EDE_KEY_SIZE, + .ivsize = DES3_EDE_BLOCK_SIZE, + .setkey = des3_ede_x86_setkey, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, + }, +}, { + .cra_name = "ctr(des3_ede)", + .cra_driver_name = "ctr-des3_ede-asm", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct des3_ede_x86_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_u = { + .blkcipher = { + .min_keysize = DES3_EDE_KEY_SIZE, + .max_keysize = DES3_EDE_KEY_SIZE, + .ivsize = DES3_EDE_BLOCK_SIZE, + .setkey = des3_ede_x86_setkey, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, + }, + }, +} }; + +static bool is_blacklisted_cpu(void) +{ + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) + return false; + + if (boot_cpu_data.x86 == 0x0f) { + /* + * On Pentium 4, des3_ede-x86_64 is slower than generic C + * implementation because use of 64bit rotates (which are really + * slow on P4). Therefore blacklist P4s. + */ + return true; + } + + return false; +} + +static int force; +module_param(force, int, 0); +MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist"); + +static int __init des3_ede_x86_init(void) +{ + if (!force && is_blacklisted_cpu()) { + pr_info("des3_ede-x86_64: performance on this CPU would be suboptimal: disabling des3_ede-x86_64.\n"); + return -ENODEV; + } + + return crypto_register_algs(des3_ede_algs, ARRAY_SIZE(des3_ede_algs)); +} + +static void __exit des3_ede_x86_fini(void) +{ + crypto_unregister_algs(des3_ede_algs, ARRAY_SIZE(des3_ede_algs)); +} + +module_init(des3_ede_x86_init); +module_exit(des3_ede_x86_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Triple DES EDE Cipher Algorithm, asm optimized"); +MODULE_ALIAS("des3_ede"); +MODULE_ALIAS("des3_ede-asm"); +MODULE_ALIAS("des"); +MODULE_ALIAS("des-asm"); +MODULE_AUTHOR("Jussi Kivilinna <jussi.kivilinna@iki.fi>"); diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 19b0ebafcd3e..79752f2bdec5 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -99,7 +99,7 @@ static inline void native_apic_mem_write(u32 reg, u32 v) { volatile u32 *addr = (volatile u32 *)(APIC_BASE + reg); - alternative_io("movl %0, %1", "xchgl %0, %1", X86_FEATURE_11AP, + alternative_io("movl %0, %1", "xchgl %0, %1", X86_BUG_11AP, ASM_OUTPUT2("=r" (v), "=m" (*addr)), ASM_OUTPUT2("0" (v), "m" (*addr))); } diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index 5c7198cca5ed..0f4460b5636d 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h @@ -99,7 +99,7 @@ #if defined(CONFIG_X86_PPRO_FENCE) /* - * For either of these options x86 doesn't have a strong TSO memory + * For this option x86 doesn't have a strong TSO memory * model and we should fall back to full barriers. */ diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h index d47786acb016..99c105d78b7e 100644 --- a/arch/x86/include/asm/cmpxchg.h +++ b/arch/x86/include/asm/cmpxchg.h @@ -4,6 +4,8 @@ #include <linux/compiler.h> #include <asm/alternative.h> /* Provides LOCK_PREFIX */ +#define __HAVE_ARCH_CMPXCHG 1 + /* * Non-existant functions to indicate usage errors at link time * (or compile-time if the compiler implements __compiletime_error(). @@ -143,7 +145,6 @@ extern void __add_wrong_size(void) # include <asm/cmpxchg_64.h> #endif -#ifdef __HAVE_ARCH_CMPXCHG #define cmpxchg(ptr, old, new) \ __cmpxchg(ptr, old, new, sizeof(*(ptr))) @@ -152,7 +153,6 @@ extern void __add_wrong_size(void) #define cmpxchg_local(ptr, old, new) \ __cmpxchg_local(ptr, old, new, sizeof(*(ptr))) -#endif /* * xadd() adds "inc" to "*ptr" and atomically returns the previous diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h index f8bf2eecab86..f7e142926481 100644 --- a/arch/x86/include/asm/cmpxchg_32.h +++ b/arch/x86/include/asm/cmpxchg_32.h @@ -34,8 +34,6 @@ static inline void set_64bit(volatile u64 *ptr, u64 value) : "memory"); } -#define __HAVE_ARCH_CMPXCHG 1 - #ifdef CONFIG_X86_CMPXCHG64 #define cmpxchg64(ptr, o, n) \ ((__typeof__(*(ptr)))__cmpxchg64((ptr), (unsigned long long)(o), \ diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h index 614be87f1a9b..1af94697aae5 100644 --- a/arch/x86/include/asm/cmpxchg_64.h +++ b/arch/x86/include/asm/cmpxchg_64.h @@ -6,8 +6,6 @@ static inline void set_64bit(volatile u64 *ptr, u64 val) *ptr = val; } -#define __HAVE_ARCH_CMPXCHG 1 - #define cmpxchg64(ptr, o, n) \ ({ \ BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index e265ff95d16d..bb9b258d60e7 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -8,7 +8,7 @@ #include <asm/required-features.h> #endif -#define NCAPINTS 10 /* N 32-bit words worth of info */ +#define NCAPINTS 11 /* N 32-bit words worth of info */ #define NBUGINTS 1 /* N 32-bit bug flags */ /* @@ -18,213 +18,218 @@ */ /* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */ -#define X86_FEATURE_FPU (0*32+ 0) /* Onboard FPU */ -#define X86_FEATURE_VME (0*32+ 1) /* Virtual Mode Extensions */ -#define X86_FEATURE_DE (0*32+ 2) /* Debugging Extensions */ -#define X86_FEATURE_PSE (0*32+ 3) /* Page Size Extensions */ -#define X86_FEATURE_TSC (0*32+ 4) /* Time Stamp Counter */ -#define X86_FEATURE_MSR (0*32+ 5) /* Model-Specific Registers */ -#define X86_FEATURE_PAE (0*32+ 6) /* Physical Address Extensions */ -#define X86_FEATURE_MCE (0*32+ 7) /* Machine Check Exception */ -#define X86_FEATURE_CX8 (0*32+ 8) /* CMPXCHG8 instruction */ -#define X86_FEATURE_APIC (0*32+ 9) /* Onboard APIC */ -#define X86_FEATURE_SEP (0*32+11) /* SYSENTER/SYSEXIT */ -#define X86_FEATURE_MTRR (0*32+12) /* Memory Type Range Registers */ -#define X86_FEATURE_PGE (0*32+13) /* Page Global Enable */ -#define X86_FEATURE_MCA (0*32+14) /* Machine Check Architecture */ -#define X86_FEATURE_CMOV (0*32+15) /* CMOV instructions */ +#define X86_FEATURE_FPU ( 0*32+ 0) /* Onboard FPU */ +#define X86_FEATURE_VME ( 0*32+ 1) /* Virtual Mode Extensions */ +#define X86_FEATURE_DE ( 0*32+ 2) /* Debugging Extensions */ +#define X86_FEATURE_PSE ( 0*32+ 3) /* Page Size Extensions */ +#define X86_FEATURE_TSC ( 0*32+ 4) /* Time Stamp Counter */ +#define X86_FEATURE_MSR ( 0*32+ 5) /* Model-Specific Registers */ +#define X86_FEATURE_PAE ( 0*32+ 6) /* Physical Address Extensions */ +#define X86_FEATURE_MCE ( 0*32+ 7) /* Machine Check Exception */ +#define X86_FEATURE_CX8 ( 0*32+ 8) /* CMPXCHG8 instruction */ +#define X86_FEATURE_APIC ( 0*32+ 9) /* Onboard APIC */ +#define X86_FEATURE_SEP ( 0*32+11) /* SYSENTER/SYSEXIT */ +#define X86_FEATURE_MTRR ( 0*32+12) /* Memory Type Range Registers */ +#define X86_FEATURE_PGE ( 0*32+13) /* Page Global Enable */ +#define X86_FEATURE_MCA ( 0*32+14) /* Machine Check Architecture */ +#define X86_FEATURE_CMOV ( 0*32+15) /* CMOV instructions */ /* (plus FCMOVcc, FCOMI with FPU) */ -#define X86_FEATURE_PAT (0*32+16) /* Page Attribute Table */ -#define X86_FEATURE_PSE36 (0*32+17) /* 36-bit PSEs */ -#define X86_FEATURE_PN (0*32+18) /* Processor serial number */ -#define X86_FEATURE_CLFLUSH (0*32+19) /* CLFLUSH instruction */ -#define X86_FEATURE_DS (0*32+21) /* "dts" Debug Store */ -#define X86_FEATURE_ACPI (0*32+22) /* ACPI via MSR */ -#define X86_FEATURE_MMX (0*32+23) /* Multimedia Extensions */ -#define X86_FEATURE_FXSR (0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */ -#define X86_FEATURE_XMM (0*32+25) /* "sse" */ -#define X86_FEATURE_XMM2 (0*32+26) /* "sse2" */ -#define X86_FEATURE_SELFSNOOP (0*32+27) /* "ss" CPU self snoop */ -#define X86_FEATURE_HT (0*32+28) /* Hyper-Threading */ -#define X86_FEATURE_ACC (0*32+29) /* "tm" Automatic clock control */ -#define X86_FEATURE_IA64 (0*32+30) /* IA-64 processor */ -#define X86_FEATURE_PBE (0*32+31) /* Pending Break Enable */ +#define X86_FEATURE_PAT ( 0*32+16) /* Page Attribute Table */ +#define X86_FEATURE_PSE36 ( 0*32+17) /* 36-bit PSEs */ +#define X86_FEATURE_PN ( 0*32+18) /* Processor serial number */ +#define X86_FEATURE_CLFLUSH ( 0*32+19) /* CLFLUSH instruction */ +#define X86_FEATURE_DS ( 0*32+21) /* "dts" Debug Store */ +#define X86_FEATURE_ACPI ( 0*32+22) /* ACPI via MSR */ +#define X86_FEATURE_MMX ( 0*32+23) /* Multimedia Extensions */ +#define X86_FEATURE_FXSR ( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */ +#define X86_FEATURE_XMM ( 0*32+25) /* "sse" */ +#define X86_FEATURE_XMM2 ( 0*32+26) /* "sse2" */ +#define X86_FEATURE_SELFSNOOP ( 0*32+27) /* "ss" CPU self snoop */ +#define X86_FEATURE_HT ( 0*32+28) /* Hyper-Threading */ +#define X86_FEATURE_ACC ( 0*32+29) /* "tm" Automatic clock control */ +#define X86_FEATURE_IA64 ( 0*32+30) /* IA-64 processor */ +#define X86_FEATURE_PBE ( 0*32+31) /* Pending Break Enable */ /* AMD-defined CPU features, CPUID level 0x80000001, word 1 */ /* Don't duplicate feature flags which are redundant with Intel! */ -#define X86_FEATURE_SYSCALL (1*32+11) /* SYSCALL/SYSRET */ -#define X86_FEATURE_MP (1*32+19) /* MP Capable. */ -#define X86_FEATURE_NX (1*32+20) /* Execute Disable */ -#define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */ -#define X86_FEATURE_FXSR_OPT (1*32+25) /* FXSAVE/FXRSTOR optimizations */ -#define X86_FEATURE_GBPAGES (1*32+26) /* "pdpe1gb" GB pages */ -#define X86_FEATURE_RDTSCP (1*32+27) /* RDTSCP */ -#define X86_FEATURE_LM (1*32+29) /* Long Mode (x86-64) */ -#define X86_FEATURE_3DNOWEXT (1*32+30) /* AMD 3DNow! extensions */ -#define X86_FEATURE_3DNOW (1*32+31) /* 3DNow! */ +#define X86_FEATURE_SYSCALL ( 1*32+11) /* SYSCALL/SYSRET */ +#define X86_FEATURE_MP ( 1*32+19) /* MP Capable. */ +#define X86_FEATURE_NX ( 1*32+20) /* Execute Disable */ +#define X86_FEATURE_MMXEXT ( 1*32+22) /* AMD MMX extensions */ +#define X86_FEATURE_FXSR_OPT ( 1*32+25) /* FXSAVE/FXRSTOR optimizations */ +#define X86_FEATURE_GBPAGES ( 1*32+26) /* "pdpe1gb" GB pages */ +#define X86_FEATURE_RDTSCP ( 1*32+27) /* RDTSCP */ +#define X86_FEATURE_LM ( 1*32+29) /* Long Mode (x86-64) */ +#define X86_FEATURE_3DNOWEXT ( 1*32+30) /* AMD 3DNow! extensions */ +#define X86_FEATURE_3DNOW ( 1*32+31) /* 3DNow! */ /* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */ -#define X86_FEATURE_RECOVERY (2*32+ 0) /* CPU in recovery mode */ -#define X86_FEATURE_LONGRUN (2*32+ 1) /* Longrun power control */ -#define X86_FEATURE_LRTI (2*32+ 3) /* LongRun table interface */ +#define X86_FEATURE_RECOVERY ( 2*32+ 0) /* CPU in recovery mode */ +#define X86_FEATURE_LONGRUN ( 2*32+ 1) /* Longrun power control */ +#define X86_FEATURE_LRTI ( 2*32+ 3) /* LongRun table interface */ /* Other features, Linux-defined mapping, word 3 */ /* This range is used for feature bits which conflict or are synthesized */ -#define X86_FEATURE_CXMMX (3*32+ 0) /* Cyrix MMX extensions */ -#define X86_FEATURE_K6_MTRR (3*32+ 1) /* AMD K6 nonstandard MTRRs */ -#define X86_FEATURE_CYRIX_ARR (3*32+ 2) /* Cyrix ARRs (= MTRRs) */ -#define X86_FEATURE_CENTAUR_MCR (3*32+ 3) /* Centaur MCRs (= MTRRs) */ +#define X86_FEATURE_CXMMX ( 3*32+ 0) /* Cyrix MMX extensions */ +#define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */ +#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */ +#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */ /* cpu types for specific tunings: */ -#define X86_FEATURE_K8 (3*32+ 4) /* "" Opteron, Athlon64 */ -#define X86_FEATURE_K7 (3*32+ 5) /* "" Athlon */ -#define X86_FEATURE_P3 (3*32+ 6) /* "" P3 */ -#define X86_FEATURE_P4 (3*32+ 7) /* "" P4 */ -#define X86_FEATURE_CONSTANT_TSC (3*32+ 8) /* TSC ticks at a constant rate */ -#define X86_FEATURE_UP (3*32+ 9) /* smp kernel running on up */ -#define X86_FEATURE_FXSAVE_LEAK (3*32+10) /* "" FXSAVE leaks FOP/FIP/FOP */ -#define X86_FEATURE_ARCH_PERFMON (3*32+11) /* Intel Architectural PerfMon */ -#define X86_FEATURE_PEBS (3*32+12) /* Precise-Event Based Sampling */ -#define X86_FEATURE_BTS (3*32+13) /* Branch Trace Store */ -#define X86_FEATURE_SYSCALL32 (3*32+14) /* "" syscall in ia32 userspace */ -#define X86_FEATURE_SYSENTER32 (3*32+15) /* "" sysenter in ia32 userspace */ -#define X86_FEATURE_REP_GOOD (3*32+16) /* rep microcode works well */ -#define X86_FEATURE_MFENCE_RDTSC (3*32+17) /* "" Mfence synchronizes RDTSC */ -#define X86_FEATURE_LFENCE_RDTSC (3*32+18) /* "" Lfence synchronizes RDTSC */ -#define X86_FEATURE_11AP (3*32+19) /* "" Bad local APIC aka 11AP */ -#define X86_FEATURE_NOPL (3*32+20) /* The NOPL (0F 1F) instructions */ -#define X86_FEATURE_ALWAYS (3*32+21) /* "" Always-present feature */ -#define X86_FEATURE_XTOPOLOGY (3*32+22) /* cpu topology enum extensions */ -#define X86_FEATURE_TSC_RELIABLE (3*32+23) /* TSC is known to be reliable */ -#define X86_FEATURE_NONSTOP_TSC (3*32+24) /* TSC does not stop in C states */ -#define X86_FEATURE_CLFLUSH_MONITOR (3*32+25) /* "" clflush reqd with monitor */ -#define X86_FEATURE_EXTD_APICID (3*32+26) /* has extended APICID (8 bits) */ -#define X86_FEATURE_AMD_DCM (3*32+27) /* multi-node processor */ -#define X86_FEATURE_APERFMPERF (3*32+28) /* APERFMPERF */ -#define X86_FEATURE_EAGER_FPU (3*32+29) /* "eagerfpu" Non lazy FPU restore */ -#define X86_FEATURE_NONSTOP_TSC_S3 (3*32+30) /* TSC doesn't stop in S3 state */ +#define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */ +#define X86_FEATURE_K7 ( 3*32+ 5) /* "" Athlon */ +#define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */ +#define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */ +#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */ +#define X86_FEATURE_UP ( 3*32+ 9) /* smp kernel running on up */ +/* free, was #define X86_FEATURE_FXSAVE_LEAK ( 3*32+10) * "" FXSAVE leaks FOP/FIP/FOP */ +#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */ +#define X86_FEATURE_PEBS ( 3*32+12) /* Precise-Event Based Sampling */ +#define X86_FEATURE_BTS ( 3*32+13) /* Branch Trace Store */ +#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in ia32 userspace */ +#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in ia32 userspace */ +#define X86_FEATURE_REP_GOOD ( 3*32+16) /* rep microcode works well */ +#define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" Mfence synchronizes RDTSC */ +#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" Lfence synchronizes RDTSC */ +/* free, was #define X86_FEATURE_11AP ( 3*32+19) * "" Bad local APIC aka 11AP */ +#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */ +#define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */ +#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* cpu topology enum extensions */ +#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */ +#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */ +/* free, was #define X86_FEATURE_CLFLUSH_MONITOR ( 3*32+25) * "" clflush reqd with monitor */ +#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* has extended APICID (8 bits) */ +#define X86_FEATURE_AMD_DCM ( 3*32+27) /* multi-node processor */ +#define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */ +#define X86_FEATURE_EAGER_FPU ( 3*32+29) /* "eagerfpu" Non lazy FPU restore */ +#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */ /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ -#define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */ -#define X86_FEATURE_PCLMULQDQ (4*32+ 1) /* PCLMULQDQ instruction */ -#define X86_FEATURE_DTES64 (4*32+ 2) /* 64-bit Debug Store */ -#define X86_FEATURE_MWAIT (4*32+ 3) /* "monitor" Monitor/Mwait support */ -#define X86_FEATURE_DSCPL (4*32+ 4) /* "ds_cpl" CPL Qual. Debug Store */ -#define X86_FEATURE_VMX (4*32+ 5) /* Hardware virtualization */ -#define X86_FEATURE_SMX (4*32+ 6) /* Safer mode */ -#define X86_FEATURE_EST (4*32+ 7) /* Enhanced SpeedStep */ -#define X86_FEATURE_TM2 (4*32+ 8) /* Thermal Monitor 2 */ -#define X86_FEATURE_SSSE3 (4*32+ 9) /* Supplemental SSE-3 */ -#define X86_FEATURE_CID (4*32+10) /* Context ID */ -#define X86_FEATURE_FMA (4*32+12) /* Fused multiply-add */ -#define X86_FEATURE_CX16 (4*32+13) /* CMPXCHG16B */ -#define X86_FEATURE_XTPR (4*32+14) /* Send Task Priority Messages */ -#define X86_FEATURE_PDCM (4*32+15) /* Performance Capabilities */ -#define X86_FEATURE_PCID (4*32+17) /* Process Context Identifiers */ -#define X86_FEATURE_DCA (4*32+18) /* Direct Cache Access */ -#define X86_FEATURE_XMM4_1 (4*32+19) /* "sse4_1" SSE-4.1 */ -#define X86_FEATURE_XMM4_2 (4*32+20) /* "sse4_2" SSE-4.2 */ -#define X86_FEATURE_X2APIC (4*32+21) /* x2APIC */ -#define X86_FEATURE_MOVBE (4*32+22) /* MOVBE instruction */ -#define X86_FEATURE_POPCNT (4*32+23) /* POPCNT instruction */ -#define X86_FEATURE_TSC_DEADLINE_TIMER (4*32+24) /* Tsc deadline timer */ -#define X86_FEATURE_AES (4*32+25) /* AES instructions */ -#define X86_FEATURE_XSAVE (4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */ -#define X86_FEATURE_OSXSAVE (4*32+27) /* "" XSAVE enabled in the OS */ -#define X86_FEATURE_AVX (4*32+28) /* Advanced Vector Extensions */ -#define X86_FEATURE_F16C (4*32+29) /* 16-bit fp conversions */ -#define X86_FEATURE_RDRAND (4*32+30) /* The RDRAND instruction */ -#define X86_FEATURE_HYPERVISOR (4*32+31) /* Running on a hypervisor */ +#define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */ +#define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* PCLMULQDQ instruction */ +#define X86_FEATURE_DTES64 ( 4*32+ 2) /* 64-bit Debug Store */ +#define X86_FEATURE_MWAIT ( 4*32+ 3) /* "monitor" Monitor/Mwait support */ +#define X86_FEATURE_DSCPL ( 4*32+ 4) /* "ds_cpl" CPL Qual. Debug Store */ +#define X86_FEATURE_VMX ( 4*32+ 5) /* Hardware virtualization */ +#define X86_FEATURE_SMX ( 4*32+ 6) /* Safer mode */ +#define X86_FEATURE_EST ( 4*32+ 7) /* Enhanced SpeedStep */ +#define X86_FEATURE_TM2 ( 4*32+ 8) /* Thermal Monitor 2 */ +#define X86_FEATURE_SSSE3 ( 4*32+ 9) /* Supplemental SSE-3 */ +#define X86_FEATURE_CID ( 4*32+10) /* Context ID */ +#define X86_FEATURE_FMA ( 4*32+12) /* Fused multiply-add */ +#define X86_FEATURE_CX16 ( 4*32+13) /* CMPXCHG16B */ +#define X86_FEATURE_XTPR ( 4*32+14) /* Send Task Priority Messages */ +#define X86_FEATURE_PDCM ( 4*32+15) /* Performance Capabilities */ +#define X86_FEATURE_PCID ( 4*32+17) /* Process Context Identifiers */ +#define X86_FEATURE_DCA ( 4*32+18) /* Direct Cache Access */ +#define X86_FEATURE_XMM4_1 ( 4*32+19) /* "sse4_1" SSE-4.1 */ +#define X86_FEATURE_XMM4_2 ( 4*32+20) /* "sse4_2" SSE-4.2 */ +#define X86_FEATURE_X2APIC ( 4*32+21) /* x2APIC */ +#define X86_FEATURE_MOVBE ( 4*32+22) /* MOVBE instruction */ +#define X86_FEATURE_POPCNT ( 4*32+23) /* POPCNT instruction */ +#define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* Tsc deadline timer */ +#define X86_FEATURE_AES ( 4*32+25) /* AES instructions */ +#define X86_FEATURE_XSAVE ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */ +#define X86_FEATURE_OSXSAVE ( 4*32+27) /* "" XSAVE enabled in the OS */ +#define X86_FEATURE_AVX ( 4*32+28) /* Advanced Vector Extensions */ +#define X86_FEATURE_F16C ( 4*32+29) /* 16-bit fp conversions */ +#define X86_FEATURE_RDRAND ( 4*32+30) /* The RDRAND instruction */ +#define X86_FEATURE_HYPERVISOR ( 4*32+31) /* Running on a hypervisor */ /* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */ -#define X86_FEATURE_XSTORE (5*32+ 2) /* "rng" RNG present (xstore) */ -#define X86_FEATURE_XSTORE_EN (5*32+ 3) /* "rng_en" RNG enabled */ -#define X86_FEATURE_XCRYPT (5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */ -#define X86_FEATURE_XCRYPT_EN (5*32+ 7) /* "ace_en" on-CPU crypto enabled */ -#define X86_FEATURE_ACE2 (5*32+ 8) /* Advanced Cryptography Engine v2 */ -#define X86_FEATURE_ACE2_EN (5*32+ 9) /* ACE v2 enabled */ -#define X86_FEATURE_PHE (5*32+10) /* PadLock Hash Engine */ -#define X86_FEATURE_PHE_EN (5*32+11) /* PHE enabled */ -#define X86_FEATURE_PMM (5*32+12) /* PadLock Montgomery Multiplier */ -#define X86_FEATURE_PMM_EN (5*32+13) /* PMM enabled */ +#define X86_FEATURE_XSTORE ( 5*32+ 2) /* "rng" RNG present (xstore) */ +#define X86_FEATURE_XSTORE_EN ( 5*32+ 3) /* "rng_en" RNG enabled */ +#define X86_FEATURE_XCRYPT ( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */ +#define X86_FEATURE_XCRYPT_EN ( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */ +#define X86_FEATURE_ACE2 ( 5*32+ 8) /* Advanced Cryptography Engine v2 */ +#define X86_FEATURE_ACE2_EN ( 5*32+ 9) /* ACE v2 enabled */ +#define X86_FEATURE_PHE ( 5*32+10) /* PadLock Hash Engine */ +#define X86_FEATURE_PHE_EN ( 5*32+11) /* PHE enabled */ +#define X86_FEATURE_PMM ( 5*32+12) /* PadLock Montgomery Multiplier */ +#define X86_FEATURE_PMM_EN ( 5*32+13) /* PMM enabled */ /* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */ -#define X86_FEATURE_LAHF_LM (6*32+ 0) /* LAHF/SAHF in long mode */ -#define X86_FEATURE_CMP_LEGACY (6*32+ 1) /* If yes HyperThreading not valid */ -#define X86_FEATURE_SVM (6*32+ 2) /* Secure virtual machine */ -#define X86_FEATURE_EXTAPIC (6*32+ 3) /* Extended APIC space */ -#define X86_FEATURE_CR8_LEGACY (6*32+ 4) /* CR8 in 32-bit mode */ -#define X86_FEATURE_ABM (6*32+ 5) /* Advanced bit manipulation */ -#define X86_FEATURE_SSE4A (6*32+ 6) /* SSE-4A */ -#define X86_FEATURE_MISALIGNSSE (6*32+ 7) /* Misaligned SSE mode */ -#define X86_FEATURE_3DNOWPREFETCH (6*32+ 8) /* 3DNow prefetch instructions */ -#define X86_FEATURE_OSVW (6*32+ 9) /* OS Visible Workaround */ -#define X86_FEATURE_IBS (6*32+10) /* Instruction Based Sampling */ -#define X86_FEATURE_XOP (6*32+11) /* extended AVX instructions */ -#define X86_FEATURE_SKINIT (6*32+12) /* SKINIT/STGI instructions */ -#define X86_FEATURE_WDT (6*32+13) /* Watchdog timer */ -#define X86_FEATURE_LWP (6*32+15) /* Light Weight Profiling */ -#define X86_FEATURE_FMA4 (6*32+16) /* 4 operands MAC instructions */ -#define X86_FEATURE_TCE (6*32+17) /* translation cache extension */ -#define X86_FEATURE_NODEID_MSR (6*32+19) /* NodeId MSR */ -#define X86_FEATURE_TBM (6*32+21) /* trailing bit manipulations */ -#define X86_FEATURE_TOPOEXT (6*32+22) /* topology extensions CPUID leafs */ -#define X86_FEATURE_PERFCTR_CORE (6*32+23) /* core performance counter extensions */ -#define X86_FEATURE_PERFCTR_NB (6*32+24) /* NB performance counter extensions */ -#define X86_FEATURE_PERFCTR_L2 (6*32+28) /* L2 performance counter extensions */ +#define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* LAHF/SAHF in long mode */ +#define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* If yes HyperThreading not valid */ +#define X86_FEATURE_SVM ( 6*32+ 2) /* Secure virtual machine */ +#define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* Extended APIC space */ +#define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* CR8 in 32-bit mode */ +#define X86_FEATURE_ABM ( 6*32+ 5) /* Advanced bit manipulation */ +#define X86_FEATURE_SSE4A ( 6*32+ 6) /* SSE-4A */ +#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* Misaligned SSE mode */ +#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* 3DNow prefetch instructions */ +#define X86_FEATURE_OSVW ( 6*32+ 9) /* OS Visible Workaround */ +#define X86_FEATURE_IBS ( 6*32+10) /* Instruction Based Sampling */ +#define X86_FEATURE_XOP ( 6*32+11) /* extended AVX instructions */ +#define X86_FEATURE_SKINIT ( 6*32+12) /* SKINIT/STGI instructions */ +#define X86_FEATURE_WDT ( 6*32+13) /* Watchdog timer */ +#define X86_FEATURE_LWP ( 6*32+15) /* Light Weight Profiling */ +#define X86_FEATURE_FMA4 ( 6*32+16) /* 4 operands MAC instructions */ +#define X86_FEATURE_TCE ( 6*32+17) /* translation cache extension */ +#define X86_FEATURE_NODEID_MSR ( 6*32+19) /* NodeId MSR */ +#define X86_FEATURE_TBM ( 6*32+21) /* trailing bit manipulations */ +#define X86_FEATURE_TOPOEXT ( 6*32+22) /* topology extensions CPUID leafs */ +#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* core performance counter extensions */ +#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */ +#define X86_FEATURE_PERFCTR_L2 ( 6*32+28) /* L2 performance counter extensions */ /* * Auxiliary flags: Linux defined - For features scattered in various * CPUID levels like 0x6, 0xA etc, word 7 */ -#define X86_FEATURE_IDA (7*32+ 0) /* Intel Dynamic Acceleration */ -#define X86_FEATURE_ARAT (7*32+ 1) /* Always Running APIC Timer */ -#define X86_FEATURE_CPB (7*32+ 2) /* AMD Core Performance Boost */ -#define X86_FEATURE_EPB (7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */ -#define X86_FEATURE_XSAVEOPT (7*32+ 4) /* Optimized Xsave */ -#define X86_FEATURE_PLN (7*32+ 5) /* Intel Power Limit Notification */ -#define X86_FEATURE_PTS (7*32+ 6) /* Intel Package Thermal Status */ -#define X86_FEATURE_DTHERM (7*32+ 7) /* Digital Thermal Sensor */ -#define X86_FEATURE_HW_PSTATE (7*32+ 8) /* AMD HW-PState */ -#define X86_FEATURE_PROC_FEEDBACK (7*32+ 9) /* AMD ProcFeedbackInterface */ +#define X86_FEATURE_IDA ( 7*32+ 0) /* Intel Dynamic Acceleration */ +#define X86_FEATURE_ARAT ( 7*32+ 1) /* Always Running APIC Timer */ +#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */ +#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */ +#define X86_FEATURE_PLN ( 7*32+ 5) /* Intel Power Limit Notification */ +#define X86_FEATURE_PTS ( 7*32+ 6) /* Intel Package Thermal Status */ +#define X86_FEATURE_DTHERM ( 7*32+ 7) /* Digital Thermal Sensor */ +#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ +#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ /* Virtualization flags: Linux defined, word 8 */ -#define X86_FEATURE_TPR_SHADOW (8*32+ 0) /* Intel TPR Shadow */ -#define X86_FEATURE_VNMI (8*32+ 1) /* Intel Virtual NMI */ -#define X86_FEATURE_FLEXPRIORITY (8*32+ 2) /* Intel FlexPriority */ -#define X86_FEATURE_EPT (8*32+ 3) /* Intel Extended Page Table */ -#define X86_FEATURE_VPID (8*32+ 4) /* Intel Virtual Processor ID */ -#define X86_FEATURE_NPT (8*32+ 5) /* AMD Nested Page Table support */ -#define X86_FEATURE_LBRV (8*32+ 6) /* AMD LBR Virtualization support */ -#define X86_FEATURE_SVML (8*32+ 7) /* "svm_lock" AMD SVM locking MSR */ -#define X86_FEATURE_NRIPS (8*32+ 8) /* "nrip_save" AMD SVM next_rip save */ -#define X86_FEATURE_TSCRATEMSR (8*32+ 9) /* "tsc_scale" AMD TSC scaling support */ -#define X86_FEATURE_VMCBCLEAN (8*32+10) /* "vmcb_clean" AMD VMCB clean bits support */ -#define X86_FEATURE_FLUSHBYASID (8*32+11) /* AMD flush-by-ASID support */ -#define X86_FEATURE_DECODEASSISTS (8*32+12) /* AMD Decode Assists support */ -#define X86_FEATURE_PAUSEFILTER (8*32+13) /* AMD filtered pause intercept */ -#define X86_FEATURE_PFTHRESHOLD (8*32+14) /* AMD pause filter threshold */ +#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ +#define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */ +#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 2) /* Intel FlexPriority */ +#define X86_FEATURE_EPT ( 8*32+ 3) /* Intel Extended Page Table */ +#define X86_FEATURE_VPID ( 8*32+ 4) /* Intel Virtual Processor ID */ +#define X86_FEATURE_NPT ( 8*32+ 5) /* AMD Nested Page Table support */ +#define X86_FEATURE_LBRV ( 8*32+ 6) /* AMD LBR Virtualization support */ +#define X86_FEATURE_SVML ( 8*32+ 7) /* "svm_lock" AMD SVM locking MSR */ +#define X86_FEATURE_NRIPS ( 8*32+ 8) /* "nrip_save" AMD SVM next_rip save */ +#define X86_FEATURE_TSCRATEMSR ( 8*32+ 9) /* "tsc_scale" AMD TSC scaling support */ +#define X86_FEATURE_VMCBCLEAN ( 8*32+10) /* "vmcb_clean" AMD VMCB clean bits support */ +#define X86_FEATURE_FLUSHBYASID ( 8*32+11) /* AMD flush-by-ASID support */ +#define X86_FEATURE_DECODEASSISTS ( 8*32+12) /* AMD Decode Assists support */ +#define X86_FEATURE_PAUSEFILTER ( 8*32+13) /* AMD filtered pause intercept */ +#define X86_FEATURE_PFTHRESHOLD ( 8*32+14) /* AMD pause filter threshold */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */ -#define X86_FEATURE_FSGSBASE (9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/ -#define X86_FEATURE_TSC_ADJUST (9*32+ 1) /* TSC adjustment MSR 0x3b */ -#define X86_FEATURE_BMI1 (9*32+ 3) /* 1st group bit manipulation extensions */ -#define X86_FEATURE_HLE (9*32+ 4) /* Hardware Lock Elision */ -#define X86_FEATURE_AVX2 (9*32+ 5) /* AVX2 instructions */ -#define X86_FEATURE_SMEP (9*32+ 7) /* Supervisor Mode Execution Protection */ -#define X86_FEATURE_BMI2 (9*32+ 8) /* 2nd group bit manipulation extensions */ -#define X86_FEATURE_ERMS (9*32+ 9) /* Enhanced REP MOVSB/STOSB */ -#define X86_FEATURE_INVPCID (9*32+10) /* Invalidate Processor Context ID */ -#define X86_FEATURE_RTM (9*32+11) /* Restricted Transactional Memory */ -#define X86_FEATURE_MPX (9*32+14) /* Memory Protection Extension */ -#define X86_FEATURE_AVX512F (9*32+16) /* AVX-512 Foundation */ -#define X86_FEATURE_RDSEED (9*32+18) /* The RDSEED instruction */ -#define X86_FEATURE_ADX (9*32+19) /* The ADCX and ADOX instructions */ -#define X86_FEATURE_SMAP (9*32+20) /* Supervisor Mode Access Prevention */ -#define X86_FEATURE_CLFLUSHOPT (9*32+23) /* CLFLUSHOPT instruction */ -#define X86_FEATURE_AVX512PF (9*32+26) /* AVX-512 Prefetch */ -#define X86_FEATURE_AVX512ER (9*32+27) /* AVX-512 Exponential and Reciprocal */ -#define X86_FEATURE_AVX512CD (9*32+28) /* AVX-512 Conflict Detection */ +#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/ +#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* TSC adjustment MSR 0x3b */ +#define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation extensions */ +#define X86_FEATURE_HLE ( 9*32+ 4) /* Hardware Lock Elision */ +#define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */ +#define X86_FEATURE_SMEP ( 9*32+ 7) /* Supervisor Mode Execution Protection */ +#define X86_FEATURE_BMI2 ( 9*32+ 8) /* 2nd group bit manipulation extensions */ +#define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB */ +#define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */ +#define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */ +#define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */ +#define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */ +#define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */ +#define X86_FEATURE_ADX ( 9*32+19) /* The ADCX and ADOX instructions */ +#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */ +#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */ +#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */ +#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */ +#define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */ + +/* Extended state features, CPUID level 0x0000000d:1 (eax), word 10 */ +#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT */ +#define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC */ +#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 */ +#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS */ /* * BUG word(s) @@ -234,8 +239,11 @@ #define X86_BUG_F00F X86_BUG(0) /* Intel F00F */ #define X86_BUG_FDIV X86_BUG(1) /* FPU FDIV */ #define X86_BUG_COMA X86_BUG(2) /* Cyrix 6x86 coma */ -#define X86_BUG_AMD_TLB_MMATCH X86_BUG(3) /* AMD Erratum 383 */ -#define X86_BUG_AMD_APIC_C1E X86_BUG(4) /* AMD Erratum 400 */ +#define X86_BUG_AMD_TLB_MMATCH X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */ +#define X86_BUG_AMD_APIC_C1E X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */ +#define X86_BUG_11AP X86_BUG(5) /* Bad local APIC aka 11AP */ +#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */ +#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */ #if defined(__KERNEL__) && !defined(__ASSEMBLY__) @@ -245,6 +253,12 @@ extern const char * const x86_cap_flags[NCAPINTS*32]; extern const char * const x86_power_flags[32]; +/* + * In order to save room, we index into this array by doing + * X86_BUG_<name> - NCAPINTS*32. + */ +extern const char * const x86_bug_flags[NBUGINTS*32]; + #define test_cpu_cap(c, bit) \ test_bit(bit, (unsigned long *)((c)->x86_capability)) @@ -301,7 +315,6 @@ extern const char * const x86_power_flags[32]; #define cpu_has_avx boot_cpu_has(X86_FEATURE_AVX) #define cpu_has_avx2 boot_cpu_has(X86_FEATURE_AVX2) #define cpu_has_ht boot_cpu_has(X86_FEATURE_HT) -#define cpu_has_mp boot_cpu_has(X86_FEATURE_MP) #define cpu_has_nx boot_cpu_has(X86_FEATURE_NX) #define cpu_has_k6_mtrr boot_cpu_has(X86_FEATURE_K6_MTRR) #define cpu_has_cyrix_arr boot_cpu_has(X86_FEATURE_CYRIX_ARR) @@ -328,6 +341,7 @@ extern const char * const x86_power_flags[32]; #define cpu_has_x2apic boot_cpu_has(X86_FEATURE_X2APIC) #define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE) #define cpu_has_xsaveopt boot_cpu_has(X86_FEATURE_XSAVEOPT) +#define cpu_has_xsaves boot_cpu_has(X86_FEATURE_XSAVES) #define cpu_has_osxsave boot_cpu_has(X86_FEATURE_OSXSAVE) #define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR) #define cpu_has_pclmulqdq boot_cpu_has(X86_FEATURE_PCLMULQDQ) @@ -347,9 +361,6 @@ extern const char * const x86_power_flags[32]; #undef cpu_has_pae #define cpu_has_pae ___BUG___ -#undef cpu_has_mp -#define cpu_has_mp 1 - #undef cpu_has_k6_mtrr #define cpu_has_k6_mtrr 0 @@ -539,20 +550,20 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit) #define static_cpu_has_safe(bit) boot_cpu_has(bit) #endif -#define cpu_has_bug(c, bit) cpu_has(c, (bit)) -#define set_cpu_bug(c, bit) set_cpu_cap(c, (bit)) -#define clear_cpu_bug(c, bit) clear_cpu_cap(c, (bit)); +#define cpu_has_bug(c, bit) cpu_has(c, (bit)) +#define set_cpu_bug(c, bit) set_cpu_cap(c, (bit)) +#define clear_cpu_bug(c, bit) clear_cpu_cap(c, (bit)) -#define static_cpu_has_bug(bit) static_cpu_has((bit)) -#define boot_cpu_has_bug(bit) cpu_has_bug(&boot_cpu_data, (bit)) +#define static_cpu_has_bug(bit) static_cpu_has((bit)) +#define static_cpu_has_bug_safe(bit) static_cpu_has_safe((bit)) +#define boot_cpu_has_bug(bit) cpu_has_bug(&boot_cpu_data, (bit)) -#define MAX_CPU_FEATURES (NCAPINTS * 32) -#define cpu_have_feature boot_cpu_has +#define MAX_CPU_FEATURES (NCAPINTS * 32) +#define cpu_have_feature boot_cpu_has -#define CPU_FEATURE_TYPEFMT "x86,ven%04Xfam%04Xmod%04X" -#define CPU_FEATURE_TYPEVAL boot_cpu_data.x86_vendor, boot_cpu_data.x86, \ - boot_cpu_data.x86_model +#define CPU_FEATURE_TYPEFMT "x86,ven%04Xfam%04Xmod%04X" +#define CPU_FEATURE_TYPEVAL boot_cpu_data.x86_vendor, boot_cpu_data.x86, \ + boot_cpu_data.x86_model #endif /* defined(__KERNEL__) && !defined(__ASSEMBLY__) */ - #endif /* _ASM_X86_CPUFEATURE_H */ diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 1eb5f6433ad8..044a2fd3c5fe 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -104,6 +104,8 @@ extern void __init runtime_code_page_mkexec(void); extern void __init efi_runtime_mkexec(void); extern void __init efi_dump_pagetable(void); extern void __init efi_apply_memmap_quirks(void); +extern int __init efi_reuse_config(u64 tables, int nr_tables); +extern void efi_delete_dummy_variable(void); struct efi_setup_data { u64 fw_vendor; @@ -156,6 +158,33 @@ static inline efi_status_t efi_thunk_set_virtual_address_map( return EFI_SUCCESS; } #endif /* CONFIG_EFI_MIXED */ + + +/* arch specific definitions used by the stub code */ + +struct efi_config { + u64 image_handle; + u64 table; + u64 allocate_pool; + u64 allocate_pages; + u64 get_memory_map; + u64 free_pool; + u64 free_pages; + u64 locate_handle; + u64 handle_protocol; + u64 exit_boot_services; + u64 text_output; + efi_status_t (*call)(unsigned long, ...); + bool is64; +} __packed; + +extern struct efi_config *efi_early; + +#define efi_call_early(f, ...) \ + efi_early->call(efi_early->f, __VA_ARGS__); + +extern bool efi_reboot_required(void); + #else /* * IF EFI is not configured, have the EFI calls return -ENOSYS. @@ -168,6 +197,10 @@ static inline efi_status_t efi_thunk_set_virtual_address_map( #define efi_call5(_f, _a1, _a2, _a3, _a4, _a5) (-ENOSYS) #define efi_call6(_f, _a1, _a2, _a3, _a4, _a5, _a6) (-ENOSYS) static inline void parse_efi_setup(u64 phys_addr, u32 data_len) {} +static inline bool efi_reboot_required(void) +{ + return false; +} #endif /* CONFIG_EFI */ #endif /* _ASM_X86_EFI_H */ diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h index 115e3689cd53..e3b85422cf12 100644 --- a/arch/x86/include/asm/fpu-internal.h +++ b/arch/x86/include/asm/fpu-internal.h @@ -293,7 +293,7 @@ static inline int restore_fpu_checking(struct task_struct *tsk) /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception is pending. Clear the x87 state here by setting it to fixed values. "m" is a random variable that should be in L1 */ - if (unlikely(static_cpu_has_safe(X86_FEATURE_FXSAVE_LEAK))) { + if (unlikely(static_cpu_has_bug_safe(X86_BUG_FXSAVE_LEAK))) { asm volatile( "fnclex\n\t" "emms\n\t" diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index 0525a8bdf65d..e1f7fecaa7d6 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -68,6 +68,8 @@ struct dyn_arch_ftrace { int ftrace_int3_handler(struct pt_regs *regs); +#define FTRACE_GRAPH_TRAMP_ADDR FTRACE_GRAPH_ADDR + #endif /* CONFIG_DYNAMIC_FTRACE */ #endif /* __ASSEMBLY__ */ #endif /* CONFIG_FUNCTION_TRACER */ diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index bba3cf88e624..0a8b519226b8 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -129,7 +129,7 @@ static inline notrace unsigned long arch_local_irq_save(void) #define PARAVIRT_ADJUST_EXCEPTION_FRAME /* */ -#define INTERRUPT_RETURN iretq +#define INTERRUPT_RETURN jmp native_iret #define USERGS_SYSRET64 \ swapgs; \ sysretq; diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index a04fe4eb237d..eb181178fe0b 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -37,6 +37,7 @@ struct x86_instruction_info { u8 modrm_reg; /* index of register used */ u8 modrm_rm; /* rm part of modrm */ u64 src_val; /* value of source operand */ + u64 dst_val; /* value of destination operand */ u8 src_bytes; /* size of source operand */ u8 dst_bytes; /* size of destination operand */ u8 ad_bytes; /* size of src/dst address */ @@ -194,6 +195,7 @@ struct x86_emulate_ops { int (*set_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong value); int (*set_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 data); int (*get_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 *pdata); + int (*check_pmc)(struct x86_emulate_ctxt *ctxt, u32 pmc); int (*read_pmc)(struct x86_emulate_ctxt *ctxt, u32 pmc, u64 *pdata); void (*halt)(struct x86_emulate_ctxt *ctxt); void (*wbinvd)(struct x86_emulate_ctxt *ctxt); @@ -231,7 +233,7 @@ struct operand { union { unsigned long val; u64 val64; - char valptr[sizeof(unsigned long) + 2]; + char valptr[sizeof(sse128_t)]; sse128_t vec_val; u64 mm_val; void *data; @@ -240,8 +242,8 @@ struct operand { struct fetch_cache { u8 data[15]; - unsigned long start; - unsigned long end; + u8 *ptr; + u8 *end; }; struct read_cache { @@ -286,30 +288,36 @@ struct x86_emulate_ctxt { u8 opcode_len; u8 b; u8 intercept; - u8 lock_prefix; - u8 rep_prefix; u8 op_bytes; u8 ad_bytes; - u8 rex_prefix; struct operand src; struct operand src2; struct operand dst; - bool has_seg_override; - u8 seg_override; - u64 d; int (*execute)(struct x86_emulate_ctxt *ctxt); int (*check_perm)(struct x86_emulate_ctxt *ctxt); + /* + * The following six fields are cleared together, + * the rest are initialized unconditionally in x86_decode_insn + * or elsewhere + */ + bool rip_relative; + u8 rex_prefix; + u8 lock_prefix; + u8 rep_prefix; + /* bitmaps of registers in _regs[] that can be read */ + u32 regs_valid; + /* bitmaps of registers in _regs[] that have been written */ + u32 regs_dirty; /* modrm */ u8 modrm; u8 modrm_mod; u8 modrm_reg; u8 modrm_rm; u8 modrm_seg; - bool rip_relative; + u8 seg_override; + u64 d; unsigned long _eip; struct operand memop; - u32 regs_valid; /* bitmaps of registers in _regs[] that can be read */ - u32 regs_dirty; /* bitmaps of registers in _regs[] that have been written */ /* Fields above regs are cleared together. */ unsigned long _regs[NR_VCPU_REGS]; struct operand *memopp; @@ -407,6 +415,7 @@ bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt); #define EMULATION_OK 0 #define EMULATION_RESTART 1 #define EMULATION_INTERCEPTED 2 +void init_decode_cache(struct x86_emulate_ctxt *ctxt); int x86_emulate_insn(struct x86_emulate_ctxt *ctxt); int emulator_task_switch(struct x86_emulate_ctxt *ctxt, u16 tss_selector, int idt_index, int reason, diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 49205d01b9ad..572460175ba5 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -152,14 +152,16 @@ enum { #define DR6_BD (1 << 13) #define DR6_BS (1 << 14) -#define DR6_FIXED_1 0xffff0ff0 -#define DR6_VOLATILE 0x0000e00f +#define DR6_RTM (1 << 16) +#define DR6_FIXED_1 0xfffe0ff0 +#define DR6_INIT 0xffff0ff0 +#define DR6_VOLATILE 0x0001e00f #define DR7_BP_EN_MASK 0x000000ff #define DR7_GE (1 << 9) #define DR7_GD (1 << 13) #define DR7_FIXED_1 0x00000400 -#define DR7_VOLATILE 0xffff23ff +#define DR7_VOLATILE 0xffff2bff /* apic attention bits */ #define KVM_APIC_CHECK_VAPIC 0 @@ -448,7 +450,7 @@ struct kvm_vcpu_arch { u64 tsc_offset_adjustment; u64 this_tsc_nsec; u64 this_tsc_write; - u8 this_tsc_generation; + u64 this_tsc_generation; bool tsc_catchup; bool tsc_always_catchup; s8 virtual_tsc_shift; @@ -591,7 +593,7 @@ struct kvm_arch { u64 cur_tsc_nsec; u64 cur_tsc_write; u64 cur_tsc_offset; - u8 cur_tsc_generation; + u64 cur_tsc_generation; int nr_vcpus_matched_tsc; spinlock_t pvclock_gtod_sync_lock; @@ -717,7 +719,7 @@ struct kvm_x86_ops { int (*handle_exit)(struct kvm_vcpu *vcpu); void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu); void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask); - u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask); + u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu); void (*patch_hypercall)(struct kvm_vcpu *vcpu, unsigned char *hypercall_addr); void (*set_irq)(struct kvm_vcpu *vcpu); @@ -1070,6 +1072,7 @@ void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu); bool kvm_pmu_msr(struct kvm_vcpu *vcpu, u32 msr); int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data); int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info); +int kvm_pmu_check_pmc(struct kvm_vcpu *vcpu, unsigned pmc); int kvm_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data); void kvm_handle_pmu_event(struct kvm_vcpu *vcpu); void kvm_deliver_pmi(struct kvm_vcpu *vcpu); diff --git a/arch/x86/include/asm/mc146818rtc.h b/arch/x86/include/asm/mc146818rtc.h index a55c7efcc4ed..0f555cc31984 100644 --- a/arch/x86/include/asm/mc146818rtc.h +++ b/arch/x86/include/asm/mc146818rtc.h @@ -13,7 +13,7 @@ #define RTC_ALWAYS_BCD 1 /* RTC operates in binary mode */ #endif -#if defined(CONFIG_X86_32) && defined(__HAVE_ARCH_CMPXCHG) +#if defined(CONFIG_X86_32) /* * This lock provides nmi access to the CMOS/RTC registers. It has some * special properties. It is owned by a CPU and stores the index register diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index be12c534fd59..166af2a8e865 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -3,6 +3,10 @@ #include <asm/desc.h> #include <linux/atomic.h> +#include <linux/mm_types.h> + +#include <trace/events/tlb.h> + #include <asm/pgalloc.h> #include <asm/tlbflush.h> #include <asm/paravirt.h> @@ -44,6 +48,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, /* Re-load page tables */ load_cr3(next->pgd); + trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); /* Stop flush ipis for the previous mm */ cpumask_clear_cpu(cpu, mm_cpumask(prev)); @@ -71,6 +76,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, * to make sure to use no freed page tables. */ load_cr3(next->pgd); + trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); load_LDT_nolock(&next->context); } } diff --git a/arch/x86/include/asm/mutex_32.h b/arch/x86/include/asm/mutex_32.h index 0208c3c2cbc6..85e6cda45a02 100644 --- a/arch/x86/include/asm/mutex_32.h +++ b/arch/x86/include/asm/mutex_32.h @@ -100,23 +100,11 @@ do { \ static inline int __mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *)) { - /* - * We have two variants here. The cmpxchg based one is the best one - * because it never induce a false contention state. It is included - * here because architectures using the inc/dec algorithms over the - * xchg ones are much more likely to support cmpxchg natively. - * - * If not we fall back to the spinlock based variant - that is - * just as efficient (and simpler) as a 'destructive' probing of - * the mutex state would be. - */ -#ifdef __HAVE_ARCH_CMPXCHG + /* cmpxchg because it never induces a false contention state. */ if (likely(atomic_cmpxchg(count, 1, 0) == 1)) return 1; + return 0; -#else - return fail_fn(count); -#endif } #endif /* _ASM_X86_MUTEX_32_H */ diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h index 1da25a5f96f9..a1410db38a1a 100644 --- a/arch/x86/include/asm/mwait.h +++ b/arch/x86/include/asm/mwait.h @@ -43,7 +43,7 @@ static inline void __mwait(unsigned long eax, unsigned long ecx) static inline void mwait_idle_with_hints(unsigned long eax, unsigned long ecx) { if (!current_set_polling_and_test()) { - if (static_cpu_has(X86_FEATURE_CLFLUSH_MONITOR)) { + if (static_cpu_has_bug(X86_BUG_CLFLUSH_MONITOR)) { mb(); clflush((void *)¤t_thread_info()->flags); mb(); diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 851bcdc5db04..fd472181a1d0 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -52,10 +52,9 @@ * Compared to the generic __my_cpu_offset version, the following * saves one instruction and avoids clobbering a temp register. */ -#define raw_cpu_ptr(ptr) \ +#define arch_raw_cpu_ptr(ptr) \ ({ \ unsigned long tcp_ptr__; \ - __verify_pcpu_ptr(ptr); \ asm volatile("add " __percpu_arg(1) ", %0" \ : "=r" (tcp_ptr__) \ : "m" (this_cpu_off), "0" (ptr)); \ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index a4ea02351f4d..ee30b9f0b91c 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -72,7 +72,6 @@ extern u16 __read_mostly tlb_lld_4k[NR_INFO]; extern u16 __read_mostly tlb_lld_2m[NR_INFO]; extern u16 __read_mostly tlb_lld_4m[NR_INFO]; extern u16 __read_mostly tlb_lld_1g[NR_INFO]; -extern s8 __read_mostly tlb_flushall_shift; /* * CPU type and hardware bug flags. Kept separately for each CPU. @@ -696,6 +695,8 @@ static inline void cpu_relax(void) rep_nop(); } +#define cpu_relax_lowlatency() cpu_relax() + /* Stop speculative execution and prefetching of modified code. */ static inline void sync_core(void) { diff --git a/arch/x86/include/asm/qrwlock.h b/arch/x86/include/asm/qrwlock.h index 70f46f07f94e..ae0e241e228b 100644 --- a/arch/x86/include/asm/qrwlock.h +++ b/arch/x86/include/asm/qrwlock.h @@ -3,7 +3,7 @@ #include <asm-generic/qrwlock_types.h> -#if !defined(CONFIG_X86_OOSTORE) && !defined(CONFIG_X86_PPRO_FENCE) +#ifndef CONFIG_X86_PPRO_FENCE #define queue_write_unlock queue_write_unlock static inline void queue_write_unlock(struct qrwlock *lock) { diff --git a/arch/x86/include/asm/vga.h b/arch/x86/include/asm/vga.h index 44282fbf7bf9..c4b9dc2f67c5 100644 --- a/arch/x86/include/asm/vga.h +++ b/arch/x86/include/asm/vga.h @@ -17,10 +17,4 @@ #define vga_readb(x) (*(x)) #define vga_writeb(x, y) (*(y) = (x)) -#ifdef CONFIG_FB_EFI -#define __ARCH_HAS_VGA_DEFAULT_DEVICE -extern struct pci_dev *vga_default_device(void); -extern void vga_set_default_device(struct pci_dev *pdev); -#endif - #endif /* _ASM_X86_VGA_H */ diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 7004d21e6219..bcbfade26d8d 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -51,6 +51,9 @@ #define CPU_BASED_MONITOR_EXITING 0x20000000 #define CPU_BASED_PAUSE_EXITING 0x40000000 #define CPU_BASED_ACTIVATE_SECONDARY_CONTROLS 0x80000000 + +#define CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x0401e172 + /* * Definitions of Secondary Processor-Based VM-Execution Controls. */ @@ -76,7 +79,7 @@ #define PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x00000016 -#define VM_EXIT_SAVE_DEBUG_CONTROLS 0x00000002 +#define VM_EXIT_SAVE_DEBUG_CONTROLS 0x00000004 #define VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200 #define VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL 0x00001000 #define VM_EXIT_ACK_INTR_ON_EXIT 0x00008000 @@ -89,7 +92,7 @@ #define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR 0x00036dff -#define VM_ENTRY_LOAD_DEBUG_CONTROLS 0x00000002 +#define VM_ENTRY_LOAD_DEBUG_CONTROLS 0x00000004 #define VM_ENTRY_IA32E_MODE 0x00000200 #define VM_ENTRY_SMM 0x00000400 #define VM_ENTRY_DEACT_DUAL_MONITOR 0x00000800 diff --git a/arch/x86/include/uapi/asm/Kbuild b/arch/x86/include/uapi/asm/Kbuild index 09409c44f9a5..3dec769cadf7 100644 --- a/arch/x86/include/uapi/asm/Kbuild +++ b/arch/x86/include/uapi/asm/Kbuild @@ -22,6 +22,7 @@ header-y += ipcbuf.h header-y += ist.h header-y += kvm.h header-y += kvm_para.h +header-y += kvm_perf.h header-y += ldt.h header-y += mce.h header-y += mman.h diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index d3a87780c70b..d7dcef58aefa 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -23,7 +23,10 @@ #define GP_VECTOR 13 #define PF_VECTOR 14 #define MF_VECTOR 16 +#define AC_VECTOR 17 #define MC_VECTOR 18 +#define XM_VECTOR 19 +#define VE_VECTOR 20 /* Select x86 specific features in <linux/kvm.h> */ #define __KVM_HAVE_PIT diff --git a/arch/x86/include/uapi/asm/kvm_perf.h b/arch/x86/include/uapi/asm/kvm_perf.h new file mode 100644 index 000000000000..3bb964f88aa1 --- /dev/null +++ b/arch/x86/include/uapi/asm/kvm_perf.h @@ -0,0 +1,16 @@ +#ifndef _ASM_X86_KVM_PERF_H +#define _ASM_X86_KVM_PERF_H + +#include <asm/svm.h> +#include <asm/vmx.h> +#include <asm/kvm.h> + +#define DECODE_STR_LEN 20 + +#define VCPU_ID "vcpu_id" + +#define KVM_ENTRY_TRACE "kvm:kvm_entry" +#define KVM_EXIT_TRACE "kvm:kvm_exit" +#define KVM_EXIT_REASON "exit_reason" + +#endif /* _ASM_X86_KVM_PERF_H */ diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h index fcf2b3ae1bf0..eac9e92fe181 100644 --- a/arch/x86/include/uapi/asm/msr-index.h +++ b/arch/x86/include/uapi/asm/msr-index.h @@ -297,6 +297,8 @@ #define MSR_IA32_TSC_ADJUST 0x0000003b #define MSR_IA32_BNDCFGS 0x00000d90 +#define MSR_IA32_XSS 0x00000da0 + #define FEATURE_CONTROL_LOCKED (1<<0) #define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX (1<<1) #define FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX (1<<2) @@ -558,6 +560,7 @@ /* VMX_BASIC bits and bitmasks */ #define VMX_BASIC_VMCS_SIZE_SHIFT 32 +#define VMX_BASIC_TRUE_CTLS (1ULL << 55) #define VMX_BASIC_64 0x0001000000000000LLU #define VMX_BASIC_MEM_TYPE_SHIFT 50 #define VMX_BASIC_MEM_TYPE_MASK 0x003c000000000000LLU diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 86281ffb96d6..a531f6564ed0 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -74,10 +74,6 @@ int acpi_fix_pin2_polarity __initdata; static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE; #endif -#ifndef __HAVE_ARCH_CMPXCHG -#warning ACPI uses CMPXCHG, i486 and later hardware -#endif - /* -------------------------------------------------------------------------- Boot-time Configuration -------------------------------------------------------------------------- */ diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index f3a1f04ed4cb..584874451414 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -841,7 +841,6 @@ static int apm_do_idle(void) u32 eax; u8 ret = 0; int idled = 0; - int polling; int err = 0; if (!need_resched()) { diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index ce8b8ff0e0ef..60e5497681f5 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -8,6 +8,7 @@ #include <asm/processor.h> #include <asm/apic.h> #include <asm/cpu.h> +#include <asm/smp.h> #include <asm/pci-direct.h> #ifdef CONFIG_X86_64 @@ -50,7 +51,6 @@ static inline int wrmsrl_amd_safe(unsigned msr, unsigned long long val) return wrmsr_safe_regs(gprs); } -#ifdef CONFIG_X86_32 /* * B step AMD K6 before B 9730xxxx have hardware bugs that can cause * misexecution of code under Linux. Owners of such processors should @@ -70,6 +70,7 @@ __asm__(".globl vide\n\t.align 4\nvide: ret"); static void init_amd_k5(struct cpuinfo_x86 *c) { +#ifdef CONFIG_X86_32 /* * General Systems BIOSen alias the cpu frequency registers * of the Elan at 0x000df000. Unfortuantly, one of the Linux @@ -83,11 +84,12 @@ static void init_amd_k5(struct cpuinfo_x86 *c) if (inl(CBAR) & CBAR_ENB) outl(0 | CBAR_KEY, CBAR); } +#endif } - static void init_amd_k6(struct cpuinfo_x86 *c) { +#ifdef CONFIG_X86_32 u32 l, h; int mbytes = get_num_physpages() >> (20-PAGE_SHIFT); @@ -176,10 +178,44 @@ static void init_amd_k6(struct cpuinfo_x86 *c) /* placeholder for any needed mods */ return; } +#endif } -static void amd_k7_smp_check(struct cpuinfo_x86 *c) +static void init_amd_k7(struct cpuinfo_x86 *c) { +#ifdef CONFIG_X86_32 + u32 l, h; + + /* + * Bit 15 of Athlon specific MSR 15, needs to be 0 + * to enable SSE on Palomino/Morgan/Barton CPU's. + * If the BIOS didn't enable it already, enable it here. + */ + if (c->x86_model >= 6 && c->x86_model <= 10) { + if (!cpu_has(c, X86_FEATURE_XMM)) { + printk(KERN_INFO "Enabling disabled K7/SSE Support.\n"); + msr_clear_bit(MSR_K7_HWCR, 15); + set_cpu_cap(c, X86_FEATURE_XMM); + } + } + + /* + * It's been determined by AMD that Athlons since model 8 stepping 1 + * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx + * As per AMD technical note 27212 0.2 + */ + if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) { + rdmsr(MSR_K7_CLK_CTL, l, h); + if ((l & 0xfff00000) != 0x20000000) { + printk(KERN_INFO + "CPU: CLK_CTL MSR was %x. Reprogramming to %x\n", + l, ((l & 0x000fffff)|0x20000000)); + wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h); + } + } + + set_cpu_cap(c, X86_FEATURE_K7); + /* calling is from identify_secondary_cpu() ? */ if (!c->cpu_index) return; @@ -207,7 +243,7 @@ static void amd_k7_smp_check(struct cpuinfo_x86 *c) if (((c->x86_model == 6) && (c->x86_mask >= 2)) || ((c->x86_model == 7) && (c->x86_mask >= 1)) || (c->x86_model > 7)) - if (cpu_has_mp) + if (cpu_has(c, X86_FEATURE_MP)) return; /* If we get here, not a certified SMP capable AMD system. */ @@ -219,45 +255,8 @@ static void amd_k7_smp_check(struct cpuinfo_x86 *c) WARN_ONCE(1, "WARNING: This combination of AMD" " processors is not suitable for SMP.\n"); add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_NOW_UNRELIABLE); -} - -static void init_amd_k7(struct cpuinfo_x86 *c) -{ - u32 l, h; - - /* - * Bit 15 of Athlon specific MSR 15, needs to be 0 - * to enable SSE on Palomino/Morgan/Barton CPU's. - * If the BIOS didn't enable it already, enable it here. - */ - if (c->x86_model >= 6 && c->x86_model <= 10) { - if (!cpu_has(c, X86_FEATURE_XMM)) { - printk(KERN_INFO "Enabling disabled K7/SSE Support.\n"); - msr_clear_bit(MSR_K7_HWCR, 15); - set_cpu_cap(c, X86_FEATURE_XMM); - } - } - - /* - * It's been determined by AMD that Athlons since model 8 stepping 1 - * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx - * As per AMD technical note 27212 0.2 - */ - if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) { - rdmsr(MSR_K7_CLK_CTL, l, h); - if ((l & 0xfff00000) != 0x20000000) { - printk(KERN_INFO - "CPU: CLK_CTL MSR was %x. Reprogramming to %x\n", - l, ((l & 0x000fffff)|0x20000000)); - wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h); - } - } - - set_cpu_cap(c, X86_FEATURE_K7); - - amd_k7_smp_check(c); -} #endif +} #ifdef CONFIG_NUMA /* @@ -446,6 +445,26 @@ static void early_init_amd_mc(struct cpuinfo_x86 *c) static void bsp_init_amd(struct cpuinfo_x86 *c) { + +#ifdef CONFIG_X86_64 + if (c->x86 >= 0xf) { + unsigned long long tseg; + + /* + * Split up direct mapping around the TSEG SMM area. + * Don't do it for gbpages because there seems very little + * benefit in doing so. + */ + if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) { + unsigned long pfn = tseg >> PAGE_SHIFT; + + printk(KERN_DEBUG "tseg: %010llx\n", tseg); + if (pfn_range_is_mapped(pfn, pfn + 1)) + set_memory_4k((unsigned long)__va(tseg), 1); + } + } +#endif + if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) { if (c->x86 > 0x10 || @@ -515,101 +534,74 @@ static const int amd_erratum_383[]; static const int amd_erratum_400[]; static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum); -static void init_amd(struct cpuinfo_x86 *c) +static void init_amd_k8(struct cpuinfo_x86 *c) { - u32 dummy; - unsigned long long value; + u32 level; + u64 value; -#ifdef CONFIG_SMP - /* - * Disable TLB flush filter by setting HWCR.FFDIS on K8 - * bit 6 of msr C001_0015 - * - * Errata 63 for SH-B3 steppings - * Errata 122 for all steppings (F+ have it disabled by default) - */ - if (c->x86 == 0xf) - msr_set_bit(MSR_K7_HWCR, 6); -#endif - - early_init_amd(c); + /* On C+ stepping K8 rep microcode works well for copy/memset */ + level = cpuid_eax(1); + if ((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58) + set_cpu_cap(c, X86_FEATURE_REP_GOOD); /* - * Bit 31 in normal CPUID used for nonstandard 3DNow ID; - * 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway + * Some BIOSes incorrectly force this feature, but only K8 revision D + * (model = 0x14) and later actually support it. + * (AMD Erratum #110, docId: 25759). */ - clear_cpu_cap(c, 0*32+31); - -#ifdef CONFIG_X86_64 - /* On C+ stepping K8 rep microcode works well for copy/memset */ - if (c->x86 == 0xf) { - u32 level; - - level = cpuid_eax(1); - if ((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58) - set_cpu_cap(c, X86_FEATURE_REP_GOOD); - - /* - * Some BIOSes incorrectly force this feature, but only K8 - * revision D (model = 0x14) and later actually support it. - * (AMD Erratum #110, docId: 25759). - */ - if (c->x86_model < 0x14 && cpu_has(c, X86_FEATURE_LAHF_LM)) { - clear_cpu_cap(c, X86_FEATURE_LAHF_LM); - if (!rdmsrl_amd_safe(0xc001100d, &value)) { - value &= ~(1ULL << 32); - wrmsrl_amd_safe(0xc001100d, value); - } + if (c->x86_model < 0x14 && cpu_has(c, X86_FEATURE_LAHF_LM)) { + clear_cpu_cap(c, X86_FEATURE_LAHF_LM); + if (!rdmsrl_amd_safe(0xc001100d, &value)) { + value &= ~BIT_64(32); + wrmsrl_amd_safe(0xc001100d, value); } - } - if (c->x86 >= 0x10) - set_cpu_cap(c, X86_FEATURE_REP_GOOD); - /* get apicid instead of initial apic id from cpuid */ - c->apicid = hard_smp_processor_id(); -#else + if (!c->x86_model_id[0]) + strcpy(c->x86_model_id, "Hammer"); +} + +static void init_amd_gh(struct cpuinfo_x86 *c) +{ +#ifdef CONFIG_X86_64 + /* do this for boot cpu */ + if (c == &boot_cpu_data) + check_enable_amd_mmconf_dmi(); + + fam10h_check_enable_mmcfg(); +#endif /* - * FIXME: We should handle the K5 here. Set up the write - * range and also turn on MSR 83 bits 4 and 31 (write alloc, - * no bus pipeline) + * Disable GART TLB Walk Errors on Fam10h. We do this here because this + * is always needed when GART is enabled, even in a kernel which has no + * MCE support built in. BIOS should disable GartTlbWlk Errors already. + * If it doesn't, we do it here as suggested by the BKDG. + * + * Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=33012 */ + msr_set_bit(MSR_AMD64_MCx_MASK(4), 10); - switch (c->x86) { - case 4: - init_amd_k5(c); - break; - case 5: - init_amd_k6(c); - break; - case 6: /* An Athlon/Duron */ - init_amd_k7(c); - break; - } + /* + * On family 10h BIOS may not have properly enabled WC+ support, causing + * it to be converted to CD memtype. This may result in performance + * degradation for certain nested-paging guests. Prevent this conversion + * by clearing bit 24 in MSR_AMD64_BU_CFG2. + * + * NOTE: we want to use the _safe accessors so as not to #GP kvm + * guests on older kvm hosts. + */ + msr_clear_bit(MSR_AMD64_BU_CFG2, 24); - /* K6s reports MCEs but don't actually have all the MSRs */ - if (c->x86 < 6) - clear_cpu_cap(c, X86_FEATURE_MCE); -#endif + if (cpu_has_amd_erratum(c, amd_erratum_383)) + set_cpu_bug(c, X86_BUG_AMD_TLB_MMATCH); +} - /* Enable workaround for FXSAVE leak */ - if (c->x86 >= 6) - set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK); - - if (!c->x86_model_id[0]) { - switch (c->x86) { - case 0xf: - /* Should distinguish Models here, but this is only - a fallback anyways. */ - strcpy(c->x86_model_id, "Hammer"); - break; - } - } +static void init_amd_bd(struct cpuinfo_x86 *c) +{ + u64 value; /* re-enable TopologyExtensions if switched off by BIOS */ - if ((c->x86 == 0x15) && - (c->x86_model >= 0x10) && (c->x86_model <= 0x1f) && + if ((c->x86_model >= 0x10) && (c->x86_model <= 0x1f) && !cpu_has(c, X86_FEATURE_TOPOEXT)) { if (msr_set_bit(0xc0011005, 54) > 0) { @@ -625,14 +617,60 @@ static void init_amd(struct cpuinfo_x86 *c) * The way access filter has a performance penalty on some workloads. * Disable it on the affected CPUs. */ - if ((c->x86 == 0x15) && - (c->x86_model >= 0x02) && (c->x86_model < 0x20)) { - + if ((c->x86_model >= 0x02) && (c->x86_model < 0x20)) { if (!rdmsrl_safe(0xc0011021, &value) && !(value & 0x1E)) { value |= 0x1E; wrmsrl_safe(0xc0011021, value); } } +} + +static void init_amd(struct cpuinfo_x86 *c) +{ + u32 dummy; + +#ifdef CONFIG_SMP + /* + * Disable TLB flush filter by setting HWCR.FFDIS on K8 + * bit 6 of msr C001_0015 + * + * Errata 63 for SH-B3 steppings + * Errata 122 for all steppings (F+ have it disabled by default) + */ + if (c->x86 == 0xf) + msr_set_bit(MSR_K7_HWCR, 6); +#endif + + early_init_amd(c); + + /* + * Bit 31 in normal CPUID used for nonstandard 3DNow ID; + * 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway + */ + clear_cpu_cap(c, 0*32+31); + + if (c->x86 >= 0x10) + set_cpu_cap(c, X86_FEATURE_REP_GOOD); + + /* get apicid instead of initial apic id from cpuid */ + c->apicid = hard_smp_processor_id(); + + /* K6s reports MCEs but don't actually have all the MSRs */ + if (c->x86 < 6) + clear_cpu_cap(c, X86_FEATURE_MCE); + + switch (c->x86) { + case 4: init_amd_k5(c); break; + case 5: init_amd_k6(c); break; + case 6: init_amd_k7(c); break; + case 0xf: init_amd_k8(c); break; + case 0x10: init_amd_gh(c); break; + case 0x15: init_amd_bd(c); break; + } + + /* Enable workaround for FXSAVE leak */ + if (c->x86 >= 6) + set_cpu_bug(c, X86_BUG_FXSAVE_LEAK); cpu_detect_cache_sizes(c); @@ -656,33 +694,6 @@ static void init_amd(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); } -#ifdef CONFIG_X86_64 - if (c->x86 == 0x10) { - /* do this for boot cpu */ - if (c == &boot_cpu_data) - check_enable_amd_mmconf_dmi(); - - fam10h_check_enable_mmcfg(); - } - - if (c == &boot_cpu_data && c->x86 >= 0xf) { - unsigned long long tseg; - - /* - * Split up direct mapping around the TSEG SMM area. - * Don't do it for gbpages because there seems very little - * benefit in doing so. - */ - if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) { - unsigned long pfn = tseg >> PAGE_SHIFT; - - printk(KERN_DEBUG "tseg: %010llx\n", tseg); - if (pfn_range_is_mapped(pfn, pfn + 1)) - set_memory_4k((unsigned long)__va(tseg), 1); - } - } -#endif - /* * Family 0x12 and above processors have APIC timer * running in deep C states. @@ -690,34 +701,6 @@ static void init_amd(struct cpuinfo_x86 *c) if (c->x86 > 0x11) set_cpu_cap(c, X86_FEATURE_ARAT); - if (c->x86 == 0x10) { - /* - * Disable GART TLB Walk Errors on Fam10h. We do this here - * because this is always needed when GART is enabled, even in a - * kernel which has no MCE support built in. - * BIOS should disable GartTlbWlk Errors already. If - * it doesn't, do it here as suggested by the BKDG. - * - * Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=33012 - */ - msr_set_bit(MSR_AMD64_MCx_MASK(4), 10); - - /* - * On family 10h BIOS may not have properly enabled WC+ support, - * causing it to be converted to CD memtype. This may result in - * performance degradation for certain nested-paging guests. - * Prevent this conversion by clearing bit 24 in - * MSR_AMD64_BU_CFG2. - * - * NOTE: we want to use the _safe accessors so as not to #GP kvm - * guests on older kvm hosts. - */ - msr_clear_bit(MSR_AMD64_BU_CFG2, 24); - - if (cpu_has_amd_erratum(c, amd_erratum_383)) - set_cpu_bug(c, X86_BUG_AMD_TLB_MMATCH); - } - if (cpu_has_amd_erratum(c, amd_erratum_400)) set_cpu_bug(c, X86_BUG_AMD_APIC_C1E); @@ -741,11 +724,6 @@ static unsigned int amd_size_cache(struct cpuinfo_x86 *c, unsigned int size) } #endif -static void cpu_set_tlb_flushall_shift(struct cpuinfo_x86 *c) -{ - tlb_flushall_shift = 6; -} - static void cpu_detect_tlb_amd(struct cpuinfo_x86 *c) { u32 ebx, eax, ecx, edx; @@ -793,8 +771,6 @@ static void cpu_detect_tlb_amd(struct cpuinfo_x86 *c) tlb_lli_2m[ENTRIES] = eax & mask; tlb_lli_4m[ENTRIES] = tlb_lli_2m[ENTRIES] >> 1; - - cpu_set_tlb_flushall_shift(c); } static const struct cpu_dev amd_cpu_dev = { diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index ef1b93f18ed1..333fd5209336 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -481,26 +481,17 @@ u16 __read_mostly tlb_lld_2m[NR_INFO]; u16 __read_mostly tlb_lld_4m[NR_INFO]; u16 __read_mostly tlb_lld_1g[NR_INFO]; -/* - * tlb_flushall_shift shows the balance point in replacing cr3 write - * with multiple 'invlpg'. It will do this replacement when - * flush_tlb_lines <= active_lines/2^tlb_flushall_shift. - * If tlb_flushall_shift is -1, means the replacement will be disabled. - */ -s8 __read_mostly tlb_flushall_shift = -1; - void cpu_detect_tlb(struct cpuinfo_x86 *c) { if (this_cpu->c_detect_tlb) this_cpu->c_detect_tlb(c); printk(KERN_INFO "Last level iTLB entries: 4KB %d, 2MB %d, 4MB %d\n" - "Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d, 1GB %d\n" - "tlb_flushall_shift: %d\n", + "Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d, 1GB %d\n", tlb_lli_4k[ENTRIES], tlb_lli_2m[ENTRIES], tlb_lli_4m[ENTRIES], tlb_lld_4k[ENTRIES], tlb_lld_2m[ENTRIES], tlb_lld_4m[ENTRIES], - tlb_lld_1g[ENTRIES], tlb_flushall_shift); + tlb_lld_1g[ENTRIES]); } void detect_ht(struct cpuinfo_x86 *c) @@ -634,6 +625,15 @@ void get_cpu_cap(struct cpuinfo_x86 *c) c->x86_capability[9] = ebx; } + /* Extended state features: level 0x0000000d */ + if (c->cpuid_level >= 0x0000000d) { + u32 eax, ebx, ecx, edx; + + cpuid_count(0x0000000d, 1, &eax, &ebx, &ecx, &edx); + + c->x86_capability[10] = eax; + } + /* AMD-defined flags: level 0x80000001 */ xlvl = cpuid_eax(0x80000000); c->extended_cpuid_level = xlvl; diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index a80029035bf2..74e804ddc5c7 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -253,7 +253,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c) */ if (cpu_has_apic && (c->x86<<8 | c->x86_model<<4) == 0x520 && (c->x86_mask < 0x6 || c->x86_mask == 0xb)) - set_cpu_cap(c, X86_FEATURE_11AP); + set_cpu_bug(c, X86_BUG_11AP); #ifdef CONFIG_X86_INTEL_USERCOPY @@ -370,6 +370,17 @@ static void init_intel(struct cpuinfo_x86 *c) */ detect_extended_topology(c); + if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) { + /* + * let's use the legacy cpuid vector 0x1 and 0x4 for topology + * detection. + */ + c->x86_max_cores = intel_num_cpu_cores(c); +#ifdef CONFIG_X86_32 + detect_ht(c); +#endif + } + l2 = init_intel_cacheinfo(c); if (c->cpuid_level > 9) { unsigned eax = cpuid_eax(10); @@ -391,7 +402,7 @@ static void init_intel(struct cpuinfo_x86 *c) if (c->x86 == 6 && cpu_has_clflush && (c->x86_model == 29 || c->x86_model == 46 || c->x86_model == 47)) - set_cpu_cap(c, X86_FEATURE_CLFLUSH_MONITOR); + set_cpu_bug(c, X86_BUG_CLFLUSH_MONITOR); #ifdef CONFIG_X86_64 if (c->x86 == 15) @@ -438,17 +449,6 @@ static void init_intel(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_P3); #endif - if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) { - /* - * let's use the legacy cpuid vector 0x1 and 0x4 for topology - * detection. - */ - c->x86_max_cores = intel_num_cpu_cores(c); -#ifdef CONFIG_X86_32 - detect_ht(c); -#endif - } - /* Work around errata */ srat_detect_node(c); @@ -634,31 +634,6 @@ static void intel_tlb_lookup(const unsigned char desc) } } -static void intel_tlb_flushall_shift_set(struct cpuinfo_x86 *c) -{ - switch ((c->x86 << 8) + c->x86_model) { - case 0x60f: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ - case 0x616: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ - case 0x617: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ - case 0x61d: /* six-core 45 nm xeon "Dunnington" */ - tlb_flushall_shift = -1; - break; - case 0x63a: /* Ivybridge */ - tlb_flushall_shift = 2; - break; - case 0x61a: /* 45 nm nehalem, "Bloomfield" */ - case 0x61e: /* 45 nm nehalem, "Lynnfield" */ - case 0x625: /* 32 nm nehalem, "Clarkdale" */ - case 0x62c: /* 32 nm nehalem, "Gulftown" */ - case 0x62e: /* 45 nm nehalem-ex, "Beckton" */ - case 0x62f: /* 32 nm Xeon E7 */ - case 0x62a: /* SandyBridge */ - case 0x62d: /* SandyBridge, "Romely-EP" */ - default: - tlb_flushall_shift = 6; - } -} - static void intel_detect_tlb(struct cpuinfo_x86 *c) { int i, j, n; @@ -683,7 +658,6 @@ static void intel_detect_tlb(struct cpuinfo_x86 *c) for (j = 1 ; j < 16 ; j++) intel_tlb_lookup(desc[j]); } - intel_tlb_flushall_shift_set(c); } static const struct cpu_dev intel_cpu_dev = { diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index a952e9c85b6f..9c8f7394c612 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -730,6 +730,18 @@ unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c) #endif } +#ifdef CONFIG_X86_HT + /* + * If cpu_llc_id is not yet set, this means cpuid_level < 4 which in + * turns means that the only possibility is SMT (as indicated in + * cpuid1). Since cpuid2 doesn't specify shared caches, and we know + * that SMT shares all caches, we can unconditionally set cpu_llc_id to + * c->phys_proc_id. + */ + if (per_cpu(cpu_llc_id, cpu) == BAD_APICID) + per_cpu(cpu_llc_id, cpu) = c->phys_proc_id; +#endif + c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d)); return l2; diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index bb92f38153b2..9a79c8dbd8e8 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -2451,6 +2451,12 @@ static __init int mcheck_init_device(void) for_each_online_cpu(i) { err = mce_device_create(i); if (err) { + /* + * Register notifier anyway (and do not unreg it) so + * that we don't leave undeleted timers, see notifier + * callback above. + */ + __register_hotcpu_notifier(&mce_cpu_notifier); cpu_notifier_register_done(); goto err_device_create; } @@ -2471,10 +2477,6 @@ static __init int mcheck_init_device(void) err_register: unregister_syscore_ops(&mce_syscore_ops); - cpu_notifier_register_begin(); - __unregister_hotcpu_notifier(&mce_cpu_notifier); - cpu_notifier_register_done(); - err_device_create: /* * We didn't keep track of which devices were created above, but diff --git a/arch/x86/kernel/cpu/mkcapflags.sh b/arch/x86/kernel/cpu/mkcapflags.sh index 2bf616505499..e2b22df964cd 100644 --- a/arch/x86/kernel/cpu/mkcapflags.sh +++ b/arch/x86/kernel/cpu/mkcapflags.sh @@ -1,23 +1,25 @@ #!/bin/sh # -# Generate the x86_cap_flags[] array from include/asm/cpufeature.h +# Generate the x86_cap/bug_flags[] arrays from include/asm/cpufeature.h # IN=$1 OUT=$2 -TABS="$(printf '\t\t\t\t\t')" -trap 'rm "$OUT"' EXIT +function dump_array() +{ + ARRAY=$1 + SIZE=$2 + PFX=$3 + POSTFIX=$4 -( - echo "#ifndef _ASM_X86_CPUFEATURE_H" - echo "#include <asm/cpufeature.h>" - echo "#endif" - echo "" - echo "const char * const x86_cap_flags[NCAPINTS*32] = {" + PFX_SZ=$(echo $PFX | wc -c) + TABS="$(printf '\t\t\t\t\t')" + + echo "const char * const $ARRAY[$SIZE] = {" - # Iterate through any input lines starting with #define X86_FEATURE_ - sed -n -e 's/\t/ /g' -e 's/^ *# *define *X86_FEATURE_//p' $IN | + # Iterate through any input lines starting with #define $PFX + sed -n -e 's/\t/ /g' -e "s/^ *# *define *$PFX//p" $IN | while read i do # Name is everything up to the first whitespace @@ -31,11 +33,32 @@ trap 'rm "$OUT"' EXIT # Name is uppercase, VALUE is all lowercase VALUE="$(echo "$VALUE" | tr A-Z a-z)" - TABCOUNT=$(( ( 5*8 - 14 - $(echo "$NAME" | wc -c) ) / 8 )) - printf "\t[%s]%.*s = %s,\n" \ - "X86_FEATURE_$NAME" "$TABCOUNT" "$TABS" "$VALUE" + if [ -n "$POSTFIX" ]; then + T=$(( $PFX_SZ + $(echo $POSTFIX | wc -c) + 2 )) + TABS="$(printf '\t\t\t\t\t\t')" + TABCOUNT=$(( ( 6*8 - ($T + 1) - $(echo "$NAME" | wc -c) ) / 8 )) + printf "\t[%s - %s]%.*s = %s,\n" "$PFX$NAME" "$POSTFIX" "$TABCOUNT" "$TABS" "$VALUE" + else + TABCOUNT=$(( ( 5*8 - ($PFX_SZ + 1) - $(echo "$NAME" | wc -c) ) / 8 )) + printf "\t[%s]%.*s = %s,\n" "$PFX$NAME" "$TABCOUNT" "$TABS" "$VALUE" + fi done echo "};" +} + +trap 'rm "$OUT"' EXIT + +( + echo "#ifndef _ASM_X86_CPUFEATURE_H" + echo "#include <asm/cpufeature.h>" + echo "#endif" + echo "" + + dump_array "x86_cap_flags" "NCAPINTS*32" "X86_FEATURE_" "" + echo "" + + dump_array "x86_bug_flags" "NBUGINTS*32" "X86_BUG_" "NCAPINTS*32" + ) > $OUT trap - EXIT diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 2bdfbff8a4f6..2879ecdaac43 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -118,6 +118,9 @@ static int x86_pmu_extra_regs(u64 config, struct perf_event *event) continue; if (event->attr.config1 & ~er->valid_mask) return -EINVAL; + /* Check if the extra msrs can be safely accessed*/ + if (!er->extra_msr_access) + return -ENXIO; reg->idx = er->idx; reg->config = event->attr.config1; diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 3b2f9bdd974b..8ade93111e03 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -295,14 +295,16 @@ struct extra_reg { u64 config_mask; u64 valid_mask; int idx; /* per_xxx->regs[] reg index */ + bool extra_msr_access; }; #define EVENT_EXTRA_REG(e, ms, m, vm, i) { \ - .event = (e), \ - .msr = (ms), \ - .config_mask = (m), \ - .valid_mask = (vm), \ - .idx = EXTRA_REG_##i, \ + .event = (e), \ + .msr = (ms), \ + .config_mask = (m), \ + .valid_mask = (vm), \ + .idx = EXTRA_REG_##i, \ + .extra_msr_access = true, \ } #define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx) \ diff --git a/arch/x86/kernel/cpu/perf_event_amd_uncore.c b/arch/x86/kernel/cpu/perf_event_amd_uncore.c index 3bbdf4cd38b9..30790d798e6b 100644 --- a/arch/x86/kernel/cpu/perf_event_amd_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_amd_uncore.c @@ -294,31 +294,41 @@ static struct amd_uncore *amd_uncore_alloc(unsigned int cpu) cpu_to_node(cpu)); } -static void amd_uncore_cpu_up_prepare(unsigned int cpu) +static int amd_uncore_cpu_up_prepare(unsigned int cpu) { - struct amd_uncore *uncore; + struct amd_uncore *uncore_nb = NULL, *uncore_l2; if (amd_uncore_nb) { - uncore = amd_uncore_alloc(cpu); - uncore->cpu = cpu; - uncore->num_counters = NUM_COUNTERS_NB; - uncore->rdpmc_base = RDPMC_BASE_NB; - uncore->msr_base = MSR_F15H_NB_PERF_CTL; - uncore->active_mask = &amd_nb_active_mask; - uncore->pmu = &amd_nb_pmu; - *per_cpu_ptr(amd_uncore_nb, cpu) = uncore; + uncore_nb = amd_uncore_alloc(cpu); + if (!uncore_nb) + goto fail; + uncore_nb->cpu = cpu; + uncore_nb->num_counters = NUM_COUNTERS_NB; + uncore_nb->rdpmc_base = RDPMC_BASE_NB; + uncore_nb->msr_base = MSR_F15H_NB_PERF_CTL; + uncore_nb->active_mask = &amd_nb_active_mask; + uncore_nb->pmu = &amd_nb_pmu; + *per_cpu_ptr(amd_uncore_nb, cpu) = uncore_nb; } if (amd_uncore_l2) { - uncore = amd_uncore_alloc(cpu); - uncore->cpu = cpu; - uncore->num_counters = NUM_COUNTERS_L2; - uncore->rdpmc_base = RDPMC_BASE_L2; - uncore->msr_base = MSR_F16H_L2I_PERF_CTL; - uncore->active_mask = &amd_l2_active_mask; - uncore->pmu = &amd_l2_pmu; - *per_cpu_ptr(amd_uncore_l2, cpu) = uncore; + uncore_l2 = amd_uncore_alloc(cpu); + if (!uncore_l2) + goto fail; + uncore_l2->cpu = cpu; + uncore_l2->num_counters = NUM_COUNTERS_L2; + uncore_l2->rdpmc_base = RDPMC_BASE_L2; + uncore_l2->msr_base = MSR_F16H_L2I_PERF_CTL; + uncore_l2->active_mask = &amd_l2_active_mask; + uncore_l2->pmu = &amd_l2_pmu; + *per_cpu_ptr(amd_uncore_l2, cpu) = uncore_l2; } + + return 0; + +fail: + kfree(uncore_nb); + return -ENOMEM; } static struct amd_uncore * @@ -441,7 +451,7 @@ static void uncore_dead(unsigned int cpu, struct amd_uncore * __percpu *uncores) if (!--uncore->refcnt) kfree(uncore); - *per_cpu_ptr(amd_uncore_nb, cpu) = NULL; + *per_cpu_ptr(uncores, cpu) = NULL; } static void amd_uncore_cpu_dead(unsigned int cpu) @@ -461,7 +471,8 @@ amd_uncore_cpu_notifier(struct notifier_block *self, unsigned long action, switch (action & ~CPU_TASKS_FROZEN) { case CPU_UP_PREPARE: - amd_uncore_cpu_up_prepare(cpu); + if (amd_uncore_cpu_up_prepare(cpu)) + return notifier_from_errno(-ENOMEM); break; case CPU_STARTING: @@ -501,20 +512,33 @@ static void __init init_cpu_already_online(void *dummy) amd_uncore_cpu_online(cpu); } +static void cleanup_cpu_online(void *dummy) +{ + unsigned int cpu = smp_processor_id(); + + amd_uncore_cpu_dead(cpu); +} + static int __init amd_uncore_init(void) { - unsigned int cpu; + unsigned int cpu, cpu2; int ret = -ENODEV; if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) - return -ENODEV; + goto fail_nodev; if (!cpu_has_topoext) - return -ENODEV; + goto fail_nodev; if (cpu_has_perfctr_nb) { amd_uncore_nb = alloc_percpu(struct amd_uncore *); - perf_pmu_register(&amd_nb_pmu, amd_nb_pmu.name, -1); + if (!amd_uncore_nb) { + ret = -ENOMEM; + goto fail_nb; + } + ret = perf_pmu_register(&amd_nb_pmu, amd_nb_pmu.name, -1); + if (ret) + goto fail_nb; printk(KERN_INFO "perf: AMD NB counters detected\n"); ret = 0; @@ -522,20 +546,28 @@ static int __init amd_uncore_init(void) if (cpu_has_perfctr_l2) { amd_uncore_l2 = alloc_percpu(struct amd_uncore *); - perf_pmu_register(&amd_l2_pmu, amd_l2_pmu.name, -1); + if (!amd_uncore_l2) { + ret = -ENOMEM; + goto fail_l2; + } + ret = perf_pmu_register(&amd_l2_pmu, amd_l2_pmu.name, -1); + if (ret) + goto fail_l2; printk(KERN_INFO "perf: AMD L2I counters detected\n"); ret = 0; } if (ret) - return -ENODEV; + goto fail_nodev; cpu_notifier_register_begin(); /* init cpus already online before registering for hotplug notifier */ for_each_online_cpu(cpu) { - amd_uncore_cpu_up_prepare(cpu); + ret = amd_uncore_cpu_up_prepare(cpu); + if (ret) + goto fail_online; smp_call_function_single(cpu, init_cpu_already_online, NULL, 1); } @@ -543,5 +575,30 @@ static int __init amd_uncore_init(void) cpu_notifier_register_done(); return 0; + + +fail_online: + for_each_online_cpu(cpu2) { + if (cpu2 == cpu) + break; + smp_call_function_single(cpu, cleanup_cpu_online, NULL, 1); + } + cpu_notifier_register_done(); + + /* amd_uncore_nb/l2 should have been freed by cleanup_cpu_online */ + amd_uncore_nb = amd_uncore_l2 = NULL; + if (cpu_has_perfctr_l2) + perf_pmu_unregister(&amd_l2_pmu); +fail_l2: + if (cpu_has_perfctr_nb) + perf_pmu_unregister(&amd_nb_pmu); + if (amd_uncore_l2) + free_percpu(amd_uncore_l2); +fail_nb: + if (amd_uncore_nb) + free_percpu(amd_uncore_nb); + +fail_nodev: + return ret; } device_initcall(amd_uncore_init); diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index adb02aa62af5..2502d0d9d246 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1382,6 +1382,15 @@ again: intel_pmu_lbr_read(); /* + * CondChgd bit 63 doesn't mean any overflow status. Ignore + * and clear the bit. + */ + if (__test_and_clear_bit(63, (unsigned long *)&status)) { + if (!status) + goto done; + } + + /* * PEBS overflow sets bit 62 in the global status register */ if (__test_and_clear_bit(62, (unsigned long *)&status)) { @@ -2173,6 +2182,41 @@ static void intel_snb_check_microcode(void) } } +/* + * Under certain circumstances, access certain MSR may cause #GP. + * The function tests if the input MSR can be safely accessed. + */ +static bool check_msr(unsigned long msr, u64 mask) +{ + u64 val_old, val_new, val_tmp; + + /* + * Read the current value, change it and read it back to see if it + * matches, this is needed to detect certain hardware emulators + * (qemu/kvm) that don't trap on the MSR access and always return 0s. + */ + if (rdmsrl_safe(msr, &val_old)) + return false; + + /* + * Only change the bits which can be updated by wrmsrl. + */ + val_tmp = val_old ^ mask; + if (wrmsrl_safe(msr, val_tmp) || + rdmsrl_safe(msr, &val_new)) + return false; + + if (val_new != val_tmp) + return false; + + /* Here it's sure that the MSR can be safely accessed. + * Restore the old value and return. + */ + wrmsrl(msr, val_old); + + return true; +} + static __init void intel_sandybridge_quirk(void) { x86_pmu.check_microcode = intel_snb_check_microcode; @@ -2262,7 +2306,8 @@ __init int intel_pmu_init(void) union cpuid10_ebx ebx; struct event_constraint *c; unsigned int unused; - int version; + struct extra_reg *er; + int version, i; if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { switch (boot_cpu_data.x86) { @@ -2465,6 +2510,9 @@ __init int intel_pmu_init(void) case 62: /* IvyBridge EP */ memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, sizeof(hw_cache_event_ids)); + /* dTLB-load-misses on IVB is different than SNB */ + hw_cache_event_ids[C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = 0x8108; /* DTLB_LOAD_MISSES.DEMAND_LD_MISS_CAUSES_A_WALK */ + memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); @@ -2565,6 +2613,34 @@ __init int intel_pmu_init(void) } } + /* + * Access LBR MSR may cause #GP under certain circumstances. + * E.g. KVM doesn't support LBR MSR + * Check all LBT MSR here. + * Disable LBR access if any LBR MSRs can not be accessed. + */ + if (x86_pmu.lbr_nr && !check_msr(x86_pmu.lbr_tos, 0x3UL)) + x86_pmu.lbr_nr = 0; + for (i = 0; i < x86_pmu.lbr_nr; i++) { + if (!(check_msr(x86_pmu.lbr_from + i, 0xffffUL) && + check_msr(x86_pmu.lbr_to + i, 0xffffUL))) + x86_pmu.lbr_nr = 0; + } + + /* + * Access extra MSR may cause #GP under certain circumstances. + * E.g. KVM doesn't support offcore event + * Check all extra_regs here. + */ + if (x86_pmu.extra_regs) { + for (er = x86_pmu.extra_regs; er->msr; er++) { + er->extra_msr_access = check_msr(er->msr, 0x1ffUL); + /* Disable LBR select mapping */ + if ((er->idx == EXTRA_REG_LBR) && !er->extra_msr_access) + x86_pmu.lbr_sel_map = NULL; + } + } + /* Support full width counters using alternative MSR range */ if (x86_pmu.intel_cap.full_width_write) { x86_pmu.max_period = x86_pmu.cntval_mask; diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 980970cb744d..696ade311ded 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -311,9 +311,11 @@ static int alloc_bts_buffer(int cpu) if (!x86_pmu.bts) return 0; - buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL, node); - if (unlikely(!buffer)) + buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node); + if (unlikely(!buffer)) { + WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__); return -ENOMEM; + } max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE; thresh = max / 16; diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index 65bbbea38b9c..cfc6f9dfcd90 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -550,16 +550,16 @@ static struct extra_reg snbep_uncore_cbox_extra_regs[] = { SNBEP_CBO_EVENT_EXTRA_REG(0x4134, 0xffff, 0x6), SNBEP_CBO_EVENT_EXTRA_REG(0x0135, 0xffff, 0x8), SNBEP_CBO_EVENT_EXTRA_REG(0x0335, 0xffff, 0x8), - SNBEP_CBO_EVENT_EXTRA_REG(0x4135, 0xffff, 0xc), - SNBEP_CBO_EVENT_EXTRA_REG(0x4335, 0xffff, 0xc), + SNBEP_CBO_EVENT_EXTRA_REG(0x4135, 0xffff, 0xa), + SNBEP_CBO_EVENT_EXTRA_REG(0x4335, 0xffff, 0xa), SNBEP_CBO_EVENT_EXTRA_REG(0x4435, 0xffff, 0x2), SNBEP_CBO_EVENT_EXTRA_REG(0x4835, 0xffff, 0x2), SNBEP_CBO_EVENT_EXTRA_REG(0x4a35, 0xffff, 0x2), SNBEP_CBO_EVENT_EXTRA_REG(0x5035, 0xffff, 0x2), SNBEP_CBO_EVENT_EXTRA_REG(0x0136, 0xffff, 0x8), SNBEP_CBO_EVENT_EXTRA_REG(0x0336, 0xffff, 0x8), - SNBEP_CBO_EVENT_EXTRA_REG(0x4136, 0xffff, 0xc), - SNBEP_CBO_EVENT_EXTRA_REG(0x4336, 0xffff, 0xc), + SNBEP_CBO_EVENT_EXTRA_REG(0x4136, 0xffff, 0xa), + SNBEP_CBO_EVENT_EXTRA_REG(0x4336, 0xffff, 0xa), SNBEP_CBO_EVENT_EXTRA_REG(0x4436, 0xffff, 0x2), SNBEP_CBO_EVENT_EXTRA_REG(0x4836, 0xffff, 0x2), SNBEP_CBO_EVENT_EXTRA_REG(0x4a36, 0xffff, 0x2), @@ -1222,6 +1222,7 @@ static struct extra_reg ivt_uncore_cbox_extra_regs[] = { SNBEP_CBO_EVENT_EXTRA_REG(SNBEP_CBO_PMON_CTL_TID_EN, SNBEP_CBO_PMON_CTL_TID_EN, 0x1), SNBEP_CBO_EVENT_EXTRA_REG(0x1031, 0x10ff, 0x2), + SNBEP_CBO_EVENT_EXTRA_REG(0x1134, 0xffff, 0x4), SNBEP_CBO_EVENT_EXTRA_REG(0x4134, 0xffff, 0xc), SNBEP_CBO_EVENT_EXTRA_REG(0x5134, 0xffff, 0xc), @@ -1245,7 +1246,7 @@ static struct extra_reg ivt_uncore_cbox_extra_regs[] = { SNBEP_CBO_EVENT_EXTRA_REG(0x8335, 0xffff, 0x10), SNBEP_CBO_EVENT_EXTRA_REG(0x0136, 0xffff, 0x10), SNBEP_CBO_EVENT_EXTRA_REG(0x0336, 0xffff, 0x10), - SNBEP_CBO_EVENT_EXTRA_REG(0x2336, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x2136, 0xffff, 0x10), SNBEP_CBO_EVENT_EXTRA_REG(0x2336, 0xffff, 0x10), SNBEP_CBO_EVENT_EXTRA_REG(0x4136, 0xffff, 0x18), SNBEP_CBO_EVENT_EXTRA_REG(0x4336, 0xffff, 0x18), @@ -2946,10 +2947,7 @@ again: * extra registers. If we failed to take an extra * register, try the alternative. */ - if (idx % 2) - idx--; - else - idx++; + idx ^= 1; if (idx != reg1->idx % 6) { if (idx == 2) config1 >>= 8; diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index 06fe3ed8b851..5433658e598d 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c @@ -97,6 +97,14 @@ static int show_cpuinfo(struct seq_file *m, void *v) if (cpu_has(c, i) && x86_cap_flags[i] != NULL) seq_printf(m, " %s", x86_cap_flags[i]); + seq_printf(m, "\nbugs\t\t:"); + for (i = 0; i < 32*NBUGINTS; i++) { + unsigned int bug_bit = 32*NCAPINTS + i; + + if (cpu_has_bug(c, bug_bit) && x86_bug_flags[i]) + seq_printf(m, " %s", x86_bug_flags[i]); + } + seq_printf(m, "\nbogomips\t: %lu.%02lu\n", c->loops_per_jiffy/(500000/HZ), (c->loops_per_jiffy/(5000/HZ)) % 100); diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index b6f794aa1693..4a8013d55947 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -38,7 +38,6 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c) { X86_FEATURE_PTS, CR_EAX, 6, 0x00000006, 0 }, { X86_FEATURE_APERFMPERF, CR_ECX, 0, 0x00000006, 0 }, { X86_FEATURE_EPB, CR_ECX, 3, 0x00000006, 0 }, - { X86_FEATURE_XSAVEOPT, CR_EAX, 0, 0x0000000d, 1 }, { X86_FEATURE_HW_PSTATE, CR_EDX, 7, 0x80000007, 0 }, { X86_FEATURE_CPB, CR_EDX, 9, 0x80000007, 0 }, { X86_FEATURE_PROC_FEEDBACK, CR_EDX,11, 0x80000007, 0 }, diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index dbaa23e78b36..47c410d99f5d 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -425,8 +425,8 @@ sysenter_do_call: cmpl $(NR_syscalls), %eax jae sysenter_badsys call *sys_call_table(,%eax,4) - movl %eax,PT_EAX(%esp) sysenter_after_call: + movl %eax,PT_EAX(%esp) LOCKDEP_SYS_EXIT DISABLE_INTERRUPTS(CLBR_ANY) TRACE_IRQS_OFF @@ -502,6 +502,7 @@ ENTRY(system_call) jae syscall_badsys syscall_call: call *sys_call_table(,%eax,4) +syscall_after_call: movl %eax,PT_EAX(%esp) # store the return value syscall_exit: LOCKDEP_SYS_EXIT @@ -675,12 +676,12 @@ syscall_fault: END(syscall_fault) syscall_badsys: - movl $-ENOSYS,PT_EAX(%esp) - jmp syscall_exit + movl $-ENOSYS,%eax + jmp syscall_after_call END(syscall_badsys) sysenter_badsys: - movl $-ENOSYS,PT_EAX(%esp) + movl $-ENOSYS,%eax jmp sysenter_after_call END(syscall_badsys) CFI_ENDPROC @@ -1058,9 +1059,6 @@ ENTRY(mcount) END(mcount) ENTRY(ftrace_caller) - cmpl $0, function_trace_stop - jne ftrace_stub - pushl %eax pushl %ecx pushl %edx @@ -1092,8 +1090,6 @@ END(ftrace_caller) ENTRY(ftrace_regs_caller) pushf /* push flags before compare (in cs location) */ - cmpl $0, function_trace_stop - jne ftrace_restore_flags /* * i386 does not save SS and ESP when coming from kernel. @@ -1152,7 +1148,6 @@ GLOBAL(ftrace_regs_call) popf /* Pop flags at end (no addl to corrupt flags) */ jmp ftrace_ret -ftrace_restore_flags: popf jmp ftrace_stub #else /* ! CONFIG_DYNAMIC_FTRACE */ @@ -1161,9 +1156,6 @@ ENTRY(mcount) cmpl $__PAGE_OFFSET, %esp jb ftrace_stub /* Paging not enabled yet? */ - cmpl $0, function_trace_stop - jne ftrace_stub - cmpl $ftrace_stub, ftrace_trace_function jnz trace #ifdef CONFIG_FUNCTION_GRAPH_TRACER diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index b25ca969edd2..2fac1343a90b 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -207,7 +207,6 @@ ENDPROC(native_usergs_sysret64) */ .macro XCPT_FRAME start=1 offset=0 INTR_FRAME \start, RIP+\offset-ORIG_RAX - /*CFI_REL_OFFSET orig_rax, ORIG_RAX-ORIG_RAX*/ .endm /* @@ -287,21 +286,21 @@ ENDPROC(native_usergs_sysret64) ENTRY(save_paranoid) XCPT_FRAME 1 RDI+8 cld - movq_cfi rdi, RDI+8 - movq_cfi rsi, RSI+8 + movq %rdi, RDI+8(%rsp) + movq %rsi, RSI+8(%rsp) movq_cfi rdx, RDX+8 movq_cfi rcx, RCX+8 movq_cfi rax, RAX+8 - movq_cfi r8, R8+8 - movq_cfi r9, R9+8 - movq_cfi r10, R10+8 - movq_cfi r11, R11+8 + movq %r8, R8+8(%rsp) + movq %r9, R9+8(%rsp) + movq %r10, R10+8(%rsp) + movq %r11, R11+8(%rsp) movq_cfi rbx, RBX+8 - movq_cfi rbp, RBP+8 - movq_cfi r12, R12+8 - movq_cfi r13, R13+8 - movq_cfi r14, R14+8 - movq_cfi r15, R15+8 + movq %rbp, RBP+8(%rsp) + movq %r12, R12+8(%rsp) + movq %r13, R13+8(%rsp) + movq %r14, R14+8(%rsp) + movq %r15, R15+8(%rsp) movl $1,%ebx movl $MSR_GS_BASE,%ecx rdmsr @@ -830,27 +829,24 @@ restore_args: RESTORE_ARGS 1,8,1 irq_return: + INTERRUPT_RETURN + +ENTRY(native_iret) /* * Are we returning to a stack segment from the LDT? Note: in * 64-bit mode SS:RSP on the exception stack is always valid. */ #ifdef CONFIG_X86_ESPFIX64 testb $4,(SS-RIP)(%rsp) - jnz irq_return_ldt + jnz native_irq_return_ldt #endif -irq_return_iret: - INTERRUPT_RETURN - _ASM_EXTABLE(irq_return_iret, bad_iret) - -#ifdef CONFIG_PARAVIRT -ENTRY(native_iret) +native_irq_return_iret: iretq - _ASM_EXTABLE(native_iret, bad_iret) -#endif + _ASM_EXTABLE(native_irq_return_iret, bad_iret) #ifdef CONFIG_X86_ESPFIX64 -irq_return_ldt: +native_irq_return_ldt: pushq_cfi %rax pushq_cfi %rdi SWAPGS @@ -872,7 +868,7 @@ irq_return_ldt: SWAPGS movq %rax,%rsp popq_cfi %rax - jmp irq_return_iret + jmp native_irq_return_iret #endif .section .fixup,"ax" @@ -956,13 +952,8 @@ __do_double_fault: cmpl $__KERNEL_CS,CS(%rdi) jne do_double_fault movq RIP(%rdi),%rax - cmpq $irq_return_iret,%rax -#ifdef CONFIG_PARAVIRT - je 1f - cmpq $native_iret,%rax -#endif + cmpq $native_irq_return_iret,%rax jne do_double_fault /* This shouldn't happen... */ -1: movq PER_CPU_VAR(kernel_stack),%rax subq $(6*8-KERNEL_STACK_OFFSET),%rax /* Reset to original stack */ movq %rax,RSP(%rdi) @@ -1395,21 +1386,21 @@ ENTRY(error_entry) CFI_ADJUST_CFA_OFFSET 15*8 /* oldrax contains error code */ cld - movq_cfi rdi, RDI+8 - movq_cfi rsi, RSI+8 - movq_cfi rdx, RDX+8 - movq_cfi rcx, RCX+8 - movq_cfi rax, RAX+8 - movq_cfi r8, R8+8 - movq_cfi r9, R9+8 - movq_cfi r10, R10+8 - movq_cfi r11, R11+8 + movq %rdi, RDI+8(%rsp) + movq %rsi, RSI+8(%rsp) + movq %rdx, RDX+8(%rsp) + movq %rcx, RCX+8(%rsp) + movq %rax, RAX+8(%rsp) + movq %r8, R8+8(%rsp) + movq %r9, R9+8(%rsp) + movq %r10, R10+8(%rsp) + movq %r11, R11+8(%rsp) movq_cfi rbx, RBX+8 - movq_cfi rbp, RBP+8 - movq_cfi r12, R12+8 - movq_cfi r13, R13+8 - movq_cfi r14, R14+8 - movq_cfi r15, R15+8 + movq %rbp, RBP+8(%rsp) + movq %r12, R12+8(%rsp) + movq %r13, R13+8(%rsp) + movq %r14, R14+8(%rsp) + movq %r15, R15+8(%rsp) xorl %ebx,%ebx testl $3,CS+8(%rsp) je error_kernelspace @@ -1427,8 +1418,9 @@ error_sti: * compat mode. Check for these here too. */ error_kernelspace: + CFI_REL_OFFSET rcx, RCX+8 incl %ebx - leaq irq_return_iret(%rip),%rcx + leaq native_irq_return_iret(%rip),%rcx cmpq %rcx,RIP+8(%rsp) je error_swapgs movl %ecx,%eax /* zero extend */ diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c index 6afbb16e9b79..94d857fb1033 100644 --- a/arch/x86/kernel/espfix_64.c +++ b/arch/x86/kernel/espfix_64.c @@ -175,7 +175,7 @@ void init_espfix_ap(void) if (!pud_present(pud)) { pmd_p = (pmd_t *)__get_free_page(PGALLOC_GFP); pud = __pud(__pa(pmd_p) | (PGTABLE_PROT & ptemask)); - paravirt_alloc_pud(&init_mm, __pa(pmd_p) >> PAGE_SHIFT); + paravirt_alloc_pmd(&init_mm, __pa(pmd_p) >> PAGE_SHIFT); for (n = 0; n < ESPFIX_PUD_CLONES; n++) set_pud(&pud_p[n], pud); } @@ -185,7 +185,7 @@ void init_espfix_ap(void) if (!pmd_present(pmd)) { pte_p = (pte_t *)__get_free_page(PGALLOC_GFP); pmd = __pmd(__pa(pte_p) | (PGTABLE_PROT & ptemask)); - paravirt_alloc_pmd(&init_mm, __pa(pte_p) >> PAGE_SHIFT); + paravirt_alloc_pte(&init_mm, __pa(pte_p) >> PAGE_SHIFT); for (n = 0; n < ESPFIX_PMD_CLONES; n++) set_pmd(&pmd_p[n], pmd); } @@ -193,7 +193,6 @@ void init_espfix_ap(void) pte_p = pte_offset_kernel(&pmd, addr); stack_page = (void *)__get_free_page(GFP_KERNEL); pte = __pte(__pa(stack_page) | (__PAGE_KERNEL_RO & ptemask)); - paravirt_alloc_pte(&init_mm, __pa(stack_page) >> PAGE_SHIFT); for (n = 0; n < ESPFIX_PTE_CLONES; n++) set_pte(&pte_p[n*PTE_STRIDE], pte); diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index cbc4a91b131e..3386dc9aa333 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -703,6 +703,9 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, unsigned long return_hooker = (unsigned long) &return_to_handler; + if (unlikely(ftrace_graph_is_dead())) + return; + if (unlikely(atomic_read(¤t->tracing_graph_pause))) return; diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 7596df664901..67e6d19ef1be 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -574,6 +574,9 @@ int kprobe_int3_handler(struct pt_regs *regs) struct kprobe *p; struct kprobe_ctlblk *kcb; + if (user_mode_vm(regs)) + return 0; + addr = (kprobe_opcode_t *)(regs->ip - sizeof(kprobe_opcode_t)); /* * We don't want to be preempted for the entire diff --git a/arch/x86/kernel/mcount_64.S b/arch/x86/kernel/mcount_64.S index c050a0153168..c73aecf10d34 100644 --- a/arch/x86/kernel/mcount_64.S +++ b/arch/x86/kernel/mcount_64.S @@ -46,10 +46,6 @@ END(function_hook) .endm ENTRY(ftrace_caller) - /* Check if tracing was disabled (quick check) */ - cmpl $0, function_trace_stop - jne ftrace_stub - ftrace_caller_setup /* regs go into 4th parameter (but make it NULL) */ movq $0, %rcx @@ -73,10 +69,6 @@ ENTRY(ftrace_regs_caller) /* Save the current flags before compare (in SS location)*/ pushfq - /* Check if tracing was disabled (quick check) */ - cmpl $0, function_trace_stop - jne ftrace_restore_flags - /* skip=8 to skip flags saved in SS */ ftrace_caller_setup 8 @@ -131,7 +123,7 @@ GLOBAL(ftrace_regs_call) popfq jmp ftrace_return -ftrace_restore_flags: + popfq jmp ftrace_stub @@ -141,9 +133,6 @@ END(ftrace_regs_caller) #else /* ! CONFIG_DYNAMIC_FTRACE */ ENTRY(function_hook) - cmpl $0, function_trace_stop - jne ftrace_stub - cmpq $ftrace_stub, ftrace_trace_function jnz trace diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c index 3f08f34f93eb..a1da6737ba5b 100644 --- a/arch/x86/kernel/paravirt_patch_64.c +++ b/arch/x86/kernel/paravirt_patch_64.c @@ -6,7 +6,6 @@ DEF_NATIVE(pv_irq_ops, irq_disable, "cli"); DEF_NATIVE(pv_irq_ops, irq_enable, "sti"); DEF_NATIVE(pv_irq_ops, restore_fl, "pushq %rdi; popfq"); DEF_NATIVE(pv_irq_ops, save_fl, "pushfq; popq %rax"); -DEF_NATIVE(pv_cpu_ops, iret, "iretq"); DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax"); DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax"); DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3"); @@ -50,7 +49,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf, PATCH_SITE(pv_irq_ops, save_fl); PATCH_SITE(pv_irq_ops, irq_enable); PATCH_SITE(pv_irq_ops, irq_disable); - PATCH_SITE(pv_cpu_ops, iret); PATCH_SITE(pv_cpu_ops, irq_enable_sysexit); PATCH_SITE(pv_cpu_ops, usergs_sysret32); PATCH_SITE(pv_cpu_ops, usergs_sysret64); diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 52b1157c53eb..17962e667a91 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -28,6 +28,7 @@ #include <linux/mc146818rtc.h> #include <asm/realmode.h> #include <asm/x86_init.h> +#include <asm/efi.h> /* * Power off function, if any @@ -401,12 +402,25 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { static int __init reboot_init(void) { + int rv; + /* * Only do the DMI check if reboot_type hasn't been overridden * on the command line */ - if (reboot_default) - dmi_check_system(reboot_dmi_table); + if (!reboot_default) + return 0; + + /* + * The DMI quirks table takes precedence. If no quirks entry + * matches and the ACPI Hardware Reduced bit is set, force EFI + * reboot. + */ + rv = dmi_check_system(reboot_dmi_table); + + if (!rv && efi_reboot_required()) + reboot_type = BOOT_EFI; + return 0; } core_initcall(reboot_init); @@ -528,11 +542,7 @@ static void native_machine_emergency_restart(void) break; case BOOT_EFI: - if (efi_enabled(EFI_RUNTIME_SERVICES)) - efi.reset_system(reboot_mode == REBOOT_WARM ? - EFI_RESET_WARM : - EFI_RESET_COLD, - EFI_SUCCESS, 0, NULL); + efi_reboot(reboot_mode, NULL); reboot_type = BOOT_BIOS; break; diff --git a/arch/x86/kernel/resource.c b/arch/x86/kernel/resource.c index 2a26819bb6a8..80eab01c1a68 100644 --- a/arch/x86/kernel/resource.c +++ b/arch/x86/kernel/resource.c @@ -37,10 +37,12 @@ static void remove_e820_regions(struct resource *avail) void arch_remove_reservations(struct resource *avail) { - /* Trim out BIOS areas (low 1MB and high 2MB) and E820 regions */ + /* + * Trim out BIOS area (high 2MB) and E820 regions. We do not remove + * the low 1MB unconditionally, as this area is needed for some ISA + * cards requiring a memory range, e.g. the i82365 PCMCIA controller. + */ if (avail->flags & IORESOURCE_MEM) { - if (avail->start < BIOS_END) - avail->start = BIOS_END; resource_clip(avail, BIOS_ROM_BASE, BIOS_ROM_END); remove_e820_regions(avail); diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 78a0e6298922..41ead8d3bc0b 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -924,10 +924,10 @@ void __init setup_arch(char **cmdline_p) #endif #ifdef CONFIG_EFI if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature, - "EL32", 4)) { + EFI32_LOADER_SIGNATURE, 4)) { set_bit(EFI_BOOT, &efi.flags); } else if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature, - "EL64", 4)) { + EFI64_LOADER_SIGNATURE, 4)) { set_bit(EFI_BOOT, &efi.flags); set_bit(EFI_64BIT, &efi.flags); } diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 57e5ce126d5a..56b0c338061e 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -234,9 +234,6 @@ static inline unsigned long long cycles_2_ns(unsigned long long cyc) return ns; } -/* XXX surely we already have this someplace in the kernel?! */ -#define DIV_ROUND(n, d) (((n) + ((d) / 2)) / (d)) - static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) { unsigned long long tsc_now, ns_now; @@ -259,7 +256,9 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) * time function is continuous; see the comment near struct * cyc2ns_data. */ - data->cyc2ns_mul = DIV_ROUND(NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR, cpu_khz); + data->cyc2ns_mul = + DIV_ROUND_CLOSEST(NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR, + cpu_khz); data->cyc2ns_shift = CYC2NS_SCALE_FACTOR; data->cyc2ns_offset = ns_now - mul_u64_u32_shr(tsc_now, data->cyc2ns_mul, CYC2NS_SCALE_FACTOR); @@ -920,9 +919,9 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new); if (!(freq->flags & CPUFREQ_CONST_LOOPS)) mark_tsc_unstable("cpufreq changes"); - } - set_cyc2ns_scale(tsc_khz, freq->cpu); + set_cyc2ns_scale(tsc_khz, freq->cpu); + } return 0; } diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index f9087315e0cd..a5380590ab0e 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -95,4 +95,12 @@ static inline bool guest_cpuid_has_gbpages(struct kvm_vcpu *vcpu) best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); return best && (best->edx & bit(X86_FEATURE_GBPAGES)); } + +static inline bool guest_cpuid_has_rtm(struct kvm_vcpu *vcpu) +{ + struct kvm_cpuid_entry2 *best; + + best = kvm_find_cpuid_entry(vcpu, 7, 0); + return best && (best->ebx & bit(X86_FEATURE_RTM)); +} #endif diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index e4e833d3d7d7..56657b0bb3bb 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -162,6 +162,10 @@ #define NoWrite ((u64)1 << 45) /* No writeback */ #define SrcWrite ((u64)1 << 46) /* Write back src operand */ #define NoMod ((u64)1 << 47) /* Mod field is ignored */ +#define Intercept ((u64)1 << 48) /* Has valid intercept field */ +#define CheckPerm ((u64)1 << 49) /* Has valid check_perm field */ +#define NoBigReal ((u64)1 << 50) /* No big real mode */ +#define PrivUD ((u64)1 << 51) /* #UD instead of #GP on CPL > 0 */ #define DstXacc (DstAccLo | SrcAccHi | SrcWrite) @@ -426,6 +430,7 @@ static int emulator_check_intercept(struct x86_emulate_ctxt *ctxt, .modrm_reg = ctxt->modrm_reg, .modrm_rm = ctxt->modrm_rm, .src_val = ctxt->src.val64, + .dst_val = ctxt->dst.val64, .src_bytes = ctxt->src.bytes, .dst_bytes = ctxt->dst.bytes, .ad_bytes = ctxt->ad_bytes, @@ -511,12 +516,6 @@ static u32 desc_limit_scaled(struct desc_struct *desc) return desc->g ? (limit << 12) | 0xfff : limit; } -static void set_seg_override(struct x86_emulate_ctxt *ctxt, int seg) -{ - ctxt->has_seg_override = true; - ctxt->seg_override = seg; -} - static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg) { if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS) @@ -525,14 +524,6 @@ static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg) return ctxt->ops->get_cached_segment_base(ctxt, seg); } -static unsigned seg_override(struct x86_emulate_ctxt *ctxt) -{ - if (!ctxt->has_seg_override) - return 0; - - return ctxt->seg_override; -} - static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec, u32 error, bool valid) { @@ -651,7 +642,12 @@ static int __linearize(struct x86_emulate_ctxt *ctxt, if (!fetch && (desc.type & 8) && !(desc.type & 2)) goto bad; lim = desc_limit_scaled(&desc); - if ((desc.type & 8) || !(desc.type & 4)) { + if ((ctxt->mode == X86EMUL_MODE_REAL) && !fetch && + (ctxt->d & NoBigReal)) { + /* la is between zero and 0xffff */ + if (la > 0xffff || (u32)(la + size - 1) > 0xffff) + goto bad; + } else if ((desc.type & 8) || !(desc.type & 4)) { /* expand-up segment */ if (addr.ea > lim || (u32)(addr.ea + size - 1) > lim) goto bad; @@ -716,68 +712,71 @@ static int segmented_read_std(struct x86_emulate_ctxt *ctxt, } /* - * Fetch the next byte of the instruction being emulated which is pointed to - * by ctxt->_eip, then increment ctxt->_eip. - * - * Also prefetch the remaining bytes of the instruction without crossing page + * Prefetch the remaining bytes of the instruction without crossing page * boundary if they are not in fetch_cache yet. */ -static int do_insn_fetch_byte(struct x86_emulate_ctxt *ctxt, u8 *dest) +static int __do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, int op_size) { - struct fetch_cache *fc = &ctxt->fetch; int rc; - int size, cur_size; - - if (ctxt->_eip == fc->end) { - unsigned long linear; - struct segmented_address addr = { .seg = VCPU_SREG_CS, - .ea = ctxt->_eip }; - cur_size = fc->end - fc->start; - size = min(15UL - cur_size, - PAGE_SIZE - offset_in_page(ctxt->_eip)); - rc = __linearize(ctxt, addr, size, false, true, &linear); - if (unlikely(rc != X86EMUL_CONTINUE)) - return rc; - rc = ctxt->ops->fetch(ctxt, linear, fc->data + cur_size, - size, &ctxt->exception); - if (unlikely(rc != X86EMUL_CONTINUE)) - return rc; - fc->end += size; - } - *dest = fc->data[ctxt->_eip - fc->start]; - ctxt->_eip++; - return X86EMUL_CONTINUE; -} + unsigned size; + unsigned long linear; + int cur_size = ctxt->fetch.end - ctxt->fetch.data; + struct segmented_address addr = { .seg = VCPU_SREG_CS, + .ea = ctxt->eip + cur_size }; + + size = 15UL ^ cur_size; + rc = __linearize(ctxt, addr, size, false, true, &linear); + if (unlikely(rc != X86EMUL_CONTINUE)) + return rc; -static int do_insn_fetch(struct x86_emulate_ctxt *ctxt, - void *dest, unsigned size) -{ - int rc; + size = min_t(unsigned, size, PAGE_SIZE - offset_in_page(linear)); - /* x86 instructions are limited to 15 bytes. */ - if (unlikely(ctxt->_eip + size - ctxt->eip > 15)) + /* + * One instruction can only straddle two pages, + * and one has been loaded at the beginning of + * x86_decode_insn. So, if not enough bytes + * still, we must have hit the 15-byte boundary. + */ + if (unlikely(size < op_size)) return X86EMUL_UNHANDLEABLE; - while (size--) { - rc = do_insn_fetch_byte(ctxt, dest++); - if (rc != X86EMUL_CONTINUE) - return rc; - } + rc = ctxt->ops->fetch(ctxt, linear, ctxt->fetch.end, + size, &ctxt->exception); + if (unlikely(rc != X86EMUL_CONTINUE)) + return rc; + ctxt->fetch.end += size; return X86EMUL_CONTINUE; } +static __always_inline int do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, + unsigned size) +{ + if (unlikely(ctxt->fetch.end - ctxt->fetch.ptr < size)) + return __do_insn_fetch_bytes(ctxt, size); + else + return X86EMUL_CONTINUE; +} + /* Fetch next part of the instruction being emulated. */ #define insn_fetch(_type, _ctxt) \ -({ unsigned long _x; \ - rc = do_insn_fetch(_ctxt, &_x, sizeof(_type)); \ +({ _type _x; \ + \ + rc = do_insn_fetch_bytes(_ctxt, sizeof(_type)); \ if (rc != X86EMUL_CONTINUE) \ goto done; \ - (_type)_x; \ + ctxt->_eip += sizeof(_type); \ + _x = *(_type __aligned(1) *) ctxt->fetch.ptr; \ + ctxt->fetch.ptr += sizeof(_type); \ + _x; \ }) #define insn_fetch_arr(_arr, _size, _ctxt) \ -({ rc = do_insn_fetch(_ctxt, _arr, (_size)); \ +({ \ + rc = do_insn_fetch_bytes(_ctxt, _size); \ if (rc != X86EMUL_CONTINUE) \ goto done; \ + ctxt->_eip += (_size); \ + memcpy(_arr, ctxt->fetch.ptr, _size); \ + ctxt->fetch.ptr += (_size); \ }) /* @@ -1063,19 +1062,17 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, struct operand *op) { u8 sib; - int index_reg = 0, base_reg = 0, scale; + int index_reg, base_reg, scale; int rc = X86EMUL_CONTINUE; ulong modrm_ea = 0; - if (ctxt->rex_prefix) { - ctxt->modrm_reg = (ctxt->rex_prefix & 4) << 1; /* REX.R */ - index_reg = (ctxt->rex_prefix & 2) << 2; /* REX.X */ - ctxt->modrm_rm = base_reg = (ctxt->rex_prefix & 1) << 3; /* REG.B */ - } + ctxt->modrm_reg = ((ctxt->rex_prefix << 1) & 8); /* REX.R */ + index_reg = (ctxt->rex_prefix << 2) & 8; /* REX.X */ + base_reg = (ctxt->rex_prefix << 3) & 8; /* REX.B */ - ctxt->modrm_mod |= (ctxt->modrm & 0xc0) >> 6; + ctxt->modrm_mod = (ctxt->modrm & 0xc0) >> 6; ctxt->modrm_reg |= (ctxt->modrm & 0x38) >> 3; - ctxt->modrm_rm |= (ctxt->modrm & 0x07); + ctxt->modrm_rm = base_reg | (ctxt->modrm & 0x07); ctxt->modrm_seg = VCPU_SREG_DS; if (ctxt->modrm_mod == 3 || (ctxt->d & NoMod)) { @@ -1093,7 +1090,7 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, if (ctxt->d & Mmx) { op->type = OP_MM; op->bytes = 8; - op->addr.xmm = ctxt->modrm_rm & 7; + op->addr.mm = ctxt->modrm_rm & 7; return rc; } fetch_register_operand(op); @@ -1190,6 +1187,9 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, } } op->addr.mem.ea = modrm_ea; + if (ctxt->ad_bytes != 8) + ctxt->memop.addr.mem.ea = (u32)ctxt->memop.addr.mem.ea; + done: return rc; } @@ -1220,12 +1220,14 @@ static void fetch_bit_operand(struct x86_emulate_ctxt *ctxt) long sv = 0, mask; if (ctxt->dst.type == OP_MEM && ctxt->src.type == OP_REG) { - mask = ~(ctxt->dst.bytes * 8 - 1); + mask = ~((long)ctxt->dst.bytes * 8 - 1); if (ctxt->src.bytes == 2) sv = (s16)ctxt->src.val & (s16)mask; else if (ctxt->src.bytes == 4) sv = (s32)ctxt->src.val & (s32)mask; + else + sv = (s64)ctxt->src.val & (s64)mask; ctxt->dst.addr.mem.ea += (sv >> 3); } @@ -1315,8 +1317,7 @@ static int pio_in_emulated(struct x86_emulate_ctxt *ctxt, in_page = (ctxt->eflags & EFLG_DF) ? offset_in_page(reg_read(ctxt, VCPU_REGS_RDI)) : PAGE_SIZE - offset_in_page(reg_read(ctxt, VCPU_REGS_RDI)); - n = min(min(in_page, (unsigned int)sizeof(rc->data)) / size, - count); + n = min3(in_page, (unsigned int)sizeof(rc->data) / size, count); if (n == 0) n = 1; rc->pos = rc->end = 0; @@ -1358,17 +1359,19 @@ static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt, u16 selector, struct desc_ptr *dt) { const struct x86_emulate_ops *ops = ctxt->ops; + u32 base3 = 0; if (selector & 1 << 2) { struct desc_struct desc; u16 sel; memset (dt, 0, sizeof *dt); - if (!ops->get_segment(ctxt, &sel, &desc, NULL, VCPU_SREG_LDTR)) + if (!ops->get_segment(ctxt, &sel, &desc, &base3, + VCPU_SREG_LDTR)) return; dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? */ - dt->address = get_desc_base(&desc); + dt->address = get_desc_base(&desc) | ((u64)base3 << 32); } else ops->get_gdt(ctxt, dt); } @@ -1422,6 +1425,7 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, ulong desc_addr; int ret; u16 dummy; + u32 base3 = 0; memset(&seg_desc, 0, sizeof seg_desc); @@ -1538,9 +1542,14 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, ret = write_segment_descriptor(ctxt, selector, &seg_desc); if (ret != X86EMUL_CONTINUE) return ret; + } else if (ctxt->mode == X86EMUL_MODE_PROT64) { + ret = ctxt->ops->read_std(ctxt, desc_addr+8, &base3, + sizeof(base3), &ctxt->exception); + if (ret != X86EMUL_CONTINUE) + return ret; } load: - ctxt->ops->set_segment(ctxt, selector, &seg_desc, 0, seg); + ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg); return X86EMUL_CONTINUE; exception: emulate_exception(ctxt, err_vec, err_code, true); @@ -1575,34 +1584,28 @@ static void write_register_operand(struct operand *op) static int writeback(struct x86_emulate_ctxt *ctxt, struct operand *op) { - int rc; - switch (op->type) { case OP_REG: write_register_operand(op); break; case OP_MEM: if (ctxt->lock_prefix) - rc = segmented_cmpxchg(ctxt, + return segmented_cmpxchg(ctxt, + op->addr.mem, + &op->orig_val, + &op->val, + op->bytes); + else + return segmented_write(ctxt, op->addr.mem, - &op->orig_val, &op->val, op->bytes); - else - rc = segmented_write(ctxt, - op->addr.mem, - &op->val, - op->bytes); - if (rc != X86EMUL_CONTINUE) - return rc; break; case OP_MEM_STR: - rc = segmented_write(ctxt, - op->addr.mem, - op->data, - op->bytes * op->count); - if (rc != X86EMUL_CONTINUE) - return rc; + return segmented_write(ctxt, + op->addr.mem, + op->data, + op->bytes * op->count); break; case OP_XMM: write_sse_reg(ctxt, &op->vec_val, op->addr.xmm); @@ -1671,7 +1674,7 @@ static int emulate_popf(struct x86_emulate_ctxt *ctxt, return rc; change_mask = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF | EFLG_OF - | EFLG_TF | EFLG_DF | EFLG_NT | EFLG_RF | EFLG_AC | EFLG_ID; + | EFLG_TF | EFLG_DF | EFLG_NT | EFLG_AC | EFLG_ID; switch(ctxt->mode) { case X86EMUL_MODE_PROT64: @@ -1754,6 +1757,9 @@ static int em_pop_sreg(struct x86_emulate_ctxt *ctxt) if (rc != X86EMUL_CONTINUE) return rc; + if (ctxt->modrm_reg == VCPU_SREG_SS) + ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS; + rc = load_segment_descriptor(ctxt, (u16)selector, seg); return rc; } @@ -1991,6 +1997,9 @@ static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt) { u64 old = ctxt->dst.orig_val64; + if (ctxt->dst.bytes == 16) + return X86EMUL_UNHANDLEABLE; + if (((u32) (old >> 0) != (u32) reg_read(ctxt, VCPU_REGS_RAX)) || ((u32) (old >> 32) != (u32) reg_read(ctxt, VCPU_REGS_RDX))) { *reg_write(ctxt, VCPU_REGS_RAX) = (u32) (old >> 0); @@ -2017,6 +2026,7 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt) { int rc; unsigned long cs; + int cpl = ctxt->ops->cpl(ctxt); rc = emulate_pop(ctxt, &ctxt->_eip, ctxt->op_bytes); if (rc != X86EMUL_CONTINUE) @@ -2026,6 +2036,9 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt) rc = emulate_pop(ctxt, &cs, ctxt->op_bytes); if (rc != X86EMUL_CONTINUE) return rc; + /* Outer-privilege level return is not implemented */ + if (ctxt->mode >= X86EMUL_MODE_PROT16 && (cs & 3) > cpl) + return X86EMUL_UNHANDLEABLE; rc = load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS); return rc; } @@ -2044,8 +2057,10 @@ static int em_ret_far_imm(struct x86_emulate_ctxt *ctxt) static int em_cmpxchg(struct x86_emulate_ctxt *ctxt) { /* Save real source value, then compare EAX against destination. */ + ctxt->dst.orig_val = ctxt->dst.val; + ctxt->dst.val = reg_read(ctxt, VCPU_REGS_RAX); ctxt->src.orig_val = ctxt->src.val; - ctxt->src.val = reg_read(ctxt, VCPU_REGS_RAX); + ctxt->src.val = ctxt->dst.orig_val; fastop(ctxt, em_cmp); if (ctxt->eflags & EFLG_ZF) { @@ -2055,6 +2070,7 @@ static int em_cmpxchg(struct x86_emulate_ctxt *ctxt) /* Failure: write the value we saw to EAX. */ ctxt->dst.type = OP_REG; ctxt->dst.addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX); + ctxt->dst.val = ctxt->dst.orig_val; } return X86EMUL_CONTINUE; } @@ -2194,7 +2210,7 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt) *reg_write(ctxt, VCPU_REGS_RCX) = ctxt->_eip; if (efer & EFER_LMA) { #ifdef CONFIG_X86_64 - *reg_write(ctxt, VCPU_REGS_R11) = ctxt->eflags & ~EFLG_RF; + *reg_write(ctxt, VCPU_REGS_R11) = ctxt->eflags; ops->get_msr(ctxt, ctxt->mode == X86EMUL_MODE_PROT64 ? @@ -2202,14 +2218,14 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt) ctxt->_eip = msr_data; ops->get_msr(ctxt, MSR_SYSCALL_MASK, &msr_data); - ctxt->eflags &= ~(msr_data | EFLG_RF); + ctxt->eflags &= ~msr_data; #endif } else { /* legacy mode */ ops->get_msr(ctxt, MSR_STAR, &msr_data); ctxt->_eip = (u32)msr_data; - ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF); + ctxt->eflags &= ~(EFLG_VM | EFLG_IF); } return X86EMUL_CONTINUE; @@ -2258,7 +2274,7 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt) break; } - ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF); + ctxt->eflags &= ~(EFLG_VM | EFLG_IF); cs_sel = (u16)msr_data; cs_sel &= ~SELECTOR_RPL_MASK; ss_sel = cs_sel + 8; @@ -2964,7 +2980,7 @@ static int em_rdpmc(struct x86_emulate_ctxt *ctxt) static int em_mov(struct x86_emulate_ctxt *ctxt) { - memcpy(ctxt->dst.valptr, ctxt->src.valptr, ctxt->op_bytes); + memcpy(ctxt->dst.valptr, ctxt->src.valptr, sizeof(ctxt->src.valptr)); return X86EMUL_CONTINUE; } @@ -3221,7 +3237,8 @@ static int em_lidt(struct x86_emulate_ctxt *ctxt) static int em_smsw(struct x86_emulate_ctxt *ctxt) { - ctxt->dst.bytes = 2; + if (ctxt->dst.type == OP_MEM) + ctxt->dst.bytes = 2; ctxt->dst.val = ctxt->ops->get_cr(ctxt, 0); return X86EMUL_CONTINUE; } @@ -3496,7 +3513,7 @@ static int check_rdpmc(struct x86_emulate_ctxt *ctxt) u64 rcx = reg_read(ctxt, VCPU_REGS_RCX); if ((!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt)) || - (rcx > 3)) + ctxt->ops->check_pmc(ctxt, rcx)) return emulate_gp(ctxt, 0); return X86EMUL_CONTINUE; @@ -3521,9 +3538,9 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt) } #define D(_y) { .flags = (_y) } -#define DI(_y, _i) { .flags = (_y), .intercept = x86_intercept_##_i } -#define DIP(_y, _i, _p) { .flags = (_y), .intercept = x86_intercept_##_i, \ - .check_perm = (_p) } +#define DI(_y, _i) { .flags = (_y)|Intercept, .intercept = x86_intercept_##_i } +#define DIP(_y, _i, _p) { .flags = (_y)|Intercept|CheckPerm, \ + .intercept = x86_intercept_##_i, .check_perm = (_p) } #define N D(NotImpl) #define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) } #define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) } @@ -3532,10 +3549,10 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt) #define I(_f, _e) { .flags = (_f), .u.execute = (_e) } #define F(_f, _e) { .flags = (_f) | Fastop, .u.fastop = (_e) } #define II(_f, _e, _i) \ - { .flags = (_f), .u.execute = (_e), .intercept = x86_intercept_##_i } + { .flags = (_f)|Intercept, .u.execute = (_e), .intercept = x86_intercept_##_i } #define IIP(_f, _e, _i, _p) \ - { .flags = (_f), .u.execute = (_e), .intercept = x86_intercept_##_i, \ - .check_perm = (_p) } + { .flags = (_f)|Intercept|CheckPerm, .u.execute = (_e), \ + .intercept = x86_intercept_##_i, .check_perm = (_p) } #define GP(_f, _g) { .flags = ((_f) | Prefix), .u.gprefix = (_g) } #define D2bv(_f) D((_f) | ByteOp), D(_f) @@ -3634,8 +3651,8 @@ static const struct opcode group6[] = { }; static const struct group_dual group7 = { { - II(Mov | DstMem | Priv, em_sgdt, sgdt), - II(Mov | DstMem | Priv, em_sidt, sidt), + II(Mov | DstMem, em_sgdt, sgdt), + II(Mov | DstMem, em_sidt, sidt), II(SrcMem | Priv, em_lgdt, lgdt), II(SrcMem | Priv, em_lidt, lidt), II(SrcNone | DstMem | Mov, em_smsw, smsw), N, @@ -3899,7 +3916,7 @@ static const struct opcode twobyte_table[256] = { N, N, N, N, N, N, N, N, N, N, /* 0x40 - 0x4F */ - X16(D(DstReg | SrcMem | ModRM | Mov)), + X16(D(DstReg | SrcMem | ModRM)), /* 0x50 - 0x5F */ N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, /* 0x60 - 0x6F */ @@ -4061,12 +4078,12 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op, mem_common: *op = ctxt->memop; ctxt->memopp = op; - if ((ctxt->d & BitOp) && op == &ctxt->dst) + if (ctxt->d & BitOp) fetch_bit_operand(ctxt); op->orig_val = op->val; break; case OpMem64: - ctxt->memop.bytes = 8; + ctxt->memop.bytes = (ctxt->op_bytes == 8) ? 16 : 8; goto mem_common; case OpAcc: op->type = OP_REG; @@ -4150,7 +4167,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op, op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; op->addr.mem.ea = register_address(ctxt, reg_read(ctxt, VCPU_REGS_RSI)); - op->addr.mem.seg = seg_override(ctxt); + op->addr.mem.seg = ctxt->seg_override; op->val = 0; op->count = 1; break; @@ -4161,7 +4178,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op, register_address(ctxt, reg_read(ctxt, VCPU_REGS_RBX) + (reg_read(ctxt, VCPU_REGS_RAX) & 0xff)); - op->addr.mem.seg = seg_override(ctxt); + op->addr.mem.seg = ctxt->seg_override; op->val = 0; break; case OpImmFAddr: @@ -4208,16 +4225,22 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) int mode = ctxt->mode; int def_op_bytes, def_ad_bytes, goffset, simd_prefix; bool op_prefix = false; + bool has_seg_override = false; struct opcode opcode; ctxt->memop.type = OP_NONE; ctxt->memopp = NULL; ctxt->_eip = ctxt->eip; - ctxt->fetch.start = ctxt->_eip; - ctxt->fetch.end = ctxt->fetch.start + insn_len; + ctxt->fetch.ptr = ctxt->fetch.data; + ctxt->fetch.end = ctxt->fetch.data + insn_len; ctxt->opcode_len = 1; if (insn_len > 0) memcpy(ctxt->fetch.data, insn, insn_len); + else { + rc = __do_insn_fetch_bytes(ctxt, 1); + if (rc != X86EMUL_CONTINUE) + return rc; + } switch (mode) { case X86EMUL_MODE_REAL: @@ -4261,11 +4284,13 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) case 0x2e: /* CS override */ case 0x36: /* SS override */ case 0x3e: /* DS override */ - set_seg_override(ctxt, (ctxt->b >> 3) & 3); + has_seg_override = true; + ctxt->seg_override = (ctxt->b >> 3) & 3; break; case 0x64: /* FS override */ case 0x65: /* GS override */ - set_seg_override(ctxt, ctxt->b & 7); + has_seg_override = true; + ctxt->seg_override = ctxt->b & 7; break; case 0x40 ... 0x4f: /* REX */ if (mode != X86EMUL_MODE_PROT64) @@ -4314,6 +4339,13 @@ done_prefixes: if (ctxt->d & ModRM) ctxt->modrm = insn_fetch(u8, ctxt); + /* vex-prefix instructions are not implemented */ + if (ctxt->opcode_len == 1 && (ctxt->b == 0xc5 || ctxt->b == 0xc4) && + (mode == X86EMUL_MODE_PROT64 || + (mode >= X86EMUL_MODE_PROT16 && (ctxt->modrm & 0x80)))) { + ctxt->d = NotImpl; + } + while (ctxt->d & GroupMask) { switch (ctxt->d & GroupMask) { case Group: @@ -4356,49 +4388,59 @@ done_prefixes: ctxt->d |= opcode.flags; } - ctxt->execute = opcode.u.execute; - ctxt->check_perm = opcode.check_perm; - ctxt->intercept = opcode.intercept; - /* Unrecognised? */ - if (ctxt->d == 0 || (ctxt->d & NotImpl)) + if (ctxt->d == 0) return EMULATION_FAILED; - if (!(ctxt->d & EmulateOnUD) && ctxt->ud) - return EMULATION_FAILED; + ctxt->execute = opcode.u.execute; - if (mode == X86EMUL_MODE_PROT64 && (ctxt->d & Stack)) - ctxt->op_bytes = 8; + if (unlikely(ctxt->d & + (NotImpl|EmulateOnUD|Stack|Op3264|Sse|Mmx|Intercept|CheckPerm))) { + /* + * These are copied unconditionally here, and checked unconditionally + * in x86_emulate_insn. + */ + ctxt->check_perm = opcode.check_perm; + ctxt->intercept = opcode.intercept; + + if (ctxt->d & NotImpl) + return EMULATION_FAILED; + + if (!(ctxt->d & EmulateOnUD) && ctxt->ud) + return EMULATION_FAILED; - if (ctxt->d & Op3264) { - if (mode == X86EMUL_MODE_PROT64) + if (mode == X86EMUL_MODE_PROT64 && (ctxt->d & Stack)) ctxt->op_bytes = 8; - else - ctxt->op_bytes = 4; - } - if (ctxt->d & Sse) - ctxt->op_bytes = 16; - else if (ctxt->d & Mmx) - ctxt->op_bytes = 8; + if (ctxt->d & Op3264) { + if (mode == X86EMUL_MODE_PROT64) + ctxt->op_bytes = 8; + else + ctxt->op_bytes = 4; + } + + if (ctxt->d & Sse) + ctxt->op_bytes = 16; + else if (ctxt->d & Mmx) + ctxt->op_bytes = 8; + } /* ModRM and SIB bytes. */ if (ctxt->d & ModRM) { rc = decode_modrm(ctxt, &ctxt->memop); - if (!ctxt->has_seg_override) - set_seg_override(ctxt, ctxt->modrm_seg); + if (!has_seg_override) { + has_seg_override = true; + ctxt->seg_override = ctxt->modrm_seg; + } } else if (ctxt->d & MemAbs) rc = decode_abs(ctxt, &ctxt->memop); if (rc != X86EMUL_CONTINUE) goto done; - if (!ctxt->has_seg_override) - set_seg_override(ctxt, VCPU_SREG_DS); - - ctxt->memop.addr.mem.seg = seg_override(ctxt); + if (!has_seg_override) + ctxt->seg_override = VCPU_SREG_DS; - if (ctxt->memop.type == OP_MEM && ctxt->ad_bytes != 8) - ctxt->memop.addr.mem.ea = (u32)ctxt->memop.addr.mem.ea; + ctxt->memop.addr.mem.seg = ctxt->seg_override; /* * Decode and fetch the source operand: register, memory @@ -4420,7 +4462,7 @@ done_prefixes: rc = decode_operand(ctxt, &ctxt->dst, (ctxt->d >> DstShift) & OpMask); done: - if (ctxt->memopp && ctxt->memopp->type == OP_MEM && ctxt->rip_relative) + if (ctxt->rip_relative) ctxt->memopp->addr.mem.ea += ctxt->_eip; return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK; @@ -4495,6 +4537,16 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *)) return X86EMUL_CONTINUE; } +void init_decode_cache(struct x86_emulate_ctxt *ctxt) +{ + memset(&ctxt->rip_relative, 0, + (void *)&ctxt->modrm - (void *)&ctxt->rip_relative); + + ctxt->io_read.pos = 0; + ctxt->io_read.end = 0; + ctxt->mem_read.end = 0; +} + int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) { const struct x86_emulate_ops *ops = ctxt->ops; @@ -4503,12 +4555,6 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) ctxt->mem_read.pos = 0; - if ((ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) || - (ctxt->d & Undefined)) { - rc = emulate_ud(ctxt); - goto done; - } - /* LOCK prefix is allowed only with some instructions */ if (ctxt->lock_prefix && (!(ctxt->d & Lock) || ctxt->dst.type != OP_MEM)) { rc = emulate_ud(ctxt); @@ -4520,69 +4566,82 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) goto done; } - if (((ctxt->d & (Sse|Mmx)) && ((ops->get_cr(ctxt, 0) & X86_CR0_EM))) - || ((ctxt->d & Sse) && !(ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR))) { - rc = emulate_ud(ctxt); - goto done; - } - - if ((ctxt->d & (Sse|Mmx)) && (ops->get_cr(ctxt, 0) & X86_CR0_TS)) { - rc = emulate_nm(ctxt); - goto done; - } + if (unlikely(ctxt->d & + (No64|Undefined|Sse|Mmx|Intercept|CheckPerm|Priv|Prot|String))) { + if ((ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) || + (ctxt->d & Undefined)) { + rc = emulate_ud(ctxt); + goto done; + } - if (ctxt->d & Mmx) { - rc = flush_pending_x87_faults(ctxt); - if (rc != X86EMUL_CONTINUE) + if (((ctxt->d & (Sse|Mmx)) && ((ops->get_cr(ctxt, 0) & X86_CR0_EM))) + || ((ctxt->d & Sse) && !(ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR))) { + rc = emulate_ud(ctxt); goto done; - /* - * Now that we know the fpu is exception safe, we can fetch - * operands from it. - */ - fetch_possible_mmx_operand(ctxt, &ctxt->src); - fetch_possible_mmx_operand(ctxt, &ctxt->src2); - if (!(ctxt->d & Mov)) - fetch_possible_mmx_operand(ctxt, &ctxt->dst); - } + } - if (unlikely(ctxt->guest_mode) && ctxt->intercept) { - rc = emulator_check_intercept(ctxt, ctxt->intercept, - X86_ICPT_PRE_EXCEPT); - if (rc != X86EMUL_CONTINUE) + if ((ctxt->d & (Sse|Mmx)) && (ops->get_cr(ctxt, 0) & X86_CR0_TS)) { + rc = emulate_nm(ctxt); goto done; - } + } - /* Privileged instruction can be executed only in CPL=0 */ - if ((ctxt->d & Priv) && ops->cpl(ctxt)) { - rc = emulate_gp(ctxt, 0); - goto done; - } + if (ctxt->d & Mmx) { + rc = flush_pending_x87_faults(ctxt); + if (rc != X86EMUL_CONTINUE) + goto done; + /* + * Now that we know the fpu is exception safe, we can fetch + * operands from it. + */ + fetch_possible_mmx_operand(ctxt, &ctxt->src); + fetch_possible_mmx_operand(ctxt, &ctxt->src2); + if (!(ctxt->d & Mov)) + fetch_possible_mmx_operand(ctxt, &ctxt->dst); + } - /* Instruction can only be executed in protected mode */ - if ((ctxt->d & Prot) && ctxt->mode < X86EMUL_MODE_PROT16) { - rc = emulate_ud(ctxt); - goto done; - } + if (unlikely(ctxt->guest_mode) && (ctxt->d & Intercept)) { + rc = emulator_check_intercept(ctxt, ctxt->intercept, + X86_ICPT_PRE_EXCEPT); + if (rc != X86EMUL_CONTINUE) + goto done; + } - /* Do instruction specific permission checks */ - if (ctxt->check_perm) { - rc = ctxt->check_perm(ctxt); - if (rc != X86EMUL_CONTINUE) + /* Privileged instruction can be executed only in CPL=0 */ + if ((ctxt->d & Priv) && ops->cpl(ctxt)) { + if (ctxt->d & PrivUD) + rc = emulate_ud(ctxt); + else + rc = emulate_gp(ctxt, 0); goto done; - } + } - if (unlikely(ctxt->guest_mode) && ctxt->intercept) { - rc = emulator_check_intercept(ctxt, ctxt->intercept, - X86_ICPT_POST_EXCEPT); - if (rc != X86EMUL_CONTINUE) + /* Instruction can only be executed in protected mode */ + if ((ctxt->d & Prot) && ctxt->mode < X86EMUL_MODE_PROT16) { + rc = emulate_ud(ctxt); goto done; - } + } - if (ctxt->rep_prefix && (ctxt->d & String)) { - /* All REP prefixes have the same first termination condition */ - if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0) { - ctxt->eip = ctxt->_eip; - goto done; + /* Do instruction specific permission checks */ + if (ctxt->d & CheckPerm) { + rc = ctxt->check_perm(ctxt); + if (rc != X86EMUL_CONTINUE) + goto done; + } + + if (unlikely(ctxt->guest_mode) && (ctxt->d & Intercept)) { + rc = emulator_check_intercept(ctxt, ctxt->intercept, + X86_ICPT_POST_EXCEPT); + if (rc != X86EMUL_CONTINUE) + goto done; + } + + if (ctxt->rep_prefix && (ctxt->d & String)) { + /* All REP prefixes have the same first termination condition */ + if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0) { + ctxt->eip = ctxt->_eip; + ctxt->eflags &= ~EFLG_RF; + goto done; + } } } @@ -4616,13 +4675,18 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) special_insn: - if (unlikely(ctxt->guest_mode) && ctxt->intercept) { + if (unlikely(ctxt->guest_mode) && (ctxt->d & Intercept)) { rc = emulator_check_intercept(ctxt, ctxt->intercept, X86_ICPT_POST_MEMACCESS); if (rc != X86EMUL_CONTINUE) goto done; } + if (ctxt->rep_prefix && (ctxt->d & String)) + ctxt->eflags |= EFLG_RF; + else + ctxt->eflags &= ~EFLG_RF; + if (ctxt->execute) { if (ctxt->d & Fastop) { void (*fop)(struct fastop *) = (void *)ctxt->execute; @@ -4657,8 +4721,9 @@ special_insn: break; case 0x90 ... 0x97: /* nop / xchg reg, rax */ if (ctxt->dst.addr.reg == reg_rmw(ctxt, VCPU_REGS_RAX)) - break; - rc = em_xchg(ctxt); + ctxt->dst.type = OP_NONE; + else + rc = em_xchg(ctxt); break; case 0x98: /* cbw/cwde/cdqe */ switch (ctxt->op_bytes) { @@ -4709,17 +4774,17 @@ special_insn: goto done; writeback: - if (!(ctxt->d & NoWrite)) { - rc = writeback(ctxt, &ctxt->dst); - if (rc != X86EMUL_CONTINUE) - goto done; - } if (ctxt->d & SrcWrite) { BUG_ON(ctxt->src.type == OP_MEM || ctxt->src.type == OP_MEM_STR); rc = writeback(ctxt, &ctxt->src); if (rc != X86EMUL_CONTINUE) goto done; } + if (!(ctxt->d & NoWrite)) { + rc = writeback(ctxt, &ctxt->dst); + if (rc != X86EMUL_CONTINUE) + goto done; + } /* * restore dst type in case the decoding will be reused @@ -4761,6 +4826,7 @@ writeback: } goto done; /* skip rip writeback */ } + ctxt->eflags &= ~EFLG_RF; } ctxt->eip = ctxt->_eip; @@ -4793,8 +4859,10 @@ twobyte_insn: ops->get_dr(ctxt, ctxt->modrm_reg, &ctxt->dst.val); break; case 0x40 ... 0x4f: /* cmov */ - ctxt->dst.val = ctxt->dst.orig_val = ctxt->src.val; - if (!test_cc(ctxt->b, ctxt->eflags)) + if (test_cc(ctxt->b, ctxt->eflags)) + ctxt->dst.val = ctxt->src.val; + else if (ctxt->mode != X86EMUL_MODE_PROT64 || + ctxt->op_bytes != 4) ctxt->dst.type = OP_NONE; /* no writeback */ break; case 0x80 ... 0x8f: /* jnz rel, etc*/ @@ -4818,8 +4886,8 @@ twobyte_insn: break; case 0xc3: /* movnti */ ctxt->dst.bytes = ctxt->op_bytes; - ctxt->dst.val = (ctxt->op_bytes == 4) ? (u32) ctxt->src.val : - (u64) ctxt->src.val; + ctxt->dst.val = (ctxt->op_bytes == 8) ? (u64) ctxt->src.val : + (u32) ctxt->src.val; break; default: goto cannot_emulate; diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 006911858174..3855103f71fd 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1451,7 +1451,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu) vcpu->arch.apic_arb_prio = 0; vcpu->arch.apic_attention = 0; - apic_debug(KERN_INFO "%s: vcpu=%p, id=%d, base_msr=" + apic_debug("%s: vcpu=%p, id=%d, base_msr=" "0x%016" PRIx64 ", base_address=0x%0lx.\n", __func__, vcpu, kvm_apic_id(apic), vcpu->arch.apic_base, apic->base_address); @@ -1895,7 +1895,7 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu) /* evaluate pending_events before reading the vector */ smp_rmb(); sipi_vector = apic->sipi_vector; - pr_debug("vcpu %d received sipi with vector # %x\n", + apic_debug("vcpu %d received sipi with vector # %x\n", vcpu->vcpu_id, sipi_vector); kvm_vcpu_deliver_sipi_vector(vcpu, sipi_vector); vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h index 9d2e0ffcb190..5aaf35641768 100644 --- a/arch/x86/kvm/mmutrace.h +++ b/arch/x86/kvm/mmutrace.h @@ -22,7 +22,7 @@ __entry->unsync = sp->unsync; #define KVM_MMU_PAGE_PRINTK() ({ \ - const char *ret = p->buffer + p->len; \ + const u32 saved_len = p->len; \ static const char *access_str[] = { \ "---", "--x", "w--", "w-x", "-u-", "-ux", "wu-", "wux" \ }; \ @@ -41,7 +41,7 @@ role.nxe ? "" : "!", \ __entry->root_count, \ __entry->unsync ? "unsync" : "sync", 0); \ - ret; \ + p->buffer + saved_len; \ }) #define kvm_mmu_trace_pferr_flags \ diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index cbecaa90399c..3dd6accb64ec 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -428,6 +428,15 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return 1; } +int kvm_pmu_check_pmc(struct kvm_vcpu *vcpu, unsigned pmc) +{ + struct kvm_pmu *pmu = &vcpu->arch.pmu; + bool fixed = pmc & (1u << 30); + pmc &= ~(3u << 30); + return (!fixed && pmc >= pmu->nr_arch_gp_counters) || + (fixed && pmc >= pmu->nr_arch_fixed_counters); +} + int kvm_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data) { struct kvm_pmu *pmu = &vcpu->arch.pmu; diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index b5e994ad0135..ddf742768ecf 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -486,14 +486,14 @@ static int is_external_interrupt(u32 info) return info == (SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR); } -static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) +static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); u32 ret = 0; if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) - ret |= KVM_X86_SHADOW_INT_STI | KVM_X86_SHADOW_INT_MOV_SS; - return ret & mask; + ret = KVM_X86_SHADOW_INT_STI | KVM_X86_SHADOW_INT_MOV_SS; + return ret; } static void svm_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) @@ -1415,7 +1415,16 @@ static void svm_get_segment(struct kvm_vcpu *vcpu, var->avl = (s->attrib >> SVM_SELECTOR_AVL_SHIFT) & 1; var->l = (s->attrib >> SVM_SELECTOR_L_SHIFT) & 1; var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1; - var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1; + + /* + * AMD CPUs circa 2014 track the G bit for all segments except CS. + * However, the SVM spec states that the G bit is not observed by the + * CPU, and some VMware virtual CPUs drop the G bit for all segments. + * So let's synthesize a legal G bit for all segments, this helps + * running KVM nested. It also helps cross-vendor migration, because + * Intel's vmentry has a check on the 'G' bit. + */ + var->g = s->limit > 0xfffff; /* * AMD's VMCB does not have an explicit unusable field, so emulate it @@ -1424,14 +1433,6 @@ static void svm_get_segment(struct kvm_vcpu *vcpu, var->unusable = !var->present || (var->type == 0); switch (seg) { - case VCPU_SREG_CS: - /* - * SVM always stores 0 for the 'G' bit in the CS selector in - * the VMCB on a VMEXIT. This hurts cross-vendor migration: - * Intel's VMENTRY has a check on the 'G' bit. - */ - var->g = s->limit > 0xfffff; - break; case VCPU_SREG_TR: /* * Work around a bug where the busy flag in the tr selector @@ -2116,22 +2117,27 @@ static void nested_svm_unmap(struct page *page) static int nested_svm_intercept_ioio(struct vcpu_svm *svm) { - unsigned port; - u8 val, bit; + unsigned port, size, iopm_len; + u16 val, mask; + u8 start_bit; u64 gpa; if (!(svm->nested.intercept & (1ULL << INTERCEPT_IOIO_PROT))) return NESTED_EXIT_HOST; port = svm->vmcb->control.exit_info_1 >> 16; + size = (svm->vmcb->control.exit_info_1 & SVM_IOIO_SIZE_MASK) >> + SVM_IOIO_SIZE_SHIFT; gpa = svm->nested.vmcb_iopm + (port / 8); - bit = port % 8; - val = 0; + start_bit = port % 8; + iopm_len = (start_bit + size > 8) ? 2 : 1; + mask = (0xf >> (4 - size)) << start_bit; + val = 0; - if (kvm_read_guest(svm->vcpu.kvm, gpa, &val, 1)) - val &= (1 << bit); + if (kvm_read_guest(svm->vcpu.kvm, gpa, &val, iopm_len)) + return NESTED_EXIT_DONE; - return val ? NESTED_EXIT_DONE : NESTED_EXIT_HOST; + return (val & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST; } static int nested_svm_exit_handled_msr(struct vcpu_svm *svm) @@ -4205,7 +4211,8 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu, if (info->intercept == x86_intercept_cr_write) icpt_info.exit_code += info->modrm_reg; - if (icpt_info.exit_code != SVM_EXIT_WRITE_CR0) + if (icpt_info.exit_code != SVM_EXIT_WRITE_CR0 || + info->intercept == x86_intercept_clts) break; intercept = svm->nested.intercept; @@ -4250,14 +4257,14 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu, u64 exit_info; u32 bytes; - exit_info = (vcpu->arch.regs[VCPU_REGS_RDX] & 0xffff) << 16; - if (info->intercept == x86_intercept_in || info->intercept == x86_intercept_ins) { - exit_info |= SVM_IOIO_TYPE_MASK; - bytes = info->src_bytes; - } else { + exit_info = ((info->src_val & 0xffff) << 16) | + SVM_IOIO_TYPE_MASK; bytes = info->dst_bytes; + } else { + exit_info = (info->dst_val & 0xffff) << 16; + bytes = info->src_bytes; } if (info->intercept == x86_intercept_outs || diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 33574c95220d..e850a7d332be 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -721,10 +721,10 @@ TRACE_EVENT(kvm_emulate_insn, ), TP_fast_assign( - __entry->rip = vcpu->arch.emulate_ctxt.fetch.start; __entry->csbase = kvm_x86_ops->get_segment_base(vcpu, VCPU_SREG_CS); - __entry->len = vcpu->arch.emulate_ctxt._eip - - vcpu->arch.emulate_ctxt.fetch.start; + __entry->len = vcpu->arch.emulate_ctxt.fetch.ptr + - vcpu->arch.emulate_ctxt.fetch.data; + __entry->rip = vcpu->arch.emulate_ctxt._eip - __entry->len; memcpy(__entry->insn, vcpu->arch.emulate_ctxt.fetch.data, 15); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 801332edefc3..e618f34bde2d 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -383,6 +383,9 @@ struct nested_vmx { struct hrtimer preemption_timer; bool preemption_timer_expired; + + /* to migrate it to L2 if VM_ENTRY_LOAD_DEBUG_CONTROLS is off */ + u64 vmcs01_debugctl; }; #define POSTED_INTR_ON 0 @@ -740,7 +743,6 @@ static u32 vmx_segment_access_rights(struct kvm_segment *var); static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu); static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx); static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx); -static bool vmx_mpx_supported(void); static DEFINE_PER_CPU(struct vmcs *, vmxarea); static DEFINE_PER_CPU(struct vmcs *, current_vmcs); @@ -820,7 +822,6 @@ static const u32 vmx_msr_index[] = { #endif MSR_EFER, MSR_TSC_AUX, MSR_STAR, }; -#define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index) static inline bool is_page_fault(u32 intr_info) { @@ -1940,7 +1941,7 @@ static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) vmcs_writel(GUEST_RFLAGS, rflags); } -static u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) +static u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu) { u32 interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); int ret = 0; @@ -1950,7 +1951,7 @@ static u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) if (interruptibility & GUEST_INTR_STATE_MOV_SS) ret |= KVM_X86_SHADOW_INT_MOV_SS; - return ret & mask; + return ret; } static void vmx_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) @@ -2239,10 +2240,13 @@ static inline bool nested_vmx_allowed(struct kvm_vcpu *vcpu) * or other means. */ static u32 nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high; +static u32 nested_vmx_true_procbased_ctls_low; static u32 nested_vmx_secondary_ctls_low, nested_vmx_secondary_ctls_high; static u32 nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high; static u32 nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high; +static u32 nested_vmx_true_exit_ctls_low; static u32 nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high; +static u32 nested_vmx_true_entry_ctls_low; static u32 nested_vmx_misc_low, nested_vmx_misc_high; static u32 nested_vmx_ept_caps; static __init void nested_vmx_setup_ctls_msrs(void) @@ -2265,21 +2269,13 @@ static __init void nested_vmx_setup_ctls_msrs(void) /* pin-based controls */ rdmsr(MSR_IA32_VMX_PINBASED_CTLS, nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high); - /* - * According to the Intel spec, if bit 55 of VMX_BASIC is off (as it is - * in our case), bits 1, 2 and 4 (i.e., 0x16) must be 1 in this MSR. - */ nested_vmx_pinbased_ctls_low |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR; nested_vmx_pinbased_ctls_high &= PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING | PIN_BASED_VIRTUAL_NMIS; nested_vmx_pinbased_ctls_high |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR | PIN_BASED_VMX_PREEMPTION_TIMER; - /* - * Exit controls - * If bit 55 of VMX_BASIC is off, bits 0-8 and 10, 11, 13, 14, 16 and - * 17 must be 1. - */ + /* exit controls */ rdmsr(MSR_IA32_VMX_EXIT_CTLS, nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high); nested_vmx_exit_ctls_low = VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR; @@ -2296,10 +2292,13 @@ static __init void nested_vmx_setup_ctls_msrs(void) if (vmx_mpx_supported()) nested_vmx_exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS; + /* We support free control of debug control saving. */ + nested_vmx_true_exit_ctls_low = nested_vmx_exit_ctls_low & + ~VM_EXIT_SAVE_DEBUG_CONTROLS; + /* entry controls */ rdmsr(MSR_IA32_VMX_ENTRY_CTLS, nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high); - /* If bit 55 of VMX_BASIC is off, bits 0-8 and 12 must be 1. */ nested_vmx_entry_ctls_low = VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR; nested_vmx_entry_ctls_high &= #ifdef CONFIG_X86_64 @@ -2311,10 +2310,14 @@ static __init void nested_vmx_setup_ctls_msrs(void) if (vmx_mpx_supported()) nested_vmx_entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS; + /* We support free control of debug control loading. */ + nested_vmx_true_entry_ctls_low = nested_vmx_entry_ctls_low & + ~VM_ENTRY_LOAD_DEBUG_CONTROLS; + /* cpu-based controls */ rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high); - nested_vmx_procbased_ctls_low = 0; + nested_vmx_procbased_ctls_low = CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR; nested_vmx_procbased_ctls_high &= CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_VIRTUAL_NMI_PENDING | CPU_BASED_USE_TSC_OFFSETING | @@ -2335,7 +2338,12 @@ static __init void nested_vmx_setup_ctls_msrs(void) * can use it to avoid exits to L1 - even when L0 runs L2 * without MSR bitmaps. */ - nested_vmx_procbased_ctls_high |= CPU_BASED_USE_MSR_BITMAPS; + nested_vmx_procbased_ctls_high |= CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR | + CPU_BASED_USE_MSR_BITMAPS; + + /* We support free control of CR3 access interception. */ + nested_vmx_true_procbased_ctls_low = nested_vmx_procbased_ctls_low & + ~(CPU_BASED_CR3_LOAD_EXITING | CPU_BASED_CR3_STORE_EXITING); /* secondary cpu-based controls */ rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2, @@ -2394,7 +2402,7 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) * guest, and the VMCS structure we give it - not about the * VMX support of the underlying hardware. */ - *pdata = VMCS12_REVISION | + *pdata = VMCS12_REVISION | VMX_BASIC_TRUE_CTLS | ((u64)VMCS12_SIZE << VMX_BASIC_VMCS_SIZE_SHIFT) | (VMX_BASIC_MEM_TYPE_WB << VMX_BASIC_MEM_TYPE_SHIFT); break; @@ -2404,16 +2412,25 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) nested_vmx_pinbased_ctls_high); break; case MSR_IA32_VMX_TRUE_PROCBASED_CTLS: + *pdata = vmx_control_msr(nested_vmx_true_procbased_ctls_low, + nested_vmx_procbased_ctls_high); + break; case MSR_IA32_VMX_PROCBASED_CTLS: *pdata = vmx_control_msr(nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high); break; case MSR_IA32_VMX_TRUE_EXIT_CTLS: + *pdata = vmx_control_msr(nested_vmx_true_exit_ctls_low, + nested_vmx_exit_ctls_high); + break; case MSR_IA32_VMX_EXIT_CTLS: *pdata = vmx_control_msr(nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high); break; case MSR_IA32_VMX_TRUE_ENTRY_CTLS: + *pdata = vmx_control_msr(nested_vmx_true_entry_ctls_low, + nested_vmx_entry_ctls_high); + break; case MSR_IA32_VMX_ENTRY_CTLS: *pdata = vmx_control_msr(nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high); @@ -2442,7 +2459,7 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) *pdata = -1ULL; break; case MSR_IA32_VMX_VMCS_ENUM: - *pdata = 0x1f; + *pdata = 0x2e; /* highest index: VMX_PREEMPTION_TIMER_VALUE */ break; case MSR_IA32_VMX_PROCBASED_CTLS2: *pdata = vmx_control_msr(nested_vmx_secondary_ctls_low, @@ -3653,7 +3670,7 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(var)); out: - vmx->emulation_required |= emulation_required(vcpu); + vmx->emulation_required = emulation_required(vcpu); } static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) @@ -4422,7 +4439,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) vmx->vcpu.arch.pat = host_pat; } - for (i = 0; i < NR_VMX_MSR; ++i) { + for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i) { u32 index = vmx_msr_index[i]; u32 data_low, data_high; int j = vmx->nmsrs; @@ -4873,7 +4890,7 @@ static int handle_exception(struct kvm_vcpu *vcpu) if (!(vcpu->guest_debug & (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) { vcpu->arch.dr6 &= ~15; - vcpu->arch.dr6 |= dr6; + vcpu->arch.dr6 |= dr6 | DR6_RTM; if (!(dr6 & ~DR6_RESERVED)) /* icebp */ skip_emulated_instruction(vcpu); @@ -5039,7 +5056,7 @@ static int handle_cr(struct kvm_vcpu *vcpu) reg = (exit_qualification >> 8) & 15; switch ((exit_qualification >> 4) & 3) { case 0: /* mov to cr */ - val = kvm_register_read(vcpu, reg); + val = kvm_register_readl(vcpu, reg); trace_kvm_cr_write(cr, val); switch (cr) { case 0: @@ -5056,7 +5073,7 @@ static int handle_cr(struct kvm_vcpu *vcpu) return 1; case 8: { u8 cr8_prev = kvm_get_cr8(vcpu); - u8 cr8 = kvm_register_read(vcpu, reg); + u8 cr8 = (u8)val; err = kvm_set_cr8(vcpu, cr8); kvm_complete_insn_gp(vcpu, err); if (irqchip_in_kernel(vcpu->kvm)) @@ -5132,7 +5149,7 @@ static int handle_dr(struct kvm_vcpu *vcpu) return 0; } else { vcpu->arch.dr7 &= ~DR7_GD; - vcpu->arch.dr6 |= DR6_BD; + vcpu->arch.dr6 |= DR6_BD | DR6_RTM; vmcs_writel(GUEST_DR7, vcpu->arch.dr7); kvm_queue_exception(vcpu, DB_VECTOR); return 1; @@ -5165,7 +5182,7 @@ static int handle_dr(struct kvm_vcpu *vcpu) return 1; kvm_register_write(vcpu, reg, val); } else - if (kvm_set_dr(vcpu, dr, kvm_register_read(vcpu, reg))) + if (kvm_set_dr(vcpu, dr, kvm_register_readl(vcpu, reg))) return 1; skip_emulated_instruction(vcpu); @@ -5621,7 +5638,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) cpu_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); intr_window_requested = cpu_exec_ctrl & CPU_BASED_VIRTUAL_INTR_PENDING; - while (!guest_state_valid(vcpu) && count-- != 0) { + while (vmx->emulation_required && count-- != 0) { if (intr_window_requested && vmx_interrupt_allowed(vcpu)) return handle_interrupt_window(&vmx->vcpu); @@ -5655,7 +5672,6 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) schedule(); } - vmx->emulation_required = emulation_required(vcpu); out: return ret; } @@ -5754,22 +5770,27 @@ static void nested_free_vmcs02(struct vcpu_vmx *vmx, gpa_t vmptr) /* * Free all VMCSs saved for this vcpu, except the one pointed by - * vmx->loaded_vmcs. These include the VMCSs in vmcs02_pool (except the one - * currently used, if running L2), and vmcs01 when running L2. + * vmx->loaded_vmcs. We must be running L1, so vmx->loaded_vmcs + * must be &vmx->vmcs01. */ static void nested_free_all_saved_vmcss(struct vcpu_vmx *vmx) { struct vmcs02_list *item, *n; + + WARN_ON(vmx->loaded_vmcs != &vmx->vmcs01); list_for_each_entry_safe(item, n, &vmx->nested.vmcs02_pool, list) { - if (vmx->loaded_vmcs != &item->vmcs02) - free_loaded_vmcs(&item->vmcs02); + /* + * Something will leak if the above WARN triggers. Better than + * a use-after-free. + */ + if (vmx->loaded_vmcs == &item->vmcs02) + continue; + + free_loaded_vmcs(&item->vmcs02); list_del(&item->list); kfree(item); + vmx->nested.vmcs02_num--; } - vmx->nested.vmcs02_num = 0; - - if (vmx->loaded_vmcs != &vmx->vmcs01) - free_loaded_vmcs(&vmx->vmcs01); } /* @@ -5918,7 +5939,7 @@ static int nested_vmx_check_vmptr(struct kvm_vcpu *vcpu, int exit_reason, * which replaces physical address width with 32 * */ - if (!IS_ALIGNED(vmptr, PAGE_SIZE) || (vmptr >> maxphyaddr)) { + if (!PAGE_ALIGNED(vmptr) || (vmptr >> maxphyaddr)) { nested_vmx_failInvalid(vcpu); skip_emulated_instruction(vcpu); return 1; @@ -5936,7 +5957,7 @@ static int nested_vmx_check_vmptr(struct kvm_vcpu *vcpu, int exit_reason, vmx->nested.vmxon_ptr = vmptr; break; case EXIT_REASON_VMCLEAR: - if (!IS_ALIGNED(vmptr, PAGE_SIZE) || (vmptr >> maxphyaddr)) { + if (!PAGE_ALIGNED(vmptr) || (vmptr >> maxphyaddr)) { nested_vmx_failValid(vcpu, VMXERR_VMCLEAR_INVALID_ADDRESS); skip_emulated_instruction(vcpu); @@ -5951,7 +5972,7 @@ static int nested_vmx_check_vmptr(struct kvm_vcpu *vcpu, int exit_reason, } break; case EXIT_REASON_VMPTRLD: - if (!IS_ALIGNED(vmptr, PAGE_SIZE) || (vmptr >> maxphyaddr)) { + if (!PAGE_ALIGNED(vmptr) || (vmptr >> maxphyaddr)) { nested_vmx_failValid(vcpu, VMXERR_VMPTRLD_INVALID_ADDRESS); skip_emulated_instruction(vcpu); @@ -6086,20 +6107,27 @@ static int nested_vmx_check_permission(struct kvm_vcpu *vcpu) static inline void nested_release_vmcs12(struct vcpu_vmx *vmx) { u32 exec_control; + if (vmx->nested.current_vmptr == -1ull) + return; + + /* current_vmptr and current_vmcs12 are always set/reset together */ + if (WARN_ON(vmx->nested.current_vmcs12 == NULL)) + return; + if (enable_shadow_vmcs) { - if (vmx->nested.current_vmcs12 != NULL) { - /* copy to memory all shadowed fields in case - they were modified */ - copy_shadow_to_vmcs12(vmx); - vmx->nested.sync_shadow_vmcs = false; - exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); - exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS; - vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); - vmcs_write64(VMCS_LINK_POINTER, -1ull); - } + /* copy to memory all shadowed fields in case + they were modified */ + copy_shadow_to_vmcs12(vmx); + vmx->nested.sync_shadow_vmcs = false; + exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); + exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS; + vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); + vmcs_write64(VMCS_LINK_POINTER, -1ull); } kunmap(vmx->nested.current_vmcs12_page); nested_release_page(vmx->nested.current_vmcs12_page); + vmx->nested.current_vmptr = -1ull; + vmx->nested.current_vmcs12 = NULL; } /* @@ -6110,12 +6138,9 @@ static void free_nested(struct vcpu_vmx *vmx) { if (!vmx->nested.vmxon) return; + vmx->nested.vmxon = false; - if (vmx->nested.current_vmptr != -1ull) { - nested_release_vmcs12(vmx); - vmx->nested.current_vmptr = -1ull; - vmx->nested.current_vmcs12 = NULL; - } + nested_release_vmcs12(vmx); if (enable_shadow_vmcs) free_vmcs(vmx->nested.current_shadow_vmcs); /* Unpin physical memory we referred to in current vmcs02 */ @@ -6152,11 +6177,8 @@ static int handle_vmclear(struct kvm_vcpu *vcpu) if (nested_vmx_check_vmptr(vcpu, EXIT_REASON_VMCLEAR, &vmptr)) return 1; - if (vmptr == vmx->nested.current_vmptr) { + if (vmptr == vmx->nested.current_vmptr) nested_release_vmcs12(vmx); - vmx->nested.current_vmptr = -1ull; - vmx->nested.current_vmcs12 = NULL; - } page = nested_get_page(vcpu, vmptr); if (page == NULL) { @@ -6384,7 +6406,7 @@ static int handle_vmread(struct kvm_vcpu *vcpu) return 1; /* Decode instruction info and find the field to read */ - field = kvm_register_read(vcpu, (((vmx_instruction_info) >> 28) & 0xf)); + field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf)); /* Read the field, zero-extended to a u64 field_value */ if (!vmcs12_read_any(vcpu, field, &field_value)) { nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT); @@ -6397,7 +6419,7 @@ static int handle_vmread(struct kvm_vcpu *vcpu) * on the guest's mode (32 or 64 bit), not on the given field's length. */ if (vmx_instruction_info & (1u << 10)) { - kvm_register_write(vcpu, (((vmx_instruction_info) >> 3) & 0xf), + kvm_register_writel(vcpu, (((vmx_instruction_info) >> 3) & 0xf), field_value); } else { if (get_vmx_mem_address(vcpu, exit_qualification, @@ -6434,21 +6456,21 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu) return 1; if (vmx_instruction_info & (1u << 10)) - field_value = kvm_register_read(vcpu, + field_value = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 3) & 0xf)); else { if (get_vmx_mem_address(vcpu, exit_qualification, vmx_instruction_info, &gva)) return 1; if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, - &field_value, (is_long_mode(vcpu) ? 8 : 4), &e)) { + &field_value, (is_64_bit_mode(vcpu) ? 8 : 4), &e)) { kvm_inject_page_fault(vcpu, &e); return 1; } } - field = kvm_register_read(vcpu, (((vmx_instruction_info) >> 28) & 0xf)); + field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf)); if (vmcs_field_readonly(field)) { nested_vmx_failValid(vcpu, VMXERR_VMWRITE_READ_ONLY_VMCS_COMPONENT); @@ -6498,9 +6520,8 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu) skip_emulated_instruction(vcpu); return 1; } - if (vmx->nested.current_vmptr != -1ull) - nested_release_vmcs12(vmx); + nested_release_vmcs12(vmx); vmx->nested.current_vmptr = vmptr; vmx->nested.current_vmcs12 = new_vmcs12; vmx->nested.current_vmcs12_page = page; @@ -6571,7 +6592,7 @@ static int handle_invept(struct kvm_vcpu *vcpu) } vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); - type = kvm_register_read(vcpu, (vmx_instruction_info >> 28) & 0xf); + type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf); types = (nested_vmx_ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6; @@ -6751,7 +6772,7 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu, unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); int cr = exit_qualification & 15; int reg = (exit_qualification >> 8) & 15; - unsigned long val = kvm_register_read(vcpu, reg); + unsigned long val = kvm_register_readl(vcpu, reg); switch ((exit_qualification >> 4) & 3) { case 0: /* mov to cr */ @@ -7112,7 +7133,26 @@ static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr) if (max_irr == -1) return; - vmx_set_rvi(max_irr); + /* + * If a vmexit is needed, vmx_check_nested_events handles it. + */ + if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) + return; + + if (!is_guest_mode(vcpu)) { + vmx_set_rvi(max_irr); + return; + } + + /* + * Fall back to pre-APICv interrupt injection since L2 + * is run without virtual interrupt delivery. + */ + if (!kvm_event_needs_reinjection(vcpu) && + vmx_interrupt_allowed(vcpu)) { + kvm_queue_interrupt(vcpu, max_irr, false); + vmx_inject_irq(vcpu); + } } static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) @@ -7520,13 +7560,31 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) vmx_complete_interrupts(vmx); } +static void vmx_load_vmcs01(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + int cpu; + + if (vmx->loaded_vmcs == &vmx->vmcs01) + return; + + cpu = get_cpu(); + vmx->loaded_vmcs = &vmx->vmcs01; + vmx_vcpu_put(vcpu); + vmx_vcpu_load(vcpu, cpu); + vcpu->cpu = cpu; + put_cpu(); +} + static void vmx_free_vcpu(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); free_vpid(vmx); - free_loaded_vmcs(vmx->loaded_vmcs); + leave_guest_mode(vcpu); + vmx_load_vmcs01(vcpu); free_nested(vmx); + free_loaded_vmcs(vmx->loaded_vmcs); kfree(vmx->guest_msrs); kvm_vcpu_uninit(vcpu); kmem_cache_free(kvm_vcpu_cache, vmx); @@ -7548,6 +7606,9 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) goto free_vcpu; vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL); + BUILD_BUG_ON(ARRAY_SIZE(vmx_msr_index) * sizeof(vmx->guest_msrs[0]) + > PAGE_SIZE); + err = -ENOMEM; if (!vmx->guest_msrs) { goto uninit_vcpu; @@ -7836,7 +7897,13 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) vmcs_writel(GUEST_GDTR_BASE, vmcs12->guest_gdtr_base); vmcs_writel(GUEST_IDTR_BASE, vmcs12->guest_idtr_base); - vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl); + if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) { + kvm_set_dr(vcpu, 7, vmcs12->guest_dr7); + vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl); + } else { + kvm_set_dr(vcpu, 7, vcpu->arch.dr7); + vmcs_write64(GUEST_IA32_DEBUGCTL, vmx->nested.vmcs01_debugctl); + } vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, vmcs12->vm_entry_intr_info_field); vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, @@ -7846,7 +7913,6 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, vmcs12->guest_interruptibility_info); vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs); - kvm_set_dr(vcpu, 7, vmcs12->guest_dr7); vmx_set_rflags(vcpu, vmcs12->guest_rflags); vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, vmcs12->guest_pending_dbg_exceptions); @@ -8113,14 +8179,14 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) } if ((vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_MSR_BITMAPS) && - !IS_ALIGNED(vmcs12->msr_bitmap, PAGE_SIZE)) { + !PAGE_ALIGNED(vmcs12->msr_bitmap)) { /*TODO: Also verify bits beyond physical address width are 0*/ nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); return 1; } if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) && - !IS_ALIGNED(vmcs12->apic_access_addr, PAGE_SIZE)) { + !PAGE_ALIGNED(vmcs12->apic_access_addr)) { /*TODO: Also verify bits beyond physical address width are 0*/ nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); return 1; @@ -8136,15 +8202,18 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) } if (!vmx_control_verify(vmcs12->cpu_based_vm_exec_control, - nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high) || + nested_vmx_true_procbased_ctls_low, + nested_vmx_procbased_ctls_high) || !vmx_control_verify(vmcs12->secondary_vm_exec_control, nested_vmx_secondary_ctls_low, nested_vmx_secondary_ctls_high) || !vmx_control_verify(vmcs12->pin_based_vm_exec_control, nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high) || !vmx_control_verify(vmcs12->vm_exit_controls, - nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high) || + nested_vmx_true_exit_ctls_low, + nested_vmx_exit_ctls_high) || !vmx_control_verify(vmcs12->vm_entry_controls, - nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high)) + nested_vmx_true_entry_ctls_low, + nested_vmx_entry_ctls_high)) { nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); return 1; @@ -8221,6 +8290,9 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) vmx->nested.vmcs01_tsc_offset = vmcs_read64(TSC_OFFSET); + if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) + vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); + cpu = get_cpu(); vmx->loaded_vmcs = vmcs02; vmx_vcpu_put(vcpu); @@ -8398,7 +8470,6 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12); vmcs12->guest_cr4 = vmcs12_guest_cr4(vcpu, vmcs12); - kvm_get_dr(vcpu, 7, (unsigned long *)&vmcs12->guest_dr7); vmcs12->guest_rsp = kvm_register_read(vcpu, VCPU_REGS_RSP); vmcs12->guest_rip = kvm_register_read(vcpu, VCPU_REGS_RIP); vmcs12->guest_rflags = vmcs_readl(GUEST_RFLAGS); @@ -8477,9 +8548,13 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, (vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) | (vm_entry_controls_get(to_vmx(vcpu)) & VM_ENTRY_IA32E_MODE); + if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_DEBUG_CONTROLS) { + kvm_get_dr(vcpu, 7, (unsigned long *)&vmcs12->guest_dr7); + vmcs12->guest_ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); + } + /* TODO: These cannot have changed unless we have MSR bitmaps and * the relevant bit asks not to trap the change */ - vmcs12->guest_ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT) vmcs12->guest_ia32_pat = vmcs_read64(GUEST_IA32_PAT); if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_EFER) @@ -8670,7 +8745,6 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, unsigned long exit_qualification) { struct vcpu_vmx *vmx = to_vmx(vcpu); - int cpu; struct vmcs12 *vmcs12 = get_vmcs12(vcpu); /* trying to cancel vmlaunch/vmresume is a bug */ @@ -8695,12 +8769,7 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, vmcs12->vm_exit_intr_error_code, KVM_ISA_VMX); - cpu = get_cpu(); - vmx->loaded_vmcs = &vmx->vmcs01; - vmx_vcpu_put(vcpu); - vmx_vcpu_load(vcpu, cpu); - vcpu->cpu = cpu; - put_cpu(); + vmx_load_vmcs01(vcpu); vm_entry_controls_init(vmx, vmcs_read32(VM_ENTRY_CONTROLS)); vm_exit_controls_init(vmx, vmcs_read32(VM_EXIT_CONTROLS)); @@ -8890,7 +8959,7 @@ static int __init vmx_init(void) rdmsrl_safe(MSR_EFER, &host_efer); - for (i = 0; i < NR_VMX_MSR; ++i) + for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i) kvm_define_shared_msr(i, vmx_msr_index[i]); vmx_io_bitmap_a = (unsigned long *)__get_free_page(GFP_KERNEL); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f6449334ec45..b86d329b953a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -87,6 +87,7 @@ static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE); static void update_cr8_intercept(struct kvm_vcpu *vcpu); static void process_nmi(struct kvm_vcpu *vcpu); +static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags); struct kvm_x86_ops *kvm_x86_ops; EXPORT_SYMBOL_GPL(kvm_x86_ops); @@ -211,6 +212,7 @@ static void shared_msr_update(unsigned slot, u32 msr) void kvm_define_shared_msr(unsigned slot, u32 msr) { + BUG_ON(slot >= KVM_NR_SHARED_MSRS); if (slot >= shared_msrs_global.nr) shared_msrs_global.nr = slot + 1; shared_msrs_global.msrs[slot] = msr; @@ -310,6 +312,31 @@ static int exception_class(int vector) return EXCPT_BENIGN; } +#define EXCPT_FAULT 0 +#define EXCPT_TRAP 1 +#define EXCPT_ABORT 2 +#define EXCPT_INTERRUPT 3 + +static int exception_type(int vector) +{ + unsigned int mask; + + if (WARN_ON(vector > 31 || vector == NMI_VECTOR)) + return EXCPT_INTERRUPT; + + mask = 1 << vector; + + /* #DB is trap, as instruction watchpoints are handled elsewhere */ + if (mask & ((1 << DB_VECTOR) | (1 << BP_VECTOR) | (1 << OF_VECTOR))) + return EXCPT_TRAP; + + if (mask & ((1 << DF_VECTOR) | (1 << MC_VECTOR))) + return EXCPT_ABORT; + + /* Reserved exceptions will result in fault */ + return EXCPT_FAULT; +} + static void kvm_multiple_exception(struct kvm_vcpu *vcpu, unsigned nr, bool has_error, u32 error_code, bool reinject) @@ -758,6 +785,15 @@ static void kvm_update_dr7(struct kvm_vcpu *vcpu) vcpu->arch.switch_db_regs |= KVM_DEBUGREG_BP_ENABLED; } +static u64 kvm_dr6_fixed(struct kvm_vcpu *vcpu) +{ + u64 fixed = DR6_FIXED_1; + + if (!guest_cpuid_has_rtm(vcpu)) + fixed |= DR6_RTM; + return fixed; +} + static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) { switch (dr) { @@ -773,7 +809,7 @@ static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) case 6: if (val & 0xffffffff00000000ULL) return -1; /* #GP */ - vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1; + vcpu->arch.dr6 = (val & DR6_VOLATILE) | kvm_dr6_fixed(vcpu); kvm_update_dr6(vcpu); break; case 5: @@ -1215,6 +1251,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) unsigned long flags; s64 usdiff; bool matched; + bool already_matched; u64 data = msr->data; raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags); @@ -1279,6 +1316,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) pr_debug("kvm: adjusted tsc offset by %llu\n", delta); } matched = true; + already_matched = (vcpu->arch.this_tsc_generation == kvm->arch.cur_tsc_generation); } else { /* * We split periods of matched TSC writes into generations. @@ -1294,7 +1332,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) kvm->arch.cur_tsc_write = data; kvm->arch.cur_tsc_offset = offset; matched = false; - pr_debug("kvm: new tsc generation %u, clock %llu\n", + pr_debug("kvm: new tsc generation %llu, clock %llu\n", kvm->arch.cur_tsc_generation, data); } @@ -1319,10 +1357,11 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags); spin_lock(&kvm->arch.pvclock_gtod_sync_lock); - if (matched) - kvm->arch.nr_vcpus_matched_tsc++; - else + if (!matched) { kvm->arch.nr_vcpus_matched_tsc = 0; + } else if (!already_matched) { + kvm->arch.nr_vcpus_matched_tsc++; + } kvm_track_tsc_matching(vcpu); spin_unlock(&kvm->arch.pvclock_gtod_sync_lock); @@ -2032,6 +2071,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) data &= ~(u64)0x40; /* ignore flush filter disable */ data &= ~(u64)0x100; /* ignore ignne emulation enable */ data &= ~(u64)0x8; /* ignore TLB cache disable */ + data &= ~(u64)0x40000; /* ignore Mc status write enable */ if (data != 0) { vcpu_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n", data); @@ -2974,9 +3014,7 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft; events->interrupt.nr = vcpu->arch.interrupt.nr; events->interrupt.soft = 0; - events->interrupt.shadow = - kvm_x86_ops->get_interrupt_shadow(vcpu, - KVM_X86_SHADOW_INT_MOV_SS | KVM_X86_SHADOW_INT_STI); + events->interrupt.shadow = kvm_x86_ops->get_interrupt_shadow(vcpu); events->nmi.injected = vcpu->arch.nmi_injected; events->nmi.pending = vcpu->arch.nmi_pending != 0; @@ -4082,7 +4120,8 @@ static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes, if (gpa == UNMAPPED_GVA) return X86EMUL_PROPAGATE_FAULT; - ret = kvm_read_guest(vcpu->kvm, gpa, data, toread); + ret = kvm_read_guest_page(vcpu->kvm, gpa >> PAGE_SHIFT, data, + offset, toread); if (ret < 0) { r = X86EMUL_IO_NEEDED; goto out; @@ -4103,10 +4142,24 @@ static int kvm_fetch_guest_virt(struct x86_emulate_ctxt *ctxt, { struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; + unsigned offset; + int ret; - return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, - access | PFERR_FETCH_MASK, - exception); + /* Inline kvm_read_guest_virt_helper for speed. */ + gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr, access|PFERR_FETCH_MASK, + exception); + if (unlikely(gpa == UNMAPPED_GVA)) + return X86EMUL_PROPAGATE_FAULT; + + offset = addr & (PAGE_SIZE-1); + if (WARN_ON(offset + bytes > PAGE_SIZE)) + bytes = (unsigned)PAGE_SIZE - offset; + ret = kvm_read_guest_page(vcpu->kvm, gpa >> PAGE_SHIFT, val, + offset, bytes); + if (unlikely(ret < 0)) + return X86EMUL_IO_NEEDED; + + return X86EMUL_CONTINUE; } int kvm_read_guest_virt(struct x86_emulate_ctxt *ctxt, @@ -4730,7 +4783,6 @@ static void emulator_set_segment(struct x86_emulate_ctxt *ctxt, u16 selector, if (desc->g) var.limit = (var.limit << 12) | 0xfff; var.type = desc->type; - var.present = desc->p; var.dpl = desc->dpl; var.db = desc->d; var.s = desc->s; @@ -4762,6 +4814,12 @@ static int emulator_set_msr(struct x86_emulate_ctxt *ctxt, return kvm_set_msr(emul_to_vcpu(ctxt), &msr); } +static int emulator_check_pmc(struct x86_emulate_ctxt *ctxt, + u32 pmc) +{ + return kvm_pmu_check_pmc(emul_to_vcpu(ctxt), pmc); +} + static int emulator_read_pmc(struct x86_emulate_ctxt *ctxt, u32 pmc, u64 *pdata) { @@ -4838,6 +4896,7 @@ static const struct x86_emulate_ops emulate_ops = { .set_dr = emulator_set_dr, .set_msr = emulator_set_msr, .get_msr = emulator_get_msr, + .check_pmc = emulator_check_pmc, .read_pmc = emulator_read_pmc, .halt = emulator_halt, .wbinvd = emulator_wbinvd, @@ -4850,7 +4909,7 @@ static const struct x86_emulate_ops emulate_ops = { static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask) { - u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(vcpu, mask); + u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(vcpu); /* * an sti; sti; sequence only disable interrupts for the first * instruction. So, if the last instruction, be it emulated or @@ -4858,8 +4917,13 @@ static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask) * means that the last instruction is an sti. We should not * leave the flag on in this case. The same goes for mov ss */ - if (!(int_shadow & mask)) + if (int_shadow & mask) + mask = 0; + if (unlikely(int_shadow || mask)) { kvm_x86_ops->set_interrupt_shadow(vcpu, mask); + if (!mask) + kvm_make_request(KVM_REQ_EVENT, vcpu); + } } static void inject_emulated_exception(struct kvm_vcpu *vcpu) @@ -4874,19 +4938,6 @@ static void inject_emulated_exception(struct kvm_vcpu *vcpu) kvm_queue_exception(vcpu, ctxt->exception.vector); } -static void init_decode_cache(struct x86_emulate_ctxt *ctxt) -{ - memset(&ctxt->opcode_len, 0, - (void *)&ctxt->_regs - (void *)&ctxt->opcode_len); - - ctxt->fetch.start = 0; - ctxt->fetch.end = 0; - ctxt->io_read.pos = 0; - ctxt->io_read.end = 0; - ctxt->mem_read.pos = 0; - ctxt->mem_read.end = 0; -} - static void init_emulate_ctxt(struct kvm_vcpu *vcpu) { struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; @@ -5085,23 +5136,22 @@ static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7, return dr6; } -static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, int *r) +static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, unsigned long rflags, int *r) { struct kvm_run *kvm_run = vcpu->run; /* - * Use the "raw" value to see if TF was passed to the processor. - * Note that the new value of the flags has not been saved yet. + * rflags is the old, "raw" value of the flags. The new value has + * not been saved yet. * * This is correct even for TF set by the guest, because "the * processor will not generate this exception after the instruction * that sets the TF flag". */ - unsigned long rflags = kvm_x86_ops->get_rflags(vcpu); - if (unlikely(rflags & X86_EFLAGS_TF)) { if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) { - kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1; + kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 | + DR6_RTM; kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip; kvm_run->debug.arch.exception = DB_VECTOR; kvm_run->exit_reason = KVM_EXIT_DEBUG; @@ -5114,7 +5164,7 @@ static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, int *r) * cleared by the processor". */ vcpu->arch.dr6 &= ~15; - vcpu->arch.dr6 |= DR6_BS; + vcpu->arch.dr6 |= DR6_BS | DR6_RTM; kvm_queue_exception(vcpu, DB_VECTOR); } } @@ -5133,7 +5183,7 @@ static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r) vcpu->arch.eff_db); if (dr6 != 0) { - kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1; + kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1 | DR6_RTM; kvm_run->debug.arch.pc = kvm_rip_read(vcpu) + get_segment_base(vcpu, VCPU_SREG_CS); @@ -5144,14 +5194,15 @@ static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r) } } - if (unlikely(vcpu->arch.dr7 & DR7_BP_EN_MASK)) { + if (unlikely(vcpu->arch.dr7 & DR7_BP_EN_MASK) && + !(kvm_get_rflags(vcpu) & X86_EFLAGS_RF)) { dr6 = kvm_vcpu_check_hw_bp(eip, 0, vcpu->arch.dr7, vcpu->arch.db); if (dr6 != 0) { vcpu->arch.dr6 &= ~15; - vcpu->arch.dr6 |= dr6; + vcpu->arch.dr6 |= dr6 | DR6_RTM; kvm_queue_exception(vcpu, DB_VECTOR); *r = EMULATE_DONE; return true; @@ -5215,6 +5266,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, if (emulation_type & EMULTYPE_SKIP) { kvm_rip_write(vcpu, ctxt->_eip); + if (ctxt->eflags & X86_EFLAGS_RF) + kvm_set_rflags(vcpu, ctxt->eflags & ~X86_EFLAGS_RF); return EMULATE_DONE; } @@ -5265,13 +5318,22 @@ restart: r = EMULATE_DONE; if (writeback) { + unsigned long rflags = kvm_x86_ops->get_rflags(vcpu); toggle_interruptibility(vcpu, ctxt->interruptibility); - kvm_make_request(KVM_REQ_EVENT, vcpu); vcpu->arch.emulate_regs_need_sync_to_vcpu = false; kvm_rip_write(vcpu, ctxt->eip); if (r == EMULATE_DONE) - kvm_vcpu_check_singlestep(vcpu, &r); - kvm_set_rflags(vcpu, ctxt->eflags); + kvm_vcpu_check_singlestep(vcpu, rflags, &r); + __kvm_set_rflags(vcpu, ctxt->eflags); + + /* + * For STI, interrupts are shadowed; so KVM_REQ_EVENT will + * do nothing, and it will be requested again as soon as + * the shadow expires. But we still need to check here, + * because POPF has no interrupt shadow. + */ + if (unlikely((ctxt->eflags & ~rflags) & X86_EFLAGS_IF)) + kvm_make_request(KVM_REQ_EVENT, vcpu); } else vcpu->arch.emulate_regs_need_sync_to_vcpu = true; @@ -5662,7 +5724,6 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) u64 param, ingpa, outgpa, ret; uint16_t code, rep_idx, rep_cnt, res = HV_STATUS_SUCCESS, rep_done = 0; bool fast, longmode; - int cs_db, cs_l; /* * hypercall generates UD from non zero cpl and real mode @@ -5673,8 +5734,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) return 0; } - kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); - longmode = is_long_mode(vcpu) && cs_l == 1; + longmode = is_64_bit_mode(vcpu); if (!longmode) { param = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDX) << 32) | @@ -5739,7 +5799,7 @@ static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid) int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) { unsigned long nr, a0, a1, a2, a3, ret; - int r = 1; + int op_64_bit, r = 1; if (kvm_hv_hypercall_enabled(vcpu->kvm)) return kvm_hv_hypercall(vcpu); @@ -5752,7 +5812,8 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) trace_kvm_hypercall(nr, a0, a1, a2, a3); - if (!is_long_mode(vcpu)) { + op_64_bit = is_64_bit_mode(vcpu); + if (!op_64_bit) { nr &= 0xFFFFFFFF; a0 &= 0xFFFFFFFF; a1 &= 0xFFFFFFFF; @@ -5778,6 +5839,8 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) break; } out: + if (!op_64_bit) + ret = (u32)ret; kvm_register_write(vcpu, VCPU_REGS_RAX, ret); ++vcpu->stat.hypercalls; return r; @@ -5856,6 +5919,11 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win) trace_kvm_inj_exception(vcpu->arch.exception.nr, vcpu->arch.exception.has_error_code, vcpu->arch.exception.error_code); + + if (exception_type(vcpu->arch.exception.nr) == EXCPT_FAULT) + __kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) | + X86_EFLAGS_RF); + kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr, vcpu->arch.exception.has_error_code, vcpu->arch.exception.error_code, @@ -5887,6 +5955,18 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win) kvm_x86_ops->set_nmi(vcpu); } } else if (kvm_cpu_has_injectable_intr(vcpu)) { + /* + * Because interrupts can be injected asynchronously, we are + * calling check_nested_events again here to avoid a race condition. + * See https://lkml.org/lkml/2014/7/2/60 for discussion about this + * proposal and current concerns. Perhaps we should be setting + * KVM_REQ_EVENT only on certain events and not unconditionally? + */ + if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) { + r = kvm_x86_ops->check_nested_events(vcpu, req_int_win); + if (r != 0) + return r; + } if (kvm_x86_ops->interrupt_allowed(vcpu)) { kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu), false); @@ -6835,9 +6915,11 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu) atomic_set(&vcpu->arch.nmi_queued, 0); vcpu->arch.nmi_pending = 0; vcpu->arch.nmi_injected = false; + kvm_clear_interrupt_queue(vcpu); + kvm_clear_exception_queue(vcpu); memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db)); - vcpu->arch.dr6 = DR6_FIXED_1; + vcpu->arch.dr6 = DR6_INIT; kvm_update_dr6(vcpu); vcpu->arch.dr7 = DR7_FIXED_1; kvm_update_dr7(vcpu); @@ -7393,12 +7475,17 @@ unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_get_rflags); -void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) +static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) { if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP && kvm_is_linear_rip(vcpu, vcpu->arch.singlestep_rip)) rflags |= X86_EFLAGS_TF; kvm_x86_ops->set_rflags(vcpu, rflags); +} + +void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) +{ + __kvm_set_rflags(vcpu, rflags); kvm_make_request(KVM_REQ_EVENT, vcpu); } EXPORT_SYMBOL_GPL(kvm_set_rflags); diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 8c97bac9a895..306a1b77581f 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -47,6 +47,16 @@ static inline int is_long_mode(struct kvm_vcpu *vcpu) #endif } +static inline bool is_64_bit_mode(struct kvm_vcpu *vcpu) +{ + int cs_db, cs_l; + + if (!is_long_mode(vcpu)) + return false; + kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); + return cs_l; +} + static inline bool mmu_is_nested(struct kvm_vcpu *vcpu) { return vcpu->arch.walk_mmu == &vcpu->arch.nested_mmu; @@ -108,6 +118,23 @@ static inline bool vcpu_match_mmio_gpa(struct kvm_vcpu *vcpu, gpa_t gpa) return false; } +static inline unsigned long kvm_register_readl(struct kvm_vcpu *vcpu, + enum kvm_reg reg) +{ + unsigned long val = kvm_register_read(vcpu, reg); + + return is_64_bit_mode(vcpu) ? val : (u32)val; +} + +static inline void kvm_register_writel(struct kvm_vcpu *vcpu, + enum kvm_reg reg, + unsigned long val) +{ + if (!is_64_bit_mode(vcpu)) + val = (u32)val; + return kvm_register_write(vcpu, reg, val); +} + void kvm_before_handle_nmi(struct kvm_vcpu *vcpu); void kvm_after_handle_nmi(struct kvm_vcpu *vcpu); int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip); diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 36642793e315..1dbade870f90 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -577,6 +577,8 @@ static int is_f00f_bug(struct pt_regs *regs, unsigned long address) static const char nx_warning[] = KERN_CRIT "kernel tried to execute NX-protected page - exploit attempt? (uid: %d)\n"; +static const char smep_warning[] = KERN_CRIT +"unable to execute userspace code (SMEP?) (uid: %d)\n"; static void show_fault_oops(struct pt_regs *regs, unsigned long error_code, @@ -597,6 +599,10 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code, if (pte && pte_present(*pte) && !pte_exec(*pte)) printk(nx_warning, from_kuid(&init_user_ns, current_uid())); + if (pte && pte_present(*pte) && pte_exec(*pte) && + (pgd_flags(*pgd) & _PAGE_USER) && + (read_cr4() & X86_CR4_SMEP)) + printk(smep_warning, from_kuid(&init_user_ns, current_uid())); } printk(KERN_ALERT "BUG: unable to handle kernel "); diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index f97130618113..66dba36f2343 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -18,6 +18,13 @@ #include <asm/dma.h> /* for MAX_DMA_PFN */ #include <asm/microcode.h> +/* + * We need to define the tracepoints somewhere, and tlb.c + * is only compied when SMP=y. + */ +#define CREATE_TRACE_POINTS +#include <trace/events/tlb.h> + #include "mm_internal.h" static unsigned long __initdata pgt_buf_start; diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index dd8dda167a24..1fe33987de02 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -49,6 +49,7 @@ void leave_mm(int cpu) if (cpumask_test_cpu(cpu, mm_cpumask(active_mm))) { cpumask_clear_cpu(cpu, mm_cpumask(active_mm)); load_cr3(swapper_pg_dir); + trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); } } EXPORT_SYMBOL_GPL(leave_mm); @@ -102,20 +103,24 @@ static void flush_tlb_func(void *info) if (f->flush_mm != this_cpu_read(cpu_tlbstate.active_mm)) return; + if (!f->flush_end) + f->flush_end = f->flush_start + PAGE_SIZE; count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED); if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) { - if (f->flush_end == TLB_FLUSH_ALL) + if (f->flush_end == TLB_FLUSH_ALL) { local_flush_tlb(); - else if (!f->flush_end) - __flush_tlb_single(f->flush_start); - else { + trace_tlb_flush(TLB_REMOTE_SHOOTDOWN, TLB_FLUSH_ALL); + } else { unsigned long addr; + unsigned long nr_pages = + f->flush_end - f->flush_start / PAGE_SIZE; addr = f->flush_start; while (addr < f->flush_end) { __flush_tlb_single(addr); addr += PAGE_SIZE; } + trace_tlb_flush(TLB_REMOTE_SHOOTDOWN, nr_pages); } } else leave_mm(smp_processor_id()); @@ -153,46 +158,45 @@ void flush_tlb_current_task(void) count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); local_flush_tlb(); + trace_tlb_flush(TLB_LOCAL_SHOOTDOWN, TLB_FLUSH_ALL); if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL); preempt_enable(); } +/* + * See Documentation/x86/tlb.txt for details. We choose 33 + * because it is large enough to cover the vast majority (at + * least 95%) of allocations, and is small enough that we are + * confident it will not cause too much overhead. Each single + * flush is about 100 ns, so this caps the maximum overhead at + * _about_ 3,000 ns. + * + * This is in units of pages. + */ +unsigned long tlb_single_page_flush_ceiling = 33; + void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, unsigned long end, unsigned long vmflag) { unsigned long addr; - unsigned act_entries, tlb_entries = 0; - unsigned long nr_base_pages; + /* do a global flush by default */ + unsigned long base_pages_to_flush = TLB_FLUSH_ALL; preempt_disable(); if (current->active_mm != mm) - goto flush_all; + goto out; if (!current->mm) { leave_mm(smp_processor_id()); - goto flush_all; + goto out; } - if (end == TLB_FLUSH_ALL || tlb_flushall_shift == -1 - || vmflag & VM_HUGETLB) { - local_flush_tlb(); - goto flush_all; - } - - /* In modern CPU, last level tlb used for both data/ins */ - if (vmflag & VM_EXEC) - tlb_entries = tlb_lli_4k[ENTRIES]; - else - tlb_entries = tlb_lld_4k[ENTRIES]; + if ((end != TLB_FLUSH_ALL) && !(vmflag & VM_HUGETLB)) + base_pages_to_flush = (end - start) >> PAGE_SHIFT; - /* Assume all of TLB entries was occupied by this task */ - act_entries = tlb_entries >> tlb_flushall_shift; - act_entries = mm->total_vm > act_entries ? act_entries : mm->total_vm; - nr_base_pages = (end - start) >> PAGE_SHIFT; - - /* tlb_flushall_shift is on balance point, details in commit log */ - if (nr_base_pages > act_entries) { + if (base_pages_to_flush > tlb_single_page_flush_ceiling) { + base_pages_to_flush = TLB_FLUSH_ALL; count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); local_flush_tlb(); } else { @@ -201,17 +205,15 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE); __flush_tlb_single(addr); } - - if (cpumask_any_but(mm_cpumask(mm), - smp_processor_id()) < nr_cpu_ids) - flush_tlb_others(mm_cpumask(mm), mm, start, end); - preempt_enable(); - return; } - -flush_all: + trace_tlb_flush(TLB_LOCAL_MM_SHOOTDOWN, base_pages_to_flush); +out: + if (base_pages_to_flush == TLB_FLUSH_ALL) { + start = 0UL; + end = TLB_FLUSH_ALL; + } if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) - flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL); + flush_tlb_others(mm_cpumask(mm), mm, start, end); preempt_enable(); } @@ -260,32 +262,26 @@ static void do_kernel_range_flush(void *info) void flush_tlb_kernel_range(unsigned long start, unsigned long end) { - unsigned act_entries; - struct flush_tlb_info info; - - /* In modern CPU, last level tlb used for both data/ins */ - act_entries = tlb_lld_4k[ENTRIES]; /* Balance as user space task's flush, a bit conservative */ - if (end == TLB_FLUSH_ALL || tlb_flushall_shift == -1 || - (end - start) >> PAGE_SHIFT > act_entries >> tlb_flushall_shift) - + if (end == TLB_FLUSH_ALL || + (end - start) > tlb_single_page_flush_ceiling * PAGE_SIZE) { on_each_cpu(do_flush_tlb_all, NULL, 1); - else { + } else { + struct flush_tlb_info info; info.flush_start = start; info.flush_end = end; on_each_cpu(do_kernel_range_flush, &info, 1); } } -#ifdef CONFIG_DEBUG_TLBFLUSH static ssize_t tlbflush_read_file(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) { char buf[32]; unsigned int len; - len = sprintf(buf, "%hd\n", tlb_flushall_shift); + len = sprintf(buf, "%ld\n", tlb_single_page_flush_ceiling); return simple_read_from_buffer(user_buf, count, ppos, buf, len); } @@ -294,20 +290,20 @@ static ssize_t tlbflush_write_file(struct file *file, { char buf[32]; ssize_t len; - s8 shift; + int ceiling; len = min(count, sizeof(buf) - 1); if (copy_from_user(buf, user_buf, len)) return -EFAULT; buf[len] = '\0'; - if (kstrtos8(buf, 0, &shift)) + if (kstrtoint(buf, 0, &ceiling)) return -EINVAL; - if (shift < -1 || shift >= BITS_PER_LONG) + if (ceiling < 0) return -EINVAL; - tlb_flushall_shift = shift; + tlb_single_page_flush_ceiling = ceiling; return count; } @@ -317,11 +313,10 @@ static const struct file_operations fops_tlbflush = { .llseek = default_llseek, }; -static int __init create_tlb_flushall_shift(void) +static int __init create_tlb_single_page_flush_ceiling(void) { - debugfs_create_file("tlb_flushall_shift", S_IRUSR | S_IWUSR, + debugfs_create_file("tlb_single_page_flush_ceiling", S_IRUSR | S_IWUSR, arch_debugfs_dir, NULL, &fops_tlbflush); return 0; } -late_initcall(create_tlb_flushall_shift); -#endif +late_initcall(create_tlb_single_page_flush_ceiling); diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index b5e60268d93f..c61ea57d1ba1 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -326,6 +326,27 @@ static void pci_fixup_video(struct pci_dev *pdev) struct pci_bus *bus; u16 config; + if (!vga_default_device()) { + resource_size_t start, end; + int i; + + /* Does firmware framebuffer belong to us? */ + for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { + if (!(pci_resource_flags(pdev, i) & IORESOURCE_MEM)) + continue; + + start = pci_resource_start(pdev, i); + end = pci_resource_end(pdev, i); + + if (!start || !end) + continue; + + if (screen_info.lfb_base >= start && + (screen_info.lfb_base + screen_info.lfb_size) < end) + vga_set_default_device(pdev); + } + } + /* Is VGA routed to us? */ bus = pdev->bus; while (bus) { diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index a19ed92e74e4..2ae525e0d8ba 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c @@ -162,6 +162,10 @@ pcibios_align_resource(void *data, const struct resource *res, return start; if (start & 0x300) start = (start + 0x3ff) & ~0x3ff; + } else if (res->flags & IORESOURCE_MEM) { + /* The low 1MB range is reserved for ISA cards */ + if (start < BIOS_END) + start = BIOS_END; } return start; } diff --git a/arch/x86/platform/efi/Makefile b/arch/x86/platform/efi/Makefile index d51045afcaaf..2846aaab5103 100644 --- a/arch/x86/platform/efi/Makefile +++ b/arch/x86/platform/efi/Makefile @@ -1,4 +1,4 @@ -obj-$(CONFIG_EFI) += efi.o efi_$(BITS).o efi_stub_$(BITS).o +obj-$(CONFIG_EFI) += quirks.o efi.o efi_$(BITS).o efi_stub_$(BITS).o obj-$(CONFIG_ACPI_BGRT) += efi-bgrt.o obj-$(CONFIG_EARLY_PRINTK_EFI) += early_printk.o obj-$(CONFIG_EFI_MIXED) += efi_thunk_$(BITS).o diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 87fc96bcc13c..850da94fef30 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -56,13 +56,6 @@ #define EFI_DEBUG -#define EFI_MIN_RESERVE 5120 - -#define EFI_DUMMY_GUID \ - EFI_GUID(0x4424ac57, 0xbe4b, 0x47dd, 0x9e, 0x97, 0xed, 0x50, 0xf0, 0x9f, 0x92, 0xa9) - -static efi_char16_t efi_dummy_name[6] = { 'D', 'U', 'M', 'M', 'Y', 0 }; - struct efi_memory_map memmap; static struct efi efi_phys __initdata; @@ -95,139 +88,6 @@ static int __init setup_add_efi_memmap(char *arg) } early_param("add_efi_memmap", setup_add_efi_memmap); -static bool efi_no_storage_paranoia; - -static int __init setup_storage_paranoia(char *arg) -{ - efi_no_storage_paranoia = true; - return 0; -} -early_param("efi_no_storage_paranoia", setup_storage_paranoia); - -static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc) -{ - unsigned long flags; - efi_status_t status; - - spin_lock_irqsave(&rtc_lock, flags); - status = efi_call_virt(get_time, tm, tc); - spin_unlock_irqrestore(&rtc_lock, flags); - return status; -} - -static efi_status_t virt_efi_set_time(efi_time_t *tm) -{ - unsigned long flags; - efi_status_t status; - - spin_lock_irqsave(&rtc_lock, flags); - status = efi_call_virt(set_time, tm); - spin_unlock_irqrestore(&rtc_lock, flags); - return status; -} - -static efi_status_t virt_efi_get_wakeup_time(efi_bool_t *enabled, - efi_bool_t *pending, - efi_time_t *tm) -{ - unsigned long flags; - efi_status_t status; - - spin_lock_irqsave(&rtc_lock, flags); - status = efi_call_virt(get_wakeup_time, enabled, pending, tm); - spin_unlock_irqrestore(&rtc_lock, flags); - return status; -} - -static efi_status_t virt_efi_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm) -{ - unsigned long flags; - efi_status_t status; - - spin_lock_irqsave(&rtc_lock, flags); - status = efi_call_virt(set_wakeup_time, enabled, tm); - spin_unlock_irqrestore(&rtc_lock, flags); - return status; -} - -static efi_status_t virt_efi_get_variable(efi_char16_t *name, - efi_guid_t *vendor, - u32 *attr, - unsigned long *data_size, - void *data) -{ - return efi_call_virt(get_variable, - name, vendor, attr, - data_size, data); -} - -static efi_status_t virt_efi_get_next_variable(unsigned long *name_size, - efi_char16_t *name, - efi_guid_t *vendor) -{ - return efi_call_virt(get_next_variable, - name_size, name, vendor); -} - -static efi_status_t virt_efi_set_variable(efi_char16_t *name, - efi_guid_t *vendor, - u32 attr, - unsigned long data_size, - void *data) -{ - return efi_call_virt(set_variable, - name, vendor, attr, - data_size, data); -} - -static efi_status_t virt_efi_query_variable_info(u32 attr, - u64 *storage_space, - u64 *remaining_space, - u64 *max_variable_size) -{ - if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION) - return EFI_UNSUPPORTED; - - return efi_call_virt(query_variable_info, attr, storage_space, - remaining_space, max_variable_size); -} - -static efi_status_t virt_efi_get_next_high_mono_count(u32 *count) -{ - return efi_call_virt(get_next_high_mono_count, count); -} - -static void virt_efi_reset_system(int reset_type, - efi_status_t status, - unsigned long data_size, - efi_char16_t *data) -{ - __efi_call_virt(reset_system, reset_type, status, - data_size, data); -} - -static efi_status_t virt_efi_update_capsule(efi_capsule_header_t **capsules, - unsigned long count, - unsigned long sg_list) -{ - if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION) - return EFI_UNSUPPORTED; - - return efi_call_virt(update_capsule, capsules, count, sg_list); -} - -static efi_status_t virt_efi_query_capsule_caps(efi_capsule_header_t **capsules, - unsigned long count, - u64 *max_size, - int *reset_type) -{ - if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION) - return EFI_UNSUPPORTED; - - return efi_call_virt(query_capsule_caps, capsules, count, max_size, - reset_type); -} - static efi_status_t __init phys_efi_set_virtual_address_map( unsigned long memory_map_size, unsigned long descriptor_size, @@ -244,42 +104,6 @@ static efi_status_t __init phys_efi_set_virtual_address_map( return status; } -int efi_set_rtc_mmss(const struct timespec *now) -{ - unsigned long nowtime = now->tv_sec; - efi_status_t status; - efi_time_t eft; - efi_time_cap_t cap; - struct rtc_time tm; - - status = efi.get_time(&eft, &cap); - if (status != EFI_SUCCESS) { - pr_err("Oops: efitime: can't read time!\n"); - return -1; - } - - rtc_time_to_tm(nowtime, &tm); - if (!rtc_valid_tm(&tm)) { - eft.year = tm.tm_year + 1900; - eft.month = tm.tm_mon + 1; - eft.day = tm.tm_mday; - eft.minute = tm.tm_min; - eft.second = tm.tm_sec; - eft.nanosecond = 0; - } else { - pr_err("%s: Invalid EFI RTC value: write of %lx to EFI RTC failed\n", - __func__, nowtime); - return -1; - } - - status = efi.set_time(&eft); - if (status != EFI_SUCCESS) { - pr_err("Oops: efitime: can't write time!\n"); - return -1; - } - return 0; -} - void efi_get_time(struct timespec *now) { efi_status_t status; @@ -350,6 +174,9 @@ int __init efi_memblock_x86_reserve_range(void) struct efi_info *e = &boot_params.efi_info; unsigned long pmap; + if (efi_enabled(EFI_PARAVIRT)) + return 0; + #ifdef CONFIG_X86_32 /* Can't handle data above 4GB at this time */ if (e->efi_memmap_hi) { @@ -392,69 +219,15 @@ static void __init print_efi_memmap(void) #endif /* EFI_DEBUG */ } -void __init efi_reserve_boot_services(void) -{ - void *p; - - for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { - efi_memory_desc_t *md = p; - u64 start = md->phys_addr; - u64 size = md->num_pages << EFI_PAGE_SHIFT; - - if (md->type != EFI_BOOT_SERVICES_CODE && - md->type != EFI_BOOT_SERVICES_DATA) - continue; - /* Only reserve where possible: - * - Not within any already allocated areas - * - Not over any memory area (really needed, if above?) - * - Not within any part of the kernel - * - Not the bios reserved area - */ - if ((start + size > __pa_symbol(_text) - && start <= __pa_symbol(_end)) || - !e820_all_mapped(start, start+size, E820_RAM) || - memblock_is_region_reserved(start, size)) { - /* Could not reserve, skip it */ - md->num_pages = 0; - memblock_dbg("Could not reserve boot range [0x%010llx-0x%010llx]\n", - start, start+size-1); - } else - memblock_reserve(start, size); - } -} - void __init efi_unmap_memmap(void) { clear_bit(EFI_MEMMAP, &efi.flags); if (memmap.map) { - early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size); + early_memunmap(memmap.map, memmap.nr_map * memmap.desc_size); memmap.map = NULL; } } -void __init efi_free_boot_services(void) -{ - void *p; - - for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { - efi_memory_desc_t *md = p; - unsigned long long start = md->phys_addr; - unsigned long long size = md->num_pages << EFI_PAGE_SHIFT; - - if (md->type != EFI_BOOT_SERVICES_CODE && - md->type != EFI_BOOT_SERVICES_DATA) - continue; - - /* Could not reserve boot area */ - if (!size) - continue; - - free_bootmem_late(start, size); - } - - efi_unmap_memmap(); -} - static int __init efi_systab_init(void *phys) { if (efi_enabled(EFI_64BIT)) { @@ -467,12 +240,12 @@ static int __init efi_systab_init(void *phys) if (!data) return -ENOMEM; } - systab64 = early_ioremap((unsigned long)phys, + systab64 = early_memremap((unsigned long)phys, sizeof(*systab64)); if (systab64 == NULL) { pr_err("Couldn't map the system table!\n"); if (data) - early_iounmap(data, sizeof(*data)); + early_memunmap(data, sizeof(*data)); return -ENOMEM; } @@ -504,9 +277,9 @@ static int __init efi_systab_init(void *phys) systab64->tables; tmp |= data ? data->tables : systab64->tables; - early_iounmap(systab64, sizeof(*systab64)); + early_memunmap(systab64, sizeof(*systab64)); if (data) - early_iounmap(data, sizeof(*data)); + early_memunmap(data, sizeof(*data)); #ifdef CONFIG_X86_32 if (tmp >> 32) { pr_err("EFI data located above 4GB, disabling EFI.\n"); @@ -516,7 +289,7 @@ static int __init efi_systab_init(void *phys) } else { efi_system_table_32_t *systab32; - systab32 = early_ioremap((unsigned long)phys, + systab32 = early_memremap((unsigned long)phys, sizeof(*systab32)); if (systab32 == NULL) { pr_err("Couldn't map the system table!\n"); @@ -537,7 +310,7 @@ static int __init efi_systab_init(void *phys) efi_systab.nr_tables = systab32->nr_tables; efi_systab.tables = systab32->tables; - early_iounmap(systab32, sizeof(*systab32)); + early_memunmap(systab32, sizeof(*systab32)); } efi.systab = &efi_systab; @@ -563,7 +336,7 @@ static int __init efi_runtime_init32(void) { efi_runtime_services_32_t *runtime; - runtime = early_ioremap((unsigned long)efi.systab->runtime, + runtime = early_memremap((unsigned long)efi.systab->runtime, sizeof(efi_runtime_services_32_t)); if (!runtime) { pr_err("Could not map the runtime service table!\n"); @@ -578,7 +351,7 @@ static int __init efi_runtime_init32(void) efi_phys.set_virtual_address_map = (efi_set_virtual_address_map_t *) (unsigned long)runtime->set_virtual_address_map; - early_iounmap(runtime, sizeof(efi_runtime_services_32_t)); + early_memunmap(runtime, sizeof(efi_runtime_services_32_t)); return 0; } @@ -587,7 +360,7 @@ static int __init efi_runtime_init64(void) { efi_runtime_services_64_t *runtime; - runtime = early_ioremap((unsigned long)efi.systab->runtime, + runtime = early_memremap((unsigned long)efi.systab->runtime, sizeof(efi_runtime_services_64_t)); if (!runtime) { pr_err("Could not map the runtime service table!\n"); @@ -602,7 +375,7 @@ static int __init efi_runtime_init64(void) efi_phys.set_virtual_address_map = (efi_set_virtual_address_map_t *) (unsigned long)runtime->set_virtual_address_map; - early_iounmap(runtime, sizeof(efi_runtime_services_64_t)); + early_memunmap(runtime, sizeof(efi_runtime_services_64_t)); return 0; } @@ -616,14 +389,24 @@ static int __init efi_runtime_init(void) * the runtime services table so that we can grab the physical * address of several of the EFI runtime functions, needed to * set the firmware into virtual mode. + * + * When EFI_PARAVIRT is in force then we could not map runtime + * service memory region because we do not have direct access to it. + * However, runtime services are available through proxy functions + * (e.g. in case of Xen dom0 EFI implementation they call special + * hypercall which executes relevant EFI functions) and that is why + * they are always enabled. */ - if (efi_enabled(EFI_64BIT)) - rv = efi_runtime_init64(); - else - rv = efi_runtime_init32(); - if (rv) - return rv; + if (!efi_enabled(EFI_PARAVIRT)) { + if (efi_enabled(EFI_64BIT)) + rv = efi_runtime_init64(); + else + rv = efi_runtime_init32(); + + if (rv) + return rv; + } set_bit(EFI_RUNTIME_SERVICES, &efi.flags); @@ -632,8 +415,11 @@ static int __init efi_runtime_init(void) static int __init efi_memmap_init(void) { + if (efi_enabled(EFI_PARAVIRT)) + return 0; + /* Map the EFI memory map */ - memmap.map = early_ioremap((unsigned long)memmap.phys_map, + memmap.map = early_memremap((unsigned long)memmap.phys_map, memmap.nr_map * memmap.desc_size); if (memmap.map == NULL) { pr_err("Could not map the memory map!\n"); @@ -649,62 +435,6 @@ static int __init efi_memmap_init(void) return 0; } -/* - * A number of config table entries get remapped to virtual addresses - * after entering EFI virtual mode. However, the kexec kernel requires - * their physical addresses therefore we pass them via setup_data and - * correct those entries to their respective physical addresses here. - * - * Currently only handles smbios which is necessary for some firmware - * implementation. - */ -static int __init efi_reuse_config(u64 tables, int nr_tables) -{ - int i, sz, ret = 0; - void *p, *tablep; - struct efi_setup_data *data; - - if (!efi_setup) - return 0; - - if (!efi_enabled(EFI_64BIT)) - return 0; - - data = early_memremap(efi_setup, sizeof(*data)); - if (!data) { - ret = -ENOMEM; - goto out; - } - - if (!data->smbios) - goto out_memremap; - - sz = sizeof(efi_config_table_64_t); - - p = tablep = early_memremap(tables, nr_tables * sz); - if (!p) { - pr_err("Could not map Configuration table!\n"); - ret = -ENOMEM; - goto out_memremap; - } - - for (i = 0; i < efi.systab->nr_tables; i++) { - efi_guid_t guid; - - guid = ((efi_config_table_64_t *)p)->guid; - - if (!efi_guidcmp(guid, SMBIOS_TABLE_GUID)) - ((efi_config_table_64_t *)p)->table = data->smbios; - p += sz; - } - early_iounmap(tablep, nr_tables * sz); - -out_memremap: - early_iounmap(data, sizeof(*data)); -out: - return ret; -} - void __init efi_init(void) { efi_char16_t *c16; @@ -728,8 +458,6 @@ void __init efi_init(void) if (efi_systab_init(efi_phys.systab)) return; - set_bit(EFI_SYSTEM_TABLES, &efi.flags); - efi.config_table = (unsigned long)efi.systab->tables; efi.fw_vendor = (unsigned long)efi.systab->fw_vendor; efi.runtime = (unsigned long)efi.systab->runtime; @@ -737,14 +465,14 @@ void __init efi_init(void) /* * Show what we know for posterity */ - c16 = tmp = early_ioremap(efi.systab->fw_vendor, 2); + c16 = tmp = early_memremap(efi.systab->fw_vendor, 2); if (c16) { for (i = 0; i < sizeof(vendor) - 1 && *c16; ++i) vendor[i] = *c16++; vendor[i] = '\0'; } else pr_err("Could not map the firmware vendor!\n"); - early_iounmap(tmp, 2); + early_memunmap(tmp, 2); pr_info("EFI v%u.%.02u by %s\n", efi.systab->hdr.revision >> 16, @@ -770,8 +498,6 @@ void __init efi_init(void) if (efi_memmap_init()) return; - set_bit(EFI_MEMMAP, &efi.flags); - print_efi_memmap(); } @@ -847,22 +573,6 @@ void __init old_map_region(efi_memory_desc_t *md) (unsigned long long)md->phys_addr); } -static void native_runtime_setup(void) -{ - efi.get_time = virt_efi_get_time; - efi.set_time = virt_efi_set_time; - efi.get_wakeup_time = virt_efi_get_wakeup_time; - efi.set_wakeup_time = virt_efi_set_wakeup_time; - efi.get_variable = virt_efi_get_variable; - efi.get_next_variable = virt_efi_get_next_variable; - efi.set_variable = virt_efi_set_variable; - efi.get_next_high_mono_count = virt_efi_get_next_high_mono_count; - efi.reset_system = virt_efi_reset_system; - efi.query_variable_info = virt_efi_query_variable_info; - efi.update_capsule = virt_efi_update_capsule; - efi.query_capsule_caps = virt_efi_query_capsule_caps; -} - /* Merge contiguous regions of the same type and attribute */ static void __init efi_merge_regions(void) { @@ -1049,7 +759,7 @@ static void __init kexec_enter_virtual_mode(void) */ efi.runtime_version = efi_systab.hdr.revision; - native_runtime_setup(); + efi_native_runtime_setup(); efi.set_virtual_address_map = NULL; @@ -1057,11 +767,7 @@ static void __init kexec_enter_virtual_mode(void) runtime_code_page_mkexec(); /* clean DUMMY object */ - efi.set_variable(efi_dummy_name, &EFI_DUMMY_GUID, - EFI_VARIABLE_NON_VOLATILE | - EFI_VARIABLE_BOOTSERVICE_ACCESS | - EFI_VARIABLE_RUNTIME_ACCESS, - 0, NULL); + efi_delete_dummy_variable(); #endif } @@ -1142,7 +848,7 @@ static void __init __efi_enter_virtual_mode(void) efi.runtime_version = efi_systab.hdr.revision; if (efi_is_native()) - native_runtime_setup(); + efi_native_runtime_setup(); else efi_thunk_runtime_setup(); @@ -1179,15 +885,14 @@ static void __init __efi_enter_virtual_mode(void) free_pages((unsigned long)new_memmap, pg_shift); /* clean DUMMY object */ - efi.set_variable(efi_dummy_name, &EFI_DUMMY_GUID, - EFI_VARIABLE_NON_VOLATILE | - EFI_VARIABLE_BOOTSERVICE_ACCESS | - EFI_VARIABLE_RUNTIME_ACCESS, - 0, NULL); + efi_delete_dummy_variable(); } void __init efi_enter_virtual_mode(void) { + if (efi_enabled(EFI_PARAVIRT)) + return; + if (efi_setup) kexec_enter_virtual_mode(); else @@ -1220,6 +925,9 @@ u64 efi_mem_attributes(unsigned long phys_addr) efi_memory_desc_t *md; void *p; + if (!efi_enabled(EFI_MEMMAP)) + return 0; + for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { md = p; if ((md->phys_addr <= phys_addr) && @@ -1230,86 +938,6 @@ u64 efi_mem_attributes(unsigned long phys_addr) return 0; } -/* - * Some firmware implementations refuse to boot if there's insufficient space - * in the variable store. Ensure that we never use more than a safe limit. - * - * Return EFI_SUCCESS if it is safe to write 'size' bytes to the variable - * store. - */ -efi_status_t efi_query_variable_store(u32 attributes, unsigned long size) -{ - efi_status_t status; - u64 storage_size, remaining_size, max_size; - - if (!(attributes & EFI_VARIABLE_NON_VOLATILE)) - return 0; - - status = efi.query_variable_info(attributes, &storage_size, - &remaining_size, &max_size); - if (status != EFI_SUCCESS) - return status; - - /* - * We account for that by refusing the write if permitting it would - * reduce the available space to under 5KB. This figure was provided by - * Samsung, so should be safe. - */ - if ((remaining_size - size < EFI_MIN_RESERVE) && - !efi_no_storage_paranoia) { - - /* - * Triggering garbage collection may require that the firmware - * generate a real EFI_OUT_OF_RESOURCES error. We can force - * that by attempting to use more space than is available. - */ - unsigned long dummy_size = remaining_size + 1024; - void *dummy = kzalloc(dummy_size, GFP_ATOMIC); - - if (!dummy) - return EFI_OUT_OF_RESOURCES; - - status = efi.set_variable(efi_dummy_name, &EFI_DUMMY_GUID, - EFI_VARIABLE_NON_VOLATILE | - EFI_VARIABLE_BOOTSERVICE_ACCESS | - EFI_VARIABLE_RUNTIME_ACCESS, - dummy_size, dummy); - - if (status == EFI_SUCCESS) { - /* - * This should have failed, so if it didn't make sure - * that we delete it... - */ - efi.set_variable(efi_dummy_name, &EFI_DUMMY_GUID, - EFI_VARIABLE_NON_VOLATILE | - EFI_VARIABLE_BOOTSERVICE_ACCESS | - EFI_VARIABLE_RUNTIME_ACCESS, - 0, dummy); - } - - kfree(dummy); - - /* - * The runtime code may now have triggered a garbage collection - * run, so check the variable info again - */ - status = efi.query_variable_info(attributes, &storage_size, - &remaining_size, &max_size); - - if (status != EFI_SUCCESS) - return status; - - /* - * There still isn't enough room, so return an error - */ - if (remaining_size - size < EFI_MIN_RESERVE) - return EFI_OUT_OF_RESOURCES; - } - - return EFI_SUCCESS; -} -EXPORT_SYMBOL_GPL(efi_query_variable_store); - static int __init parse_efi_cmdline(char *str) { if (*str == '=') @@ -1321,22 +949,3 @@ static int __init parse_efi_cmdline(char *str) return 0; } early_param("efi", parse_efi_cmdline); - -void __init efi_apply_memmap_quirks(void) -{ - /* - * Once setup is done earlier, unmap the EFI memory map on mismatched - * firmware/kernel architectures since there is no support for runtime - * services. - */ - if (!efi_runtime_supported()) { - pr_info("efi: Setup done, disabling due to 32/64-bit mismatch\n"); - efi_unmap_memmap(); - } - - /* - * UV doesn't support the new EFI pagetable mapping yet. - */ - if (is_uv_system()) - set_bit(EFI_OLD_MEMMAP, &efi.flags); -} diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c new file mode 100644 index 000000000000..1c7380da65ff --- /dev/null +++ b/arch/x86/platform/efi/quirks.c @@ -0,0 +1,290 @@ +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/time.h> +#include <linux/types.h> +#include <linux/efi.h> +#include <linux/slab.h> +#include <linux/memblock.h> +#include <linux/bootmem.h> +#include <linux/acpi.h> +#include <asm/efi.h> +#include <asm/uv/uv.h> + +#define EFI_MIN_RESERVE 5120 + +#define EFI_DUMMY_GUID \ + EFI_GUID(0x4424ac57, 0xbe4b, 0x47dd, 0x9e, 0x97, 0xed, 0x50, 0xf0, 0x9f, 0x92, 0xa9) + +static efi_char16_t efi_dummy_name[6] = { 'D', 'U', 'M', 'M', 'Y', 0 }; + +static bool efi_no_storage_paranoia; + +/* + * Some firmware implementations refuse to boot if there's insufficient + * space in the variable store. The implementation of garbage collection + * in some FW versions causes stale (deleted) variables to take up space + * longer than intended and space is only freed once the store becomes + * almost completely full. + * + * Enabling this option disables the space checks in + * efi_query_variable_store() and forces garbage collection. + * + * Only enable this option if deleting EFI variables does not free up + * space in your variable store, e.g. if despite deleting variables + * you're unable to create new ones. + */ +static int __init setup_storage_paranoia(char *arg) +{ + efi_no_storage_paranoia = true; + return 0; +} +early_param("efi_no_storage_paranoia", setup_storage_paranoia); + +/* + * Deleting the dummy variable which kicks off garbage collection +*/ +void efi_delete_dummy_variable(void) +{ + efi.set_variable(efi_dummy_name, &EFI_DUMMY_GUID, + EFI_VARIABLE_NON_VOLATILE | + EFI_VARIABLE_BOOTSERVICE_ACCESS | + EFI_VARIABLE_RUNTIME_ACCESS, + 0, NULL); +} + +/* + * Some firmware implementations refuse to boot if there's insufficient space + * in the variable store. Ensure that we never use more than a safe limit. + * + * Return EFI_SUCCESS if it is safe to write 'size' bytes to the variable + * store. + */ +efi_status_t efi_query_variable_store(u32 attributes, unsigned long size) +{ + efi_status_t status; + u64 storage_size, remaining_size, max_size; + + if (!(attributes & EFI_VARIABLE_NON_VOLATILE)) + return 0; + + status = efi.query_variable_info(attributes, &storage_size, + &remaining_size, &max_size); + if (status != EFI_SUCCESS) + return status; + + /* + * We account for that by refusing the write if permitting it would + * reduce the available space to under 5KB. This figure was provided by + * Samsung, so should be safe. + */ + if ((remaining_size - size < EFI_MIN_RESERVE) && + !efi_no_storage_paranoia) { + + /* + * Triggering garbage collection may require that the firmware + * generate a real EFI_OUT_OF_RESOURCES error. We can force + * that by attempting to use more space than is available. + */ + unsigned long dummy_size = remaining_size + 1024; + void *dummy = kzalloc(dummy_size, GFP_ATOMIC); + + if (!dummy) + return EFI_OUT_OF_RESOURCES; + + status = efi.set_variable(efi_dummy_name, &EFI_DUMMY_GUID, + EFI_VARIABLE_NON_VOLATILE | + EFI_VARIABLE_BOOTSERVICE_ACCESS | + EFI_VARIABLE_RUNTIME_ACCESS, + dummy_size, dummy); + + if (status == EFI_SUCCESS) { + /* + * This should have failed, so if it didn't make sure + * that we delete it... + */ + efi_delete_dummy_variable(); + } + + kfree(dummy); + + /* + * The runtime code may now have triggered a garbage collection + * run, so check the variable info again + */ + status = efi.query_variable_info(attributes, &storage_size, + &remaining_size, &max_size); + + if (status != EFI_SUCCESS) + return status; + + /* + * There still isn't enough room, so return an error + */ + if (remaining_size - size < EFI_MIN_RESERVE) + return EFI_OUT_OF_RESOURCES; + } + + return EFI_SUCCESS; +} +EXPORT_SYMBOL_GPL(efi_query_variable_store); + +/* + * The UEFI specification makes it clear that the operating system is free to do + * whatever it wants with boot services code after ExitBootServices() has been + * called. Ignoring this recommendation a significant bunch of EFI implementations + * continue calling into boot services code (SetVirtualAddressMap). In order to + * work around such buggy implementations we reserve boot services region during + * EFI init and make sure it stays executable. Then, after SetVirtualAddressMap(), it +* is discarded. +*/ +void __init efi_reserve_boot_services(void) +{ + void *p; + + for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { + efi_memory_desc_t *md = p; + u64 start = md->phys_addr; + u64 size = md->num_pages << EFI_PAGE_SHIFT; + + if (md->type != EFI_BOOT_SERVICES_CODE && + md->type != EFI_BOOT_SERVICES_DATA) + continue; + /* Only reserve where possible: + * - Not within any already allocated areas + * - Not over any memory area (really needed, if above?) + * - Not within any part of the kernel + * - Not the bios reserved area + */ + if ((start + size > __pa_symbol(_text) + && start <= __pa_symbol(_end)) || + !e820_all_mapped(start, start+size, E820_RAM) || + memblock_is_region_reserved(start, size)) { + /* Could not reserve, skip it */ + md->num_pages = 0; + memblock_dbg("Could not reserve boot range [0x%010llx-0x%010llx]\n", + start, start+size-1); + } else + memblock_reserve(start, size); + } +} + +void __init efi_free_boot_services(void) +{ + void *p; + + for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { + efi_memory_desc_t *md = p; + unsigned long long start = md->phys_addr; + unsigned long long size = md->num_pages << EFI_PAGE_SHIFT; + + if (md->type != EFI_BOOT_SERVICES_CODE && + md->type != EFI_BOOT_SERVICES_DATA) + continue; + + /* Could not reserve boot area */ + if (!size) + continue; + + free_bootmem_late(start, size); + } + + efi_unmap_memmap(); +} + +/* + * A number of config table entries get remapped to virtual addresses + * after entering EFI virtual mode. However, the kexec kernel requires + * their physical addresses therefore we pass them via setup_data and + * correct those entries to their respective physical addresses here. + * + * Currently only handles smbios which is necessary for some firmware + * implementation. + */ +int __init efi_reuse_config(u64 tables, int nr_tables) +{ + int i, sz, ret = 0; + void *p, *tablep; + struct efi_setup_data *data; + + if (!efi_setup) + return 0; + + if (!efi_enabled(EFI_64BIT)) + return 0; + + data = early_memremap(efi_setup, sizeof(*data)); + if (!data) { + ret = -ENOMEM; + goto out; + } + + if (!data->smbios) + goto out_memremap; + + sz = sizeof(efi_config_table_64_t); + + p = tablep = early_memremap(tables, nr_tables * sz); + if (!p) { + pr_err("Could not map Configuration table!\n"); + ret = -ENOMEM; + goto out_memremap; + } + + for (i = 0; i < efi.systab->nr_tables; i++) { + efi_guid_t guid; + + guid = ((efi_config_table_64_t *)p)->guid; + + if (!efi_guidcmp(guid, SMBIOS_TABLE_GUID)) + ((efi_config_table_64_t *)p)->table = data->smbios; + p += sz; + } + early_memunmap(tablep, nr_tables * sz); + +out_memremap: + early_memunmap(data, sizeof(*data)); +out: + return ret; +} + +void __init efi_apply_memmap_quirks(void) +{ + /* + * Once setup is done earlier, unmap the EFI memory map on mismatched + * firmware/kernel architectures since there is no support for runtime + * services. + */ + if (!efi_runtime_supported()) { + pr_info("efi: Setup done, disabling due to 32/64-bit mismatch\n"); + efi_unmap_memmap(); + } + + /* + * UV doesn't support the new EFI pagetable mapping yet. + */ + if (is_uv_system()) + set_bit(EFI_OLD_MEMMAP, &efi.flags); +} + +/* + * For most modern platforms the preferred method of powering off is via + * ACPI. However, there are some that are known to require the use of + * EFI runtime services and for which ACPI does not work at all. + * + * Using EFI is a last resort, to be used only if no other option + * exists. + */ +bool efi_reboot_required(void) +{ + if (!acpi_gbl_reduced_hardware) + return false; + + efi_reboot_quirk_mode = EFI_RESET_WARM; + return true; +} + +bool efi_poweroff_required(void) +{ + return !!acpi_gbl_reduced_hardware; +} diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 424f4c97a44d..6ec7910f59bf 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -165,7 +165,7 @@ static void fix_processor_context(void) * by __save_processor_state() * @ctxt - structure to load the registers contents from */ -static void __restore_processor_state(struct saved_context *ctxt) +static void notrace __restore_processor_state(struct saved_context *ctxt) { if (ctxt->misc_enable_saved) wrmsrl(MSR_IA32_MISC_ENABLE, ctxt->misc_enable); @@ -239,7 +239,7 @@ static void __restore_processor_state(struct saved_context *ctxt) } /* Needed by apm.c */ -void restore_processor_state(void) +void notrace restore_processor_state(void) { __restore_processor_state(&saved_context); } diff --git a/arch/x86/um/asm/processor.h b/arch/x86/um/asm/processor.h index 04f82e020f2b..2a206d2b14ab 100644 --- a/arch/x86/um/asm/processor.h +++ b/arch/x86/um/asm/processor.h @@ -25,7 +25,8 @@ static inline void rep_nop(void) __asm__ __volatile__("rep;nop": : :"memory"); } -#define cpu_relax() rep_nop() +#define cpu_relax() rep_nop() +#define cpu_relax_lowlatency() cpu_relax() #include <asm/processor-generic.h> diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile index 96ab2c09cb68..7322755f337a 100644 --- a/arch/x86/xen/Makefile +++ b/arch/x86/xen/Makefile @@ -22,3 +22,4 @@ obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o obj-$(CONFIG_XEN_DOM0) += apic.o vga.o obj-$(CONFIG_SWIOTLB_XEN) += pci-swiotlb-xen.o +obj-$(CONFIG_XEN_EFI) += efi.o diff --git a/arch/x86/xen/efi.c b/arch/x86/xen/efi.c new file mode 100644 index 000000000000..a02e09e18f57 --- /dev/null +++ b/arch/x86/xen/efi.c @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2014 Oracle Co., Daniel Kiper + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/efi.h> +#include <linux/init.h> +#include <linux/string.h> + +#include <xen/xen-ops.h> + +#include <asm/setup.h> + +void __init xen_efi_init(void) +{ + efi_system_table_t *efi_systab_xen; + + efi_systab_xen = xen_efi_probe(); + + if (efi_systab_xen == NULL) + return; + + strncpy((char *)&boot_params.efi_info.efi_loader_signature, "Xen", + sizeof(boot_params.efi_info.efi_loader_signature)); + boot_params.efi_info.efi_systab = (__u32)__pa(efi_systab_xen); + boot_params.efi_info.efi_systab_hi = (__u32)(__pa(efi_systab_xen) >> 32); + + set_bit(EFI_BOOT, &efi.flags); + set_bit(EFI_PARAVIRT, &efi.flags); + set_bit(EFI_64BIT, &efi.flags); +} diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index ffb101e45731..94813515fdd6 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1718,6 +1718,8 @@ asmlinkage __visible void __init xen_start_kernel(void) xen_setup_runstate_info(0); + xen_efi_init(); + /* Start the world */ #ifdef CONFIG_X86_32 i386_start_kernel(); diff --git a/arch/x86/xen/grant-table.c b/arch/x86/xen/grant-table.c index c98583588580..ebfa9b2c871d 100644 --- a/arch/x86/xen/grant-table.c +++ b/arch/x86/xen/grant-table.c @@ -36,99 +36,133 @@ #include <linux/sched.h> #include <linux/mm.h> +#include <linux/slab.h> #include <linux/vmalloc.h> #include <xen/interface/xen.h> #include <xen/page.h> #include <xen/grant_table.h> +#include <xen/xen.h> #include <asm/pgtable.h> -static int map_pte_fn(pte_t *pte, struct page *pmd_page, - unsigned long addr, void *data) +static struct gnttab_vm_area { + struct vm_struct *area; + pte_t **ptes; +} gnttab_shared_vm_area, gnttab_status_vm_area; + +int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes, + unsigned long max_nr_gframes, + void **__shared) { - unsigned long **frames = (unsigned long **)data; + void *shared = *__shared; + unsigned long addr; + unsigned long i; - set_pte_at(&init_mm, addr, pte, mfn_pte((*frames)[0], PAGE_KERNEL)); - (*frames)++; - return 0; -} + if (shared == NULL) + *__shared = shared = gnttab_shared_vm_area.area->addr; -/* - * This function is used to map shared frames to store grant status. It is - * different from map_pte_fn above, the frames type here is uint64_t. - */ -static int map_pte_fn_status(pte_t *pte, struct page *pmd_page, - unsigned long addr, void *data) -{ - uint64_t **frames = (uint64_t **)data; + addr = (unsigned long)shared; + + for (i = 0; i < nr_gframes; i++) { + set_pte_at(&init_mm, addr, gnttab_shared_vm_area.ptes[i], + mfn_pte(frames[i], PAGE_KERNEL)); + addr += PAGE_SIZE; + } - set_pte_at(&init_mm, addr, pte, mfn_pte((*frames)[0], PAGE_KERNEL)); - (*frames)++; return 0; } -static int unmap_pte_fn(pte_t *pte, struct page *pmd_page, - unsigned long addr, void *data) +int arch_gnttab_map_status(uint64_t *frames, unsigned long nr_gframes, + unsigned long max_nr_gframes, + grant_status_t **__shared) { + grant_status_t *shared = *__shared; + unsigned long addr; + unsigned long i; + + if (shared == NULL) + *__shared = shared = gnttab_status_vm_area.area->addr; + + addr = (unsigned long)shared; + + for (i = 0; i < nr_gframes; i++) { + set_pte_at(&init_mm, addr, gnttab_status_vm_area.ptes[i], + mfn_pte(frames[i], PAGE_KERNEL)); + addr += PAGE_SIZE; + } - set_pte_at(&init_mm, addr, pte, __pte(0)); return 0; } -int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes, - unsigned long max_nr_gframes, - void **__shared) +void arch_gnttab_unmap(void *shared, unsigned long nr_gframes) { - int rc; - void *shared = *__shared; + pte_t **ptes; + unsigned long addr; + unsigned long i; - if (shared == NULL) { - struct vm_struct *area = - alloc_vm_area(PAGE_SIZE * max_nr_gframes, NULL); - BUG_ON(area == NULL); - shared = area->addr; - *__shared = shared; - } + if (shared == gnttab_status_vm_area.area->addr) + ptes = gnttab_status_vm_area.ptes; + else + ptes = gnttab_shared_vm_area.ptes; - rc = apply_to_page_range(&init_mm, (unsigned long)shared, - PAGE_SIZE * nr_gframes, - map_pte_fn, &frames); - return rc; + addr = (unsigned long)shared; + + for (i = 0; i < nr_gframes; i++) { + set_pte_at(&init_mm, addr, ptes[i], __pte(0)); + addr += PAGE_SIZE; + } } -int arch_gnttab_map_status(uint64_t *frames, unsigned long nr_gframes, - unsigned long max_nr_gframes, - grant_status_t **__shared) +static int arch_gnttab_valloc(struct gnttab_vm_area *area, unsigned nr_frames) { - int rc; - grant_status_t *shared = *__shared; + area->ptes = kmalloc(sizeof(pte_t *) * nr_frames, GFP_KERNEL); + if (area->ptes == NULL) + return -ENOMEM; - if (shared == NULL) { - /* No need to pass in PTE as we are going to do it - * in apply_to_page_range anyhow. */ - struct vm_struct *area = - alloc_vm_area(PAGE_SIZE * max_nr_gframes, NULL); - BUG_ON(area == NULL); - shared = area->addr; - *__shared = shared; + area->area = alloc_vm_area(PAGE_SIZE * nr_frames, area->ptes); + if (area->area == NULL) { + kfree(area->ptes); + return -ENOMEM; } - rc = apply_to_page_range(&init_mm, (unsigned long)shared, - PAGE_SIZE * nr_gframes, - map_pte_fn_status, &frames); - return rc; + return 0; } -void arch_gnttab_unmap(void *shared, unsigned long nr_gframes) +static void arch_gnttab_vfree(struct gnttab_vm_area *area) +{ + free_vm_area(area->area); + kfree(area->ptes); +} + +int arch_gnttab_init(unsigned long nr_shared, unsigned long nr_status) { - apply_to_page_range(&init_mm, (unsigned long)shared, - PAGE_SIZE * nr_gframes, unmap_pte_fn, NULL); + int ret; + + if (!xen_pv_domain()) + return 0; + + ret = arch_gnttab_valloc(&gnttab_shared_vm_area, nr_shared); + if (ret < 0) + return ret; + + /* + * Always allocate the space for the status frames in case + * we're migrated to a host with V2 support. + */ + ret = arch_gnttab_valloc(&gnttab_status_vm_area, nr_status); + if (ret < 0) + goto err; + + return 0; + err: + arch_gnttab_vfree(&gnttab_shared_vm_area); + return -ENOMEM; } + #ifdef CONFIG_XEN_PVH #include <xen/balloon.h> #include <xen/events.h> -#include <xen/xen.h> #include <linux/slab.h> static int __init xlated_setup_gnttab_pages(void) { diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 97d87659f779..28c7e0be56e4 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -105,6 +105,14 @@ static inline void __init xen_init_apic(void) } #endif +#ifdef CONFIG_XEN_EFI +extern void xen_efi_init(void); +#else +static inline void __init xen_efi_init(void) +{ +} +#endif + /* Declare an asm function, along with symbols needed to make it inlineable */ #define DECL_ASM(ret, name, ...) \ diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h index abb59708a3b7..b61bdf0eea25 100644 --- a/arch/xtensa/include/asm/processor.h +++ b/arch/xtensa/include/asm/processor.h @@ -182,6 +182,7 @@ extern unsigned long get_wchan(struct task_struct *p); #define KSTK_ESP(tsk) (task_pt_regs(tsk)->areg[1]) #define cpu_relax() barrier() +#define cpu_relax_lowlatency() cpu_relax() /* Special register access. */ diff --git a/arch/xtensa/kernel/vectors.S b/arch/xtensa/kernel/vectors.S index f9e1ec346e35..8453e6e39895 100644 --- a/arch/xtensa/kernel/vectors.S +++ b/arch/xtensa/kernel/vectors.S @@ -376,38 +376,42 @@ _DoubleExceptionVector_WindowOverflow: beqz a2, 1f # if at start of vector, don't restore addi a0, a0, -128 - bbsi a0, 8, 1f # don't restore except for overflow 8 and 12 - bbsi a0, 7, 2f + bbsi.l a0, 8, 1f # don't restore except for overflow 8 and 12 + + /* + * This fixup handler is for the extremely unlikely case where the + * overflow handler's reference thru a0 gets a hardware TLB refill + * that bumps out the (distinct, aliasing) TLB entry that mapped its + * prior references thru a9/a13, and where our reference now thru + * a9/a13 gets a 2nd-level miss exception (not hardware TLB refill). + */ + movi a2, window_overflow_restore_a0_fixup + s32i a2, a3, EXC_TABLE_FIXUP + l32i a2, a3, EXC_TABLE_DOUBLE_SAVE + xsr a3, excsave1 + + bbsi.l a0, 7, 2f /* * Restore a0 as saved by _WindowOverflow8(). - * - * FIXME: we really need a fixup handler for this L32E, - * for the extremely unlikely case where the overflow handler's - * reference thru a0 gets a hardware TLB refill that bumps out - * the (distinct, aliasing) TLB entry that mapped its prior - * references thru a9, and where our reference now thru a9 - * gets a 2nd-level miss exception (not hardware TLB refill). */ - l32e a2, a9, -16 - wsr a2, depc # replace the saved a0 - j 1f + l32e a0, a9, -16 + wsr a0, depc # replace the saved a0 + j 3f 2: /* * Restore a0 as saved by _WindowOverflow12(). - * - * FIXME: we really need a fixup handler for this L32E, - * for the extremely unlikely case where the overflow handler's - * reference thru a0 gets a hardware TLB refill that bumps out - * the (distinct, aliasing) TLB entry that mapped its prior - * references thru a13, and where our reference now thru a13 - * gets a 2nd-level miss exception (not hardware TLB refill). */ - l32e a2, a13, -16 - wsr a2, depc # replace the saved a0 + l32e a0, a13, -16 + wsr a0, depc # replace the saved a0 +3: + xsr a3, excsave1 + movi a0, 0 + s32i a0, a3, EXC_TABLE_FIXUP + s32i a2, a3, EXC_TABLE_DOUBLE_SAVE 1: /* * Restore WindowBase while leaving all address registers restored. @@ -449,6 +453,7 @@ _DoubleExceptionVector_WindowOverflow: s32i a0, a2, PT_DEPC +_DoubleExceptionVector_handle_exception: addx4 a0, a0, a3 l32i a0, a0, EXC_TABLE_FAST_USER xsr a3, excsave1 @@ -464,11 +469,120 @@ _DoubleExceptionVector_WindowOverflow: rotw -3 j 1b - .end literal_prefix ENDPROC(_DoubleExceptionVector) /* + * Fixup handler for TLB miss in double exception handler for window owerflow. + * We get here with windowbase set to the window that was being spilled and + * a0 trashed. a0 bit 7 determines if this is a call8 (bit clear) or call12 + * (bit set) window. + * + * We do the following here: + * - go to the original window retaining a0 value; + * - set up exception stack to return back to appropriate a0 restore code + * (we'll need to rotate window back and there's no place to save this + * information, use different return address for that); + * - handle the exception; + * - go to the window that was being spilled; + * - set up window_overflow_restore_a0_fixup as a fixup routine; + * - reload a0; + * - restore the original window; + * - reset the default fixup routine; + * - return to user. By the time we get to this fixup handler all information + * about the conditions of the original double exception that happened in + * the window overflow handler is lost, so we just return to userspace to + * retry overflow from start. + * + * a0: value of depc, original value in depc + * a2: trashed, original value in EXC_TABLE_DOUBLE_SAVE + * a3: exctable, original value in excsave1 + */ + +ENTRY(window_overflow_restore_a0_fixup) + + rsr a0, ps + extui a0, a0, PS_OWB_SHIFT, PS_OWB_WIDTH + rsr a2, windowbase + sub a0, a2, a0 + extui a0, a0, 0, 3 + l32i a2, a3, EXC_TABLE_DOUBLE_SAVE + xsr a3, excsave1 + + _beqi a0, 1, .Lhandle_1 + _beqi a0, 3, .Lhandle_3 + + .macro overflow_fixup_handle_exception_pane n + + rsr a0, depc + rotw -\n + + xsr a3, excsave1 + wsr a2, depc + l32i a2, a3, EXC_TABLE_KSTK + s32i a0, a2, PT_AREG0 + + movi a0, .Lrestore_\n + s32i a0, a2, PT_DEPC + rsr a0, exccause + j _DoubleExceptionVector_handle_exception + + .endm + + overflow_fixup_handle_exception_pane 2 +.Lhandle_1: + overflow_fixup_handle_exception_pane 1 +.Lhandle_3: + overflow_fixup_handle_exception_pane 3 + + .macro overflow_fixup_restore_a0_pane n + + rotw \n + /* Need to preserve a0 value here to be able to handle exception + * that may occur on a0 reload from stack. It may occur because + * TLB miss handler may not be atomic and pointer to page table + * may be lost before we get here. There are no free registers, + * so we need to use EXC_TABLE_DOUBLE_SAVE area. + */ + xsr a3, excsave1 + s32i a2, a3, EXC_TABLE_DOUBLE_SAVE + movi a2, window_overflow_restore_a0_fixup + s32i a2, a3, EXC_TABLE_FIXUP + l32i a2, a3, EXC_TABLE_DOUBLE_SAVE + xsr a3, excsave1 + bbsi.l a0, 7, 1f + l32e a0, a9, -16 + j 2f +1: + l32e a0, a13, -16 +2: + rotw -\n + + .endm + +.Lrestore_2: + overflow_fixup_restore_a0_pane 2 + +.Lset_default_fixup: + xsr a3, excsave1 + s32i a2, a3, EXC_TABLE_DOUBLE_SAVE + movi a2, 0 + s32i a2, a3, EXC_TABLE_FIXUP + l32i a2, a3, EXC_TABLE_DOUBLE_SAVE + xsr a3, excsave1 + rfe + +.Lrestore_1: + overflow_fixup_restore_a0_pane 1 + j .Lset_default_fixup +.Lrestore_3: + overflow_fixup_restore_a0_pane 3 + j .Lset_default_fixup + +ENDPROC(window_overflow_restore_a0_fixup) + + .end literal_prefix +/* * Debug interrupt vector * * There is not much space here, so simply jump to another handler. diff --git a/arch/xtensa/kernel/vmlinux.lds.S b/arch/xtensa/kernel/vmlinux.lds.S index ee32c0085dff..d16db6df86f8 100644 --- a/arch/xtensa/kernel/vmlinux.lds.S +++ b/arch/xtensa/kernel/vmlinux.lds.S @@ -269,13 +269,13 @@ SECTIONS .UserExceptionVector.literal) SECTION_VECTOR (_DoubleExceptionVector_literal, .DoubleExceptionVector.literal, - DOUBLEEXC_VECTOR_VADDR - 16, + DOUBLEEXC_VECTOR_VADDR - 40, SIZEOF(.UserExceptionVector.text), .UserExceptionVector.text) SECTION_VECTOR (_DoubleExceptionVector_text, .DoubleExceptionVector.text, DOUBLEEXC_VECTOR_VADDR, - 32, + 40, .DoubleExceptionVector.literal) . = (LOADADDR( .DoubleExceptionVector.text ) + SIZEOF( .DoubleExceptionVector.text ) + 3) & ~ 3; diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c index 4224256bb215..77ed20209ca5 100644 --- a/arch/xtensa/mm/init.c +++ b/arch/xtensa/mm/init.c @@ -191,7 +191,7 @@ int __init mem_reserve(unsigned long start, unsigned long end, int must_exist) return -EINVAL; } - if (it && start - it->start < bank_sz) { + if (it && start - it->start <= bank_sz) { if (start == it->start) { if (end - it->start < bank_sz) { it->start = end; |