diff options
Diffstat (limited to 'arch')
824 files changed, 16395 insertions, 14937 deletions
diff --git a/arch/Kconfig b/arch/Kconfig index 4e949e58b192..ba1b626bca00 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -511,6 +511,74 @@ config ARCH_HAS_ELF_RANDOMIZE - arch_mmap_rnd() - arch_randomize_brk() +config HAVE_ARCH_MMAP_RND_BITS + bool + help + An arch should select this symbol if it supports setting a variable + number of bits for use in establishing the base address for mmap + allocations, has MMU enabled and provides values for both: + - ARCH_MMAP_RND_BITS_MIN + - ARCH_MMAP_RND_BITS_MAX + +config ARCH_MMAP_RND_BITS_MIN + int + +config ARCH_MMAP_RND_BITS_MAX + int + +config ARCH_MMAP_RND_BITS_DEFAULT + int + +config ARCH_MMAP_RND_BITS + int "Number of bits to use for ASLR of mmap base address" if EXPERT + range ARCH_MMAP_RND_BITS_MIN ARCH_MMAP_RND_BITS_MAX + default ARCH_MMAP_RND_BITS_DEFAULT if ARCH_MMAP_RND_BITS_DEFAULT + default ARCH_MMAP_RND_BITS_MIN + depends on HAVE_ARCH_MMAP_RND_BITS + help + This value can be used to select the number of bits to use to + determine the random offset to the base address of vma regions + resulting from mmap allocations. This value will be bounded + by the architecture's minimum and maximum supported values. + + This value can be changed after boot using the + /proc/sys/vm/mmap_rnd_bits tunable + +config HAVE_ARCH_MMAP_RND_COMPAT_BITS + bool + help + An arch should select this symbol if it supports running applications + in compatibility mode, supports setting a variable number of bits for + use in establishing the base address for mmap allocations, has MMU + enabled and provides values for both: + - ARCH_MMAP_RND_COMPAT_BITS_MIN + - ARCH_MMAP_RND_COMPAT_BITS_MAX + +config ARCH_MMAP_RND_COMPAT_BITS_MIN + int + +config ARCH_MMAP_RND_COMPAT_BITS_MAX + int + +config ARCH_MMAP_RND_COMPAT_BITS_DEFAULT + int + +config ARCH_MMAP_RND_COMPAT_BITS + int "Number of bits to use for ASLR of mmap base address for compatible applications" if EXPERT + range ARCH_MMAP_RND_COMPAT_BITS_MIN ARCH_MMAP_RND_COMPAT_BITS_MAX + default ARCH_MMAP_RND_COMPAT_BITS_DEFAULT if ARCH_MMAP_RND_COMPAT_BITS_DEFAULT + default ARCH_MMAP_RND_COMPAT_BITS_MIN + depends on HAVE_ARCH_MMAP_RND_COMPAT_BITS + help + This value can be used to select the number of bits to use to + determine the random offset to the base address of vma regions + resulting from mmap allocations for compatible applications This + value will be bounded by the architecture's minimum and maximum + supported values. + + This value can be changed after boot using the + /proc/sys/vm/mmap_rnd_compat_bits tunable + config HAVE_COPY_THREAD_TLS bool help diff --git a/arch/alpha/include/uapi/asm/mman.h b/arch/alpha/include/uapi/asm/mman.h index f2f949671798..ab336c06153e 100644 --- a/arch/alpha/include/uapi/asm/mman.h +++ b/arch/alpha/include/uapi/asm/mman.h @@ -47,8 +47,10 @@ #define MADV_WILLNEED 3 /* will need these pages */ #define MADV_SPACEAVAIL 5 /* ensure resources are available */ #define MADV_DONTNEED 6 /* don't need these pages */ +#define MADV_FREE 7 /* free pages only if memory pressure */ /* common/generic parameters */ +#define MADV_FREE 8 /* free pages only if memory pressure */ #define MADV_REMOVE 9 /* remove these pages & resources */ #define MADV_DONTFORK 10 /* don't inherit across fork */ #define MADV_DOFORK 11 /* do inherit across fork */ diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h index 9a20821b111c..c5fb9e6bc3a5 100644 --- a/arch/alpha/include/uapi/asm/socket.h +++ b/arch/alpha/include/uapi/asm/socket.h @@ -92,4 +92,7 @@ #define SO_ATTACH_BPF 50 #define SO_DETACH_BPF SO_DETACH_FILTER +#define SO_ATTACH_REUSEPORT_CBPF 51 +#define SO_ATTACH_REUSEPORT_EBPF 52 + #endif /* _UAPI_ASM_SOCKET_H */ diff --git a/arch/alpha/kernel/module.c b/arch/alpha/kernel/module.c index 2fd00b7077e4..936bc8f89a67 100644 --- a/arch/alpha/kernel/module.c +++ b/arch/alpha/kernel/module.c @@ -160,7 +160,7 @@ apply_relocate_add(Elf64_Shdr *sechdrs, const char *strtab, /* The small sections were sorted to the end of the segment. The following should definitely cover them. */ - gp = (u64)me->module_core + me->core_size - 0x8000; + gp = (u64)me->core_layout.base + me->core_layout.size - 0x8000; got = sechdrs[me->arch.gotsecindex].sh_addr; for (i = 0; i < n; i++) { diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 6312f607932f..76dde9db7934 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -73,9 +73,6 @@ config STACKTRACE_SUPPORT def_bool y select STACKTRACE -config HAVE_LATENCYTOP_SUPPORT - def_bool y - config HAVE_ARCH_TRANSPARENT_HUGEPAGE def_bool y depends on ARC_MMU_V4 diff --git a/arch/arc/kernel/unwind.c b/arch/arc/kernel/unwind.c index 5eb707640e9c..0587bf121d11 100644 --- a/arch/arc/kernel/unwind.c +++ b/arch/arc/kernel/unwind.c @@ -385,8 +385,8 @@ void *unwind_add_table(struct module *module, const void *table_start, return NULL; init_unwind_table(table, module->name, - module->module_core, module->core_size, - module->module_init, module->init_size, + module->core_layout.base, module->core_layout.size, + module->init_layout.base, module->init_layout.size, table_start, table_size, NULL, 0); diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index ff7ff6cbb811..b65f797e9ad6 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -617,7 +617,7 @@ void flush_dcache_page(struct page *page) */ if (!mapping_mapped(mapping)) { clear_bit(PG_dc_clean, &page->flags); - } else if (page_mapped(page)) { + } else if (page_mapcount(page)) { /* kernel reading from page with U-mapping */ phys_addr_t paddr = (unsigned long)page_address(page); @@ -857,7 +857,7 @@ void copy_user_highpage(struct page *to, struct page *from, * For !VIPT cache, all of this gets compiled out as * addr_not_cache_congruent() is 0 */ - if (page_mapped(from) && addr_not_cache_congruent(kfrom, u_vaddr)) { + if (page_mapcount(from) && addr_not_cache_congruent(kfrom, u_vaddr)) { __flush_dcache_page((unsigned long)kfrom, u_vaddr); clean_src_k_mappings = 1; } diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 34e1569a11ee..6a889afa6a2c 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -2,6 +2,7 @@ config ARM bool default y select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE + select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAVE_CUSTOM_GPIO_H @@ -20,6 +21,7 @@ config ARM select GENERIC_ALLOCATOR select GENERIC_ATOMIC64 if (CPU_V7M || CPU_V6 || !CPU_32v6K || !AEABI) select GENERIC_CLOCKEVENTS_BROADCAST if SMP + select GENERIC_EARLY_IOREMAP select GENERIC_IDLE_POLL_SETUP select GENERIC_IRQ_PROBE select GENERIC_IRQ_SHOW @@ -33,10 +35,12 @@ config ARM select HARDIRQS_SW_RESEND select HAVE_ARCH_AUDITSYSCALL if (AEABI && !OABI_COMPAT) select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6 - select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 - select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 + select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU + select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 && MMU + select HAVE_ARCH_MMAP_RND_BITS if MMU select HAVE_ARCH_SECCOMP_FILTER if (AEABI && !OABI_COMPAT) select HAVE_ARCH_TRACEHOOK + select HAVE_ARM_SMCCC if CPU_V7 select HAVE_BPF_JIT select HAVE_CC_STACKPROTECTOR select HAVE_CONTEXT_TRACKING @@ -45,7 +49,7 @@ config ARM select HAVE_DMA_API_DEBUG select HAVE_DMA_ATTRS select HAVE_DMA_CONTIGUOUS if MMU - select HAVE_DYNAMIC_FTRACE if (!XIP_KERNEL) && !CPU_ENDIAN_BE32 + select HAVE_DYNAMIC_FTRACE if (!XIP_KERNEL) && !CPU_ENDIAN_BE32 && MMU select HAVE_EFFICIENT_UNALIGNED_ACCESS if (CPU_V6 || CPU_V6K || CPU_V7) && MMU select HAVE_FTRACE_MCOUNT_RECORD if (!XIP_KERNEL) select HAVE_FUNCTION_GRAPH_TRACER if (!THUMB2_KERNEL) @@ -164,11 +168,6 @@ config STACKTRACE_SUPPORT bool default y -config HAVE_LATENCYTOP_SUPPORT - bool - depends on !SMP - default y - config LOCKDEP_SUPPORT bool default y @@ -308,6 +307,14 @@ config MMU Select if you want MMU-based virtualised addressing space support by paged memory management. If unsure, say 'Y'. +config ARCH_MMAP_RND_BITS_MIN + default 8 + +config ARCH_MMAP_RND_BITS_MAX + default 14 if PAGE_OFFSET=0x40000000 + default 15 if PAGE_OFFSET=0x80000000 + default 16 + # # The "ARM system type" choice list is ordered alphabetically by option # text. Please add new entries in the option alphabetic order. @@ -611,6 +618,7 @@ config ARCH_PXA select AUTO_ZRELADDR select COMMON_CLK select CLKDEV_LOOKUP + select CLKSRC_PXA select CLKSRC_MMIO select CLKSRC_OF select GENERIC_CLOCKEVENTS @@ -650,6 +658,8 @@ config ARCH_SA1100 select ARCH_SPARSEMEM_ENABLE select CLKDEV_LOOKUP select CLKSRC_MMIO + select CLKSRC_PXA + select CLKSRC_OF if OF select CPU_FREQ select CPU_SA1100 select GENERIC_CLOCKEVENTS @@ -799,6 +809,7 @@ config ARCH_VIRT bool "Dummy Virtual Machine" if ARCH_MULTI_V7 select ARM_AMBA select ARM_GIC + select ARM_GIC_V2M if PCI_MSI select ARM_GIC_V3 select ARM_PSCI select HAVE_ARM_ARCH_TIMER @@ -1422,7 +1433,7 @@ config BIG_LITTLE config BL_SWITCHER bool "big.LITTLE switcher support" - depends on BIG_LITTLE && MCPM && HOTPLUG_CPU + depends on BIG_LITTLE && MCPM && HOTPLUG_CPU && ARM_GIC select ARM_CPU_SUSPEND select CPU_PM help @@ -1481,7 +1492,7 @@ config HOTPLUG_CPU config ARM_PSCI bool "Support for the ARM Power State Coordination Interface (PSCI)" - depends on CPU_V7 + depends on HAVE_ARM_SMCCC select ARM_PSCI_FW help Say Y here if you want Linux to communicate with system firmware @@ -1604,6 +1615,24 @@ config THUMB2_AVOID_R_ARM_THM_JUMP11 config ARM_ASM_UNIFIED bool +config ARM_PATCH_IDIV + bool "Runtime patch udiv/sdiv instructions into __aeabi_{u}idiv()" + depends on CPU_32v7 && !XIP_KERNEL + default y + help + The ARM compiler inserts calls to __aeabi_idiv() and + __aeabi_uidiv() when it needs to perform division on signed + and unsigned integers. Some v7 CPUs have support for the sdiv + and udiv instructions that can be used to implement those + functions. + + Enabling this option allows the kernel to modify itself to + replace the first two instructions of these library functions + with the sdiv or udiv plus "bx lr" instructions when the CPU + it is running on supports them. Typically this will be faster + and less power intensive than running the original library + code to do integer division. + config AEABI bool "Use the ARM EABI to compile the kernel" help @@ -1800,6 +1829,25 @@ config SWIOTLB config IOMMU_HELPER def_bool SWIOTLB +config PARAVIRT + bool "Enable paravirtualization code" + help + This changes the kernel so it can modify itself when it is run + under a hypervisor, potentially improving performance significantly + over full virtualization. + +config PARAVIRT_TIME_ACCOUNTING + bool "Paravirtual steal time accounting" + select PARAVIRT + default n + help + Select this option to enable fine granularity task steal time + accounting. Time spent executing other tasks in parallel with + the current vCPU is discounted from the vCPU power. To account for + that, there can be a small performance impact. + + If in doubt, say N here. + config XEN_DOM0 def_bool y depends on XEN @@ -1813,6 +1861,7 @@ config XEN select ARCH_DMA_ADDR_T_64BIT select ARM_PSCI select SWIOTLB_XEN + select PARAVIRT help Say Y if you want to run Linux in a Virtual Machine on Xen on ARM. @@ -2040,6 +2089,25 @@ config AUTO_ZRELADDR 0xf8000000. This assumes the zImage being placed in the first 128MB from start of memory. +config EFI_STUB + bool + +config EFI + bool "UEFI runtime support" + depends on OF && !CPU_BIG_ENDIAN && MMU && AUTO_ZRELADDR && !XIP_KERNEL + select UCS2_STRING + select EFI_PARAMS_FROM_FDT + select EFI_STUB + select EFI_ARMSTUB + select EFI_RUNTIME_WRAPPERS + ---help--- + This option provides support for runtime services provided + by UEFI firmware (such as non-volatile variables, realtime + clock, and platform reset). A UEFI stub is also provided to + allow the kernel to be booted as an EFI application. This + is only useful for kernels that may run on systems that have + UEFI firmware. + endmenu menu "CPU Power Management" diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug index 259c0ca9c99a..e356357d86bb 100644 --- a/arch/arm/Kconfig.debug +++ b/arch/arm/Kconfig.debug @@ -15,20 +15,6 @@ config ARM_PTDUMP kernel. If in doubt, say "N" -config STRICT_DEVMEM - bool "Filter access to /dev/mem" - depends on MMU - ---help--- - If this option is disabled, you allow userspace (root) access to all - of memory, including kernel and userspace memory. Accidental - access to this is obviously disastrous, but specific access can - be used by people debugging the kernel. - - If this option is switched on, the /dev/mem file only allows - userspace access to memory mapped peripherals. - - If in doubt, say Y. - # RMK wants arm kernels compiled with frame pointers or stack unwinding. # If you know what you are doing and are willing to live without stack # traces, you can get a slightly smaller kernel by setting this option to diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index 3f9a9ebc77c3..4c23a68a0917 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -167,9 +167,11 @@ if [ $(words $(ZRELADDR)) -gt 1 -a "$(CONFIG_AUTO_ZRELADDR)" = "" ]; then \ false; \ fi +efi-obj-$(CONFIG_EFI_STUB) := $(objtree)/drivers/firmware/efi/libstub/lib.a + $(obj)/vmlinux: $(obj)/vmlinux.lds $(obj)/$(HEAD) $(obj)/piggy.$(suffix_y).o \ $(addprefix $(obj)/, $(OBJS)) $(lib1funcs) $(ashldi3) \ - $(bswapsdi2) FORCE + $(bswapsdi2) $(efi-obj-y) FORCE @$(check_for_multiple_zreladdr) $(call if_changed,ld) @$(check_for_bad_syms) diff --git a/arch/arm/boot/compressed/efi-header.S b/arch/arm/boot/compressed/efi-header.S new file mode 100644 index 000000000000..9d5dc4fda3c1 --- /dev/null +++ b/arch/arm/boot/compressed/efi-header.S @@ -0,0 +1,130 @@ +/* + * Copyright (C) 2013-2015 Linaro Ltd + * Authors: Roy Franz <roy.franz@linaro.org> + * Ard Biesheuvel <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + + .macro __nop +#ifdef CONFIG_EFI_STUB + @ This is almost but not quite a NOP, since it does clobber the + @ condition flags. But it is the best we can do for EFI, since + @ PE/COFF expects the magic string "MZ" at offset 0, while the + @ ARM/Linux boot protocol expects an executable instruction + @ there. + .inst 'M' | ('Z' << 8) | (0x1310 << 16) @ tstne r0, #0x4d000 +#else + mov r0, r0 +#endif + .endm + + .macro __EFI_HEADER +#ifdef CONFIG_EFI_STUB + b __efi_start + + .set start_offset, __efi_start - start + .org start + 0x3c + @ + @ The PE header can be anywhere in the file, but for + @ simplicity we keep it together with the MSDOS header + @ The offset to the PE/COFF header needs to be at offset + @ 0x3C in the MSDOS header. + @ The only 2 fields of the MSDOS header that are used are this + @ PE/COFF offset, and the "MZ" bytes at offset 0x0. + @ + .long pe_header - start @ Offset to the PE header. + +pe_header: + .ascii "PE\0\0" + +coff_header: + .short 0x01c2 @ ARM or Thumb + .short 2 @ nr_sections + .long 0 @ TimeDateStamp + .long 0 @ PointerToSymbolTable + .long 1 @ NumberOfSymbols + .short section_table - optional_header + @ SizeOfOptionalHeader + .short 0x306 @ Characteristics. + @ IMAGE_FILE_32BIT_MACHINE | + @ IMAGE_FILE_DEBUG_STRIPPED | + @ IMAGE_FILE_EXECUTABLE_IMAGE | + @ IMAGE_FILE_LINE_NUMS_STRIPPED + +optional_header: + .short 0x10b @ PE32 format + .byte 0x02 @ MajorLinkerVersion + .byte 0x14 @ MinorLinkerVersion + .long _end - __efi_start @ SizeOfCode + .long 0 @ SizeOfInitializedData + .long 0 @ SizeOfUninitializedData + .long efi_stub_entry - start @ AddressOfEntryPoint + .long start_offset @ BaseOfCode + .long 0 @ data + +extra_header_fields: + .long 0 @ ImageBase + .long 0x200 @ SectionAlignment + .long 0x200 @ FileAlignment + .short 0 @ MajorOperatingSystemVersion + .short 0 @ MinorOperatingSystemVersion + .short 0 @ MajorImageVersion + .short 0 @ MinorImageVersion + .short 0 @ MajorSubsystemVersion + .short 0 @ MinorSubsystemVersion + .long 0 @ Win32VersionValue + + .long _end - start @ SizeOfImage + .long start_offset @ SizeOfHeaders + .long 0 @ CheckSum + .short 0xa @ Subsystem (EFI application) + .short 0 @ DllCharacteristics + .long 0 @ SizeOfStackReserve + .long 0 @ SizeOfStackCommit + .long 0 @ SizeOfHeapReserve + .long 0 @ SizeOfHeapCommit + .long 0 @ LoaderFlags + .long 0x6 @ NumberOfRvaAndSizes + + .quad 0 @ ExportTable + .quad 0 @ ImportTable + .quad 0 @ ResourceTable + .quad 0 @ ExceptionTable + .quad 0 @ CertificationTable + .quad 0 @ BaseRelocationTable + +section_table: + @ + @ The EFI application loader requires a relocation section + @ because EFI applications must be relocatable. This is a + @ dummy section as far as we are concerned. + @ + .ascii ".reloc\0\0" + .long 0 @ VirtualSize + .long 0 @ VirtualAddress + .long 0 @ SizeOfRawData + .long 0 @ PointerToRawData + .long 0 @ PointerToRelocations + .long 0 @ PointerToLineNumbers + .short 0 @ NumberOfRelocations + .short 0 @ NumberOfLineNumbers + .long 0x42100040 @ Characteristics + + .ascii ".text\0\0\0" + .long _end - __efi_start @ VirtualSize + .long __efi_start @ VirtualAddress + .long _edata - __efi_start @ SizeOfRawData + .long __efi_start @ PointerToRawData + .long 0 @ PointerToRelocations + .long 0 @ PointerToLineNumbers + .short 0 @ NumberOfRelocations + .short 0 @ NumberOfLineNumbers + .long 0xe0500020 @ Characteristics + + .align 9 +__efi_start: +#endif + .endm diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index 06e983f59980..af11c2f8f3b7 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -12,6 +12,8 @@ #include <asm/assembler.h> #include <asm/v7m.h> +#include "efi-header.S" + AR_CLASS( .arch armv7-a ) M_CLASS( .arch armv7-m ) @@ -126,7 +128,7 @@ start: .type start,#function .rept 7 - mov r0, r0 + __nop .endr ARM( mov r0, r0 ) ARM( b 1f ) @@ -139,7 +141,8 @@ start: .word 0x04030201 @ endianness flag THUMB( .thumb ) -1: +1: __EFI_HEADER + ARM_BE8( setend be ) @ go BE8 if compiled for BE8 AR_CLASS( mrs r9, cpsr ) #ifdef CONFIG_ARM_VIRT_EXT @@ -1353,6 +1356,53 @@ __enter_kernel: reloc_code_end: +#ifdef CONFIG_EFI_STUB + .align 2 +_start: .long start - . + +ENTRY(efi_stub_entry) + @ allocate space on stack for passing current zImage address + @ and for the EFI stub to return of new entry point of + @ zImage, as EFI stub may copy the kernel. Pointer address + @ is passed in r2. r0 and r1 are passed through from the + @ EFI firmware to efi_entry + adr ip, _start + ldr r3, [ip] + add r3, r3, ip + stmfd sp!, {r3, lr} + mov r2, sp @ pass zImage address in r2 + bl efi_entry + + @ Check for error return from EFI stub. r0 has FDT address + @ or error code. + cmn r0, #1 + beq efi_load_fail + + @ Preserve return value of efi_entry() in r4 + mov r4, r0 + bl cache_clean_flush + bl cache_off + + @ Set parameters for booting zImage according to boot protocol + @ put FDT address in r2, it was returned by efi_entry() + @ r1 is the machine type, and r0 needs to be 0 + mov r0, #0 + mov r1, #0xFFFFFFFF + mov r2, r4 + + @ Branch to (possibly) relocated zImage that is in [sp] + ldr lr, [sp] + ldr ip, =start_offset + add lr, lr, ip + mov pc, lr @ no mode switch + +efi_load_fail: + @ Return EFI_LOAD_ERROR to EFI firmware on error. + ldr r0, =0x80000001 + ldmfd sp!, {ip, pc} +ENDPROC(efi_stub_entry) +#endif + .align .section ".stack", "aw", %nobits .L_user_stack: .space 4096 diff --git a/arch/arm/boot/compressed/vmlinux.lds.S b/arch/arm/boot/compressed/vmlinux.lds.S index 2b60b843ac5e..81c493156ce8 100644 --- a/arch/arm/boot/compressed/vmlinux.lds.S +++ b/arch/arm/boot/compressed/vmlinux.lds.S @@ -48,6 +48,13 @@ SECTIONS *(.rodata) *(.rodata.*) } + .data : { + /* + * The EFI stub always executes from RAM, and runs strictly before the + * decompressor, so we can make an exception for its r/w data, and keep it + */ + *(.data.efistub) + } .piggydata : { *(.piggydata) } diff --git a/arch/arm/boot/dts/exynos4412.dtsi b/arch/arm/boot/dts/exynos4412.dtsi index 294cfe40388d..40beede46e55 100644 --- a/arch/arm/boot/dts/exynos4412.dtsi +++ b/arch/arm/boot/dts/exynos4412.dtsi @@ -64,73 +64,73 @@ compatible = "operating-points-v2"; opp-shared; - opp00 { + opp@200000000 { opp-hz = /bits/ 64 <200000000>; opp-microvolt = <900000>; clock-latency-ns = <200000>; }; - opp01 { + opp@300000000 { opp-hz = /bits/ 64 <300000000>; opp-microvolt = <900000>; clock-latency-ns = <200000>; }; - opp02 { + opp@400000000 { opp-hz = /bits/ 64 <400000000>; opp-microvolt = <925000>; clock-latency-ns = <200000>; }; - opp03 { + opp@500000000 { opp-hz = /bits/ 64 <500000000>; opp-microvolt = <950000>; clock-latency-ns = <200000>; }; - opp04 { + opp@600000000 { opp-hz = /bits/ 64 <600000000>; opp-microvolt = <975000>; clock-latency-ns = <200000>; }; - opp05 { + opp@700000000 { opp-hz = /bits/ 64 <700000000>; opp-microvolt = <987500>; clock-latency-ns = <200000>; }; - opp06 { + opp@800000000 { opp-hz = /bits/ 64 <800000000>; opp-microvolt = <1000000>; clock-latency-ns = <200000>; opp-suspend; }; - opp07 { + opp@900000000 { opp-hz = /bits/ 64 <900000000>; opp-microvolt = <1037500>; clock-latency-ns = <200000>; }; - opp08 { + opp@1000000000 { opp-hz = /bits/ 64 <1000000000>; opp-microvolt = <1087500>; clock-latency-ns = <200000>; }; - opp09 { + opp@1100000000 { opp-hz = /bits/ 64 <1100000000>; opp-microvolt = <1137500>; clock-latency-ns = <200000>; }; - opp10 { + opp@1200000000 { opp-hz = /bits/ 64 <1200000000>; opp-microvolt = <1187500>; clock-latency-ns = <200000>; }; - opp11 { + opp@1300000000 { opp-hz = /bits/ 64 <1300000000>; opp-microvolt = <1250000>; clock-latency-ns = <200000>; }; - opp12 { + opp@1400000000 { opp-hz = /bits/ 64 <1400000000>; opp-microvolt = <1287500>; clock-latency-ns = <200000>; }; - opp13 { + opp@1500000000 { opp-hz = /bits/ 64 <1500000000>; opp-microvolt = <1350000>; clock-latency-ns = <200000>; diff --git a/arch/arm/boot/dts/exynos5800-peach-pi.dts b/arch/arm/boot/dts/exynos5800-peach-pi.dts index 49a4f43e5ac2..1cc2e95ffc66 100644 --- a/arch/arm/boot/dts/exynos5800-peach-pi.dts +++ b/arch/arm/boot/dts/exynos5800-peach-pi.dts @@ -122,6 +122,12 @@ compatible = "auo,b133htn01"; power-supply = <&tps65090_fet6>; backlight = <&backlight>; + + port { + panel_in: endpoint { + remote-endpoint = <&dp_out>; + }; + }; }; mmc1_pwrseq: mmc1_pwrseq { @@ -148,7 +154,14 @@ samsung,link-rate = <0x0a>; samsung,lane-count = <2>; samsung,hpd-gpio = <&gpx2 6 GPIO_ACTIVE_HIGH>; - panel = <&panel>; + + ports { + port { + dp_out: endpoint { + remote-endpoint = <&panel_in>; + }; + }; + }; }; &fimd { diff --git a/arch/arm/boot/dts/stihxxx-b2120.dtsi b/arch/arm/boot/dts/stihxxx-b2120.dtsi index ad21a4293a33..133375bc8aa5 100644 --- a/arch/arm/boot/dts/stihxxx-b2120.dtsi +++ b/arch/arm/boot/dts/stihxxx-b2120.dtsi @@ -6,6 +6,9 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ +#include <dt-bindings/clock/stih407-clks.h> +#include <dt-bindings/gpio/gpio.h> +#include <dt-bindings/media/c8sectpfe.h> / { soc { sbc_serial0: serial@9530000 { @@ -35,12 +38,18 @@ status = "okay"; }; - i2c@9842000 { + ssc2: i2c@9842000 { status = "okay"; + clock-frequency = <100000>; + st,i2c-min-scl-pulse-width-us = <0>; + st,i2c-min-sda-pulse-width-us = <5>; }; - i2c@9843000 { + ssc3: i2c@9843000 { status = "okay"; + clock-frequency = <100000>; + st,i2c-min-scl-pulse-width-us = <0>; + st,i2c-min-sda-pulse-width-us = <5>; }; i2c@9844000 { @@ -93,5 +102,38 @@ phy-mode = "rgmii"; fixed-link = <0 1 1000 0 0>; }; + + demux@08a20000 { + compatible = "st,stih407-c8sectpfe"; + status = "okay"; + reg = <0x08a20000 0x10000>, + <0x08a00000 0x4000>; + reg-names = "c8sectpfe", "c8sectpfe-ram"; + interrupts = <GIC_SPI 34 IRQ_TYPE_NONE>, + <GIC_SPI 35 IRQ_TYPE_NONE>; + interrupt-names = "c8sectpfe-error-irq", + "c8sectpfe-idle-irq"; + pinctrl-0 = <&pinctrl_tsin0_serial>; + pinctrl-1 = <&pinctrl_tsin0_parallel>; + pinctrl-2 = <&pinctrl_tsin3_serial>; + pinctrl-3 = <&pinctrl_tsin4_serial_alt3>; + pinctrl-4 = <&pinctrl_tsin5_serial_alt1>; + pinctrl-names = "tsin0-serial", + "tsin0-parallel", + "tsin3-serial", + "tsin4-serial", + "tsin5-serial"; + clocks = <&clk_s_c0_flexgen CLK_PROC_STFE>; + clock-names = "c8sectpfe"; + + /* tsin0 is TSA on NIMA */ + tsin0: port@0 { + tsin-num = <0>; + serial-not-parallel; + i2c-bus = <&ssc2>; + reset-gpios = <&pio15 4 GPIO_ACTIVE_HIGH>; + dvb-card = <STV0367_TDA18212_NIMA_1>; + }; + }; }; }; diff --git a/arch/arm/configs/stm32_defconfig b/arch/arm/configs/stm32_defconfig index 4725fab562cb..ec5250547d14 100644 --- a/arch/arm/configs/stm32_defconfig +++ b/arch/arm/configs/stm32_defconfig @@ -54,6 +54,8 @@ CONFIG_NEW_LEDS=y CONFIG_LEDS_CLASS=y CONFIG_LEDS_TRIGGERS=y CONFIG_LEDS_TRIGGER_HEARTBEAT=y +CONFIG_DMADEVICES=y +CONFIG_STM32_DMA=y # CONFIG_FILE_LOCKING is not set # CONFIG_DNOTIFY is not set # CONFIG_INOTIFY_USER is not set diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild index bd425302c97a..16da6380eb85 100644 --- a/arch/arm/include/asm/Kbuild +++ b/arch/arm/include/asm/Kbuild @@ -3,6 +3,7 @@ generic-y += bitsperlong.h generic-y += cputime.h generic-y += current.h +generic-y += early_ioremap.h generic-y += emergency-restart.h generic-y += errno.h generic-y += exec.h diff --git a/arch/arm/include/asm/bug.h b/arch/arm/include/asm/bug.h index e7335a92144e..4e6e88a6b2f4 100644 --- a/arch/arm/include/asm/bug.h +++ b/arch/arm/include/asm/bug.h @@ -5,8 +5,6 @@ #include <linux/types.h> #include <asm/opcodes.h> -#ifdef CONFIG_BUG - /* * Use a suitable undefined instruction to use for ARM/Thumb2 bug handling. * We need to be careful not to conflict with those used by other modules and @@ -47,7 +45,7 @@ do { \ unreachable(); \ } while (0) -#else /* not CONFIG_DEBUG_BUGVERBOSE */ +#else #define __BUG(__file, __line, __value) \ do { \ @@ -57,7 +55,6 @@ do { \ #endif /* CONFIG_DEBUG_BUGVERBOSE */ #define HAVE_ARCH_BUG -#endif /* CONFIG_BUG */ #include <asm-generic/bug.h> diff --git a/arch/arm/include/asm/cpuidle.h b/arch/arm/include/asm/cpuidle.h index 0f8424924902..3848259bebf8 100644 --- a/arch/arm/include/asm/cpuidle.h +++ b/arch/arm/include/asm/cpuidle.h @@ -30,7 +30,7 @@ static inline int arm_cpuidle_simple_enter(struct cpuidle_device *dev, struct device_node; struct cpuidle_ops { - int (*suspend)(int cpu, unsigned long arg); + int (*suspend)(unsigned long arg); int (*init)(struct device_node *, int cpu); }; diff --git a/arch/arm/include/asm/efi.h b/arch/arm/include/asm/efi.h new file mode 100644 index 000000000000..e0eea72deb87 --- /dev/null +++ b/arch/arm/include/asm/efi.h @@ -0,0 +1,83 @@ +/* + * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __ASM_ARM_EFI_H +#define __ASM_ARM_EFI_H + +#include <asm/cacheflush.h> +#include <asm/cachetype.h> +#include <asm/early_ioremap.h> +#include <asm/fixmap.h> +#include <asm/highmem.h> +#include <asm/mach/map.h> +#include <asm/mmu_context.h> +#include <asm/pgtable.h> + +#ifdef CONFIG_EFI +void efi_init(void); + +int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md); + +#define efi_call_virt(f, ...) \ +({ \ + efi_##f##_t *__f; \ + efi_status_t __s; \ + \ + efi_virtmap_load(); \ + __f = efi.systab->runtime->f; \ + __s = __f(__VA_ARGS__); \ + efi_virtmap_unload(); \ + __s; \ +}) + +#define __efi_call_virt(f, ...) \ +({ \ + efi_##f##_t *__f; \ + \ + efi_virtmap_load(); \ + __f = efi.systab->runtime->f; \ + __f(__VA_ARGS__); \ + efi_virtmap_unload(); \ +}) + +static inline void efi_set_pgd(struct mm_struct *mm) +{ + check_and_switch_context(mm, NULL); +} + +void efi_virtmap_load(void); +void efi_virtmap_unload(void); + +#else +#define efi_init() +#endif /* CONFIG_EFI */ + +/* arch specific definitions used by the stub code */ + +#define efi_call_early(f, ...) sys_table_arg->boottime->f(__VA_ARGS__) + +/* + * A reasonable upper bound for the uncompressed kernel size is 32 MBytes, + * so we will reserve that amount of memory. We have no easy way to tell what + * the actuall size of code + data the uncompressed kernel will use. + * If this is insufficient, the decompressor will relocate itself out of the + * way before performing the decompression. + */ +#define MAX_UNCOMP_KERNEL_SIZE SZ_32M + +/* + * The kernel zImage should preferably be located between 32 MB and 128 MB + * from the base of DRAM. The min address leaves space for a maximal size + * uncompressed image, and the max address is due to how the zImage decompressor + * picks a destination address. + */ +#define ZIMAGE_OFFSET_LIMIT SZ_128M +#define MIN_ZIMAGE_OFFSET MAX_UNCOMP_KERNEL_SIZE +#define MAX_FDT_OFFSET ZIMAGE_OFFSET_LIMIT + +#endif /* _ASM_ARM_EFI_H */ diff --git a/arch/arm/include/asm/fixmap.h b/arch/arm/include/asm/fixmap.h index 58cfe9f1a687..5c17d2dec777 100644 --- a/arch/arm/include/asm/fixmap.h +++ b/arch/arm/include/asm/fixmap.h @@ -19,20 +19,47 @@ enum fixed_addresses { FIX_TEXT_POKE0, FIX_TEXT_POKE1, - __end_of_fixed_addresses + __end_of_fixmap_region, + + /* + * Share the kmap() region with early_ioremap(): this is guaranteed + * not to clash since early_ioremap() is only available before + * paging_init(), and kmap() only after. + */ +#define NR_FIX_BTMAPS 32 +#define FIX_BTMAPS_SLOTS 7 +#define TOTAL_FIX_BTMAPS (NR_FIX_BTMAPS * FIX_BTMAPS_SLOTS) + + FIX_BTMAP_END = __end_of_permanent_fixed_addresses, + FIX_BTMAP_BEGIN = FIX_BTMAP_END + TOTAL_FIX_BTMAPS - 1, + __end_of_early_ioremap_region }; +static const enum fixed_addresses __end_of_fixed_addresses = + __end_of_fixmap_region > __end_of_early_ioremap_region ? + __end_of_fixmap_region : __end_of_early_ioremap_region; + #define FIXMAP_PAGE_COMMON (L_PTE_YOUNG | L_PTE_PRESENT | L_PTE_XN | L_PTE_DIRTY) #define FIXMAP_PAGE_NORMAL (FIXMAP_PAGE_COMMON | L_PTE_MT_WRITEBACK) +#define FIXMAP_PAGE_RO (FIXMAP_PAGE_NORMAL | L_PTE_RDONLY) /* Used by set_fixmap_(io|nocache), both meant for mapping a device */ #define FIXMAP_PAGE_IO (FIXMAP_PAGE_COMMON | L_PTE_MT_DEV_SHARED | L_PTE_SHARED) #define FIXMAP_PAGE_NOCACHE FIXMAP_PAGE_IO +#define __early_set_fixmap __set_fixmap + +#ifdef CONFIG_MMU + void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot); void __init early_fixmap_init(void); #include <asm-generic/fixmap.h> +#else + +static inline void early_fixmap_init(void) { } + +#endif #endif diff --git a/arch/arm/include/asm/hardirq.h b/arch/arm/include/asm/hardirq.h index fe3ea776dc34..3d7351c844aa 100644 --- a/arch/arm/include/asm/hardirq.h +++ b/arch/arm/include/asm/hardirq.h @@ -5,7 +5,7 @@ #include <linux/threads.h> #include <asm/irq.h> -#define NR_IPI 8 +#define NR_IPI 7 typedef struct { unsigned int __softirq_pending; diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h index dc641ddf0784..e22089fb44dc 100644 --- a/arch/arm/include/asm/kvm_arm.h +++ b/arch/arm/include/asm/kvm_arm.h @@ -19,6 +19,7 @@ #ifndef __ARM_KVM_ARM_H__ #define __ARM_KVM_ARM_H__ +#include <linux/const.h> #include <linux/types.h> /* Hyp Configuration Register (HCR) bits */ @@ -132,10 +133,9 @@ * space. */ #define KVM_PHYS_SHIFT (40) -#define KVM_PHYS_SIZE (1ULL << KVM_PHYS_SHIFT) -#define KVM_PHYS_MASK (KVM_PHYS_SIZE - 1ULL) -#define PTRS_PER_S2_PGD (1ULL << (KVM_PHYS_SHIFT - 30)) -#define S2_PGD_ORDER get_order(PTRS_PER_S2_PGD * sizeof(pgd_t)) +#define KVM_PHYS_SIZE (_AC(1, ULL) << KVM_PHYS_SHIFT) +#define KVM_PHYS_MASK (KVM_PHYS_SIZE - _AC(1, ULL)) +#define PTRS_PER_S2_PGD (_AC(1, ULL) << (KVM_PHYS_SHIFT - 30)) /* Virtualization Translation Control Register (VTCR) bits */ #define VTCR_SH0 (3 << 12) @@ -162,17 +162,17 @@ #define VTTBR_X (5 - KVM_T0SZ) #endif #define VTTBR_BADDR_SHIFT (VTTBR_X - 1) -#define VTTBR_BADDR_MASK (((1LLU << (40 - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT) -#define VTTBR_VMID_SHIFT (48LLU) -#define VTTBR_VMID_MASK (0xffLLU << VTTBR_VMID_SHIFT) +#define VTTBR_BADDR_MASK (((_AC(1, ULL) << (40 - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT) +#define VTTBR_VMID_SHIFT _AC(48, ULL) +#define VTTBR_VMID_MASK(size) (_AT(u64, (1 << size) - 1) << VTTBR_VMID_SHIFT) /* Hyp Syndrome Register (HSR) bits */ #define HSR_EC_SHIFT (26) -#define HSR_EC (0x3fU << HSR_EC_SHIFT) -#define HSR_IL (1U << 25) +#define HSR_EC (_AC(0x3f, UL) << HSR_EC_SHIFT) +#define HSR_IL (_AC(1, UL) << 25) #define HSR_ISS (HSR_IL - 1) #define HSR_ISV_SHIFT (24) -#define HSR_ISV (1U << HSR_ISV_SHIFT) +#define HSR_ISV (_AC(1, UL) << HSR_ISV_SHIFT) #define HSR_SRT_SHIFT (16) #define HSR_SRT_MASK (0xf << HSR_SRT_SHIFT) #define HSR_FSC (0x3f) @@ -180,9 +180,9 @@ #define HSR_SSE (1 << 21) #define HSR_WNR (1 << 6) #define HSR_CV_SHIFT (24) -#define HSR_CV (1U << HSR_CV_SHIFT) +#define HSR_CV (_AC(1, UL) << HSR_CV_SHIFT) #define HSR_COND_SHIFT (20) -#define HSR_COND (0xfU << HSR_COND_SHIFT) +#define HSR_COND (_AC(0xf, UL) << HSR_COND_SHIFT) #define FSC_FAULT (0x04) #define FSC_ACCESS (0x08) @@ -210,13 +210,13 @@ #define HSR_EC_DABT (0x24) #define HSR_EC_DABT_HYP (0x25) -#define HSR_WFI_IS_WFE (1U << 0) +#define HSR_WFI_IS_WFE (_AC(1, UL) << 0) -#define HSR_HVC_IMM_MASK ((1UL << 16) - 1) +#define HSR_HVC_IMM_MASK ((_AC(1, UL) << 16) - 1) -#define HSR_DABT_S1PTW (1U << 7) -#define HSR_DABT_CM (1U << 8) -#define HSR_DABT_EA (1U << 9) +#define HSR_DABT_S1PTW (_AC(1, UL) << 7) +#define HSR_DABT_CM (_AC(1, UL) << 8) +#define HSR_DABT_EA (_AC(1, UL) << 9) #define kvm_arm_exception_type \ {0, "RESET" }, \ diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 6692982c9b57..f9f27792d8ed 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -150,6 +150,12 @@ struct kvm_vcpu_stat { u32 halt_successful_poll; u32 halt_attempted_poll; u32 halt_wakeup; + u32 hvc_exit_stat; + u64 wfe_exit_stat; + u64 wfi_exit_stat; + u64 mmio_exit_user; + u64 mmio_exit_kernel; + u64 exits; }; int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init); diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 405aa1883307..a520b7987a29 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -182,7 +182,8 @@ static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu) return (vcpu->arch.cp15[c1_SCTLR] & 0b101) == 0b101; } -static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu, pfn_t pfn, +static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu, + kvm_pfn_t pfn, unsigned long size, bool ipa_uncached) { @@ -246,7 +247,7 @@ static inline void __kvm_flush_dcache_pte(pte_t pte) static inline void __kvm_flush_dcache_pmd(pmd_t pmd) { unsigned long size = PMD_SIZE; - pfn_t pfn = pmd_pfn(pmd); + kvm_pfn_t pfn = pmd_pfn(pmd); while (size) { void *va = kmap_atomic_pfn(pfn); @@ -279,6 +280,11 @@ static inline void __kvm_extend_hypmap(pgd_t *boot_hyp_pgd, pgd_t *merged_hyp_pgd, unsigned long hyp_idmap_start) { } +static inline unsigned int kvm_get_vmid_bits(void) +{ + return 8; +} + #endif /* !__ASSEMBLY__ */ #endif /* __ARM_KVM_MMU_H__ */ diff --git a/arch/arm/include/asm/mach/map.h b/arch/arm/include/asm/mach/map.h index f98c7f32c9c8..9b7c328fb207 100644 --- a/arch/arm/include/asm/mach/map.h +++ b/arch/arm/include/asm/mach/map.h @@ -42,6 +42,8 @@ enum { extern void iotable_init(struct map_desc *, int); extern void vm_reserve_area_early(unsigned long addr, unsigned long size, void *caller); +extern void create_mapping_late(struct mm_struct *mm, struct map_desc *md, + bool ng); #ifdef CONFIG_DEBUG_LL extern void debug_ll_addr(unsigned long *paddr, unsigned long *vaddr); diff --git a/arch/arm/include/asm/mmu_context.h b/arch/arm/include/asm/mmu_context.h index 9b32f76bb0dd..432ce8176498 100644 --- a/arch/arm/include/asm/mmu_context.h +++ b/arch/arm/include/asm/mmu_context.h @@ -26,7 +26,7 @@ void __check_vmalloc_seq(struct mm_struct *mm); #ifdef CONFIG_CPU_HAS_ASID void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk); -#define init_new_context(tsk,mm) ({ atomic64_set(&mm->context.id, 0); 0; }) +#define init_new_context(tsk,mm) ({ atomic64_set(&(mm)->context.id, 0); 0; }) #ifdef CONFIG_ARM_ERRATA_798181 void a15_erratum_get_cpumask(int this_cpu, struct mm_struct *mm, diff --git a/arch/arm/include/asm/paravirt.h b/arch/arm/include/asm/paravirt.h new file mode 100644 index 000000000000..8435ff591386 --- /dev/null +++ b/arch/arm/include/asm/paravirt.h @@ -0,0 +1,20 @@ +#ifndef _ASM_ARM_PARAVIRT_H +#define _ASM_ARM_PARAVIRT_H + +#ifdef CONFIG_PARAVIRT +struct static_key; +extern struct static_key paravirt_steal_enabled; +extern struct static_key paravirt_steal_rq_enabled; + +struct pv_time_ops { + unsigned long long (*steal_clock)(int cpu); +}; +extern struct pv_time_ops pv_time_ops; + +static inline u64 paravirt_steal_clock(int cpu) +{ + return pv_time_ops.steal_clock(cpu); +} +#endif + +#endif diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h index a745a2a53853..dc46398bc3a5 100644 --- a/arch/arm/include/asm/pgtable-3level.h +++ b/arch/arm/include/asm/pgtable-3level.h @@ -88,7 +88,6 @@ #define L_PMD_SECT_VALID (_AT(pmdval_t, 1) << 0) #define L_PMD_SECT_DIRTY (_AT(pmdval_t, 1) << 55) -#define L_PMD_SECT_SPLITTING (_AT(pmdval_t, 1) << 56) #define L_PMD_SECT_NONE (_AT(pmdval_t, 1) << 57) #define L_PMD_SECT_RDONLY (_AT(pteval_t, 1) << 58) @@ -232,13 +231,6 @@ static inline pte_t pte_mkspecial(pte_t pte) #ifdef CONFIG_TRANSPARENT_HUGEPAGE #define pmd_trans_huge(pmd) (pmd_val(pmd) && !pmd_table(pmd)) -#define pmd_trans_splitting(pmd) (pmd_isset((pmd), L_PMD_SECT_SPLITTING)) - -#ifdef CONFIG_HAVE_RCU_TABLE_FREE -#define __HAVE_ARCH_PMDP_SPLITTING_FLUSH -void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address, - pmd_t *pmdp); -#endif #endif #define PMD_BIT_FUNC(fn,op) \ @@ -246,9 +238,9 @@ static inline pmd_t pmd_##fn(pmd_t pmd) { pmd_val(pmd) op; return pmd; } PMD_BIT_FUNC(wrprotect, |= L_PMD_SECT_RDONLY); PMD_BIT_FUNC(mkold, &= ~PMD_SECT_AF); -PMD_BIT_FUNC(mksplitting, |= L_PMD_SECT_SPLITTING); PMD_BIT_FUNC(mkwrite, &= ~L_PMD_SECT_RDONLY); PMD_BIT_FUNC(mkdirty, |= L_PMD_SECT_DIRTY); +PMD_BIT_FUNC(mkclean, &= ~L_PMD_SECT_DIRTY); PMD_BIT_FUNC(mkyoung, |= PMD_SECT_AF); #define pmd_mkhuge(pmd) (__pmd(pmd_val(pmd) & ~PMD_TABLE_BIT)) diff --git a/arch/arm/include/asm/psci.h b/arch/arm/include/asm/psci.h index 68ee3ce17b82..b4c6d99364f1 100644 --- a/arch/arm/include/asm/psci.h +++ b/arch/arm/include/asm/psci.h @@ -16,7 +16,7 @@ extern struct smp_operations psci_smp_ops; -#ifdef CONFIG_ARM_PSCI +#if defined(CONFIG_SMP) && defined(CONFIG_ARM_PSCI) bool psci_smp_available(void); #else static inline bool psci_smp_available(void) { return false; } diff --git a/arch/arm/include/asm/setup.h b/arch/arm/include/asm/setup.h index e0adb9f1bf94..3613d7e9fc40 100644 --- a/arch/arm/include/asm/setup.h +++ b/arch/arm/include/asm/setup.h @@ -25,4 +25,10 @@ extern int arm_add_memory(u64 start, u64 size); extern void early_print(const char *str, ...); extern void dump_machine_table(void); +#ifdef CONFIG_ATAGS_PROC +extern void save_atags(const struct tag *tags); +#else +static inline void save_atags(const struct tag *tags) { } +#endif + #endif diff --git a/arch/arm/include/asm/xen/hypercall.h b/arch/arm/include/asm/xen/hypercall.h index 712b50e0a6dc..d769972db8cb 100644 --- a/arch/arm/include/asm/xen/hypercall.h +++ b/arch/arm/include/asm/xen/hypercall.h @@ -35,6 +35,7 @@ #include <xen/interface/xen.h> #include <xen/interface/sched.h> +#include <xen/interface/platform.h> long privcmd_call(unsigned call, unsigned long a1, unsigned long a2, unsigned long a3, @@ -49,6 +50,12 @@ int HYPERVISOR_memory_op(unsigned int cmd, void *arg); int HYPERVISOR_physdev_op(int cmd, void *arg); int HYPERVISOR_vcpu_op(int cmd, int vcpuid, void *extra_args); int HYPERVISOR_tmem_op(void *arg); +int HYPERVISOR_platform_op_raw(void *arg); +static inline int HYPERVISOR_platform_op(struct xen_platform_op *op) +{ + op->interface_version = XENPF_INTERFACE_VERSION; + return HYPERVISOR_platform_op_raw(op); +} int HYPERVISOR_multicall(struct multicall_entry *calls, uint32_t nr); static inline int diff --git a/arch/arm/include/asm/xen/interface.h b/arch/arm/include/asm/xen/interface.h index 50066006e6bd..75d596862892 100644 --- a/arch/arm/include/asm/xen/interface.h +++ b/arch/arm/include/asm/xen/interface.h @@ -27,6 +27,8 @@ (hnd).p = val; \ } while (0) +#define __HYPERVISOR_platform_op_raw __HYPERVISOR_platform_op + #ifndef __ASSEMBLY__ /* Explicitly size integers that represent pfns in the interface with * Xen so that we can have one ABI that works for 32 and 64 bit guests. @@ -76,6 +78,7 @@ struct pvclock_wall_clock { u32 version; u32 sec; u32 nsec; + u32 sec_hi; } __attribute__((__packed__)); #endif diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index af9e59bf3831..2c5f160be65e 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -73,14 +73,15 @@ obj-$(CONFIG_IWMMXT) += iwmmxt.o obj-$(CONFIG_PERF_EVENTS) += perf_regs.o perf_callchain.o obj-$(CONFIG_HW_PERF_EVENTS) += perf_event_xscale.o perf_event_v6.o \ perf_event_v7.o -CFLAGS_pj4-cp0.o := -marm AFLAGS_iwmmxt.o := -Wa,-mcpu=iwmmxt obj-$(CONFIG_ARM_CPU_TOPOLOGY) += topology.o obj-$(CONFIG_VDSO) += vdso.o +obj-$(CONFIG_EFI) += efi.o ifneq ($(CONFIG_ARCH_EBSA110),y) obj-y += io.o endif +obj-$(CONFIG_PARAVIRT) += paravirt.o head-y := head$(MMUEXT).o obj-$(CONFIG_DEBUG_LL) += debug.o @@ -88,8 +89,9 @@ obj-$(CONFIG_EARLY_PRINTK) += early_printk.o obj-$(CONFIG_ARM_VIRT_EXT) += hyp-stub.o ifeq ($(CONFIG_ARM_PSCI),y) -obj-y += psci-call.o obj-$(CONFIG_SMP) += psci_smp.o endif +obj-$(CONFIG_HAVE_ARM_SMCCC) += smccc-call.o + extra-y := $(head-y) vmlinux.lds diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c index f89811fb9a55..7e45f69a0ddc 100644 --- a/arch/arm/kernel/armksyms.c +++ b/arch/arm/kernel/armksyms.c @@ -16,6 +16,7 @@ #include <linux/syscalls.h> #include <linux/uaccess.h> #include <linux/io.h> +#include <linux/arm-smccc.h> #include <asm/checksum.h> #include <asm/ftrace.h> @@ -175,3 +176,8 @@ EXPORT_SYMBOL(__gnu_mcount_nc); EXPORT_SYMBOL(__pv_phys_pfn_offset); EXPORT_SYMBOL(__pv_offset); #endif + +#ifdef CONFIG_HAVE_ARM_SMCCC +EXPORT_SYMBOL(arm_smccc_smc); +EXPORT_SYMBOL(arm_smccc_hvc); +#endif diff --git a/arch/arm/kernel/atags.h b/arch/arm/kernel/atags.h index ec4164da6e30..edfa2268c127 100644 --- a/arch/arm/kernel/atags.h +++ b/arch/arm/kernel/atags.h @@ -1,9 +1,3 @@ -#ifdef CONFIG_ATAGS_PROC -extern void save_atags(struct tag *tags); -#else -static inline void save_atags(struct tag *tags) { } -#endif - void convert_to_tag_list(struct tag *tags); #ifdef CONFIG_ATAGS diff --git a/arch/arm/kernel/cpuidle.c b/arch/arm/kernel/cpuidle.c index 318da33465f4..703926e7007b 100644 --- a/arch/arm/kernel/cpuidle.c +++ b/arch/arm/kernel/cpuidle.c @@ -56,7 +56,7 @@ int arm_cpuidle_suspend(int index) int cpu = smp_processor_id(); if (cpuidle_ops[cpu].suspend) - ret = cpuidle_ops[cpu].suspend(cpu, index); + ret = cpuidle_ops[cpu].suspend(index); return ret; } diff --git a/arch/arm/kernel/efi.c b/arch/arm/kernel/efi.c new file mode 100644 index 000000000000..ff8a9d8acfac --- /dev/null +++ b/arch/arm/kernel/efi.c @@ -0,0 +1,38 @@ +/* + * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/efi.h> +#include <asm/efi.h> +#include <asm/mach/map.h> +#include <asm/mmu_context.h> + +int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md) +{ + struct map_desc desc = { + .virtual = md->virt_addr, + .pfn = __phys_to_pfn(md->phys_addr), + .length = md->num_pages * EFI_PAGE_SIZE, + }; + + /* + * Order is important here: memory regions may have all of the + * bits below set (and usually do), so we check them in order of + * preference. + */ + if (md->attribute & EFI_MEMORY_WB) + desc.type = MT_MEMORY_RWX; + else if (md->attribute & EFI_MEMORY_WT) + desc.type = MT_MEMORY_RWX_NONCACHED; + else if (md->attribute & EFI_MEMORY_WC) + desc.type = MT_DEVICE_WC; + else + desc.type = MT_DEVICE; + + create_mapping_late(mm, &desc, true); + return 0; +} diff --git a/arch/arm/kernel/entry-v7m.S b/arch/arm/kernel/entry-v7m.S index b6c8bb9315e7..907534f97053 100644 --- a/arch/arm/kernel/entry-v7m.S +++ b/arch/arm/kernel/entry-v7m.S @@ -88,7 +88,7 @@ __pendsv_entry: @ execute the pending work, including reschedule get_thread_info tsk mov why, #0 - b ret_to_user + b ret_to_user_from_irq ENDPROC(__pendsv_entry) /* diff --git a/arch/arm/kernel/module-plts.c b/arch/arm/kernel/module-plts.c index 097e2e201b9f..0c7efc3446c0 100644 --- a/arch/arm/kernel/module-plts.c +++ b/arch/arm/kernel/module-plts.c @@ -32,7 +32,7 @@ struct plt_entries { static bool in_init(const struct module *mod, u32 addr) { - return addr - (u32)mod->module_init < mod->init_size; + return addr - (u32)mod->init_layout.base < mod->init_layout.size; } u32 get_module_plt(struct module *mod, unsigned long loc, Elf32_Addr val) diff --git a/arch/arm64/kernel/psci-call.S b/arch/arm/kernel/paravirt.c index cf83e61cd3b5..53f371ed4568 100644 --- a/arch/arm64/kernel/psci-call.S +++ b/arch/arm/kernel/paravirt.c @@ -8,21 +8,18 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * - * Copyright (C) 2015 ARM Limited + * Copyright (C) 2013 Citrix Systems * - * Author: Will Deacon <will.deacon@arm.com> + * Author: Stefano Stabellini <stefano.stabellini@eu.citrix.com> */ -#include <linux/linkage.h> +#include <linux/export.h> +#include <linux/jump_label.h> +#include <linux/types.h> +#include <asm/paravirt.h> -/* int __invoke_psci_fn_hvc(u64 function_id, u64 arg0, u64 arg1, u64 arg2) */ -ENTRY(__invoke_psci_fn_hvc) - hvc #0 - ret -ENDPROC(__invoke_psci_fn_hvc) +struct static_key paravirt_steal_enabled; +struct static_key paravirt_steal_rq_enabled; -/* int __invoke_psci_fn_smc(u64 function_id, u64 arg0, u64 arg1, u64 arg2) */ -ENTRY(__invoke_psci_fn_smc) - smc #0 - ret -ENDPROC(__invoke_psci_fn_smc) +struct pv_time_ops pv_time_ops; +EXPORT_SYMBOL_GPL(pv_time_ops); diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c index 126dc679b230..4152158f6e6a 100644 --- a/arch/arm/kernel/perf_event_v7.c +++ b/arch/arm/kernel/perf_event_v7.c @@ -35,133 +35,117 @@ * but the encodings are considered to be `reserved' in the case that * they are not available. */ -enum armv7_perf_types { - ARMV7_PERFCTR_PMNC_SW_INCR = 0x00, - ARMV7_PERFCTR_L1_ICACHE_REFILL = 0x01, - ARMV7_PERFCTR_ITLB_REFILL = 0x02, - ARMV7_PERFCTR_L1_DCACHE_REFILL = 0x03, - ARMV7_PERFCTR_L1_DCACHE_ACCESS = 0x04, - ARMV7_PERFCTR_DTLB_REFILL = 0x05, - ARMV7_PERFCTR_MEM_READ = 0x06, - ARMV7_PERFCTR_MEM_WRITE = 0x07, - ARMV7_PERFCTR_INSTR_EXECUTED = 0x08, - ARMV7_PERFCTR_EXC_TAKEN = 0x09, - ARMV7_PERFCTR_EXC_EXECUTED = 0x0A, - ARMV7_PERFCTR_CID_WRITE = 0x0B, +#define ARMV7_PERFCTR_PMNC_SW_INCR 0x00 +#define ARMV7_PERFCTR_L1_ICACHE_REFILL 0x01 +#define ARMV7_PERFCTR_ITLB_REFILL 0x02 +#define ARMV7_PERFCTR_L1_DCACHE_REFILL 0x03 +#define ARMV7_PERFCTR_L1_DCACHE_ACCESS 0x04 +#define ARMV7_PERFCTR_DTLB_REFILL 0x05 +#define ARMV7_PERFCTR_MEM_READ 0x06 +#define ARMV7_PERFCTR_MEM_WRITE 0x07 +#define ARMV7_PERFCTR_INSTR_EXECUTED 0x08 +#define ARMV7_PERFCTR_EXC_TAKEN 0x09 +#define ARMV7_PERFCTR_EXC_EXECUTED 0x0A +#define ARMV7_PERFCTR_CID_WRITE 0x0B - /* - * ARMV7_PERFCTR_PC_WRITE is equivalent to HW_BRANCH_INSTRUCTIONS. - * It counts: - * - all (taken) branch instructions, - * - instructions that explicitly write the PC, - * - exception generating instructions. - */ - ARMV7_PERFCTR_PC_WRITE = 0x0C, - ARMV7_PERFCTR_PC_IMM_BRANCH = 0x0D, - ARMV7_PERFCTR_PC_PROC_RETURN = 0x0E, - ARMV7_PERFCTR_MEM_UNALIGNED_ACCESS = 0x0F, - ARMV7_PERFCTR_PC_BRANCH_MIS_PRED = 0x10, - ARMV7_PERFCTR_CLOCK_CYCLES = 0x11, - ARMV7_PERFCTR_PC_BRANCH_PRED = 0x12, - - /* These events are defined by the PMUv2 supplement (ARM DDI 0457A). */ - ARMV7_PERFCTR_MEM_ACCESS = 0x13, - ARMV7_PERFCTR_L1_ICACHE_ACCESS = 0x14, - ARMV7_PERFCTR_L1_DCACHE_WB = 0x15, - ARMV7_PERFCTR_L2_CACHE_ACCESS = 0x16, - ARMV7_PERFCTR_L2_CACHE_REFILL = 0x17, - ARMV7_PERFCTR_L2_CACHE_WB = 0x18, - ARMV7_PERFCTR_BUS_ACCESS = 0x19, - ARMV7_PERFCTR_MEM_ERROR = 0x1A, - ARMV7_PERFCTR_INSTR_SPEC = 0x1B, - ARMV7_PERFCTR_TTBR_WRITE = 0x1C, - ARMV7_PERFCTR_BUS_CYCLES = 0x1D, - - ARMV7_PERFCTR_CPU_CYCLES = 0xFF -}; +/* + * ARMV7_PERFCTR_PC_WRITE is equivalent to HW_BRANCH_INSTRUCTIONS. + * It counts: + * - all (taken) branch instructions, + * - instructions that explicitly write the PC, + * - exception generating instructions. + */ +#define ARMV7_PERFCTR_PC_WRITE 0x0C +#define ARMV7_PERFCTR_PC_IMM_BRANCH 0x0D +#define ARMV7_PERFCTR_PC_PROC_RETURN 0x0E +#define ARMV7_PERFCTR_MEM_UNALIGNED_ACCESS 0x0F +#define ARMV7_PERFCTR_PC_BRANCH_MIS_PRED 0x10 +#define ARMV7_PERFCTR_CLOCK_CYCLES 0x11 +#define ARMV7_PERFCTR_PC_BRANCH_PRED 0x12 + +/* These events are defined by the PMUv2 supplement (ARM DDI 0457A). */ +#define ARMV7_PERFCTR_MEM_ACCESS 0x13 +#define ARMV7_PERFCTR_L1_ICACHE_ACCESS 0x14 +#define ARMV7_PERFCTR_L1_DCACHE_WB 0x15 +#define ARMV7_PERFCTR_L2_CACHE_ACCESS 0x16 +#define ARMV7_PERFCTR_L2_CACHE_REFILL 0x17 +#define ARMV7_PERFCTR_L2_CACHE_WB 0x18 +#define ARMV7_PERFCTR_BUS_ACCESS 0x19 +#define ARMV7_PERFCTR_MEM_ERROR 0x1A +#define ARMV7_PERFCTR_INSTR_SPEC 0x1B +#define ARMV7_PERFCTR_TTBR_WRITE 0x1C +#define ARMV7_PERFCTR_BUS_CYCLES 0x1D + +#define ARMV7_PERFCTR_CPU_CYCLES 0xFF /* ARMv7 Cortex-A8 specific event types */ -enum armv7_a8_perf_types { - ARMV7_A8_PERFCTR_L2_CACHE_ACCESS = 0x43, - ARMV7_A8_PERFCTR_L2_CACHE_REFILL = 0x44, - ARMV7_A8_PERFCTR_L1_ICACHE_ACCESS = 0x50, - ARMV7_A8_PERFCTR_STALL_ISIDE = 0x56, -}; +#define ARMV7_A8_PERFCTR_L2_CACHE_ACCESS 0x43 +#define ARMV7_A8_PERFCTR_L2_CACHE_REFILL 0x44 +#define ARMV7_A8_PERFCTR_L1_ICACHE_ACCESS 0x50 +#define ARMV7_A8_PERFCTR_STALL_ISIDE 0x56 /* ARMv7 Cortex-A9 specific event types */ -enum armv7_a9_perf_types { - ARMV7_A9_PERFCTR_INSTR_CORE_RENAME = 0x68, - ARMV7_A9_PERFCTR_STALL_ICACHE = 0x60, - ARMV7_A9_PERFCTR_STALL_DISPATCH = 0x66, -}; +#define ARMV7_A9_PERFCTR_INSTR_CORE_RENAME 0x68 +#define ARMV7_A9_PERFCTR_STALL_ICACHE 0x60 +#define ARMV7_A9_PERFCTR_STALL_DISPATCH 0x66 /* ARMv7 Cortex-A5 specific event types */ -enum armv7_a5_perf_types { - ARMV7_A5_PERFCTR_PREFETCH_LINEFILL = 0xc2, - ARMV7_A5_PERFCTR_PREFETCH_LINEFILL_DROP = 0xc3, -}; +#define ARMV7_A5_PERFCTR_PREFETCH_LINEFILL 0xc2 +#define ARMV7_A5_PERFCTR_PREFETCH_LINEFILL_DROP 0xc3 /* ARMv7 Cortex-A15 specific event types */ -enum armv7_a15_perf_types { - ARMV7_A15_PERFCTR_L1_DCACHE_ACCESS_READ = 0x40, - ARMV7_A15_PERFCTR_L1_DCACHE_ACCESS_WRITE = 0x41, - ARMV7_A15_PERFCTR_L1_DCACHE_REFILL_READ = 0x42, - ARMV7_A15_PERFCTR_L1_DCACHE_REFILL_WRITE = 0x43, +#define ARMV7_A15_PERFCTR_L1_DCACHE_ACCESS_READ 0x40 +#define ARMV7_A15_PERFCTR_L1_DCACHE_ACCESS_WRITE 0x41 +#define ARMV7_A15_PERFCTR_L1_DCACHE_REFILL_READ 0x42 +#define ARMV7_A15_PERFCTR_L1_DCACHE_REFILL_WRITE 0x43 - ARMV7_A15_PERFCTR_DTLB_REFILL_L1_READ = 0x4C, - ARMV7_A15_PERFCTR_DTLB_REFILL_L1_WRITE = 0x4D, +#define ARMV7_A15_PERFCTR_DTLB_REFILL_L1_READ 0x4C +#define ARMV7_A15_PERFCTR_DTLB_REFILL_L1_WRITE 0x4D - ARMV7_A15_PERFCTR_L2_CACHE_ACCESS_READ = 0x50, - ARMV7_A15_PERFCTR_L2_CACHE_ACCESS_WRITE = 0x51, - ARMV7_A15_PERFCTR_L2_CACHE_REFILL_READ = 0x52, - ARMV7_A15_PERFCTR_L2_CACHE_REFILL_WRITE = 0x53, +#define ARMV7_A15_PERFCTR_L2_CACHE_ACCESS_READ 0x50 +#define ARMV7_A15_PERFCTR_L2_CACHE_ACCESS_WRITE 0x51 +#define ARMV7_A15_PERFCTR_L2_CACHE_REFILL_READ 0x52 +#define ARMV7_A15_PERFCTR_L2_CACHE_REFILL_WRITE 0x53 - ARMV7_A15_PERFCTR_PC_WRITE_SPEC = 0x76, -}; +#define ARMV7_A15_PERFCTR_PC_WRITE_SPEC 0x76 /* ARMv7 Cortex-A12 specific event types */ -enum armv7_a12_perf_types { - ARMV7_A12_PERFCTR_L1_DCACHE_ACCESS_READ = 0x40, - ARMV7_A12_PERFCTR_L1_DCACHE_ACCESS_WRITE = 0x41, +#define ARMV7_A12_PERFCTR_L1_DCACHE_ACCESS_READ 0x40 +#define ARMV7_A12_PERFCTR_L1_DCACHE_ACCESS_WRITE 0x41 - ARMV7_A12_PERFCTR_L2_CACHE_ACCESS_READ = 0x50, - ARMV7_A12_PERFCTR_L2_CACHE_ACCESS_WRITE = 0x51, +#define ARMV7_A12_PERFCTR_L2_CACHE_ACCESS_READ 0x50 +#define ARMV7_A12_PERFCTR_L2_CACHE_ACCESS_WRITE 0x51 - ARMV7_A12_PERFCTR_PC_WRITE_SPEC = 0x76, +#define ARMV7_A12_PERFCTR_PC_WRITE_SPEC 0x76 - ARMV7_A12_PERFCTR_PF_TLB_REFILL = 0xe7, -}; +#define ARMV7_A12_PERFCTR_PF_TLB_REFILL 0xe7 /* ARMv7 Krait specific event types */ -enum krait_perf_types { - KRAIT_PMRESR0_GROUP0 = 0xcc, - KRAIT_PMRESR1_GROUP0 = 0xd0, - KRAIT_PMRESR2_GROUP0 = 0xd4, - KRAIT_VPMRESR0_GROUP0 = 0xd8, +#define KRAIT_PMRESR0_GROUP0 0xcc +#define KRAIT_PMRESR1_GROUP0 0xd0 +#define KRAIT_PMRESR2_GROUP0 0xd4 +#define KRAIT_VPMRESR0_GROUP0 0xd8 - KRAIT_PERFCTR_L1_ICACHE_ACCESS = 0x10011, - KRAIT_PERFCTR_L1_ICACHE_MISS = 0x10010, +#define KRAIT_PERFCTR_L1_ICACHE_ACCESS 0x10011 +#define KRAIT_PERFCTR_L1_ICACHE_MISS 0x10010 - KRAIT_PERFCTR_L1_ITLB_ACCESS = 0x12222, - KRAIT_PERFCTR_L1_DTLB_ACCESS = 0x12210, -}; +#define KRAIT_PERFCTR_L1_ITLB_ACCESS 0x12222 +#define KRAIT_PERFCTR_L1_DTLB_ACCESS 0x12210 /* ARMv7 Scorpion specific event types */ -enum scorpion_perf_types { - SCORPION_LPM0_GROUP0 = 0x4c, - SCORPION_LPM1_GROUP0 = 0x50, - SCORPION_LPM2_GROUP0 = 0x54, - SCORPION_L2LPM_GROUP0 = 0x58, - SCORPION_VLPM_GROUP0 = 0x5c, +#define SCORPION_LPM0_GROUP0 0x4c +#define SCORPION_LPM1_GROUP0 0x50 +#define SCORPION_LPM2_GROUP0 0x54 +#define SCORPION_L2LPM_GROUP0 0x58 +#define SCORPION_VLPM_GROUP0 0x5c - SCORPION_ICACHE_ACCESS = 0x10053, - SCORPION_ICACHE_MISS = 0x10052, +#define SCORPION_ICACHE_ACCESS 0x10053 +#define SCORPION_ICACHE_MISS 0x10052 - SCORPION_DTLB_ACCESS = 0x12013, - SCORPION_DTLB_MISS = 0x12012, +#define SCORPION_DTLB_ACCESS 0x12013 +#define SCORPION_DTLB_MISS 0x12012 - SCORPION_ITLB_MISS = 0x12021, -}; +#define SCORPION_ITLB_MISS 0x12021 /* * Cortex-A8 HW events mapping @@ -547,6 +531,134 @@ static const unsigned scorpion_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, }; +PMU_FORMAT_ATTR(event, "config:0-7"); + +static struct attribute *armv7_pmu_format_attrs[] = { + &format_attr_event.attr, + NULL, +}; + +static struct attribute_group armv7_pmu_format_attr_group = { + .name = "format", + .attrs = armv7_pmu_format_attrs, +}; + +#define ARMV7_EVENT_ATTR_RESOLVE(m) #m +#define ARMV7_EVENT_ATTR(name, config) \ + PMU_EVENT_ATTR_STRING(name, armv7_event_attr_##name, \ + "event=" ARMV7_EVENT_ATTR_RESOLVE(config)) + +ARMV7_EVENT_ATTR(sw_incr, ARMV7_PERFCTR_PMNC_SW_INCR); +ARMV7_EVENT_ATTR(l1i_cache_refill, ARMV7_PERFCTR_L1_ICACHE_REFILL); +ARMV7_EVENT_ATTR(l1i_tlb_refill, ARMV7_PERFCTR_ITLB_REFILL); +ARMV7_EVENT_ATTR(l1d_cache_refill, ARMV7_PERFCTR_L1_DCACHE_REFILL); +ARMV7_EVENT_ATTR(l1d_cache, ARMV7_PERFCTR_L1_DCACHE_ACCESS); +ARMV7_EVENT_ATTR(l1d_tlb_refill, ARMV7_PERFCTR_DTLB_REFILL); +ARMV7_EVENT_ATTR(ld_retired, ARMV7_PERFCTR_MEM_READ); +ARMV7_EVENT_ATTR(st_retired, ARMV7_PERFCTR_MEM_WRITE); +ARMV7_EVENT_ATTR(inst_retired, ARMV7_PERFCTR_INSTR_EXECUTED); +ARMV7_EVENT_ATTR(exc_taken, ARMV7_PERFCTR_EXC_TAKEN); +ARMV7_EVENT_ATTR(exc_return, ARMV7_PERFCTR_EXC_EXECUTED); +ARMV7_EVENT_ATTR(cid_write_retired, ARMV7_PERFCTR_CID_WRITE); +ARMV7_EVENT_ATTR(pc_write_retired, ARMV7_PERFCTR_PC_WRITE); +ARMV7_EVENT_ATTR(br_immed_retired, ARMV7_PERFCTR_PC_IMM_BRANCH); +ARMV7_EVENT_ATTR(br_return_retired, ARMV7_PERFCTR_PC_PROC_RETURN); +ARMV7_EVENT_ATTR(unaligned_ldst_retired, ARMV7_PERFCTR_MEM_UNALIGNED_ACCESS); +ARMV7_EVENT_ATTR(br_mis_pred, ARMV7_PERFCTR_PC_BRANCH_MIS_PRED); +ARMV7_EVENT_ATTR(cpu_cycles, ARMV7_PERFCTR_CLOCK_CYCLES); +ARMV7_EVENT_ATTR(br_pred, ARMV7_PERFCTR_PC_BRANCH_PRED); + +static struct attribute *armv7_pmuv1_event_attrs[] = { + &armv7_event_attr_sw_incr.attr.attr, + &armv7_event_attr_l1i_cache_refill.attr.attr, + &armv7_event_attr_l1i_tlb_refill.attr.attr, + &armv7_event_attr_l1d_cache_refill.attr.attr, + &armv7_event_attr_l1d_cache.attr.attr, + &armv7_event_attr_l1d_tlb_refill.attr.attr, + &armv7_event_attr_ld_retired.attr.attr, + &armv7_event_attr_st_retired.attr.attr, + &armv7_event_attr_inst_retired.attr.attr, + &armv7_event_attr_exc_taken.attr.attr, + &armv7_event_attr_exc_return.attr.attr, + &armv7_event_attr_cid_write_retired.attr.attr, + &armv7_event_attr_pc_write_retired.attr.attr, + &armv7_event_attr_br_immed_retired.attr.attr, + &armv7_event_attr_br_return_retired.attr.attr, + &armv7_event_attr_unaligned_ldst_retired.attr.attr, + &armv7_event_attr_br_mis_pred.attr.attr, + &armv7_event_attr_cpu_cycles.attr.attr, + &armv7_event_attr_br_pred.attr.attr, + NULL, +}; + +static struct attribute_group armv7_pmuv1_events_attr_group = { + .name = "events", + .attrs = armv7_pmuv1_event_attrs, +}; + +static const struct attribute_group *armv7_pmuv1_attr_groups[] = { + &armv7_pmuv1_events_attr_group, + &armv7_pmu_format_attr_group, + NULL, +}; + +ARMV7_EVENT_ATTR(mem_access, ARMV7_PERFCTR_MEM_ACCESS); +ARMV7_EVENT_ATTR(l1i_cache, ARMV7_PERFCTR_L1_ICACHE_ACCESS); +ARMV7_EVENT_ATTR(l1d_cache_wb, ARMV7_PERFCTR_L1_DCACHE_WB); +ARMV7_EVENT_ATTR(l2d_cache, ARMV7_PERFCTR_L2_CACHE_ACCESS); +ARMV7_EVENT_ATTR(l2d_cache_refill, ARMV7_PERFCTR_L2_CACHE_REFILL); +ARMV7_EVENT_ATTR(l2d_cache_wb, ARMV7_PERFCTR_L2_CACHE_WB); +ARMV7_EVENT_ATTR(bus_access, ARMV7_PERFCTR_BUS_ACCESS); +ARMV7_EVENT_ATTR(memory_error, ARMV7_PERFCTR_MEM_ERROR); +ARMV7_EVENT_ATTR(inst_spec, ARMV7_PERFCTR_INSTR_SPEC); +ARMV7_EVENT_ATTR(ttbr_write_retired, ARMV7_PERFCTR_TTBR_WRITE); +ARMV7_EVENT_ATTR(bus_cycles, ARMV7_PERFCTR_BUS_CYCLES); + +static struct attribute *armv7_pmuv2_event_attrs[] = { + &armv7_event_attr_sw_incr.attr.attr, + &armv7_event_attr_l1i_cache_refill.attr.attr, + &armv7_event_attr_l1i_tlb_refill.attr.attr, + &armv7_event_attr_l1d_cache_refill.attr.attr, + &armv7_event_attr_l1d_cache.attr.attr, + &armv7_event_attr_l1d_tlb_refill.attr.attr, + &armv7_event_attr_ld_retired.attr.attr, + &armv7_event_attr_st_retired.attr.attr, + &armv7_event_attr_inst_retired.attr.attr, + &armv7_event_attr_exc_taken.attr.attr, + &armv7_event_attr_exc_return.attr.attr, + &armv7_event_attr_cid_write_retired.attr.attr, + &armv7_event_attr_pc_write_retired.attr.attr, + &armv7_event_attr_br_immed_retired.attr.attr, + &armv7_event_attr_br_return_retired.attr.attr, + &armv7_event_attr_unaligned_ldst_retired.attr.attr, + &armv7_event_attr_br_mis_pred.attr.attr, + &armv7_event_attr_cpu_cycles.attr.attr, + &armv7_event_attr_br_pred.attr.attr, + &armv7_event_attr_mem_access.attr.attr, + &armv7_event_attr_l1i_cache.attr.attr, + &armv7_event_attr_l1d_cache_wb.attr.attr, + &armv7_event_attr_l2d_cache.attr.attr, + &armv7_event_attr_l2d_cache_refill.attr.attr, + &armv7_event_attr_l2d_cache_wb.attr.attr, + &armv7_event_attr_bus_access.attr.attr, + &armv7_event_attr_memory_error.attr.attr, + &armv7_event_attr_inst_spec.attr.attr, + &armv7_event_attr_ttbr_write_retired.attr.attr, + &armv7_event_attr_bus_cycles.attr.attr, + NULL, +}; + +static struct attribute_group armv7_pmuv2_events_attr_group = { + .name = "events", + .attrs = armv7_pmuv2_event_attrs, +}; + +static const struct attribute_group *armv7_pmuv2_attr_groups[] = { + &armv7_pmuv2_events_attr_group, + &armv7_pmu_format_attr_group, + NULL, +}; + /* * Perf Events' indices */ @@ -1085,6 +1197,7 @@ static int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu) armv7pmu_init(cpu_pmu); cpu_pmu->name = "armv7_cortex_a8"; cpu_pmu->map_event = armv7_a8_map_event; + cpu_pmu->pmu.attr_groups = armv7_pmuv1_attr_groups; return armv7_probe_num_events(cpu_pmu); } @@ -1093,6 +1206,7 @@ static int armv7_a9_pmu_init(struct arm_pmu *cpu_pmu) armv7pmu_init(cpu_pmu); cpu_pmu->name = "armv7_cortex_a9"; cpu_pmu->map_event = armv7_a9_map_event; + cpu_pmu->pmu.attr_groups = armv7_pmuv1_attr_groups; return armv7_probe_num_events(cpu_pmu); } @@ -1101,6 +1215,7 @@ static int armv7_a5_pmu_init(struct arm_pmu *cpu_pmu) armv7pmu_init(cpu_pmu); cpu_pmu->name = "armv7_cortex_a5"; cpu_pmu->map_event = armv7_a5_map_event; + cpu_pmu->pmu.attr_groups = armv7_pmuv1_attr_groups; return armv7_probe_num_events(cpu_pmu); } @@ -1110,6 +1225,7 @@ static int armv7_a15_pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->name = "armv7_cortex_a15"; cpu_pmu->map_event = armv7_a15_map_event; cpu_pmu->set_event_filter = armv7pmu_set_event_filter; + cpu_pmu->pmu.attr_groups = armv7_pmuv2_attr_groups; return armv7_probe_num_events(cpu_pmu); } @@ -1119,6 +1235,7 @@ static int armv7_a7_pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->name = "armv7_cortex_a7"; cpu_pmu->map_event = armv7_a7_map_event; cpu_pmu->set_event_filter = armv7pmu_set_event_filter; + cpu_pmu->pmu.attr_groups = armv7_pmuv2_attr_groups; return armv7_probe_num_events(cpu_pmu); } @@ -1128,6 +1245,7 @@ static int armv7_a12_pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->name = "armv7_cortex_a12"; cpu_pmu->map_event = armv7_a12_map_event; cpu_pmu->set_event_filter = armv7pmu_set_event_filter; + cpu_pmu->pmu.attr_groups = armv7_pmuv2_attr_groups; return armv7_probe_num_events(cpu_pmu); } @@ -1135,6 +1253,7 @@ static int armv7_a17_pmu_init(struct arm_pmu *cpu_pmu) { int ret = armv7_a12_pmu_init(cpu_pmu); cpu_pmu->name = "armv7_cortex_a17"; + cpu_pmu->pmu.attr_groups = armv7_pmuv2_attr_groups; return ret; } diff --git a/arch/arm/kernel/pj4-cp0.c b/arch/arm/kernel/pj4-cp0.c index 8153e36b2491..7c9248b74d3f 100644 --- a/arch/arm/kernel/pj4-cp0.c +++ b/arch/arm/kernel/pj4-cp0.c @@ -66,9 +66,13 @@ static void __init pj4_cp_access_write(u32 value) __asm__ __volatile__ ( "mcr p15, 0, %1, c1, c0, 2\n\t" +#ifdef CONFIG_THUMB2_KERNEL + "isb\n\t" +#else "mrc p15, 0, %0, c1, c0, 2\n\t" "mov %0, %0\n\t" "sub pc, pc, #4\n\t" +#endif : "=r" (temp) : "r" (value)); } diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 20edd349d379..7d0cba6f1cc5 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -7,6 +7,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ +#include <linux/efi.h> #include <linux/export.h> #include <linux/kernel.h> #include <linux/stddef.h> @@ -37,7 +38,9 @@ #include <asm/cp15.h> #include <asm/cpu.h> #include <asm/cputype.h> +#include <asm/efi.h> #include <asm/elf.h> +#include <asm/early_ioremap.h> #include <asm/fixmap.h> #include <asm/procinfo.h> #include <asm/psci.h> @@ -375,6 +378,72 @@ void __init early_print(const char *str, ...) printk("%s", buf); } +#ifdef CONFIG_ARM_PATCH_IDIV + +static inline u32 __attribute_const__ sdiv_instruction(void) +{ + if (IS_ENABLED(CONFIG_THUMB2_KERNEL)) { + /* "sdiv r0, r0, r1" */ + u32 insn = __opcode_thumb32_compose(0xfb90, 0xf0f1); + return __opcode_to_mem_thumb32(insn); + } + + /* "sdiv r0, r0, r1" */ + return __opcode_to_mem_arm(0xe710f110); +} + +static inline u32 __attribute_const__ udiv_instruction(void) +{ + if (IS_ENABLED(CONFIG_THUMB2_KERNEL)) { + /* "udiv r0, r0, r1" */ + u32 insn = __opcode_thumb32_compose(0xfbb0, 0xf0f1); + return __opcode_to_mem_thumb32(insn); + } + + /* "udiv r0, r0, r1" */ + return __opcode_to_mem_arm(0xe730f110); +} + +static inline u32 __attribute_const__ bx_lr_instruction(void) +{ + if (IS_ENABLED(CONFIG_THUMB2_KERNEL)) { + /* "bx lr; nop" */ + u32 insn = __opcode_thumb32_compose(0x4770, 0x46c0); + return __opcode_to_mem_thumb32(insn); + } + + /* "bx lr" */ + return __opcode_to_mem_arm(0xe12fff1e); +} + +static void __init patch_aeabi_idiv(void) +{ + extern void __aeabi_uidiv(void); + extern void __aeabi_idiv(void); + uintptr_t fn_addr; + unsigned int mask; + + mask = IS_ENABLED(CONFIG_THUMB2_KERNEL) ? HWCAP_IDIVT : HWCAP_IDIVA; + if (!(elf_hwcap & mask)) + return; + + pr_info("CPU: div instructions available: patching division code\n"); + + fn_addr = ((uintptr_t)&__aeabi_uidiv) & ~1; + ((u32 *)fn_addr)[0] = udiv_instruction(); + ((u32 *)fn_addr)[1] = bx_lr_instruction(); + flush_icache_range(fn_addr, fn_addr + 8); + + fn_addr = ((uintptr_t)&__aeabi_idiv) & ~1; + ((u32 *)fn_addr)[0] = sdiv_instruction(); + ((u32 *)fn_addr)[1] = bx_lr_instruction(); + flush_icache_range(fn_addr, fn_addr + 8); +} + +#else +static inline void patch_aeabi_idiv(void) { } +#endif + static void __init cpuid_init_hwcaps(void) { int block; @@ -642,6 +711,7 @@ static void __init setup_processor(void) elf_hwcap = list->elf_hwcap; cpuid_init_hwcaps(); + patch_aeabi_idiv(); #ifndef CONFIG_ARM_THUMB elf_hwcap &= ~(HWCAP_THUMB | HWCAP_IDIVT); @@ -956,8 +1026,8 @@ void __init setup_arch(char **cmdline_p) strlcpy(cmd_line, boot_command_line, COMMAND_LINE_SIZE); *cmdline_p = cmd_line; - if (IS_ENABLED(CONFIG_FIX_EARLYCON_MEM)) - early_fixmap_init(); + early_fixmap_init(); + early_ioremap_init(); parse_early_param(); @@ -965,9 +1035,12 @@ void __init setup_arch(char **cmdline_p) early_paging_init(mdesc); #endif setup_dma_zone(mdesc); + efi_init(); sanity_check_meminfo(); arm_memblock_init(mdesc); + early_ioremap_reset(); + paging_init(mdesc); request_standard_resources(mdesc); diff --git a/arch/arm/kernel/smccc-call.S b/arch/arm/kernel/smccc-call.S new file mode 100644 index 000000000000..2e48b674aab1 --- /dev/null +++ b/arch/arm/kernel/smccc-call.S @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2015, Linaro Limited + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#include <linux/linkage.h> + +#include <asm/opcodes-sec.h> +#include <asm/opcodes-virt.h> +#include <asm/unwind.h> + + /* + * Wrap c macros in asm macros to delay expansion until after the + * SMCCC asm macro is expanded. + */ + .macro SMCCC_SMC + __SMC(0) + .endm + + .macro SMCCC_HVC + __HVC(0) + .endm + + .macro SMCCC instr +UNWIND( .fnstart) + mov r12, sp + push {r4-r7} +UNWIND( .save {r4-r7}) + ldm r12, {r4-r7} + \instr + pop {r4-r7} + ldr r12, [sp, #(4 * 4)] + stm r12, {r0-r3} + bx lr +UNWIND( .fnend) + .endm + +/* + * void smccc_smc(unsigned long a0, unsigned long a1, unsigned long a2, + * unsigned long a3, unsigned long a4, unsigned long a5, + * unsigned long a6, unsigned long a7, struct arm_smccc_res *res) + */ +ENTRY(arm_smccc_smc) + SMCCC SMCCC_SMC +ENDPROC(arm_smccc_smc) + +/* + * void smccc_hvc(unsigned long a0, unsigned long a1, unsigned long a2, + * unsigned long a3, unsigned long a4, unsigned long a5, + * unsigned long a6, unsigned long a7, struct arm_smccc_res *res) + */ +ENTRY(arm_smccc_hvc) + SMCCC SMCCC_HVC +ENDPROC(arm_smccc_hvc) diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index b26361355dae..37312f6749f3 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -69,11 +69,15 @@ enum ipi_msg_type { IPI_TIMER, IPI_RESCHEDULE, IPI_CALL_FUNC, - IPI_CALL_FUNC_SINGLE, IPI_CPU_STOP, IPI_IRQ_WORK, IPI_COMPLETION, - IPI_CPU_BACKTRACE = 15, + IPI_CPU_BACKTRACE, + /* + * SGI8-15 can be reserved by secure firmware, and thus may + * not be usable by the kernel. Please keep the above limited + * to at most 8 entries. + */ }; static DECLARE_COMPLETION(cpu_running); @@ -475,7 +479,6 @@ static const char *ipi_types[NR_IPI] __tracepoint_string = { S(IPI_TIMER, "Timer broadcast interrupts"), S(IPI_RESCHEDULE, "Rescheduling interrupts"), S(IPI_CALL_FUNC, "Function call interrupts"), - S(IPI_CALL_FUNC_SINGLE, "Single function call interrupts"), S(IPI_CPU_STOP, "CPU stop interrupts"), S(IPI_IRQ_WORK, "IRQ work interrupts"), S(IPI_COMPLETION, "completion interrupts"), @@ -525,7 +528,7 @@ void arch_send_wakeup_ipi_mask(const struct cpumask *mask) void arch_send_call_function_single_ipi(int cpu) { - smp_cross_call(cpumask_of(cpu), IPI_CALL_FUNC_SINGLE); + smp_cross_call(cpumask_of(cpu), IPI_CALL_FUNC); } #ifdef CONFIG_IRQ_WORK @@ -620,12 +623,6 @@ void handle_IPI(int ipinr, struct pt_regs *regs) irq_exit(); break; - case IPI_CALL_FUNC_SINGLE: - irq_enter(); - generic_smp_call_function_single_interrupt(); - irq_exit(); - break; - case IPI_CPU_STOP: irq_enter(); ipi_cpu_stop(cpu); diff --git a/arch/arm/kernel/vdso.c b/arch/arm/kernel/vdso.c index 54a5aeab988d..994e971a8538 100644 --- a/arch/arm/kernel/vdso.c +++ b/arch/arm/kernel/vdso.c @@ -224,7 +224,7 @@ static int install_vvar(struct mm_struct *mm, unsigned long addr) VM_READ | VM_MAYREAD, &vdso_data_mapping); - return IS_ERR(vma) ? PTR_ERR(vma) : 0; + return PTR_ERR_OR_ZERO(vma); } /* assumes mmap_sem is write-locked */ diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index e06fd299de08..dda1959f0dde 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -44,6 +44,7 @@ #include <asm/kvm_emulate.h> #include <asm/kvm_coproc.h> #include <asm/kvm_psci.h> +#include <asm/sections.h> #ifdef REQUIRES_VIRT __asm__(".arch_extension virt"); @@ -58,9 +59,12 @@ static DEFINE_PER_CPU(struct kvm_vcpu *, kvm_arm_running_vcpu); /* The VMID used in the VTTBR */ static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1); -static u8 kvm_next_vmid; +static u32 kvm_next_vmid; +static unsigned int kvm_vmid_bits __read_mostly; static DEFINE_SPINLOCK(kvm_vmid_lock); +static bool vgic_present; + static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu) { BUG_ON(preemptible()); @@ -132,7 +136,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm->arch.vmid_gen = 0; /* The maximum number of VCPUs is limited by the host's GIC model */ - kvm->arch.max_vcpus = kvm_vgic_get_max_vcpus(); + kvm->arch.max_vcpus = vgic_present ? + kvm_vgic_get_max_vcpus() : KVM_MAX_VCPUS; return ret; out_free_stage2_pgd: @@ -172,6 +177,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) int r; switch (ext) { case KVM_CAP_IRQCHIP: + r = vgic_present; + break; case KVM_CAP_IOEVENTFD: case KVM_CAP_DEVICE_CTRL: case KVM_CAP_USER_MEMORY: @@ -433,11 +440,12 @@ static void update_vttbr(struct kvm *kvm) kvm->arch.vmid_gen = atomic64_read(&kvm_vmid_gen); kvm->arch.vmid = kvm_next_vmid; kvm_next_vmid++; + kvm_next_vmid &= (1 << kvm_vmid_bits) - 1; /* update vttbr to be used with the new vmid */ pgd_phys = virt_to_phys(kvm_get_hwpgd(kvm)); BUG_ON(pgd_phys & ~VTTBR_BADDR_MASK); - vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK; + vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK(kvm_vmid_bits); kvm->arch.vttbr = pgd_phys | vmid; spin_unlock(&kvm_vmid_lock); @@ -603,6 +611,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) ret = kvm_call_hyp(__kvm_vcpu_run, vcpu); vcpu->mode = OUTSIDE_GUEST_MODE; + vcpu->stat.exits++; /* * Back from guest *************************************************************/ @@ -913,6 +922,8 @@ static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm, switch (dev_id) { case KVM_ARM_DEVICE_VGIC_V2: + if (!vgic_present) + return -ENXIO; return kvm_vgic_addr(kvm, type, &dev_addr->addr, true); default: return -ENODEV; @@ -927,6 +938,8 @@ long kvm_arch_vm_ioctl(struct file *filp, switch (ioctl) { case KVM_CREATE_IRQCHIP: { + if (!vgic_present) + return -ENXIO; return kvm_vgic_create(kvm, KVM_DEV_TYPE_ARM_VGIC_V2); } case KVM_ARM_SET_DEVICE_ADDR: { @@ -1067,6 +1080,12 @@ static int init_hyp_mode(void) goto out_free_mappings; } + err = create_hyp_mappings(__start_rodata, __end_rodata); + if (err) { + kvm_err("Cannot map rodata section\n"); + goto out_free_mappings; + } + /* * Map the Hyp stack pages */ @@ -1111,8 +1130,17 @@ static int init_hyp_mode(void) * Init HYP view of VGIC */ err = kvm_vgic_hyp_init(); - if (err) + switch (err) { + case 0: + vgic_present = true; + break; + case -ENODEV: + case -ENXIO: + vgic_present = false; + break; + default: goto out_free_context; + } /* * Init HYP architected timer support @@ -1127,6 +1155,10 @@ static int init_hyp_mode(void) kvm_perf_init(); + /* set size of VMID supported by CPU */ + kvm_vmid_bits = kvm_get_vmid_bits(); + kvm_info("%d-bit VMID\n", kvm_vmid_bits); + kvm_info("Hyp mode initialized successfully\n"); return 0; diff --git a/arch/arm/kvm/emulate.c b/arch/arm/kvm/emulate.c index d6c005283678..dc99159857b4 100644 --- a/arch/arm/kvm/emulate.c +++ b/arch/arm/kvm/emulate.c @@ -275,6 +275,40 @@ static u32 exc_vector_base(struct kvm_vcpu *vcpu) return vbar; } +/* + * Switch to an exception mode, updating both CPSR and SPSR. Follow + * the logic described in AArch32.EnterMode() from the ARMv8 ARM. + */ +static void kvm_update_psr(struct kvm_vcpu *vcpu, unsigned long mode) +{ + unsigned long cpsr = *vcpu_cpsr(vcpu); + u32 sctlr = vcpu->arch.cp15[c1_SCTLR]; + + *vcpu_cpsr(vcpu) = (cpsr & ~MODE_MASK) | mode; + + switch (mode) { + case FIQ_MODE: + *vcpu_cpsr(vcpu) |= PSR_F_BIT; + /* Fall through */ + case ABT_MODE: + case IRQ_MODE: + *vcpu_cpsr(vcpu) |= PSR_A_BIT; + /* Fall through */ + default: + *vcpu_cpsr(vcpu) |= PSR_I_BIT; + } + + *vcpu_cpsr(vcpu) &= ~(PSR_IT_MASK | PSR_J_BIT | PSR_E_BIT | PSR_T_BIT); + + if (sctlr & SCTLR_TE) + *vcpu_cpsr(vcpu) |= PSR_T_BIT; + if (sctlr & SCTLR_EE) + *vcpu_cpsr(vcpu) |= PSR_E_BIT; + + /* Note: These now point to the mode banked copies */ + *vcpu_spsr(vcpu) = cpsr; +} + /** * kvm_inject_undefined - inject an undefined exception into the guest * @vcpu: The VCPU to receive the undefined exception @@ -286,29 +320,13 @@ static u32 exc_vector_base(struct kvm_vcpu *vcpu) */ void kvm_inject_undefined(struct kvm_vcpu *vcpu) { - unsigned long new_lr_value; - unsigned long new_spsr_value; unsigned long cpsr = *vcpu_cpsr(vcpu); - u32 sctlr = vcpu->arch.cp15[c1_SCTLR]; bool is_thumb = (cpsr & PSR_T_BIT); u32 vect_offset = 4; u32 return_offset = (is_thumb) ? 2 : 4; - new_spsr_value = cpsr; - new_lr_value = *vcpu_pc(vcpu) - return_offset; - - *vcpu_cpsr(vcpu) = (cpsr & ~MODE_MASK) | UND_MODE; - *vcpu_cpsr(vcpu) |= PSR_I_BIT; - *vcpu_cpsr(vcpu) &= ~(PSR_IT_MASK | PSR_J_BIT | PSR_E_BIT | PSR_T_BIT); - - if (sctlr & SCTLR_TE) - *vcpu_cpsr(vcpu) |= PSR_T_BIT; - if (sctlr & SCTLR_EE) - *vcpu_cpsr(vcpu) |= PSR_E_BIT; - - /* Note: These now point to UND banked copies */ - *vcpu_spsr(vcpu) = cpsr; - *vcpu_reg(vcpu, 14) = new_lr_value; + kvm_update_psr(vcpu, UND_MODE); + *vcpu_reg(vcpu, 14) = *vcpu_pc(vcpu) - return_offset; /* Branch to exception vector */ *vcpu_pc(vcpu) = exc_vector_base(vcpu) + vect_offset; @@ -320,30 +338,14 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu) */ static void inject_abt(struct kvm_vcpu *vcpu, bool is_pabt, unsigned long addr) { - unsigned long new_lr_value; - unsigned long new_spsr_value; unsigned long cpsr = *vcpu_cpsr(vcpu); - u32 sctlr = vcpu->arch.cp15[c1_SCTLR]; bool is_thumb = (cpsr & PSR_T_BIT); u32 vect_offset; u32 return_offset = (is_thumb) ? 4 : 0; bool is_lpae; - new_spsr_value = cpsr; - new_lr_value = *vcpu_pc(vcpu) + return_offset; - - *vcpu_cpsr(vcpu) = (cpsr & ~MODE_MASK) | ABT_MODE; - *vcpu_cpsr(vcpu) |= PSR_I_BIT | PSR_A_BIT; - *vcpu_cpsr(vcpu) &= ~(PSR_IT_MASK | PSR_J_BIT | PSR_E_BIT | PSR_T_BIT); - - if (sctlr & SCTLR_TE) - *vcpu_cpsr(vcpu) |= PSR_T_BIT; - if (sctlr & SCTLR_EE) - *vcpu_cpsr(vcpu) |= PSR_E_BIT; - - /* Note: These now point to ABT banked copies */ - *vcpu_spsr(vcpu) = cpsr; - *vcpu_reg(vcpu, 14) = new_lr_value; + kvm_update_psr(vcpu, ABT_MODE); + *vcpu_reg(vcpu, 14) = *vcpu_pc(vcpu) + return_offset; if (is_pabt) vect_offset = 12; diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c index 96e935bbc38c..5fa69d7bae58 100644 --- a/arch/arm/kvm/guest.c +++ b/arch/arm/kvm/guest.c @@ -33,6 +33,12 @@ #define VCPU_STAT(x) { #x, offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU } struct kvm_stats_debugfs_item debugfs_entries[] = { + VCPU_STAT(hvc_exit_stat), + VCPU_STAT(wfe_exit_stat), + VCPU_STAT(wfi_exit_stat), + VCPU_STAT(mmio_exit_user), + VCPU_STAT(mmio_exit_kernel), + VCPU_STAT(exits), { NULL } }; diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c index 95f12b2ccdcb..3ede90d8b20b 100644 --- a/arch/arm/kvm/handle_exit.c +++ b/arch/arm/kvm/handle_exit.c @@ -42,6 +42,7 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) trace_kvm_hvc(*vcpu_pc(vcpu), *vcpu_reg(vcpu, 0), kvm_vcpu_hvc_get_imm(vcpu)); + vcpu->stat.hvc_exit_stat++; ret = kvm_psci_call(vcpu); if (ret < 0) { @@ -89,9 +90,11 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run) { if (kvm_vcpu_get_hsr(vcpu) & HSR_WFI_IS_WFE) { trace_kvm_wfx(*vcpu_pc(vcpu), true); + vcpu->stat.wfe_exit_stat++; kvm_vcpu_on_spin(vcpu); } else { trace_kvm_wfx(*vcpu_pc(vcpu), false); + vcpu->stat.wfi_exit_stat++; kvm_vcpu_block(vcpu); } diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c index 3a10c9f1d0a4..7f33b2056ae6 100644 --- a/arch/arm/kvm/mmio.c +++ b/arch/arm/kvm/mmio.c @@ -210,8 +210,11 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, if (!ret) { /* We handled the access successfully in the kernel. */ + vcpu->stat.mmio_exit_kernel++; kvm_handle_mmio_return(vcpu, run); return 1; + } else { + vcpu->stat.mmio_exit_user++; } run->exit_reason = KVM_EXIT_MMIO; diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 61d96a645ff3..aba61fd3697a 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -656,9 +656,9 @@ static void *kvm_alloc_hwpgd(void) * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation. * @kvm: The KVM struct pointer for the VM. * - * Allocates the 1st level table only of size defined by S2_PGD_ORDER (can - * support either full 40-bit input addresses or limited to 32-bit input - * addresses). Clears the allocated pages. + * Allocates only the stage-2 HW PGD level table(s) (can support either full + * 40-bit input addresses or limited to 32-bit input addresses). Clears the + * allocated pages. * * Note we don't need locking here as this is only called when the VM is * created, which can only be done once. @@ -992,9 +992,9 @@ out: return ret; } -static bool transparent_hugepage_adjust(pfn_t *pfnp, phys_addr_t *ipap) +static bool transparent_hugepage_adjust(kvm_pfn_t *pfnp, phys_addr_t *ipap) { - pfn_t pfn = *pfnp; + kvm_pfn_t pfn = *pfnp; gfn_t gfn = *ipap >> PAGE_SHIFT; if (PageTransCompound(pfn_to_page(pfn))) { @@ -1201,7 +1201,7 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm, kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask); } -static void coherent_cache_guest_page(struct kvm_vcpu *vcpu, pfn_t pfn, +static void coherent_cache_guest_page(struct kvm_vcpu *vcpu, kvm_pfn_t pfn, unsigned long size, bool uncached) { __coherent_cache_guest_page(vcpu, pfn, size, uncached); @@ -1218,7 +1218,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, struct kvm *kvm = vcpu->kvm; struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache; struct vm_area_struct *vma; - pfn_t pfn; + kvm_pfn_t pfn; pgprot_t mem_type = PAGE_S2; bool fault_ipa_uncached; bool logging_active = memslot_is_logging(memslot); @@ -1346,7 +1346,7 @@ static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa) { pmd_t *pmd; pte_t *pte; - pfn_t pfn; + kvm_pfn_t pfn; bool pfn_valid = false; trace_kvm_access_fault(fault_ipa); diff --git a/arch/arm/lib/lib1funcs.S b/arch/arm/lib/lib1funcs.S index af2267f6a529..9397b2e532af 100644 --- a/arch/arm/lib/lib1funcs.S +++ b/arch/arm/lib/lib1funcs.S @@ -205,6 +205,10 @@ Boston, MA 02111-1307, USA. */ .endm +#ifdef CONFIG_ARM_PATCH_IDIV + .align 3 +#endif + ENTRY(__udivsi3) ENTRY(__aeabi_uidiv) UNWIND(.fnstart) @@ -253,6 +257,10 @@ UNWIND(.fnstart) UNWIND(.fnend) ENDPROC(__umodsi3) +#ifdef CONFIG_ARM_PATCH_IDIV + .align 3 +#endif + ENTRY(__divsi3) ENTRY(__aeabi_idiv) UNWIND(.fnstart) diff --git a/arch/arm/lib/uaccess_with_memcpy.c b/arch/arm/lib/uaccess_with_memcpy.c index 588bbc288396..6bd1089b07e0 100644 --- a/arch/arm/lib/uaccess_with_memcpy.c +++ b/arch/arm/lib/uaccess_with_memcpy.c @@ -52,14 +52,13 @@ pin_page_for_write(const void __user *_addr, pte_t **ptep, spinlock_t **ptlp) * * Lock the page table for the destination and check * to see that it's still huge and whether or not we will - * need to fault on write, or if we have a splitting THP. + * need to fault on write. */ if (unlikely(pmd_thp_or_huge(*pmd))) { ptl = ¤t->mm->page_table_lock; spin_lock(ptl); if (unlikely(!pmd_thp_or_huge(*pmd) - || pmd_hugewillfault(*pmd) - || pmd_trans_splitting(*pmd))) { + || pmd_hugewillfault(*pmd))) { spin_unlock(ptl); return 0; } diff --git a/arch/arm/mach-clps711x/board-autcpu12.c b/arch/arm/mach-clps711x/board-autcpu12.c index c3d964221767..ba3d7d1b28f8 100644 --- a/arch/arm/mach-clps711x/board-autcpu12.c +++ b/arch/arm/mach-clps711x/board-autcpu12.c @@ -31,7 +31,7 @@ #include <linux/mtd/partitions.h> #include <linux/mtd/nand-gpio.h> #include <linux/platform_device.h> -#include <linux/basic_mmio_gpio.h> +#include <linux/gpio/driver.h> #include <mach/hardware.h> #include <asm/sizes.h> diff --git a/arch/arm/mach-clps711x/board-p720t.c b/arch/arm/mach-clps711x/board-p720t.c index e68dd629bda2..80a16a8b3776 100644 --- a/arch/arm/mach-clps711x/board-p720t.c +++ b/arch/arm/mach-clps711x/board-p720t.c @@ -28,7 +28,7 @@ #include <linux/leds.h> #include <linux/sizes.h> #include <linux/backlight.h> -#include <linux/basic_mmio_gpio.h> +#include <linux/gpio/driver.h> #include <linux/platform_device.h> #include <linux/mtd/partitions.h> #include <linux/mtd/nand-gpio.h> diff --git a/arch/arm/mach-davinci/board-da850-evm.c b/arch/arm/mach-davinci/board-da850-evm.c index 1ed545cc2b83..9cc7b818fbf6 100644 --- a/arch/arm/mach-davinci/board-da850-evm.c +++ b/arch/arm/mach-davinci/board-da850-evm.c @@ -49,8 +49,8 @@ #include <asm/mach/arch.h> #include <asm/system_info.h> -#include <media/tvp514x.h> -#include <media/adv7343.h> +#include <media/i2c/tvp514x.h> +#include <media/i2c/adv7343.h> #define DA850_EVM_PHY_ID "davinci_mdio-0:00" #define DA850_LCD_PWR_PIN GPIO_TO_PIN(2, 8) diff --git a/arch/arm/mach-davinci/board-dm355-evm.c b/arch/arm/mach-davinci/board-dm355-evm.c index b46b4d25f93e..c71dd9982f03 100644 --- a/arch/arm/mach-davinci/board-dm355-evm.c +++ b/arch/arm/mach-davinci/board-dm355-evm.c @@ -19,7 +19,7 @@ #include <linux/gpio.h> #include <linux/clk.h> #include <linux/videodev2.h> -#include <media/tvp514x.h> +#include <media/i2c/tvp514x.h> #include <linux/spi/spi.h> #include <linux/spi/eeprom.h> #include <linux/platform_data/gpio-davinci.h> diff --git a/arch/arm/mach-davinci/board-dm365-evm.c b/arch/arm/mach-davinci/board-dm365-evm.c index a756003595e9..f073518f621a 100644 --- a/arch/arm/mach-davinci/board-dm365-evm.c +++ b/arch/arm/mach-davinci/board-dm365-evm.c @@ -40,8 +40,8 @@ #include <linux/platform_data/mtd-davinci.h> #include <linux/platform_data/keyscan-davinci.h> -#include <media/ths7303.h> -#include <media/tvp514x.h> +#include <media/i2c/ths7303.h> +#include <media/i2c/tvp514x.h> #include "davinci.h" diff --git a/arch/arm/mach-davinci/board-dm644x-evm.c b/arch/arm/mach-davinci/board-dm644x-evm.c index bbdd2d614b49..7a20507a3eef 100644 --- a/arch/arm/mach-davinci/board-dm644x-evm.c +++ b/arch/arm/mach-davinci/board-dm644x-evm.c @@ -26,7 +26,7 @@ #include <linux/v4l2-dv-timings.h> #include <linux/export.h> -#include <media/tvp514x.h> +#include <media/i2c/tvp514x.h> #include <asm/mach-types.h> #include <asm/mach/arch.h> diff --git a/arch/arm/mach-davinci/board-dm646x-evm.c b/arch/arm/mach-davinci/board-dm646x-evm.c index 846a84ddc28e..ee6ab7e8d3b0 100644 --- a/arch/arm/mach-davinci/board-dm646x-evm.c +++ b/arch/arm/mach-davinci/board-dm646x-evm.c @@ -25,8 +25,8 @@ #include <linux/platform_data/at24.h> #include <linux/i2c/pcf857x.h> -#include <media/tvp514x.h> -#include <media/adv7343.h> +#include <media/i2c/tvp514x.h> +#include <media/i2c/adv7343.h> #include <linux/mtd/mtd.h> #include <linux/mtd/nand.h> diff --git a/arch/arm/mach-ep93xx/snappercl15.c b/arch/arm/mach-ep93xx/snappercl15.c index c4904264256a..b2db791b3b38 100644 --- a/arch/arm/mach-ep93xx/snappercl15.c +++ b/arch/arm/mach-ep93xx/snappercl15.c @@ -49,7 +49,7 @@ static void snappercl15_nand_cmd_ctrl(struct mtd_info *mtd, int cmd, unsigned int ctrl) { - struct nand_chip *chip = mtd->priv; + struct nand_chip *chip = mtd_to_nand(mtd); static u16 nand_state = SNAPPERCL15_NAND_WPN; u16 set; @@ -76,7 +76,7 @@ static void snappercl15_nand_cmd_ctrl(struct mtd_info *mtd, int cmd, static int snappercl15_nand_dev_ready(struct mtd_info *mtd) { - struct nand_chip *chip = mtd->priv; + struct nand_chip *chip = mtd_to_nand(mtd); return !!(__raw_readw(NAND_CTRL_ADDR(chip)) & SNAPPERCL15_NAND_RDY); } diff --git a/arch/arm/mach-ep93xx/ts72xx.c b/arch/arm/mach-ep93xx/ts72xx.c index 61f4b5dc4d7d..45b81a2bcd4b 100644 --- a/arch/arm/mach-ep93xx/ts72xx.c +++ b/arch/arm/mach-ep93xx/ts72xx.c @@ -74,7 +74,7 @@ static void __init ts72xx_map_io(void) static void ts72xx_nand_hwcontrol(struct mtd_info *mtd, int cmd, unsigned int ctrl) { - struct nand_chip *chip = mtd->priv; + struct nand_chip *chip = mtd_to_nand(mtd); if (ctrl & NAND_CTRL_CHANGE) { void __iomem *addr = chip->IO_ADDR_R; @@ -96,7 +96,7 @@ static void ts72xx_nand_hwcontrol(struct mtd_info *mtd, static int ts72xx_nand_device_ready(struct mtd_info *mtd) { - struct nand_chip *chip = mtd->priv; + struct nand_chip *chip = mtd_to_nand(mtd); void __iomem *addr = chip->IO_ADDR_R; addr += (1 << TS72XX_NAND_BUSY_ADDR_LINE); diff --git a/arch/arm/mach-exynos/Kconfig b/arch/arm/mach-exynos/Kconfig index 3a10f1a8317a..ff105399aae4 100644 --- a/arch/arm/mach-exynos/Kconfig +++ b/arch/arm/mach-exynos/Kconfig @@ -27,6 +27,7 @@ menuconfig ARCH_EXYNOS select SRAM select THERMAL select MFD_SYSCON + select CLKSRC_EXYNOS_MCT help Support for SAMSUNG EXYNOS SoCs (EXYNOS4/5) diff --git a/arch/arm/mach-imx/devices/devices-common.h b/arch/arm/mach-imx/devices/devices-common.h index 67f7fb13050d..09cebd8cef2b 100644 --- a/arch/arm/mach-imx/devices/devices-common.h +++ b/arch/arm/mach-imx/devices/devices-common.h @@ -177,7 +177,7 @@ struct platform_device *__init imx_add_imx_uart_1irq( const struct imxuart_platform_data *pdata); #include <linux/platform_data/video-mx3fb.h> -#include <linux/platform_data/camera-mx3.h> +#include <linux/platform_data/media/camera-mx3.h> struct imx_ipu_core_data { resource_size_t iobase; resource_size_t synirq; @@ -192,7 +192,7 @@ struct platform_device *__init imx_add_mx3_sdc_fb( const struct imx_ipu_core_data *data, struct mx3fb_platform_data *pdata); -#include <linux/platform_data/camera-mx2.h> +#include <linux/platform_data/media/camera-mx2.h> struct imx_mx2_camera_data { const char *devid; resource_size_t iobasecsi; diff --git a/arch/arm/mach-imx/mach-mx21ads.c b/arch/arm/mach-imx/mach-mx21ads.c index 703ce31d7379..9986f9a697c8 100644 --- a/arch/arm/mach-imx/mach-mx21ads.c +++ b/arch/arm/mach-imx/mach-mx21ads.c @@ -17,7 +17,7 @@ #include <linux/platform_device.h> #include <linux/mtd/mtd.h> #include <linux/mtd/physmap.h> -#include <linux/basic_mmio_gpio.h> +#include <linux/gpio/driver.h> #include <linux/gpio.h> #include <linux/regulator/fixed.h> #include <linux/regulator/machine.h> diff --git a/arch/arm/mach-imx/mach-qong.c b/arch/arm/mach-imx/mach-qong.c index a213e7b9cb1c..5c2764604727 100644 --- a/arch/arm/mach-imx/mach-qong.c +++ b/arch/arm/mach-imx/mach-qong.c @@ -131,7 +131,7 @@ static void qong_init_nor_mtd(void) */ static void qong_nand_cmd_ctrl(struct mtd_info *mtd, int cmd, unsigned int ctrl) { - struct nand_chip *nand_chip = mtd->priv; + struct nand_chip *nand_chip = mtd_to_nand(mtd); if (cmd == NAND_CMD_NONE) return; diff --git a/arch/arm/mach-ixp4xx/ixdp425-setup.c b/arch/arm/mach-ixp4xx/ixdp425-setup.c index e7b8befa8729..508c2d7786e2 100644 --- a/arch/arm/mach-ixp4xx/ixdp425-setup.c +++ b/arch/arm/mach-ixp4xx/ixdp425-setup.c @@ -76,8 +76,8 @@ static struct mtd_partition ixdp425_partitions[] = { static void ixdp425_flash_nand_cmd_ctrl(struct mtd_info *mtd, int cmd, unsigned int ctrl) { - struct nand_chip *this = mtd->priv; - int offset = (int)this->priv; + struct nand_chip *this = mtd_to_nand(mtd); + int offset = (int)nand_get_controller_data(this); if (ctrl & NAND_CTRL_CHANGE) { if (ctrl & NAND_NCE) { @@ -88,7 +88,7 @@ ixdp425_flash_nand_cmd_ctrl(struct mtd_info *mtd, int cmd, unsigned int ctrl) offset = (ctrl & NAND_CLE) ? IXDP425_NAND_CMD_BYTE : 0; offset |= (ctrl & NAND_ALE) ? IXDP425_NAND_ADDR_BYTE : 0; - this->priv = (void *)offset; + nand_set_controller_data(this, (void *)offset); } if (cmd != NAND_CMD_NONE) diff --git a/arch/arm/mach-omap1/board-ams-delta.c b/arch/arm/mach-omap1/board-ams-delta.c index a95499ea8706..97e66558c238 100644 --- a/arch/arm/mach-omap1/board-ams-delta.c +++ b/arch/arm/mach-omap1/board-ams-delta.c @@ -11,7 +11,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ -#include <linux/basic_mmio_gpio.h> +#include <linux/gpio/driver.h> #include <linux/gpio.h> #include <linux/kernel.h> #include <linux/init.h> diff --git a/arch/arm/mach-omap1/board-nand.c b/arch/arm/mach-omap1/board-nand.c index 4d0835327d20..7684f9203474 100644 --- a/arch/arm/mach-omap1/board-nand.c +++ b/arch/arm/mach-omap1/board-nand.c @@ -22,7 +22,7 @@ void omap1_nand_cmd_ctl(struct mtd_info *mtd, int cmd, unsigned int ctrl) { - struct nand_chip *this = mtd->priv; + struct nand_chip *this = mtd_to_nand(mtd); unsigned long mask; if (cmd == NAND_CMD_NONE) diff --git a/arch/arm/mach-omap1/include/mach/camera.h b/arch/arm/mach-omap1/include/mach/camera.h index 847d00f0bb0a..caa6c0d6f0ac 100644 --- a/arch/arm/mach-omap1/include/mach/camera.h +++ b/arch/arm/mach-omap1/include/mach/camera.h @@ -1,7 +1,7 @@ #ifndef __ASM_ARCH_CAMERA_H_ #define __ASM_ARCH_CAMERA_H_ -#include <media/omap1_camera.h> +#include <linux/platform_data/media/omap1_camera.h> void omap1_camera_init(void *); diff --git a/arch/arm/mach-omap2/board-generic.c b/arch/arm/mach-omap2/board-generic.c index 04a56cc04dfa..809827265fb3 100644 --- a/arch/arm/mach-omap2/board-generic.c +++ b/arch/arm/mach-omap2/board-generic.c @@ -16,6 +16,7 @@ #include <linux/of_platform.h> #include <linux/irqdomain.h> +#include <asm/setup.h> #include <asm/mach/arch.h> #include "common.h" @@ -76,8 +77,17 @@ static const char *const n900_boards_compat[] __initconst = { NULL, }; +/* Legacy userspace on Nokia N900 needs ATAGS exported in /proc/atags, + * save them while the data is still not overwritten + */ +static void __init rx51_reserve(void) +{ + save_atags((const struct tag *)(PAGE_OFFSET + 0x100)); + omap_reserve(); +} + DT_MACHINE_START(OMAP3_N900_DT, "Nokia RX-51 board") - .reserve = omap_reserve, + .reserve = rx51_reserve, .map_io = omap3_map_io, .init_early = omap3430_init_early, .init_machine = omap_generic_init, diff --git a/arch/arm/mach-omap2/board-rx51-peripherals.c b/arch/arm/mach-omap2/board-rx51-peripherals.c index 14edcd7a2a1d..0a0567f8e8a0 100644 --- a/arch/arm/mach-omap2/board-rx51-peripherals.c +++ b/arch/arm/mach-omap2/board-rx51-peripherals.c @@ -39,7 +39,7 @@ #include <sound/tlv320aic3x.h> #include <sound/tpa6130a2-plat.h> -#include <media/si4713.h> +#include <linux/platform_data/media/si4713.h> #include <linux/platform_data/leds-lp55xx.h> #include <linux/platform_data/tsl2563.h> @@ -48,7 +48,7 @@ #include <video/omap-panel-data.h> #if defined(CONFIG_IR_RX51) || defined(CONFIG_IR_RX51_MODULE) -#include <media/ir-rx51.h> +#include <linux/platform_data/media/ir-rx51.h> #endif #include "mux.h" diff --git a/arch/arm/mach-omap2/clock.c b/arch/arm/mach-omap2/clock.c index acb60ed17273..d058125876d8 100644 --- a/arch/arm/mach-omap2/clock.c +++ b/arch/arm/mach-omap2/clock.c @@ -225,5 +225,9 @@ void __init ti_clk_init_features(void) if (omap_rev() == OMAP3430_REV_ES1_0) features.flags |= TI_CLK_DPLL4_DENY_REPROGRAM; + /* Errata I810 for omap5 / dra7 */ + if (soc_is_omap54xx() || soc_is_dra7xx()) + features.flags |= TI_CLK_ERRATA_I810; + ti_clk_setup_features(&features); } diff --git a/arch/arm/mach-orion5x/ts78xx-setup.c b/arch/arm/mach-orion5x/ts78xx-setup.c index 1b704d35cf5b..96cf6b51eddc 100644 --- a/arch/arm/mach-orion5x/ts78xx-setup.c +++ b/arch/arm/mach-orion5x/ts78xx-setup.c @@ -176,7 +176,7 @@ static void ts78xx_ts_rtc_unload(void) static void ts78xx_ts_nand_cmd_ctrl(struct mtd_info *mtd, int cmd, unsigned int ctrl) { - struct nand_chip *this = mtd->priv; + struct nand_chip *this = mtd_to_nand(mtd); if (ctrl & NAND_CTRL_CHANGE) { unsigned char bits; @@ -200,7 +200,7 @@ static int ts78xx_ts_nand_dev_ready(struct mtd_info *mtd) static void ts78xx_ts_nand_write_buf(struct mtd_info *mtd, const uint8_t *buf, int len) { - struct nand_chip *chip = mtd->priv; + struct nand_chip *chip = mtd_to_nand(mtd); void __iomem *io_base = chip->IO_ADDR_W; unsigned long off = ((unsigned long)buf & 3); int sz; @@ -227,7 +227,7 @@ static void ts78xx_ts_nand_write_buf(struct mtd_info *mtd, static void ts78xx_ts_nand_read_buf(struct mtd_info *mtd, uint8_t *buf, int len) { - struct nand_chip *chip = mtd->priv; + struct nand_chip *chip = mtd_to_nand(mtd); void __iomem *io_base = chip->IO_ADDR_R; unsigned long off = ((unsigned long)buf & 3); int sz; diff --git a/arch/arm/mach-pxa/balloon3.c b/arch/arm/mach-pxa/balloon3.c index a727282bfa99..7734ec4f1385 100644 --- a/arch/arm/mach-pxa/balloon3.c +++ b/arch/arm/mach-pxa/balloon3.c @@ -572,7 +572,7 @@ static inline void balloon3_i2c_init(void) {} #if defined(CONFIG_MTD_NAND_PLATFORM)||defined(CONFIG_MTD_NAND_PLATFORM_MODULE) static void balloon3_nand_cmd_ctl(struct mtd_info *mtd, int cmd, unsigned int ctrl) { - struct nand_chip *this = mtd->priv; + struct nand_chip *this = mtd_to_nand(mtd); uint8_t balloon3_ctl_set = 0, balloon3_ctl_clr = 0; if (ctrl & NAND_CTRL_CHANGE) { diff --git a/arch/arm/mach-pxa/devices.c b/arch/arm/mach-pxa/devices.c index 2a6e0ae2b920..d1211a40f400 100644 --- a/arch/arm/mach-pxa/devices.c +++ b/arch/arm/mach-pxa/devices.c @@ -14,7 +14,7 @@ #include <mach/irqs.h> #include <linux/platform_data/usb-ohci-pxa27x.h> #include <linux/platform_data/keypad-pxa27x.h> -#include <linux/platform_data/camera-pxa.h> +#include <linux/platform_data/media/camera-pxa.h> #include <mach/audio.h> #include <mach/hardware.h> #include <linux/platform_data/mmp_dma.h> diff --git a/arch/arm/mach-pxa/em-x270.c b/arch/arm/mach-pxa/em-x270.c index 9d7072b04045..2a76c4ef8d03 100644 --- a/arch/arm/mach-pxa/em-x270.c +++ b/arch/arm/mach-pxa/em-x270.c @@ -46,7 +46,7 @@ #include <linux/platform_data/usb-ohci-pxa27x.h> #include <linux/platform_data/mmc-pxamci.h> #include <linux/platform_data/keypad-pxa27x.h> -#include <linux/platform_data/camera-pxa.h> +#include <linux/platform_data/media/camera-pxa.h> #include "generic.h" #include "devices.h" @@ -289,7 +289,7 @@ static void nand_cs_off(void) static void em_x270_nand_cmd_ctl(struct mtd_info *mtd, int dat, unsigned int ctrl) { - struct nand_chip *this = mtd->priv; + struct nand_chip *this = mtd_to_nand(mtd); unsigned long nandaddr = (unsigned long)this->IO_ADDR_W; dsb(); diff --git a/arch/arm/mach-pxa/ezx.c b/arch/arm/mach-pxa/ezx.c index 7c0d5618be5e..cd6224032109 100644 --- a/arch/arm/mach-pxa/ezx.c +++ b/arch/arm/mach-pxa/ezx.c @@ -34,7 +34,7 @@ #include <linux/platform_data/usb-ohci-pxa27x.h> #include <mach/hardware.h> #include <linux/platform_data/keypad-pxa27x.h> -#include <linux/platform_data/camera-pxa.h> +#include <linux/platform_data/media/camera-pxa.h> #include "devices.h" #include "generic.h" diff --git a/arch/arm/mach-pxa/mioa701.c b/arch/arm/mach-pxa/mioa701.c index 3b52b1aa0659..ccfd2b63c6a4 100644 --- a/arch/arm/mach-pxa/mioa701.c +++ b/arch/arm/mach-pxa/mioa701.c @@ -54,7 +54,7 @@ #include <linux/platform_data/mmc-pxamci.h> #include <mach/udc.h> #include <mach/pxa27x-udc.h> -#include <linux/platform_data/camera-pxa.h> +#include <linux/platform_data/media/camera-pxa.h> #include <mach/audio.h> #include <mach/smemc.h> #include <media/soc_camera.h> diff --git a/arch/arm/mach-pxa/palmtreo.c b/arch/arm/mach-pxa/palmtreo.c index d8b937c870de..2dc56062fb7e 100644 --- a/arch/arm/mach-pxa/palmtreo.c +++ b/arch/arm/mach-pxa/palmtreo.c @@ -43,7 +43,7 @@ #include <linux/platform_data/usb-ohci-pxa27x.h> #include <mach/pxa2xx-regs.h> #include <linux/platform_data/asoc-palm27x.h> -#include <linux/platform_data/camera-pxa.h> +#include <linux/platform_data/media/camera-pxa.h> #include <mach/palm27x.h> #include <sound/pxa2xx-lib.h> diff --git a/arch/arm/mach-pxa/palmtx.c b/arch/arm/mach-pxa/palmtx.c index 83f830dd8ad8..d787dd17f6b2 100644 --- a/arch/arm/mach-pxa/palmtx.c +++ b/arch/arm/mach-pxa/palmtx.c @@ -250,7 +250,7 @@ static inline void palmtx_keys_init(void) {} static void palmtx_nand_cmd_ctl(struct mtd_info *mtd, int cmd, unsigned int ctrl) { - struct nand_chip *this = mtd->priv; + struct nand_chip *this = mtd_to_nand(mtd); char __iomem *nandaddr = this->IO_ADDR_W; if (cmd == NAND_CMD_NONE) diff --git a/arch/arm/mach-pxa/palmz72.c b/arch/arm/mach-pxa/palmz72.c index 1a35ddf218da..e3df17a7e8d4 100644 --- a/arch/arm/mach-pxa/palmz72.c +++ b/arch/arm/mach-pxa/palmz72.c @@ -49,7 +49,7 @@ #include <mach/palm27x.h> #include <mach/pm.h> -#include <linux/platform_data/camera-pxa.h> +#include <linux/platform_data/media/camera-pxa.h> #include <media/soc_camera.h> diff --git a/arch/arm/mach-pxa/pcm990-baseboard.c b/arch/arm/mach-pxa/pcm990-baseboard.c index b71c96f614f9..8459239a093c 100644 --- a/arch/arm/mach-pxa/pcm990-baseboard.c +++ b/arch/arm/mach-pxa/pcm990-baseboard.c @@ -27,10 +27,10 @@ #include <linux/pwm.h> #include <linux/pwm_backlight.h> -#include <media/mt9v022.h> +#include <media/i2c/mt9v022.h> #include <media/soc_camera.h> -#include <linux/platform_data/camera-pxa.h> +#include <linux/platform_data/media/camera-pxa.h> #include <asm/mach/map.h> #include <mach/pxa27x.h> #include <mach/audio.h> diff --git a/arch/arm/mach-s3c64xx/dev-audio.c b/arch/arm/mach-s3c64xx/dev-audio.c index ff780a8d8366..b57783371d52 100644 --- a/arch/arm/mach-s3c64xx/dev-audio.c +++ b/arch/arm/mach-s3c64xx/dev-audio.c @@ -54,12 +54,13 @@ static int s3c64xx_i2s_cfg_gpio(struct platform_device *pdev) static struct resource s3c64xx_iis0_resource[] = { [0] = DEFINE_RES_MEM(S3C64XX_PA_IIS0, SZ_256), - [1] = DEFINE_RES_DMA(DMACH_I2S0_OUT), - [2] = DEFINE_RES_DMA(DMACH_I2S0_IN), }; -static struct s3c_audio_pdata i2sv3_pdata = { +static struct s3c_audio_pdata i2s0_pdata = { .cfg_gpio = s3c64xx_i2s_cfg_gpio, + .dma_filter = pl08x_filter_id, + .dma_playback = DMACH_I2S0_OUT, + .dma_capture = DMACH_I2S0_IN, }; struct platform_device s3c64xx_device_iis0 = { @@ -68,15 +69,20 @@ struct platform_device s3c64xx_device_iis0 = { .num_resources = ARRAY_SIZE(s3c64xx_iis0_resource), .resource = s3c64xx_iis0_resource, .dev = { - .platform_data = &i2sv3_pdata, + .platform_data = &i2s0_pdata, }, }; EXPORT_SYMBOL(s3c64xx_device_iis0); static struct resource s3c64xx_iis1_resource[] = { [0] = DEFINE_RES_MEM(S3C64XX_PA_IIS1, SZ_256), - [1] = DEFINE_RES_DMA(DMACH_I2S1_OUT), - [2] = DEFINE_RES_DMA(DMACH_I2S1_IN), +}; + +static struct s3c_audio_pdata i2s1_pdata = { + .cfg_gpio = s3c64xx_i2s_cfg_gpio, + .dma_filter = pl08x_filter_id, + .dma_playback = DMACH_I2S1_OUT, + .dma_capture = DMACH_I2S1_IN, }; struct platform_device s3c64xx_device_iis1 = { @@ -85,19 +91,20 @@ struct platform_device s3c64xx_device_iis1 = { .num_resources = ARRAY_SIZE(s3c64xx_iis1_resource), .resource = s3c64xx_iis1_resource, .dev = { - .platform_data = &i2sv3_pdata, + .platform_data = &i2s1_pdata, }, }; EXPORT_SYMBOL(s3c64xx_device_iis1); static struct resource s3c64xx_iisv4_resource[] = { [0] = DEFINE_RES_MEM(S3C64XX_PA_IISV4, SZ_256), - [1] = DEFINE_RES_DMA(DMACH_HSI_I2SV40_TX), - [2] = DEFINE_RES_DMA(DMACH_HSI_I2SV40_RX), }; static struct s3c_audio_pdata i2sv4_pdata = { .cfg_gpio = s3c64xx_i2s_cfg_gpio, + .dma_filter = pl08x_filter_id, + .dma_playback = DMACH_HSI_I2SV40_TX, + .dma_capture = DMACH_HSI_I2SV40_RX, .type = { .i2s = { .quirks = QUIRK_PRI_6CHAN, @@ -142,12 +149,13 @@ static int s3c64xx_pcm_cfg_gpio(struct platform_device *pdev) static struct resource s3c64xx_pcm0_resource[] = { [0] = DEFINE_RES_MEM(S3C64XX_PA_PCM0, SZ_256), - [1] = DEFINE_RES_DMA(DMACH_PCM0_TX), - [2] = DEFINE_RES_DMA(DMACH_PCM0_RX), }; static struct s3c_audio_pdata s3c_pcm0_pdata = { .cfg_gpio = s3c64xx_pcm_cfg_gpio, + .dma_filter = pl08x_filter_id, + .dma_capture = DMACH_PCM0_RX, + .dma_playback = DMACH_PCM0_TX, }; struct platform_device s3c64xx_device_pcm0 = { @@ -163,12 +171,13 @@ EXPORT_SYMBOL(s3c64xx_device_pcm0); static struct resource s3c64xx_pcm1_resource[] = { [0] = DEFINE_RES_MEM(S3C64XX_PA_PCM1, SZ_256), - [1] = DEFINE_RES_DMA(DMACH_PCM1_TX), - [2] = DEFINE_RES_DMA(DMACH_PCM1_RX), }; static struct s3c_audio_pdata s3c_pcm1_pdata = { .cfg_gpio = s3c64xx_pcm_cfg_gpio, + .dma_filter = pl08x_filter_id, + .dma_playback = DMACH_PCM1_TX, + .dma_capture = DMACH_PCM1_RX, }; struct platform_device s3c64xx_device_pcm1 = { @@ -196,13 +205,15 @@ static int s3c64xx_ac97_cfg_gpe(struct platform_device *pdev) static struct resource s3c64xx_ac97_resource[] = { [0] = DEFINE_RES_MEM(S3C64XX_PA_AC97, SZ_256), - [1] = DEFINE_RES_DMA(DMACH_AC97_PCMOUT), - [2] = DEFINE_RES_DMA(DMACH_AC97_PCMIN), - [3] = DEFINE_RES_DMA(DMACH_AC97_MICIN), - [4] = DEFINE_RES_IRQ(IRQ_AC97), + [1] = DEFINE_RES_IRQ(IRQ_AC97), }; -static struct s3c_audio_pdata s3c_ac97_pdata; +static struct s3c_audio_pdata s3c_ac97_pdata = { + .dma_playback = DMACH_AC97_PCMOUT, + .dma_filter = pl08x_filter_id, + .dma_capture = DMACH_AC97_PCMIN, + .dma_capture_mic = DMACH_AC97_MICIN, +}; static u64 s3c64xx_ac97_dmamask = DMA_BIT_MASK(32); diff --git a/arch/arm/mach-s3c64xx/include/mach/dma.h b/arch/arm/mach-s3c64xx/include/mach/dma.h index 096e14073bd9..9c739eafe95c 100644 --- a/arch/arm/mach-s3c64xx/include/mach/dma.h +++ b/arch/arm/mach-s3c64xx/include/mach/dma.h @@ -14,38 +14,38 @@ #define S3C64XX_DMA_CHAN(name) ((unsigned long)(name)) /* DMA0/SDMA0 */ -#define DMACH_UART0 S3C64XX_DMA_CHAN("uart0_tx") -#define DMACH_UART0_SRC2 S3C64XX_DMA_CHAN("uart0_rx") -#define DMACH_UART1 S3C64XX_DMA_CHAN("uart1_tx") -#define DMACH_UART1_SRC2 S3C64XX_DMA_CHAN("uart1_rx") -#define DMACH_UART2 S3C64XX_DMA_CHAN("uart2_tx") -#define DMACH_UART2_SRC2 S3C64XX_DMA_CHAN("uart2_rx") -#define DMACH_UART3 S3C64XX_DMA_CHAN("uart3_tx") -#define DMACH_UART3_SRC2 S3C64XX_DMA_CHAN("uart3_rx") -#define DMACH_PCM0_TX S3C64XX_DMA_CHAN("pcm0_tx") -#define DMACH_PCM0_RX S3C64XX_DMA_CHAN("pcm0_rx") -#define DMACH_I2S0_OUT S3C64XX_DMA_CHAN("i2s0_tx") -#define DMACH_I2S0_IN S3C64XX_DMA_CHAN("i2s0_rx") +#define DMACH_UART0 "uart0_tx" +#define DMACH_UART0_SRC2 "uart0_rx" +#define DMACH_UART1 "uart1_tx" +#define DMACH_UART1_SRC2 "uart1_rx" +#define DMACH_UART2 "uart2_tx" +#define DMACH_UART2_SRC2 "uart2_rx" +#define DMACH_UART3 "uart3_tx" +#define DMACH_UART3_SRC2 "uart3_rx" +#define DMACH_PCM0_TX "pcm0_tx" +#define DMACH_PCM0_RX "pcm0_rx" +#define DMACH_I2S0_OUT "i2s0_tx" +#define DMACH_I2S0_IN "i2s0_rx" #define DMACH_SPI0_TX S3C64XX_DMA_CHAN("spi0_tx") #define DMACH_SPI0_RX S3C64XX_DMA_CHAN("spi0_rx") -#define DMACH_HSI_I2SV40_TX S3C64XX_DMA_CHAN("i2s2_tx") -#define DMACH_HSI_I2SV40_RX S3C64XX_DMA_CHAN("i2s2_rx") +#define DMACH_HSI_I2SV40_TX "i2s2_tx" +#define DMACH_HSI_I2SV40_RX "i2s2_rx" /* DMA1/SDMA1 */ -#define DMACH_PCM1_TX S3C64XX_DMA_CHAN("pcm1_tx") -#define DMACH_PCM1_RX S3C64XX_DMA_CHAN("pcm1_rx") -#define DMACH_I2S1_OUT S3C64XX_DMA_CHAN("i2s1_tx") -#define DMACH_I2S1_IN S3C64XX_DMA_CHAN("i2s1_rx") +#define DMACH_PCM1_TX "pcm1_tx" +#define DMACH_PCM1_RX "pcm1_rx" +#define DMACH_I2S1_OUT "i2s1_tx" +#define DMACH_I2S1_IN "i2s1_rx" #define DMACH_SPI1_TX S3C64XX_DMA_CHAN("spi1_tx") #define DMACH_SPI1_RX S3C64XX_DMA_CHAN("spi1_rx") -#define DMACH_AC97_PCMOUT S3C64XX_DMA_CHAN("ac97_out") -#define DMACH_AC97_PCMIN S3C64XX_DMA_CHAN("ac97_in") -#define DMACH_AC97_MICIN S3C64XX_DMA_CHAN("ac97_mic") -#define DMACH_PWM S3C64XX_DMA_CHAN("pwm") -#define DMACH_IRDA S3C64XX_DMA_CHAN("irda") -#define DMACH_EXTERNAL S3C64XX_DMA_CHAN("external") -#define DMACH_SECURITY_RX S3C64XX_DMA_CHAN("sec_rx") -#define DMACH_SECURITY_TX S3C64XX_DMA_CHAN("sec_tx") +#define DMACH_AC97_PCMOUT "ac97_out" +#define DMACH_AC97_PCMIN "ac97_in" +#define DMACH_AC97_MICIN "ac97_mic" +#define DMACH_PWM "pwm" +#define DMACH_IRDA "irda" +#define DMACH_EXTERNAL "external" +#define DMACH_SECURITY_RX "sec_rx" +#define DMACH_SECURITY_TX "sec_tx" enum dma_ch { DMACH_MAX = 32 diff --git a/arch/arm/mach-s3c64xx/mach-crag6410.c b/arch/arm/mach-s3c64xx/mach-crag6410.c index f776adcdaee8..723f47fefc81 100644 --- a/arch/arm/mach-s3c64xx/mach-crag6410.c +++ b/arch/arm/mach-s3c64xx/mach-crag6410.c @@ -29,7 +29,7 @@ #include <linux/pwm_backlight.h> #include <linux/dm9000.h> #include <linux/gpio_keys.h> -#include <linux/basic_mmio_gpio.h> +#include <linux/gpio/driver.h> #include <linux/spi/spi.h> #include <linux/platform_data/pca953x.h> diff --git a/arch/arm/mach-sti/Kconfig b/arch/arm/mach-sti/Kconfig index 125865daaf17..12dd1dc0a041 100644 --- a/arch/arm/mach-sti/Kconfig +++ b/arch/arm/mach-sti/Kconfig @@ -3,6 +3,7 @@ menuconfig ARCH_STI select ARM_GIC select ST_IRQCHIP select ARM_GLOBAL_TIMER + select CLKSRC_ST_LPC select PINCTRL select PINCTRL_ST select MFD_SYSCON diff --git a/arch/arm/mach-ux500/Kconfig b/arch/arm/mach-ux500/Kconfig index c9ac19b24e5a..5eacdd61e61c 100644 --- a/arch/arm/mach-ux500/Kconfig +++ b/arch/arm/mach-ux500/Kconfig @@ -32,6 +32,7 @@ config UX500_SOC_DB8500 select PINCTRL_AB8540 select REGULATOR select REGULATOR_DB8500_PRCMU + select CLKSRC_DBX500_PRCMU select PM_GENERIC_DOMAINS if PM config MACH_MOP500 diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c index 493692d838c6..9f9d54271aad 100644 --- a/arch/arm/mm/cache-l2x0.c +++ b/arch/arm/mm/cache-l2x0.c @@ -790,7 +790,7 @@ static const struct l2c_init_data l2c310_init_fns __initconst = { }; static int __init __l2c_init(const struct l2c_init_data *data, - u32 aux_val, u32 aux_mask, u32 cache_id) + u32 aux_val, u32 aux_mask, u32 cache_id, bool nosync) { struct outer_cache_fns fns; unsigned way_size_bits, ways; @@ -866,6 +866,10 @@ static int __init __l2c_init(const struct l2c_init_data *data, fns.configure = outer_cache.configure; if (data->fixup) data->fixup(l2x0_base, cache_id, &fns); + if (nosync) { + pr_info("L2C: disabling outer sync\n"); + fns.sync = NULL; + } /* * Check if l2x0 controller is already enabled. If we are booting @@ -925,7 +929,7 @@ void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask) if (data->save) data->save(l2x0_base); - __l2c_init(data, aux_val, aux_mask, cache_id); + __l2c_init(data, aux_val, aux_mask, cache_id, false); } #ifdef CONFIG_OF @@ -1060,6 +1064,18 @@ static void __init l2x0_of_parse(const struct device_node *np, val |= (dirty - 1) << L2X0_AUX_CTRL_DIRTY_LATENCY_SHIFT; } + if (of_property_read_bool(np, "arm,parity-enable")) { + mask &= ~L2C_AUX_CTRL_PARITY_ENABLE; + val |= L2C_AUX_CTRL_PARITY_ENABLE; + } else if (of_property_read_bool(np, "arm,parity-disable")) { + mask &= ~L2C_AUX_CTRL_PARITY_ENABLE; + } + + if (of_property_read_bool(np, "arm,shared-override")) { + mask &= ~L2C_AUX_CTRL_SHARED_OVERRIDE; + val |= L2C_AUX_CTRL_SHARED_OVERRIDE; + } + ret = l2x0_cache_size_of_parse(np, aux_val, aux_mask, &assoc, SZ_256K); if (ret) return; @@ -1176,6 +1192,14 @@ static void __init l2c310_of_parse(const struct device_node *np, *aux_mask &= ~L2C_AUX_CTRL_SHARED_OVERRIDE; } + if (of_property_read_bool(np, "arm,parity-enable")) { + *aux_val |= L2C_AUX_CTRL_PARITY_ENABLE; + *aux_mask &= ~L2C_AUX_CTRL_PARITY_ENABLE; + } else if (of_property_read_bool(np, "arm,parity-disable")) { + *aux_val &= ~L2C_AUX_CTRL_PARITY_ENABLE; + *aux_mask &= ~L2C_AUX_CTRL_PARITY_ENABLE; + } + prefetch = l2x0_saved_regs.prefetch_ctrl; ret = of_property_read_u32(np, "arm,double-linefill", &val); @@ -1704,6 +1728,7 @@ int __init l2x0_of_init(u32 aux_val, u32 aux_mask) struct resource res; u32 cache_id, old_aux; u32 cache_level = 2; + bool nosync = false; np = of_find_matching_node(NULL, l2x0_ids); if (!np) @@ -1742,6 +1767,8 @@ int __init l2x0_of_init(u32 aux_val, u32 aux_mask) if (cache_level != 2) pr_err("L2C: device tree specifies invalid cache level\n"); + nosync = of_property_read_bool(np, "arm,outer-sync-disable"); + /* Read back current (default) hardware configuration */ if (data->save) data->save(l2x0_base); @@ -1756,6 +1783,6 @@ int __init l2x0_of_init(u32 aux_val, u32 aux_mask) else cache_id = readl_relaxed(l2x0_base + L2X0_CACHE_ID); - return __l2c_init(data, aux_val, aux_mask, cache_id); + return __l2c_init(data, aux_val, aux_mask, cache_id, nosync); } #endif diff --git a/arch/arm/mm/cache-uniphier.c b/arch/arm/mm/cache-uniphier.c index 0502ba17a3ab..a6fa7b73fbe0 100644 --- a/arch/arm/mm/cache-uniphier.c +++ b/arch/arm/mm/cache-uniphier.c @@ -377,17 +377,6 @@ static const struct of_device_id uniphier_cache_match[] __initconst = { { /* sentinel */ } }; -static struct device_node * __init uniphier_cache_get_next_level_node( - struct device_node *np) -{ - u32 phandle; - - if (of_property_read_u32(np, "next-level-cache", &phandle)) - return NULL; - - return of_find_node_by_phandle(phandle); -} - static int __init __uniphier_cache_init(struct device_node *np, unsigned int *cache_level) { @@ -491,7 +480,7 @@ static int __init __uniphier_cache_init(struct device_node *np, * next level cache fails because we want to continue with available * cache levels. */ - next_np = uniphier_cache_get_next_level_node(np); + next_np = of_find_next_cache_node(np); if (next_np) { (*cache_level)++; ret = __uniphier_cache_init(next_np, cache_level); diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c index 1ec8e7590fc6..d0ba3551d49a 100644 --- a/arch/arm/mm/flush.c +++ b/arch/arm/mm/flush.c @@ -330,7 +330,7 @@ void flush_dcache_page(struct page *page) mapping = page_mapping(page); if (!cache_ops_need_broadcast() && - mapping && !page_mapped(page)) + mapping && !page_mapcount(page)) clear_bit(PG_dcache_clean, &page->flags); else { __flush_dcache_page(mapping, page); @@ -415,18 +415,3 @@ void __flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned l */ __cpuc_flush_dcache_area(page_address(page), PAGE_SIZE); } - -#ifdef CONFIG_TRANSPARENT_HUGEPAGE -#ifdef CONFIG_HAVE_RCU_TABLE_FREE -void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address, - pmd_t *pmdp) -{ - pmd_t pmd = pmd_mksplitting(*pmdp); - VM_BUG_ON(address & ~PMD_MASK); - set_pmd_at(vma->vm_mm, address, pmdp, pmd); - - /* dummy IPI to serialise against fast_gup */ - kick_all_cpus_sync(); -} -#endif /* CONFIG_HAVE_RCU_TABLE_FREE */ -#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 7f8cd1b3557f..49bd08178008 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -192,7 +192,7 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max_low, #ifdef CONFIG_HAVE_ARCH_PFN_VALID int pfn_valid(unsigned long pfn) { - return memblock_is_memory(__pfn_to_phys(pfn)); + return memblock_is_map_memory(__pfn_to_phys(pfn)); } EXPORT_SYMBOL(pfn_valid); #endif @@ -433,6 +433,9 @@ static void __init free_highpages(void) if (end <= max_low) continue; + if (memblock_is_nomap(mem)) + continue; + /* Truncate partial highmem entries */ if (start < max_low) start = max_low; diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c index 0c81056c1dd7..66a978d05958 100644 --- a/arch/arm/mm/ioremap.c +++ b/arch/arm/mm/ioremap.c @@ -30,6 +30,7 @@ #include <asm/cp15.h> #include <asm/cputype.h> #include <asm/cacheflush.h> +#include <asm/early_ioremap.h> #include <asm/mmu_context.h> #include <asm/pgalloc.h> #include <asm/tlbflush.h> @@ -469,3 +470,11 @@ int pci_ioremap_io(unsigned int offset, phys_addr_t phys_addr) } EXPORT_SYMBOL_GPL(pci_ioremap_io); #endif + +/* + * Must be called after early_fixmap_init + */ +void __init early_ioremap_init(void) +{ + early_ioremap_setup(); +} diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c index 407dc786583a..4b4058db0781 100644 --- a/arch/arm/mm/mmap.c +++ b/arch/arm/mm/mmap.c @@ -173,8 +173,7 @@ unsigned long arch_mmap_rnd(void) { unsigned long rnd; - /* 8 bits of randomness in 20 address space bits */ - rnd = (unsigned long)get_random_int() % (1 << 8); + rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_bits) - 1); return rnd << PAGE_SHIFT; } diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 4867f5daf82c..a87f6cc3fa2b 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -390,7 +390,7 @@ void __init early_fixmap_init(void) * The early fixmap range spans multiple pmds, for which * we are not prepared: */ - BUILD_BUG_ON((__fix_to_virt(__end_of_permanent_fixed_addresses) >> PMD_SHIFT) + BUILD_BUG_ON((__fix_to_virt(__end_of_early_ioremap_region) >> PMD_SHIFT) != FIXADDR_TOP >> PMD_SHIFT); pmd = fixmap_pmd(FIXADDR_TOP); @@ -572,7 +572,7 @@ static void __init build_mem_type_table(void) * in the Short-descriptor translation table format descriptors. */ if (cpu_arch == CPU_ARCH_ARMv7 && - (read_cpuid_ext(CPUID_EXT_MMFR0) & 0xF) == 4) { + (read_cpuid_ext(CPUID_EXT_MMFR0) & 0xF) >= 4) { user_pmd_table |= PMD_PXNTABLE; } #endif @@ -724,30 +724,49 @@ static void __init *early_alloc(unsigned long sz) return early_alloc_aligned(sz, sz); } -static pte_t * __init early_pte_alloc(pmd_t *pmd, unsigned long addr, unsigned long prot) +static void *__init late_alloc(unsigned long sz) +{ + void *ptr = (void *)__get_free_pages(PGALLOC_GFP, get_order(sz)); + + BUG_ON(!ptr); + return ptr; +} + +static pte_t * __init pte_alloc(pmd_t *pmd, unsigned long addr, + unsigned long prot, + void *(*alloc)(unsigned long sz)) { if (pmd_none(*pmd)) { - pte_t *pte = early_alloc(PTE_HWTABLE_OFF + PTE_HWTABLE_SIZE); + pte_t *pte = alloc(PTE_HWTABLE_OFF + PTE_HWTABLE_SIZE); __pmd_populate(pmd, __pa(pte), prot); } BUG_ON(pmd_bad(*pmd)); return pte_offset_kernel(pmd, addr); } +static pte_t * __init early_pte_alloc(pmd_t *pmd, unsigned long addr, + unsigned long prot) +{ + return pte_alloc(pmd, addr, prot, early_alloc); +} + static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr, unsigned long end, unsigned long pfn, - const struct mem_type *type) + const struct mem_type *type, + void *(*alloc)(unsigned long sz), + bool ng) { - pte_t *pte = early_pte_alloc(pmd, addr, type->prot_l1); + pte_t *pte = pte_alloc(pmd, addr, type->prot_l1, alloc); do { - set_pte_ext(pte, pfn_pte(pfn, __pgprot(type->prot_pte)), 0); + set_pte_ext(pte, pfn_pte(pfn, __pgprot(type->prot_pte)), + ng ? PTE_EXT_NG : 0); pfn++; } while (pte++, addr += PAGE_SIZE, addr != end); } static void __init __map_init_section(pmd_t *pmd, unsigned long addr, unsigned long end, phys_addr_t phys, - const struct mem_type *type) + const struct mem_type *type, bool ng) { pmd_t *p = pmd; @@ -765,7 +784,7 @@ static void __init __map_init_section(pmd_t *pmd, unsigned long addr, pmd++; #endif do { - *pmd = __pmd(phys | type->prot_sect); + *pmd = __pmd(phys | type->prot_sect | (ng ? PMD_SECT_nG : 0)); phys += SECTION_SIZE; } while (pmd++, addr += SECTION_SIZE, addr != end); @@ -774,7 +793,8 @@ static void __init __map_init_section(pmd_t *pmd, unsigned long addr, static void __init alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end, phys_addr_t phys, - const struct mem_type *type) + const struct mem_type *type, + void *(*alloc)(unsigned long sz), bool ng) { pmd_t *pmd = pmd_offset(pud, addr); unsigned long next; @@ -792,10 +812,10 @@ static void __init alloc_init_pmd(pud_t *pud, unsigned long addr, */ if (type->prot_sect && ((addr | next | phys) & ~SECTION_MASK) == 0) { - __map_init_section(pmd, addr, next, phys, type); + __map_init_section(pmd, addr, next, phys, type, ng); } else { alloc_init_pte(pmd, addr, next, - __phys_to_pfn(phys), type); + __phys_to_pfn(phys), type, alloc, ng); } phys += next - addr; @@ -805,21 +825,24 @@ static void __init alloc_init_pmd(pud_t *pud, unsigned long addr, static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end, phys_addr_t phys, - const struct mem_type *type) + const struct mem_type *type, + void *(*alloc)(unsigned long sz), bool ng) { pud_t *pud = pud_offset(pgd, addr); unsigned long next; do { next = pud_addr_end(addr, end); - alloc_init_pmd(pud, addr, next, phys, type); + alloc_init_pmd(pud, addr, next, phys, type, alloc, ng); phys += next - addr; } while (pud++, addr = next, addr != end); } #ifndef CONFIG_ARM_LPAE -static void __init create_36bit_mapping(struct map_desc *md, - const struct mem_type *type) +static void __init create_36bit_mapping(struct mm_struct *mm, + struct map_desc *md, + const struct mem_type *type, + bool ng) { unsigned long addr, length, end; phys_addr_t phys; @@ -859,7 +882,7 @@ static void __init create_36bit_mapping(struct map_desc *md, */ phys |= (((md->pfn >> (32 - PAGE_SHIFT)) & 0xF) << 20); - pgd = pgd_offset_k(addr); + pgd = pgd_offset(mm, addr); end = addr + length; do { pud_t *pud = pud_offset(pgd, addr); @@ -867,7 +890,8 @@ static void __init create_36bit_mapping(struct map_desc *md, int i; for (i = 0; i < 16; i++) - *pmd++ = __pmd(phys | type->prot_sect | PMD_SECT_SUPER); + *pmd++ = __pmd(phys | type->prot_sect | PMD_SECT_SUPER | + (ng ? PMD_SECT_nG : 0)); addr += SUPERSECTION_SIZE; phys += SUPERSECTION_SIZE; @@ -876,33 +900,15 @@ static void __init create_36bit_mapping(struct map_desc *md, } #endif /* !CONFIG_ARM_LPAE */ -/* - * Create the page directory entries and any necessary - * page tables for the mapping specified by `md'. We - * are able to cope here with varying sizes and address - * offsets, and we take full advantage of sections and - * supersections. - */ -static void __init create_mapping(struct map_desc *md) +static void __init __create_mapping(struct mm_struct *mm, struct map_desc *md, + void *(*alloc)(unsigned long sz), + bool ng) { unsigned long addr, length, end; phys_addr_t phys; const struct mem_type *type; pgd_t *pgd; - if (md->virtual != vectors_base() && md->virtual < TASK_SIZE) { - pr_warn("BUG: not creating mapping for 0x%08llx at 0x%08lx in user region\n", - (long long)__pfn_to_phys((u64)md->pfn), md->virtual); - return; - } - - if ((md->type == MT_DEVICE || md->type == MT_ROM) && - md->virtual >= PAGE_OFFSET && md->virtual < FIXADDR_START && - (md->virtual < VMALLOC_START || md->virtual >= VMALLOC_END)) { - pr_warn("BUG: mapping for 0x%08llx at 0x%08lx out of vmalloc space\n", - (long long)__pfn_to_phys((u64)md->pfn), md->virtual); - } - type = &mem_types[md->type]; #ifndef CONFIG_ARM_LPAE @@ -910,7 +916,7 @@ static void __init create_mapping(struct map_desc *md) * Catch 36-bit addresses */ if (md->pfn >= 0x100000) { - create_36bit_mapping(md, type); + create_36bit_mapping(mm, md, type, ng); return; } #endif @@ -925,12 +931,12 @@ static void __init create_mapping(struct map_desc *md) return; } - pgd = pgd_offset_k(addr); + pgd = pgd_offset(mm, addr); end = addr + length; do { unsigned long next = pgd_addr_end(addr, end); - alloc_init_pud(pgd, addr, next, phys, type); + alloc_init_pud(pgd, addr, next, phys, type, alloc, ng); phys += next - addr; addr = next; @@ -938,6 +944,43 @@ static void __init create_mapping(struct map_desc *md) } /* + * Create the page directory entries and any necessary + * page tables for the mapping specified by `md'. We + * are able to cope here with varying sizes and address + * offsets, and we take full advantage of sections and + * supersections. + */ +static void __init create_mapping(struct map_desc *md) +{ + if (md->virtual != vectors_base() && md->virtual < TASK_SIZE) { + pr_warn("BUG: not creating mapping for 0x%08llx at 0x%08lx in user region\n", + (long long)__pfn_to_phys((u64)md->pfn), md->virtual); + return; + } + + if ((md->type == MT_DEVICE || md->type == MT_ROM) && + md->virtual >= PAGE_OFFSET && md->virtual < FIXADDR_START && + (md->virtual < VMALLOC_START || md->virtual >= VMALLOC_END)) { + pr_warn("BUG: mapping for 0x%08llx at 0x%08lx out of vmalloc space\n", + (long long)__pfn_to_phys((u64)md->pfn), md->virtual); + } + + __create_mapping(&init_mm, md, early_alloc, false); +} + +void __init create_mapping_late(struct mm_struct *mm, struct map_desc *md, + bool ng) +{ +#ifdef CONFIG_ARM_LPAE + pud_t *pud = pud_alloc(mm, pgd_offset(mm, md->virtual), md->virtual); + if (WARN_ON(!pud)) + return; + pmd_alloc(mm, pud, 0); +#endif + __create_mapping(mm, md, late_alloc, ng); +} + +/* * Create the architecture specific mappings */ void __init iotable_init(struct map_desc *io_desc, int nr) @@ -1392,6 +1435,9 @@ static void __init map_lowmem(void) phys_addr_t end = start + reg->size; struct map_desc map; + if (memblock_is_nomap(reg)) + continue; + if (end > arm_lowmem_limit) end = arm_lowmem_limit; if (start >= end) diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index 8e1ea433c3f1..0f92d575a304 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -274,10 +274,12 @@ __v7_ca15mp_setup: __v7_b15mp_setup: __v7_ca17mp_setup: mov r10, #0 -1: adr r12, __v7_setup_stack @ the local stack - stmia r12, {r0-r5, lr} @ v7_invalidate_l1 touches r0-r6 +1: adr r0, __v7_setup_stack_ptr + ldr r12, [r0] + add r12, r12, r0 @ the local stack + stmia r12, {r1-r6, lr} @ v7_invalidate_l1 touches r0-r6 bl v7_invalidate_l1 - ldmia r12, {r0-r5, lr} + ldmia r12, {r1-r6, lr} #ifdef CONFIG_SMP ALT_SMP(mrc p15, 0, r0, c1, c0, 1) ALT_UP(mov r0, #(1 << 6)) @ fake it for UP @@ -415,10 +417,12 @@ __v7_pj4b_setup: #endif /* CONFIG_CPU_PJ4B */ __v7_setup: - adr r12, __v7_setup_stack @ the local stack - stmia r12, {r0-r5, lr} @ v7_invalidate_l1 touches r0-r6 + adr r0, __v7_setup_stack_ptr + ldr r12, [r0] + add r12, r12, r0 @ the local stack + stmia r12, {r1-r6, lr} @ v7_invalidate_l1 touches r0-r6 bl v7_invalidate_l1 - ldmia r12, {r0-r5, lr} + ldmia r12, {r1-r6, lr} __v7_setup_cont: and r0, r9, #0xff000000 @ ARM? @@ -480,11 +484,16 @@ __errata_finish: orr r0, r0, r6 @ set them THUMB( orr r0, r0, #1 << 30 ) @ Thumb exceptions ret lr @ return to head.S:__ret + + .align 2 +__v7_setup_stack_ptr: + .word __v7_setup_stack - . ENDPROC(__v7_setup) + .bss .align 2 __v7_setup_stack: - .space 4 * 7 @ 12 registers + .space 4 * 7 @ 7 registers __INITDATA diff --git a/arch/arm/mm/proc-v7m.S b/arch/arm/mm/proc-v7m.S index 67d9209077c6..7229d8d0be1a 100644 --- a/arch/arm/mm/proc-v7m.S +++ b/arch/arm/mm/proc-v7m.S @@ -12,6 +12,7 @@ */ #include <linux/linkage.h> #include <asm/assembler.h> +#include <asm/memory.h> #include <asm/v7m.h> #include "proc-macros.S" @@ -97,19 +98,19 @@ __v7m_setup: mov r5, #0x00800000 str r5, [r0, V7M_SCB_SHPR3] @ set PendSV priority - @ SVC to run the kernel in this mode + @ SVC to switch to handler mode. Notice that this requires sp to + @ point to writeable memory because the processor saves + @ some registers to the stack. badr r1, 1f ldr r5, [r12, #11 * 4] @ read the SVC vector entry str r1, [r12, #11 * 4] @ write the temporary SVC vector entry mov r6, lr @ save LR - mov r7, sp @ save SP - ldr sp, =__v7m_setup_stack_top + ldr sp, =init_thread_union + THREAD_START_SP cpsie i svc #0 1: cpsid i str r5, [r12, #11 * 4] @ restore the original SVC vector entry mov lr, r6 @ restore LR - mov sp, r7 @ restore SP @ Special-purpose control register mov r1, #1 @@ -123,11 +124,6 @@ __v7m_setup: ret lr ENDPROC(__v7m_setup) - .align 2 -__v7m_setup_stack: - .space 4 * 8 @ 8 registers -__v7m_setup_stack_top: - define_processor_functions v7m, dabort=nommu_early_abort, pabort=legacy_pabort, nommu=1 .section ".rodata" diff --git a/arch/arm/plat-samsung/devs.c b/arch/arm/plat-samsung/devs.c index 82074625de5c..f39938fa9ef6 100644 --- a/arch/arm/plat-samsung/devs.c +++ b/arch/arm/plat-samsung/devs.c @@ -36,7 +36,7 @@ #include <linux/platform_data/s3c-hsotg.h> #include <linux/platform_data/dma-s3c24xx.h> -#include <media/s5p_hdmi.h> +#include <linux/platform_data/media/s5p_hdmi.h> #include <asm/irq.h> #include <asm/mach/arch.h> @@ -65,6 +65,7 @@ #include <linux/platform_data/usb-ohci-s3c2410.h> #include <plat/usb-phy.h> #include <plat/regs-spi.h> +#include <linux/platform_data/asoc-s3c.h> #include <linux/platform_data/spi-s3c64xx.h> static u64 samsung_device_dma_mask = DMA_BIT_MASK(32); @@ -74,9 +75,15 @@ static u64 samsung_device_dma_mask = DMA_BIT_MASK(32); static struct resource s3c_ac97_resource[] = { [0] = DEFINE_RES_MEM(S3C2440_PA_AC97, S3C2440_SZ_AC97), [1] = DEFINE_RES_IRQ(IRQ_S3C244X_AC97), - [2] = DEFINE_RES_DMA_NAMED(DMACH_PCM_OUT, "PCM out"), - [3] = DEFINE_RES_DMA_NAMED(DMACH_PCM_IN, "PCM in"), - [4] = DEFINE_RES_DMA_NAMED(DMACH_MIC_IN, "Mic in"), +}; + +static struct s3c_audio_pdata s3c_ac97_pdata = { +#ifdef CONFIG_S3C24XX_DMAC + .dma_filter = s3c24xx_dma_filter, +#endif + .dma_playback = (void *)DMACH_PCM_OUT, + .dma_capture = (void *)DMACH_PCM_IN, + .dma_capture_mic = (void *)DMACH_MIC_IN, }; struct platform_device s3c_device_ac97 = { @@ -87,6 +94,7 @@ struct platform_device s3c_device_ac97 = { .dev = { .dma_mask = &samsung_device_dma_mask, .coherent_dma_mask = DMA_BIT_MASK(32), + .platform_data = &s3c_ac97_pdata, } }; #endif /* CONFIG_CPU_S3C2440 */ @@ -566,6 +574,14 @@ static struct resource s3c_iis_resource[] = { [0] = DEFINE_RES_MEM(S3C24XX_PA_IIS, S3C24XX_SZ_IIS), }; +static struct s3c_audio_pdata s3c_iis_platdata = { +#ifdef CONFIG_S3C24XX_DMAC + .dma_filter = s3c24xx_dma_filter, +#endif + .dma_playback = (void *)DMACH_I2S_OUT, + .dma_capture = (void *)DMACH_I2S_IN, +}; + struct platform_device s3c_device_iis = { .name = "s3c24xx-iis", .id = -1, @@ -574,6 +590,7 @@ struct platform_device s3c_device_iis = { .dev = { .dma_mask = &samsung_device_dma_mask, .coherent_dma_mask = DMA_BIT_MASK(32), + .platform_data = &s3c_iis_platdata, } }; #endif /* CONFIG_PLAT_S3C24XX */ @@ -1100,9 +1117,7 @@ struct platform_device s3c_device_wdt = { #ifdef CONFIG_S3C64XX_DEV_SPI0 static struct resource s3c64xx_spi0_resource[] = { [0] = DEFINE_RES_MEM(S3C_PA_SPI0, SZ_256), - [1] = DEFINE_RES_DMA(DMACH_SPI0_TX), - [2] = DEFINE_RES_DMA(DMACH_SPI0_RX), - [3] = DEFINE_RES_IRQ(IRQ_SPI0), + [1] = DEFINE_RES_IRQ(IRQ_SPI0), }; struct platform_device s3c64xx_device_spi0 = { @@ -1130,6 +1145,8 @@ void __init s3c64xx_spi0_set_platdata(int (*cfg_gpio)(void), int src_clk_nr, pd.num_cs = num_cs; pd.src_clk_nr = src_clk_nr; pd.cfg_gpio = (cfg_gpio) ? cfg_gpio : s3c64xx_spi0_cfg_gpio; + pd.dma_tx = (void *)DMACH_SPI0_TX; + pd.dma_rx = (void *)DMACH_SPI0_RX; #if defined(CONFIG_PL330_DMA) pd.filter = pl330_filter; #elif defined(CONFIG_S3C64XX_PL080) @@ -1145,9 +1162,7 @@ void __init s3c64xx_spi0_set_platdata(int (*cfg_gpio)(void), int src_clk_nr, #ifdef CONFIG_S3C64XX_DEV_SPI1 static struct resource s3c64xx_spi1_resource[] = { [0] = DEFINE_RES_MEM(S3C_PA_SPI1, SZ_256), - [1] = DEFINE_RES_DMA(DMACH_SPI1_TX), - [2] = DEFINE_RES_DMA(DMACH_SPI1_RX), - [3] = DEFINE_RES_IRQ(IRQ_SPI1), + [1] = DEFINE_RES_IRQ(IRQ_SPI1), }; struct platform_device s3c64xx_device_spi1 = { @@ -1175,12 +1190,15 @@ void __init s3c64xx_spi1_set_platdata(int (*cfg_gpio)(void), int src_clk_nr, pd.num_cs = num_cs; pd.src_clk_nr = src_clk_nr; pd.cfg_gpio = (cfg_gpio) ? cfg_gpio : s3c64xx_spi1_cfg_gpio; + pd.dma_tx = (void *)DMACH_SPI1_TX; + pd.dma_rx = (void *)DMACH_SPI1_RX; #if defined(CONFIG_PL330_DMA) pd.filter = pl330_filter; #elif defined(CONFIG_S3C64XX_PL080) pd.filter = pl08x_filter_id; #endif + s3c_set_platdata(&pd, sizeof(pd), &s3c64xx_device_spi1); } #endif /* CONFIG_S3C64XX_DEV_SPI1 */ @@ -1188,9 +1206,7 @@ void __init s3c64xx_spi1_set_platdata(int (*cfg_gpio)(void), int src_clk_nr, #ifdef CONFIG_S3C64XX_DEV_SPI2 static struct resource s3c64xx_spi2_resource[] = { [0] = DEFINE_RES_MEM(S3C_PA_SPI2, SZ_256), - [1] = DEFINE_RES_DMA(DMACH_SPI2_TX), - [2] = DEFINE_RES_DMA(DMACH_SPI2_RX), - [3] = DEFINE_RES_IRQ(IRQ_SPI2), + [1] = DEFINE_RES_IRQ(IRQ_SPI2), }; struct platform_device s3c64xx_device_spi2 = { @@ -1218,6 +1234,8 @@ void __init s3c64xx_spi2_set_platdata(int (*cfg_gpio)(void), int src_clk_nr, pd.num_cs = num_cs; pd.src_clk_nr = src_clk_nr; pd.cfg_gpio = (cfg_gpio) ? cfg_gpio : s3c64xx_spi2_cfg_gpio; + pd.dma_tx = (void *)DMACH_SPI2_TX; + pd.dma_rx = (void *)DMACH_SPI2_RX; #if defined(CONFIG_PL330_DMA) pd.filter = pl330_filter; #elif defined(CONFIG_S3C64XX_PL080) diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index fc7ea529f462..75cd7345c654 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -12,6 +12,7 @@ #include <xen/page.h> #include <xen/interface/sched.h> #include <xen/xen-ops.h> +#include <asm/paravirt.h> #include <asm/xen/hypervisor.h> #include <asm/xen/hypercall.h> #include <asm/system_misc.h> @@ -25,6 +26,10 @@ #include <linux/cpufreq.h> #include <linux/cpu.h> #include <linux/console.h> +#include <linux/pvclock_gtod.h> +#include <linux/time64.h> +#include <linux/timekeeping.h> +#include <linux/timekeeper_internal.h> #include <linux/mm.h> @@ -79,6 +84,83 @@ int xen_unmap_domain_gfn_range(struct vm_area_struct *vma, } EXPORT_SYMBOL_GPL(xen_unmap_domain_gfn_range); +static unsigned long long xen_stolen_accounting(int cpu) +{ + struct vcpu_runstate_info state; + + BUG_ON(cpu != smp_processor_id()); + + xen_get_runstate_snapshot(&state); + + WARN_ON(state.state != RUNSTATE_running); + + return state.time[RUNSTATE_runnable] + state.time[RUNSTATE_offline]; +} + +static void xen_read_wallclock(struct timespec64 *ts) +{ + u32 version; + struct timespec64 now, ts_monotonic; + struct shared_info *s = HYPERVISOR_shared_info; + struct pvclock_wall_clock *wall_clock = &(s->wc); + + /* get wallclock at system boot */ + do { + version = wall_clock->version; + rmb(); /* fetch version before time */ + now.tv_sec = ((uint64_t)wall_clock->sec_hi << 32) | wall_clock->sec; + now.tv_nsec = wall_clock->nsec; + rmb(); /* fetch time before checking version */ + } while ((wall_clock->version & 1) || (version != wall_clock->version)); + + /* time since system boot */ + ktime_get_ts64(&ts_monotonic); + *ts = timespec64_add(now, ts_monotonic); +} + +static int xen_pvclock_gtod_notify(struct notifier_block *nb, + unsigned long was_set, void *priv) +{ + /* Protected by the calling core code serialization */ + static struct timespec64 next_sync; + + struct xen_platform_op op; + struct timespec64 now, system_time; + struct timekeeper *tk = priv; + + now.tv_sec = tk->xtime_sec; + now.tv_nsec = (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift); + system_time = timespec64_add(now, tk->wall_to_monotonic); + + /* + * We only take the expensive HV call when the clock was set + * or when the 11 minutes RTC synchronization time elapsed. + */ + if (!was_set && timespec64_compare(&now, &next_sync) < 0) + return NOTIFY_OK; + + op.cmd = XENPF_settime64; + op.u.settime64.mbz = 0; + op.u.settime64.secs = now.tv_sec; + op.u.settime64.nsecs = now.tv_nsec; + op.u.settime64.system_time = timespec64_to_ns(&system_time); + (void)HYPERVISOR_platform_op(&op); + + /* + * Move the next drift compensation time 11 minutes + * ahead. That's emulating the sync_cmos_clock() update for + * the hardware RTC. + */ + next_sync = now; + next_sync.tv_sec += 11 * 60; + + return NOTIFY_OK; +} + +static struct notifier_block xen_pvclock_gtod_notifier = { + .notifier_call = xen_pvclock_gtod_notify, +}; + static void xen_percpu_init(void) { struct vcpu_register_vcpu_info info; @@ -104,6 +186,8 @@ static void xen_percpu_init(void) BUG_ON(err); per_cpu(xen_vcpu, cpu) = vcpup; + xen_setup_runstate_info(cpu); + after_register_vcpu_info: enable_percpu_irq(xen_events_irq, 0); put_cpu(); @@ -271,6 +355,11 @@ static int __init xen_guest_init(void) register_cpu_notifier(&xen_cpu_notifier); + pv_time_ops.steal_clock = xen_stolen_accounting; + static_key_slow_inc(¶virt_steal_enabled); + if (xen_initial_domain()) + pvclock_gtod_register_notifier(&xen_pvclock_gtod_notifier); + return 0; } early_initcall(xen_guest_init); @@ -282,6 +371,11 @@ static int __init xen_pm_init(void) pm_power_off = xen_power_off; arm_pm_restart = xen_restart; + if (!xen_initial_domain()) { + struct timespec64 ts; + xen_read_wallclock(&ts); + do_settimeofday64(&ts); + } return 0; } @@ -307,5 +401,6 @@ EXPORT_SYMBOL_GPL(HYPERVISOR_memory_op); EXPORT_SYMBOL_GPL(HYPERVISOR_physdev_op); EXPORT_SYMBOL_GPL(HYPERVISOR_vcpu_op); EXPORT_SYMBOL_GPL(HYPERVISOR_tmem_op); +EXPORT_SYMBOL_GPL(HYPERVISOR_platform_op); EXPORT_SYMBOL_GPL(HYPERVISOR_multicall); EXPORT_SYMBOL_GPL(privcmd_call); diff --git a/arch/arm/xen/hypercall.S b/arch/arm/xen/hypercall.S index 10fd99c568c6..9a36f4f49c10 100644 --- a/arch/arm/xen/hypercall.S +++ b/arch/arm/xen/hypercall.S @@ -89,6 +89,7 @@ HYPERCALL2(memory_op); HYPERCALL2(physdev_op); HYPERCALL3(vcpu_op); HYPERCALL1(tmem_op); +HYPERCALL1(platform_op_raw); HYPERCALL2(multicall); ENTRY(privcmd_call) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 871f21783866..6be3fa2310ee 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -3,6 +3,7 @@ config ARM64 select ACPI_CCA_REQUIRED if ACPI select ACPI_GENERIC_GSI if ACPI select ACPI_REDUCED_HARDWARE_ONLY if ACPI + select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_GCOV_PROFILE_ALL @@ -51,6 +52,8 @@ config ARM64 select HAVE_ARCH_JUMP_LABEL select HAVE_ARCH_KASAN if SPARSEMEM_VMEMMAP && !(ARM64_16K_PAGES && ARM64_VA_BITS_48) select HAVE_ARCH_KGDB + select HAVE_ARCH_MMAP_RND_BITS + select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK select HAVE_BPF_JIT @@ -70,6 +73,7 @@ config ARM64 select HAVE_FUNCTION_GRAPH_TRACER select HAVE_GENERIC_DMA_COHERENT select HAVE_HW_BREAKPOINT if PERF_EVENTS + select HAVE_IRQ_TIME_ACCOUNTING select HAVE_MEMBLOCK select HAVE_PATA_PLATFORM select HAVE_PERF_EVENTS @@ -92,6 +96,7 @@ config ARM64 select SPARSE_IRQ select SYSCTL_EXCEPTION_TRACE select HAVE_CONTEXT_TRACKING + select HAVE_ARM_SMCCC help ARM 64-bit (AArch64) Linux support. @@ -104,6 +109,33 @@ config ARCH_PHYS_ADDR_T_64BIT config MMU def_bool y +config ARCH_MMAP_RND_BITS_MIN + default 14 if ARM64_64K_PAGES + default 16 if ARM64_16K_PAGES + default 18 + +# max bits determined by the following formula: +# VA_BITS - PAGE_SHIFT - 3 +config ARCH_MMAP_RND_BITS_MAX + default 19 if ARM64_VA_BITS=36 + default 24 if ARM64_VA_BITS=39 + default 27 if ARM64_VA_BITS=42 + default 30 if ARM64_VA_BITS=47 + default 29 if ARM64_VA_BITS=48 && ARM64_64K_PAGES + default 31 if ARM64_VA_BITS=48 && ARM64_16K_PAGES + default 33 if ARM64_VA_BITS=48 + default 14 if ARM64_64K_PAGES + default 16 if ARM64_16K_PAGES + default 18 + +config ARCH_MMAP_RND_COMPAT_BITS_MIN + default 7 if ARM64_64K_PAGES + default 9 if ARM64_16K_PAGES + default 11 + +config ARCH_MMAP_RND_COMPAT_BITS_MAX + default 16 + config NO_IOPORT_MAP def_bool y if !PCI @@ -529,9 +561,6 @@ config HW_PERF_EVENTS config SYS_SUPPORTS_HUGETLBFS def_bool y -config ARCH_WANT_GENERAL_HUGETLB - def_bool y - config ARCH_WANT_HUGE_PMD_SHARE def_bool y if ARM64_4K_PAGES || (ARM64_16K_PAGES && !ARM64_VA_BITS_36) @@ -556,6 +585,25 @@ config SECCOMP and the task is only allowed to execute a few safe syscalls defined by each seccomp mode. +config PARAVIRT + bool "Enable paravirtualization code" + help + This changes the kernel so it can modify itself when it is run + under a hypervisor, potentially improving performance significantly + over full virtualization. + +config PARAVIRT_TIME_ACCOUNTING + bool "Paravirtual steal time accounting" + select PARAVIRT + default n + help + Select this option to enable fine granularity task steal time + accounting. Time spent executing other tasks in parallel with + the current vCPU is discounted from the vCPU power. To account for + that, there can be a small performance impact. + + If in doubt, say N here. + config XEN_DOM0 def_bool y depends on XEN @@ -564,6 +612,7 @@ config XEN bool "Xen guest support on ARM64" depends on ARM64 && OF select SWIOTLB_XEN + select PARAVIRT help Say Y if you want to run Linux in a Virtual Machine on Xen on ARM64. diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug index 04fb73b973f1..e13c4bf84d9e 100644 --- a/arch/arm64/Kconfig.debug +++ b/arch/arm64/Kconfig.debug @@ -14,20 +14,6 @@ config ARM64_PTDUMP kernel. If in doubt, say "N" -config STRICT_DEVMEM - bool "Filter access to /dev/mem" - depends on MMU - help - If this option is disabled, you allow userspace (root) access to all - of memory, including kernel and userspace memory. Accidental - access to this is obviously disastrous, but specific access can - be used by people debugging the kernel. - - If this option is switched on, the /dev/mem file only allows - userspace access to memory mapped peripherals. - - If in doubt, say Y. - config PID_IN_CONTEXTIDR bool "Write the current PID to the CONTEXTIDR register" help diff --git a/arch/arm64/boot/dts/hisilicon/hip05.dtsi b/arch/arm64/boot/dts/hisilicon/hip05.dtsi index 4ff16d016e34..c1ea999c7be1 100644 --- a/arch/arm64/boot/dts/hisilicon/hip05.dtsi +++ b/arch/arm64/boot/dts/hisilicon/hip05.dtsi @@ -246,6 +246,11 @@ clock-frequency = <200000000>; }; + peri_c_subctrl: syscon@80000000 { + compatible = "hisilicon,hip05-perisubc", "syscon"; + reg = < 0x0 0x80000000 0x0 0x10000>; + }; + uart0: uart@80300000 { compatible = "snps,dw-apb-uart"; reg = <0x0 0x80300000 0x0 0x10000>; diff --git a/arch/arm64/boot/dts/hisilicon/hip05_hns.dtsi b/arch/arm64/boot/dts/hisilicon/hip05_hns.dtsi index 606dd5a05c2d..da7b6e613257 100644 --- a/arch/arm64/boot/dts/hisilicon/hip05_hns.dtsi +++ b/arch/arm64/boot/dts/hisilicon/hip05_hns.dtsi @@ -10,8 +10,8 @@ soc0: soc@000000000 { #address-cells = <1>; #size-cells = <0>; compatible = "hisilicon,hns-mdio"; - reg = <0x0 0x803c0000 0x0 0x10000 - 0x0 0x80000000 0x0 0x10000>; + reg = <0x0 0x803c0000 0x0 0x10000>; + subctrl-vbase = <&peri_c_subctrl>; soc0_phy0: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/arm64/boot/dts/mediatek/mt8173-evb.dts b/arch/arm64/boot/dts/mediatek/mt8173-evb.dts index 811cb760ba49..9b1482a1d007 100644 --- a/arch/arm64/boot/dts/mediatek/mt8173-evb.dts +++ b/arch/arm64/boot/dts/mediatek/mt8173-evb.dts @@ -13,6 +13,7 @@ */ /dts-v1/; +#include <dt-bindings/gpio/gpio.h> #include "mt8173.dtsi" / { @@ -32,6 +33,15 @@ }; chosen { }; + + usb_p1_vbus: regulator@0 { + compatible = "regulator-fixed"; + regulator-name = "usb_vbus"; + regulator-min-microvolt = <5000000>; + regulator-max-microvolt = <5000000>; + gpio = <&pio 130 GPIO_ACTIVE_HIGH>; + enable-active-high; + }; }; &i2c1 { @@ -408,3 +418,9 @@ &uart0 { status = "okay"; }; + +&usb30 { + vusb33-supply = <&mt6397_vusb_reg>; + vbus-supply = <&usb_p1_vbus>; + mediatek,wakeup-src = <1>; +}; diff --git a/arch/arm64/boot/dts/mediatek/mt8173.dtsi b/arch/arm64/boot/dts/mediatek/mt8173.dtsi index 4dd5f93d0303..c1fd2757f8e4 100644 --- a/arch/arm64/boot/dts/mediatek/mt8173.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8173.dtsi @@ -14,6 +14,7 @@ #include <dt-bindings/clock/mt8173-clk.h> #include <dt-bindings/interrupt-controller/irq.h> #include <dt-bindings/interrupt-controller/arm-gic.h> +#include <dt-bindings/phy/phy.h> #include <dt-bindings/power/mt8173-power.h> #include <dt-bindings/reset-controller/mt8173-resets.h> #include "mt8173-pinfunc.h" @@ -510,6 +511,47 @@ status = "disabled"; }; + usb30: usb@11270000 { + compatible = "mediatek,mt8173-xhci"; + reg = <0 0x11270000 0 0x1000>, + <0 0x11280700 0 0x0100>; + interrupts = <GIC_SPI 115 IRQ_TYPE_LEVEL_LOW>; + power-domains = <&scpsys MT8173_POWER_DOMAIN_USB>; + clocks = <&topckgen CLK_TOP_USB30_SEL>, + <&pericfg CLK_PERI_USB0>, + <&pericfg CLK_PERI_USB1>; + clock-names = "sys_ck", + "wakeup_deb_p0", + "wakeup_deb_p1"; + phys = <&phy_port0 PHY_TYPE_USB3>, + <&phy_port1 PHY_TYPE_USB2>; + mediatek,syscon-wakeup = <&pericfg>; + status = "okay"; + }; + + u3phy: usb-phy@11290000 { + compatible = "mediatek,mt8173-u3phy"; + reg = <0 0x11290000 0 0x800>; + clocks = <&apmixedsys CLK_APMIXED_REF2USB_TX>; + clock-names = "u3phya_ref"; + #address-cells = <2>; + #size-cells = <2>; + ranges; + status = "okay"; + + phy_port0: port@11290800 { + reg = <0 0x11290800 0 0x800>; + #phy-cells = <1>; + status = "okay"; + }; + + phy_port1: port@11291000 { + reg = <0 0x11291000 0 0x800>; + #phy-cells = <1>; + status = "okay"; + }; + }; + mmsys: clock-controller@14000000 { compatible = "mediatek,mt8173-mmsys", "syscon"; reg = <0 0x14000000 0 0x1000>; diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h index d56ec0715157..e4962f04201e 100644 --- a/arch/arm64/include/asm/alternative.h +++ b/arch/arm64/include/asm/alternative.h @@ -19,7 +19,6 @@ struct alt_instr { void __init apply_alternatives_all(void); void apply_alternatives(void *start, size_t length); -void free_alternatives_memory(void); #define ALTINSTR_ENTRY(feature) \ " .word 661b - .\n" /* label */ \ diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 12eff928ef8b..bb7b72734c24 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -193,6 +193,17 @@ lr .req x30 // link register str \src, [\tmp, :lo12:\sym] .endm + /* + * @sym: The name of the per-cpu variable + * @reg: Result of per_cpu(sym, smp_processor_id()) + * @tmp: scratch register + */ + .macro this_cpu_ptr, sym, reg, tmp + adr_l \reg, \sym + mrs \tmp, tpidr_el1 + add \reg, \reg, \tmp + .endm + /* * Annotate a function as position independent, i.e., safe to be called before * the kernel virtual mapping is activated. diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index 54efedaf331f..7fc294c3bc5b 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -68,6 +68,7 @@ extern void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); extern void flush_icache_range(unsigned long start, unsigned long end); extern void __flush_dcache_area(void *addr, size_t len); +extern void __clean_dcache_area_pou(void *addr, size_t len); extern long __flush_cache_user_range(unsigned long start, unsigned long end); static inline void flush_cache_mm(struct mm_struct *mm) diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h index 9ea611ea69df..510c7b404454 100644 --- a/arch/arm64/include/asm/cmpxchg.h +++ b/arch/arm64/include/asm/cmpxchg.h @@ -19,7 +19,6 @@ #define __ASM_CMPXCHG_H #include <linux/bug.h> -#include <linux/mmdebug.h> #include <asm/atomic.h> #include <asm/barrier.h> diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index ef572206f1c3..8e88a696c9cb 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -2,7 +2,9 @@ #define _ASM_EFI_H #include <asm/io.h> +#include <asm/mmu_context.h> #include <asm/neon.h> +#include <asm/tlbflush.h> #ifdef CONFIG_EFI extern void efi_init(void); @@ -10,6 +12,8 @@ extern void efi_init(void); #define efi_init() #endif +int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md); + #define efi_call_virt(f, ...) \ ({ \ efi_##f##_t *__f; \ @@ -63,6 +67,11 @@ extern void efi_init(void); * Services are enabled and the EFI_RUNTIME_SERVICES bit set. */ +static inline void efi_set_pgd(struct mm_struct *mm) +{ + switch_mm(NULL, mm, NULL); +} + void efi_virtmap_load(void); void efi_virtmap_unload(void); diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h index c5534facf941..3c60f37e48ab 100644 --- a/arch/arm64/include/asm/ftrace.h +++ b/arch/arm64/include/asm/ftrace.h @@ -28,6 +28,8 @@ struct dyn_arch_ftrace { extern unsigned long ftrace_graph_call; +extern void return_to_handler(void); + static inline unsigned long ftrace_call_adjust(unsigned long addr) { /* diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h index bb4052e85dba..bbc1e35aa601 100644 --- a/arch/arm64/include/asm/hugetlb.h +++ b/arch/arm64/include/asm/hugetlb.h @@ -26,36 +26,7 @@ static inline pte_t huge_ptep_get(pte_t *ptep) return *ptep; } -static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pte) -{ - set_pte_at(mm, addr, ptep, pte); -} - -static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, - unsigned long addr, pte_t *ptep) -{ - ptep_clear_flush(vma, addr, ptep); -} - -static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) -{ - ptep_set_wrprotect(mm, addr, ptep); -} -static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) -{ - return ptep_get_and_clear(mm, addr, ptep); -} - -static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma, - unsigned long addr, pte_t *ptep, - pte_t pte, int dirty) -{ - return ptep_set_access_flags(vma, addr, ptep, pte, dirty); -} static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr, unsigned long end, @@ -97,4 +68,19 @@ static inline void arch_clear_hugepage_flags(struct page *page) clear_bit(PG_dcache_clean, &page->flags); } +extern pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma, + struct page *page, int writable); +#define arch_make_huge_pte arch_make_huge_pte +extern void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte); +extern int huge_ptep_set_access_flags(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, + pte_t pte, int dirty); +extern pte_t huge_ptep_get_and_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep); +extern void huge_ptep_set_wrprotect(struct mm_struct *mm, + unsigned long addr, pte_t *ptep); +extern void huge_ptep_clear_flush(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep); + #endif /* __ASM_HUGETLB_H */ diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h index 8e8d30684392..b77197d941fc 100644 --- a/arch/arm64/include/asm/irq.h +++ b/arch/arm64/include/asm/irq.h @@ -1,10 +1,45 @@ #ifndef __ASM_IRQ_H #define __ASM_IRQ_H +#define IRQ_STACK_SIZE THREAD_SIZE +#define IRQ_STACK_START_SP THREAD_START_SP + +#ifndef __ASSEMBLER__ + +#include <linux/percpu.h> + #include <asm-generic/irq.h> +#include <asm/thread_info.h> struct pt_regs; +DECLARE_PER_CPU(unsigned long [IRQ_STACK_SIZE/sizeof(long)], irq_stack); + +/* + * The highest address on the stack, and the first to be used. Used to + * find the dummy-stack frame put down by el?_irq() in entry.S, which + * is structured as follows: + * + * ------------ + * | | <- irq_stack_ptr + * top ------------ + * | x19 | <- irq_stack_ptr - 0x08 + * ------------ + * | x29 | <- irq_stack_ptr - 0x10 + * ------------ + * + * where x19 holds a copy of the task stack pointer where the struct pt_regs + * from kernel_entry can be found. + * + */ +#define IRQ_STACK_PTR(cpu) ((unsigned long)per_cpu(irq_stack, cpu) + IRQ_STACK_START_SP) + +/* + * The offset from irq_stack_ptr where entry.S will store the original + * stack pointer. Used by unwind_frame() and dump_backtrace(). + */ +#define IRQ_STACK_TO_TASK_STACK(ptr) (*((unsigned long *)((ptr) - 0x08))) + extern void set_handle_irq(void (*handle_irq)(struct pt_regs *)); static inline int nr_legacy_irqs(void) @@ -12,4 +47,14 @@ static inline int nr_legacy_irqs(void) return 0; } +static inline bool on_irq_stack(unsigned long sp, int cpu) +{ + /* variable names the same as kernel/stacktrace.c */ + unsigned long low = (unsigned long)per_cpu(irq_stack, cpu); + unsigned long high = low + IRQ_STACK_START_SP; + + return (low <= sp && sp <= high); +} + +#endif /* !__ASSEMBLER__ */ #endif diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 5e6857b6bdc4..738a95f93e49 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -125,6 +125,7 @@ #define VTCR_EL2_SL0_LVL1 (1 << 6) #define VTCR_EL2_T0SZ_MASK 0x3f #define VTCR_EL2_T0SZ_40B 24 +#define VTCR_EL2_VS 19 /* * We configure the Stage-2 page tables to always restrict the IPA space to be @@ -169,7 +170,7 @@ #define VTTBR_BADDR_SHIFT (VTTBR_X - 1) #define VTTBR_BADDR_MASK (((UL(1) << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT) #define VTTBR_VMID_SHIFT (UL(48)) -#define VTTBR_VMID_MASK (UL(0xFF) << VTTBR_VMID_SHIFT) +#define VTTBR_VMID_MASK(size) (_AT(u64, (1 << size) - 1) << VTTBR_VMID_SHIFT) /* Hyp System Trap Register */ #define HSTR_EL2_T(x) (1 << x) diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 5e377101f919..52b777b7d407 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -20,82 +20,6 @@ #include <asm/virt.h> -/* - * 0 is reserved as an invalid value. - * Order *must* be kept in sync with the hyp switch code. - */ -#define MPIDR_EL1 1 /* MultiProcessor Affinity Register */ -#define CSSELR_EL1 2 /* Cache Size Selection Register */ -#define SCTLR_EL1 3 /* System Control Register */ -#define ACTLR_EL1 4 /* Auxiliary Control Register */ -#define CPACR_EL1 5 /* Coprocessor Access Control */ -#define TTBR0_EL1 6 /* Translation Table Base Register 0 */ -#define TTBR1_EL1 7 /* Translation Table Base Register 1 */ -#define TCR_EL1 8 /* Translation Control Register */ -#define ESR_EL1 9 /* Exception Syndrome Register */ -#define AFSR0_EL1 10 /* Auxilary Fault Status Register 0 */ -#define AFSR1_EL1 11 /* Auxilary Fault Status Register 1 */ -#define FAR_EL1 12 /* Fault Address Register */ -#define MAIR_EL1 13 /* Memory Attribute Indirection Register */ -#define VBAR_EL1 14 /* Vector Base Address Register */ -#define CONTEXTIDR_EL1 15 /* Context ID Register */ -#define TPIDR_EL0 16 /* Thread ID, User R/W */ -#define TPIDRRO_EL0 17 /* Thread ID, User R/O */ -#define TPIDR_EL1 18 /* Thread ID, Privileged */ -#define AMAIR_EL1 19 /* Aux Memory Attribute Indirection Register */ -#define CNTKCTL_EL1 20 /* Timer Control Register (EL1) */ -#define PAR_EL1 21 /* Physical Address Register */ -#define MDSCR_EL1 22 /* Monitor Debug System Control Register */ -#define MDCCINT_EL1 23 /* Monitor Debug Comms Channel Interrupt Enable Reg */ - -/* 32bit specific registers. Keep them at the end of the range */ -#define DACR32_EL2 24 /* Domain Access Control Register */ -#define IFSR32_EL2 25 /* Instruction Fault Status Register */ -#define FPEXC32_EL2 26 /* Floating-Point Exception Control Register */ -#define DBGVCR32_EL2 27 /* Debug Vector Catch Register */ -#define NR_SYS_REGS 28 - -/* 32bit mapping */ -#define c0_MPIDR (MPIDR_EL1 * 2) /* MultiProcessor ID Register */ -#define c0_CSSELR (CSSELR_EL1 * 2)/* Cache Size Selection Register */ -#define c1_SCTLR (SCTLR_EL1 * 2) /* System Control Register */ -#define c1_ACTLR (ACTLR_EL1 * 2) /* Auxiliary Control Register */ -#define c1_CPACR (CPACR_EL1 * 2) /* Coprocessor Access Control */ -#define c2_TTBR0 (TTBR0_EL1 * 2) /* Translation Table Base Register 0 */ -#define c2_TTBR0_high (c2_TTBR0 + 1) /* TTBR0 top 32 bits */ -#define c2_TTBR1 (TTBR1_EL1 * 2) /* Translation Table Base Register 1 */ -#define c2_TTBR1_high (c2_TTBR1 + 1) /* TTBR1 top 32 bits */ -#define c2_TTBCR (TCR_EL1 * 2) /* Translation Table Base Control R. */ -#define c3_DACR (DACR32_EL2 * 2)/* Domain Access Control Register */ -#define c5_DFSR (ESR_EL1 * 2) /* Data Fault Status Register */ -#define c5_IFSR (IFSR32_EL2 * 2)/* Instruction Fault Status Register */ -#define c5_ADFSR (AFSR0_EL1 * 2) /* Auxiliary Data Fault Status R */ -#define c5_AIFSR (AFSR1_EL1 * 2) /* Auxiliary Instr Fault Status R */ -#define c6_DFAR (FAR_EL1 * 2) /* Data Fault Address Register */ -#define c6_IFAR (c6_DFAR + 1) /* Instruction Fault Address Register */ -#define c7_PAR (PAR_EL1 * 2) /* Physical Address Register */ -#define c7_PAR_high (c7_PAR + 1) /* PAR top 32 bits */ -#define c10_PRRR (MAIR_EL1 * 2) /* Primary Region Remap Register */ -#define c10_NMRR (c10_PRRR + 1) /* Normal Memory Remap Register */ -#define c12_VBAR (VBAR_EL1 * 2) /* Vector Base Address Register */ -#define c13_CID (CONTEXTIDR_EL1 * 2) /* Context ID Register */ -#define c13_TID_URW (TPIDR_EL0 * 2) /* Thread ID, User R/W */ -#define c13_TID_URO (TPIDRRO_EL0 * 2)/* Thread ID, User R/O */ -#define c13_TID_PRIV (TPIDR_EL1 * 2) /* Thread ID, Privileged */ -#define c10_AMAIR0 (AMAIR_EL1 * 2) /* Aux Memory Attr Indirection Reg */ -#define c10_AMAIR1 (c10_AMAIR0 + 1)/* Aux Memory Attr Indirection Reg */ -#define c14_CNTKCTL (CNTKCTL_EL1 * 2) /* Timer Control Register (PL1) */ - -#define cp14_DBGDSCRext (MDSCR_EL1 * 2) -#define cp14_DBGBCR0 (DBGBCR0_EL1 * 2) -#define cp14_DBGBVR0 (DBGBVR0_EL1 * 2) -#define cp14_DBGBXVR0 (cp14_DBGBVR0 + 1) -#define cp14_DBGWCR0 (DBGWCR0_EL1 * 2) -#define cp14_DBGWVR0 (DBGWVR0_EL1 * 2) -#define cp14_DBGDCCINT (MDCCINT_EL1 * 2) - -#define NR_COPRO_REGS (NR_SYS_REGS * 2) - #define ARM_EXCEPTION_IRQ 0 #define ARM_EXCEPTION_TRAP 1 diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 25a40213bd9b..3066328cd86b 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -26,7 +26,6 @@ #include <asm/esr.h> #include <asm/kvm_arm.h> -#include <asm/kvm_asm.h> #include <asm/kvm_mmio.h> #include <asm/ptrace.h> #include <asm/cputype.h> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index a35ce7266aac..689d4c95e12f 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -25,7 +25,6 @@ #include <linux/types.h> #include <linux/kvm_types.h> #include <asm/kvm.h> -#include <asm/kvm_asm.h> #include <asm/kvm_mmio.h> #define __KVM_HAVE_ARCH_INTC_INITIALIZED @@ -85,6 +84,86 @@ struct kvm_vcpu_fault_info { u64 hpfar_el2; /* Hyp IPA Fault Address Register */ }; +/* + * 0 is reserved as an invalid value. + * Order should be kept in sync with the save/restore code. + */ +enum vcpu_sysreg { + __INVALID_SYSREG__, + MPIDR_EL1, /* MultiProcessor Affinity Register */ + CSSELR_EL1, /* Cache Size Selection Register */ + SCTLR_EL1, /* System Control Register */ + ACTLR_EL1, /* Auxiliary Control Register */ + CPACR_EL1, /* Coprocessor Access Control */ + TTBR0_EL1, /* Translation Table Base Register 0 */ + TTBR1_EL1, /* Translation Table Base Register 1 */ + TCR_EL1, /* Translation Control Register */ + ESR_EL1, /* Exception Syndrome Register */ + AFSR0_EL1, /* Auxilary Fault Status Register 0 */ + AFSR1_EL1, /* Auxilary Fault Status Register 1 */ + FAR_EL1, /* Fault Address Register */ + MAIR_EL1, /* Memory Attribute Indirection Register */ + VBAR_EL1, /* Vector Base Address Register */ + CONTEXTIDR_EL1, /* Context ID Register */ + TPIDR_EL0, /* Thread ID, User R/W */ + TPIDRRO_EL0, /* Thread ID, User R/O */ + TPIDR_EL1, /* Thread ID, Privileged */ + AMAIR_EL1, /* Aux Memory Attribute Indirection Register */ + CNTKCTL_EL1, /* Timer Control Register (EL1) */ + PAR_EL1, /* Physical Address Register */ + MDSCR_EL1, /* Monitor Debug System Control Register */ + MDCCINT_EL1, /* Monitor Debug Comms Channel Interrupt Enable Reg */ + + /* 32bit specific registers. Keep them at the end of the range */ + DACR32_EL2, /* Domain Access Control Register */ + IFSR32_EL2, /* Instruction Fault Status Register */ + FPEXC32_EL2, /* Floating-Point Exception Control Register */ + DBGVCR32_EL2, /* Debug Vector Catch Register */ + + NR_SYS_REGS /* Nothing after this line! */ +}; + +/* 32bit mapping */ +#define c0_MPIDR (MPIDR_EL1 * 2) /* MultiProcessor ID Register */ +#define c0_CSSELR (CSSELR_EL1 * 2)/* Cache Size Selection Register */ +#define c1_SCTLR (SCTLR_EL1 * 2) /* System Control Register */ +#define c1_ACTLR (ACTLR_EL1 * 2) /* Auxiliary Control Register */ +#define c1_CPACR (CPACR_EL1 * 2) /* Coprocessor Access Control */ +#define c2_TTBR0 (TTBR0_EL1 * 2) /* Translation Table Base Register 0 */ +#define c2_TTBR0_high (c2_TTBR0 + 1) /* TTBR0 top 32 bits */ +#define c2_TTBR1 (TTBR1_EL1 * 2) /* Translation Table Base Register 1 */ +#define c2_TTBR1_high (c2_TTBR1 + 1) /* TTBR1 top 32 bits */ +#define c2_TTBCR (TCR_EL1 * 2) /* Translation Table Base Control R. */ +#define c3_DACR (DACR32_EL2 * 2)/* Domain Access Control Register */ +#define c5_DFSR (ESR_EL1 * 2) /* Data Fault Status Register */ +#define c5_IFSR (IFSR32_EL2 * 2)/* Instruction Fault Status Register */ +#define c5_ADFSR (AFSR0_EL1 * 2) /* Auxiliary Data Fault Status R */ +#define c5_AIFSR (AFSR1_EL1 * 2) /* Auxiliary Instr Fault Status R */ +#define c6_DFAR (FAR_EL1 * 2) /* Data Fault Address Register */ +#define c6_IFAR (c6_DFAR + 1) /* Instruction Fault Address Register */ +#define c7_PAR (PAR_EL1 * 2) /* Physical Address Register */ +#define c7_PAR_high (c7_PAR + 1) /* PAR top 32 bits */ +#define c10_PRRR (MAIR_EL1 * 2) /* Primary Region Remap Register */ +#define c10_NMRR (c10_PRRR + 1) /* Normal Memory Remap Register */ +#define c12_VBAR (VBAR_EL1 * 2) /* Vector Base Address Register */ +#define c13_CID (CONTEXTIDR_EL1 * 2) /* Context ID Register */ +#define c13_TID_URW (TPIDR_EL0 * 2) /* Thread ID, User R/W */ +#define c13_TID_URO (TPIDRRO_EL0 * 2)/* Thread ID, User R/O */ +#define c13_TID_PRIV (TPIDR_EL1 * 2) /* Thread ID, Privileged */ +#define c10_AMAIR0 (AMAIR_EL1 * 2) /* Aux Memory Attr Indirection Reg */ +#define c10_AMAIR1 (c10_AMAIR0 + 1)/* Aux Memory Attr Indirection Reg */ +#define c14_CNTKCTL (CNTKCTL_EL1 * 2) /* Timer Control Register (PL1) */ + +#define cp14_DBGDSCRext (MDSCR_EL1 * 2) +#define cp14_DBGBCR0 (DBGBCR0_EL1 * 2) +#define cp14_DBGBVR0 (DBGBVR0_EL1 * 2) +#define cp14_DBGBXVR0 (cp14_DBGBVR0 + 1) +#define cp14_DBGWCR0 (DBGWCR0_EL1 * 2) +#define cp14_DBGWVR0 (DBGWVR0_EL1 * 2) +#define cp14_DBGDCCINT (MDCCINT_EL1 * 2) + +#define NR_COPRO_REGS (NR_SYS_REGS * 2) + struct kvm_cpu_context { struct kvm_regs gp_regs; union { @@ -197,6 +276,12 @@ struct kvm_vcpu_stat { u32 halt_successful_poll; u32 halt_attempted_poll; u32 halt_wakeup; + u32 hvc_exit_stat; + u64 wfe_exit_stat; + u64 wfi_exit_stat; + u64 mmio_exit_user; + u64 mmio_exit_kernel; + u64 exits; }; int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init); diff --git a/arch/arm64/include/asm/kvm_mmio.h b/arch/arm64/include/asm/kvm_mmio.h index 889c908ee631..fe612a962576 100644 --- a/arch/arm64/include/asm/kvm_mmio.h +++ b/arch/arm64/include/asm/kvm_mmio.h @@ -19,7 +19,6 @@ #define __ARM64_KVM_MMIO_H__ #include <linux/kvm_host.h> -#include <asm/kvm_asm.h> #include <asm/kvm_arm.h> /* diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 61505676d085..736433912a1e 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -20,6 +20,7 @@ #include <asm/page.h> #include <asm/memory.h> +#include <asm/cpufeature.h> /* * As we only have the TTBR0_EL2 register, we cannot express @@ -158,7 +159,6 @@ static inline bool kvm_s2pmd_readonly(pmd_t *pmd) #define PTRS_PER_S2_PGD_SHIFT (KVM_PHYS_SHIFT - PGDIR_SHIFT) #endif #define PTRS_PER_S2_PGD (1 << PTRS_PER_S2_PGD_SHIFT) -#define S2_PGD_ORDER get_order(PTRS_PER_S2_PGD * sizeof(pgd_t)) #define kvm_pgd_index(addr) (((addr) >> PGDIR_SHIFT) & (PTRS_PER_S2_PGD - 1)) @@ -230,7 +230,8 @@ static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu) return (vcpu_sys_reg(vcpu, SCTLR_EL1) & 0b101) == 0b101; } -static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu, pfn_t pfn, +static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu, + kvm_pfn_t pfn, unsigned long size, bool ipa_uncached) { @@ -302,5 +303,12 @@ static inline void __kvm_extend_hypmap(pgd_t *boot_hyp_pgd, merged_hyp_pgd[idmap_idx] = __pgd(__pa(boot_hyp_pgd) | PMD_TYPE_TABLE); } +static inline unsigned int kvm_get_vmid_bits(void) +{ + int reg = read_system_reg(SYS_ID_AA64MMFR1_EL1); + + return (cpuid_feature_extract_field(reg, ID_AA64MMFR1_VMIDBITS_SHIFT) == 2) ? 16 : 8; +} + #endif /* __ASSEMBLY__ */ #endif /* __ARM64_KVM_MMU_H__ */ diff --git a/arch/arm64/include/asm/paravirt.h b/arch/arm64/include/asm/paravirt.h new file mode 100644 index 000000000000..fd5f42886251 --- /dev/null +++ b/arch/arm64/include/asm/paravirt.h @@ -0,0 +1,20 @@ +#ifndef _ASM_ARM64_PARAVIRT_H +#define _ASM_ARM64_PARAVIRT_H + +#ifdef CONFIG_PARAVIRT +struct static_key; +extern struct static_key paravirt_steal_enabled; +extern struct static_key paravirt_steal_rq_enabled; + +struct pv_time_ops { + unsigned long long (*steal_clock)(int cpu); +}; +extern struct pv_time_ops pv_time_ops; + +static inline u64 paravirt_steal_clock(int cpu) +{ + return pv_time_ops.steal_clock(cpu); +} +#endif + +#endif diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index d6739e836f7b..5c25b831273d 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -90,7 +90,23 @@ /* * Contiguous page definitions. */ -#define CONT_PTES (_AC(1, UL) << CONT_SHIFT) +#ifdef CONFIG_ARM64_64K_PAGES +#define CONT_PTE_SHIFT 5 +#define CONT_PMD_SHIFT 5 +#elif defined(CONFIG_ARM64_16K_PAGES) +#define CONT_PTE_SHIFT 7 +#define CONT_PMD_SHIFT 5 +#else +#define CONT_PTE_SHIFT 4 +#define CONT_PMD_SHIFT 4 +#endif + +#define CONT_PTES (1 << CONT_PTE_SHIFT) +#define CONT_PTE_SIZE (CONT_PTES * PAGE_SIZE) +#define CONT_PTE_MASK (~(CONT_PTE_SIZE - 1)) +#define CONT_PMDS (1 << CONT_PMD_SHIFT) +#define CONT_PMD_SIZE (CONT_PMDS * PMD_SIZE) +#define CONT_PMD_MASK (~(CONT_PMD_SIZE - 1)) /* the the numerical offset of the PTE within a range of CONT_PTES */ #define CONT_RANGE_OFFSET(addr) (((addr)>>PAGE_SHIFT)&(CONT_PTES-1)) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 63f52b55defe..2d545d7aa80b 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -167,6 +167,16 @@ extern struct page *empty_zero_page; ((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER)) #define pte_valid_not_user(pte) \ ((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID) +#define pte_valid_young(pte) \ + ((pte_val(pte) & (PTE_VALID | PTE_AF)) == (PTE_VALID | PTE_AF)) + +/* + * Could the pte be present in the TLB? We must check mm_tlb_flush_pending + * so that we don't erroneously return false for pages that have been + * remapped as PROT_NONE but are yet to be flushed from the TLB. + */ +#define pte_accessible(mm, pte) \ + (mm_tlb_flush_pending(mm) ? pte_present(pte) : pte_valid_young(pte)) static inline pte_t clear_pte_bit(pte_t pte, pgprot_t prot) { @@ -217,7 +227,8 @@ static inline pte_t pte_mkspecial(pte_t pte) static inline pte_t pte_mkcont(pte_t pte) { - return set_pte_bit(pte, __pgprot(PTE_CONT)); + pte = set_pte_bit(pte, __pgprot(PTE_CONT)); + return set_pte_bit(pte, __pgprot(PTE_TYPE_PAGE)); } static inline pte_t pte_mknoncont(pte_t pte) @@ -225,6 +236,11 @@ static inline pte_t pte_mknoncont(pte_t pte) return clear_pte_bit(pte, __pgprot(PTE_CONT)); } +static inline pmd_t pmd_mkcont(pmd_t pmd) +{ + return __pmd(pmd_val(pmd) | PMD_SECT_CONT); +} + static inline void set_pte(pte_t *ptep, pte_t pte) { *ptep = pte; @@ -298,7 +314,7 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, /* * Hugetlb definitions. */ -#define HUGE_MAX_HSTATE 2 +#define HUGE_MAX_HSTATE 4 #define HPAGE_SHIFT PMD_SHIFT #define HPAGE_SIZE (_AC(1, UL) << HPAGE_SHIFT) #define HPAGE_MASK (~(HPAGE_SIZE - 1)) @@ -337,21 +353,14 @@ static inline pgprot_t mk_sect_prot(pgprot_t prot) #ifdef CONFIG_TRANSPARENT_HUGEPAGE #define pmd_trans_huge(pmd) (pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT)) -#define pmd_trans_splitting(pmd) pte_special(pmd_pte(pmd)) -#ifdef CONFIG_HAVE_RCU_TABLE_FREE -#define __HAVE_ARCH_PMDP_SPLITTING_FLUSH -struct vm_area_struct; -void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address, - pmd_t *pmdp); -#endif /* CONFIG_HAVE_RCU_TABLE_FREE */ #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #define pmd_dirty(pmd) pte_dirty(pmd_pte(pmd)) #define pmd_young(pmd) pte_young(pmd_pte(pmd)) #define pmd_wrprotect(pmd) pte_pmd(pte_wrprotect(pmd_pte(pmd))) -#define pmd_mksplitting(pmd) pte_pmd(pte_mkspecial(pmd_pte(pmd))) #define pmd_mkold(pmd) pte_pmd(pte_mkold(pmd_pte(pmd))) #define pmd_mkwrite(pmd) pte_pmd(pte_mkwrite(pmd_pte(pmd))) +#define pmd_mkclean(pmd) pte_pmd(pte_mkclean(pmd_pte(pmd))) #define pmd_mkdirty(pmd) pte_pmd(pte_mkdirty(pmd_pte(pmd))) #define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd))) #define pmd_mknotpresent(pmd) (__pmd(pmd_val(pmd) & ~PMD_TYPE_MASK)) @@ -664,7 +673,8 @@ extern int kern_addr_valid(unsigned long addr); #include <asm-generic/pgtable.h> -#define pgtable_cache_init() do { } while (0) +void pgd_cache_init(void); +#define pgtable_cache_init pgd_cache_init /* * On AArch64, the cache coherency is handled via the set_pte_at() function. diff --git a/arch/arm64/include/asm/shmparam.h b/arch/arm64/include/asm/shmparam.h index 4df608a8459e..e368a55ebd22 100644 --- a/arch/arm64/include/asm/shmparam.h +++ b/arch/arm64/include/asm/shmparam.h @@ -21,7 +21,7 @@ * alignment value. Since we don't have aliasing D-caches, the rest of * the time we can safely use PAGE_SIZE. */ -#define COMPAT_SHMLBA 0x4000 +#define COMPAT_SHMLBA (4 * PAGE_SIZE) #include <asm-generic/shmparam.h> diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h index c85e96d174a5..fc9682bfe002 100644 --- a/arch/arm64/include/asm/spinlock.h +++ b/arch/arm64/include/asm/spinlock.h @@ -26,9 +26,28 @@ * The memory barriers are implicit with the load-acquire and store-release * instructions. */ +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + unsigned int tmp; + arch_spinlock_t lockval; -#define arch_spin_unlock_wait(lock) \ - do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0) + asm volatile( +" sevl\n" +"1: wfe\n" +"2: ldaxr %w0, %2\n" +" eor %w1, %w0, %w0, ror #16\n" +" cbnz %w1, 1b\n" + ARM64_LSE_ATOMIC_INSN( + /* LL/SC */ +" stxr %w1, %w0, %2\n" +" cbnz %w1, 2b\n", /* Serialise against any concurrent lockers */ + /* LSE atomics */ +" nop\n" +" nop\n") + : "=&r" (lockval), "=&r" (tmp), "+Q" (*lock) + : + : "memory"); +} #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h index 7318f6d54aa9..801a16dbbdf6 100644 --- a/arch/arm64/include/asm/stacktrace.h +++ b/arch/arm64/include/asm/stacktrace.h @@ -16,14 +16,19 @@ #ifndef __ASM_STACKTRACE_H #define __ASM_STACKTRACE_H +struct task_struct; + struct stackframe { unsigned long fp; unsigned long sp; unsigned long pc; +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + unsigned int graph; +#endif }; -extern int unwind_frame(struct stackframe *frame); -extern void walk_stackframe(struct stackframe *frame, +extern int unwind_frame(struct task_struct *tsk, struct stackframe *frame); +extern void walk_stackframe(struct task_struct *tsk, struct stackframe *frame, int (*fn)(struct stackframe *, void *), void *data); #endif /* __ASM_STACKTRACE_H */ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index d48ab5b41f52..4aeebec3d882 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -20,6 +20,8 @@ #ifndef __ASM_SYSREG_H #define __ASM_SYSREG_H +#include <linux/stringify.h> + #include <asm/opcodes.h> /* @@ -208,6 +210,8 @@ #else +#include <linux/types.h> + asm( " .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30\n" " .equ __reg_num_x\\num, \\num\n" @@ -232,6 +236,23 @@ static inline void config_sctlr_el1(u32 clear, u32 set) val |= set; asm volatile("msr sctlr_el1, %0" : : "r" (val)); } + +/* + * Unlike read_cpuid, calls to read_sysreg are never expected to be + * optimized away or replaced with synthetic values. + */ +#define read_sysreg(r) ({ \ + u64 __val; \ + asm volatile("mrs %0, " __stringify(r) : "=r" (__val)); \ + __val; \ +}) + +#define write_sysreg(v, r) do { \ + u64 __val = (u64)v; \ + asm volatile("msr " __stringify(r) ", %0" \ + : : "r" (__val)); \ +} while (0) + #endif #endif /* __ASM_SYSREG_H */ diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index 90c7ff233735..abd64bd1f6d9 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -73,10 +73,16 @@ register unsigned long current_stack_pointer asm ("sp"); */ static inline struct thread_info *current_thread_info(void) __attribute_const__; +/* + * struct thread_info can be accessed directly via sp_el0. + */ static inline struct thread_info *current_thread_info(void) { - return (struct thread_info *) - (current_stack_pointer & ~(THREAD_SIZE - 1)); + unsigned long sp_el0; + + asm ("mrs %0, sp_el0" : "=r" (sp_el0)); + + return (struct thread_info *)sp_el0; } #define thread_saved_pc(tsk) \ diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 474691f8b13a..83cd7e68e83b 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -14,10 +14,10 @@ CFLAGS_REMOVE_return_address.o = -pg arm64-obj-y := debug-monitors.o entry.o irq.o fpsimd.o \ entry-fpsimd.o process.o ptrace.o setup.o signal.o \ sys.o stacktrace.o time.o traps.o io.o vdso.o \ - hyp-stub.o psci.o psci-call.o cpu_ops.o insn.o \ + hyp-stub.o psci.o cpu_ops.o insn.o \ return_address.o cpuinfo.o cpu_errata.o \ cpufeature.o alternative.o cacheinfo.o \ - smp.o smp_spin_table.o topology.o + smp.o smp_spin_table.o topology.o smccc-call.o extra-$(CONFIG_EFI) := efi-entry.o @@ -41,6 +41,7 @@ arm64-obj-$(CONFIG_EFI) += efi.o efi-entry.stub.o arm64-obj-$(CONFIG_PCI) += pci.o arm64-obj-$(CONFIG_ARMV8_DEPRECATED) += armv8_deprecated.o arm64-obj-$(CONFIG_ACPI) += acpi.o +arm64-obj-$(CONFIG_PARAVIRT) += paravirt.o obj-y += $(arm64-obj-y) vdso/ obj-m += $(arm64-obj-m) diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index ab9db0e9818c..d2ee1b21a10d 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -158,9 +158,3 @@ void apply_alternatives(void *start, size_t length) __apply_alternatives(®ion); } - -void free_alternatives_memory(void) -{ - free_reserved_area(__alt_instructions, __alt_instructions_end, - 0, "alternatives"); -} diff --git a/arch/arm64/kernel/arm64ksyms.c b/arch/arm64/kernel/arm64ksyms.c index 3b6d8cc9dfe0..678f30b05a45 100644 --- a/arch/arm64/kernel/arm64ksyms.c +++ b/arch/arm64/kernel/arm64ksyms.c @@ -26,6 +26,7 @@ #include <linux/syscalls.h> #include <linux/uaccess.h> #include <linux/io.h> +#include <linux/arm-smccc.h> #include <asm/checksum.h> @@ -68,3 +69,7 @@ EXPORT_SYMBOL(test_and_change_bit); #ifdef CONFIG_FUNCTION_TRACER EXPORT_SYMBOL(_mcount); #endif + + /* arm-smccc */ +EXPORT_SYMBOL(arm_smccc_smc); +EXPORT_SYMBOL(arm_smccc_hvc); diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index 937f5e58a4d3..3e01207917b1 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -62,7 +62,7 @@ struct insn_emulation { }; static LIST_HEAD(insn_emulation); -static int nr_insn_emulated; +static int nr_insn_emulated __initdata; static DEFINE_RAW_SPINLOCK(insn_emulation_lock); static void register_emulation_hooks(struct insn_emulation_ops *ops) @@ -173,7 +173,7 @@ static int update_insn_emulation_mode(struct insn_emulation *insn, return ret; } -static void register_insn_emulation(struct insn_emulation_ops *ops) +static void __init register_insn_emulation(struct insn_emulation_ops *ops) { unsigned long flags; struct insn_emulation *insn; @@ -237,7 +237,7 @@ static struct ctl_table ctl_abi[] = { { } }; -static void register_insn_emulation_sysctl(struct ctl_table *table) +static void __init register_insn_emulation_sysctl(struct ctl_table *table) { unsigned long flags; int i = 0; diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 25de8b244961..fffa4ac6c25a 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -28,6 +28,7 @@ #include <asm/suspend.h> #include <asm/vdso_datapage.h> #include <linux/kbuild.h> +#include <linux/arm-smccc.h> int main(void) { @@ -108,49 +109,11 @@ int main(void) DEFINE(CPU_GP_REGS, offsetof(struct kvm_cpu_context, gp_regs)); DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_regs, regs)); DEFINE(CPU_FP_REGS, offsetof(struct kvm_regs, fp_regs)); - DEFINE(CPU_SP_EL1, offsetof(struct kvm_regs, sp_el1)); - DEFINE(CPU_ELR_EL1, offsetof(struct kvm_regs, elr_el1)); - DEFINE(CPU_SPSR, offsetof(struct kvm_regs, spsr)); - DEFINE(CPU_SYSREGS, offsetof(struct kvm_cpu_context, sys_regs)); + DEFINE(VCPU_FPEXC32_EL2, offsetof(struct kvm_vcpu, arch.ctxt.sys_regs[FPEXC32_EL2])); DEFINE(VCPU_ESR_EL2, offsetof(struct kvm_vcpu, arch.fault.esr_el2)); DEFINE(VCPU_FAR_EL2, offsetof(struct kvm_vcpu, arch.fault.far_el2)); DEFINE(VCPU_HPFAR_EL2, offsetof(struct kvm_vcpu, arch.fault.hpfar_el2)); - DEFINE(VCPU_DEBUG_FLAGS, offsetof(struct kvm_vcpu, arch.debug_flags)); - DEFINE(VCPU_DEBUG_PTR, offsetof(struct kvm_vcpu, arch.debug_ptr)); - DEFINE(DEBUG_BCR, offsetof(struct kvm_guest_debug_arch, dbg_bcr)); - DEFINE(DEBUG_BVR, offsetof(struct kvm_guest_debug_arch, dbg_bvr)); - DEFINE(DEBUG_WCR, offsetof(struct kvm_guest_debug_arch, dbg_wcr)); - DEFINE(DEBUG_WVR, offsetof(struct kvm_guest_debug_arch, dbg_wvr)); - DEFINE(VCPU_HCR_EL2, offsetof(struct kvm_vcpu, arch.hcr_el2)); - DEFINE(VCPU_MDCR_EL2, offsetof(struct kvm_vcpu, arch.mdcr_el2)); - DEFINE(VCPU_IRQ_LINES, offsetof(struct kvm_vcpu, arch.irq_lines)); DEFINE(VCPU_HOST_CONTEXT, offsetof(struct kvm_vcpu, arch.host_cpu_context)); - DEFINE(VCPU_HOST_DEBUG_STATE, offsetof(struct kvm_vcpu, arch.host_debug_state)); - DEFINE(VCPU_TIMER_CNTV_CTL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_ctl)); - DEFINE(VCPU_TIMER_CNTV_CVAL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_cval)); - DEFINE(KVM_TIMER_CNTVOFF, offsetof(struct kvm, arch.timer.cntvoff)); - DEFINE(KVM_TIMER_ENABLED, offsetof(struct kvm, arch.timer.enabled)); - DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm)); - DEFINE(VCPU_VGIC_CPU, offsetof(struct kvm_vcpu, arch.vgic_cpu)); - DEFINE(VGIC_V2_CPU_HCR, offsetof(struct vgic_cpu, vgic_v2.vgic_hcr)); - DEFINE(VGIC_V2_CPU_VMCR, offsetof(struct vgic_cpu, vgic_v2.vgic_vmcr)); - DEFINE(VGIC_V2_CPU_MISR, offsetof(struct vgic_cpu, vgic_v2.vgic_misr)); - DEFINE(VGIC_V2_CPU_EISR, offsetof(struct vgic_cpu, vgic_v2.vgic_eisr)); - DEFINE(VGIC_V2_CPU_ELRSR, offsetof(struct vgic_cpu, vgic_v2.vgic_elrsr)); - DEFINE(VGIC_V2_CPU_APR, offsetof(struct vgic_cpu, vgic_v2.vgic_apr)); - DEFINE(VGIC_V2_CPU_LR, offsetof(struct vgic_cpu, vgic_v2.vgic_lr)); - DEFINE(VGIC_V3_CPU_SRE, offsetof(struct vgic_cpu, vgic_v3.vgic_sre)); - DEFINE(VGIC_V3_CPU_HCR, offsetof(struct vgic_cpu, vgic_v3.vgic_hcr)); - DEFINE(VGIC_V3_CPU_VMCR, offsetof(struct vgic_cpu, vgic_v3.vgic_vmcr)); - DEFINE(VGIC_V3_CPU_MISR, offsetof(struct vgic_cpu, vgic_v3.vgic_misr)); - DEFINE(VGIC_V3_CPU_EISR, offsetof(struct vgic_cpu, vgic_v3.vgic_eisr)); - DEFINE(VGIC_V3_CPU_ELRSR, offsetof(struct vgic_cpu, vgic_v3.vgic_elrsr)); - DEFINE(VGIC_V3_CPU_AP0R, offsetof(struct vgic_cpu, vgic_v3.vgic_ap0r)); - DEFINE(VGIC_V3_CPU_AP1R, offsetof(struct vgic_cpu, vgic_v3.vgic_ap1r)); - DEFINE(VGIC_V3_CPU_LR, offsetof(struct vgic_cpu, vgic_v3.vgic_lr)); - DEFINE(VGIC_CPU_NR_LR, offsetof(struct vgic_cpu, nr_lr)); - DEFINE(KVM_VTTBR, offsetof(struct kvm, arch.vttbr)); - DEFINE(KVM_VGIC_VCTRL, offsetof(struct kvm, arch.vgic.vctrl_base)); #endif #ifdef CONFIG_CPU_PM DEFINE(CPU_SUSPEND_SZ, sizeof(struct cpu_suspend_ctx)); @@ -161,5 +124,7 @@ int main(void) DEFINE(SLEEP_SAVE_SP_PHYS, offsetof(struct sleep_save_sp, save_ptr_stash_phys)); DEFINE(SLEEP_SAVE_SP_VIRT, offsetof(struct sleep_save_sp, save_ptr_stash)); #endif + DEFINE(ARM_SMCCC_RES_X0_OFFS, offsetof(struct arm_smccc_res, a0)); + DEFINE(ARM_SMCCC_RES_X2_OFFS, offsetof(struct arm_smccc_res, a2)); return 0; } diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 0669c63281ea..5c90aa490a2b 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -684,7 +684,7 @@ static const struct arm64_cpu_capabilities arm64_hwcaps[] = { {}, }; -static void cap_set_hwcap(const struct arm64_cpu_capabilities *cap) +static void __init cap_set_hwcap(const struct arm64_cpu_capabilities *cap) { switch (cap->hwcap_type) { case CAP_HWCAP: @@ -729,7 +729,7 @@ static bool __maybe_unused cpus_have_hwcap(const struct arm64_cpu_capabilities * return rc; } -static void setup_cpu_hwcaps(void) +static void __init setup_cpu_hwcaps(void) { int i; const struct arm64_cpu_capabilities *hwcaps = arm64_hwcaps; @@ -758,7 +758,8 @@ void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, * Run through the enabled capabilities and enable() it on all active * CPUs */ -static void enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps) +static void __init +enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps) { int i; @@ -897,7 +898,7 @@ static inline void set_sys_caps_initialised(void) #endif /* CONFIG_HOTPLUG_CPU */ -static void setup_feature_capabilities(void) +static void __init setup_feature_capabilities(void) { update_cpu_capabilities(arm64_features, "detected feature:"); enable_cpu_capabilities(arm64_features); diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index 4eeb17198cfa..b6abc852f2a1 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -11,317 +11,34 @@ * */ -#include <linux/atomic.h> #include <linux/dmi.h> #include <linux/efi.h> -#include <linux/export.h> -#include <linux/memblock.h> -#include <linux/mm_types.h> -#include <linux/bootmem.h> -#include <linux/of.h> -#include <linux/of_fdt.h> -#include <linux/preempt.h> -#include <linux/rbtree.h> -#include <linux/rwsem.h> -#include <linux/sched.h> -#include <linux/slab.h> -#include <linux/spinlock.h> +#include <linux/init.h> -#include <asm/cacheflush.h> #include <asm/efi.h> -#include <asm/tlbflush.h> -#include <asm/mmu_context.h> -#include <asm/mmu.h> -#include <asm/pgtable.h> -struct efi_memory_map memmap; - -static u64 efi_system_table; - -static pgd_t efi_pgd[PTRS_PER_PGD] __page_aligned_bss; - -static struct mm_struct efi_mm = { - .mm_rb = RB_ROOT, - .pgd = efi_pgd, - .mm_users = ATOMIC_INIT(2), - .mm_count = ATOMIC_INIT(1), - .mmap_sem = __RWSEM_INITIALIZER(efi_mm.mmap_sem), - .page_table_lock = __SPIN_LOCK_UNLOCKED(efi_mm.page_table_lock), - .mmlist = LIST_HEAD_INIT(efi_mm.mmlist), -}; - -static int __init is_normal_ram(efi_memory_desc_t *md) -{ - if (md->attribute & EFI_MEMORY_WB) - return 1; - return 0; -} - -/* - * Translate a EFI virtual address into a physical address: this is necessary, - * as some data members of the EFI system table are virtually remapped after - * SetVirtualAddressMap() has been called. - */ -static phys_addr_t efi_to_phys(unsigned long addr) +int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md) { - efi_memory_desc_t *md; - - for_each_efi_memory_desc(&memmap, md) { - if (!(md->attribute & EFI_MEMORY_RUNTIME)) - continue; - if (md->virt_addr == 0) - /* no virtual mapping has been installed by the stub */ - break; - if (md->virt_addr <= addr && - (addr - md->virt_addr) < (md->num_pages << EFI_PAGE_SHIFT)) - return md->phys_addr + addr - md->virt_addr; - } - return addr; -} - -static int __init uefi_init(void) -{ - efi_char16_t *c16; - void *config_tables; - u64 table_size; - char vendor[100] = "unknown"; - int i, retval; - - efi.systab = early_memremap(efi_system_table, - sizeof(efi_system_table_t)); - if (efi.systab == NULL) { - pr_warn("Unable to map EFI system table.\n"); - return -ENOMEM; - } - - set_bit(EFI_BOOT, &efi.flags); - set_bit(EFI_64BIT, &efi.flags); + pteval_t prot_val; /* - * Verify the EFI Table + * Only regions of type EFI_RUNTIME_SERVICES_CODE need to be + * executable, everything else can be mapped with the XN bits + * set. */ - if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) { - pr_err("System table signature incorrect\n"); - retval = -EINVAL; - goto out; - } - if ((efi.systab->hdr.revision >> 16) < 2) - pr_warn("Warning: EFI system table version %d.%02d, expected 2.00 or greater\n", - efi.systab->hdr.revision >> 16, - efi.systab->hdr.revision & 0xffff); - - /* Show what we know for posterity */ - c16 = early_memremap(efi_to_phys(efi.systab->fw_vendor), - sizeof(vendor) * sizeof(efi_char16_t)); - if (c16) { - for (i = 0; i < (int) sizeof(vendor) - 1 && *c16; ++i) - vendor[i] = c16[i]; - vendor[i] = '\0'; - early_memunmap(c16, sizeof(vendor) * sizeof(efi_char16_t)); - } - - pr_info("EFI v%u.%.02u by %s\n", - efi.systab->hdr.revision >> 16, - efi.systab->hdr.revision & 0xffff, vendor); - - table_size = sizeof(efi_config_table_64_t) * efi.systab->nr_tables; - config_tables = early_memremap(efi_to_phys(efi.systab->tables), - table_size); - if (config_tables == NULL) { - pr_warn("Unable to map EFI config table array.\n"); - retval = -ENOMEM; - goto out; - } - retval = efi_config_parse_tables(config_tables, efi.systab->nr_tables, - sizeof(efi_config_table_64_t), NULL); - - early_memunmap(config_tables, table_size); -out: - early_memunmap(efi.systab, sizeof(efi_system_table_t)); - return retval; -} - -/* - * Return true for RAM regions we want to permanently reserve. - */ -static __init int is_reserve_region(efi_memory_desc_t *md) -{ - switch (md->type) { - case EFI_LOADER_CODE: - case EFI_LOADER_DATA: - case EFI_BOOT_SERVICES_CODE: - case EFI_BOOT_SERVICES_DATA: - case EFI_CONVENTIONAL_MEMORY: - case EFI_PERSISTENT_MEMORY: - return 0; - default: - break; - } - return is_normal_ram(md); -} - -static __init void reserve_regions(void) -{ - efi_memory_desc_t *md; - u64 paddr, npages, size; - - if (efi_enabled(EFI_DBG)) - pr_info("Processing EFI memory map:\n"); - - for_each_efi_memory_desc(&memmap, md) { - paddr = md->phys_addr; - npages = md->num_pages; - - if (efi_enabled(EFI_DBG)) { - char buf[64]; - - pr_info(" 0x%012llx-0x%012llx %s", - paddr, paddr + (npages << EFI_PAGE_SHIFT) - 1, - efi_md_typeattr_format(buf, sizeof(buf), md)); - } - - memrange_efi_to_native(&paddr, &npages); - size = npages << PAGE_SHIFT; - - if (is_normal_ram(md)) - early_init_dt_add_memory_arch(paddr, size); - - if (is_reserve_region(md)) { - memblock_reserve(paddr, size); - if (efi_enabled(EFI_DBG)) - pr_cont("*"); - } - - if (efi_enabled(EFI_DBG)) - pr_cont("\n"); - } - - set_bit(EFI_MEMMAP, &efi.flags); -} - -void __init efi_init(void) -{ - struct efi_fdt_params params; - - /* Grab UEFI information placed in FDT by stub */ - if (!efi_get_fdt_params(¶ms)) - return; - - efi_system_table = params.system_table; - - memblock_reserve(params.mmap & PAGE_MASK, - PAGE_ALIGN(params.mmap_size + (params.mmap & ~PAGE_MASK))); - memmap.phys_map = params.mmap; - memmap.map = early_memremap(params.mmap, params.mmap_size); - if (memmap.map == NULL) { - /* - * If we are booting via UEFI, the UEFI memory map is the only - * description of memory we have, so there is little point in - * proceeding if we cannot access it. - */ - panic("Unable to map EFI memory map.\n"); - } - memmap.map_end = memmap.map + params.mmap_size; - memmap.desc_size = params.desc_size; - memmap.desc_version = params.desc_ver; - - if (uefi_init() < 0) - return; - - reserve_regions(); - early_memunmap(memmap.map, params.mmap_size); -} - -static bool __init efi_virtmap_init(void) -{ - efi_memory_desc_t *md; - - init_new_context(NULL, &efi_mm); - - for_each_efi_memory_desc(&memmap, md) { - pgprot_t prot; - - if (!(md->attribute & EFI_MEMORY_RUNTIME)) - continue; - if (md->virt_addr == 0) - return false; - - pr_info(" EFI remap 0x%016llx => %p\n", - md->phys_addr, (void *)md->virt_addr); - - /* - * Only regions of type EFI_RUNTIME_SERVICES_CODE need to be - * executable, everything else can be mapped with the XN bits - * set. - */ - if (!is_normal_ram(md)) - prot = __pgprot(PROT_DEVICE_nGnRE); - else if (md->type == EFI_RUNTIME_SERVICES_CODE || - !PAGE_ALIGNED(md->phys_addr)) - prot = PAGE_KERNEL_EXEC; - else - prot = PAGE_KERNEL; - - create_pgd_mapping(&efi_mm, md->phys_addr, md->virt_addr, - md->num_pages << EFI_PAGE_SHIFT, - __pgprot(pgprot_val(prot) | PTE_NG)); - } - return true; -} - -/* - * Enable the UEFI Runtime Services if all prerequisites are in place, i.e., - * non-early mapping of the UEFI system table and virtual mappings for all - * EFI_MEMORY_RUNTIME regions. - */ -static int __init arm64_enable_runtime_services(void) -{ - u64 mapsize; - - if (!efi_enabled(EFI_BOOT)) { - pr_info("EFI services will not be available.\n"); - return 0; - } - - if (efi_runtime_disabled()) { - pr_info("EFI runtime services will be disabled.\n"); - return 0; - } - - pr_info("Remapping and enabling EFI services.\n"); - - mapsize = memmap.map_end - memmap.map; - memmap.map = (__force void *)ioremap_cache(memmap.phys_map, - mapsize); - if (!memmap.map) { - pr_err("Failed to remap EFI memory map\n"); - return -ENOMEM; - } - memmap.map_end = memmap.map + mapsize; - efi.memmap = &memmap; - - efi.systab = (__force void *)ioremap_cache(efi_system_table, - sizeof(efi_system_table_t)); - if (!efi.systab) { - pr_err("Failed to remap EFI System Table\n"); - return -ENOMEM; - } - set_bit(EFI_SYSTEM_TABLES, &efi.flags); - - if (!efi_virtmap_init()) { - pr_err("No UEFI virtual mapping was installed -- runtime services will not be available\n"); - return -ENOMEM; - } - - /* Set up runtime services function pointers */ - efi_native_runtime_setup(); - set_bit(EFI_RUNTIME_SERVICES, &efi.flags); - - efi.runtime_version = efi.systab->hdr.revision; - + if ((md->attribute & EFI_MEMORY_WB) == 0) + prot_val = PROT_DEVICE_nGnRE; + else if (md->type == EFI_RUNTIME_SERVICES_CODE || + !PAGE_ALIGNED(md->phys_addr)) + prot_val = pgprot_val(PAGE_KERNEL_EXEC); + else + prot_val = pgprot_val(PAGE_KERNEL); + + create_pgd_mapping(mm, md->phys_addr, md->virt_addr, + md->num_pages << EFI_PAGE_SHIFT, + __pgprot(prot_val | PTE_NG)); return 0; } -early_initcall(arm64_enable_runtime_services); static int __init arm64_dmi_init(void) { @@ -337,23 +54,6 @@ static int __init arm64_dmi_init(void) } core_initcall(arm64_dmi_init); -static void efi_set_pgd(struct mm_struct *mm) -{ - switch_mm(NULL, mm, NULL); -} - -void efi_virtmap_load(void) -{ - preempt_disable(); - efi_set_pgd(&efi_mm); -} - -void efi_virtmap_unload(void) -{ - efi_set_pgd(current->active_mm); - preempt_enable(); -} - /* * UpdateCapsule() depends on the system being shutdown via * ResetSystem(). diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 7ed3d75f6304..1f7f5a2b61bf 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -27,6 +27,7 @@ #include <asm/cpufeature.h> #include <asm/errno.h> #include <asm/esr.h> +#include <asm/irq.h> #include <asm/thread_info.h> #include <asm/unistd.h> @@ -88,9 +89,12 @@ .if \el == 0 mrs x21, sp_el0 - get_thread_info tsk // Ensure MDSCR_EL1.SS is clear, + mov tsk, sp + and tsk, tsk, #~(THREAD_SIZE - 1) // Ensure MDSCR_EL1.SS is clear, ldr x19, [tsk, #TI_FLAGS] // since we can unmask debug disable_step_tsk x19, x20 // exceptions when scheduling. + + mov x29, xzr // fp pointed to user-space .else add x21, sp, #S_FRAME_SIZE .endif @@ -108,6 +112,13 @@ .endif /* + * Set sp_el0 to current thread_info. + */ + .if \el == 0 + msr sp_el0, tsk + .endif + + /* * Registers that may be useful after this macro is invoked: * * x21 - aborted SP @@ -164,8 +175,44 @@ alternative_endif .endm .macro get_thread_info, rd - mov \rd, sp - and \rd, \rd, #~(THREAD_SIZE - 1) // top of stack + mrs \rd, sp_el0 + .endm + + .macro irq_stack_entry + mov x19, sp // preserve the original sp + + /* + * Compare sp with the current thread_info, if the top + * ~(THREAD_SIZE - 1) bits match, we are on a task stack, and + * should switch to the irq stack. + */ + and x25, x19, #~(THREAD_SIZE - 1) + cmp x25, tsk + b.ne 9998f + + this_cpu_ptr irq_stack, x25, x26 + mov x26, #IRQ_STACK_START_SP + add x26, x25, x26 + + /* switch to the irq stack */ + mov sp, x26 + + /* + * Add a dummy stack frame, this non-standard format is fixed up + * by unwind_frame() + */ + stp x29, x19, [sp, #-16]! + mov x29, sp + +9998: + .endm + + /* + * x19 should be preserved between irq_stack_entry and + * irq_stack_exit. + */ + .macro irq_stack_exit + mov sp, x19 .endm /* @@ -183,10 +230,11 @@ tsk .req x28 // current thread_info * Interrupt handling. */ .macro irq_handler - adrp x1, handle_arch_irq - ldr x1, [x1, #:lo12:handle_arch_irq] + ldr_l x1, handle_arch_irq mov x0, sp + irq_stack_entry blr x1 + irq_stack_exit .endm .text @@ -358,10 +406,10 @@ el1_irq: bl trace_hardirqs_off #endif + get_thread_info tsk irq_handler #ifdef CONFIG_PREEMPT - get_thread_info tsk ldr w24, [tsk, #TI_PREEMPT] // get preempt count cbnz w24, 1f // preempt count != 0 ldr x0, [tsk, #TI_FLAGS] // get flags @@ -599,6 +647,8 @@ ENTRY(cpu_switch_to) ldp x29, x9, [x8], #16 ldr lr, [x8] mov sp, x9 + and x9, x9, #~(THREAD_SIZE - 1) + msr sp_el0, x9 ret ENDPROC(cpu_switch_to) @@ -626,14 +676,14 @@ ret_fast_syscall_trace: work_pending: tbnz x1, #TIF_NEED_RESCHED, work_resched /* TIF_SIGPENDING, TIF_NOTIFY_RESUME or TIF_FOREIGN_FPSTATE case */ - ldr x2, [sp, #S_PSTATE] mov x0, sp // 'regs' - tst x2, #PSR_MODE_MASK // user mode regs? - b.ne no_work_pending // returning to kernel enable_irq // enable interrupts for do_notify_resume() bl do_notify_resume b ret_to_user work_resched: +#ifdef CONFIG_TRACE_IRQFLAGS + bl trace_hardirqs_off // the IRQs are off here, inform the tracing code +#endif bl schedule /* @@ -645,7 +695,6 @@ ret_to_user: and x2, x1, #_TIF_WORK_MASK cbnz x2, work_pending enable_step_tsk x1, x2 -no_work_pending: kernel_exit 0 ENDPROC(ret_to_user) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 4c46c54a3ad7..acc1afd5c749 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -289,7 +289,7 @@ static struct notifier_block fpsimd_cpu_pm_notifier_block = { .notifier_call = fpsimd_cpu_pm_notifier, }; -static void fpsimd_pm_init(void) +static void __init fpsimd_pm_init(void) { cpu_pm_register_notifier(&fpsimd_cpu_pm_notifier_block); } diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index c851be795080..ebecf9aa33d1 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -29,12 +29,11 @@ static int ftrace_modify_code(unsigned long pc, u32 old, u32 new, /* * Note: - * Due to modules and __init, code can disappear and change, - * we need to protect against faulting as well as code changing. - * We do this by aarch64_insn_*() which use the probe_kernel_*(). - * - * No lock is held here because all the modifications are run - * through stop_machine(). + * We are paranoid about modifying text, as if a bug were to happen, it + * could cause us to read or write to someplace that could cause harm. + * Carefully read and modify the code with aarch64_insn_*() which uses + * probe_kernel_*(), and make sure what we read is what we expected it + * to be before modifying it. */ if (validate) { if (aarch64_insn_read((void *)pc, &replaced)) @@ -93,6 +92,11 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, return ftrace_modify_code(pc, old, new, true); } +void arch_ftrace_update_code(int command) +{ + ftrace_modify_all_code(command); +} + int __init ftrace_dyn_arch_init(void) { return 0; @@ -125,23 +129,20 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, * on other archs. It's unlikely on AArch64. */ old = *parent; - *parent = return_hooker; trace.func = self_addr; trace.depth = current->curr_ret_stack + 1; /* Only trace if the calling function expects to */ - if (!ftrace_graph_entry(&trace)) { - *parent = old; + if (!ftrace_graph_entry(&trace)) return; - } err = ftrace_push_return_trace(old, self_addr, &trace.depth, frame_pointer); - if (err == -EBUSY) { - *parent = old; + if (err == -EBUSY) return; - } + else + *parent = return_hooker; } #ifdef CONFIG_DYNAMIC_FTRACE diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 23cfc08fc8ba..ffe9c2b6431b 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -415,15 +415,17 @@ ENDPROC(__create_page_tables) */ .set initial_sp, init_thread_union + THREAD_START_SP __mmap_switched: - adr_l x6, __bss_start - adr_l x7, __bss_stop - -1: cmp x6, x7 - b.hs 2f - str xzr, [x6], #8 // Clear BSS - b 1b -2: + // Clear BSS + adr_l x0, __bss_start + mov x1, xzr + adr_l x2, __bss_stop + sub x2, x2, x0 + bl __pi_memset + adr_l sp, initial_sp, x4 + mov x4, sp + and x4, x4, #~(THREAD_SIZE - 1) + msr sp_el0, x4 // Save thread_info str_l x21, __fdt_pointer, x5 // Save FDT pointer str_l x24, memstart_addr, x6 // Save PHYS_OFFSET mov x29, #0 @@ -606,6 +608,8 @@ ENDPROC(secondary_startup) ENTRY(__secondary_switched) ldr x0, [x21] // get secondary_data.stack mov sp, x0 + and x0, x0, #~(THREAD_SIZE - 1) + msr sp_el0, x0 // save thread_info mov x29, #0 b secondary_start_kernel ENDPROC(__secondary_switched) diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index c08b9ad6f429..7371455160e5 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -2,7 +2,7 @@ * Copyright (C) 2013 Huawei Ltd. * Author: Jiang Liu <liuj97@gmail.com> * - * Copyright (C) 2014 Zi Shen Lim <zlim.lnx@gmail.com> + * Copyright (C) 2014-2016 Zi Shen Lim <zlim.lnx@gmail.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -363,6 +363,9 @@ u32 __kprobes aarch64_insn_encode_immediate(enum aarch64_insn_imm_type type, u32 immlo, immhi, mask; int shift; + if (insn == AARCH64_BREAK_FAULT) + return AARCH64_BREAK_FAULT; + switch (type) { case AARCH64_INSN_IMM_ADR: shift = 0; @@ -377,7 +380,7 @@ u32 __kprobes aarch64_insn_encode_immediate(enum aarch64_insn_imm_type type, if (aarch64_get_imm_shift_mask(type, &mask, &shift) < 0) { pr_err("aarch64_insn_encode_immediate: unknown immediate encoding %d\n", type); - return 0; + return AARCH64_BREAK_FAULT; } } @@ -394,9 +397,12 @@ static u32 aarch64_insn_encode_register(enum aarch64_insn_register_type type, { int shift; + if (insn == AARCH64_BREAK_FAULT) + return AARCH64_BREAK_FAULT; + if (reg < AARCH64_INSN_REG_0 || reg > AARCH64_INSN_REG_SP) { pr_err("%s: unknown register encoding %d\n", __func__, reg); - return 0; + return AARCH64_BREAK_FAULT; } switch (type) { @@ -417,7 +423,7 @@ static u32 aarch64_insn_encode_register(enum aarch64_insn_register_type type, default: pr_err("%s: unknown register type encoding %d\n", __func__, type); - return 0; + return AARCH64_BREAK_FAULT; } insn &= ~(GENMASK(4, 0) << shift); @@ -446,7 +452,7 @@ static u32 aarch64_insn_encode_ldst_size(enum aarch64_insn_size_type type, break; default: pr_err("%s: unknown size encoding %d\n", __func__, type); - return 0; + return AARCH64_BREAK_FAULT; } insn &= ~GENMASK(31, 30); @@ -460,14 +466,17 @@ static inline long branch_imm_common(unsigned long pc, unsigned long addr, { long offset; - /* - * PC: A 64-bit Program Counter holding the address of the current - * instruction. A64 instructions must be word-aligned. - */ - BUG_ON((pc & 0x3) || (addr & 0x3)); + if ((pc & 0x3) || (addr & 0x3)) { + pr_err("%s: A64 instructions must be word aligned\n", __func__); + return range; + } offset = ((long)addr - (long)pc); - BUG_ON(offset < -range || offset >= range); + + if (offset < -range || offset >= range) { + pr_err("%s: offset out of range\n", __func__); + return range; + } return offset; } @@ -484,6 +493,8 @@ u32 __kprobes aarch64_insn_gen_branch_imm(unsigned long pc, unsigned long addr, * texts are within +/-128M. */ offset = branch_imm_common(pc, addr, SZ_128M); + if (offset >= SZ_128M) + return AARCH64_BREAK_FAULT; switch (type) { case AARCH64_INSN_BRANCH_LINK: @@ -493,7 +504,7 @@ u32 __kprobes aarch64_insn_gen_branch_imm(unsigned long pc, unsigned long addr, insn = aarch64_insn_get_b_value(); break; default: - BUG_ON(1); + pr_err("%s: unknown branch encoding %d\n", __func__, type); return AARCH64_BREAK_FAULT; } @@ -510,6 +521,8 @@ u32 aarch64_insn_gen_comp_branch_imm(unsigned long pc, unsigned long addr, long offset; offset = branch_imm_common(pc, addr, SZ_1M); + if (offset >= SZ_1M) + return AARCH64_BREAK_FAULT; switch (type) { case AARCH64_INSN_BRANCH_COMP_ZERO: @@ -519,7 +532,7 @@ u32 aarch64_insn_gen_comp_branch_imm(unsigned long pc, unsigned long addr, insn = aarch64_insn_get_cbnz_value(); break; default: - BUG_ON(1); + pr_err("%s: unknown branch encoding %d\n", __func__, type); return AARCH64_BREAK_FAULT; } @@ -530,7 +543,7 @@ u32 aarch64_insn_gen_comp_branch_imm(unsigned long pc, unsigned long addr, insn |= AARCH64_INSN_SF_BIT; break; default: - BUG_ON(1); + pr_err("%s: unknown variant encoding %d\n", __func__, variant); return AARCH64_BREAK_FAULT; } @@ -550,7 +563,10 @@ u32 aarch64_insn_gen_cond_branch_imm(unsigned long pc, unsigned long addr, insn = aarch64_insn_get_bcond_value(); - BUG_ON(cond < AARCH64_INSN_COND_EQ || cond > AARCH64_INSN_COND_AL); + if (cond < AARCH64_INSN_COND_EQ || cond > AARCH64_INSN_COND_AL) { + pr_err("%s: unknown condition encoding %d\n", __func__, cond); + return AARCH64_BREAK_FAULT; + } insn |= cond; return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_19, insn, @@ -583,7 +599,7 @@ u32 aarch64_insn_gen_branch_reg(enum aarch64_insn_register reg, insn = aarch64_insn_get_ret_value(); break; default: - BUG_ON(1); + pr_err("%s: unknown branch encoding %d\n", __func__, type); return AARCH64_BREAK_FAULT; } @@ -606,7 +622,7 @@ u32 aarch64_insn_gen_load_store_reg(enum aarch64_insn_register reg, insn = aarch64_insn_get_str_reg_value(); break; default: - BUG_ON(1); + pr_err("%s: unknown load/store encoding %d\n", __func__, type); return AARCH64_BREAK_FAULT; } @@ -645,26 +661,30 @@ u32 aarch64_insn_gen_load_store_pair(enum aarch64_insn_register reg1, insn = aarch64_insn_get_stp_post_value(); break; default: - BUG_ON(1); + pr_err("%s: unknown load/store encoding %d\n", __func__, type); return AARCH64_BREAK_FAULT; } switch (variant) { case AARCH64_INSN_VARIANT_32BIT: - /* offset must be multiples of 4 in the range [-256, 252] */ - BUG_ON(offset & 0x3); - BUG_ON(offset < -256 || offset > 252); + if ((offset & 0x3) || (offset < -256) || (offset > 252)) { + pr_err("%s: offset must be multiples of 4 in the range of [-256, 252] %d\n", + __func__, offset); + return AARCH64_BREAK_FAULT; + } shift = 2; break; case AARCH64_INSN_VARIANT_64BIT: - /* offset must be multiples of 8 in the range [-512, 504] */ - BUG_ON(offset & 0x7); - BUG_ON(offset < -512 || offset > 504); + if ((offset & 0x7) || (offset < -512) || (offset > 504)) { + pr_err("%s: offset must be multiples of 8 in the range of [-512, 504] %d\n", + __func__, offset); + return AARCH64_BREAK_FAULT; + } shift = 3; insn |= AARCH64_INSN_SF_BIT; break; default: - BUG_ON(1); + pr_err("%s: unknown variant encoding %d\n", __func__, variant); return AARCH64_BREAK_FAULT; } @@ -702,7 +722,7 @@ u32 aarch64_insn_gen_add_sub_imm(enum aarch64_insn_register dst, insn = aarch64_insn_get_subs_imm_value(); break; default: - BUG_ON(1); + pr_err("%s: unknown add/sub encoding %d\n", __func__, type); return AARCH64_BREAK_FAULT; } @@ -713,11 +733,14 @@ u32 aarch64_insn_gen_add_sub_imm(enum aarch64_insn_register dst, insn |= AARCH64_INSN_SF_BIT; break; default: - BUG_ON(1); + pr_err("%s: unknown variant encoding %d\n", __func__, variant); return AARCH64_BREAK_FAULT; } - BUG_ON(imm & ~(SZ_4K - 1)); + if (imm & ~(SZ_4K - 1)) { + pr_err("%s: invalid immediate encoding %d\n", __func__, imm); + return AARCH64_BREAK_FAULT; + } insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, insn, dst); @@ -746,7 +769,7 @@ u32 aarch64_insn_gen_bitfield(enum aarch64_insn_register dst, insn = aarch64_insn_get_sbfm_value(); break; default: - BUG_ON(1); + pr_err("%s: unknown bitfield encoding %d\n", __func__, type); return AARCH64_BREAK_FAULT; } @@ -759,12 +782,18 @@ u32 aarch64_insn_gen_bitfield(enum aarch64_insn_register dst, mask = GENMASK(5, 0); break; default: - BUG_ON(1); + pr_err("%s: unknown variant encoding %d\n", __func__, variant); return AARCH64_BREAK_FAULT; } - BUG_ON(immr & ~mask); - BUG_ON(imms & ~mask); + if (immr & ~mask) { + pr_err("%s: invalid immr encoding %d\n", __func__, immr); + return AARCH64_BREAK_FAULT; + } + if (imms & ~mask) { + pr_err("%s: invalid imms encoding %d\n", __func__, imms); + return AARCH64_BREAK_FAULT; + } insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, insn, dst); @@ -793,23 +822,33 @@ u32 aarch64_insn_gen_movewide(enum aarch64_insn_register dst, insn = aarch64_insn_get_movn_value(); break; default: - BUG_ON(1); + pr_err("%s: unknown movewide encoding %d\n", __func__, type); return AARCH64_BREAK_FAULT; } - BUG_ON(imm & ~(SZ_64K - 1)); + if (imm & ~(SZ_64K - 1)) { + pr_err("%s: invalid immediate encoding %d\n", __func__, imm); + return AARCH64_BREAK_FAULT; + } switch (variant) { case AARCH64_INSN_VARIANT_32BIT: - BUG_ON(shift != 0 && shift != 16); + if (shift != 0 && shift != 16) { + pr_err("%s: invalid shift encoding %d\n", __func__, + shift); + return AARCH64_BREAK_FAULT; + } break; case AARCH64_INSN_VARIANT_64BIT: insn |= AARCH64_INSN_SF_BIT; - BUG_ON(shift != 0 && shift != 16 && shift != 32 && - shift != 48); + if (shift != 0 && shift != 16 && shift != 32 && shift != 48) { + pr_err("%s: invalid shift encoding %d\n", __func__, + shift); + return AARCH64_BREAK_FAULT; + } break; default: - BUG_ON(1); + pr_err("%s: unknown variant encoding %d\n", __func__, variant); return AARCH64_BREAK_FAULT; } @@ -843,20 +882,28 @@ u32 aarch64_insn_gen_add_sub_shifted_reg(enum aarch64_insn_register dst, insn = aarch64_insn_get_subs_value(); break; default: - BUG_ON(1); + pr_err("%s: unknown add/sub encoding %d\n", __func__, type); return AARCH64_BREAK_FAULT; } switch (variant) { case AARCH64_INSN_VARIANT_32BIT: - BUG_ON(shift & ~(SZ_32 - 1)); + if (shift & ~(SZ_32 - 1)) { + pr_err("%s: invalid shift encoding %d\n", __func__, + shift); + return AARCH64_BREAK_FAULT; + } break; case AARCH64_INSN_VARIANT_64BIT: insn |= AARCH64_INSN_SF_BIT; - BUG_ON(shift & ~(SZ_64 - 1)); + if (shift & ~(SZ_64 - 1)) { + pr_err("%s: invalid shift encoding %d\n", __func__, + shift); + return AARCH64_BREAK_FAULT; + } break; default: - BUG_ON(1); + pr_err("%s: unknown variant encoding %d\n", __func__, variant); return AARCH64_BREAK_FAULT; } @@ -885,11 +932,15 @@ u32 aarch64_insn_gen_data1(enum aarch64_insn_register dst, insn = aarch64_insn_get_rev32_value(); break; case AARCH64_INSN_DATA1_REVERSE_64: - BUG_ON(variant != AARCH64_INSN_VARIANT_64BIT); + if (variant != AARCH64_INSN_VARIANT_64BIT) { + pr_err("%s: invalid variant for reverse64 %d\n", + __func__, variant); + return AARCH64_BREAK_FAULT; + } insn = aarch64_insn_get_rev64_value(); break; default: - BUG_ON(1); + pr_err("%s: unknown data1 encoding %d\n", __func__, type); return AARCH64_BREAK_FAULT; } @@ -900,7 +951,7 @@ u32 aarch64_insn_gen_data1(enum aarch64_insn_register dst, insn |= AARCH64_INSN_SF_BIT; break; default: - BUG_ON(1); + pr_err("%s: unknown variant encoding %d\n", __func__, variant); return AARCH64_BREAK_FAULT; } @@ -937,7 +988,7 @@ u32 aarch64_insn_gen_data2(enum aarch64_insn_register dst, insn = aarch64_insn_get_rorv_value(); break; default: - BUG_ON(1); + pr_err("%s: unknown data2 encoding %d\n", __func__, type); return AARCH64_BREAK_FAULT; } @@ -948,7 +999,7 @@ u32 aarch64_insn_gen_data2(enum aarch64_insn_register dst, insn |= AARCH64_INSN_SF_BIT; break; default: - BUG_ON(1); + pr_err("%s: unknown variant encoding %d\n", __func__, variant); return AARCH64_BREAK_FAULT; } @@ -976,7 +1027,7 @@ u32 aarch64_insn_gen_data3(enum aarch64_insn_register dst, insn = aarch64_insn_get_msub_value(); break; default: - BUG_ON(1); + pr_err("%s: unknown data3 encoding %d\n", __func__, type); return AARCH64_BREAK_FAULT; } @@ -987,7 +1038,7 @@ u32 aarch64_insn_gen_data3(enum aarch64_insn_register dst, insn |= AARCH64_INSN_SF_BIT; break; default: - BUG_ON(1); + pr_err("%s: unknown variant encoding %d\n", __func__, variant); return AARCH64_BREAK_FAULT; } @@ -1037,20 +1088,28 @@ u32 aarch64_insn_gen_logical_shifted_reg(enum aarch64_insn_register dst, insn = aarch64_insn_get_bics_value(); break; default: - BUG_ON(1); + pr_err("%s: unknown logical encoding %d\n", __func__, type); return AARCH64_BREAK_FAULT; } switch (variant) { case AARCH64_INSN_VARIANT_32BIT: - BUG_ON(shift & ~(SZ_32 - 1)); + if (shift & ~(SZ_32 - 1)) { + pr_err("%s: invalid shift encoding %d\n", __func__, + shift); + return AARCH64_BREAK_FAULT; + } break; case AARCH64_INSN_VARIANT_64BIT: insn |= AARCH64_INSN_SF_BIT; - BUG_ON(shift & ~(SZ_64 - 1)); + if (shift & ~(SZ_64 - 1)) { + pr_err("%s: invalid shift encoding %d\n", __func__, + shift); + return AARCH64_BREAK_FAULT; + } break; default: - BUG_ON(1); + pr_err("%s: unknown variant encoding %d\n", __func__, variant); return AARCH64_BREAK_FAULT; } diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c index 9f17ec071ee0..2386b26c0712 100644 --- a/arch/arm64/kernel/irq.c +++ b/arch/arm64/kernel/irq.c @@ -30,6 +30,9 @@ unsigned long irq_err_count; +/* irq stack only needs to be 16 byte aligned - not IRQ_STACK_SIZE aligned. */ +DEFINE_PER_CPU(unsigned long [IRQ_STACK_SIZE/sizeof(long)], irq_stack) __aligned(16); + int arch_show_interrupts(struct seq_file *p, int prec) { show_ipi_list(p, prec); diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index f4bc779e62e8..93e970231ca9 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -30,9 +30,6 @@ #include <asm/insn.h> #include <asm/sections.h> -#define AARCH64_INSN_IMM_MOVNZ AARCH64_INSN_IMM_MAX -#define AARCH64_INSN_IMM_MOVK AARCH64_INSN_IMM_16 - void *module_alloc(unsigned long size) { void *p; @@ -75,15 +72,18 @@ static u64 do_reloc(enum aarch64_reloc_op reloc_op, void *place, u64 val) static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len) { - u64 imm_mask = (1 << len) - 1; s64 sval = do_reloc(op, place, val); switch (len) { case 16: *(s16 *)place = sval; + if (sval < S16_MIN || sval > U16_MAX) + return -ERANGE; break; case 32: *(s32 *)place = sval; + if (sval < S32_MIN || sval > U32_MAX) + return -ERANGE; break; case 64: *(s64 *)place = sval; @@ -92,34 +92,23 @@ static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len) pr_err("Invalid length (%d) for data relocation\n", len); return 0; } - - /* - * Extract the upper value bits (including the sign bit) and - * shift them to bit 0. - */ - sval = (s64)(sval & ~(imm_mask >> 1)) >> (len - 1); - - /* - * Overflow has occurred if the value is not representable in - * len bits (i.e the bottom len bits are not sign-extended and - * the top bits are not all zero). - */ - if ((u64)(sval + 1) > 2) - return -ERANGE; - return 0; } +enum aarch64_insn_movw_imm_type { + AARCH64_INSN_IMM_MOVNZ, + AARCH64_INSN_IMM_MOVKZ, +}; + static int reloc_insn_movw(enum aarch64_reloc_op op, void *place, u64 val, - int lsb, enum aarch64_insn_imm_type imm_type) + int lsb, enum aarch64_insn_movw_imm_type imm_type) { - u64 imm, limit = 0; + u64 imm; s64 sval; u32 insn = le32_to_cpu(*(u32 *)place); sval = do_reloc(op, place, val); - sval >>= lsb; - imm = sval & 0xffff; + imm = sval >> lsb; if (imm_type == AARCH64_INSN_IMM_MOVNZ) { /* @@ -128,7 +117,7 @@ static int reloc_insn_movw(enum aarch64_reloc_op op, void *place, u64 val, * immediate is less than zero. */ insn &= ~(3 << 29); - if ((s64)imm >= 0) { + if (sval >= 0) { /* >=0: Set the instruction to MOVZ (opcode 10b). */ insn |= 2 << 29; } else { @@ -140,29 +129,13 @@ static int reloc_insn_movw(enum aarch64_reloc_op op, void *place, u64 val, */ imm = ~imm; } - imm_type = AARCH64_INSN_IMM_MOVK; } /* Update the instruction with the new encoding. */ - insn = aarch64_insn_encode_immediate(imm_type, insn, imm); + insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_16, insn, imm); *(u32 *)place = cpu_to_le32(insn); - /* Shift out the immediate field. */ - sval >>= 16; - - /* - * For unsigned immediates, the overflow check is straightforward. - * For signed immediates, the sign bit is actually the bit past the - * most significant bit of the field. - * The AARCH64_INSN_IMM_16 immediate type is unsigned. - */ - if (imm_type != AARCH64_INSN_IMM_16) { - sval++; - limit++; - } - - /* Check the upper bits depending on the sign of the immediate. */ - if ((u64)sval > limit) + if (imm > U16_MAX) return -ERANGE; return 0; @@ -267,25 +240,25 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, overflow_check = false; case R_AARCH64_MOVW_UABS_G0: ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 0, - AARCH64_INSN_IMM_16); + AARCH64_INSN_IMM_MOVKZ); break; case R_AARCH64_MOVW_UABS_G1_NC: overflow_check = false; case R_AARCH64_MOVW_UABS_G1: ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 16, - AARCH64_INSN_IMM_16); + AARCH64_INSN_IMM_MOVKZ); break; case R_AARCH64_MOVW_UABS_G2_NC: overflow_check = false; case R_AARCH64_MOVW_UABS_G2: ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 32, - AARCH64_INSN_IMM_16); + AARCH64_INSN_IMM_MOVKZ); break; case R_AARCH64_MOVW_UABS_G3: /* We're using the top bits so we can't overflow. */ overflow_check = false; ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 48, - AARCH64_INSN_IMM_16); + AARCH64_INSN_IMM_MOVKZ); break; case R_AARCH64_MOVW_SABS_G0: ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 0, @@ -302,7 +275,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, case R_AARCH64_MOVW_PREL_G0_NC: overflow_check = false; ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 0, - AARCH64_INSN_IMM_MOVK); + AARCH64_INSN_IMM_MOVKZ); break; case R_AARCH64_MOVW_PREL_G0: ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 0, @@ -311,7 +284,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, case R_AARCH64_MOVW_PREL_G1_NC: overflow_check = false; ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 16, - AARCH64_INSN_IMM_MOVK); + AARCH64_INSN_IMM_MOVKZ); break; case R_AARCH64_MOVW_PREL_G1: ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 16, @@ -320,7 +293,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, case R_AARCH64_MOVW_PREL_G2_NC: overflow_check = false; ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 32, - AARCH64_INSN_IMM_MOVK); + AARCH64_INSN_IMM_MOVKZ); break; case R_AARCH64_MOVW_PREL_G2: ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 32, diff --git a/arch/arm64/kernel/paravirt.c b/arch/arm64/kernel/paravirt.c new file mode 100644 index 000000000000..53f371ed4568 --- /dev/null +++ b/arch/arm64/kernel/paravirt.c @@ -0,0 +1,25 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Copyright (C) 2013 Citrix Systems + * + * Author: Stefano Stabellini <stefano.stabellini@eu.citrix.com> + */ + +#include <linux/export.h> +#include <linux/jump_label.h> +#include <linux/types.h> +#include <asm/paravirt.h> + +struct static_key paravirt_steal_enabled; +struct static_key paravirt_steal_rq_enabled; + +struct pv_time_ops pv_time_ops; +EXPORT_SYMBOL_GPL(pv_time_ops); diff --git a/arch/arm64/kernel/perf_callchain.c b/arch/arm64/kernel/perf_callchain.c index 3aa74830cc69..ff4665462a02 100644 --- a/arch/arm64/kernel/perf_callchain.c +++ b/arch/arm64/kernel/perf_callchain.c @@ -164,8 +164,11 @@ void perf_callchain_kernel(struct perf_callchain_entry *entry, frame.fp = regs->regs[29]; frame.sp = regs->sp; frame.pc = regs->pc; +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + frame.graph = current->curr_ret_stack; +#endif - walk_stackframe(&frame, callchain_trace, entry); + walk_stackframe(current, &frame, callchain_trace, entry); } unsigned long perf_instruction_pointer(struct pt_regs *regs) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 5b1897e8ca24..f7ab14c4d5df 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -29,60 +29,74 @@ * ARMv8 PMUv3 Performance Events handling code. * Common event types. */ -enum armv8_pmuv3_perf_types { - /* Required events. */ - ARMV8_PMUV3_PERFCTR_PMNC_SW_INCR = 0x00, - ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL = 0x03, - ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS = 0x04, - ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED = 0x10, - ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES = 0x11, - ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED = 0x12, - - /* At least one of the following is required. */ - ARMV8_PMUV3_PERFCTR_INSTR_EXECUTED = 0x08, - ARMV8_PMUV3_PERFCTR_OP_SPEC = 0x1B, - - /* Common architectural events. */ - ARMV8_PMUV3_PERFCTR_MEM_READ = 0x06, - ARMV8_PMUV3_PERFCTR_MEM_WRITE = 0x07, - ARMV8_PMUV3_PERFCTR_EXC_TAKEN = 0x09, - ARMV8_PMUV3_PERFCTR_EXC_EXECUTED = 0x0A, - ARMV8_PMUV3_PERFCTR_CID_WRITE = 0x0B, - ARMV8_PMUV3_PERFCTR_PC_WRITE = 0x0C, - ARMV8_PMUV3_PERFCTR_PC_IMM_BRANCH = 0x0D, - ARMV8_PMUV3_PERFCTR_PC_PROC_RETURN = 0x0E, - ARMV8_PMUV3_PERFCTR_MEM_UNALIGNED_ACCESS = 0x0F, - ARMV8_PMUV3_PERFCTR_TTBR_WRITE = 0x1C, - - /* Common microarchitectural events. */ - ARMV8_PMUV3_PERFCTR_L1_ICACHE_REFILL = 0x01, - ARMV8_PMUV3_PERFCTR_ITLB_REFILL = 0x02, - ARMV8_PMUV3_PERFCTR_DTLB_REFILL = 0x05, - ARMV8_PMUV3_PERFCTR_MEM_ACCESS = 0x13, - ARMV8_PMUV3_PERFCTR_L1_ICACHE_ACCESS = 0x14, - ARMV8_PMUV3_PERFCTR_L1_DCACHE_WB = 0x15, - ARMV8_PMUV3_PERFCTR_L2_CACHE_ACCESS = 0x16, - ARMV8_PMUV3_PERFCTR_L2_CACHE_REFILL = 0x17, - ARMV8_PMUV3_PERFCTR_L2_CACHE_WB = 0x18, - ARMV8_PMUV3_PERFCTR_BUS_ACCESS = 0x19, - ARMV8_PMUV3_PERFCTR_MEM_ERROR = 0x1A, - ARMV8_PMUV3_PERFCTR_BUS_CYCLES = 0x1D, -}; + +/* Required events. */ +#define ARMV8_PMUV3_PERFCTR_PMNC_SW_INCR 0x00 +#define ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL 0x03 +#define ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS 0x04 +#define ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED 0x10 +#define ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES 0x11 +#define ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED 0x12 + +/* At least one of the following is required. */ +#define ARMV8_PMUV3_PERFCTR_INSTR_EXECUTED 0x08 +#define ARMV8_PMUV3_PERFCTR_OP_SPEC 0x1B + +/* Common architectural events. */ +#define ARMV8_PMUV3_PERFCTR_MEM_READ 0x06 +#define ARMV8_PMUV3_PERFCTR_MEM_WRITE 0x07 +#define ARMV8_PMUV3_PERFCTR_EXC_TAKEN 0x09 +#define ARMV8_PMUV3_PERFCTR_EXC_EXECUTED 0x0A +#define ARMV8_PMUV3_PERFCTR_CID_WRITE 0x0B +#define ARMV8_PMUV3_PERFCTR_PC_WRITE 0x0C +#define ARMV8_PMUV3_PERFCTR_PC_IMM_BRANCH 0x0D +#define ARMV8_PMUV3_PERFCTR_PC_PROC_RETURN 0x0E +#define ARMV8_PMUV3_PERFCTR_MEM_UNALIGNED_ACCESS 0x0F +#define ARMV8_PMUV3_PERFCTR_TTBR_WRITE 0x1C +#define ARMV8_PMUV3_PERFCTR_CHAIN 0x1E +#define ARMV8_PMUV3_PERFCTR_BR_RETIRED 0x21 + +/* Common microarchitectural events. */ +#define ARMV8_PMUV3_PERFCTR_L1_ICACHE_REFILL 0x01 +#define ARMV8_PMUV3_PERFCTR_ITLB_REFILL 0x02 +#define ARMV8_PMUV3_PERFCTR_DTLB_REFILL 0x05 +#define ARMV8_PMUV3_PERFCTR_MEM_ACCESS 0x13 +#define ARMV8_PMUV3_PERFCTR_L1_ICACHE_ACCESS 0x14 +#define ARMV8_PMUV3_PERFCTR_L1_DCACHE_WB 0x15 +#define ARMV8_PMUV3_PERFCTR_L2_CACHE_ACCESS 0x16 +#define ARMV8_PMUV3_PERFCTR_L2_CACHE_REFILL 0x17 +#define ARMV8_PMUV3_PERFCTR_L2_CACHE_WB 0x18 +#define ARMV8_PMUV3_PERFCTR_BUS_ACCESS 0x19 +#define ARMV8_PMUV3_PERFCTR_MEM_ERROR 0x1A +#define ARMV8_PMUV3_PERFCTR_BUS_CYCLES 0x1D +#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_ALLOCATE 0x1F +#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_ALLOCATE 0x20 +#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED_RETIRED 0x22 +#define ARMV8_PMUV3_PERFCTR_STALL_FRONTEND 0x23 +#define ARMV8_PMUV3_PERFCTR_STALL_BACKEND 0x24 +#define ARMV8_PMUV3_PERFCTR_L1D_TLB 0x25 +#define ARMV8_PMUV3_PERFCTR_L1I_TLB 0x26 +#define ARMV8_PMUV3_PERFCTR_L2I_CACHE 0x27 +#define ARMV8_PMUV3_PERFCTR_L2I_CACHE_REFILL 0x28 +#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_ALLOCATE 0x29 +#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_REFILL 0x2A +#define ARMV8_PMUV3_PERFCTR_L3D_CACHE 0x2B +#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_WB 0x2C +#define ARMV8_PMUV3_PERFCTR_L2D_TLB_REFILL 0x2D +#define ARMV8_PMUV3_PERFCTR_L21_TLB_REFILL 0x2E +#define ARMV8_PMUV3_PERFCTR_L2D_TLB 0x2F +#define ARMV8_PMUV3_PERFCTR_L21_TLB 0x30 /* ARMv8 Cortex-A53 specific event types. */ -enum armv8_a53_pmu_perf_types { - ARMV8_A53_PERFCTR_PREFETCH_LINEFILL = 0xC2, -}; +#define ARMV8_A53_PERFCTR_PREFETCH_LINEFILL 0xC2 -/* ARMv8 Cortex-A57 specific event types. */ -enum armv8_a57_perf_types { - ARMV8_A57_PERFCTR_L1_DCACHE_ACCESS_LD = 0x40, - ARMV8_A57_PERFCTR_L1_DCACHE_ACCESS_ST = 0x41, - ARMV8_A57_PERFCTR_L1_DCACHE_REFILL_LD = 0x42, - ARMV8_A57_PERFCTR_L1_DCACHE_REFILL_ST = 0x43, - ARMV8_A57_PERFCTR_DTLB_REFILL_LD = 0x4c, - ARMV8_A57_PERFCTR_DTLB_REFILL_ST = 0x4d, -}; +/* ARMv8 Cortex-A57 and Cortex-A72 specific event types. */ +#define ARMV8_A57_PERFCTR_L1_DCACHE_ACCESS_LD 0x40 +#define ARMV8_A57_PERFCTR_L1_DCACHE_ACCESS_ST 0x41 +#define ARMV8_A57_PERFCTR_L1_DCACHE_REFILL_LD 0x42 +#define ARMV8_A57_PERFCTR_L1_DCACHE_REFILL_ST 0x43 +#define ARMV8_A57_PERFCTR_DTLB_REFILL_LD 0x4c +#define ARMV8_A57_PERFCTR_DTLB_REFILL_ST 0x4d /* PMUv3 HW events mapping. */ static const unsigned armv8_pmuv3_perf_map[PERF_COUNT_HW_MAX] = { @@ -106,6 +120,7 @@ static const unsigned armv8_a53_perf_map[PERF_COUNT_HW_MAX] = { [PERF_COUNT_HW_BUS_CYCLES] = ARMV8_PMUV3_PERFCTR_BUS_CYCLES, }; +/* ARM Cortex-A57 and Cortex-A72 events mapping. */ static const unsigned armv8_a57_perf_map[PERF_COUNT_HW_MAX] = { PERF_MAP_ALL_UNSUPPORTED, [PERF_COUNT_HW_CPU_CYCLES] = ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES, @@ -178,6 +193,137 @@ static const unsigned armv8_a57_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED, }; +#define ARMV8_EVENT_ATTR_RESOLVE(m) #m +#define ARMV8_EVENT_ATTR(name, config) \ + PMU_EVENT_ATTR_STRING(name, armv8_event_attr_##name, \ + "event=" ARMV8_EVENT_ATTR_RESOLVE(config)) + +ARMV8_EVENT_ATTR(sw_incr, ARMV8_PMUV3_PERFCTR_PMNC_SW_INCR); +ARMV8_EVENT_ATTR(l1i_cache_refill, ARMV8_PMUV3_PERFCTR_L1_ICACHE_REFILL); +ARMV8_EVENT_ATTR(l1i_tlb_refill, ARMV8_PMUV3_PERFCTR_ITLB_REFILL); +ARMV8_EVENT_ATTR(l1d_cache_refill, ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL); +ARMV8_EVENT_ATTR(l1d_cache, ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS); +ARMV8_EVENT_ATTR(l1d_tlb_refill, ARMV8_PMUV3_PERFCTR_DTLB_REFILL); +ARMV8_EVENT_ATTR(ld_retired, ARMV8_PMUV3_PERFCTR_MEM_READ); +ARMV8_EVENT_ATTR(st_retired, ARMV8_PMUV3_PERFCTR_MEM_WRITE); +ARMV8_EVENT_ATTR(inst_retired, ARMV8_PMUV3_PERFCTR_INSTR_EXECUTED); +ARMV8_EVENT_ATTR(exc_taken, ARMV8_PMUV3_PERFCTR_EXC_TAKEN); +ARMV8_EVENT_ATTR(exc_return, ARMV8_PMUV3_PERFCTR_EXC_EXECUTED); +ARMV8_EVENT_ATTR(cid_write_retired, ARMV8_PMUV3_PERFCTR_CID_WRITE); +ARMV8_EVENT_ATTR(pc_write_retired, ARMV8_PMUV3_PERFCTR_PC_WRITE); +ARMV8_EVENT_ATTR(br_immed_retired, ARMV8_PMUV3_PERFCTR_PC_IMM_BRANCH); +ARMV8_EVENT_ATTR(br_return_retired, ARMV8_PMUV3_PERFCTR_PC_PROC_RETURN); +ARMV8_EVENT_ATTR(unaligned_ldst_retired, ARMV8_PMUV3_PERFCTR_MEM_UNALIGNED_ACCESS); +ARMV8_EVENT_ATTR(br_mis_pred, ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED); +ARMV8_EVENT_ATTR(cpu_cycles, ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES); +ARMV8_EVENT_ATTR(br_pred, ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED); +ARMV8_EVENT_ATTR(mem_access, ARMV8_PMUV3_PERFCTR_MEM_ACCESS); +ARMV8_EVENT_ATTR(l1i_cache, ARMV8_PMUV3_PERFCTR_L1_ICACHE_ACCESS); +ARMV8_EVENT_ATTR(l1d_cache_wb, ARMV8_PMUV3_PERFCTR_L1_DCACHE_WB); +ARMV8_EVENT_ATTR(l2d_cache, ARMV8_PMUV3_PERFCTR_L2_CACHE_ACCESS); +ARMV8_EVENT_ATTR(l2d_cache_refill, ARMV8_PMUV3_PERFCTR_L2_CACHE_REFILL); +ARMV8_EVENT_ATTR(l2d_cache_wb, ARMV8_PMUV3_PERFCTR_L2_CACHE_WB); +ARMV8_EVENT_ATTR(bus_access, ARMV8_PMUV3_PERFCTR_BUS_ACCESS); +ARMV8_EVENT_ATTR(memory_error, ARMV8_PMUV3_PERFCTR_MEM_ERROR); +ARMV8_EVENT_ATTR(inst_spec, ARMV8_PMUV3_PERFCTR_OP_SPEC); +ARMV8_EVENT_ATTR(ttbr_write_retired, ARMV8_PMUV3_PERFCTR_TTBR_WRITE); +ARMV8_EVENT_ATTR(bus_cycles, ARMV8_PMUV3_PERFCTR_BUS_CYCLES); +ARMV8_EVENT_ATTR(chain, ARMV8_PMUV3_PERFCTR_CHAIN); +ARMV8_EVENT_ATTR(l1d_cache_allocate, ARMV8_PMUV3_PERFCTR_L1D_CACHE_ALLOCATE); +ARMV8_EVENT_ATTR(l2d_cache_allocate, ARMV8_PMUV3_PERFCTR_L2D_CACHE_ALLOCATE); +ARMV8_EVENT_ATTR(br_retired, ARMV8_PMUV3_PERFCTR_BR_RETIRED); +ARMV8_EVENT_ATTR(br_mis_pred_retired, ARMV8_PMUV3_PERFCTR_BR_MIS_PRED_RETIRED); +ARMV8_EVENT_ATTR(stall_frontend, ARMV8_PMUV3_PERFCTR_STALL_FRONTEND); +ARMV8_EVENT_ATTR(stall_backend, ARMV8_PMUV3_PERFCTR_STALL_BACKEND); +ARMV8_EVENT_ATTR(l1d_tlb, ARMV8_PMUV3_PERFCTR_L1D_TLB); +ARMV8_EVENT_ATTR(l1i_tlb, ARMV8_PMUV3_PERFCTR_L1I_TLB); +ARMV8_EVENT_ATTR(l2i_cache, ARMV8_PMUV3_PERFCTR_L2I_CACHE); +ARMV8_EVENT_ATTR(l2i_cache_refill, ARMV8_PMUV3_PERFCTR_L2I_CACHE_REFILL); +ARMV8_EVENT_ATTR(l3d_cache_allocate, ARMV8_PMUV3_PERFCTR_L3D_CACHE_ALLOCATE); +ARMV8_EVENT_ATTR(l3d_cache_refill, ARMV8_PMUV3_PERFCTR_L3D_CACHE_REFILL); +ARMV8_EVENT_ATTR(l3d_cache, ARMV8_PMUV3_PERFCTR_L3D_CACHE); +ARMV8_EVENT_ATTR(l3d_cache_wb, ARMV8_PMUV3_PERFCTR_L3D_CACHE_WB); +ARMV8_EVENT_ATTR(l2d_tlb_refill, ARMV8_PMUV3_PERFCTR_L2D_TLB_REFILL); +ARMV8_EVENT_ATTR(l21_tlb_refill, ARMV8_PMUV3_PERFCTR_L21_TLB_REFILL); +ARMV8_EVENT_ATTR(l2d_tlb, ARMV8_PMUV3_PERFCTR_L2D_TLB); +ARMV8_EVENT_ATTR(l21_tlb, ARMV8_PMUV3_PERFCTR_L21_TLB); + +static struct attribute *armv8_pmuv3_event_attrs[] = { + &armv8_event_attr_sw_incr.attr.attr, + &armv8_event_attr_l1i_cache_refill.attr.attr, + &armv8_event_attr_l1i_tlb_refill.attr.attr, + &armv8_event_attr_l1d_cache_refill.attr.attr, + &armv8_event_attr_l1d_cache.attr.attr, + &armv8_event_attr_l1d_tlb_refill.attr.attr, + &armv8_event_attr_ld_retired.attr.attr, + &armv8_event_attr_st_retired.attr.attr, + &armv8_event_attr_inst_retired.attr.attr, + &armv8_event_attr_exc_taken.attr.attr, + &armv8_event_attr_exc_return.attr.attr, + &armv8_event_attr_cid_write_retired.attr.attr, + &armv8_event_attr_pc_write_retired.attr.attr, + &armv8_event_attr_br_immed_retired.attr.attr, + &armv8_event_attr_br_return_retired.attr.attr, + &armv8_event_attr_unaligned_ldst_retired.attr.attr, + &armv8_event_attr_br_mis_pred.attr.attr, + &armv8_event_attr_cpu_cycles.attr.attr, + &armv8_event_attr_br_pred.attr.attr, + &armv8_event_attr_mem_access.attr.attr, + &armv8_event_attr_l1i_cache.attr.attr, + &armv8_event_attr_l1d_cache_wb.attr.attr, + &armv8_event_attr_l2d_cache.attr.attr, + &armv8_event_attr_l2d_cache_refill.attr.attr, + &armv8_event_attr_l2d_cache_wb.attr.attr, + &armv8_event_attr_bus_access.attr.attr, + &armv8_event_attr_memory_error.attr.attr, + &armv8_event_attr_inst_spec.attr.attr, + &armv8_event_attr_ttbr_write_retired.attr.attr, + &armv8_event_attr_bus_cycles.attr.attr, + &armv8_event_attr_chain.attr.attr, + &armv8_event_attr_l1d_cache_allocate.attr.attr, + &armv8_event_attr_l2d_cache_allocate.attr.attr, + &armv8_event_attr_br_retired.attr.attr, + &armv8_event_attr_br_mis_pred_retired.attr.attr, + &armv8_event_attr_stall_frontend.attr.attr, + &armv8_event_attr_stall_backend.attr.attr, + &armv8_event_attr_l1d_tlb.attr.attr, + &armv8_event_attr_l1i_tlb.attr.attr, + &armv8_event_attr_l2i_cache.attr.attr, + &armv8_event_attr_l2i_cache_refill.attr.attr, + &armv8_event_attr_l3d_cache_allocate.attr.attr, + &armv8_event_attr_l3d_cache_refill.attr.attr, + &armv8_event_attr_l3d_cache.attr.attr, + &armv8_event_attr_l3d_cache_wb.attr.attr, + &armv8_event_attr_l2d_tlb_refill.attr.attr, + &armv8_event_attr_l21_tlb_refill.attr.attr, + &armv8_event_attr_l2d_tlb.attr.attr, + &armv8_event_attr_l21_tlb.attr.attr, + NULL, +}; + +static struct attribute_group armv8_pmuv3_events_attr_group = { + .name = "events", + .attrs = armv8_pmuv3_event_attrs, +}; + +PMU_FORMAT_ATTR(event, "config:0-9"); + +static struct attribute *armv8_pmuv3_format_attrs[] = { + &format_attr_event.attr, + NULL, +}; + +static struct attribute_group armv8_pmuv3_format_attr_group = { + .name = "format", + .attrs = armv8_pmuv3_format_attrs, +}; + +static const struct attribute_group *armv8_pmuv3_attr_groups[] = { + &armv8_pmuv3_events_attr_group, + &armv8_pmuv3_format_attr_group, + NULL, +}; + /* * Perf Events' indices @@ -574,9 +720,6 @@ static void armv8pmu_reset(void *info) /* Initialize & Reset PMNC: C and P bits. */ armv8pmu_pmcr_write(ARMV8_PMCR_P | ARMV8_PMCR_C); - - /* Disable access from userspace. */ - asm volatile("msr pmuserenr_el0, %0" :: "r" (0)); } static int armv8_pmuv3_map_event(struct perf_event *event) @@ -646,6 +789,7 @@ static int armv8_a53_pmu_init(struct arm_pmu *cpu_pmu) armv8_pmu_init(cpu_pmu); cpu_pmu->name = "armv8_cortex_a53"; cpu_pmu->map_event = armv8_a53_map_event; + cpu_pmu->pmu.attr_groups = armv8_pmuv3_attr_groups; return armv8pmu_probe_num_events(cpu_pmu); } @@ -654,6 +798,16 @@ static int armv8_a57_pmu_init(struct arm_pmu *cpu_pmu) armv8_pmu_init(cpu_pmu); cpu_pmu->name = "armv8_cortex_a57"; cpu_pmu->map_event = armv8_a57_map_event; + cpu_pmu->pmu.attr_groups = armv8_pmuv3_attr_groups; + return armv8pmu_probe_num_events(cpu_pmu); +} + +static int armv8_a72_pmu_init(struct arm_pmu *cpu_pmu) +{ + armv8_pmu_init(cpu_pmu); + cpu_pmu->name = "armv8_cortex_a72"; + cpu_pmu->map_event = armv8_a57_map_event; + cpu_pmu->pmu.attr_groups = armv8_pmuv3_attr_groups; return armv8pmu_probe_num_events(cpu_pmu); } @@ -661,6 +815,7 @@ static const struct of_device_id armv8_pmu_of_device_ids[] = { {.compatible = "arm,armv8-pmuv3", .data = armv8_pmuv3_init}, {.compatible = "arm,cortex-a53-pmu", .data = armv8_a53_pmu_init}, {.compatible = "arm,cortex-a57-pmu", .data = armv8_a57_pmu_init}, + {.compatible = "arm,cortex-a72-pmu", .data = armv8_a72_pmu_init}, {}, }; diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index f75b540bc3b4..88d742ba19d5 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -344,11 +344,14 @@ unsigned long get_wchan(struct task_struct *p) frame.fp = thread_saved_fp(p); frame.sp = thread_saved_sp(p); frame.pc = thread_saved_pc(p); +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + frame.graph = p->curr_ret_stack; +#endif stack_page = (unsigned long)task_stack_page(p); do { if (frame.sp < stack_page || frame.sp >= stack_page + THREAD_SIZE || - unwind_frame(&frame)) + unwind_frame(p, &frame)) return 0; if (!in_sched_functions(frame.pc)) return frame.pc; diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 1971f491bb90..ff7f13239515 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -58,6 +58,12 @@ */ void ptrace_disable(struct task_struct *child) { + /* + * This would be better off in core code, but PTRACE_DETACH has + * grown its fair share of arch-specific worts and changing it + * is likely to cause regressions on obscure architectures. + */ + user_disable_single_step(child); } #ifdef CONFIG_HAVE_HW_BREAKPOINT diff --git a/arch/arm64/kernel/return_address.c b/arch/arm64/kernel/return_address.c index 6c4fd2810ecb..1718706fde83 100644 --- a/arch/arm64/kernel/return_address.c +++ b/arch/arm64/kernel/return_address.c @@ -43,8 +43,11 @@ void *return_address(unsigned int level) frame.fp = (unsigned long)__builtin_frame_address(0); frame.sp = current_stack_pointer; frame.pc = (unsigned long)return_address; /* dummy */ +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + frame.graph = current->curr_ret_stack; +#endif - walk_stackframe(&frame, save_return_addr, &data); + walk_stackframe(current, &frame, save_return_addr, &data); if (!data.level) return data.addr; diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S index f586f7c875e2..e33fe33876ab 100644 --- a/arch/arm64/kernel/sleep.S +++ b/arch/arm64/kernel/sleep.S @@ -173,6 +173,9 @@ ENTRY(cpu_resume) /* load physical address of identity map page table in x1 */ adrp x1, idmap_pg_dir mov sp, x2 + /* save thread_info */ + and x2, x2, #~(THREAD_SIZE - 1) + msr sp_el0, x2 /* * cpu_do_resume expects x0 to contain context physical address * pointer and x1 to contain physical address of 1:1 page tables diff --git a/arch/arm64/kernel/smccc-call.S b/arch/arm64/kernel/smccc-call.S new file mode 100644 index 000000000000..ae0496fa4235 --- /dev/null +++ b/arch/arm64/kernel/smccc-call.S @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2015, Linaro Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License Version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#include <linux/linkage.h> +#include <asm/asm-offsets.h> + + .macro SMCCC instr + .cfi_startproc + \instr #0 + ldr x4, [sp] + stp x0, x1, [x4, #ARM_SMCCC_RES_X0_OFFS] + stp x2, x3, [x4, #ARM_SMCCC_RES_X2_OFFS] + ret + .cfi_endproc + .endm + +/* + * void arm_smccc_smc(unsigned long a0, unsigned long a1, unsigned long a2, + * unsigned long a3, unsigned long a4, unsigned long a5, + * unsigned long a6, unsigned long a7, struct arm_smccc_res *res) + */ +ENTRY(arm_smccc_smc) + SMCCC smc +ENDPROC(arm_smccc_smc) + +/* + * void arm_smccc_hvc(unsigned long a0, unsigned long a1, unsigned long a2, + * unsigned long a3, unsigned long a4, unsigned long a5, + * unsigned long a6, unsigned long a7, struct arm_smccc_res *res) + */ +ENTRY(arm_smccc_hvc) + SMCCC hvc +ENDPROC(arm_smccc_hvc) diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index ccb6078ed9f2..4fad9787ab46 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -17,9 +17,11 @@ */ #include <linux/kernel.h> #include <linux/export.h> +#include <linux/ftrace.h> #include <linux/sched.h> #include <linux/stacktrace.h> +#include <asm/irq.h> #include <asm/stacktrace.h> /* @@ -35,25 +37,83 @@ * ldp x29, x30, [sp] * add sp, sp, #0x10 */ -int notrace unwind_frame(struct stackframe *frame) +int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame) { unsigned long high, low; unsigned long fp = frame->fp; + unsigned long irq_stack_ptr; + + /* + * Use raw_smp_processor_id() to avoid false-positives from + * CONFIG_DEBUG_PREEMPT. get_wchan() calls unwind_frame() on sleeping + * task stacks, we can be pre-empted in this case, so + * {raw_,}smp_processor_id() may give us the wrong value. Sleeping + * tasks can't ever be on an interrupt stack, so regardless of cpu, + * the checks will always fail. + */ + irq_stack_ptr = IRQ_STACK_PTR(raw_smp_processor_id()); low = frame->sp; - high = ALIGN(low, THREAD_SIZE); + /* irq stacks are not THREAD_SIZE aligned */ + if (on_irq_stack(frame->sp, raw_smp_processor_id())) + high = irq_stack_ptr; + else + high = ALIGN(low, THREAD_SIZE) - 0x20; - if (fp < low || fp > high - 0x18 || fp & 0xf) + if (fp < low || fp > high || fp & 0xf) return -EINVAL; frame->sp = fp + 0x10; frame->fp = *(unsigned long *)(fp); frame->pc = *(unsigned long *)(fp + 8); +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + if (tsk && tsk->ret_stack && + (frame->pc == (unsigned long)return_to_handler)) { + /* + * This is a case where function graph tracer has + * modified a return address (LR) in a stack frame + * to hook a function return. + * So replace it to an original value. + */ + frame->pc = tsk->ret_stack[frame->graph--].ret; + } +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ + + /* + * Check whether we are going to walk through from interrupt stack + * to task stack. + * If we reach the end of the stack - and its an interrupt stack, + * unpack the dummy frame to find the original elr. + * + * Check the frame->fp we read from the bottom of the irq_stack, + * and the original task stack pointer are both in current->stack. + */ + if (frame->sp == irq_stack_ptr) { + struct pt_regs *irq_args; + unsigned long orig_sp = IRQ_STACK_TO_TASK_STACK(irq_stack_ptr); + + if (object_is_on_stack((void *)orig_sp) && + object_is_on_stack((void *)frame->fp)) { + frame->sp = orig_sp; + + /* orig_sp is the saved pt_regs, find the elr */ + irq_args = (struct pt_regs *)orig_sp; + frame->pc = irq_args->pc; + } else { + /* + * This frame has a non-standard format, and we + * didn't fix it, because the data looked wrong. + * Refuse to output this frame. + */ + return -EINVAL; + } + } + return 0; } -void notrace walk_stackframe(struct stackframe *frame, +void notrace walk_stackframe(struct task_struct *tsk, struct stackframe *frame, int (*fn)(struct stackframe *, void *), void *data) { while (1) { @@ -61,7 +121,7 @@ void notrace walk_stackframe(struct stackframe *frame, if (fn(frame, data)) break; - ret = unwind_frame(frame); + ret = unwind_frame(tsk, frame); if (ret < 0) break; } @@ -112,8 +172,11 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) frame.sp = current_stack_pointer; frame.pc = (unsigned long)save_stack_trace_tsk; } +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + frame.graph = tsk->curr_ret_stack; +#endif - walk_stackframe(&frame, save_trace, &data); + walk_stackframe(tsk, &frame, save_trace, &data); if (trace->nr_entries < trace->max_entries) trace->entries[trace->nr_entries++] = ULONG_MAX; } diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c index 13339b6ffc1a..59779699a1a4 100644 --- a/arch/arm64/kernel/time.c +++ b/arch/arm64/kernel/time.c @@ -52,8 +52,11 @@ unsigned long profile_pc(struct pt_regs *regs) frame.fp = regs->regs[29]; frame.sp = regs->sp; frame.pc = regs->pc; +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + frame.graph = -1; /* no task info */ +#endif do { - int ret = unwind_frame(&frame); + int ret = unwind_frame(NULL, &frame); if (ret < 0) return 0; } while (in_lock_functions(frame.pc)); diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index e9b9b5364393..cbedd724f48e 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -146,17 +146,15 @@ static void dump_instr(const char *lvl, struct pt_regs *regs) static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) { struct stackframe frame; + unsigned long irq_stack_ptr = IRQ_STACK_PTR(smp_processor_id()); + int skip; pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk); if (!tsk) tsk = current; - if (regs) { - frame.fp = regs->regs[29]; - frame.sp = regs->sp; - frame.pc = regs->pc; - } else if (tsk == current) { + if (tsk == current) { frame.fp = (unsigned long)__builtin_frame_address(0); frame.sp = current_stack_pointer; frame.pc = (unsigned long)dump_backtrace; @@ -168,21 +166,49 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) frame.sp = thread_saved_sp(tsk); frame.pc = thread_saved_pc(tsk); } +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + frame.graph = tsk->curr_ret_stack; +#endif - pr_emerg("Call trace:\n"); + skip = !!regs; + printk("Call trace:\n"); while (1) { unsigned long where = frame.pc; unsigned long stack; int ret; - dump_backtrace_entry(where); - ret = unwind_frame(&frame); + /* skip until specified stack frame */ + if (!skip) { + dump_backtrace_entry(where); + } else if (frame.fp == regs->regs[29]) { + skip = 0; + /* + * Mostly, this is the case where this function is + * called in panic/abort. As exception handler's + * stack frame does not contain the corresponding pc + * at which an exception has taken place, use regs->pc + * instead. + */ + dump_backtrace_entry(regs->pc); + } + ret = unwind_frame(tsk, &frame); if (ret < 0) break; stack = frame.sp; - if (in_exception_text(where)) + if (in_exception_text(where)) { + /* + * If we switched to the irq_stack before calling this + * exception handler, then the pt_regs will be on the + * task stack. The easiest way to tell is if the large + * pt_regs would overlap with the end of the irq_stack. + */ + if (stack < irq_stack_ptr && + (stack + sizeof(struct pt_regs)) > irq_stack_ptr) + stack = IRQ_STACK_TO_TASK_STACK(irq_stack_ptr); + dump_mem("", "Exception stack", stack, stack + sizeof(struct pt_regs), false); + } } } @@ -456,22 +482,22 @@ asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr) void __pte_error(const char *file, int line, unsigned long val) { - pr_crit("%s:%d: bad pte %016lx.\n", file, line, val); + pr_err("%s:%d: bad pte %016lx.\n", file, line, val); } void __pmd_error(const char *file, int line, unsigned long val) { - pr_crit("%s:%d: bad pmd %016lx.\n", file, line, val); + pr_err("%s:%d: bad pmd %016lx.\n", file, line, val); } void __pud_error(const char *file, int line, unsigned long val) { - pr_crit("%s:%d: bad pud %016lx.\n", file, line, val); + pr_err("%s:%d: bad pud %016lx.\n", file, line, val); } void __pgd_error(const char *file, int line, unsigned long val) { - pr_crit("%s:%d: bad pgd %016lx.\n", file, line, val); + pr_err("%s:%d: bad pgd %016lx.\n", file, line, val); } /* GENERIC_BUG traps */ diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 71426a78db12..e3928f578891 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -113,7 +113,6 @@ SECTIONS *(.got) /* Global offset table */ } - ALIGN_DEBUG_RO RO_DATA(PAGE_SIZE) EXCEPTION_TABLE(8) NOTES @@ -128,7 +127,6 @@ SECTIONS ARM_EXIT_KEEP(EXIT_TEXT) } - ALIGN_DEBUG_RO_MIN(16) .init.data : { INIT_DATA INIT_SETUP(16) @@ -143,9 +141,6 @@ SECTIONS PERCPU_SECTION(L1_CACHE_BYTES) - . = ALIGN(PAGE_SIZE); - __init_end = .; - . = ALIGN(4); .altinstructions : { __alt_instructions = .; @@ -157,6 +152,8 @@ SECTIONS } . = ALIGN(PAGE_SIZE); + __init_end = .; + _data = .; _sdata = .; RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE) diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index 1949fe5f5424..caee9ee8e12a 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -10,6 +10,7 @@ KVM=../../../virt/kvm ARM=../../../arch/arm/kvm obj-$(CONFIG_KVM_ARM_HOST) += kvm.o +obj-$(CONFIG_KVM_ARM_HOST) += hyp/ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o $(KVM)/vfio.o kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/arm.o $(ARM)/mmu.o $(ARM)/mmio.o @@ -22,8 +23,6 @@ kvm-$(CONFIG_KVM_ARM_HOST) += guest.o debug.o reset.o sys_regs.o sys_regs_generi kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v2.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v2-emul.o -kvm-$(CONFIG_KVM_ARM_HOST) += vgic-v2-switch.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3-emul.o -kvm-$(CONFIG_KVM_ARM_HOST) += vgic-v3-switch.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arch_timer.o diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index d250160d32bc..fcb778899a38 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -28,13 +28,21 @@ #include <asm/cputype.h> #include <asm/uaccess.h> #include <asm/kvm.h> -#include <asm/kvm_asm.h> #include <asm/kvm_emulate.h> #include <asm/kvm_coproc.h> #include "trace.h" +#define VM_STAT(x) { #x, offsetof(struct kvm, stat.x), KVM_STAT_VM } +#define VCPU_STAT(x) { #x, offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU } + struct kvm_stats_debugfs_item debugfs_entries[] = { + VCPU_STAT(hvc_exit_stat), + VCPU_STAT(wfe_exit_stat), + VCPU_STAT(wfi_exit_stat), + VCPU_STAT(mmio_exit_user), + VCPU_STAT(mmio_exit_kernel), + VCPU_STAT(exits), { NULL } }; diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 15f0477b0d2a..eba89e42f0ed 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -23,6 +23,7 @@ #include <linux/kvm_host.h> #include <asm/esr.h> +#include <asm/kvm_asm.h> #include <asm/kvm_coproc.h> #include <asm/kvm_emulate.h> #include <asm/kvm_mmu.h> @@ -39,6 +40,7 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) trace_kvm_hvc_arm64(*vcpu_pc(vcpu), vcpu_get_reg(vcpu, 0), kvm_vcpu_hvc_get_imm(vcpu)); + vcpu->stat.hvc_exit_stat++; ret = kvm_psci_call(vcpu); if (ret < 0) { @@ -71,9 +73,11 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run) { if (kvm_vcpu_get_hsr(vcpu) & ESR_ELx_WFx_ISS_WFE) { trace_kvm_wfx_arm64(*vcpu_pc(vcpu), true); + vcpu->stat.wfe_exit_stat++; kvm_vcpu_on_spin(vcpu); } else { trace_kvm_wfx_arm64(*vcpu_pc(vcpu), false); + vcpu->stat.wfi_exit_stat++; kvm_vcpu_block(vcpu); } diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S index 178ba2248a98..3e568dcd907b 100644 --- a/arch/arm64/kvm/hyp-init.S +++ b/arch/arm64/kvm/hyp-init.S @@ -94,6 +94,15 @@ __do_hyp_init: */ mrs x5, ID_AA64MMFR0_EL1 bfi x4, x5, #16, #3 + /* + * Read the VMIDBits bits from ID_AA64MMFR1_EL1 and set the VS bit in + * VTCR_EL2. + */ + mrs x5, ID_AA64MMFR1_EL1 + ubfx x5, x5, #5, #1 + lsl x5, x5, #VTCR_EL2_VS + orr x4, x4, x5 + msr vtcr_el2, x4 mrs x4, mair_el1 diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index 86c289832272..0ccdcbbef3c2 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -17,910 +17,7 @@ #include <linux/linkage.h> -#include <asm/alternative.h> -#include <asm/asm-offsets.h> #include <asm/assembler.h> -#include <asm/cpufeature.h> -#include <asm/debug-monitors.h> -#include <asm/esr.h> -#include <asm/fpsimdmacros.h> -#include <asm/kvm.h> -#include <asm/kvm_arm.h> -#include <asm/kvm_asm.h> -#include <asm/kvm_mmu.h> -#include <asm/memory.h> - -#define CPU_GP_REG_OFFSET(x) (CPU_GP_REGS + x) -#define CPU_XREG_OFFSET(x) CPU_GP_REG_OFFSET(CPU_USER_PT_REGS + 8*x) -#define CPU_SPSR_OFFSET(x) CPU_GP_REG_OFFSET(CPU_SPSR + 8*x) -#define CPU_SYSREG_OFFSET(x) (CPU_SYSREGS + 8*x) - - .text - .pushsection .hyp.text, "ax" - .align PAGE_SHIFT - -.macro save_common_regs - // x2: base address for cpu context - // x3: tmp register - - add x3, x2, #CPU_XREG_OFFSET(19) - stp x19, x20, [x3] - stp x21, x22, [x3, #16] - stp x23, x24, [x3, #32] - stp x25, x26, [x3, #48] - stp x27, x28, [x3, #64] - stp x29, lr, [x3, #80] - - mrs x19, sp_el0 - mrs x20, elr_el2 // pc before entering el2 - mrs x21, spsr_el2 // pstate before entering el2 - - stp x19, x20, [x3, #96] - str x21, [x3, #112] - - mrs x22, sp_el1 - mrs x23, elr_el1 - mrs x24, spsr_el1 - - str x22, [x2, #CPU_GP_REG_OFFSET(CPU_SP_EL1)] - str x23, [x2, #CPU_GP_REG_OFFSET(CPU_ELR_EL1)] - str x24, [x2, #CPU_SPSR_OFFSET(KVM_SPSR_EL1)] -.endm - -.macro restore_common_regs - // x2: base address for cpu context - // x3: tmp register - - ldr x22, [x2, #CPU_GP_REG_OFFSET(CPU_SP_EL1)] - ldr x23, [x2, #CPU_GP_REG_OFFSET(CPU_ELR_EL1)] - ldr x24, [x2, #CPU_SPSR_OFFSET(KVM_SPSR_EL1)] - - msr sp_el1, x22 - msr elr_el1, x23 - msr spsr_el1, x24 - - add x3, x2, #CPU_XREG_OFFSET(31) // SP_EL0 - ldp x19, x20, [x3] - ldr x21, [x3, #16] - - msr sp_el0, x19 - msr elr_el2, x20 // pc on return from el2 - msr spsr_el2, x21 // pstate on return from el2 - - add x3, x2, #CPU_XREG_OFFSET(19) - ldp x19, x20, [x3] - ldp x21, x22, [x3, #16] - ldp x23, x24, [x3, #32] - ldp x25, x26, [x3, #48] - ldp x27, x28, [x3, #64] - ldp x29, lr, [x3, #80] -.endm - -.macro save_host_regs - save_common_regs -.endm - -.macro restore_host_regs - restore_common_regs -.endm - -.macro save_fpsimd - // x2: cpu context address - // x3, x4: tmp regs - add x3, x2, #CPU_GP_REG_OFFSET(CPU_FP_REGS) - fpsimd_save x3, 4 -.endm - -.macro restore_fpsimd - // x2: cpu context address - // x3, x4: tmp regs - add x3, x2, #CPU_GP_REG_OFFSET(CPU_FP_REGS) - fpsimd_restore x3, 4 -.endm - -.macro save_guest_regs - // x0 is the vcpu address - // x1 is the return code, do not corrupt! - // x2 is the cpu context - // x3 is a tmp register - // Guest's x0-x3 are on the stack - - // Compute base to save registers - add x3, x2, #CPU_XREG_OFFSET(4) - stp x4, x5, [x3] - stp x6, x7, [x3, #16] - stp x8, x9, [x3, #32] - stp x10, x11, [x3, #48] - stp x12, x13, [x3, #64] - stp x14, x15, [x3, #80] - stp x16, x17, [x3, #96] - str x18, [x3, #112] - - pop x6, x7 // x2, x3 - pop x4, x5 // x0, x1 - - add x3, x2, #CPU_XREG_OFFSET(0) - stp x4, x5, [x3] - stp x6, x7, [x3, #16] - - save_common_regs -.endm - -.macro restore_guest_regs - // x0 is the vcpu address. - // x2 is the cpu context - // x3 is a tmp register - - // Prepare x0-x3 for later restore - add x3, x2, #CPU_XREG_OFFSET(0) - ldp x4, x5, [x3] - ldp x6, x7, [x3, #16] - push x4, x5 // Push x0-x3 on the stack - push x6, x7 - - // x4-x18 - ldp x4, x5, [x3, #32] - ldp x6, x7, [x3, #48] - ldp x8, x9, [x3, #64] - ldp x10, x11, [x3, #80] - ldp x12, x13, [x3, #96] - ldp x14, x15, [x3, #112] - ldp x16, x17, [x3, #128] - ldr x18, [x3, #144] - - // x19-x29, lr, sp*, elr*, spsr* - restore_common_regs - - // Last bits of the 64bit state - pop x2, x3 - pop x0, x1 - - // Do not touch any register after this! -.endm - -/* - * Macros to perform system register save/restore. - * - * Ordering here is absolutely critical, and must be kept consistent - * in {save,restore}_sysregs, {save,restore}_guest_32bit_state, - * and in kvm_asm.h. - * - * In other words, don't touch any of these unless you know what - * you are doing. - */ -.macro save_sysregs - // x2: base address for cpu context - // x3: tmp register - - add x3, x2, #CPU_SYSREG_OFFSET(MPIDR_EL1) - - mrs x4, vmpidr_el2 - mrs x5, csselr_el1 - mrs x6, sctlr_el1 - mrs x7, actlr_el1 - mrs x8, cpacr_el1 - mrs x9, ttbr0_el1 - mrs x10, ttbr1_el1 - mrs x11, tcr_el1 - mrs x12, esr_el1 - mrs x13, afsr0_el1 - mrs x14, afsr1_el1 - mrs x15, far_el1 - mrs x16, mair_el1 - mrs x17, vbar_el1 - mrs x18, contextidr_el1 - mrs x19, tpidr_el0 - mrs x20, tpidrro_el0 - mrs x21, tpidr_el1 - mrs x22, amair_el1 - mrs x23, cntkctl_el1 - mrs x24, par_el1 - mrs x25, mdscr_el1 - - stp x4, x5, [x3] - stp x6, x7, [x3, #16] - stp x8, x9, [x3, #32] - stp x10, x11, [x3, #48] - stp x12, x13, [x3, #64] - stp x14, x15, [x3, #80] - stp x16, x17, [x3, #96] - stp x18, x19, [x3, #112] - stp x20, x21, [x3, #128] - stp x22, x23, [x3, #144] - stp x24, x25, [x3, #160] -.endm - -.macro save_debug type - // x4: pointer to register set - // x5: number of registers to skip - // x6..x22 trashed - - adr x22, 1f - add x22, x22, x5, lsl #2 - br x22 -1: - mrs x21, \type\()15_el1 - mrs x20, \type\()14_el1 - mrs x19, \type\()13_el1 - mrs x18, \type\()12_el1 - mrs x17, \type\()11_el1 - mrs x16, \type\()10_el1 - mrs x15, \type\()9_el1 - mrs x14, \type\()8_el1 - mrs x13, \type\()7_el1 - mrs x12, \type\()6_el1 - mrs x11, \type\()5_el1 - mrs x10, \type\()4_el1 - mrs x9, \type\()3_el1 - mrs x8, \type\()2_el1 - mrs x7, \type\()1_el1 - mrs x6, \type\()0_el1 - - adr x22, 1f - add x22, x22, x5, lsl #2 - br x22 -1: - str x21, [x4, #(15 * 8)] - str x20, [x4, #(14 * 8)] - str x19, [x4, #(13 * 8)] - str x18, [x4, #(12 * 8)] - str x17, [x4, #(11 * 8)] - str x16, [x4, #(10 * 8)] - str x15, [x4, #(9 * 8)] - str x14, [x4, #(8 * 8)] - str x13, [x4, #(7 * 8)] - str x12, [x4, #(6 * 8)] - str x11, [x4, #(5 * 8)] - str x10, [x4, #(4 * 8)] - str x9, [x4, #(3 * 8)] - str x8, [x4, #(2 * 8)] - str x7, [x4, #(1 * 8)] - str x6, [x4, #(0 * 8)] -.endm - -.macro restore_sysregs - // x2: base address for cpu context - // x3: tmp register - - add x3, x2, #CPU_SYSREG_OFFSET(MPIDR_EL1) - - ldp x4, x5, [x3] - ldp x6, x7, [x3, #16] - ldp x8, x9, [x3, #32] - ldp x10, x11, [x3, #48] - ldp x12, x13, [x3, #64] - ldp x14, x15, [x3, #80] - ldp x16, x17, [x3, #96] - ldp x18, x19, [x3, #112] - ldp x20, x21, [x3, #128] - ldp x22, x23, [x3, #144] - ldp x24, x25, [x3, #160] - - msr vmpidr_el2, x4 - msr csselr_el1, x5 - msr sctlr_el1, x6 - msr actlr_el1, x7 - msr cpacr_el1, x8 - msr ttbr0_el1, x9 - msr ttbr1_el1, x10 - msr tcr_el1, x11 - msr esr_el1, x12 - msr afsr0_el1, x13 - msr afsr1_el1, x14 - msr far_el1, x15 - msr mair_el1, x16 - msr vbar_el1, x17 - msr contextidr_el1, x18 - msr tpidr_el0, x19 - msr tpidrro_el0, x20 - msr tpidr_el1, x21 - msr amair_el1, x22 - msr cntkctl_el1, x23 - msr par_el1, x24 - msr mdscr_el1, x25 -.endm - -.macro restore_debug type - // x4: pointer to register set - // x5: number of registers to skip - // x6..x22 trashed - - adr x22, 1f - add x22, x22, x5, lsl #2 - br x22 -1: - ldr x21, [x4, #(15 * 8)] - ldr x20, [x4, #(14 * 8)] - ldr x19, [x4, #(13 * 8)] - ldr x18, [x4, #(12 * 8)] - ldr x17, [x4, #(11 * 8)] - ldr x16, [x4, #(10 * 8)] - ldr x15, [x4, #(9 * 8)] - ldr x14, [x4, #(8 * 8)] - ldr x13, [x4, #(7 * 8)] - ldr x12, [x4, #(6 * 8)] - ldr x11, [x4, #(5 * 8)] - ldr x10, [x4, #(4 * 8)] - ldr x9, [x4, #(3 * 8)] - ldr x8, [x4, #(2 * 8)] - ldr x7, [x4, #(1 * 8)] - ldr x6, [x4, #(0 * 8)] - - adr x22, 1f - add x22, x22, x5, lsl #2 - br x22 -1: - msr \type\()15_el1, x21 - msr \type\()14_el1, x20 - msr \type\()13_el1, x19 - msr \type\()12_el1, x18 - msr \type\()11_el1, x17 - msr \type\()10_el1, x16 - msr \type\()9_el1, x15 - msr \type\()8_el1, x14 - msr \type\()7_el1, x13 - msr \type\()6_el1, x12 - msr \type\()5_el1, x11 - msr \type\()4_el1, x10 - msr \type\()3_el1, x9 - msr \type\()2_el1, x8 - msr \type\()1_el1, x7 - msr \type\()0_el1, x6 -.endm - -.macro skip_32bit_state tmp, target - // Skip 32bit state if not needed - mrs \tmp, hcr_el2 - tbnz \tmp, #HCR_RW_SHIFT, \target -.endm - -.macro skip_tee_state tmp, target - // Skip ThumbEE state if not needed - mrs \tmp, id_pfr0_el1 - tbz \tmp, #12, \target -.endm - -.macro skip_debug_state tmp, target - ldr \tmp, [x0, #VCPU_DEBUG_FLAGS] - tbz \tmp, #KVM_ARM64_DEBUG_DIRTY_SHIFT, \target -.endm - -/* - * Branch to target if CPTR_EL2.TFP bit is set (VFP/SIMD trapping enabled) - */ -.macro skip_fpsimd_state tmp, target - mrs \tmp, cptr_el2 - tbnz \tmp, #CPTR_EL2_TFP_SHIFT, \target -.endm - -.macro compute_debug_state target - // Compute debug state: If any of KDE, MDE or KVM_ARM64_DEBUG_DIRTY - // is set, we do a full save/restore cycle and disable trapping. - add x25, x0, #VCPU_CONTEXT - - // Check the state of MDSCR_EL1 - ldr x25, [x25, #CPU_SYSREG_OFFSET(MDSCR_EL1)] - and x26, x25, #DBG_MDSCR_KDE - and x25, x25, #DBG_MDSCR_MDE - adds xzr, x25, x26 - b.eq 9998f // Nothing to see there - - // If any interesting bits was set, we must set the flag - mov x26, #KVM_ARM64_DEBUG_DIRTY - str x26, [x0, #VCPU_DEBUG_FLAGS] - b 9999f // Don't skip restore - -9998: - // Otherwise load the flags from memory in case we recently - // trapped - skip_debug_state x25, \target -9999: -.endm - -.macro save_guest_32bit_state - skip_32bit_state x3, 1f - - add x3, x2, #CPU_SPSR_OFFSET(KVM_SPSR_ABT) - mrs x4, spsr_abt - mrs x5, spsr_und - mrs x6, spsr_irq - mrs x7, spsr_fiq - stp x4, x5, [x3] - stp x6, x7, [x3, #16] - - add x3, x2, #CPU_SYSREG_OFFSET(DACR32_EL2) - mrs x4, dacr32_el2 - mrs x5, ifsr32_el2 - stp x4, x5, [x3] - - skip_fpsimd_state x8, 2f - mrs x6, fpexc32_el2 - str x6, [x3, #16] -2: - skip_debug_state x8, 1f - mrs x7, dbgvcr32_el2 - str x7, [x3, #24] -1: -.endm - -.macro restore_guest_32bit_state - skip_32bit_state x3, 1f - - add x3, x2, #CPU_SPSR_OFFSET(KVM_SPSR_ABT) - ldp x4, x5, [x3] - ldp x6, x7, [x3, #16] - msr spsr_abt, x4 - msr spsr_und, x5 - msr spsr_irq, x6 - msr spsr_fiq, x7 - - add x3, x2, #CPU_SYSREG_OFFSET(DACR32_EL2) - ldp x4, x5, [x3] - msr dacr32_el2, x4 - msr ifsr32_el2, x5 - - skip_debug_state x8, 1f - ldr x7, [x3, #24] - msr dbgvcr32_el2, x7 -1: -.endm - -.macro activate_traps - ldr x2, [x0, #VCPU_HCR_EL2] - - /* - * We are about to set CPTR_EL2.TFP to trap all floating point - * register accesses to EL2, however, the ARM ARM clearly states that - * traps are only taken to EL2 if the operation would not otherwise - * trap to EL1. Therefore, always make sure that for 32-bit guests, - * we set FPEXC.EN to prevent traps to EL1, when setting the TFP bit. - */ - tbnz x2, #HCR_RW_SHIFT, 99f // open code skip_32bit_state - mov x3, #(1 << 30) - msr fpexc32_el2, x3 - isb -99: - msr hcr_el2, x2 - mov x2, #CPTR_EL2_TTA - orr x2, x2, #CPTR_EL2_TFP - msr cptr_el2, x2 - - mov x2, #(1 << 15) // Trap CP15 Cr=15 - msr hstr_el2, x2 - - // Monitor Debug Config - see kvm_arm_setup_debug() - ldr x2, [x0, #VCPU_MDCR_EL2] - msr mdcr_el2, x2 -.endm - -.macro deactivate_traps - mov x2, #HCR_RW - msr hcr_el2, x2 - msr hstr_el2, xzr - - mrs x2, mdcr_el2 - and x2, x2, #MDCR_EL2_HPMN_MASK - msr mdcr_el2, x2 -.endm - -.macro activate_vm - ldr x1, [x0, #VCPU_KVM] - kern_hyp_va x1 - ldr x2, [x1, #KVM_VTTBR] - msr vttbr_el2, x2 -.endm - -.macro deactivate_vm - msr vttbr_el2, xzr -.endm - -/* - * Call into the vgic backend for state saving - */ -.macro save_vgic_state -alternative_if_not ARM64_HAS_SYSREG_GIC_CPUIF - bl __save_vgic_v2_state -alternative_else - bl __save_vgic_v3_state -alternative_endif - mrs x24, hcr_el2 - mov x25, #HCR_INT_OVERRIDE - neg x25, x25 - and x24, x24, x25 - msr hcr_el2, x24 -.endm - -/* - * Call into the vgic backend for state restoring - */ -.macro restore_vgic_state - mrs x24, hcr_el2 - ldr x25, [x0, #VCPU_IRQ_LINES] - orr x24, x24, #HCR_INT_OVERRIDE - orr x24, x24, x25 - msr hcr_el2, x24 -alternative_if_not ARM64_HAS_SYSREG_GIC_CPUIF - bl __restore_vgic_v2_state -alternative_else - bl __restore_vgic_v3_state -alternative_endif -.endm - -.macro save_timer_state - // x0: vcpu pointer - ldr x2, [x0, #VCPU_KVM] - kern_hyp_va x2 - ldr w3, [x2, #KVM_TIMER_ENABLED] - cbz w3, 1f - - mrs x3, cntv_ctl_el0 - and x3, x3, #3 - str w3, [x0, #VCPU_TIMER_CNTV_CTL] - - isb - - mrs x3, cntv_cval_el0 - str x3, [x0, #VCPU_TIMER_CNTV_CVAL] - -1: - // Disable the virtual timer - msr cntv_ctl_el0, xzr - - // Allow physical timer/counter access for the host - mrs x2, cnthctl_el2 - orr x2, x2, #3 - msr cnthctl_el2, x2 - - // Clear cntvoff for the host - msr cntvoff_el2, xzr -.endm - -.macro restore_timer_state - // x0: vcpu pointer - // Disallow physical timer access for the guest - // Physical counter access is allowed - mrs x2, cnthctl_el2 - orr x2, x2, #1 - bic x2, x2, #2 - msr cnthctl_el2, x2 - - ldr x2, [x0, #VCPU_KVM] - kern_hyp_va x2 - ldr w3, [x2, #KVM_TIMER_ENABLED] - cbz w3, 1f - - ldr x3, [x2, #KVM_TIMER_CNTVOFF] - msr cntvoff_el2, x3 - ldr x2, [x0, #VCPU_TIMER_CNTV_CVAL] - msr cntv_cval_el0, x2 - isb - - ldr w2, [x0, #VCPU_TIMER_CNTV_CTL] - and x2, x2, #3 - msr cntv_ctl_el0, x2 -1: -.endm - -__save_sysregs: - save_sysregs - ret - -__restore_sysregs: - restore_sysregs - ret - -/* Save debug state */ -__save_debug: - // x2: ptr to CPU context - // x3: ptr to debug reg struct - // x4/x5/x6-22/x24-26: trashed - - mrs x26, id_aa64dfr0_el1 - ubfx x24, x26, #12, #4 // Extract BRPs - ubfx x25, x26, #20, #4 // Extract WRPs - mov w26, #15 - sub w24, w26, w24 // How many BPs to skip - sub w25, w26, w25 // How many WPs to skip - - mov x5, x24 - add x4, x3, #DEBUG_BCR - save_debug dbgbcr - add x4, x3, #DEBUG_BVR - save_debug dbgbvr - - mov x5, x25 - add x4, x3, #DEBUG_WCR - save_debug dbgwcr - add x4, x3, #DEBUG_WVR - save_debug dbgwvr - - mrs x21, mdccint_el1 - str x21, [x2, #CPU_SYSREG_OFFSET(MDCCINT_EL1)] - ret - -/* Restore debug state */ -__restore_debug: - // x2: ptr to CPU context - // x3: ptr to debug reg struct - // x4/x5/x6-22/x24-26: trashed - - mrs x26, id_aa64dfr0_el1 - ubfx x24, x26, #12, #4 // Extract BRPs - ubfx x25, x26, #20, #4 // Extract WRPs - mov w26, #15 - sub w24, w26, w24 // How many BPs to skip - sub w25, w26, w25 // How many WPs to skip - - mov x5, x24 - add x4, x3, #DEBUG_BCR - restore_debug dbgbcr - add x4, x3, #DEBUG_BVR - restore_debug dbgbvr - - mov x5, x25 - add x4, x3, #DEBUG_WCR - restore_debug dbgwcr - add x4, x3, #DEBUG_WVR - restore_debug dbgwvr - - ldr x21, [x2, #CPU_SYSREG_OFFSET(MDCCINT_EL1)] - msr mdccint_el1, x21 - - ret - -__save_fpsimd: - skip_fpsimd_state x3, 1f - save_fpsimd -1: ret - -__restore_fpsimd: - skip_fpsimd_state x3, 1f - restore_fpsimd -1: ret - -switch_to_guest_fpsimd: - push x4, lr - - mrs x2, cptr_el2 - bic x2, x2, #CPTR_EL2_TFP - msr cptr_el2, x2 - isb - - mrs x0, tpidr_el2 - - ldr x2, [x0, #VCPU_HOST_CONTEXT] - kern_hyp_va x2 - bl __save_fpsimd - - add x2, x0, #VCPU_CONTEXT - bl __restore_fpsimd - - skip_32bit_state x3, 1f - ldr x4, [x2, #CPU_SYSREG_OFFSET(FPEXC32_EL2)] - msr fpexc32_el2, x4 -1: - pop x4, lr - pop x2, x3 - pop x0, x1 - - eret - -/* - * u64 __kvm_vcpu_run(struct kvm_vcpu *vcpu); - * - * This is the world switch. The first half of the function - * deals with entering the guest, and anything from __kvm_vcpu_return - * to the end of the function deals with reentering the host. - * On the enter path, only x0 (vcpu pointer) must be preserved until - * the last moment. On the exit path, x0 (vcpu pointer) and x1 (exception - * code) must both be preserved until the epilogue. - * In both cases, x2 points to the CPU context we're saving/restoring from/to. - */ -ENTRY(__kvm_vcpu_run) - kern_hyp_va x0 - msr tpidr_el2, x0 // Save the vcpu register - - // Host context - ldr x2, [x0, #VCPU_HOST_CONTEXT] - kern_hyp_va x2 - - save_host_regs - bl __save_sysregs - - compute_debug_state 1f - add x3, x0, #VCPU_HOST_DEBUG_STATE - bl __save_debug -1: - activate_traps - activate_vm - - restore_vgic_state - restore_timer_state - - // Guest context - add x2, x0, #VCPU_CONTEXT - - // We must restore the 32-bit state before the sysregs, thanks - // to Cortex-A57 erratum #852523. - restore_guest_32bit_state - bl __restore_sysregs - - skip_debug_state x3, 1f - ldr x3, [x0, #VCPU_DEBUG_PTR] - kern_hyp_va x3 - bl __restore_debug -1: - restore_guest_regs - - // That's it, no more messing around. - eret - -__kvm_vcpu_return: - // Assume x0 is the vcpu pointer, x1 the return code - // Guest's x0-x3 are on the stack - - // Guest context - add x2, x0, #VCPU_CONTEXT - - save_guest_regs - bl __save_fpsimd - bl __save_sysregs - - skip_debug_state x3, 1f - ldr x3, [x0, #VCPU_DEBUG_PTR] - kern_hyp_va x3 - bl __save_debug -1: - save_guest_32bit_state - - save_timer_state - save_vgic_state - - deactivate_traps - deactivate_vm - - // Host context - ldr x2, [x0, #VCPU_HOST_CONTEXT] - kern_hyp_va x2 - - bl __restore_sysregs - bl __restore_fpsimd - /* Clear FPSIMD and Trace trapping */ - msr cptr_el2, xzr - - skip_debug_state x3, 1f - // Clear the dirty flag for the next run, as all the state has - // already been saved. Note that we nuke the whole 64bit word. - // If we ever add more flags, we'll have to be more careful... - str xzr, [x0, #VCPU_DEBUG_FLAGS] - add x3, x0, #VCPU_HOST_DEBUG_STATE - bl __restore_debug -1: - restore_host_regs - - mov x0, x1 - ret -END(__kvm_vcpu_run) - -// void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); -ENTRY(__kvm_tlb_flush_vmid_ipa) - dsb ishst - - kern_hyp_va x0 - ldr x2, [x0, #KVM_VTTBR] - msr vttbr_el2, x2 - isb - - /* - * We could do so much better if we had the VA as well. - * Instead, we invalidate Stage-2 for this IPA, and the - * whole of Stage-1. Weep... - */ - lsr x1, x1, #12 - tlbi ipas2e1is, x1 - /* - * We have to ensure completion of the invalidation at Stage-2, - * since a table walk on another CPU could refill a TLB with a - * complete (S1 + S2) walk based on the old Stage-2 mapping if - * the Stage-1 invalidation happened first. - */ - dsb ish - tlbi vmalle1is - dsb ish - isb - - msr vttbr_el2, xzr - ret -ENDPROC(__kvm_tlb_flush_vmid_ipa) - -/** - * void __kvm_tlb_flush_vmid(struct kvm *kvm) - Flush per-VMID TLBs - * @struct kvm *kvm - pointer to kvm structure - * - * Invalidates all Stage 1 and 2 TLB entries for current VMID. - */ -ENTRY(__kvm_tlb_flush_vmid) - dsb ishst - - kern_hyp_va x0 - ldr x2, [x0, #KVM_VTTBR] - msr vttbr_el2, x2 - isb - - tlbi vmalls12e1is - dsb ish - isb - - msr vttbr_el2, xzr - ret -ENDPROC(__kvm_tlb_flush_vmid) - -ENTRY(__kvm_flush_vm_context) - dsb ishst - tlbi alle1is - ic ialluis - dsb ish - ret -ENDPROC(__kvm_flush_vm_context) - -__kvm_hyp_panic: - // Stash PAR_EL1 before corrupting it in __restore_sysregs - mrs x0, par_el1 - push x0, xzr - - // Guess the context by looking at VTTBR: - // If zero, then we're already a host. - // Otherwise restore a minimal host context before panicing. - mrs x0, vttbr_el2 - cbz x0, 1f - - mrs x0, tpidr_el2 - - deactivate_traps - deactivate_vm - - ldr x2, [x0, #VCPU_HOST_CONTEXT] - kern_hyp_va x2 - - bl __restore_sysregs - - /* - * Make sure we have a valid host stack, and don't leave junk in the - * frame pointer that will give us a misleading host stack unwinding. - */ - ldr x22, [x2, #CPU_GP_REG_OFFSET(CPU_SP_EL1)] - msr sp_el1, x22 - mov x29, xzr - -1: adr x0, __hyp_panic_str - adr x1, 2f - ldp x2, x3, [x1] - sub x0, x0, x2 - add x0, x0, x3 - mrs x1, spsr_el2 - mrs x2, elr_el2 - mrs x3, esr_el2 - mrs x4, far_el2 - mrs x5, hpfar_el2 - pop x6, xzr // active context PAR_EL1 - mrs x7, tpidr_el2 - - mov lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\ - PSR_MODE_EL1h) - msr spsr_el2, lr - ldr lr, =panic - msr elr_el2, lr - eret - - .align 3 -2: .quad HYP_PAGE_OFFSET - .quad PAGE_OFFSET -ENDPROC(__kvm_hyp_panic) - -__hyp_panic_str: - .ascii "HYP panic:\nPS:%08x PC:%016x ESR:%08x\nFAR:%016x HPFAR:%016x PAR:%016x\nVCPU:%p\n\0" - - .align 2 /* * u64 kvm_call_hyp(void *hypfn, ...); @@ -934,7 +31,7 @@ __hyp_panic_str: * passed as x0, x1, and x2 (a maximum of 3 arguments in addition to the * function pointer can be passed). The function being called must be mapped * in Hyp mode (see init_hyp_mode in arch/arm/kvm/arm.c). Return values are - * passed in r0 and r1. + * passed in x0. * * A function pointer with a value of 0 has a special meaning, and is * used to implement __hyp_get_vectors in the same way as in @@ -944,179 +41,3 @@ ENTRY(kvm_call_hyp) hvc #0 ret ENDPROC(kvm_call_hyp) - -.macro invalid_vector label, target - .align 2 -\label: - b \target -ENDPROC(\label) -.endm - - /* None of these should ever happen */ - invalid_vector el2t_sync_invalid, __kvm_hyp_panic - invalid_vector el2t_irq_invalid, __kvm_hyp_panic - invalid_vector el2t_fiq_invalid, __kvm_hyp_panic - invalid_vector el2t_error_invalid, __kvm_hyp_panic - invalid_vector el2h_sync_invalid, __kvm_hyp_panic - invalid_vector el2h_irq_invalid, __kvm_hyp_panic - invalid_vector el2h_fiq_invalid, __kvm_hyp_panic - invalid_vector el2h_error_invalid, __kvm_hyp_panic - invalid_vector el1_sync_invalid, __kvm_hyp_panic - invalid_vector el1_irq_invalid, __kvm_hyp_panic - invalid_vector el1_fiq_invalid, __kvm_hyp_panic - invalid_vector el1_error_invalid, __kvm_hyp_panic - -el1_sync: // Guest trapped into EL2 - push x0, x1 - push x2, x3 - - mrs x1, esr_el2 - lsr x2, x1, #ESR_ELx_EC_SHIFT - - cmp x2, #ESR_ELx_EC_HVC64 - b.ne el1_trap - - mrs x3, vttbr_el2 // If vttbr is valid, the 64bit guest - cbnz x3, el1_trap // called HVC - - /* Here, we're pretty sure the host called HVC. */ - pop x2, x3 - pop x0, x1 - - /* Check for __hyp_get_vectors */ - cbnz x0, 1f - mrs x0, vbar_el2 - b 2f - -1: push lr, xzr - - /* - * Compute the function address in EL2, and shuffle the parameters. - */ - kern_hyp_va x0 - mov lr, x0 - mov x0, x1 - mov x1, x2 - mov x2, x3 - blr lr - - pop lr, xzr -2: eret - -el1_trap: - /* - * x1: ESR - * x2: ESR_EC - */ - - /* Guest accessed VFP/SIMD registers, save host, restore Guest */ - cmp x2, #ESR_ELx_EC_FP_ASIMD - b.eq switch_to_guest_fpsimd - - cmp x2, #ESR_ELx_EC_DABT_LOW - mov x0, #ESR_ELx_EC_IABT_LOW - ccmp x2, x0, #4, ne - b.ne 1f // Not an abort we care about - - /* This is an abort. Check for permission fault */ -alternative_if_not ARM64_WORKAROUND_834220 - and x2, x1, #ESR_ELx_FSC_TYPE - cmp x2, #FSC_PERM - b.ne 1f // Not a permission fault -alternative_else - nop // Use the permission fault path to - nop // check for a valid S1 translation, - nop // regardless of the ESR value. -alternative_endif - - /* - * Check for Stage-1 page table walk, which is guaranteed - * to give a valid HPFAR_EL2. - */ - tbnz x1, #7, 1f // S1PTW is set - - /* Preserve PAR_EL1 */ - mrs x3, par_el1 - push x3, xzr - - /* - * Permission fault, HPFAR_EL2 is invalid. - * Resolve the IPA the hard way using the guest VA. - * Stage-1 translation already validated the memory access rights. - * As such, we can use the EL1 translation regime, and don't have - * to distinguish between EL0 and EL1 access. - */ - mrs x2, far_el2 - at s1e1r, x2 - isb - - /* Read result */ - mrs x3, par_el1 - pop x0, xzr // Restore PAR_EL1 from the stack - msr par_el1, x0 - tbnz x3, #0, 3f // Bail out if we failed the translation - ubfx x3, x3, #12, #36 // Extract IPA - lsl x3, x3, #4 // and present it like HPFAR - b 2f - -1: mrs x3, hpfar_el2 - mrs x2, far_el2 - -2: mrs x0, tpidr_el2 - str w1, [x0, #VCPU_ESR_EL2] - str x2, [x0, #VCPU_FAR_EL2] - str x3, [x0, #VCPU_HPFAR_EL2] - - mov x1, #ARM_EXCEPTION_TRAP - b __kvm_vcpu_return - - /* - * Translation failed. Just return to the guest and - * let it fault again. Another CPU is probably playing - * behind our back. - */ -3: pop x2, x3 - pop x0, x1 - - eret - -el1_irq: - push x0, x1 - push x2, x3 - mrs x0, tpidr_el2 - mov x1, #ARM_EXCEPTION_IRQ - b __kvm_vcpu_return - - .ltorg - - .align 11 - -ENTRY(__kvm_hyp_vector) - ventry el2t_sync_invalid // Synchronous EL2t - ventry el2t_irq_invalid // IRQ EL2t - ventry el2t_fiq_invalid // FIQ EL2t - ventry el2t_error_invalid // Error EL2t - - ventry el2h_sync_invalid // Synchronous EL2h - ventry el2h_irq_invalid // IRQ EL2h - ventry el2h_fiq_invalid // FIQ EL2h - ventry el2h_error_invalid // Error EL2h - - ventry el1_sync // Synchronous 64-bit EL1 - ventry el1_irq // IRQ 64-bit EL1 - ventry el1_fiq_invalid // FIQ 64-bit EL1 - ventry el1_error_invalid // Error 64-bit EL1 - - ventry el1_sync // Synchronous 32-bit EL1 - ventry el1_irq // IRQ 32-bit EL1 - ventry el1_fiq_invalid // FIQ 32-bit EL1 - ventry el1_error_invalid // Error 32-bit EL1 -ENDPROC(__kvm_hyp_vector) - - -ENTRY(__kvm_get_mdcr_el2) - mrs x0, mdcr_el2 - ret -ENDPROC(__kvm_get_mdcr_el2) - - .popsection diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile new file mode 100644 index 000000000000..826032bc3945 --- /dev/null +++ b/arch/arm64/kvm/hyp/Makefile @@ -0,0 +1,14 @@ +# +# Makefile for Kernel-based Virtual Machine module, HYP part +# + +obj-$(CONFIG_KVM_ARM_HOST) += vgic-v2-sr.o +obj-$(CONFIG_KVM_ARM_HOST) += vgic-v3-sr.o +obj-$(CONFIG_KVM_ARM_HOST) += timer-sr.o +obj-$(CONFIG_KVM_ARM_HOST) += sysreg-sr.o +obj-$(CONFIG_KVM_ARM_HOST) += debug-sr.o +obj-$(CONFIG_KVM_ARM_HOST) += entry.o +obj-$(CONFIG_KVM_ARM_HOST) += switch.o +obj-$(CONFIG_KVM_ARM_HOST) += fpsimd.o +obj-$(CONFIG_KVM_ARM_HOST) += tlb.o +obj-$(CONFIG_KVM_ARM_HOST) += hyp-entry.o diff --git a/arch/arm64/kvm/hyp/debug-sr.c b/arch/arm64/kvm/hyp/debug-sr.c new file mode 100644 index 000000000000..c9c1e97501a9 --- /dev/null +++ b/arch/arm64/kvm/hyp/debug-sr.c @@ -0,0 +1,140 @@ +/* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/compiler.h> +#include <linux/kvm_host.h> + +#include <asm/kvm_asm.h> +#include <asm/kvm_mmu.h> + +#include "hyp.h" + +#define read_debug(r,n) read_sysreg(r##n##_el1) +#define write_debug(v,r,n) write_sysreg(v, r##n##_el1) + +#define save_debug(ptr,reg,nr) \ + switch (nr) { \ + case 15: ptr[15] = read_debug(reg, 15); \ + case 14: ptr[14] = read_debug(reg, 14); \ + case 13: ptr[13] = read_debug(reg, 13); \ + case 12: ptr[12] = read_debug(reg, 12); \ + case 11: ptr[11] = read_debug(reg, 11); \ + case 10: ptr[10] = read_debug(reg, 10); \ + case 9: ptr[9] = read_debug(reg, 9); \ + case 8: ptr[8] = read_debug(reg, 8); \ + case 7: ptr[7] = read_debug(reg, 7); \ + case 6: ptr[6] = read_debug(reg, 6); \ + case 5: ptr[5] = read_debug(reg, 5); \ + case 4: ptr[4] = read_debug(reg, 4); \ + case 3: ptr[3] = read_debug(reg, 3); \ + case 2: ptr[2] = read_debug(reg, 2); \ + case 1: ptr[1] = read_debug(reg, 1); \ + default: ptr[0] = read_debug(reg, 0); \ + } + +#define restore_debug(ptr,reg,nr) \ + switch (nr) { \ + case 15: write_debug(ptr[15], reg, 15); \ + case 14: write_debug(ptr[14], reg, 14); \ + case 13: write_debug(ptr[13], reg, 13); \ + case 12: write_debug(ptr[12], reg, 12); \ + case 11: write_debug(ptr[11], reg, 11); \ + case 10: write_debug(ptr[10], reg, 10); \ + case 9: write_debug(ptr[9], reg, 9); \ + case 8: write_debug(ptr[8], reg, 8); \ + case 7: write_debug(ptr[7], reg, 7); \ + case 6: write_debug(ptr[6], reg, 6); \ + case 5: write_debug(ptr[5], reg, 5); \ + case 4: write_debug(ptr[4], reg, 4); \ + case 3: write_debug(ptr[3], reg, 3); \ + case 2: write_debug(ptr[2], reg, 2); \ + case 1: write_debug(ptr[1], reg, 1); \ + default: write_debug(ptr[0], reg, 0); \ + } + +void __hyp_text __debug_save_state(struct kvm_vcpu *vcpu, + struct kvm_guest_debug_arch *dbg, + struct kvm_cpu_context *ctxt) +{ + u64 aa64dfr0; + int brps, wrps; + + if (!(vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY)) + return; + + aa64dfr0 = read_sysreg(id_aa64dfr0_el1); + brps = (aa64dfr0 >> 12) & 0xf; + wrps = (aa64dfr0 >> 20) & 0xf; + + save_debug(dbg->dbg_bcr, dbgbcr, brps); + save_debug(dbg->dbg_bvr, dbgbvr, brps); + save_debug(dbg->dbg_wcr, dbgwcr, wrps); + save_debug(dbg->dbg_wvr, dbgwvr, wrps); + + ctxt->sys_regs[MDCCINT_EL1] = read_sysreg(mdccint_el1); +} + +void __hyp_text __debug_restore_state(struct kvm_vcpu *vcpu, + struct kvm_guest_debug_arch *dbg, + struct kvm_cpu_context *ctxt) +{ + u64 aa64dfr0; + int brps, wrps; + + if (!(vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY)) + return; + + aa64dfr0 = read_sysreg(id_aa64dfr0_el1); + + brps = (aa64dfr0 >> 12) & 0xf; + wrps = (aa64dfr0 >> 20) & 0xf; + + restore_debug(dbg->dbg_bcr, dbgbcr, brps); + restore_debug(dbg->dbg_bvr, dbgbvr, brps); + restore_debug(dbg->dbg_wcr, dbgwcr, wrps); + restore_debug(dbg->dbg_wvr, dbgwvr, wrps); + + write_sysreg(ctxt->sys_regs[MDCCINT_EL1], mdccint_el1); +} + +void __hyp_text __debug_cond_save_host_state(struct kvm_vcpu *vcpu) +{ + /* If any of KDE, MDE or KVM_ARM64_DEBUG_DIRTY is set, perform + * a full save/restore cycle. */ + if ((vcpu->arch.ctxt.sys_regs[MDSCR_EL1] & DBG_MDSCR_KDE) || + (vcpu->arch.ctxt.sys_regs[MDSCR_EL1] & DBG_MDSCR_MDE)) + vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY; + + __debug_save_state(vcpu, &vcpu->arch.host_debug_state, + kern_hyp_va(vcpu->arch.host_cpu_context)); +} + +void __hyp_text __debug_cond_restore_host_state(struct kvm_vcpu *vcpu) +{ + __debug_restore_state(vcpu, &vcpu->arch.host_debug_state, + kern_hyp_va(vcpu->arch.host_cpu_context)); + + if (vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY) + vcpu->arch.debug_flags &= ~KVM_ARM64_DEBUG_DIRTY; +} + +static u32 __hyp_text __debug_read_mdcr_el2(void) +{ + return read_sysreg(mdcr_el2); +} + +__alias(__debug_read_mdcr_el2) u32 __kvm_get_mdcr_el2(void); diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S new file mode 100644 index 000000000000..fd0fbe9b7e6a --- /dev/null +++ b/arch/arm64/kvm/hyp/entry.S @@ -0,0 +1,160 @@ +/* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/linkage.h> + +#include <asm/asm-offsets.h> +#include <asm/assembler.h> +#include <asm/fpsimdmacros.h> +#include <asm/kvm.h> +#include <asm/kvm_arm.h> +#include <asm/kvm_asm.h> +#include <asm/kvm_mmu.h> + +#define CPU_GP_REG_OFFSET(x) (CPU_GP_REGS + x) +#define CPU_XREG_OFFSET(x) CPU_GP_REG_OFFSET(CPU_USER_PT_REGS + 8*x) + + .text + .pushsection .hyp.text, "ax" + +.macro save_callee_saved_regs ctxt + stp x19, x20, [\ctxt, #CPU_XREG_OFFSET(19)] + stp x21, x22, [\ctxt, #CPU_XREG_OFFSET(21)] + stp x23, x24, [\ctxt, #CPU_XREG_OFFSET(23)] + stp x25, x26, [\ctxt, #CPU_XREG_OFFSET(25)] + stp x27, x28, [\ctxt, #CPU_XREG_OFFSET(27)] + stp x29, lr, [\ctxt, #CPU_XREG_OFFSET(29)] +.endm + +.macro restore_callee_saved_regs ctxt + ldp x19, x20, [\ctxt, #CPU_XREG_OFFSET(19)] + ldp x21, x22, [\ctxt, #CPU_XREG_OFFSET(21)] + ldp x23, x24, [\ctxt, #CPU_XREG_OFFSET(23)] + ldp x25, x26, [\ctxt, #CPU_XREG_OFFSET(25)] + ldp x27, x28, [\ctxt, #CPU_XREG_OFFSET(27)] + ldp x29, lr, [\ctxt, #CPU_XREG_OFFSET(29)] +.endm + +/* + * u64 __guest_enter(struct kvm_vcpu *vcpu, + * struct kvm_cpu_context *host_ctxt); + */ +ENTRY(__guest_enter) + // x0: vcpu + // x1: host/guest context + // x2-x18: clobbered by macros + + // Store the host regs + save_callee_saved_regs x1 + + // Preserve vcpu & host_ctxt for use at exit time + stp x0, x1, [sp, #-16]! + + add x1, x0, #VCPU_CONTEXT + + // Prepare x0-x1 for later restore by pushing them onto the stack + ldp x2, x3, [x1, #CPU_XREG_OFFSET(0)] + stp x2, x3, [sp, #-16]! + + // x2-x18 + ldp x2, x3, [x1, #CPU_XREG_OFFSET(2)] + ldp x4, x5, [x1, #CPU_XREG_OFFSET(4)] + ldp x6, x7, [x1, #CPU_XREG_OFFSET(6)] + ldp x8, x9, [x1, #CPU_XREG_OFFSET(8)] + ldp x10, x11, [x1, #CPU_XREG_OFFSET(10)] + ldp x12, x13, [x1, #CPU_XREG_OFFSET(12)] + ldp x14, x15, [x1, #CPU_XREG_OFFSET(14)] + ldp x16, x17, [x1, #CPU_XREG_OFFSET(16)] + ldr x18, [x1, #CPU_XREG_OFFSET(18)] + + // x19-x29, lr + restore_callee_saved_regs x1 + + // Last bits of the 64bit state + ldp x0, x1, [sp], #16 + + // Do not touch any register after this! + eret +ENDPROC(__guest_enter) + +ENTRY(__guest_exit) + // x0: vcpu + // x1: return code + // x2-x3: free + // x4-x29,lr: vcpu regs + // vcpu x0-x3 on the stack + + add x2, x0, #VCPU_CONTEXT + + stp x4, x5, [x2, #CPU_XREG_OFFSET(4)] + stp x6, x7, [x2, #CPU_XREG_OFFSET(6)] + stp x8, x9, [x2, #CPU_XREG_OFFSET(8)] + stp x10, x11, [x2, #CPU_XREG_OFFSET(10)] + stp x12, x13, [x2, #CPU_XREG_OFFSET(12)] + stp x14, x15, [x2, #CPU_XREG_OFFSET(14)] + stp x16, x17, [x2, #CPU_XREG_OFFSET(16)] + str x18, [x2, #CPU_XREG_OFFSET(18)] + + ldp x6, x7, [sp], #16 // x2, x3 + ldp x4, x5, [sp], #16 // x0, x1 + + stp x4, x5, [x2, #CPU_XREG_OFFSET(0)] + stp x6, x7, [x2, #CPU_XREG_OFFSET(2)] + + save_callee_saved_regs x2 + + // Restore vcpu & host_ctxt from the stack + // (preserving return code in x1) + ldp x0, x2, [sp], #16 + // Now restore the host regs + restore_callee_saved_regs x2 + + mov x0, x1 + ret +ENDPROC(__guest_exit) + +ENTRY(__fpsimd_guest_restore) + stp x4, lr, [sp, #-16]! + + mrs x2, cptr_el2 + bic x2, x2, #CPTR_EL2_TFP + msr cptr_el2, x2 + isb + + mrs x3, tpidr_el2 + + ldr x0, [x3, #VCPU_HOST_CONTEXT] + kern_hyp_va x0 + add x0, x0, #CPU_GP_REG_OFFSET(CPU_FP_REGS) + bl __fpsimd_save_state + + add x2, x3, #VCPU_CONTEXT + add x0, x2, #CPU_GP_REG_OFFSET(CPU_FP_REGS) + bl __fpsimd_restore_state + + // Skip restoring fpexc32 for AArch64 guests + mrs x1, hcr_el2 + tbnz x1, #HCR_RW_SHIFT, 1f + ldr x4, [x3, #VCPU_FPEXC32_EL2] + msr fpexc32_el2, x4 +1: + ldp x4, lr, [sp], #16 + ldp x2, x3, [sp], #16 + ldp x0, x1, [sp], #16 + + eret +ENDPROC(__fpsimd_guest_restore) diff --git a/arch/arm/kernel/psci-call.S b/arch/arm64/kvm/hyp/fpsimd.S index a78e9e1e206d..da3f22c7f14a 100644 --- a/arch/arm/kernel/psci-call.S +++ b/arch/arm64/kvm/hyp/fpsimd.S @@ -1,4 +1,7 @@ /* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. @@ -8,24 +11,23 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * - * Copyright (C) 2015 ARM Limited - * - * Author: Mark Rutland <mark.rutland@arm.com> + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. */ #include <linux/linkage.h> -#include <asm/opcodes-sec.h> -#include <asm/opcodes-virt.h> +#include <asm/fpsimdmacros.h> + + .text + .pushsection .hyp.text, "ax" -/* int __invoke_psci_fn_hvc(u32 function_id, u32 arg0, u32 arg1, u32 arg2) */ -ENTRY(__invoke_psci_fn_hvc) - __HVC(0) - bx lr -ENDPROC(__invoke_psci_fn_hvc) +ENTRY(__fpsimd_save_state) + fpsimd_save x0, 1 + ret +ENDPROC(__fpsimd_save_state) -/* int __invoke_psci_fn_smc(u32 function_id, u32 arg0, u32 arg1, u32 arg2) */ -ENTRY(__invoke_psci_fn_smc) - __SMC(0) - bx lr -ENDPROC(__invoke_psci_fn_smc) +ENTRY(__fpsimd_restore_state) + fpsimd_restore x0, 1 + ret +ENDPROC(__fpsimd_restore_state) diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S new file mode 100644 index 000000000000..93e8d983c0bd --- /dev/null +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -0,0 +1,212 @@ +/* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/linkage.h> + +#include <asm/alternative.h> +#include <asm/assembler.h> +#include <asm/asm-offsets.h> +#include <asm/cpufeature.h> +#include <asm/kvm_arm.h> +#include <asm/kvm_asm.h> +#include <asm/kvm_mmu.h> + + .text + .pushsection .hyp.text, "ax" + +.macro save_x0_to_x3 + stp x0, x1, [sp, #-16]! + stp x2, x3, [sp, #-16]! +.endm + +.macro restore_x0_to_x3 + ldp x2, x3, [sp], #16 + ldp x0, x1, [sp], #16 +.endm + +el1_sync: // Guest trapped into EL2 + save_x0_to_x3 + + mrs x1, esr_el2 + lsr x2, x1, #ESR_ELx_EC_SHIFT + + cmp x2, #ESR_ELx_EC_HVC64 + b.ne el1_trap + + mrs x3, vttbr_el2 // If vttbr is valid, the 64bit guest + cbnz x3, el1_trap // called HVC + + /* Here, we're pretty sure the host called HVC. */ + restore_x0_to_x3 + + /* Check for __hyp_get_vectors */ + cbnz x0, 1f + mrs x0, vbar_el2 + b 2f + +1: stp lr, xzr, [sp, #-16]! + + /* + * Compute the function address in EL2, and shuffle the parameters. + */ + kern_hyp_va x0 + mov lr, x0 + mov x0, x1 + mov x1, x2 + mov x2, x3 + blr lr + + ldp lr, xzr, [sp], #16 +2: eret + +el1_trap: + /* + * x1: ESR + * x2: ESR_EC + */ + + /* Guest accessed VFP/SIMD registers, save host, restore Guest */ + cmp x2, #ESR_ELx_EC_FP_ASIMD + b.eq __fpsimd_guest_restore + + cmp x2, #ESR_ELx_EC_DABT_LOW + mov x0, #ESR_ELx_EC_IABT_LOW + ccmp x2, x0, #4, ne + b.ne 1f // Not an abort we care about + + /* This is an abort. Check for permission fault */ +alternative_if_not ARM64_WORKAROUND_834220 + and x2, x1, #ESR_ELx_FSC_TYPE + cmp x2, #FSC_PERM + b.ne 1f // Not a permission fault +alternative_else + nop // Use the permission fault path to + nop // check for a valid S1 translation, + nop // regardless of the ESR value. +alternative_endif + + /* + * Check for Stage-1 page table walk, which is guaranteed + * to give a valid HPFAR_EL2. + */ + tbnz x1, #7, 1f // S1PTW is set + + /* Preserve PAR_EL1 */ + mrs x3, par_el1 + stp x3, xzr, [sp, #-16]! + + /* + * Permission fault, HPFAR_EL2 is invalid. + * Resolve the IPA the hard way using the guest VA. + * Stage-1 translation already validated the memory access rights. + * As such, we can use the EL1 translation regime, and don't have + * to distinguish between EL0 and EL1 access. + */ + mrs x2, far_el2 + at s1e1r, x2 + isb + + /* Read result */ + mrs x3, par_el1 + ldp x0, xzr, [sp], #16 // Restore PAR_EL1 from the stack + msr par_el1, x0 + tbnz x3, #0, 3f // Bail out if we failed the translation + ubfx x3, x3, #12, #36 // Extract IPA + lsl x3, x3, #4 // and present it like HPFAR + b 2f + +1: mrs x3, hpfar_el2 + mrs x2, far_el2 + +2: mrs x0, tpidr_el2 + str w1, [x0, #VCPU_ESR_EL2] + str x2, [x0, #VCPU_FAR_EL2] + str x3, [x0, #VCPU_HPFAR_EL2] + + mov x1, #ARM_EXCEPTION_TRAP + b __guest_exit + + /* + * Translation failed. Just return to the guest and + * let it fault again. Another CPU is probably playing + * behind our back. + */ +3: restore_x0_to_x3 + + eret + +el1_irq: + save_x0_to_x3 + mrs x0, tpidr_el2 + mov x1, #ARM_EXCEPTION_IRQ + b __guest_exit + +ENTRY(__hyp_do_panic) + mov lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\ + PSR_MODE_EL1h) + msr spsr_el2, lr + ldr lr, =panic + msr elr_el2, lr + eret +ENDPROC(__hyp_do_panic) + +.macro invalid_vector label, target = __hyp_panic + .align 2 +\label: + b \target +ENDPROC(\label) +.endm + + /* None of these should ever happen */ + invalid_vector el2t_sync_invalid + invalid_vector el2t_irq_invalid + invalid_vector el2t_fiq_invalid + invalid_vector el2t_error_invalid + invalid_vector el2h_sync_invalid + invalid_vector el2h_irq_invalid + invalid_vector el2h_fiq_invalid + invalid_vector el2h_error_invalid + invalid_vector el1_sync_invalid + invalid_vector el1_irq_invalid + invalid_vector el1_fiq_invalid + invalid_vector el1_error_invalid + + .ltorg + + .align 11 + +ENTRY(__kvm_hyp_vector) + ventry el2t_sync_invalid // Synchronous EL2t + ventry el2t_irq_invalid // IRQ EL2t + ventry el2t_fiq_invalid // FIQ EL2t + ventry el2t_error_invalid // Error EL2t + + ventry el2h_sync_invalid // Synchronous EL2h + ventry el2h_irq_invalid // IRQ EL2h + ventry el2h_fiq_invalid // FIQ EL2h + ventry el2h_error_invalid // Error EL2h + + ventry el1_sync // Synchronous 64-bit EL1 + ventry el1_irq // IRQ 64-bit EL1 + ventry el1_fiq_invalid // FIQ 64-bit EL1 + ventry el1_error_invalid // Error 64-bit EL1 + + ventry el1_sync // Synchronous 32-bit EL1 + ventry el1_irq // IRQ 32-bit EL1 + ventry el1_fiq_invalid // FIQ 32-bit EL1 + ventry el1_error_invalid // Error 32-bit EL1 +ENDPROC(__kvm_hyp_vector) diff --git a/arch/arm64/kvm/hyp/hyp.h b/arch/arm64/kvm/hyp/hyp.h new file mode 100644 index 000000000000..fb275178b6af --- /dev/null +++ b/arch/arm64/kvm/hyp/hyp.h @@ -0,0 +1,90 @@ +/* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef __ARM64_KVM_HYP_H__ +#define __ARM64_KVM_HYP_H__ + +#include <linux/compiler.h> +#include <linux/kvm_host.h> +#include <asm/kvm_mmu.h> +#include <asm/sysreg.h> + +#define __hyp_text __section(.hyp.text) notrace + +#define kern_hyp_va(v) (typeof(v))((unsigned long)(v) & HYP_PAGE_OFFSET_MASK) +#define hyp_kern_va(v) (typeof(v))((unsigned long)(v) - HYP_PAGE_OFFSET \ + + PAGE_OFFSET) + +/** + * hyp_alternate_select - Generates patchable code sequences that are + * used to switch between two implementations of a function, depending + * on the availability of a feature. + * + * @fname: a symbol name that will be defined as a function returning a + * function pointer whose type will match @orig and @alt + * @orig: A pointer to the default function, as returned by @fname when + * @cond doesn't hold + * @alt: A pointer to the alternate function, as returned by @fname + * when @cond holds + * @cond: a CPU feature (as described in asm/cpufeature.h) + */ +#define hyp_alternate_select(fname, orig, alt, cond) \ +typeof(orig) * __hyp_text fname(void) \ +{ \ + typeof(alt) *val = orig; \ + asm volatile(ALTERNATIVE("nop \n", \ + "mov %0, %1 \n", \ + cond) \ + : "+r" (val) : "r" (alt)); \ + return val; \ +} + +void __vgic_v2_save_state(struct kvm_vcpu *vcpu); +void __vgic_v2_restore_state(struct kvm_vcpu *vcpu); + +void __vgic_v3_save_state(struct kvm_vcpu *vcpu); +void __vgic_v3_restore_state(struct kvm_vcpu *vcpu); + +void __timer_save_state(struct kvm_vcpu *vcpu); +void __timer_restore_state(struct kvm_vcpu *vcpu); + +void __sysreg_save_state(struct kvm_cpu_context *ctxt); +void __sysreg_restore_state(struct kvm_cpu_context *ctxt); +void __sysreg32_save_state(struct kvm_vcpu *vcpu); +void __sysreg32_restore_state(struct kvm_vcpu *vcpu); + +void __debug_save_state(struct kvm_vcpu *vcpu, + struct kvm_guest_debug_arch *dbg, + struct kvm_cpu_context *ctxt); +void __debug_restore_state(struct kvm_vcpu *vcpu, + struct kvm_guest_debug_arch *dbg, + struct kvm_cpu_context *ctxt); +void __debug_cond_save_host_state(struct kvm_vcpu *vcpu); +void __debug_cond_restore_host_state(struct kvm_vcpu *vcpu); + +void __fpsimd_save_state(struct user_fpsimd_state *fp_regs); +void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs); +static inline bool __fpsimd_enabled(void) +{ + return !(read_sysreg(cptr_el2) & CPTR_EL2_TFP); +} + +u64 __guest_enter(struct kvm_vcpu *vcpu, struct kvm_cpu_context *host_ctxt); +void __noreturn __hyp_do_panic(unsigned long, ...); + +#endif /* __ARM64_KVM_HYP_H__ */ + diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c new file mode 100644 index 000000000000..ca8f5a5e2f96 --- /dev/null +++ b/arch/arm64/kvm/hyp/switch.c @@ -0,0 +1,175 @@ +/* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "hyp.h" + +static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu) +{ + u64 val; + + /* + * We are about to set CPTR_EL2.TFP to trap all floating point + * register accesses to EL2, however, the ARM ARM clearly states that + * traps are only taken to EL2 if the operation would not otherwise + * trap to EL1. Therefore, always make sure that for 32-bit guests, + * we set FPEXC.EN to prevent traps to EL1, when setting the TFP bit. + */ + val = vcpu->arch.hcr_el2; + if (!(val & HCR_RW)) { + write_sysreg(1 << 30, fpexc32_el2); + isb(); + } + write_sysreg(val, hcr_el2); + /* Trap on AArch32 cp15 c15 accesses (EL1 or EL0) */ + write_sysreg(1 << 15, hstr_el2); + write_sysreg(CPTR_EL2_TTA | CPTR_EL2_TFP, cptr_el2); + write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2); +} + +static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu) +{ + write_sysreg(HCR_RW, hcr_el2); + write_sysreg(0, hstr_el2); + write_sysreg(read_sysreg(mdcr_el2) & MDCR_EL2_HPMN_MASK, mdcr_el2); + write_sysreg(0, cptr_el2); +} + +static void __hyp_text __activate_vm(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = kern_hyp_va(vcpu->kvm); + write_sysreg(kvm->arch.vttbr, vttbr_el2); +} + +static void __hyp_text __deactivate_vm(struct kvm_vcpu *vcpu) +{ + write_sysreg(0, vttbr_el2); +} + +static hyp_alternate_select(__vgic_call_save_state, + __vgic_v2_save_state, __vgic_v3_save_state, + ARM64_HAS_SYSREG_GIC_CPUIF); + +static hyp_alternate_select(__vgic_call_restore_state, + __vgic_v2_restore_state, __vgic_v3_restore_state, + ARM64_HAS_SYSREG_GIC_CPUIF); + +static void __hyp_text __vgic_save_state(struct kvm_vcpu *vcpu) +{ + __vgic_call_save_state()(vcpu); + write_sysreg(read_sysreg(hcr_el2) & ~HCR_INT_OVERRIDE, hcr_el2); +} + +static void __hyp_text __vgic_restore_state(struct kvm_vcpu *vcpu) +{ + u64 val; + + val = read_sysreg(hcr_el2); + val |= HCR_INT_OVERRIDE; + val |= vcpu->arch.irq_lines; + write_sysreg(val, hcr_el2); + + __vgic_call_restore_state()(vcpu); +} + +static int __hyp_text __guest_run(struct kvm_vcpu *vcpu) +{ + struct kvm_cpu_context *host_ctxt; + struct kvm_cpu_context *guest_ctxt; + bool fp_enabled; + u64 exit_code; + + vcpu = kern_hyp_va(vcpu); + write_sysreg(vcpu, tpidr_el2); + + host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context); + guest_ctxt = &vcpu->arch.ctxt; + + __sysreg_save_state(host_ctxt); + __debug_cond_save_host_state(vcpu); + + __activate_traps(vcpu); + __activate_vm(vcpu); + + __vgic_restore_state(vcpu); + __timer_restore_state(vcpu); + + /* + * We must restore the 32-bit state before the sysregs, thanks + * to Cortex-A57 erratum #852523. + */ + __sysreg32_restore_state(vcpu); + __sysreg_restore_state(guest_ctxt); + __debug_restore_state(vcpu, kern_hyp_va(vcpu->arch.debug_ptr), guest_ctxt); + + /* Jump in the fire! */ + exit_code = __guest_enter(vcpu, host_ctxt); + /* And we're baaack! */ + + fp_enabled = __fpsimd_enabled(); + + __sysreg_save_state(guest_ctxt); + __sysreg32_save_state(vcpu); + __timer_save_state(vcpu); + __vgic_save_state(vcpu); + + __deactivate_traps(vcpu); + __deactivate_vm(vcpu); + + __sysreg_restore_state(host_ctxt); + + if (fp_enabled) { + __fpsimd_save_state(&guest_ctxt->gp_regs.fp_regs); + __fpsimd_restore_state(&host_ctxt->gp_regs.fp_regs); + } + + __debug_save_state(vcpu, kern_hyp_va(vcpu->arch.debug_ptr), guest_ctxt); + __debug_cond_restore_host_state(vcpu); + + return exit_code; +} + +__alias(__guest_run) int __kvm_vcpu_run(struct kvm_vcpu *vcpu); + +static const char __hyp_panic_string[] = "HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%p\n"; + +void __hyp_text __noreturn __hyp_panic(void) +{ + unsigned long str_va = (unsigned long)__hyp_panic_string; + u64 spsr = read_sysreg(spsr_el2); + u64 elr = read_sysreg(elr_el2); + u64 par = read_sysreg(par_el1); + + if (read_sysreg(vttbr_el2)) { + struct kvm_vcpu *vcpu; + struct kvm_cpu_context *host_ctxt; + + vcpu = (struct kvm_vcpu *)read_sysreg(tpidr_el2); + host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context); + __deactivate_traps(vcpu); + __deactivate_vm(vcpu); + __sysreg_restore_state(host_ctxt); + } + + /* Call panic for real */ + __hyp_do_panic(hyp_kern_va(str_va), + spsr, elr, + read_sysreg(esr_el2), read_sysreg(far_el2), + read_sysreg(hpfar_el2), par, + (void *)read_sysreg(tpidr_el2)); + + unreachable(); +} diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c new file mode 100644 index 000000000000..425630980229 --- /dev/null +++ b/arch/arm64/kvm/hyp/sysreg-sr.c @@ -0,0 +1,138 @@ +/* + * Copyright (C) 2012-2015 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/compiler.h> +#include <linux/kvm_host.h> + +#include <asm/kvm_asm.h> +#include <asm/kvm_mmu.h> + +#include "hyp.h" + +/* ctxt is already in the HYP VA space */ +void __hyp_text __sysreg_save_state(struct kvm_cpu_context *ctxt) +{ + ctxt->sys_regs[MPIDR_EL1] = read_sysreg(vmpidr_el2); + ctxt->sys_regs[CSSELR_EL1] = read_sysreg(csselr_el1); + ctxt->sys_regs[SCTLR_EL1] = read_sysreg(sctlr_el1); + ctxt->sys_regs[ACTLR_EL1] = read_sysreg(actlr_el1); + ctxt->sys_regs[CPACR_EL1] = read_sysreg(cpacr_el1); + ctxt->sys_regs[TTBR0_EL1] = read_sysreg(ttbr0_el1); + ctxt->sys_regs[TTBR1_EL1] = read_sysreg(ttbr1_el1); + ctxt->sys_regs[TCR_EL1] = read_sysreg(tcr_el1); + ctxt->sys_regs[ESR_EL1] = read_sysreg(esr_el1); + ctxt->sys_regs[AFSR0_EL1] = read_sysreg(afsr0_el1); + ctxt->sys_regs[AFSR1_EL1] = read_sysreg(afsr1_el1); + ctxt->sys_regs[FAR_EL1] = read_sysreg(far_el1); + ctxt->sys_regs[MAIR_EL1] = read_sysreg(mair_el1); + ctxt->sys_regs[VBAR_EL1] = read_sysreg(vbar_el1); + ctxt->sys_regs[CONTEXTIDR_EL1] = read_sysreg(contextidr_el1); + ctxt->sys_regs[TPIDR_EL0] = read_sysreg(tpidr_el0); + ctxt->sys_regs[TPIDRRO_EL0] = read_sysreg(tpidrro_el0); + ctxt->sys_regs[TPIDR_EL1] = read_sysreg(tpidr_el1); + ctxt->sys_regs[AMAIR_EL1] = read_sysreg(amair_el1); + ctxt->sys_regs[CNTKCTL_EL1] = read_sysreg(cntkctl_el1); + ctxt->sys_regs[PAR_EL1] = read_sysreg(par_el1); + ctxt->sys_regs[MDSCR_EL1] = read_sysreg(mdscr_el1); + + ctxt->gp_regs.regs.sp = read_sysreg(sp_el0); + ctxt->gp_regs.regs.pc = read_sysreg(elr_el2); + ctxt->gp_regs.regs.pstate = read_sysreg(spsr_el2); + ctxt->gp_regs.sp_el1 = read_sysreg(sp_el1); + ctxt->gp_regs.elr_el1 = read_sysreg(elr_el1); + ctxt->gp_regs.spsr[KVM_SPSR_EL1]= read_sysreg(spsr_el1); +} + +void __hyp_text __sysreg_restore_state(struct kvm_cpu_context *ctxt) +{ + write_sysreg(ctxt->sys_regs[MPIDR_EL1], vmpidr_el2); + write_sysreg(ctxt->sys_regs[CSSELR_EL1], csselr_el1); + write_sysreg(ctxt->sys_regs[SCTLR_EL1], sctlr_el1); + write_sysreg(ctxt->sys_regs[ACTLR_EL1], actlr_el1); + write_sysreg(ctxt->sys_regs[CPACR_EL1], cpacr_el1); + write_sysreg(ctxt->sys_regs[TTBR0_EL1], ttbr0_el1); + write_sysreg(ctxt->sys_regs[TTBR1_EL1], ttbr1_el1); + write_sysreg(ctxt->sys_regs[TCR_EL1], tcr_el1); + write_sysreg(ctxt->sys_regs[ESR_EL1], esr_el1); + write_sysreg(ctxt->sys_regs[AFSR0_EL1], afsr0_el1); + write_sysreg(ctxt->sys_regs[AFSR1_EL1], afsr1_el1); + write_sysreg(ctxt->sys_regs[FAR_EL1], far_el1); + write_sysreg(ctxt->sys_regs[MAIR_EL1], mair_el1); + write_sysreg(ctxt->sys_regs[VBAR_EL1], vbar_el1); + write_sysreg(ctxt->sys_regs[CONTEXTIDR_EL1], contextidr_el1); + write_sysreg(ctxt->sys_regs[TPIDR_EL0], tpidr_el0); + write_sysreg(ctxt->sys_regs[TPIDRRO_EL0], tpidrro_el0); + write_sysreg(ctxt->sys_regs[TPIDR_EL1], tpidr_el1); + write_sysreg(ctxt->sys_regs[AMAIR_EL1], amair_el1); + write_sysreg(ctxt->sys_regs[CNTKCTL_EL1], cntkctl_el1); + write_sysreg(ctxt->sys_regs[PAR_EL1], par_el1); + write_sysreg(ctxt->sys_regs[MDSCR_EL1], mdscr_el1); + + write_sysreg(ctxt->gp_regs.regs.sp, sp_el0); + write_sysreg(ctxt->gp_regs.regs.pc, elr_el2); + write_sysreg(ctxt->gp_regs.regs.pstate, spsr_el2); + write_sysreg(ctxt->gp_regs.sp_el1, sp_el1); + write_sysreg(ctxt->gp_regs.elr_el1, elr_el1); + write_sysreg(ctxt->gp_regs.spsr[KVM_SPSR_EL1], spsr_el1); +} + +void __hyp_text __sysreg32_save_state(struct kvm_vcpu *vcpu) +{ + u64 *spsr, *sysreg; + + if (read_sysreg(hcr_el2) & HCR_RW) + return; + + spsr = vcpu->arch.ctxt.gp_regs.spsr; + sysreg = vcpu->arch.ctxt.sys_regs; + + spsr[KVM_SPSR_ABT] = read_sysreg(spsr_abt); + spsr[KVM_SPSR_UND] = read_sysreg(spsr_und); + spsr[KVM_SPSR_IRQ] = read_sysreg(spsr_irq); + spsr[KVM_SPSR_FIQ] = read_sysreg(spsr_fiq); + + sysreg[DACR32_EL2] = read_sysreg(dacr32_el2); + sysreg[IFSR32_EL2] = read_sysreg(ifsr32_el2); + + if (__fpsimd_enabled()) + sysreg[FPEXC32_EL2] = read_sysreg(fpexc32_el2); + + if (vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY) + sysreg[DBGVCR32_EL2] = read_sysreg(dbgvcr32_el2); +} + +void __hyp_text __sysreg32_restore_state(struct kvm_vcpu *vcpu) +{ + u64 *spsr, *sysreg; + + if (read_sysreg(hcr_el2) & HCR_RW) + return; + + spsr = vcpu->arch.ctxt.gp_regs.spsr; + sysreg = vcpu->arch.ctxt.sys_regs; + + write_sysreg(spsr[KVM_SPSR_ABT], spsr_abt); + write_sysreg(spsr[KVM_SPSR_UND], spsr_und); + write_sysreg(spsr[KVM_SPSR_IRQ], spsr_irq); + write_sysreg(spsr[KVM_SPSR_FIQ], spsr_fiq); + + write_sysreg(sysreg[DACR32_EL2], dacr32_el2); + write_sysreg(sysreg[IFSR32_EL2], ifsr32_el2); + + if (vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY) + write_sysreg(sysreg[DBGVCR32_EL2], dbgvcr32_el2); +} diff --git a/arch/arm64/kvm/hyp/timer-sr.c b/arch/arm64/kvm/hyp/timer-sr.c new file mode 100644 index 000000000000..1051e5d7320f --- /dev/null +++ b/arch/arm64/kvm/hyp/timer-sr.c @@ -0,0 +1,71 @@ +/* + * Copyright (C) 2012-2015 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <clocksource/arm_arch_timer.h> +#include <linux/compiler.h> +#include <linux/kvm_host.h> + +#include <asm/kvm_mmu.h> + +#include "hyp.h" + +/* vcpu is already in the HYP VA space */ +void __hyp_text __timer_save_state(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = kern_hyp_va(vcpu->kvm); + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + u64 val; + + if (kvm->arch.timer.enabled) { + timer->cntv_ctl = read_sysreg(cntv_ctl_el0); + timer->cntv_cval = read_sysreg(cntv_cval_el0); + } + + /* Disable the virtual timer */ + write_sysreg(0, cntv_ctl_el0); + + /* Allow physical timer/counter access for the host */ + val = read_sysreg(cnthctl_el2); + val |= CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN; + write_sysreg(val, cnthctl_el2); + + /* Clear cntvoff for the host */ + write_sysreg(0, cntvoff_el2); +} + +void __hyp_text __timer_restore_state(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = kern_hyp_va(vcpu->kvm); + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + u64 val; + + /* + * Disallow physical timer access for the guest + * Physical counter access is allowed + */ + val = read_sysreg(cnthctl_el2); + val &= ~CNTHCTL_EL1PCEN; + val |= CNTHCTL_EL1PCTEN; + write_sysreg(val, cnthctl_el2); + + if (kvm->arch.timer.enabled) { + write_sysreg(kvm->arch.timer.cntvoff, cntvoff_el2); + write_sysreg(timer->cntv_cval, cntv_cval_el0); + isb(); + write_sysreg(timer->cntv_ctl, cntv_ctl_el0); + } +} diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c new file mode 100644 index 000000000000..2a7e0d838698 --- /dev/null +++ b/arch/arm64/kvm/hyp/tlb.c @@ -0,0 +1,80 @@ +/* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "hyp.h" + +static void __hyp_text __tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) +{ + dsb(ishst); + + /* Switch to requested VMID */ + kvm = kern_hyp_va(kvm); + write_sysreg(kvm->arch.vttbr, vttbr_el2); + isb(); + + /* + * We could do so much better if we had the VA as well. + * Instead, we invalidate Stage-2 for this IPA, and the + * whole of Stage-1. Weep... + */ + ipa >>= 12; + asm volatile("tlbi ipas2e1is, %0" : : "r" (ipa)); + + /* + * We have to ensure completion of the invalidation at Stage-2, + * since a table walk on another CPU could refill a TLB with a + * complete (S1 + S2) walk based on the old Stage-2 mapping if + * the Stage-1 invalidation happened first. + */ + dsb(ish); + asm volatile("tlbi vmalle1is" : : ); + dsb(ish); + isb(); + + write_sysreg(0, vttbr_el2); +} + +__alias(__tlb_flush_vmid_ipa) void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, + phys_addr_t ipa); + +static void __hyp_text __tlb_flush_vmid(struct kvm *kvm) +{ + dsb(ishst); + + /* Switch to requested VMID */ + kvm = kern_hyp_va(kvm); + write_sysreg(kvm->arch.vttbr, vttbr_el2); + isb(); + + asm volatile("tlbi vmalls12e1is" : : ); + dsb(ish); + isb(); + + write_sysreg(0, vttbr_el2); +} + +__alias(__tlb_flush_vmid) void __kvm_tlb_flush_vmid(struct kvm *kvm); + +static void __hyp_text __tlb_flush_vm_context(void) +{ + dsb(ishst); + asm volatile("tlbi alle1is \n" + "ic ialluis ": : ); + dsb(ish); +} + +__alias(__tlb_flush_vm_context) void __kvm_flush_vm_context(void); diff --git a/arch/arm64/kvm/hyp/vgic-v2-sr.c b/arch/arm64/kvm/hyp/vgic-v2-sr.c new file mode 100644 index 000000000000..e71761238cfc --- /dev/null +++ b/arch/arm64/kvm/hyp/vgic-v2-sr.c @@ -0,0 +1,84 @@ +/* + * Copyright (C) 2012-2015 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/compiler.h> +#include <linux/irqchip/arm-gic.h> +#include <linux/kvm_host.h> + +#include <asm/kvm_mmu.h> + +#include "hyp.h" + +/* vcpu is already in the HYP VA space */ +void __hyp_text __vgic_v2_save_state(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = kern_hyp_va(vcpu->kvm); + struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; + struct vgic_dist *vgic = &kvm->arch.vgic; + void __iomem *base = kern_hyp_va(vgic->vctrl_base); + u32 eisr0, eisr1, elrsr0, elrsr1; + int i, nr_lr; + + if (!base) + return; + + nr_lr = vcpu->arch.vgic_cpu.nr_lr; + cpu_if->vgic_vmcr = readl_relaxed(base + GICH_VMCR); + cpu_if->vgic_misr = readl_relaxed(base + GICH_MISR); + eisr0 = readl_relaxed(base + GICH_EISR0); + elrsr0 = readl_relaxed(base + GICH_ELRSR0); + if (unlikely(nr_lr > 32)) { + eisr1 = readl_relaxed(base + GICH_EISR1); + elrsr1 = readl_relaxed(base + GICH_ELRSR1); + } else { + eisr1 = elrsr1 = 0; + } +#ifdef CONFIG_CPU_BIG_ENDIAN + cpu_if->vgic_eisr = ((u64)eisr0 << 32) | eisr1; + cpu_if->vgic_elrsr = ((u64)elrsr0 << 32) | elrsr1; +#else + cpu_if->vgic_eisr = ((u64)eisr1 << 32) | eisr0; + cpu_if->vgic_elrsr = ((u64)elrsr1 << 32) | elrsr0; +#endif + cpu_if->vgic_apr = readl_relaxed(base + GICH_APR); + + writel_relaxed(0, base + GICH_HCR); + + for (i = 0; i < nr_lr; i++) + cpu_if->vgic_lr[i] = readl_relaxed(base + GICH_LR0 + (i * 4)); +} + +/* vcpu is already in the HYP VA space */ +void __hyp_text __vgic_v2_restore_state(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = kern_hyp_va(vcpu->kvm); + struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; + struct vgic_dist *vgic = &kvm->arch.vgic; + void __iomem *base = kern_hyp_va(vgic->vctrl_base); + int i, nr_lr; + + if (!base) + return; + + writel_relaxed(cpu_if->vgic_hcr, base + GICH_HCR); + writel_relaxed(cpu_if->vgic_vmcr, base + GICH_VMCR); + writel_relaxed(cpu_if->vgic_apr, base + GICH_APR); + + nr_lr = vcpu->arch.vgic_cpu.nr_lr; + for (i = 0; i < nr_lr; i++) + writel_relaxed(cpu_if->vgic_lr[i], base + GICH_LR0 + (i * 4)); +} diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c new file mode 100644 index 000000000000..9142e082f5f3 --- /dev/null +++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c @@ -0,0 +1,228 @@ +/* + * Copyright (C) 2012-2015 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/compiler.h> +#include <linux/irqchip/arm-gic-v3.h> +#include <linux/kvm_host.h> + +#include <asm/kvm_mmu.h> + +#include "hyp.h" + +#define vtr_to_max_lr_idx(v) ((v) & 0xf) +#define vtr_to_nr_pri_bits(v) (((u32)(v) >> 29) + 1) + +#define read_gicreg(r) \ + ({ \ + u64 reg; \ + asm volatile("mrs_s %0, " __stringify(r) : "=r" (reg)); \ + reg; \ + }) + +#define write_gicreg(v,r) \ + do { \ + u64 __val = (v); \ + asm volatile("msr_s " __stringify(r) ", %0" : : "r" (__val));\ + } while (0) + +/* vcpu is already in the HYP VA space */ +void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu) +{ + struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; + u64 val; + u32 max_lr_idx, nr_pri_bits; + + /* + * Make sure stores to the GIC via the memory mapped interface + * are now visible to the system register interface. + */ + dsb(st); + + cpu_if->vgic_vmcr = read_gicreg(ICH_VMCR_EL2); + cpu_if->vgic_misr = read_gicreg(ICH_MISR_EL2); + cpu_if->vgic_eisr = read_gicreg(ICH_EISR_EL2); + cpu_if->vgic_elrsr = read_gicreg(ICH_ELSR_EL2); + + write_gicreg(0, ICH_HCR_EL2); + val = read_gicreg(ICH_VTR_EL2); + max_lr_idx = vtr_to_max_lr_idx(val); + nr_pri_bits = vtr_to_nr_pri_bits(val); + + switch (max_lr_idx) { + case 15: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(15)] = read_gicreg(ICH_LR15_EL2); + case 14: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(14)] = read_gicreg(ICH_LR14_EL2); + case 13: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(13)] = read_gicreg(ICH_LR13_EL2); + case 12: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(12)] = read_gicreg(ICH_LR12_EL2); + case 11: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(11)] = read_gicreg(ICH_LR11_EL2); + case 10: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(10)] = read_gicreg(ICH_LR10_EL2); + case 9: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(9)] = read_gicreg(ICH_LR9_EL2); + case 8: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(8)] = read_gicreg(ICH_LR8_EL2); + case 7: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(7)] = read_gicreg(ICH_LR7_EL2); + case 6: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(6)] = read_gicreg(ICH_LR6_EL2); + case 5: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(5)] = read_gicreg(ICH_LR5_EL2); + case 4: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(4)] = read_gicreg(ICH_LR4_EL2); + case 3: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(3)] = read_gicreg(ICH_LR3_EL2); + case 2: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(2)] = read_gicreg(ICH_LR2_EL2); + case 1: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(1)] = read_gicreg(ICH_LR1_EL2); + case 0: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(0)] = read_gicreg(ICH_LR0_EL2); + } + + switch (nr_pri_bits) { + case 7: + cpu_if->vgic_ap0r[3] = read_gicreg(ICH_AP0R3_EL2); + cpu_if->vgic_ap0r[2] = read_gicreg(ICH_AP0R2_EL2); + case 6: + cpu_if->vgic_ap0r[1] = read_gicreg(ICH_AP0R1_EL2); + default: + cpu_if->vgic_ap0r[0] = read_gicreg(ICH_AP0R0_EL2); + } + + switch (nr_pri_bits) { + case 7: + cpu_if->vgic_ap1r[3] = read_gicreg(ICH_AP1R3_EL2); + cpu_if->vgic_ap1r[2] = read_gicreg(ICH_AP1R2_EL2); + case 6: + cpu_if->vgic_ap1r[1] = read_gicreg(ICH_AP1R1_EL2); + default: + cpu_if->vgic_ap1r[0] = read_gicreg(ICH_AP1R0_EL2); + } + + val = read_gicreg(ICC_SRE_EL2); + write_gicreg(val | ICC_SRE_EL2_ENABLE, ICC_SRE_EL2); + isb(); /* Make sure ENABLE is set at EL2 before setting SRE at EL1 */ + write_gicreg(1, ICC_SRE_EL1); +} + +void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu) +{ + struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; + u64 val; + u32 max_lr_idx, nr_pri_bits; + + /* + * VFIQEn is RES1 if ICC_SRE_EL1.SRE is 1. This causes a + * Group0 interrupt (as generated in GICv2 mode) to be + * delivered as a FIQ to the guest, with potentially fatal + * consequences. So we must make sure that ICC_SRE_EL1 has + * been actually programmed with the value we want before + * starting to mess with the rest of the GIC. + */ + write_gicreg(cpu_if->vgic_sre, ICC_SRE_EL1); + isb(); + + write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2); + write_gicreg(cpu_if->vgic_vmcr, ICH_VMCR_EL2); + + val = read_gicreg(ICH_VTR_EL2); + max_lr_idx = vtr_to_max_lr_idx(val); + nr_pri_bits = vtr_to_nr_pri_bits(val); + + switch (nr_pri_bits) { + case 7: + write_gicreg(cpu_if->vgic_ap1r[3], ICH_AP1R3_EL2); + write_gicreg(cpu_if->vgic_ap1r[2], ICH_AP1R2_EL2); + case 6: + write_gicreg(cpu_if->vgic_ap1r[1], ICH_AP1R1_EL2); + default: + write_gicreg(cpu_if->vgic_ap1r[0], ICH_AP1R0_EL2); + } + + switch (nr_pri_bits) { + case 7: + write_gicreg(cpu_if->vgic_ap0r[3], ICH_AP0R3_EL2); + write_gicreg(cpu_if->vgic_ap0r[2], ICH_AP0R2_EL2); + case 6: + write_gicreg(cpu_if->vgic_ap0r[1], ICH_AP0R1_EL2); + default: + write_gicreg(cpu_if->vgic_ap0r[0], ICH_AP0R0_EL2); + } + + switch (max_lr_idx) { + case 15: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(15)], ICH_LR15_EL2); + case 14: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(14)], ICH_LR14_EL2); + case 13: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(13)], ICH_LR13_EL2); + case 12: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(12)], ICH_LR12_EL2); + case 11: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(11)], ICH_LR11_EL2); + case 10: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(10)], ICH_LR10_EL2); + case 9: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(9)], ICH_LR9_EL2); + case 8: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(8)], ICH_LR8_EL2); + case 7: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(7)], ICH_LR7_EL2); + case 6: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(6)], ICH_LR6_EL2); + case 5: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(5)], ICH_LR5_EL2); + case 4: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(4)], ICH_LR4_EL2); + case 3: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(3)], ICH_LR3_EL2); + case 2: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(2)], ICH_LR2_EL2); + case 1: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(1)], ICH_LR1_EL2); + case 0: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(0)], ICH_LR0_EL2); + } + + /* + * Ensures that the above will have reached the + * (re)distributors. This ensure the guest will read the + * correct values from the memory-mapped interface. + */ + isb(); + dsb(sy); + + /* + * Prevent the guest from touching the GIC system registers if + * SRE isn't enabled for GICv3 emulation. + */ + if (!cpu_if->vgic_sre) { + write_gicreg(read_gicreg(ICC_SRE_EL2) & ~ICC_SRE_EL2_ENABLE, + ICC_SRE_EL2); + } +} + +static u64 __hyp_text __vgic_v3_read_ich_vtr_el2(void) +{ + return read_gicreg(ICH_VTR_EL2); +} + +__alias(__vgic_v3_read_ich_vtr_el2) u64 __vgic_v3_get_ich_vtr_el2(void); diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index d2650e84faf2..eec3598b4184 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -29,6 +29,7 @@ #include <asm/debug-monitors.h> #include <asm/esr.h> #include <asm/kvm_arm.h> +#include <asm/kvm_asm.h> #include <asm/kvm_coproc.h> #include <asm/kvm_emulate.h> #include <asm/kvm_host.h> @@ -219,9 +220,9 @@ static bool trap_debug_regs(struct kvm_vcpu *vcpu, * All writes will set the KVM_ARM64_DEBUG_DIRTY flag to ensure the * hyp.S code switches between host and guest values in future. */ -static inline void reg_to_dbg(struct kvm_vcpu *vcpu, - struct sys_reg_params *p, - u64 *dbg_reg) +static void reg_to_dbg(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + u64 *dbg_reg) { u64 val = p->regval; @@ -234,18 +235,18 @@ static inline void reg_to_dbg(struct kvm_vcpu *vcpu, vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY; } -static inline void dbg_to_reg(struct kvm_vcpu *vcpu, - struct sys_reg_params *p, - u64 *dbg_reg) +static void dbg_to_reg(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + u64 *dbg_reg) { p->regval = *dbg_reg; if (p->is_32bit) p->regval &= 0xffffffffUL; } -static inline bool trap_bvr(struct kvm_vcpu *vcpu, - struct sys_reg_params *p, - const struct sys_reg_desc *rd) +static bool trap_bvr(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *rd) { u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg]; @@ -279,15 +280,15 @@ static int get_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, return 0; } -static inline void reset_bvr(struct kvm_vcpu *vcpu, - const struct sys_reg_desc *rd) +static void reset_bvr(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd) { vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg] = rd->val; } -static inline bool trap_bcr(struct kvm_vcpu *vcpu, - struct sys_reg_params *p, - const struct sys_reg_desc *rd) +static bool trap_bcr(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *rd) { u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg]; @@ -322,15 +323,15 @@ static int get_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, return 0; } -static inline void reset_bcr(struct kvm_vcpu *vcpu, - const struct sys_reg_desc *rd) +static void reset_bcr(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd) { vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg] = rd->val; } -static inline bool trap_wvr(struct kvm_vcpu *vcpu, - struct sys_reg_params *p, - const struct sys_reg_desc *rd) +static bool trap_wvr(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *rd) { u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg]; @@ -365,15 +366,15 @@ static int get_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, return 0; } -static inline void reset_wvr(struct kvm_vcpu *vcpu, - const struct sys_reg_desc *rd) +static void reset_wvr(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd) { vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg] = rd->val; } -static inline bool trap_wcr(struct kvm_vcpu *vcpu, - struct sys_reg_params *p, - const struct sys_reg_desc *rd) +static bool trap_wcr(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *rd) { u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg]; @@ -407,8 +408,8 @@ static int get_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, return 0; } -static inline void reset_wcr(struct kvm_vcpu *vcpu, - const struct sys_reg_desc *rd) +static void reset_wcr(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd) { vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg] = rd->val; } @@ -722,9 +723,9 @@ static bool trap_debug32(struct kvm_vcpu *vcpu, * system is in. */ -static inline bool trap_xvr(struct kvm_vcpu *vcpu, - struct sys_reg_params *p, - const struct sys_reg_desc *rd) +static bool trap_xvr(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *rd) { u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg]; diff --git a/arch/arm64/kvm/vgic-v2-switch.S b/arch/arm64/kvm/vgic-v2-switch.S deleted file mode 100644 index 3f000712a85d..000000000000 --- a/arch/arm64/kvm/vgic-v2-switch.S +++ /dev/null @@ -1,134 +0,0 @@ -/* - * Copyright (C) 2012,2013 - ARM Ltd - * Author: Marc Zyngier <marc.zyngier@arm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#include <linux/linkage.h> -#include <linux/irqchip/arm-gic.h> - -#include <asm/assembler.h> -#include <asm/memory.h> -#include <asm/asm-offsets.h> -#include <asm/kvm.h> -#include <asm/kvm_asm.h> -#include <asm/kvm_arm.h> -#include <asm/kvm_mmu.h> - - .text - .pushsection .hyp.text, "ax" - -/* - * Save the VGIC CPU state into memory - * x0: Register pointing to VCPU struct - * Do not corrupt x1!!! - */ -ENTRY(__save_vgic_v2_state) -__save_vgic_v2_state: - /* Get VGIC VCTRL base into x2 */ - ldr x2, [x0, #VCPU_KVM] - kern_hyp_va x2 - ldr x2, [x2, #KVM_VGIC_VCTRL] - kern_hyp_va x2 - cbz x2, 2f // disabled - - /* Compute the address of struct vgic_cpu */ - add x3, x0, #VCPU_VGIC_CPU - - /* Save all interesting registers */ - ldr w5, [x2, #GICH_VMCR] - ldr w6, [x2, #GICH_MISR] - ldr w7, [x2, #GICH_EISR0] - ldr w8, [x2, #GICH_EISR1] - ldr w9, [x2, #GICH_ELRSR0] - ldr w10, [x2, #GICH_ELRSR1] - ldr w11, [x2, #GICH_APR] -CPU_BE( rev w5, w5 ) -CPU_BE( rev w6, w6 ) -CPU_BE( rev w7, w7 ) -CPU_BE( rev w8, w8 ) -CPU_BE( rev w9, w9 ) -CPU_BE( rev w10, w10 ) -CPU_BE( rev w11, w11 ) - - str w5, [x3, #VGIC_V2_CPU_VMCR] - str w6, [x3, #VGIC_V2_CPU_MISR] -CPU_LE( str w7, [x3, #VGIC_V2_CPU_EISR] ) -CPU_LE( str w8, [x3, #(VGIC_V2_CPU_EISR + 4)] ) -CPU_LE( str w9, [x3, #VGIC_V2_CPU_ELRSR] ) -CPU_LE( str w10, [x3, #(VGIC_V2_CPU_ELRSR + 4)] ) -CPU_BE( str w7, [x3, #(VGIC_V2_CPU_EISR + 4)] ) -CPU_BE( str w8, [x3, #VGIC_V2_CPU_EISR] ) -CPU_BE( str w9, [x3, #(VGIC_V2_CPU_ELRSR + 4)] ) -CPU_BE( str w10, [x3, #VGIC_V2_CPU_ELRSR] ) - str w11, [x3, #VGIC_V2_CPU_APR] - - /* Clear GICH_HCR */ - str wzr, [x2, #GICH_HCR] - - /* Save list registers */ - add x2, x2, #GICH_LR0 - ldr w4, [x3, #VGIC_CPU_NR_LR] - add x3, x3, #VGIC_V2_CPU_LR -1: ldr w5, [x2], #4 -CPU_BE( rev w5, w5 ) - str w5, [x3], #4 - sub w4, w4, #1 - cbnz w4, 1b -2: - ret -ENDPROC(__save_vgic_v2_state) - -/* - * Restore the VGIC CPU state from memory - * x0: Register pointing to VCPU struct - */ -ENTRY(__restore_vgic_v2_state) -__restore_vgic_v2_state: - /* Get VGIC VCTRL base into x2 */ - ldr x2, [x0, #VCPU_KVM] - kern_hyp_va x2 - ldr x2, [x2, #KVM_VGIC_VCTRL] - kern_hyp_va x2 - cbz x2, 2f // disabled - - /* Compute the address of struct vgic_cpu */ - add x3, x0, #VCPU_VGIC_CPU - - /* We only restore a minimal set of registers */ - ldr w4, [x3, #VGIC_V2_CPU_HCR] - ldr w5, [x3, #VGIC_V2_CPU_VMCR] - ldr w6, [x3, #VGIC_V2_CPU_APR] -CPU_BE( rev w4, w4 ) -CPU_BE( rev w5, w5 ) -CPU_BE( rev w6, w6 ) - - str w4, [x2, #GICH_HCR] - str w5, [x2, #GICH_VMCR] - str w6, [x2, #GICH_APR] - - /* Restore list registers */ - add x2, x2, #GICH_LR0 - ldr w4, [x3, #VGIC_CPU_NR_LR] - add x3, x3, #VGIC_V2_CPU_LR -1: ldr w5, [x3], #4 -CPU_BE( rev w5, w5 ) - str w5, [x2], #4 - sub w4, w4, #1 - cbnz w4, 1b -2: - ret -ENDPROC(__restore_vgic_v2_state) - - .popsection diff --git a/arch/arm64/kvm/vgic-v3-switch.S b/arch/arm64/kvm/vgic-v3-switch.S deleted file mode 100644 index 3c20730ddff5..000000000000 --- a/arch/arm64/kvm/vgic-v3-switch.S +++ /dev/null @@ -1,269 +0,0 @@ -/* - * Copyright (C) 2012,2013 - ARM Ltd - * Author: Marc Zyngier <marc.zyngier@arm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#include <linux/linkage.h> -#include <linux/irqchip/arm-gic-v3.h> - -#include <asm/assembler.h> -#include <asm/memory.h> -#include <asm/asm-offsets.h> -#include <asm/kvm.h> -#include <asm/kvm_asm.h> -#include <asm/kvm_arm.h> - - .text - .pushsection .hyp.text, "ax" - -/* - * We store LRs in reverse order to let the CPU deal with streaming - * access. Use this macro to make it look saner... - */ -#define LR_OFFSET(n) (VGIC_V3_CPU_LR + (15 - n) * 8) - -/* - * Save the VGIC CPU state into memory - * x0: Register pointing to VCPU struct - * Do not corrupt x1!!! - */ -.macro save_vgic_v3_state - // Compute the address of struct vgic_cpu - add x3, x0, #VCPU_VGIC_CPU - - // Make sure stores to the GIC via the memory mapped interface - // are now visible to the system register interface - dsb st - - // Save all interesting registers - mrs_s x5, ICH_VMCR_EL2 - mrs_s x6, ICH_MISR_EL2 - mrs_s x7, ICH_EISR_EL2 - mrs_s x8, ICH_ELSR_EL2 - - str w5, [x3, #VGIC_V3_CPU_VMCR] - str w6, [x3, #VGIC_V3_CPU_MISR] - str w7, [x3, #VGIC_V3_CPU_EISR] - str w8, [x3, #VGIC_V3_CPU_ELRSR] - - msr_s ICH_HCR_EL2, xzr - - mrs_s x21, ICH_VTR_EL2 - mvn w22, w21 - ubfiz w23, w22, 2, 4 // w23 = (15 - ListRegs) * 4 - - adr x24, 1f - add x24, x24, x23 - br x24 - -1: - mrs_s x20, ICH_LR15_EL2 - mrs_s x19, ICH_LR14_EL2 - mrs_s x18, ICH_LR13_EL2 - mrs_s x17, ICH_LR12_EL2 - mrs_s x16, ICH_LR11_EL2 - mrs_s x15, ICH_LR10_EL2 - mrs_s x14, ICH_LR9_EL2 - mrs_s x13, ICH_LR8_EL2 - mrs_s x12, ICH_LR7_EL2 - mrs_s x11, ICH_LR6_EL2 - mrs_s x10, ICH_LR5_EL2 - mrs_s x9, ICH_LR4_EL2 - mrs_s x8, ICH_LR3_EL2 - mrs_s x7, ICH_LR2_EL2 - mrs_s x6, ICH_LR1_EL2 - mrs_s x5, ICH_LR0_EL2 - - adr x24, 1f - add x24, x24, x23 - br x24 - -1: - str x20, [x3, #LR_OFFSET(15)] - str x19, [x3, #LR_OFFSET(14)] - str x18, [x3, #LR_OFFSET(13)] - str x17, [x3, #LR_OFFSET(12)] - str x16, [x3, #LR_OFFSET(11)] - str x15, [x3, #LR_OFFSET(10)] - str x14, [x3, #LR_OFFSET(9)] - str x13, [x3, #LR_OFFSET(8)] - str x12, [x3, #LR_OFFSET(7)] - str x11, [x3, #LR_OFFSET(6)] - str x10, [x3, #LR_OFFSET(5)] - str x9, [x3, #LR_OFFSET(4)] - str x8, [x3, #LR_OFFSET(3)] - str x7, [x3, #LR_OFFSET(2)] - str x6, [x3, #LR_OFFSET(1)] - str x5, [x3, #LR_OFFSET(0)] - - tbnz w21, #29, 6f // 6 bits - tbz w21, #30, 5f // 5 bits - // 7 bits - mrs_s x20, ICH_AP0R3_EL2 - str w20, [x3, #(VGIC_V3_CPU_AP0R + 3*4)] - mrs_s x19, ICH_AP0R2_EL2 - str w19, [x3, #(VGIC_V3_CPU_AP0R + 2*4)] -6: mrs_s x18, ICH_AP0R1_EL2 - str w18, [x3, #(VGIC_V3_CPU_AP0R + 1*4)] -5: mrs_s x17, ICH_AP0R0_EL2 - str w17, [x3, #VGIC_V3_CPU_AP0R] - - tbnz w21, #29, 6f // 6 bits - tbz w21, #30, 5f // 5 bits - // 7 bits - mrs_s x20, ICH_AP1R3_EL2 - str w20, [x3, #(VGIC_V3_CPU_AP1R + 3*4)] - mrs_s x19, ICH_AP1R2_EL2 - str w19, [x3, #(VGIC_V3_CPU_AP1R + 2*4)] -6: mrs_s x18, ICH_AP1R1_EL2 - str w18, [x3, #(VGIC_V3_CPU_AP1R + 1*4)] -5: mrs_s x17, ICH_AP1R0_EL2 - str w17, [x3, #VGIC_V3_CPU_AP1R] - - // Restore SRE_EL1 access and re-enable SRE at EL1. - mrs_s x5, ICC_SRE_EL2 - orr x5, x5, #ICC_SRE_EL2_ENABLE - msr_s ICC_SRE_EL2, x5 - isb - mov x5, #1 - msr_s ICC_SRE_EL1, x5 -.endm - -/* - * Restore the VGIC CPU state from memory - * x0: Register pointing to VCPU struct - */ -.macro restore_vgic_v3_state - // Compute the address of struct vgic_cpu - add x3, x0, #VCPU_VGIC_CPU - - // Restore all interesting registers - ldr w4, [x3, #VGIC_V3_CPU_HCR] - ldr w5, [x3, #VGIC_V3_CPU_VMCR] - ldr w25, [x3, #VGIC_V3_CPU_SRE] - - msr_s ICC_SRE_EL1, x25 - - // make sure SRE is valid before writing the other registers - isb - - msr_s ICH_HCR_EL2, x4 - msr_s ICH_VMCR_EL2, x5 - - mrs_s x21, ICH_VTR_EL2 - - tbnz w21, #29, 6f // 6 bits - tbz w21, #30, 5f // 5 bits - // 7 bits - ldr w20, [x3, #(VGIC_V3_CPU_AP1R + 3*4)] - msr_s ICH_AP1R3_EL2, x20 - ldr w19, [x3, #(VGIC_V3_CPU_AP1R + 2*4)] - msr_s ICH_AP1R2_EL2, x19 -6: ldr w18, [x3, #(VGIC_V3_CPU_AP1R + 1*4)] - msr_s ICH_AP1R1_EL2, x18 -5: ldr w17, [x3, #VGIC_V3_CPU_AP1R] - msr_s ICH_AP1R0_EL2, x17 - - tbnz w21, #29, 6f // 6 bits - tbz w21, #30, 5f // 5 bits - // 7 bits - ldr w20, [x3, #(VGIC_V3_CPU_AP0R + 3*4)] - msr_s ICH_AP0R3_EL2, x20 - ldr w19, [x3, #(VGIC_V3_CPU_AP0R + 2*4)] - msr_s ICH_AP0R2_EL2, x19 -6: ldr w18, [x3, #(VGIC_V3_CPU_AP0R + 1*4)] - msr_s ICH_AP0R1_EL2, x18 -5: ldr w17, [x3, #VGIC_V3_CPU_AP0R] - msr_s ICH_AP0R0_EL2, x17 - - and w22, w21, #0xf - mvn w22, w21 - ubfiz w23, w22, 2, 4 // w23 = (15 - ListRegs) * 4 - - adr x24, 1f - add x24, x24, x23 - br x24 - -1: - ldr x20, [x3, #LR_OFFSET(15)] - ldr x19, [x3, #LR_OFFSET(14)] - ldr x18, [x3, #LR_OFFSET(13)] - ldr x17, [x3, #LR_OFFSET(12)] - ldr x16, [x3, #LR_OFFSET(11)] - ldr x15, [x3, #LR_OFFSET(10)] - ldr x14, [x3, #LR_OFFSET(9)] - ldr x13, [x3, #LR_OFFSET(8)] - ldr x12, [x3, #LR_OFFSET(7)] - ldr x11, [x3, #LR_OFFSET(6)] - ldr x10, [x3, #LR_OFFSET(5)] - ldr x9, [x3, #LR_OFFSET(4)] - ldr x8, [x3, #LR_OFFSET(3)] - ldr x7, [x3, #LR_OFFSET(2)] - ldr x6, [x3, #LR_OFFSET(1)] - ldr x5, [x3, #LR_OFFSET(0)] - - adr x24, 1f - add x24, x24, x23 - br x24 - -1: - msr_s ICH_LR15_EL2, x20 - msr_s ICH_LR14_EL2, x19 - msr_s ICH_LR13_EL2, x18 - msr_s ICH_LR12_EL2, x17 - msr_s ICH_LR11_EL2, x16 - msr_s ICH_LR10_EL2, x15 - msr_s ICH_LR9_EL2, x14 - msr_s ICH_LR8_EL2, x13 - msr_s ICH_LR7_EL2, x12 - msr_s ICH_LR6_EL2, x11 - msr_s ICH_LR5_EL2, x10 - msr_s ICH_LR4_EL2, x9 - msr_s ICH_LR3_EL2, x8 - msr_s ICH_LR2_EL2, x7 - msr_s ICH_LR1_EL2, x6 - msr_s ICH_LR0_EL2, x5 - - // Ensure that the above will have reached the - // (re)distributors. This ensure the guest will read - // the correct values from the memory-mapped interface. - isb - dsb sy - - // Prevent the guest from touching the GIC system registers - // if SRE isn't enabled for GICv3 emulation - cbnz x25, 1f - mrs_s x5, ICC_SRE_EL2 - and x5, x5, #~ICC_SRE_EL2_ENABLE - msr_s ICC_SRE_EL2, x5 -1: -.endm - -ENTRY(__save_vgic_v3_state) - save_vgic_v3_state - ret -ENDPROC(__save_vgic_v3_state) - -ENTRY(__restore_vgic_v3_state) - restore_vgic_v3_state - ret -ENDPROC(__restore_vgic_v3_state) - -ENTRY(__vgic_v3_get_ich_vtr_el2) - mrs_s x0, ICH_VTR_EL2 - ret -ENDPROC(__vgic_v3_get_ich_vtr_el2) - - .popsection diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index cfa44a6adc0a..6df07069a025 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -81,26 +81,32 @@ ENDPROC(__flush_cache_user_range) /* * __flush_dcache_area(kaddr, size) * - * Ensure that the data held in the page kaddr is written back to the - * page in question. + * Ensure that any D-cache lines for the interval [kaddr, kaddr+size) + * are cleaned and invalidated to the PoC. * * - kaddr - kernel address * - size - size in question */ ENTRY(__flush_dcache_area) - dcache_line_size x2, x3 - add x1, x0, x1 - sub x3, x2, #1 - bic x0, x0, x3 -1: dc civac, x0 // clean & invalidate D line / unified line - add x0, x0, x2 - cmp x0, x1 - b.lo 1b - dsb sy + dcache_by_line_op civac, sy, x0, x1, x2, x3 ret ENDPIPROC(__flush_dcache_area) /* + * __clean_dcache_area_pou(kaddr, size) + * + * Ensure that any D-cache lines for the interval [kaddr, kaddr+size) + * are cleaned to the PoU. + * + * - kaddr - kernel address + * - size - size in question + */ +ENTRY(__clean_dcache_area_pou) + dcache_by_line_op cvau, ish, x0, x1, x2, x3 + ret +ENDPROC(__clean_dcache_area_pou) + +/* * __inval_cache_range(start, end) * - start - start address of region * - end - end address of region diff --git a/arch/arm64/mm/copypage.c b/arch/arm64/mm/copypage.c index 13bbc3be6f5a..22e4cb4d6f53 100644 --- a/arch/arm64/mm/copypage.c +++ b/arch/arm64/mm/copypage.c @@ -24,8 +24,9 @@ void __cpu_copy_user_page(void *kto, const void *kfrom, unsigned long vaddr) { + struct page *page = virt_to_page(kto); copy_page(kto, kfrom); - __flush_dcache_area(kto, PAGE_SIZE); + flush_dcache_page(page); } EXPORT_SYMBOL_GPL(__cpu_copy_user_page); diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 7963aa4b5d28..331c4ca6205c 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -40,7 +40,7 @@ static pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot, static struct gen_pool *atomic_pool; #define DEFAULT_DMA_COHERENT_POOL_SIZE SZ_256K -static size_t atomic_pool_size = DEFAULT_DMA_COHERENT_POOL_SIZE; +static size_t atomic_pool_size __initdata = DEFAULT_DMA_COHERENT_POOL_SIZE; static int __init early_coherent_pool(char *p) { @@ -896,7 +896,7 @@ static int __iommu_attach_notifier(struct notifier_block *nb, return 0; } -static int register_iommu_dma_ops_notifier(struct bus_type *bus) +static int __init register_iommu_dma_ops_notifier(struct bus_type *bus) { struct notifier_block *nb = kzalloc(sizeof(*nb), GFP_KERNEL); int ret; diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c index c26b804015e8..60585bde1264 100644 --- a/arch/arm64/mm/flush.c +++ b/arch/arm64/mm/flush.c @@ -34,19 +34,24 @@ void flush_cache_range(struct vm_area_struct *vma, unsigned long start, __flush_icache_all(); } +static void sync_icache_aliases(void *kaddr, unsigned long len) +{ + unsigned long addr = (unsigned long)kaddr; + + if (icache_is_aliasing()) { + __clean_dcache_area_pou(kaddr, len); + __flush_icache_all(); + } else { + flush_icache_range(addr, addr + len); + } +} + static void flush_ptrace_access(struct vm_area_struct *vma, struct page *page, unsigned long uaddr, void *kaddr, unsigned long len) { - if (vma->vm_flags & VM_EXEC) { - unsigned long addr = (unsigned long)kaddr; - if (icache_is_aliasing()) { - __flush_dcache_area(kaddr, len); - __flush_icache_all(); - } else { - flush_icache_range(addr, addr + len); - } - } + if (vma->vm_flags & VM_EXEC) + sync_icache_aliases(kaddr, len); } /* @@ -74,13 +79,11 @@ void __sync_icache_dcache(pte_t pte, unsigned long addr) if (!page_mapping(page)) return; - if (!test_and_set_bit(PG_dcache_clean, &page->flags)) { - __flush_dcache_area(page_address(page), - PAGE_SIZE << compound_order(page)); + if (!test_and_set_bit(PG_dcache_clean, &page->flags)) + sync_icache_aliases(page_address(page), + PAGE_SIZE << compound_order(page)); + else if (icache_is_aivivt()) __flush_icache_all(); - } else if (icache_is_aivivt()) { - __flush_icache_all(); - } } /* @@ -99,19 +102,3 @@ EXPORT_SYMBOL(flush_dcache_page); * Additional functions defined in assembly. */ EXPORT_SYMBOL(flush_icache_range); - -#ifdef CONFIG_TRANSPARENT_HUGEPAGE -#ifdef CONFIG_HAVE_RCU_TABLE_FREE -void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address, - pmd_t *pmdp) -{ - pmd_t pmd = pmd_mksplitting(*pmdp); - - VM_BUG_ON(address & ~PMD_MASK); - set_pmd_at(vma->vm_mm, address, pmdp, pmd); - - /* dummy IPI to serialise against fast_gup */ - kick_all_cpus_sync(); -} -#endif /* CONFIG_HAVE_RCU_TABLE_FREE */ -#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 383b03ff38f8..82d607c3614e 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -41,17 +41,289 @@ int pud_huge(pud_t pud) #endif } +static int find_num_contig(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte, size_t *pgsize) +{ + pgd_t *pgd = pgd_offset(mm, addr); + pud_t *pud; + pmd_t *pmd; + + *pgsize = PAGE_SIZE; + if (!pte_cont(pte)) + return 1; + if (!pgd_present(*pgd)) { + VM_BUG_ON(!pgd_present(*pgd)); + return 1; + } + pud = pud_offset(pgd, addr); + if (!pud_present(*pud)) { + VM_BUG_ON(!pud_present(*pud)); + return 1; + } + pmd = pmd_offset(pud, addr); + if (!pmd_present(*pmd)) { + VM_BUG_ON(!pmd_present(*pmd)); + return 1; + } + if ((pte_t *)pmd == ptep) { + *pgsize = PMD_SIZE; + return CONT_PMDS; + } + return CONT_PTES; +} + +void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte) +{ + size_t pgsize; + int i; + int ncontig = find_num_contig(mm, addr, ptep, pte, &pgsize); + unsigned long pfn; + pgprot_t hugeprot; + + if (ncontig == 1) { + set_pte_at(mm, addr, ptep, pte); + return; + } + + pfn = pte_pfn(pte); + hugeprot = __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ pte_val(pte)); + for (i = 0; i < ncontig; i++) { + pr_debug("%s: set pte %p to 0x%llx\n", __func__, ptep, + pte_val(pfn_pte(pfn, hugeprot))); + set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot)); + ptep++; + pfn += pgsize >> PAGE_SHIFT; + addr += pgsize; + } +} + +pte_t *huge_pte_alloc(struct mm_struct *mm, + unsigned long addr, unsigned long sz) +{ + pgd_t *pgd; + pud_t *pud; + pte_t *pte = NULL; + + pr_debug("%s: addr:0x%lx sz:0x%lx\n", __func__, addr, sz); + pgd = pgd_offset(mm, addr); + pud = pud_alloc(mm, pgd, addr); + if (!pud) + return NULL; + + if (sz == PUD_SIZE) { + pte = (pte_t *)pud; + } else if (sz == (PAGE_SIZE * CONT_PTES)) { + pmd_t *pmd = pmd_alloc(mm, pud, addr); + + WARN_ON(addr & (sz - 1)); + /* + * Note that if this code were ever ported to the + * 32-bit arm platform then it will cause trouble in + * the case where CONFIG_HIGHPTE is set, since there + * will be no pte_unmap() to correspond with this + * pte_alloc_map(). + */ + pte = pte_alloc_map(mm, NULL, pmd, addr); + } else if (sz == PMD_SIZE) { + if (IS_ENABLED(CONFIG_ARCH_WANT_HUGE_PMD_SHARE) && + pud_none(*pud)) + pte = huge_pmd_share(mm, addr, pud); + else + pte = (pte_t *)pmd_alloc(mm, pud, addr); + } else if (sz == (PMD_SIZE * CONT_PMDS)) { + pmd_t *pmd; + + pmd = pmd_alloc(mm, pud, addr); + WARN_ON(addr & (sz - 1)); + return (pte_t *)pmd; + } + + pr_debug("%s: addr:0x%lx sz:0x%lx ret pte=%p/0x%llx\n", __func__, addr, + sz, pte, pte_val(*pte)); + return pte; +} + +pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd = NULL; + pte_t *pte = NULL; + + pgd = pgd_offset(mm, addr); + pr_debug("%s: addr:0x%lx pgd:%p\n", __func__, addr, pgd); + if (!pgd_present(*pgd)) + return NULL; + pud = pud_offset(pgd, addr); + if (!pud_present(*pud)) + return NULL; + + if (pud_huge(*pud)) + return (pte_t *)pud; + pmd = pmd_offset(pud, addr); + if (!pmd_present(*pmd)) + return NULL; + + if (pte_cont(pmd_pte(*pmd))) { + pmd = pmd_offset( + pud, (addr & CONT_PMD_MASK)); + return (pte_t *)pmd; + } + if (pmd_huge(*pmd)) + return (pte_t *)pmd; + pte = pte_offset_kernel(pmd, addr); + if (pte_present(*pte) && pte_cont(*pte)) { + pte = pte_offset_kernel( + pmd, (addr & CONT_PTE_MASK)); + return pte; + } + return NULL; +} + +pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma, + struct page *page, int writable) +{ + size_t pagesize = huge_page_size(hstate_vma(vma)); + + if (pagesize == CONT_PTE_SIZE) { + entry = pte_mkcont(entry); + } else if (pagesize == CONT_PMD_SIZE) { + entry = pmd_pte(pmd_mkcont(pte_pmd(entry))); + } else if (pagesize != PUD_SIZE && pagesize != PMD_SIZE) { + pr_warn("%s: unrecognized huge page size 0x%lx\n", + __func__, pagesize); + } + return entry; +} + +pte_t huge_ptep_get_and_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + pte_t pte; + + if (pte_cont(*ptep)) { + int ncontig, i; + size_t pgsize; + pte_t *cpte; + bool is_dirty = false; + + cpte = huge_pte_offset(mm, addr); + ncontig = find_num_contig(mm, addr, cpte, *cpte, &pgsize); + /* save the 1st pte to return */ + pte = ptep_get_and_clear(mm, addr, cpte); + for (i = 1; i < ncontig; ++i) { + /* + * If HW_AFDBM is enabled, then the HW could + * turn on the dirty bit for any of the page + * in the set, so check them all. + */ + ++cpte; + if (pte_dirty(ptep_get_and_clear(mm, addr, cpte))) + is_dirty = true; + } + if (is_dirty) + return pte_mkdirty(pte); + else + return pte; + } else { + return ptep_get_and_clear(mm, addr, ptep); + } +} + +int huge_ptep_set_access_flags(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, + pte_t pte, int dirty) +{ + pte_t *cpte; + + if (pte_cont(pte)) { + int ncontig, i, changed = 0; + size_t pgsize = 0; + unsigned long pfn = pte_pfn(pte); + /* Select all bits except the pfn */ + pgprot_t hugeprot = + __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ + pte_val(pte)); + + cpte = huge_pte_offset(vma->vm_mm, addr); + pfn = pte_pfn(*cpte); + ncontig = find_num_contig(vma->vm_mm, addr, cpte, + *cpte, &pgsize); + for (i = 0; i < ncontig; ++i, ++cpte) { + changed = ptep_set_access_flags(vma, addr, cpte, + pfn_pte(pfn, + hugeprot), + dirty); + pfn += pgsize >> PAGE_SHIFT; + } + return changed; + } else { + return ptep_set_access_flags(vma, addr, ptep, pte, dirty); + } +} + +void huge_ptep_set_wrprotect(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + if (pte_cont(*ptep)) { + int ncontig, i; + pte_t *cpte; + size_t pgsize = 0; + + cpte = huge_pte_offset(mm, addr); + ncontig = find_num_contig(mm, addr, cpte, *cpte, &pgsize); + for (i = 0; i < ncontig; ++i, ++cpte) + ptep_set_wrprotect(mm, addr, cpte); + } else { + ptep_set_wrprotect(mm, addr, ptep); + } +} + +void huge_ptep_clear_flush(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep) +{ + if (pte_cont(*ptep)) { + int ncontig, i; + pte_t *cpte; + size_t pgsize = 0; + + cpte = huge_pte_offset(vma->vm_mm, addr); + ncontig = find_num_contig(vma->vm_mm, addr, cpte, + *cpte, &pgsize); + for (i = 0; i < ncontig; ++i, ++cpte) + ptep_clear_flush(vma, addr, cpte); + } else { + ptep_clear_flush(vma, addr, ptep); + } +} + static __init int setup_hugepagesz(char *opt) { unsigned long ps = memparse(opt, &opt); + if (ps == PMD_SIZE) { hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT); } else if (ps == PUD_SIZE) { hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); + } else if (ps == (PAGE_SIZE * CONT_PTES)) { + hugetlb_add_hstate(CONT_PTE_SHIFT); + } else if (ps == (PMD_SIZE * CONT_PMDS)) { + hugetlb_add_hstate((PMD_SHIFT + CONT_PMD_SHIFT) - PAGE_SHIFT); } else { - pr_err("hugepagesz: Unsupported page size %lu M\n", ps >> 20); + pr_err("hugepagesz: Unsupported page size %lu K\n", ps >> 10); return 0; } return 1; } __setup("hugepagesz=", setup_hugepagesz); + +#ifdef CONFIG_ARM64_64K_PAGES +static __init int add_default_hugepagesz(void) +{ + if (size_to_hstate(CONT_PTES * PAGE_SIZE) == NULL) + hugetlb_add_hstate(CONT_PMD_SHIFT); + return 0; +} +arch_initcall(add_default_hugepagesz); +#endif diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 17bf39ac83ba..f3b061e67bfe 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -71,7 +71,7 @@ early_param("initrd", early_initrd); * currently assumes that for memory starting above 4G, 32-bit devices will * use a DMA offset. */ -static phys_addr_t max_zone_dma_phys(void) +static phys_addr_t __init max_zone_dma_phys(void) { phys_addr_t offset = memblock_start_of_DRAM() & GENMASK_ULL(63, 32); return min(offset + (1ULL << 32), memblock_end_of_DRAM()); @@ -120,17 +120,17 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) #ifdef CONFIG_HAVE_ARCH_PFN_VALID int pfn_valid(unsigned long pfn) { - return memblock_is_memory(pfn << PAGE_SHIFT); + return memblock_is_map_memory(pfn << PAGE_SHIFT); } EXPORT_SYMBOL(pfn_valid); #endif #ifndef CONFIG_SPARSEMEM -static void arm64_memory_present(void) +static void __init arm64_memory_present(void) { } #else -static void arm64_memory_present(void) +static void __init arm64_memory_present(void) { struct memblock_region *reg; @@ -360,7 +360,6 @@ void free_initmem(void) { fixup_init(); free_initmem_default(0); - free_alternatives_memory(); } #ifdef CONFIG_BLK_DEV_INITRD diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c index ed177475dd8c..4c893b5189dd 100644 --- a/arch/arm64/mm/mmap.c +++ b/arch/arm64/mm/mmap.c @@ -51,8 +51,12 @@ unsigned long arch_mmap_rnd(void) { unsigned long rnd; - rnd = (unsigned long)get_random_int() & STACK_RND_MASK; - +#ifdef CONFIG_COMPAT + if (test_thread_flag(TIF_32BIT)) + rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_compat_bits) - 1); + else +#endif + rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_bits) - 1); return rnd << PAGE_SHIFT; } diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 873e363048c6..58faeaa7fbdc 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -251,6 +251,14 @@ static void __create_mapping(struct mm_struct *mm, pgd_t *pgd, { unsigned long addr, length, end, next; + /* + * If the virtual and physical address don't have the same offset + * within a page, we cannot map the region as the caller expects. + */ + if (WARN_ON((phys ^ virt) & ~PAGE_MASK)) + return; + + phys &= PAGE_MASK; addr = virt & PAGE_MASK; length = PAGE_ALIGN(size + (virt & ~PAGE_MASK)); @@ -280,7 +288,7 @@ static void __init create_mapping(phys_addr_t phys, unsigned long virt, &phys, virt); return; } - __create_mapping(&init_mm, pgd_offset_k(virt & PAGE_MASK), phys, virt, + __create_mapping(&init_mm, pgd_offset_k(virt), phys, virt, size, prot, early_alloc); } @@ -301,7 +309,7 @@ static void create_mapping_late(phys_addr_t phys, unsigned long virt, return; } - return __create_mapping(&init_mm, pgd_offset_k(virt & PAGE_MASK), + return __create_mapping(&init_mm, pgd_offset_k(virt), phys, virt, size, prot, late_alloc); } @@ -372,6 +380,8 @@ static void __init map_mem(void) if (start >= end) break; + if (memblock_is_nomap(reg)) + continue; if (ARM64_SWAPPER_USES_SECTION_MAPS) { /* @@ -456,6 +466,9 @@ void __init paging_init(void) empty_zero_page = virt_to_page(zero_page); + /* Ensure the zero page is visible to the page table walker */ + dsb(ishst); + /* * TTBR0 is only used for the identity mapping at this stage. Make it * point to zero page to avoid speculatively fetching new entries. diff --git a/arch/arm64/mm/pgd.c b/arch/arm64/mm/pgd.c index cb3ba1b812e7..ae11d4e03d0e 100644 --- a/arch/arm64/mm/pgd.c +++ b/arch/arm64/mm/pgd.c @@ -46,14 +46,14 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd) kmem_cache_free(pgd_cache, pgd); } -static int __init pgd_cache_init(void) +void __init pgd_cache_init(void) { + if (PGD_SIZE == PAGE_SIZE) + return; + /* * Naturally aligned pgds required by the architecture. */ - if (PGD_SIZE != PAGE_SIZE) - pgd_cache = kmem_cache_create("pgd_cache", PGD_SIZE, PGD_SIZE, - SLAB_PANIC, NULL); - return 0; + pgd_cache = kmem_cache_create("pgd_cache", PGD_SIZE, PGD_SIZE, + SLAB_PANIC, NULL); } -core_initcall(pgd_cache_init); diff --git a/arch/arm64/mm/proc-macros.S b/arch/arm64/mm/proc-macros.S index 4c4d93c4bf65..146bd99a7532 100644 --- a/arch/arm64/mm/proc-macros.S +++ b/arch/arm64/mm/proc-macros.S @@ -62,3 +62,25 @@ bfi \valreg, \tmpreg, #TCR_T0SZ_OFFSET, #TCR_TxSZ_WIDTH #endif .endm + +/* + * Macro to perform a data cache maintenance for the interval + * [kaddr, kaddr + size) + * + * op: operation passed to dc instruction + * domain: domain used in dsb instruciton + * kaddr: starting virtual address of the region + * size: size of the region + * Corrupts: kaddr, size, tmp1, tmp2 + */ + .macro dcache_by_line_op op, domain, kaddr, size, tmp1, tmp2 + dcache_line_size \tmp1, \tmp2 + add \size, \kaddr, \size + sub \tmp2, \tmp1, #1 + bic \kaddr, \kaddr, \tmp2 +9998: dc \op, \kaddr + add \kaddr, \kaddr, \tmp1 + cmp \kaddr, \size + b.lo 9998b + dsb \domain + .endm diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index cacecc4ad3e5..a3d867e723b4 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -117,6 +117,7 @@ ENTRY(cpu_do_resume) */ ubfx x11, x11, #1, #1 msr oslar_el1, x11 + msr pmuserenr_el0, xzr // Disable PMU access from EL0 mov x0, x12 dsb nsh // Make sure local tlb invalidation completed isb @@ -139,8 +140,6 @@ ENTRY(cpu_do_switch_mm) ret ENDPROC(cpu_do_switch_mm) - .section ".text.init", #alloc, #execinstr - /* * __cpu_setup * @@ -155,6 +154,7 @@ ENTRY(__cpu_setup) msr cpacr_el1, x0 // Enable FP/ASIMD mov x0, #1 << 12 // Reset mdscr_el1 and disable msr mdscr_el1, x0 // access to the DCC from EL0 + msr pmuserenr_el0, xzr // Disable PMU access from EL0 /* * Memory region attributes for LPAE: * diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index b162ad70effc..a34420a5df9a 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -1,7 +1,7 @@ /* * BPF JIT compiler for ARM64 * - * Copyright (C) 2014-2015 Zi Shen Lim <zlim.lnx@gmail.com> + * Copyright (C) 2014-2016 Zi Shen Lim <zlim.lnx@gmail.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -152,8 +152,6 @@ static void build_prologue(struct jit_ctx *ctx) const u8 r8 = bpf2a64[BPF_REG_8]; const u8 r9 = bpf2a64[BPF_REG_9]; const u8 fp = bpf2a64[BPF_REG_FP]; - const u8 ra = bpf2a64[BPF_REG_A]; - const u8 rx = bpf2a64[BPF_REG_X]; const u8 tmp1 = bpf2a64[TMP_REG_1]; const u8 tmp2 = bpf2a64[TMP_REG_2]; @@ -200,10 +198,6 @@ static void build_prologue(struct jit_ctx *ctx) /* Set up function call stack */ emit(A64_SUB_I(1, A64_SP, A64_SP, STACK_SIZE), ctx); - - /* Clear registers A and X */ - emit_a64_mov_i64(ra, 0, ctx); - emit_a64_mov_i64(rx, 0, ctx); } static void build_epilogue(struct jit_ctx *ctx) @@ -743,6 +737,20 @@ static int build_body(struct jit_ctx *ctx) return 0; } +static int validate_code(struct jit_ctx *ctx) +{ + int i; + + for (i = 0; i < ctx->idx; i++) { + u32 a64_insn = le32_to_cpu(ctx->image[i]); + + if (a64_insn == AARCH64_BREAK_FAULT) + return -1; + } + + return 0; +} + static inline void bpf_flush_icache(void *start, void *end) { flush_icache_range((unsigned long)start, (unsigned long)end); @@ -805,6 +813,12 @@ void bpf_int_jit_compile(struct bpf_prog *prog) build_epilogue(&ctx); + /* 3. Extra pass to validate JITed code. */ + if (validate_code(&ctx)) { + bpf_jit_binary_free(header); + goto out; + } + /* And we're done. */ if (bpf_jit_enable > 1) bpf_jit_dump(prog->len, image_size, 2, ctx.image); diff --git a/arch/arm64/xen/hypercall.S b/arch/arm64/xen/hypercall.S index 8bbe9401f4f0..70df80e8da2c 100644 --- a/arch/arm64/xen/hypercall.S +++ b/arch/arm64/xen/hypercall.S @@ -80,6 +80,7 @@ HYPERCALL2(memory_op); HYPERCALL2(physdev_op); HYPERCALL3(vcpu_op); HYPERCALL1(tmem_op); +HYPERCALL1(platform_op_raw); HYPERCALL2(multicall); ENTRY(privcmd_call) diff --git a/arch/avr32/include/asm/page.h b/arch/avr32/include/asm/page.h index f805d1cb11bc..c5d2a3e2c62f 100644 --- a/arch/avr32/include/asm/page.h +++ b/arch/avr32/include/asm/page.h @@ -83,11 +83,9 @@ static inline int get_order(unsigned long size) #ifndef CONFIG_NEED_MULTIPLE_NODES -#define PHYS_PFN_OFFSET (CONFIG_PHYS_OFFSET >> PAGE_SHIFT) +#define ARCH_PFN_OFFSET (CONFIG_PHYS_OFFSET >> PAGE_SHIFT) -#define pfn_to_page(pfn) (mem_map + ((pfn) - PHYS_PFN_OFFSET)) -#define page_to_pfn(page) ((unsigned long)((page) - mem_map) + PHYS_PFN_OFFSET) -#define pfn_valid(pfn) ((pfn) >= PHYS_PFN_OFFSET && (pfn) < (PHYS_PFN_OFFSET + max_mapnr)) +#define pfn_valid(pfn) ((pfn) >= ARCH_PFN_OFFSET && (pfn) < (ARCH_PFN_OFFSET + max_mapnr)) #endif /* CONFIG_NEED_MULTIPLE_NODES */ #define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT) @@ -101,4 +99,6 @@ static inline int get_order(unsigned long size) */ #define HIGHMEM_START 0x20000000UL +#include <asm-generic/memory_model.h> + #endif /* __ASM_AVR32_PAGE_H */ diff --git a/arch/avr32/include/uapi/asm/socket.h b/arch/avr32/include/uapi/asm/socket.h index 2b65ed6b277c..9de0796240a0 100644 --- a/arch/avr32/include/uapi/asm/socket.h +++ b/arch/avr32/include/uapi/asm/socket.h @@ -85,4 +85,7 @@ #define SO_ATTACH_BPF 50 #define SO_DETACH_BPF SO_DETACH_FILTER +#define SO_ATTACH_REUSEPORT_CBPF 51 +#define SO_ATTACH_REUSEPORT_EBPF 52 + #endif /* _UAPI__ASM_AVR32_SOCKET_H */ diff --git a/arch/avr32/kernel/module.c b/arch/avr32/kernel/module.c index 164efa009e5b..2b4c54c04cb6 100644 --- a/arch/avr32/kernel/module.c +++ b/arch/avr32/kernel/module.c @@ -118,9 +118,9 @@ int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, * Increase core size to make room for GOT and set start * offset for GOT. */ - module->core_size = ALIGN(module->core_size, 4); - module->arch.got_offset = module->core_size; - module->core_size += module->arch.got_size; + module->core_layout.size = ALIGN(module->core_layout.size, 4); + module->arch.got_offset = module->core_layout.size; + module->core_layout.size += module->arch.got_size; return 0; @@ -177,7 +177,7 @@ int apply_relocate_add(Elf32_Shdr *sechdrs, const char *strtab, if (!info->got_initialized) { Elf32_Addr *gotent; - gotent = (module->module_core + gotent = (module->core_layout.base + module->arch.got_offset + info->got_offset); *gotent = relocation; @@ -255,8 +255,8 @@ int apply_relocate_add(Elf32_Shdr *sechdrs, const char *strtab, */ pr_debug("GOTPC: PC=0x%x, got_offset=0x%lx, core=0x%p\n", relocation, module->arch.got_offset, - module->module_core); - relocation -= ((unsigned long)module->module_core + module->core_layout.base); + relocation -= ((unsigned long)module->core_layout.base + module->arch.got_offset); *location = relocation; break; diff --git a/arch/avr32/mach-at32ap/pio.c b/arch/avr32/mach-at32ap/pio.c index 4f61378c3453..5020057ac7a2 100644 --- a/arch/avr32/mach-at32ap/pio.c +++ b/arch/avr32/mach-at32ap/pio.c @@ -203,7 +203,7 @@ fail: static int direction_input(struct gpio_chip *chip, unsigned offset) { - struct pio_device *pio = container_of(chip, struct pio_device, chip); + struct pio_device *pio = gpiochip_get_data(chip); u32 mask = 1 << offset; if (!(pio_readl(pio, PSR) & mask)) @@ -215,7 +215,7 @@ static int direction_input(struct gpio_chip *chip, unsigned offset) static int gpio_get(struct gpio_chip *chip, unsigned offset) { - struct pio_device *pio = container_of(chip, struct pio_device, chip); + struct pio_device *pio = gpiochip_get_data(chip); return (pio_readl(pio, PDSR) >> offset) & 1; } @@ -224,7 +224,7 @@ static void gpio_set(struct gpio_chip *chip, unsigned offset, int value); static int direction_output(struct gpio_chip *chip, unsigned offset, int value) { - struct pio_device *pio = container_of(chip, struct pio_device, chip); + struct pio_device *pio = gpiochip_get_data(chip); u32 mask = 1 << offset; if (!(pio_readl(pio, PSR) & mask)) @@ -237,7 +237,7 @@ static int direction_output(struct gpio_chip *chip, unsigned offset, int value) static void gpio_set(struct gpio_chip *chip, unsigned offset, int value) { - struct pio_device *pio = container_of(chip, struct pio_device, chip); + struct pio_device *pio = gpiochip_get_data(chip); u32 mask = 1 << offset; if (value) @@ -335,7 +335,7 @@ gpio_irq_setup(struct pio_device *pio, int irq, int gpio_irq) */ static void pio_bank_show(struct seq_file *s, struct gpio_chip *chip) { - struct pio_device *pio = container_of(chip, struct pio_device, chip); + struct pio_device *pio = gpiochip_get_data(chip); u32 psr, osr, imr, pdsr, pusr, ifsr, mdsr; unsigned i; u32 mask; @@ -397,7 +397,7 @@ static int __init pio_probe(struct platform_device *pdev) pio->chip.label = pio->name; pio->chip.base = pdev->id * 32; pio->chip.ngpio = 32; - pio->chip.dev = &pdev->dev; + pio->chip.parent = &pdev->dev; pio->chip.owner = THIS_MODULE; pio->chip.direction_input = direction_input; @@ -406,7 +406,7 @@ static int __init pio_probe(struct platform_device *pdev) pio->chip.set = gpio_set; pio->chip.dbg_show = pio_bank_show; - gpiochip_add(&pio->chip); + gpiochip_add_data(&pio->chip, pio); gpio_irq_setup(pio, irq, gpio_irq_base); diff --git a/arch/blackfin/include/asm/cmpxchg.h b/arch/blackfin/include/asm/cmpxchg.h index c05868cc61c1..253928854299 100644 --- a/arch/blackfin/include/asm/cmpxchg.h +++ b/arch/blackfin/include/asm/cmpxchg.h @@ -128,6 +128,5 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, #endif /* !CONFIG_SMP */ #define xchg(ptr, x) ((__typeof__(*(ptr)))__xchg((unsigned long)(x), (ptr), sizeof(*(ptr)))) -#define tas(ptr) ((void)xchg((ptr), 1)) #endif /* __ARCH_BLACKFIN_CMPXCHG__ */ diff --git a/arch/blackfin/include/asm/uaccess.h b/arch/blackfin/include/asm/uaccess.h index 90612a7f2cf3..12f5d6851bbc 100644 --- a/arch/blackfin/include/asm/uaccess.h +++ b/arch/blackfin/include/asm/uaccess.h @@ -168,12 +168,6 @@ static inline int bad_user_access_length(void) #define __copy_to_user_inatomic __copy_to_user #define __copy_from_user_inatomic __copy_from_user -#define copy_to_user_ret(to, from, n, retval) ({ if (copy_to_user(to, from, n))\ - return retval; }) - -#define copy_from_user_ret(to, from, n, retval) ({ if (copy_from_user(to, from, n))\ - return retval; }) - static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n) { diff --git a/arch/blackfin/mach-bf537/boards/stamp.c b/arch/blackfin/mach-bf537/boards/stamp.c index 88a19fc9844d..c181543a399a 100644 --- a/arch/blackfin/mach-bf537/boards/stamp.c +++ b/arch/blackfin/mach-bf537/boards/stamp.c @@ -404,7 +404,7 @@ static struct mtd_partition bfin_plat_nand_partitions[] = { #define BFIN_NAND_PLAT_ALE 1 static void bfin_plat_nand_cmd_ctrl(struct mtd_info *mtd, int cmd, unsigned int ctrl) { - struct nand_chip *this = mtd->priv; + struct nand_chip *this = mtd_to_nand(mtd); if (cmd == NAND_CMD_NONE) return; diff --git a/arch/blackfin/mach-bf561/boards/acvilon.c b/arch/blackfin/mach-bf561/boards/acvilon.c index 6ab951534d79..37f8f25a1347 100644 --- a/arch/blackfin/mach-bf561/boards/acvilon.c +++ b/arch/blackfin/mach-bf561/boards/acvilon.c @@ -267,7 +267,7 @@ static struct mtd_partition bfin_plat_nand_partitions[] = { static void bfin_plat_nand_cmd_ctrl(struct mtd_info *mtd, int cmd, unsigned int ctrl) { - struct nand_chip *this = mtd->priv; + struct nand_chip *this = mtd_to_nand(mtd); if (cmd == NAND_CMD_NONE) return; diff --git a/arch/blackfin/mach-bf561/boards/ezkit.c b/arch/blackfin/mach-bf561/boards/ezkit.c index 2de71e8c104b..f35525b55819 100644 --- a/arch/blackfin/mach-bf561/boards/ezkit.c +++ b/arch/blackfin/mach-bf561/boards/ezkit.c @@ -443,7 +443,7 @@ static const struct ppi_info ppi_info = { }; #if IS_ENABLED(CONFIG_VIDEO_ADV7183) -#include <media/adv7183.h> +#include <media/i2c/adv7183.h> static struct v4l2_input adv7183_inputs[] = { { .index = 0, diff --git a/arch/blackfin/mach-bf609/boards/ezkit.c b/arch/blackfin/mach-bf609/boards/ezkit.c index 2c61fc0c98f9..c7928d8ebb82 100644 --- a/arch/blackfin/mach-bf609/boards/ezkit.c +++ b/arch/blackfin/mach-bf609/boards/ezkit.c @@ -933,7 +933,7 @@ static struct bfin_capture_config bfin_capture_data = { #endif #if IS_ENABLED(CONFIG_VIDEO_ADV7842) -#include <media/adv7842.h> +#include <media/i2c/adv7842.h> static struct v4l2_input adv7842_inputs[] = { { @@ -1084,7 +1084,7 @@ static const struct ppi_info ppi_info = { }; #if IS_ENABLED(CONFIG_VIDEO_ADV7511) -#include <media/adv7511.h> +#include <media/i2c/adv7511.h> static struct v4l2_output adv7511_outputs[] = { { @@ -1125,7 +1125,7 @@ static struct bfin_display_config bfin_display_data = { #endif #if IS_ENABLED(CONFIG_VIDEO_ADV7343) -#include <media/adv7343.h> +#include <media/i2c/adv7343.h> static struct v4l2_output adv7343_outputs[] = { { diff --git a/arch/c6x/include/asm/Kbuild b/arch/c6x/include/asm/Kbuild index 945544ec603e..64465e7e2245 100644 --- a/arch/c6x/include/asm/Kbuild +++ b/arch/c6x/include/asm/Kbuild @@ -4,6 +4,7 @@ generic-y += auxvec.h generic-y += barrier.h generic-y += bitsperlong.h generic-y += bugs.h +generic-y += clkdev.h generic-y += cputime.h generic-y += current.h generic-y += device.h diff --git a/arch/c6x/include/asm/clkdev.h b/arch/c6x/include/asm/clkdev.h deleted file mode 100644 index 76a070b1c2e5..000000000000 --- a/arch/c6x/include/asm/clkdev.h +++ /dev/null @@ -1,22 +0,0 @@ -#ifndef _ASM_CLKDEV_H -#define _ASM_CLKDEV_H - -#include <linux/slab.h> - -struct clk; - -static inline int __clk_get(struct clk *clk) -{ - return 1; -} - -static inline void __clk_put(struct clk *clk) -{ -} - -static inline struct clk_lookup_alloc *__clkdev_alloc(size_t size) -{ - return kzalloc(size, GFP_KERNEL); -} - -#endif /* _ASM_CLKDEV_H */ diff --git a/arch/c6x/include/asm/cmpxchg.h b/arch/c6x/include/asm/cmpxchg.h index b27c8cefb8c3..93d0a5a047a2 100644 --- a/arch/c6x/include/asm/cmpxchg.h +++ b/arch/c6x/include/asm/cmpxchg.h @@ -47,8 +47,6 @@ static inline unsigned int __xchg(unsigned int x, volatile void *ptr, int size) #define xchg(ptr, x) \ ((__typeof__(*(ptr)))__xchg((unsigned int)(x), (void *) (ptr), \ sizeof(*(ptr)))) -#define tas(ptr) xchg((ptr), 1) - #include <asm-generic/cmpxchg-local.h> diff --git a/arch/cris/arch-v32/drivers/mach-a3/nandflash.c b/arch/cris/arch-v32/drivers/mach-a3/nandflash.c index 7fb52128ddc9..5aa3f5162310 100644 --- a/arch/cris/arch-v32/drivers/mach-a3/nandflash.c +++ b/arch/cris/arch-v32/drivers/mach-a3/nandflash.c @@ -36,7 +36,6 @@ #define CE_BIT 12 struct mtd_info_wrapper { - struct mtd_info info; struct nand_chip chip; }; @@ -52,7 +51,7 @@ static void crisv32_hwcontrol(struct mtd_info *mtd, int cmd, { unsigned long flags; reg_pio_rw_dout dout; - struct nand_chip *this = mtd->priv; + struct nand_chip *this = mtd_to_nand(mtd); local_irq_save(flags); @@ -148,10 +147,7 @@ struct mtd_info *__init crisv32_nand_flash_probe(void) /* Get pointer to private data */ this = &wrapper->chip; - crisv32_mtd = &wrapper->info; - - /* Link the private data with the MTD structure */ - crisv32_mtd->priv = this; + crisv32_mtd = nand_to_mtd(this); /* Set address of NAND IO lines */ this->IO_ADDR_R = read_cs; diff --git a/arch/cris/arch-v32/drivers/mach-fs/nandflash.c b/arch/cris/arch-v32/drivers/mach-fs/nandflash.c index e03238454b0e..a7c17b0f172a 100644 --- a/arch/cris/arch-v32/drivers/mach-fs/nandflash.c +++ b/arch/cris/arch-v32/drivers/mach-fs/nandflash.c @@ -31,7 +31,6 @@ #define BY_BIT 7 struct mtd_info_wrapper { - struct mtd_info info; struct nand_chip chip; }; @@ -51,7 +50,7 @@ static void crisv32_hwcontrol(struct mtd_info *mtd, int cmd, { unsigned long flags; reg_gio_rw_pa_dout dout; - struct nand_chip *this = mtd->priv; + struct nand_chip *this = mtd_to_nand(mtd); local_irq_save(flags); @@ -129,7 +128,7 @@ struct mtd_info *__init crisv32_nand_flash_probe(void) /* Get pointer to private data */ this = &wrapper->chip; - crisv32_mtd = &wrapper->info; + crisv32_mtd = nand_to_mtd(this); pa_oe.oe |= 1 << CE_BIT; pa_oe.oe |= 1 << ALE_BIT; @@ -141,9 +140,6 @@ struct mtd_info *__init crisv32_nand_flash_probe(void) bif_cfg.gated_csp1 = regk_bif_core_wr; REG_WR(bif_core, regi_bif_core, rw_grp3_cfg, bif_cfg); - /* Link the private data with the MTD structure */ - crisv32_mtd->priv = this; - /* Set address of NAND IO lines */ this->IO_ADDR_R = read_cs; this->IO_ADDR_W = write_cs; diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig index 34aa19352dc1..03bfd6bf03e7 100644 --- a/arch/frv/Kconfig +++ b/arch/frv/Kconfig @@ -10,6 +10,7 @@ config FRV select HAVE_DEBUG_BUGVERBOSE select ARCH_HAVE_NMI_SAFE_CMPXCHG select GENERIC_CPU_DEVICES + select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_WANT_IPC_PARSE_VERSION select OLD_SIGSUSPEND3 select OLD_SIGACTION diff --git a/arch/frv/include/asm/cmpxchg.h b/arch/frv/include/asm/cmpxchg.h index 5b04dd0aecab..a899765102ea 100644 --- a/arch/frv/include/asm/cmpxchg.h +++ b/arch/frv/include/asm/cmpxchg.h @@ -69,8 +69,6 @@ extern uint32_t __xchg_32(uint32_t i, volatile void *v); #endif -#define tas(ptr) (xchg((ptr), 1)) - /*****************************************************************************/ /* * compare and conditionally exchange value with memory diff --git a/arch/frv/include/asm/page.h b/arch/frv/include/asm/page.h index 8c97068ac8fc..688d8076a43a 100644 --- a/arch/frv/include/asm/page.h +++ b/arch/frv/include/asm/page.h @@ -34,7 +34,7 @@ typedef struct page *pgtable_t; #define pgprot_val(x) ((x).pgprot) #define __pte(x) ((pte_t) { (x) } ) -#define __pmd(x) ((pmd_t) { (x) } ) +#define __pmd(x) ((pmd_t) { { (x) } } ) #define __pud(x) ((pud_t) { (x) } ) #define __pgd(x) ((pgd_t) { (x) } ) #define __pgprot(x) ((pgprot_t) { (x) } ) diff --git a/arch/frv/include/uapi/asm/socket.h b/arch/frv/include/uapi/asm/socket.h index 4823ad125578..f02e4849ae83 100644 --- a/arch/frv/include/uapi/asm/socket.h +++ b/arch/frv/include/uapi/asm/socket.h @@ -85,5 +85,8 @@ #define SO_ATTACH_BPF 50 #define SO_DETACH_BPF SO_DETACH_FILTER +#define SO_ATTACH_REUSEPORT_CBPF 51 +#define SO_ATTACH_REUSEPORT_EBPF 52 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig index dd3ac75776ad..2e20333cbce9 100644 --- a/arch/h8300/Kconfig +++ b/arch/h8300/Kconfig @@ -17,6 +17,7 @@ config H8300 select HAVE_MEMBLOCK select HAVE_DMA_ATTRS select CLKSRC_OF + select H8300_TMR8 config RWSEM_GENERIC_SPINLOCK def_bool y diff --git a/arch/h8300/include/asm/io.h b/arch/h8300/include/asm/io.h index bb837cded268..f0e14f3a800d 100644 --- a/arch/h8300/include/asm/io.h +++ b/arch/h8300/include/asm/io.h @@ -3,40 +3,45 @@ #ifdef __KERNEL__ -#include <asm-generic/io.h> - /* H8/300 internal I/O functions */ -static inline unsigned char ctrl_inb(unsigned long addr) + +#define __raw_readb __raw_readb +static inline u8 __raw_readb(const volatile void __iomem *addr) { - return *(volatile unsigned char *)addr; + return *(volatile u8 *)addr; } -static inline unsigned short ctrl_inw(unsigned long addr) +#define __raw_readw __raw_readw +static inline u16 __raw_readw(const volatile void __iomem *addr) { - return *(volatile unsigned short *)addr; + return *(volatile u16 *)addr; } -static inline unsigned long ctrl_inl(unsigned long addr) +#define __raw_readl __raw_readl +static inline u32 __raw_readl(const volatile void __iomem *addr) { - return *(volatile unsigned long *)addr; + return *(volatile u32 *)addr; } -static inline void ctrl_outb(unsigned char b, unsigned long addr) +#define __raw_writeb __raw_writeb +static inline void __raw_writeb(u8 b, const volatile void __iomem *addr) { - *(volatile unsigned char *)addr = b; + *(volatile u8 *)addr = b; } -static inline void ctrl_outw(unsigned short b, unsigned long addr) +#define __raw_writew __raw_writew +static inline void __raw_writew(u16 b, const volatile void __iomem *addr) { - *(volatile unsigned short *)addr = b; + *(volatile u16 *)addr = b; } -static inline void ctrl_outl(unsigned long b, unsigned long addr) +#define __raw_writel __raw_writel +static inline void __raw_writel(u32 b, const volatile void __iomem *addr) { - *(volatile unsigned long *)addr = b; + *(volatile u32 *)addr = b; } -static inline void ctrl_bclr(int b, unsigned char *addr) +static inline void ctrl_bclr(int b, void __iomem *addr) { if (__builtin_constant_p(b)) __asm__("bclr %1,%0" : "+WU"(*addr): "i"(b)); @@ -44,7 +49,7 @@ static inline void ctrl_bclr(int b, unsigned char *addr) __asm__("bclr %w1,%0" : "+WU"(*addr): "r"(b)); } -static inline void ctrl_bset(int b, unsigned char *addr) +static inline void ctrl_bset(int b, void __iomem *addr) { if (__builtin_constant_p(b)) __asm__("bset %1,%0" : "+WU"(*addr): "i"(b)); @@ -52,6 +57,8 @@ static inline void ctrl_bset(int b, unsigned char *addr) __asm__("bset %w1,%0" : "+WU"(*addr): "r"(b)); } +#include <asm-generic/io.h> + #endif /* __KERNEL__ */ #endif /* _H8300_IO_H */ diff --git a/arch/h8300/kernel/setup.c b/arch/h8300/kernel/setup.c index c772abe6d19c..e4985dfa91dc 100644 --- a/arch/h8300/kernel/setup.c +++ b/arch/h8300/kernel/setup.c @@ -207,14 +207,14 @@ device_initcall(device_probe); #define get_wait(base, addr) ({ \ int baddr; \ baddr = ((addr) / 0x200000 * 2); \ - w *= (ctrl_inw((unsigned long)(base) + 2) & (3 << baddr)) + 1; \ + w *= (readw((base) + 2) & (3 << baddr)) + 1; \ }) #endif #if defined(CONFIG_CPU_H8S) #define get_wait(base, addr) ({ \ int baddr; \ baddr = ((addr) / 0x200000 * 16); \ - w *= (ctrl_inl((unsigned long)(base) + 2) & (7 << baddr)) + 1; \ + w *= (readl((base) + 2) & (7 << baddr)) + 1; \ }) #endif @@ -228,8 +228,8 @@ static __init int access_timing(void) bsc = of_find_compatible_node(NULL, NULL, "renesas,h8300-bsc"); base = of_iomap(bsc, 0); - w = (ctrl_inb((unsigned long)base + 0) & bit)?2:1; - if (ctrl_inb((unsigned long)base + 1) & bit) + w = (readb(base + 0) & bit)?2:1; + if (readb(base + 1) & bit) w *= get_wait(base, addr); else w *= 2; diff --git a/arch/ia64/include/asm/barrier.h b/arch/ia64/include/asm/barrier.h index df896a1c41d3..209c4b817c95 100644 --- a/arch/ia64/include/asm/barrier.h +++ b/arch/ia64/include/asm/barrier.h @@ -77,7 +77,7 @@ do { \ ___p1; \ }) -#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0) +#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0) /* * The group barrier in front of the rsm & ssm are necessary to ensure diff --git a/arch/ia64/include/asm/early_ioremap.h b/arch/ia64/include/asm/early_ioremap.h new file mode 100644 index 000000000000..eec9e1d1b833 --- /dev/null +++ b/arch/ia64/include/asm/early_ioremap.h @@ -0,0 +1,10 @@ +#ifndef _ASM_IA64_EARLY_IOREMAP_H +#define _ASM_IA64_EARLY_IOREMAP_H + +extern void __iomem * early_ioremap (unsigned long phys_addr, unsigned long size); +#define early_memremap(phys_addr, size) early_ioremap(phys_addr, size) + +extern void early_iounmap (volatile void __iomem *addr, unsigned long size); +#define early_memunmap(addr, size) early_iounmap(addr, size) + +#endif diff --git a/arch/ia64/include/asm/io.h b/arch/ia64/include/asm/io.h index 9041bbe2b7b4..a865d2a04f75 100644 --- a/arch/ia64/include/asm/io.h +++ b/arch/ia64/include/asm/io.h @@ -20,6 +20,7 @@ */ #include <asm/unaligned.h> +#include <asm/early_ioremap.h> /* We don't use IO slowdowns on the ia64, but.. */ #define __SLOW_DOWN_IO do { } while (0) @@ -427,10 +428,6 @@ __writeq (unsigned long val, volatile void __iomem *addr) extern void __iomem * ioremap(unsigned long offset, unsigned long size); extern void __iomem * ioremap_nocache (unsigned long offset, unsigned long size); extern void iounmap (volatile void __iomem *addr); -extern void __iomem * early_ioremap (unsigned long phys_addr, unsigned long size); -#define early_memremap(phys_addr, size) early_ioremap(phys_addr, size) -extern void early_iounmap (volatile void __iomem *addr, unsigned long size); -#define early_memunmap(addr, size) early_iounmap(addr, size) static inline void __iomem * ioremap_cache (unsigned long phys_addr, unsigned long size) { return ioremap(phys_addr, size); diff --git a/arch/ia64/include/asm/page.h b/arch/ia64/include/asm/page.h index ec48bb9f95e1..e8c486ef0d76 100644 --- a/arch/ia64/include/asm/page.h +++ b/arch/ia64/include/asm/page.h @@ -105,6 +105,7 @@ extern struct page *vmem_map; #ifdef CONFIG_DISCONTIGMEM # define page_to_pfn(page) ((unsigned long) (page - vmem_map)) # define pfn_to_page(pfn) (vmem_map + (pfn)) +# define __pfn_to_phys(pfn) PFN_PHYS(pfn) #else # include <asm-generic/memory_model.h> #endif diff --git a/arch/ia64/include/asm/percpu.h b/arch/ia64/include/asm/percpu.h index 0ec484d2dcbc..b9295793a5e2 100644 --- a/arch/ia64/include/asm/percpu.h +++ b/arch/ia64/include/asm/percpu.h @@ -6,8 +6,6 @@ * David Mosberger-Tang <davidm@hpl.hp.com> */ -#define PERCPU_ENOUGH_ROOM PERCPU_PAGE_SIZE - #ifdef __ASSEMBLY__ # define THIS_CPU(var) (var) /* use this to mark accesses to per-CPU variables... */ #else /* !__ASSEMBLY__ */ diff --git a/arch/ia64/include/uapi/asm/socket.h b/arch/ia64/include/uapi/asm/socket.h index 59be3d87f86d..bce29166de1b 100644 --- a/arch/ia64/include/uapi/asm/socket.h +++ b/arch/ia64/include/uapi/asm/socket.h @@ -94,4 +94,7 @@ #define SO_ATTACH_BPF 50 #define SO_DETACH_BPF SO_DETACH_FILTER +#define SO_ATTACH_REUSEPORT_CBPF 51 +#define SO_ATTACH_REUSEPORT_EBPF 52 + #endif /* _ASM_IA64_SOCKET_H */ diff --git a/arch/ia64/kernel/ftrace.c b/arch/ia64/kernel/ftrace.c index 3b0c2aa07857..cee411e647ca 100644 --- a/arch/ia64/kernel/ftrace.c +++ b/arch/ia64/kernel/ftrace.c @@ -97,13 +97,11 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, unsigned char replaced[MCOUNT_INSN_SIZE]; /* - * Note: Due to modules and __init, code can - * disappear and change, we need to protect against faulting - * as well as code changing. We do this by using the - * probe_kernel_* functions. - * - * No real locking needed, this code is run through - * kstop_machine, or before SMP starts. + * Note: + * We are paranoid about modifying text, as if a bug was to happen, it + * could cause us to read or write to someplace that could cause harm. + * Carefully read and modify the code with probe_kernel_*(), and make + * sure what we read is what we expected it to be before modifying it. */ if (!do_check) diff --git a/arch/ia64/kernel/module.c b/arch/ia64/kernel/module.c index b15933c31b2f..6ab0ae7d6535 100644 --- a/arch/ia64/kernel/module.c +++ b/arch/ia64/kernel/module.c @@ -486,13 +486,13 @@ module_frob_arch_sections (Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, char *secstrings, static inline int in_init (const struct module *mod, uint64_t addr) { - return addr - (uint64_t) mod->module_init < mod->init_size; + return addr - (uint64_t) mod->init_layout.base < mod->init_layout.size; } static inline int in_core (const struct module *mod, uint64_t addr) { - return addr - (uint64_t) mod->module_core < mod->core_size; + return addr - (uint64_t) mod->core_layout.base < mod->core_layout.size; } static inline int @@ -675,7 +675,7 @@ do_reloc (struct module *mod, uint8_t r_type, Elf64_Sym *sym, uint64_t addend, break; case RV_BDREL: - val -= (uint64_t) (in_init(mod, val) ? mod->module_init : mod->module_core); + val -= (uint64_t) (in_init(mod, val) ? mod->init_layout.base : mod->core_layout.base); break; case RV_LTV: @@ -810,15 +810,15 @@ apply_relocate_add (Elf64_Shdr *sechdrs, const char *strtab, unsigned int symind * addresses have been selected... */ uint64_t gp; - if (mod->core_size > MAX_LTOFF) + if (mod->core_layout.size > MAX_LTOFF) /* * This takes advantage of fact that SHF_ARCH_SMALL gets allocated * at the end of the module. */ - gp = mod->core_size - MAX_LTOFF / 2; + gp = mod->core_layout.size - MAX_LTOFF / 2; else - gp = mod->core_size / 2; - gp = (uint64_t) mod->module_core + ((gp + 7) & -8); + gp = mod->core_layout.size / 2; + gp = (uint64_t) mod->core_layout.base + ((gp + 7) & -8); mod->arch.gp = gp; DEBUGP("%s: placing gp at 0x%lx\n", __func__, gp); } diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 60e02f7747ff..9cd607b06964 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -2332,8 +2332,7 @@ pfm_smpl_buffer_alloc(struct task_struct *task, struct file *filp, pfm_context_t */ insert_vm_struct(mm, vma); - vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file, - vma_pages(vma)); + vm_stat_account(vma->vm_mm, vma->vm_flags, vma_pages(vma)); up_write(&task->mm->mmap_sem); /* diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig index 9e44bbd8051e..836ac5a963c8 100644 --- a/arch/m32r/Kconfig +++ b/arch/m32r/Kconfig @@ -13,6 +13,7 @@ config M32R select GENERIC_IRQ_PROBE select GENERIC_IRQ_SHOW select GENERIC_ATOMIC64 + select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_USES_GETTIMEOFFSET select MODULES_USE_ELF_RELA select HAVE_DEBUG_STACKOVERFLOW diff --git a/arch/m32r/include/uapi/asm/socket.h b/arch/m32r/include/uapi/asm/socket.h index 7bc4cb273856..14aa4a6bccf1 100644 --- a/arch/m32r/include/uapi/asm/socket.h +++ b/arch/m32r/include/uapi/asm/socket.h @@ -85,4 +85,7 @@ #define SO_ATTACH_BPF 50 #define SO_DETACH_BPF SO_DETACH_FILTER +#define SO_ATTACH_REUSEPORT_CBPF 51 +#define SO_ATTACH_REUSEPORT_EBPF 52 + #endif /* _ASM_M32R_SOCKET_H */ diff --git a/arch/m32r/kernel/setup.c b/arch/m32r/kernel/setup.c index 0392112a5d70..a5ecef7188ba 100644 --- a/arch/m32r/kernel/setup.c +++ b/arch/m32r/kernel/setup.c @@ -81,7 +81,10 @@ static struct resource code_resource = { }; unsigned long memory_start; +EXPORT_SYMBOL(memory_start); + unsigned long memory_end; +EXPORT_SYMBOL(memory_end); void __init setup_arch(char **); int get_cpuinfo(char *); diff --git a/arch/m68k/atari/config.c b/arch/m68k/atari/config.c index 192b00f098f4..cbd5991fd49a 100644 --- a/arch/m68k/atari/config.c +++ b/arch/m68k/atari/config.c @@ -858,7 +858,7 @@ static struct platform_device *atari_netusbee_devices[] __initdata = { }; #endif /* CONFIG_ATARI_ETHERNEC */ -#ifdef CONFIG_ATARI_SCSI +#if IS_ENABLED(CONFIG_ATARI_SCSI) static const struct resource atari_scsi_st_rsrc[] __initconst = { { .flags = IORESOURCE_IRQ, @@ -910,7 +910,7 @@ int __init atari_platform_init(void) } #endif -#ifdef CONFIG_ATARI_SCSI +#if IS_ENABLED(CONFIG_ATARI_SCSI) if (ATARIHW_PRESENT(ST_SCSI)) platform_device_register_simple("atari_scsi", -1, atari_scsi_st_rsrc, ARRAY_SIZE(atari_scsi_st_rsrc)); diff --git a/arch/m68k/coldfire/gpio.c b/arch/m68k/coldfire/gpio.c index e7e428681ec5..37a83e27c7a6 100644 --- a/arch/m68k/coldfire/gpio.c +++ b/arch/m68k/coldfire/gpio.c @@ -121,7 +121,7 @@ static int mcfgpio_direction_input(struct gpio_chip *chip, unsigned offset) static int mcfgpio_get_value(struct gpio_chip *chip, unsigned offset) { - return __mcfgpio_get_value(offset); + return !!__mcfgpio_get_value(offset); } static int mcfgpio_direction_output(struct gpio_chip *chip, unsigned offset, diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig index 5b4ec541ba7c..fc96e814188e 100644 --- a/arch/m68k/configs/amiga_defconfig +++ b/arch/m68k/configs/amiga_defconfig @@ -276,6 +276,7 @@ CONFIG_MPLS=y CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m CONFIG_MPLS_IPTUNNEL=m +CONFIG_NET_L3_MASTER_DEV=y # CONFIG_WIRELESS is not set # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y @@ -532,11 +533,13 @@ CONFIG_NLS_MAC_INUIT=m CONFIG_NLS_MAC_ROMANIAN=m CONFIG_NLS_MAC_TURKISH=m CONFIG_DLM=m +# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m +CONFIG_TEST_PRINTF=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_LKM=m CONFIG_TEST_USER_COPY=m @@ -559,6 +562,7 @@ CONFIG_CRYPTO_CTS=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_XTS=m +CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_MICHAEL_MIC=m diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig index 6e5198e2c124..05c904f08d9d 100644 --- a/arch/m68k/configs/apollo_defconfig +++ b/arch/m68k/configs/apollo_defconfig @@ -274,6 +274,7 @@ CONFIG_MPLS=y CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m CONFIG_MPLS_IPTUNNEL=m +CONFIG_NET_L3_MASTER_DEV=y # CONFIG_WIRELESS is not set # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y @@ -372,6 +373,7 @@ CONFIG_INPUT_EVDEV=m # CONFIG_MOUSE_PS2 is not set CONFIG_MOUSE_SERIAL=m CONFIG_SERIO=m +CONFIG_USERIO=m # CONFIG_LEGACY_PTYS is not set # CONFIG_DEVKMEM is not set # CONFIG_HW_RANDOM is not set @@ -490,11 +492,13 @@ CONFIG_NLS_MAC_INUIT=m CONFIG_NLS_MAC_ROMANIAN=m CONFIG_NLS_MAC_TURKISH=m CONFIG_DLM=m +# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m +CONFIG_TEST_PRINTF=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_LKM=m CONFIG_TEST_USER_COPY=m @@ -517,6 +521,7 @@ CONFIG_CRYPTO_CTS=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_XTS=m +CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_MICHAEL_MIC=m diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig index f75600b0ca23..d572b731c510 100644 --- a/arch/m68k/configs/atari_defconfig +++ b/arch/m68k/configs/atari_defconfig @@ -274,6 +274,7 @@ CONFIG_MPLS=y CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m CONFIG_MPLS_IPTUNNEL=m +CONFIG_NET_L3_MASTER_DEV=y # CONFIG_WIRELESS is not set # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y @@ -512,11 +513,13 @@ CONFIG_NLS_MAC_INUIT=m CONFIG_NLS_MAC_ROMANIAN=m CONFIG_NLS_MAC_TURKISH=m CONFIG_DLM=m +# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m +CONFIG_TEST_PRINTF=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_LKM=m CONFIG_TEST_USER_COPY=m @@ -539,6 +542,7 @@ CONFIG_CRYPTO_CTS=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_XTS=m +CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_MICHAEL_MIC=m diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig index a42d91c389a6..11a30c65ad44 100644 --- a/arch/m68k/configs/bvme6000_defconfig +++ b/arch/m68k/configs/bvme6000_defconfig @@ -272,6 +272,7 @@ CONFIG_MPLS=y CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m CONFIG_MPLS_IPTUNNEL=m +CONFIG_NET_L3_MASTER_DEV=y # CONFIG_WIRELESS is not set # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y @@ -483,11 +484,13 @@ CONFIG_NLS_MAC_INUIT=m CONFIG_NLS_MAC_ROMANIAN=m CONFIG_NLS_MAC_TURKISH=m CONFIG_DLM=m +# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m +CONFIG_TEST_PRINTF=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_LKM=m CONFIG_TEST_USER_COPY=m @@ -510,6 +513,7 @@ CONFIG_CRYPTO_CTS=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_XTS=m +CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_MICHAEL_MIC=m diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig index 77f4a11083e9..6630a5154b9d 100644 --- a/arch/m68k/configs/hp300_defconfig +++ b/arch/m68k/configs/hp300_defconfig @@ -274,6 +274,7 @@ CONFIG_MPLS=y CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m CONFIG_MPLS_IPTUNNEL=m +CONFIG_NET_L3_MASTER_DEV=y # CONFIG_WIRELESS is not set # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y @@ -375,6 +376,7 @@ CONFIG_MOUSE_SERIAL=m CONFIG_INPUT_MISC=y CONFIG_HP_SDC_RTC=m CONFIG_SERIO_SERPORT=m +CONFIG_USERIO=m # CONFIG_LEGACY_PTYS is not set # CONFIG_DEVKMEM is not set # CONFIG_HW_RANDOM is not set @@ -492,11 +494,13 @@ CONFIG_NLS_MAC_INUIT=m CONFIG_NLS_MAC_ROMANIAN=m CONFIG_NLS_MAC_TURKISH=m CONFIG_DLM=m +# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m +CONFIG_TEST_PRINTF=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_LKM=m CONFIG_TEST_USER_COPY=m @@ -519,6 +523,7 @@ CONFIG_CRYPTO_CTS=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_XTS=m +CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_MICHAEL_MIC=m diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig index 5a329f77329b..1d90b71d0903 100644 --- a/arch/m68k/configs/mac_defconfig +++ b/arch/m68k/configs/mac_defconfig @@ -276,6 +276,7 @@ CONFIG_MPLS=y CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m CONFIG_MPLS_IPTUNNEL=m +CONFIG_NET_L3_MASTER_DEV=y # CONFIG_WIRELESS is not set # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y @@ -394,6 +395,7 @@ CONFIG_MOUSE_SERIAL=m CONFIG_INPUT_MISC=y CONFIG_INPUT_M68K_BEEP=m CONFIG_SERIO=m +CONFIG_USERIO=m # CONFIG_LEGACY_PTYS is not set # CONFIG_DEVKMEM is not set CONFIG_SERIAL_PMACZILOG=y @@ -514,11 +516,13 @@ CONFIG_NLS_MAC_INUIT=m CONFIG_NLS_MAC_ROMANIAN=m CONFIG_NLS_MAC_TURKISH=m CONFIG_DLM=m +# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m +CONFIG_TEST_PRINTF=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_LKM=m CONFIG_TEST_USER_COPY=m @@ -541,6 +545,7 @@ CONFIG_CRYPTO_CTS=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_XTS=m +CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_MICHAEL_MIC=m diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig index 83c80d2030ec..1fd21c1ca87f 100644 --- a/arch/m68k/configs/multi_defconfig +++ b/arch/m68k/configs/multi_defconfig @@ -286,6 +286,7 @@ CONFIG_MPLS=y CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m CONFIG_MPLS_IPTUNNEL=m +CONFIG_NET_L3_MASTER_DEV=y # CONFIG_WIRELESS is not set # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y @@ -449,6 +450,7 @@ CONFIG_INPUT_MISC=y CONFIG_INPUT_M68K_BEEP=m CONFIG_HP_SDC_RTC=m CONFIG_SERIO_Q40KBD=y +CONFIG_USERIO=m # CONFIG_LEGACY_PTYS is not set # CONFIG_DEVKMEM is not set CONFIG_SERIAL_PMACZILOG=y @@ -594,11 +596,13 @@ CONFIG_NLS_MAC_INUIT=m CONFIG_NLS_MAC_ROMANIAN=m CONFIG_NLS_MAC_TURKISH=m CONFIG_DLM=m +# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m +CONFIG_TEST_PRINTF=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_LKM=m CONFIG_TEST_USER_COPY=m @@ -621,6 +625,7 @@ CONFIG_CRYPTO_CTS=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_XTS=m +CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_MICHAEL_MIC=m diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig index 6cb42c3bf5a2..74e10f79d7b1 100644 --- a/arch/m68k/configs/mvme147_defconfig +++ b/arch/m68k/configs/mvme147_defconfig @@ -271,6 +271,7 @@ CONFIG_MPLS=y CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m CONFIG_MPLS_IPTUNNEL=m +CONFIG_NET_L3_MASTER_DEV=y # CONFIG_WIRELESS is not set # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y @@ -483,11 +484,13 @@ CONFIG_NLS_MAC_INUIT=m CONFIG_NLS_MAC_ROMANIAN=m CONFIG_NLS_MAC_TURKISH=m CONFIG_DLM=m +# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m +CONFIG_TEST_PRINTF=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_LKM=m CONFIG_TEST_USER_COPY=m @@ -510,6 +513,7 @@ CONFIG_CRYPTO_CTS=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_XTS=m +CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_MICHAEL_MIC=m diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig index c7508c30330c..7034e716f166 100644 --- a/arch/m68k/configs/mvme16x_defconfig +++ b/arch/m68k/configs/mvme16x_defconfig @@ -272,6 +272,7 @@ CONFIG_MPLS=y CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m CONFIG_MPLS_IPTUNNEL=m +CONFIG_NET_L3_MASTER_DEV=y # CONFIG_WIRELESS is not set # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y @@ -483,11 +484,13 @@ CONFIG_NLS_MAC_INUIT=m CONFIG_NLS_MAC_ROMANIAN=m CONFIG_NLS_MAC_TURKISH=m CONFIG_DLM=m +# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m +CONFIG_TEST_PRINTF=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_LKM=m CONFIG_TEST_USER_COPY=m @@ -510,6 +513,7 @@ CONFIG_CRYPTO_CTS=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_XTS=m +CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_MICHAEL_MIC=m diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig index 64b71664a303..f7deb5f702a6 100644 --- a/arch/m68k/configs/q40_defconfig +++ b/arch/m68k/configs/q40_defconfig @@ -272,6 +272,7 @@ CONFIG_MPLS=y CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m CONFIG_MPLS_IPTUNNEL=m +CONFIG_NET_L3_MASTER_DEV=y # CONFIG_WIRELESS is not set # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y @@ -385,6 +386,7 @@ CONFIG_MOUSE_SERIAL=m CONFIG_INPUT_MISC=y CONFIG_INPUT_M68K_BEEP=m CONFIG_SERIO_Q40KBD=y +CONFIG_USERIO=m # CONFIG_LEGACY_PTYS is not set # CONFIG_DEVKMEM is not set CONFIG_PRINTER=m @@ -505,11 +507,13 @@ CONFIG_NLS_MAC_INUIT=m CONFIG_NLS_MAC_ROMANIAN=m CONFIG_NLS_MAC_TURKISH=m CONFIG_DLM=m +# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m +CONFIG_TEST_PRINTF=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_LKM=m CONFIG_TEST_USER_COPY=m @@ -532,6 +536,7 @@ CONFIG_CRYPTO_CTS=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_XTS=m +CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_MICHAEL_MIC=m diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig index 9a4cab78a2ea..0ce79eb0d805 100644 --- a/arch/m68k/configs/sun3_defconfig +++ b/arch/m68k/configs/sun3_defconfig @@ -269,6 +269,7 @@ CONFIG_MPLS=y CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m CONFIG_MPLS_IPTUNNEL=m +CONFIG_NET_L3_MASTER_DEV=y # CONFIG_WIRELESS is not set # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y @@ -369,6 +370,7 @@ CONFIG_INPUT_EVDEV=m CONFIG_KEYBOARD_SUNKBD=y # CONFIG_MOUSE_PS2 is not set CONFIG_MOUSE_SERIAL=m +CONFIG_USERIO=m # CONFIG_LEGACY_PTYS is not set # CONFIG_DEVKMEM is not set # CONFIG_HW_RANDOM is not set @@ -484,11 +486,13 @@ CONFIG_NLS_MAC_INUIT=m CONFIG_NLS_MAC_ROMANIAN=m CONFIG_NLS_MAC_TURKISH=m CONFIG_DLM=m +# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m +CONFIG_TEST_PRINTF=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_LKM=m CONFIG_TEST_USER_COPY=m @@ -510,6 +514,7 @@ CONFIG_CRYPTO_CTS=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_XTS=m +CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_MICHAEL_MIC=m diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig index 1a2eaac13dbd..4cb787e4991f 100644 --- a/arch/m68k/configs/sun3x_defconfig +++ b/arch/m68k/configs/sun3x_defconfig @@ -269,6 +269,7 @@ CONFIG_MPLS=y CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m CONFIG_MPLS_IPTUNNEL=m +CONFIG_NET_L3_MASTER_DEV=y # CONFIG_WIRELESS is not set # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y @@ -369,6 +370,7 @@ CONFIG_INPUT_EVDEV=m CONFIG_KEYBOARD_SUNKBD=y # CONFIG_MOUSE_PS2 is not set CONFIG_MOUSE_SERIAL=m +CONFIG_USERIO=m # CONFIG_LEGACY_PTYS is not set # CONFIG_DEVKMEM is not set # CONFIG_HW_RANDOM is not set @@ -484,11 +486,13 @@ CONFIG_NLS_MAC_INUIT=m CONFIG_NLS_MAC_ROMANIAN=m CONFIG_NLS_MAC_TURKISH=m CONFIG_DLM=m +# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m +CONFIG_TEST_PRINTF=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_LKM=m CONFIG_TEST_USER_COPY=m @@ -511,6 +515,7 @@ CONFIG_CRYPTO_CTS=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_XTS=m +CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_MICHAEL_MIC=m diff --git a/arch/m68k/include/asm/mac_psc.h b/arch/m68k/include/asm/mac_psc.h index e5c0d71d1543..923305117a69 100644 --- a/arch/m68k/include/asm/mac_psc.h +++ b/arch/m68k/include/asm/mac_psc.h @@ -209,7 +209,6 @@ #ifndef __ASSEMBLY__ extern volatile __u8 *psc; -extern int psc_present; extern void psc_register_interrupts(void); extern void psc_irq_enable(int); diff --git a/arch/m68k/include/asm/page.h b/arch/m68k/include/asm/page.h index 38b024a0b045..430d4d54c883 100644 --- a/arch/m68k/include/asm/page.h +++ b/arch/m68k/include/asm/page.h @@ -48,6 +48,9 @@ extern unsigned long _ramend; #include <asm/page_no.h> #endif +#define __phys_to_pfn(paddr) ((unsigned long)((paddr) >> PAGE_SHIFT)) +#define __pfn_to_phys(pfn) PFN_PHYS(pfn) + #define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) diff --git a/arch/m68k/include/asm/uaccess_no.h b/arch/m68k/include/asm/uaccess_no.h index 68bbe9b312f1..1bdf15263754 100644 --- a/arch/m68k/include/asm/uaccess_no.h +++ b/arch/m68k/include/asm/uaccess_no.h @@ -135,10 +135,6 @@ extern int __get_user_bad(void); #define __copy_to_user_inatomic __copy_to_user #define __copy_from_user_inatomic __copy_from_user -#define copy_to_user_ret(to,from,n,retval) ({ if (copy_to_user(to,from,n)) return retval; }) - -#define copy_from_user_ret(to,from,n,retval) ({ if (copy_from_user(to,from,n)) return retval; }) - /* * Copy a null terminated string from userspace. */ diff --git a/arch/m68k/mac/macints.c b/arch/m68k/mac/macints.c index 5c1a6b2ff0af..9f98c0871901 100644 --- a/arch/m68k/mac/macints.c +++ b/arch/m68k/mac/macints.c @@ -174,7 +174,7 @@ void __init mac_init_IRQ(void) oss_register_interrupts(); else via_register_interrupts(); - if (psc_present) + if (psc) psc_register_interrupts(); if (baboon_present) baboon_register_interrupts(); @@ -212,7 +212,7 @@ void mac_irq_enable(struct irq_data *data) case 4: case 5: case 6: - if (psc_present) + if (psc) psc_irq_enable(irq); else if (oss_present) oss_irq_enable(irq); @@ -242,7 +242,7 @@ void mac_irq_disable(struct irq_data *data) case 4: case 5: case 6: - if (psc_present) + if (psc) psc_irq_disable(irq); else if (oss_present) oss_irq_disable(irq); diff --git a/arch/m68k/mac/psc.c b/arch/m68k/mac/psc.c index 2290c0cae48b..cb2b1a3a2b62 100644 --- a/arch/m68k/mac/psc.c +++ b/arch/m68k/mac/psc.c @@ -27,7 +27,6 @@ #define DEBUG_PSC -int psc_present; volatile __u8 *psc; EXPORT_SYMBOL_GPL(psc); @@ -39,7 +38,9 @@ static void psc_debug_dump(void) { int i; - if (!psc_present) return; + if (!psc) + return; + for (i = 0x30 ; i < 0x70 ; i += 0x10) { printk("PSC #%d: IFR = 0x%02X IER = 0x%02X\n", i >> 4, @@ -81,7 +82,6 @@ void __init psc_init(void) && macintosh_config->ident != MAC_MODEL_Q840) { psc = NULL; - psc_present = 0; return; } @@ -91,7 +91,6 @@ void __init psc_init(void) */ psc = (void *) PSC_BASE; - psc_present = 1; printk("PSC detected at %p\n", psc); diff --git a/arch/m68k/sun3/config.c b/arch/m68k/sun3/config.c index 2a5f43a68ae3..71884bf01d72 100644 --- a/arch/m68k/sun3/config.c +++ b/arch/m68k/sun3/config.c @@ -171,7 +171,7 @@ static void __init sun3_sched_init(irq_handler_t timer_routine) intersil_clear(); } -#ifdef CONFIG_SUN3_SCSI +#if IS_ENABLED(CONFIG_SUN3_SCSI) static const struct resource sun3_scsi_vme_rsrc[] __initconst = { { diff --git a/arch/metag/Kconfig b/arch/metag/Kconfig index 0b389a81c43a..a0fa88da3e31 100644 --- a/arch/metag/Kconfig +++ b/arch/metag/Kconfig @@ -36,9 +36,6 @@ config STACKTRACE_SUPPORT config LOCKDEP_SUPPORT def_bool y -config HAVE_LATENCYTOP_SUPPORT - def_bool y - config RWSEM_GENERIC_SPINLOCK def_bool y diff --git a/arch/metag/kernel/ftrace.c b/arch/metag/kernel/ftrace.c index ed1d685157c2..ac8c039b0318 100644 --- a/arch/metag/kernel/ftrace.c +++ b/arch/metag/kernel/ftrace.c @@ -54,12 +54,11 @@ static int ftrace_modify_code(unsigned long pc, unsigned char *old_code, unsigned char replaced[MCOUNT_INSN_SIZE]; /* - * Note: Due to modules and __init, code can - * disappear and change, we need to protect against faulting - * as well as code changing. - * - * No real locking needed, this code is run through - * kstop_machine. + * Note: + * We are paranoid about modifying text, as if a bug was to happen, it + * could cause us to read or write to someplace that could cause harm. + * Carefully read and modify the code with probe_kernel_*(), and make + * sure what we read is what we expected it to be before modifying it. */ /* read the text we want to modify */ diff --git a/arch/metag/kernel/module.c b/arch/metag/kernel/module.c index 986331cd0a52..bb8dfba9a763 100644 --- a/arch/metag/kernel/module.c +++ b/arch/metag/kernel/module.c @@ -176,8 +176,8 @@ static uint32_t do_plt_call(void *location, Elf32_Addr val, tramp[1] = 0xac000001 | ((val & 0x0000ffff) << 3); /* Init, or core PLT? */ - if (location >= mod->module_core - && location < mod->module_core + mod->core_size) + if (location >= mod->core_layout.base + && location < mod->core_layout.base + mod->core_layout.size) entry = (void *)sechdrs[mod->arch.core_plt_section].sh_addr; else entry = (void *)sechdrs[mod->arch.init_plt_section].sh_addr; diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index 0bce820428fc..5ecd0287a874 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -67,9 +67,6 @@ config STACKTRACE_SUPPORT config LOCKDEP_SUPPORT def_bool y -config HAVE_LATENCYTOP_SUPPORT - def_bool y - source "init/Kconfig" source "kernel/Kconfig.freezer" diff --git a/arch/mips/alchemy/devboards/db1200.c b/arch/mips/alchemy/devboards/db1200.c index 8c13675a12e7..992442a03d8b 100644 --- a/arch/mips/alchemy/devboards/db1200.c +++ b/arch/mips/alchemy/devboards/db1200.c @@ -200,7 +200,7 @@ static struct i2c_board_info db1200_i2c_devs[] __initdata = { static void au1200_nand_cmd_ctrl(struct mtd_info *mtd, int cmd, unsigned int ctrl) { - struct nand_chip *this = mtd->priv; + struct nand_chip *this = mtd_to_nand(mtd); unsigned long ioaddr = (unsigned long)this->IO_ADDR_W; ioaddr &= 0xffffff00; diff --git a/arch/mips/alchemy/devboards/db1300.c b/arch/mips/alchemy/devboards/db1300.c index b58077008a53..d3c087f59f1a 100644 --- a/arch/mips/alchemy/devboards/db1300.c +++ b/arch/mips/alchemy/devboards/db1300.c @@ -150,7 +150,7 @@ static void __init db1300_gpio_config(void) static void au1300_nand_cmd_ctrl(struct mtd_info *mtd, int cmd, unsigned int ctrl) { - struct nand_chip *this = mtd->priv; + struct nand_chip *this = mtd_to_nand(mtd); unsigned long ioaddr = (unsigned long)this->IO_ADDR_W; ioaddr &= 0xffffff00; diff --git a/arch/mips/alchemy/devboards/db1550.c b/arch/mips/alchemy/devboards/db1550.c index 5740bcfdfc7f..b518f029f5e7 100644 --- a/arch/mips/alchemy/devboards/db1550.c +++ b/arch/mips/alchemy/devboards/db1550.c @@ -128,7 +128,7 @@ static struct i2c_board_info db1550_i2c_devs[] __initdata = { static void au1550_nand_cmd_ctrl(struct mtd_info *mtd, int cmd, unsigned int ctrl) { - struct nand_chip *this = mtd->priv; + struct nand_chip *this = mtd_to_nand(mtd); unsigned long ioaddr = (unsigned long)this->IO_ADDR_W; ioaddr &= 0xffffff00; diff --git a/arch/mips/bcm47xx/setup.c b/arch/mips/bcm47xx/setup.c index 6d38948f0f1e..c807e32d6d81 100644 --- a/arch/mips/bcm47xx/setup.c +++ b/arch/mips/bcm47xx/setup.c @@ -101,50 +101,13 @@ static void bcm47xx_machine_halt(void) } #ifdef CONFIG_BCM47XX_SSB -static int bcm47xx_get_invariants(struct ssb_bus *bus, - struct ssb_init_invariants *iv) -{ - char buf[20]; - int len, err; - - /* Fill boardinfo structure */ - memset(&iv->boardinfo, 0 , sizeof(struct ssb_boardinfo)); - - len = bcm47xx_nvram_getenv("boardvendor", buf, sizeof(buf)); - if (len > 0) { - err = kstrtou16(strim(buf), 0, &iv->boardinfo.vendor); - if (err) - pr_warn("Couldn't parse nvram board vendor entry with value \"%s\"\n", - buf); - } - if (!iv->boardinfo.vendor) - iv->boardinfo.vendor = SSB_BOARDVENDOR_BCM; - - len = bcm47xx_nvram_getenv("boardtype", buf, sizeof(buf)); - if (len > 0) { - err = kstrtou16(strim(buf), 0, &iv->boardinfo.type); - if (err) - pr_warn("Couldn't parse nvram board type entry with value \"%s\"\n", - buf); - } - - memset(&iv->sprom, 0, sizeof(struct ssb_sprom)); - bcm47xx_fill_sprom(&iv->sprom, NULL, false); - - if (bcm47xx_nvram_getenv("cardbus", buf, sizeof(buf)) >= 0) - iv->has_cardbus_slot = !!simple_strtoul(buf, NULL, 10); - - return 0; -} - static void __init bcm47xx_register_ssb(void) { int err; char buf[100]; struct ssb_mipscore *mcore; - err = ssb_bus_ssbbus_register(&bcm47xx_bus.ssb, SSB_ENUM_BASE, - bcm47xx_get_invariants); + err = ssb_bus_host_soc_register(&bcm47xx_bus.ssb, SSB_ENUM_BASE); if (err) panic("Failed to initialize SSB bus (err %d)", err); diff --git a/arch/mips/boot/dts/brcm/bcm6328.dtsi b/arch/mips/boot/dts/brcm/bcm6328.dtsi index 41891c1e58bd..d52ce3d07f16 100644 --- a/arch/mips/boot/dts/brcm/bcm6328.dtsi +++ b/arch/mips/boot/dts/brcm/bcm6328.dtsi @@ -73,7 +73,6 @@ timer: timer@10000040 { compatible = "syscon"; reg = <0x10000040 0x2c>; - little-endian; }; reboot { diff --git a/arch/mips/boot/dts/brcm/bcm7125.dtsi b/arch/mips/boot/dts/brcm/bcm7125.dtsi index 1a7efa883c5e..4fc7ecee273c 100644 --- a/arch/mips/boot/dts/brcm/bcm7125.dtsi +++ b/arch/mips/boot/dts/brcm/bcm7125.dtsi @@ -98,7 +98,6 @@ sun_top_ctrl: syscon@404000 { compatible = "brcm,bcm7125-sun-top-ctrl", "syscon"; reg = <0x404000 0x60c>; - little-endian; }; reboot { diff --git a/arch/mips/boot/dts/brcm/bcm7346.dtsi b/arch/mips/boot/dts/brcm/bcm7346.dtsi index d4bf52cfcf17..a3039bb53477 100644 --- a/arch/mips/boot/dts/brcm/bcm7346.dtsi +++ b/arch/mips/boot/dts/brcm/bcm7346.dtsi @@ -118,7 +118,6 @@ sun_top_ctrl: syscon@404000 { compatible = "brcm,bcm7346-sun-top-ctrl", "syscon"; reg = <0x404000 0x51c>; - little-endian; }; reboot { diff --git a/arch/mips/boot/dts/brcm/bcm7358.dtsi b/arch/mips/boot/dts/brcm/bcm7358.dtsi index 8e2501694d03..4274ff41ec21 100644 --- a/arch/mips/boot/dts/brcm/bcm7358.dtsi +++ b/arch/mips/boot/dts/brcm/bcm7358.dtsi @@ -112,7 +112,6 @@ sun_top_ctrl: syscon@404000 { compatible = "brcm,bcm7358-sun-top-ctrl", "syscon"; reg = <0x404000 0x51c>; - little-endian; }; reboot { diff --git a/arch/mips/boot/dts/brcm/bcm7360.dtsi b/arch/mips/boot/dts/brcm/bcm7360.dtsi index 7e5f76040fb8..0dcc9163c27b 100644 --- a/arch/mips/boot/dts/brcm/bcm7360.dtsi +++ b/arch/mips/boot/dts/brcm/bcm7360.dtsi @@ -112,7 +112,6 @@ sun_top_ctrl: syscon@404000 { compatible = "brcm,bcm7360-sun-top-ctrl", "syscon"; reg = <0x404000 0x51c>; - little-endian; }; reboot { diff --git a/arch/mips/boot/dts/brcm/bcm7362.dtsi b/arch/mips/boot/dts/brcm/bcm7362.dtsi index c739ea77acb0..2f3f9fc2c478 100644 --- a/arch/mips/boot/dts/brcm/bcm7362.dtsi +++ b/arch/mips/boot/dts/brcm/bcm7362.dtsi @@ -118,7 +118,6 @@ sun_top_ctrl: syscon@404000 { compatible = "brcm,bcm7362-sun-top-ctrl", "syscon"; reg = <0x404000 0x51c>; - little-endian; }; reboot { diff --git a/arch/mips/boot/dts/brcm/bcm7420.dtsi b/arch/mips/boot/dts/brcm/bcm7420.dtsi index 5f55d0a50a28..bee221b3b568 100644 --- a/arch/mips/boot/dts/brcm/bcm7420.dtsi +++ b/arch/mips/boot/dts/brcm/bcm7420.dtsi @@ -99,7 +99,6 @@ sun_top_ctrl: syscon@404000 { compatible = "brcm,bcm7420-sun-top-ctrl", "syscon"; reg = <0x404000 0x60c>; - little-endian; }; reboot { diff --git a/arch/mips/boot/dts/brcm/bcm7425.dtsi b/arch/mips/boot/dts/brcm/bcm7425.dtsi index e24d41ab4e30..571f30f52e3f 100644 --- a/arch/mips/boot/dts/brcm/bcm7425.dtsi +++ b/arch/mips/boot/dts/brcm/bcm7425.dtsi @@ -100,7 +100,6 @@ sun_top_ctrl: syscon@404000 { compatible = "brcm,bcm7425-sun-top-ctrl", "syscon"; reg = <0x404000 0x51c>; - little-endian; }; reboot { diff --git a/arch/mips/boot/dts/brcm/bcm7435.dtsi b/arch/mips/boot/dts/brcm/bcm7435.dtsi index 8b9432cc062b..614ee211f71a 100644 --- a/arch/mips/boot/dts/brcm/bcm7435.dtsi +++ b/arch/mips/boot/dts/brcm/bcm7435.dtsi @@ -114,7 +114,6 @@ sun_top_ctrl: syscon@404000 { compatible = "brcm,bcm7425-sun-top-ctrl", "syscon"; reg = <0x404000 0x51c>; - little-endian; }; reboot { diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index 6ded8d347af9..7c191443c7ea 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -101,9 +101,9 @@ #define CAUSEF_DC (_ULCAST_(1) << 27) extern atomic_t kvm_mips_instance; -extern pfn_t(*kvm_mips_gfn_to_pfn) (struct kvm *kvm, gfn_t gfn); -extern void (*kvm_mips_release_pfn_clean) (pfn_t pfn); -extern bool(*kvm_mips_is_error_pfn) (pfn_t pfn); +extern kvm_pfn_t (*kvm_mips_gfn_to_pfn)(struct kvm *kvm, gfn_t gfn); +extern void (*kvm_mips_release_pfn_clean)(kvm_pfn_t pfn); +extern bool (*kvm_mips_is_error_pfn)(kvm_pfn_t pfn); struct kvm_vm_stat { u32 remote_tlb_flush; diff --git a/arch/mips/include/asm/pgtable-bits.h b/arch/mips/include/asm/pgtable-bits.h index ff7ad91c85db..97b313882678 100644 --- a/arch/mips/include/asm/pgtable-bits.h +++ b/arch/mips/include/asm/pgtable-bits.h @@ -131,14 +131,12 @@ /* Huge TLB page */ #define _PAGE_HUGE_SHIFT (_PAGE_MODIFIED_SHIFT + 1) #define _PAGE_HUGE (1 << _PAGE_HUGE_SHIFT) -#define _PAGE_SPLITTING_SHIFT (_PAGE_HUGE_SHIFT + 1) -#define _PAGE_SPLITTING (1 << _PAGE_SPLITTING_SHIFT) #endif /* CONFIG_64BIT && CONFIG_MIPS_HUGE_TLB_SUPPORT */ #if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6) /* XI - page cannot be executed */ -#ifdef _PAGE_SPLITTING_SHIFT -#define _PAGE_NO_EXEC_SHIFT (_PAGE_SPLITTING_SHIFT + 1) +#ifdef _PAGE_HUGE_SHIFT +#define _PAGE_NO_EXEC_SHIFT (_PAGE_HUGE_SHIFT + 1) #else #define _PAGE_NO_EXEC_SHIFT (_PAGE_MODIFIED_SHIFT + 1) #endif @@ -153,8 +151,8 @@ #if defined(_PAGE_NO_READ_SHIFT) #define _PAGE_GLOBAL_SHIFT (_PAGE_NO_READ_SHIFT + 1) -#elif defined(_PAGE_SPLITTING_SHIFT) -#define _PAGE_GLOBAL_SHIFT (_PAGE_SPLITTING_SHIFT + 1) +#elif defined(_PAGE_HUGE_SHIFT) +#define _PAGE_GLOBAL_SHIFT (_PAGE_HUGE_SHIFT + 1) #else #define _PAGE_GLOBAL_SHIFT (_PAGE_MODIFIED_SHIFT + 1) #endif diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h index 8957f15e21ec..6995b4a02e23 100644 --- a/arch/mips/include/asm/pgtable.h +++ b/arch/mips/include/asm/pgtable.h @@ -482,27 +482,9 @@ static inline pmd_t pmd_mkhuge(pmd_t pmd) return pmd; } -static inline int pmd_trans_splitting(pmd_t pmd) -{ - return !!(pmd_val(pmd) & _PAGE_SPLITTING); -} - -static inline pmd_t pmd_mksplitting(pmd_t pmd) -{ - pmd_val(pmd) |= _PAGE_SPLITTING; - - return pmd; -} - extern void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t pmd); -#define __HAVE_ARCH_PMDP_SPLITTING_FLUSH -/* Extern to avoid header file madness */ -extern void pmdp_splitting_flush(struct vm_area_struct *vma, - unsigned long address, - pmd_t *pmdp); - #define __HAVE_ARCH_PMD_WRITE static inline int pmd_write(pmd_t pmd) { diff --git a/arch/mips/include/uapi/asm/mman.h b/arch/mips/include/uapi/asm/mman.h index 97c03f468924..b0ebe59f73fd 100644 --- a/arch/mips/include/uapi/asm/mman.h +++ b/arch/mips/include/uapi/asm/mman.h @@ -73,8 +73,10 @@ #define MADV_SEQUENTIAL 2 /* expect sequential page references */ #define MADV_WILLNEED 3 /* will need these pages */ #define MADV_DONTNEED 4 /* don't need these pages */ +#define MADV_FREE 5 /* free pages only if memory pressure */ /* common parameters: try to keep these consistent across architectures */ +#define MADV_FREE 8 /* free pages only if memory pressure */ #define MADV_REMOVE 9 /* remove these pages & resources */ #define MADV_DONTFORK 10 /* don't inherit across fork */ #define MADV_DOFORK 11 /* do inherit across fork */ diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h index dec3c850f36b..5910fe294e93 100644 --- a/arch/mips/include/uapi/asm/socket.h +++ b/arch/mips/include/uapi/asm/socket.h @@ -103,4 +103,7 @@ #define SO_ATTACH_BPF 50 #define SO_DETACH_BPF SO_DETACH_FILTER +#define SO_ATTACH_REUSEPORT_CBPF 51 +#define SO_ATTACH_REUSEPORT_EBPF 52 + #endif /* _UAPI_ASM_SOCKET_H */ diff --git a/arch/mips/kernel/vpe.c b/arch/mips/kernel/vpe.c index 9067b651c7a2..544ea21bfef9 100644 --- a/arch/mips/kernel/vpe.c +++ b/arch/mips/kernel/vpe.c @@ -205,11 +205,11 @@ static void layout_sections(struct module *mod, const Elf_Ehdr *hdr, || s->sh_entsize != ~0UL) continue; s->sh_entsize = - get_offset((unsigned long *)&mod->core_size, s); + get_offset((unsigned long *)&mod->core_layout.size, s); } if (m == 0) - mod->core_text_size = mod->core_size; + mod->core_layout.text_size = mod->core_layout.size; } } @@ -641,7 +641,7 @@ static int vpe_elfload(struct vpe *v) layout_sections(&mod, hdr, sechdrs, secstrings); } - v->load_addr = alloc_progmem(mod.core_size); + v->load_addr = alloc_progmem(mod.core_layout.size); if (!v->load_addr) return -ENOMEM; diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c index 41b1b090f56f..1b675c7ce89f 100644 --- a/arch/mips/kvm/emulate.c +++ b/arch/mips/kvm/emulate.c @@ -1525,7 +1525,7 @@ int kvm_mips_sync_icache(unsigned long va, struct kvm_vcpu *vcpu) struct kvm *kvm = vcpu->kvm; unsigned long pa; gfn_t gfn; - pfn_t pfn; + kvm_pfn_t pfn; gfn = va >> PAGE_SHIFT; diff --git a/arch/mips/kvm/tlb.c b/arch/mips/kvm/tlb.c index aed0ac2a4972..570479c03bdc 100644 --- a/arch/mips/kvm/tlb.c +++ b/arch/mips/kvm/tlb.c @@ -38,13 +38,13 @@ atomic_t kvm_mips_instance; EXPORT_SYMBOL(kvm_mips_instance); /* These function pointers are initialized once the KVM module is loaded */ -pfn_t (*kvm_mips_gfn_to_pfn)(struct kvm *kvm, gfn_t gfn); +kvm_pfn_t (*kvm_mips_gfn_to_pfn)(struct kvm *kvm, gfn_t gfn); EXPORT_SYMBOL(kvm_mips_gfn_to_pfn); -void (*kvm_mips_release_pfn_clean)(pfn_t pfn); +void (*kvm_mips_release_pfn_clean)(kvm_pfn_t pfn); EXPORT_SYMBOL(kvm_mips_release_pfn_clean); -bool (*kvm_mips_is_error_pfn)(pfn_t pfn); +bool (*kvm_mips_is_error_pfn)(kvm_pfn_t pfn); EXPORT_SYMBOL(kvm_mips_is_error_pfn); uint32_t kvm_mips_get_kernel_asid(struct kvm_vcpu *vcpu) @@ -144,7 +144,7 @@ EXPORT_SYMBOL(kvm_mips_dump_guest_tlbs); static int kvm_mips_map_page(struct kvm *kvm, gfn_t gfn) { int srcu_idx, err = 0; - pfn_t pfn; + kvm_pfn_t pfn; if (kvm->arch.guest_pmap[gfn] != KVM_INVALID_PAGE) return 0; @@ -262,7 +262,7 @@ int kvm_mips_handle_kseg0_tlb_fault(unsigned long badvaddr, struct kvm_vcpu *vcpu) { gfn_t gfn; - pfn_t pfn0, pfn1; + kvm_pfn_t pfn0, pfn1; unsigned long vaddr = 0; unsigned long entryhi = 0, entrylo0 = 0, entrylo1 = 0; int even; @@ -313,7 +313,7 @@ EXPORT_SYMBOL(kvm_mips_handle_kseg0_tlb_fault); int kvm_mips_handle_commpage_tlb_fault(unsigned long badvaddr, struct kvm_vcpu *vcpu) { - pfn_t pfn0, pfn1; + kvm_pfn_t pfn0, pfn1; unsigned long flags, old_entryhi = 0, vaddr = 0; unsigned long entrylo0 = 0, entrylo1 = 0; @@ -360,7 +360,7 @@ int kvm_mips_handle_mapped_seg_tlb_fault(struct kvm_vcpu *vcpu, { unsigned long entryhi = 0, entrylo0 = 0, entrylo1 = 0; struct kvm *kvm = vcpu->kvm; - pfn_t pfn0, pfn1; + kvm_pfn_t pfn0, pfn1; if ((tlb->tlb_hi & VPN2_MASK) == 0) { pfn0 = 0; diff --git a/arch/mips/lasat/picvue_proc.c b/arch/mips/lasat/picvue_proc.c index 2bcd8391bc93..b42095880667 100644 --- a/arch/mips/lasat/picvue_proc.c +++ b/arch/mips/lasat/picvue_proc.c @@ -22,7 +22,6 @@ static DEFINE_MUTEX(pvc_mutex); static char pvc_lines[PVC_NLINES][PVC_LINELEN+1]; static int pvc_linedata[PVC_NLINES]; -static struct proc_dir_entry *pvc_display_dir; static char *pvc_linename[PVC_NLINES] = {"line1", "line2"}; #define DISPLAY_DIR_NAME "display" static int scroll_dir, scroll_interval; @@ -169,22 +168,17 @@ void pvc_proc_timerfunc(unsigned long data) static void pvc_proc_cleanup(void) { - int i; - for (i = 0; i < PVC_NLINES; i++) - remove_proc_entry(pvc_linename[i], pvc_display_dir); - remove_proc_entry("scroll", pvc_display_dir); - remove_proc_entry(DISPLAY_DIR_NAME, NULL); - + remove_proc_subtree(DISPLAY_DIR_NAME, NULL); del_timer_sync(&timer); } static int __init pvc_proc_init(void) { - struct proc_dir_entry *proc_entry; + struct proc_dir_entry *dir, *proc_entry; int i; - pvc_display_dir = proc_mkdir(DISPLAY_DIR_NAME, NULL); - if (pvc_display_dir == NULL) + dir = proc_mkdir(DISPLAY_DIR_NAME, NULL); + if (dir == NULL) goto error; for (i = 0; i < PVC_NLINES; i++) { @@ -192,12 +186,12 @@ static int __init pvc_proc_init(void) pvc_linedata[i] = i; } for (i = 0; i < PVC_NLINES; i++) { - proc_entry = proc_create_data(pvc_linename[i], 0644, pvc_display_dir, + proc_entry = proc_create_data(pvc_linename[i], 0644, dir, &pvc_line_proc_fops, &pvc_linedata[i]); if (proc_entry == NULL) goto error; } - proc_entry = proc_create("scroll", 0644, pvc_display_dir, + proc_entry = proc_create("scroll", 0644, dir, &pvc_scroll_proc_fops); if (proc_entry == NULL) goto error; diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c index 5d3a25e1cfae..caac3d747a90 100644 --- a/arch/mips/mm/c-r4k.c +++ b/arch/mips/mm/c-r4k.c @@ -587,7 +587,8 @@ static inline void local_r4k_flush_cache_page(void *args) * another ASID than the current one. */ map_coherent = (cpu_has_dc_aliases && - page_mapped(page) && !Page_dcache_dirty(page)); + page_mapcount(page) && + !Page_dcache_dirty(page)); if (map_coherent) vaddr = kmap_coherent(page, addr); else diff --git a/arch/mips/mm/cache.c b/arch/mips/mm/cache.c index aab218c36e0d..3f159caf6dbc 100644 --- a/arch/mips/mm/cache.c +++ b/arch/mips/mm/cache.c @@ -106,7 +106,7 @@ void __flush_anon_page(struct page *page, unsigned long vmaddr) unsigned long addr = (unsigned long) page_address(page); if (pages_do_alias(addr, vmaddr)) { - if (page_mapped(page) && !Page_dcache_dirty(page)) { + if (page_mapcount(page) && !Page_dcache_dirty(page)) { void *kaddr; kaddr = kmap_coherent(page, vmaddr); diff --git a/arch/mips/mm/gup.c b/arch/mips/mm/gup.c index 349995d19c7f..1afd87c999b0 100644 --- a/arch/mips/mm/gup.c +++ b/arch/mips/mm/gup.c @@ -87,8 +87,6 @@ static int gup_huge_pmd(pmd_t pmd, unsigned long addr, unsigned long end, do { VM_BUG_ON(compound_head(page) != head); pages[*nr] = page; - if (PageTail(page)) - get_huge_page_tail(page); (*nr)++; page++; refs++; @@ -109,18 +107,7 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, pmd_t pmd = *pmdp; next = pmd_addr_end(addr, end); - /* - * The pmd_trans_splitting() check below explains why - * pmdp_splitting_flush has to flush the tlb, to stop - * this gup-fast code from running while we set the - * splitting bit in the pmd. Returning zero will take - * the slow path that will call wait_split_huge_page() - * if the pmd is still in splitting state. gup-fast - * can't because it has irq disabled and - * wait_split_huge_page() would never return as the - * tlb flush IPI wouldn't run. - */ - if (pmd_none(pmd) || pmd_trans_splitting(pmd)) + if (pmd_none(pmd)) return 0; if (unlikely(pmd_huge(pmd))) { if (!gup_huge_pmd(pmd, addr, next, write, pages,nr)) @@ -153,8 +140,6 @@ static int gup_huge_pud(pud_t pud, unsigned long addr, unsigned long end, do { VM_BUG_ON(compound_head(page) != head); pages[*nr] = page; - if (PageTail(page)) - get_huge_page_tail(page); (*nr)++; page++; refs++; diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 8770e619185e..7e5fa0938c21 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -165,7 +165,7 @@ void copy_user_highpage(struct page *to, struct page *from, vto = kmap_atomic(to); if (cpu_has_dc_aliases && - page_mapped(from) && !Page_dcache_dirty(from)) { + page_mapcount(from) && !Page_dcache_dirty(from)) { vfrom = kmap_coherent(from, vaddr); copy_page(vto, vfrom); kunmap_coherent(); @@ -187,7 +187,7 @@ void copy_to_user_page(struct vm_area_struct *vma, unsigned long len) { if (cpu_has_dc_aliases && - page_mapped(page) && !Page_dcache_dirty(page)) { + page_mapcount(page) && !Page_dcache_dirty(page)) { void *vto = kmap_coherent(page, vaddr) + (vaddr & ~PAGE_MASK); memcpy(vto, src, len); kunmap_coherent(); @@ -205,7 +205,7 @@ void copy_from_user_page(struct vm_area_struct *vma, unsigned long len) { if (cpu_has_dc_aliases && - page_mapped(page) && !Page_dcache_dirty(page)) { + page_mapcount(page) && !Page_dcache_dirty(page)) { void *vfrom = kmap_coherent(page, vaddr) + (vaddr & ~PAGE_MASK); memcpy(dst, vfrom, len); kunmap_coherent(); diff --git a/arch/mips/mm/pgtable-64.c b/arch/mips/mm/pgtable-64.c index e8adc0069d66..ce4473e7c0d2 100644 --- a/arch/mips/mm/pgtable-64.c +++ b/arch/mips/mm/pgtable-64.c @@ -62,20 +62,6 @@ void pmd_init(unsigned long addr, unsigned long pagetable) } #endif -#ifdef CONFIG_TRANSPARENT_HUGEPAGE - -void pmdp_splitting_flush(struct vm_area_struct *vma, - unsigned long address, - pmd_t *pmdp) -{ - if (!pmd_trans_splitting(*pmdp)) { - pmd_t pmd = pmd_mksplitting(*pmdp); - set_pmd_at(vma->vm_mm, address, pmdp, pmd); - } -} - -#endif - pmd_t mk_pmd(struct page *page, pgprot_t prot) { pmd_t pmd; diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c index 32e0be27673f..482192cc8f2b 100644 --- a/arch/mips/mm/tlbex.c +++ b/arch/mips/mm/tlbex.c @@ -240,7 +240,6 @@ static void output_pgtable_bits_defines(void) pr_define("_PAGE_MODIFIED_SHIFT %d\n", _PAGE_MODIFIED_SHIFT); #ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT pr_define("_PAGE_HUGE_SHIFT %d\n", _PAGE_HUGE_SHIFT); - pr_define("_PAGE_SPLITTING_SHIFT %d\n", _PAGE_SPLITTING_SHIFT); #endif #ifdef CONFIG_CPU_MIPSR2 if (cpu_has_rixi) { diff --git a/arch/mips/pnx833x/common/platform.c b/arch/mips/pnx833x/common/platform.c index b4b774bc3178..3cd357737a26 100644 --- a/arch/mips/pnx833x/common/platform.c +++ b/arch/mips/pnx833x/common/platform.c @@ -180,7 +180,7 @@ static struct platform_device pnx833x_sata_device = { static void pnx833x_flash_nand_cmd_ctrl(struct mtd_info *mtd, int cmd, unsigned int ctrl) { - struct nand_chip *this = mtd->priv; + struct nand_chip *this = mtd_to_nand(mtd); unsigned long nandaddr = (unsigned long)this->IO_ADDR_W; if (cmd == NAND_CMD_NONE) diff --git a/arch/mips/rb532/devices.c b/arch/mips/rb532/devices.c index 9bd7a2de0765..0966adccf520 100644 --- a/arch/mips/rb532/devices.c +++ b/arch/mips/rb532/devices.c @@ -148,7 +148,7 @@ static int rb532_dev_ready(struct mtd_info *mtd) static void rb532_cmd_ctrl(struct mtd_info *mtd, int cmd, unsigned int ctrl) { - struct nand_chip *chip = mtd->priv; + struct nand_chip *chip = mtd_to_nand(mtd); unsigned char orbits, nandbits; if (ctrl & NAND_CTRL_CHANGE) { diff --git a/arch/mips/vdso/Makefile b/arch/mips/vdso/Makefile index 14568900fc1d..ee3617c0c5e2 100644 --- a/arch/mips/vdso/Makefile +++ b/arch/mips/vdso/Makefile @@ -26,7 +26,7 @@ aflags-vdso := $(ccflags-vdso) \ # the comments on that file. # ifndef CONFIG_CPU_MIPSR6 - ifeq ($(call ld-ifversion, -lt, 22500000, y),y) + ifeq ($(call ld-ifversion, -lt, 225000000, y),y) $(warning MIPS VDSO requires binutils >= 2.25) obj-vdso-y := $(filter-out gettimeofday.o, $(obj-vdso-y)) ccflags-vdso += -DDISABLE_MIPS_VDSO diff --git a/arch/mn10300/include/asm/page.h b/arch/mn10300/include/asm/page.h index 8288e124165b..3810a6f740fd 100644 --- a/arch/mn10300/include/asm/page.h +++ b/arch/mn10300/include/asm/page.h @@ -107,6 +107,7 @@ static inline int get_order(unsigned long size) #define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) #define pfn_to_page(pfn) (mem_map + ((pfn) - __pfn_disp)) #define page_to_pfn(page) ((unsigned long)((page) - mem_map) + __pfn_disp) +#define __pfn_to_phys(pfn) PFN_PHYS(pfn) #define pfn_valid(pfn) \ ({ \ diff --git a/arch/mn10300/include/asm/uaccess.h b/arch/mn10300/include/asm/uaccess.h index 537278746a15..20f7bf6de384 100644 --- a/arch/mn10300/include/asm/uaccess.h +++ b/arch/mn10300/include/asm/uaccess.h @@ -110,21 +110,6 @@ extern int fixup_exception(struct pt_regs *regs); #define __put_user(x, ptr) __put_user_nocheck((x), (ptr), sizeof(*(ptr))) #define __get_user(x, ptr) __get_user_nocheck((x), (ptr), sizeof(*(ptr))) -/* - * The "xxx_ret" versions return constant specified in third argument, if - * something bad happens. These macros can be optimized for the - * case of just returning from the function xxx_ret is used. - */ - -#define put_user_ret(x, ptr, ret) \ - ({ if (put_user((x), (ptr))) return (ret); }) -#define get_user_ret(x, ptr, ret) \ - ({ if (get_user((x), (ptr))) return (ret); }) -#define __put_user_ret(x, ptr, ret) \ - ({ if (__put_user((x), (ptr))) return (ret); }) -#define __get_user_ret(x, ptr, ret) \ - ({ if (__get_user((x), (ptr))) return (ret); }) - struct __large_struct { unsigned long buf[100]; }; #define __m(x) (*(struct __large_struct *)(x)) diff --git a/arch/mn10300/include/uapi/asm/socket.h b/arch/mn10300/include/uapi/asm/socket.h index cab7d6d50051..58b1aa01ab9f 100644 --- a/arch/mn10300/include/uapi/asm/socket.h +++ b/arch/mn10300/include/uapi/asm/socket.h @@ -85,4 +85,7 @@ #define SO_ATTACH_BPF 50 #define SO_DETACH_BPF SO_DETACH_FILTER +#define SO_ATTACH_REUSEPORT_CBPF 51 +#define SO_ATTACH_REUSEPORT_EBPF 52 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 729f89163bc3..7c34cafdf301 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -79,9 +79,6 @@ config TIME_LOW_RES depends on SMP default y -config HAVE_LATENCYTOP_SUPPORT - def_bool y - # unless you want to implement ACPI on PA-RISC ... ;-) config PM bool diff --git a/arch/parisc/include/asm/hugetlb.h b/arch/parisc/include/asm/hugetlb.h index 7d56a9ccb752..a65d888716c4 100644 --- a/arch/parisc/include/asm/hugetlb.h +++ b/arch/parisc/include/asm/hugetlb.h @@ -54,24 +54,12 @@ static inline pte_t huge_pte_wrprotect(pte_t pte) return pte_wrprotect(pte); } -static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) -{ - pte_t old_pte = *ptep; - set_huge_pte_at(mm, addr, ptep, pte_wrprotect(old_pte)); -} +void huge_ptep_set_wrprotect(struct mm_struct *mm, + unsigned long addr, pte_t *ptep); -static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma, +int huge_ptep_set_access_flags(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, - pte_t pte, int dirty) -{ - int changed = !pte_same(*ptep, pte); - if (changed) { - set_huge_pte_at(vma->vm_mm, addr, ptep, pte); - flush_tlb_page(vma, addr); - } - return changed; -} + pte_t pte, int dirty); static inline pte_t huge_ptep_get(pte_t *ptep) { diff --git a/arch/parisc/include/asm/pci.h b/arch/parisc/include/asm/pci.h index 71889ea72740..89c53bfff055 100644 --- a/arch/parisc/include/asm/pci.h +++ b/arch/parisc/include/asm/pci.h @@ -167,6 +167,7 @@ static inline void pcibios_register_hba(struct pci_hba_data *x) { } #endif +extern void pcibios_init_bridge(struct pci_dev *); /* * pcibios_assign_all_busses() is used in drivers/pci/pci.c:pci_do_scan_bus() diff --git a/arch/parisc/include/asm/pdc.h b/arch/parisc/include/asm/pdc.h index 7eb616e4bf8a..451906d78136 100644 --- a/arch/parisc/include/asm/pdc.h +++ b/arch/parisc/include/asm/pdc.h @@ -63,7 +63,7 @@ struct pdc_tlb_cf { /* for PDC_CACHE (I/D-TLB's) */ tc_page : 1, /* 0 = 2K page-size-machine, 1 = 4k page size */ tc_cst : 3, /* 0 = incoherent operations, else coherent operations */ tc_aid : 5, /* ITLB: width of access ids of processor (encoded!) */ - tc_pad1 : 8; /* ITLB: width of space-registers (encoded) */ + tc_sr : 8; /* ITLB: width of space-registers (encoded) */ }; struct pdc_cache_info { /* main-PDC_CACHE-structure (caches & TLB's) */ diff --git a/arch/parisc/include/asm/processor.h b/arch/parisc/include/asm/processor.h index 7e759ecb1343..2e674e13e005 100644 --- a/arch/parisc/include/asm/processor.h +++ b/arch/parisc/include/asm/processor.h @@ -311,18 +311,17 @@ extern unsigned long get_wchan(struct task_struct *p); #define cpu_relax() barrier() #define cpu_relax_lowlatency() cpu_relax() -/* Used as a macro to identify the combined VIPT/PIPT cached - * CPUs which require a guarantee of coherency (no inequivalent - * aliases with different data, whether clean or not) to operate */ -static inline int parisc_requires_coherency(void) -{ +/* + * parisc_requires_coherency() is used to identify the combined VIPT/PIPT + * cached CPUs which require a guarantee of coherency (no inequivalent aliases + * with different data, whether clean or not) to operate + */ #ifdef CONFIG_PA8X00 - return (boot_cpu_data.cpu_type == mako) || - (boot_cpu_data.cpu_type == mako2); +extern int _parisc_requires_coherency; +#define parisc_requires_coherency() _parisc_requires_coherency #else - return 0; +#define parisc_requires_coherency() (0) #endif -} #endif /* __ASSEMBLY__ */ diff --git a/arch/parisc/include/uapi/asm/mman.h b/arch/parisc/include/uapi/asm/mman.h index dd4d1876a020..cf830d465f75 100644 --- a/arch/parisc/include/uapi/asm/mman.h +++ b/arch/parisc/include/uapi/asm/mman.h @@ -43,8 +43,10 @@ #define MADV_SPACEAVAIL 5 /* insure that resources are reserved */ #define MADV_VPS_PURGE 6 /* Purge pages from VM page cache */ #define MADV_VPS_INHERIT 7 /* Inherit parents page size */ +#define MADV_FREE 8 /* free pages only if memory pressure */ /* common/generic parameters */ +#define MADV_FREE 8 /* free pages only if memory pressure */ #define MADV_REMOVE 9 /* remove these pages & resources */ #define MADV_DONTFORK 10 /* don't inherit across fork */ #define MADV_DOFORK 11 /* do inherit across fork */ diff --git a/arch/parisc/include/uapi/asm/siginfo.h b/arch/parisc/include/uapi/asm/siginfo.h index d7034728f377..8fd10f85c50e 100644 --- a/arch/parisc/include/uapi/asm/siginfo.h +++ b/arch/parisc/include/uapi/asm/siginfo.h @@ -1,9 +1,10 @@ #ifndef _PARISC_SIGINFO_H #define _PARISC_SIGINFO_H -#include <asm-generic/siginfo.h> +#if defined(__LP64__) +#define __ARCH_SI_PREAMBLE_SIZE (4 * sizeof(int)) +#endif -#undef NSIGTRAP -#define NSIGTRAP 4 +#include <asm-generic/siginfo.h> #endif diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h index a5cd40cd8ee1..f9cf1223422c 100644 --- a/arch/parisc/include/uapi/asm/socket.h +++ b/arch/parisc/include/uapi/asm/socket.h @@ -84,4 +84,7 @@ #define SO_ATTACH_BPF 0x402B #define SO_DETACH_BPF SO_DETACH_FILTER +#define SO_ATTACH_REUSEPORT_CBPF 0x402C +#define SO_ATTACH_REUSEPORT_EBPF 0x402D + #endif /* _UAPI_ASM_SOCKET_H */ diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index cda6dbbe9842..91c2a39cd5aa 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -172,6 +172,24 @@ parisc_cache_init(void) cache_info.ic_count, cache_info.ic_loop); + printk("IT base 0x%lx stride 0x%lx count 0x%lx loop 0x%lx off_base 0x%lx off_stride 0x%lx off_count 0x%lx\n", + cache_info.it_sp_base, + cache_info.it_sp_stride, + cache_info.it_sp_count, + cache_info.it_loop, + cache_info.it_off_base, + cache_info.it_off_stride, + cache_info.it_off_count); + + printk("DT base 0x%lx stride 0x%lx count 0x%lx loop 0x%lx off_base 0x%lx off_stride 0x%lx off_count 0x%lx\n", + cache_info.dt_sp_base, + cache_info.dt_sp_stride, + cache_info.dt_sp_count, + cache_info.dt_loop, + cache_info.dt_off_base, + cache_info.dt_off_stride, + cache_info.dt_off_count); + printk("ic_conf = 0x%lx alias %d blk %d line %d shift %d\n", *(unsigned long *) (&cache_info.ic_conf), cache_info.ic_conf.cc_alias, @@ -184,19 +202,19 @@ parisc_cache_init(void) cache_info.ic_conf.cc_cst, cache_info.ic_conf.cc_hv); - printk("D-TLB conf: sh %d page %d cst %d aid %d pad1 %d\n", + printk("D-TLB conf: sh %d page %d cst %d aid %d sr %d\n", cache_info.dt_conf.tc_sh, cache_info.dt_conf.tc_page, cache_info.dt_conf.tc_cst, cache_info.dt_conf.tc_aid, - cache_info.dt_conf.tc_pad1); + cache_info.dt_conf.tc_sr); - printk("I-TLB conf: sh %d page %d cst %d aid %d pad1 %d\n", + printk("I-TLB conf: sh %d page %d cst %d aid %d sr %d\n", cache_info.it_conf.tc_sh, cache_info.it_conf.tc_page, cache_info.it_conf.tc_cst, cache_info.it_conf.tc_aid, - cache_info.it_conf.tc_pad1); + cache_info.it_conf.tc_sr); #endif split_tlb = 0; diff --git a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c index 3c63a820fcda..b9d75d9fa9ac 100644 --- a/arch/parisc/kernel/module.c +++ b/arch/parisc/kernel/module.c @@ -42,9 +42,9 @@ * We are not doing SEGREL32 handling correctly. According to the ABI, we * should do a value offset, like this: * if (in_init(me, (void *)val)) - * val -= (uint32_t)me->module_init; + * val -= (uint32_t)me->init_layout.base; * else - * val -= (uint32_t)me->module_core; + * val -= (uint32_t)me->core_layout.base; * However, SEGREL32 is used only for PARISC unwind entries, and we want * those entries to have an absolute address, and not just an offset. * @@ -100,14 +100,14 @@ * or init pieces the location is */ static inline int in_init(struct module *me, void *loc) { - return (loc >= me->module_init && - loc <= (me->module_init + me->init_size)); + return (loc >= me->init_layout.base && + loc <= (me->init_layout.base + me->init_layout.size)); } static inline int in_core(struct module *me, void *loc) { - return (loc >= me->module_core && - loc <= (me->module_core + me->core_size)); + return (loc >= me->core_layout.base && + loc <= (me->core_layout.base + me->core_layout.size)); } static inline int in_local(struct module *me, void *loc) @@ -367,13 +367,13 @@ int module_frob_arch_sections(CONST Elf_Ehdr *hdr, } /* align things a bit */ - me->core_size = ALIGN(me->core_size, 16); - me->arch.got_offset = me->core_size; - me->core_size += gots * sizeof(struct got_entry); + me->core_layout.size = ALIGN(me->core_layout.size, 16); + me->arch.got_offset = me->core_layout.size; + me->core_layout.size += gots * sizeof(struct got_entry); - me->core_size = ALIGN(me->core_size, 16); - me->arch.fdesc_offset = me->core_size; - me->core_size += fdescs * sizeof(Elf_Fdesc); + me->core_layout.size = ALIGN(me->core_layout.size, 16); + me->arch.fdesc_offset = me->core_layout.size; + me->core_layout.size += fdescs * sizeof(Elf_Fdesc); me->arch.got_max = gots; me->arch.fdesc_max = fdescs; @@ -391,7 +391,7 @@ static Elf64_Word get_got(struct module *me, unsigned long value, long addend) BUG_ON(value == 0); - got = me->module_core + me->arch.got_offset; + got = me->core_layout.base + me->arch.got_offset; for (i = 0; got[i].addr; i++) if (got[i].addr == value) goto out; @@ -409,7 +409,7 @@ static Elf64_Word get_got(struct module *me, unsigned long value, long addend) #ifdef CONFIG_64BIT static Elf_Addr get_fdesc(struct module *me, unsigned long value) { - Elf_Fdesc *fdesc = me->module_core + me->arch.fdesc_offset; + Elf_Fdesc *fdesc = me->core_layout.base + me->arch.fdesc_offset; if (!value) { printk(KERN_ERR "%s: zero OPD requested!\n", me->name); @@ -427,7 +427,7 @@ static Elf_Addr get_fdesc(struct module *me, unsigned long value) /* Create new one */ fdesc->addr = value; - fdesc->gp = (Elf_Addr)me->module_core + me->arch.got_offset; + fdesc->gp = (Elf_Addr)me->core_layout.base + me->arch.got_offset; return (Elf_Addr)fdesc; } #endif /* CONFIG_64BIT */ @@ -839,7 +839,7 @@ register_unwind_table(struct module *me, table = (unsigned char *)sechdrs[me->arch.unwind_section].sh_addr; end = table + sechdrs[me->arch.unwind_section].sh_size; - gp = (Elf_Addr)me->module_core + me->arch.got_offset; + gp = (Elf_Addr)me->core_layout.base + me->arch.got_offset; DEBUGP("register_unwind_table(), sect = %d at 0x%p - 0x%p (gp=0x%lx)\n", me->arch.unwind_section, table, end, gp); diff --git a/arch/parisc/kernel/pci.c b/arch/parisc/kernel/pci.c index c99f3dde455c..0903c6abd7a4 100644 --- a/arch/parisc/kernel/pci.c +++ b/arch/parisc/kernel/pci.c @@ -170,6 +170,32 @@ void pcibios_set_master(struct pci_dev *dev) (0x80 << 8) | pci_cache_line_size); } +/* + * pcibios_init_bridge() initializes cache line and default latency + * for pci controllers and pci-pci bridges + */ +void __init pcibios_init_bridge(struct pci_dev *dev) +{ + unsigned short bridge_ctl, bridge_ctl_new; + + /* We deal only with pci controllers and pci-pci bridges. */ + if (!dev || (dev->class >> 8) != PCI_CLASS_BRIDGE_PCI) + return; + + /* PCI-PCI bridge - set the cache line and default latency + * (32) for primary and secondary buses. + */ + pci_write_config_byte(dev, PCI_SEC_LATENCY_TIMER, 32); + + pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &bridge_ctl); + + bridge_ctl_new = bridge_ctl | PCI_BRIDGE_CTL_PARITY | + PCI_BRIDGE_CTL_SERR | PCI_BRIDGE_CTL_MASTER_ABORT; + dev_info(&dev->dev, "Changing bridge control from 0x%08x to 0x%08x\n", + bridge_ctl, bridge_ctl_new); + + pci_write_config_word(dev, PCI_BRIDGE_CONTROL, bridge_ctl_new); +} /* * pcibios align resources() is called every time generic PCI code diff --git a/arch/parisc/kernel/processor.c b/arch/parisc/kernel/processor.c index b68d977ce30f..e81ccf1716e9 100644 --- a/arch/parisc/kernel/processor.c +++ b/arch/parisc/kernel/processor.c @@ -44,6 +44,10 @@ struct system_cpuinfo_parisc boot_cpu_data __read_mostly; EXPORT_SYMBOL(boot_cpu_data); +#ifdef CONFIG_PA8X00 +int _parisc_requires_coherency __read_mostly; +EXPORT_SYMBOL(_parisc_requires_coherency); +#endif DEFINE_PER_CPU(struct cpuinfo_parisc, cpu_data); @@ -277,8 +281,12 @@ void __init collect_boot_cpu_data(void) boot_cpu_data.cpu_type = parisc_get_cpu_type(boot_cpu_data.hversion); boot_cpu_data.cpu_name = cpu_name_version[boot_cpu_data.cpu_type][0]; boot_cpu_data.family_name = cpu_name_version[boot_cpu_data.cpu_type][1]; -} +#ifdef CONFIG_PA8X00 + _parisc_requires_coherency = (boot_cpu_data.cpu_type == mako) || + (boot_cpu_data.cpu_type == mako2); +#endif +} /** diff --git a/arch/parisc/mm/hugetlbpage.c b/arch/parisc/mm/hugetlbpage.c index f6fdc77a72bd..54ba39262b82 100644 --- a/arch/parisc/mm/hugetlbpage.c +++ b/arch/parisc/mm/hugetlbpage.c @@ -105,15 +105,13 @@ static inline void purge_tlb_entries_huge(struct mm_struct *mm, unsigned long ad addr |= _HUGE_PAGE_SIZE_ENCODING_DEFAULT; for (i = 0; i < (1 << (HPAGE_SHIFT-REAL_HPAGE_SHIFT)); i++) { - mtsp(mm->context, 1); - pdtlb(addr); - if (unlikely(split_tlb)) - pitlb(addr); + purge_tlb_entries(mm, addr); addr += (1UL << REAL_HPAGE_SHIFT); } } -void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, +/* __set_huge_pte_at() must be called holding the pa_tlb_lock. */ +static void __set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t entry) { unsigned long addr_start; @@ -123,14 +121,9 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, addr_start = addr; for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) { - /* Directly write pte entry. We could call set_pte_at(mm, addr, ptep, entry) - * instead, but then we get double locking on pa_tlb_lock. */ - *ptep = entry; + set_pte(ptep, entry); ptep++; - /* Drop the PAGE_SIZE/non-huge tlb entry */ - purge_tlb_entries(mm, addr); - addr += PAGE_SIZE; pte_val(entry) += PAGE_SIZE; } @@ -138,18 +131,61 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, purge_tlb_entries_huge(mm, addr_start); } +void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t entry) +{ + unsigned long flags; + + purge_tlb_start(flags); + __set_huge_pte_at(mm, addr, ptep, entry); + purge_tlb_end(flags); +} + pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { + unsigned long flags; pte_t entry; + purge_tlb_start(flags); entry = *ptep; - set_huge_pte_at(mm, addr, ptep, __pte(0)); + __set_huge_pte_at(mm, addr, ptep, __pte(0)); + purge_tlb_end(flags); return entry; } + +void huge_ptep_set_wrprotect(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + unsigned long flags; + pte_t old_pte; + + purge_tlb_start(flags); + old_pte = *ptep; + __set_huge_pte_at(mm, addr, ptep, pte_wrprotect(old_pte)); + purge_tlb_end(flags); +} + +int huge_ptep_set_access_flags(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, + pte_t pte, int dirty) +{ + unsigned long flags; + int changed; + + purge_tlb_start(flags); + changed = !pte_same(*ptep, pte); + if (changed) { + __set_huge_pte_at(vma->vm_mm, addr, ptep, pte); + } + purge_tlb_end(flags); + return changed; +} + + int pmd_huge(pmd_t pmd) { return 0; diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index db49e0d796b1..94f6c5089e0c 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -47,9 +47,6 @@ config STACKTRACE_SUPPORT bool default y -config HAVE_LATENCYTOP_SUPPORT - def_bool y - config TRACE_IRQFLAGS_SUPPORT bool default y @@ -159,6 +156,7 @@ config PPC select EDAC_SUPPORT select EDAC_ATOMIC_SCRUB select ARCH_HAS_DMA_SET_COHERENT_MASK + select ARCH_HAS_DEVMEM_IS_ALLOWED select HAVE_ARCH_SECCOMP_FILTER config GENERIC_CSUM @@ -559,6 +557,7 @@ choice config PPC_4K_PAGES bool "4k page size" + select HAVE_ARCH_SOFT_DIRTY if CHECKPOINT_RESTORE && PPC_BOOK3S config PPC_16K_PAGES bool "16k page size" @@ -567,6 +566,7 @@ config PPC_16K_PAGES config PPC_64K_PAGES bool "64k page size" depends on !PPC_FSL_BOOK3E && (44x || PPC_STD_MMU_64 || PPC_BOOK3E_64) + select HAVE_ARCH_SOFT_DIRTY if CHECKPOINT_RESTORE && PPC_BOOK3S config PPC_256K_PAGES bool "256k page size" @@ -1074,8 +1074,6 @@ source "drivers/Kconfig" source "fs/Kconfig" -source "arch/powerpc/sysdev/qe_lib/Kconfig" - source "lib/Kconfig" source "arch/powerpc/Kconfig.debug" diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index 3a510f4a6b68..638f9ce740f5 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -64,17 +64,17 @@ config PPC_EMULATED_STATS emulated. config CODE_PATCHING_SELFTEST - bool "Run self-tests of the code-patching code." + bool "Run self-tests of the code-patching code" depends on DEBUG_KERNEL default n config FTR_FIXUP_SELFTEST - bool "Run self-tests of the feature-fixup code." + bool "Run self-tests of the feature-fixup code" depends on DEBUG_KERNEL default n config MSI_BITMAP_SELFTEST - bool "Run self-tests of the MSI bitmap code." + bool "Run self-tests of the MSI bitmap code" depends on DEBUG_KERNEL default n @@ -335,18 +335,6 @@ config PPC_EARLY_DEBUG_CPM_ADDR platform probing is done, all platforms selected must share the same address. -config STRICT_DEVMEM - def_bool y - prompt "Filter access to /dev/mem" - help - This option restricts access to /dev/mem. If this option is - disabled, you allow userspace access to all memory, including - kernel and userspace memory. Accidental memory access is likely - to be disastrous. - Memory access is required for experts who want to debug the kernel. - - If you are unsure, say Y. - config FAIL_IOMMU bool "Fault-injection capability for IOMMU" depends on FAULT_INJECTION diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index 99e4487248ff..61165101342c 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -113,7 +113,6 @@ src-plat-$(CONFIG_EPAPR_BOOT) += epapr.c epapr-wrapper.c src-plat-$(CONFIG_PPC_PSERIES) += pseries-head.S src-plat-$(CONFIG_PPC_POWERNV) += pseries-head.S src-plat-$(CONFIG_PPC_IBM_CELL_BLADE) += pseries-head.S -src-plat-$(CONFIG_PPC_CELL_QPACE) += pseries-head.S src-wlib := $(sort $(src-wlib-y)) src-plat := $(sort $(src-plat-y)) @@ -217,7 +216,6 @@ image-$(CONFIG_PPC_POWERNV) += zImage.pseries image-$(CONFIG_PPC_MAPLE) += zImage.maple image-$(CONFIG_PPC_IBM_CELL_BLADE) += zImage.pseries image-$(CONFIG_PPC_PS3) += dtbImage.ps3 -image-$(CONFIG_PPC_CELL_QPACE) += zImage.pseries image-$(CONFIG_PPC_CHRP) += zImage.chrp image-$(CONFIG_PPC_EFIKA) += zImage.chrp image-$(CONFIG_PPC_PMAC) += zImage.pmac diff --git a/arch/powerpc/boot/dts/fsl/b4si-post.dtsi b/arch/powerpc/boot/dts/fsl/b4si-post.dtsi index 74866ac52f39..1b33f5157c8a 100644 --- a/arch/powerpc/boot/dts/fsl/b4si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/b4si-post.dtsi @@ -474,6 +474,11 @@ fman@400000 { interrupts = <96 2 0 0>, <16 2 1 30>; + muram@0 { + compatible = "fsl,fman-muram"; + reg = <0x0 0x80000>; + }; + enet0: ethernet@e0000 { }; diff --git a/arch/powerpc/boot/dts/fsl/bsc9132qds.dts b/arch/powerpc/boot/dts/fsl/bsc9132qds.dts index 70882ade606d..56e6f1337e96 100644 --- a/arch/powerpc/boot/dts/fsl/bsc9132qds.dts +++ b/arch/powerpc/boot/dts/fsl/bsc9132qds.dts @@ -29,6 +29,21 @@ soc: soc@ff700000 { ranges = <0x0 0x0 0xff700000 0x100000>; }; + + pci0: pcie@ff70a000 { + reg = <0 0xff70a000 0 0x1000>; + ranges = <0x2000000 0x0 0x90000000 0 0x90000000 0x0 0x20000000 + 0x1000000 0x0 0x00000000 0 0xc0010000 0x0 0x10000>; + pcie@0 { + ranges = <0x2000000 0x0 0x90000000 + 0x2000000 0x0 0x90000000 + 0x0 0x20000000 + + 0x1000000 0x0 0x0 + 0x1000000 0x0 0x0 + 0x0 0x100000>; + }; + }; }; /include/ "bsc9132qds.dtsi" diff --git a/arch/powerpc/boot/dts/fsl/bsc9132si-post.dtsi b/arch/powerpc/boot/dts/fsl/bsc9132si-post.dtsi index c72307198140..b5f071574e83 100644 --- a/arch/powerpc/boot/dts/fsl/bsc9132si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/bsc9132si-post.dtsi @@ -40,6 +40,34 @@ interrupts = <16 2 0 0 20 2 0 0>; }; +/* controller at 0xa000 */ +&pci0 { + compatible = "fsl,bsc9132-pcie", "fsl,qoriq-pcie-v2.2"; + device_type = "pci"; + #size-cells = <2>; + #address-cells = <3>; + bus-range = <0 255>; + interrupts = <16 2 0 0>; + + pcie@0 { + reg = <0 0 0 0 0>; + #interrupt-cells = <1>; + #size-cells = <2>; + #address-cells = <3>; + device_type = "pci"; + interrupts = <16 2 0 0>; + interrupt-map-mask = <0xf800 0 0 7>; + + interrupt-map = < + /* IDSEL 0x0 */ + 0000 0x0 0x0 0x1 &mpic 0x0 0x2 0x0 0x0 + 0000 0x0 0x0 0x2 &mpic 0x1 0x2 0x0 0x0 + 0000 0x0 0x0 0x3 &mpic 0x2 0x2 0x0 0x0 + 0000 0x0 0x0 0x4 &mpic 0x3 0x2 0x0 0x0 + >; + }; +}; + &soc { #address-cells = <1>; #size-cells = <1>; diff --git a/arch/powerpc/boot/dts/fsl/bsc9132si-pre.dtsi b/arch/powerpc/boot/dts/fsl/bsc9132si-pre.dtsi index 301a9dba5790..90f7949fe312 100644 --- a/arch/powerpc/boot/dts/fsl/bsc9132si-pre.dtsi +++ b/arch/powerpc/boot/dts/fsl/bsc9132si-pre.dtsi @@ -45,6 +45,7 @@ serial0 = &serial0; ethernet0 = &enet0; ethernet1 = &enet1; + pci0 = &pci0; }; cpus { diff --git a/arch/powerpc/boot/dts/fsl/p1010rdb.dtsi b/arch/powerpc/boot/dts/fsl/p1010rdb.dtsi index 0f0ced69835a..14b629505038 100644 --- a/arch/powerpc/boot/dts/fsl/p1010rdb.dtsi +++ b/arch/powerpc/boot/dts/fsl/p1010rdb.dtsi @@ -215,3 +215,19 @@ phy-connection-type = "sgmii"; }; }; + +&pci0 { + pcie@0 { + interrupt-map = < + /* IDSEL 0x0 */ + /* + *irq[4:5] are active-high + *irq[6:7] are active-low + */ + 0000 0x0 0x0 0x1 &mpic 0x4 0x2 0x0 0x0 + 0000 0x0 0x0 0x2 &mpic 0x5 0x2 0x0 0x0 + 0000 0x0 0x0 0x3 &mpic 0x6 0x1 0x0 0x0 + 0000 0x0 0x0 0x4 &mpic 0x7 0x1 0x0 0x0 + >; + }; +}; diff --git a/arch/powerpc/boot/dts/fsl/t1023rdb.dts b/arch/powerpc/boot/dts/fsl/t1023rdb.dts index 2b2fff4a12a2..6bd842beb1dc 100644 --- a/arch/powerpc/boot/dts/fsl/t1023rdb.dts +++ b/arch/powerpc/boot/dts/fsl/t1023rdb.dts @@ -159,4 +159,4 @@ }; }; -/include/ "t1023si-post.dtsi" +#include "t1023si-post.dtsi" diff --git a/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi b/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi index 518ddaa8da2d..99e421df79d4 100644 --- a/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi @@ -32,6 +32,8 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#include <dt-bindings/thermal/thermal.h> + &ifc { #address-cells = <2>; #size-cells = <1>; @@ -275,6 +277,90 @@ reg = <0xea000 0x4000>; }; + tmu: tmu@f0000 { + compatible = "fsl,qoriq-tmu"; + reg = <0xf0000 0x1000>; + interrupts = <18 2 0 0>; + fsl,tmu-range = <0xb0000 0xa0026 0x80048 0x30061>; + fsl,tmu-calibration = <0x00000000 0x0000000f + 0x00000001 0x00000017 + 0x00000002 0x0000001e + 0x00000003 0x00000026 + 0x00000004 0x0000002e + 0x00000005 0x00000035 + 0x00000006 0x0000003d + 0x00000007 0x00000044 + 0x00000008 0x0000004c + 0x00000009 0x00000053 + 0x0000000a 0x0000005b + 0x0000000b 0x00000064 + + 0x00010000 0x00000011 + 0x00010001 0x0000001c + 0x00010002 0x00000024 + 0x00010003 0x0000002b + 0x00010004 0x00000034 + 0x00010005 0x00000039 + 0x00010006 0x00000042 + 0x00010007 0x0000004c + 0x00010008 0x00000051 + 0x00010009 0x0000005a + 0x0001000a 0x00000063 + + 0x00020000 0x00000013 + 0x00020001 0x00000019 + 0x00020002 0x00000024 + 0x00020003 0x0000002c + 0x00020004 0x00000035 + 0x00020005 0x0000003d + 0x00020006 0x00000046 + 0x00020007 0x00000050 + 0x00020008 0x00000059 + + 0x00030000 0x00000002 + 0x00030001 0x0000000d + 0x00030002 0x00000019 + 0x00030003 0x00000024>; + #thermal-sensor-cells = <0>; + }; + + thermal-zones { + cpu_thermal: cpu-thermal { + polling-delay-passive = <1000>; + polling-delay = <5000>; + + thermal-sensors = <&tmu>; + + trips { + cpu_alert: cpu-alert { + temperature = <85000>; + hysteresis = <2000>; + type = "passive"; + }; + cpu_crit: cpu-crit { + temperature = <95000>; + hysteresis = <2000>; + type = "critical"; + }; + }; + + cooling-maps { + map0 { + trip = <&cpu_alert>; + cooling-device = + <&cpu0 THERMAL_NO_LIMIT + THERMAL_NO_LIMIT>; + }; + map1 { + trip = <&cpu_alert>; + cooling-device = + <&cpu1 THERMAL_NO_LIMIT + THERMAL_NO_LIMIT>; + }; + }; + }; + }; + scfg: global-utilities@fc000 { compatible = "fsl,t1023-scfg"; reg = <0xfc000 0x1000>; diff --git a/arch/powerpc/boot/dts/fsl/t1024qds.dts b/arch/powerpc/boot/dts/fsl/t1024qds.dts index 43cd5b50cd0a..6a3581b8e1f8 100644 --- a/arch/powerpc/boot/dts/fsl/t1024qds.dts +++ b/arch/powerpc/boot/dts/fsl/t1024qds.dts @@ -248,4 +248,4 @@ }; }; -/include/ "t1024si-post.dtsi" +#include "t1024si-post.dtsi" diff --git a/arch/powerpc/boot/dts/fsl/t1024rdb.dts b/arch/powerpc/boot/dts/fsl/t1024rdb.dts index 429d8c73650a..0ccc7d03335e 100644 --- a/arch/powerpc/boot/dts/fsl/t1024rdb.dts +++ b/arch/powerpc/boot/dts/fsl/t1024rdb.dts @@ -188,4 +188,4 @@ }; }; -/include/ "t1024si-post.dtsi" +#include "t1024si-post.dtsi" diff --git a/arch/powerpc/boot/dts/fsl/t1024si-post.dtsi b/arch/powerpc/boot/dts/fsl/t1024si-post.dtsi index 95e3af8d768e..bb480346a58d 100644 --- a/arch/powerpc/boot/dts/fsl/t1024si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/t1024si-post.dtsi @@ -32,7 +32,7 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -/include/ "t1023si-post.dtsi" +#include "t1023si-post.dtsi" / { aliases { diff --git a/arch/powerpc/boot/dts/fsl/t102xsi-pre.dtsi b/arch/powerpc/boot/dts/fsl/t102xsi-pre.dtsi index 3e1528abf3f4..9d08a363bab3 100644 --- a/arch/powerpc/boot/dts/fsl/t102xsi-pre.dtsi +++ b/arch/powerpc/boot/dts/fsl/t102xsi-pre.dtsi @@ -76,6 +76,7 @@ reg = <0>; clocks = <&mux0>; next-level-cache = <&L2_1>; + #cooling-cells = <2>; L2_1: l2-cache { next-level-cache = <&cpc>; }; @@ -85,6 +86,7 @@ reg = <1>; clocks = <&mux1>; next-level-cache = <&L2_2>; + #cooling-cells = <2>; L2_2: l2-cache { next-level-cache = <&cpc>; }; diff --git a/arch/powerpc/boot/dts/fsl/t1040d4rdb.dts b/arch/powerpc/boot/dts/fsl/t1040d4rdb.dts index 681746efd31d..fb6bc02ebb60 100644 --- a/arch/powerpc/boot/dts/fsl/t1040d4rdb.dts +++ b/arch/powerpc/boot/dts/fsl/t1040d4rdb.dts @@ -43,4 +43,4 @@ interrupt-parent = <&mpic>; }; -/include/ "t1040si-post.dtsi" +#include "t1040si-post.dtsi" diff --git a/arch/powerpc/boot/dts/fsl/t1040qds.dts b/arch/powerpc/boot/dts/fsl/t1040qds.dts index 4d298659468c..5f76edc7838c 100644 --- a/arch/powerpc/boot/dts/fsl/t1040qds.dts +++ b/arch/powerpc/boot/dts/fsl/t1040qds.dts @@ -43,4 +43,4 @@ interrupt-parent = <&mpic>; }; -/include/ "t1040si-post.dtsi" +#include "t1040si-post.dtsi" diff --git a/arch/powerpc/boot/dts/fsl/t1040rdb.dts b/arch/powerpc/boot/dts/fsl/t1040rdb.dts index 8f9e65b47515..cf194154bbdc 100644 --- a/arch/powerpc/boot/dts/fsl/t1040rdb.dts +++ b/arch/powerpc/boot/dts/fsl/t1040rdb.dts @@ -45,4 +45,4 @@ }; }; -/include/ "t1040si-post.dtsi" +#include "t1040si-post.dtsi" diff --git a/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi b/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi index d30b3de1cfc5..e0f4da554774 100644 --- a/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi @@ -32,6 +32,8 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#include <dt-bindings/thermal/thermal.h> + &bman_fbpr { compatible = "fsl,bman-fbpr"; alloc-ranges = <0 0 0x10000 0>; @@ -484,6 +486,98 @@ reg = <0xea000 0x4000>; }; + tmu: tmu@f0000 { + compatible = "fsl,qoriq-tmu"; + reg = <0xf0000 0x1000>; + interrupts = <18 2 0 0>; + fsl,tmu-range = <0xa0000 0x90026 0x8004a 0x1006a>; + fsl,tmu-calibration = <0x00000000 0x00000025 + 0x00000001 0x00000028 + 0x00000002 0x0000002d + 0x00000003 0x00000031 + 0x00000004 0x00000036 + 0x00000005 0x0000003a + 0x00000006 0x00000040 + 0x00000007 0x00000044 + 0x00000008 0x0000004a + 0x00000009 0x0000004f + 0x0000000a 0x00000054 + + 0x00010000 0x0000000d + 0x00010001 0x00000013 + 0x00010002 0x00000019 + 0x00010003 0x0000001f + 0x00010004 0x00000025 + 0x00010005 0x0000002d + 0x00010006 0x00000033 + 0x00010007 0x00000043 + 0x00010008 0x0000004b + 0x00010009 0x00000053 + + 0x00020000 0x00000010 + 0x00020001 0x00000017 + 0x00020002 0x0000001f + 0x00020003 0x00000029 + 0x00020004 0x00000031 + 0x00020005 0x0000003c + 0x00020006 0x00000042 + 0x00020007 0x0000004d + 0x00020008 0x00000056 + + 0x00030000 0x00000012 + 0x00030001 0x0000001d>; + #thermal-sensor-cells = <0>; + }; + + thermal-zones { + cpu_thermal: cpu-thermal { + polling-delay-passive = <1000>; + polling-delay = <5000>; + + thermal-sensors = <&tmu>; + + trips { + cpu_alert: cpu-alert { + temperature = <85000>; + hysteresis = <2000>; + type = "passive"; + }; + cpu_crit: cpu-crit { + temperature = <95000>; + hysteresis = <2000>; + type = "critical"; + }; + }; + + cooling-maps { + map0 { + trip = <&cpu_alert>; + cooling-device = + <&cpu0 THERMAL_NO_LIMIT + THERMAL_NO_LIMIT>; + }; + map1 { + trip = <&cpu_alert>; + cooling-device = + <&cpu1 THERMAL_NO_LIMIT + THERMAL_NO_LIMIT>; + }; + map2 { + trip = <&cpu_alert>; + cooling-device = + <&cpu2 THERMAL_NO_LIMIT + THERMAL_NO_LIMIT>; + }; + map3 { + trip = <&cpu_alert>; + cooling-device = + <&cpu3 THERMAL_NO_LIMIT + THERMAL_NO_LIMIT>; + }; + }; + }; + }; + scfg: global-utilities@fc000 { compatible = "fsl,t1040-scfg"; reg = <0xfc000 0x1000>; diff --git a/arch/powerpc/boot/dts/fsl/t1042d4rdb.dts b/arch/powerpc/boot/dts/fsl/t1042d4rdb.dts index b245b31b8279..2a5a90dd272e 100644 --- a/arch/powerpc/boot/dts/fsl/t1042d4rdb.dts +++ b/arch/powerpc/boot/dts/fsl/t1042d4rdb.dts @@ -50,4 +50,4 @@ }; }; -/include/ "t1040si-post.dtsi" +#include "t1042si-post.dtsi" diff --git a/arch/powerpc/boot/dts/fsl/t1042qds.dts b/arch/powerpc/boot/dts/fsl/t1042qds.dts index 4ab9bbe7c5c5..90a4a73bb905 100644 --- a/arch/powerpc/boot/dts/fsl/t1042qds.dts +++ b/arch/powerpc/boot/dts/fsl/t1042qds.dts @@ -43,4 +43,4 @@ interrupt-parent = <&mpic>; }; -/include/ "t1042si-post.dtsi" +#include "t1042si-post.dtsi" diff --git a/arch/powerpc/boot/dts/fsl/t1042rdb.dts b/arch/powerpc/boot/dts/fsl/t1042rdb.dts index 67af56bc5ee9..8d908e795e4d 100644 --- a/arch/powerpc/boot/dts/fsl/t1042rdb.dts +++ b/arch/powerpc/boot/dts/fsl/t1042rdb.dts @@ -45,4 +45,4 @@ }; }; -/include/ "t1042si-post.dtsi" +#include "t1042si-post.dtsi" diff --git a/arch/powerpc/boot/dts/fsl/t1042rdb_pi.dts b/arch/powerpc/boot/dts/fsl/t1042rdb_pi.dts index 2f67677530a4..98c001019d6a 100644 --- a/arch/powerpc/boot/dts/fsl/t1042rdb_pi.dts +++ b/arch/powerpc/boot/dts/fsl/t1042rdb_pi.dts @@ -54,4 +54,4 @@ }; }; -/include/ "t1042si-post.dtsi" +#include "t1042si-post.dtsi" diff --git a/arch/powerpc/boot/dts/fsl/t1042si-post.dtsi b/arch/powerpc/boot/dts/fsl/t1042si-post.dtsi index 319b74f29724..a5544f93689c 100644 --- a/arch/powerpc/boot/dts/fsl/t1042si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/t1042si-post.dtsi @@ -32,6 +32,6 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -/include/ "t1040si-post.dtsi" +#include "t1040si-post.dtsi" /* Place holder for ethernet related device tree nodes */ diff --git a/arch/powerpc/boot/dts/fsl/t104xsi-pre.dtsi b/arch/powerpc/boot/dts/fsl/t104xsi-pre.dtsi index fcfa38ae5e02..6db0ee8b1384 100644 --- a/arch/powerpc/boot/dts/fsl/t104xsi-pre.dtsi +++ b/arch/powerpc/boot/dts/fsl/t104xsi-pre.dtsi @@ -76,6 +76,7 @@ reg = <0>; clocks = <&mux0>; next-level-cache = <&L2_1>; + #cooling-cells = <2>; L2_1: l2-cache { next-level-cache = <&cpc>; }; @@ -85,6 +86,7 @@ reg = <1>; clocks = <&mux1>; next-level-cache = <&L2_2>; + #cooling-cells = <2>; L2_2: l2-cache { next-level-cache = <&cpc>; }; @@ -94,6 +96,7 @@ reg = <2>; clocks = <&mux2>; next-level-cache = <&L2_3>; + #cooling-cells = <2>; L2_3: l2-cache { next-level-cache = <&cpc>; }; @@ -103,6 +106,7 @@ reg = <3>; clocks = <&mux3>; next-level-cache = <&L2_4>; + #cooling-cells = <2>; L2_4: l2-cache { next-level-cache = <&cpc>; }; diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper index ceaa75d5a684..6a19fcef5596 100755 --- a/arch/powerpc/boot/wrapper +++ b/arch/powerpc/boot/wrapper @@ -154,7 +154,7 @@ if [ -z "$kernel" ]; then kernel=vmlinux fi -elfformat="`${CROSS}objdump -p "$kernel" | grep 'file format' | awk '{print $4}'`" +LANG=C elfformat="`${CROSS}objdump -p "$kernel" | grep 'file format' | awk '{print $4}'`" case "$elfformat" in elf64-powerpcle) format=elf64lppc ;; elf64-powerpc) format=elf32ppc ;; diff --git a/arch/powerpc/configs/mpc85xx_basic_defconfig b/arch/powerpc/configs/mpc85xx_basic_defconfig index 850bd195d0e8..b1593fe6f70b 100644 --- a/arch/powerpc/configs/mpc85xx_basic_defconfig +++ b/arch/powerpc/configs/mpc85xx_basic_defconfig @@ -12,6 +12,7 @@ CONFIG_P1010_RDB=y CONFIG_P1022_DS=y CONFIG_P1022_RDK=y CONFIG_P1023_RDB=y +CONFIG_TWR_P102x=y CONFIG_SBC8548=y CONFIG_SOCRATES=y CONFIG_STX_GP3=y diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index 2c041b535a64..b041fb607376 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -36,7 +36,6 @@ CONFIG_PS3_ROM=m CONFIG_PS3_FLASH=m CONFIG_PS3_LPM=m CONFIG_PPC_IBM_CELL_BLADE=y -CONFIG_PPC_CELL_QPACE=y CONFIG_RTAS_FLASH=m CONFIG_IBMEBUS=y CONFIG_CPU_FREQ_PMAC64=y diff --git a/arch/powerpc/crypto/aes-spe-glue.c b/arch/powerpc/crypto/aes-spe-glue.c index bd5e63f72ad4..93ee046d12cd 100644 --- a/arch/powerpc/crypto/aes-spe-glue.c +++ b/arch/powerpc/crypto/aes-spe-glue.c @@ -85,6 +85,7 @@ static void spe_begin(void) static void spe_end(void) { + disable_kernel_spe(); /* reenable preemption */ preempt_enable(); } diff --git a/arch/powerpc/crypto/sha1-spe-glue.c b/arch/powerpc/crypto/sha1-spe-glue.c index 3e1d22212521..f9ebc38d3fe7 100644 --- a/arch/powerpc/crypto/sha1-spe-glue.c +++ b/arch/powerpc/crypto/sha1-spe-glue.c @@ -46,6 +46,7 @@ static void spe_begin(void) static void spe_end(void) { + disable_kernel_spe(); /* reenable preemption */ preempt_enable(); } diff --git a/arch/powerpc/crypto/sha256-spe-glue.c b/arch/powerpc/crypto/sha256-spe-glue.c index f4a616fe1a82..718a079dcdbf 100644 --- a/arch/powerpc/crypto/sha256-spe-glue.c +++ b/arch/powerpc/crypto/sha256-spe-glue.c @@ -47,6 +47,7 @@ static void spe_begin(void) static void spe_end(void) { + disable_kernel_spe(); /* reenable preemption */ preempt_enable(); } diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h index 0eca6efc0631..a7af5fb7b914 100644 --- a/arch/powerpc/include/asm/barrier.h +++ b/arch/powerpc/include/asm/barrier.h @@ -34,7 +34,7 @@ #define rmb() __asm__ __volatile__ ("sync" : : : "memory") #define wmb() __asm__ __volatile__ ("sync" : : : "memory") -#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0) +#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0) #ifdef __SUBARCH_HAS_LWSYNC # define SMPWMB LWSYNC diff --git a/arch/powerpc/include/asm/pte-hash32.h b/arch/powerpc/include/asm/book3s/32/hash.h index 62cfb0c663bb..264b754d65b0 100644 --- a/arch/powerpc/include/asm/pte-hash32.h +++ b/arch/powerpc/include/asm/book3s/32/hash.h @@ -1,5 +1,5 @@ -#ifndef _ASM_POWERPC_PTE_HASH32_H -#define _ASM_POWERPC_PTE_HASH32_H +#ifndef _ASM_POWERPC_BOOK3S_32_HASH_H +#define _ASM_POWERPC_BOOK3S_32_HASH_H #ifdef __KERNEL__ /* @@ -43,4 +43,4 @@ #define PTE_ATOMIC_UPDATES 1 #endif /* __KERNEL__ */ -#endif /* _ASM_POWERPC_PTE_HASH32_H */ +#endif /* _ASM_POWERPC_BOOK3S_32_HASH_H */ diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h new file mode 100644 index 000000000000..38b33dcfcc9d --- /dev/null +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -0,0 +1,482 @@ +#ifndef _ASM_POWERPC_BOOK3S_32_PGTABLE_H +#define _ASM_POWERPC_BOOK3S_32_PGTABLE_H + +#include <asm-generic/pgtable-nopmd.h> + +#include <asm/book3s/32/hash.h> + +/* And here we include common definitions */ +#include <asm/pte-common.h> + +/* + * The normal case is that PTEs are 32-bits and we have a 1-page + * 1024-entry pgdir pointing to 1-page 1024-entry PTE pages. -- paulus + * + * For any >32-bit physical address platform, we can use the following + * two level page table layout where the pgdir is 8KB and the MS 13 bits + * are an index to the second level table. The combined pgdir/pmd first + * level has 2048 entries and the second level has 512 64-bit PTE entries. + * -Matt + */ +/* PGDIR_SHIFT determines what a top-level page table entry can map */ +#define PGDIR_SHIFT (PAGE_SHIFT + PTE_SHIFT) +#define PGDIR_SIZE (1UL << PGDIR_SHIFT) +#define PGDIR_MASK (~(PGDIR_SIZE-1)) + +#define PTRS_PER_PTE (1 << PTE_SHIFT) +#define PTRS_PER_PMD 1 +#define PTRS_PER_PGD (1 << (32 - PGDIR_SHIFT)) + +#define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE) +/* + * This is the bottom of the PKMAP area with HIGHMEM or an arbitrary + * value (for now) on others, from where we can start layout kernel + * virtual space that goes below PKMAP and FIXMAP + */ +#ifdef CONFIG_HIGHMEM +#define KVIRT_TOP PKMAP_BASE +#else +#define KVIRT_TOP (0xfe000000UL) /* for now, could be FIXMAP_BASE ? */ +#endif + +/* + * ioremap_bot starts at that address. Early ioremaps move down from there, + * until mem_init() at which point this becomes the top of the vmalloc + * and ioremap space + */ +#ifdef CONFIG_NOT_COHERENT_CACHE +#define IOREMAP_TOP ((KVIRT_TOP - CONFIG_CONSISTENT_SIZE) & PAGE_MASK) +#else +#define IOREMAP_TOP KVIRT_TOP +#endif + +/* + * Just any arbitrary offset to the start of the vmalloc VM area: the + * current 16MB value just means that there will be a 64MB "hole" after the + * physical memory until the kernel virtual memory starts. That means that + * any out-of-bounds memory accesses will hopefully be caught. + * The vmalloc() routines leaves a hole of 4kB between each vmalloced + * area for the same reason. ;) + * + * We no longer map larger than phys RAM with the BATs so we don't have + * to worry about the VMALLOC_OFFSET causing problems. We do have to worry + * about clashes between our early calls to ioremap() that start growing down + * from ioremap_base being run into the VM area allocations (growing upwards + * from VMALLOC_START). For this reason we have ioremap_bot to check when + * we actually run into our mappings setup in the early boot with the VM + * system. This really does become a problem for machines with good amounts + * of RAM. -- Cort + */ +#define VMALLOC_OFFSET (0x1000000) /* 16M */ +#ifdef PPC_PIN_SIZE +#define VMALLOC_START (((_ALIGN((long)high_memory, PPC_PIN_SIZE) + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1))) +#else +#define VMALLOC_START ((((long)high_memory + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1))) +#endif +#define VMALLOC_END ioremap_bot + +#ifndef __ASSEMBLY__ +#include <linux/sched.h> +#include <linux/threads.h> +#include <asm/io.h> /* For sub-arch specific PPC_PIN_SIZE */ + +extern unsigned long ioremap_bot; + +/* + * entries per page directory level: our page-table tree is two-level, so + * we don't really have any PMD directory. + */ +#define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_SHIFT) +#define PGD_TABLE_SIZE (sizeof(pgd_t) << (32 - PGDIR_SHIFT)) + +#define pte_ERROR(e) \ + pr_err("%s:%d: bad pte %llx.\n", __FILE__, __LINE__, \ + (unsigned long long)pte_val(e)) +#define pgd_ERROR(e) \ + pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) +/* + * Bits in a linux-style PTE. These match the bits in the + * (hardware-defined) PowerPC PTE as closely as possible. + */ + +#define pte_clear(mm, addr, ptep) \ + do { pte_update(ptep, ~_PAGE_HASHPTE, 0); } while (0) + +#define pmd_none(pmd) (!pmd_val(pmd)) +#define pmd_bad(pmd) (pmd_val(pmd) & _PMD_BAD) +#define pmd_present(pmd) (pmd_val(pmd) & _PMD_PRESENT_MASK) +static inline void pmd_clear(pmd_t *pmdp) +{ + *pmdp = __pmd(0); +} + + +/* + * When flushing the tlb entry for a page, we also need to flush the hash + * table entry. flush_hash_pages is assembler (for speed) in hashtable.S. + */ +extern int flush_hash_pages(unsigned context, unsigned long va, + unsigned long pmdval, int count); + +/* Add an HPTE to the hash table */ +extern void add_hash_page(unsigned context, unsigned long va, + unsigned long pmdval); + +/* Flush an entry from the TLB/hash table */ +extern void flush_hash_entry(struct mm_struct *mm, pte_t *ptep, + unsigned long address); + +/* + * PTE updates. This function is called whenever an existing + * valid PTE is updated. This does -not- include set_pte_at() + * which nowadays only sets a new PTE. + * + * Depending on the type of MMU, we may need to use atomic updates + * and the PTE may be either 32 or 64 bit wide. In the later case, + * when using atomic updates, only the low part of the PTE is + * accessed atomically. + * + * In addition, on 44x, we also maintain a global flag indicating + * that an executable user mapping was modified, which is needed + * to properly flush the virtually tagged instruction cache of + * those implementations. + */ +#ifndef CONFIG_PTE_64BIT +static inline unsigned long pte_update(pte_t *p, + unsigned long clr, + unsigned long set) +{ + unsigned long old, tmp; + + __asm__ __volatile__("\ +1: lwarx %0,0,%3\n\ + andc %1,%0,%4\n\ + or %1,%1,%5\n" + PPC405_ERR77(0,%3) +" stwcx. %1,0,%3\n\ + bne- 1b" + : "=&r" (old), "=&r" (tmp), "=m" (*p) + : "r" (p), "r" (clr), "r" (set), "m" (*p) + : "cc" ); + + return old; +} +#else /* CONFIG_PTE_64BIT */ +static inline unsigned long long pte_update(pte_t *p, + unsigned long clr, + unsigned long set) +{ + unsigned long long old; + unsigned long tmp; + + __asm__ __volatile__("\ +1: lwarx %L0,0,%4\n\ + lwzx %0,0,%3\n\ + andc %1,%L0,%5\n\ + or %1,%1,%6\n" + PPC405_ERR77(0,%3) +" stwcx. %1,0,%4\n\ + bne- 1b" + : "=&r" (old), "=&r" (tmp), "=m" (*p) + : "r" (p), "r" ((unsigned long)(p) + 4), "r" (clr), "r" (set), "m" (*p) + : "cc" ); + + return old; +} +#endif /* CONFIG_PTE_64BIT */ + +/* + * 2.6 calls this without flushing the TLB entry; this is wrong + * for our hash-based implementation, we fix that up here. + */ +#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG +static inline int __ptep_test_and_clear_young(unsigned int context, unsigned long addr, pte_t *ptep) +{ + unsigned long old; + old = pte_update(ptep, _PAGE_ACCESSED, 0); + if (old & _PAGE_HASHPTE) { + unsigned long ptephys = __pa(ptep) & PAGE_MASK; + flush_hash_pages(context, addr, ptephys, 1); + } + return (old & _PAGE_ACCESSED) != 0; +} +#define ptep_test_and_clear_young(__vma, __addr, __ptep) \ + __ptep_test_and_clear_young((__vma)->vm_mm->context.id, __addr, __ptep) + +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR +static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) +{ + return __pte(pte_update(ptep, ~_PAGE_HASHPTE, 0)); +} + +#define __HAVE_ARCH_PTEP_SET_WRPROTECT +static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) +{ + pte_update(ptep, (_PAGE_RW | _PAGE_HWWRITE), _PAGE_RO); +} +static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + ptep_set_wrprotect(mm, addr, ptep); +} + + +static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry) +{ + unsigned long set = pte_val(entry) & + (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC); + unsigned long clr = ~pte_val(entry) & _PAGE_RO; + + pte_update(ptep, clr, set); +} + +#define __HAVE_ARCH_PTE_SAME +#define pte_same(A,B) (((pte_val(A) ^ pte_val(B)) & ~_PAGE_HASHPTE) == 0) + +/* + * Note that on Book E processors, the pmd contains the kernel virtual + * (lowmem) address of the pte page. The physical address is less useful + * because everything runs with translation enabled (even the TLB miss + * handler). On everything else the pmd contains the physical address + * of the pte page. -- paulus + */ +#ifndef CONFIG_BOOKE +#define pmd_page_vaddr(pmd) \ + ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK)) +#define pmd_page(pmd) \ + pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT) +#else +#define pmd_page_vaddr(pmd) \ + ((unsigned long) (pmd_val(pmd) & PAGE_MASK)) +#define pmd_page(pmd) \ + pfn_to_page((__pa(pmd_val(pmd)) >> PAGE_SHIFT)) +#endif + +/* to find an entry in a kernel page-table-directory */ +#define pgd_offset_k(address) pgd_offset(&init_mm, address) + +/* to find an entry in a page-table-directory */ +#define pgd_index(address) ((address) >> PGDIR_SHIFT) +#define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address)) + +/* Find an entry in the third-level page table.. */ +#define pte_index(address) \ + (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) +#define pte_offset_kernel(dir, addr) \ + ((pte_t *) pmd_page_vaddr(*(dir)) + pte_index(addr)) +#define pte_offset_map(dir, addr) \ + ((pte_t *) kmap_atomic(pmd_page(*(dir))) + pte_index(addr)) +#define pte_unmap(pte) kunmap_atomic(pte) + +/* + * Encode and decode a swap entry. + * Note that the bits we use in a PTE for representing a swap entry + * must not include the _PAGE_PRESENT bit or the _PAGE_HASHPTE bit (if used). + * -- paulus + */ +#define __swp_type(entry) ((entry).val & 0x1f) +#define __swp_offset(entry) ((entry).val >> 5) +#define __swp_entry(type, offset) ((swp_entry_t) { (type) | ((offset) << 5) }) +#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) >> 3 }) +#define __swp_entry_to_pte(x) ((pte_t) { (x).val << 3 }) + +#ifndef CONFIG_PPC_4K_PAGES +void pgtable_cache_init(void); +#else +/* + * No page table caches to initialise + */ +#define pgtable_cache_init() do { } while (0) +#endif + +extern int get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep, + pmd_t **pmdp); + +/* Generic accessors to PTE bits */ +static inline int pte_write(pte_t pte) { return !!(pte_val(pte) & _PAGE_RW);} +static inline int pte_dirty(pte_t pte) { return !!(pte_val(pte) & _PAGE_DIRTY); } +static inline int pte_young(pte_t pte) { return !!(pte_val(pte) & _PAGE_ACCESSED); } +static inline int pte_special(pte_t pte) { return !!(pte_val(pte) & _PAGE_SPECIAL); } +static inline int pte_none(pte_t pte) { return (pte_val(pte) & ~_PTE_NONE_MASK) == 0; } +static inline pgprot_t pte_pgprot(pte_t pte) { return __pgprot(pte_val(pte) & PAGE_PROT_BITS); } + +static inline int pte_present(pte_t pte) +{ + return pte_val(pte) & _PAGE_PRESENT; +} + +/* Conversion functions: convert a page and protection to a page entry, + * and a page entry and page directory to the page they refer to. + * + * Even if PTEs can be unsigned long long, a PFN is always an unsigned + * long for now. + */ +static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot) +{ + return __pte(((pte_basic_t)(pfn) << PTE_RPN_SHIFT) | + pgprot_val(pgprot)); +} + +static inline unsigned long pte_pfn(pte_t pte) +{ + return pte_val(pte) >> PTE_RPN_SHIFT; +} + +/* Generic modifiers for PTE bits */ +static inline pte_t pte_wrprotect(pte_t pte) +{ + return __pte(pte_val(pte) & ~_PAGE_RW); +} + +static inline pte_t pte_mkclean(pte_t pte) +{ + return __pte(pte_val(pte) & ~_PAGE_DIRTY); +} + +static inline pte_t pte_mkold(pte_t pte) +{ + return __pte(pte_val(pte) & ~_PAGE_ACCESSED); +} + +static inline pte_t pte_mkwrite(pte_t pte) +{ + return __pte(pte_val(pte) | _PAGE_RW); +} + +static inline pte_t pte_mkdirty(pte_t pte) +{ + return __pte(pte_val(pte) | _PAGE_DIRTY); +} + +static inline pte_t pte_mkyoung(pte_t pte) +{ + return __pte(pte_val(pte) | _PAGE_ACCESSED); +} + +static inline pte_t pte_mkspecial(pte_t pte) +{ + return __pte(pte_val(pte) | _PAGE_SPECIAL); +} + +static inline pte_t pte_mkhuge(pte_t pte) +{ + return pte; +} + +static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) +{ + return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot)); +} + + + +/* This low level function performs the actual PTE insertion + * Setting the PTE depends on the MMU type and other factors. It's + * an horrible mess that I'm not going to try to clean up now but + * I'm keeping it in one place rather than spread around + */ +static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte, int percpu) +{ +#if defined(CONFIG_PPC_STD_MMU_32) && defined(CONFIG_SMP) && !defined(CONFIG_PTE_64BIT) + /* First case is 32-bit Hash MMU in SMP mode with 32-bit PTEs. We use the + * helper pte_update() which does an atomic update. We need to do that + * because a concurrent invalidation can clear _PAGE_HASHPTE. If it's a + * per-CPU PTE such as a kmap_atomic, we do a simple update preserving + * the hash bits instead (ie, same as the non-SMP case) + */ + if (percpu) + *ptep = __pte((pte_val(*ptep) & _PAGE_HASHPTE) + | (pte_val(pte) & ~_PAGE_HASHPTE)); + else + pte_update(ptep, ~_PAGE_HASHPTE, pte_val(pte)); + +#elif defined(CONFIG_PPC32) && defined(CONFIG_PTE_64BIT) + /* Second case is 32-bit with 64-bit PTE. In this case, we + * can just store as long as we do the two halves in the right order + * with a barrier in between. This is possible because we take care, + * in the hash code, to pre-invalidate if the PTE was already hashed, + * which synchronizes us with any concurrent invalidation. + * In the percpu case, we also fallback to the simple update preserving + * the hash bits + */ + if (percpu) { + *ptep = __pte((pte_val(*ptep) & _PAGE_HASHPTE) + | (pte_val(pte) & ~_PAGE_HASHPTE)); + return; + } + if (pte_val(*ptep) & _PAGE_HASHPTE) + flush_hash_entry(mm, ptep, addr); + __asm__ __volatile__("\ + stw%U0%X0 %2,%0\n\ + eieio\n\ + stw%U0%X0 %L2,%1" + : "=m" (*ptep), "=m" (*((unsigned char *)ptep+4)) + : "r" (pte) : "memory"); + +#elif defined(CONFIG_PPC_STD_MMU_32) + /* Third case is 32-bit hash table in UP mode, we need to preserve + * the _PAGE_HASHPTE bit since we may not have invalidated the previous + * translation in the hash yet (done in a subsequent flush_tlb_xxx()) + * and see we need to keep track that this PTE needs invalidating + */ + *ptep = __pte((pte_val(*ptep) & _PAGE_HASHPTE) + | (pte_val(pte) & ~_PAGE_HASHPTE)); + +#else +#error "Not supported " +#endif +} + +/* + * Macro to mark a page protection value as "uncacheable". + */ + +#define _PAGE_CACHE_CTL (_PAGE_COHERENT | _PAGE_GUARDED | _PAGE_NO_CACHE | \ + _PAGE_WRITETHRU) + +#define pgprot_noncached pgprot_noncached +static inline pgprot_t pgprot_noncached(pgprot_t prot) +{ + return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | + _PAGE_NO_CACHE | _PAGE_GUARDED); +} + +#define pgprot_noncached_wc pgprot_noncached_wc +static inline pgprot_t pgprot_noncached_wc(pgprot_t prot) +{ + return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | + _PAGE_NO_CACHE); +} + +#define pgprot_cached pgprot_cached +static inline pgprot_t pgprot_cached(pgprot_t prot) +{ + return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | + _PAGE_COHERENT); +} + +#define pgprot_cached_wthru pgprot_cached_wthru +static inline pgprot_t pgprot_cached_wthru(pgprot_t prot) +{ + return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | + _PAGE_COHERENT | _PAGE_WRITETHRU); +} + +#define pgprot_cached_noncoherent pgprot_cached_noncoherent +static inline pgprot_t pgprot_cached_noncoherent(pgprot_t prot) +{ + return __pgprot(pgprot_val(prot) & ~_PAGE_CACHE_CTL); +} + +#define pgprot_writecombine pgprot_writecombine +static inline pgprot_t pgprot_writecombine(pgprot_t prot) +{ + return pgprot_noncached_wc(prot); +} + +#endif /* !__ASSEMBLY__ */ + +#endif /* _ASM_POWERPC_BOOK3S_32_PGTABLE_H */ diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h new file mode 100644 index 000000000000..ea0414d6659e --- /dev/null +++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h @@ -0,0 +1,132 @@ +#ifndef _ASM_POWERPC_BOOK3S_64_HASH_4K_H +#define _ASM_POWERPC_BOOK3S_64_HASH_4K_H +/* + * Entries per page directory level. The PTE level must use a 64b record + * for each page table entry. The PMD and PGD level use a 32b record for + * each entry by assuming that each entry is page aligned. + */ +#define PTE_INDEX_SIZE 9 +#define PMD_INDEX_SIZE 7 +#define PUD_INDEX_SIZE 9 +#define PGD_INDEX_SIZE 9 + +#ifndef __ASSEMBLY__ +#define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_INDEX_SIZE) +#define PMD_TABLE_SIZE (sizeof(pmd_t) << PMD_INDEX_SIZE) +#define PUD_TABLE_SIZE (sizeof(pud_t) << PUD_INDEX_SIZE) +#define PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE) +#endif /* __ASSEMBLY__ */ + +#define PTRS_PER_PTE (1 << PTE_INDEX_SIZE) +#define PTRS_PER_PMD (1 << PMD_INDEX_SIZE) +#define PTRS_PER_PUD (1 << PUD_INDEX_SIZE) +#define PTRS_PER_PGD (1 << PGD_INDEX_SIZE) + +/* PMD_SHIFT determines what a second-level page table entry can map */ +#define PMD_SHIFT (PAGE_SHIFT + PTE_INDEX_SIZE) +#define PMD_SIZE (1UL << PMD_SHIFT) +#define PMD_MASK (~(PMD_SIZE-1)) + +/* With 4k base page size, hugepage PTEs go at the PMD level */ +#define MIN_HUGEPTE_SHIFT PMD_SHIFT + +/* PUD_SHIFT determines what a third-level page table entry can map */ +#define PUD_SHIFT (PMD_SHIFT + PMD_INDEX_SIZE) +#define PUD_SIZE (1UL << PUD_SHIFT) +#define PUD_MASK (~(PUD_SIZE-1)) + +/* PGDIR_SHIFT determines what a fourth-level page table entry can map */ +#define PGDIR_SHIFT (PUD_SHIFT + PUD_INDEX_SIZE) +#define PGDIR_SIZE (1UL << PGDIR_SHIFT) +#define PGDIR_MASK (~(PGDIR_SIZE-1)) + +/* Bits to mask out from a PMD to get to the PTE page */ +#define PMD_MASKED_BITS 0 +/* Bits to mask out from a PUD to get to the PMD page */ +#define PUD_MASKED_BITS 0 +/* Bits to mask out from a PGD to get to the PUD page */ +#define PGD_MASKED_BITS 0 + +/* PTE flags to conserve for HPTE identification */ +#define _PAGE_HPTEFLAGS (_PAGE_BUSY | _PAGE_HASHPTE | \ + _PAGE_F_SECOND | _PAGE_F_GIX) + +/* shift to put page number into pte */ +#define PTE_RPN_SHIFT (18) + +#define _PAGE_4K_PFN 0 +#ifndef __ASSEMBLY__ +/* + * 4-level page tables related bits + */ + +#define pgd_none(pgd) (!pgd_val(pgd)) +#define pgd_bad(pgd) (pgd_val(pgd) == 0) +#define pgd_present(pgd) (pgd_val(pgd) != 0) +#define pgd_page_vaddr(pgd) (pgd_val(pgd) & ~PGD_MASKED_BITS) + +static inline void pgd_clear(pgd_t *pgdp) +{ + *pgdp = __pgd(0); +} + +static inline pte_t pgd_pte(pgd_t pgd) +{ + return __pte(pgd_val(pgd)); +} + +static inline pgd_t pte_pgd(pte_t pte) +{ + return __pgd(pte_val(pte)); +} +extern struct page *pgd_page(pgd_t pgd); + +#define pud_offset(pgdp, addr) \ + (((pud_t *) pgd_page_vaddr(*(pgdp))) + \ + (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))) + +#define pud_ERROR(e) \ + pr_err("%s:%d: bad pud %08lx.\n", __FILE__, __LINE__, pud_val(e)) + +/* + * On all 4K setups, remap_4k_pfn() equates to remap_pfn_range() */ +#define remap_4k_pfn(vma, addr, pfn, prot) \ + remap_pfn_range((vma), (addr), (pfn), PAGE_SIZE, (prot)) + +#ifdef CONFIG_HUGETLB_PAGE +/* + * For 4k page size, we support explicit hugepage via hugepd + */ +static inline int pmd_huge(pmd_t pmd) +{ + return 0; +} + +static inline int pud_huge(pud_t pud) +{ + return 0; +} + +static inline int pgd_huge(pgd_t pgd) +{ + return 0; +} +#define pgd_huge pgd_huge + +static inline int hugepd_ok(hugepd_t hpd) +{ + /* + * if it is not a pte and have hugepd shift mask + * set, then it is a hugepd directory pointer + */ + if (!(hpd.pd & _PAGE_PTE) && + ((hpd.pd & HUGEPD_SHIFT_MASK) != 0)) + return true; + return false; +} +#define is_hugepd(hpd) (hugepd_ok(hpd)) +#endif + +#endif /* !__ASSEMBLY__ */ + +#endif /* _ASM_POWERPC_BOOK3S_64_HASH_4K_H */ diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h new file mode 100644 index 000000000000..849bbec80f7b --- /dev/null +++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h @@ -0,0 +1,300 @@ +#ifndef _ASM_POWERPC_BOOK3S_64_HASH_64K_H +#define _ASM_POWERPC_BOOK3S_64_HASH_64K_H + +#include <asm-generic/pgtable-nopud.h> + +#define PTE_INDEX_SIZE 8 +#define PMD_INDEX_SIZE 10 +#define PUD_INDEX_SIZE 0 +#define PGD_INDEX_SIZE 12 + +#define PTRS_PER_PTE (1 << PTE_INDEX_SIZE) +#define PTRS_PER_PMD (1 << PMD_INDEX_SIZE) +#define PTRS_PER_PGD (1 << PGD_INDEX_SIZE) + +/* With 4k base page size, hugepage PTEs go at the PMD level */ +#define MIN_HUGEPTE_SHIFT PAGE_SHIFT + +/* PMD_SHIFT determines what a second-level page table entry can map */ +#define PMD_SHIFT (PAGE_SHIFT + PTE_INDEX_SIZE) +#define PMD_SIZE (1UL << PMD_SHIFT) +#define PMD_MASK (~(PMD_SIZE-1)) + +/* PGDIR_SHIFT determines what a third-level page table entry can map */ +#define PGDIR_SHIFT (PMD_SHIFT + PMD_INDEX_SIZE) +#define PGDIR_SIZE (1UL << PGDIR_SHIFT) +#define PGDIR_MASK (~(PGDIR_SIZE-1)) + +#define _PAGE_COMBO 0x00040000 /* this is a combo 4k page */ +#define _PAGE_4K_PFN 0x00080000 /* PFN is for a single 4k page */ +/* + * Used to track subpage group valid if _PAGE_COMBO is set + * This overloads _PAGE_F_GIX and _PAGE_F_SECOND + */ +#define _PAGE_COMBO_VALID (_PAGE_F_GIX | _PAGE_F_SECOND) + +/* PTE flags to conserve for HPTE identification */ +#define _PAGE_HPTEFLAGS (_PAGE_BUSY | _PAGE_F_SECOND | \ + _PAGE_F_GIX | _PAGE_HASHPTE | _PAGE_COMBO) + +/* Shift to put page number into pte. + * + * That gives us a max RPN of 34 bits, which means a max of 50 bits + * of addressable physical space, or 46 bits for the special 4k PFNs. + */ +#define PTE_RPN_SHIFT (30) +/* + * we support 16 fragments per PTE page of 64K size. + */ +#define PTE_FRAG_NR 16 +/* + * We use a 2K PTE page fragment and another 2K for storing + * real_pte_t hash index + */ +#define PTE_FRAG_SIZE_SHIFT 12 +#define PTE_FRAG_SIZE (1UL << PTE_FRAG_SIZE_SHIFT) + +/* + * Bits to mask out from a PMD to get to the PTE page + * PMDs point to PTE table fragments which are PTE_FRAG_SIZE aligned. + */ +#define PMD_MASKED_BITS (PTE_FRAG_SIZE - 1) +/* Bits to mask out from a PGD/PUD to get to the PMD page */ +#define PUD_MASKED_BITS 0x1ff + +#ifndef __ASSEMBLY__ + +/* + * With 64K pages on hash table, we have a special PTE format that + * uses a second "half" of the page table to encode sub-page information + * in order to deal with 64K made of 4K HW pages. Thus we override the + * generic accessors and iterators here + */ +#define __real_pte __real_pte +static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep) +{ + real_pte_t rpte; + unsigned long *hidxp; + + rpte.pte = pte; + rpte.hidx = 0; + if (pte_val(pte) & _PAGE_COMBO) { + /* + * Make sure we order the hidx load against the _PAGE_COMBO + * check. The store side ordering is done in __hash_page_4K + */ + smp_rmb(); + hidxp = (unsigned long *)(ptep + PTRS_PER_PTE); + rpte.hidx = *hidxp; + } + return rpte; +} + +static inline unsigned long __rpte_to_hidx(real_pte_t rpte, unsigned long index) +{ + if ((pte_val(rpte.pte) & _PAGE_COMBO)) + return (rpte.hidx >> (index<<2)) & 0xf; + return (pte_val(rpte.pte) >> _PAGE_F_GIX_SHIFT) & 0xf; +} + +#define __rpte_to_pte(r) ((r).pte) +extern bool __rpte_sub_valid(real_pte_t rpte, unsigned long index); +/* + * Trick: we set __end to va + 64k, which happens works for + * a 16M page as well as we want only one iteration + */ +#define pte_iterate_hashed_subpages(rpte, psize, vpn, index, shift) \ + do { \ + unsigned long __end = vpn + (1UL << (PAGE_SHIFT - VPN_SHIFT)); \ + unsigned __split = (psize == MMU_PAGE_4K || \ + psize == MMU_PAGE_64K_AP); \ + shift = mmu_psize_defs[psize].shift; \ + for (index = 0; vpn < __end; index++, \ + vpn += (1L << (shift - VPN_SHIFT))) { \ + if (!__split || __rpte_sub_valid(rpte, index)) \ + do { + +#define pte_iterate_hashed_end() } while(0); } } while(0) + +#define pte_pagesize_index(mm, addr, pte) \ + (((pte) & _PAGE_COMBO)? MMU_PAGE_4K: MMU_PAGE_64K) + +#define remap_4k_pfn(vma, addr, pfn, prot) \ + (WARN_ON(((pfn) >= (1UL << (64 - PTE_RPN_SHIFT)))) ? -EINVAL : \ + remap_pfn_range((vma), (addr), (pfn), PAGE_SIZE, \ + __pgprot(pgprot_val((prot)) | _PAGE_4K_PFN))) + +#define PTE_TABLE_SIZE PTE_FRAG_SIZE +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#define PMD_TABLE_SIZE ((sizeof(pmd_t) << PMD_INDEX_SIZE) + (sizeof(unsigned long) << PMD_INDEX_SIZE)) +#else +#define PMD_TABLE_SIZE (sizeof(pmd_t) << PMD_INDEX_SIZE) +#endif +#define PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE) + +#define pgd_pte(pgd) (pud_pte(((pud_t){ pgd }))) +#define pte_pgd(pte) ((pgd_t)pte_pud(pte)) + +#ifdef CONFIG_HUGETLB_PAGE +/* + * We have PGD_INDEX_SIZ = 12 and PTE_INDEX_SIZE = 8, so that we can have + * 16GB hugepage pte in PGD and 16MB hugepage pte at PMD; + * + * Defined in such a way that we can optimize away code block at build time + * if CONFIG_HUGETLB_PAGE=n. + */ +static inline int pmd_huge(pmd_t pmd) +{ + /* + * leaf pte for huge page + */ + return !!(pmd_val(pmd) & _PAGE_PTE); +} + +static inline int pud_huge(pud_t pud) +{ + /* + * leaf pte for huge page + */ + return !!(pud_val(pud) & _PAGE_PTE); +} + +static inline int pgd_huge(pgd_t pgd) +{ + /* + * leaf pte for huge page + */ + return !!(pgd_val(pgd) & _PAGE_PTE); +} +#define pgd_huge pgd_huge + +#ifdef CONFIG_DEBUG_VM +extern int hugepd_ok(hugepd_t hpd); +#define is_hugepd(hpd) (hugepd_ok(hpd)) +#else +/* + * With 64k page size, we have hugepage ptes in the pgd and pmd entries. We don't + * need to setup hugepage directory for them. Our pte and page directory format + * enable us to have this enabled. + */ +static inline int hugepd_ok(hugepd_t hpd) +{ + return 0; +} +#define is_hugepd(pdep) 0 +#endif /* CONFIG_DEBUG_VM */ + +#endif /* CONFIG_HUGETLB_PAGE */ + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +extern unsigned long pmd_hugepage_update(struct mm_struct *mm, + unsigned long addr, + pmd_t *pmdp, + unsigned long clr, + unsigned long set); +static inline char *get_hpte_slot_array(pmd_t *pmdp) +{ + /* + * The hpte hindex is stored in the pgtable whose address is in the + * second half of the PMD + * + * Order this load with the test for pmd_trans_huge in the caller + */ + smp_rmb(); + return *(char **)(pmdp + PTRS_PER_PMD); + + +} +/* + * The linux hugepage PMD now include the pmd entries followed by the address + * to the stashed pgtable_t. The stashed pgtable_t contains the hpte bits. + * [ 1 bit secondary | 3 bit hidx | 1 bit valid | 000]. We use one byte per + * each HPTE entry. With 16MB hugepage and 64K HPTE we need 256 entries and + * with 4K HPTE we need 4096 entries. Both will fit in a 4K pgtable_t. + * + * The last three bits are intentionally left to zero. This memory location + * are also used as normal page PTE pointers. So if we have any pointers + * left around while we collapse a hugepage, we need to make sure + * _PAGE_PRESENT bit of that is zero when we look at them + */ +static inline unsigned int hpte_valid(unsigned char *hpte_slot_array, int index) +{ + return (hpte_slot_array[index] >> 3) & 0x1; +} + +static inline unsigned int hpte_hash_index(unsigned char *hpte_slot_array, + int index) +{ + return hpte_slot_array[index] >> 4; +} + +static inline void mark_hpte_slot_valid(unsigned char *hpte_slot_array, + unsigned int index, unsigned int hidx) +{ + hpte_slot_array[index] = hidx << 4 | 0x1 << 3; +} + +/* + * + * For core kernel code by design pmd_trans_huge is never run on any hugetlbfs + * page. The hugetlbfs page table walking and mangling paths are totally + * separated form the core VM paths and they're differentiated by + * VM_HUGETLB being set on vm_flags well before any pmd_trans_huge could run. + * + * pmd_trans_huge() is defined as false at build time if + * CONFIG_TRANSPARENT_HUGEPAGE=n to optimize away code blocks at build + * time in such case. + * + * For ppc64 we need to differntiate from explicit hugepages from THP, because + * for THP we also track the subpage details at the pmd level. We don't do + * that for explicit huge pages. + * + */ +static inline int pmd_trans_huge(pmd_t pmd) +{ + return !!((pmd_val(pmd) & (_PAGE_PTE | _PAGE_THP_HUGE)) == + (_PAGE_PTE | _PAGE_THP_HUGE)); +} + +static inline int pmd_large(pmd_t pmd) +{ + return !!(pmd_val(pmd) & _PAGE_PTE); +} + +static inline pmd_t pmd_mknotpresent(pmd_t pmd) +{ + return __pmd(pmd_val(pmd) & ~_PAGE_PRESENT); +} + +#define __HAVE_ARCH_PMD_SAME +static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b) +{ + return (((pmd_val(pmd_a) ^ pmd_val(pmd_b)) & ~_PAGE_HPTEFLAGS) == 0); +} + +static inline int __pmdp_test_and_clear_young(struct mm_struct *mm, + unsigned long addr, pmd_t *pmdp) +{ + unsigned long old; + + if ((pmd_val(*pmdp) & (_PAGE_ACCESSED | _PAGE_HASHPTE)) == 0) + return 0; + old = pmd_hugepage_update(mm, addr, pmdp, _PAGE_ACCESSED, 0); + return ((old & _PAGE_ACCESSED) != 0); +} + +#define __HAVE_ARCH_PMDP_SET_WRPROTECT +static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr, + pmd_t *pmdp) +{ + + if ((pmd_val(*pmdp) & _PAGE_RW) == 0) + return; + + pmd_hugepage_update(mm, addr, pmdp, _PAGE_RW, 0); +} + +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_POWERPC_BOOK3S_64_HASH_64K_H */ diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h new file mode 100644 index 000000000000..06f17e778c27 --- /dev/null +++ b/arch/powerpc/include/asm/book3s/64/hash.h @@ -0,0 +1,545 @@ +#ifndef _ASM_POWERPC_BOOK3S_64_HASH_H +#define _ASM_POWERPC_BOOK3S_64_HASH_H +#ifdef __KERNEL__ + +/* + * Common bits between 4K and 64K pages in a linux-style PTE. + * These match the bits in the (hardware-defined) PowerPC PTE as closely + * as possible. Additional bits may be defined in pgtable-hash64-*.h + * + * Note: We only support user read/write permissions. Supervisor always + * have full read/write to pages above PAGE_OFFSET (pages below that + * always use the user access permissions). + * + * We could create separate kernel read-only if we used the 3 PP bits + * combinations that newer processors provide but we currently don't. + */ +#define _PAGE_PTE 0x00001 +#define _PAGE_PRESENT 0x00002 /* software: pte contains a translation */ +#define _PAGE_BIT_SWAP_TYPE 2 +#define _PAGE_USER 0x00004 /* matches one of the PP bits */ +#define _PAGE_EXEC 0x00008 /* No execute on POWER4 and newer (we invert) */ +#define _PAGE_GUARDED 0x00010 +/* We can derive Memory coherence from _PAGE_NO_CACHE */ +#define _PAGE_COHERENT 0x0 +#define _PAGE_NO_CACHE 0x00020 /* I: cache inhibit */ +#define _PAGE_WRITETHRU 0x00040 /* W: cache write-through */ +#define _PAGE_DIRTY 0x00080 /* C: page changed */ +#define _PAGE_ACCESSED 0x00100 /* R: page referenced */ +#define _PAGE_RW 0x00200 /* software: user write access allowed */ +#define _PAGE_HASHPTE 0x00400 /* software: pte has an associated HPTE */ +#define _PAGE_BUSY 0x00800 /* software: PTE & hash are busy */ +#define _PAGE_F_GIX 0x07000 /* full page: hidx bits */ +#define _PAGE_F_GIX_SHIFT 12 +#define _PAGE_F_SECOND 0x08000 /* Whether to use secondary hash or not */ +#define _PAGE_SPECIAL 0x10000 /* software: special page */ + +#ifdef CONFIG_MEM_SOFT_DIRTY +#define _PAGE_SOFT_DIRTY 0x20000 /* software: software dirty tracking */ +#else +#define _PAGE_SOFT_DIRTY 0x00000 +#endif + +/* + * We need to differentiate between explicit huge page and THP huge + * page, since THP huge page also need to track real subpage details + */ +#define _PAGE_THP_HUGE _PAGE_4K_PFN + +/* + * set of bits not changed in pmd_modify. + */ +#define _HPAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \ + _PAGE_ACCESSED | _PAGE_THP_HUGE) + +#ifdef CONFIG_PPC_64K_PAGES +#include <asm/book3s/64/hash-64k.h> +#else +#include <asm/book3s/64/hash-4k.h> +#endif + +/* + * Size of EA range mapped by our pagetables. + */ +#define PGTABLE_EADDR_SIZE (PTE_INDEX_SIZE + PMD_INDEX_SIZE + \ + PUD_INDEX_SIZE + PGD_INDEX_SIZE + PAGE_SHIFT) +#define PGTABLE_RANGE (ASM_CONST(1) << PGTABLE_EADDR_SIZE) + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#define PMD_CACHE_INDEX (PMD_INDEX_SIZE + 1) +#else +#define PMD_CACHE_INDEX PMD_INDEX_SIZE +#endif +/* + * Define the address range of the kernel non-linear virtual area + */ +#define KERN_VIRT_START ASM_CONST(0xD000000000000000) +#define KERN_VIRT_SIZE ASM_CONST(0x0000100000000000) + +/* + * The vmalloc space starts at the beginning of that region, and + * occupies half of it on hash CPUs and a quarter of it on Book3E + * (we keep a quarter for the virtual memmap) + */ +#define VMALLOC_START KERN_VIRT_START +#define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1) +#define VMALLOC_END (VMALLOC_START + VMALLOC_SIZE) + +/* + * Region IDs + */ +#define REGION_SHIFT 60UL +#define REGION_MASK (0xfUL << REGION_SHIFT) +#define REGION_ID(ea) (((unsigned long)(ea)) >> REGION_SHIFT) + +#define VMALLOC_REGION_ID (REGION_ID(VMALLOC_START)) +#define KERNEL_REGION_ID (REGION_ID(PAGE_OFFSET)) +#define VMEMMAP_REGION_ID (0xfUL) /* Server only */ +#define USER_REGION_ID (0UL) + +/* + * Defines the address of the vmemap area, in its own region on + * hash table CPUs. + */ +#define VMEMMAP_BASE (VMEMMAP_REGION_ID << REGION_SHIFT) + +#ifdef CONFIG_PPC_MM_SLICES +#define HAVE_ARCH_UNMAPPED_AREA +#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN +#endif /* CONFIG_PPC_MM_SLICES */ + +/* No separate kernel read-only */ +#define _PAGE_KERNEL_RW (_PAGE_RW | _PAGE_DIRTY) /* user access blocked by key */ +#define _PAGE_KERNEL_RO _PAGE_KERNEL_RW +#define _PAGE_KERNEL_RWX (_PAGE_DIRTY | _PAGE_RW | _PAGE_EXEC) + +/* Strong Access Ordering */ +#define _PAGE_SAO (_PAGE_WRITETHRU | _PAGE_NO_CACHE | _PAGE_COHERENT) + +/* No page size encoding in the linux PTE */ +#define _PAGE_PSIZE 0 + +/* PTEIDX nibble */ +#define _PTEIDX_SECONDARY 0x8 +#define _PTEIDX_GROUP_IX 0x7 + +/* Hash table based platforms need atomic updates of the linux PTE */ +#define PTE_ATOMIC_UPDATES 1 +#define _PTE_NONE_MASK _PAGE_HPTEFLAGS +/* + * The mask convered by the RPN must be a ULL on 32-bit platforms with + * 64-bit PTEs + */ +#define PTE_RPN_MASK (~((1UL << PTE_RPN_SHIFT) - 1)) +/* + * _PAGE_CHG_MASK masks of bits that are to be preserved across + * pgprot changes + */ +#define _PAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \ + _PAGE_ACCESSED | _PAGE_SPECIAL | _PAGE_PTE | \ + _PAGE_SOFT_DIRTY) +/* + * Mask of bits returned by pte_pgprot() + */ +#define PAGE_PROT_BITS (_PAGE_GUARDED | _PAGE_COHERENT | _PAGE_NO_CACHE | \ + _PAGE_WRITETHRU | _PAGE_4K_PFN | \ + _PAGE_USER | _PAGE_ACCESSED | \ + _PAGE_RW | _PAGE_DIRTY | _PAGE_EXEC | \ + _PAGE_SOFT_DIRTY) +/* + * We define 2 sets of base prot bits, one for basic pages (ie, + * cacheable kernel and user pages) and one for non cacheable + * pages. We always set _PAGE_COHERENT when SMP is enabled or + * the processor might need it for DMA coherency. + */ +#define _PAGE_BASE_NC (_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_PSIZE) +#define _PAGE_BASE (_PAGE_BASE_NC | _PAGE_COHERENT) + +/* Permission masks used to generate the __P and __S table, + * + * Note:__pgprot is defined in arch/powerpc/include/asm/page.h + * + * Write permissions imply read permissions for now (we could make write-only + * pages on BookE but we don't bother for now). Execute permission control is + * possible on platforms that define _PAGE_EXEC + * + * Note due to the way vm flags are laid out, the bits are XWR + */ +#define PAGE_NONE __pgprot(_PAGE_BASE) +#define PAGE_SHARED __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW) +#define PAGE_SHARED_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | \ + _PAGE_EXEC) +#define PAGE_COPY __pgprot(_PAGE_BASE | _PAGE_USER ) +#define PAGE_COPY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC) +#define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_USER ) +#define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC) + +#define __P000 PAGE_NONE +#define __P001 PAGE_READONLY +#define __P010 PAGE_COPY +#define __P011 PAGE_COPY +#define __P100 PAGE_READONLY_X +#define __P101 PAGE_READONLY_X +#define __P110 PAGE_COPY_X +#define __P111 PAGE_COPY_X + +#define __S000 PAGE_NONE +#define __S001 PAGE_READONLY +#define __S010 PAGE_SHARED +#define __S011 PAGE_SHARED +#define __S100 PAGE_READONLY_X +#define __S101 PAGE_READONLY_X +#define __S110 PAGE_SHARED_X +#define __S111 PAGE_SHARED_X + +/* Permission masks used for kernel mappings */ +#define PAGE_KERNEL __pgprot(_PAGE_BASE | _PAGE_KERNEL_RW) +#define PAGE_KERNEL_NC __pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \ + _PAGE_NO_CACHE) +#define PAGE_KERNEL_NCG __pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \ + _PAGE_NO_CACHE | _PAGE_GUARDED) +#define PAGE_KERNEL_X __pgprot(_PAGE_BASE | _PAGE_KERNEL_RWX) +#define PAGE_KERNEL_RO __pgprot(_PAGE_BASE | _PAGE_KERNEL_RO) +#define PAGE_KERNEL_ROX __pgprot(_PAGE_BASE | _PAGE_KERNEL_ROX) + +/* Protection used for kernel text. We want the debuggers to be able to + * set breakpoints anywhere, so don't write protect the kernel text + * on platforms where such control is possible. + */ +#if defined(CONFIG_KGDB) || defined(CONFIG_XMON) || defined(CONFIG_BDI_SWITCH) ||\ + defined(CONFIG_KPROBES) || defined(CONFIG_DYNAMIC_FTRACE) +#define PAGE_KERNEL_TEXT PAGE_KERNEL_X +#else +#define PAGE_KERNEL_TEXT PAGE_KERNEL_ROX +#endif + +/* Make modules code happy. We don't set RO yet */ +#define PAGE_KERNEL_EXEC PAGE_KERNEL_X +#define PAGE_AGP (PAGE_KERNEL_NC) + +#define PMD_BAD_BITS (PTE_TABLE_SIZE-1) +#define PUD_BAD_BITS (PMD_TABLE_SIZE-1) + +#ifndef __ASSEMBLY__ +#define pmd_bad(pmd) (!is_kernel_addr(pmd_val(pmd)) \ + || (pmd_val(pmd) & PMD_BAD_BITS)) +#define pmd_page_vaddr(pmd) (pmd_val(pmd) & ~PMD_MASKED_BITS) + +#define pud_bad(pud) (!is_kernel_addr(pud_val(pud)) \ + || (pud_val(pud) & PUD_BAD_BITS)) +#define pud_page_vaddr(pud) (pud_val(pud) & ~PUD_MASKED_BITS) + +#define pgd_index(address) (((address) >> (PGDIR_SHIFT)) & (PTRS_PER_PGD - 1)) +#define pmd_index(address) (((address) >> (PMD_SHIFT)) & (PTRS_PER_PMD - 1)) +#define pte_index(address) (((address) >> (PAGE_SHIFT)) & (PTRS_PER_PTE - 1)) + +extern void hpte_need_flush(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, unsigned long pte, int huge); +extern unsigned long htab_convert_pte_flags(unsigned long pteflags); +/* Atomic PTE updates */ +static inline unsigned long pte_update(struct mm_struct *mm, + unsigned long addr, + pte_t *ptep, unsigned long clr, + unsigned long set, + int huge) +{ + unsigned long old, tmp; + + __asm__ __volatile__( + "1: ldarx %0,0,%3 # pte_update\n\ + andi. %1,%0,%6\n\ + bne- 1b \n\ + andc %1,%0,%4 \n\ + or %1,%1,%7\n\ + stdcx. %1,0,%3 \n\ + bne- 1b" + : "=&r" (old), "=&r" (tmp), "=m" (*ptep) + : "r" (ptep), "r" (clr), "m" (*ptep), "i" (_PAGE_BUSY), "r" (set) + : "cc" ); + /* huge pages use the old page table lock */ + if (!huge) + assert_pte_locked(mm, addr); + + if (old & _PAGE_HASHPTE) + hpte_need_flush(mm, addr, ptep, old, huge); + + return old; +} + +static inline int __ptep_test_and_clear_young(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + unsigned long old; + + if ((pte_val(*ptep) & (_PAGE_ACCESSED | _PAGE_HASHPTE)) == 0) + return 0; + old = pte_update(mm, addr, ptep, _PAGE_ACCESSED, 0, 0); + return (old & _PAGE_ACCESSED) != 0; +} +#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG +#define ptep_test_and_clear_young(__vma, __addr, __ptep) \ +({ \ + int __r; \ + __r = __ptep_test_and_clear_young((__vma)->vm_mm, __addr, __ptep); \ + __r; \ +}) + +#define __HAVE_ARCH_PTEP_SET_WRPROTECT +static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) +{ + + if ((pte_val(*ptep) & _PAGE_RW) == 0) + return; + + pte_update(mm, addr, ptep, _PAGE_RW, 0, 0); +} + +static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + if ((pte_val(*ptep) & _PAGE_RW) == 0) + return; + + pte_update(mm, addr, ptep, _PAGE_RW, 0, 1); +} + +/* + * We currently remove entries from the hashtable regardless of whether + * the entry was young or dirty. The generic routines only flush if the + * entry was young or dirty which is not good enough. + * + * We should be more intelligent about this but for the moment we override + * these functions and force a tlb flush unconditionally + */ +#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH +#define ptep_clear_flush_young(__vma, __address, __ptep) \ +({ \ + int __young = __ptep_test_and_clear_young((__vma)->vm_mm, __address, \ + __ptep); \ + __young; \ +}) + +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR +static inline pte_t ptep_get_and_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + unsigned long old = pte_update(mm, addr, ptep, ~0UL, 0, 0); + return __pte(old); +} + +static inline void pte_clear(struct mm_struct *mm, unsigned long addr, + pte_t * ptep) +{ + pte_update(mm, addr, ptep, ~0UL, 0, 0); +} + + +/* Set the dirty and/or accessed bits atomically in a linux PTE, this + * function doesn't need to flush the hash entry + */ +static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry) +{ + unsigned long bits = pte_val(entry) & + (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC | + _PAGE_SOFT_DIRTY); + + unsigned long old, tmp; + + __asm__ __volatile__( + "1: ldarx %0,0,%4\n\ + andi. %1,%0,%6\n\ + bne- 1b \n\ + or %0,%3,%0\n\ + stdcx. %0,0,%4\n\ + bne- 1b" + :"=&r" (old), "=&r" (tmp), "=m" (*ptep) + :"r" (bits), "r" (ptep), "m" (*ptep), "i" (_PAGE_BUSY) + :"cc"); +} + +#define __HAVE_ARCH_PTE_SAME +#define pte_same(A,B) (((pte_val(A) ^ pte_val(B)) & ~_PAGE_HPTEFLAGS) == 0) + +/* Generic accessors to PTE bits */ +static inline int pte_write(pte_t pte) { return !!(pte_val(pte) & _PAGE_RW);} +static inline int pte_dirty(pte_t pte) { return !!(pte_val(pte) & _PAGE_DIRTY); } +static inline int pte_young(pte_t pte) { return !!(pte_val(pte) & _PAGE_ACCESSED); } +static inline int pte_special(pte_t pte) { return !!(pte_val(pte) & _PAGE_SPECIAL); } +static inline int pte_none(pte_t pte) { return (pte_val(pte) & ~_PTE_NONE_MASK) == 0; } +static inline pgprot_t pte_pgprot(pte_t pte) { return __pgprot(pte_val(pte) & PAGE_PROT_BITS); } + +#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY +static inline bool pte_soft_dirty(pte_t pte) +{ + return !!(pte_val(pte) & _PAGE_SOFT_DIRTY); +} +static inline pte_t pte_mksoft_dirty(pte_t pte) +{ + return __pte(pte_val(pte) | _PAGE_SOFT_DIRTY); +} + +static inline pte_t pte_clear_soft_dirty(pte_t pte) +{ + return __pte(pte_val(pte) & ~_PAGE_SOFT_DIRTY); +} +#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */ + +#ifdef CONFIG_NUMA_BALANCING +/* + * These work without NUMA balancing but the kernel does not care. See the + * comment in include/asm-generic/pgtable.h . On powerpc, this will only + * work for user pages and always return true for kernel pages. + */ +static inline int pte_protnone(pte_t pte) +{ + return (pte_val(pte) & + (_PAGE_PRESENT | _PAGE_USER)) == _PAGE_PRESENT; +} +#endif /* CONFIG_NUMA_BALANCING */ + +static inline int pte_present(pte_t pte) +{ + return pte_val(pte) & _PAGE_PRESENT; +} + +/* Conversion functions: convert a page and protection to a page entry, + * and a page entry and page directory to the page they refer to. + * + * Even if PTEs can be unsigned long long, a PFN is always an unsigned + * long for now. + */ +static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot) +{ + return __pte(((pte_basic_t)(pfn) << PTE_RPN_SHIFT) | + pgprot_val(pgprot)); +} + +static inline unsigned long pte_pfn(pte_t pte) +{ + return pte_val(pte) >> PTE_RPN_SHIFT; +} + +/* Generic modifiers for PTE bits */ +static inline pte_t pte_wrprotect(pte_t pte) +{ + return __pte(pte_val(pte) & ~_PAGE_RW); +} + +static inline pte_t pte_mkclean(pte_t pte) +{ + return __pte(pte_val(pte) & ~_PAGE_DIRTY); +} + +static inline pte_t pte_mkold(pte_t pte) +{ + return __pte(pte_val(pte) & ~_PAGE_ACCESSED); +} + +static inline pte_t pte_mkwrite(pte_t pte) +{ + return __pte(pte_val(pte) | _PAGE_RW); +} + +static inline pte_t pte_mkdirty(pte_t pte) +{ + return __pte(pte_val(pte) | _PAGE_DIRTY | _PAGE_SOFT_DIRTY); +} + +static inline pte_t pte_mkyoung(pte_t pte) +{ + return __pte(pte_val(pte) | _PAGE_ACCESSED); +} + +static inline pte_t pte_mkspecial(pte_t pte) +{ + return __pte(pte_val(pte) | _PAGE_SPECIAL); +} + +static inline pte_t pte_mkhuge(pte_t pte) +{ + return pte; +} + +static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) +{ + return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot)); +} + +/* This low level function performs the actual PTE insertion + * Setting the PTE depends on the MMU type and other factors. It's + * an horrible mess that I'm not going to try to clean up now but + * I'm keeping it in one place rather than spread around + */ +static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte, int percpu) +{ + /* + * Anything else just stores the PTE normally. That covers all 64-bit + * cases, and 32-bit non-hash with 32-bit PTEs. + */ + *ptep = pte; +} + +/* + * Macro to mark a page protection value as "uncacheable". + */ + +#define _PAGE_CACHE_CTL (_PAGE_COHERENT | _PAGE_GUARDED | _PAGE_NO_CACHE | \ + _PAGE_WRITETHRU) + +#define pgprot_noncached pgprot_noncached +static inline pgprot_t pgprot_noncached(pgprot_t prot) +{ + return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | + _PAGE_NO_CACHE | _PAGE_GUARDED); +} + +#define pgprot_noncached_wc pgprot_noncached_wc +static inline pgprot_t pgprot_noncached_wc(pgprot_t prot) +{ + return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | + _PAGE_NO_CACHE); +} + +#define pgprot_cached pgprot_cached +static inline pgprot_t pgprot_cached(pgprot_t prot) +{ + return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | + _PAGE_COHERENT); +} + +#define pgprot_cached_wthru pgprot_cached_wthru +static inline pgprot_t pgprot_cached_wthru(pgprot_t prot) +{ + return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | + _PAGE_COHERENT | _PAGE_WRITETHRU); +} + +#define pgprot_cached_noncoherent pgprot_cached_noncoherent +static inline pgprot_t pgprot_cached_noncoherent(pgprot_t prot) +{ + return __pgprot(pgprot_val(prot) & ~_PAGE_CACHE_CTL); +} + +#define pgprot_writecombine pgprot_writecombine +static inline pgprot_t pgprot_writecombine(pgprot_t prot) +{ + return pgprot_noncached_wc(prot); +} + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +extern void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr, + pmd_t *pmdp, unsigned long old_pmd); +#else +static inline void hpte_do_hugepage_flush(struct mm_struct *mm, + unsigned long addr, pmd_t *pmdp, + unsigned long old_pmd) +{ + WARN(1, "%s called with THP disabled\n", __func__); +} +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + +#endif /* !__ASSEMBLY__ */ +#endif /* __KERNEL__ */ +#endif /* _ASM_POWERPC_BOOK3S_64_HASH_H */ diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h new file mode 100644 index 000000000000..8204b0c393aa --- /dev/null +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -0,0 +1,298 @@ +#ifndef _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ +#define _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ +/* + * This file contains the functions and defines necessary to modify and use + * the ppc64 hashed page table. + */ + +#include <asm/book3s/64/hash.h> +#include <asm/barrier.h> + +/* + * The second half of the kernel virtual space is used for IO mappings, + * it's itself carved into the PIO region (ISA and PHB IO space) and + * the ioremap space + * + * ISA_IO_BASE = KERN_IO_START, 64K reserved area + * PHB_IO_BASE = ISA_IO_BASE + 64K to ISA_IO_BASE + 2G, PHB IO spaces + * IOREMAP_BASE = ISA_IO_BASE + 2G to VMALLOC_START + PGTABLE_RANGE + */ +#define KERN_IO_START (KERN_VIRT_START + (KERN_VIRT_SIZE >> 1)) +#define FULL_IO_SIZE 0x80000000ul +#define ISA_IO_BASE (KERN_IO_START) +#define ISA_IO_END (KERN_IO_START + 0x10000ul) +#define PHB_IO_BASE (ISA_IO_END) +#define PHB_IO_END (KERN_IO_START + FULL_IO_SIZE) +#define IOREMAP_BASE (PHB_IO_END) +#define IOREMAP_END (KERN_VIRT_START + KERN_VIRT_SIZE) + +#define vmemmap ((struct page *)VMEMMAP_BASE) + +/* Advertise special mapping type for AGP */ +#define HAVE_PAGE_AGP + +/* Advertise support for _PAGE_SPECIAL */ +#define __HAVE_ARCH_PTE_SPECIAL + +#ifndef __ASSEMBLY__ + +/* + * This is the default implementation of various PTE accessors, it's + * used in all cases except Book3S with 64K pages where we have a + * concept of sub-pages + */ +#ifndef __real_pte + +#ifdef CONFIG_STRICT_MM_TYPECHECKS +#define __real_pte(e,p) ((real_pte_t){(e)}) +#define __rpte_to_pte(r) ((r).pte) +#else +#define __real_pte(e,p) (e) +#define __rpte_to_pte(r) (__pte(r)) +#endif +#define __rpte_to_hidx(r,index) (pte_val(__rpte_to_pte(r)) >>_PAGE_F_GIX_SHIFT) + +#define pte_iterate_hashed_subpages(rpte, psize, va, index, shift) \ + do { \ + index = 0; \ + shift = mmu_psize_defs[psize].shift; \ + +#define pte_iterate_hashed_end() } while(0) + +/* + * We expect this to be called only for user addresses or kernel virtual + * addresses other than the linear mapping. + */ +#define pte_pagesize_index(mm, addr, pte) MMU_PAGE_4K + +#endif /* __real_pte */ + +static inline void pmd_set(pmd_t *pmdp, unsigned long val) +{ + *pmdp = __pmd(val); +} + +static inline void pmd_clear(pmd_t *pmdp) +{ + *pmdp = __pmd(0); +} + +#define pmd_none(pmd) (!pmd_val(pmd)) +#define pmd_present(pmd) (!pmd_none(pmd)) + +static inline void pud_set(pud_t *pudp, unsigned long val) +{ + *pudp = __pud(val); +} + +static inline void pud_clear(pud_t *pudp) +{ + *pudp = __pud(0); +} + +#define pud_none(pud) (!pud_val(pud)) +#define pud_present(pud) (pud_val(pud) != 0) + +extern struct page *pud_page(pud_t pud); +extern struct page *pmd_page(pmd_t pmd); +static inline pte_t pud_pte(pud_t pud) +{ + return __pte(pud_val(pud)); +} + +static inline pud_t pte_pud(pte_t pte) +{ + return __pud(pte_val(pte)); +} +#define pud_write(pud) pte_write(pud_pte(pud)) +#define pgd_write(pgd) pte_write(pgd_pte(pgd)) +static inline void pgd_set(pgd_t *pgdp, unsigned long val) +{ + *pgdp = __pgd(val); +} + +/* + * Find an entry in a page-table-directory. We combine the address region + * (the high order N bits) and the pgd portion of the address. + */ + +#define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address)) + +#define pmd_offset(pudp,addr) \ + (((pmd_t *) pud_page_vaddr(*(pudp))) + pmd_index(addr)) + +#define pte_offset_kernel(dir,addr) \ + (((pte_t *) pmd_page_vaddr(*(dir))) + pte_index(addr)) + +#define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr)) +#define pte_unmap(pte) do { } while(0) + +/* to find an entry in a kernel page-table-directory */ +/* This now only contains the vmalloc pages */ +#define pgd_offset_k(address) pgd_offset(&init_mm, address) + +#define pte_ERROR(e) \ + pr_err("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e)) +#define pmd_ERROR(e) \ + pr_err("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e)) +#define pgd_ERROR(e) \ + pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) + +/* Encode and de-code a swap entry */ +#define MAX_SWAPFILES_CHECK() do { \ + BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS); \ + /* \ + * Don't have overlapping bits with _PAGE_HPTEFLAGS \ + * We filter HPTEFLAGS on set_pte. \ + */ \ + BUILD_BUG_ON(_PAGE_HPTEFLAGS & (0x1f << _PAGE_BIT_SWAP_TYPE)); \ + BUILD_BUG_ON(_PAGE_HPTEFLAGS & _PAGE_SWP_SOFT_DIRTY); \ + } while (0) +/* + * on pte we don't need handle RADIX_TREE_EXCEPTIONAL_SHIFT; + */ +#define SWP_TYPE_BITS 5 +#define __swp_type(x) (((x).val >> _PAGE_BIT_SWAP_TYPE) \ + & ((1UL << SWP_TYPE_BITS) - 1)) +#define __swp_offset(x) ((x).val >> PTE_RPN_SHIFT) +#define __swp_entry(type, offset) ((swp_entry_t) { \ + ((type) << _PAGE_BIT_SWAP_TYPE) \ + | ((offset) << PTE_RPN_SHIFT) }) +/* + * swp_entry_t must be independent of pte bits. We build a swp_entry_t from + * swap type and offset we get from swap and convert that to pte to find a + * matching pte in linux page table. + * Clear bits not found in swap entries here. + */ +#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val((pte)) & ~_PAGE_PTE }) +#define __swp_entry_to_pte(x) __pte((x).val | _PAGE_PTE) + +#ifdef CONFIG_MEM_SOFT_DIRTY +#define _PAGE_SWP_SOFT_DIRTY (1UL << (SWP_TYPE_BITS + _PAGE_BIT_SWAP_TYPE)) +#else +#define _PAGE_SWP_SOFT_DIRTY 0UL +#endif /* CONFIG_MEM_SOFT_DIRTY */ + +#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY +static inline pte_t pte_swp_mksoft_dirty(pte_t pte) +{ + return __pte(pte_val(pte) | _PAGE_SWP_SOFT_DIRTY); +} +static inline bool pte_swp_soft_dirty(pte_t pte) +{ + return !!(pte_val(pte) & _PAGE_SWP_SOFT_DIRTY); +} +static inline pte_t pte_swp_clear_soft_dirty(pte_t pte) +{ + return __pte(pte_val(pte) & ~_PAGE_SWP_SOFT_DIRTY); +} +#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */ + +void pgtable_cache_add(unsigned shift, void (*ctor)(void *)); +void pgtable_cache_init(void); + +struct page *realmode_pfn_to_page(unsigned long pfn); + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +extern pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot); +extern pmd_t mk_pmd(struct page *page, pgprot_t pgprot); +extern pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot); +extern void set_pmd_at(struct mm_struct *mm, unsigned long addr, + pmd_t *pmdp, pmd_t pmd); +extern void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr, + pmd_t *pmd); +extern int has_transparent_hugepage(void); +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + + +static inline pte_t pmd_pte(pmd_t pmd) +{ + return __pte(pmd_val(pmd)); +} + +static inline pmd_t pte_pmd(pte_t pte) +{ + return __pmd(pte_val(pte)); +} + +static inline pte_t *pmdp_ptep(pmd_t *pmd) +{ + return (pte_t *)pmd; +} + +#define pmd_pfn(pmd) pte_pfn(pmd_pte(pmd)) +#define pmd_dirty(pmd) pte_dirty(pmd_pte(pmd)) +#define pmd_young(pmd) pte_young(pmd_pte(pmd)) +#define pmd_dirty(pmd) pte_dirty(pmd_pte(pmd)) +#define pmd_mkold(pmd) pte_pmd(pte_mkold(pmd_pte(pmd))) +#define pmd_wrprotect(pmd) pte_pmd(pte_wrprotect(pmd_pte(pmd))) +#define pmd_mkdirty(pmd) pte_pmd(pte_mkdirty(pmd_pte(pmd))) +#define pmd_mkclean(pmd) pte_pmd(pte_mkclean(pmd_pte(pmd))) +#define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd))) +#define pmd_mkwrite(pmd) pte_pmd(pte_mkwrite(pmd_pte(pmd))) + +#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY +#define pmd_soft_dirty(pmd) pte_soft_dirty(pmd_pte(pmd)) +#define pmd_mksoft_dirty(pmd) pte_pmd(pte_mksoft_dirty(pmd_pte(pmd))) +#define pmd_clear_soft_dirty(pmd) pte_pmd(pte_clear_soft_dirty(pmd_pte(pmd))) +#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */ + +#ifdef CONFIG_NUMA_BALANCING +static inline int pmd_protnone(pmd_t pmd) +{ + return pte_protnone(pmd_pte(pmd)); +} +#endif /* CONFIG_NUMA_BALANCING */ + +#define __HAVE_ARCH_PMD_WRITE +#define pmd_write(pmd) pte_write(pmd_pte(pmd)) + +static inline pmd_t pmd_mkhuge(pmd_t pmd) +{ + return __pmd(pmd_val(pmd) | (_PAGE_PTE | _PAGE_THP_HUGE)); +} + +#define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS +extern int pmdp_set_access_flags(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp, + pmd_t entry, int dirty); + +#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG +extern int pmdp_test_and_clear_young(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp); +#define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH +extern int pmdp_clear_flush_young(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp); + +#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR +extern pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, + unsigned long addr, pmd_t *pmdp); + +extern pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp); +#define pmdp_collapse_flush pmdp_collapse_flush + +#define __HAVE_ARCH_PGTABLE_DEPOSIT +extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, + pgtable_t pgtable); +#define __HAVE_ARCH_PGTABLE_WITHDRAW +extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp); + +#define __HAVE_ARCH_PMDP_INVALIDATE +extern void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, + pmd_t *pmdp); + +#define pmd_move_must_withdraw pmd_move_must_withdraw +struct spinlock; +static inline int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl, + struct spinlock *old_pmd_ptl) +{ + /* + * Archs like ppc64 use pgtable to store per pmd + * specific information. So when we switch the pmd, + * we should also withdraw and deposit the pgtable + */ + return true; +} +#endif /* __ASSEMBLY__ */ +#endif /* _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ */ diff --git a/arch/powerpc/include/asm/book3s/pgtable.h b/arch/powerpc/include/asm/book3s/pgtable.h new file mode 100644 index 000000000000..8b0f4a29259a --- /dev/null +++ b/arch/powerpc/include/asm/book3s/pgtable.h @@ -0,0 +1,29 @@ +#ifndef _ASM_POWERPC_BOOK3S_PGTABLE_H +#define _ASM_POWERPC_BOOK3S_PGTABLE_H + +#ifdef CONFIG_PPC64 +#include <asm/book3s/64/pgtable.h> +#else +#include <asm/book3s/32/pgtable.h> +#endif + +#define FIRST_USER_ADDRESS 0UL +#ifndef __ASSEMBLY__ +/* Insert a PTE, top-level function is out of line. It uses an inline + * low level function in the respective pgtable-* files + */ +extern void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, + pte_t pte); + + +#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS +extern int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address, + pte_t *ptep, pte_t entry, int dirty); + +struct file; +extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, + unsigned long size, pgprot_t vma_prot); +#define __HAVE_PHYS_MEM_ACCESS_PROT + +#endif /* __ASSEMBLY__ */ +#endif diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h index ad6263cffb0f..d1a8d93cccfd 100644 --- a/arch/powerpc/include/asm/cmpxchg.h +++ b/arch/powerpc/include/asm/cmpxchg.h @@ -18,12 +18,12 @@ __xchg_u32(volatile void *p, unsigned long val) unsigned long prev; __asm__ __volatile__( - PPC_RELEASE_BARRIER + PPC_ATOMIC_ENTRY_BARRIER "1: lwarx %0,0,%2 \n" PPC405_ERR77(0,%2) " stwcx. %3,0,%2 \n\ bne- 1b" - PPC_ACQUIRE_BARRIER + PPC_ATOMIC_EXIT_BARRIER : "=&r" (prev), "+m" (*(volatile unsigned int *)p) : "r" (p), "r" (val) : "cc", "memory"); @@ -61,12 +61,12 @@ __xchg_u64(volatile void *p, unsigned long val) unsigned long prev; __asm__ __volatile__( - PPC_RELEASE_BARRIER + PPC_ATOMIC_ENTRY_BARRIER "1: ldarx %0,0,%2 \n" PPC405_ERR77(0,%2) " stdcx. %3,0,%2 \n\ bne- 1b" - PPC_ACQUIRE_BARRIER + PPC_ATOMIC_EXIT_BARRIER : "=&r" (prev), "+m" (*(volatile unsigned long *)p) : "r" (p), "r" (val) : "cc", "memory"); @@ -151,14 +151,14 @@ __cmpxchg_u32(volatile unsigned int *p, unsigned long old, unsigned long new) unsigned int prev; __asm__ __volatile__ ( - PPC_RELEASE_BARRIER + PPC_ATOMIC_ENTRY_BARRIER "1: lwarx %0,0,%2 # __cmpxchg_u32\n\ cmpw 0,%0,%3\n\ bne- 2f\n" PPC405_ERR77(0,%2) " stwcx. %4,0,%2\n\ bne- 1b" - PPC_ACQUIRE_BARRIER + PPC_ATOMIC_EXIT_BARRIER "\n\ 2:" : "=&r" (prev), "+m" (*p) @@ -197,13 +197,13 @@ __cmpxchg_u64(volatile unsigned long *p, unsigned long old, unsigned long new) unsigned long prev; __asm__ __volatile__ ( - PPC_RELEASE_BARRIER + PPC_ATOMIC_ENTRY_BARRIER "1: ldarx %0,0,%2 # __cmpxchg_u64\n\ cmpd 0,%0,%3\n\ bne- 2f\n\ stdcx. %4,0,%2\n\ bne- 1b" - PPC_ACQUIRE_BARRIER + PPC_ATOMIC_EXIT_BARRIER "\n\ 2:" : "=&r" (prev), "+m" (*p) diff --git a/arch/powerpc/include/asm/cpm.h b/arch/powerpc/include/asm/cpm.h index 4398a6cdcf53..2c5c5b476804 100644 --- a/arch/powerpc/include/asm/cpm.h +++ b/arch/powerpc/include/asm/cpm.h @@ -5,6 +5,7 @@ #include <linux/types.h> #include <linux/errno.h> #include <linux/of.h> +#include <soc/fsl/qe/qe.h> /* * SPI Parameter RAM common to QE and CPM. @@ -155,49 +156,6 @@ typedef struct cpm_buf_desc { */ #define BD_I2C_START (0x0400) -int cpm_muram_init(void); - -#if defined(CONFIG_CPM) || defined(CONFIG_QUICC_ENGINE) -unsigned long cpm_muram_alloc(unsigned long size, unsigned long align); -int cpm_muram_free(unsigned long offset); -unsigned long cpm_muram_alloc_fixed(unsigned long offset, unsigned long size); -void __iomem *cpm_muram_addr(unsigned long offset); -unsigned long cpm_muram_offset(void __iomem *addr); -dma_addr_t cpm_muram_dma(void __iomem *addr); -#else -static inline unsigned long cpm_muram_alloc(unsigned long size, - unsigned long align) -{ - return -ENOSYS; -} - -static inline int cpm_muram_free(unsigned long offset) -{ - return -ENOSYS; -} - -static inline unsigned long cpm_muram_alloc_fixed(unsigned long offset, - unsigned long size) -{ - return -ENOSYS; -} - -static inline void __iomem *cpm_muram_addr(unsigned long offset) -{ - return NULL; -} - -static inline unsigned long cpm_muram_offset(void __iomem *addr) -{ - return -ENOSYS; -} - -static inline dma_addr_t cpm_muram_dma(void __iomem *addr) -{ - return 0; -} -#endif /* defined(CONFIG_CPM) || defined(CONFIG_QUICC_ENGINE) */ - #ifdef CONFIG_CPM int cpm_command(u32 command, u8 opcode); #else diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 77f52b26dad6..93ae809fe5ea 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -130,15 +130,6 @@ BEGIN_FTR_SECTION_NESTED(941) \ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,941) /* - * Increase the priority on systems where PPR save/restore is not - * implemented/ supported. - */ -#define HMT_MEDIUM_PPR_DISCARD \ -BEGIN_FTR_SECTION_NESTED(942) \ - HMT_MEDIUM; \ -END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,0,942) /*non P7*/ - -/* * Get an SPR into a register if the CPU has the given feature */ #define OPT_GET_SPR(ra, spr, ftr) \ @@ -263,17 +254,6 @@ do_kvm_##n: \ #define KVM_HANDLER_SKIP(area, h, n) #endif -#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE -#define KVMTEST_PR(n) __KVMTEST(n) -#define KVM_HANDLER_PR(area, h, n) __KVM_HANDLER(area, h, n) -#define KVM_HANDLER_PR_SKIP(area, h, n) __KVM_HANDLER_SKIP(area, h, n) - -#else -#define KVMTEST_PR(n) -#define KVM_HANDLER_PR(area, h, n) -#define KVM_HANDLER_PR_SKIP(area, h, n) -#endif - #define NOTEST(n) /* @@ -353,27 +333,25 @@ do_kvm_##n: \ /* * Exception vectors. */ -#define STD_EXCEPTION_PSERIES(loc, vec, label) \ - . = loc; \ +#define STD_EXCEPTION_PSERIES(vec, label) \ + . = vec; \ .globl label##_pSeries; \ label##_pSeries: \ - HMT_MEDIUM_PPR_DISCARD; \ SET_SCRATCH0(r13); /* save r13 */ \ EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common, \ - EXC_STD, KVMTEST_PR, vec) + EXC_STD, KVMTEST, vec) /* Version of above for when we have to branch out-of-line */ #define STD_EXCEPTION_PSERIES_OOL(vec, label) \ .globl label##_pSeries; \ label##_pSeries: \ - EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST_PR, vec); \ + EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST, vec); \ EXCEPTION_PROLOG_PSERIES_1(label##_common, EXC_STD) #define STD_EXCEPTION_HV(loc, vec, label) \ . = loc; \ .globl label##_hv; \ label##_hv: \ - HMT_MEDIUM_PPR_DISCARD; \ SET_SCRATCH0(r13); /* save r13 */ \ EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common, \ EXC_HV, KVMTEST, vec) @@ -389,7 +367,6 @@ label##_hv: \ . = loc; \ .globl label##_relon_pSeries; \ label##_relon_pSeries: \ - HMT_MEDIUM_PPR_DISCARD; \ /* No guest interrupts come through here */ \ SET_SCRATCH0(r13); /* save r13 */ \ EXCEPTION_RELON_PROLOG_PSERIES(PACA_EXGEN, label##_common, \ @@ -405,7 +382,6 @@ label##_relon_pSeries: \ . = loc; \ .globl label##_relon_hv; \ label##_relon_hv: \ - HMT_MEDIUM_PPR_DISCARD; \ /* No guest interrupts come through here */ \ SET_SCRATCH0(r13); /* save r13 */ \ EXCEPTION_RELON_PROLOG_PSERIES(PACA_EXGEN, label##_common, \ @@ -436,17 +412,13 @@ label##_relon_hv: \ #define _SOFTEN_TEST(h, vec) __SOFTEN_TEST(h, vec) #define SOFTEN_TEST_PR(vec) \ - KVMTEST_PR(vec); \ + KVMTEST(vec); \ _SOFTEN_TEST(EXC_STD, vec) #define SOFTEN_TEST_HV(vec) \ KVMTEST(vec); \ _SOFTEN_TEST(EXC_HV, vec) -#define SOFTEN_TEST_HV_201(vec) \ - KVMTEST(vec); \ - _SOFTEN_TEST(EXC_STD, vec) - #define SOFTEN_NOTEST_PR(vec) _SOFTEN_TEST(EXC_STD, vec) #define SOFTEN_NOTEST_HV(vec) _SOFTEN_TEST(EXC_HV, vec) @@ -463,7 +435,6 @@ label##_relon_hv: \ . = loc; \ .globl label##_pSeries; \ label##_pSeries: \ - HMT_MEDIUM_PPR_DISCARD; \ _MASKABLE_EXCEPTION_PSERIES(vec, label, \ EXC_STD, SOFTEN_TEST_PR) @@ -481,7 +452,6 @@ label##_hv: \ EXCEPTION_PROLOG_PSERIES_1(label##_common, EXC_HV); #define __MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, h, extra) \ - HMT_MEDIUM_PPR_DISCARD; \ SET_SCRATCH0(r13); /* save r13 */ \ EXCEPTION_PROLOG_0(PACA_EXGEN); \ __EXCEPTION_PROLOG_1(PACA_EXGEN, extra, vec); \ diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h index e05808a328db..b0629249778b 100644 --- a/arch/powerpc/include/asm/firmware.h +++ b/arch/powerpc/include/asm/firmware.h @@ -47,12 +47,10 @@ #define FW_FEATURE_VPHN ASM_CONST(0x0000000004000000) #define FW_FEATURE_XCMO ASM_CONST(0x0000000008000000) #define FW_FEATURE_OPAL ASM_CONST(0x0000000010000000) -#define FW_FEATURE_OPALv2 ASM_CONST(0x0000000020000000) #define FW_FEATURE_SET_MODE ASM_CONST(0x0000000040000000) #define FW_FEATURE_BEST_ENERGY ASM_CONST(0x0000000080000000) #define FW_FEATURE_TYPE1_AFFINITY ASM_CONST(0x0000000100000000) #define FW_FEATURE_PRRN ASM_CONST(0x0000000200000000) -#define FW_FEATURE_OPALv3 ASM_CONST(0x0000000400000000) #ifndef __ASSEMBLY__ @@ -70,8 +68,7 @@ enum { FW_FEATURE_SET_MODE | FW_FEATURE_BEST_ENERGY | FW_FEATURE_TYPE1_AFFINITY | FW_FEATURE_PRRN, FW_FEATURE_PSERIES_ALWAYS = 0, - FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL | FW_FEATURE_OPALv2 | - FW_FEATURE_OPALv3, + FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL, FW_FEATURE_POWERNV_ALWAYS = 0, FW_FEATURE_PS3_POSSIBLE = FW_FEATURE_LPAR | FW_FEATURE_PS3_LV1, FW_FEATURE_PS3_ALWAYS = FW_FEATURE_LPAR | FW_FEATURE_PS3_LV1, diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 85bc8c0d257b..e3b54dd4f730 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -258,11 +258,16 @@ #define H_DEL_CONN 0x288 #define H_JOIN 0x298 #define H_VASI_STATE 0x2A4 +#define H_VIOCTL 0x2A8 #define H_ENABLE_CRQ 0x2B0 #define H_GET_EM_PARMS 0x2B8 #define H_SET_MPP 0x2D0 #define H_GET_MPP 0x2D4 +#define H_REG_SUB_CRQ 0x2DC #define H_HOME_NODE_ASSOCIATIVITY 0x2EC +#define H_FREE_SUB_CRQ 0x2E0 +#define H_SEND_SUB_CRQ 0x2E4 +#define H_SEND_SUB_CRQ_INDIRECT 0x2E8 #define H_BEST_ENERGY 0x2F4 #define H_XIRR_X 0x2FC #define H_RANDOM 0x300 @@ -271,6 +276,21 @@ #define H_SET_MODE 0x31C #define MAX_HCALL_OPCODE H_SET_MODE +/* H_VIOCTL functions */ +#define H_GET_VIOA_DUMP_SIZE 0x01 +#define H_GET_VIOA_DUMP 0x02 +#define H_GET_ILLAN_NUM_VLAN_IDS 0x03 +#define H_GET_ILLAN_VLAN_ID_LIST 0x04 +#define H_GET_ILLAN_SWITCH_ID 0x05 +#define H_DISABLE_MIGRATION 0x06 +#define H_ENABLE_MIGRATION 0x07 +#define H_GET_PARTNER_INFO 0x08 +#define H_GET_PARTNER_WWPN_LIST 0x09 +#define H_DISABLE_ALL_VIO_INTS 0x0A +#define H_DISABLE_VIO_INTERRUPT 0x0B +#define H_ENABLE_VIO_INTERRUPT 0x0C + + /* Platform specific hcalls, used by KVM */ #define H_RTAS 0xf000 diff --git a/arch/powerpc/include/asm/icswx.h b/arch/powerpc/include/asm/icswx.h index 9f8402b35115..27e588f6c72e 100644 --- a/arch/powerpc/include/asm/icswx.h +++ b/arch/powerpc/include/asm/icswx.h @@ -164,6 +164,7 @@ struct coprocessor_request_block { #define ICSWX_INITIATED (0x8) #define ICSWX_BUSY (0x4) #define ICSWX_REJECTED (0x2) +#define ICSWX_XERS0 (0x1) /* undefined or set from XERSO. */ static inline int icswx(__be32 ccw, struct coprocessor_request_block *crb) { diff --git a/arch/powerpc/include/asm/immap_qe.h b/arch/powerpc/include/asm/immap_qe.h deleted file mode 100644 index bedbff891423..000000000000 --- a/arch/powerpc/include/asm/immap_qe.h +++ /dev/null @@ -1,491 +0,0 @@ -/* - * QUICC Engine (QE) Internal Memory Map. - * The Internal Memory Map for devices with QE on them. This - * is the superset of all QE devices (8360, etc.). - - * Copyright (C) 2006. Freescale Semiconductor, Inc. All rights reserved. - * - * Authors: Shlomi Gridish <gridish@freescale.com> - * Li Yang <leoli@freescale.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ -#ifndef _ASM_POWERPC_IMMAP_QE_H -#define _ASM_POWERPC_IMMAP_QE_H -#ifdef __KERNEL__ - -#include <linux/kernel.h> -#include <asm/io.h> - -#define QE_IMMAP_SIZE (1024 * 1024) /* 1MB from 1MB+IMMR */ - -/* QE I-RAM */ -struct qe_iram { - __be32 iadd; /* I-RAM Address Register */ - __be32 idata; /* I-RAM Data Register */ - u8 res0[0x04]; - __be32 iready; /* I-RAM Ready Register */ - u8 res1[0x70]; -} __attribute__ ((packed)); - -/* QE Interrupt Controller */ -struct qe_ic_regs { - __be32 qicr; - __be32 qivec; - __be32 qripnr; - __be32 qipnr; - __be32 qipxcc; - __be32 qipycc; - __be32 qipwcc; - __be32 qipzcc; - __be32 qimr; - __be32 qrimr; - __be32 qicnr; - u8 res0[0x4]; - __be32 qiprta; - __be32 qiprtb; - u8 res1[0x4]; - __be32 qricr; - u8 res2[0x20]; - __be32 qhivec; - u8 res3[0x1C]; -} __attribute__ ((packed)); - -/* Communications Processor */ -struct cp_qe { - __be32 cecr; /* QE command register */ - __be32 ceccr; /* QE controller configuration register */ - __be32 cecdr; /* QE command data register */ - u8 res0[0xA]; - __be16 ceter; /* QE timer event register */ - u8 res1[0x2]; - __be16 cetmr; /* QE timers mask register */ - __be32 cetscr; /* QE time-stamp timer control register */ - __be32 cetsr1; /* QE time-stamp register 1 */ - __be32 cetsr2; /* QE time-stamp register 2 */ - u8 res2[0x8]; - __be32 cevter; /* QE virtual tasks event register */ - __be32 cevtmr; /* QE virtual tasks mask register */ - __be16 cercr; /* QE RAM control register */ - u8 res3[0x2]; - u8 res4[0x24]; - __be16 ceexe1; /* QE external request 1 event register */ - u8 res5[0x2]; - __be16 ceexm1; /* QE external request 1 mask register */ - u8 res6[0x2]; - __be16 ceexe2; /* QE external request 2 event register */ - u8 res7[0x2]; - __be16 ceexm2; /* QE external request 2 mask register */ - u8 res8[0x2]; - __be16 ceexe3; /* QE external request 3 event register */ - u8 res9[0x2]; - __be16 ceexm3; /* QE external request 3 mask register */ - u8 res10[0x2]; - __be16 ceexe4; /* QE external request 4 event register */ - u8 res11[0x2]; - __be16 ceexm4; /* QE external request 4 mask register */ - u8 res12[0x3A]; - __be32 ceurnr; /* QE microcode revision number register */ - u8 res13[0x244]; -} __attribute__ ((packed)); - -/* QE Multiplexer */ -struct qe_mux { - __be32 cmxgcr; /* CMX general clock route register */ - __be32 cmxsi1cr_l; /* CMX SI1 clock route low register */ - __be32 cmxsi1cr_h; /* CMX SI1 clock route high register */ - __be32 cmxsi1syr; /* CMX SI1 SYNC route register */ - __be32 cmxucr[4]; /* CMX UCCx clock route registers */ - __be32 cmxupcr; /* CMX UPC clock route register */ - u8 res0[0x1C]; -} __attribute__ ((packed)); - -/* QE Timers */ -struct qe_timers { - u8 gtcfr1; /* Timer 1 and Timer 2 global config register*/ - u8 res0[0x3]; - u8 gtcfr2; /* Timer 3 and timer 4 global config register*/ - u8 res1[0xB]; - __be16 gtmdr1; /* Timer 1 mode register */ - __be16 gtmdr2; /* Timer 2 mode register */ - __be16 gtrfr1; /* Timer 1 reference register */ - __be16 gtrfr2; /* Timer 2 reference register */ - __be16 gtcpr1; /* Timer 1 capture register */ - __be16 gtcpr2; /* Timer 2 capture register */ - __be16 gtcnr1; /* Timer 1 counter */ - __be16 gtcnr2; /* Timer 2 counter */ - __be16 gtmdr3; /* Timer 3 mode register */ - __be16 gtmdr4; /* Timer 4 mode register */ - __be16 gtrfr3; /* Timer 3 reference register */ - __be16 gtrfr4; /* Timer 4 reference register */ - __be16 gtcpr3; /* Timer 3 capture register */ - __be16 gtcpr4; /* Timer 4 capture register */ - __be16 gtcnr3; /* Timer 3 counter */ - __be16 gtcnr4; /* Timer 4 counter */ - __be16 gtevr1; /* Timer 1 event register */ - __be16 gtevr2; /* Timer 2 event register */ - __be16 gtevr3; /* Timer 3 event register */ - __be16 gtevr4; /* Timer 4 event register */ - __be16 gtps; /* Timer 1 prescale register */ - u8 res2[0x46]; -} __attribute__ ((packed)); - -/* BRG */ -struct qe_brg { - __be32 brgc[16]; /* BRG configuration registers */ - u8 res0[0x40]; -} __attribute__ ((packed)); - -/* SPI */ -struct spi { - u8 res0[0x20]; - __be32 spmode; /* SPI mode register */ - u8 res1[0x2]; - u8 spie; /* SPI event register */ - u8 res2[0x1]; - u8 res3[0x2]; - u8 spim; /* SPI mask register */ - u8 res4[0x1]; - u8 res5[0x1]; - u8 spcom; /* SPI command register */ - u8 res6[0x2]; - __be32 spitd; /* SPI transmit data register (cpu mode) */ - __be32 spird; /* SPI receive data register (cpu mode) */ - u8 res7[0x8]; -} __attribute__ ((packed)); - -/* SI */ -struct si1 { - __be16 siamr1; /* SI1 TDMA mode register */ - __be16 sibmr1; /* SI1 TDMB mode register */ - __be16 sicmr1; /* SI1 TDMC mode register */ - __be16 sidmr1; /* SI1 TDMD mode register */ - u8 siglmr1_h; /* SI1 global mode register high */ - u8 res0[0x1]; - u8 sicmdr1_h; /* SI1 command register high */ - u8 res2[0x1]; - u8 sistr1_h; /* SI1 status register high */ - u8 res3[0x1]; - __be16 sirsr1_h; /* SI1 RAM shadow address register high */ - u8 sitarc1; /* SI1 RAM counter Tx TDMA */ - u8 sitbrc1; /* SI1 RAM counter Tx TDMB */ - u8 sitcrc1; /* SI1 RAM counter Tx TDMC */ - u8 sitdrc1; /* SI1 RAM counter Tx TDMD */ - u8 sirarc1; /* SI1 RAM counter Rx TDMA */ - u8 sirbrc1; /* SI1 RAM counter Rx TDMB */ - u8 sircrc1; /* SI1 RAM counter Rx TDMC */ - u8 sirdrc1; /* SI1 RAM counter Rx TDMD */ - u8 res4[0x8]; - __be16 siemr1; /* SI1 TDME mode register 16 bits */ - __be16 sifmr1; /* SI1 TDMF mode register 16 bits */ - __be16 sigmr1; /* SI1 TDMG mode register 16 bits */ - __be16 sihmr1; /* SI1 TDMH mode register 16 bits */ - u8 siglmg1_l; /* SI1 global mode register low 8 bits */ - u8 res5[0x1]; - u8 sicmdr1_l; /* SI1 command register low 8 bits */ - u8 res6[0x1]; - u8 sistr1_l; /* SI1 status register low 8 bits */ - u8 res7[0x1]; - __be16 sirsr1_l; /* SI1 RAM shadow address register low 16 bits*/ - u8 siterc1; /* SI1 RAM counter Tx TDME 8 bits */ - u8 sitfrc1; /* SI1 RAM counter Tx TDMF 8 bits */ - u8 sitgrc1; /* SI1 RAM counter Tx TDMG 8 bits */ - u8 sithrc1; /* SI1 RAM counter Tx TDMH 8 bits */ - u8 sirerc1; /* SI1 RAM counter Rx TDME 8 bits */ - u8 sirfrc1; /* SI1 RAM counter Rx TDMF 8 bits */ - u8 sirgrc1; /* SI1 RAM counter Rx TDMG 8 bits */ - u8 sirhrc1; /* SI1 RAM counter Rx TDMH 8 bits */ - u8 res8[0x8]; - __be32 siml1; /* SI1 multiframe limit register */ - u8 siedm1; /* SI1 extended diagnostic mode register */ - u8 res9[0xBB]; -} __attribute__ ((packed)); - -/* SI Routing Tables */ -struct sir { - u8 tx[0x400]; - u8 rx[0x400]; - u8 res0[0x800]; -} __attribute__ ((packed)); - -/* USB Controller */ -struct qe_usb_ctlr { - u8 usb_usmod; - u8 usb_usadr; - u8 usb_uscom; - u8 res1[1]; - __be16 usb_usep[4]; - u8 res2[4]; - __be16 usb_usber; - u8 res3[2]; - __be16 usb_usbmr; - u8 res4[1]; - u8 usb_usbs; - __be16 usb_ussft; - u8 res5[2]; - __be16 usb_usfrn; - u8 res6[0x22]; -} __attribute__ ((packed)); - -/* MCC */ -struct qe_mcc { - __be32 mcce; /* MCC event register */ - __be32 mccm; /* MCC mask register */ - __be32 mccf; /* MCC configuration register */ - __be32 merl; /* MCC emergency request level register */ - u8 res0[0xF0]; -} __attribute__ ((packed)); - -/* QE UCC Slow */ -struct ucc_slow { - __be32 gumr_l; /* UCCx general mode register (low) */ - __be32 gumr_h; /* UCCx general mode register (high) */ - __be16 upsmr; /* UCCx protocol-specific mode register */ - u8 res0[0x2]; - __be16 utodr; /* UCCx transmit on demand register */ - __be16 udsr; /* UCCx data synchronization register */ - __be16 ucce; /* UCCx event register */ - u8 res1[0x2]; - __be16 uccm; /* UCCx mask register */ - u8 res2[0x1]; - u8 uccs; /* UCCx status register */ - u8 res3[0x24]; - __be16 utpt; - u8 res4[0x52]; - u8 guemr; /* UCC general extended mode register */ -} __attribute__ ((packed)); - -/* QE UCC Fast */ -struct ucc_fast { - __be32 gumr; /* UCCx general mode register */ - __be32 upsmr; /* UCCx protocol-specific mode register */ - __be16 utodr; /* UCCx transmit on demand register */ - u8 res0[0x2]; - __be16 udsr; /* UCCx data synchronization register */ - u8 res1[0x2]; - __be32 ucce; /* UCCx event register */ - __be32 uccm; /* UCCx mask register */ - u8 uccs; /* UCCx status register */ - u8 res2[0x7]; - __be32 urfb; /* UCC receive FIFO base */ - __be16 urfs; /* UCC receive FIFO size */ - u8 res3[0x2]; - __be16 urfet; /* UCC receive FIFO emergency threshold */ - __be16 urfset; /* UCC receive FIFO special emergency - threshold */ - __be32 utfb; /* UCC transmit FIFO base */ - __be16 utfs; /* UCC transmit FIFO size */ - u8 res4[0x2]; - __be16 utfet; /* UCC transmit FIFO emergency threshold */ - u8 res5[0x2]; - __be16 utftt; /* UCC transmit FIFO transmit threshold */ - u8 res6[0x2]; - __be16 utpt; /* UCC transmit polling timer */ - u8 res7[0x2]; - __be32 urtry; /* UCC retry counter register */ - u8 res8[0x4C]; - u8 guemr; /* UCC general extended mode register */ -} __attribute__ ((packed)); - -struct ucc { - union { - struct ucc_slow slow; - struct ucc_fast fast; - u8 res[0x200]; /* UCC blocks are 512 bytes each */ - }; -} __attribute__ ((packed)); - -/* MultiPHY UTOPIA POS Controllers (UPC) */ -struct upc { - __be32 upgcr; /* UTOPIA/POS general configuration register */ - __be32 uplpa; /* UTOPIA/POS last PHY address */ - __be32 uphec; /* ATM HEC register */ - __be32 upuc; /* UTOPIA/POS UCC configuration */ - __be32 updc1; /* UTOPIA/POS device 1 configuration */ - __be32 updc2; /* UTOPIA/POS device 2 configuration */ - __be32 updc3; /* UTOPIA/POS device 3 configuration */ - __be32 updc4; /* UTOPIA/POS device 4 configuration */ - __be32 upstpa; /* UTOPIA/POS STPA threshold */ - u8 res0[0xC]; - __be32 updrs1_h; /* UTOPIA/POS device 1 rate select */ - __be32 updrs1_l; /* UTOPIA/POS device 1 rate select */ - __be32 updrs2_h; /* UTOPIA/POS device 2 rate select */ - __be32 updrs2_l; /* UTOPIA/POS device 2 rate select */ - __be32 updrs3_h; /* UTOPIA/POS device 3 rate select */ - __be32 updrs3_l; /* UTOPIA/POS device 3 rate select */ - __be32 updrs4_h; /* UTOPIA/POS device 4 rate select */ - __be32 updrs4_l; /* UTOPIA/POS device 4 rate select */ - __be32 updrp1; /* UTOPIA/POS device 1 receive priority low */ - __be32 updrp2; /* UTOPIA/POS device 2 receive priority low */ - __be32 updrp3; /* UTOPIA/POS device 3 receive priority low */ - __be32 updrp4; /* UTOPIA/POS device 4 receive priority low */ - __be32 upde1; /* UTOPIA/POS device 1 event */ - __be32 upde2; /* UTOPIA/POS device 2 event */ - __be32 upde3; /* UTOPIA/POS device 3 event */ - __be32 upde4; /* UTOPIA/POS device 4 event */ - __be16 uprp1; - __be16 uprp2; - __be16 uprp3; - __be16 uprp4; - u8 res1[0x8]; - __be16 uptirr1_0; /* Device 1 transmit internal rate 0 */ - __be16 uptirr1_1; /* Device 1 transmit internal rate 1 */ - __be16 uptirr1_2; /* Device 1 transmit internal rate 2 */ - __be16 uptirr1_3; /* Device 1 transmit internal rate 3 */ - __be16 uptirr2_0; /* Device 2 transmit internal rate 0 */ - __be16 uptirr2_1; /* Device 2 transmit internal rate 1 */ - __be16 uptirr2_2; /* Device 2 transmit internal rate 2 */ - __be16 uptirr2_3; /* Device 2 transmit internal rate 3 */ - __be16 uptirr3_0; /* Device 3 transmit internal rate 0 */ - __be16 uptirr3_1; /* Device 3 transmit internal rate 1 */ - __be16 uptirr3_2; /* Device 3 transmit internal rate 2 */ - __be16 uptirr3_3; /* Device 3 transmit internal rate 3 */ - __be16 uptirr4_0; /* Device 4 transmit internal rate 0 */ - __be16 uptirr4_1; /* Device 4 transmit internal rate 1 */ - __be16 uptirr4_2; /* Device 4 transmit internal rate 2 */ - __be16 uptirr4_3; /* Device 4 transmit internal rate 3 */ - __be32 uper1; /* Device 1 port enable register */ - __be32 uper2; /* Device 2 port enable register */ - __be32 uper3; /* Device 3 port enable register */ - __be32 uper4; /* Device 4 port enable register */ - u8 res2[0x150]; -} __attribute__ ((packed)); - -/* SDMA */ -struct sdma { - __be32 sdsr; /* Serial DMA status register */ - __be32 sdmr; /* Serial DMA mode register */ - __be32 sdtr1; /* SDMA system bus threshold register */ - __be32 sdtr2; /* SDMA secondary bus threshold register */ - __be32 sdhy1; /* SDMA system bus hysteresis register */ - __be32 sdhy2; /* SDMA secondary bus hysteresis register */ - __be32 sdta1; /* SDMA system bus address register */ - __be32 sdta2; /* SDMA secondary bus address register */ - __be32 sdtm1; /* SDMA system bus MSNUM register */ - __be32 sdtm2; /* SDMA secondary bus MSNUM register */ - u8 res0[0x10]; - __be32 sdaqr; /* SDMA address bus qualify register */ - __be32 sdaqmr; /* SDMA address bus qualify mask register */ - u8 res1[0x4]; - __be32 sdebcr; /* SDMA CAM entries base register */ - u8 res2[0x38]; -} __attribute__ ((packed)); - -/* Debug Space */ -struct dbg { - __be32 bpdcr; /* Breakpoint debug command register */ - __be32 bpdsr; /* Breakpoint debug status register */ - __be32 bpdmr; /* Breakpoint debug mask register */ - __be32 bprmrr0; /* Breakpoint request mode risc register 0 */ - __be32 bprmrr1; /* Breakpoint request mode risc register 1 */ - u8 res0[0x8]; - __be32 bprmtr0; /* Breakpoint request mode trb register 0 */ - __be32 bprmtr1; /* Breakpoint request mode trb register 1 */ - u8 res1[0x8]; - __be32 bprmir; /* Breakpoint request mode immediate register */ - __be32 bprmsr; /* Breakpoint request mode serial register */ - __be32 bpemr; /* Breakpoint exit mode register */ - u8 res2[0x48]; -} __attribute__ ((packed)); - -/* - * RISC Special Registers (Trap and Breakpoint). These are described in - * the QE Developer's Handbook. - */ -struct rsp { - __be32 tibcr[16]; /* Trap/instruction breakpoint control regs */ - u8 res0[64]; - __be32 ibcr0; - __be32 ibs0; - __be32 ibcnr0; - u8 res1[4]; - __be32 ibcr1; - __be32 ibs1; - __be32 ibcnr1; - __be32 npcr; - __be32 dbcr; - __be32 dbar; - __be32 dbamr; - __be32 dbsr; - __be32 dbcnr; - u8 res2[12]; - __be32 dbdr_h; - __be32 dbdr_l; - __be32 dbdmr_h; - __be32 dbdmr_l; - __be32 bsr; - __be32 bor; - __be32 bior; - u8 res3[4]; - __be32 iatr[4]; - __be32 eccr; /* Exception control configuration register */ - __be32 eicr; - u8 res4[0x100-0xf8]; -} __attribute__ ((packed)); - -struct qe_immap { - struct qe_iram iram; /* I-RAM */ - struct qe_ic_regs ic; /* Interrupt Controller */ - struct cp_qe cp; /* Communications Processor */ - struct qe_mux qmx; /* QE Multiplexer */ - struct qe_timers qet; /* QE Timers */ - struct spi spi[0x2]; /* spi */ - struct qe_mcc mcc; /* mcc */ - struct qe_brg brg; /* brg */ - struct qe_usb_ctlr usb; /* USB */ - struct si1 si1; /* SI */ - u8 res11[0x800]; - struct sir sir; /* SI Routing Tables */ - struct ucc ucc1; /* ucc1 */ - struct ucc ucc3; /* ucc3 */ - struct ucc ucc5; /* ucc5 */ - struct ucc ucc7; /* ucc7 */ - u8 res12[0x600]; - struct upc upc1; /* MultiPHY UTOPIA POS Ctrlr 1*/ - struct ucc ucc2; /* ucc2 */ - struct ucc ucc4; /* ucc4 */ - struct ucc ucc6; /* ucc6 */ - struct ucc ucc8; /* ucc8 */ - u8 res13[0x600]; - struct upc upc2; /* MultiPHY UTOPIA POS Ctrlr 2*/ - struct sdma sdma; /* SDMA */ - struct dbg dbg; /* 0x104080 - 0x1040FF - Debug Space */ - struct rsp rsp[0x2]; /* 0x104100 - 0x1042FF - RISC Special Registers - (Trap and Breakpoint) */ - u8 res14[0x300]; /* 0x104300 - 0x1045FF */ - u8 res15[0x3A00]; /* 0x104600 - 0x107FFF */ - u8 res16[0x8000]; /* 0x108000 - 0x110000 */ - u8 muram[0xC000]; /* 0x110000 - 0x11C000 - Multi-user RAM */ - u8 res17[0x24000]; /* 0x11C000 - 0x140000 */ - u8 res18[0xC0000]; /* 0x140000 - 0x200000 */ -} __attribute__ ((packed)); - -extern struct qe_immap __iomem *qe_immr; -extern phys_addr_t get_qe_base(void); - -/* - * Returns the offset within the QE address space of the given pointer. - * - * Note that the QE does not support 36-bit physical addresses, so if - * get_qe_base() returns a number above 4GB, the caller will probably fail. - */ -static inline phys_addr_t immrbar_virt_to_phys(void *address) -{ - void *q = (void *)qe_immr; - - /* Is it a MURAM address? */ - if ((address >= q) && (address < (q + QE_IMMAP_SIZE))) - return get_qe_base() + (address - q); - - /* It's an address returned by kmalloc */ - return virt_to_phys(address); -} - -#endif /* __KERNEL__ */ -#endif /* _ASM_POWERPC_IMMAP_QE_H */ diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h index 5879fde56f3c..6c1297ec374c 100644 --- a/arch/powerpc/include/asm/io.h +++ b/arch/powerpc/include/asm/io.h @@ -385,6 +385,17 @@ static inline void __raw_writeq(unsigned long v, volatile void __iomem *addr) { *(volatile unsigned long __force *)PCI_FIX_ADDR(addr) = v; } + +/* + * Real mode version of the above. stdcix is only supposed to be used + * in hypervisor real mode as per the architecture spec. + */ +static inline void __raw_rm_writeq(u64 val, volatile void __iomem *paddr) +{ + __asm__ __volatile__("stdcix %0,0,%1" + : : "r" (val), "r" (paddr) : "memory"); +} + #endif /* __powerpc64__ */ /* diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 9fac01cb89c1..8f39796c9da8 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -154,8 +154,8 @@ extern void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat, bool upper, u32 val); extern void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr); extern int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu); -extern pfn_t kvmppc_gpa_to_pfn(struct kvm_vcpu *vcpu, gpa_t gpa, bool writing, - bool *writable); +extern kvm_pfn_t kvmppc_gpa_to_pfn(struct kvm_vcpu *vcpu, gpa_t gpa, + bool writing, bool *writable); extern void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev, unsigned long *rmap, long pte_index, int realmode); extern void kvmppc_update_rmap_change(unsigned long *rmap, unsigned long psize); diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index cfa758c6b4f6..271fefbbe521 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -50,6 +50,10 @@ #define KVM_NR_IRQCHIPS 1 #define KVM_IRQCHIP_NUM_PINS 256 +/* PPC-specific vcpu->requests bit members */ +#define KVM_REQ_WATCHDOG 8 +#define KVM_REQ_EPR_EXIT 9 + #include <linux/mmu_notifier.h> #define KVM_ARCH_WANT_MMU_NOTIFIER diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index c6ef05bd0765..2241d5357129 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -515,7 +515,7 @@ void kvmppc_claim_lpid(long lpid); void kvmppc_free_lpid(long lpid); void kvmppc_init_lpid(unsigned long nr_lpids); -static inline void kvmppc_mmu_flush_icache(pfn_t pfn) +static inline void kvmppc_mmu_flush_icache(kvm_pfn_t pfn) { struct page *page; /* diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h index ba3342bbdbda..7352d3f212df 100644 --- a/arch/powerpc/include/asm/mmu-hash64.h +++ b/arch/powerpc/include/asm/mmu-hash64.h @@ -21,7 +21,7 @@ * need for various slices related matters. Note that this isn't the * complete pgtable.h but only a portion of it. */ -#include <asm/pgtable-ppc64.h> +#include <asm/book3s/64/pgtable.h> #include <asm/bug.h> #include <asm/processor.h> diff --git a/arch/powerpc/include/asm/pgtable-ppc32.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index 9c326565d498..c82cbf52d19e 100644 --- a/arch/powerpc/include/asm/pgtable-ppc32.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -1,5 +1,5 @@ -#ifndef _ASM_POWERPC_PGTABLE_PPC32_H -#define _ASM_POWERPC_PGTABLE_PPC32_H +#ifndef _ASM_POWERPC_NOHASH_32_PGTABLE_H +#define _ASM_POWERPC_NOHASH_32_PGTABLE_H #include <asm-generic/pgtable-nopmd.h> @@ -106,17 +106,15 @@ extern int icache_44x_need_flush; */ #if defined(CONFIG_40x) -#include <asm/pte-40x.h> +#include <asm/nohash/32/pte-40x.h> #elif defined(CONFIG_44x) -#include <asm/pte-44x.h> +#include <asm/nohash/32/pte-44x.h> #elif defined(CONFIG_FSL_BOOKE) && defined(CONFIG_PTE_64BIT) -#include <asm/pte-book3e.h> +#include <asm/nohash/pte-book3e.h> #elif defined(CONFIG_FSL_BOOKE) -#include <asm/pte-fsl-booke.h> +#include <asm/nohash/32/pte-fsl-booke.h> #elif defined(CONFIG_8xx) -#include <asm/pte-8xx.h> -#else /* CONFIG_6xx */ -#include <asm/pte-hash32.h> +#include <asm/nohash/32/pte-8xx.h> #endif /* And here we include common definitions */ @@ -130,7 +128,12 @@ extern int icache_44x_need_flush; #define pmd_none(pmd) (!pmd_val(pmd)) #define pmd_bad(pmd) (pmd_val(pmd) & _PMD_BAD) #define pmd_present(pmd) (pmd_val(pmd) & _PMD_PRESENT_MASK) -#define pmd_clear(pmdp) do { pmd_val(*(pmdp)) = 0; } while (0) +static inline void pmd_clear(pmd_t *pmdp) +{ + *pmdp = __pmd(0); +} + + /* * When flushing the tlb entry for a page, we also need to flush the hash @@ -337,4 +340,4 @@ extern int get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep, #endif /* !__ASSEMBLY__ */ -#endif /* _ASM_POWERPC_PGTABLE_PPC32_H */ +#endif /* __ASM_POWERPC_NOHASH_32_PGTABLE_H */ diff --git a/arch/powerpc/include/asm/pte-40x.h b/arch/powerpc/include/asm/nohash/32/pte-40x.h index 486b1ef81338..9624ebdacc47 100644 --- a/arch/powerpc/include/asm/pte-40x.h +++ b/arch/powerpc/include/asm/nohash/32/pte-40x.h @@ -1,5 +1,5 @@ -#ifndef _ASM_POWERPC_PTE_40x_H -#define _ASM_POWERPC_PTE_40x_H +#ifndef _ASM_POWERPC_NOHASH_32_PTE_40x_H +#define _ASM_POWERPC_NOHASH_32_PTE_40x_H #ifdef __KERNEL__ /* @@ -61,4 +61,4 @@ #define PTE_ATOMIC_UPDATES 1 #endif /* __KERNEL__ */ -#endif /* _ASM_POWERPC_PTE_40x_H */ +#endif /* _ASM_POWERPC_NOHASH_32_PTE_40x_H */ diff --git a/arch/powerpc/include/asm/pte-44x.h b/arch/powerpc/include/asm/nohash/32/pte-44x.h index 36f75fab23f5..fdab41c654ef 100644 --- a/arch/powerpc/include/asm/pte-44x.h +++ b/arch/powerpc/include/asm/nohash/32/pte-44x.h @@ -1,5 +1,5 @@ -#ifndef _ASM_POWERPC_PTE_44x_H -#define _ASM_POWERPC_PTE_44x_H +#ifndef _ASM_POWERPC_NOHASH_32_PTE_44x_H +#define _ASM_POWERPC_NOHASH_32_PTE_44x_H #ifdef __KERNEL__ /* @@ -94,4 +94,4 @@ #endif /* __KERNEL__ */ -#endif /* _ASM_POWERPC_PTE_44x_H */ +#endif /* _ASM_POWERPC_NOHASH_32_PTE_44x_H */ diff --git a/arch/powerpc/include/asm/pte-8xx.h b/arch/powerpc/include/asm/nohash/32/pte-8xx.h index a0e2ba960976..3742b1919661 100644 --- a/arch/powerpc/include/asm/pte-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/pte-8xx.h @@ -1,5 +1,5 @@ -#ifndef _ASM_POWERPC_PTE_8xx_H -#define _ASM_POWERPC_PTE_8xx_H +#ifndef _ASM_POWERPC_NOHASH_32_PTE_8xx_H +#define _ASM_POWERPC_NOHASH_32_PTE_8xx_H #ifdef __KERNEL__ /* @@ -62,4 +62,4 @@ _PAGE_HWWRITE | _PAGE_EXEC) #endif /* __KERNEL__ */ -#endif /* _ASM_POWERPC_PTE_8xx_H */ +#endif /* _ASM_POWERPC_NOHASH_32_PTE_8xx_H */ diff --git a/arch/powerpc/include/asm/pte-fsl-booke.h b/arch/powerpc/include/asm/nohash/32/pte-fsl-booke.h index 9f5c3d04a1a3..5422d00c6145 100644 --- a/arch/powerpc/include/asm/pte-fsl-booke.h +++ b/arch/powerpc/include/asm/nohash/32/pte-fsl-booke.h @@ -1,5 +1,5 @@ -#ifndef _ASM_POWERPC_PTE_FSL_BOOKE_H -#define _ASM_POWERPC_PTE_FSL_BOOKE_H +#ifndef _ASM_POWERPC_NOHASH_32_PTE_FSL_BOOKE_H +#define _ASM_POWERPC_NOHASH_32_PTE_FSL_BOOKE_H #ifdef __KERNEL__ /* PTE bit definitions for Freescale BookE SW loaded TLB MMU based @@ -37,4 +37,4 @@ #define PTE_WIMGE_SHIFT (6) #endif /* __KERNEL__ */ -#endif /* _ASM_POWERPC_PTE_FSL_BOOKE_H */ +#endif /* _ASM_POWERPC_NOHASH_32_PTE_FSL_BOOKE_H */ diff --git a/arch/powerpc/include/asm/pgtable-ppc64-4k.h b/arch/powerpc/include/asm/nohash/64/pgtable-4k.h index 132ee1d482c2..fc7d51753f81 100644 --- a/arch/powerpc/include/asm/pgtable-ppc64-4k.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable-4k.h @@ -1,5 +1,5 @@ -#ifndef _ASM_POWERPC_PGTABLE_PPC64_4K_H -#define _ASM_POWERPC_PGTABLE_PPC64_4K_H +#ifndef _ASM_POWERPC_NOHASH_64_PGTABLE_4K_H +#define _ASM_POWERPC_NOHASH_64_PGTABLE_4K_H /* * Entries per page directory level. The PTE level must use a 64b record * for each page table entry. The PMD and PGD level use a 32b record for @@ -55,11 +55,15 @@ #define pgd_none(pgd) (!pgd_val(pgd)) #define pgd_bad(pgd) (pgd_val(pgd) == 0) #define pgd_present(pgd) (pgd_val(pgd) != 0) -#define pgd_clear(pgdp) (pgd_val(*(pgdp)) = 0) #define pgd_page_vaddr(pgd) (pgd_val(pgd) & ~PGD_MASKED_BITS) #ifndef __ASSEMBLY__ +static inline void pgd_clear(pgd_t *pgdp) +{ + *pgdp = __pgd(0); +} + static inline pte_t pgd_pte(pgd_t pgd) { return __pte(pgd_val(pgd)); @@ -85,4 +89,4 @@ extern struct page *pgd_page(pgd_t pgd); #define remap_4k_pfn(vma, addr, pfn, prot) \ remap_pfn_range((vma), (addr), (pfn), PAGE_SIZE, (prot)) -#endif /* _ASM_POWERPC_PGTABLE_PPC64_4K_H */ +#endif /* _ _ASM_POWERPC_NOHASH_64_PGTABLE_4K_H */ diff --git a/arch/powerpc/include/asm/pgtable-ppc64-64k.h b/arch/powerpc/include/asm/nohash/64/pgtable-64k.h index 1de35bbd02a6..570fb30be21c 100644 --- a/arch/powerpc/include/asm/pgtable-ppc64-64k.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable-64k.h @@ -1,5 +1,5 @@ -#ifndef _ASM_POWERPC_PGTABLE_PPC64_64K_H -#define _ASM_POWERPC_PGTABLE_PPC64_64K_H +#ifndef _ASM_POWERPC_NOHASH_64_PGTABLE_64K_H +#define _ASM_POWERPC_NOHASH_64_PGTABLE_64K_H #include <asm-generic/pgtable-nopud.h> @@ -9,8 +9,19 @@ #define PUD_INDEX_SIZE 0 #define PGD_INDEX_SIZE 12 +/* + * we support 32 fragments per PTE page of 64K size + */ +#define PTE_FRAG_NR 32 +/* + * We use a 2K PTE page fragment and another 2K for storing + * real_pte_t hash index + */ +#define PTE_FRAG_SIZE_SHIFT 11 +#define PTE_FRAG_SIZE (1UL << PTE_FRAG_SIZE_SHIFT) + #ifndef __ASSEMBLY__ -#define PTE_TABLE_SIZE (sizeof(real_pte_t) << PTE_INDEX_SIZE) +#define PTE_TABLE_SIZE PTE_FRAG_SIZE #define PMD_TABLE_SIZE (sizeof(pmd_t) << PMD_INDEX_SIZE) #define PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE) #endif /* __ASSEMBLY__ */ @@ -32,13 +43,15 @@ #define PGDIR_SIZE (1UL << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE-1)) -/* Bits to mask out from a PMD to get to the PTE page */ -/* PMDs point to PTE table fragments which are 4K aligned. */ -#define PMD_MASKED_BITS 0xfff +/* + * Bits to mask out from a PMD to get to the PTE page + * PMDs point to PTE table fragments which are PTE_FRAG_SIZE aligned. + */ +#define PMD_MASKED_BITS (PTE_FRAG_SIZE - 1) /* Bits to mask out from a PGD/PUD to get to the PMD page */ #define PUD_MASKED_BITS 0x1ff #define pgd_pte(pgd) (pud_pte(((pud_t){ pgd }))) #define pte_pgd(pte) ((pgd_t)pte_pud(pte)) -#endif /* _ASM_POWERPC_PGTABLE_PPC64_64K_H */ +#endif /* _ASM_POWERPC_NOHASH_64_PGTABLE_64K_H */ diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index 3245f2d96d4f..b9f734dd5b81 100644 --- a/arch/powerpc/include/asm/pgtable-ppc64.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -1,14 +1,14 @@ -#ifndef _ASM_POWERPC_PGTABLE_PPC64_H_ -#define _ASM_POWERPC_PGTABLE_PPC64_H_ +#ifndef _ASM_POWERPC_NOHASH_64_PGTABLE_H +#define _ASM_POWERPC_NOHASH_64_PGTABLE_H /* * This file contains the functions and defines necessary to modify and use * the ppc64 hashed page table. */ #ifdef CONFIG_PPC_64K_PAGES -#include <asm/pgtable-ppc64-64k.h> +#include <asm/nohash/64/pgtable-64k.h> #else -#include <asm/pgtable-ppc64-4k.h> +#include <asm/nohash/64/pgtable-4k.h> #endif #include <asm/barrier.h> @@ -18,7 +18,7 @@ * Size of EA range mapped by our pagetables. */ #define PGTABLE_EADDR_SIZE (PTE_INDEX_SIZE + PMD_INDEX_SIZE + \ - PUD_INDEX_SIZE + PGD_INDEX_SIZE + PAGE_SHIFT) + PUD_INDEX_SIZE + PGD_INDEX_SIZE + PAGE_SHIFT) #define PGTABLE_RANGE (ASM_CONST(1) << PGTABLE_EADDR_SIZE) #ifdef CONFIG_TRANSPARENT_HUGEPAGE @@ -97,11 +97,7 @@ /* * Include the PTE bits definitions */ -#ifdef CONFIG_PPC_BOOK3S -#include <asm/pte-hash64.h> -#else -#include <asm/pte-book3e.h> -#endif +#include <asm/nohash/pte-book3e.h> #include <asm/pte-common.h> #ifdef CONFIG_PPC_MM_SLICES @@ -110,59 +106,47 @@ #endif /* CONFIG_PPC_MM_SLICES */ #ifndef __ASSEMBLY__ - -/* - * This is the default implementation of various PTE accessors, it's - * used in all cases except Book3S with 64K pages where we have a - * concept of sub-pages - */ -#ifndef __real_pte - -#ifdef CONFIG_STRICT_MM_TYPECHECKS -#define __real_pte(e,p) ((real_pte_t){(e)}) -#define __rpte_to_pte(r) ((r).pte) -#else -#define __real_pte(e,p) (e) -#define __rpte_to_pte(r) (__pte(r)) -#endif -#define __rpte_to_hidx(r,index) (pte_val(__rpte_to_pte(r)) >> 12) - -#define pte_iterate_hashed_subpages(rpte, psize, va, index, shift) \ - do { \ - index = 0; \ - shift = mmu_psize_defs[psize].shift; \ - -#define pte_iterate_hashed_end() } while(0) - -/* - * We expect this to be called only for user addresses or kernel virtual - * addresses other than the linear mapping. - */ -#define pte_pagesize_index(mm, addr, pte) MMU_PAGE_4K - -#endif /* __real_pte */ - - /* pte_clear moved to later in this file */ #define PMD_BAD_BITS (PTE_TABLE_SIZE-1) #define PUD_BAD_BITS (PMD_TABLE_SIZE-1) -#define pmd_set(pmdp, pmdval) (pmd_val(*(pmdp)) = (pmdval)) +static inline void pmd_set(pmd_t *pmdp, unsigned long val) +{ + *pmdp = __pmd(val); +} + +static inline void pmd_clear(pmd_t *pmdp) +{ + *pmdp = __pmd(0); +} + +static inline pte_t pmd_pte(pmd_t pmd) +{ + return __pte(pmd_val(pmd)); +} + #define pmd_none(pmd) (!pmd_val(pmd)) #define pmd_bad(pmd) (!is_kernel_addr(pmd_val(pmd)) \ || (pmd_val(pmd) & PMD_BAD_BITS)) #define pmd_present(pmd) (!pmd_none(pmd)) -#define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0) #define pmd_page_vaddr(pmd) (pmd_val(pmd) & ~PMD_MASKED_BITS) extern struct page *pmd_page(pmd_t pmd); -#define pud_set(pudp, pudval) (pud_val(*(pudp)) = (pudval)) +static inline void pud_set(pud_t *pudp, unsigned long val) +{ + *pudp = __pud(val); +} + +static inline void pud_clear(pud_t *pudp) +{ + *pudp = __pud(0); +} + #define pud_none(pud) (!pud_val(pud)) #define pud_bad(pud) (!is_kernel_addr(pud_val(pud)) \ || (pud_val(pud) & PUD_BAD_BITS)) #define pud_present(pud) (pud_val(pud) != 0) -#define pud_clear(pudp) (pud_val(*(pudp)) = 0) #define pud_page_vaddr(pud) (pud_val(pud) & ~PUD_MASKED_BITS) extern struct page *pud_page(pud_t pud); @@ -177,9 +161,13 @@ static inline pud_t pte_pud(pte_t pte) return __pud(pte_val(pte)); } #define pud_write(pud) pte_write(pud_pte(pud)) -#define pgd_set(pgdp, pudp) ({pgd_val(*(pgdp)) = (unsigned long)(pudp);}) #define pgd_write(pgd) pte_write(pgd_pte(pgd)) +static inline void pgd_set(pgd_t *pgdp, unsigned long val) +{ + *pgdp = __pgd(val); +} + /* * Find an entry in a page-table-directory. We combine the address region * (the high order N bits) and the pgd portion of the address. @@ -373,254 +361,4 @@ void pgtable_cache_add(unsigned shift, void (*ctor)(void *)); void pgtable_cache_init(void); #endif /* __ASSEMBLY__ */ -/* - * THP pages can't be special. So use the _PAGE_SPECIAL - */ -#define _PAGE_SPLITTING _PAGE_SPECIAL - -/* - * We need to differentiate between explicit huge page and THP huge - * page, since THP huge page also need to track real subpage details - */ -#define _PAGE_THP_HUGE _PAGE_4K_PFN - -/* - * set of bits not changed in pmd_modify. - */ -#define _HPAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | \ - _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_SPLITTING | \ - _PAGE_THP_HUGE) - -#ifndef __ASSEMBLY__ -/* - * The linux hugepage PMD now include the pmd entries followed by the address - * to the stashed pgtable_t. The stashed pgtable_t contains the hpte bits. - * [ 1 bit secondary | 3 bit hidx | 1 bit valid | 000]. We use one byte per - * each HPTE entry. With 16MB hugepage and 64K HPTE we need 256 entries and - * with 4K HPTE we need 4096 entries. Both will fit in a 4K pgtable_t. - * - * The last three bits are intentionally left to zero. This memory location - * are also used as normal page PTE pointers. So if we have any pointers - * left around while we collapse a hugepage, we need to make sure - * _PAGE_PRESENT bit of that is zero when we look at them - */ -static inline unsigned int hpte_valid(unsigned char *hpte_slot_array, int index) -{ - return (hpte_slot_array[index] >> 3) & 0x1; -} - -static inline unsigned int hpte_hash_index(unsigned char *hpte_slot_array, - int index) -{ - return hpte_slot_array[index] >> 4; -} - -static inline void mark_hpte_slot_valid(unsigned char *hpte_slot_array, - unsigned int index, unsigned int hidx) -{ - hpte_slot_array[index] = hidx << 4 | 0x1 << 3; -} - -struct page *realmode_pfn_to_page(unsigned long pfn); - -static inline char *get_hpte_slot_array(pmd_t *pmdp) -{ - /* - * The hpte hindex is stored in the pgtable whose address is in the - * second half of the PMD - * - * Order this load with the test for pmd_trans_huge in the caller - */ - smp_rmb(); - return *(char **)(pmdp + PTRS_PER_PMD); - - -} - -#ifdef CONFIG_TRANSPARENT_HUGEPAGE -extern void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr, - pmd_t *pmdp, unsigned long old_pmd); -extern pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot); -extern pmd_t mk_pmd(struct page *page, pgprot_t pgprot); -extern pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot); -extern void set_pmd_at(struct mm_struct *mm, unsigned long addr, - pmd_t *pmdp, pmd_t pmd); -extern void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr, - pmd_t *pmd); -/* - * - * For core kernel code by design pmd_trans_huge is never run on any hugetlbfs - * page. The hugetlbfs page table walking and mangling paths are totally - * separated form the core VM paths and they're differentiated by - * VM_HUGETLB being set on vm_flags well before any pmd_trans_huge could run. - * - * pmd_trans_huge() is defined as false at build time if - * CONFIG_TRANSPARENT_HUGEPAGE=n to optimize away code blocks at build - * time in such case. - * - * For ppc64 we need to differntiate from explicit hugepages from THP, because - * for THP we also track the subpage details at the pmd level. We don't do - * that for explicit huge pages. - * - */ -static inline int pmd_trans_huge(pmd_t pmd) -{ - /* - * leaf pte for huge page, bottom two bits != 00 - */ - return (pmd_val(pmd) & 0x3) && (pmd_val(pmd) & _PAGE_THP_HUGE); -} - -static inline int pmd_trans_splitting(pmd_t pmd) -{ - if (pmd_trans_huge(pmd)) - return pmd_val(pmd) & _PAGE_SPLITTING; - return 0; -} - -extern int has_transparent_hugepage(void); -#else -static inline void hpte_do_hugepage_flush(struct mm_struct *mm, - unsigned long addr, pmd_t *pmdp, - unsigned long old_pmd) -{ - - WARN(1, "%s called with THP disabled\n", __func__); -} -#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ - -static inline int pmd_large(pmd_t pmd) -{ - /* - * leaf pte for huge page, bottom two bits != 00 - */ - return ((pmd_val(pmd) & 0x3) != 0x0); -} - -static inline pte_t pmd_pte(pmd_t pmd) -{ - return __pte(pmd_val(pmd)); -} - -static inline pmd_t pte_pmd(pte_t pte) -{ - return __pmd(pte_val(pte)); -} - -static inline pte_t *pmdp_ptep(pmd_t *pmd) -{ - return (pte_t *)pmd; -} - -#define pmd_pfn(pmd) pte_pfn(pmd_pte(pmd)) -#define pmd_dirty(pmd) pte_dirty(pmd_pte(pmd)) -#define pmd_young(pmd) pte_young(pmd_pte(pmd)) -#define pmd_mkold(pmd) pte_pmd(pte_mkold(pmd_pte(pmd))) -#define pmd_wrprotect(pmd) pte_pmd(pte_wrprotect(pmd_pte(pmd))) -#define pmd_mkdirty(pmd) pte_pmd(pte_mkdirty(pmd_pte(pmd))) -#define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd))) -#define pmd_mkwrite(pmd) pte_pmd(pte_mkwrite(pmd_pte(pmd))) - -#define __HAVE_ARCH_PMD_WRITE -#define pmd_write(pmd) pte_write(pmd_pte(pmd)) - -static inline pmd_t pmd_mkhuge(pmd_t pmd) -{ - /* Do nothing, mk_pmd() does this part. */ - return pmd; -} - -static inline pmd_t pmd_mknotpresent(pmd_t pmd) -{ - pmd_val(pmd) &= ~_PAGE_PRESENT; - return pmd; -} - -static inline pmd_t pmd_mksplitting(pmd_t pmd) -{ - pmd_val(pmd) |= _PAGE_SPLITTING; - return pmd; -} - -#define __HAVE_ARCH_PMD_SAME -static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b) -{ - return (((pmd_val(pmd_a) ^ pmd_val(pmd_b)) & ~_PAGE_HPTEFLAGS) == 0); -} - -#define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS -extern int pmdp_set_access_flags(struct vm_area_struct *vma, - unsigned long address, pmd_t *pmdp, - pmd_t entry, int dirty); - -extern unsigned long pmd_hugepage_update(struct mm_struct *mm, - unsigned long addr, - pmd_t *pmdp, - unsigned long clr, - unsigned long set); - -static inline int __pmdp_test_and_clear_young(struct mm_struct *mm, - unsigned long addr, pmd_t *pmdp) -{ - unsigned long old; - - if ((pmd_val(*pmdp) & (_PAGE_ACCESSED | _PAGE_HASHPTE)) == 0) - return 0; - old = pmd_hugepage_update(mm, addr, pmdp, _PAGE_ACCESSED, 0); - return ((old & _PAGE_ACCESSED) != 0); -} - -#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG -extern int pmdp_test_and_clear_young(struct vm_area_struct *vma, - unsigned long address, pmd_t *pmdp); -#define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH -extern int pmdp_clear_flush_young(struct vm_area_struct *vma, - unsigned long address, pmd_t *pmdp); - -#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR -extern pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, - unsigned long addr, pmd_t *pmdp); - -#define __HAVE_ARCH_PMDP_SET_WRPROTECT -static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr, - pmd_t *pmdp) -{ - - if ((pmd_val(*pmdp) & _PAGE_RW) == 0) - return; - - pmd_hugepage_update(mm, addr, pmdp, _PAGE_RW, 0); -} - -#define __HAVE_ARCH_PMDP_SPLITTING_FLUSH -extern void pmdp_splitting_flush(struct vm_area_struct *vma, - unsigned long address, pmd_t *pmdp); - -extern pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, - unsigned long address, pmd_t *pmdp); -#define pmdp_collapse_flush pmdp_collapse_flush - -#define __HAVE_ARCH_PGTABLE_DEPOSIT -extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, - pgtable_t pgtable); -#define __HAVE_ARCH_PGTABLE_WITHDRAW -extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp); - -#define __HAVE_ARCH_PMDP_INVALIDATE -extern void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, - pmd_t *pmdp); - -#define pmd_move_must_withdraw pmd_move_must_withdraw -struct spinlock; -static inline int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl, - struct spinlock *old_pmd_ptl) -{ - /* - * Archs like ppc64 use pgtable to store per pmd - * specific information. So when we switch the pmd, - * we should also withdraw and deposit the pgtable - */ - return true; -} -#endif /* __ASSEMBLY__ */ -#endif /* _ASM_POWERPC_PGTABLE_PPC64_H_ */ +#endif /* _ASM_POWERPC_NOHASH_64_PGTABLE_H */ diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h new file mode 100644 index 000000000000..1263c22d60d8 --- /dev/null +++ b/arch/powerpc/include/asm/nohash/pgtable.h @@ -0,0 +1,252 @@ +#ifndef _ASM_POWERPC_NOHASH_PGTABLE_H +#define _ASM_POWERPC_NOHASH_PGTABLE_H + +#if defined(CONFIG_PPC64) +#include <asm/nohash/64/pgtable.h> +#else +#include <asm/nohash/32/pgtable.h> +#endif + +#ifndef __ASSEMBLY__ + +/* Generic accessors to PTE bits */ +static inline int pte_write(pte_t pte) +{ + return (pte_val(pte) & (_PAGE_RW | _PAGE_RO)) != _PAGE_RO; +} +static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } +static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } +static inline int pte_special(pte_t pte) { return pte_val(pte) & _PAGE_SPECIAL; } +static inline int pte_none(pte_t pte) { return (pte_val(pte) & ~_PTE_NONE_MASK) == 0; } +static inline pgprot_t pte_pgprot(pte_t pte) { return __pgprot(pte_val(pte) & PAGE_PROT_BITS); } + +#ifdef CONFIG_NUMA_BALANCING +/* + * These work without NUMA balancing but the kernel does not care. See the + * comment in include/asm-generic/pgtable.h . On powerpc, this will only + * work for user pages and always return true for kernel pages. + */ +static inline int pte_protnone(pte_t pte) +{ + return (pte_val(pte) & + (_PAGE_PRESENT | _PAGE_USER)) == _PAGE_PRESENT; +} + +static inline int pmd_protnone(pmd_t pmd) +{ + return pte_protnone(pmd_pte(pmd)); +} +#endif /* CONFIG_NUMA_BALANCING */ + +static inline int pte_present(pte_t pte) +{ + return pte_val(pte) & _PAGE_PRESENT; +} + +/* Conversion functions: convert a page and protection to a page entry, + * and a page entry and page directory to the page they refer to. + * + * Even if PTEs can be unsigned long long, a PFN is always an unsigned + * long for now. + */ +static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot) { + return __pte(((pte_basic_t)(pfn) << PTE_RPN_SHIFT) | + pgprot_val(pgprot)); } +static inline unsigned long pte_pfn(pte_t pte) { + return pte_val(pte) >> PTE_RPN_SHIFT; } + +/* Generic modifiers for PTE bits */ +static inline pte_t pte_wrprotect(pte_t pte) +{ + pte_basic_t ptev; + + ptev = pte_val(pte) & ~(_PAGE_RW | _PAGE_HWWRITE); + ptev |= _PAGE_RO; + return __pte(ptev); +} + +static inline pte_t pte_mkclean(pte_t pte) +{ + return __pte(pte_val(pte) & ~(_PAGE_DIRTY | _PAGE_HWWRITE)); +} + +static inline pte_t pte_mkold(pte_t pte) +{ + return __pte(pte_val(pte) & ~_PAGE_ACCESSED); +} + +static inline pte_t pte_mkwrite(pte_t pte) +{ + pte_basic_t ptev; + + ptev = pte_val(pte) & ~_PAGE_RO; + ptev |= _PAGE_RW; + return __pte(ptev); +} + +static inline pte_t pte_mkdirty(pte_t pte) +{ + return __pte(pte_val(pte) | _PAGE_DIRTY); +} + +static inline pte_t pte_mkyoung(pte_t pte) +{ + return __pte(pte_val(pte) | _PAGE_ACCESSED); +} + +static inline pte_t pte_mkspecial(pte_t pte) +{ + return __pte(pte_val(pte) | _PAGE_SPECIAL); +} + +static inline pte_t pte_mkhuge(pte_t pte) +{ + return pte; +} + +static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) +{ + return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot)); +} + +/* Insert a PTE, top-level function is out of line. It uses an inline + * low level function in the respective pgtable-* files + */ +extern void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, + pte_t pte); + +/* This low level function performs the actual PTE insertion + * Setting the PTE depends on the MMU type and other factors. It's + * an horrible mess that I'm not going to try to clean up now but + * I'm keeping it in one place rather than spread around + */ +static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte, int percpu) +{ +#if defined(CONFIG_PPC_STD_MMU_32) && defined(CONFIG_SMP) && !defined(CONFIG_PTE_64BIT) + /* First case is 32-bit Hash MMU in SMP mode with 32-bit PTEs. We use the + * helper pte_update() which does an atomic update. We need to do that + * because a concurrent invalidation can clear _PAGE_HASHPTE. If it's a + * per-CPU PTE such as a kmap_atomic, we do a simple update preserving + * the hash bits instead (ie, same as the non-SMP case) + */ + if (percpu) + *ptep = __pte((pte_val(*ptep) & _PAGE_HASHPTE) + | (pte_val(pte) & ~_PAGE_HASHPTE)); + else + pte_update(ptep, ~_PAGE_HASHPTE, pte_val(pte)); + +#elif defined(CONFIG_PPC32) && defined(CONFIG_PTE_64BIT) + /* Second case is 32-bit with 64-bit PTE. In this case, we + * can just store as long as we do the two halves in the right order + * with a barrier in between. This is possible because we take care, + * in the hash code, to pre-invalidate if the PTE was already hashed, + * which synchronizes us with any concurrent invalidation. + * In the percpu case, we also fallback to the simple update preserving + * the hash bits + */ + if (percpu) { + *ptep = __pte((pte_val(*ptep) & _PAGE_HASHPTE) + | (pte_val(pte) & ~_PAGE_HASHPTE)); + return; + } +#if _PAGE_HASHPTE != 0 + if (pte_val(*ptep) & _PAGE_HASHPTE) + flush_hash_entry(mm, ptep, addr); +#endif + __asm__ __volatile__("\ + stw%U0%X0 %2,%0\n\ + eieio\n\ + stw%U0%X0 %L2,%1" + : "=m" (*ptep), "=m" (*((unsigned char *)ptep+4)) + : "r" (pte) : "memory"); + +#elif defined(CONFIG_PPC_STD_MMU_32) + /* Third case is 32-bit hash table in UP mode, we need to preserve + * the _PAGE_HASHPTE bit since we may not have invalidated the previous + * translation in the hash yet (done in a subsequent flush_tlb_xxx()) + * and see we need to keep track that this PTE needs invalidating + */ + *ptep = __pte((pte_val(*ptep) & _PAGE_HASHPTE) + | (pte_val(pte) & ~_PAGE_HASHPTE)); + +#else + /* Anything else just stores the PTE normally. That covers all 64-bit + * cases, and 32-bit non-hash with 32-bit PTEs. + */ + *ptep = pte; + +#ifdef CONFIG_PPC_BOOK3E_64 + /* + * With hardware tablewalk, a sync is needed to ensure that + * subsequent accesses see the PTE we just wrote. Unlike userspace + * mappings, we can't tolerate spurious faults, so make sure + * the new PTE will be seen the first time. + */ + if (is_kernel_addr(addr)) + mb(); +#endif +#endif +} + + +#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS +extern int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address, + pte_t *ptep, pte_t entry, int dirty); + +/* + * Macro to mark a page protection value as "uncacheable". + */ + +#define _PAGE_CACHE_CTL (_PAGE_COHERENT | _PAGE_GUARDED | _PAGE_NO_CACHE | \ + _PAGE_WRITETHRU) + +#define pgprot_noncached(prot) (__pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | \ + _PAGE_NO_CACHE | _PAGE_GUARDED)) + +#define pgprot_noncached_wc(prot) (__pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | \ + _PAGE_NO_CACHE)) + +#define pgprot_cached(prot) (__pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | \ + _PAGE_COHERENT)) + +#define pgprot_cached_wthru(prot) (__pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | \ + _PAGE_COHERENT | _PAGE_WRITETHRU)) + +#define pgprot_cached_noncoherent(prot) \ + (__pgprot(pgprot_val(prot) & ~_PAGE_CACHE_CTL)) + +#define pgprot_writecombine pgprot_noncached_wc + +struct file; +extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, + unsigned long size, pgprot_t vma_prot); +#define __HAVE_PHYS_MEM_ACCESS_PROT + +#ifdef CONFIG_HUGETLB_PAGE +static inline int hugepd_ok(hugepd_t hpd) +{ + return (hpd.pd > 0); +} + +static inline int pmd_huge(pmd_t pmd) +{ + return 0; +} + +static inline int pud_huge(pud_t pud) +{ + return 0; +} + +static inline int pgd_huge(pgd_t pgd) +{ + return 0; +} +#define pgd_huge pgd_huge + +#define is_hugepd(hpd) (hugepd_ok(hpd)) +#endif + +#endif /* __ASSEMBLY__ */ +#endif diff --git a/arch/powerpc/include/asm/pte-book3e.h b/arch/powerpc/include/asm/nohash/pte-book3e.h index 8d8473278d91..e16807b78edf 100644 --- a/arch/powerpc/include/asm/pte-book3e.h +++ b/arch/powerpc/include/asm/nohash/pte-book3e.h @@ -1,5 +1,5 @@ -#ifndef _ASM_POWERPC_PTE_BOOK3E_H -#define _ASM_POWERPC_PTE_BOOK3E_H +#ifndef _ASM_POWERPC_NOHASH_PTE_BOOK3E_H +#define _ASM_POWERPC_NOHASH_PTE_BOOK3E_H #ifdef __KERNEL__ /* PTE bit definitions for processors compliant to the Book3E @@ -84,4 +84,4 @@ #endif #endif /* __KERNEL__ */ -#endif /* _ASM_POWERPC_PTE_FSL_BOOKE_H */ +#endif /* _ASM_POWERPC_NOHASH_PTE_BOOK3E_H */ diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h index 8374afed9d0a..f8faaaeeca1e 100644 --- a/arch/powerpc/include/asm/opal-api.h +++ b/arch/powerpc/include/asm/opal-api.h @@ -157,7 +157,8 @@ #define OPAL_LEDS_GET_INDICATOR 114 #define OPAL_LEDS_SET_INDICATOR 115 #define OPAL_CEC_REBOOT2 116 -#define OPAL_LAST 116 +#define OPAL_CONSOLE_FLUSH 117 +#define OPAL_LAST 117 /* Device tree flags */ diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 800115910e43..07a99e638449 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -35,6 +35,7 @@ int64_t opal_console_read(int64_t term_number, __be64 *length, uint8_t *buffer); int64_t opal_console_write_buffer_space(int64_t term_number, __be64 *length); +int64_t opal_console_flush(int64_t term_number); int64_t opal_rtc_read(__be32 *year_month_day, __be64 *hour_minute_second_millisecond); int64_t opal_rtc_write(uint32_t year_month_day, @@ -262,6 +263,8 @@ extern int opal_resync_timebase(void); extern void opal_lpc_init(void); +extern void opal_kmsg_init(void); + extern int opal_event_request(unsigned int opal_event_nr); struct opal_sg_list *opal_vmalloc_to_sg_list(void *vmalloc_addr, diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 70bd4381f8e6..546540b91095 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -16,6 +16,7 @@ #ifdef CONFIG_PPC64 +#include <linux/string.h> #include <asm/types.h> #include <asm/lppaca.h> #include <asm/mmu.h> @@ -131,7 +132,16 @@ struct paca_struct { struct tlb_core_data tcd; #endif /* CONFIG_PPC_BOOK3E */ - mm_context_t context; +#ifdef CONFIG_PPC_BOOK3S + mm_context_id_t mm_ctx_id; +#ifdef CONFIG_PPC_MM_SLICES + u64 mm_ctx_low_slices_psize; + unsigned char mm_ctx_high_slices_psize[SLICE_ARRAY_SIZE]; +#else + u16 mm_ctx_user_psize; + u16 mm_ctx_sllp; +#endif +#endif /* * then miscellaneous read-write fields @@ -194,6 +204,23 @@ struct paca_struct { #endif }; +#ifdef CONFIG_PPC_BOOK3S +static inline void copy_mm_to_paca(mm_context_t *context) +{ + get_paca()->mm_ctx_id = context->id; +#ifdef CONFIG_PPC_MM_SLICES + get_paca()->mm_ctx_low_slices_psize = context->low_slices_psize; + memcpy(&get_paca()->mm_ctx_high_slices_psize, + &context->high_slices_psize, SLICE_ARRAY_SIZE); +#else + get_paca()->mm_ctx_user_psize = context->user_psize; + get_paca()->mm_ctx_sllp = context->sllp; +#endif +} +#else +static inline void copy_mm_to_paca(mm_context_t *context){} +#endif + extern struct paca_struct *paca; extern void initialise_paca(struct paca_struct *new_paca, int cpu); extern void setup_paca(struct paca_struct *new_paca); diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index 3140c19c448c..e34124f6fbf2 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -286,8 +286,11 @@ extern long long virt_phys_offset; /* PTE level */ typedef struct { pte_basic_t pte; } pte_t; -#define pte_val(x) ((x).pte) #define __pte(x) ((pte_t) { (x) }) +static inline pte_basic_t pte_val(pte_t x) +{ + return x.pte; +} /* 64k pages additionally define a bigger "real PTE" type that gathers * the "second half" part of the PTE for pseudo 64k pages @@ -301,21 +304,30 @@ typedef struct { pte_t pte; } real_pte_t; /* PMD level */ #ifdef CONFIG_PPC64 typedef struct { unsigned long pmd; } pmd_t; -#define pmd_val(x) ((x).pmd) #define __pmd(x) ((pmd_t) { (x) }) +static inline unsigned long pmd_val(pmd_t x) +{ + return x.pmd; +} /* PUD level exusts only on 4k pages */ #ifndef CONFIG_PPC_64K_PAGES typedef struct { unsigned long pud; } pud_t; -#define pud_val(x) ((x).pud) #define __pud(x) ((pud_t) { (x) }) +static inline unsigned long pud_val(pud_t x) +{ + return x.pud; +} #endif /* !CONFIG_PPC_64K_PAGES */ #endif /* CONFIG_PPC64 */ /* PGD level */ typedef struct { unsigned long pgd; } pgd_t; -#define pgd_val(x) ((x).pgd) #define __pgd(x) ((pgd_t) { (x) }) +static inline unsigned long pgd_val(pgd_t x) +{ + return x.pgd; +} /* Page protection bits */ typedef struct { unsigned long pgprot; } pgprot_t; @@ -329,8 +341,11 @@ typedef struct { unsigned long pgprot; } pgprot_t; */ typedef pte_basic_t pte_t; -#define pte_val(x) (x) #define __pte(x) (x) +static inline pte_basic_t pte_val(pte_t pte) +{ + return pte; +} #if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC_STD_MMU_64) typedef struct { pte_t pte; unsigned long hidx; } real_pte_t; @@ -341,67 +356,42 @@ typedef pte_t real_pte_t; #ifdef CONFIG_PPC64 typedef unsigned long pmd_t; -#define pmd_val(x) (x) #define __pmd(x) (x) +static inline unsigned long pmd_val(pmd_t pmd) +{ + return pmd; +} #ifndef CONFIG_PPC_64K_PAGES typedef unsigned long pud_t; -#define pud_val(x) (x) #define __pud(x) (x) +static inline unsigned long pud_val(pud_t pud) +{ + return pud; +} #endif /* !CONFIG_PPC_64K_PAGES */ #endif /* CONFIG_PPC64 */ typedef unsigned long pgd_t; -#define pgd_val(x) (x) -#define pgprot_val(x) (x) +#define __pgd(x) (x) +static inline unsigned long pgd_val(pgd_t pgd) +{ + return pgd; +} typedef unsigned long pgprot_t; -#define __pgd(x) (x) +#define pgprot_val(x) (x) #define __pgprot(x) (x) #endif typedef struct { signed long pd; } hugepd_t; -#ifdef CONFIG_HUGETLB_PAGE -#ifdef CONFIG_PPC_BOOK3S_64 -#ifdef CONFIG_PPC_64K_PAGES -/* - * With 64k page size, we have hugepage ptes in the pgd and pmd entries. We don't - * need to setup hugepage directory for them. Our pte and page directory format - * enable us to have this enabled. But to avoid errors when implementing new - * features disable hugepd for 64K. We enable a debug version here, So we catch - * wrong usage. - */ -#ifdef CONFIG_DEBUG_VM -extern int hugepd_ok(hugepd_t hpd); -#else -#define hugepd_ok(x) (0) -#endif -#else -static inline int hugepd_ok(hugepd_t hpd) -{ - /* - * hugepd pointer, bottom two bits == 00 and next 4 bits - * indicate size of table - */ - return (((hpd.pd & 0x3) == 0x0) && ((hpd.pd & HUGEPD_SHIFT_MASK) != 0)); -} -#endif -#else -static inline int hugepd_ok(hugepd_t hpd) -{ - return (hpd.pd > 0); -} -#endif - -#define is_hugepd(hpd) (hugepd_ok(hpd)) -#define pgd_huge pgd_huge -int pgd_huge(pgd_t pgd); -#else /* CONFIG_HUGETLB_PAGE */ -#define is_hugepd(pdep) 0 -#define pgd_huge(pgd) 0 +#ifndef CONFIG_HUGETLB_PAGE +#define is_hugepd(pdep) (0) +#define pgd_huge(pgd) (0) #endif /* CONFIG_HUGETLB_PAGE */ + #define __hugepd(x) ((hugepd_t) { (x) }) struct page; diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index 37fc53587bb4..54843ca5fa2b 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -205,6 +205,7 @@ struct pci_dn { int pci_ext_config_space; /* for pci devices */ + struct pci_dev *pcidev; /* back-pointer to the pci device */ #ifdef CONFIG_EEH struct eeh_dev *edev; /* eeh device */ #endif diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h index 3453bd8dc18f..6f8065a7d487 100644 --- a/arch/powerpc/include/asm/pci.h +++ b/arch/powerpc/include/asm/pci.h @@ -149,4 +149,8 @@ extern void pcibios_setup_phb_io_space(struct pci_controller *hose); extern void pcibios_scan_phb(struct pci_controller *hose); #endif /* __KERNEL__ */ + +extern struct pci_dev *pnv_pci_get_gpu_dev(struct pci_dev *npdev); +extern struct pci_dev *pnv_pci_get_npu_dev(struct pci_dev *gpdev, int index); + #endif /* __ASM_POWERPC_PCI_H */ diff --git a/arch/powerpc/include/asm/pgalloc-32.h b/arch/powerpc/include/asm/pgalloc-32.h index 842846c1b711..76d6b9e0c8a9 100644 --- a/arch/powerpc/include/asm/pgalloc-32.h +++ b/arch/powerpc/include/asm/pgalloc-32.h @@ -21,16 +21,34 @@ extern void pgd_free(struct mm_struct *mm, pgd_t *pgd); /* #define pgd_populate(mm, pmd, pte) BUG() */ #ifndef CONFIG_BOOKE -#define pmd_populate_kernel(mm, pmd, pte) \ - (pmd_val(*(pmd)) = __pa(pte) | _PMD_PRESENT) -#define pmd_populate(mm, pmd, pte) \ - (pmd_val(*(pmd)) = (page_to_pfn(pte) << PAGE_SHIFT) | _PMD_PRESENT) + +static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp, + pte_t *pte) +{ + *pmdp = __pmd(__pa(pte) | _PMD_PRESENT); +} + +static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp, + pgtable_t pte_page) +{ + *pmdp = __pmd((page_to_pfn(pte_page) << PAGE_SHIFT) | _PMD_PRESENT); +} + #define pmd_pgtable(pmd) pmd_page(pmd) #else -#define pmd_populate_kernel(mm, pmd, pte) \ - (pmd_val(*(pmd)) = (unsigned long)pte | _PMD_PRESENT) -#define pmd_populate(mm, pmd, pte) \ - (pmd_val(*(pmd)) = (unsigned long)lowmem_page_address(pte) | _PMD_PRESENT) + +static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp, + pte_t *pte) +{ + *pmdp = __pmd((unsigned long)pte | _PMD_PRESENT); +} + +static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp, + pgtable_t pte_page) +{ + *pmdp = __pmd((unsigned long)lowmem_page_address(pte_page) | _PMD_PRESENT); +} + #define pmd_pgtable(pmd) pmd_page(pmd) #endif diff --git a/arch/powerpc/include/asm/pgalloc-64.h b/arch/powerpc/include/asm/pgalloc-64.h index 4b0be20fcbfd..69ef28a81733 100644 --- a/arch/powerpc/include/asm/pgalloc-64.h +++ b/arch/powerpc/include/asm/pgalloc-64.h @@ -53,7 +53,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) #ifndef CONFIG_PPC_64K_PAGES -#define pgd_populate(MM, PGD, PUD) pgd_set(PGD, PUD) +#define pgd_populate(MM, PGD, PUD) pgd_set(PGD, (unsigned long)PUD) static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) { @@ -71,9 +71,18 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) pud_set(pud, (unsigned long)pmd); } -#define pmd_populate(mm, pmd, pte_page) \ - pmd_populate_kernel(mm, pmd, page_address(pte_page)) -#define pmd_populate_kernel(mm, pmd, pte) pmd_set(pmd, (unsigned long)(pte)) +static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, + pte_t *pte) +{ + pmd_set(pmd, (unsigned long)pte); +} + +static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, + pgtable_t pte_page) +{ + pmd_set(pmd, (unsigned long)page_address(pte_page)); +} + #define pmd_pgtable(pmd) pmd_page(pmd) static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, @@ -154,16 +163,6 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table, } #else /* if CONFIG_PPC_64K_PAGES */ -/* - * we support 16 fragments per PTE page. - */ -#define PTE_FRAG_NR 16 -/* - * We use a 2K PTE page fragment and another 2K for storing - * real_pte_t hash index - */ -#define PTE_FRAG_SIZE_SHIFT 12 -#define PTE_FRAG_SIZE (2 * PTRS_PER_PTE * sizeof(pte_t)) extern pte_t *page_table_alloc(struct mm_struct *, unsigned long, int); extern void page_table_free(struct mm_struct *, unsigned long *, int); diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index b64b4212b71f..ac9fb114e25d 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -1,6 +1,5 @@ #ifndef _ASM_POWERPC_PGTABLE_H #define _ASM_POWERPC_PGTABLE_H -#ifdef __KERNEL__ #ifndef __ASSEMBLY__ #include <linux/mmdebug.h> @@ -13,210 +12,20 @@ struct mm_struct; #endif /* !__ASSEMBLY__ */ -#if defined(CONFIG_PPC64) -# include <asm/pgtable-ppc64.h> +#ifdef CONFIG_PPC_BOOK3S +#include <asm/book3s/pgtable.h> #else -# include <asm/pgtable-ppc32.h> -#endif - -/* - * We save the slot number & secondary bit in the second half of the - * PTE page. We use the 8 bytes per each pte entry. - */ -#define PTE_PAGE_HIDX_OFFSET (PTRS_PER_PTE * 8) +#include <asm/nohash/pgtable.h> +#endif /* !CONFIG_PPC_BOOK3S */ #ifndef __ASSEMBLY__ #include <asm/tlbflush.h> -/* Generic accessors to PTE bits */ -static inline int pte_write(pte_t pte) -{ return (pte_val(pte) & (_PAGE_RW | _PAGE_RO)) != _PAGE_RO; } -static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } -static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } -static inline int pte_special(pte_t pte) { return pte_val(pte) & _PAGE_SPECIAL; } -static inline int pte_none(pte_t pte) { return (pte_val(pte) & ~_PTE_NONE_MASK) == 0; } -static inline pgprot_t pte_pgprot(pte_t pte) { return __pgprot(pte_val(pte) & PAGE_PROT_BITS); } - -#ifdef CONFIG_NUMA_BALANCING -/* - * These work without NUMA balancing but the kernel does not care. See the - * comment in include/asm-generic/pgtable.h . On powerpc, this will only - * work for user pages and always return true for kernel pages. - */ -static inline int pte_protnone(pte_t pte) -{ - return (pte_val(pte) & - (_PAGE_PRESENT | _PAGE_USER)) == _PAGE_PRESENT; -} - -static inline int pmd_protnone(pmd_t pmd) -{ - return pte_protnone(pmd_pte(pmd)); -} -#endif /* CONFIG_NUMA_BALANCING */ - -static inline int pte_present(pte_t pte) -{ - return pte_val(pte) & _PAGE_PRESENT; -} - -/* Conversion functions: convert a page and protection to a page entry, - * and a page entry and page directory to the page they refer to. - * - * Even if PTEs can be unsigned long long, a PFN is always an unsigned - * long for now. - */ -static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot) { - return __pte(((pte_basic_t)(pfn) << PTE_RPN_SHIFT) | - pgprot_val(pgprot)); } -static inline unsigned long pte_pfn(pte_t pte) { - return pte_val(pte) >> PTE_RPN_SHIFT; } - /* Keep these as a macros to avoid include dependency mess */ #define pte_page(x) pfn_to_page(pte_pfn(x)) #define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot)) -/* Generic modifiers for PTE bits */ -static inline pte_t pte_wrprotect(pte_t pte) { - pte_val(pte) &= ~(_PAGE_RW | _PAGE_HWWRITE); - pte_val(pte) |= _PAGE_RO; return pte; } -static inline pte_t pte_mkclean(pte_t pte) { - pte_val(pte) &= ~(_PAGE_DIRTY | _PAGE_HWWRITE); return pte; } -static inline pte_t pte_mkold(pte_t pte) { - pte_val(pte) &= ~_PAGE_ACCESSED; return pte; } -static inline pte_t pte_mkwrite(pte_t pte) { - pte_val(pte) &= ~_PAGE_RO; - pte_val(pte) |= _PAGE_RW; return pte; } -static inline pte_t pte_mkdirty(pte_t pte) { - pte_val(pte) |= _PAGE_DIRTY; return pte; } -static inline pte_t pte_mkyoung(pte_t pte) { - pte_val(pte) |= _PAGE_ACCESSED; return pte; } -static inline pte_t pte_mkspecial(pte_t pte) { - pte_val(pte) |= _PAGE_SPECIAL; return pte; } -static inline pte_t pte_mkhuge(pte_t pte) { - return pte; } -static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) -{ - pte_val(pte) = (pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot); - return pte; -} - - -/* Insert a PTE, top-level function is out of line. It uses an inline - * low level function in the respective pgtable-* files - */ -extern void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, - pte_t pte); - -/* This low level function performs the actual PTE insertion - * Setting the PTE depends on the MMU type and other factors. It's - * an horrible mess that I'm not going to try to clean up now but - * I'm keeping it in one place rather than spread around - */ -static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pte, int percpu) -{ -#if defined(CONFIG_PPC_STD_MMU_32) && defined(CONFIG_SMP) && !defined(CONFIG_PTE_64BIT) - /* First case is 32-bit Hash MMU in SMP mode with 32-bit PTEs. We use the - * helper pte_update() which does an atomic update. We need to do that - * because a concurrent invalidation can clear _PAGE_HASHPTE. If it's a - * per-CPU PTE such as a kmap_atomic, we do a simple update preserving - * the hash bits instead (ie, same as the non-SMP case) - */ - if (percpu) - *ptep = __pte((pte_val(*ptep) & _PAGE_HASHPTE) - | (pte_val(pte) & ~_PAGE_HASHPTE)); - else - pte_update(ptep, ~_PAGE_HASHPTE, pte_val(pte)); - -#elif defined(CONFIG_PPC32) && defined(CONFIG_PTE_64BIT) - /* Second case is 32-bit with 64-bit PTE. In this case, we - * can just store as long as we do the two halves in the right order - * with a barrier in between. This is possible because we take care, - * in the hash code, to pre-invalidate if the PTE was already hashed, - * which synchronizes us with any concurrent invalidation. - * In the percpu case, we also fallback to the simple update preserving - * the hash bits - */ - if (percpu) { - *ptep = __pte((pte_val(*ptep) & _PAGE_HASHPTE) - | (pte_val(pte) & ~_PAGE_HASHPTE)); - return; - } -#if _PAGE_HASHPTE != 0 - if (pte_val(*ptep) & _PAGE_HASHPTE) - flush_hash_entry(mm, ptep, addr); -#endif - __asm__ __volatile__("\ - stw%U0%X0 %2,%0\n\ - eieio\n\ - stw%U0%X0 %L2,%1" - : "=m" (*ptep), "=m" (*((unsigned char *)ptep+4)) - : "r" (pte) : "memory"); - -#elif defined(CONFIG_PPC_STD_MMU_32) - /* Third case is 32-bit hash table in UP mode, we need to preserve - * the _PAGE_HASHPTE bit since we may not have invalidated the previous - * translation in the hash yet (done in a subsequent flush_tlb_xxx()) - * and see we need to keep track that this PTE needs invalidating - */ - *ptep = __pte((pte_val(*ptep) & _PAGE_HASHPTE) - | (pte_val(pte) & ~_PAGE_HASHPTE)); - -#else - /* Anything else just stores the PTE normally. That covers all 64-bit - * cases, and 32-bit non-hash with 32-bit PTEs. - */ - *ptep = pte; - -#ifdef CONFIG_PPC_BOOK3E_64 - /* - * With hardware tablewalk, a sync is needed to ensure that - * subsequent accesses see the PTE we just wrote. Unlike userspace - * mappings, we can't tolerate spurious faults, so make sure - * the new PTE will be seen the first time. - */ - if (is_kernel_addr(addr)) - mb(); -#endif -#endif -} - - -#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS -extern int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address, - pte_t *ptep, pte_t entry, int dirty); - -/* - * Macro to mark a page protection value as "uncacheable". - */ - -#define _PAGE_CACHE_CTL (_PAGE_COHERENT | _PAGE_GUARDED | _PAGE_NO_CACHE | \ - _PAGE_WRITETHRU) - -#define pgprot_noncached(prot) (__pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | \ - _PAGE_NO_CACHE | _PAGE_GUARDED)) - -#define pgprot_noncached_wc(prot) (__pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | \ - _PAGE_NO_CACHE)) - -#define pgprot_cached(prot) (__pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | \ - _PAGE_COHERENT)) - -#define pgprot_cached_wthru(prot) (__pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | \ - _PAGE_COHERENT | _PAGE_WRITETHRU)) - -#define pgprot_cached_noncoherent(prot) \ - (__pgprot(pgprot_val(prot) & ~_PAGE_CACHE_CTL)) - -#define pgprot_writecombine pgprot_noncached_wc - -struct file; -extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, - unsigned long size, pgprot_t vma_prot); -#define __HAVE_PHYS_MEM_ACCESS_PROT - /* * ZERO_PAGE is a global shared page that is always zero: used * for zero-mapped memory areas etc.. @@ -271,5 +80,4 @@ static inline pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, } #endif /* __ASSEMBLY__ */ -#endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_PGTABLE_H */ diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h index 67859edbf8fd..1b394247afc2 100644 --- a/arch/powerpc/include/asm/plpar_wrappers.h +++ b/arch/powerpc/include/asm/plpar_wrappers.h @@ -202,6 +202,23 @@ static inline long plpar_pte_read_raw(unsigned long flags, unsigned long ptex, } /* + * ptes must be 8*sizeof(unsigned long) + */ +static inline long plpar_pte_read_4(unsigned long flags, unsigned long ptex, + unsigned long *ptes) + +{ + long rc; + unsigned long retbuf[PLPAR_HCALL9_BUFSIZE]; + + rc = plpar_hcall9(H_READ, retbuf, flags | H_READ_4, ptex); + + memcpy(ptes, retbuf, 8*sizeof(unsigned long)); + + return rc; +} + +/* * plpar_pte_read_4_raw can be called in real mode. * ptes must be 8*sizeof(unsigned long) */ diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index dd0fc18d8103..499d9f89435a 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -413,24 +413,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_601) FTR_SECTION_ELSE_NESTED(848); \ mtocrf (FXM), RS; \ ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_NOEXECUTE, 848) - -/* - * PPR restore macros used in entry_64.S - * Used for P7 or later processors - */ -#define HMT_MEDIUM_LOW_HAS_PPR \ -BEGIN_FTR_SECTION_NESTED(944) \ - HMT_MEDIUM_LOW; \ -END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,944) - -#define SET_DEFAULT_THREAD_PPR(ra, rb) \ -BEGIN_FTR_SECTION_NESTED(945) \ - lis ra,INIT_PPR@highest; /* default ppr=3 */ \ - ld rb,PACACURRENT(r13); \ - sldi ra,ra,32; /* 11- 13 bits are used for ppr */ \ - std ra,TASKTHREADPPR(rb); \ -END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,945) - #endif /* diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 5afea361beaa..ac2330820b9a 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -88,12 +88,6 @@ struct task_struct; void start_thread(struct pt_regs *regs, unsigned long fdptr, unsigned long sp); void release_thread(struct task_struct *); -/* Lazy FPU handling on uni-processor */ -extern struct task_struct *last_task_used_math; -extern struct task_struct *last_task_used_altivec; -extern struct task_struct *last_task_used_vsx; -extern struct task_struct *last_task_used_spe; - #ifdef CONFIG_PPC32 #if CONFIG_TASK_SIZE > CONFIG_KERNEL_START @@ -294,6 +288,7 @@ struct thread_struct { #endif #ifdef CONFIG_PPC64 unsigned long dscr; + unsigned long fscr; /* * This member element dscr_inherit indicates that the process * has explicitly attempted and changed the DSCR register value @@ -385,8 +380,6 @@ extern int set_endian(struct task_struct *tsk, unsigned int val); extern int get_unalign_ctl(struct task_struct *tsk, unsigned long adr); extern int set_unalign_ctl(struct task_struct *tsk, unsigned int val); -extern void fp_enable(void); -extern void vec_enable(void); extern void load_fp_state(struct thread_fp_state *fp); extern void store_fp_state(struct thread_fp_state *fp); extern void load_vr_state(struct thread_vr_state *vr); diff --git a/arch/powerpc/include/asm/pte-common.h b/arch/powerpc/include/asm/pte-common.h index 71537a319fc8..1ec67b043065 100644 --- a/arch/powerpc/include/asm/pte-common.h +++ b/arch/powerpc/include/asm/pte-common.h @@ -40,6 +40,11 @@ #else #define _PAGE_RW 0 #endif + +#ifndef _PAGE_PTE +#define _PAGE_PTE 0 +#endif + #ifndef _PMD_PRESENT_MASK #define _PMD_PRESENT_MASK _PMD_PRESENT #endif diff --git a/arch/powerpc/include/asm/pte-hash64-4k.h b/arch/powerpc/include/asm/pte-hash64-4k.h deleted file mode 100644 index c134e809aac3..000000000000 --- a/arch/powerpc/include/asm/pte-hash64-4k.h +++ /dev/null @@ -1,17 +0,0 @@ -/* To be include by pgtable-hash64.h only */ - -/* PTE bits */ -#define _PAGE_HASHPTE 0x0400 /* software: pte has an associated HPTE */ -#define _PAGE_SECONDARY 0x8000 /* software: HPTE is in secondary group */ -#define _PAGE_GROUP_IX 0x7000 /* software: HPTE index within group */ -#define _PAGE_F_SECOND _PAGE_SECONDARY -#define _PAGE_F_GIX _PAGE_GROUP_IX -#define _PAGE_SPECIAL 0x10000 /* software: special page */ - -/* PTE flags to conserve for HPTE identification */ -#define _PAGE_HPTEFLAGS (_PAGE_BUSY | _PAGE_HASHPTE | \ - _PAGE_SECONDARY | _PAGE_GROUP_IX) - -/* shift to put page number into pte */ -#define PTE_RPN_SHIFT (17) - diff --git a/arch/powerpc/include/asm/pte-hash64-64k.h b/arch/powerpc/include/asm/pte-hash64-64k.h deleted file mode 100644 index 4f4ec2ab45c9..000000000000 --- a/arch/powerpc/include/asm/pte-hash64-64k.h +++ /dev/null @@ -1,102 +0,0 @@ -/* To be include by pgtable-hash64.h only */ - -/* Additional PTE bits (don't change without checking asm in hash_low.S) */ -#define _PAGE_SPECIAL 0x00000400 /* software: special page */ -#define _PAGE_HPTE_SUB 0x0ffff000 /* combo only: sub pages HPTE bits */ -#define _PAGE_HPTE_SUB0 0x08000000 /* combo only: first sub page */ -#define _PAGE_COMBO 0x10000000 /* this is a combo 4k page */ -#define _PAGE_4K_PFN 0x20000000 /* PFN is for a single 4k page */ - -/* For 64K page, we don't have a separate _PAGE_HASHPTE bit. Instead, - * we set that to be the whole sub-bits mask. The C code will only - * test this, so a multi-bit mask will work. For combo pages, this - * is equivalent as effectively, the old _PAGE_HASHPTE was an OR of - * all the sub bits. For real 64k pages, we now have the assembly set - * _PAGE_HPTE_SUB0 in addition to setting the HIDX bits which overlap - * that mask. This is fine as long as the HIDX bits are never set on - * a PTE that isn't hashed, which is the case today. - * - * A little nit is for the huge page C code, which does the hashing - * in C, we need to provide which bit to use. - */ -#define _PAGE_HASHPTE _PAGE_HPTE_SUB - -/* Note the full page bits must be in the same location as for normal - * 4k pages as the same assembly will be used to insert 64K pages - * whether the kernel has CONFIG_PPC_64K_PAGES or not - */ -#define _PAGE_F_SECOND 0x00008000 /* full page: hidx bits */ -#define _PAGE_F_GIX 0x00007000 /* full page: hidx bits */ - -/* PTE flags to conserve for HPTE identification */ -#define _PAGE_HPTEFLAGS (_PAGE_BUSY | _PAGE_HASHPTE | _PAGE_COMBO) - -/* Shift to put page number into pte. - * - * That gives us a max RPN of 34 bits, which means a max of 50 bits - * of addressable physical space, or 46 bits for the special 4k PFNs. - */ -#define PTE_RPN_SHIFT (30) - -#ifndef __ASSEMBLY__ - -/* - * With 64K pages on hash table, we have a special PTE format that - * uses a second "half" of the page table to encode sub-page information - * in order to deal with 64K made of 4K HW pages. Thus we override the - * generic accessors and iterators here - */ -#define __real_pte __real_pte -static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep) -{ - real_pte_t rpte; - - rpte.pte = pte; - rpte.hidx = 0; - if (pte_val(pte) & _PAGE_COMBO) { - /* - * Make sure we order the hidx load against the _PAGE_COMBO - * check. The store side ordering is done in __hash_page_4K - */ - smp_rmb(); - rpte.hidx = pte_val(*((ptep) + PTRS_PER_PTE)); - } - return rpte; -} - -static inline unsigned long __rpte_to_hidx(real_pte_t rpte, unsigned long index) -{ - if ((pte_val(rpte.pte) & _PAGE_COMBO)) - return (rpte.hidx >> (index<<2)) & 0xf; - return (pte_val(rpte.pte) >> 12) & 0xf; -} - -#define __rpte_to_pte(r) ((r).pte) -#define __rpte_sub_valid(rpte, index) \ - (pte_val(rpte.pte) & (_PAGE_HPTE_SUB0 >> (index))) - -/* Trick: we set __end to va + 64k, which happens works for - * a 16M page as well as we want only one iteration - */ -#define pte_iterate_hashed_subpages(rpte, psize, vpn, index, shift) \ - do { \ - unsigned long __end = vpn + (1UL << (PAGE_SHIFT - VPN_SHIFT)); \ - unsigned __split = (psize == MMU_PAGE_4K || \ - psize == MMU_PAGE_64K_AP); \ - shift = mmu_psize_defs[psize].shift; \ - for (index = 0; vpn < __end; index++, \ - vpn += (1L << (shift - VPN_SHIFT))) { \ - if (!__split || __rpte_sub_valid(rpte, index)) \ - do { - -#define pte_iterate_hashed_end() } while(0); } } while(0) - -#define pte_pagesize_index(mm, addr, pte) \ - (((pte) & _PAGE_COMBO)? MMU_PAGE_4K: MMU_PAGE_64K) - -#define remap_4k_pfn(vma, addr, pfn, prot) \ - (WARN_ON(((pfn) >= (1UL << (64 - PTE_RPN_SHIFT)))) ? -EINVAL : \ - remap_pfn_range((vma), (addr), (pfn), PAGE_SIZE, \ - __pgprot(pgprot_val((prot)) | _PAGE_4K_PFN))) - -#endif /* __ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/pte-hash64.h b/arch/powerpc/include/asm/pte-hash64.h deleted file mode 100644 index ef612c160da7..000000000000 --- a/arch/powerpc/include/asm/pte-hash64.h +++ /dev/null @@ -1,54 +0,0 @@ -#ifndef _ASM_POWERPC_PTE_HASH64_H -#define _ASM_POWERPC_PTE_HASH64_H -#ifdef __KERNEL__ - -/* - * Common bits between 4K and 64K pages in a linux-style PTE. - * These match the bits in the (hardware-defined) PowerPC PTE as closely - * as possible. Additional bits may be defined in pgtable-hash64-*.h - * - * Note: We only support user read/write permissions. Supervisor always - * have full read/write to pages above PAGE_OFFSET (pages below that - * always use the user access permissions). - * - * We could create separate kernel read-only if we used the 3 PP bits - * combinations that newer processors provide but we currently don't. - */ -#define _PAGE_PRESENT 0x0001 /* software: pte contains a translation */ -#define _PAGE_USER 0x0002 /* matches one of the PP bits */ -#define _PAGE_BIT_SWAP_TYPE 2 -#define _PAGE_EXEC 0x0004 /* No execute on POWER4 and newer (we invert) */ -#define _PAGE_GUARDED 0x0008 -/* We can derive Memory coherence from _PAGE_NO_CACHE */ -#define _PAGE_NO_CACHE 0x0020 /* I: cache inhibit */ -#define _PAGE_WRITETHRU 0x0040 /* W: cache write-through */ -#define _PAGE_DIRTY 0x0080 /* C: page changed */ -#define _PAGE_ACCESSED 0x0100 /* R: page referenced */ -#define _PAGE_RW 0x0200 /* software: user write access allowed */ -#define _PAGE_BUSY 0x0800 /* software: PTE & hash are busy */ - -/* No separate kernel read-only */ -#define _PAGE_KERNEL_RW (_PAGE_RW | _PAGE_DIRTY) /* user access blocked by key */ -#define _PAGE_KERNEL_RO _PAGE_KERNEL_RW - -/* Strong Access Ordering */ -#define _PAGE_SAO (_PAGE_WRITETHRU | _PAGE_NO_CACHE | _PAGE_COHERENT) - -/* No page size encoding in the linux PTE */ -#define _PAGE_PSIZE 0 - -/* PTEIDX nibble */ -#define _PTEIDX_SECONDARY 0x8 -#define _PTEIDX_GROUP_IX 0x7 - -/* Hash table based platforms need atomic updates of the linux PTE */ -#define PTE_ATOMIC_UPDATES 1 - -#ifdef CONFIG_PPC_64K_PAGES -#include <asm/pte-hash64-64k.h> -#else -#include <asm/pte-hash64-4k.h> -#endif - -#endif /* __KERNEL__ */ -#endif /* _ASM_POWERPC_PTE_HASH64_H */ diff --git a/arch/powerpc/include/asm/qe.h b/arch/powerpc/include/asm/qe.h deleted file mode 100644 index 32b9bfa0c9bd..000000000000 --- a/arch/powerpc/include/asm/qe.h +++ /dev/null @@ -1,740 +0,0 @@ -/* - * Copyright (C) 2006 Freescale Semiconductor, Inc. All rights reserved. - * - * Authors: Shlomi Gridish <gridish@freescale.com> - * Li Yang <leoli@freescale.com> - * - * Description: - * QUICC Engine (QE) external definitions and structure. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ -#ifndef _ASM_POWERPC_QE_H -#define _ASM_POWERPC_QE_H -#ifdef __KERNEL__ - -#include <linux/spinlock.h> -#include <linux/errno.h> -#include <linux/err.h> -#include <asm/cpm.h> -#include <asm/immap_qe.h> - -#define QE_NUM_OF_SNUM 256 /* There are 256 serial number in QE */ -#define QE_NUM_OF_BRGS 16 -#define QE_NUM_OF_PORTS 1024 - -/* Memory partitions -*/ -#define MEM_PART_SYSTEM 0 -#define MEM_PART_SECONDARY 1 -#define MEM_PART_MURAM 2 - -/* Clocks and BRGs */ -enum qe_clock { - QE_CLK_NONE = 0, - QE_BRG1, /* Baud Rate Generator 1 */ - QE_BRG2, /* Baud Rate Generator 2 */ - QE_BRG3, /* Baud Rate Generator 3 */ - QE_BRG4, /* Baud Rate Generator 4 */ - QE_BRG5, /* Baud Rate Generator 5 */ - QE_BRG6, /* Baud Rate Generator 6 */ - QE_BRG7, /* Baud Rate Generator 7 */ - QE_BRG8, /* Baud Rate Generator 8 */ - QE_BRG9, /* Baud Rate Generator 9 */ - QE_BRG10, /* Baud Rate Generator 10 */ - QE_BRG11, /* Baud Rate Generator 11 */ - QE_BRG12, /* Baud Rate Generator 12 */ - QE_BRG13, /* Baud Rate Generator 13 */ - QE_BRG14, /* Baud Rate Generator 14 */ - QE_BRG15, /* Baud Rate Generator 15 */ - QE_BRG16, /* Baud Rate Generator 16 */ - QE_CLK1, /* Clock 1 */ - QE_CLK2, /* Clock 2 */ - QE_CLK3, /* Clock 3 */ - QE_CLK4, /* Clock 4 */ - QE_CLK5, /* Clock 5 */ - QE_CLK6, /* Clock 6 */ - QE_CLK7, /* Clock 7 */ - QE_CLK8, /* Clock 8 */ - QE_CLK9, /* Clock 9 */ - QE_CLK10, /* Clock 10 */ - QE_CLK11, /* Clock 11 */ - QE_CLK12, /* Clock 12 */ - QE_CLK13, /* Clock 13 */ - QE_CLK14, /* Clock 14 */ - QE_CLK15, /* Clock 15 */ - QE_CLK16, /* Clock 16 */ - QE_CLK17, /* Clock 17 */ - QE_CLK18, /* Clock 18 */ - QE_CLK19, /* Clock 19 */ - QE_CLK20, /* Clock 20 */ - QE_CLK21, /* Clock 21 */ - QE_CLK22, /* Clock 22 */ - QE_CLK23, /* Clock 23 */ - QE_CLK24, /* Clock 24 */ - QE_CLK_DUMMY -}; - -static inline bool qe_clock_is_brg(enum qe_clock clk) -{ - return clk >= QE_BRG1 && clk <= QE_BRG16; -} - -extern spinlock_t cmxgcr_lock; - -/* Export QE common operations */ -#ifdef CONFIG_QUICC_ENGINE -extern void qe_reset(void); -#else -static inline void qe_reset(void) {} -#endif - -/* QE PIO */ -#define QE_PIO_PINS 32 - -struct qe_pio_regs { - __be32 cpodr; /* Open drain register */ - __be32 cpdata; /* Data register */ - __be32 cpdir1; /* Direction register */ - __be32 cpdir2; /* Direction register */ - __be32 cppar1; /* Pin assignment register */ - __be32 cppar2; /* Pin assignment register */ -#ifdef CONFIG_PPC_85xx - u8 pad[8]; -#endif -}; - -#define QE_PIO_DIR_IN 2 -#define QE_PIO_DIR_OUT 1 -extern void __par_io_config_pin(struct qe_pio_regs __iomem *par_io, u8 pin, - int dir, int open_drain, int assignment, - int has_irq); -#ifdef CONFIG_QUICC_ENGINE -extern int par_io_init(struct device_node *np); -extern int par_io_of_config(struct device_node *np); -extern int par_io_config_pin(u8 port, u8 pin, int dir, int open_drain, - int assignment, int has_irq); -extern int par_io_data_set(u8 port, u8 pin, u8 val); -#else -static inline int par_io_init(struct device_node *np) { return -ENOSYS; } -static inline int par_io_of_config(struct device_node *np) { return -ENOSYS; } -static inline int par_io_config_pin(u8 port, u8 pin, int dir, int open_drain, - int assignment, int has_irq) { return -ENOSYS; } -static inline int par_io_data_set(u8 port, u8 pin, u8 val) { return -ENOSYS; } -#endif /* CONFIG_QUICC_ENGINE */ - -/* - * Pin multiplexing functions. - */ -struct qe_pin; -#ifdef CONFIG_QE_GPIO -extern struct qe_pin *qe_pin_request(struct device_node *np, int index); -extern void qe_pin_free(struct qe_pin *qe_pin); -extern void qe_pin_set_gpio(struct qe_pin *qe_pin); -extern void qe_pin_set_dedicated(struct qe_pin *pin); -#else -static inline struct qe_pin *qe_pin_request(struct device_node *np, int index) -{ - return ERR_PTR(-ENOSYS); -} -static inline void qe_pin_free(struct qe_pin *qe_pin) {} -static inline void qe_pin_set_gpio(struct qe_pin *qe_pin) {} -static inline void qe_pin_set_dedicated(struct qe_pin *pin) {} -#endif /* CONFIG_QE_GPIO */ - -#ifdef CONFIG_QUICC_ENGINE -int qe_issue_cmd(u32 cmd, u32 device, u8 mcn_protocol, u32 cmd_input); -#else -static inline int qe_issue_cmd(u32 cmd, u32 device, u8 mcn_protocol, - u32 cmd_input) -{ - return -ENOSYS; -} -#endif /* CONFIG_QUICC_ENGINE */ - -/* QE internal API */ -enum qe_clock qe_clock_source(const char *source); -unsigned int qe_get_brg_clk(void); -int qe_setbrg(enum qe_clock brg, unsigned int rate, unsigned int multiplier); -int qe_get_snum(void); -void qe_put_snum(u8 snum); -unsigned int qe_get_num_of_risc(void); -unsigned int qe_get_num_of_snums(void); - -static inline int qe_alive_during_sleep(void) -{ - /* - * MPC8568E reference manual says: - * - * "...power down sequence waits for all I/O interfaces to become idle. - * In some applications this may happen eventually without actively - * shutting down interfaces, but most likely, software will have to - * take steps to shut down the eTSEC, QUICC Engine Block, and PCI - * interfaces before issuing the command (either the write to the core - * MSR[WE] as described above or writing to POWMGTCSR) to put the - * device into sleep state." - * - * MPC8569E reference manual has a similar paragraph. - */ -#ifdef CONFIG_PPC_85xx - return 0; -#else - return 1; -#endif -} - -/* we actually use cpm_muram implementation, define this for convenience */ -#define qe_muram_init cpm_muram_init -#define qe_muram_alloc cpm_muram_alloc -#define qe_muram_alloc_fixed cpm_muram_alloc_fixed -#define qe_muram_free cpm_muram_free -#define qe_muram_addr cpm_muram_addr -#define qe_muram_offset cpm_muram_offset - -/* Structure that defines QE firmware binary files. - * - * See Documentation/powerpc/qe_firmware.txt for a description of these - * fields. - */ -struct qe_firmware { - struct qe_header { - __be32 length; /* Length of the entire structure, in bytes */ - u8 magic[3]; /* Set to { 'Q', 'E', 'F' } */ - u8 version; /* Version of this layout. First ver is '1' */ - } header; - u8 id[62]; /* Null-terminated identifier string */ - u8 split; /* 0 = shared I-RAM, 1 = split I-RAM */ - u8 count; /* Number of microcode[] structures */ - struct { - __be16 model; /* The SOC model */ - u8 major; /* The SOC revision major */ - u8 minor; /* The SOC revision minor */ - } __attribute__ ((packed)) soc; - u8 padding[4]; /* Reserved, for alignment */ - __be64 extended_modes; /* Extended modes */ - __be32 vtraps[8]; /* Virtual trap addresses */ - u8 reserved[4]; /* Reserved, for future expansion */ - struct qe_microcode { - u8 id[32]; /* Null-terminated identifier */ - __be32 traps[16]; /* Trap addresses, 0 == ignore */ - __be32 eccr; /* The value for the ECCR register */ - __be32 iram_offset; /* Offset into I-RAM for the code */ - __be32 count; /* Number of 32-bit words of the code */ - __be32 code_offset; /* Offset of the actual microcode */ - u8 major; /* The microcode version major */ - u8 minor; /* The microcode version minor */ - u8 revision; /* The microcode version revision */ - u8 padding; /* Reserved, for alignment */ - u8 reserved[4]; /* Reserved, for future expansion */ - } __attribute__ ((packed)) microcode[1]; - /* All microcode binaries should be located here */ - /* CRC32 should be located here, after the microcode binaries */ -} __attribute__ ((packed)); - -struct qe_firmware_info { - char id[64]; /* Firmware name */ - u32 vtraps[8]; /* Virtual trap addresses */ - u64 extended_modes; /* Extended modes */ -}; - -#ifdef CONFIG_QUICC_ENGINE -/* Upload a firmware to the QE */ -int qe_upload_firmware(const struct qe_firmware *firmware); -#else -static inline int qe_upload_firmware(const struct qe_firmware *firmware) -{ - return -ENOSYS; -} -#endif /* CONFIG_QUICC_ENGINE */ - -/* Obtain information on the uploaded firmware */ -struct qe_firmware_info *qe_get_firmware_info(void); - -/* QE USB */ -int qe_usb_clock_set(enum qe_clock clk, int rate); - -/* Buffer descriptors */ -struct qe_bd { - __be16 status; - __be16 length; - __be32 buf; -} __attribute__ ((packed)); - -#define BD_STATUS_MASK 0xffff0000 -#define BD_LENGTH_MASK 0x0000ffff - -/* Alignment */ -#define QE_INTR_TABLE_ALIGN 16 /* ??? */ -#define QE_ALIGNMENT_OF_BD 8 -#define QE_ALIGNMENT_OF_PRAM 64 - -/* RISC allocation */ -#define QE_RISC_ALLOCATION_RISC1 0x1 /* RISC 1 */ -#define QE_RISC_ALLOCATION_RISC2 0x2 /* RISC 2 */ -#define QE_RISC_ALLOCATION_RISC3 0x4 /* RISC 3 */ -#define QE_RISC_ALLOCATION_RISC4 0x8 /* RISC 4 */ -#define QE_RISC_ALLOCATION_RISC1_AND_RISC2 (QE_RISC_ALLOCATION_RISC1 | \ - QE_RISC_ALLOCATION_RISC2) -#define QE_RISC_ALLOCATION_FOUR_RISCS (QE_RISC_ALLOCATION_RISC1 | \ - QE_RISC_ALLOCATION_RISC2 | \ - QE_RISC_ALLOCATION_RISC3 | \ - QE_RISC_ALLOCATION_RISC4) - -/* QE extended filtering Table Lookup Key Size */ -enum qe_fltr_tbl_lookup_key_size { - QE_FLTR_TABLE_LOOKUP_KEY_SIZE_8_BYTES - = 0x3f, /* LookupKey parsed by the Generate LookupKey - CMD is truncated to 8 bytes */ - QE_FLTR_TABLE_LOOKUP_KEY_SIZE_16_BYTES - = 0x5f, /* LookupKey parsed by the Generate LookupKey - CMD is truncated to 16 bytes */ -}; - -/* QE FLTR extended filtering Largest External Table Lookup Key Size */ -enum qe_fltr_largest_external_tbl_lookup_key_size { - QE_FLTR_LARGEST_EXTERNAL_TABLE_LOOKUP_KEY_SIZE_NONE - = 0x0,/* not used */ - QE_FLTR_LARGEST_EXTERNAL_TABLE_LOOKUP_KEY_SIZE_8_BYTES - = QE_FLTR_TABLE_LOOKUP_KEY_SIZE_8_BYTES, /* 8 bytes */ - QE_FLTR_LARGEST_EXTERNAL_TABLE_LOOKUP_KEY_SIZE_16_BYTES - = QE_FLTR_TABLE_LOOKUP_KEY_SIZE_16_BYTES, /* 16 bytes */ -}; - -/* structure representing QE parameter RAM */ -struct qe_timer_tables { - u16 tm_base; /* QE timer table base adr */ - u16 tm_ptr; /* QE timer table pointer */ - u16 r_tmr; /* QE timer mode register */ - u16 r_tmv; /* QE timer valid register */ - u32 tm_cmd; /* QE timer cmd register */ - u32 tm_cnt; /* QE timer internal cnt */ -} __attribute__ ((packed)); - -#define QE_FLTR_TAD_SIZE 8 - -/* QE extended filtering Termination Action Descriptor (TAD) */ -struct qe_fltr_tad { - u8 serialized[QE_FLTR_TAD_SIZE]; -} __attribute__ ((packed)); - -/* Communication Direction */ -enum comm_dir { - COMM_DIR_NONE = 0, - COMM_DIR_RX = 1, - COMM_DIR_TX = 2, - COMM_DIR_RX_AND_TX = 3 -}; - -/* QE CMXUCR Registers. - * There are two UCCs represented in each of the four CMXUCR registers. - * These values are for the UCC in the LSBs - */ -#define QE_CMXUCR_MII_ENET_MNG 0x00007000 -#define QE_CMXUCR_MII_ENET_MNG_SHIFT 12 -#define QE_CMXUCR_GRANT 0x00008000 -#define QE_CMXUCR_TSA 0x00004000 -#define QE_CMXUCR_BKPT 0x00000100 -#define QE_CMXUCR_TX_CLK_SRC_MASK 0x0000000F - -/* QE CMXGCR Registers. -*/ -#define QE_CMXGCR_MII_ENET_MNG 0x00007000 -#define QE_CMXGCR_MII_ENET_MNG_SHIFT 12 -#define QE_CMXGCR_USBCS 0x0000000f -#define QE_CMXGCR_USBCS_CLK3 0x1 -#define QE_CMXGCR_USBCS_CLK5 0x2 -#define QE_CMXGCR_USBCS_CLK7 0x3 -#define QE_CMXGCR_USBCS_CLK9 0x4 -#define QE_CMXGCR_USBCS_CLK13 0x5 -#define QE_CMXGCR_USBCS_CLK17 0x6 -#define QE_CMXGCR_USBCS_CLK19 0x7 -#define QE_CMXGCR_USBCS_CLK21 0x8 -#define QE_CMXGCR_USBCS_BRG9 0x9 -#define QE_CMXGCR_USBCS_BRG10 0xa - -/* QE CECR Commands. -*/ -#define QE_CR_FLG 0x00010000 -#define QE_RESET 0x80000000 -#define QE_INIT_TX_RX 0x00000000 -#define QE_INIT_RX 0x00000001 -#define QE_INIT_TX 0x00000002 -#define QE_ENTER_HUNT_MODE 0x00000003 -#define QE_STOP_TX 0x00000004 -#define QE_GRACEFUL_STOP_TX 0x00000005 -#define QE_RESTART_TX 0x00000006 -#define QE_CLOSE_RX_BD 0x00000007 -#define QE_SWITCH_COMMAND 0x00000007 -#define QE_SET_GROUP_ADDRESS 0x00000008 -#define QE_START_IDMA 0x00000009 -#define QE_MCC_STOP_RX 0x00000009 -#define QE_ATM_TRANSMIT 0x0000000a -#define QE_HPAC_CLEAR_ALL 0x0000000b -#define QE_GRACEFUL_STOP_RX 0x0000001a -#define QE_RESTART_RX 0x0000001b -#define QE_HPAC_SET_PRIORITY 0x0000010b -#define QE_HPAC_STOP_TX 0x0000020b -#define QE_HPAC_STOP_RX 0x0000030b -#define QE_HPAC_GRACEFUL_STOP_TX 0x0000040b -#define QE_HPAC_GRACEFUL_STOP_RX 0x0000050b -#define QE_HPAC_START_TX 0x0000060b -#define QE_HPAC_START_RX 0x0000070b -#define QE_USB_STOP_TX 0x0000000a -#define QE_USB_RESTART_TX 0x0000000c -#define QE_QMC_STOP_TX 0x0000000c -#define QE_QMC_STOP_RX 0x0000000d -#define QE_SS7_SU_FIL_RESET 0x0000000e -/* jonathbr added from here down for 83xx */ -#define QE_RESET_BCS 0x0000000a -#define QE_MCC_INIT_TX_RX_16 0x00000003 -#define QE_MCC_STOP_TX 0x00000004 -#define QE_MCC_INIT_TX_1 0x00000005 -#define QE_MCC_INIT_RX_1 0x00000006 -#define QE_MCC_RESET 0x00000007 -#define QE_SET_TIMER 0x00000008 -#define QE_RANDOM_NUMBER 0x0000000c -#define QE_ATM_MULTI_THREAD_INIT 0x00000011 -#define QE_ASSIGN_PAGE 0x00000012 -#define QE_ADD_REMOVE_HASH_ENTRY 0x00000013 -#define QE_START_FLOW_CONTROL 0x00000014 -#define QE_STOP_FLOW_CONTROL 0x00000015 -#define QE_ASSIGN_PAGE_TO_DEVICE 0x00000016 - -#define QE_ASSIGN_RISC 0x00000010 -#define QE_CR_MCN_NORMAL_SHIFT 6 -#define QE_CR_MCN_USB_SHIFT 4 -#define QE_CR_MCN_RISC_ASSIGN_SHIFT 8 -#define QE_CR_SNUM_SHIFT 17 - -/* QE CECR Sub Block - sub block of QE command. -*/ -#define QE_CR_SUBBLOCK_INVALID 0x00000000 -#define QE_CR_SUBBLOCK_USB 0x03200000 -#define QE_CR_SUBBLOCK_UCCFAST1 0x02000000 -#define QE_CR_SUBBLOCK_UCCFAST2 0x02200000 -#define QE_CR_SUBBLOCK_UCCFAST3 0x02400000 -#define QE_CR_SUBBLOCK_UCCFAST4 0x02600000 -#define QE_CR_SUBBLOCK_UCCFAST5 0x02800000 -#define QE_CR_SUBBLOCK_UCCFAST6 0x02a00000 -#define QE_CR_SUBBLOCK_UCCFAST7 0x02c00000 -#define QE_CR_SUBBLOCK_UCCFAST8 0x02e00000 -#define QE_CR_SUBBLOCK_UCCSLOW1 0x00000000 -#define QE_CR_SUBBLOCK_UCCSLOW2 0x00200000 -#define QE_CR_SUBBLOCK_UCCSLOW3 0x00400000 -#define QE_CR_SUBBLOCK_UCCSLOW4 0x00600000 -#define QE_CR_SUBBLOCK_UCCSLOW5 0x00800000 -#define QE_CR_SUBBLOCK_UCCSLOW6 0x00a00000 -#define QE_CR_SUBBLOCK_UCCSLOW7 0x00c00000 -#define QE_CR_SUBBLOCK_UCCSLOW8 0x00e00000 -#define QE_CR_SUBBLOCK_MCC1 0x03800000 -#define QE_CR_SUBBLOCK_MCC2 0x03a00000 -#define QE_CR_SUBBLOCK_MCC3 0x03000000 -#define QE_CR_SUBBLOCK_IDMA1 0x02800000 -#define QE_CR_SUBBLOCK_IDMA2 0x02a00000 -#define QE_CR_SUBBLOCK_IDMA3 0x02c00000 -#define QE_CR_SUBBLOCK_IDMA4 0x02e00000 -#define QE_CR_SUBBLOCK_HPAC 0x01e00000 -#define QE_CR_SUBBLOCK_SPI1 0x01400000 -#define QE_CR_SUBBLOCK_SPI2 0x01600000 -#define QE_CR_SUBBLOCK_RAND 0x01c00000 -#define QE_CR_SUBBLOCK_TIMER 0x01e00000 -#define QE_CR_SUBBLOCK_GENERAL 0x03c00000 - -/* QE CECR Protocol - For non-MCC, specifies mode for QE CECR command */ -#define QE_CR_PROTOCOL_UNSPECIFIED 0x00 /* For all other protocols */ -#define QE_CR_PROTOCOL_HDLC_TRANSPARENT 0x00 -#define QE_CR_PROTOCOL_QMC 0x02 -#define QE_CR_PROTOCOL_UART 0x04 -#define QE_CR_PROTOCOL_ATM_POS 0x0A -#define QE_CR_PROTOCOL_ETHERNET 0x0C -#define QE_CR_PROTOCOL_L2_SWITCH 0x0D - -/* BRG configuration register */ -#define QE_BRGC_ENABLE 0x00010000 -#define QE_BRGC_DIVISOR_SHIFT 1 -#define QE_BRGC_DIVISOR_MAX 0xFFF -#define QE_BRGC_DIV16 1 - -/* QE Timers registers */ -#define QE_GTCFR1_PCAS 0x80 -#define QE_GTCFR1_STP2 0x20 -#define QE_GTCFR1_RST2 0x10 -#define QE_GTCFR1_GM2 0x08 -#define QE_GTCFR1_GM1 0x04 -#define QE_GTCFR1_STP1 0x02 -#define QE_GTCFR1_RST1 0x01 - -/* SDMA registers */ -#define QE_SDSR_BER1 0x02000000 -#define QE_SDSR_BER2 0x01000000 - -#define QE_SDMR_GLB_1_MSK 0x80000000 -#define QE_SDMR_ADR_SEL 0x20000000 -#define QE_SDMR_BER1_MSK 0x02000000 -#define QE_SDMR_BER2_MSK 0x01000000 -#define QE_SDMR_EB1_MSK 0x00800000 -#define QE_SDMR_ER1_MSK 0x00080000 -#define QE_SDMR_ER2_MSK 0x00040000 -#define QE_SDMR_CEN_MASK 0x0000E000 -#define QE_SDMR_SBER_1 0x00000200 -#define QE_SDMR_SBER_2 0x00000200 -#define QE_SDMR_EB1_PR_MASK 0x000000C0 -#define QE_SDMR_ER1_PR 0x00000008 - -#define QE_SDMR_CEN_SHIFT 13 -#define QE_SDMR_EB1_PR_SHIFT 6 - -#define QE_SDTM_MSNUM_SHIFT 24 - -#define QE_SDEBCR_BA_MASK 0x01FFFFFF - -/* Communication Processor */ -#define QE_CP_CERCR_MEE 0x8000 /* Multi-user RAM ECC enable */ -#define QE_CP_CERCR_IEE 0x4000 /* Instruction RAM ECC enable */ -#define QE_CP_CERCR_CIR 0x0800 /* Common instruction RAM */ - -/* I-RAM */ -#define QE_IRAM_IADD_AIE 0x80000000 /* Auto Increment Enable */ -#define QE_IRAM_IADD_BADDR 0x00080000 /* Base Address */ -#define QE_IRAM_READY 0x80000000 /* Ready */ - -/* UPC */ -#define UPGCR_PROTOCOL 0x80000000 /* protocol ul2 or pl2 */ -#define UPGCR_TMS 0x40000000 /* Transmit master/slave mode */ -#define UPGCR_RMS 0x20000000 /* Receive master/slave mode */ -#define UPGCR_ADDR 0x10000000 /* Master MPHY Addr multiplexing */ -#define UPGCR_DIAG 0x01000000 /* Diagnostic mode */ - -/* UCC GUEMR register */ -#define UCC_GUEMR_MODE_MASK_RX 0x02 -#define UCC_GUEMR_MODE_FAST_RX 0x02 -#define UCC_GUEMR_MODE_SLOW_RX 0x00 -#define UCC_GUEMR_MODE_MASK_TX 0x01 -#define UCC_GUEMR_MODE_FAST_TX 0x01 -#define UCC_GUEMR_MODE_SLOW_TX 0x00 -#define UCC_GUEMR_MODE_MASK (UCC_GUEMR_MODE_MASK_RX | UCC_GUEMR_MODE_MASK_TX) -#define UCC_GUEMR_SET_RESERVED3 0x10 /* Bit 3 in the guemr is reserved but - must be set 1 */ - -/* structure representing UCC SLOW parameter RAM */ -struct ucc_slow_pram { - __be16 rbase; /* RX BD base address */ - __be16 tbase; /* TX BD base address */ - u8 rbmr; /* RX bus mode register (same as CPM's RFCR) */ - u8 tbmr; /* TX bus mode register (same as CPM's TFCR) */ - __be16 mrblr; /* Rx buffer length */ - __be32 rstate; /* Rx internal state */ - __be32 rptr; /* Rx internal data pointer */ - __be16 rbptr; /* rb BD Pointer */ - __be16 rcount; /* Rx internal byte count */ - __be32 rtemp; /* Rx temp */ - __be32 tstate; /* Tx internal state */ - __be32 tptr; /* Tx internal data pointer */ - __be16 tbptr; /* Tx BD pointer */ - __be16 tcount; /* Tx byte count */ - __be32 ttemp; /* Tx temp */ - __be32 rcrc; /* temp receive CRC */ - __be32 tcrc; /* temp transmit CRC */ -} __attribute__ ((packed)); - -/* General UCC SLOW Mode Register (GUMRH & GUMRL) */ -#define UCC_SLOW_GUMR_H_SAM_QMC 0x00000000 -#define UCC_SLOW_GUMR_H_SAM_SATM 0x00008000 -#define UCC_SLOW_GUMR_H_REVD 0x00002000 -#define UCC_SLOW_GUMR_H_TRX 0x00001000 -#define UCC_SLOW_GUMR_H_TTX 0x00000800 -#define UCC_SLOW_GUMR_H_CDP 0x00000400 -#define UCC_SLOW_GUMR_H_CTSP 0x00000200 -#define UCC_SLOW_GUMR_H_CDS 0x00000100 -#define UCC_SLOW_GUMR_H_CTSS 0x00000080 -#define UCC_SLOW_GUMR_H_TFL 0x00000040 -#define UCC_SLOW_GUMR_H_RFW 0x00000020 -#define UCC_SLOW_GUMR_H_TXSY 0x00000010 -#define UCC_SLOW_GUMR_H_4SYNC 0x00000004 -#define UCC_SLOW_GUMR_H_8SYNC 0x00000008 -#define UCC_SLOW_GUMR_H_16SYNC 0x0000000c -#define UCC_SLOW_GUMR_H_RTSM 0x00000002 -#define UCC_SLOW_GUMR_H_RSYN 0x00000001 - -#define UCC_SLOW_GUMR_L_TCI 0x10000000 -#define UCC_SLOW_GUMR_L_RINV 0x02000000 -#define UCC_SLOW_GUMR_L_TINV 0x01000000 -#define UCC_SLOW_GUMR_L_TEND 0x00040000 -#define UCC_SLOW_GUMR_L_TDCR_MASK 0x00030000 -#define UCC_SLOW_GUMR_L_TDCR_32 0x00030000 -#define UCC_SLOW_GUMR_L_TDCR_16 0x00020000 -#define UCC_SLOW_GUMR_L_TDCR_8 0x00010000 -#define UCC_SLOW_GUMR_L_TDCR_1 0x00000000 -#define UCC_SLOW_GUMR_L_RDCR_MASK 0x0000c000 -#define UCC_SLOW_GUMR_L_RDCR_32 0x0000c000 -#define UCC_SLOW_GUMR_L_RDCR_16 0x00008000 -#define UCC_SLOW_GUMR_L_RDCR_8 0x00004000 -#define UCC_SLOW_GUMR_L_RDCR_1 0x00000000 -#define UCC_SLOW_GUMR_L_RENC_NRZI 0x00000800 -#define UCC_SLOW_GUMR_L_RENC_NRZ 0x00000000 -#define UCC_SLOW_GUMR_L_TENC_NRZI 0x00000100 -#define UCC_SLOW_GUMR_L_TENC_NRZ 0x00000000 -#define UCC_SLOW_GUMR_L_DIAG_MASK 0x000000c0 -#define UCC_SLOW_GUMR_L_DIAG_LE 0x000000c0 -#define UCC_SLOW_GUMR_L_DIAG_ECHO 0x00000080 -#define UCC_SLOW_GUMR_L_DIAG_LOOP 0x00000040 -#define UCC_SLOW_GUMR_L_DIAG_NORM 0x00000000 -#define UCC_SLOW_GUMR_L_ENR 0x00000020 -#define UCC_SLOW_GUMR_L_ENT 0x00000010 -#define UCC_SLOW_GUMR_L_MODE_MASK 0x0000000F -#define UCC_SLOW_GUMR_L_MODE_BISYNC 0x00000008 -#define UCC_SLOW_GUMR_L_MODE_AHDLC 0x00000006 -#define UCC_SLOW_GUMR_L_MODE_UART 0x00000004 -#define UCC_SLOW_GUMR_L_MODE_QMC 0x00000002 - -/* General UCC FAST Mode Register */ -#define UCC_FAST_GUMR_TCI 0x20000000 -#define UCC_FAST_GUMR_TRX 0x10000000 -#define UCC_FAST_GUMR_TTX 0x08000000 -#define UCC_FAST_GUMR_CDP 0x04000000 -#define UCC_FAST_GUMR_CTSP 0x02000000 -#define UCC_FAST_GUMR_CDS 0x01000000 -#define UCC_FAST_GUMR_CTSS 0x00800000 -#define UCC_FAST_GUMR_TXSY 0x00020000 -#define UCC_FAST_GUMR_RSYN 0x00010000 -#define UCC_FAST_GUMR_RTSM 0x00002000 -#define UCC_FAST_GUMR_REVD 0x00000400 -#define UCC_FAST_GUMR_ENR 0x00000020 -#define UCC_FAST_GUMR_ENT 0x00000010 - -/* UART Slow UCC Event Register (UCCE) */ -#define UCC_UART_UCCE_AB 0x0200 -#define UCC_UART_UCCE_IDLE 0x0100 -#define UCC_UART_UCCE_GRA 0x0080 -#define UCC_UART_UCCE_BRKE 0x0040 -#define UCC_UART_UCCE_BRKS 0x0020 -#define UCC_UART_UCCE_CCR 0x0008 -#define UCC_UART_UCCE_BSY 0x0004 -#define UCC_UART_UCCE_TX 0x0002 -#define UCC_UART_UCCE_RX 0x0001 - -/* HDLC Slow UCC Event Register (UCCE) */ -#define UCC_HDLC_UCCE_GLR 0x1000 -#define UCC_HDLC_UCCE_GLT 0x0800 -#define UCC_HDLC_UCCE_IDLE 0x0100 -#define UCC_HDLC_UCCE_BRKE 0x0040 -#define UCC_HDLC_UCCE_BRKS 0x0020 -#define UCC_HDLC_UCCE_TXE 0x0010 -#define UCC_HDLC_UCCE_RXF 0x0008 -#define UCC_HDLC_UCCE_BSY 0x0004 -#define UCC_HDLC_UCCE_TXB 0x0002 -#define UCC_HDLC_UCCE_RXB 0x0001 - -/* BISYNC Slow UCC Event Register (UCCE) */ -#define UCC_BISYNC_UCCE_GRA 0x0080 -#define UCC_BISYNC_UCCE_TXE 0x0010 -#define UCC_BISYNC_UCCE_RCH 0x0008 -#define UCC_BISYNC_UCCE_BSY 0x0004 -#define UCC_BISYNC_UCCE_TXB 0x0002 -#define UCC_BISYNC_UCCE_RXB 0x0001 - -/* Gigabit Ethernet Fast UCC Event Register (UCCE) */ -#define UCC_GETH_UCCE_MPD 0x80000000 -#define UCC_GETH_UCCE_SCAR 0x40000000 -#define UCC_GETH_UCCE_GRA 0x20000000 -#define UCC_GETH_UCCE_CBPR 0x10000000 -#define UCC_GETH_UCCE_BSY 0x08000000 -#define UCC_GETH_UCCE_RXC 0x04000000 -#define UCC_GETH_UCCE_TXC 0x02000000 -#define UCC_GETH_UCCE_TXE 0x01000000 -#define UCC_GETH_UCCE_TXB7 0x00800000 -#define UCC_GETH_UCCE_TXB6 0x00400000 -#define UCC_GETH_UCCE_TXB5 0x00200000 -#define UCC_GETH_UCCE_TXB4 0x00100000 -#define UCC_GETH_UCCE_TXB3 0x00080000 -#define UCC_GETH_UCCE_TXB2 0x00040000 -#define UCC_GETH_UCCE_TXB1 0x00020000 -#define UCC_GETH_UCCE_TXB0 0x00010000 -#define UCC_GETH_UCCE_RXB7 0x00008000 -#define UCC_GETH_UCCE_RXB6 0x00004000 -#define UCC_GETH_UCCE_RXB5 0x00002000 -#define UCC_GETH_UCCE_RXB4 0x00001000 -#define UCC_GETH_UCCE_RXB3 0x00000800 -#define UCC_GETH_UCCE_RXB2 0x00000400 -#define UCC_GETH_UCCE_RXB1 0x00000200 -#define UCC_GETH_UCCE_RXB0 0x00000100 -#define UCC_GETH_UCCE_RXF7 0x00000080 -#define UCC_GETH_UCCE_RXF6 0x00000040 -#define UCC_GETH_UCCE_RXF5 0x00000020 -#define UCC_GETH_UCCE_RXF4 0x00000010 -#define UCC_GETH_UCCE_RXF3 0x00000008 -#define UCC_GETH_UCCE_RXF2 0x00000004 -#define UCC_GETH_UCCE_RXF1 0x00000002 -#define UCC_GETH_UCCE_RXF0 0x00000001 - -/* UCC Protocol Specific Mode Register (UPSMR), when used for UART */ -#define UCC_UART_UPSMR_FLC 0x8000 -#define UCC_UART_UPSMR_SL 0x4000 -#define UCC_UART_UPSMR_CL_MASK 0x3000 -#define UCC_UART_UPSMR_CL_8 0x3000 -#define UCC_UART_UPSMR_CL_7 0x2000 -#define UCC_UART_UPSMR_CL_6 0x1000 -#define UCC_UART_UPSMR_CL_5 0x0000 -#define UCC_UART_UPSMR_UM_MASK 0x0c00 -#define UCC_UART_UPSMR_UM_NORMAL 0x0000 -#define UCC_UART_UPSMR_UM_MAN_MULTI 0x0400 -#define UCC_UART_UPSMR_UM_AUTO_MULTI 0x0c00 -#define UCC_UART_UPSMR_FRZ 0x0200 -#define UCC_UART_UPSMR_RZS 0x0100 -#define UCC_UART_UPSMR_SYN 0x0080 -#define UCC_UART_UPSMR_DRT 0x0040 -#define UCC_UART_UPSMR_PEN 0x0010 -#define UCC_UART_UPSMR_RPM_MASK 0x000c -#define UCC_UART_UPSMR_RPM_ODD 0x0000 -#define UCC_UART_UPSMR_RPM_LOW 0x0004 -#define UCC_UART_UPSMR_RPM_EVEN 0x0008 -#define UCC_UART_UPSMR_RPM_HIGH 0x000C -#define UCC_UART_UPSMR_TPM_MASK 0x0003 -#define UCC_UART_UPSMR_TPM_ODD 0x0000 -#define UCC_UART_UPSMR_TPM_LOW 0x0001 -#define UCC_UART_UPSMR_TPM_EVEN 0x0002 -#define UCC_UART_UPSMR_TPM_HIGH 0x0003 - -/* UCC Protocol Specific Mode Register (UPSMR), when used for Ethernet */ -#define UCC_GETH_UPSMR_FTFE 0x80000000 -#define UCC_GETH_UPSMR_PTPE 0x40000000 -#define UCC_GETH_UPSMR_ECM 0x04000000 -#define UCC_GETH_UPSMR_HSE 0x02000000 -#define UCC_GETH_UPSMR_PRO 0x00400000 -#define UCC_GETH_UPSMR_CAP 0x00200000 -#define UCC_GETH_UPSMR_RSH 0x00100000 -#define UCC_GETH_UPSMR_RPM 0x00080000 -#define UCC_GETH_UPSMR_R10M 0x00040000 -#define UCC_GETH_UPSMR_RLPB 0x00020000 -#define UCC_GETH_UPSMR_TBIM 0x00010000 -#define UCC_GETH_UPSMR_RES1 0x00002000 -#define UCC_GETH_UPSMR_RMM 0x00001000 -#define UCC_GETH_UPSMR_CAM 0x00000400 -#define UCC_GETH_UPSMR_BRO 0x00000200 -#define UCC_GETH_UPSMR_SMM 0x00000080 -#define UCC_GETH_UPSMR_SGMM 0x00000020 - -/* UCC Transmit On Demand Register (UTODR) */ -#define UCC_SLOW_TOD 0x8000 -#define UCC_FAST_TOD 0x8000 - -/* UCC Bus Mode Register masks */ -/* Not to be confused with the Bundle Mode Register */ -#define UCC_BMR_GBL 0x20 -#define UCC_BMR_BO_BE 0x10 -#define UCC_BMR_CETM 0x04 -#define UCC_BMR_DTB 0x02 -#define UCC_BMR_BDB 0x01 - -/* Function code masks */ -#define FC_GBL 0x20 -#define FC_DTB_LCL 0x02 -#define UCC_FAST_FUNCTION_CODE_GBL 0x20 -#define UCC_FAST_FUNCTION_CODE_DTB_LCL 0x02 -#define UCC_FAST_FUNCTION_CODE_BDB_LCL 0x01 - -#endif /* __KERNEL__ */ -#endif /* _ASM_POWERPC_QE_H */ diff --git a/arch/powerpc/include/asm/qe_ic.h b/arch/powerpc/include/asm/qe_ic.h deleted file mode 100644 index 1e155ca6d33c..000000000000 --- a/arch/powerpc/include/asm/qe_ic.h +++ /dev/null @@ -1,139 +0,0 @@ -/* - * Copyright (C) 2006 Freescale Semiconductor, Inc. All rights reserved. - * - * Authors: Shlomi Gridish <gridish@freescale.com> - * Li Yang <leoli@freescale.com> - * - * Description: - * QE IC external definitions and structure. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ -#ifndef _ASM_POWERPC_QE_IC_H -#define _ASM_POWERPC_QE_IC_H - -#include <linux/irq.h> - -struct device_node; -struct qe_ic; - -#define NUM_OF_QE_IC_GROUPS 6 - -/* Flags when we init the QE IC */ -#define QE_IC_SPREADMODE_GRP_W 0x00000001 -#define QE_IC_SPREADMODE_GRP_X 0x00000002 -#define QE_IC_SPREADMODE_GRP_Y 0x00000004 -#define QE_IC_SPREADMODE_GRP_Z 0x00000008 -#define QE_IC_SPREADMODE_GRP_RISCA 0x00000010 -#define QE_IC_SPREADMODE_GRP_RISCB 0x00000020 - -#define QE_IC_LOW_SIGNAL 0x00000100 -#define QE_IC_HIGH_SIGNAL 0x00000200 - -#define QE_IC_GRP_W_PRI0_DEST_SIGNAL_HIGH 0x00001000 -#define QE_IC_GRP_W_PRI1_DEST_SIGNAL_HIGH 0x00002000 -#define QE_IC_GRP_X_PRI0_DEST_SIGNAL_HIGH 0x00004000 -#define QE_IC_GRP_X_PRI1_DEST_SIGNAL_HIGH 0x00008000 -#define QE_IC_GRP_Y_PRI0_DEST_SIGNAL_HIGH 0x00010000 -#define QE_IC_GRP_Y_PRI1_DEST_SIGNAL_HIGH 0x00020000 -#define QE_IC_GRP_Z_PRI0_DEST_SIGNAL_HIGH 0x00040000 -#define QE_IC_GRP_Z_PRI1_DEST_SIGNAL_HIGH 0x00080000 -#define QE_IC_GRP_RISCA_PRI0_DEST_SIGNAL_HIGH 0x00100000 -#define QE_IC_GRP_RISCA_PRI1_DEST_SIGNAL_HIGH 0x00200000 -#define QE_IC_GRP_RISCB_PRI0_DEST_SIGNAL_HIGH 0x00400000 -#define QE_IC_GRP_RISCB_PRI1_DEST_SIGNAL_HIGH 0x00800000 -#define QE_IC_GRP_W_DEST_SIGNAL_SHIFT (12) - -/* QE interrupt sources groups */ -enum qe_ic_grp_id { - QE_IC_GRP_W = 0, /* QE interrupt controller group W */ - QE_IC_GRP_X, /* QE interrupt controller group X */ - QE_IC_GRP_Y, /* QE interrupt controller group Y */ - QE_IC_GRP_Z, /* QE interrupt controller group Z */ - QE_IC_GRP_RISCA, /* QE interrupt controller RISC group A */ - QE_IC_GRP_RISCB /* QE interrupt controller RISC group B */ -}; - -#ifdef CONFIG_QUICC_ENGINE -void qe_ic_init(struct device_node *node, unsigned int flags, - void (*low_handler)(struct irq_desc *desc), - void (*high_handler)(struct irq_desc *desc)); -unsigned int qe_ic_get_low_irq(struct qe_ic *qe_ic); -unsigned int qe_ic_get_high_irq(struct qe_ic *qe_ic); -#else -static inline void qe_ic_init(struct device_node *node, unsigned int flags, - void (*low_handler)(struct irq_desc *desc), - void (*high_handler)(struct irq_desc *desc)) -{} -static inline unsigned int qe_ic_get_low_irq(struct qe_ic *qe_ic) -{ return 0; } -static inline unsigned int qe_ic_get_high_irq(struct qe_ic *qe_ic) -{ return 0; } -#endif /* CONFIG_QUICC_ENGINE */ - -void qe_ic_set_highest_priority(unsigned int virq, int high); -int qe_ic_set_priority(unsigned int virq, unsigned int priority); -int qe_ic_set_high_priority(unsigned int virq, unsigned int priority, int high); - -static inline void qe_ic_cascade_low_ipic(struct irq_desc *desc) -{ - struct qe_ic *qe_ic = irq_desc_get_handler_data(desc); - unsigned int cascade_irq = qe_ic_get_low_irq(qe_ic); - - if (cascade_irq != NO_IRQ) - generic_handle_irq(cascade_irq); -} - -static inline void qe_ic_cascade_high_ipic(struct irq_desc *desc) -{ - struct qe_ic *qe_ic = irq_desc_get_handler_data(desc); - unsigned int cascade_irq = qe_ic_get_high_irq(qe_ic); - - if (cascade_irq != NO_IRQ) - generic_handle_irq(cascade_irq); -} - -static inline void qe_ic_cascade_low_mpic(struct irq_desc *desc) -{ - struct qe_ic *qe_ic = irq_desc_get_handler_data(desc); - unsigned int cascade_irq = qe_ic_get_low_irq(qe_ic); - struct irq_chip *chip = irq_desc_get_chip(desc); - - if (cascade_irq != NO_IRQ) - generic_handle_irq(cascade_irq); - - chip->irq_eoi(&desc->irq_data); -} - -static inline void qe_ic_cascade_high_mpic(struct irq_desc *desc) -{ - struct qe_ic *qe_ic = irq_desc_get_handler_data(desc); - unsigned int cascade_irq = qe_ic_get_high_irq(qe_ic); - struct irq_chip *chip = irq_desc_get_chip(desc); - - if (cascade_irq != NO_IRQ) - generic_handle_irq(cascade_irq); - - chip->irq_eoi(&desc->irq_data); -} - -static inline void qe_ic_cascade_muxed_mpic(struct irq_desc *desc) -{ - struct qe_ic *qe_ic = irq_desc_get_handler_data(desc); - unsigned int cascade_irq; - struct irq_chip *chip = irq_desc_get_chip(desc); - - cascade_irq = qe_ic_get_high_irq(qe_ic); - if (cascade_irq == NO_IRQ) - cascade_irq = qe_ic_get_low_irq(qe_ic); - - if (cascade_irq != NO_IRQ) - generic_handle_irq(cascade_irq); - - chip->irq_eoi(&desc->irq_data); -} - -#endif /* _ASM_POWERPC_QE_IC_H */ diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 2220f7a60def..c4cb2ffc624e 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -1194,12 +1194,20 @@ #define __mtmsrd(v, l) asm volatile("mtmsrd %0," __stringify(l) \ : : "r" (v) : "memory") #define mtmsr(v) __mtmsrd((v), 0) +#define __MTMSR "mtmsrd" #else #define mtmsr(v) asm volatile("mtmsr %0" : \ : "r" ((unsigned long)(v)) \ : "memory") +#define __MTMSR "mtmsr" #endif +static inline void mtmsr_isync(unsigned long val) +{ + asm volatile(__MTMSR " %0; " ASM_FTR_IFCLR("isync", "nop", %1) : : + "r" (val), "i" (CPU_FTR_ARCH_206) : "memory"); +} + #define mfspr(rn) ({unsigned long rval; \ asm volatile("mfspr %0," __stringify(rn) \ : "=r" (rval)); rval;}) @@ -1207,6 +1215,15 @@ : "r" ((unsigned long)(v)) \ : "memory") +extern void msr_check_and_set(unsigned long bits); +extern bool strict_msr_control; +extern void __msr_check_and_clear(unsigned long bits); +static inline void msr_check_and_clear(unsigned long bits) +{ + if (strict_msr_control) + __msr_check_and_clear(bits); +} + static inline unsigned long mfvtb (void) { #ifdef CONFIG_PPC_BOOK3S_64 diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index b77ef369c0f0..51400baa8d48 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -334,10 +334,11 @@ extern void (*rtas_flash_term_hook)(int); extern struct rtas_t rtas; -extern void enter_rtas(unsigned long); extern int rtas_token(const char *service); extern int rtas_service_present(const char *service); extern int rtas_call(int token, int, int, int *, ...); +void rtas_call_unlocked(struct rtas_args *args, int token, int nargs, + int nret, ...); extern void rtas_restart(char *cmd); extern void rtas_power_off(void); extern void rtas_halt(void); diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h index 15cca17cba4b..5b268b6be74c 100644 --- a/arch/powerpc/include/asm/switch_to.h +++ b/arch/powerpc/include/asm/switch_to.h @@ -4,6 +4,8 @@ #ifndef _ASM_POWERPC_SWITCH_TO_H #define _ASM_POWERPC_SWITCH_TO_H +#include <asm/reg.h> + struct thread_struct; struct task_struct; struct pt_regs; @@ -12,74 +14,59 @@ extern struct task_struct *__switch_to(struct task_struct *, struct task_struct *); #define switch_to(prev, next, last) ((last) = __switch_to((prev), (next))) -struct thread_struct; extern struct task_struct *_switch(struct thread_struct *prev, struct thread_struct *next); -#ifdef CONFIG_PPC_BOOK3S_64 -static inline void save_early_sprs(struct thread_struct *prev) -{ - if (cpu_has_feature(CPU_FTR_ARCH_207S)) - prev->tar = mfspr(SPRN_TAR); - if (cpu_has_feature(CPU_FTR_DSCR)) - prev->dscr = mfspr(SPRN_DSCR); -} -#else -static inline void save_early_sprs(struct thread_struct *prev) {} -#endif -extern void enable_kernel_fp(void); -extern void enable_kernel_altivec(void); -extern void enable_kernel_vsx(void); -extern int emulate_altivec(struct pt_regs *); -extern void __giveup_vsx(struct task_struct *); -extern void giveup_vsx(struct task_struct *); -extern void enable_kernel_spe(void); -extern void giveup_spe(struct task_struct *); -extern void load_up_spe(struct task_struct *); extern void switch_booke_debug_regs(struct debug_reg *new_debug); -#ifndef CONFIG_SMP -extern void discard_lazy_cpu_state(void); -#else -static inline void discard_lazy_cpu_state(void) -{ -} -#endif +extern int emulate_altivec(struct pt_regs *); + +extern void flush_all_to_thread(struct task_struct *); +extern void giveup_all(struct task_struct *); #ifdef CONFIG_PPC_FPU +extern void enable_kernel_fp(void); extern void flush_fp_to_thread(struct task_struct *); extern void giveup_fpu(struct task_struct *); +extern void __giveup_fpu(struct task_struct *); +static inline void disable_kernel_fp(void) +{ + msr_check_and_clear(MSR_FP); +} #else static inline void flush_fp_to_thread(struct task_struct *t) { } -static inline void giveup_fpu(struct task_struct *t) { } #endif #ifdef CONFIG_ALTIVEC +extern void enable_kernel_altivec(void); extern void flush_altivec_to_thread(struct task_struct *); extern void giveup_altivec(struct task_struct *); -extern void giveup_altivec_notask(void); -#else -static inline void flush_altivec_to_thread(struct task_struct *t) -{ -} -static inline void giveup_altivec(struct task_struct *t) +extern void __giveup_altivec(struct task_struct *); +static inline void disable_kernel_altivec(void) { + msr_check_and_clear(MSR_VEC); } #endif #ifdef CONFIG_VSX +extern void enable_kernel_vsx(void); extern void flush_vsx_to_thread(struct task_struct *); -#else -static inline void flush_vsx_to_thread(struct task_struct *t) +extern void giveup_vsx(struct task_struct *); +extern void __giveup_vsx(struct task_struct *); +static inline void disable_kernel_vsx(void) { + msr_check_and_clear(MSR_FP|MSR_VEC|MSR_VSX); } #endif #ifdef CONFIG_SPE +extern void enable_kernel_spe(void); extern void flush_spe_to_thread(struct task_struct *); -#else -static inline void flush_spe_to_thread(struct task_struct *t) +extern void giveup_spe(struct task_struct *); +extern void __giveup_spe(struct task_struct *); +static inline void disable_kernel_spe(void) { + msr_check_and_clear(MSR_SPE); } #endif diff --git a/arch/powerpc/include/asm/synch.h b/arch/powerpc/include/asm/synch.h index e682a7143edb..c50868681f9e 100644 --- a/arch/powerpc/include/asm/synch.h +++ b/arch/powerpc/include/asm/synch.h @@ -44,7 +44,7 @@ static inline void isync(void) MAKE_LWSYNC_SECTION_ENTRY(97, __lwsync_fixup); #define PPC_ACQUIRE_BARRIER "\n" stringify_in_c(__PPC_ACQUIRE_BARRIER) #define PPC_RELEASE_BARRIER stringify_in_c(LWSYNC) "\n" -#define PPC_ATOMIC_ENTRY_BARRIER "\n" stringify_in_c(LWSYNC) "\n" +#define PPC_ATOMIC_ENTRY_BARRIER "\n" stringify_in_c(sync) "\n" #define PPC_ATOMIC_EXIT_BARRIER "\n" stringify_in_c(sync) "\n" #else #define PPC_ACQUIRE_BARRIER diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h index 10fc784a2ad4..2d7109a8d296 100644 --- a/arch/powerpc/include/asm/time.h +++ b/arch/powerpc/include/asm/time.h @@ -27,7 +27,6 @@ extern struct clock_event_device decrementer_clockevent; struct rtc_time; extern void to_tm(int tim, struct rtc_time * tm); -extern void GregorianDay(struct rtc_time *tm); extern void tick_broadcast_ipi_handler(void); extern void generic_calibrate_decr(void); diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 2a8ebae0936b..b7c20f0b8fbe 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -274,21 +274,6 @@ do { \ __gu_err; \ }) -#ifndef __powerpc64__ -#define __get_user64_nocheck(x, ptr, size) \ -({ \ - long __gu_err; \ - long long __gu_val; \ - __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ - __chk_user_ptr(ptr); \ - if (!is_kernel_addr((unsigned long)__gu_addr)) \ - might_fault(); \ - __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \ - (x) = (__force __typeof__(*(ptr)))__gu_val; \ - __gu_err; \ -}) -#endif /* __powerpc64__ */ - #define __get_user_check(x, ptr, size) \ ({ \ long __gu_err = -EFAULT; \ diff --git a/arch/powerpc/include/asm/ucc.h b/arch/powerpc/include/asm/ucc.h deleted file mode 100644 index 6927ac26516e..000000000000 --- a/arch/powerpc/include/asm/ucc.h +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (C) 2006 Freescale Semiconductor, Inc. All rights reserved. - * - * Authors: Shlomi Gridish <gridish@freescale.com> - * Li Yang <leoli@freescale.com> - * - * Description: - * Internal header file for UCC unit routines. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ -#ifndef __UCC_H__ -#define __UCC_H__ - -#include <asm/immap_qe.h> -#include <asm/qe.h> - -#define STATISTICS - -#define UCC_MAX_NUM 8 - -/* Slow or fast type for UCCs. -*/ -enum ucc_speed_type { - UCC_SPEED_TYPE_FAST = UCC_GUEMR_MODE_FAST_RX | UCC_GUEMR_MODE_FAST_TX, - UCC_SPEED_TYPE_SLOW = UCC_GUEMR_MODE_SLOW_RX | UCC_GUEMR_MODE_SLOW_TX -}; - -/* ucc_set_type - * Sets UCC to slow or fast mode. - * - * ucc_num - (In) number of UCC (0-7). - * speed - (In) slow or fast mode for UCC. - */ -int ucc_set_type(unsigned int ucc_num, enum ucc_speed_type speed); - -int ucc_set_qe_mux_mii_mng(unsigned int ucc_num); - -int ucc_set_qe_mux_rxtx(unsigned int ucc_num, enum qe_clock clock, - enum comm_dir mode); - -int ucc_mux_set_grant_tsa_bkpt(unsigned int ucc_num, int set, u32 mask); - -/* QE MUX clock routing for UCC -*/ -static inline int ucc_set_qe_mux_grant(unsigned int ucc_num, int set) -{ - return ucc_mux_set_grant_tsa_bkpt(ucc_num, set, QE_CMXUCR_GRANT); -} - -static inline int ucc_set_qe_mux_tsa(unsigned int ucc_num, int set) -{ - return ucc_mux_set_grant_tsa_bkpt(ucc_num, set, QE_CMXUCR_TSA); -} - -static inline int ucc_set_qe_mux_bkpt(unsigned int ucc_num, int set) -{ - return ucc_mux_set_grant_tsa_bkpt(ucc_num, set, QE_CMXUCR_BKPT); -} - -#endif /* __UCC_H__ */ diff --git a/arch/powerpc/include/asm/ucc_fast.h b/arch/powerpc/include/asm/ucc_fast.h deleted file mode 100644 index 72ea9bab07df..000000000000 --- a/arch/powerpc/include/asm/ucc_fast.h +++ /dev/null @@ -1,244 +0,0 @@ -/* - * Internal header file for UCC FAST unit routines. - * - * Copyright (C) 2006 Freescale Semiconductor, Inc. All rights reserved. - * - * Authors: Shlomi Gridish <gridish@freescale.com> - * Li Yang <leoli@freescale.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ -#ifndef __UCC_FAST_H__ -#define __UCC_FAST_H__ - -#include <linux/kernel.h> - -#include <asm/immap_qe.h> -#include <asm/qe.h> - -#include <asm/ucc.h> - -/* Receive BD's status */ -#define R_E 0x80000000 /* buffer empty */ -#define R_W 0x20000000 /* wrap bit */ -#define R_I 0x10000000 /* interrupt on reception */ -#define R_L 0x08000000 /* last */ -#define R_F 0x04000000 /* first */ - -/* transmit BD's status */ -#define T_R 0x80000000 /* ready bit */ -#define T_W 0x20000000 /* wrap bit */ -#define T_I 0x10000000 /* interrupt on completion */ -#define T_L 0x08000000 /* last */ - -/* Rx Data buffer must be 4 bytes aligned in most cases */ -#define UCC_FAST_RX_ALIGN 4 -#define UCC_FAST_MRBLR_ALIGNMENT 4 -#define UCC_FAST_VIRT_FIFO_REGS_ALIGNMENT 8 - -/* Sizes */ -#define UCC_FAST_URFS_MIN_VAL 0x88 -#define UCC_FAST_RECEIVE_VIRTUAL_FIFO_SIZE_FUDGE_FACTOR 8 - -/* ucc_fast_channel_protocol_mode - UCC FAST mode */ -enum ucc_fast_channel_protocol_mode { - UCC_FAST_PROTOCOL_MODE_HDLC = 0x00000000, - UCC_FAST_PROTOCOL_MODE_RESERVED01 = 0x00000001, - UCC_FAST_PROTOCOL_MODE_RESERVED_QMC = 0x00000002, - UCC_FAST_PROTOCOL_MODE_RESERVED02 = 0x00000003, - UCC_FAST_PROTOCOL_MODE_RESERVED_UART = 0x00000004, - UCC_FAST_PROTOCOL_MODE_RESERVED03 = 0x00000005, - UCC_FAST_PROTOCOL_MODE_RESERVED_EX_MAC_1 = 0x00000006, - UCC_FAST_PROTOCOL_MODE_RESERVED_EX_MAC_2 = 0x00000007, - UCC_FAST_PROTOCOL_MODE_RESERVED_BISYNC = 0x00000008, - UCC_FAST_PROTOCOL_MODE_RESERVED04 = 0x00000009, - UCC_FAST_PROTOCOL_MODE_ATM = 0x0000000A, - UCC_FAST_PROTOCOL_MODE_RESERVED05 = 0x0000000B, - UCC_FAST_PROTOCOL_MODE_ETHERNET = 0x0000000C, - UCC_FAST_PROTOCOL_MODE_RESERVED06 = 0x0000000D, - UCC_FAST_PROTOCOL_MODE_POS = 0x0000000E, - UCC_FAST_PROTOCOL_MODE_RESERVED07 = 0x0000000F -}; - -/* ucc_fast_transparent_txrx - UCC Fast Transparent TX & RX */ -enum ucc_fast_transparent_txrx { - UCC_FAST_GUMR_TRANSPARENT_TTX_TRX_NORMAL = 0x00000000, - UCC_FAST_GUMR_TRANSPARENT_TTX_TRX_TRANSPARENT = 0x18000000 -}; - -/* UCC fast diagnostic mode */ -enum ucc_fast_diag_mode { - UCC_FAST_DIAGNOSTIC_NORMAL = 0x0, - UCC_FAST_DIAGNOSTIC_LOCAL_LOOP_BACK = 0x40000000, - UCC_FAST_DIAGNOSTIC_AUTO_ECHO = 0x80000000, - UCC_FAST_DIAGNOSTIC_LOOP_BACK_AND_ECHO = 0xC0000000 -}; - -/* UCC fast Sync length (transparent mode only) */ -enum ucc_fast_sync_len { - UCC_FAST_SYNC_LEN_NOT_USED = 0x0, - UCC_FAST_SYNC_LEN_AUTOMATIC = 0x00004000, - UCC_FAST_SYNC_LEN_8_BIT = 0x00008000, - UCC_FAST_SYNC_LEN_16_BIT = 0x0000C000 -}; - -/* UCC fast RTS mode */ -enum ucc_fast_ready_to_send { - UCC_FAST_SEND_IDLES_BETWEEN_FRAMES = 0x00000000, - UCC_FAST_SEND_FLAGS_BETWEEN_FRAMES = 0x00002000 -}; - -/* UCC fast receiver decoding mode */ -enum ucc_fast_rx_decoding_method { - UCC_FAST_RX_ENCODING_NRZ = 0x00000000, - UCC_FAST_RX_ENCODING_NRZI = 0x00000800, - UCC_FAST_RX_ENCODING_RESERVED0 = 0x00001000, - UCC_FAST_RX_ENCODING_RESERVED1 = 0x00001800 -}; - -/* UCC fast transmitter encoding mode */ -enum ucc_fast_tx_encoding_method { - UCC_FAST_TX_ENCODING_NRZ = 0x00000000, - UCC_FAST_TX_ENCODING_NRZI = 0x00000100, - UCC_FAST_TX_ENCODING_RESERVED0 = 0x00000200, - UCC_FAST_TX_ENCODING_RESERVED1 = 0x00000300 -}; - -/* UCC fast CRC length */ -enum ucc_fast_transparent_tcrc { - UCC_FAST_16_BIT_CRC = 0x00000000, - UCC_FAST_CRC_RESERVED0 = 0x00000040, - UCC_FAST_32_BIT_CRC = 0x00000080, - UCC_FAST_CRC_RESERVED1 = 0x000000C0 -}; - -/* Fast UCC initialization structure */ -struct ucc_fast_info { - int ucc_num; - enum qe_clock rx_clock; - enum qe_clock tx_clock; - u32 regs; - int irq; - u32 uccm_mask; - int bd_mem_part; - int brkpt_support; - int grant_support; - int tsa; - int cdp; - int cds; - int ctsp; - int ctss; - int tci; - int txsy; - int rtsm; - int revd; - int rsyn; - u16 max_rx_buf_length; - u16 urfs; - u16 urfet; - u16 urfset; - u16 utfs; - u16 utfet; - u16 utftt; - u16 ufpt; - enum ucc_fast_channel_protocol_mode mode; - enum ucc_fast_transparent_txrx ttx_trx; - enum ucc_fast_tx_encoding_method tenc; - enum ucc_fast_rx_decoding_method renc; - enum ucc_fast_transparent_tcrc tcrc; - enum ucc_fast_sync_len synl; -}; - -struct ucc_fast_private { - struct ucc_fast_info *uf_info; - struct ucc_fast __iomem *uf_regs; /* a pointer to the UCC regs. */ - u32 __iomem *p_ucce; /* a pointer to the event register in memory. */ - u32 __iomem *p_uccm; /* a pointer to the mask register in memory. */ -#ifdef CONFIG_UGETH_TX_ON_DEMAND - u16 __iomem *p_utodr; /* pointer to the transmit on demand register */ -#endif - int enabled_tx; /* Whether channel is enabled for Tx (ENT) */ - int enabled_rx; /* Whether channel is enabled for Rx (ENR) */ - int stopped_tx; /* Whether channel has been stopped for Tx - (STOP_TX, etc.) */ - int stopped_rx; /* Whether channel has been stopped for Rx */ - u32 ucc_fast_tx_virtual_fifo_base_offset;/* pointer to base of Tx - virtual fifo */ - u32 ucc_fast_rx_virtual_fifo_base_offset;/* pointer to base of Rx - virtual fifo */ -#ifdef STATISTICS - u32 tx_frames; /* Transmitted frames counter. */ - u32 rx_frames; /* Received frames counter (only frames - passed to application). */ - u32 tx_discarded; /* Discarded tx frames counter (frames that - were discarded by the driver due to errors). - */ - u32 rx_discarded; /* Discarded rx frames counter (frames that - were discarded by the driver due to errors). - */ -#endif /* STATISTICS */ - u16 mrblr; /* maximum receive buffer length */ -}; - -/* ucc_fast_init - * Initializes Fast UCC according to user provided parameters. - * - * uf_info - (In) pointer to the fast UCC info structure. - * uccf_ret - (Out) pointer to the fast UCC structure. - */ -int ucc_fast_init(struct ucc_fast_info * uf_info, struct ucc_fast_private ** uccf_ret); - -/* ucc_fast_free - * Frees all resources for fast UCC. - * - * uccf - (In) pointer to the fast UCC structure. - */ -void ucc_fast_free(struct ucc_fast_private * uccf); - -/* ucc_fast_enable - * Enables a fast UCC port. - * This routine enables Tx and/or Rx through the General UCC Mode Register. - * - * uccf - (In) pointer to the fast UCC structure. - * mode - (In) TX, RX, or both. - */ -void ucc_fast_enable(struct ucc_fast_private * uccf, enum comm_dir mode); - -/* ucc_fast_disable - * Disables a fast UCC port. - * This routine disables Tx and/or Rx through the General UCC Mode Register. - * - * uccf - (In) pointer to the fast UCC structure. - * mode - (In) TX, RX, or both. - */ -void ucc_fast_disable(struct ucc_fast_private * uccf, enum comm_dir mode); - -/* ucc_fast_irq - * Handles interrupts on fast UCC. - * Called from the general interrupt routine to handle interrupts on fast UCC. - * - * uccf - (In) pointer to the fast UCC structure. - */ -void ucc_fast_irq(struct ucc_fast_private * uccf); - -/* ucc_fast_transmit_on_demand - * Immediately forces a poll of the transmitter for data to be sent. - * Typically, the hardware performs a periodic poll for data that the - * transmit routine has set up to be transmitted. In cases where - * this polling cycle is not soon enough, this optional routine can - * be invoked to force a poll right away, instead. Proper use for - * each transmission for which this functionality is desired is to - * call the transmit routine and then this routine right after. - * - * uccf - (In) pointer to the fast UCC structure. - */ -void ucc_fast_transmit_on_demand(struct ucc_fast_private * uccf); - -u32 ucc_fast_get_qe_cr_subblock(int uccf_num); - -void ucc_fast_dump_regs(struct ucc_fast_private * uccf); - -#endif /* __UCC_FAST_H__ */ diff --git a/arch/powerpc/include/asm/ucc_slow.h b/arch/powerpc/include/asm/ucc_slow.h deleted file mode 100644 index 233ef5fe5fde..000000000000 --- a/arch/powerpc/include/asm/ucc_slow.h +++ /dev/null @@ -1,277 +0,0 @@ -/* - * Copyright (C) 2006 Freescale Semiconductor, Inc. All rights reserved. - * - * Authors: Shlomi Gridish <gridish@freescale.com> - * Li Yang <leoli@freescale.com> - * - * Description: - * Internal header file for UCC SLOW unit routines. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ -#ifndef __UCC_SLOW_H__ -#define __UCC_SLOW_H__ - -#include <linux/kernel.h> - -#include <asm/immap_qe.h> -#include <asm/qe.h> - -#include <asm/ucc.h> - -/* transmit BD's status */ -#define T_R 0x80000000 /* ready bit */ -#define T_PAD 0x40000000 /* add pads to short frames */ -#define T_W 0x20000000 /* wrap bit */ -#define T_I 0x10000000 /* interrupt on completion */ -#define T_L 0x08000000 /* last */ - -#define T_A 0x04000000 /* Address - the data transmitted as address - chars */ -#define T_TC 0x04000000 /* transmit CRC */ -#define T_CM 0x02000000 /* continuous mode */ -#define T_DEF 0x02000000 /* collision on previous attempt to transmit */ -#define T_P 0x01000000 /* Preamble - send Preamble sequence before - data */ -#define T_HB 0x01000000 /* heartbeat */ -#define T_NS 0x00800000 /* No Stop */ -#define T_LC 0x00800000 /* late collision */ -#define T_RL 0x00400000 /* retransmission limit */ -#define T_UN 0x00020000 /* underrun */ -#define T_CT 0x00010000 /* CTS lost */ -#define T_CSL 0x00010000 /* carrier sense lost */ -#define T_RC 0x003c0000 /* retry count */ - -/* Receive BD's status */ -#define R_E 0x80000000 /* buffer empty */ -#define R_W 0x20000000 /* wrap bit */ -#define R_I 0x10000000 /* interrupt on reception */ -#define R_L 0x08000000 /* last */ -#define R_C 0x08000000 /* the last byte in this buffer is a cntl - char */ -#define R_F 0x04000000 /* first */ -#define R_A 0x04000000 /* the first byte in this buffer is address - byte */ -#define R_CM 0x02000000 /* continuous mode */ -#define R_ID 0x01000000 /* buffer close on reception of idles */ -#define R_M 0x01000000 /* Frame received because of promiscuous - mode */ -#define R_AM 0x00800000 /* Address match */ -#define R_DE 0x00800000 /* Address match */ -#define R_LG 0x00200000 /* Break received */ -#define R_BR 0x00200000 /* Frame length violation */ -#define R_NO 0x00100000 /* Rx Non Octet Aligned Packet */ -#define R_FR 0x00100000 /* Framing Error (no stop bit) character - received */ -#define R_PR 0x00080000 /* Parity Error character received */ -#define R_AB 0x00080000 /* Frame Aborted */ -#define R_SH 0x00080000 /* frame is too short */ -#define R_CR 0x00040000 /* CRC Error */ -#define R_OV 0x00020000 /* Overrun */ -#define R_CD 0x00010000 /* CD lost */ -#define R_CL 0x00010000 /* this frame is closed because of a - collision */ - -/* Rx Data buffer must be 4 bytes aligned in most cases.*/ -#define UCC_SLOW_RX_ALIGN 4 -#define UCC_SLOW_MRBLR_ALIGNMENT 4 -#define UCC_SLOW_PRAM_SIZE 0x100 -#define ALIGNMENT_OF_UCC_SLOW_PRAM 64 - -/* UCC Slow Channel Protocol Mode */ -enum ucc_slow_channel_protocol_mode { - UCC_SLOW_CHANNEL_PROTOCOL_MODE_QMC = 0x00000002, - UCC_SLOW_CHANNEL_PROTOCOL_MODE_UART = 0x00000004, - UCC_SLOW_CHANNEL_PROTOCOL_MODE_BISYNC = 0x00000008, -}; - -/* UCC Slow Transparent Transmit CRC (TCRC) */ -enum ucc_slow_transparent_tcrc { - /* 16-bit CCITT CRC (HDLC). (X16 + X12 + X5 + 1) */ - UCC_SLOW_TRANSPARENT_TCRC_CCITT_CRC16 = 0x00000000, - /* CRC16 (BISYNC). (X16 + X15 + X2 + 1) */ - UCC_SLOW_TRANSPARENT_TCRC_CRC16 = 0x00004000, - /* 32-bit CCITT CRC (Ethernet and HDLC) */ - UCC_SLOW_TRANSPARENT_TCRC_CCITT_CRC32 = 0x00008000, -}; - -/* UCC Slow oversampling rate for transmitter (TDCR) */ -enum ucc_slow_tx_oversampling_rate { - /* 1x clock mode */ - UCC_SLOW_OVERSAMPLING_RATE_TX_TDCR_1 = 0x00000000, - /* 8x clock mode */ - UCC_SLOW_OVERSAMPLING_RATE_TX_TDCR_8 = 0x00010000, - /* 16x clock mode */ - UCC_SLOW_OVERSAMPLING_RATE_TX_TDCR_16 = 0x00020000, - /* 32x clock mode */ - UCC_SLOW_OVERSAMPLING_RATE_TX_TDCR_32 = 0x00030000, -}; - -/* UCC Slow Oversampling rate for receiver (RDCR) -*/ -enum ucc_slow_rx_oversampling_rate { - /* 1x clock mode */ - UCC_SLOW_OVERSAMPLING_RATE_RX_RDCR_1 = 0x00000000, - /* 8x clock mode */ - UCC_SLOW_OVERSAMPLING_RATE_RX_RDCR_8 = 0x00004000, - /* 16x clock mode */ - UCC_SLOW_OVERSAMPLING_RATE_RX_RDCR_16 = 0x00008000, - /* 32x clock mode */ - UCC_SLOW_OVERSAMPLING_RATE_RX_RDCR_32 = 0x0000c000, -}; - -/* UCC Slow Transmitter encoding method (TENC) -*/ -enum ucc_slow_tx_encoding_method { - UCC_SLOW_TRANSMITTER_ENCODING_METHOD_TENC_NRZ = 0x00000000, - UCC_SLOW_TRANSMITTER_ENCODING_METHOD_TENC_NRZI = 0x00000100 -}; - -/* UCC Slow Receiver decoding method (RENC) -*/ -enum ucc_slow_rx_decoding_method { - UCC_SLOW_RECEIVER_DECODING_METHOD_RENC_NRZ = 0x00000000, - UCC_SLOW_RECEIVER_DECODING_METHOD_RENC_NRZI = 0x00000800 -}; - -/* UCC Slow Diagnostic mode (DIAG) -*/ -enum ucc_slow_diag_mode { - UCC_SLOW_DIAG_MODE_NORMAL = 0x00000000, - UCC_SLOW_DIAG_MODE_LOOPBACK = 0x00000040, - UCC_SLOW_DIAG_MODE_ECHO = 0x00000080, - UCC_SLOW_DIAG_MODE_LOOPBACK_ECHO = 0x000000c0 -}; - -struct ucc_slow_info { - int ucc_num; - int protocol; /* QE_CR_PROTOCOL_xxx */ - enum qe_clock rx_clock; - enum qe_clock tx_clock; - phys_addr_t regs; - int irq; - u16 uccm_mask; - int data_mem_part; - int init_tx; - int init_rx; - u32 tx_bd_ring_len; - u32 rx_bd_ring_len; - int rx_interrupts; - int brkpt_support; - int grant_support; - int tsa; - int cdp; - int cds; - int ctsp; - int ctss; - int rinv; - int tinv; - int rtsm; - int rfw; - int tci; - int tend; - int tfl; - int txsy; - u16 max_rx_buf_length; - enum ucc_slow_transparent_tcrc tcrc; - enum ucc_slow_channel_protocol_mode mode; - enum ucc_slow_diag_mode diag; - enum ucc_slow_tx_oversampling_rate tdcr; - enum ucc_slow_rx_oversampling_rate rdcr; - enum ucc_slow_tx_encoding_method tenc; - enum ucc_slow_rx_decoding_method renc; -}; - -struct ucc_slow_private { - struct ucc_slow_info *us_info; - struct ucc_slow __iomem *us_regs; /* Ptr to memory map of UCC regs */ - struct ucc_slow_pram *us_pram; /* a pointer to the parameter RAM */ - u32 us_pram_offset; - int enabled_tx; /* Whether channel is enabled for Tx (ENT) */ - int enabled_rx; /* Whether channel is enabled for Rx (ENR) */ - int stopped_tx; /* Whether channel has been stopped for Tx - (STOP_TX, etc.) */ - int stopped_rx; /* Whether channel has been stopped for Rx */ - struct list_head confQ; /* frames passed to chip waiting for tx */ - u32 first_tx_bd_mask; /* mask is used in Tx routine to save status - and length for first BD in a frame */ - u32 tx_base_offset; /* first BD in Tx BD table offset (In MURAM) */ - u32 rx_base_offset; /* first BD in Rx BD table offset (In MURAM) */ - struct qe_bd *confBd; /* next BD for confirm after Tx */ - struct qe_bd *tx_bd; /* next BD for new Tx request */ - struct qe_bd *rx_bd; /* next BD to collect after Rx */ - void *p_rx_frame; /* accumulating receive frame */ - u16 *p_ucce; /* a pointer to the event register in memory. - */ - u16 *p_uccm; /* a pointer to the mask register in memory */ - u16 saved_uccm; /* a saved mask for the RX Interrupt bits */ -#ifdef STATISTICS - u32 tx_frames; /* Transmitted frames counters */ - u32 rx_frames; /* Received frames counters (only frames - passed to application) */ - u32 rx_discarded; /* Discarded frames counters (frames that - were discarded by the driver due to - errors) */ -#endif /* STATISTICS */ -}; - -/* ucc_slow_init - * Initializes Slow UCC according to provided parameters. - * - * us_info - (In) pointer to the slow UCC info structure. - * uccs_ret - (Out) pointer to the slow UCC structure. - */ -int ucc_slow_init(struct ucc_slow_info * us_info, struct ucc_slow_private ** uccs_ret); - -/* ucc_slow_free - * Frees all resources for slow UCC. - * - * uccs - (In) pointer to the slow UCC structure. - */ -void ucc_slow_free(struct ucc_slow_private * uccs); - -/* ucc_slow_enable - * Enables a fast UCC port. - * This routine enables Tx and/or Rx through the General UCC Mode Register. - * - * uccs - (In) pointer to the slow UCC structure. - * mode - (In) TX, RX, or both. - */ -void ucc_slow_enable(struct ucc_slow_private * uccs, enum comm_dir mode); - -/* ucc_slow_disable - * Disables a fast UCC port. - * This routine disables Tx and/or Rx through the General UCC Mode Register. - * - * uccs - (In) pointer to the slow UCC structure. - * mode - (In) TX, RX, or both. - */ -void ucc_slow_disable(struct ucc_slow_private * uccs, enum comm_dir mode); - -/* ucc_slow_graceful_stop_tx - * Smoothly stops transmission on a specified slow UCC. - * - * uccs - (In) pointer to the slow UCC structure. - */ -void ucc_slow_graceful_stop_tx(struct ucc_slow_private * uccs); - -/* ucc_slow_stop_tx - * Stops transmission on a specified slow UCC. - * - * uccs - (In) pointer to the slow UCC structure. - */ -void ucc_slow_stop_tx(struct ucc_slow_private * uccs); - -/* ucc_slow_restart_tx - * Restarts transmitting on a specified slow UCC. - * - * uccs - (In) pointer to the slow UCC structure. - */ -void ucc_slow_restart_tx(struct ucc_slow_private *uccs); - -u32 ucc_slow_get_qe_cr_subblock(int uccs_num); - -#endif /* __UCC_SLOW_H__ */ diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index 4b6b8ace18e0..6a5ace5fa0c8 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h @@ -12,10 +12,9 @@ #include <uapi/asm/unistd.h> -#define __NR_syscalls 379 +#define NR_syscalls 379 #define __NR__exit __NR_exit -#define NR_syscalls __NR_syscalls #ifndef __ASSEMBLY__ diff --git a/arch/powerpc/include/asm/vdso_datapage.h b/arch/powerpc/include/asm/vdso_datapage.h index b73a8199f161..1afe90ade595 100644 --- a/arch/powerpc/include/asm/vdso_datapage.h +++ b/arch/powerpc/include/asm/vdso_datapage.h @@ -41,7 +41,7 @@ #include <linux/unistd.h> #include <linux/time.h> -#define SYSCALL_MAP_SIZE ((__NR_syscalls + 31) / 32) +#define SYSCALL_MAP_SIZE ((NR_syscalls + 31) / 32) /* * So here is the ppc64 backward compatible version diff --git a/arch/powerpc/include/uapi/asm/cputable.h b/arch/powerpc/include/uapi/asm/cputable.h index 43686043e297..8dde19962a5b 100644 --- a/arch/powerpc/include/uapi/asm/cputable.h +++ b/arch/powerpc/include/uapi/asm/cputable.h @@ -43,5 +43,7 @@ #define PPC_FEATURE2_TAR 0x04000000 #define PPC_FEATURE2_VEC_CRYPTO 0x02000000 #define PPC_FEATURE2_HTM_NOSC 0x01000000 +#define PPC_FEATURE2_ARCH_3_00 0x00800000 /* ISA 3.00 */ +#define PPC_FEATURE2_HAS_IEEE128 0x00400000 /* VSX IEEE Binary Float 128-bit */ #endif /* _UAPI__ASM_POWERPC_CPUTABLE_H */ diff --git a/arch/powerpc/include/uapi/asm/elf.h b/arch/powerpc/include/uapi/asm/elf.h index 59dad113897b..c2d21d11c2d2 100644 --- a/arch/powerpc/include/uapi/asm/elf.h +++ b/arch/powerpc/include/uapi/asm/elf.h @@ -295,6 +295,8 @@ do { \ #define R_PPC64_TLSLD 108 #define R_PPC64_TOCSAVE 109 +#define R_PPC64_ENTRY 118 + #define R_PPC64_REL16 249 #define R_PPC64_REL16_LO 250 #define R_PPC64_REL16_HI 251 diff --git a/arch/powerpc/include/uapi/asm/socket.h b/arch/powerpc/include/uapi/asm/socket.h index c046666038f8..dd54f28ecdec 100644 --- a/arch/powerpc/include/uapi/asm/socket.h +++ b/arch/powerpc/include/uapi/asm/socket.h @@ -92,4 +92,7 @@ #define SO_ATTACH_BPF 50 #define SO_DETACH_BPF SO_DETACH_FILTER +#define SO_ATTACH_REUSEPORT_CBPF 51 +#define SO_ATTACH_REUSEPORT_EBPF 52 + #endif /* _ASM_POWERPC_SOCKET_H */ diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c index 86150fbb42c3..8e7cb8e2b21a 100644 --- a/arch/powerpc/kernel/align.c +++ b/arch/powerpc/kernel/align.c @@ -960,6 +960,7 @@ int fix_alignment(struct pt_regs *regs) preempt_disable(); enable_kernel_fp(); cvt_df(&data.dd, (float *)&data.x32.low32); + disable_kernel_fp(); preempt_enable(); #else return 0; @@ -1000,6 +1001,7 @@ int fix_alignment(struct pt_regs *regs) preempt_disable(); enable_kernel_fp(); cvt_fd((float *)&data.x32.low32, &data.dd); + disable_kernel_fp(); preempt_enable(); #else return 0; diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 221d584d089f..07cebc3514f3 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -185,14 +185,16 @@ int main(void) DEFINE(PACAKMSR, offsetof(struct paca_struct, kernel_msr)); DEFINE(PACASOFTIRQEN, offsetof(struct paca_struct, soft_enabled)); DEFINE(PACAIRQHAPPENED, offsetof(struct paca_struct, irq_happened)); - DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id)); +#ifdef CONFIG_PPC_BOOK3S + DEFINE(PACACONTEXTID, offsetof(struct paca_struct, mm_ctx_id)); #ifdef CONFIG_PPC_MM_SLICES DEFINE(PACALOWSLICESPSIZE, offsetof(struct paca_struct, - context.low_slices_psize)); + mm_ctx_low_slices_psize)); DEFINE(PACAHIGHSLICEPSIZE, offsetof(struct paca_struct, - context.high_slices_psize)); + mm_ctx_high_slices_psize)); DEFINE(MMUPSIZEDEFSIZE, sizeof(struct mmu_psize_def)); #endif /* CONFIG_PPC_MM_SLICES */ +#endif #ifdef CONFIG_PPC_BOOK3E DEFINE(PACAPGD, offsetof(struct paca_struct, pgd)); @@ -222,7 +224,7 @@ int main(void) #ifdef CONFIG_PPC_MM_SLICES DEFINE(MMUPSIZESLLP, offsetof(struct mmu_psize_def, sllp)); #else - DEFINE(PACACONTEXTSLLP, offsetof(struct paca_struct, context.sllp)); + DEFINE(PACACONTEXTSLLP, offsetof(struct paca_struct, mm_ctx_sllp)); #endif /* CONFIG_PPC_MM_SLICES */ DEFINE(PACA_EXGEN, offsetof(struct paca_struct, exgen)); DEFINE(PACA_EXMC, offsetof(struct paca_struct, exmc)); diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index a94f155db78e..0d525ce3717f 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -223,7 +223,11 @@ END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) beq- 1f ACCOUNT_CPU_USER_EXIT(r11, r12) - HMT_MEDIUM_LOW_HAS_PPR + +BEGIN_FTR_SECTION + HMT_MEDIUM_LOW +END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) + ld r13,GPR13(r1) /* only restore r13 if returning to usermode */ 1: ld r2,GPR2(r1) ld r1,GPR1(r1) @@ -312,7 +316,13 @@ syscall_exit_work: subi r12,r12,TI_FLAGS 4: /* Anything else left to do? */ - SET_DEFAULT_THREAD_PPR(r3, r10) /* Set thread.ppr = 3 */ +BEGIN_FTR_SECTION + lis r3,INIT_PPR@highest /* Set thread.ppr = 3 */ + ld r10,PACACURRENT(r13) + sldi r3,r3,32 /* bits 11-13 are used for ppr */ + std r3,TASKTHREADPPR(r10) +END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) + andi. r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP) beq ret_from_except_lite @@ -452,43 +462,11 @@ _GLOBAL(_switch) /* r3-r13 are caller saved -- Cort */ SAVE_8GPRS(14, r1) SAVE_10GPRS(22, r1) - mflr r20 /* Return to switch caller */ - mfmsr r22 - li r0, MSR_FP -#ifdef CONFIG_VSX -BEGIN_FTR_SECTION - oris r0,r0,MSR_VSX@h /* Disable VSX */ -END_FTR_SECTION_IFSET(CPU_FTR_VSX) -#endif /* CONFIG_VSX */ -#ifdef CONFIG_ALTIVEC -BEGIN_FTR_SECTION - oris r0,r0,MSR_VEC@h /* Disable altivec */ - mfspr r24,SPRN_VRSAVE /* save vrsave register value */ - std r24,THREAD_VRSAVE(r3) -END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) -#endif /* CONFIG_ALTIVEC */ - and. r0,r0,r22 - beq+ 1f - andc r22,r22,r0 - MTMSRD(r22) - isync -1: std r20,_NIP(r1) + std r0,_NIP(r1) /* Return to switch caller */ mfcr r23 std r23,_CCR(r1) std r1,KSP(r3) /* Set old stack pointer */ -#ifdef CONFIG_PPC_BOOK3S_64 -BEGIN_FTR_SECTION - /* Event based branch registers */ - mfspr r0, SPRN_BESCR - std r0, THREAD_BESCR(r3) - mfspr r0, SPRN_EBBHR - std r0, THREAD_EBBHR(r3) - mfspr r0, SPRN_EBBRR - std r0, THREAD_EBBRR(r3) -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) -#endif - #ifdef CONFIG_SMP /* We need a sync somewhere here to make sure that if the * previous task gets rescheduled on another CPU, it sees all @@ -576,47 +554,6 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT) mr r1,r8 /* start using new stack pointer */ std r7,PACAKSAVE(r13) -#ifdef CONFIG_PPC_BOOK3S_64 -BEGIN_FTR_SECTION - /* Event based branch registers */ - ld r0, THREAD_BESCR(r4) - mtspr SPRN_BESCR, r0 - ld r0, THREAD_EBBHR(r4) - mtspr SPRN_EBBHR, r0 - ld r0, THREAD_EBBRR(r4) - mtspr SPRN_EBBRR, r0 - - ld r0,THREAD_TAR(r4) - mtspr SPRN_TAR,r0 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) -#endif - -#ifdef CONFIG_ALTIVEC -BEGIN_FTR_SECTION - ld r0,THREAD_VRSAVE(r4) - mtspr SPRN_VRSAVE,r0 /* if G4, restore VRSAVE reg */ -END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) -#endif /* CONFIG_ALTIVEC */ -#ifdef CONFIG_PPC64 -BEGIN_FTR_SECTION - lwz r6,THREAD_DSCR_INHERIT(r4) - ld r0,THREAD_DSCR(r4) - cmpwi r6,0 - bne 1f - ld r0,PACA_DSCR_DEFAULT(r13) -1: -BEGIN_FTR_SECTION_NESTED(70) - mfspr r8, SPRN_FSCR - rldimi r8, r6, FSCR_DSCR_LG, (63 - FSCR_DSCR_LG) - mtspr SPRN_FSCR, r8 -END_FTR_SECTION_NESTED(CPU_FTR_ARCH_207S, CPU_FTR_ARCH_207S, 70) - cmpd r0,r25 - beq 2f - mtspr SPRN_DSCR,r0 -2: -END_FTR_SECTION_IFSET(CPU_FTR_DSCR) -#endif - ld r6,_CCR(r1) mtcrf 0xFF,r6 diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 0a0399c2af11..7716cebf4b8e 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -96,7 +96,6 @@ __start_interrupts: .globl system_reset_pSeries; system_reset_pSeries: - HMT_MEDIUM_PPR_DISCARD SET_SCRATCH0(r13) #ifdef CONFIG_PPC_P7_NAP BEGIN_FTR_SECTION @@ -164,7 +163,6 @@ machine_check_pSeries_1: * some code path might still want to branch into the original * vector */ - HMT_MEDIUM_PPR_DISCARD SET_SCRATCH0(r13) /* save r13 */ #ifdef CONFIG_PPC_P7_NAP BEGIN_FTR_SECTION @@ -199,7 +197,6 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE) . = 0x300 .globl data_access_pSeries data_access_pSeries: - HMT_MEDIUM_PPR_DISCARD SET_SCRATCH0(r13) EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, data_access_common, EXC_STD, KVMTEST, 0x300) @@ -207,7 +204,6 @@ data_access_pSeries: . = 0x380 .globl data_access_slb_pSeries data_access_slb_pSeries: - HMT_MEDIUM_PPR_DISCARD SET_SCRATCH0(r13) EXCEPTION_PROLOG_0(PACA_EXSLB) EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST, 0x380) @@ -234,15 +230,14 @@ data_access_slb_pSeries: bctr #endif - STD_EXCEPTION_PSERIES(0x400, 0x400, instruction_access) + STD_EXCEPTION_PSERIES(0x400, instruction_access) . = 0x480 .globl instruction_access_slb_pSeries instruction_access_slb_pSeries: - HMT_MEDIUM_PPR_DISCARD SET_SCRATCH0(r13) EXCEPTION_PROLOG_0(PACA_EXSLB) - EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x480) + EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST, 0x480) std r3,PACA_EXSLB+EX_R3(r13) mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */ #ifdef __DISABLED__ @@ -269,25 +264,24 @@ instruction_access_slb_pSeries: .globl hardware_interrupt_hv; hardware_interrupt_pSeries: hardware_interrupt_hv: - HMT_MEDIUM_PPR_DISCARD BEGIN_FTR_SECTION _MASKABLE_EXCEPTION_PSERIES(0x502, hardware_interrupt, EXC_HV, SOFTEN_TEST_HV) KVM_HANDLER(PACA_EXGEN, EXC_HV, 0x502) FTR_SECTION_ELSE _MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt, - EXC_STD, SOFTEN_TEST_HV_201) + EXC_STD, SOFTEN_TEST_PR) KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x500) ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) - STD_EXCEPTION_PSERIES(0x600, 0x600, alignment) - KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x600) + STD_EXCEPTION_PSERIES(0x600, alignment) + KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x600) - STD_EXCEPTION_PSERIES(0x700, 0x700, program_check) - KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x700) + STD_EXCEPTION_PSERIES(0x700, program_check) + KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x700) - STD_EXCEPTION_PSERIES(0x800, 0x800, fp_unavailable) - KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x800) + STD_EXCEPTION_PSERIES(0x800, fp_unavailable) + KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x800) . = 0x900 .globl decrementer_pSeries @@ -297,10 +291,10 @@ decrementer_pSeries: STD_EXCEPTION_HV(0x980, 0x982, hdecrementer) MASKABLE_EXCEPTION_PSERIES(0xa00, 0xa00, doorbell_super) - KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xa00) + KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xa00) - STD_EXCEPTION_PSERIES(0xb00, 0xb00, trap_0b) - KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xb00) + STD_EXCEPTION_PSERIES(0xb00, trap_0b) + KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xb00) . = 0xc00 .globl system_call_pSeries @@ -331,8 +325,8 @@ system_call_pSeries: SYSCALL_PSERIES_3 KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xc00) - STD_EXCEPTION_PSERIES(0xd00, 0xd00, single_step) - KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xd00) + STD_EXCEPTION_PSERIES(0xd00, single_step) + KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xd00) /* At 0xe??? we have a bunch of hypervisor exceptions, we branch * out of line to handle them @@ -407,13 +401,12 @@ hv_facility_unavailable_trampoline: KVM_HANDLER_SKIP(PACA_EXGEN, EXC_HV, 0x1202) #endif /* CONFIG_CBE_RAS */ - STD_EXCEPTION_PSERIES(0x1300, 0x1300, instruction_breakpoint) - KVM_HANDLER_PR_SKIP(PACA_EXGEN, EXC_STD, 0x1300) + STD_EXCEPTION_PSERIES(0x1300, instruction_breakpoint) + KVM_HANDLER_SKIP(PACA_EXGEN, EXC_STD, 0x1300) . = 0x1500 .global denorm_exception_hv denorm_exception_hv: - HMT_MEDIUM_PPR_DISCARD mtspr SPRN_SPRG_HSCRATCH0,r13 EXCEPTION_PROLOG_0(PACA_EXGEN) EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, 0x1500) @@ -435,8 +428,8 @@ denorm_exception_hv: KVM_HANDLER_SKIP(PACA_EXGEN, EXC_HV, 0x1602) #endif /* CONFIG_CBE_RAS */ - STD_EXCEPTION_PSERIES(0x1700, 0x1700, altivec_assist) - KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x1700) + STD_EXCEPTION_PSERIES(0x1700, altivec_assist) + KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x1700) #ifdef CONFIG_CBE_RAS STD_EXCEPTION_HV(0x1800, 0x1802, cbe_thermal) @@ -527,7 +520,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) machine_check_pSeries: .globl machine_check_fwnmi machine_check_fwnmi: - HMT_MEDIUM_PPR_DISCARD SET_SCRATCH0(r13) /* save r13 */ EXCEPTION_PROLOG_0(PACA_EXMC) machine_check_pSeries_0: @@ -536,9 +528,9 @@ machine_check_pSeries_0: KVM_HANDLER_SKIP(PACA_EXMC, EXC_STD, 0x200) KVM_HANDLER_SKIP(PACA_EXGEN, EXC_STD, 0x300) KVM_HANDLER_SKIP(PACA_EXSLB, EXC_STD, 0x380) - KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x400) - KVM_HANDLER_PR(PACA_EXSLB, EXC_STD, 0x480) - KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x900) + KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x400) + KVM_HANDLER(PACA_EXSLB, EXC_STD, 0x480) + KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x900) KVM_HANDLER(PACA_EXGEN, EXC_HV, 0x982) #ifdef CONFIG_PPC_DENORMALISATION @@ -621,13 +613,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) /* moved from 0xf00 */ STD_EXCEPTION_PSERIES_OOL(0xf00, performance_monitor) - KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf00) + KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xf00) STD_EXCEPTION_PSERIES_OOL(0xf20, altivec_unavailable) - KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf20) + KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xf20) STD_EXCEPTION_PSERIES_OOL(0xf40, vsx_unavailable) - KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf40) + KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xf40) STD_EXCEPTION_PSERIES_OOL(0xf60, facility_unavailable) - KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf60) + KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xf60) STD_EXCEPTION_HV_OOL(0xf82, facility_unavailable) KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xf82) @@ -711,7 +703,6 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE) .globl system_reset_fwnmi .align 7 system_reset_fwnmi: - HMT_MEDIUM_PPR_DISCARD SET_SCRATCH0(r13) /* save r13 */ EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD, NOTEST, 0x100) @@ -1556,29 +1547,19 @@ do_hash_page: lwz r0,TI_PREEMPT(r11) /* If we're in an "NMI" */ andis. r0,r0,NMI_MASK@h /* (i.e. an irq when soft-disabled) */ bne 77f /* then don't call hash_page now */ - /* - * We need to set the _PAGE_USER bit if MSR_PR is set or if we are - * accessing a userspace segment (even from the kernel). We assume - * kernel addresses always have the high bit set. - */ - rlwinm r4,r4,32-25+9,31-9,31-9 /* DSISR_STORE -> _PAGE_RW */ - rotldi r0,r3,15 /* Move high bit into MSR_PR posn */ - orc r0,r12,r0 /* MSR_PR | ~high_bit */ - rlwimi r4,r0,32-13,30,30 /* becomes _PAGE_USER access bit */ - ori r4,r4,1 /* add _PAGE_PRESENT */ - rlwimi r4,r5,22+2,31-2,31-2 /* Set _PAGE_EXEC if trap is 0x400 */ /* * r3 contains the faulting address - * r4 contains the required access permissions + * r4 msr * r5 contains the trap number * r6 contains dsisr * * at return r3 = 0 for success, 1 for page fault, negative for error */ + mr r4,r12 ld r6,_DSISR(r1) - bl hash_page /* build HPTE if possible */ - cmpdi r3,0 /* see if hash_page succeeded */ + bl __hash_page /* build HPTE if possible */ + cmpdi r3,0 /* see if __hash_page succeeded */ /* Success */ beq fast_exc_return_irq /* Return from exception on success */ diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index 9ad236e5d2c9..2117eaca3d28 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -73,30 +73,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) MTFSF_L(fr0) REST_32FPVSRS(0, R4, R7) - /* FP/VSX off again */ - MTMSRD(r6) - SYNC - blr #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ /* - * Enable use of the FPU, and VSX if possible, for the caller. - */ -_GLOBAL(fp_enable) - mfmsr r3 - ori r3,r3,MSR_FP -#ifdef CONFIG_VSX -BEGIN_FTR_SECTION - oris r3,r3,MSR_VSX@h -END_FTR_SECTION_IFSET(CPU_FTR_VSX) -#endif - SYNC - MTMSRD(r3) - isync /* (not necessary for arch 2.02 and later) */ - blr - -/* * Load state from memory into FP registers including FPSCR. * Assumes the caller has enabled FP in the MSR. */ @@ -136,31 +116,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) SYNC MTMSRD(r5) /* enable use of fpu now */ isync -/* - * For SMP, we don't do lazy FPU switching because it just gets too - * horrendously complex, especially when a task switches from one CPU - * to another. Instead we call giveup_fpu in switch_to. - */ -#ifndef CONFIG_SMP - LOAD_REG_ADDRBASE(r3, last_task_used_math) - toreal(r3) - PPC_LL r4,ADDROFF(last_task_used_math)(r3) - PPC_LCMPI 0,r4,0 - beq 1f - toreal(r4) - addi r4,r4,THREAD /* want last_task_used_math->thread */ - addi r10,r4,THREAD_FPSTATE - SAVE_32FPVSRS(0, R5, R10) - mffs fr0 - stfd fr0,FPSTATE_FPSCR(r10) - PPC_LL r5,PT_REGS(r4) - toreal(r5) - PPC_LL r4,_MSR-STACK_FRAME_OVERHEAD(r5) - li r10,MSR_FP|MSR_FE0|MSR_FE1 - andc r4,r4,r10 /* disable FP for previous task */ - PPC_STL r4,_MSR-STACK_FRAME_OVERHEAD(r5) -1: -#endif /* CONFIG_SMP */ /* enable use of FP after return */ #ifdef CONFIG_PPC32 mfspr r5,SPRN_SPRG_THREAD /* current task's THREAD (phys) */ @@ -179,36 +134,17 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) lfd fr0,FPSTATE_FPSCR(r10) MTFSF_L(fr0) REST_32FPVSRS(0, R4, R10) -#ifndef CONFIG_SMP - subi r4,r5,THREAD - fromreal(r4) - PPC_STL r4,ADDROFF(last_task_used_math)(r3) -#endif /* CONFIG_SMP */ /* restore registers and return */ /* we haven't used ctr or xer or lr */ blr /* - * giveup_fpu(tsk) + * __giveup_fpu(tsk) * Disable FP for the task given as the argument, * and save the floating-point registers in its thread_struct. * Enables the FPU for use in the kernel on return. */ -_GLOBAL(giveup_fpu) - mfmsr r5 - ori r5,r5,MSR_FP -#ifdef CONFIG_VSX -BEGIN_FTR_SECTION - oris r5,r5,MSR_VSX@h -END_FTR_SECTION_IFSET(CPU_FTR_VSX) -#endif - SYNC_601 - ISYNC_601 - MTMSRD(r5) /* enable use of fpu now */ - SYNC_601 - isync - PPC_LCMPI 0,r3,0 - beqlr- /* if no previous owner, done */ +_GLOBAL(__giveup_fpu) addi r3,r3,THREAD /* want THREAD of task */ PPC_LL r6,THREAD_FPSAVEAREA(r3) PPC_LL r5,PT_REGS(r3) @@ -230,11 +166,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) andc r4,r4,r3 /* disable FP for previous task */ PPC_STL r4,_MSR-STACK_FRAME_OVERHEAD(r5) 1: -#ifndef CONFIG_SMP - li r5,0 - LOAD_REG_ADDRBASE(r4,last_task_used_math) - PPC_STL r5,ADDROFF(last_task_used_math)(r4) -#endif /* CONFIG_SMP */ blr /* diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index fffd1f96bb1d..f705171b924b 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -857,29 +857,6 @@ _GLOBAL(load_up_spe) oris r5,r5,MSR_SPE@h mtmsr r5 /* enable use of SPE now */ isync -/* - * For SMP, we don't do lazy SPE switching because it just gets too - * horrendously complex, especially when a task switches from one CPU - * to another. Instead we call giveup_spe in switch_to. - */ -#ifndef CONFIG_SMP - lis r3,last_task_used_spe@ha - lwz r4,last_task_used_spe@l(r3) - cmpi 0,r4,0 - beq 1f - addi r4,r4,THREAD /* want THREAD of last_task_used_spe */ - SAVE_32EVRS(0,r10,r4,THREAD_EVR0) - evxor evr10, evr10, evr10 /* clear out evr10 */ - evmwumiaa evr10, evr10, evr10 /* evr10 <- ACC = 0 * 0 + ACC */ - li r5,THREAD_ACC - evstddx evr10, r4, r5 /* save off accumulator */ - lwz r5,PT_REGS(r4) - lwz r4,_MSR-STACK_FRAME_OVERHEAD(r5) - lis r10,MSR_SPE@h - andc r4,r4,r10 /* disable SPE for previous task */ - stw r4,_MSR-STACK_FRAME_OVERHEAD(r5) -1: -#endif /* !CONFIG_SMP */ /* enable use of SPE after return */ oris r9,r9,MSR_SPE@h mfspr r5,SPRN_SPRG_THREAD /* current task's THREAD (phys) */ @@ -889,10 +866,6 @@ _GLOBAL(load_up_spe) evlddx evr4,r10,r5 evmra evr4,evr4 REST_32EVRS(0,r10,r5,THREAD_EVR0) -#ifndef CONFIG_SMP - subi r4,r5,THREAD - stw r4,last_task_used_spe@l(r3) -#endif /* !CONFIG_SMP */ blr /* @@ -1011,16 +984,10 @@ _GLOBAL(__setup_ehv_ivors) #ifdef CONFIG_SPE /* - * extern void giveup_spe(struct task_struct *prev) + * extern void __giveup_spe(struct task_struct *prev) * */ -_GLOBAL(giveup_spe) - mfmsr r5 - oris r5,r5,MSR_SPE@h - mtmsr r5 /* enable use of SPE now */ - isync - cmpi 0,r3,0 - beqlr- /* if no previous owner, done */ +_GLOBAL(__giveup_spe) addi r3,r3,THREAD /* want THREAD of task */ lwz r5,PT_REGS(r3) cmpi 0,r5,0 @@ -1035,11 +1002,6 @@ _GLOBAL(giveup_spe) andc r4,r4,r3 /* disable SPE for previous task */ stw r4,_MSR-STACK_FRAME_OVERHEAD(r5) 1: -#ifndef CONFIG_SMP - li r5,0 - lis r4,last_task_used_spe@ha - stw r5,last_task_used_spe@l(r4) -#endif /* !CONFIG_SMP */ blr #endif /* CONFIG_SPE */ diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S index 112ccf497562..cf4fb5429cf1 100644 --- a/arch/powerpc/kernel/idle_power7.S +++ b/arch/powerpc/kernel/idle_power7.S @@ -89,13 +89,6 @@ _GLOBAL(power7_powersave_common) std r0,_LINK(r1) std r0,_NIP(r1) -#ifndef CONFIG_SMP - /* Make sure FPU, VSX etc... are flushed as we may lose - * state when going to nap mode - */ - bl discard_lazy_cpu_state -#endif /* CONFIG_SMP */ - /* Hard disable interrupts */ mfmsr r9 rldicl r9,r9,48,1 diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index ed3ab509faca..be8edd67f05b 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -743,6 +743,8 @@ relocate_new_kernel: /* Check for 47x cores */ mfspr r3,SPRN_PVR srwi r3,r3,16 + cmplwi cr0,r3,PVR_476FPE@h + beq setup_map_47x cmplwi cr0,r3,PVR_476@h beq setup_map_47x cmplwi cr0,r3,PVR_476_ISS@h diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c index c94d2e018d84..2c01665eb410 100644 --- a/arch/powerpc/kernel/module_32.c +++ b/arch/powerpc/kernel/module_32.c @@ -188,8 +188,8 @@ static uint32_t do_plt_call(void *location, pr_debug("Doing plt for call to 0x%x at 0x%x\n", val, (unsigned int)location); /* Init, or core PLT? */ - if (location >= mod->module_core - && location < mod->module_core + mod->core_size) + if (location >= mod->core_layout.base + && location < mod->core_layout.base + mod->core_layout.size) entry = (void *)sechdrs[mod->arch.core_plt_section].sh_addr; else entry = (void *)sechdrs[mod->arch.init_plt_section].sh_addr; @@ -296,7 +296,7 @@ int apply_relocate_add(Elf32_Shdr *sechdrs, } #ifdef CONFIG_DYNAMIC_FTRACE module->arch.tramp = - do_plt_call(module->module_core, + do_plt_call(module->core_layout.base, (unsigned long)ftrace_caller, sechdrs, module); #endif diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index 68384514506b..59663af9315f 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -635,6 +635,33 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, */ break; + case R_PPC64_ENTRY: + /* + * Optimize ELFv2 large code model entry point if + * the TOC is within 2GB range of current location. + */ + value = my_r2(sechdrs, me) - (unsigned long)location; + if (value + 0x80008000 > 0xffffffff) + break; + /* + * Check for the large code model prolog sequence: + * ld r2, ...(r12) + * add r2, r2, r12 + */ + if ((((uint32_t *)location)[0] & ~0xfffc) + != 0xe84c0000) + break; + if (((uint32_t *)location)[1] != 0x7c426214) + break; + /* + * If found, replace it with: + * addis r2, r12, (.TOC.-func)@ha + * addi r2, r12, (.TOC.-func)@l + */ + ((uint32_t *)location)[0] = 0x3c4c0000 + PPC_HA(value); + ((uint32_t *)location)[1] = 0x38420000 + PPC_LO(value); + break; + case R_PPC64_REL16_HA: /* Subtract location pointer */ value -= (unsigned long)location; diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c index 32e26526f7e4..0cab9e8c3794 100644 --- a/arch/powerpc/kernel/nvram_64.c +++ b/arch/powerpc/kernel/nvram_64.c @@ -27,6 +27,7 @@ #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/kmsg_dump.h> +#include <linux/pagemap.h> #include <linux/pstore.h> #include <linux/zlib.h> #include <asm/uaccess.h> @@ -733,24 +734,10 @@ static void oops_to_nvram(struct kmsg_dumper *dumper, static loff_t dev_nvram_llseek(struct file *file, loff_t offset, int origin) { - int size; - if (ppc_md.nvram_size == NULL) return -ENODEV; - size = ppc_md.nvram_size(); - - switch (origin) { - case 1: - offset += file->f_pos; - break; - case 2: - offset += size; - break; - } - if (offset < 0) - return -EINVAL; - file->f_pos = offset; - return file->f_pos; + return generic_file_llseek_size(file, offset, origin, MAX_LFS_FILESIZE, + ppc_md.nvram_size()); } diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index 202963ee013a..41e1607e800c 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -19,13 +19,11 @@ EXPORT_SYMBOL(_mcount); #endif #ifdef CONFIG_PPC_FPU -EXPORT_SYMBOL(giveup_fpu); EXPORT_SYMBOL(load_fp_state); EXPORT_SYMBOL(store_fp_state); #endif #ifdef CONFIG_ALTIVEC -EXPORT_SYMBOL(giveup_altivec); EXPORT_SYMBOL(load_vr_state); EXPORT_SYMBOL(store_vr_state); #endif @@ -34,10 +32,6 @@ EXPORT_SYMBOL(store_vr_state); EXPORT_SYMBOL_GPL(__giveup_vsx); #endif -#ifdef CONFIG_SPE -EXPORT_SYMBOL(giveup_spe); -#endif - #ifdef CONFIG_EPAPR_PARAVIRT EXPORT_SYMBOL(epapr_hypercall_start); #endif diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 646bf4d222c1..dccc87e8fee5 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -67,15 +67,8 @@ extern unsigned long _get_SP(void); -#ifndef CONFIG_SMP -struct task_struct *last_task_used_math = NULL; -struct task_struct *last_task_used_altivec = NULL; -struct task_struct *last_task_used_vsx = NULL; -struct task_struct *last_task_used_spe = NULL; -#endif - #ifdef CONFIG_PPC_TRANSACTIONAL_MEM -void giveup_fpu_maybe_transactional(struct task_struct *tsk) +static void check_if_tm_restore_required(struct task_struct *tsk) { /* * If we are saving the current thread's registers, and the @@ -89,34 +82,67 @@ void giveup_fpu_maybe_transactional(struct task_struct *tsk) tsk->thread.ckpt_regs.msr = tsk->thread.regs->msr; set_thread_flag(TIF_RESTORE_TM); } +} +#else +static inline void check_if_tm_restore_required(struct task_struct *tsk) { } +#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ + +bool strict_msr_control; +EXPORT_SYMBOL(strict_msr_control); + +static int __init enable_strict_msr_control(char *str) +{ + strict_msr_control = true; + pr_info("Enabling strict facility control\n"); - giveup_fpu(tsk); + return 0; } +early_param("ppc_strict_facility_enable", enable_strict_msr_control); -void giveup_altivec_maybe_transactional(struct task_struct *tsk) +void msr_check_and_set(unsigned long bits) { - /* - * If we are saving the current thread's registers, and the - * thread is in a transactional state, set the TIF_RESTORE_TM - * bit so that we know to restore the registers before - * returning to userspace. - */ - if (tsk == current && tsk->thread.regs && - MSR_TM_ACTIVE(tsk->thread.regs->msr) && - !test_thread_flag(TIF_RESTORE_TM)) { - tsk->thread.ckpt_regs.msr = tsk->thread.regs->msr; - set_thread_flag(TIF_RESTORE_TM); - } + unsigned long oldmsr = mfmsr(); + unsigned long newmsr; - giveup_altivec(tsk); + newmsr = oldmsr | bits; + +#ifdef CONFIG_VSX + if (cpu_has_feature(CPU_FTR_VSX) && (bits & MSR_FP)) + newmsr |= MSR_VSX; +#endif + + if (oldmsr != newmsr) + mtmsr_isync(newmsr); } -#else -#define giveup_fpu_maybe_transactional(tsk) giveup_fpu(tsk) -#define giveup_altivec_maybe_transactional(tsk) giveup_altivec(tsk) -#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ +void __msr_check_and_clear(unsigned long bits) +{ + unsigned long oldmsr = mfmsr(); + unsigned long newmsr; + + newmsr = oldmsr & ~bits; + +#ifdef CONFIG_VSX + if (cpu_has_feature(CPU_FTR_VSX) && (bits & MSR_FP)) + newmsr &= ~MSR_VSX; +#endif + + if (oldmsr != newmsr) + mtmsr_isync(newmsr); +} +EXPORT_SYMBOL(__msr_check_and_clear); #ifdef CONFIG_PPC_FPU +void giveup_fpu(struct task_struct *tsk) +{ + check_if_tm_restore_required(tsk); + + msr_check_and_set(MSR_FP); + __giveup_fpu(tsk); + msr_check_and_clear(MSR_FP); +} +EXPORT_SYMBOL(giveup_fpu); + /* * Make sure the floating-point register state in the * the thread_struct is up to date for task tsk. @@ -134,52 +160,56 @@ void flush_fp_to_thread(struct task_struct *tsk) */ preempt_disable(); if (tsk->thread.regs->msr & MSR_FP) { -#ifdef CONFIG_SMP /* * This should only ever be called for current or * for a stopped child process. Since we save away - * the FP register state on context switch on SMP, + * the FP register state on context switch, * there is something wrong if a stopped child appears * to still have its FP state in the CPU registers. */ BUG_ON(tsk != current); -#endif - giveup_fpu_maybe_transactional(tsk); + giveup_fpu(tsk); } preempt_enable(); } } EXPORT_SYMBOL_GPL(flush_fp_to_thread); -#endif /* CONFIG_PPC_FPU */ void enable_kernel_fp(void) { WARN_ON(preemptible()); -#ifdef CONFIG_SMP - if (current->thread.regs && (current->thread.regs->msr & MSR_FP)) - giveup_fpu_maybe_transactional(current); - else - giveup_fpu(NULL); /* just enables FP for kernel */ -#else - giveup_fpu_maybe_transactional(last_task_used_math); -#endif /* CONFIG_SMP */ + msr_check_and_set(MSR_FP); + + if (current->thread.regs && (current->thread.regs->msr & MSR_FP)) { + check_if_tm_restore_required(current); + __giveup_fpu(current); + } } EXPORT_SYMBOL(enable_kernel_fp); +#endif /* CONFIG_PPC_FPU */ #ifdef CONFIG_ALTIVEC +void giveup_altivec(struct task_struct *tsk) +{ + check_if_tm_restore_required(tsk); + + msr_check_and_set(MSR_VEC); + __giveup_altivec(tsk); + msr_check_and_clear(MSR_VEC); +} +EXPORT_SYMBOL(giveup_altivec); + void enable_kernel_altivec(void) { WARN_ON(preemptible()); -#ifdef CONFIG_SMP - if (current->thread.regs && (current->thread.regs->msr & MSR_VEC)) - giveup_altivec_maybe_transactional(current); - else - giveup_altivec_notask(); -#else - giveup_altivec_maybe_transactional(last_task_used_altivec); -#endif /* CONFIG_SMP */ + msr_check_and_set(MSR_VEC); + + if (current->thread.regs && (current->thread.regs->msr & MSR_VEC)) { + check_if_tm_restore_required(current); + __giveup_altivec(current); + } } EXPORT_SYMBOL(enable_kernel_altivec); @@ -192,10 +222,8 @@ void flush_altivec_to_thread(struct task_struct *tsk) if (tsk->thread.regs) { preempt_disable(); if (tsk->thread.regs->msr & MSR_VEC) { -#ifdef CONFIG_SMP BUG_ON(tsk != current); -#endif - giveup_altivec_maybe_transactional(tsk); + giveup_altivec(tsk); } preempt_enable(); } @@ -204,37 +232,43 @@ EXPORT_SYMBOL_GPL(flush_altivec_to_thread); #endif /* CONFIG_ALTIVEC */ #ifdef CONFIG_VSX -void enable_kernel_vsx(void) +void giveup_vsx(struct task_struct *tsk) { - WARN_ON(preemptible()); + check_if_tm_restore_required(tsk); -#ifdef CONFIG_SMP - if (current->thread.regs && (current->thread.regs->msr & MSR_VSX)) - giveup_vsx(current); - else - giveup_vsx(NULL); /* just enable vsx for kernel - force */ -#else - giveup_vsx(last_task_used_vsx); -#endif /* CONFIG_SMP */ + msr_check_and_set(MSR_FP|MSR_VEC|MSR_VSX); + if (tsk->thread.regs->msr & MSR_FP) + __giveup_fpu(tsk); + if (tsk->thread.regs->msr & MSR_VEC) + __giveup_altivec(tsk); + __giveup_vsx(tsk); + msr_check_and_clear(MSR_FP|MSR_VEC|MSR_VSX); } -EXPORT_SYMBOL(enable_kernel_vsx); +EXPORT_SYMBOL(giveup_vsx); -void giveup_vsx(struct task_struct *tsk) +void enable_kernel_vsx(void) { - giveup_fpu_maybe_transactional(tsk); - giveup_altivec_maybe_transactional(tsk); - __giveup_vsx(tsk); + WARN_ON(preemptible()); + + msr_check_and_set(MSR_FP|MSR_VEC|MSR_VSX); + + if (current->thread.regs && (current->thread.regs->msr & MSR_VSX)) { + check_if_tm_restore_required(current); + if (current->thread.regs->msr & MSR_FP) + __giveup_fpu(current); + if (current->thread.regs->msr & MSR_VEC) + __giveup_altivec(current); + __giveup_vsx(current); + } } -EXPORT_SYMBOL(giveup_vsx); +EXPORT_SYMBOL(enable_kernel_vsx); void flush_vsx_to_thread(struct task_struct *tsk) { if (tsk->thread.regs) { preempt_disable(); if (tsk->thread.regs->msr & MSR_VSX) { -#ifdef CONFIG_SMP BUG_ON(tsk != current); -#endif giveup_vsx(tsk); } preempt_enable(); @@ -244,19 +278,26 @@ EXPORT_SYMBOL_GPL(flush_vsx_to_thread); #endif /* CONFIG_VSX */ #ifdef CONFIG_SPE +void giveup_spe(struct task_struct *tsk) +{ + check_if_tm_restore_required(tsk); + + msr_check_and_set(MSR_SPE); + __giveup_spe(tsk); + msr_check_and_clear(MSR_SPE); +} +EXPORT_SYMBOL(giveup_spe); void enable_kernel_spe(void) { WARN_ON(preemptible()); -#ifdef CONFIG_SMP - if (current->thread.regs && (current->thread.regs->msr & MSR_SPE)) - giveup_spe(current); - else - giveup_spe(NULL); /* just enable SPE for kernel - force */ -#else - giveup_spe(last_task_used_spe); -#endif /* __SMP __ */ + msr_check_and_set(MSR_SPE); + + if (current->thread.regs && (current->thread.regs->msr & MSR_SPE)) { + check_if_tm_restore_required(current); + __giveup_spe(current); + } } EXPORT_SYMBOL(enable_kernel_spe); @@ -265,9 +306,7 @@ void flush_spe_to_thread(struct task_struct *tsk) if (tsk->thread.regs) { preempt_disable(); if (tsk->thread.regs->msr & MSR_SPE) { -#ifdef CONFIG_SMP BUG_ON(tsk != current); -#endif tsk->thread.spefscr = mfspr(SPRN_SPEFSCR); giveup_spe(tsk); } @@ -276,31 +315,81 @@ void flush_spe_to_thread(struct task_struct *tsk) } #endif /* CONFIG_SPE */ -#ifndef CONFIG_SMP -/* - * If we are doing lazy switching of CPU state (FP, altivec or SPE), - * and the current task has some state, discard it. - */ -void discard_lazy_cpu_state(void) +static unsigned long msr_all_available; + +static int __init init_msr_all_available(void) { - preempt_disable(); - if (last_task_used_math == current) - last_task_used_math = NULL; +#ifdef CONFIG_PPC_FPU + msr_all_available |= MSR_FP; +#endif #ifdef CONFIG_ALTIVEC - if (last_task_used_altivec == current) - last_task_used_altivec = NULL; -#endif /* CONFIG_ALTIVEC */ + if (cpu_has_feature(CPU_FTR_ALTIVEC)) + msr_all_available |= MSR_VEC; +#endif #ifdef CONFIG_VSX - if (last_task_used_vsx == current) - last_task_used_vsx = NULL; -#endif /* CONFIG_VSX */ + if (cpu_has_feature(CPU_FTR_VSX)) + msr_all_available |= MSR_VSX; +#endif #ifdef CONFIG_SPE - if (last_task_used_spe == current) - last_task_used_spe = NULL; + if (cpu_has_feature(CPU_FTR_SPE)) + msr_all_available |= MSR_SPE; #endif - preempt_enable(); + + return 0; +} +early_initcall(init_msr_all_available); + +void giveup_all(struct task_struct *tsk) +{ + unsigned long usermsr; + + if (!tsk->thread.regs) + return; + + usermsr = tsk->thread.regs->msr; + + if ((usermsr & msr_all_available) == 0) + return; + + msr_check_and_set(msr_all_available); + +#ifdef CONFIG_PPC_FPU + if (usermsr & MSR_FP) + __giveup_fpu(tsk); +#endif +#ifdef CONFIG_ALTIVEC + if (usermsr & MSR_VEC) + __giveup_altivec(tsk); +#endif +#ifdef CONFIG_VSX + if (usermsr & MSR_VSX) + __giveup_vsx(tsk); +#endif +#ifdef CONFIG_SPE + if (usermsr & MSR_SPE) + __giveup_spe(tsk); +#endif + + msr_check_and_clear(msr_all_available); +} +EXPORT_SYMBOL(giveup_all); + +void flush_all_to_thread(struct task_struct *tsk) +{ + if (tsk->thread.regs) { + preempt_disable(); + BUG_ON(tsk != current); + giveup_all(tsk); + +#ifdef CONFIG_SPE + if (tsk->thread.regs->msr & MSR_SPE) + tsk->thread.spefscr = mfspr(SPRN_SPEFSCR); +#endif + + preempt_enable(); + } } -#endif /* CONFIG_SMP */ +EXPORT_SYMBOL(flush_all_to_thread); #ifdef CONFIG_PPC_ADV_DEBUG_REGS void do_send_trap(struct pt_regs *regs, unsigned long address, @@ -744,13 +833,15 @@ void restore_tm_state(struct pt_regs *regs) msr_diff = current->thread.ckpt_regs.msr & ~regs->msr; msr_diff &= MSR_FP | MSR_VEC | MSR_VSX; if (msr_diff & MSR_FP) { - fp_enable(); + msr_check_and_set(MSR_FP); load_fp_state(¤t->thread.fp_state); + msr_check_and_clear(MSR_FP); regs->msr |= current->thread.fpexc_mode; } if (msr_diff & MSR_VEC) { - vec_enable(); + msr_check_and_set(MSR_VEC); load_vr_state(¤t->thread.vr_state); + msr_check_and_clear(MSR_VEC); } regs->msr |= msr_diff; } @@ -760,112 +851,87 @@ void restore_tm_state(struct pt_regs *regs) #define __switch_to_tm(prev) #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ -struct task_struct *__switch_to(struct task_struct *prev, - struct task_struct *new) +static inline void save_sprs(struct thread_struct *t) { - struct thread_struct *new_thread, *old_thread; - struct task_struct *last; -#ifdef CONFIG_PPC_BOOK3S_64 - struct ppc64_tlb_batch *batch; +#ifdef CONFIG_ALTIVEC + if (cpu_has_feature(cpu_has_feature(CPU_FTR_ALTIVEC))) + t->vrsave = mfspr(SPRN_VRSAVE); #endif +#ifdef CONFIG_PPC_BOOK3S_64 + if (cpu_has_feature(CPU_FTR_DSCR)) + t->dscr = mfspr(SPRN_DSCR); - WARN_ON(!irqs_disabled()); + if (cpu_has_feature(CPU_FTR_ARCH_207S)) { + t->bescr = mfspr(SPRN_BESCR); + t->ebbhr = mfspr(SPRN_EBBHR); + t->ebbrr = mfspr(SPRN_EBBRR); - /* Back up the TAR and DSCR across context switches. - * Note that the TAR is not available for use in the kernel. (To - * provide this, the TAR should be backed up/restored on exception - * entry/exit instead, and be in pt_regs. FIXME, this should be in - * pt_regs anyway (for debug).) - * Save the TAR and DSCR here before we do treclaim/trecheckpoint as - * these will change them. - */ - save_early_sprs(&prev->thread); + t->fscr = mfspr(SPRN_FSCR); - __switch_to_tm(prev); + /* + * Note that the TAR is not available for use in the kernel. + * (To provide this, the TAR should be backed up/restored on + * exception entry/exit instead, and be in pt_regs. FIXME, + * this should be in pt_regs anyway (for debug).) + */ + t->tar = mfspr(SPRN_TAR); + } +#endif +} -#ifdef CONFIG_SMP - /* avoid complexity of lazy save/restore of fpu - * by just saving it every time we switch out if - * this task used the fpu during the last quantum. - * - * If it tries to use the fpu again, it'll trap and - * reload its fp regs. So we don't have to do a restore - * every switch, just a save. - * -- Cort - */ - if (prev->thread.regs && (prev->thread.regs->msr & MSR_FP)) - giveup_fpu(prev); +static inline void restore_sprs(struct thread_struct *old_thread, + struct thread_struct *new_thread) +{ #ifdef CONFIG_ALTIVEC - /* - * If the previous thread used altivec in the last quantum - * (thus changing altivec regs) then save them. - * We used to check the VRSAVE register but not all apps - * set it, so we don't rely on it now (and in fact we need - * to save & restore VSCR even if VRSAVE == 0). -- paulus - * - * On SMP we always save/restore altivec regs just to avoid the - * complexity of changing processors. - * -- Cort - */ - if (prev->thread.regs && (prev->thread.regs->msr & MSR_VEC)) - giveup_altivec(prev); -#endif /* CONFIG_ALTIVEC */ -#ifdef CONFIG_VSX - if (prev->thread.regs && (prev->thread.regs->msr & MSR_VSX)) - /* VMX and FPU registers are already save here */ - __giveup_vsx(prev); -#endif /* CONFIG_VSX */ -#ifdef CONFIG_SPE - /* - * If the previous thread used spe in the last quantum - * (thus changing spe regs) then save them. - * - * On SMP we always save/restore spe regs just to avoid the - * complexity of changing processors. - */ - if ((prev->thread.regs && (prev->thread.regs->msr & MSR_SPE))) - giveup_spe(prev); -#endif /* CONFIG_SPE */ + if (cpu_has_feature(CPU_FTR_ALTIVEC) && + old_thread->vrsave != new_thread->vrsave) + mtspr(SPRN_VRSAVE, new_thread->vrsave); +#endif +#ifdef CONFIG_PPC_BOOK3S_64 + if (cpu_has_feature(CPU_FTR_DSCR)) { + u64 dscr = get_paca()->dscr_default; + u64 fscr = old_thread->fscr & ~FSCR_DSCR; -#else /* CONFIG_SMP */ -#ifdef CONFIG_ALTIVEC - /* Avoid the trap. On smp this this never happens since - * we don't set last_task_used_altivec -- Cort - */ - if (new->thread.regs && last_task_used_altivec == new) - new->thread.regs->msr |= MSR_VEC; -#endif /* CONFIG_ALTIVEC */ -#ifdef CONFIG_VSX - if (new->thread.regs && last_task_used_vsx == new) - new->thread.regs->msr |= MSR_VSX; -#endif /* CONFIG_VSX */ -#ifdef CONFIG_SPE - /* Avoid the trap. On smp this this never happens since - * we don't set last_task_used_spe - */ - if (new->thread.regs && last_task_used_spe == new) - new->thread.regs->msr |= MSR_SPE; -#endif /* CONFIG_SPE */ + if (new_thread->dscr_inherit) { + dscr = new_thread->dscr; + fscr |= FSCR_DSCR; + } -#endif /* CONFIG_SMP */ + if (old_thread->dscr != dscr) + mtspr(SPRN_DSCR, dscr); -#ifdef CONFIG_PPC_ADV_DEBUG_REGS - switch_booke_debug_regs(&new->thread.debug); -#else -/* - * For PPC_BOOK3S_64, we use the hw-breakpoint interfaces that would - * schedule DABR - */ -#ifndef CONFIG_HAVE_HW_BREAKPOINT - if (unlikely(!hw_brk_match(this_cpu_ptr(¤t_brk), &new->thread.hw_brk))) - __set_breakpoint(&new->thread.hw_brk); -#endif /* CONFIG_HAVE_HW_BREAKPOINT */ + if (old_thread->fscr != fscr) + mtspr(SPRN_FSCR, fscr); + } + + if (cpu_has_feature(CPU_FTR_ARCH_207S)) { + if (old_thread->bescr != new_thread->bescr) + mtspr(SPRN_BESCR, new_thread->bescr); + if (old_thread->ebbhr != new_thread->ebbhr) + mtspr(SPRN_EBBHR, new_thread->ebbhr); + if (old_thread->ebbrr != new_thread->ebbrr) + mtspr(SPRN_EBBRR, new_thread->ebbrr); + + if (old_thread->tar != new_thread->tar) + mtspr(SPRN_TAR, new_thread->tar); + } #endif +} +struct task_struct *__switch_to(struct task_struct *prev, + struct task_struct *new) +{ + struct thread_struct *new_thread, *old_thread; + struct task_struct *last; +#ifdef CONFIG_PPC_BOOK3S_64 + struct ppc64_tlb_batch *batch; +#endif new_thread = &new->thread; old_thread = ¤t->thread; + WARN_ON(!irqs_disabled()); + #ifdef CONFIG_PPC64 /* * Collect processor utilization data per process @@ -890,6 +956,30 @@ struct task_struct *__switch_to(struct task_struct *prev, } #endif /* CONFIG_PPC_BOOK3S_64 */ +#ifdef CONFIG_PPC_ADV_DEBUG_REGS + switch_booke_debug_regs(&new->thread.debug); +#else +/* + * For PPC_BOOK3S_64, we use the hw-breakpoint interfaces that would + * schedule DABR + */ +#ifndef CONFIG_HAVE_HW_BREAKPOINT + if (unlikely(!hw_brk_match(this_cpu_ptr(¤t_brk), &new->thread.hw_brk))) + __set_breakpoint(&new->thread.hw_brk); +#endif /* CONFIG_HAVE_HW_BREAKPOINT */ +#endif + + /* + * We need to save SPRs before treclaim/trecheckpoint as these will + * change a number of them. + */ + save_sprs(&prev->thread); + + __switch_to_tm(prev); + + /* Save FPU, Altivec, VSX and SPE state */ + giveup_all(prev); + /* * We can't take a PMU exception inside _switch() since there is a * window where the kernel stack SLB and the kernel stack are out @@ -899,6 +989,15 @@ struct task_struct *__switch_to(struct task_struct *prev, tm_recheckpoint_new_task(new); + /* + * Call restore_sprs() before calling _switch(). If we move it after + * _switch() then we miss out on calling it for new tasks. The reason + * for this is we manually create a stack frame for new tasks that + * directly returns through ret_from_fork() or + * ret_from_kernel_thread(). See copy_thread() for details. + */ + restore_sprs(old_thread, new_thread); + last = _switch(old_thread, new_thread); #ifdef CONFIG_PPC_BOOK3S_64 @@ -952,10 +1051,12 @@ static void show_instructions(struct pt_regs *regs) printk("\n"); } -static struct regbit { +struct regbit { unsigned long bit; const char *name; -} msr_bits[] = { +}; + +static struct regbit msr_bits[] = { #if defined(CONFIG_PPC64) && !defined(CONFIG_BOOKE) {MSR_SF, "SF"}, {MSR_HV, "HV"}, @@ -985,16 +1086,49 @@ static struct regbit { {0, NULL} }; -static void printbits(unsigned long val, struct regbit *bits) +static void print_bits(unsigned long val, struct regbit *bits, const char *sep) { - const char *sep = ""; + const char *s = ""; - printk("<"); for (; bits->bit; ++bits) if (val & bits->bit) { - printk("%s%s", sep, bits->name); - sep = ","; + printk("%s%s", s, bits->name); + s = sep; } +} + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +static struct regbit msr_tm_bits[] = { + {MSR_TS_T, "T"}, + {MSR_TS_S, "S"}, + {MSR_TM, "E"}, + {0, NULL} +}; + +static void print_tm_bits(unsigned long val) +{ +/* + * This only prints something if at least one of the TM bit is set. + * Inside the TM[], the output means: + * E: Enabled (bit 32) + * S: Suspended (bit 33) + * T: Transactional (bit 34) + */ + if (val & (MSR_TM | MSR_TS_S | MSR_TS_T)) { + printk(",TM["); + print_bits(val, msr_tm_bits, ""); + printk("]"); + } +} +#else +static void print_tm_bits(unsigned long val) {} +#endif + +static void print_msr_bits(unsigned long val) +{ + printk("<"); + print_bits(val, msr_bits, ","); + print_tm_bits(val); printk(">"); } @@ -1019,7 +1153,7 @@ void show_regs(struct pt_regs * regs) printk("REGS: %p TRAP: %04lx %s (%s)\n", regs, regs->trap, print_tainted(), init_utsname()->release); printk("MSR: "REG" ", regs->msr); - printbits(regs->msr, msr_bits); + print_msr_bits(regs->msr); printk(" CR: %08lx XER: %08lx\n", regs->ccr, regs->xer); trap = TRAP(regs); if ((regs->trap != 0xc00) && cpu_has_feature(CPU_FTR_CFAR)) @@ -1061,13 +1195,10 @@ void show_regs(struct pt_regs * regs) void exit_thread(void) { - discard_lazy_cpu_state(); } void flush_thread(void) { - discard_lazy_cpu_state(); - #ifdef CONFIG_HAVE_HW_BREAKPOINT flush_ptrace_hw_breakpoint(current); #else /* CONFIG_HAVE_HW_BREAKPOINT */ @@ -1086,10 +1217,7 @@ release_thread(struct task_struct *t) */ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) { - flush_fp_to_thread(src); - flush_altivec_to_thread(src); - flush_vsx_to_thread(src); - flush_spe_to_thread(src); + flush_all_to_thread(src); /* * Flush TM state out so we can copy it. __switch_to_tm() does this * flush but it removes the checkpointed state from the current CPU and @@ -1212,7 +1340,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, #ifdef CONFIG_PPC64 if (cpu_has_feature(CPU_FTR_DSCR)) { p->thread.dscr_inherit = current->thread.dscr_inherit; - p->thread.dscr = current->thread.dscr; + p->thread.dscr = mfspr(SPRN_DSCR); } if (cpu_has_feature(CPU_FTR_HAS_PPR)) p->thread.ppr = INIT_PPR; @@ -1305,7 +1433,6 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) regs->msr = MSR_USER32; } #endif - discard_lazy_cpu_state(); #ifdef CONFIG_VSX current->thread.used_vsr = 0; #endif diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 92dea8df6b26..da5192590c44 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -389,6 +389,7 @@ static void __init prom_printf(const char *format, ...) break; } } + va_end(args); } diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 737c0d0b53ac..30a03c03fe73 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -60,6 +60,7 @@ struct pt_regs_offset { #define STR(s) #s /* convert to string */ #define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, r)} #define GPR_OFFSET_NAME(num) \ + {.name = STR(r##num), .offset = offsetof(struct pt_regs, gpr[num])}, \ {.name = STR(gpr##num), .offset = offsetof(struct pt_regs, gpr[num])} #define REG_OFFSET_END {.name = NULL, .offset = 0} diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 5a753fae8265..28736ff27fea 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -44,6 +44,9 @@ #include <asm/mmu.h> #include <asm/topology.h> +/* This is here deliberately so it's only used in this file */ +void enter_rtas(unsigned long); + struct rtas_t rtas = { .lock = __ARCH_SPIN_LOCK_UNLOCKED }; @@ -93,21 +96,13 @@ static void unlock_rtas(unsigned long flags) */ static void call_rtas_display_status(unsigned char c) { - struct rtas_args *args = &rtas.args; unsigned long s; if (!rtas.base) return; - s = lock_rtas(); - - args->token = cpu_to_be32(10); - args->nargs = cpu_to_be32(1); - args->nret = cpu_to_be32(1); - args->rets = &(args->args[1]); - args->args[0] = cpu_to_be32(c); - - enter_rtas(__pa(args)); + s = lock_rtas(); + rtas_call_unlocked(&rtas.args, 10, 1, 1, NULL, c); unlock_rtas(s); } @@ -418,6 +413,36 @@ static char *__fetch_rtas_last_error(char *altbuf) #define get_errorlog_buffer() NULL #endif + +static void +va_rtas_call_unlocked(struct rtas_args *args, int token, int nargs, int nret, + va_list list) +{ + int i; + + args->token = cpu_to_be32(token); + args->nargs = cpu_to_be32(nargs); + args->nret = cpu_to_be32(nret); + args->rets = &(args->args[nargs]); + + for (i = 0; i < nargs; ++i) + args->args[i] = cpu_to_be32(va_arg(list, __u32)); + + for (i = 0; i < nret; ++i) + args->rets[i] = 0; + + enter_rtas(__pa(args)); +} + +void rtas_call_unlocked(struct rtas_args *args, int token, int nargs, int nret, ...) +{ + va_list list; + + va_start(list, nret); + va_rtas_call_unlocked(args, token, nargs, nret, list); + va_end(list); +} + int rtas_call(int token, int nargs, int nret, int *outputs, ...) { va_list list; @@ -431,22 +456,14 @@ int rtas_call(int token, int nargs, int nret, int *outputs, ...) return -1; s = lock_rtas(); + + /* We use the global rtas args buffer */ rtas_args = &rtas.args; - rtas_args->token = cpu_to_be32(token); - rtas_args->nargs = cpu_to_be32(nargs); - rtas_args->nret = cpu_to_be32(nret); - rtas_args->rets = &(rtas_args->args[nargs]); va_start(list, outputs); - for (i = 0; i < nargs; ++i) - rtas_args->args[i] = cpu_to_be32(va_arg(list, __u32)); + va_rtas_call_unlocked(rtas_args, token, nargs, nret, list); va_end(list); - for (i = 0; i < nret; ++i) - rtas_args->rets[i] = 0; - - enter_rtas(__pa(rtas_args)); - /* A -1 return code indicates that the last command couldn't be completed due to a hardware error. */ if (be32_to_cpu(rtas_args->rets[0]) == -1) diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index ef7c24e84a62..b6aa378aff63 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -458,7 +458,7 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame, * contains valid data */ if (current->thread.used_vsr && ctx_has_vsx_region) { - __giveup_vsx(current); + flush_vsx_to_thread(current); if (copy_vsx_to_user(&frame->mc_vsregs, current)) return 1; msr |= MSR_VSX; @@ -606,7 +606,7 @@ static int save_tm_user_regs(struct pt_regs *regs, * contains valid data */ if (current->thread.used_vsr) { - __giveup_vsx(current); + flush_vsx_to_thread(current); if (copy_vsx_to_user(&frame->mc_vsregs, current)) return 1; if (msr & MSR_VSX) { @@ -687,15 +687,6 @@ static long restore_user_regs(struct pt_regs *regs, if (sig) regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE); - /* - * Do this before updating the thread state in - * current->thread.fpr/vr/evr. That way, if we get preempted - * and another task grabs the FPU/Altivec/SPE, it won't be - * tempted to save the current CPU state into the thread_struct - * and corrupt what we are writing there. - */ - discard_lazy_cpu_state(); - #ifdef CONFIG_ALTIVEC /* * Force the process to reload the altivec registers from @@ -798,15 +789,6 @@ static long restore_tm_user_regs(struct pt_regs *regs, /* Restore the previous little-endian mode */ regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE); - /* - * Do this before updating the thread state in - * current->thread.fpr/vr/evr. That way, if we get preempted - * and another task grabs the FPU/Altivec/SPE, it won't be - * tempted to save the current CPU state into the thread_struct - * and corrupt what we are writing there. - */ - discard_lazy_cpu_state(); - #ifdef CONFIG_ALTIVEC regs->msr &= ~MSR_VEC; if (msr & MSR_VEC) { diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index c676ecec0869..25520794aa37 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -147,7 +147,7 @@ static long setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, * VMX data. */ if (current->thread.used_vsr && ctx_has_vsx_region) { - __giveup_vsx(current); + flush_vsx_to_thread(current); v_regs += ELF_NVRREG; err |= copy_vsx_to_user(v_regs, current); /* set MSR_VSX in the MSR value in the frame to @@ -270,7 +270,7 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc, * VMX data. */ if (current->thread.used_vsr) { - __giveup_vsx(current); + flush_vsx_to_thread(current); v_regs += ELF_NVRREG; tm_v_regs += ELF_NVRREG; @@ -350,15 +350,6 @@ static long restore_sigcontext(struct pt_regs *regs, sigset_t *set, int sig, err |= __get_user(set->sig[0], &sc->oldmask); /* - * Do this before updating the thread state in - * current->thread.fpr/vr. That way, if we get preempted - * and another task grabs the FPU/Altivec, it won't be - * tempted to save the current CPU state into the thread_struct - * and corrupt what we are writing there. - */ - discard_lazy_cpu_state(); - - /* * Force reload of FP/VEC. * This has to be done before copying stuff into current->thread.fpr/vr * for the reasons explained in the previous comment. @@ -469,15 +460,6 @@ static long restore_tm_sigcontexts(struct pt_regs *regs, err |= __get_user(regs->result, &sc->gp_regs[PT_RESULT]); /* - * Do this before updating the thread state in - * current->thread.fpr/vr. That way, if we get preempted - * and another task grabs the FPU/Altivec, it won't be - * tempted to save the current CPU state into the thread_struct - * and corrupt what we are writing there. - */ - discard_lazy_cpu_state(); - - /* * Force reload of FP/VEC. * This has to be done before copying stuff into current->thread.fpr/vr * for the reasons explained in the previous comment. diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c index ea43a347a104..4f24606afc3f 100644 --- a/arch/powerpc/kernel/stacktrace.c +++ b/arch/powerpc/kernel/stacktrace.c @@ -61,3 +61,10 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) save_context_stack(trace, tsk->thread.ksp, tsk, 0); } EXPORT_SYMBOL_GPL(save_stack_trace_tsk); + +void +save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace) +{ + save_context_stack(trace, regs->gpr[1], current, 0); +} +EXPORT_SYMBOL_GPL(save_stack_trace_regs); diff --git a/arch/powerpc/kernel/swsusp.c b/arch/powerpc/kernel/swsusp.c index eae33e10b65f..6669b1752512 100644 --- a/arch/powerpc/kernel/swsusp.c +++ b/arch/powerpc/kernel/swsusp.c @@ -20,9 +20,7 @@ void save_processor_state(void) * flush out all the special registers so we don't need * to save them in the snapshot */ - flush_fp_to_thread(current); - flush_altivec_to_thread(current); - flush_spe_to_thread(current); + flush_all_to_thread(current); #ifdef CONFIG_PPC64 hard_irq_disable(); diff --git a/arch/powerpc/kernel/systbl_chk.c b/arch/powerpc/kernel/systbl_chk.c index 2384129f5893..55323a620cfe 100644 --- a/arch/powerpc/kernel/systbl_chk.c +++ b/arch/powerpc/kernel/systbl_chk.c @@ -57,4 +57,4 @@ START_TABLE #include <asm/systbl.h> -END_TABLE __NR_syscalls +END_TABLE NR_syscalls diff --git a/arch/powerpc/kernel/systbl_chk.sh b/arch/powerpc/kernel/systbl_chk.sh index 19415e7674a5..31b6e7c358ca 100644 --- a/arch/powerpc/kernel/systbl_chk.sh +++ b/arch/powerpc/kernel/systbl_chk.sh @@ -16,7 +16,7 @@ awk 'BEGIN { num = -1; } # Ignore the beginning of the file /^START_TABLE/ { num = 0; next; } /^END_TABLE/ { if (num != $2) { - printf "__NR_syscalls (%s) is not one more than the last syscall (%s)\n", + printf "NR_syscalls (%s) is not one more than the last syscall (%s)\n", $2, num - 1; exit(1); } diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 1be1092c7204..81b0900a39ee 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -1002,38 +1002,6 @@ static int month_days[12] = { 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 }; -/* - * This only works for the Gregorian calendar - i.e. after 1752 (in the UK) - */ -void GregorianDay(struct rtc_time * tm) -{ - int leapsToDate; - int lastYear; - int day; - int MonthOffset[] = { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334 }; - - lastYear = tm->tm_year - 1; - - /* - * Number of leap corrections to apply up to end of last year - */ - leapsToDate = lastYear / 4 - lastYear / 100 + lastYear / 400; - - /* - * This year is a leap year if it is divisible by 4 except when it is - * divisible by 100 unless it is divisible by 400 - * - * e.g. 1904 was a leap year, 1900 was not, 1996 is, and 2000 was - */ - day = tm->tm_mon > 2 && leapyear(tm->tm_year); - - day += lastYear*365 + leapsToDate + MonthOffset[tm->tm_mon-1] + - tm->tm_mday; - - tm->tm_wday = day % 7; -} -EXPORT_SYMBOL_GPL(GregorianDay); - void to_tm(int tim, struct rtc_time * tm) { register int i; @@ -1064,9 +1032,9 @@ void to_tm(int tim, struct rtc_time * tm) tm->tm_mday = day + 1; /* - * Determine the day of week + * No-one uses the day of the week. */ - GregorianDay(tm); + tm->tm_wday = -1; } EXPORT_SYMBOL(to_tm); diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 37de90f8a845..b6becc795bb5 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -1313,13 +1313,6 @@ void nonrecoverable_exception(struct pt_regs *regs) die("nonrecoverable exception", regs, SIGKILL); } -void trace_syscall(struct pt_regs *regs) -{ - printk("Task: %p(%d), PC: %08lX/%08lX, Syscall: %3ld, Result: %s%ld %s\n", - current, task_pid_nr(current), regs->nip, regs->link, regs->gpr[0], - regs->ccr&0x10000000?"Error=":"", regs->gpr[3], print_tainted()); -} - void kernel_fp_unavailable_exception(struct pt_regs *regs) { enum ctx_state prev_state = exception_enter(); diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index b457bfa28436..def1b8b5e6c1 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -671,7 +671,7 @@ static void __init vdso_setup_syscall_map(void) extern unsigned long sys_ni_syscall; - for (i = 0; i < __NR_syscalls; i++) { + for (i = 0; i < NR_syscalls; i++) { #ifdef CONFIG_PPC64 if (sys_call_table[i*2] != sys_ni_syscall) vdso_data->syscall_map_64[i >> 5] |= diff --git a/arch/powerpc/kernel/vdso32/datapage.S b/arch/powerpc/kernel/vdso32/datapage.S index 59cf5f452879..3745113fcc65 100644 --- a/arch/powerpc/kernel/vdso32/datapage.S +++ b/arch/powerpc/kernel/vdso32/datapage.S @@ -61,7 +61,7 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_map) addi r3,r3,CFG_SYSCALL_MAP32 cmpli cr0,r4,0 beqlr - li r0,__NR_syscalls + li r0,NR_syscalls stw r0,0(r4) crclr cr0*4+so blr diff --git a/arch/powerpc/kernel/vdso64/datapage.S b/arch/powerpc/kernel/vdso64/datapage.S index 2f01c4a0d8a0..184a6ba7f283 100644 --- a/arch/powerpc/kernel/vdso64/datapage.S +++ b/arch/powerpc/kernel/vdso64/datapage.S @@ -62,7 +62,7 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_map) cmpli cr0,r4,0 crclr cr0*4+so beqlr - li r0,__NR_syscalls + li r0,NR_syscalls stw r0,0(r4) blr .cfi_endproc diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index f5c80d567d8d..162d0f714941 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -29,24 +29,10 @@ _GLOBAL(do_load_up_transact_altivec) addi r10,r3,THREAD_TRANSACT_VRSTATE REST_32VRS(0,r4,r10) - /* Disable VEC again. */ - MTMSRD(r6) - isync - blr #endif /* - * Enable use of VMX/Altivec for the caller. - */ -_GLOBAL(vec_enable) - mfmsr r3 - oris r3,r3,MSR_VEC@h - MTMSRD(r3) - isync - blr - -/* * Load state from memory into VMX registers including VSCR. * Assumes the caller has enabled VMX in the MSR. */ @@ -84,39 +70,6 @@ _GLOBAL(load_up_altivec) MTMSRD(r5) /* enable use of AltiVec now */ isync -/* - * For SMP, we don't do lazy VMX switching because it just gets too - * horrendously complex, especially when a task switches from one CPU - * to another. Instead we call giveup_altvec in switch_to. - * VRSAVE isn't dealt with here, that is done in the normal context - * switch code. Note that we could rely on vrsave value to eventually - * avoid saving all of the VREGs here... - */ -#ifndef CONFIG_SMP - LOAD_REG_ADDRBASE(r3, last_task_used_altivec) - toreal(r3) - PPC_LL r4,ADDROFF(last_task_used_altivec)(r3) - PPC_LCMPI 0,r4,0 - beq 1f - - /* Save VMX state to last_task_used_altivec's THREAD struct */ - toreal(r4) - addi r4,r4,THREAD - addi r6,r4,THREAD_VRSTATE - SAVE_32VRS(0,r5,r6) - mfvscr v0 - li r10,VRSTATE_VSCR - stvx v0,r10,r6 - /* Disable VMX for last_task_used_altivec */ - PPC_LL r5,PT_REGS(r4) - toreal(r5) - PPC_LL r4,_MSR-STACK_FRAME_OVERHEAD(r5) - lis r10,MSR_VEC@h - andc r4,r4,r10 - PPC_STL r4,_MSR-STACK_FRAME_OVERHEAD(r5) -1: -#endif /* CONFIG_SMP */ - /* Hack: if we get an altivec unavailable trap with VRSAVE * set to all zeros, we assume this is a broken application * that fails to set it properly, and thus we switch it to @@ -145,39 +98,15 @@ _GLOBAL(load_up_altivec) lvx v0,r10,r6 mtvscr v0 REST_32VRS(0,r4,r6) -#ifndef CONFIG_SMP - /* Update last_task_used_altivec to 'current' */ - subi r4,r5,THREAD /* Back to 'current' */ - fromreal(r4) - PPC_STL r4,ADDROFF(last_task_used_altivec)(r3) -#endif /* CONFIG_SMP */ /* restore registers and return */ blr -_GLOBAL(giveup_altivec_notask) - mfmsr r3 - andis. r4,r3,MSR_VEC@h - bnelr /* Already enabled? */ - oris r3,r3,MSR_VEC@h - SYNC - MTMSRD(r3) /* enable use of VMX now */ - isync - blr - /* - * giveup_altivec(tsk) + * __giveup_altivec(tsk) * Disable VMX for the task given as the argument, * and save the vector registers in its thread_struct. - * Enables the VMX for use in the kernel on return. */ -_GLOBAL(giveup_altivec) - mfmsr r5 - oris r5,r5,MSR_VEC@h - SYNC - MTMSRD(r5) /* enable use of VMX now */ - isync - PPC_LCMPI 0,r3,0 - beqlr /* if no previous owner, done */ +_GLOBAL(__giveup_altivec) addi r3,r3,THREAD /* want THREAD of task */ PPC_LL r7,THREAD_VRSAVEAREA(r3) PPC_LL r5,PT_REGS(r3) @@ -203,11 +132,6 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_VSX) andc r4,r4,r3 /* disable FP for previous task */ PPC_STL r4,_MSR-STACK_FRAME_OVERHEAD(r5) 1: -#ifndef CONFIG_SMP - li r5,0 - LOAD_REG_ADDRBASE(r4,last_task_used_altivec) - PPC_STL r5,ADDROFF(last_task_used_altivec)(r4) -#endif /* CONFIG_SMP */ blr #ifdef CONFIG_VSX @@ -230,20 +154,6 @@ _GLOBAL(load_up_vsx) andis. r5,r12,MSR_VEC@h beql+ load_up_altivec /* skip if already loaded */ -#ifndef CONFIG_SMP - ld r3,last_task_used_vsx@got(r2) - ld r4,0(r3) - cmpdi 0,r4,0 - beq 1f - /* Disable VSX for last_task_used_vsx */ - addi r4,r4,THREAD - ld r5,PT_REGS(r4) - ld r4,_MSR-STACK_FRAME_OVERHEAD(r5) - lis r6,MSR_VSX@h - andc r6,r4,r6 - std r6,_MSR-STACK_FRAME_OVERHEAD(r5) -1: -#endif /* CONFIG_SMP */ ld r4,PACACURRENT(r13) addi r4,r4,THREAD /* Get THREAD */ li r6,1 @@ -251,27 +161,14 @@ _GLOBAL(load_up_vsx) /* enable use of VSX after return */ oris r12,r12,MSR_VSX@h std r12,_MSR(r1) -#ifndef CONFIG_SMP - /* Update last_task_used_vsx to 'current' */ - ld r4,PACACURRENT(r13) - std r4,0(r3) -#endif /* CONFIG_SMP */ b fast_exception_return /* * __giveup_vsx(tsk) * Disable VSX for the task given as the argument. * Does NOT save vsx registers. - * Enables the VSX for use in the kernel on return. */ _GLOBAL(__giveup_vsx) - mfmsr r5 - oris r5,r5,MSR_VSX@h - mtmsrd r5 /* enable use of VSX now */ - isync - - cmpdi 0,r3,0 - beqlr- /* if no previous owner, done */ addi r3,r3,THREAD /* want THREAD of task */ ld r5,PT_REGS(r3) cmpdi 0,r5,0 @@ -281,11 +178,6 @@ _GLOBAL(__giveup_vsx) andc r4,r4,r3 /* disable VSX for previous task */ std r4,_MSR-STACK_FRAME_OVERHEAD(r5) 1: -#ifndef CONFIG_SMP - li r5,0 - ld r4,last_task_used_vsx@got(r2) - std r5,0(r4) -#endif /* CONFIG_SMP */ blr #endif /* CONFIG_VSX */ diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 099c79d8c160..638c6d9be9e0 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -366,7 +366,7 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvmppc_core_prepare_to_enter); -pfn_t kvmppc_gpa_to_pfn(struct kvm_vcpu *vcpu, gpa_t gpa, bool writing, +kvm_pfn_t kvmppc_gpa_to_pfn(struct kvm_vcpu *vcpu, gpa_t gpa, bool writing, bool *writable) { ulong mp_pa = vcpu->arch.magic_page_pa & KVM_PAM; @@ -379,9 +379,9 @@ pfn_t kvmppc_gpa_to_pfn(struct kvm_vcpu *vcpu, gpa_t gpa, bool writing, gpa &= ~0xFFFULL; if (unlikely(mp_pa) && unlikely((gpa & KVM_PAM) == mp_pa)) { ulong shared_page = ((ulong)vcpu->arch.shared) & PAGE_MASK; - pfn_t pfn; + kvm_pfn_t pfn; - pfn = (pfn_t)virt_to_phys((void*)shared_page) >> PAGE_SHIFT; + pfn = (kvm_pfn_t)virt_to_phys((void*)shared_page) >> PAGE_SHIFT; get_page(pfn_to_page(pfn)); if (writable) *writable = true; diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c index d5c9bfeb0c9c..55c4d51ea3e2 100644 --- a/arch/powerpc/kvm/book3s_32_mmu_host.c +++ b/arch/powerpc/kvm/book3s_32_mmu_host.c @@ -142,7 +142,7 @@ extern char etext[]; int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte, bool iswrite) { - pfn_t hpaddr; + kvm_pfn_t hpaddr; u64 vpn; u64 vsid; struct kvmppc_sid_map *map; diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c index 79ad35abd196..913cd2198fa6 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_host.c +++ b/arch/powerpc/kvm/book3s_64_mmu_host.c @@ -83,7 +83,7 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte, bool iswrite) { unsigned long vpn; - pfn_t hpaddr; + kvm_pfn_t hpaddr; ulong hash, hpteg; u64 vsid; int ret; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index a7352b59e6f9..cff207b72c46 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -314,16 +314,10 @@ static void kvmppc_dump_regs(struct kvm_vcpu *vcpu) static struct kvm_vcpu *kvmppc_find_vcpu(struct kvm *kvm, int id) { - int r; - struct kvm_vcpu *v, *ret = NULL; + struct kvm_vcpu *ret; mutex_lock(&kvm->lock); - kvm_for_each_vcpu(r, v, kvm) { - if (v->vcpu_id == id) { - ret = v; - break; - } - } + ret = kvm_get_vcpu_by_id(kvm, id); mutex_unlock(&kvm->lock); return ret; } @@ -2706,9 +2700,8 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) goto out; } - flush_fp_to_thread(current); - flush_altivec_to_thread(current); - flush_vsx_to_thread(current); + flush_all_to_thread(current); + vcpu->arch.wqp = &vcpu->arch.vcore->wq; vcpu->arch.pgdir = current->mm->pgd; vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST; diff --git a/arch/powerpc/kvm/book3s_paired_singles.c b/arch/powerpc/kvm/book3s_paired_singles.c index a759d9adb0b6..eab96cfe82fa 100644 --- a/arch/powerpc/kvm/book3s_paired_singles.c +++ b/arch/powerpc/kvm/book3s_paired_singles.c @@ -1265,6 +1265,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) if (rcomp) kvmppc_set_cr(vcpu, cr); + disable_kernel_fp(); preempt_enable(); return emulated; diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 64891b081ad5..95bceca8f40e 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -512,7 +512,7 @@ static void kvmppc_patch_dcbz(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte) put_page(hpage); } -static int kvmppc_visible_gpa(struct kvm_vcpu *vcpu, gpa_t gpa) +static bool kvmppc_visible_gpa(struct kvm_vcpu *vcpu, gpa_t gpa) { ulong mp_pa = vcpu->arch.magic_page_pa; @@ -521,7 +521,7 @@ static int kvmppc_visible_gpa(struct kvm_vcpu *vcpu, gpa_t gpa) gpa &= ~0xFFFULL; if (unlikely(mp_pa) && unlikely((mp_pa & KVM_PAM) == (gpa & KVM_PAM))) { - return 1; + return true; } return kvm_is_visible_gfn(vcpu->kvm, gpa >> PAGE_SHIFT); @@ -751,6 +751,7 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr, preempt_disable(); enable_kernel_fp(); load_fp_state(&vcpu->arch.fp); + disable_kernel_fp(); t->fp_save_area = &vcpu->arch.fp; preempt_enable(); } @@ -760,6 +761,7 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr, preempt_disable(); enable_kernel_altivec(); load_vr_state(&vcpu->arch.vr); + disable_kernel_altivec(); t->vr_save_area = &vcpu->arch.vr; preempt_enable(); #endif @@ -788,6 +790,7 @@ static void kvmppc_handle_lost_ext(struct kvm_vcpu *vcpu) preempt_disable(); enable_kernel_fp(); load_fp_state(&vcpu->arch.fp); + disable_kernel_fp(); preempt_enable(); } #ifdef CONFIG_ALTIVEC @@ -795,6 +798,7 @@ static void kvmppc_handle_lost_ext(struct kvm_vcpu *vcpu) preempt_disable(); enable_kernel_altivec(); load_vr_state(&vcpu->arch.vr); + disable_kernel_altivec(); preempt_enable(); } #endif @@ -1486,21 +1490,8 @@ static int kvmppc_vcpu_run_pr(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) goto out; /* interrupts now hard-disabled */ - /* Save FPU state in thread_struct */ - if (current->thread.regs->msr & MSR_FP) - giveup_fpu(current); - -#ifdef CONFIG_ALTIVEC - /* Save Altivec state in thread_struct */ - if (current->thread.regs->msr & MSR_VEC) - giveup_altivec(current); -#endif - -#ifdef CONFIG_VSX - /* Save VSX state in thread_struct */ - if (current->thread.regs->msr & MSR_VSX) - __giveup_vsx(current); -#endif + /* Save FPU, Altivec and VSX state */ + giveup_all(current); /* Preload FPU if it's enabled */ if (kvmppc_get_msr(vcpu) & MSR_FP) diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index fd5875179e5c..778ef86e187e 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -98,6 +98,7 @@ void kvmppc_vcpu_disable_spe(struct kvm_vcpu *vcpu) preempt_disable(); enable_kernel_spe(); kvmppc_save_guest_spe(vcpu); + disable_kernel_spe(); vcpu->arch.shadow_msr &= ~MSR_SPE; preempt_enable(); } @@ -107,6 +108,7 @@ static void kvmppc_vcpu_enable_spe(struct kvm_vcpu *vcpu) preempt_disable(); enable_kernel_spe(); kvmppc_load_guest_spe(vcpu); + disable_kernel_spe(); vcpu->arch.shadow_msr |= MSR_SPE; preempt_enable(); } @@ -141,6 +143,7 @@ static inline void kvmppc_load_guest_fp(struct kvm_vcpu *vcpu) if (!(current->thread.regs->msr & MSR_FP)) { enable_kernel_fp(); load_fp_state(&vcpu->arch.fp); + disable_kernel_fp(); current->thread.fp_save_area = &vcpu->arch.fp; current->thread.regs->msr |= MSR_FP; } @@ -182,6 +185,7 @@ static inline void kvmppc_load_guest_altivec(struct kvm_vcpu *vcpu) if (!(current->thread.regs->msr & MSR_VEC)) { enable_kernel_altivec(); load_vr_state(&vcpu->arch.vr); + disable_kernel_altivec(); current->thread.vr_save_area = &vcpu->arch.vr; current->thread.regs->msr |= MSR_VEC; } diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h index 72920bed3ac6..94f04fcb373e 100644 --- a/arch/powerpc/kvm/e500.h +++ b/arch/powerpc/kvm/e500.h @@ -41,7 +41,7 @@ enum vcpu_ftr { #define E500_TLB_MAS2_ATTR (0x7f) struct tlbe_ref { - pfn_t pfn; /* valid only for TLB0, except briefly */ + kvm_pfn_t pfn; /* valid only for TLB0, except briefly */ unsigned int flags; /* E500_TLB_* */ }; diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index 34c43fff4adb..b0333cc737dd 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -163,9 +163,9 @@ void kvmppc_map_magic(struct kvm_vcpu *vcpu) struct kvm_book3e_206_tlb_entry magic; ulong shared_page = ((ulong)vcpu->arch.shared) & PAGE_MASK; unsigned int stid; - pfn_t pfn; + kvm_pfn_t pfn; - pfn = (pfn_t)virt_to_phys((void *)shared_page) >> PAGE_SHIFT; + pfn = (kvm_pfn_t)virt_to_phys((void *)shared_page) >> PAGE_SHIFT; get_page(pfn_to_page(pfn)); preempt_disable(); @@ -246,7 +246,7 @@ static inline int tlbe_is_writable(struct kvm_book3e_206_tlb_entry *tlbe) static inline void kvmppc_e500_ref_setup(struct tlbe_ref *ref, struct kvm_book3e_206_tlb_entry *gtlbe, - pfn_t pfn, unsigned int wimg) + kvm_pfn_t pfn, unsigned int wimg) { ref->pfn = pfn; ref->flags = E500_TLB_VALID; @@ -309,7 +309,7 @@ static void kvmppc_e500_setup_stlbe( int tsize, struct tlbe_ref *ref, u64 gvaddr, struct kvm_book3e_206_tlb_entry *stlbe) { - pfn_t pfn = ref->pfn; + kvm_pfn_t pfn = ref->pfn; u32 pr = vcpu->arch.shared->msr & MSR_PR; BUG_ON(!(ref->flags & E500_TLB_VALID)); diff --git a/arch/powerpc/kvm/trace_pr.h b/arch/powerpc/kvm/trace_pr.h index 810507cb688a..d44f324184fb 100644 --- a/arch/powerpc/kvm/trace_pr.h +++ b/arch/powerpc/kvm/trace_pr.h @@ -30,7 +30,7 @@ TRACE_EVENT(kvm_book3s_reenter, #ifdef CONFIG_PPC_BOOK3S_64 TRACE_EVENT(kvm_book3s_64_mmu_map, - TP_PROTO(int rflags, ulong hpteg, ulong va, pfn_t hpaddr, + TP_PROTO(int rflags, ulong hpteg, ulong va, kvm_pfn_t hpaddr, struct kvmppc_pte *orig_pte), TP_ARGS(rflags, hpteg, va, hpaddr, orig_pte), diff --git a/arch/powerpc/lib/vmx-helper.c b/arch/powerpc/lib/vmx-helper.c index ac93a3bd2730..b27e030fc9f8 100644 --- a/arch/powerpc/lib/vmx-helper.c +++ b/arch/powerpc/lib/vmx-helper.c @@ -46,6 +46,7 @@ int enter_vmx_usercopy(void) */ int exit_vmx_usercopy(void) { + disable_kernel_altivec(); pagefault_enable(); preempt_enable(); return 0; @@ -70,6 +71,7 @@ int enter_vmx_copy(void) */ void *exit_vmx_copy(void *dest) { + disable_kernel_altivec(); preempt_enable(); return dest; } diff --git a/arch/powerpc/lib/xor_vmx.c b/arch/powerpc/lib/xor_vmx.c index e905f7c2ea7b..07f49f1568e5 100644 --- a/arch/powerpc/lib/xor_vmx.c +++ b/arch/powerpc/lib/xor_vmx.c @@ -74,6 +74,7 @@ void xor_altivec_2(unsigned long bytes, unsigned long *v1_in, v2 += 4; } while (--lines > 0); + disable_kernel_altivec(); preempt_enable(); } EXPORT_SYMBOL(xor_altivec_2); @@ -102,6 +103,7 @@ void xor_altivec_3(unsigned long bytes, unsigned long *v1_in, v3 += 4; } while (--lines > 0); + disable_kernel_altivec(); preempt_enable(); } EXPORT_SYMBOL(xor_altivec_3); @@ -135,6 +137,7 @@ void xor_altivec_4(unsigned long bytes, unsigned long *v1_in, v4 += 4; } while (--lines > 0); + disable_kernel_altivec(); preempt_enable(); } EXPORT_SYMBOL(xor_altivec_4); @@ -172,6 +175,7 @@ void xor_altivec_5(unsigned long bytes, unsigned long *v1_in, v5 += 4; } while (--lines > 0); + disable_kernel_altivec(); preempt_enable(); } EXPORT_SYMBOL(xor_altivec_5); diff --git a/arch/powerpc/mm/40x_mmu.c b/arch/powerpc/mm/40x_mmu.c index 5810967511d4..31a5d42df8c9 100644 --- a/arch/powerpc/mm/40x_mmu.c +++ b/arch/powerpc/mm/40x_mmu.c @@ -110,10 +110,10 @@ unsigned long __init mmu_mapin_ram(unsigned long top) unsigned long val = p | _PMD_SIZE_16M | _PAGE_EXEC | _PAGE_HWWRITE; pmdp = pmd_offset(pud_offset(pgd_offset_k(v), v), v); - pmd_val(*pmdp++) = val; - pmd_val(*pmdp++) = val; - pmd_val(*pmdp++) = val; - pmd_val(*pmdp++) = val; + *pmdp++ = __pmd(val); + *pmdp++ = __pmd(val); + *pmdp++ = __pmd(val); + *pmdp++ = __pmd(val); v += LARGE_PAGE_SIZE_16M; p += LARGE_PAGE_SIZE_16M; @@ -125,7 +125,7 @@ unsigned long __init mmu_mapin_ram(unsigned long top) unsigned long val = p | _PMD_SIZE_4M | _PAGE_EXEC | _PAGE_HWWRITE; pmdp = pmd_offset(pud_offset(pgd_offset_k(v), v), v); - pmd_val(*pmdp) = val; + *pmdp = __pmd(val); v += LARGE_PAGE_SIZE_4M; p += LARGE_PAGE_SIZE_4M; diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index 3eb73a38220d..1ffeda85c086 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -14,10 +14,13 @@ obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o \ obj-$(CONFIG_PPC_BOOK3E) += tlb_low_$(CONFIG_WORD_SIZE)e.o hash64-$(CONFIG_PPC_NATIVE) := hash_native_64.o obj-$(CONFIG_PPC_STD_MMU_64) += hash_utils_64.o slb_low.o slb.o $(hash64-y) -obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu_32.o -obj-$(CONFIG_PPC_STD_MMU) += hash_low_$(CONFIG_WORD_SIZE).o \ - tlb_hash$(CONFIG_WORD_SIZE).o \ +obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu_32.o hash_low_32.o +obj-$(CONFIG_PPC_STD_MMU) += tlb_hash$(CONFIG_WORD_SIZE).o \ mmu_context_hash$(CONFIG_WORD_SIZE).o +ifeq ($(CONFIG_PPC_STD_MMU_64),y) +obj-$(CONFIG_PPC_4K_PAGES) += hash64_4k.o +obj-$(CONFIG_PPC_64K_PAGES) += hash64_64k.o +endif obj-$(CONFIG_PPC_ICSWX) += icswx.o obj-$(CONFIG_PPC_ICSWX_PID) += icswx_pid.o obj-$(CONFIG_40x) += 40x_mmu.o diff --git a/arch/powerpc/mm/hash64_4k.c b/arch/powerpc/mm/hash64_4k.c new file mode 100644 index 000000000000..e7c04542ba62 --- /dev/null +++ b/arch/powerpc/mm/hash64_4k.c @@ -0,0 +1,123 @@ +/* + * Copyright IBM Corporation, 2015 + * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU Lesser General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + */ + +#include <linux/mm.h> +#include <asm/machdep.h> +#include <asm/mmu.h> + +int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, + pte_t *ptep, unsigned long trap, unsigned long flags, + int ssize, int subpg_prot) +{ + unsigned long hpte_group; + unsigned long rflags, pa; + unsigned long old_pte, new_pte; + unsigned long vpn, hash, slot; + unsigned long shift = mmu_psize_defs[MMU_PAGE_4K].shift; + + /* + * atomically mark the linux large page PTE busy and dirty + */ + do { + pte_t pte = READ_ONCE(*ptep); + + old_pte = pte_val(pte); + /* If PTE busy, retry the access */ + if (unlikely(old_pte & _PAGE_BUSY)) + return 0; + /* If PTE permissions don't match, take page fault */ + if (unlikely(access & ~old_pte)) + return 1; + /* + * Try to lock the PTE, add ACCESSED and DIRTY if it was + * a write access. Since this is 4K insert of 64K page size + * also add _PAGE_COMBO + */ + new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED | _PAGE_HASHPTE; + if (access & _PAGE_RW) + new_pte |= _PAGE_DIRTY; + } while (old_pte != __cmpxchg_u64((unsigned long *)ptep, + old_pte, new_pte)); + /* + * PP bits. _PAGE_USER is already PP bit 0x2, so we only + * need to add in 0x1 if it's a read-only user page + */ + rflags = htab_convert_pte_flags(new_pte); + + if (!cpu_has_feature(CPU_FTR_NOEXECUTE) && + !cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) + rflags = hash_page_do_lazy_icache(rflags, __pte(old_pte), trap); + + vpn = hpt_vpn(ea, vsid, ssize); + if (unlikely(old_pte & _PAGE_HASHPTE)) { + /* + * There MIGHT be an HPTE for this pte + */ + hash = hpt_hash(vpn, shift, ssize); + if (old_pte & _PAGE_F_SECOND) + hash = ~hash; + slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; + slot += (old_pte & _PAGE_F_GIX) >> _PAGE_F_GIX_SHIFT; + + if (ppc_md.hpte_updatepp(slot, rflags, vpn, MMU_PAGE_4K, + MMU_PAGE_4K, ssize, flags) == -1) + old_pte &= ~_PAGE_HPTEFLAGS; + } + + if (likely(!(old_pte & _PAGE_HASHPTE))) { + + pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT; + hash = hpt_hash(vpn, shift, ssize); + +repeat: + hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; + + /* Insert into the hash table, primary slot */ + slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, 0, + MMU_PAGE_4K, MMU_PAGE_4K, ssize); + /* + * Primary is full, try the secondary + */ + if (unlikely(slot == -1)) { + hpte_group = ((~hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; + slot = ppc_md.hpte_insert(hpte_group, vpn, pa, + rflags, HPTE_V_SECONDARY, + MMU_PAGE_4K, MMU_PAGE_4K, ssize); + if (slot == -1) { + if (mftb() & 0x1) + hpte_group = ((hash & htab_hash_mask) * + HPTES_PER_GROUP) & ~0x7UL; + ppc_md.hpte_remove(hpte_group); + /* + * FIXME!! Should be try the group from which we removed ? + */ + goto repeat; + } + } + /* + * Hypervisor failure. Restore old pmd and return -1 + * similar to __hash_page_* + */ + if (unlikely(slot == -2)) { + *ptep = __pte(old_pte); + hash_failure_debug(ea, access, vsid, trap, ssize, + MMU_PAGE_4K, MMU_PAGE_4K, old_pte); + return -1; + } + new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE; + new_pte |= (slot << _PAGE_F_GIX_SHIFT) & (_PAGE_F_SECOND | _PAGE_F_GIX); + } + *ptep = __pte(new_pte & ~_PAGE_BUSY); + return 0; +} diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c new file mode 100644 index 000000000000..0762c1e08c88 --- /dev/null +++ b/arch/powerpc/mm/hash64_64k.c @@ -0,0 +1,322 @@ +/* + * Copyright IBM Corporation, 2015 + * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU Lesser General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + */ + +#include <linux/mm.h> +#include <asm/machdep.h> +#include <asm/mmu.h> +/* + * index from 0 - 15 + */ +bool __rpte_sub_valid(real_pte_t rpte, unsigned long index) +{ + unsigned long g_idx; + unsigned long ptev = pte_val(rpte.pte); + + g_idx = (ptev & _PAGE_COMBO_VALID) >> _PAGE_F_GIX_SHIFT; + index = index >> 2; + if (g_idx & (0x1 << index)) + return true; + else + return false; +} +/* + * index from 0 - 15 + */ +static unsigned long mark_subptegroup_valid(unsigned long ptev, unsigned long index) +{ + unsigned long g_idx; + + if (!(ptev & _PAGE_COMBO)) + return ptev; + index = index >> 2; + g_idx = 0x1 << index; + + return ptev | (g_idx << _PAGE_F_GIX_SHIFT); +} + +int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, + pte_t *ptep, unsigned long trap, unsigned long flags, + int ssize, int subpg_prot) +{ + real_pte_t rpte; + unsigned long *hidxp; + unsigned long hpte_group; + unsigned int subpg_index; + unsigned long rflags, pa, hidx; + unsigned long old_pte, new_pte, subpg_pte; + unsigned long vpn, hash, slot; + unsigned long shift = mmu_psize_defs[MMU_PAGE_4K].shift; + + /* + * atomically mark the linux large page PTE busy and dirty + */ + do { + pte_t pte = READ_ONCE(*ptep); + + old_pte = pte_val(pte); + /* If PTE busy, retry the access */ + if (unlikely(old_pte & _PAGE_BUSY)) + return 0; + /* If PTE permissions don't match, take page fault */ + if (unlikely(access & ~old_pte)) + return 1; + /* + * Try to lock the PTE, add ACCESSED and DIRTY if it was + * a write access. Since this is 4K insert of 64K page size + * also add _PAGE_COMBO + */ + new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED | _PAGE_COMBO; + if (access & _PAGE_RW) + new_pte |= _PAGE_DIRTY; + } while (old_pte != __cmpxchg_u64((unsigned long *)ptep, + old_pte, new_pte)); + /* + * Handle the subpage protection bits + */ + subpg_pte = new_pte & ~subpg_prot; + rflags = htab_convert_pte_flags(subpg_pte); + + if (!cpu_has_feature(CPU_FTR_NOEXECUTE) && + !cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) { + + /* + * No CPU has hugepages but lacks no execute, so we + * don't need to worry about that case + */ + rflags = hash_page_do_lazy_icache(rflags, __pte(old_pte), trap); + } + + subpg_index = (ea & (PAGE_SIZE - 1)) >> shift; + vpn = hpt_vpn(ea, vsid, ssize); + rpte = __real_pte(__pte(old_pte), ptep); + /* + *None of the sub 4k page is hashed + */ + if (!(old_pte & _PAGE_HASHPTE)) + goto htab_insert_hpte; + /* + * Check if the pte was already inserted into the hash table + * as a 64k HW page, and invalidate the 64k HPTE if so. + */ + if (!(old_pte & _PAGE_COMBO)) { + flush_hash_page(vpn, rpte, MMU_PAGE_64K, ssize, flags); + old_pte &= ~_PAGE_HASHPTE | _PAGE_F_GIX | _PAGE_F_SECOND; + goto htab_insert_hpte; + } + /* + * Check for sub page valid and update + */ + if (__rpte_sub_valid(rpte, subpg_index)) { + int ret; + + hash = hpt_hash(vpn, shift, ssize); + hidx = __rpte_to_hidx(rpte, subpg_index); + if (hidx & _PTEIDX_SECONDARY) + hash = ~hash; + slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; + slot += hidx & _PTEIDX_GROUP_IX; + + ret = ppc_md.hpte_updatepp(slot, rflags, vpn, + MMU_PAGE_4K, MMU_PAGE_4K, + ssize, flags); + /* + *if we failed because typically the HPTE wasn't really here + * we try an insertion. + */ + if (ret == -1) + goto htab_insert_hpte; + + *ptep = __pte(new_pte & ~_PAGE_BUSY); + return 0; + } + +htab_insert_hpte: + /* + * handle _PAGE_4K_PFN case + */ + if (old_pte & _PAGE_4K_PFN) { + /* + * All the sub 4k page have the same + * physical address. + */ + pa = pte_pfn(__pte(old_pte)) << HW_PAGE_SHIFT; + } else { + pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT; + pa += (subpg_index << shift); + } + hash = hpt_hash(vpn, shift, ssize); +repeat: + hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; + + /* Insert into the hash table, primary slot */ + slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, 0, + MMU_PAGE_4K, MMU_PAGE_4K, ssize); + /* + * Primary is full, try the secondary + */ + if (unlikely(slot == -1)) { + hpte_group = ((~hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; + slot = ppc_md.hpte_insert(hpte_group, vpn, pa, + rflags, HPTE_V_SECONDARY, + MMU_PAGE_4K, MMU_PAGE_4K, ssize); + if (slot == -1) { + if (mftb() & 0x1) + hpte_group = ((hash & htab_hash_mask) * + HPTES_PER_GROUP) & ~0x7UL; + ppc_md.hpte_remove(hpte_group); + /* + * FIXME!! Should be try the group from which we removed ? + */ + goto repeat; + } + } + /* + * Hypervisor failure. Restore old pmd and return -1 + * similar to __hash_page_* + */ + if (unlikely(slot == -2)) { + *ptep = __pte(old_pte); + hash_failure_debug(ea, access, vsid, trap, ssize, + MMU_PAGE_4K, MMU_PAGE_4K, old_pte); + return -1; + } + /* + * Insert slot number & secondary bit in PTE second half, + * clear _PAGE_BUSY and set appropriate HPTE slot bit + * Since we have _PAGE_BUSY set on ptep, we can be sure + * nobody is undating hidx. + */ + hidxp = (unsigned long *)(ptep + PTRS_PER_PTE); + rpte.hidx &= ~(0xfUL << (subpg_index << 2)); + *hidxp = rpte.hidx | (slot << (subpg_index << 2)); + new_pte = mark_subptegroup_valid(new_pte, subpg_index); + new_pte |= _PAGE_HASHPTE; + /* + * check __real_pte for details on matching smp_rmb() + */ + smp_wmb(); + *ptep = __pte(new_pte & ~_PAGE_BUSY); + return 0; +} + +int __hash_page_64K(unsigned long ea, unsigned long access, + unsigned long vsid, pte_t *ptep, unsigned long trap, + unsigned long flags, int ssize) +{ + + unsigned long hpte_group; + unsigned long rflags, pa; + unsigned long old_pte, new_pte; + unsigned long vpn, hash, slot; + unsigned long shift = mmu_psize_defs[MMU_PAGE_64K].shift; + + /* + * atomically mark the linux large page PTE busy and dirty + */ + do { + pte_t pte = READ_ONCE(*ptep); + + old_pte = pte_val(pte); + /* If PTE busy, retry the access */ + if (unlikely(old_pte & _PAGE_BUSY)) + return 0; + /* If PTE permissions don't match, take page fault */ + if (unlikely(access & ~old_pte)) + return 1; + /* + * Check if PTE has the cache-inhibit bit set + * If so, bail out and refault as a 4k page + */ + if (!mmu_has_feature(MMU_FTR_CI_LARGE_PAGE) && + unlikely(old_pte & _PAGE_NO_CACHE)) + return 0; + /* + * Try to lock the PTE, add ACCESSED and DIRTY if it was + * a write access. Since this is 4K insert of 64K page size + * also add _PAGE_COMBO + */ + new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED; + if (access & _PAGE_RW) + new_pte |= _PAGE_DIRTY; + } while (old_pte != __cmpxchg_u64((unsigned long *)ptep, + old_pte, new_pte)); + + rflags = htab_convert_pte_flags(new_pte); + + if (!cpu_has_feature(CPU_FTR_NOEXECUTE) && + !cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) + rflags = hash_page_do_lazy_icache(rflags, __pte(old_pte), trap); + + vpn = hpt_vpn(ea, vsid, ssize); + if (unlikely(old_pte & _PAGE_HASHPTE)) { + /* + * There MIGHT be an HPTE for this pte + */ + hash = hpt_hash(vpn, shift, ssize); + if (old_pte & _PAGE_F_SECOND) + hash = ~hash; + slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; + slot += (old_pte & _PAGE_F_GIX) >> _PAGE_F_GIX_SHIFT; + + if (ppc_md.hpte_updatepp(slot, rflags, vpn, MMU_PAGE_64K, + MMU_PAGE_64K, ssize, flags) == -1) + old_pte &= ~_PAGE_HPTEFLAGS; + } + + if (likely(!(old_pte & _PAGE_HASHPTE))) { + + pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT; + hash = hpt_hash(vpn, shift, ssize); + +repeat: + hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; + + /* Insert into the hash table, primary slot */ + slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, 0, + MMU_PAGE_64K, MMU_PAGE_64K, ssize); + /* + * Primary is full, try the secondary + */ + if (unlikely(slot == -1)) { + hpte_group = ((~hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; + slot = ppc_md.hpte_insert(hpte_group, vpn, pa, + rflags, HPTE_V_SECONDARY, + MMU_PAGE_64K, MMU_PAGE_64K, ssize); + if (slot == -1) { + if (mftb() & 0x1) + hpte_group = ((hash & htab_hash_mask) * + HPTES_PER_GROUP) & ~0x7UL; + ppc_md.hpte_remove(hpte_group); + /* + * FIXME!! Should be try the group from which we removed ? + */ + goto repeat; + } + } + /* + * Hypervisor failure. Restore old pmd and return -1 + * similar to __hash_page_* + */ + if (unlikely(slot == -2)) { + *ptep = __pte(old_pte); + hash_failure_debug(ea, access, vsid, trap, ssize, + MMU_PAGE_64K, MMU_PAGE_64K, old_pte); + return -1; + } + new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE; + new_pte |= (slot << _PAGE_F_GIX_SHIFT) & (_PAGE_F_SECOND | _PAGE_F_GIX); + } + *ptep = __pte(new_pte & ~_PAGE_BUSY); + return 0; +} diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S deleted file mode 100644 index 3b49e3295901..000000000000 --- a/arch/powerpc/mm/hash_low_64.S +++ /dev/null @@ -1,1003 +0,0 @@ -/* - * ppc64 MMU hashtable management routines - * - * (c) Copyright IBM Corp. 2003, 2005 - * - * Maintained by: Benjamin Herrenschmidt - * <benh@kernel.crashing.org> - * - * This file is covered by the GNU Public Licence v2 as - * described in the kernel's COPYING file. - */ - -#include <asm/reg.h> -#include <asm/pgtable.h> -#include <asm/mmu.h> -#include <asm/page.h> -#include <asm/types.h> -#include <asm/ppc_asm.h> -#include <asm/asm-offsets.h> -#include <asm/cputable.h> - - .text - -/* - * Stackframe: - * - * +-> Back chain (SP + 256) - * | General register save area (SP + 112) - * | Parameter save area (SP + 48) - * | TOC save area (SP + 40) - * | link editor doubleword (SP + 32) - * | compiler doubleword (SP + 24) - * | LR save area (SP + 16) - * | CR save area (SP + 8) - * SP ---> +-- Back chain (SP + 0) - */ - -#ifndef CONFIG_PPC_64K_PAGES - -/***************************************************************************** - * * - * 4K SW & 4K HW pages implementation * - * * - *****************************************************************************/ - - -/* - * _hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, - * pte_t *ptep, unsigned long trap, unsigned long flags, - * int ssize) - * - * Adds a 4K page to the hash table in a segment of 4K pages only - */ - -_GLOBAL(__hash_page_4K) - mflr r0 - std r0,16(r1) - stdu r1,-STACKFRAMESIZE(r1) - /* Save all params that we need after a function call */ - std r6,STK_PARAM(R6)(r1) - std r8,STK_PARAM(R8)(r1) - std r9,STK_PARAM(R9)(r1) - - /* Save non-volatile registers. - * r31 will hold "old PTE" - * r30 is "new PTE" - * r29 is vpn - * r28 is a hash value - * r27 is hashtab mask (maybe dynamic patched instead ?) - */ - std r27,STK_REG(R27)(r1) - std r28,STK_REG(R28)(r1) - std r29,STK_REG(R29)(r1) - std r30,STK_REG(R30)(r1) - std r31,STK_REG(R31)(r1) - - /* Step 1: - * - * Check permissions, atomically mark the linux PTE busy - * and hashed. - */ -1: - ldarx r31,0,r6 - /* Check access rights (access & ~(pte_val(*ptep))) */ - andc. r0,r4,r31 - bne- htab_wrong_access - /* Check if PTE is busy */ - andi. r0,r31,_PAGE_BUSY - /* If so, just bail out and refault if needed. Someone else - * is changing this PTE anyway and might hash it. - */ - bne- htab_bail_ok - - /* Prepare new PTE value (turn access RW into DIRTY, then - * add BUSY,HASHPTE and ACCESSED) - */ - rlwinm r30,r4,32-9+7,31-7,31-7 /* _PAGE_RW -> _PAGE_DIRTY */ - or r30,r30,r31 - ori r30,r30,_PAGE_BUSY | _PAGE_ACCESSED | _PAGE_HASHPTE - /* Write the linux PTE atomically (setting busy) */ - stdcx. r30,0,r6 - bne- 1b - isync - - /* Step 2: - * - * Insert/Update the HPTE in the hash table. At this point, - * r4 (access) is re-useable, we use it for the new HPTE flags - */ - -BEGIN_FTR_SECTION - cmpdi r9,0 /* check segment size */ - bne 3f -END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT) - /* Calc vpn and put it in r29 */ - sldi r29,r5,SID_SHIFT - VPN_SHIFT - rldicl r28,r3,64 - VPN_SHIFT,64 - (SID_SHIFT - VPN_SHIFT) - or r29,r28,r29 - /* - * Calculate hash value for primary slot and store it in r28 - * r3 = va, r5 = vsid - * r0 = (va >> 12) & ((1ul << (28 - 12)) -1) - */ - rldicl r0,r3,64-12,48 - xor r28,r5,r0 /* hash */ - b 4f - -3: /* Calc vpn and put it in r29 */ - sldi r29,r5,SID_SHIFT_1T - VPN_SHIFT - rldicl r28,r3,64 - VPN_SHIFT,64 - (SID_SHIFT_1T - VPN_SHIFT) - or r29,r28,r29 - - /* - * calculate hash value for primary slot and - * store it in r28 for 1T segment - * r3 = va, r5 = vsid - */ - sldi r28,r5,25 /* vsid << 25 */ - /* r0 = (va >> 12) & ((1ul << (40 - 12)) -1) */ - rldicl r0,r3,64-12,36 - xor r28,r28,r5 /* vsid ^ ( vsid << 25) */ - xor r28,r28,r0 /* hash */ - - /* Convert linux PTE bits into HW equivalents */ -4: andi. r3,r30,0x1fe /* Get basic set of flags */ - xori r3,r3,HPTE_R_N /* _PAGE_EXEC -> NOEXEC */ - rlwinm r0,r30,32-9+1,30,30 /* _PAGE_RW -> _PAGE_USER (r0) */ - rlwinm r4,r30,32-7+1,30,30 /* _PAGE_DIRTY -> _PAGE_USER (r4) */ - and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY ->r0 bit 30*/ - andc r0,r30,r0 /* r0 = pte & ~r0 */ - rlwimi r3,r0,32-1,31,31 /* Insert result into PP lsb */ - /* - * Always add "C" bit for perf. Memory coherence is always enabled - */ - ori r3,r3,HPTE_R_C | HPTE_R_M - - /* We eventually do the icache sync here (maybe inline that - * code rather than call a C function...) - */ -BEGIN_FTR_SECTION - mr r4,r30 - mr r5,r7 - bl hash_page_do_lazy_icache -END_FTR_SECTION(CPU_FTR_NOEXECUTE|CPU_FTR_COHERENT_ICACHE, CPU_FTR_NOEXECUTE) - - /* At this point, r3 contains new PP bits, save them in - * place of "access" in the param area (sic) - */ - std r3,STK_PARAM(R4)(r1) - - /* Get htab_hash_mask */ - ld r4,htab_hash_mask@got(2) - ld r27,0(r4) /* htab_hash_mask -> r27 */ - - /* Check if we may already be in the hashtable, in this case, we - * go to out-of-line code to try to modify the HPTE - */ - andi. r0,r31,_PAGE_HASHPTE - bne htab_modify_pte - -htab_insert_pte: - /* Clear hpte bits in new pte (we also clear BUSY btw) and - * add _PAGE_HASHPTE - */ - lis r0,_PAGE_HPTEFLAGS@h - ori r0,r0,_PAGE_HPTEFLAGS@l - andc r30,r30,r0 - ori r30,r30,_PAGE_HASHPTE - - /* physical address r5 */ - rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT - sldi r5,r5,PAGE_SHIFT - - /* Calculate primary group hash */ - and r0,r28,r27 - rldicr r3,r0,3,63-3 /* r3 = (hash & mask) << 3 */ - - /* Call ppc_md.hpte_insert */ - ld r6,STK_PARAM(R4)(r1) /* Retrieve new pp bits */ - mr r4,r29 /* Retrieve vpn */ - li r7,0 /* !bolted, !secondary */ - li r8,MMU_PAGE_4K /* page size */ - li r9,MMU_PAGE_4K /* actual page size */ - ld r10,STK_PARAM(R9)(r1) /* segment size */ -.globl htab_call_hpte_insert1 -htab_call_hpte_insert1: - bl . /* Patched by htab_finish_init() */ - cmpdi 0,r3,0 - bge htab_pte_insert_ok /* Insertion successful */ - cmpdi 0,r3,-2 /* Critical failure */ - beq- htab_pte_insert_failure - - /* Now try secondary slot */ - - /* physical address r5 */ - rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT - sldi r5,r5,PAGE_SHIFT - - /* Calculate secondary group hash */ - andc r0,r27,r28 - rldicr r3,r0,3,63-3 /* r0 = (~hash & mask) << 3 */ - - /* Call ppc_md.hpte_insert */ - ld r6,STK_PARAM(R4)(r1) /* Retrieve new pp bits */ - mr r4,r29 /* Retrieve vpn */ - li r7,HPTE_V_SECONDARY /* !bolted, secondary */ - li r8,MMU_PAGE_4K /* page size */ - li r9,MMU_PAGE_4K /* actual page size */ - ld r10,STK_PARAM(R9)(r1) /* segment size */ -.globl htab_call_hpte_insert2 -htab_call_hpte_insert2: - bl . /* Patched by htab_finish_init() */ - cmpdi 0,r3,0 - bge+ htab_pte_insert_ok /* Insertion successful */ - cmpdi 0,r3,-2 /* Critical failure */ - beq- htab_pte_insert_failure - - /* Both are full, we need to evict something */ - mftb r0 - /* Pick a random group based on TB */ - andi. r0,r0,1 - mr r5,r28 - bne 2f - not r5,r5 -2: and r0,r5,r27 - rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */ - /* Call ppc_md.hpte_remove */ -.globl htab_call_hpte_remove -htab_call_hpte_remove: - bl . /* Patched by htab_finish_init() */ - - /* Try all again */ - b htab_insert_pte - -htab_bail_ok: - li r3,0 - b htab_bail - -htab_pte_insert_ok: - /* Insert slot number & secondary bit in PTE */ - rldimi r30,r3,12,63-15 - - /* Write out the PTE with a normal write - * (maybe add eieio may be good still ?) - */ -htab_write_out_pte: - ld r6,STK_PARAM(R6)(r1) - std r30,0(r6) - li r3, 0 -htab_bail: - ld r27,STK_REG(R27)(r1) - ld r28,STK_REG(R28)(r1) - ld r29,STK_REG(R29)(r1) - ld r30,STK_REG(R30)(r1) - ld r31,STK_REG(R31)(r1) - addi r1,r1,STACKFRAMESIZE - ld r0,16(r1) - mtlr r0 - blr - -htab_modify_pte: - /* Keep PP bits in r4 and slot idx from the PTE around in r3 */ - mr r4,r3 - rlwinm r3,r31,32-12,29,31 - - /* Secondary group ? if yes, get a inverted hash value */ - mr r5,r28 - andi. r0,r31,_PAGE_SECONDARY - beq 1f - not r5,r5 -1: - /* Calculate proper slot value for ppc_md.hpte_updatepp */ - and r0,r5,r27 - rldicr r0,r0,3,63-3 /* r0 = (hash & mask) << 3 */ - add r3,r0,r3 /* add slot idx */ - - /* Call ppc_md.hpte_updatepp */ - mr r5,r29 /* vpn */ - li r6,MMU_PAGE_4K /* base page size */ - li r7,MMU_PAGE_4K /* actual page size */ - ld r8,STK_PARAM(R9)(r1) /* segment size */ - ld r9,STK_PARAM(R8)(r1) /* get "flags" param */ -.globl htab_call_hpte_updatepp -htab_call_hpte_updatepp: - bl . /* Patched by htab_finish_init() */ - - /* if we failed because typically the HPTE wasn't really here - * we try an insertion. - */ - cmpdi 0,r3,-1 - beq- htab_insert_pte - - /* Clear the BUSY bit and Write out the PTE */ - li r0,_PAGE_BUSY - andc r30,r30,r0 - b htab_write_out_pte - -htab_wrong_access: - /* Bail out clearing reservation */ - stdcx. r31,0,r6 - li r3,1 - b htab_bail - -htab_pte_insert_failure: - /* Bail out restoring old PTE */ - ld r6,STK_PARAM(R6)(r1) - std r31,0(r6) - li r3,-1 - b htab_bail - - -#else /* CONFIG_PPC_64K_PAGES */ - - -/***************************************************************************** - * * - * 64K SW & 4K or 64K HW in a 4K segment pages implementation * - * * - *****************************************************************************/ - -/* _hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, - * pte_t *ptep, unsigned long trap, unsigned local flags, - * int ssize, int subpg_prot) - */ - -/* - * For now, we do NOT implement Admixed pages - */ -_GLOBAL(__hash_page_4K) - mflr r0 - std r0,16(r1) - stdu r1,-STACKFRAMESIZE(r1) - /* Save all params that we need after a function call */ - std r6,STK_PARAM(R6)(r1) - std r8,STK_PARAM(R8)(r1) - std r9,STK_PARAM(R9)(r1) - - /* Save non-volatile registers. - * r31 will hold "old PTE" - * r30 is "new PTE" - * r29 is vpn - * r28 is a hash value - * r27 is hashtab mask (maybe dynamic patched instead ?) - * r26 is the hidx mask - * r25 is the index in combo page - */ - std r25,STK_REG(R25)(r1) - std r26,STK_REG(R26)(r1) - std r27,STK_REG(R27)(r1) - std r28,STK_REG(R28)(r1) - std r29,STK_REG(R29)(r1) - std r30,STK_REG(R30)(r1) - std r31,STK_REG(R31)(r1) - - /* Step 1: - * - * Check permissions, atomically mark the linux PTE busy - * and hashed. - */ -1: - ldarx r31,0,r6 - /* Check access rights (access & ~(pte_val(*ptep))) */ - andc. r0,r4,r31 - bne- htab_wrong_access - /* Check if PTE is busy */ - andi. r0,r31,_PAGE_BUSY - /* If so, just bail out and refault if needed. Someone else - * is changing this PTE anyway and might hash it. - */ - bne- htab_bail_ok - /* Prepare new PTE value (turn access RW into DIRTY, then - * add BUSY and ACCESSED) - */ - rlwinm r30,r4,32-9+7,31-7,31-7 /* _PAGE_RW -> _PAGE_DIRTY */ - or r30,r30,r31 - ori r30,r30,_PAGE_BUSY | _PAGE_ACCESSED - oris r30,r30,_PAGE_COMBO@h - /* Write the linux PTE atomically (setting busy) */ - stdcx. r30,0,r6 - bne- 1b - isync - - /* Step 2: - * - * Insert/Update the HPTE in the hash table. At this point, - * r4 (access) is re-useable, we use it for the new HPTE flags - */ - - /* Load the hidx index */ - rldicl r25,r3,64-12,60 - -BEGIN_FTR_SECTION - cmpdi r9,0 /* check segment size */ - bne 3f -END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT) - /* Calc vpn and put it in r29 */ - sldi r29,r5,SID_SHIFT - VPN_SHIFT - /* - * clrldi r3,r3,64 - SID_SHIFT --> ea & 0xfffffff - * srdi r28,r3,VPN_SHIFT - */ - rldicl r28,r3,64 - VPN_SHIFT,64 - (SID_SHIFT - VPN_SHIFT) - or r29,r28,r29 - /* - * Calculate hash value for primary slot and store it in r28 - * r3 = va, r5 = vsid - * r0 = (va >> 12) & ((1ul << (28 - 12)) -1) - */ - rldicl r0,r3,64-12,48 - xor r28,r5,r0 /* hash */ - b 4f - -3: /* Calc vpn and put it in r29 */ - sldi r29,r5,SID_SHIFT_1T - VPN_SHIFT - /* - * clrldi r3,r3,64 - SID_SHIFT_1T --> ea & 0xffffffffff - * srdi r28,r3,VPN_SHIFT - */ - rldicl r28,r3,64 - VPN_SHIFT,64 - (SID_SHIFT_1T - VPN_SHIFT) - or r29,r28,r29 - - /* - * Calculate hash value for primary slot and - * store it in r28 for 1T segment - * r3 = va, r5 = vsid - */ - sldi r28,r5,25 /* vsid << 25 */ - /* r0 = (va >> 12) & ((1ul << (40 - 12)) -1) */ - rldicl r0,r3,64-12,36 - xor r28,r28,r5 /* vsid ^ ( vsid << 25) */ - xor r28,r28,r0 /* hash */ - - /* Convert linux PTE bits into HW equivalents */ -4: -#ifdef CONFIG_PPC_SUBPAGE_PROT - andc r10,r30,r10 - andi. r3,r10,0x1fe /* Get basic set of flags */ - rlwinm r0,r10,32-9+1,30,30 /* _PAGE_RW -> _PAGE_USER (r0) */ -#else - andi. r3,r30,0x1fe /* Get basic set of flags */ - rlwinm r0,r30,32-9+1,30,30 /* _PAGE_RW -> _PAGE_USER (r0) */ -#endif - xori r3,r3,HPTE_R_N /* _PAGE_EXEC -> NOEXEC */ - rlwinm r4,r30,32-7+1,30,30 /* _PAGE_DIRTY -> _PAGE_USER (r4) */ - and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY ->r0 bit 30*/ - andc r0,r3,r0 /* r0 = pte & ~r0 */ - rlwimi r3,r0,32-1,31,31 /* Insert result into PP lsb */ - /* - * Always add "C" bit for perf. Memory coherence is always enabled - */ - ori r3,r3,HPTE_R_C | HPTE_R_M - - /* We eventually do the icache sync here (maybe inline that - * code rather than call a C function...) - */ -BEGIN_FTR_SECTION - mr r4,r30 - mr r5,r7 - bl hash_page_do_lazy_icache -END_FTR_SECTION(CPU_FTR_NOEXECUTE|CPU_FTR_COHERENT_ICACHE, CPU_FTR_NOEXECUTE) - - /* At this point, r3 contains new PP bits, save them in - * place of "access" in the param area (sic) - */ - std r3,STK_PARAM(R4)(r1) - - /* Get htab_hash_mask */ - ld r4,htab_hash_mask@got(2) - ld r27,0(r4) /* htab_hash_mask -> r27 */ - - /* Check if we may already be in the hashtable, in this case, we - * go to out-of-line code to try to modify the HPTE. We look for - * the bit at (1 >> (index + 32)) - */ - rldicl. r0,r31,64-12,48 - li r26,0 /* Default hidx */ - beq htab_insert_pte - - /* - * Check if the pte was already inserted into the hash table - * as a 64k HW page, and invalidate the 64k HPTE if so. - */ - andis. r0,r31,_PAGE_COMBO@h - beq htab_inval_old_hpte - - ld r6,STK_PARAM(R6)(r1) - ori r26,r6,PTE_PAGE_HIDX_OFFSET /* Load the hidx mask. */ - ld r26,0(r26) - addi r5,r25,36 /* Check actual HPTE_SUB bit, this */ - rldcr. r0,r31,r5,0 /* must match pgtable.h definition */ - bne htab_modify_pte - -htab_insert_pte: - /* real page number in r5, PTE RPN value + index */ - andis. r0,r31,_PAGE_4K_PFN@h - srdi r5,r31,PTE_RPN_SHIFT - bne- htab_special_pfn - sldi r5,r5,PAGE_FACTOR - add r5,r5,r25 -htab_special_pfn: - sldi r5,r5,HW_PAGE_SHIFT - - /* Calculate primary group hash */ - and r0,r28,r27 - rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */ - - /* Call ppc_md.hpte_insert */ - ld r6,STK_PARAM(R4)(r1) /* Retrieve new pp bits */ - mr r4,r29 /* Retrieve vpn */ - li r7,0 /* !bolted, !secondary */ - li r8,MMU_PAGE_4K /* page size */ - li r9,MMU_PAGE_4K /* actual page size */ - ld r10,STK_PARAM(R9)(r1) /* segment size */ -.globl htab_call_hpte_insert1 -htab_call_hpte_insert1: - bl . /* patched by htab_finish_init() */ - cmpdi 0,r3,0 - bge htab_pte_insert_ok /* Insertion successful */ - cmpdi 0,r3,-2 /* Critical failure */ - beq- htab_pte_insert_failure - - /* Now try secondary slot */ - - /* real page number in r5, PTE RPN value + index */ - andis. r0,r31,_PAGE_4K_PFN@h - srdi r5,r31,PTE_RPN_SHIFT - bne- 3f - sldi r5,r5,PAGE_FACTOR - add r5,r5,r25 -3: sldi r5,r5,HW_PAGE_SHIFT - - /* Calculate secondary group hash */ - andc r0,r27,r28 - rldicr r3,r0,3,63-3 /* r0 = (~hash & mask) << 3 */ - - /* Call ppc_md.hpte_insert */ - ld r6,STK_PARAM(R4)(r1) /* Retrieve new pp bits */ - mr r4,r29 /* Retrieve vpn */ - li r7,HPTE_V_SECONDARY /* !bolted, secondary */ - li r8,MMU_PAGE_4K /* page size */ - li r9,MMU_PAGE_4K /* actual page size */ - ld r10,STK_PARAM(R9)(r1) /* segment size */ -.globl htab_call_hpte_insert2 -htab_call_hpte_insert2: - bl . /* patched by htab_finish_init() */ - cmpdi 0,r3,0 - bge+ htab_pte_insert_ok /* Insertion successful */ - cmpdi 0,r3,-2 /* Critical failure */ - beq- htab_pte_insert_failure - - /* Both are full, we need to evict something */ - mftb r0 - /* Pick a random group based on TB */ - andi. r0,r0,1 - mr r5,r28 - bne 2f - not r5,r5 -2: and r0,r5,r27 - rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */ - /* Call ppc_md.hpte_remove */ -.globl htab_call_hpte_remove -htab_call_hpte_remove: - bl . /* patched by htab_finish_init() */ - - /* Try all again */ - b htab_insert_pte - - /* - * Call out to C code to invalidate an 64k HW HPTE that is - * useless now that the segment has been switched to 4k pages. - */ -htab_inval_old_hpte: - mr r3,r29 /* vpn */ - mr r4,r31 /* PTE.pte */ - li r5,0 /* PTE.hidx */ - li r6,MMU_PAGE_64K /* psize */ - ld r7,STK_PARAM(R9)(r1) /* ssize */ - ld r8,STK_PARAM(R8)(r1) /* flags */ - bl flush_hash_page - /* Clear out _PAGE_HPTE_SUB bits in the new linux PTE */ - lis r0,_PAGE_HPTE_SUB@h - ori r0,r0,_PAGE_HPTE_SUB@l - andc r30,r30,r0 - b htab_insert_pte - -htab_bail_ok: - li r3,0 - b htab_bail - -htab_pte_insert_ok: - /* Insert slot number & secondary bit in PTE second half, - * clear _PAGE_BUSY and set approriate HPTE slot bit - */ - ld r6,STK_PARAM(R6)(r1) - li r0,_PAGE_BUSY - andc r30,r30,r0 - /* HPTE SUB bit */ - li r0,1 - subfic r5,r25,27 /* Must match bit position in */ - sld r0,r0,r5 /* pgtable.h */ - or r30,r30,r0 - /* hindx */ - sldi r5,r25,2 - sld r3,r3,r5 - li r4,0xf - sld r4,r4,r5 - andc r26,r26,r4 - or r26,r26,r3 - ori r5,r6,PTE_PAGE_HIDX_OFFSET - std r26,0(r5) - lwsync - std r30,0(r6) - li r3, 0 -htab_bail: - ld r25,STK_REG(R25)(r1) - ld r26,STK_REG(R26)(r1) - ld r27,STK_REG(R27)(r1) - ld r28,STK_REG(R28)(r1) - ld r29,STK_REG(R29)(r1) - ld r30,STK_REG(R30)(r1) - ld r31,STK_REG(R31)(r1) - addi r1,r1,STACKFRAMESIZE - ld r0,16(r1) - mtlr r0 - blr - -htab_modify_pte: - /* Keep PP bits in r4 and slot idx from the PTE around in r3 */ - mr r4,r3 - sldi r5,r25,2 - srd r3,r26,r5 - - /* Secondary group ? if yes, get a inverted hash value */ - mr r5,r28 - andi. r0,r3,0x8 /* page secondary ? */ - beq 1f - not r5,r5 -1: andi. r3,r3,0x7 /* extract idx alone */ - - /* Calculate proper slot value for ppc_md.hpte_updatepp */ - and r0,r5,r27 - rldicr r0,r0,3,63-3 /* r0 = (hash & mask) << 3 */ - add r3,r0,r3 /* add slot idx */ - - /* Call ppc_md.hpte_updatepp */ - mr r5,r29 /* vpn */ - li r6,MMU_PAGE_4K /* base page size */ - li r7,MMU_PAGE_4K /* actual page size */ - ld r8,STK_PARAM(R9)(r1) /* segment size */ - ld r9,STK_PARAM(R8)(r1) /* get "flags" param */ -.globl htab_call_hpte_updatepp -htab_call_hpte_updatepp: - bl . /* patched by htab_finish_init() */ - - /* if we failed because typically the HPTE wasn't really here - * we try an insertion. - */ - cmpdi 0,r3,-1 - beq- htab_insert_pte - - /* Clear the BUSY bit and Write out the PTE */ - li r0,_PAGE_BUSY - andc r30,r30,r0 - ld r6,STK_PARAM(R6)(r1) - std r30,0(r6) - li r3,0 - b htab_bail - -htab_wrong_access: - /* Bail out clearing reservation */ - stdcx. r31,0,r6 - li r3,1 - b htab_bail - -htab_pte_insert_failure: - /* Bail out restoring old PTE */ - ld r6,STK_PARAM(R6)(r1) - std r31,0(r6) - li r3,-1 - b htab_bail - -#endif /* CONFIG_PPC_64K_PAGES */ - -#ifdef CONFIG_PPC_64K_PAGES - -/***************************************************************************** - * * - * 64K SW & 64K HW in a 64K segment pages implementation * - * * - *****************************************************************************/ - -_GLOBAL(__hash_page_64K) - mflr r0 - std r0,16(r1) - stdu r1,-STACKFRAMESIZE(r1) - /* Save all params that we need after a function call */ - std r6,STK_PARAM(R6)(r1) - std r8,STK_PARAM(R8)(r1) - std r9,STK_PARAM(R9)(r1) - - /* Save non-volatile registers. - * r31 will hold "old PTE" - * r30 is "new PTE" - * r29 is vpn - * r28 is a hash value - * r27 is hashtab mask (maybe dynamic patched instead ?) - */ - std r27,STK_REG(R27)(r1) - std r28,STK_REG(R28)(r1) - std r29,STK_REG(R29)(r1) - std r30,STK_REG(R30)(r1) - std r31,STK_REG(R31)(r1) - - /* Step 1: - * - * Check permissions, atomically mark the linux PTE busy - * and hashed. - */ -1: - ldarx r31,0,r6 - /* Check access rights (access & ~(pte_val(*ptep))) */ - andc. r0,r4,r31 - bne- ht64_wrong_access - /* Check if PTE is busy */ - andi. r0,r31,_PAGE_BUSY - /* If so, just bail out and refault if needed. Someone else - * is changing this PTE anyway and might hash it. - */ - bne- ht64_bail_ok -BEGIN_FTR_SECTION - /* Check if PTE has the cache-inhibit bit set */ - andi. r0,r31,_PAGE_NO_CACHE - /* If so, bail out and refault as a 4k page */ - bne- ht64_bail_ok -END_MMU_FTR_SECTION_IFCLR(MMU_FTR_CI_LARGE_PAGE) - /* Prepare new PTE value (turn access RW into DIRTY, then - * add BUSY and ACCESSED) - */ - rlwinm r30,r4,32-9+7,31-7,31-7 /* _PAGE_RW -> _PAGE_DIRTY */ - or r30,r30,r31 - ori r30,r30,_PAGE_BUSY | _PAGE_ACCESSED - /* Write the linux PTE atomically (setting busy) */ - stdcx. r30,0,r6 - bne- 1b - isync - - /* Step 2: - * - * Insert/Update the HPTE in the hash table. At this point, - * r4 (access) is re-useable, we use it for the new HPTE flags - */ - -BEGIN_FTR_SECTION - cmpdi r9,0 /* check segment size */ - bne 3f -END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT) - /* Calc vpn and put it in r29 */ - sldi r29,r5,SID_SHIFT - VPN_SHIFT - rldicl r28,r3,64 - VPN_SHIFT,64 - (SID_SHIFT - VPN_SHIFT) - or r29,r28,r29 - - /* Calculate hash value for primary slot and store it in r28 - * r3 = va, r5 = vsid - * r0 = (va >> 16) & ((1ul << (28 - 16)) -1) - */ - rldicl r0,r3,64-16,52 - xor r28,r5,r0 /* hash */ - b 4f - -3: /* Calc vpn and put it in r29 */ - sldi r29,r5,SID_SHIFT_1T - VPN_SHIFT - rldicl r28,r3,64 - VPN_SHIFT,64 - (SID_SHIFT_1T - VPN_SHIFT) - or r29,r28,r29 - /* - * calculate hash value for primary slot and - * store it in r28 for 1T segment - * r3 = va, r5 = vsid - */ - sldi r28,r5,25 /* vsid << 25 */ - /* r0 = (va >> 16) & ((1ul << (40 - 16)) -1) */ - rldicl r0,r3,64-16,40 - xor r28,r28,r5 /* vsid ^ ( vsid << 25) */ - xor r28,r28,r0 /* hash */ - - /* Convert linux PTE bits into HW equivalents */ -4: andi. r3,r30,0x1fe /* Get basic set of flags */ - xori r3,r3,HPTE_R_N /* _PAGE_EXEC -> NOEXEC */ - rlwinm r0,r30,32-9+1,30,30 /* _PAGE_RW -> _PAGE_USER (r0) */ - rlwinm r4,r30,32-7+1,30,30 /* _PAGE_DIRTY -> _PAGE_USER (r4) */ - and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY ->r0 bit 30*/ - andc r0,r30,r0 /* r0 = pte & ~r0 */ - rlwimi r3,r0,32-1,31,31 /* Insert result into PP lsb */ - /* - * Always add "C" bit for perf. Memory coherence is always enabled - */ - ori r3,r3,HPTE_R_C | HPTE_R_M - - /* We eventually do the icache sync here (maybe inline that - * code rather than call a C function...) - */ -BEGIN_FTR_SECTION - mr r4,r30 - mr r5,r7 - bl hash_page_do_lazy_icache -END_FTR_SECTION(CPU_FTR_NOEXECUTE|CPU_FTR_COHERENT_ICACHE, CPU_FTR_NOEXECUTE) - - /* At this point, r3 contains new PP bits, save them in - * place of "access" in the param area (sic) - */ - std r3,STK_PARAM(R4)(r1) - - /* Get htab_hash_mask */ - ld r4,htab_hash_mask@got(2) - ld r27,0(r4) /* htab_hash_mask -> r27 */ - - /* Check if we may already be in the hashtable, in this case, we - * go to out-of-line code to try to modify the HPTE - */ - rldicl. r0,r31,64-12,48 - bne ht64_modify_pte - -ht64_insert_pte: - /* Clear hpte bits in new pte (we also clear BUSY btw) and - * add _PAGE_HPTE_SUB0 - */ - lis r0,_PAGE_HPTEFLAGS@h - ori r0,r0,_PAGE_HPTEFLAGS@l - andc r30,r30,r0 -#ifdef CONFIG_PPC_64K_PAGES - oris r30,r30,_PAGE_HPTE_SUB0@h -#else - ori r30,r30,_PAGE_HASHPTE -#endif - /* Phyical address in r5 */ - rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT - sldi r5,r5,PAGE_SHIFT - - /* Calculate primary group hash */ - and r0,r28,r27 - rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */ - - /* Call ppc_md.hpte_insert */ - ld r6,STK_PARAM(R4)(r1) /* Retrieve new pp bits */ - mr r4,r29 /* Retrieve vpn */ - li r7,0 /* !bolted, !secondary */ - li r8,MMU_PAGE_64K - li r9,MMU_PAGE_64K /* actual page size */ - ld r10,STK_PARAM(R9)(r1) /* segment size */ -.globl ht64_call_hpte_insert1 -ht64_call_hpte_insert1: - bl . /* patched by htab_finish_init() */ - cmpdi 0,r3,0 - bge ht64_pte_insert_ok /* Insertion successful */ - cmpdi 0,r3,-2 /* Critical failure */ - beq- ht64_pte_insert_failure - - /* Now try secondary slot */ - - /* Phyical address in r5 */ - rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT - sldi r5,r5,PAGE_SHIFT - - /* Calculate secondary group hash */ - andc r0,r27,r28 - rldicr r3,r0,3,63-3 /* r0 = (~hash & mask) << 3 */ - - /* Call ppc_md.hpte_insert */ - ld r6,STK_PARAM(R4)(r1) /* Retrieve new pp bits */ - mr r4,r29 /* Retrieve vpn */ - li r7,HPTE_V_SECONDARY /* !bolted, secondary */ - li r8,MMU_PAGE_64K - li r9,MMU_PAGE_64K /* actual page size */ - ld r10,STK_PARAM(R9)(r1) /* segment size */ -.globl ht64_call_hpte_insert2 -ht64_call_hpte_insert2: - bl . /* patched by htab_finish_init() */ - cmpdi 0,r3,0 - bge+ ht64_pte_insert_ok /* Insertion successful */ - cmpdi 0,r3,-2 /* Critical failure */ - beq- ht64_pte_insert_failure - - /* Both are full, we need to evict something */ - mftb r0 - /* Pick a random group based on TB */ - andi. r0,r0,1 - mr r5,r28 - bne 2f - not r5,r5 -2: and r0,r5,r27 - rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */ - /* Call ppc_md.hpte_remove */ -.globl ht64_call_hpte_remove -ht64_call_hpte_remove: - bl . /* patched by htab_finish_init() */ - - /* Try all again */ - b ht64_insert_pte - -ht64_bail_ok: - li r3,0 - b ht64_bail - -ht64_pte_insert_ok: - /* Insert slot number & secondary bit in PTE */ - rldimi r30,r3,12,63-15 - - /* Write out the PTE with a normal write - * (maybe add eieio may be good still ?) - */ -ht64_write_out_pte: - ld r6,STK_PARAM(R6)(r1) - std r30,0(r6) - li r3, 0 -ht64_bail: - ld r27,STK_REG(R27)(r1) - ld r28,STK_REG(R28)(r1) - ld r29,STK_REG(R29)(r1) - ld r30,STK_REG(R30)(r1) - ld r31,STK_REG(R31)(r1) - addi r1,r1,STACKFRAMESIZE - ld r0,16(r1) - mtlr r0 - blr - -ht64_modify_pte: - /* Keep PP bits in r4 and slot idx from the PTE around in r3 */ - mr r4,r3 - rlwinm r3,r31,32-12,29,31 - - /* Secondary group ? if yes, get a inverted hash value */ - mr r5,r28 - andi. r0,r31,_PAGE_F_SECOND - beq 1f - not r5,r5 -1: - /* Calculate proper slot value for ppc_md.hpte_updatepp */ - and r0,r5,r27 - rldicr r0,r0,3,63-3 /* r0 = (hash & mask) << 3 */ - add r3,r0,r3 /* add slot idx */ - - /* Call ppc_md.hpte_updatepp */ - mr r5,r29 /* vpn */ - li r6,MMU_PAGE_64K /* base page size */ - li r7,MMU_PAGE_64K /* actual page size */ - ld r8,STK_PARAM(R9)(r1) /* segment size */ - ld r9,STK_PARAM(R8)(r1) /* get "flags" param */ -.globl ht64_call_hpte_updatepp -ht64_call_hpte_updatepp: - bl . /* patched by htab_finish_init() */ - - /* if we failed because typically the HPTE wasn't really here - * we try an insertion. - */ - cmpdi 0,r3,-1 - beq- ht64_insert_pte - - /* Clear the BUSY bit and Write out the PTE */ - li r0,_PAGE_BUSY - andc r30,r30,r0 - b ht64_write_out_pte - -ht64_wrong_access: - /* Bail out clearing reservation */ - stdcx. r31,0,r6 - li r3,1 - b ht64_bail - -ht64_pte_insert_failure: - /* Bail out restoring old PTE */ - ld r6,STK_PARAM(R6)(r1) - std r31,0(r6) - li r3,-1 - b ht64_bail - - -#endif /* CONFIG_PPC_64K_PAGES */ - - -/***************************************************************************** - * * - * Huge pages implementation is in hugetlbpage.c * - * * - *****************************************************************************/ diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index c8822af10a58..8eaac81347fd 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -429,6 +429,7 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long vpn, local_irq_restore(flags); } +#ifdef CONFIG_TRANSPARENT_HUGEPAGE static void native_hugepage_invalidate(unsigned long vsid, unsigned long addr, unsigned char *hpte_slot_array, @@ -482,6 +483,15 @@ static void native_hugepage_invalidate(unsigned long vsid, } local_irq_restore(flags); } +#else +static void native_hugepage_invalidate(unsigned long vsid, + unsigned long addr, + unsigned char *hpte_slot_array, + int psize, int ssize, int local) +{ + WARN(1, "%s called without THP support\n", __func__); +} +#endif static inline int __hpte_actual_psize(unsigned int lp, int psize) { diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 7f9616f7c479..ba59d5977f34 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -159,24 +159,41 @@ static struct mmu_psize_def mmu_psize_defaults_gp[] = { }, }; -static unsigned long htab_convert_pte_flags(unsigned long pteflags) +unsigned long htab_convert_pte_flags(unsigned long pteflags) { - unsigned long rflags = pteflags & 0x1fa; + unsigned long rflags = 0; /* _PAGE_EXEC -> NOEXEC */ if ((pteflags & _PAGE_EXEC) == 0) rflags |= HPTE_R_N; - - /* PP bits. PAGE_USER is already PP bit 0x2, so we only - * need to add in 0x1 if it's a read-only user page + /* + * PP bits: + * Linux use slb key 0 for kernel and 1 for user. + * kernel areas are mapped by PP bits 00 + * and and there is no kernel RO (_PAGE_KERNEL_RO). + * User area mapped by 0x2 and read only use by + * 0x3. */ - if ((pteflags & _PAGE_USER) && !((pteflags & _PAGE_RW) && - (pteflags & _PAGE_DIRTY))) - rflags |= 1; + if (pteflags & _PAGE_USER) { + rflags |= 0x2; + if (!((pteflags & _PAGE_RW) && (pteflags & _PAGE_DIRTY))) + rflags |= 0x1; + } /* * Always add "C" bit for perf. Memory coherence is always enabled */ - return rflags | HPTE_R_C | HPTE_R_M; + rflags |= HPTE_R_C | HPTE_R_M; + /* + * Add in WIG bits + */ + if (pteflags & _PAGE_WRITETHRU) + rflags |= HPTE_R_W; + if (pteflags & _PAGE_NO_CACHE) + rflags |= HPTE_R_I; + if (pteflags & _PAGE_GUARDED) + rflags |= HPTE_R_G; + + return rflags; } int htab_bolt_mapping(unsigned long vstart, unsigned long vend, @@ -629,46 +646,6 @@ int remove_section_mapping(unsigned long start, unsigned long end) } #endif /* CONFIG_MEMORY_HOTPLUG */ -extern u32 htab_call_hpte_insert1[]; -extern u32 htab_call_hpte_insert2[]; -extern u32 htab_call_hpte_remove[]; -extern u32 htab_call_hpte_updatepp[]; -extern u32 ht64_call_hpte_insert1[]; -extern u32 ht64_call_hpte_insert2[]; -extern u32 ht64_call_hpte_remove[]; -extern u32 ht64_call_hpte_updatepp[]; - -static void __init htab_finish_init(void) -{ -#ifdef CONFIG_PPC_64K_PAGES - patch_branch(ht64_call_hpte_insert1, - ppc_function_entry(ppc_md.hpte_insert), - BRANCH_SET_LINK); - patch_branch(ht64_call_hpte_insert2, - ppc_function_entry(ppc_md.hpte_insert), - BRANCH_SET_LINK); - patch_branch(ht64_call_hpte_remove, - ppc_function_entry(ppc_md.hpte_remove), - BRANCH_SET_LINK); - patch_branch(ht64_call_hpte_updatepp, - ppc_function_entry(ppc_md.hpte_updatepp), - BRANCH_SET_LINK); -#endif /* CONFIG_PPC_64K_PAGES */ - - patch_branch(htab_call_hpte_insert1, - ppc_function_entry(ppc_md.hpte_insert), - BRANCH_SET_LINK); - patch_branch(htab_call_hpte_insert2, - ppc_function_entry(ppc_md.hpte_insert), - BRANCH_SET_LINK); - patch_branch(htab_call_hpte_remove, - ppc_function_entry(ppc_md.hpte_remove), - BRANCH_SET_LINK); - patch_branch(htab_call_hpte_updatepp, - ppc_function_entry(ppc_md.hpte_updatepp), - BRANCH_SET_LINK); -} - static void __init htab_initialize(void) { unsigned long table; @@ -815,7 +792,6 @@ static void __init htab_initialize(void) mmu_linear_psize, mmu_kernel_ssize)); } - htab_finish_init(); DBG(" <- htab_initialize()\n"); } @@ -877,11 +853,11 @@ static unsigned int get_paca_psize(unsigned long addr) unsigned long index, mask_index; if (addr < SLICE_LOW_TOP) { - lpsizes = get_paca()->context.low_slices_psize; + lpsizes = get_paca()->mm_ctx_low_slices_psize; index = GET_LOW_SLICE_INDEX(addr); return (lpsizes >> (index * 4)) & 0xF; } - hpsizes = get_paca()->context.high_slices_psize; + hpsizes = get_paca()->mm_ctx_high_slices_psize; index = GET_HIGH_SLICE_INDEX(addr); mask_index = index & 0x1; return (hpsizes[index >> 1] >> (mask_index * 4)) & 0xF; @@ -890,7 +866,7 @@ static unsigned int get_paca_psize(unsigned long addr) #else unsigned int get_paca_psize(unsigned long addr) { - return get_paca()->context.user_psize; + return get_paca()->mm_ctx_user_psize; } #endif @@ -906,7 +882,8 @@ void demote_segment_4k(struct mm_struct *mm, unsigned long addr) slice_set_range_psize(mm, addr, 1, MMU_PAGE_4K); copro_flush_all_slbs(mm); if ((get_paca_psize(addr) != MMU_PAGE_4K) && (current->mm == mm)) { - get_paca()->context = mm->context; + + copy_mm_to_paca(&mm->context); slb_flush_and_rebolt(); } } @@ -973,7 +950,7 @@ static void check_paca_psize(unsigned long ea, struct mm_struct *mm, { if (user_region) { if (psize != get_paca_psize(ea)) { - get_paca()->context = mm->context; + copy_mm_to_paca(&mm->context); slb_flush_and_rebolt(); } } else if (get_paca()->vmalloc_sllp != @@ -1148,9 +1125,10 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, } } +#endif /* CONFIG_PPC_64K_PAGES */ + if (current->mm == mm) check_paca_psize(ea, mm, psize, user_region); -#endif /* CONFIG_PPC_64K_PAGES */ #ifdef CONFIG_PPC_64K_PAGES if (psize == MMU_PAGE_64K) @@ -1203,6 +1181,35 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap, } EXPORT_SYMBOL_GPL(hash_page); +int __hash_page(unsigned long ea, unsigned long msr, unsigned long trap, + unsigned long dsisr) +{ + unsigned long access = _PAGE_PRESENT; + unsigned long flags = 0; + struct mm_struct *mm = current->mm; + + if (REGION_ID(ea) == VMALLOC_REGION_ID) + mm = &init_mm; + + if (dsisr & DSISR_NOHPTE) + flags |= HPTE_NOHPTE_UPDATE; + + if (dsisr & DSISR_ISSTORE) + access |= _PAGE_RW; + /* + * We need to set the _PAGE_USER bit if MSR_PR is set or if we are + * accessing a userspace segment (even from the kernel). We assume + * kernel addresses always have the high bit set. + */ + if ((msr & MSR_PR) || (REGION_ID(ea) == USER_REGION_ID)) + access |= _PAGE_USER; + + if (trap == 0x400) + access |= _PAGE_EXEC; + + return hash_page_mm(mm, ea, access, trap, flags); +} + void hash_preload(struct mm_struct *mm, unsigned long ea, unsigned long access, unsigned long trap) { diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c index 4d87122cf6a7..49b152b0f926 100644 --- a/arch/powerpc/mm/hugepage-hash64.c +++ b/arch/powerpc/mm/hugepage-hash64.c @@ -39,9 +39,6 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, /* If PMD busy, retry the access */ if (unlikely(old_pmd & _PAGE_BUSY)) return 0; - /* If PMD is trans splitting retry the access */ - if (unlikely(old_pmd & _PAGE_SPLITTING)) - return 0; /* If PMD permissions don't match, take page fault */ if (unlikely(access & ~old_pmd)) return 1; @@ -54,18 +51,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, new_pmd |= _PAGE_DIRTY; } while (old_pmd != __cmpxchg_u64((unsigned long *)pmdp, old_pmd, new_pmd)); - /* - * PP bits. _PAGE_USER is already PP bit 0x2, so we only - * need to add in 0x1 if it's a read-only user page - */ - rflags = new_pmd & _PAGE_USER; - if ((new_pmd & _PAGE_USER) && !((new_pmd & _PAGE_RW) && - (new_pmd & _PAGE_DIRTY))) - rflags |= 0x1; - /* - * _PAGE_EXEC -> HW_NO_EXEC since it's inverted - */ - rflags |= ((new_pmd & _PAGE_EXEC) ? 0 : HPTE_R_N); + rflags = htab_convert_pte_flags(new_pmd); #if 0 if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) { @@ -82,7 +68,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, */ shift = mmu_psize_defs[psize].shift; index = (ea & ~HPAGE_PMD_MASK) >> shift; - BUG_ON(index >= 4096); + BUG_ON(index >= PTE_FRAG_SIZE); vpn = hpt_vpn(ea, vsid, ssize); hpte_slot_array = get_hpte_slot_array(pmdp); @@ -131,13 +117,6 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, pa = pmd_pfn(__pmd(old_pmd)) << PAGE_SHIFT; new_pmd |= _PAGE_HASHPTE; - /* Add in WIMG bits */ - rflags |= (new_pmd & (_PAGE_WRITETHRU | _PAGE_NO_CACHE | - _PAGE_GUARDED)); - /* - * enable the memory coherence always - */ - rflags |= HPTE_R_M; repeat: hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; diff --git a/arch/powerpc/mm/hugetlbpage-book3e.c b/arch/powerpc/mm/hugetlbpage-book3e.c index ba47aaf33a4b..7e6d0880813f 100644 --- a/arch/powerpc/mm/hugetlbpage-book3e.c +++ b/arch/powerpc/mm/hugetlbpage-book3e.c @@ -51,6 +51,48 @@ static inline int mmu_get_tsize(int psize) return mmu_psize_defs[psize].enc; } +#if defined(CONFIG_PPC_FSL_BOOK3E) && defined(CONFIG_PPC64) +#include <asm/paca.h> + +static inline void book3e_tlb_lock(void) +{ + struct paca_struct *paca = get_paca(); + unsigned long tmp; + int token = smp_processor_id() + 1; + + asm volatile("1: lbarx %0, 0, %1;" + "cmpwi %0, 0;" + "bne 2f;" + "stbcx. %2, 0, %1;" + "bne 1b;" + "b 3f;" + "2: lbzx %0, 0, %1;" + "cmpwi %0, 0;" + "bne 2b;" + "b 1b;" + "3:" + : "=&r" (tmp) + : "r" (&paca->tcd_ptr->lock), "r" (token) + : "memory"); +} + +static inline void book3e_tlb_unlock(void) +{ + struct paca_struct *paca = get_paca(); + + isync(); + paca->tcd_ptr->lock = 0; +} +#else +static inline void book3e_tlb_lock(void) +{ +} + +static inline void book3e_tlb_unlock(void) +{ +} +#endif + static inline int book3e_tlb_exists(unsigned long ea, unsigned long pid) { int found = 0; @@ -109,7 +151,10 @@ void book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea, */ local_irq_save(flags); + book3e_tlb_lock(); + if (unlikely(book3e_tlb_exists(ea, mm->context.id))) { + book3e_tlb_unlock(); local_irq_restore(flags); return; } @@ -141,6 +186,7 @@ void book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea, asm volatile ("tlbwe"); + book3e_tlb_unlock(); local_irq_restore(flags); } diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c index d94b1af53a93..e2138c7ae70f 100644 --- a/arch/powerpc/mm/hugetlbpage-hash64.c +++ b/arch/powerpc/mm/hugetlbpage-hash64.c @@ -59,10 +59,8 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, new_pte |= _PAGE_DIRTY; } while(old_pte != __cmpxchg_u64((unsigned long *)ptep, old_pte, new_pte)); + rflags = htab_convert_pte_flags(new_pte); - rflags = 0x2 | (!(new_pte & _PAGE_RW)); - /* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */ - rflags |= ((new_pte & _PAGE_EXEC) ? 0 : HPTE_R_N); sz = ((1UL) << shift); if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) /* No CPU has hugepages but lacks no execute, so we @@ -91,18 +89,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT; /* clear HPTE slot informations in new PTE */ -#ifdef CONFIG_PPC_64K_PAGES - new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HPTE_SUB0; -#else new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE; -#endif - /* Add in WIMG bits */ - rflags |= (new_pte & (_PAGE_WRITETHRU | _PAGE_NO_CACHE | - _PAGE_COHERENT | _PAGE_GUARDED)); - /* - * enable the memory coherence always - */ - rflags |= HPTE_R_M; slot = hpte_insert_repeating(hash, vpn, pa, rflags, 0, mmu_psize, ssize); @@ -127,3 +114,21 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, *ptep = __pte(new_pte & ~_PAGE_BUSY); return 0; } + +#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_DEBUG_VM) +/* + * This enables us to catch the wrong page directory format + * Moved here so that we can use WARN() in the call. + */ +int hugepd_ok(hugepd_t hpd) +{ + bool is_hugepd; + + /* + * We should not find this format in page directory, warn otherwise. + */ + is_hugepd = (((hpd.pd & 0x3) == 0x0) && ((hpd.pd & HUGEPD_SHIFT_MASK) != 0)); + WARN(is_hugepd, "Found wrong page directory format\n"); + return 0; +} +#endif diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 9833fee493ec..744e24bcb85c 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -53,78 +53,6 @@ static unsigned nr_gpages; #define hugepd_none(hpd) ((hpd).pd == 0) -#ifdef CONFIG_PPC_BOOK3S_64 -/* - * At this point we do the placement change only for BOOK3S 64. This would - * possibly work on other subarchs. - */ - -/* - * We have PGD_INDEX_SIZ = 12 and PTE_INDEX_SIZE = 8, so that we can have - * 16GB hugepage pte in PGD and 16MB hugepage pte at PMD; - * - * Defined in such a way that we can optimize away code block at build time - * if CONFIG_HUGETLB_PAGE=n. - */ -int pmd_huge(pmd_t pmd) -{ - /* - * leaf pte for huge page, bottom two bits != 00 - */ - return ((pmd_val(pmd) & 0x3) != 0x0); -} - -int pud_huge(pud_t pud) -{ - /* - * leaf pte for huge page, bottom two bits != 00 - */ - return ((pud_val(pud) & 0x3) != 0x0); -} - -int pgd_huge(pgd_t pgd) -{ - /* - * leaf pte for huge page, bottom two bits != 00 - */ - return ((pgd_val(pgd) & 0x3) != 0x0); -} - -#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_DEBUG_VM) -/* - * This enables us to catch the wrong page directory format - * Moved here so that we can use WARN() in the call. - */ -int hugepd_ok(hugepd_t hpd) -{ - bool is_hugepd; - - /* - * We should not find this format in page directory, warn otherwise. - */ - is_hugepd = (((hpd.pd & 0x3) == 0x0) && ((hpd.pd & HUGEPD_SHIFT_MASK) != 0)); - WARN(is_hugepd, "Found wrong page directory format\n"); - return 0; -} -#endif - -#else -int pmd_huge(pmd_t pmd) -{ - return 0; -} - -int pud_huge(pud_t pud) -{ - return 0; -} - -int pgd_huge(pgd_t pgd) -{ - return 0; -} -#endif - pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) { /* Only called for hugetlbfs pages, hence can ignore THP */ @@ -966,8 +894,8 @@ void flush_dcache_icache_hugepage(struct page *page) * We have 4 cases for pgds and pmds: * (1) invalid (all zeroes) * (2) pointer to next table, as normal; bottom 6 bits == 0 - * (3) leaf pte for huge page, bottom two bits != 00 - * (4) hugepd pointer, bottom two bits == 00, next 4 bits indicate size of table + * (3) leaf pte for huge page _PAGE_PTE set + * (4) hugepd pointer, _PAGE_PTE = 0 and bits [2..6] indicate size of table * * So long as we atomically load page table pointers we are safe against teardown, * we can follow the address down to the the page and take a ref on it. @@ -1030,10 +958,6 @@ pte_t *__find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, /* * A hugepage collapse is captured by pmd_none, because * it mark the pmd none and do a hpte invalidate. - * - * We don't worry about pmd_trans_splitting here, The - * caller if it needs to handle the splitting case - * should check for that. */ if (pmd_none(pmd)) return NULL; @@ -1071,7 +995,7 @@ int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, { unsigned long mask; unsigned long pte_end; - struct page *head, *page, *tail; + struct page *head, *page; pte_t pte; int refs; @@ -1094,7 +1018,6 @@ int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, head = pte_page(pte); page = head + ((addr & (sz-1)) >> PAGE_SHIFT); - tail = page; do { VM_BUG_ON(compound_head(page) != head); pages[*nr] = page; @@ -1116,15 +1039,5 @@ int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, return 0; } - /* - * Any tail page need their mapcount reference taken before we - * return. - */ - while (refs--) { - if (PageTail(tail)) - get_huge_page_tail(tail); - tail++; - } - return 1; } diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index d747dd7bc90b..379a6a90644b 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -87,11 +87,7 @@ static void pgd_ctor(void *addr) static void pmd_ctor(void *addr) { -#ifdef CONFIG_TRANSPARENT_HUGEPAGE - memset(addr, 0, PMD_TABLE_SIZE * 2); -#else memset(addr, 0, PMD_TABLE_SIZE); -#endif } struct kmem_cache *pgtable_cache[MAX_PGTABLE_INDEX_SIZE]; diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index 83dfcb55ffef..83dfd7925c72 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -179,6 +179,10 @@ void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, */ VM_WARN_ON((pte_val(*ptep) & (_PAGE_PRESENT | _PAGE_USER)) == (_PAGE_PRESENT | _PAGE_USER)); + /* + * Add the pte bit when tryint set a pte + */ + pte = __pte(pte_val(pte) | _PAGE_PTE); /* Note: mm->context.id might not yet have been assigned as * this context might not have been activated yet when this diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index e92cb2146b18..3124a20d0fab 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -359,7 +359,7 @@ struct page *pud_page(pud_t pud) struct page *pmd_page(pmd_t pmd) { if (pmd_trans_huge(pmd) || pmd_huge(pmd)) - return pfn_to_page(pmd_pfn(pmd)); + return pte_page(pmd_pte(pmd)); return virt_to_page(pmd_page_vaddr(pmd)); } @@ -604,55 +604,6 @@ int pmdp_clear_flush_young(struct vm_area_struct *vma, } /* - * We mark the pmd splitting and invalidate all the hpte - * entries for this hugepage. - */ -void pmdp_splitting_flush(struct vm_area_struct *vma, - unsigned long address, pmd_t *pmdp) -{ - unsigned long old, tmp; - - VM_BUG_ON(address & ~HPAGE_PMD_MASK); - -#ifdef CONFIG_DEBUG_VM - WARN_ON(!pmd_trans_huge(*pmdp)); - assert_spin_locked(&vma->vm_mm->page_table_lock); -#endif - -#ifdef PTE_ATOMIC_UPDATES - - __asm__ __volatile__( - "1: ldarx %0,0,%3\n\ - andi. %1,%0,%6\n\ - bne- 1b \n\ - ori %1,%0,%4 \n\ - stdcx. %1,0,%3 \n\ - bne- 1b" - : "=&r" (old), "=&r" (tmp), "=m" (*pmdp) - : "r" (pmdp), "i" (_PAGE_SPLITTING), "m" (*pmdp), "i" (_PAGE_BUSY) - : "cc" ); -#else - old = pmd_val(*pmdp); - *pmdp = __pmd(old | _PAGE_SPLITTING); -#endif - /* - * If we didn't had the splitting flag set, go and flush the - * HPTE entries. - */ - trace_hugepage_splitting(address, old); - if (!(old & _PAGE_SPLITTING)) { - /* We need to flush the hpte */ - if (old & _PAGE_HASHPTE) - hpte_do_hugepage_flush(vma->vm_mm, address, pmdp, old); - } - /* - * This ensures that generic code that rely on IRQ disabling - * to prevent a parallel THP split work as expected. - */ - kick_all_cpus_sync(); -} - -/* * We want to put the pgtable in pmd and use pgtable for tracking * the base page size hptes */ @@ -759,22 +710,15 @@ void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr, static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot) { - pmd_val(pmd) |= pgprot_val(pgprot); - return pmd; + return __pmd(pmd_val(pmd) | pgprot_val(pgprot)); } pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot) { - pmd_t pmd; - /* - * For a valid pte, we would have _PAGE_PRESENT always - * set. We use this to check THP page at pmd level. - * leaf pte for huge page, bottom two bits != 00 - */ - pmd_val(pmd) = pfn << PTE_RPN_SHIFT; - pmd_val(pmd) |= _PAGE_THP_HUGE; - pmd = pmd_set_protbits(pmd, pgprot); - return pmd; + unsigned long pmdv; + + pmdv = pfn << PTE_RPN_SHIFT; + return pmd_set_protbits(__pmd(pmdv), pgprot); } pmd_t mk_pmd(struct page *page, pgprot_t pgprot) @@ -784,10 +728,11 @@ pmd_t mk_pmd(struct page *page, pgprot_t pgprot) pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) { + unsigned long pmdv; - pmd_val(pmd) &= _HPAGE_CHG_MASK; - pmd = pmd_set_protbits(pmd, newprot); - return pmd; + pmdv = pmd_val(pmd); + pmdv &= _HPAGE_CHG_MASK; + return pmd_set_protbits(__pmd(pmdv), newprot); } /* diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c index 515730e499fe..825b6873391f 100644 --- a/arch/powerpc/mm/slb.c +++ b/arch/powerpc/mm/slb.c @@ -228,7 +228,7 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm) asm volatile("slbie %0" : : "r" (slbie_data)); get_paca()->slb_cache_ptr = 0; - get_paca()->context = mm->context; + copy_mm_to_paca(&mm->context); /* * preload some userspace segments into the SLB. diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c index 0f432a702870..42954f0b47ac 100644 --- a/arch/powerpc/mm/slice.c +++ b/arch/powerpc/mm/slice.c @@ -185,8 +185,7 @@ static void slice_flush_segments(void *parm) if (mm != current->active_mm) return; - /* update the paca copy of the context struct */ - get_paca()->context = current->active_mm->context; + copy_mm_to_paca(¤t->active_mm->context); local_irq_save(flags); slb_flush_and_rebolt(); diff --git a/arch/powerpc/mm/subpage-prot.c b/arch/powerpc/mm/subpage-prot.c index fa9fb5b4c66c..d5543514c1df 100644 --- a/arch/powerpc/mm/subpage-prot.c +++ b/arch/powerpc/mm/subpage-prot.c @@ -135,7 +135,7 @@ static int subpage_walk_pmd_entry(pmd_t *pmd, unsigned long addr, unsigned long end, struct mm_walk *walk) { struct vm_area_struct *vma = walk->vma; - split_huge_page_pmd(vma, addr, pmd); + split_huge_pmd(vma, pmd, addr); return 0; } diff --git a/arch/powerpc/platforms/82xx/ep8248e.c b/arch/powerpc/platforms/82xx/ep8248e.c index a0cb8bd41958..6781bda117be 100644 --- a/arch/powerpc/platforms/82xx/ep8248e.c +++ b/arch/powerpc/platforms/82xx/ep8248e.c @@ -131,23 +131,15 @@ static int ep8248e_mdio_probe(struct platform_device *ofdev) if (!bus) return -ENOMEM; - bus->irq = kmalloc(sizeof(int) * PHY_MAX_ADDR, GFP_KERNEL); - if (bus->irq == NULL) { - ret = -ENOMEM; - goto err_free_bus; - } - bus->name = "ep8248e-mdio-bitbang"; bus->parent = &ofdev->dev; snprintf(bus->id, MII_BUS_ID_SIZE, "%x", res.start); ret = of_mdiobus_register(bus, ofdev->dev.of_node); if (ret) - goto err_free_irq; + goto err_free_bus; return 0; -err_free_irq: - kfree(bus->irq); err_free_bus: free_mdio_bitbang(bus); return ret; diff --git a/arch/powerpc/platforms/83xx/km83xx.c b/arch/powerpc/platforms/83xx/km83xx.c index bf4c4473abb9..4bc6bbbe9ada 100644 --- a/arch/powerpc/platforms/83xx/km83xx.c +++ b/arch/powerpc/platforms/83xx/km83xx.c @@ -37,8 +37,8 @@ #include <asm/udbg.h> #include <sysdev/fsl_soc.h> #include <sysdev/fsl_pci.h> -#include <asm/qe.h> -#include <asm/qe_ic.h> +#include <soc/fsl/qe/qe.h> +#include <soc/fsl/qe/qe_ic.h> #include "mpc83xx.h" @@ -136,8 +136,6 @@ static void __init mpc83xx_km_setup_arch(void) mpc83xx_setup_pci(); #ifdef CONFIG_QUICC_ENGINE - qe_reset(); - np = of_find_node_by_name(NULL, "par_io"); if (np != NULL) { par_io_init(np); diff --git a/arch/powerpc/platforms/83xx/misc.c b/arch/powerpc/platforms/83xx/misc.c index ef9d01a049c1..7e923cad56cf 100644 --- a/arch/powerpc/platforms/83xx/misc.c +++ b/arch/powerpc/platforms/83xx/misc.c @@ -17,7 +17,7 @@ #include <asm/io.h> #include <asm/hw_irq.h> #include <asm/ipic.h> -#include <asm/qe_ic.h> +#include <soc/fsl/qe/qe_ic.h> #include <sysdev/fsl_soc.h> #include <sysdev/fsl_pci.h> diff --git a/arch/powerpc/platforms/83xx/mpc832x_mds.c b/arch/powerpc/platforms/83xx/mpc832x_mds.c index 8d762203eeff..a973b2ae5df6 100644 --- a/arch/powerpc/platforms/83xx/mpc832x_mds.c +++ b/arch/powerpc/platforms/83xx/mpc832x_mds.c @@ -36,8 +36,8 @@ #include <asm/udbg.h> #include <sysdev/fsl_soc.h> #include <sysdev/fsl_pci.h> -#include <asm/qe.h> -#include <asm/qe_ic.h> +#include <soc/fsl/qe/qe.h> +#include <soc/fsl/qe/qe_ic.h> #include "mpc83xx.h" @@ -74,8 +74,6 @@ static void __init mpc832x_sys_setup_arch(void) mpc83xx_setup_pci(); #ifdef CONFIG_QUICC_ENGINE - qe_reset(); - if ((np = of_find_node_by_name(NULL, "par_io")) != NULL) { par_io_init(np); of_node_put(np); diff --git a/arch/powerpc/platforms/83xx/mpc832x_rdb.c b/arch/powerpc/platforms/83xx/mpc832x_rdb.c index eff5baabc3fb..ea2b87d202ca 100644 --- a/arch/powerpc/platforms/83xx/mpc832x_rdb.c +++ b/arch/powerpc/platforms/83xx/mpc832x_rdb.c @@ -25,8 +25,8 @@ #include <asm/time.h> #include <asm/ipic.h> #include <asm/udbg.h> -#include <asm/qe.h> -#include <asm/qe_ic.h> +#include <soc/fsl/qe/qe.h> +#include <soc/fsl/qe/qe_ic.h> #include <sysdev/fsl_soc.h> #include <sysdev/fsl_pci.h> @@ -203,8 +203,6 @@ static void __init mpc832x_rdb_setup_arch(void) mpc83xx_setup_pci(); #ifdef CONFIG_QUICC_ENGINE - qe_reset(); - if ((np = of_find_node_by_name(NULL, "par_io")) != NULL) { par_io_init(np); of_node_put(np); diff --git a/arch/powerpc/platforms/83xx/mpc836x_mds.c b/arch/powerpc/platforms/83xx/mpc836x_mds.c index 1a26d2f83401..dd70b85f56d4 100644 --- a/arch/powerpc/platforms/83xx/mpc836x_mds.c +++ b/arch/powerpc/platforms/83xx/mpc836x_mds.c @@ -44,8 +44,8 @@ #include <sysdev/fsl_soc.h> #include <sysdev/fsl_pci.h> #include <sysdev/simple_gpio.h> -#include <asm/qe.h> -#include <asm/qe_ic.h> +#include <soc/fsl/qe/qe.h> +#include <soc/fsl/qe/qe_ic.h> #include "mpc83xx.h" @@ -82,8 +82,6 @@ static void __init mpc836x_mds_setup_arch(void) mpc83xx_setup_pci(); #ifdef CONFIG_QUICC_ENGINE - qe_reset(); - if ((np = of_find_node_by_name(NULL, "par_io")) != NULL) { par_io_init(np); of_node_put(np); diff --git a/arch/powerpc/platforms/83xx/mpc836x_rdk.c b/arch/powerpc/platforms/83xx/mpc836x_rdk.c index b63b42d11d6c..4cd7153a6c88 100644 --- a/arch/powerpc/platforms/83xx/mpc836x_rdk.c +++ b/arch/powerpc/platforms/83xx/mpc836x_rdk.c @@ -20,8 +20,8 @@ #include <asm/time.h> #include <asm/ipic.h> #include <asm/udbg.h> -#include <asm/qe.h> -#include <asm/qe_ic.h> +#include <soc/fsl/qe/qe.h> +#include <soc/fsl/qe/qe_ic.h> #include <sysdev/fsl_soc.h> #include <sysdev/fsl_pci.h> @@ -35,9 +35,6 @@ static void __init mpc836x_rdk_setup_arch(void) ppc_md.progress("mpc836x_rdk_setup_arch()", 0); mpc83xx_setup_pci(); -#ifdef CONFIG_QUICC_ENGINE - qe_reset(); -#endif } /* diff --git a/arch/powerpc/platforms/85xx/bsc913x_qds.c b/arch/powerpc/platforms/85xx/bsc913x_qds.c index f0927e58af25..dcfafd6b91ee 100644 --- a/arch/powerpc/platforms/85xx/bsc913x_qds.c +++ b/arch/powerpc/platforms/85xx/bsc913x_qds.c @@ -17,6 +17,7 @@ #include <linux/pci.h> #include <asm/mpic.h> #include <sysdev/fsl_soc.h> +#include <sysdev/fsl_pci.h> #include <asm/udbg.h> #include "mpc85xx.h" @@ -46,10 +47,12 @@ static void __init bsc913x_qds_setup_arch(void) mpc85xx_smp_init(); #endif + fsl_pci_assign_primary(); + pr_info("bsc913x board from Freescale Semiconductor\n"); } -machine_device_initcall(bsc9132_qds, mpc85xx_common_publish_devices); +machine_arch_initcall(bsc9132_qds, mpc85xx_common_publish_devices); /* * Called very early, device-tree isn't unflattened @@ -67,6 +70,9 @@ define_machine(bsc9132_qds) { .probe = bsc9132_qds_probe, .setup_arch = bsc913x_qds_setup_arch, .init_IRQ = bsc913x_qds_pic_init, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, +#endif .get_irq = mpic_get_irq, .restart = fsl_rstcr_restart, .calibrate_decr = generic_calibrate_decr, diff --git a/arch/powerpc/platforms/85xx/common.c b/arch/powerpc/platforms/85xx/common.c index 23791de7b688..949f22c86e61 100644 --- a/arch/powerpc/platforms/85xx/common.c +++ b/arch/powerpc/platforms/85xx/common.c @@ -9,7 +9,7 @@ #include <linux/of_irq.h> #include <linux/of_platform.h> -#include <asm/qe.h> +#include <soc/fsl/qe/qe.h> #include <sysdev/cpm2_pic.h> #include "mpc85xx.h" @@ -105,7 +105,6 @@ void __init mpc85xx_qe_init(void) return; } - qe_reset(); of_node_put(np); } diff --git a/arch/powerpc/platforms/85xx/corenet_generic.c b/arch/powerpc/platforms/85xx/corenet_generic.c index 46d05c94add6..a2b0bc859de0 100644 --- a/arch/powerpc/platforms/85xx/corenet_generic.c +++ b/arch/powerpc/platforms/85xx/corenet_generic.c @@ -27,7 +27,7 @@ #include <asm/udbg.h> #include <asm/mpic.h> #include <asm/ehv_pic.h> -#include <asm/qe_ic.h> +#include <soc/fsl/qe/qe_ic.h> #include <linux/of_platform.h> #include <sysdev/fsl_soc.h> diff --git a/arch/powerpc/platforms/85xx/mpc85xx_ads.c b/arch/powerpc/platforms/85xx/mpc85xx_ads.c index 7d12a19aa7ee..de72a5f464b1 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_ads.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_ads.c @@ -36,17 +36,6 @@ #include "mpc85xx.h" -#ifdef CONFIG_PCI -static int mpc85xx_exclude_device(struct pci_controller *hose, - u_char bus, u_char devfn) -{ - if (bus == 0 && PCI_SLOT(devfn) == 0) - return PCIBIOS_DEVICE_NOT_FOUND; - else - return PCIBIOS_SUCCESSFUL; -} -#endif /* CONFIG_PCI */ - static void __init mpc85xx_ads_pic_init(void) { struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN, @@ -145,10 +134,6 @@ static void __init mpc85xx_ads_setup_arch(void) init_ioports(); #endif -#ifdef CONFIG_PCI - ppc_md.pci_exclude_device = mpc85xx_exclude_device; -#endif - fsl_pci_assign_primary(); } diff --git a/arch/powerpc/platforms/85xx/mpc85xx_mds.c b/arch/powerpc/platforms/85xx/mpc85xx_mds.c index f0be439ceaaa..f61cbe235581 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_mds.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_mds.c @@ -48,8 +48,8 @@ #include <sysdev/fsl_soc.h> #include <sysdev/fsl_pci.h> #include <sysdev/simple_gpio.h> -#include <asm/qe.h> -#include <asm/qe_ic.h> +#include <soc/fsl/qe/qe.h> +#include <soc/fsl/qe/qe_ic.h> #include <asm/mpic.h> #include <asm/swiotlb.h> #include "smp.h" diff --git a/arch/powerpc/platforms/85xx/mpc85xx_rdb.c b/arch/powerpc/platforms/85xx/mpc85xx_rdb.c index 50dcc00a0f5a..3f4dad133338 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_rdb.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_rdb.c @@ -26,8 +26,8 @@ #include <asm/prom.h> #include <asm/udbg.h> #include <asm/mpic.h> -#include <asm/qe.h> -#include <asm/qe_ic.h> +#include <soc/fsl/qe/qe.h> +#include <soc/fsl/qe/qe_ic.h> #include <sysdev/fsl_soc.h> #include <sysdev/fsl_pci.h> diff --git a/arch/powerpc/platforms/85xx/twr_p102x.c b/arch/powerpc/platforms/85xx/twr_p102x.c index 892e613519cc..71bc255b4324 100644 --- a/arch/powerpc/platforms/85xx/twr_p102x.c +++ b/arch/powerpc/platforms/85xx/twr_p102x.c @@ -22,8 +22,8 @@ #include <asm/pci-bridge.h> #include <asm/udbg.h> #include <asm/mpic.h> -#include <asm/qe.h> -#include <asm/qe_ic.h> +#include <soc/fsl/qe/qe.h> +#include <soc/fsl/qe/qe_ic.h> #include <sysdev/fsl_soc.h> #include <sysdev/fsl_pci.h> diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig index b7f9c408bf24..46a3533d3acb 100644 --- a/arch/powerpc/platforms/Kconfig +++ b/arch/powerpc/platforms/Kconfig @@ -272,17 +272,6 @@ config TAU_AVERAGE If in doubt, say N here. -config QUICC_ENGINE - bool "Freescale QUICC Engine (QE) Support" - depends on FSL_SOC && PPC32 - select PPC_LIB_RHEAP - select CRC32 - help - The QUICC Engine (QE) is a new generation of communications - coprocessors on Freescale embedded CPUs (akin to CPM in older chips). - Selecting this option means that you wish to build a kernel - for a machine with a QE coprocessor. - config QE_GPIO bool "QE GPIO support" depends on QUICC_ENGINE @@ -295,7 +284,6 @@ config CPM2 bool "Enable support for the CPM2 (Communications Processor Module)" depends on (FSL_SOC_BOOKE && PPC32) || 8260 select CPM - select PPC_LIB_RHEAP select PPC_PCI_CHOICE select ARCH_REQUIRE_GPIOLIB help @@ -325,6 +313,7 @@ config FSL_ULI1575 config CPM bool + select GENERIC_ALLOCATOR config OF_RTC bool diff --git a/arch/powerpc/platforms/cell/Kconfig b/arch/powerpc/platforms/cell/Kconfig index 429fc59d2a47..d9088f0b8fcc 100644 --- a/arch/powerpc/platforms/cell/Kconfig +++ b/arch/powerpc/platforms/cell/Kconfig @@ -33,11 +33,6 @@ config PPC_IBM_CELL_BLADE select PPC_UDBG_16550 select UDBG_RTAS_CONSOLE -config PPC_CELL_QPACE - bool "IBM Cell - QPACE" - depends on PPC64 && PPC_BOOK3S && CPU_BIG_ENDIAN - select PPC_CELL_COMMON - config AXON_MSI bool depends on PPC_IBM_CELL_BLADE && PCI_MSI diff --git a/arch/powerpc/platforms/cell/Makefile b/arch/powerpc/platforms/cell/Makefile index 34699bddfddd..00464305763d 100644 --- a/arch/powerpc/platforms/cell/Makefile +++ b/arch/powerpc/platforms/cell/Makefile @@ -11,7 +11,6 @@ obj-$(CONFIG_PPC_IBM_CELL_POWERBUTTON) += cbe_powerbutton.o ifeq ($(CONFIG_SMP),y) obj-$(CONFIG_PPC_CELL_NATIVE) += smp.o -obj-$(CONFIG_PPC_CELL_QPACE) += smp.o endif # needed only when building loadable spufs.ko @@ -26,6 +25,3 @@ obj-$(CONFIG_SPU_BASE) += spu_callbacks.o spu_base.o \ spufs/ obj-$(CONFIG_AXON_MSI) += axon_msi.o - -# qpace setup -obj-$(CONFIG_PPC_CELL_QPACE) += qpace_setup.o diff --git a/arch/powerpc/platforms/cell/qpace_setup.c b/arch/powerpc/platforms/cell/qpace_setup.c deleted file mode 100644 index d328140dc6f5..000000000000 --- a/arch/powerpc/platforms/cell/qpace_setup.c +++ /dev/null @@ -1,148 +0,0 @@ -/* - * linux/arch/powerpc/platforms/cell/qpace_setup.c - * - * Copyright (C) 1995 Linus Torvalds - * Adapted from 'alpha' version by Gary Thomas - * Modified by Cort Dougan (cort@cs.nmt.edu) - * Modified by PPC64 Team, IBM Corp - * Modified by Cell Team, IBM Deutschland Entwicklung GmbH - * Modified by Benjamin Krill <ben@codiert.org>, IBM Corp. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <linux/sched.h> -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/export.h> -#include <linux/delay.h> -#include <linux/irq.h> -#include <linux/console.h> -#include <linux/of_platform.h> - -#include <asm/mmu.h> -#include <asm/processor.h> -#include <asm/io.h> -#include <asm/kexec.h> -#include <asm/pgtable.h> -#include <asm/prom.h> -#include <asm/rtas.h> -#include <asm/dma.h> -#include <asm/machdep.h> -#include <asm/time.h> -#include <asm/cputable.h> -#include <asm/irq.h> -#include <asm/spu.h> -#include <asm/spu_priv1.h> -#include <asm/udbg.h> -#include <asm/cell-regs.h> - -#include "interrupt.h" -#include "pervasive.h" -#include "ras.h" - -static void qpace_show_cpuinfo(struct seq_file *m) -{ - struct device_node *root; - const char *model = ""; - - root = of_find_node_by_path("/"); - if (root) - model = of_get_property(root, "model", NULL); - seq_printf(m, "machine\t\t: CHRP %s\n", model); - of_node_put(root); -} - -static void qpace_progress(char *s, unsigned short hex) -{ - printk("*** %04x : %s\n", hex, s ? s : ""); -} - -static const struct of_device_id qpace_bus_ids[] __initconst = { - { .type = "soc", }, - { .compatible = "soc", }, - { .type = "spider", }, - { .type = "axon", }, - { .type = "plb5", }, - { .type = "plb4", }, - { .type = "opb", }, - { .type = "ebc", }, - {}, -}; - -static int __init qpace_publish_devices(void) -{ - int node; - - /* Publish OF platform devices for southbridge IOs */ - of_platform_bus_probe(NULL, qpace_bus_ids, NULL); - - /* There is no device for the MIC memory controller, thus we create - * a platform device for it to attach the EDAC driver to. - */ - for_each_online_node(node) { - if (cbe_get_cpu_mic_tm_regs(cbe_node_to_cpu(node)) == NULL) - continue; - platform_device_register_simple("cbe-mic", node, NULL, 0); - } - - return 0; -} -machine_subsys_initcall(qpace, qpace_publish_devices); - -static void __init qpace_setup_arch(void) -{ -#ifdef CONFIG_SPU_BASE - spu_priv1_ops = &spu_priv1_mmio_ops; - spu_management_ops = &spu_management_of_ops; -#endif - - cbe_regs_init(); - -#ifdef CONFIG_CBE_RAS - cbe_ras_init(); -#endif - -#ifdef CONFIG_SMP - smp_init_cell(); -#endif - - /* init to some ~sane value until calibrate_delay() runs */ - loops_per_jiffy = 50000000; - - cbe_pervasive_init(); -#ifdef CONFIG_DUMMY_CONSOLE - conswitchp = &dummy_con; -#endif -} - -static int __init qpace_probe(void) -{ - unsigned long root = of_get_flat_dt_root(); - - if (!of_flat_dt_is_compatible(root, "IBM,QPACE")) - return 0; - - hpte_init_native(); - pm_power_off = rtas_power_off; - - return 1; -} - -define_machine(qpace) { - .name = "QPACE", - .probe = qpace_probe, - .setup_arch = qpace_setup_arch, - .show_cpuinfo = qpace_show_cpuinfo, - .restart = rtas_restart, - .halt = rtas_halt, - .get_boot_time = rtas_get_boot_time, - .get_rtc_time = rtas_get_rtc_time, - .set_rtc_time = rtas_set_rtc_time, - .calibrate_decr = generic_calibrate_decr, - .progress = qpace_progress, - .init_IRQ = iic_init_IRQ, -}; diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 11634fa7ab3c..ad4840f86be1 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -767,7 +767,7 @@ static int __init spufs_init(void) ret = -ENOMEM; spufs_inode_cache = kmem_cache_create("spufs_inode_cache", sizeof(struct spufs_inode_info), 0, - SLAB_HWCACHE_ALIGN, spufs_init_once); + SLAB_HWCACHE_ALIGN|SLAB_ACCOUNT, spufs_init_once); if (!spufs_inode_cache) goto out; diff --git a/arch/powerpc/platforms/cell/spufs/run.c b/arch/powerpc/platforms/cell/spufs/run.c index 4ddf769a64e5..9f79004e6d6f 100644 --- a/arch/powerpc/platforms/cell/spufs/run.c +++ b/arch/powerpc/platforms/cell/spufs/run.c @@ -326,7 +326,7 @@ static int spu_process_callback(struct spu_context *ctx) spu_ret = -ENOSYS; npc += 4; - if (s.nr_ret < __NR_syscalls) { + if (s.nr_ret < NR_syscalls) { spu_release(ctx); /* do actual system call from here */ spu_ret = spu_sys_callback(&s); diff --git a/arch/powerpc/platforms/maple/time.c b/arch/powerpc/platforms/maple/time.c index b4a369dac3a8..81799d70a1ee 100644 --- a/arch/powerpc/platforms/maple/time.c +++ b/arch/powerpc/platforms/maple/time.c @@ -77,7 +77,7 @@ void maple_get_rtc_time(struct rtc_time *tm) if ((tm->tm_year + 1900) < 1970) tm->tm_year += 100; - GregorianDay(tm); + tm->tm_wday = -1; } int maple_set_rtc_time(struct rtc_time *tm) diff --git a/arch/powerpc/platforms/pasemi/gpio_mdio.c b/arch/powerpc/platforms/pasemi/gpio_mdio.c index ae3f47b25b18..ddf635000c6b 100644 --- a/arch/powerpc/platforms/pasemi/gpio_mdio.c +++ b/arch/powerpc/platforms/pasemi/gpio_mdio.c @@ -41,7 +41,6 @@ static void __iomem *gpio_regs; struct gpio_priv { int mdc_pin; int mdio_pin; - int mdio_irqs[PHY_MAX_ADDR]; }; #define MDC_PIN(bus) (((struct gpio_priv *)bus->priv)->mdc_pin) @@ -245,8 +244,6 @@ static int gpio_mdio_probe(struct platform_device *ofdev) snprintf(new_bus->id, MII_BUS_ID_SIZE, "%x", *prop); new_bus->priv = priv; - new_bus->irq = priv->mdio_irqs; - prop = of_get_property(np, "mdc-pin", NULL); priv->mdc_pin = *prop; diff --git a/arch/powerpc/platforms/powermac/bootx_init.c b/arch/powerpc/platforms/powermac/bootx_init.c index 76f5013c35e5..c3c9bbb3573a 100644 --- a/arch/powerpc/platforms/powermac/bootx_init.c +++ b/arch/powerpc/platforms/powermac/bootx_init.c @@ -84,6 +84,7 @@ static void __init bootx_printf(const char *format, ...) break; } } + va_end(args); } #else /* CONFIG_BOOTX_TEXT */ static void __init bootx_printf(const char *format, ...) {} diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c index 6f4f8b060def..981546345033 100644 --- a/arch/powerpc/platforms/powermac/pic.c +++ b/arch/powerpc/platforms/powermac/pic.c @@ -258,13 +258,14 @@ static unsigned int pmac_pic_get_irq(void) #ifdef CONFIG_XMON static struct irqaction xmon_action = { .handler = xmon_irq, - .flags = 0, + .flags = IRQF_NO_THREAD, .name = "NMI - XMON" }; #endif static struct irqaction gatwick_cascade_action = { .handler = gatwick_action, + .flags = IRQF_NO_THREAD, .name = "cascade", }; diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile index 1c8cdb6250e7..f1516b5ecec9 100644 --- a/arch/powerpc/platforms/powernv/Makefile +++ b/arch/powerpc/platforms/powernv/Makefile @@ -2,9 +2,10 @@ obj-y += setup.o opal-wrappers.o opal.o opal-async.o idle.o obj-y += opal-rtc.o opal-nvram.o opal-lpc.o opal-flash.o obj-y += rng.o opal-elog.o opal-dump.o opal-sysparam.o opal-sensor.o obj-y += opal-msglog.o opal-hmi.o opal-power.o opal-irqchip.o +obj-y += opal-kmsg.o obj-$(CONFIG_SMP) += smp.o subcore.o subcore-asm.o -obj-$(CONFIG_PCI) += pci.o pci-p5ioc2.o pci-ioda.o +obj-$(CONFIG_PCI) += pci.o pci-p5ioc2.o pci-ioda.o npu-dma.o obj-$(CONFIG_EEH) += eeh-powernv.o obj-$(CONFIG_PPC_SCOM) += opal-xscom.o obj-$(CONFIG_MEMORY_FAILURE) += opal-memory-errors.o diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index e1c90725522a..5f152b95ca0c 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -48,8 +48,8 @@ static int pnv_eeh_init(void) struct pci_controller *hose; struct pnv_phb *phb; - if (!firmware_has_feature(FW_FEATURE_OPALv3)) { - pr_warn("%s: OPALv3 is required !\n", + if (!firmware_has_feature(FW_FEATURE_OPAL)) { + pr_warn("%s: OPAL is required !\n", __func__); return -EINVAL; } diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c index 59d735d2e5c0..15bfbcd5debc 100644 --- a/arch/powerpc/platforms/powernv/idle.c +++ b/arch/powerpc/platforms/powernv/idle.c @@ -242,7 +242,7 @@ static int __init pnv_init_idle_states(void) if (cpuidle_disable != IDLE_NO_OVERRIDE) goto out; - if (!firmware_has_feature(FW_FEATURE_OPALv3)) + if (!firmware_has_feature(FW_FEATURE_OPAL)) goto out; power_mgt = of_find_node_by_path("/ibm,opal/power-mgt"); diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c new file mode 100644 index 000000000000..e85aa900f5c0 --- /dev/null +++ b/arch/powerpc/platforms/powernv/npu-dma.c @@ -0,0 +1,348 @@ +/* + * This file implements the DMA operations for NVLink devices. The NPU + * devices all point to the same iommu table as the parent PCI device. + * + * Copyright Alistair Popple, IBM Corporation 2015. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ + +#include <linux/export.h> +#include <linux/pci.h> +#include <linux/memblock.h> + +#include <asm/iommu.h> +#include <asm/pnv-pci.h> +#include <asm/msi_bitmap.h> +#include <asm/opal.h> + +#include "powernv.h" +#include "pci.h" + +/* + * Other types of TCE cache invalidation are not functional in the + * hardware. + */ +#define TCE_KILL_INVAL_ALL PPC_BIT(0) + +static struct pci_dev *get_pci_dev(struct device_node *dn) +{ + return PCI_DN(dn)->pcidev; +} + +/* Given a NPU device get the associated PCI device. */ +struct pci_dev *pnv_pci_get_gpu_dev(struct pci_dev *npdev) +{ + struct device_node *dn; + struct pci_dev *gpdev; + + /* Get assoicated PCI device */ + dn = of_parse_phandle(npdev->dev.of_node, "ibm,gpu", 0); + if (!dn) + return NULL; + + gpdev = get_pci_dev(dn); + of_node_put(dn); + + return gpdev; +} +EXPORT_SYMBOL(pnv_pci_get_gpu_dev); + +/* Given the real PCI device get a linked NPU device. */ +struct pci_dev *pnv_pci_get_npu_dev(struct pci_dev *gpdev, int index) +{ + struct device_node *dn; + struct pci_dev *npdev; + + /* Get assoicated PCI device */ + dn = of_parse_phandle(gpdev->dev.of_node, "ibm,npu", index); + if (!dn) + return NULL; + + npdev = get_pci_dev(dn); + of_node_put(dn); + + return npdev; +} +EXPORT_SYMBOL(pnv_pci_get_npu_dev); + +#define NPU_DMA_OP_UNSUPPORTED() \ + dev_err_once(dev, "%s operation unsupported for NVLink devices\n", \ + __func__) + +static void *dma_npu_alloc(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t flag, + struct dma_attrs *attrs) +{ + NPU_DMA_OP_UNSUPPORTED(); + return NULL; +} + +static void dma_npu_free(struct device *dev, size_t size, + void *vaddr, dma_addr_t dma_handle, + struct dma_attrs *attrs) +{ + NPU_DMA_OP_UNSUPPORTED(); +} + +static dma_addr_t dma_npu_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction direction, + struct dma_attrs *attrs) +{ + NPU_DMA_OP_UNSUPPORTED(); + return 0; +} + +static int dma_npu_map_sg(struct device *dev, struct scatterlist *sglist, + int nelems, enum dma_data_direction direction, + struct dma_attrs *attrs) +{ + NPU_DMA_OP_UNSUPPORTED(); + return 0; +} + +static int dma_npu_dma_supported(struct device *dev, u64 mask) +{ + NPU_DMA_OP_UNSUPPORTED(); + return 0; +} + +static u64 dma_npu_get_required_mask(struct device *dev) +{ + NPU_DMA_OP_UNSUPPORTED(); + return 0; +} + +struct dma_map_ops dma_npu_ops = { + .map_page = dma_npu_map_page, + .map_sg = dma_npu_map_sg, + .alloc = dma_npu_alloc, + .free = dma_npu_free, + .dma_supported = dma_npu_dma_supported, + .get_required_mask = dma_npu_get_required_mask, +}; + +/* + * Returns the PE assoicated with the PCI device of the given + * NPU. Returns the linked pci device if pci_dev != NULL. + */ +static struct pnv_ioda_pe *get_gpu_pci_dev_and_pe(struct pnv_ioda_pe *npe, + struct pci_dev **gpdev) +{ + struct pnv_phb *phb; + struct pci_controller *hose; + struct pci_dev *pdev; + struct pnv_ioda_pe *pe; + struct pci_dn *pdn; + + if (npe->flags & PNV_IODA_PE_PEER) { + pe = npe->peers[0]; + pdev = pe->pdev; + } else { + pdev = pnv_pci_get_gpu_dev(npe->pdev); + if (!pdev) + return NULL; + + pdn = pci_get_pdn(pdev); + if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE)) + return NULL; + + hose = pci_bus_to_host(pdev->bus); + phb = hose->private_data; + pe = &phb->ioda.pe_array[pdn->pe_number]; + } + + if (gpdev) + *gpdev = pdev; + + return pe; +} + +void pnv_npu_tce_invalidate_entire(struct pnv_ioda_pe *npe) +{ + struct pnv_phb *phb = npe->phb; + + if (WARN_ON(phb->type != PNV_PHB_NPU || + !phb->ioda.tce_inval_reg || + !(npe->flags & PNV_IODA_PE_DEV))) + return; + + mb(); /* Ensure previous TCE table stores are visible */ + __raw_writeq(cpu_to_be64(TCE_KILL_INVAL_ALL), + phb->ioda.tce_inval_reg); +} + +void pnv_npu_tce_invalidate(struct pnv_ioda_pe *npe, + struct iommu_table *tbl, + unsigned long index, + unsigned long npages, + bool rm) +{ + struct pnv_phb *phb = npe->phb; + + /* We can only invalidate the whole cache on NPU */ + unsigned long val = TCE_KILL_INVAL_ALL; + + if (WARN_ON(phb->type != PNV_PHB_NPU || + !phb->ioda.tce_inval_reg || + !(npe->flags & PNV_IODA_PE_DEV))) + return; + + mb(); /* Ensure previous TCE table stores are visible */ + if (rm) + __raw_rm_writeq(cpu_to_be64(val), + (__be64 __iomem *) phb->ioda.tce_inval_reg_phys); + else + __raw_writeq(cpu_to_be64(val), + phb->ioda.tce_inval_reg); +} + +void pnv_npu_init_dma_pe(struct pnv_ioda_pe *npe) +{ + struct pnv_ioda_pe *gpe; + struct pci_dev *gpdev; + int i, avail = -1; + + if (!npe->pdev || !(npe->flags & PNV_IODA_PE_DEV)) + return; + + gpe = get_gpu_pci_dev_and_pe(npe, &gpdev); + if (!gpe) + return; + + for (i = 0; i < PNV_IODA_MAX_PEER_PES; i++) { + /* Nothing to do if the PE is already connected. */ + if (gpe->peers[i] == npe) + return; + + if (!gpe->peers[i]) + avail = i; + } + + if (WARN_ON(avail < 0)) + return; + + gpe->peers[avail] = npe; + gpe->flags |= PNV_IODA_PE_PEER; + + /* + * We assume that the NPU devices only have a single peer PE + * (the GPU PCIe device PE). + */ + npe->peers[0] = gpe; + npe->flags |= PNV_IODA_PE_PEER; +} + +/* + * For the NPU we want to point the TCE table at the same table as the + * real PCI device. + */ +static void pnv_npu_disable_bypass(struct pnv_ioda_pe *npe) +{ + struct pnv_phb *phb = npe->phb; + struct pci_dev *gpdev; + struct pnv_ioda_pe *gpe; + void *addr; + unsigned int size; + int64_t rc; + + /* + * Find the assoicated PCI devices and get the dma window + * information from there. + */ + if (!npe->pdev || !(npe->flags & PNV_IODA_PE_DEV)) + return; + + gpe = get_gpu_pci_dev_and_pe(npe, &gpdev); + if (!gpe) + return; + + addr = (void *)gpe->table_group.tables[0]->it_base; + size = gpe->table_group.tables[0]->it_size << 3; + rc = opal_pci_map_pe_dma_window(phb->opal_id, npe->pe_number, + npe->pe_number, 1, __pa(addr), + size, 0x1000); + if (rc != OPAL_SUCCESS) + pr_warn("%s: Error %lld setting DMA window on PHB#%d-PE#%d\n", + __func__, rc, phb->hose->global_number, npe->pe_number); + + /* + * We don't initialise npu_pe->tce32_table as we always use + * dma_npu_ops which are nops. + */ + set_dma_ops(&npe->pdev->dev, &dma_npu_ops); +} + +/* + * Enable/disable bypass mode on the NPU. The NPU only supports one + * window per link, so bypass needs to be explicity enabled or + * disabled. Unlike for a PHB3 bypass and non-bypass modes can't be + * active at the same time. + */ +int pnv_npu_dma_set_bypass(struct pnv_ioda_pe *npe, bool enable) +{ + struct pnv_phb *phb = npe->phb; + int64_t rc = 0; + + if (phb->type != PNV_PHB_NPU || !npe->pdev) + return -EINVAL; + + if (enable) { + /* Enable the bypass window */ + phys_addr_t top = memblock_end_of_DRAM(); + + npe->tce_bypass_base = 0; + top = roundup_pow_of_two(top); + dev_info(&npe->pdev->dev, "Enabling bypass for PE %d\n", + npe->pe_number); + rc = opal_pci_map_pe_dma_window_real(phb->opal_id, + npe->pe_number, npe->pe_number, + npe->tce_bypass_base, top); + } else { + /* + * Disable the bypass window by replacing it with the + * TCE32 window. + */ + pnv_npu_disable_bypass(npe); + } + + return rc; +} + +int pnv_npu_dma_set_mask(struct pci_dev *npdev, u64 dma_mask) +{ + struct pci_controller *hose = pci_bus_to_host(npdev->bus); + struct pnv_phb *phb = hose->private_data; + struct pci_dn *pdn = pci_get_pdn(npdev); + struct pnv_ioda_pe *npe, *gpe; + struct pci_dev *gpdev; + uint64_t top; + bool bypass = false; + + if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE)) + return -ENXIO; + + /* We only do bypass if it's enabled on the linked device */ + npe = &phb->ioda.pe_array[pdn->pe_number]; + gpe = get_gpu_pci_dev_and_pe(npe, &gpdev); + if (!gpe) + return -ENODEV; + + if (gpe->tce_bypass_enabled) { + top = gpe->tce_bypass_base + memblock_end_of_DRAM() - 1; + bypass = (dma_mask >= top); + } + + if (bypass) + dev_info(&npdev->dev, "Using 64-bit DMA iommu bypass\n"); + else + dev_info(&npdev->dev, "Using 32-bit DMA via iommu\n"); + + pnv_npu_dma_set_bypass(npe, bypass); + *npdev->dev.dma_mask = dma_mask; + + return 0; +} diff --git a/arch/powerpc/platforms/powernv/opal-kmsg.c b/arch/powerpc/platforms/powernv/opal-kmsg.c new file mode 100644 index 000000000000..6f1214d4de92 --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-kmsg.c @@ -0,0 +1,75 @@ +/* + * kmsg dumper that ensures the OPAL console fully flushes panic messages + * + * Author: Russell Currey <ruscur@russell.cc> + * + * Copyright 2015 IBM Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include <linux/kmsg_dump.h> + +#include <asm/opal.h> +#include <asm/opal-api.h> + +/* + * Console output is controlled by OPAL firmware. The kernel regularly calls + * OPAL_POLL_EVENTS, which flushes some console output. In a panic state, + * however, the kernel no longer calls OPAL_POLL_EVENTS and the panic message + * may not be completely printed. This function does not actually dump the + * message, it just ensures that OPAL completely flushes the console buffer. + */ +static void force_opal_console_flush(struct kmsg_dumper *dumper, + enum kmsg_dump_reason reason) +{ + int i; + int64_t ret; + + /* + * Outside of a panic context the pollers will continue to run, + * so we don't need to do any special flushing. + */ + if (reason != KMSG_DUMP_PANIC) + return; + + if (opal_check_token(OPAL_CONSOLE_FLUSH)) { + ret = opal_console_flush(0); + + if (ret == OPAL_UNSUPPORTED || ret == OPAL_PARAMETER) + return; + + /* Incrementally flush until there's nothing left */ + while (opal_console_flush(0) != OPAL_SUCCESS); + } else { + /* + * If OPAL_CONSOLE_FLUSH is not implemented in the firmware, + * the console can still be flushed by calling the polling + * function enough times to flush the buffer. We don't know + * how much output still needs to be flushed, but we can be + * generous since the kernel is in panic and doesn't need + * to do much else. + */ + printk(KERN_NOTICE "opal: OPAL_CONSOLE_FLUSH missing.\n"); + for (i = 0; i < 1024; i++) { + opal_poll_events(NULL); + } + } +} + +static struct kmsg_dumper opal_kmsg_dumper = { + .dump = force_opal_console_flush +}; + +void __init opal_kmsg_init(void) +{ + int rc; + + /* Add our dumper to the list */ + rc = kmsg_dump_register(&opal_kmsg_dumper); + if (rc != 0) + pr_err("opal: kmsg_dump_register failed; returned %d\n", rc); +} diff --git a/arch/powerpc/platforms/powernv/opal-prd.c b/arch/powerpc/platforms/powernv/opal-prd.c index 4ece8e40dd54..e315e704cca7 100644 --- a/arch/powerpc/platforms/powernv/opal-prd.c +++ b/arch/powerpc/platforms/powernv/opal-prd.c @@ -434,7 +434,6 @@ static const struct of_device_id opal_prd_match[] = { static struct platform_driver opal_prd_driver = { .driver = { .name = "opal-prd", - .owner = THIS_MODULE, .of_match_table = opal_prd_match, }, .probe = opal_prd_probe, diff --git a/arch/powerpc/platforms/powernv/opal-rtc.c b/arch/powerpc/platforms/powernv/opal-rtc.c index 37dbee15769f..f8868864f373 100644 --- a/arch/powerpc/platforms/powernv/opal-rtc.c +++ b/arch/powerpc/platforms/powernv/opal-rtc.c @@ -31,8 +31,7 @@ static void opal_to_tm(u32 y_m_d, u64 h_m_s_ms, struct rtc_time *tm) tm->tm_hour = bcd2bin((h_m_s_ms >> 56) & 0xff); tm->tm_min = bcd2bin((h_m_s_ms >> 48) & 0xff); tm->tm_sec = bcd2bin((h_m_s_ms >> 40) & 0xff); - - GregorianDay(tm); + tm->tm_wday = -1; } unsigned long __init opal_get_boot_time(void) @@ -51,7 +50,7 @@ unsigned long __init opal_get_boot_time(void) rc = opal_rtc_read(&__y_m_d, &__h_m_s_ms); if (rc == OPAL_BUSY_EVENT) opal_poll_events(NULL); - else + else if (rc == OPAL_BUSY) mdelay(10); } if (rc != OPAL_SUCCESS) diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index b7a464fef7a7..e45b88a5d7e0 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -301,3 +301,4 @@ OPAL_CALL(opal_flash_erase, OPAL_FLASH_ERASE); OPAL_CALL(opal_prd_msg, OPAL_PRD_MSG); OPAL_CALL(opal_leds_get_ind, OPAL_LEDS_GET_INDICATOR); OPAL_CALL(opal_leds_set_ind, OPAL_LEDS_SET_INDICATOR); +OPAL_CALL(opal_console_flush, OPAL_CONSOLE_FLUSH); diff --git a/arch/powerpc/platforms/powernv/opal-xscom.c b/arch/powerpc/platforms/powernv/opal-xscom.c index 7634d1c62299..d0ac535cf5d7 100644 --- a/arch/powerpc/platforms/powernv/opal-xscom.c +++ b/arch/powerpc/platforms/powernv/opal-xscom.c @@ -126,7 +126,7 @@ static const struct scom_controller opal_scom_controller = { static int opal_xscom_init(void) { - if (firmware_has_feature(FW_FEATURE_OPALv3)) + if (firmware_has_feature(FW_FEATURE_OPAL)) scom_init(&opal_scom_controller); return 0; } diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 57cffb80bc36..4e0da5af94a1 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -98,16 +98,11 @@ int __init early_init_dt_scan_opal(unsigned long node, pr_debug("OPAL Entry = 0x%llx (sizep=%p runtimesz=%d)\n", opal.size, sizep, runtimesz); - powerpc_firmware_features |= FW_FEATURE_OPAL; if (of_flat_dt_is_compatible(node, "ibm,opal-v3")) { - powerpc_firmware_features |= FW_FEATURE_OPALv2; - powerpc_firmware_features |= FW_FEATURE_OPALv3; - pr_info("OPAL V3 detected !\n"); - } else if (of_flat_dt_is_compatible(node, "ibm,opal-v2")) { - powerpc_firmware_features |= FW_FEATURE_OPALv2; - pr_info("OPAL V2 detected !\n"); + powerpc_firmware_features |= FW_FEATURE_OPAL; + pr_info("OPAL detected !\n"); } else { - pr_info("OPAL V1 detected !\n"); + panic("OPAL != V3 detected, no longer supported.\n"); } /* Reinit all cores with the right endian */ @@ -352,17 +347,15 @@ int opal_put_chars(uint32_t vtermno, const char *data, int total_len) * enough room and be done with it */ spin_lock_irqsave(&opal_write_lock, flags); - if (firmware_has_feature(FW_FEATURE_OPALv2)) { - rc = opal_console_write_buffer_space(vtermno, &olen); - len = be64_to_cpu(olen); - if (rc || len < total_len) { - spin_unlock_irqrestore(&opal_write_lock, flags); - /* Closed -> drop characters */ - if (rc) - return total_len; - opal_poll_events(NULL); - return -EAGAIN; - } + rc = opal_console_write_buffer_space(vtermno, &olen); + len = be64_to_cpu(olen); + if (rc || len < total_len) { + spin_unlock_irqrestore(&opal_write_lock, flags); + /* Closed -> drop characters */ + if (rc) + return total_len; + opal_poll_events(NULL); + return -EAGAIN; } /* We still try to handle partial completions, though they @@ -555,7 +548,7 @@ bool opal_mce_check_early_recovery(struct pt_regs *regs) goto out; if ((regs->nip >= opal.base) && - (regs->nip <= (opal.base + opal.size))) + (regs->nip < (opal.base + opal.size))) recover_addr = find_recovery_address(regs->nip); /* @@ -696,10 +689,7 @@ static int __init opal_init(void) } /* Register OPAL consoles if any ports */ - if (firmware_has_feature(FW_FEATURE_OPALv2)) - consoles = of_find_node_by_path("/ibm,opal/consoles"); - else - consoles = of_node_get(opal_node); + consoles = of_find_node_by_path("/ibm,opal/consoles"); if (consoles) { for_each_child_of_node(consoles, np) { if (strcmp(np->name, "serial")) @@ -758,6 +748,9 @@ static int __init opal_init(void) opal_pdev_init(opal_node, "ibm,opal-flash"); opal_pdev_init(opal_node, "ibm,opal-prd"); + /* Initialise OPAL kmsg dumper for flushing console on panic */ + opal_kmsg_init(); + return 0; } machine_subsys_initcall(powernv, opal_init); diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 414fd1a00fda..573ae1994097 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -116,16 +116,6 @@ static int __init iommu_setup(char *str) } early_param("iommu", iommu_setup); -/* - * stdcix is only supposed to be used in hypervisor real mode as per - * the architecture spec - */ -static inline void __raw_rm_writeq(u64 val, volatile void __iomem *paddr) -{ - __asm__ __volatile__("stdcix %0,0,%1" - : : "r" (val), "r" (paddr) : "memory"); -} - static inline bool pnv_pci_is_mem_pref_64(unsigned long flags) { return ((flags & (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH)) == @@ -344,7 +334,7 @@ static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb) return; } - if (!firmware_has_feature(FW_FEATURE_OPALv3)) { + if (!firmware_has_feature(FW_FEATURE_OPAL)) { pr_info(" Firmware too old to support M64 window\n"); return; } @@ -357,6 +347,7 @@ static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb) } res = &hose->mem_resources[1]; + res->name = dn->full_name; res->start = of_translate_address(dn, r + 2); res->end = res->start + of_read_number(r + 4, 2) - 1; res->flags = (IORESOURCE_MEM | IORESOURCE_MEM_64 | IORESOURCE_PREFETCH); @@ -780,8 +771,12 @@ static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) return -ENXIO; } - /* Configure PELTV */ - pnv_ioda_set_peltv(phb, pe, true); + /* + * Configure PELTV. NPUs don't have a PELTV table so skip + * configuration on them. + */ + if (phb->type != PNV_PHB_NPU) + pnv_ioda_set_peltv(phb, pe, true); /* Setup reverse map */ for (rid = pe->rid; rid < rid_end; rid++) @@ -924,7 +919,6 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset) } #endif /* CONFIG_PCI_IOV */ -#if 0 static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev) { struct pci_controller *hose = pci_bus_to_host(dev->bus); @@ -941,11 +935,7 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev) if (pdn->pe_number != IODA_INVALID_PE) return NULL; - /* PE#0 has been pre-set */ - if (dev->bus->number == 0) - pe_num = 0; - else - pe_num = pnv_ioda_alloc_pe(phb); + pe_num = pnv_ioda_alloc_pe(phb); if (pe_num == IODA_INVALID_PE) { pr_warning("%s: Not enough PE# available, disabling device\n", pci_name(dev)); @@ -963,6 +953,7 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev) pci_dev_get(dev); pdn->pcidev = dev; pdn->pe_number = pe_num; + pe->flags = PNV_IODA_PE_DEV; pe->pdev = dev; pe->pbus = NULL; pe->tce32_seg = -1; @@ -993,7 +984,6 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev) return pe; } -#endif /* Useful for SRIOV case */ static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe) { @@ -1007,6 +997,7 @@ static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe) pci_name(dev)); continue; } + pdn->pcidev = dev; pdn->pe_number = pe->pe_number; pe->dma_weight += pnv_ioda_dma_weight(dev); if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate) @@ -1083,6 +1074,77 @@ static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all) pnv_ioda_link_pe_by_weight(phb, pe); } +static struct pnv_ioda_pe *pnv_ioda_setup_npu_PE(struct pci_dev *npu_pdev) +{ + int pe_num, found_pe = false, rc; + long rid; + struct pnv_ioda_pe *pe; + struct pci_dev *gpu_pdev; + struct pci_dn *npu_pdn; + struct pci_controller *hose = pci_bus_to_host(npu_pdev->bus); + struct pnv_phb *phb = hose->private_data; + + /* + * Due to a hardware errata PE#0 on the NPU is reserved for + * error handling. This means we only have three PEs remaining + * which need to be assigned to four links, implying some + * links must share PEs. + * + * To achieve this we assign PEs such that NPUs linking the + * same GPU get assigned the same PE. + */ + gpu_pdev = pnv_pci_get_gpu_dev(npu_pdev); + for (pe_num = 0; pe_num < phb->ioda.total_pe; pe_num++) { + pe = &phb->ioda.pe_array[pe_num]; + if (!pe->pdev) + continue; + + if (pnv_pci_get_gpu_dev(pe->pdev) == gpu_pdev) { + /* + * This device has the same peer GPU so should + * be assigned the same PE as the existing + * peer NPU. + */ + dev_info(&npu_pdev->dev, + "Associating to existing PE %d\n", pe_num); + pci_dev_get(npu_pdev); + npu_pdn = pci_get_pdn(npu_pdev); + rid = npu_pdev->bus->number << 8 | npu_pdn->devfn; + npu_pdn->pcidev = npu_pdev; + npu_pdn->pe_number = pe_num; + pe->dma_weight += pnv_ioda_dma_weight(npu_pdev); + phb->ioda.pe_rmap[rid] = pe->pe_number; + + /* Map the PE to this link */ + rc = opal_pci_set_pe(phb->opal_id, pe_num, rid, + OpalPciBusAll, + OPAL_COMPARE_RID_DEVICE_NUMBER, + OPAL_COMPARE_RID_FUNCTION_NUMBER, + OPAL_MAP_PE); + WARN_ON(rc != OPAL_SUCCESS); + found_pe = true; + break; + } + } + + if (!found_pe) + /* + * Could not find an existing PE so allocate a new + * one. + */ + return pnv_ioda_setup_dev_PE(npu_pdev); + else + return pe; +} + +static void pnv_ioda_setup_npu_PEs(struct pci_bus *bus) +{ + struct pci_dev *pdev; + + list_for_each_entry(pdev, &bus->devices, bus_list) + pnv_ioda_setup_npu_PE(pdev); +} + static void pnv_ioda_setup_PEs(struct pci_bus *bus) { struct pci_dev *dev; @@ -1119,7 +1181,17 @@ static void pnv_pci_ioda_setup_PEs(void) if (phb->reserve_m64_pe) phb->reserve_m64_pe(hose->bus, NULL, true); - pnv_ioda_setup_PEs(hose->bus); + /* + * On NPU PHB, we expect separate PEs for individual PCI + * functions. PCI bus dependent PEs are required for the + * remaining types of PHBs. + */ + if (phb->type == PNV_PHB_NPU) { + /* PE#0 is needed for error reporting */ + pnv_ioda_reserve_pe(phb, 0); + pnv_ioda_setup_npu_PEs(hose->bus); + } else + pnv_ioda_setup_PEs(hose->bus); } } @@ -1578,6 +1650,8 @@ static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask) struct pnv_ioda_pe *pe; uint64_t top; bool bypass = false; + struct pci_dev *linked_npu_dev; + int i; if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE)) return -ENODEV;; @@ -1596,6 +1670,18 @@ static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask) set_dma_ops(&pdev->dev, &dma_iommu_ops); } *pdev->dev.dma_mask = dma_mask; + + /* Update peer npu devices */ + if (pe->flags & PNV_IODA_PE_PEER) + for (i = 0; i < PNV_IODA_MAX_PEER_PES; i++) { + if (!pe->peers[i]) + continue; + + linked_npu_dev = pe->peers[i]->pdev; + if (dma_get_mask(&linked_npu_dev->dev) != dma_mask) + dma_set_mask(&linked_npu_dev->dev, dma_mask); + } + return 0; } @@ -1740,12 +1826,23 @@ static inline void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_ioda_pe *pe) /* 01xb - invalidate TCEs that match the specified PE# */ unsigned long val = (0x4ull << 60) | (pe->pe_number & 0xFF); struct pnv_phb *phb = pe->phb; + struct pnv_ioda_pe *npe; + int i; if (!phb->ioda.tce_inval_reg) return; mb(); /* Ensure above stores are visible */ __raw_writeq(cpu_to_be64(val), phb->ioda.tce_inval_reg); + + if (pe->flags & PNV_IODA_PE_PEER) + for (i = 0; i < PNV_IODA_MAX_PEER_PES; i++) { + npe = pe->peers[i]; + if (!npe || npe->phb->type != PNV_PHB_NPU) + continue; + + pnv_npu_tce_invalidate_entire(npe); + } } static void pnv_pci_ioda2_do_tce_invalidate(unsigned pe_number, bool rm, @@ -1780,15 +1877,28 @@ static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl, struct iommu_table_group_link *tgl; list_for_each_entry_rcu(tgl, &tbl->it_group_list, next) { + struct pnv_ioda_pe *npe; struct pnv_ioda_pe *pe = container_of(tgl->table_group, struct pnv_ioda_pe, table_group); __be64 __iomem *invalidate = rm ? (__be64 __iomem *)pe->phb->ioda.tce_inval_reg_phys : pe->phb->ioda.tce_inval_reg; + int i; pnv_pci_ioda2_do_tce_invalidate(pe->pe_number, rm, invalidate, tbl->it_page_shift, index, npages); + + if (pe->flags & PNV_IODA_PE_PEER) + /* Invalidate PEs using the same TCE table */ + for (i = 0; i < PNV_IODA_MAX_PEER_PES; i++) { + npe = pe->peers[i]; + if (!npe || npe->phb->type != PNV_PHB_NPU) + continue; + + pnv_npu_tce_invalidate(npe, tbl, index, + npages, rm); + } } } @@ -2436,10 +2546,17 @@ static void pnv_ioda_setup_dma(struct pnv_phb *phb) pe_info(pe, "DMA weight %d, assigned %d DMA32 segments\n", pe->dma_weight, segs); pnv_pci_ioda_setup_dma_pe(phb, pe, base, segs); - } else { + } else if (phb->type == PNV_PHB_IODA2) { pe_info(pe, "Assign DMA32 space\n"); segs = 0; pnv_pci_ioda2_setup_dma_pe(phb, pe); + } else if (phb->type == PNV_PHB_NPU) { + /* + * We initialise the DMA space for an NPU PHB + * after setup of the PHB is complete as we + * point the NPU TVT to the the same location + * as the PHB3 TVT. + */ } remaining -= segs; @@ -2881,6 +2998,11 @@ static void pnv_pci_ioda_setup_seg(void) list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { phb = hose->private_data; + + /* NPU PHB does not support IO or MMIO segmentation */ + if (phb->type == PNV_PHB_NPU) + continue; + list_for_each_entry(pe, &phb->ioda.pe_list, list) { pnv_ioda_setup_pe_seg(hose, pe); } @@ -2920,6 +3042,27 @@ static void pnv_pci_ioda_create_dbgfs(void) #endif /* CONFIG_DEBUG_FS */ } +static void pnv_npu_ioda_fixup(void) +{ + bool enable_bypass; + struct pci_controller *hose, *tmp; + struct pnv_phb *phb; + struct pnv_ioda_pe *pe; + + list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { + phb = hose->private_data; + if (phb->type != PNV_PHB_NPU) + continue; + + list_for_each_entry(pe, &phb->ioda.pe_dma_list, dma_link) { + enable_bypass = dma_get_mask(&pe->pdev->dev) == + DMA_BIT_MASK(64); + pnv_npu_init_dma_pe(pe); + pnv_npu_dma_set_bypass(pe, enable_bypass); + } + } +} + static void pnv_pci_ioda_fixup(void) { pnv_pci_ioda_setup_PEs(); @@ -2932,6 +3075,9 @@ static void pnv_pci_ioda_fixup(void) eeh_init(); eeh_addr_cache_build(); #endif + + /* Link NPU IODA tables to their PCI devices. */ + pnv_npu_ioda_fixup(); } /* @@ -3046,6 +3192,19 @@ static const struct pci_controller_ops pnv_pci_ioda_controller_ops = { .shutdown = pnv_pci_ioda_shutdown, }; +static const struct pci_controller_ops pnv_npu_ioda_controller_ops = { + .dma_dev_setup = pnv_pci_dma_dev_setup, +#ifdef CONFIG_PCI_MSI + .setup_msi_irqs = pnv_setup_msi_irqs, + .teardown_msi_irqs = pnv_teardown_msi_irqs, +#endif + .enable_device_hook = pnv_pci_enable_device_hook, + .window_alignment = pnv_pci_window_alignment, + .reset_secondary_bus = pnv_pci_reset_secondary_bus, + .dma_set_mask = pnv_npu_dma_set_mask, + .shutdown = pnv_pci_ioda_shutdown, +}; + static void __init pnv_pci_init_ioda_phb(struct device_node *np, u64 hub_id, int ioda_type) { @@ -3101,6 +3260,8 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, phb->model = PNV_PHB_MODEL_P7IOC; else if (of_device_is_compatible(np, "ibm,power8-pciex")) phb->model = PNV_PHB_MODEL_PHB3; + else if (of_device_is_compatible(np, "ibm,power8-npu-pciex")) + phb->model = PNV_PHB_MODEL_NPU; else phb->model = PNV_PHB_MODEL_UNKNOWN; @@ -3201,7 +3362,11 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, * the child P2P bridges) can form individual PE. */ ppc_md.pcibios_fixup = pnv_pci_ioda_fixup; - hose->controller_ops = pnv_pci_ioda_controller_ops; + + if (phb->type == PNV_PHB_NPU) + hose->controller_ops = pnv_npu_ioda_controller_ops; + else + hose->controller_ops = pnv_pci_ioda_controller_ops; #ifdef CONFIG_PCI_IOV ppc_md.pcibios_fixup_sriov = pnv_pci_ioda_fixup_iov_resources; @@ -3236,6 +3401,11 @@ void __init pnv_pci_init_ioda2_phb(struct device_node *np) pnv_pci_init_ioda_phb(np, 0, PNV_PHB_IODA2); } +void __init pnv_pci_init_npu_phb(struct device_node *np) +{ + pnv_pci_init_ioda_phb(np, 0, PNV_PHB_NPU); +} + void __init pnv_pci_init_ioda_hub(struct device_node *np) { struct device_node *phbn; diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index f2dd77234240..2f55c86df703 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -1,8 +1,6 @@ /* * Support PCI/PCIe on PowerNV platforms * - * Currently supports only P5IOC2 - * * Copyright 2011 Benjamin Herrenschmidt, IBM Corp. * * This program is free software; you can redistribute it and/or @@ -807,6 +805,10 @@ void __init pnv_pci_init(void) for_each_compatible_node(np, NULL, "ibm,ioda2-phb") pnv_pci_init_ioda2_phb(np); + /* Look for NPU PHBs */ + for_each_compatible_node(np, NULL, "ibm,ioda2-npu-phb") + pnv_pci_init_npu_phb(np); + /* Setup the linkage between OF nodes and PHBs */ pci_devs_phb_init(); diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index c8ff50e90766..7f56313e8d72 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -7,6 +7,7 @@ enum pnv_phb_type { PNV_PHB_P5IOC2 = 0, PNV_PHB_IODA1 = 1, PNV_PHB_IODA2 = 2, + PNV_PHB_NPU = 3, }; /* Precise PHB model for error management */ @@ -15,6 +16,7 @@ enum pnv_phb_model { PNV_PHB_MODEL_P5IOC2, PNV_PHB_MODEL_P7IOC, PNV_PHB_MODEL_PHB3, + PNV_PHB_MODEL_NPU, }; #define PNV_PCI_DIAG_BUF_SIZE 8192 @@ -24,6 +26,7 @@ enum pnv_phb_model { #define PNV_IODA_PE_MASTER (1 << 3) /* Master PE in compound case */ #define PNV_IODA_PE_SLAVE (1 << 4) /* Slave PE in compound case */ #define PNV_IODA_PE_VF (1 << 5) /* PE for one VF */ +#define PNV_IODA_PE_PEER (1 << 6) /* PE has peers */ /* Data associated with a PE, including IOMMU tracking etc.. */ struct pnv_phb; @@ -31,6 +34,9 @@ struct pnv_ioda_pe { unsigned long flags; struct pnv_phb *phb; +#define PNV_IODA_MAX_PEER_PES 8 + struct pnv_ioda_pe *peers[PNV_IODA_MAX_PEER_PES]; + /* A PE can be associated with a single device or an * entire bus (& children). In the former case, pdev * is populated, in the later case, pbus is. @@ -229,6 +235,7 @@ extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl, extern void pnv_pci_init_p5ioc2_hub(struct device_node *np); extern void pnv_pci_init_ioda_hub(struct device_node *np); extern void pnv_pci_init_ioda2_phb(struct device_node *np); +extern void pnv_pci_init_npu_phb(struct device_node *np); extern void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl, __be64 *startp, __be64 *endp, bool rm); extern void pnv_pci_reset_secondary_bus(struct pci_dev *dev); @@ -238,4 +245,16 @@ extern void pnv_pci_dma_dev_setup(struct pci_dev *pdev); extern int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type); extern void pnv_teardown_msi_irqs(struct pci_dev *pdev); +/* Nvlink functions */ +extern void pnv_npu_tce_invalidate_entire(struct pnv_ioda_pe *npe); +extern void pnv_npu_tce_invalidate(struct pnv_ioda_pe *npe, + struct iommu_table *tbl, + unsigned long index, + unsigned long npages, + bool rm); +extern void pnv_npu_init_dma_pe(struct pnv_ioda_pe *npe); +extern void pnv_npu_setup_dma_pe(struct pnv_ioda_pe *npe); +extern int pnv_npu_dma_set_bypass(struct pnv_ioda_pe *npe, bool enabled); +extern int pnv_npu_dma_set_mask(struct pci_dev *npdev, u64 dma_mask); + #endif /* __POWERNV_PCI_H */ diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index a9a8fa37a555..1acb0c72d923 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -90,12 +90,8 @@ static void pnv_show_cpuinfo(struct seq_file *m) if (root) model = of_get_property(root, "model", NULL); seq_printf(m, "machine\t\t: PowerNV %s\n", model); - if (firmware_has_feature(FW_FEATURE_OPALv3)) - seq_printf(m, "firmware\t: OPAL v3\n"); - else if (firmware_has_feature(FW_FEATURE_OPALv2)) - seq_printf(m, "firmware\t: OPAL v2\n"); - else if (firmware_has_feature(FW_FEATURE_OPAL)) - seq_printf(m, "firmware\t: OPAL v1\n"); + if (firmware_has_feature(FW_FEATURE_OPAL)) + seq_printf(m, "firmware\t: OPAL\n"); else seq_printf(m, "firmware\t: BML\n"); of_node_put(root); @@ -224,9 +220,9 @@ static void pnv_kexec_cpu_down(int crash_shutdown, int secondary) { xics_kexec_teardown_cpu(secondary); - /* On OPAL v3, we return all CPUs to firmware */ + /* On OPAL, we return all CPUs to firmware */ - if (!firmware_has_feature(FW_FEATURE_OPALv3)) + if (!firmware_has_feature(FW_FEATURE_OPAL)) return; if (secondary) { diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c index ca264833ee64..ad7b1a3dbed0 100644 --- a/arch/powerpc/platforms/powernv/smp.c +++ b/arch/powerpc/platforms/powernv/smp.c @@ -61,14 +61,15 @@ static int pnv_smp_kick_cpu(int nr) unsigned long start_here = __pa(ppc_function_entry(generic_secondary_smp_init)); long rc; + uint8_t status; BUG_ON(nr < 0 || nr >= NR_CPUS); /* - * If we already started or OPALv2 is not supported, we just + * If we already started or OPAL is not supported, we just * kick the CPU via the PACA */ - if (paca[nr].cpu_start || !firmware_has_feature(FW_FEATURE_OPALv2)) + if (paca[nr].cpu_start || !firmware_has_feature(FW_FEATURE_OPAL)) goto kick; /* @@ -77,55 +78,42 @@ static int pnv_smp_kick_cpu(int nr) * first time. OPAL v3 allows us to query OPAL to know if it * has the CPUs, so we do that */ - if (firmware_has_feature(FW_FEATURE_OPALv3)) { - uint8_t status; - - rc = opal_query_cpu_status(pcpu, &status); - if (rc != OPAL_SUCCESS) { - pr_warn("OPAL Error %ld querying CPU %d state\n", - rc, nr); - return -ENODEV; - } + rc = opal_query_cpu_status(pcpu, &status); + if (rc != OPAL_SUCCESS) { + pr_warn("OPAL Error %ld querying CPU %d state\n", rc, nr); + return -ENODEV; + } - /* - * Already started, just kick it, probably coming from - * kexec and spinning - */ - if (status == OPAL_THREAD_STARTED) - goto kick; + /* + * Already started, just kick it, probably coming from + * kexec and spinning + */ + if (status == OPAL_THREAD_STARTED) + goto kick; - /* - * Available/inactive, let's kick it - */ - if (status == OPAL_THREAD_INACTIVE) { - pr_devel("OPAL: Starting CPU %d (HW 0x%x)...\n", - nr, pcpu); - rc = opal_start_cpu(pcpu, start_here); - if (rc != OPAL_SUCCESS) { - pr_warn("OPAL Error %ld starting CPU %d\n", - rc, nr); - return -ENODEV; - } - } else { - /* - * An unavailable CPU (or any other unknown status) - * shouldn't be started. It should also - * not be in the possible map but currently it can - * happen - */ - pr_devel("OPAL: CPU %d (HW 0x%x) is unavailable" - " (status %d)...\n", nr, pcpu, status); + /* + * Available/inactive, let's kick it + */ + if (status == OPAL_THREAD_INACTIVE) { + pr_devel("OPAL: Starting CPU %d (HW 0x%x)...\n", nr, pcpu); + rc = opal_start_cpu(pcpu, start_here); + if (rc != OPAL_SUCCESS) { + pr_warn("OPAL Error %ld starting CPU %d\n", rc, nr); return -ENODEV; } } else { /* - * On OPAL v2, we just kick it and hope for the best, - * we must not test the error from opal_start_cpu() or - * we would fail to get CPUs from kexec. + * An unavailable CPU (or any other unknown status) + * shouldn't be started. It should also + * not be in the possible map but currently it can + * happen */ - opal_start_cpu(pcpu, start_here); + pr_devel("OPAL: CPU %d (HW 0x%x) is unavailable" + " (status %d)...\n", nr, pcpu, status); + return -ENODEV; } - kick: + +kick: return smp_generic_kick_cpu(nr); } diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index f244dcb4f2cf..2b93ae8d557a 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -20,7 +20,6 @@ #include <linux/of.h> #include "of_helpers.h" -#include "offline_states.h" #include "pseries.h" #include <asm/prom.h> @@ -338,185 +337,6 @@ int dlpar_release_drc(u32 drc_index) return 0; } -#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE - -static int dlpar_online_cpu(struct device_node *dn) -{ - int rc = 0; - unsigned int cpu; - int len, nthreads, i; - const __be32 *intserv; - u32 thread; - - intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", &len); - if (!intserv) - return -EINVAL; - - nthreads = len / sizeof(u32); - - cpu_maps_update_begin(); - for (i = 0; i < nthreads; i++) { - thread = be32_to_cpu(intserv[i]); - for_each_present_cpu(cpu) { - if (get_hard_smp_processor_id(cpu) != thread) - continue; - BUG_ON(get_cpu_current_state(cpu) - != CPU_STATE_OFFLINE); - cpu_maps_update_done(); - rc = device_online(get_cpu_device(cpu)); - if (rc) - goto out; - cpu_maps_update_begin(); - - break; - } - if (cpu == num_possible_cpus()) - printk(KERN_WARNING "Could not find cpu to online " - "with physical id 0x%x\n", thread); - } - cpu_maps_update_done(); - -out: - return rc; - -} - -static ssize_t dlpar_cpu_probe(const char *buf, size_t count) -{ - struct device_node *dn, *parent; - u32 drc_index; - int rc; - - rc = kstrtou32(buf, 0, &drc_index); - if (rc) - return -EINVAL; - - rc = dlpar_acquire_drc(drc_index); - if (rc) - return -EINVAL; - - parent = of_find_node_by_path("/cpus"); - if (!parent) - return -ENODEV; - - dn = dlpar_configure_connector(cpu_to_be32(drc_index), parent); - of_node_put(parent); - if (!dn) { - dlpar_release_drc(drc_index); - return -EINVAL; - } - - rc = dlpar_attach_node(dn); - if (rc) { - dlpar_release_drc(drc_index); - dlpar_free_cc_nodes(dn); - return rc; - } - - rc = dlpar_online_cpu(dn); - if (rc) - return rc; - - return count; -} - -static int dlpar_offline_cpu(struct device_node *dn) -{ - int rc = 0; - unsigned int cpu; - int len, nthreads, i; - const __be32 *intserv; - u32 thread; - - intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", &len); - if (!intserv) - return -EINVAL; - - nthreads = len / sizeof(u32); - - cpu_maps_update_begin(); - for (i = 0; i < nthreads; i++) { - thread = be32_to_cpu(intserv[i]); - for_each_present_cpu(cpu) { - if (get_hard_smp_processor_id(cpu) != thread) - continue; - - if (get_cpu_current_state(cpu) == CPU_STATE_OFFLINE) - break; - - if (get_cpu_current_state(cpu) == CPU_STATE_ONLINE) { - set_preferred_offline_state(cpu, CPU_STATE_OFFLINE); - cpu_maps_update_done(); - rc = device_offline(get_cpu_device(cpu)); - if (rc) - goto out; - cpu_maps_update_begin(); - break; - - } - - /* - * The cpu is in CPU_STATE_INACTIVE. - * Upgrade it's state to CPU_STATE_OFFLINE. - */ - set_preferred_offline_state(cpu, CPU_STATE_OFFLINE); - BUG_ON(plpar_hcall_norets(H_PROD, thread) - != H_SUCCESS); - __cpu_die(cpu); - break; - } - if (cpu == num_possible_cpus()) - printk(KERN_WARNING "Could not find cpu to offline " - "with physical id 0x%x\n", thread); - } - cpu_maps_update_done(); - -out: - return rc; - -} - -static ssize_t dlpar_cpu_release(const char *buf, size_t count) -{ - struct device_node *dn; - u32 drc_index; - int rc; - - dn = of_find_node_by_path(buf); - if (!dn) - return -EINVAL; - - rc = of_property_read_u32(dn, "ibm,my-drc-index", &drc_index); - if (rc) { - of_node_put(dn); - return -EINVAL; - } - - rc = dlpar_offline_cpu(dn); - if (rc) { - of_node_put(dn); - return -EINVAL; - } - - rc = dlpar_release_drc(drc_index); - if (rc) { - of_node_put(dn); - return rc; - } - - rc = dlpar_detach_node(dn); - if (rc) { - dlpar_acquire_drc(drc_index); - return rc; - } - - of_node_put(dn); - - return count; -} - -#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */ - static int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_elog) { int rc; @@ -536,6 +356,9 @@ static int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_elog) case PSERIES_HP_ELOG_RESOURCE_MEM: rc = dlpar_memory(hp_elog); break; + case PSERIES_HP_ELOG_RESOURCE_CPU: + rc = dlpar_cpu(hp_elog); + break; default: pr_warn_ratelimited("Invalid resource (%d) specified\n", hp_elog->resource); @@ -565,6 +388,9 @@ static ssize_t dlpar_store(struct class *class, struct class_attribute *attr, if (!strncmp(arg, "memory", 6)) { hp_elog->resource = PSERIES_HP_ELOG_RESOURCE_MEM; arg += strlen("memory "); + } else if (!strncmp(arg, "cpu", 3)) { + hp_elog->resource = PSERIES_HP_ELOG_RESOURCE_CPU; + arg += strlen("cpu "); } else { pr_err("Invalid resource specified: \"%s\"\n", buf); rc = -EINVAL; @@ -624,16 +450,7 @@ static CLASS_ATTR(dlpar, S_IWUSR, NULL, dlpar_store); static int __init pseries_dlpar_init(void) { - int rc; - -#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE - ppc_md.cpu_probe = dlpar_cpu_probe; - ppc_md.cpu_release = dlpar_cpu_release; -#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */ - - rc = sysfs_create_file(kernel_kobj, &class_attr_dlpar.attr); - - return rc; + return sysfs_create_file(kernel_kobj, &class_attr_dlpar.attr); } machine_device_initcall(pseries, pseries_dlpar_init); diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index 62475440fd45..32274f72fe3f 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -18,12 +18,15 @@ * 2 of the License, or (at your option) any later version. */ +#define pr_fmt(fmt) "pseries-hotplug-cpu: " fmt + #include <linux/kernel.h> #include <linux/interrupt.h> #include <linux/delay.h> #include <linux/sched.h> /* for idle_task_exit */ #include <linux/cpu.h> #include <linux/of.h> +#include <linux/slab.h> #include <asm/prom.h> #include <asm/rtas.h> #include <asm/firmware.h> @@ -32,6 +35,7 @@ #include <asm/xics.h> #include <asm/plpar_wrappers.h> +#include "pseries.h" #include "offline_states.h" /* This version can't take the spinlock, because it never returns */ @@ -88,13 +92,7 @@ void set_default_offline_state(int cpu) static void rtas_stop_self(void) { - static struct rtas_args args = { - .nargs = 0, - .nret = cpu_to_be32(1), - .rets = &args.args[0], - }; - - args.token = cpu_to_be32(rtas_stop_self_token); + static struct rtas_args args; local_irq_disable(); @@ -102,7 +100,8 @@ static void rtas_stop_self(void) printk("cpu %u (hwid %u) Ready to die...\n", smp_processor_id(), hard_smp_processor_id()); - enter_rtas(__pa(&args)); + + rtas_call_unlocked(&args, rtas_stop_self_token, 0, 1, NULL); panic("Alas, I survived.\n"); } @@ -339,6 +338,536 @@ static void pseries_remove_processor(struct device_node *np) cpu_maps_update_done(); } +static int dlpar_online_cpu(struct device_node *dn) +{ + int rc = 0; + unsigned int cpu; + int len, nthreads, i; + const __be32 *intserv; + u32 thread; + + intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", &len); + if (!intserv) + return -EINVAL; + + nthreads = len / sizeof(u32); + + cpu_maps_update_begin(); + for (i = 0; i < nthreads; i++) { + thread = be32_to_cpu(intserv[i]); + for_each_present_cpu(cpu) { + if (get_hard_smp_processor_id(cpu) != thread) + continue; + BUG_ON(get_cpu_current_state(cpu) + != CPU_STATE_OFFLINE); + cpu_maps_update_done(); + rc = device_online(get_cpu_device(cpu)); + if (rc) + goto out; + cpu_maps_update_begin(); + + break; + } + if (cpu == num_possible_cpus()) + printk(KERN_WARNING "Could not find cpu to online " + "with physical id 0x%x\n", thread); + } + cpu_maps_update_done(); + +out: + return rc; + +} + +static bool dlpar_cpu_exists(struct device_node *parent, u32 drc_index) +{ + struct device_node *child = NULL; + u32 my_drc_index; + bool found; + int rc; + + /* Assume cpu doesn't exist */ + found = false; + + for_each_child_of_node(parent, child) { + rc = of_property_read_u32(child, "ibm,my-drc-index", + &my_drc_index); + if (rc) + continue; + + if (my_drc_index == drc_index) { + of_node_put(child); + found = true; + break; + } + } + + return found; +} + +static bool valid_cpu_drc_index(struct device_node *parent, u32 drc_index) +{ + bool found = false; + int rc, index; + + index = 0; + while (!found) { + u32 drc; + + rc = of_property_read_u32_index(parent, "ibm,drc-indexes", + index++, &drc); + if (rc) + break; + + if (drc == drc_index) + found = true; + } + + return found; +} + +static ssize_t dlpar_cpu_add(u32 drc_index) +{ + struct device_node *dn, *parent; + int rc, saved_rc; + + pr_debug("Attempting to add CPU, drc index: %x\n", drc_index); + + parent = of_find_node_by_path("/cpus"); + if (!parent) { + pr_warn("Failed to find CPU root node \"/cpus\"\n"); + return -ENODEV; + } + + if (dlpar_cpu_exists(parent, drc_index)) { + of_node_put(parent); + pr_warn("CPU with drc index %x already exists\n", drc_index); + return -EINVAL; + } + + if (!valid_cpu_drc_index(parent, drc_index)) { + of_node_put(parent); + pr_warn("Cannot find CPU (drc index %x) to add.\n", drc_index); + return -EINVAL; + } + + rc = dlpar_acquire_drc(drc_index); + if (rc) { + pr_warn("Failed to acquire DRC, rc: %d, drc index: %x\n", + rc, drc_index); + of_node_put(parent); + return -EINVAL; + } + + dn = dlpar_configure_connector(cpu_to_be32(drc_index), parent); + of_node_put(parent); + if (!dn) { + pr_warn("Failed call to configure-connector, drc index: %x\n", + drc_index); + dlpar_release_drc(drc_index); + return -EINVAL; + } + + rc = dlpar_attach_node(dn); + if (rc) { + saved_rc = rc; + pr_warn("Failed to attach node %s, rc: %d, drc index: %x\n", + dn->name, rc, drc_index); + + rc = dlpar_release_drc(drc_index); + if (!rc) + dlpar_free_cc_nodes(dn); + + return saved_rc; + } + + rc = dlpar_online_cpu(dn); + if (rc) { + saved_rc = rc; + pr_warn("Failed to online cpu %s, rc: %d, drc index: %x\n", + dn->name, rc, drc_index); + + rc = dlpar_detach_node(dn); + if (!rc) + dlpar_release_drc(drc_index); + + return saved_rc; + } + + pr_debug("Successfully added CPU %s, drc index: %x\n", dn->name, + drc_index); + return rc; +} + +static int dlpar_offline_cpu(struct device_node *dn) +{ + int rc = 0; + unsigned int cpu; + int len, nthreads, i; + const __be32 *intserv; + u32 thread; + + intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", &len); + if (!intserv) + return -EINVAL; + + nthreads = len / sizeof(u32); + + cpu_maps_update_begin(); + for (i = 0; i < nthreads; i++) { + thread = be32_to_cpu(intserv[i]); + for_each_present_cpu(cpu) { + if (get_hard_smp_processor_id(cpu) != thread) + continue; + + if (get_cpu_current_state(cpu) == CPU_STATE_OFFLINE) + break; + + if (get_cpu_current_state(cpu) == CPU_STATE_ONLINE) { + set_preferred_offline_state(cpu, + CPU_STATE_OFFLINE); + cpu_maps_update_done(); + rc = device_offline(get_cpu_device(cpu)); + if (rc) + goto out; + cpu_maps_update_begin(); + break; + + } + + /* + * The cpu is in CPU_STATE_INACTIVE. + * Upgrade it's state to CPU_STATE_OFFLINE. + */ + set_preferred_offline_state(cpu, CPU_STATE_OFFLINE); + BUG_ON(plpar_hcall_norets(H_PROD, thread) + != H_SUCCESS); + __cpu_die(cpu); + break; + } + if (cpu == num_possible_cpus()) + printk(KERN_WARNING "Could not find cpu to offline with physical id 0x%x\n", thread); + } + cpu_maps_update_done(); + +out: + return rc; + +} + +static ssize_t dlpar_cpu_remove(struct device_node *dn, u32 drc_index) +{ + int rc; + + pr_debug("Attemping to remove CPU %s, drc index: %x\n", + dn->name, drc_index); + + rc = dlpar_offline_cpu(dn); + if (rc) { + pr_warn("Failed to offline CPU %s, rc: %d\n", dn->name, rc); + return -EINVAL; + } + + rc = dlpar_release_drc(drc_index); + if (rc) { + pr_warn("Failed to release drc (%x) for CPU %s, rc: %d\n", + drc_index, dn->name, rc); + dlpar_online_cpu(dn); + return rc; + } + + rc = dlpar_detach_node(dn); + if (rc) { + int saved_rc = rc; + + pr_warn("Failed to detach CPU %s, rc: %d", dn->name, rc); + + rc = dlpar_acquire_drc(drc_index); + if (!rc) + dlpar_online_cpu(dn); + + return saved_rc; + } + + pr_debug("Successfully removed CPU, drc index: %x\n", drc_index); + return 0; +} + +static struct device_node *cpu_drc_index_to_dn(u32 drc_index) +{ + struct device_node *dn; + u32 my_index; + int rc; + + for_each_node_by_type(dn, "cpu") { + rc = of_property_read_u32(dn, "ibm,my-drc-index", &my_index); + if (rc) + continue; + + if (my_index == drc_index) + break; + } + + return dn; +} + +static int dlpar_cpu_remove_by_index(u32 drc_index) +{ + struct device_node *dn; + int rc; + + dn = cpu_drc_index_to_dn(drc_index); + if (!dn) { + pr_warn("Cannot find CPU (drc index %x) to remove\n", + drc_index); + return -ENODEV; + } + + rc = dlpar_cpu_remove(dn, drc_index); + of_node_put(dn); + return rc; +} + +static int find_dlpar_cpus_to_remove(u32 *cpu_drcs, int cpus_to_remove) +{ + struct device_node *dn; + int cpus_found = 0; + int rc; + + /* We want to find cpus_to_remove + 1 CPUs to ensure we do not + * remove the last CPU. + */ + for_each_node_by_type(dn, "cpu") { + cpus_found++; + + if (cpus_found > cpus_to_remove) { + of_node_put(dn); + break; + } + + /* Note that cpus_found is always 1 ahead of the index + * into the cpu_drcs array, so we use cpus_found - 1 + */ + rc = of_property_read_u32(dn, "ibm,my-drc-index", + &cpu_drcs[cpus_found - 1]); + if (rc) { + pr_warn("Error occurred getting drc-index for %s\n", + dn->name); + of_node_put(dn); + return -1; + } + } + + if (cpus_found < cpus_to_remove) { + pr_warn("Failed to find enough CPUs (%d of %d) to remove\n", + cpus_found, cpus_to_remove); + } else if (cpus_found == cpus_to_remove) { + pr_warn("Cannot remove all CPUs\n"); + } + + return cpus_found; +} + +static int dlpar_cpu_remove_by_count(u32 cpus_to_remove) +{ + u32 *cpu_drcs; + int cpus_found; + int cpus_removed = 0; + int i, rc; + + pr_debug("Attempting to hot-remove %d CPUs\n", cpus_to_remove); + + cpu_drcs = kcalloc(cpus_to_remove, sizeof(*cpu_drcs), GFP_KERNEL); + if (!cpu_drcs) + return -EINVAL; + + cpus_found = find_dlpar_cpus_to_remove(cpu_drcs, cpus_to_remove); + if (cpus_found <= cpus_to_remove) { + kfree(cpu_drcs); + return -EINVAL; + } + + for (i = 0; i < cpus_to_remove; i++) { + rc = dlpar_cpu_remove_by_index(cpu_drcs[i]); + if (rc) + break; + + cpus_removed++; + } + + if (cpus_removed != cpus_to_remove) { + pr_warn("CPU hot-remove failed, adding back removed CPUs\n"); + + for (i = 0; i < cpus_removed; i++) + dlpar_cpu_add(cpu_drcs[i]); + + rc = -EINVAL; + } else { + rc = 0; + } + + kfree(cpu_drcs); + return rc; +} + +static int find_dlpar_cpus_to_add(u32 *cpu_drcs, u32 cpus_to_add) +{ + struct device_node *parent; + int cpus_found = 0; + int index, rc; + + parent = of_find_node_by_path("/cpus"); + if (!parent) { + pr_warn("Could not find CPU root node in device tree\n"); + kfree(cpu_drcs); + return -1; + } + + /* Search the ibm,drc-indexes array for possible CPU drcs to + * add. Note that the format of the ibm,drc-indexes array is + * the number of entries in the array followed by the array + * of drc values so we start looking at index = 1. + */ + index = 1; + while (cpus_found < cpus_to_add) { + u32 drc; + + rc = of_property_read_u32_index(parent, "ibm,drc-indexes", + index++, &drc); + if (rc) + break; + + if (dlpar_cpu_exists(parent, drc)) + continue; + + cpu_drcs[cpus_found++] = drc; + } + + of_node_put(parent); + return cpus_found; +} + +static int dlpar_cpu_add_by_count(u32 cpus_to_add) +{ + u32 *cpu_drcs; + int cpus_added = 0; + int cpus_found; + int i, rc; + + pr_debug("Attempting to hot-add %d CPUs\n", cpus_to_add); + + cpu_drcs = kcalloc(cpus_to_add, sizeof(*cpu_drcs), GFP_KERNEL); + if (!cpu_drcs) + return -EINVAL; + + cpus_found = find_dlpar_cpus_to_add(cpu_drcs, cpus_to_add); + if (cpus_found < cpus_to_add) { + pr_warn("Failed to find enough CPUs (%d of %d) to add\n", + cpus_found, cpus_to_add); + kfree(cpu_drcs); + return -EINVAL; + } + + for (i = 0; i < cpus_to_add; i++) { + rc = dlpar_cpu_add(cpu_drcs[i]); + if (rc) + break; + + cpus_added++; + } + + if (cpus_added < cpus_to_add) { + pr_warn("CPU hot-add failed, removing any added CPUs\n"); + + for (i = 0; i < cpus_added; i++) + dlpar_cpu_remove_by_index(cpu_drcs[i]); + + rc = -EINVAL; + } else { + rc = 0; + } + + kfree(cpu_drcs); + return rc; +} + +int dlpar_cpu(struct pseries_hp_errorlog *hp_elog) +{ + u32 count, drc_index; + int rc; + + count = hp_elog->_drc_u.drc_count; + drc_index = hp_elog->_drc_u.drc_index; + + lock_device_hotplug(); + + switch (hp_elog->action) { + case PSERIES_HP_ELOG_ACTION_REMOVE: + if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_COUNT) + rc = dlpar_cpu_remove_by_count(count); + else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX) + rc = dlpar_cpu_remove_by_index(drc_index); + else + rc = -EINVAL; + break; + case PSERIES_HP_ELOG_ACTION_ADD: + if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_COUNT) + rc = dlpar_cpu_add_by_count(count); + else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX) + rc = dlpar_cpu_add(drc_index); + else + rc = -EINVAL; + break; + default: + pr_err("Invalid action (%d) specified\n", hp_elog->action); + rc = -EINVAL; + break; + } + + unlock_device_hotplug(); + return rc; +} + +#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE + +static ssize_t dlpar_cpu_probe(const char *buf, size_t count) +{ + u32 drc_index; + int rc; + + rc = kstrtou32(buf, 0, &drc_index); + if (rc) + return -EINVAL; + + rc = dlpar_cpu_add(drc_index); + + return rc ? rc : count; +} + +static ssize_t dlpar_cpu_release(const char *buf, size_t count) +{ + struct device_node *dn; + u32 drc_index; + int rc; + + dn = of_find_node_by_path(buf); + if (!dn) + return -EINVAL; + + rc = of_property_read_u32(dn, "ibm,my-drc-index", &drc_index); + if (rc) { + of_node_put(dn); + return -EINVAL; + } + + rc = dlpar_cpu_remove(dn, drc_index); + of_node_put(dn); + + return rc ? rc : count; +} + +#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */ + static int pseries_smp_notifier(struct notifier_block *nb, unsigned long action, void *data) { @@ -385,6 +914,11 @@ static int __init pseries_cpu_hotplug_init(void) int cpu; int qcss_tok; +#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE + ppc_md.cpu_probe = dlpar_cpu_probe; + ppc_md.cpu_release = dlpar_cpu_release; +#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */ + for_each_node_by_name(np, "interrupt-controller") { typep = of_get_property(np, "compatible", NULL); if (strstr(typep, "open-pic")) { diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index b7a67e3d2201..477290ad855e 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -315,48 +315,48 @@ static long pSeries_lpar_hpte_updatepp(unsigned long slot, return 0; } -static unsigned long pSeries_lpar_hpte_getword0(unsigned long slot) +static long __pSeries_lpar_hpte_find(unsigned long want_v, unsigned long hpte_group) { - unsigned long dword0; - unsigned long lpar_rc; - unsigned long dummy_word1; - unsigned long flags; + long lpar_rc; + unsigned long i, j; + struct { + unsigned long pteh; + unsigned long ptel; + } ptes[4]; - /* Read 1 pte at a time */ - /* Do not need RPN to logical page translation */ - /* No cross CEC PFT access */ - flags = 0; + for (i = 0; i < HPTES_PER_GROUP; i += 4, hpte_group += 4) { - lpar_rc = plpar_pte_read(flags, slot, &dword0, &dummy_word1); + lpar_rc = plpar_pte_read_4(0, hpte_group, (void *)ptes); + if (lpar_rc != H_SUCCESS) + continue; - BUG_ON(lpar_rc != H_SUCCESS); + for (j = 0; j < 4; j++) { + if (HPTE_V_COMPARE(ptes[j].pteh, want_v) && + (ptes[j].pteh & HPTE_V_VALID)) + return i + j; + } + } - return dword0; + return -1; } static long pSeries_lpar_hpte_find(unsigned long vpn, int psize, int ssize) { - unsigned long hash; - unsigned long i; long slot; - unsigned long want_v, hpte_v; + unsigned long hash; + unsigned long want_v; + unsigned long hpte_group; hash = hpt_hash(vpn, mmu_psize_defs[psize].shift, ssize); want_v = hpte_encode_avpn(vpn, psize, ssize); /* Bolted entries are always in the primary group */ - slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; - for (i = 0; i < HPTES_PER_GROUP; i++) { - hpte_v = pSeries_lpar_hpte_getword0(slot); - - if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID)) - /* HPTE matches */ - return slot; - ++slot; - } - - return -1; -} + hpte_group = (hash & htab_hash_mask) * HPTES_PER_GROUP; + slot = __pSeries_lpar_hpte_find(want_v, hpte_group); + if (slot < 0) + return -1; + return hpte_group + slot; +} static void pSeries_lpar_hpte_updateboltedpp(unsigned long newpp, unsigned long ea, @@ -396,6 +396,7 @@ static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long vpn, BUG_ON(lpar_rc != H_SUCCESS); } +#ifdef CONFIG_TRANSPARENT_HUGEPAGE /* * Limit iterations holding pSeries_lpar_tlbie_lock to 3. We also need * to make sure that we avoid bouncing the hypervisor tlbie lock. @@ -494,6 +495,15 @@ static void pSeries_lpar_hugepage_invalidate(unsigned long vsid, __pSeries_lpar_hugepage_invalidate(slot_array, vpn_array, index, psize, ssize); } +#else +static void pSeries_lpar_hugepage_invalidate(unsigned long vsid, + unsigned long addr, + unsigned char *hpte_slot_array, + int psize, int ssize, int local) +{ + WARN(1, "%s called without THP support\n", __func__); +} +#endif static void pSeries_lpar_hpte_removebolted(unsigned long ea, int psize, int ssize) diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h index 8411c27293e4..7aa83f00ac62 100644 --- a/arch/powerpc/platforms/pseries/pseries.h +++ b/arch/powerpc/platforms/pseries/pseries.h @@ -73,6 +73,15 @@ static inline int dlpar_memory(struct pseries_hp_errorlog *hp_elog) } #endif +#ifdef CONFIG_HOTPLUG_CPU +int dlpar_cpu(struct pseries_hp_errorlog *hp_elog); +#else +static inline int dlpar_cpu(struct pseries_hp_errorlog *hp_elog) +{ + return -EOPNOTSUPP; +} +#endif + /* PCI root bridge prepare function override for pseries */ struct pci_host_bridge; int pseries_root_bridge_prepare(struct pci_host_bridge *bridge); diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 3b6647e574b6..9a3e27b863ce 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -40,6 +40,9 @@ static int ras_check_exception_token; #define EPOW_SENSOR_TOKEN 9 #define EPOW_SENSOR_INDEX 0 +/* EPOW events counter variable */ +static int num_epow_events; + static irqreturn_t ras_epow_interrupt(int irq, void *dev_id); static irqreturn_t ras_error_interrupt(int irq, void *dev_id); @@ -82,32 +85,30 @@ static void handle_system_shutdown(char event_modifier) { switch (event_modifier) { case EPOW_SHUTDOWN_NORMAL: - pr_emerg("Firmware initiated power off"); + pr_emerg("Power off requested\n"); orderly_poweroff(true); break; case EPOW_SHUTDOWN_ON_UPS: - pr_emerg("Loss of power reported by firmware, system is " - "running on UPS/battery"); - pr_emerg("Check RTAS error log for details"); + pr_emerg("Loss of system power detected. System is running on" + " UPS/battery. Check RTAS error log for details\n"); orderly_poweroff(true); break; case EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS: - pr_emerg("Loss of system critical functions reported by " - "firmware"); - pr_emerg("Check RTAS error log for details"); + pr_emerg("Loss of system critical functions detected. Check" + " RTAS error log for details\n"); orderly_poweroff(true); break; case EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH: - pr_emerg("Ambient temperature too high reported by firmware"); - pr_emerg("Check RTAS error log for details"); + pr_emerg("High ambient temperature detected. Check RTAS" + " error log for details\n"); orderly_poweroff(true); break; default: - pr_err("Unknown power/cooling shutdown event (modifier %d)", + pr_err("Unknown power/cooling shutdown event (modifier = %d)\n", event_modifier); } } @@ -145,17 +146,20 @@ static void rtas_parse_epow_errlog(struct rtas_error_log *log) switch (action_code) { case EPOW_RESET: - pr_err("Non critical power or cooling issue cleared"); + if (num_epow_events) { + pr_info("Non critical power/cooling issue cleared\n"); + num_epow_events--; + } break; case EPOW_WARN_COOLING: - pr_err("Non critical cooling issue reported by firmware"); - pr_err("Check RTAS error log for details"); + pr_info("Non-critical cooling issue detected. Check RTAS error" + " log for details\n"); break; case EPOW_WARN_POWER: - pr_err("Non critical power issue reported by firmware"); - pr_err("Check RTAS error log for details"); + pr_info("Non-critical power issue detected. Check RTAS error" + " log for details\n"); break; case EPOW_SYSTEM_SHUTDOWN: @@ -163,23 +167,27 @@ static void rtas_parse_epow_errlog(struct rtas_error_log *log) break; case EPOW_SYSTEM_HALT: - pr_emerg("Firmware initiated power off"); + pr_emerg("Critical power/cooling issue detected. Check RTAS" + " error log for details. Powering off.\n"); orderly_poweroff(true); break; case EPOW_MAIN_ENCLOSURE: case EPOW_POWER_OFF: - pr_emerg("Critical power/cooling issue reported by firmware"); - pr_emerg("Check RTAS error log for details"); - pr_emerg("Immediate power off"); + pr_emerg("System about to lose power. Check RTAS error log " + " for details. Powering off immediately.\n"); emergency_sync(); kernel_power_off(); break; default: - pr_err("Unknown power/cooling event (action code %d)", + pr_err("Unknown power/cooling event (action code = %d)\n", action_code); } + + /* Increment epow events counter variable */ + if (action_code != EPOW_RESET) + num_epow_events++; } /* Handle environmental and power warning (EPOW) interrupts. */ @@ -249,13 +257,12 @@ static irqreturn_t ras_error_interrupt(int irq, void *dev_id) log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, fatal); if (fatal) { - pr_emerg("Fatal hardware error reported by firmware"); - pr_emerg("Check RTAS error log for details"); - pr_emerg("Immediate power off"); + pr_emerg("Fatal hardware error detected. Check RTAS error" + " log for details. Powering off immediately\n"); emergency_sync(); kernel_power_off(); } else { - pr_err("Recoverable hardware error reported by firmware"); + pr_err("Recoverable hardware error detected\n"); } spin_unlock(&ras_log_buf_lock); diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile index 5b492a6438ff..bd6bd729969c 100644 --- a/arch/powerpc/sysdev/Makefile +++ b/arch/powerpc/sysdev/Makefile @@ -26,7 +26,6 @@ obj-$(CONFIG_FSL_85XX_CACHE_SRAM) += fsl_85xx_l2ctlr.o fsl_85xx_cache_sram.o obj-$(CONFIG_SIMPLE_GPIO) += simple_gpio.o obj-$(CONFIG_FSL_RIO) += fsl_rio.o fsl_rmu.o obj-$(CONFIG_TSI108_BRIDGE) += tsi108_pci.o tsi108_dev.o -obj-$(CONFIG_QUICC_ENGINE) += qe_lib/ mv64x60-$(CONFIG_PCI) += mv64x60_pci.o obj-$(CONFIG_MV64X60) += $(mv64x60-y) mv64x60_pic.o mv64x60_dev.o \ mv64x60_udbg.o diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c index 7a399b4d60a0..0d112b94d91d 100644 --- a/arch/powerpc/sysdev/axonram.c +++ b/arch/powerpc/sysdev/axonram.c @@ -43,6 +43,7 @@ #include <linux/types.h> #include <linux/of_device.h> #include <linux/of_platform.h> +#include <linux/pfn_t.h> #include <asm/page.h> #include <asm/prom.h> @@ -142,15 +143,13 @@ axon_ram_make_request(struct request_queue *queue, struct bio *bio) */ static long axon_ram_direct_access(struct block_device *device, sector_t sector, - void __pmem **kaddr, unsigned long *pfn) + void __pmem **kaddr, pfn_t *pfn) { struct axon_ram_bank *bank = device->bd_disk->private_data; loff_t offset = (loff_t)sector << AXON_RAM_SECTOR_SHIFT; - void *addr = (void *)(bank->ph_addr + offset); - - *kaddr = (void __pmem *)addr; - *pfn = virt_to_phys(addr) >> PAGE_SHIFT; + *kaddr = (void __pmem __force *) bank->io_addr + offset; + *pfn = phys_to_pfn_t(bank->ph_addr + offset, PFN_DEV); return bank->size - offset; } @@ -313,6 +312,7 @@ static const struct of_device_id axon_ram_device_id[] = { }, {} }; +MODULE_DEVICE_TABLE(of, axon_ram_device_id); static struct platform_driver axon_ram_driver = { .probe = axon_ram_probe, diff --git a/arch/powerpc/sysdev/cpm_common.c b/arch/powerpc/sysdev/cpm_common.c index e00a5ee58fd7..9d32465eddb1 100644 --- a/arch/powerpc/sysdev/cpm_common.c +++ b/arch/powerpc/sysdev/cpm_common.c @@ -27,8 +27,8 @@ #include <asm/udbg.h> #include <asm/io.h> -#include <asm/rheap.h> #include <asm/cpm.h> +#include <soc/fsl/qe/qe.h> #include <mm/mmu_decl.h> @@ -65,162 +65,6 @@ void __init udbg_init_cpm(void) } #endif -static spinlock_t cpm_muram_lock; -static rh_block_t cpm_boot_muram_rh_block[16]; -static rh_info_t cpm_muram_info; -static u8 __iomem *muram_vbase; -static phys_addr_t muram_pbase; - -/* Max address size we deal with */ -#define OF_MAX_ADDR_CELLS 4 - -int cpm_muram_init(void) -{ - struct device_node *np; - struct resource r; - u32 zero[OF_MAX_ADDR_CELLS] = {}; - resource_size_t max = 0; - int i = 0; - int ret = 0; - - if (muram_pbase) - return 0; - - spin_lock_init(&cpm_muram_lock); - /* initialize the info header */ - rh_init(&cpm_muram_info, 1, - sizeof(cpm_boot_muram_rh_block) / - sizeof(cpm_boot_muram_rh_block[0]), - cpm_boot_muram_rh_block); - - np = of_find_compatible_node(NULL, NULL, "fsl,cpm-muram-data"); - if (!np) { - /* try legacy bindings */ - np = of_find_node_by_name(NULL, "data-only"); - if (!np) { - printk(KERN_ERR "Cannot find CPM muram data node"); - ret = -ENODEV; - goto out; - } - } - - muram_pbase = of_translate_address(np, zero); - if (muram_pbase == (phys_addr_t)OF_BAD_ADDR) { - printk(KERN_ERR "Cannot translate zero through CPM muram node"); - ret = -ENODEV; - goto out; - } - - while (of_address_to_resource(np, i++, &r) == 0) { - if (r.end > max) - max = r.end; - - rh_attach_region(&cpm_muram_info, r.start - muram_pbase, - resource_size(&r)); - } - - muram_vbase = ioremap(muram_pbase, max - muram_pbase + 1); - if (!muram_vbase) { - printk(KERN_ERR "Cannot map CPM muram"); - ret = -ENOMEM; - } - -out: - of_node_put(np); - return ret; -} - -/** - * cpm_muram_alloc - allocate the requested size worth of multi-user ram - * @size: number of bytes to allocate - * @align: requested alignment, in bytes - * - * This function returns an offset into the muram area. - * Use cpm_dpram_addr() to get the virtual address of the area. - * Use cpm_muram_free() to free the allocation. - */ -unsigned long cpm_muram_alloc(unsigned long size, unsigned long align) -{ - unsigned long start; - unsigned long flags; - - spin_lock_irqsave(&cpm_muram_lock, flags); - cpm_muram_info.alignment = align; - start = rh_alloc(&cpm_muram_info, size, "commproc"); - if (!IS_ERR_VALUE(start)) - memset_io(cpm_muram_addr(start), 0, size); - spin_unlock_irqrestore(&cpm_muram_lock, flags); - - return start; -} -EXPORT_SYMBOL(cpm_muram_alloc); - -/** - * cpm_muram_free - free a chunk of multi-user ram - * @offset: The beginning of the chunk as returned by cpm_muram_alloc(). - */ -int cpm_muram_free(unsigned long offset) -{ - int ret; - unsigned long flags; - - spin_lock_irqsave(&cpm_muram_lock, flags); - ret = rh_free(&cpm_muram_info, offset); - spin_unlock_irqrestore(&cpm_muram_lock, flags); - - return ret; -} -EXPORT_SYMBOL(cpm_muram_free); - -/** - * cpm_muram_alloc_fixed - reserve a specific region of multi-user ram - * @offset: the offset into the muram area to reserve - * @size: the number of bytes to reserve - * - * This function returns "start" on success, -ENOMEM on failure. - * Use cpm_dpram_addr() to get the virtual address of the area. - * Use cpm_muram_free() to free the allocation. - */ -unsigned long cpm_muram_alloc_fixed(unsigned long offset, unsigned long size) -{ - unsigned long start; - unsigned long flags; - - spin_lock_irqsave(&cpm_muram_lock, flags); - cpm_muram_info.alignment = 1; - start = rh_alloc_fixed(&cpm_muram_info, offset, size, "commproc"); - spin_unlock_irqrestore(&cpm_muram_lock, flags); - - return start; -} -EXPORT_SYMBOL(cpm_muram_alloc_fixed); - -/** - * cpm_muram_addr - turn a muram offset into a virtual address - * @offset: muram offset to convert - */ -void __iomem *cpm_muram_addr(unsigned long offset) -{ - return muram_vbase + offset; -} -EXPORT_SYMBOL(cpm_muram_addr); - -unsigned long cpm_muram_offset(void __iomem *addr) -{ - return addr - (void __iomem *)muram_vbase; -} -EXPORT_SYMBOL(cpm_muram_offset); - -/** - * cpm_muram_dma - turn a muram virtual address into a DMA address - * @offset: virtual address from cpm_muram_addr() to convert - */ -dma_addr_t cpm_muram_dma(void __iomem *addr) -{ - return muram_pbase + ((u8 __iomem *)addr - muram_vbase); -} -EXPORT_SYMBOL(cpm_muram_dma); - #if defined(CONFIG_CPM2) || defined(CONFIG_8xx_GPIO) struct cpm2_ioports { diff --git a/arch/powerpc/sysdev/fsl_lbc.c b/arch/powerpc/sysdev/fsl_lbc.c index 38138cf8d33e..47f781059eeb 100644 --- a/arch/powerpc/sysdev/fsl_lbc.c +++ b/arch/powerpc/sysdev/fsl_lbc.c @@ -243,8 +243,6 @@ static irqreturn_t fsl_lbc_ctrl_irq(int irqno, void *data) if (status & LTESR_CS) dev_err(ctrl->dev, "Chip select error: " "LTESR 0x%08X\n", status); - if (status & LTESR_UPM) - ; if (status & LTESR_FCT) { dev_err(ctrl->dev, "FCM command time-out: " "LTESR 0x%08X\n", status); diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index 610f472f91d1..c69e88e91459 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -21,10 +21,12 @@ #include <linux/pci.h> #include <linux/delay.h> #include <linux/string.h> +#include <linux/fsl/edac.h> #include <linux/init.h> #include <linux/interrupt.h> #include <linux/memblock.h> #include <linux/log2.h> +#include <linux/platform_device.h> #include <linux/slab.h> #include <linux/suspend.h> #include <linux/syscore_ops.h> @@ -216,6 +218,19 @@ static void setup_pci_atmu(struct pci_controller *hose) */ setup_inbound = !is_kdump(); + if (of_device_is_compatible(hose->dn, "fsl,bsc9132-pcie")) { + /* + * BSC9132 Rev1.0 has an issue where all the PEX inbound + * windows have implemented the default target value as 0xf + * for CCSR space.In all Freescale legacy devices the target + * of 0xf is reserved for local memory space. 9132 Rev1.0 + * now has local mempry space mapped to target 0x0 instead of + * 0xf. Hence adding a workaround to remove the target 0xf + * defined for memory space from Inbound window attributes. + */ + piwar &= ~PIWAR_TGI_LOCAL; + } + if (early_find_capability(hose, 0, 0, PCI_CAP_ID_EXP)) { if (in_be32(&pci->block_rev1) >= PCIE_IP_REV_2_2) { win_idx = 2; @@ -1255,6 +1270,25 @@ void fsl_pcibios_fixup_phb(struct pci_controller *phb) #endif } +static int add_err_dev(struct platform_device *pdev) +{ + struct platform_device *errdev; + struct mpc85xx_edac_pci_plat_data pd = { + .of_node = pdev->dev.of_node + }; + + errdev = platform_device_register_resndata(&pdev->dev, + "mpc85xx-pci-edac", + PLATFORM_DEVID_AUTO, + pdev->resource, + pdev->num_resources, + &pd, sizeof(pd)); + if (IS_ERR(errdev)) + return PTR_ERR(errdev); + + return 0; +} + static int fsl_pci_probe(struct platform_device *pdev) { struct device_node *node; @@ -1262,8 +1296,13 @@ static int fsl_pci_probe(struct platform_device *pdev) node = pdev->dev.of_node; ret = fsl_add_bridge(pdev, fsl_pci_primary == node); + if (ret) + return ret; - mpc85xx_pci_err_probe(pdev); + ret = add_err_dev(pdev); + if (ret) + dev_err(&pdev->dev, "couldn't register error device: %d\n", + ret); return 0; } diff --git a/arch/powerpc/sysdev/fsl_pci.h b/arch/powerpc/sysdev/fsl_pci.h index c1cec771d5ea..151588530b06 100644 --- a/arch/powerpc/sysdev/fsl_pci.h +++ b/arch/powerpc/sysdev/fsl_pci.h @@ -130,15 +130,6 @@ void fsl_pci_assign_primary(void); static inline void fsl_pci_assign_primary(void) {} #endif -#ifdef CONFIG_EDAC_MPC85XX -int mpc85xx_pci_err_probe(struct platform_device *op); -#else -static inline int mpc85xx_pci_err_probe(struct platform_device *op) -{ - return -ENOTSUPP; -} -#endif - #ifdef CONFIG_FSL_PCI extern int fsl_pci_mcheck_exception(struct pt_regs *); #else diff --git a/arch/powerpc/sysdev/qe_lib/Kconfig b/arch/powerpc/sysdev/qe_lib/Kconfig deleted file mode 100644 index 3c251993bacd..000000000000 --- a/arch/powerpc/sysdev/qe_lib/Kconfig +++ /dev/null @@ -1,27 +0,0 @@ -# -# QE Communication options -# - -config UCC_SLOW - bool - default y if SERIAL_QE - help - This option provides qe_lib support to UCC slow - protocols: UART, BISYNC, QMC - -config UCC_FAST - bool - default y if UCC_GETH - help - This option provides qe_lib support to UCC fast - protocols: HDLC, Ethernet, ATM, transparent - -config UCC - bool - default y if UCC_FAST || UCC_SLOW - -config QE_USB - bool - default y if USB_FSL_QE - help - QE USB Controller support diff --git a/arch/powerpc/sysdev/qe_lib/Makefile b/arch/powerpc/sysdev/qe_lib/Makefile deleted file mode 100644 index f1855c185291..000000000000 --- a/arch/powerpc/sysdev/qe_lib/Makefile +++ /dev/null @@ -1,10 +0,0 @@ -# -# Makefile for the linux ppc-specific parts of QE -# -obj-$(CONFIG_QUICC_ENGINE)+= qe.o qe_ic.o qe_io.o - -obj-$(CONFIG_UCC) += ucc.o -obj-$(CONFIG_UCC_SLOW) += ucc_slow.o -obj-$(CONFIG_UCC_FAST) += ucc_fast.o -obj-$(CONFIG_QE_USB) += usb.o -obj-$(CONFIG_QE_GPIO) += gpio.o diff --git a/arch/powerpc/sysdev/qe_lib/gpio.c b/arch/powerpc/sysdev/qe_lib/gpio.c deleted file mode 100644 index 521e67a49dc4..000000000000 --- a/arch/powerpc/sysdev/qe_lib/gpio.c +++ /dev/null @@ -1,317 +0,0 @@ -/* - * QUICC Engine GPIOs - * - * Copyright (c) MontaVista Software, Inc. 2008. - * - * Author: Anton Vorontsov <avorontsov@ru.mvista.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ - -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/spinlock.h> -#include <linux/err.h> -#include <linux/io.h> -#include <linux/of.h> -#include <linux/of_gpio.h> -#include <linux/gpio.h> -#include <linux/slab.h> -#include <linux/export.h> -#include <asm/qe.h> - -struct qe_gpio_chip { - struct of_mm_gpio_chip mm_gc; - spinlock_t lock; - - unsigned long pin_flags[QE_PIO_PINS]; -#define QE_PIN_REQUESTED 0 - - /* shadowed data register to clear/set bits safely */ - u32 cpdata; - - /* saved_regs used to restore dedicated functions */ - struct qe_pio_regs saved_regs; -}; - -static inline struct qe_gpio_chip * -to_qe_gpio_chip(struct of_mm_gpio_chip *mm_gc) -{ - return container_of(mm_gc, struct qe_gpio_chip, mm_gc); -} - -static void qe_gpio_save_regs(struct of_mm_gpio_chip *mm_gc) -{ - struct qe_gpio_chip *qe_gc = to_qe_gpio_chip(mm_gc); - struct qe_pio_regs __iomem *regs = mm_gc->regs; - - qe_gc->cpdata = in_be32(®s->cpdata); - qe_gc->saved_regs.cpdata = qe_gc->cpdata; - qe_gc->saved_regs.cpdir1 = in_be32(®s->cpdir1); - qe_gc->saved_regs.cpdir2 = in_be32(®s->cpdir2); - qe_gc->saved_regs.cppar1 = in_be32(®s->cppar1); - qe_gc->saved_regs.cppar2 = in_be32(®s->cppar2); - qe_gc->saved_regs.cpodr = in_be32(®s->cpodr); -} - -static int qe_gpio_get(struct gpio_chip *gc, unsigned int gpio) -{ - struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc); - struct qe_pio_regs __iomem *regs = mm_gc->regs; - u32 pin_mask = 1 << (QE_PIO_PINS - 1 - gpio); - - return in_be32(®s->cpdata) & pin_mask; -} - -static void qe_gpio_set(struct gpio_chip *gc, unsigned int gpio, int val) -{ - struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc); - struct qe_gpio_chip *qe_gc = to_qe_gpio_chip(mm_gc); - struct qe_pio_regs __iomem *regs = mm_gc->regs; - unsigned long flags; - u32 pin_mask = 1 << (QE_PIO_PINS - 1 - gpio); - - spin_lock_irqsave(&qe_gc->lock, flags); - - if (val) - qe_gc->cpdata |= pin_mask; - else - qe_gc->cpdata &= ~pin_mask; - - out_be32(®s->cpdata, qe_gc->cpdata); - - spin_unlock_irqrestore(&qe_gc->lock, flags); -} - -static int qe_gpio_dir_in(struct gpio_chip *gc, unsigned int gpio) -{ - struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc); - struct qe_gpio_chip *qe_gc = to_qe_gpio_chip(mm_gc); - unsigned long flags; - - spin_lock_irqsave(&qe_gc->lock, flags); - - __par_io_config_pin(mm_gc->regs, gpio, QE_PIO_DIR_IN, 0, 0, 0); - - spin_unlock_irqrestore(&qe_gc->lock, flags); - - return 0; -} - -static int qe_gpio_dir_out(struct gpio_chip *gc, unsigned int gpio, int val) -{ - struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc); - struct qe_gpio_chip *qe_gc = to_qe_gpio_chip(mm_gc); - unsigned long flags; - - qe_gpio_set(gc, gpio, val); - - spin_lock_irqsave(&qe_gc->lock, flags); - - __par_io_config_pin(mm_gc->regs, gpio, QE_PIO_DIR_OUT, 0, 0, 0); - - spin_unlock_irqrestore(&qe_gc->lock, flags); - - return 0; -} - -struct qe_pin { - /* - * The qe_gpio_chip name is unfortunate, we should change that to - * something like qe_pio_controller. Someday. - */ - struct qe_gpio_chip *controller; - int num; -}; - -/** - * qe_pin_request - Request a QE pin - * @np: device node to get a pin from - * @index: index of a pin in the device tree - * Context: non-atomic - * - * This function return qe_pin so that you could use it with the rest of - * the QE Pin Multiplexing API. - */ -struct qe_pin *qe_pin_request(struct device_node *np, int index) -{ - struct qe_pin *qe_pin; - struct gpio_chip *gc; - struct of_mm_gpio_chip *mm_gc; - struct qe_gpio_chip *qe_gc; - int err; - unsigned long flags; - - qe_pin = kzalloc(sizeof(*qe_pin), GFP_KERNEL); - if (!qe_pin) { - pr_debug("%s: can't allocate memory\n", __func__); - return ERR_PTR(-ENOMEM); - } - - err = of_get_gpio(np, index); - if (err < 0) - goto err0; - gc = gpio_to_chip(err); - if (WARN_ON(!gc)) - goto err0; - - if (!of_device_is_compatible(gc->of_node, "fsl,mpc8323-qe-pario-bank")) { - pr_debug("%s: tried to get a non-qe pin\n", __func__); - err = -EINVAL; - goto err0; - } - - mm_gc = to_of_mm_gpio_chip(gc); - qe_gc = to_qe_gpio_chip(mm_gc); - - spin_lock_irqsave(&qe_gc->lock, flags); - - err -= gc->base; - if (test_and_set_bit(QE_PIN_REQUESTED, &qe_gc->pin_flags[err]) == 0) { - qe_pin->controller = qe_gc; - qe_pin->num = err; - err = 0; - } else { - err = -EBUSY; - } - - spin_unlock_irqrestore(&qe_gc->lock, flags); - - if (!err) - return qe_pin; -err0: - kfree(qe_pin); - pr_debug("%s failed with status %d\n", __func__, err); - return ERR_PTR(err); -} -EXPORT_SYMBOL(qe_pin_request); - -/** - * qe_pin_free - Free a pin - * @qe_pin: pointer to the qe_pin structure - * Context: any - * - * This function frees the qe_pin structure and makes a pin available - * for further qe_pin_request() calls. - */ -void qe_pin_free(struct qe_pin *qe_pin) -{ - struct qe_gpio_chip *qe_gc = qe_pin->controller; - unsigned long flags; - const int pin = qe_pin->num; - - spin_lock_irqsave(&qe_gc->lock, flags); - test_and_clear_bit(QE_PIN_REQUESTED, &qe_gc->pin_flags[pin]); - spin_unlock_irqrestore(&qe_gc->lock, flags); - - kfree(qe_pin); -} -EXPORT_SYMBOL(qe_pin_free); - -/** - * qe_pin_set_dedicated - Revert a pin to a dedicated peripheral function mode - * @qe_pin: pointer to the qe_pin structure - * Context: any - * - * This function resets a pin to a dedicated peripheral function that - * has been set up by the firmware. - */ -void qe_pin_set_dedicated(struct qe_pin *qe_pin) -{ - struct qe_gpio_chip *qe_gc = qe_pin->controller; - struct qe_pio_regs __iomem *regs = qe_gc->mm_gc.regs; - struct qe_pio_regs *sregs = &qe_gc->saved_regs; - int pin = qe_pin->num; - u32 mask1 = 1 << (QE_PIO_PINS - (pin + 1)); - u32 mask2 = 0x3 << (QE_PIO_PINS - (pin % (QE_PIO_PINS / 2) + 1) * 2); - bool second_reg = pin > (QE_PIO_PINS / 2) - 1; - unsigned long flags; - - spin_lock_irqsave(&qe_gc->lock, flags); - - if (second_reg) { - clrsetbits_be32(®s->cpdir2, mask2, sregs->cpdir2 & mask2); - clrsetbits_be32(®s->cppar2, mask2, sregs->cppar2 & mask2); - } else { - clrsetbits_be32(®s->cpdir1, mask2, sregs->cpdir1 & mask2); - clrsetbits_be32(®s->cppar1, mask2, sregs->cppar1 & mask2); - } - - if (sregs->cpdata & mask1) - qe_gc->cpdata |= mask1; - else - qe_gc->cpdata &= ~mask1; - - out_be32(®s->cpdata, qe_gc->cpdata); - clrsetbits_be32(®s->cpodr, mask1, sregs->cpodr & mask1); - - spin_unlock_irqrestore(&qe_gc->lock, flags); -} -EXPORT_SYMBOL(qe_pin_set_dedicated); - -/** - * qe_pin_set_gpio - Set a pin to the GPIO mode - * @qe_pin: pointer to the qe_pin structure - * Context: any - * - * This function sets a pin to the GPIO mode. - */ -void qe_pin_set_gpio(struct qe_pin *qe_pin) -{ - struct qe_gpio_chip *qe_gc = qe_pin->controller; - struct qe_pio_regs __iomem *regs = qe_gc->mm_gc.regs; - unsigned long flags; - - spin_lock_irqsave(&qe_gc->lock, flags); - - /* Let's make it input by default, GPIO API is able to change that. */ - __par_io_config_pin(regs, qe_pin->num, QE_PIO_DIR_IN, 0, 0, 0); - - spin_unlock_irqrestore(&qe_gc->lock, flags); -} -EXPORT_SYMBOL(qe_pin_set_gpio); - -static int __init qe_add_gpiochips(void) -{ - struct device_node *np; - - for_each_compatible_node(np, NULL, "fsl,mpc8323-qe-pario-bank") { - int ret; - struct qe_gpio_chip *qe_gc; - struct of_mm_gpio_chip *mm_gc; - struct gpio_chip *gc; - - qe_gc = kzalloc(sizeof(*qe_gc), GFP_KERNEL); - if (!qe_gc) { - ret = -ENOMEM; - goto err; - } - - spin_lock_init(&qe_gc->lock); - - mm_gc = &qe_gc->mm_gc; - gc = &mm_gc->gc; - - mm_gc->save_regs = qe_gpio_save_regs; - gc->ngpio = QE_PIO_PINS; - gc->direction_input = qe_gpio_dir_in; - gc->direction_output = qe_gpio_dir_out; - gc->get = qe_gpio_get; - gc->set = qe_gpio_set; - - ret = of_mm_gpiochip_add(np, mm_gc); - if (ret) - goto err; - continue; -err: - pr_err("%s: registration failed with status %d\n", - np->full_name, ret); - kfree(qe_gc); - /* try others anyway */ - } - return 0; -} -arch_initcall(qe_add_gpiochips); diff --git a/arch/powerpc/sysdev/qe_lib/qe.c b/arch/powerpc/sysdev/qe_lib/qe.c deleted file mode 100644 index c2518cdb7ddb..000000000000 --- a/arch/powerpc/sysdev/qe_lib/qe.c +++ /dev/null @@ -1,706 +0,0 @@ -/* - * Copyright (C) 2006-2010 Freescale Semiconductor, Inc. All rights reserved. - * - * Authors: Shlomi Gridish <gridish@freescale.com> - * Li Yang <leoli@freescale.com> - * Based on cpm2_common.c from Dan Malek (dmalek@jlc.net) - * - * Description: - * General Purpose functions for the global management of the - * QUICC Engine (QE). - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ -#include <linux/errno.h> -#include <linux/sched.h> -#include <linux/kernel.h> -#include <linux/param.h> -#include <linux/string.h> -#include <linux/spinlock.h> -#include <linux/mm.h> -#include <linux/interrupt.h> -#include <linux/module.h> -#include <linux/delay.h> -#include <linux/ioport.h> -#include <linux/crc32.h> -#include <linux/mod_devicetable.h> -#include <linux/of_platform.h> -#include <asm/irq.h> -#include <asm/page.h> -#include <asm/pgtable.h> -#include <asm/immap_qe.h> -#include <asm/qe.h> -#include <asm/prom.h> -#include <asm/rheap.h> - -static void qe_snums_init(void); -static int qe_sdma_init(void); - -static DEFINE_SPINLOCK(qe_lock); -DEFINE_SPINLOCK(cmxgcr_lock); -EXPORT_SYMBOL(cmxgcr_lock); - -/* QE snum state */ -enum qe_snum_state { - QE_SNUM_STATE_USED, - QE_SNUM_STATE_FREE -}; - -/* QE snum */ -struct qe_snum { - u8 num; - enum qe_snum_state state; -}; - -/* We allocate this here because it is used almost exclusively for - * the communication processor devices. - */ -struct qe_immap __iomem *qe_immr; -EXPORT_SYMBOL(qe_immr); - -static struct qe_snum snums[QE_NUM_OF_SNUM]; /* Dynamically allocated SNUMs */ -static unsigned int qe_num_of_snum; - -static phys_addr_t qebase = -1; - -phys_addr_t get_qe_base(void) -{ - struct device_node *qe; - int size; - const u32 *prop; - - if (qebase != -1) - return qebase; - - qe = of_find_compatible_node(NULL, NULL, "fsl,qe"); - if (!qe) { - qe = of_find_node_by_type(NULL, "qe"); - if (!qe) - return qebase; - } - - prop = of_get_property(qe, "reg", &size); - if (prop && size >= sizeof(*prop)) - qebase = of_translate_address(qe, prop); - of_node_put(qe); - - return qebase; -} - -EXPORT_SYMBOL(get_qe_base); - -void qe_reset(void) -{ - if (qe_immr == NULL) - qe_immr = ioremap(get_qe_base(), QE_IMMAP_SIZE); - - qe_snums_init(); - - qe_issue_cmd(QE_RESET, QE_CR_SUBBLOCK_INVALID, - QE_CR_PROTOCOL_UNSPECIFIED, 0); - - /* Reclaim the MURAM memory for our use. */ - qe_muram_init(); - - if (qe_sdma_init()) - panic("sdma init failed!"); -} - -int qe_issue_cmd(u32 cmd, u32 device, u8 mcn_protocol, u32 cmd_input) -{ - unsigned long flags; - u8 mcn_shift = 0, dev_shift = 0; - u32 ret; - - spin_lock_irqsave(&qe_lock, flags); - if (cmd == QE_RESET) { - out_be32(&qe_immr->cp.cecr, (u32) (cmd | QE_CR_FLG)); - } else { - if (cmd == QE_ASSIGN_PAGE) { - /* Here device is the SNUM, not sub-block */ - dev_shift = QE_CR_SNUM_SHIFT; - } else if (cmd == QE_ASSIGN_RISC) { - /* Here device is the SNUM, and mcnProtocol is - * e_QeCmdRiscAssignment value */ - dev_shift = QE_CR_SNUM_SHIFT; - mcn_shift = QE_CR_MCN_RISC_ASSIGN_SHIFT; - } else { - if (device == QE_CR_SUBBLOCK_USB) - mcn_shift = QE_CR_MCN_USB_SHIFT; - else - mcn_shift = QE_CR_MCN_NORMAL_SHIFT; - } - - out_be32(&qe_immr->cp.cecdr, cmd_input); - out_be32(&qe_immr->cp.cecr, - (cmd | QE_CR_FLG | ((u32) device << dev_shift) | (u32) - mcn_protocol << mcn_shift)); - } - - /* wait for the QE_CR_FLG to clear */ - ret = spin_event_timeout((in_be32(&qe_immr->cp.cecr) & QE_CR_FLG) == 0, - 100, 0); - /* On timeout (e.g. failure), the expression will be false (ret == 0), - otherwise it will be true (ret == 1). */ - spin_unlock_irqrestore(&qe_lock, flags); - - return ret == 1; -} -EXPORT_SYMBOL(qe_issue_cmd); - -/* Set a baud rate generator. This needs lots of work. There are - * 16 BRGs, which can be connected to the QE channels or output - * as clocks. The BRGs are in two different block of internal - * memory mapped space. - * The BRG clock is the QE clock divided by 2. - * It was set up long ago during the initial boot phase and is - * is given to us. - * Baud rate clocks are zero-based in the driver code (as that maps - * to port numbers). Documentation uses 1-based numbering. - */ -static unsigned int brg_clk = 0; - -unsigned int qe_get_brg_clk(void) -{ - struct device_node *qe; - int size; - const u32 *prop; - - if (brg_clk) - return brg_clk; - - qe = of_find_compatible_node(NULL, NULL, "fsl,qe"); - if (!qe) { - qe = of_find_node_by_type(NULL, "qe"); - if (!qe) - return brg_clk; - } - - prop = of_get_property(qe, "brg-frequency", &size); - if (prop && size == sizeof(*prop)) - brg_clk = *prop; - - of_node_put(qe); - - return brg_clk; -} -EXPORT_SYMBOL(qe_get_brg_clk); - -/* Program the BRG to the given sampling rate and multiplier - * - * @brg: the BRG, QE_BRG1 - QE_BRG16 - * @rate: the desired sampling rate - * @multiplier: corresponds to the value programmed in GUMR_L[RDCR] or - * GUMR_L[TDCR]. E.g., if this BRG is the RX clock, and GUMR_L[RDCR]=01, - * then 'multiplier' should be 8. - */ -int qe_setbrg(enum qe_clock brg, unsigned int rate, unsigned int multiplier) -{ - u32 divisor, tempval; - u32 div16 = 0; - - if ((brg < QE_BRG1) || (brg > QE_BRG16)) - return -EINVAL; - - divisor = qe_get_brg_clk() / (rate * multiplier); - - if (divisor > QE_BRGC_DIVISOR_MAX + 1) { - div16 = QE_BRGC_DIV16; - divisor /= 16; - } - - /* Errata QE_General4, which affects some MPC832x and MPC836x SOCs, says - that the BRG divisor must be even if you're not using divide-by-16 - mode. */ - if (!div16 && (divisor & 1) && (divisor > 3)) - divisor++; - - tempval = ((divisor - 1) << QE_BRGC_DIVISOR_SHIFT) | - QE_BRGC_ENABLE | div16; - - out_be32(&qe_immr->brg.brgc[brg - QE_BRG1], tempval); - - return 0; -} -EXPORT_SYMBOL(qe_setbrg); - -/* Convert a string to a QE clock source enum - * - * This function takes a string, typically from a property in the device - * tree, and returns the corresponding "enum qe_clock" value. -*/ -enum qe_clock qe_clock_source(const char *source) -{ - unsigned int i; - - if (strcasecmp(source, "none") == 0) - return QE_CLK_NONE; - - if (strncasecmp(source, "brg", 3) == 0) { - i = simple_strtoul(source + 3, NULL, 10); - if ((i >= 1) && (i <= 16)) - return (QE_BRG1 - 1) + i; - else - return QE_CLK_DUMMY; - } - - if (strncasecmp(source, "clk", 3) == 0) { - i = simple_strtoul(source + 3, NULL, 10); - if ((i >= 1) && (i <= 24)) - return (QE_CLK1 - 1) + i; - else - return QE_CLK_DUMMY; - } - - return QE_CLK_DUMMY; -} -EXPORT_SYMBOL(qe_clock_source); - -/* Initialize SNUMs (thread serial numbers) according to - * QE Module Control chapter, SNUM table - */ -static void qe_snums_init(void) -{ - int i; - static const u8 snum_init_76[] = { - 0x04, 0x05, 0x0C, 0x0D, 0x14, 0x15, 0x1C, 0x1D, - 0x24, 0x25, 0x2C, 0x2D, 0x34, 0x35, 0x88, 0x89, - 0x98, 0x99, 0xA8, 0xA9, 0xB8, 0xB9, 0xC8, 0xC9, - 0xD8, 0xD9, 0xE8, 0xE9, 0x44, 0x45, 0x4C, 0x4D, - 0x54, 0x55, 0x5C, 0x5D, 0x64, 0x65, 0x6C, 0x6D, - 0x74, 0x75, 0x7C, 0x7D, 0x84, 0x85, 0x8C, 0x8D, - 0x94, 0x95, 0x9C, 0x9D, 0xA4, 0xA5, 0xAC, 0xAD, - 0xB4, 0xB5, 0xBC, 0xBD, 0xC4, 0xC5, 0xCC, 0xCD, - 0xD4, 0xD5, 0xDC, 0xDD, 0xE4, 0xE5, 0xEC, 0xED, - 0xF4, 0xF5, 0xFC, 0xFD, - }; - static const u8 snum_init_46[] = { - 0x04, 0x05, 0x0C, 0x0D, 0x14, 0x15, 0x1C, 0x1D, - 0x24, 0x25, 0x2C, 0x2D, 0x34, 0x35, 0x88, 0x89, - 0x98, 0x99, 0xA8, 0xA9, 0xB8, 0xB9, 0xC8, 0xC9, - 0xD8, 0xD9, 0xE8, 0xE9, 0x08, 0x09, 0x18, 0x19, - 0x28, 0x29, 0x38, 0x39, 0x48, 0x49, 0x58, 0x59, - 0x68, 0x69, 0x78, 0x79, 0x80, 0x81, - }; - static const u8 *snum_init; - - qe_num_of_snum = qe_get_num_of_snums(); - - if (qe_num_of_snum == 76) - snum_init = snum_init_76; - else - snum_init = snum_init_46; - - for (i = 0; i < qe_num_of_snum; i++) { - snums[i].num = snum_init[i]; - snums[i].state = QE_SNUM_STATE_FREE; - } -} - -int qe_get_snum(void) -{ - unsigned long flags; - int snum = -EBUSY; - int i; - - spin_lock_irqsave(&qe_lock, flags); - for (i = 0; i < qe_num_of_snum; i++) { - if (snums[i].state == QE_SNUM_STATE_FREE) { - snums[i].state = QE_SNUM_STATE_USED; - snum = snums[i].num; - break; - } - } - spin_unlock_irqrestore(&qe_lock, flags); - - return snum; -} -EXPORT_SYMBOL(qe_get_snum); - -void qe_put_snum(u8 snum) -{ - int i; - - for (i = 0; i < qe_num_of_snum; i++) { - if (snums[i].num == snum) { - snums[i].state = QE_SNUM_STATE_FREE; - break; - } - } -} -EXPORT_SYMBOL(qe_put_snum); - -static int qe_sdma_init(void) -{ - struct sdma __iomem *sdma = &qe_immr->sdma; - static unsigned long sdma_buf_offset = (unsigned long)-ENOMEM; - - if (!sdma) - return -ENODEV; - - /* allocate 2 internal temporary buffers (512 bytes size each) for - * the SDMA */ - if (IS_ERR_VALUE(sdma_buf_offset)) { - sdma_buf_offset = qe_muram_alloc(512 * 2, 4096); - if (IS_ERR_VALUE(sdma_buf_offset)) - return -ENOMEM; - } - - out_be32(&sdma->sdebcr, (u32) sdma_buf_offset & QE_SDEBCR_BA_MASK); - out_be32(&sdma->sdmr, (QE_SDMR_GLB_1_MSK | - (0x1 << QE_SDMR_CEN_SHIFT))); - - return 0; -} - -/* The maximum number of RISCs we support */ -#define MAX_QE_RISC 4 - -/* Firmware information stored here for qe_get_firmware_info() */ -static struct qe_firmware_info qe_firmware_info; - -/* - * Set to 1 if QE firmware has been uploaded, and therefore - * qe_firmware_info contains valid data. - */ -static int qe_firmware_uploaded; - -/* - * Upload a QE microcode - * - * This function is a worker function for qe_upload_firmware(). It does - * the actual uploading of the microcode. - */ -static void qe_upload_microcode(const void *base, - const struct qe_microcode *ucode) -{ - const __be32 *code = base + be32_to_cpu(ucode->code_offset); - unsigned int i; - - if (ucode->major || ucode->minor || ucode->revision) - printk(KERN_INFO "qe-firmware: " - "uploading microcode '%s' version %u.%u.%u\n", - ucode->id, ucode->major, ucode->minor, ucode->revision); - else - printk(KERN_INFO "qe-firmware: " - "uploading microcode '%s'\n", ucode->id); - - /* Use auto-increment */ - out_be32(&qe_immr->iram.iadd, be32_to_cpu(ucode->iram_offset) | - QE_IRAM_IADD_AIE | QE_IRAM_IADD_BADDR); - - for (i = 0; i < be32_to_cpu(ucode->count); i++) - out_be32(&qe_immr->iram.idata, be32_to_cpu(code[i])); - - /* Set I-RAM Ready Register */ - out_be32(&qe_immr->iram.iready, be32_to_cpu(QE_IRAM_READY)); -} - -/* - * Upload a microcode to the I-RAM at a specific address. - * - * See Documentation/powerpc/qe_firmware.txt for information on QE microcode - * uploading. - * - * Currently, only version 1 is supported, so the 'version' field must be - * set to 1. - * - * The SOC model and revision are not validated, they are only displayed for - * informational purposes. - * - * 'calc_size' is the calculated size, in bytes, of the firmware structure and - * all of the microcode structures, minus the CRC. - * - * 'length' is the size that the structure says it is, including the CRC. - */ -int qe_upload_firmware(const struct qe_firmware *firmware) -{ - unsigned int i; - unsigned int j; - u32 crc; - size_t calc_size = sizeof(struct qe_firmware); - size_t length; - const struct qe_header *hdr; - - if (!firmware) { - printk(KERN_ERR "qe-firmware: invalid pointer\n"); - return -EINVAL; - } - - hdr = &firmware->header; - length = be32_to_cpu(hdr->length); - - /* Check the magic */ - if ((hdr->magic[0] != 'Q') || (hdr->magic[1] != 'E') || - (hdr->magic[2] != 'F')) { - printk(KERN_ERR "qe-firmware: not a microcode\n"); - return -EPERM; - } - - /* Check the version */ - if (hdr->version != 1) { - printk(KERN_ERR "qe-firmware: unsupported version\n"); - return -EPERM; - } - - /* Validate some of the fields */ - if ((firmware->count < 1) || (firmware->count > MAX_QE_RISC)) { - printk(KERN_ERR "qe-firmware: invalid data\n"); - return -EINVAL; - } - - /* Validate the length and check if there's a CRC */ - calc_size += (firmware->count - 1) * sizeof(struct qe_microcode); - - for (i = 0; i < firmware->count; i++) - /* - * For situations where the second RISC uses the same microcode - * as the first, the 'code_offset' and 'count' fields will be - * zero, so it's okay to add those. - */ - calc_size += sizeof(__be32) * - be32_to_cpu(firmware->microcode[i].count); - - /* Validate the length */ - if (length != calc_size + sizeof(__be32)) { - printk(KERN_ERR "qe-firmware: invalid length\n"); - return -EPERM; - } - - /* Validate the CRC */ - crc = be32_to_cpu(*(__be32 *)((void *)firmware + calc_size)); - if (crc != crc32(0, firmware, calc_size)) { - printk(KERN_ERR "qe-firmware: firmware CRC is invalid\n"); - return -EIO; - } - - /* - * If the microcode calls for it, split the I-RAM. - */ - if (!firmware->split) - setbits16(&qe_immr->cp.cercr, QE_CP_CERCR_CIR); - - if (firmware->soc.model) - printk(KERN_INFO - "qe-firmware: firmware '%s' for %u V%u.%u\n", - firmware->id, be16_to_cpu(firmware->soc.model), - firmware->soc.major, firmware->soc.minor); - else - printk(KERN_INFO "qe-firmware: firmware '%s'\n", - firmware->id); - - /* - * The QE only supports one microcode per RISC, so clear out all the - * saved microcode information and put in the new. - */ - memset(&qe_firmware_info, 0, sizeof(qe_firmware_info)); - strlcpy(qe_firmware_info.id, firmware->id, sizeof(qe_firmware_info.id)); - qe_firmware_info.extended_modes = firmware->extended_modes; - memcpy(qe_firmware_info.vtraps, firmware->vtraps, - sizeof(firmware->vtraps)); - - /* Loop through each microcode. */ - for (i = 0; i < firmware->count; i++) { - const struct qe_microcode *ucode = &firmware->microcode[i]; - - /* Upload a microcode if it's present */ - if (ucode->code_offset) - qe_upload_microcode(firmware, ucode); - - /* Program the traps for this processor */ - for (j = 0; j < 16; j++) { - u32 trap = be32_to_cpu(ucode->traps[j]); - - if (trap) - out_be32(&qe_immr->rsp[i].tibcr[j], trap); - } - - /* Enable traps */ - out_be32(&qe_immr->rsp[i].eccr, be32_to_cpu(ucode->eccr)); - } - - qe_firmware_uploaded = 1; - - return 0; -} -EXPORT_SYMBOL(qe_upload_firmware); - -/* - * Get info on the currently-loaded firmware - * - * This function also checks the device tree to see if the boot loader has - * uploaded a firmware already. - */ -struct qe_firmware_info *qe_get_firmware_info(void) -{ - static int initialized; - struct property *prop; - struct device_node *qe; - struct device_node *fw = NULL; - const char *sprop; - unsigned int i; - - /* - * If we haven't checked yet, and a driver hasn't uploaded a firmware - * yet, then check the device tree for information. - */ - if (qe_firmware_uploaded) - return &qe_firmware_info; - - if (initialized) - return NULL; - - initialized = 1; - - /* - * Newer device trees have an "fsl,qe" compatible property for the QE - * node, but we still need to support older device trees. - */ - qe = of_find_compatible_node(NULL, NULL, "fsl,qe"); - if (!qe) { - qe = of_find_node_by_type(NULL, "qe"); - if (!qe) - return NULL; - } - - /* Find the 'firmware' child node */ - for_each_child_of_node(qe, fw) { - if (strcmp(fw->name, "firmware") == 0) - break; - } - - of_node_put(qe); - - /* Did we find the 'firmware' node? */ - if (!fw) - return NULL; - - qe_firmware_uploaded = 1; - - /* Copy the data into qe_firmware_info*/ - sprop = of_get_property(fw, "id", NULL); - if (sprop) - strlcpy(qe_firmware_info.id, sprop, - sizeof(qe_firmware_info.id)); - - prop = of_find_property(fw, "extended-modes", NULL); - if (prop && (prop->length == sizeof(u64))) { - const u64 *iprop = prop->value; - - qe_firmware_info.extended_modes = *iprop; - } - - prop = of_find_property(fw, "virtual-traps", NULL); - if (prop && (prop->length == 32)) { - const u32 *iprop = prop->value; - - for (i = 0; i < ARRAY_SIZE(qe_firmware_info.vtraps); i++) - qe_firmware_info.vtraps[i] = iprop[i]; - } - - of_node_put(fw); - - return &qe_firmware_info; -} -EXPORT_SYMBOL(qe_get_firmware_info); - -unsigned int qe_get_num_of_risc(void) -{ - struct device_node *qe; - int size; - unsigned int num_of_risc = 0; - const u32 *prop; - - qe = of_find_compatible_node(NULL, NULL, "fsl,qe"); - if (!qe) { - /* Older devices trees did not have an "fsl,qe" - * compatible property, so we need to look for - * the QE node by name. - */ - qe = of_find_node_by_type(NULL, "qe"); - if (!qe) - return num_of_risc; - } - - prop = of_get_property(qe, "fsl,qe-num-riscs", &size); - if (prop && size == sizeof(*prop)) - num_of_risc = *prop; - - of_node_put(qe); - - return num_of_risc; -} -EXPORT_SYMBOL(qe_get_num_of_risc); - -unsigned int qe_get_num_of_snums(void) -{ - struct device_node *qe; - int size; - unsigned int num_of_snums; - const u32 *prop; - - num_of_snums = 28; /* The default number of snum for threads is 28 */ - qe = of_find_compatible_node(NULL, NULL, "fsl,qe"); - if (!qe) { - /* Older devices trees did not have an "fsl,qe" - * compatible property, so we need to look for - * the QE node by name. - */ - qe = of_find_node_by_type(NULL, "qe"); - if (!qe) - return num_of_snums; - } - - prop = of_get_property(qe, "fsl,qe-num-snums", &size); - if (prop && size == sizeof(*prop)) { - num_of_snums = *prop; - if ((num_of_snums < 28) || (num_of_snums > QE_NUM_OF_SNUM)) { - /* No QE ever has fewer than 28 SNUMs */ - pr_err("QE: number of snum is invalid\n"); - of_node_put(qe); - return -EINVAL; - } - } - - of_node_put(qe); - - return num_of_snums; -} -EXPORT_SYMBOL(qe_get_num_of_snums); - -#if defined(CONFIG_SUSPEND) && defined(CONFIG_PPC_85xx) -static int qe_resume(struct platform_device *ofdev) -{ - if (!qe_alive_during_sleep()) - qe_reset(); - return 0; -} - -static int qe_probe(struct platform_device *ofdev) -{ - return 0; -} - -static const struct of_device_id qe_ids[] = { - { .compatible = "fsl,qe", }, - { }, -}; - -static struct platform_driver qe_driver = { - .driver = { - .name = "fsl-qe", - .of_match_table = qe_ids, - }, - .probe = qe_probe, - .resume = qe_resume, -}; - -static int __init qe_drv_init(void) -{ - return platform_driver_register(&qe_driver); -} -device_initcall(qe_drv_init); -#endif /* defined(CONFIG_SUSPEND) && defined(CONFIG_PPC_85xx) */ diff --git a/arch/powerpc/sysdev/qe_lib/qe_ic.c b/arch/powerpc/sysdev/qe_lib/qe_ic.c deleted file mode 100644 index ef36f16f9f6f..000000000000 --- a/arch/powerpc/sysdev/qe_lib/qe_ic.c +++ /dev/null @@ -1,502 +0,0 @@ -/* - * arch/powerpc/sysdev/qe_lib/qe_ic.c - * - * Copyright (C) 2006 Freescale Semiconductor, Inc. All rights reserved. - * - * Author: Li Yang <leoli@freescale.com> - * Based on code from Shlomi Gridish <gridish@freescale.com> - * - * QUICC ENGINE Interrupt Controller - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ - -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/errno.h> -#include <linux/reboot.h> -#include <linux/slab.h> -#include <linux/stddef.h> -#include <linux/sched.h> -#include <linux/signal.h> -#include <linux/device.h> -#include <linux/spinlock.h> -#include <asm/irq.h> -#include <asm/io.h> -#include <asm/prom.h> -#include <asm/qe_ic.h> - -#include "qe_ic.h" - -static DEFINE_RAW_SPINLOCK(qe_ic_lock); - -static struct qe_ic_info qe_ic_info[] = { - [1] = { - .mask = 0x00008000, - .mask_reg = QEIC_CIMR, - .pri_code = 0, - .pri_reg = QEIC_CIPWCC, - }, - [2] = { - .mask = 0x00004000, - .mask_reg = QEIC_CIMR, - .pri_code = 1, - .pri_reg = QEIC_CIPWCC, - }, - [3] = { - .mask = 0x00002000, - .mask_reg = QEIC_CIMR, - .pri_code = 2, - .pri_reg = QEIC_CIPWCC, - }, - [10] = { - .mask = 0x00000040, - .mask_reg = QEIC_CIMR, - .pri_code = 1, - .pri_reg = QEIC_CIPZCC, - }, - [11] = { - .mask = 0x00000020, - .mask_reg = QEIC_CIMR, - .pri_code = 2, - .pri_reg = QEIC_CIPZCC, - }, - [12] = { - .mask = 0x00000010, - .mask_reg = QEIC_CIMR, - .pri_code = 3, - .pri_reg = QEIC_CIPZCC, - }, - [13] = { - .mask = 0x00000008, - .mask_reg = QEIC_CIMR, - .pri_code = 4, - .pri_reg = QEIC_CIPZCC, - }, - [14] = { - .mask = 0x00000004, - .mask_reg = QEIC_CIMR, - .pri_code = 5, - .pri_reg = QEIC_CIPZCC, - }, - [15] = { - .mask = 0x00000002, - .mask_reg = QEIC_CIMR, - .pri_code = 6, - .pri_reg = QEIC_CIPZCC, - }, - [20] = { - .mask = 0x10000000, - .mask_reg = QEIC_CRIMR, - .pri_code = 3, - .pri_reg = QEIC_CIPRTA, - }, - [25] = { - .mask = 0x00800000, - .mask_reg = QEIC_CRIMR, - .pri_code = 0, - .pri_reg = QEIC_CIPRTB, - }, - [26] = { - .mask = 0x00400000, - .mask_reg = QEIC_CRIMR, - .pri_code = 1, - .pri_reg = QEIC_CIPRTB, - }, - [27] = { - .mask = 0x00200000, - .mask_reg = QEIC_CRIMR, - .pri_code = 2, - .pri_reg = QEIC_CIPRTB, - }, - [28] = { - .mask = 0x00100000, - .mask_reg = QEIC_CRIMR, - .pri_code = 3, - .pri_reg = QEIC_CIPRTB, - }, - [32] = { - .mask = 0x80000000, - .mask_reg = QEIC_CIMR, - .pri_code = 0, - .pri_reg = QEIC_CIPXCC, - }, - [33] = { - .mask = 0x40000000, - .mask_reg = QEIC_CIMR, - .pri_code = 1, - .pri_reg = QEIC_CIPXCC, - }, - [34] = { - .mask = 0x20000000, - .mask_reg = QEIC_CIMR, - .pri_code = 2, - .pri_reg = QEIC_CIPXCC, - }, - [35] = { - .mask = 0x10000000, - .mask_reg = QEIC_CIMR, - .pri_code = 3, - .pri_reg = QEIC_CIPXCC, - }, - [36] = { - .mask = 0x08000000, - .mask_reg = QEIC_CIMR, - .pri_code = 4, - .pri_reg = QEIC_CIPXCC, - }, - [40] = { - .mask = 0x00800000, - .mask_reg = QEIC_CIMR, - .pri_code = 0, - .pri_reg = QEIC_CIPYCC, - }, - [41] = { - .mask = 0x00400000, - .mask_reg = QEIC_CIMR, - .pri_code = 1, - .pri_reg = QEIC_CIPYCC, - }, - [42] = { - .mask = 0x00200000, - .mask_reg = QEIC_CIMR, - .pri_code = 2, - .pri_reg = QEIC_CIPYCC, - }, - [43] = { - .mask = 0x00100000, - .mask_reg = QEIC_CIMR, - .pri_code = 3, - .pri_reg = QEIC_CIPYCC, - }, -}; - -static inline u32 qe_ic_read(volatile __be32 __iomem * base, unsigned int reg) -{ - return in_be32(base + (reg >> 2)); -} - -static inline void qe_ic_write(volatile __be32 __iomem * base, unsigned int reg, - u32 value) -{ - out_be32(base + (reg >> 2), value); -} - -static inline struct qe_ic *qe_ic_from_irq(unsigned int virq) -{ - return irq_get_chip_data(virq); -} - -static inline struct qe_ic *qe_ic_from_irq_data(struct irq_data *d) -{ - return irq_data_get_irq_chip_data(d); -} - -static void qe_ic_unmask_irq(struct irq_data *d) -{ - struct qe_ic *qe_ic = qe_ic_from_irq_data(d); - unsigned int src = irqd_to_hwirq(d); - unsigned long flags; - u32 temp; - - raw_spin_lock_irqsave(&qe_ic_lock, flags); - - temp = qe_ic_read(qe_ic->regs, qe_ic_info[src].mask_reg); - qe_ic_write(qe_ic->regs, qe_ic_info[src].mask_reg, - temp | qe_ic_info[src].mask); - - raw_spin_unlock_irqrestore(&qe_ic_lock, flags); -} - -static void qe_ic_mask_irq(struct irq_data *d) -{ - struct qe_ic *qe_ic = qe_ic_from_irq_data(d); - unsigned int src = irqd_to_hwirq(d); - unsigned long flags; - u32 temp; - - raw_spin_lock_irqsave(&qe_ic_lock, flags); - - temp = qe_ic_read(qe_ic->regs, qe_ic_info[src].mask_reg); - qe_ic_write(qe_ic->regs, qe_ic_info[src].mask_reg, - temp & ~qe_ic_info[src].mask); - - /* Flush the above write before enabling interrupts; otherwise, - * spurious interrupts will sometimes happen. To be 100% sure - * that the write has reached the device before interrupts are - * enabled, the mask register would have to be read back; however, - * this is not required for correctness, only to avoid wasting - * time on a large number of spurious interrupts. In testing, - * a sync reduced the observed spurious interrupts to zero. - */ - mb(); - - raw_spin_unlock_irqrestore(&qe_ic_lock, flags); -} - -static struct irq_chip qe_ic_irq_chip = { - .name = "QEIC", - .irq_unmask = qe_ic_unmask_irq, - .irq_mask = qe_ic_mask_irq, - .irq_mask_ack = qe_ic_mask_irq, -}; - -static int qe_ic_host_match(struct irq_domain *h, struct device_node *node, - enum irq_domain_bus_token bus_token) -{ - /* Exact match, unless qe_ic node is NULL */ - struct device_node *of_node = irq_domain_get_of_node(h); - return of_node == NULL || of_node == node; -} - -static int qe_ic_host_map(struct irq_domain *h, unsigned int virq, - irq_hw_number_t hw) -{ - struct qe_ic *qe_ic = h->host_data; - struct irq_chip *chip; - - if (qe_ic_info[hw].mask == 0) { - printk(KERN_ERR "Can't map reserved IRQ\n"); - return -EINVAL; - } - /* Default chip */ - chip = &qe_ic->hc_irq; - - irq_set_chip_data(virq, qe_ic); - irq_set_status_flags(virq, IRQ_LEVEL); - - irq_set_chip_and_handler(virq, chip, handle_level_irq); - - return 0; -} - -static const struct irq_domain_ops qe_ic_host_ops = { - .match = qe_ic_host_match, - .map = qe_ic_host_map, - .xlate = irq_domain_xlate_onetwocell, -}; - -/* Return an interrupt vector or NO_IRQ if no interrupt is pending. */ -unsigned int qe_ic_get_low_irq(struct qe_ic *qe_ic) -{ - int irq; - - BUG_ON(qe_ic == NULL); - - /* get the interrupt source vector. */ - irq = qe_ic_read(qe_ic->regs, QEIC_CIVEC) >> 26; - - if (irq == 0) - return NO_IRQ; - - return irq_linear_revmap(qe_ic->irqhost, irq); -} - -/* Return an interrupt vector or NO_IRQ if no interrupt is pending. */ -unsigned int qe_ic_get_high_irq(struct qe_ic *qe_ic) -{ - int irq; - - BUG_ON(qe_ic == NULL); - - /* get the interrupt source vector. */ - irq = qe_ic_read(qe_ic->regs, QEIC_CHIVEC) >> 26; - - if (irq == 0) - return NO_IRQ; - - return irq_linear_revmap(qe_ic->irqhost, irq); -} - -void __init qe_ic_init(struct device_node *node, unsigned int flags, - void (*low_handler)(struct irq_desc *desc), - void (*high_handler)(struct irq_desc *desc)) -{ - struct qe_ic *qe_ic; - struct resource res; - u32 temp = 0, ret, high_active = 0; - - ret = of_address_to_resource(node, 0, &res); - if (ret) - return; - - qe_ic = kzalloc(sizeof(*qe_ic), GFP_KERNEL); - if (qe_ic == NULL) - return; - - qe_ic->irqhost = irq_domain_add_linear(node, NR_QE_IC_INTS, - &qe_ic_host_ops, qe_ic); - if (qe_ic->irqhost == NULL) { - kfree(qe_ic); - return; - } - - qe_ic->regs = ioremap(res.start, resource_size(&res)); - - qe_ic->hc_irq = qe_ic_irq_chip; - - qe_ic->virq_high = irq_of_parse_and_map(node, 0); - qe_ic->virq_low = irq_of_parse_and_map(node, 1); - - if (qe_ic->virq_low == NO_IRQ) { - printk(KERN_ERR "Failed to map QE_IC low IRQ\n"); - kfree(qe_ic); - return; - } - - /* default priority scheme is grouped. If spread mode is */ - /* required, configure cicr accordingly. */ - if (flags & QE_IC_SPREADMODE_GRP_W) - temp |= CICR_GWCC; - if (flags & QE_IC_SPREADMODE_GRP_X) - temp |= CICR_GXCC; - if (flags & QE_IC_SPREADMODE_GRP_Y) - temp |= CICR_GYCC; - if (flags & QE_IC_SPREADMODE_GRP_Z) - temp |= CICR_GZCC; - if (flags & QE_IC_SPREADMODE_GRP_RISCA) - temp |= CICR_GRTA; - if (flags & QE_IC_SPREADMODE_GRP_RISCB) - temp |= CICR_GRTB; - - /* choose destination signal for highest priority interrupt */ - if (flags & QE_IC_HIGH_SIGNAL) { - temp |= (SIGNAL_HIGH << CICR_HPIT_SHIFT); - high_active = 1; - } - - qe_ic_write(qe_ic->regs, QEIC_CICR, temp); - - irq_set_handler_data(qe_ic->virq_low, qe_ic); - irq_set_chained_handler(qe_ic->virq_low, low_handler); - - if (qe_ic->virq_high != NO_IRQ && - qe_ic->virq_high != qe_ic->virq_low) { - irq_set_handler_data(qe_ic->virq_high, qe_ic); - irq_set_chained_handler(qe_ic->virq_high, high_handler); - } -} - -void qe_ic_set_highest_priority(unsigned int virq, int high) -{ - struct qe_ic *qe_ic = qe_ic_from_irq(virq); - unsigned int src = virq_to_hw(virq); - u32 temp = 0; - - temp = qe_ic_read(qe_ic->regs, QEIC_CICR); - - temp &= ~CICR_HP_MASK; - temp |= src << CICR_HP_SHIFT; - - temp &= ~CICR_HPIT_MASK; - temp |= (high ? SIGNAL_HIGH : SIGNAL_LOW) << CICR_HPIT_SHIFT; - - qe_ic_write(qe_ic->regs, QEIC_CICR, temp); -} - -/* Set Priority level within its group, from 1 to 8 */ -int qe_ic_set_priority(unsigned int virq, unsigned int priority) -{ - struct qe_ic *qe_ic = qe_ic_from_irq(virq); - unsigned int src = virq_to_hw(virq); - u32 temp; - - if (priority > 8 || priority == 0) - return -EINVAL; - if (src > 127) - return -EINVAL; - if (qe_ic_info[src].pri_reg == 0) - return -EINVAL; - - temp = qe_ic_read(qe_ic->regs, qe_ic_info[src].pri_reg); - - if (priority < 4) { - temp &= ~(0x7 << (32 - priority * 3)); - temp |= qe_ic_info[src].pri_code << (32 - priority * 3); - } else { - temp &= ~(0x7 << (24 - priority * 3)); - temp |= qe_ic_info[src].pri_code << (24 - priority * 3); - } - - qe_ic_write(qe_ic->regs, qe_ic_info[src].pri_reg, temp); - - return 0; -} - -/* Set a QE priority to use high irq, only priority 1~2 can use high irq */ -int qe_ic_set_high_priority(unsigned int virq, unsigned int priority, int high) -{ - struct qe_ic *qe_ic = qe_ic_from_irq(virq); - unsigned int src = virq_to_hw(virq); - u32 temp, control_reg = QEIC_CICNR, shift = 0; - - if (priority > 2 || priority == 0) - return -EINVAL; - - switch (qe_ic_info[src].pri_reg) { - case QEIC_CIPZCC: - shift = CICNR_ZCC1T_SHIFT; - break; - case QEIC_CIPWCC: - shift = CICNR_WCC1T_SHIFT; - break; - case QEIC_CIPYCC: - shift = CICNR_YCC1T_SHIFT; - break; - case QEIC_CIPXCC: - shift = CICNR_XCC1T_SHIFT; - break; - case QEIC_CIPRTA: - shift = CRICR_RTA1T_SHIFT; - control_reg = QEIC_CRICR; - break; - case QEIC_CIPRTB: - shift = CRICR_RTB1T_SHIFT; - control_reg = QEIC_CRICR; - break; - default: - return -EINVAL; - } - - shift += (2 - priority) * 2; - temp = qe_ic_read(qe_ic->regs, control_reg); - temp &= ~(SIGNAL_MASK << shift); - temp |= (high ? SIGNAL_HIGH : SIGNAL_LOW) << shift; - qe_ic_write(qe_ic->regs, control_reg, temp); - - return 0; -} - -static struct bus_type qe_ic_subsys = { - .name = "qe_ic", - .dev_name = "qe_ic", -}; - -static struct device device_qe_ic = { - .id = 0, - .bus = &qe_ic_subsys, -}; - -static int __init init_qe_ic_sysfs(void) -{ - int rc; - - printk(KERN_DEBUG "Registering qe_ic with sysfs...\n"); - - rc = subsys_system_register(&qe_ic_subsys, NULL); - if (rc) { - printk(KERN_ERR "Failed registering qe_ic sys class\n"); - return -ENODEV; - } - rc = device_register(&device_qe_ic); - if (rc) { - printk(KERN_ERR "Failed registering qe_ic sys device\n"); - return -ENODEV; - } - return 0; -} - -subsys_initcall(init_qe_ic_sysfs); diff --git a/arch/powerpc/sysdev/qe_lib/qe_ic.h b/arch/powerpc/sysdev/qe_lib/qe_ic.h deleted file mode 100644 index efef7ab9b753..000000000000 --- a/arch/powerpc/sysdev/qe_lib/qe_ic.h +++ /dev/null @@ -1,103 +0,0 @@ -/* - * arch/powerpc/sysdev/qe_lib/qe_ic.h - * - * QUICC ENGINE Interrupt Controller Header - * - * Copyright (C) 2006 Freescale Semiconductor, Inc. All rights reserved. - * - * Author: Li Yang <leoli@freescale.com> - * Based on code from Shlomi Gridish <gridish@freescale.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ -#ifndef _POWERPC_SYSDEV_QE_IC_H -#define _POWERPC_SYSDEV_QE_IC_H - -#include <asm/qe_ic.h> - -#define NR_QE_IC_INTS 64 - -/* QE IC registers offset */ -#define QEIC_CICR 0x00 -#define QEIC_CIVEC 0x04 -#define QEIC_CRIPNR 0x08 -#define QEIC_CIPNR 0x0c -#define QEIC_CIPXCC 0x10 -#define QEIC_CIPYCC 0x14 -#define QEIC_CIPWCC 0x18 -#define QEIC_CIPZCC 0x1c -#define QEIC_CIMR 0x20 -#define QEIC_CRIMR 0x24 -#define QEIC_CICNR 0x28 -#define QEIC_CIPRTA 0x30 -#define QEIC_CIPRTB 0x34 -#define QEIC_CRICR 0x3c -#define QEIC_CHIVEC 0x60 - -/* Interrupt priority registers */ -#define CIPCC_SHIFT_PRI0 29 -#define CIPCC_SHIFT_PRI1 26 -#define CIPCC_SHIFT_PRI2 23 -#define CIPCC_SHIFT_PRI3 20 -#define CIPCC_SHIFT_PRI4 13 -#define CIPCC_SHIFT_PRI5 10 -#define CIPCC_SHIFT_PRI6 7 -#define CIPCC_SHIFT_PRI7 4 - -/* CICR priority modes */ -#define CICR_GWCC 0x00040000 -#define CICR_GXCC 0x00020000 -#define CICR_GYCC 0x00010000 -#define CICR_GZCC 0x00080000 -#define CICR_GRTA 0x00200000 -#define CICR_GRTB 0x00400000 -#define CICR_HPIT_SHIFT 8 -#define CICR_HPIT_MASK 0x00000300 -#define CICR_HP_SHIFT 24 -#define CICR_HP_MASK 0x3f000000 - -/* CICNR */ -#define CICNR_WCC1T_SHIFT 20 -#define CICNR_ZCC1T_SHIFT 28 -#define CICNR_YCC1T_SHIFT 12 -#define CICNR_XCC1T_SHIFT 4 - -/* CRICR */ -#define CRICR_RTA1T_SHIFT 20 -#define CRICR_RTB1T_SHIFT 28 - -/* Signal indicator */ -#define SIGNAL_MASK 3 -#define SIGNAL_HIGH 2 -#define SIGNAL_LOW 0 - -struct qe_ic { - /* Control registers offset */ - volatile u32 __iomem *regs; - - /* The remapper for this QEIC */ - struct irq_domain *irqhost; - - /* The "linux" controller struct */ - struct irq_chip hc_irq; - - /* VIRQ numbers of QE high/low irqs */ - unsigned int virq_high; - unsigned int virq_low; -}; - -/* - * QE interrupt controller internal structure - */ -struct qe_ic_info { - u32 mask; /* location of this source at the QIMR register. */ - u32 mask_reg; /* Mask register offset */ - u8 pri_code; /* for grouped interrupts sources - the interrupt - code as appears at the group priority register */ - u32 pri_reg; /* Group priority register offset */ -}; - -#endif /* _POWERPC_SYSDEV_QE_IC_H */ diff --git a/arch/powerpc/sysdev/qe_lib/qe_io.c b/arch/powerpc/sysdev/qe_lib/qe_io.c deleted file mode 100644 index 7ea0174f6d3d..000000000000 --- a/arch/powerpc/sysdev/qe_lib/qe_io.c +++ /dev/null @@ -1,192 +0,0 @@ -/* - * arch/powerpc/sysdev/qe_lib/qe_io.c - * - * QE Parallel I/O ports configuration routines - * - * Copyright 2006 Freescale Semiconductor, Inc. All rights reserved. - * - * Author: Li Yang <LeoLi@freescale.com> - * Based on code from Shlomi Gridish <gridish@freescale.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ - -#include <linux/stddef.h> -#include <linux/kernel.h> -#include <linux/errno.h> -#include <linux/module.h> -#include <linux/ioport.h> - -#include <asm/io.h> -#include <asm/qe.h> -#include <asm/prom.h> -#include <sysdev/fsl_soc.h> - -#undef DEBUG - -static struct qe_pio_regs __iomem *par_io; -static int num_par_io_ports = 0; - -int par_io_init(struct device_node *np) -{ - struct resource res; - int ret; - const u32 *num_ports; - - /* Map Parallel I/O ports registers */ - ret = of_address_to_resource(np, 0, &res); - if (ret) - return ret; - par_io = ioremap(res.start, resource_size(&res)); - - num_ports = of_get_property(np, "num-ports", NULL); - if (num_ports) - num_par_io_ports = *num_ports; - - return 0; -} - -void __par_io_config_pin(struct qe_pio_regs __iomem *par_io, u8 pin, int dir, - int open_drain, int assignment, int has_irq) -{ - u32 pin_mask1bit; - u32 pin_mask2bits; - u32 new_mask2bits; - u32 tmp_val; - - /* calculate pin location for single and 2 bits information */ - pin_mask1bit = (u32) (1 << (QE_PIO_PINS - (pin + 1))); - - /* Set open drain, if required */ - tmp_val = in_be32(&par_io->cpodr); - if (open_drain) - out_be32(&par_io->cpodr, pin_mask1bit | tmp_val); - else - out_be32(&par_io->cpodr, ~pin_mask1bit & tmp_val); - - /* define direction */ - tmp_val = (pin > (QE_PIO_PINS / 2) - 1) ? - in_be32(&par_io->cpdir2) : - in_be32(&par_io->cpdir1); - - /* get all bits mask for 2 bit per port */ - pin_mask2bits = (u32) (0x3 << (QE_PIO_PINS - - (pin % (QE_PIO_PINS / 2) + 1) * 2)); - - /* Get the final mask we need for the right definition */ - new_mask2bits = (u32) (dir << (QE_PIO_PINS - - (pin % (QE_PIO_PINS / 2) + 1) * 2)); - - /* clear and set 2 bits mask */ - if (pin > (QE_PIO_PINS / 2) - 1) { - out_be32(&par_io->cpdir2, - ~pin_mask2bits & tmp_val); - tmp_val &= ~pin_mask2bits; - out_be32(&par_io->cpdir2, new_mask2bits | tmp_val); - } else { - out_be32(&par_io->cpdir1, - ~pin_mask2bits & tmp_val); - tmp_val &= ~pin_mask2bits; - out_be32(&par_io->cpdir1, new_mask2bits | tmp_val); - } - /* define pin assignment */ - tmp_val = (pin > (QE_PIO_PINS / 2) - 1) ? - in_be32(&par_io->cppar2) : - in_be32(&par_io->cppar1); - - new_mask2bits = (u32) (assignment << (QE_PIO_PINS - - (pin % (QE_PIO_PINS / 2) + 1) * 2)); - /* clear and set 2 bits mask */ - if (pin > (QE_PIO_PINS / 2) - 1) { - out_be32(&par_io->cppar2, - ~pin_mask2bits & tmp_val); - tmp_val &= ~pin_mask2bits; - out_be32(&par_io->cppar2, new_mask2bits | tmp_val); - } else { - out_be32(&par_io->cppar1, - ~pin_mask2bits & tmp_val); - tmp_val &= ~pin_mask2bits; - out_be32(&par_io->cppar1, new_mask2bits | tmp_val); - } -} -EXPORT_SYMBOL(__par_io_config_pin); - -int par_io_config_pin(u8 port, u8 pin, int dir, int open_drain, - int assignment, int has_irq) -{ - if (!par_io || port >= num_par_io_ports) - return -EINVAL; - - __par_io_config_pin(&par_io[port], pin, dir, open_drain, assignment, - has_irq); - return 0; -} -EXPORT_SYMBOL(par_io_config_pin); - -int par_io_data_set(u8 port, u8 pin, u8 val) -{ - u32 pin_mask, tmp_val; - - if (port >= num_par_io_ports) - return -EINVAL; - if (pin >= QE_PIO_PINS) - return -EINVAL; - /* calculate pin location */ - pin_mask = (u32) (1 << (QE_PIO_PINS - 1 - pin)); - - tmp_val = in_be32(&par_io[port].cpdata); - - if (val == 0) /* clear */ - out_be32(&par_io[port].cpdata, ~pin_mask & tmp_val); - else /* set */ - out_be32(&par_io[port].cpdata, pin_mask | tmp_val); - - return 0; -} -EXPORT_SYMBOL(par_io_data_set); - -int par_io_of_config(struct device_node *np) -{ - struct device_node *pio; - const phandle *ph; - int pio_map_len; - const unsigned int *pio_map; - - if (par_io == NULL) { - printk(KERN_ERR "par_io not initialized\n"); - return -1; - } - - ph = of_get_property(np, "pio-handle", NULL); - if (ph == NULL) { - printk(KERN_ERR "pio-handle not available\n"); - return -1; - } - - pio = of_find_node_by_phandle(*ph); - - pio_map = of_get_property(pio, "pio-map", &pio_map_len); - if (pio_map == NULL) { - printk(KERN_ERR "pio-map is not set!\n"); - return -1; - } - pio_map_len /= sizeof(unsigned int); - if ((pio_map_len % 6) != 0) { - printk(KERN_ERR "pio-map format wrong!\n"); - return -1; - } - - while (pio_map_len > 0) { - par_io_config_pin((u8) pio_map[0], (u8) pio_map[1], - (int) pio_map[2], (int) pio_map[3], - (int) pio_map[4], (int) pio_map[5]); - pio_map += 6; - pio_map_len -= 6; - } - of_node_put(pio); - return 0; -} -EXPORT_SYMBOL(par_io_of_config); diff --git a/arch/powerpc/sysdev/qe_lib/ucc.c b/arch/powerpc/sysdev/qe_lib/ucc.c deleted file mode 100644 index 621575b7e84a..000000000000 --- a/arch/powerpc/sysdev/qe_lib/ucc.c +++ /dev/null @@ -1,212 +0,0 @@ -/* - * arch/powerpc/sysdev/qe_lib/ucc.c - * - * QE UCC API Set - UCC specific routines implementations. - * - * Copyright (C) 2006 Freescale Semiconductor, Inc. All rights reserved. - * - * Authors: Shlomi Gridish <gridish@freescale.com> - * Li Yang <leoli@freescale.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ -#include <linux/kernel.h> -#include <linux/errno.h> -#include <linux/stddef.h> -#include <linux/spinlock.h> -#include <linux/export.h> - -#include <asm/irq.h> -#include <asm/io.h> -#include <asm/immap_qe.h> -#include <asm/qe.h> -#include <asm/ucc.h> - -int ucc_set_qe_mux_mii_mng(unsigned int ucc_num) -{ - unsigned long flags; - - if (ucc_num > UCC_MAX_NUM - 1) - return -EINVAL; - - spin_lock_irqsave(&cmxgcr_lock, flags); - clrsetbits_be32(&qe_immr->qmx.cmxgcr, QE_CMXGCR_MII_ENET_MNG, - ucc_num << QE_CMXGCR_MII_ENET_MNG_SHIFT); - spin_unlock_irqrestore(&cmxgcr_lock, flags); - - return 0; -} -EXPORT_SYMBOL(ucc_set_qe_mux_mii_mng); - -/* Configure the UCC to either Slow or Fast. - * - * A given UCC can be figured to support either "slow" devices (e.g. UART) - * or "fast" devices (e.g. Ethernet). - * - * 'ucc_num' is the UCC number, from 0 - 7. - * - * This function also sets the UCC_GUEMR_SET_RESERVED3 bit because that bit - * must always be set to 1. - */ -int ucc_set_type(unsigned int ucc_num, enum ucc_speed_type speed) -{ - u8 __iomem *guemr; - - /* The GUEMR register is at the same location for both slow and fast - devices, so we just use uccX.slow.guemr. */ - switch (ucc_num) { - case 0: guemr = &qe_immr->ucc1.slow.guemr; - break; - case 1: guemr = &qe_immr->ucc2.slow.guemr; - break; - case 2: guemr = &qe_immr->ucc3.slow.guemr; - break; - case 3: guemr = &qe_immr->ucc4.slow.guemr; - break; - case 4: guemr = &qe_immr->ucc5.slow.guemr; - break; - case 5: guemr = &qe_immr->ucc6.slow.guemr; - break; - case 6: guemr = &qe_immr->ucc7.slow.guemr; - break; - case 7: guemr = &qe_immr->ucc8.slow.guemr; - break; - default: - return -EINVAL; - } - - clrsetbits_8(guemr, UCC_GUEMR_MODE_MASK, - UCC_GUEMR_SET_RESERVED3 | speed); - - return 0; -} - -static void get_cmxucr_reg(unsigned int ucc_num, __be32 __iomem **cmxucr, - unsigned int *reg_num, unsigned int *shift) -{ - unsigned int cmx = ((ucc_num & 1) << 1) + (ucc_num > 3); - - *reg_num = cmx + 1; - *cmxucr = &qe_immr->qmx.cmxucr[cmx]; - *shift = 16 - 8 * (ucc_num & 2); -} - -int ucc_mux_set_grant_tsa_bkpt(unsigned int ucc_num, int set, u32 mask) -{ - __be32 __iomem *cmxucr; - unsigned int reg_num; - unsigned int shift; - - /* check if the UCC number is in range. */ - if (ucc_num > UCC_MAX_NUM - 1) - return -EINVAL; - - get_cmxucr_reg(ucc_num, &cmxucr, ®_num, &shift); - - if (set) - setbits32(cmxucr, mask << shift); - else - clrbits32(cmxucr, mask << shift); - - return 0; -} - -int ucc_set_qe_mux_rxtx(unsigned int ucc_num, enum qe_clock clock, - enum comm_dir mode) -{ - __be32 __iomem *cmxucr; - unsigned int reg_num; - unsigned int shift; - u32 clock_bits = 0; - - /* check if the UCC number is in range. */ - if (ucc_num > UCC_MAX_NUM - 1) - return -EINVAL; - - /* The communications direction must be RX or TX */ - if (!((mode == COMM_DIR_RX) || (mode == COMM_DIR_TX))) - return -EINVAL; - - get_cmxucr_reg(ucc_num, &cmxucr, ®_num, &shift); - - switch (reg_num) { - case 1: - switch (clock) { - case QE_BRG1: clock_bits = 1; break; - case QE_BRG2: clock_bits = 2; break; - case QE_BRG7: clock_bits = 3; break; - case QE_BRG8: clock_bits = 4; break; - case QE_CLK9: clock_bits = 5; break; - case QE_CLK10: clock_bits = 6; break; - case QE_CLK11: clock_bits = 7; break; - case QE_CLK12: clock_bits = 8; break; - case QE_CLK15: clock_bits = 9; break; - case QE_CLK16: clock_bits = 10; break; - default: break; - } - break; - case 2: - switch (clock) { - case QE_BRG5: clock_bits = 1; break; - case QE_BRG6: clock_bits = 2; break; - case QE_BRG7: clock_bits = 3; break; - case QE_BRG8: clock_bits = 4; break; - case QE_CLK13: clock_bits = 5; break; - case QE_CLK14: clock_bits = 6; break; - case QE_CLK19: clock_bits = 7; break; - case QE_CLK20: clock_bits = 8; break; - case QE_CLK15: clock_bits = 9; break; - case QE_CLK16: clock_bits = 10; break; - default: break; - } - break; - case 3: - switch (clock) { - case QE_BRG9: clock_bits = 1; break; - case QE_BRG10: clock_bits = 2; break; - case QE_BRG15: clock_bits = 3; break; - case QE_BRG16: clock_bits = 4; break; - case QE_CLK3: clock_bits = 5; break; - case QE_CLK4: clock_bits = 6; break; - case QE_CLK17: clock_bits = 7; break; - case QE_CLK18: clock_bits = 8; break; - case QE_CLK7: clock_bits = 9; break; - case QE_CLK8: clock_bits = 10; break; - case QE_CLK16: clock_bits = 11; break; - default: break; - } - break; - case 4: - switch (clock) { - case QE_BRG13: clock_bits = 1; break; - case QE_BRG14: clock_bits = 2; break; - case QE_BRG15: clock_bits = 3; break; - case QE_BRG16: clock_bits = 4; break; - case QE_CLK5: clock_bits = 5; break; - case QE_CLK6: clock_bits = 6; break; - case QE_CLK21: clock_bits = 7; break; - case QE_CLK22: clock_bits = 8; break; - case QE_CLK7: clock_bits = 9; break; - case QE_CLK8: clock_bits = 10; break; - case QE_CLK16: clock_bits = 11; break; - default: break; - } - break; - default: break; - } - - /* Check for invalid combination of clock and UCC number */ - if (!clock_bits) - return -ENOENT; - - if (mode == COMM_DIR_RX) - shift += 4; - - clrsetbits_be32(cmxucr, QE_CMXUCR_TX_CLK_SRC_MASK << shift, - clock_bits << shift); - - return 0; -} diff --git a/arch/powerpc/sysdev/qe_lib/ucc_fast.c b/arch/powerpc/sysdev/qe_lib/ucc_fast.c deleted file mode 100644 index 65aaf15032ae..000000000000 --- a/arch/powerpc/sysdev/qe_lib/ucc_fast.c +++ /dev/null @@ -1,363 +0,0 @@ -/* - * Copyright (C) 2006 Freescale Semiconductor, Inc. All rights reserved. - * - * Authors: Shlomi Gridish <gridish@freescale.com> - * Li Yang <leoli@freescale.com> - * - * Description: - * QE UCC Fast API Set - UCC Fast specific routines implementations. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ -#include <linux/kernel.h> -#include <linux/errno.h> -#include <linux/slab.h> -#include <linux/stddef.h> -#include <linux/interrupt.h> -#include <linux/err.h> -#include <linux/export.h> - -#include <asm/io.h> -#include <asm/immap_qe.h> -#include <asm/qe.h> - -#include <asm/ucc.h> -#include <asm/ucc_fast.h> - -void ucc_fast_dump_regs(struct ucc_fast_private * uccf) -{ - printk(KERN_INFO "UCC%u Fast registers:\n", uccf->uf_info->ucc_num); - printk(KERN_INFO "Base address: 0x%p\n", uccf->uf_regs); - - printk(KERN_INFO "gumr : addr=0x%p, val=0x%08x\n", - &uccf->uf_regs->gumr, in_be32(&uccf->uf_regs->gumr)); - printk(KERN_INFO "upsmr : addr=0x%p, val=0x%08x\n", - &uccf->uf_regs->upsmr, in_be32(&uccf->uf_regs->upsmr)); - printk(KERN_INFO "utodr : addr=0x%p, val=0x%04x\n", - &uccf->uf_regs->utodr, in_be16(&uccf->uf_regs->utodr)); - printk(KERN_INFO "udsr : addr=0x%p, val=0x%04x\n", - &uccf->uf_regs->udsr, in_be16(&uccf->uf_regs->udsr)); - printk(KERN_INFO "ucce : addr=0x%p, val=0x%08x\n", - &uccf->uf_regs->ucce, in_be32(&uccf->uf_regs->ucce)); - printk(KERN_INFO "uccm : addr=0x%p, val=0x%08x\n", - &uccf->uf_regs->uccm, in_be32(&uccf->uf_regs->uccm)); - printk(KERN_INFO "uccs : addr=0x%p, val=0x%02x\n", - &uccf->uf_regs->uccs, in_8(&uccf->uf_regs->uccs)); - printk(KERN_INFO "urfb : addr=0x%p, val=0x%08x\n", - &uccf->uf_regs->urfb, in_be32(&uccf->uf_regs->urfb)); - printk(KERN_INFO "urfs : addr=0x%p, val=0x%04x\n", - &uccf->uf_regs->urfs, in_be16(&uccf->uf_regs->urfs)); - printk(KERN_INFO "urfet : addr=0x%p, val=0x%04x\n", - &uccf->uf_regs->urfet, in_be16(&uccf->uf_regs->urfet)); - printk(KERN_INFO "urfset: addr=0x%p, val=0x%04x\n", - &uccf->uf_regs->urfset, in_be16(&uccf->uf_regs->urfset)); - printk(KERN_INFO "utfb : addr=0x%p, val=0x%08x\n", - &uccf->uf_regs->utfb, in_be32(&uccf->uf_regs->utfb)); - printk(KERN_INFO "utfs : addr=0x%p, val=0x%04x\n", - &uccf->uf_regs->utfs, in_be16(&uccf->uf_regs->utfs)); - printk(KERN_INFO "utfet : addr=0x%p, val=0x%04x\n", - &uccf->uf_regs->utfet, in_be16(&uccf->uf_regs->utfet)); - printk(KERN_INFO "utftt : addr=0x%p, val=0x%04x\n", - &uccf->uf_regs->utftt, in_be16(&uccf->uf_regs->utftt)); - printk(KERN_INFO "utpt : addr=0x%p, val=0x%04x\n", - &uccf->uf_regs->utpt, in_be16(&uccf->uf_regs->utpt)); - printk(KERN_INFO "urtry : addr=0x%p, val=0x%08x\n", - &uccf->uf_regs->urtry, in_be32(&uccf->uf_regs->urtry)); - printk(KERN_INFO "guemr : addr=0x%p, val=0x%02x\n", - &uccf->uf_regs->guemr, in_8(&uccf->uf_regs->guemr)); -} -EXPORT_SYMBOL(ucc_fast_dump_regs); - -u32 ucc_fast_get_qe_cr_subblock(int uccf_num) -{ - switch (uccf_num) { - case 0: return QE_CR_SUBBLOCK_UCCFAST1; - case 1: return QE_CR_SUBBLOCK_UCCFAST2; - case 2: return QE_CR_SUBBLOCK_UCCFAST3; - case 3: return QE_CR_SUBBLOCK_UCCFAST4; - case 4: return QE_CR_SUBBLOCK_UCCFAST5; - case 5: return QE_CR_SUBBLOCK_UCCFAST6; - case 6: return QE_CR_SUBBLOCK_UCCFAST7; - case 7: return QE_CR_SUBBLOCK_UCCFAST8; - default: return QE_CR_SUBBLOCK_INVALID; - } -} -EXPORT_SYMBOL(ucc_fast_get_qe_cr_subblock); - -void ucc_fast_transmit_on_demand(struct ucc_fast_private * uccf) -{ - out_be16(&uccf->uf_regs->utodr, UCC_FAST_TOD); -} -EXPORT_SYMBOL(ucc_fast_transmit_on_demand); - -void ucc_fast_enable(struct ucc_fast_private * uccf, enum comm_dir mode) -{ - struct ucc_fast __iomem *uf_regs; - u32 gumr; - - uf_regs = uccf->uf_regs; - - /* Enable reception and/or transmission on this UCC. */ - gumr = in_be32(&uf_regs->gumr); - if (mode & COMM_DIR_TX) { - gumr |= UCC_FAST_GUMR_ENT; - uccf->enabled_tx = 1; - } - if (mode & COMM_DIR_RX) { - gumr |= UCC_FAST_GUMR_ENR; - uccf->enabled_rx = 1; - } - out_be32(&uf_regs->gumr, gumr); -} -EXPORT_SYMBOL(ucc_fast_enable); - -void ucc_fast_disable(struct ucc_fast_private * uccf, enum comm_dir mode) -{ - struct ucc_fast __iomem *uf_regs; - u32 gumr; - - uf_regs = uccf->uf_regs; - - /* Disable reception and/or transmission on this UCC. */ - gumr = in_be32(&uf_regs->gumr); - if (mode & COMM_DIR_TX) { - gumr &= ~UCC_FAST_GUMR_ENT; - uccf->enabled_tx = 0; - } - if (mode & COMM_DIR_RX) { - gumr &= ~UCC_FAST_GUMR_ENR; - uccf->enabled_rx = 0; - } - out_be32(&uf_regs->gumr, gumr); -} -EXPORT_SYMBOL(ucc_fast_disable); - -int ucc_fast_init(struct ucc_fast_info * uf_info, struct ucc_fast_private ** uccf_ret) -{ - struct ucc_fast_private *uccf; - struct ucc_fast __iomem *uf_regs; - u32 gumr; - int ret; - - if (!uf_info) - return -EINVAL; - - /* check if the UCC port number is in range. */ - if ((uf_info->ucc_num < 0) || (uf_info->ucc_num > UCC_MAX_NUM - 1)) { - printk(KERN_ERR "%s: illegal UCC number\n", __func__); - return -EINVAL; - } - - /* Check that 'max_rx_buf_length' is properly aligned (4). */ - if (uf_info->max_rx_buf_length & (UCC_FAST_MRBLR_ALIGNMENT - 1)) { - printk(KERN_ERR "%s: max_rx_buf_length not aligned\n", - __func__); - return -EINVAL; - } - - /* Validate Virtual Fifo register values */ - if (uf_info->urfs < UCC_FAST_URFS_MIN_VAL) { - printk(KERN_ERR "%s: urfs is too small\n", __func__); - return -EINVAL; - } - - if (uf_info->urfs & (UCC_FAST_VIRT_FIFO_REGS_ALIGNMENT - 1)) { - printk(KERN_ERR "%s: urfs is not aligned\n", __func__); - return -EINVAL; - } - - if (uf_info->urfet & (UCC_FAST_VIRT_FIFO_REGS_ALIGNMENT - 1)) { - printk(KERN_ERR "%s: urfet is not aligned.\n", __func__); - return -EINVAL; - } - - if (uf_info->urfset & (UCC_FAST_VIRT_FIFO_REGS_ALIGNMENT - 1)) { - printk(KERN_ERR "%s: urfset is not aligned\n", __func__); - return -EINVAL; - } - - if (uf_info->utfs & (UCC_FAST_VIRT_FIFO_REGS_ALIGNMENT - 1)) { - printk(KERN_ERR "%s: utfs is not aligned\n", __func__); - return -EINVAL; - } - - if (uf_info->utfet & (UCC_FAST_VIRT_FIFO_REGS_ALIGNMENT - 1)) { - printk(KERN_ERR "%s: utfet is not aligned\n", __func__); - return -EINVAL; - } - - if (uf_info->utftt & (UCC_FAST_VIRT_FIFO_REGS_ALIGNMENT - 1)) { - printk(KERN_ERR "%s: utftt is not aligned\n", __func__); - return -EINVAL; - } - - uccf = kzalloc(sizeof(struct ucc_fast_private), GFP_KERNEL); - if (!uccf) { - printk(KERN_ERR "%s: Cannot allocate private data\n", - __func__); - return -ENOMEM; - } - - /* Fill fast UCC structure */ - uccf->uf_info = uf_info; - /* Set the PHY base address */ - uccf->uf_regs = ioremap(uf_info->regs, sizeof(struct ucc_fast)); - if (uccf->uf_regs == NULL) { - printk(KERN_ERR "%s: Cannot map UCC registers\n", __func__); - kfree(uccf); - return -ENOMEM; - } - - uccf->enabled_tx = 0; - uccf->enabled_rx = 0; - uccf->stopped_tx = 0; - uccf->stopped_rx = 0; - uf_regs = uccf->uf_regs; - uccf->p_ucce = &uf_regs->ucce; - uccf->p_uccm = &uf_regs->uccm; -#ifdef CONFIG_UGETH_TX_ON_DEMAND - uccf->p_utodr = &uf_regs->utodr; -#endif -#ifdef STATISTICS - uccf->tx_frames = 0; - uccf->rx_frames = 0; - uccf->rx_discarded = 0; -#endif /* STATISTICS */ - - /* Set UCC to fast type */ - ret = ucc_set_type(uf_info->ucc_num, UCC_SPEED_TYPE_FAST); - if (ret) { - printk(KERN_ERR "%s: cannot set UCC type\n", __func__); - ucc_fast_free(uccf); - return ret; - } - - uccf->mrblr = uf_info->max_rx_buf_length; - - /* Set GUMR */ - /* For more details see the hardware spec. */ - gumr = uf_info->ttx_trx; - if (uf_info->tci) - gumr |= UCC_FAST_GUMR_TCI; - if (uf_info->cdp) - gumr |= UCC_FAST_GUMR_CDP; - if (uf_info->ctsp) - gumr |= UCC_FAST_GUMR_CTSP; - if (uf_info->cds) - gumr |= UCC_FAST_GUMR_CDS; - if (uf_info->ctss) - gumr |= UCC_FAST_GUMR_CTSS; - if (uf_info->txsy) - gumr |= UCC_FAST_GUMR_TXSY; - if (uf_info->rsyn) - gumr |= UCC_FAST_GUMR_RSYN; - gumr |= uf_info->synl; - if (uf_info->rtsm) - gumr |= UCC_FAST_GUMR_RTSM; - gumr |= uf_info->renc; - if (uf_info->revd) - gumr |= UCC_FAST_GUMR_REVD; - gumr |= uf_info->tenc; - gumr |= uf_info->tcrc; - gumr |= uf_info->mode; - out_be32(&uf_regs->gumr, gumr); - - /* Allocate memory for Tx Virtual Fifo */ - uccf->ucc_fast_tx_virtual_fifo_base_offset = - qe_muram_alloc(uf_info->utfs, UCC_FAST_VIRT_FIFO_REGS_ALIGNMENT); - if (IS_ERR_VALUE(uccf->ucc_fast_tx_virtual_fifo_base_offset)) { - printk(KERN_ERR "%s: cannot allocate MURAM for TX FIFO\n", - __func__); - uccf->ucc_fast_tx_virtual_fifo_base_offset = 0; - ucc_fast_free(uccf); - return -ENOMEM; - } - - /* Allocate memory for Rx Virtual Fifo */ - uccf->ucc_fast_rx_virtual_fifo_base_offset = - qe_muram_alloc(uf_info->urfs + - UCC_FAST_RECEIVE_VIRTUAL_FIFO_SIZE_FUDGE_FACTOR, - UCC_FAST_VIRT_FIFO_REGS_ALIGNMENT); - if (IS_ERR_VALUE(uccf->ucc_fast_rx_virtual_fifo_base_offset)) { - printk(KERN_ERR "%s: cannot allocate MURAM for RX FIFO\n", - __func__); - uccf->ucc_fast_rx_virtual_fifo_base_offset = 0; - ucc_fast_free(uccf); - return -ENOMEM; - } - - /* Set Virtual Fifo registers */ - out_be16(&uf_regs->urfs, uf_info->urfs); - out_be16(&uf_regs->urfet, uf_info->urfet); - out_be16(&uf_regs->urfset, uf_info->urfset); - out_be16(&uf_regs->utfs, uf_info->utfs); - out_be16(&uf_regs->utfet, uf_info->utfet); - out_be16(&uf_regs->utftt, uf_info->utftt); - /* utfb, urfb are offsets from MURAM base */ - out_be32(&uf_regs->utfb, uccf->ucc_fast_tx_virtual_fifo_base_offset); - out_be32(&uf_regs->urfb, uccf->ucc_fast_rx_virtual_fifo_base_offset); - - /* Mux clocking */ - /* Grant Support */ - ucc_set_qe_mux_grant(uf_info->ucc_num, uf_info->grant_support); - /* Breakpoint Support */ - ucc_set_qe_mux_bkpt(uf_info->ucc_num, uf_info->brkpt_support); - /* Set Tsa or NMSI mode. */ - ucc_set_qe_mux_tsa(uf_info->ucc_num, uf_info->tsa); - /* If NMSI (not Tsa), set Tx and Rx clock. */ - if (!uf_info->tsa) { - /* Rx clock routing */ - if ((uf_info->rx_clock != QE_CLK_NONE) && - ucc_set_qe_mux_rxtx(uf_info->ucc_num, uf_info->rx_clock, - COMM_DIR_RX)) { - printk(KERN_ERR "%s: illegal value for RX clock\n", - __func__); - ucc_fast_free(uccf); - return -EINVAL; - } - /* Tx clock routing */ - if ((uf_info->tx_clock != QE_CLK_NONE) && - ucc_set_qe_mux_rxtx(uf_info->ucc_num, uf_info->tx_clock, - COMM_DIR_TX)) { - printk(KERN_ERR "%s: illegal value for TX clock\n", - __func__); - ucc_fast_free(uccf); - return -EINVAL; - } - } - - /* Set interrupt mask register at UCC level. */ - out_be32(&uf_regs->uccm, uf_info->uccm_mask); - - /* First, clear anything pending at UCC level, - * otherwise, old garbage may come through - * as soon as the dam is opened. */ - - /* Writing '1' clears */ - out_be32(&uf_regs->ucce, 0xffffffff); - - *uccf_ret = uccf; - return 0; -} -EXPORT_SYMBOL(ucc_fast_init); - -void ucc_fast_free(struct ucc_fast_private * uccf) -{ - if (!uccf) - return; - - if (uccf->ucc_fast_tx_virtual_fifo_base_offset) - qe_muram_free(uccf->ucc_fast_tx_virtual_fifo_base_offset); - - if (uccf->ucc_fast_rx_virtual_fifo_base_offset) - qe_muram_free(uccf->ucc_fast_rx_virtual_fifo_base_offset); - - if (uccf->uf_regs) - iounmap(uccf->uf_regs); - - kfree(uccf); -} -EXPORT_SYMBOL(ucc_fast_free); diff --git a/arch/powerpc/sysdev/qe_lib/ucc_slow.c b/arch/powerpc/sysdev/qe_lib/ucc_slow.c deleted file mode 100644 index 5f91628209eb..000000000000 --- a/arch/powerpc/sysdev/qe_lib/ucc_slow.c +++ /dev/null @@ -1,374 +0,0 @@ -/* - * Copyright (C) 2006 Freescale Semiconductor, Inc. All rights reserved. - * - * Authors: Shlomi Gridish <gridish@freescale.com> - * Li Yang <leoli@freescale.com> - * - * Description: - * QE UCC Slow API Set - UCC Slow specific routines implementations. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ -#include <linux/kernel.h> -#include <linux/errno.h> -#include <linux/slab.h> -#include <linux/stddef.h> -#include <linux/interrupt.h> -#include <linux/err.h> -#include <linux/export.h> - -#include <asm/io.h> -#include <asm/immap_qe.h> -#include <asm/qe.h> - -#include <asm/ucc.h> -#include <asm/ucc_slow.h> - -u32 ucc_slow_get_qe_cr_subblock(int uccs_num) -{ - switch (uccs_num) { - case 0: return QE_CR_SUBBLOCK_UCCSLOW1; - case 1: return QE_CR_SUBBLOCK_UCCSLOW2; - case 2: return QE_CR_SUBBLOCK_UCCSLOW3; - case 3: return QE_CR_SUBBLOCK_UCCSLOW4; - case 4: return QE_CR_SUBBLOCK_UCCSLOW5; - case 5: return QE_CR_SUBBLOCK_UCCSLOW6; - case 6: return QE_CR_SUBBLOCK_UCCSLOW7; - case 7: return QE_CR_SUBBLOCK_UCCSLOW8; - default: return QE_CR_SUBBLOCK_INVALID; - } -} -EXPORT_SYMBOL(ucc_slow_get_qe_cr_subblock); - -void ucc_slow_graceful_stop_tx(struct ucc_slow_private * uccs) -{ - struct ucc_slow_info *us_info = uccs->us_info; - u32 id; - - id = ucc_slow_get_qe_cr_subblock(us_info->ucc_num); - qe_issue_cmd(QE_GRACEFUL_STOP_TX, id, - QE_CR_PROTOCOL_UNSPECIFIED, 0); -} -EXPORT_SYMBOL(ucc_slow_graceful_stop_tx); - -void ucc_slow_stop_tx(struct ucc_slow_private * uccs) -{ - struct ucc_slow_info *us_info = uccs->us_info; - u32 id; - - id = ucc_slow_get_qe_cr_subblock(us_info->ucc_num); - qe_issue_cmd(QE_STOP_TX, id, QE_CR_PROTOCOL_UNSPECIFIED, 0); -} -EXPORT_SYMBOL(ucc_slow_stop_tx); - -void ucc_slow_restart_tx(struct ucc_slow_private * uccs) -{ - struct ucc_slow_info *us_info = uccs->us_info; - u32 id; - - id = ucc_slow_get_qe_cr_subblock(us_info->ucc_num); - qe_issue_cmd(QE_RESTART_TX, id, QE_CR_PROTOCOL_UNSPECIFIED, 0); -} -EXPORT_SYMBOL(ucc_slow_restart_tx); - -void ucc_slow_enable(struct ucc_slow_private * uccs, enum comm_dir mode) -{ - struct ucc_slow *us_regs; - u32 gumr_l; - - us_regs = uccs->us_regs; - - /* Enable reception and/or transmission on this UCC. */ - gumr_l = in_be32(&us_regs->gumr_l); - if (mode & COMM_DIR_TX) { - gumr_l |= UCC_SLOW_GUMR_L_ENT; - uccs->enabled_tx = 1; - } - if (mode & COMM_DIR_RX) { - gumr_l |= UCC_SLOW_GUMR_L_ENR; - uccs->enabled_rx = 1; - } - out_be32(&us_regs->gumr_l, gumr_l); -} -EXPORT_SYMBOL(ucc_slow_enable); - -void ucc_slow_disable(struct ucc_slow_private * uccs, enum comm_dir mode) -{ - struct ucc_slow *us_regs; - u32 gumr_l; - - us_regs = uccs->us_regs; - - /* Disable reception and/or transmission on this UCC. */ - gumr_l = in_be32(&us_regs->gumr_l); - if (mode & COMM_DIR_TX) { - gumr_l &= ~UCC_SLOW_GUMR_L_ENT; - uccs->enabled_tx = 0; - } - if (mode & COMM_DIR_RX) { - gumr_l &= ~UCC_SLOW_GUMR_L_ENR; - uccs->enabled_rx = 0; - } - out_be32(&us_regs->gumr_l, gumr_l); -} -EXPORT_SYMBOL(ucc_slow_disable); - -/* Initialize the UCC for Slow operations - * - * The caller should initialize the following us_info - */ -int ucc_slow_init(struct ucc_slow_info * us_info, struct ucc_slow_private ** uccs_ret) -{ - struct ucc_slow_private *uccs; - u32 i; - struct ucc_slow __iomem *us_regs; - u32 gumr; - struct qe_bd *bd; - u32 id; - u32 command; - int ret = 0; - - if (!us_info) - return -EINVAL; - - /* check if the UCC port number is in range. */ - if ((us_info->ucc_num < 0) || (us_info->ucc_num > UCC_MAX_NUM - 1)) { - printk(KERN_ERR "%s: illegal UCC number\n", __func__); - return -EINVAL; - } - - /* - * Set mrblr - * Check that 'max_rx_buf_length' is properly aligned (4), unless - * rfw is 1, meaning that QE accepts one byte at a time, unlike normal - * case when QE accepts 32 bits at a time. - */ - if ((!us_info->rfw) && - (us_info->max_rx_buf_length & (UCC_SLOW_MRBLR_ALIGNMENT - 1))) { - printk(KERN_ERR "max_rx_buf_length not aligned.\n"); - return -EINVAL; - } - - uccs = kzalloc(sizeof(struct ucc_slow_private), GFP_KERNEL); - if (!uccs) { - printk(KERN_ERR "%s: Cannot allocate private data\n", - __func__); - return -ENOMEM; - } - - /* Fill slow UCC structure */ - uccs->us_info = us_info; - /* Set the PHY base address */ - uccs->us_regs = ioremap(us_info->regs, sizeof(struct ucc_slow)); - if (uccs->us_regs == NULL) { - printk(KERN_ERR "%s: Cannot map UCC registers\n", __func__); - kfree(uccs); - return -ENOMEM; - } - - uccs->saved_uccm = 0; - uccs->p_rx_frame = 0; - us_regs = uccs->us_regs; - uccs->p_ucce = (u16 *) & (us_regs->ucce); - uccs->p_uccm = (u16 *) & (us_regs->uccm); -#ifdef STATISTICS - uccs->rx_frames = 0; - uccs->tx_frames = 0; - uccs->rx_discarded = 0; -#endif /* STATISTICS */ - - /* Get PRAM base */ - uccs->us_pram_offset = - qe_muram_alloc(UCC_SLOW_PRAM_SIZE, ALIGNMENT_OF_UCC_SLOW_PRAM); - if (IS_ERR_VALUE(uccs->us_pram_offset)) { - printk(KERN_ERR "%s: cannot allocate MURAM for PRAM", __func__); - ucc_slow_free(uccs); - return -ENOMEM; - } - id = ucc_slow_get_qe_cr_subblock(us_info->ucc_num); - qe_issue_cmd(QE_ASSIGN_PAGE_TO_DEVICE, id, us_info->protocol, - uccs->us_pram_offset); - - uccs->us_pram = qe_muram_addr(uccs->us_pram_offset); - - /* Set UCC to slow type */ - ret = ucc_set_type(us_info->ucc_num, UCC_SPEED_TYPE_SLOW); - if (ret) { - printk(KERN_ERR "%s: cannot set UCC type", __func__); - ucc_slow_free(uccs); - return ret; - } - - out_be16(&uccs->us_pram->mrblr, us_info->max_rx_buf_length); - - INIT_LIST_HEAD(&uccs->confQ); - - /* Allocate BDs. */ - uccs->rx_base_offset = - qe_muram_alloc(us_info->rx_bd_ring_len * sizeof(struct qe_bd), - QE_ALIGNMENT_OF_BD); - if (IS_ERR_VALUE(uccs->rx_base_offset)) { - printk(KERN_ERR "%s: cannot allocate %u RX BDs\n", __func__, - us_info->rx_bd_ring_len); - uccs->rx_base_offset = 0; - ucc_slow_free(uccs); - return -ENOMEM; - } - - uccs->tx_base_offset = - qe_muram_alloc(us_info->tx_bd_ring_len * sizeof(struct qe_bd), - QE_ALIGNMENT_OF_BD); - if (IS_ERR_VALUE(uccs->tx_base_offset)) { - printk(KERN_ERR "%s: cannot allocate TX BDs", __func__); - uccs->tx_base_offset = 0; - ucc_slow_free(uccs); - return -ENOMEM; - } - - /* Init Tx bds */ - bd = uccs->confBd = uccs->tx_bd = qe_muram_addr(uccs->tx_base_offset); - for (i = 0; i < us_info->tx_bd_ring_len - 1; i++) { - /* clear bd buffer */ - out_be32(&bd->buf, 0); - /* set bd status and length */ - out_be32((u32 *) bd, 0); - bd++; - } - /* for last BD set Wrap bit */ - out_be32(&bd->buf, 0); - out_be32((u32 *) bd, cpu_to_be32(T_W)); - - /* Init Rx bds */ - bd = uccs->rx_bd = qe_muram_addr(uccs->rx_base_offset); - for (i = 0; i < us_info->rx_bd_ring_len - 1; i++) { - /* set bd status and length */ - out_be32((u32*)bd, 0); - /* clear bd buffer */ - out_be32(&bd->buf, 0); - bd++; - } - /* for last BD set Wrap bit */ - out_be32((u32*)bd, cpu_to_be32(R_W)); - out_be32(&bd->buf, 0); - - /* Set GUMR (For more details see the hardware spec.). */ - /* gumr_h */ - gumr = us_info->tcrc; - if (us_info->cdp) - gumr |= UCC_SLOW_GUMR_H_CDP; - if (us_info->ctsp) - gumr |= UCC_SLOW_GUMR_H_CTSP; - if (us_info->cds) - gumr |= UCC_SLOW_GUMR_H_CDS; - if (us_info->ctss) - gumr |= UCC_SLOW_GUMR_H_CTSS; - if (us_info->tfl) - gumr |= UCC_SLOW_GUMR_H_TFL; - if (us_info->rfw) - gumr |= UCC_SLOW_GUMR_H_RFW; - if (us_info->txsy) - gumr |= UCC_SLOW_GUMR_H_TXSY; - if (us_info->rtsm) - gumr |= UCC_SLOW_GUMR_H_RTSM; - out_be32(&us_regs->gumr_h, gumr); - - /* gumr_l */ - gumr = us_info->tdcr | us_info->rdcr | us_info->tenc | us_info->renc | - us_info->diag | us_info->mode; - if (us_info->tci) - gumr |= UCC_SLOW_GUMR_L_TCI; - if (us_info->rinv) - gumr |= UCC_SLOW_GUMR_L_RINV; - if (us_info->tinv) - gumr |= UCC_SLOW_GUMR_L_TINV; - if (us_info->tend) - gumr |= UCC_SLOW_GUMR_L_TEND; - out_be32(&us_regs->gumr_l, gumr); - - /* Function code registers */ - - /* if the data is in cachable memory, the 'global' */ - /* in the function code should be set. */ - uccs->us_pram->tbmr = UCC_BMR_BO_BE; - uccs->us_pram->rbmr = UCC_BMR_BO_BE; - - /* rbase, tbase are offsets from MURAM base */ - out_be16(&uccs->us_pram->rbase, uccs->rx_base_offset); - out_be16(&uccs->us_pram->tbase, uccs->tx_base_offset); - - /* Mux clocking */ - /* Grant Support */ - ucc_set_qe_mux_grant(us_info->ucc_num, us_info->grant_support); - /* Breakpoint Support */ - ucc_set_qe_mux_bkpt(us_info->ucc_num, us_info->brkpt_support); - /* Set Tsa or NMSI mode. */ - ucc_set_qe_mux_tsa(us_info->ucc_num, us_info->tsa); - /* If NMSI (not Tsa), set Tx and Rx clock. */ - if (!us_info->tsa) { - /* Rx clock routing */ - if (ucc_set_qe_mux_rxtx(us_info->ucc_num, us_info->rx_clock, - COMM_DIR_RX)) { - printk(KERN_ERR "%s: illegal value for RX clock\n", - __func__); - ucc_slow_free(uccs); - return -EINVAL; - } - /* Tx clock routing */ - if (ucc_set_qe_mux_rxtx(us_info->ucc_num, us_info->tx_clock, - COMM_DIR_TX)) { - printk(KERN_ERR "%s: illegal value for TX clock\n", - __func__); - ucc_slow_free(uccs); - return -EINVAL; - } - } - - /* Set interrupt mask register at UCC level. */ - out_be16(&us_regs->uccm, us_info->uccm_mask); - - /* First, clear anything pending at UCC level, - * otherwise, old garbage may come through - * as soon as the dam is opened. */ - - /* Writing '1' clears */ - out_be16(&us_regs->ucce, 0xffff); - - /* Issue QE Init command */ - if (us_info->init_tx && us_info->init_rx) - command = QE_INIT_TX_RX; - else if (us_info->init_tx) - command = QE_INIT_TX; - else - command = QE_INIT_RX; /* We know at least one is TRUE */ - - qe_issue_cmd(command, id, us_info->protocol, 0); - - *uccs_ret = uccs; - return 0; -} -EXPORT_SYMBOL(ucc_slow_init); - -void ucc_slow_free(struct ucc_slow_private * uccs) -{ - if (!uccs) - return; - - if (uccs->rx_base_offset) - qe_muram_free(uccs->rx_base_offset); - - if (uccs->tx_base_offset) - qe_muram_free(uccs->tx_base_offset); - - if (uccs->us_pram) - qe_muram_free(uccs->us_pram_offset); - - if (uccs->us_regs) - iounmap(uccs->us_regs); - - kfree(uccs); -} -EXPORT_SYMBOL(ucc_slow_free); - diff --git a/arch/powerpc/sysdev/qe_lib/usb.c b/arch/powerpc/sysdev/qe_lib/usb.c deleted file mode 100644 index 27f23bd15eb6..000000000000 --- a/arch/powerpc/sysdev/qe_lib/usb.c +++ /dev/null @@ -1,56 +0,0 @@ -/* - * QE USB routines - * - * Copyright 2006 Freescale Semiconductor, Inc. - * Shlomi Gridish <gridish@freescale.com> - * Jerry Huang <Chang-Ming.Huang@freescale.com> - * Copyright (c) MontaVista Software, Inc. 2008. - * Anton Vorontsov <avorontsov@ru.mvista.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ - -#include <linux/kernel.h> -#include <linux/errno.h> -#include <linux/export.h> -#include <linux/io.h> -#include <asm/immap_qe.h> -#include <asm/qe.h> - -int qe_usb_clock_set(enum qe_clock clk, int rate) -{ - struct qe_mux __iomem *mux = &qe_immr->qmx; - unsigned long flags; - u32 val; - - switch (clk) { - case QE_CLK3: val = QE_CMXGCR_USBCS_CLK3; break; - case QE_CLK5: val = QE_CMXGCR_USBCS_CLK5; break; - case QE_CLK7: val = QE_CMXGCR_USBCS_CLK7; break; - case QE_CLK9: val = QE_CMXGCR_USBCS_CLK9; break; - case QE_CLK13: val = QE_CMXGCR_USBCS_CLK13; break; - case QE_CLK17: val = QE_CMXGCR_USBCS_CLK17; break; - case QE_CLK19: val = QE_CMXGCR_USBCS_CLK19; break; - case QE_CLK21: val = QE_CMXGCR_USBCS_CLK21; break; - case QE_BRG9: val = QE_CMXGCR_USBCS_BRG9; break; - case QE_BRG10: val = QE_CMXGCR_USBCS_BRG10; break; - default: - pr_err("%s: requested unknown clock %d\n", __func__, clk); - return -EINVAL; - } - - if (qe_clock_is_brg(clk)) - qe_setbrg(clk, rate, 1); - - spin_lock_irqsave(&cmxgcr_lock, flags); - - clrsetbits_be32(&mux->cmxgcr, QE_CMXGCR_USBCS, val); - - spin_unlock_irqrestore(&cmxgcr_lock, flags); - - return 0; -} -EXPORT_SYMBOL(qe_usb_clock_set); diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 786bf01691c9..07a8508cb7fa 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -320,6 +320,7 @@ static inline void disable_surveillance(void) #ifdef CONFIG_PPC_PSERIES /* Since this can't be a module, args should end up below 4GB. */ static struct rtas_args args; + int token; /* * At this point we have got all the cpus we can into @@ -328,17 +329,12 @@ static inline void disable_surveillance(void) * If we did try to take rtas.lock there would be a * real possibility of deadlock. */ - args.token = rtas_token("set-indicator"); - if (args.token == RTAS_UNKNOWN_SERVICE) + token = rtas_token("set-indicator"); + if (token == RTAS_UNKNOWN_SERVICE) return; - args.token = cpu_to_be32(args.token); - args.nargs = cpu_to_be32(3); - args.nret = cpu_to_be32(1); - args.rets = &args.args[3]; - args.args[0] = cpu_to_be32(SURVEILLANCE_TOKEN); - args.args[1] = 0; - args.args[2] = 0; - enter_rtas(__pa(&args)); + + rtas_call_unlocked(&args, token, 3, 1, NULL, SURVEILLANCE_TOKEN, 0, 0); + #endif /* CONFIG_PPC_PSERIES */ } @@ -1522,6 +1518,8 @@ static void excprint(struct pt_regs *fp) if (trap == 0x700) print_bug_trap(fp); + + printf(linux_banner); } static void prregs(struct pt_regs *fp) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 3a55f493c7da..dbeeb3a049f2 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -10,9 +10,6 @@ config LOCKDEP_SUPPORT config STACKTRACE_SUPPORT def_bool y -config HAVE_LATENCYTOP_SUPPORT - def_bool y - config RWSEM_GENERIC_SPINLOCK bool @@ -66,6 +63,7 @@ config S390 def_bool y select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS + select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_SG_CHAIN @@ -166,8 +164,7 @@ config SCHED_OMIT_FRAME_POINTER config PGTABLE_LEVELS int - default 4 if 64BIT - default 2 + default 4 source "init/Kconfig" @@ -390,9 +387,6 @@ config HOTPLUG_CPU can be controlled through /sys/devices/system/cpu/cpu#. Say N if you want to disable CPU hotplug. -config SCHED_SMT - def_bool n - # Some NUMA nodes have memory ranges that span # other nodes. Even though a pfn is valid and # between a node's start and end pfns, it may not @@ -403,7 +397,7 @@ config NODES_SPAN_OTHER_NODES config NUMA bool "NUMA support" - depends on SMP && 64BIT && SCHED_TOPOLOGY + depends on SMP && SCHED_TOPOLOGY default n help Enable NUMA support @@ -463,6 +457,9 @@ config EMU_SIZE endmenu +config SCHED_SMT + def_bool n + config SCHED_MC def_bool n diff --git a/arch/s390/Kconfig.debug b/arch/s390/Kconfig.debug index c56878e1245f..26c5d5beb4be 100644 --- a/arch/s390/Kconfig.debug +++ b/arch/s390/Kconfig.debug @@ -5,18 +5,6 @@ config TRACE_IRQFLAGS_SUPPORT source "lib/Kconfig.debug" -config STRICT_DEVMEM - def_bool y - prompt "Filter access to /dev/mem" - ---help--- - This option restricts access to /dev/mem. If this option is - disabled, you allow userspace access to all memory, including - kernel and userspace memory. Accidental memory access is likely - to be disastrous. - Memory access is required for experts who want to debug the kernel. - - If you are unsure, say Y. - config S390_PTDUMP bool "Export kernel pagetable layout to userspace via debugfs" depends on DEBUG_KERNEL diff --git a/arch/s390/Makefile b/arch/s390/Makefile index e8d4423e4f85..224b42734f0d 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -106,6 +106,7 @@ drivers-y += drivers/s390/ drivers-$(CONFIG_OPROFILE) += arch/s390/oprofile/ boot := arch/s390/boot +tools := arch/s390/tools all: image bzImage @@ -124,9 +125,17 @@ vdso_install: archclean: $(Q)$(MAKE) $(clean)=$(boot) + $(Q)$(MAKE) $(clean)=$(tools) + +archprepare: + $(Q)$(MAKE) $(build)=$(tools) include/generated/facilities.h # Don't use tabs in echo arguments define archhelp echo '* image - Kernel image for IPL ($(boot)/image)' echo '* bzImage - Compressed kernel image for IPL ($(boot)/bzImage)' + echo ' install - Install kernel using' + echo ' (your) ~/bin/$(INSTALLKERNEL) or' + echo ' (distribution) /sbin/$(INSTALLKERNEL) or' + echo ' install to $$(INSTALL_PATH)' endef diff --git a/arch/s390/configs/default_defconfig b/arch/s390/configs/default_defconfig index ed7da281df66..0ac42cc4f880 100644 --- a/arch/s390/configs/default_defconfig +++ b/arch/s390/configs/default_defconfig @@ -10,28 +10,35 @@ CONFIG_TASKSTATS=y CONFIG_TASK_DELAY_ACCT=y CONFIG_TASK_XACCT=y CONFIG_TASK_IO_ACCOUNTING=y -CONFIG_RCU_FAST_NO_HZ=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_NUMA_BALANCING=y CONFIG_CGROUP_FREEZER=y +CONFIG_CGROUP_PIDS=y CONFIG_CGROUP_DEVICE=y CONFIG_CPUSETS=y CONFIG_CGROUP_CPUACCT=y +CONFIG_MEMCG=y +CONFIG_MEMCG_SWAP=y +CONFIG_MEMCG_KMEM=y +CONFIG_CGROUP_HUGETLB=y CONFIG_CGROUP_PERF=y CONFIG_CFS_BANDWIDTH=y CONFIG_RT_GROUP_SCHED=y CONFIG_BLK_CGROUP=y CONFIG_NAMESPACES=y +CONFIG_USER_NS=y CONFIG_SCHED_AUTOGROUP=y CONFIG_BLK_DEV_INITRD=y CONFIG_EXPERT=y CONFIG_BPF_SYSCALL=y +CONFIG_USERFAULTFD=y # CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y CONFIG_OPROFILE=m CONFIG_KPROBES=y CONFIG_JUMP_LABEL=y +CONFIG_STATIC_KEYS_SELFTEST=y CONFIG_MODULES=y CONFIG_MODULE_FORCE_LOAD=y CONFIG_MODULE_UNLOAD=y @@ -64,7 +71,6 @@ CONFIG_HOTPLUG_PCI=y CONFIG_HOTPLUG_PCI_S390=y CONFIG_CHSC_SCH=y CONFIG_CRASH_DUMP=y -# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set CONFIG_BINFMT_MISC=m CONFIG_HIBERNATION=y CONFIG_NET=y @@ -106,7 +112,6 @@ CONFIG_TCP_CONG_LP=m CONFIG_TCP_CONG_VENO=m CONFIG_TCP_CONG_YEAH=m CONFIG_TCP_CONG_ILLINOIS=m -CONFIG_IPV6=y CONFIG_IPV6_ROUTER_PREF=y CONFIG_INET6_AH=m CONFIG_INET6_ESP=m @@ -457,19 +462,9 @@ CONFIG_INFINIBAND=m CONFIG_INFINIBAND_USER_ACCESS=m CONFIG_MLX4_INFINIBAND=m CONFIG_VIRTIO_BALLOON=m -# CONFIG_IOMMU_SUPPORT is not set -CONFIG_EXT2_FS=y -CONFIG_EXT2_FS_XATTR=y -CONFIG_EXT2_FS_POSIX_ACL=y -CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y -CONFIG_JBD_DEBUG=y CONFIG_JBD2_DEBUG=y CONFIG_JFS_FS=m CONFIG_JFS_POSIX_ACL=y @@ -490,7 +485,7 @@ CONFIG_QUOTA_NETLINK_INTERFACE=y CONFIG_QFMT_V1=m CONFIG_QFMT_V2=m CONFIG_AUTOFS4_FS=m -CONFIG_FUSE_FS=m +CONFIG_FUSE_FS=y CONFIG_CUSE=m CONFIG_FSCACHE=m CONFIG_CACHEFILES=m @@ -542,10 +537,11 @@ CONFIG_DLM=m CONFIG_PRINTK_TIME=y CONFIG_DYNAMIC_DEBUG=y CONFIG_DEBUG_INFO=y -# CONFIG_ENABLE_MUST_CHECK is not set CONFIG_FRAME_WARN=1024 CONFIG_READABLE_ASM=y CONFIG_UNUSED_SYMBOLS=y +CONFIG_HEADERS_CHECK=y +CONFIG_DEBUG_SECTION_MISMATCH=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_PAGEALLOC=y CONFIG_DEBUG_OBJECTS=y @@ -588,6 +584,7 @@ CONFIG_FAILSLAB=y CONFIG_FAIL_PAGE_ALLOC=y CONFIG_FAIL_MAKE_REQUEST=y CONFIG_FAIL_IO_TIMEOUT=y +CONFIG_FAIL_FUTEX=y CONFIG_FAULT_INJECTION_DEBUG_FS=y CONFIG_FAULT_INJECTION_STACKTRACE_FILTER=y CONFIG_LATENCYTOP=y diff --git a/arch/s390/configs/gcov_defconfig b/arch/s390/configs/gcov_defconfig index 9858b14cde1e..a31dcd56f7c0 100644 --- a/arch/s390/configs/gcov_defconfig +++ b/arch/s390/configs/gcov_defconfig @@ -10,21 +10,27 @@ CONFIG_TASKSTATS=y CONFIG_TASK_DELAY_ACCT=y CONFIG_TASK_XACCT=y CONFIG_TASK_IO_ACCOUNTING=y -CONFIG_RCU_FAST_NO_HZ=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_NUMA_BALANCING=y CONFIG_CGROUP_FREEZER=y +CONFIG_CGROUP_PIDS=y CONFIG_CGROUP_DEVICE=y CONFIG_CPUSETS=y CONFIG_CGROUP_CPUACCT=y +CONFIG_MEMCG=y +CONFIG_MEMCG_SWAP=y +CONFIG_MEMCG_KMEM=y +CONFIG_CGROUP_HUGETLB=y CONFIG_CGROUP_PERF=y CONFIG_BLK_CGROUP=y CONFIG_NAMESPACES=y +CONFIG_USER_NS=y CONFIG_SCHED_AUTOGROUP=y CONFIG_BLK_DEV_INITRD=y CONFIG_EXPERT=y CONFIG_BPF_SYSCALL=y +CONFIG_USERFAULTFD=y # CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y CONFIG_OPROFILE=m @@ -61,7 +67,6 @@ CONFIG_HOTPLUG_PCI=y CONFIG_HOTPLUG_PCI_S390=y CONFIG_CHSC_SCH=y CONFIG_CRASH_DUMP=y -# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set CONFIG_BINFMT_MISC=m CONFIG_HIBERNATION=y CONFIG_NET=y @@ -103,7 +108,6 @@ CONFIG_TCP_CONG_LP=m CONFIG_TCP_CONG_VENO=m CONFIG_TCP_CONG_YEAH=m CONFIG_TCP_CONG_ILLINOIS=m -CONFIG_IPV6=y CONFIG_IPV6_ROUTER_PREF=y CONFIG_INET6_AH=m CONFIG_INET6_ESP=m @@ -453,19 +457,9 @@ CONFIG_INFINIBAND=m CONFIG_INFINIBAND_USER_ACCESS=m CONFIG_MLX4_INFINIBAND=m CONFIG_VIRTIO_BALLOON=m -# CONFIG_IOMMU_SUPPORT is not set -CONFIG_EXT2_FS=y -CONFIG_EXT2_FS_XATTR=y -CONFIG_EXT2_FS_POSIX_ACL=y -CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y -CONFIG_JBD_DEBUG=y CONFIG_JBD2_DEBUG=y CONFIG_JFS_FS=m CONFIG_JFS_POSIX_ACL=y @@ -485,7 +479,7 @@ CONFIG_QUOTA_NETLINK_INTERFACE=y CONFIG_QFMT_V1=m CONFIG_QFMT_V2=m CONFIG_AUTOFS4_FS=m -CONFIG_FUSE_FS=m +CONFIG_FUSE_FS=y CONFIG_CUSE=m CONFIG_FSCACHE=m CONFIG_CACHEFILES=m @@ -550,6 +544,7 @@ CONFIG_NOTIFIER_ERROR_INJECTION=m CONFIG_CPU_NOTIFIER_ERROR_INJECT=m CONFIG_PM_NOTIFIER_ERROR_INJECT=m CONFIG_LATENCYTOP=y +CONFIG_DEBUG_STRICT_USER_COPY_CHECKS=y CONFIG_BLK_DEV_IO_TRACE=y # CONFIG_KPROBE_EVENT is not set CONFIG_LKDTM=m @@ -557,6 +552,7 @@ CONFIG_RBTREE_TEST=m CONFIG_INTERVAL_TREE_TEST=m CONFIG_PERCPU_TEST=m CONFIG_ATOMIC64_SELFTEST=y +CONFIG_TEST_BPF=m # CONFIG_STRICT_DEVMEM is not set CONFIG_S390_PTDUMP=y CONFIG_ENCRYPTED_KEYS=m diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig index 7f14f80717d4..7b73bf353345 100644 --- a/arch/s390/configs/performance_defconfig +++ b/arch/s390/configs/performance_defconfig @@ -10,22 +10,28 @@ CONFIG_TASKSTATS=y CONFIG_TASK_DELAY_ACCT=y CONFIG_TASK_XACCT=y CONFIG_TASK_IO_ACCOUNTING=y -CONFIG_RCU_FAST_NO_HZ=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_NUMA_BALANCING=y # CONFIG_NUMA_BALANCING_DEFAULT_ENABLED is not set CONFIG_CGROUP_FREEZER=y +CONFIG_CGROUP_PIDS=y CONFIG_CGROUP_DEVICE=y CONFIG_CPUSETS=y CONFIG_CGROUP_CPUACCT=y +CONFIG_MEMCG=y +CONFIG_MEMCG_SWAP=y +CONFIG_MEMCG_KMEM=y +CONFIG_CGROUP_HUGETLB=y CONFIG_CGROUP_PERF=y CONFIG_BLK_CGROUP=y CONFIG_NAMESPACES=y +CONFIG_USER_NS=y CONFIG_SCHED_AUTOGROUP=y CONFIG_BLK_DEV_INITRD=y CONFIG_EXPERT=y CONFIG_BPF_SYSCALL=y +CONFIG_USERFAULTFD=y # CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y CONFIG_OPROFILE=m @@ -61,7 +67,6 @@ CONFIG_HOTPLUG_PCI=y CONFIG_HOTPLUG_PCI_S390=y CONFIG_CHSC_SCH=y CONFIG_CRASH_DUMP=y -# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set CONFIG_BINFMT_MISC=m CONFIG_HIBERNATION=y CONFIG_NET=y @@ -103,7 +108,6 @@ CONFIG_TCP_CONG_LP=m CONFIG_TCP_CONG_VENO=m CONFIG_TCP_CONG_YEAH=m CONFIG_TCP_CONG_ILLINOIS=m -CONFIG_IPV6=y CONFIG_IPV6_ROUTER_PREF=y CONFIG_INET6_AH=m CONFIG_INET6_ESP=m @@ -453,19 +457,9 @@ CONFIG_INFINIBAND=m CONFIG_INFINIBAND_USER_ACCESS=m CONFIG_MLX4_INFINIBAND=m CONFIG_VIRTIO_BALLOON=m -# CONFIG_IOMMU_SUPPORT is not set -CONFIG_EXT2_FS=y -CONFIG_EXT2_FS_XATTR=y -CONFIG_EXT2_FS_POSIX_ACL=y -CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y -CONFIG_JBD_DEBUG=y CONFIG_JBD2_DEBUG=y CONFIG_JFS_FS=m CONFIG_JFS_POSIX_ACL=y @@ -485,7 +479,7 @@ CONFIG_QUOTA_NETLINK_INTERFACE=y CONFIG_QFMT_V1=m CONFIG_QFMT_V2=m CONFIG_AUTOFS4_FS=m -CONFIG_FUSE_FS=m +CONFIG_FUSE_FS=y CONFIG_CUSE=m CONFIG_FSCACHE=m CONFIG_CACHEFILES=m @@ -546,6 +540,7 @@ CONFIG_TIMER_STATS=y CONFIG_RCU_TORTURE_TEST=m CONFIG_RCU_CPU_STALL_TIMEOUT=60 CONFIG_LATENCYTOP=y +CONFIG_DEBUG_STRICT_USER_COPY_CHECKS=y CONFIG_SCHED_TRACER=y CONFIG_FTRACE_SYSCALLS=y CONFIG_STACK_TRACER=y @@ -554,6 +549,7 @@ CONFIG_UPROBE_EVENT=y CONFIG_LKDTM=m CONFIG_PERCPU_TEST=m CONFIG_ATOMIC64_SELFTEST=y +CONFIG_TEST_BPF=m # CONFIG_STRICT_DEVMEM is not set CONFIG_S390_PTDUMP=y CONFIG_ENCRYPTED_KEYS=m diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index 92805d604173..1719843a55a2 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -23,8 +23,6 @@ CONFIG_CRASH_DUMP=y # CONFIG_SECCOMP is not set CONFIG_NET=y # CONFIG_IUCV is not set -CONFIG_ATM=y -CONFIG_ATM_LANE=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEVTMPFS=y # CONFIG_FIRMWARE_IN_KERNEL is not set @@ -54,14 +52,10 @@ CONFIG_RAW_DRIVER=y # CONFIG_S390_VMUR is not set # CONFIG_HID is not set # CONFIG_IOMMU_SUPPORT is not set -CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_EXT4_FS=y -CONFIG_EXT4_FS_POSIX_ACL=y -CONFIG_EXT4_FS_SECURITY=y +# CONFIG_DNOTIFY is not set # CONFIG_INOTIFY_USER is not set CONFIG_CONFIGFS_FS=y +# CONFIG_MISC_FILESYSTEMS is not set CONFIG_PRINTK_TIME=y CONFIG_DEBUG_INFO=y CONFIG_DEBUG_FS=y diff --git a/arch/s390/defconfig b/arch/s390/defconfig index 9256b48e7e43..e24f2af4c73b 100644 --- a/arch/s390/defconfig +++ b/arch/s390/defconfig @@ -11,22 +11,31 @@ CONFIG_TASK_IO_ACCOUNTING=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_CGROUPS=y +CONFIG_CGROUP_FREEZER=y +CONFIG_CGROUP_PIDS=y +CONFIG_CGROUP_DEVICE=y CONFIG_CPUSETS=y CONFIG_CGROUP_CPUACCT=y CONFIG_MEMCG=y CONFIG_MEMCG_SWAP=y +CONFIG_MEMCG_KMEM=y +CONFIG_CGROUP_HUGETLB=y +CONFIG_CGROUP_PERF=y CONFIG_CGROUP_SCHED=y CONFIG_RT_GROUP_SCHED=y CONFIG_BLK_CGROUP=y CONFIG_NAMESPACES=y +CONFIG_USER_NS=y CONFIG_BLK_DEV_INITRD=y CONFIG_EXPERT=y CONFIG_BPF_SYSCALL=y +CONFIG_USERFAULTFD=y # CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y CONFIG_OPROFILE=y CONFIG_KPROBES=y CONFIG_JUMP_LABEL=y +CONFIG_STATIC_KEYS_SELFTEST=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODVERSIONS=y @@ -37,6 +46,7 @@ CONFIG_DEFAULT_DEADLINE=y CONFIG_LIVEPATCH=y CONFIG_MARCH_Z196=y CONFIG_NR_CPUS=256 +CONFIG_NUMA=y CONFIG_HZ_100=y CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y @@ -52,7 +62,6 @@ CONFIG_NET_KEY=y CONFIG_INET=y CONFIG_IP_MULTICAST=y # CONFIG_INET_LRO is not set -CONFIG_IPV6=y CONFIG_L2TP=m CONFIG_L2TP_DEBUGFS=m CONFIG_VLAN_8021Q=y @@ -89,10 +98,26 @@ CONFIG_BLK_DEV_SR_VENDOR=y CONFIG_CHR_DEV_SG=y CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_LOGGING=y -CONFIG_SCSI_SCAN_ASYNC=y CONFIG_SCSI_FC_ATTRS=y CONFIG_ZFCP=y CONFIG_SCSI_VIRTIO=y +CONFIG_MD=y +CONFIG_MD_LINEAR=m +CONFIG_MD_RAID0=m +CONFIG_MD_MULTIPATH=m +CONFIG_BLK_DEV_DM=y +CONFIG_DM_CRYPT=m +CONFIG_DM_SNAPSHOT=m +CONFIG_DM_MIRROR=m +CONFIG_DM_LOG_USERSPACE=m +CONFIG_DM_RAID=m +CONFIG_DM_ZERO=m +CONFIG_DM_MULTIPATH=m +CONFIG_DM_MULTIPATH_QL=m +CONFIG_DM_MULTIPATH_ST=m +CONFIG_DM_UEVENT=y +CONFIG_DM_VERITY=m +CONFIG_DM_SWITCH=m CONFIG_NETDEVICES=y CONFIG_BONDING=m CONFIG_DUMMY=m @@ -137,7 +162,6 @@ CONFIG_DEBUG_PI_LIST=y CONFIG_DEBUG_SG=y CONFIG_DEBUG_NOTIFIERS=y CONFIG_RCU_CPU_STALL_TIMEOUT=60 -# CONFIG_RCU_CPU_STALL_INFO is not set CONFIG_RCU_TRACE=y CONFIG_LATENCYTOP=y CONFIG_DEBUG_STRICT_USER_COPY_CHECKS=y diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h index d68e11e0df5e..7ffd0b19135c 100644 --- a/arch/s390/include/asm/barrier.h +++ b/arch/s390/include/asm/barrier.h @@ -36,7 +36,7 @@ #define smp_mb__before_atomic() smp_mb() #define smp_mb__after_atomic() smp_mb() -#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0) +#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0) #define smp_store_release(p, v) \ do { \ diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h index d350ed9d0fbb..352f7bdaf11f 100644 --- a/arch/s390/include/asm/compat.h +++ b/arch/s390/include/asm/compat.h @@ -284,7 +284,7 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr) static inline int is_compat_task(void) { - return is_32bit_task(); + return test_thread_flag(TIF_31BIT); } static inline void __user *arch_compat_alloc_user_space(long len) diff --git a/arch/s390/include/asm/crw.h b/arch/s390/include/asm/crw.h index 7c31d3e25cd1..bcb9cd2a730a 100644 --- a/arch/s390/include/asm/crw.h +++ b/arch/s390/include/asm/crw.h @@ -52,18 +52,4 @@ void crw_wait_for_channel_report(void); #define CRW_ERC_PERRI 0x07 /* perm. error, facility init */ #define CRW_ERC_PMOD 0x08 /* installed parameters modified */ -static inline int stcrw(struct crw *pcrw) -{ - int ccode; - - asm volatile( - " stcrw 0(%2)\n" - " ipm %0\n" - " srl %0,28\n" - : "=d" (ccode), "=m" (*pcrw) - : "a" (pcrw) - : "cc" ); - return ccode; -} - #endif /* _ASM_S390_CRW_H */ diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h index bab6739a1154..563ab9f44874 100644 --- a/arch/s390/include/asm/elf.h +++ b/arch/s390/include/asm/elf.h @@ -104,6 +104,9 @@ #define HWCAP_S390_TE 1024 #define HWCAP_S390_VXRS 2048 +/* Internal bits, not exposed via elf */ +#define HWCAP_INT_SIE 1UL + /* * These are used to set parameters in the core dumps. */ @@ -126,6 +129,7 @@ typedef s390_regs elf_gregset_t; typedef s390_fp_regs compat_elf_fpregset_t; typedef s390_compat_regs compat_elf_gregset_t; +#include <linux/compat.h> #include <linux/sched.h> /* for task_struct */ #include <asm/mmu_context.h> @@ -159,7 +163,7 @@ extern unsigned int vdso_enabled; the loader. We need to make sure that it is out of the way of the program that it will "exec", and that there is sufficient room for the brk. 64-bit tasks are aligned to 4GB. */ -#define ELF_ET_DYN_BASE (is_32bit_task() ? \ +#define ELF_ET_DYN_BASE (is_compat_task() ? \ (STACK_TOP / 3 * 2) : \ (STACK_TOP / 3 * 2) & ~((1UL << 32) - 1)) @@ -169,6 +173,10 @@ extern unsigned int vdso_enabled; extern unsigned long elf_hwcap; #define ELF_HWCAP (elf_hwcap) +/* Internal hardware capabilities, not exposed via elf */ + +extern unsigned long int_hwcap; + /* This yields a string that ld.so will use to load implementation specific libraries for optimization. This is more specific in intent than poking at uname or /proc/cpuinfo. @@ -212,9 +220,9 @@ do { \ * of up to 1GB. For 31-bit processes the virtual address space is limited, * use no alignment and limit the randomization to 8MB. */ -#define BRK_RND_MASK (is_32bit_task() ? 0x7ffUL : 0x3ffffUL) -#define MMAP_RND_MASK (is_32bit_task() ? 0x7ffUL : 0x3ff80UL) -#define MMAP_ALIGN_MASK (is_32bit_task() ? 0 : 0x7fUL) +#define BRK_RND_MASK (is_compat_task() ? 0x7ffUL : 0x3ffffUL) +#define MMAP_RND_MASK (is_compat_task() ? 0x7ffUL : 0x3ff80UL) +#define MMAP_ALIGN_MASK (is_compat_task() ? 0 : 0x7fUL) #define STACK_RND_MASK MMAP_RND_MASK #define ARCH_DLINFO \ @@ -229,6 +237,4 @@ struct linux_binprm; #define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1 int arch_setup_additional_pages(struct linux_binprm *, int); -void *fill_cpu_elf_notes(void *ptr, struct save_area *sa, __vector128 *vxrs); - #endif diff --git a/arch/s390/include/asm/facilities_src.h b/arch/s390/include/asm/facilities_src.h new file mode 100644 index 000000000000..4917728e5828 --- /dev/null +++ b/arch/s390/include/asm/facilities_src.h @@ -0,0 +1,58 @@ +/* + * Copyright IBM Corp. 2015 + */ + +#ifndef S390_GEN_FACILITIES_C +#error "This file can only be included by gen_facilities.c" +#endif + +#include <linux/kconfig.h> + +struct facility_def { + char *name; + int *bits; +}; + +static struct facility_def facility_defs[] = { + { + /* + * FACILITIES_ALS contains the list of facilities that are + * required to run a kernel that is compiled e.g. with + * -march=<machine>. + */ + .name = "FACILITIES_ALS", + .bits = (int[]){ +#ifdef CONFIG_HAVE_MARCH_Z900_FEATURES + 0, /* N3 instructions */ + 1, /* z/Arch mode installed */ +#endif +#ifdef CONFIG_HAVE_MARCH_Z990_FEATURES + 18, /* long displacement facility */ +#endif +#ifdef CONFIG_HAVE_MARCH_Z9_109_FEATURES + 7, /* stfle */ + 17, /* message security assist */ + 21, /* extended-immediate facility */ + 25, /* store clock fast */ +#endif +#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES + 27, /* mvcos */ + 32, /* compare and swap and store */ + 33, /* compare and swap and store 2 */ + 34, /* general extension facility */ + 35, /* execute extensions */ +#endif +#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES + 45, /* fast-BCR, etc. */ +#endif +#ifdef CONFIG_HAVE_MARCH_ZEC12_FEATURES + 49, /* misc-instruction-extensions */ + 52, /* interlocked facility 2 */ +#endif +#ifdef CONFIG_HAVE_MARCH_Z13_FEATURES + 53, /* load-and-zero-rightmost-byte, etc. */ +#endif + -1 /* END */ + } + }, +}; diff --git a/arch/s390/include/asm/facility.h b/arch/s390/include/asm/facility.h index 0aa6a7ed95a3..09b406db7529 100644 --- a/arch/s390/include/asm/facility.h +++ b/arch/s390/include/asm/facility.h @@ -7,6 +7,10 @@ #ifndef __ASM_FACILITY_H #define __ASM_FACILITY_H +#include <generated/facilities.h> + +#ifndef __ASSEMBLY__ + #include <linux/string.h> #include <linux/preempt.h> #include <asm/lowcore.h> @@ -30,6 +34,12 @@ static inline int __test_facility(unsigned long nr, void *facilities) */ static inline int test_facility(unsigned long nr) { + unsigned long facilities_als[] = { FACILITIES_ALS }; + + if (__builtin_constant_p(nr) && nr < sizeof(facilities_als) * 8) { + if (__test_facility(nr, &facilities_als)) + return 1; + } return __test_facility(nr, &S390_lowcore.stfle_fac_list); } @@ -44,10 +54,8 @@ static inline void stfle(u64 *stfle_fac_list, int size) preempt_disable(); asm volatile( - " .insn s,0xb2b10000,0(0)\n" /* stfl */ - "0:\n" - EX_TABLE(0b, 0b) - : "+m" (S390_lowcore.stfl_fac_list)); + " stfl 0(0)\n" + : "=m" (S390_lowcore.stfl_fac_list)); nr = 4; /* bytes stored by stfl */ memcpy(stfle_fac_list, &S390_lowcore.stfl_fac_list, 4); if (S390_lowcore.stfl_fac_list & 0x01000000) { @@ -64,4 +72,5 @@ static inline void stfle(u64 *stfle_fac_list, int size) preempt_enable(); } +#endif /* __ASSEMBLY__ */ #endif /* __ASM_FACILITY_H */ diff --git a/arch/s390/include/asm/fpu/internal.h b/arch/s390/include/asm/fpu/internal.h index 2559b16da525..ea91ddfe54eb 100644 --- a/arch/s390/include/asm/fpu/internal.h +++ b/arch/s390/include/asm/fpu/internal.h @@ -12,21 +12,13 @@ #include <asm/ctl_reg.h> #include <asm/fpu/types.h> -static inline void save_vx_regs_safe(__vector128 *vxrs) +static inline void save_vx_regs(__vector128 *vxrs) { - unsigned long cr0, flags; - - flags = arch_local_irq_save(); - __ctl_store(cr0, 0, 0); - __ctl_set_bit(0, 17); - __ctl_set_bit(0, 18); asm volatile( " la 1,%0\n" " .word 0xe70f,0x1000,0x003e\n" /* vstm 0,15,0(1) */ " .word 0xe70f,0x1100,0x0c3e\n" /* vstm 16,31,256(1) */ : "=Q" (*(struct vx_array *) vxrs) : : "1"); - __ctl_load(cr0, 0, 0); - arch_local_irq_restore(flags); } static inline void convert_vx_to_fp(freg_t *fprs, __vector128 *vxrs) diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h index 86634e71b69f..6fc44dca193e 100644 --- a/arch/s390/include/asm/ipl.h +++ b/arch/s390/include/asm/ipl.h @@ -87,14 +87,12 @@ struct ipl_parameter_block { * IPL validity flags */ extern u32 ipl_flags; -extern u32 dump_prefix_page; -struct dump_save_areas { - struct save_area_ext **areas; - int count; -}; - -extern struct dump_save_areas dump_save_areas; +struct save_area; +struct save_area * __init save_area_alloc(bool is_boot_cpu); +struct save_area * __init save_area_boot_cpu(void); +void __init save_area_add_regs(struct save_area *, void *regs); +void __init save_area_add_vxrs(struct save_area *, __vector128 *vxrs); extern void do_reipl(void); extern void do_halt(void); @@ -176,7 +174,7 @@ enum diag308_rc { extern int diag308(unsigned long subcode, void *addr); extern void diag308_reset(void); -extern void store_status(void); +extern void store_status(void (*fn)(void *), void *data); extern void lgr_info_log(void); #endif /* _ASM_S390_IPL_H */ diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index efaac2c3bb77..6742414dbd6f 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -25,7 +25,9 @@ #include <asm/fpu/api.h> #include <asm/isc.h> -#define KVM_MAX_VCPUS 64 +#define KVM_S390_BSCA_CPU_SLOTS 64 +#define KVM_S390_ESCA_CPU_SLOTS 248 +#define KVM_MAX_VCPUS KVM_S390_ESCA_CPU_SLOTS #define KVM_USER_MEM_SLOTS 32 /* @@ -37,12 +39,41 @@ #define KVM_IRQCHIP_NUM_PINS 4096 #define KVM_HALT_POLL_NS_DEFAULT 0 +/* s390-specific vcpu->requests bit members */ +#define KVM_REQ_ENABLE_IBS 8 +#define KVM_REQ_DISABLE_IBS 9 + #define SIGP_CTRL_C 0x80 #define SIGP_CTRL_SCN_MASK 0x3f -struct sca_entry { +union bsca_sigp_ctrl { + __u8 value; + struct { + __u8 c : 1; + __u8 r : 1; + __u8 scn : 6; + }; +} __packed; + +union esca_sigp_ctrl { + __u16 value; + struct { + __u8 c : 1; + __u8 reserved: 7; + __u8 scn; + }; +} __packed; + +struct esca_entry { + union esca_sigp_ctrl sigp_ctrl; + __u16 reserved1[3]; + __u64 sda; + __u64 reserved2[6]; +} __packed; + +struct bsca_entry { __u8 reserved0; - __u8 sigp_ctrl; + union bsca_sigp_ctrl sigp_ctrl; __u16 reserved[3]; __u64 sda; __u64 reserved2[2]; @@ -57,14 +88,22 @@ union ipte_control { }; }; -struct sca_block { +struct bsca_block { union ipte_control ipte_control; __u64 reserved[5]; __u64 mcn; __u64 reserved2; - struct sca_entry cpu[64]; + struct bsca_entry cpu[KVM_S390_BSCA_CPU_SLOTS]; } __attribute__((packed)); +struct esca_block { + union ipte_control ipte_control; + __u64 reserved1[7]; + __u64 mcn[4]; + __u64 reserved2[20]; + struct esca_entry cpu[KVM_S390_ESCA_CPU_SLOTS]; +} __packed; + #define CPUSTAT_STOPPED 0x80000000 #define CPUSTAT_WAIT 0x10000000 #define CPUSTAT_ECALL_PEND 0x08000000 @@ -182,7 +221,8 @@ struct kvm_s390_sie_block { __u64 pp; /* 0x01de */ __u8 reserved1e6[2]; /* 0x01e6 */ __u64 itdba; /* 0x01e8 */ - __u8 reserved1f0[16]; /* 0x01f0 */ + __u64 riccbd; /* 0x01f0 */ + __u8 reserved1f8[8]; /* 0x01f8 */ } __attribute__((packed)); struct kvm_s390_itdb { @@ -585,11 +625,14 @@ struct kvm_s390_crypto_cb { }; struct kvm_arch{ - struct sca_block *sca; + void *sca; + int use_esca; + rwlock_t sca_lock; debug_info_t *dbf; struct kvm_s390_float_interrupt float_int; struct kvm_device *flic; struct gmap *gmap; + unsigned long mem_limit; int css_support; int use_irqchip; int use_cmma; diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index afe1cfebf1a4..d79ba7cf75b0 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -16,28 +16,7 @@ #define LC_ORDER 1 #define LC_PAGES 2 -struct save_area { - u64 fp_regs[16]; - u64 gp_regs[16]; - u8 psw[16]; - u8 pad1[8]; - u32 pref_reg; - u32 fp_ctrl_reg; - u8 pad2[4]; - u32 tod_reg; - u64 timer; - u64 clk_cmp; - u8 pad3[8]; - u32 acc_regs[16]; - u64 ctrl_regs[16]; -} __packed; - -struct save_area_ext { - struct save_area sa; - __vector128 vx_regs[32]; -}; - -struct _lowcore { +struct lowcore { __u8 pad_0x0000[0x0014-0x0000]; /* 0x0000 */ __u32 ipl_parmblock_ptr; /* 0x0014 */ __u8 pad_0x0018[0x0080-0x0018]; /* 0x0018 */ @@ -204,9 +183,9 @@ struct _lowcore { __u8 vector_save_area[1024]; /* 0x1c00 */ } __packed; -#define S390_lowcore (*((struct _lowcore *) 0)) +#define S390_lowcore (*((struct lowcore *) 0)) -extern struct _lowcore *lowcore_ptr[]; +extern struct lowcore *lowcore_ptr[]; static inline void set_prefix(__u32 address) { diff --git a/arch/s390/include/asm/os_info.h b/arch/s390/include/asm/os_info.h index 295f2c4f1c96..943475382d51 100644 --- a/arch/s390/include/asm/os_info.h +++ b/arch/s390/include/asm/os_info.h @@ -38,7 +38,7 @@ u32 os_info_csum(struct os_info *os_info); #ifdef CONFIG_CRASH_DUMP void *os_info_old_entry(int nr, unsigned long *size); -int copy_from_oldmem(void *dest, void *src, size_t count); +int copy_oldmem_kernel(void *dst, void *src, size_t count); #else static inline void *os_info_old_entry(int nr, unsigned long *size) { diff --git a/arch/s390/include/asm/pci_dma.h b/arch/s390/include/asm/pci_dma.h index 1aac41e83ea1..92df3eb8d14e 100644 --- a/arch/s390/include/asm/pci_dma.h +++ b/arch/s390/include/asm/pci_dma.h @@ -23,6 +23,8 @@ enum zpci_ioat_dtype { #define ZPCI_IOTA_FS_2G 2 #define ZPCI_KEY (PAGE_DEFAULT_KEY << 5) +#define ZPCI_TABLE_SIZE_RT (1UL << 42) + #define ZPCI_IOTA_STO_FLAG (ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_ST) #define ZPCI_IOTA_RTTO_FLAG (ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_RT) #define ZPCI_IOTA_RSTO_FLAG (ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_RS) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 024f85f947ae..64ead8091248 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -286,7 +286,6 @@ static inline int is_module_addr(void *addr) #define _SEGMENT_ENTRY_DIRTY 0x2000 /* SW segment dirty bit */ #define _SEGMENT_ENTRY_YOUNG 0x1000 /* SW segment young bit */ -#define _SEGMENT_ENTRY_SPLIT 0x0800 /* THP splitting bit */ #define _SEGMENT_ENTRY_LARGE 0x0400 /* STE-format control, large page */ #define _SEGMENT_ENTRY_READ 0x0002 /* SW segment read bit */ #define _SEGMENT_ENTRY_WRITE 0x0001 /* SW segment write bit */ @@ -318,8 +317,6 @@ static inline int is_module_addr(void *addr) * SW-bits: y young, d dirty, r read, w write */ -#define _SEGMENT_ENTRY_SPLIT_BIT 11 /* THP splitting bit number */ - /* Page status table bits for virtualization */ #define PGSTE_ACC_BITS 0xf000000000000000UL #define PGSTE_FP_BIT 0x0800000000000000UL @@ -523,10 +520,6 @@ static inline int pmd_bad(pmd_t pmd) return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS) != 0; } -#define __HAVE_ARCH_PMDP_SPLITTING_FLUSH -extern void pmdp_splitting_flush(struct vm_area_struct *vma, - unsigned long addr, pmd_t *pmdp); - #define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS extern int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp, @@ -1424,8 +1417,7 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) if (pmd_large(pmd)) { pmd_val(pmd) &= _SEGMENT_ENTRY_ORIGIN_LARGE | _SEGMENT_ENTRY_DIRTY | _SEGMENT_ENTRY_YOUNG | - _SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_SPLIT | - _SEGMENT_ENTRY_SOFT_DIRTY; + _SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_SOFT_DIRTY; pmd_val(pmd) |= massage_pgprot_pmd(newprot); if (!(pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY)) pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT; @@ -1533,12 +1525,6 @@ extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, #define __HAVE_ARCH_PGTABLE_WITHDRAW extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp); -static inline int pmd_trans_splitting(pmd_t pmd) -{ - return (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) && - (pmd_val(pmd) & _SEGMENT_ENTRY_SPLIT); -} - static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t entry) { diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index b16c3d0a1b9f..f16debf6a612 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -18,12 +18,14 @@ #define CIF_NOHZ_DELAY 2 /* delay HZ disable for a tick */ #define CIF_FPU 3 /* restore FPU registers */ #define CIF_IGNORE_IRQ 4 /* ignore interrupt (for udelay) */ +#define CIF_ENABLED_WAIT 5 /* in enabled wait state */ #define _CIF_MCCK_PENDING _BITUL(CIF_MCCK_PENDING) #define _CIF_ASCE _BITUL(CIF_ASCE) #define _CIF_NOHZ_DELAY _BITUL(CIF_NOHZ_DELAY) #define _CIF_FPU _BITUL(CIF_FPU) #define _CIF_IGNORE_IRQ _BITUL(CIF_IGNORE_IRQ) +#define _CIF_ENABLED_WAIT _BITUL(CIF_ENABLED_WAIT) #ifndef __ASSEMBLY__ @@ -52,6 +54,16 @@ static inline int test_cpu_flag(int flag) return !!(S390_lowcore.cpu_flags & (1UL << flag)); } +/* + * Test CIF flag of another CPU. The caller needs to ensure that + * CPU hotplug can not happen, e.g. by disabling preemption. + */ +static inline int test_cpu_flag_of(int flag, int cpu) +{ + struct lowcore *lc = lowcore_ptr[cpu]; + return !!(lc->cpu_flags & (1UL << flag)); +} + #define arch_needs_cpu() test_cpu_flag(CIF_NOHZ_DELAY) /* diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h index 37cbc50947f2..f00cd35c8ac4 100644 --- a/arch/s390/include/asm/ptrace.h +++ b/arch/s390/include/asm/ptrace.h @@ -24,25 +24,25 @@ PSW_MASK_PSTATE | PSW_ASC_PRIMARY) struct psw_bits { - unsigned long long : 1; - unsigned long long r : 1; /* PER-Mask */ - unsigned long long : 3; - unsigned long long t : 1; /* DAT Mode */ - unsigned long long i : 1; /* Input/Output Mask */ - unsigned long long e : 1; /* External Mask */ - unsigned long long key : 4; /* PSW Key */ - unsigned long long : 1; - unsigned long long m : 1; /* Machine-Check Mask */ - unsigned long long w : 1; /* Wait State */ - unsigned long long p : 1; /* Problem State */ - unsigned long long as : 2; /* Address Space Control */ - unsigned long long cc : 2; /* Condition Code */ - unsigned long long pm : 4; /* Program Mask */ - unsigned long long ri : 1; /* Runtime Instrumentation */ - unsigned long long : 6; - unsigned long long eaba : 2; /* Addressing Mode */ - unsigned long long : 31; - unsigned long long ia : 64;/* Instruction Address */ + unsigned long : 1; + unsigned long r : 1; /* PER-Mask */ + unsigned long : 3; + unsigned long t : 1; /* DAT Mode */ + unsigned long i : 1; /* Input/Output Mask */ + unsigned long e : 1; /* External Mask */ + unsigned long key : 4; /* PSW Key */ + unsigned long : 1; + unsigned long m : 1; /* Machine-Check Mask */ + unsigned long w : 1; /* Wait State */ + unsigned long p : 1; /* Problem State */ + unsigned long as : 2; /* Address Space Control */ + unsigned long cc : 2; /* Condition Code */ + unsigned long pm : 4; /* Program Mask */ + unsigned long ri : 1; /* Runtime Instrumentation */ + unsigned long : 6; + unsigned long eaba : 2; /* Addressing Mode */ + unsigned long : 31; + unsigned long ia : 64; /* Instruction Address */ }; enum { diff --git a/arch/s390/include/asm/reset.h b/arch/s390/include/asm/reset.h index 72786067b300..fe11fa88a0e0 100644 --- a/arch/s390/include/asm/reset.h +++ b/arch/s390/include/asm/reset.h @@ -15,6 +15,5 @@ struct reset_call { extern void register_reset_call(struct reset_call *reset); extern void unregister_reset_call(struct reset_call *reset); -extern void s390_reset_system(void (*fn_pre)(void), - void (*fn_post)(void *), void *data); +extern void s390_reset_system(void); #endif /* _ASM_S390_RESET_H */ diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index 821dde5f425d..bab456be9a4f 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -29,7 +29,10 @@ struct sclp_ipl_info { struct sclp_core_entry { u8 core_id; - u8 reserved0[2]; + u8 reserved0; + u8 : 4; + u8 sief2 : 1; + u8 : 3; u8 : 3; u8 siif : 1; u8 sigpif : 1; @@ -53,16 +56,19 @@ struct sclp_info { unsigned char has_sigpif : 1; unsigned char has_core_type : 1; unsigned char has_sprp : 1; + unsigned char has_hvs : 1; + unsigned char has_esca : 1; + unsigned char has_sief2 : 1; unsigned int ibc; unsigned int mtid; unsigned int mtid_cp; unsigned int mtid_prev; - unsigned long long rzm; - unsigned long long rnmax; - unsigned long long hamax; + unsigned long rzm; + unsigned long rnmax; + unsigned long hamax; unsigned int max_cores; unsigned long hsa_size; - unsigned long long facilities; + unsigned long facilities; }; extern struct sclp_info sclp; @@ -77,8 +83,9 @@ int sclp_chp_read_info(struct sclp_chp_info *info); void sclp_get_ipl_info(struct sclp_ipl_info *info); int sclp_pci_configure(u32 fid); int sclp_pci_deconfigure(u32 fid); -int memcpy_hsa(void *dest, unsigned long src, size_t count, int mode); +int memcpy_hsa_kernel(void *dest, unsigned long src, size_t count); +int memcpy_hsa_user(void __user *dest, unsigned long src, size_t count); void sclp_early_detect(void); -int _sclp_print_early(const char *); +void _sclp_print_early(const char *); #endif /* _ASM_S390_SCLP_H */ diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index 23537661da0e..69837225119e 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -12,27 +12,24 @@ #define PARMAREA 0x10400 /* - * Machine features detected in head.S + * Machine features detected in early.c */ #define MACHINE_FLAG_VM _BITUL(0) -#define MACHINE_FLAG_IEEE _BITUL(1) -#define MACHINE_FLAG_CSP _BITUL(2) -#define MACHINE_FLAG_MVPG _BITUL(3) -#define MACHINE_FLAG_DIAG44 _BITUL(4) +#define MACHINE_FLAG_KVM _BITUL(1) +#define MACHINE_FLAG_LPAR _BITUL(2) +#define MACHINE_FLAG_DIAG9C _BITUL(3) +#define MACHINE_FLAG_ESOP _BITUL(4) #define MACHINE_FLAG_IDTE _BITUL(5) -#define MACHINE_FLAG_DIAG9C _BITUL(6) -#define MACHINE_FLAG_KVM _BITUL(8) -#define MACHINE_FLAG_ESOP _BITUL(9) -#define MACHINE_FLAG_EDAT1 _BITUL(10) -#define MACHINE_FLAG_EDAT2 _BITUL(11) -#define MACHINE_FLAG_LPAR _BITUL(12) -#define MACHINE_FLAG_LPP _BITUL(13) -#define MACHINE_FLAG_TOPOLOGY _BITUL(14) -#define MACHINE_FLAG_TE _BITUL(15) -#define MACHINE_FLAG_TLB_LC _BITUL(17) -#define MACHINE_FLAG_VX _BITUL(18) -#define MACHINE_FLAG_CAD _BITUL(19) +#define MACHINE_FLAG_DIAG44 _BITUL(6) +#define MACHINE_FLAG_EDAT1 _BITUL(7) +#define MACHINE_FLAG_EDAT2 _BITUL(8) +#define MACHINE_FLAG_LPP _BITUL(9) +#define MACHINE_FLAG_TOPOLOGY _BITUL(10) +#define MACHINE_FLAG_TE _BITUL(11) +#define MACHINE_FLAG_TLB_LC _BITUL(12) +#define MACHINE_FLAG_VX _BITUL(13) +#define MACHINE_FLAG_CAD _BITUL(14) #define LPP_MAGIC _BITUL(31) #define LPP_PFAULT_PID_MASK _AC(0xffffffff, UL) diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h index 5df26b11cf47..0cc383b9be7f 100644 --- a/arch/s390/include/asm/smp.h +++ b/arch/s390/include/asm/smp.h @@ -18,6 +18,7 @@ extern struct mutex smp_cpu_state_mutex; extern unsigned int smp_cpu_mt_shift; extern unsigned int smp_cpu_mtid; +extern __vector128 __initdata boot_cpu_vector_save_area[__NUM_VXRS]; extern int __cpu_up(unsigned int cpu, struct task_struct *tidle); @@ -55,7 +56,6 @@ static inline int smp_store_status(int cpu) { return 0; } static inline int smp_vcpu_scheduled(int cpu) { return 1; } static inline void smp_yield_cpu(int cpu) { } static inline void smp_fill_possible_mask(void) { } -static inline void smp_save_dump_cpus(void) { } #endif /* CONFIG_SMP */ diff --git a/arch/s390/include/asm/sysinfo.h b/arch/s390/include/asm/sysinfo.h index f7054a892d9e..2728114d5484 100644 --- a/arch/s390/include/asm/sysinfo.h +++ b/arch/s390/include/asm/sysinfo.h @@ -56,7 +56,12 @@ struct sysinfo_1_2_2 { char format; char reserved_0[1]; unsigned short acc_offset; - char reserved_1[20]; + unsigned char mt_installed :1; + unsigned char :2; + unsigned char mt_stid :5; + unsigned char :3; + unsigned char mt_gtid :5; + char reserved_1[18]; unsigned int nominal_cap; unsigned int secondary_cap; unsigned int capability; @@ -92,9 +97,13 @@ struct sysinfo_2_2_2 { char name[8]; unsigned int caf; char reserved_2[8]; - unsigned char mt_installed; - unsigned char mt_general; - unsigned char mt_psmtid; + unsigned char mt_installed :1; + unsigned char :2; + unsigned char mt_stid :5; + unsigned char :3; + unsigned char mt_gtid :5; + unsigned char :3; + unsigned char mt_psmtid :5; char reserved_3[5]; unsigned short cpus_dedicated; unsigned short cpus_shared; diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index 692b9247c019..2fffc2c27581 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -96,6 +96,4 @@ void arch_release_task_struct(struct task_struct *tsk); #define _TIF_31BIT _BITUL(TIF_31BIT) #define _TIF_SINGLE_STEP _BITUL(TIF_SINGLE_STEP) -#define is_32bit_task() (test_thread_flag(TIF_31BIT)) - #endif /* _ASM_THREAD_INFO_H */ diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h index 94fc55fc72ce..6b53962e807e 100644 --- a/arch/s390/include/asm/topology.h +++ b/arch/s390/include/asm/topology.h @@ -7,7 +7,7 @@ struct sysinfo_15_1_x; struct cpu; -#ifdef CONFIG_SCHED_BOOK +#ifdef CONFIG_SCHED_TOPOLOGY struct cpu_topology_s390 { unsigned short thread_id; @@ -40,13 +40,13 @@ void store_topology(struct sysinfo_15_1_x *info); void topology_expect_change(void); const struct cpumask *cpu_coregroup_mask(int cpu); -#else /* CONFIG_SCHED_BOOK */ +#else /* CONFIG_SCHED_TOPOLOGY */ static inline void topology_schedule_update(void) { } static inline int topology_cpu_init(struct cpu *cpu) { return 0; } static inline void topology_expect_change(void) { } -#endif /* CONFIG_SCHED_BOOK */ +#endif /* CONFIG_SCHED_TOPOLOGY */ #define POLARIZATION_UNKNOWN (-1) #define POLARIZATION_HRZ (0) diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h index 787acd4f9668..d0a2dbf2433d 100644 --- a/arch/s390/include/asm/vdso.h +++ b/arch/s390/include/asm/vdso.h @@ -38,12 +38,14 @@ struct vdso_data { struct vdso_per_cpu_data { __u64 ectg_timer_base; __u64 ectg_user_time; + __u32 cpu_nr; + __u32 node_id; }; extern struct vdso_data *vdso_data; -int vdso_alloc_per_cpu(struct _lowcore *lowcore); -void vdso_free_per_cpu(struct _lowcore *lowcore); +int vdso_alloc_per_cpu(struct lowcore *lowcore); +void vdso_free_per_cpu(struct lowcore *lowcore); #endif /* __ASSEMBLY__ */ #endif /* __S390_VDSO_H__ */ diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h index ef1a5fcc6c66..fe84bd5fe7ce 100644 --- a/arch/s390/include/uapi/asm/kvm.h +++ b/arch/s390/include/uapi/asm/kvm.h @@ -66,6 +66,8 @@ struct kvm_s390_io_adapter_req { #define KVM_S390_VM_MEM_CLR_CMMA 1 #define KVM_S390_VM_MEM_LIMIT_SIZE 2 +#define KVM_S390_NO_MEM_LIMIT U64_MAX + /* kvm attributes for KVM_S390_VM_TOD */ #define KVM_S390_VM_TOD_LOW 0 #define KVM_S390_VM_TOD_HIGH 1 @@ -151,6 +153,7 @@ struct kvm_guest_debug_arch { #define KVM_SYNC_ARCH0 (1UL << 4) #define KVM_SYNC_PFAULT (1UL << 5) #define KVM_SYNC_VRS (1UL << 6) +#define KVM_SYNC_RICCB (1UL << 7) /* definition of registers in kvm_run */ struct kvm_sync_regs { __u64 prefix; /* prefix register */ @@ -168,6 +171,8 @@ struct kvm_sync_regs { __u64 vrs[32][2]; /* vector registers */ __u8 reserved[512]; /* for future vector expansion */ __u32 fpc; /* only valid with vector registers */ + __u8 padding[52]; /* riccb needs to be 64byte aligned */ + __u8 riccb[64]; /* runtime instrumentation controls block */ }; #define KVM_REG_S390_TODPR (KVM_REG_S390 | KVM_REG_SIZE_U32 | 0x1) diff --git a/arch/s390/include/uapi/asm/socket.h b/arch/s390/include/uapi/asm/socket.h index 296942d56e6a..d02e89d14fef 100644 --- a/arch/s390/include/uapi/asm/socket.h +++ b/arch/s390/include/uapi/asm/socket.h @@ -91,4 +91,7 @@ #define SO_ATTACH_BPF 50 #define SO_DETACH_BPF SO_DETACH_FILTER +#define SO_ATTACH_REUSEPORT_CBPF 51 +#define SO_ATTACH_REUSEPORT_EBPF 52 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index dc167a23b920..2f5586ab8a6a 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -34,8 +34,10 @@ CFLAGS_sysinfo.o += -w # CFLAGS_REMOVE_sclp.o = $(CC_FLAGS_FTRACE) ifneq ($(CC_FLAGS_MARCH),-march=z900) -CFLAGS_REMOVE_sclp.o += $(CC_FLAGS_MARCH) -CFLAGS_sclp.o += -march=z900 +CFLAGS_REMOVE_sclp.o += $(CC_FLAGS_MARCH) +CFLAGS_sclp.o += -march=z900 +AFLAGS_REMOVE_head.o += $(CC_FLAGS_MARCH) +AFLAGS_head.o += -march=z900 endif GCOV_PROFILE_sclp.o := n @@ -50,7 +52,7 @@ extra-y += head.o head64.o vmlinux.lds obj-$(CONFIG_MODULES) += s390_ksyms.o module.o obj-$(CONFIG_SMP) += smp.o -obj-$(CONFIG_SCHED_BOOK) += topology.o +obj-$(CONFIG_SCHED_TOPOLOGY) += topology.o obj-$(CONFIG_HIBERNATION) += suspend.o swsusp.o obj-$(CONFIG_AUDIT) += audit.o compat-obj-$(CONFIG_AUDIT) += compat_audit.o diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 9cd248f637c7..53bbc9e8b281 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -80,6 +80,8 @@ int main(void) OFFSET(__VDSO_TK_SHIFT, vdso_data, tk_shift); OFFSET(__VDSO_ECTG_BASE, vdso_per_cpu_data, ectg_timer_base); OFFSET(__VDSO_ECTG_USER, vdso_per_cpu_data, ectg_user_time); + OFFSET(__VDSO_CPU_NR, vdso_per_cpu_data, cpu_nr); + OFFSET(__VDSO_NODE_ID, vdso_per_cpu_data, node_id); BLANK(); /* constants used by the vdso */ DEFINE(__CLOCK_REALTIME, CLOCK_REALTIME); @@ -97,95 +99,96 @@ int main(void) OFFSET(__TIMER_IDLE_EXIT, s390_idle_data, timer_idle_exit); BLANK(); /* hardware defined lowcore locations 0x000 - 0x1ff */ - OFFSET(__LC_EXT_PARAMS, _lowcore, ext_params); - OFFSET(__LC_EXT_CPU_ADDR, _lowcore, ext_cpu_addr); - OFFSET(__LC_EXT_INT_CODE, _lowcore, ext_int_code); - OFFSET(__LC_SVC_ILC, _lowcore, svc_ilc); - OFFSET(__LC_SVC_INT_CODE, _lowcore, svc_code); - OFFSET(__LC_PGM_ILC, _lowcore, pgm_ilc); - OFFSET(__LC_PGM_INT_CODE, _lowcore, pgm_code); - OFFSET(__LC_DATA_EXC_CODE, _lowcore, data_exc_code); - OFFSET(__LC_MON_CLASS_NR, _lowcore, mon_class_num); - OFFSET(__LC_PER_CODE, _lowcore, per_code); - OFFSET(__LC_PER_ATMID, _lowcore, per_atmid); - OFFSET(__LC_PER_ADDRESS, _lowcore, per_address); - OFFSET(__LC_EXC_ACCESS_ID, _lowcore, exc_access_id); - OFFSET(__LC_PER_ACCESS_ID, _lowcore, per_access_id); - OFFSET(__LC_OP_ACCESS_ID, _lowcore, op_access_id); - OFFSET(__LC_AR_MODE_ID, _lowcore, ar_mode_id); - OFFSET(__LC_TRANS_EXC_CODE, _lowcore, trans_exc_code); - OFFSET(__LC_MON_CODE, _lowcore, monitor_code); - OFFSET(__LC_SUBCHANNEL_ID, _lowcore, subchannel_id); - OFFSET(__LC_SUBCHANNEL_NR, _lowcore, subchannel_nr); - OFFSET(__LC_IO_INT_PARM, _lowcore, io_int_parm); - OFFSET(__LC_IO_INT_WORD, _lowcore, io_int_word); - OFFSET(__LC_STFL_FAC_LIST, _lowcore, stfl_fac_list); - OFFSET(__LC_MCCK_CODE, _lowcore, mcck_interruption_code); - OFFSET(__LC_MCCK_FAIL_STOR_ADDR, _lowcore, failing_storage_address); - OFFSET(__LC_LAST_BREAK, _lowcore, breaking_event_addr); - OFFSET(__LC_RST_OLD_PSW, _lowcore, restart_old_psw); - OFFSET(__LC_EXT_OLD_PSW, _lowcore, external_old_psw); - OFFSET(__LC_SVC_OLD_PSW, _lowcore, svc_old_psw); - OFFSET(__LC_PGM_OLD_PSW, _lowcore, program_old_psw); - OFFSET(__LC_MCK_OLD_PSW, _lowcore, mcck_old_psw); - OFFSET(__LC_IO_OLD_PSW, _lowcore, io_old_psw); - OFFSET(__LC_RST_NEW_PSW, _lowcore, restart_psw); - OFFSET(__LC_EXT_NEW_PSW, _lowcore, external_new_psw); - OFFSET(__LC_SVC_NEW_PSW, _lowcore, svc_new_psw); - OFFSET(__LC_PGM_NEW_PSW, _lowcore, program_new_psw); - OFFSET(__LC_MCK_NEW_PSW, _lowcore, mcck_new_psw); - OFFSET(__LC_IO_NEW_PSW, _lowcore, io_new_psw); + OFFSET(__LC_EXT_PARAMS, lowcore, ext_params); + OFFSET(__LC_EXT_CPU_ADDR, lowcore, ext_cpu_addr); + OFFSET(__LC_EXT_INT_CODE, lowcore, ext_int_code); + OFFSET(__LC_SVC_ILC, lowcore, svc_ilc); + OFFSET(__LC_SVC_INT_CODE, lowcore, svc_code); + OFFSET(__LC_PGM_ILC, lowcore, pgm_ilc); + OFFSET(__LC_PGM_INT_CODE, lowcore, pgm_code); + OFFSET(__LC_DATA_EXC_CODE, lowcore, data_exc_code); + OFFSET(__LC_MON_CLASS_NR, lowcore, mon_class_num); + OFFSET(__LC_PER_CODE, lowcore, per_code); + OFFSET(__LC_PER_ATMID, lowcore, per_atmid); + OFFSET(__LC_PER_ADDRESS, lowcore, per_address); + OFFSET(__LC_EXC_ACCESS_ID, lowcore, exc_access_id); + OFFSET(__LC_PER_ACCESS_ID, lowcore, per_access_id); + OFFSET(__LC_OP_ACCESS_ID, lowcore, op_access_id); + OFFSET(__LC_AR_MODE_ID, lowcore, ar_mode_id); + OFFSET(__LC_TRANS_EXC_CODE, lowcore, trans_exc_code); + OFFSET(__LC_MON_CODE, lowcore, monitor_code); + OFFSET(__LC_SUBCHANNEL_ID, lowcore, subchannel_id); + OFFSET(__LC_SUBCHANNEL_NR, lowcore, subchannel_nr); + OFFSET(__LC_IO_INT_PARM, lowcore, io_int_parm); + OFFSET(__LC_IO_INT_WORD, lowcore, io_int_word); + OFFSET(__LC_STFL_FAC_LIST, lowcore, stfl_fac_list); + OFFSET(__LC_STFLE_FAC_LIST, lowcore, stfle_fac_list); + OFFSET(__LC_MCCK_CODE, lowcore, mcck_interruption_code); + OFFSET(__LC_MCCK_FAIL_STOR_ADDR, lowcore, failing_storage_address); + OFFSET(__LC_LAST_BREAK, lowcore, breaking_event_addr); + OFFSET(__LC_RST_OLD_PSW, lowcore, restart_old_psw); + OFFSET(__LC_EXT_OLD_PSW, lowcore, external_old_psw); + OFFSET(__LC_SVC_OLD_PSW, lowcore, svc_old_psw); + OFFSET(__LC_PGM_OLD_PSW, lowcore, program_old_psw); + OFFSET(__LC_MCK_OLD_PSW, lowcore, mcck_old_psw); + OFFSET(__LC_IO_OLD_PSW, lowcore, io_old_psw); + OFFSET(__LC_RST_NEW_PSW, lowcore, restart_psw); + OFFSET(__LC_EXT_NEW_PSW, lowcore, external_new_psw); + OFFSET(__LC_SVC_NEW_PSW, lowcore, svc_new_psw); + OFFSET(__LC_PGM_NEW_PSW, lowcore, program_new_psw); + OFFSET(__LC_MCK_NEW_PSW, lowcore, mcck_new_psw); + OFFSET(__LC_IO_NEW_PSW, lowcore, io_new_psw); /* software defined lowcore locations 0x200 - 0xdff*/ - OFFSET(__LC_SAVE_AREA_SYNC, _lowcore, save_area_sync); - OFFSET(__LC_SAVE_AREA_ASYNC, _lowcore, save_area_async); - OFFSET(__LC_SAVE_AREA_RESTART, _lowcore, save_area_restart); - OFFSET(__LC_CPU_FLAGS, _lowcore, cpu_flags); - OFFSET(__LC_RETURN_PSW, _lowcore, return_psw); - OFFSET(__LC_RETURN_MCCK_PSW, _lowcore, return_mcck_psw); - OFFSET(__LC_SYNC_ENTER_TIMER, _lowcore, sync_enter_timer); - OFFSET(__LC_ASYNC_ENTER_TIMER, _lowcore, async_enter_timer); - OFFSET(__LC_MCCK_ENTER_TIMER, _lowcore, mcck_enter_timer); - OFFSET(__LC_EXIT_TIMER, _lowcore, exit_timer); - OFFSET(__LC_USER_TIMER, _lowcore, user_timer); - OFFSET(__LC_SYSTEM_TIMER, _lowcore, system_timer); - OFFSET(__LC_STEAL_TIMER, _lowcore, steal_timer); - OFFSET(__LC_LAST_UPDATE_TIMER, _lowcore, last_update_timer); - OFFSET(__LC_LAST_UPDATE_CLOCK, _lowcore, last_update_clock); - OFFSET(__LC_INT_CLOCK, _lowcore, int_clock); - OFFSET(__LC_MCCK_CLOCK, _lowcore, mcck_clock); - OFFSET(__LC_CURRENT, _lowcore, current_task); - OFFSET(__LC_THREAD_INFO, _lowcore, thread_info); - OFFSET(__LC_KERNEL_STACK, _lowcore, kernel_stack); - OFFSET(__LC_ASYNC_STACK, _lowcore, async_stack); - OFFSET(__LC_PANIC_STACK, _lowcore, panic_stack); - OFFSET(__LC_RESTART_STACK, _lowcore, restart_stack); - OFFSET(__LC_RESTART_FN, _lowcore, restart_fn); - OFFSET(__LC_RESTART_DATA, _lowcore, restart_data); - OFFSET(__LC_RESTART_SOURCE, _lowcore, restart_source); - OFFSET(__LC_USER_ASCE, _lowcore, user_asce); - OFFSET(__LC_LPP, _lowcore, lpp); - OFFSET(__LC_CURRENT_PID, _lowcore, current_pid); - OFFSET(__LC_PERCPU_OFFSET, _lowcore, percpu_offset); - OFFSET(__LC_VDSO_PER_CPU, _lowcore, vdso_per_cpu_data); - OFFSET(__LC_MACHINE_FLAGS, _lowcore, machine_flags); - OFFSET(__LC_GMAP, _lowcore, gmap); - OFFSET(__LC_PASTE, _lowcore, paste); + OFFSET(__LC_SAVE_AREA_SYNC, lowcore, save_area_sync); + OFFSET(__LC_SAVE_AREA_ASYNC, lowcore, save_area_async); + OFFSET(__LC_SAVE_AREA_RESTART, lowcore, save_area_restart); + OFFSET(__LC_CPU_FLAGS, lowcore, cpu_flags); + OFFSET(__LC_RETURN_PSW, lowcore, return_psw); + OFFSET(__LC_RETURN_MCCK_PSW, lowcore, return_mcck_psw); + OFFSET(__LC_SYNC_ENTER_TIMER, lowcore, sync_enter_timer); + OFFSET(__LC_ASYNC_ENTER_TIMER, lowcore, async_enter_timer); + OFFSET(__LC_MCCK_ENTER_TIMER, lowcore, mcck_enter_timer); + OFFSET(__LC_EXIT_TIMER, lowcore, exit_timer); + OFFSET(__LC_USER_TIMER, lowcore, user_timer); + OFFSET(__LC_SYSTEM_TIMER, lowcore, system_timer); + OFFSET(__LC_STEAL_TIMER, lowcore, steal_timer); + OFFSET(__LC_LAST_UPDATE_TIMER, lowcore, last_update_timer); + OFFSET(__LC_LAST_UPDATE_CLOCK, lowcore, last_update_clock); + OFFSET(__LC_INT_CLOCK, lowcore, int_clock); + OFFSET(__LC_MCCK_CLOCK, lowcore, mcck_clock); + OFFSET(__LC_CURRENT, lowcore, current_task); + OFFSET(__LC_THREAD_INFO, lowcore, thread_info); + OFFSET(__LC_KERNEL_STACK, lowcore, kernel_stack); + OFFSET(__LC_ASYNC_STACK, lowcore, async_stack); + OFFSET(__LC_PANIC_STACK, lowcore, panic_stack); + OFFSET(__LC_RESTART_STACK, lowcore, restart_stack); + OFFSET(__LC_RESTART_FN, lowcore, restart_fn); + OFFSET(__LC_RESTART_DATA, lowcore, restart_data); + OFFSET(__LC_RESTART_SOURCE, lowcore, restart_source); + OFFSET(__LC_USER_ASCE, lowcore, user_asce); + OFFSET(__LC_LPP, lowcore, lpp); + OFFSET(__LC_CURRENT_PID, lowcore, current_pid); + OFFSET(__LC_PERCPU_OFFSET, lowcore, percpu_offset); + OFFSET(__LC_VDSO_PER_CPU, lowcore, vdso_per_cpu_data); + OFFSET(__LC_MACHINE_FLAGS, lowcore, machine_flags); + OFFSET(__LC_GMAP, lowcore, gmap); + OFFSET(__LC_PASTE, lowcore, paste); /* software defined ABI-relevant lowcore locations 0xe00 - 0xe20 */ - OFFSET(__LC_DUMP_REIPL, _lowcore, ipib); + OFFSET(__LC_DUMP_REIPL, lowcore, ipib); /* hardware defined lowcore locations 0x1000 - 0x18ff */ - OFFSET(__LC_VX_SAVE_AREA_ADDR, _lowcore, vector_save_area_addr); - OFFSET(__LC_EXT_PARAMS2, _lowcore, ext_params2); - OFFSET(SAVE_AREA_BASE, _lowcore, floating_pt_save_area); - OFFSET(__LC_FPREGS_SAVE_AREA, _lowcore, floating_pt_save_area); - OFFSET(__LC_GPREGS_SAVE_AREA, _lowcore, gpregs_save_area); - OFFSET(__LC_PSW_SAVE_AREA, _lowcore, psw_save_area); - OFFSET(__LC_PREFIX_SAVE_AREA, _lowcore, prefixreg_save_area); - OFFSET(__LC_FP_CREG_SAVE_AREA, _lowcore, fpt_creg_save_area); - OFFSET(__LC_CPU_TIMER_SAVE_AREA, _lowcore, cpu_timer_save_area); - OFFSET(__LC_CLOCK_COMP_SAVE_AREA, _lowcore, clock_comp_save_area); - OFFSET(__LC_AREGS_SAVE_AREA, _lowcore, access_regs_save_area); - OFFSET(__LC_CREGS_SAVE_AREA, _lowcore, cregs_save_area); - OFFSET(__LC_PGM_TDB, _lowcore, pgm_tdb); + OFFSET(__LC_VX_SAVE_AREA_ADDR, lowcore, vector_save_area_addr); + OFFSET(__LC_EXT_PARAMS2, lowcore, ext_params2); + OFFSET(__LC_FPREGS_SAVE_AREA, lowcore, floating_pt_save_area); + OFFSET(__LC_GPREGS_SAVE_AREA, lowcore, gpregs_save_area); + OFFSET(__LC_PSW_SAVE_AREA, lowcore, psw_save_area); + OFFSET(__LC_PREFIX_SAVE_AREA, lowcore, prefixreg_save_area); + OFFSET(__LC_FP_CREG_SAVE_AREA, lowcore, fpt_creg_save_area); + OFFSET(__LC_TOD_PROGREG_SAVE_AREA, lowcore, tod_progreg_save_area); + OFFSET(__LC_CPU_TIMER_SAVE_AREA, lowcore, cpu_timer_save_area); + OFFSET(__LC_CLOCK_COMP_SAVE_AREA, lowcore, clock_comp_save_area); + OFFSET(__LC_AREGS_SAVE_AREA, lowcore, access_regs_save_area); + OFFSET(__LC_CREGS_SAVE_AREA, lowcore, cregs_save_area); + OFFSET(__LC_PGM_TDB, lowcore, pgm_tdb); BLANK(); /* gmap/sie offsets */ OFFSET(__GMAP_ASCE, gmap, asce); diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index 171e09bb8ea2..a92b39fd0e63 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -13,6 +13,7 @@ #include <linux/slab.h> #include <linux/bootmem.h> #include <linux/elf.h> +#include <asm/asm-offsets.h> #include <linux/memblock.h> #include <asm/os_info.h> #include <asm/elf.h> @@ -32,7 +33,84 @@ static struct memblock_type oldmem_type = { .regions = &oldmem_region, }; -struct dump_save_areas dump_save_areas; +struct save_area { + struct list_head list; + u64 psw[2]; + u64 ctrs[16]; + u64 gprs[16]; + u32 acrs[16]; + u64 fprs[16]; + u32 fpc; + u32 prefix; + u64 todpreg; + u64 timer; + u64 todcmp; + u64 vxrs_low[16]; + __vector128 vxrs_high[16]; +}; + +static LIST_HEAD(dump_save_areas); + +/* + * Allocate a save area + */ +struct save_area * __init save_area_alloc(bool is_boot_cpu) +{ + struct save_area *sa; + + sa = (void *) memblock_alloc(sizeof(*sa), 8); + if (!sa) + return NULL; + if (is_boot_cpu) + list_add(&sa->list, &dump_save_areas); + else + list_add_tail(&sa->list, &dump_save_areas); + return sa; +} + +/* + * Return the address of the save area for the boot CPU + */ +struct save_area * __init save_area_boot_cpu(void) +{ + if (list_empty(&dump_save_areas)) + return NULL; + return list_first_entry(&dump_save_areas, struct save_area, list); +} + +/* + * Copy CPU registers into the save area + */ +void __init save_area_add_regs(struct save_area *sa, void *regs) +{ + struct lowcore *lc; + + lc = (struct lowcore *)(regs - __LC_FPREGS_SAVE_AREA); + memcpy(&sa->psw, &lc->psw_save_area, sizeof(sa->psw)); + memcpy(&sa->ctrs, &lc->cregs_save_area, sizeof(sa->ctrs)); + memcpy(&sa->gprs, &lc->gpregs_save_area, sizeof(sa->gprs)); + memcpy(&sa->acrs, &lc->access_regs_save_area, sizeof(sa->acrs)); + memcpy(&sa->fprs, &lc->floating_pt_save_area, sizeof(sa->fprs)); + memcpy(&sa->fpc, &lc->fpt_creg_save_area, sizeof(sa->fpc)); + memcpy(&sa->prefix, &lc->prefixreg_save_area, sizeof(sa->prefix)); + memcpy(&sa->todpreg, &lc->tod_progreg_save_area, sizeof(sa->todpreg)); + memcpy(&sa->timer, &lc->cpu_timer_save_area, sizeof(sa->timer)); + memcpy(&sa->todcmp, &lc->clock_comp_save_area, sizeof(sa->todcmp)); +} + +/* + * Copy vector registers into the save area + */ +void __init save_area_add_vxrs(struct save_area *sa, __vector128 *vxrs) +{ + int i; + + /* Copy lower halves of vector registers 0-15 */ + for (i = 0; i < 16; i++) + memcpy(&sa->vxrs_low[i], &vxrs[i].u[2], 8); + /* Copy vector registers 16-31 */ + memcpy(sa->vxrs_high, vxrs + 16, 16 * sizeof(__vector128)); +} /* * Return physical address for virtual address @@ -51,79 +129,85 @@ static inline void *load_real_addr(void *addr) } /* - * Copy real to virtual or real memory - */ -static int copy_from_realmem(void *dest, void *src, size_t count) -{ - unsigned long size; - - if (!count) - return 0; - if (!is_vmalloc_or_module_addr(dest)) - return memcpy_real(dest, src, count); - do { - size = min(count, PAGE_SIZE - (__pa(dest) & ~PAGE_MASK)); - if (memcpy_real(load_real_addr(dest), src, size)) - return -EFAULT; - count -= size; - dest += size; - src += size; - } while (count); - return 0; -} - -/* - * Pointer to ELF header in new kernel - */ -static void *elfcorehdr_newmem; - -/* - * Copy one page from zfcpdump "oldmem" - * - * For pages below HSA size memory from the HSA is copied. Otherwise - * real memory copy is used. + * Copy memory of the old, dumped system to a kernel space virtual address */ -static ssize_t copy_oldmem_page_zfcpdump(char *buf, size_t csize, - unsigned long src, int userbuf) +int copy_oldmem_kernel(void *dst, void *src, size_t count) { + unsigned long from, len; + void *ra; int rc; - if (src < sclp.hsa_size) { - rc = memcpy_hsa(buf, src, csize, userbuf); - } else { - if (userbuf) - rc = copy_to_user_real((void __force __user *) buf, - (void *) src, csize); - else - rc = memcpy_real(buf, (void *) src, csize); + while (count) { + from = __pa(src); + if (!OLDMEM_BASE && from < sclp.hsa_size) { + /* Copy from zfcpdump HSA area */ + len = min(count, sclp.hsa_size - from); + rc = memcpy_hsa_kernel(dst, from, len); + if (rc) + return rc; + } else { + /* Check for swapped kdump oldmem areas */ + if (OLDMEM_BASE && from - OLDMEM_BASE < OLDMEM_SIZE) { + from -= OLDMEM_BASE; + len = min(count, OLDMEM_SIZE - from); + } else if (OLDMEM_BASE && from < OLDMEM_SIZE) { + len = min(count, OLDMEM_SIZE - from); + from += OLDMEM_BASE; + } else { + len = count; + } + if (is_vmalloc_or_module_addr(dst)) { + ra = load_real_addr(dst); + len = min(PAGE_SIZE - offset_in_page(ra), len); + } else { + ra = dst; + } + if (memcpy_real(ra, (void *) from, len)) + return -EFAULT; + } + dst += len; + src += len; + count -= len; } - return rc ? rc : csize; + return 0; } /* - * Copy one page from kdump "oldmem" - * - * For the kdump reserved memory this functions performs a swap operation: - * - [OLDMEM_BASE - OLDMEM_BASE + OLDMEM_SIZE] is mapped to [0 - OLDMEM_SIZE]. - * - [0 - OLDMEM_SIZE] is mapped to [OLDMEM_BASE - OLDMEM_BASE + OLDMEM_SIZE] + * Copy memory of the old, dumped system to a user space virtual address */ -static ssize_t copy_oldmem_page_kdump(char *buf, size_t csize, - unsigned long src, int userbuf) - +int copy_oldmem_user(void __user *dst, void *src, size_t count) { + unsigned long from, len; int rc; - if (src < OLDMEM_SIZE) - src += OLDMEM_BASE; - else if (src > OLDMEM_BASE && - src < OLDMEM_BASE + OLDMEM_SIZE) - src -= OLDMEM_BASE; - if (userbuf) - rc = copy_to_user_real((void __force __user *) buf, - (void *) src, csize); - else - rc = copy_from_realmem(buf, (void *) src, csize); - return (rc == 0) ? rc : csize; + while (count) { + from = __pa(src); + if (!OLDMEM_BASE && from < sclp.hsa_size) { + /* Copy from zfcpdump HSA area */ + len = min(count, sclp.hsa_size - from); + rc = memcpy_hsa_user(dst, from, len); + if (rc) + return rc; + } else { + /* Check for swapped kdump oldmem areas */ + if (OLDMEM_BASE && from - OLDMEM_BASE < OLDMEM_SIZE) { + from -= OLDMEM_BASE; + len = min(count, OLDMEM_SIZE - from); + } else if (OLDMEM_BASE && from < OLDMEM_SIZE) { + len = min(count, OLDMEM_SIZE - from); + from += OLDMEM_BASE; + } else { + len = count; + } + rc = copy_to_user_real(dst, (void *) from, count); + if (rc) + return rc; + } + dst += len; + src += len; + count -= len; + } + return 0; } /* @@ -132,15 +216,17 @@ static ssize_t copy_oldmem_page_kdump(char *buf, size_t csize, ssize_t copy_oldmem_page(unsigned long pfn, char *buf, size_t csize, unsigned long offset, int userbuf) { - unsigned long src; + void *src; + int rc; if (!csize) return 0; - src = (pfn << PAGE_SHIFT) + offset; - if (OLDMEM_BASE) - return copy_oldmem_page_kdump(buf, csize, src, userbuf); + src = (void *) (pfn << PAGE_SHIFT) + offset; + if (userbuf) + rc = copy_oldmem_user((void __force __user *) buf, src, csize); else - return copy_oldmem_page_zfcpdump(buf, csize, src, userbuf); + rc = copy_oldmem_kernel((void *) buf, src, csize); + return rc; } /* @@ -209,33 +295,6 @@ int remap_oldmem_pfn_range(struct vm_area_struct *vma, unsigned long from, } /* - * Copy memory from old kernel - */ -int copy_from_oldmem(void *dest, void *src, size_t count) -{ - unsigned long copied = 0; - int rc; - - if (OLDMEM_BASE) { - if ((unsigned long) src < OLDMEM_SIZE) { - copied = min(count, OLDMEM_SIZE - (unsigned long) src); - rc = copy_from_realmem(dest, src + OLDMEM_BASE, copied); - if (rc) - return rc; - } - } else { - unsigned long hsa_end = sclp.hsa_size; - if ((unsigned long) src < hsa_end) { - copied = min(count, hsa_end - (unsigned long) src); - rc = memcpy_hsa(dest, (unsigned long) src, copied, 0); - if (rc) - return rc; - } - } - return copy_from_realmem(dest + copied, src + copied, count - copied); -} - -/* * Alloc memory and panic in case of ENOMEM */ static void *kzalloc_panic(int len) @@ -251,8 +310,8 @@ static void *kzalloc_panic(int len) /* * Initialize ELF note */ -static void *nt_init(void *buf, Elf64_Word type, void *desc, int d_len, - const char *name) +static void *nt_init_name(void *buf, Elf64_Word type, void *desc, int d_len, + const char *name) { Elf64_Nhdr *note; u64 len; @@ -272,136 +331,42 @@ static void *nt_init(void *buf, Elf64_Word type, void *desc, int d_len, return PTR_ADD(buf, len); } -/* - * Initialize prstatus note - */ -static void *nt_prstatus(void *ptr, struct save_area *sa) +static inline void *nt_init(void *buf, Elf64_Word type, void *desc, int d_len) { - struct elf_prstatus nt_prstatus; - static int cpu_nr = 1; - - memset(&nt_prstatus, 0, sizeof(nt_prstatus)); - memcpy(&nt_prstatus.pr_reg.gprs, sa->gp_regs, sizeof(sa->gp_regs)); - memcpy(&nt_prstatus.pr_reg.psw, sa->psw, sizeof(sa->psw)); - memcpy(&nt_prstatus.pr_reg.acrs, sa->acc_regs, sizeof(sa->acc_regs)); - nt_prstatus.pr_pid = cpu_nr; - cpu_nr++; - - return nt_init(ptr, NT_PRSTATUS, &nt_prstatus, sizeof(nt_prstatus), - "CORE"); + return nt_init_name(buf, type, desc, d_len, KEXEC_CORE_NOTE_NAME); } /* - * Initialize fpregset (floating point) note + * Fill ELF notes for one CPU with save area registers */ -static void *nt_fpregset(void *ptr, struct save_area *sa) +static void *fill_cpu_elf_notes(void *ptr, int cpu, struct save_area *sa) { + struct elf_prstatus nt_prstatus; elf_fpregset_t nt_fpregset; + /* Prepare prstatus note */ + memset(&nt_prstatus, 0, sizeof(nt_prstatus)); + memcpy(&nt_prstatus.pr_reg.gprs, sa->gprs, sizeof(sa->gprs)); + memcpy(&nt_prstatus.pr_reg.psw, sa->psw, sizeof(sa->psw)); + memcpy(&nt_prstatus.pr_reg.acrs, sa->acrs, sizeof(sa->acrs)); + nt_prstatus.pr_pid = cpu; + /* Prepare fpregset (floating point) note */ memset(&nt_fpregset, 0, sizeof(nt_fpregset)); - memcpy(&nt_fpregset.fpc, &sa->fp_ctrl_reg, sizeof(sa->fp_ctrl_reg)); - memcpy(&nt_fpregset.fprs, &sa->fp_regs, sizeof(sa->fp_regs)); - - return nt_init(ptr, NT_PRFPREG, &nt_fpregset, sizeof(nt_fpregset), - "CORE"); -} - -/* - * Initialize timer note - */ -static void *nt_s390_timer(void *ptr, struct save_area *sa) -{ - return nt_init(ptr, NT_S390_TIMER, &sa->timer, sizeof(sa->timer), - KEXEC_CORE_NOTE_NAME); -} - -/* - * Initialize TOD clock comparator note - */ -static void *nt_s390_tod_cmp(void *ptr, struct save_area *sa) -{ - return nt_init(ptr, NT_S390_TODCMP, &sa->clk_cmp, - sizeof(sa->clk_cmp), KEXEC_CORE_NOTE_NAME); -} - -/* - * Initialize TOD programmable register note - */ -static void *nt_s390_tod_preg(void *ptr, struct save_area *sa) -{ - return nt_init(ptr, NT_S390_TODPREG, &sa->tod_reg, - sizeof(sa->tod_reg), KEXEC_CORE_NOTE_NAME); -} - -/* - * Initialize control register note - */ -static void *nt_s390_ctrs(void *ptr, struct save_area *sa) -{ - return nt_init(ptr, NT_S390_CTRS, &sa->ctrl_regs, - sizeof(sa->ctrl_regs), KEXEC_CORE_NOTE_NAME); -} - -/* - * Initialize prefix register note - */ -static void *nt_s390_prefix(void *ptr, struct save_area *sa) -{ - return nt_init(ptr, NT_S390_PREFIX, &sa->pref_reg, - sizeof(sa->pref_reg), KEXEC_CORE_NOTE_NAME); -} - -/* - * Initialize vxrs high note (full 128 bit VX registers 16-31) - */ -static void *nt_s390_vx_high(void *ptr, __vector128 *vx_regs) -{ - return nt_init(ptr, NT_S390_VXRS_HIGH, &vx_regs[16], - 16 * sizeof(__vector128), KEXEC_CORE_NOTE_NAME); -} - -/* - * Initialize vxrs low note (lower halves of VX registers 0-15) - */ -static void *nt_s390_vx_low(void *ptr, __vector128 *vx_regs) -{ - Elf64_Nhdr *note; - u64 len; - int i; - - note = (Elf64_Nhdr *)ptr; - note->n_namesz = strlen(KEXEC_CORE_NOTE_NAME) + 1; - note->n_descsz = 16 * 8; - note->n_type = NT_S390_VXRS_LOW; - len = sizeof(Elf64_Nhdr); - - memcpy(ptr + len, KEXEC_CORE_NOTE_NAME, note->n_namesz); - len = roundup(len + note->n_namesz, 4); - - ptr += len; - /* Copy lower halves of SIMD registers 0-15 */ - for (i = 0; i < 16; i++) { - memcpy(ptr, &vx_regs[i].u[2], 8); - ptr += 8; - } - return ptr; -} - -/* - * Fill ELF notes for one CPU with save area registers - */ -void *fill_cpu_elf_notes(void *ptr, struct save_area *sa, __vector128 *vx_regs) -{ - ptr = nt_prstatus(ptr, sa); - ptr = nt_fpregset(ptr, sa); - ptr = nt_s390_timer(ptr, sa); - ptr = nt_s390_tod_cmp(ptr, sa); - ptr = nt_s390_tod_preg(ptr, sa); - ptr = nt_s390_ctrs(ptr, sa); - ptr = nt_s390_prefix(ptr, sa); - if (MACHINE_HAS_VX && vx_regs) { - ptr = nt_s390_vx_low(ptr, vx_regs); - ptr = nt_s390_vx_high(ptr, vx_regs); + memcpy(&nt_fpregset.fpc, &sa->fpc, sizeof(sa->fpc)); + memcpy(&nt_fpregset.fprs, &sa->fprs, sizeof(sa->fprs)); + /* Create ELF notes for the CPU */ + ptr = nt_init(ptr, NT_PRSTATUS, &nt_prstatus, sizeof(nt_prstatus)); + ptr = nt_init(ptr, NT_PRFPREG, &nt_fpregset, sizeof(nt_fpregset)); + ptr = nt_init(ptr, NT_S390_TIMER, &sa->timer, sizeof(sa->timer)); + ptr = nt_init(ptr, NT_S390_TODCMP, &sa->todcmp, sizeof(sa->todcmp)); + ptr = nt_init(ptr, NT_S390_TODPREG, &sa->todpreg, sizeof(sa->todpreg)); + ptr = nt_init(ptr, NT_S390_CTRS, &sa->ctrs, sizeof(sa->ctrs)); + ptr = nt_init(ptr, NT_S390_PREFIX, &sa->prefix, sizeof(sa->prefix)); + if (MACHINE_HAS_VX) { + ptr = nt_init(ptr, NT_S390_VXRS_HIGH, + &sa->vxrs_high, sizeof(sa->vxrs_high)); + ptr = nt_init(ptr, NT_S390_VXRS_LOW, + &sa->vxrs_low, sizeof(sa->vxrs_low)); } return ptr; } @@ -416,8 +381,7 @@ static void *nt_prpsinfo(void *ptr) memset(&prpsinfo, 0, sizeof(prpsinfo)); prpsinfo.pr_sname = 'R'; strcpy(prpsinfo.pr_fname, "vmlinux"); - return nt_init(ptr, NT_PRPSINFO, &prpsinfo, sizeof(prpsinfo), - KEXEC_CORE_NOTE_NAME); + return nt_init(ptr, NT_PRPSINFO, &prpsinfo, sizeof(prpsinfo)); } /* @@ -429,17 +393,18 @@ static void *get_vmcoreinfo_old(unsigned long *size) Elf64_Nhdr note; void *addr; - if (copy_from_oldmem(&addr, &S390_lowcore.vmcore_info, sizeof(addr))) + if (copy_oldmem_kernel(&addr, &S390_lowcore.vmcore_info, sizeof(addr))) return NULL; memset(nt_name, 0, sizeof(nt_name)); - if (copy_from_oldmem(¬e, addr, sizeof(note))) + if (copy_oldmem_kernel(¬e, addr, sizeof(note))) return NULL; - if (copy_from_oldmem(nt_name, addr + sizeof(note), sizeof(nt_name) - 1)) + if (copy_oldmem_kernel(nt_name, addr + sizeof(note), + sizeof(nt_name) - 1)) return NULL; if (strcmp(nt_name, "VMCOREINFO") != 0) return NULL; vmcoreinfo = kzalloc_panic(note.n_descsz); - if (copy_from_oldmem(vmcoreinfo, addr + 24, note.n_descsz)) + if (copy_oldmem_kernel(vmcoreinfo, addr + 24, note.n_descsz)) return NULL; *size = note.n_descsz; return vmcoreinfo; @@ -458,7 +423,7 @@ static void *nt_vmcoreinfo(void *ptr) vmcoreinfo = get_vmcoreinfo_old(&size); if (!vmcoreinfo) return ptr; - return nt_init(ptr, 0, vmcoreinfo, size, "VMCOREINFO"); + return nt_init_name(ptr, 0, vmcoreinfo, size, "VMCOREINFO"); } /* @@ -487,13 +452,12 @@ static void *ehdr_init(Elf64_Ehdr *ehdr, int mem_chunk_cnt) */ static int get_cpu_cnt(void) { - int i, cpus = 0; + struct save_area *sa; + int cpus = 0; - for (i = 0; i < dump_save_areas.count; i++) { - if (dump_save_areas.areas[i]->sa.pref_reg == 0) - continue; - cpus++; - } + list_for_each_entry(sa, &dump_save_areas, list) + if (sa->prefix != 0) + cpus++; return cpus; } @@ -538,18 +502,16 @@ static void loads_init(Elf64_Phdr *phdr, u64 loads_offset) */ static void *notes_init(Elf64_Phdr *phdr, void *ptr, u64 notes_offset) { - struct save_area_ext *sa_ext; + struct save_area *sa; void *ptr_start = ptr; - int i; + int cpu; ptr = nt_prpsinfo(ptr); - for (i = 0; i < dump_save_areas.count; i++) { - sa_ext = dump_save_areas.areas[i]; - if (sa_ext->sa.pref_reg == 0) - continue; - ptr = fill_cpu_elf_notes(ptr, &sa_ext->sa, sa_ext->vx_regs); - } + cpu = 1; + list_for_each_entry(sa, &dump_save_areas, list) + if (sa->prefix != 0) + ptr = fill_cpu_elf_notes(ptr, cpu++, sa); ptr = nt_vmcoreinfo(ptr); memset(phdr, 0, sizeof(*phdr)); phdr->p_type = PT_NOTE; @@ -573,9 +535,6 @@ int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size) /* If we are not in kdump or zfcpdump mode return */ if (!OLDMEM_BASE && ipl_info.type != IPL_TYPE_FCP_DUMP) return 0; - /* If elfcorehdr= has been passed via cmdline, we use that one */ - if (elfcorehdr_addr != ELFCORE_ADDR_MAX) - return 0; /* If we cannot get HSA size for zfcpdump return error */ if (ipl_info.type == IPL_TYPE_FCP_DUMP && !sclp.hsa_size) return -ENODEV; @@ -606,7 +565,6 @@ int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size) hdr_off = PTR_DIFF(ptr, hdr); loads_init(phdr_loads, hdr_off); *addr = (unsigned long long) hdr; - elfcorehdr_newmem = hdr; *size = (unsigned long long) hdr_off; BUG_ON(elfcorehdr_size > alloc_size); return 0; @@ -617,8 +575,6 @@ int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size) */ void elfcorehdr_free(unsigned long long addr) { - if (!elfcorehdr_newmem) - return; kfree((void *)(unsigned long)addr); } @@ -629,7 +585,6 @@ ssize_t elfcorehdr_read(char *buf, size_t count, u64 *ppos) { void *src = (void *)(unsigned long)*ppos; - src = elfcorehdr_newmem ? src : src - OLDMEM_BASE; memcpy(buf, src, count); *ppos += count; return count; @@ -641,15 +596,8 @@ ssize_t elfcorehdr_read(char *buf, size_t count, u64 *ppos) ssize_t elfcorehdr_read_notes(char *buf, size_t count, u64 *ppos) { void *src = (void *)(unsigned long)*ppos; - int rc; - if (elfcorehdr_newmem) { - memcpy(buf, src, count); - } else { - rc = copy_from_oldmem(buf, src, count); - if (rc) - return rc; - } + memcpy(buf, src, count); *ppos += count; return count; } diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c index 6e72961608f0..62973efd214a 100644 --- a/arch/s390/kernel/dis.c +++ b/arch/s390/kernel/dis.c @@ -2022,7 +2022,7 @@ void show_code(struct pt_regs *regs) *ptr++ = '\t'; ptr += print_insn(ptr, code + start, addr); start += opsize; - printk(buffer); + printk("%s", buffer); ptr = buffer; ptr += sprintf(ptr, "\n "); hops++; @@ -2049,7 +2049,7 @@ void print_fn_code(unsigned char *code, unsigned long len) ptr += print_insn(ptr, code, (unsigned long) code); *ptr++ = '\n'; *ptr++ = 0; - printk(buffer); + printk("%s", buffer); code += opsize; len -= opsize; } diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 3c31609df959..20a5caf6d981 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -335,6 +335,14 @@ static __init void detect_machine_facilities(void) } } +static inline void save_vector_registers(void) +{ +#ifdef CONFIG_CRASH_DUMP + if (test_facility(129)) + save_vx_regs(boot_cpu_vector_save_area); +#endif +} + static int __init disable_vector_extension(char *str) { S390_lowcore.machine_flags &= ~MACHINE_FLAG_VX; @@ -451,6 +459,7 @@ void __init startup_init(void) detect_diag9c(); detect_diag44(); detect_machine_facilities(); + save_vector_registers(); setup_topology(); sclp_early_detect(); lockdep_on(); diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 857b6526d298..cd5a191381b9 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -764,6 +764,7 @@ ENTRY(psw_idle) .insn rsy,0xeb0000000017,%r1,5,__SF_EMPTY+16(%r15) .Lpsw_idle_stcctm: #endif + oi __LC_CPU_FLAGS+7,_CIF_ENABLED_WAIT STCK __CLOCK_IDLE_ENTER(%r2) stpt __TIMER_IDLE_ENTER(%r2) .Lpsw_idle_lpsw: @@ -1146,6 +1147,7 @@ cleanup_critical: .quad .Lio_done - 4 .Lcleanup_idle: + ni __LC_CPU_FLAGS+7,255-_CIF_ENABLED_WAIT # copy interrupt clock & cpu timer mvc __CLOCK_IDLE_EXIT(8,%r2),__LC_INT_CLOCK mvc __TIMER_IDLE_EXIT(8,%r2),__LC_ASYNC_ENTER_TIMER diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S index 301ee9c70688..fcaefb041364 100644 --- a/arch/s390/kernel/head.S +++ b/arch/s390/kernel/head.S @@ -25,6 +25,7 @@ #include <linux/linkage.h> #include <asm/asm-offsets.h> #include <asm/thread_info.h> +#include <asm/facility.h> #include <asm/page.h> #include <asm/ptrace.h> @@ -300,27 +301,27 @@ ENTRY(startup_kdump) xc 0x200(256),0x200 # partially clear lowcore xc 0x300(256),0x300 xc 0xe00(256),0xe00 + xc 0xf00(256),0xf00 lctlg %c0,%c15,0x200(%r0) # initialize control registers stck __LC_LAST_UPDATE_CLOCK spt 6f-.LPG0(%r13) mvc __LC_LAST_UPDATE_TIMER(8),6f-.LPG0(%r13) - xc __LC_STFL_FAC_LIST(8),__LC_STFL_FAC_LIST - # check capabilities against MARCH_{G5,Z900,Z990,Z9_109,Z10} - .insn s,0xb2b10000,0 # store facilities @ __LC_STFL_FAC_LIST - tm __LC_STFL_FAC_LIST,0x01 # stfle available ? + stfl 0(%r0) # store facilities @ __LC_STFL_FAC_LIST + mvc __LC_STFLE_FAC_LIST(4),__LC_STFL_FAC_LIST + tm __LC_STFLE_FAC_LIST,0x01 # stfle available ? jz 0f - la %r0,1 - .insn s,0xb2b00000,__LC_STFL_FAC_LIST # store facility list extended + lghi %r0,FACILITIES_ALS_DWORDS-1 + .insn s,0xb2b00000,__LC_STFLE_FAC_LIST # store facility list extended # verify if all required facilities are supported by the machine -0: la %r1,__LC_STFL_FAC_LIST +0: la %r1,__LC_STFLE_FAC_LIST la %r2,3f+8-.LPG0(%r13) - l %r3,0(%r2) -1: l %r0,0(%r1) - n %r0,4(%r2) - cl %r0,4(%r2) + lhi %r3,FACILITIES_ALS_DWORDS +1: lg %r0,0(%r1) + ng %r0,0(%r2) + clg %r0,0(%r2) jne 2f - la %r1,4(%r1) - la %r2,4(%r2) + la %r1,8(%r1) + la %r2,8(%r2) ahi %r3,-1 jnz 1b j 4f @@ -340,24 +341,10 @@ ENTRY(startup_kdump) 3: .long 0x000a0000,0x8badcccc # List of facilities that are required. If not all facilities are present -# the kernel will crash. Format is number of facility words with bits set, -# followed by the facility words. +# the kernel will crash. + + .quad FACILITIES_ALS -#if defined(CONFIG_MARCH_Z13) - .long 2, 0xc100eff2, 0xf46cc800 -#elif defined(CONFIG_MARCH_ZEC12) - .long 2, 0xc100eff2, 0xf46cc800 -#elif defined(CONFIG_MARCH_Z196) - .long 2, 0xc100eff2, 0xf46c0000 -#elif defined(CONFIG_MARCH_Z10) - .long 2, 0xc100eff2, 0xf0680000 -#elif defined(CONFIG_MARCH_Z9_109) - .long 1, 0xc100efc2 -#elif defined(CONFIG_MARCH_Z990) - .long 1, 0xc0002000 -#elif defined(CONFIG_MARCH_Z900) - .long 1, 0xc0000000 -#endif 4: /* Continue with startup code in head64.S */ jg startup_continue diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index 58b719fa8067..c5febe84eba6 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -16,7 +16,7 @@ __HEAD ENTRY(startup_continue) - tm __LC_STFL_FAC_LIST+6,0x80 # LPP available ? + tm __LC_STFLE_FAC_LIST+6,0x80 # LPP available ? jz 0f xc __LC_LPP+1(7,0),__LC_LPP+1 # clear lpp and current_pid mvi __LC_LPP,0x80 # and set LPP_MAGIC diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index b1f0a90f933b..0a5a6b661b93 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -2039,21 +2039,15 @@ static void do_reset_calls(void) reset->fn(); } -u32 dump_prefix_page; - -void s390_reset_system(void (*fn_pre)(void), - void (*fn_post)(void *), void *data) +void s390_reset_system(void) { - struct _lowcore *lc; + struct lowcore *lc; - lc = (struct _lowcore *)(unsigned long) store_prefix(); + lc = (struct lowcore *)(unsigned long) store_prefix(); /* Stack for interrupt/machine check handler */ lc->panic_stack = S390_lowcore.panic_stack; - /* Save prefix page address for dump case */ - dump_prefix_page = (u32)(unsigned long) lc; - /* Disable prefixing */ set_prefix(0); @@ -2077,14 +2071,5 @@ void s390_reset_system(void (*fn_pre)(void), S390_lowcore.subchannel_id = 0; S390_lowcore.subchannel_nr = 0; - /* Store status at absolute zero */ - store_status(); - - /* Call function before reset */ - if (fn_pre) - fn_pre(); do_reset_calls(); - /* Call function after reset */ - if (fn_post) - fn_post(data); } diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index fb0901ec4306..2f1b7217c25c 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -35,46 +35,6 @@ extern const unsigned long long relocate_kernel_len; #ifdef CONFIG_CRASH_DUMP /* - * Create ELF notes for one CPU - */ -static void add_elf_notes(int cpu) -{ - struct save_area *sa = (void *) 4608 + store_prefix(); - void *ptr; - - memcpy((void *) (4608UL + sa->pref_reg), sa, sizeof(*sa)); - ptr = (u64 *) per_cpu_ptr(crash_notes, cpu); - ptr = fill_cpu_elf_notes(ptr, sa, NULL); - memset(ptr, 0, sizeof(struct elf_note)); -} - -/* - * Initialize CPU ELF notes - */ -static void setup_regs(void) -{ - unsigned long sa = S390_lowcore.prefixreg_save_area + SAVE_AREA_BASE; - struct _lowcore *lc; - int cpu, this_cpu; - - /* Get lowcore pointer from store status of this CPU (absolute zero) */ - lc = (struct _lowcore *)(unsigned long)S390_lowcore.prefixreg_save_area; - this_cpu = smp_find_processor_id(stap()); - add_elf_notes(this_cpu); - for_each_online_cpu(cpu) { - if (cpu == this_cpu) - continue; - if (smp_store_status(cpu)) - continue; - add_elf_notes(cpu); - } - if (MACHINE_HAS_VX) - save_vx_regs_safe((void *) lc->vector_save_area_addr); - /* Copy dump CPU store status info to absolute zero */ - memcpy((void *) SAVE_AREA_BASE, (void *) sa, sizeof(struct save_area)); -} - -/* * PM notifier callback for kdump */ static int machine_kdump_pm_cb(struct notifier_block *nb, unsigned long action, @@ -105,14 +65,66 @@ static int __init machine_kdump_pm_init(void) arch_initcall(machine_kdump_pm_init); /* - * Start kdump: We expect here that a store status has been done on our CPU + * Reset the system, copy boot CPU registers to absolute zero, + * and jump to the kdump image */ static void __do_machine_kdump(void *image) { - int (*start_kdump)(int) = (void *)((struct kimage *) image)->start; + int (*start_kdump)(int); + unsigned long prefix; + + /* store_status() saved the prefix register to lowcore */ + prefix = (unsigned long) S390_lowcore.prefixreg_save_area; + + /* Now do the reset */ + s390_reset_system(); + + /* + * Copy dump CPU store status info to absolute zero. + * This need to be done *after* s390_reset_system set the + * prefix register of this CPU to zero + */ + memcpy((void *) __LC_FPREGS_SAVE_AREA, + (void *)(prefix + __LC_FPREGS_SAVE_AREA), 512); __load_psw_mask(PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA); + start_kdump = (void *)((struct kimage *) image)->start; start_kdump(1); + + /* Die if start_kdump returns */ + disabled_wait((unsigned long) __builtin_return_address(0)); +} + +/* + * Start kdump: create a LGR log entry, store status of all CPUs and + * branch to __do_machine_kdump. + */ +static noinline void __machine_kdump(void *image) +{ + int this_cpu, cpu; + + lgr_info_log(); + /* Get status of the other CPUs */ + this_cpu = smp_find_processor_id(stap()); + for_each_online_cpu(cpu) { + if (cpu == this_cpu) + continue; + if (smp_store_status(cpu)) + continue; + } + /* Store status of the boot CPU */ + if (MACHINE_HAS_VX) + save_vx_regs((void *) &S390_lowcore.vector_save_area); + /* + * To create a good backchain for this CPU in the dump store_status + * is passed the address of a function. The address is saved into + * the PSW save area of the boot CPU and the function is invoked as + * a tail call of store_status. The backchain in the dump will look + * like this: + * restart_int_handler -> __machine_kexec -> __do_machine_kdump + * The call to store_status() will not return. + */ + store_status(__do_machine_kdump, image); } #endif @@ -235,10 +247,14 @@ static void __do_machine_kexec(void *data) relocate_kernel_t data_mover; struct kimage *image = data; + s390_reset_system(); data_mover = (relocate_kernel_t) page_to_phys(image->control_code_page); /* Call the moving routine */ (*data_mover)(&image->head, image->start); + + /* Die if kexec returns */ + disabled_wait((unsigned long) __builtin_return_address(0)); } /* @@ -251,14 +267,10 @@ static void __machine_kexec(void *data) tracing_off(); debug_locks_off(); #ifdef CONFIG_CRASH_DUMP - if (((struct kimage *) data)->type == KEXEC_TYPE_CRASH) { - - lgr_info_log(); - s390_reset_system(setup_regs, __do_machine_kdump, data); - } else + if (((struct kimage *) data)->type == KEXEC_TYPE_CRASH) + __machine_kdump(data); #endif - s390_reset_system(NULL, __do_machine_kexec, data); - disabled_wait((unsigned long) __builtin_return_address(0)); + __do_machine_kexec(data); } /* diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index 0c1a679314dd..7873e171457c 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -159,11 +159,11 @@ int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, /* Increase core size by size of got & plt and set start offsets for got and plt. */ - me->core_size = ALIGN(me->core_size, 4); - me->arch.got_offset = me->core_size; - me->core_size += me->arch.got_size; - me->arch.plt_offset = me->core_size; - me->core_size += me->arch.plt_size; + me->core_layout.size = ALIGN(me->core_layout.size, 4); + me->arch.got_offset = me->core_layout.size; + me->core_layout.size += me->arch.got_size; + me->arch.plt_offset = me->core_layout.size; + me->core_layout.size += me->arch.plt_size; return 0; } @@ -279,7 +279,7 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, if (info->got_initialized == 0) { Elf_Addr *gotent; - gotent = me->module_core + me->arch.got_offset + + gotent = me->core_layout.base + me->arch.got_offset + info->got_offset; *gotent = val; info->got_initialized = 1; @@ -302,7 +302,7 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, rc = apply_rela_bits(loc, val, 0, 64, 0); else if (r_type == R_390_GOTENT || r_type == R_390_GOTPLTENT) { - val += (Elf_Addr) me->module_core - loc; + val += (Elf_Addr) me->core_layout.base - loc; rc = apply_rela_bits(loc, val, 1, 32, 1); } break; @@ -315,7 +315,7 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, case R_390_PLTOFF64: /* 16 bit offset from GOT to PLT. */ if (info->plt_initialized == 0) { unsigned int *ip; - ip = me->module_core + me->arch.plt_offset + + ip = me->core_layout.base + me->arch.plt_offset + info->plt_offset; ip[0] = 0x0d10e310; /* basr 1,0; lg 1,10(1); br 1 */ ip[1] = 0x100a0004; @@ -334,7 +334,7 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, val - loc + 0xffffUL < 0x1ffffeUL) || (r_type == R_390_PLT32DBL && val - loc + 0xffffffffULL < 0x1fffffffeULL))) - val = (Elf_Addr) me->module_core + + val = (Elf_Addr) me->core_layout.base + me->arch.plt_offset + info->plt_offset; val += rela->r_addend - loc; @@ -356,7 +356,7 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, case R_390_GOTOFF32: /* 32 bit offset to GOT. */ case R_390_GOTOFF64: /* 64 bit offset to GOT. */ val = val + rela->r_addend - - ((Elf_Addr) me->module_core + me->arch.got_offset); + ((Elf_Addr) me->core_layout.base + me->arch.got_offset); if (r_type == R_390_GOTOFF16) rc = apply_rela_bits(loc, val, 0, 16, 0); else if (r_type == R_390_GOTOFF32) @@ -366,7 +366,7 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, break; case R_390_GOTPC: /* 32 bit PC relative offset to GOT. */ case R_390_GOTPCDBL: /* 32 bit PC rel. off. to GOT shifted by 1. */ - val = (Elf_Addr) me->module_core + me->arch.got_offset + + val = (Elf_Addr) me->core_layout.base + me->arch.got_offset + rela->r_addend - loc; if (r_type == R_390_GOTPC) rc = apply_rela_bits(loc, val, 1, 32, 0); diff --git a/arch/s390/kernel/os_info.c b/arch/s390/kernel/os_info.c index d112fc66f993..87f05e475ae8 100644 --- a/arch/s390/kernel/os_info.c +++ b/arch/s390/kernel/os_info.c @@ -89,7 +89,7 @@ static void os_info_old_alloc(int nr, int align) goto fail; } buf_align = PTR_ALIGN(buf, align); - if (copy_from_oldmem(buf_align, (void *) addr, size)) { + if (copy_oldmem_kernel(buf_align, (void *) addr, size)) { msg = "copy failed"; goto fail_free; } @@ -122,14 +122,15 @@ static void os_info_old_init(void) return; if (!OLDMEM_BASE) goto fail; - if (copy_from_oldmem(&addr, &S390_lowcore.os_info, sizeof(addr))) + if (copy_oldmem_kernel(&addr, &S390_lowcore.os_info, sizeof(addr))) goto fail; if (addr == 0 || addr % PAGE_SIZE) goto fail; os_info_old = kzalloc(sizeof(*os_info_old), GFP_KERNEL); if (!os_info_old) goto fail; - if (copy_from_oldmem(os_info_old, (void *) addr, sizeof(*os_info_old))) + if (copy_oldmem_kernel(os_info_old, (void *) addr, + sizeof(*os_info_old))) goto fail_free; if (os_info_old->magic != OS_INFO_MAGIC) goto fail_free; diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 7ce00e7a709a..647128d5b983 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -61,6 +61,9 @@ static int show_cpuinfo(struct seq_file *m, void *v) "esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp", "edat", "etf3eh", "highgprs", "te", "vx" }; + static const char * const int_hwcap_str[] = { + "sie" + }; unsigned long n = (unsigned long) v - 1; int i; @@ -75,6 +78,9 @@ static int show_cpuinfo(struct seq_file *m, void *v) for (i = 0; i < ARRAY_SIZE(hwcap_str); i++) if (hwcap_str[i] && (elf_hwcap & (1UL << i))) seq_printf(m, "%s ", hwcap_str[i]); + for (i = 0; i < ARRAY_SIZE(int_hwcap_str); i++) + if (int_hwcap_str[i] && (int_hwcap & (1UL << i))) + seq_printf(m, "%s ", int_hwcap_str[i]); seq_puts(m, "\n"); show_cacheinfo(m); } diff --git a/arch/s390/kernel/reipl.S b/arch/s390/kernel/reipl.S index 52aab0bd84f8..89ea8c213d82 100644 --- a/arch/s390/kernel/reipl.S +++ b/arch/s390/kernel/reipl.S @@ -9,60 +9,66 @@ #include <asm/sigp.h> # -# store_status +# Issue "store status" for the current CPU to its prefix page +# and call passed function afterwards # -# Prerequisites to run this function: -# - Prefix register is set to zero -# - Original prefix register is stored in "dump_prefix_page" -# - Lowcore protection is off +# r2 = Function to be called after store status +# r3 = Parameter for function # ENTRY(store_status) /* Save register one and load save area base */ stg %r1,__LC_SAVE_AREA_RESTART - lghi %r1,SAVE_AREA_BASE /* General purpose registers */ - stmg %r0,%r15,__LC_GPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) - lg %r2,__LC_SAVE_AREA_RESTART - stg %r2,__LC_GPREGS_SAVE_AREA-SAVE_AREA_BASE+8(%r1) + lghi %r1,__LC_GPREGS_SAVE_AREA + stmg %r0,%r15,0(%r1) + mvc 8(8,%r1),__LC_SAVE_AREA_RESTART /* Control registers */ - stctg %c0,%c15,__LC_CREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + lghi %r1,__LC_CREGS_SAVE_AREA + stctg %c0,%c15,0(%r1) /* Access registers */ - stam %a0,%a15,__LC_AREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + lghi %r1,__LC_AREGS_SAVE_AREA + stam %a0,%a15,0(%r1) /* Floating point registers */ - std %f0, 0x00 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) - std %f1, 0x08 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) - std %f2, 0x10 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) - std %f3, 0x18 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) - std %f4, 0x20 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) - std %f5, 0x28 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) - std %f6, 0x30 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) - std %f7, 0x38 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) - std %f8, 0x40 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) - std %f9, 0x48 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) - std %f10,0x50 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) - std %f11,0x58 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) - std %f12,0x60 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) - std %f13,0x68 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) - std %f14,0x70 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) - std %f15,0x78 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + lghi %r1,__LC_FPREGS_SAVE_AREA + std %f0, 0x00(%r1) + std %f1, 0x08(%r1) + std %f2, 0x10(%r1) + std %f3, 0x18(%r1) + std %f4, 0x20(%r1) + std %f5, 0x28(%r1) + std %f6, 0x30(%r1) + std %f7, 0x38(%r1) + std %f8, 0x40(%r1) + std %f9, 0x48(%r1) + std %f10,0x50(%r1) + std %f11,0x58(%r1) + std %f12,0x60(%r1) + std %f13,0x68(%r1) + std %f14,0x70(%r1) + std %f15,0x78(%r1) /* Floating point control register */ - stfpc __LC_FP_CREG_SAVE_AREA-SAVE_AREA_BASE(%r1) + lghi %r1,__LC_FP_CREG_SAVE_AREA + stfpc 0(%r1) /* CPU timer */ - stpt __LC_CPU_TIMER_SAVE_AREA-SAVE_AREA_BASE(%r1) - /* Saved prefix register */ - larl %r2,dump_prefix_page - mvc __LC_PREFIX_SAVE_AREA-SAVE_AREA_BASE(4,%r1),0(%r2) + lghi %r1,__LC_CPU_TIMER_SAVE_AREA + stpt 0(%r1) + /* Store prefix register */ + lghi %r1,__LC_PREFIX_SAVE_AREA + stpx 0(%r1) /* Clock comparator - seven bytes */ - larl %r2,.Lclkcmp - stckc 0(%r2) - mvc __LC_CLOCK_COMP_SAVE_AREA-SAVE_AREA_BASE + 1(7,%r1),1(%r2) + lghi %r1,__LC_CLOCK_COMP_SAVE_AREA + larl %r4,.Lclkcmp + stckc 0(%r4) + mvc 1(7,%r1),1(%r4) /* Program status word */ - epsw %r2,%r3 - st %r2,__LC_PSW_SAVE_AREA-SAVE_AREA_BASE + 0(%r1) - st %r3,__LC_PSW_SAVE_AREA-SAVE_AREA_BASE + 4(%r1) - larl %r2,store_status - stg %r2,__LC_PSW_SAVE_AREA-SAVE_AREA_BASE + 8(%r1) - br %r14 + lghi %r1,__LC_PSW_SAVE_AREA + epsw %r4,%r5 + st %r4,0(%r1) + st %r5,4(%r1) + stg %r2,8(%r1) + lgr %r1,%r2 + lgr %r2,%r3 + br %r1 .section .bss .align 8 @@ -77,9 +83,11 @@ ENTRY(store_status) ENTRY(do_reipl_asm) basr %r13,0 .Lpg0: lpswe .Lnewpsw-.Lpg0(%r13) -.Lpg1: brasl %r14,store_status +.Lpg1: lgr %r3,%r2 + larl %r2,.Lstatus + brasl %r14,store_status - lctlg %c6,%c6,.Lall-.Lpg0(%r13) +.Lstatus: lctlg %c6,%c6,.Lall-.Lpg0(%r13) lgr %r1,%r2 mvc __LC_PGM_NEW_PSW(16),.Lpcnew-.Lpg0(%r13) stsch .Lschib-.Lpg0(%r13) diff --git a/arch/s390/kernel/sclp.c b/arch/s390/kernel/sclp.c index 9fe7781a45cd..d88db40bdf15 100644 --- a/arch/s390/kernel/sclp.c +++ b/arch/s390/kernel/sclp.c @@ -9,7 +9,11 @@ #include <asm/processor.h> #include <asm/sclp.h> +#define EVTYP_VT220MSG_MASK 0x00000040 +#define EVTYP_MSG_MASK 0x40000000 + static char _sclp_work_area[4096] __aligned(PAGE_SIZE); +static bool have_vt220, have_linemode; static void _sclp_wait_int(void) { @@ -68,7 +72,7 @@ static int _sclp_setup(int disable) 0x00, 0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, - 0x80, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, + 0x80, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; unsigned int *masks; @@ -82,13 +86,13 @@ static int _sclp_setup(int disable) rc = _sclp_servc(0x00780005, _sclp_work_area); if (rc) return rc; - if ((masks[0] & masks[3]) != masks[0] || - (masks[1] & masks[2]) != masks[1]) - return -EIO; + have_vt220 = masks[2] & EVTYP_VT220MSG_MASK; + have_linemode = masks[2] & EVTYP_MSG_MASK; return 0; } -static int _sclp_print(const char *str) +/* Output multi-line text using SCLP Message interface. */ +static void _sclp_print_lm(const char *str) { static unsigned char write_head[] = { /* sccb header */ @@ -143,18 +147,49 @@ static int _sclp_print(const char *str) } while (ch != 0); /* SCLP write data */ - return _sclp_servc(0x00760005, _sclp_work_area); + _sclp_servc(0x00760005, _sclp_work_area); } -int _sclp_print_early(const char *str) +/* Output multi-line text (plus a newline) using SCLP VT220 + * interface. + */ +static void _sclp_print_vt220(const char *str) { - int rc; + static unsigned char const write_head[] = { + /* sccb header */ + 0x00, 0x0e, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + /* evbuf header */ + 0x00, 0x06, + 0x1a, 0x00, 0x00, 0x00, + }; + size_t len = strlen(str); - rc = _sclp_setup(0); - if (rc) - return rc; - rc = _sclp_print(str); - if (rc) - return rc; - return _sclp_setup(1); + if (sizeof(write_head) + len >= sizeof(_sclp_work_area)) + len = sizeof(_sclp_work_area) - sizeof(write_head) - 1; + + memcpy(_sclp_work_area, write_head, sizeof(write_head)); + memcpy(_sclp_work_area + sizeof(write_head), str, len); + _sclp_work_area[sizeof(write_head) + len] = '\n'; + + /* Update length fields in evbuf and sccb headers */ + *(unsigned short *)(_sclp_work_area + 8) += len + 1; + *(unsigned short *)(_sclp_work_area + 0) += len + 1; + + /* SCLP write data */ + (void)_sclp_servc(0x00760005, _sclp_work_area); +} + +/* Output one or more lines of text on the SCLP console (VT220 and / + * or line-mode). All lines get terminated; no need for a trailing LF. + */ +void _sclp_print_early(const char *str) +{ + if (_sclp_setup(0) != 0) + return; + if (have_linemode) + _sclp_print_lm(str); + if (have_vt220) + _sclp_print_vt220(str); + _sclp_setup(1); } diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index c837bcacf218..c6878fbbcf13 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -80,6 +80,8 @@ EXPORT_SYMBOL(console_irq); unsigned long elf_hwcap __read_mostly = 0; char elf_platform[ELF_PLATFORM_SIZE]; +unsigned long int_hwcap = 0; + int __initdata memory_end_set; unsigned long __initdata memory_end; unsigned long __initdata max_physmem_end; @@ -97,7 +99,7 @@ unsigned long MODULES_VADDR; unsigned long MODULES_END; /* An array with a pointer to the lowcore of every CPU. */ -struct _lowcore *lowcore_ptr[NR_CPUS]; +struct lowcore *lowcore_ptr[NR_CPUS]; EXPORT_SYMBOL(lowcore_ptr); /* @@ -291,12 +293,12 @@ void *restart_stack __attribute__((__section__(".data"))); static void __init setup_lowcore(void) { - struct _lowcore *lc; + struct lowcore *lc; /* * Setup lowcore for boot cpu */ - BUILD_BUG_ON(sizeof(struct _lowcore) != LC_PAGES * 4096); + BUILD_BUG_ON(sizeof(struct lowcore) != LC_PAGES * 4096); lc = __alloc_bootmem_low(LC_PAGES * PAGE_SIZE, LC_PAGES * PAGE_SIZE, 0); lc->restart_psw.mask = PSW_KERNEL_BITS; lc->restart_psw.addr = @@ -661,15 +663,6 @@ static void __init reserve_kernel(void) #endif } -static void __init reserve_elfcorehdr(void) -{ -#ifdef CONFIG_CRASH_DUMP - if (is_kdump_kernel()) - memblock_reserve(elfcorehdr_addr - OLDMEM_BASE, - PAGE_ALIGN(elfcorehdr_size)); -#endif -} - static void __init setup_memory(void) { struct memblock_region *reg; @@ -793,6 +786,13 @@ static int __init setup_hwcaps(void) strcpy(elf_platform, "z13"); break; } + + /* + * Virtualization support HWCAP_INT_SIE is bit 0. + */ + if (sclp.has_sief2) + int_hwcap |= HWCAP_INT_SIE; + return 0; } arch_initcall(setup_hwcaps); @@ -841,6 +841,11 @@ void __init setup_arch(char **cmdline_p) init_mm.brk = (unsigned long) &_end; parse_early_param(); +#ifdef CONFIG_CRASH_DUMP + /* Deactivate elfcorehdr= kernel parameter */ + elfcorehdr_addr = ELFCORE_ADDR_MAX; +#endif + os_info_init(); setup_ipl(); @@ -849,7 +854,6 @@ void __init setup_arch(char **cmdline_p) reserve_oldmem(); reserve_kernel(); reserve_initrd(); - reserve_elfcorehdr(); memblock_allow_resize(); /* Get information about *all* installed memory */ @@ -870,11 +874,13 @@ void __init setup_arch(char **cmdline_p) check_initrd(); reserve_crashkernel(); +#ifdef CONFIG_CRASH_DUMP /* * Be aware that smp_save_dump_cpus() triggers a system reset. * Therefore CPU and device initialization should be done afterwards. */ smp_save_dump_cpus(); +#endif setup_resources(); setup_vmcoreinfo(); diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 9062df575afe..a13468b9a913 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -64,8 +64,9 @@ enum { static DEFINE_PER_CPU(struct cpu *, cpu_device); struct pcpu { - struct _lowcore *lowcore; /* lowcore page(s) for the cpu */ + struct lowcore *lowcore; /* lowcore page(s) for the cpu */ unsigned long ec_mask; /* bit mask for ec_xxx functions */ + unsigned long ec_clk; /* sigp timestamp for ec_xxx */ signed char state; /* physical cpu state */ signed char polarization; /* physical polarization */ u16 address; /* physical cpu address */ @@ -80,6 +81,10 @@ EXPORT_SYMBOL(smp_cpu_mt_shift); unsigned int smp_cpu_mtid; EXPORT_SYMBOL(smp_cpu_mtid); +#ifdef CONFIG_CRASH_DUMP +__vector128 __initdata boot_cpu_vector_save_area[__NUM_VXRS]; +#endif + static unsigned int smp_max_threads __initdata = -1U; static int __init early_nosmt(char *s) @@ -105,8 +110,7 @@ DEFINE_MUTEX(smp_cpu_state_mutex); /* * Signal processor helper functions. */ -static inline int __pcpu_sigp_relax(u16 addr, u8 order, unsigned long parm, - u32 *status) +static inline int __pcpu_sigp_relax(u16 addr, u8 order, unsigned long parm) { int cc; @@ -171,6 +175,7 @@ static void pcpu_ec_call(struct pcpu *pcpu, int ec_bit) if (test_and_set_bit(ec_bit, &pcpu->ec_mask)) return; order = pcpu_running(pcpu) ? SIGP_EXTERNAL_CALL : SIGP_EMERGENCY_SIGNAL; + pcpu->ec_clk = get_tod_clock_fast(); pcpu_sigp_retry(pcpu, order, 0); } @@ -180,10 +185,10 @@ static void pcpu_ec_call(struct pcpu *pcpu, int ec_bit) static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) { unsigned long async_stack, panic_stack; - struct _lowcore *lc; + struct lowcore *lc; if (pcpu != &pcpu_devices[0]) { - pcpu->lowcore = (struct _lowcore *) + pcpu->lowcore = (struct lowcore *) __get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER); async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER); panic_stack = __get_free_page(GFP_KERNEL); @@ -235,7 +240,7 @@ static void pcpu_free_lowcore(struct pcpu *pcpu) static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu) { - struct _lowcore *lc = pcpu->lowcore; + struct lowcore *lc = pcpu->lowcore; if (MACHINE_HAS_TLB_LC) cpumask_set_cpu(cpu, &init_mm.context.cpu_attach_mask); @@ -255,7 +260,7 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu) static void pcpu_attach_task(struct pcpu *pcpu, struct task_struct *tsk) { - struct _lowcore *lc = pcpu->lowcore; + struct lowcore *lc = pcpu->lowcore; struct thread_info *ti = task_thread_info(tsk); lc->kernel_stack = (unsigned long) task_stack_page(tsk) @@ -271,7 +276,7 @@ static void pcpu_attach_task(struct pcpu *pcpu, struct task_struct *tsk) static void pcpu_start_fn(struct pcpu *pcpu, void (*func)(void *), void *data) { - struct _lowcore *lc = pcpu->lowcore; + struct lowcore *lc = pcpu->lowcore; lc->restart_stack = lc->kernel_stack; lc->restart_fn = (unsigned long) func; @@ -286,7 +291,7 @@ static void pcpu_start_fn(struct pcpu *pcpu, void (*func)(void *), void *data) static void pcpu_delegate(struct pcpu *pcpu, void (*func)(void *), void *data, unsigned long stack) { - struct _lowcore *lc = lowcore_ptr[pcpu - pcpu_devices]; + struct lowcore *lc = lowcore_ptr[pcpu - pcpu_devices]; unsigned long source_cpu = stap(); __load_psw_mask(PSW_KERNEL_BITS); @@ -538,53 +543,24 @@ EXPORT_SYMBOL(smp_ctl_clear_bit); #ifdef CONFIG_CRASH_DUMP -static void __init __smp_store_cpu_state(struct save_area_ext *sa_ext, - u16 address, int is_boot_cpu) -{ - void *lc = (void *)(unsigned long) store_prefix(); - unsigned long vx_sa; - - if (is_boot_cpu) { - /* Copy the registers of the boot CPU. */ - copy_oldmem_page(1, (void *) &sa_ext->sa, sizeof(sa_ext->sa), - SAVE_AREA_BASE - PAGE_SIZE, 0); - if (MACHINE_HAS_VX) - save_vx_regs_safe(sa_ext->vx_regs); - return; - } - /* Get the registers of a non-boot cpu. */ - __pcpu_sigp_relax(address, SIGP_STOP_AND_STORE_STATUS, 0, NULL); - memcpy_real(&sa_ext->sa, lc + SAVE_AREA_BASE, sizeof(sa_ext->sa)); - if (!MACHINE_HAS_VX) - return; - /* Get the VX registers */ - vx_sa = memblock_alloc(PAGE_SIZE, PAGE_SIZE); - if (!vx_sa) - panic("could not allocate memory for VX save area\n"); - __pcpu_sigp_relax(address, SIGP_STORE_ADDITIONAL_STATUS, vx_sa, NULL); - memcpy(sa_ext->vx_regs, (void *) vx_sa, sizeof(sa_ext->vx_regs)); - memblock_free(vx_sa, PAGE_SIZE); -} - int smp_store_status(int cpu) { - unsigned long vx_sa; - struct pcpu *pcpu; + struct pcpu *pcpu = pcpu_devices + cpu; + unsigned long pa; - pcpu = pcpu_devices + cpu; - if (__pcpu_sigp_relax(pcpu->address, SIGP_STOP_AND_STORE_STATUS, - 0, NULL) != SIGP_CC_ORDER_CODE_ACCEPTED) + pa = __pa(&pcpu->lowcore->floating_pt_save_area); + if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_STATUS_AT_ADDRESS, + pa) != SIGP_CC_ORDER_CODE_ACCEPTED) return -EIO; if (!MACHINE_HAS_VX) return 0; - vx_sa = __pa(pcpu->lowcore->vector_save_area_addr); - __pcpu_sigp_relax(pcpu->address, SIGP_STORE_ADDITIONAL_STATUS, - vx_sa, NULL); + pa = __pa(pcpu->lowcore->vector_save_area_addr); + if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_ADDITIONAL_STATUS, + pa) != SIGP_CC_ORDER_CODE_ACCEPTED) + return -EIO; return 0; } -#endif /* CONFIG_CRASH_DUMP */ - /* * Collect CPU state of the previous, crashed system. * There are four cases: @@ -593,7 +569,7 @@ int smp_store_status(int cpu) * The state for all CPUs except the boot CPU needs to be collected * with sigp stop-and-store-status. The boot CPU state is located in * the absolute lowcore of the memory stored in the HSA. The zcore code - * will allocate the save area and copy the boot CPU state from the HSA. + * will copy the boot CPU state from the HSA. * 2) stand-alone kdump for SCSI (zfcp dump with swapped memory) * condition: OLDMEM_BASE != NULL && ipl_info.type == IPL_TYPE_FCP_DUMP * The state for all CPUs except the boot CPU needs to be collected @@ -608,55 +584,78 @@ int smp_store_status(int cpu) * stored the registers of the boot CPU in the memory of the old system. * 4) kdump and the old kernel stored the CPU state * condition: OLDMEM_BASE != NULL && is_kdump_kernel() - * The state of all CPUs is stored in ELF sections in the memory of the - * old system. The ELF sections are picked up by the crash_dump code - * via elfcorehdr_addr. + * This case does not exist for s390 anymore, setup_arch explicitly + * deactivates the elfcorehdr= kernel parameter */ +static __init void smp_save_cpu_vxrs(struct save_area *sa, u16 addr, + bool is_boot_cpu, unsigned long page) +{ + __vector128 *vxrs = (__vector128 *) page; + + if (is_boot_cpu) + vxrs = boot_cpu_vector_save_area; + else + __pcpu_sigp_relax(addr, SIGP_STORE_ADDITIONAL_STATUS, page); + save_area_add_vxrs(sa, vxrs); +} + +static __init void smp_save_cpu_regs(struct save_area *sa, u16 addr, + bool is_boot_cpu, unsigned long page) +{ + void *regs = (void *) page; + + if (is_boot_cpu) + copy_oldmem_kernel(regs, (void *) __LC_FPREGS_SAVE_AREA, 512); + else + __pcpu_sigp_relax(addr, SIGP_STORE_STATUS_AT_ADDRESS, page); + save_area_add_regs(sa, regs); +} + void __init smp_save_dump_cpus(void) { -#ifdef CONFIG_CRASH_DUMP - int addr, cpu, boot_cpu_addr, max_cpu_addr; - struct save_area_ext *sa_ext; + int addr, boot_cpu_addr, max_cpu_addr; + struct save_area *sa; + unsigned long page; bool is_boot_cpu; - if (is_kdump_kernel()) - /* Previous system stored the CPU states. Nothing to do. */ - return; if (!(OLDMEM_BASE || ipl_info.type == IPL_TYPE_FCP_DUMP)) /* No previous system present, normal boot. */ return; + /* Allocate a page as dumping area for the store status sigps */ + page = memblock_alloc_base(PAGE_SIZE, PAGE_SIZE, 1UL << 31); + if (!page) + panic("could not allocate memory for save area\n"); /* Set multi-threading state to the previous system. */ pcpu_set_smt(sclp.mtid_prev); - max_cpu_addr = SCLP_MAX_CORES << sclp.mtid_prev; - for (cpu = 0, addr = 0; addr <= max_cpu_addr; addr++) { - if (__pcpu_sigp_relax(addr, SIGP_SENSE, 0, NULL) == - SIGP_CC_NOT_OPERATIONAL) - continue; - cpu += 1; - } - dump_save_areas.areas = (void *)memblock_alloc(sizeof(void *) * cpu, 8); - dump_save_areas.count = cpu; boot_cpu_addr = stap(); - for (cpu = 0, addr = 0; addr <= max_cpu_addr; addr++) { - if (__pcpu_sigp_relax(addr, SIGP_SENSE, 0, NULL) == + max_cpu_addr = SCLP_MAX_CORES << sclp.mtid_prev; + for (addr = 0; addr <= max_cpu_addr; addr++) { + if (__pcpu_sigp_relax(addr, SIGP_SENSE, 0) == SIGP_CC_NOT_OPERATIONAL) continue; - sa_ext = (void *) memblock_alloc(sizeof(*sa_ext), 8); - dump_save_areas.areas[cpu] = sa_ext; - if (!sa_ext) - panic("could not allocate memory for save area\n"); is_boot_cpu = (addr == boot_cpu_addr); - cpu += 1; - if (is_boot_cpu && !OLDMEM_BASE) - /* Skip boot CPU for standard zfcp dump. */ - continue; - /* Get state for this CPU. */ - __smp_store_cpu_state(sa_ext, addr, is_boot_cpu); + /* Allocate save area */ + sa = save_area_alloc(is_boot_cpu); + if (!sa) + panic("could not allocate memory for save area\n"); + if (MACHINE_HAS_VX) + /* Get the vector registers */ + smp_save_cpu_vxrs(sa, addr, is_boot_cpu, page); + /* + * For a zfcp dump OLDMEM_BASE == NULL and the registers + * of the boot CPU are stored in the HSA. To retrieve + * these registers an SCLP request is required which is + * done by drivers/s390/char/zcore.c:init_cpu_info() + */ + if (!is_boot_cpu || OLDMEM_BASE) + /* Get the CPU registers */ + smp_save_cpu_regs(sa, addr, is_boot_cpu, page); } + memblock_free(page, PAGE_SIZE); diag308_reset(); pcpu_set_smt(0); -#endif /* CONFIG_CRASH_DUMP */ } +#endif /* CONFIG_CRASH_DUMP */ void smp_cpu_set_polarization(int cpu, int val) { @@ -680,7 +679,7 @@ static struct sclp_core_info *smp_get_core_info(void) for (address = 0; address < (SCLP_MAX_CORES << smp_cpu_mt_shift); address += (1U << smp_cpu_mt_shift)) { - if (__pcpu_sigp_relax(address, SIGP_SENSE, 0, NULL) == + if (__pcpu_sigp_relax(address, SIGP_SENSE, 0) == SIGP_CC_NOT_OPERATIONAL) continue; info->core[info->configured].core_id = @@ -924,7 +923,7 @@ void __init smp_prepare_boot_cpu(void) pcpu->state = CPU_STATE_CONFIGURED; pcpu->address = stap(); - pcpu->lowcore = (struct _lowcore *)(unsigned long) store_prefix(); + pcpu->lowcore = (struct lowcore *)(unsigned long) store_prefix(); S390_lowcore.percpu_offset = __per_cpu_offset[0]; smp_cpu_set_polarization(0, POLARIZATION_UNKNOWN); set_cpu_present(0, true); diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c index 99babea026ca..f7dba3887a54 100644 --- a/arch/s390/kernel/sysinfo.c +++ b/arch/s390/kernel/sysinfo.c @@ -111,8 +111,7 @@ static void stsi_1_1_1(struct seq_file *m, struct sysinfo_1_1_1 *info) static void stsi_15_1_x(struct seq_file *m, struct sysinfo_15_1_x *info) { - static int max_mnest; - int i, rc; + int i; seq_putc(m, '\n'); if (!MACHINE_HAS_TOPOLOGY) @@ -123,7 +122,7 @@ static void stsi_15_1_x(struct seq_file *m, struct sysinfo_15_1_x *info) for (i = 0; i < TOPOLOGY_NR_MAG; i++) seq_printf(m, " %d", info->mag[i]); seq_putc(m, '\n'); -#ifdef CONFIG_SCHED_MC +#ifdef CONFIG_SCHED_TOPOLOGY store_topology(info); seq_printf(m, "CPU Topology SW: "); for (i = 0; i < TOPOLOGY_NR_MAG; i++) @@ -145,6 +144,10 @@ static void stsi_1_2_2(struct seq_file *m, struct sysinfo_1_2_2 *info) seq_printf(m, "CPUs Configured: %d\n", info->cpus_configured); seq_printf(m, "CPUs Standby: %d\n", info->cpus_standby); seq_printf(m, "CPUs Reserved: %d\n", info->cpus_reserved); + if (info->mt_installed) { + seq_printf(m, "CPUs G-MTID: %d\n", info->mt_gtid); + seq_printf(m, "CPUs S-MTID: %d\n", info->mt_stid); + } /* * Sigh 2. According to the specification the alternate * capability field is a 32 bit floating point number @@ -194,13 +197,10 @@ static void stsi_2_2_2(struct seq_file *m, struct sysinfo_2_2_2 *info) seq_printf(m, "LPAR CPUs Reserved: %d\n", info->cpus_reserved); seq_printf(m, "LPAR CPUs Dedicated: %d\n", info->cpus_dedicated); seq_printf(m, "LPAR CPUs Shared: %d\n", info->cpus_shared); - if (info->mt_installed & 0x80) { - seq_printf(m, "LPAR CPUs G-MTID: %d\n", - info->mt_general & 0x1f); - seq_printf(m, "LPAR CPUs S-MTID: %d\n", - info->mt_installed & 0x1f); - seq_printf(m, "LPAR CPUs PS-MTID: %d\n", - info->mt_psmtid & 0x1f); + if (info->mt_installed) { + seq_printf(m, "LPAR CPUs G-MTID: %d\n", info->mt_gtid); + seq_printf(m, "LPAR CPUs S-MTID: %d\n", info->mt_stid); + seq_printf(m, "LPAR CPUs PS-MTID: %d\n", info->mt_psmtid); } } diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 1b18118bbc06..d69d648759c9 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -260,11 +260,8 @@ void vector_exception(struct pt_regs *regs) void data_exception(struct pt_regs *regs) { - __u16 __user *location; int signal = 0; - location = get_trap_ip(regs); - save_fpu_regs(); if (current->thread.fpu.fpc & FPC_DXC_MASK) signal = SIGFPE; diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index 59eddb0e1a3e..94495cac8be3 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -80,7 +80,7 @@ struct vdso_data *vdso_data = &vdso_data_store.data; /* * Setup vdso data page. */ -static void vdso_init_data(struct vdso_data *vd) +static void __init vdso_init_data(struct vdso_data *vd) { vd->ectg_available = test_facility(31); } @@ -90,9 +90,10 @@ static void vdso_init_data(struct vdso_data *vd) */ #define SEGMENT_ORDER 2 -int vdso_alloc_per_cpu(struct _lowcore *lowcore) +int vdso_alloc_per_cpu(struct lowcore *lowcore) { unsigned long segment_table, page_table, page_frame; + struct vdso_per_cpu_data *vd; u32 *psal, *aste; int i; @@ -107,6 +108,12 @@ int vdso_alloc_per_cpu(struct _lowcore *lowcore) if (!segment_table || !page_table || !page_frame) goto out; + /* Initialize per-cpu vdso data page */ + vd = (struct vdso_per_cpu_data *) page_frame; + vd->cpu_nr = lowcore->cpu_nr; + vd->node_id = cpu_to_node(vd->cpu_nr); + + /* Set up access register mode page table */ clear_table((unsigned long *) segment_table, _SEGMENT_ENTRY_EMPTY, PAGE_SIZE << SEGMENT_ORDER); clear_table((unsigned long *) page_table, _PAGE_INVALID, @@ -138,7 +145,7 @@ out: return -ENOMEM; } -void vdso_free_per_cpu(struct _lowcore *lowcore) +void vdso_free_per_cpu(struct lowcore *lowcore) { unsigned long segment_table, page_table, page_frame; u32 *psal, *aste; @@ -163,7 +170,7 @@ static void vdso_init_cr5(void) if (!vdso_enabled) return; - cr5 = offsetof(struct _lowcore, paste); + cr5 = offsetof(struct lowcore, paste); __ctl_load(cr5, 5, 5); } @@ -299,8 +306,6 @@ static int __init vdso_init(void) get_page(virt_to_page(vdso_data)); - smp_mb(); - return 0; } early_initcall(vdso_init); diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile index ee8a18e50a25..f9c459586649 100644 --- a/arch/s390/kernel/vdso32/Makefile +++ b/arch/s390/kernel/vdso32/Makefile @@ -1,6 +1,6 @@ # List of files in the vdso, has to be asm only for now -obj-vdso32 = gettimeofday.o clock_getres.o clock_gettime.o note.o +obj-vdso32 = gettimeofday.o clock_getres.o clock_gettime.o note.o getcpu.o # Build rules diff --git a/arch/s390/kernel/vdso32/getcpu.S b/arch/s390/kernel/vdso32/getcpu.S new file mode 100644 index 000000000000..c1ed0b72030f --- /dev/null +++ b/arch/s390/kernel/vdso32/getcpu.S @@ -0,0 +1,43 @@ +/* + * Userland implementation of getcpu() for 32 bits processes in a + * s390 kernel for use in the vDSO + * + * Copyright IBM Corp. 2016 + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ +#include <asm/vdso.h> +#include <asm/asm-offsets.h> + + .text + .align 4 + .globl __kernel_getcpu + .type __kernel_getcpu,@function +__kernel_getcpu: + .cfi_startproc + ear %r1,%a4 + lhi %r4,1 + sll %r4,24 + sar %a4,%r4 + la %r4,0 + epsw %r0,0 + sacf 512 + l %r5,__VDSO_CPU_NR(%r4) + l %r4,__VDSO_NODE_ID(%r4) + tml %r0,0x4000 + jo 1f + tml %r0,0x8000 + jno 0f + sacf 256 + j 1f +0: sacf 0 +1: sar %a4,%r1 + ltr %r2,%r2 + jz 2f + st %r5,0(%r2) +2: ltr %r3,%r3 + jz 3f + st %r4,0(%r3) +3: lhi %r2,0 + br %r14 + .cfi_endproc + .size __kernel_getcpu,.-__kernel_getcpu diff --git a/arch/s390/kernel/vdso32/vdso32.lds.S b/arch/s390/kernel/vdso32/vdso32.lds.S index a8c379fa1247..8f048c2d6d13 100644 --- a/arch/s390/kernel/vdso32/vdso32.lds.S +++ b/arch/s390/kernel/vdso32/vdso32.lds.S @@ -132,6 +132,7 @@ VERSION __kernel_gettimeofday; __kernel_clock_gettime; __kernel_clock_getres; + __kernel_getcpu; local: *; }; diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile index c4b03f9ed228..058659c1b8cf 100644 --- a/arch/s390/kernel/vdso64/Makefile +++ b/arch/s390/kernel/vdso64/Makefile @@ -1,6 +1,6 @@ # List of files in the vdso, has to be asm only for now -obj-vdso64 = gettimeofday.o clock_getres.o clock_gettime.o note.o +obj-vdso64 = gettimeofday.o clock_getres.o clock_gettime.o note.o getcpu.o # Build rules diff --git a/arch/s390/kernel/vdso64/getcpu.S b/arch/s390/kernel/vdso64/getcpu.S new file mode 100644 index 000000000000..4cbe98291931 --- /dev/null +++ b/arch/s390/kernel/vdso64/getcpu.S @@ -0,0 +1,42 @@ +/* + * Userland implementation of getcpu() for 64 bits processes in a + * s390 kernel for use in the vDSO + * + * Copyright IBM Corp. 2016 + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ +#include <asm/vdso.h> +#include <asm/asm-offsets.h> + + .text + .align 4 + .globl __kernel_getcpu + .type __kernel_getcpu,@function +__kernel_getcpu: + .cfi_startproc + ear %r1,%a4 + llilh %r4,0x0100 + sar %a4,%r4 + la %r4,0 + epsw %r0,0 + sacf 512 + l %r5,__VDSO_CPU_NR(%r4) + l %r4,__VDSO_NODE_ID(%r4) + tml %r0,0x4000 + jo 1f + tml %r0,0x8000 + jno 0f + sacf 256 + j 1f +0: sacf 0 +1: sar %a4,%r1 + ltgr %r2,%r2 + jz 2f + st %r5,0(%r2) +2: ltgr %r3,%r3 + jz 3f + st %r4,0(%r3) +3: lghi %r2,0 + br %r14 + .cfi_endproc + .size __kernel_getcpu,.-__kernel_getcpu diff --git a/arch/s390/kernel/vdso64/vdso64.lds.S b/arch/s390/kernel/vdso64/vdso64.lds.S index 9f5979d102a9..f35455d497fe 100644 --- a/arch/s390/kernel/vdso64/vdso64.lds.S +++ b/arch/s390/kernel/vdso64/vdso64.lds.S @@ -132,6 +132,7 @@ VERSION __kernel_gettimeofday; __kernel_clock_gettime; __kernel_clock_getres; + __kernel_getcpu; local: *; }; diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 5fbfb88f8477..05f7de9869a9 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -155,10 +155,8 @@ static int __diag_time_slice_end(struct kvm_vcpu *vcpu) static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu) { - struct kvm *kvm = vcpu->kvm; struct kvm_vcpu *tcpu; int tid; - int i; tid = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4]; vcpu->stat.diagnose_9c++; @@ -167,12 +165,9 @@ static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu) if (tid == vcpu->vcpu_id) return 0; - kvm_for_each_vcpu(i, tcpu, kvm) - if (tcpu->vcpu_id == tid) { - kvm_vcpu_yield_to(tcpu); - break; - } - + tcpu = kvm_get_vcpu_by_id(vcpu->kvm, tid); + if (tcpu) + kvm_vcpu_yield_to(tcpu); return 0; } diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index a7559f7207df..d30db40437dc 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -259,10 +259,14 @@ struct aste { int ipte_lock_held(struct kvm_vcpu *vcpu) { - union ipte_control *ic = &vcpu->kvm->arch.sca->ipte_control; + if (vcpu->arch.sie_block->eca & 1) { + int rc; - if (vcpu->arch.sie_block->eca & 1) - return ic->kh != 0; + read_lock(&vcpu->kvm->arch.sca_lock); + rc = kvm_s390_get_ipte_control(vcpu->kvm)->kh != 0; + read_unlock(&vcpu->kvm->arch.sca_lock); + return rc; + } return vcpu->kvm->arch.ipte_lock_count != 0; } @@ -274,16 +278,20 @@ static void ipte_lock_simple(struct kvm_vcpu *vcpu) vcpu->kvm->arch.ipte_lock_count++; if (vcpu->kvm->arch.ipte_lock_count > 1) goto out; - ic = &vcpu->kvm->arch.sca->ipte_control; +retry: + read_lock(&vcpu->kvm->arch.sca_lock); + ic = kvm_s390_get_ipte_control(vcpu->kvm); do { old = READ_ONCE(*ic); - while (old.k) { + if (old.k) { + read_unlock(&vcpu->kvm->arch.sca_lock); cond_resched(); - old = READ_ONCE(*ic); + goto retry; } new = old; new.k = 1; } while (cmpxchg(&ic->val, old.val, new.val) != old.val); + read_unlock(&vcpu->kvm->arch.sca_lock); out: mutex_unlock(&vcpu->kvm->arch.ipte_mutex); } @@ -296,12 +304,14 @@ static void ipte_unlock_simple(struct kvm_vcpu *vcpu) vcpu->kvm->arch.ipte_lock_count--; if (vcpu->kvm->arch.ipte_lock_count) goto out; - ic = &vcpu->kvm->arch.sca->ipte_control; + read_lock(&vcpu->kvm->arch.sca_lock); + ic = kvm_s390_get_ipte_control(vcpu->kvm); do { old = READ_ONCE(*ic); new = old; new.k = 0; } while (cmpxchg(&ic->val, old.val, new.val) != old.val); + read_unlock(&vcpu->kvm->arch.sca_lock); wake_up(&vcpu->kvm->arch.ipte_wq); out: mutex_unlock(&vcpu->kvm->arch.ipte_mutex); @@ -311,24 +321,29 @@ static void ipte_lock_siif(struct kvm_vcpu *vcpu) { union ipte_control old, new, *ic; - ic = &vcpu->kvm->arch.sca->ipte_control; +retry: + read_lock(&vcpu->kvm->arch.sca_lock); + ic = kvm_s390_get_ipte_control(vcpu->kvm); do { old = READ_ONCE(*ic); - while (old.kg) { + if (old.kg) { + read_unlock(&vcpu->kvm->arch.sca_lock); cond_resched(); - old = READ_ONCE(*ic); + goto retry; } new = old; new.k = 1; new.kh++; } while (cmpxchg(&ic->val, old.val, new.val) != old.val); + read_unlock(&vcpu->kvm->arch.sca_lock); } static void ipte_unlock_siif(struct kvm_vcpu *vcpu) { union ipte_control old, new, *ic; - ic = &vcpu->kvm->arch.sca->ipte_control; + read_lock(&vcpu->kvm->arch.sca_lock); + ic = kvm_s390_get_ipte_control(vcpu->kvm); do { old = READ_ONCE(*ic); new = old; @@ -336,6 +351,7 @@ static void ipte_unlock_siif(struct kvm_vcpu *vcpu) if (!new.kh) new.k = 0; } while (cmpxchg(&ic->val, old.val, new.val) != old.val); + read_unlock(&vcpu->kvm->arch.sca_lock); if (!new.kh) wake_up(&vcpu->kvm->arch.ipte_wq); } diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index b4a5aa110cec..d53c10753c46 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -54,9 +54,6 @@ void kvm_s390_rewind_psw(struct kvm_vcpu *vcpu, int ilc) static int handle_noop(struct kvm_vcpu *vcpu) { switch (vcpu->arch.sie_block->icptcode) { - case 0x0: - vcpu->stat.exit_null++; - break; case 0x10: vcpu->stat.exit_external_request++; break; @@ -338,8 +335,10 @@ static int handle_partial_execution(struct kvm_vcpu *vcpu) int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu) { + if (kvm_is_ucontrol(vcpu->kvm)) + return -EOPNOTSUPP; + switch (vcpu->arch.sie_block->icptcode) { - case 0x00: case 0x10: case 0x18: return handle_noop(vcpu); diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 6a75352f453c..f88ca72c3a77 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -34,6 +34,106 @@ #define PFAULT_DONE 0x0680 #define VIRTIO_PARAM 0x0d00 +/* handle external calls via sigp interpretation facility */ +static int sca_ext_call_pending(struct kvm_vcpu *vcpu, int *src_id) +{ + int c, scn; + + if (!(atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_ECALL_PEND)) + return 0; + + read_lock(&vcpu->kvm->arch.sca_lock); + if (vcpu->kvm->arch.use_esca) { + struct esca_block *sca = vcpu->kvm->arch.sca; + union esca_sigp_ctrl sigp_ctrl = + sca->cpu[vcpu->vcpu_id].sigp_ctrl; + + c = sigp_ctrl.c; + scn = sigp_ctrl.scn; + } else { + struct bsca_block *sca = vcpu->kvm->arch.sca; + union bsca_sigp_ctrl sigp_ctrl = + sca->cpu[vcpu->vcpu_id].sigp_ctrl; + + c = sigp_ctrl.c; + scn = sigp_ctrl.scn; + } + read_unlock(&vcpu->kvm->arch.sca_lock); + + if (src_id) + *src_id = scn; + + return c; +} + +static int sca_inject_ext_call(struct kvm_vcpu *vcpu, int src_id) +{ + int expect, rc; + + read_lock(&vcpu->kvm->arch.sca_lock); + if (vcpu->kvm->arch.use_esca) { + struct esca_block *sca = vcpu->kvm->arch.sca; + union esca_sigp_ctrl *sigp_ctrl = + &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); + union esca_sigp_ctrl new_val = {0}, old_val = *sigp_ctrl; + + new_val.scn = src_id; + new_val.c = 1; + old_val.c = 0; + + expect = old_val.value; + rc = cmpxchg(&sigp_ctrl->value, old_val.value, new_val.value); + } else { + struct bsca_block *sca = vcpu->kvm->arch.sca; + union bsca_sigp_ctrl *sigp_ctrl = + &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); + union bsca_sigp_ctrl new_val = {0}, old_val = *sigp_ctrl; + + new_val.scn = src_id; + new_val.c = 1; + old_val.c = 0; + + expect = old_val.value; + rc = cmpxchg(&sigp_ctrl->value, old_val.value, new_val.value); + } + read_unlock(&vcpu->kvm->arch.sca_lock); + + if (rc != expect) { + /* another external call is pending */ + return -EBUSY; + } + atomic_or(CPUSTAT_ECALL_PEND, &vcpu->arch.sie_block->cpuflags); + return 0; +} + +static void sca_clear_ext_call(struct kvm_vcpu *vcpu) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + int rc, expect; + + atomic_andnot(CPUSTAT_ECALL_PEND, li->cpuflags); + read_lock(&vcpu->kvm->arch.sca_lock); + if (vcpu->kvm->arch.use_esca) { + struct esca_block *sca = vcpu->kvm->arch.sca; + union esca_sigp_ctrl *sigp_ctrl = + &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); + union esca_sigp_ctrl old = *sigp_ctrl; + + expect = old.value; + rc = cmpxchg(&sigp_ctrl->value, old.value, 0); + } else { + struct bsca_block *sca = vcpu->kvm->arch.sca; + union bsca_sigp_ctrl *sigp_ctrl = + &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); + union bsca_sigp_ctrl old = *sigp_ctrl; + + expect = old.value; + rc = cmpxchg(&sigp_ctrl->value, old.value, 0); + } + read_unlock(&vcpu->kvm->arch.sca_lock); + WARN_ON(rc != expect); /* cannot clear? */ +} + int psw_extint_disabled(struct kvm_vcpu *vcpu) { return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_EXT); @@ -399,9 +499,9 @@ static int __must_check __deliver_restart(struct kvm_vcpu *vcpu) trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_RESTART, 0, 0); rc = write_guest_lc(vcpu, - offsetof(struct _lowcore, restart_old_psw), + offsetof(struct lowcore, restart_old_psw), &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - rc |= read_guest_lc(vcpu, offsetof(struct _lowcore, restart_psw), + rc |= read_guest_lc(vcpu, offsetof(struct lowcore, restart_psw), &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); clear_bit(IRQ_PEND_RESTART, &li->pending_irqs); return rc ? -EFAULT : 0; @@ -792,13 +892,11 @@ static const deliver_irq_t deliver_irq_funcs[] = { int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu) { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; - uint8_t sigp_ctrl = vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl; if (!sclp.has_sigpif) return test_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs); - return (sigp_ctrl & SIGP_CTRL_C) && - (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_ECALL_PEND); + return sca_ext_call_pending(vcpu, NULL); } int kvm_s390_vcpu_has_irq(struct kvm_vcpu *vcpu, int exclude_stop) @@ -909,9 +1007,7 @@ void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu) memset(&li->irq, 0, sizeof(li->irq)); spin_unlock(&li->lock); - /* clear pending external calls set by sigp interpretation facility */ - atomic_andnot(CPUSTAT_ECALL_PEND, li->cpuflags); - vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl = 0; + sca_clear_ext_call(vcpu); } int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) @@ -1003,21 +1099,6 @@ static int __inject_pfault_init(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) return 0; } -static int __inject_extcall_sigpif(struct kvm_vcpu *vcpu, uint16_t src_id) -{ - unsigned char new_val, old_val; - uint8_t *sigp_ctrl = &vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl; - - new_val = SIGP_CTRL_C | (src_id & SIGP_CTRL_SCN_MASK); - old_val = *sigp_ctrl & ~SIGP_CTRL_C; - if (cmpxchg(sigp_ctrl, old_val, new_val) != old_val) { - /* another external call is pending */ - return -EBUSY; - } - atomic_or(CPUSTAT_ECALL_PEND, &vcpu->arch.sie_block->cpuflags); - return 0; -} - static int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; @@ -1034,7 +1115,7 @@ static int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) return -EINVAL; if (sclp.has_sigpif) - return __inject_extcall_sigpif(vcpu, src_id); + return sca_inject_ext_call(vcpu, src_id); if (test_and_set_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs)) return -EBUSY; @@ -2203,7 +2284,7 @@ static void store_local_irq(struct kvm_s390_local_interrupt *li, int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu, __u8 __user *buf, int len) { - uint8_t sigp_ctrl = vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl; + int scn; unsigned long sigp_emerg_pending[BITS_TO_LONGS(KVM_MAX_VCPUS)]; struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; unsigned long pending_irqs; @@ -2243,14 +2324,12 @@ int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu, __u8 __user *buf, int len) } } - if ((sigp_ctrl & SIGP_CTRL_C) && - (atomic_read(&vcpu->arch.sie_block->cpuflags) & - CPUSTAT_ECALL_PEND)) { + if (sca_ext_call_pending(vcpu, &scn)) { if (n + sizeof(irq) > len) return -ENOBUFS; memset(&irq, 0, sizeof(irq)); irq.type = KVM_S390_INT_EXTERNAL_CALL; - irq.u.extcall.code = sigp_ctrl & SIGP_CTRL_SCN_MASK; + irq.u.extcall.code = scn; if (copy_to_user(&buf[n], &irq, sizeof(irq))) return -EFAULT; n += sizeof(irq); diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 846589281b04..835d60bedb54 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -246,7 +246,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) break; case KVM_CAP_NR_VCPUS: case KVM_CAP_MAX_VCPUS: - r = KVM_MAX_VCPUS; + r = sclp.has_esca ? KVM_S390_ESCA_CPU_SLOTS + : KVM_S390_BSCA_CPU_SLOTS; break; case KVM_CAP_NR_MEMSLOTS: r = KVM_USER_MEM_SLOTS; @@ -257,6 +258,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_S390_VECTOR_REGISTERS: r = MACHINE_HAS_VX; break; + case KVM_CAP_S390_RI: + r = test_facility(64); + break; default: r = 0; } @@ -283,6 +287,8 @@ static void kvm_s390_sync_dirty_log(struct kvm *kvm, } /* Section: vm related */ +static void sca_del_vcpu(struct kvm_vcpu *vcpu); + /* * Get (and clear) the dirty memory log for a memory slot. */ @@ -355,6 +361,20 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s", r ? "(not available)" : "(success)"); break; + case KVM_CAP_S390_RI: + r = -EINVAL; + mutex_lock(&kvm->lock); + if (atomic_read(&kvm->online_vcpus)) { + r = -EBUSY; + } else if (test_facility(64)) { + set_kvm_facility(kvm->arch.model.fac->mask, 64); + set_kvm_facility(kvm->arch.model.fac->list, 64); + r = 0; + } + mutex_unlock(&kvm->lock); + VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s", + r ? "(not available)" : "(success)"); + break; case KVM_CAP_S390_USER_STSI: VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI"); kvm->arch.user_stsi = 1; @@ -375,8 +395,8 @@ static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *att case KVM_S390_VM_MEM_LIMIT_SIZE: ret = 0; VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes", - kvm->arch.gmap->asce_end); - if (put_user(kvm->arch.gmap->asce_end, (u64 __user *)attr->addr)) + kvm->arch.mem_limit); + if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr)) ret = -EFAULT; break; default: @@ -428,9 +448,17 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att if (get_user(new_limit, (u64 __user *)attr->addr)) return -EFAULT; - if (new_limit > kvm->arch.gmap->asce_end) + if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT && + new_limit > kvm->arch.mem_limit) return -E2BIG; + if (!new_limit) + return -EINVAL; + + /* gmap_alloc takes last usable address */ + if (new_limit != KVM_S390_NO_MEM_LIMIT) + new_limit -= 1; + ret = -EBUSY; mutex_lock(&kvm->lock); if (atomic_read(&kvm->online_vcpus) == 0) { @@ -447,7 +475,9 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att } } mutex_unlock(&kvm->lock); - VM_EVENT(kvm, 3, "SET: max guest memory: %lu bytes", new_limit); + VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit); + VM_EVENT(kvm, 3, "New guest asce: 0x%pK", + (void *) kvm->arch.gmap->asce); break; } default: @@ -1024,7 +1054,7 @@ static int kvm_s390_apxa_installed(void) u8 config[128]; int cc; - if (test_facility(2) && test_facility(12)) { + if (test_facility(12)) { cc = kvm_s390_query_ap_config(config); if (cc) @@ -1075,6 +1105,15 @@ static int kvm_s390_crypto_init(struct kvm *kvm) return 0; } +static void sca_dispose(struct kvm *kvm) +{ + if (kvm->arch.use_esca) + free_pages_exact(kvm->arch.sca, sizeof(struct esca_block)); + else + free_page((unsigned long)(kvm->arch.sca)); + kvm->arch.sca = NULL; +} + int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) { int i, rc; @@ -1098,14 +1137,17 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) rc = -ENOMEM; - kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL); + kvm->arch.use_esca = 0; /* start with basic SCA */ + rwlock_init(&kvm->arch.sca_lock); + kvm->arch.sca = (struct bsca_block *) get_zeroed_page(GFP_KERNEL); if (!kvm->arch.sca) goto out_err; spin_lock(&kvm_lock); sca_offset += 16; - if (sca_offset + sizeof(struct sca_block) > PAGE_SIZE) + if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE) sca_offset = 0; - kvm->arch.sca = (struct sca_block *) ((char *) kvm->arch.sca + sca_offset); + kvm->arch.sca = (struct bsca_block *) + ((char *) kvm->arch.sca + sca_offset); spin_unlock(&kvm_lock); sprintf(debug_name, "kvm-%u", current->pid); @@ -1157,8 +1199,14 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) if (type & KVM_VM_S390_UCONTROL) { kvm->arch.gmap = NULL; + kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT; } else { - kvm->arch.gmap = gmap_alloc(current->mm, (1UL << 44) - 1); + if (sclp.hamax == U64_MAX) + kvm->arch.mem_limit = TASK_MAX_SIZE; + else + kvm->arch.mem_limit = min_t(unsigned long, TASK_MAX_SIZE, + sclp.hamax + 1); + kvm->arch.gmap = gmap_alloc(current->mm, kvm->arch.mem_limit - 1); if (!kvm->arch.gmap) goto out_err; kvm->arch.gmap->private = kvm; @@ -1170,14 +1218,14 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm->arch.epoch = 0; spin_lock_init(&kvm->arch.start_stop_lock); - KVM_EVENT(3, "vm 0x%p created by pid %u", kvm, current->pid); + KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid); return 0; out_err: kfree(kvm->arch.crypto.crycb); free_page((unsigned long)kvm->arch.model.fac); debug_unregister(kvm->arch.dbf); - free_page((unsigned long)(kvm->arch.sca)); + sca_dispose(kvm); KVM_EVENT(3, "creation of vm failed: %d", rc); return rc; } @@ -1188,14 +1236,8 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id); kvm_s390_clear_local_irqs(vcpu); kvm_clear_async_pf_completion_queue(vcpu); - if (!kvm_is_ucontrol(vcpu->kvm)) { - clear_bit(63 - vcpu->vcpu_id, - (unsigned long *) &vcpu->kvm->arch.sca->mcn); - if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda == - (__u64) vcpu->arch.sie_block) - vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0; - } - smp_mb(); + if (!kvm_is_ucontrol(vcpu->kvm)) + sca_del_vcpu(vcpu); if (kvm_is_ucontrol(vcpu->kvm)) gmap_free(vcpu->arch.gmap); @@ -1228,14 +1270,14 @@ void kvm_arch_destroy_vm(struct kvm *kvm) { kvm_free_vcpus(kvm); free_page((unsigned long)kvm->arch.model.fac); - free_page((unsigned long)(kvm->arch.sca)); + sca_dispose(kvm); debug_unregister(kvm->arch.dbf); kfree(kvm->arch.crypto.crycb); if (!kvm_is_ucontrol(kvm)) gmap_free(kvm->arch.gmap); kvm_s390_destroy_adapters(kvm); kvm_s390_clear_float_irqs(kvm); - KVM_EVENT(3, "vm 0x%p destroyed", kvm); + KVM_EVENT(3, "vm 0x%pK destroyed", kvm); } /* Section: vcpu related */ @@ -1249,6 +1291,117 @@ static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu) return 0; } +static void sca_del_vcpu(struct kvm_vcpu *vcpu) +{ + read_lock(&vcpu->kvm->arch.sca_lock); + if (vcpu->kvm->arch.use_esca) { + struct esca_block *sca = vcpu->kvm->arch.sca; + + clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn); + sca->cpu[vcpu->vcpu_id].sda = 0; + } else { + struct bsca_block *sca = vcpu->kvm->arch.sca; + + clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn); + sca->cpu[vcpu->vcpu_id].sda = 0; + } + read_unlock(&vcpu->kvm->arch.sca_lock); +} + +static void sca_add_vcpu(struct kvm_vcpu *vcpu) +{ + read_lock(&vcpu->kvm->arch.sca_lock); + if (vcpu->kvm->arch.use_esca) { + struct esca_block *sca = vcpu->kvm->arch.sca; + + sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block; + vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); + vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU; + vcpu->arch.sie_block->ecb2 |= 0x04U; + set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn); + } else { + struct bsca_block *sca = vcpu->kvm->arch.sca; + + sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block; + vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); + vcpu->arch.sie_block->scaol = (__u32)(__u64)sca; + set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn); + } + read_unlock(&vcpu->kvm->arch.sca_lock); +} + +/* Basic SCA to Extended SCA data copy routines */ +static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s) +{ + d->sda = s->sda; + d->sigp_ctrl.c = s->sigp_ctrl.c; + d->sigp_ctrl.scn = s->sigp_ctrl.scn; +} + +static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s) +{ + int i; + + d->ipte_control = s->ipte_control; + d->mcn[0] = s->mcn; + for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++) + sca_copy_entry(&d->cpu[i], &s->cpu[i]); +} + +static int sca_switch_to_extended(struct kvm *kvm) +{ + struct bsca_block *old_sca = kvm->arch.sca; + struct esca_block *new_sca; + struct kvm_vcpu *vcpu; + unsigned int vcpu_idx; + u32 scaol, scaoh; + + new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO); + if (!new_sca) + return -ENOMEM; + + scaoh = (u32)((u64)(new_sca) >> 32); + scaol = (u32)(u64)(new_sca) & ~0x3fU; + + kvm_s390_vcpu_block_all(kvm); + write_lock(&kvm->arch.sca_lock); + + sca_copy_b_to_e(new_sca, old_sca); + + kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) { + vcpu->arch.sie_block->scaoh = scaoh; + vcpu->arch.sie_block->scaol = scaol; + vcpu->arch.sie_block->ecb2 |= 0x04U; + } + kvm->arch.sca = new_sca; + kvm->arch.use_esca = 1; + + write_unlock(&kvm->arch.sca_lock); + kvm_s390_vcpu_unblock_all(kvm); + + free_page((unsigned long)old_sca); + + VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)", + old_sca, kvm->arch.sca); + return 0; +} + +static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id) +{ + int rc; + + if (id < KVM_S390_BSCA_CPU_SLOTS) + return true; + if (!sclp.has_esca) + return false; + + mutex_lock(&kvm->lock); + rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm); + mutex_unlock(&kvm->lock); + + return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS; +} + int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) { vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; @@ -1259,6 +1412,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) KVM_SYNC_CRS | KVM_SYNC_ARCH0 | KVM_SYNC_PFAULT; + if (test_kvm_facility(vcpu->kvm, 64)) + vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB; if (test_kvm_facility(vcpu->kvm, 129)) vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS; @@ -1369,8 +1524,11 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch; preempt_enable(); mutex_unlock(&vcpu->kvm->lock); - if (!kvm_is_ucontrol(vcpu->kvm)) + if (!kvm_is_ucontrol(vcpu->kvm)) { vcpu->arch.gmap = vcpu->kvm->arch.gmap; + sca_add_vcpu(vcpu); + } + } static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu) @@ -1439,10 +1597,13 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->eca |= 1; if (sclp.has_sigpif) vcpu->arch.sie_block->eca |= 0x10000000U; + if (test_kvm_facility(vcpu->kvm, 64)) + vcpu->arch.sie_block->ecb3 |= 0x01; if (test_kvm_facility(vcpu->kvm, 129)) { vcpu->arch.sie_block->eca |= 0x00020000; vcpu->arch.sie_block->ecd |= 0x20000000; } + vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb; vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE; if (vcpu->kvm->arch.use_cmma) { @@ -1465,7 +1626,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, struct sie_page *sie_page; int rc = -EINVAL; - if (id >= KVM_MAX_VCPUS) + if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id)) goto out; rc = -ENOMEM; @@ -1482,20 +1643,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb; vcpu->arch.sie_block->icpua = id; - if (!kvm_is_ucontrol(kvm)) { - if (!kvm->arch.sca) { - WARN_ON_ONCE(1); - goto out_free_cpu; - } - if (!kvm->arch.sca->cpu[id].sda) - kvm->arch.sca->cpu[id].sda = - (__u64) vcpu->arch.sie_block; - vcpu->arch.sie_block->scaoh = - (__u32)(((__u64)kvm->arch.sca) >> 32); - vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca; - set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn); - } - spin_lock_init(&vcpu->arch.local_int.lock); vcpu->arch.local_int.float_int = &kvm->arch.float_int; vcpu->arch.local_int.wq = &vcpu->wq; @@ -1509,15 +1656,13 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, */ vcpu->arch.guest_fpregs.fprs = kzalloc(sizeof(freg_t) * __NUM_FPRS, GFP_KERNEL); - if (!vcpu->arch.guest_fpregs.fprs) { - rc = -ENOMEM; + if (!vcpu->arch.guest_fpregs.fprs) goto out_free_sie_block; - } rc = kvm_vcpu_init(vcpu, kvm, id); if (rc) goto out_free_sie_block; - VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu, + VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu, vcpu->arch.sie_block); trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block); @@ -2013,7 +2158,8 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu) */ kvm_check_async_pf_completion(vcpu); - memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16); + vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14]; + vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15]; if (need_resched()) schedule(); @@ -2071,8 +2217,6 @@ static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu) static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) { - int rc = -1; - VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", vcpu->arch.sie_block->icptcode); trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode); @@ -2080,40 +2224,36 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) if (guestdbg_enabled(vcpu)) kvm_s390_restore_guest_per_regs(vcpu); - if (exit_reason >= 0) { - rc = 0; + vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14; + vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15; + + if (vcpu->arch.sie_block->icptcode > 0) { + int rc = kvm_handle_sie_intercept(vcpu); + + if (rc != -EOPNOTSUPP) + return rc; + vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC; + vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode; + vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa; + vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb; + return -EREMOTE; + } else if (exit_reason != -EFAULT) { + vcpu->stat.exit_null++; + return 0; } else if (kvm_is_ucontrol(vcpu->kvm)) { vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL; vcpu->run->s390_ucontrol.trans_exc_code = current->thread.gmap_addr; vcpu->run->s390_ucontrol.pgm_code = 0x10; - rc = -EREMOTE; - + return -EREMOTE; } else if (current->thread.gmap_pfault) { trace_kvm_s390_major_guest_pfault(vcpu); current->thread.gmap_pfault = 0; - if (kvm_arch_setup_async_pf(vcpu)) { - rc = 0; - } else { - gpa_t gpa = current->thread.gmap_addr; - rc = kvm_arch_fault_in_page(vcpu, gpa, 1); - } - } - - if (rc == -1) - rc = vcpu_post_run_fault_in_sie(vcpu); - - memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16); - - if (rc == 0) { - if (kvm_is_ucontrol(vcpu->kvm)) - /* Don't exit for host interrupts. */ - rc = vcpu->arch.sie_block->icptcode ? -EOPNOTSUPP : 0; - else - rc = kvm_handle_sie_intercept(vcpu); + if (kvm_arch_setup_async_pf(vcpu)) + return 0; + return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1); } - - return rc; + return vcpu_post_run_fault_in_sie(vcpu); } static int __vcpu_run(struct kvm_vcpu *vcpu) @@ -2233,18 +2373,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) rc = 0; } - if (rc == -EOPNOTSUPP) { - /* intercept cannot be handled in-kernel, prepare kvm-run */ - kvm_run->exit_reason = KVM_EXIT_S390_SIEIC; - kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode; - kvm_run->s390_sieic.ipa = vcpu->arch.sie_block->ipa; - kvm_run->s390_sieic.ipb = vcpu->arch.sie_block->ipb; - rc = 0; - } - if (rc == -EREMOTE) { - /* intercept was handled, but userspace support is needed - * kvm_run has been prepared by the handler */ + /* userspace support is needed, kvm_run has been prepared */ rc = 0; } @@ -2270,37 +2400,37 @@ int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa) u64 clkcomp; int rc; + px = kvm_s390_get_prefix(vcpu); if (gpa == KVM_S390_STORE_STATUS_NOADDR) { if (write_guest_abs(vcpu, 163, &archmode, 1)) return -EFAULT; - gpa = SAVE_AREA_BASE; + gpa = 0; } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) { if (write_guest_real(vcpu, 163, &archmode, 1)) return -EFAULT; - gpa = kvm_s390_real_to_abs(vcpu, SAVE_AREA_BASE); - } - rc = write_guest_abs(vcpu, gpa + offsetof(struct save_area, fp_regs), + gpa = px; + } else + gpa -= __LC_FPREGS_SAVE_AREA; + rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA, vcpu->arch.guest_fpregs.fprs, 128); - rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, gp_regs), + rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA, vcpu->run->s.regs.gprs, 128); - rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, psw), + rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA, &vcpu->arch.sie_block->gpsw, 16); - px = kvm_s390_get_prefix(vcpu); - rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, pref_reg), + rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA, &px, 4); - rc |= write_guest_abs(vcpu, - gpa + offsetof(struct save_area, fp_ctrl_reg), + rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA, &vcpu->arch.guest_fpregs.fpc, 4); - rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, tod_reg), + rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA, &vcpu->arch.sie_block->todpr, 4); - rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, timer), + rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA, &vcpu->arch.sie_block->cputm, 8); clkcomp = vcpu->arch.sie_block->ckc >> 8; - rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, clk_cmp), + rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA, &clkcomp, 8); - rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, acc_regs), + rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA, &vcpu->run->s.regs.acrs, 64); - rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, ctrl_regs), + rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA, &vcpu->arch.sie_block->gcr, 128); return rc ? -EFAULT : 0; } @@ -2736,6 +2866,9 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, if (mem->memory_size & 0xffffful) return -EINVAL; + if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit) + return -EINVAL; + return 0; } @@ -2767,6 +2900,11 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, static int __init kvm_s390_init(void) { + if (!sclp.has_sief2) { + pr_info("SIE not available\n"); + return -ENODEV; + } + return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); } diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 1e70e00d3c5e..df1abada1f36 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -340,4 +340,11 @@ void kvm_s390_clear_bp_data(struct kvm_vcpu *vcpu); void kvm_s390_prepare_debug_exit(struct kvm_vcpu *vcpu); void kvm_s390_handle_per_event(struct kvm_vcpu *vcpu); +/* support for Basic/Extended SCA handling */ +static inline union ipte_control *kvm_s390_get_ipte_control(struct kvm *kvm) +{ + struct bsca_block *sca = kvm->arch.sca; /* SCA version doesn't matter */ + + return &sca->ipte_control; +} #endif diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index d76b51cb4b62..ed74e86d9b9e 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -355,7 +355,7 @@ static int handle_stfl(struct kvm_vcpu *vcpu) * into a u32 memory representation. They will remain bits 0-31. */ fac = *vcpu->kvm->arch.model.fac->list >> 32; - rc = write_guest_lc(vcpu, offsetof(struct _lowcore, stfl_fac_list), + rc = write_guest_lc(vcpu, offsetof(struct lowcore, stfl_fac_list), &fac, sizeof(fac)); if (rc) return rc; diff --git a/arch/s390/kvm/trace-s390.h b/arch/s390/kvm/trace-s390.h index cc1d6c68356f..396485bca191 100644 --- a/arch/s390/kvm/trace-s390.h +++ b/arch/s390/kvm/trace-s390.h @@ -55,8 +55,8 @@ TRACE_EVENT(kvm_s390_create_vcpu, __entry->sie_block = sie_block; ), - TP_printk("create cpu %d at %p, sie block at %p", __entry->id, - __entry->vcpu, __entry->sie_block) + TP_printk("create cpu %d at 0x%pK, sie block at 0x%pK", + __entry->id, __entry->vcpu, __entry->sie_block) ); TRACE_EVENT(kvm_s390_destroy_vcpu, @@ -254,7 +254,7 @@ TRACE_EVENT(kvm_s390_enable_css, __entry->kvm = kvm; ), - TP_printk("enabling channel I/O support (kvm @ %p)\n", + TP_printk("enabling channel I/O support (kvm @ %pK)\n", __entry->kvm) ); diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c index 427aa44b3505..d4549c964589 100644 --- a/arch/s390/lib/spinlock.c +++ b/arch/s390/lib/spinlock.c @@ -37,12 +37,22 @@ static inline void _raw_compare_and_delay(unsigned int *lock, unsigned int old) asm(".insn rsy,0xeb0000000022,%0,0,%1" : : "d" (old), "Q" (*lock)); } +static inline int cpu_is_preempted(int cpu) +{ + if (test_cpu_flag_of(CIF_ENABLED_WAIT, cpu)) + return 0; + if (smp_vcpu_scheduled(cpu)) + return 0; + return 1; +} + void arch_spin_lock_wait(arch_spinlock_t *lp) { unsigned int cpu = SPINLOCK_LOCKVAL; unsigned int owner; - int count; + int count, first_diag; + first_diag = 1; while (1) { owner = ACCESS_ONCE(lp->lock); /* Try to get the lock if it is free. */ @@ -51,9 +61,10 @@ void arch_spin_lock_wait(arch_spinlock_t *lp) return; continue; } - /* Check if the lock owner is running. */ - if (!smp_vcpu_scheduled(~owner)) { + /* First iteration: check if the lock owner is running. */ + if (first_diag && cpu_is_preempted(~owner)) { smp_yield_cpu(~owner); + first_diag = 0; continue; } /* Loop for a while on the lock value. */ @@ -67,10 +78,13 @@ void arch_spin_lock_wait(arch_spinlock_t *lp) continue; /* * For multiple layers of hypervisors, e.g. z/VM + LPAR - * yield the CPU if the lock is still unavailable. + * yield the CPU unconditionally. For LPAR rely on the + * sense running status. */ - if (!MACHINE_IS_LPAR) + if (!MACHINE_IS_LPAR || cpu_is_preempted(~owner)) { smp_yield_cpu(~owner); + first_diag = 0; + } } } EXPORT_SYMBOL(arch_spin_lock_wait); @@ -79,9 +93,10 @@ void arch_spin_lock_wait_flags(arch_spinlock_t *lp, unsigned long flags) { unsigned int cpu = SPINLOCK_LOCKVAL; unsigned int owner; - int count; + int count, first_diag; local_irq_restore(flags); + first_diag = 1; while (1) { owner = ACCESS_ONCE(lp->lock); /* Try to get the lock if it is free. */ @@ -92,8 +107,9 @@ void arch_spin_lock_wait_flags(arch_spinlock_t *lp, unsigned long flags) local_irq_restore(flags); } /* Check if the lock owner is running. */ - if (!smp_vcpu_scheduled(~owner)) { + if (first_diag && cpu_is_preempted(~owner)) { smp_yield_cpu(~owner); + first_diag = 0; continue; } /* Loop for a while on the lock value. */ @@ -107,10 +123,13 @@ void arch_spin_lock_wait_flags(arch_spinlock_t *lp, unsigned long flags) continue; /* * For multiple layers of hypervisors, e.g. z/VM + LPAR - * yield the CPU if the lock is still unavailable. + * yield the CPU unconditionally. For LPAR rely on the + * sense running status. */ - if (!MACHINE_IS_LPAR) + if (!MACHINE_IS_LPAR || cpu_is_preempted(~owner)) { smp_yield_cpu(~owner); + first_diag = 0; + } } } EXPORT_SYMBOL(arch_spin_lock_wait_flags); @@ -145,7 +164,7 @@ void _raw_read_lock_wait(arch_rwlock_t *rw) owner = 0; while (1) { if (count-- <= 0) { - if (owner && !smp_vcpu_scheduled(~owner)) + if (owner && cpu_is_preempted(~owner)) smp_yield_cpu(~owner); count = spin_retry; } @@ -191,7 +210,7 @@ void _raw_write_lock_wait(arch_rwlock_t *rw, unsigned int prev) owner = 0; while (1) { if (count-- <= 0) { - if (owner && !smp_vcpu_scheduled(~owner)) + if (owner && cpu_is_preempted(~owner)) smp_yield_cpu(~owner); count = spin_retry; } @@ -221,7 +240,7 @@ void _raw_write_lock_wait(arch_rwlock_t *rw) owner = 0; while (1) { if (count-- <= 0) { - if (owner && !smp_vcpu_scheduled(~owner)) + if (owner && cpu_is_preempted(~owner)) smp_yield_cpu(~owner); count = spin_retry; } @@ -265,7 +284,7 @@ void arch_lock_relax(unsigned int cpu) { if (!cpu) return; - if (MACHINE_IS_LPAR && smp_vcpu_scheduled(~cpu)) + if (MACHINE_IS_LPAR && !cpu_is_preempted(~cpu)) return; smp_yield_cpu(~cpu); } diff --git a/arch/s390/mm/extable.c b/arch/s390/mm/extable.c index 4d1ee88864e8..18c8b819b0aa 100644 --- a/arch/s390/mm/extable.c +++ b/arch/s390/mm/extable.c @@ -52,12 +52,16 @@ void sort_extable(struct exception_table_entry *start, int i; /* Normalize entries to being relative to the start of the section */ - for (p = start, i = 0; p < finish; p++, i += 8) + for (p = start, i = 0; p < finish; p++, i += 8) { p->insn += i; + p->fixup += i + 4; + } sort(start, finish - start, sizeof(*start), cmp_ex, NULL); /* Denormalize all entries */ - for (p = start, i = 0; p < finish; p++, i += 8) + for (p = start, i = 0; p < finish; p++, i += 8) { p->insn -= i; + p->fixup -= i + 4; + } } #ifdef CONFIG_MODULES diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c index 18fccc303db7..a1bf4ad8925d 100644 --- a/arch/s390/mm/extmem.c +++ b/arch/s390/mm/extmem.c @@ -94,7 +94,7 @@ static DEFINE_MUTEX(dcss_lock); static LIST_HEAD(dcss_list); static char *segtype_string[] = { "SW", "EW", "SR", "ER", "SN", "EN", "SC", "EW/EN-MIXED" }; -static int loadshr_scode, loadnsr_scode, findseg_scode; +static int loadshr_scode, loadnsr_scode; static int segext_scode, purgeseg_scode; static int scode_set; @@ -130,7 +130,6 @@ dcss_set_subcodes(void) loadshr_scode = DCSS_LOADSHRX; loadnsr_scode = DCSS_LOADNSRX; purgeseg_scode = DCSS_PURGESEG; - findseg_scode = DCSS_FINDSEGX; segext_scode = DCSS_SEGEXTX; return 0; } @@ -138,7 +137,6 @@ dcss_set_subcodes(void) loadshr_scode = DCSS_LOADNOLY; loadnsr_scode = DCSS_LOADNSR; purgeseg_scode = DCSS_PURGESEG; - findseg_scode = DCSS_FINDSEG; segext_scode = DCSS_SEGEXT; return 0; } diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index ec1a30d0d11a..1b903f6ad54a 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -254,7 +254,6 @@ static noinline void do_sigsegv(struct pt_regs *regs, int si_code) static noinline void do_no_context(struct pt_regs *regs) { const struct exception_table_entry *fixup; - unsigned long address; /* Are we prepared to handle this kernel fault? */ fixup = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN); @@ -267,7 +266,6 @@ static noinline void do_no_context(struct pt_regs *regs) * Oops. The kernel tried to access some bad page. We'll have to * terminate things with extreme prejudice. */ - address = regs->int_parm_long & __FAIL_ADDR_MASK; if (!user_space_fault(regs)) printk(KERN_ALERT "Unable to handle kernel pointer dereference" " in virtual kernel address space\n"); diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c index 12bbf0e8478f..13dab0c1645c 100644 --- a/arch/s390/mm/gup.c +++ b/arch/s390/mm/gup.c @@ -55,7 +55,7 @@ static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr) { unsigned long mask, result; - struct page *head, *page, *tail; + struct page *head, *page; int refs; result = write ? 0 : _SEGMENT_ENTRY_PROTECT; @@ -67,7 +67,6 @@ static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr, refs = 0; head = pmd_page(pmd); page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT); - tail = page; do { VM_BUG_ON(compound_head(page) != head); pages[*nr] = page; @@ -88,16 +87,6 @@ static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr, return 0; } - /* - * Any tail page need their mapcount reference taken before we - * return. - */ - while (refs--) { - if (PageTail(tail)) - get_huge_page_tail(tail); - tail++; - } - return 1; } @@ -116,16 +105,7 @@ static inline int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr, pmd = *pmdp; barrier(); next = pmd_addr_end(addr, end); - /* - * The pmd_trans_splitting() check below explains why - * pmdp_splitting_flush() has to serialize with - * smp_call_function() against our disabled IRQs, to stop - * this gup-fast code from running while we set the - * splitting bit in the pmd. Returning zero will take - * the slow path that will call wait_split_huge_page() - * if the pmd is still in splitting state. - */ - if (pmd_none(pmd) || pmd_trans_splitting(pmd)) + if (pmd_none(pmd)) return 0; if (unlikely(pmd_large(pmd))) { /* @@ -233,6 +213,7 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write, struct mm_struct *mm = current->mm; int nr, ret; + might_sleep(); start &= PAGE_MASK; nr = __get_user_pages_fast(start, nr_pages, write, pages); if (nr == nr_pages) diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c index 8a993a53fcd6..fec59c067d0d 100644 --- a/arch/s390/mm/maccess.c +++ b/arch/s390/mm/maccess.c @@ -163,11 +163,11 @@ static int is_swapped(unsigned long addr) unsigned long lc; int cpu; - if (addr < sizeof(struct _lowcore)) + if (addr < sizeof(struct lowcore)) return 1; for_each_online_cpu(cpu) { lc = (unsigned long) lowcore_ptr[cpu]; - if (addr > lc + sizeof(struct _lowcore) - 1 || addr < lc) + if (addr > lc + sizeof(struct lowcore) - 1 || addr < lc) continue; return 1; } diff --git a/arch/s390/mm/mem_detect.c b/arch/s390/mm/mem_detect.c index e00f0d5d296d..d612cc3eec6a 100644 --- a/arch/s390/mm/mem_detect.c +++ b/arch/s390/mm/mem_detect.c @@ -14,8 +14,6 @@ #include <asm/sclp.h> #include <asm/setup.h> -#define ADDR2G (1ULL << 31) - #define CHUNK_READ_WRITE 0 #define CHUNK_READ_ONLY 1 @@ -27,15 +25,14 @@ static inline void memblock_physmem_add(phys_addr_t start, phys_addr_t size) void __init detect_memory_memblock(void) { - unsigned long long memsize, rnmax, rzm; - unsigned long addr, size; + unsigned long memsize, rnmax, rzm, addr, size; int type; rzm = sclp.rzm; rnmax = sclp.rnmax; memsize = rzm * rnmax; if (!rzm) - rzm = 1ULL << 17; + rzm = 1UL << 17; max_physmem_end = memsize; addr = 0; /* keep memblock lists close to the kernel */ diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 54ef3bc01b43..a809fa8e6f8b 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -133,7 +133,7 @@ void crst_table_downgrade(struct mm_struct *mm, unsigned long limit) /** * gmap_alloc - allocate a guest address space * @mm: pointer to the parent mm_struct - * @limit: maximum size of the gmap address space + * @limit: maximum address of the gmap address space * * Returns a guest address space structure. */ @@ -402,7 +402,7 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from, if ((from | to | len) & (PMD_SIZE - 1)) return -EINVAL; if (len == 0 || from + len < from || to + len < to || - from + len > TASK_MAX_SIZE || to + len > gmap->asce_end) + from + len - 1 > TASK_MAX_SIZE || to + len - 1 > gmap->asce_end) return -EINVAL; flush = 0; @@ -578,17 +578,29 @@ int gmap_fault(struct gmap *gmap, unsigned long gaddr, { unsigned long vmaddr; int rc; + bool unlocked; down_read(&gmap->mm->mmap_sem); + +retry: + unlocked = false; vmaddr = __gmap_translate(gmap, gaddr); if (IS_ERR_VALUE(vmaddr)) { rc = vmaddr; goto out_up; } - if (fixup_user_fault(current, gmap->mm, vmaddr, fault_flags)) { + if (fixup_user_fault(current, gmap->mm, vmaddr, fault_flags, + &unlocked)) { rc = -EFAULT; goto out_up; } + /* + * In the case that fixup_user_fault unlocked the mmap_sem during + * faultin redo __gmap_translate to not race with a map/unmap_segment. + */ + if (unlocked) + goto retry; + rc = __gmap_link(gmap, gaddr, vmaddr); out_up: up_read(&gmap->mm->mmap_sem); @@ -603,10 +615,7 @@ static void gmap_zap_swap_entry(swp_entry_t entry, struct mm_struct *mm) else if (is_migration_entry(entry)) { struct page *page = migration_entry_to_page(entry); - if (PageAnon(page)) - dec_mm_counter(mm, MM_ANONPAGES); - else - dec_mm_counter(mm, MM_FILEPAGES); + dec_mm_counter(mm, mm_counter(page)); } free_swap_and_cache(entry); } @@ -717,12 +726,14 @@ int gmap_ipte_notify(struct gmap *gmap, unsigned long gaddr, unsigned long len) spinlock_t *ptl; pte_t *ptep, entry; pgste_t pgste; + bool unlocked; int rc = 0; if ((gaddr & ~PAGE_MASK) || (len & ~PAGE_MASK)) return -EINVAL; down_read(&gmap->mm->mmap_sem); while (len) { + unlocked = false; /* Convert gmap address and connect the page tables */ addr = __gmap_translate(gmap, gaddr); if (IS_ERR_VALUE(addr)) { @@ -730,10 +741,14 @@ int gmap_ipte_notify(struct gmap *gmap, unsigned long gaddr, unsigned long len) break; } /* Get the page mapped */ - if (fixup_user_fault(current, gmap->mm, addr, FAULT_FLAG_WRITE)) { + if (fixup_user_fault(current, gmap->mm, addr, FAULT_FLAG_WRITE, + &unlocked)) { rc = -EFAULT; break; } + /* While trying to map mmap_sem got unlocked. Let us retry */ + if (unlocked) + continue; rc = __gmap_link(gmap, gaddr, addr); if (rc) break; @@ -794,9 +809,11 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, spinlock_t *ptl; pgste_t old, new; pte_t *ptep; + bool unlocked; down_read(&mm->mmap_sem); retry: + unlocked = false; ptep = get_locked_pte(mm, addr, &ptl); if (unlikely(!ptep)) { up_read(&mm->mmap_sem); @@ -805,7 +822,12 @@ retry: if (!(pte_val(*ptep) & _PAGE_INVALID) && (pte_val(*ptep) & _PAGE_PROTECT)) { pte_unmap_unlock(ptep, ptl); - if (fixup_user_fault(current, mm, addr, FAULT_FLAG_WRITE)) { + /* + * We do not really care about unlocked. We will retry either + * way. But this allows fixup_user_fault to enable userfaultfd. + */ + if (fixup_user_fault(current, mm, addr, FAULT_FLAG_WRITE, + &unlocked)) { up_read(&mm->mmap_sem); return -EFAULT; } @@ -1308,22 +1330,6 @@ int pmdp_set_access_flags(struct vm_area_struct *vma, return 1; } -static void pmdp_splitting_flush_sync(void *arg) -{ - /* Simply deliver the interrupt */ -} - -void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address, - pmd_t *pmdp) -{ - VM_BUG_ON(address & ~HPAGE_PMD_MASK); - if (!test_and_set_bit(_SEGMENT_ENTRY_SPLIT_BIT, - (unsigned long *) pmdp)) { - /* need to serialize against gup-fast (IRQ disabled) */ - smp_call_function(pmdp_splitting_flush_sync, NULL, 1); - } -} - void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, pgtable_t pgtable) { diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 9a0c4c22e536..3c0bfc1f2694 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -408,7 +408,7 @@ static void emit_load_skb_data_hlen(struct bpf_jit *jit) * Save registers and create stack frame if necessary. * See stack frame layout desription in "bpf_jit.h"! */ -static void bpf_jit_prologue(struct bpf_jit *jit, bool is_classic) +static void bpf_jit_prologue(struct bpf_jit *jit) { if (jit->seen & SEEN_TAIL_CALL) { /* xc STK_OFF_TCCNT(4,%r15),STK_OFF_TCCNT(%r15) */ @@ -448,15 +448,6 @@ static void bpf_jit_prologue(struct bpf_jit *jit, bool is_classic) /* stg %b1,ST_OFF_SKBP(%r0,%r15) */ EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0, REG_15, STK_OFF_SKBP); - /* Clear A (%b0) and X (%b7) registers for converted BPF programs */ - if (is_classic) { - if (REG_SEEN(BPF_REG_A)) - /* lghi %ba,0 */ - EMIT4_IMM(0xa7090000, BPF_REG_A, 0); - if (REG_SEEN(BPF_REG_X)) - /* lghi %bx,0 */ - EMIT4_IMM(0xa7090000, BPF_REG_X, 0); - } } /* @@ -1245,7 +1236,7 @@ static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp) jit->lit = jit->lit_start; jit->prg = 0; - bpf_jit_prologue(jit, bpf_prog_was_classic(fp)); + bpf_jit_prologue(jit); for (i = 0; i < fp->len; i += insn_count) { insn_count = bpf_jit_insn(jit, fp, i); if (insn_count < 0) diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 7ef12a3ace3a..11d4f277e9f6 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -701,8 +701,7 @@ static int zpci_restore(struct device *dev) goto out; zpci_map_resources(pdev); - zpci_register_ioat(zdev, 0, zdev->start_dma + PAGE_OFFSET, - zdev->start_dma + zdev->iommu_size - 1, + zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, (u64) zdev->dma_table); out: diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index d348f2c09a1e..4638b93c7632 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -366,8 +366,7 @@ static void *s390_dma_alloc(struct device *dev, size_t size, pa = page_to_phys(page); memset((void *) pa, 0, size); - map = s390_dma_map_pages(dev, page, pa % PAGE_SIZE, - size, DMA_BIDIRECTIONAL, NULL); + map = s390_dma_map_pages(dev, page, 0, size, DMA_BIDIRECTIONAL, NULL); if (dma_mapping_error(dev, map)) { free_pages(pa, get_order(size)); return NULL; @@ -458,7 +457,19 @@ int zpci_dma_init_device(struct zpci_dev *zdev) goto out_clean; } - zdev->iommu_size = (unsigned long) high_memory - PAGE_OFFSET; + /* + * Restrict the iommu bitmap size to the minimum of the following: + * - main memory size + * - 3-level pagetable address limit minus start_dma offset + * - DMA address range allowed by the hardware (clp query pci fn) + * + * Also set zdev->end_dma to the actual end address of the usable + * range, instead of the theoretical maximum as reported by hardware. + */ + zdev->iommu_size = min3((u64) high_memory, + ZPCI_TABLE_SIZE_RT - zdev->start_dma, + zdev->end_dma - zdev->start_dma + 1); + zdev->end_dma = zdev->start_dma + zdev->iommu_size - 1; zdev->iommu_pages = zdev->iommu_size >> PAGE_SHIFT; zdev->iommu_bitmap = vzalloc(zdev->iommu_pages / 8); if (!zdev->iommu_bitmap) { @@ -466,10 +477,7 @@ int zpci_dma_init_device(struct zpci_dev *zdev) goto out_reg; } - rc = zpci_register_ioat(zdev, - 0, - zdev->start_dma + PAGE_OFFSET, - zdev->start_dma + zdev->iommu_size - 1, + rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, (u64) zdev->dma_table); if (rc) goto out_reg; diff --git a/arch/s390/tools/.gitignore b/arch/s390/tools/.gitignore new file mode 100644 index 000000000000..72a4b2cf1365 --- /dev/null +++ b/arch/s390/tools/.gitignore @@ -0,0 +1 @@ +gen_facilities diff --git a/arch/s390/tools/Makefile b/arch/s390/tools/Makefile new file mode 100644 index 000000000000..6d9814c9df2b --- /dev/null +++ b/arch/s390/tools/Makefile @@ -0,0 +1,15 @@ +# +# Makefile for s390 specific build tools +# + +hostprogs-y += gen_facilities +HOSTCFLAGS_gen_facilities.o += -Wall $(LINUXINCLUDE) + +define filechk_facilities.h + $(obj)/gen_facilities +endef + +$(obj)/gen_facilities.o: $(srctree)/arch/s390/tools/gen_facilities.c + +include/generated/facilities.h: $(obj)/gen_facilities FORCE + $(call filechk,facilities.h) diff --git a/arch/s390/tools/gen_facilities.c b/arch/s390/tools/gen_facilities.c new file mode 100644 index 000000000000..e2660d27889b --- /dev/null +++ b/arch/s390/tools/gen_facilities.c @@ -0,0 +1,67 @@ +/* + * Simple program to generate defines out of facility lists that use the bit + * numbering scheme from the Princples of Operations: most significant bit + * has bit number 0. + * + * Copyright IBM Corp. 2015 + * + */ + +#define S390_GEN_FACILITIES_C + +#include <strings.h> +#include <string.h> +#include <stdlib.h> +#include <stdio.h> +#include <asm/facilities_src.h> + +static void print_facility_list(struct facility_def *def) +{ + unsigned int high, bit, dword, i; + unsigned long long *array; + + array = calloc(1, 8); + if (!array) + exit(EXIT_FAILURE); + high = 0; + for (i = 0; def->bits[i] != -1; i++) { + bit = 63 - (def->bits[i] & 63); + dword = def->bits[i] / 64; + if (dword > high) { + array = realloc(array, (dword + 1) * 8); + if (!array) + exit(EXIT_FAILURE); + memset(array + high + 1, 0, (dword - high) * 8); + high = dword; + } + array[dword] |= 1ULL << bit; + } + printf("#define %s ", def->name); + for (i = 0; i <= high; i++) + printf("_AC(0x%016llx,UL)%c", array[i], i < high ? ',' : '\n'); + printf("#define %s_DWORDS %d\n", def->name, high + 1); + free(array); +} + +static void print_facility_lists(void) +{ + unsigned int i; + + for (i = 0; i < sizeof(facility_defs) / sizeof(facility_defs[0]); i++) + print_facility_list(&facility_defs[i]); +} + +int main(int argc, char **argv) +{ + printf("#ifndef __ASM_S390_FACILITIES__\n"); + printf("#define __ASM_S390_FACILITIES__\n"); + printf("/*\n"); + printf(" * DO NOT MODIFY.\n"); + printf(" *\n"); + printf(" * This file was generated by %s\n", __FILE__); + printf(" */\n\n"); + printf("#include <linux/const.h>\n\n"); + print_facility_lists(); + printf("\n#endif\n"); + return 0; +} diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index d514df7e04dd..6c391a5d3e5c 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -130,9 +130,6 @@ config STACKTRACE_SUPPORT config LOCKDEP_SUPPORT def_bool y -config HAVE_LATENCYTOP_SUPPORT - def_bool y - config ARCH_HAS_ILOG2_U32 def_bool n diff --git a/arch/sh/boards/mach-ap325rxa/setup.c b/arch/sh/boards/mach-ap325rxa/setup.c index cbd2a9f02a91..62c3b81300ed 100644 --- a/arch/sh/boards/mach-ap325rxa/setup.c +++ b/arch/sh/boards/mach-ap325rxa/setup.c @@ -27,10 +27,10 @@ #include <linux/gpio.h> #include <linux/videodev2.h> #include <linux/sh_intc.h> -#include <media/ov772x.h> +#include <media/i2c/ov772x.h> #include <media/soc_camera.h> -#include <media/soc_camera_platform.h> -#include <media/sh_mobile_ceu.h> +#include <linux/platform_data/media/soc_camera_platform.h> +#include <media/drv-intf/sh_mobile_ceu.h> #include <video/sh_mobile_lcdc.h> #include <asm/io.h> #include <asm/clock.h> diff --git a/arch/sh/boards/mach-ecovec24/setup.c b/arch/sh/boards/mach-ecovec24/setup.c index d531791f06ff..a9c0c07386fd 100644 --- a/arch/sh/boards/mach-ecovec24/setup.c +++ b/arch/sh/boards/mach-ecovec24/setup.c @@ -38,10 +38,10 @@ #include <video/sh_mobile_lcdc.h> #include <sound/sh_fsi.h> #include <sound/simple_card.h> -#include <media/sh_mobile_ceu.h> +#include <media/drv-intf/sh_mobile_ceu.h> #include <media/soc_camera.h> -#include <media/tw9910.h> -#include <media/mt9t112.h> +#include <media/i2c/tw9910.h> +#include <media/i2c/mt9t112.h> #include <asm/heartbeat.h> #include <asm/clock.h> #include <asm/suspend.h> @@ -900,8 +900,8 @@ static struct platform_device irda_device = { .resource = irda_resources, }; -#include <media/ak881x.h> -#include <media/sh_vou.h> +#include <media/i2c/ak881x.h> +#include <media/drv-intf/sh_vou.h> static struct ak881x_pdata ak881x_pdata = { .flags = AK881X_IF_MODE_SLAVE, diff --git a/arch/sh/boards/mach-kfr2r09/setup.c b/arch/sh/boards/mach-kfr2r09/setup.c index 7d997cec09c5..6bd9230e64e3 100644 --- a/arch/sh/boards/mach-kfr2r09/setup.c +++ b/arch/sh/boards/mach-kfr2r09/setup.c @@ -27,9 +27,9 @@ #include <linux/usb/r8a66597.h> #include <linux/videodev2.h> #include <linux/sh_intc.h> -#include <media/rj54n1cb0c.h> +#include <media/i2c/rj54n1cb0c.h> #include <media/soc_camera.h> -#include <media/sh_mobile_ceu.h> +#include <media/drv-intf/sh_mobile_ceu.h> #include <video/sh_mobile_lcdc.h> #include <asm/suspend.h> #include <asm/clock.h> diff --git a/arch/sh/boards/mach-migor/setup.c b/arch/sh/boards/mach-migor/setup.c index 29b7c0dcfc51..7a04da3efce4 100644 --- a/arch/sh/boards/mach-migor/setup.c +++ b/arch/sh/boards/mach-migor/setup.c @@ -27,10 +27,10 @@ #include <linux/videodev2.h> #include <linux/sh_intc.h> #include <video/sh_mobile_lcdc.h> -#include <media/sh_mobile_ceu.h> -#include <media/ov772x.h> +#include <media/drv-intf/sh_mobile_ceu.h> +#include <media/i2c/ov772x.h> #include <media/soc_camera.h> -#include <media/tw9910.h> +#include <media/i2c/tw9910.h> #include <asm/clock.h> #include <asm/machvec.h> #include <asm/io.h> @@ -167,7 +167,7 @@ static struct mtd_partition migor_nand_flash_partitions[] = { static void migor_nand_flash_cmd_ctl(struct mtd_info *mtd, int cmd, unsigned int ctrl) { - struct nand_chip *chip = mtd->priv; + struct nand_chip *chip = mtd_to_nand(mtd); if (cmd == NAND_CMD_NONE) return; diff --git a/arch/sh/boards/mach-se/7724/setup.c b/arch/sh/boards/mach-se/7724/setup.c index 4f6635a075f2..e0e1df136642 100644 --- a/arch/sh/boards/mach-se/7724/setup.c +++ b/arch/sh/boards/mach-se/7724/setup.c @@ -30,7 +30,7 @@ #include <linux/sh_intc.h> #include <linux/videodev2.h> #include <video/sh_mobile_lcdc.h> -#include <media/sh_mobile_ceu.h> +#include <media/drv-intf/sh_mobile_ceu.h> #include <sound/sh_fsi.h> #include <sound/simple_card.h> #include <asm/io.h> @@ -534,8 +534,8 @@ static struct platform_device irda_device = { .resource = irda_resources, }; -#include <media/ak881x.h> -#include <media/sh_vou.h> +#include <media/i2c/ak881x.h> +#include <media/drv-intf/sh_vou.h> static struct ak881x_pdata ak881x_pdata = { .flags = AK881X_IF_MODE_SLAVE, diff --git a/arch/sh/kernel/cpu/clock-cpg.c b/arch/sh/kernel/cpu/clock-cpg.c index 8525a671266f..786c0769b4c3 100644 --- a/arch/sh/kernel/cpu/clock-cpg.c +++ b/arch/sh/kernel/cpu/clock-cpg.c @@ -63,7 +63,6 @@ int __init __deprecated cpg_clk_init(void) clk_add_alias("fck", "sh-mtu2", "peripheral_clk", NULL); clk_add_alias("fck", "sh-cmt-16.0", "peripheral_clk", NULL); clk_add_alias("fck", "sh-cmt-32.0", "peripheral_clk", NULL); - clk_add_alias("sci_ick", NULL, "peripheral_clk", NULL); return ret; } diff --git a/arch/sh/kernel/cpu/sh2a/clock-sh7264.c b/arch/sh/kernel/cpu/sh2a/clock-sh7264.c index 8638fba6cd7f..7e06e39b0958 100644 --- a/arch/sh/kernel/cpu/sh2a/clock-sh7264.c +++ b/arch/sh/kernel/cpu/sh2a/clock-sh7264.c @@ -115,7 +115,14 @@ static struct clk_lookup lookups[] = { CLKDEV_CON_ID("peripheral_clk", &div4_clks[DIV4_P]), /* MSTP clocks */ - CLKDEV_CON_ID("sci_ick", &mstp_clks[MSTP77]), + CLKDEV_ICK_ID("fck", "sh-sci.0", &mstp_clks[MSTP77]), + CLKDEV_ICK_ID("fck", "sh-sci.1", &mstp_clks[MSTP77]), + CLKDEV_ICK_ID("fck", "sh-sci.2", &mstp_clks[MSTP77]), + CLKDEV_ICK_ID("fck", "sh-sci.3", &mstp_clks[MSTP77]), + CLKDEV_ICK_ID("fck", "sh-sci.4", &mstp_clks[MSTP77]), + CLKDEV_ICK_ID("fck", "sh-sci.5", &mstp_clks[MSTP77]), + CLKDEV_ICK_ID("fck", "sh-sci.6", &mstp_clks[MSTP77]), + CLKDEV_ICK_ID("fck", "sh-sci.7", &mstp_clks[MSTP77]), CLKDEV_CON_ID("vdc3", &mstp_clks[MSTP74]), CLKDEV_ICK_ID("fck", "sh-cmt-16.0", &mstp_clks[MSTP72]), CLKDEV_CON_ID("usb0", &mstp_clks[MSTP60]), diff --git a/arch/sh/kernel/cpu/sh2a/clock-sh7269.c b/arch/sh/kernel/cpu/sh2a/clock-sh7269.c index f8a5c2abdfb3..663a97bed554 100644 --- a/arch/sh/kernel/cpu/sh2a/clock-sh7269.c +++ b/arch/sh/kernel/cpu/sh2a/clock-sh7269.c @@ -150,14 +150,14 @@ static struct clk_lookup lookups[] = { CLKDEV_CON_ID("bus_clk", &div4_clks[DIV4_B]), /* MSTP clocks */ - CLKDEV_ICK_ID("sci_fck", "sh-sci.0", &mstp_clks[MSTP47]), - CLKDEV_ICK_ID("sci_fck", "sh-sci.1", &mstp_clks[MSTP46]), - CLKDEV_ICK_ID("sci_fck", "sh-sci.2", &mstp_clks[MSTP45]), - CLKDEV_ICK_ID("sci_fck", "sh-sci.3", &mstp_clks[MSTP44]), - CLKDEV_ICK_ID("sci_fck", "sh-sci.4", &mstp_clks[MSTP43]), - CLKDEV_ICK_ID("sci_fck", "sh-sci.5", &mstp_clks[MSTP42]), - CLKDEV_ICK_ID("sci_fck", "sh-sci.6", &mstp_clks[MSTP41]), - CLKDEV_ICK_ID("sci_fck", "sh-sci.7", &mstp_clks[MSTP40]), + CLKDEV_ICK_ID("fck", "sh-sci.0", &mstp_clks[MSTP47]), + CLKDEV_ICK_ID("fck", "sh-sci.1", &mstp_clks[MSTP46]), + CLKDEV_ICK_ID("fck", "sh-sci.2", &mstp_clks[MSTP45]), + CLKDEV_ICK_ID("fck", "sh-sci.3", &mstp_clks[MSTP44]), + CLKDEV_ICK_ID("fck", "sh-sci.4", &mstp_clks[MSTP43]), + CLKDEV_ICK_ID("fck", "sh-sci.5", &mstp_clks[MSTP42]), + CLKDEV_ICK_ID("fck", "sh-sci.6", &mstp_clks[MSTP41]), + CLKDEV_ICK_ID("fck", "sh-sci.7", &mstp_clks[MSTP40]), CLKDEV_ICK_ID("fck", "sh-cmt-16.0", &mstp_clks[MSTP72]), CLKDEV_CON_ID("usb0", &mstp_clks[MSTP60]), CLKDEV_ICK_ID("fck", "sh-mtu2", &mstp_clks[MSTP35]), diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7343.c b/arch/sh/kernel/cpu/sh4a/clock-sh7343.c index 9edc06c02dcf..a907ee2388bf 100644 --- a/arch/sh/kernel/cpu/sh4a/clock-sh7343.c +++ b/arch/sh/kernel/cpu/sh4a/clock-sh7343.c @@ -232,10 +232,10 @@ static struct clk_lookup lookups[] = { CLKDEV_CON_ID("mfi0", &mstp_clks[MSTP011]), CLKDEV_CON_ID("flctl0", &mstp_clks[MSTP010]), - CLKDEV_ICK_ID("sci_fck", "sh-sci.0", &mstp_clks[MSTP007]), - CLKDEV_ICK_ID("sci_fck", "sh-sci.1", &mstp_clks[MSTP006]), - CLKDEV_ICK_ID("sci_fck", "sh-sci.2", &mstp_clks[MSTP005]), - CLKDEV_ICK_ID("sci_fck", "sh-sci.3", &mstp_clks[MSTP004]), + CLKDEV_ICK_ID("fck", "sh-sci.0", &mstp_clks[MSTP007]), + CLKDEV_ICK_ID("fck", "sh-sci.1", &mstp_clks[MSTP006]), + CLKDEV_ICK_ID("fck", "sh-sci.2", &mstp_clks[MSTP005]), + CLKDEV_ICK_ID("fck", "sh-sci.3", &mstp_clks[MSTP004]), CLKDEV_CON_ID("sio0", &mstp_clks[MSTP003]), CLKDEV_CON_ID("siof0", &mstp_clks[MSTP002]), diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7366.c b/arch/sh/kernel/cpu/sh4a/clock-sh7366.c index 955b9add7810..ac9854179dee 100644 --- a/arch/sh/kernel/cpu/sh4a/clock-sh7366.c +++ b/arch/sh/kernel/cpu/sh4a/clock-sh7366.c @@ -230,9 +230,9 @@ static struct clk_lookup lookups[] = { CLKDEV_CON_ID("mfi0", &mstp_clks[MSTP011]), CLKDEV_CON_ID("flctl0", &mstp_clks[MSTP010]), - CLKDEV_ICK_ID("sci_fck", "sh-sci.0", &mstp_clks[MSTP007]), - CLKDEV_ICK_ID("sci_fck", "sh-sci.1", &mstp_clks[MSTP006]), - CLKDEV_ICK_ID("sci_fck", "sh-sci.2", &mstp_clks[MSTP005]), + CLKDEV_ICK_ID("fck", "sh-sci.0", &mstp_clks[MSTP007]), + CLKDEV_ICK_ID("fck", "sh-sci.1", &mstp_clks[MSTP006]), + CLKDEV_ICK_ID("fck", "sh-sci.2", &mstp_clks[MSTP005]), CLKDEV_CON_ID("msiof0", &mstp_clks[MSTP002]), CLKDEV_CON_ID("sbr0", &mstp_clks[MSTP001]), diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7723.c b/arch/sh/kernel/cpu/sh4a/clock-sh7723.c index ccbcab550df2..fe844222f3f6 100644 --- a/arch/sh/kernel/cpu/sh4a/clock-sh7723.c +++ b/arch/sh/kernel/cpu/sh4a/clock-sh7723.c @@ -267,12 +267,12 @@ static struct clk_lookup lookups[] = { CLKDEV_ICK_ID("fck", "sh-tmu.0", &mstp_clks[HWBLK_TMU0]), CLKDEV_ICK_ID("fck", "sh-tmu.1", &mstp_clks[HWBLK_TMU1]), - CLKDEV_ICK_ID("sci_fck", "sh-sci.0", &mstp_clks[HWBLK_SCIF0]), - CLKDEV_ICK_ID("sci_fck", "sh-sci.1", &mstp_clks[HWBLK_SCIF1]), - CLKDEV_ICK_ID("sci_fck", "sh-sci.2", &mstp_clks[HWBLK_SCIF2]), - CLKDEV_ICK_ID("sci_fck", "sh-sci.3", &mstp_clks[HWBLK_SCIF3]), - CLKDEV_ICK_ID("sci_fck", "sh-sci.4", &mstp_clks[HWBLK_SCIF4]), - CLKDEV_ICK_ID("sci_fck", "sh-sci.5", &mstp_clks[HWBLK_SCIF5]), + CLKDEV_ICK_ID("fck", "sh-sci.0", &mstp_clks[HWBLK_SCIF0]), + CLKDEV_ICK_ID("fck", "sh-sci.1", &mstp_clks[HWBLK_SCIF1]), + CLKDEV_ICK_ID("fck", "sh-sci.2", &mstp_clks[HWBLK_SCIF2]), + CLKDEV_ICK_ID("fck", "sh-sci.3", &mstp_clks[HWBLK_SCIF3]), + CLKDEV_ICK_ID("fck", "sh-sci.4", &mstp_clks[HWBLK_SCIF4]), + CLKDEV_ICK_ID("fck", "sh-sci.5", &mstp_clks[HWBLK_SCIF5]), CLKDEV_DEV_ID("sh_mobile_lcdc_fb.0", &mstp_clks[HWBLK_LCDC]), }; diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7734.c b/arch/sh/kernel/cpu/sh4a/clock-sh7734.c index 7f54bf2f453d..354dcac5e4cd 100644 --- a/arch/sh/kernel/cpu/sh4a/clock-sh7734.c +++ b/arch/sh/kernel/cpu/sh4a/clock-sh7734.c @@ -194,12 +194,12 @@ static struct clk_lookup lookups[] = { /* MSTP32 clocks */ CLKDEV_DEV_ID("i2c-sh7734.0", &mstp_clks[MSTP030]), CLKDEV_DEV_ID("i2c-sh7734.1", &mstp_clks[MSTP029]), - CLKDEV_ICK_ID("sci_fck", "sh-sci.0", &mstp_clks[MSTP026]), - CLKDEV_ICK_ID("sci_fck", "sh-sci.1", &mstp_clks[MSTP025]), - CLKDEV_ICK_ID("sci_fck", "sh-sci.2", &mstp_clks[MSTP024]), - CLKDEV_ICK_ID("sci_fck", "sh-sci.3", &mstp_clks[MSTP023]), - CLKDEV_ICK_ID("sci_fck", "sh-sci.4", &mstp_clks[MSTP022]), - CLKDEV_ICK_ID("sci_fck", "sh-sci.5", &mstp_clks[MSTP021]), + CLKDEV_ICK_ID("fck", "sh-sci.0", &mstp_clks[MSTP026]), + CLKDEV_ICK_ID("fck", "sh-sci.1", &mstp_clks[MSTP025]), + CLKDEV_ICK_ID("fck", "sh-sci.2", &mstp_clks[MSTP024]), + CLKDEV_ICK_ID("fck", "sh-sci.3", &mstp_clks[MSTP023]), + CLKDEV_ICK_ID("fck", "sh-sci.4", &mstp_clks[MSTP022]), + CLKDEV_ICK_ID("fck", "sh-sci.5", &mstp_clks[MSTP021]), CLKDEV_CON_ID("hscif", &mstp_clks[MSTP019]), CLKDEV_ICK_ID("fck", "sh-tmu.0", &mstp_clks[MSTP016]), CLKDEV_ICK_ID("fck", "sh-tmu.1", &mstp_clks[MSTP015]), diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7757.c b/arch/sh/kernel/cpu/sh4a/clock-sh7757.c index e40ec2c97ad1..b10af2ae9f35 100644 --- a/arch/sh/kernel/cpu/sh4a/clock-sh7757.c +++ b/arch/sh/kernel/cpu/sh4a/clock-sh7757.c @@ -125,9 +125,9 @@ static struct clk_lookup lookups[] = { CLKDEV_ICK_ID("fck", "sh-tmu.0", &mstp_clks[MSTP113]), CLKDEV_ICK_ID("fck", "sh-tmu.1", &mstp_clks[MSTP114]), - CLKDEV_ICK_ID("sci_fck", "sh-sci.2", &mstp_clks[MSTP112]), - CLKDEV_ICK_ID("sci_fck", "sh-sci.1", &mstp_clks[MSTP111]), - CLKDEV_ICK_ID("sci_fck", "sh-sci.0", &mstp_clks[MSTP110]), + CLKDEV_ICK_ID("fck", "sh-sci.2", &mstp_clks[MSTP112]), + CLKDEV_ICK_ID("fck", "sh-sci.1", &mstp_clks[MSTP111]), + CLKDEV_ICK_ID("fck", "sh-sci.0", &mstp_clks[MSTP110]), CLKDEV_CON_ID("usb_fck", &mstp_clks[MSTP103]), CLKDEV_DEV_ID("renesas_usbhs.0", &mstp_clks[MSTP102]), diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7785.c b/arch/sh/kernel/cpu/sh4a/clock-sh7785.c index 8eb6e62340c9..1aafd5496752 100644 --- a/arch/sh/kernel/cpu/sh4a/clock-sh7785.c +++ b/arch/sh/kernel/cpu/sh4a/clock-sh7785.c @@ -132,12 +132,12 @@ static struct clk_lookup lookups[] = { CLKDEV_CON_ID("cpu_clk", &div4_clks[DIV4_I]), /* MSTP32 clocks */ - CLKDEV_ICK_ID("sci_fck", "sh-sci.5", &mstp_clks[MSTP029]), - CLKDEV_ICK_ID("sci_fck", "sh-sci.4", &mstp_clks[MSTP028]), - CLKDEV_ICK_ID("sci_fck", "sh-sci.3", &mstp_clks[MSTP027]), - CLKDEV_ICK_ID("sci_fck", "sh-sci.2", &mstp_clks[MSTP026]), - CLKDEV_ICK_ID("sci_fck", "sh-sci.1", &mstp_clks[MSTP025]), - CLKDEV_ICK_ID("sci_fck", "sh-sci.0", &mstp_clks[MSTP024]), + CLKDEV_ICK_ID("fck", "sh-sci.5", &mstp_clks[MSTP029]), + CLKDEV_ICK_ID("fck", "sh-sci.4", &mstp_clks[MSTP028]), + CLKDEV_ICK_ID("fck", "sh-sci.3", &mstp_clks[MSTP027]), + CLKDEV_ICK_ID("fck", "sh-sci.2", &mstp_clks[MSTP026]), + CLKDEV_ICK_ID("fck", "sh-sci.1", &mstp_clks[MSTP025]), + CLKDEV_ICK_ID("fck", "sh-sci.0", &mstp_clks[MSTP024]), CLKDEV_CON_ID("ssi1_fck", &mstp_clks[MSTP021]), CLKDEV_CON_ID("ssi0_fck", &mstp_clks[MSTP020]), diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7786.c b/arch/sh/kernel/cpu/sh4a/clock-sh7786.c index 5e50e7ebeff0..ac3dcfe5d303 100644 --- a/arch/sh/kernel/cpu/sh4a/clock-sh7786.c +++ b/arch/sh/kernel/cpu/sh4a/clock-sh7786.c @@ -139,12 +139,12 @@ static struct clk_lookup lookups[] = { CLKDEV_CON_ID("cpu_clk", &div4_clks[DIV4_I]), /* MSTP32 clocks */ - CLKDEV_ICK_ID("sci_fck", "sh-sci.5", &mstp_clks[MSTP029]), - CLKDEV_ICK_ID("sci_fck", "sh-sci.4", &mstp_clks[MSTP028]), - CLKDEV_ICK_ID("sci_fck", "sh-sci.3", &mstp_clks[MSTP027]), - CLKDEV_ICK_ID("sci_fck", "sh-sci.2", &mstp_clks[MSTP026]), - CLKDEV_ICK_ID("sci_fck", "sh-sci.1", &mstp_clks[MSTP025]), - CLKDEV_ICK_ID("sci_fck", "sh-sci.0", &mstp_clks[MSTP024]), + CLKDEV_ICK_ID("fck", "sh-sci.5", &mstp_clks[MSTP029]), + CLKDEV_ICK_ID("fck", "sh-sci.4", &mstp_clks[MSTP028]), + CLKDEV_ICK_ID("fck", "sh-sci.3", &mstp_clks[MSTP027]), + CLKDEV_ICK_ID("fck", "sh-sci.2", &mstp_clks[MSTP026]), + CLKDEV_ICK_ID("fck", "sh-sci.1", &mstp_clks[MSTP025]), + CLKDEV_ICK_ID("fck", "sh-sci.0", &mstp_clks[MSTP024]), CLKDEV_CON_ID("ssi3_fck", &mstp_clks[MSTP023]), CLKDEV_CON_ID("ssi2_fck", &mstp_clks[MSTP022]), diff --git a/arch/sh/kernel/cpu/sh4a/clock-shx3.c b/arch/sh/kernel/cpu/sh4a/clock-shx3.c index 605221d1448a..b1bdbc3cbc21 100644 --- a/arch/sh/kernel/cpu/sh4a/clock-shx3.c +++ b/arch/sh/kernel/cpu/sh4a/clock-shx3.c @@ -114,10 +114,10 @@ static struct clk_lookup lookups[] = { CLKDEV_CON_ID("cpu_clk", &div4_clks[DIV4_I]), /* MSTP32 clocks */ - CLKDEV_ICK_ID("sci_fck", "sh-sci.3", &mstp_clks[MSTP027]), - CLKDEV_ICK_ID("sci_fck", "sh-sci.2", &mstp_clks[MSTP026]), - CLKDEV_ICK_ID("sci_fck", "sh-sci.1", &mstp_clks[MSTP025]), - CLKDEV_ICK_ID("sci_fck", "sh-sci.0", &mstp_clks[MSTP024]), + CLKDEV_ICK_ID("fck", "sh-sci.3", &mstp_clks[MSTP027]), + CLKDEV_ICK_ID("fck", "sh-sci.2", &mstp_clks[MSTP026]), + CLKDEV_ICK_ID("fck", "sh-sci.1", &mstp_clks[MSTP025]), + CLKDEV_ICK_ID("fck", "sh-sci.0", &mstp_clks[MSTP024]), CLKDEV_CON_ID("h8ex_fck", &mstp_clks[MSTP003]), CLKDEV_CON_ID("csm_fck", &mstp_clks[MSTP002]), diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7734.c b/arch/sh/kernel/cpu/sh4a/setup-sh7734.c index f617bcb734df..69b8a50310d9 100644 --- a/arch/sh/kernel/cpu/sh4a/setup-sh7734.c +++ b/arch/sh/kernel/cpu/sh4a/setup-sh7734.c @@ -28,7 +28,7 @@ static struct plat_sci_port scif0_platform_data = { .flags = UPF_BOOT_AUTOCONF, .scscr = SCSCR_RE | SCSCR_TE | SCSCR_REIE, .type = PORT_SCIF, - .regtype = SCIx_SH4_SCIF_REGTYPE, + .regtype = SCIx_SH4_SCIF_BRG_REGTYPE, }; static struct resource scif0_resources[] = { @@ -50,7 +50,7 @@ static struct plat_sci_port scif1_platform_data = { .flags = UPF_BOOT_AUTOCONF, .scscr = SCSCR_RE | SCSCR_TE | SCSCR_REIE, .type = PORT_SCIF, - .regtype = SCIx_SH4_SCIF_REGTYPE, + .regtype = SCIx_SH4_SCIF_BRG_REGTYPE, }; static struct resource scif1_resources[] = { @@ -72,7 +72,7 @@ static struct plat_sci_port scif2_platform_data = { .flags = UPF_BOOT_AUTOCONF, .scscr = SCSCR_RE | SCSCR_TE | SCSCR_REIE, .type = PORT_SCIF, - .regtype = SCIx_SH4_SCIF_REGTYPE, + .regtype = SCIx_SH4_SCIF_BRG_REGTYPE, }; static struct resource scif2_resources[] = { @@ -94,7 +94,7 @@ static struct plat_sci_port scif3_platform_data = { .flags = UPF_BOOT_AUTOCONF, .scscr = SCSCR_RE | SCSCR_TE | SCSCR_REIE | SCSCR_TOIE, .type = PORT_SCIF, - .regtype = SCIx_SH4_SCIF_REGTYPE, + .regtype = SCIx_SH4_SCIF_BRG_REGTYPE, }; static struct resource scif3_resources[] = { @@ -116,7 +116,7 @@ static struct plat_sci_port scif4_platform_data = { .flags = UPF_BOOT_AUTOCONF, .scscr = SCSCR_RE | SCSCR_TE | SCSCR_REIE, .type = PORT_SCIF, - .regtype = SCIx_SH4_SCIF_REGTYPE, + .regtype = SCIx_SH4_SCIF_BRG_REGTYPE, }; static struct resource scif4_resources[] = { @@ -138,7 +138,7 @@ static struct plat_sci_port scif5_platform_data = { .flags = UPF_BOOT_AUTOCONF, .scscr = SCSCR_RE | SCSCR_TE | SCSCR_REIE, .type = PORT_SCIF, - .regtype = SCIx_SH4_SCIF_REGTYPE, + .regtype = SCIx_SH4_SCIF_BRG_REGTYPE, }; static struct resource scif5_resources[] = { diff --git a/arch/sh/kernel/ftrace.c b/arch/sh/kernel/ftrace.c index 079d70e6d74b..38993e09ef03 100644 --- a/arch/sh/kernel/ftrace.c +++ b/arch/sh/kernel/ftrace.c @@ -212,13 +212,11 @@ static int ftrace_modify_code(unsigned long ip, unsigned char *old_code, unsigned char replaced[MCOUNT_INSN_SIZE]; /* - * Note: Due to modules and __init, code can - * disappear and change, we need to protect against faulting - * as well as code changing. We do this by using the - * probe_kernel_* functions. - * - * No real locking needed, this code is run through - * kstop_machine, or before SMP starts. + * Note: + * We are paranoid about modifying text, as if a bug was to happen, it + * could cause us to read or write to someplace that could cause harm. + * Carefully read and modify the code with probe_kernel_*(), and make + * sure what we read is what we expected it to be before modifying it. */ /* read the text we want to modify */ diff --git a/arch/sh/mm/cache-sh4.c b/arch/sh/mm/cache-sh4.c index 51d8f7f31d1d..58aaa4f33b81 100644 --- a/arch/sh/mm/cache-sh4.c +++ b/arch/sh/mm/cache-sh4.c @@ -241,7 +241,7 @@ static void sh4_flush_cache_page(void *args) */ map_coherent = (current_cpu_data.dcache.n_aliases && test_bit(PG_dcache_clean, &page->flags) && - page_mapped(page)); + page_mapcount(page)); if (map_coherent) vaddr = kmap_coherent(page, address); else diff --git a/arch/sh/mm/cache.c b/arch/sh/mm/cache.c index f770e3992620..e58cfbf45150 100644 --- a/arch/sh/mm/cache.c +++ b/arch/sh/mm/cache.c @@ -59,7 +59,7 @@ void copy_to_user_page(struct vm_area_struct *vma, struct page *page, unsigned long vaddr, void *dst, const void *src, unsigned long len) { - if (boot_cpu_data.dcache.n_aliases && page_mapped(page) && + if (boot_cpu_data.dcache.n_aliases && page_mapcount(page) && test_bit(PG_dcache_clean, &page->flags)) { void *vto = kmap_coherent(page, vaddr) + (vaddr & ~PAGE_MASK); memcpy(vto, src, len); @@ -78,7 +78,7 @@ void copy_from_user_page(struct vm_area_struct *vma, struct page *page, unsigned long vaddr, void *dst, const void *src, unsigned long len) { - if (boot_cpu_data.dcache.n_aliases && page_mapped(page) && + if (boot_cpu_data.dcache.n_aliases && page_mapcount(page) && test_bit(PG_dcache_clean, &page->flags)) { void *vfrom = kmap_coherent(page, vaddr) + (vaddr & ~PAGE_MASK); memcpy(dst, vfrom, len); @@ -97,7 +97,7 @@ void copy_user_highpage(struct page *to, struct page *from, vto = kmap_atomic(to); - if (boot_cpu_data.dcache.n_aliases && page_mapped(from) && + if (boot_cpu_data.dcache.n_aliases && page_mapcount(from) && test_bit(PG_dcache_clean, &from->flags)) { vfrom = kmap_coherent(from, vaddr); copy_page(vto, vfrom); @@ -153,7 +153,7 @@ void __flush_anon_page(struct page *page, unsigned long vmaddr) unsigned long addr = (unsigned long) page_address(page); if (pages_do_alias(addr, vmaddr)) { - if (boot_cpu_data.dcache.n_aliases && page_mapped(page) && + if (boot_cpu_data.dcache.n_aliases && page_mapcount(page) && test_bit(PG_dcache_clean, &page->flags)) { void *kaddr; diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 56442d2d7bbc..3203e42190dd 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -101,10 +101,6 @@ config LOCKDEP_SUPPORT bool default y if SPARC64 -config HAVE_LATENCYTOP_SUPPORT - bool - default y if SPARC64 - config ARCH_HIBERNATION_POSSIBLE def_bool y if SPARC64 diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index 131d36fcd07a..7a38d6a576c5 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -681,13 +681,6 @@ static inline unsigned long pmd_trans_huge(pmd_t pmd) return pte_val(pte) & _PAGE_PMD_HUGE; } -static inline unsigned long pmd_trans_splitting(pmd_t pmd) -{ - pte_t pte = __pte(pmd_val(pmd)); - - return pmd_trans_huge(pmd) && pte_special(pte); -} - #define has_transparent_hugepage() 1 static inline pmd_t pmd_mkold(pmd_t pmd) @@ -717,29 +710,29 @@ static inline pmd_t pmd_mkdirty(pmd_t pmd) return __pmd(pte_val(pte)); } -static inline pmd_t pmd_mkyoung(pmd_t pmd) +static inline pmd_t pmd_mkclean(pmd_t pmd) { pte_t pte = __pte(pmd_val(pmd)); - pte = pte_mkyoung(pte); + pte = pte_mkclean(pte); return __pmd(pte_val(pte)); } -static inline pmd_t pmd_mkwrite(pmd_t pmd) +static inline pmd_t pmd_mkyoung(pmd_t pmd) { pte_t pte = __pte(pmd_val(pmd)); - pte = pte_mkwrite(pte); + pte = pte_mkyoung(pte); return __pmd(pte_val(pte)); } -static inline pmd_t pmd_mksplitting(pmd_t pmd) +static inline pmd_t pmd_mkwrite(pmd_t pmd) { pte_t pte = __pte(pmd_val(pmd)); - pte = pte_mkspecial(pte); + pte = pte_mkwrite(pte); return __pmd(pte_val(pte)); } diff --git a/arch/sparc/include/asm/uaccess_32.h b/arch/sparc/include/asm/uaccess_32.h index 64ee103dc29d..57aca2792d29 100644 --- a/arch/sparc/include/asm/uaccess_32.h +++ b/arch/sparc/include/asm/uaccess_32.h @@ -205,31 +205,6 @@ int __put_user_bad(void); __gu_ret; \ }) -#define __get_user_check_ret(x, addr, size, type, retval) ({ \ - register unsigned long __gu_val __asm__ ("l1"); \ - if (__access_ok(addr, size)) { \ - switch (size) { \ - case 1: \ - __get_user_asm_ret(__gu_val, ub, addr, retval); \ - break; \ - case 2: \ - __get_user_asm_ret(__gu_val, uh, addr, retval); \ - break; \ - case 4: \ - __get_user_asm_ret(__gu_val, , addr, retval); \ - break; \ - case 8: \ - __get_user_asm_ret(__gu_val, d, addr, retval); \ - break; \ - default: \ - if (__get_user_bad()) \ - return retval; \ - } \ - x = (__force type) __gu_val; \ - } else \ - return retval; \ -}) - #define __get_user_nocheck(x, addr, size, type) ({ \ register int __gu_ret; \ register unsigned long __gu_val; \ @@ -247,20 +222,6 @@ int __put_user_bad(void); __gu_ret; \ }) -#define __get_user_nocheck_ret(x, addr, size, type, retval) ({ \ - register unsigned long __gu_val __asm__ ("l1"); \ - switch (size) { \ - case 1: __get_user_asm_ret(__gu_val, ub, addr, retval); break; \ - case 2: __get_user_asm_ret(__gu_val, uh, addr, retval); break; \ - case 4: __get_user_asm_ret(__gu_val, , addr, retval); break; \ - case 8: __get_user_asm_ret(__gu_val, d, addr, retval); break; \ - default: \ - if (__get_user_bad()) \ - return retval; \ - } \ - x = (__force type) __gu_val; \ -}) - #define __get_user_asm(x, size, addr, ret) \ __asm__ __volatile__( \ "/* Get user asm, inline. */\n" \ @@ -281,32 +242,6 @@ __asm__ __volatile__( \ : "=&r" (ret), "=&r" (x) : "m" (*__m(addr)), \ "i" (-EFAULT)) -#define __get_user_asm_ret(x, size, addr, retval) \ -if (__builtin_constant_p(retval) && retval == -EFAULT) \ - __asm__ __volatile__( \ - "/* Get user asm ret, inline. */\n" \ - "1:\t" "ld"#size " %1, %0\n\n\t" \ - ".section __ex_table,#alloc\n\t" \ - ".align 4\n\t" \ - ".word 1b,__ret_efault\n\n\t" \ - ".previous\n\t" \ - : "=&r" (x) : "m" (*__m(addr))); \ -else \ - __asm__ __volatile__( \ - "/* Get user asm ret, inline. */\n" \ - "1:\t" "ld"#size " %1, %0\n\n\t" \ - ".section .fixup,#alloc,#execinstr\n\t" \ - ".align 4\n" \ - "3:\n\t" \ - "ret\n\t" \ - " restore %%g0, %2, %%o0\n\n\t" \ - ".previous\n\t" \ - ".section __ex_table,#alloc\n\t" \ - ".align 4\n\t" \ - ".word 1b, 3b\n\n\t" \ - ".previous\n\t" \ - : "=&r" (x) : "m" (*__m(addr)), "i" (retval)) - int __get_user_bad(void); unsigned long __copy_user(void __user *to, const void __user *from, unsigned long size); diff --git a/arch/sparc/include/asm/uaccess_64.h b/arch/sparc/include/asm/uaccess_64.h index ea6e9a20f3ff..e9a51d64974d 100644 --- a/arch/sparc/include/asm/uaccess_64.h +++ b/arch/sparc/include/asm/uaccess_64.h @@ -179,20 +179,6 @@ int __put_user_bad(void); __gu_ret; \ }) -#define __get_user_nocheck_ret(data, addr, size, type, retval) ({ \ - register unsigned long __gu_val __asm__ ("l1"); \ - switch (size) { \ - case 1: __get_user_asm_ret(__gu_val, ub, addr, retval); break; \ - case 2: __get_user_asm_ret(__gu_val, uh, addr, retval); break; \ - case 4: __get_user_asm_ret(__gu_val, uw, addr, retval); break; \ - case 8: __get_user_asm_ret(__gu_val, x, addr, retval); break; \ - default: \ - if (__get_user_bad()) \ - return retval; \ - } \ - data = (__force type) __gu_val; \ -}) - #define __get_user_asm(x, size, addr, ret) \ __asm__ __volatile__( \ "/* Get user asm, inline. */\n" \ @@ -214,32 +200,6 @@ __asm__ __volatile__( \ : "=r" (ret), "=r" (x) : "r" (__m(addr)), \ "i" (-EFAULT)) -#define __get_user_asm_ret(x, size, addr, retval) \ -if (__builtin_constant_p(retval) && retval == -EFAULT) \ - __asm__ __volatile__( \ - "/* Get user asm ret, inline. */\n" \ - "1:\t" "ld"#size "a [%1] %%asi, %0\n\n\t" \ - ".section __ex_table,\"a\"\n\t" \ - ".align 4\n\t" \ - ".word 1b,__ret_efault\n\n\t" \ - ".previous\n\t" \ - : "=r" (x) : "r" (__m(addr))); \ -else \ - __asm__ __volatile__( \ - "/* Get user asm ret, inline. */\n" \ - "1:\t" "ld"#size "a [%1] %%asi, %0\n\n\t" \ - ".section .fixup,#alloc,#execinstr\n\t" \ - ".align 4\n" \ - "3:\n\t" \ - "ret\n\t" \ - " restore %%g0, %2, %%o0\n\n\t" \ - ".previous\n\t" \ - ".section __ex_table,\"a\"\n\t" \ - ".align 4\n\t" \ - ".word 1b, 3b\n\n\t" \ - ".previous\n\t" \ - : "=r" (x) : "r" (__m(addr)), "i" (retval)) - int __get_user_bad(void); unsigned long __must_check ___copy_from_user(void *to, diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h index e6a16c40be5f..d270ee91968e 100644 --- a/arch/sparc/include/uapi/asm/socket.h +++ b/arch/sparc/include/uapi/asm/socket.h @@ -81,6 +81,9 @@ #define SO_ATTACH_BPF 0x0034 #define SO_DETACH_BPF SO_DETACH_FILTER +#define SO_ATTACH_REUSEPORT_CBPF 0x0035 +#define SO_ATTACH_REUSEPORT_EBPF 0x0036 + /* Security levels - as per NRL IPv6 - don't actually do anything */ #define SO_SECURITY_AUTHENTICATION 0x5001 #define SO_SECURITY_ENCRYPTION_TRANSPORT 0x5002 diff --git a/arch/sparc/kernel/idprom.c b/arch/sparc/kernel/idprom.c index 6bd75012109d..f95dd11b75ea 100644 --- a/arch/sparc/kernel/idprom.c +++ b/arch/sparc/kernel/idprom.c @@ -9,6 +9,7 @@ #include <linux/types.h> #include <linux/init.h> #include <linux/export.h> +#include <linux/etherdevice.h> #include <asm/oplib.h> #include <asm/idprom.h> @@ -60,6 +61,12 @@ static void __init display_system_type(unsigned char machtype) { } #endif + +unsigned char *arch_get_platform_mac_address(void) +{ + return idprom->id_ethaddr; +} + /* Calculate the IDPROM checksum (xor of the data bytes). */ static unsigned char __init calc_idprom_cksum(struct idprom *idprom) { diff --git a/arch/sparc/kernel/mdesc.c b/arch/sparc/kernel/mdesc.c index 6f80936e0eea..11228861d9b4 100644 --- a/arch/sparc/kernel/mdesc.c +++ b/arch/sparc/kernel/mdesc.c @@ -1033,25 +1033,9 @@ static ssize_t mdesc_read(struct file *file, char __user *buf, static loff_t mdesc_llseek(struct file *file, loff_t offset, int whence) { - struct mdesc_handle *hp; - - switch (whence) { - case SEEK_CUR: - offset += file->f_pos; - break; - case SEEK_SET: - break; - default: - return -EINVAL; - } - - hp = file->private_data; - if (offset > hp->handle_size) - return -EINVAL; - else - file->f_pos = offset; + struct mdesc_handle *hp = file->private_data; - return offset; + return no_seek_end_llseek_size(file, offset, whence, hp->handle_size); } /* mdesc_close() - /dev/mdesc is being closed, release the reference to diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c index 30e7ddb27a3a..c690c8e16a96 100644 --- a/arch/sparc/kernel/sys_sparc_64.c +++ b/arch/sparc/kernel/sys_sparc_64.c @@ -413,7 +413,7 @@ out: SYSCALL_DEFINE1(sparc64_personality, unsigned long, personality) { - int ret; + long ret; if (personality(current->personality) == PER_LINUX32 && personality(personality) == PER_LINUX) diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c index dbabe5713a15..cb841a33da59 100644 --- a/arch/sparc/mm/fault_64.c +++ b/arch/sparc/mm/fault_64.c @@ -113,9 +113,6 @@ static unsigned int get_user_insn(unsigned long tpc) #ifdef CONFIG_TRANSPARENT_HUGEPAGE if (pmd_trans_huge(*pmdp)) { - if (pmd_trans_splitting(*pmdp)) - goto out_irq_enable; - pa = pmd_pfn(*pmdp) << PAGE_SHIFT; pa += tpc & ~HPAGE_MASK; diff --git a/arch/sparc/mm/gup.c b/arch/sparc/mm/gup.c index 2e5c4fc2daa9..eb3d8e8ebc6b 100644 --- a/arch/sparc/mm/gup.c +++ b/arch/sparc/mm/gup.c @@ -56,8 +56,6 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, put_page(head); return 0; } - if (head != page) - get_huge_page_tail(page); pages[*nr] = page; (*nr)++; @@ -70,7 +68,7 @@ static int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr) { - struct page *head, *page, *tail; + struct page *head, *page; int refs; if (!(pmd_val(pmd) & _PAGE_VALID)) @@ -82,7 +80,6 @@ static int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr, refs = 0; head = pmd_page(pmd); page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT); - tail = page; do { VM_BUG_ON(compound_head(page) != head); pages[*nr] = page; @@ -103,15 +100,6 @@ static int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr, return 0; } - /* Any tail page need their mapcount reference taken before we - * return. - */ - while (refs--) { - if (PageTail(tail)) - get_huge_page_tail(tail); - tail++; - } - return 1; } @@ -126,7 +114,7 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, pmd_t pmd = *pmdp; next = pmd_addr_end(addr, end); - if (pmd_none(pmd) || pmd_trans_splitting(pmd)) + if (pmd_none(pmd)) return 0; if (unlikely(pmd_large(pmd))) { if (!gup_huge_pmd(pmdp, pmd, addr, next, diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 3025bd57f7ab..6f216853f272 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -1267,13 +1267,6 @@ static int __init numa_parse_mdesc(void) int i, j, err, count; u64 node; - /* Some sane defaults for numa latency values */ - for (i = 0; i < MAX_NUMNODES; i++) { - for (j = 0; j < MAX_NUMNODES; j++) - numa_latency[i][j] = (i == j) ? - LOCAL_DISTANCE : REMOTE_DISTANCE; - } - node = mdesc_node_by_name(md, MDESC_NODE_NULL, "latency-groups"); if (node == MDESC_NODE_NULL) { mdesc_release(md); @@ -1369,10 +1362,18 @@ static int __init numa_parse_sun4u(void) static int __init bootmem_init_numa(void) { + int i, j; int err = -1; numadbg("bootmem_init_numa()\n"); + /* Some sane defaults for numa latency values */ + for (i = 0; i < MAX_NUMNODES; i++) { + for (j = 0; j < MAX_NUMNODES; j++) + numa_latency[i][j] = (i == j) ? + LOCAL_DISTANCE : REMOTE_DISTANCE; + } + if (numa_enabled) { if (tlb_type == hypervisor) err = numa_parse_mdesc(); diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig index 8ec7a4599c08..be05b4ae02b6 100644 --- a/arch/tile/Kconfig +++ b/arch/tile/Kconfig @@ -19,6 +19,7 @@ config TILE select VIRT_TO_BUS select SYS_HYPERVISOR select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS + select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAVE_NMI_SAFE_CMPXCHG select GENERIC_CLOCKEVENTS select MODULES_USE_ELF_RELA @@ -116,9 +117,6 @@ config ARCH_DISCONTIGMEM_DEFAULT config TRACE_IRQFLAGS_SUPPORT def_bool y -config STRICT_DEVMEM - def_bool y - # SMP is required for Tilera Linux. config SMP def_bool y diff --git a/arch/tile/include/asm/cmpxchg.h b/arch/tile/include/asm/cmpxchg.h index 0ccda3c425be..25d5899497be 100644 --- a/arch/tile/include/asm/cmpxchg.h +++ b/arch/tile/include/asm/cmpxchg.h @@ -127,8 +127,6 @@ long long _atomic64_cmpxchg(long long *v, long long o, long long n); #endif -#define tas(ptr) xchg((ptr), 1) - #endif /* __ASSEMBLY__ */ #endif /* _ASM_TILE_CMPXCHG_H */ diff --git a/arch/tile/include/asm/pgtable.h b/arch/tile/include/asm/pgtable.h index 2b05ccbebed9..96cecf55522e 100644 --- a/arch/tile/include/asm/pgtable.h +++ b/arch/tile/include/asm/pgtable.h @@ -489,16 +489,6 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) #ifdef CONFIG_TRANSPARENT_HUGEPAGE #define has_transparent_hugepage() 1 #define pmd_trans_huge pmd_huge_page - -static inline pmd_t pmd_mksplitting(pmd_t pmd) -{ - return pte_pmd(hv_pte_set_client2(pmd_pte(pmd))); -} - -static inline int pmd_trans_splitting(pmd_t pmd) -{ - return hv_pte_get_client2(pmd_pte(pmd)); -} #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ /* diff --git a/arch/um/Kconfig.common b/arch/um/Kconfig.common index d195a87ca542..cc0013475444 100644 --- a/arch/um/Kconfig.common +++ b/arch/um/Kconfig.common @@ -2,6 +2,7 @@ config UML bool default y select HAVE_ARCH_AUDITSYSCALL + select HAVE_ARCH_SECCOMP_FILTER select HAVE_UID16 select HAVE_FUTEX_CMPXCHG if FUTEX select GENERIC_IRQ_SHOW diff --git a/arch/um/Kconfig.um b/arch/um/Kconfig.um index 28a9885e3a37..4b2ed5858b2e 100644 --- a/arch/um/Kconfig.um +++ b/arch/um/Kconfig.um @@ -104,3 +104,19 @@ config PGTABLE_LEVELS int default 3 if 3_LEVEL_PGTABLES default 2 + +config SECCOMP + def_bool y + prompt "Enable seccomp to safely compute untrusted bytecode" + ---help--- + This kernel feature is useful for number crunching applications + that may need to compute untrusted bytecode during their + execution. By using pipes or other transports made available to + the process as file descriptors supporting the read/write + syscalls, it's possible to isolate those applications in + their own address space using seccomp. Once seccomp is + enabled via prctl(PR_SET_SECCOMP), it cannot be disabled + and the task is only allowed to execute a few safe syscalls + defined by each seccomp mode. + + If unsure, say Y. diff --git a/arch/um/drivers/hostaudio_kern.c b/arch/um/drivers/hostaudio_kern.c index f6b911cc3923..3a4b58730f5f 100644 --- a/arch/um/drivers/hostaudio_kern.c +++ b/arch/um/drivers/hostaudio_kern.c @@ -105,13 +105,9 @@ static ssize_t hostaudio_write(struct file *file, const char __user *buffer, printk(KERN_DEBUG "hostaudio: write called, count = %d\n", count); #endif - kbuf = kmalloc(count, GFP_KERNEL); - if (kbuf == NULL) - return -ENOMEM; - - err = -EFAULT; - if (copy_from_user(kbuf, buffer, count)) - goto out; + kbuf = memdup_user(buffer, count); + if (IS_ERR(kbuf)) + return PTR_ERR(kbuf); err = os_write_file(state->fd, kbuf, count); if (err < 0) diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c index 29880c9b324e..b821b13d343a 100644 --- a/arch/um/drivers/mconsole_kern.c +++ b/arch/um/drivers/mconsole_kern.c @@ -748,19 +748,11 @@ static ssize_t mconsole_proc_write(struct file *file, { char *buf; - buf = kmalloc(count + 1, GFP_KERNEL); - if (buf == NULL) - return -ENOMEM; - - if (copy_from_user(buf, buffer, count)) { - count = -EFAULT; - goto out; - } - - buf[count] = '\0'; + buf = memdup_user_nul(buffer, count); + if (IS_ERR(buf)) + return PTR_ERR(buf); mconsole_notify(notify_socket, MCONSOLE_USER_NOTIFY, buf, count); - out: kfree(buf); return count; } diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index e8ab93c3e638..39ba20755e03 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -535,11 +535,7 @@ static int read_cow_bitmap(int fd, void *buf, int offset, int len) { int err; - err = os_seek_file(fd, offset); - if (err < 0) - return err; - - err = os_read_file(fd, buf, len); + err = os_pread_file(fd, buf, len, offset); if (err < 0) return err; @@ -1377,14 +1373,8 @@ static int update_bitmap(struct io_thread_req *req) if(req->cow_offset == -1) return 0; - n = os_seek_file(req->fds[1], req->cow_offset); - if(n < 0){ - printk("do_io - bitmap lseek failed : err = %d\n", -n); - return 1; - } - - n = os_write_file(req->fds[1], &req->bitmap_words, - sizeof(req->bitmap_words)); + n = os_pwrite_file(req->fds[1], &req->bitmap_words, + sizeof(req->bitmap_words), req->cow_offset); if(n != sizeof(req->bitmap_words)){ printk("do_io - bitmap update failed, err = %d fd = %d\n", -n, req->fds[1]); @@ -1399,7 +1389,6 @@ static void do_io(struct io_thread_req *req) char *buf; unsigned long len; int n, nsectors, start, end, bit; - int err; __u64 off; if (req->op == UBD_FLUSH) { @@ -1428,18 +1417,12 @@ static void do_io(struct io_thread_req *req) len = (end - start) * req->sectorsize; buf = &req->buffer[start * req->sectorsize]; - err = os_seek_file(req->fds[bit], off); - if(err < 0){ - printk("do_io - lseek failed : err = %d\n", -err); - req->error = 1; - return; - } if(req->op == UBD_READ){ n = 0; do { buf = &buf[n]; len -= n; - n = os_read_file(req->fds[bit], buf, len); + n = os_pread_file(req->fds[bit], buf, len, off); if (n < 0) { printk("do_io - read failed, err = %d " "fd = %d\n", -n, req->fds[bit]); @@ -1449,7 +1432,7 @@ static void do_io(struct io_thread_req *req) } while((n < len) && (n != 0)); if (n < len) memset(&buf[n], 0, len - n); } else { - n = os_write_file(req->fds[bit], buf, len); + n = os_pwrite_file(req->fds[bit], buf, len, off); if(n != len){ printk("do_io - write failed err = %d " "fd = %d\n", -n, req->fds[bit]); diff --git a/arch/um/include/asm/hardirq.h b/arch/um/include/asm/hardirq.h new file mode 100644 index 000000000000..756f0778e327 --- /dev/null +++ b/arch/um/include/asm/hardirq.h @@ -0,0 +1,23 @@ +#ifndef __ASM_UM_HARDIRQ_H +#define __ASM_UM_HARDIRQ_H + +#include <linux/cache.h> +#include <linux/threads.h> + +typedef struct { + unsigned int __softirq_pending; +} ____cacheline_aligned irq_cpustat_t; + +#include <linux/irq_cpustat.h> /* Standard mappings for irq_cpustat_t above */ +#include <linux/irq.h> + +#ifndef ack_bad_irq +static inline void ack_bad_irq(unsigned int irq) +{ + printk(KERN_CRIT "unexpected IRQ trap at vector %02x\n", irq); +} +#endif + +#define __ARCH_IRQ_EXIT_IRQS_DISABLED 1 + +#endif /* __ASM_UM_HARDIRQ_H */ diff --git a/arch/um/include/asm/page.h b/arch/um/include/asm/page.h index 71c5d132062a..e13d41c392ae 100644 --- a/arch/um/include/asm/page.h +++ b/arch/um/include/asm/page.h @@ -18,6 +18,7 @@ struct page; +#include <linux/pfn.h> #include <linux/types.h> #include <asm/vm-flags.h> @@ -52,7 +53,6 @@ typedef struct { unsigned long pgd; } pgd_t; #define pmd_val(x) ((x).pmd) #define __pmd(x) ((pmd_t) { (x) } ) -typedef unsigned long long pfn_t; typedef unsigned long long phys_t; #else @@ -76,7 +76,6 @@ typedef struct { unsigned long pmd; } pmd_t; #define pte_is_zero(p) (!((p).pte & ~_PAGE_NEWPAGE)) #define pte_set_val(p, phys, prot) (p).pte = (phys | pgprot_val(prot)) -typedef unsigned long pfn_t; typedef unsigned long phys_t; #endif @@ -109,8 +108,8 @@ extern unsigned long uml_physmem; #define __pa(virt) to_phys((void *) (unsigned long) (virt)) #define __va(phys) to_virt((unsigned long) (phys)) -#define phys_to_pfn(p) ((pfn_t) ((p) >> PAGE_SHIFT)) -#define pfn_to_phys(pfn) ((phys_t) ((pfn) << PAGE_SHIFT)) +#define phys_to_pfn(p) ((p) >> PAGE_SHIFT) +#define pfn_to_phys(pfn) PFN_PHYS(pfn) #define pfn_valid(pfn) ((pfn) < max_mapnr) #define virt_addr_valid(v) pfn_valid(phys_to_pfn(__pa(v))) diff --git a/arch/um/include/asm/pgtable-3level.h b/arch/um/include/asm/pgtable-3level.h index 2b4274e7c095..bae8523a162f 100644 --- a/arch/um/include/asm/pgtable-3level.h +++ b/arch/um/include/asm/pgtable-3level.h @@ -98,7 +98,7 @@ static inline unsigned long pte_pfn(pte_t pte) return phys_to_pfn(pte_val(pte)); } -static inline pte_t pfn_pte(pfn_t page_nr, pgprot_t pgprot) +static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot) { pte_t pte; phys_t phys = pfn_to_phys(page_nr); @@ -107,7 +107,7 @@ static inline pte_t pfn_pte(pfn_t page_nr, pgprot_t pgprot) return pte; } -static inline pmd_t pfn_pmd(pfn_t page_nr, pgprot_t pgprot) +static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot) { return __pmd((page_nr << PAGE_SHIFT) | pgprot_val(pgprot)); } diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h index 18eb9924dda3..7485398d0737 100644 --- a/arch/um/include/asm/pgtable.h +++ b/arch/um/include/asm/pgtable.h @@ -271,7 +271,7 @@ static inline int pte_same(pte_t pte_a, pte_t pte_b) #define phys_to_page(phys) pfn_to_page(phys_to_pfn(phys)) #define __virt_to_page(virt) phys_to_page(__pa(virt)) -#define page_to_phys(page) pfn_to_phys((pfn_t) page_to_pfn(page)) +#define page_to_phys(page) pfn_to_phys(page_to_pfn(page)) #define virt_to_page(addr) __virt_to_page((const unsigned long) addr) #define mk_pte(page, pgprot) \ diff --git a/arch/um/include/asm/syscall-generic.h b/arch/um/include/asm/syscall-generic.h new file mode 100644 index 000000000000..9fb9cf8cd39a --- /dev/null +++ b/arch/um/include/asm/syscall-generic.h @@ -0,0 +1,138 @@ +/* + * Access to user system call parameters and results + * + * See asm-generic/syscall.h for function descriptions. + * + * Copyright (C) 2015 Mickaël Salaün <mic@digikod.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __UM_SYSCALL_GENERIC_H +#define __UM_SYSCALL_GENERIC_H + +#include <asm/ptrace.h> +#include <linux/err.h> +#include <linux/sched.h> +#include <sysdep/ptrace.h> + +static inline int syscall_get_nr(struct task_struct *task, struct pt_regs *regs) +{ + + return PT_REGS_SYSCALL_NR(regs); +} + +static inline void syscall_rollback(struct task_struct *task, + struct pt_regs *regs) +{ + /* do nothing */ +} + +static inline long syscall_get_error(struct task_struct *task, + struct pt_regs *regs) +{ + const long error = regs_return_value(regs); + + return IS_ERR_VALUE(error) ? error : 0; +} + +static inline long syscall_get_return_value(struct task_struct *task, + struct pt_regs *regs) +{ + return regs_return_value(regs); +} + +static inline void syscall_set_return_value(struct task_struct *task, + struct pt_regs *regs, + int error, long val) +{ + PT_REGS_SET_SYSCALL_RETURN(regs, (long) error ?: val); +} + +static inline void syscall_get_arguments(struct task_struct *task, + struct pt_regs *regs, + unsigned int i, unsigned int n, + unsigned long *args) +{ + const struct uml_pt_regs *r = ®s->regs; + + switch (i) { + case 0: + if (!n--) + break; + *args++ = UPT_SYSCALL_ARG1(r); + case 1: + if (!n--) + break; + *args++ = UPT_SYSCALL_ARG2(r); + case 2: + if (!n--) + break; + *args++ = UPT_SYSCALL_ARG3(r); + case 3: + if (!n--) + break; + *args++ = UPT_SYSCALL_ARG4(r); + case 4: + if (!n--) + break; + *args++ = UPT_SYSCALL_ARG5(r); + case 5: + if (!n--) + break; + *args++ = UPT_SYSCALL_ARG6(r); + case 6: + if (!n--) + break; + default: + BUG(); + break; + } +} + +static inline void syscall_set_arguments(struct task_struct *task, + struct pt_regs *regs, + unsigned int i, unsigned int n, + const unsigned long *args) +{ + struct uml_pt_regs *r = ®s->regs; + + switch (i) { + case 0: + if (!n--) + break; + UPT_SYSCALL_ARG1(r) = *args++; + case 1: + if (!n--) + break; + UPT_SYSCALL_ARG2(r) = *args++; + case 2: + if (!n--) + break; + UPT_SYSCALL_ARG3(r) = *args++; + case 3: + if (!n--) + break; + UPT_SYSCALL_ARG4(r) = *args++; + case 4: + if (!n--) + break; + UPT_SYSCALL_ARG5(r) = *args++; + case 5: + if (!n--) + break; + UPT_SYSCALL_ARG6(r) = *args++; + case 6: + if (!n--) + break; + default: + BUG(); + break; + } +} + +/* See arch/x86/um/asm/syscall.h for syscall_get_arch() definition. */ + +#endif /* __UM_SYSCALL_GENERIC_H */ diff --git a/arch/um/include/asm/thread_info.h b/arch/um/include/asm/thread_info.h index 53968aaf76f9..053baff03674 100644 --- a/arch/um/include/asm/thread_info.h +++ b/arch/um/include/asm/thread_info.h @@ -62,11 +62,13 @@ static inline struct thread_info *current_thread_info(void) #define TIF_SYSCALL_AUDIT 6 #define TIF_RESTORE_SIGMASK 7 #define TIF_NOTIFY_RESUME 8 +#define TIF_SECCOMP 9 /* secure computing */ #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) #define _TIF_MEMDIE (1 << TIF_MEMDIE) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) +#define _TIF_SECCOMP (1 << TIF_SECCOMP) #endif diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h index 868e6c3f83dd..de5d572225f3 100644 --- a/arch/um/include/shared/os.h +++ b/arch/um/include/shared/os.h @@ -146,6 +146,8 @@ extern int os_read_file(int fd, void *buf, int len); extern int os_write_file(int fd, const void *buf, int count); extern int os_sync_file(int fd); extern int os_file_size(const char *file, unsigned long long *size_out); +extern int os_pread_file(int fd, void *buf, int len, unsigned long long offset); +extern int os_pwrite_file(int fd, const void *buf, int count, unsigned long long offset); extern int os_file_modtime(const char *file, unsigned long *modtime); extern int os_pipe(int *fd, int stream, int close_on_exec); extern int os_set_fd_async(int fd); @@ -282,7 +284,6 @@ extern void initial_thread_cb_skas(void (*proc)(void *), void *arg); extern void halt_skas(void); extern void reboot_skas(void); -extern int get_syscall(struct uml_pt_regs *regs); /* irq.c */ extern int os_waiting_for_events(struct irq_fd *active_fds); diff --git a/arch/um/kernel/skas/syscall.c b/arch/um/kernel/skas/syscall.c index 1683b8efdfda..48b0dcbd87be 100644 --- a/arch/um/kernel/skas/syscall.c +++ b/arch/um/kernel/skas/syscall.c @@ -5,31 +5,38 @@ #include <linux/kernel.h> #include <linux/ptrace.h> +#include <linux/seccomp.h> #include <kern_util.h> #include <sysdep/ptrace.h> +#include <sysdep/ptrace_user.h> #include <sysdep/syscalls.h> -#include <os.h> void handle_syscall(struct uml_pt_regs *r) { struct pt_regs *regs = container_of(r, struct pt_regs, regs); - long result; int syscall; - if (syscall_trace_enter(regs)) { - result = -ENOSYS; + /* Initialize the syscall number and default return value. */ + UPT_SYSCALL_NR(r) = PT_SYSCALL_NR(r->gp); + PT_REGS_SET_SYSCALL_RETURN(regs, -ENOSYS); + + /* Do the secure computing check first; failures should be fast. */ + if (secure_computing() == -1) + return; + + if (syscall_trace_enter(regs)) goto out; - } - syscall = get_syscall(r); + /* Update the syscall number after orig_ax has potentially been updated + * with ptrace. + */ + UPT_SYSCALL_NR(r) = PT_SYSCALL_NR(r->gp); + syscall = UPT_SYSCALL_NR(r); - if ((syscall > __NR_syscall_max) || syscall < 0) - result = -ENOSYS; - else - result = EXECUTE_SYSCALL(syscall, regs); + if (syscall >= 0 && syscall <= __NR_syscall_max) + PT_REGS_SET_SYSCALL_RETURN(regs, + EXECUTE_SYSCALL(syscall, regs)); out: - PT_REGS_SET_SYSCALL_RETURN(regs, result); - syscall_trace_leave(regs); } diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c index 26e0164895e4..2db18cbbb0ea 100644 --- a/arch/um/os-Linux/file.c +++ b/arch/um/os-Linux/file.c @@ -264,6 +264,15 @@ int os_read_file(int fd, void *buf, int len) return n; } +int os_pread_file(int fd, void *buf, int len, unsigned long long offset) +{ + int n = pread(fd, buf, len, offset); + + if (n < 0) + return -errno; + return n; +} + int os_write_file(int fd, const void *buf, int len) { int n = write(fd, (void *) buf, len); @@ -282,6 +291,16 @@ int os_sync_file(int fd) return n; } +int os_pwrite_file(int fd, const void *buf, int len, unsigned long long offset) +{ + int n = pwrite(fd, (void *) buf, len, offset); + + if (n < 0) + return -errno; + return n; +} + + int os_file_size(const char *file, unsigned long long *size_out) { struct uml_stat buf; diff --git a/arch/um/os-Linux/mem.c b/arch/um/os-Linux/mem.c index 897e9ad0c108..8b1767668515 100644 --- a/arch/um/os-Linux/mem.c +++ b/arch/um/os-Linux/mem.c @@ -106,6 +106,17 @@ static int __init make_tempfile(const char *template) } } +#ifdef O_TMPFILE + fd = open(tempdir, O_CLOEXEC | O_RDWR | O_EXCL | O_TMPFILE, 0700); + /* + * If the running system does not support O_TMPFILE flag then retry + * without it. + */ + if (fd != -1 || (errno != EINVAL && errno != EISDIR && + errno != EOPNOTSUPP)) + return fd; +#endif + tempname = malloc(strlen(tempdir) + strlen(template) + 1); if (tempname == NULL) return -1; @@ -142,12 +153,6 @@ static int __init create_tmp_file(unsigned long long len) if (fd < 0) exit(1); - err = fchmod(fd, 0777); - if (err < 0) { - perror("fchmod"); - exit(1); - } - /* * Seek to len - 1 because writing a character there will * increase the file size by one byte, to the desired length. diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c index c211153ca69a..7801666514ed 100644 --- a/arch/um/os-Linux/signal.c +++ b/arch/um/os-Linux/signal.c @@ -62,6 +62,7 @@ static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc) static int signals_enabled; static unsigned int signals_pending; +static unsigned int signals_active = 0; void sig_handler(int sig, struct siginfo *si, mcontext_t *mc) { @@ -101,7 +102,12 @@ void timer_alarm_handler(int sig, struct siginfo *unused_si, mcontext_t *mc) block_signals(); + signals_active |= SIGALRM_MASK; + timer_real_alarm_handler(mc); + + signals_active &= ~SIGALRM_MASK; + set_signals(enabled); } @@ -286,8 +292,16 @@ void unblock_signals(void) if (save_pending & SIGIO_MASK) sig_handler_common(SIGIO, NULL, NULL); - if (save_pending & SIGALRM_MASK) + /* Do not reenter the handler */ + + if ((save_pending & SIGALRM_MASK) && (!(signals_active & SIGALRM_MASK))) timer_real_alarm_handler(NULL); + + /* Rerun the loop only if there is still pending SIGIO and not in TIMER handler */ + + if (!(signals_pending & SIGIO_MASK) && (signals_active & SIGALRM_MASK)) + return; + } } diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c index b856c66ebd3a..23025d645160 100644 --- a/arch/um/os-Linux/skas/process.c +++ b/arch/um/os-Linux/skas/process.c @@ -172,13 +172,6 @@ static void handle_trap(int pid, struct uml_pt_regs *regs, handle_syscall(regs); } -int get_syscall(struct uml_pt_regs *regs) -{ - UPT_SYSCALL_NR(regs) = PT_SYSCALL_NR(regs->gp); - - return UPT_SYSCALL_NR(regs); -} - extern char __syscall_stub_start[]; static int userspace_tramp(void *stack) diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c index 47f1ff056a54..22a358ef1b0c 100644 --- a/arch/um/os-Linux/start_up.c +++ b/arch/um/os-Linux/start_up.c @@ -94,6 +94,8 @@ static int start_ptraced_child(void) { int pid, n, status; + fflush(stdout); + pid = fork(); if (pid == 0) ptrace_child(); diff --git a/arch/unicore32/Kconfig b/arch/unicore32/Kconfig index c9faddc61100..877342640b6e 100644 --- a/arch/unicore32/Kconfig +++ b/arch/unicore32/Kconfig @@ -1,5 +1,6 @@ config UNICORE32 def_bool y + select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_MIGHT_HAVE_PC_SERIO select HAVE_MEMBLOCK @@ -33,9 +34,6 @@ config NO_IOPORT_MAP config STACKTRACE_SUPPORT def_bool y -config HAVE_LATENCYTOP_SUPPORT - def_bool y - config LOCKDEP_SUPPORT def_bool y diff --git a/arch/unicore32/Kconfig.debug b/arch/unicore32/Kconfig.debug index 1a3626239843..f075bbe1d46f 100644 --- a/arch/unicore32/Kconfig.debug +++ b/arch/unicore32/Kconfig.debug @@ -2,20 +2,6 @@ menu "Kernel hacking" source "lib/Kconfig.debug" -config STRICT_DEVMEM - bool "Filter access to /dev/mem" - depends on MMU - ---help--- - If this option is disabled, you allow userspace (root) access to all - of memory, including kernel and userspace memory. Accidental - access to this is obviously disastrous, but specific access can - be used by people debugging the kernel. - - If this option is switched on, the /dev/mem file only allows - userspace access to memory mapped peripherals. - - If in doubt, say Y. - config EARLY_PRINTK def_bool DEBUG_OCD help diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index db3622f22b61..4a10ba9e95da 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -24,6 +24,7 @@ config X86 select ARCH_DISCARD_MEMBLOCK select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS + select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_FAST_MULTIPLIER select ARCH_HAS_GCOV_PROFILE_ALL @@ -82,6 +83,8 @@ config X86 select HAVE_ARCH_KASAN if X86_64 && SPARSEMEM_VMEMMAP select HAVE_ARCH_KGDB select HAVE_ARCH_KMEMCHECK + select HAVE_ARCH_MMAP_RND_BITS if MMU + select HAVE_ARCH_MMAP_RND_COMPAT_BITS if MMU && COMPAT select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_SOFT_DIRTY if X86_64 select HAVE_ARCH_TRACEHOOK @@ -177,12 +180,23 @@ config LOCKDEP_SUPPORT config STACKTRACE_SUPPORT def_bool y -config HAVE_LATENCYTOP_SUPPORT - def_bool y - config MMU def_bool y +config ARCH_MMAP_RND_BITS_MIN + default 28 if 64BIT + default 8 + +config ARCH_MMAP_RND_BITS_MAX + default 32 if 64BIT + default 16 + +config ARCH_MMAP_RND_COMPAT_BITS_MIN + default 8 + +config ARCH_MMAP_RND_COMPAT_BITS_MAX + default 16 + config SBUS bool @@ -349,6 +363,17 @@ config X86_FEATURE_NAMES If in doubt, say Y. +config X86_FAST_FEATURE_TESTS + bool "Fast CPU feature tests" if EMBEDDED + default y + ---help--- + Some fast-paths in the kernel depend on the capabilities of the CPU. + Say Y here for the kernel to patch in the appropriate code at runtime + based on the capabilities of the CPU. The infrastructure for patching + code at runtime takes up some additional space; space-constrained + embedded systems may wish to say N here to produce smaller, slightly + slower code. + config X86_X2APIC bool "Support x2apic" depends on X86_LOCAL_APIC && X86_64 && (IRQ_REMAP || HYPERVISOR_GUEST) @@ -523,9 +548,10 @@ config X86_INTEL_QUARK config X86_INTEL_LPSS bool "Intel Low Power Subsystem Support" - depends on ACPI + depends on X86 && ACPI select COMMON_CLK select PINCTRL + select IOSF_MBI ---help--- Select to build support for Intel Low Power Subsystem such as found on Intel Lynxpoint PCH. Selecting this option enables @@ -687,6 +713,14 @@ config PARAVIRT_SPINLOCKS If you are unsure how to answer this question, answer Y. +config QUEUED_LOCK_STAT + bool "Paravirt queued spinlock statistics" + depends on PARAVIRT_SPINLOCKS && DEBUG_FS && QUEUED_SPINLOCKS + ---help--- + Enable the collection of statistical data on the slowpath + behavior of paravirtualized queued spinlocks and report + them on debugfs. + source "arch/x86/xen/Kconfig" config KVM_GUEST diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 137dfa96aa14..9b18ed97a8a2 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -5,23 +5,6 @@ config TRACE_IRQFLAGS_SUPPORT source "lib/Kconfig.debug" -config STRICT_DEVMEM - bool "Filter access to /dev/mem" - ---help--- - If this option is disabled, you allow userspace (root) access to all - of memory, including kernel and userspace memory. Accidental - access to this is obviously disastrous, but specific access can - be used by people debugging the kernel. Note that with PAT support - enabled, even in this case there are restrictions on /dev/mem - use due to the cache aliasing requirements. - - If this option is switched on, the /dev/mem file only allows - userspace access to PCI space and the BIOS code and data regions. - This is sufficient for dosemu and X and all common users of - /dev/mem. - - If in doubt, say Y. - config X86_VERBOSE_BOOTUP bool "Enable verbose x86 bootup info messages" default y @@ -69,7 +52,7 @@ config X86_PTDUMP_CORE def_bool n config X86_PTDUMP - bool "Export kernel pagetable layout to userspace via debugfs" + tristate "Export kernel pagetable layout to userspace via debugfs" depends on DEBUG_KERNEL select DEBUG_FS select X86_PTDUMP_CORE diff --git a/arch/x86/crypto/chacha20_glue.c b/arch/x86/crypto/chacha20_glue.c index 722bacea040e..8baaff5af0b5 100644 --- a/arch/x86/crypto/chacha20_glue.c +++ b/arch/x86/crypto/chacha20_glue.c @@ -125,7 +125,7 @@ static struct crypto_alg alg = { static int __init chacha20_simd_mod_init(void) { - if (!cpu_has_ssse3) + if (!boot_cpu_has(X86_FEATURE_SSSE3)) return -ENODEV; #ifdef CONFIG_AS_AVX2 diff --git a/arch/x86/crypto/crc32c-intel_glue.c b/arch/x86/crypto/crc32c-intel_glue.c index 81a595d75cf5..0e9871693f24 100644 --- a/arch/x86/crypto/crc32c-intel_glue.c +++ b/arch/x86/crypto/crc32c-intel_glue.c @@ -257,7 +257,7 @@ static int __init crc32c_intel_mod_init(void) if (!x86_match_cpu(crc32c_cpu_id)) return -ENODEV; #ifdef CONFIG_X86_64 - if (cpu_has_pclmulqdq) { + if (boot_cpu_has(X86_FEATURE_PCLMULQDQ)) { alg.update = crc32c_pcl_intel_update; alg.finup = crc32c_pcl_intel_finup; alg.digest = crc32c_pcl_intel_digest; diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c index 440df0c7a2ee..a69321a77783 100644 --- a/arch/x86/crypto/ghash-clmulni-intel_glue.c +++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c @@ -219,6 +219,29 @@ static int ghash_async_final(struct ahash_request *req) } } +static int ghash_async_import(struct ahash_request *req, const void *in) +{ + struct ahash_request *cryptd_req = ahash_request_ctx(req); + struct shash_desc *desc = cryptd_shash_desc(cryptd_req); + struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); + + ghash_async_init(req); + memcpy(dctx, in, sizeof(*dctx)); + return 0; + +} + +static int ghash_async_export(struct ahash_request *req, void *out) +{ + struct ahash_request *cryptd_req = ahash_request_ctx(req); + struct shash_desc *desc = cryptd_shash_desc(cryptd_req); + struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); + + memcpy(out, dctx, sizeof(*dctx)); + return 0; + +} + static int ghash_async_digest(struct ahash_request *req) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); @@ -288,8 +311,11 @@ static struct ahash_alg ghash_async_alg = { .final = ghash_async_final, .setkey = ghash_async_setkey, .digest = ghash_async_digest, + .export = ghash_async_export, + .import = ghash_async_import, .halg = { .digestsize = GHASH_DIGEST_SIZE, + .statesize = sizeof(struct ghash_desc_ctx), .base = { .cra_name = "ghash", .cra_driver_name = "ghash-clmulni", diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index 3c71dd947c7b..e32206e09868 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -1,3 +1,5 @@ +#include <linux/jump_label.h> + /* x86 function call convention, 64-bit: @@ -232,3 +234,16 @@ For 32-bit we have the following conventions - kernel is built with #endif /* CONFIG_X86_64 */ +/* + * This does 'call enter_from_user_mode' unless we can avoid it based on + * kernel config or using the static jump infrastructure. + */ +.macro CALL_enter_from_user_mode +#ifdef CONFIG_CONTEXT_TRACKING +#ifdef HAVE_JUMP_LABEL + STATIC_JUMP_IF_FALSE .Lafter_call_\@, context_tracking_enabled, def=0 +#endif + call enter_from_user_mode +.Lafter_call_\@: +#endif +.endm diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index f3b6d54e0042..77d8c5112900 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -329,7 +329,8 @@ sysenter_past_esp: * Return back to the vDSO, which will pop ecx and edx. * Don't bother with DS and ES (they already contain __USER_DS). */ - ENABLE_INTERRUPTS_SYSEXIT + sti + sysexit .pushsection .fixup, "ax" 2: movl $0, PT_FS(%esp) @@ -552,11 +553,6 @@ ENTRY(native_iret) iret _ASM_EXTABLE(native_iret, iret_exc) END(native_iret) - -ENTRY(native_irq_enable_sysexit) - sti - sysexit -END(native_irq_enable_sysexit) #endif ENTRY(overflow) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index a55697d19824..9d34d3cfceb6 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -520,9 +520,7 @@ END(irq_entries_start) */ TRACE_IRQS_OFF -#ifdef CONFIG_CONTEXT_TRACKING - call enter_from_user_mode -#endif + CALL_enter_from_user_mode 1: /* @@ -1066,9 +1064,7 @@ ENTRY(error_entry) * (which can take locks). */ TRACE_IRQS_OFF -#ifdef CONFIG_CONTEXT_TRACKING - call enter_from_user_mode -#endif + CALL_enter_from_user_mode ret .Lerror_entry_done: diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 6a1ae3751e82..ff1c6d61f332 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -18,13 +18,6 @@ .section .entry.text, "ax" -#ifdef CONFIG_PARAVIRT -ENTRY(native_usergs_sysret32) - swapgs - sysretl -ENDPROC(native_usergs_sysret32) -#endif - /* * 32-bit SYSENTER instruction entry. * @@ -103,15 +96,15 @@ ENTRY(entry_SYSENTER_compat) * This needs to happen before enabling interrupts so that * we don't get preempted with NT set. * - * NB.: sysenter_fix_flags is a label with the code under it moved + * NB.: .Lsysenter_fix_flags is a label with the code under it moved * out-of-line as an optimization: NT is unlikely to be set in the * majority of the cases and instead of polluting the I$ unnecessarily, * we're keeping that code behind a branch which will predict as * not-taken and therefore its instructions won't be fetched. */ testl $X86_EFLAGS_NT, EFLAGS(%rsp) - jnz sysenter_fix_flags -sysenter_flags_fixed: + jnz .Lsysenter_fix_flags +.Lsysenter_flags_fixed: /* * User mode is traced as though IRQs are on, and SYSENTER @@ -126,10 +119,10 @@ sysenter_flags_fixed: "jmp .Lsyscall_32_done", X86_FEATURE_XENPV jmp sysret32_from_system_call -sysenter_fix_flags: +.Lsysenter_fix_flags: pushq $X86_EFLAGS_FIXED popfq - jmp sysenter_flags_fixed + jmp .Lsysenter_flags_fixed ENDPROC(entry_SYSENTER_compat) /* @@ -238,7 +231,8 @@ sysret32_from_system_call: xorq %r9, %r9 xorq %r10, %r10 movq RSP-ORIG_RAX(%rsp), %rsp - USERGS_SYSRET32 + swapgs + sysretl END(entry_SYSCALL_compat) /* diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index f17705e1332c..cb713df81180 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -383,3 +383,4 @@ 374 i386 userfaultfd sys_userfaultfd 375 i386 membarrier sys_membarrier 376 i386 mlock2 sys_mlock2 +377 i386 copy_file_range sys_copy_file_range diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index 314a90bfc09c..dc1040a50bdc 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -332,6 +332,7 @@ 323 common userfaultfd sys_userfaultfd 324 common membarrier sys_membarrier 325 common mlock2 sys_mlock2 +326 common copy_file_range sys_copy_file_range # # x32-specific system call numbers start at 512 to avoid cache impact diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c index ca94fa649251..1a50e09c945b 100644 --- a/arch/x86/entry/vdso/vclock_gettime.c +++ b/arch/x86/entry/vdso/vclock_gettime.c @@ -17,8 +17,10 @@ #include <asm/vvar.h> #include <asm/unistd.h> #include <asm/msr.h> +#include <asm/pvclock.h> #include <linux/math64.h> #include <linux/time.h> +#include <linux/kernel.h> #define gtod (&VVAR(vsyscall_gtod_data)) @@ -36,12 +38,12 @@ static notrace cycle_t vread_hpet(void) } #endif -#ifndef BUILD_VDSO32 +#ifdef CONFIG_PARAVIRT_CLOCK +extern u8 pvclock_page + __attribute__((visibility("hidden"))); +#endif -#include <linux/kernel.h> -#include <asm/vsyscall.h> -#include <asm/fixmap.h> -#include <asm/pvclock.h> +#ifndef BUILD_VDSO32 notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) { @@ -60,75 +62,6 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz) return ret; } -#ifdef CONFIG_PARAVIRT_CLOCK - -static notrace const struct pvclock_vsyscall_time_info *get_pvti(int cpu) -{ - const struct pvclock_vsyscall_time_info *pvti_base; - int idx = cpu / (PAGE_SIZE/PVTI_SIZE); - int offset = cpu % (PAGE_SIZE/PVTI_SIZE); - - BUG_ON(PVCLOCK_FIXMAP_BEGIN + idx > PVCLOCK_FIXMAP_END); - - pvti_base = (struct pvclock_vsyscall_time_info *) - __fix_to_virt(PVCLOCK_FIXMAP_BEGIN+idx); - - return &pvti_base[offset]; -} - -static notrace cycle_t vread_pvclock(int *mode) -{ - const struct pvclock_vsyscall_time_info *pvti; - cycle_t ret; - u64 last; - u32 version; - u8 flags; - unsigned cpu, cpu1; - - - /* - * Note: hypervisor must guarantee that: - * 1. cpu ID number maps 1:1 to per-CPU pvclock time info. - * 2. that per-CPU pvclock time info is updated if the - * underlying CPU changes. - * 3. that version is increased whenever underlying CPU - * changes. - * - */ - do { - cpu = __getcpu() & VGETCPU_CPU_MASK; - /* TODO: We can put vcpu id into higher bits of pvti.version. - * This will save a couple of cycles by getting rid of - * __getcpu() calls (Gleb). - */ - - pvti = get_pvti(cpu); - - version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags); - - /* - * Test we're still on the cpu as well as the version. - * We could have been migrated just after the first - * vgetcpu but before fetching the version, so we - * wouldn't notice a version change. - */ - cpu1 = __getcpu() & VGETCPU_CPU_MASK; - } while (unlikely(cpu != cpu1 || - (pvti->pvti.version & 1) || - pvti->pvti.version != version)); - - if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT))) - *mode = VCLOCK_NONE; - - /* refer to tsc.c read_tsc() comment for rationale */ - last = gtod->cycle_last; - - if (likely(ret >= last)) - return ret; - - return last; -} -#endif #else @@ -162,15 +95,77 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz) return ret; } +#endif + #ifdef CONFIG_PARAVIRT_CLOCK +static notrace const struct pvclock_vsyscall_time_info *get_pvti0(void) +{ + return (const struct pvclock_vsyscall_time_info *)&pvclock_page; +} static notrace cycle_t vread_pvclock(int *mode) { - *mode = VCLOCK_NONE; - return 0; -} -#endif + const struct pvclock_vcpu_time_info *pvti = &get_pvti0()->pvti; + cycle_t ret; + u64 tsc, pvti_tsc; + u64 last, delta, pvti_system_time; + u32 version, pvti_tsc_to_system_mul, pvti_tsc_shift; + /* + * Note: The kernel and hypervisor must guarantee that cpu ID + * number maps 1:1 to per-CPU pvclock time info. + * + * Because the hypervisor is entirely unaware of guest userspace + * preemption, it cannot guarantee that per-CPU pvclock time + * info is updated if the underlying CPU changes or that that + * version is increased whenever underlying CPU changes. + * + * On KVM, we are guaranteed that pvti updates for any vCPU are + * atomic as seen by *all* vCPUs. This is an even stronger + * guarantee than we get with a normal seqlock. + * + * On Xen, we don't appear to have that guarantee, but Xen still + * supplies a valid seqlock using the version field. + * + * We only do pvclock vdso timing at all if + * PVCLOCK_TSC_STABLE_BIT is set, and we interpret that bit to + * mean that all vCPUs have matching pvti and that the TSC is + * synced, so we can just look at vCPU 0's pvti. + */ + + do { + version = pvti->version; + + smp_rmb(); + + if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT))) { + *mode = VCLOCK_NONE; + return 0; + } + + tsc = rdtsc_ordered(); + pvti_tsc_to_system_mul = pvti->tsc_to_system_mul; + pvti_tsc_shift = pvti->tsc_shift; + pvti_system_time = pvti->system_time; + pvti_tsc = pvti->tsc_timestamp; + + /* Make sure that the version double-check is last. */ + smp_rmb(); + } while (unlikely((version & 1) || version != pvti->version)); + + delta = tsc - pvti_tsc; + ret = pvti_system_time + + pvclock_scale_delta(delta, pvti_tsc_to_system_mul, + pvti_tsc_shift); + + /* refer to vread_tsc() comment for rationale */ + last = gtod->cycle_last; + + if (likely(ret >= last)) + return ret; + + return last; +} #endif notrace static cycle_t vread_tsc(void) diff --git a/arch/x86/entry/vdso/vdso-layout.lds.S b/arch/x86/entry/vdso/vdso-layout.lds.S index de2c921025f5..4158acc17df0 100644 --- a/arch/x86/entry/vdso/vdso-layout.lds.S +++ b/arch/x86/entry/vdso/vdso-layout.lds.S @@ -25,7 +25,7 @@ SECTIONS * segment. */ - vvar_start = . - 2 * PAGE_SIZE; + vvar_start = . - 3 * PAGE_SIZE; vvar_page = vvar_start; /* Place all vvars at the offsets in asm/vvar.h. */ @@ -36,6 +36,7 @@ SECTIONS #undef EMIT_VVAR hpet_page = vvar_start + PAGE_SIZE; + pvclock_page = vvar_start + 2 * PAGE_SIZE; . = SIZEOF_HEADERS; diff --git a/arch/x86/entry/vdso/vdso2c.c b/arch/x86/entry/vdso/vdso2c.c index 785d9922b106..491020b2826d 100644 --- a/arch/x86/entry/vdso/vdso2c.c +++ b/arch/x86/entry/vdso/vdso2c.c @@ -73,6 +73,7 @@ enum { sym_vvar_start, sym_vvar_page, sym_hpet_page, + sym_pvclock_page, sym_VDSO_FAKE_SECTION_TABLE_START, sym_VDSO_FAKE_SECTION_TABLE_END, }; @@ -80,6 +81,7 @@ enum { const int special_pages[] = { sym_vvar_page, sym_hpet_page, + sym_pvclock_page, }; struct vdso_sym { @@ -91,6 +93,7 @@ struct vdso_sym required_syms[] = { [sym_vvar_start] = {"vvar_start", true}, [sym_vvar_page] = {"vvar_page", true}, [sym_hpet_page] = {"hpet_page", true}, + [sym_pvclock_page] = {"pvclock_page", true}, [sym_VDSO_FAKE_SECTION_TABLE_START] = { "VDSO_FAKE_SECTION_TABLE_START", false }, diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index 64df47148160..b8f69e264ac4 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -12,6 +12,7 @@ #include <linux/random.h> #include <linux/elf.h> #include <linux/cpu.h> +#include <asm/pvclock.h> #include <asm/vgtod.h> #include <asm/proto.h> #include <asm/vdso.h> @@ -100,6 +101,7 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr) .name = "[vvar]", .pages = no_pages, }; + struct pvclock_vsyscall_time_info *pvti; if (calculate_addr) { addr = vdso_addr(current->mm->start_stack, @@ -169,6 +171,18 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr) } #endif + pvti = pvclock_pvti_cpu0_va(); + if (pvti && image->sym_pvclock_page) { + ret = remap_pfn_range(vma, + text_start + image->sym_pvclock_page, + __pa(pvti) >> PAGE_SHIFT, + PAGE_SIZE, + PAGE_READONLY); + + if (ret) + goto up_fail; + } + up_fail: if (ret) current->mm->context.vdso = NULL; diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index a30316bf801a..c80f6b6f3da2 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -23,6 +23,11 @@ #define APIC_VERBOSE 1 #define APIC_DEBUG 2 +/* Macros for apic_extnmi which controls external NMI masking */ +#define APIC_EXTNMI_BSP 0 /* Default */ +#define APIC_EXTNMI_ALL 1 +#define APIC_EXTNMI_NONE 2 + /* * Define the default level of output to be very little * This can be turned up by using apic=verbose for more @@ -303,6 +308,7 @@ struct apic { unsigned int *apicid); /* ipi */ + void (*send_IPI)(int cpu, int vector); void (*send_IPI_mask)(const struct cpumask *mask, int vector); void (*send_IPI_mask_allbutself)(const struct cpumask *mask, int vector); diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index ae5fb83e6d91..3e8674288198 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h @@ -3,7 +3,6 @@ #include <linux/compiler.h> #include <linux/types.h> -#include <asm/processor.h> #include <asm/alternative.h> #include <asm/cmpxchg.h> #include <asm/rmwcc.h> diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h index a11c30b77fb5..a984111135b1 100644 --- a/arch/x86/include/asm/atomic64_32.h +++ b/arch/x86/include/asm/atomic64_32.h @@ -3,7 +3,6 @@ #include <linux/compiler.h> #include <linux/types.h> -#include <asm/processor.h> //#include <asm/cmpxchg.h> /* An 64bit atomic type */ diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h index 4fa687a47a62..6b8d6e8cd449 100644 --- a/arch/x86/include/asm/boot.h +++ b/arch/x86/include/asm/boot.h @@ -27,7 +27,7 @@ #define BOOT_HEAP_SIZE 0x400000 #else /* !CONFIG_KERNEL_BZIP2 */ -#define BOOT_HEAP_SIZE 0x8000 +#define BOOT_HEAP_SIZE 0x10000 #endif /* !CONFIG_KERNEL_BZIP2 */ diff --git a/arch/x86/include/asm/calgary.h b/arch/x86/include/asm/calgary.h index 0d467b338835..a8303ebe089f 100644 --- a/arch/x86/include/asm/calgary.h +++ b/arch/x86/include/asm/calgary.h @@ -31,7 +31,7 @@ #include <asm/types.h> struct iommu_table { - struct cal_chipset_ops *chip_ops; /* chipset specific funcs */ + const struct cal_chipset_ops *chip_ops; /* chipset specific funcs */ unsigned long it_base; /* mapped address of tce table */ unsigned long it_hint; /* Hint for next alloc */ unsigned long *it_map; /* A simple allocation bitmap for now */ diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h index f7e142926481..e4959d023af8 100644 --- a/arch/x86/include/asm/cmpxchg_32.h +++ b/arch/x86/include/asm/cmpxchg_32.h @@ -109,6 +109,6 @@ static inline u64 __cmpxchg64_local(volatile u64 *ptr, u64 old, u64 new) #endif -#define system_has_cmpxchg_double() cpu_has_cx8 +#define system_has_cmpxchg_double() boot_cpu_has(X86_FEATURE_CX8) #endif /* _ASM_X86_CMPXCHG_32_H */ diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h index 1af94697aae5..caa23a34c963 100644 --- a/arch/x86/include/asm/cmpxchg_64.h +++ b/arch/x86/include/asm/cmpxchg_64.h @@ -18,6 +18,6 @@ static inline void set_64bit(volatile u64 *ptr, u64 val) cmpxchg_local((ptr), (o), (n)); \ }) -#define system_has_cmpxchg_double() cpu_has_cx16 +#define system_has_cmpxchg_double() boot_cpu_has(X86_FEATURE_CX16) #endif /* _ASM_X86_CMPXCHG_64_H */ diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h index bf2caa1dedc5..678637ad7476 100644 --- a/arch/x86/include/asm/cpu.h +++ b/arch/x86/include/asm/cpu.h @@ -36,4 +36,7 @@ extern int _debug_hotplug_cpu(int cpu, int action); int mwait_usable(const struct cpuinfo_x86 *); +unsigned int x86_family(unsigned int sig); +unsigned int x86_model(unsigned int sig); +unsigned int x86_stepping(unsigned int sig); #endif /* _ASM_X86_CPU_H */ diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index f7ba9fbf12ee..7ad8c9464297 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -12,7 +12,7 @@ #include <asm/disabled-features.h> #endif -#define NCAPINTS 14 /* N 32-bit words worth of info */ +#define NCAPINTS 16 /* N 32-bit words worth of info */ #define NBUGINTS 1 /* N 32-bit bug flags */ /* @@ -181,22 +181,17 @@ /* * Auxiliary flags: Linux defined - For features scattered in various - * CPUID levels like 0x6, 0xA etc, word 7 + * CPUID levels like 0x6, 0xA etc, word 7. + * + * Reuse free bits when adding new feature flags! */ -#define X86_FEATURE_IDA ( 7*32+ 0) /* Intel Dynamic Acceleration */ -#define X86_FEATURE_ARAT ( 7*32+ 1) /* Always Running APIC Timer */ + #define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */ #define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */ -#define X86_FEATURE_PLN ( 7*32+ 5) /* Intel Power Limit Notification */ -#define X86_FEATURE_PTS ( 7*32+ 6) /* Intel Package Thermal Status */ -#define X86_FEATURE_DTHERM ( 7*32+ 7) /* Digital Thermal Sensor */ + #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ -#define X86_FEATURE_HWP ( 7*32+ 10) /* "hwp" Intel HWP */ -#define X86_FEATURE_HWP_NOTIFY ( 7*32+ 11) /* Intel HWP_NOTIFY */ -#define X86_FEATURE_HWP_ACT_WINDOW ( 7*32+ 12) /* Intel HWP_ACT_WINDOW */ -#define X86_FEATURE_HWP_EPP ( 7*32+13) /* Intel HWP_EPP */ -#define X86_FEATURE_HWP_PKG_REQ ( 7*32+14) /* Intel HWP_PKG_REQ */ + #define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ /* Virtualization flags: Linux defined, word 8 */ @@ -205,16 +200,7 @@ #define X86_FEATURE_FLEXPRIORITY ( 8*32+ 2) /* Intel FlexPriority */ #define X86_FEATURE_EPT ( 8*32+ 3) /* Intel Extended Page Table */ #define X86_FEATURE_VPID ( 8*32+ 4) /* Intel Virtual Processor ID */ -#define X86_FEATURE_NPT ( 8*32+ 5) /* AMD Nested Page Table support */ -#define X86_FEATURE_LBRV ( 8*32+ 6) /* AMD LBR Virtualization support */ -#define X86_FEATURE_SVML ( 8*32+ 7) /* "svm_lock" AMD SVM locking MSR */ -#define X86_FEATURE_NRIPS ( 8*32+ 8) /* "nrip_save" AMD SVM next_rip save */ -#define X86_FEATURE_TSCRATEMSR ( 8*32+ 9) /* "tsc_scale" AMD TSC scaling support */ -#define X86_FEATURE_VMCBCLEAN ( 8*32+10) /* "vmcb_clean" AMD VMCB clean bits support */ -#define X86_FEATURE_FLUSHBYASID ( 8*32+11) /* AMD flush-by-ASID support */ -#define X86_FEATURE_DECODEASSISTS ( 8*32+12) /* AMD Decode Assists support */ -#define X86_FEATURE_PAUSEFILTER ( 8*32+13) /* AMD filtered pause intercept */ -#define X86_FEATURE_PFTHRESHOLD ( 8*32+14) /* AMD pause filter threshold */ + #define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer vmmcall to vmcall */ #define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */ @@ -259,6 +245,30 @@ /* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */ #define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */ +/* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */ +#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ +#define X86_FEATURE_IDA (14*32+ 1) /* Intel Dynamic Acceleration */ +#define X86_FEATURE_ARAT (14*32+ 2) /* Always Running APIC Timer */ +#define X86_FEATURE_PLN (14*32+ 4) /* Intel Power Limit Notification */ +#define X86_FEATURE_PTS (14*32+ 6) /* Intel Package Thermal Status */ +#define X86_FEATURE_HWP (14*32+ 7) /* Intel Hardware P-states */ +#define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* HWP Notification */ +#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */ +#define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */ +#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */ + +/* AMD SVM Feature Identification, CPUID level 0x8000000a (edx), word 15 */ +#define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */ +#define X86_FEATURE_LBRV (15*32+ 1) /* LBR Virtualization support */ +#define X86_FEATURE_SVML (15*32+ 2) /* "svm_lock" SVM locking MSR */ +#define X86_FEATURE_NRIPS (15*32+ 3) /* "nrip_save" SVM next_rip save */ +#define X86_FEATURE_TSCRATEMSR (15*32+ 4) /* "tsc_scale" TSC scaling support */ +#define X86_FEATURE_VMCBCLEAN (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */ +#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* flush-by-ASID support */ +#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */ +#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */ +#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */ + /* * BUG word(s) */ @@ -279,6 +289,26 @@ #include <asm/asm.h> #include <linux/bitops.h> +enum cpuid_leafs +{ + CPUID_1_EDX = 0, + CPUID_8000_0001_EDX, + CPUID_8086_0001_EDX, + CPUID_LNX_1, + CPUID_1_ECX, + CPUID_C000_0001_EDX, + CPUID_8000_0001_ECX, + CPUID_LNX_2, + CPUID_LNX_3, + CPUID_7_0_EBX, + CPUID_D_1_EAX, + CPUID_F_0_EDX, + CPUID_F_1_EDX, + CPUID_8000_0008_EBX, + CPUID_6_EAX, + CPUID_8000_000A_EDX, +}; + #ifdef CONFIG_X86_FEATURE_NAMES extern const char * const x86_cap_flags[NCAPINTS*32]; extern const char * const x86_power_flags[32]; @@ -356,60 +386,31 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; } while (0) #define cpu_has_fpu boot_cpu_has(X86_FEATURE_FPU) -#define cpu_has_de boot_cpu_has(X86_FEATURE_DE) #define cpu_has_pse boot_cpu_has(X86_FEATURE_PSE) #define cpu_has_tsc boot_cpu_has(X86_FEATURE_TSC) #define cpu_has_pge boot_cpu_has(X86_FEATURE_PGE) #define cpu_has_apic boot_cpu_has(X86_FEATURE_APIC) -#define cpu_has_sep boot_cpu_has(X86_FEATURE_SEP) -#define cpu_has_mtrr boot_cpu_has(X86_FEATURE_MTRR) -#define cpu_has_mmx boot_cpu_has(X86_FEATURE_MMX) #define cpu_has_fxsr boot_cpu_has(X86_FEATURE_FXSR) #define cpu_has_xmm boot_cpu_has(X86_FEATURE_XMM) #define cpu_has_xmm2 boot_cpu_has(X86_FEATURE_XMM2) -#define cpu_has_xmm3 boot_cpu_has(X86_FEATURE_XMM3) -#define cpu_has_ssse3 boot_cpu_has(X86_FEATURE_SSSE3) #define cpu_has_aes boot_cpu_has(X86_FEATURE_AES) #define cpu_has_avx boot_cpu_has(X86_FEATURE_AVX) #define cpu_has_avx2 boot_cpu_has(X86_FEATURE_AVX2) -#define cpu_has_ht boot_cpu_has(X86_FEATURE_HT) -#define cpu_has_nx boot_cpu_has(X86_FEATURE_NX) -#define cpu_has_xstore boot_cpu_has(X86_FEATURE_XSTORE) -#define cpu_has_xstore_enabled boot_cpu_has(X86_FEATURE_XSTORE_EN) -#define cpu_has_xcrypt boot_cpu_has(X86_FEATURE_XCRYPT) -#define cpu_has_xcrypt_enabled boot_cpu_has(X86_FEATURE_XCRYPT_EN) -#define cpu_has_ace2 boot_cpu_has(X86_FEATURE_ACE2) -#define cpu_has_ace2_enabled boot_cpu_has(X86_FEATURE_ACE2_EN) -#define cpu_has_phe boot_cpu_has(X86_FEATURE_PHE) -#define cpu_has_phe_enabled boot_cpu_has(X86_FEATURE_PHE_EN) -#define cpu_has_pmm boot_cpu_has(X86_FEATURE_PMM) -#define cpu_has_pmm_enabled boot_cpu_has(X86_FEATURE_PMM_EN) -#define cpu_has_ds boot_cpu_has(X86_FEATURE_DS) -#define cpu_has_pebs boot_cpu_has(X86_FEATURE_PEBS) #define cpu_has_clflush boot_cpu_has(X86_FEATURE_CLFLUSH) -#define cpu_has_bts boot_cpu_has(X86_FEATURE_BTS) #define cpu_has_gbpages boot_cpu_has(X86_FEATURE_GBPAGES) #define cpu_has_arch_perfmon boot_cpu_has(X86_FEATURE_ARCH_PERFMON) #define cpu_has_pat boot_cpu_has(X86_FEATURE_PAT) -#define cpu_has_xmm4_1 boot_cpu_has(X86_FEATURE_XMM4_1) -#define cpu_has_xmm4_2 boot_cpu_has(X86_FEATURE_XMM4_2) #define cpu_has_x2apic boot_cpu_has(X86_FEATURE_X2APIC) #define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE) -#define cpu_has_xsaveopt boot_cpu_has(X86_FEATURE_XSAVEOPT) #define cpu_has_xsaves boot_cpu_has(X86_FEATURE_XSAVES) #define cpu_has_osxsave boot_cpu_has(X86_FEATURE_OSXSAVE) #define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR) -#define cpu_has_pclmulqdq boot_cpu_has(X86_FEATURE_PCLMULQDQ) -#define cpu_has_perfctr_core boot_cpu_has(X86_FEATURE_PERFCTR_CORE) -#define cpu_has_perfctr_nb boot_cpu_has(X86_FEATURE_PERFCTR_NB) -#define cpu_has_perfctr_l2 boot_cpu_has(X86_FEATURE_PERFCTR_L2) -#define cpu_has_cx8 boot_cpu_has(X86_FEATURE_CX8) -#define cpu_has_cx16 boot_cpu_has(X86_FEATURE_CX16) -#define cpu_has_eager_fpu boot_cpu_has(X86_FEATURE_EAGER_FPU) -#define cpu_has_topoext boot_cpu_has(X86_FEATURE_TOPOEXT) -#define cpu_has_bpext boot_cpu_has(X86_FEATURE_BPEXT) - -#if __GNUC__ >= 4 +/* + * Do not add any more of those clumsy macros - use static_cpu_has_safe() for + * fast paths and boot_cpu_has() otherwise! + */ + +#if __GNUC__ >= 4 && defined(CONFIG_X86_FAST_FEATURE_TESTS) extern void warn_pre_alternatives(void); extern bool __static_cpu_has_safe(u16 bit); diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index f80d70009ff8..6d7d0e52ed5a 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h @@ -19,7 +19,6 @@ #include <asm/acpi.h> #include <asm/apicdef.h> #include <asm/page.h> -#include <asm/pvclock.h> #ifdef CONFIG_X86_32 #include <linux/threads.h> #include <asm/kmap_types.h> @@ -72,10 +71,6 @@ enum fixed_addresses { #ifdef CONFIG_X86_VSYSCALL_EMULATION VSYSCALL_PAGE = (FIXADDR_TOP - VSYSCALL_ADDR) >> PAGE_SHIFT, #endif -#ifdef CONFIG_PARAVIRT_CLOCK - PVCLOCK_FIXMAP_BEGIN, - PVCLOCK_FIXMAP_END = PVCLOCK_FIXMAP_BEGIN+PVCLOCK_VSYSCALL_NR_PAGES-1, -#endif #endif FIX_DBGP_BASE, FIX_EARLYCON_MEM_BASE, diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 3c3550c3a4a3..0fd440df63f1 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -42,6 +42,7 @@ extern void fpu__init_cpu_xstate(void); extern void fpu__init_system(struct cpuinfo_x86 *c); extern void fpu__init_check_bugs(void); extern void fpu__resume_cpu(void); +extern u64 fpu__get_supported_xfeatures_mask(void); /* * Debugging facility: @@ -224,18 +225,67 @@ static inline void copy_fxregs_to_kernel(struct fpu *fpu) #define XRSTOR ".byte " REX_PREFIX "0x0f,0xae,0x2f" #define XRSTORS ".byte " REX_PREFIX "0x0f,0xc7,0x1f" -/* xstate instruction fault handler: */ -#define xstate_fault(__err) \ - \ - ".section .fixup,\"ax\"\n" \ - \ - "3: movl $-2,%[_err]\n" \ - " jmp 2b\n" \ - \ - ".previous\n" \ - \ - _ASM_EXTABLE(1b, 3b) \ - : [_err] "=r" (__err) +#define XSTATE_OP(op, st, lmask, hmask, err) \ + asm volatile("1:" op "\n\t" \ + "xor %[err], %[err]\n" \ + "2:\n\t" \ + ".pushsection .fixup,\"ax\"\n\t" \ + "3: movl $-2,%[err]\n\t" \ + "jmp 2b\n\t" \ + ".popsection\n\t" \ + _ASM_EXTABLE(1b, 3b) \ + : [err] "=r" (err) \ + : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \ + : "memory") + +/* + * If XSAVES is enabled, it replaces XSAVEOPT because it supports a compact + * format and supervisor states in addition to modified optimization in + * XSAVEOPT. + * + * Otherwise, if XSAVEOPT is enabled, XSAVEOPT replaces XSAVE because XSAVEOPT + * supports modified optimization which is not supported by XSAVE. + * + * We use XSAVE as a fallback. + * + * The 661 label is defined in the ALTERNATIVE* macros as the address of the + * original instruction which gets replaced. We need to use it here as the + * address of the instruction where we might get an exception at. + */ +#define XSTATE_XSAVE(st, lmask, hmask, err) \ + asm volatile(ALTERNATIVE_2(XSAVE, \ + XSAVEOPT, X86_FEATURE_XSAVEOPT, \ + XSAVES, X86_FEATURE_XSAVES) \ + "\n" \ + "xor %[err], %[err]\n" \ + "3:\n" \ + ".pushsection .fixup,\"ax\"\n" \ + "4: movl $-2, %[err]\n" \ + "jmp 3b\n" \ + ".popsection\n" \ + _ASM_EXTABLE(661b, 4b) \ + : [err] "=r" (err) \ + : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \ + : "memory") + +/* + * Use XRSTORS to restore context if it is enabled. XRSTORS supports compact + * XSAVE area format. + */ +#define XSTATE_XRESTORE(st, lmask, hmask, err) \ + asm volatile(ALTERNATIVE(XRSTOR, \ + XRSTORS, X86_FEATURE_XSAVES) \ + "\n" \ + "xor %[err], %[err]\n" \ + "3:\n" \ + ".pushsection .fixup,\"ax\"\n" \ + "4: movl $-2, %[err]\n" \ + "jmp 3b\n" \ + ".popsection\n" \ + _ASM_EXTABLE(661b, 4b) \ + : [err] "=r" (err) \ + : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \ + : "memory") /* * This function is called only during boot time when x86 caps are not set @@ -246,22 +296,14 @@ static inline void copy_xregs_to_kernel_booting(struct xregs_state *xstate) u64 mask = -1; u32 lmask = mask; u32 hmask = mask >> 32; - int err = 0; + int err; WARN_ON(system_state != SYSTEM_BOOTING); - if (boot_cpu_has(X86_FEATURE_XSAVES)) - asm volatile("1:"XSAVES"\n\t" - "2:\n\t" - xstate_fault(err) - : "D" (xstate), "m" (*xstate), "a" (lmask), "d" (hmask), "0" (err) - : "memory"); + if (static_cpu_has_safe(X86_FEATURE_XSAVES)) + XSTATE_OP(XSAVES, xstate, lmask, hmask, err); else - asm volatile("1:"XSAVE"\n\t" - "2:\n\t" - xstate_fault(err) - : "D" (xstate), "m" (*xstate), "a" (lmask), "d" (hmask), "0" (err) - : "memory"); + XSTATE_OP(XSAVE, xstate, lmask, hmask, err); /* We should never fault when copying to a kernel buffer: */ WARN_ON_FPU(err); @@ -276,22 +318,14 @@ static inline void copy_kernel_to_xregs_booting(struct xregs_state *xstate) u64 mask = -1; u32 lmask = mask; u32 hmask = mask >> 32; - int err = 0; + int err; WARN_ON(system_state != SYSTEM_BOOTING); - if (boot_cpu_has(X86_FEATURE_XSAVES)) - asm volatile("1:"XRSTORS"\n\t" - "2:\n\t" - xstate_fault(err) - : "D" (xstate), "m" (*xstate), "a" (lmask), "d" (hmask), "0" (err) - : "memory"); + if (static_cpu_has_safe(X86_FEATURE_XSAVES)) + XSTATE_OP(XRSTORS, xstate, lmask, hmask, err); else - asm volatile("1:"XRSTOR"\n\t" - "2:\n\t" - xstate_fault(err) - : "D" (xstate), "m" (*xstate), "a" (lmask), "d" (hmask), "0" (err) - : "memory"); + XSTATE_OP(XRSTOR, xstate, lmask, hmask, err); /* We should never fault when copying from a kernel buffer: */ WARN_ON_FPU(err); @@ -305,33 +339,11 @@ static inline void copy_xregs_to_kernel(struct xregs_state *xstate) u64 mask = -1; u32 lmask = mask; u32 hmask = mask >> 32; - int err = 0; + int err; WARN_ON(!alternatives_patched); - /* - * If xsaves is enabled, xsaves replaces xsaveopt because - * it supports compact format and supervisor states in addition to - * modified optimization in xsaveopt. - * - * Otherwise, if xsaveopt is enabled, xsaveopt replaces xsave - * because xsaveopt supports modified optimization which is not - * supported by xsave. - * - * If none of xsaves and xsaveopt is enabled, use xsave. - */ - alternative_input_2( - "1:"XSAVE, - XSAVEOPT, - X86_FEATURE_XSAVEOPT, - XSAVES, - X86_FEATURE_XSAVES, - [xstate] "D" (xstate), "a" (lmask), "d" (hmask) : - "memory"); - asm volatile("2:\n\t" - xstate_fault(err) - : "0" (err) - : "memory"); + XSTATE_XSAVE(xstate, lmask, hmask, err); /* We should never fault when copying to a kernel buffer: */ WARN_ON_FPU(err); @@ -344,23 +356,9 @@ static inline void copy_kernel_to_xregs(struct xregs_state *xstate, u64 mask) { u32 lmask = mask; u32 hmask = mask >> 32; - int err = 0; + int err; - /* - * Use xrstors to restore context if it is enabled. xrstors supports - * compacted format of xsave area which is not supported by xrstor. - */ - alternative_input( - "1: " XRSTOR, - XRSTORS, - X86_FEATURE_XSAVES, - "D" (xstate), "m" (*xstate), "a" (lmask), "d" (hmask) - : "memory"); - - asm volatile("2:\n" - xstate_fault(err) - : "0" (err) - : "memory"); + XSTATE_XRESTORE(xstate, lmask, hmask, err); /* We should never fault when copying from a kernel buffer: */ WARN_ON_FPU(err); @@ -388,12 +386,10 @@ static inline int copy_xregs_to_user(struct xregs_state __user *buf) if (unlikely(err)) return -EFAULT; - __asm__ __volatile__(ASM_STAC "\n" - "1:"XSAVE"\n" - "2: " ASM_CLAC "\n" - xstate_fault(err) - : "D" (buf), "a" (-1), "d" (-1), "0" (err) - : "memory"); + stac(); + XSTATE_OP(XSAVE, buf, -1, -1, err); + clac(); + return err; } @@ -405,14 +401,12 @@ static inline int copy_user_to_xregs(struct xregs_state __user *buf, u64 mask) struct xregs_state *xstate = ((__force struct xregs_state *)buf); u32 lmask = mask; u32 hmask = mask >> 32; - int err = 0; - - __asm__ __volatile__(ASM_STAC "\n" - "1:"XRSTOR"\n" - "2: " ASM_CLAC "\n" - xstate_fault(err) - : "D" (xstate), "a" (lmask), "d" (hmask), "0" (err) - : "memory"); /* memory required? */ + int err; + + stac(); + XSTATE_OP(XRSTOR, xstate, lmask, hmask, err); + clac(); + return err; } diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index 3a6c89b70307..af30fdeb140d 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -20,15 +20,16 @@ /* Supported features which support lazy state saving */ #define XFEATURE_MASK_LAZY (XFEATURE_MASK_FP | \ - XFEATURE_MASK_SSE | \ + XFEATURE_MASK_SSE) + +/* Supported features which require eager state saving */ +#define XFEATURE_MASK_EAGER (XFEATURE_MASK_BNDREGS | \ + XFEATURE_MASK_BNDCSR | \ XFEATURE_MASK_YMM | \ - XFEATURE_MASK_OPMASK | \ + XFEATURE_MASK_OPMASK | \ XFEATURE_MASK_ZMM_Hi256 | \ XFEATURE_MASK_Hi16_ZMM) -/* Supported features which require eager state saving */ -#define XFEATURE_MASK_EAGER (XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR) - /* All currently supported features */ #define XCNTXT_MASK (XFEATURE_MASK_LAZY | XFEATURE_MASK_EAGER) diff --git a/arch/x86/include/asm/intel_pt.h b/arch/x86/include/asm/intel_pt.h new file mode 100644 index 000000000000..e1a411786bf5 --- /dev/null +++ b/arch/x86/include/asm/intel_pt.h @@ -0,0 +1,10 @@ +#ifndef _ASM_X86_INTEL_PT_H +#define _ASM_X86_INTEL_PT_H + +#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL) +void cpu_emergency_stop_pt(void); +#else +static inline void cpu_emergency_stop_pt(void) {} +#endif + +#endif /* _ASM_X86_INTEL_PT_H */ diff --git a/arch/x86/include/asm/iosf_mbi.h b/arch/x86/include/asm/iosf_mbi.h index b72ad0faa6c5..b41ee164930a 100644 --- a/arch/x86/include/asm/iosf_mbi.h +++ b/arch/x86/include/asm/iosf_mbi.h @@ -1,5 +1,5 @@ /* - * iosf_mbi.h: Intel OnChip System Fabric MailBox access support + * Intel OnChip System Fabric MailBox access support */ #ifndef IOSF_MBI_SYMS_H @@ -16,6 +16,18 @@ #define MBI_MASK_LO 0x000000FF #define MBI_ENABLE 0xF0 +/* IOSF SB read/write opcodes */ +#define MBI_MMIO_READ 0x00 +#define MBI_MMIO_WRITE 0x01 +#define MBI_CFG_READ 0x04 +#define MBI_CFG_WRITE 0x05 +#define MBI_CR_READ 0x06 +#define MBI_CR_WRITE 0x07 +#define MBI_REG_READ 0x10 +#define MBI_REG_WRITE 0x11 +#define MBI_ESRAM_READ 0x12 +#define MBI_ESRAM_WRITE 0x13 + /* Baytrail available units */ #define BT_MBI_UNIT_AUNIT 0x00 #define BT_MBI_UNIT_SMC 0x01 @@ -28,50 +40,13 @@ #define BT_MBI_UNIT_SATA 0xA3 #define BT_MBI_UNIT_PCIE 0xA6 -/* Baytrail read/write opcodes */ -#define BT_MBI_AUNIT_READ 0x10 -#define BT_MBI_AUNIT_WRITE 0x11 -#define BT_MBI_SMC_READ 0x10 -#define BT_MBI_SMC_WRITE 0x11 -#define BT_MBI_CPU_READ 0x10 -#define BT_MBI_CPU_WRITE 0x11 -#define BT_MBI_BUNIT_READ 0x10 -#define BT_MBI_BUNIT_WRITE 0x11 -#define BT_MBI_PMC_READ 0x06 -#define BT_MBI_PMC_WRITE 0x07 -#define BT_MBI_GFX_READ 0x00 -#define BT_MBI_GFX_WRITE 0x01 -#define BT_MBI_SMIO_READ 0x06 -#define BT_MBI_SMIO_WRITE 0x07 -#define BT_MBI_USB_READ 0x06 -#define BT_MBI_USB_WRITE 0x07 -#define BT_MBI_SATA_READ 0x00 -#define BT_MBI_SATA_WRITE 0x01 -#define BT_MBI_PCIE_READ 0x00 -#define BT_MBI_PCIE_WRITE 0x01 - /* Quark available units */ #define QRK_MBI_UNIT_HBA 0x00 #define QRK_MBI_UNIT_HB 0x03 #define QRK_MBI_UNIT_RMU 0x04 #define QRK_MBI_UNIT_MM 0x05 -#define QRK_MBI_UNIT_MMESRAM 0x05 #define QRK_MBI_UNIT_SOC 0x31 -/* Quark read/write opcodes */ -#define QRK_MBI_HBA_READ 0x10 -#define QRK_MBI_HBA_WRITE 0x11 -#define QRK_MBI_HB_READ 0x10 -#define QRK_MBI_HB_WRITE 0x11 -#define QRK_MBI_RMU_READ 0x10 -#define QRK_MBI_RMU_WRITE 0x11 -#define QRK_MBI_MM_READ 0x10 -#define QRK_MBI_MM_WRITE 0x11 -#define QRK_MBI_MMESRAM_READ 0x12 -#define QRK_MBI_MMESRAM_WRITE 0x13 -#define QRK_MBI_SOC_READ 0x06 -#define QRK_MBI_SOC_WRITE 0x07 - #if IS_ENABLED(CONFIG_IOSF_MBI) bool iosf_mbi_available(void); diff --git a/arch/x86/include/asm/ipi.h b/arch/x86/include/asm/ipi.h index 615fa9061b57..cfc9a0d2d07c 100644 --- a/arch/x86/include/asm/ipi.h +++ b/arch/x86/include/asm/ipi.h @@ -119,6 +119,8 @@ static inline void native_apic_mem_write(APIC_ICR, cfg); } +extern void default_send_IPI_single(int cpu, int vector); +extern void default_send_IPI_single_phys(int cpu, int vector); extern void default_send_IPI_mask_sequence_phys(const struct cpumask *mask, int vector); extern void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask, diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h index 5daeca3d0f9e..adc54c12cbd1 100644 --- a/arch/x86/include/asm/jump_label.h +++ b/arch/x86/include/asm/jump_label.h @@ -1,12 +1,18 @@ #ifndef _ASM_X86_JUMP_LABEL_H #define _ASM_X86_JUMP_LABEL_H -#ifndef __ASSEMBLY__ - -#include <linux/stringify.h> -#include <linux/types.h> -#include <asm/nops.h> -#include <asm/asm.h> +#ifndef HAVE_JUMP_LABEL +/* + * For better or for worse, if jump labels (the gcc extension) are missing, + * then the entire static branch patching infrastructure is compiled out. + * If that happens, the code in here will malfunction. Raise a compiler + * error instead. + * + * In theory, jump labels and the static branch patching infrastructure + * could be decoupled to fix this. + */ +#error asm/jump_label.h included on a non-jump-label kernel +#endif #define JUMP_LABEL_NOP_SIZE 5 @@ -16,6 +22,14 @@ # define STATIC_KEY_INIT_NOP GENERIC_NOP5_ATOMIC #endif +#include <asm/asm.h> +#include <asm/nops.h> + +#ifndef __ASSEMBLY__ + +#include <linux/stringify.h> +#include <linux/types.h> + static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { asm_volatile_goto("1:" @@ -59,5 +73,40 @@ struct jump_entry { jump_label_t key; }; -#endif /* __ASSEMBLY__ */ +#else /* __ASSEMBLY__ */ + +.macro STATIC_JUMP_IF_TRUE target, key, def +.Lstatic_jump_\@: + .if \def + /* Equivalent to "jmp.d32 \target" */ + .byte 0xe9 + .long \target - .Lstatic_jump_after_\@ +.Lstatic_jump_after_\@: + .else + .byte STATIC_KEY_INIT_NOP + .endif + .pushsection __jump_table, "aw" + _ASM_ALIGN + _ASM_PTR .Lstatic_jump_\@, \target, \key + .popsection +.endm + +.macro STATIC_JUMP_IF_FALSE target, key, def +.Lstatic_jump_\@: + .if \def + .byte STATIC_KEY_INIT_NOP + .else + /* Equivalent to "jmp.d32 \target" */ + .byte 0xe9 + .long \target - .Lstatic_jump_after_\@ +.Lstatic_jump_after_\@: + .endif + .pushsection __jump_table, "aw" + _ASM_ALIGN + _ASM_PTR .Lstatic_jump_\@, \target, \key + 1 + .popsection +.endm + +#endif /* __ASSEMBLY__ */ + #endif diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 30cfd64295a0..44adbb819041 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -25,6 +25,7 @@ #include <linux/pvclock_gtod.h> #include <linux/clocksource.h> #include <linux/irqbypass.h> +#include <linux/hyperv.h> #include <asm/pvclock-abi.h> #include <asm/desc.h> @@ -45,6 +46,31 @@ #define KVM_IRQCHIP_NUM_PINS KVM_IOAPIC_NUM_PINS +/* x86-specific vcpu->requests bit members */ +#define KVM_REQ_MIGRATE_TIMER 8 +#define KVM_REQ_REPORT_TPR_ACCESS 9 +#define KVM_REQ_TRIPLE_FAULT 10 +#define KVM_REQ_MMU_SYNC 11 +#define KVM_REQ_CLOCK_UPDATE 12 +#define KVM_REQ_DEACTIVATE_FPU 13 +#define KVM_REQ_EVENT 14 +#define KVM_REQ_APF_HALT 15 +#define KVM_REQ_STEAL_UPDATE 16 +#define KVM_REQ_NMI 17 +#define KVM_REQ_PMU 18 +#define KVM_REQ_PMI 19 +#define KVM_REQ_SMI 20 +#define KVM_REQ_MASTERCLOCK_UPDATE 21 +#define KVM_REQ_MCLOCK_INPROGRESS 22 +#define KVM_REQ_SCAN_IOAPIC 23 +#define KVM_REQ_GLOBAL_CLOCK_UPDATE 24 +#define KVM_REQ_APIC_PAGE_RELOAD 25 +#define KVM_REQ_HV_CRASH 26 +#define KVM_REQ_IOAPIC_EOI_EXIT 27 +#define KVM_REQ_HV_RESET 28 +#define KVM_REQ_HV_EXIT 29 +#define KVM_REQ_HV_STIMER 30 + #define CR0_RESERVED_BITS \ (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \ | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \ @@ -213,6 +239,10 @@ union kvm_mmu_page_role { }; }; +struct kvm_rmap_head { + unsigned long val; +}; + struct kvm_mmu_page { struct list_head link; struct hlist_node hash_link; @@ -230,7 +260,7 @@ struct kvm_mmu_page { bool unsync; int root_count; /* Currently serving as active root */ unsigned int unsync_children; - unsigned long parent_ptes; /* Reverse mapping for parent_pte */ + struct kvm_rmap_head parent_ptes; /* rmap pointers to parent sptes */ /* The page is obsolete if mmu_valid_gen != kvm->arch.mmu_valid_gen. */ unsigned long mmu_valid_gen; @@ -374,10 +404,38 @@ struct kvm_mtrr { struct list_head head; }; +/* Hyper-V SynIC timer */ +struct kvm_vcpu_hv_stimer { + struct hrtimer timer; + int index; + u64 config; + u64 count; + u64 exp_time; + struct hv_message msg; + bool msg_pending; +}; + +/* Hyper-V synthetic interrupt controller (SynIC)*/ +struct kvm_vcpu_hv_synic { + u64 version; + u64 control; + u64 msg_page; + u64 evt_page; + atomic64_t sint[HV_SYNIC_SINT_COUNT]; + atomic_t sint_to_gsi[HV_SYNIC_SINT_COUNT]; + DECLARE_BITMAP(auto_eoi_bitmap, 256); + DECLARE_BITMAP(vec_bitmap, 256); + bool active; +}; + /* Hyper-V per vcpu emulation context */ struct kvm_vcpu_hv { u64 hv_vapic; s64 runtime_offset; + struct kvm_vcpu_hv_synic synic; + struct kvm_hyperv_exit exit; + struct kvm_vcpu_hv_stimer stimer[HV_SYNIC_STIMER_COUNT]; + DECLARE_BITMAP(stimer_pending_bitmap, HV_SYNIC_STIMER_COUNT); }; struct kvm_vcpu_arch { @@ -400,7 +458,8 @@ struct kvm_vcpu_arch { u64 efer; u64 apic_base; struct kvm_lapic *apic; /* kernel irqchip context */ - u64 eoi_exit_bitmap[4]; + bool apicv_active; + DECLARE_BITMAP(ioapic_handled_vectors, 256); unsigned long apic_attention; int32_t apic_arb_prio; int mp_state; @@ -589,7 +648,7 @@ struct kvm_lpage_info { }; struct kvm_arch_memory_slot { - unsigned long *rmap[KVM_NR_PAGE_SIZES]; + struct kvm_rmap_head *rmap[KVM_NR_PAGE_SIZES]; struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1]; }; @@ -831,10 +890,11 @@ struct kvm_x86_ops { void (*enable_nmi_window)(struct kvm_vcpu *vcpu); void (*enable_irq_window)(struct kvm_vcpu *vcpu); void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); - int (*cpu_uses_apicv)(struct kvm_vcpu *vcpu); + bool (*get_enable_apicv)(void); + void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu); void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr); void (*hwapic_isr_update)(struct kvm *kvm, int isr); - void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu); + void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set); void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa); void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector); @@ -1086,6 +1146,8 @@ gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, struct x86_exception *exception); +void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu); + int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code, @@ -1231,6 +1293,9 @@ u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc); unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu); bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip); +void kvm_make_mclock_inprogress_request(struct kvm *kvm); +void kvm_make_scan_ioapic_request(struct kvm *kvm); + void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, struct kvm_async_pf *work); void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, diff --git a/arch/x86/include/asm/lguest.h b/arch/x86/include/asm/lguest.h index 3bbc07a57a31..73d0c9b92087 100644 --- a/arch/x86/include/asm/lguest.h +++ b/arch/x86/include/asm/lguest.h @@ -12,7 +12,9 @@ #define GUEST_PL 1 /* Page for Switcher text itself, then two pages per cpu */ -#define TOTAL_SWITCHER_PAGES (1 + 2 * nr_cpu_ids) +#define SWITCHER_TEXT_PAGES (1) +#define SWITCHER_STACK_PAGES (2 * nr_cpu_ids) +#define TOTAL_SWITCHER_PAGES (SWITCHER_TEXT_PAGES + SWITCHER_STACK_PAGES) /* Where we map the Switcher, in both Host and Guest. */ extern unsigned long switcher_addr; diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h index 34e62b1dcfce..1e1b07a5a738 100644 --- a/arch/x86/include/asm/microcode.h +++ b/arch/x86/include/asm/microcode.h @@ -1,6 +1,7 @@ #ifndef _ASM_X86_MICROCODE_H #define _ASM_X86_MICROCODE_H +#include <asm/cpu.h> #include <linux/earlycpio.h> #define native_rdmsr(msr, val1, val2) \ @@ -95,14 +96,14 @@ static inline void __exit exit_amd_microcode(void) {} /* * In early loading microcode phase on BSP, boot_cpu_data is not set up yet. - * x86_vendor() gets vendor id for BSP. + * x86_cpuid_vendor() gets vendor id for BSP. * * In 32 bit AP case, accessing boot_cpu_data needs linear address. To simplify - * coding, we still use x86_vendor() to get vendor id for AP. + * coding, we still use x86_cpuid_vendor() to get vendor id for AP. * - * x86_vendor() gets vendor information directly from CPUID. + * x86_cpuid_vendor() gets vendor information directly from CPUID. */ -static inline int x86_vendor(void) +static inline int x86_cpuid_vendor(void) { u32 eax = 0x00000000; u32 ebx, ecx = 0, edx; @@ -118,40 +119,14 @@ static inline int x86_vendor(void) return X86_VENDOR_UNKNOWN; } -static inline unsigned int __x86_family(unsigned int sig) -{ - unsigned int x86; - - x86 = (sig >> 8) & 0xf; - - if (x86 == 0xf) - x86 += (sig >> 20) & 0xff; - - return x86; -} - -static inline unsigned int x86_family(void) +static inline unsigned int x86_cpuid_family(void) { u32 eax = 0x00000001; u32 ebx, ecx = 0, edx; native_cpuid(&eax, &ebx, &ecx, &edx); - return __x86_family(eax); -} - -static inline unsigned int x86_model(unsigned int sig) -{ - unsigned int x86, model; - - x86 = __x86_family(sig); - - model = (sig >> 4) & 0xf; - - if (x86 == 0x6 || x86 == 0xf) - model += ((sig >> 16) & 0xf) << 4; - - return model; + return x86_family(eax); } #ifdef CONFIG_MICROCODE diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 379cd3658799..bfd9b2a35a0b 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -116,8 +116,36 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, #endif cpumask_set_cpu(cpu, mm_cpumask(next)); - /* Re-load page tables */ + /* + * Re-load page tables. + * + * This logic has an ordering constraint: + * + * CPU 0: Write to a PTE for 'next' + * CPU 0: load bit 1 in mm_cpumask. if nonzero, send IPI. + * CPU 1: set bit 1 in next's mm_cpumask + * CPU 1: load from the PTE that CPU 0 writes (implicit) + * + * We need to prevent an outcome in which CPU 1 observes + * the new PTE value and CPU 0 observes bit 1 clear in + * mm_cpumask. (If that occurs, then the IPI will never + * be sent, and CPU 0's TLB will contain a stale entry.) + * + * The bad outcome can occur if either CPU's load is + * reordered before that CPU's store, so both CPUs must + * execute full barriers to prevent this from happening. + * + * Thus, switch_mm needs a full barrier between the + * store to mm_cpumask and any operation that could load + * from next->pgd. TLB fills are special and can happen + * due to instruction fetches or for no reason at all, + * and neither LOCK nor MFENCE orders them. + * Fortunately, load_cr3() is serializing and gives the + * ordering guarantee we need. + * + */ load_cr3(next->pgd); + trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); /* Stop flush ipis for the previous mm */ @@ -156,10 +184,14 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, * schedule, protecting us from simultaneous changes. */ cpumask_set_cpu(cpu, mm_cpumask(next)); + /* * We were in lazy tlb mode and leave_mm disabled * tlb flush IPI delivery. We must reload CR3 * to make sure to use no freed page tables. + * + * As above, load_cr3() is serializing and orders TLB + * fills with respect to the mm_cpumask write. */ load_cr3(next->pgd); trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); diff --git a/arch/x86/include/asm/msi.h b/arch/x86/include/asm/msi.h index 93724cc62177..eb4b09b41df5 100644 --- a/arch/x86/include/asm/msi.h +++ b/arch/x86/include/asm/msi.h @@ -1,7 +1,13 @@ #ifndef _ASM_X86_MSI_H #define _ASM_X86_MSI_H #include <asm/hw_irq.h> +#include <asm/irqdomain.h> typedef struct irq_alloc_info msi_alloc_info_t; +int pci_msi_prepare(struct irq_domain *domain, struct device *dev, int nvec, + msi_alloc_info_t *arg); + +void pci_msi_set_desc(msi_alloc_info_t *arg, struct msi_desc *desc); + #endif /* _ASM_X86_MSI_H */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 690b4027e17c..b05402ef3b84 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -321,6 +321,7 @@ #define MSR_F15H_PERF_CTR 0xc0010201 #define MSR_F15H_NB_PERF_CTL 0xc0010240 #define MSR_F15H_NB_PERF_CTR 0xc0010241 +#define MSR_F15H_IC_CFG 0xc0011021 /* Fam 10h MSRs */ #define MSR_FAM10H_MMIO_CONF_BASE 0xc0010058 diff --git a/arch/x86/include/asm/msr-trace.h b/arch/x86/include/asm/msr-trace.h new file mode 100644 index 000000000000..7567225747d8 --- /dev/null +++ b/arch/x86/include/asm/msr-trace.h @@ -0,0 +1,57 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM msr + +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE msr-trace + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH asm/ + +#if !defined(_TRACE_MSR_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_MSR_H + +#include <linux/tracepoint.h> + +/* + * Tracing for x86 model specific registers. Directly maps to the + * RDMSR/WRMSR instructions. + */ + +DECLARE_EVENT_CLASS(msr_trace_class, + TP_PROTO(unsigned msr, u64 val, int failed), + TP_ARGS(msr, val, failed), + TP_STRUCT__entry( + __field( unsigned, msr ) + __field( u64, val ) + __field( int, failed ) + ), + TP_fast_assign( + __entry->msr = msr; + __entry->val = val; + __entry->failed = failed; + ), + TP_printk("%x, value %llx%s", + __entry->msr, + __entry->val, + __entry->failed ? " #GP" : "") +); + +DEFINE_EVENT(msr_trace_class, read_msr, + TP_PROTO(unsigned msr, u64 val, int failed), + TP_ARGS(msr, val, failed) +); + +DEFINE_EVENT(msr_trace_class, write_msr, + TP_PROTO(unsigned msr, u64 val, int failed), + TP_ARGS(msr, val, failed) +); + +DEFINE_EVENT(msr_trace_class, rdpmc, + TP_PROTO(unsigned msr, u64 val, int failed), + TP_ARGS(msr, val, failed) +); + +#endif /* _TRACE_MSR_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 77d8b284e4a7..93fb7c1cffda 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -32,6 +32,16 @@ struct msr_regs_info { int err; }; +struct saved_msr { + bool valid; + struct msr_info info; +}; + +struct saved_msrs { + unsigned int num; + struct saved_msr *array; +}; + static inline unsigned long long native_read_tscp(unsigned int *aux) { unsigned long low, high; @@ -57,11 +67,34 @@ static inline unsigned long long native_read_tscp(unsigned int *aux) #define EAX_EDX_RET(val, low, high) "=A" (val) #endif +#ifdef CONFIG_TRACEPOINTS +/* + * Be very careful with includes. This header is prone to include loops. + */ +#include <asm/atomic.h> +#include <linux/tracepoint-defs.h> + +extern struct tracepoint __tracepoint_read_msr; +extern struct tracepoint __tracepoint_write_msr; +extern struct tracepoint __tracepoint_rdpmc; +#define msr_tracepoint_active(t) static_key_false(&(t).key) +extern void do_trace_write_msr(unsigned msr, u64 val, int failed); +extern void do_trace_read_msr(unsigned msr, u64 val, int failed); +extern void do_trace_rdpmc(unsigned msr, u64 val, int failed); +#else +#define msr_tracepoint_active(t) false +static inline void do_trace_write_msr(unsigned msr, u64 val, int failed) {} +static inline void do_trace_read_msr(unsigned msr, u64 val, int failed) {} +static inline void do_trace_rdpmc(unsigned msr, u64 val, int failed) {} +#endif + static inline unsigned long long native_read_msr(unsigned int msr) { DECLARE_ARGS(val, low, high); asm volatile("rdmsr" : EAX_EDX_RET(val, low, high) : "c" (msr)); + if (msr_tracepoint_active(__tracepoint_read_msr)) + do_trace_read_msr(msr, EAX_EDX_VAL(val, low, high), 0); return EAX_EDX_VAL(val, low, high); } @@ -78,6 +111,8 @@ static inline unsigned long long native_read_msr_safe(unsigned int msr, _ASM_EXTABLE(2b, 3b) : [err] "=r" (*err), EAX_EDX_RET(val, low, high) : "c" (msr), [fault] "i" (-EIO)); + if (msr_tracepoint_active(__tracepoint_read_msr)) + do_trace_read_msr(msr, EAX_EDX_VAL(val, low, high), *err); return EAX_EDX_VAL(val, low, high); } @@ -85,6 +120,8 @@ static inline void native_write_msr(unsigned int msr, unsigned low, unsigned high) { asm volatile("wrmsr" : : "c" (msr), "a"(low), "d" (high) : "memory"); + if (msr_tracepoint_active(__tracepoint_read_msr)) + do_trace_write_msr(msr, ((u64)high << 32 | low), 0); } /* Can be uninlined because referenced by paravirt */ @@ -102,6 +139,8 @@ notrace static inline int native_write_msr_safe(unsigned int msr, : "c" (msr), "0" (low), "d" (high), [fault] "i" (-EIO) : "memory"); + if (msr_tracepoint_active(__tracepoint_read_msr)) + do_trace_write_msr(msr, ((u64)high << 32 | low), err); return err; } @@ -160,6 +199,8 @@ static inline unsigned long long native_read_pmc(int counter) DECLARE_ARGS(val, low, high); asm volatile("rdpmc" : EAX_EDX_RET(val, low, high) : "c" (counter)); + if (msr_tracepoint_active(__tracepoint_rdpmc)) + do_trace_rdpmc(counter, EAX_EDX_VAL(val, low, high), 0); return EAX_EDX_VAL(val, low, high); } @@ -190,7 +231,7 @@ static inline void wrmsr(unsigned msr, unsigned low, unsigned high) static inline void wrmsrl(unsigned msr, u64 val) { - native_write_msr(msr, (u32)val, (u32)(val >> 32)); + native_write_msr(msr, (u32)(val & 0xffffffffULL), (u32)(val >> 32)); } /* wrmsr with exception handling */ diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h index cc071c6f7d4d..7bd0099384ca 100644 --- a/arch/x86/include/asm/page_types.h +++ b/arch/x86/include/asm/page_types.h @@ -5,9 +5,9 @@ #include <linux/types.h> /* PAGE_SHIFT determines the page size */ -#define PAGE_SHIFT 12 -#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) -#define PAGE_MASK (~(PAGE_SIZE-1)) +#define PAGE_SHIFT 12 +#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) +#define PAGE_MASK (~(PAGE_SIZE-1)) #define PMD_PAGE_SIZE (_AC(1, UL) << PMD_SHIFT) #define PMD_PAGE_MASK (~(PMD_PAGE_SIZE-1)) diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index c759b3cca663..f6192502149e 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -291,15 +291,6 @@ static inline void slow_down_io(void) #endif } -#ifdef CONFIG_SMP -static inline void startup_ipi_hook(int phys_apicid, unsigned long start_eip, - unsigned long start_esp) -{ - PVOP_VCALL3(pv_apic_ops.startup_ipi_hook, - phys_apicid, start_eip, start_esp); -} -#endif - static inline void paravirt_activate_mm(struct mm_struct *prev, struct mm_struct *next) { @@ -381,23 +372,6 @@ static inline void pte_update(struct mm_struct *mm, unsigned long addr, { PVOP_VCALL3(pv_mmu_ops.pte_update, mm, addr, ptep); } -static inline void pmd_update(struct mm_struct *mm, unsigned long addr, - pmd_t *pmdp) -{ - PVOP_VCALL3(pv_mmu_ops.pmd_update, mm, addr, pmdp); -} - -static inline void pte_update_defer(struct mm_struct *mm, unsigned long addr, - pte_t *ptep) -{ - PVOP_VCALL3(pv_mmu_ops.pte_update_defer, mm, addr, ptep); -} - -static inline void pmd_update_defer(struct mm_struct *mm, unsigned long addr, - pmd_t *pmdp) -{ - PVOP_VCALL3(pv_mmu_ops.pmd_update_defer, mm, addr, pmdp); -} static inline pte_t __pte(pteval_t val) { @@ -928,23 +902,11 @@ extern void default_banner(void); call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable); \ PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);) -#define USERGS_SYSRET32 \ - PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret32), \ - CLBR_NONE, \ - jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret32)) - #ifdef CONFIG_X86_32 #define GET_CR0_INTO_EAX \ push %ecx; push %edx; \ call PARA_INDIRECT(pv_cpu_ops+PV_CPU_read_cr0); \ pop %edx; pop %ecx - -#define ENABLE_INTERRUPTS_SYSEXIT \ - PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit), \ - CLBR_NONE, \ - jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_sysexit)) - - #else /* !CONFIG_X86_32 */ /* diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 3d44191185f8..77db5616a473 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -162,15 +162,6 @@ struct pv_cpu_ops { u64 (*read_pmc)(int counter); -#ifdef CONFIG_X86_32 - /* - * Atomically enable interrupts and return to userspace. This - * is only used in 32-bit kernels. 64-bit kernels use - * usergs_sysret32 instead. - */ - void (*irq_enable_sysexit)(void); -#endif - /* * Switch to usermode gs and return to 64-bit usermode using * sysret. Only used in 64-bit kernels to return to 64-bit @@ -179,14 +170,6 @@ struct pv_cpu_ops { */ void (*usergs_sysret64)(void); - /* - * Switch to usermode gs and return to 32-bit usermode using - * sysret. Used to return to 32-on-64 compat processes. - * Other usermode register state, including %esp, must already - * be restored. - */ - void (*usergs_sysret32)(void); - /* Normal iret. Jump to this with the standard iret stack frame set up. */ void (*iret)(void); @@ -220,14 +203,6 @@ struct pv_irq_ops { #endif }; -struct pv_apic_ops { -#ifdef CONFIG_X86_LOCAL_APIC - void (*startup_ipi_hook)(int phys_apicid, - unsigned long start_eip, - unsigned long start_esp); -#endif -}; - struct pv_mmu_ops { unsigned long (*read_cr2)(void); void (*write_cr2)(unsigned long); @@ -279,12 +254,6 @@ struct pv_mmu_ops { pmd_t *pmdp, pmd_t pmdval); void (*pte_update)(struct mm_struct *mm, unsigned long addr, pte_t *ptep); - void (*pte_update_defer)(struct mm_struct *mm, - unsigned long addr, pte_t *ptep); - void (*pmd_update)(struct mm_struct *mm, unsigned long addr, - pmd_t *pmdp); - void (*pmd_update_defer)(struct mm_struct *mm, - unsigned long addr, pmd_t *pmdp); pte_t (*ptep_modify_prot_start)(struct mm_struct *mm, unsigned long addr, pte_t *ptep); @@ -359,7 +328,6 @@ struct paravirt_patch_template { struct pv_time_ops pv_time_ops; struct pv_cpu_ops pv_cpu_ops; struct pv_irq_ops pv_irq_ops; - struct pv_apic_ops pv_apic_ops; struct pv_mmu_ops pv_mmu_ops; struct pv_lock_ops pv_lock_ops; }; @@ -369,7 +337,6 @@ extern struct pv_init_ops pv_init_ops; extern struct pv_time_ops pv_time_ops; extern struct pv_cpu_ops pv_cpu_ops; extern struct pv_irq_ops pv_irq_ops; -extern struct pv_apic_ops pv_apic_ops; extern struct pv_mmu_ops pv_mmu_ops; extern struct pv_lock_ops pv_lock_ops; @@ -407,10 +374,8 @@ extern struct pv_lock_ops pv_lock_ops; __visible extern const char start_##ops##_##name[], end_##ops##_##name[]; \ asm(NATIVE_LABEL("start_", ops, name) code NATIVE_LABEL("end_", ops, name)) -unsigned paravirt_patch_nop(void); unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len); unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len); -unsigned paravirt_patch_ignore(unsigned len); unsigned paravirt_patch_call(void *insnbuf, const void *target, u16 tgt_clobbers, unsigned long addr, u16 site_clobbers, diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 6ec0c8b2e9df..0687c4748b8f 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -69,9 +69,6 @@ extern struct mm_struct *pgd_page_get_mm(struct page *page); #define pmd_clear(pmd) native_pmd_clear(pmd) #define pte_update(mm, addr, ptep) do { } while (0) -#define pte_update_defer(mm, addr, ptep) do { } while (0) -#define pmd_update(mm, addr, ptep) do { } while (0) -#define pmd_update_defer(mm, addr, ptep) do { } while (0) #define pgd_val(x) native_pgd_val(x) #define __pgd(x) native_make_pgd(x) @@ -165,20 +162,22 @@ static inline int pmd_large(pmd_t pte) } #ifdef CONFIG_TRANSPARENT_HUGEPAGE -static inline int pmd_trans_splitting(pmd_t pmd) -{ - return pmd_val(pmd) & _PAGE_SPLITTING; -} - static inline int pmd_trans_huge(pmd_t pmd) { - return pmd_val(pmd) & _PAGE_PSE; + return (pmd_val(pmd) & (_PAGE_PSE|_PAGE_DEVMAP)) == _PAGE_PSE; } static inline int has_transparent_hugepage(void) { return cpu_has_pse; } + +#ifdef __HAVE_ARCH_PTE_DEVMAP +static inline int pmd_devmap(pmd_t pmd) +{ + return !!(pmd_val(pmd) & _PAGE_DEVMAP); +} +#endif #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ static inline pte_t pte_set_flags(pte_t pte, pteval_t set) @@ -255,6 +254,11 @@ static inline pte_t pte_mkspecial(pte_t pte) return pte_set_flags(pte, _PAGE_SPECIAL); } +static inline pte_t pte_mkdevmap(pte_t pte) +{ + return pte_set_flags(pte, _PAGE_SPECIAL|_PAGE_DEVMAP); +} + static inline pmd_t pmd_set_flags(pmd_t pmd, pmdval_t set) { pmdval_t v = native_pmd_val(pmd); @@ -274,6 +278,11 @@ static inline pmd_t pmd_mkold(pmd_t pmd) return pmd_clear_flags(pmd, _PAGE_ACCESSED); } +static inline pmd_t pmd_mkclean(pmd_t pmd) +{ + return pmd_clear_flags(pmd, _PAGE_DIRTY); +} + static inline pmd_t pmd_wrprotect(pmd_t pmd) { return pmd_clear_flags(pmd, _PAGE_RW); @@ -284,6 +293,11 @@ static inline pmd_t pmd_mkdirty(pmd_t pmd) return pmd_set_flags(pmd, _PAGE_DIRTY | _PAGE_SOFT_DIRTY); } +static inline pmd_t pmd_mkdevmap(pmd_t pmd) +{ + return pmd_set_flags(pmd, _PAGE_DEVMAP); +} + static inline pmd_t pmd_mkhuge(pmd_t pmd) { return pmd_set_flags(pmd, _PAGE_PSE); @@ -465,6 +479,13 @@ static inline int pte_present(pte_t a) return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE); } +#ifdef __HAVE_ARCH_PTE_DEVMAP +static inline int pte_devmap(pte_t a) +{ + return (pte_flags(a) & _PAGE_DEVMAP) == _PAGE_DEVMAP; +} +#endif + #define pte_accessible pte_accessible static inline bool pte_accessible(struct mm_struct *mm, pte_t a) { @@ -731,14 +752,9 @@ static inline void native_set_pmd_at(struct mm_struct *mm, unsigned long addr, * updates should either be sets, clears, or set_pte_atomic for P->P * transitions, which means this hook should only be called for user PTEs. * This hook implies a P->P protection or access change has taken place, which - * requires a subsequent TLB flush. The notification can optionally be delayed - * until the TLB flush event by using the pte_update_defer form of the - * interface, but care must be taken to assure that the flush happens while - * still holding the same page table lock so that the shadow and primary pages - * do not become out of sync on SMP. + * requires a subsequent TLB flush. */ #define pte_update(mm, addr, ptep) do { } while (0) -#define pte_update_defer(mm, addr, ptep) do { } while (0) #endif /* @@ -816,10 +832,6 @@ extern int pmdp_clear_flush_young(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp); -#define __HAVE_ARCH_PMDP_SPLITTING_FLUSH -extern void pmdp_splitting_flush(struct vm_area_struct *vma, - unsigned long addr, pmd_t *pmdp); - #define __HAVE_ARCH_PMD_WRITE static inline int pmd_write(pmd_t pmd) { @@ -830,9 +842,7 @@ static inline int pmd_write(pmd_t pmd) static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp) { - pmd_t pmd = native_pmdp_get_and_clear(pmdp); - pmd_update(mm, addr, pmdp); - return pmd; + return native_pmdp_get_and_clear(pmdp); } #define __HAVE_ARCH_PMDP_SET_WRPROTECT @@ -840,7 +850,6 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp) { clear_bit(_PAGE_BIT_RW, (unsigned long *)pmdp); - pmd_update(mm, addr, pmdp); } /* diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index a471cadb9630..04c27a013165 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -22,10 +22,11 @@ #define _PAGE_BIT_PAT_LARGE 12 /* On 2MB or 1GB pages */ #define _PAGE_BIT_SPECIAL _PAGE_BIT_SOFTW1 #define _PAGE_BIT_CPA_TEST _PAGE_BIT_SOFTW1 -#define _PAGE_BIT_SPLITTING _PAGE_BIT_SOFTW2 /* only valid on a PSE pmd */ #define _PAGE_BIT_HIDDEN _PAGE_BIT_SOFTW3 /* hidden by kmemcheck */ #define _PAGE_BIT_SOFT_DIRTY _PAGE_BIT_SOFTW3 /* software dirty tracking */ -#define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */ +#define _PAGE_BIT_SOFTW4 58 /* available for programmer */ +#define _PAGE_BIT_DEVMAP _PAGE_BIT_SOFTW4 +#define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */ /* If _PAGE_BIT_PRESENT is clear, we use these: */ /* - if the user mapped it with PROT_NONE; pte_present gives true */ @@ -46,7 +47,6 @@ #define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE) #define _PAGE_SPECIAL (_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL) #define _PAGE_CPA_TEST (_AT(pteval_t, 1) << _PAGE_BIT_CPA_TEST) -#define _PAGE_SPLITTING (_AT(pteval_t, 1) << _PAGE_BIT_SPLITTING) #define __HAVE_ARCH_PTE_SPECIAL #ifdef CONFIG_KMEMCHECK @@ -85,8 +85,11 @@ #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) #define _PAGE_NX (_AT(pteval_t, 1) << _PAGE_BIT_NX) +#define _PAGE_DEVMAP (_AT(u64, 1) << _PAGE_BIT_DEVMAP) +#define __HAVE_ARCH_PTE_DEVMAP #else #define _PAGE_NX (_AT(pteval_t, 0)) +#define _PAGE_DEVMAP (_AT(pteval_t, 0)) #endif #define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE) diff --git a/arch/x86/include/asm/platform_sst_audio.h b/arch/x86/include/asm/platform_sst_audio.h index 7249e6d0902d..5973a2f3db3d 100644 --- a/arch/x86/include/asm/platform_sst_audio.h +++ b/arch/x86/include/asm/platform_sst_audio.h @@ -55,6 +55,7 @@ enum sst_audio_device_id_mrfld { PIPE_MEDIA0_IN = 0x8F, PIPE_MEDIA1_IN = 0x90, PIPE_MEDIA2_IN = 0x91, + PIPE_MEDIA3_IN = 0x9C, PIPE_RSVD = 0xFF, }; diff --git a/arch/x86/include/asm/pmem.h b/arch/x86/include/asm/pmem.h index d8ce3ec816ab..1544fabcd7f9 100644 --- a/arch/x86/include/asm/pmem.h +++ b/arch/x86/include/asm/pmem.h @@ -132,12 +132,7 @@ static inline void arch_clear_pmem(void __pmem *addr, size_t size) { void *vaddr = (void __force *)addr; - /* TODO: implement the zeroing via non-temporal writes */ - if (size == PAGE_SIZE && ((unsigned long)vaddr & ~PAGE_MASK) == 0) - clear_page(vaddr); - else - memset(vaddr, 0, size); - + memset(vaddr, 0, size); __arch_wb_cache_pmem(vaddr, size); } diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h index 7a6bed5c08bc..fdcc04020636 100644 --- a/arch/x86/include/asm/pvclock.h +++ b/arch/x86/include/asm/pvclock.h @@ -4,6 +4,15 @@ #include <linux/clocksource.h> #include <asm/pvclock-abi.h> +#ifdef CONFIG_KVM_GUEST +extern struct pvclock_vsyscall_time_info *pvclock_pvti_cpu0_va(void); +#else +static inline struct pvclock_vsyscall_time_info *pvclock_pvti_cpu0_va(void) +{ + return NULL; +} +#endif + /* some helper functions for xen and kvm pv clock sources */ cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src); u8 pvclock_read_flags(struct pvclock_vcpu_time_info *src); @@ -91,10 +100,5 @@ struct pvclock_vsyscall_time_info { } __attribute__((__aligned__(SMP_CACHE_BYTES))); #define PVTI_SIZE sizeof(struct pvclock_vsyscall_time_info) -#define PVCLOCK_VSYSCALL_NR_PAGES (((NR_CPUS-1)/(PAGE_SIZE/PVTI_SIZE))+1) - -int __init pvclock_init_vsyscall(struct pvclock_vsyscall_time_info *i, - int size); -struct pvclock_vcpu_time_info *pvclock_get_vsyscall_time_info(int cpu); #endif /* _ASM_X86_PVCLOCK_H */ diff --git a/arch/x86/include/asm/qspinlock_paravirt.h b/arch/x86/include/asm/qspinlock_paravirt.h index b002e711ba88..9f92c180ed2f 100644 --- a/arch/x86/include/asm/qspinlock_paravirt.h +++ b/arch/x86/include/asm/qspinlock_paravirt.h @@ -1,6 +1,65 @@ #ifndef __ASM_QSPINLOCK_PARAVIRT_H #define __ASM_QSPINLOCK_PARAVIRT_H +/* + * For x86-64, PV_CALLEE_SAVE_REGS_THUNK() saves and restores 8 64-bit + * registers. For i386, however, only 1 32-bit register needs to be saved + * and restored. So an optimized version of __pv_queued_spin_unlock() is + * hand-coded for 64-bit, but it isn't worthwhile to do it for 32-bit. + */ +#ifdef CONFIG_64BIT + +PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath); +#define __pv_queued_spin_unlock __pv_queued_spin_unlock +#define PV_UNLOCK "__raw_callee_save___pv_queued_spin_unlock" +#define PV_UNLOCK_SLOWPATH "__raw_callee_save___pv_queued_spin_unlock_slowpath" + +/* + * Optimized assembly version of __raw_callee_save___pv_queued_spin_unlock + * which combines the registers saving trunk and the body of the following + * C code: + * + * void __pv_queued_spin_unlock(struct qspinlock *lock) + * { + * struct __qspinlock *l = (void *)lock; + * u8 lockval = cmpxchg(&l->locked, _Q_LOCKED_VAL, 0); + * + * if (likely(lockval == _Q_LOCKED_VAL)) + * return; + * pv_queued_spin_unlock_slowpath(lock, lockval); + * } + * + * For x86-64, + * rdi = lock (first argument) + * rsi = lockval (second argument) + * rdx = internal variable (set to 0) + */ +asm (".pushsection .text;" + ".globl " PV_UNLOCK ";" + ".align 4,0x90;" + PV_UNLOCK ": " + "push %rdx;" + "mov $0x1,%eax;" + "xor %edx,%edx;" + "lock cmpxchg %dl,(%rdi);" + "cmp $0x1,%al;" + "jne .slowpath;" + "pop %rdx;" + "ret;" + ".slowpath: " + "push %rsi;" + "movzbl %al,%esi;" + "call " PV_UNLOCK_SLOWPATH ";" + "pop %rsi;" + "pop %rdx;" + "ret;" + ".size " PV_UNLOCK ", .-" PV_UNLOCK ";" + ".popsection"); + +#else /* CONFIG_64BIT */ + +extern void __pv_queued_spin_unlock(struct qspinlock *lock); PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock); +#endif /* CONFIG_64BIT */ #endif diff --git a/arch/x86/include/asm/reboot.h b/arch/x86/include/asm/reboot.h index a82c4f1b4d83..2cb1cc253d51 100644 --- a/arch/x86/include/asm/reboot.h +++ b/arch/x86/include/asm/reboot.h @@ -25,5 +25,6 @@ void __noreturn machine_real_restart(unsigned int type); typedef void (*nmi_shootdown_cb)(int, struct pt_regs*); void nmi_shootdown_cpus(nmi_shootdown_cb callback); +void run_crash_ipi_callback(struct pt_regs *regs); #endif /* _ASM_X86_REBOOT_H */ diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 222a6a3ca2b5..dfcf0727623b 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -21,15 +21,6 @@ extern int smp_num_siblings; extern unsigned int num_processors; -static inline bool cpu_has_ht_siblings(void) -{ - bool has_siblings = false; -#ifdef CONFIG_SMP - has_siblings = cpu_has_ht && smp_num_siblings > 1; -#endif - return has_siblings; -} - DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map); DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map); /* cpus sharing the last level cache: */ @@ -74,9 +65,6 @@ struct smp_ops { extern void set_cpu_sibling_map(int cpu); #ifdef CONFIG_SMP -#ifndef CONFIG_PARAVIRT -#define startup_ipi_hook(phys_apicid, start_eip, start_esp) do { } while (0) -#endif extern struct smp_ops smp_ops; static inline void smp_send_stop(void) diff --git a/arch/x86/include/asm/suspend_32.h b/arch/x86/include/asm/suspend_32.h index d1793f06854d..8e9dbe7b73a1 100644 --- a/arch/x86/include/asm/suspend_32.h +++ b/arch/x86/include/asm/suspend_32.h @@ -15,6 +15,7 @@ struct saved_context { unsigned long cr0, cr2, cr3, cr4; u64 misc_enable; bool misc_enable_saved; + struct saved_msrs saved_msrs; struct desc_ptr gdt_desc; struct desc_ptr idt; u16 ldt; diff --git a/arch/x86/include/asm/suspend_64.h b/arch/x86/include/asm/suspend_64.h index 7ebf0ebe4e68..6136a18152af 100644 --- a/arch/x86/include/asm/suspend_64.h +++ b/arch/x86/include/asm/suspend_64.h @@ -24,6 +24,7 @@ struct saved_context { unsigned long cr0, cr2, cr3, cr4, cr8; u64 misc_enable; bool misc_enable_saved; + struct saved_msrs saved_msrs; unsigned long efer; u16 gdt_pad; /* Unused */ struct desc_ptr gdt_desc; diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 09b1b0ab94b7..660458af425d 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -745,5 +745,14 @@ copy_to_user(void __user *to, const void *from, unsigned long n) #undef __copy_from_user_overflow #undef __copy_to_user_overflow +/* + * We rely on the nested NMI work to allow atomic faults from the NMI path; the + * nested NMI paths are careful to preserve CR2. + * + * Caller must use pagefault_enable/disable, or run in interrupt context, + * and also do a uaccess_ok() check + */ +#define __copy_from_user_nmi __copy_from_user_inatomic + #endif /* _ASM_X86_UACCESS_H */ diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h index 756de9190aec..deabaf9759b6 100644 --- a/arch/x86/include/asm/vdso.h +++ b/arch/x86/include/asm/vdso.h @@ -22,6 +22,7 @@ struct vdso_image { long sym_vvar_page; long sym_hpet_page; + long sym_pvclock_page; long sym_VDSO32_NOTE_MASK; long sym___kernel_sigreturn; long sym___kernel_rt_sigreturn; diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index cd0fc0cc78bc..1ae89a2721d6 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -82,13 +82,11 @@ struct x86_init_paging { * struct x86_init_timers - platform specific timer setup * @setup_perpcu_clockev: set up the per cpu clock event device for the * boot cpu - * @tsc_pre_init: platform function called before TSC init * @timer_init: initialize the platform timer (default PIT/HPET) * @wallclock_init: init the wallclock device */ struct x86_init_timers { void (*setup_percpu_clockev)(void); - void (*tsc_pre_init)(void); void (*timer_init)(void); void (*wallclock_init)(void); }; diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index 4c20dd333412..3bcdcc84259d 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h @@ -310,10 +310,10 @@ HYPERVISOR_mca(struct xen_mc *mc_op) } static inline int -HYPERVISOR_dom0_op(struct xen_platform_op *platform_op) +HYPERVISOR_platform_op(struct xen_platform_op *op) { - platform_op->interface_version = XENPF_INTERFACE_VERSION; - return _hypercall1(int, dom0_op, platform_op); + op->interface_version = XENPF_INTERFACE_VERSION; + return _hypercall1(int, platform_op, op); } static inline int diff --git a/arch/x86/include/asm/xor_32.h b/arch/x86/include/asm/xor_32.h index 5a08bc8bff33..c54beb44c4c1 100644 --- a/arch/x86/include/asm/xor_32.h +++ b/arch/x86/include/asm/xor_32.h @@ -553,7 +553,7 @@ do { \ if (cpu_has_xmm) { \ xor_speed(&xor_block_pIII_sse); \ xor_speed(&xor_block_sse_pf64); \ - } else if (cpu_has_mmx) { \ + } else if (boot_cpu_has(X86_FEATURE_MMX)) { \ xor_speed(&xor_block_pII_mmx); \ xor_speed(&xor_block_p5_mmx); \ } else { \ diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h index 040d4083c24f..7956412d09bd 100644 --- a/arch/x86/include/uapi/asm/hyperv.h +++ b/arch/x86/include/uapi/asm/hyperv.h @@ -269,4 +269,96 @@ typedef struct _HV_REFERENCE_TSC_PAGE { #define HV_SYNIC_SINT_AUTO_EOI (1ULL << 17) #define HV_SYNIC_SINT_VECTOR_MASK (0xFF) +#define HV_SYNIC_STIMER_COUNT (4) + +/* Define synthetic interrupt controller message constants. */ +#define HV_MESSAGE_SIZE (256) +#define HV_MESSAGE_PAYLOAD_BYTE_COUNT (240) +#define HV_MESSAGE_PAYLOAD_QWORD_COUNT (30) + +/* Define hypervisor message types. */ +enum hv_message_type { + HVMSG_NONE = 0x00000000, + + /* Memory access messages. */ + HVMSG_UNMAPPED_GPA = 0x80000000, + HVMSG_GPA_INTERCEPT = 0x80000001, + + /* Timer notification messages. */ + HVMSG_TIMER_EXPIRED = 0x80000010, + + /* Error messages. */ + HVMSG_INVALID_VP_REGISTER_VALUE = 0x80000020, + HVMSG_UNRECOVERABLE_EXCEPTION = 0x80000021, + HVMSG_UNSUPPORTED_FEATURE = 0x80000022, + + /* Trace buffer complete messages. */ + HVMSG_EVENTLOG_BUFFERCOMPLETE = 0x80000040, + + /* Platform-specific processor intercept messages. */ + HVMSG_X64_IOPORT_INTERCEPT = 0x80010000, + HVMSG_X64_MSR_INTERCEPT = 0x80010001, + HVMSG_X64_CPUID_INTERCEPT = 0x80010002, + HVMSG_X64_EXCEPTION_INTERCEPT = 0x80010003, + HVMSG_X64_APIC_EOI = 0x80010004, + HVMSG_X64_LEGACY_FP_ERROR = 0x80010005 +}; + +/* Define synthetic interrupt controller message flags. */ +union hv_message_flags { + __u8 asu8; + struct { + __u8 msg_pending:1; + __u8 reserved:7; + }; +}; + +/* Define port identifier type. */ +union hv_port_id { + __u32 asu32; + struct { + __u32 id:24; + __u32 reserved:8; + } u; +}; + +/* Define synthetic interrupt controller message header. */ +struct hv_message_header { + __u32 message_type; + __u8 payload_size; + union hv_message_flags message_flags; + __u8 reserved[2]; + union { + __u64 sender; + union hv_port_id port; + }; +}; + +/* Define synthetic interrupt controller message format. */ +struct hv_message { + struct hv_message_header header; + union { + __u64 payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT]; + } u; +}; + +/* Define the synthetic interrupt message page layout. */ +struct hv_message_page { + struct hv_message sint_message[HV_SYNIC_SINT_COUNT]; +}; + +/* Define timer message payload structure. */ +struct hv_timer_message_payload { + __u32 timer_index; + __u32 reserved; + __u64 expiration_time; /* When the timer expired */ + __u64 delivery_time; /* When the message was delivered */ +}; + +#define HV_STIMER_ENABLE (1ULL << 0) +#define HV_STIMER_PERIODIC (1ULL << 1) +#define HV_STIMER_LAZY (1ULL << 2) +#define HV_STIMER_AUTOENABLE (1ULL << 3) +#define HV_STIMER_SINT(config) (__u8)(((config) >> 16) & 0x0F) + #endif diff --git a/arch/x86/include/uapi/asm/mce.h b/arch/x86/include/uapi/asm/mce.h index 03429da2fa80..2184943341bf 100644 --- a/arch/x86/include/uapi/asm/mce.h +++ b/arch/x86/include/uapi/asm/mce.h @@ -16,7 +16,7 @@ struct mce { __u8 cpuvendor; /* cpu vendor as encoded in system.h */ __u8 inject_flags; /* software inject flags */ __u8 severity; - __u8 usable_addr; + __u8 pad; __u32 cpuid; /* CPUID 1 EAX */ __u8 cs; /* code segment */ __u8 bank; /* machine check bank */ diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 2f69e3b184f6..8a5cddac7d44 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -82,6 +82,12 @@ physid_mask_t phys_cpu_present_map; static unsigned int disabled_cpu_apicid __read_mostly = BAD_APICID; /* + * This variable controls which CPUs receive external NMIs. By default, + * external NMIs are delivered only to the BSP. + */ +static int apic_extnmi = APIC_EXTNMI_BSP; + +/* * Map cpu index to physical APIC ID */ DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid, BAD_APICID); @@ -1161,6 +1167,8 @@ void __init init_bsp_APIC(void) value = APIC_DM_NMI; if (!lapic_is_integrated()) /* 82489DX */ value |= APIC_LVT_LEVEL_TRIGGER; + if (apic_extnmi == APIC_EXTNMI_NONE) + value |= APIC_LVT_MASKED; apic_write(APIC_LVT1, value); } @@ -1378,9 +1386,11 @@ void setup_local_APIC(void) apic_write(APIC_LVT0, value); /* - * only the BP should see the LINT1 NMI signal, obviously. + * Only the BSP sees the LINT1 NMI signal by default. This can be + * modified by apic_extnmi= boot option. */ - if (!cpu) + if ((!cpu && apic_extnmi != APIC_EXTNMI_NONE) || + apic_extnmi == APIC_EXTNMI_ALL) value = APIC_DM_NMI; else value = APIC_DM_NMI | APIC_LVT_MASKED; @@ -2270,6 +2280,7 @@ static struct { unsigned int apic_tmict; unsigned int apic_tdcr; unsigned int apic_thmr; + unsigned int apic_cmci; } apic_pm_state; static int lapic_suspend(void) @@ -2299,6 +2310,10 @@ static int lapic_suspend(void) if (maxlvt >= 5) apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); #endif +#ifdef CONFIG_X86_MCE_INTEL + if (maxlvt >= 6) + apic_pm_state.apic_cmci = apic_read(APIC_LVTCMCI); +#endif local_irq_save(flags); disable_local_APIC(); @@ -2355,10 +2370,14 @@ static void lapic_resume(void) apic_write(APIC_SPIV, apic_pm_state.apic_spiv); apic_write(APIC_LVT0, apic_pm_state.apic_lvt0); apic_write(APIC_LVT1, apic_pm_state.apic_lvt1); -#if defined(CONFIG_X86_MCE_INTEL) +#ifdef CONFIG_X86_THERMAL_VECTOR if (maxlvt >= 5) apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); #endif +#ifdef CONFIG_X86_MCE_INTEL + if (maxlvt >= 6) + apic_write(APIC_LVTCMCI, apic_pm_state.apic_cmci); +#endif if (maxlvt >= 4) apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc); apic_write(APIC_LVTT, apic_pm_state.apic_lvtt); @@ -2548,3 +2567,23 @@ static int __init apic_set_disabled_cpu_apicid(char *arg) return 0; } early_param("disable_cpu_apicid", apic_set_disabled_cpu_apicid); + +static int __init apic_set_extnmi(char *arg) +{ + if (!arg) + return -EINVAL; + + if (!strncmp("all", arg, 3)) + apic_extnmi = APIC_EXTNMI_ALL; + else if (!strncmp("none", arg, 4)) + apic_extnmi = APIC_EXTNMI_NONE; + else if (!strncmp("bsp", arg, 3)) + apic_extnmi = APIC_EXTNMI_BSP; + else { + pr_warn("Unknown external NMI delivery mode `%s' ignored\n", arg); + return -EINVAL; + } + + return 0; +} +early_param("apic_extnmi", apic_set_extnmi); diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index f92ab36979a2..9968f30cca3e 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -185,6 +185,7 @@ static struct apic apic_flat = { .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and, + .send_IPI = default_send_IPI_single, .send_IPI_mask = flat_send_IPI_mask, .send_IPI_mask_allbutself = flat_send_IPI_mask_allbutself, .send_IPI_allbutself = flat_send_IPI_allbutself, @@ -230,17 +231,6 @@ static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) return 0; } -static void physflat_send_IPI_mask(const struct cpumask *cpumask, int vector) -{ - default_send_IPI_mask_sequence_phys(cpumask, vector); -} - -static void physflat_send_IPI_mask_allbutself(const struct cpumask *cpumask, - int vector) -{ - default_send_IPI_mask_allbutself_phys(cpumask, vector); -} - static void physflat_send_IPI_allbutself(int vector) { default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector); @@ -248,7 +238,7 @@ static void physflat_send_IPI_allbutself(int vector) static void physflat_send_IPI_all(int vector) { - physflat_send_IPI_mask(cpu_online_mask, vector); + default_send_IPI_mask_sequence_phys(cpu_online_mask, vector); } static int physflat_probe(void) @@ -292,8 +282,9 @@ static struct apic apic_physflat = { .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, - .send_IPI_mask = physflat_send_IPI_mask, - .send_IPI_mask_allbutself = physflat_send_IPI_mask_allbutself, + .send_IPI = default_send_IPI_single_phys, + .send_IPI_mask = default_send_IPI_mask_sequence_phys, + .send_IPI_mask_allbutself = default_send_IPI_mask_allbutself_phys, .send_IPI_allbutself = physflat_send_IPI_allbutself, .send_IPI_all = physflat_send_IPI_all, .send_IPI_self = apic_send_IPI_self, diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index 0d96749cfcac..331a7a07c48f 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c @@ -30,6 +30,7 @@ #include <asm/e820.h> static void noop_init_apic_ldr(void) { } +static void noop_send_IPI(int cpu, int vector) { } static void noop_send_IPI_mask(const struct cpumask *cpumask, int vector) { } static void noop_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector) { } static void noop_send_IPI_allbutself(int vector) { } @@ -144,6 +145,7 @@ struct apic apic_noop = { .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and, + .send_IPI = noop_send_IPI, .send_IPI_mask = noop_send_IPI_mask, .send_IPI_mask_allbutself = noop_send_IPI_mask_allbutself, .send_IPI_allbutself = noop_send_IPI_allbutself, diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index 2bd2292a316d..c80c02c6ec49 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c @@ -273,6 +273,7 @@ static const struct apic apic_numachip1 __refconst = { .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, + .send_IPI = numachip_send_IPI_one, .send_IPI_mask = numachip_send_IPI_mask, .send_IPI_mask_allbutself = numachip_send_IPI_mask_allbutself, .send_IPI_allbutself = numachip_send_IPI_allbutself, @@ -324,6 +325,7 @@ static const struct apic apic_numachip2 __refconst = { .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, + .send_IPI = numachip_send_IPI_one, .send_IPI_mask = numachip_send_IPI_mask, .send_IPI_mask_allbutself = numachip_send_IPI_mask_allbutself, .send_IPI_allbutself = numachip_send_IPI_allbutself, diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index 971cf8875939..cf9bd896c12d 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c @@ -96,11 +96,6 @@ static int bigsmp_phys_pkg_id(int cpuid_apic, int index_msb) return cpuid_apic >> index_msb; } -static inline void bigsmp_send_IPI_mask(const struct cpumask *mask, int vector) -{ - default_send_IPI_mask_sequence_phys(mask, vector); -} - static void bigsmp_send_IPI_allbutself(int vector) { default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector); @@ -108,7 +103,7 @@ static void bigsmp_send_IPI_allbutself(int vector) static void bigsmp_send_IPI_all(int vector) { - bigsmp_send_IPI_mask(cpu_online_mask, vector); + default_send_IPI_mask_sequence_phys(cpu_online_mask, vector); } static int dmi_bigsmp; /* can be set by dmi scanners */ @@ -180,7 +175,8 @@ static struct apic apic_bigsmp = { .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, - .send_IPI_mask = bigsmp_send_IPI_mask, + .send_IPI = default_send_IPI_single_phys, + .send_IPI_mask = default_send_IPI_mask_sequence_phys, .send_IPI_mask_allbutself = NULL, .send_IPI_allbutself = bigsmp_send_IPI_allbutself, .send_IPI_all = bigsmp_send_IPI_all, diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c index 62071569bd50..eb45fc9b6124 100644 --- a/arch/x86/kernel/apic/ipi.c +++ b/arch/x86/kernel/apic/ipi.c @@ -18,6 +18,16 @@ #include <asm/proto.h> #include <asm/ipi.h> +void default_send_IPI_single_phys(int cpu, int vector) +{ + unsigned long flags; + + local_irq_save(flags); + __default_send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, cpu), + vector, APIC_DEST_PHYSICAL); + local_irq_restore(flags); +} + void default_send_IPI_mask_sequence_phys(const struct cpumask *mask, int vector) { unsigned long query_cpu; @@ -55,6 +65,14 @@ void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask, local_irq_restore(flags); } +/* + * Helper function for APICs which insist on cpumasks + */ +void default_send_IPI_single(int cpu, int vector) +{ + apic->send_IPI_mask(cpumask_of(cpu), vector); +} + #ifdef CONFIG_X86_32 void default_send_IPI_mask_sequence_logical(const struct cpumask *mask, diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c index 5f1feb6854af..ade25320df96 100644 --- a/arch/x86/kernel/apic/msi.c +++ b/arch/x86/kernel/apic/msi.c @@ -96,8 +96,8 @@ static irq_hw_number_t pci_msi_get_hwirq(struct msi_domain_info *info, return arg->msi_hwirq; } -static int pci_msi_prepare(struct irq_domain *domain, struct device *dev, - int nvec, msi_alloc_info_t *arg) +int pci_msi_prepare(struct irq_domain *domain, struct device *dev, int nvec, + msi_alloc_info_t *arg) { struct pci_dev *pdev = to_pci_dev(dev); struct msi_desc *desc = first_pci_msi_entry(pdev); @@ -113,11 +113,13 @@ static int pci_msi_prepare(struct irq_domain *domain, struct device *dev, return 0; } +EXPORT_SYMBOL_GPL(pci_msi_prepare); -static void pci_msi_set_desc(msi_alloc_info_t *arg, struct msi_desc *desc) +void pci_msi_set_desc(msi_alloc_info_t *arg, struct msi_desc *desc) { arg->msi_hwirq = pci_msi_domain_calc_hwirq(arg->msi_dev, desc); } +EXPORT_SYMBOL_GPL(pci_msi_set_desc); static struct msi_domain_ops pci_msi_domain_ops = { .get_hwirq = pci_msi_get_hwirq, diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index 7694ae6c1199..f316e34abb42 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -105,6 +105,7 @@ static struct apic apic_default = { .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and, + .send_IPI = default_send_IPI_single, .send_IPI_mask = default_send_IPI_mask_logical, .send_IPI_mask_allbutself = default_send_IPI_mask_allbutself_logical, .send_IPI_allbutself = default_send_IPI_allbutself, diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 861bc59c8f25..908cb37da171 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -29,6 +29,7 @@ struct apic_chip_data { }; struct irq_domain *x86_vector_domain; +EXPORT_SYMBOL_GPL(x86_vector_domain); static DEFINE_RAW_SPINLOCK(vector_lock); static cpumask_var_t vector_cpumask; static struct irq_chip lapic_controller; @@ -66,6 +67,7 @@ struct irq_cfg *irqd_cfg(struct irq_data *irq_data) return data ? &data->cfg : NULL; } +EXPORT_SYMBOL_GPL(irqd_cfg); struct irq_cfg *irq_cfg(unsigned int irq) { diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index cc8311c4d298..aca8b75c1552 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -23,6 +23,14 @@ static inline u32 x2apic_cluster(int cpu) return per_cpu(x86_cpu_to_logical_apicid, cpu) >> 16; } +static void x2apic_send_IPI(int cpu, int vector) +{ + u32 dest = per_cpu(x86_cpu_to_logical_apicid, cpu); + + x2apic_wrmsr_fence(); + __x2apic_send_IPI_dest(dest, vector, APIC_DEST_LOGICAL); +} + static void __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest) { @@ -266,6 +274,7 @@ static struct apic apic_x2apic_cluster = { .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and, + .send_IPI = x2apic_send_IPI, .send_IPI_mask = x2apic_send_IPI_mask, .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself, .send_IPI_allbutself = x2apic_send_IPI_allbutself, diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index 662e9150ea6f..a1242e2c12e6 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -36,6 +36,14 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) return x2apic_enabled() && (x2apic_phys || x2apic_fadt_phys()); } +static void x2apic_send_IPI(int cpu, int vector) +{ + u32 dest = per_cpu(x86_cpu_to_apicid, cpu); + + x2apic_wrmsr_fence(); + __x2apic_send_IPI_dest(dest, vector, APIC_DEST_PHYSICAL); +} + static void __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest) { @@ -122,6 +130,7 @@ static struct apic apic_x2apic_phys = { .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, + .send_IPI = x2apic_send_IPI, .send_IPI_mask = x2apic_send_IPI_mask, .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself, .send_IPI_allbutself = x2apic_send_IPI_allbutself, diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 4a139465f1d4..d760c6bb37b5 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -406,6 +406,7 @@ static struct apic __refdata apic_x2apic_uv_x = { .cpu_mask_to_apicid_and = uv_cpu_mask_to_apicid_and, + .send_IPI = uv_send_IPI_one, .send_IPI_mask = uv_send_IPI_mask, .send_IPI_mask_allbutself = uv_send_IPI_mask_allbutself, .send_IPI_allbutself = uv_send_IPI_allbutself, diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 439df975bc7a..84a7524b202c 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -65,9 +65,6 @@ void common(void) { OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable); OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable); OFFSET(PV_CPU_iret, pv_cpu_ops, iret); -#ifdef CONFIG_X86_32 - OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit); -#endif OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0); OFFSET(PV_MMU_read_cr2, pv_mmu_ops, read_cr2); #endif diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index d8f42f902a0f..f2edafb5f24e 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -23,7 +23,6 @@ int main(void) { #ifdef CONFIG_PARAVIRT OFFSET(PV_IRQ_adjust_exception_frame, pv_irq_ops, adjust_exception_frame); - OFFSET(PV_CPU_usergs_sysret32, pv_cpu_ops, usergs_sysret32); OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64); OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs); BLANK(); diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index a8816b325162..a07956a08936 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -304,7 +304,7 @@ static void amd_get_topology(struct cpuinfo_x86 *c) int cpu = smp_processor_id(); /* get information required for multi-node processors */ - if (cpu_has_topoext) { + if (boot_cpu_has(X86_FEATURE_TOPOEXT)) { u32 eax, ebx, ecx, edx; cpuid(0x8000001e, &eax, &ebx, &ecx, &edx); @@ -434,8 +434,7 @@ static void srat_detect_node(struct cpuinfo_x86 *c) */ int ht_nodeid = c->initial_apicid; - if (ht_nodeid >= 0 && - __apicid_to_node[ht_nodeid] != NUMA_NO_NODE) + if (__apicid_to_node[ht_nodeid] != NUMA_NO_NODE) node = __apicid_to_node[ht_nodeid]; /* Pick a nearby node */ if (!node_online(node)) @@ -678,9 +677,9 @@ static void init_amd_bd(struct cpuinfo_x86 *c) * Disable it on the affected CPUs. */ if ((c->x86_model >= 0x02) && (c->x86_model < 0x20)) { - if (!rdmsrl_safe(0xc0011021, &value) && !(value & 0x1E)) { + if (!rdmsrl_safe(MSR_F15H_IC_CFG, &value) && !(value & 0x1E)) { value |= 0x1E; - wrmsrl_safe(0xc0011021, value); + wrmsrl_safe(MSR_F15H_IC_CFG, value); } } } @@ -922,7 +921,7 @@ static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum) void set_dr_addr_mask(unsigned long mask, int dr) { - if (!cpu_has_bpext) + if (!boot_cpu_has(X86_FEATURE_BPEXT)) return; switch (dr) { diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index d8fba5c15fbd..ae20be6e483c 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -43,7 +43,7 @@ static void init_c3(struct cpuinfo_x86 *c) /* store Centaur Extended Feature Flags as * word 5 of the CPU capability bit array */ - c->x86_capability[5] = cpuid_edx(0xC0000001); + c->x86_capability[CPUID_C000_0001_EDX] = cpuid_edx(0xC0000001); } #ifdef CONFIG_X86_32 /* Cyrix III family needs CX8 & PGE explicitly enabled. */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index c2b7522cbf35..37830de8f60a 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -581,14 +581,9 @@ void cpu_detect(struct cpuinfo_x86 *c) u32 junk, tfms, cap0, misc; cpuid(0x00000001, &tfms, &misc, &junk, &cap0); - c->x86 = (tfms >> 8) & 0xf; - c->x86_model = (tfms >> 4) & 0xf; - c->x86_mask = tfms & 0xf; - - if (c->x86 == 0xf) - c->x86 += (tfms >> 20) & 0xff; - if (c->x86 >= 0x6) - c->x86_model += ((tfms >> 16) & 0xf) << 4; + c->x86 = x86_family(tfms); + c->x86_model = x86_model(tfms); + c->x86_mask = x86_stepping(tfms); if (cap0 & (1<<19)) { c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; @@ -599,50 +594,47 @@ void cpu_detect(struct cpuinfo_x86 *c) void get_cpu_cap(struct cpuinfo_x86 *c) { - u32 tfms, xlvl; - u32 ebx; + u32 eax, ebx, ecx, edx; /* Intel-defined flags: level 0x00000001 */ if (c->cpuid_level >= 0x00000001) { - u32 capability, excap; + cpuid(0x00000001, &eax, &ebx, &ecx, &edx); - cpuid(0x00000001, &tfms, &ebx, &excap, &capability); - c->x86_capability[0] = capability; - c->x86_capability[4] = excap; + c->x86_capability[CPUID_1_ECX] = ecx; + c->x86_capability[CPUID_1_EDX] = edx; } /* Additional Intel-defined flags: level 0x00000007 */ if (c->cpuid_level >= 0x00000007) { - u32 eax, ebx, ecx, edx; - cpuid_count(0x00000007, 0, &eax, &ebx, &ecx, &edx); - c->x86_capability[9] = ebx; + c->x86_capability[CPUID_7_0_EBX] = ebx; + + c->x86_capability[CPUID_6_EAX] = cpuid_eax(0x00000006); } /* Extended state features: level 0x0000000d */ if (c->cpuid_level >= 0x0000000d) { - u32 eax, ebx, ecx, edx; - cpuid_count(0x0000000d, 1, &eax, &ebx, &ecx, &edx); - c->x86_capability[10] = eax; + c->x86_capability[CPUID_D_1_EAX] = eax; } /* Additional Intel-defined flags: level 0x0000000F */ if (c->cpuid_level >= 0x0000000F) { - u32 eax, ebx, ecx, edx; /* QoS sub-leaf, EAX=0Fh, ECX=0 */ cpuid_count(0x0000000F, 0, &eax, &ebx, &ecx, &edx); - c->x86_capability[11] = edx; + c->x86_capability[CPUID_F_0_EDX] = edx; + if (cpu_has(c, X86_FEATURE_CQM_LLC)) { /* will be overridden if occupancy monitoring exists */ c->x86_cache_max_rmid = ebx; /* QoS sub-leaf, EAX=0Fh, ECX=1 */ cpuid_count(0x0000000F, 1, &eax, &ebx, &ecx, &edx); - c->x86_capability[12] = edx; + c->x86_capability[CPUID_F_1_EDX] = edx; + if (cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC)) { c->x86_cache_max_rmid = ecx; c->x86_cache_occ_scale = ebx; @@ -654,22 +646,24 @@ void get_cpu_cap(struct cpuinfo_x86 *c) } /* AMD-defined flags: level 0x80000001 */ - xlvl = cpuid_eax(0x80000000); - c->extended_cpuid_level = xlvl; + eax = cpuid_eax(0x80000000); + c->extended_cpuid_level = eax; - if ((xlvl & 0xffff0000) == 0x80000000) { - if (xlvl >= 0x80000001) { - c->x86_capability[1] = cpuid_edx(0x80000001); - c->x86_capability[6] = cpuid_ecx(0x80000001); + if ((eax & 0xffff0000) == 0x80000000) { + if (eax >= 0x80000001) { + cpuid(0x80000001, &eax, &ebx, &ecx, &edx); + + c->x86_capability[CPUID_8000_0001_ECX] = ecx; + c->x86_capability[CPUID_8000_0001_EDX] = edx; } } if (c->extended_cpuid_level >= 0x80000008) { - u32 eax = cpuid_eax(0x80000008); + cpuid(0x80000008, &eax, &ebx, &ecx, &edx); c->x86_virt_bits = (eax >> 8) & 0xff; c->x86_phys_bits = eax & 0xff; - c->x86_capability[13] = cpuid_ebx(0x80000008); + c->x86_capability[CPUID_8000_0008_EBX] = ebx; } #ifdef CONFIG_X86_32 else if (cpu_has(c, X86_FEATURE_PAE) || cpu_has(c, X86_FEATURE_PSE36)) @@ -679,6 +673,9 @@ void get_cpu_cap(struct cpuinfo_x86 *c) if (c->extended_cpuid_level >= 0x80000007) c->x86_power = cpuid_edx(0x80000007); + if (c->extended_cpuid_level >= 0x8000000a) + c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a); + init_scattered_cpuid_features(c); } @@ -1185,7 +1182,7 @@ void syscall_init(void) * They both write to the same internal register. STAR allows to * set CS/DS but only a 32bit target. LSTAR sets the 64bit rip. */ - wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32); + wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS); wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64); #ifdef CONFIG_IA32_EMULATION @@ -1443,7 +1440,9 @@ void cpu_init(void) printk(KERN_INFO "Initializing CPU#%d\n", cpu); - if (cpu_feature_enabled(X86_FEATURE_VME) || cpu_has_tsc || cpu_has_de) + if (cpu_feature_enabled(X86_FEATURE_VME) || + cpu_has_tsc || + boot_cpu_has(X86_FEATURE_DE)) cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); load_current_idt(); diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 209ac1e7d1f0..565648bc1a0a 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -445,7 +445,8 @@ static void init_intel(struct cpuinfo_x86 *c) if (cpu_has_xmm2) set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); - if (cpu_has_ds) { + + if (boot_cpu_has(X86_FEATURE_DS)) { unsigned int l1; rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); if (!(l1 & (1<<11))) diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index e38d338a6447..0b6c52388cf4 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -591,7 +591,7 @@ cpuid4_cache_lookup_regs(int index, struct _cpuid4_info_regs *this_leaf) unsigned edx; if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { - if (cpu_has_topoext) + if (boot_cpu_has(X86_FEATURE_TOPOEXT)) cpuid_count(0x8000001d, index, &eax.full, &ebx.full, &ecx.full, &edx); else @@ -637,7 +637,7 @@ static int find_num_cache_leaves(struct cpuinfo_x86 *c) void init_amd_cacheinfo(struct cpuinfo_x86 *c) { - if (cpu_has_topoext) { + if (boot_cpu_has(X86_FEATURE_TOPOEXT)) { num_cache_leaves = find_num_cache_leaves(c); } else if (c->extended_cpuid_level >= 0x80000006) { if (cpuid_edx(0x80000006) & 0xf000) @@ -809,7 +809,7 @@ static int __cache_amd_cpumap_setup(unsigned int cpu, int index, struct cacheinfo *this_leaf; int i, sibling; - if (cpu_has_topoext) { + if (boot_cpu_has(X86_FEATURE_TOPOEXT)) { unsigned int apicid, nshared, first, last; this_leaf = this_cpu_ci->info_list + index; diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 7e8a736d09db..a006f4cd792b 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -114,7 +114,6 @@ static struct work_struct mce_work; static struct irq_work mce_irq_work; static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs); -static int mce_usable_address(struct mce *m); /* * CPU/chipset specific EDAC code can register a notifier call here to print @@ -475,6 +474,28 @@ static void mce_report_event(struct pt_regs *regs) irq_work_queue(&mce_irq_work); } +/* + * Check if the address reported by the CPU is in a format we can parse. + * It would be possible to add code for most other cases, but all would + * be somewhat complicated (e.g. segment offset would require an instruction + * parser). So only support physical addresses up to page granuality for now. + */ +static int mce_usable_address(struct mce *m) +{ + if (!(m->status & MCI_STATUS_MISCV) || !(m->status & MCI_STATUS_ADDRV)) + return 0; + + /* Checks after this one are Intel-specific: */ + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) + return 1; + + if (MCI_MISC_ADDR_LSB(m->misc) > PAGE_SHIFT) + return 0; + if (MCI_MISC_ADDR_MODE(m->misc) != MCI_MISC_ADDR_PHYS) + return 0; + return 1; +} + static int srao_decode_notifier(struct notifier_block *nb, unsigned long val, void *data) { @@ -484,7 +505,7 @@ static int srao_decode_notifier(struct notifier_block *nb, unsigned long val, if (!mce) return NOTIFY_DONE; - if (mce->usable_addr && (mce->severity == MCE_AO_SEVERITY)) { + if (mce_usable_address(mce) && (mce->severity == MCE_AO_SEVERITY)) { pfn = mce->addr >> PAGE_SHIFT; memory_failure(pfn, MCE_VECTOR, 0); } @@ -522,10 +543,10 @@ static bool memory_error(struct mce *m) struct cpuinfo_x86 *c = &boot_cpu_data; if (c->x86_vendor == X86_VENDOR_AMD) { - /* - * coming soon - */ - return false; + /* ErrCodeExt[20:16] */ + u8 xec = (m->status >> 16) & 0x1f; + + return (xec == 0x0 || xec == 0x8); } else if (c->x86_vendor == X86_VENDOR_INTEL) { /* * Intel SDM Volume 3B - 15.9.2 Compound Error Codes @@ -567,7 +588,7 @@ DEFINE_PER_CPU(unsigned, mce_poll_count); */ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b) { - bool error_logged = false; + bool error_seen = false; struct mce m; int severity; int i; @@ -601,6 +622,8 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b) (m.status & (mca_cfg.ser ? MCI_STATUS_S : MCI_STATUS_UC))) continue; + error_seen = true; + mce_read_aux(&m, i); if (!(flags & MCP_TIMESTAMP)) @@ -608,27 +631,24 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b) severity = mce_severity(&m, mca_cfg.tolerant, NULL, false); - /* - * In the cases where we don't have a valid address after all, - * do not add it into the ring buffer. - */ - if (severity == MCE_DEFERRED_SEVERITY && memory_error(&m)) { - if (m.status & MCI_STATUS_ADDRV) { + if (severity == MCE_DEFERRED_SEVERITY && memory_error(&m)) + if (m.status & MCI_STATUS_ADDRV) m.severity = severity; - m.usable_addr = mce_usable_address(&m); - - if (!mce_gen_pool_add(&m)) - mce_schedule_work(); - } - } /* * Don't get the IP here because it's unlikely to * have anything to do with the actual error location. */ - if (!(flags & MCP_DONTLOG) && !mca_cfg.dont_log_ce) { - error_logged = true; + if (!(flags & MCP_DONTLOG) && !mca_cfg.dont_log_ce) mce_log(&m); + else if (mce_usable_address(&m)) { + /* + * Although we skipped logging this, we still want + * to take action. Add to the pool so the registered + * notifiers will see it. + */ + if (!mce_gen_pool_add(&m)) + mce_schedule_work(); } /* @@ -644,7 +664,7 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b) sync_core(); - return error_logged; + return error_seen; } EXPORT_SYMBOL_GPL(machine_check_poll); @@ -931,23 +951,6 @@ reset: return ret; } -/* - * Check if the address reported by the CPU is in a format we can parse. - * It would be possible to add code for most other cases, but all would - * be somewhat complicated (e.g. segment offset would require an instruction - * parser). So only support physical addresses up to page granuality for now. - */ -static int mce_usable_address(struct mce *m) -{ - if (!(m->status & MCI_STATUS_MISCV) || !(m->status & MCI_STATUS_ADDRV)) - return 0; - if (MCI_MISC_ADDR_LSB(m->misc) > PAGE_SHIFT) - return 0; - if (MCI_MISC_ADDR_MODE(m->misc) != MCI_MISC_ADDR_PHYS) - return 0; - return 1; -} - static void mce_clear_state(unsigned long *toclear) { int i; @@ -1100,7 +1103,6 @@ void do_machine_check(struct pt_regs *regs, long error_code) /* assuming valid severity level != 0 */ m.severity = severity; - m.usable_addr = mce_usable_address(&m); mce_log(&m); diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index b3e94ef461fd..faec7120c508 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -129,8 +129,8 @@ void __init load_ucode_bsp(void) if (!have_cpuid_p()) return; - vendor = x86_vendor(); - family = x86_family(); + vendor = x86_cpuid_vendor(); + family = x86_cpuid_family(); switch (vendor) { case X86_VENDOR_INTEL: @@ -165,8 +165,8 @@ void load_ucode_ap(void) if (!have_cpuid_p()) return; - vendor = x86_vendor(); - family = x86_family(); + vendor = x86_cpuid_vendor(); + family = x86_cpuid_family(); switch (vendor) { case X86_VENDOR_INTEL: @@ -206,8 +206,8 @@ void reload_early_microcode(void) { int vendor, family; - vendor = x86_vendor(); - family = x86_family(); + vendor = x86_cpuid_vendor(); + family = x86_cpuid_family(); switch (vendor) { case X86_VENDOR_INTEL: diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index ce47402eb2f9..ee81c544ee0d 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -145,10 +145,10 @@ matching_model_microcode(struct microcode_header_intel *mc_header, int ext_sigcount, i; struct extended_signature *ext_sig; - fam = __x86_family(sig); + fam = x86_family(sig); model = x86_model(sig); - fam_ucode = __x86_family(mc_header->sig); + fam_ucode = x86_family(mc_header->sig); model_ucode = x86_model(mc_header->sig); if (fam == fam_ucode && model == model_ucode) @@ -163,7 +163,7 @@ matching_model_microcode(struct microcode_header_intel *mc_header, ext_sigcount = ext_header->count; for (i = 0; i < ext_sigcount; i++) { - fam_ucode = __x86_family(ext_sig->sig); + fam_ucode = x86_family(ext_sig->sig); model_ucode = x86_model(ext_sig->sig); if (fam == fam_ucode && model == model_ucode) @@ -365,7 +365,7 @@ static int collect_cpu_info_early(struct ucode_cpu_info *uci) native_cpuid(&eax, &ebx, &ecx, &edx); csig.sig = eax; - family = __x86_family(csig.sig); + family = x86_family(csig.sig); model = x86_model(csig.sig); if ((model >= 5) || (family > 6)) { @@ -521,16 +521,12 @@ static bool __init load_builtin_intel_microcode(struct cpio_data *cp) { #ifdef CONFIG_X86_64 unsigned int eax = 0x00000001, ebx, ecx = 0, edx; - unsigned int family, model, stepping; char name[30]; native_cpuid(&eax, &ebx, &ecx, &edx); - family = __x86_family(eax); - model = x86_model(eax); - stepping = eax & 0xf; - - sprintf(name, "intel-ucode/%02x-%02x-%02x", family, model, stepping); + sprintf(name, "intel-ucode/%02x-%02x-%02x", + x86_family(eax), x86_model(eax), x86_stepping(eax)); return get_builtin_firmware(cp, name); #else diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c index 70d7c93f4550..0d98503c2245 100644 --- a/arch/x86/kernel/cpu/mtrr/cleanup.c +++ b/arch/x86/kernel/cpu/mtrr/cleanup.c @@ -593,9 +593,16 @@ mtrr_calc_range_state(u64 chunk_size, u64 gran_size, unsigned long x_remove_base, unsigned long x_remove_size, int i) { - static struct range range_new[RANGE_NUM]; + /* + * range_new should really be an automatic variable, but + * putting 4096 bytes on the stack is frowned upon, to put it + * mildly. It is safe to make it a static __initdata variable, + * since mtrr_calc_range_state is only called during init and + * there's no way it will call itself recursively. + */ + static struct range range_new[RANGE_NUM] __initdata; unsigned long range_sums_new; - static int nr_range_new; + int nr_range_new; int num_reg; /* Convert ranges to var ranges state: */ diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 3b533cf37c74..c870af161008 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -349,7 +349,7 @@ static void get_fixed_ranges(mtrr_type *frs) void mtrr_save_fixed_ranges(void *info) { - if (cpu_has_mtrr) + if (boot_cpu_has(X86_FEATURE_MTRR)) get_fixed_ranges(mtrr_state.fixed_ranges); } diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index f891b4750f04..5c3d149ee91c 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -682,7 +682,7 @@ void __init mtrr_bp_init(void) phys_addr = 32; - if (cpu_has_mtrr) { + if (boot_cpu_has(X86_FEATURE_MTRR)) { mtrr_if = &generic_mtrr_ops; size_or_mask = SIZE_OR_MASK_BITS(36); size_and_mask = 0x00f00000; diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 2bf79d7c97df..1b443db2db50 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -482,6 +482,9 @@ int x86_pmu_hw_config(struct perf_event *event) /* Support for IP fixup */ if (x86_pmu.lbr_nr || x86_pmu.intel_cap.pebs_format >= 2) precise++; + + if (x86_pmu.pebs_prec_dist) + precise++; } if (event->attr.precise_ip > precise) @@ -1531,6 +1534,7 @@ static void __init filter_events(struct attribute **attrs) { struct device_attribute *d; struct perf_pmu_events_attr *pmu_attr; + int offset = 0; int i, j; for (i = 0; attrs[i]; i++) { @@ -1539,7 +1543,7 @@ static void __init filter_events(struct attribute **attrs) /* str trumps id */ if (pmu_attr->event_str) continue; - if (x86_pmu.event_map(i)) + if (x86_pmu.event_map(i + offset)) continue; for (j = i; attrs[j]; j++) @@ -1547,6 +1551,14 @@ static void __init filter_events(struct attribute **attrs) /* Check the shifted attr. */ i--; + + /* + * event_map() is index based, the attrs array is organized + * by increasing event index. If we shift the events, then + * we need to compensate for the event_map(), otherwise + * we are looking up the wrong event in the map + */ + offset++; } } @@ -2250,12 +2262,19 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry) ss_base = get_segment_base(regs->ss); fp = compat_ptr(ss_base + regs->bp); + pagefault_disable(); while (entry->nr < PERF_MAX_STACK_DEPTH) { unsigned long bytes; frame.next_frame = 0; frame.return_address = 0; - bytes = copy_from_user_nmi(&frame, fp, sizeof(frame)); + if (!access_ok(VERIFY_READ, fp, 8)) + break; + + bytes = __copy_from_user_nmi(&frame.next_frame, fp, 4); + if (bytes != 0) + break; + bytes = __copy_from_user_nmi(&frame.return_address, fp+4, 4); if (bytes != 0) break; @@ -2265,6 +2284,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry) perf_callchain_store(entry, cs_base + frame.return_address); fp = compat_ptr(ss_base + frame.next_frame); } + pagefault_enable(); return 1; } #else @@ -2302,12 +2322,19 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) if (perf_callchain_user32(regs, entry)) return; + pagefault_disable(); while (entry->nr < PERF_MAX_STACK_DEPTH) { unsigned long bytes; frame.next_frame = NULL; frame.return_address = 0; - bytes = copy_from_user_nmi(&frame, fp, sizeof(frame)); + if (!access_ok(VERIFY_READ, fp, 16)) + break; + + bytes = __copy_from_user_nmi(&frame.next_frame, fp, 8); + if (bytes != 0) + break; + bytes = __copy_from_user_nmi(&frame.return_address, fp+8, 8); if (bytes != 0) break; @@ -2315,8 +2342,9 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) break; perf_callchain_store(entry, frame.return_address); - fp = frame.next_frame; + fp = (void __user *)frame.next_frame; } + pagefault_enable(); } /* diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index d0e35ebb2adb..7bb61e32fb29 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -14,17 +14,7 @@ #include <linux/perf_event.h> -#if 0 -#undef wrmsrl -#define wrmsrl(msr, val) \ -do { \ - unsigned int _msr = (msr); \ - u64 _val = (val); \ - trace_printk("wrmsrl(%x, %Lx)\n", (unsigned int)(_msr), \ - (unsigned long long)(_val)); \ - native_write_msr((_msr), (u32)(_val), (u32)(_val >> 32)); \ -} while (0) -#endif +/* To enable MSR tracing please use the generic trace points. */ /* * | NHM/WSM | SNB | @@ -318,6 +308,10 @@ struct cpu_hw_events { #define INTEL_UEVENT_CONSTRAINT(c, n) \ EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK) +/* Constraint on specific umask bit only + event */ +#define INTEL_UBIT_EVENT_CONSTRAINT(c, n) \ + EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT|(c)) + /* Like UEVENT_CONSTRAINT, but match flags too */ #define INTEL_FLAGS_UEVENT_CONSTRAINT(c, n) \ EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS) @@ -589,7 +583,8 @@ struct x86_pmu { bts_active :1, pebs :1, pebs_active :1, - pebs_broken :1; + pebs_broken :1, + pebs_prec_dist :1; int pebs_record_size; void (*drain_pebs)(struct pt_regs *regs); struct event_constraint *pebs_constraints; @@ -907,6 +902,8 @@ void intel_pmu_lbr_init_hsw(void); void intel_pmu_lbr_init_skl(void); +void intel_pmu_lbr_init_knl(void); + int intel_pmu_setup_lbr_filter(struct perf_event *event); void intel_pt_interrupt(void); diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index 1cee5d2d7ece..58610539b048 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c @@ -18,7 +18,7 @@ static __initconst const u64 amd_hw_cache_event_ids [ C(RESULT_MISS) ] = 0x0141, /* Data Cache Misses */ }, [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */ + [ C(RESULT_ACCESS) ] = 0, [ C(RESULT_MISS) ] = 0, }, [ C(OP_PREFETCH) ] = { @@ -160,7 +160,7 @@ static inline int amd_pmu_addr_offset(int index, bool eventsel) if (offset) return offset; - if (!cpu_has_perfctr_core) + if (!boot_cpu_has(X86_FEATURE_PERFCTR_CORE)) offset = index; else offset = index << 1; @@ -652,7 +652,7 @@ static __initconst const struct x86_pmu amd_pmu = { static int __init amd_core_pmu_init(void) { - if (!cpu_has_perfctr_core) + if (!boot_cpu_has(X86_FEATURE_PERFCTR_CORE)) return 0; switch (boot_cpu_data.x86) { diff --git a/arch/x86/kernel/cpu/perf_event_amd_uncore.c b/arch/x86/kernel/cpu/perf_event_amd_uncore.c index cc6cedb8f25d..49742746a6c9 100644 --- a/arch/x86/kernel/cpu/perf_event_amd_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_amd_uncore.c @@ -523,10 +523,10 @@ static int __init amd_uncore_init(void) if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) goto fail_nodev; - if (!cpu_has_topoext) + if (!boot_cpu_has(X86_FEATURE_TOPOEXT)) goto fail_nodev; - if (cpu_has_perfctr_nb) { + if (boot_cpu_has(X86_FEATURE_PERFCTR_NB)) { amd_uncore_nb = alloc_percpu(struct amd_uncore *); if (!amd_uncore_nb) { ret = -ENOMEM; @@ -540,7 +540,7 @@ static int __init amd_uncore_init(void) ret = 0; } - if (cpu_has_perfctr_l2) { + if (boot_cpu_has(X86_FEATURE_PERFCTR_L2)) { amd_uncore_l2 = alloc_percpu(struct amd_uncore *); if (!amd_uncore_l2) { ret = -ENOMEM; @@ -583,10 +583,11 @@ fail_online: /* amd_uncore_nb/l2 should have been freed by cleanup_cpu_online */ amd_uncore_nb = amd_uncore_l2 = NULL; - if (cpu_has_perfctr_l2) + + if (boot_cpu_has(X86_FEATURE_PERFCTR_L2)) perf_pmu_unregister(&amd_l2_pmu); fail_l2: - if (cpu_has_perfctr_nb) + if (boot_cpu_has(X86_FEATURE_PERFCTR_NB)) perf_pmu_unregister(&amd_nb_pmu); if (amd_uncore_l2) free_percpu(amd_uncore_l2); diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index e2a430021e46..a667078a5180 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -185,6 +185,14 @@ struct event_constraint intel_skl_event_constraints[] = { EVENT_CONSTRAINT_END }; +static struct extra_reg intel_knl_extra_regs[] __read_mostly = { + INTEL_UEVENT_EXTRA_REG(0x01b7, + MSR_OFFCORE_RSP_0, 0x7f9ffbffffull, RSP_0), + INTEL_UEVENT_EXTRA_REG(0x02b7, + MSR_OFFCORE_RSP_1, 0x3f9ffbffffull, RSP_1), + EVENT_EXTRA_END +}; + static struct extra_reg intel_snb_extra_regs[] __read_mostly = { /* must define OFFCORE_RSP_X first, see intel_fixup_er() */ INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3f807f8fffull, RSP_0), @@ -255,7 +263,7 @@ struct event_constraint intel_bdw_event_constraints[] = { FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ INTEL_UEVENT_CONSTRAINT(0x148, 0x4), /* L1D_PEND_MISS.PENDING */ - INTEL_UEVENT_CONSTRAINT(0x8a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_MISS */ + INTEL_UBIT_EVENT_CONSTRAINT(0x8a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_MISS */ EVENT_CONSTRAINT_END }; @@ -1457,6 +1465,42 @@ static __initconst const u64 slm_hw_cache_event_ids }, }; +#define KNL_OT_L2_HITE BIT_ULL(19) /* Other Tile L2 Hit */ +#define KNL_OT_L2_HITF BIT_ULL(20) /* Other Tile L2 Hit */ +#define KNL_MCDRAM_LOCAL BIT_ULL(21) +#define KNL_MCDRAM_FAR BIT_ULL(22) +#define KNL_DDR_LOCAL BIT_ULL(23) +#define KNL_DDR_FAR BIT_ULL(24) +#define KNL_DRAM_ANY (KNL_MCDRAM_LOCAL | KNL_MCDRAM_FAR | \ + KNL_DDR_LOCAL | KNL_DDR_FAR) +#define KNL_L2_READ SLM_DMND_READ +#define KNL_L2_WRITE SLM_DMND_WRITE +#define KNL_L2_PREFETCH SLM_DMND_PREFETCH +#define KNL_L2_ACCESS SLM_LLC_ACCESS +#define KNL_L2_MISS (KNL_OT_L2_HITE | KNL_OT_L2_HITF | \ + KNL_DRAM_ANY | SNB_SNP_ANY | \ + SNB_NON_DRAM) + +static __initconst const u64 knl_hw_cache_extra_regs + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = KNL_L2_READ | KNL_L2_ACCESS, + [C(RESULT_MISS)] = 0, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = KNL_L2_WRITE | KNL_L2_ACCESS, + [C(RESULT_MISS)] = KNL_L2_WRITE | KNL_L2_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = KNL_L2_PREFETCH | KNL_L2_ACCESS, + [C(RESULT_MISS)] = KNL_L2_PREFETCH | KNL_L2_MISS, + }, + }, +}; + /* * Use from PMIs where the LBRs are already disabled. */ @@ -2475,6 +2519,44 @@ static void intel_pebs_aliases_snb(struct perf_event *event) } } +static void intel_pebs_aliases_precdist(struct perf_event *event) +{ + if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) { + /* + * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P + * (0x003c) so that we can use it with PEBS. + * + * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't + * PEBS capable. However we can use INST_RETIRED.PREC_DIST + * (0x01c0), which is a PEBS capable event, to get the same + * count. + * + * The PREC_DIST event has special support to minimize sample + * shadowing effects. One drawback is that it can be + * only programmed on counter 1, but that seems like an + * acceptable trade off. + */ + u64 alt_config = X86_CONFIG(.event=0xc0, .umask=0x01, .inv=1, .cmask=16); + + alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK); + event->hw.config = alt_config; + } +} + +static void intel_pebs_aliases_ivb(struct perf_event *event) +{ + if (event->attr.precise_ip < 3) + return intel_pebs_aliases_snb(event); + return intel_pebs_aliases_precdist(event); +} + +static void intel_pebs_aliases_skl(struct perf_event *event) +{ + if (event->attr.precise_ip < 3) + return intel_pebs_aliases_core2(event); + return intel_pebs_aliases_precdist(event); +} + static unsigned long intel_pmu_free_running_flags(struct perf_event *event) { unsigned long flags = x86_pmu.free_running_flags; @@ -3332,6 +3414,7 @@ __init int intel_pmu_init(void) x86_pmu.event_constraints = intel_gen_event_constraints; x86_pmu.pebs_constraints = intel_atom_pebs_event_constraints; + x86_pmu.pebs_aliases = intel_pebs_aliases_core2; pr_cont("Atom events, "); break; @@ -3431,7 +3514,8 @@ __init int intel_pmu_init(void) x86_pmu.event_constraints = intel_ivb_event_constraints; x86_pmu.pebs_constraints = intel_ivb_pebs_event_constraints; - x86_pmu.pebs_aliases = intel_pebs_aliases_snb; + x86_pmu.pebs_aliases = intel_pebs_aliases_ivb; + x86_pmu.pebs_prec_dist = true; if (boot_cpu_data.x86_model == 62) x86_pmu.extra_regs = intel_snbep_extra_regs; else @@ -3464,7 +3548,8 @@ __init int intel_pmu_init(void) x86_pmu.event_constraints = intel_hsw_event_constraints; x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints; x86_pmu.extra_regs = intel_snbep_extra_regs; - x86_pmu.pebs_aliases = intel_pebs_aliases_snb; + x86_pmu.pebs_aliases = intel_pebs_aliases_ivb; + x86_pmu.pebs_prec_dist = true; /* all extra regs are per-cpu when HT is on */ x86_pmu.flags |= PMU_FL_HAS_RSP_1; x86_pmu.flags |= PMU_FL_NO_HT_SHARING; @@ -3499,7 +3584,8 @@ __init int intel_pmu_init(void) x86_pmu.event_constraints = intel_bdw_event_constraints; x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints; x86_pmu.extra_regs = intel_snbep_extra_regs; - x86_pmu.pebs_aliases = intel_pebs_aliases_snb; + x86_pmu.pebs_aliases = intel_pebs_aliases_ivb; + x86_pmu.pebs_prec_dist = true; /* all extra regs are per-cpu when HT is on */ x86_pmu.flags |= PMU_FL_HAS_RSP_1; x86_pmu.flags |= PMU_FL_NO_HT_SHARING; @@ -3511,6 +3597,24 @@ __init int intel_pmu_init(void) pr_cont("Broadwell events, "); break; + case 87: /* Knights Landing Xeon Phi */ + memcpy(hw_cache_event_ids, + slm_hw_cache_event_ids, sizeof(hw_cache_event_ids)); + memcpy(hw_cache_extra_regs, + knl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); + intel_pmu_lbr_init_knl(); + + x86_pmu.event_constraints = intel_slm_event_constraints; + x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints; + x86_pmu.extra_regs = intel_knl_extra_regs; + + /* all extra regs are per-cpu when HT is on */ + x86_pmu.flags |= PMU_FL_HAS_RSP_1; + x86_pmu.flags |= PMU_FL_NO_HT_SHARING; + + pr_cont("Knights Landing events, "); + break; + case 78: /* 14nm Skylake Mobile */ case 94: /* 14nm Skylake Desktop */ x86_pmu.late_ack = true; @@ -3521,7 +3625,8 @@ __init int intel_pmu_init(void) x86_pmu.event_constraints = intel_skl_event_constraints; x86_pmu.pebs_constraints = intel_skl_pebs_event_constraints; x86_pmu.extra_regs = intel_skl_extra_regs; - x86_pmu.pebs_aliases = intel_pebs_aliases_snb; + x86_pmu.pebs_aliases = intel_pebs_aliases_skl; + x86_pmu.pebs_prec_dist = true; /* all extra regs are per-cpu when HT is on */ x86_pmu.flags |= PMU_FL_HAS_RSP_1; x86_pmu.flags |= PMU_FL_NO_HT_SHARING; diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 5db1c7755548..10602f0a438f 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -620,6 +620,8 @@ struct event_constraint intel_atom_pebs_event_constraints[] = { INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */ /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */ INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x01), + /* Allow all events as PEBS with no flags */ + INTEL_ALL_EVENT_CONSTRAINT(0, 0x1), EVENT_CONSTRAINT_END }; @@ -686,6 +688,8 @@ struct event_constraint intel_ivb_pebs_event_constraints[] = { INTEL_PST_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORES */ /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */ INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf), + /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */ + INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c0, 0x2), INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */ INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ @@ -700,6 +704,8 @@ struct event_constraint intel_hsw_pebs_event_constraints[] = { INTEL_PLD_CONSTRAINT(0x01cd, 0xf), /* MEM_TRANS_RETIRED.* */ /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */ INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf), + /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */ + INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c0, 0x2), INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */ @@ -718,9 +724,10 @@ struct event_constraint intel_hsw_pebs_event_constraints[] = { struct event_constraint intel_skl_pebs_event_constraints[] = { INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x2), /* INST_RETIRED.PREC_DIST */ - INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */ - /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */ - INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf), + /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */ + INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c0, 0x2), + /* INST_RETIRED.TOTAL_CYCLES_PS (inv=1, cmask=16) (cycles:p). */ + INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x0f), INTEL_PLD_CONSTRAINT(0x1cd, 0xf), /* MEM_TRANS_RETIRED.* */ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */ @@ -1101,6 +1108,13 @@ get_next_pebs_record_by_bit(void *base, void *top, int bit) void *at; u64 pebs_status; + /* + * fmt0 does not have a status bitfield (does not use + * perf_record_nhm format) + */ + if (x86_pmu.intel_cap.pebs_format < 1) + return base; + if (base == NULL) return NULL; @@ -1186,7 +1200,7 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) if (!event->attr.precise_ip) return; - n = (top - at) / x86_pmu.pebs_record_size; + n = top - at; if (n <= 0) return; @@ -1230,12 +1244,21 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) pebs_status = p->status & cpuc->pebs_enabled; pebs_status &= (1ULL << x86_pmu.max_pebs_events) - 1; + /* + * On some CPUs the PEBS status can be zero when PEBS is + * racing with clearing of GLOBAL_STATUS. + * + * Normally we would drop that record, but in the + * case when there is only a single active PEBS event + * we can assume it's for that event. + */ + if (!pebs_status && cpuc->pebs_enabled && + !(cpuc->pebs_enabled & (cpuc->pebs_enabled-1))) + pebs_status = cpuc->pebs_enabled; + bit = find_first_bit((unsigned long *)&pebs_status, x86_pmu.max_pebs_events); - if (WARN(bit >= x86_pmu.max_pebs_events, - "PEBS record without PEBS event! status=%Lx pebs_enabled=%Lx active_mask=%Lx", - (unsigned long long)p->status, (unsigned long long)cpuc->pebs_enabled, - *(unsigned long long *)cpuc->active_mask)) + if (bit >= x86_pmu.max_pebs_events) continue; /* diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c index 659f01e165d5..653f88d25987 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c @@ -42,6 +42,13 @@ static enum { #define LBR_FAR_BIT 8 /* do not capture far branches */ #define LBR_CALL_STACK_BIT 9 /* enable call stack */ +/* + * Following bit only exists in Linux; we mask it out before writing it to + * the actual MSR. But it helps the constraint perf code to understand + * that this is a separate configuration. + */ +#define LBR_NO_INFO_BIT 63 /* don't read LBR_INFO. */ + #define LBR_KERNEL (1 << LBR_KERNEL_BIT) #define LBR_USER (1 << LBR_USER_BIT) #define LBR_JCC (1 << LBR_JCC_BIT) @@ -52,6 +59,7 @@ static enum { #define LBR_IND_JMP (1 << LBR_IND_JMP_BIT) #define LBR_FAR (1 << LBR_FAR_BIT) #define LBR_CALL_STACK (1 << LBR_CALL_STACK_BIT) +#define LBR_NO_INFO (1ULL << LBR_NO_INFO_BIT) #define LBR_PLM (LBR_KERNEL | LBR_USER) @@ -152,8 +160,8 @@ static void __intel_pmu_lbr_enable(bool pmi) * did not change. */ if (cpuc->lbr_sel) - lbr_select = cpuc->lbr_sel->config; - if (!pmi) + lbr_select = cpuc->lbr_sel->config & x86_pmu.lbr_sel_mask; + if (!pmi && cpuc->lbr_sel) wrmsrl(MSR_LBR_SELECT, lbr_select); rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); @@ -422,6 +430,7 @@ static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc) */ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) { + bool need_info = false; unsigned long mask = x86_pmu.lbr_nr - 1; int lbr_format = x86_pmu.intel_cap.lbr_format; u64 tos = intel_pmu_lbr_tos(); @@ -429,8 +438,11 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) int out = 0; int num = x86_pmu.lbr_nr; - if (cpuc->lbr_sel->config & LBR_CALL_STACK) - num = tos; + if (cpuc->lbr_sel) { + need_info = !(cpuc->lbr_sel->config & LBR_NO_INFO); + if (cpuc->lbr_sel->config & LBR_CALL_STACK) + num = tos; + } for (i = 0; i < num; i++) { unsigned long lbr_idx = (tos - i) & mask; @@ -442,7 +454,7 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) rdmsrl(x86_pmu.lbr_from + lbr_idx, from); rdmsrl(x86_pmu.lbr_to + lbr_idx, to); - if (lbr_format == LBR_FORMAT_INFO) { + if (lbr_format == LBR_FORMAT_INFO && need_info) { u64 info; rdmsrl(MSR_LBR_INFO_0 + lbr_idx, info); @@ -590,6 +602,7 @@ static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event) if (v != LBR_IGN) mask |= v; } + reg = &event->hw.branch_reg; reg->idx = EXTRA_REG_LBR; @@ -600,6 +613,11 @@ static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event) */ reg->config = mask ^ x86_pmu.lbr_sel_mask; + if ((br_type & PERF_SAMPLE_BRANCH_NO_CYCLES) && + (br_type & PERF_SAMPLE_BRANCH_NO_FLAGS) && + (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)) + reg->config |= LBR_NO_INFO; + return 0; } @@ -1028,3 +1046,17 @@ void __init intel_pmu_lbr_init_atom(void) */ pr_cont("8-deep LBR, "); } + +/* Knights Landing */ +void intel_pmu_lbr_init_knl(void) +{ + x86_pmu.lbr_nr = 8; + x86_pmu.lbr_tos = MSR_LBR_TOS; + x86_pmu.lbr_from = MSR_LBR_NHM_FROM; + x86_pmu.lbr_to = MSR_LBR_NHM_TO; + + x86_pmu.lbr_sel_mask = LBR_SEL_MASK; + x86_pmu.lbr_sel_map = snb_lbr_sel_map; + + pr_cont("8-deep LBR, "); +} diff --git a/arch/x86/kernel/cpu/perf_event_intel_pt.c b/arch/x86/kernel/cpu/perf_event_intel_pt.c index 868e1194337f..c0bbd1033b7c 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_pt.c +++ b/arch/x86/kernel/cpu/perf_event_intel_pt.c @@ -27,6 +27,7 @@ #include <asm/perf_event.h> #include <asm/insn.h> #include <asm/io.h> +#include <asm/intel_pt.h> #include "perf_event.h" #include "intel_pt.h" @@ -1122,6 +1123,14 @@ static int pt_event_init(struct perf_event *event) return 0; } +void cpu_emergency_stop_pt(void) +{ + struct pt *pt = this_cpu_ptr(&pt_ctx); + + if (pt->handle.event) + pt_event_stop(pt->handle.event, PERF_EF_UPDATE); +} + static __init int pt_init(void) { int ret, cpu, prior_warn = 0; diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/kernel/cpu/perf_event_intel_rapl.c index ed446bdcbf31..24a351ad628d 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_rapl.c +++ b/arch/x86/kernel/cpu/perf_event_intel_rapl.c @@ -63,7 +63,7 @@ #define INTEL_RAPL_PP1 0x4 /* pseudo-encoding */ #define NR_RAPL_DOMAINS 0x4 -static const char *rapl_domain_names[NR_RAPL_DOMAINS] __initconst = { +static const char *const rapl_domain_names[NR_RAPL_DOMAINS] __initconst = { "pp0-core", "package", "dram", @@ -109,11 +109,11 @@ static struct kobj_attribute format_attr_##_var = \ #define RAPL_CNTR_WIDTH 32 /* 32-bit rapl counters */ -#define RAPL_EVENT_ATTR_STR(_name, v, str) \ -static struct perf_pmu_events_attr event_attr_##v = { \ - .attr = __ATTR(_name, 0444, rapl_sysfs_show, NULL), \ - .id = 0, \ - .event_str = str, \ +#define RAPL_EVENT_ATTR_STR(_name, v, str) \ +static struct perf_pmu_events_attr event_attr_##v = { \ + .attr = __ATTR(_name, 0444, perf_event_sysfs_show, NULL), \ + .id = 0, \ + .event_str = str, \ }; struct rapl_pmu { @@ -405,19 +405,6 @@ static struct attribute_group rapl_pmu_attr_group = { .attrs = rapl_pmu_attrs, }; -static ssize_t rapl_sysfs_show(struct device *dev, - struct device_attribute *attr, - char *page) -{ - struct perf_pmu_events_attr *pmu_attr = \ - container_of(attr, struct perf_pmu_events_attr, attr); - - if (pmu_attr->event_str) - return sprintf(page, "%s", pmu_attr->event_str); - - return 0; -} - RAPL_EVENT_ATTR_STR(energy-cores, rapl_cores, "event=0x01"); RAPL_EVENT_ATTR_STR(energy-pkg , rapl_pkg, "event=0x02"); RAPL_EVENT_ATTR_STR(energy-ram , rapl_ram, "event=0x03"); diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index 61215a69b03d..f97f8075bf04 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -884,6 +884,15 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id * each box has a different function id. */ pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)]; + /* Knights Landing uses a common PCI device ID for multiple instances of + * an uncore PMU device type. There is only one entry per device type in + * the knl_uncore_pci_ids table inspite of multiple devices present for + * some device types. Hence PCI device idx would be 0 for all devices. + * So increment pmu pointer to point to an unused array element. + */ + if (boot_cpu_data.x86_model == 87) + while (pmu->func_id >= 0) + pmu++; if (pmu->func_id < 0) pmu->func_id = pdev->devfn; else @@ -966,6 +975,7 @@ static int __init uncore_pci_init(void) case 63: /* Haswell-EP */ ret = hswep_uncore_pci_init(); break; + case 79: /* BDX-EP */ case 86: /* BDX-DE */ ret = bdx_uncore_pci_init(); break; @@ -982,6 +992,9 @@ static int __init uncore_pci_init(void) case 61: /* Broadwell */ ret = bdw_uncore_pci_init(); break; + case 87: /* Knights Landing */ + ret = knl_uncore_pci_init(); + break; default: return 0; } @@ -1287,9 +1300,13 @@ static int __init uncore_cpu_init(void) case 63: /* Haswell-EP */ hswep_uncore_cpu_init(); break; + case 79: /* BDX-EP */ case 86: /* BDX-DE */ bdx_uncore_cpu_init(); break; + case 87: /* Knights Landing */ + knl_uncore_cpu_init(); + break; default: return 0; } diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h index 2f0a4a98e16b..07aa2d6bd710 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h @@ -338,6 +338,7 @@ int hsw_uncore_pci_init(void); int bdw_uncore_pci_init(void); void snb_uncore_cpu_init(void); void nhm_uncore_cpu_init(void); +int snb_pci2phy_map_init(int devid); /* perf_event_intel_uncore_snbep.c */ int snbep_uncore_pci_init(void); @@ -348,6 +349,8 @@ int hswep_uncore_pci_init(void); void hswep_uncore_cpu_init(void); int bdx_uncore_pci_init(void); void bdx_uncore_cpu_init(void); +int knl_uncore_pci_init(void); +void knl_uncore_cpu_init(void); /* perf_event_intel_uncore_nhmex.c */ void nhmex_uncore_cpu_init(void); diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c index 845256158a10..0b934820fafd 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c @@ -417,7 +417,7 @@ static void snb_uncore_imc_event_del(struct perf_event *event, int flags) } } -static int snb_pci2phy_map_init(int devid) +int snb_pci2phy_map_init(int devid) { struct pci_dev *dev = NULL; struct pci2phy_map *map; diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c index f0f4fcba252e..33acb884ccf1 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c @@ -209,31 +209,98 @@ #define HSWEP_PCU_MSR_PMON_BOX_CTL 0x710 #define HSWEP_PCU_MSR_PMON_BOX_FILTER 0x715 +/* KNL Ubox */ +#define KNL_U_MSR_PMON_RAW_EVENT_MASK \ + (SNBEP_U_MSR_PMON_RAW_EVENT_MASK | \ + SNBEP_CBO_PMON_CTL_TID_EN) +/* KNL CHA */ +#define KNL_CHA_MSR_OFFSET 0xc +#define KNL_CHA_MSR_PMON_CTL_QOR (1 << 16) +#define KNL_CHA_MSR_PMON_RAW_EVENT_MASK \ + (SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK | \ + KNL_CHA_MSR_PMON_CTL_QOR) +#define KNL_CHA_MSR_PMON_BOX_FILTER_TID 0x1ff +#define KNL_CHA_MSR_PMON_BOX_FILTER_STATE (7 << 18) +#define KNL_CHA_MSR_PMON_BOX_FILTER_OP (0xfffffe2aULL << 32) + +/* KNL EDC/MC UCLK */ +#define KNL_UCLK_MSR_PMON_CTR0_LOW 0x400 +#define KNL_UCLK_MSR_PMON_CTL0 0x420 +#define KNL_UCLK_MSR_PMON_BOX_CTL 0x430 +#define KNL_UCLK_MSR_PMON_UCLK_FIXED_LOW 0x44c +#define KNL_UCLK_MSR_PMON_UCLK_FIXED_CTL 0x454 +#define KNL_PMON_FIXED_CTL_EN 0x1 + +/* KNL EDC */ +#define KNL_EDC0_ECLK_MSR_PMON_CTR0_LOW 0xa00 +#define KNL_EDC0_ECLK_MSR_PMON_CTL0 0xa20 +#define KNL_EDC0_ECLK_MSR_PMON_BOX_CTL 0xa30 +#define KNL_EDC0_ECLK_MSR_PMON_ECLK_FIXED_LOW 0xa3c +#define KNL_EDC0_ECLK_MSR_PMON_ECLK_FIXED_CTL 0xa44 + +/* KNL MC */ +#define KNL_MC0_CH0_MSR_PMON_CTR0_LOW 0xb00 +#define KNL_MC0_CH0_MSR_PMON_CTL0 0xb20 +#define KNL_MC0_CH0_MSR_PMON_BOX_CTL 0xb30 +#define KNL_MC0_CH0_MSR_PMON_FIXED_LOW 0xb3c +#define KNL_MC0_CH0_MSR_PMON_FIXED_CTL 0xb44 + +/* KNL IRP */ +#define KNL_IRP_PCI_PMON_BOX_CTL 0xf0 +#define KNL_IRP_PCI_PMON_RAW_EVENT_MASK (SNBEP_PMON_RAW_EVENT_MASK | \ + KNL_CHA_MSR_PMON_CTL_QOR) +/* KNL PCU */ +#define KNL_PCU_PMON_CTL_EV_SEL_MASK 0x0000007f +#define KNL_PCU_PMON_CTL_USE_OCC_CTR (1 << 7) +#define KNL_PCU_MSR_PMON_CTL_TRESH_MASK 0x3f000000 +#define KNL_PCU_MSR_PMON_RAW_EVENT_MASK \ + (KNL_PCU_PMON_CTL_EV_SEL_MASK | \ + KNL_PCU_PMON_CTL_USE_OCC_CTR | \ + SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK | \ + SNBEP_PMON_CTL_EDGE_DET | \ + SNBEP_CBO_PMON_CTL_TID_EN | \ + SNBEP_PMON_CTL_EV_SEL_EXT | \ + SNBEP_PMON_CTL_INVERT | \ + KNL_PCU_MSR_PMON_CTL_TRESH_MASK | \ + SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT | \ + SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET) DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7"); +DEFINE_UNCORE_FORMAT_ATTR(event2, event, "config:0-6"); DEFINE_UNCORE_FORMAT_ATTR(event_ext, event, "config:0-7,21"); +DEFINE_UNCORE_FORMAT_ATTR(use_occ_ctr, use_occ_ctr, "config:7"); DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15"); +DEFINE_UNCORE_FORMAT_ATTR(qor, qor, "config:16"); DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18"); DEFINE_UNCORE_FORMAT_ATTR(tid_en, tid_en, "config:19"); DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23"); DEFINE_UNCORE_FORMAT_ATTR(thresh8, thresh, "config:24-31"); +DEFINE_UNCORE_FORMAT_ATTR(thresh6, thresh, "config:24-29"); DEFINE_UNCORE_FORMAT_ATTR(thresh5, thresh, "config:24-28"); DEFINE_UNCORE_FORMAT_ATTR(occ_sel, occ_sel, "config:14-15"); DEFINE_UNCORE_FORMAT_ATTR(occ_invert, occ_invert, "config:30"); DEFINE_UNCORE_FORMAT_ATTR(occ_edge, occ_edge, "config:14-51"); +DEFINE_UNCORE_FORMAT_ATTR(occ_edge_det, occ_edge_det, "config:31"); DEFINE_UNCORE_FORMAT_ATTR(filter_tid, filter_tid, "config1:0-4"); DEFINE_UNCORE_FORMAT_ATTR(filter_tid2, filter_tid, "config1:0"); DEFINE_UNCORE_FORMAT_ATTR(filter_tid3, filter_tid, "config1:0-5"); +DEFINE_UNCORE_FORMAT_ATTR(filter_tid4, filter_tid, "config1:0-8"); DEFINE_UNCORE_FORMAT_ATTR(filter_cid, filter_cid, "config1:5"); DEFINE_UNCORE_FORMAT_ATTR(filter_link, filter_link, "config1:5-8"); DEFINE_UNCORE_FORMAT_ATTR(filter_link2, filter_link, "config1:6-8"); +DEFINE_UNCORE_FORMAT_ATTR(filter_link3, filter_link, "config1:12"); DEFINE_UNCORE_FORMAT_ATTR(filter_nid, filter_nid, "config1:10-17"); DEFINE_UNCORE_FORMAT_ATTR(filter_nid2, filter_nid, "config1:32-47"); DEFINE_UNCORE_FORMAT_ATTR(filter_state, filter_state, "config1:18-22"); DEFINE_UNCORE_FORMAT_ATTR(filter_state2, filter_state, "config1:17-22"); DEFINE_UNCORE_FORMAT_ATTR(filter_state3, filter_state, "config1:17-23"); +DEFINE_UNCORE_FORMAT_ATTR(filter_state4, filter_state, "config1:18-20"); +DEFINE_UNCORE_FORMAT_ATTR(filter_local, filter_local, "config1:33"); +DEFINE_UNCORE_FORMAT_ATTR(filter_all_op, filter_all_op, "config1:35"); +DEFINE_UNCORE_FORMAT_ATTR(filter_nnm, filter_nnm, "config1:37"); DEFINE_UNCORE_FORMAT_ATTR(filter_opc, filter_opc, "config1:23-31"); DEFINE_UNCORE_FORMAT_ATTR(filter_opc2, filter_opc, "config1:52-60"); +DEFINE_UNCORE_FORMAT_ATTR(filter_opc3, filter_opc, "config1:41-60"); DEFINE_UNCORE_FORMAT_ATTR(filter_nc, filter_nc, "config1:62"); DEFINE_UNCORE_FORMAT_ATTR(filter_c6, filter_c6, "config1:61"); DEFINE_UNCORE_FORMAT_ATTR(filter_isoc, filter_isoc, "config1:63"); @@ -315,8 +382,9 @@ static u64 snbep_uncore_pci_read_counter(struct intel_uncore_box *box, struct pe static void snbep_uncore_pci_init_box(struct intel_uncore_box *box) { struct pci_dev *pdev = box->pci_dev; + int box_ctl = uncore_pci_box_ctl(box); - pci_write_config_dword(pdev, SNBEP_PCI_PMON_BOX_CTL, SNBEP_PMON_BOX_CTL_INT); + pci_write_config_dword(pdev, box_ctl, SNBEP_PMON_BOX_CTL_INT); } static void snbep_uncore_msr_disable_box(struct intel_uncore_box *box) @@ -1728,6 +1796,419 @@ int ivbep_uncore_pci_init(void) } /* end of IvyTown uncore support */ +/* KNL uncore support */ +static struct attribute *knl_uncore_ubox_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_tid_en.attr, + &format_attr_inv.attr, + &format_attr_thresh5.attr, + NULL, +}; + +static struct attribute_group knl_uncore_ubox_format_group = { + .name = "format", + .attrs = knl_uncore_ubox_formats_attr, +}; + +static struct intel_uncore_type knl_uncore_ubox = { + .name = "ubox", + .num_counters = 2, + .num_boxes = 1, + .perf_ctr_bits = 48, + .fixed_ctr_bits = 48, + .perf_ctr = HSWEP_U_MSR_PMON_CTR0, + .event_ctl = HSWEP_U_MSR_PMON_CTL0, + .event_mask = KNL_U_MSR_PMON_RAW_EVENT_MASK, + .fixed_ctr = HSWEP_U_MSR_PMON_UCLK_FIXED_CTR, + .fixed_ctl = HSWEP_U_MSR_PMON_UCLK_FIXED_CTL, + .ops = &snbep_uncore_msr_ops, + .format_group = &knl_uncore_ubox_format_group, +}; + +static struct attribute *knl_uncore_cha_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_qor.attr, + &format_attr_edge.attr, + &format_attr_tid_en.attr, + &format_attr_inv.attr, + &format_attr_thresh8.attr, + &format_attr_filter_tid4.attr, + &format_attr_filter_link3.attr, + &format_attr_filter_state4.attr, + &format_attr_filter_local.attr, + &format_attr_filter_all_op.attr, + &format_attr_filter_nnm.attr, + &format_attr_filter_opc3.attr, + &format_attr_filter_nc.attr, + &format_attr_filter_isoc.attr, + NULL, +}; + +static struct attribute_group knl_uncore_cha_format_group = { + .name = "format", + .attrs = knl_uncore_cha_formats_attr, +}; + +static struct event_constraint knl_uncore_cha_constraints[] = { + UNCORE_EVENT_CONSTRAINT(0x11, 0x1), + UNCORE_EVENT_CONSTRAINT(0x1f, 0x1), + UNCORE_EVENT_CONSTRAINT(0x36, 0x1), + EVENT_CONSTRAINT_END +}; + +static struct extra_reg knl_uncore_cha_extra_regs[] = { + SNBEP_CBO_EVENT_EXTRA_REG(SNBEP_CBO_PMON_CTL_TID_EN, + SNBEP_CBO_PMON_CTL_TID_EN, 0x1), + SNBEP_CBO_EVENT_EXTRA_REG(0x3d, 0xff, 0x2), + SNBEP_CBO_EVENT_EXTRA_REG(0x35, 0xff, 0x4), + SNBEP_CBO_EVENT_EXTRA_REG(0x36, 0xff, 0x4), + EVENT_EXTRA_END +}; + +static u64 knl_cha_filter_mask(int fields) +{ + u64 mask = 0; + + if (fields & 0x1) + mask |= KNL_CHA_MSR_PMON_BOX_FILTER_TID; + if (fields & 0x2) + mask |= KNL_CHA_MSR_PMON_BOX_FILTER_STATE; + if (fields & 0x4) + mask |= KNL_CHA_MSR_PMON_BOX_FILTER_OP; + return mask; +} + +static struct event_constraint * +knl_cha_get_constraint(struct intel_uncore_box *box, struct perf_event *event) +{ + return __snbep_cbox_get_constraint(box, event, knl_cha_filter_mask); +} + +static int knl_cha_hw_config(struct intel_uncore_box *box, + struct perf_event *event) +{ + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + struct extra_reg *er; + int idx = 0; + + for (er = knl_uncore_cha_extra_regs; er->msr; er++) { + if (er->event != (event->hw.config & er->config_mask)) + continue; + idx |= er->idx; + } + + if (idx) { + reg1->reg = HSWEP_C0_MSR_PMON_BOX_FILTER0 + + KNL_CHA_MSR_OFFSET * box->pmu->pmu_idx; + reg1->config = event->attr.config1 & knl_cha_filter_mask(idx); + reg1->idx = idx; + } + return 0; +} + +static void hswep_cbox_enable_event(struct intel_uncore_box *box, + struct perf_event *event); + +static struct intel_uncore_ops knl_uncore_cha_ops = { + .init_box = snbep_uncore_msr_init_box, + .disable_box = snbep_uncore_msr_disable_box, + .enable_box = snbep_uncore_msr_enable_box, + .disable_event = snbep_uncore_msr_disable_event, + .enable_event = hswep_cbox_enable_event, + .read_counter = uncore_msr_read_counter, + .hw_config = knl_cha_hw_config, + .get_constraint = knl_cha_get_constraint, + .put_constraint = snbep_cbox_put_constraint, +}; + +static struct intel_uncore_type knl_uncore_cha = { + .name = "cha", + .num_counters = 4, + .num_boxes = 38, + .perf_ctr_bits = 48, + .event_ctl = HSWEP_C0_MSR_PMON_CTL0, + .perf_ctr = HSWEP_C0_MSR_PMON_CTR0, + .event_mask = KNL_CHA_MSR_PMON_RAW_EVENT_MASK, + .box_ctl = HSWEP_C0_MSR_PMON_BOX_CTL, + .msr_offset = KNL_CHA_MSR_OFFSET, + .num_shared_regs = 1, + .constraints = knl_uncore_cha_constraints, + .ops = &knl_uncore_cha_ops, + .format_group = &knl_uncore_cha_format_group, +}; + +static struct attribute *knl_uncore_pcu_formats_attr[] = { + &format_attr_event2.attr, + &format_attr_use_occ_ctr.attr, + &format_attr_occ_sel.attr, + &format_attr_edge.attr, + &format_attr_tid_en.attr, + &format_attr_inv.attr, + &format_attr_thresh6.attr, + &format_attr_occ_invert.attr, + &format_attr_occ_edge_det.attr, + NULL, +}; + +static struct attribute_group knl_uncore_pcu_format_group = { + .name = "format", + .attrs = knl_uncore_pcu_formats_attr, +}; + +static struct intel_uncore_type knl_uncore_pcu = { + .name = "pcu", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 48, + .perf_ctr = HSWEP_PCU_MSR_PMON_CTR0, + .event_ctl = HSWEP_PCU_MSR_PMON_CTL0, + .event_mask = KNL_PCU_MSR_PMON_RAW_EVENT_MASK, + .box_ctl = HSWEP_PCU_MSR_PMON_BOX_CTL, + .ops = &snbep_uncore_msr_ops, + .format_group = &knl_uncore_pcu_format_group, +}; + +static struct intel_uncore_type *knl_msr_uncores[] = { + &knl_uncore_ubox, + &knl_uncore_cha, + &knl_uncore_pcu, + NULL, +}; + +void knl_uncore_cpu_init(void) +{ + uncore_msr_uncores = knl_msr_uncores; +} + +static void knl_uncore_imc_enable_box(struct intel_uncore_box *box) +{ + struct pci_dev *pdev = box->pci_dev; + int box_ctl = uncore_pci_box_ctl(box); + + pci_write_config_dword(pdev, box_ctl, 0); +} + +static void knl_uncore_imc_enable_event(struct intel_uncore_box *box, + struct perf_event *event) +{ + struct pci_dev *pdev = box->pci_dev; + struct hw_perf_event *hwc = &event->hw; + + if ((event->attr.config & SNBEP_PMON_CTL_EV_SEL_MASK) + == UNCORE_FIXED_EVENT) + pci_write_config_dword(pdev, hwc->config_base, + hwc->config | KNL_PMON_FIXED_CTL_EN); + else + pci_write_config_dword(pdev, hwc->config_base, + hwc->config | SNBEP_PMON_CTL_EN); +} + +static struct intel_uncore_ops knl_uncore_imc_ops = { + .init_box = snbep_uncore_pci_init_box, + .disable_box = snbep_uncore_pci_disable_box, + .enable_box = knl_uncore_imc_enable_box, + .read_counter = snbep_uncore_pci_read_counter, + .enable_event = knl_uncore_imc_enable_event, + .disable_event = snbep_uncore_pci_disable_event, +}; + +static struct intel_uncore_type knl_uncore_imc_uclk = { + .name = "imc_uclk", + .num_counters = 4, + .num_boxes = 2, + .perf_ctr_bits = 48, + .fixed_ctr_bits = 48, + .perf_ctr = KNL_UCLK_MSR_PMON_CTR0_LOW, + .event_ctl = KNL_UCLK_MSR_PMON_CTL0, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .fixed_ctr = KNL_UCLK_MSR_PMON_UCLK_FIXED_LOW, + .fixed_ctl = KNL_UCLK_MSR_PMON_UCLK_FIXED_CTL, + .box_ctl = KNL_UCLK_MSR_PMON_BOX_CTL, + .ops = &knl_uncore_imc_ops, + .format_group = &snbep_uncore_format_group, +}; + +static struct intel_uncore_type knl_uncore_imc_dclk = { + .name = "imc", + .num_counters = 4, + .num_boxes = 6, + .perf_ctr_bits = 48, + .fixed_ctr_bits = 48, + .perf_ctr = KNL_MC0_CH0_MSR_PMON_CTR0_LOW, + .event_ctl = KNL_MC0_CH0_MSR_PMON_CTL0, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .fixed_ctr = KNL_MC0_CH0_MSR_PMON_FIXED_LOW, + .fixed_ctl = KNL_MC0_CH0_MSR_PMON_FIXED_CTL, + .box_ctl = KNL_MC0_CH0_MSR_PMON_BOX_CTL, + .ops = &knl_uncore_imc_ops, + .format_group = &snbep_uncore_format_group, +}; + +static struct intel_uncore_type knl_uncore_edc_uclk = { + .name = "edc_uclk", + .num_counters = 4, + .num_boxes = 8, + .perf_ctr_bits = 48, + .fixed_ctr_bits = 48, + .perf_ctr = KNL_UCLK_MSR_PMON_CTR0_LOW, + .event_ctl = KNL_UCLK_MSR_PMON_CTL0, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .fixed_ctr = KNL_UCLK_MSR_PMON_UCLK_FIXED_LOW, + .fixed_ctl = KNL_UCLK_MSR_PMON_UCLK_FIXED_CTL, + .box_ctl = KNL_UCLK_MSR_PMON_BOX_CTL, + .ops = &knl_uncore_imc_ops, + .format_group = &snbep_uncore_format_group, +}; + +static struct intel_uncore_type knl_uncore_edc_eclk = { + .name = "edc_eclk", + .num_counters = 4, + .num_boxes = 8, + .perf_ctr_bits = 48, + .fixed_ctr_bits = 48, + .perf_ctr = KNL_EDC0_ECLK_MSR_PMON_CTR0_LOW, + .event_ctl = KNL_EDC0_ECLK_MSR_PMON_CTL0, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .fixed_ctr = KNL_EDC0_ECLK_MSR_PMON_ECLK_FIXED_LOW, + .fixed_ctl = KNL_EDC0_ECLK_MSR_PMON_ECLK_FIXED_CTL, + .box_ctl = KNL_EDC0_ECLK_MSR_PMON_BOX_CTL, + .ops = &knl_uncore_imc_ops, + .format_group = &snbep_uncore_format_group, +}; + +static struct event_constraint knl_uncore_m2pcie_constraints[] = { + UNCORE_EVENT_CONSTRAINT(0x23, 0x3), + EVENT_CONSTRAINT_END +}; + +static struct intel_uncore_type knl_uncore_m2pcie = { + .name = "m2pcie", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 48, + .constraints = knl_uncore_m2pcie_constraints, + SNBEP_UNCORE_PCI_COMMON_INIT(), +}; + +static struct attribute *knl_uncore_irp_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_qor.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_thresh8.attr, + NULL, +}; + +static struct attribute_group knl_uncore_irp_format_group = { + .name = "format", + .attrs = knl_uncore_irp_formats_attr, +}; + +static struct intel_uncore_type knl_uncore_irp = { + .name = "irp", + .num_counters = 2, + .num_boxes = 1, + .perf_ctr_bits = 48, + .perf_ctr = SNBEP_PCI_PMON_CTR0, + .event_ctl = SNBEP_PCI_PMON_CTL0, + .event_mask = KNL_IRP_PCI_PMON_RAW_EVENT_MASK, + .box_ctl = KNL_IRP_PCI_PMON_BOX_CTL, + .ops = &snbep_uncore_pci_ops, + .format_group = &knl_uncore_irp_format_group, +}; + +enum { + KNL_PCI_UNCORE_MC_UCLK, + KNL_PCI_UNCORE_MC_DCLK, + KNL_PCI_UNCORE_EDC_UCLK, + KNL_PCI_UNCORE_EDC_ECLK, + KNL_PCI_UNCORE_M2PCIE, + KNL_PCI_UNCORE_IRP, +}; + +static struct intel_uncore_type *knl_pci_uncores[] = { + [KNL_PCI_UNCORE_MC_UCLK] = &knl_uncore_imc_uclk, + [KNL_PCI_UNCORE_MC_DCLK] = &knl_uncore_imc_dclk, + [KNL_PCI_UNCORE_EDC_UCLK] = &knl_uncore_edc_uclk, + [KNL_PCI_UNCORE_EDC_ECLK] = &knl_uncore_edc_eclk, + [KNL_PCI_UNCORE_M2PCIE] = &knl_uncore_m2pcie, + [KNL_PCI_UNCORE_IRP] = &knl_uncore_irp, + NULL, +}; + +/* + * KNL uses a common PCI device ID for multiple instances of an Uncore PMU + * device type. prior to KNL, each instance of a PMU device type had a unique + * device ID. + * + * PCI Device ID Uncore PMU Devices + * ---------------------------------- + * 0x7841 MC0 UClk, MC1 UClk + * 0x7843 MC0 DClk CH 0, MC0 DClk CH 1, MC0 DClk CH 2, + * MC1 DClk CH 0, MC1 DClk CH 1, MC1 DClk CH 2 + * 0x7833 EDC0 UClk, EDC1 UClk, EDC2 UClk, EDC3 UClk, + * EDC4 UClk, EDC5 UClk, EDC6 UClk, EDC7 UClk + * 0x7835 EDC0 EClk, EDC1 EClk, EDC2 EClk, EDC3 EClk, + * EDC4 EClk, EDC5 EClk, EDC6 EClk, EDC7 EClk + * 0x7817 M2PCIe + * 0x7814 IRP +*/ + +static const struct pci_device_id knl_uncore_pci_ids[] = { + { /* MC UClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7841), + .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_MC_UCLK, 0), + }, + { /* MC DClk Channel */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843), + .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_MC_DCLK, 0), + }, + { /* EDC UClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833), + .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_EDC_UCLK, 0), + }, + { /* EDC EClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835), + .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_EDC_ECLK, 0), + }, + { /* M2PCIe */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7817), + .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_M2PCIE, 0), + }, + { /* IRP */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7814), + .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_IRP, 0), + }, + { /* end: all zeroes */ } +}; + +static struct pci_driver knl_uncore_pci_driver = { + .name = "knl_uncore", + .id_table = knl_uncore_pci_ids, +}; + +int knl_uncore_pci_init(void) +{ + int ret; + + /* All KNL PCI based PMON units are on the same PCI bus except IRP */ + ret = snb_pci2phy_map_init(0x7814); /* IRP */ + if (ret) + return ret; + ret = snb_pci2phy_map_init(0x7817); /* M2PCIe */ + if (ret) + return ret; + uncore_pci_uncores = knl_pci_uncores; + uncore_pci_driver = &knl_uncore_pci_driver; + return 0; +} + +/* end of KNL uncore support */ + /* Haswell-EP uncore support */ static struct attribute *hswep_uncore_ubox_formats_attr[] = { &format_attr_event.attr, @@ -2338,7 +2819,7 @@ int hswep_uncore_pci_init(void) } /* end of Haswell-EP uncore support */ -/* BDX-DE uncore support */ +/* BDX uncore support */ static struct intel_uncore_type bdx_uncore_ubox = { .name = "ubox", @@ -2360,13 +2841,14 @@ static struct event_constraint bdx_uncore_cbox_constraints[] = { UNCORE_EVENT_CONSTRAINT(0x09, 0x3), UNCORE_EVENT_CONSTRAINT(0x11, 0x1), UNCORE_EVENT_CONSTRAINT(0x36, 0x1), + UNCORE_EVENT_CONSTRAINT(0x3e, 0x1), EVENT_CONSTRAINT_END }; static struct intel_uncore_type bdx_uncore_cbox = { .name = "cbox", .num_counters = 4, - .num_boxes = 8, + .num_boxes = 24, .perf_ctr_bits = 48, .event_ctl = HSWEP_C0_MSR_PMON_CTL0, .perf_ctr = HSWEP_C0_MSR_PMON_CTR0, @@ -2379,9 +2861,24 @@ static struct intel_uncore_type bdx_uncore_cbox = { .format_group = &hswep_uncore_cbox_format_group, }; +static struct intel_uncore_type bdx_uncore_sbox = { + .name = "sbox", + .num_counters = 4, + .num_boxes = 4, + .perf_ctr_bits = 48, + .event_ctl = HSWEP_S0_MSR_PMON_CTL0, + .perf_ctr = HSWEP_S0_MSR_PMON_CTR0, + .event_mask = HSWEP_S_MSR_PMON_RAW_EVENT_MASK, + .box_ctl = HSWEP_S0_MSR_PMON_BOX_CTL, + .msr_offset = HSWEP_SBOX_MSR_OFFSET, + .ops = &hswep_uncore_sbox_msr_ops, + .format_group = &hswep_uncore_sbox_format_group, +}; + static struct intel_uncore_type *bdx_msr_uncores[] = { &bdx_uncore_ubox, &bdx_uncore_cbox, + &bdx_uncore_sbox, &hswep_uncore_pcu, NULL, }; @@ -2396,7 +2893,7 @@ void bdx_uncore_cpu_init(void) static struct intel_uncore_type bdx_uncore_ha = { .name = "ha", .num_counters = 4, - .num_boxes = 1, + .num_boxes = 2, .perf_ctr_bits = 48, SNBEP_UNCORE_PCI_COMMON_INIT(), }; @@ -2404,7 +2901,7 @@ static struct intel_uncore_type bdx_uncore_ha = { static struct intel_uncore_type bdx_uncore_imc = { .name = "imc", .num_counters = 5, - .num_boxes = 2, + .num_boxes = 8, .perf_ctr_bits = 48, .fixed_ctr_bits = 48, .fixed_ctr = SNBEP_MC_CHy_PCI_PMON_FIXED_CTR, @@ -2424,6 +2921,19 @@ static struct intel_uncore_type bdx_uncore_irp = { .format_group = &snbep_uncore_format_group, }; +static struct intel_uncore_type bdx_uncore_qpi = { + .name = "qpi", + .num_counters = 4, + .num_boxes = 3, + .perf_ctr_bits = 48, + .perf_ctr = SNBEP_PCI_PMON_CTR0, + .event_ctl = SNBEP_PCI_PMON_CTL0, + .event_mask = SNBEP_QPI_PCI_PMON_RAW_EVENT_MASK, + .box_ctl = SNBEP_PCI_PMON_BOX_CTL, + .num_shared_regs = 1, + .ops = &snbep_uncore_qpi_ops, + .format_group = &snbep_uncore_qpi_format_group, +}; static struct event_constraint bdx_uncore_r2pcie_constraints[] = { UNCORE_EVENT_CONSTRAINT(0x10, 0x3), @@ -2432,6 +2942,8 @@ static struct event_constraint bdx_uncore_r2pcie_constraints[] = { UNCORE_EVENT_CONSTRAINT(0x23, 0x1), UNCORE_EVENT_CONSTRAINT(0x25, 0x1), UNCORE_EVENT_CONSTRAINT(0x26, 0x3), + UNCORE_EVENT_CONSTRAINT(0x28, 0x3), + UNCORE_EVENT_CONSTRAINT(0x2c, 0x3), UNCORE_EVENT_CONSTRAINT(0x2d, 0x3), EVENT_CONSTRAINT_END }; @@ -2445,18 +2957,65 @@ static struct intel_uncore_type bdx_uncore_r2pcie = { SNBEP_UNCORE_PCI_COMMON_INIT(), }; +static struct event_constraint bdx_uncore_r3qpi_constraints[] = { + UNCORE_EVENT_CONSTRAINT(0x01, 0x7), + UNCORE_EVENT_CONSTRAINT(0x07, 0x7), + UNCORE_EVENT_CONSTRAINT(0x08, 0x7), + UNCORE_EVENT_CONSTRAINT(0x09, 0x7), + UNCORE_EVENT_CONSTRAINT(0x0a, 0x7), + UNCORE_EVENT_CONSTRAINT(0x0e, 0x7), + UNCORE_EVENT_CONSTRAINT(0x10, 0x3), + UNCORE_EVENT_CONSTRAINT(0x11, 0x3), + UNCORE_EVENT_CONSTRAINT(0x13, 0x1), + UNCORE_EVENT_CONSTRAINT(0x14, 0x3), + UNCORE_EVENT_CONSTRAINT(0x15, 0x3), + UNCORE_EVENT_CONSTRAINT(0x1f, 0x3), + UNCORE_EVENT_CONSTRAINT(0x20, 0x3), + UNCORE_EVENT_CONSTRAINT(0x21, 0x3), + UNCORE_EVENT_CONSTRAINT(0x22, 0x3), + UNCORE_EVENT_CONSTRAINT(0x23, 0x3), + UNCORE_EVENT_CONSTRAINT(0x25, 0x3), + UNCORE_EVENT_CONSTRAINT(0x26, 0x3), + UNCORE_EVENT_CONSTRAINT(0x28, 0x3), + UNCORE_EVENT_CONSTRAINT(0x29, 0x3), + UNCORE_EVENT_CONSTRAINT(0x2c, 0x3), + UNCORE_EVENT_CONSTRAINT(0x2d, 0x3), + UNCORE_EVENT_CONSTRAINT(0x2e, 0x3), + UNCORE_EVENT_CONSTRAINT(0x2f, 0x3), + UNCORE_EVENT_CONSTRAINT(0x33, 0x3), + UNCORE_EVENT_CONSTRAINT(0x34, 0x3), + UNCORE_EVENT_CONSTRAINT(0x36, 0x3), + UNCORE_EVENT_CONSTRAINT(0x37, 0x3), + UNCORE_EVENT_CONSTRAINT(0x38, 0x3), + UNCORE_EVENT_CONSTRAINT(0x39, 0x3), + EVENT_CONSTRAINT_END +}; + +static struct intel_uncore_type bdx_uncore_r3qpi = { + .name = "r3qpi", + .num_counters = 3, + .num_boxes = 3, + .perf_ctr_bits = 48, + .constraints = bdx_uncore_r3qpi_constraints, + SNBEP_UNCORE_PCI_COMMON_INIT(), +}; + enum { BDX_PCI_UNCORE_HA, BDX_PCI_UNCORE_IMC, BDX_PCI_UNCORE_IRP, + BDX_PCI_UNCORE_QPI, BDX_PCI_UNCORE_R2PCIE, + BDX_PCI_UNCORE_R3QPI, }; static struct intel_uncore_type *bdx_pci_uncores[] = { [BDX_PCI_UNCORE_HA] = &bdx_uncore_ha, [BDX_PCI_UNCORE_IMC] = &bdx_uncore_imc, [BDX_PCI_UNCORE_IRP] = &bdx_uncore_irp, + [BDX_PCI_UNCORE_QPI] = &bdx_uncore_qpi, [BDX_PCI_UNCORE_R2PCIE] = &bdx_uncore_r2pcie, + [BDX_PCI_UNCORE_R3QPI] = &bdx_uncore_r3qpi, NULL, }; @@ -2465,6 +3024,10 @@ static const struct pci_device_id bdx_uncore_pci_ids[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f30), .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_HA, 0), }, + { /* Home Agent 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f38), + .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_HA, 1), + }, { /* MC0 Channel 0 */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fb0), .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 0), @@ -2473,14 +3036,74 @@ static const struct pci_device_id bdx_uncore_pci_ids[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fb1), .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 1), }, + { /* MC0 Channel 2 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fb4), + .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 2), + }, + { /* MC0 Channel 3 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fb5), + .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 3), + }, + { /* MC1 Channel 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fd0), + .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 4), + }, + { /* MC1 Channel 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fd1), + .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 5), + }, + { /* MC1 Channel 2 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fd4), + .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 6), + }, + { /* MC1 Channel 3 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fd5), + .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 7), + }, { /* IRP */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f39), .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IRP, 0), }, + { /* QPI0 Port 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f32), + .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_QPI, 0), + }, + { /* QPI0 Port 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f33), + .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_QPI, 1), + }, + { /* QPI1 Port 2 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f3a), + .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_QPI, 2), + }, { /* R2PCIe */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f34), .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_R2PCIE, 0), }, + { /* R3QPI0 Link 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f36), + .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_R3QPI, 0), + }, + { /* R3QPI0 Link 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f37), + .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_R3QPI, 1), + }, + { /* R3QPI1 Link 2 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f3e), + .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_R3QPI, 2), + }, + { /* QPI Port 0 filter */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f86), + .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, 0), + }, + { /* QPI Port 1 filter */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f96), + .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, 1), + }, + { /* QPI Port 2 filter */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f46), + .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, 2), + }, { /* end: all zeroes */ } }; @@ -2500,4 +3123,4 @@ int bdx_uncore_pci_init(void) return 0; } -/* end of BDX-DE uncore support */ +/* end of BDX uncore support */ diff --git a/arch/x86/kernel/cpu/rdrand.c b/arch/x86/kernel/cpu/rdrand.c index 136ac74dee82..819d94982e07 100644 --- a/arch/x86/kernel/cpu/rdrand.c +++ b/arch/x86/kernel/cpu/rdrand.c @@ -33,28 +33,27 @@ static int __init x86_rdrand_setup(char *s) __setup("nordrand", x86_rdrand_setup); /* - * Force a reseed cycle; we are architecturally guaranteed a reseed - * after no more than 512 128-bit chunks of random data. This also - * acts as a test of the CPU capability. + * RDRAND has Built-In-Self-Test (BIST) that runs on every invocation. + * Run the instruction a few times as a sanity check. + * If it fails, it is simple to disable RDRAND here. */ -#define RESEED_LOOP ((512*128)/sizeof(unsigned long)) +#define SANITY_CHECK_LOOPS 8 void x86_init_rdrand(struct cpuinfo_x86 *c) { #ifdef CONFIG_ARCH_RANDOM unsigned long tmp; - int i, count, ok; + int i; if (!cpu_has(c, X86_FEATURE_RDRAND)) - return; /* Nothing to do */ + return; - for (count = i = 0; i < RESEED_LOOP; i++) { - ok = rdrand_long(&tmp); - if (ok) - count++; + for (i = 0; i < SANITY_CHECK_LOOPS; i++) { + if (!rdrand_long(&tmp)) { + clear_cpu_cap(c, X86_FEATURE_RDRAND); + printk_once(KERN_WARNING "rdrand: disabled\n"); + return; + } } - - if (count != RESEED_LOOP) - clear_cpu_cap(c, X86_FEATURE_RDRAND); #endif } diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 608fb26c7254..8cb57df9398d 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -31,32 +31,12 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c) const struct cpuid_bit *cb; static const struct cpuid_bit cpuid_bits[] = { - { X86_FEATURE_DTHERM, CR_EAX, 0, 0x00000006, 0 }, - { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006, 0 }, - { X86_FEATURE_ARAT, CR_EAX, 2, 0x00000006, 0 }, - { X86_FEATURE_PLN, CR_EAX, 4, 0x00000006, 0 }, - { X86_FEATURE_PTS, CR_EAX, 6, 0x00000006, 0 }, - { X86_FEATURE_HWP, CR_EAX, 7, 0x00000006, 0 }, - { X86_FEATURE_HWP_NOTIFY, CR_EAX, 8, 0x00000006, 0 }, - { X86_FEATURE_HWP_ACT_WINDOW, CR_EAX, 9, 0x00000006, 0 }, - { X86_FEATURE_HWP_EPP, CR_EAX,10, 0x00000006, 0 }, - { X86_FEATURE_HWP_PKG_REQ, CR_EAX,11, 0x00000006, 0 }, { X86_FEATURE_INTEL_PT, CR_EBX,25, 0x00000007, 0 }, { X86_FEATURE_APERFMPERF, CR_ECX, 0, 0x00000006, 0 }, { X86_FEATURE_EPB, CR_ECX, 3, 0x00000006, 0 }, { X86_FEATURE_HW_PSTATE, CR_EDX, 7, 0x80000007, 0 }, { X86_FEATURE_CPB, CR_EDX, 9, 0x80000007, 0 }, { X86_FEATURE_PROC_FEEDBACK, CR_EDX,11, 0x80000007, 0 }, - { X86_FEATURE_NPT, CR_EDX, 0, 0x8000000a, 0 }, - { X86_FEATURE_LBRV, CR_EDX, 1, 0x8000000a, 0 }, - { X86_FEATURE_SVML, CR_EDX, 2, 0x8000000a, 0 }, - { X86_FEATURE_NRIPS, CR_EDX, 3, 0x8000000a, 0 }, - { X86_FEATURE_TSCRATEMSR, CR_EDX, 4, 0x8000000a, 0 }, - { X86_FEATURE_VMCBCLEAN, CR_EDX, 5, 0x8000000a, 0 }, - { X86_FEATURE_FLUSHBYASID, CR_EDX, 6, 0x8000000a, 0 }, - { X86_FEATURE_DECODEASSISTS, CR_EDX, 7, 0x8000000a, 0 }, - { X86_FEATURE_PAUSEFILTER, CR_EDX,10, 0x8000000a, 0 }, - { X86_FEATURE_PFTHRESHOLD, CR_EDX,12, 0x8000000a, 0 }, { 0, 0, 0, 0, 0 } }; diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c index 3fa0e5ad86b4..252da7aceca6 100644 --- a/arch/x86/kernel/cpu/transmeta.c +++ b/arch/x86/kernel/cpu/transmeta.c @@ -12,7 +12,7 @@ static void early_init_transmeta(struct cpuinfo_x86 *c) xlvl = cpuid_eax(0x80860000); if ((xlvl & 0xffff0000) == 0x80860000) { if (xlvl >= 0x80860001) - c->x86_capability[2] = cpuid_edx(0x80860001); + c->x86_capability[CPUID_8086_0001_EDX] = cpuid_edx(0x80860001); } } @@ -82,7 +82,7 @@ static void init_transmeta(struct cpuinfo_x86 *c) /* Unhide possibly hidden capability flags */ rdmsr(0x80860004, cap_mask, uk); wrmsr(0x80860004, ~0, uk); - c->x86_capability[0] = cpuid_edx(0x00000001); + c->x86_capability[CPUID_1_EDX] = cpuid_edx(0x00000001); wrmsr(0x80860004, cap_mask, uk); /* All Transmeta CPUs have a constant TSC */ diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index bd3507da39f0..2836de390f95 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c @@ -58,28 +58,6 @@ static void cpuid_smp_cpuid(void *cmd_block) &cmd->eax, &cmd->ebx, &cmd->ecx, &cmd->edx); } -static loff_t cpuid_seek(struct file *file, loff_t offset, int orig) -{ - loff_t ret; - struct inode *inode = file->f_mapping->host; - - mutex_lock(&inode->i_mutex); - switch (orig) { - case 0: - file->f_pos = offset; - ret = file->f_pos; - break; - case 1: - file->f_pos += offset; - ret = file->f_pos; - break; - default: - ret = -EINVAL; - } - mutex_unlock(&inode->i_mutex); - return ret; -} - static ssize_t cpuid_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { @@ -132,7 +110,7 @@ static int cpuid_open(struct inode *inode, struct file *file) */ static const struct file_operations cpuid_fops = { .owner = THIS_MODULE, - .llseek = cpuid_seek, + .llseek = no_seek_end_llseek, .read = cpuid_read, .open = cpuid_open, }; diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index 2c1910f6717e..58f34319b29a 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -35,6 +35,7 @@ #include <asm/cpu.h> #include <asm/reboot.h> #include <asm/virtext.h> +#include <asm/intel_pt.h> /* Alignment required for elf header segment */ #define ELF_CORE_HEADER_ALIGN 4096 @@ -125,6 +126,11 @@ static void kdump_nmi_callback(int cpu, struct pt_regs *regs) cpu_emergency_vmxoff(); cpu_emergency_svm_disable(); + /* + * Disable Intel PT to stop its logging + */ + cpu_emergency_stop_pt(); + disable_local_APIC(); } @@ -169,6 +175,11 @@ void native_machine_crash_shutdown(struct pt_regs *regs) cpu_emergency_vmxoff(); cpu_emergency_svm_disable(); + /* + * Disable Intel PT to stop its logging + */ + cpu_emergency_stop_pt(); + #ifdef CONFIG_X86_IO_APIC /* Prevent crash_kexec() from deadlocking on ioapic_lock. */ ioapic_zap_locks(); diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index db9a675e751b..bca14c899137 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -547,6 +547,7 @@ static const struct pci_device_id intel_stolen_ids[] __initconst = { INTEL_CHV_IDS(&chv_stolen_funcs), INTEL_SKL_IDS(&gen9_stolen_funcs), INTEL_BXT_IDS(&gen9_stolen_funcs), + INTEL_KBL_IDS(&gen9_stolen_funcs), }; static void __init intel_graphics_stolen(int num, int slot, int func) diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index be39b5fde4b9..6d9f0a7ef4c8 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -3,8 +3,11 @@ */ #include <asm/fpu/internal.h> #include <asm/tlbflush.h> +#include <asm/setup.h> +#include <asm/cmdline.h> #include <linux/sched.h> +#include <linux/init.h> /* * Initialize the TS bit in CR0 according to the style of context-switches @@ -12,7 +15,7 @@ */ static void fpu__init_cpu_ctx_switch(void) { - if (!cpu_has_eager_fpu) + if (!boot_cpu_has(X86_FEATURE_EAGER_FPU)) stts(); else clts(); @@ -143,9 +146,18 @@ static void __init fpu__init_system_generic(void) unsigned int xstate_size; EXPORT_SYMBOL_GPL(xstate_size); -/* Enforce that 'MEMBER' is the last field of 'TYPE': */ +/* Get alignment of the TYPE. */ +#define TYPE_ALIGN(TYPE) offsetof(struct { char x; TYPE test; }, test) + +/* + * Enforce that 'MEMBER' is the last field of 'TYPE'. + * + * Align the computed size with alignment of the TYPE, + * because that's how C aligns structs. + */ #define CHECK_MEMBER_AT_END_OF(TYPE, MEMBER) \ - BUILD_BUG_ON(sizeof(TYPE) != offsetofend(TYPE, MEMBER)) + BUILD_BUG_ON(sizeof(TYPE) != ALIGN(offsetofend(TYPE, MEMBER), \ + TYPE_ALIGN(TYPE))) /* * We append the 'struct fpu' to the task_struct: @@ -188,7 +200,7 @@ static void __init fpu__init_task_struct_size(void) */ static void __init fpu__init_system_xstate_size_legacy(void) { - static int on_boot_cpu = 1; + static int on_boot_cpu __initdata = 1; WARN_ON_FPU(!on_boot_cpu); on_boot_cpu = 0; @@ -261,24 +273,56 @@ static void __init fpu__init_system_xstate_size_legacy(void) */ static enum { AUTO, ENABLE, DISABLE } eagerfpu = AUTO; -static int __init eager_fpu_setup(char *s) +/* + * Find supported xfeatures based on cpu features and command-line input. + * This must be called after fpu__init_parse_early_param() is called and + * xfeatures_mask is enumerated. + */ +u64 __init fpu__get_supported_xfeatures_mask(void) { - if (!strcmp(s, "on")) - eagerfpu = ENABLE; - else if (!strcmp(s, "off")) - eagerfpu = DISABLE; - else if (!strcmp(s, "auto")) - eagerfpu = AUTO; - return 1; + /* Support all xfeatures known to us */ + if (eagerfpu != DISABLE) + return XCNTXT_MASK; + + /* Warning of xfeatures being disabled for no eagerfpu mode */ + if (xfeatures_mask & XFEATURE_MASK_EAGER) { + pr_err("x86/fpu: eagerfpu switching disabled, disabling the following xstate features: 0x%llx.\n", + xfeatures_mask & XFEATURE_MASK_EAGER); + } + + /* Return a mask that masks out all features requiring eagerfpu mode */ + return ~XFEATURE_MASK_EAGER; +} + +/* + * Disable features dependent on eagerfpu. + */ +static void __init fpu__clear_eager_fpu_features(void) +{ + setup_clear_cpu_cap(X86_FEATURE_MPX); + setup_clear_cpu_cap(X86_FEATURE_AVX); + setup_clear_cpu_cap(X86_FEATURE_AVX2); + setup_clear_cpu_cap(X86_FEATURE_AVX512F); + setup_clear_cpu_cap(X86_FEATURE_AVX512PF); + setup_clear_cpu_cap(X86_FEATURE_AVX512ER); + setup_clear_cpu_cap(X86_FEATURE_AVX512CD); } -__setup("eagerfpu=", eager_fpu_setup); /* * Pick the FPU context switching strategy: + * + * When eagerfpu is AUTO or ENABLE, we ensure it is ENABLE if either of + * the following is true: + * + * (1) the cpu has xsaveopt, as it has the optimization and doing eager + * FPU switching has a relatively low cost compared to a plain xsave; + * (2) the cpu has xsave features (e.g. MPX) that depend on eager FPU + * switching. Should the kernel boot with noxsaveopt, we support MPX + * with eager FPU switching at a higher cost. */ static void __init fpu__init_system_ctx_switch(void) { - static bool on_boot_cpu = 1; + static bool on_boot_cpu __initdata = 1; WARN_ON_FPU(!on_boot_cpu); on_boot_cpu = 0; @@ -286,19 +330,11 @@ static void __init fpu__init_system_ctx_switch(void) WARN_ON_FPU(current->thread.fpu.fpstate_active); current_thread_info()->status = 0; - /* Auto enable eagerfpu for xsaveopt */ - if (cpu_has_xsaveopt && eagerfpu != DISABLE) + if (boot_cpu_has(X86_FEATURE_XSAVEOPT) && eagerfpu != DISABLE) eagerfpu = ENABLE; - if (xfeatures_mask & XFEATURE_MASK_EAGER) { - if (eagerfpu == DISABLE) { - pr_err("x86/fpu: eagerfpu switching disabled, disabling the following xstate features: 0x%llx.\n", - xfeatures_mask & XFEATURE_MASK_EAGER); - xfeatures_mask &= ~XFEATURE_MASK_EAGER; - } else { - eagerfpu = ENABLE; - } - } + if (xfeatures_mask & XFEATURE_MASK_EAGER) + eagerfpu = ENABLE; if (eagerfpu == ENABLE) setup_force_cpu_cap(X86_FEATURE_EAGER_FPU); @@ -307,11 +343,48 @@ static void __init fpu__init_system_ctx_switch(void) } /* + * We parse fpu parameters early because fpu__init_system() is executed + * before parse_early_param(). + */ +static void __init fpu__init_parse_early_param(void) +{ + /* + * No need to check "eagerfpu=auto" again, since it is the + * initial default. + */ + if (cmdline_find_option_bool(boot_command_line, "eagerfpu=off")) { + eagerfpu = DISABLE; + fpu__clear_eager_fpu_features(); + } else if (cmdline_find_option_bool(boot_command_line, "eagerfpu=on")) { + eagerfpu = ENABLE; + } + + if (cmdline_find_option_bool(boot_command_line, "no387")) + setup_clear_cpu_cap(X86_FEATURE_FPU); + + if (cmdline_find_option_bool(boot_command_line, "nofxsr")) { + setup_clear_cpu_cap(X86_FEATURE_FXSR); + setup_clear_cpu_cap(X86_FEATURE_FXSR_OPT); + setup_clear_cpu_cap(X86_FEATURE_XMM); + } + + if (cmdline_find_option_bool(boot_command_line, "noxsave")) + fpu__xstate_clear_all_cpu_caps(); + + if (cmdline_find_option_bool(boot_command_line, "noxsaveopt")) + setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); + + if (cmdline_find_option_bool(boot_command_line, "noxsaves")) + setup_clear_cpu_cap(X86_FEATURE_XSAVES); +} + +/* * Called on the boot CPU once per system bootup, to set up the initial * FPU state that is later cloned into all processes: */ void __init fpu__init_system(struct cpuinfo_x86 *c) { + fpu__init_parse_early_param(); fpu__init_system_early_generic(c); /* @@ -335,62 +408,3 @@ void __init fpu__init_system(struct cpuinfo_x86 *c) fpu__init_system_ctx_switch(); } - -/* - * Boot parameter to turn off FPU support and fall back to math-emu: - */ -static int __init no_387(char *s) -{ - setup_clear_cpu_cap(X86_FEATURE_FPU); - return 1; -} -__setup("no387", no_387); - -/* - * Disable all xstate CPU features: - */ -static int __init x86_noxsave_setup(char *s) -{ - if (strlen(s)) - return 0; - - fpu__xstate_clear_all_cpu_caps(); - - return 1; -} -__setup("noxsave", x86_noxsave_setup); - -/* - * Disable the XSAVEOPT instruction specifically: - */ -static int __init x86_noxsaveopt_setup(char *s) -{ - setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); - - return 1; -} -__setup("noxsaveopt", x86_noxsaveopt_setup); - -/* - * Disable the XSAVES instruction: - */ -static int __init x86_noxsaves_setup(char *s) -{ - setup_clear_cpu_cap(X86_FEATURE_XSAVES); - - return 1; -} -__setup("noxsaves", x86_noxsaves_setup); - -/* - * Disable FX save/restore and SSE support: - */ -static int __init x86_nofxsr_setup(char *s) -{ - setup_clear_cpu_cap(X86_FEATURE_FXSR); - setup_clear_cpu_cap(X86_FEATURE_FXSR_OPT); - setup_clear_cpu_cap(X86_FEATURE_XMM); - - return 1; -} -__setup("nofxsr", x86_nofxsr_setup); diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 70fc312221fc..d425cda5ae6d 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -52,6 +52,7 @@ void fpu__xstate_clear_all_cpu_caps(void) setup_clear_cpu_cap(X86_FEATURE_AVX512ER); setup_clear_cpu_cap(X86_FEATURE_AVX512CD); setup_clear_cpu_cap(X86_FEATURE_MPX); + setup_clear_cpu_cap(X86_FEATURE_XGETBV1); } /* @@ -297,7 +298,7 @@ static void __init setup_xstate_comp(void) */ static void __init setup_init_fpu_buf(void) { - static int on_boot_cpu = 1; + static int on_boot_cpu __initdata = 1; WARN_ON_FPU(!on_boot_cpu); on_boot_cpu = 0; @@ -608,7 +609,7 @@ static void fpu__init_disable_system_xstate(void) void __init fpu__init_system_xstate(void) { unsigned int eax, ebx, ecx, edx; - static int on_boot_cpu = 1; + static int on_boot_cpu __initdata = 1; int err; WARN_ON_FPU(!on_boot_cpu); @@ -632,8 +633,7 @@ void __init fpu__init_system_xstate(void) BUG(); } - /* Support only the state known to the OS: */ - xfeatures_mask = xfeatures_mask & XCNTXT_MASK; + xfeatures_mask &= fpu__get_supported_xfeatures_mask(); /* Enable xstate instructions to be able to continue with initialization: */ fpu__init_cpu_xstate(); diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 311bcf338f07..29408d6d6626 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -105,14 +105,14 @@ ftrace_modify_code_direct(unsigned long ip, unsigned const char *old_code, { unsigned char replaced[MCOUNT_INSN_SIZE]; + ftrace_expected = old_code; + /* - * Note: Due to modules and __init, code can - * disappear and change, we need to protect against faulting - * as well as code changing. We do this by using the - * probe_kernel_* functions. - * - * No real locking needed, this code is run through - * kstop_machine, or before SMP starts. + * Note: + * We are paranoid about modifying text, as if a bug was to happen, it + * could cause us to read or write to someplace that could cause harm. + * Carefully read and modify the code with probe_kernel_*(), and make + * sure what we read is what we expected it to be before modifying it. */ /* read the text we want to modify */ @@ -154,6 +154,8 @@ int ftrace_make_nop(struct module *mod, if (addr == MCOUNT_ADDR) return ftrace_modify_code_direct(rec->ip, old, new); + ftrace_expected = NULL; + /* Normal cases use add_brk_on_nop */ WARN_ONCE(1, "invalid use of ftrace_make_nop"); return -EINVAL; @@ -220,6 +222,7 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr) { WARN_ON(1); + ftrace_expected = NULL; return -EINVAL; } @@ -314,6 +317,8 @@ static int add_break(unsigned long ip, const char *old) if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE)) return -EFAULT; + ftrace_expected = old; + /* Make sure it is what we expect it to be */ if (memcmp(replaced, old, MCOUNT_INSN_SIZE) != 0) return -EINVAL; @@ -413,6 +418,8 @@ static int remove_breakpoint(struct dyn_ftrace *rec) ftrace_addr = ftrace_get_addr_curr(rec); nop = ftrace_call_replace(ip, ftrace_addr); + ftrace_expected = nop; + if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0) return -EINVAL; } diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index 50a3fad5b89f..2bcfb5f2bc44 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -300,6 +300,10 @@ static int arch_build_bp_info(struct perf_event *bp) return -EINVAL; if (bp->attr.bp_addr & (bp->attr.bp_len - 1)) return -EINVAL; + + if (!boot_cpu_has(X86_FEATURE_BPEXT)) + return -EOPNOTSUPP; + /* * It's impossible to use a range breakpoint to fake out * user vs kernel detection because bp_len - 1 can't @@ -307,8 +311,6 @@ static int arch_build_bp_info(struct perf_event *bp) * breakpoints, then we'll have to check for kprobe-blacklisted * addresses anywhere in the range. */ - if (!cpu_has_bpext) - return -EOPNOTSUPP; info->mask = bp->attr.bp_len - 1; info->len = X86_BREAKPOINT_LEN_1; } diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 2bd81e302427..72cef58693c7 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -45,6 +45,11 @@ early_param("no-kvmclock", parse_no_kvmclock); static struct pvclock_vsyscall_time_info *hv_clock; static struct pvclock_wall_clock wall_clock; +struct pvclock_vsyscall_time_info *pvclock_pvti_cpu0_va(void) +{ + return hv_clock; +} + /* * The wallclock is the time of day when we booted. Since then, some time may * have elapsed since the hypervisor wrote the data. So we try to account for @@ -305,7 +310,6 @@ int __init kvm_setup_vsyscall_timeinfo(void) { #ifdef CONFIG_X86_64 int cpu; - int ret; u8 flags; struct pvclock_vcpu_time_info *vcpu_time; unsigned int size; @@ -325,11 +329,6 @@ int __init kvm_setup_vsyscall_timeinfo(void) return 1; } - if ((ret = pvclock_init_vsyscall(hv_clock, size))) { - put_cpu(); - return ret; - } - put_cpu(); kvm_clock.archdata.vclock_mode = VCLOCK_PVCLOCK; diff --git a/arch/x86/kernel/livepatch.c b/arch/x86/kernel/livepatch.c index d1d35ccffed3..92fc1a51f994 100644 --- a/arch/x86/kernel/livepatch.c +++ b/arch/x86/kernel/livepatch.c @@ -20,8 +20,6 @@ #include <linux/module.h> #include <linux/uaccess.h> -#include <asm/cacheflush.h> -#include <asm/page_types.h> #include <asm/elf.h> #include <asm/livepatch.h> @@ -38,11 +36,10 @@ int klp_write_module_reloc(struct module *mod, unsigned long type, unsigned long loc, unsigned long value) { - int ret, numpages, size = 4; - bool readonly; + size_t size = 4; unsigned long val; - unsigned long core = (unsigned long)mod->module_core; - unsigned long core_size = mod->core_size; + unsigned long core = (unsigned long)mod->core_layout.base; + unsigned long core_size = mod->core_layout.size; switch (type) { case R_X86_64_NONE: @@ -69,23 +66,5 @@ int klp_write_module_reloc(struct module *mod, unsigned long type, /* loc does not point to any symbol inside the module */ return -EINVAL; - readonly = false; - -#ifdef CONFIG_DEBUG_SET_MODULE_RONX - if (loc < core + mod->core_ro_size) - readonly = true; -#endif - - /* determine if the relocation spans a page boundary */ - numpages = ((loc & PAGE_MASK) == ((loc + size) & PAGE_MASK)) ? 1 : 2; - - if (readonly) - set_memory_rw(loc & PAGE_MASK, numpages); - - ret = probe_kernel_write((void *)loc, &val, size); - - if (readonly) - set_memory_ro(loc & PAGE_MASK, numpages); - - return ret; + return probe_kernel_write((void *)loc, &val, size); } diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 113e70784854..64f9616f93f1 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -45,28 +45,6 @@ static struct class *msr_class; -static loff_t msr_seek(struct file *file, loff_t offset, int orig) -{ - loff_t ret; - struct inode *inode = file_inode(file); - - mutex_lock(&inode->i_mutex); - switch (orig) { - case SEEK_SET: - file->f_pos = offset; - ret = file->f_pos; - break; - case SEEK_CUR: - file->f_pos += offset; - ret = file->f_pos; - break; - default: - ret = -EINVAL; - } - mutex_unlock(&inode->i_mutex); - return ret; -} - static ssize_t msr_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { @@ -194,7 +172,7 @@ static int msr_open(struct inode *inode, struct file *file) */ static const struct file_operations msr_fops = { .owner = THIS_MODULE, - .llseek = msr_seek, + .llseek = no_seek_end_llseek, .read = msr_read, .write = msr_write, .open = msr_open, diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 697f90db0e37..8a2cdd736fa4 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -29,6 +29,7 @@ #include <asm/mach_traps.h> #include <asm/nmi.h> #include <asm/x86_init.h> +#include <asm/reboot.h> #define CREATE_TRACE_POINTS #include <trace/events/nmi.h> @@ -231,7 +232,7 @@ pci_serr_error(unsigned char reason, struct pt_regs *regs) #endif if (panic_on_unrecovered_nmi) - panic("NMI: Not continuing"); + nmi_panic(regs, "NMI: Not continuing"); pr_emerg("Dazed and confused, but trying to continue\n"); @@ -255,8 +256,16 @@ io_check_error(unsigned char reason, struct pt_regs *regs) reason, smp_processor_id()); show_regs(regs); - if (panic_on_io_nmi) - panic("NMI IOCK error: Not continuing"); + if (panic_on_io_nmi) { + nmi_panic(regs, "NMI IOCK error: Not continuing"); + + /* + * If we end up here, it means we have received an NMI while + * processing panic(). Simply return without delaying and + * re-enabling NMIs. + */ + return; + } /* Re-enable the IOCK line, wait for a few seconds */ reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_IOCHK; @@ -297,7 +306,7 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs) pr_emerg("Do you have a strange power saving mode enabled?\n"); if (unknown_nmi_panic || panic_on_unrecovered_nmi) - panic("NMI: Not continuing"); + nmi_panic(regs, "NMI: Not continuing"); pr_emerg("Dazed and confused, but trying to continue\n"); } @@ -348,8 +357,19 @@ static void default_do_nmi(struct pt_regs *regs) return; } - /* Non-CPU-specific NMI: NMI sources can be processed on any CPU */ - raw_spin_lock(&nmi_reason_lock); + /* + * Non-CPU-specific NMI: NMI sources can be processed on any CPU. + * + * Another CPU may be processing panic routines while holding + * nmi_reason_lock. Check if the CPU issued the IPI for crash dumping, + * and if so, call its callback directly. If there is no CPU preparing + * crash dump, we simply loop here. + */ + while (!raw_spin_trylock(&nmi_reason_lock)) { + run_crash_ipi_callback(regs); + cpu_relax(); + } + reason = x86_platform.get_nmi_reason(); if (reason & NMI_REASON_MASK) { diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index c2130aef3f9d..f08ac28b8136 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -74,16 +74,6 @@ void __init default_banner(void) /* Undefined instruction for dealing with missing ops pointers. */ static const unsigned char ud2a[] = { 0x0f, 0x0b }; -unsigned paravirt_patch_nop(void) -{ - return 0; -} - -unsigned paravirt_patch_ignore(unsigned len) -{ - return len; -} - struct branch { unsigned char opcode; u32 delta; @@ -133,7 +123,6 @@ static void *get_call_destination(u8 type) .pv_time_ops = pv_time_ops, .pv_cpu_ops = pv_cpu_ops, .pv_irq_ops = pv_irq_ops, - .pv_apic_ops = pv_apic_ops, .pv_mmu_ops = pv_mmu_ops, #ifdef CONFIG_PARAVIRT_SPINLOCKS .pv_lock_ops = pv_lock_ops, @@ -152,8 +141,7 @@ unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf, /* If there's no function, patch it with a ud2a (BUG) */ ret = paravirt_patch_insns(insnbuf, len, ud2a, ud2a+sizeof(ud2a)); else if (opfunc == _paravirt_nop) - /* If the operation is a nop, then nop the callsite */ - ret = paravirt_patch_nop(); + ret = 0; /* identity functions just return their single argument */ else if (opfunc == _paravirt_ident_32) @@ -162,10 +150,6 @@ unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf, ret = paravirt_patch_ident_64(insnbuf, len); else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) || -#ifdef CONFIG_X86_32 - type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit) || -#endif - type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret32) || type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret64)) /* If operation requires a jmp, then jmp */ ret = paravirt_patch_jmp(insnbuf, opfunc, addr, len); @@ -220,8 +204,6 @@ static u64 native_steal_clock(int cpu) /* These are in entry.S */ extern void native_iret(void); -extern void native_irq_enable_sysexit(void); -extern void native_usergs_sysret32(void); extern void native_usergs_sysret64(void); static struct resource reserve_ioports = { @@ -379,13 +361,7 @@ __visible struct pv_cpu_ops pv_cpu_ops = { .load_sp0 = native_load_sp0, -#if defined(CONFIG_X86_32) - .irq_enable_sysexit = native_irq_enable_sysexit, -#endif #ifdef CONFIG_X86_64 -#ifdef CONFIG_IA32_EMULATION - .usergs_sysret32 = native_usergs_sysret32, -#endif .usergs_sysret64 = native_usergs_sysret64, #endif .iret = native_iret, @@ -403,12 +379,6 @@ NOKPROBE_SYMBOL(native_get_debugreg); NOKPROBE_SYMBOL(native_set_debugreg); NOKPROBE_SYMBOL(native_load_idt); -struct pv_apic_ops pv_apic_ops = { -#ifdef CONFIG_X86_LOCAL_APIC - .startup_ipi_hook = paravirt_nop, -#endif -}; - #if defined(CONFIG_X86_32) && !defined(CONFIG_X86_PAE) /* 32-bit pagetable entries */ #define PTE_IDENT __PV_IS_CALLEE_SAVE(_paravirt_ident_32) @@ -444,9 +414,6 @@ struct pv_mmu_ops pv_mmu_ops = { .set_pmd = native_set_pmd, .set_pmd_at = native_set_pmd_at, .pte_update = paravirt_nop, - .pte_update_defer = paravirt_nop, - .pmd_update = paravirt_nop, - .pmd_update_defer = paravirt_nop, .ptep_modify_prot_start = __ptep_modify_prot_start, .ptep_modify_prot_commit = __ptep_modify_prot_commit, @@ -492,6 +459,5 @@ struct pv_mmu_ops pv_mmu_ops = { EXPORT_SYMBOL_GPL(pv_time_ops); EXPORT_SYMBOL (pv_cpu_ops); EXPORT_SYMBOL (pv_mmu_ops); -EXPORT_SYMBOL_GPL(pv_apic_ops); EXPORT_SYMBOL_GPL(pv_info); EXPORT_SYMBOL (pv_irq_ops); diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c index c89f50a76e97..158dc0650d5d 100644 --- a/arch/x86/kernel/paravirt_patch_32.c +++ b/arch/x86/kernel/paravirt_patch_32.c @@ -5,7 +5,6 @@ DEF_NATIVE(pv_irq_ops, irq_enable, "sti"); DEF_NATIVE(pv_irq_ops, restore_fl, "push %eax; popf"); DEF_NATIVE(pv_irq_ops, save_fl, "pushf; pop %eax"); DEF_NATIVE(pv_cpu_ops, iret, "iret"); -DEF_NATIVE(pv_cpu_ops, irq_enable_sysexit, "sti; sysexit"); DEF_NATIVE(pv_mmu_ops, read_cr2, "mov %cr2, %eax"); DEF_NATIVE(pv_mmu_ops, write_cr3, "mov %eax, %cr3"); DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax"); @@ -46,7 +45,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf, PATCH_SITE(pv_irq_ops, restore_fl); PATCH_SITE(pv_irq_ops, save_fl); PATCH_SITE(pv_cpu_ops, iret); - PATCH_SITE(pv_cpu_ops, irq_enable_sysexit); PATCH_SITE(pv_mmu_ops, read_cr2); PATCH_SITE(pv_mmu_ops, read_cr3); PATCH_SITE(pv_mmu_ops, write_cr3); diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c index 8aa05583bc42..e70087a04cc8 100644 --- a/arch/x86/kernel/paravirt_patch_64.c +++ b/arch/x86/kernel/paravirt_patch_64.c @@ -13,9 +13,7 @@ DEF_NATIVE(pv_mmu_ops, flush_tlb_single, "invlpg (%rdi)"); DEF_NATIVE(pv_cpu_ops, clts, "clts"); DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd"); -DEF_NATIVE(pv_cpu_ops, irq_enable_sysexit, "swapgs; sti; sysexit"); DEF_NATIVE(pv_cpu_ops, usergs_sysret64, "swapgs; sysretq"); -DEF_NATIVE(pv_cpu_ops, usergs_sysret32, "swapgs; sysretl"); DEF_NATIVE(pv_cpu_ops, swapgs, "swapgs"); DEF_NATIVE(, mov32, "mov %edi, %eax"); @@ -55,7 +53,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf, PATCH_SITE(pv_irq_ops, save_fl); PATCH_SITE(pv_irq_ops, irq_enable); PATCH_SITE(pv_irq_ops, irq_disable); - PATCH_SITE(pv_cpu_ops, usergs_sysret32); PATCH_SITE(pv_cpu_ops, usergs_sysret64); PATCH_SITE(pv_cpu_ops, swapgs); PATCH_SITE(pv_mmu_ops, read_cr2); diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 0497f719977d..833b1d329c47 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -180,13 +180,13 @@ static void calioc2_dump_error_regs(struct iommu_table *tbl); static void calgary_init_bitmap_from_tce_table(struct iommu_table *tbl); static void get_tce_space_from_tar(void); -static struct cal_chipset_ops calgary_chip_ops = { +static const struct cal_chipset_ops calgary_chip_ops = { .handle_quirks = calgary_handle_quirks, .tce_cache_blast = calgary_tce_cache_blast, .dump_error_regs = calgary_dump_error_regs }; -static struct cal_chipset_ops calioc2_chip_ops = { +static const struct cal_chipset_ops calioc2_chip_ops = { .handle_quirks = calioc2_handle_quirks, .tce_cache_blast = calioc2_tce_cache_blast, .dump_error_regs = calioc2_dump_error_regs diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index adf0392d549a..7c577a178859 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -88,7 +88,7 @@ int __init pci_swiotlb_detect_4gb(void) { /* don't initialize swiotlb if iommu=off (no_iommu=1) */ #ifdef CONFIG_X86_64 - if (!no_iommu && max_pfn > MAX_DMA32_PFN) + if (!no_iommu && max_possible_pfn > MAX_DMA32_PFN) swiotlb = 1; #endif return swiotlb; diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index e835d263a33b..b9d99e0f82c4 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -125,7 +125,7 @@ void release_thread(struct task_struct *dead_task) if (dead_task->mm->context.ldt) { pr_warn("WARNING: dead process %s still has LDT? <%p/%d>\n", dead_task->comm, - dead_task->mm->context.ldt, + dead_task->mm->context.ldt->entries, dead_task->mm->context.ldt->size); BUG(); } diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 558f50edebca..32e9d9cbb884 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -124,21 +124,6 @@ const char *regs_query_register_name(unsigned int offset) return NULL; } -static const int arg_offs_table[] = { -#ifdef CONFIG_X86_32 - [0] = offsetof(struct pt_regs, ax), - [1] = offsetof(struct pt_regs, dx), - [2] = offsetof(struct pt_regs, cx) -#else /* CONFIG_X86_64 */ - [0] = offsetof(struct pt_regs, di), - [1] = offsetof(struct pt_regs, si), - [2] = offsetof(struct pt_regs, dx), - [3] = offsetof(struct pt_regs, cx), - [4] = offsetof(struct pt_regs, r8), - [5] = offsetof(struct pt_regs, r9) -#endif -}; - /* * does not yet catch signals sent when the child dies. * in exit.c or in signal.c. diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c index 2f355d229a58..99bfc025111d 100644 --- a/arch/x86/kernel/pvclock.c +++ b/arch/x86/kernel/pvclock.c @@ -140,27 +140,3 @@ void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock, set_normalized_timespec(ts, now.tv_sec, now.tv_nsec); } - -#ifdef CONFIG_X86_64 -/* - * Initialize the generic pvclock vsyscall state. This will allocate - * a/some page(s) for the per-vcpu pvclock information, set up a - * fixmap mapping for the page(s) - */ - -int __init pvclock_init_vsyscall(struct pvclock_vsyscall_time_info *i, - int size) -{ - int idx; - - WARN_ON (size != PVCLOCK_VSYSCALL_NR_PAGES*PAGE_SIZE); - - for (idx = 0; idx <= (PVCLOCK_FIXMAP_END-PVCLOCK_FIXMAP_BEGIN); idx++) { - __set_fixmap(PVCLOCK_FIXMAP_BEGIN + idx, - __pa(i) + (idx*PAGE_SIZE), - PAGE_KERNEL_VVAR); - } - - return 0; -} -#endif diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 02693dd9a079..ab0adc0fa5db 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -182,6 +182,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "iMac9,1"), }, }, + { /* Handle problems with rebooting on the iMac10,1. */ + .callback = set_pci_reboot, + .ident = "Apple iMac10,1", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "iMac10,1"), + }, + }, /* ASRock */ { /* Handle problems with rebooting on ASRock Q1900DC-ITX */ @@ -718,6 +726,7 @@ static int crashing_cpu; static nmi_shootdown_cb shootdown_callback; static atomic_t waiting_for_crash_ipi; +static int crash_ipi_issued; static int crash_nmi_callback(unsigned int val, struct pt_regs *regs) { @@ -780,6 +789,9 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback) smp_send_nmi_allbutself(); + /* Kick CPUs looping in NMI context. */ + WRITE_ONCE(crash_ipi_issued, 1); + msecs = 1000; /* Wait at most a second for the other cpus to stop */ while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) { mdelay(1); @@ -788,9 +800,35 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback) /* Leave the nmi callback set */ } + +/* + * Check if the crash dumping IPI got issued and if so, call its callback + * directly. This function is used when we have already been in NMI handler. + * It doesn't return. + */ +void run_crash_ipi_callback(struct pt_regs *regs) +{ + if (crash_ipi_issued) + crash_nmi_callback(0, regs); +} + +/* Override the weak function in kernel/panic.c */ +void nmi_panic_self_stop(struct pt_regs *regs) +{ + while (1) { + /* If no CPU is preparing crash dump, we simply loop here. */ + run_crash_ipi_callback(regs); + cpu_relax(); + } +} + #else /* !CONFIG_SMP */ void nmi_shootdown_cpus(nmi_shootdown_cb callback) { /* No other CPUs to shoot down */ } + +void run_crash_ipi_callback(struct pt_regs *regs) +{ +} #endif diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index d2bbe343fda7..d3d80e6d42a2 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1048,6 +1048,8 @@ void __init setup_arch(char **cmdline_p) if (mtrr_trim_uncached_memory(max_pfn)) max_pfn = e820_end_of_ram_pfn(); + max_possible_pfn = max_pfn; + #ifdef CONFIG_X86_32 /* max_low_pfn get updated here */ find_low_pfn_range(); diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 12c8286206ce..658777cf3851 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -125,12 +125,12 @@ static void native_smp_send_reschedule(int cpu) WARN_ON(1); return; } - apic->send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR); + apic->send_IPI(cpu, RESCHEDULE_VECTOR); } void native_send_call_func_single_ipi(int cpu) { - apic->send_IPI_mask(cpumask_of(cpu), CALL_FUNCTION_SINGLE_VECTOR); + apic->send_IPI(cpu, CALL_FUNCTION_SINGLE_VECTOR); } void native_send_call_func_ipi(const struct cpumask *mask) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index fbabe4fcc7fb..24d57f77b3c1 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -304,7 +304,7 @@ do { \ static bool match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) { - if (cpu_has_topoext) { + if (boot_cpu_has(X86_FEATURE_TOPOEXT)) { int cpu1 = c->cpu_index, cpu2 = o->cpu_index; if (c->phys_proc_id == o->phys_proc_id && @@ -630,13 +630,6 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) num_starts = 0; /* - * Paravirt / VMI wants a startup IPI hook here to set up the - * target processor state. - */ - startup_ipi_hook(phys_apicid, (unsigned long) start_secondary, - stack_start); - - /* * Run STARTUP IPI loop. */ pr_debug("#startup loops: %d\n", num_starts); diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index c7c4d9c51e99..3d743da828d3 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -1185,8 +1185,6 @@ void __init tsc_init(void) u64 lpj; int cpu; - x86_init.timers.tsc_pre_init(); - if (!cpu_has_tsc) { setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER); return; diff --git a/arch/x86/kernel/verify_cpu.S b/arch/x86/kernel/verify_cpu.S index 4cf401f581e7..07efb35ee4bc 100644 --- a/arch/x86/kernel/verify_cpu.S +++ b/arch/x86/kernel/verify_cpu.S @@ -48,31 +48,31 @@ verify_cpu: pushfl popl %eax cmpl %eax,%ebx - jz verify_cpu_no_longmode # cpu has no cpuid + jz .Lverify_cpu_no_longmode # cpu has no cpuid #endif movl $0x0,%eax # See if cpuid 1 is implemented cpuid cmpl $0x1,%eax - jb verify_cpu_no_longmode # no cpuid 1 + jb .Lverify_cpu_no_longmode # no cpuid 1 xor %di,%di cmpl $0x68747541,%ebx # AuthenticAMD - jnz verify_cpu_noamd + jnz .Lverify_cpu_noamd cmpl $0x69746e65,%edx - jnz verify_cpu_noamd + jnz .Lverify_cpu_noamd cmpl $0x444d4163,%ecx - jnz verify_cpu_noamd + jnz .Lverify_cpu_noamd mov $1,%di # cpu is from AMD - jmp verify_cpu_check + jmp .Lverify_cpu_check -verify_cpu_noamd: +.Lverify_cpu_noamd: cmpl $0x756e6547,%ebx # GenuineIntel? - jnz verify_cpu_check + jnz .Lverify_cpu_check cmpl $0x49656e69,%edx - jnz verify_cpu_check + jnz .Lverify_cpu_check cmpl $0x6c65746e,%ecx - jnz verify_cpu_check + jnz .Lverify_cpu_check # only call IA32_MISC_ENABLE when: # family > 6 || (family == 6 && model >= 0xd) @@ -83,59 +83,59 @@ verify_cpu_noamd: andl $0x0ff00f00, %eax # mask family and extended family shrl $8, %eax cmpl $6, %eax - ja verify_cpu_clear_xd # family > 6, ok - jb verify_cpu_check # family < 6, skip + ja .Lverify_cpu_clear_xd # family > 6, ok + jb .Lverify_cpu_check # family < 6, skip andl $0x000f00f0, %ecx # mask model and extended model shrl $4, %ecx cmpl $0xd, %ecx - jb verify_cpu_check # family == 6, model < 0xd, skip + jb .Lverify_cpu_check # family == 6, model < 0xd, skip -verify_cpu_clear_xd: +.Lverify_cpu_clear_xd: movl $MSR_IA32_MISC_ENABLE, %ecx rdmsr btrl $2, %edx # clear MSR_IA32_MISC_ENABLE_XD_DISABLE - jnc verify_cpu_check # only write MSR if bit was changed + jnc .Lverify_cpu_check # only write MSR if bit was changed wrmsr -verify_cpu_check: +.Lverify_cpu_check: movl $0x1,%eax # Does the cpu have what it takes cpuid andl $REQUIRED_MASK0,%edx xorl $REQUIRED_MASK0,%edx - jnz verify_cpu_no_longmode + jnz .Lverify_cpu_no_longmode movl $0x80000000,%eax # See if extended cpuid is implemented cpuid cmpl $0x80000001,%eax - jb verify_cpu_no_longmode # no extended cpuid + jb .Lverify_cpu_no_longmode # no extended cpuid movl $0x80000001,%eax # Does the cpu have what it takes cpuid andl $REQUIRED_MASK1,%edx xorl $REQUIRED_MASK1,%edx - jnz verify_cpu_no_longmode + jnz .Lverify_cpu_no_longmode -verify_cpu_sse_test: +.Lverify_cpu_sse_test: movl $1,%eax cpuid andl $SSE_MASK,%edx cmpl $SSE_MASK,%edx - je verify_cpu_sse_ok + je .Lverify_cpu_sse_ok test %di,%di - jz verify_cpu_no_longmode # only try to force SSE on AMD + jz .Lverify_cpu_no_longmode # only try to force SSE on AMD movl $MSR_K7_HWCR,%ecx rdmsr btr $15,%eax # enable SSE wrmsr xor %di,%di # don't loop - jmp verify_cpu_sse_test # try again + jmp .Lverify_cpu_sse_test # try again -verify_cpu_no_longmode: +.Lverify_cpu_no_longmode: popf # Restore caller passed flags movl $1,%eax ret -verify_cpu_sse_ok: +.Lverify_cpu_sse_ok: popf # Restore caller passed flags xorl %eax, %eax ret diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 524619351961..e574b8546518 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -175,7 +175,11 @@ static void mark_screen_rdonly(struct mm_struct *mm) if (pud_none_or_clear_bad(pud)) goto out; pmd = pmd_offset(pud, 0xA0000); - split_huge_page_pmd_mm(mm, 0xA0000, pmd); + + if (pmd_trans_huge(*pmd)) { + struct vm_area_struct *vma = find_vma(mm, 0xA0000); + split_huge_pmd(vma, pmd, 0xA0000); + } if (pmd_none_or_clear_bad(pmd)) goto out; pte = pte_offset_map_lock(mm, pmd, 0xA0000, &ptl); @@ -357,8 +361,10 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus) tss = &per_cpu(cpu_tss, get_cpu()); /* make room for real-mode segments */ tsk->thread.sp0 += 16; - if (cpu_has_sep) + + if (static_cpu_has_safe(X86_FEATURE_SEP)) tsk->thread.sysenter_cs = 0; + load_sp0(tss, &tsk->thread); put_cpu(); diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 3839628d962e..dad5fe9633a3 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -68,7 +68,6 @@ struct x86_init_ops x86_init __initdata = { .timers = { .setup_percpu_clockev = setup_boot_APIC_clock, - .tsc_pre_init = x86_init_noop, .timer_init = hpet_time_init, .wallclock_init = x86_init_noop, }, diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index 3f5c48ddba45..c8eda1498121 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -2,6 +2,7 @@ #define ARCH_X86_KVM_CPUID_H #include "x86.h" +#include <asm/cpu.h> int kvm_update_cpuid(struct kvm_vcpu *vcpu); struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, @@ -178,4 +179,37 @@ static inline bool guest_cpuid_has_nrips(struct kvm_vcpu *vcpu) } #undef BIT_NRIPS +static inline int guest_cpuid_family(struct kvm_vcpu *vcpu) +{ + struct kvm_cpuid_entry2 *best; + + best = kvm_find_cpuid_entry(vcpu, 0x1, 0); + if (!best) + return -1; + + return x86_family(best->eax); +} + +static inline int guest_cpuid_model(struct kvm_vcpu *vcpu) +{ + struct kvm_cpuid_entry2 *best; + + best = kvm_find_cpuid_entry(vcpu, 0x1, 0); + if (!best) + return -1; + + return x86_model(best->eax); +} + +static inline int guest_cpuid_stepping(struct kvm_vcpu *vcpu) +{ + struct kvm_cpuid_entry2 *best; + + best = kvm_find_cpuid_entry(vcpu, 0x1, 0); + if (!best) + return -1; + + return x86_stepping(best->eax); +} + #endif diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 62cf8c915e95..c58ba67175ac 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -23,13 +23,665 @@ #include "x86.h" #include "lapic.h" +#include "ioapic.h" #include "hyperv.h" #include <linux/kvm_host.h> +#include <linux/highmem.h> +#include <asm/apicdef.h> #include <trace/events/kvm.h> #include "trace.h" +static inline u64 synic_read_sint(struct kvm_vcpu_hv_synic *synic, int sint) +{ + return atomic64_read(&synic->sint[sint]); +} + +static inline int synic_get_sint_vector(u64 sint_value) +{ + if (sint_value & HV_SYNIC_SINT_MASKED) + return -1; + return sint_value & HV_SYNIC_SINT_VECTOR_MASK; +} + +static bool synic_has_vector_connected(struct kvm_vcpu_hv_synic *synic, + int vector) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(synic->sint); i++) { + if (synic_get_sint_vector(synic_read_sint(synic, i)) == vector) + return true; + } + return false; +} + +static bool synic_has_vector_auto_eoi(struct kvm_vcpu_hv_synic *synic, + int vector) +{ + int i; + u64 sint_value; + + for (i = 0; i < ARRAY_SIZE(synic->sint); i++) { + sint_value = synic_read_sint(synic, i); + if (synic_get_sint_vector(sint_value) == vector && + sint_value & HV_SYNIC_SINT_AUTO_EOI) + return true; + } + return false; +} + +static int synic_set_sint(struct kvm_vcpu_hv_synic *synic, int sint, + u64 data, bool host) +{ + int vector; + + vector = data & HV_SYNIC_SINT_VECTOR_MASK; + if (vector < 16 && !host) + return 1; + /* + * Guest may configure multiple SINTs to use the same vector, so + * we maintain a bitmap of vectors handled by synic, and a + * bitmap of vectors with auto-eoi behavior. The bitmaps are + * updated here, and atomically queried on fast paths. + */ + + atomic64_set(&synic->sint[sint], data); + + if (synic_has_vector_connected(synic, vector)) + __set_bit(vector, synic->vec_bitmap); + else + __clear_bit(vector, synic->vec_bitmap); + + if (synic_has_vector_auto_eoi(synic, vector)) + __set_bit(vector, synic->auto_eoi_bitmap); + else + __clear_bit(vector, synic->auto_eoi_bitmap); + + /* Load SynIC vectors into EOI exit bitmap */ + kvm_make_request(KVM_REQ_SCAN_IOAPIC, synic_to_vcpu(synic)); + return 0; +} + +static struct kvm_vcpu_hv_synic *synic_get(struct kvm *kvm, u32 vcpu_id) +{ + struct kvm_vcpu *vcpu; + struct kvm_vcpu_hv_synic *synic; + + if (vcpu_id >= atomic_read(&kvm->online_vcpus)) + return NULL; + vcpu = kvm_get_vcpu(kvm, vcpu_id); + if (!vcpu) + return NULL; + synic = vcpu_to_synic(vcpu); + return (synic->active) ? synic : NULL; +} + +static void synic_clear_sint_msg_pending(struct kvm_vcpu_hv_synic *synic, + u32 sint) +{ + struct kvm_vcpu *vcpu = synic_to_vcpu(synic); + struct page *page; + gpa_t gpa; + struct hv_message *msg; + struct hv_message_page *msg_page; + + gpa = synic->msg_page & PAGE_MASK; + page = kvm_vcpu_gfn_to_page(vcpu, gpa >> PAGE_SHIFT); + if (is_error_page(page)) { + vcpu_err(vcpu, "Hyper-V SynIC can't get msg page, gpa 0x%llx\n", + gpa); + return; + } + msg_page = kmap_atomic(page); + + msg = &msg_page->sint_message[sint]; + msg->header.message_flags.msg_pending = 0; + + kunmap_atomic(msg_page); + kvm_release_page_dirty(page); + kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT); +} + +static void kvm_hv_notify_acked_sint(struct kvm_vcpu *vcpu, u32 sint) +{ + struct kvm *kvm = vcpu->kvm; + struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu); + struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu); + struct kvm_vcpu_hv_stimer *stimer; + int gsi, idx, stimers_pending; + + trace_kvm_hv_notify_acked_sint(vcpu->vcpu_id, sint); + + if (synic->msg_page & HV_SYNIC_SIMP_ENABLE) + synic_clear_sint_msg_pending(synic, sint); + + /* Try to deliver pending Hyper-V SynIC timers messages */ + stimers_pending = 0; + for (idx = 0; idx < ARRAY_SIZE(hv_vcpu->stimer); idx++) { + stimer = &hv_vcpu->stimer[idx]; + if (stimer->msg_pending && + (stimer->config & HV_STIMER_ENABLE) && + HV_STIMER_SINT(stimer->config) == sint) { + set_bit(stimer->index, + hv_vcpu->stimer_pending_bitmap); + stimers_pending++; + } + } + if (stimers_pending) + kvm_make_request(KVM_REQ_HV_STIMER, vcpu); + + idx = srcu_read_lock(&kvm->irq_srcu); + gsi = atomic_read(&synic->sint_to_gsi[sint]); + if (gsi != -1) + kvm_notify_acked_gsi(kvm, gsi); + srcu_read_unlock(&kvm->irq_srcu, idx); +} + +static void synic_exit(struct kvm_vcpu_hv_synic *synic, u32 msr) +{ + struct kvm_vcpu *vcpu = synic_to_vcpu(synic); + struct kvm_vcpu_hv *hv_vcpu = &vcpu->arch.hyperv; + + hv_vcpu->exit.type = KVM_EXIT_HYPERV_SYNIC; + hv_vcpu->exit.u.synic.msr = msr; + hv_vcpu->exit.u.synic.control = synic->control; + hv_vcpu->exit.u.synic.evt_page = synic->evt_page; + hv_vcpu->exit.u.synic.msg_page = synic->msg_page; + + kvm_make_request(KVM_REQ_HV_EXIT, vcpu); +} + +static int synic_set_msr(struct kvm_vcpu_hv_synic *synic, + u32 msr, u64 data, bool host) +{ + struct kvm_vcpu *vcpu = synic_to_vcpu(synic); + int ret; + + if (!synic->active) + return 1; + + trace_kvm_hv_synic_set_msr(vcpu->vcpu_id, msr, data, host); + + ret = 0; + switch (msr) { + case HV_X64_MSR_SCONTROL: + synic->control = data; + if (!host) + synic_exit(synic, msr); + break; + case HV_X64_MSR_SVERSION: + if (!host) { + ret = 1; + break; + } + synic->version = data; + break; + case HV_X64_MSR_SIEFP: + if (data & HV_SYNIC_SIEFP_ENABLE) + if (kvm_clear_guest(vcpu->kvm, + data & PAGE_MASK, PAGE_SIZE)) { + ret = 1; + break; + } + synic->evt_page = data; + if (!host) + synic_exit(synic, msr); + break; + case HV_X64_MSR_SIMP: + if (data & HV_SYNIC_SIMP_ENABLE) + if (kvm_clear_guest(vcpu->kvm, + data & PAGE_MASK, PAGE_SIZE)) { + ret = 1; + break; + } + synic->msg_page = data; + if (!host) + synic_exit(synic, msr); + break; + case HV_X64_MSR_EOM: { + int i; + + for (i = 0; i < ARRAY_SIZE(synic->sint); i++) + kvm_hv_notify_acked_sint(vcpu, i); + break; + } + case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15: + ret = synic_set_sint(synic, msr - HV_X64_MSR_SINT0, data, host); + break; + default: + ret = 1; + break; + } + return ret; +} + +static int synic_get_msr(struct kvm_vcpu_hv_synic *synic, u32 msr, u64 *pdata) +{ + int ret; + + if (!synic->active) + return 1; + + ret = 0; + switch (msr) { + case HV_X64_MSR_SCONTROL: + *pdata = synic->control; + break; + case HV_X64_MSR_SVERSION: + *pdata = synic->version; + break; + case HV_X64_MSR_SIEFP: + *pdata = synic->evt_page; + break; + case HV_X64_MSR_SIMP: + *pdata = synic->msg_page; + break; + case HV_X64_MSR_EOM: + *pdata = 0; + break; + case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15: + *pdata = atomic64_read(&synic->sint[msr - HV_X64_MSR_SINT0]); + break; + default: + ret = 1; + break; + } + return ret; +} + +int synic_set_irq(struct kvm_vcpu_hv_synic *synic, u32 sint) +{ + struct kvm_vcpu *vcpu = synic_to_vcpu(synic); + struct kvm_lapic_irq irq; + int ret, vector; + + if (sint >= ARRAY_SIZE(synic->sint)) + return -EINVAL; + + vector = synic_get_sint_vector(synic_read_sint(synic, sint)); + if (vector < 0) + return -ENOENT; + + memset(&irq, 0, sizeof(irq)); + irq.dest_id = kvm_apic_id(vcpu->arch.apic); + irq.dest_mode = APIC_DEST_PHYSICAL; + irq.delivery_mode = APIC_DM_FIXED; + irq.vector = vector; + irq.level = 1; + + ret = kvm_irq_delivery_to_apic(vcpu->kvm, NULL, &irq, NULL); + trace_kvm_hv_synic_set_irq(vcpu->vcpu_id, sint, irq.vector, ret); + return ret; +} + +int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vcpu_id, u32 sint) +{ + struct kvm_vcpu_hv_synic *synic; + + synic = synic_get(kvm, vcpu_id); + if (!synic) + return -EINVAL; + + return synic_set_irq(synic, sint); +} + +void kvm_hv_synic_send_eoi(struct kvm_vcpu *vcpu, int vector) +{ + struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu); + int i; + + trace_kvm_hv_synic_send_eoi(vcpu->vcpu_id, vector); + + for (i = 0; i < ARRAY_SIZE(synic->sint); i++) + if (synic_get_sint_vector(synic_read_sint(synic, i)) == vector) + kvm_hv_notify_acked_sint(vcpu, i); +} + +static int kvm_hv_set_sint_gsi(struct kvm *kvm, u32 vcpu_id, u32 sint, int gsi) +{ + struct kvm_vcpu_hv_synic *synic; + + synic = synic_get(kvm, vcpu_id); + if (!synic) + return -EINVAL; + + if (sint >= ARRAY_SIZE(synic->sint_to_gsi)) + return -EINVAL; + + atomic_set(&synic->sint_to_gsi[sint], gsi); + return 0; +} + +void kvm_hv_irq_routing_update(struct kvm *kvm) +{ + struct kvm_irq_routing_table *irq_rt; + struct kvm_kernel_irq_routing_entry *e; + u32 gsi; + + irq_rt = srcu_dereference_check(kvm->irq_routing, &kvm->irq_srcu, + lockdep_is_held(&kvm->irq_lock)); + + for (gsi = 0; gsi < irq_rt->nr_rt_entries; gsi++) { + hlist_for_each_entry(e, &irq_rt->map[gsi], link) { + if (e->type == KVM_IRQ_ROUTING_HV_SINT) + kvm_hv_set_sint_gsi(kvm, e->hv_sint.vcpu, + e->hv_sint.sint, gsi); + } + } +} + +static void synic_init(struct kvm_vcpu_hv_synic *synic) +{ + int i; + + memset(synic, 0, sizeof(*synic)); + synic->version = HV_SYNIC_VERSION_1; + for (i = 0; i < ARRAY_SIZE(synic->sint); i++) { + atomic64_set(&synic->sint[i], HV_SYNIC_SINT_MASKED); + atomic_set(&synic->sint_to_gsi[i], -1); + } +} + +static u64 get_time_ref_counter(struct kvm *kvm) +{ + return div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100); +} + +static void stimer_mark_pending(struct kvm_vcpu_hv_stimer *stimer, + bool vcpu_kick) +{ + struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer); + + set_bit(stimer->index, + vcpu_to_hv_vcpu(vcpu)->stimer_pending_bitmap); + kvm_make_request(KVM_REQ_HV_STIMER, vcpu); + if (vcpu_kick) + kvm_vcpu_kick(vcpu); +} + +static void stimer_cleanup(struct kvm_vcpu_hv_stimer *stimer) +{ + struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer); + + trace_kvm_hv_stimer_cleanup(stimer_to_vcpu(stimer)->vcpu_id, + stimer->index); + + hrtimer_cancel(&stimer->timer); + clear_bit(stimer->index, + vcpu_to_hv_vcpu(vcpu)->stimer_pending_bitmap); + stimer->msg_pending = false; + stimer->exp_time = 0; +} + +static enum hrtimer_restart stimer_timer_callback(struct hrtimer *timer) +{ + struct kvm_vcpu_hv_stimer *stimer; + + stimer = container_of(timer, struct kvm_vcpu_hv_stimer, timer); + trace_kvm_hv_stimer_callback(stimer_to_vcpu(stimer)->vcpu_id, + stimer->index); + stimer_mark_pending(stimer, true); + + return HRTIMER_NORESTART; +} + +/* + * stimer_start() assumptions: + * a) stimer->count is not equal to 0 + * b) stimer->config has HV_STIMER_ENABLE flag + */ +static int stimer_start(struct kvm_vcpu_hv_stimer *stimer) +{ + u64 time_now; + ktime_t ktime_now; + + time_now = get_time_ref_counter(stimer_to_vcpu(stimer)->kvm); + ktime_now = ktime_get(); + + if (stimer->config & HV_STIMER_PERIODIC) { + if (stimer->exp_time) { + if (time_now >= stimer->exp_time) { + u64 remainder; + + div64_u64_rem(time_now - stimer->exp_time, + stimer->count, &remainder); + stimer->exp_time = + time_now + (stimer->count - remainder); + } + } else + stimer->exp_time = time_now + stimer->count; + + trace_kvm_hv_stimer_start_periodic( + stimer_to_vcpu(stimer)->vcpu_id, + stimer->index, + time_now, stimer->exp_time); + + hrtimer_start(&stimer->timer, + ktime_add_ns(ktime_now, + 100 * (stimer->exp_time - time_now)), + HRTIMER_MODE_ABS); + return 0; + } + stimer->exp_time = stimer->count; + if (time_now >= stimer->count) { + /* + * Expire timer according to Hypervisor Top-Level Functional + * specification v4(15.3.1): + * "If a one shot is enabled and the specified count is in + * the past, it will expire immediately." + */ + stimer_mark_pending(stimer, false); + return 0; + } + + trace_kvm_hv_stimer_start_one_shot(stimer_to_vcpu(stimer)->vcpu_id, + stimer->index, + time_now, stimer->count); + + hrtimer_start(&stimer->timer, + ktime_add_ns(ktime_now, 100 * (stimer->count - time_now)), + HRTIMER_MODE_ABS); + return 0; +} + +static int stimer_set_config(struct kvm_vcpu_hv_stimer *stimer, u64 config, + bool host) +{ + trace_kvm_hv_stimer_set_config(stimer_to_vcpu(stimer)->vcpu_id, + stimer->index, config, host); + + stimer_cleanup(stimer); + if ((stimer->config & HV_STIMER_ENABLE) && HV_STIMER_SINT(config) == 0) + config &= ~HV_STIMER_ENABLE; + stimer->config = config; + stimer_mark_pending(stimer, false); + return 0; +} + +static int stimer_set_count(struct kvm_vcpu_hv_stimer *stimer, u64 count, + bool host) +{ + trace_kvm_hv_stimer_set_count(stimer_to_vcpu(stimer)->vcpu_id, + stimer->index, count, host); + + stimer_cleanup(stimer); + stimer->count = count; + if (stimer->count == 0) + stimer->config &= ~HV_STIMER_ENABLE; + else if (stimer->config & HV_STIMER_AUTOENABLE) + stimer->config |= HV_STIMER_ENABLE; + stimer_mark_pending(stimer, false); + return 0; +} + +static int stimer_get_config(struct kvm_vcpu_hv_stimer *stimer, u64 *pconfig) +{ + *pconfig = stimer->config; + return 0; +} + +static int stimer_get_count(struct kvm_vcpu_hv_stimer *stimer, u64 *pcount) +{ + *pcount = stimer->count; + return 0; +} + +static int synic_deliver_msg(struct kvm_vcpu_hv_synic *synic, u32 sint, + struct hv_message *src_msg) +{ + struct kvm_vcpu *vcpu = synic_to_vcpu(synic); + struct page *page; + gpa_t gpa; + struct hv_message *dst_msg; + int r; + struct hv_message_page *msg_page; + + if (!(synic->msg_page & HV_SYNIC_SIMP_ENABLE)) + return -ENOENT; + + gpa = synic->msg_page & PAGE_MASK; + page = kvm_vcpu_gfn_to_page(vcpu, gpa >> PAGE_SHIFT); + if (is_error_page(page)) + return -EFAULT; + + msg_page = kmap_atomic(page); + dst_msg = &msg_page->sint_message[sint]; + if (sync_cmpxchg(&dst_msg->header.message_type, HVMSG_NONE, + src_msg->header.message_type) != HVMSG_NONE) { + dst_msg->header.message_flags.msg_pending = 1; + r = -EAGAIN; + } else { + memcpy(&dst_msg->u.payload, &src_msg->u.payload, + src_msg->header.payload_size); + dst_msg->header.message_type = src_msg->header.message_type; + dst_msg->header.payload_size = src_msg->header.payload_size; + r = synic_set_irq(synic, sint); + if (r >= 1) + r = 0; + else if (r == 0) + r = -EFAULT; + } + kunmap_atomic(msg_page); + kvm_release_page_dirty(page); + kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT); + return r; +} + +static int stimer_send_msg(struct kvm_vcpu_hv_stimer *stimer) +{ + struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer); + struct hv_message *msg = &stimer->msg; + struct hv_timer_message_payload *payload = + (struct hv_timer_message_payload *)&msg->u.payload; + + payload->expiration_time = stimer->exp_time; + payload->delivery_time = get_time_ref_counter(vcpu->kvm); + return synic_deliver_msg(vcpu_to_synic(vcpu), + HV_STIMER_SINT(stimer->config), msg); +} + +static void stimer_expiration(struct kvm_vcpu_hv_stimer *stimer) +{ + int r; + + stimer->msg_pending = true; + r = stimer_send_msg(stimer); + trace_kvm_hv_stimer_expiration(stimer_to_vcpu(stimer)->vcpu_id, + stimer->index, r); + if (!r) { + stimer->msg_pending = false; + if (!(stimer->config & HV_STIMER_PERIODIC)) + stimer->config &= ~HV_STIMER_ENABLE; + } +} + +void kvm_hv_process_stimers(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu); + struct kvm_vcpu_hv_stimer *stimer; + u64 time_now, exp_time; + int i; + + for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++) + if (test_and_clear_bit(i, hv_vcpu->stimer_pending_bitmap)) { + stimer = &hv_vcpu->stimer[i]; + if (stimer->config & HV_STIMER_ENABLE) { + exp_time = stimer->exp_time; + + if (exp_time) { + time_now = + get_time_ref_counter(vcpu->kvm); + if (time_now >= exp_time) + stimer_expiration(stimer); + } + + if ((stimer->config & HV_STIMER_ENABLE) && + stimer->count) + stimer_start(stimer); + else + stimer_cleanup(stimer); + } + } +} + +void kvm_hv_vcpu_uninit(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu); + int i; + + for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++) + stimer_cleanup(&hv_vcpu->stimer[i]); +} + +static void stimer_prepare_msg(struct kvm_vcpu_hv_stimer *stimer) +{ + struct hv_message *msg = &stimer->msg; + struct hv_timer_message_payload *payload = + (struct hv_timer_message_payload *)&msg->u.payload; + + memset(&msg->header, 0, sizeof(msg->header)); + msg->header.message_type = HVMSG_TIMER_EXPIRED; + msg->header.payload_size = sizeof(*payload); + + payload->timer_index = stimer->index; + payload->expiration_time = 0; + payload->delivery_time = 0; +} + +static void stimer_init(struct kvm_vcpu_hv_stimer *stimer, int timer_index) +{ + memset(stimer, 0, sizeof(*stimer)); + stimer->index = timer_index; + hrtimer_init(&stimer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + stimer->timer.function = stimer_timer_callback; + stimer_prepare_msg(stimer); +} + +void kvm_hv_vcpu_init(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu); + int i; + + synic_init(&hv_vcpu->synic); + + bitmap_zero(hv_vcpu->stimer_pending_bitmap, HV_SYNIC_STIMER_COUNT); + for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++) + stimer_init(&hv_vcpu->stimer[i], i); +} + +int kvm_hv_activate_synic(struct kvm_vcpu *vcpu) +{ + /* + * Hyper-V SynIC auto EOI SINT's are + * not compatible with APICV, so deactivate APICV + */ + kvm_vcpu_deactivate_apicv(vcpu); + vcpu_to_synic(vcpu)->active = true; + return 0; +} + static bool kvm_hv_msr_partition_wide(u32 msr) { bool r = false; @@ -226,6 +878,31 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host) return 1; hv->runtime_offset = data - current_task_runtime_100ns(); break; + case HV_X64_MSR_SCONTROL: + case HV_X64_MSR_SVERSION: + case HV_X64_MSR_SIEFP: + case HV_X64_MSR_SIMP: + case HV_X64_MSR_EOM: + case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15: + return synic_set_msr(vcpu_to_synic(vcpu), msr, data, host); + case HV_X64_MSR_STIMER0_CONFIG: + case HV_X64_MSR_STIMER1_CONFIG: + case HV_X64_MSR_STIMER2_CONFIG: + case HV_X64_MSR_STIMER3_CONFIG: { + int timer_index = (msr - HV_X64_MSR_STIMER0_CONFIG)/2; + + return stimer_set_config(vcpu_to_stimer(vcpu, timer_index), + data, host); + } + case HV_X64_MSR_STIMER0_COUNT: + case HV_X64_MSR_STIMER1_COUNT: + case HV_X64_MSR_STIMER2_COUNT: + case HV_X64_MSR_STIMER3_COUNT: { + int timer_index = (msr - HV_X64_MSR_STIMER0_COUNT)/2; + + return stimer_set_count(vcpu_to_stimer(vcpu, timer_index), + data, host); + } default: vcpu_unimpl(vcpu, "Hyper-V uhandled wrmsr: 0x%x data 0x%llx\n", msr, data); @@ -248,11 +925,9 @@ static int kvm_hv_get_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) case HV_X64_MSR_HYPERCALL: data = hv->hv_hypercall; break; - case HV_X64_MSR_TIME_REF_COUNT: { - data = - div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100); + case HV_X64_MSR_TIME_REF_COUNT: + data = get_time_ref_counter(kvm); break; - } case HV_X64_MSR_REFERENCE_TSC: data = hv->hv_tsc_page; break; @@ -304,6 +979,31 @@ static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) case HV_X64_MSR_VP_RUNTIME: data = current_task_runtime_100ns() + hv->runtime_offset; break; + case HV_X64_MSR_SCONTROL: + case HV_X64_MSR_SVERSION: + case HV_X64_MSR_SIEFP: + case HV_X64_MSR_SIMP: + case HV_X64_MSR_EOM: + case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15: + return synic_get_msr(vcpu_to_synic(vcpu), msr, pdata); + case HV_X64_MSR_STIMER0_CONFIG: + case HV_X64_MSR_STIMER1_CONFIG: + case HV_X64_MSR_STIMER2_CONFIG: + case HV_X64_MSR_STIMER3_CONFIG: { + int timer_index = (msr - HV_X64_MSR_STIMER0_CONFIG)/2; + + return stimer_get_config(vcpu_to_stimer(vcpu, timer_index), + pdata); + } + case HV_X64_MSR_STIMER0_COUNT: + case HV_X64_MSR_STIMER1_COUNT: + case HV_X64_MSR_STIMER2_COUNT: + case HV_X64_MSR_STIMER3_COUNT: { + int timer_index = (msr - HV_X64_MSR_STIMER0_COUNT)/2; + + return stimer_get_count(vcpu_to_stimer(vcpu, timer_index), + pdata); + } default: vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); return 1; diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h index c7bce559f67b..60eccd4bd1d3 100644 --- a/arch/x86/kvm/hyperv.h +++ b/arch/x86/kvm/hyperv.h @@ -24,9 +24,64 @@ #ifndef __ARCH_X86_KVM_HYPERV_H__ #define __ARCH_X86_KVM_HYPERV_H__ +static inline struct kvm_vcpu_hv *vcpu_to_hv_vcpu(struct kvm_vcpu *vcpu) +{ + return &vcpu->arch.hyperv; +} + +static inline struct kvm_vcpu *hv_vcpu_to_vcpu(struct kvm_vcpu_hv *hv_vcpu) +{ + struct kvm_vcpu_arch *arch; + + arch = container_of(hv_vcpu, struct kvm_vcpu_arch, hyperv); + return container_of(arch, struct kvm_vcpu, arch); +} + +static inline struct kvm_vcpu_hv_synic *vcpu_to_synic(struct kvm_vcpu *vcpu) +{ + return &vcpu->arch.hyperv.synic; +} + +static inline struct kvm_vcpu *synic_to_vcpu(struct kvm_vcpu_hv_synic *synic) +{ + return hv_vcpu_to_vcpu(container_of(synic, struct kvm_vcpu_hv, synic)); +} + int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host); int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata); + bool kvm_hv_hypercall_enabled(struct kvm *kvm); int kvm_hv_hypercall(struct kvm_vcpu *vcpu); +void kvm_hv_irq_routing_update(struct kvm *kvm); +int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vcpu_id, u32 sint); +void kvm_hv_synic_send_eoi(struct kvm_vcpu *vcpu, int vector); +int kvm_hv_activate_synic(struct kvm_vcpu *vcpu); + +void kvm_hv_vcpu_init(struct kvm_vcpu *vcpu); +void kvm_hv_vcpu_uninit(struct kvm_vcpu *vcpu); + +static inline struct kvm_vcpu_hv_stimer *vcpu_to_stimer(struct kvm_vcpu *vcpu, + int timer_index) +{ + return &vcpu_to_hv_vcpu(vcpu)->stimer[timer_index]; +} + +static inline struct kvm_vcpu *stimer_to_vcpu(struct kvm_vcpu_hv_stimer *stimer) +{ + struct kvm_vcpu_hv *hv_vcpu; + + hv_vcpu = container_of(stimer - stimer->index, struct kvm_vcpu_hv, + stimer[0]); + return hv_vcpu_to_vcpu(hv_vcpu); +} + +static inline bool kvm_hv_has_stimer_pending(struct kvm_vcpu *vcpu) +{ + return !bitmap_empty(vcpu->arch.hyperv.stimer_pending_bitmap, + HV_SYNIC_STIMER_COUNT); +} + +void kvm_hv_process_stimers(struct kvm_vcpu *vcpu); + #endif diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index 88d0a92d3f94..1facfd60b04a 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -233,7 +233,7 @@ static void kvm_ioapic_inject_all(struct kvm_ioapic *ioapic, unsigned long irr) } -void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) +void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, ulong *ioapic_handled_vectors) { struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic; union kvm_ioapic_redirect_entry *e; @@ -250,7 +250,7 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) (e->fields.trig_mode == IOAPIC_EDGE_TRIG && kvm_apic_pending_eoi(vcpu, e->fields.vector))) __set_bit(e->fields.vector, - (unsigned long *)eoi_exit_bitmap); + ioapic_handled_vectors); } } spin_unlock(&ioapic->lock); diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h index 084617d37c74..2d16dc251d81 100644 --- a/arch/x86/kvm/ioapic.h +++ b/arch/x86/kvm/ioapic.h @@ -121,7 +121,8 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, struct kvm_lapic_irq *irq, unsigned long *dest_map); int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); -void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); -void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); - +void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, + ulong *ioapic_handled_vectors); +void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, + ulong *ioapic_handled_vectors); #endif diff --git a/arch/x86/kvm/iommu.c b/arch/x86/kvm/iommu.c index 5c520ebf6343..a22a488b4622 100644 --- a/arch/x86/kvm/iommu.c +++ b/arch/x86/kvm/iommu.c @@ -43,11 +43,11 @@ static int kvm_iommu_unmap_memslots(struct kvm *kvm); static void kvm_iommu_put_pages(struct kvm *kvm, gfn_t base_gfn, unsigned long npages); -static pfn_t kvm_pin_pages(struct kvm_memory_slot *slot, gfn_t gfn, +static kvm_pfn_t kvm_pin_pages(struct kvm_memory_slot *slot, gfn_t gfn, unsigned long npages) { gfn_t end_gfn; - pfn_t pfn; + kvm_pfn_t pfn; pfn = gfn_to_pfn_memslot(slot, gfn); end_gfn = gfn + npages; @@ -62,7 +62,8 @@ static pfn_t kvm_pin_pages(struct kvm_memory_slot *slot, gfn_t gfn, return pfn; } -static void kvm_unpin_pages(struct kvm *kvm, pfn_t pfn, unsigned long npages) +static void kvm_unpin_pages(struct kvm *kvm, kvm_pfn_t pfn, + unsigned long npages) { unsigned long i; @@ -73,7 +74,7 @@ static void kvm_unpin_pages(struct kvm *kvm, pfn_t pfn, unsigned long npages) int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot) { gfn_t gfn, end_gfn; - pfn_t pfn; + kvm_pfn_t pfn; int r = 0; struct iommu_domain *domain = kvm->arch.iommu_domain; int flags; @@ -275,7 +276,7 @@ static void kvm_iommu_put_pages(struct kvm *kvm, { struct iommu_domain *domain; gfn_t end_gfn, gfn; - pfn_t pfn; + kvm_pfn_t pfn; u64 phys; domain = kvm->arch.iommu_domain; diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index 097060e33bd6..3982b479bb5f 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c @@ -76,7 +76,7 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v) if (kvm_cpu_has_extint(v)) return 1; - if (kvm_vcpu_apic_vid_enabled(v)) + if (kvm_vcpu_apicv_active(v)) return 0; return kvm_apic_has_interrupt(v) != -1; /* LAPIC */ diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c index 84b96d319909..8fc89efb5250 100644 --- a/arch/x86/kvm/irq_comm.c +++ b/arch/x86/kvm/irq_comm.c @@ -33,6 +33,8 @@ #include "lapic.h" +#include "hyperv.h" + static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm, int irq_source_id, int level, bool line_status) @@ -219,6 +221,16 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, srcu_read_unlock(&kvm->irq_srcu, idx); } +static int kvm_hv_set_sint(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int irq_source_id, int level, + bool line_status) +{ + if (!level) + return -1; + + return kvm_hv_synic_set_irq(kvm, e->hv_sint.vcpu, e->hv_sint.sint); +} + int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e, const struct kvm_irq_routing_entry *ue) { @@ -257,6 +269,11 @@ int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e, e->msi.address_hi = ue->u.msi.address_hi; e->msi.data = ue->u.msi.data; break; + case KVM_IRQ_ROUTING_HV_SINT: + e->set = kvm_hv_set_sint; + e->hv_sint.vcpu = ue->u.hv_sint.vcpu; + e->hv_sint.sint = ue->u.hv_sint.sint; + break; default: goto out; } @@ -332,14 +349,15 @@ int kvm_setup_empty_irq_routing(struct kvm *kvm) return kvm_set_irq_routing(kvm, empty_routing, 0, 0); } -void kvm_arch_irq_routing_update(struct kvm *kvm) +void kvm_arch_post_irq_routing_update(struct kvm *kvm) { if (ioapic_in_kernel(kvm) || !irqchip_in_kernel(kvm)) return; kvm_make_scan_ioapic_request(kvm); } -void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) +void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, + ulong *ioapic_handled_vectors) { struct kvm *kvm = vcpu->kvm; struct kvm_kernel_irq_routing_entry *entry; @@ -369,9 +387,26 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) u32 vector = entry->msi.data & 0xff; __set_bit(vector, - (unsigned long *) eoi_exit_bitmap); + ioapic_handled_vectors); } } } srcu_read_unlock(&kvm->irq_srcu, idx); } + +int kvm_arch_set_irq(struct kvm_kernel_irq_routing_entry *irq, struct kvm *kvm, + int irq_source_id, int level, bool line_status) +{ + switch (irq->type) { + case KVM_IRQ_ROUTING_HV_SINT: + return kvm_hv_set_sint(irq, kvm, irq_source_id, level, + line_status); + default: + return -EWOULDBLOCK; + } +} + +void kvm_arch_irq_routing_update(struct kvm *kvm) +{ + kvm_hv_irq_routing_update(kvm); +} diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 4d30b865be30..36591faed13b 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -41,6 +41,7 @@ #include "trace.h" #include "x86.h" #include "cpuid.h" +#include "hyperv.h" #ifndef CONFIG_X86_64 #define mod_64(x, y) ((x) - (y) * div64_u64(x, y)) @@ -128,11 +129,6 @@ static inline int apic_enabled(struct kvm_lapic *apic) (LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \ APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER) -static inline int kvm_apic_id(struct kvm_lapic *apic) -{ - return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff; -} - /* The logical map is definitely wrong if we have multiple * modes at the same time. (Physical map is always right.) */ @@ -379,7 +375,8 @@ static inline int apic_find_highest_irr(struct kvm_lapic *apic) if (!apic->irr_pending) return -1; - kvm_x86_ops->sync_pir_to_irr(apic->vcpu); + if (apic->vcpu->arch.apicv_active) + kvm_x86_ops->sync_pir_to_irr(apic->vcpu); result = apic_search_irr(apic); ASSERT(result == -1 || result >= 16); @@ -392,7 +389,7 @@ static inline void apic_clear_irr(int vec, struct kvm_lapic *apic) vcpu = apic->vcpu; - if (unlikely(kvm_vcpu_apic_vid_enabled(vcpu))) { + if (unlikely(vcpu->arch.apicv_active)) { /* try to update RVI */ apic_clear_vector(vec, apic->regs + APIC_IRR); kvm_make_request(KVM_REQ_EVENT, vcpu); @@ -418,7 +415,7 @@ static inline void apic_set_isr(int vec, struct kvm_lapic *apic) * because the processor can modify ISR under the hood. Instead * just set SVI. */ - if (unlikely(kvm_x86_ops->hwapic_isr_update)) + if (unlikely(vcpu->arch.apicv_active)) kvm_x86_ops->hwapic_isr_update(vcpu->kvm, vec); else { ++apic->isr_count; @@ -466,7 +463,7 @@ static inline void apic_clear_isr(int vec, struct kvm_lapic *apic) * on the other hand isr_count and highest_isr_cache are unused * and must be left alone. */ - if (unlikely(kvm_x86_ops->hwapic_isr_update)) + if (unlikely(vcpu->arch.apicv_active)) kvm_x86_ops->hwapic_isr_update(vcpu->kvm, apic_find_highest_isr(apic)); else { @@ -852,7 +849,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, apic_clear_vector(vector, apic->regs + APIC_TMR); } - if (kvm_x86_ops->deliver_posted_interrupt) + if (vcpu->arch.apicv_active) kvm_x86_ops->deliver_posted_interrupt(vcpu, vector); else { apic_set_irr(vector, apic); @@ -932,7 +929,7 @@ int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2) static bool kvm_ioapic_handles_vector(struct kvm_lapic *apic, int vector) { - return test_bit(vector, (ulong *)apic->vcpu->arch.eoi_exit_bitmap); + return test_bit(vector, apic->vcpu->arch.ioapic_handled_vectors); } static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector) @@ -974,6 +971,9 @@ static int apic_set_eoi(struct kvm_lapic *apic) apic_clear_isr(vector, apic); apic_update_ppr(apic); + if (test_bit(vector, vcpu_to_synic(apic->vcpu)->vec_bitmap)) + kvm_hv_synic_send_eoi(apic->vcpu, vector); + kvm_ioapic_send_eoi(apic, vector); kvm_make_request(KVM_REQ_EVENT, apic->vcpu); return vector; @@ -1225,7 +1225,7 @@ static bool lapic_timer_int_injected(struct kvm_vcpu *vcpu) int vec = reg & APIC_VECTOR_MASK; void *bitmap = apic->regs + APIC_ISR; - if (kvm_x86_ops->deliver_posted_interrupt) + if (vcpu->arch.apicv_active) bitmap = apic->regs + APIC_IRR; if (apic_test_vector(vec, bitmap)) @@ -1693,8 +1693,8 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event) apic_set_reg(apic, APIC_ISR + 0x10 * i, 0); apic_set_reg(apic, APIC_TMR + 0x10 * i, 0); } - apic->irr_pending = kvm_vcpu_apic_vid_enabled(vcpu); - apic->isr_count = kvm_x86_ops->hwapic_isr_update ? 1 : 0; + apic->irr_pending = vcpu->arch.apicv_active; + apic->isr_count = vcpu->arch.apicv_active ? 1 : 0; apic->highest_isr_cache = -1; update_divide_count(apic); atomic_set(&apic->lapic_timer.pending, 0); @@ -1883,6 +1883,12 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu) apic_set_isr(vector, apic); apic_update_ppr(apic); apic_clear_irr(vector, apic); + + if (test_bit(vector, vcpu_to_synic(vcpu)->auto_eoi_bitmap)) { + apic_clear_isr(vector, apic); + apic_update_ppr(apic); + } + return vector; } @@ -1906,15 +1912,15 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu, update_divide_count(apic); start_apic_timer(apic); apic->irr_pending = true; - apic->isr_count = kvm_x86_ops->hwapic_isr_update ? + apic->isr_count = vcpu->arch.apicv_active ? 1 : count_vectors(apic->regs + APIC_ISR); apic->highest_isr_cache = -1; - if (kvm_x86_ops->hwapic_irr_update) + if (vcpu->arch.apicv_active) { kvm_x86_ops->hwapic_irr_update(vcpu, apic_find_highest_irr(apic)); - if (unlikely(kvm_x86_ops->hwapic_isr_update)) kvm_x86_ops->hwapic_isr_update(vcpu->kvm, apic_find_highest_isr(apic)); + } kvm_make_request(KVM_REQ_EVENT, vcpu); if (ioapic_in_kernel(vcpu->kvm)) kvm_rtc_eoi_tracking_restore_one(vcpu); diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index fde8e35d5850..41bdb35b4b67 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -143,9 +143,9 @@ static inline int apic_x2apic_mode(struct kvm_lapic *apic) return apic->vcpu->arch.apic_base & X2APIC_ENABLE; } -static inline bool kvm_vcpu_apic_vid_enabled(struct kvm_vcpu *vcpu) +static inline bool kvm_vcpu_apicv_active(struct kvm_vcpu *vcpu) { - return kvm_x86_ops->cpu_uses_apicv(vcpu); + return vcpu->arch.apic && vcpu->arch.apicv_active; } static inline bool kvm_apic_has_events(struct kvm_vcpu *vcpu) @@ -164,6 +164,11 @@ static inline int kvm_lapic_latched_init(struct kvm_vcpu *vcpu) return kvm_vcpu_has_lapic(vcpu) && test_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events); } +static inline int kvm_apic_id(struct kvm_lapic *apic) +{ + return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff; +} + bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector); void wait_lapic_expire(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index e7c2c1428a69..95a955de5964 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -259,7 +259,7 @@ static unsigned get_mmio_spte_access(u64 spte) } static bool set_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn, - pfn_t pfn, unsigned access) + kvm_pfn_t pfn, unsigned access) { if (unlikely(is_noslot_pfn(pfn))) { mark_mmio_spte(vcpu, sptep, gfn, access); @@ -311,11 +311,6 @@ static int is_large_pte(u64 pte) return pte & PT_PAGE_SIZE_MASK; } -static int is_rmap_spte(u64 pte) -{ - return is_shadow_present_pte(pte); -} - static int is_last_spte(u64 pte, int level) { if (level == PT_PAGE_TABLE_LEVEL) @@ -325,7 +320,7 @@ static int is_last_spte(u64 pte, int level) return 0; } -static pfn_t spte_to_pfn(u64 pte) +static kvm_pfn_t spte_to_pfn(u64 pte) { return (pte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT; } @@ -540,7 +535,7 @@ static bool mmu_spte_update(u64 *sptep, u64 new_spte) u64 old_spte = *sptep; bool ret = false; - WARN_ON(!is_rmap_spte(new_spte)); + WARN_ON(!is_shadow_present_pte(new_spte)); if (!is_shadow_present_pte(old_spte)) { mmu_spte_set(sptep, new_spte); @@ -587,7 +582,7 @@ static bool mmu_spte_update(u64 *sptep, u64 new_spte) */ static int mmu_spte_clear_track_bits(u64 *sptep) { - pfn_t pfn; + kvm_pfn_t pfn; u64 old_spte = *sptep; if (!spte_has_volatile_bits(old_spte)) @@ -595,7 +590,7 @@ static int mmu_spte_clear_track_bits(u64 *sptep) else old_spte = __update_clear_spte_slow(sptep, 0ull); - if (!is_rmap_spte(old_spte)) + if (!is_shadow_present_pte(old_spte)) return 0; pfn = spte_to_pfn(old_spte); @@ -909,36 +904,35 @@ static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn, } /* - * Pte mapping structures: + * About rmap_head encoding: * - * If pte_list bit zero is zero, then pte_list point to the spte. - * - * If pte_list bit zero is one, (then pte_list & ~1) points to a struct + * If the bit zero of rmap_head->val is clear, then it points to the only spte + * in this rmap chain. Otherwise, (rmap_head->val & ~1) points to a struct * pte_list_desc containing more mappings. - * - * Returns the number of pte entries before the spte was added or zero if - * the spte was not added. - * + */ + +/* + * Returns the number of pointers in the rmap chain, not counting the new one. */ static int pte_list_add(struct kvm_vcpu *vcpu, u64 *spte, - unsigned long *pte_list) + struct kvm_rmap_head *rmap_head) { struct pte_list_desc *desc; int i, count = 0; - if (!*pte_list) { + if (!rmap_head->val) { rmap_printk("pte_list_add: %p %llx 0->1\n", spte, *spte); - *pte_list = (unsigned long)spte; - } else if (!(*pte_list & 1)) { + rmap_head->val = (unsigned long)spte; + } else if (!(rmap_head->val & 1)) { rmap_printk("pte_list_add: %p %llx 1->many\n", spte, *spte); desc = mmu_alloc_pte_list_desc(vcpu); - desc->sptes[0] = (u64 *)*pte_list; + desc->sptes[0] = (u64 *)rmap_head->val; desc->sptes[1] = spte; - *pte_list = (unsigned long)desc | 1; + rmap_head->val = (unsigned long)desc | 1; ++count; } else { rmap_printk("pte_list_add: %p %llx many->many\n", spte, *spte); - desc = (struct pte_list_desc *)(*pte_list & ~1ul); + desc = (struct pte_list_desc *)(rmap_head->val & ~1ul); while (desc->sptes[PTE_LIST_EXT-1] && desc->more) { desc = desc->more; count += PTE_LIST_EXT; @@ -955,8 +949,9 @@ static int pte_list_add(struct kvm_vcpu *vcpu, u64 *spte, } static void -pte_list_desc_remove_entry(unsigned long *pte_list, struct pte_list_desc *desc, - int i, struct pte_list_desc *prev_desc) +pte_list_desc_remove_entry(struct kvm_rmap_head *rmap_head, + struct pte_list_desc *desc, int i, + struct pte_list_desc *prev_desc) { int j; @@ -967,43 +962,43 @@ pte_list_desc_remove_entry(unsigned long *pte_list, struct pte_list_desc *desc, if (j != 0) return; if (!prev_desc && !desc->more) - *pte_list = (unsigned long)desc->sptes[0]; + rmap_head->val = (unsigned long)desc->sptes[0]; else if (prev_desc) prev_desc->more = desc->more; else - *pte_list = (unsigned long)desc->more | 1; + rmap_head->val = (unsigned long)desc->more | 1; mmu_free_pte_list_desc(desc); } -static void pte_list_remove(u64 *spte, unsigned long *pte_list) +static void pte_list_remove(u64 *spte, struct kvm_rmap_head *rmap_head) { struct pte_list_desc *desc; struct pte_list_desc *prev_desc; int i; - if (!*pte_list) { + if (!rmap_head->val) { printk(KERN_ERR "pte_list_remove: %p 0->BUG\n", spte); BUG(); - } else if (!(*pte_list & 1)) { + } else if (!(rmap_head->val & 1)) { rmap_printk("pte_list_remove: %p 1->0\n", spte); - if ((u64 *)*pte_list != spte) { + if ((u64 *)rmap_head->val != spte) { printk(KERN_ERR "pte_list_remove: %p 1->BUG\n", spte); BUG(); } - *pte_list = 0; + rmap_head->val = 0; } else { rmap_printk("pte_list_remove: %p many->many\n", spte); - desc = (struct pte_list_desc *)(*pte_list & ~1ul); + desc = (struct pte_list_desc *)(rmap_head->val & ~1ul); prev_desc = NULL; while (desc) { - for (i = 0; i < PTE_LIST_EXT && desc->sptes[i]; ++i) + for (i = 0; i < PTE_LIST_EXT && desc->sptes[i]; ++i) { if (desc->sptes[i] == spte) { - pte_list_desc_remove_entry(pte_list, - desc, i, - prev_desc); + pte_list_desc_remove_entry(rmap_head, + desc, i, prev_desc); return; } + } prev_desc = desc; desc = desc->more; } @@ -1012,28 +1007,8 @@ static void pte_list_remove(u64 *spte, unsigned long *pte_list) } } -typedef void (*pte_list_walk_fn) (u64 *spte); -static void pte_list_walk(unsigned long *pte_list, pte_list_walk_fn fn) -{ - struct pte_list_desc *desc; - int i; - - if (!*pte_list) - return; - - if (!(*pte_list & 1)) - return fn((u64 *)*pte_list); - - desc = (struct pte_list_desc *)(*pte_list & ~1ul); - while (desc) { - for (i = 0; i < PTE_LIST_EXT && desc->sptes[i]; ++i) - fn(desc->sptes[i]); - desc = desc->more; - } -} - -static unsigned long *__gfn_to_rmap(gfn_t gfn, int level, - struct kvm_memory_slot *slot) +static struct kvm_rmap_head *__gfn_to_rmap(gfn_t gfn, int level, + struct kvm_memory_slot *slot) { unsigned long idx; @@ -1041,10 +1016,8 @@ static unsigned long *__gfn_to_rmap(gfn_t gfn, int level, return &slot->arch.rmap[level - PT_PAGE_TABLE_LEVEL][idx]; } -/* - * Take gfn and return the reverse mapping to it. - */ -static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, struct kvm_mmu_page *sp) +static struct kvm_rmap_head *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, + struct kvm_mmu_page *sp) { struct kvm_memslots *slots; struct kvm_memory_slot *slot; @@ -1065,24 +1038,24 @@ static bool rmap_can_add(struct kvm_vcpu *vcpu) static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) { struct kvm_mmu_page *sp; - unsigned long *rmapp; + struct kvm_rmap_head *rmap_head; sp = page_header(__pa(spte)); kvm_mmu_page_set_gfn(sp, spte - sp->spt, gfn); - rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp); - return pte_list_add(vcpu, spte, rmapp); + rmap_head = gfn_to_rmap(vcpu->kvm, gfn, sp); + return pte_list_add(vcpu, spte, rmap_head); } static void rmap_remove(struct kvm *kvm, u64 *spte) { struct kvm_mmu_page *sp; gfn_t gfn; - unsigned long *rmapp; + struct kvm_rmap_head *rmap_head; sp = page_header(__pa(spte)); gfn = kvm_mmu_page_get_gfn(sp, spte - sp->spt); - rmapp = gfn_to_rmap(kvm, gfn, sp); - pte_list_remove(spte, rmapp); + rmap_head = gfn_to_rmap(kvm, gfn, sp); + pte_list_remove(spte, rmap_head); } /* @@ -1102,19 +1075,26 @@ struct rmap_iterator { * * Returns sptep if found, NULL otherwise. */ -static u64 *rmap_get_first(unsigned long rmap, struct rmap_iterator *iter) +static u64 *rmap_get_first(struct kvm_rmap_head *rmap_head, + struct rmap_iterator *iter) { - if (!rmap) + u64 *sptep; + + if (!rmap_head->val) return NULL; - if (!(rmap & 1)) { + if (!(rmap_head->val & 1)) { iter->desc = NULL; - return (u64 *)rmap; + sptep = (u64 *)rmap_head->val; + goto out; } - iter->desc = (struct pte_list_desc *)(rmap & ~1ul); + iter->desc = (struct pte_list_desc *)(rmap_head->val & ~1ul); iter->pos = 0; - return iter->desc->sptes[iter->pos]; + sptep = iter->desc->sptes[iter->pos]; +out: + BUG_ON(!is_shadow_present_pte(*sptep)); + return sptep; } /* @@ -1124,14 +1104,14 @@ static u64 *rmap_get_first(unsigned long rmap, struct rmap_iterator *iter) */ static u64 *rmap_get_next(struct rmap_iterator *iter) { + u64 *sptep; + if (iter->desc) { if (iter->pos < PTE_LIST_EXT - 1) { - u64 *sptep; - ++iter->pos; sptep = iter->desc->sptes[iter->pos]; if (sptep) - return sptep; + goto out; } iter->desc = iter->desc->more; @@ -1139,17 +1119,20 @@ static u64 *rmap_get_next(struct rmap_iterator *iter) if (iter->desc) { iter->pos = 0; /* desc->sptes[0] cannot be NULL */ - return iter->desc->sptes[iter->pos]; + sptep = iter->desc->sptes[iter->pos]; + goto out; } } return NULL; +out: + BUG_ON(!is_shadow_present_pte(*sptep)); + return sptep; } -#define for_each_rmap_spte(_rmap_, _iter_, _spte_) \ - for (_spte_ = rmap_get_first(*_rmap_, _iter_); \ - _spte_ && ({BUG_ON(!is_shadow_present_pte(*_spte_)); 1;}); \ - _spte_ = rmap_get_next(_iter_)) +#define for_each_rmap_spte(_rmap_head_, _iter_, _spte_) \ + for (_spte_ = rmap_get_first(_rmap_head_, _iter_); \ + _spte_; _spte_ = rmap_get_next(_iter_)) static void drop_spte(struct kvm *kvm, u64 *sptep) { @@ -1207,14 +1190,15 @@ static bool spte_write_protect(struct kvm *kvm, u64 *sptep, bool pt_protect) return mmu_spte_update(sptep, spte); } -static bool __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp, +static bool __rmap_write_protect(struct kvm *kvm, + struct kvm_rmap_head *rmap_head, bool pt_protect) { u64 *sptep; struct rmap_iterator iter; bool flush = false; - for_each_rmap_spte(rmapp, &iter, sptep) + for_each_rmap_spte(rmap_head, &iter, sptep) flush |= spte_write_protect(kvm, sptep, pt_protect); return flush; @@ -1231,13 +1215,13 @@ static bool spte_clear_dirty(struct kvm *kvm, u64 *sptep) return mmu_spte_update(sptep, spte); } -static bool __rmap_clear_dirty(struct kvm *kvm, unsigned long *rmapp) +static bool __rmap_clear_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head) { u64 *sptep; struct rmap_iterator iter; bool flush = false; - for_each_rmap_spte(rmapp, &iter, sptep) + for_each_rmap_spte(rmap_head, &iter, sptep) flush |= spte_clear_dirty(kvm, sptep); return flush; @@ -1254,13 +1238,13 @@ static bool spte_set_dirty(struct kvm *kvm, u64 *sptep) return mmu_spte_update(sptep, spte); } -static bool __rmap_set_dirty(struct kvm *kvm, unsigned long *rmapp) +static bool __rmap_set_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head) { u64 *sptep; struct rmap_iterator iter; bool flush = false; - for_each_rmap_spte(rmapp, &iter, sptep) + for_each_rmap_spte(rmap_head, &iter, sptep) flush |= spte_set_dirty(kvm, sptep); return flush; @@ -1280,12 +1264,12 @@ static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn_offset, unsigned long mask) { - unsigned long *rmapp; + struct kvm_rmap_head *rmap_head; while (mask) { - rmapp = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask), - PT_PAGE_TABLE_LEVEL, slot); - __rmap_write_protect(kvm, rmapp, false); + rmap_head = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask), + PT_PAGE_TABLE_LEVEL, slot); + __rmap_write_protect(kvm, rmap_head, false); /* clear the first set bit */ mask &= mask - 1; @@ -1305,12 +1289,12 @@ void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn_offset, unsigned long mask) { - unsigned long *rmapp; + struct kvm_rmap_head *rmap_head; while (mask) { - rmapp = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask), - PT_PAGE_TABLE_LEVEL, slot); - __rmap_clear_dirty(kvm, rmapp); + rmap_head = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask), + PT_PAGE_TABLE_LEVEL, slot); + __rmap_clear_dirty(kvm, rmap_head); /* clear the first set bit */ mask &= mask - 1; @@ -1342,28 +1326,27 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm, static bool rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn) { struct kvm_memory_slot *slot; - unsigned long *rmapp; + struct kvm_rmap_head *rmap_head; int i; bool write_protected = false; slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); for (i = PT_PAGE_TABLE_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) { - rmapp = __gfn_to_rmap(gfn, i, slot); - write_protected |= __rmap_write_protect(vcpu->kvm, rmapp, true); + rmap_head = __gfn_to_rmap(gfn, i, slot); + write_protected |= __rmap_write_protect(vcpu->kvm, rmap_head, true); } return write_protected; } -static bool kvm_zap_rmapp(struct kvm *kvm, unsigned long *rmapp) +static bool kvm_zap_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head) { u64 *sptep; struct rmap_iterator iter; bool flush = false; - while ((sptep = rmap_get_first(*rmapp, &iter))) { - BUG_ON(!(*sptep & PT_PRESENT_MASK)); + while ((sptep = rmap_get_first(rmap_head, &iter))) { rmap_printk("%s: spte %p %llx.\n", __func__, sptep, *sptep); drop_spte(kvm, sptep); @@ -1373,14 +1356,14 @@ static bool kvm_zap_rmapp(struct kvm *kvm, unsigned long *rmapp) return flush; } -static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, +static int kvm_unmap_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head, struct kvm_memory_slot *slot, gfn_t gfn, int level, unsigned long data) { - return kvm_zap_rmapp(kvm, rmapp); + return kvm_zap_rmapp(kvm, rmap_head); } -static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, +static int kvm_set_pte_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head, struct kvm_memory_slot *slot, gfn_t gfn, int level, unsigned long data) { @@ -1389,13 +1372,13 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, int need_flush = 0; u64 new_spte; pte_t *ptep = (pte_t *)data; - pfn_t new_pfn; + kvm_pfn_t new_pfn; WARN_ON(pte_huge(*ptep)); new_pfn = pte_pfn(*ptep); restart: - for_each_rmap_spte(rmapp, &iter, sptep) { + for_each_rmap_spte(rmap_head, &iter, sptep) { rmap_printk("kvm_set_pte_rmapp: spte %p %llx gfn %llx (%d)\n", sptep, *sptep, gfn, level); @@ -1433,11 +1416,11 @@ struct slot_rmap_walk_iterator { /* output fields. */ gfn_t gfn; - unsigned long *rmap; + struct kvm_rmap_head *rmap; int level; /* private field. */ - unsigned long *end_rmap; + struct kvm_rmap_head *end_rmap; }; static void @@ -1496,7 +1479,7 @@ static int kvm_handle_hva_range(struct kvm *kvm, unsigned long end, unsigned long data, int (*handler)(struct kvm *kvm, - unsigned long *rmapp, + struct kvm_rmap_head *rmap_head, struct kvm_memory_slot *slot, gfn_t gfn, int level, @@ -1540,7 +1523,8 @@ static int kvm_handle_hva_range(struct kvm *kvm, static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, unsigned long data, - int (*handler)(struct kvm *kvm, unsigned long *rmapp, + int (*handler)(struct kvm *kvm, + struct kvm_rmap_head *rmap_head, struct kvm_memory_slot *slot, gfn_t gfn, int level, unsigned long data)) @@ -1563,7 +1547,7 @@ void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) kvm_handle_hva(kvm, hva, (unsigned long)&pte, kvm_set_pte_rmapp); } -static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, +static int kvm_age_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head, struct kvm_memory_slot *slot, gfn_t gfn, int level, unsigned long data) { @@ -1573,18 +1557,19 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, BUG_ON(!shadow_accessed_mask); - for_each_rmap_spte(rmapp, &iter, sptep) + for_each_rmap_spte(rmap_head, &iter, sptep) { if (*sptep & shadow_accessed_mask) { young = 1; clear_bit((ffs(shadow_accessed_mask) - 1), (unsigned long *)sptep); } + } trace_kvm_age_page(gfn, level, slot, young); return young; } -static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp, +static int kvm_test_age_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head, struct kvm_memory_slot *slot, gfn_t gfn, int level, unsigned long data) { @@ -1600,11 +1585,12 @@ static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp, if (!shadow_accessed_mask) goto out; - for_each_rmap_spte(rmapp, &iter, sptep) + for_each_rmap_spte(rmap_head, &iter, sptep) { if (*sptep & shadow_accessed_mask) { young = 1; break; } + } out: return young; } @@ -1613,14 +1599,14 @@ out: static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) { - unsigned long *rmapp; + struct kvm_rmap_head *rmap_head; struct kvm_mmu_page *sp; sp = page_header(__pa(spte)); - rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp); + rmap_head = gfn_to_rmap(vcpu->kvm, gfn, sp); - kvm_unmap_rmapp(vcpu->kvm, rmapp, NULL, gfn, sp->role.level, 0); + kvm_unmap_rmapp(vcpu->kvm, rmap_head, NULL, gfn, sp->role.level, 0); kvm_flush_remote_tlbs(vcpu->kvm); } @@ -1720,8 +1706,7 @@ static void drop_parent_pte(struct kvm_mmu_page *sp, mmu_spte_clear_no_track(parent_pte); } -static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, - u64 *parent_pte, int direct) +static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, int direct) { struct kvm_mmu_page *sp; @@ -1737,8 +1722,6 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, * this feature. See the comments in kvm_zap_obsolete_pages(). */ list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); - sp->parent_ptes = 0; - mmu_page_add_parent_pte(vcpu, sp, parent_pte); kvm_mod_used_mmu_pages(vcpu->kvm, +1); return sp; } @@ -1746,7 +1729,12 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, static void mark_unsync(u64 *spte); static void kvm_mmu_mark_parents_unsync(struct kvm_mmu_page *sp) { - pte_list_walk(&sp->parent_ptes, mark_unsync); + u64 *sptep; + struct rmap_iterator iter; + + for_each_rmap_spte(&sp->parent_ptes, &iter, sptep) { + mark_unsync(sptep); + } } static void mark_unsync(u64 *spte) @@ -1806,6 +1794,13 @@ static int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp, return (pvec->nr == KVM_PAGE_ARRAY_NR); } +static inline void clear_unsync_child_bit(struct kvm_mmu_page *sp, int idx) +{ + --sp->unsync_children; + WARN_ON((int)sp->unsync_children < 0); + __clear_bit(idx, sp->unsync_child_bitmap); +} + static int __mmu_unsync_walk(struct kvm_mmu_page *sp, struct kvm_mmu_pages *pvec) { @@ -1815,8 +1810,10 @@ static int __mmu_unsync_walk(struct kvm_mmu_page *sp, struct kvm_mmu_page *child; u64 ent = sp->spt[i]; - if (!is_shadow_present_pte(ent) || is_large_pte(ent)) - goto clear_child_bitmap; + if (!is_shadow_present_pte(ent) || is_large_pte(ent)) { + clear_unsync_child_bit(sp, i); + continue; + } child = page_header(ent & PT64_BASE_ADDR_MASK); @@ -1825,28 +1822,21 @@ static int __mmu_unsync_walk(struct kvm_mmu_page *sp, return -ENOSPC; ret = __mmu_unsync_walk(child, pvec); - if (!ret) - goto clear_child_bitmap; - else if (ret > 0) + if (!ret) { + clear_unsync_child_bit(sp, i); + continue; + } else if (ret > 0) { nr_unsync_leaf += ret; - else + } else return ret; } else if (child->unsync) { nr_unsync_leaf++; if (mmu_pages_add(pvec, child, i)) return -ENOSPC; } else - goto clear_child_bitmap; - - continue; - -clear_child_bitmap: - __clear_bit(i, sp->unsync_child_bitmap); - sp->unsync_children--; - WARN_ON((int)sp->unsync_children < 0); + clear_unsync_child_bit(sp, i); } - return nr_unsync_leaf; } @@ -2009,9 +1999,7 @@ static void mmu_pages_clear_parents(struct mmu_page_path *parents) if (!sp) return; - --sp->unsync_children; - WARN_ON((int)sp->unsync_children < 0); - __clear_bit(idx, sp->unsync_child_bitmap); + clear_unsync_child_bit(sp, idx); level++; } while (level < PT64_ROOT_LEVEL-1 && !sp->unsync_children); } @@ -2053,14 +2041,6 @@ static void mmu_sync_children(struct kvm_vcpu *vcpu, } } -static void init_shadow_page_table(struct kvm_mmu_page *sp) -{ - int i; - - for (i = 0; i < PT64_ENT_PER_PAGE; ++i) - sp->spt[i] = 0ull; -} - static void __clear_sp_write_flooding_count(struct kvm_mmu_page *sp) { sp->write_flooding_count = 0; @@ -2083,8 +2063,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, gva_t gaddr, unsigned level, int direct, - unsigned access, - u64 *parent_pte) + unsigned access) { union kvm_mmu_page_role role; unsigned quadrant; @@ -2116,21 +2095,18 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, if (sp->unsync && kvm_sync_page_transient(vcpu, sp)) break; - mmu_page_add_parent_pte(vcpu, sp, parent_pte); - if (sp->unsync_children) { + if (sp->unsync_children) kvm_make_request(KVM_REQ_MMU_SYNC, vcpu); - kvm_mmu_mark_parents_unsync(sp); - } else if (sp->unsync) - kvm_mmu_mark_parents_unsync(sp); __clear_sp_write_flooding_count(sp); trace_kvm_mmu_get_page(sp, false); return sp; } + ++vcpu->kvm->stat.mmu_cache_miss; - sp = kvm_mmu_alloc_page(vcpu, parent_pte, direct); - if (!sp) - return sp; + + sp = kvm_mmu_alloc_page(vcpu, direct); + sp->gfn = gfn; sp->role = role; hlist_add_head(&sp->hash_link, @@ -2144,7 +2120,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, account_shadowed(vcpu->kvm, sp); } sp->mmu_valid_gen = vcpu->kvm->arch.mmu_valid_gen; - init_shadow_page_table(sp); + clear_page(sp->spt); trace_kvm_mmu_get_page(sp, true); return sp; } @@ -2198,7 +2174,8 @@ static void shadow_walk_next(struct kvm_shadow_walk_iterator *iterator) return __shadow_walk_next(iterator, *iterator->sptep); } -static void link_shadow_page(u64 *sptep, struct kvm_mmu_page *sp, bool accessed) +static void link_shadow_page(struct kvm_vcpu *vcpu, u64 *sptep, + struct kvm_mmu_page *sp) { u64 spte; @@ -2206,12 +2183,14 @@ static void link_shadow_page(u64 *sptep, struct kvm_mmu_page *sp, bool accessed) VMX_EPT_WRITABLE_MASK != PT_WRITABLE_MASK); spte = __pa(sp->spt) | PT_PRESENT_MASK | PT_WRITABLE_MASK | - shadow_user_mask | shadow_x_mask; - - if (accessed) - spte |= shadow_accessed_mask; + shadow_user_mask | shadow_x_mask | shadow_accessed_mask; mmu_spte_set(sptep, spte); + + mmu_page_add_parent_pte(vcpu, sp, sptep); + + if (sp->unsync_children || sp->unsync) + mark_unsync(sptep); } static void validate_direct_spte(struct kvm_vcpu *vcpu, u64 *sptep, @@ -2270,17 +2249,12 @@ static void kvm_mmu_page_unlink_children(struct kvm *kvm, mmu_page_zap_pte(kvm, sp, sp->spt + i); } -static void kvm_mmu_put_page(struct kvm_mmu_page *sp, u64 *parent_pte) -{ - mmu_page_remove_parent_pte(sp, parent_pte); -} - static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp) { u64 *sptep; struct rmap_iterator iter; - while ((sptep = rmap_get_first(sp->parent_ptes, &iter))) + while ((sptep = rmap_get_first(&sp->parent_ptes, &iter))) drop_parent_pte(sp, sptep); } @@ -2476,7 +2450,7 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn, return 0; } -static bool kvm_is_mmio_pfn(pfn_t pfn) +static bool kvm_is_mmio_pfn(kvm_pfn_t pfn) { if (pfn_valid(pfn)) return !is_zero_pfn(pfn) && PageReserved(pfn_to_page(pfn)); @@ -2486,7 +2460,7 @@ static bool kvm_is_mmio_pfn(pfn_t pfn) static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access, int level, - gfn_t gfn, pfn_t pfn, bool speculative, + gfn_t gfn, kvm_pfn_t pfn, bool speculative, bool can_unsync, bool host_writable) { u64 spte; @@ -2564,18 +2538,18 @@ done: return ret; } -static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, - unsigned pte_access, int write_fault, int *emulate, - int level, gfn_t gfn, pfn_t pfn, bool speculative, - bool host_writable) +static bool mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access, + int write_fault, int level, gfn_t gfn, kvm_pfn_t pfn, + bool speculative, bool host_writable) { int was_rmapped = 0; int rmap_count; + bool emulate = false; pgprintk("%s: spte %llx write_fault %d gfn %llx\n", __func__, *sptep, write_fault, gfn); - if (is_rmap_spte(*sptep)) { + if (is_shadow_present_pte(*sptep)) { /* * If we overwrite a PTE page pointer with a 2MB PMD, unlink * the parent of the now unreachable PTE. @@ -2600,12 +2574,12 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, if (set_spte(vcpu, sptep, pte_access, level, gfn, pfn, speculative, true, host_writable)) { if (write_fault) - *emulate = 1; + emulate = true; kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); } - if (unlikely(is_mmio_spte(*sptep) && emulate)) - *emulate = 1; + if (unlikely(is_mmio_spte(*sptep))) + emulate = true; pgprintk("%s: setting spte %llx\n", __func__, *sptep); pgprintk("instantiating %s PTE (%s) at %llx (%llx) addr %p\n", @@ -2624,9 +2598,11 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, } kvm_release_pfn_clean(pfn); + + return emulate; } -static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, +static kvm_pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, bool no_dirty_log) { struct kvm_memory_slot *slot; @@ -2658,9 +2634,8 @@ static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu, return -1; for (i = 0; i < ret; i++, gfn++, start++) - mmu_set_spte(vcpu, start, access, 0, NULL, - sp->role.level, gfn, page_to_pfn(pages[i]), - true, true); + mmu_set_spte(vcpu, start, access, 0, sp->role.level, gfn, + page_to_pfn(pages[i]), true, true); return 0; } @@ -2708,9 +2683,8 @@ static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep) __direct_pte_prefetch(vcpu, sp, sptep); } -static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, - int map_writable, int level, gfn_t gfn, pfn_t pfn, - bool prefault) +static int __direct_map(struct kvm_vcpu *vcpu, int write, int map_writable, + int level, gfn_t gfn, kvm_pfn_t pfn, bool prefault) { struct kvm_shadow_walk_iterator iterator; struct kvm_mmu_page *sp; @@ -2722,9 +2696,9 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) { if (iterator.level == level) { - mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, - write, &emulate, level, gfn, pfn, - prefault, map_writable); + emulate = mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, + write, level, gfn, pfn, prefault, + map_writable); direct_pte_prefetch(vcpu, iterator.sptep); ++vcpu->stat.pf_fixed; break; @@ -2737,10 +2711,9 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, base_addr &= PT64_LVL_ADDR_MASK(iterator.level); pseudo_gfn = base_addr >> PAGE_SHIFT; sp = kvm_mmu_get_page(vcpu, pseudo_gfn, iterator.addr, - iterator.level - 1, - 1, ACC_ALL, iterator.sptep); + iterator.level - 1, 1, ACC_ALL); - link_shadow_page(iterator.sptep, sp, true); + link_shadow_page(vcpu, iterator.sptep, sp); } } return emulate; @@ -2759,7 +2732,7 @@ static void kvm_send_hwpoison_signal(unsigned long address, struct task_struct * send_sig_info(SIGBUS, &info, tsk); } -static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, pfn_t pfn) +static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn) { /* * Do not cache the mmio info caused by writing the readonly gfn @@ -2779,9 +2752,10 @@ static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, pfn_t pfn) } static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu, - gfn_t *gfnp, pfn_t *pfnp, int *levelp) + gfn_t *gfnp, kvm_pfn_t *pfnp, + int *levelp) { - pfn_t pfn = *pfnp; + kvm_pfn_t pfn = *pfnp; gfn_t gfn = *gfnp; int level = *levelp; @@ -2820,7 +2794,7 @@ static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu, } static bool handle_abnormal_pfn(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn, - pfn_t pfn, unsigned access, int *ret_val) + kvm_pfn_t pfn, unsigned access, int *ret_val) { bool ret = true; @@ -2919,7 +2893,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level, * If the mapping has been changed, let the vcpu fault on the * same address again. */ - if (!is_rmap_spte(spte)) { + if (!is_shadow_present_pte(spte)) { ret = true; goto exit; } @@ -2974,7 +2948,7 @@ exit: } static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, - gva_t gva, pfn_t *pfn, bool write, bool *writable); + gva_t gva, kvm_pfn_t *pfn, bool write, bool *writable); static void make_mmu_pages_available(struct kvm_vcpu *vcpu); static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, @@ -2983,7 +2957,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, int r; int level; bool force_pt_level = false; - pfn_t pfn; + kvm_pfn_t pfn; unsigned long mmu_seq; bool map_writable, write = error_code & PFERR_WRITE_MASK; @@ -3018,11 +2992,9 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, make_mmu_pages_available(vcpu); if (likely(!force_pt_level)) transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); - r = __direct_map(vcpu, v, write, map_writable, level, gfn, pfn, - prefault); + r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault); spin_unlock(&vcpu->kvm->mmu_lock); - return r; out_unlock: @@ -3097,8 +3069,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu) if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) { spin_lock(&vcpu->kvm->mmu_lock); make_mmu_pages_available(vcpu); - sp = kvm_mmu_get_page(vcpu, 0, 0, PT64_ROOT_LEVEL, - 1, ACC_ALL, NULL); + sp = kvm_mmu_get_page(vcpu, 0, 0, PT64_ROOT_LEVEL, 1, ACC_ALL); ++sp->root_count; spin_unlock(&vcpu->kvm->mmu_lock); vcpu->arch.mmu.root_hpa = __pa(sp->spt); @@ -3110,9 +3081,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu) spin_lock(&vcpu->kvm->mmu_lock); make_mmu_pages_available(vcpu); sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT), - i << 30, - PT32_ROOT_LEVEL, 1, ACC_ALL, - NULL); + i << 30, PT32_ROOT_LEVEL, 1, ACC_ALL); root = __pa(sp->spt); ++sp->root_count; spin_unlock(&vcpu->kvm->mmu_lock); @@ -3149,7 +3118,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) spin_lock(&vcpu->kvm->mmu_lock); make_mmu_pages_available(vcpu); sp = kvm_mmu_get_page(vcpu, root_gfn, 0, PT64_ROOT_LEVEL, - 0, ACC_ALL, NULL); + 0, ACC_ALL); root = __pa(sp->spt); ++sp->root_count; spin_unlock(&vcpu->kvm->mmu_lock); @@ -3182,9 +3151,8 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) } spin_lock(&vcpu->kvm->mmu_lock); make_mmu_pages_available(vcpu); - sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, - PT32_ROOT_LEVEL, 0, - ACC_ALL, NULL); + sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, PT32_ROOT_LEVEL, + 0, ACC_ALL); root = __pa(sp->spt); ++sp->root_count; spin_unlock(&vcpu->kvm->mmu_lock); @@ -3443,7 +3411,7 @@ static bool can_do_async_pf(struct kvm_vcpu *vcpu) } static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, - gva_t gva, pfn_t *pfn, bool write, bool *writable) + gva_t gva, kvm_pfn_t *pfn, bool write, bool *writable) { struct kvm_memory_slot *slot; bool async; @@ -3481,7 +3449,7 @@ check_hugepage_cache_consistency(struct kvm_vcpu *vcpu, gfn_t gfn, int level) static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, bool prefault) { - pfn_t pfn; + kvm_pfn_t pfn; int r; int level; bool force_pt_level; @@ -3531,8 +3499,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, make_mmu_pages_available(vcpu); if (likely(!force_pt_level)) transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); - r = __direct_map(vcpu, gpa, write, map_writable, - level, gfn, pfn, prefault); + r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault); spin_unlock(&vcpu->kvm->mmu_lock); return r; @@ -4058,10 +4025,12 @@ static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu) g_context->inject_page_fault = kvm_inject_page_fault; /* - * Note that arch.mmu.gva_to_gpa translates l2_gva to l1_gpa. The - * translation of l2_gpa to l1_gpa addresses is done using the - * arch.nested_mmu.gva_to_gpa function. Basically the gva_to_gpa - * functions between mmu and nested_mmu are swapped. + * Note that arch.mmu.gva_to_gpa translates l2_gpa to l1_gpa using + * L1's nested page tables (e.g. EPT12). The nested translation + * of l2_gva to l1_gpa is done by arch.nested_mmu.gva_to_gpa using + * L2's page tables as the first level of translation and L1's + * nested page tables as the second level of translation. Basically + * the gva_to_gpa functions between mmu and nested_mmu are swapped. */ if (!is_paging(vcpu)) { g_context->nx = false; @@ -4495,7 +4464,7 @@ void kvm_mmu_setup(struct kvm_vcpu *vcpu) } /* The return value indicates if tlb flush on all vcpus is needed. */ -typedef bool (*slot_level_handler) (struct kvm *kvm, unsigned long *rmap); +typedef bool (*slot_level_handler) (struct kvm *kvm, struct kvm_rmap_head *rmap_head); /* The caller should hold mmu-lock before calling this function. */ static bool @@ -4589,9 +4558,10 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end) spin_unlock(&kvm->mmu_lock); } -static bool slot_rmap_write_protect(struct kvm *kvm, unsigned long *rmapp) +static bool slot_rmap_write_protect(struct kvm *kvm, + struct kvm_rmap_head *rmap_head) { - return __rmap_write_protect(kvm, rmapp, false); + return __rmap_write_protect(kvm, rmap_head, false); } void kvm_mmu_slot_remove_write_access(struct kvm *kvm, @@ -4627,16 +4597,16 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, } static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm, - unsigned long *rmapp) + struct kvm_rmap_head *rmap_head) { u64 *sptep; struct rmap_iterator iter; int need_tlb_flush = 0; - pfn_t pfn; + kvm_pfn_t pfn; struct kvm_mmu_page *sp; restart: - for_each_rmap_spte(rmapp, &iter, sptep) { + for_each_rmap_spte(rmap_head, &iter, sptep) { sp = page_header(__pa(sptep)); pfn = spte_to_pfn(*sptep); diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c index 03d518e499a6..dcce533d420c 100644 --- a/arch/x86/kvm/mmu_audit.c +++ b/arch/x86/kvm/mmu_audit.c @@ -97,7 +97,7 @@ static void audit_mappings(struct kvm_vcpu *vcpu, u64 *sptep, int level) { struct kvm_mmu_page *sp; gfn_t gfn; - pfn_t pfn; + kvm_pfn_t pfn; hpa_t hpa; sp = page_header(__pa(sptep)); @@ -129,7 +129,7 @@ static void audit_mappings(struct kvm_vcpu *vcpu, u64 *sptep, int level) static void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep) { static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 10); - unsigned long *rmapp; + struct kvm_rmap_head *rmap_head; struct kvm_mmu_page *rev_sp; struct kvm_memslots *slots; struct kvm_memory_slot *slot; @@ -150,8 +150,8 @@ static void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep) return; } - rmapp = __gfn_to_rmap(gfn, rev_sp->role.level, slot); - if (!*rmapp) { + rmap_head = __gfn_to_rmap(gfn, rev_sp->role.level, slot); + if (!rmap_head->val) { if (!__ratelimit(&ratelimit_state)) return; audit_printk(kvm, "no rmap for writable spte %llx\n", @@ -183,7 +183,7 @@ static void check_mappings_rmap(struct kvm *kvm, struct kvm_mmu_page *sp) return; for (i = 0; i < PT64_ENT_PER_PAGE; ++i) { - if (!is_rmap_spte(sp->spt[i])) + if (!is_shadow_present_pte(sp->spt[i])) continue; inspect_spte_has_rmap(kvm, sp->spt + i); @@ -192,7 +192,7 @@ static void check_mappings_rmap(struct kvm *kvm, struct kvm_mmu_page *sp) static void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp) { - unsigned long *rmapp; + struct kvm_rmap_head *rmap_head; u64 *sptep; struct rmap_iterator iter; struct kvm_memslots *slots; @@ -203,13 +203,14 @@ static void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp) slots = kvm_memslots_for_spte_role(kvm, sp->role); slot = __gfn_to_memslot(slots, sp->gfn); - rmapp = __gfn_to_rmap(sp->gfn, PT_PAGE_TABLE_LEVEL, slot); + rmap_head = __gfn_to_rmap(sp->gfn, PT_PAGE_TABLE_LEVEL, slot); - for_each_rmap_spte(rmapp, &iter, sptep) + for_each_rmap_spte(rmap_head, &iter, sptep) { if (is_writable_pte(*sptep)) audit_printk(kvm, "shadow page has writable " "mappings: gfn %llx role %x\n", sp->gfn, sp->role.word); + } } static void audit_sp(struct kvm *kvm, struct kvm_mmu_page *sp) diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 3058a22a658d..6c9fed957cce 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -456,7 +456,7 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, { unsigned pte_access; gfn_t gfn; - pfn_t pfn; + kvm_pfn_t pfn; if (FNAME(prefetch_invalid_gpte)(vcpu, sp, spte, gpte)) return false; @@ -475,8 +475,8 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, * we call mmu_set_spte() with host_writable = true because * pte_prefetch_gfn_to_pfn always gets a writable pfn. */ - mmu_set_spte(vcpu, spte, pte_access, 0, NULL, PT_PAGE_TABLE_LEVEL, - gfn, pfn, true, true); + mmu_set_spte(vcpu, spte, pte_access, 0, PT_PAGE_TABLE_LEVEL, gfn, pfn, + true, true); return true; } @@ -551,12 +551,12 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw, static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, struct guest_walker *gw, int write_fault, int hlevel, - pfn_t pfn, bool map_writable, bool prefault) + kvm_pfn_t pfn, bool map_writable, bool prefault) { struct kvm_mmu_page *sp = NULL; struct kvm_shadow_walk_iterator it; unsigned direct_access, access = gw->pt_access; - int top_level, emulate = 0; + int top_level, emulate; direct_access = gw->pte_access; @@ -587,7 +587,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, if (!is_shadow_present_pte(*it.sptep)) { table_gfn = gw->table_gfn[it.level - 2]; sp = kvm_mmu_get_page(vcpu, table_gfn, addr, it.level-1, - false, access, it.sptep); + false, access); } /* @@ -598,7 +598,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, goto out_gpte_changed; if (sp) - link_shadow_page(it.sptep, sp, PT_GUEST_ACCESSED_MASK); + link_shadow_page(vcpu, it.sptep, sp); } for (; @@ -617,20 +617,18 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, direct_gfn = gw->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1); sp = kvm_mmu_get_page(vcpu, direct_gfn, addr, it.level-1, - true, direct_access, it.sptep); - link_shadow_page(it.sptep, sp, PT_GUEST_ACCESSED_MASK); + true, direct_access); + link_shadow_page(vcpu, it.sptep, sp); } clear_sp_write_flooding_count(it.sptep); - mmu_set_spte(vcpu, it.sptep, gw->pte_access, write_fault, &emulate, - it.level, gw->gfn, pfn, prefault, map_writable); + emulate = mmu_set_spte(vcpu, it.sptep, gw->pte_access, write_fault, + it.level, gw->gfn, pfn, prefault, map_writable); FNAME(pte_prefetch)(vcpu, gw, it.sptep); return emulate; out_gpte_changed: - if (sp) - kvm_mmu_put_page(sp, it.sptep); kvm_release_pfn_clean(pfn); return 0; } @@ -696,7 +694,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, int user_fault = error_code & PFERR_USER_MASK; struct guest_walker walker; int r; - pfn_t pfn; + kvm_pfn_t pfn; int level = PT_PAGE_TABLE_LEVEL; bool force_pt_level = false; unsigned long mmu_seq; diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 899c40f826dd..c13a64b7d789 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -86,6 +86,7 @@ static const u32 host_save_user_msrs[] = { MSR_FS_BASE, #endif MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, + MSR_TSC_AUX, }; #define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs) @@ -135,6 +136,7 @@ struct vcpu_svm { uint64_t asid_generation; uint64_t sysenter_esp; uint64_t sysenter_eip; + uint64_t tsc_aux; u64 next_rip; @@ -1238,6 +1240,9 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) wrmsrl(MSR_AMD64_TSC_RATIO, tsc_ratio); } } + /* This assumes that the kernel never uses MSR_TSC_AUX */ + if (static_cpu_has(X86_FEATURE_RDTSCP)) + wrmsrl(MSR_TSC_AUX, svm->tsc_aux); } static void svm_vcpu_put(struct kvm_vcpu *vcpu) @@ -3024,6 +3029,11 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_SYSENTER_ESP: msr_info->data = svm->sysenter_esp; break; + case MSR_TSC_AUX: + if (!boot_cpu_has(X86_FEATURE_RDTSCP)) + return 1; + msr_info->data = svm->tsc_aux; + break; /* * Nobody will change the following 5 values in the VMCB so we can * safely return them on rdmsr. They will always be 0 until LBRV is @@ -3053,6 +3063,23 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_UCODE_REV: msr_info->data = 0x01000065; break; + case MSR_F15H_IC_CFG: { + + int family, model; + + family = guest_cpuid_family(vcpu); + model = guest_cpuid_model(vcpu); + + if (family < 0 || model < 0) + return kvm_get_msr_common(vcpu, msr_info); + + msr_info->data = 0; + + if (family == 0x15 && + (model >= 0x2 && model < 0x20)) + msr_info->data = 0x1E; + } + break; default: return kvm_get_msr_common(vcpu, msr_info); } @@ -3145,6 +3172,18 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) svm->sysenter_esp = data; svm->vmcb->save.sysenter_esp = data; break; + case MSR_TSC_AUX: + if (!boot_cpu_has(X86_FEATURE_RDTSCP)) + return 1; + + /* + * This is rare, so we update the MSR here instead of using + * direct_access_msrs. Doing that would require a rdmsr in + * svm_vcpu_put. + */ + svm->tsc_aux = data; + wrmsrl(MSR_TSC_AUX, svm->tsc_aux); + break; case MSR_IA32_DEBUGCTLMSR: if (!boot_cpu_has(X86_FEATURE_LBRV)) { vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTL 0x%llx, nop\n", @@ -3561,12 +3600,16 @@ static void svm_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set) return; } -static int svm_cpu_uses_apicv(struct kvm_vcpu *vcpu) +static bool svm_get_enable_apicv(void) { - return 0; + return false; } -static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu) +static void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) +{ +} + +static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) { return; } @@ -4037,7 +4080,7 @@ static int svm_get_lpage_level(void) static bool svm_rdtscp_supported(void) { - return false; + return boot_cpu_has(X86_FEATURE_RDTSCP); } static bool svm_invpcid_supported(void) @@ -4328,7 +4371,8 @@ static struct kvm_x86_ops svm_x86_ops = { .enable_irq_window = enable_irq_window, .update_cr8_intercept = update_cr8_intercept, .set_virtual_x2apic_mode = svm_set_virtual_x2apic_mode, - .cpu_uses_apicv = svm_cpu_uses_apicv, + .get_enable_apicv = svm_get_enable_apicv, + .refresh_apicv_exec_ctrl = svm_refresh_apicv_exec_ctrl, .load_eoi_exitmap = svm_load_eoi_exitmap, .sync_pir_to_irr = svm_sync_pir_to_irr, diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 120302511802..ad9f6a23f139 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -268,7 +268,7 @@ TRACE_EVENT(kvm_inj_virq, #define kvm_trace_sym_exc \ EXS(DE), EXS(DB), EXS(BP), EXS(OF), EXS(BR), EXS(UD), EXS(NM), \ EXS(DF), EXS(TS), EXS(NP), EXS(SS), EXS(GP), EXS(PF), \ - EXS(MF), EXS(MC) + EXS(MF), EXS(AC), EXS(MC) /* * Tracepoint for kvm interrupt injection: @@ -1025,6 +1025,269 @@ TRACE_EVENT(kvm_pi_irte_update, __entry->pi_desc_addr) ); +/* + * Tracepoint for kvm_hv_notify_acked_sint. + */ +TRACE_EVENT(kvm_hv_notify_acked_sint, + TP_PROTO(int vcpu_id, u32 sint), + TP_ARGS(vcpu_id, sint), + + TP_STRUCT__entry( + __field(int, vcpu_id) + __field(u32, sint) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu_id; + __entry->sint = sint; + ), + + TP_printk("vcpu_id %d sint %u", __entry->vcpu_id, __entry->sint) +); + +/* + * Tracepoint for synic_set_irq. + */ +TRACE_EVENT(kvm_hv_synic_set_irq, + TP_PROTO(int vcpu_id, u32 sint, int vector, int ret), + TP_ARGS(vcpu_id, sint, vector, ret), + + TP_STRUCT__entry( + __field(int, vcpu_id) + __field(u32, sint) + __field(int, vector) + __field(int, ret) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu_id; + __entry->sint = sint; + __entry->vector = vector; + __entry->ret = ret; + ), + + TP_printk("vcpu_id %d sint %u vector %d ret %d", + __entry->vcpu_id, __entry->sint, __entry->vector, + __entry->ret) +); + +/* + * Tracepoint for kvm_hv_synic_send_eoi. + */ +TRACE_EVENT(kvm_hv_synic_send_eoi, + TP_PROTO(int vcpu_id, int vector), + TP_ARGS(vcpu_id, vector), + + TP_STRUCT__entry( + __field(int, vcpu_id) + __field(u32, sint) + __field(int, vector) + __field(int, ret) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu_id; + __entry->vector = vector; + ), + + TP_printk("vcpu_id %d vector %d", __entry->vcpu_id, __entry->vector) +); + +/* + * Tracepoint for synic_set_msr. + */ +TRACE_EVENT(kvm_hv_synic_set_msr, + TP_PROTO(int vcpu_id, u32 msr, u64 data, bool host), + TP_ARGS(vcpu_id, msr, data, host), + + TP_STRUCT__entry( + __field(int, vcpu_id) + __field(u32, msr) + __field(u64, data) + __field(bool, host) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu_id; + __entry->msr = msr; + __entry->data = data; + __entry->host = host + ), + + TP_printk("vcpu_id %d msr 0x%x data 0x%llx host %d", + __entry->vcpu_id, __entry->msr, __entry->data, __entry->host) +); + +/* + * Tracepoint for stimer_set_config. + */ +TRACE_EVENT(kvm_hv_stimer_set_config, + TP_PROTO(int vcpu_id, int timer_index, u64 config, bool host), + TP_ARGS(vcpu_id, timer_index, config, host), + + TP_STRUCT__entry( + __field(int, vcpu_id) + __field(int, timer_index) + __field(u64, config) + __field(bool, host) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu_id; + __entry->timer_index = timer_index; + __entry->config = config; + __entry->host = host; + ), + + TP_printk("vcpu_id %d timer %d config 0x%llx host %d", + __entry->vcpu_id, __entry->timer_index, __entry->config, + __entry->host) +); + +/* + * Tracepoint for stimer_set_count. + */ +TRACE_EVENT(kvm_hv_stimer_set_count, + TP_PROTO(int vcpu_id, int timer_index, u64 count, bool host), + TP_ARGS(vcpu_id, timer_index, count, host), + + TP_STRUCT__entry( + __field(int, vcpu_id) + __field(int, timer_index) + __field(u64, count) + __field(bool, host) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu_id; + __entry->timer_index = timer_index; + __entry->count = count; + __entry->host = host; + ), + + TP_printk("vcpu_id %d timer %d count %llu host %d", + __entry->vcpu_id, __entry->timer_index, __entry->count, + __entry->host) +); + +/* + * Tracepoint for stimer_start(periodic timer case). + */ +TRACE_EVENT(kvm_hv_stimer_start_periodic, + TP_PROTO(int vcpu_id, int timer_index, u64 time_now, u64 exp_time), + TP_ARGS(vcpu_id, timer_index, time_now, exp_time), + + TP_STRUCT__entry( + __field(int, vcpu_id) + __field(int, timer_index) + __field(u64, time_now) + __field(u64, exp_time) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu_id; + __entry->timer_index = timer_index; + __entry->time_now = time_now; + __entry->exp_time = exp_time; + ), + + TP_printk("vcpu_id %d timer %d time_now %llu exp_time %llu", + __entry->vcpu_id, __entry->timer_index, __entry->time_now, + __entry->exp_time) +); + +/* + * Tracepoint for stimer_start(one-shot timer case). + */ +TRACE_EVENT(kvm_hv_stimer_start_one_shot, + TP_PROTO(int vcpu_id, int timer_index, u64 time_now, u64 count), + TP_ARGS(vcpu_id, timer_index, time_now, count), + + TP_STRUCT__entry( + __field(int, vcpu_id) + __field(int, timer_index) + __field(u64, time_now) + __field(u64, count) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu_id; + __entry->timer_index = timer_index; + __entry->time_now = time_now; + __entry->count = count; + ), + + TP_printk("vcpu_id %d timer %d time_now %llu count %llu", + __entry->vcpu_id, __entry->timer_index, __entry->time_now, + __entry->count) +); + +/* + * Tracepoint for stimer_timer_callback. + */ +TRACE_EVENT(kvm_hv_stimer_callback, + TP_PROTO(int vcpu_id, int timer_index), + TP_ARGS(vcpu_id, timer_index), + + TP_STRUCT__entry( + __field(int, vcpu_id) + __field(int, timer_index) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu_id; + __entry->timer_index = timer_index; + ), + + TP_printk("vcpu_id %d timer %d", + __entry->vcpu_id, __entry->timer_index) +); + +/* + * Tracepoint for stimer_expiration. + */ +TRACE_EVENT(kvm_hv_stimer_expiration, + TP_PROTO(int vcpu_id, int timer_index, int msg_send_result), + TP_ARGS(vcpu_id, timer_index, msg_send_result), + + TP_STRUCT__entry( + __field(int, vcpu_id) + __field(int, timer_index) + __field(int, msg_send_result) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu_id; + __entry->timer_index = timer_index; + __entry->msg_send_result = msg_send_result; + ), + + TP_printk("vcpu_id %d timer %d msg send result %d", + __entry->vcpu_id, __entry->timer_index, + __entry->msg_send_result) +); + +/* + * Tracepoint for stimer_cleanup. + */ +TRACE_EVENT(kvm_hv_stimer_cleanup, + TP_PROTO(int vcpu_id, int timer_index), + TP_ARGS(vcpu_id, timer_index), + + TP_STRUCT__entry( + __field(int, vcpu_id) + __field(int, timer_index) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu_id; + __entry->timer_index = timer_index; + ), + + TP_printk("vcpu_id %d timer %d", + __entry->vcpu_id, __entry->timer_index) +); + #endif /* _TRACE_KVM_H */ #undef TRACE_INCLUDE_PATH diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 44976a596fa6..e2951b6edbbc 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -19,6 +19,7 @@ #include "irq.h" #include "mmu.h" #include "cpuid.h" +#include "lapic.h" #include <linux/kvm_host.h> #include <linux/module.h> @@ -862,7 +863,6 @@ static void kvm_cpu_vmxon(u64 addr); static void kvm_cpu_vmxoff(void); static bool vmx_mpx_supported(void); static bool vmx_xsaves_supported(void); -static int vmx_cpu_uses_apicv(struct kvm_vcpu *vcpu); static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr); static void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); @@ -870,7 +870,6 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); static bool guest_state_valid(struct kvm_vcpu *vcpu); static u32 vmx_segment_access_rights(struct kvm_segment *var); -static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu); static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx); static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx); static int alloc_identity_pagetable(struct kvm *kvm); @@ -1448,7 +1447,51 @@ static inline void ept_sync_context(u64 eptp) } } -static __always_inline unsigned long vmcs_readl(unsigned long field) +static __always_inline void vmcs_check16(unsigned long field) +{ + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2000, + "16-bit accessor invalid for 64-bit field"); + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2001, + "16-bit accessor invalid for 64-bit high field"); + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x4000, + "16-bit accessor invalid for 32-bit high field"); + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x6000, + "16-bit accessor invalid for natural width field"); +} + +static __always_inline void vmcs_check32(unsigned long field) +{ + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0, + "32-bit accessor invalid for 16-bit field"); + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x6000, + "32-bit accessor invalid for natural width field"); +} + +static __always_inline void vmcs_check64(unsigned long field) +{ + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0, + "64-bit accessor invalid for 16-bit field"); + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2001, + "64-bit accessor invalid for 64-bit high field"); + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x4000, + "64-bit accessor invalid for 32-bit field"); + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x6000, + "64-bit accessor invalid for natural width field"); +} + +static __always_inline void vmcs_checkl(unsigned long field) +{ + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0, + "Natural width accessor invalid for 16-bit field"); + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2000, + "Natural width accessor invalid for 64-bit field"); + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2001, + "Natural width accessor invalid for 64-bit high field"); + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x4000, + "Natural width accessor invalid for 32-bit field"); +} + +static __always_inline unsigned long __vmcs_readl(unsigned long field) { unsigned long value; @@ -1459,23 +1502,32 @@ static __always_inline unsigned long vmcs_readl(unsigned long field) static __always_inline u16 vmcs_read16(unsigned long field) { - return vmcs_readl(field); + vmcs_check16(field); + return __vmcs_readl(field); } static __always_inline u32 vmcs_read32(unsigned long field) { - return vmcs_readl(field); + vmcs_check32(field); + return __vmcs_readl(field); } static __always_inline u64 vmcs_read64(unsigned long field) { + vmcs_check64(field); #ifdef CONFIG_X86_64 - return vmcs_readl(field); + return __vmcs_readl(field); #else - return vmcs_readl(field) | ((u64)vmcs_readl(field+1) << 32); + return __vmcs_readl(field) | ((u64)__vmcs_readl(field+1) << 32); #endif } +static __always_inline unsigned long vmcs_readl(unsigned long field) +{ + vmcs_checkl(field); + return __vmcs_readl(field); +} + static noinline void vmwrite_error(unsigned long field, unsigned long value) { printk(KERN_ERR "vmwrite error: reg %lx value %lx (err %d)\n", @@ -1483,7 +1535,7 @@ static noinline void vmwrite_error(unsigned long field, unsigned long value) dump_stack(); } -static void vmcs_writel(unsigned long field, unsigned long value) +static __always_inline void __vmcs_writel(unsigned long field, unsigned long value) { u8 error; @@ -1493,33 +1545,46 @@ static void vmcs_writel(unsigned long field, unsigned long value) vmwrite_error(field, value); } -static void vmcs_write16(unsigned long field, u16 value) +static __always_inline void vmcs_write16(unsigned long field, u16 value) { - vmcs_writel(field, value); + vmcs_check16(field); + __vmcs_writel(field, value); } -static void vmcs_write32(unsigned long field, u32 value) +static __always_inline void vmcs_write32(unsigned long field, u32 value) { - vmcs_writel(field, value); + vmcs_check32(field); + __vmcs_writel(field, value); } -static void vmcs_write64(unsigned long field, u64 value) +static __always_inline void vmcs_write64(unsigned long field, u64 value) { - vmcs_writel(field, value); + vmcs_check64(field); + __vmcs_writel(field, value); #ifndef CONFIG_X86_64 asm volatile (""); - vmcs_writel(field+1, value >> 32); + __vmcs_writel(field+1, value >> 32); #endif } -static void vmcs_clear_bits(unsigned long field, u32 mask) +static __always_inline void vmcs_writel(unsigned long field, unsigned long value) { - vmcs_writel(field, vmcs_readl(field) & ~mask); + vmcs_checkl(field); + __vmcs_writel(field, value); } -static void vmcs_set_bits(unsigned long field, u32 mask) +static __always_inline void vmcs_clear_bits(unsigned long field, u32 mask) { - vmcs_writel(field, vmcs_readl(field) | mask); + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x2000, + "vmcs_clear_bits does not support 64-bit fields"); + __vmcs_writel(field, __vmcs_readl(field) & ~mask); +} + +static __always_inline void vmcs_set_bits(unsigned long field, u32 mask) +{ + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x2000, + "vmcs_set_bits does not support 64-bit fields"); + __vmcs_writel(field, __vmcs_readl(field) | mask); } static inline void vm_entry_controls_init(struct vcpu_vmx *vmx, u32 val) @@ -2498,7 +2563,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) vmx->nested.nested_vmx_pinbased_ctls_high |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR | PIN_BASED_VMX_PREEMPTION_TIMER; - if (vmx_cpu_uses_apicv(&vmx->vcpu)) + if (kvm_vcpu_apicv_active(&vmx->vcpu)) vmx->nested.nested_vmx_pinbased_ctls_high |= PIN_BASED_POSTED_INTR; @@ -4186,7 +4251,7 @@ out: static int init_rmode_identity_map(struct kvm *kvm) { int i, idx, r = 0; - pfn_t identity_map_pfn; + kvm_pfn_t identity_map_pfn; u32 tmp; if (!enable_ept) @@ -4462,9 +4527,9 @@ static void vmx_disable_intercept_msr_write_x2apic(u32 msr) msr, MSR_TYPE_W); } -static int vmx_cpu_uses_apicv(struct kvm_vcpu *vcpu) +static bool vmx_get_enable_apicv(void) { - return enable_apicv && lapic_in_kernel(vcpu); + return enable_apicv; } static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) @@ -4586,11 +4651,6 @@ static void vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) kvm_apic_update_irr(vcpu, vmx->pi_desc.pir); } -static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu) -{ - return; -} - /* * Set up the vmcs's constant host-state fields, i.e., host-state fields that * will not change in the lifetime of the guest. @@ -4660,11 +4720,18 @@ static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx) { u32 pin_based_exec_ctrl = vmcs_config.pin_based_exec_ctrl; - if (!vmx_cpu_uses_apicv(&vmx->vcpu)) + if (!kvm_vcpu_apicv_active(&vmx->vcpu)) pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR; return pin_based_exec_ctrl; } +static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + + vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx)); +} + static u32 vmx_exec_control(struct vcpu_vmx *vmx) { u32 exec_control = vmcs_config.cpu_based_exec_ctrl; @@ -4703,7 +4770,7 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST; if (!ple_gap) exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING; - if (!vmx_cpu_uses_apicv(&vmx->vcpu)) + if (!kvm_vcpu_apicv_active(&vmx->vcpu)) exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; @@ -4767,7 +4834,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) vmcs_write32(SECONDARY_VM_EXEC_CONTROL, vmx_secondary_exec_control(vmx)); - if (vmx_cpu_uses_apicv(&vmx->vcpu)) { + if (kvm_vcpu_apicv_active(&vmx->vcpu)) { vmcs_write64(EOI_EXIT_BITMAP0, 0); vmcs_write64(EOI_EXIT_BITMAP1, 0); vmcs_write64(EOI_EXIT_BITMAP2, 0); @@ -4775,7 +4842,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) vmcs_write16(GUEST_INTR_STATUS, 0); - vmcs_write64(POSTED_INTR_NV, POSTED_INTR_VECTOR); + vmcs_write16(POSTED_INTR_NV, POSTED_INTR_VECTOR); vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc))); } @@ -4867,7 +4934,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) seg_setup(VCPU_SREG_CS); vmcs_write16(GUEST_CS_SELECTOR, 0xf000); - vmcs_write32(GUEST_CS_BASE, 0xffff0000); + vmcs_writel(GUEST_CS_BASE, 0xffff0000ul); seg_setup(VCPU_SREG_DS); seg_setup(VCPU_SREG_ES); @@ -4903,7 +4970,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE); vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0); - vmcs_write32(GUEST_PENDING_DBG_EXCEPTIONS, 0); + vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, 0); setup_msrs(vmx); @@ -4919,7 +4986,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu); - if (vmx_cpu_uses_apicv(vcpu)) + if (kvm_vcpu_apicv_active(vcpu)) memset(&vmx->pi_desc, 0, sizeof(struct pi_desc)); if (vmx->vpid != 0) @@ -6203,15 +6270,6 @@ static __init int hardware_setup(void) kvm_tsc_scaling_ratio_frac_bits = 48; } - if (enable_apicv) - kvm_x86_ops->update_cr8_intercept = NULL; - else { - kvm_x86_ops->hwapic_irr_update = NULL; - kvm_x86_ops->hwapic_isr_update = NULL; - kvm_x86_ops->deliver_posted_interrupt = NULL; - kvm_x86_ops->sync_pir_to_irr = vmx_sync_pir_to_irr_dummy; - } - vmx_disable_intercept_for_msr(MSR_FS_BASE, false); vmx_disable_intercept_for_msr(MSR_GS_BASE, false); vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true); @@ -7901,7 +7959,7 @@ static void dump_vmcs(void) u32 pin_based_exec_ctrl = vmcs_read32(PIN_BASED_VM_EXEC_CONTROL); u32 secondary_exec_control = 0; unsigned long cr4 = vmcs_readl(GUEST_CR4); - u64 efer = vmcs_readl(GUEST_IA32_EFER); + u64 efer = vmcs_read64(GUEST_IA32_EFER); int i, n; if (cpu_has_secondary_exec_ctrls()) @@ -7917,10 +7975,10 @@ static void dump_vmcs(void) if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT) && (cr4 & X86_CR4_PAE) && !(efer & EFER_LMA)) { - pr_err("PDPTR0 = 0x%016lx PDPTR1 = 0x%016lx\n", - vmcs_readl(GUEST_PDPTR0), vmcs_readl(GUEST_PDPTR1)); - pr_err("PDPTR2 = 0x%016lx PDPTR3 = 0x%016lx\n", - vmcs_readl(GUEST_PDPTR2), vmcs_readl(GUEST_PDPTR3)); + pr_err("PDPTR0 = 0x%016llx PDPTR1 = 0x%016llx\n", + vmcs_read64(GUEST_PDPTR0), vmcs_read64(GUEST_PDPTR1)); + pr_err("PDPTR2 = 0x%016llx PDPTR3 = 0x%016llx\n", + vmcs_read64(GUEST_PDPTR2), vmcs_read64(GUEST_PDPTR3)); } pr_err("RSP = 0x%016lx RIP = 0x%016lx\n", vmcs_readl(GUEST_RSP), vmcs_readl(GUEST_RIP)); @@ -7941,16 +7999,16 @@ static void dump_vmcs(void) vmx_dump_sel("TR: ", GUEST_TR_SELECTOR); if ((vmexit_ctl & (VM_EXIT_SAVE_IA32_PAT | VM_EXIT_SAVE_IA32_EFER)) || (vmentry_ctl & (VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_IA32_EFER))) - pr_err("EFER = 0x%016llx PAT = 0x%016lx\n", - efer, vmcs_readl(GUEST_IA32_PAT)); - pr_err("DebugCtl = 0x%016lx DebugExceptions = 0x%016lx\n", - vmcs_readl(GUEST_IA32_DEBUGCTL), + pr_err("EFER = 0x%016llx PAT = 0x%016llx\n", + efer, vmcs_read64(GUEST_IA32_PAT)); + pr_err("DebugCtl = 0x%016llx DebugExceptions = 0x%016lx\n", + vmcs_read64(GUEST_IA32_DEBUGCTL), vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS)); if (vmentry_ctl & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) - pr_err("PerfGlobCtl = 0x%016lx\n", - vmcs_readl(GUEST_IA32_PERF_GLOBAL_CTRL)); + pr_err("PerfGlobCtl = 0x%016llx\n", + vmcs_read64(GUEST_IA32_PERF_GLOBAL_CTRL)); if (vmentry_ctl & VM_ENTRY_LOAD_BNDCFGS) - pr_err("BndCfgS = 0x%016lx\n", vmcs_readl(GUEST_BNDCFGS)); + pr_err("BndCfgS = 0x%016llx\n", vmcs_read64(GUEST_BNDCFGS)); pr_err("Interruptibility = %08x ActivityState = %08x\n", vmcs_read32(GUEST_INTERRUPTIBILITY_INFO), vmcs_read32(GUEST_ACTIVITY_STATE)); @@ -7979,11 +8037,12 @@ static void dump_vmcs(void) vmcs_read32(HOST_IA32_SYSENTER_CS), vmcs_readl(HOST_IA32_SYSENTER_EIP)); if (vmexit_ctl & (VM_EXIT_LOAD_IA32_PAT | VM_EXIT_LOAD_IA32_EFER)) - pr_err("EFER = 0x%016lx PAT = 0x%016lx\n", - vmcs_readl(HOST_IA32_EFER), vmcs_readl(HOST_IA32_PAT)); + pr_err("EFER = 0x%016llx PAT = 0x%016llx\n", + vmcs_read64(HOST_IA32_EFER), + vmcs_read64(HOST_IA32_PAT)); if (vmexit_ctl & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) - pr_err("PerfGlobCtl = 0x%016lx\n", - vmcs_readl(HOST_IA32_PERF_GLOBAL_CTRL)); + pr_err("PerfGlobCtl = 0x%016llx\n", + vmcs_read64(HOST_IA32_PERF_GLOBAL_CTRL)); pr_err("*** Control State ***\n"); pr_err("PinBased=%08x CPUBased=%08x SecondaryExec=%08x\n", @@ -8006,16 +8065,16 @@ static void dump_vmcs(void) pr_err("IDTVectoring: info=%08x errcode=%08x\n", vmcs_read32(IDT_VECTORING_INFO_FIELD), vmcs_read32(IDT_VECTORING_ERROR_CODE)); - pr_err("TSC Offset = 0x%016lx\n", vmcs_readl(TSC_OFFSET)); + pr_err("TSC Offset = 0x%016llx\n", vmcs_read64(TSC_OFFSET)); if (secondary_exec_control & SECONDARY_EXEC_TSC_SCALING) - pr_err("TSC Multiplier = 0x%016lx\n", - vmcs_readl(TSC_MULTIPLIER)); + pr_err("TSC Multiplier = 0x%016llx\n", + vmcs_read64(TSC_MULTIPLIER)); if (cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW) pr_err("TPR Threshold = 0x%02x\n", vmcs_read32(TPR_THRESHOLD)); if (pin_based_exec_ctrl & PIN_BASED_POSTED_INTR) pr_err("PostedIntrVec = 0x%02x\n", vmcs_read16(POSTED_INTR_NV)); if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT)) - pr_err("EPT pointer = 0x%016lx\n", vmcs_readl(EPT_POINTER)); + pr_err("EPT pointer = 0x%016llx\n", vmcs_read64(EPT_POINTER)); n = vmcs_read32(CR3_TARGET_COUNT); for (i = 0; i + 1 < n; i += 4) pr_err("CR3 target%u=%016lx target%u=%016lx\n", @@ -8154,7 +8213,7 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set) * apicv */ if (!cpu_has_vmx_virtualize_x2apic_mode() || - !vmx_cpu_uses_apicv(vcpu)) + !kvm_vcpu_apicv_active(vcpu)) return; if (!cpu_need_tpr_shadow(vcpu)) @@ -8259,10 +8318,9 @@ static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr) } } -static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu) +static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) { - u64 *eoi_exit_bitmap = vcpu->arch.eoi_exit_bitmap; - if (!vmx_cpu_uses_apicv(vcpu)) + if (!kvm_vcpu_apicv_active(vcpu)) return; vmcs_write64(EOI_EXIT_BITMAP0, eoi_exit_bitmap[0]); @@ -8932,7 +8990,8 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu) best->ebx &= ~bit(X86_FEATURE_INVPCID); } - vmcs_set_secondary_exec_control(secondary_exec_ctl); + if (cpu_has_secondary_exec_ctrls()) + vmcs_set_secondary_exec_control(secondary_exec_ctl); if (static_cpu_has(X86_FEATURE_PCOMMIT) && nested) { if (guest_cpuid_has_pcommit(vcpu)) @@ -9508,7 +9567,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) */ vmx->nested.posted_intr_nv = vmcs12->posted_intr_nv; vmx->nested.pi_pending = false; - vmcs_write64(POSTED_INTR_NV, POSTED_INTR_VECTOR); + vmcs_write16(POSTED_INTR_NV, POSTED_INTR_VECTOR); vmcs_write64(POSTED_INTR_DESC_ADDR, page_to_phys(vmx->nested.pi_desc_page) + (unsigned long)(vmcs12->posted_intr_desc_addr & @@ -10169,7 +10228,7 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, * Additionally, restore L2's PDPTR to vmcs12. */ if (enable_ept) { - vmcs12->guest_cr3 = vmcs_read64(GUEST_CR3); + vmcs12->guest_cr3 = vmcs_readl(GUEST_CR3); vmcs12->guest_pdptr0 = vmcs_read64(GUEST_PDPTR0); vmcs12->guest_pdptr1 = vmcs_read64(GUEST_PDPTR1); vmcs12->guest_pdptr2 = vmcs_read64(GUEST_PDPTR2); @@ -10805,7 +10864,8 @@ static struct kvm_x86_ops vmx_x86_ops = { .update_cr8_intercept = update_cr8_intercept, .set_virtual_x2apic_mode = vmx_set_virtual_x2apic_mode, .set_apic_access_page_addr = vmx_set_apic_access_page_addr, - .cpu_uses_apicv = vmx_cpu_uses_apicv, + .get_enable_apicv = vmx_get_enable_apicv, + .refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl, .load_eoi_exitmap = vmx_load_eoi_exitmap, .hwapic_irr_update = vmx_hwapic_irr_update, .hwapic_isr_update = vmx_hwapic_isr_update, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 97592e190413..4244c2baf57d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -951,7 +951,7 @@ static u32 msrs_to_save[] = { MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, #endif MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA, - MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS + MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX, }; static unsigned num_msrs_to_save; @@ -966,6 +966,8 @@ static u32 emulated_msrs[] = { HV_X64_MSR_RESET, HV_X64_MSR_VP_INDEX, HV_X64_MSR_VP_RUNTIME, + HV_X64_MSR_SCONTROL, + HV_X64_MSR_STIMER0_CONFIG, HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, MSR_KVM_PV_EOI_EN, @@ -1167,7 +1169,8 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock) ++version; - kvm_write_guest(kvm, wall_clock, &version, sizeof(version)); + if (kvm_write_guest(kvm, wall_clock, &version, sizeof(version))) + return; /* * The guest calculates current wall clock time by adding @@ -1683,6 +1686,11 @@ static void pvclock_update_vm_gtod_copy(struct kvm *kvm) #endif } +void kvm_make_mclock_inprogress_request(struct kvm *kvm) +{ + kvm_make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS); +} + static void kvm_gen_update_masterclock(struct kvm *kvm) { #ifdef CONFIG_X86_64 @@ -2198,6 +2206,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15: case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4: case HV_X64_MSR_CRASH_CTL: + case HV_X64_MSR_STIMER0_CONFIG ... HV_X64_MSR_STIMER3_COUNT: return kvm_hv_set_msr_common(vcpu, msr, data, msr_info->host_initiated); case MSR_IA32_BBL_CR_CTL3: @@ -2402,6 +2411,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15: case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4: case HV_X64_MSR_CRASH_CTL: + case HV_X64_MSR_STIMER0_CONFIG ... HV_X64_MSR_STIMER3_COUNT: return kvm_hv_get_msr_common(vcpu, msr_info->index, &msr_info->data); break; @@ -2541,6 +2551,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_HYPERV: case KVM_CAP_HYPERV_VAPIC: case KVM_CAP_HYPERV_SPIN: + case KVM_CAP_HYPERV_SYNIC: case KVM_CAP_PCI_SEGMENT: case KVM_CAP_DEBUGREGS: case KVM_CAP_X86_ROBUST_SINGLESTEP: @@ -2693,6 +2704,11 @@ static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu) return kvm_arch_has_noncoherent_dma(vcpu->kvm); } +static inline void kvm_migrate_timers(struct kvm_vcpu *vcpu) +{ + set_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests); +} + void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { /* Address WBINVD may be executed by guest */ @@ -2748,7 +2764,9 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s) { - kvm_x86_ops->sync_pir_to_irr(vcpu); + if (vcpu->arch.apicv_active) + kvm_x86_ops->sync_pir_to_irr(vcpu); + memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s); return 0; @@ -3191,6 +3209,20 @@ static int kvm_set_guest_paused(struct kvm_vcpu *vcpu) return 0; } +static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, + struct kvm_enable_cap *cap) +{ + if (cap->flags) + return -EINVAL; + + switch (cap->cap) { + case KVM_CAP_HYPERV_SYNIC: + return kvm_hv_activate_synic(vcpu); + default: + return -EINVAL; + } +} + long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -3455,6 +3487,15 @@ long kvm_arch_vcpu_ioctl(struct file *filp, r = kvm_set_guest_paused(vcpu); goto out; } + case KVM_ENABLE_CAP: { + struct kvm_enable_cap cap; + + r = -EFAULT; + if (copy_from_user(&cap, argp, sizeof(cap))) + goto out; + r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap); + break; + } default: r = -EINVAL; } @@ -4006,16 +4047,17 @@ static void kvm_init_msr_list(void) /* * Even MSRs that are valid in the host may not be exposed - * to the guests in some cases. We could work around this - * in VMX with the generic MSR save/load machinery, but it - * is not really worthwhile since it will really only - * happen with nested virtualization. + * to the guests in some cases. */ switch (msrs_to_save[i]) { case MSR_IA32_BNDCFGS: if (!kvm_x86_ops->mpx_supported()) continue; break; + case MSR_TSC_AUX: + if (!kvm_x86_ops->rdtscp_supported()) + continue; + break; default: break; } @@ -5106,7 +5148,7 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2, int emulation_type) { gpa_t gpa = cr2; - pfn_t pfn; + kvm_pfn_t pfn; if (emulation_type & EMULTYPE_NO_REEXECUTE) return false; @@ -5872,6 +5914,12 @@ static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid) kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL); } +void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu) +{ + vcpu->arch.apicv_active = false; + kvm_x86_ops->refresh_apicv_exec_ctrl(vcpu); +} + int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) { unsigned long nr, a0, a1, a2, a3, ret; @@ -5965,6 +6013,9 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu) if (!vcpu->arch.apic) return; + if (vcpu->arch.apicv_active) + return; + if (!vcpu->arch.apic->vapic_addr) max_irr = kvm_lapic_find_highest_irr(vcpu); else @@ -6301,20 +6352,30 @@ static void process_smi(struct kvm_vcpu *vcpu) kvm_mmu_reset_context(vcpu); } +void kvm_make_scan_ioapic_request(struct kvm *kvm) +{ + kvm_make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC); +} + static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) { + u64 eoi_exit_bitmap[4]; + if (!kvm_apic_hw_enabled(vcpu->arch.apic)) return; - memset(vcpu->arch.eoi_exit_bitmap, 0, 256 / 8); + bitmap_zero(vcpu->arch.ioapic_handled_vectors, 256); if (irqchip_split(vcpu->kvm)) - kvm_scan_ioapic_routes(vcpu, vcpu->arch.eoi_exit_bitmap); + kvm_scan_ioapic_routes(vcpu, vcpu->arch.ioapic_handled_vectors); else { - kvm_x86_ops->sync_pir_to_irr(vcpu); - kvm_ioapic_scan_entry(vcpu, vcpu->arch.eoi_exit_bitmap); + if (vcpu->arch.apicv_active) + kvm_x86_ops->sync_pir_to_irr(vcpu); + kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors); } - kvm_x86_ops->load_eoi_exitmap(vcpu); + bitmap_or((ulong *)eoi_exit_bitmap, vcpu->arch.ioapic_handled_vectors, + vcpu_to_synic(vcpu)->vec_bitmap, 256); + kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap); } static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu) @@ -6422,7 +6483,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) if (kvm_check_request(KVM_REQ_IOAPIC_EOI_EXIT, vcpu)) { BUG_ON(vcpu->arch.pending_ioapic_eoi > 255); if (test_bit(vcpu->arch.pending_ioapic_eoi, - (void *) vcpu->arch.eoi_exit_bitmap)) { + vcpu->arch.ioapic_handled_vectors)) { vcpu->run->exit_reason = KVM_EXIT_IOAPIC_EOI; vcpu->run->eoi.vector = vcpu->arch.pending_ioapic_eoi; @@ -6446,6 +6507,20 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) r = 0; goto out; } + if (kvm_check_request(KVM_REQ_HV_EXIT, vcpu)) { + vcpu->run->exit_reason = KVM_EXIT_HYPERV; + vcpu->run->hyperv = vcpu->arch.hyperv.exit; + r = 0; + goto out; + } + + /* + * KVM_REQ_HV_STIMER has to be processed after + * KVM_REQ_CLOCK_UPDATE, because Hyper-V SynIC timers + * depend on the guest clock being up-to-date + */ + if (kvm_check_request(KVM_REQ_HV_STIMER, vcpu)) + kvm_hv_process_stimers(vcpu); } /* @@ -6457,7 +6532,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) * Update architecture specific hints for APIC * virtual interrupt delivery. */ - if (kvm_x86_ops->hwapic_irr_update) + if (vcpu->arch.apicv_active) kvm_x86_ops->hwapic_irr_update(vcpu, kvm_lapic_find_highest_irr(vcpu)); } @@ -7528,6 +7603,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) BUG_ON(vcpu->kvm == NULL); kvm = vcpu->kvm; + vcpu->arch.apicv_active = kvm_x86_ops->get_enable_apicv(); vcpu->arch.pv.pv_unhalted = false; vcpu->arch.emulate_ctxt.ops = &emulate_ops; if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_reset_bsp(vcpu)) @@ -7585,6 +7661,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) vcpu->arch.pending_external_vector = -1; + kvm_hv_vcpu_init(vcpu); + return 0; fail_free_mce_banks: @@ -7603,6 +7681,7 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) { int idx; + kvm_hv_vcpu_uninit(vcpu); kvm_pmu_destroy(vcpu); kfree(vcpu->arch.mce_banks); kvm_free_lapic(vcpu); @@ -7997,6 +8076,9 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu) kvm_cpu_has_interrupt(vcpu)) return true; + if (kvm_hv_has_stimer_pending(vcpu)) + return true; + return false; } diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index a43b2eafc466..4ba229ac3f4f 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -1473,7 +1473,6 @@ __init void lguest_init(void) pv_mmu_ops.lazy_mode.leave = lguest_leave_lazy_mmu_mode; pv_mmu_ops.lazy_mode.flush = paravirt_flush_lazy_mmu; pv_mmu_ops.pte_update = lguest_pte_update; - pv_mmu_ops.pte_update_defer = lguest_pte_update; #ifdef CONFIG_X86_LOCAL_APIC /* APIC read/write intercepts */ diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index f2587888d987..a501fa25da41 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -16,7 +16,7 @@ clean-files := inat-tables.c obj-$(CONFIG_SMP) += msr-smp.o cache-smp.o -lib-y := delay.o misc.o cmdline.o +lib-y := delay.o misc.o cmdline.o cpu.o lib-y += usercopy_$(BITS).o usercopy.o getuser.o putuser.o lib-y += memcpy_$(BITS).o lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o diff --git a/arch/x86/lib/cpu.c b/arch/x86/lib/cpu.c new file mode 100644 index 000000000000..aa417a97511c --- /dev/null +++ b/arch/x86/lib/cpu.c @@ -0,0 +1,35 @@ +#include <linux/module.h> + +unsigned int x86_family(unsigned int sig) +{ + unsigned int x86; + + x86 = (sig >> 8) & 0xf; + + if (x86 == 0xf) + x86 += (sig >> 20) & 0xff; + + return x86; +} +EXPORT_SYMBOL_GPL(x86_family); + +unsigned int x86_model(unsigned int sig) +{ + unsigned int fam, model; + + fam = x86_family(sig); + + model = (sig >> 4) & 0xf; + + if (fam >= 0x6) + model += ((sig >> 16) & 0xf) << 4; + + return model; +} +EXPORT_SYMBOL_GPL(x86_model); + +unsigned int x86_stepping(unsigned int sig) +{ + return sig & 0xf; +} +EXPORT_SYMBOL_GPL(x86_stepping); diff --git a/arch/x86/lib/msr.c b/arch/x86/lib/msr.c index 43623739c7cf..004c861b1648 100644 --- a/arch/x86/lib/msr.c +++ b/arch/x86/lib/msr.c @@ -1,6 +1,8 @@ #include <linux/module.h> #include <linux/preempt.h> #include <asm/msr.h> +#define CREATE_TRACE_POINTS +#include <asm/msr-trace.h> struct msr *msrs_alloc(void) { @@ -108,3 +110,27 @@ int msr_clear_bit(u32 msr, u8 bit) { return __flip_bit(msr, bit, false); } + +#ifdef CONFIG_TRACEPOINTS +void do_trace_write_msr(unsigned msr, u64 val, int failed) +{ + trace_write_msr(msr, val, failed); +} +EXPORT_SYMBOL(do_trace_write_msr); +EXPORT_TRACEPOINT_SYMBOL(write_msr); + +void do_trace_read_msr(unsigned msr, u64 val, int failed) +{ + trace_read_msr(msr, val, failed); +} +EXPORT_SYMBOL(do_trace_read_msr); +EXPORT_TRACEPOINT_SYMBOL(read_msr); + +void do_trace_rdpmc(unsigned counter, u64 val, int failed) +{ + trace_rdpmc(counter, val, failed); +} +EXPORT_SYMBOL(do_trace_rdpmc); +EXPORT_TRACEPOINT_SYMBOL(rdpmc); + +#endif diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 65c47fda26fc..f9d38a48e3c8 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -15,6 +15,7 @@ obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_X86_PTDUMP_CORE) += dump_pagetables.o +obj-$(CONFIG_X86_PTDUMP) += debug_pagetables.o obj-$(CONFIG_HIGHMEM) += highmem_32.o diff --git a/arch/x86/mm/debug_pagetables.c b/arch/x86/mm/debug_pagetables.c new file mode 100644 index 000000000000..bfcffdf6c577 --- /dev/null +++ b/arch/x86/mm/debug_pagetables.c @@ -0,0 +1,46 @@ +#include <linux/debugfs.h> +#include <linux/module.h> +#include <linux/seq_file.h> +#include <asm/pgtable.h> + +static int ptdump_show(struct seq_file *m, void *v) +{ + ptdump_walk_pgd_level(m, NULL); + return 0; +} + +static int ptdump_open(struct inode *inode, struct file *filp) +{ + return single_open(filp, ptdump_show, NULL); +} + +static const struct file_operations ptdump_fops = { + .owner = THIS_MODULE, + .open = ptdump_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static struct dentry *pe; + +static int __init pt_dump_debug_init(void) +{ + pe = debugfs_create_file("kernel_page_tables", S_IRUSR, NULL, NULL, + &ptdump_fops); + if (!pe) + return -ENOMEM; + + return 0; +} + +static void __exit pt_dump_debug_exit(void) +{ + debugfs_remove_recursive(pe); +} + +module_init(pt_dump_debug_init); +module_exit(pt_dump_debug_exit); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>"); +MODULE_DESCRIPTION("Kernel debugging helper that dumps pagetables"); diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index 0f1c6fc3ddd8..4a6f1d9b5106 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -426,38 +426,15 @@ void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd) { ptdump_walk_pgd_level_core(m, pgd, false); } +EXPORT_SYMBOL_GPL(ptdump_walk_pgd_level); void ptdump_walk_pgd_level_checkwx(void) { ptdump_walk_pgd_level_core(NULL, NULL, true); } -#ifdef CONFIG_X86_PTDUMP -static int ptdump_show(struct seq_file *m, void *v) +static int __init pt_dump_init(void) { - ptdump_walk_pgd_level(m, NULL); - return 0; -} - -static int ptdump_open(struct inode *inode, struct file *filp) -{ - return single_open(filp, ptdump_show, NULL); -} - -static const struct file_operations ptdump_fops = { - .open = ptdump_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; -#endif - -static int pt_dump_init(void) -{ -#ifdef CONFIG_X86_PTDUMP - struct dentry *pe; -#endif - #ifdef CONFIG_X86_32 /* Not a compile-time constant on x86-32 */ address_markers[VMALLOC_START_NR].start_address = VMALLOC_START; @@ -468,13 +445,6 @@ static int pt_dump_init(void) address_markers[FIXADDR_START_NR].start_address = FIXADDR_START; #endif -#ifdef CONFIG_X86_PTDUMP - pe = debugfs_create_file("kernel_page_tables", 0600, NULL, NULL, - &ptdump_fops); - if (!pe) - return -ENOMEM; -#endif - return 0; } diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c index ae9a37bf1371..6d5eb5900372 100644 --- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c @@ -9,6 +9,7 @@ #include <linux/vmstat.h> #include <linux/highmem.h> #include <linux/swap.h> +#include <linux/memremap.h> #include <asm/pgtable.h> @@ -63,6 +64,16 @@ retry: #endif } +static void undo_dev_pagemap(int *nr, int nr_start, struct page **pages) +{ + while ((*nr) - nr_start) { + struct page *page = pages[--(*nr)]; + + ClearPageReferenced(page); + put_page(page); + } +} + /* * The performance critical leaf functions are made noinline otherwise gcc * inlines everything into a single function which results in too much @@ -71,7 +82,9 @@ retry: static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr) { + struct dev_pagemap *pgmap = NULL; unsigned long mask; + int nr_start = *nr; pte_t *ptep; mask = _PAGE_PRESENT|_PAGE_USER; @@ -89,13 +102,21 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, return 0; } - if ((pte_flags(pte) & (mask | _PAGE_SPECIAL)) != mask) { + page = pte_page(pte); + if (pte_devmap(pte)) { + pgmap = get_dev_pagemap(pte_pfn(pte), pgmap); + if (unlikely(!pgmap)) { + undo_dev_pagemap(nr, nr_start, pages); + pte_unmap(ptep); + return 0; + } + } else if ((pte_flags(pte) & (mask | _PAGE_SPECIAL)) != mask) { pte_unmap(ptep); return 0; } VM_BUG_ON(!pfn_valid(pte_pfn(pte))); - page = pte_page(pte); get_page(page); + put_dev_pagemap(pgmap); SetPageReferenced(page); pages[*nr] = page; (*nr)++; @@ -114,6 +135,32 @@ static inline void get_head_page_multiple(struct page *page, int nr) SetPageReferenced(page); } +static int __gup_device_huge_pmd(pmd_t pmd, unsigned long addr, + unsigned long end, struct page **pages, int *nr) +{ + int nr_start = *nr; + unsigned long pfn = pmd_pfn(pmd); + struct dev_pagemap *pgmap = NULL; + + pfn += (addr & ~PMD_MASK) >> PAGE_SHIFT; + do { + struct page *page = pfn_to_page(pfn); + + pgmap = get_dev_pagemap(pfn, pgmap); + if (unlikely(!pgmap)) { + undo_dev_pagemap(nr, nr_start, pages); + return 0; + } + SetPageReferenced(page); + pages[*nr] = page; + get_page(page); + put_dev_pagemap(pgmap); + (*nr)++; + pfn++; + } while (addr += PAGE_SIZE, addr != end); + return 1; +} + static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr) { @@ -126,9 +173,13 @@ static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr, mask |= _PAGE_RW; if ((pmd_flags(pmd) & mask) != mask) return 0; + + VM_BUG_ON(!pfn_valid(pmd_pfn(pmd))); + if (pmd_devmap(pmd)) + return __gup_device_huge_pmd(pmd, addr, end, pages, nr); + /* hugepages are never "special" */ VM_BUG_ON(pmd_flags(pmd) & _PAGE_SPECIAL); - VM_BUG_ON(!pfn_valid(pmd_pfn(pmd))); refs = 0; head = pmd_page(pmd); @@ -136,8 +187,6 @@ static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr, do { VM_BUG_ON_PAGE(compound_head(page) != head, page); pages[*nr] = page; - if (PageTail(page)) - get_huge_page_tail(page); (*nr)++; page++; refs++; @@ -158,18 +207,7 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, pmd_t pmd = *pmdp; next = pmd_addr_end(addr, end); - /* - * The pmd_trans_splitting() check below explains why - * pmdp_splitting_flush has to flush the tlb, to stop - * this gup-fast code from running while we set the - * splitting bit in the pmd. Returning zero will take - * the slow path that will call wait_split_huge_page() - * if the pmd is still in splitting state. gup-fast - * can't because it has irq disabled and - * wait_split_huge_page() would never return as the - * tlb flush IPI wouldn't run. - */ - if (pmd_none(pmd) || pmd_trans_splitting(pmd)) + if (pmd_none(pmd)) return 0; if (unlikely(pmd_large(pmd) || !pmd_present(pmd))) { /* @@ -212,8 +250,6 @@ static noinline int gup_huge_pud(pud_t pud, unsigned long addr, do { VM_BUG_ON_PAGE(compound_head(page) != head, page); pages[*nr] = page; - if (PageTail(page)) - get_huge_page_tail(page); (*nr)++; page++; refs++; diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index ec081fe0ce2c..5488d21123bd 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -30,6 +30,7 @@ #include <linux/module.h> #include <linux/memory.h> #include <linux/memory_hotplug.h> +#include <linux/memremap.h> #include <linux/nmi.h> #include <linux/gfp.h> #include <linux/kcore.h> @@ -714,6 +715,12 @@ static void __meminit free_pagetable(struct page *page, int order) { unsigned long magic; unsigned int nr_pages = 1 << order; + struct vmem_altmap *altmap = to_vmem_altmap((unsigned long) page); + + if (altmap) { + vmem_altmap_free(altmap, nr_pages); + return; + } /* bootmem page has reserved flag */ if (PageReserved(page)) { @@ -814,8 +821,7 @@ remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end, if (phys_addr < (phys_addr_t)0x40000000) return; - if (IS_ALIGNED(addr, PAGE_SIZE) && - IS_ALIGNED(next, PAGE_SIZE)) { + if (PAGE_ALIGNED(addr) && PAGE_ALIGNED(next)) { /* * Do not free direct mapping pages since they were * freed when offlining, or simplely not in use. @@ -1018,13 +1024,19 @@ int __ref arch_remove_memory(u64 start, u64 size) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; + struct page *page = pfn_to_page(start_pfn); + struct vmem_altmap *altmap; struct zone *zone; int ret; - zone = page_zone(pfn_to_page(start_pfn)); - kernel_physical_mapping_remove(start, start + size); + /* With altmap the first mapped page is offset from @start */ + altmap = to_vmem_altmap((unsigned long) page); + if (altmap) + page += vmem_altmap_offset(altmap); + zone = page_zone(page); ret = __remove_pages(zone, start_pfn, nr_pages); WARN_ON_ONCE(ret); + kernel_physical_mapping_remove(start, start + size); return ret; } @@ -1236,7 +1248,7 @@ static void __meminitdata *p_start, *p_end; static int __meminitdata node_start; static int __meminit vmemmap_populate_hugepages(unsigned long start, - unsigned long end, int node) + unsigned long end, int node, struct vmem_altmap *altmap) { unsigned long addr; unsigned long next; @@ -1259,7 +1271,7 @@ static int __meminit vmemmap_populate_hugepages(unsigned long start, if (pmd_none(*pmd)) { void *p; - p = vmemmap_alloc_block_buf(PMD_SIZE, node); + p = __vmemmap_alloc_block_buf(PMD_SIZE, node, altmap); if (p) { pte_t entry; @@ -1280,7 +1292,8 @@ static int __meminit vmemmap_populate_hugepages(unsigned long start, addr_end = addr + PMD_SIZE; p_end = p + PMD_SIZE; continue; - } + } else if (altmap) + return -ENOMEM; /* no fallback */ } else if (pmd_large(*pmd)) { vmemmap_verify((pte_t *)pmd, node, addr, next); continue; @@ -1294,11 +1307,16 @@ static int __meminit vmemmap_populate_hugepages(unsigned long start, int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) { + struct vmem_altmap *altmap = to_vmem_altmap(start); int err; if (cpu_has_pse) - err = vmemmap_populate_hugepages(start, end, node); - else + err = vmemmap_populate_hugepages(start, end, node, altmap); + else if (altmap) { + pr_err_once("%s: no cpu support for altmap allocations\n", + __func__); + err = -ENOMEM; + } else err = vmemmap_populate_basepages(start, end, node); if (!err) sync_global_pgds(start, end - 1, 0); diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index b9c78f3bcd67..0d8d53d1f5cc 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -194,8 +194,8 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, * Check if the request spans more than any BAR in the iomem resource * tree. */ - WARN_ONCE(iomem_map_sanity_check(unaligned_phys_addr, unaligned_size), - KERN_INFO "Info: mapping multiple BARs. Your kernel is fine."); + if (iomem_map_sanity_check(unaligned_phys_addr, unaligned_size)) + pr_warn("caller %pS mapping multiple BARs\n", caller); return ret_addr; err_free_area: diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index 844b06d67df4..96bd1e2bffaf 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c @@ -69,14 +69,14 @@ unsigned long arch_mmap_rnd(void) { unsigned long rnd; - /* - * 8 bits of randomness in 32bit mmaps, 20 address space bits - * 28 bits of randomness in 64bit mmaps, 40 address space bits - */ if (mmap_is_ia32()) - rnd = (unsigned long)get_random_int() % (1<<8); +#ifdef CONFIG_COMPAT + rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_compat_bits) - 1); +#else + rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_bits) - 1); +#endif else - rnd = (unsigned long)get_random_int() % (1<<28); + rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_bits) - 1); return rnd << PAGE_SHIFT; } diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index a3137a4feed1..fc6a4c8f6e2a 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -66,6 +66,9 @@ void update_page_count(int level, unsigned long pages) static void split_page_count(int level) { + if (direct_pages_count[level] == 0) + return; + direct_pages_count[level]--; direct_pages_count[level - 1] += PTRS_PER_PTE; } @@ -129,14 +132,16 @@ within(unsigned long addr, unsigned long start, unsigned long end) */ void clflush_cache_range(void *vaddr, unsigned int size) { - unsigned long clflush_mask = boot_cpu_data.x86_clflush_size - 1; + const unsigned long clflush_size = boot_cpu_data.x86_clflush_size; + void *p = (void *)((unsigned long)vaddr & ~(clflush_size - 1)); void *vend = vaddr + size; - void *p; + + if (p >= vend) + return; mb(); - for (p = (void *)((unsigned long)vaddr & ~clflush_mask); - p < vend; p += boot_cpu_data.x86_clflush_size) + for (; p < vend; p += clflush_size) clflushopt(p); mb(); diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 188e3e07eeeb..f4ae536b0914 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -12,6 +12,7 @@ #include <linux/debugfs.h> #include <linux/kernel.h> #include <linux/module.h> +#include <linux/pfn_t.h> #include <linux/slab.h> #include <linux/mm.h> #include <linux/fs.h> @@ -586,7 +587,7 @@ int free_memtype(u64 start, u64 end) entry = rbt_memtype_erase(start, end); spin_unlock(&memtype_lock); - if (!entry) { + if (IS_ERR(entry)) { pr_info("x86/PAT: %s:%d freeing invalid memtype [mem %#010Lx-%#010Lx]\n", current->comm, current->pid, start, end - 1); return -EINVAL; @@ -949,7 +950,7 @@ int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot, } int track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot, - unsigned long pfn) + pfn_t pfn) { enum page_cache_mode pcm; @@ -957,7 +958,7 @@ int track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot, return 0; /* Set prot based on lookup */ - pcm = lookup_memtype((resource_size_t)pfn << PAGE_SHIFT); + pcm = lookup_memtype(pfn_t_to_phys(pfn)); *prot = __pgprot((pgprot_val(vma->vm_page_prot) & (~_PAGE_CACHE_MASK)) | cachemode2protval(pcm)); @@ -992,6 +993,16 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn, vma->vm_flags &= ~VM_PAT; } +/* + * untrack_pfn_moved is called, while mremapping a pfnmap for a new region, + * with the old vma after its pfnmap page table has been removed. The new + * vma has a new pfnmap to the same pfn & cache type with VM_PAT set. + */ +void untrack_pfn_moved(struct vm_area_struct *vma) +{ + vma->vm_flags &= ~VM_PAT; +} + pgprot_t pgprot_writecombine(pgprot_t prot) { return __pgprot(pgprot_val(prot) | diff --git a/arch/x86/mm/pat_rbtree.c b/arch/x86/mm/pat_rbtree.c index 63931080366a..2f7702253ccf 100644 --- a/arch/x86/mm/pat_rbtree.c +++ b/arch/x86/mm/pat_rbtree.c @@ -98,8 +98,13 @@ static struct memtype *memtype_rb_lowest_match(struct rb_root *root, return last_lower; /* Returns NULL if there is no overlap */ } -static struct memtype *memtype_rb_exact_match(struct rb_root *root, - u64 start, u64 end) +enum { + MEMTYPE_EXACT_MATCH = 0, + MEMTYPE_END_MATCH = 1 +}; + +static struct memtype *memtype_rb_match(struct rb_root *root, + u64 start, u64 end, int match_type) { struct memtype *match; @@ -107,7 +112,12 @@ static struct memtype *memtype_rb_exact_match(struct rb_root *root, while (match != NULL && match->start < end) { struct rb_node *node; - if (match->start == start && match->end == end) + if ((match_type == MEMTYPE_EXACT_MATCH) && + (match->start == start) && (match->end == end)) + return match; + + if ((match_type == MEMTYPE_END_MATCH) && + (match->start < start) && (match->end == end)) return match; node = rb_next(&match->rb); @@ -117,7 +127,7 @@ static struct memtype *memtype_rb_exact_match(struct rb_root *root, match = NULL; } - return NULL; /* Returns NULL if there is no exact match */ + return NULL; /* Returns NULL if there is no match */ } static int memtype_rb_check_conflict(struct rb_root *root, @@ -210,12 +220,36 @@ struct memtype *rbt_memtype_erase(u64 start, u64 end) { struct memtype *data; - data = memtype_rb_exact_match(&memtype_rbroot, start, end); - if (!data) - goto out; + /* + * Since the memtype_rbroot tree allows overlapping ranges, + * rbt_memtype_erase() checks with EXACT_MATCH first, i.e. free + * a whole node for the munmap case. If no such entry is found, + * it then checks with END_MATCH, i.e. shrink the size of a node + * from the end for the mremap case. + */ + data = memtype_rb_match(&memtype_rbroot, start, end, + MEMTYPE_EXACT_MATCH); + if (!data) { + data = memtype_rb_match(&memtype_rbroot, start, end, + MEMTYPE_END_MATCH); + if (!data) + return ERR_PTR(-EINVAL); + } + + if (data->start == start) { + /* munmap: erase this node */ + rb_erase_augmented(&data->rb, &memtype_rbroot, + &memtype_rb_augment_cb); + } else { + /* mremap: update the end value of this node */ + rb_erase_augmented(&data->rb, &memtype_rbroot, + &memtype_rb_augment_cb); + data->end = start; + data->subtree_max_end = data->end; + memtype_rb_insert(&memtype_rbroot, data); + return NULL; + } - rb_erase_augmented(&data->rb, &memtype_rbroot, &memtype_rb_augment_cb); -out: return data; } diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index fb0a9dd1d6e4..4eb287e25043 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -414,7 +414,7 @@ int ptep_set_access_flags(struct vm_area_struct *vma, if (changed && dirty) { *ptep = entry; - pte_update_defer(vma->vm_mm, address, ptep); + pte_update(vma->vm_mm, address, ptep); } return changed; @@ -431,7 +431,6 @@ int pmdp_set_access_flags(struct vm_area_struct *vma, if (changed && dirty) { *pmdp = entry; - pmd_update_defer(vma->vm_mm, address, pmdp); /* * We had a write-protection fault here and changed the pmd * to to more permissive. No need to flush the TLB for that, @@ -469,9 +468,6 @@ int pmdp_test_and_clear_young(struct vm_area_struct *vma, ret = test_and_clear_bit(_PAGE_BIT_ACCESSED, (unsigned long *)pmdp); - if (ret) - pmd_update(vma->vm_mm, addr, pmdp); - return ret; } #endif @@ -509,20 +505,6 @@ int pmdp_clear_flush_young(struct vm_area_struct *vma, return young; } - -void pmdp_splitting_flush(struct vm_area_struct *vma, - unsigned long address, pmd_t *pmdp) -{ - int set; - VM_BUG_ON(address & ~HPAGE_PMD_MASK); - set = !test_and_set_bit(_PAGE_BIT_SPLITTING, - (unsigned long *)pmdp); - if (set) { - pmd_update(vma->vm_mm, address, pmdp); - /* need tlb flush only to serialize against gup-fast */ - flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); - } -} #endif /** diff --git a/arch/x86/mm/setup_nx.c b/arch/x86/mm/setup_nx.c index 90555bf60aa4..92e2eacb3321 100644 --- a/arch/x86/mm/setup_nx.c +++ b/arch/x86/mm/setup_nx.c @@ -31,7 +31,7 @@ early_param("noexec", noexec_setup); void x86_configure_nx(void) { - if (cpu_has_nx && !disable_nx) + if (boot_cpu_has(X86_FEATURE_NX) && !disable_nx) __supported_pte_mask |= _PAGE_NX; else __supported_pte_mask &= ~_PAGE_NX; @@ -39,7 +39,7 @@ void x86_configure_nx(void) void __init x86_report_nx(void) { - if (!cpu_has_nx) { + if (!boot_cpu_has(X86_FEATURE_NX)) { printk(KERN_NOTICE "Notice: NX (Execute Disable) protection " "missing in CPU!\n"); } else { diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c index c2aea63bee20..b5f821881465 100644 --- a/arch/x86/mm/srat.c +++ b/arch/x86/mm/srat.c @@ -203,6 +203,8 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) pr_warn("SRAT: Failed to mark hotplug range [mem %#010Lx-%#010Lx] in memblock\n", (unsigned long long)start, (unsigned long long)end - 1); + max_possible_pfn = max(max_possible_pfn, PFN_UP(end - 1)); + return 0; out_err_bad_srat: bad_srat(); diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 8ddb5d0d66fb..8f4cc3dfac32 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -161,7 +161,10 @@ void flush_tlb_current_task(void) preempt_disable(); count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); + + /* This is an implicit full barrier that synchronizes with switch_mm. */ local_flush_tlb(); + trace_tlb_flush(TLB_LOCAL_SHOOTDOWN, TLB_FLUSH_ALL); if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL); @@ -188,17 +191,29 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, unsigned long base_pages_to_flush = TLB_FLUSH_ALL; preempt_disable(); - if (current->active_mm != mm) + if (current->active_mm != mm) { + /* Synchronize with switch_mm. */ + smp_mb(); + goto out; + } if (!current->mm) { leave_mm(smp_processor_id()); + + /* Synchronize with switch_mm. */ + smp_mb(); + goto out; } if ((end != TLB_FLUSH_ALL) && !(vmflag & VM_HUGETLB)) base_pages_to_flush = (end - start) >> PAGE_SHIFT; + /* + * Both branches below are implicit full barriers (MOV to CR or + * INVLPG) that synchronize with switch_mm. + */ if (base_pages_to_flush > tlb_single_page_flush_ceiling) { base_pages_to_flush = TLB_FLUSH_ALL; count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); @@ -228,10 +243,18 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long start) preempt_disable(); if (current->active_mm == mm) { - if (current->mm) + if (current->mm) { + /* + * Implicit full barrier (INVLPG) that synchronizes + * with switch_mm. + */ __flush_tlb_one(start); - else + } else { leave_mm(smp_processor_id()); + + /* Synchronize with switch_mm. */ + smp_mb(); + } } if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 75991979f667..4286f3618bd0 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -193,7 +193,7 @@ struct jit_context { 32 /* space for rbx, r13, r14, r15 */ + \ 8 /* space for skb_copy_bits() buffer */) -#define PROLOGUE_SIZE 51 +#define PROLOGUE_SIZE 48 /* emit x64 prologue code for BPF program and check it's size. * bpf_tail_call helper will skip it while jumping into another program @@ -229,11 +229,15 @@ static void emit_prologue(u8 **pprog) /* mov qword ptr [rbp-X],r15 */ EMIT3_off32(0x4C, 0x89, 0xBD, -STACKSIZE + 24); - /* clear A and X registers */ - EMIT2(0x31, 0xc0); /* xor eax, eax */ - EMIT3(0x4D, 0x31, 0xED); /* xor r13, r13 */ + /* Clear the tail call counter (tail_call_cnt): for eBPF tail calls + * we need to reset the counter to 0. It's done in two instructions, + * resetting rax register to 0 (xor on eax gets 0 extended), and + * moving it to the counter location. + */ - /* clear tail_cnt: mov qword ptr [rbp-X], rax */ + /* xor eax, eax */ + EMIT2(0x31, 0xc0); + /* mov qword ptr [rbp-X], rax */ EMIT3_off32(0x48, 0x89, 0x85, -STACKSIZE + 32); BUILD_BUG_ON(cnt != PROLOGUE_SIZE); @@ -455,6 +459,18 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, } case BPF_ALU | BPF_MOV | BPF_K: + /* optimization: if imm32 is zero, use 'xor <dst>,<dst>' + * to save 3 bytes. + */ + if (imm32 == 0) { + if (is_ereg(dst_reg)) + EMIT1(add_2mod(0x40, dst_reg, dst_reg)); + b2 = 0x31; /* xor */ + b3 = 0xC0; + EMIT2(b2, add_2reg(b3, dst_reg, dst_reg)); + break; + } + /* mov %eax, imm32 */ if (is_ereg(dst_reg)) EMIT1(add_1mod(0x40, dst_reg)); @@ -469,6 +485,20 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, return -EINVAL; } + /* optimization: if imm64 is zero, use 'xor <dst>,<dst>' + * to save 7 bytes. + */ + if (insn[0].imm == 0 && insn[1].imm == 0) { + b1 = add_2mod(0x48, dst_reg, dst_reg); + b2 = 0x31; /* xor */ + b3 = 0xC0; + EMIT3(b1, b2, add_2reg(b3, dst_reg, dst_reg)); + + insn++; + i++; + break; + } + /* movabsq %rax, imm64 */ EMIT2(add_1mod(0x48, dst_reg), add_1reg(0xB8, dst_reg)); EMIT(insn[0].imm, 4); diff --git a/arch/x86/platform/atom/punit_atom_debug.c b/arch/x86/platform/atom/punit_atom_debug.c index 5ca8ead91579..81c769e80614 100644 --- a/arch/x86/platform/atom/punit_atom_debug.c +++ b/arch/x86/platform/atom/punit_atom_debug.c @@ -25,8 +25,6 @@ #include <asm/cpu_device_id.h> #include <asm/iosf_mbi.h> -/* Side band Interface port */ -#define PUNIT_PORT 0x04 /* Power gate status reg */ #define PWRGT_STATUS 0x61 /* Subsystem config/status Video processor */ @@ -85,9 +83,8 @@ static int punit_dev_state_show(struct seq_file *seq_file, void *unused) seq_puts(seq_file, "\n\nPUNIT NORTH COMPLEX DEVICES :\n"); while (punit_devp->name) { - status = iosf_mbi_read(PUNIT_PORT, BT_MBI_PMC_READ, - punit_devp->reg, - &punit_pwr_status); + status = iosf_mbi_read(BT_MBI_UNIT_PMC, MBI_REG_READ, + punit_devp->reg, &punit_pwr_status); if (status) { seq_printf(seq_file, "%9s : Read Failed\n", punit_devp->name); diff --git a/arch/x86/platform/intel-quark/imr.c b/arch/x86/platform/intel-quark/imr.c index 0ee619f9fcb7..c1bdafaac3ca 100644 --- a/arch/x86/platform/intel-quark/imr.c +++ b/arch/x86/platform/intel-quark/imr.c @@ -111,23 +111,19 @@ static int imr_read(struct imr_device *idev, u32 imr_id, struct imr_regs *imr) u32 reg = imr_id * IMR_NUM_REGS + idev->reg_base; int ret; - ret = iosf_mbi_read(QRK_MBI_UNIT_MM, QRK_MBI_MM_READ, - reg++, &imr->addr_lo); + ret = iosf_mbi_read(QRK_MBI_UNIT_MM, MBI_REG_READ, reg++, &imr->addr_lo); if (ret) return ret; - ret = iosf_mbi_read(QRK_MBI_UNIT_MM, QRK_MBI_MM_READ, - reg++, &imr->addr_hi); + ret = iosf_mbi_read(QRK_MBI_UNIT_MM, MBI_REG_READ, reg++, &imr->addr_hi); if (ret) return ret; - ret = iosf_mbi_read(QRK_MBI_UNIT_MM, QRK_MBI_MM_READ, - reg++, &imr->rmask); + ret = iosf_mbi_read(QRK_MBI_UNIT_MM, MBI_REG_READ, reg++, &imr->rmask); if (ret) return ret; - return iosf_mbi_read(QRK_MBI_UNIT_MM, QRK_MBI_MM_READ, - reg++, &imr->wmask); + return iosf_mbi_read(QRK_MBI_UNIT_MM, MBI_REG_READ, reg++, &imr->wmask); } /** @@ -151,31 +147,27 @@ static int imr_write(struct imr_device *idev, u32 imr_id, local_irq_save(flags); - ret = iosf_mbi_write(QRK_MBI_UNIT_MM, QRK_MBI_MM_WRITE, reg++, - imr->addr_lo); + ret = iosf_mbi_write(QRK_MBI_UNIT_MM, MBI_REG_WRITE, reg++, imr->addr_lo); if (ret) goto failed; - ret = iosf_mbi_write(QRK_MBI_UNIT_MM, QRK_MBI_MM_WRITE, - reg++, imr->addr_hi); + ret = iosf_mbi_write(QRK_MBI_UNIT_MM, MBI_REG_WRITE, reg++, imr->addr_hi); if (ret) goto failed; - ret = iosf_mbi_write(QRK_MBI_UNIT_MM, QRK_MBI_MM_WRITE, - reg++, imr->rmask); + ret = iosf_mbi_write(QRK_MBI_UNIT_MM, MBI_REG_WRITE, reg++, imr->rmask); if (ret) goto failed; - ret = iosf_mbi_write(QRK_MBI_UNIT_MM, QRK_MBI_MM_WRITE, - reg++, imr->wmask); + ret = iosf_mbi_write(QRK_MBI_UNIT_MM, MBI_REG_WRITE, reg++, imr->wmask); if (ret) goto failed; /* Lock bit must be set separately to addr_lo address bits. */ if (lock) { imr->addr_lo |= IMR_LOCK; - ret = iosf_mbi_write(QRK_MBI_UNIT_MM, QRK_MBI_MM_WRITE, - reg - IMR_NUM_REGS, imr->addr_lo); + ret = iosf_mbi_write(QRK_MBI_UNIT_MM, MBI_REG_WRITE, + reg - IMR_NUM_REGS, imr->addr_lo); if (ret) goto failed; } diff --git a/arch/x86/platform/uv/uv_nmi.c b/arch/x86/platform/uv/uv_nmi.c index 327f21c3bde1..8dd80050d705 100644 --- a/arch/x86/platform/uv/uv_nmi.c +++ b/arch/x86/platform/uv/uv_nmi.c @@ -28,6 +28,7 @@ #include <linux/nmi.h> #include <linux/sched.h> #include <linux/slab.h> +#include <linux/clocksource.h> #include <asm/apic.h> #include <asm/current.h> diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 9ab52791fed5..d5f64996394a 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -23,6 +23,7 @@ #include <asm/debugreg.h> #include <asm/cpu.h> #include <asm/mmu_context.h> +#include <linux/dmi.h> #ifdef CONFIG_X86_32 __visible unsigned long saved_context_ebx; @@ -32,6 +33,29 @@ __visible unsigned long saved_context_eflags; #endif struct saved_context saved_context; +static void msr_save_context(struct saved_context *ctxt) +{ + struct saved_msr *msr = ctxt->saved_msrs.array; + struct saved_msr *end = msr + ctxt->saved_msrs.num; + + while (msr < end) { + msr->valid = !rdmsrl_safe(msr->info.msr_no, &msr->info.reg.q); + msr++; + } +} + +static void msr_restore_context(struct saved_context *ctxt) +{ + struct saved_msr *msr = ctxt->saved_msrs.array; + struct saved_msr *end = msr + ctxt->saved_msrs.num; + + while (msr < end) { + if (msr->valid) + wrmsrl(msr->info.msr_no, msr->info.reg.q); + msr++; + } +} + /** * __save_processor_state - save CPU registers before creating a * hibernation image and before restoring the memory state from it @@ -111,6 +135,7 @@ static void __save_processor_state(struct saved_context *ctxt) #endif ctxt->misc_enable_saved = !rdmsrl_safe(MSR_IA32_MISC_ENABLE, &ctxt->misc_enable); + msr_save_context(ctxt); } /* Needed by apm.c */ @@ -229,6 +254,7 @@ static void notrace __restore_processor_state(struct saved_context *ctxt) x86_platform.restore_sched_clock_state(); mtrr_bp_restore(); perf_restore_debug_store(); + msr_restore_context(ctxt); } /* Needed by apm.c */ @@ -320,3 +346,69 @@ static int __init bsp_pm_check_init(void) } core_initcall(bsp_pm_check_init); + +static int msr_init_context(const u32 *msr_id, const int total_num) +{ + int i = 0; + struct saved_msr *msr_array; + + if (saved_context.saved_msrs.array || saved_context.saved_msrs.num > 0) { + pr_err("x86/pm: MSR quirk already applied, please check your DMI match table.\n"); + return -EINVAL; + } + + msr_array = kmalloc_array(total_num, sizeof(struct saved_msr), GFP_KERNEL); + if (!msr_array) { + pr_err("x86/pm: Can not allocate memory to save/restore MSRs during suspend.\n"); + return -ENOMEM; + } + + for (i = 0; i < total_num; i++) { + msr_array[i].info.msr_no = msr_id[i]; + msr_array[i].valid = false; + msr_array[i].info.reg.q = 0; + } + saved_context.saved_msrs.num = total_num; + saved_context.saved_msrs.array = msr_array; + + return 0; +} + +/* + * The following section is a quirk framework for problematic BIOSen: + * Sometimes MSRs are modified by the BIOSen after suspended to + * RAM, this might cause unexpected behavior after wakeup. + * Thus we save/restore these specified MSRs across suspend/resume + * in order to work around it. + * + * For any further problematic BIOSen/platforms, + * please add your own function similar to msr_initialize_bdw. + */ +static int msr_initialize_bdw(const struct dmi_system_id *d) +{ + /* Add any extra MSR ids into this array. */ + u32 bdw_msr_id[] = { MSR_IA32_THERM_CONTROL }; + + pr_info("x86/pm: %s detected, MSR saving is needed during suspending.\n", d->ident); + return msr_init_context(bdw_msr_id, ARRAY_SIZE(bdw_msr_id)); +} + +static struct dmi_system_id msr_save_dmi_table[] = { + { + .callback = msr_initialize_bdw, + .ident = "BROADWELL BDX_EP", + .matches = { + DMI_MATCH(DMI_PRODUCT_NAME, "GRANTLEY"), + DMI_MATCH(DMI_PRODUCT_VERSION, "E63448-400"), + }, + }, + {} +}; + +static int pm_check_save_msr(void) +{ + dmi_check_system(msr_save_dmi_table); + return 0; +} + +device_initcall(pm_check_save_msr); diff --git a/arch/x86/um/Makefile b/arch/x86/um/Makefile index a8fecc226946..3ee2bb6b440b 100644 --- a/arch/x86/um/Makefile +++ b/arch/x86/um/Makefile @@ -17,7 +17,7 @@ obj-y = bug.o bugs_$(BITS).o delay.o fault.o ksyms.o ldt.o \ ifeq ($(CONFIG_X86_32),y) obj-y += checksum_32.o -obj-$(CONFIG_BINFMT_ELF) += elfcore.o +obj-$(CONFIG_ELF_CORE) += elfcore.o subarch-y = ../lib/string_32.o ../lib/atomic64_32.o ../lib/atomic64_cx8_32.o subarch-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += ../lib/rwsem.o diff --git a/arch/x86/um/asm/syscall.h b/arch/x86/um/asm/syscall.h index 81d6562ce01d..11ab90dc5f14 100644 --- a/arch/x86/um/asm/syscall.h +++ b/arch/x86/um/asm/syscall.h @@ -1,6 +1,7 @@ #ifndef __UM_ASM_SYSCALL_H #define __UM_ASM_SYSCALL_H +#include <asm/syscall-generic.h> #include <uapi/linux/audit.h> typedef asmlinkage long (*sys_call_ptr_t)(unsigned long, unsigned long, diff --git a/arch/x86/um/ptrace_32.c b/arch/x86/um/ptrace_32.c index a29756f2d940..47c78d5e5c32 100644 --- a/arch/x86/um/ptrace_32.c +++ b/arch/x86/um/ptrace_32.c @@ -68,6 +68,7 @@ static const int reg_offsets[] = { [EFL] = HOST_EFLAGS, [UESP] = HOST_SP, [SS] = HOST_SS, + [ORIG_EAX] = HOST_ORIG_AX, }; int putreg(struct task_struct *child, int regno, unsigned long value) @@ -83,6 +84,7 @@ int putreg(struct task_struct *child, int regno, unsigned long value) case EAX: case EIP: case UESP: + case ORIG_EAX: break; case FS: if (value && (value & 3) != 3) @@ -108,9 +110,6 @@ int putreg(struct task_struct *child, int regno, unsigned long value) value &= FLAG_MASK; child->thread.regs.regs.gp[HOST_EFLAGS] |= value; return 0; - case ORIG_EAX: - child->thread.regs.regs.syscall = value; - return 0; default : panic("Bad register in putreg() : %d\n", regno); } @@ -143,8 +142,6 @@ unsigned long getreg(struct task_struct *child, int regno) regno >>= 2; switch (regno) { - case ORIG_EAX: - return child->thread.regs.regs.syscall; case FS: case GS: case DS: @@ -163,6 +160,7 @@ unsigned long getreg(struct task_struct *child, int regno) case EDI: case EBP: case EFL: + case ORIG_EAX: break; default: panic("Bad register in getreg() : %d\n", regno); diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c index acda713ab5be..abf4901c917b 100644 --- a/arch/x86/xen/apic.c +++ b/arch/x86/xen/apic.c @@ -64,7 +64,7 @@ static u32 xen_apic_read(u32 reg) if (reg != APIC_ID) return 0; - ret = HYPERVISOR_dom0_op(&op); + ret = HYPERVISOR_platform_op(&op); if (ret) return 0; diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index b7de78bdc09c..d09e4c9d7cc5 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -415,7 +415,7 @@ static bool __init xen_check_mwait(void) set_xen_guest_handle(op.u.set_pminfo.pdc, buf); - if ((HYPERVISOR_dom0_op(&op) == 0) && + if ((HYPERVISOR_platform_op(&op) == 0) && (buf[2] & (ACPI_PDC_C_C1_FFH | ACPI_PDC_C_C2C3_FFH))) { cpuid_leaf5_ecx_val = cx; cpuid_leaf5_edx_val = dx; @@ -1229,10 +1229,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = { .iret = xen_iret, #ifdef CONFIG_X86_64 - .usergs_sysret32 = xen_sysret32, .usergs_sysret64 = xen_sysret64, -#else - .irq_enable_sysexit = xen_sysexit, #endif .load_tr_desc = paravirt_nop, @@ -1265,12 +1262,6 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = { .end_context_switch = xen_end_context_switch, }; -static const struct pv_apic_ops xen_apic_ops __initconst = { -#ifdef CONFIG_X86_LOCAL_APIC - .startup_ipi_hook = paravirt_nop, -#endif -}; - static void xen_reboot(int reason) { struct sched_shutdown r = { .reason = reason }; @@ -1374,7 +1365,7 @@ static void __init xen_boot_params_init_edd(void) info->params.length = sizeof(info->params); set_xen_guest_handle(op.u.firmware_info.u.disk_info.edd_params, &info->params); - ret = HYPERVISOR_dom0_op(&op); + ret = HYPERVISOR_platform_op(&op); if (ret) break; @@ -1392,7 +1383,7 @@ static void __init xen_boot_params_init_edd(void) op.u.firmware_info.type = XEN_FW_DISK_MBR_SIGNATURE; for (nr = 0; nr < EDD_MBR_SIG_MAX; nr++) { op.u.firmware_info.index = nr; - ret = HYPERVISOR_dom0_op(&op); + ret = HYPERVISOR_platform_op(&op); if (ret) break; mbr_signature[nr] = op.u.firmware_info.u.disk_mbr_signature.mbr_signature; @@ -1538,7 +1529,6 @@ asmlinkage __visible void __init xen_start_kernel(void) if (xen_initial_domain()) pv_info.features |= PV_SUPPORTED_RTC; pv_init_ops = xen_init_ops; - pv_apic_ops = xen_apic_ops; if (!xen_pvh_domain()) { pv_cpu_ops = xen_cpu_ops; @@ -1700,7 +1690,7 @@ asmlinkage __visible void __init xen_start_kernel(void) xen_start_info->console.domU.mfn = 0; xen_start_info->console.domU.evtchn = 0; - if (HYPERVISOR_dom0_op(&op) == 0) + if (HYPERVISOR_platform_op(&op) == 0) boot_params.kbd_status = op.u.firmware_info.u.kbd_shift_flags; /* Make sure ACS will be enabled */ diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index cb5e266a8bf7..c913ca4f6958 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -2436,7 +2436,6 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = { .flush_tlb_others = xen_flush_tlb_others, .pte_update = paravirt_nop, - .pte_update_defer = paravirt_nop, .pgd_alloc = xen_pgd_alloc, .pgd_free = xen_pgd_free, diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index df0c40559583..7f664c416faf 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c @@ -34,7 +34,8 @@ static void xen_hvm_post_suspend(int suspend_cancelled) { #ifdef CONFIG_XEN_PVHVM int cpu; - xen_hvm_init_shared_info(); + if (!suspend_cancelled) + xen_hvm_init_shared_info(); xen_callback_vector(); xen_unplug_emulated_devices(); if (xen_feature(XENFEAT_hvm_safe_pvclock)) { diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index f1ba6a092854..a0a4e554c6f1 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -16,6 +16,7 @@ #include <linux/gfp.h> #include <linux/slab.h> #include <linux/pvclock_gtod.h> +#include <linux/timekeeper_internal.h> #include <asm/pvclock.h> #include <asm/xen/hypervisor.h> @@ -32,86 +33,12 @@ #define TIMER_SLOP 100000 #define NS_PER_TICK (1000000000LL / HZ) -/* runstate info updated by Xen */ -static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate); - /* snapshots of runstate info */ static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate_snapshot); /* unused ns of stolen time */ static DEFINE_PER_CPU(u64, xen_residual_stolen); -/* return an consistent snapshot of 64-bit time/counter value */ -static u64 get64(const u64 *p) -{ - u64 ret; - - if (BITS_PER_LONG < 64) { - u32 *p32 = (u32 *)p; - u32 h, l; - - /* - * Read high then low, and then make sure high is - * still the same; this will only loop if low wraps - * and carries into high. - * XXX some clean way to make this endian-proof? - */ - do { - h = p32[1]; - barrier(); - l = p32[0]; - barrier(); - } while (p32[1] != h); - - ret = (((u64)h) << 32) | l; - } else - ret = *p; - - return ret; -} - -/* - * Runstate accounting - */ -static void get_runstate_snapshot(struct vcpu_runstate_info *res) -{ - u64 state_time; - struct vcpu_runstate_info *state; - - BUG_ON(preemptible()); - - state = this_cpu_ptr(&xen_runstate); - - /* - * The runstate info is always updated by the hypervisor on - * the current CPU, so there's no need to use anything - * stronger than a compiler barrier when fetching it. - */ - do { - state_time = get64(&state->state_entry_time); - barrier(); - *res = *state; - barrier(); - } while (get64(&state->state_entry_time) != state_time); -} - -/* return true when a vcpu could run but has no real cpu to run on */ -bool xen_vcpu_stolen(int vcpu) -{ - return per_cpu(xen_runstate, vcpu).state == RUNSTATE_runnable; -} - -void xen_setup_runstate_info(int cpu) -{ - struct vcpu_register_runstate_memory_area area; - - area.addr.v = &per_cpu(xen_runstate, cpu); - - if (HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area, - cpu, &area)) - BUG(); -} - static void do_stolen_accounting(void) { struct vcpu_runstate_info state; @@ -119,7 +46,7 @@ static void do_stolen_accounting(void) s64 runnable, offline, stolen; cputime_t ticks; - get_runstate_snapshot(&state); + xen_get_runstate_snapshot(&state); WARN_ON(state.state != RUNSTATE_running); @@ -194,26 +121,46 @@ static int xen_pvclock_gtod_notify(struct notifier_block *nb, unsigned long was_set, void *priv) { /* Protected by the calling core code serialization */ - static struct timespec next_sync; + static struct timespec64 next_sync; struct xen_platform_op op; - struct timespec now; + struct timespec64 now; + struct timekeeper *tk = priv; + static bool settime64_supported = true; + int ret; - now = __current_kernel_time(); + now.tv_sec = tk->xtime_sec; + now.tv_nsec = (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift); /* * We only take the expensive HV call when the clock was set * or when the 11 minutes RTC synchronization time elapsed. */ - if (!was_set && timespec_compare(&now, &next_sync) < 0) + if (!was_set && timespec64_compare(&now, &next_sync) < 0) return NOTIFY_OK; - op.cmd = XENPF_settime; - op.u.settime.secs = now.tv_sec; - op.u.settime.nsecs = now.tv_nsec; - op.u.settime.system_time = xen_clocksource_read(); +again: + if (settime64_supported) { + op.cmd = XENPF_settime64; + op.u.settime64.mbz = 0; + op.u.settime64.secs = now.tv_sec; + op.u.settime64.nsecs = now.tv_nsec; + op.u.settime64.system_time = xen_clocksource_read(); + } else { + op.cmd = XENPF_settime32; + op.u.settime32.secs = now.tv_sec; + op.u.settime32.nsecs = now.tv_nsec; + op.u.settime32.system_time = xen_clocksource_read(); + } + + ret = HYPERVISOR_platform_op(&op); - (void)HYPERVISOR_dom0_op(&op); + if (ret == -ENOSYS && settime64_supported) { + settime64_supported = false; + goto again; + } + if (ret < 0) + return NOTIFY_BAD; /* * Move the next drift compensation time 11 minutes diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S index fd92a64d748e..feb6d40a0860 100644 --- a/arch/x86/xen/xen-asm_32.S +++ b/arch/x86/xen/xen-asm_32.S @@ -35,20 +35,6 @@ check_events: ret /* - * We can't use sysexit directly, because we're not running in ring0. - * But we can easily fake it up using iret. Assuming xen_sysexit is - * jumped to with a standard stack frame, we can just strip it back to - * a standard iret frame and use iret. - */ -ENTRY(xen_sysexit) - movl PT_EAX(%esp), %eax /* Shouldn't be necessary? */ - orl $X86_EFLAGS_IF, PT_EFLAGS(%esp) - lea PT_EIP(%esp), %esp - - jmp xen_iret -ENDPROC(xen_sysexit) - -/* * This is run where a normal iret would be run, with the same stack setup: * 8: eflags * 4: cs diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index f22667abf7b9..cc8acc410ddb 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S @@ -68,25 +68,6 @@ ENTRY(xen_sysret64) ENDPATCH(xen_sysret64) RELOC(xen_sysret64, 1b+1) -ENTRY(xen_sysret32) - /* - * We're already on the usermode stack at this point, but - * still with the kernel gs, so we can easily switch back - */ - movq %rsp, PER_CPU_VAR(rsp_scratch) - movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp - - pushq $__USER32_DS - pushq PER_CPU_VAR(rsp_scratch) - pushq %r11 - pushq $__USER32_CS - pushq %rcx - - pushq $0 -1: jmp hypercall_iret -ENDPATCH(xen_sysret32) -RELOC(xen_sysret32, 1b+1) - /* * Xen handles syscall callbacks much like ordinary exceptions, which * means we have: diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 1399423f3418..4140b070f2e9 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -139,9 +139,6 @@ DECL_ASM(void, xen_restore_fl_direct, unsigned long); /* These are not functions, and cannot be called normally */ __visible void xen_iret(void); -#ifdef CONFIG_X86_32 -__visible void xen_sysexit(void); -#endif __visible void xen_sysret32(void); __visible void xen_sysret64(void); __visible void xen_adjust_exception_frame(void); diff --git a/arch/xtensa/include/uapi/asm/mman.h b/arch/xtensa/include/uapi/asm/mman.h index 360944e1da52..d030594ed22b 100644 --- a/arch/xtensa/include/uapi/asm/mman.h +++ b/arch/xtensa/include/uapi/asm/mman.h @@ -86,8 +86,10 @@ #define MADV_SEQUENTIAL 2 /* expect sequential page references */ #define MADV_WILLNEED 3 /* will need these pages */ #define MADV_DONTNEED 4 /* don't need these pages */ +#define MADV_FREE 5 /* free pages only if memory pressure */ /* common parameters: try to keep these consistent across architectures */ +#define MADV_FREE 8 /* free pages only if memory pressure */ #define MADV_REMOVE 9 /* remove these pages & resources */ #define MADV_DONTFORK 10 /* don't inherit across fork */ #define MADV_DOFORK 11 /* do inherit across fork */ diff --git a/arch/xtensa/include/uapi/asm/socket.h b/arch/xtensa/include/uapi/asm/socket.h index 4120af086160..fd3b96d1153f 100644 --- a/arch/xtensa/include/uapi/asm/socket.h +++ b/arch/xtensa/include/uapi/asm/socket.h @@ -96,4 +96,7 @@ #define SO_ATTACH_BPF 50 #define SO_DETACH_BPF SO_DETACH_FILTER +#define SO_ATTACH_REUSEPORT_CBPF 51 +#define SO_ATTACH_REUSEPORT_EBPF 52 + #endif /* _XTENSA_SOCKET_H */ diff --git a/arch/xtensa/mm/tlb.c b/arch/xtensa/mm/tlb.c index 5ece856c5725..35c822286bbe 100644 --- a/arch/xtensa/mm/tlb.c +++ b/arch/xtensa/mm/tlb.c @@ -245,7 +245,7 @@ static int check_tlb_entry(unsigned w, unsigned e, bool dtlb) page_mapcount(p)); if (!page_count(p)) rc |= TLB_INSANE; - else if (page_mapped(p)) + else if (page_mapcount(p)) rc |= TLB_SUSPICIOUS; } else { rc |= TLB_INSANE; diff --git a/arch/xtensa/platforms/iss/simdisk.c b/arch/xtensa/platforms/iss/simdisk.c index 3c3ace2c46b6..f58a4e6472cb 100644 --- a/arch/xtensa/platforms/iss/simdisk.c +++ b/arch/xtensa/platforms/iss/simdisk.c @@ -227,16 +227,12 @@ static ssize_t proc_read_simdisk(struct file *file, char __user *buf, static ssize_t proc_write_simdisk(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { - char *tmp = kmalloc(count + 1, GFP_KERNEL); + char *tmp = memdup_user_nul(buf, count); struct simdisk *dev = PDE_DATA(file_inode(file)); int err; - if (tmp == NULL) - return -ENOMEM; - if (copy_from_user(tmp, buf, count)) { - err = -EFAULT; - goto out_free; - } + if (IS_ERR(tmp)) + return PTR_ERR(tmp); err = simdisk_detach(dev); if (err != 0) @@ -244,8 +240,6 @@ static ssize_t proc_write_simdisk(struct file *file, const char __user *buf, if (count > 0 && tmp[count - 1] == '\n') tmp[count - 1] = 0; - else - tmp[count] = 0; if (tmp[0]) err = simdisk_attach(dev, tmp); |