Diffstat (limited to 'arch/x86'): 185 files changed, 6769 insertions(+), 3026 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index a42e2e99caae..b72777ff32a9 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -7,11 +7,13 @@ config 64BIT
	  Say no to build a 32-bit kernel - formerly known as i386
 
 config X86_32
-	def_bool !64BIT
+	def_bool y
+	depends on !64BIT
 	select CLKSRC_I8253
 
 config X86_64
-	def_bool 64BIT
+	def_bool y
+	depends on 64BIT
 	select X86_DEV_DMA_OPS
 
 ### Arch settings
@@ -36,6 +38,7 @@ config X86
 	select HAVE_KRETPROBES
 	select HAVE_OPTPROBES
 	select HAVE_FTRACE_MCOUNT_RECORD
+	select HAVE_FENTRY if X86_64
 	select HAVE_C_RECORDMCOUNT
 	select HAVE_DYNAMIC_FTRACE
 	select HAVE_FUNCTION_TRACER
@@ -60,6 +63,8 @@ config X86
 	select HAVE_MIXED_BREAKPOINTS_REGS
 	select PERF_EVENTS
 	select HAVE_PERF_EVENTS_NMI
+	select HAVE_PERF_REGS
+	select HAVE_PERF_USER_STACK_DUMP
 	select ANON_INODES
 	select HAVE_ALIGNED_STRUCT_PAGE if SLUB && !M386
 	select HAVE_CMPXCHG_LOCAL if !M386
@@ -97,9 +102,12 @@ config X86
 	select KTIME_SCALAR if X86_32
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
+	select HAVE_RCU_USER_QS if X86_64
+	select HAVE_IRQ_TIME_ACCOUNTING
 
 config INSTRUCTION_DECODER
-	def_bool (KPROBES || PERF_EVENTS || UPROBES)
+	def_bool y
+	depends on KPROBES || PERF_EVENTS || UPROBES
 
 config OUTPUT_FORMAT
 	string
@@ -127,13 +135,15 @@ config SBUS
 	bool
 
 config NEED_DMA_MAP_STATE
-	def_bool (X86_64 || INTEL_IOMMU || DMA_API_DEBUG)
+	def_bool y
+	depends on X86_64 || INTEL_IOMMU || DMA_API_DEBUG
 
 config NEED_SG_DMA_LENGTH
 	def_bool y
 
 config GENERIC_ISA_DMA
-	def_bool ISA_DMA_API
+	def_bool y
+	depends on ISA_DMA_API
 
 config GENERIC_BUG
 	def_bool y
@@ -150,13 +160,16 @@ config GENERIC_GPIO
 	bool
 
 config ARCH_MAY_HAVE_PC_FDC
-	def_bool ISA_DMA_API
+	def_bool y
+	depends on ISA_DMA_API
 
 config RWSEM_GENERIC_SPINLOCK
-	def_bool !X86_XADD
+	def_bool y
+	depends on !X86_XADD
 
 config RWSEM_XCHGADD_ALGORITHM
-	def_bool X86_XADD
+	def_bool y
+	depends on X86_XADD
 
 config GENERIC_CALIBRATE_DELAY
 	def_bool y
@@ -741,13 +754,14 @@ config SWIOTLB
 	def_bool y if X86_64
 	---help---
 	  Support for software bounce buffers used on x86-64 systems
-	  which don't have a hardware IOMMU (e.g. the current generation
-	  of Intel's x86-64 CPUs). Using this PCI devices which can only
-	  access 32-bits of memory can be used on systems with more than
-	  3 GB of memory. If unsure, say Y.
+	  which don't have a hardware IOMMU. With this, PCI devices
+	  which can only access 32 bits of memory can be used on systems
+	  with more than 3 GB of memory.
+	  If unsure, say Y.
 
 config IOMMU_HELPER
-	def_bool (CALGARY_IOMMU || GART_IOMMU || SWIOTLB || AMD_IOMMU)
+	def_bool y
+	depends on CALGARY_IOMMU || GART_IOMMU || SWIOTLB || AMD_IOMMU
 
 config MAXSMP
 	bool "Enable Maximum number of SMP Processors and NUMA Nodes"
@@ -791,17 +805,6 @@ config SCHED_MC
 	  making when dealing with multi-core CPU chips at a cost of slightly
 	  increased overhead in some places. If unsure say N here.
 
-config IRQ_TIME_ACCOUNTING
-	bool "Fine granularity task level IRQ time accounting"
-	default n
-	---help---
-	  Select this option to enable fine granularity task irq time
-	  accounting. This is done by reading a timestamp on each
-	  transitions between softirq and hardirq state, so there can be a
-	  small performance impact.
-
-	  If in doubt, say N here.
-
 source "kernel/Kconfig.preempt"
 
 config X86_UP_APIC
@@ -866,6 +869,7 @@ config X86_REROUTE_FOR_BROKEN_BOOT_IRQS
 config X86_MCE
 	bool "Machine Check / overheating reporting"
+	default y
 	---help---
 	  Machine Check support allows the processor to notify the
 	  kernel if it detects a problem (e.g.
	  overheating, data corruption).
@@ -977,25 +981,25 @@ config X86_REBOOTFIXUPS
 	  Say N otherwise.
 
 config MICROCODE
-	tristate "/dev/cpu/microcode - microcode support"
+	tristate "CPU microcode loading support"
 	select FW_LOADER
 	---help---
+
 	  If you say Y here, you will be able to update the microcode on
 	  certain Intel and AMD processors. The Intel support is for the
-	  IA32 family, e.g. Pentium Pro, Pentium II, Pentium III,
-	  Pentium 4, Xeon etc. The AMD support is for family 0x10 and
-	  0x11 processors, e.g. Opteron, Phenom and Turion 64 Ultra.
-	  You will obviously need the actual microcode binary data itself
-	  which is not shipped with the Linux kernel.
+	  IA32 family, e.g. Pentium Pro, Pentium II, Pentium III, Pentium 4,
+	  Xeon etc. The AMD support is for families 0x10 and later. You will
+	  obviously need the actual microcode binary data itself which is not
+	  shipped with the Linux kernel.
 
 	  This option selects the general module only, you need to select
 	  at least one vendor specific module as well.
 
-	  To compile this driver as a module, choose M here: the
-	  module will be called microcode.
+	  To compile this driver as a module, choose M here: the module
+	  will be called microcode.
 
 config MICROCODE_INTEL
-	bool "Intel microcode patch loading support"
+	bool "Intel microcode loading support"
 	depends on MICROCODE
 	default MICROCODE
 	select FW_LOADER
@@ -1008,7 +1012,7 @@ config MICROCODE_INTEL
 	  <http://www.urbanmyth.org/microcode/>.
 
 config MICROCODE_AMD
-	bool "AMD microcode patch loading support"
+	bool "AMD microcode loading support"
 	depends on MICROCODE
 	select FW_LOADER
 	---help---
@@ -1154,10 +1158,12 @@ config X86_PAE
 	  consumes more pagetable space per process.
 
 config ARCH_PHYS_ADDR_T_64BIT
-	def_bool X86_64 || X86_PAE
+	def_bool y
+	depends on X86_64 || X86_PAE
 
 config ARCH_DMA_ADDR_T_64BIT
-	def_bool X86_64 || HIGHMEM64G
+	def_bool y
+	depends on X86_64 || HIGHMEM64G
 
 config DIRECT_GBPAGES
 	bool "Enable 1GB pages for kernel pagetables" if EXPERT
@@ -1280,8 +1286,8 @@ config ARCH_SELECT_MEMORY_MODEL
 	depends on ARCH_SPARSEMEM_ENABLE
 
 config ARCH_MEMORY_PROBE
-	def_bool X86_64
-	depends on MEMORY_HOTPLUG
+	def_bool y
+	depends on X86_64 && MEMORY_HOTPLUG
 
 config ARCH_PROC_KCORE_TEXT
 	def_bool y
@@ -1482,6 +1488,17 @@ config ARCH_RANDOM
 	  If supported, this is a high bandwidth, cryptographically
 	  secure hardware random number generator.
 
+config X86_SMAP
+	def_bool y
+	prompt "Supervisor Mode Access Prevention" if EXPERT
+	---help---
+	  Supervisor Mode Access Prevention (SMAP) is a security
+	  feature in newer Intel processors. There is a small
+	  performance cost if this is enabled and turned on; there is
+	  also a small increase in the kernel size if this is enabled.
+
+	  If unsure, say Y.
+
 config EFI
 	bool "EFI runtime service support"
 	depends on ACPI
@@ -1970,7 +1987,6 @@ config PCI_MMCONFIG
 
 config PCI_CNB20LE_QUIRK
 	bool "Read CNB20LE Host Bridge Windows" if EXPERT
-	default n
 	depends on PCI && EXPERIMENTAL
 	help
 	  Read the PCI windows out of the CNB20LE host bridge. This allows
This allows @@ -2181,18 +2197,18 @@ config COMPAT depends on IA32_EMULATION || X86_X32 select ARCH_WANT_OLD_COMPAT_IPC +if COMPAT config COMPAT_FOR_U64_ALIGNMENT - def_bool COMPAT - depends on X86_64 + def_bool y config SYSVIPC_COMPAT def_bool y - depends on COMPAT && SYSVIPC + depends on SYSVIPC config KEYS_COMPAT - bool - depends on COMPAT && KEYS - default y + def_bool y + depends on KEYS +endif endmenu diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 706e12e9984b..f3b86d0df44e 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -306,7 +306,8 @@ config X86_INTERNODE_CACHE_SHIFT default X86_L1_CACHE_SHIFT config X86_CMPXCHG - def_bool X86_64 || (X86_32 && !M386) + def_bool y + depends on X86_64 || (X86_32 && !M386) config X86_L1_CACHE_SHIFT int @@ -317,7 +318,7 @@ config X86_L1_CACHE_SHIFT config X86_XADD def_bool y - depends on X86_64 || !M386 + depends on !M386 config X86_PPRO_FENCE bool "PentiumPro memory ordering errata workaround" diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 682e9c210baa..474ca35b1bce 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -142,7 +142,7 @@ KBUILD_CFLAGS += $(call cc-option,-mno-avx,) KBUILD_CFLAGS += $(mflags-y) KBUILD_AFLAGS += $(mflags-y) -archscripts: +archscripts: scripts_basic $(Q)$(MAKE) $(build)=arch/x86/tools relocs ### diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index f7535bedc33f..ce03476d8c8f 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -37,7 +37,7 @@ setup-y += video-bios.o targets += $(setup-y) hostprogs-y := mkcpustr tools/build -HOST_EXTRACFLAGS += -I$(srctree)/tools/include $(LINUXINCLUDE) \ +HOST_EXTRACFLAGS += -I$(srctree)/tools/include $(USERINCLUDE) \ -D__EXPORTED_HEADERS__ $(obj)/cpu.o: $(obj)/cpustr.h @@ -52,7 +52,7 @@ $(obj)/cpustr.h: $(obj)/mkcpustr FORCE # How to compile the 16-bit code. Note we always compile for -march=i386, # that way we can complain to the user if the CPU is insufficient. 
-KBUILD_CFLAGS	:= $(LINUXINCLUDE) -g -Os -D_SETUP -D__KERNEL__ \
+KBUILD_CFLAGS	:= $(USERINCLUDE) -g -Os -D_SETUP -D__KERNEL__ \
 		   -DDISABLE_BRANCH_PROFILING \
 		   -Wall -Wstrict-prototypes \
 		   -march=i386 -mregparm=3 \
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index e398bb5d63bb..8a84501acb1b 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -28,6 +28,9 @@ VMLINUX_OBJS = $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \
 	$(obj)/string.o $(obj)/cmdline.o $(obj)/early_serial_console.o \
 	$(obj)/piggy.o
 
+$(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone
+$(obj)/efi_stub_$(BITS).o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone
+
 ifeq ($(CONFIG_EFI_STUB), y)
 	VMLINUX_OBJS += $(obj)/eboot.o $(obj)/efi_stub_$(BITS).o
 endif
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index b3e0227df2c9..c760e073963e 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -276,8 +276,9 @@ static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto,
 	nr_gops = size / sizeof(void *);
 	for (i = 0; i < nr_gops; i++) {
 		struct efi_graphics_output_mode_info *info;
-		efi_guid_t pciio_proto = EFI_PCI_IO_PROTOCOL_GUID;
-		void *pciio;
+		efi_guid_t conout_proto = EFI_CONSOLE_OUT_DEVICE_GUID;
+		bool conout_found = false;
+		void *dummy;
 		void *h = gop_handle[i];
 
 		status = efi_call_phys3(sys_table->boottime->handle_protocol,
@@ -285,19 +286,21 @@ static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto,
 		if (status != EFI_SUCCESS)
 			continue;
 
-		efi_call_phys3(sys_table->boottime->handle_protocol,
-			       h, &pciio_proto, &pciio);
+		status = efi_call_phys3(sys_table->boottime->handle_protocol,
+					h, &conout_proto, &dummy);
+
+		if (status == EFI_SUCCESS)
+			conout_found = true;
 
 		status = efi_call_phys4(gop->query_mode, gop,
 					gop->mode->mode, &size, &info);
-		if (status == EFI_SUCCESS && (!first_gop || pciio)) {
+		if (status == EFI_SUCCESS && (!first_gop || conout_found)) {
 			/*
-			 * Apple provide GOPs that are not backed by
-			 * real hardware (they're used to handle
-			 * multiple displays). The workaround is to
-			 * search for a GOP implementing the PCIIO
-			 * protocol, and if one isn't found, to just
-			 * fallback to the first GOP.
+			 * Systems that use the UEFI Console Splitter may
+			 * provide multiple GOP devices, not all of which are
+			 * backed by real hardware. The workaround is to search
+			 * for a GOP implementing the ConOut protocol, and if
+			 * one isn't found, to just fall back to the first GOP.
 			 */
 			width = info->horizontal_resolution;
 			height = info->vertical_resolution;
@@ -308,10 +311,10 @@ static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto,
 			pixels_per_scan_line = info->pixels_per_scan_line;
 
 			/*
-			 * Once we've found a GOP supporting PCIIO,
+			 * Once we've found a GOP supporting ConOut,
 			 * don't bother looking any further.
*/ - if (pciio) + if (conout_found) break; first_gop = gop; @@ -328,7 +331,6 @@ static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto, si->lfb_width = width; si->lfb_height = height; si->lfb_base = fb_base; - si->lfb_size = fb_size; si->pages = 1; if (pixel_format == PIXEL_RGB_RESERVED_8BIT_PER_COLOR) { @@ -376,6 +378,10 @@ static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto, si->rsvd_pos = 0; } + si->lfb_size = si->lfb_linelength * si->lfb_height; + + si->capabilities |= VIDEO_CAPABILITY_SKIP_QUIRKS; + free_handle: efi_call_phys1(sys_table->boottime->free_pool, gop_handle); return status; diff --git a/arch/x86/boot/compressed/eboot.h b/arch/x86/boot/compressed/eboot.h index 3b6e15627c55..e5b0a8f91c5f 100644 --- a/arch/x86/boot/compressed/eboot.h +++ b/arch/x86/boot/compressed/eboot.h @@ -14,6 +14,10 @@ #define EFI_PAGE_SIZE (1UL << EFI_PAGE_SHIFT) #define EFI_READ_CHUNK_SIZE (1024 * 1024) +#define EFI_CONSOLE_OUT_DEVICE_GUID \ + EFI_GUID(0xd3b36f2c, 0xd551, 0x11d4, 0x9a, 0x46, 0x0, 0x90, 0x27, \ + 0x3f, 0xc1, 0x4d) + #define PIXEL_RGB_RESERVED_8BIT_PER_COLOR 0 #define PIXEL_BGR_RESERVED_8BIT_PER_COLOR 1 #define PIXEL_BIT_MASK 2 diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index b4e15dd6786a..2a017441b8b2 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -32,10 +32,6 @@ SYSSEG = 0x1000 /* historical load address >> 4 */ #define SVGA_MODE ASK_VGA #endif -#ifndef RAMDISK -#define RAMDISK 0 -#endif - #ifndef ROOT_RDONLY #define ROOT_RDONLY 1 #endif diff --git a/arch/x86/boot/mkcpustr.c b/arch/x86/boot/mkcpustr.c index 919257f526f2..4579eff0ef4d 100644 --- a/arch/x86/boot/mkcpustr.c +++ b/arch/x86/boot/mkcpustr.c @@ -15,6 +15,8 @@ #include <stdio.h> +#include "../include/asm/required-features.h" +#include "../include/asm/cpufeature.h" #include "../kernel/cpu/capflags.c" int main(void) diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index 119db67dcb03..5598547281a7 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig @@ -8,6 +8,8 @@ CONFIG_TASK_DELAY_ACCT=y CONFIG_TASK_XACCT=y CONFIG_TASK_IO_ACCOUNTING=y CONFIG_AUDIT=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_LOG_BUF_SHIFT=18 CONFIG_CGROUPS=y CONFIG_CGROUP_FREEZER=y @@ -34,8 +36,6 @@ CONFIG_SGI_PARTITION=y CONFIG_SUN_PARTITION=y CONFIG_KARMA_PARTITION=y CONFIG_EFI_PARTITION=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y CONFIG_SMP=y CONFIG_X86_GENERIC=y CONFIG_HPET_TIMER=y @@ -144,8 +144,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEBUG_DEVRES=y CONFIG_CONNECTOR=y CONFIG_BLK_DEV_LOOP=y -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_SIZE=16384 CONFIG_BLK_DEV_SD=y CONFIG_BLK_DEV_SR=y CONFIG_BLK_DEV_SR_VENDOR=y @@ -231,8 +229,6 @@ CONFIG_SND_HRTIMER=y CONFIG_SND_HDA_INTEL=y CONFIG_SND_HDA_HWDEP=y CONFIG_HIDRAW=y -CONFIG_HID_PID=y -CONFIG_USB_HIDDEV=y CONFIG_HID_GYRATION=y CONFIG_LOGITECH_FF=y CONFIG_HID_NTRIG=y @@ -243,11 +239,11 @@ CONFIG_HID_SAMSUNG=y CONFIG_HID_SONY=y CONFIG_HID_SUNPLUS=y CONFIG_HID_TOPSEED=y +CONFIG_HID_PID=y +CONFIG_USB_HIDDEV=y CONFIG_USB=y CONFIG_USB_DEBUG=y CONFIG_USB_ANNOUNCE_NEW_DEVICES=y -CONFIG_USB_DEVICEFS=y -# CONFIG_USB_DEVICE_CLASS is not set CONFIG_USB_MON=y CONFIG_USB_EHCI_HCD=y # CONFIG_USB_EHCI_TT_NEWSCHED is not set @@ -262,10 +258,9 @@ CONFIG_RTC_CLASS=y CONFIG_DMADEVICES=y CONFIG_EEEPC_LAPTOP=y CONFIG_EFI_VARS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y 
+CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_QUOTA=y CONFIG_QUOTA_NETLINK_INTERFACE=y # CONFIG_PRINT_QUOTA_WARNING is not set @@ -280,7 +275,6 @@ CONFIG_PROC_KCORE=y CONFIG_TMPFS_POSIX_ACL=y CONFIG_HUGETLBFS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_NFS_V3_ACL=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y @@ -299,13 +293,11 @@ CONFIG_DEBUG_KERNEL=y CONFIG_SCHEDSTATS=y CONFIG_TIMER_STATS=y CONFIG_DEBUG_STACK_USAGE=y -CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_BLK_DEV_IO_TRACE=y CONFIG_PROVIDE_OHCI1394_DMA_INIT=y CONFIG_EARLY_PRINTK_DBGP=y CONFIG_DEBUG_STACKOVERFLOW=y # CONFIG_DEBUG_RODATA_TEST is not set -CONFIG_DEBUG_NX_TEST=m CONFIG_DEBUG_BOOT_PARAMS=y CONFIG_OPTIMIZE_INLINING=y CONFIG_KEYS_DEBUG_PROC_KEYS=y @@ -316,4 +308,3 @@ CONFIG_SECURITY_SELINUX_BOOTPARAM=y CONFIG_SECURITY_SELINUX_DISABLE=y CONFIG_CRYPTO_AES_586=y # CONFIG_CRYPTO_ANSI_CPRNG is not set -CONFIG_CRC_T10DIF=y diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index 76eb2903809f..671524d0f6c0 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -8,6 +8,8 @@ CONFIG_TASK_DELAY_ACCT=y CONFIG_TASK_XACCT=y CONFIG_TASK_IO_ACCOUNTING=y CONFIG_AUDIT=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_LOG_BUF_SHIFT=18 CONFIG_CGROUPS=y CONFIG_CGROUP_FREEZER=y @@ -34,8 +36,6 @@ CONFIG_SGI_PARTITION=y CONFIG_SUN_PARTITION=y CONFIG_KARMA_PARTITION=y CONFIG_EFI_PARTITION=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y CONFIG_SMP=y CONFIG_CALGARY_IOMMU=y CONFIG_NR_CPUS=64 @@ -144,8 +144,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEBUG_DEVRES=y CONFIG_CONNECTOR=y CONFIG_BLK_DEV_LOOP=y -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_SIZE=16384 CONFIG_BLK_DEV_SD=y CONFIG_BLK_DEV_SR=y CONFIG_BLK_DEV_SR_VENDOR=y @@ -227,8 +225,6 @@ CONFIG_SND_HRTIMER=y CONFIG_SND_HDA_INTEL=y CONFIG_SND_HDA_HWDEP=y CONFIG_HIDRAW=y -CONFIG_HID_PID=y -CONFIG_USB_HIDDEV=y CONFIG_HID_GYRATION=y CONFIG_LOGITECH_FF=y CONFIG_HID_NTRIG=y @@ -239,11 +235,11 @@ CONFIG_HID_SAMSUNG=y CONFIG_HID_SONY=y CONFIG_HID_SUNPLUS=y CONFIG_HID_TOPSEED=y +CONFIG_HID_PID=y +CONFIG_USB_HIDDEV=y CONFIG_USB=y CONFIG_USB_DEBUG=y CONFIG_USB_ANNOUNCE_NEW_DEVICES=y -CONFIG_USB_DEVICEFS=y -# CONFIG_USB_DEVICE_CLASS is not set CONFIG_USB_MON=y CONFIG_USB_EHCI_HCD=y # CONFIG_USB_EHCI_TT_NEWSCHED is not set @@ -262,10 +258,9 @@ CONFIG_AMD_IOMMU_STATS=y CONFIG_INTEL_IOMMU=y # CONFIG_INTEL_IOMMU_DEFAULT_ON is not set CONFIG_EFI_VARS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_QUOTA=y CONFIG_QUOTA_NETLINK_INTERFACE=y # CONFIG_PRINT_QUOTA_WARNING is not set @@ -280,7 +275,6 @@ CONFIG_PROC_KCORE=y CONFIG_TMPFS_POSIX_ACL=y CONFIG_HUGETLBFS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_NFS_V3_ACL=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y @@ -298,13 +292,11 @@ CONFIG_DEBUG_KERNEL=y CONFIG_SCHEDSTATS=y CONFIG_TIMER_STATS=y CONFIG_DEBUG_STACK_USAGE=y -CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_BLK_DEV_IO_TRACE=y CONFIG_PROVIDE_OHCI1394_DMA_INIT=y CONFIG_EARLY_PRINTK_DBGP=y CONFIG_DEBUG_STACKOVERFLOW=y # CONFIG_DEBUG_RODATA_TEST is not set -CONFIG_DEBUG_NX_TEST=m CONFIG_DEBUG_BOOT_PARAMS=y CONFIG_OPTIMIZE_INLINING=y CONFIG_KEYS_DEBUG_PROC_KEYS=y @@ -314,4 +306,3 @@ CONFIG_SECURITY_SELINUX=y CONFIG_SECURITY_SELINUX_BOOTPARAM=y CONFIG_SECURITY_SELINUX_DISABLE=y # CONFIG_CRYPTO_ANSI_CPRNG is not set -CONFIG_CRC_T10DIF=y diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index 
e908e5de82d3..5bacb4a226ac 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -12,6 +12,8 @@ obj-$(CONFIG_CRYPTO_SERPENT_SSE2_586) += serpent-sse2-i586.o obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o obj-$(CONFIG_CRYPTO_CAMELLIA_X86_64) += camellia-x86_64.o +obj-$(CONFIG_CRYPTO_CAST5_AVX_X86_64) += cast5-avx-x86_64.o +obj-$(CONFIG_CRYPTO_CAST6_AVX_X86_64) += cast6-avx-x86_64.o obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o @@ -32,6 +34,8 @@ serpent-sse2-i586-y := serpent-sse2-i586-asm_32.o serpent_sse2_glue.o aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o camellia-x86_64-y := camellia-x86_64-asm_64.o camellia_glue.o +cast5-avx-x86_64-y := cast5-avx-x86_64-asm_64.o cast5_avx_glue.o +cast6-avx-x86_64-y := cast6-avx-x86_64-asm_64.o cast6_avx_glue.o blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o diff --git a/arch/x86/crypto/aes_glue.c b/arch/x86/crypto/aes_glue.c index 59b37deb8c8d..aafe8ce0d65d 100644 --- a/arch/x86/crypto/aes_glue.c +++ b/arch/x86/crypto/aes_glue.c @@ -40,7 +40,6 @@ static struct crypto_alg aes_alg = { .cra_blocksize = AES_BLOCK_SIZE, .cra_ctxsize = sizeof(struct crypto_aes_ctx), .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(aes_alg.cra_list), .cra_u = { .cipher = { .cia_min_keysize = AES_MIN_KEY_SIZE, diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 34fdcff4d2c8..7c04d0da709b 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -28,6 +28,9 @@ #include <crypto/aes.h> #include <crypto/cryptd.h> #include <crypto/ctr.h> +#include <crypto/b128ops.h> +#include <crypto/lrw.h> +#include <crypto/xts.h> #include <asm/cpu_device_id.h> #include <asm/i387.h> #include <asm/crypto/aes.h> @@ -41,18 +44,10 @@ #define HAS_CTR #endif -#if defined(CONFIG_CRYPTO_LRW) || defined(CONFIG_CRYPTO_LRW_MODULE) -#define HAS_LRW -#endif - #if defined(CONFIG_CRYPTO_PCBC) || defined(CONFIG_CRYPTO_PCBC_MODULE) #define HAS_PCBC #endif -#if defined(CONFIG_CRYPTO_XTS) || defined(CONFIG_CRYPTO_XTS_MODULE) -#define HAS_XTS -#endif - /* This data is stored at the end of the crypto_tfm struct. * It's a type of per "session" data storage location. * This needs to be 16 byte aligned. 
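[Editorial aside: the "16 byte aligned" remark above is load-bearing for the LRW/XTS contexts added in the next hunk. Their raw_aes_ctx / raw_tweak_ctx / raw_crypt_ctx members are deliberately over-allocated by AESNI_ALIGN - 1 bytes, and the driver's aes_ctx() helper rounds the raw pointer up to the next boundary. Below is a standalone sketch of that trick with stand-in types, since the real struct crypto_aes_ctx and the kernel's ALIGN() macro aren't available outside the tree; AESNI_ALIGN is assumed to be 16, as in this driver.]

#include <stdint.h>
#include <stdio.h>

#define AESNI_ALIGN 16
/* Userspace stand-in for the kernel's ALIGN(): round x up to a, a power of 2. */
#define ALIGN_UP(x, a) (((x) + ((uintptr_t)(a) - 1)) & ~((uintptr_t)(a) - 1))

/* Stand-in for struct crypto_aes_ctx; only its size matters for the sketch. */
struct aes_ctx_like { uint8_t bytes[484]; };

/* Mirrors the pattern in the patch: over-allocate by AESNI_ALIGN - 1 so a
 * 16-byte-aligned context always fits, wherever the enclosing object lands. */
struct lrw_ctx_like {
	uint8_t raw_aes_ctx[sizeof(struct aes_ctx_like) + AESNI_ALIGN - 1];
};

/* Equivalent of the driver's aes_ctx() pointer-rounding helper. */
static struct aes_ctx_like *aes_ctx(void *raw_ctx)
{
	return (struct aes_ctx_like *)ALIGN_UP((uintptr_t)raw_ctx, AESNI_ALIGN);
}

int main(void)
{
	struct lrw_ctx_like c;

	printf("raw=%p aligned=%p\n",
	       (void *)c.raw_aes_ctx, (void *)aes_ctx(c.raw_aes_ctx));
	return 0;
}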
@@ -79,6 +74,16 @@ struct aesni_hash_subkey_req_data { #define AES_BLOCK_MASK (~(AES_BLOCK_SIZE-1)) #define RFC4106_HASH_SUBKEY_SIZE 16 +struct aesni_lrw_ctx { + struct lrw_table_ctx lrw_table; + u8 raw_aes_ctx[sizeof(struct crypto_aes_ctx) + AESNI_ALIGN - 1]; +}; + +struct aesni_xts_ctx { + u8 raw_tweak_ctx[sizeof(struct crypto_aes_ctx) + AESNI_ALIGN - 1]; + u8 raw_crypt_ctx[sizeof(struct crypto_aes_ctx) + AESNI_ALIGN - 1]; +}; + asmlinkage int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key, unsigned int key_len); asmlinkage void aesni_enc(struct crypto_aes_ctx *ctx, u8 *out, @@ -398,13 +403,6 @@ static int ablk_rfc3686_ctr_init(struct crypto_tfm *tfm) #endif #endif -#ifdef HAS_LRW -static int ablk_lrw_init(struct crypto_tfm *tfm) -{ - return ablk_init_common(tfm, "fpu(lrw(__driver-aes-aesni))"); -} -#endif - #ifdef HAS_PCBC static int ablk_pcbc_init(struct crypto_tfm *tfm) { @@ -412,12 +410,160 @@ static int ablk_pcbc_init(struct crypto_tfm *tfm) } #endif -#ifdef HAS_XTS -static int ablk_xts_init(struct crypto_tfm *tfm) +static void lrw_xts_encrypt_callback(void *ctx, u8 *blks, unsigned int nbytes) { - return ablk_init_common(tfm, "fpu(xts(__driver-aes-aesni))"); + aesni_ecb_enc(ctx, blks, blks, nbytes); +} + +static void lrw_xts_decrypt_callback(void *ctx, u8 *blks, unsigned int nbytes) +{ + aesni_ecb_dec(ctx, blks, blks, nbytes); +} + +static int lrw_aesni_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int keylen) +{ + struct aesni_lrw_ctx *ctx = crypto_tfm_ctx(tfm); + int err; + + err = aes_set_key_common(tfm, ctx->raw_aes_ctx, key, + keylen - AES_BLOCK_SIZE); + if (err) + return err; + + return lrw_init_table(&ctx->lrw_table, key + keylen - AES_BLOCK_SIZE); +} + +static void lrw_aesni_exit_tfm(struct crypto_tfm *tfm) +{ + struct aesni_lrw_ctx *ctx = crypto_tfm_ctx(tfm); + + lrw_free_table(&ctx->lrw_table); +} + +static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct aesni_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + be128 buf[8]; + struct lrw_crypt_req req = { + .tbuf = buf, + .tbuflen = sizeof(buf), + + .table_ctx = &ctx->lrw_table, + .crypt_ctx = aes_ctx(ctx->raw_aes_ctx), + .crypt_fn = lrw_xts_encrypt_callback, + }; + int ret; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + + kernel_fpu_begin(); + ret = lrw_crypt(desc, dst, src, nbytes, &req); + kernel_fpu_end(); + + return ret; +} + +static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct aesni_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + be128 buf[8]; + struct lrw_crypt_req req = { + .tbuf = buf, + .tbuflen = sizeof(buf), + + .table_ctx = &ctx->lrw_table, + .crypt_ctx = aes_ctx(ctx->raw_aes_ctx), + .crypt_fn = lrw_xts_decrypt_callback, + }; + int ret; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + + kernel_fpu_begin(); + ret = lrw_crypt(desc, dst, src, nbytes, &req); + kernel_fpu_end(); + + return ret; +} + +static int xts_aesni_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int keylen) +{ + struct aesni_xts_ctx *ctx = crypto_tfm_ctx(tfm); + u32 *flags = &tfm->crt_flags; + int err; + + /* key consists of keys of equal size concatenated, therefore + * the length must be even + */ + if (keylen % 2) { + *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + return -EINVAL; + } + + /* first half of xts-key is for crypt */ + err = aes_set_key_common(tfm, ctx->raw_crypt_ctx, key, keylen / 2); + if (err) + return err; + + /* second half of xts-key 
is for tweak */ + return aes_set_key_common(tfm, ctx->raw_tweak_ctx, key + keylen / 2, + keylen / 2); +} + + +static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct aesni_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + be128 buf[8]; + struct xts_crypt_req req = { + .tbuf = buf, + .tbuflen = sizeof(buf), + + .tweak_ctx = aes_ctx(ctx->raw_tweak_ctx), + .tweak_fn = XTS_TWEAK_CAST(aesni_enc), + .crypt_ctx = aes_ctx(ctx->raw_crypt_ctx), + .crypt_fn = lrw_xts_encrypt_callback, + }; + int ret; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + + kernel_fpu_begin(); + ret = xts_crypt(desc, dst, src, nbytes, &req); + kernel_fpu_end(); + + return ret; +} + +static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct aesni_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + be128 buf[8]; + struct xts_crypt_req req = { + .tbuf = buf, + .tbuflen = sizeof(buf), + + .tweak_ctx = aes_ctx(ctx->raw_tweak_ctx), + .tweak_fn = XTS_TWEAK_CAST(aesni_enc), + .crypt_ctx = aes_ctx(ctx->raw_crypt_ctx), + .crypt_fn = lrw_xts_decrypt_callback, + }; + int ret; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + + kernel_fpu_begin(); + ret = xts_crypt(desc, dst, src, nbytes, &req); + kernel_fpu_end(); + + return ret; } -#endif #ifdef CONFIG_X86_64 static int rfc4106_init(struct crypto_tfm *tfm) @@ -1035,10 +1181,10 @@ static struct crypto_alg aesni_algs[] = { { }, #endif #endif -#ifdef HAS_LRW +#ifdef HAS_PCBC }, { - .cra_name = "lrw(aes)", - .cra_driver_name = "lrw-aes-aesni", + .cra_name = "pcbc(aes)", + .cra_driver_name = "pcbc-aes-aesni", .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, .cra_blocksize = AES_BLOCK_SIZE, @@ -1046,12 +1192,12 @@ static struct crypto_alg aesni_algs[] = { { .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, - .cra_init = ablk_lrw_init, + .cra_init = ablk_pcbc_init, .cra_exit = ablk_exit, .cra_u = { .ablkcipher = { - .min_keysize = AES_MIN_KEY_SIZE + AES_BLOCK_SIZE, - .max_keysize = AES_MAX_KEY_SIZE + AES_BLOCK_SIZE, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, .ivsize = AES_BLOCK_SIZE, .setkey = ablk_set_key, .encrypt = ablk_encrypt, @@ -1059,10 +1205,50 @@ static struct crypto_alg aesni_algs[] = { { }, }, #endif -#ifdef HAS_PCBC }, { - .cra_name = "pcbc(aes)", - .cra_driver_name = "pcbc-aes-aesni", + .cra_name = "__lrw-aes-aesni", + .cra_driver_name = "__driver-lrw-aes-aesni", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct aesni_lrw_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_exit = lrw_aesni_exit_tfm, + .cra_u = { + .blkcipher = { + .min_keysize = AES_MIN_KEY_SIZE + AES_BLOCK_SIZE, + .max_keysize = AES_MAX_KEY_SIZE + AES_BLOCK_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = lrw_aesni_setkey, + .encrypt = lrw_encrypt, + .decrypt = lrw_decrypt, + }, + }, +}, { + .cra_name = "__xts-aes-aesni", + .cra_driver_name = "__driver-xts-aes-aesni", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct aesni_xts_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_u = { + .blkcipher = { + .min_keysize = 2 * AES_MIN_KEY_SIZE, + .max_keysize = 2 * AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = 
xts_aesni_setkey, + .encrypt = xts_encrypt, + .decrypt = xts_decrypt, + }, + }, +}, { + .cra_name = "lrw(aes)", + .cra_driver_name = "lrw-aes-aesni", .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, .cra_blocksize = AES_BLOCK_SIZE, @@ -1070,20 +1256,18 @@ static struct crypto_alg aesni_algs[] = { { .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, - .cra_init = ablk_pcbc_init, + .cra_init = ablk_init, .cra_exit = ablk_exit, .cra_u = { .ablkcipher = { - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, + .min_keysize = AES_MIN_KEY_SIZE + AES_BLOCK_SIZE, + .max_keysize = AES_MAX_KEY_SIZE + AES_BLOCK_SIZE, .ivsize = AES_BLOCK_SIZE, .setkey = ablk_set_key, .encrypt = ablk_encrypt, .decrypt = ablk_decrypt, }, }, -#endif -#ifdef HAS_XTS }, { .cra_name = "xts(aes)", .cra_driver_name = "xts-aes-aesni", @@ -1094,7 +1278,7 @@ static struct crypto_alg aesni_algs[] = { { .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, - .cra_init = ablk_xts_init, + .cra_init = ablk_init, .cra_exit = ablk_exit, .cra_u = { .ablkcipher = { @@ -1106,7 +1290,6 @@ static struct crypto_alg aesni_algs[] = { { .decrypt = ablk_decrypt, }, }, -#endif } }; @@ -1118,7 +1301,7 @@ MODULE_DEVICE_TABLE(x86cpu, aesni_cpu_id); static int __init aesni_init(void) { - int err, i; + int err; if (!x86_match_cpu(aesni_cpu_id)) return -ENODEV; @@ -1127,9 +1310,6 @@ static int __init aesni_init(void) if (err) return err; - for (i = 0; i < ARRAY_SIZE(aesni_algs); i++) - INIT_LIST_HEAD(&aesni_algs[i].cra_list); - return crypto_register_algs(aesni_algs, ARRAY_SIZE(aesni_algs)); } diff --git a/arch/x86/crypto/blowfish_glue.c b/arch/x86/crypto/blowfish_glue.c index 7967474de8f7..50ec333b70e6 100644 --- a/arch/x86/crypto/blowfish_glue.c +++ b/arch/x86/crypto/blowfish_glue.c @@ -367,7 +367,6 @@ static struct crypto_alg bf_algs[4] = { { .cra_ctxsize = sizeof(struct bf_ctx), .cra_alignmask = 0, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(bf_algs[0].cra_list), .cra_u = { .cipher = { .cia_min_keysize = BF_MIN_KEY_SIZE, @@ -387,7 +386,6 @@ static struct crypto_alg bf_algs[4] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(bf_algs[1].cra_list), .cra_u = { .blkcipher = { .min_keysize = BF_MIN_KEY_SIZE, @@ -407,7 +405,6 @@ static struct crypto_alg bf_algs[4] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(bf_algs[2].cra_list), .cra_u = { .blkcipher = { .min_keysize = BF_MIN_KEY_SIZE, @@ -428,7 +425,6 @@ static struct crypto_alg bf_algs[4] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(bf_algs[3].cra_list), .cra_u = { .blkcipher = { .min_keysize = BF_MIN_KEY_SIZE, diff --git a/arch/x86/crypto/camellia_glue.c b/arch/x86/crypto/camellia_glue.c index eeb2b3b743e9..42ffd2bbab5b 100644 --- a/arch/x86/crypto/camellia_glue.c +++ b/arch/x86/crypto/camellia_glue.c @@ -92,715 +92,715 @@ static void camellia_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) /* camellia sboxes */ const u64 camellia_sp10011110[256] = { - 0x7000007070707000, 0x8200008282828200, 0x2c00002c2c2c2c00, - 0xec0000ecececec00, 0xb30000b3b3b3b300, 0x2700002727272700, - 0xc00000c0c0c0c000, 0xe50000e5e5e5e500, 0xe40000e4e4e4e400, - 0x8500008585858500, 0x5700005757575700, 0x3500003535353500, - 0xea0000eaeaeaea00, 0x0c00000c0c0c0c00, 0xae0000aeaeaeae00, - 
0x4100004141414100, 0x2300002323232300, 0xef0000efefefef00, - 0x6b00006b6b6b6b00, 0x9300009393939300, 0x4500004545454500, - 0x1900001919191900, 0xa50000a5a5a5a500, 0x2100002121212100, - 0xed0000edededed00, 0x0e00000e0e0e0e00, 0x4f00004f4f4f4f00, - 0x4e00004e4e4e4e00, 0x1d00001d1d1d1d00, 0x6500006565656500, - 0x9200009292929200, 0xbd0000bdbdbdbd00, 0x8600008686868600, - 0xb80000b8b8b8b800, 0xaf0000afafafaf00, 0x8f00008f8f8f8f00, - 0x7c00007c7c7c7c00, 0xeb0000ebebebeb00, 0x1f00001f1f1f1f00, - 0xce0000cececece00, 0x3e00003e3e3e3e00, 0x3000003030303000, - 0xdc0000dcdcdcdc00, 0x5f00005f5f5f5f00, 0x5e00005e5e5e5e00, - 0xc50000c5c5c5c500, 0x0b00000b0b0b0b00, 0x1a00001a1a1a1a00, - 0xa60000a6a6a6a600, 0xe10000e1e1e1e100, 0x3900003939393900, - 0xca0000cacacaca00, 0xd50000d5d5d5d500, 0x4700004747474700, - 0x5d00005d5d5d5d00, 0x3d00003d3d3d3d00, 0xd90000d9d9d9d900, - 0x0100000101010100, 0x5a00005a5a5a5a00, 0xd60000d6d6d6d600, - 0x5100005151515100, 0x5600005656565600, 0x6c00006c6c6c6c00, - 0x4d00004d4d4d4d00, 0x8b00008b8b8b8b00, 0x0d00000d0d0d0d00, - 0x9a00009a9a9a9a00, 0x6600006666666600, 0xfb0000fbfbfbfb00, - 0xcc0000cccccccc00, 0xb00000b0b0b0b000, 0x2d00002d2d2d2d00, - 0x7400007474747400, 0x1200001212121200, 0x2b00002b2b2b2b00, - 0x2000002020202000, 0xf00000f0f0f0f000, 0xb10000b1b1b1b100, - 0x8400008484848400, 0x9900009999999900, 0xdf0000dfdfdfdf00, - 0x4c00004c4c4c4c00, 0xcb0000cbcbcbcb00, 0xc20000c2c2c2c200, - 0x3400003434343400, 0x7e00007e7e7e7e00, 0x7600007676767600, - 0x0500000505050500, 0x6d00006d6d6d6d00, 0xb70000b7b7b7b700, - 0xa90000a9a9a9a900, 0x3100003131313100, 0xd10000d1d1d1d100, - 0x1700001717171700, 0x0400000404040400, 0xd70000d7d7d7d700, - 0x1400001414141400, 0x5800005858585800, 0x3a00003a3a3a3a00, - 0x6100006161616100, 0xde0000dededede00, 0x1b00001b1b1b1b00, - 0x1100001111111100, 0x1c00001c1c1c1c00, 0x3200003232323200, - 0x0f00000f0f0f0f00, 0x9c00009c9c9c9c00, 0x1600001616161600, - 0x5300005353535300, 0x1800001818181800, 0xf20000f2f2f2f200, - 0x2200002222222200, 0xfe0000fefefefe00, 0x4400004444444400, - 0xcf0000cfcfcfcf00, 0xb20000b2b2b2b200, 0xc30000c3c3c3c300, - 0xb50000b5b5b5b500, 0x7a00007a7a7a7a00, 0x9100009191919100, - 0x2400002424242400, 0x0800000808080800, 0xe80000e8e8e8e800, - 0xa80000a8a8a8a800, 0x6000006060606000, 0xfc0000fcfcfcfc00, - 0x6900006969696900, 0x5000005050505000, 0xaa0000aaaaaaaa00, - 0xd00000d0d0d0d000, 0xa00000a0a0a0a000, 0x7d00007d7d7d7d00, - 0xa10000a1a1a1a100, 0x8900008989898900, 0x6200006262626200, - 0x9700009797979700, 0x5400005454545400, 0x5b00005b5b5b5b00, - 0x1e00001e1e1e1e00, 0x9500009595959500, 0xe00000e0e0e0e000, - 0xff0000ffffffff00, 0x6400006464646400, 0xd20000d2d2d2d200, - 0x1000001010101000, 0xc40000c4c4c4c400, 0x0000000000000000, - 0x4800004848484800, 0xa30000a3a3a3a300, 0xf70000f7f7f7f700, - 0x7500007575757500, 0xdb0000dbdbdbdb00, 0x8a00008a8a8a8a00, - 0x0300000303030300, 0xe60000e6e6e6e600, 0xda0000dadadada00, - 0x0900000909090900, 0x3f00003f3f3f3f00, 0xdd0000dddddddd00, - 0x9400009494949400, 0x8700008787878700, 0x5c00005c5c5c5c00, - 0x8300008383838300, 0x0200000202020200, 0xcd0000cdcdcdcd00, - 0x4a00004a4a4a4a00, 0x9000009090909000, 0x3300003333333300, - 0x7300007373737300, 0x6700006767676700, 0xf60000f6f6f6f600, - 0xf30000f3f3f3f300, 0x9d00009d9d9d9d00, 0x7f00007f7f7f7f00, - 0xbf0000bfbfbfbf00, 0xe20000e2e2e2e200, 0x5200005252525200, - 0x9b00009b9b9b9b00, 0xd80000d8d8d8d800, 0x2600002626262600, - 0xc80000c8c8c8c800, 0x3700003737373700, 0xc60000c6c6c6c600, - 0x3b00003b3b3b3b00, 0x8100008181818100, 0x9600009696969600, - 0x6f00006f6f6f6f00, 
0x4b00004b4b4b4b00, 0x1300001313131300, - 0xbe0000bebebebe00, 0x6300006363636300, 0x2e00002e2e2e2e00, - 0xe90000e9e9e9e900, 0x7900007979797900, 0xa70000a7a7a7a700, - 0x8c00008c8c8c8c00, 0x9f00009f9f9f9f00, 0x6e00006e6e6e6e00, - 0xbc0000bcbcbcbc00, 0x8e00008e8e8e8e00, 0x2900002929292900, - 0xf50000f5f5f5f500, 0xf90000f9f9f9f900, 0xb60000b6b6b6b600, - 0x2f00002f2f2f2f00, 0xfd0000fdfdfdfd00, 0xb40000b4b4b4b400, - 0x5900005959595900, 0x7800007878787800, 0x9800009898989800, - 0x0600000606060600, 0x6a00006a6a6a6a00, 0xe70000e7e7e7e700, - 0x4600004646464600, 0x7100007171717100, 0xba0000babababa00, - 0xd40000d4d4d4d400, 0x2500002525252500, 0xab0000abababab00, - 0x4200004242424200, 0x8800008888888800, 0xa20000a2a2a2a200, - 0x8d00008d8d8d8d00, 0xfa0000fafafafa00, 0x7200007272727200, - 0x0700000707070700, 0xb90000b9b9b9b900, 0x5500005555555500, - 0xf80000f8f8f8f800, 0xee0000eeeeeeee00, 0xac0000acacacac00, - 0x0a00000a0a0a0a00, 0x3600003636363600, 0x4900004949494900, - 0x2a00002a2a2a2a00, 0x6800006868686800, 0x3c00003c3c3c3c00, - 0x3800003838383800, 0xf10000f1f1f1f100, 0xa40000a4a4a4a400, - 0x4000004040404000, 0x2800002828282800, 0xd30000d3d3d3d300, - 0x7b00007b7b7b7b00, 0xbb0000bbbbbbbb00, 0xc90000c9c9c9c900, - 0x4300004343434300, 0xc10000c1c1c1c100, 0x1500001515151500, - 0xe30000e3e3e3e300, 0xad0000adadadad00, 0xf40000f4f4f4f400, - 0x7700007777777700, 0xc70000c7c7c7c700, 0x8000008080808000, - 0x9e00009e9e9e9e00, + 0x7000007070707000ULL, 0x8200008282828200ULL, 0x2c00002c2c2c2c00ULL, + 0xec0000ecececec00ULL, 0xb30000b3b3b3b300ULL, 0x2700002727272700ULL, + 0xc00000c0c0c0c000ULL, 0xe50000e5e5e5e500ULL, 0xe40000e4e4e4e400ULL, + 0x8500008585858500ULL, 0x5700005757575700ULL, 0x3500003535353500ULL, + 0xea0000eaeaeaea00ULL, 0x0c00000c0c0c0c00ULL, 0xae0000aeaeaeae00ULL, + 0x4100004141414100ULL, 0x2300002323232300ULL, 0xef0000efefefef00ULL, + 0x6b00006b6b6b6b00ULL, 0x9300009393939300ULL, 0x4500004545454500ULL, + 0x1900001919191900ULL, 0xa50000a5a5a5a500ULL, 0x2100002121212100ULL, + 0xed0000edededed00ULL, 0x0e00000e0e0e0e00ULL, 0x4f00004f4f4f4f00ULL, + 0x4e00004e4e4e4e00ULL, 0x1d00001d1d1d1d00ULL, 0x6500006565656500ULL, + 0x9200009292929200ULL, 0xbd0000bdbdbdbd00ULL, 0x8600008686868600ULL, + 0xb80000b8b8b8b800ULL, 0xaf0000afafafaf00ULL, 0x8f00008f8f8f8f00ULL, + 0x7c00007c7c7c7c00ULL, 0xeb0000ebebebeb00ULL, 0x1f00001f1f1f1f00ULL, + 0xce0000cececece00ULL, 0x3e00003e3e3e3e00ULL, 0x3000003030303000ULL, + 0xdc0000dcdcdcdc00ULL, 0x5f00005f5f5f5f00ULL, 0x5e00005e5e5e5e00ULL, + 0xc50000c5c5c5c500ULL, 0x0b00000b0b0b0b00ULL, 0x1a00001a1a1a1a00ULL, + 0xa60000a6a6a6a600ULL, 0xe10000e1e1e1e100ULL, 0x3900003939393900ULL, + 0xca0000cacacaca00ULL, 0xd50000d5d5d5d500ULL, 0x4700004747474700ULL, + 0x5d00005d5d5d5d00ULL, 0x3d00003d3d3d3d00ULL, 0xd90000d9d9d9d900ULL, + 0x0100000101010100ULL, 0x5a00005a5a5a5a00ULL, 0xd60000d6d6d6d600ULL, + 0x5100005151515100ULL, 0x5600005656565600ULL, 0x6c00006c6c6c6c00ULL, + 0x4d00004d4d4d4d00ULL, 0x8b00008b8b8b8b00ULL, 0x0d00000d0d0d0d00ULL, + 0x9a00009a9a9a9a00ULL, 0x6600006666666600ULL, 0xfb0000fbfbfbfb00ULL, + 0xcc0000cccccccc00ULL, 0xb00000b0b0b0b000ULL, 0x2d00002d2d2d2d00ULL, + 0x7400007474747400ULL, 0x1200001212121200ULL, 0x2b00002b2b2b2b00ULL, + 0x2000002020202000ULL, 0xf00000f0f0f0f000ULL, 0xb10000b1b1b1b100ULL, + 0x8400008484848400ULL, 0x9900009999999900ULL, 0xdf0000dfdfdfdf00ULL, + 0x4c00004c4c4c4c00ULL, 0xcb0000cbcbcbcb00ULL, 0xc20000c2c2c2c200ULL, + 0x3400003434343400ULL, 0x7e00007e7e7e7e00ULL, 0x7600007676767600ULL, + 0x0500000505050500ULL, 0x6d00006d6d6d6d00ULL, 
0xb70000b7b7b7b700ULL, + 0xa90000a9a9a9a900ULL, 0x3100003131313100ULL, 0xd10000d1d1d1d100ULL, + 0x1700001717171700ULL, 0x0400000404040400ULL, 0xd70000d7d7d7d700ULL, + 0x1400001414141400ULL, 0x5800005858585800ULL, 0x3a00003a3a3a3a00ULL, + 0x6100006161616100ULL, 0xde0000dededede00ULL, 0x1b00001b1b1b1b00ULL, + 0x1100001111111100ULL, 0x1c00001c1c1c1c00ULL, 0x3200003232323200ULL, + 0x0f00000f0f0f0f00ULL, 0x9c00009c9c9c9c00ULL, 0x1600001616161600ULL, + 0x5300005353535300ULL, 0x1800001818181800ULL, 0xf20000f2f2f2f200ULL, + 0x2200002222222200ULL, 0xfe0000fefefefe00ULL, 0x4400004444444400ULL, + 0xcf0000cfcfcfcf00ULL, 0xb20000b2b2b2b200ULL, 0xc30000c3c3c3c300ULL, + 0xb50000b5b5b5b500ULL, 0x7a00007a7a7a7a00ULL, 0x9100009191919100ULL, + 0x2400002424242400ULL, 0x0800000808080800ULL, 0xe80000e8e8e8e800ULL, + 0xa80000a8a8a8a800ULL, 0x6000006060606000ULL, 0xfc0000fcfcfcfc00ULL, + 0x6900006969696900ULL, 0x5000005050505000ULL, 0xaa0000aaaaaaaa00ULL, + 0xd00000d0d0d0d000ULL, 0xa00000a0a0a0a000ULL, 0x7d00007d7d7d7d00ULL, + 0xa10000a1a1a1a100ULL, 0x8900008989898900ULL, 0x6200006262626200ULL, + 0x9700009797979700ULL, 0x5400005454545400ULL, 0x5b00005b5b5b5b00ULL, + 0x1e00001e1e1e1e00ULL, 0x9500009595959500ULL, 0xe00000e0e0e0e000ULL, + 0xff0000ffffffff00ULL, 0x6400006464646400ULL, 0xd20000d2d2d2d200ULL, + 0x1000001010101000ULL, 0xc40000c4c4c4c400ULL, 0x0000000000000000ULL, + 0x4800004848484800ULL, 0xa30000a3a3a3a300ULL, 0xf70000f7f7f7f700ULL, + 0x7500007575757500ULL, 0xdb0000dbdbdbdb00ULL, 0x8a00008a8a8a8a00ULL, + 0x0300000303030300ULL, 0xe60000e6e6e6e600ULL, 0xda0000dadadada00ULL, + 0x0900000909090900ULL, 0x3f00003f3f3f3f00ULL, 0xdd0000dddddddd00ULL, + 0x9400009494949400ULL, 0x8700008787878700ULL, 0x5c00005c5c5c5c00ULL, + 0x8300008383838300ULL, 0x0200000202020200ULL, 0xcd0000cdcdcdcd00ULL, + 0x4a00004a4a4a4a00ULL, 0x9000009090909000ULL, 0x3300003333333300ULL, + 0x7300007373737300ULL, 0x6700006767676700ULL, 0xf60000f6f6f6f600ULL, + 0xf30000f3f3f3f300ULL, 0x9d00009d9d9d9d00ULL, 0x7f00007f7f7f7f00ULL, + 0xbf0000bfbfbfbf00ULL, 0xe20000e2e2e2e200ULL, 0x5200005252525200ULL, + 0x9b00009b9b9b9b00ULL, 0xd80000d8d8d8d800ULL, 0x2600002626262600ULL, + 0xc80000c8c8c8c800ULL, 0x3700003737373700ULL, 0xc60000c6c6c6c600ULL, + 0x3b00003b3b3b3b00ULL, 0x8100008181818100ULL, 0x9600009696969600ULL, + 0x6f00006f6f6f6f00ULL, 0x4b00004b4b4b4b00ULL, 0x1300001313131300ULL, + 0xbe0000bebebebe00ULL, 0x6300006363636300ULL, 0x2e00002e2e2e2e00ULL, + 0xe90000e9e9e9e900ULL, 0x7900007979797900ULL, 0xa70000a7a7a7a700ULL, + 0x8c00008c8c8c8c00ULL, 0x9f00009f9f9f9f00ULL, 0x6e00006e6e6e6e00ULL, + 0xbc0000bcbcbcbc00ULL, 0x8e00008e8e8e8e00ULL, 0x2900002929292900ULL, + 0xf50000f5f5f5f500ULL, 0xf90000f9f9f9f900ULL, 0xb60000b6b6b6b600ULL, + 0x2f00002f2f2f2f00ULL, 0xfd0000fdfdfdfd00ULL, 0xb40000b4b4b4b400ULL, + 0x5900005959595900ULL, 0x7800007878787800ULL, 0x9800009898989800ULL, + 0x0600000606060600ULL, 0x6a00006a6a6a6a00ULL, 0xe70000e7e7e7e700ULL, + 0x4600004646464600ULL, 0x7100007171717100ULL, 0xba0000babababa00ULL, + 0xd40000d4d4d4d400ULL, 0x2500002525252500ULL, 0xab0000abababab00ULL, + 0x4200004242424200ULL, 0x8800008888888800ULL, 0xa20000a2a2a2a200ULL, + 0x8d00008d8d8d8d00ULL, 0xfa0000fafafafa00ULL, 0x7200007272727200ULL, + 0x0700000707070700ULL, 0xb90000b9b9b9b900ULL, 0x5500005555555500ULL, + 0xf80000f8f8f8f800ULL, 0xee0000eeeeeeee00ULL, 0xac0000acacacac00ULL, + 0x0a00000a0a0a0a00ULL, 0x3600003636363600ULL, 0x4900004949494900ULL, + 0x2a00002a2a2a2a00ULL, 0x6800006868686800ULL, 0x3c00003c3c3c3c00ULL, + 0x3800003838383800ULL, 0xf10000f1f1f1f100ULL, 
0xa40000a4a4a4a400ULL, + 0x4000004040404000ULL, 0x2800002828282800ULL, 0xd30000d3d3d3d300ULL, + 0x7b00007b7b7b7b00ULL, 0xbb0000bbbbbbbb00ULL, 0xc90000c9c9c9c900ULL, + 0x4300004343434300ULL, 0xc10000c1c1c1c100ULL, 0x1500001515151500ULL, + 0xe30000e3e3e3e300ULL, 0xad0000adadadad00ULL, 0xf40000f4f4f4f400ULL, + 0x7700007777777700ULL, 0xc70000c7c7c7c700ULL, 0x8000008080808000ULL, + 0x9e00009e9e9e9e00ULL, }; const u64 camellia_sp22000222[256] = { - 0xe0e0000000e0e0e0, 0x0505000000050505, 0x5858000000585858, - 0xd9d9000000d9d9d9, 0x6767000000676767, 0x4e4e0000004e4e4e, - 0x8181000000818181, 0xcbcb000000cbcbcb, 0xc9c9000000c9c9c9, - 0x0b0b0000000b0b0b, 0xaeae000000aeaeae, 0x6a6a0000006a6a6a, - 0xd5d5000000d5d5d5, 0x1818000000181818, 0x5d5d0000005d5d5d, - 0x8282000000828282, 0x4646000000464646, 0xdfdf000000dfdfdf, - 0xd6d6000000d6d6d6, 0x2727000000272727, 0x8a8a0000008a8a8a, - 0x3232000000323232, 0x4b4b0000004b4b4b, 0x4242000000424242, - 0xdbdb000000dbdbdb, 0x1c1c0000001c1c1c, 0x9e9e0000009e9e9e, - 0x9c9c0000009c9c9c, 0x3a3a0000003a3a3a, 0xcaca000000cacaca, - 0x2525000000252525, 0x7b7b0000007b7b7b, 0x0d0d0000000d0d0d, - 0x7171000000717171, 0x5f5f0000005f5f5f, 0x1f1f0000001f1f1f, - 0xf8f8000000f8f8f8, 0xd7d7000000d7d7d7, 0x3e3e0000003e3e3e, - 0x9d9d0000009d9d9d, 0x7c7c0000007c7c7c, 0x6060000000606060, - 0xb9b9000000b9b9b9, 0xbebe000000bebebe, 0xbcbc000000bcbcbc, - 0x8b8b0000008b8b8b, 0x1616000000161616, 0x3434000000343434, - 0x4d4d0000004d4d4d, 0xc3c3000000c3c3c3, 0x7272000000727272, - 0x9595000000959595, 0xabab000000ababab, 0x8e8e0000008e8e8e, - 0xbaba000000bababa, 0x7a7a0000007a7a7a, 0xb3b3000000b3b3b3, - 0x0202000000020202, 0xb4b4000000b4b4b4, 0xadad000000adadad, - 0xa2a2000000a2a2a2, 0xacac000000acacac, 0xd8d8000000d8d8d8, - 0x9a9a0000009a9a9a, 0x1717000000171717, 0x1a1a0000001a1a1a, - 0x3535000000353535, 0xcccc000000cccccc, 0xf7f7000000f7f7f7, - 0x9999000000999999, 0x6161000000616161, 0x5a5a0000005a5a5a, - 0xe8e8000000e8e8e8, 0x2424000000242424, 0x5656000000565656, - 0x4040000000404040, 0xe1e1000000e1e1e1, 0x6363000000636363, - 0x0909000000090909, 0x3333000000333333, 0xbfbf000000bfbfbf, - 0x9898000000989898, 0x9797000000979797, 0x8585000000858585, - 0x6868000000686868, 0xfcfc000000fcfcfc, 0xecec000000ececec, - 0x0a0a0000000a0a0a, 0xdada000000dadada, 0x6f6f0000006f6f6f, - 0x5353000000535353, 0x6262000000626262, 0xa3a3000000a3a3a3, - 0x2e2e0000002e2e2e, 0x0808000000080808, 0xafaf000000afafaf, - 0x2828000000282828, 0xb0b0000000b0b0b0, 0x7474000000747474, - 0xc2c2000000c2c2c2, 0xbdbd000000bdbdbd, 0x3636000000363636, - 0x2222000000222222, 0x3838000000383838, 0x6464000000646464, - 0x1e1e0000001e1e1e, 0x3939000000393939, 0x2c2c0000002c2c2c, - 0xa6a6000000a6a6a6, 0x3030000000303030, 0xe5e5000000e5e5e5, - 0x4444000000444444, 0xfdfd000000fdfdfd, 0x8888000000888888, - 0x9f9f0000009f9f9f, 0x6565000000656565, 0x8787000000878787, - 0x6b6b0000006b6b6b, 0xf4f4000000f4f4f4, 0x2323000000232323, - 0x4848000000484848, 0x1010000000101010, 0xd1d1000000d1d1d1, - 0x5151000000515151, 0xc0c0000000c0c0c0, 0xf9f9000000f9f9f9, - 0xd2d2000000d2d2d2, 0xa0a0000000a0a0a0, 0x5555000000555555, - 0xa1a1000000a1a1a1, 0x4141000000414141, 0xfafa000000fafafa, - 0x4343000000434343, 0x1313000000131313, 0xc4c4000000c4c4c4, - 0x2f2f0000002f2f2f, 0xa8a8000000a8a8a8, 0xb6b6000000b6b6b6, - 0x3c3c0000003c3c3c, 0x2b2b0000002b2b2b, 0xc1c1000000c1c1c1, - 0xffff000000ffffff, 0xc8c8000000c8c8c8, 0xa5a5000000a5a5a5, - 0x2020000000202020, 0x8989000000898989, 0x0000000000000000, - 0x9090000000909090, 0x4747000000474747, 0xefef000000efefef, - 
0xeaea000000eaeaea, 0xb7b7000000b7b7b7, 0x1515000000151515, - 0x0606000000060606, 0xcdcd000000cdcdcd, 0xb5b5000000b5b5b5, - 0x1212000000121212, 0x7e7e0000007e7e7e, 0xbbbb000000bbbbbb, - 0x2929000000292929, 0x0f0f0000000f0f0f, 0xb8b8000000b8b8b8, - 0x0707000000070707, 0x0404000000040404, 0x9b9b0000009b9b9b, - 0x9494000000949494, 0x2121000000212121, 0x6666000000666666, - 0xe6e6000000e6e6e6, 0xcece000000cecece, 0xeded000000ededed, - 0xe7e7000000e7e7e7, 0x3b3b0000003b3b3b, 0xfefe000000fefefe, - 0x7f7f0000007f7f7f, 0xc5c5000000c5c5c5, 0xa4a4000000a4a4a4, - 0x3737000000373737, 0xb1b1000000b1b1b1, 0x4c4c0000004c4c4c, - 0x9191000000919191, 0x6e6e0000006e6e6e, 0x8d8d0000008d8d8d, - 0x7676000000767676, 0x0303000000030303, 0x2d2d0000002d2d2d, - 0xdede000000dedede, 0x9696000000969696, 0x2626000000262626, - 0x7d7d0000007d7d7d, 0xc6c6000000c6c6c6, 0x5c5c0000005c5c5c, - 0xd3d3000000d3d3d3, 0xf2f2000000f2f2f2, 0x4f4f0000004f4f4f, - 0x1919000000191919, 0x3f3f0000003f3f3f, 0xdcdc000000dcdcdc, - 0x7979000000797979, 0x1d1d0000001d1d1d, 0x5252000000525252, - 0xebeb000000ebebeb, 0xf3f3000000f3f3f3, 0x6d6d0000006d6d6d, - 0x5e5e0000005e5e5e, 0xfbfb000000fbfbfb, 0x6969000000696969, - 0xb2b2000000b2b2b2, 0xf0f0000000f0f0f0, 0x3131000000313131, - 0x0c0c0000000c0c0c, 0xd4d4000000d4d4d4, 0xcfcf000000cfcfcf, - 0x8c8c0000008c8c8c, 0xe2e2000000e2e2e2, 0x7575000000757575, - 0xa9a9000000a9a9a9, 0x4a4a0000004a4a4a, 0x5757000000575757, - 0x8484000000848484, 0x1111000000111111, 0x4545000000454545, - 0x1b1b0000001b1b1b, 0xf5f5000000f5f5f5, 0xe4e4000000e4e4e4, - 0x0e0e0000000e0e0e, 0x7373000000737373, 0xaaaa000000aaaaaa, - 0xf1f1000000f1f1f1, 0xdddd000000dddddd, 0x5959000000595959, - 0x1414000000141414, 0x6c6c0000006c6c6c, 0x9292000000929292, - 0x5454000000545454, 0xd0d0000000d0d0d0, 0x7878000000787878, - 0x7070000000707070, 0xe3e3000000e3e3e3, 0x4949000000494949, - 0x8080000000808080, 0x5050000000505050, 0xa7a7000000a7a7a7, - 0xf6f6000000f6f6f6, 0x7777000000777777, 0x9393000000939393, - 0x8686000000868686, 0x8383000000838383, 0x2a2a0000002a2a2a, - 0xc7c7000000c7c7c7, 0x5b5b0000005b5b5b, 0xe9e9000000e9e9e9, - 0xeeee000000eeeeee, 0x8f8f0000008f8f8f, 0x0101000000010101, - 0x3d3d0000003d3d3d, + 0xe0e0000000e0e0e0ULL, 0x0505000000050505ULL, 0x5858000000585858ULL, + 0xd9d9000000d9d9d9ULL, 0x6767000000676767ULL, 0x4e4e0000004e4e4eULL, + 0x8181000000818181ULL, 0xcbcb000000cbcbcbULL, 0xc9c9000000c9c9c9ULL, + 0x0b0b0000000b0b0bULL, 0xaeae000000aeaeaeULL, 0x6a6a0000006a6a6aULL, + 0xd5d5000000d5d5d5ULL, 0x1818000000181818ULL, 0x5d5d0000005d5d5dULL, + 0x8282000000828282ULL, 0x4646000000464646ULL, 0xdfdf000000dfdfdfULL, + 0xd6d6000000d6d6d6ULL, 0x2727000000272727ULL, 0x8a8a0000008a8a8aULL, + 0x3232000000323232ULL, 0x4b4b0000004b4b4bULL, 0x4242000000424242ULL, + 0xdbdb000000dbdbdbULL, 0x1c1c0000001c1c1cULL, 0x9e9e0000009e9e9eULL, + 0x9c9c0000009c9c9cULL, 0x3a3a0000003a3a3aULL, 0xcaca000000cacacaULL, + 0x2525000000252525ULL, 0x7b7b0000007b7b7bULL, 0x0d0d0000000d0d0dULL, + 0x7171000000717171ULL, 0x5f5f0000005f5f5fULL, 0x1f1f0000001f1f1fULL, + 0xf8f8000000f8f8f8ULL, 0xd7d7000000d7d7d7ULL, 0x3e3e0000003e3e3eULL, + 0x9d9d0000009d9d9dULL, 0x7c7c0000007c7c7cULL, 0x6060000000606060ULL, + 0xb9b9000000b9b9b9ULL, 0xbebe000000bebebeULL, 0xbcbc000000bcbcbcULL, + 0x8b8b0000008b8b8bULL, 0x1616000000161616ULL, 0x3434000000343434ULL, + 0x4d4d0000004d4d4dULL, 0xc3c3000000c3c3c3ULL, 0x7272000000727272ULL, + 0x9595000000959595ULL, 0xabab000000abababULL, 0x8e8e0000008e8e8eULL, + 0xbaba000000bababaULL, 0x7a7a0000007a7a7aULL, 0xb3b3000000b3b3b3ULL, + 
0x0202000000020202ULL, 0xb4b4000000b4b4b4ULL, 0xadad000000adadadULL, + 0xa2a2000000a2a2a2ULL, 0xacac000000acacacULL, 0xd8d8000000d8d8d8ULL, + 0x9a9a0000009a9a9aULL, 0x1717000000171717ULL, 0x1a1a0000001a1a1aULL, + 0x3535000000353535ULL, 0xcccc000000ccccccULL, 0xf7f7000000f7f7f7ULL, + 0x9999000000999999ULL, 0x6161000000616161ULL, 0x5a5a0000005a5a5aULL, + 0xe8e8000000e8e8e8ULL, 0x2424000000242424ULL, 0x5656000000565656ULL, + 0x4040000000404040ULL, 0xe1e1000000e1e1e1ULL, 0x6363000000636363ULL, + 0x0909000000090909ULL, 0x3333000000333333ULL, 0xbfbf000000bfbfbfULL, + 0x9898000000989898ULL, 0x9797000000979797ULL, 0x8585000000858585ULL, + 0x6868000000686868ULL, 0xfcfc000000fcfcfcULL, 0xecec000000ecececULL, + 0x0a0a0000000a0a0aULL, 0xdada000000dadadaULL, 0x6f6f0000006f6f6fULL, + 0x5353000000535353ULL, 0x6262000000626262ULL, 0xa3a3000000a3a3a3ULL, + 0x2e2e0000002e2e2eULL, 0x0808000000080808ULL, 0xafaf000000afafafULL, + 0x2828000000282828ULL, 0xb0b0000000b0b0b0ULL, 0x7474000000747474ULL, + 0xc2c2000000c2c2c2ULL, 0xbdbd000000bdbdbdULL, 0x3636000000363636ULL, + 0x2222000000222222ULL, 0x3838000000383838ULL, 0x6464000000646464ULL, + 0x1e1e0000001e1e1eULL, 0x3939000000393939ULL, 0x2c2c0000002c2c2cULL, + 0xa6a6000000a6a6a6ULL, 0x3030000000303030ULL, 0xe5e5000000e5e5e5ULL, + 0x4444000000444444ULL, 0xfdfd000000fdfdfdULL, 0x8888000000888888ULL, + 0x9f9f0000009f9f9fULL, 0x6565000000656565ULL, 0x8787000000878787ULL, + 0x6b6b0000006b6b6bULL, 0xf4f4000000f4f4f4ULL, 0x2323000000232323ULL, + 0x4848000000484848ULL, 0x1010000000101010ULL, 0xd1d1000000d1d1d1ULL, + 0x5151000000515151ULL, 0xc0c0000000c0c0c0ULL, 0xf9f9000000f9f9f9ULL, + 0xd2d2000000d2d2d2ULL, 0xa0a0000000a0a0a0ULL, 0x5555000000555555ULL, + 0xa1a1000000a1a1a1ULL, 0x4141000000414141ULL, 0xfafa000000fafafaULL, + 0x4343000000434343ULL, 0x1313000000131313ULL, 0xc4c4000000c4c4c4ULL, + 0x2f2f0000002f2f2fULL, 0xa8a8000000a8a8a8ULL, 0xb6b6000000b6b6b6ULL, + 0x3c3c0000003c3c3cULL, 0x2b2b0000002b2b2bULL, 0xc1c1000000c1c1c1ULL, + 0xffff000000ffffffULL, 0xc8c8000000c8c8c8ULL, 0xa5a5000000a5a5a5ULL, + 0x2020000000202020ULL, 0x8989000000898989ULL, 0x0000000000000000ULL, + 0x9090000000909090ULL, 0x4747000000474747ULL, 0xefef000000efefefULL, + 0xeaea000000eaeaeaULL, 0xb7b7000000b7b7b7ULL, 0x1515000000151515ULL, + 0x0606000000060606ULL, 0xcdcd000000cdcdcdULL, 0xb5b5000000b5b5b5ULL, + 0x1212000000121212ULL, 0x7e7e0000007e7e7eULL, 0xbbbb000000bbbbbbULL, + 0x2929000000292929ULL, 0x0f0f0000000f0f0fULL, 0xb8b8000000b8b8b8ULL, + 0x0707000000070707ULL, 0x0404000000040404ULL, 0x9b9b0000009b9b9bULL, + 0x9494000000949494ULL, 0x2121000000212121ULL, 0x6666000000666666ULL, + 0xe6e6000000e6e6e6ULL, 0xcece000000cececeULL, 0xeded000000edededULL, + 0xe7e7000000e7e7e7ULL, 0x3b3b0000003b3b3bULL, 0xfefe000000fefefeULL, + 0x7f7f0000007f7f7fULL, 0xc5c5000000c5c5c5ULL, 0xa4a4000000a4a4a4ULL, + 0x3737000000373737ULL, 0xb1b1000000b1b1b1ULL, 0x4c4c0000004c4c4cULL, + 0x9191000000919191ULL, 0x6e6e0000006e6e6eULL, 0x8d8d0000008d8d8dULL, + 0x7676000000767676ULL, 0x0303000000030303ULL, 0x2d2d0000002d2d2dULL, + 0xdede000000dededeULL, 0x9696000000969696ULL, 0x2626000000262626ULL, + 0x7d7d0000007d7d7dULL, 0xc6c6000000c6c6c6ULL, 0x5c5c0000005c5c5cULL, + 0xd3d3000000d3d3d3ULL, 0xf2f2000000f2f2f2ULL, 0x4f4f0000004f4f4fULL, + 0x1919000000191919ULL, 0x3f3f0000003f3f3fULL, 0xdcdc000000dcdcdcULL, + 0x7979000000797979ULL, 0x1d1d0000001d1d1dULL, 0x5252000000525252ULL, + 0xebeb000000ebebebULL, 0xf3f3000000f3f3f3ULL, 0x6d6d0000006d6d6dULL, + 0x5e5e0000005e5e5eULL, 0xfbfb000000fbfbfbULL, 0x6969000000696969ULL, + 
0xb2b2000000b2b2b2ULL, 0xf0f0000000f0f0f0ULL, 0x3131000000313131ULL, + 0x0c0c0000000c0c0cULL, 0xd4d4000000d4d4d4ULL, 0xcfcf000000cfcfcfULL, + 0x8c8c0000008c8c8cULL, 0xe2e2000000e2e2e2ULL, 0x7575000000757575ULL, + 0xa9a9000000a9a9a9ULL, 0x4a4a0000004a4a4aULL, 0x5757000000575757ULL, + 0x8484000000848484ULL, 0x1111000000111111ULL, 0x4545000000454545ULL, + 0x1b1b0000001b1b1bULL, 0xf5f5000000f5f5f5ULL, 0xe4e4000000e4e4e4ULL, + 0x0e0e0000000e0e0eULL, 0x7373000000737373ULL, 0xaaaa000000aaaaaaULL, + 0xf1f1000000f1f1f1ULL, 0xdddd000000ddddddULL, 0x5959000000595959ULL, + 0x1414000000141414ULL, 0x6c6c0000006c6c6cULL, 0x9292000000929292ULL, + 0x5454000000545454ULL, 0xd0d0000000d0d0d0ULL, 0x7878000000787878ULL, + 0x7070000000707070ULL, 0xe3e3000000e3e3e3ULL, 0x4949000000494949ULL, + 0x8080000000808080ULL, 0x5050000000505050ULL, 0xa7a7000000a7a7a7ULL, + 0xf6f6000000f6f6f6ULL, 0x7777000000777777ULL, 0x9393000000939393ULL, + 0x8686000000868686ULL, 0x8383000000838383ULL, 0x2a2a0000002a2a2aULL, + 0xc7c7000000c7c7c7ULL, 0x5b5b0000005b5b5bULL, 0xe9e9000000e9e9e9ULL, + 0xeeee000000eeeeeeULL, 0x8f8f0000008f8f8fULL, 0x0101000000010101ULL, + 0x3d3d0000003d3d3dULL, }; const u64 camellia_sp03303033[256] = { - 0x0038380038003838, 0x0041410041004141, 0x0016160016001616, - 0x0076760076007676, 0x00d9d900d900d9d9, 0x0093930093009393, - 0x0060600060006060, 0x00f2f200f200f2f2, 0x0072720072007272, - 0x00c2c200c200c2c2, 0x00abab00ab00abab, 0x009a9a009a009a9a, - 0x0075750075007575, 0x0006060006000606, 0x0057570057005757, - 0x00a0a000a000a0a0, 0x0091910091009191, 0x00f7f700f700f7f7, - 0x00b5b500b500b5b5, 0x00c9c900c900c9c9, 0x00a2a200a200a2a2, - 0x008c8c008c008c8c, 0x00d2d200d200d2d2, 0x0090900090009090, - 0x00f6f600f600f6f6, 0x0007070007000707, 0x00a7a700a700a7a7, - 0x0027270027002727, 0x008e8e008e008e8e, 0x00b2b200b200b2b2, - 0x0049490049004949, 0x00dede00de00dede, 0x0043430043004343, - 0x005c5c005c005c5c, 0x00d7d700d700d7d7, 0x00c7c700c700c7c7, - 0x003e3e003e003e3e, 0x00f5f500f500f5f5, 0x008f8f008f008f8f, - 0x0067670067006767, 0x001f1f001f001f1f, 0x0018180018001818, - 0x006e6e006e006e6e, 0x00afaf00af00afaf, 0x002f2f002f002f2f, - 0x00e2e200e200e2e2, 0x0085850085008585, 0x000d0d000d000d0d, - 0x0053530053005353, 0x00f0f000f000f0f0, 0x009c9c009c009c9c, - 0x0065650065006565, 0x00eaea00ea00eaea, 0x00a3a300a300a3a3, - 0x00aeae00ae00aeae, 0x009e9e009e009e9e, 0x00ecec00ec00ecec, - 0x0080800080008080, 0x002d2d002d002d2d, 0x006b6b006b006b6b, - 0x00a8a800a800a8a8, 0x002b2b002b002b2b, 0x0036360036003636, - 0x00a6a600a600a6a6, 0x00c5c500c500c5c5, 0x0086860086008686, - 0x004d4d004d004d4d, 0x0033330033003333, 0x00fdfd00fd00fdfd, - 0x0066660066006666, 0x0058580058005858, 0x0096960096009696, - 0x003a3a003a003a3a, 0x0009090009000909, 0x0095950095009595, - 0x0010100010001010, 0x0078780078007878, 0x00d8d800d800d8d8, - 0x0042420042004242, 0x00cccc00cc00cccc, 0x00efef00ef00efef, - 0x0026260026002626, 0x00e5e500e500e5e5, 0x0061610061006161, - 0x001a1a001a001a1a, 0x003f3f003f003f3f, 0x003b3b003b003b3b, - 0x0082820082008282, 0x00b6b600b600b6b6, 0x00dbdb00db00dbdb, - 0x00d4d400d400d4d4, 0x0098980098009898, 0x00e8e800e800e8e8, - 0x008b8b008b008b8b, 0x0002020002000202, 0x00ebeb00eb00ebeb, - 0x000a0a000a000a0a, 0x002c2c002c002c2c, 0x001d1d001d001d1d, - 0x00b0b000b000b0b0, 0x006f6f006f006f6f, 0x008d8d008d008d8d, - 0x0088880088008888, 0x000e0e000e000e0e, 0x0019190019001919, - 0x0087870087008787, 0x004e4e004e004e4e, 0x000b0b000b000b0b, - 0x00a9a900a900a9a9, 0x000c0c000c000c0c, 0x0079790079007979, - 0x0011110011001111, 0x007f7f007f007f7f, 
0x0022220022002222, - 0x00e7e700e700e7e7, 0x0059590059005959, 0x00e1e100e100e1e1, - 0x00dada00da00dada, 0x003d3d003d003d3d, 0x00c8c800c800c8c8, - 0x0012120012001212, 0x0004040004000404, 0x0074740074007474, - 0x0054540054005454, 0x0030300030003030, 0x007e7e007e007e7e, - 0x00b4b400b400b4b4, 0x0028280028002828, 0x0055550055005555, - 0x0068680068006868, 0x0050500050005050, 0x00bebe00be00bebe, - 0x00d0d000d000d0d0, 0x00c4c400c400c4c4, 0x0031310031003131, - 0x00cbcb00cb00cbcb, 0x002a2a002a002a2a, 0x00adad00ad00adad, - 0x000f0f000f000f0f, 0x00caca00ca00caca, 0x0070700070007070, - 0x00ffff00ff00ffff, 0x0032320032003232, 0x0069690069006969, - 0x0008080008000808, 0x0062620062006262, 0x0000000000000000, - 0x0024240024002424, 0x00d1d100d100d1d1, 0x00fbfb00fb00fbfb, - 0x00baba00ba00baba, 0x00eded00ed00eded, 0x0045450045004545, - 0x0081810081008181, 0x0073730073007373, 0x006d6d006d006d6d, - 0x0084840084008484, 0x009f9f009f009f9f, 0x00eeee00ee00eeee, - 0x004a4a004a004a4a, 0x00c3c300c300c3c3, 0x002e2e002e002e2e, - 0x00c1c100c100c1c1, 0x0001010001000101, 0x00e6e600e600e6e6, - 0x0025250025002525, 0x0048480048004848, 0x0099990099009999, - 0x00b9b900b900b9b9, 0x00b3b300b300b3b3, 0x007b7b007b007b7b, - 0x00f9f900f900f9f9, 0x00cece00ce00cece, 0x00bfbf00bf00bfbf, - 0x00dfdf00df00dfdf, 0x0071710071007171, 0x0029290029002929, - 0x00cdcd00cd00cdcd, 0x006c6c006c006c6c, 0x0013130013001313, - 0x0064640064006464, 0x009b9b009b009b9b, 0x0063630063006363, - 0x009d9d009d009d9d, 0x00c0c000c000c0c0, 0x004b4b004b004b4b, - 0x00b7b700b700b7b7, 0x00a5a500a500a5a5, 0x0089890089008989, - 0x005f5f005f005f5f, 0x00b1b100b100b1b1, 0x0017170017001717, - 0x00f4f400f400f4f4, 0x00bcbc00bc00bcbc, 0x00d3d300d300d3d3, - 0x0046460046004646, 0x00cfcf00cf00cfcf, 0x0037370037003737, - 0x005e5e005e005e5e, 0x0047470047004747, 0x0094940094009494, - 0x00fafa00fa00fafa, 0x00fcfc00fc00fcfc, 0x005b5b005b005b5b, - 0x0097970097009797, 0x00fefe00fe00fefe, 0x005a5a005a005a5a, - 0x00acac00ac00acac, 0x003c3c003c003c3c, 0x004c4c004c004c4c, - 0x0003030003000303, 0x0035350035003535, 0x00f3f300f300f3f3, - 0x0023230023002323, 0x00b8b800b800b8b8, 0x005d5d005d005d5d, - 0x006a6a006a006a6a, 0x0092920092009292, 0x00d5d500d500d5d5, - 0x0021210021002121, 0x0044440044004444, 0x0051510051005151, - 0x00c6c600c600c6c6, 0x007d7d007d007d7d, 0x0039390039003939, - 0x0083830083008383, 0x00dcdc00dc00dcdc, 0x00aaaa00aa00aaaa, - 0x007c7c007c007c7c, 0x0077770077007777, 0x0056560056005656, - 0x0005050005000505, 0x001b1b001b001b1b, 0x00a4a400a400a4a4, - 0x0015150015001515, 0x0034340034003434, 0x001e1e001e001e1e, - 0x001c1c001c001c1c, 0x00f8f800f800f8f8, 0x0052520052005252, - 0x0020200020002020, 0x0014140014001414, 0x00e9e900e900e9e9, - 0x00bdbd00bd00bdbd, 0x00dddd00dd00dddd, 0x00e4e400e400e4e4, - 0x00a1a100a100a1a1, 0x00e0e000e000e0e0, 0x008a8a008a008a8a, - 0x00f1f100f100f1f1, 0x00d6d600d600d6d6, 0x007a7a007a007a7a, - 0x00bbbb00bb00bbbb, 0x00e3e300e300e3e3, 0x0040400040004040, - 0x004f4f004f004f4f, + 0x0038380038003838ULL, 0x0041410041004141ULL, 0x0016160016001616ULL, + 0x0076760076007676ULL, 0x00d9d900d900d9d9ULL, 0x0093930093009393ULL, + 0x0060600060006060ULL, 0x00f2f200f200f2f2ULL, 0x0072720072007272ULL, + 0x00c2c200c200c2c2ULL, 0x00abab00ab00ababULL, 0x009a9a009a009a9aULL, + 0x0075750075007575ULL, 0x0006060006000606ULL, 0x0057570057005757ULL, + 0x00a0a000a000a0a0ULL, 0x0091910091009191ULL, 0x00f7f700f700f7f7ULL, + 0x00b5b500b500b5b5ULL, 0x00c9c900c900c9c9ULL, 0x00a2a200a200a2a2ULL, + 0x008c8c008c008c8cULL, 0x00d2d200d200d2d2ULL, 0x0090900090009090ULL, + 0x00f6f600f600f6f6ULL, 
0x0007070007000707ULL, 0x00a7a700a700a7a7ULL, + 0x0027270027002727ULL, 0x008e8e008e008e8eULL, 0x00b2b200b200b2b2ULL, + 0x0049490049004949ULL, 0x00dede00de00dedeULL, 0x0043430043004343ULL, + 0x005c5c005c005c5cULL, 0x00d7d700d700d7d7ULL, 0x00c7c700c700c7c7ULL, + 0x003e3e003e003e3eULL, 0x00f5f500f500f5f5ULL, 0x008f8f008f008f8fULL, + 0x0067670067006767ULL, 0x001f1f001f001f1fULL, 0x0018180018001818ULL, + 0x006e6e006e006e6eULL, 0x00afaf00af00afafULL, 0x002f2f002f002f2fULL, + 0x00e2e200e200e2e2ULL, 0x0085850085008585ULL, 0x000d0d000d000d0dULL, + 0x0053530053005353ULL, 0x00f0f000f000f0f0ULL, 0x009c9c009c009c9cULL, + 0x0065650065006565ULL, 0x00eaea00ea00eaeaULL, 0x00a3a300a300a3a3ULL, + 0x00aeae00ae00aeaeULL, 0x009e9e009e009e9eULL, 0x00ecec00ec00ececULL, + 0x0080800080008080ULL, 0x002d2d002d002d2dULL, 0x006b6b006b006b6bULL, + 0x00a8a800a800a8a8ULL, 0x002b2b002b002b2bULL, 0x0036360036003636ULL, + 0x00a6a600a600a6a6ULL, 0x00c5c500c500c5c5ULL, 0x0086860086008686ULL, + 0x004d4d004d004d4dULL, 0x0033330033003333ULL, 0x00fdfd00fd00fdfdULL, + 0x0066660066006666ULL, 0x0058580058005858ULL, 0x0096960096009696ULL, + 0x003a3a003a003a3aULL, 0x0009090009000909ULL, 0x0095950095009595ULL, + 0x0010100010001010ULL, 0x0078780078007878ULL, 0x00d8d800d800d8d8ULL, + 0x0042420042004242ULL, 0x00cccc00cc00ccccULL, 0x00efef00ef00efefULL, + 0x0026260026002626ULL, 0x00e5e500e500e5e5ULL, 0x0061610061006161ULL, + 0x001a1a001a001a1aULL, 0x003f3f003f003f3fULL, 0x003b3b003b003b3bULL, + 0x0082820082008282ULL, 0x00b6b600b600b6b6ULL, 0x00dbdb00db00dbdbULL, + 0x00d4d400d400d4d4ULL, 0x0098980098009898ULL, 0x00e8e800e800e8e8ULL, + 0x008b8b008b008b8bULL, 0x0002020002000202ULL, 0x00ebeb00eb00ebebULL, + 0x000a0a000a000a0aULL, 0x002c2c002c002c2cULL, 0x001d1d001d001d1dULL, + 0x00b0b000b000b0b0ULL, 0x006f6f006f006f6fULL, 0x008d8d008d008d8dULL, + 0x0088880088008888ULL, 0x000e0e000e000e0eULL, 0x0019190019001919ULL, + 0x0087870087008787ULL, 0x004e4e004e004e4eULL, 0x000b0b000b000b0bULL, + 0x00a9a900a900a9a9ULL, 0x000c0c000c000c0cULL, 0x0079790079007979ULL, + 0x0011110011001111ULL, 0x007f7f007f007f7fULL, 0x0022220022002222ULL, + 0x00e7e700e700e7e7ULL, 0x0059590059005959ULL, 0x00e1e100e100e1e1ULL, + 0x00dada00da00dadaULL, 0x003d3d003d003d3dULL, 0x00c8c800c800c8c8ULL, + 0x0012120012001212ULL, 0x0004040004000404ULL, 0x0074740074007474ULL, + 0x0054540054005454ULL, 0x0030300030003030ULL, 0x007e7e007e007e7eULL, + 0x00b4b400b400b4b4ULL, 0x0028280028002828ULL, 0x0055550055005555ULL, + 0x0068680068006868ULL, 0x0050500050005050ULL, 0x00bebe00be00bebeULL, + 0x00d0d000d000d0d0ULL, 0x00c4c400c400c4c4ULL, 0x0031310031003131ULL, + 0x00cbcb00cb00cbcbULL, 0x002a2a002a002a2aULL, 0x00adad00ad00adadULL, + 0x000f0f000f000f0fULL, 0x00caca00ca00cacaULL, 0x0070700070007070ULL, + 0x00ffff00ff00ffffULL, 0x0032320032003232ULL, 0x0069690069006969ULL, + 0x0008080008000808ULL, 0x0062620062006262ULL, 0x0000000000000000ULL, + 0x0024240024002424ULL, 0x00d1d100d100d1d1ULL, 0x00fbfb00fb00fbfbULL, + 0x00baba00ba00babaULL, 0x00eded00ed00ededULL, 0x0045450045004545ULL, + 0x0081810081008181ULL, 0x0073730073007373ULL, 0x006d6d006d006d6dULL, + 0x0084840084008484ULL, 0x009f9f009f009f9fULL, 0x00eeee00ee00eeeeULL, + 0x004a4a004a004a4aULL, 0x00c3c300c300c3c3ULL, 0x002e2e002e002e2eULL, + 0x00c1c100c100c1c1ULL, 0x0001010001000101ULL, 0x00e6e600e600e6e6ULL, + 0x0025250025002525ULL, 0x0048480048004848ULL, 0x0099990099009999ULL, + 0x00b9b900b900b9b9ULL, 0x00b3b300b300b3b3ULL, 0x007b7b007b007b7bULL, + 0x00f9f900f900f9f9ULL, 0x00cece00ce00ceceULL, 0x00bfbf00bf00bfbfULL, + 0x00dfdf00df00dfdfULL, 
0x0071710071007171ULL, 0x0029290029002929ULL, + 0x00cdcd00cd00cdcdULL, 0x006c6c006c006c6cULL, 0x0013130013001313ULL, + 0x0064640064006464ULL, 0x009b9b009b009b9bULL, 0x0063630063006363ULL, + 0x009d9d009d009d9dULL, 0x00c0c000c000c0c0ULL, 0x004b4b004b004b4bULL, + 0x00b7b700b700b7b7ULL, 0x00a5a500a500a5a5ULL, 0x0089890089008989ULL, + 0x005f5f005f005f5fULL, 0x00b1b100b100b1b1ULL, 0x0017170017001717ULL, + 0x00f4f400f400f4f4ULL, 0x00bcbc00bc00bcbcULL, 0x00d3d300d300d3d3ULL, + 0x0046460046004646ULL, 0x00cfcf00cf00cfcfULL, 0x0037370037003737ULL, + 0x005e5e005e005e5eULL, 0x0047470047004747ULL, 0x0094940094009494ULL, + 0x00fafa00fa00fafaULL, 0x00fcfc00fc00fcfcULL, 0x005b5b005b005b5bULL, + 0x0097970097009797ULL, 0x00fefe00fe00fefeULL, 0x005a5a005a005a5aULL, + 0x00acac00ac00acacULL, 0x003c3c003c003c3cULL, 0x004c4c004c004c4cULL, + 0x0003030003000303ULL, 0x0035350035003535ULL, 0x00f3f300f300f3f3ULL, + 0x0023230023002323ULL, 0x00b8b800b800b8b8ULL, 0x005d5d005d005d5dULL, + 0x006a6a006a006a6aULL, 0x0092920092009292ULL, 0x00d5d500d500d5d5ULL, + 0x0021210021002121ULL, 0x0044440044004444ULL, 0x0051510051005151ULL, + 0x00c6c600c600c6c6ULL, 0x007d7d007d007d7dULL, 0x0039390039003939ULL, + 0x0083830083008383ULL, 0x00dcdc00dc00dcdcULL, 0x00aaaa00aa00aaaaULL, + 0x007c7c007c007c7cULL, 0x0077770077007777ULL, 0x0056560056005656ULL, + 0x0005050005000505ULL, 0x001b1b001b001b1bULL, 0x00a4a400a400a4a4ULL, + 0x0015150015001515ULL, 0x0034340034003434ULL, 0x001e1e001e001e1eULL, + 0x001c1c001c001c1cULL, 0x00f8f800f800f8f8ULL, 0x0052520052005252ULL, + 0x0020200020002020ULL, 0x0014140014001414ULL, 0x00e9e900e900e9e9ULL, + 0x00bdbd00bd00bdbdULL, 0x00dddd00dd00ddddULL, 0x00e4e400e400e4e4ULL, + 0x00a1a100a100a1a1ULL, 0x00e0e000e000e0e0ULL, 0x008a8a008a008a8aULL, + 0x00f1f100f100f1f1ULL, 0x00d6d600d600d6d6ULL, 0x007a7a007a007a7aULL, + 0x00bbbb00bb00bbbbULL, 0x00e3e300e300e3e3ULL, 0x0040400040004040ULL, + 0x004f4f004f004f4fULL, }; const u64 camellia_sp00444404[256] = { - 0x0000707070700070, 0x00002c2c2c2c002c, 0x0000b3b3b3b300b3, - 0x0000c0c0c0c000c0, 0x0000e4e4e4e400e4, 0x0000575757570057, - 0x0000eaeaeaea00ea, 0x0000aeaeaeae00ae, 0x0000232323230023, - 0x00006b6b6b6b006b, 0x0000454545450045, 0x0000a5a5a5a500a5, - 0x0000edededed00ed, 0x00004f4f4f4f004f, 0x00001d1d1d1d001d, - 0x0000929292920092, 0x0000868686860086, 0x0000afafafaf00af, - 0x00007c7c7c7c007c, 0x00001f1f1f1f001f, 0x00003e3e3e3e003e, - 0x0000dcdcdcdc00dc, 0x00005e5e5e5e005e, 0x00000b0b0b0b000b, - 0x0000a6a6a6a600a6, 0x0000393939390039, 0x0000d5d5d5d500d5, - 0x00005d5d5d5d005d, 0x0000d9d9d9d900d9, 0x00005a5a5a5a005a, - 0x0000515151510051, 0x00006c6c6c6c006c, 0x00008b8b8b8b008b, - 0x00009a9a9a9a009a, 0x0000fbfbfbfb00fb, 0x0000b0b0b0b000b0, - 0x0000747474740074, 0x00002b2b2b2b002b, 0x0000f0f0f0f000f0, - 0x0000848484840084, 0x0000dfdfdfdf00df, 0x0000cbcbcbcb00cb, - 0x0000343434340034, 0x0000767676760076, 0x00006d6d6d6d006d, - 0x0000a9a9a9a900a9, 0x0000d1d1d1d100d1, 0x0000040404040004, - 0x0000141414140014, 0x00003a3a3a3a003a, 0x0000dededede00de, - 0x0000111111110011, 0x0000323232320032, 0x00009c9c9c9c009c, - 0x0000535353530053, 0x0000f2f2f2f200f2, 0x0000fefefefe00fe, - 0x0000cfcfcfcf00cf, 0x0000c3c3c3c300c3, 0x00007a7a7a7a007a, - 0x0000242424240024, 0x0000e8e8e8e800e8, 0x0000606060600060, - 0x0000696969690069, 0x0000aaaaaaaa00aa, 0x0000a0a0a0a000a0, - 0x0000a1a1a1a100a1, 0x0000626262620062, 0x0000545454540054, - 0x00001e1e1e1e001e, 0x0000e0e0e0e000e0, 0x0000646464640064, - 0x0000101010100010, 0x0000000000000000, 0x0000a3a3a3a300a3, - 0x0000757575750075, 0x00008a8a8a8a008a, 
0x0000e6e6e6e600e6, - 0x0000090909090009, 0x0000dddddddd00dd, 0x0000878787870087, - 0x0000838383830083, 0x0000cdcdcdcd00cd, 0x0000909090900090, - 0x0000737373730073, 0x0000f6f6f6f600f6, 0x00009d9d9d9d009d, - 0x0000bfbfbfbf00bf, 0x0000525252520052, 0x0000d8d8d8d800d8, - 0x0000c8c8c8c800c8, 0x0000c6c6c6c600c6, 0x0000818181810081, - 0x00006f6f6f6f006f, 0x0000131313130013, 0x0000636363630063, - 0x0000e9e9e9e900e9, 0x0000a7a7a7a700a7, 0x00009f9f9f9f009f, - 0x0000bcbcbcbc00bc, 0x0000292929290029, 0x0000f9f9f9f900f9, - 0x00002f2f2f2f002f, 0x0000b4b4b4b400b4, 0x0000787878780078, - 0x0000060606060006, 0x0000e7e7e7e700e7, 0x0000717171710071, - 0x0000d4d4d4d400d4, 0x0000abababab00ab, 0x0000888888880088, - 0x00008d8d8d8d008d, 0x0000727272720072, 0x0000b9b9b9b900b9, - 0x0000f8f8f8f800f8, 0x0000acacacac00ac, 0x0000363636360036, - 0x00002a2a2a2a002a, 0x00003c3c3c3c003c, 0x0000f1f1f1f100f1, - 0x0000404040400040, 0x0000d3d3d3d300d3, 0x0000bbbbbbbb00bb, - 0x0000434343430043, 0x0000151515150015, 0x0000adadadad00ad, - 0x0000777777770077, 0x0000808080800080, 0x0000828282820082, - 0x0000ecececec00ec, 0x0000272727270027, 0x0000e5e5e5e500e5, - 0x0000858585850085, 0x0000353535350035, 0x00000c0c0c0c000c, - 0x0000414141410041, 0x0000efefefef00ef, 0x0000939393930093, - 0x0000191919190019, 0x0000212121210021, 0x00000e0e0e0e000e, - 0x00004e4e4e4e004e, 0x0000656565650065, 0x0000bdbdbdbd00bd, - 0x0000b8b8b8b800b8, 0x00008f8f8f8f008f, 0x0000ebebebeb00eb, - 0x0000cececece00ce, 0x0000303030300030, 0x00005f5f5f5f005f, - 0x0000c5c5c5c500c5, 0x00001a1a1a1a001a, 0x0000e1e1e1e100e1, - 0x0000cacacaca00ca, 0x0000474747470047, 0x00003d3d3d3d003d, - 0x0000010101010001, 0x0000d6d6d6d600d6, 0x0000565656560056, - 0x00004d4d4d4d004d, 0x00000d0d0d0d000d, 0x0000666666660066, - 0x0000cccccccc00cc, 0x00002d2d2d2d002d, 0x0000121212120012, - 0x0000202020200020, 0x0000b1b1b1b100b1, 0x0000999999990099, - 0x00004c4c4c4c004c, 0x0000c2c2c2c200c2, 0x00007e7e7e7e007e, - 0x0000050505050005, 0x0000b7b7b7b700b7, 0x0000313131310031, - 0x0000171717170017, 0x0000d7d7d7d700d7, 0x0000585858580058, - 0x0000616161610061, 0x00001b1b1b1b001b, 0x00001c1c1c1c001c, - 0x00000f0f0f0f000f, 0x0000161616160016, 0x0000181818180018, - 0x0000222222220022, 0x0000444444440044, 0x0000b2b2b2b200b2, - 0x0000b5b5b5b500b5, 0x0000919191910091, 0x0000080808080008, - 0x0000a8a8a8a800a8, 0x0000fcfcfcfc00fc, 0x0000505050500050, - 0x0000d0d0d0d000d0, 0x00007d7d7d7d007d, 0x0000898989890089, - 0x0000979797970097, 0x00005b5b5b5b005b, 0x0000959595950095, - 0x0000ffffffff00ff, 0x0000d2d2d2d200d2, 0x0000c4c4c4c400c4, - 0x0000484848480048, 0x0000f7f7f7f700f7, 0x0000dbdbdbdb00db, - 0x0000030303030003, 0x0000dadadada00da, 0x00003f3f3f3f003f, - 0x0000949494940094, 0x00005c5c5c5c005c, 0x0000020202020002, - 0x00004a4a4a4a004a, 0x0000333333330033, 0x0000676767670067, - 0x0000f3f3f3f300f3, 0x00007f7f7f7f007f, 0x0000e2e2e2e200e2, - 0x00009b9b9b9b009b, 0x0000262626260026, 0x0000373737370037, - 0x00003b3b3b3b003b, 0x0000969696960096, 0x00004b4b4b4b004b, - 0x0000bebebebe00be, 0x00002e2e2e2e002e, 0x0000797979790079, - 0x00008c8c8c8c008c, 0x00006e6e6e6e006e, 0x00008e8e8e8e008e, - 0x0000f5f5f5f500f5, 0x0000b6b6b6b600b6, 0x0000fdfdfdfd00fd, - 0x0000595959590059, 0x0000989898980098, 0x00006a6a6a6a006a, - 0x0000464646460046, 0x0000babababa00ba, 0x0000252525250025, - 0x0000424242420042, 0x0000a2a2a2a200a2, 0x0000fafafafa00fa, - 0x0000070707070007, 0x0000555555550055, 0x0000eeeeeeee00ee, - 0x00000a0a0a0a000a, 0x0000494949490049, 0x0000686868680068, - 0x0000383838380038, 0x0000a4a4a4a400a4, 0x0000282828280028, 
- 0x00007b7b7b7b007b, 0x0000c9c9c9c900c9, 0x0000c1c1c1c100c1, - 0x0000e3e3e3e300e3, 0x0000f4f4f4f400f4, 0x0000c7c7c7c700c7, - 0x00009e9e9e9e009e, + 0x0000707070700070ULL, 0x00002c2c2c2c002cULL, 0x0000b3b3b3b300b3ULL, + 0x0000c0c0c0c000c0ULL, 0x0000e4e4e4e400e4ULL, 0x0000575757570057ULL, + 0x0000eaeaeaea00eaULL, 0x0000aeaeaeae00aeULL, 0x0000232323230023ULL, + 0x00006b6b6b6b006bULL, 0x0000454545450045ULL, 0x0000a5a5a5a500a5ULL, + 0x0000edededed00edULL, 0x00004f4f4f4f004fULL, 0x00001d1d1d1d001dULL, + 0x0000929292920092ULL, 0x0000868686860086ULL, 0x0000afafafaf00afULL, + 0x00007c7c7c7c007cULL, 0x00001f1f1f1f001fULL, 0x00003e3e3e3e003eULL, + 0x0000dcdcdcdc00dcULL, 0x00005e5e5e5e005eULL, 0x00000b0b0b0b000bULL, + 0x0000a6a6a6a600a6ULL, 0x0000393939390039ULL, 0x0000d5d5d5d500d5ULL, + 0x00005d5d5d5d005dULL, 0x0000d9d9d9d900d9ULL, 0x00005a5a5a5a005aULL, + 0x0000515151510051ULL, 0x00006c6c6c6c006cULL, 0x00008b8b8b8b008bULL, + 0x00009a9a9a9a009aULL, 0x0000fbfbfbfb00fbULL, 0x0000b0b0b0b000b0ULL, + 0x0000747474740074ULL, 0x00002b2b2b2b002bULL, 0x0000f0f0f0f000f0ULL, + 0x0000848484840084ULL, 0x0000dfdfdfdf00dfULL, 0x0000cbcbcbcb00cbULL, + 0x0000343434340034ULL, 0x0000767676760076ULL, 0x00006d6d6d6d006dULL, + 0x0000a9a9a9a900a9ULL, 0x0000d1d1d1d100d1ULL, 0x0000040404040004ULL, + 0x0000141414140014ULL, 0x00003a3a3a3a003aULL, 0x0000dededede00deULL, + 0x0000111111110011ULL, 0x0000323232320032ULL, 0x00009c9c9c9c009cULL, + 0x0000535353530053ULL, 0x0000f2f2f2f200f2ULL, 0x0000fefefefe00feULL, + 0x0000cfcfcfcf00cfULL, 0x0000c3c3c3c300c3ULL, 0x00007a7a7a7a007aULL, + 0x0000242424240024ULL, 0x0000e8e8e8e800e8ULL, 0x0000606060600060ULL, + 0x0000696969690069ULL, 0x0000aaaaaaaa00aaULL, 0x0000a0a0a0a000a0ULL, + 0x0000a1a1a1a100a1ULL, 0x0000626262620062ULL, 0x0000545454540054ULL, + 0x00001e1e1e1e001eULL, 0x0000e0e0e0e000e0ULL, 0x0000646464640064ULL, + 0x0000101010100010ULL, 0x0000000000000000ULL, 0x0000a3a3a3a300a3ULL, + 0x0000757575750075ULL, 0x00008a8a8a8a008aULL, 0x0000e6e6e6e600e6ULL, + 0x0000090909090009ULL, 0x0000dddddddd00ddULL, 0x0000878787870087ULL, + 0x0000838383830083ULL, 0x0000cdcdcdcd00cdULL, 0x0000909090900090ULL, + 0x0000737373730073ULL, 0x0000f6f6f6f600f6ULL, 0x00009d9d9d9d009dULL, + 0x0000bfbfbfbf00bfULL, 0x0000525252520052ULL, 0x0000d8d8d8d800d8ULL, + 0x0000c8c8c8c800c8ULL, 0x0000c6c6c6c600c6ULL, 0x0000818181810081ULL, + 0x00006f6f6f6f006fULL, 0x0000131313130013ULL, 0x0000636363630063ULL, + 0x0000e9e9e9e900e9ULL, 0x0000a7a7a7a700a7ULL, 0x00009f9f9f9f009fULL, + 0x0000bcbcbcbc00bcULL, 0x0000292929290029ULL, 0x0000f9f9f9f900f9ULL, + 0x00002f2f2f2f002fULL, 0x0000b4b4b4b400b4ULL, 0x0000787878780078ULL, + 0x0000060606060006ULL, 0x0000e7e7e7e700e7ULL, 0x0000717171710071ULL, + 0x0000d4d4d4d400d4ULL, 0x0000abababab00abULL, 0x0000888888880088ULL, + 0x00008d8d8d8d008dULL, 0x0000727272720072ULL, 0x0000b9b9b9b900b9ULL, + 0x0000f8f8f8f800f8ULL, 0x0000acacacac00acULL, 0x0000363636360036ULL, + 0x00002a2a2a2a002aULL, 0x00003c3c3c3c003cULL, 0x0000f1f1f1f100f1ULL, + 0x0000404040400040ULL, 0x0000d3d3d3d300d3ULL, 0x0000bbbbbbbb00bbULL, + 0x0000434343430043ULL, 0x0000151515150015ULL, 0x0000adadadad00adULL, + 0x0000777777770077ULL, 0x0000808080800080ULL, 0x0000828282820082ULL, + 0x0000ecececec00ecULL, 0x0000272727270027ULL, 0x0000e5e5e5e500e5ULL, + 0x0000858585850085ULL, 0x0000353535350035ULL, 0x00000c0c0c0c000cULL, + 0x0000414141410041ULL, 0x0000efefefef00efULL, 0x0000939393930093ULL, + 0x0000191919190019ULL, 0x0000212121210021ULL, 0x00000e0e0e0e000eULL, + 0x00004e4e4e4e004eULL, 0x0000656565650065ULL, 0x0000bdbdbdbd00bdULL, 
+ 0x0000b8b8b8b800b8ULL, 0x00008f8f8f8f008fULL, 0x0000ebebebeb00ebULL, + 0x0000cececece00ceULL, 0x0000303030300030ULL, 0x00005f5f5f5f005fULL, + 0x0000c5c5c5c500c5ULL, 0x00001a1a1a1a001aULL, 0x0000e1e1e1e100e1ULL, + 0x0000cacacaca00caULL, 0x0000474747470047ULL, 0x00003d3d3d3d003dULL, + 0x0000010101010001ULL, 0x0000d6d6d6d600d6ULL, 0x0000565656560056ULL, + 0x00004d4d4d4d004dULL, 0x00000d0d0d0d000dULL, 0x0000666666660066ULL, + 0x0000cccccccc00ccULL, 0x00002d2d2d2d002dULL, 0x0000121212120012ULL, + 0x0000202020200020ULL, 0x0000b1b1b1b100b1ULL, 0x0000999999990099ULL, + 0x00004c4c4c4c004cULL, 0x0000c2c2c2c200c2ULL, 0x00007e7e7e7e007eULL, + 0x0000050505050005ULL, 0x0000b7b7b7b700b7ULL, 0x0000313131310031ULL, + 0x0000171717170017ULL, 0x0000d7d7d7d700d7ULL, 0x0000585858580058ULL, + 0x0000616161610061ULL, 0x00001b1b1b1b001bULL, 0x00001c1c1c1c001cULL, + 0x00000f0f0f0f000fULL, 0x0000161616160016ULL, 0x0000181818180018ULL, + 0x0000222222220022ULL, 0x0000444444440044ULL, 0x0000b2b2b2b200b2ULL, + 0x0000b5b5b5b500b5ULL, 0x0000919191910091ULL, 0x0000080808080008ULL, + 0x0000a8a8a8a800a8ULL, 0x0000fcfcfcfc00fcULL, 0x0000505050500050ULL, + 0x0000d0d0d0d000d0ULL, 0x00007d7d7d7d007dULL, 0x0000898989890089ULL, + 0x0000979797970097ULL, 0x00005b5b5b5b005bULL, 0x0000959595950095ULL, + 0x0000ffffffff00ffULL, 0x0000d2d2d2d200d2ULL, 0x0000c4c4c4c400c4ULL, + 0x0000484848480048ULL, 0x0000f7f7f7f700f7ULL, 0x0000dbdbdbdb00dbULL, + 0x0000030303030003ULL, 0x0000dadadada00daULL, 0x00003f3f3f3f003fULL, + 0x0000949494940094ULL, 0x00005c5c5c5c005cULL, 0x0000020202020002ULL, + 0x00004a4a4a4a004aULL, 0x0000333333330033ULL, 0x0000676767670067ULL, + 0x0000f3f3f3f300f3ULL, 0x00007f7f7f7f007fULL, 0x0000e2e2e2e200e2ULL, + 0x00009b9b9b9b009bULL, 0x0000262626260026ULL, 0x0000373737370037ULL, + 0x00003b3b3b3b003bULL, 0x0000969696960096ULL, 0x00004b4b4b4b004bULL, + 0x0000bebebebe00beULL, 0x00002e2e2e2e002eULL, 0x0000797979790079ULL, + 0x00008c8c8c8c008cULL, 0x00006e6e6e6e006eULL, 0x00008e8e8e8e008eULL, + 0x0000f5f5f5f500f5ULL, 0x0000b6b6b6b600b6ULL, 0x0000fdfdfdfd00fdULL, + 0x0000595959590059ULL, 0x0000989898980098ULL, 0x00006a6a6a6a006aULL, + 0x0000464646460046ULL, 0x0000babababa00baULL, 0x0000252525250025ULL, + 0x0000424242420042ULL, 0x0000a2a2a2a200a2ULL, 0x0000fafafafa00faULL, + 0x0000070707070007ULL, 0x0000555555550055ULL, 0x0000eeeeeeee00eeULL, + 0x00000a0a0a0a000aULL, 0x0000494949490049ULL, 0x0000686868680068ULL, + 0x0000383838380038ULL, 0x0000a4a4a4a400a4ULL, 0x0000282828280028ULL, + 0x00007b7b7b7b007bULL, 0x0000c9c9c9c900c9ULL, 0x0000c1c1c1c100c1ULL, + 0x0000e3e3e3e300e3ULL, 0x0000f4f4f4f400f4ULL, 0x0000c7c7c7c700c7ULL, + 0x00009e9e9e9e009eULL, }; const u64 camellia_sp02220222[256] = { - 0x00e0e0e000e0e0e0, 0x0005050500050505, 0x0058585800585858, - 0x00d9d9d900d9d9d9, 0x0067676700676767, 0x004e4e4e004e4e4e, - 0x0081818100818181, 0x00cbcbcb00cbcbcb, 0x00c9c9c900c9c9c9, - 0x000b0b0b000b0b0b, 0x00aeaeae00aeaeae, 0x006a6a6a006a6a6a, - 0x00d5d5d500d5d5d5, 0x0018181800181818, 0x005d5d5d005d5d5d, - 0x0082828200828282, 0x0046464600464646, 0x00dfdfdf00dfdfdf, - 0x00d6d6d600d6d6d6, 0x0027272700272727, 0x008a8a8a008a8a8a, - 0x0032323200323232, 0x004b4b4b004b4b4b, 0x0042424200424242, - 0x00dbdbdb00dbdbdb, 0x001c1c1c001c1c1c, 0x009e9e9e009e9e9e, - 0x009c9c9c009c9c9c, 0x003a3a3a003a3a3a, 0x00cacaca00cacaca, - 0x0025252500252525, 0x007b7b7b007b7b7b, 0x000d0d0d000d0d0d, - 0x0071717100717171, 0x005f5f5f005f5f5f, 0x001f1f1f001f1f1f, - 0x00f8f8f800f8f8f8, 0x00d7d7d700d7d7d7, 0x003e3e3e003e3e3e, - 0x009d9d9d009d9d9d, 0x007c7c7c007c7c7c, 
0x0060606000606060, - 0x00b9b9b900b9b9b9, 0x00bebebe00bebebe, 0x00bcbcbc00bcbcbc, - 0x008b8b8b008b8b8b, 0x0016161600161616, 0x0034343400343434, - 0x004d4d4d004d4d4d, 0x00c3c3c300c3c3c3, 0x0072727200727272, - 0x0095959500959595, 0x00ababab00ababab, 0x008e8e8e008e8e8e, - 0x00bababa00bababa, 0x007a7a7a007a7a7a, 0x00b3b3b300b3b3b3, - 0x0002020200020202, 0x00b4b4b400b4b4b4, 0x00adadad00adadad, - 0x00a2a2a200a2a2a2, 0x00acacac00acacac, 0x00d8d8d800d8d8d8, - 0x009a9a9a009a9a9a, 0x0017171700171717, 0x001a1a1a001a1a1a, - 0x0035353500353535, 0x00cccccc00cccccc, 0x00f7f7f700f7f7f7, - 0x0099999900999999, 0x0061616100616161, 0x005a5a5a005a5a5a, - 0x00e8e8e800e8e8e8, 0x0024242400242424, 0x0056565600565656, - 0x0040404000404040, 0x00e1e1e100e1e1e1, 0x0063636300636363, - 0x0009090900090909, 0x0033333300333333, 0x00bfbfbf00bfbfbf, - 0x0098989800989898, 0x0097979700979797, 0x0085858500858585, - 0x0068686800686868, 0x00fcfcfc00fcfcfc, 0x00ececec00ececec, - 0x000a0a0a000a0a0a, 0x00dadada00dadada, 0x006f6f6f006f6f6f, - 0x0053535300535353, 0x0062626200626262, 0x00a3a3a300a3a3a3, - 0x002e2e2e002e2e2e, 0x0008080800080808, 0x00afafaf00afafaf, - 0x0028282800282828, 0x00b0b0b000b0b0b0, 0x0074747400747474, - 0x00c2c2c200c2c2c2, 0x00bdbdbd00bdbdbd, 0x0036363600363636, - 0x0022222200222222, 0x0038383800383838, 0x0064646400646464, - 0x001e1e1e001e1e1e, 0x0039393900393939, 0x002c2c2c002c2c2c, - 0x00a6a6a600a6a6a6, 0x0030303000303030, 0x00e5e5e500e5e5e5, - 0x0044444400444444, 0x00fdfdfd00fdfdfd, 0x0088888800888888, - 0x009f9f9f009f9f9f, 0x0065656500656565, 0x0087878700878787, - 0x006b6b6b006b6b6b, 0x00f4f4f400f4f4f4, 0x0023232300232323, - 0x0048484800484848, 0x0010101000101010, 0x00d1d1d100d1d1d1, - 0x0051515100515151, 0x00c0c0c000c0c0c0, 0x00f9f9f900f9f9f9, - 0x00d2d2d200d2d2d2, 0x00a0a0a000a0a0a0, 0x0055555500555555, - 0x00a1a1a100a1a1a1, 0x0041414100414141, 0x00fafafa00fafafa, - 0x0043434300434343, 0x0013131300131313, 0x00c4c4c400c4c4c4, - 0x002f2f2f002f2f2f, 0x00a8a8a800a8a8a8, 0x00b6b6b600b6b6b6, - 0x003c3c3c003c3c3c, 0x002b2b2b002b2b2b, 0x00c1c1c100c1c1c1, - 0x00ffffff00ffffff, 0x00c8c8c800c8c8c8, 0x00a5a5a500a5a5a5, - 0x0020202000202020, 0x0089898900898989, 0x0000000000000000, - 0x0090909000909090, 0x0047474700474747, 0x00efefef00efefef, - 0x00eaeaea00eaeaea, 0x00b7b7b700b7b7b7, 0x0015151500151515, - 0x0006060600060606, 0x00cdcdcd00cdcdcd, 0x00b5b5b500b5b5b5, - 0x0012121200121212, 0x007e7e7e007e7e7e, 0x00bbbbbb00bbbbbb, - 0x0029292900292929, 0x000f0f0f000f0f0f, 0x00b8b8b800b8b8b8, - 0x0007070700070707, 0x0004040400040404, 0x009b9b9b009b9b9b, - 0x0094949400949494, 0x0021212100212121, 0x0066666600666666, - 0x00e6e6e600e6e6e6, 0x00cecece00cecece, 0x00ededed00ededed, - 0x00e7e7e700e7e7e7, 0x003b3b3b003b3b3b, 0x00fefefe00fefefe, - 0x007f7f7f007f7f7f, 0x00c5c5c500c5c5c5, 0x00a4a4a400a4a4a4, - 0x0037373700373737, 0x00b1b1b100b1b1b1, 0x004c4c4c004c4c4c, - 0x0091919100919191, 0x006e6e6e006e6e6e, 0x008d8d8d008d8d8d, - 0x0076767600767676, 0x0003030300030303, 0x002d2d2d002d2d2d, - 0x00dedede00dedede, 0x0096969600969696, 0x0026262600262626, - 0x007d7d7d007d7d7d, 0x00c6c6c600c6c6c6, 0x005c5c5c005c5c5c, - 0x00d3d3d300d3d3d3, 0x00f2f2f200f2f2f2, 0x004f4f4f004f4f4f, - 0x0019191900191919, 0x003f3f3f003f3f3f, 0x00dcdcdc00dcdcdc, - 0x0079797900797979, 0x001d1d1d001d1d1d, 0x0052525200525252, - 0x00ebebeb00ebebeb, 0x00f3f3f300f3f3f3, 0x006d6d6d006d6d6d, - 0x005e5e5e005e5e5e, 0x00fbfbfb00fbfbfb, 0x0069696900696969, - 0x00b2b2b200b2b2b2, 0x00f0f0f000f0f0f0, 0x0031313100313131, - 0x000c0c0c000c0c0c, 0x00d4d4d400d4d4d4, 0x00cfcfcf00cfcfcf, 
- 0x008c8c8c008c8c8c, 0x00e2e2e200e2e2e2, 0x0075757500757575, - 0x00a9a9a900a9a9a9, 0x004a4a4a004a4a4a, 0x0057575700575757, - 0x0084848400848484, 0x0011111100111111, 0x0045454500454545, - 0x001b1b1b001b1b1b, 0x00f5f5f500f5f5f5, 0x00e4e4e400e4e4e4, - 0x000e0e0e000e0e0e, 0x0073737300737373, 0x00aaaaaa00aaaaaa, - 0x00f1f1f100f1f1f1, 0x00dddddd00dddddd, 0x0059595900595959, - 0x0014141400141414, 0x006c6c6c006c6c6c, 0x0092929200929292, - 0x0054545400545454, 0x00d0d0d000d0d0d0, 0x0078787800787878, - 0x0070707000707070, 0x00e3e3e300e3e3e3, 0x0049494900494949, - 0x0080808000808080, 0x0050505000505050, 0x00a7a7a700a7a7a7, - 0x00f6f6f600f6f6f6, 0x0077777700777777, 0x0093939300939393, - 0x0086868600868686, 0x0083838300838383, 0x002a2a2a002a2a2a, - 0x00c7c7c700c7c7c7, 0x005b5b5b005b5b5b, 0x00e9e9e900e9e9e9, - 0x00eeeeee00eeeeee, 0x008f8f8f008f8f8f, 0x0001010100010101, - 0x003d3d3d003d3d3d, + 0x00e0e0e000e0e0e0ULL, 0x0005050500050505ULL, 0x0058585800585858ULL, + 0x00d9d9d900d9d9d9ULL, 0x0067676700676767ULL, 0x004e4e4e004e4e4eULL, + 0x0081818100818181ULL, 0x00cbcbcb00cbcbcbULL, 0x00c9c9c900c9c9c9ULL, + 0x000b0b0b000b0b0bULL, 0x00aeaeae00aeaeaeULL, 0x006a6a6a006a6a6aULL, + 0x00d5d5d500d5d5d5ULL, 0x0018181800181818ULL, 0x005d5d5d005d5d5dULL, + 0x0082828200828282ULL, 0x0046464600464646ULL, 0x00dfdfdf00dfdfdfULL, + 0x00d6d6d600d6d6d6ULL, 0x0027272700272727ULL, 0x008a8a8a008a8a8aULL, + 0x0032323200323232ULL, 0x004b4b4b004b4b4bULL, 0x0042424200424242ULL, + 0x00dbdbdb00dbdbdbULL, 0x001c1c1c001c1c1cULL, 0x009e9e9e009e9e9eULL, + 0x009c9c9c009c9c9cULL, 0x003a3a3a003a3a3aULL, 0x00cacaca00cacacaULL, + 0x0025252500252525ULL, 0x007b7b7b007b7b7bULL, 0x000d0d0d000d0d0dULL, + 0x0071717100717171ULL, 0x005f5f5f005f5f5fULL, 0x001f1f1f001f1f1fULL, + 0x00f8f8f800f8f8f8ULL, 0x00d7d7d700d7d7d7ULL, 0x003e3e3e003e3e3eULL, + 0x009d9d9d009d9d9dULL, 0x007c7c7c007c7c7cULL, 0x0060606000606060ULL, + 0x00b9b9b900b9b9b9ULL, 0x00bebebe00bebebeULL, 0x00bcbcbc00bcbcbcULL, + 0x008b8b8b008b8b8bULL, 0x0016161600161616ULL, 0x0034343400343434ULL, + 0x004d4d4d004d4d4dULL, 0x00c3c3c300c3c3c3ULL, 0x0072727200727272ULL, + 0x0095959500959595ULL, 0x00ababab00abababULL, 0x008e8e8e008e8e8eULL, + 0x00bababa00bababaULL, 0x007a7a7a007a7a7aULL, 0x00b3b3b300b3b3b3ULL, + 0x0002020200020202ULL, 0x00b4b4b400b4b4b4ULL, 0x00adadad00adadadULL, + 0x00a2a2a200a2a2a2ULL, 0x00acacac00acacacULL, 0x00d8d8d800d8d8d8ULL, + 0x009a9a9a009a9a9aULL, 0x0017171700171717ULL, 0x001a1a1a001a1a1aULL, + 0x0035353500353535ULL, 0x00cccccc00ccccccULL, 0x00f7f7f700f7f7f7ULL, + 0x0099999900999999ULL, 0x0061616100616161ULL, 0x005a5a5a005a5a5aULL, + 0x00e8e8e800e8e8e8ULL, 0x0024242400242424ULL, 0x0056565600565656ULL, + 0x0040404000404040ULL, 0x00e1e1e100e1e1e1ULL, 0x0063636300636363ULL, + 0x0009090900090909ULL, 0x0033333300333333ULL, 0x00bfbfbf00bfbfbfULL, + 0x0098989800989898ULL, 0x0097979700979797ULL, 0x0085858500858585ULL, + 0x0068686800686868ULL, 0x00fcfcfc00fcfcfcULL, 0x00ececec00ecececULL, + 0x000a0a0a000a0a0aULL, 0x00dadada00dadadaULL, 0x006f6f6f006f6f6fULL, + 0x0053535300535353ULL, 0x0062626200626262ULL, 0x00a3a3a300a3a3a3ULL, + 0x002e2e2e002e2e2eULL, 0x0008080800080808ULL, 0x00afafaf00afafafULL, + 0x0028282800282828ULL, 0x00b0b0b000b0b0b0ULL, 0x0074747400747474ULL, + 0x00c2c2c200c2c2c2ULL, 0x00bdbdbd00bdbdbdULL, 0x0036363600363636ULL, + 0x0022222200222222ULL, 0x0038383800383838ULL, 0x0064646400646464ULL, + 0x001e1e1e001e1e1eULL, 0x0039393900393939ULL, 0x002c2c2c002c2c2cULL, + 0x00a6a6a600a6a6a6ULL, 0x0030303000303030ULL, 0x00e5e5e500e5e5e5ULL, + 0x0044444400444444ULL, 
0x00fdfdfd00fdfdfdULL, 0x0088888800888888ULL, + 0x009f9f9f009f9f9fULL, 0x0065656500656565ULL, 0x0087878700878787ULL, + 0x006b6b6b006b6b6bULL, 0x00f4f4f400f4f4f4ULL, 0x0023232300232323ULL, + 0x0048484800484848ULL, 0x0010101000101010ULL, 0x00d1d1d100d1d1d1ULL, + 0x0051515100515151ULL, 0x00c0c0c000c0c0c0ULL, 0x00f9f9f900f9f9f9ULL, + 0x00d2d2d200d2d2d2ULL, 0x00a0a0a000a0a0a0ULL, 0x0055555500555555ULL, + 0x00a1a1a100a1a1a1ULL, 0x0041414100414141ULL, 0x00fafafa00fafafaULL, + 0x0043434300434343ULL, 0x0013131300131313ULL, 0x00c4c4c400c4c4c4ULL, + 0x002f2f2f002f2f2fULL, 0x00a8a8a800a8a8a8ULL, 0x00b6b6b600b6b6b6ULL, + 0x003c3c3c003c3c3cULL, 0x002b2b2b002b2b2bULL, 0x00c1c1c100c1c1c1ULL, + 0x00ffffff00ffffffULL, 0x00c8c8c800c8c8c8ULL, 0x00a5a5a500a5a5a5ULL, + 0x0020202000202020ULL, 0x0089898900898989ULL, 0x0000000000000000ULL, + 0x0090909000909090ULL, 0x0047474700474747ULL, 0x00efefef00efefefULL, + 0x00eaeaea00eaeaeaULL, 0x00b7b7b700b7b7b7ULL, 0x0015151500151515ULL, + 0x0006060600060606ULL, 0x00cdcdcd00cdcdcdULL, 0x00b5b5b500b5b5b5ULL, + 0x0012121200121212ULL, 0x007e7e7e007e7e7eULL, 0x00bbbbbb00bbbbbbULL, + 0x0029292900292929ULL, 0x000f0f0f000f0f0fULL, 0x00b8b8b800b8b8b8ULL, + 0x0007070700070707ULL, 0x0004040400040404ULL, 0x009b9b9b009b9b9bULL, + 0x0094949400949494ULL, 0x0021212100212121ULL, 0x0066666600666666ULL, + 0x00e6e6e600e6e6e6ULL, 0x00cecece00cececeULL, 0x00ededed00edededULL, + 0x00e7e7e700e7e7e7ULL, 0x003b3b3b003b3b3bULL, 0x00fefefe00fefefeULL, + 0x007f7f7f007f7f7fULL, 0x00c5c5c500c5c5c5ULL, 0x00a4a4a400a4a4a4ULL, + 0x0037373700373737ULL, 0x00b1b1b100b1b1b1ULL, 0x004c4c4c004c4c4cULL, + 0x0091919100919191ULL, 0x006e6e6e006e6e6eULL, 0x008d8d8d008d8d8dULL, + 0x0076767600767676ULL, 0x0003030300030303ULL, 0x002d2d2d002d2d2dULL, + 0x00dedede00dededeULL, 0x0096969600969696ULL, 0x0026262600262626ULL, + 0x007d7d7d007d7d7dULL, 0x00c6c6c600c6c6c6ULL, 0x005c5c5c005c5c5cULL, + 0x00d3d3d300d3d3d3ULL, 0x00f2f2f200f2f2f2ULL, 0x004f4f4f004f4f4fULL, + 0x0019191900191919ULL, 0x003f3f3f003f3f3fULL, 0x00dcdcdc00dcdcdcULL, + 0x0079797900797979ULL, 0x001d1d1d001d1d1dULL, 0x0052525200525252ULL, + 0x00ebebeb00ebebebULL, 0x00f3f3f300f3f3f3ULL, 0x006d6d6d006d6d6dULL, + 0x005e5e5e005e5e5eULL, 0x00fbfbfb00fbfbfbULL, 0x0069696900696969ULL, + 0x00b2b2b200b2b2b2ULL, 0x00f0f0f000f0f0f0ULL, 0x0031313100313131ULL, + 0x000c0c0c000c0c0cULL, 0x00d4d4d400d4d4d4ULL, 0x00cfcfcf00cfcfcfULL, + 0x008c8c8c008c8c8cULL, 0x00e2e2e200e2e2e2ULL, 0x0075757500757575ULL, + 0x00a9a9a900a9a9a9ULL, 0x004a4a4a004a4a4aULL, 0x0057575700575757ULL, + 0x0084848400848484ULL, 0x0011111100111111ULL, 0x0045454500454545ULL, + 0x001b1b1b001b1b1bULL, 0x00f5f5f500f5f5f5ULL, 0x00e4e4e400e4e4e4ULL, + 0x000e0e0e000e0e0eULL, 0x0073737300737373ULL, 0x00aaaaaa00aaaaaaULL, + 0x00f1f1f100f1f1f1ULL, 0x00dddddd00ddddddULL, 0x0059595900595959ULL, + 0x0014141400141414ULL, 0x006c6c6c006c6c6cULL, 0x0092929200929292ULL, + 0x0054545400545454ULL, 0x00d0d0d000d0d0d0ULL, 0x0078787800787878ULL, + 0x0070707000707070ULL, 0x00e3e3e300e3e3e3ULL, 0x0049494900494949ULL, + 0x0080808000808080ULL, 0x0050505000505050ULL, 0x00a7a7a700a7a7a7ULL, + 0x00f6f6f600f6f6f6ULL, 0x0077777700777777ULL, 0x0093939300939393ULL, + 0x0086868600868686ULL, 0x0083838300838383ULL, 0x002a2a2a002a2a2aULL, + 0x00c7c7c700c7c7c7ULL, 0x005b5b5b005b5b5bULL, 0x00e9e9e900e9e9e9ULL, + 0x00eeeeee00eeeeeeULL, 0x008f8f8f008f8f8fULL, 0x0001010100010101ULL, + 0x003d3d3d003d3d3dULL, }; const u64 camellia_sp30333033[256] = { - 0x3800383838003838, 0x4100414141004141, 0x1600161616001616, - 0x7600767676007676, 0xd900d9d9d900d9d9, 
0x9300939393009393, - 0x6000606060006060, 0xf200f2f2f200f2f2, 0x7200727272007272, - 0xc200c2c2c200c2c2, 0xab00ababab00abab, 0x9a009a9a9a009a9a, - 0x7500757575007575, 0x0600060606000606, 0x5700575757005757, - 0xa000a0a0a000a0a0, 0x9100919191009191, 0xf700f7f7f700f7f7, - 0xb500b5b5b500b5b5, 0xc900c9c9c900c9c9, 0xa200a2a2a200a2a2, - 0x8c008c8c8c008c8c, 0xd200d2d2d200d2d2, 0x9000909090009090, - 0xf600f6f6f600f6f6, 0x0700070707000707, 0xa700a7a7a700a7a7, - 0x2700272727002727, 0x8e008e8e8e008e8e, 0xb200b2b2b200b2b2, - 0x4900494949004949, 0xde00dedede00dede, 0x4300434343004343, - 0x5c005c5c5c005c5c, 0xd700d7d7d700d7d7, 0xc700c7c7c700c7c7, - 0x3e003e3e3e003e3e, 0xf500f5f5f500f5f5, 0x8f008f8f8f008f8f, - 0x6700676767006767, 0x1f001f1f1f001f1f, 0x1800181818001818, - 0x6e006e6e6e006e6e, 0xaf00afafaf00afaf, 0x2f002f2f2f002f2f, - 0xe200e2e2e200e2e2, 0x8500858585008585, 0x0d000d0d0d000d0d, - 0x5300535353005353, 0xf000f0f0f000f0f0, 0x9c009c9c9c009c9c, - 0x6500656565006565, 0xea00eaeaea00eaea, 0xa300a3a3a300a3a3, - 0xae00aeaeae00aeae, 0x9e009e9e9e009e9e, 0xec00ececec00ecec, - 0x8000808080008080, 0x2d002d2d2d002d2d, 0x6b006b6b6b006b6b, - 0xa800a8a8a800a8a8, 0x2b002b2b2b002b2b, 0x3600363636003636, - 0xa600a6a6a600a6a6, 0xc500c5c5c500c5c5, 0x8600868686008686, - 0x4d004d4d4d004d4d, 0x3300333333003333, 0xfd00fdfdfd00fdfd, - 0x6600666666006666, 0x5800585858005858, 0x9600969696009696, - 0x3a003a3a3a003a3a, 0x0900090909000909, 0x9500959595009595, - 0x1000101010001010, 0x7800787878007878, 0xd800d8d8d800d8d8, - 0x4200424242004242, 0xcc00cccccc00cccc, 0xef00efefef00efef, - 0x2600262626002626, 0xe500e5e5e500e5e5, 0x6100616161006161, - 0x1a001a1a1a001a1a, 0x3f003f3f3f003f3f, 0x3b003b3b3b003b3b, - 0x8200828282008282, 0xb600b6b6b600b6b6, 0xdb00dbdbdb00dbdb, - 0xd400d4d4d400d4d4, 0x9800989898009898, 0xe800e8e8e800e8e8, - 0x8b008b8b8b008b8b, 0x0200020202000202, 0xeb00ebebeb00ebeb, - 0x0a000a0a0a000a0a, 0x2c002c2c2c002c2c, 0x1d001d1d1d001d1d, - 0xb000b0b0b000b0b0, 0x6f006f6f6f006f6f, 0x8d008d8d8d008d8d, - 0x8800888888008888, 0x0e000e0e0e000e0e, 0x1900191919001919, - 0x8700878787008787, 0x4e004e4e4e004e4e, 0x0b000b0b0b000b0b, - 0xa900a9a9a900a9a9, 0x0c000c0c0c000c0c, 0x7900797979007979, - 0x1100111111001111, 0x7f007f7f7f007f7f, 0x2200222222002222, - 0xe700e7e7e700e7e7, 0x5900595959005959, 0xe100e1e1e100e1e1, - 0xda00dadada00dada, 0x3d003d3d3d003d3d, 0xc800c8c8c800c8c8, - 0x1200121212001212, 0x0400040404000404, 0x7400747474007474, - 0x5400545454005454, 0x3000303030003030, 0x7e007e7e7e007e7e, - 0xb400b4b4b400b4b4, 0x2800282828002828, 0x5500555555005555, - 0x6800686868006868, 0x5000505050005050, 0xbe00bebebe00bebe, - 0xd000d0d0d000d0d0, 0xc400c4c4c400c4c4, 0x3100313131003131, - 0xcb00cbcbcb00cbcb, 0x2a002a2a2a002a2a, 0xad00adadad00adad, - 0x0f000f0f0f000f0f, 0xca00cacaca00caca, 0x7000707070007070, - 0xff00ffffff00ffff, 0x3200323232003232, 0x6900696969006969, - 0x0800080808000808, 0x6200626262006262, 0x0000000000000000, - 0x2400242424002424, 0xd100d1d1d100d1d1, 0xfb00fbfbfb00fbfb, - 0xba00bababa00baba, 0xed00ededed00eded, 0x4500454545004545, - 0x8100818181008181, 0x7300737373007373, 0x6d006d6d6d006d6d, - 0x8400848484008484, 0x9f009f9f9f009f9f, 0xee00eeeeee00eeee, - 0x4a004a4a4a004a4a, 0xc300c3c3c300c3c3, 0x2e002e2e2e002e2e, - 0xc100c1c1c100c1c1, 0x0100010101000101, 0xe600e6e6e600e6e6, - 0x2500252525002525, 0x4800484848004848, 0x9900999999009999, - 0xb900b9b9b900b9b9, 0xb300b3b3b300b3b3, 0x7b007b7b7b007b7b, - 0xf900f9f9f900f9f9, 0xce00cecece00cece, 0xbf00bfbfbf00bfbf, - 0xdf00dfdfdf00dfdf, 0x7100717171007171, 0x2900292929002929, 
- 0xcd00cdcdcd00cdcd, 0x6c006c6c6c006c6c, 0x1300131313001313, - 0x6400646464006464, 0x9b009b9b9b009b9b, 0x6300636363006363, - 0x9d009d9d9d009d9d, 0xc000c0c0c000c0c0, 0x4b004b4b4b004b4b, - 0xb700b7b7b700b7b7, 0xa500a5a5a500a5a5, 0x8900898989008989, - 0x5f005f5f5f005f5f, 0xb100b1b1b100b1b1, 0x1700171717001717, - 0xf400f4f4f400f4f4, 0xbc00bcbcbc00bcbc, 0xd300d3d3d300d3d3, - 0x4600464646004646, 0xcf00cfcfcf00cfcf, 0x3700373737003737, - 0x5e005e5e5e005e5e, 0x4700474747004747, 0x9400949494009494, - 0xfa00fafafa00fafa, 0xfc00fcfcfc00fcfc, 0x5b005b5b5b005b5b, - 0x9700979797009797, 0xfe00fefefe00fefe, 0x5a005a5a5a005a5a, - 0xac00acacac00acac, 0x3c003c3c3c003c3c, 0x4c004c4c4c004c4c, - 0x0300030303000303, 0x3500353535003535, 0xf300f3f3f300f3f3, - 0x2300232323002323, 0xb800b8b8b800b8b8, 0x5d005d5d5d005d5d, - 0x6a006a6a6a006a6a, 0x9200929292009292, 0xd500d5d5d500d5d5, - 0x2100212121002121, 0x4400444444004444, 0x5100515151005151, - 0xc600c6c6c600c6c6, 0x7d007d7d7d007d7d, 0x3900393939003939, - 0x8300838383008383, 0xdc00dcdcdc00dcdc, 0xaa00aaaaaa00aaaa, - 0x7c007c7c7c007c7c, 0x7700777777007777, 0x5600565656005656, - 0x0500050505000505, 0x1b001b1b1b001b1b, 0xa400a4a4a400a4a4, - 0x1500151515001515, 0x3400343434003434, 0x1e001e1e1e001e1e, - 0x1c001c1c1c001c1c, 0xf800f8f8f800f8f8, 0x5200525252005252, - 0x2000202020002020, 0x1400141414001414, 0xe900e9e9e900e9e9, - 0xbd00bdbdbd00bdbd, 0xdd00dddddd00dddd, 0xe400e4e4e400e4e4, - 0xa100a1a1a100a1a1, 0xe000e0e0e000e0e0, 0x8a008a8a8a008a8a, - 0xf100f1f1f100f1f1, 0xd600d6d6d600d6d6, 0x7a007a7a7a007a7a, - 0xbb00bbbbbb00bbbb, 0xe300e3e3e300e3e3, 0x4000404040004040, - 0x4f004f4f4f004f4f, + 0x3800383838003838ULL, 0x4100414141004141ULL, 0x1600161616001616ULL, + 0x7600767676007676ULL, 0xd900d9d9d900d9d9ULL, 0x9300939393009393ULL, + 0x6000606060006060ULL, 0xf200f2f2f200f2f2ULL, 0x7200727272007272ULL, + 0xc200c2c2c200c2c2ULL, 0xab00ababab00ababULL, 0x9a009a9a9a009a9aULL, + 0x7500757575007575ULL, 0x0600060606000606ULL, 0x5700575757005757ULL, + 0xa000a0a0a000a0a0ULL, 0x9100919191009191ULL, 0xf700f7f7f700f7f7ULL, + 0xb500b5b5b500b5b5ULL, 0xc900c9c9c900c9c9ULL, 0xa200a2a2a200a2a2ULL, + 0x8c008c8c8c008c8cULL, 0xd200d2d2d200d2d2ULL, 0x9000909090009090ULL, + 0xf600f6f6f600f6f6ULL, 0x0700070707000707ULL, 0xa700a7a7a700a7a7ULL, + 0x2700272727002727ULL, 0x8e008e8e8e008e8eULL, 0xb200b2b2b200b2b2ULL, + 0x4900494949004949ULL, 0xde00dedede00dedeULL, 0x4300434343004343ULL, + 0x5c005c5c5c005c5cULL, 0xd700d7d7d700d7d7ULL, 0xc700c7c7c700c7c7ULL, + 0x3e003e3e3e003e3eULL, 0xf500f5f5f500f5f5ULL, 0x8f008f8f8f008f8fULL, + 0x6700676767006767ULL, 0x1f001f1f1f001f1fULL, 0x1800181818001818ULL, + 0x6e006e6e6e006e6eULL, 0xaf00afafaf00afafULL, 0x2f002f2f2f002f2fULL, + 0xe200e2e2e200e2e2ULL, 0x8500858585008585ULL, 0x0d000d0d0d000d0dULL, + 0x5300535353005353ULL, 0xf000f0f0f000f0f0ULL, 0x9c009c9c9c009c9cULL, + 0x6500656565006565ULL, 0xea00eaeaea00eaeaULL, 0xa300a3a3a300a3a3ULL, + 0xae00aeaeae00aeaeULL, 0x9e009e9e9e009e9eULL, 0xec00ececec00ececULL, + 0x8000808080008080ULL, 0x2d002d2d2d002d2dULL, 0x6b006b6b6b006b6bULL, + 0xa800a8a8a800a8a8ULL, 0x2b002b2b2b002b2bULL, 0x3600363636003636ULL, + 0xa600a6a6a600a6a6ULL, 0xc500c5c5c500c5c5ULL, 0x8600868686008686ULL, + 0x4d004d4d4d004d4dULL, 0x3300333333003333ULL, 0xfd00fdfdfd00fdfdULL, + 0x6600666666006666ULL, 0x5800585858005858ULL, 0x9600969696009696ULL, + 0x3a003a3a3a003a3aULL, 0x0900090909000909ULL, 0x9500959595009595ULL, + 0x1000101010001010ULL, 0x7800787878007878ULL, 0xd800d8d8d800d8d8ULL, + 0x4200424242004242ULL, 0xcc00cccccc00ccccULL, 0xef00efefef00efefULL, + 
0x2600262626002626ULL, 0xe500e5e5e500e5e5ULL, 0x6100616161006161ULL, + 0x1a001a1a1a001a1aULL, 0x3f003f3f3f003f3fULL, 0x3b003b3b3b003b3bULL, + 0x8200828282008282ULL, 0xb600b6b6b600b6b6ULL, 0xdb00dbdbdb00dbdbULL, + 0xd400d4d4d400d4d4ULL, 0x9800989898009898ULL, 0xe800e8e8e800e8e8ULL, + 0x8b008b8b8b008b8bULL, 0x0200020202000202ULL, 0xeb00ebebeb00ebebULL, + 0x0a000a0a0a000a0aULL, 0x2c002c2c2c002c2cULL, 0x1d001d1d1d001d1dULL, + 0xb000b0b0b000b0b0ULL, 0x6f006f6f6f006f6fULL, 0x8d008d8d8d008d8dULL, + 0x8800888888008888ULL, 0x0e000e0e0e000e0eULL, 0x1900191919001919ULL, + 0x8700878787008787ULL, 0x4e004e4e4e004e4eULL, 0x0b000b0b0b000b0bULL, + 0xa900a9a9a900a9a9ULL, 0x0c000c0c0c000c0cULL, 0x7900797979007979ULL, + 0x1100111111001111ULL, 0x7f007f7f7f007f7fULL, 0x2200222222002222ULL, + 0xe700e7e7e700e7e7ULL, 0x5900595959005959ULL, 0xe100e1e1e100e1e1ULL, + 0xda00dadada00dadaULL, 0x3d003d3d3d003d3dULL, 0xc800c8c8c800c8c8ULL, + 0x1200121212001212ULL, 0x0400040404000404ULL, 0x7400747474007474ULL, + 0x5400545454005454ULL, 0x3000303030003030ULL, 0x7e007e7e7e007e7eULL, + 0xb400b4b4b400b4b4ULL, 0x2800282828002828ULL, 0x5500555555005555ULL, + 0x6800686868006868ULL, 0x5000505050005050ULL, 0xbe00bebebe00bebeULL, + 0xd000d0d0d000d0d0ULL, 0xc400c4c4c400c4c4ULL, 0x3100313131003131ULL, + 0xcb00cbcbcb00cbcbULL, 0x2a002a2a2a002a2aULL, 0xad00adadad00adadULL, + 0x0f000f0f0f000f0fULL, 0xca00cacaca00cacaULL, 0x7000707070007070ULL, + 0xff00ffffff00ffffULL, 0x3200323232003232ULL, 0x6900696969006969ULL, + 0x0800080808000808ULL, 0x6200626262006262ULL, 0x0000000000000000ULL, + 0x2400242424002424ULL, 0xd100d1d1d100d1d1ULL, 0xfb00fbfbfb00fbfbULL, + 0xba00bababa00babaULL, 0xed00ededed00ededULL, 0x4500454545004545ULL, + 0x8100818181008181ULL, 0x7300737373007373ULL, 0x6d006d6d6d006d6dULL, + 0x8400848484008484ULL, 0x9f009f9f9f009f9fULL, 0xee00eeeeee00eeeeULL, + 0x4a004a4a4a004a4aULL, 0xc300c3c3c300c3c3ULL, 0x2e002e2e2e002e2eULL, + 0xc100c1c1c100c1c1ULL, 0x0100010101000101ULL, 0xe600e6e6e600e6e6ULL, + 0x2500252525002525ULL, 0x4800484848004848ULL, 0x9900999999009999ULL, + 0xb900b9b9b900b9b9ULL, 0xb300b3b3b300b3b3ULL, 0x7b007b7b7b007b7bULL, + 0xf900f9f9f900f9f9ULL, 0xce00cecece00ceceULL, 0xbf00bfbfbf00bfbfULL, + 0xdf00dfdfdf00dfdfULL, 0x7100717171007171ULL, 0x2900292929002929ULL, + 0xcd00cdcdcd00cdcdULL, 0x6c006c6c6c006c6cULL, 0x1300131313001313ULL, + 0x6400646464006464ULL, 0x9b009b9b9b009b9bULL, 0x6300636363006363ULL, + 0x9d009d9d9d009d9dULL, 0xc000c0c0c000c0c0ULL, 0x4b004b4b4b004b4bULL, + 0xb700b7b7b700b7b7ULL, 0xa500a5a5a500a5a5ULL, 0x8900898989008989ULL, + 0x5f005f5f5f005f5fULL, 0xb100b1b1b100b1b1ULL, 0x1700171717001717ULL, + 0xf400f4f4f400f4f4ULL, 0xbc00bcbcbc00bcbcULL, 0xd300d3d3d300d3d3ULL, + 0x4600464646004646ULL, 0xcf00cfcfcf00cfcfULL, 0x3700373737003737ULL, + 0x5e005e5e5e005e5eULL, 0x4700474747004747ULL, 0x9400949494009494ULL, + 0xfa00fafafa00fafaULL, 0xfc00fcfcfc00fcfcULL, 0x5b005b5b5b005b5bULL, + 0x9700979797009797ULL, 0xfe00fefefe00fefeULL, 0x5a005a5a5a005a5aULL, + 0xac00acacac00acacULL, 0x3c003c3c3c003c3cULL, 0x4c004c4c4c004c4cULL, + 0x0300030303000303ULL, 0x3500353535003535ULL, 0xf300f3f3f300f3f3ULL, + 0x2300232323002323ULL, 0xb800b8b8b800b8b8ULL, 0x5d005d5d5d005d5dULL, + 0x6a006a6a6a006a6aULL, 0x9200929292009292ULL, 0xd500d5d5d500d5d5ULL, + 0x2100212121002121ULL, 0x4400444444004444ULL, 0x5100515151005151ULL, + 0xc600c6c6c600c6c6ULL, 0x7d007d7d7d007d7dULL, 0x3900393939003939ULL, + 0x8300838383008383ULL, 0xdc00dcdcdc00dcdcULL, 0xaa00aaaaaa00aaaaULL, + 0x7c007c7c7c007c7cULL, 0x7700777777007777ULL, 0x5600565656005656ULL, + 
0x0500050505000505ULL, 0x1b001b1b1b001b1bULL, 0xa400a4a4a400a4a4ULL, + 0x1500151515001515ULL, 0x3400343434003434ULL, 0x1e001e1e1e001e1eULL, + 0x1c001c1c1c001c1cULL, 0xf800f8f8f800f8f8ULL, 0x5200525252005252ULL, + 0x2000202020002020ULL, 0x1400141414001414ULL, 0xe900e9e9e900e9e9ULL, + 0xbd00bdbdbd00bdbdULL, 0xdd00dddddd00ddddULL, 0xe400e4e4e400e4e4ULL, + 0xa100a1a1a100a1a1ULL, 0xe000e0e0e000e0e0ULL, 0x8a008a8a8a008a8aULL, + 0xf100f1f1f100f1f1ULL, 0xd600d6d6d600d6d6ULL, 0x7a007a7a7a007a7aULL, + 0xbb00bbbbbb00bbbbULL, 0xe300e3e3e300e3e3ULL, 0x4000404040004040ULL, + 0x4f004f4f4f004f4fULL, }; const u64 camellia_sp44044404[256] = { - 0x7070007070700070, 0x2c2c002c2c2c002c, 0xb3b300b3b3b300b3, - 0xc0c000c0c0c000c0, 0xe4e400e4e4e400e4, 0x5757005757570057, - 0xeaea00eaeaea00ea, 0xaeae00aeaeae00ae, 0x2323002323230023, - 0x6b6b006b6b6b006b, 0x4545004545450045, 0xa5a500a5a5a500a5, - 0xeded00ededed00ed, 0x4f4f004f4f4f004f, 0x1d1d001d1d1d001d, - 0x9292009292920092, 0x8686008686860086, 0xafaf00afafaf00af, - 0x7c7c007c7c7c007c, 0x1f1f001f1f1f001f, 0x3e3e003e3e3e003e, - 0xdcdc00dcdcdc00dc, 0x5e5e005e5e5e005e, 0x0b0b000b0b0b000b, - 0xa6a600a6a6a600a6, 0x3939003939390039, 0xd5d500d5d5d500d5, - 0x5d5d005d5d5d005d, 0xd9d900d9d9d900d9, 0x5a5a005a5a5a005a, - 0x5151005151510051, 0x6c6c006c6c6c006c, 0x8b8b008b8b8b008b, - 0x9a9a009a9a9a009a, 0xfbfb00fbfbfb00fb, 0xb0b000b0b0b000b0, - 0x7474007474740074, 0x2b2b002b2b2b002b, 0xf0f000f0f0f000f0, - 0x8484008484840084, 0xdfdf00dfdfdf00df, 0xcbcb00cbcbcb00cb, - 0x3434003434340034, 0x7676007676760076, 0x6d6d006d6d6d006d, - 0xa9a900a9a9a900a9, 0xd1d100d1d1d100d1, 0x0404000404040004, - 0x1414001414140014, 0x3a3a003a3a3a003a, 0xdede00dedede00de, - 0x1111001111110011, 0x3232003232320032, 0x9c9c009c9c9c009c, - 0x5353005353530053, 0xf2f200f2f2f200f2, 0xfefe00fefefe00fe, - 0xcfcf00cfcfcf00cf, 0xc3c300c3c3c300c3, 0x7a7a007a7a7a007a, - 0x2424002424240024, 0xe8e800e8e8e800e8, 0x6060006060600060, - 0x6969006969690069, 0xaaaa00aaaaaa00aa, 0xa0a000a0a0a000a0, - 0xa1a100a1a1a100a1, 0x6262006262620062, 0x5454005454540054, - 0x1e1e001e1e1e001e, 0xe0e000e0e0e000e0, 0x6464006464640064, - 0x1010001010100010, 0x0000000000000000, 0xa3a300a3a3a300a3, - 0x7575007575750075, 0x8a8a008a8a8a008a, 0xe6e600e6e6e600e6, - 0x0909000909090009, 0xdddd00dddddd00dd, 0x8787008787870087, - 0x8383008383830083, 0xcdcd00cdcdcd00cd, 0x9090009090900090, - 0x7373007373730073, 0xf6f600f6f6f600f6, 0x9d9d009d9d9d009d, - 0xbfbf00bfbfbf00bf, 0x5252005252520052, 0xd8d800d8d8d800d8, - 0xc8c800c8c8c800c8, 0xc6c600c6c6c600c6, 0x8181008181810081, - 0x6f6f006f6f6f006f, 0x1313001313130013, 0x6363006363630063, - 0xe9e900e9e9e900e9, 0xa7a700a7a7a700a7, 0x9f9f009f9f9f009f, - 0xbcbc00bcbcbc00bc, 0x2929002929290029, 0xf9f900f9f9f900f9, - 0x2f2f002f2f2f002f, 0xb4b400b4b4b400b4, 0x7878007878780078, - 0x0606000606060006, 0xe7e700e7e7e700e7, 0x7171007171710071, - 0xd4d400d4d4d400d4, 0xabab00ababab00ab, 0x8888008888880088, - 0x8d8d008d8d8d008d, 0x7272007272720072, 0xb9b900b9b9b900b9, - 0xf8f800f8f8f800f8, 0xacac00acacac00ac, 0x3636003636360036, - 0x2a2a002a2a2a002a, 0x3c3c003c3c3c003c, 0xf1f100f1f1f100f1, - 0x4040004040400040, 0xd3d300d3d3d300d3, 0xbbbb00bbbbbb00bb, - 0x4343004343430043, 0x1515001515150015, 0xadad00adadad00ad, - 0x7777007777770077, 0x8080008080800080, 0x8282008282820082, - 0xecec00ececec00ec, 0x2727002727270027, 0xe5e500e5e5e500e5, - 0x8585008585850085, 0x3535003535350035, 0x0c0c000c0c0c000c, - 0x4141004141410041, 0xefef00efefef00ef, 0x9393009393930093, - 0x1919001919190019, 0x2121002121210021, 0x0e0e000e0e0e000e, - 
0x4e4e004e4e4e004e, 0x6565006565650065, 0xbdbd00bdbdbd00bd, - 0xb8b800b8b8b800b8, 0x8f8f008f8f8f008f, 0xebeb00ebebeb00eb, - 0xcece00cecece00ce, 0x3030003030300030, 0x5f5f005f5f5f005f, - 0xc5c500c5c5c500c5, 0x1a1a001a1a1a001a, 0xe1e100e1e1e100e1, - 0xcaca00cacaca00ca, 0x4747004747470047, 0x3d3d003d3d3d003d, - 0x0101000101010001, 0xd6d600d6d6d600d6, 0x5656005656560056, - 0x4d4d004d4d4d004d, 0x0d0d000d0d0d000d, 0x6666006666660066, - 0xcccc00cccccc00cc, 0x2d2d002d2d2d002d, 0x1212001212120012, - 0x2020002020200020, 0xb1b100b1b1b100b1, 0x9999009999990099, - 0x4c4c004c4c4c004c, 0xc2c200c2c2c200c2, 0x7e7e007e7e7e007e, - 0x0505000505050005, 0xb7b700b7b7b700b7, 0x3131003131310031, - 0x1717001717170017, 0xd7d700d7d7d700d7, 0x5858005858580058, - 0x6161006161610061, 0x1b1b001b1b1b001b, 0x1c1c001c1c1c001c, - 0x0f0f000f0f0f000f, 0x1616001616160016, 0x1818001818180018, - 0x2222002222220022, 0x4444004444440044, 0xb2b200b2b2b200b2, - 0xb5b500b5b5b500b5, 0x9191009191910091, 0x0808000808080008, - 0xa8a800a8a8a800a8, 0xfcfc00fcfcfc00fc, 0x5050005050500050, - 0xd0d000d0d0d000d0, 0x7d7d007d7d7d007d, 0x8989008989890089, - 0x9797009797970097, 0x5b5b005b5b5b005b, 0x9595009595950095, - 0xffff00ffffff00ff, 0xd2d200d2d2d200d2, 0xc4c400c4c4c400c4, - 0x4848004848480048, 0xf7f700f7f7f700f7, 0xdbdb00dbdbdb00db, - 0x0303000303030003, 0xdada00dadada00da, 0x3f3f003f3f3f003f, - 0x9494009494940094, 0x5c5c005c5c5c005c, 0x0202000202020002, - 0x4a4a004a4a4a004a, 0x3333003333330033, 0x6767006767670067, - 0xf3f300f3f3f300f3, 0x7f7f007f7f7f007f, 0xe2e200e2e2e200e2, - 0x9b9b009b9b9b009b, 0x2626002626260026, 0x3737003737370037, - 0x3b3b003b3b3b003b, 0x9696009696960096, 0x4b4b004b4b4b004b, - 0xbebe00bebebe00be, 0x2e2e002e2e2e002e, 0x7979007979790079, - 0x8c8c008c8c8c008c, 0x6e6e006e6e6e006e, 0x8e8e008e8e8e008e, - 0xf5f500f5f5f500f5, 0xb6b600b6b6b600b6, 0xfdfd00fdfdfd00fd, - 0x5959005959590059, 0x9898009898980098, 0x6a6a006a6a6a006a, - 0x4646004646460046, 0xbaba00bababa00ba, 0x2525002525250025, - 0x4242004242420042, 0xa2a200a2a2a200a2, 0xfafa00fafafa00fa, - 0x0707000707070007, 0x5555005555550055, 0xeeee00eeeeee00ee, - 0x0a0a000a0a0a000a, 0x4949004949490049, 0x6868006868680068, - 0x3838003838380038, 0xa4a400a4a4a400a4, 0x2828002828280028, - 0x7b7b007b7b7b007b, 0xc9c900c9c9c900c9, 0xc1c100c1c1c100c1, - 0xe3e300e3e3e300e3, 0xf4f400f4f4f400f4, 0xc7c700c7c7c700c7, - 0x9e9e009e9e9e009e, + 0x7070007070700070ULL, 0x2c2c002c2c2c002cULL, 0xb3b300b3b3b300b3ULL, + 0xc0c000c0c0c000c0ULL, 0xe4e400e4e4e400e4ULL, 0x5757005757570057ULL, + 0xeaea00eaeaea00eaULL, 0xaeae00aeaeae00aeULL, 0x2323002323230023ULL, + 0x6b6b006b6b6b006bULL, 0x4545004545450045ULL, 0xa5a500a5a5a500a5ULL, + 0xeded00ededed00edULL, 0x4f4f004f4f4f004fULL, 0x1d1d001d1d1d001dULL, + 0x9292009292920092ULL, 0x8686008686860086ULL, 0xafaf00afafaf00afULL, + 0x7c7c007c7c7c007cULL, 0x1f1f001f1f1f001fULL, 0x3e3e003e3e3e003eULL, + 0xdcdc00dcdcdc00dcULL, 0x5e5e005e5e5e005eULL, 0x0b0b000b0b0b000bULL, + 0xa6a600a6a6a600a6ULL, 0x3939003939390039ULL, 0xd5d500d5d5d500d5ULL, + 0x5d5d005d5d5d005dULL, 0xd9d900d9d9d900d9ULL, 0x5a5a005a5a5a005aULL, + 0x5151005151510051ULL, 0x6c6c006c6c6c006cULL, 0x8b8b008b8b8b008bULL, + 0x9a9a009a9a9a009aULL, 0xfbfb00fbfbfb00fbULL, 0xb0b000b0b0b000b0ULL, + 0x7474007474740074ULL, 0x2b2b002b2b2b002bULL, 0xf0f000f0f0f000f0ULL, + 0x8484008484840084ULL, 0xdfdf00dfdfdf00dfULL, 0xcbcb00cbcbcb00cbULL, + 0x3434003434340034ULL, 0x7676007676760076ULL, 0x6d6d006d6d6d006dULL, + 0xa9a900a9a9a900a9ULL, 0xd1d100d1d1d100d1ULL, 0x0404000404040004ULL, + 0x1414001414140014ULL, 
0x3a3a003a3a3a003aULL, 0xdede00dedede00deULL, + 0x1111001111110011ULL, 0x3232003232320032ULL, 0x9c9c009c9c9c009cULL, + 0x5353005353530053ULL, 0xf2f200f2f2f200f2ULL, 0xfefe00fefefe00feULL, + 0xcfcf00cfcfcf00cfULL, 0xc3c300c3c3c300c3ULL, 0x7a7a007a7a7a007aULL, + 0x2424002424240024ULL, 0xe8e800e8e8e800e8ULL, 0x6060006060600060ULL, + 0x6969006969690069ULL, 0xaaaa00aaaaaa00aaULL, 0xa0a000a0a0a000a0ULL, + 0xa1a100a1a1a100a1ULL, 0x6262006262620062ULL, 0x5454005454540054ULL, + 0x1e1e001e1e1e001eULL, 0xe0e000e0e0e000e0ULL, 0x6464006464640064ULL, + 0x1010001010100010ULL, 0x0000000000000000ULL, 0xa3a300a3a3a300a3ULL, + 0x7575007575750075ULL, 0x8a8a008a8a8a008aULL, 0xe6e600e6e6e600e6ULL, + 0x0909000909090009ULL, 0xdddd00dddddd00ddULL, 0x8787008787870087ULL, + 0x8383008383830083ULL, 0xcdcd00cdcdcd00cdULL, 0x9090009090900090ULL, + 0x7373007373730073ULL, 0xf6f600f6f6f600f6ULL, 0x9d9d009d9d9d009dULL, + 0xbfbf00bfbfbf00bfULL, 0x5252005252520052ULL, 0xd8d800d8d8d800d8ULL, + 0xc8c800c8c8c800c8ULL, 0xc6c600c6c6c600c6ULL, 0x8181008181810081ULL, + 0x6f6f006f6f6f006fULL, 0x1313001313130013ULL, 0x6363006363630063ULL, + 0xe9e900e9e9e900e9ULL, 0xa7a700a7a7a700a7ULL, 0x9f9f009f9f9f009fULL, + 0xbcbc00bcbcbc00bcULL, 0x2929002929290029ULL, 0xf9f900f9f9f900f9ULL, + 0x2f2f002f2f2f002fULL, 0xb4b400b4b4b400b4ULL, 0x7878007878780078ULL, + 0x0606000606060006ULL, 0xe7e700e7e7e700e7ULL, 0x7171007171710071ULL, + 0xd4d400d4d4d400d4ULL, 0xabab00ababab00abULL, 0x8888008888880088ULL, + 0x8d8d008d8d8d008dULL, 0x7272007272720072ULL, 0xb9b900b9b9b900b9ULL, + 0xf8f800f8f8f800f8ULL, 0xacac00acacac00acULL, 0x3636003636360036ULL, + 0x2a2a002a2a2a002aULL, 0x3c3c003c3c3c003cULL, 0xf1f100f1f1f100f1ULL, + 0x4040004040400040ULL, 0xd3d300d3d3d300d3ULL, 0xbbbb00bbbbbb00bbULL, + 0x4343004343430043ULL, 0x1515001515150015ULL, 0xadad00adadad00adULL, + 0x7777007777770077ULL, 0x8080008080800080ULL, 0x8282008282820082ULL, + 0xecec00ececec00ecULL, 0x2727002727270027ULL, 0xe5e500e5e5e500e5ULL, + 0x8585008585850085ULL, 0x3535003535350035ULL, 0x0c0c000c0c0c000cULL, + 0x4141004141410041ULL, 0xefef00efefef00efULL, 0x9393009393930093ULL, + 0x1919001919190019ULL, 0x2121002121210021ULL, 0x0e0e000e0e0e000eULL, + 0x4e4e004e4e4e004eULL, 0x6565006565650065ULL, 0xbdbd00bdbdbd00bdULL, + 0xb8b800b8b8b800b8ULL, 0x8f8f008f8f8f008fULL, 0xebeb00ebebeb00ebULL, + 0xcece00cecece00ceULL, 0x3030003030300030ULL, 0x5f5f005f5f5f005fULL, + 0xc5c500c5c5c500c5ULL, 0x1a1a001a1a1a001aULL, 0xe1e100e1e1e100e1ULL, + 0xcaca00cacaca00caULL, 0x4747004747470047ULL, 0x3d3d003d3d3d003dULL, + 0x0101000101010001ULL, 0xd6d600d6d6d600d6ULL, 0x5656005656560056ULL, + 0x4d4d004d4d4d004dULL, 0x0d0d000d0d0d000dULL, 0x6666006666660066ULL, + 0xcccc00cccccc00ccULL, 0x2d2d002d2d2d002dULL, 0x1212001212120012ULL, + 0x2020002020200020ULL, 0xb1b100b1b1b100b1ULL, 0x9999009999990099ULL, + 0x4c4c004c4c4c004cULL, 0xc2c200c2c2c200c2ULL, 0x7e7e007e7e7e007eULL, + 0x0505000505050005ULL, 0xb7b700b7b7b700b7ULL, 0x3131003131310031ULL, + 0x1717001717170017ULL, 0xd7d700d7d7d700d7ULL, 0x5858005858580058ULL, + 0x6161006161610061ULL, 0x1b1b001b1b1b001bULL, 0x1c1c001c1c1c001cULL, + 0x0f0f000f0f0f000fULL, 0x1616001616160016ULL, 0x1818001818180018ULL, + 0x2222002222220022ULL, 0x4444004444440044ULL, 0xb2b200b2b2b200b2ULL, + 0xb5b500b5b5b500b5ULL, 0x9191009191910091ULL, 0x0808000808080008ULL, + 0xa8a800a8a8a800a8ULL, 0xfcfc00fcfcfc00fcULL, 0x5050005050500050ULL, + 0xd0d000d0d0d000d0ULL, 0x7d7d007d7d7d007dULL, 0x8989008989890089ULL, + 0x9797009797970097ULL, 0x5b5b005b5b5b005bULL, 0x9595009595950095ULL, + 0xffff00ffffff00ffULL, 
0xd2d200d2d2d200d2ULL, 0xc4c400c4c4c400c4ULL, + 0x4848004848480048ULL, 0xf7f700f7f7f700f7ULL, 0xdbdb00dbdbdb00dbULL, + 0x0303000303030003ULL, 0xdada00dadada00daULL, 0x3f3f003f3f3f003fULL, + 0x9494009494940094ULL, 0x5c5c005c5c5c005cULL, 0x0202000202020002ULL, + 0x4a4a004a4a4a004aULL, 0x3333003333330033ULL, 0x6767006767670067ULL, + 0xf3f300f3f3f300f3ULL, 0x7f7f007f7f7f007fULL, 0xe2e200e2e2e200e2ULL, + 0x9b9b009b9b9b009bULL, 0x2626002626260026ULL, 0x3737003737370037ULL, + 0x3b3b003b3b3b003bULL, 0x9696009696960096ULL, 0x4b4b004b4b4b004bULL, + 0xbebe00bebebe00beULL, 0x2e2e002e2e2e002eULL, 0x7979007979790079ULL, + 0x8c8c008c8c8c008cULL, 0x6e6e006e6e6e006eULL, 0x8e8e008e8e8e008eULL, + 0xf5f500f5f5f500f5ULL, 0xb6b600b6b6b600b6ULL, 0xfdfd00fdfdfd00fdULL, + 0x5959005959590059ULL, 0x9898009898980098ULL, 0x6a6a006a6a6a006aULL, + 0x4646004646460046ULL, 0xbaba00bababa00baULL, 0x2525002525250025ULL, + 0x4242004242420042ULL, 0xa2a200a2a2a200a2ULL, 0xfafa00fafafa00faULL, + 0x0707000707070007ULL, 0x5555005555550055ULL, 0xeeee00eeeeee00eeULL, + 0x0a0a000a0a0a000aULL, 0x4949004949490049ULL, 0x6868006868680068ULL, + 0x3838003838380038ULL, 0xa4a400a4a4a400a4ULL, 0x2828002828280028ULL, + 0x7b7b007b7b7b007bULL, 0xc9c900c9c9c900c9ULL, 0xc1c100c1c1c100c1ULL, + 0xe3e300e3e3e300e3ULL, 0xf4f400f4f4f400f4ULL, 0xc7c700c7c7c700c7ULL, + 0x9e9e009e9e9e009eULL, }; const u64 camellia_sp11101110[256] = { - 0x7070700070707000, 0x8282820082828200, 0x2c2c2c002c2c2c00, - 0xececec00ececec00, 0xb3b3b300b3b3b300, 0x2727270027272700, - 0xc0c0c000c0c0c000, 0xe5e5e500e5e5e500, 0xe4e4e400e4e4e400, - 0x8585850085858500, 0x5757570057575700, 0x3535350035353500, - 0xeaeaea00eaeaea00, 0x0c0c0c000c0c0c00, 0xaeaeae00aeaeae00, - 0x4141410041414100, 0x2323230023232300, 0xefefef00efefef00, - 0x6b6b6b006b6b6b00, 0x9393930093939300, 0x4545450045454500, - 0x1919190019191900, 0xa5a5a500a5a5a500, 0x2121210021212100, - 0xededed00ededed00, 0x0e0e0e000e0e0e00, 0x4f4f4f004f4f4f00, - 0x4e4e4e004e4e4e00, 0x1d1d1d001d1d1d00, 0x6565650065656500, - 0x9292920092929200, 0xbdbdbd00bdbdbd00, 0x8686860086868600, - 0xb8b8b800b8b8b800, 0xafafaf00afafaf00, 0x8f8f8f008f8f8f00, - 0x7c7c7c007c7c7c00, 0xebebeb00ebebeb00, 0x1f1f1f001f1f1f00, - 0xcecece00cecece00, 0x3e3e3e003e3e3e00, 0x3030300030303000, - 0xdcdcdc00dcdcdc00, 0x5f5f5f005f5f5f00, 0x5e5e5e005e5e5e00, - 0xc5c5c500c5c5c500, 0x0b0b0b000b0b0b00, 0x1a1a1a001a1a1a00, - 0xa6a6a600a6a6a600, 0xe1e1e100e1e1e100, 0x3939390039393900, - 0xcacaca00cacaca00, 0xd5d5d500d5d5d500, 0x4747470047474700, - 0x5d5d5d005d5d5d00, 0x3d3d3d003d3d3d00, 0xd9d9d900d9d9d900, - 0x0101010001010100, 0x5a5a5a005a5a5a00, 0xd6d6d600d6d6d600, - 0x5151510051515100, 0x5656560056565600, 0x6c6c6c006c6c6c00, - 0x4d4d4d004d4d4d00, 0x8b8b8b008b8b8b00, 0x0d0d0d000d0d0d00, - 0x9a9a9a009a9a9a00, 0x6666660066666600, 0xfbfbfb00fbfbfb00, - 0xcccccc00cccccc00, 0xb0b0b000b0b0b000, 0x2d2d2d002d2d2d00, - 0x7474740074747400, 0x1212120012121200, 0x2b2b2b002b2b2b00, - 0x2020200020202000, 0xf0f0f000f0f0f000, 0xb1b1b100b1b1b100, - 0x8484840084848400, 0x9999990099999900, 0xdfdfdf00dfdfdf00, - 0x4c4c4c004c4c4c00, 0xcbcbcb00cbcbcb00, 0xc2c2c200c2c2c200, - 0x3434340034343400, 0x7e7e7e007e7e7e00, 0x7676760076767600, - 0x0505050005050500, 0x6d6d6d006d6d6d00, 0xb7b7b700b7b7b700, - 0xa9a9a900a9a9a900, 0x3131310031313100, 0xd1d1d100d1d1d100, - 0x1717170017171700, 0x0404040004040400, 0xd7d7d700d7d7d700, - 0x1414140014141400, 0x5858580058585800, 0x3a3a3a003a3a3a00, - 0x6161610061616100, 0xdedede00dedede00, 0x1b1b1b001b1b1b00, - 0x1111110011111100, 0x1c1c1c001c1c1c00, 
0x3232320032323200, - 0x0f0f0f000f0f0f00, 0x9c9c9c009c9c9c00, 0x1616160016161600, - 0x5353530053535300, 0x1818180018181800, 0xf2f2f200f2f2f200, - 0x2222220022222200, 0xfefefe00fefefe00, 0x4444440044444400, - 0xcfcfcf00cfcfcf00, 0xb2b2b200b2b2b200, 0xc3c3c300c3c3c300, - 0xb5b5b500b5b5b500, 0x7a7a7a007a7a7a00, 0x9191910091919100, - 0x2424240024242400, 0x0808080008080800, 0xe8e8e800e8e8e800, - 0xa8a8a800a8a8a800, 0x6060600060606000, 0xfcfcfc00fcfcfc00, - 0x6969690069696900, 0x5050500050505000, 0xaaaaaa00aaaaaa00, - 0xd0d0d000d0d0d000, 0xa0a0a000a0a0a000, 0x7d7d7d007d7d7d00, - 0xa1a1a100a1a1a100, 0x8989890089898900, 0x6262620062626200, - 0x9797970097979700, 0x5454540054545400, 0x5b5b5b005b5b5b00, - 0x1e1e1e001e1e1e00, 0x9595950095959500, 0xe0e0e000e0e0e000, - 0xffffff00ffffff00, 0x6464640064646400, 0xd2d2d200d2d2d200, - 0x1010100010101000, 0xc4c4c400c4c4c400, 0x0000000000000000, - 0x4848480048484800, 0xa3a3a300a3a3a300, 0xf7f7f700f7f7f700, - 0x7575750075757500, 0xdbdbdb00dbdbdb00, 0x8a8a8a008a8a8a00, - 0x0303030003030300, 0xe6e6e600e6e6e600, 0xdadada00dadada00, - 0x0909090009090900, 0x3f3f3f003f3f3f00, 0xdddddd00dddddd00, - 0x9494940094949400, 0x8787870087878700, 0x5c5c5c005c5c5c00, - 0x8383830083838300, 0x0202020002020200, 0xcdcdcd00cdcdcd00, - 0x4a4a4a004a4a4a00, 0x9090900090909000, 0x3333330033333300, - 0x7373730073737300, 0x6767670067676700, 0xf6f6f600f6f6f600, - 0xf3f3f300f3f3f300, 0x9d9d9d009d9d9d00, 0x7f7f7f007f7f7f00, - 0xbfbfbf00bfbfbf00, 0xe2e2e200e2e2e200, 0x5252520052525200, - 0x9b9b9b009b9b9b00, 0xd8d8d800d8d8d800, 0x2626260026262600, - 0xc8c8c800c8c8c800, 0x3737370037373700, 0xc6c6c600c6c6c600, - 0x3b3b3b003b3b3b00, 0x8181810081818100, 0x9696960096969600, - 0x6f6f6f006f6f6f00, 0x4b4b4b004b4b4b00, 0x1313130013131300, - 0xbebebe00bebebe00, 0x6363630063636300, 0x2e2e2e002e2e2e00, - 0xe9e9e900e9e9e900, 0x7979790079797900, 0xa7a7a700a7a7a700, - 0x8c8c8c008c8c8c00, 0x9f9f9f009f9f9f00, 0x6e6e6e006e6e6e00, - 0xbcbcbc00bcbcbc00, 0x8e8e8e008e8e8e00, 0x2929290029292900, - 0xf5f5f500f5f5f500, 0xf9f9f900f9f9f900, 0xb6b6b600b6b6b600, - 0x2f2f2f002f2f2f00, 0xfdfdfd00fdfdfd00, 0xb4b4b400b4b4b400, - 0x5959590059595900, 0x7878780078787800, 0x9898980098989800, - 0x0606060006060600, 0x6a6a6a006a6a6a00, 0xe7e7e700e7e7e700, - 0x4646460046464600, 0x7171710071717100, 0xbababa00bababa00, - 0xd4d4d400d4d4d400, 0x2525250025252500, 0xababab00ababab00, - 0x4242420042424200, 0x8888880088888800, 0xa2a2a200a2a2a200, - 0x8d8d8d008d8d8d00, 0xfafafa00fafafa00, 0x7272720072727200, - 0x0707070007070700, 0xb9b9b900b9b9b900, 0x5555550055555500, - 0xf8f8f800f8f8f800, 0xeeeeee00eeeeee00, 0xacacac00acacac00, - 0x0a0a0a000a0a0a00, 0x3636360036363600, 0x4949490049494900, - 0x2a2a2a002a2a2a00, 0x6868680068686800, 0x3c3c3c003c3c3c00, - 0x3838380038383800, 0xf1f1f100f1f1f100, 0xa4a4a400a4a4a400, - 0x4040400040404000, 0x2828280028282800, 0xd3d3d300d3d3d300, - 0x7b7b7b007b7b7b00, 0xbbbbbb00bbbbbb00, 0xc9c9c900c9c9c900, - 0x4343430043434300, 0xc1c1c100c1c1c100, 0x1515150015151500, - 0xe3e3e300e3e3e300, 0xadadad00adadad00, 0xf4f4f400f4f4f400, - 0x7777770077777700, 0xc7c7c700c7c7c700, 0x8080800080808000, - 0x9e9e9e009e9e9e00, + 0x7070700070707000ULL, 0x8282820082828200ULL, 0x2c2c2c002c2c2c00ULL, + 0xececec00ececec00ULL, 0xb3b3b300b3b3b300ULL, 0x2727270027272700ULL, + 0xc0c0c000c0c0c000ULL, 0xe5e5e500e5e5e500ULL, 0xe4e4e400e4e4e400ULL, + 0x8585850085858500ULL, 0x5757570057575700ULL, 0x3535350035353500ULL, + 0xeaeaea00eaeaea00ULL, 0x0c0c0c000c0c0c00ULL, 0xaeaeae00aeaeae00ULL, + 0x4141410041414100ULL, 0x2323230023232300ULL, 
0xefefef00efefef00ULL, + 0x6b6b6b006b6b6b00ULL, 0x9393930093939300ULL, 0x4545450045454500ULL, + 0x1919190019191900ULL, 0xa5a5a500a5a5a500ULL, 0x2121210021212100ULL, + 0xededed00ededed00ULL, 0x0e0e0e000e0e0e00ULL, 0x4f4f4f004f4f4f00ULL, + 0x4e4e4e004e4e4e00ULL, 0x1d1d1d001d1d1d00ULL, 0x6565650065656500ULL, + 0x9292920092929200ULL, 0xbdbdbd00bdbdbd00ULL, 0x8686860086868600ULL, + 0xb8b8b800b8b8b800ULL, 0xafafaf00afafaf00ULL, 0x8f8f8f008f8f8f00ULL, + 0x7c7c7c007c7c7c00ULL, 0xebebeb00ebebeb00ULL, 0x1f1f1f001f1f1f00ULL, + 0xcecece00cecece00ULL, 0x3e3e3e003e3e3e00ULL, 0x3030300030303000ULL, + 0xdcdcdc00dcdcdc00ULL, 0x5f5f5f005f5f5f00ULL, 0x5e5e5e005e5e5e00ULL, + 0xc5c5c500c5c5c500ULL, 0x0b0b0b000b0b0b00ULL, 0x1a1a1a001a1a1a00ULL, + 0xa6a6a600a6a6a600ULL, 0xe1e1e100e1e1e100ULL, 0x3939390039393900ULL, + 0xcacaca00cacaca00ULL, 0xd5d5d500d5d5d500ULL, 0x4747470047474700ULL, + 0x5d5d5d005d5d5d00ULL, 0x3d3d3d003d3d3d00ULL, 0xd9d9d900d9d9d900ULL, + 0x0101010001010100ULL, 0x5a5a5a005a5a5a00ULL, 0xd6d6d600d6d6d600ULL, + 0x5151510051515100ULL, 0x5656560056565600ULL, 0x6c6c6c006c6c6c00ULL, + 0x4d4d4d004d4d4d00ULL, 0x8b8b8b008b8b8b00ULL, 0x0d0d0d000d0d0d00ULL, + 0x9a9a9a009a9a9a00ULL, 0x6666660066666600ULL, 0xfbfbfb00fbfbfb00ULL, + 0xcccccc00cccccc00ULL, 0xb0b0b000b0b0b000ULL, 0x2d2d2d002d2d2d00ULL, + 0x7474740074747400ULL, 0x1212120012121200ULL, 0x2b2b2b002b2b2b00ULL, + 0x2020200020202000ULL, 0xf0f0f000f0f0f000ULL, 0xb1b1b100b1b1b100ULL, + 0x8484840084848400ULL, 0x9999990099999900ULL, 0xdfdfdf00dfdfdf00ULL, + 0x4c4c4c004c4c4c00ULL, 0xcbcbcb00cbcbcb00ULL, 0xc2c2c200c2c2c200ULL, + 0x3434340034343400ULL, 0x7e7e7e007e7e7e00ULL, 0x7676760076767600ULL, + 0x0505050005050500ULL, 0x6d6d6d006d6d6d00ULL, 0xb7b7b700b7b7b700ULL, + 0xa9a9a900a9a9a900ULL, 0x3131310031313100ULL, 0xd1d1d100d1d1d100ULL, + 0x1717170017171700ULL, 0x0404040004040400ULL, 0xd7d7d700d7d7d700ULL, + 0x1414140014141400ULL, 0x5858580058585800ULL, 0x3a3a3a003a3a3a00ULL, + 0x6161610061616100ULL, 0xdedede00dedede00ULL, 0x1b1b1b001b1b1b00ULL, + 0x1111110011111100ULL, 0x1c1c1c001c1c1c00ULL, 0x3232320032323200ULL, + 0x0f0f0f000f0f0f00ULL, 0x9c9c9c009c9c9c00ULL, 0x1616160016161600ULL, + 0x5353530053535300ULL, 0x1818180018181800ULL, 0xf2f2f200f2f2f200ULL, + 0x2222220022222200ULL, 0xfefefe00fefefe00ULL, 0x4444440044444400ULL, + 0xcfcfcf00cfcfcf00ULL, 0xb2b2b200b2b2b200ULL, 0xc3c3c300c3c3c300ULL, + 0xb5b5b500b5b5b500ULL, 0x7a7a7a007a7a7a00ULL, 0x9191910091919100ULL, + 0x2424240024242400ULL, 0x0808080008080800ULL, 0xe8e8e800e8e8e800ULL, + 0xa8a8a800a8a8a800ULL, 0x6060600060606000ULL, 0xfcfcfc00fcfcfc00ULL, + 0x6969690069696900ULL, 0x5050500050505000ULL, 0xaaaaaa00aaaaaa00ULL, + 0xd0d0d000d0d0d000ULL, 0xa0a0a000a0a0a000ULL, 0x7d7d7d007d7d7d00ULL, + 0xa1a1a100a1a1a100ULL, 0x8989890089898900ULL, 0x6262620062626200ULL, + 0x9797970097979700ULL, 0x5454540054545400ULL, 0x5b5b5b005b5b5b00ULL, + 0x1e1e1e001e1e1e00ULL, 0x9595950095959500ULL, 0xe0e0e000e0e0e000ULL, + 0xffffff00ffffff00ULL, 0x6464640064646400ULL, 0xd2d2d200d2d2d200ULL, + 0x1010100010101000ULL, 0xc4c4c400c4c4c400ULL, 0x0000000000000000ULL, + 0x4848480048484800ULL, 0xa3a3a300a3a3a300ULL, 0xf7f7f700f7f7f700ULL, + 0x7575750075757500ULL, 0xdbdbdb00dbdbdb00ULL, 0x8a8a8a008a8a8a00ULL, + 0x0303030003030300ULL, 0xe6e6e600e6e6e600ULL, 0xdadada00dadada00ULL, + 0x0909090009090900ULL, 0x3f3f3f003f3f3f00ULL, 0xdddddd00dddddd00ULL, + 0x9494940094949400ULL, 0x8787870087878700ULL, 0x5c5c5c005c5c5c00ULL, + 0x8383830083838300ULL, 0x0202020002020200ULL, 0xcdcdcd00cdcdcd00ULL, + 0x4a4a4a004a4a4a00ULL, 0x9090900090909000ULL, 
0x3333330033333300ULL, + 0x7373730073737300ULL, 0x6767670067676700ULL, 0xf6f6f600f6f6f600ULL, + 0xf3f3f300f3f3f300ULL, 0x9d9d9d009d9d9d00ULL, 0x7f7f7f007f7f7f00ULL, + 0xbfbfbf00bfbfbf00ULL, 0xe2e2e200e2e2e200ULL, 0x5252520052525200ULL, + 0x9b9b9b009b9b9b00ULL, 0xd8d8d800d8d8d800ULL, 0x2626260026262600ULL, + 0xc8c8c800c8c8c800ULL, 0x3737370037373700ULL, 0xc6c6c600c6c6c600ULL, + 0x3b3b3b003b3b3b00ULL, 0x8181810081818100ULL, 0x9696960096969600ULL, + 0x6f6f6f006f6f6f00ULL, 0x4b4b4b004b4b4b00ULL, 0x1313130013131300ULL, + 0xbebebe00bebebe00ULL, 0x6363630063636300ULL, 0x2e2e2e002e2e2e00ULL, + 0xe9e9e900e9e9e900ULL, 0x7979790079797900ULL, 0xa7a7a700a7a7a700ULL, + 0x8c8c8c008c8c8c00ULL, 0x9f9f9f009f9f9f00ULL, 0x6e6e6e006e6e6e00ULL, + 0xbcbcbc00bcbcbc00ULL, 0x8e8e8e008e8e8e00ULL, 0x2929290029292900ULL, + 0xf5f5f500f5f5f500ULL, 0xf9f9f900f9f9f900ULL, 0xb6b6b600b6b6b600ULL, + 0x2f2f2f002f2f2f00ULL, 0xfdfdfd00fdfdfd00ULL, 0xb4b4b400b4b4b400ULL, + 0x5959590059595900ULL, 0x7878780078787800ULL, 0x9898980098989800ULL, + 0x0606060006060600ULL, 0x6a6a6a006a6a6a00ULL, 0xe7e7e700e7e7e700ULL, + 0x4646460046464600ULL, 0x7171710071717100ULL, 0xbababa00bababa00ULL, + 0xd4d4d400d4d4d400ULL, 0x2525250025252500ULL, 0xababab00ababab00ULL, + 0x4242420042424200ULL, 0x8888880088888800ULL, 0xa2a2a200a2a2a200ULL, + 0x8d8d8d008d8d8d00ULL, 0xfafafa00fafafa00ULL, 0x7272720072727200ULL, + 0x0707070007070700ULL, 0xb9b9b900b9b9b900ULL, 0x5555550055555500ULL, + 0xf8f8f800f8f8f800ULL, 0xeeeeee00eeeeee00ULL, 0xacacac00acacac00ULL, + 0x0a0a0a000a0a0a00ULL, 0x3636360036363600ULL, 0x4949490049494900ULL, + 0x2a2a2a002a2a2a00ULL, 0x6868680068686800ULL, 0x3c3c3c003c3c3c00ULL, + 0x3838380038383800ULL, 0xf1f1f100f1f1f100ULL, 0xa4a4a400a4a4a400ULL, + 0x4040400040404000ULL, 0x2828280028282800ULL, 0xd3d3d300d3d3d300ULL, + 0x7b7b7b007b7b7b00ULL, 0xbbbbbb00bbbbbb00ULL, 0xc9c9c900c9c9c900ULL, + 0x4343430043434300ULL, 0xc1c1c100c1c1c100ULL, 0x1515150015151500ULL, + 0xe3e3e300e3e3e300ULL, 0xadadad00adadad00ULL, 0xf4f4f400f4f4f400ULL, + 0x7777770077777700ULL, 0xc7c7c700c7c7c700ULL, 0x8080800080808000ULL, + 0x9e9e9e009e9e9e00ULL, }; /* key constants */ @@ -1601,7 +1601,6 @@ static struct crypto_alg camellia_algs[6] = { { .cra_ctxsize = sizeof(struct camellia_ctx), .cra_alignmask = 0, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(camellia_algs[0].cra_list), .cra_u = { .cipher = { .cia_min_keysize = CAMELLIA_MIN_KEY_SIZE, @@ -1621,7 +1620,6 @@ static struct crypto_alg camellia_algs[6] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(camellia_algs[1].cra_list), .cra_u = { .blkcipher = { .min_keysize = CAMELLIA_MIN_KEY_SIZE, @@ -1641,7 +1639,6 @@ static struct crypto_alg camellia_algs[6] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(camellia_algs[2].cra_list), .cra_u = { .blkcipher = { .min_keysize = CAMELLIA_MIN_KEY_SIZE, @@ -1662,7 +1659,6 @@ static struct crypto_alg camellia_algs[6] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(camellia_algs[3].cra_list), .cra_u = { .blkcipher = { .min_keysize = CAMELLIA_MIN_KEY_SIZE, @@ -1683,7 +1679,6 @@ static struct crypto_alg camellia_algs[6] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(camellia_algs[4].cra_list), .cra_exit = lrw_exit_tfm, .cra_u = { .blkcipher = { @@ -1707,7 +1702,6 @@ static struct crypto_alg 
camellia_algs[6] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(camellia_algs[5].cra_list), .cra_u = { .blkcipher = { .min_keysize = CAMELLIA_MIN_KEY_SIZE * 2, diff --git a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S new file mode 100644 index 000000000000..a41a3aaba220 --- /dev/null +++ b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S @@ -0,0 +1,376 @@ +/* + * Cast5 Cipher 16-way parallel algorithm (AVX/x86_64) + * + * Copyright (C) 2012 Johannes Goetzfried + * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de> + * + * Copyright © 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + * + */ + +.file "cast5-avx-x86_64-asm_64.S" + +.extern cast5_s1 +.extern cast5_s2 +.extern cast5_s3 +.extern cast5_s4 + +/* structure of crypto context */ +#define km 0 +#define kr (16*4) +#define rr ((16*4)+16) + +/* s-boxes */ +#define s1 cast5_s1 +#define s2 cast5_s2 +#define s3 cast5_s3 +#define s4 cast5_s4 + +/********************************************************************** + 16-way AVX cast5 + **********************************************************************/ +#define CTX %rdi + +#define RL1 %xmm0 +#define RR1 %xmm1 +#define RL2 %xmm2 +#define RR2 %xmm3 +#define RL3 %xmm4 +#define RR3 %xmm5 +#define RL4 %xmm6 +#define RR4 %xmm7 + +#define RX %xmm8 + +#define RKM %xmm9 +#define RKR %xmm10 +#define RKRF %xmm11 +#define RKRR %xmm12 + +#define R32 %xmm13 +#define R1ST %xmm14 + +#define RTMP %xmm15 + +#define RID1 %rbp +#define RID1d %ebp +#define RID2 %rsi +#define RID2d %esi + +#define RGI1 %rdx +#define RGI1bl %dl +#define RGI1bh %dh +#define RGI2 %rcx +#define RGI2bl %cl +#define RGI2bh %ch + +#define RGI3 %rax +#define RGI3bl %al +#define RGI3bh %ah +#define RGI4 %rbx +#define RGI4bl %bl +#define RGI4bh %bh + +#define RFS1 %r8 +#define RFS1d %r8d +#define RFS2 %r9 +#define RFS2d %r9d +#define RFS3 %r10 +#define RFS3d %r10d + + +#define lookup_32bit(src, dst, op1, op2, op3, interleave_op, il_reg) \ + movzbl src ## bh, RID1d; \ + movzbl src ## bl, RID2d; \ + shrq $16, src; \ + movl s1(, RID1, 4), dst ## d; \ + op1 s2(, RID2, 4), dst ## d; \ + movzbl src ## bh, RID1d; \ + movzbl src ## bl, RID2d; \ + interleave_op(il_reg); \ + op2 s3(, RID1, 4), dst ## d; \ + op3 s4(, RID2, 4), dst ## d; + +#define dummy(d) /* do nothing */ + +#define shr_next(reg) \ + shrq $16, reg; + +#define F_head(a, x, gi1, gi2, op0) \ + op0 a, RKM, x; \ + vpslld RKRF, x, RTMP; \ + vpsrld RKRR, x, x; \ + vpor RTMP, x, x; \ + \ + vmovq x, gi1; \ + vpextrq $1, x, gi2; + +#define F_tail(a, x, gi1, gi2, op1, op2, op3) \ + lookup_32bit(##gi1, RFS1, op1, op2, op3, shr_next, ##gi1); \ + lookup_32bit(##gi2, RFS3, op1, op2, op3, shr_next, ##gi2); \ + \ + lookup_32bit(##gi1, RFS2, op1, op2, op3, dummy, none); \ 
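+	/* merge each pair of 32-bit lookup results into one 64-bit half before reloading the xmm register */ \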
+ shlq $32, RFS2; \ + orq RFS1, RFS2; \ + lookup_32bit(##gi2, RFS1, op1, op2, op3, dummy, none); \ + shlq $32, RFS1; \ + orq RFS1, RFS3; \ + \ + vmovq RFS2, x; \ + vpinsrq $1, RFS3, x, x; + +#define F_2(a1, b1, a2, b2, op0, op1, op2, op3) \ + F_head(b1, RX, RGI1, RGI2, op0); \ + F_head(b2, RX, RGI3, RGI4, op0); \ + \ + F_tail(b1, RX, RGI1, RGI2, op1, op2, op3); \ + F_tail(b2, RTMP, RGI3, RGI4, op1, op2, op3); \ + \ + vpxor a1, RX, a1; \ + vpxor a2, RTMP, a2; + +#define F1_2(a1, b1, a2, b2) \ + F_2(a1, b1, a2, b2, vpaddd, xorl, subl, addl) +#define F2_2(a1, b1, a2, b2) \ + F_2(a1, b1, a2, b2, vpxor, subl, addl, xorl) +#define F3_2(a1, b1, a2, b2) \ + F_2(a1, b1, a2, b2, vpsubd, addl, xorl, subl) + +#define subround(a1, b1, a2, b2, f) \ + F ## f ## _2(a1, b1, a2, b2); + +#define round(l, r, n, f) \ + vbroadcastss (km+(4*n))(CTX), RKM; \ + vpand R1ST, RKR, RKRF; \ + vpsubq RKRF, R32, RKRR; \ + vpsrldq $1, RKR, RKR; \ + subround(l ## 1, r ## 1, l ## 2, r ## 2, f); \ + subround(l ## 3, r ## 3, l ## 4, r ## 4, f); + +#define enc_preload_rkr() \ + vbroadcastss .L16_mask, RKR; \ + /* add 16-bit rotation to key rotations (mod 32) */ \ + vpxor kr(CTX), RKR, RKR; + +#define dec_preload_rkr() \ + vbroadcastss .L16_mask, RKR; \ + /* add 16-bit rotation to key rotations (mod 32) */ \ + vpxor kr(CTX), RKR, RKR; \ + vpshufb .Lbswap128_mask, RKR, RKR; + +#define transpose_2x4(x0, x1, t0, t1) \ + vpunpckldq x1, x0, t0; \ + vpunpckhdq x1, x0, t1; \ + \ + vpunpcklqdq t1, t0, x0; \ + vpunpckhqdq t1, t0, x1; + +#define inpack_blocks(in, x0, x1, t0, t1, rmask) \ + vmovdqu (0*4*4)(in), x0; \ + vmovdqu (1*4*4)(in), x1; \ + vpshufb rmask, x0, x0; \ + vpshufb rmask, x1, x1; \ + \ + transpose_2x4(x0, x1, t0, t1) + +#define outunpack_blocks(out, x0, x1, t0, t1, rmask) \ + transpose_2x4(x0, x1, t0, t1) \ + \ + vpshufb rmask, x0, x0; \ + vpshufb rmask, x1, x1; \ + vmovdqu x0, (0*4*4)(out); \ + vmovdqu x1, (1*4*4)(out); + +#define outunpack_xor_blocks(out, x0, x1, t0, t1, rmask) \ + transpose_2x4(x0, x1, t0, t1) \ + \ + vpshufb rmask, x0, x0; \ + vpshufb rmask, x1, x1; \ + vpxor (0*4*4)(out), x0, x0; \ + vmovdqu x0, (0*4*4)(out); \ + vpxor (1*4*4)(out), x1, x1; \ + vmovdqu x1, (1*4*4)(out); + +.data + +.align 16 +.Lbswap_mask: + .byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 +.Lbswap128_mask: + .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 +.L16_mask: + .byte 16, 16, 16, 16 +.L32_mask: + .byte 32, 0, 0, 0 +.Lfirst_mask: + .byte 0x1f, 0, 0, 0 + +.text + +.align 16 +.global __cast5_enc_blk_16way +.type __cast5_enc_blk_16way,@function; + +__cast5_enc_blk_16way: + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + * %rcx: bool, if true: xor output + */ + + pushq %rbp; + pushq %rbx; + pushq %rcx; + + vmovdqa .Lbswap_mask, RKM; + vmovd .Lfirst_mask, R1ST; + vmovd .L32_mask, R32; + enc_preload_rkr(); + + leaq 1*(2*4*4)(%rdx), %rax; + inpack_blocks(%rdx, RL1, RR1, RTMP, RX, RKM); + inpack_blocks(%rax, RL2, RR2, RTMP, RX, RKM); + leaq 2*(2*4*4)(%rdx), %rax; + inpack_blocks(%rax, RL3, RR3, RTMP, RX, RKM); + leaq 3*(2*4*4)(%rdx), %rax; + inpack_blocks(%rax, RL4, RR4, RTMP, RX, RKM); + + movq %rsi, %r11; + + round(RL, RR, 0, 1); + round(RR, RL, 1, 2); + round(RL, RR, 2, 3); + round(RR, RL, 3, 1); + round(RL, RR, 4, 2); + round(RR, RL, 5, 3); + round(RL, RR, 6, 1); + round(RR, RL, 7, 2); + round(RL, RR, 8, 3); + round(RR, RL, 9, 1); + round(RL, RR, 10, 2); + round(RR, RL, 11, 3); + + movzbl rr(CTX), %eax; + testl %eax, %eax; + jnz __skip_enc; + + round(RL, RR, 12, 1); + round(RR, RL, 13, 2); + 
round(RL, RR, 14, 3);
+	round(RR, RL, 15, 1);
+
+__skip_enc:
+	popq %rcx;
+	popq %rbx;
+	popq %rbp;
+
+	vmovdqa .Lbswap_mask, RKM;
+	leaq 1*(2*4*4)(%r11), %rax;
+
+	testb %cl, %cl;
+	jnz __enc_xor16;
+
+	outunpack_blocks(%r11, RR1, RL1, RTMP, RX, RKM);
+	outunpack_blocks(%rax, RR2, RL2, RTMP, RX, RKM);
+	leaq 2*(2*4*4)(%r11), %rax;
+	outunpack_blocks(%rax, RR3, RL3, RTMP, RX, RKM);
+	leaq 3*(2*4*4)(%r11), %rax;
+	outunpack_blocks(%rax, RR4, RL4, RTMP, RX, RKM);
+
+	ret;
+
+__enc_xor16:
+	outunpack_xor_blocks(%r11, RR1, RL1, RTMP, RX, RKM);
+	outunpack_xor_blocks(%rax, RR2, RL2, RTMP, RX, RKM);
+	leaq 2*(2*4*4)(%r11), %rax;
+	outunpack_xor_blocks(%rax, RR3, RL3, RTMP, RX, RKM);
+	leaq 3*(2*4*4)(%r11), %rax;
+	outunpack_xor_blocks(%rax, RR4, RL4, RTMP, RX, RKM);
+
+	ret;
+
+.align 16
+.global cast5_dec_blk_16way
+.type cast5_dec_blk_16way,@function;
+
+cast5_dec_blk_16way:
+	/* input:
+	 * %rdi: ctx, CTX
+	 * %rsi: dst
+	 * %rdx: src
+	 */
+
+	pushq %rbp;
+	pushq %rbx;
+
+	vmovdqa .Lbswap_mask, RKM;
+	vmovd .Lfirst_mask, R1ST;
+	vmovd .L32_mask, R32;
+	dec_preload_rkr();
+
+	leaq 1*(2*4*4)(%rdx), %rax;
+	inpack_blocks(%rdx, RL1, RR1, RTMP, RX, RKM);
+	inpack_blocks(%rax, RL2, RR2, RTMP, RX, RKM);
+	leaq 2*(2*4*4)(%rdx), %rax;
+	inpack_blocks(%rax, RL3, RR3, RTMP, RX, RKM);
+	leaq 3*(2*4*4)(%rdx), %rax;
+	inpack_blocks(%rax, RL4, RR4, RTMP, RX, RKM);
+
+	movq %rsi, %r11;
+
+	movzbl rr(CTX), %eax;
+	testl %eax, %eax;
+	jnz __skip_dec;
+
+	round(RL, RR, 15, 1);
+	round(RR, RL, 14, 3);
+	round(RL, RR, 13, 2);
+	round(RR, RL, 12, 1);
+
+__dec_tail:
+	round(RL, RR, 11, 3);
+	round(RR, RL, 10, 2);
+	round(RL, RR, 9, 1);
+	round(RR, RL, 8, 3);
+	round(RL, RR, 7, 2);
+	round(RR, RL, 6, 1);
+	round(RL, RR, 5, 3);
+	round(RR, RL, 4, 2);
+	round(RL, RR, 3, 1);
+	round(RR, RL, 2, 3);
+	round(RL, RR, 1, 2);
+	round(RR, RL, 0, 1);
+
+	vmovdqa .Lbswap_mask, RKM;
+	popq %rbx;
+	popq %rbp;
+
+	leaq 1*(2*4*4)(%r11), %rax;
+	outunpack_blocks(%r11, RR1, RL1, RTMP, RX, RKM);
+	outunpack_blocks(%rax, RR2, RL2, RTMP, RX, RKM);
+	leaq 2*(2*4*4)(%r11), %rax;
+	outunpack_blocks(%rax, RR3, RL3, RTMP, RX, RKM);
+	leaq 3*(2*4*4)(%r11), %rax;
+	outunpack_blocks(%rax, RR4, RL4, RTMP, RX, RKM);
+
+	ret;
+
+__skip_dec:
+	vpsrldq $4, RKR, RKR;
+	jmp __dec_tail;
diff --git a/arch/x86/crypto/cast5_avx_glue.c b/arch/x86/crypto/cast5_avx_glue.c
new file mode 100644
index 000000000000..e0ea14f9547f
--- /dev/null
+++ b/arch/x86/crypto/cast5_avx_glue.c
@@ -0,0 +1,530 @@
+/*
+ * Glue Code for the AVX assembler implementation of the Cast5 Cipher
+ *
+ * Copyright (C) 2012 Johannes Goetzfried
+ * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + * + */ + +#include <linux/module.h> +#include <linux/hardirq.h> +#include <linux/types.h> +#include <linux/crypto.h> +#include <linux/err.h> +#include <crypto/algapi.h> +#include <crypto/cast5.h> +#include <crypto/cryptd.h> +#include <crypto/ctr.h> +#include <asm/xcr.h> +#include <asm/xsave.h> +#include <asm/crypto/ablk_helper.h> +#include <asm/crypto/glue_helper.h> + +#define CAST5_PARALLEL_BLOCKS 16 + +asmlinkage void __cast5_enc_blk_16way(struct cast5_ctx *ctx, u8 *dst, + const u8 *src, bool xor); +asmlinkage void cast5_dec_blk_16way(struct cast5_ctx *ctx, u8 *dst, + const u8 *src); + +static inline void cast5_enc_blk_xway(struct cast5_ctx *ctx, u8 *dst, + const u8 *src) +{ + __cast5_enc_blk_16way(ctx, dst, src, false); +} + +static inline void cast5_enc_blk_xway_xor(struct cast5_ctx *ctx, u8 *dst, + const u8 *src) +{ + __cast5_enc_blk_16way(ctx, dst, src, true); +} + +static inline void cast5_dec_blk_xway(struct cast5_ctx *ctx, u8 *dst, + const u8 *src) +{ + cast5_dec_blk_16way(ctx, dst, src); +} + + +static inline bool cast5_fpu_begin(bool fpu_enabled, unsigned int nbytes) +{ + return glue_fpu_begin(CAST5_BLOCK_SIZE, CAST5_PARALLEL_BLOCKS, + NULL, fpu_enabled, nbytes); +} + +static inline void cast5_fpu_end(bool fpu_enabled) +{ + return glue_fpu_end(fpu_enabled); +} + +static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, + bool enc) +{ + bool fpu_enabled = false; + struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + const unsigned int bsize = CAST5_BLOCK_SIZE; + unsigned int nbytes; + int err; + + err = blkcipher_walk_virt(desc, walk); + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + + while ((nbytes = walk->nbytes)) { + u8 *wsrc = walk->src.virt.addr; + u8 *wdst = walk->dst.virt.addr; + + fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes); + + /* Process multi-block batch */ + if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) { + do { + if (enc) + cast5_enc_blk_xway(ctx, wdst, wsrc); + else + cast5_dec_blk_xway(ctx, wdst, wsrc); + + wsrc += bsize * CAST5_PARALLEL_BLOCKS; + wdst += bsize * CAST5_PARALLEL_BLOCKS; + nbytes -= bsize * CAST5_PARALLEL_BLOCKS; + } while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS); + + if (nbytes < bsize) + goto done; + } + + /* Handle leftovers */ + do { + if (enc) + __cast5_encrypt(ctx, wdst, wsrc); + else + __cast5_decrypt(ctx, wdst, wsrc); + + wsrc += bsize; + wdst += bsize; + nbytes -= bsize; + } while (nbytes >= bsize); + +done: + err = blkcipher_walk_done(desc, walk, nbytes); + } + + cast5_fpu_end(fpu_enabled); + return err; +} + +static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ecb_crypt(desc, &walk, true); +} + +static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ecb_crypt(desc, &walk, false); +} + +static unsigned int __cbc_encrypt(struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + const unsigned int bsize = CAST5_BLOCK_SIZE; + unsigned int nbytes = walk->nbytes; + u64 *src = (u64 *)walk->src.virt.addr; + u64 
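+	/* CBC encryption chains serially (each block's IV is the previous ciphertext), so there is no 16-way path here */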
*dst = (u64 *)walk->dst.virt.addr; + u64 *iv = (u64 *)walk->iv; + + do { + *dst = *src ^ *iv; + __cast5_encrypt(ctx, (u8 *)dst, (u8 *)dst); + iv = dst; + + src += 1; + dst += 1; + nbytes -= bsize; + } while (nbytes >= bsize); + + *(u64 *)walk->iv = *iv; + return nbytes; +} + +static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + while ((nbytes = walk.nbytes)) { + nbytes = __cbc_encrypt(desc, &walk); + err = blkcipher_walk_done(desc, &walk, nbytes); + } + + return err; +} + +static unsigned int __cbc_decrypt(struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + const unsigned int bsize = CAST5_BLOCK_SIZE; + unsigned int nbytes = walk->nbytes; + u64 *src = (u64 *)walk->src.virt.addr; + u64 *dst = (u64 *)walk->dst.virt.addr; + u64 ivs[CAST5_PARALLEL_BLOCKS - 1]; + u64 last_iv; + int i; + + /* Start of the last block. */ + src += nbytes / bsize - 1; + dst += nbytes / bsize - 1; + + last_iv = *src; + + /* Process multi-block batch */ + if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) { + do { + nbytes -= bsize * (CAST5_PARALLEL_BLOCKS - 1); + src -= CAST5_PARALLEL_BLOCKS - 1; + dst -= CAST5_PARALLEL_BLOCKS - 1; + + for (i = 0; i < CAST5_PARALLEL_BLOCKS - 1; i++) + ivs[i] = src[i]; + + cast5_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src); + + for (i = 0; i < CAST5_PARALLEL_BLOCKS - 1; i++) + *(dst + (i + 1)) ^= *(ivs + i); + + nbytes -= bsize; + if (nbytes < bsize) + goto done; + + *dst ^= *(src - 1); + src -= 1; + dst -= 1; + } while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS); + + if (nbytes < bsize) + goto done; + } + + /* Handle leftovers */ + for (;;) { + __cast5_decrypt(ctx, (u8 *)dst, (u8 *)src); + + nbytes -= bsize; + if (nbytes < bsize) + break; + + *dst ^= *(src - 1); + src -= 1; + dst -= 1; + } + +done: + *dst ^= *(u64 *)walk->iv; + *(u64 *)walk->iv = last_iv; + + return nbytes; +} + +static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + bool fpu_enabled = false; + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + + while ((nbytes = walk.nbytes)) { + fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes); + nbytes = __cbc_decrypt(desc, &walk); + err = blkcipher_walk_done(desc, &walk, nbytes); + } + + cast5_fpu_end(fpu_enabled); + return err; +} + +static void ctr_crypt_final(struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + u8 *ctrblk = walk->iv; + u8 keystream[CAST5_BLOCK_SIZE]; + u8 *src = walk->src.virt.addr; + u8 *dst = walk->dst.virt.addr; + unsigned int nbytes = walk->nbytes; + + __cast5_encrypt(ctx, keystream, ctrblk); + crypto_xor(keystream, src, nbytes); + memcpy(dst, keystream, nbytes); + + crypto_inc(ctrblk, CAST5_BLOCK_SIZE); +} + +static unsigned int __ctr_crypt(struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + const unsigned int bsize = CAST5_BLOCK_SIZE; + unsigned int nbytes = walk->nbytes; + u64 *src = (u64 *)walk->src.virt.addr; + u64 *dst = (u64 *)walk->dst.virt.addr; + u64 ctrblk = be64_to_cpu(*(__be64 *)walk->iv); + __be64 ctrblocks[CAST5_PARALLEL_BLOCKS]; + int 
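+	/* CTR keystream blocks are independent, so whole batches of counter blocks can be encrypted in parallel */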
i; + + /* Process multi-block batch */ + if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) { + do { + /* create ctrblks for parallel encrypt */ + for (i = 0; i < CAST5_PARALLEL_BLOCKS; i++) { + if (dst != src) + dst[i] = src[i]; + + ctrblocks[i] = cpu_to_be64(ctrblk++); + } + + cast5_enc_blk_xway_xor(ctx, (u8 *)dst, + (u8 *)ctrblocks); + + src += CAST5_PARALLEL_BLOCKS; + dst += CAST5_PARALLEL_BLOCKS; + nbytes -= bsize * CAST5_PARALLEL_BLOCKS; + } while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS); + + if (nbytes < bsize) + goto done; + } + + /* Handle leftovers */ + do { + if (dst != src) + *dst = *src; + + ctrblocks[0] = cpu_to_be64(ctrblk++); + + __cast5_encrypt(ctx, (u8 *)ctrblocks, (u8 *)ctrblocks); + *dst ^= ctrblocks[0]; + + src += 1; + dst += 1; + nbytes -= bsize; + } while (nbytes >= bsize); + +done: + *(__be64 *)walk->iv = cpu_to_be64(ctrblk); + return nbytes; +} + +static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + bool fpu_enabled = false; + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt_block(desc, &walk, CAST5_BLOCK_SIZE); + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + + while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) { + fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes); + nbytes = __ctr_crypt(desc, &walk); + err = blkcipher_walk_done(desc, &walk, nbytes); + } + + cast5_fpu_end(fpu_enabled); + + if (walk.nbytes) { + ctr_crypt_final(desc, &walk); + err = blkcipher_walk_done(desc, &walk, 0); + } + + return err; +} + + +static struct crypto_alg cast5_algs[6] = { { + .cra_name = "__ecb-cast5-avx", + .cra_driver_name = "__driver-ecb-cast5-avx", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = CAST5_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct cast5_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_u = { + .blkcipher = { + .min_keysize = CAST5_MIN_KEY_SIZE, + .max_keysize = CAST5_MAX_KEY_SIZE, + .setkey = cast5_setkey, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, + }, +}, { + .cra_name = "__cbc-cast5-avx", + .cra_driver_name = "__driver-cbc-cast5-avx", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = CAST5_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct cast5_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_u = { + .blkcipher = { + .min_keysize = CAST5_MIN_KEY_SIZE, + .max_keysize = CAST5_MAX_KEY_SIZE, + .setkey = cast5_setkey, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, + }, +}, { + .cra_name = "__ctr-cast5-avx", + .cra_driver_name = "__driver-ctr-cast5-avx", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct cast5_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_u = { + .blkcipher = { + .min_keysize = CAST5_MIN_KEY_SIZE, + .max_keysize = CAST5_MAX_KEY_SIZE, + .ivsize = CAST5_BLOCK_SIZE, + .setkey = cast5_setkey, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, + }, + }, +}, { + .cra_name = "ecb(cast5)", + .cra_driver_name = "ecb-cast5-avx", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = CAST5_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_helper_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = ablk_init, + .cra_exit = 
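+	/* ablk_init/ablk_exit set up and tear down the cryptd-backed child transform behind this async wrapper */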
ablk_exit, + .cra_u = { + .ablkcipher = { + .min_keysize = CAST5_MIN_KEY_SIZE, + .max_keysize = CAST5_MAX_KEY_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + }, + }, +}, { + .cra_name = "cbc(cast5)", + .cra_driver_name = "cbc-cast5-avx", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = CAST5_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_helper_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_u = { + .ablkcipher = { + .min_keysize = CAST5_MIN_KEY_SIZE, + .max_keysize = CAST5_MAX_KEY_SIZE, + .ivsize = CAST5_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = __ablk_encrypt, + .decrypt = ablk_decrypt, + }, + }, +}, { + .cra_name = "ctr(cast5)", + .cra_driver_name = "ctr-cast5-avx", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct async_helper_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_u = { + .ablkcipher = { + .min_keysize = CAST5_MIN_KEY_SIZE, + .max_keysize = CAST5_MAX_KEY_SIZE, + .ivsize = CAST5_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_encrypt, + .geniv = "chainiv", + }, + }, +} }; + +static int __init cast5_init(void) +{ + u64 xcr0; + + if (!cpu_has_avx || !cpu_has_osxsave) { + pr_info("AVX instructions are not detected.\n"); + return -ENODEV; + } + + xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); + if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { + pr_info("AVX detected but unusable.\n"); + return -ENODEV; + } + + return crypto_register_algs(cast5_algs, ARRAY_SIZE(cast5_algs)); +} + +static void __exit cast5_exit(void) +{ + crypto_unregister_algs(cast5_algs, ARRAY_SIZE(cast5_algs)); +} + +module_init(cast5_init); +module_exit(cast5_exit); + +MODULE_DESCRIPTION("Cast5 Cipher Algorithm, AVX optimized"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("cast5"); diff --git a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S new file mode 100644 index 000000000000..218d283772f4 --- /dev/null +++ b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S @@ -0,0 +1,383 @@ +/* + * Cast6 Cipher 8-way parallel algorithm (AVX/x86_64) + * + * Copyright (C) 2012 Johannes Goetzfried + * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de> + * + * Copyright © 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + * + */ + +.file "cast6-avx-x86_64-asm_64.S" + +.extern cast6_s1 +.extern cast6_s2 +.extern cast6_s3 +.extern cast6_s4 + +/* structure of crypto context */ +#define km 0 +#define kr (12*4*4) + +/* s-boxes */ +#define s1 cast6_s1 +#define s2 cast6_s2 +#define s3 cast6_s3 +#define s4 cast6_s4 + +/********************************************************************** + 8-way AVX cast6 + **********************************************************************/ +#define CTX %rdi + +#define RA1 %xmm0 +#define RB1 %xmm1 +#define RC1 %xmm2 +#define RD1 %xmm3 + +#define RA2 %xmm4 +#define RB2 %xmm5 +#define RC2 %xmm6 +#define RD2 %xmm7 + +#define RX %xmm8 + +#define RKM %xmm9 +#define RKR %xmm10 +#define RKRF %xmm11 +#define RKRR %xmm12 +#define R32 %xmm13 +#define R1ST %xmm14 + +#define RTMP %xmm15 + +#define RID1 %rbp +#define RID1d %ebp +#define RID2 %rsi +#define RID2d %esi + +#define RGI1 %rdx +#define RGI1bl %dl +#define RGI1bh %dh +#define RGI2 %rcx +#define RGI2bl %cl +#define RGI2bh %ch + +#define RGI3 %rax +#define RGI3bl %al +#define RGI3bh %ah +#define RGI4 %rbx +#define RGI4bl %bl +#define RGI4bh %bh + +#define RFS1 %r8 +#define RFS1d %r8d +#define RFS2 %r9 +#define RFS2d %r9d +#define RFS3 %r10 +#define RFS3d %r10d + + +#define lookup_32bit(src, dst, op1, op2, op3, interleave_op, il_reg) \ + movzbl src ## bh, RID1d; \ + movzbl src ## bl, RID2d; \ + shrq $16, src; \ + movl s1(, RID1, 4), dst ## d; \ + op1 s2(, RID2, 4), dst ## d; \ + movzbl src ## bh, RID1d; \ + movzbl src ## bl, RID2d; \ + interleave_op(il_reg); \ + op2 s3(, RID1, 4), dst ## d; \ + op3 s4(, RID2, 4), dst ## d; + +#define dummy(d) /* do nothing */ + +#define shr_next(reg) \ + shrq $16, reg; + +#define F_head(a, x, gi1, gi2, op0) \ + op0 a, RKM, x; \ + vpslld RKRF, x, RTMP; \ + vpsrld RKRR, x, x; \ + vpor RTMP, x, x; \ + \ + vmovq x, gi1; \ + vpextrq $1, x, gi2; + +#define F_tail(a, x, gi1, gi2, op1, op2, op3) \ + lookup_32bit(##gi1, RFS1, op1, op2, op3, shr_next, ##gi1); \ + lookup_32bit(##gi2, RFS3, op1, op2, op3, shr_next, ##gi2); \ + \ + lookup_32bit(##gi1, RFS2, op1, op2, op3, dummy, none); \ + shlq $32, RFS2; \ + orq RFS1, RFS2; \ + lookup_32bit(##gi2, RFS1, op1, op2, op3, dummy, none); \ + shlq $32, RFS1; \ + orq RFS1, RFS3; \ + \ + vmovq RFS2, x; \ + vpinsrq $1, RFS3, x, x; + +#define F_2(a1, b1, a2, b2, op0, op1, op2, op3) \ + F_head(b1, RX, RGI1, RGI2, op0); \ + F_head(b2, RX, RGI3, RGI4, op0); \ + \ + F_tail(b1, RX, RGI1, RGI2, op1, op2, op3); \ + F_tail(b2, RTMP, RGI3, RGI4, op1, op2, op3); \ + \ + vpxor a1, RX, a1; \ + vpxor a2, RTMP, a2; + +#define F1_2(a1, b1, a2, b2) \ + F_2(a1, b1, a2, b2, vpaddd, xorl, subl, addl) +#define F2_2(a1, b1, a2, b2) \ + F_2(a1, b1, a2, b2, vpxor, subl, addl, xorl) +#define F3_2(a1, b1, a2, b2) \ + F_2(a1, b1, a2, b2, vpsubd, addl, xorl, subl) + +#define qop(in, out, f) \ + F ## f ## _2(out ## 1, in ## 1, out ## 2, in ## 2); + +#define get_round_keys(nn) \ + vbroadcastss (km+(4*(nn)))(CTX), RKM; \ + vpand R1ST, RKR, RKRF; \ + vpsubq RKRF, R32, RKRR; \ + vpsrldq $1, RKR, RKR; + +#define Q(n) \ + get_round_keys(4*n+0); \ + qop(RD, RC, 1); \ + \ + get_round_keys(4*n+1); \ + qop(RC, RB, 2); \ + \ + get_round_keys(4*n+2); \ + qop(RB, RA, 3); \ + \ + get_round_keys(4*n+3); \ + qop(RA, RD, 1); + +#define QBAR(n) \ + get_round_keys(4*n+3); \ + qop(RA, RD, 1); \ + \ + 
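+	/* QBAR is the inverse quad-round: the same sub-rounds applied in reverse order */ \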
get_round_keys(4*n+2); \ + qop(RB, RA, 3); \ + \ + get_round_keys(4*n+1); \ + qop(RC, RB, 2); \ + \ + get_round_keys(4*n+0); \ + qop(RD, RC, 1); + +#define shuffle(mask) \ + vpshufb mask, RKR, RKR; + +#define preload_rkr(n, do_mask, mask) \ + vbroadcastss .L16_mask, RKR; \ + /* add 16-bit rotation to key rotations (mod 32) */ \ + vpxor (kr+n*16)(CTX), RKR, RKR; \ + do_mask(mask); + +#define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ + vpunpckldq x1, x0, t0; \ + vpunpckhdq x1, x0, t2; \ + vpunpckldq x3, x2, t1; \ + vpunpckhdq x3, x2, x3; \ + \ + vpunpcklqdq t1, t0, x0; \ + vpunpckhqdq t1, t0, x1; \ + vpunpcklqdq x3, t2, x2; \ + vpunpckhqdq x3, t2, x3; + +#define inpack_blocks(in, x0, x1, x2, x3, t0, t1, t2, rmask) \ + vmovdqu (0*4*4)(in), x0; \ + vmovdqu (1*4*4)(in), x1; \ + vmovdqu (2*4*4)(in), x2; \ + vmovdqu (3*4*4)(in), x3; \ + vpshufb rmask, x0, x0; \ + vpshufb rmask, x1, x1; \ + vpshufb rmask, x2, x2; \ + vpshufb rmask, x3, x3; \ + \ + transpose_4x4(x0, x1, x2, x3, t0, t1, t2) + +#define outunpack_blocks(out, x0, x1, x2, x3, t0, t1, t2, rmask) \ + transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ + \ + vpshufb rmask, x0, x0; \ + vpshufb rmask, x1, x1; \ + vpshufb rmask, x2, x2; \ + vpshufb rmask, x3, x3; \ + vmovdqu x0, (0*4*4)(out); \ + vmovdqu x1, (1*4*4)(out); \ + vmovdqu x2, (2*4*4)(out); \ + vmovdqu x3, (3*4*4)(out); + +#define outunpack_xor_blocks(out, x0, x1, x2, x3, t0, t1, t2, rmask) \ + transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ + \ + vpshufb rmask, x0, x0; \ + vpshufb rmask, x1, x1; \ + vpshufb rmask, x2, x2; \ + vpshufb rmask, x3, x3; \ + vpxor (0*4*4)(out), x0, x0; \ + vmovdqu x0, (0*4*4)(out); \ + vpxor (1*4*4)(out), x1, x1; \ + vmovdqu x1, (1*4*4)(out); \ + vpxor (2*4*4)(out), x2, x2; \ + vmovdqu x2, (2*4*4)(out); \ + vpxor (3*4*4)(out), x3, x3; \ + vmovdqu x3, (3*4*4)(out); + +.data + +.align 16 +.Lbswap_mask: + .byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 +.Lrkr_enc_Q_Q_QBAR_QBAR: + .byte 0, 1, 2, 3, 4, 5, 6, 7, 11, 10, 9, 8, 15, 14, 13, 12 +.Lrkr_enc_QBAR_QBAR_QBAR_QBAR: + .byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 +.Lrkr_dec_Q_Q_Q_Q: + .byte 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3 +.Lrkr_dec_Q_Q_QBAR_QBAR: + .byte 12, 13, 14, 15, 8, 9, 10, 11, 7, 6, 5, 4, 3, 2, 1, 0 +.Lrkr_dec_QBAR_QBAR_QBAR_QBAR: + .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 +.L16_mask: + .byte 16, 16, 16, 16 +.L32_mask: + .byte 32, 0, 0, 0 +.Lfirst_mask: + .byte 0x1f, 0, 0, 0 + +.text + +.align 16 +.global __cast6_enc_blk_8way +.type __cast6_enc_blk_8way,@function; + +__cast6_enc_blk_8way: + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + * %rcx: bool, if true: xor output + */ + + pushq %rbp; + pushq %rbx; + pushq %rcx; + + vmovdqa .Lbswap_mask, RKM; + vmovd .Lfirst_mask, R1ST; + vmovd .L32_mask, R32; + + leaq (4*4*4)(%rdx), %rax; + inpack_blocks(%rdx, RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM); + inpack_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM); + + movq %rsi, %r11; + + preload_rkr(0, dummy, none); + Q(0); + Q(1); + Q(2); + Q(3); + preload_rkr(1, shuffle, .Lrkr_enc_Q_Q_QBAR_QBAR); + Q(4); + Q(5); + QBAR(6); + QBAR(7); + preload_rkr(2, shuffle, .Lrkr_enc_QBAR_QBAR_QBAR_QBAR); + QBAR(8); + QBAR(9); + QBAR(10); + QBAR(11); + + popq %rcx; + popq %rbx; + popq %rbp; + + vmovdqa .Lbswap_mask, RKM; + leaq (4*4*4)(%r11), %rax; + + testb %cl, %cl; + jnz __enc_xor8; + + outunpack_blocks(%r11, RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM); + outunpack_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM); + + ret; + +__enc_xor8: + 
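+	/* xor variant: blocks are xored into dst instead of stored; used by the CTR glue to apply the keystream */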
outunpack_xor_blocks(%r11, RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
+	outunpack_xor_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
+
+	ret;
+
+.align 16
+.global cast6_dec_blk_8way
+.type cast6_dec_blk_8way,@function;
+
+cast6_dec_blk_8way:
+	/* input:
+	 * %rdi: ctx, CTX
+	 * %rsi: dst
+	 * %rdx: src
+	 */
+
+	pushq %rbp;
+	pushq %rbx;
+
+	vmovdqa .Lbswap_mask, RKM;
+	vmovd .Lfirst_mask, R1ST;
+	vmovd .L32_mask, R32;
+
+	leaq (4*4*4)(%rdx), %rax;
+	inpack_blocks(%rdx, RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
+	inpack_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
+
+	movq %rsi, %r11;
+
+	preload_rkr(2, shuffle, .Lrkr_dec_Q_Q_Q_Q);
+	Q(11);
+	Q(10);
+	Q(9);
+	Q(8);
+	preload_rkr(1, shuffle, .Lrkr_dec_Q_Q_QBAR_QBAR);
+	Q(7);
+	Q(6);
+	QBAR(5);
+	QBAR(4);
+	preload_rkr(0, shuffle, .Lrkr_dec_QBAR_QBAR_QBAR_QBAR);
+	QBAR(3);
+	QBAR(2);
+	QBAR(1);
+	QBAR(0);
+
+	popq %rbx;
+	popq %rbp;
+
+	vmovdqa .Lbswap_mask, RKM;
+	leaq (4*4*4)(%r11), %rax;
+	outunpack_blocks(%r11, RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
+	outunpack_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
+
+	ret;
diff --git a/arch/x86/crypto/cast6_avx_glue.c b/arch/x86/crypto/cast6_avx_glue.c
new file mode 100644
index 000000000000..15e5f85a5011
--- /dev/null
+++ b/arch/x86/crypto/cast6_avx_glue.c
@@ -0,0 +1,648 @@
+/*
+ * Glue Code for the AVX assembler implementation of the Cast6 Cipher
+ *
+ * Copyright (C) 2012 Johannes Goetzfried
+ * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + * + */ + +#include <linux/module.h> +#include <linux/hardirq.h> +#include <linux/types.h> +#include <linux/crypto.h> +#include <linux/err.h> +#include <crypto/algapi.h> +#include <crypto/cast6.h> +#include <crypto/cryptd.h> +#include <crypto/b128ops.h> +#include <crypto/ctr.h> +#include <crypto/lrw.h> +#include <crypto/xts.h> +#include <asm/xcr.h> +#include <asm/xsave.h> +#include <asm/crypto/ablk_helper.h> +#include <asm/crypto/glue_helper.h> + +#define CAST6_PARALLEL_BLOCKS 8 + +asmlinkage void __cast6_enc_blk_8way(struct cast6_ctx *ctx, u8 *dst, + const u8 *src, bool xor); +asmlinkage void cast6_dec_blk_8way(struct cast6_ctx *ctx, u8 *dst, + const u8 *src); + +static inline void cast6_enc_blk_xway(struct cast6_ctx *ctx, u8 *dst, + const u8 *src) +{ + __cast6_enc_blk_8way(ctx, dst, src, false); +} + +static inline void cast6_enc_blk_xway_xor(struct cast6_ctx *ctx, u8 *dst, + const u8 *src) +{ + __cast6_enc_blk_8way(ctx, dst, src, true); +} + +static inline void cast6_dec_blk_xway(struct cast6_ctx *ctx, u8 *dst, + const u8 *src) +{ + cast6_dec_blk_8way(ctx, dst, src); +} + + +static void cast6_decrypt_cbc_xway(void *ctx, u128 *dst, const u128 *src) +{ + u128 ivs[CAST6_PARALLEL_BLOCKS - 1]; + unsigned int j; + + for (j = 0; j < CAST6_PARALLEL_BLOCKS - 1; j++) + ivs[j] = src[j]; + + cast6_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src); + + for (j = 0; j < CAST6_PARALLEL_BLOCKS - 1; j++) + u128_xor(dst + (j + 1), dst + (j + 1), ivs + j); +} + +static void cast6_crypt_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv) +{ + be128 ctrblk; + + u128_to_be128(&ctrblk, iv); + u128_inc(iv); + + __cast6_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk); + u128_xor(dst, src, (u128 *)&ctrblk); +} + +static void cast6_crypt_ctr_xway(void *ctx, u128 *dst, const u128 *src, + u128 *iv) +{ + be128 ctrblks[CAST6_PARALLEL_BLOCKS]; + unsigned int i; + + for (i = 0; i < CAST6_PARALLEL_BLOCKS; i++) { + if (dst != src) + dst[i] = src[i]; + + u128_to_be128(&ctrblks[i], iv); + u128_inc(iv); + } + + cast6_enc_blk_xway_xor(ctx, (u8 *)dst, (u8 *)ctrblks); +} + +static const struct common_glue_ctx cast6_enc = { + .num_funcs = 2, + .fpu_blocks_limit = CAST6_PARALLEL_BLOCKS, + + .funcs = { { + .num_blocks = CAST6_PARALLEL_BLOCKS, + .fn_u = { .ecb = GLUE_FUNC_CAST(cast6_enc_blk_xway) } + }, { + .num_blocks = 1, + .fn_u = { .ecb = GLUE_FUNC_CAST(__cast6_encrypt) } + } } +}; + +static const struct common_glue_ctx cast6_ctr = { + .num_funcs = 2, + .fpu_blocks_limit = CAST6_PARALLEL_BLOCKS, + + .funcs = { { + .num_blocks = CAST6_PARALLEL_BLOCKS, + .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(cast6_crypt_ctr_xway) } + }, { + .num_blocks = 1, + .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(cast6_crypt_ctr) } + } } +}; + +static const struct common_glue_ctx cast6_dec = { + .num_funcs = 2, + .fpu_blocks_limit = CAST6_PARALLEL_BLOCKS, + + .funcs = { { + .num_blocks = CAST6_PARALLEL_BLOCKS, + .fn_u = { .ecb = GLUE_FUNC_CAST(cast6_dec_blk_xway) } + }, { + .num_blocks = 1, + .fn_u = { .ecb = GLUE_FUNC_CAST(__cast6_decrypt) } + } } +}; + +static const struct common_glue_ctx cast6_dec_cbc = { + .num_funcs = 2, + .fpu_blocks_limit = CAST6_PARALLEL_BLOCKS, + + .funcs = { { + .num_blocks = CAST6_PARALLEL_BLOCKS, + .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(cast6_decrypt_cbc_xway) } + }, { + .num_blocks = 1, + .fn_u = { .cbc = 
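+		/* one-block fallback for trailing blocks */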
GLUE_CBC_FUNC_CAST(__cast6_decrypt) } + } } +}; + +static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + return glue_ecb_crypt_128bit(&cast6_enc, desc, dst, src, nbytes); +} + +static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + return glue_ecb_crypt_128bit(&cast6_dec, desc, dst, src, nbytes); +} + +static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(__cast6_encrypt), desc, + dst, src, nbytes); +} + +static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + return glue_cbc_decrypt_128bit(&cast6_dec_cbc, desc, dst, src, + nbytes); +} + +static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + return glue_ctr_crypt_128bit(&cast6_ctr, desc, dst, src, nbytes); +} + +static inline bool cast6_fpu_begin(bool fpu_enabled, unsigned int nbytes) +{ + return glue_fpu_begin(CAST6_BLOCK_SIZE, CAST6_PARALLEL_BLOCKS, + NULL, fpu_enabled, nbytes); +} + +static inline void cast6_fpu_end(bool fpu_enabled) +{ + glue_fpu_end(fpu_enabled); +} + +struct crypt_priv { + struct cast6_ctx *ctx; + bool fpu_enabled; +}; + +static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) +{ + const unsigned int bsize = CAST6_BLOCK_SIZE; + struct crypt_priv *ctx = priv; + int i; + + ctx->fpu_enabled = cast6_fpu_begin(ctx->fpu_enabled, nbytes); + + if (nbytes == bsize * CAST6_PARALLEL_BLOCKS) { + cast6_enc_blk_xway(ctx->ctx, srcdst, srcdst); + return; + } + + for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) + __cast6_encrypt(ctx->ctx, srcdst, srcdst); +} + +static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) +{ + const unsigned int bsize = CAST6_BLOCK_SIZE; + struct crypt_priv *ctx = priv; + int i; + + ctx->fpu_enabled = cast6_fpu_begin(ctx->fpu_enabled, nbytes); + + if (nbytes == bsize * CAST6_PARALLEL_BLOCKS) { + cast6_dec_blk_xway(ctx->ctx, srcdst, srcdst); + return; + } + + for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) + __cast6_decrypt(ctx->ctx, srcdst, srcdst); +} + +struct cast6_lrw_ctx { + struct lrw_table_ctx lrw_table; + struct cast6_ctx cast6_ctx; +}; + +static int lrw_cast6_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int keylen) +{ + struct cast6_lrw_ctx *ctx = crypto_tfm_ctx(tfm); + int err; + + err = __cast6_setkey(&ctx->cast6_ctx, key, keylen - CAST6_BLOCK_SIZE, + &tfm->crt_flags); + if (err) + return err; + + return lrw_init_table(&ctx->lrw_table, key + keylen - CAST6_BLOCK_SIZE); +} + +static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct cast6_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + be128 buf[CAST6_PARALLEL_BLOCKS]; + struct crypt_priv crypt_ctx = { + .ctx = &ctx->cast6_ctx, + .fpu_enabled = false, + }; + struct lrw_crypt_req req = { + .tbuf = buf, + .tbuflen = sizeof(buf), + + .table_ctx = &ctx->lrw_table, + .crypt_ctx = &crypt_ctx, + .crypt_fn = encrypt_callback, + }; + int ret; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + ret = lrw_crypt(desc, dst, src, nbytes, &req); + cast6_fpu_end(crypt_ctx.fpu_enabled); + + return ret; +} + +static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, 
unsigned int nbytes) +{ + struct cast6_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + be128 buf[CAST6_PARALLEL_BLOCKS]; + struct crypt_priv crypt_ctx = { + .ctx = &ctx->cast6_ctx, + .fpu_enabled = false, + }; + struct lrw_crypt_req req = { + .tbuf = buf, + .tbuflen = sizeof(buf), + + .table_ctx = &ctx->lrw_table, + .crypt_ctx = &crypt_ctx, + .crypt_fn = decrypt_callback, + }; + int ret; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + ret = lrw_crypt(desc, dst, src, nbytes, &req); + cast6_fpu_end(crypt_ctx.fpu_enabled); + + return ret; +} + +static void lrw_exit_tfm(struct crypto_tfm *tfm) +{ + struct cast6_lrw_ctx *ctx = crypto_tfm_ctx(tfm); + + lrw_free_table(&ctx->lrw_table); +} + +struct cast6_xts_ctx { + struct cast6_ctx tweak_ctx; + struct cast6_ctx crypt_ctx; +}; + +static int xts_cast6_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int keylen) +{ + struct cast6_xts_ctx *ctx = crypto_tfm_ctx(tfm); + u32 *flags = &tfm->crt_flags; + int err; + + /* key consists of keys of equal size concatenated, therefore + * the length must be even + */ + if (keylen % 2) { + *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + return -EINVAL; + } + + /* first half of xts-key is for crypt */ + err = __cast6_setkey(&ctx->crypt_ctx, key, keylen / 2, flags); + if (err) + return err; + + /* second half of xts-key is for tweak */ + return __cast6_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2, + flags); +} + +static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct cast6_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + be128 buf[CAST6_PARALLEL_BLOCKS]; + struct crypt_priv crypt_ctx = { + .ctx = &ctx->crypt_ctx, + .fpu_enabled = false, + }; + struct xts_crypt_req req = { + .tbuf = buf, + .tbuflen = sizeof(buf), + + .tweak_ctx = &ctx->tweak_ctx, + .tweak_fn = XTS_TWEAK_CAST(__cast6_encrypt), + .crypt_ctx = &crypt_ctx, + .crypt_fn = encrypt_callback, + }; + int ret; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + ret = xts_crypt(desc, dst, src, nbytes, &req); + cast6_fpu_end(crypt_ctx.fpu_enabled); + + return ret; +} + +static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct cast6_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + be128 buf[CAST6_PARALLEL_BLOCKS]; + struct crypt_priv crypt_ctx = { + .ctx = &ctx->crypt_ctx, + .fpu_enabled = false, + }; + struct xts_crypt_req req = { + .tbuf = buf, + .tbuflen = sizeof(buf), + + .tweak_ctx = &ctx->tweak_ctx, + .tweak_fn = XTS_TWEAK_CAST(__cast6_encrypt), + .crypt_ctx = &crypt_ctx, + .crypt_fn = decrypt_callback, + }; + int ret; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + ret = xts_crypt(desc, dst, src, nbytes, &req); + cast6_fpu_end(crypt_ctx.fpu_enabled); + + return ret; +} + +static struct crypto_alg cast6_algs[10] = { { + .cra_name = "__ecb-cast6-avx", + .cra_driver_name = "__driver-ecb-cast6-avx", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = CAST6_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct cast6_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_u = { + .blkcipher = { + .min_keysize = CAST6_MIN_KEY_SIZE, + .max_keysize = CAST6_MAX_KEY_SIZE, + .setkey = cast6_setkey, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, + }, +}, { + .cra_name = "__cbc-cast6-avx", + .cra_driver_name = "__driver-cbc-cast6-avx", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = 
CAST6_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct cast6_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_u = { + .blkcipher = { + .min_keysize = CAST6_MIN_KEY_SIZE, + .max_keysize = CAST6_MAX_KEY_SIZE, + .setkey = cast6_setkey, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, + }, +}, { + .cra_name = "__ctr-cast6-avx", + .cra_driver_name = "__driver-ctr-cast6-avx", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct cast6_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_u = { + .blkcipher = { + .min_keysize = CAST6_MIN_KEY_SIZE, + .max_keysize = CAST6_MAX_KEY_SIZE, + .ivsize = CAST6_BLOCK_SIZE, + .setkey = cast6_setkey, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, + }, + }, +}, { + .cra_name = "__lrw-cast6-avx", + .cra_driver_name = "__driver-lrw-cast6-avx", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = CAST6_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct cast6_lrw_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_exit = lrw_exit_tfm, + .cra_u = { + .blkcipher = { + .min_keysize = CAST6_MIN_KEY_SIZE + + CAST6_BLOCK_SIZE, + .max_keysize = CAST6_MAX_KEY_SIZE + + CAST6_BLOCK_SIZE, + .ivsize = CAST6_BLOCK_SIZE, + .setkey = lrw_cast6_setkey, + .encrypt = lrw_encrypt, + .decrypt = lrw_decrypt, + }, + }, +}, { + .cra_name = "__xts-cast6-avx", + .cra_driver_name = "__driver-xts-cast6-avx", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = CAST6_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct cast6_xts_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_u = { + .blkcipher = { + .min_keysize = CAST6_MIN_KEY_SIZE * 2, + .max_keysize = CAST6_MAX_KEY_SIZE * 2, + .ivsize = CAST6_BLOCK_SIZE, + .setkey = xts_cast6_setkey, + .encrypt = xts_encrypt, + .decrypt = xts_decrypt, + }, + }, +}, { + .cra_name = "ecb(cast6)", + .cra_driver_name = "ecb-cast6-avx", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = CAST6_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_helper_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_u = { + .ablkcipher = { + .min_keysize = CAST6_MIN_KEY_SIZE, + .max_keysize = CAST6_MAX_KEY_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + }, + }, +}, { + .cra_name = "cbc(cast6)", + .cra_driver_name = "cbc-cast6-avx", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = CAST6_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_helper_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_u = { + .ablkcipher = { + .min_keysize = CAST6_MIN_KEY_SIZE, + .max_keysize = CAST6_MAX_KEY_SIZE, + .ivsize = CAST6_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = __ablk_encrypt, + .decrypt = ablk_decrypt, + }, + }, +}, { + .cra_name = "ctr(cast6)", + .cra_driver_name = "ctr-cast6-avx", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct async_helper_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + 
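+	/* the ablk_* helpers run requests synchronously when the FPU is usable and defer them to cryptd otherwise */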
.cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_u = { + .ablkcipher = { + .min_keysize = CAST6_MIN_KEY_SIZE, + .max_keysize = CAST6_MAX_KEY_SIZE, + .ivsize = CAST6_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_encrypt, + .geniv = "chainiv", + }, + }, +}, { + .cra_name = "lrw(cast6)", + .cra_driver_name = "lrw-cast6-avx", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = CAST6_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_helper_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_u = { + .ablkcipher = { + .min_keysize = CAST6_MIN_KEY_SIZE + + CAST6_BLOCK_SIZE, + .max_keysize = CAST6_MAX_KEY_SIZE + + CAST6_BLOCK_SIZE, + .ivsize = CAST6_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + }, + }, +}, { + .cra_name = "xts(cast6)", + .cra_driver_name = "xts-cast6-avx", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = CAST6_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_helper_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_u = { + .ablkcipher = { + .min_keysize = CAST6_MIN_KEY_SIZE * 2, + .max_keysize = CAST6_MAX_KEY_SIZE * 2, + .ivsize = CAST6_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + }, + }, +} }; + +static int __init cast6_init(void) +{ + u64 xcr0; + + if (!cpu_has_avx || !cpu_has_osxsave) { + pr_info("AVX instructions are not detected.\n"); + return -ENODEV; + } + + xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); + if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { + pr_info("AVX detected but unusable.\n"); + return -ENODEV; + } + + return crypto_register_algs(cast6_algs, ARRAY_SIZE(cast6_algs)); +} + +static void __exit cast6_exit(void) +{ + crypto_unregister_algs(cast6_algs, ARRAY_SIZE(cast6_algs)); +} + +module_init(cast6_init); +module_exit(cast6_exit); + +MODULE_DESCRIPTION("Cast6 Cipher Algorithm, AVX optimized"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("cast6"); diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c index b4bf0a63b520..6759dd1135be 100644 --- a/arch/x86/crypto/ghash-clmulni-intel_glue.c +++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c @@ -150,7 +150,6 @@ static struct shash_alg ghash_alg = { .cra_blocksize = GHASH_BLOCK_SIZE, .cra_ctxsize = sizeof(struct ghash_ctx), .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(ghash_alg.base.cra_list), }, }; @@ -288,7 +287,6 @@ static struct ahash_alg ghash_async_alg = { .cra_blocksize = GHASH_BLOCK_SIZE, .cra_type = &crypto_ahash_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(ghash_async_alg.halg.base.cra_list), .cra_init = ghash_async_init_tfm, .cra_exit = ghash_async_exit_tfm, }, diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c index 4854f0f31e4f..30b3927bd733 100644 --- a/arch/x86/crypto/glue_helper.c +++ b/arch/x86/crypto/glue_helper.c @@ -110,7 +110,7 @@ static unsigned int __glue_cbc_encrypt_128bit(const common_glue_func_t fn, nbytes -= bsize; } while (nbytes >= bsize); - u128_xor((u128 *)walk->iv, (u128 *)walk->iv, iv); + *(u128 *)walk->iv = *iv; return nbytes; } diff --git a/arch/x86/crypto/salsa20_glue.c b/arch/x86/crypto/salsa20_glue.c index 
bccb76d80987..a3a3c0205c16 100644 --- a/arch/x86/crypto/salsa20_glue.c +++ b/arch/x86/crypto/salsa20_glue.c @@ -97,7 +97,6 @@ static struct crypto_alg alg = { .cra_ctxsize = sizeof(struct salsa20_ctx), .cra_alignmask = 3, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(alg.cra_list), .cra_u = { .blkcipher = { .setkey = setkey, diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c index b36bdac237eb..3f543a04cf1e 100644 --- a/arch/x86/crypto/serpent_avx_glue.c +++ b/arch/x86/crypto/serpent_avx_glue.c @@ -390,7 +390,6 @@ static struct crypto_alg serpent_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(serpent_algs[0].cra_list), .cra_u = { .blkcipher = { .min_keysize = SERPENT_MIN_KEY_SIZE, @@ -410,7 +409,6 @@ static struct crypto_alg serpent_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(serpent_algs[1].cra_list), .cra_u = { .blkcipher = { .min_keysize = SERPENT_MIN_KEY_SIZE, @@ -430,7 +428,6 @@ static struct crypto_alg serpent_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(serpent_algs[2].cra_list), .cra_u = { .blkcipher = { .min_keysize = SERPENT_MIN_KEY_SIZE, @@ -451,7 +448,6 @@ static struct crypto_alg serpent_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(serpent_algs[3].cra_list), .cra_exit = lrw_exit_tfm, .cra_u = { .blkcipher = { @@ -475,7 +471,6 @@ static struct crypto_alg serpent_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(serpent_algs[4].cra_list), .cra_u = { .blkcipher = { .min_keysize = SERPENT_MIN_KEY_SIZE * 2, @@ -496,7 +491,6 @@ static struct crypto_alg serpent_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(serpent_algs[5].cra_list), .cra_init = ablk_init, .cra_exit = ablk_exit, .cra_u = { @@ -518,7 +512,6 @@ static struct crypto_alg serpent_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(serpent_algs[6].cra_list), .cra_init = ablk_init, .cra_exit = ablk_exit, .cra_u = { @@ -541,7 +534,6 @@ static struct crypto_alg serpent_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(serpent_algs[7].cra_list), .cra_init = ablk_init, .cra_exit = ablk_exit, .cra_u = { @@ -565,7 +557,6 @@ static struct crypto_alg serpent_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(serpent_algs[8].cra_list), .cra_init = ablk_init, .cra_exit = ablk_exit, .cra_u = { @@ -590,7 +581,6 @@ static struct crypto_alg serpent_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(serpent_algs[9].cra_list), .cra_init = ablk_init, .cra_exit = ablk_exit, .cra_u = { diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c index d679c8675f4a..9107a9908c41 100644 --- a/arch/x86/crypto/serpent_sse2_glue.c +++ b/arch/x86/crypto/serpent_sse2_glue.c @@ -393,7 +393,6 @@ static struct crypto_alg serpent_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, 
.cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(serpent_algs[0].cra_list), .cra_u = { .blkcipher = { .min_keysize = SERPENT_MIN_KEY_SIZE, @@ -413,7 +412,6 @@ static struct crypto_alg serpent_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(serpent_algs[1].cra_list), .cra_u = { .blkcipher = { .min_keysize = SERPENT_MIN_KEY_SIZE, @@ -433,7 +431,6 @@ static struct crypto_alg serpent_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(serpent_algs[2].cra_list), .cra_u = { .blkcipher = { .min_keysize = SERPENT_MIN_KEY_SIZE, @@ -454,7 +451,6 @@ static struct crypto_alg serpent_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(serpent_algs[3].cra_list), .cra_exit = lrw_exit_tfm, .cra_u = { .blkcipher = { @@ -478,7 +474,6 @@ static struct crypto_alg serpent_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(serpent_algs[4].cra_list), .cra_u = { .blkcipher = { .min_keysize = SERPENT_MIN_KEY_SIZE * 2, @@ -499,7 +494,6 @@ static struct crypto_alg serpent_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(serpent_algs[5].cra_list), .cra_init = ablk_init, .cra_exit = ablk_exit, .cra_u = { @@ -521,7 +515,6 @@ static struct crypto_alg serpent_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(serpent_algs[6].cra_list), .cra_init = ablk_init, .cra_exit = ablk_exit, .cra_u = { @@ -544,7 +537,6 @@ static struct crypto_alg serpent_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(serpent_algs[7].cra_list), .cra_init = ablk_init, .cra_exit = ablk_exit, .cra_u = { @@ -568,7 +560,6 @@ static struct crypto_alg serpent_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(serpent_algs[8].cra_list), .cra_init = ablk_init, .cra_exit = ablk_exit, .cra_u = { @@ -593,7 +584,6 @@ static struct crypto_alg serpent_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(serpent_algs[9].cra_list), .cra_init = ablk_init, .cra_exit = ablk_exit, .cra_u = { diff --git a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S index 35f45574390d..1585abb13dde 100644 --- a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S @@ -4,6 +4,8 @@ * Copyright (C) 2012 Johannes Goetzfried * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de> * + * Copyright © 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> + * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -47,16 +49,22 @@ #define RC2 %xmm6 #define RD2 %xmm7 -#define RX %xmm8 -#define RY %xmm9 +#define RX0 %xmm8 +#define RY0 %xmm9 + +#define RX1 %xmm10 +#define RY1 %xmm11 -#define RK1 %xmm10 -#define RK2 %xmm11 +#define RK1 %xmm12 +#define RK2 %xmm13 -#define RID1 %rax -#define RID1b %al -#define RID2 %rbx -#define RID2b %bl +#define RT %xmm14 +#define RR %xmm15 + 
+#define RID1 %rbp +#define RID1d %ebp +#define RID2 %rsi +#define RID2d %esi #define RGI1 %rdx #define RGI1bl %dl @@ -65,6 +73,13 @@ #define RGI2bl %cl #define RGI2bh %ch +#define RGI3 %rax +#define RGI3bl %al +#define RGI3bh %ah +#define RGI4 %rbx +#define RGI4bl %bl +#define RGI4bh %bh + #define RGS1 %r8 #define RGS1d %r8d #define RGS2 %r9 @@ -73,89 +88,123 @@ #define RGS3d %r10d -#define lookup_32bit(t0, t1, t2, t3, src, dst) \ - movb src ## bl, RID1b; \ - movb src ## bh, RID2b; \ - movl t0(CTX, RID1, 4), dst ## d; \ - xorl t1(CTX, RID2, 4), dst ## d; \ +#define lookup_32bit(t0, t1, t2, t3, src, dst, interleave_op, il_reg) \ + movzbl src ## bl, RID1d; \ + movzbl src ## bh, RID2d; \ shrq $16, src; \ - movb src ## bl, RID1b; \ - movb src ## bh, RID2b; \ + movl t0(CTX, RID1, 4), dst ## d; \ + movl t1(CTX, RID2, 4), RID2d; \ + movzbl src ## bl, RID1d; \ + xorl RID2d, dst ## d; \ + movzbl src ## bh, RID2d; \ + interleave_op(il_reg); \ xorl t2(CTX, RID1, 4), dst ## d; \ xorl t3(CTX, RID2, 4), dst ## d; -#define G(a, x, t0, t1, t2, t3) \ - vmovq a, RGI1; \ - vpsrldq $8, a, x; \ - vmovq x, RGI2; \ +#define dummy(d) /* do nothing */ + +#define shr_next(reg) \ + shrq $16, reg; + +#define G(gi1, gi2, x, t0, t1, t2, t3) \ + lookup_32bit(t0, t1, t2, t3, ##gi1, RGS1, shr_next, ##gi1); \ + lookup_32bit(t0, t1, t2, t3, ##gi2, RGS3, shr_next, ##gi2); \ + \ + lookup_32bit(t0, t1, t2, t3, ##gi1, RGS2, dummy, none); \ + shlq $32, RGS2; \ + orq RGS1, RGS2; \ + lookup_32bit(t0, t1, t2, t3, ##gi2, RGS1, dummy, none); \ + shlq $32, RGS1; \ + orq RGS1, RGS3; + +#define round_head_2(a, b, x1, y1, x2, y2) \ + vmovq b ## 1, RGI3; \ + vpextrq $1, b ## 1, RGI4; \ \ - lookup_32bit(t0, t1, t2, t3, RGI1, RGS1); \ - shrq $16, RGI1; \ - lookup_32bit(t0, t1, t2, t3, RGI1, RGS2); \ - shlq $32, RGS2; \ - orq RGS1, RGS2; \ + G(RGI1, RGI2, x1, s0, s1, s2, s3); \ + vmovq a ## 2, RGI1; \ + vpextrq $1, a ## 2, RGI2; \ + vmovq RGS2, x1; \ + vpinsrq $1, RGS3, x1, x1; \ \ - lookup_32bit(t0, t1, t2, t3, RGI2, RGS1); \ - shrq $16, RGI2; \ - lookup_32bit(t0, t1, t2, t3, RGI2, RGS3); \ - shlq $32, RGS3; \ - orq RGS1, RGS3; \ + G(RGI3, RGI4, y1, s1, s2, s3, s0); \ + vmovq b ## 2, RGI3; \ + vpextrq $1, b ## 2, RGI4; \ + vmovq RGS2, y1; \ + vpinsrq $1, RGS3, y1, y1; \ \ - vmovq RGS2, x; \ - vpinsrq $1, RGS3, x, x; + G(RGI1, RGI2, x2, s0, s1, s2, s3); \ + vmovq RGS2, x2; \ + vpinsrq $1, RGS3, x2, x2; \ + \ + G(RGI3, RGI4, y2, s1, s2, s3, s0); \ + vmovq RGS2, y2; \ + vpinsrq $1, RGS3, y2, y2; -#define encround(a, b, c, d, x, y) \ - G(a, x, s0, s1, s2, s3); \ - G(b, y, s1, s2, s3, s0); \ +#define encround_tail(a, b, c, d, x, y, prerotate) \ vpaddd x, y, x; \ + vpaddd x, RK1, RT;\ + prerotate(b); \ + vpxor RT, c, c; \ vpaddd y, x, y; \ - vpaddd x, RK1, x; \ vpaddd y, RK2, y; \ - vpxor x, c, c; \ - vpsrld $1, c, x; \ + vpsrld $1, c, RT; \ vpslld $(32 - 1), c, c; \ - vpor c, x, c; \ - vpslld $1, d, x; \ - vpsrld $(32 - 1), d, d; \ - vpor d, x, d; \ - vpxor d, y, d; - -#define decround(a, b, c, d, x, y) \ - G(a, x, s0, s1, s2, s3); \ - G(b, y, s1, s2, s3, s0); \ + vpor c, RT, c; \ + vpxor d, y, d; \ + +#define decround_tail(a, b, c, d, x, y, prerotate) \ vpaddd x, y, x; \ + vpaddd x, RK1, RT;\ + prerotate(a); \ + vpxor RT, c, c; \ vpaddd y, x, y; \ vpaddd y, RK2, y; \ vpxor d, y, d; \ vpsrld $1, d, y; \ vpslld $(32 - 1), d, d; \ vpor d, y, d; \ - vpslld $1, c, y; \ - vpsrld $(32 - 1), c, c; \ - vpor c, y, c; \ - vpaddd x, RK1, x; \ - vpxor x, c, c; - -#define encrypt_round(n, a, b, c, d) \ - vbroadcastss (k+4*(2*(n)))(CTX), RK1; \ - 
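
The reworked lookup_32bit still computes the same function — four byte-indexed s-box loads XORed together — but movzbl writes the full 32-bit destination register, which removes the partial-register stalls of movb and makes the old "xorq RID1, RID1" pre-zeroing (dropped further down) unnecessary; the new interleave_op hook additionally lets the caller's next shrq overlap the loads. In scalar C the computed value is:

#include <stdint.h>

/* C equivalent of one lookup_32bit expansion: t0..t3 are the
 * 256-entry key-dependent s-box tables, x the 32-bit input */
static uint32_t lookup_32bit(const uint32_t t0[256], const uint32_t t1[256],
                             const uint32_t t2[256], const uint32_t t3[256],
                             uint32_t x)
{
        return t0[x & 0xff] ^
               t1[(x >>  8) & 0xff] ^
               t2[(x >> 16) & 0xff] ^
               t3[(x >> 24) & 0xff];
}
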
vbroadcastss (k+4*(2*(n)+1))(CTX), RK2; \ - encround(a ## 1, b ## 1, c ## 1, d ## 1, RX, RY); \ - encround(a ## 2, b ## 2, c ## 2, d ## 2, RX, RY); - -#define decrypt_round(n, a, b, c, d) \ - vbroadcastss (k+4*(2*(n)))(CTX), RK1; \ - vbroadcastss (k+4*(2*(n)+1))(CTX), RK2; \ - decround(a ## 1, b ## 1, c ## 1, d ## 1, RX, RY); \ - decround(a ## 2, b ## 2, c ## 2, d ## 2, RX, RY); + +#define rotate_1l(x) \ + vpslld $1, x, RR; \ + vpsrld $(32 - 1), x, x; \ + vpor x, RR, x; + +#define preload_rgi(c) \ + vmovq c, RGI1; \ + vpextrq $1, c, RGI2; + +#define encrypt_round(n, a, b, c, d, preload, prerotate) \ + vbroadcastss (k+4*(2*(n)))(CTX), RK1; \ + vbroadcastss (k+4*(2*(n)+1))(CTX), RK2; \ + round_head_2(a, b, RX0, RY0, RX1, RY1); \ + encround_tail(a ## 1, b ## 1, c ## 1, d ## 1, RX0, RY0, prerotate); \ + preload(c ## 1); \ + encround_tail(a ## 2, b ## 2, c ## 2, d ## 2, RX1, RY1, prerotate); + +#define decrypt_round(n, a, b, c, d, preload, prerotate) \ + vbroadcastss (k+4*(2*(n)))(CTX), RK1; \ + vbroadcastss (k+4*(2*(n)+1))(CTX), RK2; \ + round_head_2(a, b, RX0, RY0, RX1, RY1); \ + decround_tail(a ## 1, b ## 1, c ## 1, d ## 1, RX0, RY0, prerotate); \ + preload(c ## 1); \ + decround_tail(a ## 2, b ## 2, c ## 2, d ## 2, RX1, RY1, prerotate); #define encrypt_cycle(n) \ - encrypt_round((2*n), RA, RB, RC, RD); \ - encrypt_round(((2*n) + 1), RC, RD, RA, RB); + encrypt_round((2*n), RA, RB, RC, RD, preload_rgi, rotate_1l); \ + encrypt_round(((2*n) + 1), RC, RD, RA, RB, preload_rgi, rotate_1l); + +#define encrypt_cycle_last(n) \ + encrypt_round((2*n), RA, RB, RC, RD, preload_rgi, rotate_1l); \ + encrypt_round(((2*n) + 1), RC, RD, RA, RB, dummy, dummy); #define decrypt_cycle(n) \ - decrypt_round(((2*n) + 1), RC, RD, RA, RB); \ - decrypt_round((2*n), RA, RB, RC, RD); + decrypt_round(((2*n) + 1), RC, RD, RA, RB, preload_rgi, rotate_1l); \ + decrypt_round((2*n), RA, RB, RC, RD, preload_rgi, rotate_1l); +#define decrypt_cycle_last(n) \ + decrypt_round(((2*n) + 1), RC, RD, RA, RB, preload_rgi, rotate_1l); \ + decrypt_round((2*n), RA, RB, RC, RD, dummy, dummy); #define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ vpunpckldq x1, x0, t0; \ @@ -216,17 +265,20 @@ __twofish_enc_blk_8way: * %rcx: bool, if true: xor output */ + pushq %rbp; pushq %rbx; pushq %rcx; vmovdqu w(CTX), RK1; leaq (4*4*4)(%rdx), %rax; - inpack_blocks(%rdx, RA1, RB1, RC1, RD1, RK1, RX, RY, RK2); - inpack_blocks(%rax, RA2, RB2, RC2, RD2, RK1, RX, RY, RK2); + inpack_blocks(%rdx, RA1, RB1, RC1, RD1, RK1, RX0, RY0, RK2); + preload_rgi(RA1); + rotate_1l(RD1); + inpack_blocks(%rax, RA2, RB2, RC2, RD2, RK1, RX0, RY0, RK2); + rotate_1l(RD2); - xorq RID1, RID1; - xorq RID2, RID2; + movq %rsi, %r11; encrypt_cycle(0); encrypt_cycle(1); @@ -235,26 +287,27 @@ __twofish_enc_blk_8way: encrypt_cycle(4); encrypt_cycle(5); encrypt_cycle(6); - encrypt_cycle(7); + encrypt_cycle_last(7); vmovdqu (w+4*4)(CTX), RK1; popq %rcx; popq %rbx; + popq %rbp; - leaq (4*4*4)(%rsi), %rax; + leaq (4*4*4)(%r11), %rax; testb %cl, %cl; jnz __enc_xor8; - outunpack_blocks(%rsi, RC1, RD1, RA1, RB1, RK1, RX, RY, RK2); - outunpack_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX, RY, RK2); + outunpack_blocks(%r11, RC1, RD1, RA1, RB1, RK1, RX0, RY0, RK2); + outunpack_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX0, RY0, RK2); ret; __enc_xor8: - outunpack_xor_blocks(%rsi, RC1, RD1, RA1, RB1, RK1, RX, RY, RK2); - outunpack_xor_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX, RY, RK2); + outunpack_xor_blocks(%r11, RC1, RD1, RA1, RB1, RK1, RX0, RY0, RK2); + outunpack_xor_blocks(%rax, RC2, RD2, RA2, RB2, RK1, 
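
For reference, here is the scalar form of the round that round_head_2 plus encround_tail implement per 32-bit lane; note that the vector code hoists the rol32(d, 1) into the previous round's prerotate (and into rotate_1l() at load time), which is why the last cycle passes dummy. A sketch, assuming x and y are the two s-box outputs from the lookups above:

#include <stdint.h>

static inline uint32_t rol32(uint32_t v, int n) { return (v << n) | (v >> (32 - n)); }
static inline uint32_t ror32(uint32_t v, int n) { return (v >> n) | (v << (32 - n)); }

static void encround(uint32_t x, uint32_t y, uint32_t *c, uint32_t *d,
                     uint32_t k1, uint32_t k2)
{
        x += y;                         /* pseudo-Hadamard transform: t0+t1   */
        y += x;                         /*                            t0+2*t1 */
        *c = ror32(*c ^ (x + k1), 1);   /* key add, fold into c, rotate right */
        *d = rol32(*d, 1) ^ (y + k2);   /* the rol is what prerotate hoists   */
}
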
RX0, RY0, RK2); ret; @@ -269,16 +322,19 @@ twofish_dec_blk_8way: * %rdx: src */ + pushq %rbp; pushq %rbx; vmovdqu (w+4*4)(CTX), RK1; leaq (4*4*4)(%rdx), %rax; - inpack_blocks(%rdx, RC1, RD1, RA1, RB1, RK1, RX, RY, RK2); - inpack_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX, RY, RK2); + inpack_blocks(%rdx, RC1, RD1, RA1, RB1, RK1, RX0, RY0, RK2); + preload_rgi(RC1); + rotate_1l(RA1); + inpack_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX0, RY0, RK2); + rotate_1l(RA2); - xorq RID1, RID1; - xorq RID2, RID2; + movq %rsi, %r11; decrypt_cycle(7); decrypt_cycle(6); @@ -287,14 +343,15 @@ twofish_dec_blk_8way: decrypt_cycle(3); decrypt_cycle(2); decrypt_cycle(1); - decrypt_cycle(0); + decrypt_cycle_last(0); vmovdqu (w)(CTX), RK1; popq %rbx; + popq %rbp; - leaq (4*4*4)(%rsi), %rax; - outunpack_blocks(%rsi, RA1, RB1, RC1, RD1, RK1, RX, RY, RK2); - outunpack_blocks(%rax, RA2, RB2, RC2, RD2, RK1, RX, RY, RK2); + leaq (4*4*4)(%r11), %rax; + outunpack_blocks(%r11, RA1, RB1, RC1, RD1, RK1, RX0, RY0, RK2); + outunpack_blocks(%rax, RA2, RB2, RC2, RD2, RK1, RX0, RY0, RK2); ret; diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c index 782b67ddaf6a..e7708b5442e0 100644 --- a/arch/x86/crypto/twofish_avx_glue.c +++ b/arch/x86/crypto/twofish_avx_glue.c @@ -378,7 +378,6 @@ static struct crypto_alg twofish_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(twofish_algs[0].cra_list), .cra_u = { .blkcipher = { .min_keysize = TF_MIN_KEY_SIZE, @@ -398,7 +397,6 @@ static struct crypto_alg twofish_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(twofish_algs[1].cra_list), .cra_u = { .blkcipher = { .min_keysize = TF_MIN_KEY_SIZE, @@ -418,7 +416,6 @@ static struct crypto_alg twofish_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(twofish_algs[2].cra_list), .cra_u = { .blkcipher = { .min_keysize = TF_MIN_KEY_SIZE, @@ -439,7 +436,6 @@ static struct crypto_alg twofish_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(twofish_algs[3].cra_list), .cra_exit = lrw_twofish_exit_tfm, .cra_u = { .blkcipher = { @@ -463,7 +459,6 @@ static struct crypto_alg twofish_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(twofish_algs[4].cra_list), .cra_u = { .blkcipher = { .min_keysize = TF_MIN_KEY_SIZE * 2, @@ -484,7 +479,6 @@ static struct crypto_alg twofish_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(twofish_algs[5].cra_list), .cra_init = ablk_init, .cra_exit = ablk_exit, .cra_u = { @@ -506,7 +500,6 @@ static struct crypto_alg twofish_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(twofish_algs[6].cra_list), .cra_init = ablk_init, .cra_exit = ablk_exit, .cra_u = { @@ -529,7 +522,6 @@ static struct crypto_alg twofish_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(twofish_algs[7].cra_list), .cra_init = ablk_init, .cra_exit = ablk_exit, .cra_u = { @@ -553,7 +545,6 @@ static struct crypto_alg twofish_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, - 
.cra_list = LIST_HEAD_INIT(twofish_algs[8].cra_list), .cra_init = ablk_init, .cra_exit = ablk_exit, .cra_u = { @@ -578,7 +569,6 @@ static struct crypto_alg twofish_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(twofish_algs[9].cra_list), .cra_init = ablk_init, .cra_exit = ablk_exit, .cra_u = { diff --git a/arch/x86/crypto/twofish_glue.c b/arch/x86/crypto/twofish_glue.c index 359ae084275c..0a5202303501 100644 --- a/arch/x86/crypto/twofish_glue.c +++ b/arch/x86/crypto/twofish_glue.c @@ -70,7 +70,6 @@ static struct crypto_alg alg = { .cra_ctxsize = sizeof(struct twofish_ctx), .cra_alignmask = 0, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(alg.cra_list), .cra_u = { .cipher = { .cia_min_keysize = TF_MIN_KEY_SIZE, diff --git a/arch/x86/crypto/twofish_glue_3way.c b/arch/x86/crypto/twofish_glue_3way.c index 15f9347316c8..aa3eb358b7e8 100644 --- a/arch/x86/crypto/twofish_glue_3way.c +++ b/arch/x86/crypto/twofish_glue_3way.c @@ -342,7 +342,6 @@ static struct crypto_alg tf_algs[5] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(tf_algs[0].cra_list), .cra_u = { .blkcipher = { .min_keysize = TF_MIN_KEY_SIZE, @@ -362,7 +361,6 @@ static struct crypto_alg tf_algs[5] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(tf_algs[1].cra_list), .cra_u = { .blkcipher = { .min_keysize = TF_MIN_KEY_SIZE, @@ -383,7 +381,6 @@ static struct crypto_alg tf_algs[5] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(tf_algs[2].cra_list), .cra_u = { .blkcipher = { .min_keysize = TF_MIN_KEY_SIZE, @@ -404,7 +401,6 @@ static struct crypto_alg tf_algs[5] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(tf_algs[3].cra_list), .cra_exit = lrw_twofish_exit_tfm, .cra_u = { .blkcipher = { @@ -426,7 +422,6 @@ static struct crypto_alg tf_algs[5] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(tf_algs[4].cra_list), .cra_u = { .blkcipher = { .min_keysize = TF_MIN_KEY_SIZE * 2, diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index 673ac9b63d6b..efc6a958b71d 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -32,6 +32,7 @@ #include <asm/sigframe.h> #include <asm/sighandling.h> #include <asm/sys_ia32.h> +#include <asm/smap.h> #define FIX_EFLAGS __FIX_EFLAGS @@ -162,7 +163,8 @@ asmlinkage long sys32_sigaltstack(const stack_ia32_t __user *uss_ptr, } seg = get_fs(); set_fs(KERNEL_DS); - ret = do_sigaltstack(uss_ptr ? &uss : NULL, &uoss, regs->sp); + ret = do_sigaltstack((stack_t __force __user *) (uss_ptr ? 
&uss : NULL), + (stack_t __force __user *) &uoss, regs->sp); set_fs(seg); if (ret >= 0 && uoss_ptr) { if (!access_ok(VERIFY_WRITE, uoss_ptr, sizeof(stack_ia32_t))) @@ -250,11 +252,12 @@ static int ia32_restore_sigcontext(struct pt_regs *regs, get_user_ex(tmp, &sc->fpstate); buf = compat_ptr(tmp); - err |= restore_i387_xstate_ia32(buf); get_user_ex(*pax, &sc->ax); } get_user_catch(err); + err |= restore_xstate_sig(buf, 1); + return err; } @@ -361,7 +364,7 @@ static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc, */ static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, - void **fpstate) + void __user **fpstate) { unsigned long sp; @@ -381,9 +384,12 @@ static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, sp = (unsigned long) ka->sa.sa_restorer; if (used_math()) { - sp = sp - sig_xstate_ia32_size; - *fpstate = (struct _fpstate_ia32 *) sp; - if (save_i387_xstate_ia32(*fpstate) < 0) + unsigned long fx_aligned, math_size; + + sp = alloc_mathframe(sp, 1, &fx_aligned, &math_size); + *fpstate = (struct _fpstate_ia32 __user *) sp; + if (save_xstate_sig(*fpstate, (void __user *)fx_aligned, + math_size) < 0) return (void __user *) -1L; } @@ -448,7 +454,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka, * These are actually not used anymore, but left because some * gdb versions depend on them as a marker. */ - put_user_ex(*((u64 *)&code), (u64 *)frame->retcode); + put_user_ex(*((u64 *)&code), (u64 __user *)frame->retcode); } put_user_catch(err); if (err) @@ -502,7 +508,6 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, put_user_ex(sig, &frame->sig); put_user_ex(ptr_to_compat(&frame->info), &frame->pinfo); put_user_ex(ptr_to_compat(&frame->uc), &frame->puc); - err |= copy_siginfo_to_user32(&frame->info, info); /* Create the ucontext. */ if (cpu_has_xsave) @@ -514,9 +519,6 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, put_user_ex(sas_ss_flags(regs->sp), &frame->uc.uc_stack.ss_flags); put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size); - err |= ia32_setup_sigcontext(&frame->uc.uc_mcontext, fpstate, - regs, set->sig[0]); - err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); if (ka->sa.sa_flags & SA_RESTORER) restorer = ka->sa.sa_restorer; @@ -529,9 +531,14 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, * Not actually used anymore, but left because some gdb * versions need it. 
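
The __force __user casts added to the sys32_sigaltstack() call above are sparse annotations, not behavior changes: under set_fs(KERNEL_DS) the kernel-stack buffers legitimately pass access_ok(), and __force tells sparse that the address-space-crossing cast is intentional. The pattern in isolation (a condensed restatement of the code above; kernel context assumed):

static long sigaltstack_from_kernel(stack_t *uss, stack_t *uoss,
                                    unsigned long sp)
{
        mm_segment_t old_fs = get_fs();
        long ret;

        set_fs(KERNEL_DS);      /* access_ok() now accepts kernel addresses */
        ret = do_sigaltstack((stack_t __force __user *)uss,
                             (stack_t __force __user *)uoss, sp);
        set_fs(old_fs);         /* always restore the previous limit */
        return ret;
}
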
*/ - put_user_ex(*((u64 *)&code), (u64 *)frame->retcode); + put_user_ex(*((u64 *)&code), (u64 __user *)frame->retcode); } put_user_catch(err); + err |= copy_siginfo_to_user32(&frame->info, info); + err |= ia32_setup_sigcontext(&frame->uc.uc_mcontext, fpstate, + regs, set->sig[0]); + err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); + if (err) return -EFAULT; diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 20e5f7ba0e6b..9c289504e680 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -14,6 +14,7 @@ #include <asm/segment.h> #include <asm/irqflags.h> #include <asm/asm.h> +#include <asm/smap.h> #include <linux/linkage.h> #include <linux/err.h> @@ -146,8 +147,10 @@ ENTRY(ia32_sysenter_target) SAVE_ARGS 0,1,0 /* no need to do an access_ok check here because rbp has been 32bit zero extended */ + ASM_STAC 1: movl (%rbp),%ebp _ASM_EXTABLE(1b,ia32_badarg) + ASM_CLAC orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) CFI_REMEMBER_STATE @@ -301,8 +304,10 @@ ENTRY(ia32_cstar_target) /* no need to do an access_ok check here because r8 has been 32bit zero extended */ /* hardware stack frame is complete now */ + ASM_STAC 1: movl (%r8),%r9d _ASM_EXTABLE(1b,ia32_badarg) + ASM_CLAC orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) CFI_REMEMBER_STATE @@ -365,6 +370,7 @@ cstar_tracesys: END(ia32_cstar_target) ia32_badarg: + ASM_CLAC movq $-EFAULT,%rax jmp ia32_sysret CFI_ENDPROC diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index 4540bece0946..c5b938d92eab 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c @@ -287,7 +287,7 @@ asmlinkage long sys32_sigaction(int sig, struct old_sigaction32 __user *act, return ret; } -asmlinkage long sys32_waitpid(compat_pid_t pid, unsigned int *stat_addr, +asmlinkage long sys32_waitpid(compat_pid_t pid, unsigned int __user *stat_addr, int options) { return compat_sys_wait4(pid, stat_addr, options, NULL); diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index f9c0d3ba9e84..1595d6813432 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild @@ -22,7 +22,3 @@ header-y += sigcontext32.h header-y += ucontext.h header-y += vm86.h header-y += vsyscall.h - -genhdr-y += unistd_32.h -genhdr-y += unistd_64.h -genhdr-y += unistd_x32.h diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h index 952bd0100c5c..372231c22a47 100644 --- a/arch/x86/include/asm/alternative-asm.h +++ b/arch/x86/include/asm/alternative-asm.h @@ -1,3 +1,6 @@ +#ifndef _ASM_X86_ALTERNATIVE_ASM_H +#define _ASM_X86_ALTERNATIVE_ASM_H + #ifdef __ASSEMBLY__ #include <asm/asm.h> @@ -5,10 +8,10 @@ #ifdef CONFIG_SMP .macro LOCK_PREFIX 672: lock - .section .smp_locks,"a" + .pushsection .smp_locks,"a" .balign 4 .long 672b - . 
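
The ASM_STAC/ASM_CLAC pairs appearing here (around the sysenter/cstar sixth-argument loads, and again in the futex helpers later in this diff) are the SMAP protocol: with CR4.SMAP set, any kernel access to a user page faults unless EFLAGS.AC is set, so deliberate accesses are bracketed by stac/clac — and the fault path (ia32_badarg) must clac too, since the exception can be raised with AC still set. A rough hand-written sketch of one bracketed user load (assumes the kernel's _ASM_EXTABLE macro and an assembler that knows stac/clac; the real ASM_STAC/ASM_CLAC are alternatives that patch to NOPs on non-SMAP hardware):

static inline int get_user_4(unsigned int *val, const unsigned int __user *uaddr)
{
        int err = 0;

        asm volatile("stac\n"                   /* open the user-access window */
                     "1:        movl (%2), %1\n"
                     "2:        clac\n"         /* close it on the normal path */
                     ".section .fixup,\"ax\"\n"
                     "3:        movl $-14, %0\n"        /* -EFAULT */
                     "  jmp 2b\n"               /* rejoin at the clac */
                     ".previous\n"
                     _ASM_EXTABLE(1b, 3b)
                     : "+r" (err), "=r" (*val)
                     : "r" (uaddr));
        return err;
}
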
- .previous + .popsection .endm #else .macro LOCK_PREFIX @@ -24,3 +27,5 @@ .endm #endif /* __ASSEMBLY__ */ + +#endif /* _ASM_X86_ALTERNATIVE_ASM_H */ diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 70780689599a..58ed6d96a6ac 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -29,10 +29,10 @@ #ifdef CONFIG_SMP #define LOCK_PREFIX_HERE \ - ".section .smp_locks,\"a\"\n" \ - ".balign 4\n" \ - ".long 671f - .\n" /* offset */ \ - ".previous\n" \ + ".pushsection .smp_locks,\"a\"\n" \ + ".balign 4\n" \ + ".long 671f - .\n" /* offset */ \ + ".popsection\n" \ "671:" #define LOCK_PREFIX LOCK_PREFIX_HERE "\n\tlock; " @@ -60,7 +60,7 @@ extern void alternatives_smp_module_add(struct module *mod, char *name, void *locks, void *locks_end, void *text, void *text_end); extern void alternatives_smp_module_del(struct module *mod); -extern void alternatives_smp_switch(int smp); +extern void alternatives_enable_smp(void); extern int alternatives_text_reserved(void *start, void *end); extern bool skip_smp_alternatives; #else @@ -68,7 +68,7 @@ static inline void alternatives_smp_module_add(struct module *mod, char *name, void *locks, void *locks_end, void *text, void *text_end) {} static inline void alternatives_smp_module_del(struct module *mod) {} -static inline void alternatives_smp_switch(int smp) {} +static inline void alternatives_enable_smp(void) {} static inline int alternatives_text_reserved(void *start, void *end) { return 0; @@ -99,30 +99,30 @@ static inline int alternatives_text_reserved(void *start, void *end) /* alternative assembly primitive: */ #define ALTERNATIVE(oldinstr, newinstr, feature) \ OLDINSTR(oldinstr) \ - ".section .altinstructions,\"a\"\n" \ + ".pushsection .altinstructions,\"a\"\n" \ ALTINSTR_ENTRY(feature, 1) \ - ".previous\n" \ - ".section .discard,\"aw\",@progbits\n" \ + ".popsection\n" \ + ".pushsection .discard,\"aw\",@progbits\n" \ DISCARD_ENTRY(1) \ - ".previous\n" \ - ".section .altinstr_replacement, \"ax\"\n" \ + ".popsection\n" \ + ".pushsection .altinstr_replacement, \"ax\"\n" \ ALTINSTR_REPLACEMENT(newinstr, feature, 1) \ - ".previous" + ".popsection" #define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\ OLDINSTR(oldinstr) \ - ".section .altinstructions,\"a\"\n" \ + ".pushsection .altinstructions,\"a\"\n" \ ALTINSTR_ENTRY(feature1, 1) \ ALTINSTR_ENTRY(feature2, 2) \ - ".previous\n" \ - ".section .discard,\"aw\",@progbits\n" \ + ".popsection\n" \ + ".pushsection .discard,\"aw\",@progbits\n" \ DISCARD_ENTRY(1) \ DISCARD_ENTRY(2) \ - ".previous\n" \ - ".section .altinstr_replacement, \"ax\"\n" \ + ".popsection\n" \ + ".pushsection .altinstr_replacement, \"ax\"\n" \ ALTINSTR_REPLACEMENT(newinstr1, feature1, 1) \ ALTINSTR_REPLACEMENT(newinstr2, feature2, 2) \ - ".previous" + ".popsection" /* * This must be included *after* the definition of ALTERNATIVE due to diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index 58cb6d4085f7..250b8774c158 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h @@ -309,9 +309,9 @@ static inline void atomic_or_long(unsigned long *v1, unsigned long v2) #define smp_mb__after_atomic_inc() barrier() #ifdef CONFIG_X86_32 -# include "atomic64_32.h" +# include <asm/atomic64_32.h> #else -# include "atomic64_64.h" +# include <asm/atomic64_64.h> #endif #endif /* _ASM_X86_ATOMIC_H */ diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index 72f5009deb5a..6dfd0195bb55 100644 --- 
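
The wholesale .section/.previous to .pushsection/.popsection conversion in alternative-asm.h and alternative.h fixes a composability problem rather than an outright bug: .previous only swaps back to the previously active section, so once these macros are expanded inside code that has itself switched sections (or inside another such macro), the assembler can end up emitting into the wrong section; push/pop maintain a proper stack. An illustrative macro in the same style (hypothetical section name):

/* record a compile-time constant in a side section without
 * disturbing whichever section is currently active */
#define RECORD_NOTE(val)                                        \
        asm volatile(".pushsection .my_notes, \"a\"\n\t"        \
                     ".balign 4\n\t"                            \
                     ".long %c0\n\t"    /* %c0: bare constant */ \
                     ".popsection" : : "i" (val))
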
a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -355,7 +355,7 @@ static int test_bit(int nr, const volatile unsigned long *addr); */ static inline unsigned long __ffs(unsigned long word) { - asm("bsf %1,%0" + asm("rep; bsf %1,%0" : "=r" (word) : "rm" (word)); return word; @@ -369,7 +369,7 @@ static inline unsigned long __ffs(unsigned long word) */ static inline unsigned long ffz(unsigned long word) { - asm("bsf %1,%0" + asm("rep; bsf %1,%0" : "=r" (word) : "r" (~word)); return word; @@ -417,10 +417,9 @@ static inline int ffs(int x) * We cannot do this on 32 bits because at the very least some * 486 CPUs did not behave this way. */ - long tmp = -1; asm("bsfl %1,%0" : "=r" (r) - : "rm" (x), "0" (tmp)); + : "rm" (x), "0" (-1)); #elif defined(CONFIG_X86_CMOV) asm("bsfl %1,%0\n\t" "cmovzl %2,%0" @@ -459,10 +458,9 @@ static inline int fls(int x) * We cannot do this on 32 bits because at the very least some * 486 CPUs did not behave this way. */ - long tmp = -1; asm("bsrl %1,%0" : "=r" (r) - : "rm" (x), "0" (tmp)); + : "rm" (x), "0" (-1)); #elif defined(CONFIG_X86_CMOV) asm("bsrl %1,%0\n\t" "cmovzl %2,%0" @@ -490,13 +488,13 @@ static inline int fls(int x) #ifdef CONFIG_X86_64 static __always_inline int fls64(__u64 x) { - long bitpos = -1; + int bitpos = -1; /* * AMD64 says BSRQ won't clobber the dest reg if x==0; Intel64 says the * dest reg is undefined if x==0, but their CPU architect says its * value is written to set it to the same as before. */ - asm("bsrq %1,%0" + asm("bsrq %1,%q0" : "+r" (bitpos) : "rm" (x)); return bitpos + 1; diff --git a/arch/x86/include/asm/calling.h b/arch/x86/include/asm/calling.h index a9e3a740f697..0fa675033912 100644 --- a/arch/x86/include/asm/calling.h +++ b/arch/x86/include/asm/calling.h @@ -46,41 +46,39 @@ For 32-bit we have the following conventions - kernel is built with */ -#include "dwarf2.h" +#include <asm/dwarf2.h> /* - * 64-bit system call stack frame layout defines and helpers, for - * assembly code (note that the seemingly unnecessary parentheses - * are to prevent cpp from inserting spaces in expressions that get - * passed to macros): + * 64-bit system call stack frame layout defines and helpers, + * for assembly code: */ -#define R15 (0) -#define R14 (8) -#define R13 (16) -#define R12 (24) -#define RBP (32) -#define RBX (40) +#define R15 0 +#define R14 8 +#define R13 16 +#define R12 24 +#define RBP 32 +#define RBX 40 /* arguments: interrupts/non tracing syscalls only save up to here: */ -#define R11 (48) -#define R10 (56) -#define R9 (64) -#define R8 (72) -#define RAX (80) -#define RCX (88) -#define RDX (96) -#define RSI (104) -#define RDI (112) -#define ORIG_RAX (120) /* + error_code */ +#define R11 48 +#define R10 56 +#define R9 64 +#define R8 72 +#define RAX 80 +#define RCX 88 +#define RDX 96 +#define RSI 104 +#define RDI 112 +#define ORIG_RAX 120 /* + error_code */ /* end of arguments */ /* cpu exception frame or undefined in case of fast syscall: */ -#define RIP (128) -#define CS (136) -#define EFLAGS (144) -#define RSP (152) -#define SS (160) +#define RIP 128 +#define CS 136 +#define EFLAGS 144 +#define RSP 152 +#define SS 160 #define ARGOFFSET R11 #define SWFRAME ORIG_RAX diff --git a/arch/x86/include/asm/checksum.h b/arch/x86/include/asm/checksum.h index 848850fd7d62..5f5bb0f97361 100644 --- a/arch/x86/include/asm/checksum.h +++ b/arch/x86/include/asm/checksum.h @@ -1,5 +1,5 @@ #ifdef CONFIG_X86_32 -# include "checksum_32.h" +# include <asm/checksum_32.h> #else -# include "checksum_64.h" +# include <asm/checksum_64.h> 
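
Two micro-optimizations are folded into the bitops.h hunk. "rep; bsf" decodes as TZCNT on CPUs that have it and as plain BSF (the REP prefix is ignored) everywhere else — indistinguishable for the nonzero inputs that __ffs()/ffz() require. And moving the -1 preload from a C temporary into the "0" matching constraint drops a register move, relying on the documented-on-AMD (and observed-on-Intel) behavior that BSF leaves its destination untouched when the source is zero, so ffs(0) falls out as 0 without a branch or cmov. A scalar sketch of the constraint trick:

static inline int ffs_sketch(int x)
{
        int r;

        asm("bsfl %1,%0"
            : "=r" (r)
            : "rm" (x), "0" (-1));      /* "0": same register as %0,
                                           preset to -1; survives x == 0 */
        return r + 1;
}
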
#endif diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h index 99480e55973d..8d871eaddb66 100644 --- a/arch/x86/include/asm/cmpxchg.h +++ b/arch/x86/include/asm/cmpxchg.h @@ -138,9 +138,9 @@ extern void __add_wrong_size(void) __raw_cmpxchg((ptr), (old), (new), (size), "") #ifdef CONFIG_X86_32 -# include "cmpxchg_32.h" +# include <asm/cmpxchg_32.h> #else -# include "cmpxchg_64.h" +# include <asm/cmpxchg_64.h> #endif #ifdef __HAVE_ARCH_CMPXCHG diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 6b7ee5ff6820..8c297aa53eef 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -4,7 +4,9 @@ #ifndef _ASM_X86_CPUFEATURE_H #define _ASM_X86_CPUFEATURE_H +#ifndef _ASM_X86_REQUIRED_FEATURES_H #include <asm/required-features.h> +#endif #define NCAPINTS 10 /* N 32-bit words worth of info */ @@ -97,6 +99,7 @@ #define X86_FEATURE_EXTD_APICID (3*32+26) /* has extended APICID (8 bits) */ #define X86_FEATURE_AMD_DCM (3*32+27) /* multi-node processor */ #define X86_FEATURE_APERFMPERF (3*32+28) /* APERFMPERF */ +#define X86_FEATURE_EAGER_FPU (3*32+29) /* "eagerfpu" Non lazy FPU restore */ /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ #define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */ @@ -209,6 +212,7 @@ #define X86_FEATURE_RTM (9*32+11) /* Restricted Transactional Memory */ #define X86_FEATURE_RDSEED (9*32+18) /* The RDSEED instruction */ #define X86_FEATURE_ADX (9*32+19) /* The ADCX and ADOX instructions */ +#define X86_FEATURE_SMAP (9*32+20) /* Supervisor Mode Access Prevention */ #if defined(__KERNEL__) && !defined(__ASSEMBLY__) @@ -299,12 +303,14 @@ extern const char * const x86_power_flags[32]; #define cpu_has_xmm4_2 boot_cpu_has(X86_FEATURE_XMM4_2) #define cpu_has_x2apic boot_cpu_has(X86_FEATURE_X2APIC) #define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE) +#define cpu_has_xsaveopt boot_cpu_has(X86_FEATURE_XSAVEOPT) #define cpu_has_osxsave boot_cpu_has(X86_FEATURE_OSXSAVE) #define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR) #define cpu_has_pclmulqdq boot_cpu_has(X86_FEATURE_PCLMULQDQ) #define cpu_has_perfctr_core boot_cpu_has(X86_FEATURE_PERFCTR_CORE) #define cpu_has_cx8 boot_cpu_has(X86_FEATURE_CX8) #define cpu_has_cx16 boot_cpu_has(X86_FEATURE_CX16) +#define cpu_has_eager_fpu boot_cpu_has(X86_FEATURE_EAGER_FPU) #if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64) # define cpu_has_invlpg 1 diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h index 75f4c6d6a331..831dbb9c6c02 100644 --- a/arch/x86/include/asm/fpu-internal.h +++ b/arch/x86/include/asm/fpu-internal.h @@ -12,6 +12,7 @@ #include <linux/kernel_stat.h> #include <linux/regset.h> +#include <linux/compat.h> #include <linux/slab.h> #include <asm/asm.h> #include <asm/cpufeature.h> @@ -20,43 +21,76 @@ #include <asm/user.h> #include <asm/uaccess.h> #include <asm/xsave.h> +#include <asm/smap.h> -extern unsigned int sig_xstate_size; +#ifdef CONFIG_X86_64 +# include <asm/sigcontext32.h> +# include <asm/user32.h> +int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, + compat_sigset_t *set, struct pt_regs *regs); +int ia32_setup_frame(int sig, struct k_sigaction *ka, + compat_sigset_t *set, struct pt_regs *regs); +#else +# define user_i387_ia32_struct user_i387_struct +# define user32_fxsr_struct user_fxsr_struct +# define ia32_setup_frame __setup_frame +# define ia32_setup_rt_frame __setup_rt_frame +#endif + +extern unsigned int mxcsr_feature_mask; extern void 
fpu_init(void); +extern void eager_fpu_init(void); DECLARE_PER_CPU(struct task_struct *, fpu_owner_task); +extern void convert_from_fxsr(struct user_i387_ia32_struct *env, + struct task_struct *tsk); +extern void convert_to_fxsr(struct task_struct *tsk, + const struct user_i387_ia32_struct *env); + extern user_regset_active_fn fpregs_active, xfpregs_active; extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get, xstateregs_get; extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set, xstateregs_set; - /* * xstateregs_active == fpregs_active. Please refer to the comment * at the definition of fpregs_active. */ #define xstateregs_active fpregs_active -extern struct _fpx_sw_bytes fx_sw_reserved; -#ifdef CONFIG_IA32_EMULATION -extern unsigned int sig_xstate_ia32_size; -extern struct _fpx_sw_bytes fx_sw_reserved_ia32; -struct _fpstate_ia32; -struct _xstate_ia32; -extern int save_i387_xstate_ia32(void __user *buf); -extern int restore_i387_xstate_ia32(void __user *buf); -#endif - #ifdef CONFIG_MATH_EMULATION +# define HAVE_HWFP (boot_cpu_data.hard_math) extern void finit_soft_fpu(struct i387_soft_struct *soft); #else +# define HAVE_HWFP 1 static inline void finit_soft_fpu(struct i387_soft_struct *soft) {} #endif +static inline int is_ia32_compat_frame(void) +{ + return config_enabled(CONFIG_IA32_EMULATION) && + test_thread_flag(TIF_IA32); +} + +static inline int is_ia32_frame(void) +{ + return config_enabled(CONFIG_X86_32) || is_ia32_compat_frame(); +} + +static inline int is_x32_frame(void) +{ + return config_enabled(CONFIG_X86_X32_ABI) && test_thread_flag(TIF_X32); +} + #define X87_FSW_ES (1 << 7) /* Exception Summary */ +static __always_inline __pure bool use_eager_fpu(void) +{ + return static_cpu_has(X86_FEATURE_EAGER_FPU); +} + static __always_inline __pure bool use_xsaveopt(void) { return static_cpu_has(X86_FEATURE_XSAVEOPT); @@ -72,6 +106,13 @@ static __always_inline __pure bool use_fxsr(void) return static_cpu_has(X86_FEATURE_FXSR); } +static inline void fx_finit(struct i387_fxsave_struct *fx) +{ + memset(fx, 0, xstate_size); + fx->cwd = 0x37f; + fx->mxcsr = MXCSR_DEFAULT; +} + extern void __sanitize_i387_state(struct task_struct *); static inline void sanitize_i387_state(struct task_struct *tsk) @@ -81,131 +122,121 @@ static inline void sanitize_i387_state(struct task_struct *tsk) __sanitize_i387_state(tsk); } -#ifdef CONFIG_X86_64 -static inline int fxrstor_checking(struct i387_fxsave_struct *fx) -{ - int err; - - /* See comment in fxsave() below. */ -#ifdef CONFIG_AS_FXSAVEQ - asm volatile("1: fxrstorq %[fx]\n\t" - "2:\n" - ".section .fixup,\"ax\"\n" - "3: movl $-1,%[err]\n" - " jmp 2b\n" - ".previous\n" - _ASM_EXTABLE(1b, 3b) - : [err] "=r" (err) - : [fx] "m" (*fx), "0" (0)); -#else - asm volatile("1: rex64/fxrstor (%[fx])\n\t" - "2:\n" - ".section .fixup,\"ax\"\n" - "3: movl $-1,%[err]\n" - " jmp 2b\n" - ".previous\n" - _ASM_EXTABLE(1b, 3b) - : [err] "=r" (err) - : [fx] "R" (fx), "m" (*fx), "0" (0)); -#endif - return err; +#define user_insn(insn, output, input...) \ +({ \ + int err; \ + asm volatile(ASM_STAC "\n" \ + "1:" #insn "\n\t" \ + "2: " ASM_CLAC "\n" \ + ".section .fixup,\"ax\"\n" \ + "3: movl $-1,%[err]\n" \ + " jmp 2b\n" \ + ".previous\n" \ + _ASM_EXTABLE(1b, 3b) \ + : [err] "=r" (err), output \ + : "0"(0), input); \ + err; \ +}) + +#define check_insn(insn, output, input...) 
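
The new helpers lean on config_enabled() to turn what used to be #ifdef forests (visible in the deleted fxsave/fxrstor variants below) into ordinary if-chains: both branches are parsed and type-checked, and the constant-folded dead one is discarded. The mechanism, as defined in that era's kconfig.h (reproduced from memory — treat as a sketch), is pure preprocessor trickery on the fact that an enabled CONFIG_FOO is #defined to 1:

/* When cfg expands to 1, __ARG_PLACEHOLDER_1 pastes in "0,",
 * shifting the trailing literal 1 into the val slot; for an
 * undefined symbol the pasted token is junk and val stays 0. */
#define __ARG_PLACEHOLDER_1             0,
#define config_enabled(cfg)             _config_enabled(cfg)
#define _config_enabled(value)          __config_enabled(__ARG_PLACEHOLDER_##value)
#define __config_enabled(arg1_or_junk)  ___config_enabled(arg1_or_junk 1, 0)
#define ___config_enabled(__ignored, val, ...) val
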
\ +({ \ + int err; \ + asm volatile("1:" #insn "\n\t" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3: movl $-1,%[err]\n" \ + " jmp 2b\n" \ + ".previous\n" \ + _ASM_EXTABLE(1b, 3b) \ + : [err] "=r" (err), output \ + : "0"(0), input); \ + err; \ +}) + +static inline int fsave_user(struct i387_fsave_struct __user *fx) +{ + return user_insn(fnsave %[fx]; fwait, [fx] "=m" (*fx), "m" (*fx)); } static inline int fxsave_user(struct i387_fxsave_struct __user *fx) { - int err; + if (config_enabled(CONFIG_X86_32)) + return user_insn(fxsave %[fx], [fx] "=m" (*fx), "m" (*fx)); + else if (config_enabled(CONFIG_AS_FXSAVEQ)) + return user_insn(fxsaveq %[fx], [fx] "=m" (*fx), "m" (*fx)); - /* - * Clear the bytes not touched by the fxsave and reserved - * for the SW usage. - */ - err = __clear_user(&fx->sw_reserved, - sizeof(struct _fpx_sw_bytes)); - if (unlikely(err)) - return -EFAULT; - - /* See comment in fxsave() below. */ -#ifdef CONFIG_AS_FXSAVEQ - asm volatile("1: fxsaveq %[fx]\n\t" - "2:\n" - ".section .fixup,\"ax\"\n" - "3: movl $-1,%[err]\n" - " jmp 2b\n" - ".previous\n" - _ASM_EXTABLE(1b, 3b) - : [err] "=r" (err), [fx] "=m" (*fx) - : "0" (0)); -#else - asm volatile("1: rex64/fxsave (%[fx])\n\t" - "2:\n" - ".section .fixup,\"ax\"\n" - "3: movl $-1,%[err]\n" - " jmp 2b\n" - ".previous\n" - _ASM_EXTABLE(1b, 3b) - : [err] "=r" (err), "=m" (*fx) - : [fx] "R" (fx), "0" (0)); -#endif - if (unlikely(err) && - __clear_user(fx, sizeof(struct i387_fxsave_struct))) - err = -EFAULT; - /* No need to clear here because the caller clears USED_MATH */ - return err; + /* See comment in fpu_fxsave() below. */ + return user_insn(rex64/fxsave (%[fx]), "=m" (*fx), [fx] "R" (fx)); } -static inline void fpu_fxsave(struct fpu *fpu) +static inline int fxrstor_checking(struct i387_fxsave_struct *fx) { - /* Using "rex64; fxsave %0" is broken because, if the memory operand - uses any extended registers for addressing, a second REX prefix - will be generated (to the assembler, rex64 followed by semicolon - is a separate instruction), and hence the 64-bitness is lost. */ + if (config_enabled(CONFIG_X86_32)) + return check_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx)); + else if (config_enabled(CONFIG_AS_FXSAVEQ)) + return check_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); -#ifdef CONFIG_AS_FXSAVEQ - /* Using "fxsaveq %0" would be the ideal choice, but is only supported - starting with gas 2.16. */ - __asm__ __volatile__("fxsaveq %0" - : "=m" (fpu->state->fxsave)); -#else - /* Using, as a workaround, the properly prefixed form below isn't - accepted by any binutils version so far released, complaining that - the same type of prefix is used twice if an extended register is - needed for addressing (fix submitted to mainline 2005-11-21). - asm volatile("rex64/fxsave %0" - : "=m" (fpu->state->fxsave)); - This, however, we can work around by forcing the compiler to select - an addressing mode that doesn't require extended registers. */ - asm volatile("rex64/fxsave (%[fx])" - : "=m" (fpu->state->fxsave) - : [fx] "R" (&fpu->state->fxsave)); -#endif + /* See comment in fpu_fxsave() below. 
*/ + return check_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx), + "m" (*fx)); } -#else /* CONFIG_X86_32 */ +static inline int fxrstor_user(struct i387_fxsave_struct __user *fx) +{ + if (config_enabled(CONFIG_X86_32)) + return user_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx)); + else if (config_enabled(CONFIG_AS_FXSAVEQ)) + return user_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); -/* perform fxrstor iff the processor has extended states, otherwise frstor */ -static inline int fxrstor_checking(struct i387_fxsave_struct *fx) + /* See comment in fpu_fxsave() below. */ + return user_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx), + "m" (*fx)); +} + +static inline int frstor_checking(struct i387_fsave_struct *fx) { - /* - * The "nop" is needed to make the instructions the same - * length. - */ - alternative_input( - "nop ; frstor %1", - "fxrstor %1", - X86_FEATURE_FXSR, - "m" (*fx)); + return check_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); +} - return 0; +static inline int frstor_user(struct i387_fsave_struct __user *fx) +{ + return user_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); } static inline void fpu_fxsave(struct fpu *fpu) { - asm volatile("fxsave %[fx]" - : [fx] "=m" (fpu->state->fxsave)); + if (config_enabled(CONFIG_X86_32)) + asm volatile( "fxsave %[fx]" : [fx] "=m" (fpu->state->fxsave)); + else if (config_enabled(CONFIG_AS_FXSAVEQ)) + asm volatile("fxsaveq %0" : "=m" (fpu->state->fxsave)); + else { + /* Using "rex64; fxsave %0" is broken because, if the memory + * operand uses any extended registers for addressing, a second + * REX prefix will be generated (to the assembler, rex64 + * followed by semicolon is a separate instruction), and hence + * the 64-bitness is lost. + * + * Using "fxsaveq %0" would be the ideal choice, but is only + * supported starting with gas 2.16. + * + * Using, as a workaround, the properly prefixed form below + * isn't accepted by any binutils version so far released, + * complaining that the same type of prefix is used twice if + * an extended register is needed for addressing (fix submitted + * to mainline 2005-11-21). + * + * asm volatile("rex64/fxsave %0" : "=m" (fpu->state->fxsave)); + * + * This, however, we can work around by forcing the compiler to + * select an addressing mode that doesn't require extended + * registers. + */ + asm volatile( "rex64/fxsave (%[fx])" + : "=m" (fpu->state->fxsave) + : [fx] "R" (&fpu->state->fxsave)); + } } -#endif /* CONFIG_X86_64 */ - /* * These must be called with preempt disabled. Returns * 'true' if the FPU state is still intact. 
@@ -248,17 +279,14 @@ static inline int __save_init_fpu(struct task_struct *tsk) return fpu_save_init(&tsk->thread.fpu); } -static inline int fpu_fxrstor_checking(struct fpu *fpu) -{ - return fxrstor_checking(&fpu->state->fxsave); -} - static inline int fpu_restore_checking(struct fpu *fpu) { if (use_xsave()) - return fpu_xrstor_checking(fpu); + return fpu_xrstor_checking(&fpu->state->xsave); + else if (use_fxsr()) + return fxrstor_checking(&fpu->state->fxsave); else - return fpu_fxrstor_checking(fpu); + return frstor_checking(&fpu->state->fsave); } static inline int restore_fpu_checking(struct task_struct *tsk) @@ -310,15 +338,52 @@ static inline void __thread_set_has_fpu(struct task_struct *tsk) static inline void __thread_fpu_end(struct task_struct *tsk) { __thread_clear_has_fpu(tsk); - stts(); + if (!use_eager_fpu()) + stts(); } static inline void __thread_fpu_begin(struct task_struct *tsk) { - clts(); + if (!use_eager_fpu()) + clts(); __thread_set_has_fpu(tsk); } +static inline void __drop_fpu(struct task_struct *tsk) +{ + if (__thread_has_fpu(tsk)) { + /* Ignore delayed exceptions from user space */ + asm volatile("1: fwait\n" + "2:\n" + _ASM_EXTABLE(1b, 2b)); + __thread_fpu_end(tsk); + } +} + +static inline void drop_fpu(struct task_struct *tsk) +{ + /* + * Forget coprocessor state.. + */ + preempt_disable(); + tsk->fpu_counter = 0; + __drop_fpu(tsk); + clear_used_math(); + preempt_enable(); +} + +static inline void drop_init_fpu(struct task_struct *tsk) +{ + if (!use_eager_fpu()) + drop_fpu(tsk); + else { + if (use_xsave()) + xrstor_state(init_xstate_buf, -1); + else + fxrstor_checking(&init_xstate_buf->i387); + } +} + /* * FPU state switching for scheduling. * @@ -352,7 +417,12 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta { fpu_switch_t fpu; - fpu.preload = tsk_used_math(new) && new->fpu_counter > 5; + /* + * If the task has used the math, pre-load the FPU on xsave processors + * or if the past 5 consecutive context-switches used math. + */ + fpu.preload = tsk_used_math(new) && (use_eager_fpu() || + new->fpu_counter > 5); if (__thread_has_fpu(old)) { if (!__save_init_fpu(old)) cpu = ~0; @@ -364,14 +434,14 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta new->fpu_counter++; __thread_set_has_fpu(new); prefetch(new->thread.fpu.state); - } else + } else if (!use_eager_fpu()) stts(); } else { old->fpu_counter = 0; old->thread.fpu.last_cpu = ~0; if (fpu.preload) { new->fpu_counter++; - if (fpu_lazy_restore(new, cpu)) + if (!use_eager_fpu() && fpu_lazy_restore(new, cpu)) fpu.preload = 0; else prefetch(new->thread.fpu.state); @@ -391,44 +461,40 @@ static inline void switch_fpu_finish(struct task_struct *new, fpu_switch_t fpu) { if (fpu.preload) { if (unlikely(restore_fpu_checking(new))) - __thread_fpu_end(new); + drop_init_fpu(new); } } /* * Signal frame handlers... */ -extern int save_i387_xstate(void __user *buf); -extern int restore_i387_xstate(void __user *buf); +extern int save_xstate_sig(void __user *buf, void __user *fx, int size); +extern int __restore_xstate_sig(void __user *buf, void __user *fx, int size); -static inline void __clear_fpu(struct task_struct *tsk) +static inline int xstate_sigframe_size(void) { - if (__thread_has_fpu(tsk)) { - /* Ignore delayed exceptions from user space */ - asm volatile("1: fwait\n" - "2:\n" - _ASM_EXTABLE(1b, 2b)); - __thread_fpu_end(tsk); + return use_xsave() ? 
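
The use_eager_fpu() tests threaded through this file implement the new eagerfpu policy: in eager mode CR0.TS is never set, so there is no #NM fault-and-restore dance and the incoming task's state is restored right at the context switch; in lazy mode TS is armed via stts() and the first FP instruction traps into the restore path. The preload decision in switch_fpu_prepare() condenses to the following self-contained sketch (task_sketch is an illustrative stand-in for task_struct):

struct task_sketch { int used_math; int fpu_counter; };

/* whether to restore the incoming task's FPU state immediately */
static int should_preload(const struct task_sketch *next, int eager)
{
        /* eager mode: always, if the task has FPU state at all;
         * lazy mode: only after 5+ consecutive FPU-using switches */
        return next->used_math && (eager || next->fpu_counter > 5);
}
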
xstate_size + FP_XSTATE_MAGIC2_SIZE : xstate_size; +} + +static inline int restore_xstate_sig(void __user *buf, int ia32_frame) +{ + void __user *buf_fx = buf; + int size = xstate_sigframe_size(); + + if (ia32_frame && use_fxsr()) { + buf_fx = buf + sizeof(struct i387_fsave_struct); + size += sizeof(struct i387_fsave_struct); } + + return __restore_xstate_sig(buf, buf_fx, size); } /* - * The actual user_fpu_begin/end() functions - * need to be preemption-safe. + * Need to be preemption-safe. * - * NOTE! user_fpu_end() must be used only after you - * have saved the FP state, and user_fpu_begin() must - * be used only immediately before restoring it. - * These functions do not do any save/restore on - * their own. + * NOTE! user_fpu_begin() must be used only immediately before restoring + * it. This function does not do any save/restore on their own. */ -static inline void user_fpu_end(void) -{ - preempt_disable(); - __thread_fpu_end(current); - preempt_enable(); -} - static inline void user_fpu_begin(void) { preempt_disable(); @@ -437,25 +503,32 @@ static inline void user_fpu_begin(void) preempt_enable(); } +static inline void __save_fpu(struct task_struct *tsk) +{ + if (use_xsave()) + xsave_state(&tsk->thread.fpu.state->xsave, -1); + else + fpu_fxsave(&tsk->thread.fpu); +} + /* * These disable preemption on their own and are safe */ static inline void save_init_fpu(struct task_struct *tsk) { WARN_ON_ONCE(!__thread_has_fpu(tsk)); + + if (use_eager_fpu()) { + __save_fpu(tsk); + return; + } + preempt_disable(); __save_init_fpu(tsk); __thread_fpu_end(tsk); preempt_enable(); } -static inline void clear_fpu(struct task_struct *tsk) -{ - preempt_disable(); - __clear_fpu(tsk); - preempt_enable(); -} - /* * i387 state interaction */ @@ -510,11 +583,34 @@ static inline void fpu_free(struct fpu *fpu) } } -static inline void fpu_copy(struct fpu *dst, struct fpu *src) +static inline void fpu_copy(struct task_struct *dst, struct task_struct *src) { - memcpy(dst->state, src->state, xstate_size); + if (use_eager_fpu()) { + memset(&dst->thread.fpu.state->xsave, 0, xstate_size); + __save_fpu(dst); + } else { + struct fpu *dfpu = &dst->thread.fpu; + struct fpu *sfpu = &src->thread.fpu; + + unlazy_fpu(src); + memcpy(dfpu->state, sfpu->state, xstate_size); + } } -extern void fpu_finit(struct fpu *fpu); +static inline unsigned long +alloc_mathframe(unsigned long sp, int ia32_frame, unsigned long *buf_fx, + unsigned long *size) +{ + unsigned long frame_size = xstate_sigframe_size(); + + *buf_fx = sp = round_down(sp - frame_size, 64); + if (ia32_frame && use_fxsr()) { + frame_size += sizeof(struct i387_fsave_struct); + sp -= sizeof(struct i387_fsave_struct); + } + + *size = frame_size; + return sp; +} #endif diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index b0767bc08740..9a25b522d377 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -3,38 +3,54 @@ #ifdef __ASSEMBLY__ - .macro MCOUNT_SAVE_FRAME - /* taken from glibc */ - subq $0x38, %rsp - movq %rax, (%rsp) - movq %rcx, 8(%rsp) - movq %rdx, 16(%rsp) - movq %rsi, 24(%rsp) - movq %rdi, 32(%rsp) - movq %r8, 40(%rsp) - movq %r9, 48(%rsp) + /* skip is set if the stack was already partially adjusted */ + .macro MCOUNT_SAVE_FRAME skip=0 + /* + * We add enough stack to save all regs. 
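
The frame carving that alloc_mathframe() performs above is easiest to see with numbers. The user-space toy below mirrors its arithmetic with hypothetical sizes (a 512-byte fxsave image plus a 4-byte end magic, and the 112-byte legacy i387 fsave header that 32-bit frames prepend):

#include <stdint.h>
#include <stdio.h>

#define round_down(x, a)        ((x) & ~((uintptr_t)(a) - 1))

int main(void)
{
        uintptr_t sp = 0xbfff1000, buf_fx;
        unsigned long frame_size = 512 + 4;     /* fx image + magic */

        /* the fx/xstate area wants 64-byte alignment */
        buf_fx = sp = round_down(sp - frame_size, 64);
        /* 32-bit frames additionally prepend the legacy fsave header */
        frame_size += 112;
        sp -= 112;

        printf("fsave header at %#lx, fx/xstate at %#lx, frame %lu bytes\n",
               (unsigned long)sp, (unsigned long)buf_fx, frame_size);
        return 0;
}
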
+ */ + subq $(SS+8-\skip), %rsp + movq %rax, RAX(%rsp) + movq %rcx, RCX(%rsp) + movq %rdx, RDX(%rsp) + movq %rsi, RSI(%rsp) + movq %rdi, RDI(%rsp) + movq %r8, R8(%rsp) + movq %r9, R9(%rsp) + /* Move RIP to its proper location */ + movq SS+8(%rsp), %rdx + movq %rdx, RIP(%rsp) .endm - .macro MCOUNT_RESTORE_FRAME - movq 48(%rsp), %r9 - movq 40(%rsp), %r8 - movq 32(%rsp), %rdi - movq 24(%rsp), %rsi - movq 16(%rsp), %rdx - movq 8(%rsp), %rcx - movq (%rsp), %rax - addq $0x38, %rsp + .macro MCOUNT_RESTORE_FRAME skip=0 + movq R9(%rsp), %r9 + movq R8(%rsp), %r8 + movq RDI(%rsp), %rdi + movq RSI(%rsp), %rsi + movq RDX(%rsp), %rdx + movq RCX(%rsp), %rcx + movq RAX(%rsp), %rax + addq $(SS+8-\skip), %rsp .endm #endif #ifdef CONFIG_FUNCTION_TRACER -#define MCOUNT_ADDR ((long)(mcount)) +#ifdef CC_USING_FENTRY +# define MCOUNT_ADDR ((long)(__fentry__)) +#else +# define MCOUNT_ADDR ((long)(mcount)) +#endif #define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */ +#ifdef CONFIG_DYNAMIC_FTRACE +#define ARCH_SUPPORTS_FTRACE_OPS 1 +#define ARCH_SUPPORTS_FTRACE_SAVE_REGS +#endif + #ifndef __ASSEMBLY__ extern void mcount(void); extern atomic_t modifying_ftrace_code; +extern void __fentry__(void); static inline unsigned long ftrace_call_adjust(unsigned long addr) { diff --git a/arch/x86/include/asm/futex.h b/arch/x86/include/asm/futex.h index 71ecbcba1a4e..f373046e63ec 100644 --- a/arch/x86/include/asm/futex.h +++ b/arch/x86/include/asm/futex.h @@ -9,10 +9,13 @@ #include <asm/asm.h> #include <asm/errno.h> #include <asm/processor.h> +#include <asm/smap.h> #define __futex_atomic_op1(insn, ret, oldval, uaddr, oparg) \ - asm volatile("1:\t" insn "\n" \ - "2:\t.section .fixup,\"ax\"\n" \ + asm volatile("\t" ASM_STAC "\n" \ + "1:\t" insn "\n" \ + "2:\t" ASM_CLAC "\n" \ + "\t.section .fixup,\"ax\"\n" \ "3:\tmov\t%3, %1\n" \ "\tjmp\t2b\n" \ "\t.previous\n" \ @@ -21,12 +24,14 @@ : "i" (-EFAULT), "0" (oparg), "1" (0)) #define __futex_atomic_op2(insn, ret, oldval, uaddr, oparg) \ - asm volatile("1:\tmovl %2, %0\n" \ + asm volatile("\t" ASM_STAC "\n" \ + "1:\tmovl %2, %0\n" \ "\tmovl\t%0, %3\n" \ "\t" insn "\n" \ "2:\t" LOCK_PREFIX "cmpxchgl %3, %2\n" \ "\tjnz\t1b\n" \ - "3:\t.section .fixup,\"ax\"\n" \ + "3:\t" ASM_CLAC "\n" \ + "\t.section .fixup,\"ax\"\n" \ "4:\tmov\t%5, %1\n" \ "\tjmp\t3b\n" \ "\t.previous\n" \ @@ -122,8 +127,10 @@ static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) return -EFAULT; - asm volatile("1:\t" LOCK_PREFIX "cmpxchgl %4, %2\n" - "2:\t.section .fixup, \"ax\"\n" + asm volatile("\t" ASM_STAC "\n" + "1:\t" LOCK_PREFIX "cmpxchgl %4, %2\n" + "2:\t" ASM_CLAC "\n" + "\t.section .fixup, \"ax\"\n" "3:\tmov %3, %0\n" "\tjmp 2b\n" "\t.previous\n" diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index d3895dbf4ddb..81f04cee5f74 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h @@ -18,6 +18,10 @@ typedef struct { #ifdef CONFIG_SMP unsigned int irq_resched_count; unsigned int irq_call_count; + /* + * irq_tlb_count is double-counted in irq_call_count, so it must be + * subtracted from irq_call_count when displaying irq_call_count + */ unsigned int irq_tlb_count; #endif #ifdef CONFIG_X86_THERMAL_VECTOR diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h index 2c392d663dce..434e2106cc87 100644 --- a/arch/x86/include/asm/hpet.h +++ b/arch/x86/include/asm/hpet.h @@ -35,8 +35,6 @@ #define HPET_ID_NUMBER_SHIFT 8 #define HPET_ID_VENDOR_SHIFT 16 -#define HPET_ID_VENDOR_8086 
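
The CC_USING_FENTRY split reflects gcc's -mfentry option (gcc 4.6+): instead of a "call mcount" emitted after the prologue, "call __fentry__" becomes the very first instruction of every instrumented function, so the return address sits at a fixed, prologue-independent offset — which is what lets the MCOUNT_SAVE_FRAME above copy RIP from SS+8(%rsp) into a pt_regs-shaped frame. A quick way to see the difference (hypothetical file name; compile-only, so no runtime stubs are needed):

/* compile with "gcc -pg -mfentry -O2 -S fentry_demo.c" and inspect
 * the assembly: each function starts with "call __fentry__" before
 * any prologue, whereas plain -pg places "call mcount" after the
 * frame setup. */
__attribute__((noinline)) int add_one(int x)
{
        return x + 1;
}
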
0x8086 - #define HPET_CFG_ENABLE 0x001 #define HPET_CFG_LEGACY 0x002 #define HPET_LEGACY_8254 2 diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index 257d9cca214f..ed8089d69094 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -19,12 +19,37 @@ struct pt_regs; struct user_i387_struct; extern int init_fpu(struct task_struct *child); +extern void fpu_finit(struct fpu *fpu); extern int dump_fpu(struct pt_regs *, struct user_i387_struct *); extern void math_state_restore(void); extern bool irq_fpu_usable(void); -extern void kernel_fpu_begin(void); -extern void kernel_fpu_end(void); + +/* + * Careful: __kernel_fpu_begin/end() must be called with preempt disabled + * and they don't touch the preempt state on their own. + * If you enable preemption after __kernel_fpu_begin(), preempt notifier + * should call the __kernel_fpu_end() to prevent the kernel/user FPU + * state from getting corrupted. KVM for example uses this model. + * + * All other cases use kernel_fpu_begin/end() which disable preemption + * during kernel FPU usage. + */ +extern void __kernel_fpu_begin(void); +extern void __kernel_fpu_end(void); + +static inline void kernel_fpu_begin(void) +{ + WARN_ON_ONCE(!irq_fpu_usable()); + preempt_disable(); + __kernel_fpu_begin(); +} + +static inline void kernel_fpu_end(void) +{ + __kernel_fpu_end(); + preempt_enable(); +} /* * Some instructions like VIA's padlock instructions generate a spurious diff --git a/arch/x86/include/asm/iommu_table.h b/arch/x86/include/asm/iommu_table.h index f229b13a5f30..f42a04735a0a 100644 --- a/arch/x86/include/asm/iommu_table.h +++ b/arch/x86/include/asm/iommu_table.h @@ -48,7 +48,7 @@ struct iommu_table_entry { #define __IOMMU_INIT(_detect, _depend, _early_init, _late_init, _finish)\ - static const struct iommu_table_entry const \ + static const struct iommu_table_entry \ __iommu_entry_##_detect __used \ __attribute__ ((unused, __section__(".iommu_table"), \ aligned((sizeof(void *))))) \ @@ -63,10 +63,10 @@ struct iommu_table_entry { * to stop detecting the other IOMMUs after yours has been detected. */ #define IOMMU_INIT_POST(_detect) \ - __IOMMU_INIT(_detect, pci_swiotlb_detect_4gb, 0, 0, 0) + __IOMMU_INIT(_detect, pci_swiotlb_detect_4gb, NULL, NULL, 0) #define IOMMU_INIT_POST_FINISH(detect) \ - __IOMMU_INIT(_detect, pci_swiotlb_detect_4gb, 0, 0, 1) + __IOMMU_INIT(_detect, pci_swiotlb_detect_4gb, NULL, NULL, 1) /* * A more sophisticated version of IOMMU_INIT. 
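
The i387.h change splits the old kernel_fpu_begin/end() into __-prefixed primitives plus preempt-managing wrappers, codifying the rule in the new comment: ordinary users take the wrappers, while code such as KVM that juggles preemption itself pairs the raw halves. Typical wrapper usage looks like this sketch (vector loop elided; the irq_fpu_usable() fallback is the conventional pattern for in-kernel SIMD users):

static void xor_block_simd(unsigned long *dst, const unsigned long *src,
                           unsigned int words)
{
        if (!irq_fpu_usable()) {        /* atomic/irq context: scalar fallback */
                while (words--)
                        *dst++ ^= *src++;
                return;
        }

        kernel_fpu_begin();     /* saves user FPU state, holds off preemption */
        /* ... vectorized xor using SSE/AVX registers would go here ... */
        kernel_fpu_end();
}
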
This variant requires: diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h index 547882539157..d3ddd17405d0 100644 --- a/arch/x86/include/asm/kprobes.h +++ b/arch/x86/include/asm/kprobes.h @@ -27,6 +27,7 @@ #include <asm/insn.h> #define __ARCH_WANT_KPROBES_INSN_SLOT +#define ARCH_SUPPORTS_KPROBES_ON_FTRACE struct pt_regs; struct kprobe; diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h index 521bf252e34b..a65ec29e6ffb 100644 --- a/arch/x86/include/asm/kvm.h +++ b/arch/x86/include/asm/kvm.h @@ -9,6 +9,22 @@ #include <linux/types.h> #include <linux/ioctl.h> +#define DE_VECTOR 0 +#define DB_VECTOR 1 +#define BP_VECTOR 3 +#define OF_VECTOR 4 +#define BR_VECTOR 5 +#define UD_VECTOR 6 +#define NM_VECTOR 7 +#define DF_VECTOR 8 +#define TS_VECTOR 10 +#define NP_VECTOR 11 +#define SS_VECTOR 12 +#define GP_VECTOR 13 +#define PF_VECTOR 14 +#define MF_VECTOR 16 +#define MC_VECTOR 18 + /* Select x86 specific features in <linux/kvm.h> */ #define __KVM_HAVE_PIT #define __KVM_HAVE_IOAPIC diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index c9a91368fc5e..b2e11f452435 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -75,22 +75,6 @@ #define KVM_HPAGE_MASK(x) (~(KVM_HPAGE_SIZE(x) - 1)) #define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE) -#define DE_VECTOR 0 -#define DB_VECTOR 1 -#define BP_VECTOR 3 -#define OF_VECTOR 4 -#define BR_VECTOR 5 -#define UD_VECTOR 6 -#define NM_VECTOR 7 -#define DF_VECTOR 8 -#define TS_VECTOR 10 -#define NP_VECTOR 11 -#define SS_VECTOR 12 -#define GP_VECTOR 13 -#define PF_VECTOR 14 -#define MF_VECTOR 16 -#define MC_VECTOR 18 - #define SELECTOR_TI_MASK (1 << 2) #define SELECTOR_RPL_MASK 0x03 diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index a3ac52b29cbf..54d73b1f00a0 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -116,19 +116,9 @@ struct mce_log { /* Software defined banks */ #define MCE_EXTENDED_BANK 128 #define MCE_THERMAL_BANK MCE_EXTENDED_BANK + 0 - -#define K8_MCE_THRESHOLD_BASE (MCE_EXTENDED_BANK + 1) /* MCE_AMD */ -#define K8_MCE_THRESHOLD_BANK_0 (MCE_THRESHOLD_BASE + 0 * 9) -#define K8_MCE_THRESHOLD_BANK_1 (MCE_THRESHOLD_BASE + 1 * 9) -#define K8_MCE_THRESHOLD_BANK_2 (MCE_THRESHOLD_BASE + 2 * 9) -#define K8_MCE_THRESHOLD_BANK_3 (MCE_THRESHOLD_BASE + 3 * 9) -#define K8_MCE_THRESHOLD_BANK_4 (MCE_THRESHOLD_BASE + 4 * 9) -#define K8_MCE_THRESHOLD_BANK_5 (MCE_THRESHOLD_BASE + 5 * 9) -#define K8_MCE_THRESHOLD_DRAM_ECC (MCE_THRESHOLD_BANK_4 + 0) - +#define K8_MCE_THRESHOLD_BASE (MCE_EXTENDED_BANK + 1) #ifdef __KERNEL__ - extern void mce_register_decode_chain(struct notifier_block *nb); extern void mce_unregister_decode_chain(struct notifier_block *nb); @@ -171,6 +161,7 @@ DECLARE_PER_CPU(struct device *, mce_device); #ifdef CONFIG_X86_MCE_INTEL extern int mce_cmci_disabled; extern int mce_ignore_ce; +extern int mce_bios_cmci_threshold; void mce_intel_feature_init(struct cpuinfo_x86 *c); void cmci_clear(void); void cmci_reenable(void); diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h index 4ebe157bf73d..43d921b4752c 100644 --- a/arch/x86/include/asm/microcode.h +++ b/arch/x86/include/asm/microcode.h @@ -15,8 +15,8 @@ struct microcode_ops { enum ucode_state (*request_microcode_user) (int cpu, const void __user *buf, size_t size); - enum ucode_state (*request_microcode_fw) (int cpu, - struct device *device); + enum ucode_state (*request_microcode_fw) (int cpu, struct device *, + 
bool refresh_fw); void (*microcode_fini_cpu) (int cpu); @@ -49,12 +49,6 @@ static inline struct microcode_ops * __init init_intel_microcode(void) #ifdef CONFIG_MICROCODE_AMD extern struct microcode_ops * __init init_amd_microcode(void); extern void __exit exit_amd_microcode(void); - -static inline void get_ucode_data(void *to, const u8 *from, size_t n) -{ - memcpy(to, from, n); -} - #else static inline struct microcode_ops * __init init_amd_microcode(void) { diff --git a/arch/x86/include/asm/mmzone.h b/arch/x86/include/asm/mmzone.h index 64217ea16a36..d497bc425cae 100644 --- a/arch/x86/include/asm/mmzone.h +++ b/arch/x86/include/asm/mmzone.h @@ -1,5 +1,5 @@ #ifdef CONFIG_X86_32 -# include "mmzone_32.h" +# include <asm/mmzone_32.h> #else -# include "mmzone_64.h" +# include <asm/mmzone_64.h> #endif diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 957ec87385af..fbee9714d9ab 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -248,6 +248,9 @@ #define MSR_IA32_PERF_STATUS 0x00000198 #define MSR_IA32_PERF_CTL 0x00000199 +#define MSR_AMD_PSTATE_DEF_BASE 0xc0010064 +#define MSR_AMD_PERF_STATUS 0xc0010063 +#define MSR_AMD_PERF_CTL 0xc0010062 #define MSR_IA32_MPERF 0x000000e7 #define MSR_IA32_APERF 0x000000e8 diff --git a/arch/x86/include/asm/mutex.h b/arch/x86/include/asm/mutex.h index a731b9c573a6..7d3a48275394 100644 --- a/arch/x86/include/asm/mutex.h +++ b/arch/x86/include/asm/mutex.h @@ -1,5 +1,5 @@ #ifdef CONFIG_X86_32 -# include "mutex_32.h" +# include <asm/mutex_32.h> #else -# include "mutex_64.h" +# include <asm/mutex_64.h> #endif diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h index bfacd2ccf651..49119fcea2dc 100644 --- a/arch/x86/include/asm/numa.h +++ b/arch/x86/include/asm/numa.h @@ -53,9 +53,9 @@ static inline int numa_cpu_node(int cpu) #endif /* CONFIG_NUMA */ #ifdef CONFIG_X86_32 -# include "numa_32.h" +# include <asm/numa_32.h> #else -# include "numa_64.h" +# include <asm/numa_64.h> #endif #ifdef CONFIG_NUMA diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index df75d07571ce..6e41b9343928 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h @@ -141,7 +141,7 @@ void default_restore_msi_irqs(struct pci_dev *dev, int irq); #endif /* __KERNEL__ */ #ifdef CONFIG_X86_64 -#include "pci_64.h" +#include <asm/pci_64.h> #endif /* implement the pci_ DMA API in terms of the generic device dma_ one */ diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index cb4e43bce98a..4fabcdf1cfa7 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -262,4 +262,6 @@ static inline void perf_check_microcode(void) { } static inline void amd_pmu_disable_virt(void) { } #endif +#define arch_perf_out_copy_user copy_from_user_nmi + #endif /* _ASM_X86_PERF_EVENT_H */ diff --git a/arch/x86/include/asm/perf_regs.h b/arch/x86/include/asm/perf_regs.h new file mode 100644 index 000000000000..3f2207bfd17b --- /dev/null +++ b/arch/x86/include/asm/perf_regs.h @@ -0,0 +1,33 @@ +#ifndef _ASM_X86_PERF_REGS_H +#define _ASM_X86_PERF_REGS_H + +enum perf_event_x86_regs { + PERF_REG_X86_AX, + PERF_REG_X86_BX, + PERF_REG_X86_CX, + PERF_REG_X86_DX, + PERF_REG_X86_SI, + PERF_REG_X86_DI, + PERF_REG_X86_BP, + PERF_REG_X86_SP, + PERF_REG_X86_IP, + PERF_REG_X86_FLAGS, + PERF_REG_X86_CS, + PERF_REG_X86_SS, + PERF_REG_X86_DS, + PERF_REG_X86_ES, + PERF_REG_X86_FS, + PERF_REG_X86_GS, + PERF_REG_X86_R8, + PERF_REG_X86_R9, + PERF_REG_X86_R10, + 
PERF_REG_X86_R11, + PERF_REG_X86_R12, + PERF_REG_X86_R13, + PERF_REG_X86_R14, + PERF_REG_X86_R15, + + PERF_REG_X86_32_MAX = PERF_REG_X86_GS + 1, + PERF_REG_X86_64_MAX = PERF_REG_X86_R15 + 1, +}; +#endif /* _ASM_X86_PERF_REGS_H */ diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 49afb3f41eb6..fc9948465293 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -384,9 +384,9 @@ pte_t *populate_extra_pte(unsigned long vaddr); #endif /* __ASSEMBLY__ */ #ifdef CONFIG_X86_32 -# include "pgtable_32.h" +# include <asm/pgtable_32.h> #else -# include "pgtable_64.h" +# include <asm/pgtable_64.h> #endif #ifndef __ASSEMBLY__ diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 013286a10c2c..ec8a1fc9505d 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -174,9 +174,9 @@ #endif #ifdef CONFIG_X86_32 -# include "pgtable_32_types.h" +# include <asm/pgtable_32_types.h> #else -# include "pgtable_64_types.h" +# include <asm/pgtable_64_types.h> #endif #ifndef __ASSEMBLY__ @@ -303,11 +303,9 @@ void set_pte_vaddr(unsigned long vaddr, pte_t pte); extern void native_pagetable_reserve(u64 start, u64 end); #ifdef CONFIG_X86_32 -extern void native_pagetable_setup_start(pgd_t *base); -extern void native_pagetable_setup_done(pgd_t *base); +extern void native_pagetable_init(void); #else -#define native_pagetable_setup_start x86_init_pgd_noop -#define native_pagetable_setup_done x86_init_pgd_noop +#define native_pagetable_init paging_init #endif struct seq_file; diff --git a/arch/x86/include/asm/posix_types.h b/arch/x86/include/asm/posix_types.h index 7ef7c3020e5c..bad3665c25fc 100644 --- a/arch/x86/include/asm/posix_types.h +++ b/arch/x86/include/asm/posix_types.h @@ -1,15 +1,15 @@ #ifdef __KERNEL__ # ifdef CONFIG_X86_32 -# include "posix_types_32.h" +# include <asm/posix_types_32.h> # else -# include "posix_types_64.h" +# include <asm/posix_types_64.h> # endif #else # ifdef __i386__ -# include "posix_types_32.h" +# include <asm/posix_types_32.h> # elif defined(__ILP32__) -# include "posix_types_x32.h" +# include <asm/posix_types_x32.h> # else -# include "posix_types_64.h" +# include <asm/posix_types_64.h> # endif #endif diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h index aea1d1d848c7..680cf09ed100 100644 --- a/arch/x86/include/asm/processor-flags.h +++ b/arch/x86/include/asm/processor-flags.h @@ -65,6 +65,7 @@ #define X86_CR4_PCIDE 0x00020000 /* enable PCID support */ #define X86_CR4_OSXSAVE 0x00040000 /* enable xsave and xrestore */ #define X86_CR4_SMEP 0x00100000 /* enable SMEP support */ +#define X86_CR4_SMAP 0x00200000 /* enable SMAP support */ /* * x86-64 Task Priority Register, CR8 diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index d048cad9bcad..b98c0d958ebb 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -423,7 +423,6 @@ DECLARE_INIT_PER_CPU(irq_stack_union); DECLARE_PER_CPU(char *, irq_stack_ptr); DECLARE_PER_CPU(unsigned int, irq_count); -extern unsigned long kernel_eflags; extern asmlinkage void ignore_sysret(void); #else /* X86_64 */ #ifdef CONFIG_CC_STACKPROTECTOR @@ -759,6 +758,8 @@ static inline void update_debugctlmsr(unsigned long debugctlmsr) wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr); } +extern void set_task_blockstep(struct task_struct *task, bool on); + /* * from system description table in BIOS. 
Mostly for MCA use, but * others may find it useful: diff --git a/arch/x86/include/asm/rcu.h b/arch/x86/include/asm/rcu.h new file mode 100644 index 000000000000..d1ac07a23979 --- /dev/null +++ b/arch/x86/include/asm/rcu.h @@ -0,0 +1,32 @@ +#ifndef _ASM_X86_RCU_H +#define _ASM_X86_RCU_H + +#ifndef __ASSEMBLY__ + +#include <linux/rcupdate.h> +#include <asm/ptrace.h> + +static inline void exception_enter(struct pt_regs *regs) +{ + rcu_user_exit(); +} + +static inline void exception_exit(struct pt_regs *regs) +{ +#ifdef CONFIG_RCU_USER_QS + if (user_mode(regs)) + rcu_user_enter(); +#endif +} + +#else /* __ASSEMBLY__ */ + +#ifdef CONFIG_RCU_USER_QS +# define SCHEDULE_USER call schedule_user +#else +# define SCHEDULE_USER call schedule +#endif + +#endif /* !__ASSEMBLY__ */ + +#endif diff --git a/arch/x86/include/asm/seccomp.h b/arch/x86/include/asm/seccomp.h index c62e58a5a90d..0f3d7f099224 100644 --- a/arch/x86/include/asm/seccomp.h +++ b/arch/x86/include/asm/seccomp.h @@ -1,5 +1,5 @@ #ifdef CONFIG_X86_32 -# include "seccomp_32.h" +# include <asm/seccomp_32.h> #else -# include "seccomp_64.h" +# include <asm/seccomp_64.h> #endif diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h index 598457cbd0f8..323973f4abf1 100644 --- a/arch/x86/include/asm/signal.h +++ b/arch/x86/include/asm/signal.h @@ -31,6 +31,10 @@ typedef struct { unsigned long sig[_NSIG_WORDS]; } sigset_t; +#ifndef CONFIG_COMPAT +typedef sigset_t compat_sigset_t; +#endif + #else /* Here we must cater to libcs that poke about in kernel headers. */ diff --git a/arch/x86/include/asm/smap.h b/arch/x86/include/asm/smap.h new file mode 100644 index 000000000000..8d3120f4e270 --- /dev/null +++ b/arch/x86/include/asm/smap.h @@ -0,0 +1,91 @@ +/* + * Supervisor Mode Access Prevention support + * + * Copyright (C) 2012 Intel Corporation + * Author: H. Peter Anvin <hpa@linux.intel.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. 
+ */ + +#ifndef _ASM_X86_SMAP_H +#define _ASM_X86_SMAP_H + +#include <linux/stringify.h> +#include <asm/nops.h> +#include <asm/cpufeature.h> + +/* "Raw" instruction opcodes */ +#define __ASM_CLAC .byte 0x0f,0x01,0xca +#define __ASM_STAC .byte 0x0f,0x01,0xcb + +#ifdef __ASSEMBLY__ + +#include <asm/alternative-asm.h> + +#ifdef CONFIG_X86_SMAP + +#define ASM_CLAC \ + 661: ASM_NOP3 ; \ + .pushsection .altinstr_replacement, "ax" ; \ + 662: __ASM_CLAC ; \ + .popsection ; \ + .pushsection .altinstructions, "a" ; \ + altinstruction_entry 661b, 662b, X86_FEATURE_SMAP, 3, 3 ; \ + .popsection + +#define ASM_STAC \ + 661: ASM_NOP3 ; \ + .pushsection .altinstr_replacement, "ax" ; \ + 662: __ASM_STAC ; \ + .popsection ; \ + .pushsection .altinstructions, "a" ; \ + altinstruction_entry 661b, 662b, X86_FEATURE_SMAP, 3, 3 ; \ + .popsection + +#else /* CONFIG_X86_SMAP */ + +#define ASM_CLAC +#define ASM_STAC + +#endif /* CONFIG_X86_SMAP */ + +#else /* __ASSEMBLY__ */ + +#include <asm/alternative.h> + +#ifdef CONFIG_X86_SMAP + +static __always_inline void clac(void) +{ + /* Note: a barrier is implicit in alternative() */ + alternative(ASM_NOP3, __stringify(__ASM_CLAC), X86_FEATURE_SMAP); +} + +static __always_inline void stac(void) +{ + /* Note: a barrier is implicit in alternative() */ + alternative(ASM_NOP3, __stringify(__ASM_STAC), X86_FEATURE_SMAP); +} + +/* These macros can be used in asm() statements */ +#define ASM_CLAC \ + ALTERNATIVE(ASM_NOP3, __stringify(__ASM_CLAC), X86_FEATURE_SMAP) +#define ASM_STAC \ + ALTERNATIVE(ASM_NOP3, __stringify(__ASM_STAC), X86_FEATURE_SMAP) + +#else /* CONFIG_X86_SMAP */ + +static inline void clac(void) { } +static inline void stac(void) { } + +#define ASM_CLAC +#define ASM_STAC + +#endif /* CONFIG_X86_SMAP */ + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_X86_SMAP_H */ diff --git a/arch/x86/include/asm/string.h b/arch/x86/include/asm/string.h index 6dfd6d9373a0..09224d7a5862 100644 --- a/arch/x86/include/asm/string.h +++ b/arch/x86/include/asm/string.h @@ -1,5 +1,5 @@ #ifdef CONFIG_X86_32 -# include "string_32.h" +# include <asm/string_32.h> #else -# include "string_64.h" +# include <asm/string_64.h> #endif diff --git a/arch/x86/include/asm/suspend.h b/arch/x86/include/asm/suspend.h index 9bd521fe4570..2fab6c2c3575 100644 --- a/arch/x86/include/asm/suspend.h +++ b/arch/x86/include/asm/suspend.h @@ -1,5 +1,5 @@ #ifdef CONFIG_X86_32 -# include "suspend_32.h" +# include <asm/suspend_32.h> #else -# include "suspend_64.h" +# include <asm/suspend_64.h> #endif diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index f2b83bc7d784..cdf5674dd23a 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -1,6 +1,135 @@ #ifndef __SVM_H #define __SVM_H +#define SVM_EXIT_READ_CR0 0x000 +#define SVM_EXIT_READ_CR3 0x003 +#define SVM_EXIT_READ_CR4 0x004 +#define SVM_EXIT_READ_CR8 0x008 +#define SVM_EXIT_WRITE_CR0 0x010 +#define SVM_EXIT_WRITE_CR3 0x013 +#define SVM_EXIT_WRITE_CR4 0x014 +#define SVM_EXIT_WRITE_CR8 0x018 +#define SVM_EXIT_READ_DR0 0x020 +#define SVM_EXIT_READ_DR1 0x021 +#define SVM_EXIT_READ_DR2 0x022 +#define SVM_EXIT_READ_DR3 0x023 +#define SVM_EXIT_READ_DR4 0x024 +#define SVM_EXIT_READ_DR5 0x025 +#define SVM_EXIT_READ_DR6 0x026 +#define SVM_EXIT_READ_DR7 0x027 +#define SVM_EXIT_WRITE_DR0 0x030 +#define SVM_EXIT_WRITE_DR1 0x031 +#define SVM_EXIT_WRITE_DR2 0x032 +#define SVM_EXIT_WRITE_DR3 0x033 +#define SVM_EXIT_WRITE_DR4 0x034 +#define SVM_EXIT_WRITE_DR5 0x035 +#define SVM_EXIT_WRITE_DR6 0x036 +#define SVM_EXIT_WRITE_DR7 0x037 
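/*
 * Editorial sketch, not part of the patch: the usage pattern the new
 * <asm/smap.h> helpers above enable.  stac() sets EFLAGS.AC so the
 * kernel may touch user pages, clac() clears it again, and on CPUs
 * without X86_FEATURE_SMAP both patch down to NOPs via alternative().
 * The function name is invented, and a real uaccess primitive would
 * also need exception-table fault handling, omitted here.
 */
static int read_u32_from_user_sketch(u32 *dst, const u32 __user *src)
{
	stac();					/* open the user-access window */
	*dst = *(__force const u32 *)src;	/* real code would fault-handle this */
	clac();					/* close it before running other code */
	return 0;
}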
+#define SVM_EXIT_EXCP_BASE 0x040 +#define SVM_EXIT_INTR 0x060 +#define SVM_EXIT_NMI 0x061 +#define SVM_EXIT_SMI 0x062 +#define SVM_EXIT_INIT 0x063 +#define SVM_EXIT_VINTR 0x064 +#define SVM_EXIT_CR0_SEL_WRITE 0x065 +#define SVM_EXIT_IDTR_READ 0x066 +#define SVM_EXIT_GDTR_READ 0x067 +#define SVM_EXIT_LDTR_READ 0x068 +#define SVM_EXIT_TR_READ 0x069 +#define SVM_EXIT_IDTR_WRITE 0x06a +#define SVM_EXIT_GDTR_WRITE 0x06b +#define SVM_EXIT_LDTR_WRITE 0x06c +#define SVM_EXIT_TR_WRITE 0x06d +#define SVM_EXIT_RDTSC 0x06e +#define SVM_EXIT_RDPMC 0x06f +#define SVM_EXIT_PUSHF 0x070 +#define SVM_EXIT_POPF 0x071 +#define SVM_EXIT_CPUID 0x072 +#define SVM_EXIT_RSM 0x073 +#define SVM_EXIT_IRET 0x074 +#define SVM_EXIT_SWINT 0x075 +#define SVM_EXIT_INVD 0x076 +#define SVM_EXIT_PAUSE 0x077 +#define SVM_EXIT_HLT 0x078 +#define SVM_EXIT_INVLPG 0x079 +#define SVM_EXIT_INVLPGA 0x07a +#define SVM_EXIT_IOIO 0x07b +#define SVM_EXIT_MSR 0x07c +#define SVM_EXIT_TASK_SWITCH 0x07d +#define SVM_EXIT_FERR_FREEZE 0x07e +#define SVM_EXIT_SHUTDOWN 0x07f +#define SVM_EXIT_VMRUN 0x080 +#define SVM_EXIT_VMMCALL 0x081 +#define SVM_EXIT_VMLOAD 0x082 +#define SVM_EXIT_VMSAVE 0x083 +#define SVM_EXIT_STGI 0x084 +#define SVM_EXIT_CLGI 0x085 +#define SVM_EXIT_SKINIT 0x086 +#define SVM_EXIT_RDTSCP 0x087 +#define SVM_EXIT_ICEBP 0x088 +#define SVM_EXIT_WBINVD 0x089 +#define SVM_EXIT_MONITOR 0x08a +#define SVM_EXIT_MWAIT 0x08b +#define SVM_EXIT_MWAIT_COND 0x08c +#define SVM_EXIT_XSETBV 0x08d +#define SVM_EXIT_NPF 0x400 + +#define SVM_EXIT_ERR -1 + +#define SVM_EXIT_REASONS \ + { SVM_EXIT_READ_CR0, "read_cr0" }, \ + { SVM_EXIT_READ_CR3, "read_cr3" }, \ + { SVM_EXIT_READ_CR4, "read_cr4" }, \ + { SVM_EXIT_READ_CR8, "read_cr8" }, \ + { SVM_EXIT_WRITE_CR0, "write_cr0" }, \ + { SVM_EXIT_WRITE_CR3, "write_cr3" }, \ + { SVM_EXIT_WRITE_CR4, "write_cr4" }, \ + { SVM_EXIT_WRITE_CR8, "write_cr8" }, \ + { SVM_EXIT_READ_DR0, "read_dr0" }, \ + { SVM_EXIT_READ_DR1, "read_dr1" }, \ + { SVM_EXIT_READ_DR2, "read_dr2" }, \ + { SVM_EXIT_READ_DR3, "read_dr3" }, \ + { SVM_EXIT_WRITE_DR0, "write_dr0" }, \ + { SVM_EXIT_WRITE_DR1, "write_dr1" }, \ + { SVM_EXIT_WRITE_DR2, "write_dr2" }, \ + { SVM_EXIT_WRITE_DR3, "write_dr3" }, \ + { SVM_EXIT_WRITE_DR5, "write_dr5" }, \ + { SVM_EXIT_WRITE_DR7, "write_dr7" }, \ + { SVM_EXIT_EXCP_BASE + DB_VECTOR, "DB excp" }, \ + { SVM_EXIT_EXCP_BASE + BP_VECTOR, "BP excp" }, \ + { SVM_EXIT_EXCP_BASE + UD_VECTOR, "UD excp" }, \ + { SVM_EXIT_EXCP_BASE + PF_VECTOR, "PF excp" }, \ + { SVM_EXIT_EXCP_BASE + NM_VECTOR, "NM excp" }, \ + { SVM_EXIT_EXCP_BASE + MC_VECTOR, "MC excp" }, \ + { SVM_EXIT_INTR, "interrupt" }, \ + { SVM_EXIT_NMI, "nmi" }, \ + { SVM_EXIT_SMI, "smi" }, \ + { SVM_EXIT_INIT, "init" }, \ + { SVM_EXIT_VINTR, "vintr" }, \ + { SVM_EXIT_CPUID, "cpuid" }, \ + { SVM_EXIT_INVD, "invd" }, \ + { SVM_EXIT_HLT, "hlt" }, \ + { SVM_EXIT_INVLPG, "invlpg" }, \ + { SVM_EXIT_INVLPGA, "invlpga" }, \ + { SVM_EXIT_IOIO, "io" }, \ + { SVM_EXIT_MSR, "msr" }, \ + { SVM_EXIT_TASK_SWITCH, "task_switch" }, \ + { SVM_EXIT_SHUTDOWN, "shutdown" }, \ + { SVM_EXIT_VMRUN, "vmrun" }, \ + { SVM_EXIT_VMMCALL, "hypercall" }, \ + { SVM_EXIT_VMLOAD, "vmload" }, \ + { SVM_EXIT_VMSAVE, "vmsave" }, \ + { SVM_EXIT_STGI, "stgi" }, \ + { SVM_EXIT_CLGI, "clgi" }, \ + { SVM_EXIT_SKINIT, "skinit" }, \ + { SVM_EXIT_WBINVD, "wbinvd" }, \ + { SVM_EXIT_MONITOR, "monitor" }, \ + { SVM_EXIT_MWAIT, "mwait" }, \ + { SVM_EXIT_XSETBV, "xsetbv" }, \ + { SVM_EXIT_NPF, "npf" } + +#ifdef __KERNEL__ + enum { INTERCEPT_INTR, INTERCEPT_NMI, @@ -264,81 +393,6 @@ struct __attribute__ 
((__packed__)) vmcb { #define SVM_EXITINFO_REG_MASK 0x0F -#define SVM_EXIT_READ_CR0 0x000 -#define SVM_EXIT_READ_CR3 0x003 -#define SVM_EXIT_READ_CR4 0x004 -#define SVM_EXIT_READ_CR8 0x008 -#define SVM_EXIT_WRITE_CR0 0x010 -#define SVM_EXIT_WRITE_CR3 0x013 -#define SVM_EXIT_WRITE_CR4 0x014 -#define SVM_EXIT_WRITE_CR8 0x018 -#define SVM_EXIT_READ_DR0 0x020 -#define SVM_EXIT_READ_DR1 0x021 -#define SVM_EXIT_READ_DR2 0x022 -#define SVM_EXIT_READ_DR3 0x023 -#define SVM_EXIT_READ_DR4 0x024 -#define SVM_EXIT_READ_DR5 0x025 -#define SVM_EXIT_READ_DR6 0x026 -#define SVM_EXIT_READ_DR7 0x027 -#define SVM_EXIT_WRITE_DR0 0x030 -#define SVM_EXIT_WRITE_DR1 0x031 -#define SVM_EXIT_WRITE_DR2 0x032 -#define SVM_EXIT_WRITE_DR3 0x033 -#define SVM_EXIT_WRITE_DR4 0x034 -#define SVM_EXIT_WRITE_DR5 0x035 -#define SVM_EXIT_WRITE_DR6 0x036 -#define SVM_EXIT_WRITE_DR7 0x037 -#define SVM_EXIT_EXCP_BASE 0x040 -#define SVM_EXIT_INTR 0x060 -#define SVM_EXIT_NMI 0x061 -#define SVM_EXIT_SMI 0x062 -#define SVM_EXIT_INIT 0x063 -#define SVM_EXIT_VINTR 0x064 -#define SVM_EXIT_CR0_SEL_WRITE 0x065 -#define SVM_EXIT_IDTR_READ 0x066 -#define SVM_EXIT_GDTR_READ 0x067 -#define SVM_EXIT_LDTR_READ 0x068 -#define SVM_EXIT_TR_READ 0x069 -#define SVM_EXIT_IDTR_WRITE 0x06a -#define SVM_EXIT_GDTR_WRITE 0x06b -#define SVM_EXIT_LDTR_WRITE 0x06c -#define SVM_EXIT_TR_WRITE 0x06d -#define SVM_EXIT_RDTSC 0x06e -#define SVM_EXIT_RDPMC 0x06f -#define SVM_EXIT_PUSHF 0x070 -#define SVM_EXIT_POPF 0x071 -#define SVM_EXIT_CPUID 0x072 -#define SVM_EXIT_RSM 0x073 -#define SVM_EXIT_IRET 0x074 -#define SVM_EXIT_SWINT 0x075 -#define SVM_EXIT_INVD 0x076 -#define SVM_EXIT_PAUSE 0x077 -#define SVM_EXIT_HLT 0x078 -#define SVM_EXIT_INVLPG 0x079 -#define SVM_EXIT_INVLPGA 0x07a -#define SVM_EXIT_IOIO 0x07b -#define SVM_EXIT_MSR 0x07c -#define SVM_EXIT_TASK_SWITCH 0x07d -#define SVM_EXIT_FERR_FREEZE 0x07e -#define SVM_EXIT_SHUTDOWN 0x07f -#define SVM_EXIT_VMRUN 0x080 -#define SVM_EXIT_VMMCALL 0x081 -#define SVM_EXIT_VMLOAD 0x082 -#define SVM_EXIT_VMSAVE 0x083 -#define SVM_EXIT_STGI 0x084 -#define SVM_EXIT_CLGI 0x085 -#define SVM_EXIT_SKINIT 0x086 -#define SVM_EXIT_RDTSCP 0x087 -#define SVM_EXIT_ICEBP 0x088 -#define SVM_EXIT_WBINVD 0x089 -#define SVM_EXIT_MONITOR 0x08a -#define SVM_EXIT_MWAIT 0x08b -#define SVM_EXIT_MWAIT_COND 0x08c -#define SVM_EXIT_XSETBV 0x08d -#define SVM_EXIT_NPF 0x400 - -#define SVM_EXIT_ERR -1 - #define SVM_CR0_SELECTIVE_MASK (X86_CR0_TS | X86_CR0_MP) #define SVM_VMLOAD ".byte 0x0f, 0x01, 0xda" @@ -350,3 +404,4 @@ struct __attribute__ ((__packed__)) vmcb { #endif +#endif diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h index 3fda9db48819..4ca1c611b552 100644 --- a/arch/x86/include/asm/sys_ia32.h +++ b/arch/x86/include/asm/sys_ia32.h @@ -40,7 +40,7 @@ asmlinkage long sys32_sigaction(int, struct old_sigaction32 __user *, struct old_sigaction32 __user *); asmlinkage long sys32_alarm(unsigned int); -asmlinkage long sys32_waitpid(compat_pid_t, unsigned int *, int); +asmlinkage long sys32_waitpid(compat_pid_t, unsigned int __user *, int); asmlinkage long sys32_sysfs(int, u32, u32); asmlinkage long sys32_sched_rr_get_interval(compat_pid_t, diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 89f794f007ec..c535d847e3b5 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -89,6 +89,7 @@ struct thread_info { #define TIF_NOTSC 16 /* TSC is not accessible in userland */ #define TIF_IA32 17 /* IA32 compatibility process */ #define TIF_FORK 18 /* 
ret_from_fork */ +#define TIF_NOHZ 19 /* in adaptive nohz mode */ #define TIF_MEMDIE 20 /* is terminating due to OOM killer */ #define TIF_DEBUG 21 /* uses debug registers */ #define TIF_IO_BITMAP 22 /* uses I/O bitmap */ @@ -114,6 +115,7 @@ struct thread_info { #define _TIF_NOTSC (1 << TIF_NOTSC) #define _TIF_IA32 (1 << TIF_IA32) #define _TIF_FORK (1 << TIF_FORK) +#define _TIF_NOHZ (1 << TIF_NOHZ) #define _TIF_DEBUG (1 << TIF_DEBUG) #define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) #define _TIF_FORCED_TF (1 << TIF_FORCED_TF) @@ -126,12 +128,13 @@ struct thread_info { /* work to do in syscall_trace_enter() */ #define _TIF_WORK_SYSCALL_ENTRY \ (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | _TIF_SYSCALL_AUDIT | \ - _TIF_SECCOMP | _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT) + _TIF_SECCOMP | _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT | \ + _TIF_NOHZ) /* work to do in syscall_trace_leave() */ #define _TIF_WORK_SYSCALL_EXIT \ (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SINGLESTEP | \ - _TIF_SYSCALL_TRACEPOINT) + _TIF_SYSCALL_TRACEPOINT | _TIF_NOHZ) /* work to do on interrupt/exception return */ #define _TIF_WORK_MASK \ @@ -141,7 +144,8 @@ struct thread_info { /* work to do on any return to user space */ #define _TIF_ALLWORK_MASK \ - ((0x0000FFFF & ~_TIF_SECCOMP) | _TIF_SYSCALL_TRACEPOINT) + ((0x0000FFFF & ~_TIF_SECCOMP) | _TIF_SYSCALL_TRACEPOINT | \ + _TIF_NOHZ) /* Only used for 64 bit */ #define _TIF_DO_NOTIFY_MASK \ diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index e1f3a17034fc..7ccf8d131535 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -9,6 +9,7 @@ #include <linux/string.h> #include <asm/asm.h> #include <asm/page.h> +#include <asm/smap.h> #define VERIFY_READ 0 #define VERIFY_WRITE 1 @@ -192,9 +193,10 @@ extern int __get_user_bad(void); #ifdef CONFIG_X86_32 #define __put_user_asm_u64(x, addr, err, errret) \ - asm volatile("1: movl %%eax,0(%2)\n" \ + asm volatile(ASM_STAC "\n" \ + "1: movl %%eax,0(%2)\n" \ "2: movl %%edx,4(%2)\n" \ - "3:\n" \ + "3: " ASM_CLAC "\n" \ ".section .fixup,\"ax\"\n" \ "4: movl %3,%0\n" \ " jmp 3b\n" \ @@ -205,9 +207,10 @@ extern int __get_user_bad(void); : "A" (x), "r" (addr), "i" (errret), "0" (err)) #define __put_user_asm_ex_u64(x, addr) \ - asm volatile("1: movl %%eax,0(%1)\n" \ + asm volatile(ASM_STAC "\n" \ + "1: movl %%eax,0(%1)\n" \ "2: movl %%edx,4(%1)\n" \ - "3:\n" \ + "3: " ASM_CLAC "\n" \ _ASM_EXTABLE_EX(1b, 2b) \ _ASM_EXTABLE_EX(2b, 3b) \ : : "A" (x), "r" (addr)) @@ -379,8 +382,9 @@ do { \ } while (0) #define __get_user_asm(x, addr, err, itype, rtype, ltype, errret) \ - asm volatile("1: mov"itype" %2,%"rtype"1\n" \ - "2:\n" \ + asm volatile(ASM_STAC "\n" \ + "1: mov"itype" %2,%"rtype"1\n" \ + "2: " ASM_CLAC "\n" \ ".section .fixup,\"ax\"\n" \ "3: mov %3,%0\n" \ " xor"itype" %"rtype"1,%"rtype"1\n" \ @@ -443,8 +447,9 @@ struct __large_struct { unsigned long buf[100]; }; * aliasing issues. */ #define __put_user_asm(x, addr, err, itype, rtype, ltype, errret) \ - asm volatile("1: mov"itype" %"rtype"1,%2\n" \ - "2:\n" \ + asm volatile(ASM_STAC "\n" \ + "1: mov"itype" %"rtype"1,%2\n" \ + "2: " ASM_CLAC "\n" \ ".section .fixup,\"ax\"\n" \ "3: mov %3,%0\n" \ " jmp 2b\n" \ @@ -463,13 +468,13 @@ struct __large_struct { unsigned long buf[100]; }; * uaccess_try and catch */ #define uaccess_try do { \ - int prev_err = current_thread_info()->uaccess_err; \ current_thread_info()->uaccess_err = 0; \ + stac(); \ barrier(); #define uaccess_catch(err) \ + clac(); \ (err) |= (current_thread_info()->uaccess_err ? 
-EFAULT : 0); \ - current_thread_info()->uaccess_err = prev_err; \ } while (0) /** @@ -569,6 +574,9 @@ strncpy_from_user(char *dst, const char __user *src, long count); extern __must_check long strlen_user(const char __user *str); extern __must_check long strnlen_user(const char __user *str, long n); +unsigned long __must_check clear_user(void __user *mem, unsigned long len); +unsigned long __must_check __clear_user(void __user *mem, unsigned long len); + /* * movsl can be slow when source and dest are not both 8-byte aligned */ @@ -581,9 +589,9 @@ extern struct movsl_mask { #define ARCH_HAS_NOCACHE_UACCESS 1 #ifdef CONFIG_X86_32 -# include "uaccess_32.h" +# include <asm/uaccess_32.h> #else -# include "uaccess_64.h" +# include <asm/uaccess_64.h> #endif #endif /* _ASM_X86_UACCESS_H */ diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index 576e39bca6ad..7f760a9f1f61 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h @@ -213,7 +213,4 @@ static inline unsigned long __must_check copy_from_user(void *to, return n; } -unsigned long __must_check clear_user(void __user *mem, unsigned long len); -unsigned long __must_check __clear_user(void __user *mem, unsigned long len); - #endif /* _ASM_X86_UACCESS_32_H */ diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index d8def8b3dba0..142810c457dc 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -217,9 +217,6 @@ int __copy_in_user(void __user *dst, const void __user *src, unsigned size) } } -__must_check unsigned long clear_user(void __user *mem, unsigned long len); -__must_check unsigned long __clear_user(void __user *mem, unsigned long len); - static __must_check __always_inline int __copy_from_user_inatomic(void *dst, const void __user *src, unsigned size) { diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h index f3971bbcd1de..8ff8be7835ab 100644 --- a/arch/x86/include/asm/uprobes.h +++ b/arch/x86/include/asm/uprobes.h @@ -42,10 +42,11 @@ struct arch_uprobe { }; struct arch_uprobe_task { - unsigned long saved_trap_nr; #ifdef CONFIG_X86_64 unsigned long saved_scratch_register; #endif + unsigned int saved_trap_nr; + unsigned int saved_tf; }; extern int arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long addr); diff --git a/arch/x86/include/asm/user.h b/arch/x86/include/asm/user.h index 24532c7da3d6..ccab4af1646d 100644 --- a/arch/x86/include/asm/user.h +++ b/arch/x86/include/asm/user.h @@ -2,9 +2,9 @@ #define _ASM_X86_USER_H #ifdef CONFIG_X86_32 -# include "user_32.h" +# include <asm/user_32.h> #else -# include "user_64.h" +# include <asm/user_64.h> #endif #include <asm/types.h> diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h index bb0522850b74..fddb53d63915 100644 --- a/arch/x86/include/asm/vdso.h +++ b/arch/x86/include/asm/vdso.h @@ -11,7 +11,8 @@ extern const char VDSO32_PRELINK[]; #define VDSO32_SYMBOL(base, name) \ ({ \ extern const char VDSO32_##name[]; \ - (void *)(VDSO32_##name - VDSO32_PRELINK + (unsigned long)(base)); \ + (void __user *)(VDSO32_##name - VDSO32_PRELINK + \ + (unsigned long)(base)); \ }) #endif diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 74fcb963595b..36ec21c36d68 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -25,6 +25,88 @@ * */ +#define VMX_EXIT_REASONS_FAILED_VMENTRY 0x80000000 + +#define EXIT_REASON_EXCEPTION_NMI 0 +#define 
EXIT_REASON_EXTERNAL_INTERRUPT 1 +#define EXIT_REASON_TRIPLE_FAULT 2 + +#define EXIT_REASON_PENDING_INTERRUPT 7 +#define EXIT_REASON_NMI_WINDOW 8 +#define EXIT_REASON_TASK_SWITCH 9 +#define EXIT_REASON_CPUID 10 +#define EXIT_REASON_HLT 12 +#define EXIT_REASON_INVD 13 +#define EXIT_REASON_INVLPG 14 +#define EXIT_REASON_RDPMC 15 +#define EXIT_REASON_RDTSC 16 +#define EXIT_REASON_VMCALL 18 +#define EXIT_REASON_VMCLEAR 19 +#define EXIT_REASON_VMLAUNCH 20 +#define EXIT_REASON_VMPTRLD 21 +#define EXIT_REASON_VMPTRST 22 +#define EXIT_REASON_VMREAD 23 +#define EXIT_REASON_VMRESUME 24 +#define EXIT_REASON_VMWRITE 25 +#define EXIT_REASON_VMOFF 26 +#define EXIT_REASON_VMON 27 +#define EXIT_REASON_CR_ACCESS 28 +#define EXIT_REASON_DR_ACCESS 29 +#define EXIT_REASON_IO_INSTRUCTION 30 +#define EXIT_REASON_MSR_READ 31 +#define EXIT_REASON_MSR_WRITE 32 +#define EXIT_REASON_INVALID_STATE 33 +#define EXIT_REASON_MWAIT_INSTRUCTION 36 +#define EXIT_REASON_MONITOR_INSTRUCTION 39 +#define EXIT_REASON_PAUSE_INSTRUCTION 40 +#define EXIT_REASON_MCE_DURING_VMENTRY 41 +#define EXIT_REASON_TPR_BELOW_THRESHOLD 43 +#define EXIT_REASON_APIC_ACCESS 44 +#define EXIT_REASON_EPT_VIOLATION 48 +#define EXIT_REASON_EPT_MISCONFIG 49 +#define EXIT_REASON_WBINVD 54 +#define EXIT_REASON_XSETBV 55 +#define EXIT_REASON_INVPCID 58 + +#define VMX_EXIT_REASONS \ + { EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \ + { EXIT_REASON_EXTERNAL_INTERRUPT, "EXTERNAL_INTERRUPT" }, \ + { EXIT_REASON_TRIPLE_FAULT, "TRIPLE_FAULT" }, \ + { EXIT_REASON_PENDING_INTERRUPT, "PENDING_INTERRUPT" }, \ + { EXIT_REASON_NMI_WINDOW, "NMI_WINDOW" }, \ + { EXIT_REASON_TASK_SWITCH, "TASK_SWITCH" }, \ + { EXIT_REASON_CPUID, "CPUID" }, \ + { EXIT_REASON_HLT, "HLT" }, \ + { EXIT_REASON_INVLPG, "INVLPG" }, \ + { EXIT_REASON_RDPMC, "RDPMC" }, \ + { EXIT_REASON_RDTSC, "RDTSC" }, \ + { EXIT_REASON_VMCALL, "VMCALL" }, \ + { EXIT_REASON_VMCLEAR, "VMCLEAR" }, \ + { EXIT_REASON_VMLAUNCH, "VMLAUNCH" }, \ + { EXIT_REASON_VMPTRLD, "VMPTRLD" }, \ + { EXIT_REASON_VMPTRST, "VMPTRST" }, \ + { EXIT_REASON_VMREAD, "VMREAD" }, \ + { EXIT_REASON_VMRESUME, "VMRESUME" }, \ + { EXIT_REASON_VMWRITE, "VMWRITE" }, \ + { EXIT_REASON_VMOFF, "VMOFF" }, \ + { EXIT_REASON_VMON, "VMON" }, \ + { EXIT_REASON_CR_ACCESS, "CR_ACCESS" }, \ + { EXIT_REASON_DR_ACCESS, "DR_ACCESS" }, \ + { EXIT_REASON_IO_INSTRUCTION, "IO_INSTRUCTION" }, \ + { EXIT_REASON_MSR_READ, "MSR_READ" }, \ + { EXIT_REASON_MSR_WRITE, "MSR_WRITE" }, \ + { EXIT_REASON_MWAIT_INSTRUCTION, "MWAIT_INSTRUCTION" }, \ + { EXIT_REASON_MONITOR_INSTRUCTION, "MONITOR_INSTRUCTION" }, \ + { EXIT_REASON_PAUSE_INSTRUCTION, "PAUSE_INSTRUCTION" }, \ + { EXIT_REASON_MCE_DURING_VMENTRY, "MCE_DURING_VMENTRY" }, \ + { EXIT_REASON_TPR_BELOW_THRESHOLD, "TPR_BELOW_THRESHOLD" }, \ + { EXIT_REASON_APIC_ACCESS, "APIC_ACCESS" }, \ + { EXIT_REASON_EPT_VIOLATION, "EPT_VIOLATION" }, \ + { EXIT_REASON_EPT_MISCONFIG, "EPT_MISCONFIG" }, \ + { EXIT_REASON_WBINVD, "WBINVD" } + +#ifdef __KERNEL__ + #include <linux/types.h> /* @@ -241,49 +323,6 @@ enum vmcs_field { HOST_RIP = 0x00006c16, }; -#define VMX_EXIT_REASONS_FAILED_VMENTRY 0x80000000 - -#define EXIT_REASON_EXCEPTION_NMI 0 -#define EXIT_REASON_EXTERNAL_INTERRUPT 1 -#define EXIT_REASON_TRIPLE_FAULT 2 - -#define EXIT_REASON_PENDING_INTERRUPT 7 -#define EXIT_REASON_NMI_WINDOW 8 -#define EXIT_REASON_TASK_SWITCH 9 -#define EXIT_REASON_CPUID 10 -#define EXIT_REASON_HLT 12 -#define EXIT_REASON_INVD 13 -#define EXIT_REASON_INVLPG 14 -#define EXIT_REASON_RDPMC 15 -#define EXIT_REASON_RDTSC 16 -#define EXIT_REASON_VMCALL 18 
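/*
 * Editorial sketch: why the SVM_EXIT_REASONS and VMX_EXIT_REASONS
 * string tables above move in front of the #ifdef __KERNEL__ guard —
 * they are meant to be shared with tracing and userspace tools, and
 * are typically consumed through __print_symbolic() in a trace event.
 * The event below is hypothetical, for illustration only.
 */
TRACE_EVENT(vmexit_sketch,
	TP_PROTO(unsigned int exit_reason),
	TP_ARGS(exit_reason),
	TP_STRUCT__entry(__field(unsigned int, exit_reason)),
	TP_fast_assign(__entry->exit_reason = exit_reason;),
	TP_printk("reason %s",
		  __print_symbolic(__entry->exit_reason, VMX_EXIT_REASONS))
);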
-#define EXIT_REASON_VMCLEAR 19 -#define EXIT_REASON_VMLAUNCH 20 -#define EXIT_REASON_VMPTRLD 21 -#define EXIT_REASON_VMPTRST 22 -#define EXIT_REASON_VMREAD 23 -#define EXIT_REASON_VMRESUME 24 -#define EXIT_REASON_VMWRITE 25 -#define EXIT_REASON_VMOFF 26 -#define EXIT_REASON_VMON 27 -#define EXIT_REASON_CR_ACCESS 28 -#define EXIT_REASON_DR_ACCESS 29 -#define EXIT_REASON_IO_INSTRUCTION 30 -#define EXIT_REASON_MSR_READ 31 -#define EXIT_REASON_MSR_WRITE 32 -#define EXIT_REASON_INVALID_STATE 33 -#define EXIT_REASON_MWAIT_INSTRUCTION 36 -#define EXIT_REASON_MONITOR_INSTRUCTION 39 -#define EXIT_REASON_PAUSE_INSTRUCTION 40 -#define EXIT_REASON_MCE_DURING_VMENTRY 41 -#define EXIT_REASON_TPR_BELOW_THRESHOLD 43 -#define EXIT_REASON_APIC_ACCESS 44 -#define EXIT_REASON_EPT_VIOLATION 48 -#define EXIT_REASON_EPT_MISCONFIG 49 -#define EXIT_REASON_WBINVD 54 -#define EXIT_REASON_XSETBV 55 -#define EXIT_REASON_INVPCID 58 - /* * Interruption-information format */ @@ -488,3 +527,5 @@ enum vm_instruction_error_number { }; #endif + +#endif diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 38155f667144..57693498519c 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -81,12 +81,13 @@ struct x86_init_mapping { /** * struct x86_init_paging - platform specific paging functions - * @pagetable_setup_start: platform specific pre paging_init() call - * @pagetable_setup_done: platform specific post paging_init() call + * @pagetable_init: platform specific paging initialization call to setup + * the kernel pagetables and prepare accessors functions. + * Callback must call paging_init(). Called once after the + * direct mapping for phys memory is available. */ struct x86_init_paging { - void (*pagetable_setup_start)(pgd_t *base); - void (*pagetable_setup_done)(pgd_t *base); + void (*pagetable_init)(void); }; /** diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h index cbf0c9d50b92..1707cfa928fb 100644 --- a/arch/x86/include/asm/xen/interface.h +++ b/arch/x86/include/asm/xen/interface.h @@ -47,6 +47,10 @@ #endif #ifndef __ASSEMBLY__ +/* Explicitly size integers that represent pfns in the public interface + * with Xen so that on ARM we can have one ABI that works for 32 and 64 + * bit guests. */ +typedef unsigned long xen_pfn_t; /* Guest handles for primitive C types. */ __DEFINE_GUEST_HANDLE(uchar, unsigned char); __DEFINE_GUEST_HANDLE(uint, unsigned int); @@ -57,6 +61,7 @@ DEFINE_GUEST_HANDLE(long); DEFINE_GUEST_HANDLE(void); DEFINE_GUEST_HANDLE(uint64_t); DEFINE_GUEST_HANDLE(uint32_t); +DEFINE_GUEST_HANDLE(xen_pfn_t); #endif #ifndef HYPERVISOR_VIRT_START @@ -116,11 +121,13 @@ struct arch_shared_info { #endif /* !__ASSEMBLY__ */ #ifdef CONFIG_X86_32 -#include "interface_32.h" +#include <asm/xen/interface_32.h> #else -#include "interface_64.h" +#include <asm/xen/interface_64.h> #endif +#include <asm/pvclock-abi.h> + #ifndef __ASSEMBLY__ /* * The following is all CPU context. 
Note that the fpu_ctxt block is filled diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index 93971e841dd5..472b9b783019 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -51,7 +51,8 @@ extern unsigned long set_phys_range_identity(unsigned long pfn_s, extern int m2p_add_override(unsigned long mfn, struct page *page, struct gnttab_map_grant_ref *kmap_op); -extern int m2p_remove_override(struct page *page, bool clear_pte); +extern int m2p_remove_override(struct page *page, + struct gnttab_map_grant_ref *kmap_op); extern struct page *m2p_find_override(unsigned long mfn); extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn); diff --git a/arch/x86/include/asm/xen/swiotlb-xen.h b/arch/x86/include/asm/xen/swiotlb-xen.h index 1be1ab7d6a41..ee52fcac6f72 100644 --- a/arch/x86/include/asm/xen/swiotlb-xen.h +++ b/arch/x86/include/asm/xen/swiotlb-xen.h @@ -5,10 +5,12 @@ extern int xen_swiotlb; extern int __init pci_xen_swiotlb_detect(void); extern void __init pci_xen_swiotlb_init(void); +extern int pci_xen_swiotlb_init_late(void); #else #define xen_swiotlb (0) static inline int __init pci_xen_swiotlb_detect(void) { return 0; } static inline void __init pci_xen_swiotlb_init(void) { } +static inline int pci_xen_swiotlb_init_late(void) { return -ENXIO; } #endif #endif /* _ASM_X86_SWIOTLB_XEN_H */ diff --git a/arch/x86/include/asm/xor.h b/arch/x86/include/asm/xor.h index 7fcf6f3dbcc3..f8fde90bc45e 100644 --- a/arch/x86/include/asm/xor.h +++ b/arch/x86/include/asm/xor.h @@ -3,8 +3,8 @@ # include <asm-generic/xor.h> #else #ifdef CONFIG_X86_32 -# include "xor_32.h" +# include <asm/xor_32.h> #else -# include "xor_64.h" +# include <asm/xor_64.h> #endif #endif diff --git a/arch/x86/include/asm/xor_32.h b/arch/x86/include/asm/xor_32.h index 454570891bdc..f79cb7ec0e06 100644 --- a/arch/x86/include/asm/xor_32.h +++ b/arch/x86/include/asm/xor_32.h @@ -534,38 +534,6 @@ static struct xor_block_template xor_block_p5_mmx = { * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo) */ -#define XMMS_SAVE \ -do { \ - preempt_disable(); \ - cr0 = read_cr0(); \ - clts(); \ - asm volatile( \ - "movups %%xmm0,(%0) ;\n\t" \ - "movups %%xmm1,0x10(%0) ;\n\t" \ - "movups %%xmm2,0x20(%0) ;\n\t" \ - "movups %%xmm3,0x30(%0) ;\n\t" \ - : \ - : "r" (xmm_save) \ - : "memory"); \ -} while (0) - -#define XMMS_RESTORE \ -do { \ - asm volatile( \ - "sfence ;\n\t" \ - "movups (%0),%%xmm0 ;\n\t" \ - "movups 0x10(%0),%%xmm1 ;\n\t" \ - "movups 0x20(%0),%%xmm2 ;\n\t" \ - "movups 0x30(%0),%%xmm3 ;\n\t" \ - : \ - : "r" (xmm_save) \ - : "memory"); \ - write_cr0(cr0); \ - preempt_enable(); \ -} while (0) - -#define ALIGN16 __attribute__((aligned(16))) - #define OFFS(x) "16*("#x")" #define PF_OFFS(x) "256+16*("#x")" #define PF0(x) " prefetchnta "PF_OFFS(x)"(%1) ;\n" @@ -587,10 +555,8 @@ static void xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) { unsigned long lines = bytes >> 8; - char xmm_save[16*4] ALIGN16; - int cr0; - XMMS_SAVE; + kernel_fpu_begin(); asm volatile( #undef BLOCK @@ -633,7 +599,7 @@ xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) : : "memory"); - XMMS_RESTORE; + kernel_fpu_end(); } static void @@ -641,10 +607,8 @@ xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, unsigned long *p3) { unsigned long lines = bytes >> 8; - char xmm_save[16*4] ALIGN16; - int cr0; - XMMS_SAVE; + kernel_fpu_begin(); asm volatile( #undef BLOCK @@ -694,7 +658,7 @@ xor_sse_3(unsigned long bytes, 
unsigned long *p1, unsigned long *p2, : : "memory" ); - XMMS_RESTORE; + kernel_fpu_end(); } static void @@ -702,10 +666,8 @@ xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, unsigned long *p3, unsigned long *p4) { unsigned long lines = bytes >> 8; - char xmm_save[16*4] ALIGN16; - int cr0; - XMMS_SAVE; + kernel_fpu_begin(); asm volatile( #undef BLOCK @@ -762,7 +724,7 @@ xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, : : "memory" ); - XMMS_RESTORE; + kernel_fpu_end(); } static void @@ -770,10 +732,8 @@ xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, unsigned long *p3, unsigned long *p4, unsigned long *p5) { unsigned long lines = bytes >> 8; - char xmm_save[16*4] ALIGN16; - int cr0; - XMMS_SAVE; + kernel_fpu_begin(); /* Make sure GCC forgets anything it knows about p4 or p5, such that it won't pass to the asm volatile below a @@ -850,7 +810,7 @@ xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, like assuming they have some legal value. */ asm("" : "=r" (p4), "=r" (p5)); - XMMS_RESTORE; + kernel_fpu_end(); } static struct xor_block_template xor_block_pIII_sse = { @@ -862,7 +822,7 @@ static struct xor_block_template xor_block_pIII_sse = { }; /* Also try the AVX routines */ -#include "xor_avx.h" +#include <asm/xor_avx.h> /* Also try the generic routines. */ #include <asm-generic/xor.h> diff --git a/arch/x86/include/asm/xor_64.h b/arch/x86/include/asm/xor_64.h index b9b2323e90fe..87ac522c4af5 100644 --- a/arch/x86/include/asm/xor_64.h +++ b/arch/x86/include/asm/xor_64.h @@ -34,41 +34,7 @@ * no advantages to be gotten from x86-64 here anyways. */ -typedef struct { - unsigned long a, b; -} __attribute__((aligned(16))) xmm_store_t; - -/* Doesn't use gcc to save the XMM registers, because there is no easy way to - tell it to do a clts before the register saving. 
*/ -#define XMMS_SAVE \ -do { \ - preempt_disable(); \ - asm volatile( \ - "movq %%cr0,%0 ;\n\t" \ - "clts ;\n\t" \ - "movups %%xmm0,(%1) ;\n\t" \ - "movups %%xmm1,0x10(%1) ;\n\t" \ - "movups %%xmm2,0x20(%1) ;\n\t" \ - "movups %%xmm3,0x30(%1) ;\n\t" \ - : "=&r" (cr0) \ - : "r" (xmm_save) \ - : "memory"); \ -} while (0) - -#define XMMS_RESTORE \ -do { \ - asm volatile( \ - "sfence ;\n\t" \ - "movups (%1),%%xmm0 ;\n\t" \ - "movups 0x10(%1),%%xmm1 ;\n\t" \ - "movups 0x20(%1),%%xmm2 ;\n\t" \ - "movups 0x30(%1),%%xmm3 ;\n\t" \ - "movq %0,%%cr0 ;\n\t" \ - : \ - : "r" (cr0), "r" (xmm_save) \ - : "memory"); \ - preempt_enable(); \ -} while (0) +#include <asm/i387.h> #define OFFS(x) "16*("#x")" #define PF_OFFS(x) "256+16*("#x")" @@ -91,10 +57,8 @@ static void xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) { unsigned int lines = bytes >> 8; - unsigned long cr0; - xmm_store_t xmm_save[4]; - XMMS_SAVE; + kernel_fpu_begin(); asm volatile( #undef BLOCK @@ -135,7 +99,7 @@ xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) : [inc] "r" (256UL) : "memory"); - XMMS_RESTORE; + kernel_fpu_end(); } static void @@ -143,11 +107,8 @@ xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, unsigned long *p3) { unsigned int lines = bytes >> 8; - xmm_store_t xmm_save[4]; - unsigned long cr0; - - XMMS_SAVE; + kernel_fpu_begin(); asm volatile( #undef BLOCK #define BLOCK(i) \ @@ -194,7 +155,7 @@ xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3) : [inc] "r" (256UL) : "memory"); - XMMS_RESTORE; + kernel_fpu_end(); } static void @@ -202,10 +163,8 @@ xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, unsigned long *p3, unsigned long *p4) { unsigned int lines = bytes >> 8; - xmm_store_t xmm_save[4]; - unsigned long cr0; - XMMS_SAVE; + kernel_fpu_begin(); asm volatile( #undef BLOCK @@ -261,7 +220,7 @@ xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, : [inc] "r" (256UL) : "memory" ); - XMMS_RESTORE; + kernel_fpu_end(); } static void @@ -269,10 +228,8 @@ xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, unsigned long *p3, unsigned long *p4, unsigned long *p5) { unsigned int lines = bytes >> 8; - xmm_store_t xmm_save[4]; - unsigned long cr0; - XMMS_SAVE; + kernel_fpu_begin(); asm volatile( #undef BLOCK @@ -336,7 +293,7 @@ xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, : [inc] "r" (256UL) : "memory"); - XMMS_RESTORE; + kernel_fpu_end(); } static struct xor_block_template xor_block_sse = { @@ -349,7 +306,7 @@ static struct xor_block_template xor_block_sse = { /* Also try the AVX routines */ -#include "xor_avx.h" +#include <asm/xor_avx.h> #undef XOR_TRY_TEMPLATES #define XOR_TRY_TEMPLATES \ diff --git a/arch/x86/include/asm/xor_avx.h b/arch/x86/include/asm/xor_avx.h index 2510d35f480e..7ea79c5fa1f2 100644 --- a/arch/x86/include/asm/xor_avx.h +++ b/arch/x86/include/asm/xor_avx.h @@ -20,32 +20,6 @@ #include <linux/compiler.h> #include <asm/i387.h> -#define ALIGN32 __aligned(32) - -#define YMM_SAVED_REGS 4 - -#define YMMS_SAVE \ -do { \ - preempt_disable(); \ - cr0 = read_cr0(); \ - clts(); \ - asm volatile("vmovaps %%ymm0, %0" : "=m" (ymm_save[0]) : : "memory"); \ - asm volatile("vmovaps %%ymm1, %0" : "=m" (ymm_save[32]) : : "memory"); \ - asm volatile("vmovaps %%ymm2, %0" : "=m" (ymm_save[64]) : : "memory"); \ - asm volatile("vmovaps %%ymm3, %0" : "=m" (ymm_save[96]) : : "memory"); \ -} while (0); - -#define YMMS_RESTORE \ -do { \ - asm 
volatile("sfence" : : : "memory"); \ - asm volatile("vmovaps %0, %%ymm3" : : "m" (ymm_save[96])); \ - asm volatile("vmovaps %0, %%ymm2" : : "m" (ymm_save[64])); \ - asm volatile("vmovaps %0, %%ymm1" : : "m" (ymm_save[32])); \ - asm volatile("vmovaps %0, %%ymm0" : : "m" (ymm_save[0])); \ - write_cr0(cr0); \ - preempt_enable(); \ -} while (0); - #define BLOCK4(i) \ BLOCK(32 * i, 0) \ BLOCK(32 * (i + 1), 1) \ @@ -60,10 +34,9 @@ do { \ static void xor_avx_2(unsigned long bytes, unsigned long *p0, unsigned long *p1) { - unsigned long cr0, lines = bytes >> 9; - char ymm_save[32 * YMM_SAVED_REGS] ALIGN32; + unsigned long lines = bytes >> 9; - YMMS_SAVE + kernel_fpu_begin(); while (lines--) { #undef BLOCK @@ -82,16 +55,15 @@ do { \ p1 = (unsigned long *)((uintptr_t)p1 + 512); } - YMMS_RESTORE + kernel_fpu_end(); } static void xor_avx_3(unsigned long bytes, unsigned long *p0, unsigned long *p1, unsigned long *p2) { - unsigned long cr0, lines = bytes >> 9; - char ymm_save[32 * YMM_SAVED_REGS] ALIGN32; + unsigned long lines = bytes >> 9; - YMMS_SAVE + kernel_fpu_begin(); while (lines--) { #undef BLOCK @@ -113,16 +85,15 @@ do { \ p2 = (unsigned long *)((uintptr_t)p2 + 512); } - YMMS_RESTORE + kernel_fpu_end(); } static void xor_avx_4(unsigned long bytes, unsigned long *p0, unsigned long *p1, unsigned long *p2, unsigned long *p3) { - unsigned long cr0, lines = bytes >> 9; - char ymm_save[32 * YMM_SAVED_REGS] ALIGN32; + unsigned long lines = bytes >> 9; - YMMS_SAVE + kernel_fpu_begin(); while (lines--) { #undef BLOCK @@ -147,16 +118,15 @@ do { \ p3 = (unsigned long *)((uintptr_t)p3 + 512); } - YMMS_RESTORE + kernel_fpu_end(); } static void xor_avx_5(unsigned long bytes, unsigned long *p0, unsigned long *p1, unsigned long *p2, unsigned long *p3, unsigned long *p4) { - unsigned long cr0, lines = bytes >> 9; - char ymm_save[32 * YMM_SAVED_REGS] ALIGN32; + unsigned long lines = bytes >> 9; - YMMS_SAVE + kernel_fpu_begin(); while (lines--) { #undef BLOCK @@ -184,7 +154,7 @@ do { \ p4 = (unsigned long *)((uintptr_t)p4 + 512); } - YMMS_RESTORE + kernel_fpu_end(); } static struct xor_block_template xor_block_avx = { diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h index 8a1b6f9b594a..0415cdabb5a6 100644 --- a/arch/x86/include/asm/xsave.h +++ b/arch/x86/include/asm/xsave.h @@ -34,17 +34,14 @@ extern unsigned int xstate_size; extern u64 pcntxt_mask; extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; +extern struct xsave_struct *init_xstate_buf; extern void xsave_init(void); extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask); extern int init_fpu(struct task_struct *child); -extern int check_for_xstate(struct i387_fxsave_struct __user *buf, - void __user *fpstate, - struct _fpx_sw_bytes *sw); -static inline int fpu_xrstor_checking(struct fpu *fpu) +static inline int fpu_xrstor_checking(struct xsave_struct *fx) { - struct xsave_struct *fx = &fpu->state->xsave; int err; asm volatile("1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n\t" @@ -69,13 +66,13 @@ static inline int xsave_user(struct xsave_struct __user *buf) * Clear the xsave header first, so that reserved fields are * initialized to zero. 
*/ - err = __clear_user(&buf->xsave_hdr, - sizeof(struct xsave_hdr_struct)); + err = __clear_user(&buf->xsave_hdr, sizeof(buf->xsave_hdr)); if (unlikely(err)) return -EFAULT; - __asm__ __volatile__("1: .byte " REX_PREFIX "0x0f,0xae,0x27\n" - "2:\n" + __asm__ __volatile__(ASM_STAC "\n" + "1: .byte " REX_PREFIX "0x0f,0xae,0x27\n" + "2: " ASM_CLAC "\n" ".section .fixup,\"ax\"\n" "3: movl $-1,%[err]\n" " jmp 2b\n" @@ -84,9 +81,6 @@ static inline int xsave_user(struct xsave_struct __user *buf) : [err] "=r" (err) : "D" (buf), "a" (-1), "d" (-1), "0" (0) : "memory"); - if (unlikely(err) && __clear_user(buf, xstate_size)) - err = -EFAULT; - /* No need to clear here because the caller clears USED_MATH */ return err; } @@ -97,8 +91,9 @@ static inline int xrestore_user(struct xsave_struct __user *buf, u64 mask) u32 lmask = mask; u32 hmask = mask >> 32; - __asm__ __volatile__("1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n" - "2:\n" + __asm__ __volatile__(ASM_STAC "\n" + "1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n" + "2: " ASM_CLAC "\n" ".section .fixup,\"ax\"\n" "3: movl $-1,%[err]\n" " jmp 2b\n" diff --git a/arch/x86/include/uapi/asm/Kbuild b/arch/x86/include/uapi/asm/Kbuild new file mode 100644 index 000000000000..83b6e9a0dce4 --- /dev/null +++ b/arch/x86/include/uapi/asm/Kbuild @@ -0,0 +1,6 @@ +# UAPI Header export list +include include/uapi/asm-generic/Kbuild.asm + +genhdr-y += unistd_32.h +genhdr-y += unistd_64.h +genhdr-y += unistd_x32.h diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 7203298e0b83..a48ea05157d3 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -99,6 +99,8 @@ obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o obj-$(CONFIG_OF) += devicetree.o obj-$(CONFIG_UPROBES) += uprobes.o +obj-$(CONFIG_PERF_EVENTS) += perf_regs.o + ### # 64 bit specific files ifeq ($(CONFIG_X86_64),y) diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index b2297e58c6ed..e651f7a589ac 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -656,7 +656,7 @@ static int __cpuinit _acpi_map_lsapic(acpi_handle handle, int *pcpu) acpi_register_lapic(physid, ACPI_MADT_ENABLED); /* - * If mp_register_lapic successfully generates a new logical cpu + * If acpi_register_lapic successfully generates a new logical cpu * number, then the following will get us exactly what was mapped */ cpumask_andnot(new_map, cpu_present_mask, tmp_map); diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index 1b8e5a03d942..11676cf65aee 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -43,17 +43,22 @@ int acpi_suspend_lowlevel(void) header->video_mode = saved_video_mode; + header->pmode_behavior = 0; + #ifndef CONFIG_64BIT store_gdt((struct desc_ptr *)&header->pmode_gdt); - if (rdmsr_safe(MSR_EFER, &header->pmode_efer_low, - &header->pmode_efer_high)) - header->pmode_efer_low = header->pmode_efer_high = 0; + if (!rdmsr_safe(MSR_EFER, + &header->pmode_efer_low, + &header->pmode_efer_high)) + header->pmode_behavior |= (1 << WAKEUP_BEHAVIOR_RESTORE_EFER); #endif /* !CONFIG_64BIT */ header->pmode_cr0 = read_cr0(); - header->pmode_cr4 = read_cr4_safe(); - header->pmode_behavior = 0; + if (__this_cpu_read(cpu_info.cpuid_level) >= 0) { + header->pmode_cr4 = read_cr4(); + header->pmode_behavior |= (1 << WAKEUP_BEHAVIOR_RESTORE_CR4); + } if (!rdmsr_safe(MSR_IA32_MISC_ENABLE, &header->pmode_misc_en_low, &header->pmode_misc_en_high)) diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 
ced4534baed5..ef5ccca79a6c 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -23,19 +23,6 @@ #define MAX_PATCH_LEN (255-1) -#ifdef CONFIG_HOTPLUG_CPU -static int smp_alt_once; - -static int __init bootonly(char *str) -{ - smp_alt_once = 1; - return 1; -} -__setup("smp-alt-boot", bootonly); -#else -#define smp_alt_once 1 -#endif - static int __initdata_or_module debug_alternative; static int __init debug_alt(char *str) @@ -317,7 +304,7 @@ static void alternatives_smp_lock(const s32 *start, const s32 *end, /* turn DS segment override prefix into lock prefix */ if (*ptr == 0x3e) text_poke(ptr, ((unsigned char []){0xf0}), 1); - }; + } mutex_unlock(&text_mutex); } @@ -326,9 +313,6 @@ static void alternatives_smp_unlock(const s32 *start, const s32 *end, { const s32 *poff; - if (noreplace_smp) - return; - mutex_lock(&text_mutex); for (poff = start; poff < end; poff++) { u8 *ptr = (u8 *)poff + *poff; @@ -338,7 +322,7 @@ static void alternatives_smp_unlock(const s32 *start, const s32 *end, /* turn lock prefix into DS segment override prefix */ if (*ptr == 0xf0) text_poke(ptr, ((unsigned char []){0x3E}), 1); - }; + } mutex_unlock(&text_mutex); } @@ -359,7 +343,7 @@ struct smp_alt_module { }; static LIST_HEAD(smp_alt_modules); static DEFINE_MUTEX(smp_alt); -static int smp_mode = 1; /* protected by smp_alt */ +static bool uniproc_patched = false; /* protected by smp_alt */ void __init_or_module alternatives_smp_module_add(struct module *mod, char *name, @@ -368,19 +352,18 @@ void __init_or_module alternatives_smp_module_add(struct module *mod, { struct smp_alt_module *smp; - if (noreplace_smp) - return; + mutex_lock(&smp_alt); + if (!uniproc_patched) + goto unlock; - if (smp_alt_once) { - if (boot_cpu_has(X86_FEATURE_UP)) - alternatives_smp_unlock(locks, locks_end, - text, text_end); - return; - } + if (num_possible_cpus() == 1) + /* Don't bother remembering, we'll never have to undo it. */ + goto smp_unlock; smp = kzalloc(sizeof(*smp), GFP_KERNEL); if (NULL == smp) - return; /* we'll run the (safe but slow) SMP code then ... */ + /* we'll run the (safe but slow) SMP code then ... */ + goto unlock; smp->mod = mod; smp->name = name; @@ -392,11 +375,10 @@ void __init_or_module alternatives_smp_module_add(struct module *mod, __func__, smp->locks, smp->locks_end, smp->text, smp->text_end, smp->name); - mutex_lock(&smp_alt); list_add_tail(&smp->next, &smp_alt_modules); - if (boot_cpu_has(X86_FEATURE_UP)) - alternatives_smp_unlock(smp->locks, smp->locks_end, - smp->text, smp->text_end); +smp_unlock: + alternatives_smp_unlock(locks, locks_end, text, text_end); +unlock: mutex_unlock(&smp_alt); } @@ -404,24 +386,18 @@ void __init_or_module alternatives_smp_module_del(struct module *mod) { struct smp_alt_module *item; - if (smp_alt_once || noreplace_smp) - return; - mutex_lock(&smp_alt); list_for_each_entry(item, &smp_alt_modules, next) { if (mod != item->mod) continue; list_del(&item->next); - mutex_unlock(&smp_alt); - DPRINTK("%s: %s\n", __func__, item->name); kfree(item); - return; + break; } mutex_unlock(&smp_alt); } -bool skip_smp_alternatives; -void alternatives_smp_switch(int smp) +void alternatives_enable_smp(void) { struct smp_alt_module *mod; @@ -436,34 +412,21 @@ void alternatives_smp_switch(int smp) pr_info("lockdep: fixing up alternatives\n"); #endif - if (noreplace_smp || smp_alt_once || skip_smp_alternatives) - return; - BUG_ON(!smp && (num_online_cpus() > 1)); + /* Why bother if there are no other CPUs? 
*/ + BUG_ON(num_possible_cpus() == 1); mutex_lock(&smp_alt); - /* - * Avoid unnecessary switches because it forces JIT based VMs to - * throw away all cached translations, which can be quite costly. - */ - if (smp == smp_mode) { - /* nothing */ - } else if (smp) { + if (uniproc_patched) { pr_info("switching to SMP code\n"); + BUG_ON(num_online_cpus() != 1); clear_cpu_cap(&boot_cpu_data, X86_FEATURE_UP); clear_cpu_cap(&cpu_data(0), X86_FEATURE_UP); list_for_each_entry(mod, &smp_alt_modules, next) alternatives_smp_lock(mod->locks, mod->locks_end, mod->text, mod->text_end); - } else { - pr_info("switching to UP code\n"); - set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP); - set_cpu_cap(&cpu_data(0), X86_FEATURE_UP); - list_for_each_entry(mod, &smp_alt_modules, next) - alternatives_smp_unlock(mod->locks, mod->locks_end, - mod->text, mod->text_end); + uniproc_patched = false; } - smp_mode = smp; mutex_unlock(&smp_alt); } @@ -540,40 +503,22 @@ void __init alternative_instructions(void) apply_alternatives(__alt_instructions, __alt_instructions_end); - /* switch to patch-once-at-boottime-only mode and free the - * tables in case we know the number of CPUs will never ever - * change */ -#ifdef CONFIG_HOTPLUG_CPU - if (num_possible_cpus() < 2) - smp_alt_once = 1; -#endif - #ifdef CONFIG_SMP - if (smp_alt_once) { - if (1 == num_possible_cpus()) { - pr_info("switching to UP code\n"); - set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP); - set_cpu_cap(&cpu_data(0), X86_FEATURE_UP); - - alternatives_smp_unlock(__smp_locks, __smp_locks_end, - _text, _etext); - } - } else { + /* Patch to UP if other cpus not imminent. */ + if (!noreplace_smp && (num_present_cpus() == 1 || setup_max_cpus <= 1)) { + uniproc_patched = true; alternatives_smp_module_add(NULL, "core kernel", __smp_locks, __smp_locks_end, _text, _etext); - - /* Only switch to UP mode if we don't immediately boot others */ - if (num_present_cpus() == 1 || setup_max_cpus <= 1) - alternatives_smp_switch(0); } -#endif - apply_paravirt(__parainstructions, __parainstructions_end); - if (smp_alt_once) + if (!uniproc_patched || num_possible_cpus() == 1) free_init_pages("SMP alternatives", (unsigned long)__smp_locks, (unsigned long)__smp_locks_end); +#endif + + apply_paravirt(__parainstructions, __parainstructions_end); restart_nmi(); } diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 24deb3082328..b17416e72fbd 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1934,7 +1934,7 @@ void smp_error_interrupt(struct pt_regs *regs) apic_printk(APIC_DEBUG, KERN_CONT " : %s", error_interrupt_reason[i]); i++; v1 >>= 1; - }; + } apic_printk(APIC_DEBUG, KERN_CONT "\n"); diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 9d92e19039f0..f7e98a2c0d12 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -737,6 +737,72 @@ static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, } #endif +static void __cpuinit cpu_set_tlb_flushall_shift(struct cpuinfo_x86 *c) +{ + if (!cpu_has_invlpg) + return; + + tlb_flushall_shift = 5; + + if (c->x86 <= 0x11) + tlb_flushall_shift = 4; +} + +static void __cpuinit cpu_detect_tlb_amd(struct cpuinfo_x86 *c) +{ + u32 ebx, eax, ecx, edx; + u16 mask = 0xfff; + + if (c->x86 < 0xf) + return; + + if (c->extended_cpuid_level < 0x80000006) + return; + + cpuid(0x80000006, &eax, &ebx, &ecx, &edx); + + tlb_lld_4k[ENTRIES] = (ebx >> 16) & mask; + tlb_lli_4k[ENTRIES] = ebx & mask; + + /* + * K8 doesn't have 2M/4M entries in the L2 TLB so read out the L1 TLB 
+ * characteristics from the CPUID function 0x80000005 instead. + */ + if (c->x86 == 0xf) { + cpuid(0x80000005, &eax, &ebx, &ecx, &edx); + mask = 0xff; + } + + /* Handle DTLB 2M and 4M sizes, fall back to L1 if L2 is disabled */ + if (!((eax >> 16) & mask)) { + u32 a, b, c, d; + + cpuid(0x80000005, &a, &b, &c, &d); + tlb_lld_2m[ENTRIES] = (a >> 16) & 0xff; + } else { + tlb_lld_2m[ENTRIES] = (eax >> 16) & mask; + } + + /* a 4M entry uses two 2M entries */ + tlb_lld_4m[ENTRIES] = tlb_lld_2m[ENTRIES] >> 1; + + /* Handle ITLB 2M and 4M sizes, fall back to L1 if L2 is disabled */ + if (!(eax & mask)) { + /* Erratum 658 */ + if (c->x86 == 0x15 && c->x86_model <= 0x1f) { + tlb_lli_2m[ENTRIES] = 1024; + } else { + cpuid(0x80000005, &eax, &ebx, &ecx, &edx); + tlb_lli_2m[ENTRIES] = eax & 0xff; + } + } else + tlb_lli_2m[ENTRIES] = eax & mask; + + tlb_lli_4m[ENTRIES] = tlb_lli_2m[ENTRIES] >> 1; + + cpu_set_tlb_flushall_shift(c); +} + static const struct cpu_dev __cpuinitconst amd_cpu_dev = { .c_vendor = "AMD", .c_ident = { "AuthenticAMD" }, @@ -756,6 +822,7 @@ static const struct cpu_dev __cpuinitconst amd_cpu_dev = { .c_size_cache = amd_size_cache, #endif .c_early_init = early_init_amd, + .c_detect_tlb = cpu_detect_tlb_amd, .c_bsp_init = bsp_init_amd, .c_init = init_amd, .c_x86_vendor = X86_VENDOR_AMD, diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index c97bb7b5a9f8..d0e910da16c5 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -165,10 +165,15 @@ void __init check_bugs(void) print_cpu_info(&boot_cpu_data); #endif check_config(); - check_fpu(); check_hlt(); check_popad(); init_utsname()->machine[1] = '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86); alternative_instructions(); + + /* + * kernel_fpu_begin/end() in check_fpu() relies on the patched + * alternative instructions. 
+ */ + check_fpu(); } diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index a5fbc3c5fccc..7505f7b13e71 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -259,23 +259,36 @@ static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c) } #endif -static int disable_smep __cpuinitdata; static __init int setup_disable_smep(char *arg) { - disable_smep = 1; + setup_clear_cpu_cap(X86_FEATURE_SMEP); return 1; } __setup("nosmep", setup_disable_smep); -static __cpuinit void setup_smep(struct cpuinfo_x86 *c) +static __always_inline void setup_smep(struct cpuinfo_x86 *c) { - if (cpu_has(c, X86_FEATURE_SMEP)) { - if (unlikely(disable_smep)) { - setup_clear_cpu_cap(X86_FEATURE_SMEP); - clear_in_cr4(X86_CR4_SMEP); - } else - set_in_cr4(X86_CR4_SMEP); - } + if (cpu_has(c, X86_FEATURE_SMEP)) + set_in_cr4(X86_CR4_SMEP); +} + +static __init int setup_disable_smap(char *arg) +{ + setup_clear_cpu_cap(X86_FEATURE_SMAP); + return 1; +} +__setup("nosmap", setup_disable_smap); + +static __always_inline void setup_smap(struct cpuinfo_x86 *c) +{ + unsigned long eflags; + + /* This should have been cleared long ago */ + raw_local_save_flags(eflags); + BUG_ON(eflags & X86_EFLAGS_AC); + + if (cpu_has(c, X86_FEATURE_SMAP)) + set_in_cr4(X86_CR4_SMAP); } /* @@ -476,7 +489,7 @@ void __cpuinit cpu_detect_tlb(struct cpuinfo_x86 *c) printk(KERN_INFO "Last level iTLB entries: 4KB %d, 2MB %d, 4MB %d\n" \ "Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d\n" \ - "tlb_flushall_shift is 0x%x\n", + "tlb_flushall_shift: %d\n", tlb_lli_4k[ENTRIES], tlb_lli_2m[ENTRIES], tlb_lli_4m[ENTRIES], tlb_lld_4k[ENTRIES], tlb_lld_2m[ENTRIES], tlb_lld_4m[ENTRIES], @@ -712,8 +725,6 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) c->cpu_index = 0; filter_cpuid_features(c, false); - setup_smep(c); - if (this_cpu->c_bsp_init) this_cpu->c_bsp_init(c); } @@ -798,8 +809,6 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c) c->phys_proc_id = c->initial_apicid; } - setup_smep(c); - get_model_name(c); /* Default name */ detect_nopl(c); @@ -864,6 +873,10 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) /* Disable the PN if appropriate */ squash_the_stupid_serial_number(c); + /* Set up SMEP/SMAP */ + setup_smep(c); + setup_smap(c); + /* * The vendor-specific functions might have changed features. * Now we do "generic changes." 
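[Editor's sketch] The new setup_smap() above encodes an invariant worth spelling out: once CR4.SMAP is on, EFLAGS.AC becomes the kernel's "touch user memory" override, so AC must already be clear when the feature is enabled. A toy model with plain words standing in for EFLAGS and CR4; the bit positions mirror the x86 definitions, and assert() stands in for the kernel's BUG_ON():

    #include <assert.h>
    #include <stdio.h>

    #define X86_EFLAGS_AC (1u << 18)   /* alignment check / SMAP override */
    #define X86_CR4_SMAP  (1u << 21)

    /* Model of setup_smap(): AC must not be set by the time SMAP goes live. */
    static unsigned int setup_smap_model(unsigned int eflags, unsigned int cr4,
                                         int cpu_has_smap)
    {
        assert(!(eflags & X86_EFLAGS_AC));  /* the hunk's BUG_ON() */
        if (cpu_has_smap)
            cr4 |= X86_CR4_SMAP;
        return cr4;
    }

    int main(void)
    {
        printf("cr4 = %#x\n", setup_smap_model(0x2, 0x0, 1));
        return 0;
    }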
@@ -942,8 +955,7 @@ void __init identify_boot_cpu(void) #else vgetcpu_set_mode(); #endif - if (boot_cpu_data.cpuid_level >= 2) - cpu_detect_tlb(&boot_cpu_data); + cpu_detect_tlb(&boot_cpu_data); } void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) @@ -1023,14 +1035,16 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) printk(KERN_CONT "%s ", vendor); if (c->x86_model_id[0]) - printk(KERN_CONT "%s", c->x86_model_id); + printk(KERN_CONT "%s", strim(c->x86_model_id)); else printk(KERN_CONT "%d86", c->x86); + printk(KERN_CONT " (fam: %02x, model: %02x", c->x86, c->x86_model); + if (c->x86_mask || c->cpuid_level >= 0) - printk(KERN_CONT " stepping %02x\n", c->x86_mask); + printk(KERN_CONT ", stepping: %02x)\n", c->x86_mask); else - printk(KERN_CONT "\n"); + printk(KERN_CONT ")\n"); print_cpu_msr(c); } @@ -1113,11 +1127,10 @@ void syscall_init(void) /* Flags to clear on syscall */ wrmsrl(MSR_SYSCALL_MASK, - X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL); + X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF| + X86_EFLAGS_IOPL|X86_EFLAGS_AC); } -unsigned long kernel_eflags; - /* * Copies of the original ist values from the tss are only accessed during * debugging, no special alignment required. @@ -1297,9 +1310,6 @@ void __cpuinit cpu_init(void) dbg_restore_debug_regs(); fpu_init(); - xsave_init(); - - raw_local_save_flags(kernel_eflags); if (is_uv_system()) uv_cpu_init(); @@ -1352,6 +1362,5 @@ void __cpuinit cpu_init(void) dbg_restore_debug_regs(); fpu_init(); - xsave_init(); } #endif diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 0a4ce2980a5a..198e019a531a 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -648,6 +648,10 @@ static void __cpuinit intel_detect_tlb(struct cpuinfo_x86 *c) int i, j, n; unsigned int regs[4]; unsigned char *desc = (unsigned char *)regs; + + if (c->cpuid_level < 2) + return; + /* Number of times to iterate */ n = cpuid_eax(2) & 0xFF; diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c index fc4beb393577..ddc72f839332 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-inject.c +++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c @@ -78,6 +78,7 @@ static void raise_exception(struct mce *m, struct pt_regs *pregs) } static cpumask_var_t mce_inject_cpumask; +static DEFINE_MUTEX(mce_inject_mutex); static int mce_raise_notify(unsigned int cmd, struct pt_regs *regs) { @@ -194,7 +195,11 @@ static void raise_mce(struct mce *m) put_online_cpus(); } else #endif + { + preempt_disable(); raise_local(); + preempt_enable(); + } } /* Error injection interface */ @@ -225,7 +230,10 @@ static ssize_t mce_write(struct file *filp, const char __user *ubuf, * so do it a jiffie or two later everywhere. 
*/ schedule_timeout(2); + + mutex_lock(&mce_inject_mutex); raise_mce(&m); + mutex_unlock(&mce_inject_mutex); return usize; } diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h index ed44c8a65858..6a05c1d327a9 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-internal.h +++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h @@ -28,6 +28,18 @@ extern int mce_ser; extern struct mce_bank *mce_banks; +#ifdef CONFIG_X86_MCE_INTEL +unsigned long mce_intel_adjust_timer(unsigned long interval); +void mce_intel_cmci_poll(void); +void mce_intel_hcpu_update(unsigned long cpu); +#else +# define mce_intel_adjust_timer mce_adjust_timer_default +static inline void mce_intel_cmci_poll(void) { } +static inline void mce_intel_hcpu_update(unsigned long cpu) { } +#endif + +void mce_timer_kick(unsigned long interval); + #ifdef CONFIG_ACPI_APEI int apei_write_mce(struct mce *m); ssize_t apei_read_mce(struct mce *m, u64 *record_id); diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 292d0258311c..29e87d3b2843 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -83,6 +83,7 @@ static int mce_dont_log_ce __read_mostly; int mce_cmci_disabled __read_mostly; int mce_ignore_ce __read_mostly; int mce_ser __read_mostly; +int mce_bios_cmci_threshold __read_mostly; struct mce_bank *mce_banks __read_mostly; @@ -1266,6 +1267,14 @@ static unsigned long check_interval = 5 * 60; /* 5 minutes */ static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */ static DEFINE_PER_CPU(struct timer_list, mce_timer); +static unsigned long mce_adjust_timer_default(unsigned long interval) +{ + return interval; +} + +static unsigned long (*mce_adjust_timer)(unsigned long interval) = + mce_adjust_timer_default; + static void mce_timer_fn(unsigned long data) { struct timer_list *t = &__get_cpu_var(mce_timer); @@ -1276,6 +1285,7 @@ static void mce_timer_fn(unsigned long data) if (mce_available(__this_cpu_ptr(&cpu_info))) { machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_poll_banks)); + mce_intel_cmci_poll(); } /* @@ -1283,14 +1293,38 @@ static void mce_timer_fn(unsigned long data) * polling interval, otherwise increase the polling interval. */ iv = __this_cpu_read(mce_next_interval); - if (mce_notify_irq()) + if (mce_notify_irq()) { iv = max(iv / 2, (unsigned long) HZ/100); - else + } else { iv = min(iv * 2, round_jiffies_relative(check_interval * HZ)); + iv = mce_adjust_timer(iv); + } __this_cpu_write(mce_next_interval, iv); + /* Might have become 0 after CMCI storm subsided */ + if (iv) { + t->expires = jiffies + iv; + add_timer_on(t, smp_processor_id()); + } +} - t->expires = jiffies + iv; - add_timer_on(t, smp_processor_id()); +/* + * Ensure that the timer is firing in @interval from now. 
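[Editor's sketch] The interval logic above is a classic multiplicative backoff: halve the poll period while machine-check events keep turning up, double it back toward the configured ceiling once they stop. A self-contained model; round_jiffies_relative() is dropped for brevity, and the HZ and check_interval values mirror the hunk:

    #include <stdio.h>

    #define HZ 1000u
    static unsigned long check_interval = 5 * 60;  /* seconds, as in mce.c */

    /* Model of the adaptation in mce_timer_fn(): halve down to HZ/100 while
     * events are being found, double back up once things go quiet. */
    static unsigned long next_interval(unsigned long iv, int found_events)
    {
        if (found_events)
            return iv / 2 > HZ / 100 ? iv / 2 : HZ / 100;
        iv *= 2;
        return iv < check_interval * HZ ? iv : check_interval * HZ;
    }

    int main(void)
    {
        unsigned long iv = check_interval * HZ;
        for (int i = 0; i < 5; i++)
            printf("busy: iv=%lu\n", iv = next_interval(iv, 1));
        for (int i = 0; i < 5; i++)
            printf("quiet: iv=%lu\n", iv = next_interval(iv, 0));
        return 0;
    }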
+ */ +void mce_timer_kick(unsigned long interval) +{ + struct timer_list *t = &__get_cpu_var(mce_timer); + unsigned long when = jiffies + interval; + unsigned long iv = __this_cpu_read(mce_next_interval); + + if (timer_pending(t)) { + if (time_before(when, t->expires)) + mod_timer_pinned(t, when); + } else { + t->expires = round_jiffies(when); + add_timer_on(t, smp_processor_id()); + } + if (interval < iv) + __this_cpu_write(mce_next_interval, interval); } /* Must not be called in IRQ context where del_timer_sync() can deadlock */ @@ -1585,6 +1619,7 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c) switch (c->x86_vendor) { case X86_VENDOR_INTEL: mce_intel_feature_init(c); + mce_adjust_timer = mce_intel_adjust_timer; break; case X86_VENDOR_AMD: mce_amd_feature_init(c); @@ -1594,23 +1629,28 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c) } } -static void __mcheck_cpu_init_timer(void) +static void mce_start_timer(unsigned int cpu, struct timer_list *t) { - struct timer_list *t = &__get_cpu_var(mce_timer); - unsigned long iv = check_interval * HZ; + unsigned long iv = mce_adjust_timer(check_interval * HZ); - setup_timer(t, mce_timer_fn, smp_processor_id()); + __this_cpu_write(mce_next_interval, iv); - if (mce_ignore_ce) + if (mce_ignore_ce || !iv) return; - __this_cpu_write(mce_next_interval, iv); - if (!iv) - return; t->expires = round_jiffies(jiffies + iv); add_timer_on(t, smp_processor_id()); } +static void __mcheck_cpu_init_timer(void) +{ + struct timer_list *t = &__get_cpu_var(mce_timer); + unsigned int cpu = smp_processor_id(); + + setup_timer(t, mce_timer_fn, cpu); + mce_start_timer(cpu, t); +} + /* Handle unconfigured int18 (should never happen) */ static void unexpected_machine_check(struct pt_regs *regs, long error_code) { @@ -1907,6 +1947,7 @@ static struct miscdevice mce_chrdev_device = { * check, or 0 to not wait * mce=bootlog Log MCEs from before booting. Disabled by default on AMD. * mce=nobootlog Don't log MCEs from before booting. 
+ * mce=bios_cmci_threshold Don't program the CMCI threshold */ static int __init mcheck_enable(char *str) { @@ -1926,6 +1967,8 @@ static int __init mcheck_enable(char *str) mce_ignore_ce = 1; else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog")) mce_bootlog = (str[0] == 'b'); + else if (!strcmp(str, "bios_cmci_threshold")) + mce_bios_cmci_threshold = 1; else if (isdigit(str[0])) { get_option(&str, &tolerant); if (*str == ',') { @@ -2166,6 +2209,11 @@ static struct dev_ext_attribute dev_attr_cmci_disabled = { &mce_cmci_disabled }; +static struct dev_ext_attribute dev_attr_bios_cmci_threshold = { + __ATTR(bios_cmci_threshold, 0444, device_show_int, NULL), + &mce_bios_cmci_threshold +}; + static struct device_attribute *mce_device_attrs[] = { &dev_attr_tolerant.attr, &dev_attr_check_interval.attr, @@ -2174,6 +2222,7 @@ static struct device_attribute *mce_device_attrs[] = { &dev_attr_dont_log_ce.attr, &dev_attr_ignore_ce.attr, &dev_attr_cmci_disabled.attr, + &dev_attr_bios_cmci_threshold.attr, NULL }; @@ -2294,38 +2343,33 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) unsigned int cpu = (unsigned long)hcpu; struct timer_list *t = &per_cpu(mce_timer, cpu); - switch (action) { + switch (action & ~CPU_TASKS_FROZEN) { case CPU_ONLINE: - case CPU_ONLINE_FROZEN: mce_device_create(cpu); if (threshold_cpu_callback) threshold_cpu_callback(action, cpu); break; case CPU_DEAD: - case CPU_DEAD_FROZEN: if (threshold_cpu_callback) threshold_cpu_callback(action, cpu); mce_device_remove(cpu); + mce_intel_hcpu_update(cpu); break; case CPU_DOWN_PREPARE: - case CPU_DOWN_PREPARE_FROZEN: - del_timer_sync(t); smp_call_function_single(cpu, mce_disable_cpu, &action, 1); + del_timer_sync(t); break; case CPU_DOWN_FAILED: - case CPU_DOWN_FAILED_FROZEN: - if (!mce_ignore_ce && check_interval) { - t->expires = round_jiffies(jiffies + - per_cpu(mce_next_interval, cpu)); - add_timer_on(t, cpu); - } smp_call_function_single(cpu, mce_reenable_cpu, &action, 1); + mce_start_timer(cpu, t); break; - case CPU_POST_DEAD: + } + + if (action == CPU_POST_DEAD) { /* intentionally ignoring frozen here */ cmci_rediscover(cpu); - break; } + return NOTIFY_OK; } diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index 38e49bc95ffc..5f88abf07e9c 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c @@ -15,6 +15,8 @@ #include <asm/msr.h> #include <asm/mce.h> +#include "mce-internal.h" + /* * Support for Intel Correct Machine Check Interrupts. This allows * the CPU to raise an interrupt when a corrected machine check happened. 
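[Editor's sketch] mcheck_enable() above is a flat string-match parser over the body of mce=. A simplified userspace model, using atoi() where the kernel uses get_option() and omitting the "tolerant,monarchtimeout" pair handling:

    #include <stdio.h>
    #include <string.h>
    #include <ctype.h>
    #include <stdlib.h>

    static int mce_ignore_ce, mce_bios_cmci_threshold, mce_bootlog = -1, tolerant;

    /* Model of mcheck_enable(): one flag word or a leading tolerance digit. */
    static int mcheck_enable_model(const char *str)
    {
        if (!strcmp(str, "ignore_ce"))
            mce_ignore_ce = 1;
        else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog"))
            mce_bootlog = (str[0] == 'b');
        else if (!strcmp(str, "bios_cmci_threshold"))
            mce_bios_cmci_threshold = 1;
        else if (isdigit((unsigned char)str[0]))
            tolerant = atoi(str);
        else
            return 0;
        return 1;
    }

    int main(void)
    {
        mcheck_enable_model("bios_cmci_threshold");
        mcheck_enable_model("2");
        printf("ignore_ce=%d bootlog=%d bios_cmci_threshold=%d tolerant=%d\n",
               mce_ignore_ce, mce_bootlog, mce_bios_cmci_threshold, tolerant);
        return 0;
    }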
@@ -30,7 +32,22 @@ static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned); */ static DEFINE_RAW_SPINLOCK(cmci_discover_lock); -#define CMCI_THRESHOLD 1 +#define CMCI_THRESHOLD 1 +#define CMCI_POLL_INTERVAL (30 * HZ) +#define CMCI_STORM_INTERVAL (1 * HZ) +#define CMCI_STORM_THRESHOLD 15 + +static DEFINE_PER_CPU(unsigned long, cmci_time_stamp); +static DEFINE_PER_CPU(unsigned int, cmci_storm_cnt); +static DEFINE_PER_CPU(unsigned int, cmci_storm_state); + +enum { + CMCI_STORM_NONE, + CMCI_STORM_ACTIVE, + CMCI_STORM_SUBSIDED, +}; + +static atomic_t cmci_storm_on_cpus; static int cmci_supported(int *banks) { @@ -53,6 +70,93 @@ static int cmci_supported(int *banks) return !!(cap & MCG_CMCI_P); } +void mce_intel_cmci_poll(void) +{ + if (__this_cpu_read(cmci_storm_state) == CMCI_STORM_NONE) + return; + machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned)); +} + +void mce_intel_hcpu_update(unsigned long cpu) +{ + if (per_cpu(cmci_storm_state, cpu) == CMCI_STORM_ACTIVE) + atomic_dec(&cmci_storm_on_cpus); + + per_cpu(cmci_storm_state, cpu) = CMCI_STORM_NONE; +} + +unsigned long mce_intel_adjust_timer(unsigned long interval) +{ + int r; + + if (interval < CMCI_POLL_INTERVAL) + return interval; + + switch (__this_cpu_read(cmci_storm_state)) { + case CMCI_STORM_ACTIVE: + /* + * We switch back to interrupt mode once the poll timer has + * silenced itself. That means no events recorded and the + * timer interval is back to our poll interval. + */ + __this_cpu_write(cmci_storm_state, CMCI_STORM_SUBSIDED); + r = atomic_sub_return(1, &cmci_storm_on_cpus); + if (r == 0) + pr_notice("CMCI storm subsided: switching to interrupt mode\n"); + /* FALLTHROUGH */ + + case CMCI_STORM_SUBSIDED: + /* + * We wait for all cpus to go back to SUBSIDED + * state. When that happens we switch back to + * interrupt mode. + */ + if (!atomic_read(&cmci_storm_on_cpus)) { + __this_cpu_write(cmci_storm_state, CMCI_STORM_NONE); + cmci_reenable(); + cmci_recheck(); + } + return CMCI_POLL_INTERVAL; + default: + /* + * We have shiny weather. Let the poll do whatever it + * thinks. + */ + return interval; + } +} + +static bool cmci_storm_detect(void) +{ + unsigned int cnt = __this_cpu_read(cmci_storm_cnt); + unsigned long ts = __this_cpu_read(cmci_time_stamp); + unsigned long now = jiffies; + int r; + + if (__this_cpu_read(cmci_storm_state) != CMCI_STORM_NONE) + return true; + + if (time_before_eq(now, ts + CMCI_STORM_INTERVAL)) { + cnt++; + } else { + cnt = 1; + __this_cpu_write(cmci_time_stamp, now); + } + __this_cpu_write(cmci_storm_cnt, cnt); + + if (cnt <= CMCI_STORM_THRESHOLD) + return false; + + cmci_clear(); + __this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE); + r = atomic_add_return(1, &cmci_storm_on_cpus); + mce_timer_kick(CMCI_POLL_INTERVAL); + + if (r == 1) + pr_notice("CMCI storm detected: switching to poll mode\n"); + return true; +} + /* * The interrupt handler. This is called on every event. * Just call the poller directly to log any events. @@ -61,33 +165,28 @@ static int cmci_supported(int *banks) */ static void intel_threshold_interrupt(void) { + if (cmci_storm_detect()) + return; machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned)); mce_notify_irq(); } -static void print_update(char *type, int *hdr, int num) -{ - if (*hdr == 0) - printk(KERN_INFO "CPU %d MCA banks", smp_processor_id()); - *hdr = 1; - printk(KERN_CONT " %s:%d", type, num); -} - /* * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks * on this CPU. 
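[Editor's sketch] cmci_storm_detect() above is a windowed rate limit: interrupts arriving within a one-second window are counted, and the 16th trips storm mode. A single-CPU model with a fake jiffies clock; the kernel version additionally flips per-CPU state and a global atomic so all CPUs can leave storm mode together:

    #include <stdio.h>

    #define HZ 1000u
    #define CMCI_STORM_INTERVAL  (1 * HZ)
    #define CMCI_STORM_THRESHOLD 15

    static unsigned long jiffies;      /* fake clock for the model */
    static unsigned int  storm_cnt;
    static unsigned long time_stamp;

    /* Model of cmci_storm_detect(): count interrupts inside a one-second
     * window; exceeding the threshold means "storm". */
    static int storm_detect(void)
    {
        if (jiffies <= time_stamp + CMCI_STORM_INTERVAL) {
            storm_cnt++;
        } else {
            storm_cnt = 1;
            time_stamp = jiffies;
        }
        return storm_cnt > CMCI_STORM_THRESHOLD;
    }

    int main(void)
    {
        for (int i = 0; i < 20; i++) {
            jiffies += 10;             /* 100 interrupts/second pace */
            if (storm_detect()) {
                printf("storm detected after %d interrupts\n", i + 1);
                break;
            }
        }
        return 0;
    }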
Use the algorithm recommended in the SDM to discover shared * banks. */ -static void cmci_discover(int banks, int boot) +static void cmci_discover(int banks) { unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned); unsigned long flags; - int hdr = 0; int i; + int bios_wrong_thresh = 0; raw_spin_lock_irqsave(&cmci_discover_lock, flags); for (i = 0; i < banks; i++) { u64 val; + int bios_zero_thresh = 0; if (test_bit(i, owned)) continue; @@ -96,29 +195,52 @@ static void cmci_discover(int banks, int boot) /* Already owned by someone else? */ if (val & MCI_CTL2_CMCI_EN) { - if (test_and_clear_bit(i, owned) && !boot) - print_update("SHD", &hdr, i); + clear_bit(i, owned); __clear_bit(i, __get_cpu_var(mce_poll_banks)); continue; } - val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK; - val |= MCI_CTL2_CMCI_EN | CMCI_THRESHOLD; + if (!mce_bios_cmci_threshold) { + val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK; + val |= CMCI_THRESHOLD; + } else if (!(val & MCI_CTL2_CMCI_THRESHOLD_MASK)) { + /* + * If bios_cmci_threshold boot option was specified + * but the threshold is zero, we'll try to initialize + * it to 1. + */ + bios_zero_thresh = 1; + val |= CMCI_THRESHOLD; + } + + val |= MCI_CTL2_CMCI_EN; wrmsrl(MSR_IA32_MCx_CTL2(i), val); rdmsrl(MSR_IA32_MCx_CTL2(i), val); /* Did the enable bit stick? -- the bank supports CMCI */ if (val & MCI_CTL2_CMCI_EN) { - if (!test_and_set_bit(i, owned) && !boot) - print_update("CMCI", &hdr, i); + set_bit(i, owned); __clear_bit(i, __get_cpu_var(mce_poll_banks)); + /* + * We are able to set thresholds for some banks that + * had a threshold of 0. This means the BIOS has not + * set the thresholds properly or does not work with + * this boot option. Note down now and report later. + */ + if (mce_bios_cmci_threshold && bios_zero_thresh && + (val & MCI_CTL2_CMCI_THRESHOLD_MASK)) + bios_wrong_thresh = 1; } else { WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks))); } } raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); - if (hdr) - printk(KERN_CONT "\n"); + if (mce_bios_cmci_threshold && bios_wrong_thresh) { + pr_info_once( + "bios_cmci_threshold: Some banks do not have valid thresholds set\n"); + pr_info_once( + "bios_cmci_threshold: Make sure your BIOS supports this boot option\n"); + } } /* @@ -156,7 +278,7 @@ void cmci_clear(void) continue; /* Disable CMCI */ rdmsrl(MSR_IA32_MCx_CTL2(i), val); - val &= ~(MCI_CTL2_CMCI_EN|MCI_CTL2_CMCI_THRESHOLD_MASK); + val &= ~MCI_CTL2_CMCI_EN; wrmsrl(MSR_IA32_MCx_CTL2(i), val); __clear_bit(i, __get_cpu_var(mce_banks_owned)); } @@ -186,7 +308,7 @@ void cmci_rediscover(int dying) continue; /* Recheck banks in case CPUs don't all have the same */ if (cmci_supported(&banks)) - cmci_discover(banks, 0); + cmci_discover(banks); } set_cpus_allowed_ptr(current, old); @@ -200,7 +322,7 @@ void cmci_reenable(void) { int banks; if (cmci_supported(&banks)) - cmci_discover(banks, 0); + cmci_discover(banks); } static void intel_init_cmci(void) @@ -211,7 +333,7 @@ static void intel_init_cmci(void) return; mce_threshold_vector = intel_threshold_interrupt; - cmci_discover(banks, 1); + cmci_discover(banks); /* * For CPU #0 this runs with still disabled APIC, but that's * ok because only the vector is set up. 
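[Editor's sketch] The discovery loop above probes each bank by writing MCI_CTL2_CMCI_EN and checking whether the bit sticks on read-back; banks already enabled by another CPU are skipped, and claimed banks drop out of the poll set. A model with an array of fake MSRs (bank 2 is pre-owned by "another CPU", bank 3 silently drops the enable bit, i.e. no CMCI support):

    #include <stdio.h>

    #define MCI_CTL2_CMCI_EN             (1ull << 30)
    #define MCI_CTL2_CMCI_THRESHOLD_MASK 0x7fffull
    #define CMCI_THRESHOLD               1

    static unsigned long long msr[4];         /* fake MCx_CTL2 registers */
    static int owned[4], polled[4] = {1, 1, 1, 1};

    int main(void)
    {
        msr[2] = MCI_CTL2_CMCI_EN | 5;        /* someone else's bank */

        for (int i = 0; i < 4; i++) {
            unsigned long long val = msr[i];

            if (val & MCI_CTL2_CMCI_EN) {     /* already owned elsewhere */
                polled[i] = 0;
                continue;
            }
            val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
            val |= MCI_CTL2_CMCI_EN | CMCI_THRESHOLD;
            msr[i] = (i == 3) ? 0 : val;      /* bank 3: EN does not stick */

            if (msr[i] & MCI_CTL2_CMCI_EN) {  /* the bit stuck: bank is ours */
                owned[i] = 1;
                polled[i] = 0;
            }
        }
        for (int i = 0; i < 4; i++)
            printf("bank %d: owned=%d polled=%d\n", i, owned[i], polled[i]);
        return 0;
    }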
We still do another diff --git a/arch/x86/kernel/cpu/mkcapflags.pl b/arch/x86/kernel/cpu/mkcapflags.pl index c7b3fe2d72e0..091972ef49de 100644 --- a/arch/x86/kernel/cpu/mkcapflags.pl +++ b/arch/x86/kernel/cpu/mkcapflags.pl @@ -8,7 +8,10 @@ open(IN, "< $in\0") or die "$0: cannot open: $in: $!\n"; open(OUT, "> $out\0") or die "$0: cannot create: $out: $!\n"; -print OUT "#include <asm/cpufeature.h>\n\n"; +print OUT "#ifndef _ASM_X86_CPUFEATURE_H\n"; +print OUT "#include <asm/cpufeature.h>\n"; +print OUT "#endif\n"; +print OUT "\n"; print OUT "const char * const x86_cap_flags[NCAPINTS*32] = {\n"; %features = (); diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 6605a81ba339..8b6defe7eefc 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -586,6 +586,8 @@ extern struct event_constraint intel_westmere_pebs_event_constraints[]; extern struct event_constraint intel_snb_pebs_event_constraints[]; +extern struct event_constraint intel_ivb_pebs_event_constraints[]; + struct event_constraint *intel_pebs_constraints(struct perf_event *event); void intel_pmu_pebs_enable(struct perf_event *event); diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c index 7bfb5bec8630..eebd5ffe1bba 100644 --- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c +++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c @@ -209,6 +209,15 @@ static int perf_ibs_precise_event(struct perf_event *event, u64 *config) return -EOPNOTSUPP; } +static const struct perf_event_attr ibs_notsupp = { + .exclude_user = 1, + .exclude_kernel = 1, + .exclude_hv = 1, + .exclude_idle = 1, + .exclude_host = 1, + .exclude_guest = 1, +}; + static int perf_ibs_init(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; @@ -229,6 +238,9 @@ static int perf_ibs_init(struct perf_event *event) if (event->pmu != &perf_ibs->pmu) return -ENOENT; + if (perf_flags(&event->attr) & perf_flags(&ibs_notsupp)) + return -EINVAL; + if (config & ~perf_ibs->config_mask) return -EINVAL; diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 7f2739e03e79..6bca492b8547 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -2008,6 +2008,7 @@ __init int intel_pmu_init(void) break; case 28: /* Atom */ + case 54: /* Cedarview */ memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, sizeof(hw_cache_event_ids)); @@ -2047,7 +2048,6 @@ __init int intel_pmu_init(void) case 42: /* SandyBridge */ case 45: /* SandyBridge, "Romley-EP" */ x86_add_quirk(intel_sandybridge_quirk); - case 58: /* IvyBridge */ memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, @@ -2072,6 +2072,29 @@ __init int intel_pmu_init(void) pr_cont("SandyBridge events, "); break; + case 58: /* IvyBridge */ + memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, + sizeof(hw_cache_event_ids)); + memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, + sizeof(hw_cache_extra_regs)); + + intel_pmu_lbr_init_snb(); + + x86_pmu.event_constraints = intel_snb_event_constraints; + x86_pmu.pebs_constraints = intel_ivb_pebs_event_constraints; + x86_pmu.pebs_aliases = intel_pebs_aliases_snb; + x86_pmu.extra_regs = intel_snb_extra_regs; + /* all extra regs are per-cpu when HT is on */ + x86_pmu.er_flags |= ERF_HAS_RSP_1; + x86_pmu.er_flags |= ERF_NO_HT_SHARING; + + /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */ +
intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = + X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1); + + pr_cont("IvyBridge events, "); + break; + default: switch (x86_pmu.version) { diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index e38d97bf4259..826054a4f2ee 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -407,6 +407,20 @@ struct event_constraint intel_snb_pebs_event_constraints[] = { EVENT_CONSTRAINT_END }; +struct event_constraint intel_ivb_pebs_event_constraints[] = { + INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */ + INTEL_UEVENT_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */ + INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */ + INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */ + EVENT_CONSTRAINT_END +}; + struct event_constraint *intel_pebs_constraints(struct perf_event *event) { struct event_constraint *c; diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c index 520b4265fcd2..da02e9cc3754 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c @@ -686,7 +686,8 @@ void intel_pmu_lbr_init_atom(void) * to have an operational LBR which can freeze * on PMU interrupt */ - if (boot_cpu_data.x86_mask < 10) { + if (boot_cpu_data.x86_model == 28 + && boot_cpu_data.x86_mask < 10) { pr_cont("LBR disabled due to erratum"); return; } diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index 0a5571080e74..99d96a4978b5 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -661,6 +661,11 @@ static void snb_uncore_msr_init_box(struct intel_uncore_box *box) } } +static struct uncore_event_desc snb_uncore_events[] = { + INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0x00"), + { /* end: all zeroes */ }, +}; + static struct attribute *snb_uncore_formats_attr[] = { &format_attr_event.attr, &format_attr_umask.attr, @@ -704,6 +709,7 @@ static struct intel_uncore_type snb_uncore_cbox = { .constraints = snb_uncore_cbox_constraints, .ops = &snb_uncore_msr_ops, .format_group = &snb_uncore_format_group, + .event_descs = snb_uncore_events, }; static struct intel_uncore_type *snb_msr_uncores[] = { @@ -1944,7 +1950,7 @@ struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, int cp static struct intel_uncore_box * uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu) { - static struct intel_uncore_box *box; + struct intel_uncore_box *box; box = *per_cpu_ptr(pmu->box, cpu); if (box) @@ -2341,6 +2347,27 @@ int uncore_pmu_event_init(struct perf_event *event) return ret; } +static ssize_t uncore_get_attr_cpumask(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int n = cpulist_scnprintf(buf, PAGE_SIZE - 2, &uncore_cpu_mask); + + buf[n++] = '\n'; + buf[n] = '\0'; + return n; +} + +static DEVICE_ATTR(cpumask, S_IRUGO, uncore_get_attr_cpumask, NULL); + +static struct attribute 
*uncore_pmu_attrs[] = { + &dev_attr_cpumask.attr, + NULL, +}; + +static struct attribute_group uncore_pmu_attr_group = { + .attrs = uncore_pmu_attrs, +}; + static int __init uncore_pmu_register(struct intel_uncore_pmu *pmu) { int ret; @@ -2378,8 +2405,8 @@ static void __init uncore_type_exit(struct intel_uncore_type *type) free_percpu(type->pmus[i].box); kfree(type->pmus); type->pmus = NULL; - kfree(type->attr_groups[1]); - type->attr_groups[1] = NULL; + kfree(type->events_group); + type->events_group = NULL; } static void __init uncore_types_exit(struct intel_uncore_type **types) @@ -2431,9 +2458,10 @@ static int __init uncore_type_init(struct intel_uncore_type *type) for (j = 0; j < i; j++) attrs[j] = &type->event_descs[j].attr.attr; - type->attr_groups[1] = events_group; + type->events_group = events_group; } + type->pmu_group = &uncore_pmu_attr_group; type->pmus = pmus; return 0; fail: diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h index 5b81c1856aac..e68a4550e952 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h @@ -369,10 +369,12 @@ struct intel_uncore_type { struct intel_uncore_pmu *pmus; struct intel_uncore_ops *ops; struct uncore_event_desc *event_descs; - const struct attribute_group *attr_groups[3]; + const struct attribute_group *attr_groups[4]; }; -#define format_group attr_groups[0] +#define pmu_group attr_groups[0] +#define format_group attr_groups[1] +#define events_group attr_groups[2] struct intel_uncore_ops { void (*init_box)(struct intel_uncore_box *); diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index 8022c6681485..fbd895562292 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c @@ -140,10 +140,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) static void *c_start(struct seq_file *m, loff_t *pos) { - if (*pos == 0) /* just in case, cpu 0 is not the first */ - *pos = cpumask_first(cpu_online_mask); - else - *pos = cpumask_next(*pos - 1, cpu_online_mask); + *pos = cpumask_next(*pos - 1, cpu_online_mask); if ((*pos) < nr_cpu_ids) return &cpu_data(*pos); return NULL; diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index 39472dd2323f..60c78917190c 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c @@ -199,12 +199,14 @@ static int __init cpuid_init(void) goto out_chrdev; } cpuid_class->devnode = cpuid_devnode; + get_online_cpus(); for_each_online_cpu(i) { err = cpuid_device_create(i); if (err != 0) goto out_class; } register_hotcpu_notifier(&cpuid_class_cpu_notifier); + put_online_cpus(); err = 0; goto out; @@ -214,6 +216,7 @@ out_class: for_each_online_cpu(i) { cpuid_device_destroy(i); } + put_online_cpus(); class_destroy(cpuid_class); out_chrdev: __unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid"); @@ -225,11 +228,13 @@ static void __exit cpuid_exit(void) { int cpu = 0; + get_online_cpus(); for_each_online_cpu(cpu) cpuid_device_destroy(cpu); class_destroy(cpuid_class); __unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid"); unregister_hotcpu_notifier(&cpuid_class_cpu_notifier); + put_online_cpus(); } module_init(cpuid_init); diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c index 3ae2ced4a874..b1581527a236 100644 --- a/arch/x86/kernel/devicetree.c +++ b/arch/x86/kernel/devicetree.c @@ -342,6 +342,47 @@ const struct irq_domain_ops ioapic_irq_domain_ops = { .xlate = ioapic_xlate, }; +static void dt_add_ioapic_domain(unsigned int ioapic_num, + 
struct device_node *np) +{ + struct irq_domain *id; + struct mp_ioapic_gsi *gsi_cfg; + int ret; + int num; + + gsi_cfg = mp_ioapic_gsi_routing(ioapic_num); + num = gsi_cfg->gsi_end - gsi_cfg->gsi_base + 1; + + id = irq_domain_add_linear(np, num, &ioapic_irq_domain_ops, + (void *)ioapic_num); + BUG_ON(!id); + if (gsi_cfg->gsi_base == 0) { + /* + * The first NR_IRQS_LEGACY irq descs are allocated in + * early_irq_init() and need just a mapping. The + * remaining irqs need both. All of them are preallocated + * and assigned so we can keep the 1:1 mapping which the ioapic + * is having. + */ + ret = irq_domain_associate_many(id, 0, 0, NR_IRQS_LEGACY); + if (ret) + pr_err("Error mapping legacy IRQs: %d\n", ret); + + if (num > NR_IRQS_LEGACY) { + ret = irq_create_strict_mappings(id, NR_IRQS_LEGACY, + NR_IRQS_LEGACY, num - NR_IRQS_LEGACY); + if (ret) + pr_err("Error creating mapping for the " + "remaining IRQs: %d\n", ret); + } + irq_set_default_host(id); + } else { + ret = irq_create_strict_mappings(id, gsi_cfg->gsi_base, 0, num); + if (ret) + pr_err("Error creating IRQ mapping: %d\n", ret); + } +} + static void __init ioapic_add_ofnode(struct device_node *np) { struct resource r; @@ -356,15 +397,7 @@ static void __init ioapic_add_ofnode(struct device_node *np) for (i = 0; i < nr_ioapics; i++) { if (r.start == mpc_ioapic_addr(i)) { - struct irq_domain *id; - struct mp_ioapic_gsi *gsi_cfg; - - gsi_cfg = mp_ioapic_gsi_routing(i); - - id = irq_domain_add_legacy(np, 32, gsi_cfg->gsi_base, 0, - &ioapic_irq_domain_ops, - (void*)i); - BUG_ON(!id); + dt_add_ioapic_domain(i, np); return; } } diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 623f28837476..0750e3ba87c0 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -57,6 +57,7 @@ #include <asm/cpufeature.h> #include <asm/alternative-asm.h> #include <asm/asm.h> +#include <asm/smap.h> /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. 
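[Editor's sketch] The entry_32.S changes that follow bracket the single user-memory load in the sysenter path with ASM_STAC/ASM_CLAC and drop AC at the top of every kernel entry. A toy model of the discipline, with an ordinary variable standing in for EFLAGS.AC; real stac/clac are privileged instructions, so this is illustration only:

    #include <stdio.h>

    /* Model of the SMAP discipline: AC is raised (stac) only around the
     * instructions that legitimately touch user memory, and dropped (clac)
     * immediately afterwards. */
    static int eflags_ac;                /* stands in for EFLAGS.AC */

    static void stac(void) { eflags_ac = 1; }
    static void clac(void) { eflags_ac = 0; }

    static int get_user_model(const int *uptr, int *out)
    {
        stac();                          /* ASM_STAC before the access */
        *out = *uptr;                    /* would fault without AC under SMAP */
        clac();                          /* ASM_CLAC right after */
        return 0;
    }

    int main(void)
    {
        int user_word = 42, v;
        get_user_model(&user_word, &v);
        printf("v=%d ac=%d\n", v, eflags_ac);
        return 0;
    }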
*/ #include <linux/elf-em.h> @@ -407,7 +408,9 @@ sysenter_past_esp: */ cmpl $__PAGE_OFFSET-3,%ebp jae syscall_fault + ASM_STAC 1: movl (%ebp),%ebp + ASM_CLAC movl %ebp,PT_EBP(%esp) _ASM_EXTABLE(1b,syscall_fault) @@ -488,6 +491,7 @@ ENDPROC(ia32_sysenter_target) # system call handler stub ENTRY(system_call) RING0_INT_FRAME # can't unwind into user space anyway + ASM_CLAC pushl_cfi %eax # save orig_eax SAVE_ALL GET_THREAD_INFO(%ebp) @@ -670,6 +674,7 @@ END(syscall_exit_work) RING0_INT_FRAME # can't unwind into user space anyway syscall_fault: + ASM_CLAC GET_THREAD_INFO(%ebp) movl $-EFAULT,PT_EAX(%esp) jmp resume_userspace @@ -825,6 +830,7 @@ END(interrupt) */ .p2align CONFIG_X86_L1_CACHE_SHIFT common_interrupt: + ASM_CLAC addl $-0x80,(%esp) /* Adjust vector into the [-256,-1] range */ SAVE_ALL TRACE_IRQS_OFF @@ -841,6 +847,7 @@ ENDPROC(common_interrupt) #define BUILD_INTERRUPT3(name, nr, fn) \ ENTRY(name) \ RING0_INT_FRAME; \ + ASM_CLAC; \ pushl_cfi $~(nr); \ SAVE_ALL; \ TRACE_IRQS_OFF \ @@ -857,6 +864,7 @@ ENDPROC(name) ENTRY(coprocessor_error) RING0_INT_FRAME + ASM_CLAC pushl_cfi $0 pushl_cfi $do_coprocessor_error jmp error_code @@ -865,6 +873,7 @@ END(coprocessor_error) ENTRY(simd_coprocessor_error) RING0_INT_FRAME + ASM_CLAC pushl_cfi $0 #ifdef CONFIG_X86_INVD_BUG /* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */ @@ -886,6 +895,7 @@ END(simd_coprocessor_error) ENTRY(device_not_available) RING0_INT_FRAME + ASM_CLAC pushl_cfi $-1 # mark this as an int pushl_cfi $do_device_not_available jmp error_code @@ -906,6 +916,7 @@ END(native_irq_enable_sysexit) ENTRY(overflow) RING0_INT_FRAME + ASM_CLAC pushl_cfi $0 pushl_cfi $do_overflow jmp error_code @@ -914,6 +925,7 @@ END(overflow) ENTRY(bounds) RING0_INT_FRAME + ASM_CLAC pushl_cfi $0 pushl_cfi $do_bounds jmp error_code @@ -922,6 +934,7 @@ END(bounds) ENTRY(invalid_op) RING0_INT_FRAME + ASM_CLAC pushl_cfi $0 pushl_cfi $do_invalid_op jmp error_code @@ -930,6 +943,7 @@ END(invalid_op) ENTRY(coprocessor_segment_overrun) RING0_INT_FRAME + ASM_CLAC pushl_cfi $0 pushl_cfi $do_coprocessor_segment_overrun jmp error_code @@ -938,6 +952,7 @@ END(coprocessor_segment_overrun) ENTRY(invalid_TSS) RING0_EC_FRAME + ASM_CLAC pushl_cfi $do_invalid_TSS jmp error_code CFI_ENDPROC @@ -945,6 +960,7 @@ END(invalid_TSS) ENTRY(segment_not_present) RING0_EC_FRAME + ASM_CLAC pushl_cfi $do_segment_not_present jmp error_code CFI_ENDPROC @@ -952,6 +968,7 @@ END(segment_not_present) ENTRY(stack_segment) RING0_EC_FRAME + ASM_CLAC pushl_cfi $do_stack_segment jmp error_code CFI_ENDPROC @@ -959,6 +976,7 @@ END(stack_segment) ENTRY(alignment_check) RING0_EC_FRAME + ASM_CLAC pushl_cfi $do_alignment_check jmp error_code CFI_ENDPROC @@ -966,6 +984,7 @@ END(alignment_check) ENTRY(divide_error) RING0_INT_FRAME + ASM_CLAC pushl_cfi $0 # no error code pushl_cfi $do_divide_error jmp error_code @@ -975,6 +994,7 @@ END(divide_error) #ifdef CONFIG_X86_MCE ENTRY(machine_check) RING0_INT_FRAME + ASM_CLAC pushl_cfi $0 pushl_cfi machine_check_vector jmp error_code @@ -984,6 +1004,7 @@ END(machine_check) ENTRY(spurious_interrupt_bug) RING0_INT_FRAME + ASM_CLAC pushl_cfi $0 pushl_cfi $do_spurious_interrupt_bug jmp error_code @@ -1109,17 +1130,21 @@ ENTRY(ftrace_caller) pushl %eax pushl %ecx pushl %edx - movl 0xc(%esp), %eax + pushl $0 /* Pass NULL as regs pointer */ + movl 4*4(%esp), %eax movl 0x4(%ebp), %edx + leal function_trace_op, %ecx subl $MCOUNT_INSN_SIZE, %eax .globl ftrace_call ftrace_call: call ftrace_stub + addl $4,%esp /* skip NULL pointer */ popl %edx popl %ecx 
popl %eax +ftrace_ret: #ifdef CONFIG_FUNCTION_GRAPH_TRACER .globl ftrace_graph_call ftrace_graph_call: @@ -1131,6 +1156,71 @@ ftrace_stub: ret END(ftrace_caller) +ENTRY(ftrace_regs_caller) + pushf /* push flags before compare (in cs location) */ + cmpl $0, function_trace_stop + jne ftrace_restore_flags + + /* + * i386 does not save SS and ESP when coming from kernel. + * Instead, to get sp, ®s->sp is used (see ptrace.h). + * Unfortunately, that means eflags must be at the same location + * as the current return ip is. We move the return ip into the + * ip location, and move flags into the return ip location. + */ + pushl 4(%esp) /* save return ip into ip slot */ + + pushl $0 /* Load 0 into orig_ax */ + pushl %gs + pushl %fs + pushl %es + pushl %ds + pushl %eax + pushl %ebp + pushl %edi + pushl %esi + pushl %edx + pushl %ecx + pushl %ebx + + movl 13*4(%esp), %eax /* Get the saved flags */ + movl %eax, 14*4(%esp) /* Move saved flags into regs->flags location */ + /* clobbering return ip */ + movl $__KERNEL_CS,13*4(%esp) + + movl 12*4(%esp), %eax /* Load ip (1st parameter) */ + subl $MCOUNT_INSN_SIZE, %eax /* Adjust ip */ + movl 0x4(%ebp), %edx /* Load parent ip (2nd parameter) */ + leal function_trace_op, %ecx /* Save ftrace_pos in 3rd parameter */ + pushl %esp /* Save pt_regs as 4th parameter */ + +GLOBAL(ftrace_regs_call) + call ftrace_stub + + addl $4, %esp /* Skip pt_regs */ + movl 14*4(%esp), %eax /* Move flags back into cs */ + movl %eax, 13*4(%esp) /* Needed to keep addl from modifying flags */ + movl 12*4(%esp), %eax /* Get return ip from regs->ip */ + movl %eax, 14*4(%esp) /* Put return ip back for ret */ + + popl %ebx + popl %ecx + popl %edx + popl %esi + popl %edi + popl %ebp + popl %eax + popl %ds + popl %es + popl %fs + popl %gs + addl $8, %esp /* Skip orig_ax and ip */ + popf /* Pop flags at end (no addl to corrupt flags) */ + jmp ftrace_ret + +ftrace_restore_flags: + popf + jmp ftrace_stub #else /* ! CONFIG_DYNAMIC_FTRACE */ ENTRY(mcount) @@ -1171,9 +1261,6 @@ END(mcount) #ifdef CONFIG_FUNCTION_GRAPH_TRACER ENTRY(ftrace_graph_caller) - cmpl $0, function_trace_stop - jne ftrace_stub - pushl %eax pushl %ecx pushl %edx @@ -1207,6 +1294,7 @@ return_to_handler: ENTRY(page_fault) RING0_EC_FRAME + ASM_CLAC pushl_cfi $do_page_fault ALIGN error_code: @@ -1279,6 +1367,7 @@ END(page_fault) ENTRY(debug) RING0_INT_FRAME + ASM_CLAC cmpl $ia32_sysenter_target,(%esp) jne debug_stack_correct FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn @@ -1303,6 +1392,7 @@ END(debug) */ ENTRY(nmi) RING0_INT_FRAME + ASM_CLAC pushl_cfi %eax movl %ss, %eax cmpw $__ESPFIX_SS, %ax @@ -1373,6 +1463,7 @@ END(nmi) ENTRY(int3) RING0_INT_FRAME + ASM_CLAC pushl_cfi $-1 # mark this as an int SAVE_ALL TRACE_IRQS_OFF @@ -1393,6 +1484,7 @@ END(general_protection) #ifdef CONFIG_KVM_GUEST ENTRY(async_page_fault) RING0_EC_FRAME + ASM_CLAC pushl_cfi $do_async_page_fault jmp error_code CFI_ENDPROC diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 69babd8c834f..44531acd9a81 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -56,6 +56,8 @@ #include <asm/ftrace.h> #include <asm/percpu.h> #include <asm/asm.h> +#include <asm/rcu.h> +#include <asm/smap.h> #include <linux/err.h> /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. 
*/ @@ -68,25 +70,51 @@ .section .entry.text, "ax" #ifdef CONFIG_FUNCTION_TRACER + +#ifdef CC_USING_FENTRY +# define function_hook __fentry__ +#else +# define function_hook mcount +#endif + #ifdef CONFIG_DYNAMIC_FTRACE -ENTRY(mcount) + +ENTRY(function_hook) retq -END(mcount) +END(function_hook) + +/* skip is set if stack has been adjusted */ +.macro ftrace_caller_setup skip=0 + MCOUNT_SAVE_FRAME \skip + + /* Load the ftrace_ops into the 3rd parameter */ + leaq function_trace_op, %rdx + + /* Load ip into the first parameter */ + movq RIP(%rsp), %rdi + subq $MCOUNT_INSN_SIZE, %rdi + /* Load the parent_ip into the second parameter */ +#ifdef CC_USING_FENTRY + movq SS+16(%rsp), %rsi +#else + movq 8(%rbp), %rsi +#endif +.endm ENTRY(ftrace_caller) + /* Check if tracing was disabled (quick check) */ cmpl $0, function_trace_stop jne ftrace_stub - MCOUNT_SAVE_FRAME - - movq 0x38(%rsp), %rdi - movq 8(%rbp), %rsi - subq $MCOUNT_INSN_SIZE, %rdi + ftrace_caller_setup + /* regs go into 4th parameter (but make it NULL) */ + movq $0, %rcx GLOBAL(ftrace_call) call ftrace_stub MCOUNT_RESTORE_FRAME +ftrace_return: #ifdef CONFIG_FUNCTION_GRAPH_TRACER GLOBAL(ftrace_graph_call) @@ -97,8 +125,78 @@ GLOBAL(ftrace_stub) retq END(ftrace_caller) +ENTRY(ftrace_regs_caller) + /* Save the current flags before compare (in SS location)*/ + pushfq + + /* Check if tracing was disabled (quick check) */ + cmpl $0, function_trace_stop + jne ftrace_restore_flags + + /* skip=8 to skip flags saved in SS */ + ftrace_caller_setup 8 + + /* Save the rest of pt_regs */ + movq %r15, R15(%rsp) + movq %r14, R14(%rsp) + movq %r13, R13(%rsp) + movq %r12, R12(%rsp) + movq %r11, R11(%rsp) + movq %r10, R10(%rsp) + movq %rbp, RBP(%rsp) + movq %rbx, RBX(%rsp) + /* Copy saved flags */ + movq SS(%rsp), %rcx + movq %rcx, EFLAGS(%rsp) + /* Kernel segments */ + movq $__KERNEL_DS, %rcx + movq %rcx, SS(%rsp) + movq $__KERNEL_CS, %rcx + movq %rcx, CS(%rsp) + /* Stack - skipping return address */ + leaq SS+16(%rsp), %rcx + movq %rcx, RSP(%rsp) + + /* regs go into 4th parameter */ + leaq (%rsp), %rcx + +GLOBAL(ftrace_regs_call) + call ftrace_stub + + /* Copy flags back to SS, to restore them */ + movq EFLAGS(%rsp), %rax + movq %rax, SS(%rsp) + + /* Handlers can change the RIP */ + movq RIP(%rsp), %rax + movq %rax, SS+8(%rsp) + + /* restore the rest of pt_regs */ + movq R15(%rsp), %r15 + movq R14(%rsp), %r14 + movq R13(%rsp), %r13 + movq R12(%rsp), %r12 + movq R10(%rsp), %r10 + movq RBP(%rsp), %rbp + movq RBX(%rsp), %rbx + + /* skip=8 to skip flags saved in SS */ + MCOUNT_RESTORE_FRAME 8 + + /* Restore flags */ + popfq + + jmp ftrace_return +ftrace_restore_flags: + popfq + jmp ftrace_stub + +END(ftrace_regs_caller) + + #else /* ! 
CONFIG_DYNAMIC_FTRACE */ -ENTRY(mcount) + +ENTRY(function_hook) cmpl $0, function_trace_stop jne ftrace_stub @@ -119,8 +217,12 @@ GLOBAL(ftrace_stub) trace: MCOUNT_SAVE_FRAME - movq 0x38(%rsp), %rdi + movq RIP(%rsp), %rdi +#ifdef CC_USING_FENTRY + movq SS+16(%rsp), %rsi +#else movq 8(%rbp), %rsi +#endif subq $MCOUNT_INSN_SIZE, %rdi call *ftrace_trace_function @@ -128,20 +230,22 @@ trace: MCOUNT_RESTORE_FRAME jmp ftrace_stub -END(mcount) +END(function_hook) #endif /* CONFIG_DYNAMIC_FTRACE */ #endif /* CONFIG_FUNCTION_TRACER */ #ifdef CONFIG_FUNCTION_GRAPH_TRACER ENTRY(ftrace_graph_caller) - cmpl $0, function_trace_stop - jne ftrace_stub - MCOUNT_SAVE_FRAME +#ifdef CC_USING_FENTRY + leaq SS+16(%rsp), %rdi + movq $0, %rdx /* No framepointers needed */ +#else leaq 8(%rbp), %rdi - movq 0x38(%rsp), %rsi movq (%rbp), %rdx +#endif + movq RIP(%rsp), %rsi subq $MCOUNT_INSN_SIZE, %rsi call prepare_ftrace_return @@ -342,15 +446,15 @@ ENDPROC(native_usergs_sysret64) .macro SAVE_ARGS_IRQ cld /* start from rbp in pt_regs and jump over */ - movq_cfi rdi, RDI-RBP - movq_cfi rsi, RSI-RBP - movq_cfi rdx, RDX-RBP - movq_cfi rcx, RCX-RBP - movq_cfi rax, RAX-RBP - movq_cfi r8, R8-RBP - movq_cfi r9, R9-RBP - movq_cfi r10, R10-RBP - movq_cfi r11, R11-RBP + movq_cfi rdi, (RDI-RBP) + movq_cfi rsi, (RSI-RBP) + movq_cfi rdx, (RDX-RBP) + movq_cfi rcx, (RCX-RBP) + movq_cfi rax, (RAX-RBP) + movq_cfi r8, (R8-RBP) + movq_cfi r9, (R9-RBP) + movq_cfi r10, (R10-RBP) + movq_cfi r11, (R11-RBP) /* Save rbp so that we can unwind from get_irq_regs() */ movq_cfi rbp, 0 @@ -384,7 +488,7 @@ ENDPROC(native_usergs_sysret64) .endm ENTRY(save_rest) - PARTIAL_FRAME 1 REST_SKIP+8 + PARTIAL_FRAME 1 (REST_SKIP+8) movq 5*8+16(%rsp), %r11 /* save return address */ movq_cfi rbx, RBX+16 movq_cfi rbp, RBP+16 @@ -440,7 +544,7 @@ ENTRY(ret_from_fork) LOCK ; btr $TIF_FORK,TI_flags(%r8) - pushq_cfi kernel_eflags(%rip) + pushq_cfi $0x0002 popfq_cfi # reset kernel eflags call schedule_tail # rdi: 'prev' task parameter @@ -465,7 +569,8 @@ END(ret_from_fork) * System call entry. Up to 6 arguments in registers are supported. * * SYSCALL does not save anything on the stack and does not change the - * stack pointer. + * stack pointer. However, it does mask the flags register for us, so + * CLD and CLAC are not needed. 
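[Editor's sketch] What the ftrace_regs_caller plumbing above buys is easiest to see from C: a callback registered with regs saving receives a full register snapshot and may rewrite the saved ip, steering execution somewhere else on return. A userspace model with a stand-in pt_regs; the function-pointer round-trip through unsigned long is implementation-defined but fine on the x86 targets this code is about:

    #include <stdio.h>

    struct fake_pt_regs { unsigned long ip, sp, flags; };  /* stand-in type */

    static void my_func(void)        { puts("my_func"); }
    static void my_replacement(void) { puts("my_replacement"); }

    /* Model of a regs-saving ftrace callback: handlers can change RIP. */
    static void regs_callback(unsigned long ip, unsigned long parent_ip,
                              struct fake_pt_regs *regs)
    {
        (void)ip; (void)parent_ip;
        regs->ip = (unsigned long)my_replacement;
    }

    int main(void)
    {
        struct fake_pt_regs regs = { .ip = (unsigned long)my_func };
        regs_callback(regs.ip, 0, &regs);
        ((void (*)(void))regs.ip)();   /* "return" to the rewritten ip */
        return 0;
    }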
*/ /* @@ -565,7 +670,7 @@ sysret_careful: TRACE_IRQS_ON ENABLE_INTERRUPTS(CLBR_NONE) pushq_cfi %rdi - call schedule + SCHEDULE_USER popq_cfi %rdi jmp sysret_check @@ -678,7 +783,7 @@ int_careful: TRACE_IRQS_ON ENABLE_INTERRUPTS(CLBR_NONE) pushq_cfi %rdi - call schedule + SCHEDULE_USER popq_cfi %rdi DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF @@ -884,6 +989,7 @@ END(interrupt) */ .p2align CONFIG_X86_L1_CACHE_SHIFT common_interrupt: + ASM_CLAC XCPT_FRAME addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */ interrupt do_IRQ @@ -974,7 +1080,7 @@ retint_careful: TRACE_IRQS_ON ENABLE_INTERRUPTS(CLBR_NONE) pushq_cfi %rdi - call schedule + SCHEDULE_USER popq_cfi %rdi GET_THREAD_INFO(%rcx) DISABLE_INTERRUPTS(CLBR_NONE) @@ -1023,6 +1129,7 @@ END(common_interrupt) */ .macro apicinterrupt num sym do_sym ENTRY(\sym) + ASM_CLAC INTR_FRAME pushq_cfi $~(\num) .Lcommon_\sym: @@ -1077,6 +1184,7 @@ apicinterrupt IRQ_WORK_VECTOR \ */ .macro zeroentry sym do_sym ENTRY(\sym) + ASM_CLAC INTR_FRAME PARAVIRT_ADJUST_EXCEPTION_FRAME pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ @@ -1094,6 +1202,7 @@ END(\sym) .macro paranoidzeroentry sym do_sym ENTRY(\sym) + ASM_CLAC INTR_FRAME PARAVIRT_ADJUST_EXCEPTION_FRAME pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ @@ -1112,6 +1221,7 @@ END(\sym) #define INIT_TSS_IST(x) PER_CPU_VAR(init_tss) + (TSS_ist + ((x) - 1) * 8) .macro paranoidzeroentry_ist sym do_sym ist ENTRY(\sym) + ASM_CLAC INTR_FRAME PARAVIRT_ADJUST_EXCEPTION_FRAME pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ @@ -1131,6 +1241,7 @@ END(\sym) .macro errorentry sym do_sym ENTRY(\sym) + ASM_CLAC XCPT_FRAME PARAVIRT_ADJUST_EXCEPTION_FRAME subq $ORIG_RAX-R15, %rsp @@ -1149,6 +1260,7 @@ END(\sym) /* error code is on the stack already */ .macro paranoiderrorentry sym do_sym ENTRY(\sym) + ASM_CLAC XCPT_FRAME PARAVIRT_ADJUST_EXCEPTION_FRAME subq $ORIG_RAX-R15, %rsp @@ -1449,7 +1561,7 @@ paranoid_userspace: paranoid_schedule: TRACE_IRQS_ON ENABLE_INTERRUPTS(CLBR_ANY) - call schedule + SCHEDULE_USER DISABLE_INTERRUPTS(CLBR_ANY) TRACE_IRQS_OFF jmp paranoid_userspace diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index c3a7cb4bf6e6..1d414029f1d8 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -206,6 +206,21 @@ static int ftrace_modify_code(unsigned long ip, unsigned const char *old_code, unsigned const char *new_code); +/* + * Should never be called: + * As it is only called by __ftrace_replace_code() which is called by + * ftrace_replace_code() that x86 overrides, and by ftrace_update_code() + * which is called to turn mcount into nops or nops into function calls + * but not to convert a function from not using regs to one that uses + * regs, which ftrace_modify_call() is for. 
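[Editor's sketch] The ftrace.c hunk below patches two call sites, ftrace_call and ftrace_regs_call, whenever the tracer function changes. A model of why both must move together, with function pointers standing in for the patched call instructions:

    #include <stdio.h>

    /* Model of ftrace_update_ftrace_func() after this patch: if only one
     * of the two patchable sites were redirected, the pt_regs-saving entry
     * path would keep calling the stale tracer. */
    typedef void (*ftrace_func_t)(const char *who);

    static void ftrace_stub(const char *who) { (void)who; }
    static void new_tracer(const char *who)  { printf("traced via %s\n", who); }

    static ftrace_func_t ftrace_call_site      = ftrace_stub;
    static ftrace_func_t ftrace_regs_call_site = ftrace_stub;

    static int update_ftrace_func(ftrace_func_t func)
    {
        ftrace_call_site = func;        /* plain entry path */
        ftrace_regs_call_site = func;   /* pt_regs-saving entry path */
        return 0;
    }

    int main(void)
    {
        update_ftrace_func(new_tracer);
        ftrace_call_site("ftrace_call");
        ftrace_regs_call_site("ftrace_regs_call");
        return 0;
    }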
+ */ +int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, + unsigned long addr) +{ + WARN_ON(1); + return -EINVAL; +} + int ftrace_update_ftrace_func(ftrace_func_t func) { unsigned long ip = (unsigned long)(&ftrace_call); @@ -220,6 +235,14 @@ int ftrace_update_ftrace_func(ftrace_func_t func) ret = ftrace_modify_code(ip, old, new); + /* Also update the regs callback function */ + if (!ret) { + ip = (unsigned long)(&ftrace_regs_call); + memcpy(old, &ftrace_regs_call, MCOUNT_INSN_SIZE); + new = ftrace_call_replace(ip, (unsigned long)func); + ret = ftrace_modify_code(ip, old, new); + } + atomic_dec(&modifying_ftrace_code); return ret; @@ -299,6 +322,32 @@ static int add_brk_on_nop(struct dyn_ftrace *rec) return add_break(rec->ip, old); } +/* + * If the record has the FTRACE_FL_REGS set, that means that it + * wants to convert to a callback that saves all regs. If FTRACE_FL_REGS + * is not set, then it wants to convert to the normal callback. + */ +static unsigned long get_ftrace_addr(struct dyn_ftrace *rec) +{ + if (rec->flags & FTRACE_FL_REGS) + return (unsigned long)FTRACE_REGS_ADDR; + else + return (unsigned long)FTRACE_ADDR; +} + +/* + * The FTRACE_FL_REGS_EN is set when the record already points to + * a function that saves all the regs. Basically the '_EN' version + * represents the current state of the function. + */ +static unsigned long get_ftrace_old_addr(struct dyn_ftrace *rec) +{ + if (rec->flags & FTRACE_FL_REGS_EN) + return (unsigned long)FTRACE_REGS_ADDR; + else + return (unsigned long)FTRACE_ADDR; +} + static int add_breakpoints(struct dyn_ftrace *rec, int enable) { unsigned long ftrace_addr; @@ -306,7 +355,7 @@ static int add_breakpoints(struct dyn_ftrace *rec, int enable) ret = ftrace_test_record(rec, enable); - ftrace_addr = (unsigned long)FTRACE_ADDR; + ftrace_addr = get_ftrace_addr(rec); switch (ret) { case FTRACE_UPDATE_IGNORE: @@ -316,6 +365,10 @@ static int add_breakpoints(struct dyn_ftrace *rec, int enable) /* converting nop to call */ return add_brk_on_nop(rec); + case FTRACE_UPDATE_MODIFY_CALL_REGS: + case FTRACE_UPDATE_MODIFY_CALL: + ftrace_addr = get_ftrace_old_addr(rec); + /* fall through */ case FTRACE_UPDATE_MAKE_NOP: /* converting a call to a nop */ return add_brk_on_call(rec, ftrace_addr); @@ -360,13 +413,21 @@ static int remove_breakpoint(struct dyn_ftrace *rec) * If not, don't touch the breakpoint, we may just create * a disaster.
*/ - ftrace_addr = (unsigned long)FTRACE_ADDR; + ftrace_addr = get_ftrace_addr(rec); + nop = ftrace_call_replace(ip, ftrace_addr); + + if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) == 0) + goto update; + + /* Check both ftrace_addr and ftrace_old_addr */ + ftrace_addr = get_ftrace_old_addr(rec); nop = ftrace_call_replace(ip, ftrace_addr); if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0) return -EINVAL; } + update: return probe_kernel_write((void *)ip, &nop[0], 1); } @@ -405,12 +466,14 @@ static int add_update(struct dyn_ftrace *rec, int enable) ret = ftrace_test_record(rec, enable); - ftrace_addr = (unsigned long)FTRACE_ADDR; + ftrace_addr = get_ftrace_addr(rec); switch (ret) { case FTRACE_UPDATE_IGNORE: return 0; + case FTRACE_UPDATE_MODIFY_CALL_REGS: + case FTRACE_UPDATE_MODIFY_CALL: case FTRACE_UPDATE_MAKE_CALL: /* converting nop to call */ return add_update_call(rec, ftrace_addr); @@ -455,12 +518,14 @@ static int finish_update(struct dyn_ftrace *rec, int enable) ret = ftrace_update_record(rec, enable); - ftrace_addr = (unsigned long)FTRACE_ADDR; + ftrace_addr = get_ftrace_addr(rec); switch (ret) { case FTRACE_UPDATE_IGNORE: return 0; + case FTRACE_UPDATE_MODIFY_CALL_REGS: + case FTRACE_UPDATE_MODIFY_CALL: case FTRACE_UPDATE_MAKE_CALL: /* converting nop to call */ return finish_update_call(rec, ftrace_addr); diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index d42ab17b7397..957a47aec64e 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -287,27 +287,28 @@ ENTRY(startup_32_smp) leal -__PAGE_OFFSET(%ecx),%esp default_entry: - /* * New page tables may be in 4Mbyte page mode and may * be using the global pages. * * NOTE! If we are on a 486 we may have no cr4 at all! - * So we do not try to touch it unless we really have - * some bits in it to set. This won't work if the BSP - * implements cr4 but this AP does not -- very unlikely - * but be warned! The same applies to the pse feature - * if not equally supported. --macro - * - * NOTE! We have to correct for the fact that we're - * not yet offset PAGE_OFFSET.. + * Specifically, cr4 exists if and only if CPUID exists, + * which in turn exists if and only if EFLAGS.ID exists. */ -#define cr4_bits pa(mmu_cr4_features) - movl cr4_bits,%edx - andl %edx,%edx - jz 6f - movl %cr4,%eax # Turn on paging options (PSE,PAE,..) - orl %edx,%eax + movl $X86_EFLAGS_ID,%ecx + pushl %ecx + popfl + pushfl + popl %eax + pushl $0 + popfl + pushfl + popl %edx + xorl %edx,%eax + testl %ecx,%eax + jz 6f # No ID flag = no CPUID = no CR4 + + movl pa(mmu_cr4_features),%eax movl %eax,%cr4 testb $X86_CR4_PAE, %al # check if PAE is enabled diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index f250431fb505..675a05012449 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -19,24 +19,17 @@ #include <asm/fpu-internal.h> #include <asm/user.h> -#ifdef CONFIG_X86_64 -# include <asm/sigcontext32.h> -# include <asm/user32.h> -#else -# define save_i387_xstate_ia32 save_i387_xstate -# define restore_i387_xstate_ia32 restore_i387_xstate -# define _fpstate_ia32 _fpstate -# define _xstate_ia32 _xstate -# define sig_xstate_ia32_size sig_xstate_size -# define fx_sw_reserved_ia32 fx_sw_reserved -# define user_i387_ia32_struct user_i387_struct -# define user32_fxsr_struct user_fxsr_struct -#endif - /* * Were we in an interrupt that interrupted kernel mode? * - * We can do a kernel_fpu_begin/end() pair *ONLY* if that + * For now, with eagerfpu we will return interrupted kernel FPU + * state as not-idle. 
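[Editor's sketch] The head_32.S rewrite above replaces "trust mmu_cr4_features" with the classic probe: CR4 exists if and only if CPUID exists, which exists if and only if EFLAGS.ID can be toggled. A userspace rendition for GCC/Clang on x86; the kernel's assembly sequence differs in detail but tests the same bit:

    #include <stdio.h>

    #define X86_EFLAGS_ID (1ul << 21)

    /* Toggle EFLAGS.ID and read it back: a sticking toggle means the CPU
     * has CPUID (and therefore CR4). On x86-64 the answer is always yes. */
    static unsigned long eflags_id_toggles(void)
    {
        unsigned long f1, f2;
        __asm__ volatile("pushf\n\t"
                         "pop %0\n\t"
                         "mov %0, %1\n\t"
                         "xor %2, %1\n\t"
                         "push %1\n\t"
                         "popf\n\t"
                         "pushf\n\t"
                         "pop %1"
                         : "=&r"(f1), "=&r"(f2)
                         : "ri"((unsigned long)X86_EFLAGS_ID)
                         : "cc");
        return (f1 ^ f2) & X86_EFLAGS_ID;   /* nonzero: ID bit toggled */
    }

    int main(void)
    {
        printf("CPUID %savailable\n", eflags_id_toggles() ? "" : "not ");
        return 0;
    }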
TBD: Ideally we can change the return value + * to something like __thread_has_fpu(current). But we need to + * be careful of doing __thread_clear_has_fpu() before saving + * the FPU etc for supporting nested uses etc. For now, take + * the simple route! + * + * On others, we can do a kernel_fpu_begin/end() pair *ONLY* if that * pair does nothing at all: the thread must not have fpu (so * that we don't try to save the FPU state), and TS must * be set (so that the clts/stts pair does nothing that is @@ -44,6 +37,9 @@ */ static inline bool interrupted_kernel_fpu_idle(void) { + if (use_eager_fpu()) + return 0; + return !__thread_has_fpu(current) && (read_cr0() & X86_CR0_TS); } @@ -77,29 +73,29 @@ bool irq_fpu_usable(void) } EXPORT_SYMBOL(irq_fpu_usable); -void kernel_fpu_begin(void) +void __kernel_fpu_begin(void) { struct task_struct *me = current; - WARN_ON_ONCE(!irq_fpu_usable()); - preempt_disable(); if (__thread_has_fpu(me)) { __save_init_fpu(me); __thread_clear_has_fpu(me); - /* We do 'stts()' in kernel_fpu_end() */ - } else { + /* We do 'stts()' in __kernel_fpu_end() */ + } else if (!use_eager_fpu()) { this_cpu_write(fpu_owner_task, NULL); clts(); } } -EXPORT_SYMBOL(kernel_fpu_begin); +EXPORT_SYMBOL(__kernel_fpu_begin); -void kernel_fpu_end(void) +void __kernel_fpu_end(void) { - stts(); - preempt_enable(); + if (use_eager_fpu()) + math_state_restore(); + else + stts(); } -EXPORT_SYMBOL(kernel_fpu_end); +EXPORT_SYMBOL(__kernel_fpu_end); void unlazy_fpu(struct task_struct *tsk) { @@ -113,23 +109,15 @@ void unlazy_fpu(struct task_struct *tsk) } EXPORT_SYMBOL(unlazy_fpu); -#ifdef CONFIG_MATH_EMULATION -# define HAVE_HWFP (boot_cpu_data.hard_math) -#else -# define HAVE_HWFP 1 -#endif - -static unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu; +unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu; unsigned int xstate_size; EXPORT_SYMBOL_GPL(xstate_size); -unsigned int sig_xstate_ia32_size = sizeof(struct _fpstate_ia32); static struct i387_fxsave_struct fx_scratch __cpuinitdata; static void __cpuinit mxcsr_feature_mask_init(void) { unsigned long mask = 0; - clts(); if (cpu_has_fxsr) { memset(&fx_scratch, 0, sizeof(struct i387_fxsave_struct)); asm volatile("fxsave %0" : : "m" (fx_scratch)); @@ -138,7 +126,6 @@ static void __cpuinit mxcsr_feature_mask_init(void) mask = 0x0000ffbf; } mxcsr_feature_mask &= mask; - stts(); } static void __cpuinit init_thread_xstate(void) @@ -192,9 +179,8 @@ void __cpuinit fpu_init(void) init_thread_xstate(); mxcsr_feature_mask_init(); - /* clean state in init */ - current_thread_info()->status = 0; - clear_used_math(); + xsave_init(); + eager_fpu_init(); } void fpu_finit(struct fpu *fpu) @@ -205,12 +191,7 @@ void fpu_finit(struct fpu *fpu) } if (cpu_has_fxsr) { - struct i387_fxsave_struct *fx = &fpu->state->fxsave; - - memset(fx, 0, xstate_size); - fx->cwd = 0x37f; - if (cpu_has_xmm) - fx->mxcsr = MXCSR_DEFAULT; + fx_finit(&fpu->state->fxsave); } else { struct i387_fsave_struct *fp = &fpu->state->fsave; memset(fp, 0, xstate_size); @@ -454,7 +435,7 @@ static inline u32 twd_fxsr_to_i387(struct i387_fxsave_struct *fxsave) * FXSR floating point environment conversions. 
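[Editor's sketch] The i387.c split above separates mechanism from bracketing: __kernel_fpu_begin/end() do only the FPU state work, and with eagerfpu the end path restores user state immediately instead of leaving a lazy #NM fault armed via stts(). A model with stubs; the kernel_fpu_begin/end() wrappers that re-add the preempt bracketing live in fpu-internal.h in this series and are assumed here:

    #include <stdio.h>

    static int use_eager_fpu = 1;        /* pretend eagerfpu is enabled */

    static void preempt_disable(void)    { puts("preempt_disable"); }
    static void preempt_enable(void)     { puts("preempt_enable"); }
    static void stts(void)               { puts("stts"); }
    static void math_state_restore(void) { puts("math_state_restore"); }

    static void __kernel_fpu_begin(void) { puts("save/clear FPU state"); }
    static void __kernel_fpu_end(void)
    {
        if (use_eager_fpu)
            math_state_restore();   /* eager: reload user state right away */
        else
            stts();                 /* lazy: fault it back in on next use */
    }

    /* The wrappers: preempt handling moved out of the __ variants. */
    static void kernel_fpu_begin(void) { preempt_disable(); __kernel_fpu_begin(); }
    static void kernel_fpu_end(void)   { __kernel_fpu_end(); preempt_enable(); }

    int main(void)
    {
        kernel_fpu_begin();
        puts("... SSE-optimized kernel work ...");
        kernel_fpu_end();
        return 0;
    }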
*/ -static void +void convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk) { struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave; @@ -491,8 +472,8 @@ convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk) memcpy(&to[i], &from[i], sizeof(to[0])); } -static void convert_to_fxsr(struct task_struct *tsk, - const struct user_i387_ia32_struct *env) +void convert_to_fxsr(struct task_struct *tsk, + const struct user_i387_ia32_struct *env) { struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave; @@ -589,223 +570,6 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, } /* - * Signal frame handlers. - */ - -static inline int save_i387_fsave(struct _fpstate_ia32 __user *buf) -{ - struct task_struct *tsk = current; - struct i387_fsave_struct *fp = &tsk->thread.fpu.state->fsave; - - fp->status = fp->swd; - if (__copy_to_user(buf, fp, sizeof(struct i387_fsave_struct))) - return -1; - return 1; -} - -static int save_i387_fxsave(struct _fpstate_ia32 __user *buf) -{ - struct task_struct *tsk = current; - struct i387_fxsave_struct *fx = &tsk->thread.fpu.state->fxsave; - struct user_i387_ia32_struct env; - int err = 0; - - convert_from_fxsr(&env, tsk); - if (__copy_to_user(buf, &env, sizeof(env))) - return -1; - - err |= __put_user(fx->swd, &buf->status); - err |= __put_user(X86_FXSR_MAGIC, &buf->magic); - if (err) - return -1; - - if (__copy_to_user(&buf->_fxsr_env[0], fx, xstate_size)) - return -1; - return 1; -} - -static int save_i387_xsave(void __user *buf) -{ - struct task_struct *tsk = current; - struct _fpstate_ia32 __user *fx = buf; - int err = 0; - - - sanitize_i387_state(tsk); - - /* - * For legacy compatible, we always set FP/SSE bits in the bit - * vector while saving the state to the user context. - * This will enable us capturing any changes(during sigreturn) to - * the FP/SSE bits by the legacy applications which don't touch - * xstate_bv in the xsave header. - * - * xsave aware applications can change the xstate_bv in the xsave - * header as well as change any contents in the memory layout. - * xrestore as part of sigreturn will capture all the changes. - */ - tsk->thread.fpu.state->xsave.xsave_hdr.xstate_bv |= XSTATE_FPSSE; - - if (save_i387_fxsave(fx) < 0) - return -1; - - err = __copy_to_user(&fx->sw_reserved, &fx_sw_reserved_ia32, - sizeof(struct _fpx_sw_bytes)); - err |= __put_user(FP_XSTATE_MAGIC2, - (__u32 __user *) (buf + sig_xstate_ia32_size - - FP_XSTATE_MAGIC2_SIZE)); - if (err) - return -1; - - return 1; -} - -int save_i387_xstate_ia32(void __user *buf) -{ - struct _fpstate_ia32 __user *fp = (struct _fpstate_ia32 __user *) buf; - struct task_struct *tsk = current; - - if (!used_math()) - return 0; - - if (!access_ok(VERIFY_WRITE, buf, sig_xstate_ia32_size)) - return -EACCES; - /* - * This will cause a "finit" to be triggered by the next - * attempted FPU operation by the 'current' process. - */ - clear_used_math(); - - if (!HAVE_HWFP) { - return fpregs_soft_get(current, NULL, - 0, sizeof(struct user_i387_ia32_struct), - NULL, fp) ? 
-1 : 1; - } - - unlazy_fpu(tsk); - - if (cpu_has_xsave) - return save_i387_xsave(fp); - if (cpu_has_fxsr) - return save_i387_fxsave(fp); - else - return save_i387_fsave(fp); -} - -static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf) -{ - struct task_struct *tsk = current; - - return __copy_from_user(&tsk->thread.fpu.state->fsave, buf, - sizeof(struct i387_fsave_struct)); -} - -static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf, - unsigned int size) -{ - struct task_struct *tsk = current; - struct user_i387_ia32_struct env; - int err; - - err = __copy_from_user(&tsk->thread.fpu.state->fxsave, &buf->_fxsr_env[0], - size); - /* mxcsr reserved bits must be masked to zero for security reasons */ - tsk->thread.fpu.state->fxsave.mxcsr &= mxcsr_feature_mask; - if (err || __copy_from_user(&env, buf, sizeof(env))) - return 1; - convert_to_fxsr(tsk, &env); - - return 0; -} - -static int restore_i387_xsave(void __user *buf) -{ - struct _fpx_sw_bytes fx_sw_user; - struct _fpstate_ia32 __user *fx_user = - ((struct _fpstate_ia32 __user *) buf); - struct i387_fxsave_struct __user *fx = - (struct i387_fxsave_struct __user *) &fx_user->_fxsr_env[0]; - struct xsave_hdr_struct *xsave_hdr = - &current->thread.fpu.state->xsave.xsave_hdr; - u64 mask; - int err; - - if (check_for_xstate(fx, buf, &fx_sw_user)) - goto fx_only; - - mask = fx_sw_user.xstate_bv; - - err = restore_i387_fxsave(buf, fx_sw_user.xstate_size); - - xsave_hdr->xstate_bv &= pcntxt_mask; - /* - * These bits must be zero. - */ - xsave_hdr->reserved1[0] = xsave_hdr->reserved1[1] = 0; - - /* - * Init the state that is not present in the memory layout - * and enabled by the OS. - */ - mask = ~(pcntxt_mask & ~mask); - xsave_hdr->xstate_bv &= mask; - - return err; -fx_only: - /* - * Couldn't find the extended state information in the memory - * layout. Restore the FP/SSE and init the other extended state - * enabled by the OS. - */ - xsave_hdr->xstate_bv = XSTATE_FPSSE; - return restore_i387_fxsave(buf, sizeof(struct i387_fxsave_struct)); -} - -int restore_i387_xstate_ia32(void __user *buf) -{ - int err; - struct task_struct *tsk = current; - struct _fpstate_ia32 __user *fp = (struct _fpstate_ia32 __user *) buf; - - if (HAVE_HWFP) - clear_fpu(tsk); - - if (!buf) { - if (used_math()) { - clear_fpu(tsk); - clear_used_math(); - } - - return 0; - } else - if (!access_ok(VERIFY_READ, buf, sig_xstate_ia32_size)) - return -EACCES; - - if (!used_math()) { - err = init_fpu(tsk); - if (err) - return err; - } - - if (HAVE_HWFP) { - if (cpu_has_xsave) - err = restore_i387_xsave(buf); - else if (cpu_has_fxsr) - err = restore_i387_fxsave(fp, sizeof(struct - i387_fxsave_struct)); - else - err = restore_i387_fsave(fp); - } else { - err = fpregs_soft_set(current, NULL, - 0, sizeof(struct user_i387_ia32_struct), - NULL, fp) != 0; - } - set_used_math(); - - return err; -} - -/* * FPU state for core dumps. * This is only used for a.out dumps now. * It is declared generically using elf_fpregset_t (which is diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index 36d1853e91af..9a5c460404dc 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c @@ -263,7 +263,7 @@ static void i8259A_shutdown(void) * out of.
*/ outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ - outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-1 */ + outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ } static struct syscore_ops i8259_syscore_ops = { diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index d44f7829968e..e4595f105910 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -92,7 +92,8 @@ int arch_show_interrupts(struct seq_file *p, int prec) seq_printf(p, " Rescheduling interrupts\n"); seq_printf(p, "%*s: ", prec, "CAL"); for_each_online_cpu(j) - seq_printf(p, "%10u ", irq_stats(j)->irq_call_count); + seq_printf(p, "%10u ", irq_stats(j)->irq_call_count - + irq_stats(j)->irq_tlb_count); seq_printf(p, " Function call interrupts\n"); seq_printf(p, "%*s: ", prec, "TLB"); for_each_online_cpu(j) @@ -147,7 +148,6 @@ u64 arch_irq_stat_cpu(unsigned int cpu) #ifdef CONFIG_SMP sum += irq_stats(cpu)->irq_resched_count; sum += irq_stats(cpu)->irq_call_count; - sum += irq_stats(cpu)->irq_tlb_count; #endif #ifdef CONFIG_X86_THERMAL_VECTOR sum += irq_stats(cpu)->irq_thermal_count; diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index e2f751efb7b1..57916c0d3cf6 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -541,6 +541,23 @@ reenter_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb return 1; } +#ifdef KPROBES_CAN_USE_FTRACE +static void __kprobes skip_singlestep(struct kprobe *p, struct pt_regs *regs, + struct kprobe_ctlblk *kcb) +{ + /* + * Emulate singlestep (and also recover regs->ip) + * as if there is a 5byte nop + */ + regs->ip = (unsigned long)p->addr + MCOUNT_INSN_SIZE; + if (unlikely(p->post_handler)) { + kcb->kprobe_status = KPROBE_HIT_SSDONE; + p->post_handler(p, regs, 0); + } + __this_cpu_write(current_kprobe, NULL); +} +#endif + /* * Interrupts are disabled on entry as trap3 is an interrupt gate and they * remain disabled throughout this function. @@ -599,6 +616,12 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) } else if (kprobe_running()) { p = __this_cpu_read(current_kprobe); if (p->break_handler && p->break_handler(p, regs)) { +#ifdef KPROBES_CAN_USE_FTRACE + if (kprobe_ftrace(p)) { + skip_singlestep(p, regs, kcb); + return 1; + } +#endif setup_singlestep(p, regs, kcb, 0); return 1; } @@ -1052,6 +1075,50 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) return 0; } +#ifdef KPROBES_CAN_USE_FTRACE +/* Ftrace callback handler for kprobes */ +void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *ops, struct pt_regs *regs) +{ + struct kprobe *p; + struct kprobe_ctlblk *kcb; + unsigned long flags; + + /* Disable irq for emulating a breakpoint and avoiding preempt */ + local_irq_save(flags); + + p = get_kprobe((kprobe_opcode_t *)ip); + if (unlikely(!p) || kprobe_disabled(p)) + goto end; + + kcb = get_kprobe_ctlblk(); + if (kprobe_running()) { + kprobes_inc_nmissed_count(p); + } else { + /* Kprobe handler expects regs->ip = ip + 1 as breakpoint hit */ + regs->ip = ip + sizeof(kprobe_opcode_t); + + __this_cpu_write(current_kprobe, p); + kcb->kprobe_status = KPROBE_HIT_ACTIVE; + if (!p->pre_handler || !p->pre_handler(p, regs)) + skip_singlestep(p, regs, kcb); + /* + * If pre_handler returns !0, it sets regs->ip and + * resets current kprobe. 
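+	 *
+	 * Registration-side sketch (hypothetical module code, shown only
+	 * to illustrate when this handler runs): a probe placed on a
+	 * function entry that ftrace instruments may be routed here:
+	 *
+	 *	static struct kprobe kp = {
+	 *		.symbol_name	= "some_function",	(hypothetical)
+	 *		.pre_handler	= my_pre_handler,	(hypothetical)
+	 *	};
+	 *	register_kprobe(&kp);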
+ */ + } +end: + local_irq_restore(flags); +} + +int __kprobes arch_prepare_kprobe_ftrace(struct kprobe *p) +{ + p->ainsn.insn = NULL; + p->ainsn.boostable = -1; + return 0; +} +#endif + int __init arch_init_kprobes(void) { return arch_init_optprobes(); diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index 82746f942cd8..7720ff5a9ee2 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -75,20 +75,113 @@ struct microcode_amd { static struct equiv_cpu_entry *equiv_cpu_table; -/* page-sized ucode patch buffer */ -void *patch; +struct ucode_patch { + struct list_head plist; + void *data; + u32 patch_id; + u16 equiv_cpu; +}; + +static LIST_HEAD(pcache); + +static u16 find_equiv_id(unsigned int cpu) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + int i = 0; + + if (!equiv_cpu_table) + return 0; + + while (equiv_cpu_table[i].installed_cpu != 0) { + if (uci->cpu_sig.sig == equiv_cpu_table[i].installed_cpu) + return equiv_cpu_table[i].equiv_cpu; + + i++; + } + return 0; +} + +static u32 find_cpu_family_by_equiv_cpu(u16 equiv_cpu) +{ + int i = 0; + + BUG_ON(!equiv_cpu_table); + + while (equiv_cpu_table[i].equiv_cpu != 0) { + if (equiv_cpu == equiv_cpu_table[i].equiv_cpu) + return equiv_cpu_table[i].installed_cpu; + i++; + } + return 0; +} + +/* + * a small, trivial cache of per-family ucode patches + */ +static struct ucode_patch *cache_find_patch(u16 equiv_cpu) +{ + struct ucode_patch *p; + + list_for_each_entry(p, &pcache, plist) + if (p->equiv_cpu == equiv_cpu) + return p; + return NULL; +} + +static void update_cache(struct ucode_patch *new_patch) +{ + struct ucode_patch *p; + + list_for_each_entry(p, &pcache, plist) { + if (p->equiv_cpu == new_patch->equiv_cpu) { + if (p->patch_id >= new_patch->patch_id) + /* we already have the latest patch */ + return; + + list_replace(&p->plist, &new_patch->plist); + kfree(p->data); + kfree(p); + return; + } + } + /* no patch found, add it */ + list_add_tail(&new_patch->plist, &pcache); +} + +static void free_cache(void) +{ + struct ucode_patch *p, *tmp; + + list_for_each_entry_safe(p, tmp, &pcache, plist) { + __list_del(p->plist.prev, p->plist.next); + kfree(p->data); + kfree(p); + } +} + +static struct ucode_patch *find_patch(unsigned int cpu) +{ + u16 equiv_id; + + equiv_id = find_equiv_id(cpu); + if (!equiv_id) + return NULL; + + return cache_find_patch(equiv_id); +} static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) { struct cpuinfo_x86 *c = &cpu_data(cpu); + csig->sig = cpuid_eax(0x00000001); csig->rev = c->microcode; pr_info("CPU%d: patch_level=0x%08x\n", cpu, csig->rev); return 0; } -static unsigned int verify_ucode_size(int cpu, u32 patch_size, +static unsigned int verify_patch_size(int cpu, u32 patch_size, unsigned int size) { struct cpuinfo_x86 *c = &cpu_data(cpu); @@ -118,95 +211,37 @@ static unsigned int verify_ucode_size(int cpu, u32 patch_size, return patch_size; } -static u16 find_equiv_id(void) +static int apply_microcode_amd(int cpu) { - unsigned int current_cpu_id, i = 0; - - BUG_ON(equiv_cpu_table == NULL); - - current_cpu_id = cpuid_eax(0x00000001); - - while (equiv_cpu_table[i].installed_cpu != 0) { - if (current_cpu_id == equiv_cpu_table[i].installed_cpu) - return equiv_cpu_table[i].equiv_cpu; - - i++; - } - return 0; -} + struct cpuinfo_x86 *c = &cpu_data(cpu); + struct microcode_amd *mc_amd; + struct ucode_cpu_info *uci; + struct ucode_patch *p; + u32 rev, dummy; -/* - * we signal a good patch is found by returning its size > 0 - */ -static int 
get_matching_microcode(int cpu, const u8 *ucode_ptr, - unsigned int leftover_size, int rev, - unsigned int *current_size) -{ - struct microcode_header_amd *mc_hdr; - unsigned int actual_size, patch_size; - u16 equiv_cpu_id; + BUG_ON(raw_smp_processor_id() != cpu); - /* size of the current patch we're staring at */ - patch_size = *(u32 *)(ucode_ptr + 4); - *current_size = patch_size + SECTION_HDR_SIZE; + uci = ucode_cpu_info + cpu; - equiv_cpu_id = find_equiv_id(); - if (!equiv_cpu_id) + p = find_patch(cpu); + if (!p) return 0; - /* - * let's look at the patch header itself now - */ - mc_hdr = (struct microcode_header_amd *)(ucode_ptr + SECTION_HDR_SIZE); + mc_amd = p->data; + uci->mc = p->data; - if (mc_hdr->processor_rev_id != equiv_cpu_id) - return 0; + rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); - /* ucode might be chipset specific -- currently we don't support this */ - if (mc_hdr->nb_dev_id || mc_hdr->sb_dev_id) { - pr_err("CPU%d: chipset specific code not yet supported\n", - cpu); + /* need to apply patch? */ + if (rev >= mc_amd->hdr.patch_id) { + c->microcode = rev; return 0; } - if (mc_hdr->patch_id <= rev) - return 0; - - /* - * now that the header looks sane, verify its size - */ - actual_size = verify_ucode_size(cpu, patch_size, leftover_size); - if (!actual_size) - return 0; - - /* clear the patch buffer */ - memset(patch, 0, PAGE_SIZE); - - /* all looks ok, get the binary patch */ - get_ucode_data(patch, ucode_ptr + SECTION_HDR_SIZE, actual_size); - - return actual_size; -} - -static int apply_microcode_amd(int cpu) -{ - u32 rev, dummy; - int cpu_num = raw_smp_processor_id(); - struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; - struct microcode_amd *mc_amd = uci->mc; - struct cpuinfo_x86 *c = &cpu_data(cpu); - - /* We should bind the task to the CPU */ - BUG_ON(cpu_num != cpu); - - if (mc_amd == NULL) - return 0; - wrmsrl(MSR_AMD64_PATCH_LOADER, (u64)(long)&mc_amd->hdr.data_code); - /* get patch id after patching */ - rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); - /* check current patch id and patch's id for match */ + /* verify patch application was successful */ + rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); if (rev != mc_amd->hdr.patch_id) { pr_err("CPU%d: update failed for patch_level=0x%08x\n", cpu, mc_amd->hdr.patch_id); @@ -238,7 +273,7 @@ static int install_equiv_cpu_table(const u8 *buf) return -ENOMEM; } - get_ucode_data(equiv_cpu_table, buf + CONTAINER_HDR_SZ, size); + memcpy(equiv_cpu_table, buf + CONTAINER_HDR_SZ, size); /* add header length */ return size + CONTAINER_HDR_SZ; @@ -250,61 +285,113 @@ static void free_equiv_cpu_table(void) equiv_cpu_table = NULL; } -static enum ucode_state -generic_load_microcode(int cpu, const u8 *data, size_t size) +static void cleanup(void) { - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - struct microcode_header_amd *mc_hdr = NULL; - unsigned int mc_size, leftover, current_size = 0; + free_equiv_cpu_table(); + free_cache(); +} + +/* + * We return the current size even if some of the checks failed so that + * we can skip over the next patch. If we return a negative value, we + * signal a grave error like a memory allocation has failed and the + * driver cannot continue functioning normally. In such cases, we tear + * down everything we've used up so far and exit. 
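+ *
+ * The intended caller loop (this is what load_microcode_amd() below
+ * does) therefore looks like:
+ *
+ *	crnt_size = verify_and_add_patch(cpu, fw, leftover);
+ *	if (crnt_size < 0)
+ *		return ret;
+ *	fw += crnt_size;
+ *	leftover -= crnt_size;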
+ */ +static int verify_and_add_patch(unsigned int cpu, u8 *fw, unsigned int leftover) +{ + struct cpuinfo_x86 *c = &cpu_data(cpu); + struct microcode_header_amd *mc_hdr; + struct ucode_patch *patch; + unsigned int patch_size, crnt_size, ret; + u32 proc_fam; + u16 proc_id; + + patch_size = *(u32 *)(fw + 4); + crnt_size = patch_size + SECTION_HDR_SIZE; + mc_hdr = (struct microcode_header_amd *)(fw + SECTION_HDR_SIZE); + proc_id = mc_hdr->processor_rev_id; + + proc_fam = find_cpu_family_by_equiv_cpu(proc_id); + if (!proc_fam) { + pr_err("No patch family for equiv ID: 0x%04x\n", proc_id); + return crnt_size; + } + + /* check if patch is for the current family */ + proc_fam = ((proc_fam >> 8) & 0xf) + ((proc_fam >> 20) & 0xff); + if (proc_fam != c->x86) + return crnt_size; + + if (mc_hdr->nb_dev_id || mc_hdr->sb_dev_id) { + pr_err("Patch-ID 0x%08x: chipset-specific code unsupported.\n", + mc_hdr->patch_id); + return crnt_size; + } + + ret = verify_patch_size(cpu, patch_size, leftover); + if (!ret) { + pr_err("Patch-ID 0x%08x: size mismatch.\n", mc_hdr->patch_id); + return crnt_size; + } + + patch = kzalloc(sizeof(*patch), GFP_KERNEL); + if (!patch) { + pr_err("Patch allocation failure.\n"); + return -EINVAL; + } + + patch->data = kzalloc(patch_size, GFP_KERNEL); + if (!patch->data) { + pr_err("Patch data allocation failure.\n"); + kfree(patch); + return -EINVAL; + } + + /* All looks ok, copy patch... */ + memcpy(patch->data, fw + SECTION_HDR_SIZE, patch_size); + INIT_LIST_HEAD(&patch->plist); + patch->patch_id = mc_hdr->patch_id; + patch->equiv_cpu = proc_id; + + /* ... and add to cache. */ + update_cache(patch); + + return crnt_size; +} + +static enum ucode_state load_microcode_amd(int cpu, const u8 *data, size_t size) +{ + enum ucode_state ret = UCODE_ERROR; + unsigned int leftover; + u8 *fw = (u8 *)data; + int crnt_size = 0; int offset; - const u8 *ucode_ptr = data; - void *new_mc = NULL; - unsigned int new_rev = uci->cpu_sig.rev; - enum ucode_state state = UCODE_ERROR; - offset = install_equiv_cpu_table(ucode_ptr); + offset = install_equiv_cpu_table(data); if (offset < 0) { pr_err("failed to create equivalent cpu table\n"); - goto out; + return ret; } - ucode_ptr += offset; + fw += offset; leftover = size - offset; - if (*(u32 *)ucode_ptr != UCODE_UCODE_TYPE) { + if (*(u32 *)fw != UCODE_UCODE_TYPE) { pr_err("invalid type field in container file section header\n"); - goto free_table; + free_equiv_cpu_table(); + return ret; } while (leftover) { - mc_size = get_matching_microcode(cpu, ucode_ptr, leftover, - new_rev, ¤t_size); - if (mc_size) { - mc_hdr = patch; - new_mc = patch; - new_rev = mc_hdr->patch_id; - goto out_ok; - } - - ucode_ptr += current_size; - leftover -= current_size; - } + crnt_size = verify_and_add_patch(cpu, fw, leftover); + if (crnt_size < 0) + return ret; - if (!new_mc) { - state = UCODE_NFOUND; - goto free_table; + fw += crnt_size; + leftover -= crnt_size; } -out_ok: - uci->mc = new_mc; - state = UCODE_OK; - pr_debug("CPU%d update ucode (0x%08x -> 0x%08x)\n", - cpu, uci->cpu_sig.rev, new_rev); - -free_table: - free_equiv_cpu_table(); - -out: - return state; + return UCODE_OK; } /* @@ -315,7 +402,7 @@ out: * * This legacy file is always smaller than 2K in size. * - * Starting at family 15h they are in family specific firmware files: + * Beginning with family 15h, they are in family-specific firmware files: * * amd-ucode/microcode_amd_fam15h.bin * amd-ucode/microcode_amd_fam16h.bin @@ -323,12 +410,17 @@ out: * * These might be larger than 2K. 
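 *
 * The per-family name is generated at request time, as in
 * request_microcode_amd() below (e.g. "...fam16h.bin" when c->x86 is 0x16):
 *
 *	snprintf(fw_name, sizeof(fw_name),
 *		 "amd-ucode/microcode_amd_fam%.2xh.bin", c->x86);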
*/ -static enum ucode_state request_microcode_amd(int cpu, struct device *device) +static enum ucode_state request_microcode_amd(int cpu, struct device *device, + bool refresh_fw) { char fw_name[36] = "amd-ucode/microcode_amd.bin"; - const struct firmware *fw; - enum ucode_state ret = UCODE_NFOUND; struct cpuinfo_x86 *c = &cpu_data(cpu); + enum ucode_state ret = UCODE_NFOUND; + const struct firmware *fw; + + /* reload ucode container only on the boot cpu */ + if (!refresh_fw || c->cpu_index != boot_cpu_data.cpu_index) + return UCODE_OK; if (c->x86 >= 0x15) snprintf(fw_name, sizeof(fw_name), "amd-ucode/microcode_amd_fam%.2xh.bin", c->x86); @@ -344,12 +436,17 @@ static enum ucode_state request_microcode_amd(int cpu, struct device *device) goto fw_release; } - ret = generic_load_microcode(cpu, fw->data, fw->size); + /* free old equiv table */ + free_equiv_cpu_table(); + + ret = load_microcode_amd(cpu, fw->data, fw->size); + if (ret != UCODE_OK) + cleanup(); -fw_release: + fw_release: release_firmware(fw); -out: + out: return ret; } @@ -383,14 +480,10 @@ struct microcode_ops * __init init_amd_microcode(void) return NULL; } - patch = (void *)get_zeroed_page(GFP_KERNEL); - if (!patch) - return NULL; - return &microcode_amd_ops; } void __exit exit_amd_microcode(void) { - free_page((unsigned long)patch); + cleanup(); } diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index 4873e62db6a1..3a04b224d0c0 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c @@ -225,6 +225,9 @@ static ssize_t microcode_write(struct file *file, const char __user *buf, if (do_microcode_update(buf, len) == 0) ret = (ssize_t)len; + if (ret > 0) + perf_check_microcode(); + mutex_unlock(&microcode_mutex); put_online_cpus(); @@ -276,19 +279,18 @@ static struct platform_device *microcode_pdev; static int reload_for_cpu(int cpu) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + enum ucode_state ustate; int err = 0; - if (uci->valid) { - enum ucode_state ustate; - - ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev); - if (ustate == UCODE_OK) - apply_microcode_on_target(cpu); - else - if (ustate == UCODE_ERROR) - err = -EINVAL; - } + if (!uci->valid) + return err; + ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev, true); + if (ustate == UCODE_OK) + apply_microcode_on_target(cpu); + else + if (ustate == UCODE_ERROR) + err = -EINVAL; return err; } @@ -370,18 +372,15 @@ static void microcode_fini_cpu(int cpu) static enum ucode_state microcode_resume_cpu(int cpu) { - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - - if (!uci->mc) - return UCODE_NFOUND; - pr_debug("CPU%d updated upon resume\n", cpu); - apply_microcode_on_target(cpu); + + if (apply_microcode_on_target(cpu)) + return UCODE_ERROR; return UCODE_OK; } -static enum ucode_state microcode_init_cpu(int cpu) +static enum ucode_state microcode_init_cpu(int cpu, bool refresh_fw) { enum ucode_state ustate; @@ -392,7 +391,8 @@ if (system_state != SYSTEM_RUNNING) return UCODE_NFOUND; - ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev); + ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev, + refresh_fw); if (ustate == UCODE_OK) { pr_debug("CPU%d updated upon init\n", cpu); @@ -405,14 +405,11 @@ static enum ucode_state microcode_update_cpu(int cpu) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - enum ucode_state ustate; if (uci->valid) - ustate =
microcode_resume_cpu(cpu); - else - ustate = microcode_init_cpu(cpu); + return microcode_resume_cpu(cpu); - return ustate; + return microcode_init_cpu(cpu, false); } static int mc_device_add(struct device *dev, struct subsys_interface *sif) @@ -428,7 +425,7 @@ static int mc_device_add(struct device *dev, struct subsys_interface *sif) if (err) return err; - if (microcode_init_cpu(cpu) == UCODE_ERROR) + if (microcode_init_cpu(cpu, true) == UCODE_ERROR) return -EINVAL; return err; @@ -477,34 +474,41 @@ mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) struct device *dev; dev = get_cpu_device(cpu); - switch (action) { + + switch (action & ~CPU_TASKS_FROZEN) { case CPU_ONLINE: - case CPU_ONLINE_FROZEN: microcode_update_cpu(cpu); - case CPU_DOWN_FAILED: - case CPU_DOWN_FAILED_FROZEN: pr_debug("CPU%d added\n", cpu); + /* + * "break" is missing on purpose here because we want to fall + * through in order to create the sysfs group. + */ + + case CPU_DOWN_FAILED: if (sysfs_create_group(&dev->kobj, &mc_attr_group)) pr_err("Failed to create group for CPU%d\n", cpu); break; + case CPU_DOWN_PREPARE: - case CPU_DOWN_PREPARE_FROZEN: /* Suspend is in progress, only remove the interface */ sysfs_remove_group(&dev->kobj, &mc_attr_group); pr_debug("CPU%d removed\n", cpu); break; /* + * case CPU_DEAD: + * * When a CPU goes offline, don't free up or invalidate the copy of * the microcode in kernel memory, so that we can reuse it when the * CPU comes back online without unnecessarily requesting the userspace * for it again. */ - case CPU_UP_CANCELED_FROZEN: - /* The CPU refused to come up during a system resume */ - microcode_fini_cpu(cpu); - break; } + + /* The CPU refused to come up during a system resume */ + if (action == CPU_UP_CANCELED_FROZEN) + microcode_fini_cpu(cpu); + return NOTIFY_OK; } diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c index 0327e2b3c408..3544aed39338 100644 --- a/arch/x86/kernel/microcode_intel.c +++ b/arch/x86/kernel/microcode_intel.c @@ -405,7 +405,8 @@ static int get_ucode_fw(void *to, const void *from, size_t n) return 0; } -static enum ucode_state request_microcode_fw(int cpu, struct device *device) +static enum ucode_state request_microcode_fw(int cpu, struct device *device, + bool refresh_fw) { char name[30]; struct cpuinfo_x86 *c = &cpu_data(cpu); diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index eb113693f043..a7c5661f8496 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -257,12 +257,14 @@ static int __init msr_init(void) goto out_chrdev; } msr_class->devnode = msr_devnode; + get_online_cpus(); for_each_online_cpu(i) { err = msr_device_create(i); if (err != 0) goto out_class; } register_hotcpu_notifier(&msr_class_cpu_notifier); + put_online_cpus(); err = 0; goto out; @@ -271,6 +273,7 @@ out_class: i = 0; for_each_online_cpu(i) msr_device_destroy(i); + put_online_cpus(); class_destroy(msr_class); out_chrdev: __unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr"); @@ -281,11 +284,13 @@ out: static void __exit msr_exit(void) { int cpu = 0; + get_online_cpus(); for_each_online_cpu(cpu) msr_device_destroy(cpu); class_destroy(msr_class); __unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr"); unregister_hotcpu_notifier(&msr_class_cpu_notifier); + put_online_cpus(); } module_init(msr_init); diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c new file mode 100644 index 000000000000..e309cc5c276e --- /dev/null +++ b/arch/x86/kernel/perf_regs.c @@ -0,0 +1,105 @@ +#include 
<linux/errno.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/perf_event.h> +#include <linux/bug.h> +#include <linux/stddef.h> +#include <asm/perf_regs.h> +#include <asm/ptrace.h> + +#ifdef CONFIG_X86_32 +#define PERF_REG_X86_MAX PERF_REG_X86_32_MAX +#else +#define PERF_REG_X86_MAX PERF_REG_X86_64_MAX +#endif + +#define PT_REGS_OFFSET(id, r) [id] = offsetof(struct pt_regs, r) + +static unsigned int pt_regs_offset[PERF_REG_X86_MAX] = { + PT_REGS_OFFSET(PERF_REG_X86_AX, ax), + PT_REGS_OFFSET(PERF_REG_X86_BX, bx), + PT_REGS_OFFSET(PERF_REG_X86_CX, cx), + PT_REGS_OFFSET(PERF_REG_X86_DX, dx), + PT_REGS_OFFSET(PERF_REG_X86_SI, si), + PT_REGS_OFFSET(PERF_REG_X86_DI, di), + PT_REGS_OFFSET(PERF_REG_X86_BP, bp), + PT_REGS_OFFSET(PERF_REG_X86_SP, sp), + PT_REGS_OFFSET(PERF_REG_X86_IP, ip), + PT_REGS_OFFSET(PERF_REG_X86_FLAGS, flags), + PT_REGS_OFFSET(PERF_REG_X86_CS, cs), + PT_REGS_OFFSET(PERF_REG_X86_SS, ss), +#ifdef CONFIG_X86_32 + PT_REGS_OFFSET(PERF_REG_X86_DS, ds), + PT_REGS_OFFSET(PERF_REG_X86_ES, es), + PT_REGS_OFFSET(PERF_REG_X86_FS, fs), + PT_REGS_OFFSET(PERF_REG_X86_GS, gs), +#else + /* + * The pt_regs struct does not store + * ds, es, fs, gs in 64 bit mode. + */ + (unsigned int) -1, + (unsigned int) -1, + (unsigned int) -1, + (unsigned int) -1, +#endif +#ifdef CONFIG_X86_64 + PT_REGS_OFFSET(PERF_REG_X86_R8, r8), + PT_REGS_OFFSET(PERF_REG_X86_R9, r9), + PT_REGS_OFFSET(PERF_REG_X86_R10, r10), + PT_REGS_OFFSET(PERF_REG_X86_R11, r11), + PT_REGS_OFFSET(PERF_REG_X86_R12, r12), + PT_REGS_OFFSET(PERF_REG_X86_R13, r13), + PT_REGS_OFFSET(PERF_REG_X86_R14, r14), + PT_REGS_OFFSET(PERF_REG_X86_R15, r15), +#endif +}; + +u64 perf_reg_value(struct pt_regs *regs, int idx) +{ + if (WARN_ON_ONCE(idx >= ARRAY_SIZE(pt_regs_offset))) + return 0; + + return regs_get_register(regs, pt_regs_offset[idx]); +} + +#define REG_RESERVED (~((1ULL << PERF_REG_X86_MAX) - 1ULL)) + +#ifdef CONFIG_X86_32 +int perf_reg_validate(u64 mask) +{ + if (!mask || mask & REG_RESERVED) + return -EINVAL; + + return 0; +} + +u64 perf_reg_abi(struct task_struct *task) +{ + return PERF_SAMPLE_REGS_ABI_32; +} +#else /* CONFIG_X86_64 */ +#define REG_NOSUPPORT ((1ULL << PERF_REG_X86_DS) | \ + (1ULL << PERF_REG_X86_ES) | \ + (1ULL << PERF_REG_X86_FS) | \ + (1ULL << PERF_REG_X86_GS)) + +int perf_reg_validate(u64 mask) +{ + if (!mask || mask & REG_RESERVED) + return -EINVAL; + + if (mask & REG_NOSUPPORT) + return -EINVAL; + + return 0; +} + +u64 perf_reg_abi(struct task_struct *task) +{ + if (test_tsk_thread_flag(task, TIF_IA32)) + return PERF_SAMPLE_REGS_ABI_32; + else + return PERF_SAMPLE_REGS_ABI_64; +} +#endif /* CONFIG_X86_32 */ diff --git a/arch/x86/kernel/probe_roms.c b/arch/x86/kernel/probe_roms.c index 0bc72e2069e3..d5f15c3f7b25 100644 --- a/arch/x86/kernel/probe_roms.c +++ b/arch/x86/kernel/probe_roms.c @@ -150,7 +150,7 @@ static struct resource *find_oprom(struct pci_dev *pdev) return oprom; } -void *pci_map_biosrom(struct pci_dev *pdev) +void __iomem *pci_map_biosrom(struct pci_dev *pdev) { struct resource *oprom = find_oprom(pdev); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index ef6a8456f719..dc3567e083f9 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -66,15 +66,13 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) { int ret; - unlazy_fpu(src); - *dst = *src; if (fpu_allocated(&src->thread.fpu)) { memset(&dst->thread.fpu, 0, sizeof(dst->thread.fpu)); ret = fpu_alloc(&dst->thread.fpu); if (ret) return ret; - 
fpu_copy(&dst->thread.fpu, &src->thread.fpu); + fpu_copy(dst, src); } return 0; } @@ -97,16 +95,6 @@ void arch_task_cache_init(void) SLAB_PANIC | SLAB_NOTRACK, NULL); } -static inline void drop_fpu(struct task_struct *tsk) -{ - /* - * Forget coprocessor state.. - */ - tsk->fpu_counter = 0; - clear_fpu(tsk); - clear_used_math(); -} - /* * Free current thread data structures etc.. */ @@ -163,7 +151,13 @@ void flush_thread(void) flush_ptrace_hw_breakpoint(tsk); memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); - drop_fpu(tsk); + drop_init_fpu(tsk); + /* + * Free the FPU state for non xsave platforms. They get reallocated + * lazily at the first use. + */ + if (!use_eager_fpu()) + free_thread_xstate(tsk); } static void hard_disable_TSC(void) diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 516fa186121b..b9ff83c7135b 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -190,10 +190,6 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) regs->cs = __USER_CS; regs->ip = new_ip; regs->sp = new_sp; - /* - * Free the old FP and other extended state - */ - free_thread_xstate(current); } EXPORT_SYMBOL_GPL(start_thread); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 0a980c9d7cb8..8a6d20ce1978 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -232,10 +232,6 @@ start_thread_common(struct pt_regs *regs, unsigned long new_ip, regs->cs = _cs; regs->ss = _ss; regs->flags = X86_EFLAGS_IF; - /* - * Free the old FP and other extended state - */ - free_thread_xstate(current); } void diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index c4c6a5c2bf0f..b00b33a18390 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -21,6 +21,7 @@ #include <linux/signal.h> #include <linux/perf_event.h> #include <linux/hw_breakpoint.h> +#include <linux/rcupdate.h> #include <asm/uaccess.h> #include <asm/pgtable.h> @@ -1332,9 +1333,6 @@ static const struct user_regset_view user_x86_64_view = { #define genregs32_get genregs_get #define genregs32_set genregs_set -#define user_i387_ia32_struct user_i387_struct -#define user32_fxsr_struct user_fxsr_struct - #endif /* CONFIG_X86_64 */ #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION @@ -1463,6 +1461,8 @@ long syscall_trace_enter(struct pt_regs *regs) { long ret = 0; + rcu_user_exit(); + /* * If we stepped into a sysenter/syscall insn, it trapped in * kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP. 
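 *
 * (Note the rcu_user_exit() added above: paired with the
 *  rcu_user_enter() now at the end of syscall_trace_leave(), it
 *  brackets the kernel window of a traced syscall for RCU.)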
@@ -1526,4 +1526,6 @@ void syscall_trace_leave(struct pt_regs *regs) !test_thread_flag(TIF_SYSCALL_EMU); if (step || test_thread_flag(TIF_SYSCALL_TRACE)) tracehook_report_syscall_exit(regs, step); + + rcu_user_enter(); } diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index b3386ae3438b..d609be046b57 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -961,9 +961,7 @@ void __init setup_arch(char **cmdline_p) kvmclock_init(); #endif - x86_init.paging.pagetable_setup_start(swapper_pg_dir); - paging_init(); - x86_init.paging.pagetable_setup_done(swapper_pg_dir); + x86_init.paging.pagetable_init(); if (boot_cpu_data.cpuid_level >= 0) { /* A CPU has %cr4 if and only if it has CPUID */ diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index b280908a376e..b33144c8b309 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -114,11 +114,12 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, regs->orig_ax = -1; /* disable syscall checks */ get_user_ex(buf, &sc->fpstate); - err |= restore_i387_xstate(buf); get_user_ex(*pax, &sc->ax); } get_user_catch(err); + err |= restore_xstate_sig(buf, config_enabled(CONFIG_X86_32)); + return err; } @@ -206,35 +207,32 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, void __user **fpstate) { /* Default to using normal stack */ + unsigned long math_size = 0; unsigned long sp = regs->sp; + unsigned long buf_fx = 0; int onsigstack = on_sig_stack(sp); -#ifdef CONFIG_X86_64 /* redzone */ - sp -= 128; -#endif /* CONFIG_X86_64 */ + if (config_enabled(CONFIG_X86_64)) + sp -= 128; if (!onsigstack) { /* This is the X/Open sanctioned signal stack switching. */ if (ka->sa.sa_flags & SA_ONSTACK) { if (current->sas_ss_size) sp = current->sas_ss_sp + current->sas_ss_size; - } else { -#ifdef CONFIG_X86_32 - /* This is the legacy signal stack switching. */ - if ((regs->ss & 0xffff) != __USER_DS && - !(ka->sa.sa_flags & SA_RESTORER) && - ka->sa.sa_restorer) + } else if (config_enabled(CONFIG_X86_32) && + (regs->ss & 0xffff) != __USER_DS && + !(ka->sa.sa_flags & SA_RESTORER) && + ka->sa.sa_restorer) { + /* This is the legacy signal stack switching. */ sp = (unsigned long) ka->sa.sa_restorer; -#endif /* CONFIG_X86_32 */ } } if (used_math()) { - sp -= sig_xstate_size; -#ifdef CONFIG_X86_64 - sp = round_down(sp, 64); -#endif /* CONFIG_X86_64 */ + sp = alloc_mathframe(sp, config_enabled(CONFIG_X86_32), + &buf_fx, &math_size); *fpstate = (void __user *)sp; } @@ -247,8 +245,9 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, if (onsigstack && !likely(on_sig_stack(sp))) return (void __user *)-1L; - /* save i387 state */ - if (used_math() && save_i387_xstate(*fpstate) < 0) + /* save i387 and extended state */ + if (used_math() && + save_xstate_sig(*fpstate, (void __user *)buf_fx, math_size) < 0) return (void __user *)-1L; return (void __user *)sp; @@ -357,7 +356,6 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, put_user_ex(sig, &frame->sig); put_user_ex(&frame->info, &frame->pinfo); put_user_ex(&frame->uc, &frame->puc); - err |= copy_siginfo_to_user(&frame->info, info); /* Create the ucontext. 
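 * (The ucontext is filled in with put_user_ex(): within a
 *  put_user_try block the individual stores do not return errors;
 *  faults are accumulated and surface in 'err' at put_user_catch().)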
*/ if (cpu_has_xsave) @@ -369,9 +367,6 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, put_user_ex(sas_ss_flags(regs->sp), &frame->uc.uc_stack.ss_flags); put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size); - err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate, - regs, set->sig[0]); - err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); /* Set up to return from userspace. */ restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn); @@ -388,6 +383,11 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, */ put_user_ex(*((u64 *)&rt_retcode), (u64 *)frame->retcode); } put_user_catch(err); + + err |= copy_siginfo_to_user(&frame->info, info); + err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate, + regs, set->sig[0]); + err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); if (err) return -EFAULT; @@ -436,8 +436,6 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, put_user_ex(sas_ss_flags(regs->sp), &frame->uc.uc_stack.ss_flags); put_user_ex(me->sas_ss_size, &frame->uc.uc_stack.ss_size); - err |= setup_sigcontext(&frame->uc.uc_mcontext, fp, regs, set->sig[0]); - err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); /* Set up to return from userspace. If provided, use a stub already in userspace. */ @@ -450,6 +448,9 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, } } put_user_catch(err); + err |= setup_sigcontext(&frame->uc.uc_mcontext, fp, regs, set->sig[0]); + err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); + if (err) return -EFAULT; @@ -474,6 +475,75 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, } #endif /* CONFIG_X86_32 */ +static int x32_setup_rt_frame(int sig, struct k_sigaction *ka, + siginfo_t *info, compat_sigset_t *set, + struct pt_regs *regs) +{ +#ifdef CONFIG_X86_X32_ABI + struct rt_sigframe_x32 __user *frame; + void __user *restorer; + int err = 0; + void __user *fpstate = NULL; + + frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); + + if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) + return -EFAULT; + + if (ka->sa.sa_flags & SA_SIGINFO) { + if (copy_siginfo_to_user32(&frame->info, info)) + return -EFAULT; + } + + put_user_try { + /* Create the ucontext. */ + if (cpu_has_xsave) + put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags); + else + put_user_ex(0, &frame->uc.uc_flags); + put_user_ex(0, &frame->uc.uc_link); + put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); + put_user_ex(sas_ss_flags(regs->sp), + &frame->uc.uc_stack.ss_flags); + put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size); + put_user_ex(0, &frame->uc.uc__pad0); + + if (ka->sa.sa_flags & SA_RESTORER) { + restorer = ka->sa.sa_restorer; + } else { + /* could use a vstub here */ + restorer = NULL; + err |= -EFAULT; + } + put_user_ex(restorer, &frame->pretcode); + } put_user_catch(err); + + err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate, + regs, set->sig[0]); + err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); + + if (err) + return -EFAULT; + + /* Set up registers for signal handler */ + regs->sp = (unsigned long) frame; + regs->ip = (unsigned long) ka->sa.sa_handler; + + /* We use the x32 calling convention here... 
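+	 * (i.e. the regular x86-64 SysV register convention, first
+	 *  arguments in %rdi/%rsi/%rdx, but with 32-bit pointers)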
*/ + regs->di = sig; + regs->si = (unsigned long) &frame->info; + regs->dx = (unsigned long) &frame->uc; + + loadsegment(ds, __USER_DS); + loadsegment(es, __USER_DS); + + regs->cs = __USER_CS; + regs->ss = __USER_DS; +#endif /* CONFIG_X86_X32_ABI */ + + return 0; +} + #ifdef CONFIG_X86_32 /* * Atomically swap in the new signal mask, and wait for a signal. @@ -612,55 +682,22 @@ static int signr_convert(int sig) return sig; } -#ifdef CONFIG_X86_32 - -#define is_ia32 1 -#define ia32_setup_frame __setup_frame -#define ia32_setup_rt_frame __setup_rt_frame - -#else /* !CONFIG_X86_32 */ - -#ifdef CONFIG_IA32_EMULATION -#define is_ia32 test_thread_flag(TIF_IA32) -#else /* !CONFIG_IA32_EMULATION */ -#define is_ia32 0 -#endif /* CONFIG_IA32_EMULATION */ - -#ifdef CONFIG_X86_X32_ABI -#define is_x32 test_thread_flag(TIF_X32) - -static int x32_setup_rt_frame(int sig, struct k_sigaction *ka, - siginfo_t *info, compat_sigset_t *set, - struct pt_regs *regs); -#else /* !CONFIG_X86_X32_ABI */ -#define is_x32 0 -#endif /* CONFIG_X86_X32_ABI */ - -int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, - sigset_t *set, struct pt_regs *regs); -int ia32_setup_frame(int sig, struct k_sigaction *ka, - sigset_t *set, struct pt_regs *regs); - -#endif /* CONFIG_X86_32 */ - static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, struct pt_regs *regs) { int usig = signr_convert(sig); sigset_t *set = sigmask_to_save(); + compat_sigset_t *cset = (compat_sigset_t *) set; /* Set up the stack frame */ - if (is_ia32) { + if (is_ia32_frame()) { if (ka->sa.sa_flags & SA_SIGINFO) - return ia32_setup_rt_frame(usig, ka, info, set, regs); + return ia32_setup_rt_frame(usig, ka, info, cset, regs); else - return ia32_setup_frame(usig, ka, set, regs); -#ifdef CONFIG_X86_X32_ABI - } else if (is_x32) { - return x32_setup_rt_frame(usig, ka, info, - (compat_sigset_t *)set, regs); -#endif + return ia32_setup_frame(usig, ka, cset, regs); + } else if (is_x32_frame()) { + return x32_setup_rt_frame(usig, ka, info, cset, regs); } else { return __setup_rt_frame(sig, ka, info, set, regs); } @@ -779,6 +816,8 @@ static void do_signal(struct pt_regs *regs) void do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) { + rcu_user_exit(); + #ifdef CONFIG_X86_MCE /* notify userspace of pending MCEs */ if (thread_info_flags & _TIF_MCE_NOTIFY) @@ -804,6 +843,8 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) #ifdef CONFIG_X86_32 clear_thread_flag(TIF_IRET); #endif /* CONFIG_X86_32 */ + + rcu_user_enter(); } void signal_fault(struct pt_regs *regs, void __user *frame, char *where) @@ -824,72 +865,6 @@ void signal_fault(struct pt_regs *regs, void __user *frame, char *where) } #ifdef CONFIG_X86_X32_ABI -static int x32_setup_rt_frame(int sig, struct k_sigaction *ka, - siginfo_t *info, compat_sigset_t *set, - struct pt_regs *regs) -{ - struct rt_sigframe_x32 __user *frame; - void __user *restorer; - int err = 0; - void __user *fpstate = NULL; - - frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); - - if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) - return -EFAULT; - - if (ka->sa.sa_flags & SA_SIGINFO) { - if (copy_siginfo_to_user32(&frame->info, info)) - return -EFAULT; - } - - put_user_try { - /* Create the ucontext. 
*/ - if (cpu_has_xsave) - put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags); - else - put_user_ex(0, &frame->uc.uc_flags); - put_user_ex(0, &frame->uc.uc_link); - put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); - put_user_ex(sas_ss_flags(regs->sp), - &frame->uc.uc_stack.ss_flags); - put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size); - put_user_ex(0, &frame->uc.uc__pad0); - err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate, - regs, set->sig[0]); - err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); - - if (ka->sa.sa_flags & SA_RESTORER) { - restorer = ka->sa.sa_restorer; - } else { - /* could use a vstub here */ - restorer = NULL; - err |= -EFAULT; - } - put_user_ex(restorer, &frame->pretcode); - } put_user_catch(err); - - if (err) - return -EFAULT; - - /* Set up registers for signal handler */ - regs->sp = (unsigned long) frame; - regs->ip = (unsigned long) ka->sa.sa_handler; - - /* We use the x32 calling convention here... */ - regs->di = sig; - regs->si = (unsigned long) &frame->info; - regs->dx = (unsigned long) &frame->uc; - - loadsegment(ds, __USER_DS); - loadsegment(es, __USER_DS); - - regs->cs = __USER_CS; - regs->ss = __USER_DS; - - return 0; -} - asmlinkage long sys32_x32_rt_sigreturn(struct pt_regs *regs) { struct rt_sigframe_x32 __user *frame; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 7c5a8c314c02..c80a33bc528b 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -665,7 +665,8 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle) unsigned long boot_error = 0; int timeout; - alternatives_smp_switch(1); + /* Just in case we booted with a single CPU. */ + alternatives_enable_smp(); idle->thread.sp = (unsigned long) (((struct pt_regs *) (THREAD_SIZE + task_stack_page(idle))) - 1); @@ -1053,20 +1054,6 @@ out: preempt_enable(); } -void arch_disable_nonboot_cpus_begin(void) -{ - /* - * Avoid the smp alternatives switch during the disable_nonboot_cpus(). - * In the suspend path, we will be back in the SMP mode shortly anyways. - */ - skip_smp_alternatives = true; -} - -void arch_disable_nonboot_cpus_end(void) -{ - skip_smp_alternatives = false; -} - void arch_enable_nonboot_cpus_begin(void) { set_mtrr_aps_delayed_init(); @@ -1256,9 +1243,6 @@ void native_cpu_die(unsigned int cpu) if (per_cpu(cpu_state, cpu) == CPU_DEAD) { if (system_state == SYSTEM_RUNNING) pr_info("CPU %u is now offline\n", cpu); - - if (1 == num_online_cpus()) - alternatives_smp_switch(0); return; } msleep(100); diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c index c346d1161488..cd3b2438a980 100644 --- a/arch/x86/kernel/step.c +++ b/arch/x86/kernel/step.c @@ -157,6 +157,33 @@ static int enable_single_step(struct task_struct *child) return 1; } +void set_task_blockstep(struct task_struct *task, bool on) +{ + unsigned long debugctl; + + /* + * Ensure irq/preemption can't change debugctl in between. + * Note also that both TIF_BLOCKSTEP and debugctl should + * be changed atomically wrt preemption. + * FIXME: this means that set/clear TIF_BLOCKSTEP is simply + * wrong if task != current, SIGKILL can wakeup the stopped + * tracee and set/clear can play with the running task, this + * can confuse the next __switch_to_xtra(). 
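+	 *
+	 * Typical use, as in enable_step() and user_disable_single_step()
+	 * below:
+	 *
+	 *	set_task_blockstep(child, true);	enable BTF
+	 *	set_task_blockstep(child, false);	disable BTF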
+ */ + local_irq_disable(); + debugctl = get_debugctlmsr(); + if (on) { + debugctl |= DEBUGCTLMSR_BTF; + set_tsk_thread_flag(task, TIF_BLOCKSTEP); + } else { + debugctl &= ~DEBUGCTLMSR_BTF; + clear_tsk_thread_flag(task, TIF_BLOCKSTEP); + } + if (task == current) + update_debugctlmsr(debugctl); + local_irq_enable(); +} + /* * Enable single or block step. */ @@ -169,19 +196,10 @@ static void enable_step(struct task_struct *child, bool block) * So no one should try to use debugger block stepping in a program * that uses user-mode single stepping itself. */ - if (enable_single_step(child) && block) { - unsigned long debugctl = get_debugctlmsr(); - - debugctl |= DEBUGCTLMSR_BTF; - update_debugctlmsr(debugctl); - set_tsk_thread_flag(child, TIF_BLOCKSTEP); - } else if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) { - unsigned long debugctl = get_debugctlmsr(); - - debugctl &= ~DEBUGCTLMSR_BTF; - update_debugctlmsr(debugctl); - clear_tsk_thread_flag(child, TIF_BLOCKSTEP); - } + if (enable_single_step(child) && block) + set_task_blockstep(child, true); + else if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) + set_task_blockstep(child, false); } void user_enable_single_step(struct task_struct *child) @@ -199,13 +217,8 @@ void user_disable_single_step(struct task_struct *child) /* * Make sure block stepping (BTF) is disabled. */ - if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) { - unsigned long debugctl = get_debugctlmsr(); - - debugctl &= ~DEBUGCTLMSR_BTF; - update_debugctlmsr(debugctl); - clear_tsk_thread_flag(child, TIF_BLOCKSTEP); - } + if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) + set_task_blockstep(child, false); /* Always clear TIF_SINGLESTEP... */ clear_tsk_thread_flag(child, TIF_SINGLESTEP); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index b481341c9369..8276dc6794cc 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -55,6 +55,7 @@ #include <asm/i387.h> #include <asm/fpu-internal.h> #include <asm/mce.h> +#include <asm/rcu.h> #include <asm/mach_traps.h> @@ -107,30 +108,45 @@ static inline void preempt_conditional_cli(struct pt_regs *regs) dec_preempt_count(); } -static void __kprobes -do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, - long error_code, siginfo_t *info) +static int __kprobes +do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str, + struct pt_regs *regs, long error_code) { - struct task_struct *tsk = current; - #ifdef CONFIG_X86_32 if (regs->flags & X86_VM_MASK) { /* - * traps 0, 1, 3, 4, and 5 should be forwarded to vm86. + * Traps 0, 1, 3, 4, and 5 should be forwarded to vm86. * On nmi (interrupt 2), do_trap should not be called. 
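 *
 * Return convention, as consumed by do_trap() below: 0 means the
 * trap was fully handled here (vm86 forward or kernel fixup/die),
 * -1 means the caller still has to deliver a signal.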
*/ - if (trapnr < X86_TRAP_UD) - goto vm86_trap; - goto trap_signal; + if (trapnr < X86_TRAP_UD) { + if (!handle_vm86_trap((struct kernel_vm86_regs *) regs, + error_code, trapnr)) + return 0; + } + return -1; } #endif + if (!user_mode(regs)) { + if (!fixup_exception(regs)) { + tsk->thread.error_code = error_code; + tsk->thread.trap_nr = trapnr; + die(str, regs, error_code); + } + return 0; + } - if (!user_mode(regs)) - goto kernel_trap; + return -1; +} -#ifdef CONFIG_X86_32 -trap_signal: -#endif +static void __kprobes +do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, + long error_code, siginfo_t *info) +{ + struct task_struct *tsk = current; + + + if (!do_trap_no_signal(tsk, trapnr, str, regs, error_code)) + return; /* * We want error_code and trap_nr set for userspace faults and * kernelspace faults which result in die(), but not @@ -158,33 +174,20 @@ trap_signal: force_sig_info(signr, info, tsk); else force_sig(signr, tsk); - return; - -kernel_trap: - if (!fixup_exception(regs)) { - tsk->thread.error_code = error_code; - tsk->thread.trap_nr = trapnr; - die(str, regs, error_code); - } - return; - -#ifdef CONFIG_X86_32 -vm86_trap: - if (handle_vm86_trap((struct kernel_vm86_regs *) regs, - error_code, trapnr)) - goto trap_signal; - return; -#endif } #define DO_ERROR(trapnr, signr, str, name) \ dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ { \ - if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ - == NOTIFY_STOP) \ + exception_enter(regs); \ + if (notify_die(DIE_TRAP, str, regs, error_code, \ + trapnr, signr) == NOTIFY_STOP) { \ + exception_exit(regs); \ return; \ + } \ conditional_sti(regs); \ do_trap(trapnr, signr, str, regs, error_code, NULL); \ + exception_exit(regs); \ } #define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ @@ -195,11 +198,15 @@ dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ info.si_errno = 0; \ info.si_code = sicode; \ info.si_addr = (void __user *)siaddr; \ - if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ - == NOTIFY_STOP) \ + exception_enter(regs); \ + if (notify_die(DIE_TRAP, str, regs, error_code, \ + trapnr, signr) == NOTIFY_STOP) { \ + exception_exit(regs); \ return; \ + } \ conditional_sti(regs); \ do_trap(trapnr, signr, str, regs, error_code, &info); \ + exception_exit(regs); \ } DO_ERROR_INFO(X86_TRAP_DE, SIGFPE, "divide error", divide_error, FPE_INTDIV, @@ -222,12 +229,14 @@ DO_ERROR_INFO(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check, /* Runs on IST stack */ dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code) { + exception_enter(regs); if (notify_die(DIE_TRAP, "stack segment", regs, error_code, - X86_TRAP_SS, SIGBUS) == NOTIFY_STOP) - return; - preempt_conditional_sti(regs); - do_trap(X86_TRAP_SS, SIGBUS, "stack segment", regs, error_code, NULL); - preempt_conditional_cli(regs); + X86_TRAP_SS, SIGBUS) != NOTIFY_STOP) { + preempt_conditional_sti(regs); + do_trap(X86_TRAP_SS, SIGBUS, "stack segment", regs, error_code, NULL); + preempt_conditional_cli(regs); + } + exception_exit(regs); } dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) @@ -235,6 +244,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) static const char str[] = "double fault"; struct task_struct *tsk = current; + exception_enter(regs); /* Return not checked because double check cannot be ignored */ notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV); @@ -255,16 +265,29 @@ 
do_general_protection(struct pt_regs *regs, long error_code) { struct task_struct *tsk; + exception_enter(regs); conditional_sti(regs); #ifdef CONFIG_X86_32 - if (regs->flags & X86_VM_MASK) - goto gp_in_vm86; + if (regs->flags & X86_VM_MASK) { + local_irq_enable(); + handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code); + goto exit; + } #endif tsk = current; - if (!user_mode(regs)) - goto gp_in_kernel; + if (!user_mode(regs)) { + if (fixup_exception(regs)) + goto exit; + + tsk->thread.error_code = error_code; + tsk->thread.trap_nr = X86_TRAP_GP; + if (notify_die(DIE_GPF, "general protection fault", regs, error_code, + X86_TRAP_GP, SIGSEGV) != NOTIFY_STOP) + die("general protection fault", regs, error_code); + goto exit; + } tsk->thread.error_code = error_code; tsk->thread.trap_nr = X86_TRAP_GP; @@ -279,25 +302,8 @@ do_general_protection(struct pt_regs *regs, long error_code) } force_sig(SIGSEGV, tsk); - return; - -#ifdef CONFIG_X86_32 -gp_in_vm86: - local_irq_enable(); - handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code); - return; -#endif - -gp_in_kernel: - if (fixup_exception(regs)) - return; - - tsk->thread.error_code = error_code; - tsk->thread.trap_nr = X86_TRAP_GP; - if (notify_die(DIE_GPF, "general protection fault", regs, error_code, - X86_TRAP_GP, SIGSEGV) == NOTIFY_STOP) - return; - die("general protection fault", regs, error_code); +exit: + exception_exit(regs); } /* May run on IST stack. */ @@ -312,15 +318,16 @@ dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_co ftrace_int3_handler(regs)) return; #endif + exception_enter(regs); #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, SIGTRAP) == NOTIFY_STOP) - return; + goto exit; #endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */ if (notify_die(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, SIGTRAP) == NOTIFY_STOP) - return; + goto exit; /* * Let others (NMI) know that the debug stack is in use @@ -331,6 +338,8 @@ dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_co do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, NULL); preempt_conditional_cli(regs); debug_stack_usage_dec(); +exit: + exception_exit(regs); } #ifdef CONFIG_X86_64 @@ -391,6 +400,8 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) unsigned long dr6; int si_code; + exception_enter(regs); + get_debugreg(dr6, 6); /* Filter out all the reserved bits which are preset to 1 */ @@ -406,7 +417,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) /* Catch kmemcheck conditions first of all! 
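 *
 * (As with the other handlers in this file, do_debug() is now
 *  bracketed by exception_enter()/exception_exit(), which is why
 *  every early return in the body below became a 'goto exit'.)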
*/ if ((dr6 & DR_STEP) && kmemcheck_trap(regs)) - return; + goto exit; /* DR6 may or may not be cleared by the CPU */ set_debugreg(0, 6); @@ -421,7 +432,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) if (notify_die(DIE_DEBUG, "debug", regs, PTR_ERR(&dr6), error_code, SIGTRAP) == NOTIFY_STOP) - return; + goto exit; /* * Let others (NMI) know that the debug stack is in use @@ -437,7 +448,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) X86_TRAP_DB); preempt_conditional_cli(regs); debug_stack_usage_dec(); - return; + goto exit; } /* @@ -458,7 +469,8 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) preempt_conditional_cli(regs); debug_stack_usage_dec(); - return; +exit: + exception_exit(regs); } /* @@ -555,14 +567,17 @@ dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code) #ifdef CONFIG_X86_32 ignore_fpu_irq = 1; #endif - + exception_enter(regs); math_error(regs, error_code, X86_TRAP_MF); + exception_exit(regs); } dotraplinkage void do_simd_coprocessor_error(struct pt_regs *regs, long error_code) { + exception_enter(regs); math_error(regs, error_code, X86_TRAP_XF); + exception_exit(regs); } dotraplinkage void @@ -613,11 +628,12 @@ void math_state_restore(void) } __thread_fpu_begin(tsk); + /* * Paranoid restore. send a SIGSEGV if we fail to restore the state. */ if (unlikely(restore_fpu_checking(tsk))) { - __thread_fpu_end(tsk); + drop_init_fpu(tsk); force_sig(SIGSEGV, tsk); return; } @@ -629,6 +645,9 @@ EXPORT_SYMBOL_GPL(math_state_restore); dotraplinkage void __kprobes do_device_not_available(struct pt_regs *regs, long error_code) { + exception_enter(regs); + BUG_ON(use_eager_fpu()); + #ifdef CONFIG_MATH_EMULATION if (read_cr0() & X86_CR0_EM) { struct math_emu_info info = { }; @@ -637,6 +656,7 @@ do_device_not_available(struct pt_regs *regs, long error_code) info.regs = regs; math_emulate(&info); + exception_exit(regs); return; } #endif @@ -644,12 +664,15 @@ do_device_not_available(struct pt_regs *regs, long error_code) #ifdef CONFIG_X86_32 conditional_sti(regs); #endif + exception_exit(regs); } #ifdef CONFIG_X86_32 dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) { siginfo_t info; + + exception_enter(regs); local_irq_enable(); info.si_signo = SIGILL; @@ -657,10 +680,11 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) info.si_code = ILL_BADSTK; info.si_addr = NULL; if (notify_die(DIE_TRAP, "iret exception", regs, error_code, - X86_TRAP_IRET, SIGILL) == NOTIFY_STOP) - return; - do_trap(X86_TRAP_IRET, SIGILL, "iret exception", regs, error_code, - &info); + X86_TRAP_IRET, SIGILL) != NOTIFY_STOP) { + do_trap(X86_TRAP_IRET, SIGILL, "iret exception", regs, error_code, + &info); + } + exception_exit(regs); } #endif diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 36fd42091fa7..9538f00827a9 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -41,6 +41,9 @@ /* Adjust the return address of a call insn */ #define UPROBE_FIX_CALL 0x2 +/* Instruction will modify TF, don't change it */ +#define UPROBE_FIX_SETF 0x4 + #define UPROBE_FIX_RIP_AX 0x8000 #define UPROBE_FIX_RIP_CX 0x4000 @@ -239,6 +242,10 @@ static void prepare_fixups(struct arch_uprobe *auprobe, struct insn *insn) insn_get_opcode(insn); /* should be a nop */ switch (OPCODE1(insn)) { + case 0x9d: + /* popf */ + auprobe->fixups |= UPROBE_FIX_SETF; + break; case 0xc3: /* ret/lret */ case 0xcb: case 0xc2: @@ -646,7 +653,7 @@ void 
arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) * Skip these instructions as per the currently known x86 ISA. * 0x66* { 0x90 | 0x0f 0x1f | 0x0f 0x19 | 0x87 0xc0 } */ -bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) +static bool __skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) { int i; @@ -673,3 +680,46 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) } return false; } + +bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + bool ret = __skip_sstep(auprobe, regs); + if (ret && (regs->flags & X86_EFLAGS_TF)) + send_sig(SIGTRAP, current, 0); + return ret; +} + +void arch_uprobe_enable_step(struct arch_uprobe *auprobe) +{ + struct task_struct *task = current; + struct arch_uprobe_task *autask = &task->utask->autask; + struct pt_regs *regs = task_pt_regs(task); + + autask->saved_tf = !!(regs->flags & X86_EFLAGS_TF); + + regs->flags |= X86_EFLAGS_TF; + if (test_tsk_thread_flag(task, TIF_BLOCKSTEP)) + set_task_blockstep(task, false); +} + +void arch_uprobe_disable_step(struct arch_uprobe *auprobe) +{ + struct task_struct *task = current; + struct arch_uprobe_task *autask = &task->utask->autask; + bool trapped = (task->utask->state == UTASK_SSTEP_TRAPPED); + struct pt_regs *regs = task_pt_regs(task); + /* + * The state of TIF_BLOCKSTEP was not saved so we can get an extra + * SIGTRAP if we do not clear TF. We need to examine the opcode to + * make it right. + */ + if (unlikely(trapped)) { + if (!autask->saved_tf) + regs->flags &= ~X86_EFLAGS_TF; + } else { + if (autask->saved_tf) + send_sig(SIGTRAP, task, 0); + else if (!(auprobe->fixups & UPROBE_FIX_SETF)) + regs->flags &= ~X86_EFLAGS_TF; + } +} diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index 6020f6f5927c..1330dd102950 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c @@ -13,9 +13,13 @@ #include <asm/ftrace.h> #ifdef CONFIG_FUNCTION_TRACER -/* mcount is defined in assembly */ +/* mcount and __fentry__ are defined in assembly */ +#ifdef CC_USING_FENTRY +EXPORT_SYMBOL(__fentry__); +#else EXPORT_SYMBOL(mcount); #endif +#endif EXPORT_SYMBOL(__get_user_1); EXPORT_SYMBOL(__get_user_2); diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 9f3167e891ef..7a3d075a814a 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -26,7 +26,6 @@ void __cpuinit x86_init_noop(void) { } void __init x86_init_uint_noop(unsigned int unused) { } -void __init x86_init_pgd_noop(pgd_t *unused) { } int __init iommu_init_noop(void) { return 0; } void iommu_shutdown_noop(void) { } @@ -68,8 +67,7 @@ struct x86_init_ops x86_init __initdata = { }, .paging = { - .pagetable_setup_start = native_pagetable_setup_start, - .pagetable_setup_done = native_pagetable_setup_done, + .pagetable_init = native_pagetable_init, }, .timers = { diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 3d3e20709119..ada87a329edc 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -10,9 +10,7 @@ #include <linux/compat.h> #include <asm/i387.h> #include <asm/fpu-internal.h> -#ifdef CONFIG_IA32_EMULATION -#include <asm/sigcontext32.h> -#endif +#include <asm/sigframe.h> #include <asm/xcr.h> /* @@ -23,13 +21,9 @@ u64 pcntxt_mask; /* * Represents init state for the supported extended state. 
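 *
 * (No longer static: with eager FPU, restore paths such as
 *  drop_init_fpu() need this buffer to put a task back into its
 *  init state.)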
*/ -static struct xsave_struct *init_xstate_buf; - -struct _fpx_sw_bytes fx_sw_reserved; -#ifdef CONFIG_IA32_EMULATION -struct _fpx_sw_bytes fx_sw_reserved_ia32; -#endif +struct xsave_struct *init_xstate_buf; +static struct _fpx_sw_bytes fx_sw_reserved, fx_sw_reserved_ia32; static unsigned int *xstate_offsets, *xstate_sizes, xstate_features; /* @@ -44,9 +38,9 @@ static unsigned int *xstate_offsets, *xstate_sizes, xstate_features; */ void __sanitize_i387_state(struct task_struct *tsk) { - u64 xstate_bv; - int feature_bit = 0x2; struct i387_fxsave_struct *fx = &tsk->thread.fpu.state->fxsave; + int feature_bit = 0x2; + u64 xstate_bv; if (!fx) return; @@ -104,213 +98,326 @@ void __sanitize_i387_state(struct task_struct *tsk) * Check for the presence of extended state information in the * user fpstate pointer in the sigcontext. */ -int check_for_xstate(struct i387_fxsave_struct __user *buf, - void __user *fpstate, - struct _fpx_sw_bytes *fx_sw_user) +static inline int check_for_xstate(struct i387_fxsave_struct __user *buf, + void __user *fpstate, + struct _fpx_sw_bytes *fx_sw) { int min_xstate_size = sizeof(struct i387_fxsave_struct) + sizeof(struct xsave_hdr_struct); unsigned int magic2; - int err; - err = __copy_from_user(fx_sw_user, &buf->sw_reserved[0], - sizeof(struct _fpx_sw_bytes)); - if (err) - return -EFAULT; + if (__copy_from_user(fx_sw, &buf->sw_reserved[0], sizeof(*fx_sw))) + return -1; - /* - * First Magic check failed. - */ - if (fx_sw_user->magic1 != FP_XSTATE_MAGIC1) - return -EINVAL; + /* Check for the first magic field and other error scenarios. */ + if (fx_sw->magic1 != FP_XSTATE_MAGIC1 || + fx_sw->xstate_size < min_xstate_size || + fx_sw->xstate_size > xstate_size || + fx_sw->xstate_size > fx_sw->extended_size) + return -1; /* - * Check for error scenarios. - */ - if (fx_sw_user->xstate_size < min_xstate_size || - fx_sw_user->xstate_size > xstate_size || - fx_sw_user->xstate_size > fx_sw_user->extended_size) - return -EINVAL; - - err = __get_user(magic2, (__u32 *) (((void *)fpstate) + - fx_sw_user->extended_size - - FP_XSTATE_MAGIC2_SIZE)); - if (err) - return err; - /* * Check for the presence of second magic word at the end of memory * layout. This detects the case where the user just copied the legacy * fpstate layout with out copying the extended state information * in the memory layout. */ - if (magic2 != FP_XSTATE_MAGIC2) - return -EFAULT; + if (__get_user(magic2, (__u32 __user *)(fpstate + fx_sw->xstate_size)) + || magic2 != FP_XSTATE_MAGIC2) + return -1; return 0; } -#ifdef CONFIG_X86_64 /* * Signal frame handlers. 
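Editorial reading aid, inferred from the checks in check_for_xstate() above (not a new ABI statement) -- the user buffer being validated is laid out as:

	buf ->  +--------------------------------+
	        | i387_fxsave_struct (legacy)    |  sw_reserved[] carries
	        |                                |  _fpx_sw_bytes: magic1 ==
	        |                                |  FP_XSTATE_MAGIC1, xstate_size,
	        +--------------------------------+  xstate_bv, extended_size ...
	        | xsave header + extended state  |
	        +--------------------------------+  <- buf + fx_sw->xstate_size
	        | u32 == FP_XSTATE_MAGIC2        |  present only if the extended
	        +--------------------------------+  layout was really copied

so a legacy-only copy of the frame fails the magic2 probe and is rejected.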
*/ - -int save_i387_xstate(void __user *buf) +static inline int save_fsave_header(struct task_struct *tsk, void __user *buf) { - struct task_struct *tsk = current; - int err = 0; - - if (!access_ok(VERIFY_WRITE, buf, sig_xstate_size)) - return -EACCES; + if (use_fxsr()) { + struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave; + struct user_i387_ia32_struct env; + struct _fpstate_ia32 __user *fp = buf; - BUG_ON(sig_xstate_size < xstate_size); + convert_from_fxsr(&env, tsk); - if ((unsigned long)buf % 64) - pr_err("%s: bad fpstate %p\n", __func__, buf); - - if (!used_math()) - return 0; - - if (user_has_fpu()) { - if (use_xsave()) - err = xsave_user(buf); - else - err = fxsave_user(buf); - - if (err) - return err; - user_fpu_end(); + if (__copy_to_user(buf, &env, sizeof(env)) || + __put_user(xsave->i387.swd, &fp->status) || + __put_user(X86_FXSR_MAGIC, &fp->magic)) + return -1; } else { - sanitize_i387_state(tsk); - if (__copy_to_user(buf, &tsk->thread.fpu.state->fxsave, - xstate_size)) + struct i387_fsave_struct __user *fp = buf; + u32 swd; + if (__get_user(swd, &fp->swd) || __put_user(swd, &fp->status)) return -1; } - clear_used_math(); /* trigger finit */ + return 0; +} - if (use_xsave()) { - struct _fpstate __user *fx = buf; - struct _xstate __user *x = buf; - u64 xstate_bv; +static inline int save_xstate_epilog(void __user *buf, int ia32_frame) +{ + struct xsave_struct __user *x = buf; + struct _fpx_sw_bytes *sw_bytes; + u32 xstate_bv; + int err; - err = __copy_to_user(&fx->sw_reserved, &fx_sw_reserved, - sizeof(struct _fpx_sw_bytes)); + /* Setup the bytes not touched by the [f]xsave and reserved for SW. */ + sw_bytes = ia32_frame ? &fx_sw_reserved_ia32 : &fx_sw_reserved; + err = __copy_to_user(&x->i387.sw_reserved, sw_bytes, sizeof(*sw_bytes)); - err |= __put_user(FP_XSTATE_MAGIC2, - (__u32 __user *) (buf + sig_xstate_size - - FP_XSTATE_MAGIC2_SIZE)); + if (!use_xsave()) + return err; - /* - * Read the xstate_bv which we copied (directly from the cpu or - * from the state in task struct) to the user buffers and - * set the FP/SSE bits. - */ - err |= __get_user(xstate_bv, &x->xstate_hdr.xstate_bv); + err |= __put_user(FP_XSTATE_MAGIC2, (__u32 *)(buf + xstate_size)); - /* - * For legacy compatible, we always set FP/SSE bits in the bit - * vector while saving the state to the user context. This will - * enable us capturing any changes(during sigreturn) to - * the FP/SSE bits by the legacy applications which don't touch - * xstate_bv in the xsave header. - * - * xsave aware apps can change the xstate_bv in the xsave - * header as well as change any contents in the memory layout. - * xrestore as part of sigreturn will capture all the changes. - */ - xstate_bv |= XSTATE_FPSSE; + /* + * Read the xstate_bv which we copied (directly from the cpu or + * from the state in task struct) to the user buffers. + */ + err |= __get_user(xstate_bv, (__u32 *)&x->xsave_hdr.xstate_bv); - err |= __put_user(xstate_bv, &x->xstate_hdr.xstate_bv); + /* + * For legacy compatible, we always set FP/SSE bits in the bit + * vector while saving the state to the user context. This will + * enable us capturing any changes(during sigreturn) to + * the FP/SSE bits by the legacy applications which don't touch + * xstate_bv in the xsave header. + * + * xsave aware apps can change the xstate_bv in the xsave + * header as well as change any contents in the memory layout. + * xrestore as part of sigreturn will capture all the changes. 
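A concrete example of the bit-forcing just described (editorial, using the usual bit assignments XSTATE_FP = 0x1, XSTATE_SSE = 0x2, XSTATE_YMM = 0x4):

	/* xsave wrote xstate_bv = 0x5: FP and YMM in use, SSE in init state */
	xstate_bv |= XSTATE_FPSSE;	/* the user frame gets 0x7 */

A legacy app that edits only the fxsave image still has its FP/SSE changes picked up by the xrstor on sigreturn, because the saved bit vector now claims those areas are valid.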
+ */ + xstate_bv |= XSTATE_FPSSE; - if (err) - return err; - } + err |= __put_user(xstate_bv, (__u32 *)&x->xsave_hdr.xstate_bv); - return 1; + return err; +} + +static inline int save_user_xstate(struct xsave_struct __user *buf) +{ + int err; + + if (use_xsave()) + err = xsave_user(buf); + else if (use_fxsr()) + err = fxsave_user((struct i387_fxsave_struct __user *) buf); + else + err = fsave_user((struct i387_fsave_struct __user *) buf); + + if (unlikely(err) && __clear_user(buf, xstate_size)) + err = -EFAULT; + return err; } /* - * Restore the extended state if present. Otherwise, restore the FP/SSE - * state. + * Save the fpu, extended register state to the user signal frame. + * + * 'buf_fx' is the 64-byte aligned pointer at which the [f|fx|x]save + * state is copied. + * 'buf' points to the 'buf_fx' or to the fsave header followed by 'buf_fx'. + * + * buf == buf_fx for 64-bit frames and 32-bit fsave frame. + * buf != buf_fx for 32-bit frames with fxstate. + * + * If the fpu, extended register state is live, save the state directly + * to the user frame pointed by the aligned pointer 'buf_fx'. Otherwise, + * copy the thread's fpu state to the user frame starting at 'buf_fx'. + * + * If this is a 32-bit frame with fxstate, put a fsave header before + * the aligned state at 'buf_fx'. + * + * For [f]xsave state, update the SW reserved fields in the [f]xsave frame + * indicating the absence/presence of the extended state to the user. */ -static int restore_user_xstate(void __user *buf) +int save_xstate_sig(void __user *buf, void __user *buf_fx, int size) { - struct _fpx_sw_bytes fx_sw_user; - u64 mask; - int err; + struct xsave_struct *xsave = ¤t->thread.fpu.state->xsave; + struct task_struct *tsk = current; + int ia32_fxstate = (buf != buf_fx); - if (((unsigned long)buf % 64) || - check_for_xstate(buf, buf, &fx_sw_user)) - goto fx_only; + ia32_fxstate &= (config_enabled(CONFIG_X86_32) || + config_enabled(CONFIG_IA32_EMULATION)); - mask = fx_sw_user.xstate_bv; + if (!access_ok(VERIFY_WRITE, buf, size)) + return -EACCES; - /* - * restore the state passed by the user. - */ - err = xrestore_user(buf, mask); - if (err) - return err; + if (!HAVE_HWFP) + return fpregs_soft_get(current, NULL, 0, + sizeof(struct user_i387_ia32_struct), NULL, + (struct _fpstate_ia32 __user *) buf) ? -1 : 1; - /* - * init the state skipped by the user. - */ - mask = pcntxt_mask & ~mask; - if (unlikely(mask)) - xrstor_state(init_xstate_buf, mask); + if (user_has_fpu()) { + /* Save the live register state to the user directly. */ + if (save_user_xstate(buf_fx)) + return -1; + /* Update the thread's fxstate to save the fsave header. */ + if (ia32_fxstate) + fpu_fxsave(&tsk->thread.fpu); + } else { + sanitize_i387_state(tsk); + if (__copy_to_user(buf_fx, xsave, xstate_size)) + return -1; + } + + /* Save the fsave header for the 32-bit frames. */ + if ((ia32_fxstate || !use_fxsr()) && save_fsave_header(tsk, buf)) + return -1; + + if (use_fxsr() && save_xstate_epilog(buf_fx, ia32_fxstate)) + return -1; + + drop_init_fpu(tsk); /* trigger finit */ return 0; +} -fx_only: - /* - * couldn't find the extended state information in the - * memory layout. Restore just the FP/SSE and init all - * the other extended state. 
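The buf/buf_fx convention documented above condenses to one predicate; a sketch under the same assumptions as the patch (helper name hypothetical):

	static inline bool is_ia32_fxstate_frame(void __user *buf,
						 void __user *buf_fx)
	{
		/*
		 * 64-bit frames and 32-bit fsave frames pass buf == buf_fx.
		 * Only a 32-bit frame with fxstate splits them: buf points
		 * at the fsave header, buf_fx at the 64-byte aligned area
		 * behind it -- possible only on X86_32 or with IA32
		 * emulation enabled.
		 */
		return buf != buf_fx &&
		       (config_enabled(CONFIG_X86_32) ||
			config_enabled(CONFIG_IA32_EMULATION));
	}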
- */ - xrstor_state(init_xstate_buf, pcntxt_mask & ~XSTATE_FPSSE); - return fxrstor_checking((__force struct i387_fxsave_struct *)buf); +static inline void +sanitize_restored_xstate(struct task_struct *tsk, + struct user_i387_ia32_struct *ia32_env, + u64 xstate_bv, int fx_only) +{ + struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave; + struct xsave_hdr_struct *xsave_hdr = &xsave->xsave_hdr; + + if (use_xsave()) { + /* These bits must be zero. */ + xsave_hdr->reserved1[0] = xsave_hdr->reserved1[1] = 0; + + /* + * Init the state that is not present in the memory + * layout and not enabled by the OS. + */ + if (fx_only) + xsave_hdr->xstate_bv = XSTATE_FPSSE; + else + xsave_hdr->xstate_bv &= (pcntxt_mask & xstate_bv); + } + + if (use_fxsr()) { + /* + * mscsr reserved bits must be masked to zero for security + * reasons. + */ + xsave->i387.mxcsr &= mxcsr_feature_mask; + + convert_to_fxsr(tsk, ia32_env); + } } /* - * This restores directly out of user space. Exceptions are handled. + * Restore the extended state if present. Otherwise, restore the FP/SSE state. */ -int restore_i387_xstate(void __user *buf) +static inline int restore_user_xstate(void __user *buf, u64 xbv, int fx_only) { + if (use_xsave()) { + if ((unsigned long)buf % 64 || fx_only) { + u64 init_bv = pcntxt_mask & ~XSTATE_FPSSE; + xrstor_state(init_xstate_buf, init_bv); + return fxrstor_user(buf); + } else { + u64 init_bv = pcntxt_mask & ~xbv; + if (unlikely(init_bv)) + xrstor_state(init_xstate_buf, init_bv); + return xrestore_user(buf, xbv); + } + } else if (use_fxsr()) { + return fxrstor_user(buf); + } else + return frstor_user(buf); +} + +int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size) +{ + int ia32_fxstate = (buf != buf_fx); struct task_struct *tsk = current; - int err = 0; + int state_size = xstate_size; + u64 xstate_bv = 0; + int fx_only = 0; + + ia32_fxstate &= (config_enabled(CONFIG_X86_32) || + config_enabled(CONFIG_IA32_EMULATION)); if (!buf) { - if (used_math()) - goto clear; + drop_init_fpu(tsk); return 0; - } else - if (!access_ok(VERIFY_READ, buf, sig_xstate_size)) - return -EACCES; + } - if (!used_math()) { - err = init_fpu(tsk); - if (err) - return err; + if (!access_ok(VERIFY_READ, buf, size)) + return -EACCES; + + if (!used_math() && init_fpu(tsk)) + return -1; + + if (!HAVE_HWFP) { + return fpregs_soft_set(current, NULL, + 0, sizeof(struct user_i387_ia32_struct), + NULL, buf) != 0; } - user_fpu_begin(); - if (use_xsave()) - err = restore_user_xstate(buf); - else - err = fxrstor_checking((__force struct i387_fxsave_struct *) - buf); - if (unlikely(err)) { + if (use_xsave()) { + struct _fpx_sw_bytes fx_sw_user; + if (unlikely(check_for_xstate(buf_fx, buf_fx, &fx_sw_user))) { + /* + * Couldn't find the extended state information in the + * memory layout. Restore just the FP/SSE and init all + * the other extended state. + */ + state_size = sizeof(struct i387_fxsave_struct); + fx_only = 1; + } else { + state_size = fx_sw_user.xstate_size; + xstate_bv = fx_sw_user.xstate_bv; + } + } + + if (ia32_fxstate) { + /* + * For 32-bit frames with fxstate, copy the user state to the + * thread's fpu state, reconstruct fxstate from the fsave + * header. Sanitize the copied state etc. + */ + struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave; + struct user_i387_ia32_struct env; + int err = 0; + + /* + * Drop the current fpu which clears used_math(). This ensures + * that any context-switch during the copy of the new state, + * avoids the intermediate state from getting restored/saved. 
+ * Thus avoiding the new restored state from getting corrupted. + * We will be ready to restore/save the state only after + * set_used_math() is again set. + */ + drop_fpu(tsk); + + if (__copy_from_user(xsave, buf_fx, state_size) || + __copy_from_user(&env, buf, sizeof(env))) { + err = -1; + } else { + sanitize_restored_xstate(tsk, &env, xstate_bv, fx_only); + set_used_math(); + } + + if (use_eager_fpu()) + math_state_restore(); + + return err; + } else { /* - * Encountered an error while doing the restore from the - * user buffer, clear the fpu state. + * For 64-bit frames and 32-bit fsave frames, restore the user + * state to the registers directly (with exceptions handled). */ -clear: - clear_fpu(tsk); - clear_used_math(); + user_fpu_begin(); + if (restore_user_xstate(buf_fx, xstate_bv, fx_only)) { + drop_init_fpu(tsk); + return -1; + } } - return err; + + return 0; } -#endif /* * Prepare the SW reserved portion of the fxsave memory layout, indicating @@ -321,31 +428,22 @@ clear: */ static void prepare_fx_sw_frame(void) { - int size_extended = (xstate_size - sizeof(struct i387_fxsave_struct)) + - FP_XSTATE_MAGIC2_SIZE; + int fsave_header_size = sizeof(struct i387_fsave_struct); + int size = xstate_size + FP_XSTATE_MAGIC2_SIZE; - sig_xstate_size = sizeof(struct _fpstate) + size_extended; - -#ifdef CONFIG_IA32_EMULATION - sig_xstate_ia32_size = sizeof(struct _fpstate_ia32) + size_extended; -#endif - - memset(&fx_sw_reserved, 0, sizeof(fx_sw_reserved)); + if (config_enabled(CONFIG_X86_32)) + size += fsave_header_size; fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1; - fx_sw_reserved.extended_size = sig_xstate_size; + fx_sw_reserved.extended_size = size; fx_sw_reserved.xstate_bv = pcntxt_mask; fx_sw_reserved.xstate_size = xstate_size; -#ifdef CONFIG_IA32_EMULATION - memcpy(&fx_sw_reserved_ia32, &fx_sw_reserved, - sizeof(struct _fpx_sw_bytes)); - fx_sw_reserved_ia32.extended_size = sig_xstate_ia32_size; -#endif -} -#ifdef CONFIG_X86_64 -unsigned int sig_xstate_size = sizeof(struct _fpstate); -#endif + if (config_enabled(CONFIG_IA32_EMULATION)) { + fx_sw_reserved_ia32 = fx_sw_reserved; + fx_sw_reserved_ia32.extended_size += fsave_header_size; + } +} /* * Enable the extended processor state save/restore feature @@ -384,19 +482,21 @@ static void __init setup_xstate_features(void) /* * setup the xstate image representing the init state */ -static void __init setup_xstate_init(void) +static void __init setup_init_fpu_buf(void) { - setup_xstate_features(); - /* * Setup init_xstate_buf to represent the init state of * all the features managed by the xsave */ init_xstate_buf = alloc_bootmem_align(xstate_size, __alignof__(struct xsave_struct)); - init_xstate_buf->i387.mxcsr = MXCSR_DEFAULT; + fx_finit(&init_xstate_buf->i387); + + if (!cpu_has_xsave) + return; + + setup_xstate_features(); - clts(); /* * Init all the features state with header_bv being 0x0 */ @@ -406,9 +506,21 @@ static void __init setup_xstate_init(void) * of any feature which is not represented by all zero's. */ xsave_state(init_xstate_buf, -1); - stts(); } +static enum { AUTO, ENABLE, DISABLE } eagerfpu = AUTO; +static int __init eager_fpu_setup(char *s) +{ + if (!strcmp(s, "on")) + eagerfpu = ENABLE; + else if (!strcmp(s, "off")) + eagerfpu = DISABLE; + else if (!strcmp(s, "auto")) + eagerfpu = AUTO; + return 1; +} +__setup("eagerfpu=", eager_fpu_setup); + /* * Enable and initialize the xsave feature. 
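Usage note for the __setup() handler just added (editorial; values as parsed by eager_fpu_setup(), default auto):

	eagerfpu=on	# always restore the FPU eagerly
	eagerfpu=off	# force lazy restore, even on xsaveopt hardware
	eagerfpu=auto	# default; xstate_enable_boot_cpu() below promotes
			# this to 'on' when the CPU has xsaveopt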
*/ @@ -445,8 +557,11 @@ static void __init xstate_enable_boot_cpu(void) update_regset_xstate_info(xstate_size, pcntxt_mask); prepare_fx_sw_frame(); + setup_init_fpu_buf(); - setup_xstate_init(); + /* Auto enable eagerfpu for xsaveopt */ + if (cpu_has_xsaveopt && eagerfpu != DISABLE) + eagerfpu = ENABLE; pr_info("enabled xstate_bv 0x%llx, cntxt size 0x%x\n", pcntxt_mask, xstate_size); @@ -471,3 +586,43 @@ void __cpuinit xsave_init(void) next_func = xstate_enable; this_func(); } + +static inline void __init eager_fpu_init_bp(void) +{ + current->thread.fpu.state = + alloc_bootmem_align(xstate_size, __alignof__(struct xsave_struct)); + if (!init_xstate_buf) + setup_init_fpu_buf(); +} + +void __cpuinit eager_fpu_init(void) +{ + static __refdata void (*boot_func)(void) = eager_fpu_init_bp; + + clear_used_math(); + current_thread_info()->status = 0; + + if (eagerfpu == ENABLE) + setup_force_cpu_cap(X86_FEATURE_EAGER_FPU); + + if (!cpu_has_eager_fpu) { + stts(); + return; + } + + if (boot_func) { + boot_func(); + boot_func = NULL; + } + + /* + * This is same as math_state_restore(). But use_xsave() is + * not yet patched to use math_state_restore(). + */ + init_fpu(current); + __thread_fpu_begin(current); + if (cpu_has_xsave) + xrstor_state(init_xstate_buf, -1); + else + fxrstor_checking(&init_xstate_buf->i387); +} diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index a71faf727ff3..bca63f04dccb 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -183,95 +183,6 @@ TRACE_EVENT(kvm_apic, #define KVM_ISA_VMX 1 #define KVM_ISA_SVM 2 -#define VMX_EXIT_REASONS \ - { EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \ - { EXIT_REASON_EXTERNAL_INTERRUPT, "EXTERNAL_INTERRUPT" }, \ - { EXIT_REASON_TRIPLE_FAULT, "TRIPLE_FAULT" }, \ - { EXIT_REASON_PENDING_INTERRUPT, "PENDING_INTERRUPT" }, \ - { EXIT_REASON_NMI_WINDOW, "NMI_WINDOW" }, \ - { EXIT_REASON_TASK_SWITCH, "TASK_SWITCH" }, \ - { EXIT_REASON_CPUID, "CPUID" }, \ - { EXIT_REASON_HLT, "HLT" }, \ - { EXIT_REASON_INVLPG, "INVLPG" }, \ - { EXIT_REASON_RDPMC, "RDPMC" }, \ - { EXIT_REASON_RDTSC, "RDTSC" }, \ - { EXIT_REASON_VMCALL, "VMCALL" }, \ - { EXIT_REASON_VMCLEAR, "VMCLEAR" }, \ - { EXIT_REASON_VMLAUNCH, "VMLAUNCH" }, \ - { EXIT_REASON_VMPTRLD, "VMPTRLD" }, \ - { EXIT_REASON_VMPTRST, "VMPTRST" }, \ - { EXIT_REASON_VMREAD, "VMREAD" }, \ - { EXIT_REASON_VMRESUME, "VMRESUME" }, \ - { EXIT_REASON_VMWRITE, "VMWRITE" }, \ - { EXIT_REASON_VMOFF, "VMOFF" }, \ - { EXIT_REASON_VMON, "VMON" }, \ - { EXIT_REASON_CR_ACCESS, "CR_ACCESS" }, \ - { EXIT_REASON_DR_ACCESS, "DR_ACCESS" }, \ - { EXIT_REASON_IO_INSTRUCTION, "IO_INSTRUCTION" }, \ - { EXIT_REASON_MSR_READ, "MSR_READ" }, \ - { EXIT_REASON_MSR_WRITE, "MSR_WRITE" }, \ - { EXIT_REASON_MWAIT_INSTRUCTION, "MWAIT_INSTRUCTION" }, \ - { EXIT_REASON_MONITOR_INSTRUCTION, "MONITOR_INSTRUCTION" }, \ - { EXIT_REASON_PAUSE_INSTRUCTION, "PAUSE_INSTRUCTION" }, \ - { EXIT_REASON_MCE_DURING_VMENTRY, "MCE_DURING_VMENTRY" }, \ - { EXIT_REASON_TPR_BELOW_THRESHOLD, "TPR_BELOW_THRESHOLD" }, \ - { EXIT_REASON_APIC_ACCESS, "APIC_ACCESS" }, \ - { EXIT_REASON_EPT_VIOLATION, "EPT_VIOLATION" }, \ - { EXIT_REASON_EPT_MISCONFIG, "EPT_MISCONFIG" }, \ - { EXIT_REASON_WBINVD, "WBINVD" } - -#define SVM_EXIT_REASONS \ - { SVM_EXIT_READ_CR0, "read_cr0" }, \ - { SVM_EXIT_READ_CR3, "read_cr3" }, \ - { SVM_EXIT_READ_CR4, "read_cr4" }, \ - { SVM_EXIT_READ_CR8, "read_cr8" }, \ - { SVM_EXIT_WRITE_CR0, "write_cr0" }, \ - { SVM_EXIT_WRITE_CR3, "write_cr3" }, \ - { SVM_EXIT_WRITE_CR4, "write_cr4" }, \ - { SVM_EXIT_WRITE_CR8, "write_cr8" }, 
\ - { SVM_EXIT_READ_DR0, "read_dr0" }, \ - { SVM_EXIT_READ_DR1, "read_dr1" }, \ - { SVM_EXIT_READ_DR2, "read_dr2" }, \ - { SVM_EXIT_READ_DR3, "read_dr3" }, \ - { SVM_EXIT_WRITE_DR0, "write_dr0" }, \ - { SVM_EXIT_WRITE_DR1, "write_dr1" }, \ - { SVM_EXIT_WRITE_DR2, "write_dr2" }, \ - { SVM_EXIT_WRITE_DR3, "write_dr3" }, \ - { SVM_EXIT_WRITE_DR5, "write_dr5" }, \ - { SVM_EXIT_WRITE_DR7, "write_dr7" }, \ - { SVM_EXIT_EXCP_BASE + DB_VECTOR, "DB excp" }, \ - { SVM_EXIT_EXCP_BASE + BP_VECTOR, "BP excp" }, \ - { SVM_EXIT_EXCP_BASE + UD_VECTOR, "UD excp" }, \ - { SVM_EXIT_EXCP_BASE + PF_VECTOR, "PF excp" }, \ - { SVM_EXIT_EXCP_BASE + NM_VECTOR, "NM excp" }, \ - { SVM_EXIT_EXCP_BASE + MC_VECTOR, "MC excp" }, \ - { SVM_EXIT_INTR, "interrupt" }, \ - { SVM_EXIT_NMI, "nmi" }, \ - { SVM_EXIT_SMI, "smi" }, \ - { SVM_EXIT_INIT, "init" }, \ - { SVM_EXIT_VINTR, "vintr" }, \ - { SVM_EXIT_CPUID, "cpuid" }, \ - { SVM_EXIT_INVD, "invd" }, \ - { SVM_EXIT_HLT, "hlt" }, \ - { SVM_EXIT_INVLPG, "invlpg" }, \ - { SVM_EXIT_INVLPGA, "invlpga" }, \ - { SVM_EXIT_IOIO, "io" }, \ - { SVM_EXIT_MSR, "msr" }, \ - { SVM_EXIT_TASK_SWITCH, "task_switch" }, \ - { SVM_EXIT_SHUTDOWN, "shutdown" }, \ - { SVM_EXIT_VMRUN, "vmrun" }, \ - { SVM_EXIT_VMMCALL, "hypercall" }, \ - { SVM_EXIT_VMLOAD, "vmload" }, \ - { SVM_EXIT_VMSAVE, "vmsave" }, \ - { SVM_EXIT_STGI, "stgi" }, \ - { SVM_EXIT_CLGI, "clgi" }, \ - { SVM_EXIT_SKINIT, "skinit" }, \ - { SVM_EXIT_WBINVD, "wbinvd" }, \ - { SVM_EXIT_MONITOR, "monitor" }, \ - { SVM_EXIT_MWAIT, "mwait" }, \ - { SVM_EXIT_XSETBV, "xsetbv" }, \ - { SVM_EXIT_NPF, "npf" } - /* * Tracepoint for kvm guest exit: */ diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 5d46c905e06f..ad6b1dd06f8b 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1494,8 +1494,12 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx) #ifdef CONFIG_X86_64 wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); #endif - if (user_has_fpu()) - clts(); + /* + * If the FPU is not active (through the host task or + * the guest vcpu), then restore the cr0.TS bit. 
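 * (Editorial aside, not in the patch -- the ownership rule in brief:
 *    host task owns the FPU (user_has_fpu())     -> leave cr0.TS clear
 *    guest owns the FPU (vcpu.guest_fpu_loaded)  -> leave cr0.TS clear
 *    neither                                     -> stts(): the next FP
 *                                                   insn traps with #NM
 *  This pairs with the HOST_CR0 hunk below, which now keeps TS clear in
 *  the host state loaded on VM exit.)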
+ */ + if (!user_has_fpu() && !vmx->vcpu.guest_fpu_loaded) + stts(); load_gdt(&__get_cpu_var(host_gdt)); } @@ -3582,6 +3586,7 @@ static void seg_setup(int seg) static int alloc_apic_access_page(struct kvm *kvm) { + struct page *page; struct kvm_userspace_memory_region kvm_userspace_mem; int r = 0; @@ -3596,7 +3601,13 @@ static int alloc_apic_access_page(struct kvm *kvm) if (r) goto out; - kvm->arch.apic_access_page = gfn_to_page(kvm, 0xfee00); + page = gfn_to_page(kvm, 0xfee00); + if (is_error_page(page)) { + r = -EFAULT; + goto out; + } + + kvm->arch.apic_access_page = page; out: mutex_unlock(&kvm->slots_lock); return r; @@ -3604,6 +3615,7 @@ out: static int alloc_identity_pagetable(struct kvm *kvm) { + struct page *page; struct kvm_userspace_memory_region kvm_userspace_mem; int r = 0; @@ -3619,8 +3631,13 @@ static int alloc_identity_pagetable(struct kvm *kvm) if (r) goto out; - kvm->arch.ept_identity_pagetable = gfn_to_page(kvm, - kvm->arch.ept_identity_map_addr >> PAGE_SHIFT); + page = gfn_to_page(kvm, kvm->arch.ept_identity_map_addr >> PAGE_SHIFT); + if (is_error_page(page)) { + r = -EFAULT; + goto out; + } + + kvm->arch.ept_identity_pagetable = page; out: mutex_unlock(&kvm->slots_lock); return r; @@ -3693,7 +3710,7 @@ static void vmx_set_constant_host_state(void) unsigned long tmpl; struct desc_ptr dt; - vmcs_writel(HOST_CR0, read_cr0() | X86_CR0_TS); /* 22.2.3 */ + vmcs_writel(HOST_CR0, read_cr0() & ~X86_CR0_TS); /* 22.2.3 */ vmcs_writel(HOST_CR4, read_cr4()); /* 22.2.3, 22.2.5 */ vmcs_writel(HOST_CR3, read_cr3()); /* 22.2.3 FIXME: shadow tables */ @@ -4490,7 +4507,7 @@ static int handle_cr(struct kvm_vcpu *vcpu) vcpu->run->exit_reason = KVM_EXIT_SET_TPR; return 0; } - }; + } break; case 2: /* clts */ handle_clts(vcpu); @@ -6536,7 +6553,7 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu) /* Exposing INVPCID only when PCID is exposed */ best = kvm_find_cpuid_entry(vcpu, 0x7, 0); if (vmx_invpcid_supported() && - best && (best->ecx & bit(X86_FEATURE_INVPCID)) && + best && (best->ebx & bit(X86_FEATURE_INVPCID)) && guest_cpuid_has_pcid(vcpu)) { exec_control |= SECONDARY_EXEC_ENABLE_INVPCID; vmcs_write32(SECONDARY_VM_EXEC_CONTROL, @@ -6546,7 +6563,7 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu) vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); if (best) - best->ecx &= ~bit(X86_FEATURE_INVPCID); + best->ebx &= ~bit(X86_FEATURE_INVPCID); } } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index b16d4a5bfa41..1eefebe5d727 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2010,6 +2010,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) case MSR_KVM_STEAL_TIME: data = vcpu->arch.st.msr_val; break; + case MSR_KVM_PV_EOI_EN: + data = vcpu->arch.pv_eoi.msr_val; + break; case MSR_IA32_P5_MC_ADDR: case MSR_IA32_P5_MC_TYPE: case MSR_IA32_MCG_CAP: @@ -5110,17 +5113,20 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu) !kvm_event_needs_reinjection(vcpu); } -static void vapic_enter(struct kvm_vcpu *vcpu) +static int vapic_enter(struct kvm_vcpu *vcpu) { struct kvm_lapic *apic = vcpu->arch.apic; struct page *page; if (!apic || !apic->vapic_addr) - return; + return 0; page = gfn_to_page(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT); + if (is_error_page(page)) + return -EFAULT; vcpu->arch.apic->vapic_page = page; + return 0; } static void vapic_exit(struct kvm_vcpu *vcpu) @@ -5427,7 +5433,11 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) } vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); - vapic_enter(vcpu); + r = vapic_enter(vcpu); + if (r) { + 
srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); + return r; + } r = 1; while (r > 0) { @@ -5979,7 +5989,7 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) */ kvm_put_guest_xcr0(vcpu); vcpu->guest_fpu_loaded = 1; - unlazy_fpu(current); + __kernel_fpu_begin(); fpu_restore_checking(&vcpu->arch.guest_fpu); trace_kvm_fpu(1); } @@ -5993,6 +6003,7 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) vcpu->guest_fpu_loaded = 0; fpu_save_init(&vcpu->arch.guest_fpu); + __kernel_fpu_end(); ++vcpu->stat.fpu_reload; kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu); trace_kvm_fpu(0); diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index 5b2995f4557a..a30ca15be21c 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S @@ -17,6 +17,7 @@ #include <asm/cpufeature.h> #include <asm/alternative-asm.h> #include <asm/asm.h> +#include <asm/smap.h> /* * By placing feature2 after feature1 in altinstructions section, we logically @@ -130,6 +131,7 @@ ENDPROC(bad_from_user) */ ENTRY(copy_user_generic_unrolled) CFI_STARTPROC + ASM_STAC cmpl $8,%edx jb 20f /* less then 8 bytes, go to byte copy loop */ ALIGN_DESTINATION @@ -177,6 +179,7 @@ ENTRY(copy_user_generic_unrolled) decl %ecx jnz 21b 23: xor %eax,%eax + ASM_CLAC ret .section .fixup,"ax" @@ -232,6 +235,7 @@ ENDPROC(copy_user_generic_unrolled) */ ENTRY(copy_user_generic_string) CFI_STARTPROC + ASM_STAC andl %edx,%edx jz 4f cmpl $8,%edx @@ -246,6 +250,7 @@ ENTRY(copy_user_generic_string) 3: rep movsb 4: xorl %eax,%eax + ASM_CLAC ret .section .fixup,"ax" @@ -273,12 +278,14 @@ ENDPROC(copy_user_generic_string) */ ENTRY(copy_user_enhanced_fast_string) CFI_STARTPROC + ASM_STAC andl %edx,%edx jz 2f movl %edx,%ecx 1: rep movsb 2: xorl %eax,%eax + ASM_CLAC ret .section .fixup,"ax" diff --git a/arch/x86/lib/copy_user_nocache_64.S b/arch/x86/lib/copy_user_nocache_64.S index cacddc7163eb..6a4f43c2d9e6 100644 --- a/arch/x86/lib/copy_user_nocache_64.S +++ b/arch/x86/lib/copy_user_nocache_64.S @@ -15,6 +15,7 @@ #include <asm/asm-offsets.h> #include <asm/thread_info.h> #include <asm/asm.h> +#include <asm/smap.h> .macro ALIGN_DESTINATION #ifdef FIX_ALIGNMENT @@ -48,6 +49,7 @@ */ ENTRY(__copy_user_nocache) CFI_STARTPROC + ASM_STAC cmpl $8,%edx jb 20f /* less then 8 bytes, go to byte copy loop */ ALIGN_DESTINATION @@ -95,6 +97,7 @@ ENTRY(__copy_user_nocache) decl %ecx jnz 21b 23: xorl %eax,%eax + ASM_CLAC sfence ret diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S index b33b1fb1e6d4..156b9c804670 100644 --- a/arch/x86/lib/getuser.S +++ b/arch/x86/lib/getuser.S @@ -33,6 +33,7 @@ #include <asm/asm-offsets.h> #include <asm/thread_info.h> #include <asm/asm.h> +#include <asm/smap.h> .text ENTRY(__get_user_1) @@ -40,8 +41,10 @@ ENTRY(__get_user_1) GET_THREAD_INFO(%_ASM_DX) cmp TI_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user + ASM_STAC 1: movzb (%_ASM_AX),%edx xor %eax,%eax + ASM_CLAC ret CFI_ENDPROC ENDPROC(__get_user_1) @@ -53,8 +56,10 @@ ENTRY(__get_user_2) GET_THREAD_INFO(%_ASM_DX) cmp TI_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user + ASM_STAC 2: movzwl -1(%_ASM_AX),%edx xor %eax,%eax + ASM_CLAC ret CFI_ENDPROC ENDPROC(__get_user_2) @@ -66,8 +71,10 @@ ENTRY(__get_user_4) GET_THREAD_INFO(%_ASM_DX) cmp TI_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user + ASM_STAC 3: mov -3(%_ASM_AX),%edx xor %eax,%eax + ASM_CLAC ret CFI_ENDPROC ENDPROC(__get_user_4) @@ -80,8 +87,10 @@ ENTRY(__get_user_8) GET_THREAD_INFO(%_ASM_DX) cmp TI_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user + ASM_STAC 4: movq -7(%_ASM_AX),%_ASM_DX xor %eax,%eax + ASM_CLAC ret 
CFI_ENDPROC ENDPROC(__get_user_8) @@ -91,6 +100,7 @@ bad_get_user: CFI_STARTPROC xor %edx,%edx mov $(-EFAULT),%_ASM_AX + ASM_CLAC ret CFI_ENDPROC END(bad_get_user) diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c index b1e6c4b2e8eb..54fcffed28ed 100644 --- a/arch/x86/lib/insn.c +++ b/arch/x86/lib/insn.c @@ -18,7 +18,11 @@ * Copyright (C) IBM Corporation, 2002, 2004, 2009 */ +#ifdef __KERNEL__ #include <linux/string.h> +#else +#include <string.h> +#endif #include <asm/inat.h> #include <asm/insn.h> diff --git a/arch/x86/lib/putuser.S b/arch/x86/lib/putuser.S index 7f951c8f76c4..fc6ba17a7eec 100644 --- a/arch/x86/lib/putuser.S +++ b/arch/x86/lib/putuser.S @@ -15,6 +15,7 @@ #include <asm/thread_info.h> #include <asm/errno.h> #include <asm/asm.h> +#include <asm/smap.h> /* @@ -31,7 +32,8 @@ #define ENTER CFI_STARTPROC ; \ GET_THREAD_INFO(%_ASM_BX) -#define EXIT ret ; \ +#define EXIT ASM_CLAC ; \ + ret ; \ CFI_ENDPROC .text @@ -39,6 +41,7 @@ ENTRY(__put_user_1) ENTER cmp TI_addr_limit(%_ASM_BX),%_ASM_CX jae bad_put_user + ASM_STAC 1: movb %al,(%_ASM_CX) xor %eax,%eax EXIT @@ -50,6 +53,7 @@ ENTRY(__put_user_2) sub $1,%_ASM_BX cmp %_ASM_BX,%_ASM_CX jae bad_put_user + ASM_STAC 2: movw %ax,(%_ASM_CX) xor %eax,%eax EXIT @@ -61,6 +65,7 @@ ENTRY(__put_user_4) sub $3,%_ASM_BX cmp %_ASM_BX,%_ASM_CX jae bad_put_user + ASM_STAC 3: movl %eax,(%_ASM_CX) xor %eax,%eax EXIT @@ -72,6 +77,7 @@ ENTRY(__put_user_8) sub $7,%_ASM_BX cmp %_ASM_BX,%_ASM_CX jae bad_put_user + ASM_STAC 4: mov %_ASM_AX,(%_ASM_CX) #ifdef CONFIG_X86_32 5: movl %edx,4(%_ASM_CX) diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c index 1781b2f950e2..98f6d6b68f5a 100644 --- a/arch/x86/lib/usercopy_32.c +++ b/arch/x86/lib/usercopy_32.c @@ -42,10 +42,11 @@ do { \ int __d0; \ might_fault(); \ __asm__ __volatile__( \ + ASM_STAC "\n" \ "0: rep; stosl\n" \ " movl %2,%0\n" \ "1: rep; stosb\n" \ - "2:\n" \ + "2: " ASM_CLAC "\n" \ ".section .fixup,\"ax\"\n" \ "3: lea 0(%2,%0,4),%0\n" \ " jmp 2b\n" \ @@ -626,10 +627,12 @@ survive: return n; } #endif + stac(); if (movsl_is_ok(to, from, n)) __copy_user(to, from, n); else n = __copy_user_intel(to, from, n); + clac(); return n; } EXPORT_SYMBOL(__copy_to_user_ll); @@ -637,10 +640,12 @@ EXPORT_SYMBOL(__copy_to_user_ll); unsigned long __copy_from_user_ll(void *to, const void __user *from, unsigned long n) { + stac(); if (movsl_is_ok(to, from, n)) __copy_user_zeroing(to, from, n); else n = __copy_user_zeroing_intel(to, from, n); + clac(); return n; } EXPORT_SYMBOL(__copy_from_user_ll); @@ -648,11 +653,13 @@ EXPORT_SYMBOL(__copy_from_user_ll); unsigned long __copy_from_user_ll_nozero(void *to, const void __user *from, unsigned long n) { + stac(); if (movsl_is_ok(to, from, n)) __copy_user(to, from, n); else n = __copy_user_intel((void __user *)to, (const void *)from, n); + clac(); return n; } EXPORT_SYMBOL(__copy_from_user_ll_nozero); @@ -660,6 +667,7 @@ EXPORT_SYMBOL(__copy_from_user_ll_nozero); unsigned long __copy_from_user_ll_nocache(void *to, const void __user *from, unsigned long n) { + stac(); #ifdef CONFIG_X86_INTEL_USERCOPY if (n > 64 && cpu_has_xmm2) n = __copy_user_zeroing_intel_nocache(to, from, n); @@ -668,6 +676,7 @@ unsigned long __copy_from_user_ll_nocache(void *to, const void __user *from, #else __copy_user_zeroing(to, from, n); #endif + clac(); return n; } EXPORT_SYMBOL(__copy_from_user_ll_nocache); @@ -675,6 +684,7 @@ EXPORT_SYMBOL(__copy_from_user_ll_nocache); unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *from, unsigned long n) { + stac(); 
#ifdef CONFIG_X86_INTEL_USERCOPY if (n > 64 && cpu_has_xmm2) n = __copy_user_intel_nocache(to, from, n); @@ -683,6 +693,7 @@ unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *fr #else __copy_user(to, from, n); #endif + clac(); return n; } EXPORT_SYMBOL(__copy_from_user_ll_nocache_nozero); diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c index e5b130bc2d0e..05928aae911e 100644 --- a/arch/x86/lib/usercopy_64.c +++ b/arch/x86/lib/usercopy_64.c @@ -18,6 +18,7 @@ unsigned long __clear_user(void __user *addr, unsigned long size) might_fault(); /* no memory constraint because it doesn't change any memory gcc knows about */ + stac(); asm volatile( " testq %[size8],%[size8]\n" " jz 4f\n" @@ -40,6 +41,7 @@ unsigned long __clear_user(void __user *addr, unsigned long size) : [size8] "=&c"(size), [dst] "=&D" (__d0) : [size1] "r"(size & 7), "[size8]" (size / 8), "[dst]"(addr), [zero] "r" (0UL), [eight] "r" (8UL)); + clac(); return size; } EXPORT_SYMBOL(__clear_user); @@ -82,5 +84,6 @@ copy_user_handle_tail(char *to, char *from, unsigned len, unsigned zerorest) for (c = 0, zero_len = len; zerorest && zero_len; --zero_len) if (__put_user_nocheck(c, to++, sizeof(char))) break; + clac(); return len; } diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 76dcd9d8e0bc..a530b230e7d7 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -18,6 +18,7 @@ #include <asm/pgalloc.h> /* pgd_*(), ... */ #include <asm/kmemcheck.h> /* kmemcheck_*(), ... */ #include <asm/fixmap.h> /* VSYSCALL_START */ +#include <asm/rcu.h> /* exception_enter(), ... */ /* * Page fault error code bits: @@ -995,13 +996,24 @@ static int fault_in_kernel_space(unsigned long address) return address >= TASK_SIZE_MAX; } +static inline bool smap_violation(int error_code, struct pt_regs *regs) +{ + if (error_code & PF_USER) + return false; + + if (!user_mode_vm(regs) && (regs->flags & X86_EFLAGS_AC)) + return false; + + return true; +} + /* * This routine handles page faults. It determines the address, * and the problem, and then passes it off to one of the appropriate * routines. 
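Editorial sketch tying together the STAC/CLAC annotations in the user-copy routines above and smap_violation() here. Simplified: the user_mode_vm() corner case for faults with a kernel-looking frame is omitted, and __copy_user_sketch() is a hypothetical worker.

	/* Producer side: every intended user access runs in an AC window. */
	static unsigned long copy_from_user_smap_sketch(void *dst,
							const void __user *src,
							unsigned long n)
	{
		stac();			/* EFLAGS.AC = 1: SMAP stands down */
		n = __copy_user_sketch(dst, src, n);
		clac();			/* EFLAGS.AC = 0: SMAP re-armed */
		return n;
	}

	/* Consumer side: the page-fault handler's view of the protocol. */
	static bool smap_violation_sketch(unsigned long error_code,
					  struct pt_regs *regs)
	{
		if (error_code & PF_USER)	   /* CPL 3 fault: not SMAP */
			return false;
		if (regs->flags & X86_EFLAGS_AC)   /* inside a stac() window */
			return false;
		return true;	/* kernel touched user memory unannounced */
	}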
*/ -dotraplinkage void __kprobes -do_page_fault(struct pt_regs *regs, unsigned long error_code) +static void __kprobes +__do_page_fault(struct pt_regs *regs, unsigned long error_code) { struct vm_area_struct *vma; struct task_struct *tsk; @@ -1088,6 +1100,13 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code) if (unlikely(error_code & PF_RSVD)) pgtable_bad(regs, error_code, address); + if (static_cpu_has(X86_FEATURE_SMAP)) { + if (unlikely(smap_violation(error_code, regs))) { + bad_area_nosemaphore(regs, error_code, address); + return; + } + } + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); /* @@ -1209,3 +1228,11 @@ good_area: up_read(&mm->mmap_sem); } + +dotraplinkage void __kprobes +do_page_fault(struct pt_regs *regs, unsigned long error_code) +{ + exception_enter(regs); + __do_page_fault(regs, error_code); + exception_exit(regs); +} diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index e0e6990723e9..ab1f6a93b527 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -319,7 +319,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, */ int devmem_is_allowed(unsigned long pagenr) { - if (pagenr <= 256) + if (pagenr < 256) return 1; if (iomem_is_exclusive(pagenr << PAGE_SHIFT)) return 0; diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 575d86f85ce4..11a58001b4ce 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -445,10 +445,10 @@ static inline void permanent_kmaps_init(pgd_t *pgd_base) } #endif /* CONFIG_HIGHMEM */ -void __init native_pagetable_setup_start(pgd_t *base) +void __init native_pagetable_init(void) { unsigned long pfn, va; - pgd_t *pgd; + pgd_t *pgd, *base = swapper_pg_dir; pud_t *pud; pmd_t *pmd; pte_t *pte; @@ -475,10 +475,7 @@ void __init native_pagetable_setup_start(pgd_t *base) pte_clear(NULL, va, pte); } paravirt_alloc_pmd(&init_mm, __pa(base) >> PAGE_SHIFT); -} - -void __init native_pagetable_setup_done(pgd_t *base) -{ + paging_init(); } /* @@ -493,7 +490,7 @@ void __init native_pagetable_setup_done(pgd_t *base) * If we're booting paravirtualized under a hypervisor, then there are * more options: we may already be running PAE, and the pagetable may * or may not be based in swapper_pg_dir. In any case, - * paravirt_pagetable_setup_start() will set up swapper_pg_dir + * paravirt_pagetable_init() will set up swapper_pg_dir * appropriately for the rest of the initialization to work. 
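The do_page_fault() split above is the same wrapper shape applied throughout traps.c earlier in this diff. Editorial sketch with a hypothetical handler name:

	dotraplinkage void __kprobes
	do_example_trap(struct pt_regs *regs, long error_code)
	{
		exception_enter(regs);	/* leave RCU's user-mode extended
					 * quiescent state before touching
					 * kernel data */
		__do_example_trap(regs, error_code);	/* unchanged body */
		exception_exit(regs);	/* re-enter user-mode QS if we
					 * interrupted userspace
					 * (HAVE_RCU_USER_QS) */
	}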
* * In general, pagetable_init() assumes that the pagetable may already @@ -712,7 +709,7 @@ static void __init test_wp_bit(void) "Checking if this processor honours the WP bit even in supervisor mode..."); /* Any page-aligned address will do, the test is non-destructive */ - __set_fixmap(FIX_WP_TEST, __pa(&swapper_pg_dir), PAGE_READONLY); + __set_fixmap(FIX_WP_TEST, __pa(&swapper_pg_dir), PAGE_KERNEL_RO); boot_cpu_data.wp_works_ok = do_test_wp_bit(); clear_fixmap(FIX_WP_TEST); diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 613cd83e8c0c..0777f042e400 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -98,6 +98,8 @@ static void flush_tlb_func(void *info) { struct flush_tlb_info *f = info; + inc_irq_stat(irq_tlb_count); + if (f->flush_mm != this_cpu_read(cpu_tlbstate.active_mm)) return; @@ -320,7 +322,7 @@ static ssize_t tlbflush_write_file(struct file *file, if (kstrtos8(buf, 0, &shift)) return -EINVAL; - if (shift > 64) + if (shift < -1 || shift >= BITS_PER_LONG) return -EINVAL; tlb_flushall_shift = shift; diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 33643a8bcbbb..520d2bd0b9c5 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -280,6 +280,31 @@ void bpf_jit_compile(struct sk_filter *fp) } EMIT4(0x31, 0xd2, 0xf7, 0xf3); /* xor %edx,%edx; div %ebx */ break; + case BPF_S_ALU_MOD_X: /* A %= X; */ + seen |= SEEN_XREG; + EMIT2(0x85, 0xdb); /* test %ebx,%ebx */ + if (pc_ret0 > 0) { + /* addrs[pc_ret0 - 1] is start address of target + * (addrs[i] - 6) is the address following this jmp + * ("xor %edx,%edx; div %ebx;mov %edx,%eax" being 6 bytes long) + */ + EMIT_COND_JMP(X86_JE, addrs[pc_ret0 - 1] - + (addrs[i] - 6)); + } else { + EMIT_COND_JMP(X86_JNE, 2 + 5); + CLEAR_A(); + EMIT1_off32(0xe9, cleanup_addr - (addrs[i] - 6)); /* jmp .+off32 */ + } + EMIT2(0x31, 0xd2); /* xor %edx,%edx */ + EMIT2(0xf7, 0xf3); /* div %ebx */ + EMIT2(0x89, 0xd0); /* mov %edx,%eax */ + break; + case BPF_S_ALU_MOD_K: /* A %= K; */ + EMIT2(0x31, 0xd2); /* xor %edx,%edx */ + EMIT1(0xb9);EMIT(K, 4); /* mov imm32,%ecx */ + EMIT2(0xf7, 0xf1); /* div %ecx */ + EMIT2(0x89, 0xd0); /* mov %edx,%eax */ + break; case BPF_S_ALU_DIV_K: /* A = reciprocal_divide(A, K); */ EMIT3(0x48, 0x69, 0xc0); /* imul imm32,%rax,%rax */ EMIT(K, 4); @@ -310,9 +335,18 @@ void bpf_jit_compile(struct sk_filter *fp) EMIT1_off32(0x0d, K); /* or imm32,%eax */ break; case BPF_S_ANC_ALU_XOR_X: /* A ^= X; */ + case BPF_S_ALU_XOR_X: seen |= SEEN_XREG; EMIT2(0x31, 0xd8); /* xor %ebx,%eax */ break; + case BPF_S_ALU_XOR_K: /* A ^= K; */ + if (K == 0) + break; + if (is_imm8(K)) + EMIT3(0x83, 0xf0, K); /* xor imm8,%eax */ + else + EMIT1_off32(0x35, K); /* xor imm32,%eax */ + break; case BPF_S_ALU_LSH_X: /* A <<= X; */ seen |= SEEN_XREG; EMIT4(0x89, 0xd9, 0xd3, 0xe0); /* mov %ebx,%ecx; shl %cl,%eax */ diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 505acdd6d600..192397c98606 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -305,7 +305,6 @@ setup_resource(struct acpi_resource *acpi_res, void *data) res->flags = flags; res->start = start; res->end = end; - res->child = NULL; if (!pci_use_crs) { dev_printk(KERN_DEBUG, &info->bridge->dev, @@ -434,7 +433,7 @@ probe_pci_root_info(struct pci_root_info *info, struct acpi_device *device, size = sizeof(*info->res) * info->res_num; info->res_num = 0; - info->res = kmalloc(size, GFP_KERNEL); + info->res = kzalloc(size, GFP_KERNEL); if (!info->res) return; diff --git a/arch/x86/pci/mmconfig-shared.c 
b/arch/x86/pci/mmconfig-shared.c index 937bcece7006..704b9ec043d7 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -585,7 +585,7 @@ static int __init pci_parse_mcfg(struct acpi_table_header *header) while (i >= sizeof(struct acpi_mcfg_allocation)) { entries++; i -= sizeof(struct acpi_mcfg_allocation); - }; + } if (entries == 0) { pr_err(PREFIX "MMCONFIG has no entries\n"); return -ENODEV; diff --git a/arch/x86/pci/visws.c b/arch/x86/pci/visws.c index 6f2f8eeed171..3e6d2a6db866 100644 --- a/arch/x86/pci/visws.c +++ b/arch/x86/pci/visws.c @@ -62,11 +62,6 @@ out: return irq; } -void __init pcibios_update_irq(struct pci_dev *dev, int irq) -{ - pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); -} - int __init pci_visws_init(void) { pcibios_enable_irq = &pci_visws_enable_irq; diff --git a/arch/x86/platform/efi/Makefile b/arch/x86/platform/efi/Makefile index 73b8be0f3675..6db1cc4c7534 100644 --- a/arch/x86/platform/efi/Makefile +++ b/arch/x86/platform/efi/Makefile @@ -1 +1,2 @@ obj-$(CONFIG_EFI) += efi.o efi_$(BITS).o efi_stub_$(BITS).o +obj-$(CONFIG_ACPI_BGRT) += efi-bgrt.o diff --git a/arch/x86/platform/efi/efi-bgrt.c b/arch/x86/platform/efi/efi-bgrt.c new file mode 100644 index 000000000000..f6a0c1b8e518 --- /dev/null +++ b/arch/x86/platform/efi/efi-bgrt.c @@ -0,0 +1,76 @@ +/* + * Copyright 2012 Intel Corporation + * Author: Josh Triplett <josh@joshtriplett.org> + * + * Based on the bgrt driver: + * Copyright 2012 Red Hat, Inc <mjg@redhat.com> + * Author: Matthew Garrett + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/kernel.h> +#include <linux/acpi.h> +#include <linux/efi.h> +#include <linux/efi-bgrt.h> + +struct acpi_table_bgrt *bgrt_tab; +void *bgrt_image; +size_t bgrt_image_size; + +struct bmp_header { + u16 id; + u32 size; +} __packed; + +void efi_bgrt_init(void) +{ + acpi_status status; + void __iomem *image; + bool ioremapped = false; + struct bmp_header bmp_header; + + if (acpi_disabled) + return; + + status = acpi_get_table("BGRT", 0, + (struct acpi_table_header **)&bgrt_tab); + if (ACPI_FAILURE(status)) + return; + + if (bgrt_tab->version != 1) + return; + if (bgrt_tab->image_type != 0 || !bgrt_tab->image_address) + return; + + image = efi_lookup_mapped_addr(bgrt_tab->image_address); + if (!image) { + image = ioremap(bgrt_tab->image_address, sizeof(bmp_header)); + ioremapped = true; + if (!image) + return; + } + + memcpy_fromio(&bmp_header, image, sizeof(bmp_header)); + if (ioremapped) + iounmap(image); + bgrt_image_size = bmp_header.size; + + bgrt_image = kmalloc(bgrt_image_size, GFP_KERNEL); + if (!bgrt_image) + return; + + if (ioremapped) { + image = ioremap(bgrt_tab->image_address, bmp_header.size); + if (!image) { + kfree(bgrt_image); + bgrt_image = NULL; + return; + } + } + + memcpy_fromio(bgrt_image, image, bgrt_image_size); + if (ioremapped) + iounmap(image); +} diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 92660edaa1e7..aded2a91162a 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -31,6 +31,7 @@ #include <linux/kernel.h> #include <linux/init.h> #include <linux/efi.h> +#include <linux/efi-bgrt.h> #include <linux/export.h> #include <linux/bootmem.h> #include <linux/memblock.h> @@ -419,10 +420,21 @@ void __init efi_reserve_boot_services(void) } } -static void __init efi_free_boot_services(void) +static void __init 
efi_unmap_memmap(void) +{ + if (memmap.map) { + early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size); + memmap.map = NULL; + } +} + +void __init efi_free_boot_services(void) { void *p; + if (!efi_native) + return; + for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { efi_memory_desc_t *md = p; unsigned long long start = md->phys_addr; @@ -438,6 +450,8 @@ static void __init efi_free_boot_services(void) free_bootmem_late(start, size); } + + efi_unmap_memmap(); } static int __init efi_systab_init(void *phys) @@ -732,6 +746,11 @@ void __init efi_init(void) #endif } +void __init efi_late_init(void) +{ + efi_bgrt_init(); +} + void __init efi_set_executable(efi_memory_desc_t *md, bool executable) { u64 addr, npages; @@ -764,6 +783,34 @@ static void __init runtime_code_page_mkexec(void) } /* + * We can't ioremap data in EFI boot services RAM, because we've already mapped + * it as RAM. So, look it up in the existing EFI memory map instead. Only + * callable after efi_enter_virtual_mode and before efi_free_boot_services. + */ +void __iomem *efi_lookup_mapped_addr(u64 phys_addr) +{ + void *p; + if (WARN_ON(!memmap.map)) + return NULL; + for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { + efi_memory_desc_t *md = p; + u64 size = md->num_pages << EFI_PAGE_SHIFT; + u64 end = md->phys_addr + size; + if (!(md->attribute & EFI_MEMORY_RUNTIME) && + md->type != EFI_BOOT_SERVICES_CODE && + md->type != EFI_BOOT_SERVICES_DATA) + continue; + if (!md->virt_addr) + continue; + if (phys_addr >= md->phys_addr && phys_addr < end) { + phys_addr += md->virt_addr - md->phys_addr; + return (__force void __iomem *)(unsigned long)phys_addr; + } + } + return NULL; +} + +/* * This function will switch the EFI runtime services to virtual mode. * Essentially, look through the EFI memmap and map every region that * has the runtime attribute bit set in its memory descriptor and update @@ -787,8 +834,10 @@ void __init efi_enter_virtual_mode(void) * non-native EFI */ - if (!efi_native) - goto out; + if (!efi_native) { + efi_unmap_memmap(); + return; + } /* Merge contiguous regions of the same type and attribute */ for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { @@ -878,18 +927,12 @@ void __init efi_enter_virtual_mode(void) } /* - * Thankfully, it does seem that no runtime services other than - * SetVirtualAddressMap() will touch boot services code, so we can - * get rid of it all at this point - */ - efi_free_boot_services(); - - /* * Now that EFI is in virtual mode, update the function * pointers in the runtime service table to the new virtual addresses. * * Call EFI services through wrapper functions. 
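Usage sketch for efi_lookup_mapped_addr() above, modeled on its first consumer, efi-bgrt.c earlier in this diff (helper name hypothetical): boot-services regions are already mapped as RAM, so try the EFI map first and only ioremap() as a fallback.

	static void __iomem *map_efi_blob_sketch(u64 phys, size_t len,
						 bool *ioremapped)
	{
		void __iomem *p = efi_lookup_mapped_addr(phys);

		*ioremapped = false;
		if (!p) {		/* not in the EFI memmap: map it */
			p = ioremap(phys, len);
			*ioremapped = true;	/* caller must iounmap() */
		}
		return p;
	}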
*/ + efi.runtime_version = efi_systab.fw_revision; efi.get_time = virt_efi_get_time; efi.set_time = virt_efi_set_time; efi.get_wakeup_time = virt_efi_get_wakeup_time; @@ -906,9 +949,6 @@ void __init efi_enter_virtual_mode(void) if (__supported_pte_mask & _PAGE_NX) runtime_code_page_mkexec(); -out: - early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size); - memmap.map = NULL; kfree(new_memmap); } diff --git a/arch/x86/realmode/rm/wakeup.h b/arch/x86/realmode/rm/wakeup.h index 9317e0042f24..7dd86a419f5d 100644 --- a/arch/x86/realmode/rm/wakeup.h +++ b/arch/x86/realmode/rm/wakeup.h @@ -36,5 +36,7 @@ extern struct wakeup_header wakeup_header; /* Wakeup behavior bits */ #define WAKEUP_BEHAVIOR_RESTORE_MISC_ENABLE 0 +#define WAKEUP_BEHAVIOR_RESTORE_CR4 1 +#define WAKEUP_BEHAVIOR_RESTORE_EFER 2 #endif /* ARCH_X86_KERNEL_ACPI_RM_WAKEUP_H */ diff --git a/arch/x86/realmode/rm/wakeup_asm.S b/arch/x86/realmode/rm/wakeup_asm.S index 8905166b0bbb..e56479e58053 100644 --- a/arch/x86/realmode/rm/wakeup_asm.S +++ b/arch/x86/realmode/rm/wakeup_asm.S @@ -74,9 +74,18 @@ ENTRY(wakeup_start) lidtl wakeup_idt - /* Clear the EFLAGS */ - pushl $0 + /* Clear the EFLAGS but remember if we have EFLAGS.ID */ + movl $X86_EFLAGS_ID, %ecx + pushl %ecx popfl + pushfl + popl %edi + pushl $0 + popfl + pushfl + popl %edx + xorl %edx, %edi + andl %ecx, %edi /* %edi is zero iff CPUID & %cr4 are missing */ /* Check header signature... */ movl signature, %eax @@ -93,8 +102,8 @@ ENTRY(wakeup_start) /* Restore MISC_ENABLE before entering protected mode, in case BIOS decided to clear XD_DISABLE during S3. */ - movl pmode_behavior, %eax - btl $WAKEUP_BEHAVIOR_RESTORE_MISC_ENABLE, %eax + movl pmode_behavior, %edi + btl $WAKEUP_BEHAVIOR_RESTORE_MISC_ENABLE, %edi jnc 1f movl pmode_misc_en, %eax @@ -110,15 +119,15 @@ ENTRY(wakeup_start) movl pmode_cr3, %eax movl %eax, %cr3 - movl pmode_cr4, %ecx - jecxz 1f - movl %ecx, %cr4 + btl $WAKEUP_BEHAVIOR_RESTORE_CR4, %edi + jz 1f + movl pmode_cr4, %eax + movl %eax, %cr4 1: + btl $WAKEUP_BEHAVIOR_RESTORE_EFER, %edi + jz 1f movl pmode_efer, %eax movl pmode_efer + 4, %edx - movl %eax, %ecx - orl %edx, %ecx - jz 1f movl $MSR_EFER, %ecx wrmsr 1: diff --git a/arch/x86/syscalls/Makefile b/arch/x86/syscalls/Makefile index 3236aebc828d..f325af26107c 100644 --- a/arch/x86/syscalls/Makefile +++ b/arch/x86/syscalls/Makefile @@ -1,7 +1,9 @@ out := $(obj)/../include/generated/asm +uapi := $(obj)/../include/generated/uapi/asm # Create output directory if not already present -_dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)') +_dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)') \ + $(shell [ -d '$(uapi)' ] || mkdir -p '$(uapi)') syscall32 := $(srctree)/$(src)/syscall_32.tbl syscall64 := $(srctree)/$(src)/syscall_64.tbl @@ -18,7 +20,7 @@ quiet_cmd_systbl = SYSTBL $@ cmd_systbl = $(CONFIG_SHELL) '$(systbl)' $< $@ syshdr_abi_unistd_32 := i386 -$(out)/unistd_32.h: $(syscall32) $(syshdr) +$(uapi)/unistd_32.h: $(syscall32) $(syshdr) $(call if_changed,syshdr) syshdr_abi_unistd_32_ia32 := i386 @@ -28,11 +30,11 @@ $(out)/unistd_32_ia32.h: $(syscall32) $(syshdr) syshdr_abi_unistd_x32 := common,x32 syshdr_offset_unistd_x32 := __X32_SYSCALL_BIT -$(out)/unistd_x32.h: $(syscall64) $(syshdr) +$(uapi)/unistd_x32.h: $(syscall64) $(syshdr) $(call if_changed,syshdr) syshdr_abi_unistd_64 := common,64 -$(out)/unistd_64.h: $(syscall64) $(syshdr) +$(uapi)/unistd_64.h: $(syscall64) $(syshdr) $(call if_changed,syshdr) syshdr_abi_unistd_64_x32 := x32 @@ -45,11 +47,12 @@ $(out)/syscalls_32.h: $(syscall32) $(systbl) 
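Back-reference to the wakeup_asm.S hunk above: the pushf/popf dance is the classic CPUID-detection idiom; %edi ends up zero exactly when EFLAGS.ID refuses to toggle, i.e. when CPUID (and hence a meaningful %cr4) does not exist. In C it mirrors the kernel's flag_is_changeable_p() (32-bit only; editorial sketch, not part of the patch):

	static int eflags_id_toggles_sketch(void)
	{
		unsigned long f1, f2;

		asm volatile("pushfl\n\t"	/* save original EFLAGS */
			     "pushfl\n\t"
			     "popl %0\n\t"
			     "movl %0, %1\n\t"
			     "xorl %2, %0\n\t"	/* try to flip EFLAGS.ID */
			     "pushl %0\n\t"
			     "popfl\n\t"
			     "pushfl\n\t"	/* read back what stuck */
			     "popl %0\n\t"
			     "popfl"		/* restore original EFLAGS */
			     : "=&r" (f1), "=&r" (f2)
			     : "ir" (X86_EFLAGS_ID));

		return (f1 ^ f2) & X86_EFLAGS_ID;	/* zero: no CPUID */
	}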
$(out)/syscalls_64.h: $(syscall64) $(systbl) $(call if_changed,systbl) -syshdr-y += unistd_32.h unistd_64.h unistd_x32.h +uapisyshdr-y += unistd_32.h unistd_64.h unistd_x32.h syshdr-y += syscalls_32.h syshdr-$(CONFIG_X86_64) += unistd_32_ia32.h unistd_64_x32.h syshdr-$(CONFIG_X86_64) += syscalls_64.h -targets += $(syshdr-y) +targets += $(uapisyshdr-y) $(syshdr-y) -all: $(addprefix $(out)/,$(targets)) +all: $(addprefix $(uapi)/,$(uapisyshdr-y)) +all: $(addprefix $(out)/,$(syshdr-y)) diff --git a/arch/x86/tools/Makefile b/arch/x86/tools/Makefile index 733057b435b0..bae601f900ef 100644 --- a/arch/x86/tools/Makefile +++ b/arch/x86/tools/Makefile @@ -28,7 +28,7 @@ posttest: $(obj)/test_get_len vmlinux $(obj)/insn_sanity hostprogs-y += test_get_len insn_sanity # -I needed for generated C source and C source which in the kernel tree. -HOSTCFLAGS_test_get_len.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ -I$(srctree)/include/ +HOSTCFLAGS_test_get_len.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/uapi/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ -I$(srctree)/include/uapi/ HOSTCFLAGS_insn_sanity.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ -I$(srctree)/include/ diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig index 9926e11a772d..aeaff8bef2f1 100644 --- a/arch/x86/um/Kconfig +++ b/arch/x86/um/Kconfig @@ -21,6 +21,7 @@ config 64BIT config X86_32 def_bool !64BIT select HAVE_AOUT + select ARCH_WANT_IPC_PARSE_VERSION config X86_64 def_bool 64BIT diff --git a/arch/x86/um/shared/sysdep/kernel-offsets.h b/arch/x86/um/shared/sysdep/kernel-offsets.h index 5868526b5eef..46a9df99f3c5 100644 --- a/arch/x86/um/shared/sysdep/kernel-offsets.h +++ b/arch/x86/um/shared/sysdep/kernel-offsets.h @@ -7,9 +7,6 @@ #define DEFINE(sym, val) \ asm volatile("\n->" #sym " %0 " #val : : "i" (val)) -#define STR(x) #x -#define DEFINE_STR(sym, val) asm volatile("\n->" #sym " " STR(val) " " #val: : ) - #define BLANK() asm volatile("\n->" : : ) #define OFFSET(sym, str, mem) \ diff --git a/arch/x86/um/shared/sysdep/syscalls.h b/arch/x86/um/shared/sysdep/syscalls.h index bd9a89b67e41..ca255a805ed9 100644 --- a/arch/x86/um/shared/sysdep/syscalls.h +++ b/arch/x86/um/shared/sysdep/syscalls.h @@ -1,3 +1,5 @@ +extern long sys_clone(unsigned long clone_flags, unsigned long newsp, + void __user *parent_tid, void __user *child_tid); #ifdef __i386__ #include "syscalls_32.h" #else diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c index a508cea13503..ba7363ecf896 100644 --- a/arch/x86/um/signal.c +++ b/arch/x86/um/signal.c @@ -416,9 +416,6 @@ int setup_signal_stack_sc(unsigned long stack_top, int sig, PT_REGS_AX(regs) = (unsigned long) sig; PT_REGS_DX(regs) = (unsigned long) 0; PT_REGS_CX(regs) = (unsigned long) 0; - - if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED)) - ptrace_notify(SIGTRAP); return 0; } @@ -466,9 +463,6 @@ int setup_signal_stack_si(unsigned long stack_top, int sig, PT_REGS_AX(regs) = (unsigned long) sig; PT_REGS_DX(regs) = (unsigned long) &frame->info; PT_REGS_CX(regs) = (unsigned long) &frame->uc; - - if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED)) - ptrace_notify(SIGTRAP); return 0; } diff --git a/arch/x86/um/sys_call_table_32.c b/arch/x86/um/sys_call_table_32.c index 68d1dc91b37b..b5408cecac6c 100644 --- a/arch/x86/um/sys_call_table_32.c +++ b/arch/x86/um/sys_call_table_32.c @@ -28,7 +28,7 @@ #define ptregs_execve sys_execve #define 
ptregs_iopl sys_iopl #define ptregs_vm86old sys_vm86old -#define ptregs_clone sys_clone +#define ptregs_clone i386_clone #define ptregs_vm86 sys_vm86 #define ptregs_sigaltstack sys_sigaltstack #define ptregs_vfork sys_vfork diff --git a/arch/x86/um/syscalls_32.c b/arch/x86/um/syscalls_32.c index b853e8600b9d..db444c7218fe 100644 --- a/arch/x86/um/syscalls_32.c +++ b/arch/x86/um/syscalls_32.c @@ -3,37 +3,24 @@ * Licensed under the GPL */ -#include "linux/sched.h" -#include "linux/shm.h" -#include "linux/ipc.h" -#include "linux/syscalls.h" -#include "asm/mman.h" -#include "asm/uaccess.h" -#include "asm/unistd.h" +#include <linux/syscalls.h> +#include <sysdep/syscalls.h> /* * The prototype on i386 is: * - * int clone(int flags, void * child_stack, int * parent_tidptr, struct user_desc * newtls, int * child_tidptr) + * int clone(int flags, void * child_stack, int * parent_tidptr, struct user_desc * newtls * * and the "newtls" arg. on i386 is read by copy_thread directly from the * register saved on the stack. */ -long sys_clone(unsigned long clone_flags, unsigned long newsp, - int __user *parent_tid, void *newtls, int __user *child_tid) +long i386_clone(unsigned long clone_flags, unsigned long newsp, + int __user *parent_tid, void *newtls, int __user *child_tid) { - long ret; - - if (!newsp) - newsp = UPT_SP(¤t->thread.regs.regs); - - current->thread.forking = 1; - ret = do_fork(clone_flags, newsp, ¤t->thread.regs, 0, parent_tid, - child_tid); - current->thread.forking = 0; - return ret; + return sys_clone(clone_flags, newsp, parent_tid, child_tid); } + long sys_sigaction(int sig, const struct old_sigaction __user *act, struct old_sigaction __user *oact) { diff --git a/arch/x86/um/syscalls_64.c b/arch/x86/um/syscalls_64.c index f3d82bb6e15a..adb08eb5c22a 100644 --- a/arch/x86/um/syscalls_64.c +++ b/arch/x86/um/syscalls_64.c @@ -5,12 +5,9 @@ * Licensed under the GPL */ -#include "linux/linkage.h" -#include "linux/personality.h" -#include "linux/utsname.h" -#include "asm/prctl.h" /* XXX This should get the constants from libc */ -#include "asm/uaccess.h" -#include "os.h" +#include <linux/sched.h> +#include <asm/prctl.h> /* XXX This should get the constants from libc */ +#include <os.h> long arch_prctl(struct task_struct *task, int code, unsigned long __user *addr) { @@ -79,20 +76,6 @@ long sys_arch_prctl(int code, unsigned long addr) return arch_prctl(current, code, (unsigned long __user *) addr); } -long sys_clone(unsigned long clone_flags, unsigned long newsp, - void __user *parent_tid, void __user *child_tid) -{ - long ret; - - if (!newsp) - newsp = UPT_SP(¤t->thread.regs.regs); - current->thread.forking = 1; - ret = do_fork(clone_flags, newsp, ¤t->thread.regs, 0, parent_tid, - child_tid); - current->thread.forking = 0; - return ret; -} - void arch_switch_to(struct task_struct *to) { if ((to->thread.arch.fs == 0) || (to->mm == NULL)) diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c index ec57bd3818a4..7005ced5d1ad 100644 --- a/arch/x86/xen/apic.c +++ b/arch/x86/xen/apic.c @@ -6,8 +6,9 @@ #include <xen/xen.h> #include <xen/interface/physdev.h> +#include "xen-ops.h" -unsigned int xen_io_apic_read(unsigned apic, unsigned reg) +static unsigned int xen_io_apic_read(unsigned apic, unsigned reg) { struct physdev_apic apic_op; int ret; diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 9642d4a38602..2d932c351f91 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -80,6 +80,8 @@ #include "smp.h" #include "multicalls.h" +#include <xen/events.h> + 
EXPORT_SYMBOL_GPL(hypercall_page); DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu); @@ -1288,7 +1290,6 @@ asmlinkage void __init xen_start_kernel(void) { struct physdev_set_iopl set_iopl; int rc; - pgd_t *pgd; if (!xen_start_info) return; @@ -1380,8 +1381,6 @@ asmlinkage void __init xen_start_kernel(void) acpi_numa = -1; #endif - pgd = (pgd_t *)xen_start_info->pt_base; - /* Don't do the full vcpu_info placement stuff until we have a possible map and a non-dummy shared_info. */ per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0]; @@ -1390,7 +1389,7 @@ asmlinkage void __init xen_start_kernel(void) early_boot_irqs_disabled = true; xen_raw_console_write("mapping kernel into physical memory\n"); - pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages); + xen_setup_kernel_pagetable((pgd_t *)xen_start_info->pt_base, xen_start_info->nr_pages); /* Allocate and initialize top and mid mfn levels for p2m structure */ xen_build_mfn_list_list(); @@ -1441,17 +1440,29 @@ asmlinkage void __init xen_start_kernel(void) const struct dom0_vga_console_info *info = (void *)((char *)xen_start_info + xen_start_info->console.dom0.info_off); + struct xen_platform_op op = { + .cmd = XENPF_firmware_info, + .interface_version = XENPF_INTERFACE_VERSION, + .u.firmware_info.type = XEN_FW_KBD_SHIFT_FLAGS, + }; xen_init_vga(info, xen_start_info->console.dom0.info_size); xen_start_info->console.domU.mfn = 0; xen_start_info->console.domU.evtchn = 0; + if (HYPERVISOR_dom0_op(&op) == 0) + boot_params.kbd_status = op.u.firmware_info.u.kbd_shift_flags; + xen_init_apic(); /* Make sure ACS will be enabled */ pci_request_acs(); xen_acpi_sleep_register(); + + /* Avoid searching for BIOS MP tables */ + x86_init.mpparse.find_smp_config = x86_init_noop; + x86_init.mpparse.get_smp_config = x86_init_uint_noop; } #ifdef CONFIG_PCI /* PCI BIOS service won't work from a PV guest. */ diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index b65a76133f4f..5a16824cc2b3 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -84,6 +84,7 @@ */ DEFINE_SPINLOCK(xen_reservation_lock); +#ifdef CONFIG_X86_32 /* * Identity map, in addition to plain kernel map. This needs to be * large enough to allocate page table pages to allocate the rest. @@ -91,7 +92,7 @@ DEFINE_SPINLOCK(xen_reservation_lock); */ #define LEVEL1_IDENT_ENTRIES (PTRS_PER_PTE * 4) static RESERVE_BRK_ARRAY(pte_t, level1_ident_pgt, LEVEL1_IDENT_ENTRIES); - +#endif #ifdef CONFIG_X86_64 /* l3 pud for userspace vsyscall mapping */ static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss; @@ -1174,9 +1175,7 @@ static void xen_exit_mmap(struct mm_struct *mm) spin_unlock(&mm->page_table_lock); } -static void __init xen_pagetable_setup_start(pgd_t *base) -{ -} +static void xen_post_allocator_init(void); static __init void xen_mapping_pagetable_reserve(u64 start, u64 end) { @@ -1192,14 +1191,87 @@ static __init void xen_mapping_pagetable_reserve(u64 start, u64 end) } } -static void xen_post_allocator_init(void); +#ifdef CONFIG_X86_64 +static void __init xen_cleanhighmap(unsigned long vaddr, + unsigned long vaddr_end) +{ + unsigned long kernel_end = roundup((unsigned long)_brk_end, PMD_SIZE) - 1; + pmd_t *pmd = level2_kernel_pgt + pmd_index(vaddr); -static void __init xen_pagetable_setup_done(pgd_t *base) + /* NOTE: The loop is more greedy than the cleanup_highmap variant. + * We include the PMD passed in on _both_ boundaries. 
*/ + for (; vaddr <= vaddr_end && (pmd < (level2_kernel_pgt + PAGE_SIZE)); + pmd++, vaddr += PMD_SIZE) { + if (pmd_none(*pmd)) + continue; + if (vaddr < (unsigned long) _text || vaddr > kernel_end) + set_pmd(pmd, __pmd(0)); + } + /* In case we did something silly, we should crash in this function + * instead of somewhere later and be confusing. */ + xen_mc_flush(); +} +#endif +static void __init xen_pagetable_init(void) { +#ifdef CONFIG_X86_64 + unsigned long size; + unsigned long addr; +#endif + paging_init(); xen_setup_shared_info(); +#ifdef CONFIG_X86_64 + if (!xen_feature(XENFEAT_auto_translated_physmap)) { + unsigned long new_mfn_list; + + size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); + + /* On 32-bit, we get zero so this never gets executed. */ + new_mfn_list = xen_revector_p2m_tree(); + if (new_mfn_list && new_mfn_list != xen_start_info->mfn_list) { + /* using __ka address and sticking INVALID_P2M_ENTRY! */ + memset((void *)xen_start_info->mfn_list, 0xff, size); + + /* We should be in __ka space. */ + BUG_ON(xen_start_info->mfn_list < __START_KERNEL_map); + addr = xen_start_info->mfn_list; + /* We roundup to the PMD, which means that if anybody at this stage is + * using the __ka address of xen_start_info or xen_start_info->shared_info + * they are going to crash. Fortunately we have already revectored + * in xen_setup_kernel_pagetable and in xen_setup_shared_info. */ + size = roundup(size, PMD_SIZE); + xen_cleanhighmap(addr, addr + size); + + size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); + memblock_free(__pa(xen_start_info->mfn_list), size); + /* And revector! Bye bye old array */ + xen_start_info->mfn_list = new_mfn_list; + } else + goto skip; + } + /* At this stage, cleanup_highmap has already cleaned __ka space + * from _brk_limit way up to the max_pfn_mapped (which is the end of + * the ramdisk). We continue on, erasing PMD entries that point to page + * tables - do note that they are accessible at this stage via __va. + * For good measure we also round up to the PMD - which means that if + * anybody is using the __ka address of the initial boot-stack - and tries + * to use it - they are going to crash. The xen_start_info has been + * taken care of already in xen_setup_kernel_pagetable. */ + addr = xen_start_info->pt_base; + size = roundup(xen_start_info->nr_pt_frames * PAGE_SIZE, PMD_SIZE); + + xen_cleanhighmap(addr, addr + size); + xen_start_info->pt_base = (unsigned long)__va(__pa(xen_start_info->pt_base)); +#ifdef DEBUG + /* This is superfluous and not necessary, but you know what, + * let's do it. The MODULES_VADDR -> MODULES_END range should be clear of + * anything at this stage. */ + xen_cleanhighmap(MODULES_VADDR, roundup(MODULES_VADDR, PUD_SIZE) - 1); +#endif +skip: +#endif xen_post_allocator_init(); }
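The xen_cleanhighmap() helper used above walks level2_kernel_pgt in 2 MiB (PMD) steps and is deliberately greedy: it touches the PMD on both boundaries. A self-contained sketch of the same index arithmetic, with the usual x86-64 constants defined locally here so the example compiles on its own (assumes an LP64 userspace, not taken from the patch):

	#include <stdio.h>

	#define PMD_SHIFT	21
	#define PMD_SIZE	(1UL << PMD_SHIFT)	/* 2 MiB per PMD entry */
	#define PTRS_PER_PMD	512

	static unsigned long pmd_index(unsigned long va)
	{
		return (va >> PMD_SHIFT) & (PTRS_PER_PMD - 1);
	}

	int main(void)
	{
		unsigned long vaddr = 0xffffffff81a00000UL;	/* invented start */
		unsigned long vaddr_end = 0xffffffff82000000UL;	/* invented end */

		/* '<=' makes the walk greedy: the PMD holding vaddr_end is included */
		for (; vaddr <= vaddr_end; vaddr += PMD_SIZE)
			printf("would clear level2_kernel_pgt[%lu] for va %#lx\n",
			       pmd_index(vaddr), vaddr);
		return 0;
	}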
- static void xen_write_cr2(unsigned long cr2) { this_cpu_read(xen_vcpu)->arch.cr2 = cr2; @@ -1283,7 +1355,7 @@ static void xen_flush_tlb_others(const struct cpumask *cpus, cpumask_clear_cpu(smp_processor_id(), to_cpumask(args->mask)); args->op.cmd = MMUEXT_TLB_FLUSH_MULTI; - if (start != TLB_FLUSH_ALL && (end - start) <= PAGE_SIZE) { + if (end != TLB_FLUSH_ALL && (end - start) <= PAGE_SIZE) { args->op.cmd = MMUEXT_INVLPG_MULTI; args->op.arg1.linear_addr = start; } @@ -1655,7 +1727,7 @@ static void set_page_prot(void *addr, pgprot_t prot) if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0)) BUG(); } - +#ifdef CONFIG_X86_32 static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) { unsigned pmdidx, pteidx; @@ -1706,7 +1778,7 @@ static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) set_page_prot(pmd, PAGE_KERNEL_RO); } - +#endif void __init xen_setup_machphys_mapping(void) { struct xen_machphys_mapping mapping; @@ -1734,7 +1806,20 @@ static void convert_pfn_mfn(void *v) for (i = 0; i < PTRS_PER_PTE; i++) pte[i] = xen_make_pte(pte[i].pte); } - +static void __init check_pt_base(unsigned long *pt_base, unsigned long *pt_end, + unsigned long addr) +{ + if (*pt_base == PFN_DOWN(__pa(addr))) { + set_page_prot((void *)addr, PAGE_KERNEL); + clear_page((void *)addr); + (*pt_base)++; + } + if (*pt_end == PFN_DOWN(__pa(addr))) { + set_page_prot((void *)addr, PAGE_KERNEL); + clear_page((void *)addr); + (*pt_end)--; + } +} /* * Set up the initial kernel pagetable. * @@ -1746,11 +1831,13 @@ static void convert_pfn_mfn(void *v) * of the physical mapping once some sort of allocator has been set * up. */ -pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd, - unsigned long max_pfn) +void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) { pud_t *l3; pmd_t *l2; + unsigned long addr[3]; + unsigned long pt_base, pt_end; + unsigned i; /* max_pfn_mapped is the last pfn mapped in the initial memory * mappings. Considering that on Xen after the kernel mappings we @@ -1758,32 +1845,53 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd, * set max_pfn_mapped to the last real pfn mapped. */ max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->mfn_list)); + pt_base = PFN_DOWN(__pa(xen_start_info->pt_base)); + pt_end = pt_base + xen_start_info->nr_pt_frames; + /* Zap identity mapping */ init_level4_pgt[0] = __pgd(0); /* Pre-constructed entries are in pfn, so convert to mfn */ + /* L4[272] -> level3_ident_pgt + * L4[511] -> level3_kernel_pgt */ convert_pfn_mfn(init_level4_pgt); + + /* L3_i[0] -> level2_ident_pgt */ convert_pfn_mfn(level3_ident_pgt); + /* L3_k[510] -> level2_kernel_pgt + * L3_k[511] -> level2_fixmap_pgt */ convert_pfn_mfn(level3_kernel_pgt); + + /* We get [511][511] and have Xen's version of level2_kernel_pgt */ l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd); l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud); - memcpy(level2_ident_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); - memcpy(level2_kernel_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); - + addr[0] = (unsigned long)pgd; + addr[1] = (unsigned long)l3; + addr[2] = (unsigned long)l2; + /* Graft it onto L4[272][0]. Note that we are creating an aliasing problem: + * Both L4[272][0] and L4[511][511] have entries that point to the same + * L2 (PMD) tables. Meaning that if you modify it in __va space + * it will also be modified in the __ka space! (But if you just + * modify the PMD table to point to other PTEs or none, then you + * are OK - which is what cleanup_highmap does) */ + copy_page(level2_ident_pgt, l2); + /* Graft it onto L4[511][511] */ + copy_page(level2_kernel_pgt, l2); +
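The aliasing problem called out in the comment above, in miniature: two top-level slots that reference the same second-level table see each other's stores, and taking a private copy (what the copy_page() calls do for level2_ident_pgt and level2_kernel_pgt) is what breaks the alias. Purely illustrative userspace code with an invented layout:

	#include <stdio.h>
	#include <string.h>

	#define ENTRIES 4

	int main(void)
	{
		unsigned long shared_l2[ENTRIES] = { 1, 2, 3, 4 };
		unsigned long *va_view = shared_l2;	/* like L4[272][0] */
		unsigned long *ka_view = shared_l2;	/* like L4[511][511] */
		unsigned long private_l2[ENTRIES];

		va_view[0] = 99;	/* store through one alias... */
		printf("aliased: ka_view[0] = %lu\n", ka_view[0]);	/* 99 */

		/* a private copy, standing in for copy_page() */
		memcpy(private_l2, shared_l2, sizeof(private_l2));
		private_l2[0] = 7;
		printf("after copy: ka_view[0] = %lu, private[0] = %lu\n",
		       ka_view[0], private_l2[0]);	/* 99 and 7: no alias */
		return 0;
	}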
+ /* Get [511][510] and graft that in level2_fixmap_pgt */ l3 = m2v(pgd[pgd_index(__START_KERNEL_map + PMD_SIZE)].pgd); l2 = m2v(l3[pud_index(__START_KERNEL_map + PMD_SIZE)].pud); - memcpy(level2_fixmap_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); - - /* Set up identity map */ - xen_map_identity_early(level2_ident_pgt, max_pfn); + copy_page(level2_fixmap_pgt, l2); + /* Note that we don't do anything with level1_fixmap_pgt which + * we don't need. */ /* Make pagetable pieces RO */ set_page_prot(init_level4_pgt, PAGE_KERNEL_RO); set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO); set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO); set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO); + set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO); set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO); @@ -1794,22 +1902,28 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd, /* Unpin Xen-provided one */ pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); - /* Switch over */ - pgd = init_level4_pgt; - /* * At this stage there can be no user pgd, and no page * structure to attach it to, so make sure we just set kernel * pgd. */ xen_mc_batch(); - __xen_write_cr3(true, __pa(pgd)); + __xen_write_cr3(true, __pa(init_level4_pgt)); xen_mc_issue(PARAVIRT_LAZY_CPU); - memblock_reserve(__pa(xen_start_info->pt_base), - xen_start_info->nr_pt_frames * PAGE_SIZE); + /* We can't easily rip out L3 and L2, as the Xen pagetables are + * set out this way: [L4], [L1], [L2], [L3], [L1], [L1] ... for + * the initial domain. For guests using the toolstack, they are in + * [L4], [L3], [L2], [L1], [L1] order. So for dom0 we can only + * rip out the [L4] (pgd), but for guests we shave off three pages. + */ + for (i = 0; i < ARRAY_SIZE(addr); i++) + check_pt_base(&pt_base, &pt_end, addr[i]);
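What the check_pt_base() loop above accomplishes, sketched standalone: if one of the known frames (pgd, l3, l2) sits exactly at the head or tail of the [pt_base, pt_end) frame range, the range is trimmed so that frame is no longer kept reserved. The frame numbers below are invented for illustration:

	#include <stdio.h>

	static void check_pt_base(unsigned long *pt_base, unsigned long *pt_end,
				  unsigned long frame)
	{
		if (*pt_base == frame)
			(*pt_base)++;	/* reclaim from the front */
		if (*pt_end == frame)
			(*pt_end)--;	/* reclaim from the back */
	}

	int main(void)
	{
		unsigned long pt_base = 1000, pt_end = 1008;	/* hypothetical pfns */
		unsigned long frames[3] = { 1000, 1001, 1008 };	/* pgd, l3, l2 */
		unsigned i;

		for (i = 0; i < 3; i++)
			check_pt_base(&pt_base, &pt_end, frames[i]);
		/* three frames shaved off the range */
		printf("reserve pfns [%lu, %lu)\n", pt_base, pt_end);
		return 0;
	}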
- return pgd; + /* Reserve our Xen pagetable, which is now three pages smaller */ + memblock_reserve(PFN_PHYS(pt_base), (pt_end - pt_base) * PAGE_SIZE); + /* Revector the xen_start_info */ + xen_start_info = (struct start_info *)__va(__pa(xen_start_info)); } #else /* !CONFIG_X86_64 */ static RESERVE_BRK_ARRAY(pmd_t, initial_kernel_pmd, PTRS_PER_PMD); @@ -1834,8 +1948,7 @@ static void __init xen_write_cr3_init(unsigned long cr3) */ swapper_kernel_pmd = extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE); - memcpy(swapper_kernel_pmd, initial_kernel_pmd, - sizeof(pmd_t) * PTRS_PER_PMD); + copy_page(swapper_kernel_pmd, initial_kernel_pmd); swapper_pg_dir[KERNEL_PGD_BOUNDARY] = __pgd(__pa(swapper_kernel_pmd) | _PAGE_PRESENT); set_page_prot(swapper_kernel_pmd, PAGE_KERNEL_RO); @@ -1852,8 +1965,7 @@ static void __init xen_write_cr3_init(unsigned long cr3) pv_mmu_ops.write_cr3 = &xen_write_cr3; } -pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd, - unsigned long max_pfn) +void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) { pmd_t *kernel_pmd; @@ -1865,11 +1977,11 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd, 512*1024); kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd); - memcpy(initial_kernel_pmd, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD); + copy_page(initial_kernel_pmd, kernel_pmd); xen_map_identity_early(initial_kernel_pmd, max_pfn); - memcpy(initial_page_table, pgd, sizeof(pgd_t) * PTRS_PER_PGD); + copy_page(initial_page_table, pgd); initial_page_table[KERNEL_PGD_BOUNDARY] = __pgd(__pa(initial_kernel_pmd) | _PAGE_PRESENT); @@ -1885,8 +1997,6 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd, memblock_reserve(__pa(xen_start_info->pt_base), xen_start_info->nr_pt_frames * PAGE_SIZE); - - return initial_page_table; } #endif /* CONFIG_X86_64 */ @@ -2068,8 +2178,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = { void __init xen_init_mmu_ops(void) { x86_init.mapping.pagetable_reserve = xen_mapping_pagetable_reserve; - x86_init.paging.pagetable_setup_start = xen_pagetable_setup_start; - x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done; + x86_init.paging.pagetable_init = xen_pagetable_init; pv_mmu_ops = xen_mmu_ops; memset(dummy_mapping, 0xff, PAGE_SIZE); @@ -2337,6 +2446,9 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma, unsigned long range; int err = 0; + if (xen_feature(XENFEAT_auto_translated_physmap)) + return -EINVAL; + prot = __pgprot(pgprot_val(prot) | _PAGE_IOMAP); BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_RESERVED | VM_IO)) == @@ -2355,8 +2467,8 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma, if (err) goto out; - err = -EFAULT; - if (HYPERVISOR_mmu_update(mmu_update, batch, NULL, domid) < 0) + err = HYPERVISOR_mmu_update(mmu_update, batch, NULL, domid); + if (err < 0) goto out; nr -= batch;
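The arithmetic behind that smaller memblock_reserve(), as a standalone sketch: PFN_DOWN() and PFN_PHYS() convert between physical addresses and frame numbers, and the reserved size is simply (pt_end - pt_base) frames' worth of bytes. All values below are invented for illustration:

	#include <stdio.h>

	#define PAGE_SHIFT	12
	#define PAGE_SIZE	(1UL << PAGE_SHIFT)
	#define PFN_DOWN(x)	((x) >> PAGE_SHIFT)
	#define PFN_PHYS(x)	((unsigned long)(x) << PAGE_SHIFT)

	int main(void)
	{
		unsigned long pt_base_pa = 0x1c04000UL;	/* hypothetical __pa(pt_base) */
		unsigned long nr_pt_frames = 8;

		unsigned long pt_base = PFN_DOWN(pt_base_pa);
		unsigned long pt_end = pt_base + nr_pt_frames;

		pt_base += 2;	/* pretend two frames were reclaimed at the front */
		pt_end -= 1;	/* ...and one at the tail, as check_pt_base may do */

		printf("memblock_reserve(%#lx, %lu bytes)\n",
		       PFN_PHYS(pt_base), (pt_end - pt_base) * PAGE_SIZE);
		return 0;
	}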
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index d4b255463253..95fb2aa5927e 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -22,7 +22,7 @@ * * P2M_PER_PAGE depends on the architecture, as a mfn is always * unsigned long (8 bytes on 64-bit, 4 bytes on 32), leading to - * 512 and 1024 entries respectively. + * 512 and 1024 entries respectively. * * In short, these structures contain the Machine Frame Number (MFN) of the PFN. * @@ -139,11 +139,11 @@ * / | ~0, ~0, .... | * | \---------------/ * | - * p2m_missing p2m_missing - * /------------------\ /------------\ - * | [p2m_mid_missing]+---->| ~0, ~0, ~0 | - * | [p2m_mid_missing]+---->| ..., ~0 | - * \------------------/ \------------/ + * p2m_mid_missing p2m_missing + * /-----------------\ /------------\ + * | [p2m_missing] +---->| ~0, ~0, ~0 | + * | [p2m_missing] +---->| ..., ~0 | + * \-----------------/ \------------/ * * where ~0 is INVALID_P2M_ENTRY. IDENTITY is (PFN | IDENTITY_BIT) */ @@ -396,7 +396,85 @@ void __init xen_build_dynamic_phys_to_machine(void) m2p_override_init(); } +#ifdef CONFIG_X86_64 +#include <linux/bootmem.h> +unsigned long __init xen_revector_p2m_tree(void) +{ + unsigned long va_start; + unsigned long va_end; + unsigned long pfn; + unsigned long pfn_free = 0; + unsigned long *mfn_list = NULL; + unsigned long size; + + va_start = xen_start_info->mfn_list; + /* We copy in increments of P2M_PER_PAGE * sizeof(unsigned long), + * so make sure it is rounded up to that */ + size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); + va_end = va_start + size; + + /* If we were revectored already, don't do it again. */ + if (va_start <= __START_KERNEL_map && va_start >= __PAGE_OFFSET) + return 0; + + mfn_list = alloc_bootmem_align(size, PAGE_SIZE); + if (!mfn_list) { + pr_warn("Could not allocate space for a new P2M tree!\n"); + return xen_start_info->mfn_list; + } + /* Fill it out with INVALID_P2M_ENTRY value */ + memset(mfn_list, 0xFF, size); + + for (pfn = 0; pfn < ALIGN(MAX_DOMAIN_PAGES, P2M_PER_PAGE); pfn += P2M_PER_PAGE) { + unsigned topidx = p2m_top_index(pfn); + unsigned mididx; + unsigned long *mid_p; + + if (!p2m_top[topidx]) + continue; + if (p2m_top[topidx] == p2m_mid_missing) + continue; + + mididx = p2m_mid_index(pfn); + mid_p = p2m_top[topidx][mididx]; + if (!mid_p) + continue; + if ((mid_p == p2m_missing) || (mid_p == p2m_identity)) + continue; + + if ((unsigned long)mid_p == INVALID_P2M_ENTRY) + continue; + + /* The old va. Rebase it on mfn_list */ + if (mid_p >= (unsigned long *)va_start && mid_p <= (unsigned long *)va_end) { + unsigned long *new; + + if (pfn_free > (size / sizeof(unsigned long))) { + WARN(1, "Only allocated for %ld pages, but we want %ld!\n", + size / sizeof(unsigned long), pfn_free); + return 0; + } + new = &mfn_list[pfn_free]; + + copy_page(new, mid_p); + p2m_top[topidx][mididx] = &mfn_list[pfn_free]; + p2m_top_mfn_p[topidx][mididx] = virt_to_mfn(&mfn_list[pfn_free]); + + pfn_free += P2M_PER_PAGE; + + } + /* These should be the leaves allocated for identity from _brk. */ + } + return (unsigned long)mfn_list; + +} +#else +unsigned long __init xen_revector_p2m_tree(void) +{ + return 0; +} +#endif unsigned long get_phys_to_machine(unsigned long pfn) { unsigned topidx, mididx, idx; @@ -430,7 +508,7 @@ static void free_p2m_page(void *p) free_page((unsigned long)p); } -/* +/* * Fully allocate the p2m structure for a given pfn. We need to check * that both the top and mid levels are allocated, and make sure the * parallel mfn tree is kept in sync.
We may race with other cpus, so @@ -599,7 +677,7 @@ bool __init early_can_reuse_p2m_middle(unsigned long set_pfn, unsigned long set_ if (p2m_index(set_pfn)) return false; - for (pfn = 0; pfn <= MAX_DOMAIN_PAGES; pfn += P2M_PER_PAGE) { + for (pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn += P2M_PER_PAGE) { topidx = p2m_top_index(pfn); if (!p2m_top[topidx]) @@ -828,9 +906,6 @@ int m2p_add_override(unsigned long mfn, struct page *page, xen_mc_issue(PARAVIRT_LAZY_MMU); } - /* let's use dev_bus_addr to record the old mfn instead */ - kmap_op->dev_bus_addr = page->index; - page->index = (unsigned long) kmap_op; } spin_lock_irqsave(&m2p_override_lock, flags); list_add(&page->lru, &m2p_overrides[mfn_hash(mfn)]); @@ -857,7 +932,8 @@ int m2p_add_override(unsigned long mfn, struct page *page, return 0; } EXPORT_SYMBOL_GPL(m2p_add_override); -int m2p_remove_override(struct page *page, bool clear_pte) +int m2p_remove_override(struct page *page, + struct gnttab_map_grant_ref *kmap_op) { unsigned long flags; unsigned long mfn; @@ -887,10 +963,8 @@ int m2p_remove_override(struct page *page, bool clear_pte) WARN_ON(!PagePrivate(page)); ClearPagePrivate(page); - if (clear_pte) { - struct gnttab_map_grant_ref *map_op = - (struct gnttab_map_grant_ref *) page->index; - set_phys_to_machine(pfn, map_op->dev_bus_addr); + set_phys_to_machine(pfn, page->index); + if (kmap_op != NULL) { if (!PageHighMem(page)) { struct multicall_space mcs; struct gnttab_unmap_grant_ref *unmap_op; @@ -902,13 +976,13 @@ int m2p_remove_override(struct page *page, bool clear_pte) * issued. In this case handle is going to -1 because * it hasn't been modified yet. */ - if (map_op->handle == -1) + if (kmap_op->handle == -1) xen_mc_flush(); /* - * Now if map_op->handle is negative it means that the + * Now if kmap_op->handle is negative it means that the * hypercall actually returned an error. 
*/ - if (map_op->handle == GNTST_general_error) { + if (kmap_op->handle == GNTST_general_error) { printk(KERN_WARNING "m2p_remove_override: " "pfn %lx mfn %lx, failed to modify kernel mappings", pfn, mfn); @@ -918,8 +992,8 @@ int m2p_remove_override(struct page *page, bool clear_pte) mcs = xen_mc_entry( sizeof(struct gnttab_unmap_grant_ref)); unmap_op = mcs.args; - unmap_op->host_addr = map_op->host_addr; - unmap_op->handle = map_op->handle; + unmap_op->host_addr = kmap_op->host_addr; + unmap_op->handle = kmap_op->handle; unmap_op->dev_bus_addr = 0; MULTI_grant_table_op(mcs.mc, @@ -930,10 +1004,9 @@ int m2p_remove_override(struct page *page, bool clear_pte) set_pte_at(&init_mm, address, ptep, pfn_pte(pfn, PAGE_KERNEL)); __flush_tlb_single(address); - map_op->host_addr = 0; + kmap_op->host_addr = 0; } - } else - set_phys_to_machine(pfn, page->index); + } /* p2m(m2p(mfn)) == FOREIGN_FRAME(mfn): the mfn is already present * somewhere in this domain, even before being added to the diff --git a/arch/x86/xen/pci-swiotlb-xen.c b/arch/x86/xen/pci-swiotlb-xen.c index 967633ad98c4..969570491c39 100644 --- a/arch/x86/xen/pci-swiotlb-xen.c +++ b/arch/x86/xen/pci-swiotlb-xen.c @@ -8,6 +8,14 @@ #include <xen/xen.h> #include <asm/iommu_table.h> + +#include <asm/xen/swiotlb-xen.h> +#ifdef CONFIG_X86_64 +#include <asm/iommu.h> +#include <asm/dma.h> +#endif +#include <linux/export.h> + int xen_swiotlb __read_mostly; static struct dma_map_ops xen_swiotlb_dma_ops = { @@ -34,34 +42,64 @@ static struct dma_map_ops xen_swiotlb_dma_ops = { int __init pci_xen_swiotlb_detect(void) { + if (!xen_pv_domain()) + return 0; + /* If running as PV guest, either iommu=soft, or swiotlb=force will * activate this IOMMU. If running as PV privileged, activate it * regardless. */ - if ((xen_initial_domain() || swiotlb || swiotlb_force) && - (xen_pv_domain())) + if ((xen_initial_domain() || swiotlb || swiotlb_force)) xen_swiotlb = 1; /* If we are running under Xen, we MUST disable the native SWIOTLB. * Don't worry about swiotlb_force flag activating the native, as * the 'swiotlb' flag is the only one turning it on. */ - if (xen_pv_domain()) - swiotlb = 0; + swiotlb = 0; +#ifdef CONFIG_X86_64 + /* pci_swiotlb_detect_4gb turns on native SWIOTLB if no_iommu == 0 + * (so no iommu=X command line overrides). + * Considering that PV guests do not want the *native SWIOTLB* but + * only the Xen SWIOTLB, it is not useful to us, so set no_iommu=1 here. + */ + if (max_pfn > MAX_DMA32_PFN) + no_iommu = 1; +#endif return xen_swiotlb; }
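The reworked pci_xen_swiotlb_detect() now decides in two steps: bail out early for non-PV domains, then enable xen-swiotlb unconditionally for dom0 or on request (iommu=soft / swiotlb=force) for PV guests. Restated as a pure function for clarity; this is an editor's sketch whose inputs mimic the kernel flags, not the kernel API:

	#include <stdio.h>
	#include <stdbool.h>

	static int detect(bool pv_domain, bool initial_domain,
			  bool swiotlb_cmdline, bool swiotlb_force)
	{
		if (!pv_domain)
			return 0;	/* HVM/native: leave the native SWIOTLB alone */
		/* PV: dom0 always gets xen-swiotlb; guests only when asked */
		return (initial_domain || swiotlb_cmdline || swiotlb_force) ? 1 : 0;
	}

	int main(void)
	{
		printf("PV dom0: %d\n", detect(true, true, false, false));	/* 1 */
		printf("PV guest, swiotlb=force: %d\n",
		       detect(true, false, false, true));			/* 1 */
		printf("HVM guest: %d\n", detect(false, true, true, true));	/* 0 */
		return 0;
	}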
void __init pci_xen_swiotlb_init(void) { if (xen_swiotlb) { - xen_swiotlb_init(1); + xen_swiotlb_init(1, true /* early */); dma_ops = &xen_swiotlb_dma_ops; /* Make sure ACS will be enabled */ pci_request_acs(); } } + +int pci_xen_swiotlb_init_late(void) +{ + int rc; + + if (xen_swiotlb) + return 0; + + rc = xen_swiotlb_init(1, false /* late */); + if (rc) + return rc; + + dma_ops = &xen_swiotlb_dma_ops; + /* Make sure ACS will be enabled */ + pci_request_acs(); + + return 0; +} +EXPORT_SYMBOL_GPL(pci_xen_swiotlb_init_late); + IOMMU_INIT_FINISH(pci_xen_swiotlb_detect, - 0, + NULL, pci_xen_swiotlb_init, - 0); diff --git a/arch/x86/xen/platform-pci-unplug.c b/arch/x86/xen/platform-pci-unplug.c index ffcf2615640b..0a7852483ffe 100644 --- a/arch/x86/xen/platform-pci-unplug.c +++ b/arch/x86/xen/platform-pci-unplug.c @@ -24,6 +24,7 @@ #include <linux/module.h> #include <xen/platform_pci.h> +#include "xen-ops.h" #define XEN_PLATFORM_ERR_MAGIC -1 #define XEN_PLATFORM_ERR_PROTOCOL -2 diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index d11ca11d14fc..8971a26d21ab 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -17,6 +17,7 @@ #include <asm/e820.h> #include <asm/setup.h> #include <asm/acpi.h> +#include <asm/numa.h> #include <asm/xen/hypervisor.h> #include <asm/xen/hypercall.h> @@ -431,6 +432,24 @@ char * __init xen_memory_setup(void) * - mfn_list * - xen_start_info * See comment above "struct start_info" in <xen/interface/xen.h> + * We tried to make the memblock_reserve more selective so + * that it would be clear what region is reserved. Sadly we ran + * into the problem wherein on a 64-bit hypervisor with a 32-bit + * initial domain, the pt_base has the cr3 value which is not + * necessarily where the pagetable starts! As Jan put it: " + * Actually, the adjustment turns out to be correct: The page + * tables for a 32-on-64 dom0 get allocated in the order "first L1", + * "first L2", "first L3", so the offset to the page table base is + * indeed 2. When reading xen/include/public/xen.h's comment + * very strictly, this is not a violation (since there nothing is said + * that the first thing in the page table space is pointed to by + * pt_base; I admit that this seems to be implied though, namely + * do I think that it is implied that the page table space is the + * range [pt_base, pt_base + nr_pt_frames), whereas that + * range here indeed is [pt_base - 2, pt_base - 2 + nr_pt_frames), + * which - without a priori knowledge - the kernel would have + * difficulty to figure out)." - so let's just fall back to the + * easy way and reserve the whole region. */ memblock_reserve(__pa(xen_start_info->mfn_list), xen_start_info->pt_base - xen_start_info->mfn_list); @@ -544,4 +563,7 @@ void __init xen_arch_setup(void) disable_cpufreq(); WARN_ON(set_pm_idle_to_default()); fiddle_vdso(); +#ifdef CONFIG_NUMA + numa_off = 1; +#endif } diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index f58dca7a6e52..353c50f18702 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -377,7 +377,8 @@ static int __cpuinit xen_cpu_up(unsigned int cpu, struct task_struct *idle) return rc; if (num_online_cpus() == 1) - alternatives_smp_switch(1); + /* Just in case we booted with a single CPU.
*/ + alternatives_enable_smp(); rc = xen_smp_intr_init(cpu); if (rc) @@ -424,9 +425,6 @@ static void xen_cpu_die(unsigned int cpu) unbind_from_irqhandler(per_cpu(xen_irq_work, cpu), NULL); xen_uninit_lock_cpu(cpu); xen_teardown_timer(cpu); - - if (num_online_cpus() == 1) - alternatives_smp_switch(0); } static void __cpuinit xen_play_dead(void) /* used only with HOTPLUG_CPU */ diff --git a/arch/x86/xen/vga.c b/arch/x86/xen/vga.c index 1cd7f4d11e29..6722e3733f02 100644 --- a/arch/x86/xen/vga.c +++ b/arch/x86/xen/vga.c @@ -35,6 +35,7 @@ void __init xen_init_vga(const struct dom0_vga_console_info *info, size_t size) info->u.text_mode_3.font_height; break; + case XEN_VGATYPE_EFI_LFB: case XEN_VGATYPE_VESA_LFB: if (size < offsetof(struct dom0_vga_console_info, u.vesa_lfb.gbl_caps)) @@ -54,6 +55,12 @@ void __init xen_init_vga(const struct dom0_vga_console_info *info, size_t size) screen_info->blue_pos = info->u.vesa_lfb.blue_pos; screen_info->rsvd_size = info->u.vesa_lfb.rsvd_size; screen_info->rsvd_pos = info->u.vesa_lfb.rsvd_pos; + + if (info->video_type == XEN_VGATYPE_EFI_LFB) { + screen_info->orig_video_isVGA = VIDEO_TYPE_EFI; + break; + } + if (size >= offsetof(struct dom0_vga_console_info, u.vesa_lfb.gbl_caps) + sizeof(info->u.vesa_lfb.gbl_caps)) diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index aaa7291c9259..7faed5869e5b 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -28,9 +28,61 @@ ENTRY(startup_xen) __FINIT .pushsection .text - .align PAGE_SIZE + .balign PAGE_SIZE ENTRY(hypercall_page) - .skip PAGE_SIZE +#define NEXT_HYPERCALL(x) \ + ENTRY(xen_hypercall_##x) \ + .skip 32 + +NEXT_HYPERCALL(set_trap_table) +NEXT_HYPERCALL(mmu_update) +NEXT_HYPERCALL(set_gdt) +NEXT_HYPERCALL(stack_switch) +NEXT_HYPERCALL(set_callbacks) +NEXT_HYPERCALL(fpu_taskswitch) +NEXT_HYPERCALL(sched_op_compat) +NEXT_HYPERCALL(platform_op) +NEXT_HYPERCALL(set_debugreg) +NEXT_HYPERCALL(get_debugreg) +NEXT_HYPERCALL(update_descriptor) +NEXT_HYPERCALL(ni) +NEXT_HYPERCALL(memory_op) +NEXT_HYPERCALL(multicall) +NEXT_HYPERCALL(update_va_mapping) +NEXT_HYPERCALL(set_timer_op) +NEXT_HYPERCALL(event_channel_op_compat) +NEXT_HYPERCALL(xen_version) +NEXT_HYPERCALL(console_io) +NEXT_HYPERCALL(physdev_op_compat) +NEXT_HYPERCALL(grant_table_op) +NEXT_HYPERCALL(vm_assist) +NEXT_HYPERCALL(update_va_mapping_otherdomain) +NEXT_HYPERCALL(iret) +NEXT_HYPERCALL(vcpu_op) +NEXT_HYPERCALL(set_segment_base) +NEXT_HYPERCALL(mmuext_op) +NEXT_HYPERCALL(xsm_op) +NEXT_HYPERCALL(nmi_op) +NEXT_HYPERCALL(sched_op) +NEXT_HYPERCALL(callback_op) +NEXT_HYPERCALL(xenoprof_op) +NEXT_HYPERCALL(event_channel_op) +NEXT_HYPERCALL(physdev_op) +NEXT_HYPERCALL(hvm_op) +NEXT_HYPERCALL(sysctl) +NEXT_HYPERCALL(domctl) +NEXT_HYPERCALL(kexec_op) +NEXT_HYPERCALL(tmem_op) /* 38 */ +ENTRY(xen_hypercall_rsvr) + .skip 320 +NEXT_HYPERCALL(mca) /* 48 */ +NEXT_HYPERCALL(arch_1) +NEXT_HYPERCALL(arch_2) +NEXT_HYPERCALL(arch_3) +NEXT_HYPERCALL(arch_4) +NEXT_HYPERCALL(arch_5) +NEXT_HYPERCALL(arch_6) + .balign PAGE_SIZE .popsection ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "linux") diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 202d4c150154..bb5a8105ea86 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -27,7 +27,7 @@ void xen_setup_mfn_list_list(void); void xen_setup_shared_info(void); void xen_build_mfn_list_list(void); void xen_setup_machphys_mapping(void); -pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn); +void xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn); void 
xen_reserve_top(void); extern unsigned long xen_max_p2m_pfn; @@ -45,6 +45,7 @@ void xen_hvm_init_shared_info(void); void xen_unplug_emulated_devices(void); void __init xen_build_dynamic_phys_to_machine(void); +unsigned long __init xen_revector_p2m_tree(void); void xen_init_irq_ops(void); void xen_setup_timer(int cpu);
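Finally, the 32-on-64 subtlety quoted from Jan in the xen_memory_setup() hunk earlier reduces to simple frame arithmetic: pt_base (the cr3 value) points at the L3, but the "first L1" and "first L2" frames precede it, so the exact pagetable range starts two frames earlier. The patch sidesteps that computation by reserving the whole [mfn_list, pt_base) region; the sketch below, with invented numbers, just shows the offset:

	#include <stdio.h>

	int main(void)
	{
		unsigned long pt_base = 2048;	/* hypothetical pfn from start_info */
		unsigned long nr_pt_frames = 6;
		unsigned long adjust = 2;	/* "first L1", "first L2" precede the L3 */

		/* the range a selective reservation would have to compute */
		printf("exact pagetable range: [%lu, %lu)\n",
		       pt_base - adjust, pt_base - adjust + nr_pt_frames);
		return 0;
	}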