From eedb92abb9bb03ef21442614a6f5867eaac6e77f Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 14 Feb 2018 14:16:50 +0300 Subject: x86/mm: Make virtual memory layout dynamic for CONFIG_X86_5LEVEL=y We need to be able to adjust virtual memory layout at runtime to be able to switch between 4- and 5-level paging at boot-time. KASLR already has movable __VMALLOC_BASE, __VMEMMAP_BASE and __PAGE_OFFSET. Let's re-use it. Signed-off-by: Kirill A. Shutemov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20180214111656.88514-4-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/x86/Kconfig') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 63bf349b2b24..92256489b8a4 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1430,6 +1430,7 @@ config X86_PAE config X86_5LEVEL bool "Enable 5-level page tables support" + select DYNAMIC_MEMORY_LAYOUT depends on X86_64 ---help--- 5-level paging enables access to larger address space: @@ -2143,10 +2144,17 @@ config PHYSICAL_ALIGN Don't change this unless you know what you are doing. +config DYNAMIC_MEMORY_LAYOUT + bool + ---help--- + This option makes base addresses of vmalloc and vmemmap as well as + __PAGE_OFFSET movable during boot. + config RANDOMIZE_MEMORY bool "Randomize the kernel memory sections" depends on X86_64 depends on RANDOMIZE_BASE + select DYNAMIC_MEMORY_LAYOUT default RANDOMIZE_BASE ---help--- Randomizes the base virtual address of kernel memory sections -- cgit v1.2.3 From 162434e7f58b21f0b6c9cc5fb02222cd7d9064cc Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 14 Feb 2018 14:16:54 +0300 Subject: x86/mm: Make MAX_PHYSADDR_BITS and MAX_PHYSMEM_BITS dynamic For boot-time switching between paging modes, we need to be able to adjust size of physical address space at runtime. 
As part of making physical address space size variable, we have to make X86_5LEVEL dependent on SPARSEMEM_VMEMMAP. !SPARSEMEM_VMEMMAP configuration doesn't build with variable MAX_PHYSMEM_BITS. For !SPARSEMEM_VMEMMAP SECTIONS_WIDTH depends on MAX_PHYSMEM_BITS: SECTIONS_WIDTH SECTIONS_SHIFT MAX_PHYSMEM_BITS And SECTIONS_WIDTH is used on pre-processor stage, it doesn't work if it's dynamic. See include/linux/page-flags-layout.h. Effect on kernel image size: text data bss dec hex filename 8628393 4734340 1368064 14730797 e0c62d vmlinux.before 8628892 4734340 1368064 14731296 e0c820 vmlinux.after Signed-off-by: Kirill A. Shutemov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20180214111656.88514-8-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 1 + arch/x86/include/asm/pgtable_64_types.h | 2 +- arch/x86/include/asm/sparsemem.h | 9 ++------- arch/x86/kernel/setup.c | 5 ++--- 4 files changed, 6 insertions(+), 11 deletions(-) (limited to 'arch/x86/Kconfig') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 92256489b8a4..fcc3f88996b3 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1431,6 +1431,7 @@ config X86_PAE config X86_5LEVEL bool "Enable 5-level page tables support" select DYNAMIC_MEMORY_LAYOUT + select SPARSEMEM_VMEMMAP depends on X86_64 ---help--- 5-level paging enables access to larger address space: diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 0c48d80e11d4..59d971c85de5 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -95,7 +95,7 @@ extern unsigned int ptrs_per_p4d; * range must not overlap with anything except the KASAN shadow area, which * is correct as KASAN disables KASLR.
*/ -#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL) +#define MAXMEM (1UL << MAX_PHYSMEM_BITS) #define LDT_PGD_ENTRY_L4 -3UL #define LDT_PGD_ENTRY_L5 -112UL diff --git a/arch/x86/include/asm/sparsemem.h b/arch/x86/include/asm/sparsemem.h index 4fc1e9d3c43e..4617a2bf123c 100644 --- a/arch/x86/include/asm/sparsemem.h +++ b/arch/x86/include/asm/sparsemem.h @@ -27,13 +27,8 @@ # endif #else /* CONFIG_X86_32 */ # define SECTION_SIZE_BITS 27 /* matt - 128 is convenient right now */ -# ifdef CONFIG_X86_5LEVEL -# define MAX_PHYSADDR_BITS 52 -# define MAX_PHYSMEM_BITS 52 -# else -# define MAX_PHYSADDR_BITS 44 -# define MAX_PHYSMEM_BITS 46 -# endif +# define MAX_PHYSADDR_BITS (pgtable_l5_enabled ? 52 : 44) +# define MAX_PHYSMEM_BITS (pgtable_l5_enabled ? 52 : 46) #endif #endif /* CONFIG_SPARSEMEM */ diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 1ae67e982af7..399d0f7fa8f1 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -189,9 +189,7 @@ struct ist_info ist_info; #endif #else -struct cpuinfo_x86 boot_cpu_data __read_mostly = { - .x86_phys_bits = MAX_PHYSMEM_BITS, -}; +struct cpuinfo_x86 boot_cpu_data __read_mostly; EXPORT_SYMBOL(boot_cpu_data); #endif @@ -851,6 +849,7 @@ void __init setup_arch(char **cmdline_p) __flush_tlb_all(); #else printk(KERN_INFO "Command line: %s\n", boot_command_line); + boot_cpu_data.x86_phys_bits = MAX_PHYSMEM_BITS; #endif /* -- cgit v1.2.3 From 6657fca06e3ffab8d0b3f9d8b397f5ee498952d7 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 14 Feb 2018 21:25:42 +0300 Subject: x86/mm: Allow to boot without LA57 if CONFIG_X86_5LEVEL=y All pieces of the puzzle are in place and we can now allow to boot with CONFIG_X86_5LEVEL=y on a machine without LA57 support. Kernel will detect that LA57 is missing and fold p4d at runtime. Update the documentation and the Kconfig option description to reflect the change. Signed-off-by: Kirill A. 
Shutemov Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20180214182542.69302-10-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar --- Documentation/x86/x86_64/5level-paging.txt | 9 +++------ arch/x86/Kconfig | 4 ++-- arch/x86/boot/compressed/misc.c | 16 ---------------- arch/x86/include/asm/required-features.h | 8 +------- 4 files changed, 6 insertions(+), 31 deletions(-) (limited to 'arch/x86/Kconfig') diff --git a/Documentation/x86/x86_64/5level-paging.txt b/Documentation/x86/x86_64/5level-paging.txt index 087251a0d99c..2432a5ef86d9 100644 --- a/Documentation/x86/x86_64/5level-paging.txt +++ b/Documentation/x86/x86_64/5level-paging.txt @@ -20,12 +20,9 @@ Documentation/x86/x86_64/mm.txt CONFIG_X86_5LEVEL=y enables the feature. -So far, a kernel compiled with the option enabled will be able to boot -only on machines that supports the feature -- see for 'la57' flag in -/proc/cpuinfo. - -The plan is to implement boot-time switching between 4- and 5-level paging -in the future. +Kernel with CONFIG_X86_5LEVEL=y still able to boot on 4-level hardware. +In this case additional page table level -- p4d -- will be folded at +runtime. == User-space and large virtual address space == diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index fcc3f88996b3..1c4f7b6a94f4 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1440,8 +1440,8 @@ config X86_5LEVEL It will be supported by future Intel CPUs. - Note: a kernel with this option enabled can only be booted - on machines that support the feature. + A kernel with the option enabled can be booted on machines that + support 4- or 5-level paging. See Documentation/x86/x86_64/5level-paging.txt for more information. 
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 98761a1576ce..b50c42455e25 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -169,16 +169,6 @@ void __puthex(unsigned long value) } } -static bool l5_supported(void) -{ - /* Check if leaf 7 is supported. */ - if (native_cpuid_eax(0) < 7) - return 0; - - /* Check if la57 is supported. */ - return native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31)); -} - #if CONFIG_X86_NEED_RELOCS static void handle_relocations(void *output, unsigned long output_len, unsigned long virt_addr) @@ -372,12 +362,6 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, console_init(); debug_putstr("early console in extract_kernel\n"); - if (IS_ENABLED(CONFIG_X86_5LEVEL) && !l5_supported()) { - error("This linux kernel as configured requires 5-level paging\n" - "This CPU does not support the required 'cr4.la57' feature\n" - "Unable to boot - please use a kernel appropriate for your CPU\n"); - } - free_mem_ptr = heap; /* Heap */ free_mem_end_ptr = heap + BOOT_HEAP_SIZE; diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h index fb3a6de7440b..6847d85400a8 100644 --- a/arch/x86/include/asm/required-features.h +++ b/arch/x86/include/asm/required-features.h @@ -53,12 +53,6 @@ # define NEED_MOVBE 0 #endif -#ifdef CONFIG_X86_5LEVEL -# define NEED_LA57 (1<<(X86_FEATURE_LA57 & 31)) -#else -# define NEED_LA57 0 -#endif - #ifdef CONFIG_X86_64 #ifdef CONFIG_PARAVIRT /* Paravirtualized systems may not have PSE or PGE available */ @@ -104,7 +98,7 @@ #define REQUIRED_MASK13 0 #define REQUIRED_MASK14 0 #define REQUIRED_MASK15 0 -#define REQUIRED_MASK16 (NEED_LA57) +#define REQUIRED_MASK16 0 #define REQUIRED_MASK17 0 #define REQUIRED_MASK18 0 #define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19) -- cgit v1.2.3 From 076ca272a14cea558b1092ec85cea08510283f2a Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 7 Mar 
2018 11:12:27 -0800 Subject: x86/vsyscall/64: Drop "native" vsyscalls Since Linux v3.2, vsyscalls have been deprecated and slow. From v3.2 on, Linux had three vsyscall modes: "native", "emulate", and "none". "emulate" is the default. All known user programs work correctly in emulate mode, but vsyscalls turn into page faults and are emulated. This is very slow. In "native" mode, the vsyscall page is easily usable as an exploit gadget, but vsyscalls are a bit faster -- they turn into normal syscalls. (This is in contrast to vDSO functions, which can be much faster than syscalls.) In "none" mode, there are no vsyscalls. For all practical purposes, "native" was really just a chicken bit in case something went wrong with the emulation. It's been over six years, and nothing has gone wrong. Delete it. Signed-off-by: Andy Lutomirski Acked-by: Kees Cook Acked-by: Linus Torvalds Cc: Borislav Petkov Cc: Dominik Brodowski Cc: Kernel Hardening Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/519fee5268faea09ae550776ce969fa6e88668b0.1520449896.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 11 +---------- arch/x86/entry/vsyscall/vsyscall_64.c | 16 +++------------- arch/x86/include/asm/pgtable_types.h | 2 -- tools/testing/selftests/x86/test_vsyscall.c | 11 ++++++----- 4 files changed, 10 insertions(+), 30 deletions(-) (limited to 'arch/x86/Kconfig') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index c1aed6c0e413..09c599e0900d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2266,7 +2266,7 @@ choice it can be used to assist security vulnerability exploitation. This setting can be changed at boot time via the kernel command - line parameter vsyscall=[native|emulate|none]. + line parameter vsyscall=[emulate|none]. On a system with recent enough glibc (2.14 or newer) and no static binaries, you can say None without a performance penalty @@ -2274,15 +2274,6 @@ choice If unsure, select "Emulate". 
- config LEGACY_VSYSCALL_NATIVE - bool "Native" - help - Actual executable code is located in the fixed vsyscall - address mapping, implementing time() efficiently. Since - this makes the mapping executable, it can be used during - security vulnerability exploitation (traditionally as - ROP gadgets). This configuration is not recommended. - config LEGACY_VSYSCALL_EMULATE bool "Emulate" help diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c index 577fa8adb785..8560ef68a9d6 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -42,10 +42,8 @@ #define CREATE_TRACE_POINTS #include "vsyscall_trace.h" -static enum { EMULATE, NATIVE, NONE } vsyscall_mode = -#if defined(CONFIG_LEGACY_VSYSCALL_NATIVE) - NATIVE; -#elif defined(CONFIG_LEGACY_VSYSCALL_NONE) +static enum { EMULATE, NONE } vsyscall_mode = +#ifdef CONFIG_LEGACY_VSYSCALL_NONE NONE; #else EMULATE; @@ -56,8 +54,6 @@ static int __init vsyscall_setup(char *str) if (str) { if (!strcmp("emulate", str)) vsyscall_mode = EMULATE; - else if (!strcmp("native", str)) - vsyscall_mode = NATIVE; else if (!strcmp("none", str)) vsyscall_mode = NONE; else @@ -139,10 +135,6 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) WARN_ON_ONCE(address != regs->ip); - /* This should be unreachable in NATIVE mode. */ - if (WARN_ON(vsyscall_mode == NATIVE)) - return false; - if (vsyscall_mode == NONE) { warn_bad_vsyscall(KERN_INFO, regs, "vsyscall attempted with vsyscall=none"); @@ -370,9 +362,7 @@ void __init map_vsyscall(void) if (vsyscall_mode != NONE) { __set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall, - vsyscall_mode == NATIVE - ? 
PAGE_KERNEL_VSYSCALL - : PAGE_KERNEL_VVAR); + PAGE_KERNEL_VVAR); set_vsyscall_pgtable_user_bits(swapper_pg_dir); } diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 246f15b4e64c..acfe755562a6 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -174,7 +174,6 @@ enum page_cache_mode { #define __PAGE_KERNEL_RO (__PAGE_KERNEL & ~_PAGE_RW) #define __PAGE_KERNEL_RX (__PAGE_KERNEL_EXEC & ~_PAGE_RW) #define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_NOCACHE) -#define __PAGE_KERNEL_VSYSCALL (__PAGE_KERNEL_RX | _PAGE_USER) #define __PAGE_KERNEL_VVAR (__PAGE_KERNEL_RO | _PAGE_USER) #define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE) #define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE) @@ -206,7 +205,6 @@ enum page_cache_mode { #define PAGE_KERNEL_NOCACHE __pgprot(__PAGE_KERNEL_NOCACHE | _PAGE_ENC) #define PAGE_KERNEL_LARGE __pgprot(__PAGE_KERNEL_LARGE | _PAGE_ENC) #define PAGE_KERNEL_LARGE_EXEC __pgprot(__PAGE_KERNEL_LARGE_EXEC | _PAGE_ENC) -#define PAGE_KERNEL_VSYSCALL __pgprot(__PAGE_KERNEL_VSYSCALL | _PAGE_ENC) #define PAGE_KERNEL_VVAR __pgprot(__PAGE_KERNEL_VVAR | _PAGE_ENC) #define PAGE_KERNEL_IO __pgprot(__PAGE_KERNEL_IO) diff --git a/tools/testing/selftests/x86/test_vsyscall.c b/tools/testing/selftests/x86/test_vsyscall.c index be81621446f0..0b4f1cc2291c 100644 --- a/tools/testing/selftests/x86/test_vsyscall.c +++ b/tools/testing/selftests/x86/test_vsyscall.c @@ -450,7 +450,7 @@ static void sigtrap(int sig, siginfo_t *info, void *ctx_void) num_vsyscall_traps++; } -static int test_native_vsyscall(void) +static int test_emulation(void) { time_t tmp; bool is_native; @@ -458,7 +458,7 @@ static int test_native_vsyscall(void) if (!vtime) return 0; - printf("[RUN]\tchecking for native vsyscall\n"); + printf("[RUN]\tchecking that vsyscalls are emulated\n"); sethandler(SIGTRAP, sigtrap, 0); set_eflags(get_eflags() | X86_EFLAGS_TF); vtime(&tmp); @@ -474,11 +474,12 @@ 
static int test_native_vsyscall(void) */ is_native = (num_vsyscall_traps > 1); - printf("\tvsyscalls are %s (%d instructions in vsyscall page)\n", + printf("[%s]\tvsyscalls are %s (%d instructions in vsyscall page)\n", + (is_native ? "FAIL" : "OK"), (is_native ? "native" : "emulated"), (int)num_vsyscall_traps); - return 0; + return is_native; } #endif @@ -498,7 +499,7 @@ int main(int argc, char **argv) nerrs += test_vsys_r(); #ifdef __x86_64__ - nerrs += test_native_vsyscall(); + nerrs += test_emulation(); #endif return nerrs ? 1 : 0; -- cgit v1.2.3 From 55027a7772b1a9c36fb9731fc6c847b6a59c5bef Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Wed, 7 Mar 2018 08:39:15 +0100 Subject: x86: Align x86_64 PCI_MMCONFIG with 32-bit variant Allow to enable PCI_MMCONFIG when only SFI is present and make this option default on. This will help consolidating both into one Kconfig statement. Signed-off-by: Jan Kiszka Signed-off-by: Thomas Gleixner Cc: jailhouse-dev@googlegroups.com Cc: linux-pci@vger.kernel.org Cc: virtualization@lists.linux-foundation.org Cc: Andy Shevchenko Cc: Bjorn Helgaas Link: https://lkml.kernel.org/r/a2faf78c54f340f5549149e8b679c95950dae83d.1520408357.git.jan.kiszka@siemens.com --- arch/x86/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/Kconfig') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index c1236b187824..6bd763640925 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2658,7 +2658,8 @@ config PCI_DOMAINS config PCI_MMCONFIG bool "Support mmconfig PCI config space access" - depends on X86_64 && PCI && ACPI + default y + depends on X86_64 && PCI && (ACPI || SFI) config PCI_CNB20LE_QUIRK bool "Read CNB20LE Host Bridge Windows" if EXPERT -- cgit v1.2.3 From b45c9f3656b691ab7324c7eaefb3416bea0326d8 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Wed, 7 Mar 2018 08:39:16 +0100 Subject: x86: Consolidate PCI_MMCONFIG configs Since e279b6c1d329 ("x86: start unification of arch/x86/Kconfig.*"), there exist two 
PCI_MMCONFIG entries, one from the original i386 and another from x86_64. Consolidate both entries into a single one. Signed-off-by: Jan Kiszka Signed-off-by: Thomas Gleixner Cc: jailhouse-dev@googlegroups.com Cc: linux-pci@vger.kernel.org Cc: virtualization@lists.linux-foundation.org Cc: Andy Shevchenko Cc: Bjorn Helgaas Link: https://lkml.kernel.org/r/2a0ccd51ea6f7996e07162918228e23bdc1fbb03.1520408357.git.jan.kiszka@siemens.com --- arch/x86/Kconfig | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'arch/x86/Kconfig') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 6bd763640925..573b7c49d9f8 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2640,8 +2640,10 @@ config PCI_DIRECT depends on PCI && (X86_64 || (PCI_GODIRECT || PCI_GOANY || PCI_GOOLPC || PCI_GOMMCONFIG)) config PCI_MMCONFIG - def_bool y - depends on X86_32 && PCI && (ACPI || SFI) && (PCI_GOMMCONFIG || PCI_GOANY) + bool "Support mmconfig PCI config space access" if X86_64 + default y + depends on PCI && (ACPI || SFI) + depends on X86_64 || (PCI_GOANY || PCI_GOMMCONFIG) config PCI_OLPC def_bool y @@ -2656,11 +2658,6 @@ config PCI_DOMAINS def_bool y depends on PCI -config PCI_MMCONFIG - bool "Support mmconfig PCI config space access" - default y - depends on X86_64 && PCI && (ACPI || SFI) - config PCI_CNB20LE_QUIRK bool "Read CNB20LE Host Bridge Windows" if EXPERT depends on PCI -- cgit v1.2.3 From 8364e1f8379c7f9d3e63f127a585f889906b3e10 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Wed, 7 Mar 2018 08:39:17 +0100 Subject: x86/jailhouse: Allow to use PCI_MMCONFIG without ACPI Jailhouse does not use ACPI, but it does support MMCONFIG. Make sure the latter can be built without having to enable ACPI as well. Primarily, it's required to make the AMD mmconf-fam10h_64 depend upon MMCONFIG and ACPI, instead of just the former. Saves some bytes in the Jailhouse non-root kernel.
Signed-off-by: Jan Kiszka Signed-off-by: Thomas Gleixner Cc: jailhouse-dev@googlegroups.com Cc: linux-pci@vger.kernel.org Cc: virtualization@lists.linux-foundation.org Cc: Andy Shevchenko Cc: Bjorn Helgaas Link: https://lkml.kernel.org/r/788bbd5325d1922235e9562c213057425fbc548c.1520408357.git.jan.kiszka@siemens.com --- arch/x86/Kconfig | 6 +++++- arch/x86/kernel/Makefile | 2 +- arch/x86/kernel/cpu/amd.c | 2 +- 3 files changed, 7 insertions(+), 3 deletions(-) (limited to 'arch/x86/Kconfig') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 573b7c49d9f8..7f00f60a2028 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2642,7 +2642,7 @@ config PCI_DIRECT config PCI_MMCONFIG bool "Support mmconfig PCI config space access" if X86_64 default y - depends on PCI && (ACPI || SFI) + depends on PCI && (ACPI || SFI || JAILHOUSE_GUEST) depends on X86_64 || (PCI_GOANY || PCI_GOMMCONFIG) config PCI_OLPC @@ -2658,6 +2658,10 @@ config PCI_DOMAINS def_bool y depends on PCI +config MMCONF_FAM10H + def_bool y + depends on X86_64 && PCI_MMCONFIG && ACPI + config PCI_CNB20LE_QUIRK bool "Read CNB20LE Host Bridge Windows" if EXPERT depends on PCI diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 29786c87e864..73ccf80c09a2 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -146,6 +146,6 @@ ifeq ($(CONFIG_X86_64),y) obj-$(CONFIG_GART_IOMMU) += amd_gart_64.o aperture_64.o obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary_64.o tce_64.o - obj-$(CONFIG_PCI_MMCONFIG) += mmconf-fam10h_64.o + obj-$(CONFIG_MMCONF_FAM10H) += mmconf-fam10h_64.o obj-y += vsmp_64.o endif diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index f0e6456ca7d3..12bc0a1139da 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -716,7 +716,7 @@ static void init_amd_k8(struct cpuinfo_x86 *c) static void init_amd_gh(struct cpuinfo_x86 *c) { -#ifdef CONFIG_X86_64 +#ifdef CONFIG_MMCONF_FAM10H /* do this for boot cpu */ if (c == &boot_cpu_data) 
check_enable_amd_mmconf_dmi(); -- cgit v1.2.3 From fec777c385b6376048fc4b08f039366545b335cd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 19 Mar 2018 11:38:15 +0100 Subject: x86/dma: Use DMA-direct (CONFIG_DMA_DIRECT_OPS=y) The generic DMA-direct (CONFIG_DMA_DIRECT_OPS=y) implementation is now functionally equivalent to the x86 nommu dma_map implementation, so switch over to using it. That includes switching from using x86_dma_supported in various IOMMU drivers to use dma_direct_supported instead, which provides the same functionality. Tested-by: Tom Lendacky Signed-off-by: Christoph Hellwig Reviewed-by: Thomas Gleixner Cc: David Woodhouse Cc: Joerg Roedel Cc: Jon Mason Cc: Konrad Rzeszutek Wilk Cc: Linus Torvalds Cc: Muli Ben-Yehuda Cc: Peter Zijlstra Cc: iommu@lists.linux-foundation.org Link: http://lkml.kernel.org/r/20180319103826.12853-4-hch@lst.de Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 1 + arch/x86/include/asm/dma-mapping.h | 8 ----- arch/x86/include/asm/iommu.h | 3 -- arch/x86/kernel/Makefile | 2 +- arch/x86/kernel/amd_gart_64.c | 7 ++-- arch/x86/kernel/pci-calgary_64.c | 3 +- arch/x86/kernel/pci-dma.c | 66 +------------------------------------- arch/x86/kernel/pci-swiotlb.c | 5 ++- arch/x86/pci/sta2x11-fixup.c | 2 +- drivers/iommu/amd_iommu.c | 7 ++-- drivers/iommu/intel-iommu.c | 3 +- 11 files changed, 17 insertions(+), 90 deletions(-) (limited to 'arch/x86/Kconfig') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 18233e459bff..7dc347217d3a 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -83,6 +83,7 @@ config X86 select CLOCKSOURCE_VALIDATE_LAST_CYCLE select CLOCKSOURCE_WATCHDOG select DCACHE_WORD_ACCESS + select DMA_DIRECT_OPS select EDAC_ATOMIC_SCRUB select EDAC_SUPPORT select GENERIC_CLOCKEVENTS diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index 545bf3721bc0..df9816b385eb 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -36,14 +36,6 
@@ int arch_dma_supported(struct device *dev, u64 mask); bool arch_dma_alloc_attrs(struct device **dev, gfp_t *gfp); #define arch_dma_alloc_attrs arch_dma_alloc_attrs -extern void *dma_generic_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_addr, gfp_t flag, - unsigned long attrs); - -extern void dma_generic_free_coherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_addr, - unsigned long attrs); - static inline gfp_t dma_alloc_coherent_gfp_flags(struct device *dev, gfp_t gfp) { if (dev->coherent_dma_mask <= DMA_BIT_MASK(24)) diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h index 1e5d5d92eb40..baedab8ac538 100644 --- a/arch/x86/include/asm/iommu.h +++ b/arch/x86/include/asm/iommu.h @@ -2,13 +2,10 @@ #ifndef _ASM_X86_IOMMU_H #define _ASM_X86_IOMMU_H -extern const struct dma_map_ops nommu_dma_ops; extern int force_iommu, no_iommu; extern int iommu_detected; extern int iommu_pass_through; -int x86_dma_supported(struct device *dev, u64 mask); - /* 10 seconds */ #define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 29786c87e864..2e8c8a09ecab 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -57,7 +57,7 @@ obj-$(CONFIG_X86_ESPFIX64) += espfix_64.o obj-$(CONFIG_SYSFS) += ksysfs.o obj-y += bootflag.o e820.o obj-y += pci-dma.o quirks.o topology.o kdebugfs.o -obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o +obj-y += alternative.o i8253.o hw_breakpoint.o obj-y += tsc.o tsc_msr.o io_delay.o rtc.o obj-y += pci-iommu_table.o obj-y += resource.o diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c index ecd486cb06ab..52e3abcf3e70 100644 --- a/arch/x86/kernel/amd_gart_64.c +++ b/arch/x86/kernel/amd_gart_64.c @@ -501,8 +501,7 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, } __free_pages(page, get_order(size)); } else - return dma_generic_alloc_coherent(dev, size, 
dma_addr, flag, - attrs); + return dma_direct_alloc(dev, size, dma_addr, flag, attrs); return NULL; } @@ -513,7 +512,7 @@ gart_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_addr, unsigned long attrs) { gart_unmap_page(dev, dma_addr, size, DMA_BIDIRECTIONAL, 0); - dma_generic_free_coherent(dev, size, vaddr, dma_addr, attrs); + dma_direct_free(dev, size, vaddr, dma_addr, attrs); } static int gart_mapping_error(struct device *dev, dma_addr_t dma_addr) @@ -705,7 +704,7 @@ static const struct dma_map_ops gart_dma_ops = { .alloc = gart_alloc_coherent, .free = gart_free_coherent, .mapping_error = gart_mapping_error, - .dma_supported = x86_dma_supported, + .dma_supported = dma_direct_supported, }; static void gart_iommu_shutdown(void) diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 35c461f21815..5647853053bd 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -493,7 +494,7 @@ static const struct dma_map_ops calgary_dma_ops = { .map_page = calgary_map_page, .unmap_page = calgary_unmap_page, .mapping_error = calgary_mapping_error, - .dma_supported = x86_dma_supported, + .dma_supported = dma_direct_supported, }; static inline void __iomem * busno_to_bbar(unsigned char num) diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index b59820872ec7..db0b88ea8d1b 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -18,7 +18,7 @@ static int forbid_dac __read_mostly; -const struct dma_map_ops *dma_ops = &nommu_dma_ops; +const struct dma_map_ops *dma_ops = &dma_direct_ops; EXPORT_SYMBOL(dma_ops); static int iommu_sac_force __read_mostly; @@ -76,60 +76,6 @@ void __init pci_iommu_alloc(void) } } } -void *dma_generic_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_addr, gfp_t flag, - unsigned long attrs) -{ - struct page *page; - unsigned int count = 
PAGE_ALIGN(size) >> PAGE_SHIFT; - dma_addr_t addr; - -again: - page = NULL; - /* CMA can be used only in the context which permits sleeping */ - if (gfpflags_allow_blocking(flag)) { - page = dma_alloc_from_contiguous(dev, count, get_order(size), - flag); - if (page) { - addr = phys_to_dma(dev, page_to_phys(page)); - if (addr + size > dev->coherent_dma_mask) { - dma_release_from_contiguous(dev, page, count); - page = NULL; - } - } - } - /* fallback */ - if (!page) - page = alloc_pages_node(dev_to_node(dev), flag, get_order(size)); - if (!page) - return NULL; - - addr = phys_to_dma(dev, page_to_phys(page)); - if (addr + size > dev->coherent_dma_mask) { - __free_pages(page, get_order(size)); - - if (dev->coherent_dma_mask < DMA_BIT_MASK(32) && - !(flag & GFP_DMA)) { - flag = (flag & ~GFP_DMA32) | GFP_DMA; - goto again; - } - - return NULL; - } - memset(page_address(page), 0, size); - *dma_addr = addr; - return page_address(page); -} - -void dma_generic_free_coherent(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_addr, unsigned long attrs) -{ - unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; - struct page *page = virt_to_page(vaddr); - - if (!dma_release_from_contiguous(dev, page, count)) - free_pages((unsigned long)vaddr, get_order(size)); -} bool arch_dma_alloc_attrs(struct device **dev, gfp_t *gfp) { @@ -243,16 +189,6 @@ int arch_dma_supported(struct device *dev, u64 mask) } EXPORT_SYMBOL(arch_dma_supported); -int x86_dma_supported(struct device *dev, u64 mask) -{ - /* Copied from i386. Doesn't make much sense, because it will - only work for pci_alloc_coherent. - The caller just has to use GFP_DMA in this case. 
*/ - if (mask < DMA_BIT_MASK(24)) - return 0; - return 1; -} - static int __init pci_iommu_init(void) { struct iommu_table_entry *p; diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index 0ee0f8f34251..bcb6a9bf64ad 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -30,8 +30,7 @@ void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size, */ flags |= __GFP_NOWARN; - vaddr = dma_generic_alloc_coherent(hwdev, size, dma_handle, flags, - attrs); + vaddr = dma_direct_alloc(hwdev, size, dma_handle, flags, attrs); if (vaddr) return vaddr; @@ -45,7 +44,7 @@ void x86_swiotlb_free_coherent(struct device *dev, size_t size, if (is_swiotlb_buffer(dma_to_phys(dev, dma_addr))) swiotlb_free_coherent(dev, size, vaddr, dma_addr); else - dma_generic_free_coherent(dev, size, vaddr, dma_addr, attrs); + dma_direct_free(dev, size, vaddr, dma_addr, attrs); } static const struct dma_map_ops x86_swiotlb_dma_ops = { diff --git a/arch/x86/pci/sta2x11-fixup.c b/arch/x86/pci/sta2x11-fixup.c index 75577c1490c4..6c712fe11bdc 100644 --- a/arch/x86/pci/sta2x11-fixup.c +++ b/arch/x86/pci/sta2x11-fixup.c @@ -193,7 +193,7 @@ static const struct dma_map_ops sta2x11_dma_ops = { .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu, .sync_sg_for_device = swiotlb_sync_sg_for_device, .mapping_error = swiotlb_dma_mapping_error, - .dma_supported = x86_dma_supported, + .dma_supported = dma_direct_supported, }; /* At setup time, we use our own ops if the device is a ConneXt one */ diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 74788fdeb773..0bf19423b588 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -2193,7 +2194,7 @@ static int amd_iommu_add_device(struct device *dev) dev_name(dev)); iommu_ignore_device(dev); - dev->dma_ops = &nommu_dma_ops; + dev->dma_ops = &dma_direct_ops; goto out; } init_iommu_group(dev); @@ -2680,7 
+2681,7 @@ free_mem: */ static int amd_iommu_dma_supported(struct device *dev, u64 mask) { - if (!x86_dma_supported(dev, mask)) + if (!dma_direct_supported(dev, mask)) return 0; return check_device(dev); } @@ -2794,7 +2795,7 @@ int __init amd_iommu_init_dma_ops(void) * continue to be SWIOTLB. */ if (!swiotlb) - dma_ops = &nommu_dma_ops; + dma_ops = &dma_direct_ops; if (amd_iommu_unmap_flush) pr_info("AMD-Vi: IO/TLB flush on unmap enabled\n"); diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 582fd01cb7d1..fd899b2a12bb 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include @@ -3871,7 +3872,7 @@ const struct dma_map_ops intel_dma_ops = { .unmap_page = intel_unmap_page, .mapping_error = intel_mapping_error, #ifdef CONFIG_X86 - .dma_supported = x86_dma_supported, + .dma_supported = dma_direct_supported, #endif }; -- cgit v1.2.3 From b6e05477c10c12e36141558fc14f04b00ea634d4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 19 Mar 2018 11:38:24 +0100 Subject: dma/direct: Handle the memory encryption bit in common code Give the basic phys_to_dma() and dma_to_phys() helpers a __-prefix and add the memory encryption mask to the non-prefixed versions. Use the __-prefixed versions directly instead of clearing the mask again in various places. 
Tested-by: Tom Lendacky Signed-off-by: Christoph Hellwig Reviewed-by: Thomas Gleixner Cc: David Woodhouse Cc: Joerg Roedel Cc: Jon Mason Cc: Konrad Rzeszutek Wilk Cc: Linus Torvalds Cc: Muli Ben-Yehuda Cc: Peter Zijlstra Cc: iommu@lists.linux-foundation.org Link: http://lkml.kernel.org/r/20180319103826.12853-13-hch@lst.de Signed-off-by: Ingo Molnar --- arch/arm/include/asm/dma-direct.h | 4 ++-- arch/mips/cavium-octeon/dma-octeon.c | 10 ++++----- .../include/asm/mach-cavium-octeon/dma-coherence.h | 4 ++-- .../include/asm/mach-loongson64/dma-coherence.h | 10 ++++----- arch/mips/loongson64/common/dma-swiotlb.c | 4 ++-- arch/powerpc/include/asm/dma-direct.h | 4 ++-- arch/x86/Kconfig | 2 +- arch/x86/include/asm/dma-direct.h | 25 ++-------------------- arch/x86/mm/mem_encrypt.c | 2 +- arch/x86/pci/sta2x11-fixup.c | 6 +++--- include/linux/dma-direct.h | 21 ++++++++++++++++-- lib/swiotlb.c | 25 ++++++++-------------- 12 files changed, 53 insertions(+), 64 deletions(-) (limited to 'arch/x86/Kconfig') diff --git a/arch/arm/include/asm/dma-direct.h b/arch/arm/include/asm/dma-direct.h index 5b0a8a421894..b67e5fc1fe43 100644 --- a/arch/arm/include/asm/dma-direct.h +++ b/arch/arm/include/asm/dma-direct.h @@ -2,13 +2,13 @@ #ifndef ASM_ARM_DMA_DIRECT_H #define ASM_ARM_DMA_DIRECT_H 1 -static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) +static inline dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) { unsigned int offset = paddr & ~PAGE_MASK; return pfn_to_dma(dev, __phys_to_pfn(paddr)) + offset; } -static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dev_addr) +static inline phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dev_addr) { unsigned int offset = dev_addr & ~PAGE_MASK; return __pfn_to_phys(dma_to_pfn(dev, dev_addr)) + offset; diff --git a/arch/mips/cavium-octeon/dma-octeon.c b/arch/mips/cavium-octeon/dma-octeon.c index c7bb8a407041..7b335ab21697 100644 --- a/arch/mips/cavium-octeon/dma-octeon.c +++ 
b/arch/mips/cavium-octeon/dma-octeon.c @@ -10,7 +10,7 @@ * IP32 changes by Ilya. * Copyright (C) 2010 Cavium Networks, Inc. */ -#include +#include #include #include #include @@ -182,7 +182,7 @@ struct octeon_dma_map_ops { phys_addr_t (*dma_to_phys)(struct device *dev, dma_addr_t daddr); }; -dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) +dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) { struct octeon_dma_map_ops *ops = container_of(get_dma_ops(dev), struct octeon_dma_map_ops, @@ -190,9 +190,9 @@ dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) return ops->phys_to_dma(dev, paddr); } -EXPORT_SYMBOL(phys_to_dma); +EXPORT_SYMBOL(__phys_to_dma); -phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) +phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr) { struct octeon_dma_map_ops *ops = container_of(get_dma_ops(dev), struct octeon_dma_map_ops, @@ -200,7 +200,7 @@ phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) return ops->dma_to_phys(dev, daddr); } -EXPORT_SYMBOL(dma_to_phys); +EXPORT_SYMBOL(__dma_to_phys); static struct octeon_dma_map_ops octeon_linear_dma_map_ops = { .dma_map_ops = { diff --git a/arch/mips/include/asm/mach-cavium-octeon/dma-coherence.h b/arch/mips/include/asm/mach-cavium-octeon/dma-coherence.h index 138edf6b5b48..6eb1ee548b11 100644 --- a/arch/mips/include/asm/mach-cavium-octeon/dma-coherence.h +++ b/arch/mips/include/asm/mach-cavium-octeon/dma-coherence.h @@ -69,8 +69,8 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) return addr + size - 1 <= *dev->dma_mask; } -dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr); -phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr); +dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr); +phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr); struct dma_map_ops; extern const struct dma_map_ops *octeon_pci_dma_map_ops; diff --git 
a/arch/mips/include/asm/mach-loongson64/dma-coherence.h b/arch/mips/include/asm/mach-loongson64/dma-coherence.h index b1b575f5c6c1..64fc44dec0a8 100644 --- a/arch/mips/include/asm/mach-loongson64/dma-coherence.h +++ b/arch/mips/include/asm/mach-loongson64/dma-coherence.h @@ -25,13 +25,13 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) return addr + size - 1 <= *dev->dma_mask; } -extern dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr); -extern phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr); +extern dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr); +extern phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr); static inline dma_addr_t plat_map_dma_mem(struct device *dev, void *addr, size_t size) { #ifdef CONFIG_CPU_LOONGSON3 - return phys_to_dma(dev, virt_to_phys(addr)); + return __phys_to_dma(dev, virt_to_phys(addr)); #else return virt_to_phys(addr) | 0x80000000; #endif @@ -41,7 +41,7 @@ static inline dma_addr_t plat_map_dma_mem_page(struct device *dev, struct page *page) { #ifdef CONFIG_CPU_LOONGSON3 - return phys_to_dma(dev, page_to_phys(page)); + return __phys_to_dma(dev, page_to_phys(page)); #else return page_to_phys(page) | 0x80000000; #endif @@ -51,7 +51,7 @@ static inline unsigned long plat_dma_addr_to_phys(struct device *dev, dma_addr_t dma_addr) { #if defined(CONFIG_CPU_LOONGSON3) && defined(CONFIG_64BIT) - return dma_to_phys(dev, dma_addr); + return __dma_to_phys(dev, dma_addr); #elif defined(CONFIG_CPU_LOONGSON2F) && defined(CONFIG_64BIT) return (dma_addr > 0x8fffffff) ? 
dma_addr : (dma_addr & 0x0fffffff); #else diff --git a/arch/mips/loongson64/common/dma-swiotlb.c b/arch/mips/loongson64/common/dma-swiotlb.c index 7bbcf89475f3..6a739f8ae110 100644 --- a/arch/mips/loongson64/common/dma-swiotlb.c +++ b/arch/mips/loongson64/common/dma-swiotlb.c @@ -63,7 +63,7 @@ static int loongson_dma_supported(struct device *dev, u64 mask) return swiotlb_dma_supported(dev, mask); } -dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) +dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) { long nid; #ifdef CONFIG_PHYS48_TO_HT40 @@ -75,7 +75,7 @@ dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) return paddr; } -phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) +phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr) { long nid; #ifdef CONFIG_PHYS48_TO_HT40 diff --git a/arch/powerpc/include/asm/dma-direct.h b/arch/powerpc/include/asm/dma-direct.h index a5b59c765426..7702875aabb7 100644 --- a/arch/powerpc/include/asm/dma-direct.h +++ b/arch/powerpc/include/asm/dma-direct.h @@ -17,12 +17,12 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) return addr + size - 1 <= *dev->dma_mask; } -static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) +static inline dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) { return paddr + get_dma_offset(dev); } -static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) +static inline phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr) { return daddr - get_dma_offset(dev); } diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 7dc347217d3a..5b4899de076f 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -54,7 +54,6 @@ config X86 select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_KCOV if X86_64 - select ARCH_HAS_PHYS_TO_DMA select ARCH_HAS_MEMBARRIER_SYNC_CORE select ARCH_HAS_PMEM_API if X86_64 select ARCH_HAS_REFCOUNT @@ -692,6 +691,7 @@ 
config X86_SUPPORTS_MEMORY_FAILURE config STA2X11 bool "STA2X11 Companion Chip Support" depends on X86_32_NON_STANDARD && PCI + select ARCH_HAS_PHYS_TO_DMA select X86_DEV_DMA_OPS select X86_DMA_REMAP select SWIOTLB diff --git a/arch/x86/include/asm/dma-direct.h b/arch/x86/include/asm/dma-direct.h index 1295bc622ebe..1a19251eaac9 100644 --- a/arch/x86/include/asm/dma-direct.h +++ b/arch/x86/include/asm/dma-direct.h @@ -2,29 +2,8 @@ #ifndef ASM_X86_DMA_DIRECT_H #define ASM_X86_DMA_DIRECT_H 1 -#include - -#ifdef CONFIG_X86_DMA_REMAP /* Platform code defines bridge-specific code */ bool dma_capable(struct device *dev, dma_addr_t addr, size_t size); -dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr); -phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr); -#else -static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) -{ - if (!dev->dma_mask) - return 0; - - return addr + size - 1 <= *dev->dma_mask; -} - -static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) -{ - return __sme_set(paddr); -} +dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr); +phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr); -static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) -{ - return __sme_clr(daddr); -} -#endif /* CONFIG_X86_DMA_REMAP */ #endif /* ASM_X86_DMA_DIRECT_H */ diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c index d243e8d80d89..1b396422d26f 100644 --- a/arch/x86/mm/mem_encrypt.c +++ b/arch/x86/mm/mem_encrypt.c @@ -211,7 +211,7 @@ static void *sev_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, * Since we will be clearing the encryption bit, check the * mask with it already cleared. 
*/ - addr = __sme_clr(phys_to_dma(dev, page_to_phys(page))); + addr = __phys_to_dma(dev, page_to_phys(page)); if ((addr + size) > dev->coherent_dma_mask) { __free_pages(page, get_order(size)); } else { diff --git a/arch/x86/pci/sta2x11-fixup.c b/arch/x86/pci/sta2x11-fixup.c index eac58e03f43c..7a5bafb76d77 100644 --- a/arch/x86/pci/sta2x11-fixup.c +++ b/arch/x86/pci/sta2x11-fixup.c @@ -207,11 +207,11 @@ bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) } /** - * phys_to_dma - Return the DMA AMBA address used for this STA2x11 device + * __phys_to_dma - Return the DMA AMBA address used for this STA2x11 device * @dev: device for a PCI device * @paddr: Physical address */ -dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) +dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) { if (!dev->archdata.is_sta2x11) return paddr; @@ -223,7 +223,7 @@ dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) * @dev: device for a PCI device * @daddr: STA2x11 AMBA DMA address */ -phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) +phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr) { if (!dev->archdata.is_sta2x11) return daddr; diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index bcdb1a3e4b1f..53ad6a47f513 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -3,18 +3,19 @@ #define _LINUX_DMA_DIRECT_H 1 #include +#include #ifdef CONFIG_ARCH_HAS_PHYS_TO_DMA #include #else -static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) +static inline dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) { dma_addr_t dev_addr = (dma_addr_t)paddr; return dev_addr - ((dma_addr_t)dev->dma_pfn_offset << PAGE_SHIFT); } -static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dev_addr) +static inline phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dev_addr) { phys_addr_t paddr = (phys_addr_t)dev_addr; @@ -30,6 +31,22 @@ static inline 
bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) } #endif /* !CONFIG_ARCH_HAS_PHYS_TO_DMA */ +/* + * If memory encryption is supported, phys_to_dma will set the memory encryption + * bit in the DMA address, and dma_to_phys will clear it. The raw __phys_to_dma + * and __dma_to_phys versions should only be used on non-encrypted memory for + * special occasions like DMA coherent buffers. + */ +static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) +{ + return __sme_set(__phys_to_dma(dev, paddr)); +} + +static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) +{ + return __sme_clr(__dma_to_phys(dev, daddr)); +} + #ifdef CONFIG_ARCH_HAS_DMA_MARK_CLEAN void dma_mark_clean(void *addr, size_t size); #else diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 005d1d87bb2e..8b06b4485e65 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -157,13 +157,6 @@ unsigned long swiotlb_size_or_default(void) return size ? size : (IO_TLB_DEFAULT_SIZE); } -/* For swiotlb, clear memory encryption mask from dma addresses */ -static dma_addr_t swiotlb_phys_to_dma(struct device *hwdev, - phys_addr_t address) -{ - return __sme_clr(phys_to_dma(hwdev, address)); -} - /* Note that this doesn't work with highmem page */ static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev, volatile void *address) @@ -622,7 +615,7 @@ map_single(struct device *hwdev, phys_addr_t phys, size_t size, return SWIOTLB_MAP_ERROR; } - start_dma_addr = swiotlb_phys_to_dma(hwdev, io_tlb_start); + start_dma_addr = __phys_to_dma(hwdev, io_tlb_start); return swiotlb_tbl_map_single(hwdev, start_dma_addr, phys, size, dir, attrs); } @@ -726,12 +719,12 @@ swiotlb_alloc_buffer(struct device *dev, size_t size, dma_addr_t *dma_handle, goto out_warn; phys_addr = swiotlb_tbl_map_single(dev, - swiotlb_phys_to_dma(dev, io_tlb_start), + __phys_to_dma(dev, io_tlb_start), 0, size, DMA_FROM_DEVICE, 0); if (phys_addr == SWIOTLB_MAP_ERROR) goto out_warn; - *dma_handle = 
swiotlb_phys_to_dma(dev, phys_addr); + *dma_handle = __phys_to_dma(dev, phys_addr); if (dma_coherent_ok(dev, *dma_handle, size)) goto out_unmap; @@ -867,10 +860,10 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, map = map_single(dev, phys, size, dir, attrs); if (map == SWIOTLB_MAP_ERROR) { swiotlb_full(dev, size, dir, 1); - return swiotlb_phys_to_dma(dev, io_tlb_overflow_buffer); + return __phys_to_dma(dev, io_tlb_overflow_buffer); } - dev_addr = swiotlb_phys_to_dma(dev, map); + dev_addr = __phys_to_dma(dev, map); /* Ensure that the address returned is DMA'ble */ if (dma_capable(dev, dev_addr, size)) @@ -879,7 +872,7 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, attrs |= DMA_ATTR_SKIP_CPU_SYNC; swiotlb_tbl_unmap_single(dev, map, size, dir, attrs); - return swiotlb_phys_to_dma(dev, io_tlb_overflow_buffer); + return __phys_to_dma(dev, io_tlb_overflow_buffer); } /* @@ -1009,7 +1002,7 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems, sg_dma_len(sgl) = 0; return 0; } - sg->dma_address = swiotlb_phys_to_dma(hwdev, map); + sg->dma_address = __phys_to_dma(hwdev, map); } else sg->dma_address = dev_addr; sg_dma_len(sg) = sg->length; @@ -1073,7 +1066,7 @@ swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg, int swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr) { - return (dma_addr == swiotlb_phys_to_dma(hwdev, io_tlb_overflow_buffer)); + return (dma_addr == __phys_to_dma(hwdev, io_tlb_overflow_buffer)); } /* @@ -1085,7 +1078,7 @@ swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr) int swiotlb_dma_supported(struct device *hwdev, u64 mask) { - return swiotlb_phys_to_dma(hwdev, io_tlb_end - 1) <= mask; + return __phys_to_dma(hwdev, io_tlb_end - 1) <= mask; } #ifdef CONFIG_DMA_DIRECT_OPS -- cgit v1.2.3 From d0266046ad54e0c964941364cd82a0d0478ce286 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 19 Mar 2018 16:41:26 +0100 Subject: x86: 
Remove FAST_FEATURE_TESTS Since we want to rely on static branches to avoid speculation, remove any possible fallback code for static_cpu_has. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Cc: torvalds@linux-foundation.org Link: https://lkml.kernel.org/r/20180319154717.705383007@infradead.org --- arch/x86/Kconfig | 11 ----------- arch/x86/include/asm/cpufeature.h | 8 -------- 2 files changed, 19 deletions(-) (limited to 'arch/x86/Kconfig') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 0fa71a78ec99..cb5b5907dbd6 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -393,17 +393,6 @@ config X86_FEATURE_NAMES If in doubt, say Y. -config X86_FAST_FEATURE_TESTS - bool "Fast CPU feature tests" if EMBEDDED - default y - ---help--- - Some fast-paths in the kernel depend on the capabilities of the CPU. - Say Y here for the kernel to patch in the appropriate code at runtime - based on the capabilities of the CPU. The infrastructure for patching - code at runtime takes up some additional space; space-constrained - embedded systems may wish to say N here to produce smaller, slightly - slower code. - config X86_X2APIC bool "Support x2apic" depends on X86_LOCAL_APIC && X86_64 && (IRQ_REMAP || HYPERVISOR_GUEST) diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 736771c9822e..b27da9602a6d 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -140,7 +140,6 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit); #define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit) -#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS) /* * Static testing of CPU features. Used the same as boot_cpu_has(). * These will statically patch the target code for additional @@ -196,13 +195,6 @@ t_no: boot_cpu_has(bit) : \ _static_cpu_has(bit) \ ) -#else -/* - * Fall back to dynamic for gcc versions which don't support asm goto. Should be - * a minority now anyway. 
- */ -#define static_cpu_has(bit) boot_cpu_has(bit) -#endif #define cpu_has_bug(c, bit) cpu_has(c, (bit)) #define set_cpu_bug(c, bit) set_cpu_cap(c, (bit)) -- cgit v1.2.3 From fc5d1073cae299de4517755a910df4f12a6a438f Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Mon, 26 Mar 2018 23:27:21 -0700 Subject: x86/mm/32: Remove unused node_memmap_size_bytes() & CONFIG_NEED_NODE_MEMMAP_SIZE logic node_memmap_size_bytes() has been unused since the v3.9 kernel, so remove it. Signed-off-by: David Rientjes Cc: Dave Hansen Cc: Laura Abbott Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-mm@kvack.org Fixes: f03574f2d5b2 ("x86-32, mm: Rip out x86_32 NUMA remapping code") Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1803262325540.256524@chino.kir.corp.google.com Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 4 ---- arch/x86/mm/numa_32.c | 11 ----------- include/linux/mmzone.h | 5 ----- mm/sparse.c | 22 ---------------------- 4 files changed, 42 deletions(-) (limited to 'arch/x86/Kconfig') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 18233e459bff..739aff253d17 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1608,10 +1608,6 @@ config ARCH_HAVE_MEMORY_PRESENT def_bool y depends on X86_32 && DISCONTIGMEM -config NEED_NODE_MEMMAP_SIZE - def_bool y - depends on X86_32 && (DISCONTIGMEM || SPARSEMEM) - config ARCH_FLATMEM_ENABLE def_bool y depends on X86_32 && !NUMA diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c index aca6295350f3..e8a4a09e20f1 100644 --- a/arch/x86/mm/numa_32.c +++ b/arch/x86/mm/numa_32.c @@ -60,17 +60,6 @@ void memory_present(int nid, unsigned long start, unsigned long end) } printk(KERN_CONT "\n"); } - -unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn, - unsigned long end_pfn) -{ - unsigned long nr_pages = end_pfn - start_pfn; - - if (!nr_pages) - return 0; - - return (nr_pages + 1) * sizeof(struct page); -} #endif extern unsigned long highend_pfn, highstart_pfn; diff --git 
a/include/linux/mmzone.h b/include/linux/mmzone.h index 7522a6987595..a2db4576e499 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -816,10 +816,6 @@ int local_memory_node(int node_id); static inline int local_memory_node(int node_id) { return node_id; }; #endif -#ifdef CONFIG_NEED_NODE_MEMMAP_SIZE -unsigned long __init node_memmap_size_bytes(int, unsigned long, unsigned long); -#endif - /* * zone_idx() returns 0 for the ZONE_DMA zone, 1 for the ZONE_NORMAL zone, etc. */ @@ -1289,7 +1285,6 @@ struct mminit_pfnnid_cache { #endif void memory_present(int nid, unsigned long start, unsigned long end); -unsigned long __init node_memmap_size_bytes(int, unsigned long, unsigned long); /* * If it is possible to have holes within a MAX_ORDER_NR_PAGES, then we diff --git a/mm/sparse.c b/mm/sparse.c index 7af5e7a92528..79b26f98d793 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -235,28 +235,6 @@ void __init memory_present(int nid, unsigned long start, unsigned long end) } } -/* - * Only used by the i386 NUMA architecures, but relatively - * generic code. 
- */ -unsigned long __init node_memmap_size_bytes(int nid, unsigned long start_pfn, - unsigned long end_pfn) -{ - unsigned long pfn; - unsigned long nr_pages = 0; - - mminit_validate_memmodel_limits(&start_pfn, &end_pfn); - for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) { - if (nid != early_pfn_to_nid(pfn)) - continue; - - if (pfn_present(pfn)) - nr_pages += PAGES_PER_SECTION; - } - - return nr_pages * sizeof(struct page); -} - /* * Subtle, we encode the real pfn into the mem_map such that * the identity pfn - section_mem_map will return the actual -- cgit v1.2.3 From fa697140f9a20119a9ec8fd7460cc4314fbdaff3 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Thu, 5 Apr 2018 11:53:02 +0200 Subject: syscalls/x86: Use 'struct pt_regs' based syscall calling convention for 64-bit syscalls Let's make use of ARCH_HAS_SYSCALL_WRAPPER=y on pure 64-bit x86-64 systems: Each syscall defines a stub which takes struct pt_regs as its only argument. It decodes just those parameters it needs, e.g: asmlinkage long sys_xyzzy(const struct pt_regs *regs) { return SyS_xyzzy(regs->di, regs->si, regs->dx); } This approach avoids leaking random user-provided register content down the call chain. For example, for sys_recv() which is a 4-parameter syscall, the assembly now is (in slightly reordered fashion): <sys_recv>: callq <__fentry__> /* decode regs->di, ->si, ->dx and ->r10 */ mov 0x70(%rdi),%rdi mov 0x68(%rdi),%rsi mov 0x60(%rdi),%rdx mov 0x38(%rdi),%rcx [ SyS_recv() is automatically inlined by the compiler, as it is not [yet] used anywhere else ] /* clear %r9 and %r8, the 5th and 6th args */ xor %r9d,%r9d xor %r8d,%r8d /* do the actual work */ callq __sys_recvfrom /* cleanup and return */ cltq retq The only valid place in an x86-64 kernel which rightfully calls a syscall function on its own -- vsyscall -- needs to be modified to pass struct pt_regs onwards as well.
To keep the syscall table generation working independent of SYSCALL_PTREGS being enabled, the stubs are named the same as the "original" syscall stubs, i.e. sys_*(). This patch is based on an original proof-of-concept | From: Linus Torvalds | Signed-off-by: Linus Torvalds and was split up and heavily modified by me, in particular to base it on ARCH_HAS_SYSCALL_WRAPPER, to limit it to 64-bit-only for the time being, and to update the vsyscall to the new calling convention. Signed-off-by: Dominik Brodowski Acked-by: Linus Torvalds Cc: Al Viro Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180405095307.3730-4-linux@dominikbrodowski.net Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 5 +++ arch/x86/entry/common.c | 4 ++ arch/x86/entry/syscall_64.c | 9 ++++- arch/x86/entry/vsyscall/vsyscall_64.c | 22 +++++++++++ arch/x86/include/asm/syscall.h | 4 ++ arch/x86/include/asm/syscall_wrapper.h | 70 ++++++++++++++++++++++++++++++++++ arch/x86/include/asm/syscalls.h | 7 ++++ include/linux/syscalls.h | 2 +- 8 files changed, 120 insertions(+), 3 deletions(-) create mode 100644 arch/x86/include/asm/syscall_wrapper.h (limited to 'arch/x86/Kconfig') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 27fede438959..67348efc2540 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2954,3 +2954,8 @@ source "crypto/Kconfig" source "arch/x86/kvm/Kconfig" source "lib/Kconfig" + +config SYSCALL_PTREGS + def_bool y + depends on X86_64 && !COMPAT + select ARCH_HAS_SYSCALL_WRAPPER diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index a8b066dbbf48..e1b91bffa988 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -284,9 +284,13 @@ __visible void do_syscall_64(unsigned long nr, struct pt_regs *regs) nr &= __SYSCALL_MASK; if (likely(nr < NR_syscalls)) { nr = array_index_nospec(nr, NR_syscalls); +#ifdef 
CONFIG_SYSCALL_PTREGS + regs->ax = sys_call_table[nr](regs); +#else regs->ax = sys_call_table[nr]( regs->di, regs->si, regs->dx, regs->r10, regs->r8, regs->r9); +#endif } syscall_return_slowpath(regs); diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c index c176d2fab1da..6197850adf91 100644 --- a/arch/x86/entry/syscall_64.c +++ b/arch/x86/entry/syscall_64.c @@ -7,14 +7,19 @@ #include #include +#ifdef CONFIG_SYSCALL_PTREGS +/* this is a lie, but it does not hurt as sys_ni_syscall just returns -EINVAL */ +extern asmlinkage long sys_ni_syscall(const struct pt_regs *); +#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long sym(const struct pt_regs *); +#else /* CONFIG_SYSCALL_PTREGS */ +extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); #define __SYSCALL_64(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); +#endif /* CONFIG_SYSCALL_PTREGS */ #include #undef __SYSCALL_64 #define __SYSCALL_64(nr, sym, qual) [nr] = sym, -extern long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); - asmlinkage const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = { /* * Smells like a compiler bug -- it doesn't work diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c index 317be365bce3..05eebbf9b989 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -127,6 +127,9 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) int vsyscall_nr, syscall_nr, tmp; int prev_sig_on_uaccess_err; long ret; +#ifdef CONFIG_SYSCALL_PTREGS + unsigned long orig_dx; +#endif /* * No point in checking CS -- the only way to get here is a user mode @@ -227,19 +230,38 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) ret = -EFAULT; switch (vsyscall_nr) { 
case 0: +#ifdef CONFIG_SYSCALL_PTREGS + /* this decodes regs->di and regs->si on its own */ + ret = sys_gettimeofday(regs); +#else ret = sys_gettimeofday( (struct timeval __user *)regs->di, (struct timezone __user *)regs->si); +#endif /* CONFIG_SYSCALL_PTREGS */ break; case 1: +#ifdef CONFIG_SYSCALL_PTREGS + /* this decodes regs->di on its own */ + ret = sys_time(regs); +#else ret = sys_time((time_t __user *)regs->di); +#endif /* CONFIG_SYSCALL_PTREGS */ break; case 2: +#ifdef CONFIG_SYSCALL_PTREGS + /* while we could clobber regs->dx, we didn't in the past... */ + orig_dx = regs->dx; + regs->dx = 0; + /* this decodes regs->di, regs->si and regs->dx on its own */ + ret = sys_getcpu(regs); + regs->dx = orig_dx; +#else ret = sys_getcpu((unsigned __user *)regs->di, (unsigned __user *)regs->si, NULL); +#endif /* CONFIG_SYSCALL_PTREGS */ break; } diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h index 03eedc21246d..17c62373a6f9 100644 --- a/arch/x86/include/asm/syscall.h +++ b/arch/x86/include/asm/syscall.h @@ -20,9 +20,13 @@ #include /* for TS_COMPAT */ #include +#ifdef CONFIG_SYSCALL_PTREGS +typedef asmlinkage long (*sys_call_ptr_t)(const struct pt_regs *); +#else typedef asmlinkage long (*sys_call_ptr_t)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); +#endif /* CONFIG_SYSCALL_PTREGS */ extern const sys_call_ptr_t sys_call_table[]; #if defined(CONFIG_X86_32) diff --git a/arch/x86/include/asm/syscall_wrapper.h b/arch/x86/include/asm/syscall_wrapper.h new file mode 100644 index 000000000000..702bdee377af --- /dev/null +++ b/arch/x86/include/asm/syscall_wrapper.h @@ -0,0 +1,70 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * syscall_wrapper.h - x86 specific wrappers to syscall definitions + */ + +#ifndef _ASM_X86_SYSCALL_WRAPPER_H +#define _ASM_X86_SYSCALL_WRAPPER_H + +/* + * Instead of the generic __SYSCALL_DEFINEx() definition, this macro takes + * struct pt_regs *regs as the only argument of the 
syscall stub named + * sys_*(). It decodes just the registers it needs and passes them on to + * the SyS_*() wrapper and then to the SYSC_*() function doing the actual job. + * These wrappers and functions are inlined, meaning that the assembly looks + * as follows (slightly re-ordered): + * + * <sys_recv>: <-- syscall with 4 parameters + * callq <__fentry__> + * + * mov 0x70(%rdi),%rdi <-- decode regs->di + * mov 0x68(%rdi),%rsi <-- decode regs->si + * mov 0x60(%rdi),%rdx <-- decode regs->dx + * mov 0x38(%rdi),%rcx <-- decode regs->r10 + * + * xor %r9d,%r9d <-- clear %r9 + * xor %r8d,%r8d <-- clear %r8 + * + * callq __sys_recvfrom <-- do the actual work in __sys_recvfrom() + * which takes 6 arguments + * + * cltq <-- extend return value to 64-bit + * retq <-- return + * + * This approach avoids leaking random user-provided register content down + * the call chain. + * + * As the generic SYSCALL_DEFINE0() macro does not decode any parameters for + * obvious reasons, and passing struct pt_regs *regs to it in %rdi does not + * hurt, there is no need to override it. + */ +#define __SYSCALL_DEFINEx(x, name, ...) \ + asmlinkage long sys##name(const struct pt_regs *regs); \ + ALLOW_ERROR_INJECTION(sys##name, ERRNO); \ + static long SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ + static inline long SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \ + asmlinkage long sys##name(const struct pt_regs *regs) \ + { \ + return SyS##name(__MAP(x,__SC_ARGS \ + ,,regs->di,,regs->si,,regs->dx \ + ,,regs->r10,,regs->r8,,regs->r9)); \ + } \ + static long SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \ + { \ + long ret = SYSC##name(__MAP(x,__SC_CAST,__VA_ARGS__)); \ + __MAP(x,__SC_TEST,__VA_ARGS__); \ + __PROTECT(x, ret,__MAP(x,__SC_ARGS,__VA_ARGS__)); \ + return ret; \ + } \ + static inline long SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__)) + +/* + * For VSYSCALLS, we need to declare these three syscalls with the new + * pt_regs-based calling convention for in-kernel use.
+ */ +struct pt_regs; +asmlinkage long sys_getcpu(const struct pt_regs *regs); /* di,si,dx */ +asmlinkage long sys_gettimeofday(const struct pt_regs *regs); /* di,si */ +asmlinkage long sys_time(const struct pt_regs *regs); /* di */ + +#endif /* _ASM_X86_SYSCALL_WRAPPER_H */ diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index ae6e05fdc24b..e4ad93c05f02 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h @@ -18,6 +18,12 @@ /* Common in X86_32 and X86_64 */ /* kernel/ioport.c */ long ksys_ioperm(unsigned long from, unsigned long num, int turn_on); + +#ifndef CONFIG_SYSCALL_PTREGS +/* + * If CONFIG_SYSCALL_PTREGS is enabled, a different syscall calling convention + * is used. Do not include these -- invalid -- prototypes then + */ asmlinkage long sys_ioperm(unsigned long, unsigned long, int); asmlinkage long sys_iopl(unsigned int); @@ -53,4 +59,5 @@ asmlinkage long sys_mmap(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); #endif /* CONFIG_X86_32 */ +#endif /* CONFIG_SYSCALL_PTREGS */ #endif /* _ASM_X86_SYSCALLS_H */ diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 503ab245d4ce..d7168b3a4b4c 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -102,7 +102,7 @@ union bpf_attr; * for SYSCALL_DEFINE/COMPAT_SYSCALL_DEFINE */ #define __MAP0(m,...) -#define __MAP1(m,t,a) m(t,a) +#define __MAP1(m,t,a,...) m(t,a) #define __MAP2(m,t,a,...) m(t,a), __MAP1(m,__VA_ARGS__) #define __MAP3(m,t,a,...) m(t,a), __MAP2(m,__VA_ARGS__) #define __MAP4(m,t,a,...) m(t,a), __MAP3(m,__VA_ARGS__) -- cgit v1.2.3 From ebeb8c82ffaf94435806ff0b686fffd41dd410b5 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Thu, 5 Apr 2018 11:53:04 +0200 Subject: syscalls/x86: Use 'struct pt_regs' based syscall calling for IA32_EMULATION and x32 Extend ARCH_HAS_SYSCALL_WRAPPER for i386 emulation and for x32 on 64-bit x86. 
For x32, all we need to do is to create an additional stub for each compat syscall which decodes the parameters in x86-64 ordering, e.g.: asmlinkage long __compat_sys_x32_xyzzy(struct pt_regs *regs) { return c_SyS_xyzzy(regs->di, regs->si, regs->dx); } For i386 emulation, we need to teach compat_sys_*() to take struct pt_regs as its only argument, e.g.: asmlinkage long __compat_sys_ia32_xyzzy(struct pt_regs *regs) { return c_SyS_xyzzy(regs->bx, regs->cx, regs->dx); } In addition, we need to create additional stubs for common syscalls (that is, for syscalls which have the same parameters on 32-bit and 64-bit), e.g.: asmlinkage long __sys_ia32_xyzzy(struct pt_regs *regs) { return c_sys_xyzzy(regs->bx, regs->cx, regs->dx); } This approach avoids leaking random user-provided register content down the call chain. This patch is based on an original proof-of-concept | From: Linus Torvalds | Signed-off-by: Linus Torvalds and was split up and heavily modified by me, in particular to base it on ARCH_HAS_SYSCALL_WRAPPER. Signed-off-by: Dominik Brodowski Acked-by: Linus Torvalds Cc: Al Viro Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180405095307.3730-6-linux@dominikbrodowski.net Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 2 +- arch/x86/entry/common.c | 4 + arch/x86/entry/syscall_32.c | 15 +- arch/x86/entry/syscalls/syscall_32.tbl | 677 +++++++++++++++++---------------- arch/x86/entry/syscalls/syscall_64.tbl | 74 ++-- arch/x86/include/asm/syscall_wrapper.h | 117 +++++- 6 files changed, 509 insertions(+), 380 deletions(-) (limited to 'arch/x86/Kconfig') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 67348efc2540..7bbd6a174722 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2957,5 +2957,5 @@ source "lib/Kconfig" config SYSCALL_PTREGS def_bool y - depends on X86_64 && !COMPAT + depends on X86_64 select ARCH_HAS_SYSCALL_WRAPPER diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index e1b91bffa988..425f798b39e3 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -325,6 +325,9 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs) if (likely(nr < IA32_NR_syscalls)) { nr = array_index_nospec(nr, IA32_NR_syscalls); +#ifdef CONFIG_SYSCALL_PTREGS + regs->ax = ia32_sys_call_table[nr](regs); +#else /* * It's possible that a 32-bit syscall implementation * takes a 64-bit parameter but nonetheless assumes that @@ -335,6 +338,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs) (unsigned int)regs->bx, (unsigned int)regs->cx, (unsigned int)regs->dx, (unsigned int)regs->si, (unsigned int)regs->di, (unsigned int)regs->bp); +#endif /* CONFIG_SYSCALL_PTREGS */ } syscall_return_slowpath(regs); diff --git a/arch/x86/entry/syscall_32.c b/arch/x86/entry/syscall_32.c index 95c294963612..47060dd8efb1 100644 --- a/arch/x86/entry/syscall_32.c +++ b/arch/x86/entry/syscall_32.c @@ -7,14 +7,23 @@ #include #include -#define __SYSCALL_I386(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, 
unsigned long, unsigned long, unsigned long) ; +#ifdef CONFIG_SYSCALL_PTREGS +/* On X86_64, we use struct pt_regs * to pass parameters to syscalls */ +#define __SYSCALL_I386(nr, sym, qual) extern asmlinkage long sym(const struct pt_regs *); + +/* this is a lie, but it does not hurt as sys_ni_syscall just returns -ENOSYS */ +extern asmlinkage long sys_ni_syscall(const struct pt_regs *); + +#else /* CONFIG_SYSCALL_PTREGS */ +#define __SYSCALL_I386(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); +extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); +#endif /* CONFIG_SYSCALL_PTREGS */ + #include <asm/syscalls_32.h> #undef __SYSCALL_I386 #define __SYSCALL_I386(nr, sym, qual) [nr] = sym, -extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); - __visible const sys_call_ptr_t ia32_sys_call_table[__NR_syscall_compat_max+1] = { /* * Smells like a compiler bug -- it doesn't work diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index c58f75b088c5..7f09a3da0b3d 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -4,390 +4,395 @@ # The format is: # <number> <abi> <name> <entry point> <compat entry point> # +# The __sys_ia32 and __compat_sys_ia32 stubs are created on-the-fly for +# sys_*() system calls and compat_sys_*() compat system calls if +# IA32_EMULATION is defined, and expect struct pt_regs *regs as their only +# parameter. +# # The abi is always "i386" for this file.
# 0 i386 restart_syscall sys_restart_syscall -1 i386 exit sys_exit +1 i386 exit sys_exit __sys_ia32_exit 2 i386 fork sys_fork -3 i386 read sys_read -4 i386 write sys_write -5 i386 open sys_open compat_sys_open -6 i386 close sys_close -7 i386 waitpid sys_waitpid -8 i386 creat sys_creat -9 i386 link sys_link -10 i386 unlink sys_unlink -11 i386 execve sys_execve compat_sys_execve -12 i386 chdir sys_chdir -13 i386 time sys_time compat_sys_time -14 i386 mknod sys_mknod -15 i386 chmod sys_chmod -16 i386 lchown sys_lchown16 +3 i386 read sys_read __sys_ia32_read +4 i386 write sys_write __sys_ia32_write +5 i386 open sys_open __compat_sys_ia32_open +6 i386 close sys_close __sys_ia32_close +7 i386 waitpid sys_waitpid __sys_ia32_waitpid +8 i386 creat sys_creat __sys_ia32_creat +9 i386 link sys_link __sys_ia32_link +10 i386 unlink sys_unlink __sys_ia32_unlink +11 i386 execve sys_execve __compat_sys_ia32_execve +12 i386 chdir sys_chdir __sys_ia32_chdir +13 i386 time sys_time __compat_sys_ia32_time +14 i386 mknod sys_mknod __sys_ia32_mknod +15 i386 chmod sys_chmod __sys_ia32_chmod +16 i386 lchown sys_lchown16 __sys_ia32_lchown16 17 i386 break -18 i386 oldstat sys_stat -19 i386 lseek sys_lseek compat_sys_lseek +18 i386 oldstat sys_stat __sys_ia32_stat +19 i386 lseek sys_lseek __compat_sys_ia32_lseek 20 i386 getpid sys_getpid -21 i386 mount sys_mount compat_sys_mount -22 i386 umount sys_oldumount -23 i386 setuid sys_setuid16 +21 i386 mount sys_mount __compat_sys_ia32_mount +22 i386 umount sys_oldumount __sys_ia32_oldumount +23 i386 setuid sys_setuid16 __sys_ia32_setuid16 24 i386 getuid sys_getuid16 -25 i386 stime sys_stime compat_sys_stime -26 i386 ptrace sys_ptrace compat_sys_ptrace -27 i386 alarm sys_alarm -28 i386 oldfstat sys_fstat +25 i386 stime sys_stime __compat_sys_ia32_stime +26 i386 ptrace sys_ptrace __compat_sys_ia32_ptrace +27 i386 alarm sys_alarm __sys_ia32_alarm +28 i386 oldfstat sys_fstat __sys_ia32_fstat 29 i386 pause sys_pause -30 i386 utime sys_utime 
compat_sys_utime +30 i386 utime sys_utime __compat_sys_ia32_utime 31 i386 stty 32 i386 gtty -33 i386 access sys_access -34 i386 nice sys_nice +33 i386 access sys_access __sys_ia32_access +34 i386 nice sys_nice __sys_ia32_nice 35 i386 ftime 36 i386 sync sys_sync -37 i386 kill sys_kill -38 i386 rename sys_rename -39 i386 mkdir sys_mkdir -40 i386 rmdir sys_rmdir -41 i386 dup sys_dup -42 i386 pipe sys_pipe -43 i386 times sys_times compat_sys_times +37 i386 kill sys_kill __sys_ia32_kill +38 i386 rename sys_rename __sys_ia32_rename +39 i386 mkdir sys_mkdir __sys_ia32_mkdir +40 i386 rmdir sys_rmdir __sys_ia32_rmdir +41 i386 dup sys_dup __sys_ia32_dup +42 i386 pipe sys_pipe __sys_ia32_pipe +43 i386 times sys_times __compat_sys_ia32_times 44 i386 prof -45 i386 brk sys_brk -46 i386 setgid sys_setgid16 +45 i386 brk sys_brk __sys_ia32_brk +46 i386 setgid sys_setgid16 __sys_ia32_setgid16 47 i386 getgid sys_getgid16 -48 i386 signal sys_signal +48 i386 signal sys_signal __sys_ia32_signal 49 i386 geteuid sys_geteuid16 50 i386 getegid sys_getegid16 -51 i386 acct sys_acct -52 i386 umount2 sys_umount +51 i386 acct sys_acct __sys_ia32_acct +52 i386 umount2 sys_umount __sys_ia32_umount 53 i386 lock -54 i386 ioctl sys_ioctl compat_sys_ioctl -55 i386 fcntl sys_fcntl compat_sys_fcntl64 +54 i386 ioctl sys_ioctl __compat_sys_ia32_ioctl +55 i386 fcntl sys_fcntl __compat_sys_ia32_fcntl64 56 i386 mpx -57 i386 setpgid sys_setpgid +57 i386 setpgid sys_setpgid __sys_ia32_setpgid 58 i386 ulimit -59 i386 oldolduname sys_olduname -60 i386 umask sys_umask -61 i386 chroot sys_chroot -62 i386 ustat sys_ustat compat_sys_ustat -63 i386 dup2 sys_dup2 +59 i386 oldolduname sys_olduname __sys_ia32_olduname +60 i386 umask sys_umask __sys_ia32_umask +61 i386 chroot sys_chroot __sys_ia32_chroot +62 i386 ustat sys_ustat __compat_sys_ia32_ustat +63 i386 dup2 sys_dup2 __sys_ia32_dup2 64 i386 getppid sys_getppid 65 i386 getpgrp sys_getpgrp 66 i386 setsid sys_setsid -67 i386 sigaction sys_sigaction 
compat_sys_sigaction +67 i386 sigaction sys_sigaction __compat_sys_ia32_sigaction 68 i386 sgetmask sys_sgetmask -69 i386 ssetmask sys_ssetmask -70 i386 setreuid sys_setreuid16 -71 i386 setregid sys_setregid16 -72 i386 sigsuspend sys_sigsuspend -73 i386 sigpending sys_sigpending compat_sys_sigpending -74 i386 sethostname sys_sethostname -75 i386 setrlimit sys_setrlimit compat_sys_setrlimit -76 i386 getrlimit sys_old_getrlimit compat_sys_old_getrlimit -77 i386 getrusage sys_getrusage compat_sys_getrusage -78 i386 gettimeofday sys_gettimeofday compat_sys_gettimeofday -79 i386 settimeofday sys_settimeofday compat_sys_settimeofday -80 i386 getgroups sys_getgroups16 -81 i386 setgroups sys_setgroups16 -82 i386 select sys_old_select compat_sys_old_select -83 i386 symlink sys_symlink -84 i386 oldlstat sys_lstat -85 i386 readlink sys_readlink -86 i386 uselib sys_uselib -87 i386 swapon sys_swapon -88 i386 reboot sys_reboot -89 i386 readdir sys_old_readdir compat_sys_old_readdir -90 i386 mmap sys_old_mmap compat_sys_x86_mmap -91 i386 munmap sys_munmap -92 i386 truncate sys_truncate compat_sys_truncate -93 i386 ftruncate sys_ftruncate compat_sys_ftruncate -94 i386 fchmod sys_fchmod -95 i386 fchown sys_fchown16 -96 i386 getpriority sys_getpriority -97 i386 setpriority sys_setpriority +69 i386 ssetmask sys_ssetmask __sys_ia32_ssetmask +70 i386 setreuid sys_setreuid16 __sys_ia32_setreuid16 +71 i386 setregid sys_setregid16 __sys_ia32_setregid16 +72 i386 sigsuspend sys_sigsuspend __sys_ia32_sigsuspend +73 i386 sigpending sys_sigpending __compat_sys_ia32_sigpending +74 i386 sethostname sys_sethostname __sys_ia32_sethostname +75 i386 setrlimit sys_setrlimit __compat_sys_ia32_setrlimit +76 i386 getrlimit sys_old_getrlimit __compat_sys_ia32_old_getrlimit +77 i386 getrusage sys_getrusage __compat_sys_ia32_getrusage +78 i386 gettimeofday sys_gettimeofday __compat_sys_ia32_gettimeofday +79 i386 settimeofday sys_settimeofday __compat_sys_ia32_settimeofday +80 i386 getgroups sys_getgroups16 
__sys_ia32_getgroups16 +81 i386 setgroups sys_setgroups16 __sys_ia32_setgroups16 +82 i386 select sys_old_select __compat_sys_ia32_old_select +83 i386 symlink sys_symlink __sys_ia32_symlink +84 i386 oldlstat sys_lstat __sys_ia32_lstat +85 i386 readlink sys_readlink __sys_ia32_readlink +86 i386 uselib sys_uselib __sys_ia32_uselib +87 i386 swapon sys_swapon __sys_ia32_swapon +88 i386 reboot sys_reboot __sys_ia32_reboot +89 i386 readdir sys_old_readdir __compat_sys_ia32_old_readdir +90 i386 mmap sys_old_mmap __compat_sys_ia32_x86_mmap +91 i386 munmap sys_munmap __sys_ia32_munmap +92 i386 truncate sys_truncate __compat_sys_ia32_truncate +93 i386 ftruncate sys_ftruncate __compat_sys_ia32_ftruncate +94 i386 fchmod sys_fchmod __sys_ia32_fchmod +95 i386 fchown sys_fchown16 __sys_ia32_fchown16 +96 i386 getpriority sys_getpriority __sys_ia32_getpriority +97 i386 setpriority sys_setpriority __sys_ia32_setpriority 98 i386 profil -99 i386 statfs sys_statfs compat_sys_statfs -100 i386 fstatfs sys_fstatfs compat_sys_fstatfs -101 i386 ioperm sys_ioperm -102 i386 socketcall sys_socketcall compat_sys_socketcall -103 i386 syslog sys_syslog -104 i386 setitimer sys_setitimer compat_sys_setitimer -105 i386 getitimer sys_getitimer compat_sys_getitimer -106 i386 stat sys_newstat compat_sys_newstat -107 i386 lstat sys_newlstat compat_sys_newlstat -108 i386 fstat sys_newfstat compat_sys_newfstat -109 i386 olduname sys_uname -110 i386 iopl sys_iopl +99 i386 statfs sys_statfs __compat_sys_ia32_statfs +100 i386 fstatfs sys_fstatfs __compat_sys_ia32_fstatfs +101 i386 ioperm sys_ioperm __sys_ia32_ioperm +102 i386 socketcall sys_socketcall __compat_sys_ia32_socketcall +103 i386 syslog sys_syslog __sys_ia32_syslog +104 i386 setitimer sys_setitimer __compat_sys_ia32_setitimer +105 i386 getitimer sys_getitimer __compat_sys_ia32_getitimer +106 i386 stat sys_newstat __compat_sys_ia32_newstat +107 i386 lstat sys_newlstat __compat_sys_ia32_newlstat +108 i386 fstat sys_newfstat __compat_sys_ia32_newfstat 
+109 i386 olduname sys_uname __sys_ia32_uname +110 i386 iopl sys_iopl __sys_ia32_iopl 111 i386 vhangup sys_vhangup 112 i386 idle 113 i386 vm86old sys_vm86old sys_ni_syscall -114 i386 wait4 sys_wait4 compat_sys_wait4 -115 i386 swapoff sys_swapoff -116 i386 sysinfo sys_sysinfo compat_sys_sysinfo -117 i386 ipc sys_ipc compat_sys_ipc -118 i386 fsync sys_fsync +114 i386 wait4 sys_wait4 __compat_sys_ia32_wait4 +115 i386 swapoff sys_swapoff __sys_ia32_swapoff +116 i386 sysinfo sys_sysinfo __compat_sys_ia32_sysinfo +117 i386 ipc sys_ipc __compat_sys_ia32_ipc +118 i386 fsync sys_fsync __sys_ia32_fsync 119 i386 sigreturn sys_sigreturn sys32_sigreturn -120 i386 clone sys_clone compat_sys_x86_clone -121 i386 setdomainname sys_setdomainname -122 i386 uname sys_newuname -123 i386 modify_ldt sys_modify_ldt -124 i386 adjtimex sys_adjtimex compat_sys_adjtimex -125 i386 mprotect sys_mprotect -126 i386 sigprocmask sys_sigprocmask compat_sys_sigprocmask +120 i386 clone sys_clone __compat_sys_ia32_x86_clone +121 i386 setdomainname sys_setdomainname __sys_ia32_setdomainname +122 i386 uname sys_newuname __sys_ia32_newuname +123 i386 modify_ldt sys_modify_ldt __sys_ia32_modify_ldt +124 i386 adjtimex sys_adjtimex __compat_sys_ia32_adjtimex +125 i386 mprotect sys_mprotect __sys_ia32_mprotect +126 i386 sigprocmask sys_sigprocmask __compat_sys_ia32_sigprocmask 127 i386 create_module -128 i386 init_module sys_init_module -129 i386 delete_module sys_delete_module +128 i386 init_module sys_init_module __sys_ia32_init_module +129 i386 delete_module sys_delete_module __sys_ia32_delete_module 130 i386 get_kernel_syms -131 i386 quotactl sys_quotactl compat_sys_quotactl32 -132 i386 getpgid sys_getpgid -133 i386 fchdir sys_fchdir -134 i386 bdflush sys_bdflush -135 i386 sysfs sys_sysfs -136 i386 personality sys_personality +131 i386 quotactl sys_quotactl __compat_sys_ia32_quotactl32 +132 i386 getpgid sys_getpgid __sys_ia32_getpgid +133 i386 fchdir sys_fchdir __sys_ia32_fchdir +134 i386 bdflush 
sys_bdflush __sys_ia32_bdflush +135 i386 sysfs sys_sysfs __sys_ia32_sysfs +136 i386 personality sys_personality __sys_ia32_personality 137 i386 afs_syscall -138 i386 setfsuid sys_setfsuid16 -139 i386 setfsgid sys_setfsgid16 -140 i386 _llseek sys_llseek -141 i386 getdents sys_getdents compat_sys_getdents -142 i386 _newselect sys_select compat_sys_select -143 i386 flock sys_flock -144 i386 msync sys_msync -145 i386 readv sys_readv compat_sys_readv -146 i386 writev sys_writev compat_sys_writev -147 i386 getsid sys_getsid -148 i386 fdatasync sys_fdatasync -149 i386 _sysctl sys_sysctl compat_sys_sysctl -150 i386 mlock sys_mlock -151 i386 munlock sys_munlock -152 i386 mlockall sys_mlockall +138 i386 setfsuid sys_setfsuid16 __sys_ia32_setfsuid16 +139 i386 setfsgid sys_setfsgid16 __sys_ia32_setfsgid16 +140 i386 _llseek sys_llseek __sys_ia32_llseek +141 i386 getdents sys_getdents __compat_sys_ia32_getdents +142 i386 _newselect sys_select __compat_sys_ia32_select +143 i386 flock sys_flock __sys_ia32_flock +144 i386 msync sys_msync __sys_ia32_msync +145 i386 readv sys_readv __compat_sys_ia32_readv +146 i386 writev sys_writev __compat_sys_ia32_writev +147 i386 getsid sys_getsid __sys_ia32_getsid +148 i386 fdatasync sys_fdatasync __sys_ia32_fdatasync +149 i386 _sysctl sys_sysctl __compat_sys_ia32_sysctl +150 i386 mlock sys_mlock __sys_ia32_mlock +151 i386 munlock sys_munlock __sys_ia32_munlock +152 i386 mlockall sys_mlockall __sys_ia32_mlockall 153 i386 munlockall sys_munlockall -154 i386 sched_setparam sys_sched_setparam -155 i386 sched_getparam sys_sched_getparam -156 i386 sched_setscheduler sys_sched_setscheduler -157 i386 sched_getscheduler sys_sched_getscheduler +154 i386 sched_setparam sys_sched_setparam __sys_ia32_sched_setparam +155 i386 sched_getparam sys_sched_getparam __sys_ia32_sched_getparam +156 i386 sched_setscheduler sys_sched_setscheduler __sys_ia32_sched_setscheduler +157 i386 sched_getscheduler sys_sched_getscheduler __sys_ia32_sched_getscheduler 158 i386 
sched_yield sys_sched_yield -159 i386 sched_get_priority_max sys_sched_get_priority_max -160 i386 sched_get_priority_min sys_sched_get_priority_min -161 i386 sched_rr_get_interval sys_sched_rr_get_interval compat_sys_sched_rr_get_interval -162 i386 nanosleep sys_nanosleep compat_sys_nanosleep -163 i386 mremap sys_mremap -164 i386 setresuid sys_setresuid16 -165 i386 getresuid sys_getresuid16 +159 i386 sched_get_priority_max sys_sched_get_priority_max __sys_ia32_sched_get_priority_max +160 i386 sched_get_priority_min sys_sched_get_priority_min __sys_ia32_sched_get_priority_min +161 i386 sched_rr_get_interval sys_sched_rr_get_interval __compat_sys_ia32_sched_rr_get_interval +162 i386 nanosleep sys_nanosleep __compat_sys_ia32_nanosleep +163 i386 mremap sys_mremap __sys_ia32_mremap +164 i386 setresuid sys_setresuid16 __sys_ia32_setresuid16 +165 i386 getresuid sys_getresuid16 __sys_ia32_getresuid16 166 i386 vm86 sys_vm86 sys_ni_syscall 167 i386 query_module -168 i386 poll sys_poll +168 i386 poll sys_poll __sys_ia32_poll 169 i386 nfsservctl -170 i386 setresgid sys_setresgid16 -171 i386 getresgid sys_getresgid16 -172 i386 prctl sys_prctl +170 i386 setresgid sys_setresgid16 __sys_ia32_setresgid16 +171 i386 getresgid sys_getresgid16 __sys_ia32_getresgid16 +172 i386 prctl sys_prctl __sys_ia32_prctl 173 i386 rt_sigreturn sys_rt_sigreturn sys32_rt_sigreturn -174 i386 rt_sigaction sys_rt_sigaction compat_sys_rt_sigaction -175 i386 rt_sigprocmask sys_rt_sigprocmask -176 i386 rt_sigpending sys_rt_sigpending compat_sys_rt_sigpending -177 i386 rt_sigtimedwait sys_rt_sigtimedwait compat_sys_rt_sigtimedwait -178 i386 rt_sigqueueinfo sys_rt_sigqueueinfo compat_sys_rt_sigqueueinfo -179 i386 rt_sigsuspend sys_rt_sigsuspend -180 i386 pread64 sys_pread64 compat_sys_x86_pread -181 i386 pwrite64 sys_pwrite64 compat_sys_x86_pwrite -182 i386 chown sys_chown16 -183 i386 getcwd sys_getcwd -184 i386 capget sys_capget -185 i386 capset sys_capset -186 i386 sigaltstack sys_sigaltstack 
compat_sys_sigaltstack -187 i386 sendfile sys_sendfile compat_sys_sendfile +174 i386 rt_sigaction sys_rt_sigaction __compat_sys_ia32_rt_sigaction +175 i386 rt_sigprocmask sys_rt_sigprocmask __sys_ia32_rt_sigprocmask +176 i386 rt_sigpending sys_rt_sigpending __compat_sys_ia32_rt_sigpending +177 i386 rt_sigtimedwait sys_rt_sigtimedwait __compat_sys_ia32_rt_sigtimedwait +178 i386 rt_sigqueueinfo sys_rt_sigqueueinfo __compat_sys_ia32_rt_sigqueueinfo +179 i386 rt_sigsuspend sys_rt_sigsuspend __sys_ia32_rt_sigsuspend +180 i386 pread64 sys_pread64 __compat_sys_ia32_x86_pread +181 i386 pwrite64 sys_pwrite64 __compat_sys_ia32_x86_pwrite +182 i386 chown sys_chown16 __sys_ia32_chown16 +183 i386 getcwd sys_getcwd __sys_ia32_getcwd +184 i386 capget sys_capget __sys_ia32_capget +185 i386 capset sys_capset __sys_ia32_capset +186 i386 sigaltstack sys_sigaltstack __compat_sys_ia32_sigaltstack +187 i386 sendfile sys_sendfile __compat_sys_ia32_sendfile 188 i386 getpmsg 189 i386 putpmsg 190 i386 vfork sys_vfork -191 i386 ugetrlimit sys_getrlimit compat_sys_getrlimit -192 i386 mmap2 sys_mmap_pgoff -193 i386 truncate64 sys_truncate64 compat_sys_x86_truncate64 -194 i386 ftruncate64 sys_ftruncate64 compat_sys_x86_ftruncate64 -195 i386 stat64 sys_stat64 compat_sys_x86_stat64 -196 i386 lstat64 sys_lstat64 compat_sys_x86_lstat64 -197 i386 fstat64 sys_fstat64 compat_sys_x86_fstat64 -198 i386 lchown32 sys_lchown +191 i386 ugetrlimit sys_getrlimit __compat_sys_ia32_getrlimit +192 i386 mmap2 sys_mmap_pgoff __sys_ia32_mmap_pgoff +193 i386 truncate64 sys_truncate64 __compat_sys_ia32_x86_truncate64 +194 i386 ftruncate64 sys_ftruncate64 __compat_sys_ia32_x86_ftruncate64 +195 i386 stat64 sys_stat64 __compat_sys_ia32_x86_stat64 +196 i386 lstat64 sys_lstat64 __compat_sys_ia32_x86_lstat64 +197 i386 fstat64 sys_fstat64 __compat_sys_ia32_x86_fstat64 +198 i386 lchown32 sys_lchown __sys_ia32_lchown 199 i386 getuid32 sys_getuid 200 i386 getgid32 sys_getgid 201 i386 geteuid32 sys_geteuid 202 i386 getegid32 
sys_getegid -203 i386 setreuid32 sys_setreuid -204 i386 setregid32 sys_setregid -205 i386 getgroups32 sys_getgroups -206 i386 setgroups32 sys_setgroups -207 i386 fchown32 sys_fchown -208 i386 setresuid32 sys_setresuid -209 i386 getresuid32 sys_getresuid -210 i386 setresgid32 sys_setresgid -211 i386 getresgid32 sys_getresgid -212 i386 chown32 sys_chown -213 i386 setuid32 sys_setuid -214 i386 setgid32 sys_setgid -215 i386 setfsuid32 sys_setfsuid -216 i386 setfsgid32 sys_setfsgid -217 i386 pivot_root sys_pivot_root -218 i386 mincore sys_mincore -219 i386 madvise sys_madvise -220 i386 getdents64 sys_getdents64 -221 i386 fcntl64 sys_fcntl64 compat_sys_fcntl64 +203 i386 setreuid32 sys_setreuid __sys_ia32_setreuid +204 i386 setregid32 sys_setregid __sys_ia32_setregid +205 i386 getgroups32 sys_getgroups __sys_ia32_getgroups +206 i386 setgroups32 sys_setgroups __sys_ia32_setgroups +207 i386 fchown32 sys_fchown __sys_ia32_fchown +208 i386 setresuid32 sys_setresuid __sys_ia32_setresuid +209 i386 getresuid32 sys_getresuid __sys_ia32_getresuid +210 i386 setresgid32 sys_setresgid __sys_ia32_setresgid +211 i386 getresgid32 sys_getresgid __sys_ia32_getresgid +212 i386 chown32 sys_chown __sys_ia32_chown +213 i386 setuid32 sys_setuid __sys_ia32_setuid +214 i386 setgid32 sys_setgid __sys_ia32_setgid +215 i386 setfsuid32 sys_setfsuid __sys_ia32_setfsuid +216 i386 setfsgid32 sys_setfsgid __sys_ia32_setfsgid +217 i386 pivot_root sys_pivot_root __sys_ia32_pivot_root +218 i386 mincore sys_mincore __sys_ia32_mincore +219 i386 madvise sys_madvise __sys_ia32_madvise +220 i386 getdents64 sys_getdents64 __sys_ia32_getdents64 +221 i386 fcntl64 sys_fcntl64 __compat_sys_ia32_fcntl64 # 222 is unused # 223 is unused 224 i386 gettid sys_gettid -225 i386 readahead sys_readahead compat_sys_x86_readahead -226 i386 setxattr sys_setxattr -227 i386 lsetxattr sys_lsetxattr -228 i386 fsetxattr sys_fsetxattr -229 i386 getxattr sys_getxattr -230 i386 lgetxattr sys_lgetxattr -231 i386 fgetxattr sys_fgetxattr 
-232 i386 listxattr sys_listxattr -233 i386 llistxattr sys_llistxattr -234 i386 flistxattr sys_flistxattr -235 i386 removexattr sys_removexattr -236 i386 lremovexattr sys_lremovexattr -237 i386 fremovexattr sys_fremovexattr -238 i386 tkill sys_tkill -239 i386 sendfile64 sys_sendfile64 -240 i386 futex sys_futex compat_sys_futex -241 i386 sched_setaffinity sys_sched_setaffinity compat_sys_sched_setaffinity -242 i386 sched_getaffinity sys_sched_getaffinity compat_sys_sched_getaffinity -243 i386 set_thread_area sys_set_thread_area -244 i386 get_thread_area sys_get_thread_area -245 i386 io_setup sys_io_setup compat_sys_io_setup -246 i386 io_destroy sys_io_destroy -247 i386 io_getevents sys_io_getevents compat_sys_io_getevents -248 i386 io_submit sys_io_submit compat_sys_io_submit -249 i386 io_cancel sys_io_cancel -250 i386 fadvise64 sys_fadvise64 compat_sys_x86_fadvise64 +225 i386 readahead sys_readahead __compat_sys_ia32_x86_readahead +226 i386 setxattr sys_setxattr __sys_ia32_setxattr +227 i386 lsetxattr sys_lsetxattr __sys_ia32_lsetxattr +228 i386 fsetxattr sys_fsetxattr __sys_ia32_fsetxattr +229 i386 getxattr sys_getxattr __sys_ia32_getxattr +230 i386 lgetxattr sys_lgetxattr __sys_ia32_lgetxattr +231 i386 fgetxattr sys_fgetxattr __sys_ia32_fgetxattr +232 i386 listxattr sys_listxattr __sys_ia32_listxattr +233 i386 llistxattr sys_llistxattr __sys_ia32_llistxattr +234 i386 flistxattr sys_flistxattr __sys_ia32_flistxattr +235 i386 removexattr sys_removexattr __sys_ia32_removexattr +236 i386 lremovexattr sys_lremovexattr __sys_ia32_lremovexattr +237 i386 fremovexattr sys_fremovexattr __sys_ia32_fremovexattr +238 i386 tkill sys_tkill __sys_ia32_tkill +239 i386 sendfile64 sys_sendfile64 __sys_ia32_sendfile64 +240 i386 futex sys_futex __compat_sys_ia32_futex +241 i386 sched_setaffinity sys_sched_setaffinity __compat_sys_ia32_sched_setaffinity +242 i386 sched_getaffinity sys_sched_getaffinity __compat_sys_ia32_sched_getaffinity +243 i386 set_thread_area sys_set_thread_area 
__sys_ia32_set_thread_area +244 i386 get_thread_area sys_get_thread_area __sys_ia32_get_thread_area +245 i386 io_setup sys_io_setup __compat_sys_ia32_io_setup +246 i386 io_destroy sys_io_destroy __sys_ia32_io_destroy +247 i386 io_getevents sys_io_getevents __compat_sys_ia32_io_getevents +248 i386 io_submit sys_io_submit __compat_sys_ia32_io_submit +249 i386 io_cancel sys_io_cancel __sys_ia32_io_cancel +250 i386 fadvise64 sys_fadvise64 __compat_sys_ia32_x86_fadvise64 # 251 is available for reuse (was briefly sys_set_zone_reclaim) -252 i386 exit_group sys_exit_group -253 i386 lookup_dcookie sys_lookup_dcookie compat_sys_lookup_dcookie -254 i386 epoll_create sys_epoll_create -255 i386 epoll_ctl sys_epoll_ctl -256 i386 epoll_wait sys_epoll_wait -257 i386 remap_file_pages sys_remap_file_pages -258 i386 set_tid_address sys_set_tid_address -259 i386 timer_create sys_timer_create compat_sys_timer_create -260 i386 timer_settime sys_timer_settime compat_sys_timer_settime -261 i386 timer_gettime sys_timer_gettime compat_sys_timer_gettime -262 i386 timer_getoverrun sys_timer_getoverrun -263 i386 timer_delete sys_timer_delete -264 i386 clock_settime sys_clock_settime compat_sys_clock_settime -265 i386 clock_gettime sys_clock_gettime compat_sys_clock_gettime -266 i386 clock_getres sys_clock_getres compat_sys_clock_getres -267 i386 clock_nanosleep sys_clock_nanosleep compat_sys_clock_nanosleep -268 i386 statfs64 sys_statfs64 compat_sys_statfs64 -269 i386 fstatfs64 sys_fstatfs64 compat_sys_fstatfs64 -270 i386 tgkill sys_tgkill -271 i386 utimes sys_utimes compat_sys_utimes -272 i386 fadvise64_64 sys_fadvise64_64 compat_sys_x86_fadvise64_64 +252 i386 exit_group sys_exit_group __sys_ia32_exit_group +253 i386 lookup_dcookie sys_lookup_dcookie __compat_sys_ia32_lookup_dcookie +254 i386 epoll_create sys_epoll_create __sys_ia32_epoll_create +255 i386 epoll_ctl sys_epoll_ctl __sys_ia32_epoll_ctl +256 i386 epoll_wait sys_epoll_wait __sys_ia32_epoll_wait +257 i386 remap_file_pages 
sys_remap_file_pages __sys_ia32_remap_file_pages +258 i386 set_tid_address sys_set_tid_address __sys_ia32_set_tid_address +259 i386 timer_create sys_timer_create __compat_sys_ia32_timer_create +260 i386 timer_settime sys_timer_settime __compat_sys_ia32_timer_settime +261 i386 timer_gettime sys_timer_gettime __compat_sys_ia32_timer_gettime +262 i386 timer_getoverrun sys_timer_getoverrun __sys_ia32_timer_getoverrun +263 i386 timer_delete sys_timer_delete __sys_ia32_timer_delete +264 i386 clock_settime sys_clock_settime __compat_sys_ia32_clock_settime +265 i386 clock_gettime sys_clock_gettime __compat_sys_ia32_clock_gettime +266 i386 clock_getres sys_clock_getres __compat_sys_ia32_clock_getres +267 i386 clock_nanosleep sys_clock_nanosleep __compat_sys_ia32_clock_nanosleep +268 i386 statfs64 sys_statfs64 __compat_sys_ia32_statfs64 +269 i386 fstatfs64 sys_fstatfs64 __compat_sys_ia32_fstatfs64 +270 i386 tgkill sys_tgkill __sys_ia32_tgkill +271 i386 utimes sys_utimes __compat_sys_ia32_utimes +272 i386 fadvise64_64 sys_fadvise64_64 __compat_sys_ia32_x86_fadvise64_64 273 i386 vserver -274 i386 mbind sys_mbind -275 i386 get_mempolicy sys_get_mempolicy compat_sys_get_mempolicy -276 i386 set_mempolicy sys_set_mempolicy -277 i386 mq_open sys_mq_open compat_sys_mq_open -278 i386 mq_unlink sys_mq_unlink -279 i386 mq_timedsend sys_mq_timedsend compat_sys_mq_timedsend -280 i386 mq_timedreceive sys_mq_timedreceive compat_sys_mq_timedreceive -281 i386 mq_notify sys_mq_notify compat_sys_mq_notify -282 i386 mq_getsetattr sys_mq_getsetattr compat_sys_mq_getsetattr -283 i386 kexec_load sys_kexec_load compat_sys_kexec_load -284 i386 waitid sys_waitid compat_sys_waitid +274 i386 mbind sys_mbind __sys_ia32_mbind +275 i386 get_mempolicy sys_get_mempolicy __compat_sys_ia32_get_mempolicy +276 i386 set_mempolicy sys_set_mempolicy __sys_ia32_set_mempolicy +277 i386 mq_open sys_mq_open __compat_sys_ia32_mq_open +278 i386 mq_unlink sys_mq_unlink __sys_ia32_mq_unlink +279 i386 mq_timedsend 
sys_mq_timedsend __compat_sys_ia32_mq_timedsend +280 i386 mq_timedreceive sys_mq_timedreceive __compat_sys_ia32_mq_timedreceive +281 i386 mq_notify sys_mq_notify __compat_sys_ia32_mq_notify +282 i386 mq_getsetattr sys_mq_getsetattr __compat_sys_ia32_mq_getsetattr +283 i386 kexec_load sys_kexec_load __compat_sys_ia32_kexec_load +284 i386 waitid sys_waitid __compat_sys_ia32_waitid # 285 sys_setaltroot -286 i386 add_key sys_add_key -287 i386 request_key sys_request_key -288 i386 keyctl sys_keyctl compat_sys_keyctl -289 i386 ioprio_set sys_ioprio_set -290 i386 ioprio_get sys_ioprio_get +286 i386 add_key sys_add_key __sys_ia32_add_key +287 i386 request_key sys_request_key __sys_ia32_request_key +288 i386 keyctl sys_keyctl __compat_sys_ia32_keyctl +289 i386 ioprio_set sys_ioprio_set __sys_ia32_ioprio_set +290 i386 ioprio_get sys_ioprio_get __sys_ia32_ioprio_get 291 i386 inotify_init sys_inotify_init -292 i386 inotify_add_watch sys_inotify_add_watch -293 i386 inotify_rm_watch sys_inotify_rm_watch -294 i386 migrate_pages sys_migrate_pages -295 i386 openat sys_openat compat_sys_openat -296 i386 mkdirat sys_mkdirat -297 i386 mknodat sys_mknodat -298 i386 fchownat sys_fchownat -299 i386 futimesat sys_futimesat compat_sys_futimesat -300 i386 fstatat64 sys_fstatat64 compat_sys_x86_fstatat -301 i386 unlinkat sys_unlinkat -302 i386 renameat sys_renameat -303 i386 linkat sys_linkat -304 i386 symlinkat sys_symlinkat -305 i386 readlinkat sys_readlinkat -306 i386 fchmodat sys_fchmodat -307 i386 faccessat sys_faccessat -308 i386 pselect6 sys_pselect6 compat_sys_pselect6 -309 i386 ppoll sys_ppoll compat_sys_ppoll -310 i386 unshare sys_unshare -311 i386 set_robust_list sys_set_robust_list compat_sys_set_robust_list -312 i386 get_robust_list sys_get_robust_list compat_sys_get_robust_list -313 i386 splice sys_splice -314 i386 sync_file_range sys_sync_file_range compat_sys_x86_sync_file_range -315 i386 tee sys_tee -316 i386 vmsplice sys_vmsplice compat_sys_vmsplice -317 i386 move_pages 
sys_move_pages compat_sys_move_pages -318 i386 getcpu sys_getcpu -319 i386 epoll_pwait sys_epoll_pwait -320 i386 utimensat sys_utimensat compat_sys_utimensat -321 i386 signalfd sys_signalfd compat_sys_signalfd -322 i386 timerfd_create sys_timerfd_create -323 i386 eventfd sys_eventfd -324 i386 fallocate sys_fallocate compat_sys_x86_fallocate -325 i386 timerfd_settime sys_timerfd_settime compat_sys_timerfd_settime -326 i386 timerfd_gettime sys_timerfd_gettime compat_sys_timerfd_gettime -327 i386 signalfd4 sys_signalfd4 compat_sys_signalfd4 -328 i386 eventfd2 sys_eventfd2 -329 i386 epoll_create1 sys_epoll_create1 -330 i386 dup3 sys_dup3 -331 i386 pipe2 sys_pipe2 -332 i386 inotify_init1 sys_inotify_init1 -333 i386 preadv sys_preadv compat_sys_preadv -334 i386 pwritev sys_pwritev compat_sys_pwritev -335 i386 rt_tgsigqueueinfo sys_rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo -336 i386 perf_event_open sys_perf_event_open -337 i386 recvmmsg sys_recvmmsg compat_sys_recvmmsg -338 i386 fanotify_init sys_fanotify_init -339 i386 fanotify_mark sys_fanotify_mark compat_sys_fanotify_mark -340 i386 prlimit64 sys_prlimit64 -341 i386 name_to_handle_at sys_name_to_handle_at -342 i386 open_by_handle_at sys_open_by_handle_at compat_sys_open_by_handle_at -343 i386 clock_adjtime sys_clock_adjtime compat_sys_clock_adjtime -344 i386 syncfs sys_syncfs -345 i386 sendmmsg sys_sendmmsg compat_sys_sendmmsg -346 i386 setns sys_setns -347 i386 process_vm_readv sys_process_vm_readv compat_sys_process_vm_readv -348 i386 process_vm_writev sys_process_vm_writev compat_sys_process_vm_writev -349 i386 kcmp sys_kcmp -350 i386 finit_module sys_finit_module -351 i386 sched_setattr sys_sched_setattr -352 i386 sched_getattr sys_sched_getattr -353 i386 renameat2 sys_renameat2 -354 i386 seccomp sys_seccomp -355 i386 getrandom sys_getrandom -356 i386 memfd_create sys_memfd_create -357 i386 bpf sys_bpf -358 i386 execveat sys_execveat compat_sys_execveat -359 i386 socket sys_socket -360 i386 socketpair 
sys_socketpair -361 i386 bind sys_bind -362 i386 connect sys_connect -363 i386 listen sys_listen -364 i386 accept4 sys_accept4 -365 i386 getsockopt sys_getsockopt compat_sys_getsockopt -366 i386 setsockopt sys_setsockopt compat_sys_setsockopt -367 i386 getsockname sys_getsockname -368 i386 getpeername sys_getpeername -369 i386 sendto sys_sendto -370 i386 sendmsg sys_sendmsg compat_sys_sendmsg -371 i386 recvfrom sys_recvfrom compat_sys_recvfrom -372 i386 recvmsg sys_recvmsg compat_sys_recvmsg -373 i386 shutdown sys_shutdown -374 i386 userfaultfd sys_userfaultfd -375 i386 membarrier sys_membarrier -376 i386 mlock2 sys_mlock2 -377 i386 copy_file_range sys_copy_file_range -378 i386 preadv2 sys_preadv2 compat_sys_preadv2 -379 i386 pwritev2 sys_pwritev2 compat_sys_pwritev2 -380 i386 pkey_mprotect sys_pkey_mprotect -381 i386 pkey_alloc sys_pkey_alloc -382 i386 pkey_free sys_pkey_free -383 i386 statx sys_statx -384 i386 arch_prctl sys_arch_prctl compat_sys_arch_prctl +292 i386 inotify_add_watch sys_inotify_add_watch __sys_ia32_inotify_add_watch +293 i386 inotify_rm_watch sys_inotify_rm_watch __sys_ia32_inotify_rm_watch +294 i386 migrate_pages sys_migrate_pages __sys_ia32_migrate_pages +295 i386 openat sys_openat __compat_sys_ia32_openat +296 i386 mkdirat sys_mkdirat __sys_ia32_mkdirat +297 i386 mknodat sys_mknodat __sys_ia32_mknodat +298 i386 fchownat sys_fchownat __sys_ia32_fchownat +299 i386 futimesat sys_futimesat __compat_sys_ia32_futimesat +300 i386 fstatat64 sys_fstatat64 __compat_sys_ia32_x86_fstatat +301 i386 unlinkat sys_unlinkat __sys_ia32_unlinkat +302 i386 renameat sys_renameat __sys_ia32_renameat +303 i386 linkat sys_linkat __sys_ia32_linkat +304 i386 symlinkat sys_symlinkat __sys_ia32_symlinkat +305 i386 readlinkat sys_readlinkat __sys_ia32_readlinkat +306 i386 fchmodat sys_fchmodat __sys_ia32_fchmodat +307 i386 faccessat sys_faccessat __sys_ia32_faccessat +308 i386 pselect6 sys_pselect6 __compat_sys_ia32_pselect6 +309 i386 ppoll sys_ppoll 
__compat_sys_ia32_ppoll +310 i386 unshare sys_unshare __sys_ia32_unshare +311 i386 set_robust_list sys_set_robust_list __compat_sys_ia32_set_robust_list +312 i386 get_robust_list sys_get_robust_list __compat_sys_ia32_get_robust_list +313 i386 splice sys_splice __sys_ia32_splice +314 i386 sync_file_range sys_sync_file_range __compat_sys_ia32_x86_sync_file_range +315 i386 tee sys_tee __sys_ia32_tee +316 i386 vmsplice sys_vmsplice __compat_sys_ia32_vmsplice +317 i386 move_pages sys_move_pages __compat_sys_ia32_move_pages +318 i386 getcpu sys_getcpu __sys_ia32_getcpu +319 i386 epoll_pwait sys_epoll_pwait __sys_ia32_epoll_pwait +320 i386 utimensat sys_utimensat __compat_sys_ia32_utimensat +321 i386 signalfd sys_signalfd __compat_sys_ia32_signalfd +322 i386 timerfd_create sys_timerfd_create __sys_ia32_timerfd_create +323 i386 eventfd sys_eventfd __sys_ia32_eventfd +324 i386 fallocate sys_fallocate __compat_sys_ia32_x86_fallocate +325 i386 timerfd_settime sys_timerfd_settime __compat_sys_ia32_timerfd_settime +326 i386 timerfd_gettime sys_timerfd_gettime __compat_sys_ia32_timerfd_gettime +327 i386 signalfd4 sys_signalfd4 __compat_sys_ia32_signalfd4 +328 i386 eventfd2 sys_eventfd2 __sys_ia32_eventfd2 +329 i386 epoll_create1 sys_epoll_create1 __sys_ia32_epoll_create1 +330 i386 dup3 sys_dup3 __sys_ia32_dup3 +331 i386 pipe2 sys_pipe2 __sys_ia32_pipe2 +332 i386 inotify_init1 sys_inotify_init1 __sys_ia32_inotify_init1 +333 i386 preadv sys_preadv __compat_sys_ia32_preadv +334 i386 pwritev sys_pwritev __compat_sys_ia32_pwritev +335 i386 rt_tgsigqueueinfo sys_rt_tgsigqueueinfo __compat_sys_ia32_rt_tgsigqueueinfo +336 i386 perf_event_open sys_perf_event_open __sys_ia32_perf_event_open +337 i386 recvmmsg sys_recvmmsg __compat_sys_ia32_recvmmsg +338 i386 fanotify_init sys_fanotify_init __sys_ia32_fanotify_init +339 i386 fanotify_mark sys_fanotify_mark __compat_sys_ia32_fanotify_mark +340 i386 prlimit64 sys_prlimit64 __sys_ia32_prlimit64 +341 i386 name_to_handle_at 
sys_name_to_handle_at __sys_ia32_name_to_handle_at +342 i386 open_by_handle_at sys_open_by_handle_at __compat_sys_ia32_open_by_handle_at +343 i386 clock_adjtime sys_clock_adjtime __compat_sys_ia32_clock_adjtime +344 i386 syncfs sys_syncfs __sys_ia32_syncfs +345 i386 sendmmsg sys_sendmmsg __compat_sys_ia32_sendmmsg +346 i386 setns sys_setns __sys_ia32_setns +347 i386 process_vm_readv sys_process_vm_readv __compat_sys_ia32_process_vm_readv +348 i386 process_vm_writev sys_process_vm_writev __compat_sys_ia32_process_vm_writev +349 i386 kcmp sys_kcmp __sys_ia32_kcmp +350 i386 finit_module sys_finit_module __sys_ia32_finit_module +351 i386 sched_setattr sys_sched_setattr __sys_ia32_sched_setattr +352 i386 sched_getattr sys_sched_getattr __sys_ia32_sched_getattr +353 i386 renameat2 sys_renameat2 __sys_ia32_renameat2 +354 i386 seccomp sys_seccomp __sys_ia32_seccomp +355 i386 getrandom sys_getrandom __sys_ia32_getrandom +356 i386 memfd_create sys_memfd_create __sys_ia32_memfd_create +357 i386 bpf sys_bpf __sys_ia32_bpf +358 i386 execveat sys_execveat __compat_sys_ia32_execveat +359 i386 socket sys_socket __sys_ia32_socket +360 i386 socketpair sys_socketpair __sys_ia32_socketpair +361 i386 bind sys_bind __sys_ia32_bind +362 i386 connect sys_connect __sys_ia32_connect +363 i386 listen sys_listen __sys_ia32_listen +364 i386 accept4 sys_accept4 __sys_ia32_accept4 +365 i386 getsockopt sys_getsockopt __compat_sys_ia32_getsockopt +366 i386 setsockopt sys_setsockopt __compat_sys_ia32_setsockopt +367 i386 getsockname sys_getsockname __sys_ia32_getsockname +368 i386 getpeername sys_getpeername __sys_ia32_getpeername +369 i386 sendto sys_sendto __sys_ia32_sendto +370 i386 sendmsg sys_sendmsg __compat_sys_ia32_sendmsg +371 i386 recvfrom sys_recvfrom __compat_sys_ia32_recvfrom +372 i386 recvmsg sys_recvmsg __compat_sys_ia32_recvmsg +373 i386 shutdown sys_shutdown __sys_ia32_shutdown +374 i386 userfaultfd sys_userfaultfd __sys_ia32_userfaultfd +375 i386 membarrier sys_membarrier 
__sys_ia32_membarrier +376 i386 mlock2 sys_mlock2 __sys_ia32_mlock2 +377 i386 copy_file_range sys_copy_file_range __sys_ia32_copy_file_range +378 i386 preadv2 sys_preadv2 __compat_sys_ia32_preadv2 +379 i386 pwritev2 sys_pwritev2 __compat_sys_ia32_pwritev2 +380 i386 pkey_mprotect sys_pkey_mprotect __sys_ia32_pkey_mprotect +381 i386 pkey_alloc sys_pkey_alloc __sys_ia32_pkey_alloc +382 i386 pkey_free sys_pkey_free __sys_ia32_pkey_free +383 i386 statx sys_statx __sys_ia32_statx +384 i386 arch_prctl sys_arch_prctl __compat_sys_ia32_arch_prctl diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index 5aef183e2f85..a83c0f7f462f 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -342,41 +342,43 @@ # # x32-specific system call numbers start at 512 to avoid cache impact -# for native 64-bit operation. +# for native 64-bit operation. The __compat_sys_x32 stubs are created +# on-the-fly for compat_sys_*() compatibility system calls if X86_X32 +# is defined. 
# -512 x32 rt_sigaction compat_sys_rt_sigaction +512 x32 rt_sigaction __compat_sys_x32_rt_sigaction 513 x32 rt_sigreturn sys32_x32_rt_sigreturn -514 x32 ioctl compat_sys_ioctl -515 x32 readv compat_sys_readv -516 x32 writev compat_sys_writev -517 x32 recvfrom compat_sys_recvfrom -518 x32 sendmsg compat_sys_sendmsg -519 x32 recvmsg compat_sys_recvmsg -520 x32 execve compat_sys_execve/ptregs -521 x32 ptrace compat_sys_ptrace -522 x32 rt_sigpending compat_sys_rt_sigpending -523 x32 rt_sigtimedwait compat_sys_rt_sigtimedwait -524 x32 rt_sigqueueinfo compat_sys_rt_sigqueueinfo -525 x32 sigaltstack compat_sys_sigaltstack -526 x32 timer_create compat_sys_timer_create -527 x32 mq_notify compat_sys_mq_notify -528 x32 kexec_load compat_sys_kexec_load -529 x32 waitid compat_sys_waitid -530 x32 set_robust_list compat_sys_set_robust_list -531 x32 get_robust_list compat_sys_get_robust_list -532 x32 vmsplice compat_sys_vmsplice -533 x32 move_pages compat_sys_move_pages -534 x32 preadv compat_sys_preadv64 -535 x32 pwritev compat_sys_pwritev64 -536 x32 rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo -537 x32 recvmmsg compat_sys_recvmmsg -538 x32 sendmmsg compat_sys_sendmmsg -539 x32 process_vm_readv compat_sys_process_vm_readv -540 x32 process_vm_writev compat_sys_process_vm_writev -541 x32 setsockopt compat_sys_setsockopt -542 x32 getsockopt compat_sys_getsockopt -543 x32 io_setup compat_sys_io_setup -544 x32 io_submit compat_sys_io_submit -545 x32 execveat compat_sys_execveat/ptregs -546 x32 preadv2 compat_sys_preadv64v2 -547 x32 pwritev2 compat_sys_pwritev64v2 +514 x32 ioctl __compat_sys_x32_ioctl +515 x32 readv __compat_sys_x32_readv +516 x32 writev __compat_sys_x32_writev +517 x32 recvfrom __compat_sys_x32_recvfrom +518 x32 sendmsg __compat_sys_x32_sendmsg +519 x32 recvmsg __compat_sys_x32_recvmsg +520 x32 execve __compat_sys_x32_execve/ptregs +521 x32 ptrace __compat_sys_x32_ptrace +522 x32 rt_sigpending __compat_sys_x32_rt_sigpending +523 x32 rt_sigtimedwait 
__compat_sys_x32_rt_sigtimedwait +524 x32 rt_sigqueueinfo __compat_sys_x32_rt_sigqueueinfo +525 x32 sigaltstack __compat_sys_x32_sigaltstack +526 x32 timer_create __compat_sys_x32_timer_create +527 x32 mq_notify __compat_sys_x32_mq_notify +528 x32 kexec_load __compat_sys_x32_kexec_load +529 x32 waitid __compat_sys_x32_waitid +530 x32 set_robust_list __compat_sys_x32_set_robust_list +531 x32 get_robust_list __compat_sys_x32_get_robust_list +532 x32 vmsplice __compat_sys_x32_vmsplice +533 x32 move_pages __compat_sys_x32_move_pages +534 x32 preadv __compat_sys_x32_preadv64 +535 x32 pwritev __compat_sys_x32_pwritev64 +536 x32 rt_tgsigqueueinfo __compat_sys_x32_rt_tgsigqueueinfo +537 x32 recvmmsg __compat_sys_x32_recvmmsg +538 x32 sendmmsg __compat_sys_x32_sendmmsg +539 x32 process_vm_readv __compat_sys_x32_process_vm_readv +540 x32 process_vm_writev __compat_sys_x32_process_vm_writev +541 x32 setsockopt __compat_sys_x32_setsockopt +542 x32 getsockopt __compat_sys_x32_getsockopt +543 x32 io_setup __compat_sys_x32_io_setup +544 x32 io_submit __compat_sys_x32_io_submit +545 x32 execveat __compat_sys_x32_execveat/ptregs +546 x32 preadv2 __compat_sys_x32_preadv64v2 +547 x32 pwritev2 __compat_sys_x32_pwritev64v2 diff --git a/arch/x86/include/asm/syscall_wrapper.h b/arch/x86/include/asm/syscall_wrapper.h index 702bdee377af..49d7e4970110 100644 --- a/arch/x86/include/asm/syscall_wrapper.h +++ b/arch/x86/include/asm/syscall_wrapper.h @@ -6,6 +6,111 @@ #ifndef _ASM_X86_SYSCALL_WRAPPER_H #define _ASM_X86_SYSCALL_WRAPPER_H +/* Mapping of registers to parameters for syscalls on x86-64 and x32 */ +#define SC_X86_64_REGS_TO_ARGS(x, ...) \ + __MAP(x,__SC_ARGS \ + ,,regs->di,,regs->si,,regs->dx \ + ,,regs->r10,,regs->r8,,regs->r9) \ + +/* Mapping of registers to parameters for syscalls on i386 */ +#define SC_IA32_REGS_TO_ARGS(x, ...) 
\ + __MAP(x,__SC_ARGS \ + ,,(unsigned int)regs->bx,,(unsigned int)regs->cx \ + ,,(unsigned int)regs->dx,,(unsigned int)regs->si \ + ,,(unsigned int)regs->di,,(unsigned int)regs->bp) + +#ifdef CONFIG_IA32_EMULATION +/* + * For IA32 emulation, we need to handle "compat" syscalls *and* create + * additional wrappers (aptly named __sys_ia32_xyzzy) which decode the + * ia32 regs in the proper order for shared or "common" syscalls. As some + * syscalls may not be implemented, we need to expand COND_SYSCALL in + * kernel/sys_ni.c and SYS_NI in kernel/time/posix-stubs.c to cover this + * case as well. + */ +#define COMPAT_SC_IA32_STUBx(x, name, ...) \ + asmlinkage long __compat_sys_ia32##name(const struct pt_regs *regs);\ + ALLOW_ERROR_INJECTION(__compat_sys_ia32##name, ERRNO); \ + asmlinkage long __compat_sys_ia32##name(const struct pt_regs *regs)\ + { \ + return c_SyS##name(SC_IA32_REGS_TO_ARGS(x,__VA_ARGS__));\ + } \ + +#define SC_IA32_WRAPPERx(x, name, ...) \ + asmlinkage long __sys_ia32##name(const struct pt_regs *regs); \ + ALLOW_ERROR_INJECTION(__sys_ia32##name, ERRNO); \ + asmlinkage long __sys_ia32##name(const struct pt_regs *regs) \ + { \ + return SyS##name(SC_IA32_REGS_TO_ARGS(x,__VA_ARGS__)); \ + } + +#define COND_SYSCALL(name) \ + cond_syscall(sys_##name); \ + cond_syscall(__sys_ia32_##name) + +#define SYS_NI(name) \ + SYSCALL_ALIAS(sys_##name, sys_ni_posix_timers); \ + SYSCALL_ALIAS(__sys_ia32_##name, sys_ni_posix_timers) + +#else /* CONFIG_IA32_EMULATION */ +#define COMPAT_SC_IA32_STUBx(x, name, ...) +#define SC_IA32_WRAPPERx(x, fullname, name, ...) +#endif /* CONFIG_IA32_EMULATION */ + + +#ifdef CONFIG_X86_X32 +/* + * For the x32 ABI, we need to create a stub for compat_sys_*() which is aware + * of the x86-64-style parameter ordering of x32 syscalls. The syscalls common + * with x86_64 obviously do not need such care. + */ +#define COMPAT_SC_X32_STUBx(x, name, ...) 
\ + asmlinkage long __compat_sys_x32##name(const struct pt_regs *regs);\ + ALLOW_ERROR_INJECTION(__compat_sys_x32##name, ERRNO); \ + asmlinkage long __compat_sys_x32##name(const struct pt_regs *regs)\ + { \ + return c_SyS##name(SC_X86_64_REGS_TO_ARGS(x,__VA_ARGS__));\ + } \ + +#else /* CONFIG_X86_X32 */ +#define COMPAT_SC_X32_STUBx(x, name, ...) +#endif /* CONFIG_X86_X32 */ + + +#ifdef CONFIG_COMPAT +/* + * Compat means IA32_EMULATION and/or X86_X32. As they use a different + * mapping of registers to parameters, we need to generate stubs for each + * of them. There is no need to implement COMPAT_SYSCALL_DEFINE0, as it is + * unused on x86. + */ +#define COMPAT_SYSCALL_DEFINEx(x, name, ...) \ + static long c_SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ + static inline long C_SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__));\ + COMPAT_SC_IA32_STUBx(x, name, __VA_ARGS__) \ + COMPAT_SC_X32_STUBx(x, name, __VA_ARGS__) \ + static long c_SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \ + { \ + return C_SYSC##name(__MAP(x,__SC_DELOUSE,__VA_ARGS__)); \ + } \ + static inline long C_SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__)) + +/* + * As some compat syscalls may not be implemented, we need to expand + * COND_SYSCALL_COMPAT in kernel/sys_ni.c and COMPAT_SYS_NI in + * kernel/time/posix-stubs.c to cover this case as well. + */ +#define COND_SYSCALL_COMPAT(name) \ + cond_syscall(__compat_sys_ia32_##name); \ + cond_syscall(__compat_sys_x32_##name) + +#define COMPAT_SYS_NI(name) \ + SYSCALL_ALIAS(__compat_sys_ia32_##name, sys_ni_posix_timers); \ + SYSCALL_ALIAS(__compat_sys_x32_##name, sys_ni_posix_timers) + +#endif /* CONFIG_COMPAT */ + + /* * Instead of the generic __SYSCALL_DEFINEx() definition, this macro takes * struct pt_regs *regs as the only argument of the syscall stub named @@ -34,9 +139,14 @@ * This approach avoids leaking random user-provided register content down * the call chain. 
* + * If IA32_EMULATION is enabled, this macro generates an additional wrapper + * named __sys_ia32_*() which decodes the struct pt_regs *regs according + * to the i386 calling convention (bx, cx, dx, si, di, bp). + * * As the generic SYSCALL_DEFINE0() macro does not decode any parameters for * obvious reasons, and passing struct pt_regs *regs to it in %rdi does not - * hurt, there is no need to override it. + * hurt, there is no need to override it, or to define it differently for + * IA32_EMULATION. */ #define __SYSCALL_DEFINEx(x, name, ...) \ asmlinkage long sys##name(const struct pt_regs *regs); \ @@ -45,10 +155,9 @@ static inline long SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \ asmlinkage long sys##name(const struct pt_regs *regs) \ { \ - return SyS##name(__MAP(x,__SC_ARGS \ - ,,regs->di,,regs->si,,regs->dx \ - ,,regs->r10,,regs->r8,,regs->r9)); \ + return SyS##name(SC_X86_64_REGS_TO_ARGS(x,__VA_ARGS__));\ } \ + SC_IA32_WRAPPERx(x, name, __VA_ARGS__) \ static long SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \ { \ long ret = SYSC##name(__MAP(x,__SC_CAST,__VA_ARGS__)); \ -- cgit v1.2.3 From f8781c4a226319fe60e652118b90cf094ccfe747 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Thu, 5 Apr 2018 11:53:05 +0200 Subject: syscalls/x86: Unconditionally enable 'struct pt_regs' based syscalls on x86_64 Removing CONFIG_SYSCALL_PTREGS from arch/x86/Kconfig and simply selecting ARCH_HAS_SYSCALL_WRAPPER unconditionally on x86-64 allows us to simplify several codepaths. Signed-off-by: Dominik Brodowski Acked-by: Linus Torvalds Cc: Al Viro Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180405095307.3730-7-linux@dominikbrodowski.net Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 6 +----- arch/x86/entry/common.c | 10 ++-------- arch/x86/entry/syscall_32.c | 6 +++--- arch/x86/entry/syscall_64.c | 5 ----- arch/x86/entry/vsyscall/vsyscall_64.c | 18 ------------------ arch/x86/include/asm/syscall.h | 4 ++-- arch/x86/include/asm/syscalls.h | 20 ++++---------------- 7 files changed, 12 insertions(+), 57 deletions(-) (limited to 'arch/x86/Kconfig') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 7bbd6a174722..bcdd3e0e2ef5 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -29,6 +29,7 @@ config X86_64 select HAVE_ARCH_SOFT_DIRTY select MODULES_USE_ELF_RELA select X86_DEV_DMA_OPS + select ARCH_HAS_SYSCALL_WRAPPER # # Arch settings @@ -2954,8 +2955,3 @@ source "crypto/Kconfig" source "arch/x86/kvm/Kconfig" source "lib/Kconfig" - -config SYSCALL_PTREGS - def_bool y - depends on X86_64 - select ARCH_HAS_SYSCALL_WRAPPER diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 425f798b39e3..fbf6a6c3fd2d 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -284,13 +284,7 @@ __visible void do_syscall_64(unsigned long nr, struct pt_regs *regs) nr &= __SYSCALL_MASK; if (likely(nr < NR_syscalls)) { nr = array_index_nospec(nr, NR_syscalls); -#ifdef CONFIG_SYSCALL_PTREGS regs->ax = sys_call_table[nr](regs); -#else - regs->ax = sys_call_table[nr]( - regs->di, regs->si, regs->dx, - regs->r10, regs->r8, regs->r9); -#endif } syscall_return_slowpath(regs); @@ -325,7 +319,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs) if (likely(nr < IA32_NR_syscalls)) { nr = array_index_nospec(nr, IA32_NR_syscalls); -#ifdef CONFIG_SYSCALL_PTREGS +#ifdef CONFIG_IA32_EMULATION regs->ax = ia32_sys_call_table[nr](regs); #else /* @@ -338,7 +332,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs) 
(unsigned int)regs->bx, (unsigned int)regs->cx, (unsigned int)regs->dx, (unsigned int)regs->si, (unsigned int)regs->di, (unsigned int)regs->bp); -#endif /* CONFIG_SYSCALL_PTREGS */ +#endif /* CONFIG_IA32_EMULATION */ } syscall_return_slowpath(regs); diff --git a/arch/x86/entry/syscall_32.c b/arch/x86/entry/syscall_32.c index 47060dd8efb1..aa3336a7cb15 100644 --- a/arch/x86/entry/syscall_32.c +++ b/arch/x86/entry/syscall_32.c @@ -7,17 +7,17 @@ #include #include -#ifdef CONFIG_SYSCALL_PTREGS +#ifdef CONFIG_IA32_EMULATION /* On X86_64, we use struct pt_regs * to pass parameters to syscalls */ #define __SYSCALL_I386(nr, sym, qual) extern asmlinkage long sym(const struct pt_regs *); /* this is a lie, but it does not hurt as sys_ni_syscall just returns -EINVAL */ extern asmlinkage long sys_ni_syscall(const struct pt_regs *); -#else /* CONFIG_SYSCALL_PTREGS */ +#else /* CONFIG_IA32_EMULATION */ #define __SYSCALL_I386(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); -#endif /* CONFIG_SYSCALL_PTREGS */ +#endif /* CONFIG_IA32_EMULATION */ #include #undef __SYSCALL_I386 diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c index 6197850adf91..d5252bc1e380 100644 --- a/arch/x86/entry/syscall_64.c +++ b/arch/x86/entry/syscall_64.c @@ -7,14 +7,9 @@ #include #include -#ifdef CONFIG_SYSCALL_PTREGS /* this is a lie, but it does not hurt as sys_ni_syscall just returns -EINVAL */ extern asmlinkage long sys_ni_syscall(const struct pt_regs *); #define __SYSCALL_64(nr, sym, qual) extern asmlinkage long sym(const struct pt_regs *); -#else /* CONFIG_SYSCALL_PTREGS */ -extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); -#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long 
sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); -#endif /* CONFIG_SYSCALL_PTREGS */ #include #undef __SYSCALL_64 diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c index 05eebbf9b989..20b3d4a88ee4 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -127,9 +127,7 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) int vsyscall_nr, syscall_nr, tmp; int prev_sig_on_uaccess_err; long ret; -#ifdef CONFIG_SYSCALL_PTREGS unsigned long orig_dx; -#endif /* * No point in checking CS -- the only way to get here is a user mode @@ -230,38 +228,22 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) ret = -EFAULT; switch (vsyscall_nr) { case 0: -#ifdef CONFIG_SYSCALL_PTREGS /* this decodes regs->di and regs->si on its own */ ret = sys_gettimeofday(regs); -#else - ret = sys_gettimeofday( - (struct timeval __user *)regs->di, - (struct timezone __user *)regs->si); -#endif /* CONFIG_SYSCALL_PTREGS */ break; case 1: -#ifdef CONFIG_SYSCALL_PTREGS /* this decodes regs->di on its own */ ret = sys_time(regs); -#else - ret = sys_time((time_t __user *)regs->di); -#endif /* CONFIG_SYSCALL_PTREGS */ break; case 2: -#ifdef CONFIG_SYSCALL_PTREGS /* while we could clobber regs->dx, we didn't in the past... 
*/ orig_dx = regs->dx; regs->dx = 0; /* this decodes regs->di, regs->si and regs->dx on its own */ ret = sys_getcpu(regs); regs->dx = orig_dx; -#else - ret = sys_getcpu((unsigned __user *)regs->di, - (unsigned __user *)regs->si, - NULL); -#endif /* CONFIG_SYSCALL_PTREGS */ break; } diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h index 17c62373a6f9..d653139857af 100644 --- a/arch/x86/include/asm/syscall.h +++ b/arch/x86/include/asm/syscall.h @@ -20,13 +20,13 @@ #include /* for TS_COMPAT */ #include -#ifdef CONFIG_SYSCALL_PTREGS +#ifdef CONFIG_X86_64 typedef asmlinkage long (*sys_call_ptr_t)(const struct pt_regs *); #else typedef asmlinkage long (*sys_call_ptr_t)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); -#endif /* CONFIG_SYSCALL_PTREGS */ +#endif /* CONFIG_X86_64 */ extern const sys_call_ptr_t sys_call_table[]; #if defined(CONFIG_X86_32) diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index e4ad93c05f02..d4d18d94695c 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h @@ -19,10 +19,10 @@ /* kernel/ioport.c */ long ksys_ioperm(unsigned long from, unsigned long num, int turn_on); -#ifndef CONFIG_SYSCALL_PTREGS -/* - * If CONFIG_SYSCALL_PTREGS is enabled, a different syscall calling convention - * is used. 
Do not include these -- invalid -- prototypes then +#ifdef CONFIG_X86_32 +/* + * These definitions are only valid on pure 32-bit systems; x86-64 uses a + * different syscall calling convention */ asmlinkage long sys_ioperm(unsigned long, unsigned long, int); asmlinkage long sys_iopl(unsigned int); @@ -38,7 +38,6 @@ asmlinkage long sys_set_thread_area(struct user_desc __user *); asmlinkage long sys_get_thread_area(struct user_desc __user *); /* X86_32 only */ -#ifdef CONFIG_X86_32 /* kernel/signal.c */ asmlinkage long sys_sigreturn(void); @@ -48,16 +47,5 @@ struct vm86_struct; asmlinkage long sys_vm86old(struct vm86_struct __user *); asmlinkage long sys_vm86(unsigned long, unsigned long); -#else /* CONFIG_X86_32 */ - -/* X86_64 only */ -/* kernel/process_64.c */ -asmlinkage long sys_arch_prctl(int, unsigned long); - -/* kernel/sys_x86_64.c */ -asmlinkage long sys_mmap(unsigned long, unsigned long, unsigned long, - unsigned long, unsigned long, unsigned long); - #endif /* CONFIG_X86_32 */ -#endif /* CONFIG_SYSCALL_PTREGS */ #endif /* _ASM_X86_SYSCALLS_H */ -- cgit v1.2.3