From 784d5699eddc55878627da20d3fe0c8542e2f1a2 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 11 Jan 2016 11:04:34 -0500 Subject: x86: move exports to actual definitions Signed-off-by: Al Viro --- arch/x86/include/asm/export.h | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 arch/x86/include/asm/export.h (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/export.h b/arch/x86/include/asm/export.h new file mode 100644 index 000000000000..138de56b13eb --- /dev/null +++ b/arch/x86/include/asm/export.h @@ -0,0 +1,4 @@ +#ifdef CONFIG_64BIT +#define KSYM_ALIGN 16 +#endif +#include -- cgit v1.2.3 From 799bc3c51b2b120ca6e59e702ede23fff2efaf43 Mon Sep 17 00:00:00 2001 From: Lance Richardson Date: Thu, 22 Sep 2016 10:03:57 -0400 Subject: percpu: eliminate two sparse warnings Fix two cases where a __percpu pointer cast drops __percpu. Signed-off-by: Lance Richardson Signed-off-by: Tejun Heo --- arch/x86/include/asm/percpu.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index e02e3f80d363..84f58de08c2b 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -521,7 +521,8 @@ do { \ static __always_inline bool x86_this_cpu_constant_test_bit(unsigned int nr, const unsigned long __percpu *addr) { - unsigned long __percpu *a = (unsigned long *)addr + nr / BITS_PER_LONG; + unsigned long __percpu *a = + (unsigned long __percpu *)addr + nr / BITS_PER_LONG; #ifdef CONFIG_X86_64 return ((1UL << (nr % BITS_PER_LONG)) & raw_cpu_read_8(*a)) != 0; @@ -538,7 +539,7 @@ static inline bool x86_this_cpu_variable_test_bit(int nr, asm volatile("bt "__percpu_arg(2)",%1\n\t" CC_SET(c) : CC_OUT(c) (oldbit) - : "m" (*(unsigned long *)addr), "Ir" (nr)); + : "m" (*(unsigned long __percpu *)addr), "Ir" (nr)); return oldbit; } -- cgit v1.2.3 From c836eeda3e1e652d424bbbbb908f07eb7380448c Mon Sep 17 00:00:00 2001 From: "Longpeng(Mike)" Date: Fri, 14 Oct 2016 08:42:20 +0800 Subject: x86: Remove duplicate rtit status MSR macro The MSR_IA32_RTIT_STATUS is defined twice, so remove one. Signed-off-by: Longpeng(Mike) Acked-by: Borislav Petkov Cc: len.brown@intel.com Cc: peterz@infradead.org Cc: rafael.j.wysocki@intel.com Cc: alexander.shishkin@linux.intel.com Cc: ray.huang@amd.com Cc: Aravind.Gopalakrishnan@amd.com Cc: wu.wubin@huawei.com Cc: srinivas.pandruvada@linux.intel.com Cc: zhaoshenglong@huawei.com Cc: vladimir_zapolskiy@mentor.com Link: http://lkml.kernel.org/r/1476405740-80816-1-git-send-email-longpeng2@huawei.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/msr-index.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 56f4c6676b29..78f3760ca1f2 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -88,7 +88,6 @@ #define MSR_IA32_RTIT_CTL 0x00000570 #define MSR_IA32_RTIT_STATUS 0x00000571 -#define MSR_IA32_RTIT_STATUS 0x00000571 #define MSR_IA32_RTIT_ADDR0_A 0x00000580 #define MSR_IA32_RTIT_ADDR0_B 0x00000581 #define MSR_IA32_RTIT_ADDR1_A 0x00000582 -- cgit v1.2.3 From 0047f59834e5947d45f34f5f12eb330d158f700b Mon Sep 17 00:00:00 2001 From: Piotr Luc Date: Wed, 12 Oct 2016 20:05:20 +0200 Subject: x86/cpu/intel: Add Knights Mill to Intel family Add CPUID of Knights Mill (KNM) processor to Intel family list. Signed-off-by: Piotr Luc Reviewed-by: Dave Hansen Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20161012180520.30976-1-piotr.luc@intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/intel-family.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index 9ae5ab80a497..34a46dc076d3 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -64,5 +64,6 @@ /* Xeon Phi */ #define INTEL_FAM6_XEON_PHI_KNL 0x57 /* Knights Landing */ +#define INTEL_FAM6_XEON_PHI_KNM 0x85 /* Knights Mill */ #endif /* _ASM_X86_INTEL_FAMILY_H */ -- cgit v1.2.3 From 55a76b59b5fef408442e16121faa9eb00a65fd50 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Thu, 13 Oct 2016 16:26:15 -0500 Subject: locking/rwsem/x86: Add stack frame dependency for ____down_write() Arnd reported the following objtool warning: kernel/locking/rwsem.o: warning: objtool: down_write_killable()+0x16: call without frame pointer save/setup The warning means gcc placed the ____down_write() inline asm (and its call instruction) before the frame pointer setup in down_write_killable(), which breaks frame pointer convention and can result in incorrect stack traces. Force the stack frame to be created before the call instruction by listing the stack pointer as an output operand in the inline asm statement. Reported-by: Arnd Bergmann Signed-off-by: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1188b7015f04baf361e59de499ee2d7272c59dce.1476393828.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/rwsem.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h index 3d33a719f5c1..a34e0d4b957d 100644 --- a/arch/x86/include/asm/rwsem.h +++ b/arch/x86/include/asm/rwsem.h @@ -103,8 +103,10 @@ static inline bool __down_read_trylock(struct rw_semaphore *sem) ({ \ long tmp; \ struct rw_semaphore* ret; \ + register void *__sp asm(_ASM_SP); \ + \ asm volatile("# beginning down_write\n\t" \ - LOCK_PREFIX " xadd %1,(%3)\n\t" \ + LOCK_PREFIX " xadd %1,(%4)\n\t" \ /* adds 0xffff0001, returns the old value */ \ " test " __ASM_SEL(%w1,%k1) "," __ASM_SEL(%w1,%k1) "\n\t" \ /* was the active mask 0 before? */\ @@ -112,7 +114,7 @@ static inline bool __down_read_trylock(struct rw_semaphore *sem) " call " slow_path "\n" \ "1:\n" \ "# ending down_write" \ - : "+m" (sem->count), "=d" (tmp), "=a" (ret) \ + : "+m" (sem->count), "=d" (tmp), "=a" (ret), "+r" (__sp) \ : "a" (sem), "1" (RWSEM_ACTIVE_WRITE_BIAS) \ : "memory", "cc"); \ ret; \ -- cgit v1.2.3 From 8214899342981dbd49ae24aadbbd19e9e7830684 Mon Sep 17 00:00:00 2001 From: Piotr Luc Date: Tue, 18 Oct 2016 17:01:11 +0200 Subject: x86/cpufeature: Add AVX512_4VNNIW and AVX512_4FMAPS features AVX512_4VNNIW - Vector instructions for deep learning enhanced word variable precision. AVX512_4FMAPS - Vector instructions for deep learning floating-point single precision. These new instructions are to be used in future Intel Xeon & Xeon Phi processors. The bits 2&3 of CPUID[level:0x07, EDX] inform that new instructions are supported by a processor. The spec can be found in the Intel Software Developer Manual (SDM) or in the Instruction Set Extensions Programming Reference (ISE). Define new feature flags to enumerate the new instructions in /proc/cpuinfo accordingly to CPUID bits and add the required xsave extensions which are required for proper operation. Signed-off-by: Piotr Luc Cc: Denys Vlasenko Cc: Peter Zijlstra Cc: Brian Gerst Cc: Dave Hansen Cc: Borislav Petkov Cc: Andy Lutomirski Cc: Josh Poimboeuf Cc: Linus Torvalds Link: http://lkml.kernel.org/r/20161018150111.29926-1-piotr.luc@intel.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/cpufeatures.h | 2 ++ arch/x86/kernel/cpu/scattered.c | 2 ++ arch/x86/kernel/fpu/xstate.c | 2 ++ tools/arch/x86/include/asm/cpufeatures.h | 2 ++ 4 files changed, 8 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 1188bc849ee3..a39629206864 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -194,6 +194,8 @@ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ #define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ +#define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */ +#define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */ /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 8cb57df9398d..1db8dc490b66 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -32,6 +32,8 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c) static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_INTEL_PT, CR_EBX,25, 0x00000007, 0 }, + { X86_FEATURE_AVX512_4VNNIW, CR_EDX, 2, 0x00000007, 0 }, + { X86_FEATURE_AVX512_4FMAPS, CR_EDX, 3, 0x00000007, 0 }, { X86_FEATURE_APERFMPERF, CR_ECX, 0, 0x00000006, 0 }, { X86_FEATURE_EPB, CR_ECX, 3, 0x00000006, 0 }, { X86_FEATURE_HW_PSTATE, CR_EDX, 7, 0x80000007, 0 }, diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 124aa5c593f8..095ef7ddd6ae 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -74,6 +74,8 @@ void fpu__xstate_clear_all_cpu_caps(void) setup_clear_cpu_cap(X86_FEATURE_MPX); setup_clear_cpu_cap(X86_FEATURE_XGETBV1); setup_clear_cpu_cap(X86_FEATURE_PKU); + setup_clear_cpu_cap(X86_FEATURE_AVX512_4VNNIW); + setup_clear_cpu_cap(X86_FEATURE_AVX512_4FMAPS); } /* diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 1188bc849ee3..a39629206864 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -194,6 +194,8 @@ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ #define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ +#define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */ +#define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */ /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ -- cgit v1.2.3 From c8061485a0d7569a865a3cc3c63347b0f42b3765 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 19 Oct 2016 19:28:11 +0100 Subject: sched/core, x86: Make struct thread_info arch specific again The following commit: c65eacbe290b ("sched/core: Allow putting thread_info into task_struct") ... made 'struct thread_info' a generic struct with only a single ::flags member, if CONFIG_THREAD_INFO_IN_TASK_STRUCT=y is selected. This change however seems to be quite x86 centric, since at least the generic preemption code (asm-generic/preempt.h) assumes that struct thread_info also has a preempt_count member, which apparently was not true for x86. We could add a bit more #ifdefs to solve this problem too, but it seems to be much simpler to make struct thread_info arch specific again. This also makes the conversion to THREAD_INFO_IN_TASK_STRUCT a bit easier for architectures that have a couple of arch specific stuff in their thread_info definition. The arch specific stuff _could_ be moved to thread_struct. However keeping them in thread_info makes it easier: accessing thread_info members is simple, since it is at the beginning of the task_struct, while the thread_struct is at the end. At least on s390 the offsets needed to access members of the thread_struct (with task_struct as base) are too large for various asm instructions. This is not a problem when keeping these members within thread_info. Signed-off-by: Heiko Carstens Signed-off-by: Mark Rutland Acked-by: Thomas Gleixner Cc: Andrew Morton Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Peter Zijlstra Cc: keescook@chromium.org Cc: linux-arch@vger.kernel.org Link: http://lkml.kernel.org/r/1476901693-8492-2-git-send-email-mark.rutland@arm.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/thread_info.h | 9 +++++++++ include/linux/thread_info.h | 11 ----------- 2 files changed, 9 insertions(+), 11 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 2aaca53c0974..ad6f5eb07a95 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -52,6 +52,15 @@ struct task_struct; #include #include +struct thread_info { + unsigned long flags; /* low level flags */ +}; + +#define INIT_THREAD_INFO(tsk) \ +{ \ + .flags = 0, \ +} + #define init_stack (init_thread_union.stack) #else /* !__ASSEMBLY__ */ diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index 45f004e9cc59..2873baf5372a 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -13,17 +13,6 @@ struct timespec; struct compat_timespec; -#ifdef CONFIG_THREAD_INFO_IN_TASK -struct thread_info { - unsigned long flags; /* low level flags */ -}; - -#define INIT_THREAD_INFO(tsk) \ -{ \ - .flags = 0, \ -} -#endif - #ifdef CONFIG_THREAD_INFO_IN_TASK #define current_thread_info() ((struct thread_info *)current) #endif -- cgit v1.2.3 From 8ef4227615e158faa4ee85a1d6466782f7e22f2f Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 24 Oct 2016 15:27:59 +1000 Subject: x86/io: add interface to reserve io memtype for a resource range. (v1.1) A recent change to the mm code in: 87744ab3832b mm: fix cache mode tracking in vm_insert_mixed() started enforcing checking the memory type against the registered list for amixed pfn insertion mappings. It happens that the drm drivers for a number of gpus relied on this being broken. Currently the driver only inserted VRAM mappings into the tracking table when they came from the kernel, and userspace mappings never landed in the table. This led to a regression where all the mapping end up as UC instead of WC now. I've considered a number of solutions but since this needs to be fixed in fixes and not next, and some of the solutions were going to introduce overhead that hadn't been there before I didn't consider them viable at this stage. These mainly concerned hooking into the TTM io reserve APIs, but these API have a bunch of fast paths I didn't want to unwind to add this to. The solution I've decided on is to add a new API like the arch_phys_wc APIs (these would have worked but wc_del didn't take a range), and use them from the drivers to add a WC compatible mapping to the table for all VRAM on those GPUs. This means we can then create userspace mapping that won't get degraded to UC. v1.1: use CONFIG_X86_PAT + add some comments in io.h Cc: Toshi Kani Cc: Borislav Petkov Cc: H. Peter Anvin Cc: Andy Lutomirski Cc: Denys Vlasenko Cc: Brian Gerst Cc: x86@kernel.org Cc: mcgrof@suse.com Cc: Dan Williams Acked-by: Ingo Molnar Reviewed-by: Thomas Gleixner Signed-off-by: Dave Airlie --- arch/x86/include/asm/io.h | 6 ++++++ arch/x86/mm/pat.c | 14 ++++++++++++++ include/linux/io.h | 22 ++++++++++++++++++++++ 3 files changed, 42 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index de25aad07853..d34bd370074b 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -351,4 +351,10 @@ extern void arch_phys_wc_del(int handle); #define arch_phys_wc_add arch_phys_wc_add #endif +#ifdef CONFIG_X86_PAT +extern int arch_io_reserve_memtype_wc(resource_size_t start, resource_size_t size); +extern void arch_io_free_memtype_wc(resource_size_t start, resource_size_t size); +#define arch_io_reserve_memtype_wc arch_io_reserve_memtype_wc +#endif + #endif /* _ASM_X86_IO_H */ diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 170cc4ff057b..83e701f160a9 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -730,6 +730,20 @@ void io_free_memtype(resource_size_t start, resource_size_t end) free_memtype(start, end); } +int arch_io_reserve_memtype_wc(resource_size_t start, resource_size_t size) +{ + enum page_cache_mode type = _PAGE_CACHE_MODE_WC; + + return io_reserve_memtype(start, start + size, &type); +} +EXPORT_SYMBOL(arch_io_reserve_memtype_wc); + +void arch_io_free_memtype_wc(resource_size_t start, resource_size_t size) +{ + io_free_memtype(start, start + size); +} +EXPORT_SYMBOL(arch_io_free_memtype_wc); + pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, unsigned long size, pgprot_t vma_prot) { diff --git a/include/linux/io.h b/include/linux/io.h index e2c8419278c1..82ef36eac8a1 100644 --- a/include/linux/io.h +++ b/include/linux/io.h @@ -141,4 +141,26 @@ enum { void *memremap(resource_size_t offset, size_t size, unsigned long flags); void memunmap(void *addr); +/* + * On x86 PAT systems we have memory tracking that keeps track of + * the allowed mappings on memory ranges. This tracking works for + * all the in-kernel mapping APIs (ioremap*), but where the user + * wishes to map a range from a physical device into user memory + * the tracking won't be updated. This API is to be used by + * drivers which remap physical device pages into userspace, + * and wants to make sure they are mapped WC and not UC. + */ +#ifndef arch_io_reserve_memtype_wc +static inline int arch_io_reserve_memtype_wc(resource_size_t base, + resource_size_t size) +{ + return 0; +} + +static inline void arch_io_free_memtype_wc(resource_size_t base, + resource_size_t size) +{ +} +#endif + #endif /* _LINUX_IO_H */ -- cgit v1.2.3