diff options
Diffstat (limited to 'arch/x86')
41 files changed, 608 insertions, 455 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index f73071742975..25d2c6f7325e 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -43,6 +43,7 @@ config X86 select HAVE_DMA_ATTRS select HAVE_DMA_CONTIGUOUS if !SWIOTLB select HAVE_KRETPROBES + select GENERIC_EARLY_IOREMAP select HAVE_OPTPROBES select HAVE_KPROBES_ON_FTRACE select HAVE_FTRACE_MCOUNT_RECORD @@ -128,6 +129,7 @@ config X86 select HAVE_IRQ_EXIT_ON_IRQ_STACK if X86_64 select HAVE_CC_STACKPROTECTOR select GENERIC_CPU_AUTOPROBE + select HAVE_ARCH_AUDITSYSCALL config INSTRUCTION_DECODER def_bool y diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index 1e6146137f8e..4703a6c4b8e3 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -112,7 +112,7 @@ __file_size64(void *__fh, efi_char16_t *filename_16, efi_file_info_t *info; efi_status_t status; efi_guid_t info_guid = EFI_FILE_INFO_ID; - u32 info_sz; + u64 info_sz; status = efi_early->call((unsigned long)fh->open, fh, &h, filename_16, EFI_FILE_MODE_READ, (u64)0); @@ -167,31 +167,31 @@ efi_file_size(efi_system_table_t *sys_table, void *__fh, } static inline efi_status_t -efi_file_read(void *__fh, void *handle, unsigned long *size, void *addr) +efi_file_read(void *handle, unsigned long *size, void *addr) { unsigned long func; if (efi_early->is64) { - efi_file_handle_64_t *fh = __fh; + efi_file_handle_64_t *fh = handle; func = (unsigned long)fh->read; return efi_early->call(func, handle, size, addr); } else { - efi_file_handle_32_t *fh = __fh; + efi_file_handle_32_t *fh = handle; func = (unsigned long)fh->read; return efi_early->call(func, handle, size, addr); } } -static inline efi_status_t efi_file_close(void *__fh, void *handle) +static inline efi_status_t efi_file_close(void *handle) { if (efi_early->is64) { - efi_file_handle_64_t *fh = __fh; + efi_file_handle_64_t *fh = handle; return efi_early->call((unsigned long)fh->close, handle); } else { - efi_file_handle_32_t *fh = __fh; + efi_file_handle_32_t *fh = handle; return efi_early->call((unsigned long)fh->close, handle); } @@ -1016,6 +1016,9 @@ void setup_graphics(struct boot_params *boot_params) * Because the x86 boot code expects to be passed a boot_params we * need to create one ourselves (usually the bootloader would create * one for us). + * + * The caller is responsible for filling out ->code32_start in the + * returned boot_params. */ struct boot_params *make_boot_params(struct efi_config *c) { @@ -1081,8 +1084,6 @@ struct boot_params *make_boot_params(struct efi_config *c) hdr->vid_mode = 0xffff; hdr->boot_flag = 0xAA55; - hdr->code32_start = (__u64)(unsigned long)image->image_base; - hdr->type_of_loader = 0x21; /* Convert unicode cmdline to ascii */ diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index de9d4200d305..cbed1407a5cd 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -59,6 +59,7 @@ ENTRY(efi_pe_entry) call make_boot_params cmpl $0, %eax je fail + movl %esi, BP_code32_start(%eax) popl %ecx pushl %eax pushl %ecx @@ -90,12 +91,7 @@ fail: hlt jmp fail 2: - call 3f -3: - popl %eax - subl $3b, %eax - subl BP_pref_address(%esi), %eax - add BP_code32_start(%esi), %eax + movl BP_code32_start(%esi), %eax leal preferred_addr(%eax), %eax jmp *%eax diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 57e58a5fa210..0d558ee899ae 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -261,6 +261,8 @@ ENTRY(efi_pe_entry) cmpq $0,%rax je fail mov %rax, %rsi + leaq startup_32(%rip), %rax + movl %eax, BP_code32_start(%rsi) jmp 2f /* Skip the relocation */ handover_entry: @@ -284,12 +286,7 @@ fail: hlt jmp fail 2: - call 3f -3: - popq %rax - subq $3b, %rax - subq BP_pref_address(%rsi), %rax - add BP_code32_start(%esi), %eax + movl BP_code32_start(%esi), %eax leaq preferred_addr(%rax), %rax jmp *%rax diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index 4acddc43ee0c..3ca9762e1649 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild @@ -5,5 +5,6 @@ genhdr-y += unistd_64.h genhdr-y += unistd_x32.h generic-y += clkdev.h +generic-y += early_ioremap.h generic-y += cputime.h generic-y += mcs_spinlock.h diff --git a/arch/x86/include/asm/archrandom.h b/arch/x86/include/asm/archrandom.h index e6a92455740e..69f1366f1aa3 100644 --- a/arch/x86/include/asm/archrandom.h +++ b/arch/x86/include/asm/archrandom.h @@ -1,7 +1,7 @@ /* * This file is part of the Linux kernel. * - * Copyright (c) 2011, Intel Corporation + * Copyright (c) 2011-2014, Intel Corporation * Authors: Fenghua Yu <fenghua.yu@intel.com>, * H. Peter Anvin <hpa@linux.intel.com> * @@ -31,10 +31,13 @@ #define RDRAND_RETRY_LOOPS 10 #define RDRAND_INT ".byte 0x0f,0xc7,0xf0" +#define RDSEED_INT ".byte 0x0f,0xc7,0xf8" #ifdef CONFIG_X86_64 # define RDRAND_LONG ".byte 0x48,0x0f,0xc7,0xf0" +# define RDSEED_LONG ".byte 0x48,0x0f,0xc7,0xf8" #else # define RDRAND_LONG RDRAND_INT +# define RDSEED_LONG RDSEED_INT #endif #ifdef CONFIG_ARCH_RANDOM @@ -53,6 +56,16 @@ static inline int rdrand_long(unsigned long *v) return ok; } +/* A single attempt at RDSEED */ +static inline bool rdseed_long(unsigned long *v) +{ + unsigned char ok; + asm volatile(RDSEED_LONG "\n\t" + "setc %0" + : "=qm" (ok), "=a" (*v)); + return ok; +} + #define GET_RANDOM(name, type, rdrand, nop) \ static inline int name(type *v) \ { \ @@ -70,18 +83,40 @@ static inline int name(type *v) \ return ok; \ } +#define GET_SEED(name, type, rdseed, nop) \ +static inline int name(type *v) \ +{ \ + unsigned char ok; \ + alternative_io("movb $0, %0\n\t" \ + nop, \ + rdseed "\n\t" \ + "setc %0", \ + X86_FEATURE_RDSEED, \ + ASM_OUTPUT2("=q" (ok), "=a" (*v))); \ + return ok; \ +} + #ifdef CONFIG_X86_64 GET_RANDOM(arch_get_random_long, unsigned long, RDRAND_LONG, ASM_NOP5); GET_RANDOM(arch_get_random_int, unsigned int, RDRAND_INT, ASM_NOP4); +GET_SEED(arch_get_random_seed_long, unsigned long, RDSEED_LONG, ASM_NOP5); +GET_SEED(arch_get_random_seed_int, unsigned int, RDSEED_INT, ASM_NOP4); + #else GET_RANDOM(arch_get_random_long, unsigned long, RDRAND_LONG, ASM_NOP3); GET_RANDOM(arch_get_random_int, unsigned int, RDRAND_INT, ASM_NOP3); +GET_SEED(arch_get_random_seed_long, unsigned long, RDSEED_LONG, ASM_NOP4); +GET_SEED(arch_get_random_seed_int, unsigned int, RDSEED_INT, ASM_NOP4); + #endif /* CONFIG_X86_64 */ +#define arch_has_random() static_cpu_has(X86_FEATURE_RDRAND) +#define arch_has_random_seed() static_cpu_has(X86_FEATURE_RDSEED) + #else static inline int rdrand_long(unsigned long *v) @@ -89,6 +124,11 @@ static inline int rdrand_long(unsigned long *v) return 0; } +static inline bool rdseed_long(unsigned long *v) +{ + return 0; +} + #endif /* CONFIG_ARCH_RANDOM */ extern void x86_init_rdrand(struct cpuinfo_x86 *c); diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h index 2f03ff018d36..ba38ebbaced3 100644 --- a/arch/x86/include/asm/bug.h +++ b/arch/x86/include/asm/bug.h @@ -1,7 +1,6 @@ #ifndef _ASM_X86_BUG_H #define _ASM_X86_BUG_H -#ifdef CONFIG_BUG #define HAVE_ARCH_BUG #ifdef CONFIG_DEBUG_BUGVERBOSE @@ -33,8 +32,6 @@ do { \ } while (0) #endif -#endif /* !CONFIG_BUG */ - #include <asm-generic/bug.h> #endif /* _ASM_X86_BUG_H */ diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index 8dcd35c4c787..43f482a0db37 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h @@ -163,5 +163,11 @@ static inline void __set_fixmap(enum fixed_addresses idx, #include <asm-generic/fixmap.h> +#define __late_set_fixmap(idx, phys, flags) __set_fixmap(idx, phys, flags) +#define __late_clear_fixmap(idx) __set_fixmap(idx, 0, __pgprot(0)) + +void __early_set_fixmap(enum fixed_addresses idx, + phys_addr_t phys, pgprot_t flags); + #endif /* !__ASSEMBLY__ */ #endif /* _ASM_X86_FIXMAP_H */ diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index 91d9c69a629e..b8237d8a1e0c 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -39,6 +39,7 @@ #include <linux/string.h> #include <linux/compiler.h> #include <asm/page.h> +#include <asm/early_ioremap.h> #define build_mmio_read(name, size, type, reg, barrier) \ static inline type name(const volatile void __iomem *addr) \ @@ -316,19 +317,6 @@ extern int ioremap_change_attr(unsigned long vaddr, unsigned long size, unsigned long prot_val); extern void __iomem *ioremap_wc(resource_size_t offset, unsigned long size); -/* - * early_ioremap() and early_iounmap() are for temporary early boot-time - * mappings, before the real ioremap() is functional. - * A boot-time mapping is currently limited to at most 16 pages. - */ -extern void early_ioremap_init(void); -extern void early_ioremap_reset(void); -extern void __iomem *early_ioremap(resource_size_t phys_addr, - unsigned long size); -extern void __iomem *early_memremap(resource_size_t phys_addr, - unsigned long size); -extern void early_iounmap(void __iomem *addr, unsigned long size); -extern void fixup_early_ioremap(void); extern bool is_early_ioremap_ptep(pte_t *ptep); #ifdef CONFIG_XEN diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 94220d14d5cc..851bcdc5db04 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -52,7 +52,7 @@ * Compared to the generic __my_cpu_offset version, the following * saves one instruction and avoids clobbering a temp register. */ -#define __this_cpu_ptr(ptr) \ +#define raw_cpu_ptr(ptr) \ ({ \ unsigned long tcp_ptr__; \ __verify_pcpu_ptr(ptr); \ @@ -362,25 +362,25 @@ do { \ */ #define this_cpu_read_stable(var) percpu_from_op("mov", var, "p" (&(var))) -#define __this_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) -#define __this_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) -#define __this_cpu_read_4(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) - -#define __this_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val) -#define __this_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val) -#define __this_cpu_write_4(pcp, val) percpu_to_op("mov", (pcp), val) -#define __this_cpu_add_1(pcp, val) percpu_add_op((pcp), val) -#define __this_cpu_add_2(pcp, val) percpu_add_op((pcp), val) -#define __this_cpu_add_4(pcp, val) percpu_add_op((pcp), val) -#define __this_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val) -#define __this_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val) -#define __this_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val) -#define __this_cpu_or_1(pcp, val) percpu_to_op("or", (pcp), val) -#define __this_cpu_or_2(pcp, val) percpu_to_op("or", (pcp), val) -#define __this_cpu_or_4(pcp, val) percpu_to_op("or", (pcp), val) -#define __this_cpu_xchg_1(pcp, val) percpu_xchg_op(pcp, val) -#define __this_cpu_xchg_2(pcp, val) percpu_xchg_op(pcp, val) -#define __this_cpu_xchg_4(pcp, val) percpu_xchg_op(pcp, val) +#define raw_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) +#define raw_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) +#define raw_cpu_read_4(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) + +#define raw_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val) +#define raw_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val) +#define raw_cpu_write_4(pcp, val) percpu_to_op("mov", (pcp), val) +#define raw_cpu_add_1(pcp, val) percpu_add_op((pcp), val) +#define raw_cpu_add_2(pcp, val) percpu_add_op((pcp), val) +#define raw_cpu_add_4(pcp, val) percpu_add_op((pcp), val) +#define raw_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val) +#define raw_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val) +#define raw_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val) +#define raw_cpu_or_1(pcp, val) percpu_to_op("or", (pcp), val) +#define raw_cpu_or_2(pcp, val) percpu_to_op("or", (pcp), val) +#define raw_cpu_or_4(pcp, val) percpu_to_op("or", (pcp), val) +#define raw_cpu_xchg_1(pcp, val) percpu_xchg_op(pcp, val) +#define raw_cpu_xchg_2(pcp, val) percpu_xchg_op(pcp, val) +#define raw_cpu_xchg_4(pcp, val) percpu_xchg_op(pcp, val) #define this_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) #define this_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) @@ -401,16 +401,16 @@ do { \ #define this_cpu_xchg_2(pcp, nval) percpu_xchg_op(pcp, nval) #define this_cpu_xchg_4(pcp, nval) percpu_xchg_op(pcp, nval) -#define __this_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val) -#define __this_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val) -#define __this_cpu_add_return_4(pcp, val) percpu_add_return_op(pcp, val) -#define __this_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) -#define __this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) -#define __this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) +#define raw_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val) +#define raw_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val) +#define raw_cpu_add_return_4(pcp, val) percpu_add_return_op(pcp, val) +#define raw_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) +#define raw_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) +#define raw_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) -#define this_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val) -#define this_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val) -#define this_cpu_add_return_4(pcp, val) percpu_add_return_op(pcp, val) +#define this_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val) +#define this_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val) +#define this_cpu_add_return_4(pcp, val) percpu_add_return_op(pcp, val) #define this_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) #define this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) #define this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) @@ -427,7 +427,7 @@ do { \ __ret; \ }) -#define __this_cpu_cmpxchg_double_4 percpu_cmpxchg8b_double +#define raw_cpu_cmpxchg_double_4 percpu_cmpxchg8b_double #define this_cpu_cmpxchg_double_4 percpu_cmpxchg8b_double #endif /* CONFIG_X86_CMPXCHG64 */ @@ -436,22 +436,22 @@ do { \ * 32 bit must fall back to generic operations. */ #ifdef CONFIG_X86_64 -#define __this_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) -#define __this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val) -#define __this_cpu_add_8(pcp, val) percpu_add_op((pcp), val) -#define __this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) -#define __this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) -#define __this_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val) -#define __this_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval) -#define __this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) - -#define this_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) -#define this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val) -#define this_cpu_add_8(pcp, val) percpu_add_op((pcp), val) -#define this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) -#define this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) -#define this_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val) -#define this_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval) +#define raw_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) +#define raw_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val) +#define raw_cpu_add_8(pcp, val) percpu_add_op((pcp), val) +#define raw_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) +#define raw_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) +#define raw_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val) +#define raw_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval) +#define raw_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) + +#define this_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) +#define this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val) +#define this_cpu_add_8(pcp, val) percpu_add_op((pcp), val) +#define this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) +#define this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) +#define this_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val) +#define this_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval) #define this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) /* @@ -474,7 +474,7 @@ do { \ __ret; \ }) -#define __this_cpu_cmpxchg_double_8 percpu_cmpxchg16b_double +#define raw_cpu_cmpxchg_double_8 percpu_cmpxchg16b_double #define this_cpu_cmpxchg_double_8 percpu_cmpxchg16b_double #endif @@ -495,9 +495,9 @@ static __always_inline int x86_this_cpu_constant_test_bit(unsigned int nr, unsigned long __percpu *a = (unsigned long *)addr + nr / BITS_PER_LONG; #ifdef CONFIG_X86_64 - return ((1UL << (nr % BITS_PER_LONG)) & __this_cpu_read_8(*a)) != 0; + return ((1UL << (nr % BITS_PER_LONG)) & raw_cpu_read_8(*a)) != 0; #else - return ((1UL << (nr % BITS_PER_LONG)) & __this_cpu_read_4(*a)) != 0; + return ((1UL << (nr % BITS_PER_LONG)) & raw_cpu_read_4(*a)) != 0; #endif } diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h index c8b051933b1b..7024c12f7bfe 100644 --- a/arch/x86/include/asm/preempt.h +++ b/arch/x86/include/asm/preempt.h @@ -19,12 +19,12 @@ DECLARE_PER_CPU(int, __preempt_count); */ static __always_inline int preempt_count(void) { - return __this_cpu_read_4(__preempt_count) & ~PREEMPT_NEED_RESCHED; + return raw_cpu_read_4(__preempt_count) & ~PREEMPT_NEED_RESCHED; } static __always_inline void preempt_count_set(int pc) { - __this_cpu_write_4(__preempt_count, pc); + raw_cpu_write_4(__preempt_count, pc); } /* @@ -53,17 +53,17 @@ static __always_inline void preempt_count_set(int pc) static __always_inline void set_preempt_need_resched(void) { - __this_cpu_and_4(__preempt_count, ~PREEMPT_NEED_RESCHED); + raw_cpu_and_4(__preempt_count, ~PREEMPT_NEED_RESCHED); } static __always_inline void clear_preempt_need_resched(void) { - __this_cpu_or_4(__preempt_count, PREEMPT_NEED_RESCHED); + raw_cpu_or_4(__preempt_count, PREEMPT_NEED_RESCHED); } static __always_inline bool test_preempt_need_resched(void) { - return !(__this_cpu_read_4(__preempt_count) & PREEMPT_NEED_RESCHED); + return !(raw_cpu_read_4(__preempt_count) & PREEMPT_NEED_RESCHED); } /* @@ -72,12 +72,12 @@ static __always_inline bool test_preempt_need_resched(void) static __always_inline void __preempt_count_add(int val) { - __this_cpu_add_4(__preempt_count, val); + raw_cpu_add_4(__preempt_count, val); } static __always_inline void __preempt_count_sub(int val) { - __this_cpu_add_4(__preempt_count, -val); + raw_cpu_add_4(__preempt_count, -val); } /* @@ -95,7 +95,7 @@ static __always_inline bool __preempt_count_dec_and_test(void) */ static __always_inline bool should_resched(void) { - return unlikely(!__this_cpu_read_4(__preempt_count)); + return unlikely(!raw_cpu_read_4(__preempt_count)); } #ifdef CONFIG_PREEMPT diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h index aea284b41312..d6a756ae04c8 100644 --- a/arch/x86/include/asm/syscall.h +++ b/arch/x86/include/asm/syscall.h @@ -13,7 +13,7 @@ #ifndef _ASM_X86_SYSCALL_H #define _ASM_X86_SYSCALL_H -#include <linux/audit.h> +#include <uapi/linux/audit.h> #include <linux/sched.h> #include <linux/err.h> #include <asm/asm-offsets.h> /* For NR_syscalls */ @@ -91,8 +91,7 @@ static inline void syscall_set_arguments(struct task_struct *task, memcpy(®s->bx + i, args, n * sizeof(args[0])); } -static inline int syscall_get_arch(struct task_struct *task, - struct pt_regs *regs) +static inline int syscall_get_arch(void) { return AUDIT_ARCH_I386; } @@ -221,8 +220,7 @@ static inline void syscall_set_arguments(struct task_struct *task, } } -static inline int syscall_get_arch(struct task_struct *task, - struct pt_regs *regs) +static inline int syscall_get_arch(void) { #ifdef CONFIG_IA32_EMULATION /* @@ -234,7 +232,7 @@ static inline int syscall_get_arch(struct task_struct *task, * * x32 tasks should be considered AUDIT_ARCH_X86_64. */ - if (task_thread_info(task)->status & TS_COMPAT) + if (task_thread_info(current)->status & TS_COMPAT) return AUDIT_ARCH_I386; #endif /* Both x32 and x86_64 are considered "64-bit". */ diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index 3e276eb23d1b..c949923a5668 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -49,10 +49,17 @@ extern bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn); extern unsigned long set_phys_range_identity(unsigned long pfn_s, unsigned long pfn_e); +extern int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops, + struct gnttab_map_grant_ref *kmap_ops, + struct page **pages, unsigned int count); extern int m2p_add_override(unsigned long mfn, struct page *page, struct gnttab_map_grant_ref *kmap_op); +extern int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops, + struct gnttab_map_grant_ref *kmap_ops, + struct page **pages, unsigned int count); extern int m2p_remove_override(struct page *page, - struct gnttab_map_grant_ref *kmap_op); + struct gnttab_map_grant_ref *kmap_op, + unsigned long mfn); extern struct page *m2p_find_override(unsigned long mfn); extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn); @@ -121,7 +128,7 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn) pfn = m2p_find_override_pfn(mfn, ~0); } - /* + /* * pfn is ~0 if there are no entries in the m2p for mfn or if the * entry doesn't map back to the mfn and m2p_override doesn't have a * valid entry for it. diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c index e69182fd01cf..4b28159e0421 100644 --- a/arch/x86/kernel/acpi/cstate.c +++ b/arch/x86/kernel/acpi/cstate.c @@ -87,7 +87,9 @@ static long acpi_processor_ffh_cstate_probe_cpu(void *_cx) num_cstate_subtype = edx_part & MWAIT_SUBSTATE_MASK; retval = 0; - if (num_cstate_subtype < (cx->address & MWAIT_SUBSTATE_MASK)) { + /* If the HW does not support any sub-states in this C-state */ + if (num_cstate_subtype == 0) { + pr_warn(FW_BUG "ACPI MWAIT C-state 0x%x not supported by HW (0x%x)\n", cx->address, edx_part); retval = -1; goto out; } diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 481ae38f6a44..ad28db7e6bde 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1996,7 +1996,8 @@ static inline void __smp_error_interrupt(struct pt_regs *regs) }; /* First tickle the hardware, only then report what went on. -- REW */ - apic_write(APIC_ESR, 0); + if (lapic_get_maxlvt() > 3) /* Due to the Pentium erratum 3AP. */ + apic_write(APIC_ESR, 0); v = apic_read(APIC_ESR); ack_APIC_irq(); atomic_inc(&irq_err_count); diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 0641113e2965..a952e9c85b6f 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -1225,21 +1225,24 @@ static struct notifier_block cacheinfo_cpu_notifier = { static int __init cache_sysfs_init(void) { - int i; + int i, err = 0; if (num_cache_leaves == 0) return 0; + cpu_notifier_register_begin(); for_each_online_cpu(i) { - int err; struct device *dev = get_cpu_device(i); err = cache_add_dev(dev); if (err) - return err; + goto out; } - register_hotcpu_notifier(&cacheinfo_cpu_notifier); - return 0; + __register_hotcpu_notifier(&cacheinfo_cpu_notifier); + +out: + cpu_notifier_register_done(); + return err; } device_initcall(cache_sysfs_init); diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 4d5419b249da..eeee23ff75ef 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -89,6 +89,9 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_chrdev_wait); static DEFINE_PER_CPU(struct mce, mces_seen); static int cpu_missing; +/* CMCI storm detection filter */ +static DEFINE_PER_CPU(unsigned long, mce_polled_error); + /* * MCA banks polled by the period polling timer for corrected events. * With Intel CMCI, this only has MCA banks which do not support CMCI (if any). @@ -595,6 +598,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) { struct mce m; int i; + unsigned long *v; this_cpu_inc(mce_poll_count); @@ -614,6 +618,8 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) if (!(m.status & MCI_STATUS_VAL)) continue; + v = &get_cpu_var(mce_polled_error); + set_bit(0, v); /* * Uncorrected or signalled events are handled by the exception * handler when it is enabled, so don't process those here. @@ -1278,10 +1284,18 @@ static unsigned long mce_adjust_timer_default(unsigned long interval) static unsigned long (*mce_adjust_timer)(unsigned long interval) = mce_adjust_timer_default; +static int cmc_error_seen(void) +{ + unsigned long *v = &__get_cpu_var(mce_polled_error); + + return test_and_clear_bit(0, v); +} + static void mce_timer_fn(unsigned long data) { struct timer_list *t = &__get_cpu_var(mce_timer); unsigned long iv; + int notify; WARN_ON(smp_processor_id() != data); @@ -1296,7 +1310,9 @@ static void mce_timer_fn(unsigned long data) * polling interval, otherwise increase the polling interval. */ iv = __this_cpu_read(mce_next_interval); - if (mce_notify_irq()) { + notify = mce_notify_irq(); + notify |= cmc_error_seen(); + if (notify) { iv = max(iv / 2, (unsigned long) HZ/100); } else { iv = min(iv * 2, round_jiffies_relative(check_interval * HZ)); @@ -2434,14 +2450,18 @@ static __init int mcheck_init_device(void) if (err) return err; + cpu_notifier_register_begin(); for_each_online_cpu(i) { err = mce_device_create(i); - if (err) + if (err) { + cpu_notifier_register_done(); return err; + } } register_syscore_ops(&mce_syscore_ops); - register_hotcpu_notifier(&mce_cpu_notifier); + __register_hotcpu_notifier(&mce_cpu_notifier); + cpu_notifier_register_done(); /* register character device /dev/mcelog */ misc_register(&mce_chrdev_device); diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index fb6156fee6f7..3bdb95ae8c43 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c @@ -9,6 +9,7 @@ #include <linux/interrupt.h> #include <linux/percpu.h> #include <linux/sched.h> +#include <linux/cpumask.h> #include <asm/apic.h> #include <asm/processor.h> #include <asm/msr.h> @@ -137,6 +138,22 @@ unsigned long mce_intel_adjust_timer(unsigned long interval) } } +static void cmci_storm_disable_banks(void) +{ + unsigned long flags, *owned; + int bank; + u64 val; + + raw_spin_lock_irqsave(&cmci_discover_lock, flags); + owned = __get_cpu_var(mce_banks_owned); + for_each_set_bit(bank, owned, MAX_NR_BANKS) { + rdmsrl(MSR_IA32_MCx_CTL2(bank), val); + val &= ~MCI_CTL2_CMCI_EN; + wrmsrl(MSR_IA32_MCx_CTL2(bank), val); + } + raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); +} + static bool cmci_storm_detect(void) { unsigned int cnt = __this_cpu_read(cmci_storm_cnt); @@ -158,7 +175,7 @@ static bool cmci_storm_detect(void) if (cnt <= CMCI_STORM_THRESHOLD) return false; - cmci_clear(); + cmci_storm_disable_banks(); __this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE); r = atomic_add_return(1, &cmci_storm_on_cpus); mce_timer_kick(CMCI_POLL_INTERVAL); diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 3eec7de76efb..d921b7ee6595 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -271,9 +271,6 @@ static void thermal_throttle_remove_dev(struct device *dev) sysfs_remove_group(&dev->kobj, &thermal_attr_group); } -/* Mutex protecting device creation against CPU hotplug: */ -static DEFINE_MUTEX(therm_cpu_lock); - /* Get notified when a cpu comes on/off. Be hotplug friendly. */ static int thermal_throttle_cpu_callback(struct notifier_block *nfb, @@ -289,18 +286,14 @@ thermal_throttle_cpu_callback(struct notifier_block *nfb, switch (action) { case CPU_UP_PREPARE: case CPU_UP_PREPARE_FROZEN: - mutex_lock(&therm_cpu_lock); err = thermal_throttle_add_dev(dev, cpu); - mutex_unlock(&therm_cpu_lock); WARN_ON(err); break; case CPU_UP_CANCELED: case CPU_UP_CANCELED_FROZEN: case CPU_DEAD: case CPU_DEAD_FROZEN: - mutex_lock(&therm_cpu_lock); thermal_throttle_remove_dev(dev); - mutex_unlock(&therm_cpu_lock); break; } return notifier_from_errno(err); @@ -319,19 +312,16 @@ static __init int thermal_throttle_init_device(void) if (!atomic_read(&therm_throt_en)) return 0; - register_hotcpu_notifier(&thermal_throttle_cpu_notifier); + cpu_notifier_register_begin(); -#ifdef CONFIG_HOTPLUG_CPU - mutex_lock(&therm_cpu_lock); -#endif /* connect live CPUs to sysfs */ for_each_online_cpu(cpu) { err = thermal_throttle_add_dev(get_cpu_device(cpu), cpu); WARN_ON(err); } -#ifdef CONFIG_HOTPLUG_CPU - mutex_unlock(&therm_cpu_lock); -#endif + + __register_hotcpu_notifier(&thermal_throttle_cpu_notifier); + cpu_notifier_register_done(); return 0; } diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c index 4b8e4d3cd6ea..4c36bbe3173a 100644 --- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c +++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c @@ -926,13 +926,13 @@ static __init int amd_ibs_init(void) goto out; perf_ibs_pm_init(); - get_online_cpus(); + cpu_notifier_register_begin(); ibs_caps = caps; /* make ibs_caps visible to other cpus: */ smp_mb(); - perf_cpu_notifier(perf_ibs_cpu_notifier); smp_call_function(setup_APIC_ibs, NULL, 1); - put_online_cpus(); + __perf_cpu_notifier(perf_ibs_cpu_notifier); + cpu_notifier_register_done(); ret = perf_event_ibs_init(); out: diff --git a/arch/x86/kernel/cpu/perf_event_amd_uncore.c b/arch/x86/kernel/cpu/perf_event_amd_uncore.c index 754291adec33..3bbdf4cd38b9 100644 --- a/arch/x86/kernel/cpu/perf_event_amd_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_amd_uncore.c @@ -531,15 +531,16 @@ static int __init amd_uncore_init(void) if (ret) return -ENODEV; - get_online_cpus(); + cpu_notifier_register_begin(); + /* init cpus already online before registering for hotplug notifier */ for_each_online_cpu(cpu) { amd_uncore_cpu_up_prepare(cpu); smp_call_function_single(cpu, init_cpu_already_online, NULL, 1); } - register_cpu_notifier(&amd_uncore_cpu_notifier_block); - put_online_cpus(); + __register_cpu_notifier(&amd_uncore_cpu_notifier_block); + cpu_notifier_register_done(); return 0; } diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/kernel/cpu/perf_event_intel_rapl.c index 5ad35ad94d0f..059218ed5208 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_rapl.c +++ b/arch/x86/kernel/cpu/perf_event_intel_rapl.c @@ -646,19 +646,20 @@ static int __init rapl_pmu_init(void) /* unsupported */ return 0; } - get_online_cpus(); + + cpu_notifier_register_begin(); for_each_online_cpu(cpu) { rapl_cpu_prepare(cpu); rapl_cpu_init(cpu); } - perf_cpu_notifier(rapl_cpu_notifier); + __perf_cpu_notifier(rapl_cpu_notifier); ret = perf_pmu_register(&rapl_pmu_class, "power", -1); if (WARN_ON(ret)) { pr_info("RAPL PMU detected, registration failed (%d), RAPL PMU disabled\n", ret); - put_online_cpus(); + cpu_notifier_register_done(); return -1; } @@ -672,7 +673,7 @@ static int __init rapl_pmu_init(void) hweight32(rapl_cntr_mask), ktime_to_ms(pmu->timer_interval)); - put_online_cpus(); + cpu_notifier_register_done(); return 0; } diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index bd2253d40cff..65bbbea38b9c 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -4244,7 +4244,7 @@ static void __init uncore_cpumask_init(void) if (!cpumask_empty(&uncore_cpu_mask)) return; - get_online_cpus(); + cpu_notifier_register_begin(); for_each_online_cpu(cpu) { int i, phys_id = topology_physical_package_id(cpu); @@ -4263,9 +4263,9 @@ static void __init uncore_cpumask_init(void) } on_each_cpu(uncore_cpu_setup, NULL, 1); - register_cpu_notifier(&uncore_cpu_nb); + __register_cpu_notifier(&uncore_cpu_nb); - put_online_cpus(); + cpu_notifier_register_done(); } diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index 7d9481c743f8..3225ae6c5180 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c @@ -198,14 +198,15 @@ static int __init cpuid_init(void) goto out_chrdev; } cpuid_class->devnode = cpuid_devnode; - get_online_cpus(); + + cpu_notifier_register_begin(); for_each_online_cpu(i) { err = cpuid_device_create(i); if (err != 0) goto out_class; } - register_hotcpu_notifier(&cpuid_class_cpu_notifier); - put_online_cpus(); + __register_hotcpu_notifier(&cpuid_class_cpu_notifier); + cpu_notifier_register_done(); err = 0; goto out; @@ -215,7 +216,7 @@ out_class: for_each_online_cpu(i) { cpuid_device_destroy(i); } - put_online_cpus(); + cpu_notifier_register_done(); class_destroy(cpuid_class); out_chrdev: __unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid"); @@ -227,13 +228,13 @@ static void __exit cpuid_exit(void) { int cpu = 0; - get_online_cpus(); + cpu_notifier_register_begin(); for_each_online_cpu(cpu) cpuid_device_destroy(cpu); class_destroy(cpuid_class); __unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid"); - unregister_hotcpu_notifier(&cpuid_class_cpu_notifier); - put_online_cpus(); + __unregister_hotcpu_notifier(&cpuid_class_cpu_notifier); + cpu_notifier_register_done(); } module_init(cpuid_init); diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index 6d7d5a1260a6..b0cc3809723d 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -225,7 +225,7 @@ static void __init intel_remapping_check(int num, int slot, int func) * * And yes, so far on current devices the base addr is always under 4G. */ -static u32 __init intel_stolen_base(int num, int slot, int func) +static u32 __init intel_stolen_base(int num, int slot, int func, size_t stolen_size) { u32 base; @@ -244,6 +244,114 @@ static u32 __init intel_stolen_base(int num, int slot, int func) #define MB(x) (KB (KB (x))) #define GB(x) (MB (KB (x))) +static size_t __init i830_tseg_size(void) +{ + u8 tmp = read_pci_config_byte(0, 0, 0, I830_ESMRAMC); + + if (!(tmp & TSEG_ENABLE)) + return 0; + + if (tmp & I830_TSEG_SIZE_1M) + return MB(1); + else + return KB(512); +} + +static size_t __init i845_tseg_size(void) +{ + u8 tmp = read_pci_config_byte(0, 0, 0, I845_ESMRAMC); + + if (!(tmp & TSEG_ENABLE)) + return 0; + + switch (tmp & I845_TSEG_SIZE_MASK) { + case I845_TSEG_SIZE_512K: + return KB(512); + case I845_TSEG_SIZE_1M: + return MB(1); + default: + WARN_ON(1); + return 0; + } +} + +static size_t __init i85x_tseg_size(void) +{ + u8 tmp = read_pci_config_byte(0, 0, 0, I85X_ESMRAMC); + + if (!(tmp & TSEG_ENABLE)) + return 0; + + return MB(1); +} + +static size_t __init i830_mem_size(void) +{ + return read_pci_config_byte(0, 0, 0, I830_DRB3) * MB(32); +} + +static size_t __init i85x_mem_size(void) +{ + return read_pci_config_byte(0, 0, 1, I85X_DRB3) * MB(32); +} + +/* + * On 830/845/85x the stolen memory base isn't available in any + * register. We need to calculate it as TOM-TSEG_SIZE-stolen_size. + */ +static u32 __init i830_stolen_base(int num, int slot, int func, size_t stolen_size) +{ + return i830_mem_size() - i830_tseg_size() - stolen_size; +} + +static u32 __init i845_stolen_base(int num, int slot, int func, size_t stolen_size) +{ + return i830_mem_size() - i845_tseg_size() - stolen_size; +} + +static u32 __init i85x_stolen_base(int num, int slot, int func, size_t stolen_size) +{ + return i85x_mem_size() - i85x_tseg_size() - stolen_size; +} + +static u32 __init i865_stolen_base(int num, int slot, int func, size_t stolen_size) +{ + /* + * FIXME is the graphics stolen memory region + * always at TOUD? Ie. is it always the last + * one to be allocated by the BIOS? + */ + return read_pci_config_16(0, 0, 0, I865_TOUD) << 16; +} + +static size_t __init i830_stolen_size(int num, int slot, int func) +{ + size_t stolen_size; + u16 gmch_ctrl; + + gmch_ctrl = read_pci_config_16(0, 0, 0, I830_GMCH_CTRL); + + switch (gmch_ctrl & I830_GMCH_GMS_MASK) { + case I830_GMCH_GMS_STOLEN_512: + stolen_size = KB(512); + break; + case I830_GMCH_GMS_STOLEN_1024: + stolen_size = MB(1); + break; + case I830_GMCH_GMS_STOLEN_8192: + stolen_size = MB(8); + break; + case I830_GMCH_GMS_LOCAL: + /* local memory isn't part of the normal address space */ + stolen_size = 0; + break; + default: + return 0; + } + + return stolen_size; +} + static size_t __init gen3_stolen_size(int num, int slot, int func) { size_t stolen_size; @@ -310,7 +418,7 @@ static size_t __init gen6_stolen_size(int num, int slot, int func) return gmch_ctrl << 25; /* 32 MB units */ } -static inline size_t gen8_stolen_size(int num, int slot, int func) +static size_t gen8_stolen_size(int num, int slot, int func) { u16 gmch_ctrl; @@ -320,31 +428,74 @@ static inline size_t gen8_stolen_size(int num, int slot, int func) return gmch_ctrl << 25; /* 32 MB units */ } -typedef size_t (*stolen_size_fn)(int num, int slot, int func); + +struct intel_stolen_funcs { + size_t (*size)(int num, int slot, int func); + u32 (*base)(int num, int slot, int func, size_t size); +}; + +static const struct intel_stolen_funcs i830_stolen_funcs = { + .base = i830_stolen_base, + .size = i830_stolen_size, +}; + +static const struct intel_stolen_funcs i845_stolen_funcs = { + .base = i845_stolen_base, + .size = i830_stolen_size, +}; + +static const struct intel_stolen_funcs i85x_stolen_funcs = { + .base = i85x_stolen_base, + .size = gen3_stolen_size, +}; + +static const struct intel_stolen_funcs i865_stolen_funcs = { + .base = i865_stolen_base, + .size = gen3_stolen_size, +}; + +static const struct intel_stolen_funcs gen3_stolen_funcs = { + .base = intel_stolen_base, + .size = gen3_stolen_size, +}; + +static const struct intel_stolen_funcs gen6_stolen_funcs = { + .base = intel_stolen_base, + .size = gen6_stolen_size, +}; + +static const struct intel_stolen_funcs gen8_stolen_funcs = { + .base = intel_stolen_base, + .size = gen8_stolen_size, +}; static struct pci_device_id intel_stolen_ids[] __initdata = { - INTEL_I915G_IDS(gen3_stolen_size), - INTEL_I915GM_IDS(gen3_stolen_size), - INTEL_I945G_IDS(gen3_stolen_size), - INTEL_I945GM_IDS(gen3_stolen_size), - INTEL_VLV_M_IDS(gen6_stolen_size), - INTEL_VLV_D_IDS(gen6_stolen_size), - INTEL_PINEVIEW_IDS(gen3_stolen_size), - INTEL_I965G_IDS(gen3_stolen_size), - INTEL_G33_IDS(gen3_stolen_size), - INTEL_I965GM_IDS(gen3_stolen_size), - INTEL_GM45_IDS(gen3_stolen_size), - INTEL_G45_IDS(gen3_stolen_size), - INTEL_IRONLAKE_D_IDS(gen3_stolen_size), - INTEL_IRONLAKE_M_IDS(gen3_stolen_size), - INTEL_SNB_D_IDS(gen6_stolen_size), - INTEL_SNB_M_IDS(gen6_stolen_size), - INTEL_IVB_M_IDS(gen6_stolen_size), - INTEL_IVB_D_IDS(gen6_stolen_size), - INTEL_HSW_D_IDS(gen6_stolen_size), - INTEL_HSW_M_IDS(gen6_stolen_size), - INTEL_BDW_M_IDS(gen8_stolen_size), - INTEL_BDW_D_IDS(gen8_stolen_size) + INTEL_I830_IDS(&i830_stolen_funcs), + INTEL_I845G_IDS(&i845_stolen_funcs), + INTEL_I85X_IDS(&i85x_stolen_funcs), + INTEL_I865G_IDS(&i865_stolen_funcs), + INTEL_I915G_IDS(&gen3_stolen_funcs), + INTEL_I915GM_IDS(&gen3_stolen_funcs), + INTEL_I945G_IDS(&gen3_stolen_funcs), + INTEL_I945GM_IDS(&gen3_stolen_funcs), + INTEL_VLV_M_IDS(&gen6_stolen_funcs), + INTEL_VLV_D_IDS(&gen6_stolen_funcs), + INTEL_PINEVIEW_IDS(&gen3_stolen_funcs), + INTEL_I965G_IDS(&gen3_stolen_funcs), + INTEL_G33_IDS(&gen3_stolen_funcs), + INTEL_I965GM_IDS(&gen3_stolen_funcs), + INTEL_GM45_IDS(&gen3_stolen_funcs), + INTEL_G45_IDS(&gen3_stolen_funcs), + INTEL_IRONLAKE_D_IDS(&gen3_stolen_funcs), + INTEL_IRONLAKE_M_IDS(&gen3_stolen_funcs), + INTEL_SNB_D_IDS(&gen6_stolen_funcs), + INTEL_SNB_M_IDS(&gen6_stolen_funcs), + INTEL_IVB_M_IDS(&gen6_stolen_funcs), + INTEL_IVB_D_IDS(&gen6_stolen_funcs), + INTEL_HSW_D_IDS(&gen6_stolen_funcs), + INTEL_HSW_M_IDS(&gen6_stolen_funcs), + INTEL_BDW_M_IDS(&gen8_stolen_funcs), + INTEL_BDW_D_IDS(&gen8_stolen_funcs) }; static void __init intel_graphics_stolen(int num, int slot, int func) @@ -361,11 +512,13 @@ static void __init intel_graphics_stolen(int num, int slot, int func) for (i = 0; i < ARRAY_SIZE(intel_stolen_ids); i++) { if (intel_stolen_ids[i].device == device) { - stolen_size_fn stolen_size = - (stolen_size_fn)intel_stolen_ids[i].driver_data; - size = stolen_size(num, slot, func); - start = intel_stolen_base(num, slot, func); + const struct intel_stolen_funcs *stolen_funcs = + (const struct intel_stolen_funcs *)intel_stolen_ids[i].driver_data; + size = stolen_funcs->size(num, slot, func); + start = stolen_funcs->base(num, slot, func, size); if (size && start) { + printk(KERN_INFO "Reserving Intel graphics stolen memory at 0x%x-0x%x\n", + start, start + (u32)size - 1); /* Mark this space as reserved */ e820_add_region(start, size, E820_RESERVED); sanitize_e820_map(e820.map, diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 93eed15a8fd4..8d80ae011603 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -941,12 +941,14 @@ static __init int hpet_late_init(void) if (boot_cpu_has(X86_FEATURE_ARAT)) return 0; + cpu_notifier_register_begin(); for_each_online_cpu(cpu) { hpet_cpuhp_notify(NULL, CPU_ONLINE, (void *)(long)cpu); } /* This notifier should be called after workqueue is ready */ - hotcpu_notifier(hpet_cpuhp_notify, -20); + __hotcpu_notifier(hpet_cpuhp_notify, -20); + cpu_notifier_register_done(); return 0; } diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 42805fac0092..283a76a9cc40 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -125,7 +125,7 @@ int arch_show_interrupts(struct seq_file *p, int prec) seq_printf(p, "%10u ", per_cpu(mce_poll_count, j)); seq_printf(p, " Machine check polls\n"); #endif -#if defined(CONFIG_HYPERV) || defined(CONFIG_XEN) +#if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN) seq_printf(p, "%*s: ", prec, "THR"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->irq_hv_callback_count); diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index ebc987398923..af1d14a9ebda 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -229,6 +229,17 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode) } } + /* + * On x86-64 we do not support 16-bit segments due to + * IRET leaking the high bits of the kernel stack address. + */ +#ifdef CONFIG_X86_64 + if (!ldt_info.seg_32bit) { + error = -EINVAL; + goto out_unlock; + } +#endif + fill_ldt(&ldt, &ldt_info); if (oldmode) ldt.avl = 0; diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 05266b5aae22..c9603ac80de5 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -259,14 +259,15 @@ static int __init msr_init(void) goto out_chrdev; } msr_class->devnode = msr_devnode; - get_online_cpus(); + + cpu_notifier_register_begin(); for_each_online_cpu(i) { err = msr_device_create(i); if (err != 0) goto out_class; } - register_hotcpu_notifier(&msr_class_cpu_notifier); - put_online_cpus(); + __register_hotcpu_notifier(&msr_class_cpu_notifier); + cpu_notifier_register_done(); err = 0; goto out; @@ -275,7 +276,7 @@ out_class: i = 0; for_each_online_cpu(i) msr_device_destroy(i); - put_online_cpus(); + cpu_notifier_register_done(); class_destroy(msr_class); out_chrdev: __unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr"); @@ -286,13 +287,14 @@ out: static void __exit msr_exit(void) { int cpu = 0; - get_online_cpus(); + + cpu_notifier_register_begin(); for_each_online_cpu(cpu) msr_device_destroy(cpu); class_destroy(msr_class); __unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr"); - unregister_hotcpu_notifier(&msr_class_cpu_notifier); - put_online_cpus(); + __unregister_hotcpu_notifier(&msr_class_cpu_notifier); + cpu_notifier_register_done(); } module_init(msr_init); diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 299d49302e7d..0497f719977d 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -1207,23 +1207,31 @@ error: return ret; } -static inline int __init determine_tce_table_size(u64 ram) +static inline int __init determine_tce_table_size(void) { int ret; if (specified_table_size != TCE_TABLE_SIZE_UNSPECIFIED) return specified_table_size; - /* - * Table sizes are from 0 to 7 (TCE_TABLE_SIZE_64K to - * TCE_TABLE_SIZE_8M). Table size 0 has 8K entries and each - * larger table size has twice as many entries, so shift the - * max ram address by 13 to divide by 8K and then look at the - * order of the result to choose between 0-7. - */ - ret = get_order(ram >> 13); - if (ret > TCE_TABLE_SIZE_8M) + if (is_kdump_kernel() && saved_max_pfn) { + /* + * Table sizes are from 0 to 7 (TCE_TABLE_SIZE_64K to + * TCE_TABLE_SIZE_8M). Table size 0 has 8K entries and each + * larger table size has twice as many entries, so shift the + * max ram address by 13 to divide by 8K and then look at the + * order of the result to choose between 0-7. + */ + ret = get_order((saved_max_pfn * PAGE_SIZE) >> 13); + if (ret > TCE_TABLE_SIZE_8M) + ret = TCE_TABLE_SIZE_8M; + } else { + /* + * Use 8M by default (suggested by Muli) if it's not + * kdump kernel and saved_max_pfn isn't set. + */ ret = TCE_TABLE_SIZE_8M; + } return ret; } @@ -1418,8 +1426,7 @@ int __init detect_calgary(void) return -ENOMEM; } - specified_table_size = determine_tce_table_size((is_kdump_kernel() ? - saved_max_pfn : max_pfn) * PAGE_SIZE); + specified_table_size = determine_tce_table_size(); for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) { struct calgary_bus_info *info = &bus_info[bus]; diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 9ea287666c65..8b3b3eb3cead 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -348,9 +348,13 @@ static int __init vsyscall_init(void) { BUG_ON(VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE)); + cpu_notifier_register_begin(); + on_each_cpu(cpu_vsyscall_init, NULL, 1); /* notifier priority > KVM */ - hotcpu_notifier(cpu_vsyscall_notifier, 30); + __hotcpu_notifier(cpu_vsyscall_notifier, 30); + + cpu_notifier_register_done(); return 0; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index d1c55f8722c6..9d1b5cd4d34c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5422,7 +5422,8 @@ static void kvm_timer_init(void) int cpu; max_tsc_khz = tsc_khz; - register_hotcpu_notifier(&kvmclock_cpu_notifier_block); + + cpu_notifier_register_begin(); if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) { #ifdef CONFIG_CPU_FREQ struct cpufreq_policy policy; @@ -5439,6 +5440,10 @@ static void kvm_timer_init(void) pr_debug("kvm: max_tsc_khz = %ld\n", max_tsc_khz); for_each_online_cpu(cpu) smp_call_function_single(cpu, tsc_khz_changed, NULL, 1); + + __register_hotcpu_notifier(&kvmclock_cpu_notifier_block); + cpu_notifier_register_done(); + } static DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu); diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 799580cabc78..597ac155c91c 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -328,17 +328,6 @@ void unxlate_dev_mem_ptr(unsigned long phys, void *addr) return; } -static int __initdata early_ioremap_debug; - -static int __init early_ioremap_debug_setup(char *str) -{ - early_ioremap_debug = 1; - - return 0; -} -early_param("early_ioremap_debug", early_ioremap_debug_setup); - -static __initdata int after_paging_init; static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] __page_aligned_bss; static inline pmd_t * __init early_ioremap_pmd(unsigned long addr) @@ -362,18 +351,11 @@ bool __init is_early_ioremap_ptep(pte_t *ptep) return ptep >= &bm_pte[0] && ptep < &bm_pte[PAGE_SIZE/sizeof(pte_t)]; } -static unsigned long slot_virt[FIX_BTMAPS_SLOTS] __initdata; - void __init early_ioremap_init(void) { pmd_t *pmd; - int i; - if (early_ioremap_debug) - printk(KERN_INFO "early_ioremap_init()\n"); - - for (i = 0; i < FIX_BTMAPS_SLOTS; i++) - slot_virt[i] = __fix_to_virt(FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*i); + early_ioremap_setup(); pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)); memset(bm_pte, 0, sizeof(bm_pte)); @@ -402,13 +384,8 @@ void __init early_ioremap_init(void) } } -void __init early_ioremap_reset(void) -{ - after_paging_init = 1; -} - -static void __init __early_set_fixmap(enum fixed_addresses idx, - phys_addr_t phys, pgprot_t flags) +void __init __early_set_fixmap(enum fixed_addresses idx, + phys_addr_t phys, pgprot_t flags) { unsigned long addr = __fix_to_virt(idx); pte_t *pte; @@ -425,198 +402,3 @@ static void __init __early_set_fixmap(enum fixed_addresses idx, pte_clear(&init_mm, addr, pte); __flush_tlb_one(addr); } - -static inline void __init early_set_fixmap(enum fixed_addresses idx, - phys_addr_t phys, pgprot_t prot) -{ - if (after_paging_init) - __set_fixmap(idx, phys, prot); - else - __early_set_fixmap(idx, phys, prot); -} - -static inline void __init early_clear_fixmap(enum fixed_addresses idx) -{ - if (after_paging_init) - clear_fixmap(idx); - else - __early_set_fixmap(idx, 0, __pgprot(0)); -} - -static void __iomem *prev_map[FIX_BTMAPS_SLOTS] __initdata; -static unsigned long prev_size[FIX_BTMAPS_SLOTS] __initdata; - -void __init fixup_early_ioremap(void) -{ - int i; - - for (i = 0; i < FIX_BTMAPS_SLOTS; i++) { - if (prev_map[i]) { - WARN_ON(1); - break; - } - } - - early_ioremap_init(); -} - -static int __init check_early_ioremap_leak(void) -{ - int count = 0; - int i; - - for (i = 0; i < FIX_BTMAPS_SLOTS; i++) - if (prev_map[i]) - count++; - - if (!count) - return 0; - WARN(1, KERN_WARNING - "Debug warning: early ioremap leak of %d areas detected.\n", - count); - printk(KERN_WARNING - "please boot with early_ioremap_debug and report the dmesg.\n"); - - return 1; -} -late_initcall(check_early_ioremap_leak); - -static void __init __iomem * -__early_ioremap(resource_size_t phys_addr, unsigned long size, pgprot_t prot) -{ - unsigned long offset; - resource_size_t last_addr; - unsigned int nrpages; - enum fixed_addresses idx; - int i, slot; - - WARN_ON(system_state != SYSTEM_BOOTING); - - slot = -1; - for (i = 0; i < FIX_BTMAPS_SLOTS; i++) { - if (!prev_map[i]) { - slot = i; - break; - } - } - - if (slot < 0) { - printk(KERN_INFO "%s(%08llx, %08lx) not found slot\n", - __func__, (u64)phys_addr, size); - WARN_ON(1); - return NULL; - } - - if (early_ioremap_debug) { - printk(KERN_INFO "%s(%08llx, %08lx) [%d] => ", - __func__, (u64)phys_addr, size, slot); - dump_stack(); - } - - /* Don't allow wraparound or zero size */ - last_addr = phys_addr + size - 1; - if (!size || last_addr < phys_addr) { - WARN_ON(1); - return NULL; - } - - prev_size[slot] = size; - /* - * Mappings have to be page-aligned - */ - offset = phys_addr & ~PAGE_MASK; - phys_addr &= PAGE_MASK; - size = PAGE_ALIGN(last_addr + 1) - phys_addr; - - /* - * Mappings have to fit in the FIX_BTMAP area. - */ - nrpages = size >> PAGE_SHIFT; - if (nrpages > NR_FIX_BTMAPS) { - WARN_ON(1); - return NULL; - } - - /* - * Ok, go for it.. - */ - idx = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*slot; - while (nrpages > 0) { - early_set_fixmap(idx, phys_addr, prot); - phys_addr += PAGE_SIZE; - --idx; - --nrpages; - } - if (early_ioremap_debug) - printk(KERN_CONT "%08lx + %08lx\n", offset, slot_virt[slot]); - - prev_map[slot] = (void __iomem *)(offset + slot_virt[slot]); - return prev_map[slot]; -} - -/* Remap an IO device */ -void __init __iomem * -early_ioremap(resource_size_t phys_addr, unsigned long size) -{ - return __early_ioremap(phys_addr, size, PAGE_KERNEL_IO); -} - -/* Remap memory */ -void __init __iomem * -early_memremap(resource_size_t phys_addr, unsigned long size) -{ - return __early_ioremap(phys_addr, size, PAGE_KERNEL); -} - -void __init early_iounmap(void __iomem *addr, unsigned long size) -{ - unsigned long virt_addr; - unsigned long offset; - unsigned int nrpages; - enum fixed_addresses idx; - int i, slot; - - slot = -1; - for (i = 0; i < FIX_BTMAPS_SLOTS; i++) { - if (prev_map[i] == addr) { - slot = i; - break; - } - } - - if (slot < 0) { - printk(KERN_INFO "early_iounmap(%p, %08lx) not found slot\n", - addr, size); - WARN_ON(1); - return; - } - - if (prev_size[slot] != size) { - printk(KERN_INFO "early_iounmap(%p, %08lx) [%d] size not consistent %08lx\n", - addr, size, slot, prev_size[slot]); - WARN_ON(1); - return; - } - - if (early_ioremap_debug) { - printk(KERN_INFO "early_iounmap(%p, %08lx) [%d]\n", addr, - size, slot); - dump_stack(); - } - - virt_addr = (unsigned long)addr; - if (virt_addr < fix_to_virt(FIX_BTMAP_BEGIN)) { - WARN_ON(1); - return; - } - offset = virt_addr & ~PAGE_MASK; - nrpages = PAGE_ALIGN(offset + size) >> PAGE_SHIFT; - - idx = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*slot; - while (nrpages > 0) { - early_clear_fixmap(idx); - --idx; - --nrpages; - } - prev_map[slot] = NULL; -} diff --git a/arch/x86/mm/kmemcheck/kmemcheck.c b/arch/x86/mm/kmemcheck/kmemcheck.c index d87dd6d042d6..dd89a13f1051 100644 --- a/arch/x86/mm/kmemcheck/kmemcheck.c +++ b/arch/x86/mm/kmemcheck/kmemcheck.c @@ -78,10 +78,16 @@ early_initcall(kmemcheck_init); */ static int __init param_kmemcheck(char *str) { + int val; + int ret; + if (!str) return -EINVAL; - sscanf(str, "%d", &kmemcheck_enabled); + ret = kstrtoint(str, 0, &val); + if (ret) + return ret; + kmemcheck_enabled = val; return 0; } diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c index a69bcb8c7621..4dd8cf652579 100644 --- a/arch/x86/mm/pgtable_32.c +++ b/arch/x86/mm/pgtable_32.c @@ -127,7 +127,7 @@ static int __init parse_reservetop(char *arg) address = memparse(arg, &arg); reserve_top_address(address); - fixup_early_ioremap(); + early_ioremap_init(); return 0; } early_param("reservetop", parse_reservetop); diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 6890d8498e0b..379e8bd0deea 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -494,14 +494,19 @@ static int nmi_setup(void) if (err) goto fail; + cpu_notifier_register_begin(); + + /* Use get/put_online_cpus() to protect 'nmi_enabled' */ get_online_cpus(); - register_cpu_notifier(&oprofile_cpu_nb); nmi_enabled = 1; /* make nmi_enabled visible to the nmi handler: */ smp_mb(); on_each_cpu(nmi_cpu_setup, NULL, 1); + __register_cpu_notifier(&oprofile_cpu_nb); put_online_cpus(); + cpu_notifier_register_done(); + return 0; fail: free_msrs(); @@ -512,12 +517,18 @@ static void nmi_shutdown(void) { struct op_msrs *msrs; + cpu_notifier_register_begin(); + + /* Use get/put_online_cpus() to protect 'nmi_enabled' & 'ctr_running' */ get_online_cpus(); - unregister_cpu_notifier(&oprofile_cpu_nb); on_each_cpu(nmi_cpu_shutdown, NULL, 1); nmi_enabled = 0; ctr_running = 0; + __unregister_cpu_notifier(&oprofile_cpu_nb); put_online_cpus(); + + cpu_notifier_register_done(); + /* make variables visible to the nmi handler: */ smp_mb(); unregister_nmi_handler(NMI_LOCAL, "oprofile"); diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c index a313a7fb6b86..e88f4c53d7f6 100644 --- a/arch/x86/pci/amd_bus.c +++ b/arch/x86/pci/amd_bus.c @@ -370,10 +370,13 @@ static int __init pci_io_ecs_init(void) if (early_pci_allowed()) pci_enable_pci_io_ecs(); - register_cpu_notifier(&amd_cpu_notifier); + cpu_notifier_register_begin(); for_each_online_cpu(cpu) amd_cpu_notify(&amd_cpu_notifier, (unsigned long)CPU_ONLINE, (void *)(long)cpu); + __register_cpu_notifier(&amd_cpu_notifier); + cpu_notifier_register_done(); + pci_probe |= PCI_HAS_IO_ECS; return 0; diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index 103e702ec5a7..905956f16465 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -178,6 +178,7 @@ static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) i = 0; list_for_each_entry(msidesc, &dev->msi_list, list) { irq = xen_bind_pirq_msi_to_irq(dev, msidesc, v[i], + (type == PCI_CAP_ID_MSI) ? nvec : 1, (type == PCI_CAP_ID_MSIX) ? "pcifront-msi-x" : "pcifront-msi", @@ -245,6 +246,7 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) "xen: msi already bound to pirq=%d\n", pirq); } irq = xen_bind_pirq_msi_to_irq(dev, msidesc, pirq, + (type == PCI_CAP_ID_MSI) ? nvec : 1, (type == PCI_CAP_ID_MSIX) ? "msi-x" : "msi", DOMID_SELF); @@ -269,9 +271,6 @@ static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) int ret = 0; struct msi_desc *msidesc; - if (type == PCI_CAP_ID_MSI && nvec > 1) - return 1; - list_for_each_entry(msidesc, &dev->msi_list, list) { struct physdev_map_pirq map_irq; domid_t domid; @@ -291,7 +290,10 @@ static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) (pci_domain_nr(dev->bus) << 16); map_irq.devfn = dev->devfn; - if (type == PCI_CAP_ID_MSIX) { + if (type == PCI_CAP_ID_MSI && nvec > 1) { + map_irq.type = MAP_PIRQ_TYPE_MULTI_MSI; + map_irq.entry_nr = nvec; + } else if (type == PCI_CAP_ID_MSIX) { int pos; u32 table_offset, bir; @@ -308,6 +310,16 @@ static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) if (pci_seg_supported) ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq); + if (type == PCI_CAP_ID_MSI && nvec > 1 && ret) { + /* + * If MAP_PIRQ_TYPE_MULTI_MSI is not available + * there's nothing else we can do in this case. + * Just set ret > 0 so driver can retry with + * single MSI. + */ + ret = 1; + goto out; + } if (ret == -EINVAL && !pci_domain_nr(dev->bus)) { map_irq.type = MAP_PIRQ_TYPE_MSI; map_irq.index = -1; @@ -324,11 +336,10 @@ static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) goto out; } - ret = xen_bind_pirq_msi_to_irq(dev, msidesc, - map_irq.pirq, - (type == PCI_CAP_ID_MSIX) ? - "msi-x" : "msi", - domid); + ret = xen_bind_pirq_msi_to_irq(dev, msidesc, map_irq.pirq, + (type == PCI_CAP_ID_MSI) ? nvec : 1, + (type == PCI_CAP_ID_MSIX) ? "msi-x" : "msi", + domid); if (ret < 0) goto out; } diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl index a12bddc7ccea..04376ac3d9ef 100644 --- a/arch/x86/syscalls/syscall_64.tbl +++ b/arch/x86/syscalls/syscall_64.tbl @@ -322,6 +322,7 @@ 313 common finit_module sys_finit_module 314 common sched_setattr sys_sched_setattr 315 common sched_getattr sys_sched_getattr +316 common renameat2 sys_renameat2 # # x32-specific system call numbers start at 512 to avoid cache impact diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index 9c50cc2e403b..e88fda867a33 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig @@ -19,11 +19,6 @@ config XEN_DOM0 depends on XEN && PCI_XEN && SWIOTLB_XEN depends on X86_LOCAL_APIC && X86_IO_APIC && ACPI && PCI -# Dummy symbol since people have come to rely on the PRIVILEGED_GUEST -# name in tools. -config XEN_PRIVILEGED_GUEST - def_bool XEN_DOM0 - config XEN_PVHVM def_bool y depends on XEN && PCI && X86_LOCAL_APIC diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 696c694986d0..85e5d78c9874 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -881,6 +881,65 @@ static unsigned long mfn_hash(unsigned long mfn) return hash_long(mfn, M2P_OVERRIDE_HASH_SHIFT); } +int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops, + struct gnttab_map_grant_ref *kmap_ops, + struct page **pages, unsigned int count) +{ + int i, ret = 0; + bool lazy = false; + pte_t *pte; + + if (xen_feature(XENFEAT_auto_translated_physmap)) + return 0; + + if (kmap_ops && + !in_interrupt() && + paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) { + arch_enter_lazy_mmu_mode(); + lazy = true; + } + + for (i = 0; i < count; i++) { + unsigned long mfn, pfn; + + /* Do not add to override if the map failed. */ + if (map_ops[i].status) + continue; + + if (map_ops[i].flags & GNTMAP_contains_pte) { + pte = (pte_t *) (mfn_to_virt(PFN_DOWN(map_ops[i].host_addr)) + + (map_ops[i].host_addr & ~PAGE_MASK)); + mfn = pte_mfn(*pte); + } else { + mfn = PFN_DOWN(map_ops[i].dev_bus_addr); + } + pfn = page_to_pfn(pages[i]); + + WARN_ON(PagePrivate(pages[i])); + SetPagePrivate(pages[i]); + set_page_private(pages[i], mfn); + pages[i]->index = pfn_to_mfn(pfn); + + if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)))) { + ret = -ENOMEM; + goto out; + } + + if (kmap_ops) { + ret = m2p_add_override(mfn, pages[i], &kmap_ops[i]); + if (ret) + goto out; + } + } + +out: + if (lazy) + arch_leave_lazy_mmu_mode(); + + return ret; +} +EXPORT_SYMBOL_GPL(set_foreign_p2m_mapping); + /* Add an MFN override for a particular page */ int m2p_add_override(unsigned long mfn, struct page *page, struct gnttab_map_grant_ref *kmap_op) @@ -899,13 +958,6 @@ int m2p_add_override(unsigned long mfn, struct page *page, "m2p_add_override: pfn %lx not mapped", pfn)) return -EINVAL; } - WARN_ON(PagePrivate(page)); - SetPagePrivate(page); - set_page_private(page, mfn); - page->index = pfn_to_mfn(pfn); - - if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)))) - return -ENOMEM; if (kmap_op != NULL) { if (!PageHighMem(page)) { @@ -943,20 +995,62 @@ int m2p_add_override(unsigned long mfn, struct page *page, return 0; } EXPORT_SYMBOL_GPL(m2p_add_override); + +int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops, + struct gnttab_map_grant_ref *kmap_ops, + struct page **pages, unsigned int count) +{ + int i, ret = 0; + bool lazy = false; + + if (xen_feature(XENFEAT_auto_translated_physmap)) + return 0; + + if (kmap_ops && + !in_interrupt() && + paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) { + arch_enter_lazy_mmu_mode(); + lazy = true; + } + + for (i = 0; i < count; i++) { + unsigned long mfn = get_phys_to_machine(page_to_pfn(pages[i])); + unsigned long pfn = page_to_pfn(pages[i]); + + if (mfn == INVALID_P2M_ENTRY || !(mfn & FOREIGN_FRAME_BIT)) { + ret = -EINVAL; + goto out; + } + + set_page_private(pages[i], INVALID_P2M_ENTRY); + WARN_ON(!PagePrivate(pages[i])); + ClearPagePrivate(pages[i]); + set_phys_to_machine(pfn, pages[i]->index); + + if (kmap_ops) + ret = m2p_remove_override(pages[i], &kmap_ops[i], mfn); + if (ret) + goto out; + } + +out: + if (lazy) + arch_leave_lazy_mmu_mode(); + return ret; +} +EXPORT_SYMBOL_GPL(clear_foreign_p2m_mapping); + int m2p_remove_override(struct page *page, - struct gnttab_map_grant_ref *kmap_op) + struct gnttab_map_grant_ref *kmap_op, + unsigned long mfn) { unsigned long flags; - unsigned long mfn; unsigned long pfn; unsigned long uninitialized_var(address); unsigned level; pte_t *ptep = NULL; pfn = page_to_pfn(page); - mfn = get_phys_to_machine(pfn); - if (mfn == INVALID_P2M_ENTRY || !(mfn & FOREIGN_FRAME_BIT)) - return -EINVAL; if (!PageHighMem(page)) { address = (unsigned long)__va(pfn << PAGE_SHIFT); @@ -970,10 +1064,7 @@ int m2p_remove_override(struct page *page, spin_lock_irqsave(&m2p_override_lock, flags); list_del(&page->lru); spin_unlock_irqrestore(&m2p_override_lock, flags); - WARN_ON(!PagePrivate(page)); - ClearPagePrivate(page); - set_phys_to_machine(pfn, page->index); if (kmap_op != NULL) { if (!PageHighMem(page)) { struct multicall_space mcs; |