summaryrefslogtreecommitdiff
path: root/arch/x86/include/asm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/include/asm')
-rw-r--r--arch/x86/include/asm/acpi.h2
-rw-r--r--arch/x86/include/asm/apic.h7
-rw-r--r--arch/x86/include/asm/atomic.h33
-rw-r--r--arch/x86/include/asm/atomic64_64.h46
-rw-r--r--arch/x86/include/asm/bug.h80
-rw-r--r--arch/x86/include/asm/clocksource.h3
-rw-r--r--arch/x86/include/asm/cmpxchg.h70
-rw-r--r--arch/x86/include/asm/cpufeatures.h3
-rw-r--r--arch/x86/include/asm/crypto/glue_helper.h10
-rw-r--r--arch/x86/include/asm/desc.h147
-rw-r--r--arch/x86/include/asm/disabled-features.h8
-rw-r--r--arch/x86/include/asm/e820.h73
-rw-r--r--arch/x86/include/asm/e820/api.h50
-rw-r--r--arch/x86/include/asm/e820/types.h104
-rw-r--r--arch/x86/include/asm/elf.h30
-rw-r--r--arch/x86/include/asm/fixmap.h4
-rw-r--r--arch/x86/include/asm/gart.h4
-rw-r--r--arch/x86/include/asm/intel-family.h6
-rw-r--r--arch/x86/include/asm/intel_rdt.h157
-rw-r--r--arch/x86/include/asm/kasan.h9
-rw-r--r--arch/x86/include/asm/kexec.h1
-rw-r--r--arch/x86/include/asm/kprobes.h7
-rw-r--r--arch/x86/include/asm/kvm_page_track.h1
-rw-r--r--arch/x86/include/asm/mce.h12
-rw-r--r--arch/x86/include/asm/mmu_context.h4
-rw-r--r--arch/x86/include/asm/mpspec.h4
-rw-r--r--arch/x86/include/asm/mshyperv.h54
-rw-r--r--arch/x86/include/asm/msr-index.h11
-rw-r--r--arch/x86/include/asm/page_64.h16
-rw-r--r--arch/x86/include/asm/page_64_types.h10
-rw-r--r--arch/x86/include/asm/paravirt.h54
-rw-r--r--arch/x86/include/asm/paravirt_types.h17
-rw-r--r--arch/x86/include/asm/pci_x86.h2
-rw-r--r--arch/x86/include/asm/pgalloc.h37
-rw-r--r--arch/x86/include/asm/pgtable-2level_types.h1
-rw-r--r--arch/x86/include/asm/pgtable-3level_types.h1
-rw-r--r--arch/x86/include/asm/pgtable.h87
-rw-r--r--arch/x86/include/asm/pgtable_32.h1
-rw-r--r--arch/x86/include/asm/pgtable_64.h23
-rw-r--r--arch/x86/include/asm/pgtable_64_types.h32
-rw-r--r--arch/x86/include/asm/pgtable_types.h46
-rw-r--r--arch/x86/include/asm/pmem.h42
-rw-r--r--arch/x86/include/asm/processor.h19
-rw-r--r--arch/x86/include/asm/proto.h4
-rw-r--r--arch/x86/include/asm/reboot.h1
-rw-r--r--arch/x86/include/asm/required-features.h8
-rw-r--r--arch/x86/include/asm/smp.h35
-rw-r--r--arch/x86/include/asm/sparsemem.h9
-rw-r--r--arch/x86/include/asm/stackprotector.h2
-rw-r--r--arch/x86/include/asm/thread_info.h32
-rw-r--r--arch/x86/include/asm/timer.h2
-rw-r--r--arch/x86/include/asm/tlbflush.h19
-rw-r--r--arch/x86/include/asm/uaccess.h70
-rw-r--r--arch/x86/include/asm/uaccess_32.h127
-rw-r--r--arch/x86/include/asm/uaccess_64.h128
-rw-r--r--arch/x86/include/asm/unistd.h1
-rw-r--r--arch/x86/include/asm/unwind.h8
-rw-r--r--arch/x86/include/asm/uv/uv_bau.h82
-rw-r--r--arch/x86/include/asm/uv/uv_hub.h8
-rw-r--r--arch/x86/include/asm/vdso.h1
-rw-r--r--arch/x86/include/asm/xen/page.h9
61 files changed, 1176 insertions, 698 deletions
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 395b69551fce..2efc768e4362 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -52,6 +52,8 @@ extern u8 acpi_sci_flags;
extern int acpi_sci_override_gsi;
void acpi_pic_sci_set_trigger(unsigned int, u16);
+struct device;
+
extern int (*__acpi_register_gsi)(struct device *dev, u32 gsi,
int trigger, int polarity);
extern void (*__acpi_unregister_gsi)(u32 gsi);
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 730ef65e8393..bdffcd9eab2b 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -252,12 +252,6 @@ static inline int x2apic_enabled(void) { return 0; }
#define x2apic_supported() (0)
#endif /* !CONFIG_X86_X2APIC */
-#ifdef CONFIG_X86_64
-#define SET_APIC_ID(x) (apic->set_apic_id(x))
-#else
-
-#endif
-
/*
* Copyright 2004 James Cleverdon, IBM.
* Subject to the GNU Public License, v.2
@@ -299,6 +293,7 @@ struct apic {
int (*phys_pkg_id)(int cpuid_apic, int index_msb);
unsigned int (*get_apic_id)(unsigned long x);
+ /* Can't be NULL on 64-bit */
unsigned long (*set_apic_id)(unsigned int id);
int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask,
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index 14635c5ea025..caa5798c92f4 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -186,6 +186,12 @@ static __always_inline int atomic_cmpxchg(atomic_t *v, int old, int new)
return cmpxchg(&v->counter, old, new);
}
+#define atomic_try_cmpxchg atomic_try_cmpxchg
+static __always_inline bool atomic_try_cmpxchg(atomic_t *v, int *old, int new)
+{
+ return try_cmpxchg(&v->counter, old, new);
+}
+
static inline int atomic_xchg(atomic_t *v, int new)
{
return xchg(&v->counter, new);
@@ -201,16 +207,12 @@ static inline void atomic_##op(int i, atomic_t *v) \
}
#define ATOMIC_FETCH_OP(op, c_op) \
-static inline int atomic_fetch_##op(int i, atomic_t *v) \
+static inline int atomic_fetch_##op(int i, atomic_t *v) \
{ \
- int old, val = atomic_read(v); \
- for (;;) { \
- old = atomic_cmpxchg(v, val, val c_op i); \
- if (old == val) \
- break; \
- val = old; \
- } \
- return old; \
+ int val = atomic_read(v); \
+ do { \
+ } while (!atomic_try_cmpxchg(v, &val, val c_op i)); \
+ return val; \
}
#define ATOMIC_OPS(op, c_op) \
@@ -236,16 +238,11 @@ ATOMIC_OPS(xor, ^)
*/
static __always_inline int __atomic_add_unless(atomic_t *v, int a, int u)
{
- int c, old;
- c = atomic_read(v);
- for (;;) {
- if (unlikely(c == (u)))
- break;
- old = atomic_cmpxchg((v), c, c + (a));
- if (likely(old == c))
+ int c = atomic_read(v);
+ do {
+ if (unlikely(c == u))
break;
- c = old;
- }
+ } while (!atomic_try_cmpxchg(v, &c, c + a));
return c;
}
diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h
index 89ed2f6ae2f7..6189a433c9a9 100644
--- a/arch/x86/include/asm/atomic64_64.h
+++ b/arch/x86/include/asm/atomic64_64.h
@@ -176,6 +176,12 @@ static inline long atomic64_cmpxchg(atomic64_t *v, long old, long new)
return cmpxchg(&v->counter, old, new);
}
+#define atomic64_try_cmpxchg atomic64_try_cmpxchg
+static __always_inline bool atomic64_try_cmpxchg(atomic64_t *v, long *old, long new)
+{
+ return try_cmpxchg(&v->counter, old, new);
+}
+
static inline long atomic64_xchg(atomic64_t *v, long new)
{
return xchg(&v->counter, new);
@@ -192,17 +198,12 @@ static inline long atomic64_xchg(atomic64_t *v, long new)
*/
static inline bool atomic64_add_unless(atomic64_t *v, long a, long u)
{
- long c, old;
- c = atomic64_read(v);
- for (;;) {
- if (unlikely(c == (u)))
- break;
- old = atomic64_cmpxchg((v), c, c + (a));
- if (likely(old == c))
- break;
- c = old;
- }
- return c != (u);
+ long c = atomic64_read(v);
+ do {
+ if (unlikely(c == u))
+ return false;
+ } while (!atomic64_try_cmpxchg(v, &c, c + a));
+ return true;
}
#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0)
@@ -216,17 +217,12 @@ static inline bool atomic64_add_unless(atomic64_t *v, long a, long u)
*/
static inline long atomic64_dec_if_positive(atomic64_t *v)
{
- long c, old, dec;
- c = atomic64_read(v);
- for (;;) {
+ long dec, c = atomic64_read(v);
+ do {
dec = c - 1;
if (unlikely(dec < 0))
break;
- old = atomic64_cmpxchg((v), c, dec);
- if (likely(old == c))
- break;
- c = old;
- }
+ } while (!atomic64_try_cmpxchg(v, &c, dec));
return dec;
}
@@ -242,14 +238,10 @@ static inline void atomic64_##op(long i, atomic64_t *v) \
#define ATOMIC64_FETCH_OP(op, c_op) \
static inline long atomic64_fetch_##op(long i, atomic64_t *v) \
{ \
- long old, val = atomic64_read(v); \
- for (;;) { \
- old = atomic64_cmpxchg(v, val, val c_op i); \
- if (old == val) \
- break; \
- val = old; \
- } \
- return old; \
+ long val = atomic64_read(v); \
+ do { \
+ } while (!atomic64_try_cmpxchg(v, &val, val c_op i)); \
+ return val; \
}
#define ATOMIC64_OPS(op, c_op) \
diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h
index ba38ebbaced3..39e702d90cdb 100644
--- a/arch/x86/include/asm/bug.h
+++ b/arch/x86/include/asm/bug.h
@@ -1,36 +1,82 @@
#ifndef _ASM_X86_BUG_H
#define _ASM_X86_BUG_H
-#define HAVE_ARCH_BUG
+#include <linux/stringify.h>
-#ifdef CONFIG_DEBUG_BUGVERBOSE
+/*
+ * Since some emulators terminate on UD2, we cannot use it for WARN.
+ * Since various instruction decoders disagree on the length of UD1,
+ * we cannot use it either. So use UD0 for WARN.
+ *
+ * (binutils knows about "ud1" but {en,de}codes it as 2 bytes, whereas
+ * our kernel decoder thinks it takes a ModRM byte, which seems consistent
+ * with various things like the Intel SDM instruction encoding rules)
+ */
+
+#define ASM_UD0 ".byte 0x0f, 0xff"
+#define ASM_UD1 ".byte 0x0f, 0xb9" /* + ModRM */
+#define ASM_UD2 ".byte 0x0f, 0x0b"
+
+#define INSN_UD0 0xff0f
+#define INSN_UD2 0x0b0f
+
+#define LEN_UD0 2
+
+#ifdef CONFIG_GENERIC_BUG
#ifdef CONFIG_X86_32
-# define __BUG_C0 "2:\t.long 1b, %c0\n"
+# define __BUG_REL(val) ".long " __stringify(val)
#else
-# define __BUG_C0 "2:\t.long 1b - 2b, %c0 - 2b\n"
+# define __BUG_REL(val) ".long " __stringify(val) " - 2b"
#endif
-#define BUG() \
-do { \
- asm volatile("1:\tud2\n" \
- ".pushsection __bug_table,\"a\"\n" \
- __BUG_C0 \
- "\t.word %c1, 0\n" \
- "\t.org 2b+%c2\n" \
- ".popsection" \
- : : "i" (__FILE__), "i" (__LINE__), \
- "i" (sizeof(struct bug_entry))); \
- unreachable(); \
+#ifdef CONFIG_DEBUG_BUGVERBOSE
+
+#define _BUG_FLAGS(ins, flags) \
+do { \
+ asm volatile("1:\t" ins "\n" \
+ ".pushsection __bug_table,\"a\"\n" \
+ "2:\t" __BUG_REL(1b) "\t# bug_entry::bug_addr\n" \
+ "\t" __BUG_REL(%c0) "\t# bug_entry::file\n" \
+ "\t.word %c1" "\t# bug_entry::line\n" \
+ "\t.word %c2" "\t# bug_entry::flags\n" \
+ "\t.org 2b+%c3\n" \
+ ".popsection" \
+ : : "i" (__FILE__), "i" (__LINE__), \
+ "i" (flags), \
+ "i" (sizeof(struct bug_entry))); \
} while (0)
+#else /* !CONFIG_DEBUG_BUGVERBOSE */
+
+#define _BUG_FLAGS(ins, flags) \
+do { \
+ asm volatile("1:\t" ins "\n" \
+ ".pushsection __bug_table,\"a\"\n" \
+ "2:\t" __BUG_REL(1b) "\t# bug_entry::bug_addr\n" \
+ "\t.word %c0" "\t# bug_entry::flags\n" \
+ "\t.org 2b+%c1\n" \
+ ".popsection" \
+ : : "i" (flags), \
+ "i" (sizeof(struct bug_entry))); \
+} while (0)
+
+#endif /* CONFIG_DEBUG_BUGVERBOSE */
+
#else
+
+#define _BUG_FLAGS(ins, flags) asm volatile(ins)
+
+#endif /* CONFIG_GENERIC_BUG */
+
+#define HAVE_ARCH_BUG
#define BUG() \
do { \
- asm volatile("ud2"); \
+ _BUG_FLAGS(ASM_UD2, 0); \
unreachable(); \
} while (0)
-#endif
+
+#define __WARN_FLAGS(flags) _BUG_FLAGS(ASM_UD0, BUGFLAG_WARNING|(flags))
#include <asm-generic/bug.h>
diff --git a/arch/x86/include/asm/clocksource.h b/arch/x86/include/asm/clocksource.h
index eae33c7170c8..47bea8cadbd0 100644
--- a/arch/x86/include/asm/clocksource.h
+++ b/arch/x86/include/asm/clocksource.h
@@ -6,7 +6,8 @@
#define VCLOCK_NONE 0 /* No vDSO clock available. */
#define VCLOCK_TSC 1 /* vDSO should use vread_tsc. */
#define VCLOCK_PVCLOCK 2 /* vDSO should use vread_pvclock. */
-#define VCLOCK_MAX 2
+#define VCLOCK_HVCLOCK 3 /* vDSO should use vread_hvclock. */
+#define VCLOCK_MAX 3
struct arch_clocksource_data {
int vclock_mode;
diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h
index 97848cdfcb1a..d90296d061e8 100644
--- a/arch/x86/include/asm/cmpxchg.h
+++ b/arch/x86/include/asm/cmpxchg.h
@@ -153,6 +153,76 @@ extern void __add_wrong_size(void)
#define cmpxchg_local(ptr, old, new) \
__cmpxchg_local(ptr, old, new, sizeof(*(ptr)))
+
+#define __raw_try_cmpxchg(_ptr, _pold, _new, size, lock) \
+({ \
+ bool success; \
+ __typeof__(_ptr) _old = (_pold); \
+ __typeof__(*(_ptr)) __old = *_old; \
+ __typeof__(*(_ptr)) __new = (_new); \
+ switch (size) { \
+ case __X86_CASE_B: \
+ { \
+ volatile u8 *__ptr = (volatile u8 *)(_ptr); \
+ asm volatile(lock "cmpxchgb %[new], %[ptr]" \
+ CC_SET(z) \
+ : CC_OUT(z) (success), \
+ [ptr] "+m" (*__ptr), \
+ [old] "+a" (__old) \
+ : [new] "q" (__new) \
+ : "memory"); \
+ break; \
+ } \
+ case __X86_CASE_W: \
+ { \
+ volatile u16 *__ptr = (volatile u16 *)(_ptr); \
+ asm volatile(lock "cmpxchgw %[new], %[ptr]" \
+ CC_SET(z) \
+ : CC_OUT(z) (success), \
+ [ptr] "+m" (*__ptr), \
+ [old] "+a" (__old) \
+ : [new] "r" (__new) \
+ : "memory"); \
+ break; \
+ } \
+ case __X86_CASE_L: \
+ { \
+ volatile u32 *__ptr = (volatile u32 *)(_ptr); \
+ asm volatile(lock "cmpxchgl %[new], %[ptr]" \
+ CC_SET(z) \
+ : CC_OUT(z) (success), \
+ [ptr] "+m" (*__ptr), \
+ [old] "+a" (__old) \
+ : [new] "r" (__new) \
+ : "memory"); \
+ break; \
+ } \
+ case __X86_CASE_Q: \
+ { \
+ volatile u64 *__ptr = (volatile u64 *)(_ptr); \
+ asm volatile(lock "cmpxchgq %[new], %[ptr]" \
+ CC_SET(z) \
+ : CC_OUT(z) (success), \
+ [ptr] "+m" (*__ptr), \
+ [old] "+a" (__old) \
+ : [new] "r" (__new) \
+ : "memory"); \
+ break; \
+ } \
+ default: \
+ __cmpxchg_wrong_size(); \
+ } \
+ if (unlikely(!success)) \
+ *_old = __old; \
+ likely(success); \
+})
+
+#define __try_cmpxchg(ptr, pold, new, size) \
+ __raw_try_cmpxchg((ptr), (pold), (new), (size), LOCK_PREFIX)
+
+#define try_cmpxchg(ptr, pold, new) \
+ __try_cmpxchg((ptr), (pold), (new), sizeof(*(ptr)))
+
/*
* xadd() adds "inc" to "*ptr" and atomically returns the previous
* value of "*ptr".
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index b04bb6dfed7f..2701e5f8145b 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -187,6 +187,7 @@
* Reuse free bits when adding new feature flags!
*/
#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT */
+#define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* Intel CPUID faulting */
#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */
#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */
@@ -201,6 +202,8 @@
#define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */
#define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */
+#define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */
+
/* Virtualization flags: Linux defined, word 8 */
#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
#define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */
diff --git a/arch/x86/include/asm/crypto/glue_helper.h b/arch/x86/include/asm/crypto/glue_helper.h
index 29e53ea7d764..ed8b66de541f 100644
--- a/arch/x86/include/asm/crypto/glue_helper.h
+++ b/arch/x86/include/asm/crypto/glue_helper.h
@@ -125,16 +125,6 @@ static inline void le128_inc(le128 *i)
i->b = cpu_to_le64(b);
}
-static inline void le128_gf128mul_x_ble(le128 *dst, const le128 *src)
-{
- u64 a = le64_to_cpu(src->a);
- u64 b = le64_to_cpu(src->b);
- u64 _tt = ((s64)a >> 63) & 0x87;
-
- dst->a = cpu_to_le64((a << 1) ^ (b >> 63));
- dst->b = cpu_to_le64((b << 1) ^ _tt);
-}
-
extern int glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx,
struct blkcipher_desc *desc,
struct scatterlist *dst,
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 1548ca92ad3f..d0a21b12dd58 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -4,6 +4,7 @@
#include <asm/desc_defs.h>
#include <asm/ldt.h>
#include <asm/mmu.h>
+#include <asm/fixmap.h>
#include <linux/smp.h>
#include <linux/percpu.h>
@@ -45,11 +46,43 @@ struct gdt_page {
DECLARE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page);
-static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu)
+/* Provide the original GDT */
+static inline struct desc_struct *get_cpu_gdt_rw(unsigned int cpu)
{
return per_cpu(gdt_page, cpu).gdt;
}
+/* Provide the current original GDT */
+static inline struct desc_struct *get_current_gdt_rw(void)
+{
+ return this_cpu_ptr(&gdt_page)->gdt;
+}
+
+/* Get the fixmap index for a specific processor */
+static inline unsigned int get_cpu_gdt_ro_index(int cpu)
+{
+ return FIX_GDT_REMAP_BEGIN + cpu;
+}
+
+/* Provide the fixmap address of the remapped GDT */
+static inline struct desc_struct *get_cpu_gdt_ro(int cpu)
+{
+ unsigned int idx = get_cpu_gdt_ro_index(cpu);
+ return (struct desc_struct *)__fix_to_virt(idx);
+}
+
+/* Provide the current read-only GDT */
+static inline struct desc_struct *get_current_gdt_ro(void)
+{
+ return get_cpu_gdt_ro(smp_processor_id());
+}
+
+/* Provide the physical address of the GDT page. */
+static inline phys_addr_t get_cpu_gdt_paddr(unsigned int cpu)
+{
+ return per_cpu_ptr_to_phys(get_cpu_gdt_rw(cpu));
+}
+
#ifdef CONFIG_X86_64
static inline void pack_gate(gate_desc *gate, unsigned type, unsigned long func,
@@ -174,7 +207,7 @@ static inline void set_tssldt_descriptor(void *d, unsigned long addr, unsigned t
static inline void __set_tss_desc(unsigned cpu, unsigned int entry, void *addr)
{
- struct desc_struct *d = get_cpu_gdt_table(cpu);
+ struct desc_struct *d = get_cpu_gdt_rw(cpu);
tss_desc tss;
set_tssldt_descriptor(&tss, (unsigned long)addr, DESC_TSS,
@@ -194,22 +227,90 @@ static inline void native_set_ldt(const void *addr, unsigned int entries)
set_tssldt_descriptor(&ldt, (unsigned long)addr, DESC_LDT,
entries * LDT_ENTRY_SIZE - 1);
- write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT,
+ write_gdt_entry(get_cpu_gdt_rw(cpu), GDT_ENTRY_LDT,
&ldt, DESC_LDT);
asm volatile("lldt %w0"::"q" (GDT_ENTRY_LDT*8));
}
}
+static inline void native_load_gdt(const struct desc_ptr *dtr)
+{
+ asm volatile("lgdt %0"::"m" (*dtr));
+}
+
+static inline void native_load_idt(const struct desc_ptr *dtr)
+{
+ asm volatile("lidt %0"::"m" (*dtr));
+}
+
+static inline void native_store_gdt(struct desc_ptr *dtr)
+{
+ asm volatile("sgdt %0":"=m" (*dtr));
+}
+
+static inline void native_store_idt(struct desc_ptr *dtr)
+{
+ asm volatile("sidt %0":"=m" (*dtr));
+}
+
+/*
+ * The LTR instruction marks the TSS GDT entry as busy. On 64-bit, the GDT is
+ * a read-only remapping. To prevent a page fault, the GDT is switched to the
+ * original writeable version when needed.
+ */
+#ifdef CONFIG_X86_64
+static inline void native_load_tr_desc(void)
+{
+ struct desc_ptr gdt;
+ int cpu = raw_smp_processor_id();
+ bool restore = 0;
+ struct desc_struct *fixmap_gdt;
+
+ native_store_gdt(&gdt);
+ fixmap_gdt = get_cpu_gdt_ro(cpu);
+
+ /*
+ * If the current GDT is the read-only fixmap, swap to the original
+ * writeable version. Swap back at the end.
+ */
+ if (gdt.address == (unsigned long)fixmap_gdt) {
+ load_direct_gdt(cpu);
+ restore = 1;
+ }
+ asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8));
+ if (restore)
+ load_fixmap_gdt(cpu);
+}
+#else
static inline void native_load_tr_desc(void)
{
asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8));
}
+#endif
+
+static inline unsigned long native_store_tr(void)
+{
+ unsigned long tr;
+
+ asm volatile("str %0":"=r" (tr));
+
+ return tr;
+}
+
+static inline void native_load_tls(struct thread_struct *t, unsigned int cpu)
+{
+ struct desc_struct *gdt = get_cpu_gdt_rw(cpu);
+ unsigned int i;
+
+ for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++)
+ gdt[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i];
+}
DECLARE_PER_CPU(bool, __tss_limit_invalid);
static inline void force_reload_TR(void)
{
- struct desc_struct *d = get_cpu_gdt_table(smp_processor_id());
+ struct desc_struct *d = get_current_gdt_rw();
tss_desc tss;
memcpy(&tss, &d[GDT_ENTRY_TSS], sizeof(tss_desc));
@@ -257,44 +358,6 @@ static inline void invalidate_tss_limit(void)
this_cpu_write(__tss_limit_invalid, true);
}
-static inline void native_load_gdt(const struct desc_ptr *dtr)
-{
- asm volatile("lgdt %0"::"m" (*dtr));
-}
-
-static inline void native_load_idt(const struct desc_ptr *dtr)
-{
- asm volatile("lidt %0"::"m" (*dtr));
-}
-
-static inline void native_store_gdt(struct desc_ptr *dtr)
-{
- asm volatile("sgdt %0":"=m" (*dtr));
-}
-
-static inline void native_store_idt(struct desc_ptr *dtr)
-{
- asm volatile("sidt %0":"=m" (*dtr));
-}
-
-static inline unsigned long native_store_tr(void)
-{
- unsigned long tr;
-
- asm volatile("str %0":"=r" (tr));
-
- return tr;
-}
-
-static inline void native_load_tls(struct thread_struct *t, unsigned int cpu)
-{
- struct desc_struct *gdt = get_cpu_gdt_table(cpu);
- unsigned int i;
-
- for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++)
- gdt[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i];
-}
-
/* This intentionally ignores lm, since 32-bit apps don't have that field. */
#define LDT_empty(info) \
((info)->base_addr == 0 && \
diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
index 85599ad4d024..5dff775af7cd 100644
--- a/arch/x86/include/asm/disabled-features.h
+++ b/arch/x86/include/asm/disabled-features.h
@@ -36,6 +36,12 @@
# define DISABLE_OSPKE (1<<(X86_FEATURE_OSPKE & 31))
#endif /* CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS */
+#ifdef CONFIG_X86_5LEVEL
+# define DISABLE_LA57 0
+#else
+# define DISABLE_LA57 (1<<(X86_FEATURE_LA57 & 31))
+#endif
+
/*
* Make sure to add features to the correct mask
*/
@@ -55,7 +61,7 @@
#define DISABLED_MASK13 0
#define DISABLED_MASK14 0
#define DISABLED_MASK15 0
-#define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE)
+#define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57)
#define DISABLED_MASK17 0
#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h
deleted file mode 100644
index 67313f3a9874..000000000000
--- a/arch/x86/include/asm/e820.h
+++ /dev/null
@@ -1,73 +0,0 @@
-#ifndef _ASM_X86_E820_H
-#define _ASM_X86_E820_H
-
-/*
- * E820_X_MAX is the maximum size of the extended E820 table. The extended
- * table may contain up to 3 extra E820 entries per possible NUMA node, so we
- * make room for 3 * MAX_NUMNODES possible entries, beyond the standard 128.
- * Also note that E820_X_MAX *must* be defined before we include uapi/asm/e820.h.
- */
-#include <linux/numa.h>
-#define E820_X_MAX (E820MAX + 3 * MAX_NUMNODES)
-
-#include <uapi/asm/e820.h>
-
-#ifndef __ASSEMBLY__
-/* see comment in arch/x86/kernel/e820.c */
-extern struct e820map *e820;
-extern struct e820map *e820_saved;
-
-extern unsigned long pci_mem_start;
-extern int e820_any_mapped(u64 start, u64 end, unsigned type);
-extern int e820_all_mapped(u64 start, u64 end, unsigned type);
-extern void e820_add_region(u64 start, u64 size, int type);
-extern void e820_print_map(char *who);
-extern int
-sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, u32 *pnr_map);
-extern u64 e820_update_range(u64 start, u64 size, unsigned old_type,
- unsigned new_type);
-extern u64 e820_remove_range(u64 start, u64 size, unsigned old_type,
- int checktype);
-extern void update_e820(void);
-extern void e820_setup_gap(void);
-struct setup_data;
-extern void parse_e820_ext(u64 phys_addr, u32 data_len);
-
-#if defined(CONFIG_X86_64) || \
- (defined(CONFIG_X86_32) && defined(CONFIG_HIBERNATION))
-extern void e820_mark_nosave_regions(unsigned long limit_pfn);
-#else
-static inline void e820_mark_nosave_regions(unsigned long limit_pfn)
-{
-}
-#endif
-
-extern unsigned long e820_end_of_ram_pfn(void);
-extern unsigned long e820_end_of_low_ram_pfn(void);
-extern u64 early_reserve_e820(u64 sizet, u64 align);
-
-void memblock_x86_fill(void);
-void memblock_find_dma_reserve(void);
-
-extern void finish_e820_parsing(void);
-extern void e820_reserve_resources(void);
-extern void e820_reserve_resources_late(void);
-extern void setup_memory_map(void);
-extern char *default_machine_specific_memory_setup(void);
-
-extern void e820_reallocate_tables(void);
-
-/*
- * Returns true iff the specified range [s,e) is completely contained inside
- * the ISA region.
- */
-static inline bool is_ISA_range(u64 s, u64 e)
-{
- return s >= ISA_START_ADDRESS && e <= ISA_END_ADDRESS;
-}
-
-#endif /* __ASSEMBLY__ */
-#include <linux/ioport.h>
-
-#define HIGH_MEMORY (1024*1024)
-#endif /* _ASM_X86_E820_H */
diff --git a/arch/x86/include/asm/e820/api.h b/arch/x86/include/asm/e820/api.h
new file mode 100644
index 000000000000..8e0f8b85b209
--- /dev/null
+++ b/arch/x86/include/asm/e820/api.h
@@ -0,0 +1,50 @@
+#ifndef _ASM_E820_API_H
+#define _ASM_E820_API_H
+
+#include <asm/e820/types.h>
+
+extern struct e820_table *e820_table;
+extern struct e820_table *e820_table_firmware;
+
+extern unsigned long pci_mem_start;
+
+extern bool e820__mapped_any(u64 start, u64 end, enum e820_type type);
+extern bool e820__mapped_all(u64 start, u64 end, enum e820_type type);
+
+extern void e820__range_add (u64 start, u64 size, enum e820_type type);
+extern u64 e820__range_update(u64 start, u64 size, enum e820_type old_type, enum e820_type new_type);
+extern u64 e820__range_remove(u64 start, u64 size, enum e820_type old_type, bool check_type);
+
+extern void e820__print_table(char *who);
+extern int e820__update_table(struct e820_table *table);
+extern void e820__update_table_print(void);
+
+extern unsigned long e820__end_of_ram_pfn(void);
+extern unsigned long e820__end_of_low_ram_pfn(void);
+
+extern u64 e820__memblock_alloc_reserved(u64 size, u64 align);
+extern void e820__memblock_setup(void);
+
+extern void e820__reserve_setup_data(void);
+extern void e820__finish_early_params(void);
+extern void e820__reserve_resources(void);
+extern void e820__reserve_resources_late(void);
+
+extern void e820__memory_setup(void);
+extern void e820__memory_setup_extended(u64 phys_addr, u32 data_len);
+extern char *e820__memory_setup_default(void);
+extern void e820__setup_pci_gap(void);
+
+extern void e820__reallocate_tables(void);
+extern void e820__register_nosave_regions(unsigned long limit_pfn);
+
+/*
+ * Returns true iff the specified range [start,end) is completely contained inside
+ * the ISA region.
+ */
+static inline bool is_ISA_range(u64 start, u64 end)
+{
+ return start >= ISA_START_ADDRESS && end <= ISA_END_ADDRESS;
+}
+
+#endif /* _ASM_E820_API_H */
diff --git a/arch/x86/include/asm/e820/types.h b/arch/x86/include/asm/e820/types.h
new file mode 100644
index 000000000000..4adeed03a9a1
--- /dev/null
+++ b/arch/x86/include/asm/e820/types.h
@@ -0,0 +1,104 @@
+#ifndef _ASM_E820_TYPES_H
+#define _ASM_E820_TYPES_H
+
+#include <uapi/asm/bootparam.h>
+
+/*
+ * These are the E820 types known to the kernel:
+ */
+enum e820_type {
+ E820_TYPE_RAM = 1,
+ E820_TYPE_RESERVED = 2,
+ E820_TYPE_ACPI = 3,
+ E820_TYPE_NVS = 4,
+ E820_TYPE_UNUSABLE = 5,
+ E820_TYPE_PMEM = 7,
+
+ /*
+ * This is a non-standardized way to represent ADR or
+ * NVDIMM regions that persist over a reboot.
+ *
+ * The kernel will ignore their special capabilities
+ * unless the CONFIG_X86_PMEM_LEGACY=y option is set.
+ *
+ * ( Note that older platforms also used 6 for the same
+ * type of memory, but newer versions switched to 12 as
+ * 6 was assigned differently. Some time they will learn... )
+ */
+ E820_TYPE_PRAM = 12,
+
+ /*
+ * Reserved RAM used by the kernel itself if
+ * CONFIG_INTEL_TXT=y is enabled, memory of this type
+ * will be included in the S3 integrity calculation
+ * and so should not include any memory that the BIOS
+ * might alter over the S3 transition:
+ */
+ E820_TYPE_RESERVED_KERN = 128,
+};
+
+/*
+ * A single E820 map entry, describing a memory range of [addr...addr+size-1],
+ * of 'type' memory type:
+ *
+ * (We pack it because there can be thousands of them on large systems.)
+ */
+struct e820_entry {
+ u64 addr;
+ u64 size;
+ enum e820_type type;
+} __attribute__((packed));
+
+/*
+ * The legacy E820 BIOS limits us to 128 (E820_MAX_ENTRIES_ZEROPAGE) nodes
+ * due to the constrained space in the zeropage.
+ *
+ * On large systems we can easily have thousands of nodes with RAM,
+ * which cannot be fit into so few entries - so we have a mechanism
+ * to extend the e820 table size at build-time, via the E820_MAX_ENTRIES
+ * define below.
+ *
+ * ( Those extra entries are enumerated via the EFI memory map, not
+ * via the legacy zeropage mechanism. )
+ *
+ * Size our internal memory map tables to have room for these additional
+ * entries, based on a heuristic calculation: up to three entries per
+ * NUMA node, plus E820_MAX_ENTRIES_ZEROPAGE for some extra space.
+ *
+ * This allows for bootstrap/firmware quirks such as possible duplicate
+ * E820 entries that might need room in the same arrays, prior to the
+ * call to e820__update_table() to remove duplicates. The allowance
+ * of three memory map entries per node is "enough" entries for
+ * the initial hardware platform motivating this mechanism to make
+ * use of additional EFI map entries. Future platforms may want
+ * to allow more than three entries per node or otherwise refine
+ * this size.
+ */
+
+#include <linux/numa.h>
+
+#define E820_MAX_ENTRIES (E820_MAX_ENTRIES_ZEROPAGE + 3*MAX_NUMNODES)
+
+/*
+ * The whole array of E820 entries:
+ */
+struct e820_table {
+ __u32 nr_entries;
+ struct e820_entry entries[E820_MAX_ENTRIES];
+};
+
+/*
+ * Various well-known legacy memory ranges in physical memory:
+ */
+#define ISA_START_ADDRESS 0x000a0000
+#define ISA_END_ADDRESS 0x00100000
+
+#define BIOS_BEGIN 0x000a0000
+#define BIOS_END 0x00100000
+
+#define HIGH_MEMORY 0x00100000
+
+#define BIOS_ROM_BASE 0xffe00000
+#define BIOS_ROM_END 0xffffffff
+
+#endif /* _ASM_E820_TYPES_H */
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index 9d49c18b5ea9..e8ab9a46bc68 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -287,14 +287,29 @@ struct task_struct;
#define ARCH_DLINFO_IA32 \
do { \
- if (vdso32_enabled) { \
+ if (VDSO_CURRENT_BASE) { \
NEW_AUX_ENT(AT_SYSINFO, VDSO_ENTRY); \
NEW_AUX_ENT(AT_SYSINFO_EHDR, VDSO_CURRENT_BASE); \
} \
} while (0)
+/*
+ * True on X86_32 or when emulating IA32 on X86_64
+ */
+static inline int mmap_is_ia32(void)
+{
+ return IS_ENABLED(CONFIG_X86_32) ||
+ (IS_ENABLED(CONFIG_COMPAT) &&
+ test_thread_flag(TIF_ADDR32));
+}
+
+extern unsigned long tasksize_32bit(void);
+extern unsigned long tasksize_64bit(void);
+extern unsigned long get_mmap_base(int is_legacy);
+
#ifdef CONFIG_X86_32
+#define __STACK_RND_MASK(is32bit) (0x7ff)
#define STACK_RND_MASK (0x7ff)
#define ARCH_DLINFO ARCH_DLINFO_IA32
@@ -304,7 +319,8 @@ do { \
#else /* CONFIG_X86_32 */
/* 1GB for 64bit, 8MB for 32bit */
-#define STACK_RND_MASK (test_thread_flag(TIF_ADDR32) ? 0x7ff : 0x3fffff)
+#define __STACK_RND_MASK(is32bit) ((is32bit) ? 0x7ff : 0x3fffff)
+#define STACK_RND_MASK __STACK_RND_MASK(mmap_is_ia32())
#define ARCH_DLINFO \
do { \
@@ -348,16 +364,6 @@ extern int compat_arch_setup_additional_pages(struct linux_binprm *bprm,
int uses_interp);
#define compat_arch_setup_additional_pages compat_arch_setup_additional_pages
-/*
- * True on X86_32 or when emulating IA32 on X86_64
- */
-static inline int mmap_is_ia32(void)
-{
- return IS_ENABLED(CONFIG_X86_32) ||
- (IS_ENABLED(CONFIG_COMPAT) &&
- test_thread_flag(TIF_ADDR32));
-}
-
/* Do not change the values. See get_align_mask() */
enum align_flags {
ALIGN_VA_32 = BIT(0),
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index 8554f960e21b..b65155cc3760 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -100,6 +100,10 @@ enum fixed_addresses {
#ifdef CONFIG_X86_INTEL_MID
FIX_LNW_VRTC,
#endif
+ /* Fixmap entries to remap the GDTs, one per processor. */
+ FIX_GDT_REMAP_BEGIN,
+ FIX_GDT_REMAP_END = FIX_GDT_REMAP_BEGIN + NR_CPUS - 1,
+
__end_of_permanent_fixed_addresses,
/*
diff --git a/arch/x86/include/asm/gart.h b/arch/x86/include/asm/gart.h
index 156cd5d18d2a..1d268098ac2e 100644
--- a/arch/x86/include/asm/gart.h
+++ b/arch/x86/include/asm/gart.h
@@ -1,7 +1,7 @@
#ifndef _ASM_X86_GART_H
#define _ASM_X86_GART_H
-#include <asm/e820.h>
+#include <asm/e820/api.h>
extern void set_up_gart_resume(u32, u32);
@@ -97,7 +97,7 @@ static inline int aperture_valid(u64 aper_base, u32 aper_size, u32 min_size)
printk(KERN_INFO "Aperture beyond 4GB. Ignoring.\n");
return 0;
}
- if (e820_any_mapped(aper_base, aper_base + aper_size, E820_RAM)) {
+ if (e820__mapped_any(aper_base, aper_base + aper_size, E820_TYPE_RAM)) {
printk(KERN_INFO "Aperture pointing to e820 RAM. Ignoring.\n");
return 0;
}
diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index 9814db42b790..75b748a1deb8 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -12,6 +12,7 @@
*/
#define INTEL_FAM6_CORE_YONAH 0x0E
+
#define INTEL_FAM6_CORE2_MEROM 0x0F
#define INTEL_FAM6_CORE2_MEROM_L 0x16
#define INTEL_FAM6_CORE2_PENRYN 0x17
@@ -21,6 +22,7 @@
#define INTEL_FAM6_NEHALEM_G 0x1F /* Auburndale / Havendale */
#define INTEL_FAM6_NEHALEM_EP 0x1A
#define INTEL_FAM6_NEHALEM_EX 0x2E
+
#define INTEL_FAM6_WESTMERE 0x25
#define INTEL_FAM6_WESTMERE_EP 0x2C
#define INTEL_FAM6_WESTMERE_EX 0x2F
@@ -36,9 +38,9 @@
#define INTEL_FAM6_HASWELL_GT3E 0x46
#define INTEL_FAM6_BROADWELL_CORE 0x3D
-#define INTEL_FAM6_BROADWELL_XEON_D 0x56
#define INTEL_FAM6_BROADWELL_GT3E 0x47
#define INTEL_FAM6_BROADWELL_X 0x4F
+#define INTEL_FAM6_BROADWELL_XEON_D 0x56
#define INTEL_FAM6_SKYLAKE_MOBILE 0x4E
#define INTEL_FAM6_SKYLAKE_DESKTOP 0x5E
@@ -59,8 +61,8 @@
#define INTEL_FAM6_ATOM_MERRIFIELD 0x4A /* Tangier */
#define INTEL_FAM6_ATOM_MOOREFIELD 0x5A /* Anniedale */
#define INTEL_FAM6_ATOM_GOLDMONT 0x5C
-#define INTEL_FAM6_ATOM_GEMINI_LAKE 0x7A
#define INTEL_FAM6_ATOM_DENVERTON 0x5F /* Goldmont Microserver */
+#define INTEL_FAM6_ATOM_GEMINI_LAKE 0x7A
/* Xeon Phi */
diff --git a/arch/x86/include/asm/intel_rdt.h b/arch/x86/include/asm/intel_rdt.h
index 0d64397cee58..597dc4995678 100644
--- a/arch/x86/include/asm/intel_rdt.h
+++ b/arch/x86/include/asm/intel_rdt.h
@@ -12,6 +12,7 @@
#define IA32_L3_QOS_CFG 0xc81
#define IA32_L3_CBM_BASE 0xc90
#define IA32_L2_CBM_BASE 0xd10
+#define IA32_MBA_THRTL_BASE 0xd50
#define L3_QOS_CDP_ENABLE 0x01ULL
@@ -37,23 +38,30 @@ struct rdtgroup {
/* rdtgroup.flags */
#define RDT_DELETED 1
+/* rftype.flags */
+#define RFTYPE_FLAGS_CPUS_LIST 1
+
/* List of all resource groups */
extern struct list_head rdt_all_groups;
+extern int max_name_width, max_data_width;
+
int __init rdtgroup_init(void);
/**
* struct rftype - describe each file in the resctrl file system
- * @name: file name
- * @mode: access mode
- * @kf_ops: operations
- * @seq_show: show content of the file
- * @write: write to the file
+ * @name: File name
+ * @mode: Access mode
+ * @kf_ops: File operations
+ * @flags: File specific RFTYPE_FLAGS_* flags
+ * @seq_show: Show content of the file
+ * @write: Write to the file
*/
struct rftype {
char *name;
umode_t mode;
struct kernfs_ops *kf_ops;
+ unsigned long flags;
int (*seq_show)(struct kernfs_open_file *of,
struct seq_file *sf, void *v);
@@ -67,54 +75,21 @@ struct rftype {
};
/**
- * struct rdt_resource - attributes of an RDT resource
- * @enabled: Is this feature enabled on this machine
- * @capable: Is this feature available on this machine
- * @name: Name to use in "schemata" file
- * @num_closid: Number of CLOSIDs available
- * @max_cbm: Largest Cache Bit Mask allowed
- * @min_cbm_bits: Minimum number of consecutive bits to be set
- * in a cache bit mask
- * @domains: All domains for this resource
- * @num_domains: Number of domains active
- * @msr_base: Base MSR address for CBMs
- * @tmp_cbms: Scratch space when updating schemata
- * @num_tmp_cbms: Number of CBMs in tmp_cbms
- * @cache_level: Which cache level defines scope of this domain
- * @cbm_idx_multi: Multiplier of CBM index
- * @cbm_idx_offset: Offset of CBM index. CBM index is computed by:
- * closid * cbm_idx_multi + cbm_idx_offset
- */
-struct rdt_resource {
- bool enabled;
- bool capable;
- char *name;
- int num_closid;
- int cbm_len;
- int min_cbm_bits;
- u32 max_cbm;
- struct list_head domains;
- int num_domains;
- int msr_base;
- u32 *tmp_cbms;
- int num_tmp_cbms;
- int cache_level;
- int cbm_idx_multi;
- int cbm_idx_offset;
-};
-
-/**
* struct rdt_domain - group of cpus sharing an RDT resource
* @list: all instances of this resource
* @id: unique id for this instance
* @cpu_mask: which cpus share this resource
- * @cbm: array of cache bit masks (indexed by CLOSID)
+ * @ctrl_val: array of cache or mem ctrl values (indexed by CLOSID)
+ * @new_ctrl: new ctrl value to be loaded
+ * @have_new_ctrl: did user provide new_ctrl for this domain
*/
struct rdt_domain {
struct list_head list;
int id;
struct cpumask cpu_mask;
- u32 *cbm;
+ u32 *ctrl_val;
+ u32 new_ctrl;
+ bool have_new_ctrl;
};
/**
@@ -129,6 +104,83 @@ struct msr_param {
int high;
};
+/**
+ * struct rdt_cache - Cache allocation related data
+ * @cbm_len: Length of the cache bit mask
+ * @min_cbm_bits: Minimum number of consecutive bits to be set
+ * @cbm_idx_mult: Multiplier of CBM index
+ * @cbm_idx_offset: Offset of CBM index. CBM index is computed by:
+ * closid * cbm_idx_multi + cbm_idx_offset
+ * in a cache bit mask
+ */
+struct rdt_cache {
+ unsigned int cbm_len;
+ unsigned int min_cbm_bits;
+ unsigned int cbm_idx_mult;
+ unsigned int cbm_idx_offset;
+};
+
+/**
+ * struct rdt_membw - Memory bandwidth allocation related data
+ * @max_delay: Max throttle delay. Delay is the hardware
+ * representation for memory bandwidth.
+ * @min_bw: Minimum memory bandwidth percentage user can request
+ * @bw_gran: Granularity at which the memory bandwidth is allocated
+ * @delay_linear: True if memory B/W delay is in linear scale
+ * @mb_map: Mapping of memory B/W percentage to memory B/W delay
+ */
+struct rdt_membw {
+ u32 max_delay;
+ u32 min_bw;
+ u32 bw_gran;
+ u32 delay_linear;
+ u32 *mb_map;
+};
+
+/**
+ * struct rdt_resource - attributes of an RDT resource
+ * @enabled: Is this feature enabled on this machine
+ * @capable: Is this feature available on this machine
+ * @name: Name to use in "schemata" file
+ * @num_closid: Number of CLOSIDs available
+ * @cache_level: Which cache level defines scope of this resource
+ * @default_ctrl: Specifies default cache cbm or memory B/W percent.
+ * @msr_base: Base MSR address for CBMs
+ * @msr_update: Function pointer to update QOS MSRs
+ * @data_width: Character width of data when displaying
+ * @domains: All domains for this resource
+ * @cache: Cache allocation related data
+ * @info_files: resctrl info files for the resource
+ * @nr_info_files: Number of info files
+ * @format_str: Per resource format string to show domain value
+ * @parse_ctrlval: Per resource function pointer to parse control values
+ */
+struct rdt_resource {
+ bool enabled;
+ bool capable;
+ char *name;
+ int num_closid;
+ int cache_level;
+ u32 default_ctrl;
+ unsigned int msr_base;
+ void (*msr_update) (struct rdt_domain *d, struct msr_param *m,
+ struct rdt_resource *r);
+ int data_width;
+ struct list_head domains;
+ struct rdt_cache cache;
+ struct rdt_membw membw;
+ struct rftype *info_files;
+ int nr_info_files;
+ const char *format_str;
+ int (*parse_ctrlval) (char *buf, struct rdt_resource *r,
+ struct rdt_domain *d);
+};
+
+void rdt_get_cache_infofile(struct rdt_resource *r);
+void rdt_get_mba_infofile(struct rdt_resource *r);
+int parse_cbm(char *buf, struct rdt_resource *r, struct rdt_domain *d);
+int parse_bw(char *buf, struct rdt_resource *r, struct rdt_domain *d);
+
extern struct mutex rdtgroup_mutex;
extern struct rdt_resource rdt_resources_all[];
@@ -142,6 +194,7 @@ enum {
RDT_RESOURCE_L3DATA,
RDT_RESOURCE_L3CODE,
RDT_RESOURCE_L2,
+ RDT_RESOURCE_MBA,
/* Must be the last */
RDT_NUM_RESOURCES,
@@ -149,7 +202,7 @@ enum {
#define for_each_capable_rdt_resource(r) \
for (r = rdt_resources_all; r < rdt_resources_all + RDT_NUM_RESOURCES;\
- r++) \
+ r++) \
if (r->capable)
#define for_each_enabled_rdt_resource(r) \
@@ -165,8 +218,16 @@ union cpuid_0x10_1_eax {
unsigned int full;
};
-/* CPUID.(EAX=10H, ECX=ResID=1).EDX */
-union cpuid_0x10_1_edx {
+/* CPUID.(EAX=10H, ECX=ResID=3).EAX */
+union cpuid_0x10_3_eax {
+ struct {
+ unsigned int max_delay:12;
+ } split;
+ unsigned int full;
+};
+
+/* CPUID.(EAX=10H, ECX=ResID).EDX */
+union cpuid_0x10_x_edx {
struct {
unsigned int cos_max:16;
} split;
@@ -175,7 +236,7 @@ union cpuid_0x10_1_edx {
DECLARE_PER_CPU_READ_MOSTLY(int, cpu_closid);
-void rdt_cbm_update(void *arg);
+void rdt_ctrl_update(void *arg);
struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn);
void rdtgroup_kn_unlock(struct kernfs_node *kn);
ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
diff --git a/arch/x86/include/asm/kasan.h b/arch/x86/include/asm/kasan.h
index 1410b567ecde..f527b02a0ee3 100644
--- a/arch/x86/include/asm/kasan.h
+++ b/arch/x86/include/asm/kasan.h
@@ -11,9 +11,12 @@
* 'kernel address space start' >> KASAN_SHADOW_SCALE_SHIFT
*/
#define KASAN_SHADOW_START (KASAN_SHADOW_OFFSET + \
- (0xffff800000000000ULL >> 3))
-/* 47 bits for kernel address -> (47 - 3) bits for shadow */
-#define KASAN_SHADOW_END (KASAN_SHADOW_START + (1ULL << (47 - 3)))
+ ((-1UL << __VIRTUAL_MASK_SHIFT) >> 3))
+/*
+ * 47 bits for kernel address -> (47 - 3) bits for shadow
+ * 56 bits for kernel address -> (56 - 3) bits for shadow
+ */
+#define KASAN_SHADOW_END (KASAN_SHADOW_START + (1ULL << (__VIRTUAL_MASK_SHIFT - 3)))
#ifndef __ASSEMBLY__
diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
index 282630e4c6ea..70ef205489f0 100644
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h
@@ -164,6 +164,7 @@ struct kimage_arch {
};
#else
struct kimage_arch {
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h
index 200581691c6e..34b984c60790 100644
--- a/arch/x86/include/asm/kprobes.h
+++ b/arch/x86/include/asm/kprobes.h
@@ -72,14 +72,13 @@ struct arch_specific_insn {
/* copy of the original instruction */
kprobe_opcode_t *insn;
/*
- * boostable = -1: This instruction type is not boostable.
- * boostable = 0: This instruction type is boostable.
- * boostable = 1: This instruction has been boosted: we have
+ * boostable = false: This instruction type is not boostable.
+ * boostable = true: This instruction has been boosted: we have
* added a relative jump after the instruction copy in insn,
* so no single-step and fixup are needed (unless there's
* a post_handler or break_handler).
*/
- int boostable;
+ bool boostable;
bool if_modifier;
};
diff --git a/arch/x86/include/asm/kvm_page_track.h b/arch/x86/include/asm/kvm_page_track.h
index d74747b031ec..c4eda791f877 100644
--- a/arch/x86/include/asm/kvm_page_track.h
+++ b/arch/x86/include/asm/kvm_page_track.h
@@ -46,6 +46,7 @@ struct kvm_page_track_notifier_node {
};
void kvm_page_track_init(struct kvm *kvm);
+void kvm_page_track_cleanup(struct kvm *kvm);
void kvm_page_track_free_memslot(struct kvm_memory_slot *free,
struct kvm_memory_slot *dont);
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index e63873683d4a..4fd5195deed0 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -128,7 +128,7 @@
* debugging tools. Each entry is only valid when its finished flag
* is set.
*/
-struct mce_log {
+struct mce_log_buffer {
char signature[12]; /* "MACHINECHECK" */
unsigned len; /* = MCE_LOG_LEN */
unsigned next;
@@ -191,10 +191,12 @@ extern struct mca_config mca_cfg;
extern struct mca_msr_regs msr_ops;
enum mce_notifier_prios {
- MCE_PRIO_SRAO = INT_MAX,
- MCE_PRIO_EXTLOG = INT_MAX - 1,
- MCE_PRIO_NFIT = INT_MAX - 2,
- MCE_PRIO_EDAC = INT_MAX - 3,
+ MCE_PRIO_FIRST = INT_MAX,
+ MCE_PRIO_SRAO = INT_MAX - 1,
+ MCE_PRIO_EXTLOG = INT_MAX - 2,
+ MCE_PRIO_NFIT = INT_MAX - 3,
+ MCE_PRIO_EDAC = INT_MAX - 4,
+ MCE_PRIO_MCELOG = 1,
MCE_PRIO_LOWEST = 0,
};
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 306c7e12af55..68b329d77b3a 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -268,8 +268,4 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
return __pkru_allows_pkey(vma_pkey(vma), write);
}
-static inline bool arch_pte_access_permitted(pte_t pte, bool write)
-{
- return __pkru_allows_pkey(pte_flags_pkey(pte_flags(pte)), write);
-}
#endif /* _ASM_X86_MMU_CONTEXT_H */
diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h
index 32007041ef8c..831eb7895535 100644
--- a/arch/x86/include/asm/mpspec.h
+++ b/arch/x86/include/asm/mpspec.h
@@ -64,7 +64,7 @@ static inline void find_smp_config(void)
}
#ifdef CONFIG_X86_MPPARSE
-extern void early_reserve_e820_mpc_new(void);
+extern void e820__memblock_alloc_reserved_mpc_new(void);
extern int enable_update_mptable;
extern int default_mpc_apic_id(struct mpc_cpu *m);
extern void default_smp_read_mpc_oem(struct mpc_table *mpc);
@@ -76,7 +76,7 @@ extern void default_mpc_oem_bus_info(struct mpc_bus *m, char *str);
extern void default_find_smp_config(void);
extern void default_get_smp_config(unsigned int early);
#else
-static inline void early_reserve_e820_mpc_new(void) { }
+static inline void e820__memblock_alloc_reserved_mpc_new(void) { }
#define enable_update_mptable 0
#define default_mpc_apic_id NULL
#define default_smp_read_mpc_oem NULL
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 7c9c895432a9..fba100713924 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -176,4 +176,58 @@ void hyperv_report_panic(struct pt_regs *regs);
bool hv_is_hypercall_page_setup(void);
void hyperv_cleanup(void);
#endif
+#ifdef CONFIG_HYPERV_TSCPAGE
+struct ms_hyperv_tsc_page *hv_get_tsc_page(void);
+static inline u64 hv_read_tsc_page(const struct ms_hyperv_tsc_page *tsc_pg)
+{
+ u64 scale, offset, cur_tsc;
+ u32 sequence;
+
+ /*
+ * The protocol for reading Hyper-V TSC page is specified in Hypervisor
+ * Top-Level Functional Specification ver. 3.0 and above. To get the
+ * reference time we must do the following:
+ * - READ ReferenceTscSequence
+ * A special '0' value indicates the time source is unreliable and we
+ * need to use something else. The currently published specification
+ * versions (up to 4.0b) contain a mistake and wrongly claim '-1'
+ * instead of '0' as the special value, see commit c35b82ef0294.
+ * - ReferenceTime =
+ * ((RDTSC() * ReferenceTscScale) >> 64) + ReferenceTscOffset
+ * - READ ReferenceTscSequence again. In case its value has changed
+ * since our first reading we need to discard ReferenceTime and repeat
+ * the whole sequence as the hypervisor was updating the page in
+ * between.
+ */
+ do {
+ sequence = READ_ONCE(tsc_pg->tsc_sequence);
+ if (!sequence)
+ return U64_MAX;
+ /*
+ * Make sure we read sequence before we read other values from
+ * TSC page.
+ */
+ smp_rmb();
+
+ scale = READ_ONCE(tsc_pg->tsc_scale);
+ offset = READ_ONCE(tsc_pg->tsc_offset);
+ cur_tsc = rdtsc_ordered();
+
+ /*
+ * Make sure we read sequence after we read all other values
+ * from TSC page.
+ */
+ smp_rmb();
+
+ } while (READ_ONCE(tsc_pg->tsc_sequence) != sequence);
+
+ return mul_u64_u64_shr(cur_tsc, scale, 64) + offset;
+}
+
+#else
+static inline struct ms_hyperv_tsc_page *hv_get_tsc_page(void)
+{
+ return NULL;
+}
+#endif
#endif
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index d8b5f8ab8ef9..673f9ac50f6d 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -45,6 +45,8 @@
#define MSR_IA32_PERFCTR1 0x000000c2
#define MSR_FSB_FREQ 0x000000cd
#define MSR_PLATFORM_INFO 0x000000ce
+#define MSR_PLATFORM_INFO_CPUID_FAULT_BIT 31
+#define MSR_PLATFORM_INFO_CPUID_FAULT BIT_ULL(MSR_PLATFORM_INFO_CPUID_FAULT_BIT)
#define MSR_PKG_CST_CONFIG_CONTROL 0x000000e2
#define NHM_C3_AUTO_DEMOTE (1UL << 25)
@@ -127,6 +129,7 @@
/* DEBUGCTLMSR bits (others vary by model): */
#define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */
+#define DEBUGCTLMSR_BTF_SHIFT 1
#define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */
#define DEBUGCTLMSR_TR (1UL << 6)
#define DEBUGCTLMSR_BTS (1UL << 7)
@@ -552,10 +555,12 @@
#define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT 39
#define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT)
-/* MISC_FEATURE_ENABLES non-architectural features */
-#define MSR_MISC_FEATURE_ENABLES 0x00000140
+/* MISC_FEATURES_ENABLES non-architectural features */
+#define MSR_MISC_FEATURES_ENABLES 0x00000140
-#define MSR_MISC_FEATURE_ENABLES_RING3MWAIT_BIT 1
+#define MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT 0
+#define MSR_MISC_FEATURES_ENABLES_CPUID_FAULT BIT_ULL(MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT)
+#define MSR_MISC_FEATURES_ENABLES_RING3MWAIT_BIT 1
#define MSR_IA32_TSC_DEADLINE 0x000006E0
diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index b3bebf9e5746..b4a0d43248cf 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -4,6 +4,7 @@
#include <asm/page_64_types.h>
#ifndef __ASSEMBLY__
+#include <asm/alternative.h>
/* duplicated to the one in bootmem.h */
extern unsigned long max_pfn;
@@ -34,7 +35,20 @@ extern unsigned long __phys_addr_symbol(unsigned long);
#define pfn_valid(pfn) ((pfn) < max_pfn)
#endif
-void clear_page(void *page);
+void clear_page_orig(void *page);
+void clear_page_rep(void *page);
+void clear_page_erms(void *page);
+
+static inline void clear_page(void *page)
+{
+ alternative_call_2(clear_page_orig,
+ clear_page_rep, X86_FEATURE_REP_GOOD,
+ clear_page_erms, X86_FEATURE_ERMS,
+ "=D" (page),
+ "0" (page)
+ : "memory", "rax", "rcx");
+}
+
void copy_page(void *to, void *from);
#endif /* !__ASSEMBLY__ */
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index 9215e0527647..3f5f08b010d0 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -36,7 +36,12 @@
* hypervisor to fit. Choosing 16 slots here is arbitrary, but it's
* what Xen requires.
*/
+#ifdef CONFIG_X86_5LEVEL
+#define __PAGE_OFFSET_BASE _AC(0xff10000000000000, UL)
+#else
#define __PAGE_OFFSET_BASE _AC(0xffff880000000000, UL)
+#endif
+
#ifdef CONFIG_RANDOMIZE_MEMORY
#define __PAGE_OFFSET page_offset_base
#else
@@ -46,8 +51,13 @@
#define __START_KERNEL_map _AC(0xffffffff80000000, UL)
/* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */
+#ifdef CONFIG_X86_5LEVEL
+#define __PHYSICAL_MASK_SHIFT 52
+#define __VIRTUAL_MASK_SHIFT 56
+#else
#define __PHYSICAL_MASK_SHIFT 46
#define __VIRTUAL_MASK_SHIFT 47
+#endif
/*
* Kernel image size is limited to 1GiB due to the fixmap living in the
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 0489884fdc44..55fa56fe4e45 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -357,6 +357,16 @@ static inline void paravirt_release_pud(unsigned long pfn)
PVOP_VCALL1(pv_mmu_ops.release_pud, pfn);
}
+static inline void paravirt_alloc_p4d(struct mm_struct *mm, unsigned long pfn)
+{
+ PVOP_VCALL2(pv_mmu_ops.alloc_p4d, mm, pfn);
+}
+
+static inline void paravirt_release_p4d(unsigned long pfn)
+{
+ PVOP_VCALL1(pv_mmu_ops.release_p4d, pfn);
+}
+
static inline void pte_update(struct mm_struct *mm, unsigned long addr,
pte_t *ptep)
{
@@ -536,7 +546,7 @@ static inline void set_pud(pud_t *pudp, pud_t pud)
PVOP_VCALL2(pv_mmu_ops.set_pud, pudp,
val);
}
-#if CONFIG_PGTABLE_LEVELS == 4
+#if CONFIG_PGTABLE_LEVELS >= 4
static inline pud_t __pud(pudval_t val)
{
pudval_t ret;
@@ -565,26 +575,54 @@ static inline pudval_t pud_val(pud_t pud)
return ret;
}
-static inline void set_pgd(pgd_t *pgdp, pgd_t pgd)
+static inline void pud_clear(pud_t *pudp)
{
- pgdval_t val = native_pgd_val(pgd);
+ set_pud(pudp, __pud(0));
+}
- if (sizeof(pgdval_t) > sizeof(long))
- PVOP_VCALL3(pv_mmu_ops.set_pgd, pgdp,
+static inline void set_p4d(p4d_t *p4dp, p4d_t p4d)
+{
+ p4dval_t val = native_p4d_val(p4d);
+
+ if (sizeof(p4dval_t) > sizeof(long))
+ PVOP_VCALL3(pv_mmu_ops.set_p4d, p4dp,
val, (u64)val >> 32);
else
- PVOP_VCALL2(pv_mmu_ops.set_pgd, pgdp,
+ PVOP_VCALL2(pv_mmu_ops.set_p4d, p4dp,
val);
}
+#if CONFIG_PGTABLE_LEVELS >= 5
+
+static inline p4d_t __p4d(p4dval_t val)
+{
+ p4dval_t ret = PVOP_CALLEE1(p4dval_t, pv_mmu_ops.make_p4d, val);
+
+ return (p4d_t) { ret };
+}
+
+static inline p4dval_t p4d_val(p4d_t p4d)
+{
+ return PVOP_CALLEE1(p4dval_t, pv_mmu_ops.p4d_val, p4d.p4d);
+}
+
+static inline void set_pgd(pgd_t *pgdp, pgd_t pgd)
+{
+ pgdval_t val = native_pgd_val(pgd);
+
+ PVOP_VCALL2(pv_mmu_ops.set_pgd, pgdp, val);
+}
+
static inline void pgd_clear(pgd_t *pgdp)
{
set_pgd(pgdp, __pgd(0));
}
-static inline void pud_clear(pud_t *pudp)
+#endif /* CONFIG_PGTABLE_LEVELS == 5 */
+
+static inline void p4d_clear(p4d_t *p4dp)
{
- set_pud(pudp, __pud(0));
+ set_p4d(p4dp, __p4d(0));
}
#endif /* CONFIG_PGTABLE_LEVELS == 4 */
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index b060f962d581..7465d6fe336f 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -238,9 +238,11 @@ struct pv_mmu_ops {
void (*alloc_pte)(struct mm_struct *mm, unsigned long pfn);
void (*alloc_pmd)(struct mm_struct *mm, unsigned long pfn);
void (*alloc_pud)(struct mm_struct *mm, unsigned long pfn);
+ void (*alloc_p4d)(struct mm_struct *mm, unsigned long pfn);
void (*release_pte)(unsigned long pfn);
void (*release_pmd)(unsigned long pfn);
void (*release_pud)(unsigned long pfn);
+ void (*release_p4d)(unsigned long pfn);
/* Pagetable manipulation functions */
void (*set_pte)(pte_t *ptep, pte_t pteval);
@@ -279,12 +281,21 @@ struct pv_mmu_ops {
struct paravirt_callee_save pmd_val;
struct paravirt_callee_save make_pmd;
-#if CONFIG_PGTABLE_LEVELS == 4
+#if CONFIG_PGTABLE_LEVELS >= 4
struct paravirt_callee_save pud_val;
struct paravirt_callee_save make_pud;
- void (*set_pgd)(pgd_t *pudp, pgd_t pgdval);
-#endif /* CONFIG_PGTABLE_LEVELS == 4 */
+ void (*set_p4d)(p4d_t *p4dp, p4d_t p4dval);
+
+#if CONFIG_PGTABLE_LEVELS >= 5
+ struct paravirt_callee_save p4d_val;
+ struct paravirt_callee_save make_p4d;
+
+ void (*set_pgd)(pgd_t *pgdp, pgd_t pgdval);
+#endif /* CONFIG_PGTABLE_LEVELS >= 5 */
+
+#endif /* CONFIG_PGTABLE_LEVELS >= 4 */
+
#endif /* CONFIG_PGTABLE_LEVELS >= 3 */
struct pv_lazy_ops lazy_mode;
diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h
index d08eacd298c2..9f1b21f372fe 100644
--- a/arch/x86/include/asm/pci_x86.h
+++ b/arch/x86/include/asm/pci_x86.h
@@ -4,6 +4,8 @@
* (c) 1999 Martin Mares <mj@ucw.cz>
*/
+#include <linux/ioport.h>
+
#undef DEBUG
#ifdef DEBUG
diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
index b6d425999f99..b2d0cd8288aa 100644
--- a/arch/x86/include/asm/pgalloc.h
+++ b/arch/x86/include/asm/pgalloc.h
@@ -17,9 +17,11 @@ static inline void paravirt_alloc_pmd(struct mm_struct *mm, unsigned long pfn) {
static inline void paravirt_alloc_pmd_clone(unsigned long pfn, unsigned long clonepfn,
unsigned long start, unsigned long count) {}
static inline void paravirt_alloc_pud(struct mm_struct *mm, unsigned long pfn) {}
+static inline void paravirt_alloc_p4d(struct mm_struct *mm, unsigned long pfn) {}
static inline void paravirt_release_pte(unsigned long pfn) {}
static inline void paravirt_release_pmd(unsigned long pfn) {}
static inline void paravirt_release_pud(unsigned long pfn) {}
+static inline void paravirt_release_p4d(unsigned long pfn) {}
#endif
/*
@@ -121,10 +123,10 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
#endif /* CONFIG_X86_PAE */
#if CONFIG_PGTABLE_LEVELS > 3
-static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
+static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4d, pud_t *pud)
{
paravirt_alloc_pud(mm, __pa(pud) >> PAGE_SHIFT);
- set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(pud)));
+ set_p4d(p4d, __p4d(_PAGE_TABLE | __pa(pud)));
}
static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
@@ -150,6 +152,37 @@ static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
___pud_free_tlb(tlb, pud);
}
+#if CONFIG_PGTABLE_LEVELS > 4
+static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4d)
+{
+ paravirt_alloc_p4d(mm, __pa(p4d) >> PAGE_SHIFT);
+ set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(p4d)));
+}
+
+static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+ gfp_t gfp = GFP_KERNEL_ACCOUNT;
+
+ if (mm == &init_mm)
+ gfp &= ~__GFP_ACCOUNT;
+ return (p4d_t *)get_zeroed_page(gfp);
+}
+
+static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d)
+{
+ BUG_ON((unsigned long)p4d & (PAGE_SIZE-1));
+ free_page((unsigned long)p4d);
+}
+
+extern void ___p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d);
+
+static inline void __p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d,
+ unsigned long address)
+{
+ ___p4d_free_tlb(tlb, p4d);
+}
+
+#endif /* CONFIG_PGTABLE_LEVELS > 4 */
#endif /* CONFIG_PGTABLE_LEVELS > 3 */
#endif /* CONFIG_PGTABLE_LEVELS > 2 */
diff --git a/arch/x86/include/asm/pgtable-2level_types.h b/arch/x86/include/asm/pgtable-2level_types.h
index 392576433e77..373ab1de909f 100644
--- a/arch/x86/include/asm/pgtable-2level_types.h
+++ b/arch/x86/include/asm/pgtable-2level_types.h
@@ -7,6 +7,7 @@
typedef unsigned long pteval_t;
typedef unsigned long pmdval_t;
typedef unsigned long pudval_t;
+typedef unsigned long p4dval_t;
typedef unsigned long pgdval_t;
typedef unsigned long pgprotval_t;
diff --git a/arch/x86/include/asm/pgtable-3level_types.h b/arch/x86/include/asm/pgtable-3level_types.h
index bcc89625ebe5..b8a4341faafa 100644
--- a/arch/x86/include/asm/pgtable-3level_types.h
+++ b/arch/x86/include/asm/pgtable-3level_types.h
@@ -7,6 +7,7 @@
typedef u64 pteval_t;
typedef u64 pmdval_t;
typedef u64 pudval_t;
+typedef u64 p4dval_t;
typedef u64 pgdval_t;
typedef u64 pgprotval_t;
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 585ee0d42d18..f5af95a0c6b8 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -2,8 +2,6 @@
#define _ASM_X86_PGTABLE_H
#include <asm/page.h>
-#include <asm/e820.h>
-
#include <asm/pgtable_types.h>
/*
@@ -53,11 +51,19 @@ extern struct mm_struct *pgd_page_get_mm(struct page *page);
#define set_pmd(pmdp, pmd) native_set_pmd(pmdp, pmd)
-#ifndef __PAGETABLE_PUD_FOLDED
+#ifndef __PAGETABLE_P4D_FOLDED
#define set_pgd(pgdp, pgd) native_set_pgd(pgdp, pgd)
#define pgd_clear(pgd) native_pgd_clear(pgd)
#endif
+#ifndef set_p4d
+# define set_p4d(p4dp, p4d) native_set_p4d(p4dp, p4d)
+#endif
+
+#ifndef __PAGETABLE_PUD_FOLDED
+#define p4d_clear(p4d) native_p4d_clear(p4d)
+#endif
+
#ifndef set_pud
# define set_pud(pudp, pud) native_set_pud(pudp, pud)
#endif
@@ -74,6 +80,11 @@ extern struct mm_struct *pgd_page_get_mm(struct page *page);
#define pgd_val(x) native_pgd_val(x)
#define __pgd(x) native_make_pgd(x)
+#ifndef __PAGETABLE_P4D_FOLDED
+#define p4d_val(x) native_p4d_val(x)
+#define __p4d(x) native_make_p4d(x)
+#endif
+
#ifndef __PAGETABLE_PUD_FOLDED
#define pud_val(x) native_pud_val(x)
#define __pud(x) native_make_pud(x)
@@ -179,6 +190,17 @@ static inline unsigned long pud_pfn(pud_t pud)
return (pud_val(pud) & pud_pfn_mask(pud)) >> PAGE_SHIFT;
}
+static inline unsigned long p4d_pfn(p4d_t p4d)
+{
+ return (p4d_val(p4d) & p4d_pfn_mask(p4d)) >> PAGE_SHIFT;
+}
+
+static inline int p4d_large(p4d_t p4d)
+{
+ /* No 512 GiB pages yet */
+ return 0;
+}
+
#define pte_page(pte) pfn_to_page(pte_pfn(pte))
static inline int pmd_large(pmd_t pte)
@@ -538,6 +560,7 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
#define pte_pgprot(x) __pgprot(pte_flags(x))
#define pmd_pgprot(x) __pgprot(pmd_flags(x))
#define pud_pgprot(x) __pgprot(pud_flags(x))
+#define p4d_pgprot(x) __pgprot(p4d_flags(x))
#define canon_pgprot(p) __pgprot(massage_pgprot(p))
@@ -587,6 +610,7 @@ pte_t *populate_extra_pte(unsigned long vaddr);
#include <linux/mm_types.h>
#include <linux/mmdebug.h>
#include <linux/log2.h>
+#include <asm/fixmap.h>
static inline int pte_none(pte_t pte)
{
@@ -770,7 +794,52 @@ static inline int pud_large(pud_t pud)
}
#endif /* CONFIG_PGTABLE_LEVELS > 2 */
+static inline unsigned long pud_index(unsigned long address)
+{
+ return (address >> PUD_SHIFT) & (PTRS_PER_PUD - 1);
+}
+
#if CONFIG_PGTABLE_LEVELS > 3
+static inline int p4d_none(p4d_t p4d)
+{
+ return (native_p4d_val(p4d) & ~(_PAGE_KNL_ERRATUM_MASK)) == 0;
+}
+
+static inline int p4d_present(p4d_t p4d)
+{
+ return p4d_flags(p4d) & _PAGE_PRESENT;
+}
+
+static inline unsigned long p4d_page_vaddr(p4d_t p4d)
+{
+ return (unsigned long)__va(p4d_val(p4d) & p4d_pfn_mask(p4d));
+}
+
+/*
+ * Currently stuck as a macro due to indirect forward reference to
+ * linux/mmzone.h's __section_mem_map_addr() definition:
+ */
+#define p4d_page(p4d) \
+ pfn_to_page((p4d_val(p4d) & p4d_pfn_mask(p4d)) >> PAGE_SHIFT)
+
+/* Find an entry in the third-level page table.. */
+static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
+{
+ return (pud_t *)p4d_page_vaddr(*p4d) + pud_index(address);
+}
+
+static inline int p4d_bad(p4d_t p4d)
+{
+ return (p4d_flags(p4d) & ~(_KERNPG_TABLE | _PAGE_USER)) != 0;
+}
+#endif /* CONFIG_PGTABLE_LEVELS > 3 */
+
+static inline unsigned long p4d_index(unsigned long address)
+{
+ return (address >> P4D_SHIFT) & (PTRS_PER_P4D - 1);
+}
+
+#if CONFIG_PGTABLE_LEVELS > 4
static inline int pgd_present(pgd_t pgd)
{
return pgd_flags(pgd) & _PAGE_PRESENT;
@@ -788,14 +857,9 @@ static inline unsigned long pgd_page_vaddr(pgd_t pgd)
#define pgd_page(pgd) pfn_to_page(pgd_val(pgd) >> PAGE_SHIFT)
/* to find an entry in a page-table-directory. */
-static inline unsigned long pud_index(unsigned long address)
-{
- return (address >> PUD_SHIFT) & (PTRS_PER_PUD - 1);
-}
-
-static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address)
+static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
{
- return (pud_t *)pgd_page_vaddr(*pgd) + pud_index(address);
+ return (p4d_t *)pgd_page_vaddr(*pgd) + p4d_index(address);
}
static inline int pgd_bad(pgd_t pgd)
@@ -813,7 +877,7 @@ static inline int pgd_none(pgd_t pgd)
*/
return !native_pgd_val(pgd);
}
-#endif /* CONFIG_PGTABLE_LEVELS > 3 */
+#endif /* CONFIG_PGTABLE_LEVELS > 4 */
#endif /* __ASSEMBLY__ */
@@ -845,6 +909,7 @@ static inline int pgd_none(pgd_t pgd)
extern int direct_gbpages;
void init_mem_mapping(void);
void early_alloc_pgt_buf(void);
+extern void memblock_find_dma_reserve(void);
#ifdef CONFIG_X86_64
/* Realmode trampoline initialization. */
diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h
index fbc73360aea0..bfab55675c16 100644
--- a/arch/x86/include/asm/pgtable_32.h
+++ b/arch/x86/include/asm/pgtable_32.h
@@ -14,7 +14,6 @@
*/
#ifndef __ASSEMBLY__
#include <asm/processor.h>
-#include <asm/fixmap.h>
#include <linux/threads.h>
#include <asm/paravirt.h>
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 73c7ccc38912..9991224f6238 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -35,15 +35,22 @@ extern void paging_init(void);
#define pud_ERROR(e) \
pr_err("%s:%d: bad pud %p(%016lx)\n", \
__FILE__, __LINE__, &(e), pud_val(e))
+
+#if CONFIG_PGTABLE_LEVELS >= 5
+#define p4d_ERROR(e) \
+ pr_err("%s:%d: bad p4d %p(%016lx)\n", \
+ __FILE__, __LINE__, &(e), p4d_val(e))
+#endif
+
#define pgd_ERROR(e) \
pr_err("%s:%d: bad pgd %p(%016lx)\n", \
__FILE__, __LINE__, &(e), pgd_val(e))
struct mm_struct;
+void set_pte_vaddr_p4d(p4d_t *p4d_page, unsigned long vaddr, pte_t new_pte);
void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte);
-
static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr,
pte_t *ptep)
{
@@ -121,6 +128,20 @@ static inline pud_t native_pudp_get_and_clear(pud_t *xp)
#endif
}
+static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d)
+{
+ *p4dp = p4d;
+}
+
+static inline void native_p4d_clear(p4d_t *p4d)
+{
+#ifdef CONFIG_X86_5LEVEL
+ native_set_p4d(p4d, native_make_p4d(0));
+#else
+ native_set_p4d(p4d, (p4d_t) { .pgd = native_make_pgd(0)});
+#endif
+}
+
static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
{
*pgdp = pgd;
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 3a264200c62f..06470da156ba 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -13,6 +13,7 @@
typedef unsigned long pteval_t;
typedef unsigned long pmdval_t;
typedef unsigned long pudval_t;
+typedef unsigned long p4dval_t;
typedef unsigned long pgdval_t;
typedef unsigned long pgprotval_t;
@@ -22,12 +23,32 @@ typedef struct { pteval_t pte; } pte_t;
#define SHARED_KERNEL_PMD 0
+#ifdef CONFIG_X86_5LEVEL
+
+/*
+ * PGDIR_SHIFT determines what a top-level page table entry can map
+ */
+#define PGDIR_SHIFT 48
+#define PTRS_PER_PGD 512
+
+/*
+ * 4th level page in 5-level paging case
+ */
+#define P4D_SHIFT 39
+#define PTRS_PER_P4D 512
+#define P4D_SIZE (_AC(1, UL) << P4D_SHIFT)
+#define P4D_MASK (~(P4D_SIZE - 1))
+
+#else /* CONFIG_X86_5LEVEL */
+
/*
* PGDIR_SHIFT determines what a top-level page table entry can map
*/
#define PGDIR_SHIFT 39
#define PTRS_PER_PGD 512
+#endif /* CONFIG_X86_5LEVEL */
+
/*
* 3rd level page
*/
@@ -55,9 +76,15 @@ typedef struct { pteval_t pte; } pte_t;
/* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */
#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
+#ifdef CONFIG_X86_5LEVEL
+#define VMALLOC_SIZE_TB _AC(16384, UL)
+#define __VMALLOC_BASE _AC(0xff92000000000000, UL)
+#define __VMEMMAP_BASE _AC(0xffd4000000000000, UL)
+#else
#define VMALLOC_SIZE_TB _AC(32, UL)
#define __VMALLOC_BASE _AC(0xffffc90000000000, UL)
#define __VMEMMAP_BASE _AC(0xffffea0000000000, UL)
+#endif
#ifdef CONFIG_RANDOMIZE_MEMORY
#define VMALLOC_START vmalloc_base
#define VMEMMAP_START vmemmap_base
@@ -67,10 +94,11 @@ typedef struct { pteval_t pte; } pte_t;
#endif /* CONFIG_RANDOMIZE_MEMORY */
#define VMALLOC_END (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL))
#define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
-#define MODULES_END _AC(0xffffffffff000000, UL)
+/* The module sections ends with the start of the fixmap */
+#define MODULES_END __fix_to_virt(__end_of_fixed_addresses + 1)
#define MODULES_LEN (MODULES_END - MODULES_VADDR)
#define ESPFIX_PGD_ENTRY _AC(-2, UL)
-#define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << PGDIR_SHIFT)
+#define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << P4D_SHIFT)
#define EFI_VA_START ( -4 * (_AC(1, UL) << 30))
#define EFI_VA_END (-68 * (_AC(1, UL) << 30))
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 62484333673d..bf9638e1ee42 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -272,9 +272,28 @@ static inline pgdval_t pgd_flags(pgd_t pgd)
return native_pgd_val(pgd) & PTE_FLAGS_MASK;
}
-#if CONFIG_PGTABLE_LEVELS > 3
-#include <asm-generic/5level-fixup.h>
+#if CONFIG_PGTABLE_LEVELS > 4
+typedef struct { p4dval_t p4d; } p4d_t;
+
+static inline p4d_t native_make_p4d(pudval_t val)
+{
+ return (p4d_t) { val };
+}
+
+static inline p4dval_t native_p4d_val(p4d_t p4d)
+{
+ return p4d.p4d;
+}
+#else
+#include <asm-generic/pgtable-nop4d.h>
+
+static inline p4dval_t native_p4d_val(p4d_t p4d)
+{
+ return native_pgd_val(p4d.pgd);
+}
+#endif
+#if CONFIG_PGTABLE_LEVELS > 3
typedef struct { pudval_t pud; } pud_t;
static inline pud_t native_make_pud(pmdval_t val)
@@ -287,12 +306,11 @@ static inline pudval_t native_pud_val(pud_t pud)
return pud.pud;
}
#else
-#define __ARCH_USE_5LEVEL_HACK
#include <asm-generic/pgtable-nopud.h>
static inline pudval_t native_pud_val(pud_t pud)
{
- return native_pgd_val(pud.pgd);
+ return native_pgd_val(pud.p4d.pgd);
}
#endif
@@ -309,15 +327,30 @@ static inline pmdval_t native_pmd_val(pmd_t pmd)
return pmd.pmd;
}
#else
-#define __ARCH_USE_5LEVEL_HACK
#include <asm-generic/pgtable-nopmd.h>
static inline pmdval_t native_pmd_val(pmd_t pmd)
{
- return native_pgd_val(pmd.pud.pgd);
+ return native_pgd_val(pmd.pud.p4d.pgd);
}
#endif
+static inline p4dval_t p4d_pfn_mask(p4d_t p4d)
+{
+ /* No 512 GiB huge pages yet */
+ return PTE_PFN_MASK;
+}
+
+static inline p4dval_t p4d_flags_mask(p4d_t p4d)
+{
+ return ~p4d_pfn_mask(p4d);
+}
+
+static inline p4dval_t p4d_flags(p4d_t p4d)
+{
+ return native_p4d_val(p4d) & p4d_flags_mask(p4d);
+}
+
static inline pudval_t pud_pfn_mask(pud_t pud)
{
if (native_pud_val(pud) & _PAGE_PSE)
@@ -461,6 +494,7 @@ enum pg_level {
PG_LEVEL_4K,
PG_LEVEL_2M,
PG_LEVEL_1G,
+ PG_LEVEL_512G,
PG_LEVEL_NUM
};
diff --git a/arch/x86/include/asm/pmem.h b/arch/x86/include/asm/pmem.h
index 2c1ebeb4d737..529bb4a6487a 100644
--- a/arch/x86/include/asm/pmem.h
+++ b/arch/x86/include/asm/pmem.h
@@ -55,7 +55,8 @@ static inline int arch_memcpy_from_pmem(void *dst, const void *src, size_t n)
* @size: number of bytes to write back
*
* Write back a cache range using the CLWB (cache line write back)
- * instruction.
+ * instruction. Note that @size is internally rounded up to be cache
+ * line size aligned.
*/
static inline void arch_wb_cache_pmem(void *addr, size_t size)
{
@@ -69,15 +70,6 @@ static inline void arch_wb_cache_pmem(void *addr, size_t size)
clwb(p);
}
-/*
- * copy_from_iter_nocache() on x86 only uses non-temporal stores for iovec
- * iterators, so for other types (bvec & kvec) we must do a cache write-back.
- */
-static inline bool __iter_needs_pmem_wb(struct iov_iter *i)
-{
- return iter_is_iovec(i) == false;
-}
-
/**
* arch_copy_from_iter_pmem - copy data from an iterator to PMEM
* @addr: PMEM destination address
@@ -94,7 +86,35 @@ static inline size_t arch_copy_from_iter_pmem(void *addr, size_t bytes,
/* TODO: skip the write-back by always using non-temporal stores */
len = copy_from_iter_nocache(addr, bytes, i);
- if (__iter_needs_pmem_wb(i))
+ /*
+ * In the iovec case on x86_64 copy_from_iter_nocache() uses
+ * non-temporal stores for the bulk of the transfer, but we need
+ * to manually flush if the transfer is unaligned. A cached
+ * memory copy is used when destination or size is not naturally
+ * aligned. That is:
+ * - Require 8-byte alignment when size is 8 bytes or larger.
+ * - Require 4-byte alignment when size is 4 bytes.
+ *
+ * In the non-iovec case the entire destination needs to be
+ * flushed.
+ */
+ if (iter_is_iovec(i)) {
+ unsigned long flushed, dest = (unsigned long) addr;
+
+ if (bytes < 8) {
+ if (!IS_ALIGNED(dest, 4) || (bytes != 4))
+ arch_wb_cache_pmem(addr, 1);
+ } else {
+ if (!IS_ALIGNED(dest, 8)) {
+ dest = ALIGN(dest, boot_cpu_data.x86_clflush_size);
+ arch_wb_cache_pmem(addr, 1);
+ }
+
+ flushed = dest - (unsigned long) addr;
+ if (bytes > flushed && !IS_ALIGNED(bytes - flushed, 8))
+ arch_wb_cache_pmem(addr + bytes - 1, 1);
+ }
+ } else
arch_wb_cache_pmem(addr, bytes);
return len;
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index f385eca5407a..3cada998a402 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -80,7 +80,7 @@ extern u16 __read_mostly tlb_lld_1g[NR_INFO];
/*
* CPU type and hardware bug flags. Kept separately for each CPU.
- * Members of this structure are referenced in head.S, so think twice
+ * Members of this structure are referenced in head_32.S, so think twice
* before touching them. [mj]
*/
@@ -89,14 +89,7 @@ struct cpuinfo_x86 {
__u8 x86_vendor; /* CPU vendor */
__u8 x86_model;
__u8 x86_mask;
-#ifdef CONFIG_X86_32
- char wp_works_ok; /* It doesn't on 386's */
-
- /* Problems on some 486Dx4's and old 386's: */
- char rfu;
- char pad0;
- char pad1;
-#else
+#ifdef CONFIG_X86_64
/* Number of 4K pages in DTLB/ITLB combined(in pages): */
int x86_tlbsize;
#endif
@@ -716,6 +709,8 @@ extern struct desc_ptr early_gdt_descr;
extern void cpu_set_gdt(int);
extern void switch_to_new_gdt(int);
+extern void load_direct_gdt(int);
+extern void load_fixmap_gdt(int);
extern void load_percpu_segment(int);
extern void cpu_init(void);
@@ -797,6 +792,7 @@ static inline void spin_lock_prefetch(const void *x)
/*
* User space process size: 3GB (default).
*/
+#define IA32_PAGE_OFFSET PAGE_OFFSET
#define TASK_SIZE PAGE_OFFSET
#define TASK_SIZE_MAX TASK_SIZE
#define STACK_TOP TASK_SIZE
@@ -873,7 +869,8 @@ extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
* This decides where the kernel will search for a free chunk of vm
* space during mmap's.
*/
-#define TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 3))
+#define __TASK_UNMAPPED_BASE(task_size) (PAGE_ALIGN(task_size / 3))
+#define TASK_UNMAPPED_BASE __TASK_UNMAPPED_BASE(TASK_SIZE)
#define KSTK_EIP(task) (task_pt_regs(task)->ip)
@@ -884,6 +881,8 @@ extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
extern int get_tsc_mode(unsigned long adr);
extern int set_tsc_mode(unsigned int val);
+DECLARE_PER_CPU(u64, msr_misc_features_shadow);
+
/* Register/unregister a process' MPX related resource */
#define MPX_ENABLE_MANAGEMENT() mpx_enable_management()
#define MPX_DISABLE_MANAGEMENT() mpx_disable_management()
diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h
index 9b9b30b19441..8d3964fc5f91 100644
--- a/arch/x86/include/asm/proto.h
+++ b/arch/x86/include/asm/proto.h
@@ -9,6 +9,7 @@ void syscall_init(void);
#ifdef CONFIG_X86_64
void entry_SYSCALL_64(void);
+long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2);
#endif
#ifdef CONFIG_X86_32
@@ -30,6 +31,7 @@ void x86_report_nx(void);
extern int reboot_force;
-long do_arch_prctl(struct task_struct *task, int code, unsigned long addr);
+long do_arch_prctl_common(struct task_struct *task, int option,
+ unsigned long cpuid_enabled);
#endif /* _ASM_X86_PROTO_H */
diff --git a/arch/x86/include/asm/reboot.h b/arch/x86/include/asm/reboot.h
index 2cb1cc253d51..fc62ba8dce93 100644
--- a/arch/x86/include/asm/reboot.h
+++ b/arch/x86/include/asm/reboot.h
@@ -15,6 +15,7 @@ struct machine_ops {
};
extern struct machine_ops machine_ops;
+extern int crashing_cpu;
void native_machine_crash_shutdown(struct pt_regs *regs);
void native_machine_shutdown(void);
diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h
index fac9a5c0abe9..d91ba04dd007 100644
--- a/arch/x86/include/asm/required-features.h
+++ b/arch/x86/include/asm/required-features.h
@@ -53,6 +53,12 @@
# define NEED_MOVBE 0
#endif
+#ifdef CONFIG_X86_5LEVEL
+# define NEED_LA57 (1<<(X86_FEATURE_LA57 & 31))
+#else
+# define NEED_LA57 0
+#endif
+
#ifdef CONFIG_X86_64
#ifdef CONFIG_PARAVIRT
/* Paravirtualized systems may not have PSE or PGE available */
@@ -98,7 +104,7 @@
#define REQUIRED_MASK13 0
#define REQUIRED_MASK14 0
#define REQUIRED_MASK15 0
-#define REQUIRED_MASK16 0
+#define REQUIRED_MASK16 (NEED_LA57)
#define REQUIRED_MASK17 0
#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 026ea82ecc60..47103eca3775 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -149,6 +149,19 @@ void smp_store_cpu_info(int id);
#define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu)
#define cpu_acpi_id(cpu) per_cpu(x86_cpu_to_acpiid, cpu)
+/*
+ * This function is needed by all SMP systems. It must _always_ be valid
+ * from the initial startup. We map APIC_BASE very early in page_setup(),
+ * so this is correct in the x86 case.
+ */
+#define raw_smp_processor_id() (this_cpu_read(cpu_number))
+
+#ifdef CONFIG_X86_32
+extern int safe_smp_processor_id(void);
+#else
+# define safe_smp_processor_id() smp_processor_id()
+#endif
+
#else /* !CONFIG_SMP */
#define wbinvd_on_cpu(cpu) wbinvd()
static inline int wbinvd_on_all_cpus(void)
@@ -161,22 +174,6 @@ static inline int wbinvd_on_all_cpus(void)
extern unsigned disabled_cpus;
-#ifdef CONFIG_X86_32_SMP
-/*
- * This function is needed by all SMP systems. It must _always_ be valid
- * from the initial startup. We map APIC_BASE very early in page_setup(),
- * so this is correct in the x86 case.
- */
-#define raw_smp_processor_id() (this_cpu_read(cpu_number))
-extern int safe_smp_processor_id(void);
-
-#elif defined(CONFIG_X86_64_SMP)
-#define raw_smp_processor_id() (this_cpu_read(cpu_number))
-
-#define safe_smp_processor_id() smp_processor_id()
-
-#endif
-
#ifdef CONFIG_X86_LOCAL_APIC
#ifndef CONFIG_X86_64
@@ -191,11 +188,7 @@ static inline int logical_smp_processor_id(void)
extern int hard_smp_processor_id(void);
#else /* CONFIG_X86_LOCAL_APIC */
-
-# ifndef CONFIG_SMP
-# define hard_smp_processor_id() 0
-# endif
-
+#define hard_smp_processor_id() 0
#endif /* CONFIG_X86_LOCAL_APIC */
#ifdef CONFIG_DEBUG_NMI_SELFTEST
diff --git a/arch/x86/include/asm/sparsemem.h b/arch/x86/include/asm/sparsemem.h
index 4517d6b93188..1f5bee2c202f 100644
--- a/arch/x86/include/asm/sparsemem.h
+++ b/arch/x86/include/asm/sparsemem.h
@@ -26,8 +26,13 @@
# endif
#else /* CONFIG_X86_32 */
# define SECTION_SIZE_BITS 27 /* matt - 128 is convenient right now */
-# define MAX_PHYSADDR_BITS 44
-# define MAX_PHYSMEM_BITS 46
+# ifdef CONFIG_X86_5LEVEL
+# define MAX_PHYSADDR_BITS 52
+# define MAX_PHYSMEM_BITS 52
+# else
+# define MAX_PHYSADDR_BITS 44
+# define MAX_PHYSMEM_BITS 46
+# endif
#endif
#endif /* CONFIG_SPARSEMEM */
diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h
index 58505f01962f..dcbd9bcce714 100644
--- a/arch/x86/include/asm/stackprotector.h
+++ b/arch/x86/include/asm/stackprotector.h
@@ -87,7 +87,7 @@ static inline void setup_stack_canary_segment(int cpu)
{
#ifdef CONFIG_X86_32
unsigned long canary = (unsigned long)&per_cpu(stack_canary, cpu);
- struct desc_struct *gdt_table = get_cpu_gdt_table(cpu);
+ struct desc_struct *gdt_table = get_cpu_gdt_rw(cpu);
struct desc_struct desc;
desc = gdt_table[GDT_ENTRY_STACK_CANARY];
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index ad6f5eb07a95..e00e1bd6e7b3 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -73,9 +73,6 @@ struct thread_info {
* thread information flags
* - these are process state flags that various assembly files
* may need to access
- * - pending work-to-be-done flags are in LSW
- * - other flags in MSW
- * Warning: layout of LSW is hardcoded in entry.S
*/
#define TIF_SYSCALL_TRACE 0 /* syscall trace active */
#define TIF_NOTIFY_RESUME 1 /* callback before returning to user */
@@ -87,6 +84,8 @@ struct thread_info {
#define TIF_SECCOMP 8 /* secure computing */
#define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */
#define TIF_UPROBE 12 /* breakpointed or singlestepping */
+#define TIF_PATCH_PENDING 13 /* pending live patching update */
+#define TIF_NOCPUID 15 /* CPUID is not accessible in userland */
#define TIF_NOTSC 16 /* TSC is not accessible in userland */
#define TIF_IA32 17 /* IA32 compatibility process */
#define TIF_NOHZ 19 /* in adaptive nohz mode */
@@ -103,13 +102,15 @@ struct thread_info {
#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
-#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP)
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
+#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP)
#define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU)
#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
#define _TIF_SECCOMP (1 << TIF_SECCOMP)
#define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY)
#define _TIF_UPROBE (1 << TIF_UPROBE)
+#define _TIF_PATCH_PENDING (1 << TIF_PATCH_PENDING)
+#define _TIF_NOCPUID (1 << TIF_NOCPUID)
#define _TIF_NOTSC (1 << TIF_NOTSC)
#define _TIF_IA32 (1 << TIF_IA32)
#define _TIF_NOHZ (1 << TIF_NOHZ)
@@ -133,12 +134,14 @@ struct thread_info {
/* work to do on any return to user space */
#define _TIF_ALLWORK_MASK \
- ((0x0000FFFF & ~_TIF_SECCOMP) | _TIF_SYSCALL_TRACEPOINT | \
- _TIF_NOHZ)
+ (_TIF_SYSCALL_TRACE | _TIF_NOTIFY_RESUME | _TIF_SIGPENDING | \
+ _TIF_NEED_RESCHED | _TIF_SINGLESTEP | _TIF_SYSCALL_EMU | \
+ _TIF_SYSCALL_AUDIT | _TIF_USER_RETURN_NOTIFY | _TIF_UPROBE | \
+ _TIF_PATCH_PENDING | _TIF_NOHZ | _TIF_SYSCALL_TRACEPOINT)
/* flags to check in __switch_to() */
#define _TIF_WORK_CTXSW \
- (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP)
+ (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP)
#define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
@@ -168,9 +171,9 @@ static inline unsigned long current_stack_pointer(void)
* entirely contained by a single stack frame.
*
* Returns:
- * 1 if within a frame
- * -1 if placed across a frame boundary (or outside stack)
- * 0 unable to determine (no frame pointers, etc)
+ * GOOD_FRAME if within a frame
+ * BAD_STACK if placed across a frame boundary (or outside stack)
+ * NOT_STACK unable to determine (no frame pointers, etc)
*/
static inline int arch_within_stack_frames(const void * const stack,
const void * const stackend,
@@ -197,13 +200,14 @@ static inline int arch_within_stack_frames(const void * const stack,
* the copy as invalid.
*/
if (obj + len <= frame)
- return obj >= oldframe + 2 * sizeof(void *) ? 1 : -1;
+ return obj >= oldframe + 2 * sizeof(void *) ?
+ GOOD_FRAME : BAD_STACK;
oldframe = frame;
frame = *(const void * const *)frame;
}
- return -1;
+ return BAD_STACK;
#else
- return 0;
+ return NOT_STACK;
#endif
}
@@ -239,6 +243,8 @@ static inline int arch_within_stack_frames(const void * const stack,
extern void arch_task_cache_init(void);
extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
extern void arch_release_task_struct(struct task_struct *tsk);
+extern void arch_setup_new_exec(void);
+#define arch_setup_new_exec arch_setup_new_exec
#endif /* !__ASSEMBLY__ */
#endif /* _ASM_X86_THREAD_INFO_H */
diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h
index a04eabd43d06..27e9f9d769b8 100644
--- a/arch/x86/include/asm/timer.h
+++ b/arch/x86/include/asm/timer.h
@@ -12,6 +12,8 @@ extern int recalibrate_cpu_khz(void);
extern int no_timer_check;
+extern bool using_native_sched_clock(void);
+
/*
* We use the full linear equation: f(x) = a + b*x, in order to allow
* a continuous function in the face of dynamic freq changes.
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index fc5abff9b7fd..6ed9ea469b48 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -110,6 +110,16 @@ static inline void cr4_clear_bits(unsigned long mask)
}
}
+static inline void cr4_toggle_bits(unsigned long mask)
+{
+ unsigned long cr4;
+
+ cr4 = this_cpu_read(cpu_tlbstate.cr4);
+ cr4 ^= mask;
+ this_cpu_write(cpu_tlbstate.cr4, cr4);
+ __write_cr4(cr4);
+}
+
/* Read the CR4 shadow. */
static inline unsigned long cr4_read_shadow(void)
{
@@ -205,7 +215,6 @@ static inline void __flush_tlb_one(unsigned long addr)
/*
* TLB flushing:
*
- * - flush_tlb() flushes the current mm struct TLBs
* - flush_tlb_all() flushes all processes TLBs
* - flush_tlb_mm(mm) flushes the specified mm context TLB's
* - flush_tlb_page(vma, vmaddr) flushes one page
@@ -237,11 +246,6 @@ static inline void flush_tlb_all(void)
__flush_tlb_all();
}
-static inline void flush_tlb(void)
-{
- __flush_tlb_up();
-}
-
static inline void local_flush_tlb(void)
{
__flush_tlb_up();
@@ -303,14 +307,11 @@ static inline void flush_tlb_kernel_range(unsigned long start,
flush_tlb_mm_range(vma->vm_mm, start, end, vma->vm_flags)
extern void flush_tlb_all(void);
-extern void flush_tlb_current_task(void);
extern void flush_tlb_page(struct vm_area_struct *, unsigned long);
extern void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
unsigned long end, unsigned long vmflag);
extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
-#define flush_tlb() flush_tlb_current_task()
-
void native_flush_tlb_others(const struct cpumask *cpumask,
struct mm_struct *mm,
unsigned long start, unsigned long end);
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index ea148313570f..68766b276d9e 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -3,19 +3,14 @@
/*
* User space memory access functions
*/
-#include <linux/errno.h>
#include <linux/compiler.h>
#include <linux/kasan-checks.h>
-#include <linux/thread_info.h>
#include <linux/string.h>
#include <asm/asm.h>
#include <asm/page.h>
#include <asm/smap.h>
#include <asm/extable.h>
-#define VERIFY_READ 0
-#define VERIFY_WRITE 1
-
/*
* The fs value determines whether argument validity checking should be
* performed or not. If get_fs() == USER_DS, checking is performed, with
@@ -384,6 +379,18 @@ do { \
: "=r" (err), ltype(x) \
: "m" (__m(addr)), "i" (errret), "0" (err))
+#define __get_user_asm_nozero(x, addr, err, itype, rtype, ltype, errret) \
+ asm volatile("\n" \
+ "1: mov"itype" %2,%"rtype"1\n" \
+ "2:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "3: mov %3,%0\n" \
+ " jmp 2b\n" \
+ ".previous\n" \
+ _ASM_EXTABLE(1b, 3b) \
+ : "=r" (err), ltype(x) \
+ : "m" (__m(addr)), "i" (errret), "0" (err))
+
/*
* This doesn't do __uaccess_begin/end - the exception handling
* around it must do that.
@@ -675,59 +682,6 @@ extern struct movsl_mask {
# include <asm/uaccess_64.h>
#endif
-unsigned long __must_check _copy_from_user(void *to, const void __user *from,
- unsigned n);
-unsigned long __must_check _copy_to_user(void __user *to, const void *from,
- unsigned n);
-
-extern void __compiletime_error("usercopy buffer size is too small")
-__bad_copy_user(void);
-
-static inline void copy_user_overflow(int size, unsigned long count)
-{
- WARN(1, "Buffer overflow detected (%d < %lu)!\n", size, count);
-}
-
-static __always_inline unsigned long __must_check
-copy_from_user(void *to, const void __user *from, unsigned long n)
-{
- int sz = __compiletime_object_size(to);
-
- might_fault();
-
- kasan_check_write(to, n);
-
- if (likely(sz < 0 || sz >= n)) {
- check_object_size(to, n, false);
- n = _copy_from_user(to, from, n);
- } else if (!__builtin_constant_p(n))
- copy_user_overflow(sz, n);
- else
- __bad_copy_user();
-
- return n;
-}
-
-static __always_inline unsigned long __must_check
-copy_to_user(void __user *to, const void *from, unsigned long n)
-{
- int sz = __compiletime_object_size(from);
-
- kasan_check_read(from, n);
-
- might_fault();
-
- if (likely(sz < 0 || sz >= n)) {
- check_object_size(from, n, true);
- n = _copy_to_user(to, from, n);
- } else if (!__builtin_constant_p(n))
- copy_user_overflow(sz, n);
- else
- __bad_copy_user();
-
- return n;
-}
-
/*
* We rely on the nested NMI work to allow atomic faults from the NMI path; the
* nested NMI paths are careful to preserve CR2.
diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h
index 7d3bdd1ed697..aeda9bb8af50 100644
--- a/arch/x86/include/asm/uaccess_32.h
+++ b/arch/x86/include/asm/uaccess_32.h
@@ -4,149 +4,52 @@
/*
* User space memory access functions
*/
-#include <linux/errno.h>
-#include <linux/thread_info.h>
#include <linux/string.h>
#include <asm/asm.h>
#include <asm/page.h>
-unsigned long __must_check __copy_to_user_ll
- (void __user *to, const void *from, unsigned long n);
-unsigned long __must_check __copy_from_user_ll
- (void *to, const void __user *from, unsigned long n);
-unsigned long __must_check __copy_from_user_ll_nozero
- (void *to, const void __user *from, unsigned long n);
-unsigned long __must_check __copy_from_user_ll_nocache
- (void *to, const void __user *from, unsigned long n);
+unsigned long __must_check __copy_user_ll
+ (void *to, const void *from, unsigned long n);
unsigned long __must_check __copy_from_user_ll_nocache_nozero
(void *to, const void __user *from, unsigned long n);
-/**
- * __copy_to_user_inatomic: - Copy a block of data into user space, with less checking.
- * @to: Destination address, in user space.
- * @from: Source address, in kernel space.
- * @n: Number of bytes to copy.
- *
- * Context: User context only.
- *
- * Copy data from kernel space to user space. Caller must check
- * the specified block with access_ok() before calling this function.
- * The caller should also make sure he pins the user space address
- * so that we don't result in page fault and sleep.
- */
-static __always_inline unsigned long __must_check
-__copy_to_user_inatomic(void __user *to, const void *from, unsigned long n)
-{
- check_object_size(from, n, true);
- return __copy_to_user_ll(to, from, n);
-}
-
-/**
- * __copy_to_user: - Copy a block of data into user space, with less checking.
- * @to: Destination address, in user space.
- * @from: Source address, in kernel space.
- * @n: Number of bytes to copy.
- *
- * Context: User context only. This function may sleep if pagefaults are
- * enabled.
- *
- * Copy data from kernel space to user space. Caller must check
- * the specified block with access_ok() before calling this function.
- *
- * Returns number of bytes that could not be copied.
- * On success, this will be zero.
- */
static __always_inline unsigned long __must_check
-__copy_to_user(void __user *to, const void *from, unsigned long n)
+raw_copy_to_user(void __user *to, const void *from, unsigned long n)
{
- might_fault();
- return __copy_to_user_inatomic(to, from, n);
+ return __copy_user_ll((__force void *)to, from, n);
}
static __always_inline unsigned long
-__copy_from_user_inatomic(void *to, const void __user *from, unsigned long n)
-{
- return __copy_from_user_ll_nozero(to, from, n);
-}
-
-/**
- * __copy_from_user: - Copy a block of data from user space, with less checking.
- * @to: Destination address, in kernel space.
- * @from: Source address, in user space.
- * @n: Number of bytes to copy.
- *
- * Context: User context only. This function may sleep if pagefaults are
- * enabled.
- *
- * Copy data from user space to kernel space. Caller must check
- * the specified block with access_ok() before calling this function.
- *
- * Returns number of bytes that could not be copied.
- * On success, this will be zero.
- *
- * If some data could not be copied, this function will pad the copied
- * data to the requested size using zero bytes.
- *
- * An alternate version - __copy_from_user_inatomic() - may be called from
- * atomic context and will fail rather than sleep. In this case the
- * uncopied bytes will *NOT* be padded with zeros. See fs/filemap.h
- * for explanation of why this is needed.
- */
-static __always_inline unsigned long
-__copy_from_user(void *to, const void __user *from, unsigned long n)
-{
- might_fault();
- check_object_size(to, n, false);
- if (__builtin_constant_p(n)) {
- unsigned long ret;
-
- switch (n) {
- case 1:
- __uaccess_begin();
- __get_user_size(*(u8 *)to, from, 1, ret, 1);
- __uaccess_end();
- return ret;
- case 2:
- __uaccess_begin();
- __get_user_size(*(u16 *)to, from, 2, ret, 2);
- __uaccess_end();
- return ret;
- case 4:
- __uaccess_begin();
- __get_user_size(*(u32 *)to, from, 4, ret, 4);
- __uaccess_end();
- return ret;
- }
- }
- return __copy_from_user_ll(to, from, n);
-}
-
-static __always_inline unsigned long __copy_from_user_nocache(void *to,
- const void __user *from, unsigned long n)
+raw_copy_from_user(void *to, const void __user *from, unsigned long n)
{
- might_fault();
if (__builtin_constant_p(n)) {
unsigned long ret;
switch (n) {
case 1:
+ ret = 0;
__uaccess_begin();
- __get_user_size(*(u8 *)to, from, 1, ret, 1);
+ __get_user_asm_nozero(*(u8 *)to, from, ret,
+ "b", "b", "=q", 1);
__uaccess_end();
return ret;
case 2:
+ ret = 0;
__uaccess_begin();
- __get_user_size(*(u16 *)to, from, 2, ret, 2);
+ __get_user_asm_nozero(*(u16 *)to, from, ret,
+ "w", "w", "=r", 2);
__uaccess_end();
return ret;
case 4:
+ ret = 0;
__uaccess_begin();
- __get_user_size(*(u32 *)to, from, 4, ret, 4);
+ __get_user_asm_nozero(*(u32 *)to, from, ret,
+ "l", "k", "=r", 4);
__uaccess_end();
return ret;
}
}
- return __copy_from_user_ll_nocache(to, from, n);
+ return __copy_user_ll(to, (__force const void *)from, n);
}
static __always_inline unsigned long
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index 673059a109fe..c5504b9a472e 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -5,7 +5,6 @@
* User space memory access functions
*/
#include <linux/compiler.h>
-#include <linux/errno.h>
#include <linux/lockdep.h>
#include <linux/kasan-checks.h>
#include <asm/alternative.h>
@@ -46,58 +45,54 @@ copy_user_generic(void *to, const void *from, unsigned len)
return ret;
}
-__must_check unsigned long
-copy_in_user(void __user *to, const void __user *from, unsigned len);
-
-static __always_inline __must_check
-int __copy_from_user_nocheck(void *dst, const void __user *src, unsigned size)
+static __always_inline __must_check unsigned long
+raw_copy_from_user(void *dst, const void __user *src, unsigned long size)
{
int ret = 0;
- check_object_size(dst, size, false);
if (!__builtin_constant_p(size))
return copy_user_generic(dst, (__force void *)src, size);
switch (size) {
case 1:
__uaccess_begin();
- __get_user_asm(*(u8 *)dst, (u8 __user *)src,
+ __get_user_asm_nozero(*(u8 *)dst, (u8 __user *)src,
ret, "b", "b", "=q", 1);
__uaccess_end();
return ret;
case 2:
__uaccess_begin();
- __get_user_asm(*(u16 *)dst, (u16 __user *)src,
+ __get_user_asm_nozero(*(u16 *)dst, (u16 __user *)src,
ret, "w", "w", "=r", 2);
__uaccess_end();
return ret;
case 4:
__uaccess_begin();
- __get_user_asm(*(u32 *)dst, (u32 __user *)src,
+ __get_user_asm_nozero(*(u32 *)dst, (u32 __user *)src,
ret, "l", "k", "=r", 4);
__uaccess_end();
return ret;
case 8:
__uaccess_begin();
- __get_user_asm(*(u64 *)dst, (u64 __user *)src,
+ __get_user_asm_nozero(*(u64 *)dst, (u64 __user *)src,
ret, "q", "", "=r", 8);
__uaccess_end();
return ret;
case 10:
__uaccess_begin();
- __get_user_asm(*(u64 *)dst, (u64 __user *)src,
+ __get_user_asm_nozero(*(u64 *)dst, (u64 __user *)src,
ret, "q", "", "=r", 10);
if (likely(!ret))
- __get_user_asm(*(u16 *)(8 + (char *)dst),
+ __get_user_asm_nozero(*(u16 *)(8 + (char *)dst),
(u16 __user *)(8 + (char __user *)src),
ret, "w", "w", "=r", 2);
__uaccess_end();
return ret;
case 16:
__uaccess_begin();
- __get_user_asm(*(u64 *)dst, (u64 __user *)src,
+ __get_user_asm_nozero(*(u64 *)dst, (u64 __user *)src,
ret, "q", "", "=r", 16);
if (likely(!ret))
- __get_user_asm(*(u64 *)(8 + (char *)dst),
+ __get_user_asm_nozero(*(u64 *)(8 + (char *)dst),
(u64 __user *)(8 + (char __user *)src),
ret, "q", "", "=r", 8);
__uaccess_end();
@@ -107,20 +102,11 @@ int __copy_from_user_nocheck(void *dst, const void __user *src, unsigned size)
}
}
-static __always_inline __must_check
-int __copy_from_user(void *dst, const void __user *src, unsigned size)
-{
- might_fault();
- kasan_check_write(dst, size);
- return __copy_from_user_nocheck(dst, src, size);
-}
-
-static __always_inline __must_check
-int __copy_to_user_nocheck(void __user *dst, const void *src, unsigned size)
+static __always_inline __must_check unsigned long
+raw_copy_to_user(void __user *dst, const void *src, unsigned long size)
{
int ret = 0;
- check_object_size(src, size, true);
if (!__builtin_constant_p(size))
return copy_user_generic((__force void *)dst, src, size);
switch (size) {
@@ -176,100 +162,16 @@ int __copy_to_user_nocheck(void __user *dst, const void *src, unsigned size)
}
static __always_inline __must_check
-int __copy_to_user(void __user *dst, const void *src, unsigned size)
-{
- might_fault();
- kasan_check_read(src, size);
- return __copy_to_user_nocheck(dst, src, size);
-}
-
-static __always_inline __must_check
-int __copy_in_user(void __user *dst, const void __user *src, unsigned size)
-{
- int ret = 0;
-
- might_fault();
- if (!__builtin_constant_p(size))
- return copy_user_generic((__force void *)dst,
- (__force void *)src, size);
- switch (size) {
- case 1: {
- u8 tmp;
- __uaccess_begin();
- __get_user_asm(tmp, (u8 __user *)src,
- ret, "b", "b", "=q", 1);
- if (likely(!ret))
- __put_user_asm(tmp, (u8 __user *)dst,
- ret, "b", "b", "iq", 1);
- __uaccess_end();
- return ret;
- }
- case 2: {
- u16 tmp;
- __uaccess_begin();
- __get_user_asm(tmp, (u16 __user *)src,
- ret, "w", "w", "=r", 2);
- if (likely(!ret))
- __put_user_asm(tmp, (u16 __user *)dst,
- ret, "w", "w", "ir", 2);
- __uaccess_end();
- return ret;
- }
-
- case 4: {
- u32 tmp;
- __uaccess_begin();
- __get_user_asm(tmp, (u32 __user *)src,
- ret, "l", "k", "=r", 4);
- if (likely(!ret))
- __put_user_asm(tmp, (u32 __user *)dst,
- ret, "l", "k", "ir", 4);
- __uaccess_end();
- return ret;
- }
- case 8: {
- u64 tmp;
- __uaccess_begin();
- __get_user_asm(tmp, (u64 __user *)src,
- ret, "q", "", "=r", 8);
- if (likely(!ret))
- __put_user_asm(tmp, (u64 __user *)dst,
- ret, "q", "", "er", 8);
- __uaccess_end();
- return ret;
- }
- default:
- return copy_user_generic((__force void *)dst,
- (__force void *)src, size);
- }
-}
-
-static __must_check __always_inline int
-__copy_from_user_inatomic(void *dst, const void __user *src, unsigned size)
-{
- kasan_check_write(dst, size);
- return __copy_from_user_nocheck(dst, src, size);
-}
-
-static __must_check __always_inline int
-__copy_to_user_inatomic(void __user *dst, const void *src, unsigned size)
+unsigned long raw_copy_in_user(void __user *dst, const void __user *src, unsigned long size)
{
- kasan_check_read(src, size);
- return __copy_to_user_nocheck(dst, src, size);
+ return copy_user_generic((__force void *)dst,
+ (__force void *)src, size);
}
extern long __copy_user_nocache(void *dst, const void __user *src,
unsigned size, int zerorest);
static inline int
-__copy_from_user_nocache(void *dst, const void __user *src, unsigned size)
-{
- might_fault();
- kasan_check_write(dst, size);
- return __copy_user_nocache(dst, src, size, 1);
-}
-
-static inline int
__copy_from_user_inatomic_nocache(void *dst, const void __user *src,
unsigned size)
{
diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h
index 32712a925f26..1ba1536f627e 100644
--- a/arch/x86/include/asm/unistd.h
+++ b/arch/x86/include/asm/unistd.h
@@ -23,7 +23,6 @@
# include <asm/unistd_64.h>
# include <asm/unistd_64_x32.h>
# define __ARCH_WANT_COMPAT_SYS_TIME
-# define __ARCH_WANT_COMPAT_SYS_GETDENTS64
# define __ARCH_WANT_COMPAT_SYS_PREADV64
# define __ARCH_WANT_COMPAT_SYS_PWRITEV64
# define __ARCH_WANT_COMPAT_SYS_PREADV64V2
diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h
index 6fa75b17aec3..e6676495b125 100644
--- a/arch/x86/include/asm/unwind.h
+++ b/arch/x86/include/asm/unwind.h
@@ -11,9 +11,12 @@ struct unwind_state {
unsigned long stack_mask;
struct task_struct *task;
int graph_idx;
+ bool error;
#ifdef CONFIG_FRAME_POINTER
+ bool got_irq;
unsigned long *bp, *orig_sp;
struct pt_regs *regs;
+ unsigned long ip;
#else
unsigned long *sp;
#endif
@@ -40,6 +43,11 @@ void unwind_start(struct unwind_state *state, struct task_struct *task,
__unwind_start(state, task, regs, first_frame);
}
+static inline bool unwind_error(struct unwind_state *state)
+{
+ return state->error;
+}
+
#ifdef CONFIG_FRAME_POINTER
static inline
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index 57ab86d94d64..7cac79802ad2 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -185,6 +185,15 @@
#define MSG_REGULAR 1
#define MSG_RETRY 2
+#define BAU_DESC_QUALIFIER 0x534749
+
+enum uv_bau_version {
+ UV_BAU_V1 = 1,
+ UV_BAU_V2,
+ UV_BAU_V3,
+ UV_BAU_V4,
+};
+
/*
* Distribution: 32 bytes (256 bits) (bytes 0-0x1f of descriptor)
* If the 'multilevel' flag in the header portion of the descriptor
@@ -222,20 +231,32 @@ struct bau_local_cpumask {
* the s/w ack bit vector ]
*/
-/*
- * The payload is software-defined for INTD transactions
+/**
+ * struct uv1_2_3_bau_msg_payload - defines payload for INTD transactions
+ * @address: Signifies a page or all TLB's of the cpu
+ * @sending_cpu: CPU from which the message originates
+ * @acknowledge_count: CPUs on the destination Hub that received the interrupt
*/
-struct bau_msg_payload {
- unsigned long address; /* signifies a page or all
- TLB's of the cpu */
- /* 64 bits */
- unsigned short sending_cpu; /* filled in by sender */
- /* 16 bits */
- unsigned short acknowledge_count; /* filled in by destination */
- /* 16 bits */
- unsigned int reserved1:32; /* not usable */
+struct uv1_2_3_bau_msg_payload {
+ u64 address;
+ u16 sending_cpu;
+ u16 acknowledge_count;
};
+/**
+ * struct uv4_bau_msg_payload - defines payload for INTD transactions
+ * @address: Signifies a page or all TLB's of the cpu
+ * @sending_cpu: CPU from which the message originates
+ * @acknowledge_count: CPUs on the destination Hub that received the interrupt
+ * @qualifier: Set by source to verify origin of INTD broadcast
+ */
+struct uv4_bau_msg_payload {
+ u64 address;
+ u16 sending_cpu;
+ u16 acknowledge_count;
+ u32 reserved:8;
+ u32 qualifier:24;
+};
/*
* UV1 Message header: 16 bytes (128 bits) (bytes 0x30-0x3f of descriptor)
@@ -385,17 +406,6 @@ struct uv2_3_bau_msg_header {
/* bits 127:120 */
};
-/* Abstracted BAU functions */
-struct bau_operations {
- unsigned long (*read_l_sw_ack)(void);
- unsigned long (*read_g_sw_ack)(int pnode);
- unsigned long (*bau_gpa_to_offset)(unsigned long vaddr);
- void (*write_l_sw_ack)(unsigned long mmr);
- void (*write_g_sw_ack)(int pnode, unsigned long mmr);
- void (*write_payload_first)(int pnode, unsigned long mmr);
- void (*write_payload_last)(int pnode, unsigned long mmr);
-};
-
/*
* The activation descriptor:
* The format of the message to send, plus all accompanying control
@@ -411,7 +421,10 @@ struct bau_desc {
struct uv2_3_bau_msg_header uv2_3_hdr;
} header;
- struct bau_msg_payload payload;
+ union bau_payload_header {
+ struct uv1_2_3_bau_msg_payload uv1_2_3;
+ struct uv4_bau_msg_payload uv4;
+ } payload;
};
/* UV1:
* -payload-- ---------header------
@@ -588,8 +601,12 @@ struct uvhub_desc {
struct socket_desc socket[2];
};
-/*
- * one per-cpu; to locate the software tables
+/**
+ * struct bau_control
+ * @status_mmr: location of status mmr, determined by uvhub_cpu
+ * @status_index: index of ERR|BUSY bits in status mmr, determined by uvhub_cpu
+ *
+ * Per-cpu control struct containing CPU topology information and BAU tuneables.
*/
struct bau_control {
struct bau_desc *descriptor_base;
@@ -607,6 +624,8 @@ struct bau_control {
int timeout_tries;
int ipi_attempts;
int conseccompletes;
+ u64 status_mmr;
+ int status_index;
bool nobau;
short baudisabled;
short cpu;
@@ -644,6 +663,19 @@ struct bau_control {
struct hub_and_pnode *thp;
};
+/* Abstracted BAU functions */
+struct bau_operations {
+ unsigned long (*read_l_sw_ack)(void);
+ unsigned long (*read_g_sw_ack)(int pnode);
+ unsigned long (*bau_gpa_to_offset)(unsigned long vaddr);
+ void (*write_l_sw_ack)(unsigned long mmr);
+ void (*write_g_sw_ack)(int pnode, unsigned long mmr);
+ void (*write_payload_first)(int pnode, unsigned long mmr);
+ void (*write_payload_last)(int pnode, unsigned long mmr);
+ int (*wait_completion)(struct bau_desc*,
+ struct bau_control*, long try);
+};
+
static inline void write_mmr_data_broadcast(int pnode, unsigned long mmr_image)
{
write_gmmr(pnode, UVH_BAU_DATA_BROADCAST, mmr_image);
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index 72e8300b1e8a..9cffb44a3cf5 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -485,15 +485,17 @@ static inline unsigned long uv_soc_phys_ram_to_gpa(unsigned long paddr)
if (paddr < uv_hub_info->lowmem_remap_top)
paddr |= uv_hub_info->lowmem_remap_base;
- paddr |= uv_hub_info->gnode_upper;
- if (m_val)
+
+ if (m_val) {
+ paddr |= uv_hub_info->gnode_upper;
paddr = ((paddr << uv_hub_info->m_shift)
>> uv_hub_info->m_shift) |
((paddr >> uv_hub_info->m_val)
<< uv_hub_info->n_lshift);
- else
+ } else {
paddr |= uv_soc_phys_ram_to_nasid(paddr)
<< uv_hub_info->gpa_shift;
+ }
return paddr;
}
diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h
index 2444189cbe28..bccdf4938ddf 100644
--- a/arch/x86/include/asm/vdso.h
+++ b/arch/x86/include/asm/vdso.h
@@ -20,6 +20,7 @@ struct vdso_image {
long sym_vvar_page;
long sym_hpet_page;
long sym_pvclock_page;
+ long sym_hvclock_page;
long sym_VDSO32_NOTE_MASK;
long sym___kernel_sigreturn;
long sym___kernel_rt_sigreturn;
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index 33cbd3db97b9..8a5a02b1dfba 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -6,6 +6,7 @@
#include <linux/spinlock.h>
#include <linux/pfn.h>
#include <linux/mm.h>
+#include <linux/device.h>
#include <linux/uaccess.h>
#include <asm/page.h>
@@ -279,13 +280,17 @@ static inline pte_t __pte_ma(pteval_t x)
#define pmd_val_ma(v) ((v).pmd)
#ifdef __PAGETABLE_PUD_FOLDED
-#define pud_val_ma(v) ((v).pgd.pgd)
+#define pud_val_ma(v) ((v).p4d.pgd.pgd)
#else
#define pud_val_ma(v) ((v).pud)
#endif
#define __pmd_ma(x) ((pmd_t) { (x) } )
-#define pgd_val_ma(x) ((x).pgd)
+#ifdef __PAGETABLE_P4D_FOLDED
+#define p4d_val_ma(x) ((x).pgd.pgd)
+#else
+#define p4d_val_ma(x) ((x).p4d)
+#endif
void xen_set_domain_pte(pte_t *ptep, pte_t pteval, unsigned domid);