25 files changed, 485 insertions, 58 deletions
diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h
new file mode 100644
index 000000000000..830b19dbfa31
--- /dev/null
+++ b/arch/x86/include/asm/asm-prototypes.h
@@ -0,0 +1,16 @@
+#include <asm/ftrace.h>
+#include <linux/uaccess.h>
+#include <asm/string.h>
+#include <asm/page.h>
+#include <asm/checksum.h>
+
+#include <asm-generic/asm-prototypes.h>
+
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/special_insns.h>
+#include <asm/preempt.h>
+
+#ifndef CONFIG_X86_CMPXCHG64
+extern void cmpxchg8b_emu(void);
+#endif
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index 68557f52b961..854022772c5b 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -139,6 +139,19 @@ static __always_inline void __clear_bit(long nr, volatile unsigned long *addr)
 	asm volatile("btr %1,%0" : ADDR : "Ir" (nr));
 }
 
+static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr)
+{
+	bool negative;
+	asm volatile(LOCK_PREFIX "andb %2,%1\n\t"
+		CC_SET(s)
+		: CC_OUT(s) (negative), ADDR
+		: "ir" ((char) ~(1 << nr)) : "memory");
+	return negative;
+}
+
+// Let everybody know we have it
+#define clear_bit_unlock_is_negative_byte clear_bit_unlock_is_negative_byte
+
 /*
  * __clear_bit_unlock - Clears a bit in memory
  * @nr: Bit to clear
diff --git a/arch/x86/include/asm/checksum_64.h b/arch/x86/include/asm/checksum_64.h
index c020ee75dce7..08e7efb2c140 100644
--- a/arch/x86/include/asm/checksum_64.h
+++ b/arch/x86/include/asm/checksum_64.h
@@ -8,7 +8,7 @@
  */
 
 #include <linux/compiler.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/byteorder.h>
 
 /**
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 59ac427960d4..eafee3161d1c 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -105,6 +105,7 @@
 #define X86_FEATURE_AMD_DCM     ( 3*32+27) /* multi-node processor */
 #define X86_FEATURE_APERFMPERF	( 3*32+28) /* APERFMPERF */
 #define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */
+#define X86_FEATURE_TSC_KNOWN_FREQ ( 3*32+31) /* TSC has known frequency */
 
 /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
 #define X86_FEATURE_XMM3	( 4*32+ 0) /* "pni" SSE-3 */
@@ -188,6 +189,9 @@
 
 #define X86_FEATURE_CPB		( 7*32+ 2) /* AMD Core Performance Boost */
 #define X86_FEATURE_EPB		( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
+#define X86_FEATURE_CAT_L3	( 7*32+ 4) /* Cache Allocation Technology L3 */
+#define X86_FEATURE_CAT_L2	( 7*32+ 5) /* Cache Allocation Technology L2 */
+#define X86_FEATURE_CDP_L3	( 7*32+ 6) /* Code and Data Prioritization L3 */
 
 #define X86_FEATURE_HW_PSTATE	( 7*32+ 8) /* AMD HW-PState */
 #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
@@ -221,6 +225,7 @@
 #define X86_FEATURE_RTM		( 9*32+11) /* Restricted Transactional Memory */
 #define X86_FEATURE_CQM		( 9*32+12) /* Cache QoS Monitoring */
 #define X86_FEATURE_MPX		( 9*32+14) /* Memory Protection Extension */
+#define X86_FEATURE_RDT_A	( 9*32+15) /* Resource Director Technology Allocation */
 #define X86_FEATURE_AVX512F	( 9*32+16) /* AVX-512 Foundation */
 #define X86_FEATURE_AVX512DQ	( 9*32+17) /* AVX-512 DQ (Double/Quad granular) Instructions */
 #define X86_FEATURE_RDSEED	( 9*32+18) /* The RDSEED instruction */
diff --git a/arch/x86/include/asm/crypto/glue_helper.h b/arch/x86/include/asm/crypto/glue_helper.h
index 03bb1065c335..29e53ea7d764 100644
--- a/arch/x86/include/asm/crypto/glue_helper.h
+++ b/arch/x86/include/asm/crypto/glue_helper.h
@@ -5,8 +5,8 @@
 #ifndef _CRYPTO_GLUE_HELPER_H
 #define _CRYPTO_GLUE_HELPER_H
 
+#include <crypto/internal/skcipher.h>
 #include <linux/kernel.h>
-#include <linux/crypto.h>
 #include <asm/fpu/api.h>
 #include <crypto/b128ops.h>
 
@@ -69,6 +69,31 @@ static inline bool glue_fpu_begin(unsigned int bsize, int fpu_blocks_limit,
 	return true;
 }
 
+static inline bool glue_skwalk_fpu_begin(unsigned int bsize,
+					 int fpu_blocks_limit,
+					 struct skcipher_walk *walk,
+					 bool fpu_enabled, unsigned int nbytes)
+{
+	if (likely(fpu_blocks_limit < 0))
+		return false;
+
+	if (fpu_enabled)
+		return true;
+
+	/*
+	 * Vector-registers are only used when chunk to be processed is large
+	 * enough, so do not enable FPU until it is necessary.
+	 */
+	if (nbytes < bsize * (unsigned int)fpu_blocks_limit)
+		return false;
+
+	/* prevent sleeping if FPU is in use */
+	skcipher_walk_atomise(walk);
+
+	kernel_fpu_begin();
+	return true;
+}
+
 static inline void glue_fpu_end(bool fpu_enabled)
 {
 	if (fpu_enabled)
@@ -139,6 +164,18 @@ extern int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx,
 				 common_glue_func_t tweak_fn, void *tweak_ctx,
 				 void *crypt_ctx);
 
+extern int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx,
+				 struct blkcipher_desc *desc,
+				 struct scatterlist *dst,
+				 struct scatterlist *src, unsigned int nbytes,
+				 common_glue_func_t tweak_fn, void *tweak_ctx,
+				 void *crypt_ctx);
+
+extern int glue_xts_req_128bit(const struct common_glue_ctx *gctx,
+			       struct skcipher_request *req,
+			       common_glue_func_t tweak_fn, void *tweak_ctx,
+			       void *crypt_ctx);
+
 extern void glue_xts_crypt_128bit_one(void *ctx, u128 *dst, const u128 *src,
 				      le128 *iv, common_glue_func_t fn);
 
diff --git a/arch/x86/include/asm/floppy.h b/arch/x86/include/asm/floppy.h
index 1c7eefe32502..7ec59edde154 100644
--- a/arch/x86/include/asm/floppy.h
+++ b/arch/x86/include/asm/floppy.h
@@ -229,18 +229,18 @@ static struct fd_routine_l {
 	int (*_dma_setup)(char *addr, unsigned long size, int mode, int io);
 } fd_routine[] = {
 	{
-		request_dma,
-		free_dma,
-		get_dma_residue,
-		dma_mem_alloc,
-		hard_dma_setup
+		._request_dma		= request_dma,
+		._free_dma		= free_dma,
+		._get_dma_residue	= get_dma_residue,
+		._dma_mem_alloc		= dma_mem_alloc,
+		._dma_setup		= hard_dma_setup
 	},
 	{
-		vdma_request_dma,
-		vdma_nop,
-		vdma_get_dma_residue,
-		vdma_mem_alloc,
-		vdma_dma_setup
+		._request_dma		= vdma_request_dma,
+		._free_dma		= vdma_nop,
+		._get_dma_residue	= vdma_get_dma_residue,
+		._dma_mem_alloc		= vdma_mem_alloc,
+		._dma_setup		= vdma_dma_setup
 	}
 };
 
diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index 34a46dc076d3..8167fdb67ae8 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -57,7 +57,7 @@
 #define INTEL_FAM6_ATOM_SILVERMONT2	0x4D /* Avaton/Rangely */
 #define INTEL_FAM6_ATOM_AIRMONT		0x4C /* CherryTrail / Braswell */
 #define INTEL_FAM6_ATOM_MERRIFIELD	0x4A /* Tangier */
-#define INTEL_FAM6_ATOM_MOOREFIELD	0x5A /* Annidale */
+#define INTEL_FAM6_ATOM_MOOREFIELD	0x5A /* Anniedale */
 #define INTEL_FAM6_ATOM_GOLDMONT	0x5C
 #define INTEL_FAM6_ATOM_DENVERTON	0x5F /* Goldmont Microserver */
 
diff --git a/arch/x86/include/asm/intel_rdt.h b/arch/x86/include/asm/intel_rdt.h
new file mode 100644
index 000000000000..95ce5c85b009
--- /dev/null
+++ b/arch/x86/include/asm/intel_rdt.h
@@ -0,0 +1,224 @@
+#ifndef _ASM_X86_INTEL_RDT_H
+#define _ASM_X86_INTEL_RDT_H
+
+#ifdef CONFIG_INTEL_RDT_A
+
+#include <linux/kernfs.h>
+#include <linux/jump_label.h>
+
+#include <asm/intel_rdt_common.h>
+
+#define IA32_L3_QOS_CFG		0xc81
+#define IA32_L3_CBM_BASE	0xc90
+#define IA32_L2_CBM_BASE	0xd10
+
+#define L3_QOS_CDP_ENABLE	0x01ULL
+
+/**
+ * struct rdtgroup - store rdtgroup's data in resctrl file system.
+ * @kn:				kernfs node
+ * @rdtgroup_list:		linked list for all rdtgroups
+ * @closid:			closid for this rdtgroup
+ * @cpu_mask:			CPUs assigned to this rdtgroup
+ * @flags:			status bits
+ * @waitcount:			how many cpus expect to find this
+ *				group when they acquire rdtgroup_mutex
+ */
+struct rdtgroup {
+	struct kernfs_node	*kn;
+	struct list_head	rdtgroup_list;
+	int			closid;
+	struct cpumask		cpu_mask;
+	int			flags;
+	atomic_t		waitcount;
+};
+
+/* rdtgroup.flags */
+#define	RDT_DELETED		1
+
+/* List of all resource groups */
+extern struct list_head rdt_all_groups;
+
+int __init rdtgroup_init(void);
+
+/**
+ * struct rftype - describe each file in the resctrl file system
+ * @name: file name
+ * @mode: access mode
+ * @kf_ops: operations
+ * @seq_show: show content of the file
+ * @write: write to the file
+ */
+struct rftype {
+	char			*name;
+	umode_t			mode;
+	struct kernfs_ops	*kf_ops;
+
+	int (*seq_show)(struct kernfs_open_file *of,
+			struct seq_file *sf, void *v);
+	/*
+	 * write() is the generic write callback which maps directly to
+	 * kernfs write operation and overrides all other operations.
+	 * Maximum write size is determined by ->max_write_len.
+	 */
+	ssize_t (*write)(struct kernfs_open_file *of,
+			 char *buf, size_t nbytes, loff_t off);
+};
+
+/**
+ * struct rdt_resource - attributes of an RDT resource
+ * @enabled:			Is this feature enabled on this machine
+ * @capable:			Is this feature available on this machine
+ * @name:			Name to use in "schemata" file
+ * @num_closid:			Number of CLOSIDs available
+ * @max_cbm:			Largest Cache Bit Mask allowed
+ * @min_cbm_bits:		Minimum number of consecutive bits to be set
+ *				in a cache bit mask
+ * @domains:			All domains for this resource
+ * @num_domains:		Number of domains active
+ * @msr_base:			Base MSR address for CBMs
+ * @tmp_cbms:			Scratch space when updating schemata
+ * @num_tmp_cbms:		Number of CBMs in tmp_cbms
+ * @cache_level:		Which cache level defines scope of this domain
+ * @cbm_idx_multi:		Multiplier of CBM index
+ * @cbm_idx_offset:		Offset of CBM index. CBM index is computed by:
+ *				closid * cbm_idx_multi + cbm_idx_offset
+ */
+struct rdt_resource {
+	bool			enabled;
+	bool			capable;
+	char			*name;
+	int			num_closid;
+	int			cbm_len;
+	int			min_cbm_bits;
+	u32			max_cbm;
+	struct list_head	domains;
+	int			num_domains;
+	int			msr_base;
+	u32			*tmp_cbms;
+	int			num_tmp_cbms;
+	int			cache_level;
+	int			cbm_idx_multi;
+	int			cbm_idx_offset;
+};
+
+/**
+ * struct rdt_domain - group of cpus sharing an RDT resource
+ * @list:	all instances of this resource
+ * @id:		unique id for this instance
+ * @cpu_mask:	which cpus share this resource
+ * @cbm:	array of cache bit masks (indexed by CLOSID)
+ */
+struct rdt_domain {
+	struct list_head	list;
+	int			id;
+	struct cpumask		cpu_mask;
+	u32			*cbm;
+};
+
+/**
+ * struct msr_param - set a range of MSRs from a domain
+ * @res:       The resource to use
+ * @low:       Beginning index from base MSR
+ * @high:      End index
+ */
+struct msr_param {
+	struct rdt_resource	*res;
+	int			low;
+	int			high;
+};
+
+extern struct mutex rdtgroup_mutex;
+
+extern struct rdt_resource rdt_resources_all[];
+extern struct rdtgroup rdtgroup_default;
+DECLARE_STATIC_KEY_FALSE(rdt_enable_key);
+
+int __init rdtgroup_init(void);
+
+enum {
+	RDT_RESOURCE_L3,
+	RDT_RESOURCE_L3DATA,
+	RDT_RESOURCE_L3CODE,
+	RDT_RESOURCE_L2,
+
+	/* Must be the last */
+	RDT_NUM_RESOURCES,
+};
+
+#define for_each_capable_rdt_resource(r)				      \
+	for (r = rdt_resources_all; r < rdt_resources_all + RDT_NUM_RESOURCES;\
+	     r++) 							      \
+		if (r->capable)
+
+#define for_each_enabled_rdt_resource(r)				      \
+	for (r = rdt_resources_all; r < rdt_resources_all + RDT_NUM_RESOURCES;\
+	     r++)							      \
+		if (r->enabled)
+
+/* CPUID.(EAX=10H, ECX=ResID=1).EAX */
+union cpuid_0x10_1_eax {
+	struct {
+		unsigned int cbm_len:5;
+	} split;
+	unsigned int full;
+};
+
+/* CPUID.(EAX=10H, ECX=ResID=1).EDX */
+union cpuid_0x10_1_edx {
+	struct {
+		unsigned int cos_max:16;
+	} split;
+	unsigned int full;
+};
+
+DECLARE_PER_CPU_READ_MOSTLY(int, cpu_closid);
+
+void rdt_cbm_update(void *arg);
+struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn);
+void rdtgroup_kn_unlock(struct kernfs_node *kn);
+ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
+				char *buf, size_t nbytes, loff_t off);
+int rdtgroup_schemata_show(struct kernfs_open_file *of,
+			   struct seq_file *s, void *v);
+
+/*
+ * intel_rdt_sched_in() - Writes the task's CLOSid to IA32_PQR_MSR
+ *
+ * Following considerations are made so that this has minimal impact
+ * on scheduler hot path:
+ * - This will stay as no-op unless we are running on an Intel SKU
+ *   which supports resource control and we enable by mounting the
+ *   resctrl file system.
+ * - Caches the per cpu CLOSid values and does the MSR write only
+ *   when a task with a different CLOSid is scheduled in.
+ *
+ * Must be called with preemption disabled.
+ */
+static inline void intel_rdt_sched_in(void)
+{
+	if (static_branch_likely(&rdt_enable_key)) {
+		struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
+		int closid;
+
+		/*
+		 * If this task has a closid assigned, use it.
+		 * Else use the closid assigned to this cpu.
+		 */
+		closid = current->closid;
+		if (closid == 0)
+			closid = this_cpu_read(cpu_closid);
+
+		if (closid != state->closid) {
+			state->closid = closid;
+			wrmsr(MSR_IA32_PQR_ASSOC, state->rmid, closid);
+		}
+	}
+}
+
+#else
+
+static inline void intel_rdt_sched_in(void) {}
+
+#endif /* CONFIG_INTEL_RDT_A */
+#endif /* _ASM_X86_INTEL_RDT_H */
diff --git a/arch/x86/include/asm/intel_rdt_common.h b/arch/x86/include/asm/intel_rdt_common.h
new file mode 100644
index 000000000000..b31081b89407
--- /dev/null
+++ b/arch/x86/include/asm/intel_rdt_common.h
@@ -0,0 +1,27 @@
+#ifndef _ASM_X86_INTEL_RDT_COMMON_H
+#define _ASM_X86_INTEL_RDT_COMMON_H
+
+#define MSR_IA32_PQR_ASSOC	0x0c8f
+
+/**
+ * struct intel_pqr_state - State cache for the PQR MSR
+ * @rmid:		The cached Resource Monitoring ID
+ * @closid:		The cached Class Of Service ID
+ * @rmid_usecnt:	The usage counter for rmid
+ *
+ * The upper 32 bits of MSR_IA32_PQR_ASSOC contain closid and the
+ * lower 10 bits rmid. The update to MSR_IA32_PQR_ASSOC always
+ * contains both parts, so we need to cache them.
+ *
+ * The cache also helps to avoid pointless updates if the value does
+ * not change.
+ */
+struct intel_pqr_state {
+	u32			rmid;
+	u32			closid;
+	int			rmid_usecnt;
+};
+
+DECLARE_PER_CPU(struct intel_pqr_state, pqr_state);
+
+#endif /* _ASM_X86_INTEL_RDT_COMMON_H */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 7892530cbacf..a7066dc1a7e9 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -704,6 +704,7 @@ struct kvm_apic_map {
 
 /* Hyper-V emulation context */
 struct kvm_hv {
+	struct mutex hv_lock;
 	u64 hv_guest_os_id;
 	u64 hv_hypercall;
 	u64 hv_tsc_page;
@@ -767,7 +768,7 @@ struct kvm_arch {
 	spinlock_t pvclock_gtod_sync_lock;
 	bool use_master_clock;
 	u64 master_kernel_ns;
-	cycle_t master_cycle_now;
+	u64 master_cycle_now;
 	struct delayed_work kvmclock_update_work;
 	struct delayed_work kvmclock_sync_work;
 
diff --git a/arch/x86/include/asm/microcode_intel.h b/arch/x86/include/asm/microcode_intel.h
index 195becc6f780..e793fc9a9b20 100644
--- a/arch/x86/include/asm/microcode_intel.h
+++ b/arch/x86/include/asm/microcode_intel.h
@@ -52,6 +52,21 @@ struct extended_sigtable {
 
 #define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE)
 
+static inline u32 intel_get_microcode_revision(void)
+{
+	u32 rev, dummy;
+
+	native_wrmsrl(MSR_IA32_UCODE_REV, 0);
+
+	/* As documented in the SDM: Do a CPUID 1 here */
+	native_cpuid_eax(1);
+
+	/* get the current revision from MSR 0x8B */
+	native_rdmsr(MSR_IA32_UCODE_REV, dummy, rev);
+
+	return rev;
+}
+
 #ifdef CONFIG_MICROCODE_INTEL
 extern void __init load_ucode_intel_bsp(void);
 extern void load_ucode_intel_ap(void);
diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
index 72198c64e646..f9813b6d8b80 100644
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -31,6 +31,10 @@ typedef struct {
 	u16 pkey_allocation_map;
 	s16 execute_only_pkey;
 #endif
+#ifdef CONFIG_X86_INTEL_MPX
+	/* address of the bounds directory */
+	void __user *bd_addr;
+#endif
 } mm_context_t;
 
 #ifdef CONFIG_SMP
diff --git a/arch/x86/include/asm/mpx.h b/arch/x86/include/asm/mpx.h
index 7a35495275a9..0b416d4cf73b 100644
--- a/arch/x86/include/asm/mpx.h
+++ b/arch/x86/include/asm/mpx.h
@@ -59,7 +59,7 @@ siginfo_t *mpx_generate_siginfo(struct pt_regs *regs);
 int mpx_handle_bd_fault(void);
 static inline int kernel_managing_mpx_tables(struct mm_struct *mm)
 {
-	return (mm->bd_addr != MPX_INVALID_BOUNDS_DIR);
+	return (mm->context.bd_addr != MPX_INVALID_BOUNDS_DIR);
 }
 static inline void mpx_mm_init(struct mm_struct *mm)
 {
@@ -67,7 +67,7 @@ static inline void mpx_mm_init(struct mm_struct *mm)
 	 * NULL is theoretically a valid place to put the bounds
 	 * directory, so point this at an invalid address.
 	 */
-	mm->bd_addr = MPX_INVALID_BOUNDS_DIR;
+	mm->context.bd_addr = MPX_INVALID_BOUNDS_DIR;
 }
 void mpx_notify_unmap(struct mm_struct *mm, struct vm_area_struct *vma,
 		      unsigned long start, unsigned long end);
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 1cc82ece9ac1..62b775926045 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -116,8 +116,7 @@ static inline void native_pgd_clear(pgd_t *pgd)
 	native_set_pgd(pgd, native_make_pgd(0));
 }
 
-extern void sync_global_pgds(unsigned long start, unsigned long end,
-			     int removed);
+extern void sync_global_pgds(unsigned long start, unsigned long end);
 
 /*
  * Conversion functions: convert a page and protection to a page entry,
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 6aa741fbe1df..1be64da0384e 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -219,6 +219,24 @@ static inline void native_cpuid(unsigned int *eax, unsigned int *ebx,
 	    : "memory");
 }
 
+#define native_cpuid_reg(reg)					\
+static inline unsigned int native_cpuid_##reg(unsigned int op)	\
+{								\
+	unsigned int eax = op, ebx, ecx = 0, edx;		\
+								\
+	native_cpuid(&eax, &ebx, &ecx, &edx);			\
+								\
+	return reg;						\
+}
+
+/*
+ * Native CPUID functions returning a single datum.
+ */
+native_cpuid_reg(eax)
+native_cpuid_reg(ebx)
+native_cpuid_reg(ecx)
+native_cpuid_reg(edx)
+
 static inline void load_cr3(pgd_t *pgdir)
 {
 	write_cr3(__pa(pgdir));
@@ -602,33 +620,69 @@ static __always_inline void cpu_relax(void)
 	rep_nop();
 }
 
-/* Stop speculative execution and prefetching of modified code. */
+/*
+ * This function forces the icache and prefetched instruction stream to
+ * catch up with reality in two very specific cases:
+ *
+ *  a) Text was modified using one virtual address and is about to be executed
+ *     from the same physical page at a different virtual address.
+ *
+ *  b) Text was modified on a different CPU, may subsequently be
+ *     executed on this CPU, and you want to make sure the new version
+ *     gets executed.  This generally means you're calling this in a IPI.
+ *
+ * If you're calling this for a different reason, you're probably doing
+ * it wrong.
+ */
 static inline void sync_core(void)
 {
-	int tmp;
-
-#ifdef CONFIG_M486
 	/*
-	 * Do a CPUID if available, otherwise do a jump.  The jump
-	 * can conveniently enough be the jump around CPUID.
+	 * There are quite a few ways to do this.  IRET-to-self is nice
+	 * because it works on every CPU, at any CPL (so it's compatible
+	 * with paravirtualization), and it never exits to a hypervisor.
+	 * The only down sides are that it's a bit slow (it seems to be
+	 * a bit more than 2x slower than the fastest options) and that
+	 * it unmasks NMIs.  The "push %cs" is needed because, in
+	 * paravirtual environments, __KERNEL_CS may not be a valid CS
+	 * value when we do IRET directly.
+	 *
+	 * In case NMI unmasking or performance ever becomes a problem,
+	 * the next best option appears to be MOV-to-CR2 and an
+	 * unconditional jump.  That sequence also works on all CPUs,
+	 * but it will fault at CPL3 (i.e. Xen PV and lguest).
+	 *
+	 * CPUID is the conventional way, but it's nasty: it doesn't
+	 * exist on some 486-like CPUs, and it usually exits to a
+	 * hypervisor.
+	 *
+	 * Like all of Linux's memory ordering operations, this is a
+	 * compiler barrier as well.
 	 */
-	asm volatile("cmpl %2,%1\n\t"
-		     "jl 1f\n\t"
-		     "cpuid\n"
-		     "1:"
-		     : "=a" (tmp)
-		     : "rm" (boot_cpu_data.cpuid_level), "ri" (0), "0" (1)
-		     : "ebx", "ecx", "edx", "memory");
+	register void *__sp asm(_ASM_SP);
+
+#ifdef CONFIG_X86_32
+	asm volatile (
+		"pushfl\n\t"
+		"pushl %%cs\n\t"
+		"pushl $1f\n\t"
+		"iret\n\t"
+		"1:"
+		: "+r" (__sp) : : "memory");
 #else
-	/*
-	 * CPUID is a barrier to speculative execution.
-	 * Prefetched instructions are automatically
-	 * invalidated when modified.
-	 */
-	asm volatile("cpuid"
-		     : "=a" (tmp)
-		     : "0" (1)
-		     : "ebx", "ecx", "edx", "memory");
+	unsigned int tmp;
+
+	asm volatile (
+		"mov %%ss, %0\n\t"
+		"pushq %q0\n\t"
+		"pushq %%rsp\n\t"
+		"addq $8, (%%rsp)\n\t"
+		"pushfq\n\t"
+		"mov %%cs, %0\n\t"
+		"pushq %q0\n\t"
+		"pushq $1f\n\t"
+		"iretq\n\t"
+		"1:"
+		: "=&r" (tmp), "+r" (__sp) : : "cc", "memory");
 #endif
 }
 
diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h
index 3ad741b84072..448cfe1b48cf 100644
--- a/arch/x86/include/asm/pvclock.h
+++ b/arch/x86/include/asm/pvclock.h
@@ -14,7 +14,7 @@ static inline struct pvclock_vsyscall_time_info *pvclock_pvti_cpu0_va(void)
 #endif
 
 /* some helper functions for xen and kvm pv clock sources */
-cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src);
+u64 pvclock_clocksource_read(struct pvclock_vcpu_time_info *src);
 u8 pvclock_read_flags(struct pvclock_vcpu_time_info *src);
 void pvclock_set_flags(u8 flags);
 unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src);
@@ -87,11 +87,10 @@ static inline u64 pvclock_scale_delta(u64 delta, u32 mul_frac, int shift)
 }
 
 static __always_inline
-cycle_t __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src,
-			      u64 tsc)
+u64 __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src, u64 tsc)
 {
 	u64 delta = tsc - src->tsc_timestamp;
-	cycle_t offset = pvclock_scale_delta(delta, src->tsc_to_system_mul,
+	u64 offset = pvclock_scale_delta(delta, src->tsc_to_system_mul,
 					     src->tsc_shift);
 	return src->system_time + offset;
 }
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index a3269c897ec5..2e41c50ddf47 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -58,7 +58,7 @@ get_frame_pointer(struct task_struct *task, struct pt_regs *regs)
 	if (task == current)
 		return __builtin_frame_address(0);
 
-	return (unsigned long *)((struct inactive_task_frame *)task->thread.sp)->bp;
+	return &((struct inactive_task_frame *)task->thread.sp)->bp;
 }
 #else
 static inline unsigned long *
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index 5cb436acd463..fcc5cd387fd1 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -36,7 +36,10 @@ static inline void prepare_switch_to(struct task_struct *prev,
 
 asmlinkage void ret_from_fork(void);
 
-/* data that is pointed to by thread.sp */
+/*
+ * This is the structure pointed to by thread.sp for an inactive task.  The
+ * order of the fields must match the code in __switch_to_asm().
+ */
 struct inactive_task_frame {
 #ifdef CONFIG_X86_64
 	unsigned long r15;
@@ -48,6 +51,11 @@ struct inactive_task_frame {
 	unsigned long di;
 #endif
 	unsigned long bx;
+
+	/*
+	 * These two fields must be together.  They form a stack frame header,
+	 * needed by get_frame_pointer().
+	 */
 	unsigned long bp;
 	unsigned long ret_addr;
 };
diff --git a/arch/x86/include/asm/trace/exceptions.h b/arch/x86/include/asm/trace/exceptions.h
index 2fbc66c7885b..2422b14c50a7 100644
--- a/arch/x86/include/asm/trace/exceptions.h
+++ b/arch/x86/include/asm/trace/exceptions.h
@@ -6,7 +6,7 @@
 
 #include <linux/tracepoint.h>
 
-extern void trace_irq_vector_regfunc(void);
+extern int trace_irq_vector_regfunc(void);
 extern void trace_irq_vector_unregfunc(void);
 
 DECLARE_EVENT_CLASS(x86_exceptions,
diff --git a/arch/x86/include/asm/trace/irq_vectors.h b/arch/x86/include/asm/trace/irq_vectors.h
index 38a09a13a9bc..32dd6a9e343c 100644
--- a/arch/x86/include/asm/trace/irq_vectors.h
+++ b/arch/x86/include/asm/trace/irq_vectors.h
@@ -6,7 +6,7 @@
 
 #include <linux/tracepoint.h>
 
-extern void trace_irq_vector_regfunc(void);
+extern int trace_irq_vector_regfunc(void);
 extern void trace_irq_vector_unregfunc(void);
 
 DECLARE_EVENT_CLASS(x86_irq_vector,
diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h
index 33b6365c22fe..f5e6f1c417df 100644
--- a/arch/x86/include/asm/tsc.h
+++ b/arch/x86/include/asm/tsc.h
@@ -29,7 +29,7 @@ static inline cycles_t get_cycles(void)
 	return rdtsc();
 }
 
-extern struct system_counterval_t convert_art_to_tsc(cycle_t art);
+extern struct system_counterval_t convert_art_to_tsc(u64 art);
 
 extern void tsc_init(void);
 extern void mark_tsc_unstable(char *reason);
@@ -45,8 +45,17 @@ extern int tsc_clocksource_reliable;
  * Boot-time check whether the TSCs are synchronized across
  * all CPUs/cores:
  */
+#ifdef CONFIG_X86_TSC
+extern bool tsc_store_and_check_tsc_adjust(bool bootcpu);
+extern void tsc_verify_tsc_adjust(bool resume);
 extern void check_tsc_sync_source(int cpu);
 extern void check_tsc_sync_target(void);
+#else
+static inline bool tsc_store_and_check_tsc_adjust(bool bootcpu) { return false; }
+static inline void tsc_verify_tsc_adjust(bool resume) { }
+static inline void check_tsc_sync_source(int cpu) { }
+static inline void check_tsc_sync_target(void) { }
+#endif
 
 extern int notsc_setup(char *);
 extern void tsc_save_sched_clock_state(void);
diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h
index c5a7f3a930dd..6fa75b17aec3 100644
--- a/arch/x86/include/asm/unwind.h
+++ b/arch/x86/include/asm/unwind.h
@@ -12,7 +12,7 @@ struct unwind_state {
 	struct task_struct *task;
 	int graph_idx;
 #ifdef CONFIG_FRAME_POINTER
-	unsigned long *bp;
+	unsigned long *bp, *orig_sp;
 	struct pt_regs *regs;
 #else
 	unsigned long *sp;
diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h
index 3a01996db58f..022e59714562 100644
--- a/arch/x86/include/asm/vgtod.h
+++ b/arch/x86/include/asm/vgtod.h
@@ -17,8 +17,8 @@ struct vsyscall_gtod_data {
 	unsigned seq;
 
 	int vclock_mode;
-	cycle_t	cycle_last;
-	cycle_t	mask;
+	u64	cycle_last;
+	u64	mask;
 	u32	mult;
 	u32	shift;
 
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 6ba793178441..7ba7e90a9ad6 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -59,7 +59,7 @@ struct x86_init_irqs {
 
 /**
  * struct x86_init_oem - oem platform specific customizing functions
- * @arch_setup:			platform specific architecure setup
+ * @arch_setup:			platform specific architecture setup
  * @banner:			print a platform specific banner
  */
 struct x86_init_oem {
@@ -165,8 +165,25 @@ struct x86_legacy_devices {
 };
 
 /**
+ * enum x86_legacy_i8042_state - i8042 keyboard controller state
+ * @X86_LEGACY_I8042_PLATFORM_ABSENT: the controller is always absent on
+ *	given platform/subarch.
+ * @X86_LEGACY_I8042_FIRMWARE_ABSENT: firmware reports that the controller
+ *	is absent.
+ * @X86_LEGACY_i8042_EXPECTED_PRESENT: the controller is likely to be
+ *	present, the i8042 driver should probe for controller existence.
+ */
+enum x86_legacy_i8042_state {
+	X86_LEGACY_I8042_PLATFORM_ABSENT,
+	X86_LEGACY_I8042_FIRMWARE_ABSENT,
+	X86_LEGACY_I8042_EXPECTED_PRESENT,
+};
+
+/**
  * struct x86_legacy_features - legacy x86 features
  *
+ * @i8042: indicated if we expect the device to have i8042 controller
+ *	present.
  * @rtc: this device has a CMOS real-time clock present
  * @reserve_bios_regions: boot code will search for the EBDA address and the
  * 	start of the 640k - 1M BIOS region.  If false, the platform must
@@ -175,6 +192,7 @@ struct x86_legacy_devices {
  * 	documentation for further details.
  */
 struct x86_legacy_features {
+	enum x86_legacy_i8042_state i8042;
 	int rtc;
 	int reserve_bios_regions;
 	struct x86_legacy_devices devices;
@@ -188,15 +206,14 @@ struct x86_legacy_features {
  * @set_wallclock:		set time back to HW clock
  * @is_untracked_pat_range	exclude from PAT logic
  * @nmi_init			enable NMI on cpus
- * @i8042_detect		pre-detect if i8042 controller exists
  * @save_sched_clock_state:	save state for sched_clock() on suspend
  * @restore_sched_clock_state:	restore state for sched_clock() on resume
- * @apic_post_init:		adjust apic if neeeded
+ * @apic_post_init:		adjust apic if needed
  * @legacy:			legacy features
  * @set_legacy_features:	override legacy features. Use of this callback
  * 				is highly discouraged. You should only need
  * 				this if your hardware platform requires further
- * 				custom fine tuning far beyong what may be
+ * 				custom fine tuning far beyond what may be
  * 				possible in x86_early_init_platform_quirks() by
  * 				only using the current x86_hardware_subarch
  * 				semantics.
@@ -210,7 +227,6 @@ struct x86_platform_ops {
 	bool (*is_untracked_pat_range)(u64 start, u64 end);
 	void (*nmi_init)(void);
 	unsigned char (*get_nmi_reason)(void);
-	int (*i8042_detect)(void);
 	void (*save_sched_clock_state)(void);
 	void (*restore_sched_clock_state)(void);
 	void (*apic_post_init)(void);
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index f5fb840b43e8..33cbd3db97b9 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -7,7 +7,7 @@
 #include <linux/pfn.h>
 #include <linux/mm.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/page.h>
 #include <asm/pgtable.h>