Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/Kconfig                                        13
-rw-r--r--  arch/x86/Kconfig.debug                                   4
-rw-r--r--  arch/x86/crypto/sha512-avx2-asm.S                        2
-rw-r--r--  arch/x86/include/asm/intel-mid.h                         3
-rw-r--r--  arch/x86/include/asm/lguest.h                            7
-rw-r--r--  arch/x86/include/asm/serial.h                            8
-rw-r--r--  arch/x86/include/uapi/asm/e820.h                        10
-rw-r--r--  arch/x86/include/uapi/asm/hyperv.h                       2
-rw-r--r--  arch/x86/include/uapi/asm/msr-index.h                    8
-rw-r--r--  arch/x86/kernel/Makefile                                 1
-rw-r--r--  arch/x86/kernel/acpi/boot.c                              2
-rw-r--r--  arch/x86/kernel/apic/x2apic_cluster.c                    4
-rw-r--r--  arch/x86/kernel/cpu/perf_event.h                        18
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel.c                   2
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_ds.c                8
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_pt.c               33
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_rapl.c             94
-rw-r--r--  arch/x86/kernel/e820.c                                  26
-rw-r--r--  arch/x86/kernel/early_printk.c                           6
-rw-r--r--  arch/x86/kernel/i387.c                                   8
-rw-r--r--  arch/x86/kernel/pmem.c                                  53
-rw-r--r--  arch/x86/kernel/signal.c                                22
-rw-r--r--  arch/x86/kvm/lapic.c                                    11
-rw-r--r--  arch/x86/kvm/mmu.c                                      20
-rw-r--r--  arch/x86/kvm/vmx.c                                      12
-rw-r--r--  arch/x86/kvm/x86.c                                      10
-rw-r--r--  arch/x86/lguest/boot.c                                   7
-rw-r--r--  arch/x86/lguest/head_32.S                               30
-rw-r--r--  arch/x86/lib/usercopy_64.c                               2
-rw-r--r--  arch/x86/platform/intel-mid/Makefile                     1
-rw-r--r--  arch/x86/platform/intel-mid/early_printk_intel_mid.c   112
31 files changed, 287 insertions, 252 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index d43e7e1c784b..226d5696e1d1 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -22,6 +22,7 @@ config X86_64
### Arch settings
config X86
def_bool y
+ select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI
select ARCH_MIGHT_HAVE_ACPI_PDC if ACPI
select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS
select ARCH_HAS_FAST_MULTIPLIER
@@ -178,7 +179,7 @@ config SBUS
config NEED_DMA_MAP_STATE
def_bool y
- depends on X86_64 || INTEL_IOMMU || DMA_API_DEBUG
+ depends on X86_64 || INTEL_IOMMU || DMA_API_DEBUG || SWIOTLB
config NEED_SG_DMA_LENGTH
def_bool y
@@ -1421,6 +1422,16 @@ config ILLEGAL_POINTER_VALUE
source "mm/Kconfig"
+config X86_PMEM_LEGACY
+ bool "Support non-standard NVDIMMs and ADR protected memory"
+ help
+ Treat memory marked with the non-standard e820 type of 12 (as used
+ by the Intel Sandy Bridge-EP reference BIOS) as protected memory.
+ The kernel will offer these regions to the 'pmem' driver so
+ they can be used for persistent storage.
+
+ Say Y if unsure.
+
config HIGHPTE
bool "Allocate 3rd-level pagetables from highmem"
depends on HIGHMEM
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 20028da8ae18..72484a645f05 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -43,10 +43,6 @@ config EARLY_PRINTK
with klogd/syslogd or the X server. You should normally say N here,
unless you want to debug such a crash.
-config EARLY_PRINTK_INTEL_MID
- bool "Early printk for Intel MID platform support"
- depends on EARLY_PRINTK && X86_INTEL_MID
-
config EARLY_PRINTK_DBGP
bool "Early printk via EHCI debug port"
depends on EARLY_PRINTK && PCI
diff --git a/arch/x86/crypto/sha512-avx2-asm.S b/arch/x86/crypto/sha512-avx2-asm.S
index a4771dcd1fcf..1f20b35d8573 100644
--- a/arch/x86/crypto/sha512-avx2-asm.S
+++ b/arch/x86/crypto/sha512-avx2-asm.S
@@ -79,7 +79,7 @@ NUM_BLKS = %rdx
c = %rcx
d = %r8
e = %rdx
-y3 = %rdi
+y3 = %rsi
TBL = %rbp
diff --git a/arch/x86/include/asm/intel-mid.h b/arch/x86/include/asm/intel-mid.h
index 705d35708a50..7c5af123bdbd 100644
--- a/arch/x86/include/asm/intel-mid.h
+++ b/arch/x86/include/asm/intel-mid.h
@@ -136,9 +136,6 @@ extern enum intel_mid_timer_options intel_mid_timer_options;
#define SFI_MTMR_MAX_NUM 8
#define SFI_MRTC_MAX 8
-extern struct console early_hsu_console;
-extern void hsu_early_console_init(const char *);
-
extern void intel_scu_devices_create(void);
extern void intel_scu_devices_destroy(void);
diff --git a/arch/x86/include/asm/lguest.h b/arch/x86/include/asm/lguest.h
index e2d4a4afa8c3..3bbc07a57a31 100644
--- a/arch/x86/include/asm/lguest.h
+++ b/arch/x86/include/asm/lguest.h
@@ -20,13 +20,10 @@ extern unsigned long switcher_addr;
/* Found in switcher.S */
extern unsigned long default_idt_entries[];
-/* Declarations for definitions in lguest_guest.S */
-extern char lguest_noirq_start[], lguest_noirq_end[];
+/* Declarations for definitions in arch/x86/lguest/head_32.S */
+extern char lguest_noirq_iret[];
extern const char lgstart_cli[], lgend_cli[];
-extern const char lgstart_sti[], lgend_sti[];
-extern const char lgstart_popf[], lgend_popf[];
extern const char lgstart_pushf[], lgend_pushf[];
-extern const char lgstart_iret[], lgend_iret[];
extern void lguest_iret(void);
extern void lguest_init(void);
diff --git a/arch/x86/include/asm/serial.h b/arch/x86/include/asm/serial.h
index 460b84f64556..8378b8c9109c 100644
--- a/arch/x86/include/asm/serial.h
+++ b/arch/x86/include/asm/serial.h
@@ -12,11 +12,11 @@
/* Standard COM flags (except for COM4, because of the 8514 problem) */
#ifdef CONFIG_SERIAL_DETECT_IRQ
-# define STD_COMX_FLAGS (ASYNC_BOOT_AUTOCONF | ASYNC_SKIP_TEST | ASYNC_AUTO_IRQ)
-# define STD_COM4_FLAGS (ASYNC_BOOT_AUTOCONF | 0 | ASYNC_AUTO_IRQ)
+# define STD_COMX_FLAGS (UPF_BOOT_AUTOCONF | UPF_SKIP_TEST | UPF_AUTO_IRQ)
+# define STD_COM4_FLAGS (UPF_BOOT_AUTOCONF | 0 | UPF_AUTO_IRQ)
#else
-# define STD_COMX_FLAGS (ASYNC_BOOT_AUTOCONF | ASYNC_SKIP_TEST | 0 )
-# define STD_COM4_FLAGS (ASYNC_BOOT_AUTOCONF | 0 | 0 )
+# define STD_COMX_FLAGS (UPF_BOOT_AUTOCONF | UPF_SKIP_TEST | 0 )
+# define STD_COM4_FLAGS (UPF_BOOT_AUTOCONF | 0 | 0 )
#endif
#define SERIAL_PORT_DFNS \
diff --git a/arch/x86/include/uapi/asm/e820.h b/arch/x86/include/uapi/asm/e820.h
index d993e33f5236..960a8a9dc4ab 100644
--- a/arch/x86/include/uapi/asm/e820.h
+++ b/arch/x86/include/uapi/asm/e820.h
@@ -33,6 +33,16 @@
#define E820_NVS 4
#define E820_UNUSABLE 5
+/*
+ * This is a non-standardized way to represent ADR or NVDIMM regions that
+ * persist over a reboot. The kernel will ignore their special capabilities
+ * unless the CONFIG_X86_PMEM_LEGACY=y option is set.
+ *
+ * ( Note that older platforms also used 6 for the same type of memory,
+ * but newer versions switched to 12 as 6 was assigned differently. Some
+ * time they will learn... )
+ */
+#define E820_PRAM 12
/*
* reserved RAM used by kernel itself
diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
index 90c458e66e13..ce6068dbcfbc 100644
--- a/arch/x86/include/uapi/asm/hyperv.h
+++ b/arch/x86/include/uapi/asm/hyperv.h
@@ -225,6 +225,8 @@
#define HV_STATUS_INVALID_HYPERCALL_CODE 2
#define HV_STATUS_INVALID_HYPERCALL_INPUT 3
#define HV_STATUS_INVALID_ALIGNMENT 4
+#define HV_STATUS_INSUFFICIENT_MEMORY 11
+#define HV_STATUS_INVALID_CONNECTION_ID 18
#define HV_STATUS_INSUFFICIENT_BUFFERS 19
typedef struct _HV_REFERENCE_TSC_PAGE {
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index 1a4eae695ca8..c469490db4a8 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -61,6 +61,9 @@
#define MSR_OFFCORE_RSP_1 0x000001a7
#define MSR_NHM_TURBO_RATIO_LIMIT 0x000001ad
#define MSR_IVT_TURBO_RATIO_LIMIT 0x000001ae
+#define MSR_TURBO_RATIO_LIMIT 0x000001ad
+#define MSR_TURBO_RATIO_LIMIT1 0x000001ae
+#define MSR_TURBO_RATIO_LIMIT2 0x000001af
#define MSR_LBR_SELECT 0x000001c8
#define MSR_LBR_TOS 0x000001c9
@@ -165,6 +168,11 @@
#define MSR_PP1_ENERGY_STATUS 0x00000641
#define MSR_PP1_POLICY 0x00000642
+#define MSR_PKG_WEIGHTED_CORE_C0_RES 0x00000658
+#define MSR_PKG_ANY_CORE_C0_RES 0x00000659
+#define MSR_PKG_ANY_GFXE_C0_RES 0x0000065A
+#define MSR_PKG_BOTH_CORE_GFXE_C0_RES 0x0000065B
+
#define MSR_CORE_C1_RES 0x00000660
#define MSR_CC6_DEMOTION_POLICY_CONFIG 0x00000668
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index c887cd944f0c..9bcd0b56ca17 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -95,6 +95,7 @@ obj-$(CONFIG_KVM_GUEST) += kvm.o kvmclock.o
obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o
obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= paravirt-spinlocks.o
obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o
+obj-$(CONFIG_X86_PMEM_LEGACY) += pmem.o
obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 803b684676ff..dbe76a14c3c9 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -757,7 +757,7 @@ static int _acpi_map_lsapic(acpi_handle handle, int physid, int *pcpu)
}
/* wrapper to silence section mismatch warning */
-int __ref acpi_map_cpu(acpi_handle handle, int physid, int *pcpu)
+int __ref acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, int *pcpu)
{
return _acpi_map_lsapic(handle, physid, pcpu);
}
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index d9d0bd2faaf4..ab3219b3fbda 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -171,8 +171,8 @@ update_clusterinfo(struct notifier_block *nfb, unsigned long action, void *hcpu)
for_each_online_cpu(cpu) {
if (x2apic_cluster(this_cpu) != x2apic_cluster(cpu))
continue;
- __cpu_clear(this_cpu, per_cpu(cpus_in_cluster, cpu));
- __cpu_clear(cpu, per_cpu(cpus_in_cluster, this_cpu));
+ cpumask_clear_cpu(this_cpu, per_cpu(cpus_in_cluster, cpu));
+ cpumask_clear_cpu(cpu, per_cpu(cpus_in_cluster, this_cpu));
}
free_cpumask_var(per_cpu(cpus_in_cluster, this_cpu));
free_cpumask_var(per_cpu(ipi_mask, this_cpu));
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 329f0356ad4a..6ac5cb7a9e14 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -65,15 +65,15 @@ struct event_constraint {
/*
* struct hw_perf_event.flags flags
*/
-#define PERF_X86_EVENT_PEBS_LDLAT 0x1 /* ld+ldlat data address sampling */
-#define PERF_X86_EVENT_PEBS_ST 0x2 /* st data address sampling */
-#define PERF_X86_EVENT_PEBS_ST_HSW 0x4 /* haswell style datala, store */
-#define PERF_X86_EVENT_COMMITTED 0x8 /* event passed commit_txn */
-#define PERF_X86_EVENT_PEBS_LD_HSW 0x10 /* haswell style datala, load */
-#define PERF_X86_EVENT_PEBS_NA_HSW 0x20 /* haswell style datala, unknown */
-#define PERF_X86_EVENT_EXCL 0x40 /* HT exclusivity on counter */
-#define PERF_X86_EVENT_DYNAMIC 0x80 /* dynamic alloc'd constraint */
-#define PERF_X86_EVENT_RDPMC_ALLOWED 0x40 /* grant rdpmc permission */
+#define PERF_X86_EVENT_PEBS_LDLAT 0x0001 /* ld+ldlat data address sampling */
+#define PERF_X86_EVENT_PEBS_ST 0x0002 /* st data address sampling */
+#define PERF_X86_EVENT_PEBS_ST_HSW 0x0004 /* haswell style datala, store */
+#define PERF_X86_EVENT_COMMITTED 0x0008 /* event passed commit_txn */
+#define PERF_X86_EVENT_PEBS_LD_HSW 0x0010 /* haswell style datala, load */
+#define PERF_X86_EVENT_PEBS_NA_HSW 0x0020 /* haswell style datala, unknown */
+#define PERF_X86_EVENT_EXCL 0x0040 /* HT exclusivity on counter */
+#define PERF_X86_EVENT_DYNAMIC 0x0080 /* dynamic alloc'd constraint */
+#define PERF_X86_EVENT_RDPMC_ALLOWED 0x0100 /* grant rdpmc permission */
struct amd_nb {
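A note on the flag widening above: it is not merely cosmetic. In the removed
lines, PERF_X86_EVENT_EXCL and PERF_X86_EVENT_RDPMC_ALLOWED had both ended up
as 0x40, so the two flags shared a bit and could not be told apart. A minimal
illustration of the collision using the old values (this sketch is not part
of the patch):

#define OLD_PERF_X86_EVENT_EXCL			0x40
#define OLD_PERF_X86_EVENT_RDPMC_ALLOWED	0x40	/* same bit: the bug */

static int event_is_excl(int flags)
{
	/* also true for an event that merely has rdpmc permission */
	return flags & OLD_PERF_X86_EVENT_EXCL;
}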
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 9da2400c2ec3..219d3fb423a1 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -3275,7 +3275,7 @@ __init int intel_pmu_init(void)
hw_cache_extra_regs[C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = HSW_DEMAND_WRITE|
BDW_L3_MISS_LOCAL|HSW_SNOOP_DRAM;
- intel_pmu_lbr_init_snb();
+ intel_pmu_lbr_init_hsw();
x86_pmu.event_constraints = intel_bdw_event_constraints;
x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index ca69ea56c712..813f75d71175 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -558,6 +558,8 @@ struct event_constraint intel_core2_pebs_event_constraints[] = {
INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */
INTEL_FLAGS_UEVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETIRED.ANY */
INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */
+ /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x01),
EVENT_CONSTRAINT_END
};
@@ -565,6 +567,8 @@ struct event_constraint intel_atom_pebs_event_constraints[] = {
INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* MISPREDICTED_BRANCH_RETIRED */
INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */
+ /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x01),
EVENT_CONSTRAINT_END
};
@@ -588,6 +592,8 @@ struct event_constraint intel_nehalem_pebs_event_constraints[] = {
INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */
INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */
+ /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x0f),
EVENT_CONSTRAINT_END
};
@@ -603,6 +609,8 @@ struct event_constraint intel_westmere_pebs_event_constraints[] = {
INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */
INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */
+ /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x0f),
EVENT_CONSTRAINT_END
};
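The 0x108000c0 constant added to the constraint tables above can be decoded
against the standard x86 PERFEVTSEL bit layout (shown here as an illustrative
comment, not part of the patch):

/*
 * 0x108000c0 decoded:
 *   bits  7:0   event select = 0xc0         INST_RETIRED.ANY_P
 *   bits 15:8   umask        = 0x00
 *   bit  23     invert       = 1          -> 0x00800000
 *   bits 31:24  cmask        = 16 (0x10)  -> 0x10000000
 *
 * 0x10000000 | 0x00800000 | 0xc0 == 0x108000c0, i.e. the inverted
 * counter-mask encoding perf uses as a precise stand-in for cycles
 * (cycles:p).
 */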
diff --git a/arch/x86/kernel/cpu/perf_event_intel_pt.c b/arch/x86/kernel/cpu/perf_event_intel_pt.c
index f2770641c0fd..ffe666c2c6b5 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_pt.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_pt.c
@@ -988,39 +988,36 @@ static int pt_event_add(struct perf_event *event, int mode)
int ret = -EBUSY;
if (pt->handle.event)
- goto out;
+ goto fail;
buf = perf_aux_output_begin(&pt->handle, event);
- if (!buf) {
- ret = -EINVAL;
- goto out;
- }
+ ret = -EINVAL;
+ if (!buf)
+ goto fail_stop;
pt_buffer_reset_offsets(buf, pt->handle.head);
if (!buf->snapshot) {
ret = pt_buffer_reset_markers(buf, &pt->handle);
- if (ret) {
- perf_aux_output_end(&pt->handle, 0, true);
- goto out;
- }
+ if (ret)
+ goto fail_end_stop;
}
if (mode & PERF_EF_START) {
pt_event_start(event, 0);
- if (hwc->state == PERF_HES_STOPPED) {
- pt_event_del(event, 0);
- ret = -EBUSY;
- }
+ ret = -EBUSY;
+ if (hwc->state == PERF_HES_STOPPED)
+ goto fail_end_stop;
} else {
hwc->state = PERF_HES_STOPPED;
}
- ret = 0;
-out:
-
- if (ret)
- hwc->state = PERF_HES_STOPPED;
+ return 0;
+fail_end_stop:
+ perf_aux_output_end(&pt->handle, 0, true);
+fail_stop:
+ hwc->state = PERF_HES_STOPPED;
+fail:
return ret;
}
diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
index c4bb8b8e5017..999289b94025 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_rapl.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
@@ -62,6 +62,14 @@
#define RAPL_IDX_PP1_NRG_STAT 3 /* gpu */
#define INTEL_RAPL_PP1 0x4 /* pseudo-encoding */
+#define NR_RAPL_DOMAINS 0x4
+static const char *rapl_domain_names[NR_RAPL_DOMAINS] __initconst = {
+ "pp0-core",
+ "package",
+ "dram",
+ "pp1-gpu",
+};
+
/* Clients have PP0, PKG */
#define RAPL_IDX_CLN (1<<RAPL_IDX_PP0_NRG_STAT|\
1<<RAPL_IDX_PKG_NRG_STAT|\
@@ -112,7 +120,6 @@ static struct perf_pmu_events_attr event_attr_##v = { \
struct rapl_pmu {
spinlock_t lock;
- int hw_unit; /* 1/2^hw_unit Joule */
int n_active; /* number of active events */
struct list_head active_list;
struct pmu *pmu; /* pointer to rapl_pmu_class */
@@ -120,6 +127,7 @@ struct rapl_pmu {
struct hrtimer hrtimer;
};
+static int rapl_hw_unit[NR_RAPL_DOMAINS] __read_mostly; /* 1/2^hw_unit Joule */
static struct pmu rapl_pmu_class;
static cpumask_t rapl_cpu_mask;
static int rapl_cntr_mask;
@@ -127,6 +135,7 @@ static int rapl_cntr_mask;
static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu);
static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu_to_free);
+static struct x86_pmu_quirk *rapl_quirks;
static inline u64 rapl_read_counter(struct perf_event *event)
{
u64 raw;
@@ -134,15 +143,28 @@ static inline u64 rapl_read_counter(struct perf_event *event)
return raw;
}
-static inline u64 rapl_scale(u64 v)
+#define rapl_add_quirk(func_) \
+do { \
+ static struct x86_pmu_quirk __quirk __initdata = { \
+ .func = func_, \
+ }; \
+ __quirk.next = rapl_quirks; \
+ rapl_quirks = &__quirk; \
+} while (0)
+
+static inline u64 rapl_scale(u64 v, int cfg)
{
+ if (cfg > NR_RAPL_DOMAINS) {
+ pr_warn("invalid domain %d, failed to scale data\n", cfg);
+ return v;
+ }
/*
* scale delta to smallest unit (1/2^32)
* users must then scale back: count * 1/(1e9*2^32) to get Joules
* or use ldexp(count, -32).
* Watts = Joules/Time delta
*/
- return v << (32 - __this_cpu_read(rapl_pmu)->hw_unit);
+ return v << (32 - rapl_hw_unit[cfg - 1]);
}
static u64 rapl_event_update(struct perf_event *event)
@@ -173,7 +195,7 @@ again:
delta = (new_raw_count << shift) - (prev_raw_count << shift);
delta >>= shift;
- sdelta = rapl_scale(delta);
+ sdelta = rapl_scale(delta, event->hw.config);
local64_add(sdelta, &event->count);
@@ -546,12 +568,22 @@ static void rapl_cpu_init(int cpu)
cpumask_set_cpu(cpu, &rapl_cpu_mask);
}
+static __init void rapl_hsw_server_quirk(void)
+{
+ /*
+ * DRAM domain on HSW server has fixed energy unit which can be
+ * different than the unit from power unit MSR.
+ * "Intel Xeon Processor E5-1600 and E5-2600 v3 Product Families, V2
+ * of 2. Datasheet, September 2014, Reference Number: 330784-001 "
+ */
+ rapl_hw_unit[RAPL_IDX_RAM_NRG_STAT] = 16;
+}
+
static int rapl_cpu_prepare(int cpu)
{
struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu);
int phys_id = topology_physical_package_id(cpu);
u64 ms;
- u64 msr_rapl_power_unit_bits;
if (pmu)
return 0;
@@ -559,24 +591,13 @@ static int rapl_cpu_prepare(int cpu)
if (phys_id < 0)
return -1;
- /* protect rdmsrl() to handle virtualization */
- if (rdmsrl_safe(MSR_RAPL_POWER_UNIT, &msr_rapl_power_unit_bits))
- return -1;
-
pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu));
if (!pmu)
return -1;
-
spin_lock_init(&pmu->lock);
INIT_LIST_HEAD(&pmu->active_list);
- /*
- * grab power unit as: 1/2^unit Joules
- *
- * we cache in local PMU instance
- */
- pmu->hw_unit = (msr_rapl_power_unit_bits >> 8) & 0x1FULL;
pmu->pmu = &rapl_pmu_class;
/*
@@ -586,8 +607,8 @@ static int rapl_cpu_prepare(int cpu)
* divide interval by 2 to avoid lockstep (2 * 100)
* if hw unit is 32, then we use 2 ms 1/200/2
*/
- if (pmu->hw_unit < 32)
- ms = (1000 / (2 * 100)) * (1ULL << (32 - pmu->hw_unit - 1));
+ if (rapl_hw_unit[0] < 32)
+ ms = (1000 / (2 * 100)) * (1ULL << (32 - rapl_hw_unit[0] - 1));
else
ms = 2;
@@ -655,6 +676,20 @@ static int rapl_cpu_notifier(struct notifier_block *self,
return NOTIFY_OK;
}
+static int rapl_check_hw_unit(void)
+{
+ u64 msr_rapl_power_unit_bits;
+ int i;
+
+ /* protect rdmsrl() to handle virtualization */
+ if (rdmsrl_safe(MSR_RAPL_POWER_UNIT, &msr_rapl_power_unit_bits))
+ return -1;
+ for (i = 0; i < NR_RAPL_DOMAINS; i++)
+ rapl_hw_unit[i] = (msr_rapl_power_unit_bits >> 8) & 0x1FULL;
+
+ return 0;
+}
+
static const struct x86_cpu_id rapl_cpu_match[] = {
[0] = { .vendor = X86_VENDOR_INTEL, .family = 6 },
[1] = {},
@@ -664,6 +699,8 @@ static int __init rapl_pmu_init(void)
{
struct rapl_pmu *pmu;
int cpu, ret;
+ struct x86_pmu_quirk *quirk;
+ int i;
/*
* check for Intel processor family 6
@@ -678,6 +715,11 @@ static int __init rapl_pmu_init(void)
rapl_cntr_mask = RAPL_IDX_CLN;
rapl_pmu_events_group.attrs = rapl_events_cln_attr;
break;
+ case 63: /* Haswell-Server */
+ rapl_add_quirk(rapl_hsw_server_quirk);
+ rapl_cntr_mask = RAPL_IDX_SRV;
+ rapl_pmu_events_group.attrs = rapl_events_srv_attr;
+ break;
case 60: /* Haswell */
case 69: /* Haswell-Celeron */
rapl_cntr_mask = RAPL_IDX_HSW;
@@ -693,7 +735,13 @@ static int __init rapl_pmu_init(void)
/* unsupported */
return 0;
}
+ ret = rapl_check_hw_unit();
+ if (ret)
+ return ret;
+ /* run cpu model quirks */
+ for (quirk = rapl_quirks; quirk; quirk = quirk->next)
+ quirk->func();
cpu_notifier_register_begin();
for_each_online_cpu(cpu) {
@@ -714,14 +762,18 @@ static int __init rapl_pmu_init(void)
pmu = __this_cpu_read(rapl_pmu);
- pr_info("RAPL PMU detected, hw unit 2^-%d Joules,"
+ pr_info("RAPL PMU detected,"
" API unit is 2^-32 Joules,"
" %d fixed counters"
" %llu ms ovfl timer\n",
- pmu->hw_unit,
hweight32(rapl_cntr_mask),
ktime_to_ms(pmu->timer_interval));
-
+ for (i = 0; i < NR_RAPL_DOMAINS; i++) {
+ if (rapl_cntr_mask & (1 << i)) {
+ pr_info("hw unit of domain %s 2^-%d Joules\n",
+ rapl_domain_names[i], rapl_hw_unit[i]);
+ }
+ }
out:
cpu_notifier_register_done();
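The rapl_scale() comment above says counts are pre-scaled to 2^-32 Joule
units and that users scale back with ldexp(count, -32). A userspace sketch of
that conversion (function names are illustrative, not part of the patch):

#include <math.h>

/* Sketch: convert a RAPL perf count (pre-scaled by the kernel to
 * 2^-32 Joule units) to Joules, and to average Watts over 'seconds'. */
static double rapl_count_to_joules(unsigned long long count)
{
	return ldexp((double)count, -32);		/* count * 2^-32 */
}

static double rapl_avg_watts(unsigned long long count, double seconds)
{
	return rapl_count_to_joules(count) / seconds;	/* Watts = Joules/time */
}

For reference, the raw hardware unit read by rapl_check_hw_unit() is 1/2^ESU
Joules; with ESU = 16 (the value the Haswell-server quirk forces for the DRAM
domain), one raw counter increment is 2^-16 J, roughly 15.3 microjoules.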
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 7d46bb260334..e2ce85db2283 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -149,6 +149,9 @@ static void __init e820_print_type(u32 type)
case E820_UNUSABLE:
printk(KERN_CONT "unusable");
break;
+ case E820_PRAM:
+ printk(KERN_CONT "persistent (type %u)", type);
+ break;
default:
printk(KERN_CONT "type %u", type);
break;
@@ -343,7 +346,7 @@ int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map,
* continue building up new bios map based on this
* information
*/
- if (current_type != last_type) {
+ if (current_type != last_type || current_type == E820_PRAM) {
if (last_type != 0) {
new_bios[new_bios_entry].size =
change_point[chgidx]->addr - last_addr;
@@ -688,6 +691,7 @@ void __init e820_mark_nosave_regions(unsigned long limit_pfn)
register_nosave_region(pfn, PFN_UP(ei->addr));
pfn = PFN_DOWN(ei->addr + ei->size);
+
if (ei->type != E820_RAM && ei->type != E820_RESERVED_KERN)
register_nosave_region(PFN_UP(ei->addr), pfn);
@@ -748,7 +752,7 @@ u64 __init early_reserve_e820(u64 size, u64 align)
/*
* Find the highest page frame number we have available
*/
-static unsigned long __init e820_end_pfn(unsigned long limit_pfn, unsigned type)
+static unsigned long __init e820_end_pfn(unsigned long limit_pfn)
{
int i;
unsigned long last_pfn = 0;
@@ -759,7 +763,11 @@ static unsigned long __init e820_end_pfn(unsigned long limit_pfn, unsigned type)
unsigned long start_pfn;
unsigned long end_pfn;
- if (ei->type != type)
+ /*
+ * Persistent memory is accounted as ram for purposes of
+ * establishing max_pfn and mem_map.
+ */
+ if (ei->type != E820_RAM && ei->type != E820_PRAM)
continue;
start_pfn = ei->addr >> PAGE_SHIFT;
@@ -784,12 +792,12 @@ static unsigned long __init e820_end_pfn(unsigned long limit_pfn, unsigned type)
}
unsigned long __init e820_end_of_ram_pfn(void)
{
- return e820_end_pfn(MAX_ARCH_PFN, E820_RAM);
+ return e820_end_pfn(MAX_ARCH_PFN);
}
unsigned long __init e820_end_of_low_ram_pfn(void)
{
- return e820_end_pfn(1UL<<(32 - PAGE_SHIFT), E820_RAM);
+ return e820_end_pfn(1UL << (32-PAGE_SHIFT));
}
static void early_panic(char *msg)
@@ -866,6 +874,9 @@ static int __init parse_memmap_one(char *p)
} else if (*p == '$') {
start_at = memparse(p+1, &p);
e820_add_region(start_at, mem_size, E820_RESERVED);
+ } else if (*p == '!') {
+ start_at = memparse(p+1, &p);
+ e820_add_region(start_at, mem_size, E820_PRAM);
} else
e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1);
@@ -907,6 +918,7 @@ static inline const char *e820_type_to_string(int e820_type)
case E820_ACPI: return "ACPI Tables";
case E820_NVS: return "ACPI Non-volatile Storage";
case E820_UNUSABLE: return "Unusable memory";
+ case E820_PRAM: return "Persistent RAM";
default: return "reserved";
}
}
@@ -940,7 +952,9 @@ void __init e820_reserve_resources(void)
* pci device BAR resource and insert them later in
* pcibios_resource_survey()
*/
- if (e820.map[i].type != E820_RESERVED || res->start < (1ULL<<20)) {
+ if (((e820.map[i].type != E820_RESERVED) &&
+ (e820.map[i].type != E820_PRAM)) ||
+ res->start < (1ULL<<20)) {
res->flags |= IORESOURCE_BUSY;
insert_resource(&iomem_resource, res);
}
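The new '!' case in parse_memmap_one() means a persistent range can also be
declared on the kernel command line. For example (addresses are hypothetical):

	memmap=4G!12G

marks 4 GiB of address space starting at physical 12 GiB as E820_PRAM, which
the e820 code above reports as "Persistent RAM" and which
arch/x86/kernel/pmem.c (below) will hand to the 'pmem' driver.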
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index 49ff55ef9b26..89427d8d4fc5 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -375,12 +375,6 @@ static int __init setup_early_printk(char *buf)
if (!strncmp(buf, "xen", 3))
early_console_register(&xenboot_console, keep);
#endif
-#ifdef CONFIG_EARLY_PRINTK_INTEL_MID
- if (!strncmp(buf, "hsu", 3)) {
- hsu_early_console_init(buf + 3);
- early_console_register(&early_hsu_console, keep);
- }
-#endif
#ifdef CONFIG_EARLY_PRINTK_EFI
if (!strncmp(buf, "efi", 3))
early_console_register(&early_efi_console, keep);
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index 367f39d35e9c..009183276bb7 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -341,7 +341,7 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset,
unsigned int pos, unsigned int count,
void *kbuf, void __user *ubuf)
{
- struct xsave_struct *xsave = &target->thread.fpu.state->xsave;
+ struct xsave_struct *xsave;
int ret;
if (!cpu_has_xsave)
@@ -351,6 +351,8 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset,
if (ret)
return ret;
+ xsave = &target->thread.fpu.state->xsave;
+
/*
* Copy the 48 bytes defined by the software first into the xstate
* memory layout in the thread struct, so that we can copy the entire
@@ -369,7 +371,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf)
{
- struct xsave_struct *xsave = &target->thread.fpu.state->xsave;
+ struct xsave_struct *xsave;
int ret;
if (!cpu_has_xsave)
@@ -379,6 +381,8 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
if (ret)
return ret;
+ xsave = &target->thread.fpu.state->xsave;
+
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
/*
* mxcsr reserved bits must be masked to zero for security reasons.
diff --git a/arch/x86/kernel/pmem.c b/arch/x86/kernel/pmem.c
new file mode 100644
index 000000000000..3420c874ddc5
--- /dev/null
+++ b/arch/x86/kernel/pmem.c
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2015, Christoph Hellwig.
+ */
+#include <linux/memblock.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <asm/e820.h>
+#include <asm/page_types.h>
+#include <asm/setup.h>
+
+static __init void register_pmem_device(struct resource *res)
+{
+ struct platform_device *pdev;
+ int error;
+
+ pdev = platform_device_alloc("pmem", PLATFORM_DEVID_AUTO);
+ if (!pdev)
+ return;
+
+ error = platform_device_add_resources(pdev, res, 1);
+ if (error)
+ goto out_put_pdev;
+
+ error = platform_device_add(pdev);
+ if (error)
+ goto out_put_pdev;
+ return;
+
+out_put_pdev:
+ dev_warn(&pdev->dev, "failed to add 'pmem' (persistent memory) device!\n");
+ platform_device_put(pdev);
+}
+
+static __init int register_pmem_devices(void)
+{
+ int i;
+
+ for (i = 0; i < e820.nr_map; i++) {
+ struct e820entry *ei = &e820.map[i];
+
+ if (ei->type == E820_PRAM) {
+ struct resource res = {
+ .flags = IORESOURCE_MEM,
+ .start = ei->addr,
+ .end = ei->addr + ei->size - 1,
+ };
+ register_pmem_device(&res);
+ }
+ }
+
+ return 0;
+}
+device_initcall(register_pmem_devices);
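register_pmem_device() creates platform devices named "pmem", one per
E820_PRAM range, so the driver side binds by name. The consumer is not part
of this diff; a minimal sketch of what such a driver's skeleton might look
like (probe logic elided, names illustrative):

#include <linux/module.h>
#include <linux/platform_device.h>

static int pmem_probe(struct platform_device *pdev)
{
	/* one IORESOURCE_MEM per device, as added by
	 * platform_device_add_resources() above */
	struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0);

	if (!res)
		return -ENXIO;
	/* ... map the range and expose it as a block device ... */
	return 0;
}

static struct platform_driver pmem_driver = {
	.probe	= pmem_probe,
	.driver	= { .name = "pmem" },	/* matches platform_device_alloc("pmem", ...) */
};
module_platform_driver(pmem_driver);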
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index f9804080ccb3..1ea14fd53933 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -616,7 +616,8 @@ setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs)
static void
handle_signal(struct ksignal *ksig, struct pt_regs *regs)
{
- bool failed;
+ bool stepping, failed;
+
/* Are we from a system call? */
if (syscall_get_nr(current, regs) >= 0) {
/* If so, check system call restarting.. */
@@ -640,12 +641,13 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
}
/*
- * If TF is set due to a debugger (TIF_FORCED_TF), clear the TF
- * flag so that register information in the sigcontext is correct.
+ * If TF is set due to a debugger (TIF_FORCED_TF), clear TF now
+ * so that register information in the sigcontext is correct and
+ * then notify the tracer before entering the signal handler.
*/
- if (unlikely(regs->flags & X86_EFLAGS_TF) &&
- likely(test_and_clear_thread_flag(TIF_FORCED_TF)))
- regs->flags &= ~X86_EFLAGS_TF;
+ stepping = test_thread_flag(TIF_SINGLESTEP);
+ if (stepping)
+ user_disable_single_step(current);
failed = (setup_rt_frame(ksig, regs) < 0);
if (!failed) {
@@ -656,10 +658,8 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
* it might disable possible debug exception from the
* signal handler.
*
- * Clear TF when entering the signal handler, but
- * notify any tracer that was single-stepping it.
- * The tracer may want to single-step inside the
- * handler too.
+ * Clear TF for the case when it wasn't set by debugger to
+ * avoid the recursive send_sigtrap() in SIGTRAP handler.
*/
regs->flags &= ~(X86_EFLAGS_DF|X86_EFLAGS_RF|X86_EFLAGS_TF);
/*
@@ -668,7 +668,7 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
if (used_math())
fpu_reset_state(current);
}
- signal_setup_done(failed, ksig, test_thread_flag(TIF_SINGLESTEP));
+ signal_setup_done(failed, ksig, stepping);
}
#ifdef CONFIG_X86_32
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index d67206a7b99a..629af0f1c5c4 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -683,8 +683,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
unsigned long bitmap = 1;
struct kvm_lapic **dst;
int i;
- bool ret = false;
- bool x2apic_ipi = src && apic_x2apic_mode(src);
+ bool ret, x2apic_ipi;
*r = -1;
@@ -696,16 +695,18 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
if (irq->shorthand)
return false;
+ x2apic_ipi = src && apic_x2apic_mode(src);
if (irq->dest_id == (x2apic_ipi ? X2APIC_BROADCAST : APIC_BROADCAST))
return false;
+ ret = true;
rcu_read_lock();
map = rcu_dereference(kvm->arch.apic_map);
- if (!map)
+ if (!map) {
+ ret = false;
goto out;
-
- ret = true;
+ }
if (irq->dest_mode == APIC_DEST_PHYSICAL) {
if (irq->dest_id >= ARRAY_SIZE(map->phys_map))
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 146f295ee322..d43867c33bc4 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -4481,9 +4481,11 @@ static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm,
pfn = spte_to_pfn(*sptep);
/*
- * Only EPT supported for now; otherwise, one would need to
- * find out efficiently whether the guest page tables are
- * also using huge pages.
+ * We cannot do huge page mapping for indirect shadow pages,
+ * which are found on the last rmap (level = 1) when not using
+ * tdp; such shadow pages are synced with the page table in
+ * the guest, and the guest page table is using 4K page size
+ * mapping if the indirect sp has level = 1.
*/
if (sp->role.direct &&
!kvm_is_reserved_pfn(pfn) &&
@@ -4504,19 +4506,12 @@ void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
bool flush = false;
unsigned long *rmapp;
unsigned long last_index, index;
- gfn_t gfn_start, gfn_end;
spin_lock(&kvm->mmu_lock);
- gfn_start = memslot->base_gfn;
- gfn_end = memslot->base_gfn + memslot->npages - 1;
-
- if (gfn_start >= gfn_end)
- goto out;
-
rmapp = memslot->arch.rmap[0];
- last_index = gfn_to_index(gfn_end, memslot->base_gfn,
- PT_PAGE_TABLE_LEVEL);
+ last_index = gfn_to_index(memslot->base_gfn + memslot->npages - 1,
+ memslot->base_gfn, PT_PAGE_TABLE_LEVEL);
for (index = 0; index <= last_index; ++index, ++rmapp) {
if (*rmapp)
@@ -4534,7 +4529,6 @@ void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
if (flush)
kvm_flush_remote_tlbs(kvm);
-out:
spin_unlock(&kvm->mmu_lock);
}
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index f5e8dce8046c..f7b61687bd79 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3622,8 +3622,16 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
{
- unsigned long hw_cr4 = cr4 | (to_vmx(vcpu)->rmode.vm86_active ?
- KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON);
+ /*
+ * Pass through host's Machine Check Enable value to hw_cr4, which
+ * is in force while we are in guest mode. Do not let guests control
+ * this bit, even if host CR4.MCE == 0.
+ */
+ unsigned long hw_cr4 =
+ (cr4_read_shadow() & X86_CR4_MCE) |
+ (cr4 & ~X86_CR4_MCE) |
+ (to_vmx(vcpu)->rmode.vm86_active ?
+ KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON);
if (cr4 & X86_CR4_VMXE) {
/*
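Worked through for one case, the new hw_cr4 masking behaves as the comment
promises (values illustrative):

/*
 * Suppose the host runs with CR4.MCE = 1 and the guest writes a CR4
 * value with MCE = 0:
 *
 *   cr4_read_shadow() & X86_CR4_MCE   ->  X86_CR4_MCE  (host bit wins)
 *   cr4 & ~X86_CR4_MCE                ->  guest bits, MCE stripped
 *
 * hw_cr4 therefore keeps MCE enabled while the guest still reads back
 * its own value through the CR4 read shadow, so machine checks cannot
 * be disabled from inside the guest.
 */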
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e1a81267f3f6..ed31c31b2485 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5799,7 +5799,6 @@ int kvm_arch_init(void *opaque)
kvm_set_mmio_spte_mask();
kvm_x86_ops = ops;
- kvm_init_msr_list();
kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
PT_DIRTY_MASK, PT64_NX_MASK, 0);
@@ -7253,7 +7252,14 @@ void kvm_arch_hardware_disable(void)
int kvm_arch_hardware_setup(void)
{
- return kvm_x86_ops->hardware_setup();
+ int r;
+
+ r = kvm_x86_ops->hardware_setup();
+ if (r != 0)
+ return r;
+
+ kvm_init_msr_list();
+ return 0;
}
void kvm_arch_hardware_unsetup(void)
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 717908b16037..8f9a133cc099 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -87,8 +87,7 @@
struct lguest_data lguest_data = {
.hcall_status = { [0 ... LHCALL_RING_SIZE-1] = 0xFF },
- .noirq_start = (u32)lguest_noirq_start,
- .noirq_end = (u32)lguest_noirq_end,
+ .noirq_iret = (u32)lguest_noirq_iret,
.kernel_address = PAGE_OFFSET,
.blocked_interrupts = { 1 }, /* Block timer interrupts */
.syscall_vec = SYSCALL_VECTOR,
@@ -262,7 +261,7 @@ PV_CALLEE_SAVE_REGS_THUNK(lguest_save_fl);
PV_CALLEE_SAVE_REGS_THUNK(lguest_irq_disable);
/*:*/
-/* These are in i386_head.S */
+/* These are in head_32.S */
extern void lg_irq_enable(void);
extern void lg_restore_fl(unsigned long flags);
@@ -1368,7 +1367,7 @@ static void lguest_restart(char *reason)
* fit comfortably.
*
* First we need assembly templates of each of the patchable Guest operations,
- * and these are in i386_head.S.
+ * and these are in head_32.S.
*/
/*G:060 We construct a table from the assembler templates: */
diff --git a/arch/x86/lguest/head_32.S b/arch/x86/lguest/head_32.S
index 6ddfe4fc23c3..d5ae63f5ec5d 100644
--- a/arch/x86/lguest/head_32.S
+++ b/arch/x86/lguest/head_32.S
@@ -84,7 +84,7 @@ ENTRY(lg_irq_enable)
* set lguest_data.irq_pending to X86_EFLAGS_IF. If it's not zero, we
* jump to send_interrupts, otherwise we're done.
*/
- testl $0, lguest_data+LGUEST_DATA_irq_pending
+ cmpl $0, lguest_data+LGUEST_DATA_irq_pending
jnz send_interrupts
/*
* One cool thing about x86 is that you can do many things without using
@@ -133,9 +133,8 @@ ENTRY(lg_restore_fl)
ret
/*:*/
-/* These demark the EIP range where host should never deliver interrupts. */
-.global lguest_noirq_start
-.global lguest_noirq_end
+/* These demark the EIP where the host should never deliver interrupts. */
+.global lguest_noirq_iret
/*M:004
* When the Host reflects a trap or injects an interrupt into the Guest, it
@@ -168,29 +167,26 @@ ENTRY(lg_restore_fl)
* So we have to copy eflags from the stack to lguest_data.irq_enabled before
* we do the "iret".
*
- * There are two problems with this: firstly, we need to use a register to do
- * the copy and secondly, the whole thing needs to be atomic. The first
- * problem is easy to solve: push %eax on the stack so we can use it, and then
- * restore it at the end just before the real "iret".
+ * There are two problems with this: firstly, we can't clobber any registers
+ * and secondly, the whole thing needs to be atomic. The first problem
+ * is solved by using "push memory"/"pop memory" instruction pair for copying.
*
* The second is harder: copying eflags to lguest_data.irq_enabled will turn
* interrupts on before we're finished, so we could be interrupted before we
- * return to userspace or wherever. Our solution to this is to surround the
- * code with lguest_noirq_start: and lguest_noirq_end: labels. We tell the
+ * return to userspace or wherever. Our solution to this is to tell the
* Host that it is *never* to interrupt us there, even if interrupts seem to be
- * enabled.
+ * enabled. (It's not necessary to protect the pop instruction, since
+ * data gets updated only after it completes, so we only need to protect
+ * one instruction, iret).
*/
ENTRY(lguest_iret)
- pushl %eax
- movl 12(%esp), %eax
-lguest_noirq_start:
+ pushl 2*4(%esp)
/*
* Note the %ss: segment prefix here. Normal data accesses use the
* "ds" segment, but that will have already been restored for whatever
* we're returning to (such as userspace): we can't trust it. The %ss:
* prefix makes sure we use the stack segment, which is still valid.
*/
- movl %eax,%ss:lguest_data+LGUEST_DATA_irq_enabled
- popl %eax
+ popl %ss:lguest_data+LGUEST_DATA_irq_enabled
+lguest_noirq_iret:
iret
-lguest_noirq_end:
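The register-free copy works because of the frame layout at the point
lguest_iret runs; sketched as a comment (offsets follow the standard 32-bit
trap frame, not part of the patch):

/*
 * Stack at entry to lguest_iret:
 *   0(%esp)  return EIP
 *   4(%esp)  CS
 *   8(%esp)  EFLAGS   <- "pushl 2*4(%esp)" duplicates this word
 *
 * "popl %ss:lguest_data+LGUEST_DATA_irq_enabled" then moves the copy
 * straight to memory, so no register is clobbered and only the final
 * "iret" (at lguest_noirq_iret) needs the no-interrupt protection.
 */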
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index 1f33b3d1fd68..0a42327a59d7 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -82,7 +82,7 @@ copy_user_handle_tail(char *to, char *from, unsigned len)
clac();
/* If the destination is a kernel buffer, we always clear the end */
- if ((unsigned long)to >= TASK_SIZE_MAX)
+ if (!__addr_ok(to))
memset(to, 0, len);
return len;
}
diff --git a/arch/x86/platform/intel-mid/Makefile b/arch/x86/platform/intel-mid/Makefile
index 0a8ee703b9fa..0ce1b1913673 100644
--- a/arch/x86/platform/intel-mid/Makefile
+++ b/arch/x86/platform/intel-mid/Makefile
@@ -1,5 +1,4 @@
obj-$(CONFIG_X86_INTEL_MID) += intel-mid.o intel_mid_vrtc.o mfld.o mrfl.o
-obj-$(CONFIG_EARLY_PRINTK_INTEL_MID) += early_printk_intel_mid.o
# SFI specific code
ifdef CONFIG_X86_INTEL_MID
diff --git a/arch/x86/platform/intel-mid/early_printk_intel_mid.c b/arch/x86/platform/intel-mid/early_printk_intel_mid.c
deleted file mode 100644
index 4e720829ab90..000000000000
--- a/arch/x86/platform/intel-mid/early_printk_intel_mid.c
+++ /dev/null
@@ -1,112 +0,0 @@
-/*
- * early_printk_intel_mid.c - early consoles for Intel MID platforms
- *
- * Copyright (c) 2008-2010, Intel Corporation
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
- */
-
-/*
- * This file implements early console named hsu.
- * hsu is based on a High Speed UART device which only exists in the Medfield
- * platform
- */
-
-#include <linux/serial_reg.h>
-#include <linux/serial_mfd.h>
-#include <linux/console.h>
-#include <linux/kernel.h>
-#include <linux/delay.h>
-#include <linux/io.h>
-
-#include <asm/fixmap.h>
-#include <asm/pgtable.h>
-#include <asm/intel-mid.h>
-
-/*
- * Following is the early console based on Medfield HSU (High
- * Speed UART) device.
- */
-#define HSU_PORT_BASE 0xffa28080
-
-static void __iomem *phsu;
-
-void hsu_early_console_init(const char *s)
-{
- unsigned long paddr, port = 0;
- u8 lcr;
-
- /*
- * Select the early HSU console port if specified by user in the
- * kernel command line.
- */
- if (*s && !kstrtoul(s, 10, &port))
- port = clamp_val(port, 0, 2);
-
- paddr = HSU_PORT_BASE + port * 0x80;
- phsu = (void __iomem *)set_fixmap_offset_nocache(FIX_EARLYCON_MEM_BASE, paddr);
-
- /* Disable FIFO */
- writeb(0x0, phsu + UART_FCR);
-
- /* Set to default 115200 bps, 8n1 */
- lcr = readb(phsu + UART_LCR);
- writeb((0x80 | lcr), phsu + UART_LCR);
- writeb(0x18, phsu + UART_DLL);
- writeb(lcr, phsu + UART_LCR);
- writel(0x3600, phsu + UART_MUL*4);
-
- writeb(0x8, phsu + UART_MCR);
- writeb(0x7, phsu + UART_FCR);
- writeb(0x3, phsu + UART_LCR);
-
- /* Clear IRQ status */
- readb(phsu + UART_LSR);
- readb(phsu + UART_RX);
- readb(phsu + UART_IIR);
- readb(phsu + UART_MSR);
-
- /* Enable FIFO */
- writeb(0x7, phsu + UART_FCR);
-}
-
-#define BOTH_EMPTY (UART_LSR_TEMT | UART_LSR_THRE)
-
-static void early_hsu_putc(char ch)
-{
- unsigned int timeout = 10000; /* 10ms */
- u8 status;
-
- while (--timeout) {
- status = readb(phsu + UART_LSR);
- if (status & BOTH_EMPTY)
- break;
- udelay(1);
- }
-
- /* Only write the char when there was no timeout */
- if (timeout)
- writeb(ch, phsu + UART_TX);
-}
-
-static void early_hsu_write(struct console *con, const char *str, unsigned n)
-{
- int i;
-
- for (i = 0; i < n && *str; i++) {
- if (*str == '\n')
- early_hsu_putc('\r');
- early_hsu_putc(*str);
- str++;
- }
-}
-
-struct console early_hsu_console = {
- .name = "earlyhsu",
- .write = early_hsu_write,
- .flags = CON_PRINTBUFFER,
- .index = -1,
-};