summaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2015-01-28 17:30:32 +0300
committerIngo Molnar <mingo@kernel.org>2015-01-28 17:30:32 +0300
commit41ca5d4e9be11ea6ae040b51d9628a189fd82896 (patch)
treef9c35cc37b9622f6cccd91b94548f44b9a534029 /arch/x86
parent0fcedc8631ec28ca25d3c0b116e8fa0c19dd5f6d (diff)
parent3669ef9fa7d35f573ec9c0e0341b29251c2734a7 (diff)
downloadlinux-41ca5d4e9be11ea6ae040b51d9628a189fd82896.tar.xz
Merge commit 3669ef9fa7d3 ("x86, tls: Interpret an all-zero struct user_desc as 'no segment'") into x86/asm
Pick up the latestest asm fixes before advancing it any further. Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/boot/compressed/misc.c9
-rw-r--r--arch/x86/include/asm/acpi.h1
-rw-r--r--arch/x86/include/asm/desc.h20
-rw-r--r--arch/x86/include/asm/mmu_context.h20
-rw-r--r--arch/x86/kernel/acpi/boot.c26
-rw-r--r--arch/x86/kernel/cpu/mshyperv.c1
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_ds.c4
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_rapl.c44
-rw-r--r--arch/x86/kernel/irq.c2
-rw-r--r--arch/x86/kernel/kprobes/core.c20
-rw-r--r--arch/x86/kernel/tls.c25
-rw-r--r--arch/x86/mm/mpx.c6
-rw-r--r--arch/x86/mm/pat.c7
-rw-r--r--arch/x86/pci/xen.c49
14 files changed, 143 insertions, 91 deletions
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index dcc1c536cc21..a950864a64da 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -373,6 +373,8 @@ asmlinkage __visible void *decompress_kernel(void *rmode, memptr heap,
unsigned long output_len,
unsigned long run_size)
{
+ unsigned char *output_orig = output;
+
real_mode = rmode;
sanitize_boot_params(real_mode);
@@ -421,7 +423,12 @@ asmlinkage __visible void *decompress_kernel(void *rmode, memptr heap,
debug_putstr("\nDecompressing Linux... ");
decompress(input_data, input_len, NULL, NULL, output, NULL, error);
parse_elf(output);
- handle_relocations(output, output_len);
+ /*
+ * 32-bit always performs relocations. 64-bit relocations are only
+ * needed if kASLR has chosen a different load address.
+ */
+ if (!IS_ENABLED(CONFIG_X86_64) || output != output_orig)
+ handle_relocations(output, output_len);
debug_putstr("done.\nBooting the kernel.\n");
return output;
}
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 0ab4f9fd2687..3a45668f6dc3 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -50,6 +50,7 @@ void acpi_pic_sci_set_trigger(unsigned int, u16);
extern int (*__acpi_register_gsi)(struct device *dev, u32 gsi,
int trigger, int polarity);
+extern void (*__acpi_unregister_gsi)(u32 gsi);
static inline void disable_acpi(void)
{
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 50d033a8947d..a94b82e8f156 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -251,7 +251,8 @@ static inline void native_load_tls(struct thread_struct *t, unsigned int cpu)
gdt[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i];
}
-#define _LDT_empty(info) \
+/* This intentionally ignores lm, since 32-bit apps don't have that field. */
+#define LDT_empty(info) \
((info)->base_addr == 0 && \
(info)->limit == 0 && \
(info)->contents == 0 && \
@@ -261,11 +262,18 @@ static inline void native_load_tls(struct thread_struct *t, unsigned int cpu)
(info)->seg_not_present == 1 && \
(info)->useable == 0)
-#ifdef CONFIG_X86_64
-#define LDT_empty(info) (_LDT_empty(info) && ((info)->lm == 0))
-#else
-#define LDT_empty(info) (_LDT_empty(info))
-#endif
+/* Lots of programs expect an all-zero user_desc to mean "no segment at all". */
+static inline bool LDT_zero(const struct user_desc *info)
+{
+ return (info->base_addr == 0 &&
+ info->limit == 0 &&
+ info->contents == 0 &&
+ info->read_exec_only == 0 &&
+ info->seg_32bit == 0 &&
+ info->limit_in_pages == 0 &&
+ info->seg_not_present == 0 &&
+ info->useable == 0);
+}
static inline void clear_LDT(void)
{
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 40269a2bf6f9..4b75d591eb5e 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -130,7 +130,25 @@ static inline void arch_bprm_mm_init(struct mm_struct *mm,
static inline void arch_unmap(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long start, unsigned long end)
{
- mpx_notify_unmap(mm, vma, start, end);
+ /*
+ * mpx_notify_unmap() goes and reads a rarely-hot
+ * cacheline in the mm_struct. That can be expensive
+ * enough to be seen in profiles.
+ *
+ * The mpx_notify_unmap() call and its contents have been
+ * observed to affect munmap() performance on hardware
+ * where MPX is not present.
+ *
+ * The unlikely() optimizes for the fast case: no MPX
+ * in the CPU, or no MPX use in the process. Even if
+ * we get this wrong (in the unlikely event that MPX
+ * is widely enabled on some system) the overhead of
+ * MPX itself (reading bounds tables) is expected to
+ * overwhelm the overhead of getting this unlikely()
+ * consistently wrong.
+ */
+ if (unlikely(cpu_feature_enabled(X86_FEATURE_MPX)))
+ mpx_notify_unmap(mm, vma, start, end);
}
#endif /* _ASM_X86_MMU_CONTEXT_H */
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index d1626364a28a..b9e30daa0881 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -611,20 +611,20 @@ void __init acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger)
int acpi_gsi_to_irq(u32 gsi, unsigned int *irqp)
{
- int irq;
-
- if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) {
- *irqp = gsi;
- } else {
- mutex_lock(&acpi_ioapic_lock);
- irq = mp_map_gsi_to_irq(gsi,
- IOAPIC_MAP_ALLOC | IOAPIC_MAP_CHECK);
- mutex_unlock(&acpi_ioapic_lock);
- if (irq < 0)
- return -1;
- *irqp = irq;
+ int rc, irq, trigger, polarity;
+
+ rc = acpi_get_override_irq(gsi, &trigger, &polarity);
+ if (rc == 0) {
+ trigger = trigger ? ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE;
+ polarity = polarity ? ACPI_ACTIVE_LOW : ACPI_ACTIVE_HIGH;
+ irq = acpi_register_gsi(NULL, gsi, trigger, polarity);
+ if (irq >= 0) {
+ *irqp = irq;
+ return 0;
+ }
}
- return 0;
+
+ return -1;
}
EXPORT_SYMBOL_GPL(acpi_gsi_to_irq);
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index a450373e8e91..939155ffdece 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -107,6 +107,7 @@ static struct clocksource hyperv_cs = {
.rating = 400, /* use this when running on Hyperv*/
.read = read_hv_clock,
.mask = CLOCKSOURCE_MASK(64),
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
static void __init ms_hyperv_init_platform(void)
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 3c895d480cd7..073983398364 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -568,8 +568,8 @@ struct event_constraint intel_atom_pebs_event_constraints[] = {
};
struct event_constraint intel_slm_pebs_event_constraints[] = {
- /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
- INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
+ /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x1),
/* Allow all events as PEBS with no flags */
INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
EVENT_CONSTRAINT_END
diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
index 673f930c700f..6e434f8e5fc8 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_rapl.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
@@ -103,6 +103,13 @@ static struct kobj_attribute format_attr_##_var = \
#define RAPL_CNTR_WIDTH 32 /* 32-bit rapl counters */
+#define RAPL_EVENT_ATTR_STR(_name, v, str) \
+static struct perf_pmu_events_attr event_attr_##v = { \
+ .attr = __ATTR(_name, 0444, rapl_sysfs_show, NULL), \
+ .id = 0, \
+ .event_str = str, \
+};
+
struct rapl_pmu {
spinlock_t lock;
int hw_unit; /* 1/2^hw_unit Joule */
@@ -379,23 +386,36 @@ static struct attribute_group rapl_pmu_attr_group = {
.attrs = rapl_pmu_attrs,
};
-EVENT_ATTR_STR(energy-cores, rapl_cores, "event=0x01");
-EVENT_ATTR_STR(energy-pkg , rapl_pkg, "event=0x02");
-EVENT_ATTR_STR(energy-ram , rapl_ram, "event=0x03");
-EVENT_ATTR_STR(energy-gpu , rapl_gpu, "event=0x04");
+static ssize_t rapl_sysfs_show(struct device *dev,
+ struct device_attribute *attr,
+ char *page)
+{
+ struct perf_pmu_events_attr *pmu_attr = \
+ container_of(attr, struct perf_pmu_events_attr, attr);
+
+ if (pmu_attr->event_str)
+ return sprintf(page, "%s", pmu_attr->event_str);
+
+ return 0;
+}
+
+RAPL_EVENT_ATTR_STR(energy-cores, rapl_cores, "event=0x01");
+RAPL_EVENT_ATTR_STR(energy-pkg , rapl_pkg, "event=0x02");
+RAPL_EVENT_ATTR_STR(energy-ram , rapl_ram, "event=0x03");
+RAPL_EVENT_ATTR_STR(energy-gpu , rapl_gpu, "event=0x04");
-EVENT_ATTR_STR(energy-cores.unit, rapl_cores_unit, "Joules");
-EVENT_ATTR_STR(energy-pkg.unit , rapl_pkg_unit, "Joules");
-EVENT_ATTR_STR(energy-ram.unit , rapl_ram_unit, "Joules");
-EVENT_ATTR_STR(energy-gpu.unit , rapl_gpu_unit, "Joules");
+RAPL_EVENT_ATTR_STR(energy-cores.unit, rapl_cores_unit, "Joules");
+RAPL_EVENT_ATTR_STR(energy-pkg.unit , rapl_pkg_unit, "Joules");
+RAPL_EVENT_ATTR_STR(energy-ram.unit , rapl_ram_unit, "Joules");
+RAPL_EVENT_ATTR_STR(energy-gpu.unit , rapl_gpu_unit, "Joules");
/*
* we compute in 0.23 nJ increments regardless of MSR
*/
-EVENT_ATTR_STR(energy-cores.scale, rapl_cores_scale, "2.3283064365386962890625e-10");
-EVENT_ATTR_STR(energy-pkg.scale, rapl_pkg_scale, "2.3283064365386962890625e-10");
-EVENT_ATTR_STR(energy-ram.scale, rapl_ram_scale, "2.3283064365386962890625e-10");
-EVENT_ATTR_STR(energy-gpu.scale, rapl_gpu_scale, "2.3283064365386962890625e-10");
+RAPL_EVENT_ATTR_STR(energy-cores.scale, rapl_cores_scale, "2.3283064365386962890625e-10");
+RAPL_EVENT_ATTR_STR(energy-pkg.scale, rapl_pkg_scale, "2.3283064365386962890625e-10");
+RAPL_EVENT_ATTR_STR(energy-ram.scale, rapl_ram_scale, "2.3283064365386962890625e-10");
+RAPL_EVENT_ATTR_STR(energy-gpu.scale, rapl_gpu_scale, "2.3283064365386962890625e-10");
static struct attribute *rapl_events_srv_attr[] = {
EVENT_PTR(rapl_cores),
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 6307a0f0cf17..705ef8d48e2d 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -127,7 +127,7 @@ int arch_show_interrupts(struct seq_file *p, int prec)
seq_puts(p, " Machine check polls\n");
#endif
#if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN)
- seq_printf(p, "%*s: ", prec, "THR");
+ seq_printf(p, "%*s: ", prec, "HYP");
for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->irq_hv_callback_count);
seq_puts(p, " Hypervisor callback interrupts\n");
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index f7e3cd50ece0..98f654d466e5 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -1020,6 +1020,15 @@ int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
regs->flags &= ~X86_EFLAGS_IF;
trace_hardirqs_off();
regs->ip = (unsigned long)(jp->entry);
+
+ /*
+ * jprobes use jprobe_return() which skips the normal return
+ * path of the function, and this messes up the accounting of the
+ * function graph tracer to get messed up.
+ *
+ * Pause function graph tracing while performing the jprobe function.
+ */
+ pause_graph_tracing();
return 1;
}
NOKPROBE_SYMBOL(setjmp_pre_handler);
@@ -1048,24 +1057,25 @@ int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
u8 *addr = (u8 *) (regs->ip - 1);
struct jprobe *jp = container_of(p, struct jprobe, kp);
+ void *saved_sp = kcb->jprobe_saved_sp;
if ((addr > (u8 *) jprobe_return) &&
(addr < (u8 *) jprobe_return_end)) {
- if (stack_addr(regs) != kcb->jprobe_saved_sp) {
+ if (stack_addr(regs) != saved_sp) {
struct pt_regs *saved_regs = &kcb->jprobe_saved_regs;
printk(KERN_ERR
"current sp %p does not match saved sp %p\n",
- stack_addr(regs), kcb->jprobe_saved_sp);
+ stack_addr(regs), saved_sp);
printk(KERN_ERR "Saved registers for jprobe %p\n", jp);
show_regs(saved_regs);
printk(KERN_ERR "Current registers\n");
show_regs(regs);
BUG();
}
+ /* It's OK to start function graph tracing again */
+ unpause_graph_tracing();
*regs = kcb->jprobe_saved_regs;
- memcpy((kprobe_opcode_t *)(kcb->jprobe_saved_sp),
- kcb->jprobes_stack,
- MIN_STACK_SIZE(kcb->jprobe_saved_sp));
+ memcpy(saved_sp, kcb->jprobes_stack, MIN_STACK_SIZE(saved_sp));
preempt_enable_no_resched();
return 1;
}
diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c
index 4e942f31b1a7..7fc5e843f247 100644
--- a/arch/x86/kernel/tls.c
+++ b/arch/x86/kernel/tls.c
@@ -29,7 +29,28 @@ static int get_free_idx(void)
static bool tls_desc_okay(const struct user_desc *info)
{
- if (LDT_empty(info))
+ /*
+ * For historical reasons (i.e. no one ever documented how any
+ * of the segmentation APIs work), user programs can and do
+ * assume that a struct user_desc that's all zeros except for
+ * entry_number means "no segment at all". This never actually
+ * worked. In fact, up to Linux 3.19, a struct user_desc like
+ * this would create a 16-bit read-write segment with base and
+ * limit both equal to zero.
+ *
+ * That was close enough to "no segment at all" until we
+ * hardened this function to disallow 16-bit TLS segments. Fix
+ * it up by interpreting these zeroed segments the way that they
+ * were almost certainly intended to be interpreted.
+ *
+ * The correct way to ask for "no segment at all" is to specify
+ * a user_desc that satisfies LDT_empty. To keep everything
+ * working, we accept both.
+ *
+ * Note that there's a similar kludge in modify_ldt -- look at
+ * the distinction between modes 1 and 0x11.
+ */
+ if (LDT_empty(info) || LDT_zero(info))
return true;
/*
@@ -71,7 +92,7 @@ static void set_tls_desc(struct task_struct *p, int idx,
cpu = get_cpu();
while (n-- > 0) {
- if (LDT_empty(info))
+ if (LDT_empty(info) || LDT_zero(info))
desc->a = desc->b = 0;
else
fill_ldt(desc, info);
diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c
index 67ebf5751222..c439ec478216 100644
--- a/arch/x86/mm/mpx.c
+++ b/arch/x86/mm/mpx.c
@@ -349,6 +349,12 @@ static __user void *task_get_bounds_dir(struct task_struct *tsk)
return MPX_INVALID_BOUNDS_DIR;
/*
+ * 32-bit binaries on 64-bit kernels are currently
+ * unsupported.
+ */
+ if (IS_ENABLED(CONFIG_X86_64) && test_thread_flag(TIF_IA32))
+ return MPX_INVALID_BOUNDS_DIR;
+ /*
* The bounds directory pointer is stored in a register
* only accessible if we first do an xsave.
*/
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index edf299c8ff6c..7ac68698406c 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -234,8 +234,13 @@ void pat_init(void)
PAT(4, WB) | PAT(5, WC) | PAT(6, UC_MINUS) | PAT(7, UC);
/* Boot CPU check */
- if (!boot_pat_state)
+ if (!boot_pat_state) {
rdmsrl(MSR_IA32_CR_PAT, boot_pat_state);
+ if (!boot_pat_state) {
+ pat_disable("PAT read returns always zero, disabled.");
+ return;
+ }
+ }
wrmsrl(MSR_IA32_CR_PAT, pat);
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index c489ef2c1a39..9098d880c476 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -458,6 +458,7 @@ int __init pci_xen_hvm_init(void)
* just how GSIs get registered.
*/
__acpi_register_gsi = acpi_register_gsi_xen_hvm;
+ __acpi_unregister_gsi = NULL;
#endif
#ifdef CONFIG_PCI_MSI
@@ -471,52 +472,6 @@ int __init pci_xen_hvm_init(void)
}
#ifdef CONFIG_XEN_DOM0
-static __init void xen_setup_acpi_sci(void)
-{
- int rc;
- int trigger, polarity;
- int gsi = acpi_sci_override_gsi;
- int irq = -1;
- int gsi_override = -1;
-
- if (!gsi)
- return;
-
- rc = acpi_get_override_irq(gsi, &trigger, &polarity);
- if (rc) {
- printk(KERN_WARNING "xen: acpi_get_override_irq failed for acpi"
- " sci, rc=%d\n", rc);
- return;
- }
- trigger = trigger ? ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE;
- polarity = polarity ? ACPI_ACTIVE_LOW : ACPI_ACTIVE_HIGH;
-
- printk(KERN_INFO "xen: sci override: global_irq=%d trigger=%d "
- "polarity=%d\n", gsi, trigger, polarity);
-
- /* Before we bind the GSI to a Linux IRQ, check whether
- * we need to override it with bus_irq (IRQ) value. Usually for
- * IRQs below IRQ_LEGACY_IRQ this holds IRQ == GSI, as so:
- * ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 9 low level)
- * but there are oddballs where the IRQ != GSI:
- * ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 20 low level)
- * which ends up being: gsi_to_irq[9] == 20
- * (which is what acpi_gsi_to_irq ends up calling when starting the
- * the ACPI interpreter and keels over since IRQ 9 has not been
- * setup as we had setup IRQ 20 for it).
- */
- if (acpi_gsi_to_irq(gsi, &irq) == 0) {
- /* Use the provided value if it's valid. */
- if (irq >= 0)
- gsi_override = irq;
- }
-
- gsi = xen_register_gsi(gsi, gsi_override, trigger, polarity);
- printk(KERN_INFO "xen: acpi sci %d\n", gsi);
-
- return;
-}
-
int __init pci_xen_initial_domain(void)
{
int irq;
@@ -527,8 +482,8 @@ int __init pci_xen_initial_domain(void)
x86_msi.restore_msi_irqs = xen_initdom_restore_msi_irqs;
pci_msi_ignore_mask = 1;
#endif
- xen_setup_acpi_sci();
__acpi_register_gsi = acpi_register_gsi_xen;
+ __acpi_unregister_gsi = NULL;
/* Pre-allocate legacy irqs */
for (irq = 0; irq < nr_legacy_irqs(); irq++) {
int trigger, polarity;