From 5bfce5ef55cbe78ee2ee6e97f2e26a8a582008f3 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 10 Oct 2013 17:18:15 -0700 Subject: x86, kaslr: Provide randomness functions Adds potential sources of randomness: RDRAND, RDTSC, or the i8254. This moves the pre-alternatives inline rdrand function into the header so both pieces of code can use it. Availability of RDRAND is then controlled by CONFIG_ARCH_RANDOM, if someone wants to disable it even for kASLR. Signed-off-by: Kees Cook Link: http://lkml.kernel.org/r/1381450698-28710-4-git-send-email-keescook@chromium.org Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/rdrand.c | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/rdrand.c b/arch/x86/kernel/cpu/rdrand.c index 88db010845cb..384df5105fbc 100644 --- a/arch/x86/kernel/cpu/rdrand.c +++ b/arch/x86/kernel/cpu/rdrand.c @@ -31,20 +31,6 @@ static int __init x86_rdrand_setup(char *s) } __setup("nordrand", x86_rdrand_setup); -/* We can't use arch_get_random_long() here since alternatives haven't run */ -static inline int rdrand_long(unsigned long *v) -{ - int ok; - asm volatile("1: " RDRAND_LONG "\n\t" - "jc 2f\n\t" - "decl %0\n\t" - "jnz 1b\n\t" - "2:" - : "=r" (ok), "=a" (*v) - : "0" (RDRAND_RETRY_LOOPS)); - return ok; -} - /* * Force a reseed cycle; we are architecturally guaranteed a reseed * after no more than 512 128-bit chunks of random data. This also -- cgit v1.2.3 From 4788e5b4b2338f85fa42a712a182d8afd65d7c58 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Tue, 12 Nov 2013 17:58:50 +0100 Subject: perf/x86: Add Intel RAPL PMU support This patch adds a new uncore PMU to expose the Intel RAPL energy consumption counters. Up to 3 counters, each counting a particular RAPL event are exposed. The RAPL counters are available on Intel SandyBridge, IvyBridge, Haswell. The server skus add a 3rd counter. The following events are available and exposed in sysfs: - power/energy-cores: power consumption of all cores on socket - power/energy-pkg: power consumption of all cores + LLc cache - power/energy-dram: power consumption of DRAM (servers only) For each event both the unit (Joules) and scale (2^-32 J) is exposed in sysfs for use by perf stat and other tools. The files are: /sys/devices/power/events/energy-*.unit /sys/devices/power/events/energy-*.scale The RAPL PMU is uncore by nature and is implemented such that it only works in system-wide mode. Measuring only one CPU per socket is sufficient. The /sys/devices/power/cpumask file can be used by tools to figure out which CPUs to monitor by default. For instance, on a 2-socket system, 2 CPUs (one on each socket) will be shown. All the counters measure in the same unit (exposed via sysfs). The perf_events API exposes all RAPL counters as 64-bit integers counting in unit of 1/2^32 Joules (about 0.23 nJ). User level tools must convert the counts by multiplying them by 2^-32 to obtain Joules. The reason for this is that the kernel avoids doing floating point math whenever possible because it is expensive (user floating-point state must be saved). The method used avoids kernel floating-point usage. There is no loss of precision. Thanks to PeterZ for suggesting this approach. To convert the raw count in Watt: W = C * 2.3 / (1e10 * time) or ldexp(C, -32). RAPL PMU is a new standalone PMU which registers with the perf_event core subsystem. The PMU type (attr->type) is dynamically allocated and is available from /sys/device/power/type. Sampling is not supported by the RAPL PMU. There is no privilege level filtering either. Signed-off-by: Stephane Eranian Reviewed-by: Maria Dimakopoulou Reviewed-by: Andi Kleen Signed-off-by: Peter Zijlstra Cc: acme@redhat.com Cc: jolsa@redhat.com Cc: zheng.z.yan@intel.com Cc: bp@alien8.de Link: http://lkml.kernel.org/r/1384275531-10892-4-git-send-email-eranian@google.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/Makefile | 2 +- arch/x86/kernel/cpu/perf_event_intel_rapl.c | 591 ++++++++++++++++++++++++++++ 2 files changed, 592 insertions(+), 1 deletion(-) create mode 100644 arch/x86/kernel/cpu/perf_event_intel_rapl.c (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 47b56a7e99cb..6359506a19ee 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -36,7 +36,7 @@ obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd_iommu.o endif obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_knc.o perf_event_p4.o obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o -obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_uncore.o +obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_uncore.o perf_event_intel_rapl.o endif diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/kernel/cpu/perf_event_intel_rapl.c new file mode 100644 index 000000000000..cfcd386b5d89 --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_intel_rapl.c @@ -0,0 +1,591 @@ +/* + * perf_event_intel_rapl.c: support Intel RAPL energy consumption counters + * Copyright (C) 2013 Google, Inc., Stephane Eranian + * + * Intel RAPL interface is specified in the IA-32 Manual Vol3b + * section 14.7.1 (September 2013) + * + * RAPL provides more controls than just reporting energy consumption + * however here we only expose the 3 energy consumption free running + * counters (pp0, pkg, dram). + * + * Each of those counters increments in a power unit defined by the + * RAPL_POWER_UNIT MSR. On SandyBridge, this unit is 1/(2^16) Joules + * but it can vary. + * + * Counter to rapl events mappings: + * + * pp0 counter: consumption of all physical cores (power plane 0) + * event: rapl_energy_cores + * perf code: 0x1 + * + * pkg counter: consumption of the whole processor package + * event: rapl_energy_pkg + * perf code: 0x2 + * + * dram counter: consumption of the dram domain (servers only) + * event: rapl_energy_dram + * perf code: 0x3 + * + * We manage those counters as free running (read-only). They may be + * use simultaneously by other tools, such as turbostat. + * + * The events only support system-wide mode counting. There is no + * sampling support because it does not make sense and is not + * supported by the RAPL hardware. + * + * Because we want to avoid floating-point operations in the kernel, + * the events are all reported in fixed point arithmetic (32.32). + * Tools must adjust the counts to convert them to Watts using + * the duration of the measurement. Tools may use a function such as + * ldexp(raw_count, -32); + */ +#include +#include +#include +#include +#include "perf_event.h" + +/* + * RAPL energy status counters + */ +#define RAPL_IDX_PP0_NRG_STAT 0 /* all cores */ +#define INTEL_RAPL_PP0 0x1 /* pseudo-encoding */ +#define RAPL_IDX_PKG_NRG_STAT 1 /* entire package */ +#define INTEL_RAPL_PKG 0x2 /* pseudo-encoding */ +#define RAPL_IDX_RAM_NRG_STAT 2 /* DRAM */ +#define INTEL_RAPL_RAM 0x3 /* pseudo-encoding */ + +/* Clients have PP0, PKG */ +#define RAPL_IDX_CLN (1<config + * any other bit is reserved + */ +#define RAPL_EVENT_MASK 0xFFULL + +#define DEFINE_RAPL_FORMAT_ATTR(_var, _name, _format) \ +static ssize_t __rapl_##_var##_show(struct kobject *kobj, \ + struct kobj_attribute *attr, \ + char *page) \ +{ \ + BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \ + return sprintf(page, _format "\n"); \ +} \ +static struct kobj_attribute format_attr_##_var = \ + __ATTR(_name, 0444, __rapl_##_var##_show, NULL) + +#define RAPL_EVENT_DESC(_name, _config) \ +{ \ + .attr = __ATTR(_name, 0444, rapl_event_show, NULL), \ + .config = _config, \ +} + +#define RAPL_CNTR_WIDTH 32 /* 32-bit rapl counters */ + +struct rapl_pmu { + spinlock_t lock; + int hw_unit; /* 1/2^hw_unit Joule */ + int n_active; /* number of active events */ + struct list_head active_list; + struct pmu *pmu; /* pointer to rapl_pmu_class */ +}; + +static struct pmu rapl_pmu_class; +static cpumask_t rapl_cpu_mask; +static int rapl_cntr_mask; + +static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu); +static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu_to_free); + +static inline u64 rapl_read_counter(struct perf_event *event) +{ + u64 raw; + rdmsrl(event->hw.event_base, raw); + return raw; +} + +static inline u64 rapl_scale(u64 v) +{ + /* + * scale delta to smallest unit (1/2^32) + * users must then scale back: count * 1/(1e9*2^32) to get Joules + * or use ldexp(count, -32). + * Watts = Joules/Time delta + */ + return v << (32 - __get_cpu_var(rapl_pmu)->hw_unit); +} + +static u64 rapl_event_update(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + u64 prev_raw_count, new_raw_count; + s64 delta, sdelta; + int shift = RAPL_CNTR_WIDTH; + +again: + prev_raw_count = local64_read(&hwc->prev_count); + rdmsrl(event->hw.event_base, new_raw_count); + + if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, + new_raw_count) != prev_raw_count) { + cpu_relax(); + goto again; + } + + /* + * Now we have the new raw value and have updated the prev + * timestamp already. We can now calculate the elapsed delta + * (event-)time and add that to the generic event. + * + * Careful, not all hw sign-extends above the physical width + * of the count. + */ + delta = (new_raw_count << shift) - (prev_raw_count << shift); + delta >>= shift; + + sdelta = rapl_scale(delta); + + local64_add(sdelta, &event->count); + + return new_raw_count; +} + +static void __rapl_pmu_event_start(struct rapl_pmu *pmu, + struct perf_event *event) +{ + if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) + return; + + event->hw.state = 0; + + list_add_tail(&event->active_entry, &pmu->active_list); + + local64_set(&event->hw.prev_count, rapl_read_counter(event)); + + pmu->n_active++; +} + +static void rapl_pmu_event_start(struct perf_event *event, int mode) +{ + struct rapl_pmu *pmu = __get_cpu_var(rapl_pmu); + unsigned long flags; + + spin_lock_irqsave(&pmu->lock, flags); + __rapl_pmu_event_start(pmu, event); + spin_unlock_irqrestore(&pmu->lock, flags); +} + +static void rapl_pmu_event_stop(struct perf_event *event, int mode) +{ + struct rapl_pmu *pmu = __get_cpu_var(rapl_pmu); + struct hw_perf_event *hwc = &event->hw; + unsigned long flags; + + spin_lock_irqsave(&pmu->lock, flags); + + /* mark event as deactivated and stopped */ + if (!(hwc->state & PERF_HES_STOPPED)) { + WARN_ON_ONCE(pmu->n_active <= 0); + pmu->n_active--; + + list_del(&event->active_entry); + + WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); + hwc->state |= PERF_HES_STOPPED; + } + + /* check if update of sw counter is necessary */ + if ((mode & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { + /* + * Drain the remaining delta count out of a event + * that we are disabling: + */ + rapl_event_update(event); + hwc->state |= PERF_HES_UPTODATE; + } + + spin_unlock_irqrestore(&pmu->lock, flags); +} + +static int rapl_pmu_event_add(struct perf_event *event, int mode) +{ + struct rapl_pmu *pmu = __get_cpu_var(rapl_pmu); + struct hw_perf_event *hwc = &event->hw; + unsigned long flags; + + spin_lock_irqsave(&pmu->lock, flags); + + hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; + + if (mode & PERF_EF_START) + __rapl_pmu_event_start(pmu, event); + + spin_unlock_irqrestore(&pmu->lock, flags); + + return 0; +} + +static void rapl_pmu_event_del(struct perf_event *event, int flags) +{ + rapl_pmu_event_stop(event, PERF_EF_UPDATE); +} + +static int rapl_pmu_event_init(struct perf_event *event) +{ + u64 cfg = event->attr.config & RAPL_EVENT_MASK; + int bit, msr, ret = 0; + + /* only look at RAPL events */ + if (event->attr.type != rapl_pmu_class.type) + return -ENOENT; + + /* check only supported bits are set */ + if (event->attr.config & ~RAPL_EVENT_MASK) + return -EINVAL; + + /* + * check event is known (determines counter) + */ + switch (cfg) { + case INTEL_RAPL_PP0: + bit = RAPL_IDX_PP0_NRG_STAT; + msr = MSR_PP0_ENERGY_STATUS; + break; + case INTEL_RAPL_PKG: + bit = RAPL_IDX_PKG_NRG_STAT; + msr = MSR_PKG_ENERGY_STATUS; + break; + case INTEL_RAPL_RAM: + bit = RAPL_IDX_RAM_NRG_STAT; + msr = MSR_DRAM_ENERGY_STATUS; + break; + default: + return -EINVAL; + } + /* check event supported */ + if (!(rapl_cntr_mask & (1 << bit))) + return -EINVAL; + + /* unsupported modes and filters */ + if (event->attr.exclude_user || + event->attr.exclude_kernel || + event->attr.exclude_hv || + event->attr.exclude_idle || + event->attr.exclude_host || + event->attr.exclude_guest || + event->attr.sample_period) /* no sampling */ + return -EINVAL; + + /* must be done before validate_group */ + event->hw.event_base = msr; + event->hw.config = cfg; + event->hw.idx = bit; + + return ret; +} + +static void rapl_pmu_event_read(struct perf_event *event) +{ + rapl_event_update(event); +} + +static ssize_t rapl_get_attr_cpumask(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int n = cpulist_scnprintf(buf, PAGE_SIZE - 2, &rapl_cpu_mask); + + buf[n++] = '\n'; + buf[n] = '\0'; + return n; +} + +static DEVICE_ATTR(cpumask, S_IRUGO, rapl_get_attr_cpumask, NULL); + +static struct attribute *rapl_pmu_attrs[] = { + &dev_attr_cpumask.attr, + NULL, +}; + +static struct attribute_group rapl_pmu_attr_group = { + .attrs = rapl_pmu_attrs, +}; + +EVENT_ATTR_STR(energy-cores, rapl_cores, "event=0x01"); +EVENT_ATTR_STR(energy-pkg , rapl_pkg, "event=0x02"); +EVENT_ATTR_STR(energy-ram , rapl_ram, "event=0x03"); + +EVENT_ATTR_STR(energy-cores.unit, rapl_cores_unit, "Joules"); +EVENT_ATTR_STR(energy-pkg.unit , rapl_pkg_unit, "Joules"); +EVENT_ATTR_STR(energy-ram.unit , rapl_ram_unit, "Joules"); + +/* + * we compute in 0.23 nJ increments regardless of MSR + */ +EVENT_ATTR_STR(energy-cores.scale, rapl_cores_scale, "2.3283064365386962890625e-10"); +EVENT_ATTR_STR(energy-pkg.scale, rapl_pkg_scale, "2.3283064365386962890625e-10"); +EVENT_ATTR_STR(energy-ram.scale, rapl_ram_scale, "2.3283064365386962890625e-10"); + +static struct attribute *rapl_events_srv_attr[] = { + EVENT_PTR(rapl_cores), + EVENT_PTR(rapl_pkg), + EVENT_PTR(rapl_ram), + + EVENT_PTR(rapl_cores_unit), + EVENT_PTR(rapl_pkg_unit), + EVENT_PTR(rapl_ram_unit), + + EVENT_PTR(rapl_cores_scale), + EVENT_PTR(rapl_pkg_scale), + EVENT_PTR(rapl_ram_scale), + NULL, +}; + +static struct attribute *rapl_events_cln_attr[] = { + EVENT_PTR(rapl_cores), + EVENT_PTR(rapl_pkg), + + EVENT_PTR(rapl_cores_unit), + EVENT_PTR(rapl_pkg_unit), + + EVENT_PTR(rapl_cores_scale), + EVENT_PTR(rapl_pkg_scale), + NULL, +}; + +static struct attribute_group rapl_pmu_events_group = { + .name = "events", + .attrs = NULL, /* patched at runtime */ +}; + +DEFINE_RAPL_FORMAT_ATTR(event, event, "config:0-7"); +static struct attribute *rapl_formats_attr[] = { + &format_attr_event.attr, + NULL, +}; + +static struct attribute_group rapl_pmu_format_group = { + .name = "format", + .attrs = rapl_formats_attr, +}; + +const struct attribute_group *rapl_attr_groups[] = { + &rapl_pmu_attr_group, + &rapl_pmu_format_group, + &rapl_pmu_events_group, + NULL, +}; + +static struct pmu rapl_pmu_class = { + .attr_groups = rapl_attr_groups, + .task_ctx_nr = perf_invalid_context, /* system-wide only */ + .event_init = rapl_pmu_event_init, + .add = rapl_pmu_event_add, /* must have */ + .del = rapl_pmu_event_del, /* must have */ + .start = rapl_pmu_event_start, + .stop = rapl_pmu_event_stop, + .read = rapl_pmu_event_read, +}; + +static void rapl_cpu_exit(int cpu) +{ + struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu); + int i, phys_id = topology_physical_package_id(cpu); + int target = -1; + + /* find a new cpu on same package */ + for_each_online_cpu(i) { + if (i == cpu) + continue; + if (phys_id == topology_physical_package_id(i)) { + target = i; + break; + } + } + /* + * clear cpu from cpumask + * if was set in cpumask and still some cpu on package, + * then move to new cpu + */ + if (cpumask_test_and_clear_cpu(cpu, &rapl_cpu_mask) && target >= 0) + cpumask_set_cpu(target, &rapl_cpu_mask); + + WARN_ON(cpumask_empty(&rapl_cpu_mask)); + /* + * migrate events and context to new cpu + */ + if (target >= 0) + perf_pmu_migrate_context(pmu->pmu, cpu, target); +} + +static void rapl_cpu_init(int cpu) +{ + int i, phys_id = topology_physical_package_id(cpu); + + /* check if phys_is is already covered */ + for_each_cpu(i, &rapl_cpu_mask) { + if (phys_id == topology_physical_package_id(i)) + return; + } + /* was not found, so add it */ + cpumask_set_cpu(cpu, &rapl_cpu_mask); +} + +static int rapl_cpu_prepare(int cpu) +{ + struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu); + int phys_id = topology_physical_package_id(cpu); + + if (pmu) + return 0; + + if (phys_id < 0) + return -1; + + pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu)); + if (!pmu) + return -1; + + spin_lock_init(&pmu->lock); + + INIT_LIST_HEAD(&pmu->active_list); + + /* + * grab power unit as: 1/2^unit Joules + * + * we cache in local PMU instance + */ + rdmsrl(MSR_RAPL_POWER_UNIT, pmu->hw_unit); + pmu->hw_unit = (pmu->hw_unit >> 8) & 0x1FULL; + pmu->pmu = &rapl_pmu_class; + + /* set RAPL pmu for this cpu for now */ + per_cpu(rapl_pmu, cpu) = pmu; + per_cpu(rapl_pmu_to_free, cpu) = NULL; + + return 0; +} + +static void rapl_cpu_kfree(int cpu) +{ + struct rapl_pmu *pmu = per_cpu(rapl_pmu_to_free, cpu); + + kfree(pmu); + + per_cpu(rapl_pmu_to_free, cpu) = NULL; +} + +static int rapl_cpu_dying(int cpu) +{ + struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu); + + if (!pmu) + return 0; + + per_cpu(rapl_pmu, cpu) = NULL; + + per_cpu(rapl_pmu_to_free, cpu) = pmu; + + return 0; +} + +static int rapl_cpu_notifier(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + unsigned int cpu = (long)hcpu; + + switch (action & ~CPU_TASKS_FROZEN) { + case CPU_UP_PREPARE: + rapl_cpu_prepare(cpu); + break; + case CPU_STARTING: + rapl_cpu_init(cpu); + break; + case CPU_UP_CANCELED: + case CPU_DYING: + rapl_cpu_dying(cpu); + break; + case CPU_ONLINE: + case CPU_DEAD: + rapl_cpu_kfree(cpu); + break; + case CPU_DOWN_PREPARE: + rapl_cpu_exit(cpu); + break; + default: + break; + } + + return NOTIFY_OK; +} + +static const struct x86_cpu_id rapl_cpu_match[] = { + [0] = { .vendor = X86_VENDOR_INTEL, .family = 6 }, + [1] = {}, +}; + +static int __init rapl_pmu_init(void) +{ + struct rapl_pmu *pmu; + int cpu, ret; + + /* + * check for Intel processor family 6 + */ + if (!x86_match_cpu(rapl_cpu_match)) + return 0; + + /* check supported CPU */ + switch (boot_cpu_data.x86_model) { + case 42: /* Sandy Bridge */ + case 58: /* Ivy Bridge */ + case 60: /* Haswell */ + rapl_cntr_mask = RAPL_IDX_CLN; + rapl_pmu_events_group.attrs = rapl_events_cln_attr; + break; + case 45: /* Sandy Bridge-EP */ + case 62: /* IvyTown */ + rapl_cntr_mask = RAPL_IDX_SRV; + rapl_pmu_events_group.attrs = rapl_events_srv_attr; + break; + + default: + /* unsupported */ + return 0; + } + get_online_cpus(); + + for_each_online_cpu(cpu) { + rapl_cpu_prepare(cpu); + rapl_cpu_init(cpu); + } + + perf_cpu_notifier(rapl_cpu_notifier); + + ret = perf_pmu_register(&rapl_pmu_class, "power", -1); + if (WARN_ON(ret)) { + pr_info("RAPL PMU detected, registration failed (%d), RAPL PMU disabled\n", ret); + put_online_cpus(); + return -1; + } + + pmu = __get_cpu_var(rapl_pmu); + + pr_info("RAPL PMU detected, hw unit 2^-%d Joules," + " API unit is 2^-32 Joules," + " %d fixed counters\n", + pmu->hw_unit, + hweight32(rapl_cntr_mask)); + + put_online_cpus(); + + return 0; +} +device_initcall(rapl_pmu_init); -- cgit v1.2.3 From 65661f96d3b32f4b28fef26d21be81d7e173b965 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Tue, 12 Nov 2013 17:58:51 +0100 Subject: perf/x86: Add RAPL hrtimer support The RAPL PMU counters do not interrupt on overflow. Therefore, the kernel needs to poll the counters to avoid missing an overflow. This patch adds the hrtimer code to do this. The timer interval is calculated at boot time based on the power unit used by the HW. There is one hrtimer per-cpu to handle the case of multiple simultaneous use across cores on the same package + hotplug CPU. Thanks to Maria Dimakopoulou for her contributions to this patch especially on the math aspects. Signed-off-by: Stephane Eranian Reviewed-by: Maria Dimakopoulou Reviewed-by: Andi Kleen [ Applied 32-bit build fix. ] Signed-off-by: Peter Zijlstra Cc: acme@redhat.com Cc: jolsa@redhat.com Cc: zheng.z.yan@intel.com Cc: bp@alien8.de Cc: maria.n.dimakopoulou@gmail.com Link: http://lkml.kernel.org/r/1384275531-10892-5-git-send-email-eranian@google.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_rapl.c | 74 ++++++++++++++++++++++++++++- 1 file changed, 72 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/kernel/cpu/perf_event_intel_rapl.c index cfcd386b5d89..bf8e4a736d48 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_rapl.c +++ b/arch/x86/kernel/cpu/perf_event_intel_rapl.c @@ -96,6 +96,8 @@ struct rapl_pmu { int n_active; /* number of active events */ struct list_head active_list; struct pmu *pmu; /* pointer to rapl_pmu_class */ + ktime_t timer_interval; /* in ktime_t unit */ + struct hrtimer hrtimer; }; static struct pmu rapl_pmu_class; @@ -158,6 +160,48 @@ again: return new_raw_count; } +static void rapl_start_hrtimer(struct rapl_pmu *pmu) +{ + __hrtimer_start_range_ns(&pmu->hrtimer, + pmu->timer_interval, 0, + HRTIMER_MODE_REL_PINNED, 0); +} + +static void rapl_stop_hrtimer(struct rapl_pmu *pmu) +{ + hrtimer_cancel(&pmu->hrtimer); +} + +static enum hrtimer_restart rapl_hrtimer_handle(struct hrtimer *hrtimer) +{ + struct rapl_pmu *pmu = __get_cpu_var(rapl_pmu); + struct perf_event *event; + unsigned long flags; + + if (!pmu->n_active) + return HRTIMER_NORESTART; + + spin_lock_irqsave(&pmu->lock, flags); + + list_for_each_entry(event, &pmu->active_list, active_entry) { + rapl_event_update(event); + } + + spin_unlock_irqrestore(&pmu->lock, flags); + + hrtimer_forward_now(hrtimer, pmu->timer_interval); + + return HRTIMER_RESTART; +} + +static void rapl_hrtimer_init(struct rapl_pmu *pmu) +{ + struct hrtimer *hr = &pmu->hrtimer; + + hrtimer_init(hr, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hr->function = rapl_hrtimer_handle; +} + static void __rapl_pmu_event_start(struct rapl_pmu *pmu, struct perf_event *event) { @@ -171,6 +215,8 @@ static void __rapl_pmu_event_start(struct rapl_pmu *pmu, local64_set(&event->hw.prev_count, rapl_read_counter(event)); pmu->n_active++; + if (pmu->n_active == 1) + rapl_start_hrtimer(pmu); } static void rapl_pmu_event_start(struct perf_event *event, int mode) @@ -195,6 +241,8 @@ static void rapl_pmu_event_stop(struct perf_event *event, int mode) if (!(hwc->state & PERF_HES_STOPPED)) { WARN_ON_ONCE(pmu->n_active <= 0); pmu->n_active--; + if (pmu->n_active == 0) + rapl_stop_hrtimer(pmu); list_del(&event->active_entry); @@ -423,6 +471,9 @@ static void rapl_cpu_exit(int cpu) */ if (target >= 0) perf_pmu_migrate_context(pmu->pmu, cpu, target); + + /* cancel overflow polling timer for CPU */ + rapl_stop_hrtimer(pmu); } static void rapl_cpu_init(int cpu) @@ -442,6 +493,7 @@ static int rapl_cpu_prepare(int cpu) { struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu); int phys_id = topology_physical_package_id(cpu); + u64 ms; if (pmu) return 0; @@ -466,6 +518,22 @@ static int rapl_cpu_prepare(int cpu) pmu->hw_unit = (pmu->hw_unit >> 8) & 0x1FULL; pmu->pmu = &rapl_pmu_class; + /* + * use reference of 200W for scaling the timeout + * to avoid missing counter overflows. + * 200W = 200 Joules/sec + * divide interval by 2 to avoid lockstep (2 * 100) + * if hw unit is 32, then we use 2 ms 1/200/2 + */ + if (pmu->hw_unit < 32) + ms = (1000 / (2 * 100)) * (1ULL << (32 - pmu->hw_unit - 1)); + else + ms = 2; + + pmu->timer_interval = ms_to_ktime(ms); + + rapl_hrtimer_init(pmu); + /* set RAPL pmu for this cpu for now */ per_cpu(rapl_pmu, cpu) = pmu; per_cpu(rapl_pmu_to_free, cpu) = NULL; @@ -580,9 +648,11 @@ static int __init rapl_pmu_init(void) pr_info("RAPL PMU detected, hw unit 2^-%d Joules," " API unit is 2^-32 Joules," - " %d fixed counters\n", + " %d fixed counters" + " %llu ms ovfl timer\n", pmu->hw_unit, - hweight32(rapl_cntr_mask)); + hweight32(rapl_cntr_mask), + ktime_to_ms(pmu->timer_interval)); put_online_cpus(); -- cgit v1.2.3 From 853d9b18f1e861d37e9b271742329f8c1176eabe Mon Sep 17 00:00:00 2001 From: Levente Kurusa Date: Fri, 29 Nov 2013 21:28:48 +0100 Subject: x86, mce: Call put_device on device_register failure This patch adds a call to put_device() when the device_register() call has failed. This is required so that the last reference to the device is given up. Signed-off-by: Levente Kurusa Link: http://lkml.kernel.org/r/5298F900.9000208@linux.com Signed-off-by: Borislav Petkov --- arch/x86/kernel/cpu/mcheck/mce.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index b3218cdee95f..a389c1d859ec 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -2272,8 +2272,10 @@ static int mce_device_create(unsigned int cpu) dev->release = &mce_device_release; err = device_register(dev); - if (err) + if (err) { + put_device(dev); return err; + } for (i = 0; mce_device_attrs[i]; i++) { err = device_create_file(dev, mce_device_attrs[i]); -- cgit v1.2.3 From 7fd565e27547c913b83b46d94662103be81a88ec Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Mon, 16 Dec 2013 21:20:23 +0100 Subject: perf/x86: enable Haswell Celeron RAPL support Enable RAPL support for Haswell Celeron (model 69). Signed-off-by: Stephane Eranian Signed-off-by: Peter Zijlstra Cc: ak@linux.intel.com Cc: acme@redhat.com Cc: jolsa@redhat.com Cc: zheng.z.yan@intel.com Cc: bp@alien8.de Cc: vincent.weaver@maine.edu Cc: maria.n.dimakopoulou@gmail.com Cc: peterz@infradead.org Link: http://lkml.kernel.org/r/1387225224-27799-2-git-send-email-eranian@google.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_rapl.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/kernel/cpu/perf_event_intel_rapl.c index bf8e4a736d48..0e3754e450d9 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_rapl.c +++ b/arch/x86/kernel/cpu/perf_event_intel_rapl.c @@ -615,6 +615,7 @@ static int __init rapl_pmu_init(void) case 42: /* Sandy Bridge */ case 58: /* Ivy Bridge */ case 60: /* Haswell */ + case 69: /* Haswell-Celeron */ rapl_cntr_mask = RAPL_IDX_CLN; rapl_pmu_events_group.attrs = rapl_events_cln_attr; break; -- cgit v1.2.3 From addccbb264e5e0e5762f4893f6df24afad327c8c Mon Sep 17 00:00:00 2001 From: "Chen, Gong" Date: Mon, 25 Nov 2013 02:15:00 -0500 Subject: ACPI, APEI, GHES: Do not report only correctable errors with SCI Currently SCI is employed to handle corrected errors - memory corrected errors, more specifically but in fact SCI still can be used to handle any errors, e.g. uncorrected or even fatal ones if enabled by the BIOS. Enable logging for those kinds of errors too. Signed-off-by: Chen, Gong Acked-by: Naveen N. Rao Cc: Tony Luck Link: http://lkml.kernel.org/r/1385363701-12387-1-git-send-email-gong.chen@linux.intel.com [ Boris: massage commit message, rename function arg. ] Signed-off-by: Borislav Petkov --- arch/x86/kernel/cpu/mcheck/mce-apei.c | 14 ++++++++++---- drivers/acpi/apei/ghes.c | 3 +-- 2 files changed, 11 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/mcheck/mce-apei.c b/arch/x86/kernel/cpu/mcheck/mce-apei.c index de8b60a53f69..a1aef9533154 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-apei.c +++ b/arch/x86/kernel/cpu/mcheck/mce-apei.c @@ -33,22 +33,28 @@ #include #include #include +#include #include #include "mce-internal.h" -void apei_mce_report_mem_error(int corrected, struct cper_sec_mem_err *mem_err) +void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err) { struct mce m; - /* Only corrected MC is reported */ - if (!corrected || !(mem_err->validation_bits & CPER_MEM_VALID_PA)) + if (!(mem_err->validation_bits & CPER_MEM_VALID_PA)) return; mce_setup(&m); m.bank = 1; - /* Fake a memory read corrected error with unknown channel */ + /* Fake a memory read error with unknown channel */ m.status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_ADDRV | 0x9f; + + if (severity >= GHES_SEV_RECOVERABLE) + m.status |= MCI_STATUS_UC; + if (severity >= GHES_SEV_PANIC) + m.status |= MCI_STATUS_PCC; + m.addr = mem_err->physical_addr; mce_log(&m); mce_notify_irq(); diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index a30bc313787b..ce3683d93a13 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -453,8 +453,7 @@ static void ghes_do_proc(struct ghes *ghes, ghes_edac_report_mem_error(ghes, sev, mem_err); #ifdef CONFIG_X86_MCE - apei_mce_report_mem_error(sev == GHES_SEV_CORRECTED, - mem_err); + apei_mce_report_mem_error(sev, mem_err); #endif ghes_handle_memory_failure(gdata, sev); } -- cgit v1.2.3 From dd360393f4d948eb518372316e52101cf3b44212 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Mon, 23 Dec 2013 14:16:58 +0200 Subject: x86, cpu: Detect more TLB configuration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Intel Software Developer’s Manual covers few more TLB configurations exposed as CPUID 2 descriptors: 61H Instruction TLB: 4 KByte pages, fully associative, 48 entries 63H Data TLB: 1 GByte pages, 4-way set associative, 4 entries 76H Instruction TLB: 2M/4M pages, fully associative, 8 entries B5H Instruction TLB: 4KByte pages, 8-way set associative, 64 entries B6H Instruction TLB: 4KByte pages, 8-way set associative, 128 entries C1H Shared 2nd-Level TLB: 4 KByte/2MByte pages, 8-way associative, 1024 entries C2H DTLB DTLB: 2 MByte/$MByte pages, 4-way associative, 16 entries Let's detect them as well. Signed-off-by: Kirill A. Shutemov Link: http://lkml.kernel.org/r/1387801018-14499-1-git-send-email-kirill.shutemov@linux.intel.com Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/processor.h | 1 + arch/x86/kernel/cpu/common.c | 7 ++++--- arch/x86/kernel/cpu/intel.c | 26 ++++++++++++++++++++++++++ 3 files changed, 31 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 7b034a4057f9..1dd6260ed940 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -72,6 +72,7 @@ extern u16 __read_mostly tlb_lli_4m[NR_INFO]; extern u16 __read_mostly tlb_lld_4k[NR_INFO]; extern u16 __read_mostly tlb_lld_2m[NR_INFO]; extern u16 __read_mostly tlb_lld_4m[NR_INFO]; +extern u16 __read_mostly tlb_lld_1g[NR_INFO]; extern s8 __read_mostly tlb_flushall_shift; /* diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 6abc172b8258..24b6fd10625a 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -472,6 +472,7 @@ u16 __read_mostly tlb_lli_4m[NR_INFO]; u16 __read_mostly tlb_lld_4k[NR_INFO]; u16 __read_mostly tlb_lld_2m[NR_INFO]; u16 __read_mostly tlb_lld_4m[NR_INFO]; +u16 __read_mostly tlb_lld_1g[NR_INFO]; /* * tlb_flushall_shift shows the balance point in replacing cr3 write @@ -486,13 +487,13 @@ void cpu_detect_tlb(struct cpuinfo_x86 *c) if (this_cpu->c_detect_tlb) this_cpu->c_detect_tlb(c); - printk(KERN_INFO "Last level iTLB entries: 4KB %d, 2MB %d, 4MB %d\n" \ - "Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d\n" \ + printk(KERN_INFO "Last level iTLB entries: 4KB %d, 2MB %d, 4MB %d\n" + "Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d, 1GB %d\n" "tlb_flushall_shift: %d\n", tlb_lli_4k[ENTRIES], tlb_lli_2m[ENTRIES], tlb_lli_4m[ENTRIES], tlb_lld_4k[ENTRIES], tlb_lld_2m[ENTRIES], tlb_lld_4m[ENTRIES], - tlb_flushall_shift); + tlb_lld_1g[ENTRIES], tlb_flushall_shift); } void detect_ht(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index ea04b342c026..5eb7ea5fae15 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -506,6 +506,7 @@ static unsigned int intel_size_cache(struct cpuinfo_x86 *c, unsigned int size) #define TLB_DATA0_2M_4M 0x23 #define STLB_4K 0x41 +#define STLB_4K_2M 0x42 static const struct _tlb_table intel_tlb_table[] = { { 0x01, TLB_INST_4K, 32, " TLB_INST 4 KByte pages, 4-way set associative" }, @@ -526,13 +527,20 @@ static const struct _tlb_table intel_tlb_table[] = { { 0x5b, TLB_DATA_4K_4M, 64, " TLB_DATA 4 KByte and 4 MByte pages" }, { 0x5c, TLB_DATA_4K_4M, 128, " TLB_DATA 4 KByte and 4 MByte pages" }, { 0x5d, TLB_DATA_4K_4M, 256, " TLB_DATA 4 KByte and 4 MByte pages" }, + { 0x61, TLB_INST_4K, 48, " TLB_INST 4 KByte pages, full associative" }, + { 0x63, TLB_DATA_1G, 4, " TLB_DATA 1 GByte pages, 4-way set associative" }, + { 0x76, TLB_INST_2M_4M, 8, " TLB_INST 2-MByte or 4-MByte pages, fully associative" }, { 0xb0, TLB_INST_4K, 128, " TLB_INST 4 KByte pages, 4-way set associative" }, { 0xb1, TLB_INST_2M_4M, 4, " TLB_INST 2M pages, 4-way, 8 entries or 4M pages, 4-way entries" }, { 0xb2, TLB_INST_4K, 64, " TLB_INST 4KByte pages, 4-way set associative" }, { 0xb3, TLB_DATA_4K, 128, " TLB_DATA 4 KByte pages, 4-way set associative" }, { 0xb4, TLB_DATA_4K, 256, " TLB_DATA 4 KByte pages, 4-way associative" }, + { 0xb5, TLB_INST_4K, 64, " TLB_INST 4 KByte pages, 8-way set ssociative" }, + { 0xb6, TLB_INST_4K, 128, " TLB_INST 4 KByte pages, 8-way set ssociative" }, { 0xba, TLB_DATA_4K, 64, " TLB_DATA 4 KByte pages, 4-way associative" }, { 0xc0, TLB_DATA_4K_4M, 8, " TLB_DATA 4 KByte and 4 MByte pages, 4-way associative" }, + { 0xc1, STLB_4K_2M, 1024, " STLB 4 KByte and 2 MByte pages, 8-way associative" }, + { 0xc2, TLB_DATA_2M_4M, 16, " DTLB 2 MByte/4MByte pages, 4-way associative" }, { 0xca, STLB_4K, 512, " STLB 4 KByte pages, 4-way associative" }, { 0x00, 0, 0 } }; @@ -558,6 +566,20 @@ static void intel_tlb_lookup(const unsigned char desc) if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries) tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries; break; + case STLB_4K_2M: + if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries) + tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries; + if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries) + tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries; + if (tlb_lli_2m[ENTRIES] < intel_tlb_table[k].entries) + tlb_lli_2m[ENTRIES] = intel_tlb_table[k].entries; + if (tlb_lld_2m[ENTRIES] < intel_tlb_table[k].entries) + tlb_lld_2m[ENTRIES] = intel_tlb_table[k].entries; + if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries) + tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries; + if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries) + tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries; + break; case TLB_INST_ALL: if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries) tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries; @@ -603,6 +625,10 @@ static void intel_tlb_lookup(const unsigned char desc) if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries) tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries; break; + case TLB_DATA_1G: + if (tlb_lld_1g[ENTRIES] < intel_tlb_table[k].entries) + tlb_lld_1g[ENTRIES] = intel_tlb_table[k].entries; + break; } } -- cgit v1.2.3 From 663b55b9b39fa9c848cca273ca4e12bf29b32c71 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Mon, 6 Jan 2014 19:20:26 -0500 Subject: x86: Delete non-required instances of include None of these files are actually using any __init type directives and hence don't need to include . Most are just a left over from __devinit and __cpuinit removal, or simply due to code getting copied from one driver to the next. [ hpa: undid incorrect removal from arch/x86/kernel/head_32.S ] Signed-off-by: Paul Gortmaker Link: http://lkml.kernel.org/r/1389054026-12947-1-git-send-email-paul.gortmaker@windriver.com Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/mce.h | 1 - arch/x86/include/asm/mpspec.h | 1 - arch/x86/include/asm/processor.h | 1 - arch/x86/include/asm/ptrace.h | 1 - arch/x86/include/asm/smp.h | 1 - arch/x86/include/asm/timer.h | 1 - arch/x86/kernel/apic/apic_flat_64.c | 1 - arch/x86/kernel/apic/apic_noop.c | 1 - arch/x86/kernel/apic/ipi.c | 1 - arch/x86/kernel/apic/summit_32.c | 1 - arch/x86/kernel/apic/x2apic_cluster.c | 1 - arch/x86/kernel/apic/x2apic_phys.c | 1 - arch/x86/kernel/cpu/amd.c | 1 - arch/x86/kernel/cpu/centaur.c | 1 - arch/x86/kernel/cpu/cyrix.c | 1 - arch/x86/kernel/cpu/intel.c | 1 - arch/x86/kernel/cpu/mcheck/mce_intel.c | 1 - arch/x86/kernel/cpu/mcheck/p5.c | 1 - arch/x86/kernel/cpu/mcheck/winchip.c | 1 - arch/x86/kernel/cpu/transmeta.c | 1 - arch/x86/kernel/cpu/umc.c | 1 - arch/x86/kernel/crash.c | 1 - arch/x86/kernel/doublefault.c | 1 - arch/x86/kernel/hw_breakpoint.c | 1 - arch/x86/kernel/kgdb.c | 1 - arch/x86/kernel/machine_kexec_32.c | 1 - arch/x86/kernel/pci-nommu.c | 1 - arch/x86/kernel/process_32.c | 1 - arch/x86/kernel/tsc_sync.c | 1 - arch/x86/lib/delay.c | 1 - arch/x86/mm/kmmio.c | 1 - arch/x86/mm/pageattr-test.c | 1 - arch/x86/pci/fixup.c | 1 - arch/x86/platform/intel-mid/early_printk_intel_mid.c | 1 - arch/x86/platform/iris/iris.c | 1 - arch/x86/realmode/rm/reboot.S | 1 - arch/x86/realmode/rm/trampoline_32.S | 1 - arch/x86/realmode/rm/trampoline_64.S | 1 - arch/x86/um/vdso/vdso.S | 1 - arch/x86/vdso/vdso.S | 1 - arch/x86/vdso/vdsox32.S | 1 - 41 files changed, 41 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index c696a8687567..6e4ce2df87cf 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -118,7 +118,6 @@ extern void mce_register_decode_chain(struct notifier_block *nb); extern void mce_unregister_decode_chain(struct notifier_block *nb); #include -#include #include extern int mce_p5_enabled; diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index 3142a94c7b4b..3e6b4920ef5d 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h @@ -1,7 +1,6 @@ #ifndef _ASM_X86_MPSPEC_H #define _ASM_X86_MPSPEC_H -#include #include #include diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 7b034a4057f9..8ade61721ffb 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -27,7 +27,6 @@ struct mm_struct; #include #include #include -#include #include #include diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 942a08623a1a..14fd6fd75a19 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -60,7 +60,6 @@ struct pt_regs { #endif /* !__i386__ */ -#include #ifdef CONFIG_PARAVIRT #include #endif diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 4137890e88e3..8cd27e08e23c 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -2,7 +2,6 @@ #define _ASM_X86_SMP_H #ifndef __ASSEMBLY__ #include -#include #include /* diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h index 34baa0eb5d0c..a6f3e776d2e4 100644 --- a/arch/x86/include/asm/timer.h +++ b/arch/x86/include/asm/timer.h @@ -1,6 +1,5 @@ #ifndef _ASM_X86_TIMER_H #define _ASM_X86_TIMER_H -#include #include #include #include diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index 00c77cf78e9e..5d5b9eb2b7a4 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index e145f28b4099..191ce75c0e54 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c index 7434d8556d09..62071569bd50 100644 --- a/arch/x86/kernel/apic/ipi.c +++ b/arch/x86/kernel/apic/ipi.c @@ -1,6 +1,5 @@ #include #include -#include #include #include diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c index 77c95c0e1bf7..00146f9b0254 100644 --- a/arch/x86/kernel/apic/summit_32.c +++ b/arch/x86/kernel/apic/summit_32.c @@ -29,7 +29,6 @@ #define pr_fmt(fmt) "summit: %s: " fmt, __func__ #include -#include #include #include diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 140e29db478d..cac85ee6913f 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -3,7 +3,6 @@ #include #include #include -#include #include #include diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index 562a76d433c8..de231e328cae 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -3,7 +3,6 @@ #include #include #include -#include #include #include diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index bca023bdd6b2..39bc78dad377 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -1,5 +1,4 @@ #include -#include #include #include #include diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index 8d5652dc99dd..8779edab684e 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -1,6 +1,5 @@ #include #include -#include #include #include diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index d0969c75ab54..aaf152e79637 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -1,4 +1,3 @@ -#include #include #include #include diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index dc1ec0dff939..53f5d3c7ac09 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -1,4 +1,3 @@ -#include #include #include diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index 4cfe0458ca66..fb6156fee6f7 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c @@ -6,7 +6,6 @@ */ #include -#include #include #include #include diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c index 1c044b1ccc59..a3042989398c 100644 --- a/arch/x86/kernel/cpu/mcheck/p5.c +++ b/arch/x86/kernel/cpu/mcheck/p5.c @@ -5,7 +5,6 @@ #include #include #include -#include #include #include diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c index e9a701aecaa1..7dc5564d0cdf 100644 --- a/arch/x86/kernel/cpu/mcheck/winchip.c +++ b/arch/x86/kernel/cpu/mcheck/winchip.c @@ -5,7 +5,6 @@ #include #include #include -#include #include #include diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c index aa0430d69b90..3fa0e5ad86b4 100644 --- a/arch/x86/kernel/cpu/transmeta.c +++ b/arch/x86/kernel/cpu/transmeta.c @@ -1,6 +1,5 @@ #include #include -#include #include #include #include "cpu.h" diff --git a/arch/x86/kernel/cpu/umc.c b/arch/x86/kernel/cpu/umc.c index 75c5ad5d35cc..ef9c2a0078bd 100644 --- a/arch/x86/kernel/cpu/umc.c +++ b/arch/x86/kernel/cpu/umc.c @@ -1,5 +1,4 @@ #include -#include #include #include "cpu.h" diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index 18677a90d6a3..a57902efe2d5 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -7,7 +7,6 @@ * */ -#include #include #include #include diff --git a/arch/x86/kernel/doublefault.c b/arch/x86/kernel/doublefault.c index 5d3fe8d36e4a..f6dfd9334b67 100644 --- a/arch/x86/kernel/doublefault.c +++ b/arch/x86/kernel/doublefault.c @@ -1,6 +1,5 @@ #include #include -#include #include #include diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index f66ff162dce8..a67b47c31314 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -38,7 +38,6 @@ #include #include #include -#include #include #include diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 836f8322960e..7ec1d5f8d283 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -39,7 +39,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index 5b19e4d78b00..1667b1de8d5d 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c @@ -9,7 +9,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index 871be4a84c7d..da15918d1c81 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c @@ -3,7 +3,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 6f1236c29c4b..0de43e98ce08 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -24,7 +24,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c index adfdf56a3714..26488487bc61 100644 --- a/arch/x86/kernel/tsc_sync.c +++ b/arch/x86/kernel/tsc_sync.c @@ -16,7 +16,6 @@ */ #include #include -#include #include #include #include diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c index 7c3bee636e2f..39d6a3db0b96 100644 --- a/arch/x86/lib/delay.c +++ b/arch/x86/lib/delay.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index e5d5e2ce9f77..637ab34ed632 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c index d0b1773d9d2e..461bc8289024 100644 --- a/arch/x86/mm/pageattr-test.c +++ b/arch/x86/mm/pageattr-test.c @@ -8,7 +8,6 @@ #include #include #include -#include #include #include diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index b046e070e088..bca9e85daaa5 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -5,7 +5,6 @@ #include #include #include -#include #include #include diff --git a/arch/x86/platform/intel-mid/early_printk_intel_mid.c b/arch/x86/platform/intel-mid/early_printk_intel_mid.c index 4f702f554f6e..e0bd082a80e0 100644 --- a/arch/x86/platform/intel-mid/early_printk_intel_mid.c +++ b/arch/x86/platform/intel-mid/early_printk_intel_mid.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include diff --git a/arch/x86/platform/iris/iris.c b/arch/x86/platform/iris/iris.c index e6cb80f620af..4d171e8640ef 100644 --- a/arch/x86/platform/iris/iris.c +++ b/arch/x86/platform/iris/iris.c @@ -27,7 +27,6 @@ #include #include #include -#include #include #include diff --git a/arch/x86/realmode/rm/reboot.S b/arch/x86/realmode/rm/reboot.S index f932ea61d1c8..d66c607bdc58 100644 --- a/arch/x86/realmode/rm/reboot.S +++ b/arch/x86/realmode/rm/reboot.S @@ -1,5 +1,4 @@ #include -#include #include #include #include diff --git a/arch/x86/realmode/rm/trampoline_32.S b/arch/x86/realmode/rm/trampoline_32.S index c1b2791183e7..48ddd76bc4c3 100644 --- a/arch/x86/realmode/rm/trampoline_32.S +++ b/arch/x86/realmode/rm/trampoline_32.S @@ -20,7 +20,6 @@ */ #include -#include #include #include #include "realmode.h" diff --git a/arch/x86/realmode/rm/trampoline_64.S b/arch/x86/realmode/rm/trampoline_64.S index bb360dc39d21..dac7b20d2f9d 100644 --- a/arch/x86/realmode/rm/trampoline_64.S +++ b/arch/x86/realmode/rm/trampoline_64.S @@ -25,7 +25,6 @@ */ #include -#include #include #include #include diff --git a/arch/x86/um/vdso/vdso.S b/arch/x86/um/vdso/vdso.S index 1cb468adacbb..4b4bd4cc06ab 100644 --- a/arch/x86/um/vdso/vdso.S +++ b/arch/x86/um/vdso/vdso.S @@ -1,4 +1,3 @@ -#include __INITDATA diff --git a/arch/x86/vdso/vdso.S b/arch/x86/vdso/vdso.S index 01f5e3b4613c..1e13eb8c9656 100644 --- a/arch/x86/vdso/vdso.S +++ b/arch/x86/vdso/vdso.S @@ -1,6 +1,5 @@ #include #include -#include __PAGE_ALIGNED_DATA diff --git a/arch/x86/vdso/vdsox32.S b/arch/x86/vdso/vdsox32.S index d6b9a7f42a8a..295f1c7543d8 100644 --- a/arch/x86/vdso/vdsox32.S +++ b/arch/x86/vdso/vdsox32.S @@ -1,6 +1,5 @@ #include #include -#include __PAGE_ALIGNED_DATA -- cgit v1.2.3 From f228c5b882602697a1adb50d61ff688b0df1eced Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Wed, 8 Jan 2014 11:15:53 +0100 Subject: perf/x86/intel: Add Intel RAPL PP1 energy counter support This patch adds support for the Intel RAPL energy counter PP1 (Power Plane 1). On client processors, it usually corresponds to the energy consumption of the builtin graphic card. That is why the sysfs event is called energy-gpu. New event: - name: power/energy-gpu/ - code: event=0x4 - unit: 2^-32 Joules On processors without graphics, this should count 0. The patch only enables this event on client processors. Reviewed-by: Maria Dimakopoulou Signed-off-by: Stephane Eranian Cc: ak@linux.intel.com Cc: acme@redhat.com Cc: jolsa@redhat.com Cc: zheng.z.yan@intel.com Cc: bp@alien8.de Cc: vincent.weaver@maine.edu Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1389176153-3128-3-git-send-email-eranian@google.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_rapl.c | 31 ++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/kernel/cpu/perf_event_intel_rapl.c index 0e3754e450d9..5ad35ad94d0f 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_rapl.c +++ b/arch/x86/kernel/cpu/perf_event_intel_rapl.c @@ -27,6 +27,10 @@ * event: rapl_energy_dram * perf code: 0x3 * + * dram counter: consumption of the builtin-gpu domain (client only) + * event: rapl_energy_gpu + * perf code: 0x4 + * * We manage those counters as free running (read-only). They may be * use simultaneously by other tools, such as turbostat. * @@ -55,10 +59,13 @@ #define INTEL_RAPL_PKG 0x2 /* pseudo-encoding */ #define RAPL_IDX_RAM_NRG_STAT 2 /* DRAM */ #define INTEL_RAPL_RAM 0x3 /* pseudo-encoding */ +#define RAPL_IDX_PP1_NRG_STAT 3 /* DRAM */ +#define INTEL_RAPL_PP1 0x4 /* pseudo-encoding */ /* Clients have PP0, PKG */ #define RAPL_IDX_CLN (1< Date: Mon, 23 Dec 2013 18:05:02 +0100 Subject: x86, mce: Fix mce_start_timer semantics So mce_start_timer() has a 'cpu' argument which is supposed to mean to start a timer on that cpu. However, the code currently starts a timer on the *current* cpu the function runs on and causes the sanity-check in mce_timer_fn to fire: WARNING: CPU: 0 PID: 0 at arch/x86/kernel/cpu/mcheck/mce.c:1286 mce_timer_fn because it is running on the wrong cpu. This was triggered by Prarit Bhargava by offlining all the cpus in succession. Then, we were fiddling with the CMCI storm settings when starting the timer whereas there's no need for that - if there's storm happening on this newly restarted cpu, we're going to be in normal CMCI mode initially and then when the CMCI interrupt starts firing, we're going to go to the polling mode with the timer real soon. Signed-off-by: Borislav Petkov Tested-by: Prarit Bhargava Cc: Tony Luck Reviewed-by: Chen, Gong Link: http://lkml.kernel.org/r/1387722156-5511-1-git-send-email-prarit@redhat.com --- arch/x86/kernel/cpu/mcheck/mce.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index a389c1d859ec..4d5419b249da 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1638,15 +1638,15 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c) static void mce_start_timer(unsigned int cpu, struct timer_list *t) { - unsigned long iv = mce_adjust_timer(check_interval * HZ); - - __this_cpu_write(mce_next_interval, iv); + unsigned long iv = check_interval * HZ; if (mca_cfg.ignore_ce || !iv) return; + per_cpu(mce_next_interval, cpu) = iv; + t->expires = round_jiffies(jiffies + iv); - add_timer_on(t, smp_processor_id()); + add_timer_on(t, cpu); } static void __mcheck_cpu_init_timer(void) -- cgit v1.2.3 From 20d1c86a57762f0a33a78988e3fc8818316badd4 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 29 Nov 2013 15:40:29 +0100 Subject: sched/clock, x86: Rewrite cyc2ns() to avoid the need to disable IRQs Use a ring-buffer like multi-version object structure which allows always having a coherent object; we use this to avoid having to disable IRQs while reading sched_clock() and avoids a problem when getting an NMI while changing the cyc2ns data. MAINLINE PRE POST sched_clock_stable: 1 1 1 (cold) sched_clock: 329841 331312 257223 (cold) local_clock: 301773 310296 309889 (warm) sched_clock: 38375 38247 25280 (warm) local_clock: 100371 102713 85268 (warm) rdtsc: 27340 27289 24247 sched_clock_stable: 0 0 0 (cold) sched_clock: 382634 372706 301224 (cold) local_clock: 396890 399275 399870 (warm) sched_clock: 38194 38124 25630 (warm) local_clock: 143452 148698 129629 (warm) rdtsc: 27345 27365 24307 Signed-off-by: Peter Zijlstra Cc: Linus Torvalds Cc: Andrew Morton Link: http://lkml.kernel.org/n/tip-s567in1e5ekq2nlyhn8f987r@git.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/timer.h | 23 +++- arch/x86/kernel/cpu/perf_event.c | 14 ++- arch/x86/kernel/tsc.c | 229 +++++++++++++++++++++++++++++++++++---- arch/x86/platform/uv/tlb_uv.c | 66 ++++++----- 4 files changed, 276 insertions(+), 56 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h index b4c667693a21..3de54ef0aea5 100644 --- a/arch/x86/include/asm/timer.h +++ b/arch/x86/include/asm/timer.h @@ -13,7 +13,26 @@ extern int recalibrate_cpu_khz(void); extern int no_timer_check; -DECLARE_PER_CPU(unsigned long, cyc2ns); -DECLARE_PER_CPU(unsigned long long, cyc2ns_offset); +/* + * We use the full linear equation: f(x) = a + b*x, in order to allow + * a continuous function in the face of dynamic freq changes. + * + * Continuity means that when our frequency changes our slope (b); we want to + * ensure that: f(t) == f'(t), which gives: a + b*t == a' + b'*t. + * + * Without an offset (a) the above would not be possible. + * + * See the comment near cycles_2_ns() for details on how we compute (b). + */ +struct cyc2ns_data { + u32 cyc2ns_mul; + u32 cyc2ns_shift; + u64 cyc2ns_offset; + u32 __count; + /* u32 hole */ +}; /* 24 bytes -- do not grow */ + +extern struct cyc2ns_data *cyc2ns_read_begin(void); +extern void cyc2ns_read_end(struct cyc2ns_data *); #endif /* _ASM_X86_TIMER_H */ diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 8e132931614d..9f97bd03f74f 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1883,6 +1883,8 @@ static struct pmu pmu = { void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now) { + struct cyc2ns_data *data; + userpg->cap_user_time = 0; userpg->cap_user_time_zero = 0; userpg->cap_user_rdpmc = x86_pmu.attr_rdpmc; @@ -1891,13 +1893,17 @@ void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now) if (!sched_clock_stable) return; + data = cyc2ns_read_begin(); + userpg->cap_user_time = 1; - userpg->time_mult = this_cpu_read(cyc2ns); - userpg->time_shift = CYC2NS_SCALE_FACTOR; - userpg->time_offset = this_cpu_read(cyc2ns_offset) - now; + userpg->time_mult = data->cyc2ns_mul; + userpg->time_shift = data->cyc2ns_shift; + userpg->time_offset = data->cyc2ns_offset - now; userpg->cap_user_time_zero = 1; - userpg->time_zero = this_cpu_read(cyc2ns_offset); + userpg->time_zero = data->cyc2ns_offset; + + cyc2ns_read_end(data); } /* diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index b4a04ac1d7aa..92b090b2b79e 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -39,7 +39,119 @@ static int __read_mostly tsc_disabled = -1; int tsc_clocksource_reliable; -/* Accelerators for sched_clock() +/* + * Use a ring-buffer like data structure, where a writer advances the head by + * writing a new data entry and a reader advances the tail when it observes a + * new entry. + * + * Writers are made to wait on readers until there's space to write a new + * entry. + * + * This means that we can always use an {offset, mul} pair to compute a ns + * value that is 'roughly' in the right direction, even if we're writing a new + * {offset, mul} pair during the clock read. + * + * The down-side is that we can no longer guarantee strict monotonicity anymore + * (assuming the TSC was that to begin with), because while we compute the + * intersection point of the two clock slopes and make sure the time is + * continuous at the point of switching; we can no longer guarantee a reader is + * strictly before or after the switch point. + * + * It does mean a reader no longer needs to disable IRQs in order to avoid + * CPU-Freq updates messing with his times, and similarly an NMI reader will + * no longer run the risk of hitting half-written state. + */ + +struct cyc2ns { + struct cyc2ns_data data[2]; /* 0 + 2*24 = 48 */ + struct cyc2ns_data *head; /* 48 + 8 = 56 */ + struct cyc2ns_data *tail; /* 56 + 8 = 64 */ +}; /* exactly fits one cacheline */ + +static DEFINE_PER_CPU_ALIGNED(struct cyc2ns, cyc2ns); + +struct cyc2ns_data *cyc2ns_read_begin(void) +{ + struct cyc2ns_data *head; + + preempt_disable(); + + head = this_cpu_read(cyc2ns.head); + /* + * Ensure we observe the entry when we observe the pointer to it. + * matches the wmb from cyc2ns_write_end(). + */ + smp_read_barrier_depends(); + head->__count++; + barrier(); + + return head; +} + +void cyc2ns_read_end(struct cyc2ns_data *head) +{ + barrier(); + /* + * If we're the outer most nested read; update the tail pointer + * when we're done. This notifies possible pending writers + * that we've observed the head pointer and that the other + * entry is now free. + */ + if (!--head->__count) { + /* + * x86-TSO does not reorder writes with older reads; + * therefore once this write becomes visible to another + * cpu, we must be finished reading the cyc2ns_data. + * + * matches with cyc2ns_write_begin(). + */ + this_cpu_write(cyc2ns.tail, head); + } + preempt_enable(); +} + +/* + * Begin writing a new @data entry for @cpu. + * + * Assumes some sort of write side lock; currently 'provided' by the assumption + * that cpufreq will call its notifiers sequentially. + */ +static struct cyc2ns_data *cyc2ns_write_begin(int cpu) +{ + struct cyc2ns *c2n = &per_cpu(cyc2ns, cpu); + struct cyc2ns_data *data = c2n->data; + + if (data == c2n->head) + data++; + + /* XXX send an IPI to @cpu in order to guarantee a read? */ + + /* + * When we observe the tail write from cyc2ns_read_end(), + * the cpu must be done with that entry and its safe + * to start writing to it. + */ + while (c2n->tail == data) + cpu_relax(); + + return data; +} + +static void cyc2ns_write_end(int cpu, struct cyc2ns_data *data) +{ + struct cyc2ns *c2n = &per_cpu(cyc2ns, cpu); + + /* + * Ensure the @data writes are visible before we publish the + * entry. Matches the data-depencency in cyc2ns_read_begin(). + */ + smp_wmb(); + + ACCESS_ONCE(c2n->head) = data; +} + +/* + * Accelerators for sched_clock() * convert from cycles(64bits) => nanoseconds (64bits) * basic equation: * ns = cycles / (freq / ns_per_sec) @@ -61,49 +173,106 @@ int tsc_clocksource_reliable; * -johnstul@us.ibm.com "math is hard, lets go shopping!" */ -DEFINE_PER_CPU(unsigned long, cyc2ns); -DEFINE_PER_CPU(unsigned long long, cyc2ns_offset); - #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ +static void cyc2ns_data_init(struct cyc2ns_data *data) +{ + data->cyc2ns_mul = 1U << CYC2NS_SCALE_FACTOR; + data->cyc2ns_shift = CYC2NS_SCALE_FACTOR; + data->cyc2ns_offset = 0; + data->__count = 0; +} + +static void cyc2ns_init(int cpu) +{ + struct cyc2ns *c2n = &per_cpu(cyc2ns, cpu); + + cyc2ns_data_init(&c2n->data[0]); + cyc2ns_data_init(&c2n->data[1]); + + c2n->head = c2n->data; + c2n->tail = c2n->data; +} + static inline unsigned long long cycles_2_ns(unsigned long long cyc) { - unsigned long long ns = this_cpu_read(cyc2ns_offset); - ns += mul_u64_u32_shr(cyc, this_cpu_read(cyc2ns), CYC2NS_SCALE_FACTOR); + struct cyc2ns_data *data, *tail; + unsigned long long ns; + + /* + * See cyc2ns_read_*() for details; replicated in order to avoid + * an extra few instructions that came with the abstraction. + * Notable, it allows us to only do the __count and tail update + * dance when its actually needed. + */ + + preempt_disable(); + data = this_cpu_read(cyc2ns.head); + tail = this_cpu_read(cyc2ns.tail); + + if (likely(data == tail)) { + ns = data->cyc2ns_offset; + ns += mul_u64_u32_shr(cyc, data->cyc2ns_mul, CYC2NS_SCALE_FACTOR); + } else { + data->__count++; + + barrier(); + + ns = data->cyc2ns_offset; + ns += mul_u64_u32_shr(cyc, data->cyc2ns_mul, CYC2NS_SCALE_FACTOR); + + barrier(); + + if (!--data->__count) + this_cpu_write(cyc2ns.tail, data); + } + preempt_enable(); + return ns; } +/* XXX surely we already have this someplace in the kernel?! */ +#define DIV_ROUND(n, d) (((n) + ((d) / 2)) / (d)) + static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) { - unsigned long long tsc_now, ns_now, *offset; - unsigned long flags, *scale; + unsigned long long tsc_now, ns_now; + struct cyc2ns_data *data; + unsigned long flags; local_irq_save(flags); sched_clock_idle_sleep_event(); - scale = &per_cpu(cyc2ns, cpu); - offset = &per_cpu(cyc2ns_offset, cpu); + if (!cpu_khz) + goto done; + + data = cyc2ns_write_begin(cpu); rdtscll(tsc_now); ns_now = cycles_2_ns(tsc_now); - if (cpu_khz) { - *scale = ((NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR) + - cpu_khz / 2) / cpu_khz; - *offset = ns_now - mult_frac(tsc_now, *scale, - (1UL << CYC2NS_SCALE_FACTOR)); - } + /* + * Compute a new multiplier as per the above comment and ensure our + * time function is continuous; see the comment near struct + * cyc2ns_data. + */ + data->cyc2ns_mul = DIV_ROUND(NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR, cpu_khz); + data->cyc2ns_shift = CYC2NS_SCALE_FACTOR; + data->cyc2ns_offset = ns_now - + mul_u64_u32_shr(tsc_now, data->cyc2ns_mul, CYC2NS_SCALE_FACTOR); + + cyc2ns_write_end(cpu, data); +done: sched_clock_idle_wakeup_event(0); local_irq_restore(flags); } - /* * Scheduler clock - returns current time in nanosec units. */ u64 native_sched_clock(void) { - u64 this_offset; + u64 tsc_now; /* * Fall back to jiffies if there's no TSC available: @@ -119,10 +288,10 @@ u64 native_sched_clock(void) } /* read the Time Stamp Counter: */ - rdtscll(this_offset); + rdtscll(tsc_now); /* return the value in ns */ - return cycles_2_ns(this_offset); + return cycles_2_ns(tsc_now); } /* We need to define a real function for sched_clock, to override the @@ -678,11 +847,21 @@ void tsc_restore_sched_clock_state(void) local_irq_save(flags); - __this_cpu_write(cyc2ns_offset, 0); + /* + * We're comming out of suspend, there's no concurrency yet; don't + * bother being nice about the RCU stuff, just write to both + * data fields. + */ + + this_cpu_write(cyc2ns.data[0].cyc2ns_offset, 0); + this_cpu_write(cyc2ns.data[1].cyc2ns_offset, 0); + offset = cyc2ns_suspend - sched_clock(); - for_each_possible_cpu(cpu) - per_cpu(cyc2ns_offset, cpu) = offset; + for_each_possible_cpu(cpu) { + per_cpu(cyc2ns.data[0].cyc2ns_offset, cpu) = offset; + per_cpu(cyc2ns.data[1].cyc2ns_offset, cpu) = offset; + } local_irq_restore(flags); } @@ -1005,8 +1184,10 @@ void __init tsc_init(void) * speed as the bootup CPU. (cpufreq notifiers will fix this * up if their speed diverges) */ - for_each_possible_cpu(cpu) + for_each_possible_cpu(cpu) { + cyc2ns_init(cpu); set_cyc2ns_scale(cpu_khz, cpu); + } if (tsc_disabled > 0) return; diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index efe4d7220397..dfe605ac1bcd 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -433,15 +433,49 @@ static void reset_with_ipi(struct pnmask *distribution, struct bau_control *bcp) return; } -static inline unsigned long cycles_2_us(unsigned long long cyc) +/* + * Not to be confused with cycles_2_ns() from tsc.c; this gives a relative + * number, not an absolute. It converts a duration in cycles to a duration in + * ns. + */ +static inline unsigned long long cycles_2_ns(unsigned long long cyc) { + struct cyc2ns_data *data = cyc2ns_read_begin(); unsigned long long ns; - unsigned long us; - int cpu = smp_processor_id(); - ns = (cyc * per_cpu(cyc2ns, cpu)) >> CYC2NS_SCALE_FACTOR; - us = ns / 1000; - return us; + ns = mul_u64_u32_shr(cyc, data->cyc2ns_mul, data->cyc2ns_shift); + + cyc2ns_read_end(data); + return ns; +} + +/* + * The reverse of the above; converts a duration in ns to a duration in cycles. + */ +static inline unsigned long long ns_2_cycles(unsigned long long ns) +{ + struct cyc2ns_data *data = cyc2ns_read_begin(); + unsigned long long cyc; + + cyc = (ns << data->cyc2ns_shift) / data->cyc2ns_mul; + + cyc2ns_read_end(data); + return cyc; +} + +static inline unsigned long cycles_2_us(unsigned long long cyc) +{ + return cycles_2_ns(cyc) / NSEC_PER_USEC; +} + +static inline cycles_t sec_2_cycles(unsigned long sec) +{ + return ns_2_cycles(sec * NSEC_PER_SEC); +} + +static inline unsigned long long usec_2_cycles(unsigned long usec) +{ + return ns_2_cycles(usec * NSEC_PER_USEC); } /* @@ -668,16 +702,6 @@ static int wait_completion(struct bau_desc *bau_desc, bcp, try); } -static inline cycles_t sec_2_cycles(unsigned long sec) -{ - unsigned long ns; - cycles_t cyc; - - ns = sec * 1000000000; - cyc = (ns << CYC2NS_SCALE_FACTOR)/(per_cpu(cyc2ns, smp_processor_id())); - return cyc; -} - /* * Our retries are blocked by all destination sw ack resources being * in use, and a timeout is pending. In that case hardware immediately @@ -1327,16 +1351,6 @@ static void ptc_seq_stop(struct seq_file *file, void *data) { } -static inline unsigned long long usec_2_cycles(unsigned long microsec) -{ - unsigned long ns; - unsigned long long cyc; - - ns = microsec * 1000; - cyc = (ns << CYC2NS_SCALE_FACTOR)/(per_cpu(cyc2ns, smp_processor_id())); - return cyc; -} - /* * Display the statistics thru /proc/sgi_uv/ptc_statistics * 'data' points to the cpu number -- cgit v1.2.3 From 35af99e646c7f7ea46dc2977601e9e71a51dadd5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 28 Nov 2013 19:38:42 +0100 Subject: sched/clock, x86: Use a static_key for sched_clock_stable In order to avoid the runtime condition and variable load turn sched_clock_stable into a static_key. Also provide a shorter implementation of local_clock() and cpu_clock(int) when sched_clock_stable==1. MAINLINE PRE POST sched_clock_stable: 1 1 1 (cold) sched_clock: 329841 221876 215295 (cold) local_clock: 301773 234692 220773 (warm) sched_clock: 38375 25602 25659 (warm) local_clock: 100371 33265 27242 (warm) rdtsc: 27340 24214 24208 sched_clock_stable: 0 0 0 (cold) sched_clock: 382634 235941 237019 (cold) local_clock: 396890 297017 294819 (warm) sched_clock: 38194 25233 25609 (warm) local_clock: 143452 71234 71232 (warm) rdtsc: 27345 24245 24243 Signed-off-by: Peter Zijlstra Cc: Linus Torvalds Cc: Andrew Morton Link: http://lkml.kernel.org/n/tip-eummbdechzz37mwmpags1gjr@git.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd.c | 2 +- arch/x86/kernel/cpu/intel.c | 2 +- arch/x86/kernel/cpu/perf_event.c | 2 +- arch/x86/kernel/tsc.c | 6 +++--- include/linux/sched.h | 4 +++- kernel/sched/clock.c | 41 +++++++++++++++++++++++++++++++++------- kernel/sched/debug.c | 2 +- kernel/time/tick-sched.c | 2 +- kernel/trace/ring_buffer.c | 2 +- 9 files changed, 46 insertions(+), 17 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index bca023bdd6b2..8bc79cddd9a2 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -487,7 +487,7 @@ static void early_init_amd(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); if (!check_tsc_unstable()) - sched_clock_stable = 1; + set_sched_clock_stable(); } #ifdef CONFIG_X86_64 diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index ea04b342c026..1a439c047ff3 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -93,7 +93,7 @@ static void early_init_intel(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); if (!check_tsc_unstable()) - sched_clock_stable = 1; + set_sched_clock_stable(); } /* Penwell and Cloverview have the TSC which doesn't sleep on S3 */ diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 9f97bd03f74f..b88645191fe5 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1890,7 +1890,7 @@ void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now) userpg->cap_user_rdpmc = x86_pmu.attr_rdpmc; userpg->pmc_width = x86_pmu.cntval_bits; - if (!sched_clock_stable) + if (!sched_clock_stable()) return; data = cyc2ns_read_begin(); diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 92b090b2b79e..53c123537245 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -822,7 +822,7 @@ static unsigned long long cyc2ns_suspend; void tsc_save_sched_clock_state(void) { - if (!sched_clock_stable) + if (!sched_clock_stable()) return; cyc2ns_suspend = sched_clock(); @@ -842,7 +842,7 @@ void tsc_restore_sched_clock_state(void) unsigned long flags; int cpu; - if (!sched_clock_stable) + if (!sched_clock_stable()) return; local_irq_save(flags); @@ -984,7 +984,7 @@ void mark_tsc_unstable(char *reason) { if (!tsc_unstable) { tsc_unstable = 1; - sched_clock_stable = 0; + clear_sched_clock_stable(); disable_sched_clock_irqtime(); pr_info("Marking TSC unstable due to %s\n", reason); /* Change only the rating, when not registered */ diff --git a/include/linux/sched.h b/include/linux/sched.h index a196cb7fc6f2..a03875221663 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1994,7 +1994,9 @@ static inline void sched_clock_idle_wakeup_event(u64 delta_ns) * but then during bootup it turns out that sched_clock() * is reliable after all: */ -extern int sched_clock_stable; +extern int sched_clock_stable(void); +extern void set_sched_clock_stable(void); +extern void clear_sched_clock_stable(void); extern void sched_clock_tick(void); extern void sched_clock_idle_sleep_event(void); diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c index 59371549ddf0..c9b34c4e3ecc 100644 --- a/kernel/sched/clock.c +++ b/kernel/sched/clock.c @@ -58,6 +58,7 @@ #include #include #include +#include /* * Scheduler clock - returns current time in nanosec units. @@ -74,7 +75,27 @@ EXPORT_SYMBOL_GPL(sched_clock); __read_mostly int sched_clock_running; #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK -__read_mostly int sched_clock_stable; +static struct static_key __sched_clock_stable = STATIC_KEY_INIT; + +int sched_clock_stable(void) +{ + if (static_key_false(&__sched_clock_stable)) + return false; + return true; +} + +void set_sched_clock_stable(void) +{ + if (!sched_clock_stable()) + static_key_slow_dec(&__sched_clock_stable); +} + +void clear_sched_clock_stable(void) +{ + /* XXX worry about clock continuity */ + if (sched_clock_stable()) + static_key_slow_inc(&__sched_clock_stable); +} struct sched_clock_data { u64 tick_raw; @@ -234,7 +255,7 @@ u64 sched_clock_cpu(int cpu) struct sched_clock_data *scd; u64 clock; - if (sched_clock_stable) + if (sched_clock_stable()) return sched_clock(); if (unlikely(!sched_clock_running)) @@ -257,7 +278,7 @@ void sched_clock_tick(void) struct sched_clock_data *scd; u64 now, now_gtod; - if (sched_clock_stable) + if (sched_clock_stable()) return; if (unlikely(!sched_clock_running)) @@ -308,7 +329,10 @@ EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event); */ u64 cpu_clock(int cpu) { - return sched_clock_cpu(cpu); + if (static_key_false(&__sched_clock_stable)) + return sched_clock_cpu(cpu); + + return sched_clock(); } /* @@ -320,7 +344,10 @@ u64 cpu_clock(int cpu) */ u64 local_clock(void) { - return sched_clock_cpu(raw_smp_processor_id()); + if (static_key_false(&__sched_clock_stable)) + return sched_clock_cpu(raw_smp_processor_id()); + + return sched_clock(); } #else /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */ @@ -340,12 +367,12 @@ u64 sched_clock_cpu(int cpu) u64 cpu_clock(int cpu) { - return sched_clock_cpu(cpu); + return sched_clock(); } u64 local_clock(void) { - return sched_clock_cpu(0); + return sched_clock(); } #endif /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */ diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 374fe04a5e6e..dd52e7ffb10e 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -371,7 +371,7 @@ static void sched_debug_header(struct seq_file *m) PN(cpu_clk); P(jiffies); #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK - P(sched_clock_stable); + P(sched_clock_stable()); #endif #undef PN #undef P diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index ea20f7d1ac2c..c833249ab0fb 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -177,7 +177,7 @@ static bool can_stop_full_tick(void) * TODO: kick full dynticks CPUs when * sched_clock_stable is set. */ - if (!sched_clock_stable) { + if (!sched_clock_stable()) { trace_tick_stop(0, "unstable sched clock\n"); /* * Don't allow the user to think they can get diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index cc2f66f68dc5..294b8a271a04 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2558,7 +2558,7 @@ rb_reserve_next_event(struct ring_buffer *buffer, if (unlikely(test_time_stamp(delta))) { int local_clock_stable = 1; #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK - local_clock_stable = sched_clock_stable; + local_clock_stable = sched_clock_stable(); #endif WARN_ONCE(delta > (1ULL << 59), KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n%s", -- cgit v1.2.3 From bad5fa631fca5466401cd4a48e30cc1f1cb6101e Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sun, 1 Dec 2013 18:09:58 +0100 Subject: x86, microcode: Move to a proper location We've grown a bunch of microcode loader files all prefixed with "microcode_". They should be under cpu/ because this is strictly CPU-related functionality so do that and drop the prefix since they're in their own directory now which gives that prefix. :) While at it, drop MICROCODE_INTEL_LIB config item and stash the functionality under CONFIG_MICROCODE_INTEL as it was its only user. Signed-off-by: Borislav Petkov Tested-by: Aravind Gopalakrishnan --- arch/x86/Kconfig | 4 - arch/x86/kernel/Makefile | 9 - arch/x86/kernel/cpu/Makefile | 1 + arch/x86/kernel/cpu/microcode/Makefile | 7 + arch/x86/kernel/cpu/microcode/amd.c | 492 +++++++++++++++++ arch/x86/kernel/cpu/microcode/amd_early.c | 380 ++++++++++++++ arch/x86/kernel/cpu/microcode/core.c | 645 +++++++++++++++++++++++ arch/x86/kernel/cpu/microcode/core_early.c | 141 +++++ arch/x86/kernel/cpu/microcode/intel.c | 333 ++++++++++++ arch/x86/kernel/cpu/microcode/intel_early.c | 787 ++++++++++++++++++++++++++++ arch/x86/kernel/cpu/microcode/intel_lib.c | 174 ++++++ arch/x86/kernel/microcode_amd.c | 492 ----------------- arch/x86/kernel/microcode_amd_early.c | 380 -------------- arch/x86/kernel/microcode_core.c | 645 ----------------------- arch/x86/kernel/microcode_core_early.c | 141 ----- arch/x86/kernel/microcode_intel.c | 333 ------------ arch/x86/kernel/microcode_intel_early.c | 787 ---------------------------- arch/x86/kernel/microcode_intel_lib.c | 174 ------ 18 files changed, 2960 insertions(+), 2965 deletions(-) create mode 100644 arch/x86/kernel/cpu/microcode/Makefile create mode 100644 arch/x86/kernel/cpu/microcode/amd.c create mode 100644 arch/x86/kernel/cpu/microcode/amd_early.c create mode 100644 arch/x86/kernel/cpu/microcode/core.c create mode 100644 arch/x86/kernel/cpu/microcode/core_early.c create mode 100644 arch/x86/kernel/cpu/microcode/intel.c create mode 100644 arch/x86/kernel/cpu/microcode/intel_early.c create mode 100644 arch/x86/kernel/cpu/microcode/intel_lib.c delete mode 100644 arch/x86/kernel/microcode_amd.c delete mode 100644 arch/x86/kernel/microcode_amd_early.c delete mode 100644 arch/x86/kernel/microcode_core.c delete mode 100644 arch/x86/kernel/microcode_core_early.c delete mode 100644 arch/x86/kernel/microcode_intel.c delete mode 100644 arch/x86/kernel/microcode_intel_early.c delete mode 100644 arch/x86/kernel/microcode_intel_lib.c (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 0952ecd60eca..01bfb9e6ba55 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1080,10 +1080,6 @@ config MICROCODE_OLD_INTERFACE def_bool y depends on MICROCODE -config MICROCODE_INTEL_LIB - def_bool y - depends on MICROCODE_INTEL - config MICROCODE_INTEL_EARLY def_bool n diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 9b0a34e2cd79..4d0094dbb6c9 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -91,15 +91,6 @@ obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o -obj-$(CONFIG_MICROCODE_EARLY) += microcode_core_early.o -obj-$(CONFIG_MICROCODE_INTEL_EARLY) += microcode_intel_early.o -obj-$(CONFIG_MICROCODE_INTEL_LIB) += microcode_intel_lib.o -microcode-y := microcode_core.o -microcode-$(CONFIG_MICROCODE_INTEL) += microcode_intel.o -microcode-$(CONFIG_MICROCODE_AMD) += microcode_amd.o -obj-$(CONFIG_MICROCODE_AMD_EARLY) += microcode_amd_early.o -obj-$(CONFIG_MICROCODE) += microcode.o - obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 47b56a7e99cb..0710eeffcf51 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -42,6 +42,7 @@ endif obj-$(CONFIG_X86_MCE) += mcheck/ obj-$(CONFIG_MTRR) += mtrr/ +obj-$(CONFIG_MICROCODE) += microcode/ obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o perf_event_amd_ibs.o diff --git a/arch/x86/kernel/cpu/microcode/Makefile b/arch/x86/kernel/cpu/microcode/Makefile new file mode 100644 index 000000000000..285c85427c32 --- /dev/null +++ b/arch/x86/kernel/cpu/microcode/Makefile @@ -0,0 +1,7 @@ +microcode-y := core.o +obj-$(CONFIG_MICROCODE) += microcode.o +microcode-$(CONFIG_MICROCODE_INTEL) += intel.o intel_lib.o +microcode-$(CONFIG_MICROCODE_AMD) += amd.o +obj-$(CONFIG_MICROCODE_EARLY) += core_early.o +obj-$(CONFIG_MICROCODE_INTEL_EARLY) += intel_early.o +obj-$(CONFIG_MICROCODE_AMD_EARLY) += amd_early.o diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c new file mode 100644 index 000000000000..4a6ff747aaad --- /dev/null +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -0,0 +1,492 @@ +/* + * AMD CPU Microcode Update Driver for Linux + * Copyright (C) 2008-2011 Advanced Micro Devices Inc. + * + * Author: Peter Oruba + * + * Based on work by: + * Tigran Aivazian + * + * Maintainers: + * Andreas Herrmann + * Borislav Petkov + * + * This driver allows to upgrade microcode on F10h AMD + * CPUs and later. + * + * Licensed under the terms of the GNU General Public + * License version 2. See file COPYING for details. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +MODULE_DESCRIPTION("AMD Microcode Update Driver"); +MODULE_AUTHOR("Peter Oruba"); +MODULE_LICENSE("GPL v2"); + +static struct equiv_cpu_entry *equiv_cpu_table; + +struct ucode_patch { + struct list_head plist; + void *data; + u32 patch_id; + u16 equiv_cpu; +}; + +static LIST_HEAD(pcache); + +static u16 __find_equiv_id(unsigned int cpu) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + return find_equiv_id(equiv_cpu_table, uci->cpu_sig.sig); +} + +static u32 find_cpu_family_by_equiv_cpu(u16 equiv_cpu) +{ + int i = 0; + + BUG_ON(!equiv_cpu_table); + + while (equiv_cpu_table[i].equiv_cpu != 0) { + if (equiv_cpu == equiv_cpu_table[i].equiv_cpu) + return equiv_cpu_table[i].installed_cpu; + i++; + } + return 0; +} + +/* + * a small, trivial cache of per-family ucode patches + */ +static struct ucode_patch *cache_find_patch(u16 equiv_cpu) +{ + struct ucode_patch *p; + + list_for_each_entry(p, &pcache, plist) + if (p->equiv_cpu == equiv_cpu) + return p; + return NULL; +} + +static void update_cache(struct ucode_patch *new_patch) +{ + struct ucode_patch *p; + + list_for_each_entry(p, &pcache, plist) { + if (p->equiv_cpu == new_patch->equiv_cpu) { + if (p->patch_id >= new_patch->patch_id) + /* we already have the latest patch */ + return; + + list_replace(&p->plist, &new_patch->plist); + kfree(p->data); + kfree(p); + return; + } + } + /* no patch found, add it */ + list_add_tail(&new_patch->plist, &pcache); +} + +static void free_cache(void) +{ + struct ucode_patch *p, *tmp; + + list_for_each_entry_safe(p, tmp, &pcache, plist) { + __list_del(p->plist.prev, p->plist.next); + kfree(p->data); + kfree(p); + } +} + +static struct ucode_patch *find_patch(unsigned int cpu) +{ + u16 equiv_id; + + equiv_id = __find_equiv_id(cpu); + if (!equiv_id) + return NULL; + + return cache_find_patch(equiv_id); +} + +static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) +{ + struct cpuinfo_x86 *c = &cpu_data(cpu); + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + struct ucode_patch *p; + + csig->sig = cpuid_eax(0x00000001); + csig->rev = c->microcode; + + /* + * a patch could have been loaded early, set uci->mc so that + * mc_bp_resume() can call apply_microcode() + */ + p = find_patch(cpu); + if (p && (p->patch_id == csig->rev)) + uci->mc = p->data; + + pr_info("CPU%d: patch_level=0x%08x\n", cpu, csig->rev); + + return 0; +} + +static unsigned int verify_patch_size(u8 family, u32 patch_size, + unsigned int size) +{ + u32 max_size; + +#define F1XH_MPB_MAX_SIZE 2048 +#define F14H_MPB_MAX_SIZE 1824 +#define F15H_MPB_MAX_SIZE 4096 +#define F16H_MPB_MAX_SIZE 3458 + + switch (family) { + case 0x14: + max_size = F14H_MPB_MAX_SIZE; + break; + case 0x15: + max_size = F15H_MPB_MAX_SIZE; + break; + case 0x16: + max_size = F16H_MPB_MAX_SIZE; + break; + default: + max_size = F1XH_MPB_MAX_SIZE; + break; + } + + if (patch_size > min_t(u32, size, max_size)) { + pr_err("patch size mismatch\n"); + return 0; + } + + return patch_size; +} + +int __apply_microcode_amd(struct microcode_amd *mc_amd) +{ + u32 rev, dummy; + + native_wrmsrl(MSR_AMD64_PATCH_LOADER, (u64)(long)&mc_amd->hdr.data_code); + + /* verify patch application was successful */ + native_rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); + if (rev != mc_amd->hdr.patch_id) + return -1; + + return 0; +} + +int apply_microcode_amd(int cpu) +{ + struct cpuinfo_x86 *c = &cpu_data(cpu); + struct microcode_amd *mc_amd; + struct ucode_cpu_info *uci; + struct ucode_patch *p; + u32 rev, dummy; + + BUG_ON(raw_smp_processor_id() != cpu); + + uci = ucode_cpu_info + cpu; + + p = find_patch(cpu); + if (!p) + return 0; + + mc_amd = p->data; + uci->mc = p->data; + + rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); + + /* need to apply patch? */ + if (rev >= mc_amd->hdr.patch_id) { + c->microcode = rev; + uci->cpu_sig.rev = rev; + return 0; + } + + if (__apply_microcode_amd(mc_amd)) { + pr_err("CPU%d: update failed for patch_level=0x%08x\n", + cpu, mc_amd->hdr.patch_id); + return -1; + } + pr_info("CPU%d: new patch_level=0x%08x\n", cpu, + mc_amd->hdr.patch_id); + + uci->cpu_sig.rev = mc_amd->hdr.patch_id; + c->microcode = mc_amd->hdr.patch_id; + + return 0; +} + +static int install_equiv_cpu_table(const u8 *buf) +{ + unsigned int *ibuf = (unsigned int *)buf; + unsigned int type = ibuf[1]; + unsigned int size = ibuf[2]; + + if (type != UCODE_EQUIV_CPU_TABLE_TYPE || !size) { + pr_err("empty section/" + "invalid type field in container file section header\n"); + return -EINVAL; + } + + equiv_cpu_table = vmalloc(size); + if (!equiv_cpu_table) { + pr_err("failed to allocate equivalent CPU table\n"); + return -ENOMEM; + } + + memcpy(equiv_cpu_table, buf + CONTAINER_HDR_SZ, size); + + /* add header length */ + return size + CONTAINER_HDR_SZ; +} + +static void free_equiv_cpu_table(void) +{ + vfree(equiv_cpu_table); + equiv_cpu_table = NULL; +} + +static void cleanup(void) +{ + free_equiv_cpu_table(); + free_cache(); +} + +/* + * We return the current size even if some of the checks failed so that + * we can skip over the next patch. If we return a negative value, we + * signal a grave error like a memory allocation has failed and the + * driver cannot continue functioning normally. In such cases, we tear + * down everything we've used up so far and exit. + */ +static int verify_and_add_patch(u8 family, u8 *fw, unsigned int leftover) +{ + struct microcode_header_amd *mc_hdr; + struct ucode_patch *patch; + unsigned int patch_size, crnt_size, ret; + u32 proc_fam; + u16 proc_id; + + patch_size = *(u32 *)(fw + 4); + crnt_size = patch_size + SECTION_HDR_SIZE; + mc_hdr = (struct microcode_header_amd *)(fw + SECTION_HDR_SIZE); + proc_id = mc_hdr->processor_rev_id; + + proc_fam = find_cpu_family_by_equiv_cpu(proc_id); + if (!proc_fam) { + pr_err("No patch family for equiv ID: 0x%04x\n", proc_id); + return crnt_size; + } + + /* check if patch is for the current family */ + proc_fam = ((proc_fam >> 8) & 0xf) + ((proc_fam >> 20) & 0xff); + if (proc_fam != family) + return crnt_size; + + if (mc_hdr->nb_dev_id || mc_hdr->sb_dev_id) { + pr_err("Patch-ID 0x%08x: chipset-specific code unsupported.\n", + mc_hdr->patch_id); + return crnt_size; + } + + ret = verify_patch_size(family, patch_size, leftover); + if (!ret) { + pr_err("Patch-ID 0x%08x: size mismatch.\n", mc_hdr->patch_id); + return crnt_size; + } + + patch = kzalloc(sizeof(*patch), GFP_KERNEL); + if (!patch) { + pr_err("Patch allocation failure.\n"); + return -EINVAL; + } + + patch->data = kzalloc(patch_size, GFP_KERNEL); + if (!patch->data) { + pr_err("Patch data allocation failure.\n"); + kfree(patch); + return -EINVAL; + } + + /* All looks ok, copy patch... */ + memcpy(patch->data, fw + SECTION_HDR_SIZE, patch_size); + INIT_LIST_HEAD(&patch->plist); + patch->patch_id = mc_hdr->patch_id; + patch->equiv_cpu = proc_id; + + pr_debug("%s: Added patch_id: 0x%08x, proc_id: 0x%04x\n", + __func__, patch->patch_id, proc_id); + + /* ... and add to cache. */ + update_cache(patch); + + return crnt_size; +} + +static enum ucode_state __load_microcode_amd(u8 family, const u8 *data, + size_t size) +{ + enum ucode_state ret = UCODE_ERROR; + unsigned int leftover; + u8 *fw = (u8 *)data; + int crnt_size = 0; + int offset; + + offset = install_equiv_cpu_table(data); + if (offset < 0) { + pr_err("failed to create equivalent cpu table\n"); + return ret; + } + fw += offset; + leftover = size - offset; + + if (*(u32 *)fw != UCODE_UCODE_TYPE) { + pr_err("invalid type field in container file section header\n"); + free_equiv_cpu_table(); + return ret; + } + + while (leftover) { + crnt_size = verify_and_add_patch(family, fw, leftover); + if (crnt_size < 0) + return ret; + + fw += crnt_size; + leftover -= crnt_size; + } + + return UCODE_OK; +} + +enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t size) +{ + enum ucode_state ret; + + /* free old equiv table */ + free_equiv_cpu_table(); + + ret = __load_microcode_amd(family, data, size); + + if (ret != UCODE_OK) + cleanup(); + +#if defined(CONFIG_MICROCODE_AMD_EARLY) && defined(CONFIG_X86_32) + /* save BSP's matching patch for early load */ + if (cpu_data(smp_processor_id()).cpu_index == boot_cpu_data.cpu_index) { + struct ucode_patch *p = find_patch(smp_processor_id()); + if (p) { + memset(amd_ucode_patch, 0, PATCH_MAX_SIZE); + memcpy(amd_ucode_patch, p->data, min_t(u32, ksize(p->data), + PATCH_MAX_SIZE)); + } + } +#endif + return ret; +} + +/* + * AMD microcode firmware naming convention, up to family 15h they are in + * the legacy file: + * + * amd-ucode/microcode_amd.bin + * + * This legacy file is always smaller than 2K in size. + * + * Beginning with family 15h, they are in family-specific firmware files: + * + * amd-ucode/microcode_amd_fam15h.bin + * amd-ucode/microcode_amd_fam16h.bin + * ... + * + * These might be larger than 2K. + */ +static enum ucode_state request_microcode_amd(int cpu, struct device *device, + bool refresh_fw) +{ + char fw_name[36] = "amd-ucode/microcode_amd.bin"; + struct cpuinfo_x86 *c = &cpu_data(cpu); + enum ucode_state ret = UCODE_NFOUND; + const struct firmware *fw; + + /* reload ucode container only on the boot cpu */ + if (!refresh_fw || c->cpu_index != boot_cpu_data.cpu_index) + return UCODE_OK; + + if (c->x86 >= 0x15) + snprintf(fw_name, sizeof(fw_name), "amd-ucode/microcode_amd_fam%.2xh.bin", c->x86); + + if (request_firmware(&fw, (const char *)fw_name, device)) { + pr_debug("failed to load file %s\n", fw_name); + goto out; + } + + ret = UCODE_ERROR; + if (*(u32 *)fw->data != UCODE_MAGIC) { + pr_err("invalid magic value (0x%08x)\n", *(u32 *)fw->data); + goto fw_release; + } + + ret = load_microcode_amd(c->x86, fw->data, fw->size); + + fw_release: + release_firmware(fw); + + out: + return ret; +} + +static enum ucode_state +request_microcode_user(int cpu, const void __user *buf, size_t size) +{ + return UCODE_ERROR; +} + +static void microcode_fini_cpu_amd(int cpu) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + + uci->mc = NULL; +} + +static struct microcode_ops microcode_amd_ops = { + .request_microcode_user = request_microcode_user, + .request_microcode_fw = request_microcode_amd, + .collect_cpu_info = collect_cpu_info_amd, + .apply_microcode = apply_microcode_amd, + .microcode_fini_cpu = microcode_fini_cpu_amd, +}; + +struct microcode_ops * __init init_amd_microcode(void) +{ + struct cpuinfo_x86 *c = &cpu_data(0); + + if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) { + pr_warning("AMD CPU family 0x%x not supported\n", c->x86); + return NULL; + } + + return µcode_amd_ops; +} + +void __exit exit_amd_microcode(void) +{ + cleanup(); +} diff --git a/arch/x86/kernel/cpu/microcode/amd_early.c b/arch/x86/kernel/cpu/microcode/amd_early.c new file mode 100644 index 000000000000..8384c0fa206f --- /dev/null +++ b/arch/x86/kernel/cpu/microcode/amd_early.c @@ -0,0 +1,380 @@ +/* + * Copyright (C) 2013 Advanced Micro Devices, Inc. + * + * Author: Jacob Shin + * Fixes: Borislav Petkov + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include + +#include +#include +#include + +/* + * This points to the current valid container of microcode patches which we will + * save from the initrd before jettisoning its contents. + */ +static u8 *container; +static size_t container_size; + +static u32 ucode_new_rev; +u8 amd_ucode_patch[PATCH_MAX_SIZE]; +static u16 this_equiv_id; + +struct cpio_data ucode_cpio; + +/* + * Microcode patch container file is prepended to the initrd in cpio format. + * See Documentation/x86/early-microcode.txt + */ +static __initdata char ucode_path[] = "kernel/x86/microcode/AuthenticAMD.bin"; + +static struct cpio_data __init find_ucode_in_initrd(void) +{ + long offset = 0; + char *path; + void *start; + size_t size; + +#ifdef CONFIG_X86_32 + struct boot_params *p; + + /* + * On 32-bit, early load occurs before paging is turned on so we need + * to use physical addresses. + */ + p = (struct boot_params *)__pa_nodebug(&boot_params); + path = (char *)__pa_nodebug(ucode_path); + start = (void *)p->hdr.ramdisk_image; + size = p->hdr.ramdisk_size; +#else + path = ucode_path; + start = (void *)(boot_params.hdr.ramdisk_image + PAGE_OFFSET); + size = boot_params.hdr.ramdisk_size; +#endif + + return find_cpio_data(path, start, size, &offset); +} + +static size_t compute_container_size(u8 *data, u32 total_size) +{ + size_t size = 0; + u32 *header = (u32 *)data; + + if (header[0] != UCODE_MAGIC || + header[1] != UCODE_EQUIV_CPU_TABLE_TYPE || /* type */ + header[2] == 0) /* size */ + return size; + + size = header[2] + CONTAINER_HDR_SZ; + total_size -= size; + data += size; + + while (total_size) { + u16 patch_size; + + header = (u32 *)data; + + if (header[0] != UCODE_UCODE_TYPE) + break; + + /* + * Sanity-check patch size. + */ + patch_size = header[1]; + if (patch_size > PATCH_MAX_SIZE) + break; + + size += patch_size + SECTION_HDR_SIZE; + data += patch_size + SECTION_HDR_SIZE; + total_size -= patch_size + SECTION_HDR_SIZE; + } + + return size; +} + +/* + * Early load occurs before we can vmalloc(). So we look for the microcode + * patch container file in initrd, traverse equivalent cpu table, look for a + * matching microcode patch, and update, all in initrd memory in place. + * When vmalloc() is available for use later -- on 64-bit during first AP load, + * and on 32-bit during save_microcode_in_initrd_amd() -- we can call + * load_microcode_amd() to save equivalent cpu table and microcode patches in + * kernel heap memory. + */ +static void apply_ucode_in_initrd(void *ucode, size_t size) +{ + struct equiv_cpu_entry *eq; + size_t *cont_sz; + u32 *header; + u8 *data, **cont; + u16 eq_id = 0; + int offset, left; + u32 rev, eax, ebx, ecx, edx; + u32 *new_rev; + +#ifdef CONFIG_X86_32 + new_rev = (u32 *)__pa_nodebug(&ucode_new_rev); + cont_sz = (size_t *)__pa_nodebug(&container_size); + cont = (u8 **)__pa_nodebug(&container); +#else + new_rev = &ucode_new_rev; + cont_sz = &container_size; + cont = &container; +#endif + + data = ucode; + left = size; + header = (u32 *)data; + + /* find equiv cpu table */ + if (header[0] != UCODE_MAGIC || + header[1] != UCODE_EQUIV_CPU_TABLE_TYPE || /* type */ + header[2] == 0) /* size */ + return; + + eax = 0x00000001; + ecx = 0; + native_cpuid(&eax, &ebx, &ecx, &edx); + + while (left > 0) { + eq = (struct equiv_cpu_entry *)(data + CONTAINER_HDR_SZ); + + *cont = data; + + /* Advance past the container header */ + offset = header[2] + CONTAINER_HDR_SZ; + data += offset; + left -= offset; + + eq_id = find_equiv_id(eq, eax); + if (eq_id) { + this_equiv_id = eq_id; + *cont_sz = compute_container_size(*cont, left + offset); + + /* + * truncate how much we need to iterate over in the + * ucode update loop below + */ + left = *cont_sz - offset; + break; + } + + /* + * support multiple container files appended together. if this + * one does not have a matching equivalent cpu entry, we fast + * forward to the next container file. + */ + while (left > 0) { + header = (u32 *)data; + if (header[0] == UCODE_MAGIC && + header[1] == UCODE_EQUIV_CPU_TABLE_TYPE) + break; + + offset = header[1] + SECTION_HDR_SIZE; + data += offset; + left -= offset; + } + + /* mark where the next microcode container file starts */ + offset = data - (u8 *)ucode; + ucode = data; + } + + if (!eq_id) { + *cont = NULL; + *cont_sz = 0; + return; + } + + /* find ucode and update if needed */ + + native_rdmsr(MSR_AMD64_PATCH_LEVEL, rev, eax); + + while (left > 0) { + struct microcode_amd *mc; + + header = (u32 *)data; + if (header[0] != UCODE_UCODE_TYPE || /* type */ + header[1] == 0) /* size */ + break; + + mc = (struct microcode_amd *)(data + SECTION_HDR_SIZE); + + if (eq_id == mc->hdr.processor_rev_id && rev < mc->hdr.patch_id) { + + if (!__apply_microcode_amd(mc)) { + rev = mc->hdr.patch_id; + *new_rev = rev; + + /* save ucode patch */ + memcpy(amd_ucode_patch, mc, + min_t(u32, header[1], PATCH_MAX_SIZE)); + } + } + + offset = header[1] + SECTION_HDR_SIZE; + data += offset; + left -= offset; + } +} + +void __init load_ucode_amd_bsp(void) +{ + struct cpio_data cp; + void **data; + size_t *size; + +#ifdef CONFIG_X86_32 + data = (void **)__pa_nodebug(&ucode_cpio.data); + size = (size_t *)__pa_nodebug(&ucode_cpio.size); +#else + data = &ucode_cpio.data; + size = &ucode_cpio.size; +#endif + + cp = find_ucode_in_initrd(); + if (!cp.data) + return; + + *data = cp.data; + *size = cp.size; + + apply_ucode_in_initrd(cp.data, cp.size); +} + +#ifdef CONFIG_X86_32 +/* + * On 32-bit, since AP's early load occurs before paging is turned on, we + * cannot traverse cpu_equiv_table and pcache in kernel heap memory. So during + * cold boot, AP will apply_ucode_in_initrd() just like the BSP. During + * save_microcode_in_initrd_amd() BSP's patch is copied to amd_ucode_patch, + * which is used upon resume from suspend. + */ +void load_ucode_amd_ap(void) +{ + struct microcode_amd *mc; + size_t *usize; + void **ucode; + + mc = (struct microcode_amd *)__pa(amd_ucode_patch); + if (mc->hdr.patch_id && mc->hdr.processor_rev_id) { + __apply_microcode_amd(mc); + return; + } + + ucode = (void *)__pa_nodebug(&container); + usize = (size_t *)__pa_nodebug(&container_size); + + if (!*ucode || !*usize) + return; + + apply_ucode_in_initrd(*ucode, *usize); +} + +static void __init collect_cpu_sig_on_bsp(void *arg) +{ + unsigned int cpu = smp_processor_id(); + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + + uci->cpu_sig.sig = cpuid_eax(0x00000001); +} +#else +void load_ucode_amd_ap(void) +{ + unsigned int cpu = smp_processor_id(); + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + struct equiv_cpu_entry *eq; + struct microcode_amd *mc; + u32 rev, eax; + u16 eq_id; + + /* Exit if called on the BSP. */ + if (!cpu) + return; + + if (!container) + return; + + rdmsr(MSR_AMD64_PATCH_LEVEL, rev, eax); + + uci->cpu_sig.rev = rev; + uci->cpu_sig.sig = eax; + + eax = cpuid_eax(0x00000001); + eq = (struct equiv_cpu_entry *)(container + CONTAINER_HDR_SZ); + + eq_id = find_equiv_id(eq, eax); + if (!eq_id) + return; + + if (eq_id == this_equiv_id) { + mc = (struct microcode_amd *)amd_ucode_patch; + + if (mc && rev < mc->hdr.patch_id) { + if (!__apply_microcode_amd(mc)) + ucode_new_rev = mc->hdr.patch_id; + } + + } else { + if (!ucode_cpio.data) + return; + + /* + * AP has a different equivalence ID than BSP, looks like + * mixed-steppings silicon so go through the ucode blob anew. + */ + apply_ucode_in_initrd(ucode_cpio.data, ucode_cpio.size); + } +} +#endif + +int __init save_microcode_in_initrd_amd(void) +{ + enum ucode_state ret; + u32 eax; + +#ifdef CONFIG_X86_32 + unsigned int bsp = boot_cpu_data.cpu_index; + struct ucode_cpu_info *uci = ucode_cpu_info + bsp; + + if (!uci->cpu_sig.sig) + smp_call_function_single(bsp, collect_cpu_sig_on_bsp, NULL, 1); + + /* + * Take into account the fact that the ramdisk might get relocated + * and therefore we need to recompute the container's position in + * virtual memory space. + */ + container = (u8 *)(__va((u32)relocated_ramdisk) + + ((u32)container - boot_params.hdr.ramdisk_image)); +#endif + if (ucode_new_rev) + pr_info("microcode: updated early to new patch_level=0x%08x\n", + ucode_new_rev); + + if (!container) + return -EINVAL; + + eax = cpuid_eax(0x00000001); + eax = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff); + + ret = load_microcode_amd(eax, container, container_size); + if (ret != UCODE_OK) + return -EINVAL; + + /* + * This will be freed any msec now, stash patches for the current + * family and switch to patch cache for cpu hotplug, etc later. + */ + container = NULL; + container_size = 0; + + return 0; +} diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c new file mode 100644 index 000000000000..15c987698b0f --- /dev/null +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -0,0 +1,645 @@ +/* + * Intel CPU Microcode Update Driver for Linux + * + * Copyright (C) 2000-2006 Tigran Aivazian + * 2006 Shaohua Li + * + * This driver allows to upgrade microcode on Intel processors + * belonging to IA-32 family - PentiumPro, Pentium II, + * Pentium III, Xeon, Pentium 4, etc. + * + * Reference: Section 8.11 of Volume 3a, IA-32 Intel? Architecture + * Software Developer's Manual + * Order Number 253668 or free download from: + * + * http://developer.intel.com/Assets/PDF/manual/253668.pdf + * + * For more information, go to http://www.urbanmyth.org/microcode + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * 1.0 16 Feb 2000, Tigran Aivazian + * Initial release. + * 1.01 18 Feb 2000, Tigran Aivazian + * Added read() support + cleanups. + * 1.02 21 Feb 2000, Tigran Aivazian + * Added 'device trimming' support. open(O_WRONLY) zeroes + * and frees the saved copy of applied microcode. + * 1.03 29 Feb 2000, Tigran Aivazian + * Made to use devfs (/dev/cpu/microcode) + cleanups. + * 1.04 06 Jun 2000, Simon Trimmer + * Added misc device support (now uses both devfs and misc). + * Added MICROCODE_IOCFREE ioctl to clear memory. + * 1.05 09 Jun 2000, Simon Trimmer + * Messages for error cases (non Intel & no suitable microcode). + * 1.06 03 Aug 2000, Tigran Aivazian + * Removed ->release(). Removed exclusive open and status bitmap. + * Added microcode_rwsem to serialize read()/write()/ioctl(). + * Removed global kernel lock usage. + * 1.07 07 Sep 2000, Tigran Aivazian + * Write 0 to 0x8B msr and then cpuid before reading revision, + * so that it works even if there were no update done by the + * BIOS. Otherwise, reading from 0x8B gives junk (which happened + * to be 0 on my machine which is why it worked even when I + * disabled update by the BIOS) + * Thanks to Eric W. Biederman for the fix. + * 1.08 11 Dec 2000, Richard Schaal and + * Tigran Aivazian + * Intel Pentium 4 processor support and bugfixes. + * 1.09 30 Oct 2001, Tigran Aivazian + * Bugfix for HT (Hyper-Threading) enabled processors + * whereby processor resources are shared by all logical processors + * in a single CPU package. + * 1.10 28 Feb 2002 Asit K Mallick and + * Tigran Aivazian , + * Serialize updates as required on HT processors due to + * speculative nature of implementation. + * 1.11 22 Mar 2002 Tigran Aivazian + * Fix the panic when writing zero-length microcode chunk. + * 1.12 29 Sep 2003 Nitin Kamble , + * Jun Nakajima + * Support for the microcode updates in the new format. + * 1.13 10 Oct 2003 Tigran Aivazian + * Removed ->read() method and obsoleted MICROCODE_IOCFREE ioctl + * because we no longer hold a copy of applied microcode + * in kernel memory. + * 1.14 25 Jun 2004 Tigran Aivazian + * Fix sigmatch() macro to handle old CPUs with pf == 0. + * Thanks to Stuart Swales for pointing out this bug. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +MODULE_DESCRIPTION("Microcode Update Driver"); +MODULE_AUTHOR("Tigran Aivazian "); +MODULE_LICENSE("GPL"); + +#define MICROCODE_VERSION "2.00" + +static struct microcode_ops *microcode_ops; + +/* + * Synchronization. + * + * All non cpu-hotplug-callback call sites use: + * + * - microcode_mutex to synchronize with each other; + * - get/put_online_cpus() to synchronize with + * the cpu-hotplug-callback call sites. + * + * We guarantee that only a single cpu is being + * updated at any particular moment of time. + */ +static DEFINE_MUTEX(microcode_mutex); + +struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; +EXPORT_SYMBOL_GPL(ucode_cpu_info); + +/* + * Operations that are run on a target cpu: + */ + +struct cpu_info_ctx { + struct cpu_signature *cpu_sig; + int err; +}; + +static void collect_cpu_info_local(void *arg) +{ + struct cpu_info_ctx *ctx = arg; + + ctx->err = microcode_ops->collect_cpu_info(smp_processor_id(), + ctx->cpu_sig); +} + +static int collect_cpu_info_on_target(int cpu, struct cpu_signature *cpu_sig) +{ + struct cpu_info_ctx ctx = { .cpu_sig = cpu_sig, .err = 0 }; + int ret; + + ret = smp_call_function_single(cpu, collect_cpu_info_local, &ctx, 1); + if (!ret) + ret = ctx.err; + + return ret; +} + +static int collect_cpu_info(int cpu) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + int ret; + + memset(uci, 0, sizeof(*uci)); + + ret = collect_cpu_info_on_target(cpu, &uci->cpu_sig); + if (!ret) + uci->valid = 1; + + return ret; +} + +struct apply_microcode_ctx { + int err; +}; + +static void apply_microcode_local(void *arg) +{ + struct apply_microcode_ctx *ctx = arg; + + ctx->err = microcode_ops->apply_microcode(smp_processor_id()); +} + +static int apply_microcode_on_target(int cpu) +{ + struct apply_microcode_ctx ctx = { .err = 0 }; + int ret; + + ret = smp_call_function_single(cpu, apply_microcode_local, &ctx, 1); + if (!ret) + ret = ctx.err; + + return ret; +} + +#ifdef CONFIG_MICROCODE_OLD_INTERFACE +static int do_microcode_update(const void __user *buf, size_t size) +{ + int error = 0; + int cpu; + + for_each_online_cpu(cpu) { + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + enum ucode_state ustate; + + if (!uci->valid) + continue; + + ustate = microcode_ops->request_microcode_user(cpu, buf, size); + if (ustate == UCODE_ERROR) { + error = -1; + break; + } else if (ustate == UCODE_OK) + apply_microcode_on_target(cpu); + } + + return error; +} + +static int microcode_open(struct inode *inode, struct file *file) +{ + return capable(CAP_SYS_RAWIO) ? nonseekable_open(inode, file) : -EPERM; +} + +static ssize_t microcode_write(struct file *file, const char __user *buf, + size_t len, loff_t *ppos) +{ + ssize_t ret = -EINVAL; + + if ((len >> PAGE_SHIFT) > totalram_pages) { + pr_err("too much data (max %ld pages)\n", totalram_pages); + return ret; + } + + get_online_cpus(); + mutex_lock(µcode_mutex); + + if (do_microcode_update(buf, len) == 0) + ret = (ssize_t)len; + + if (ret > 0) + perf_check_microcode(); + + mutex_unlock(µcode_mutex); + put_online_cpus(); + + return ret; +} + +static const struct file_operations microcode_fops = { + .owner = THIS_MODULE, + .write = microcode_write, + .open = microcode_open, + .llseek = no_llseek, +}; + +static struct miscdevice microcode_dev = { + .minor = MICROCODE_MINOR, + .name = "microcode", + .nodename = "cpu/microcode", + .fops = µcode_fops, +}; + +static int __init microcode_dev_init(void) +{ + int error; + + error = misc_register(µcode_dev); + if (error) { + pr_err("can't misc_register on minor=%d\n", MICROCODE_MINOR); + return error; + } + + return 0; +} + +static void __exit microcode_dev_exit(void) +{ + misc_deregister(µcode_dev); +} + +MODULE_ALIAS_MISCDEV(MICROCODE_MINOR); +MODULE_ALIAS("devname:cpu/microcode"); +#else +#define microcode_dev_init() 0 +#define microcode_dev_exit() do { } while (0) +#endif + +/* fake device for request_firmware */ +static struct platform_device *microcode_pdev; + +static int reload_for_cpu(int cpu) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + enum ucode_state ustate; + int err = 0; + + if (!uci->valid) + return err; + + ustate = microcode_ops->request_microcode_fw(cpu, µcode_pdev->dev, true); + if (ustate == UCODE_OK) + apply_microcode_on_target(cpu); + else + if (ustate == UCODE_ERROR) + err = -EINVAL; + return err; +} + +static ssize_t reload_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + unsigned long val; + int cpu; + ssize_t ret = 0, tmp_ret; + + ret = kstrtoul(buf, 0, &val); + if (ret) + return ret; + + if (val != 1) + return size; + + get_online_cpus(); + mutex_lock(µcode_mutex); + for_each_online_cpu(cpu) { + tmp_ret = reload_for_cpu(cpu); + if (tmp_ret != 0) + pr_warn("Error reloading microcode on CPU %d\n", cpu); + + /* save retval of the first encountered reload error */ + if (!ret) + ret = tmp_ret; + } + if (!ret) + perf_check_microcode(); + mutex_unlock(µcode_mutex); + put_online_cpus(); + + if (!ret) + ret = size; + + return ret; +} + +static ssize_t version_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; + + return sprintf(buf, "0x%x\n", uci->cpu_sig.rev); +} + +static ssize_t pf_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; + + return sprintf(buf, "0x%x\n", uci->cpu_sig.pf); +} + +static DEVICE_ATTR(reload, 0200, NULL, reload_store); +static DEVICE_ATTR(version, 0400, version_show, NULL); +static DEVICE_ATTR(processor_flags, 0400, pf_show, NULL); + +static struct attribute *mc_default_attrs[] = { + &dev_attr_version.attr, + &dev_attr_processor_flags.attr, + NULL +}; + +static struct attribute_group mc_attr_group = { + .attrs = mc_default_attrs, + .name = "microcode", +}; + +static void microcode_fini_cpu(int cpu) +{ + microcode_ops->microcode_fini_cpu(cpu); +} + +static enum ucode_state microcode_resume_cpu(int cpu) +{ + pr_debug("CPU%d updated upon resume\n", cpu); + + if (apply_microcode_on_target(cpu)) + return UCODE_ERROR; + + return UCODE_OK; +} + +static enum ucode_state microcode_init_cpu(int cpu, bool refresh_fw) +{ + enum ucode_state ustate; + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + + if (uci && uci->valid) + return UCODE_OK; + + if (collect_cpu_info(cpu)) + return UCODE_ERROR; + + /* --dimm. Trigger a delayed update? */ + if (system_state != SYSTEM_RUNNING) + return UCODE_NFOUND; + + ustate = microcode_ops->request_microcode_fw(cpu, µcode_pdev->dev, + refresh_fw); + + if (ustate == UCODE_OK) { + pr_debug("CPU%d updated upon init\n", cpu); + apply_microcode_on_target(cpu); + } + + return ustate; +} + +static enum ucode_state microcode_update_cpu(int cpu) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + + if (uci->valid) + return microcode_resume_cpu(cpu); + + return microcode_init_cpu(cpu, false); +} + +static int mc_device_add(struct device *dev, struct subsys_interface *sif) +{ + int err, cpu = dev->id; + + if (!cpu_online(cpu)) + return 0; + + pr_debug("CPU%d added\n", cpu); + + err = sysfs_create_group(&dev->kobj, &mc_attr_group); + if (err) + return err; + + if (microcode_init_cpu(cpu, true) == UCODE_ERROR) + return -EINVAL; + + return err; +} + +static int mc_device_remove(struct device *dev, struct subsys_interface *sif) +{ + int cpu = dev->id; + + if (!cpu_online(cpu)) + return 0; + + pr_debug("CPU%d removed\n", cpu); + microcode_fini_cpu(cpu); + sysfs_remove_group(&dev->kobj, &mc_attr_group); + return 0; +} + +static struct subsys_interface mc_cpu_interface = { + .name = "microcode", + .subsys = &cpu_subsys, + .add_dev = mc_device_add, + .remove_dev = mc_device_remove, +}; + +/** + * mc_bp_resume - Update boot CPU microcode during resume. + */ +static void mc_bp_resume(void) +{ + int cpu = smp_processor_id(); + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + + if (uci->valid && uci->mc) + microcode_ops->apply_microcode(cpu); +} + +static struct syscore_ops mc_syscore_ops = { + .resume = mc_bp_resume, +}; + +static int +mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) +{ + unsigned int cpu = (unsigned long)hcpu; + struct device *dev; + + dev = get_cpu_device(cpu); + + switch (action & ~CPU_TASKS_FROZEN) { + case CPU_ONLINE: + microcode_update_cpu(cpu); + pr_debug("CPU%d added\n", cpu); + /* + * "break" is missing on purpose here because we want to fall + * through in order to create the sysfs group. + */ + + case CPU_DOWN_FAILED: + if (sysfs_create_group(&dev->kobj, &mc_attr_group)) + pr_err("Failed to create group for CPU%d\n", cpu); + break; + + case CPU_DOWN_PREPARE: + /* Suspend is in progress, only remove the interface */ + sysfs_remove_group(&dev->kobj, &mc_attr_group); + pr_debug("CPU%d removed\n", cpu); + break; + + /* + * case CPU_DEAD: + * + * When a CPU goes offline, don't free up or invalidate the copy of + * the microcode in kernel memory, so that we can reuse it when the + * CPU comes back online without unnecessarily requesting the userspace + * for it again. + */ + } + + /* The CPU refused to come up during a system resume */ + if (action == CPU_UP_CANCELED_FROZEN) + microcode_fini_cpu(cpu); + + return NOTIFY_OK; +} + +static struct notifier_block __refdata mc_cpu_notifier = { + .notifier_call = mc_cpu_callback, +}; + +#ifdef MODULE +/* Autoload on Intel and AMD systems */ +static const struct x86_cpu_id __initconst microcode_id[] = { +#ifdef CONFIG_MICROCODE_INTEL + { X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, }, +#endif +#ifdef CONFIG_MICROCODE_AMD + { X86_VENDOR_AMD, X86_FAMILY_ANY, X86_MODEL_ANY, }, +#endif + {} +}; +MODULE_DEVICE_TABLE(x86cpu, microcode_id); +#endif + +static struct attribute *cpu_root_microcode_attrs[] = { + &dev_attr_reload.attr, + NULL +}; + +static struct attribute_group cpu_root_microcode_group = { + .name = "microcode", + .attrs = cpu_root_microcode_attrs, +}; + +static int __init microcode_init(void) +{ + struct cpuinfo_x86 *c = &cpu_data(0); + int error; + + if (c->x86_vendor == X86_VENDOR_INTEL) + microcode_ops = init_intel_microcode(); + else if (c->x86_vendor == X86_VENDOR_AMD) + microcode_ops = init_amd_microcode(); + else + pr_err("no support for this CPU vendor\n"); + + if (!microcode_ops) + return -ENODEV; + + microcode_pdev = platform_device_register_simple("microcode", -1, + NULL, 0); + if (IS_ERR(microcode_pdev)) + return PTR_ERR(microcode_pdev); + + get_online_cpus(); + mutex_lock(µcode_mutex); + + error = subsys_interface_register(&mc_cpu_interface); + if (!error) + perf_check_microcode(); + mutex_unlock(µcode_mutex); + put_online_cpus(); + + if (error) + goto out_pdev; + + error = sysfs_create_group(&cpu_subsys.dev_root->kobj, + &cpu_root_microcode_group); + + if (error) { + pr_err("Error creating microcode group!\n"); + goto out_driver; + } + + error = microcode_dev_init(); + if (error) + goto out_ucode_group; + + register_syscore_ops(&mc_syscore_ops); + register_hotcpu_notifier(&mc_cpu_notifier); + + pr_info("Microcode Update Driver: v" MICROCODE_VERSION + " , Peter Oruba\n"); + + return 0; + + out_ucode_group: + sysfs_remove_group(&cpu_subsys.dev_root->kobj, + &cpu_root_microcode_group); + + out_driver: + get_online_cpus(); + mutex_lock(µcode_mutex); + + subsys_interface_unregister(&mc_cpu_interface); + + mutex_unlock(µcode_mutex); + put_online_cpus(); + + out_pdev: + platform_device_unregister(microcode_pdev); + return error; + +} +module_init(microcode_init); + +static void __exit microcode_exit(void) +{ + struct cpuinfo_x86 *c = &cpu_data(0); + + microcode_dev_exit(); + + unregister_hotcpu_notifier(&mc_cpu_notifier); + unregister_syscore_ops(&mc_syscore_ops); + + sysfs_remove_group(&cpu_subsys.dev_root->kobj, + &cpu_root_microcode_group); + + get_online_cpus(); + mutex_lock(µcode_mutex); + + subsys_interface_unregister(&mc_cpu_interface); + + mutex_unlock(µcode_mutex); + put_online_cpus(); + + platform_device_unregister(microcode_pdev); + + microcode_ops = NULL; + + if (c->x86_vendor == X86_VENDOR_AMD) + exit_amd_microcode(); + + pr_info("Microcode Update Driver: v" MICROCODE_VERSION " removed.\n"); +} +module_exit(microcode_exit); diff --git a/arch/x86/kernel/cpu/microcode/core_early.c b/arch/x86/kernel/cpu/microcode/core_early.c new file mode 100644 index 000000000000..be7f8514f577 --- /dev/null +++ b/arch/x86/kernel/cpu/microcode/core_early.c @@ -0,0 +1,141 @@ +/* + * X86 CPU microcode early update for Linux + * + * Copyright (C) 2012 Fenghua Yu + * H Peter Anvin" + * + * This driver allows to early upgrade microcode on Intel processors + * belonging to IA-32 family - PentiumPro, Pentium II, + * Pentium III, Xeon, Pentium 4, etc. + * + * Reference: Section 9.11 of Volume 3, IA-32 Intel Architecture + * Software Developer's Manual. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include +#include +#include + +#define QCHAR(a, b, c, d) ((a) + ((b) << 8) + ((c) << 16) + ((d) << 24)) +#define CPUID_INTEL1 QCHAR('G', 'e', 'n', 'u') +#define CPUID_INTEL2 QCHAR('i', 'n', 'e', 'I') +#define CPUID_INTEL3 QCHAR('n', 't', 'e', 'l') +#define CPUID_AMD1 QCHAR('A', 'u', 't', 'h') +#define CPUID_AMD2 QCHAR('e', 'n', 't', 'i') +#define CPUID_AMD3 QCHAR('c', 'A', 'M', 'D') + +#define CPUID_IS(a, b, c, ebx, ecx, edx) \ + (!((ebx ^ (a))|(edx ^ (b))|(ecx ^ (c)))) + +/* + * In early loading microcode phase on BSP, boot_cpu_data is not set up yet. + * x86_vendor() gets vendor id for BSP. + * + * In 32 bit AP case, accessing boot_cpu_data needs linear address. To simplify + * coding, we still use x86_vendor() to get vendor id for AP. + * + * x86_vendor() gets vendor information directly through cpuid. + */ +static int x86_vendor(void) +{ + u32 eax = 0x00000000; + u32 ebx, ecx = 0, edx; + + native_cpuid(&eax, &ebx, &ecx, &edx); + + if (CPUID_IS(CPUID_INTEL1, CPUID_INTEL2, CPUID_INTEL3, ebx, ecx, edx)) + return X86_VENDOR_INTEL; + + if (CPUID_IS(CPUID_AMD1, CPUID_AMD2, CPUID_AMD3, ebx, ecx, edx)) + return X86_VENDOR_AMD; + + return X86_VENDOR_UNKNOWN; +} + +static int x86_family(void) +{ + u32 eax = 0x00000001; + u32 ebx, ecx = 0, edx; + int x86; + + native_cpuid(&eax, &ebx, &ecx, &edx); + + x86 = (eax >> 8) & 0xf; + if (x86 == 15) + x86 += (eax >> 20) & 0xff; + + return x86; +} + +void __init load_ucode_bsp(void) +{ + int vendor, x86; + + if (!have_cpuid_p()) + return; + + vendor = x86_vendor(); + x86 = x86_family(); + + switch (vendor) { + case X86_VENDOR_INTEL: + if (x86 >= 6) + load_ucode_intel_bsp(); + break; + case X86_VENDOR_AMD: + if (x86 >= 0x10) + load_ucode_amd_bsp(); + break; + default: + break; + } +} + +void load_ucode_ap(void) +{ + int vendor, x86; + + if (!have_cpuid_p()) + return; + + vendor = x86_vendor(); + x86 = x86_family(); + + switch (vendor) { + case X86_VENDOR_INTEL: + if (x86 >= 6) + load_ucode_intel_ap(); + break; + case X86_VENDOR_AMD: + if (x86 >= 0x10) + load_ucode_amd_ap(); + break; + default: + break; + } +} + +int __init save_microcode_in_initrd(void) +{ + struct cpuinfo_x86 *c = &boot_cpu_data; + + switch (c->x86_vendor) { + case X86_VENDOR_INTEL: + if (c->x86 >= 6) + save_microcode_in_initrd_intel(); + break; + case X86_VENDOR_AMD: + if (c->x86 >= 0x10) + save_microcode_in_initrd_amd(); + break; + default: + break; + } + + return 0; +} diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c new file mode 100644 index 000000000000..5fb2cebf556b --- /dev/null +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -0,0 +1,333 @@ +/* + * Intel CPU Microcode Update Driver for Linux + * + * Copyright (C) 2000-2006 Tigran Aivazian + * 2006 Shaohua Li + * + * This driver allows to upgrade microcode on Intel processors + * belonging to IA-32 family - PentiumPro, Pentium II, + * Pentium III, Xeon, Pentium 4, etc. + * + * Reference: Section 8.11 of Volume 3a, IA-32 Intel? Architecture + * Software Developer's Manual + * Order Number 253668 or free download from: + * + * http://developer.intel.com/Assets/PDF/manual/253668.pdf + * + * For more information, go to http://www.urbanmyth.org/microcode + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * 1.0 16 Feb 2000, Tigran Aivazian + * Initial release. + * 1.01 18 Feb 2000, Tigran Aivazian + * Added read() support + cleanups. + * 1.02 21 Feb 2000, Tigran Aivazian + * Added 'device trimming' support. open(O_WRONLY) zeroes + * and frees the saved copy of applied microcode. + * 1.03 29 Feb 2000, Tigran Aivazian + * Made to use devfs (/dev/cpu/microcode) + cleanups. + * 1.04 06 Jun 2000, Simon Trimmer + * Added misc device support (now uses both devfs and misc). + * Added MICROCODE_IOCFREE ioctl to clear memory. + * 1.05 09 Jun 2000, Simon Trimmer + * Messages for error cases (non Intel & no suitable microcode). + * 1.06 03 Aug 2000, Tigran Aivazian + * Removed ->release(). Removed exclusive open and status bitmap. + * Added microcode_rwsem to serialize read()/write()/ioctl(). + * Removed global kernel lock usage. + * 1.07 07 Sep 2000, Tigran Aivazian + * Write 0 to 0x8B msr and then cpuid before reading revision, + * so that it works even if there were no update done by the + * BIOS. Otherwise, reading from 0x8B gives junk (which happened + * to be 0 on my machine which is why it worked even when I + * disabled update by the BIOS) + * Thanks to Eric W. Biederman for the fix. + * 1.08 11 Dec 2000, Richard Schaal and + * Tigran Aivazian + * Intel Pentium 4 processor support and bugfixes. + * 1.09 30 Oct 2001, Tigran Aivazian + * Bugfix for HT (Hyper-Threading) enabled processors + * whereby processor resources are shared by all logical processors + * in a single CPU package. + * 1.10 28 Feb 2002 Asit K Mallick and + * Tigran Aivazian , + * Serialize updates as required on HT processors due to + * speculative nature of implementation. + * 1.11 22 Mar 2002 Tigran Aivazian + * Fix the panic when writing zero-length microcode chunk. + * 1.12 29 Sep 2003 Nitin Kamble , + * Jun Nakajima + * Support for the microcode updates in the new format. + * 1.13 10 Oct 2003 Tigran Aivazian + * Removed ->read() method and obsoleted MICROCODE_IOCFREE ioctl + * because we no longer hold a copy of applied microcode + * in kernel memory. + * 1.14 25 Jun 2004 Tigran Aivazian + * Fix sigmatch() macro to handle old CPUs with pf == 0. + * Thanks to Stuart Swales for pointing out this bug. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include + +#include +#include +#include + +MODULE_DESCRIPTION("Microcode Update Driver"); +MODULE_AUTHOR("Tigran Aivazian "); +MODULE_LICENSE("GPL"); + +static int collect_cpu_info(int cpu_num, struct cpu_signature *csig) +{ + struct cpuinfo_x86 *c = &cpu_data(cpu_num); + unsigned int val[2]; + + memset(csig, 0, sizeof(*csig)); + + csig->sig = cpuid_eax(0x00000001); + + if ((c->x86_model >= 5) || (c->x86 > 6)) { + /* get processor flags from MSR 0x17 */ + rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]); + csig->pf = 1 << ((val[1] >> 18) & 7); + } + + csig->rev = c->microcode; + pr_info("CPU%d sig=0x%x, pf=0x%x, revision=0x%x\n", + cpu_num, csig->sig, csig->pf, csig->rev); + + return 0; +} + +/* + * return 0 - no update found + * return 1 - found update + */ +static int get_matching_mc(struct microcode_intel *mc_intel, int cpu) +{ + struct cpu_signature cpu_sig; + unsigned int csig, cpf, crev; + + collect_cpu_info(cpu, &cpu_sig); + + csig = cpu_sig.sig; + cpf = cpu_sig.pf; + crev = cpu_sig.rev; + + return get_matching_microcode(csig, cpf, mc_intel, crev); +} + +int apply_microcode(int cpu) +{ + struct microcode_intel *mc_intel; + struct ucode_cpu_info *uci; + unsigned int val[2]; + int cpu_num = raw_smp_processor_id(); + struct cpuinfo_x86 *c = &cpu_data(cpu_num); + + uci = ucode_cpu_info + cpu; + mc_intel = uci->mc; + + /* We should bind the task to the CPU */ + BUG_ON(cpu_num != cpu); + + if (mc_intel == NULL) + return 0; + + /* + * Microcode on this CPU could be updated earlier. Only apply the + * microcode patch in mc_intel when it is newer than the one on this + * CPU. + */ + if (get_matching_mc(mc_intel, cpu) == 0) + return 0; + + /* write microcode via MSR 0x79 */ + wrmsr(MSR_IA32_UCODE_WRITE, + (unsigned long) mc_intel->bits, + (unsigned long) mc_intel->bits >> 16 >> 16); + wrmsr(MSR_IA32_UCODE_REV, 0, 0); + + /* As documented in the SDM: Do a CPUID 1 here */ + sync_core(); + + /* get the current revision from MSR 0x8B */ + rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); + + if (val[1] != mc_intel->hdr.rev) { + pr_err("CPU%d update to revision 0x%x failed\n", + cpu_num, mc_intel->hdr.rev); + return -1; + } + pr_info("CPU%d updated to revision 0x%x, date = %04x-%02x-%02x\n", + cpu_num, val[1], + mc_intel->hdr.date & 0xffff, + mc_intel->hdr.date >> 24, + (mc_intel->hdr.date >> 16) & 0xff); + + uci->cpu_sig.rev = val[1]; + c->microcode = val[1]; + + return 0; +} + +static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, + int (*get_ucode_data)(void *, const void *, size_t)) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + u8 *ucode_ptr = data, *new_mc = NULL, *mc = NULL; + int new_rev = uci->cpu_sig.rev; + unsigned int leftover = size; + enum ucode_state state = UCODE_OK; + unsigned int curr_mc_size = 0; + unsigned int csig, cpf; + + while (leftover) { + struct microcode_header_intel mc_header; + unsigned int mc_size; + + if (get_ucode_data(&mc_header, ucode_ptr, sizeof(mc_header))) + break; + + mc_size = get_totalsize(&mc_header); + if (!mc_size || mc_size > leftover) { + pr_err("error! Bad data in microcode data file\n"); + break; + } + + /* For performance reasons, reuse mc area when possible */ + if (!mc || mc_size > curr_mc_size) { + vfree(mc); + mc = vmalloc(mc_size); + if (!mc) + break; + curr_mc_size = mc_size; + } + + if (get_ucode_data(mc, ucode_ptr, mc_size) || + microcode_sanity_check(mc, 1) < 0) { + break; + } + + csig = uci->cpu_sig.sig; + cpf = uci->cpu_sig.pf; + if (get_matching_microcode(csig, cpf, mc, new_rev)) { + vfree(new_mc); + new_rev = mc_header.rev; + new_mc = mc; + mc = NULL; /* trigger new vmalloc */ + } + + ucode_ptr += mc_size; + leftover -= mc_size; + } + + vfree(mc); + + if (leftover) { + vfree(new_mc); + state = UCODE_ERROR; + goto out; + } + + if (!new_mc) { + state = UCODE_NFOUND; + goto out; + } + + vfree(uci->mc); + uci->mc = (struct microcode_intel *)new_mc; + + /* + * If early loading microcode is supported, save this mc into + * permanent memory. So it will be loaded early when a CPU is hot added + * or resumes. + */ + save_mc_for_early(new_mc); + + pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n", + cpu, new_rev, uci->cpu_sig.rev); +out: + return state; +} + +static int get_ucode_fw(void *to, const void *from, size_t n) +{ + memcpy(to, from, n); + return 0; +} + +static enum ucode_state request_microcode_fw(int cpu, struct device *device, + bool refresh_fw) +{ + char name[30]; + struct cpuinfo_x86 *c = &cpu_data(cpu); + const struct firmware *firmware; + enum ucode_state ret; + + sprintf(name, "intel-ucode/%02x-%02x-%02x", + c->x86, c->x86_model, c->x86_mask); + + if (request_firmware(&firmware, name, device)) { + pr_debug("data file %s load failed\n", name); + return UCODE_NFOUND; + } + + ret = generic_load_microcode(cpu, (void *)firmware->data, + firmware->size, &get_ucode_fw); + + release_firmware(firmware); + + return ret; +} + +static int get_ucode_user(void *to, const void *from, size_t n) +{ + return copy_from_user(to, from, n); +} + +static enum ucode_state +request_microcode_user(int cpu, const void __user *buf, size_t size) +{ + return generic_load_microcode(cpu, (void *)buf, size, &get_ucode_user); +} + +static void microcode_fini_cpu(int cpu) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + + vfree(uci->mc); + uci->mc = NULL; +} + +static struct microcode_ops microcode_intel_ops = { + .request_microcode_user = request_microcode_user, + .request_microcode_fw = request_microcode_fw, + .collect_cpu_info = collect_cpu_info, + .apply_microcode = apply_microcode, + .microcode_fini_cpu = microcode_fini_cpu, +}; + +struct microcode_ops * __init init_intel_microcode(void) +{ + struct cpuinfo_x86 *c = &cpu_data(0); + + if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 || + cpu_has(c, X86_FEATURE_IA64)) { + pr_err("Intel CPU family 0x%x not supported\n", c->x86); + return NULL; + } + + return µcode_intel_ops; +} + diff --git a/arch/x86/kernel/cpu/microcode/intel_early.c b/arch/x86/kernel/cpu/microcode/intel_early.c new file mode 100644 index 000000000000..18f739129e72 --- /dev/null +++ b/arch/x86/kernel/cpu/microcode/intel_early.c @@ -0,0 +1,787 @@ +/* + * Intel CPU microcode early update for Linux + * + * Copyright (C) 2012 Fenghua Yu + * H Peter Anvin" + * + * This allows to early upgrade microcode on Intel processors + * belonging to IA-32 family - PentiumPro, Pentium II, + * Pentium III, Xeon, Pentium 4, etc. + * + * Reference: Section 9.11 of Volume 3, IA-32 Intel Architecture + * Software Developer's Manual. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +unsigned long mc_saved_in_initrd[MAX_UCODE_COUNT]; +struct mc_saved_data { + unsigned int mc_saved_count; + struct microcode_intel **mc_saved; +} mc_saved_data; + +static enum ucode_state +generic_load_microcode_early(struct microcode_intel **mc_saved_p, + unsigned int mc_saved_count, + struct ucode_cpu_info *uci) +{ + struct microcode_intel *ucode_ptr, *new_mc = NULL; + int new_rev = uci->cpu_sig.rev; + enum ucode_state state = UCODE_OK; + unsigned int mc_size; + struct microcode_header_intel *mc_header; + unsigned int csig = uci->cpu_sig.sig; + unsigned int cpf = uci->cpu_sig.pf; + int i; + + for (i = 0; i < mc_saved_count; i++) { + ucode_ptr = mc_saved_p[i]; + + mc_header = (struct microcode_header_intel *)ucode_ptr; + mc_size = get_totalsize(mc_header); + if (get_matching_microcode(csig, cpf, ucode_ptr, new_rev)) { + new_rev = mc_header->rev; + new_mc = ucode_ptr; + } + } + + if (!new_mc) { + state = UCODE_NFOUND; + goto out; + } + + uci->mc = (struct microcode_intel *)new_mc; +out: + return state; +} + +static void +microcode_pointer(struct microcode_intel **mc_saved, + unsigned long *mc_saved_in_initrd, + unsigned long initrd_start, int mc_saved_count) +{ + int i; + + for (i = 0; i < mc_saved_count; i++) + mc_saved[i] = (struct microcode_intel *) + (mc_saved_in_initrd[i] + initrd_start); +} + +#ifdef CONFIG_X86_32 +static void +microcode_phys(struct microcode_intel **mc_saved_tmp, + struct mc_saved_data *mc_saved_data) +{ + int i; + struct microcode_intel ***mc_saved; + + mc_saved = (struct microcode_intel ***) + __pa_nodebug(&mc_saved_data->mc_saved); + for (i = 0; i < mc_saved_data->mc_saved_count; i++) { + struct microcode_intel *p; + + p = *(struct microcode_intel **) + __pa_nodebug(mc_saved_data->mc_saved + i); + mc_saved_tmp[i] = (struct microcode_intel *)__pa_nodebug(p); + } +} +#endif + +static enum ucode_state +load_microcode(struct mc_saved_data *mc_saved_data, + unsigned long *mc_saved_in_initrd, + unsigned long initrd_start, + struct ucode_cpu_info *uci) +{ + struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT]; + unsigned int count = mc_saved_data->mc_saved_count; + + if (!mc_saved_data->mc_saved) { + microcode_pointer(mc_saved_tmp, mc_saved_in_initrd, + initrd_start, count); + + return generic_load_microcode_early(mc_saved_tmp, count, uci); + } else { +#ifdef CONFIG_X86_32 + microcode_phys(mc_saved_tmp, mc_saved_data); + return generic_load_microcode_early(mc_saved_tmp, count, uci); +#else + return generic_load_microcode_early(mc_saved_data->mc_saved, + count, uci); +#endif + } +} + +static u8 get_x86_family(unsigned long sig) +{ + u8 x86; + + x86 = (sig >> 8) & 0xf; + + if (x86 == 0xf) + x86 += (sig >> 20) & 0xff; + + return x86; +} + +static u8 get_x86_model(unsigned long sig) +{ + u8 x86, x86_model; + + x86 = get_x86_family(sig); + x86_model = (sig >> 4) & 0xf; + + if (x86 == 0x6 || x86 == 0xf) + x86_model += ((sig >> 16) & 0xf) << 4; + + return x86_model; +} + +/* + * Given CPU signature and a microcode patch, this function finds if the + * microcode patch has matching family and model with the CPU. + */ +static enum ucode_state +matching_model_microcode(struct microcode_header_intel *mc_header, + unsigned long sig) +{ + u8 x86, x86_model; + u8 x86_ucode, x86_model_ucode; + struct extended_sigtable *ext_header; + unsigned long total_size = get_totalsize(mc_header); + unsigned long data_size = get_datasize(mc_header); + int ext_sigcount, i; + struct extended_signature *ext_sig; + + x86 = get_x86_family(sig); + x86_model = get_x86_model(sig); + + x86_ucode = get_x86_family(mc_header->sig); + x86_model_ucode = get_x86_model(mc_header->sig); + + if (x86 == x86_ucode && x86_model == x86_model_ucode) + return UCODE_OK; + + /* Look for ext. headers: */ + if (total_size <= data_size + MC_HEADER_SIZE) + return UCODE_NFOUND; + + ext_header = (struct extended_sigtable *) + mc_header + data_size + MC_HEADER_SIZE; + ext_sigcount = ext_header->count; + ext_sig = (void *)ext_header + EXT_HEADER_SIZE; + + for (i = 0; i < ext_sigcount; i++) { + x86_ucode = get_x86_family(ext_sig->sig); + x86_model_ucode = get_x86_model(ext_sig->sig); + + if (x86 == x86_ucode && x86_model == x86_model_ucode) + return UCODE_OK; + + ext_sig++; + } + + return UCODE_NFOUND; +} + +static int +save_microcode(struct mc_saved_data *mc_saved_data, + struct microcode_intel **mc_saved_src, + unsigned int mc_saved_count) +{ + int i, j; + struct microcode_intel **mc_saved_p; + int ret; + + if (!mc_saved_count) + return -EINVAL; + + /* + * Copy new microcode data. + */ + mc_saved_p = kmalloc(mc_saved_count*sizeof(struct microcode_intel *), + GFP_KERNEL); + if (!mc_saved_p) + return -ENOMEM; + + for (i = 0; i < mc_saved_count; i++) { + struct microcode_intel *mc = mc_saved_src[i]; + struct microcode_header_intel *mc_header = &mc->hdr; + unsigned long mc_size = get_totalsize(mc_header); + mc_saved_p[i] = kmalloc(mc_size, GFP_KERNEL); + if (!mc_saved_p[i]) { + ret = -ENOMEM; + goto err; + } + if (!mc_saved_src[i]) { + ret = -EINVAL; + goto err; + } + memcpy(mc_saved_p[i], mc, mc_size); + } + + /* + * Point to newly saved microcode. + */ + mc_saved_data->mc_saved = mc_saved_p; + mc_saved_data->mc_saved_count = mc_saved_count; + + return 0; + +err: + for (j = 0; j <= i; j++) + kfree(mc_saved_p[j]); + kfree(mc_saved_p); + + return ret; +} + +/* + * A microcode patch in ucode_ptr is saved into mc_saved + * - if it has matching signature and newer revision compared to an existing + * patch mc_saved. + * - or if it is a newly discovered microcode patch. + * + * The microcode patch should have matching model with CPU. + */ +static void _save_mc(struct microcode_intel **mc_saved, u8 *ucode_ptr, + unsigned int *mc_saved_count_p) +{ + int i; + int found = 0; + unsigned int mc_saved_count = *mc_saved_count_p; + struct microcode_header_intel *mc_header; + + mc_header = (struct microcode_header_intel *)ucode_ptr; + for (i = 0; i < mc_saved_count; i++) { + unsigned int sig, pf; + unsigned int new_rev; + struct microcode_header_intel *mc_saved_header = + (struct microcode_header_intel *)mc_saved[i]; + sig = mc_saved_header->sig; + pf = mc_saved_header->pf; + new_rev = mc_header->rev; + + if (get_matching_sig(sig, pf, ucode_ptr, new_rev)) { + found = 1; + if (update_match_revision(mc_header, new_rev)) { + /* + * Found an older ucode saved before. + * Replace the older one with this newer + * one. + */ + mc_saved[i] = + (struct microcode_intel *)ucode_ptr; + break; + } + } + } + if (i >= mc_saved_count && !found) + /* + * This ucode is first time discovered in ucode file. + * Save it to memory. + */ + mc_saved[mc_saved_count++] = + (struct microcode_intel *)ucode_ptr; + + *mc_saved_count_p = mc_saved_count; +} + +/* + * Get microcode matching with BSP's model. Only CPUs with the same model as + * BSP can stay in the platform. + */ +static enum ucode_state __init +get_matching_model_microcode(int cpu, unsigned long start, + void *data, size_t size, + struct mc_saved_data *mc_saved_data, + unsigned long *mc_saved_in_initrd, + struct ucode_cpu_info *uci) +{ + u8 *ucode_ptr = data; + unsigned int leftover = size; + enum ucode_state state = UCODE_OK; + unsigned int mc_size; + struct microcode_header_intel *mc_header; + struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT]; + unsigned int mc_saved_count = mc_saved_data->mc_saved_count; + int i; + + while (leftover) { + mc_header = (struct microcode_header_intel *)ucode_ptr; + + mc_size = get_totalsize(mc_header); + if (!mc_size || mc_size > leftover || + microcode_sanity_check(ucode_ptr, 0) < 0) + break; + + leftover -= mc_size; + + /* + * Since APs with same family and model as the BSP may boot in + * the platform, we need to find and save microcode patches + * with the same family and model as the BSP. + */ + if (matching_model_microcode(mc_header, uci->cpu_sig.sig) != + UCODE_OK) { + ucode_ptr += mc_size; + continue; + } + + _save_mc(mc_saved_tmp, ucode_ptr, &mc_saved_count); + + ucode_ptr += mc_size; + } + + if (leftover) { + state = UCODE_ERROR; + goto out; + } + + if (mc_saved_count == 0) { + state = UCODE_NFOUND; + goto out; + } + + for (i = 0; i < mc_saved_count; i++) + mc_saved_in_initrd[i] = (unsigned long)mc_saved_tmp[i] - start; + + mc_saved_data->mc_saved_count = mc_saved_count; +out: + return state; +} + +static int collect_cpu_info_early(struct ucode_cpu_info *uci) +{ + unsigned int val[2]; + u8 x86, x86_model; + struct cpu_signature csig; + unsigned int eax, ebx, ecx, edx; + + csig.sig = 0; + csig.pf = 0; + csig.rev = 0; + + memset(uci, 0, sizeof(*uci)); + + eax = 0x00000001; + ecx = 0; + native_cpuid(&eax, &ebx, &ecx, &edx); + csig.sig = eax; + + x86 = get_x86_family(csig.sig); + x86_model = get_x86_model(csig.sig); + + if ((x86_model >= 5) || (x86 > 6)) { + /* get processor flags from MSR 0x17 */ + native_rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]); + csig.pf = 1 << ((val[1] >> 18) & 7); + } + native_wrmsr(MSR_IA32_UCODE_REV, 0, 0); + + /* As documented in the SDM: Do a CPUID 1 here */ + sync_core(); + + /* get the current revision from MSR 0x8B */ + native_rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); + + csig.rev = val[1]; + + uci->cpu_sig = csig; + uci->valid = 1; + + return 0; +} + +#ifdef DEBUG +static void __ref show_saved_mc(void) +{ + int i, j; + unsigned int sig, pf, rev, total_size, data_size, date; + struct ucode_cpu_info uci; + + if (mc_saved_data.mc_saved_count == 0) { + pr_debug("no micorcode data saved.\n"); + return; + } + pr_debug("Total microcode saved: %d\n", mc_saved_data.mc_saved_count); + + collect_cpu_info_early(&uci); + + sig = uci.cpu_sig.sig; + pf = uci.cpu_sig.pf; + rev = uci.cpu_sig.rev; + pr_debug("CPU%d: sig=0x%x, pf=0x%x, rev=0x%x\n", + smp_processor_id(), sig, pf, rev); + + for (i = 0; i < mc_saved_data.mc_saved_count; i++) { + struct microcode_header_intel *mc_saved_header; + struct extended_sigtable *ext_header; + int ext_sigcount; + struct extended_signature *ext_sig; + + mc_saved_header = (struct microcode_header_intel *) + mc_saved_data.mc_saved[i]; + sig = mc_saved_header->sig; + pf = mc_saved_header->pf; + rev = mc_saved_header->rev; + total_size = get_totalsize(mc_saved_header); + data_size = get_datasize(mc_saved_header); + date = mc_saved_header->date; + + pr_debug("mc_saved[%d]: sig=0x%x, pf=0x%x, rev=0x%x, toal size=0x%x, date = %04x-%02x-%02x\n", + i, sig, pf, rev, total_size, + date & 0xffff, + date >> 24, + (date >> 16) & 0xff); + + /* Look for ext. headers: */ + if (total_size <= data_size + MC_HEADER_SIZE) + continue; + + ext_header = (struct extended_sigtable *) + mc_saved_header + data_size + MC_HEADER_SIZE; + ext_sigcount = ext_header->count; + ext_sig = (void *)ext_header + EXT_HEADER_SIZE; + + for (j = 0; j < ext_sigcount; j++) { + sig = ext_sig->sig; + pf = ext_sig->pf; + + pr_debug("\tExtended[%d]: sig=0x%x, pf=0x%x\n", + j, sig, pf); + + ext_sig++; + } + + } +} +#else +static inline void show_saved_mc(void) +{ +} +#endif + +#if defined(CONFIG_MICROCODE_INTEL_EARLY) && defined(CONFIG_HOTPLUG_CPU) +static DEFINE_MUTEX(x86_cpu_microcode_mutex); +/* + * Save this mc into mc_saved_data. So it will be loaded early when a CPU is + * hot added or resumes. + * + * Please make sure this mc should be a valid microcode patch before calling + * this function. + */ +int save_mc_for_early(u8 *mc) +{ + struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT]; + unsigned int mc_saved_count_init; + unsigned int mc_saved_count; + struct microcode_intel **mc_saved; + int ret = 0; + int i; + + /* + * Hold hotplug lock so mc_saved_data is not accessed by a CPU in + * hotplug. + */ + mutex_lock(&x86_cpu_microcode_mutex); + + mc_saved_count_init = mc_saved_data.mc_saved_count; + mc_saved_count = mc_saved_data.mc_saved_count; + mc_saved = mc_saved_data.mc_saved; + + if (mc_saved && mc_saved_count) + memcpy(mc_saved_tmp, mc_saved, + mc_saved_count * sizeof(struct mirocode_intel *)); + /* + * Save the microcode patch mc in mc_save_tmp structure if it's a newer + * version. + */ + + _save_mc(mc_saved_tmp, mc, &mc_saved_count); + + /* + * Save the mc_save_tmp in global mc_saved_data. + */ + ret = save_microcode(&mc_saved_data, mc_saved_tmp, mc_saved_count); + if (ret) { + pr_err("Cannot save microcode patch.\n"); + goto out; + } + + show_saved_mc(); + + /* + * Free old saved microcod data. + */ + if (mc_saved) { + for (i = 0; i < mc_saved_count_init; i++) + kfree(mc_saved[i]); + kfree(mc_saved); + } + +out: + mutex_unlock(&x86_cpu_microcode_mutex); + + return ret; +} +EXPORT_SYMBOL_GPL(save_mc_for_early); +#endif + +static __initdata char ucode_name[] = "kernel/x86/microcode/GenuineIntel.bin"; +static __init enum ucode_state +scan_microcode(unsigned long start, unsigned long end, + struct mc_saved_data *mc_saved_data, + unsigned long *mc_saved_in_initrd, + struct ucode_cpu_info *uci) +{ + unsigned int size = end - start + 1; + struct cpio_data cd; + long offset = 0; +#ifdef CONFIG_X86_32 + char *p = (char *)__pa_nodebug(ucode_name); +#else + char *p = ucode_name; +#endif + + cd.data = NULL; + cd.size = 0; + + cd = find_cpio_data(p, (void *)start, size, &offset); + if (!cd.data) + return UCODE_ERROR; + + + return get_matching_model_microcode(0, start, cd.data, cd.size, + mc_saved_data, mc_saved_in_initrd, + uci); +} + +/* + * Print ucode update info. + */ +static void +print_ucode_info(struct ucode_cpu_info *uci, unsigned int date) +{ + int cpu = smp_processor_id(); + + pr_info("CPU%d microcode updated early to revision 0x%x, date = %04x-%02x-%02x\n", + cpu, + uci->cpu_sig.rev, + date & 0xffff, + date >> 24, + (date >> 16) & 0xff); +} + +#ifdef CONFIG_X86_32 + +static int delay_ucode_info; +static int current_mc_date; + +/* + * Print early updated ucode info after printk works. This is delayed info dump. + */ +void show_ucode_info_early(void) +{ + struct ucode_cpu_info uci; + + if (delay_ucode_info) { + collect_cpu_info_early(&uci); + print_ucode_info(&uci, current_mc_date); + delay_ucode_info = 0; + } +} + +/* + * At this point, we can not call printk() yet. Keep microcode patch number in + * mc_saved_data.mc_saved and delay printing microcode info in + * show_ucode_info_early() until printk() works. + */ +static void print_ucode(struct ucode_cpu_info *uci) +{ + struct microcode_intel *mc_intel; + int *delay_ucode_info_p; + int *current_mc_date_p; + + mc_intel = uci->mc; + if (mc_intel == NULL) + return; + + delay_ucode_info_p = (int *)__pa_nodebug(&delay_ucode_info); + current_mc_date_p = (int *)__pa_nodebug(¤t_mc_date); + + *delay_ucode_info_p = 1; + *current_mc_date_p = mc_intel->hdr.date; +} +#else + +/* + * Flush global tlb. We only do this in x86_64 where paging has been enabled + * already and PGE should be enabled as well. + */ +static inline void flush_tlb_early(void) +{ + __native_flush_tlb_global_irq_disabled(); +} + +static inline void print_ucode(struct ucode_cpu_info *uci) +{ + struct microcode_intel *mc_intel; + + mc_intel = uci->mc; + if (mc_intel == NULL) + return; + + print_ucode_info(uci, mc_intel->hdr.date); +} +#endif + +static int apply_microcode_early(struct mc_saved_data *mc_saved_data, + struct ucode_cpu_info *uci) +{ + struct microcode_intel *mc_intel; + unsigned int val[2]; + + mc_intel = uci->mc; + if (mc_intel == NULL) + return 0; + + /* write microcode via MSR 0x79 */ + native_wrmsr(MSR_IA32_UCODE_WRITE, + (unsigned long) mc_intel->bits, + (unsigned long) mc_intel->bits >> 16 >> 16); + native_wrmsr(MSR_IA32_UCODE_REV, 0, 0); + + /* As documented in the SDM: Do a CPUID 1 here */ + sync_core(); + + /* get the current revision from MSR 0x8B */ + native_rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); + if (val[1] != mc_intel->hdr.rev) + return -1; + +#ifdef CONFIG_X86_64 + /* Flush global tlb. This is precaution. */ + flush_tlb_early(); +#endif + uci->cpu_sig.rev = val[1]; + + print_ucode(uci); + + return 0; +} + +/* + * This function converts microcode patch offsets previously stored in + * mc_saved_in_initrd to pointers and stores the pointers in mc_saved_data. + */ +int __init save_microcode_in_initrd_intel(void) +{ + unsigned int count = mc_saved_data.mc_saved_count; + struct microcode_intel *mc_saved[MAX_UCODE_COUNT]; + int ret = 0; + + if (count == 0) + return ret; + + microcode_pointer(mc_saved, mc_saved_in_initrd, initrd_start, count); + ret = save_microcode(&mc_saved_data, mc_saved, count); + if (ret) + pr_err("Cannot save microcode patches from initrd.\n"); + + show_saved_mc(); + + return ret; +} + +static void __init +_load_ucode_intel_bsp(struct mc_saved_data *mc_saved_data, + unsigned long *mc_saved_in_initrd, + unsigned long initrd_start_early, + unsigned long initrd_end_early, + struct ucode_cpu_info *uci) +{ + collect_cpu_info_early(uci); + scan_microcode(initrd_start_early, initrd_end_early, mc_saved_data, + mc_saved_in_initrd, uci); + load_microcode(mc_saved_data, mc_saved_in_initrd, + initrd_start_early, uci); + apply_microcode_early(mc_saved_data, uci); +} + +void __init +load_ucode_intel_bsp(void) +{ + u64 ramdisk_image, ramdisk_size; + unsigned long initrd_start_early, initrd_end_early; + struct ucode_cpu_info uci; +#ifdef CONFIG_X86_32 + struct boot_params *boot_params_p; + + boot_params_p = (struct boot_params *)__pa_nodebug(&boot_params); + ramdisk_image = boot_params_p->hdr.ramdisk_image; + ramdisk_size = boot_params_p->hdr.ramdisk_size; + initrd_start_early = ramdisk_image; + initrd_end_early = initrd_start_early + ramdisk_size; + + _load_ucode_intel_bsp( + (struct mc_saved_data *)__pa_nodebug(&mc_saved_data), + (unsigned long *)__pa_nodebug(&mc_saved_in_initrd), + initrd_start_early, initrd_end_early, &uci); +#else + ramdisk_image = boot_params.hdr.ramdisk_image; + ramdisk_size = boot_params.hdr.ramdisk_size; + initrd_start_early = ramdisk_image + PAGE_OFFSET; + initrd_end_early = initrd_start_early + ramdisk_size; + + _load_ucode_intel_bsp(&mc_saved_data, mc_saved_in_initrd, + initrd_start_early, initrd_end_early, &uci); +#endif +} + +void load_ucode_intel_ap(void) +{ + struct mc_saved_data *mc_saved_data_p; + struct ucode_cpu_info uci; + unsigned long *mc_saved_in_initrd_p; + unsigned long initrd_start_addr; +#ifdef CONFIG_X86_32 + unsigned long *initrd_start_p; + + mc_saved_in_initrd_p = + (unsigned long *)__pa_nodebug(mc_saved_in_initrd); + mc_saved_data_p = (struct mc_saved_data *)__pa_nodebug(&mc_saved_data); + initrd_start_p = (unsigned long *)__pa_nodebug(&initrd_start); + initrd_start_addr = (unsigned long)__pa_nodebug(*initrd_start_p); +#else + mc_saved_data_p = &mc_saved_data; + mc_saved_in_initrd_p = mc_saved_in_initrd; + initrd_start_addr = initrd_start; +#endif + + /* + * If there is no valid ucode previously saved in memory, no need to + * update ucode on this AP. + */ + if (mc_saved_data_p->mc_saved_count == 0) + return; + + collect_cpu_info_early(&uci); + load_microcode(mc_saved_data_p, mc_saved_in_initrd_p, + initrd_start_addr, &uci); + apply_microcode_early(mc_saved_data_p, &uci); +} diff --git a/arch/x86/kernel/cpu/microcode/intel_lib.c b/arch/x86/kernel/cpu/microcode/intel_lib.c new file mode 100644 index 000000000000..ce69320d0179 --- /dev/null +++ b/arch/x86/kernel/cpu/microcode/intel_lib.c @@ -0,0 +1,174 @@ +/* + * Intel CPU Microcode Update Driver for Linux + * + * Copyright (C) 2012 Fenghua Yu + * H Peter Anvin" + * + * This driver allows to upgrade microcode on Intel processors + * belonging to IA-32 family - PentiumPro, Pentium II, + * Pentium III, Xeon, Pentium 4, etc. + * + * Reference: Section 8.11 of Volume 3a, IA-32 Intel? Architecture + * Software Developer's Manual + * Order Number 253668 or free download from: + * + * http://developer.intel.com/Assets/PDF/manual/253668.pdf + * + * For more information, go to http://www.urbanmyth.org/microcode + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ +#include +#include +#include +#include + +#include +#include +#include + +static inline int +update_match_cpu(unsigned int csig, unsigned int cpf, + unsigned int sig, unsigned int pf) +{ + return (!sigmatch(sig, csig, pf, cpf)) ? 0 : 1; +} + +int +update_match_revision(struct microcode_header_intel *mc_header, int rev) +{ + return (mc_header->rev <= rev) ? 0 : 1; +} + +int microcode_sanity_check(void *mc, int print_err) +{ + unsigned long total_size, data_size, ext_table_size; + struct microcode_header_intel *mc_header = mc; + struct extended_sigtable *ext_header = NULL; + int sum, orig_sum, ext_sigcount = 0, i; + struct extended_signature *ext_sig; + + total_size = get_totalsize(mc_header); + data_size = get_datasize(mc_header); + + if (data_size + MC_HEADER_SIZE > total_size) { + if (print_err) + pr_err("error! Bad data size in microcode data file\n"); + return -EINVAL; + } + + if (mc_header->ldrver != 1 || mc_header->hdrver != 1) { + if (print_err) + pr_err("error! Unknown microcode update format\n"); + return -EINVAL; + } + ext_table_size = total_size - (MC_HEADER_SIZE + data_size); + if (ext_table_size) { + if ((ext_table_size < EXT_HEADER_SIZE) + || ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) { + if (print_err) + pr_err("error! Small exttable size in microcode data file\n"); + return -EINVAL; + } + ext_header = mc + MC_HEADER_SIZE + data_size; + if (ext_table_size != exttable_size(ext_header)) { + if (print_err) + pr_err("error! Bad exttable size in microcode data file\n"); + return -EFAULT; + } + ext_sigcount = ext_header->count; + } + + /* check extended table checksum */ + if (ext_table_size) { + int ext_table_sum = 0; + int *ext_tablep = (int *)ext_header; + + i = ext_table_size / DWSIZE; + while (i--) + ext_table_sum += ext_tablep[i]; + if (ext_table_sum) { + if (print_err) + pr_warn("aborting, bad extended signature table checksum\n"); + return -EINVAL; + } + } + + /* calculate the checksum */ + orig_sum = 0; + i = (MC_HEADER_SIZE + data_size) / DWSIZE; + while (i--) + orig_sum += ((int *)mc)[i]; + if (orig_sum) { + if (print_err) + pr_err("aborting, bad checksum\n"); + return -EINVAL; + } + if (!ext_table_size) + return 0; + /* check extended signature checksum */ + for (i = 0; i < ext_sigcount; i++) { + ext_sig = (void *)ext_header + EXT_HEADER_SIZE + + EXT_SIGNATURE_SIZE * i; + sum = orig_sum + - (mc_header->sig + mc_header->pf + mc_header->cksum) + + (ext_sig->sig + ext_sig->pf + ext_sig->cksum); + if (sum) { + if (print_err) + pr_err("aborting, bad checksum\n"); + return -EINVAL; + } + } + return 0; +} +EXPORT_SYMBOL_GPL(microcode_sanity_check); + +/* + * return 0 - no update found + * return 1 - found update + */ +int get_matching_sig(unsigned int csig, int cpf, void *mc, int rev) +{ + struct microcode_header_intel *mc_header = mc; + struct extended_sigtable *ext_header; + unsigned long total_size = get_totalsize(mc_header); + int ext_sigcount, i; + struct extended_signature *ext_sig; + + if (update_match_cpu(csig, cpf, mc_header->sig, mc_header->pf)) + return 1; + + /* Look for ext. headers: */ + if (total_size <= get_datasize(mc_header) + MC_HEADER_SIZE) + return 0; + + ext_header = mc + get_datasize(mc_header) + MC_HEADER_SIZE; + ext_sigcount = ext_header->count; + ext_sig = (void *)ext_header + EXT_HEADER_SIZE; + + for (i = 0; i < ext_sigcount; i++) { + if (update_match_cpu(csig, cpf, ext_sig->sig, ext_sig->pf)) + return 1; + ext_sig++; + } + return 0; +} + +/* + * return 0 - no update found + * return 1 - found update + */ +int get_matching_microcode(unsigned int csig, int cpf, void *mc, int rev) +{ + struct microcode_header_intel *mc_header = mc; + + if (!update_match_revision(mc_header, rev)) + return 0; + + return get_matching_sig(csig, cpf, mc, rev); +} +EXPORT_SYMBOL_GPL(get_matching_microcode); diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c deleted file mode 100644 index 4a6ff747aaad..000000000000 --- a/arch/x86/kernel/microcode_amd.c +++ /dev/null @@ -1,492 +0,0 @@ -/* - * AMD CPU Microcode Update Driver for Linux - * Copyright (C) 2008-2011 Advanced Micro Devices Inc. - * - * Author: Peter Oruba - * - * Based on work by: - * Tigran Aivazian - * - * Maintainers: - * Andreas Herrmann - * Borislav Petkov - * - * This driver allows to upgrade microcode on F10h AMD - * CPUs and later. - * - * Licensed under the terms of the GNU General Public - * License version 2. See file COPYING for details. - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -MODULE_DESCRIPTION("AMD Microcode Update Driver"); -MODULE_AUTHOR("Peter Oruba"); -MODULE_LICENSE("GPL v2"); - -static struct equiv_cpu_entry *equiv_cpu_table; - -struct ucode_patch { - struct list_head plist; - void *data; - u32 patch_id; - u16 equiv_cpu; -}; - -static LIST_HEAD(pcache); - -static u16 __find_equiv_id(unsigned int cpu) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - return find_equiv_id(equiv_cpu_table, uci->cpu_sig.sig); -} - -static u32 find_cpu_family_by_equiv_cpu(u16 equiv_cpu) -{ - int i = 0; - - BUG_ON(!equiv_cpu_table); - - while (equiv_cpu_table[i].equiv_cpu != 0) { - if (equiv_cpu == equiv_cpu_table[i].equiv_cpu) - return equiv_cpu_table[i].installed_cpu; - i++; - } - return 0; -} - -/* - * a small, trivial cache of per-family ucode patches - */ -static struct ucode_patch *cache_find_patch(u16 equiv_cpu) -{ - struct ucode_patch *p; - - list_for_each_entry(p, &pcache, plist) - if (p->equiv_cpu == equiv_cpu) - return p; - return NULL; -} - -static void update_cache(struct ucode_patch *new_patch) -{ - struct ucode_patch *p; - - list_for_each_entry(p, &pcache, plist) { - if (p->equiv_cpu == new_patch->equiv_cpu) { - if (p->patch_id >= new_patch->patch_id) - /* we already have the latest patch */ - return; - - list_replace(&p->plist, &new_patch->plist); - kfree(p->data); - kfree(p); - return; - } - } - /* no patch found, add it */ - list_add_tail(&new_patch->plist, &pcache); -} - -static void free_cache(void) -{ - struct ucode_patch *p, *tmp; - - list_for_each_entry_safe(p, tmp, &pcache, plist) { - __list_del(p->plist.prev, p->plist.next); - kfree(p->data); - kfree(p); - } -} - -static struct ucode_patch *find_patch(unsigned int cpu) -{ - u16 equiv_id; - - equiv_id = __find_equiv_id(cpu); - if (!equiv_id) - return NULL; - - return cache_find_patch(equiv_id); -} - -static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) -{ - struct cpuinfo_x86 *c = &cpu_data(cpu); - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - struct ucode_patch *p; - - csig->sig = cpuid_eax(0x00000001); - csig->rev = c->microcode; - - /* - * a patch could have been loaded early, set uci->mc so that - * mc_bp_resume() can call apply_microcode() - */ - p = find_patch(cpu); - if (p && (p->patch_id == csig->rev)) - uci->mc = p->data; - - pr_info("CPU%d: patch_level=0x%08x\n", cpu, csig->rev); - - return 0; -} - -static unsigned int verify_patch_size(u8 family, u32 patch_size, - unsigned int size) -{ - u32 max_size; - -#define F1XH_MPB_MAX_SIZE 2048 -#define F14H_MPB_MAX_SIZE 1824 -#define F15H_MPB_MAX_SIZE 4096 -#define F16H_MPB_MAX_SIZE 3458 - - switch (family) { - case 0x14: - max_size = F14H_MPB_MAX_SIZE; - break; - case 0x15: - max_size = F15H_MPB_MAX_SIZE; - break; - case 0x16: - max_size = F16H_MPB_MAX_SIZE; - break; - default: - max_size = F1XH_MPB_MAX_SIZE; - break; - } - - if (patch_size > min_t(u32, size, max_size)) { - pr_err("patch size mismatch\n"); - return 0; - } - - return patch_size; -} - -int __apply_microcode_amd(struct microcode_amd *mc_amd) -{ - u32 rev, dummy; - - native_wrmsrl(MSR_AMD64_PATCH_LOADER, (u64)(long)&mc_amd->hdr.data_code); - - /* verify patch application was successful */ - native_rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); - if (rev != mc_amd->hdr.patch_id) - return -1; - - return 0; -} - -int apply_microcode_amd(int cpu) -{ - struct cpuinfo_x86 *c = &cpu_data(cpu); - struct microcode_amd *mc_amd; - struct ucode_cpu_info *uci; - struct ucode_patch *p; - u32 rev, dummy; - - BUG_ON(raw_smp_processor_id() != cpu); - - uci = ucode_cpu_info + cpu; - - p = find_patch(cpu); - if (!p) - return 0; - - mc_amd = p->data; - uci->mc = p->data; - - rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); - - /* need to apply patch? */ - if (rev >= mc_amd->hdr.patch_id) { - c->microcode = rev; - uci->cpu_sig.rev = rev; - return 0; - } - - if (__apply_microcode_amd(mc_amd)) { - pr_err("CPU%d: update failed for patch_level=0x%08x\n", - cpu, mc_amd->hdr.patch_id); - return -1; - } - pr_info("CPU%d: new patch_level=0x%08x\n", cpu, - mc_amd->hdr.patch_id); - - uci->cpu_sig.rev = mc_amd->hdr.patch_id; - c->microcode = mc_amd->hdr.patch_id; - - return 0; -} - -static int install_equiv_cpu_table(const u8 *buf) -{ - unsigned int *ibuf = (unsigned int *)buf; - unsigned int type = ibuf[1]; - unsigned int size = ibuf[2]; - - if (type != UCODE_EQUIV_CPU_TABLE_TYPE || !size) { - pr_err("empty section/" - "invalid type field in container file section header\n"); - return -EINVAL; - } - - equiv_cpu_table = vmalloc(size); - if (!equiv_cpu_table) { - pr_err("failed to allocate equivalent CPU table\n"); - return -ENOMEM; - } - - memcpy(equiv_cpu_table, buf + CONTAINER_HDR_SZ, size); - - /* add header length */ - return size + CONTAINER_HDR_SZ; -} - -static void free_equiv_cpu_table(void) -{ - vfree(equiv_cpu_table); - equiv_cpu_table = NULL; -} - -static void cleanup(void) -{ - free_equiv_cpu_table(); - free_cache(); -} - -/* - * We return the current size even if some of the checks failed so that - * we can skip over the next patch. If we return a negative value, we - * signal a grave error like a memory allocation has failed and the - * driver cannot continue functioning normally. In such cases, we tear - * down everything we've used up so far and exit. - */ -static int verify_and_add_patch(u8 family, u8 *fw, unsigned int leftover) -{ - struct microcode_header_amd *mc_hdr; - struct ucode_patch *patch; - unsigned int patch_size, crnt_size, ret; - u32 proc_fam; - u16 proc_id; - - patch_size = *(u32 *)(fw + 4); - crnt_size = patch_size + SECTION_HDR_SIZE; - mc_hdr = (struct microcode_header_amd *)(fw + SECTION_HDR_SIZE); - proc_id = mc_hdr->processor_rev_id; - - proc_fam = find_cpu_family_by_equiv_cpu(proc_id); - if (!proc_fam) { - pr_err("No patch family for equiv ID: 0x%04x\n", proc_id); - return crnt_size; - } - - /* check if patch is for the current family */ - proc_fam = ((proc_fam >> 8) & 0xf) + ((proc_fam >> 20) & 0xff); - if (proc_fam != family) - return crnt_size; - - if (mc_hdr->nb_dev_id || mc_hdr->sb_dev_id) { - pr_err("Patch-ID 0x%08x: chipset-specific code unsupported.\n", - mc_hdr->patch_id); - return crnt_size; - } - - ret = verify_patch_size(family, patch_size, leftover); - if (!ret) { - pr_err("Patch-ID 0x%08x: size mismatch.\n", mc_hdr->patch_id); - return crnt_size; - } - - patch = kzalloc(sizeof(*patch), GFP_KERNEL); - if (!patch) { - pr_err("Patch allocation failure.\n"); - return -EINVAL; - } - - patch->data = kzalloc(patch_size, GFP_KERNEL); - if (!patch->data) { - pr_err("Patch data allocation failure.\n"); - kfree(patch); - return -EINVAL; - } - - /* All looks ok, copy patch... */ - memcpy(patch->data, fw + SECTION_HDR_SIZE, patch_size); - INIT_LIST_HEAD(&patch->plist); - patch->patch_id = mc_hdr->patch_id; - patch->equiv_cpu = proc_id; - - pr_debug("%s: Added patch_id: 0x%08x, proc_id: 0x%04x\n", - __func__, patch->patch_id, proc_id); - - /* ... and add to cache. */ - update_cache(patch); - - return crnt_size; -} - -static enum ucode_state __load_microcode_amd(u8 family, const u8 *data, - size_t size) -{ - enum ucode_state ret = UCODE_ERROR; - unsigned int leftover; - u8 *fw = (u8 *)data; - int crnt_size = 0; - int offset; - - offset = install_equiv_cpu_table(data); - if (offset < 0) { - pr_err("failed to create equivalent cpu table\n"); - return ret; - } - fw += offset; - leftover = size - offset; - - if (*(u32 *)fw != UCODE_UCODE_TYPE) { - pr_err("invalid type field in container file section header\n"); - free_equiv_cpu_table(); - return ret; - } - - while (leftover) { - crnt_size = verify_and_add_patch(family, fw, leftover); - if (crnt_size < 0) - return ret; - - fw += crnt_size; - leftover -= crnt_size; - } - - return UCODE_OK; -} - -enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t size) -{ - enum ucode_state ret; - - /* free old equiv table */ - free_equiv_cpu_table(); - - ret = __load_microcode_amd(family, data, size); - - if (ret != UCODE_OK) - cleanup(); - -#if defined(CONFIG_MICROCODE_AMD_EARLY) && defined(CONFIG_X86_32) - /* save BSP's matching patch for early load */ - if (cpu_data(smp_processor_id()).cpu_index == boot_cpu_data.cpu_index) { - struct ucode_patch *p = find_patch(smp_processor_id()); - if (p) { - memset(amd_ucode_patch, 0, PATCH_MAX_SIZE); - memcpy(amd_ucode_patch, p->data, min_t(u32, ksize(p->data), - PATCH_MAX_SIZE)); - } - } -#endif - return ret; -} - -/* - * AMD microcode firmware naming convention, up to family 15h they are in - * the legacy file: - * - * amd-ucode/microcode_amd.bin - * - * This legacy file is always smaller than 2K in size. - * - * Beginning with family 15h, they are in family-specific firmware files: - * - * amd-ucode/microcode_amd_fam15h.bin - * amd-ucode/microcode_amd_fam16h.bin - * ... - * - * These might be larger than 2K. - */ -static enum ucode_state request_microcode_amd(int cpu, struct device *device, - bool refresh_fw) -{ - char fw_name[36] = "amd-ucode/microcode_amd.bin"; - struct cpuinfo_x86 *c = &cpu_data(cpu); - enum ucode_state ret = UCODE_NFOUND; - const struct firmware *fw; - - /* reload ucode container only on the boot cpu */ - if (!refresh_fw || c->cpu_index != boot_cpu_data.cpu_index) - return UCODE_OK; - - if (c->x86 >= 0x15) - snprintf(fw_name, sizeof(fw_name), "amd-ucode/microcode_amd_fam%.2xh.bin", c->x86); - - if (request_firmware(&fw, (const char *)fw_name, device)) { - pr_debug("failed to load file %s\n", fw_name); - goto out; - } - - ret = UCODE_ERROR; - if (*(u32 *)fw->data != UCODE_MAGIC) { - pr_err("invalid magic value (0x%08x)\n", *(u32 *)fw->data); - goto fw_release; - } - - ret = load_microcode_amd(c->x86, fw->data, fw->size); - - fw_release: - release_firmware(fw); - - out: - return ret; -} - -static enum ucode_state -request_microcode_user(int cpu, const void __user *buf, size_t size) -{ - return UCODE_ERROR; -} - -static void microcode_fini_cpu_amd(int cpu) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - - uci->mc = NULL; -} - -static struct microcode_ops microcode_amd_ops = { - .request_microcode_user = request_microcode_user, - .request_microcode_fw = request_microcode_amd, - .collect_cpu_info = collect_cpu_info_amd, - .apply_microcode = apply_microcode_amd, - .microcode_fini_cpu = microcode_fini_cpu_amd, -}; - -struct microcode_ops * __init init_amd_microcode(void) -{ - struct cpuinfo_x86 *c = &cpu_data(0); - - if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) { - pr_warning("AMD CPU family 0x%x not supported\n", c->x86); - return NULL; - } - - return µcode_amd_ops; -} - -void __exit exit_amd_microcode(void) -{ - cleanup(); -} diff --git a/arch/x86/kernel/microcode_amd_early.c b/arch/x86/kernel/microcode_amd_early.c deleted file mode 100644 index 8384c0fa206f..000000000000 --- a/arch/x86/kernel/microcode_amd_early.c +++ /dev/null @@ -1,380 +0,0 @@ -/* - * Copyright (C) 2013 Advanced Micro Devices, Inc. - * - * Author: Jacob Shin - * Fixes: Borislav Petkov - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include -#include - -#include -#include -#include - -/* - * This points to the current valid container of microcode patches which we will - * save from the initrd before jettisoning its contents. - */ -static u8 *container; -static size_t container_size; - -static u32 ucode_new_rev; -u8 amd_ucode_patch[PATCH_MAX_SIZE]; -static u16 this_equiv_id; - -struct cpio_data ucode_cpio; - -/* - * Microcode patch container file is prepended to the initrd in cpio format. - * See Documentation/x86/early-microcode.txt - */ -static __initdata char ucode_path[] = "kernel/x86/microcode/AuthenticAMD.bin"; - -static struct cpio_data __init find_ucode_in_initrd(void) -{ - long offset = 0; - char *path; - void *start; - size_t size; - -#ifdef CONFIG_X86_32 - struct boot_params *p; - - /* - * On 32-bit, early load occurs before paging is turned on so we need - * to use physical addresses. - */ - p = (struct boot_params *)__pa_nodebug(&boot_params); - path = (char *)__pa_nodebug(ucode_path); - start = (void *)p->hdr.ramdisk_image; - size = p->hdr.ramdisk_size; -#else - path = ucode_path; - start = (void *)(boot_params.hdr.ramdisk_image + PAGE_OFFSET); - size = boot_params.hdr.ramdisk_size; -#endif - - return find_cpio_data(path, start, size, &offset); -} - -static size_t compute_container_size(u8 *data, u32 total_size) -{ - size_t size = 0; - u32 *header = (u32 *)data; - - if (header[0] != UCODE_MAGIC || - header[1] != UCODE_EQUIV_CPU_TABLE_TYPE || /* type */ - header[2] == 0) /* size */ - return size; - - size = header[2] + CONTAINER_HDR_SZ; - total_size -= size; - data += size; - - while (total_size) { - u16 patch_size; - - header = (u32 *)data; - - if (header[0] != UCODE_UCODE_TYPE) - break; - - /* - * Sanity-check patch size. - */ - patch_size = header[1]; - if (patch_size > PATCH_MAX_SIZE) - break; - - size += patch_size + SECTION_HDR_SIZE; - data += patch_size + SECTION_HDR_SIZE; - total_size -= patch_size + SECTION_HDR_SIZE; - } - - return size; -} - -/* - * Early load occurs before we can vmalloc(). So we look for the microcode - * patch container file in initrd, traverse equivalent cpu table, look for a - * matching microcode patch, and update, all in initrd memory in place. - * When vmalloc() is available for use later -- on 64-bit during first AP load, - * and on 32-bit during save_microcode_in_initrd_amd() -- we can call - * load_microcode_amd() to save equivalent cpu table and microcode patches in - * kernel heap memory. - */ -static void apply_ucode_in_initrd(void *ucode, size_t size) -{ - struct equiv_cpu_entry *eq; - size_t *cont_sz; - u32 *header; - u8 *data, **cont; - u16 eq_id = 0; - int offset, left; - u32 rev, eax, ebx, ecx, edx; - u32 *new_rev; - -#ifdef CONFIG_X86_32 - new_rev = (u32 *)__pa_nodebug(&ucode_new_rev); - cont_sz = (size_t *)__pa_nodebug(&container_size); - cont = (u8 **)__pa_nodebug(&container); -#else - new_rev = &ucode_new_rev; - cont_sz = &container_size; - cont = &container; -#endif - - data = ucode; - left = size; - header = (u32 *)data; - - /* find equiv cpu table */ - if (header[0] != UCODE_MAGIC || - header[1] != UCODE_EQUIV_CPU_TABLE_TYPE || /* type */ - header[2] == 0) /* size */ - return; - - eax = 0x00000001; - ecx = 0; - native_cpuid(&eax, &ebx, &ecx, &edx); - - while (left > 0) { - eq = (struct equiv_cpu_entry *)(data + CONTAINER_HDR_SZ); - - *cont = data; - - /* Advance past the container header */ - offset = header[2] + CONTAINER_HDR_SZ; - data += offset; - left -= offset; - - eq_id = find_equiv_id(eq, eax); - if (eq_id) { - this_equiv_id = eq_id; - *cont_sz = compute_container_size(*cont, left + offset); - - /* - * truncate how much we need to iterate over in the - * ucode update loop below - */ - left = *cont_sz - offset; - break; - } - - /* - * support multiple container files appended together. if this - * one does not have a matching equivalent cpu entry, we fast - * forward to the next container file. - */ - while (left > 0) { - header = (u32 *)data; - if (header[0] == UCODE_MAGIC && - header[1] == UCODE_EQUIV_CPU_TABLE_TYPE) - break; - - offset = header[1] + SECTION_HDR_SIZE; - data += offset; - left -= offset; - } - - /* mark where the next microcode container file starts */ - offset = data - (u8 *)ucode; - ucode = data; - } - - if (!eq_id) { - *cont = NULL; - *cont_sz = 0; - return; - } - - /* find ucode and update if needed */ - - native_rdmsr(MSR_AMD64_PATCH_LEVEL, rev, eax); - - while (left > 0) { - struct microcode_amd *mc; - - header = (u32 *)data; - if (header[0] != UCODE_UCODE_TYPE || /* type */ - header[1] == 0) /* size */ - break; - - mc = (struct microcode_amd *)(data + SECTION_HDR_SIZE); - - if (eq_id == mc->hdr.processor_rev_id && rev < mc->hdr.patch_id) { - - if (!__apply_microcode_amd(mc)) { - rev = mc->hdr.patch_id; - *new_rev = rev; - - /* save ucode patch */ - memcpy(amd_ucode_patch, mc, - min_t(u32, header[1], PATCH_MAX_SIZE)); - } - } - - offset = header[1] + SECTION_HDR_SIZE; - data += offset; - left -= offset; - } -} - -void __init load_ucode_amd_bsp(void) -{ - struct cpio_data cp; - void **data; - size_t *size; - -#ifdef CONFIG_X86_32 - data = (void **)__pa_nodebug(&ucode_cpio.data); - size = (size_t *)__pa_nodebug(&ucode_cpio.size); -#else - data = &ucode_cpio.data; - size = &ucode_cpio.size; -#endif - - cp = find_ucode_in_initrd(); - if (!cp.data) - return; - - *data = cp.data; - *size = cp.size; - - apply_ucode_in_initrd(cp.data, cp.size); -} - -#ifdef CONFIG_X86_32 -/* - * On 32-bit, since AP's early load occurs before paging is turned on, we - * cannot traverse cpu_equiv_table and pcache in kernel heap memory. So during - * cold boot, AP will apply_ucode_in_initrd() just like the BSP. During - * save_microcode_in_initrd_amd() BSP's patch is copied to amd_ucode_patch, - * which is used upon resume from suspend. - */ -void load_ucode_amd_ap(void) -{ - struct microcode_amd *mc; - size_t *usize; - void **ucode; - - mc = (struct microcode_amd *)__pa(amd_ucode_patch); - if (mc->hdr.patch_id && mc->hdr.processor_rev_id) { - __apply_microcode_amd(mc); - return; - } - - ucode = (void *)__pa_nodebug(&container); - usize = (size_t *)__pa_nodebug(&container_size); - - if (!*ucode || !*usize) - return; - - apply_ucode_in_initrd(*ucode, *usize); -} - -static void __init collect_cpu_sig_on_bsp(void *arg) -{ - unsigned int cpu = smp_processor_id(); - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - - uci->cpu_sig.sig = cpuid_eax(0x00000001); -} -#else -void load_ucode_amd_ap(void) -{ - unsigned int cpu = smp_processor_id(); - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - struct equiv_cpu_entry *eq; - struct microcode_amd *mc; - u32 rev, eax; - u16 eq_id; - - /* Exit if called on the BSP. */ - if (!cpu) - return; - - if (!container) - return; - - rdmsr(MSR_AMD64_PATCH_LEVEL, rev, eax); - - uci->cpu_sig.rev = rev; - uci->cpu_sig.sig = eax; - - eax = cpuid_eax(0x00000001); - eq = (struct equiv_cpu_entry *)(container + CONTAINER_HDR_SZ); - - eq_id = find_equiv_id(eq, eax); - if (!eq_id) - return; - - if (eq_id == this_equiv_id) { - mc = (struct microcode_amd *)amd_ucode_patch; - - if (mc && rev < mc->hdr.patch_id) { - if (!__apply_microcode_amd(mc)) - ucode_new_rev = mc->hdr.patch_id; - } - - } else { - if (!ucode_cpio.data) - return; - - /* - * AP has a different equivalence ID than BSP, looks like - * mixed-steppings silicon so go through the ucode blob anew. - */ - apply_ucode_in_initrd(ucode_cpio.data, ucode_cpio.size); - } -} -#endif - -int __init save_microcode_in_initrd_amd(void) -{ - enum ucode_state ret; - u32 eax; - -#ifdef CONFIG_X86_32 - unsigned int bsp = boot_cpu_data.cpu_index; - struct ucode_cpu_info *uci = ucode_cpu_info + bsp; - - if (!uci->cpu_sig.sig) - smp_call_function_single(bsp, collect_cpu_sig_on_bsp, NULL, 1); - - /* - * Take into account the fact that the ramdisk might get relocated - * and therefore we need to recompute the container's position in - * virtual memory space. - */ - container = (u8 *)(__va((u32)relocated_ramdisk) + - ((u32)container - boot_params.hdr.ramdisk_image)); -#endif - if (ucode_new_rev) - pr_info("microcode: updated early to new patch_level=0x%08x\n", - ucode_new_rev); - - if (!container) - return -EINVAL; - - eax = cpuid_eax(0x00000001); - eax = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff); - - ret = load_microcode_amd(eax, container, container_size); - if (ret != UCODE_OK) - return -EINVAL; - - /* - * This will be freed any msec now, stash patches for the current - * family and switch to patch cache for cpu hotplug, etc later. - */ - container = NULL; - container_size = 0; - - return 0; -} diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c deleted file mode 100644 index 15c987698b0f..000000000000 --- a/arch/x86/kernel/microcode_core.c +++ /dev/null @@ -1,645 +0,0 @@ -/* - * Intel CPU Microcode Update Driver for Linux - * - * Copyright (C) 2000-2006 Tigran Aivazian - * 2006 Shaohua Li - * - * This driver allows to upgrade microcode on Intel processors - * belonging to IA-32 family - PentiumPro, Pentium II, - * Pentium III, Xeon, Pentium 4, etc. - * - * Reference: Section 8.11 of Volume 3a, IA-32 Intel? Architecture - * Software Developer's Manual - * Order Number 253668 or free download from: - * - * http://developer.intel.com/Assets/PDF/manual/253668.pdf - * - * For more information, go to http://www.urbanmyth.org/microcode - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * 1.0 16 Feb 2000, Tigran Aivazian - * Initial release. - * 1.01 18 Feb 2000, Tigran Aivazian - * Added read() support + cleanups. - * 1.02 21 Feb 2000, Tigran Aivazian - * Added 'device trimming' support. open(O_WRONLY) zeroes - * and frees the saved copy of applied microcode. - * 1.03 29 Feb 2000, Tigran Aivazian - * Made to use devfs (/dev/cpu/microcode) + cleanups. - * 1.04 06 Jun 2000, Simon Trimmer - * Added misc device support (now uses both devfs and misc). - * Added MICROCODE_IOCFREE ioctl to clear memory. - * 1.05 09 Jun 2000, Simon Trimmer - * Messages for error cases (non Intel & no suitable microcode). - * 1.06 03 Aug 2000, Tigran Aivazian - * Removed ->release(). Removed exclusive open and status bitmap. - * Added microcode_rwsem to serialize read()/write()/ioctl(). - * Removed global kernel lock usage. - * 1.07 07 Sep 2000, Tigran Aivazian - * Write 0 to 0x8B msr and then cpuid before reading revision, - * so that it works even if there were no update done by the - * BIOS. Otherwise, reading from 0x8B gives junk (which happened - * to be 0 on my machine which is why it worked even when I - * disabled update by the BIOS) - * Thanks to Eric W. Biederman for the fix. - * 1.08 11 Dec 2000, Richard Schaal and - * Tigran Aivazian - * Intel Pentium 4 processor support and bugfixes. - * 1.09 30 Oct 2001, Tigran Aivazian - * Bugfix for HT (Hyper-Threading) enabled processors - * whereby processor resources are shared by all logical processors - * in a single CPU package. - * 1.10 28 Feb 2002 Asit K Mallick and - * Tigran Aivazian , - * Serialize updates as required on HT processors due to - * speculative nature of implementation. - * 1.11 22 Mar 2002 Tigran Aivazian - * Fix the panic when writing zero-length microcode chunk. - * 1.12 29 Sep 2003 Nitin Kamble , - * Jun Nakajima - * Support for the microcode updates in the new format. - * 1.13 10 Oct 2003 Tigran Aivazian - * Removed ->read() method and obsoleted MICROCODE_IOCFREE ioctl - * because we no longer hold a copy of applied microcode - * in kernel memory. - * 1.14 25 Jun 2004 Tigran Aivazian - * Fix sigmatch() macro to handle old CPUs with pf == 0. - * Thanks to Stuart Swales for pointing out this bug. - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -MODULE_DESCRIPTION("Microcode Update Driver"); -MODULE_AUTHOR("Tigran Aivazian "); -MODULE_LICENSE("GPL"); - -#define MICROCODE_VERSION "2.00" - -static struct microcode_ops *microcode_ops; - -/* - * Synchronization. - * - * All non cpu-hotplug-callback call sites use: - * - * - microcode_mutex to synchronize with each other; - * - get/put_online_cpus() to synchronize with - * the cpu-hotplug-callback call sites. - * - * We guarantee that only a single cpu is being - * updated at any particular moment of time. - */ -static DEFINE_MUTEX(microcode_mutex); - -struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; -EXPORT_SYMBOL_GPL(ucode_cpu_info); - -/* - * Operations that are run on a target cpu: - */ - -struct cpu_info_ctx { - struct cpu_signature *cpu_sig; - int err; -}; - -static void collect_cpu_info_local(void *arg) -{ - struct cpu_info_ctx *ctx = arg; - - ctx->err = microcode_ops->collect_cpu_info(smp_processor_id(), - ctx->cpu_sig); -} - -static int collect_cpu_info_on_target(int cpu, struct cpu_signature *cpu_sig) -{ - struct cpu_info_ctx ctx = { .cpu_sig = cpu_sig, .err = 0 }; - int ret; - - ret = smp_call_function_single(cpu, collect_cpu_info_local, &ctx, 1); - if (!ret) - ret = ctx.err; - - return ret; -} - -static int collect_cpu_info(int cpu) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - int ret; - - memset(uci, 0, sizeof(*uci)); - - ret = collect_cpu_info_on_target(cpu, &uci->cpu_sig); - if (!ret) - uci->valid = 1; - - return ret; -} - -struct apply_microcode_ctx { - int err; -}; - -static void apply_microcode_local(void *arg) -{ - struct apply_microcode_ctx *ctx = arg; - - ctx->err = microcode_ops->apply_microcode(smp_processor_id()); -} - -static int apply_microcode_on_target(int cpu) -{ - struct apply_microcode_ctx ctx = { .err = 0 }; - int ret; - - ret = smp_call_function_single(cpu, apply_microcode_local, &ctx, 1); - if (!ret) - ret = ctx.err; - - return ret; -} - -#ifdef CONFIG_MICROCODE_OLD_INTERFACE -static int do_microcode_update(const void __user *buf, size_t size) -{ - int error = 0; - int cpu; - - for_each_online_cpu(cpu) { - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - enum ucode_state ustate; - - if (!uci->valid) - continue; - - ustate = microcode_ops->request_microcode_user(cpu, buf, size); - if (ustate == UCODE_ERROR) { - error = -1; - break; - } else if (ustate == UCODE_OK) - apply_microcode_on_target(cpu); - } - - return error; -} - -static int microcode_open(struct inode *inode, struct file *file) -{ - return capable(CAP_SYS_RAWIO) ? nonseekable_open(inode, file) : -EPERM; -} - -static ssize_t microcode_write(struct file *file, const char __user *buf, - size_t len, loff_t *ppos) -{ - ssize_t ret = -EINVAL; - - if ((len >> PAGE_SHIFT) > totalram_pages) { - pr_err("too much data (max %ld pages)\n", totalram_pages); - return ret; - } - - get_online_cpus(); - mutex_lock(µcode_mutex); - - if (do_microcode_update(buf, len) == 0) - ret = (ssize_t)len; - - if (ret > 0) - perf_check_microcode(); - - mutex_unlock(µcode_mutex); - put_online_cpus(); - - return ret; -} - -static const struct file_operations microcode_fops = { - .owner = THIS_MODULE, - .write = microcode_write, - .open = microcode_open, - .llseek = no_llseek, -}; - -static struct miscdevice microcode_dev = { - .minor = MICROCODE_MINOR, - .name = "microcode", - .nodename = "cpu/microcode", - .fops = µcode_fops, -}; - -static int __init microcode_dev_init(void) -{ - int error; - - error = misc_register(µcode_dev); - if (error) { - pr_err("can't misc_register on minor=%d\n", MICROCODE_MINOR); - return error; - } - - return 0; -} - -static void __exit microcode_dev_exit(void) -{ - misc_deregister(µcode_dev); -} - -MODULE_ALIAS_MISCDEV(MICROCODE_MINOR); -MODULE_ALIAS("devname:cpu/microcode"); -#else -#define microcode_dev_init() 0 -#define microcode_dev_exit() do { } while (0) -#endif - -/* fake device for request_firmware */ -static struct platform_device *microcode_pdev; - -static int reload_for_cpu(int cpu) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - enum ucode_state ustate; - int err = 0; - - if (!uci->valid) - return err; - - ustate = microcode_ops->request_microcode_fw(cpu, µcode_pdev->dev, true); - if (ustate == UCODE_OK) - apply_microcode_on_target(cpu); - else - if (ustate == UCODE_ERROR) - err = -EINVAL; - return err; -} - -static ssize_t reload_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t size) -{ - unsigned long val; - int cpu; - ssize_t ret = 0, tmp_ret; - - ret = kstrtoul(buf, 0, &val); - if (ret) - return ret; - - if (val != 1) - return size; - - get_online_cpus(); - mutex_lock(µcode_mutex); - for_each_online_cpu(cpu) { - tmp_ret = reload_for_cpu(cpu); - if (tmp_ret != 0) - pr_warn("Error reloading microcode on CPU %d\n", cpu); - - /* save retval of the first encountered reload error */ - if (!ret) - ret = tmp_ret; - } - if (!ret) - perf_check_microcode(); - mutex_unlock(µcode_mutex); - put_online_cpus(); - - if (!ret) - ret = size; - - return ret; -} - -static ssize_t version_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; - - return sprintf(buf, "0x%x\n", uci->cpu_sig.rev); -} - -static ssize_t pf_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; - - return sprintf(buf, "0x%x\n", uci->cpu_sig.pf); -} - -static DEVICE_ATTR(reload, 0200, NULL, reload_store); -static DEVICE_ATTR(version, 0400, version_show, NULL); -static DEVICE_ATTR(processor_flags, 0400, pf_show, NULL); - -static struct attribute *mc_default_attrs[] = { - &dev_attr_version.attr, - &dev_attr_processor_flags.attr, - NULL -}; - -static struct attribute_group mc_attr_group = { - .attrs = mc_default_attrs, - .name = "microcode", -}; - -static void microcode_fini_cpu(int cpu) -{ - microcode_ops->microcode_fini_cpu(cpu); -} - -static enum ucode_state microcode_resume_cpu(int cpu) -{ - pr_debug("CPU%d updated upon resume\n", cpu); - - if (apply_microcode_on_target(cpu)) - return UCODE_ERROR; - - return UCODE_OK; -} - -static enum ucode_state microcode_init_cpu(int cpu, bool refresh_fw) -{ - enum ucode_state ustate; - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - - if (uci && uci->valid) - return UCODE_OK; - - if (collect_cpu_info(cpu)) - return UCODE_ERROR; - - /* --dimm. Trigger a delayed update? */ - if (system_state != SYSTEM_RUNNING) - return UCODE_NFOUND; - - ustate = microcode_ops->request_microcode_fw(cpu, µcode_pdev->dev, - refresh_fw); - - if (ustate == UCODE_OK) { - pr_debug("CPU%d updated upon init\n", cpu); - apply_microcode_on_target(cpu); - } - - return ustate; -} - -static enum ucode_state microcode_update_cpu(int cpu) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - - if (uci->valid) - return microcode_resume_cpu(cpu); - - return microcode_init_cpu(cpu, false); -} - -static int mc_device_add(struct device *dev, struct subsys_interface *sif) -{ - int err, cpu = dev->id; - - if (!cpu_online(cpu)) - return 0; - - pr_debug("CPU%d added\n", cpu); - - err = sysfs_create_group(&dev->kobj, &mc_attr_group); - if (err) - return err; - - if (microcode_init_cpu(cpu, true) == UCODE_ERROR) - return -EINVAL; - - return err; -} - -static int mc_device_remove(struct device *dev, struct subsys_interface *sif) -{ - int cpu = dev->id; - - if (!cpu_online(cpu)) - return 0; - - pr_debug("CPU%d removed\n", cpu); - microcode_fini_cpu(cpu); - sysfs_remove_group(&dev->kobj, &mc_attr_group); - return 0; -} - -static struct subsys_interface mc_cpu_interface = { - .name = "microcode", - .subsys = &cpu_subsys, - .add_dev = mc_device_add, - .remove_dev = mc_device_remove, -}; - -/** - * mc_bp_resume - Update boot CPU microcode during resume. - */ -static void mc_bp_resume(void) -{ - int cpu = smp_processor_id(); - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - - if (uci->valid && uci->mc) - microcode_ops->apply_microcode(cpu); -} - -static struct syscore_ops mc_syscore_ops = { - .resume = mc_bp_resume, -}; - -static int -mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) -{ - unsigned int cpu = (unsigned long)hcpu; - struct device *dev; - - dev = get_cpu_device(cpu); - - switch (action & ~CPU_TASKS_FROZEN) { - case CPU_ONLINE: - microcode_update_cpu(cpu); - pr_debug("CPU%d added\n", cpu); - /* - * "break" is missing on purpose here because we want to fall - * through in order to create the sysfs group. - */ - - case CPU_DOWN_FAILED: - if (sysfs_create_group(&dev->kobj, &mc_attr_group)) - pr_err("Failed to create group for CPU%d\n", cpu); - break; - - case CPU_DOWN_PREPARE: - /* Suspend is in progress, only remove the interface */ - sysfs_remove_group(&dev->kobj, &mc_attr_group); - pr_debug("CPU%d removed\n", cpu); - break; - - /* - * case CPU_DEAD: - * - * When a CPU goes offline, don't free up or invalidate the copy of - * the microcode in kernel memory, so that we can reuse it when the - * CPU comes back online without unnecessarily requesting the userspace - * for it again. - */ - } - - /* The CPU refused to come up during a system resume */ - if (action == CPU_UP_CANCELED_FROZEN) - microcode_fini_cpu(cpu); - - return NOTIFY_OK; -} - -static struct notifier_block __refdata mc_cpu_notifier = { - .notifier_call = mc_cpu_callback, -}; - -#ifdef MODULE -/* Autoload on Intel and AMD systems */ -static const struct x86_cpu_id __initconst microcode_id[] = { -#ifdef CONFIG_MICROCODE_INTEL - { X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, }, -#endif -#ifdef CONFIG_MICROCODE_AMD - { X86_VENDOR_AMD, X86_FAMILY_ANY, X86_MODEL_ANY, }, -#endif - {} -}; -MODULE_DEVICE_TABLE(x86cpu, microcode_id); -#endif - -static struct attribute *cpu_root_microcode_attrs[] = { - &dev_attr_reload.attr, - NULL -}; - -static struct attribute_group cpu_root_microcode_group = { - .name = "microcode", - .attrs = cpu_root_microcode_attrs, -}; - -static int __init microcode_init(void) -{ - struct cpuinfo_x86 *c = &cpu_data(0); - int error; - - if (c->x86_vendor == X86_VENDOR_INTEL) - microcode_ops = init_intel_microcode(); - else if (c->x86_vendor == X86_VENDOR_AMD) - microcode_ops = init_amd_microcode(); - else - pr_err("no support for this CPU vendor\n"); - - if (!microcode_ops) - return -ENODEV; - - microcode_pdev = platform_device_register_simple("microcode", -1, - NULL, 0); - if (IS_ERR(microcode_pdev)) - return PTR_ERR(microcode_pdev); - - get_online_cpus(); - mutex_lock(µcode_mutex); - - error = subsys_interface_register(&mc_cpu_interface); - if (!error) - perf_check_microcode(); - mutex_unlock(µcode_mutex); - put_online_cpus(); - - if (error) - goto out_pdev; - - error = sysfs_create_group(&cpu_subsys.dev_root->kobj, - &cpu_root_microcode_group); - - if (error) { - pr_err("Error creating microcode group!\n"); - goto out_driver; - } - - error = microcode_dev_init(); - if (error) - goto out_ucode_group; - - register_syscore_ops(&mc_syscore_ops); - register_hotcpu_notifier(&mc_cpu_notifier); - - pr_info("Microcode Update Driver: v" MICROCODE_VERSION - " , Peter Oruba\n"); - - return 0; - - out_ucode_group: - sysfs_remove_group(&cpu_subsys.dev_root->kobj, - &cpu_root_microcode_group); - - out_driver: - get_online_cpus(); - mutex_lock(µcode_mutex); - - subsys_interface_unregister(&mc_cpu_interface); - - mutex_unlock(µcode_mutex); - put_online_cpus(); - - out_pdev: - platform_device_unregister(microcode_pdev); - return error; - -} -module_init(microcode_init); - -static void __exit microcode_exit(void) -{ - struct cpuinfo_x86 *c = &cpu_data(0); - - microcode_dev_exit(); - - unregister_hotcpu_notifier(&mc_cpu_notifier); - unregister_syscore_ops(&mc_syscore_ops); - - sysfs_remove_group(&cpu_subsys.dev_root->kobj, - &cpu_root_microcode_group); - - get_online_cpus(); - mutex_lock(µcode_mutex); - - subsys_interface_unregister(&mc_cpu_interface); - - mutex_unlock(µcode_mutex); - put_online_cpus(); - - platform_device_unregister(microcode_pdev); - - microcode_ops = NULL; - - if (c->x86_vendor == X86_VENDOR_AMD) - exit_amd_microcode(); - - pr_info("Microcode Update Driver: v" MICROCODE_VERSION " removed.\n"); -} -module_exit(microcode_exit); diff --git a/arch/x86/kernel/microcode_core_early.c b/arch/x86/kernel/microcode_core_early.c deleted file mode 100644 index be7f8514f577..000000000000 --- a/arch/x86/kernel/microcode_core_early.c +++ /dev/null @@ -1,141 +0,0 @@ -/* - * X86 CPU microcode early update for Linux - * - * Copyright (C) 2012 Fenghua Yu - * H Peter Anvin" - * - * This driver allows to early upgrade microcode on Intel processors - * belonging to IA-32 family - PentiumPro, Pentium II, - * Pentium III, Xeon, Pentium 4, etc. - * - * Reference: Section 9.11 of Volume 3, IA-32 Intel Architecture - * Software Developer's Manual. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ -#include -#include -#include -#include - -#define QCHAR(a, b, c, d) ((a) + ((b) << 8) + ((c) << 16) + ((d) << 24)) -#define CPUID_INTEL1 QCHAR('G', 'e', 'n', 'u') -#define CPUID_INTEL2 QCHAR('i', 'n', 'e', 'I') -#define CPUID_INTEL3 QCHAR('n', 't', 'e', 'l') -#define CPUID_AMD1 QCHAR('A', 'u', 't', 'h') -#define CPUID_AMD2 QCHAR('e', 'n', 't', 'i') -#define CPUID_AMD3 QCHAR('c', 'A', 'M', 'D') - -#define CPUID_IS(a, b, c, ebx, ecx, edx) \ - (!((ebx ^ (a))|(edx ^ (b))|(ecx ^ (c)))) - -/* - * In early loading microcode phase on BSP, boot_cpu_data is not set up yet. - * x86_vendor() gets vendor id for BSP. - * - * In 32 bit AP case, accessing boot_cpu_data needs linear address. To simplify - * coding, we still use x86_vendor() to get vendor id for AP. - * - * x86_vendor() gets vendor information directly through cpuid. - */ -static int x86_vendor(void) -{ - u32 eax = 0x00000000; - u32 ebx, ecx = 0, edx; - - native_cpuid(&eax, &ebx, &ecx, &edx); - - if (CPUID_IS(CPUID_INTEL1, CPUID_INTEL2, CPUID_INTEL3, ebx, ecx, edx)) - return X86_VENDOR_INTEL; - - if (CPUID_IS(CPUID_AMD1, CPUID_AMD2, CPUID_AMD3, ebx, ecx, edx)) - return X86_VENDOR_AMD; - - return X86_VENDOR_UNKNOWN; -} - -static int x86_family(void) -{ - u32 eax = 0x00000001; - u32 ebx, ecx = 0, edx; - int x86; - - native_cpuid(&eax, &ebx, &ecx, &edx); - - x86 = (eax >> 8) & 0xf; - if (x86 == 15) - x86 += (eax >> 20) & 0xff; - - return x86; -} - -void __init load_ucode_bsp(void) -{ - int vendor, x86; - - if (!have_cpuid_p()) - return; - - vendor = x86_vendor(); - x86 = x86_family(); - - switch (vendor) { - case X86_VENDOR_INTEL: - if (x86 >= 6) - load_ucode_intel_bsp(); - break; - case X86_VENDOR_AMD: - if (x86 >= 0x10) - load_ucode_amd_bsp(); - break; - default: - break; - } -} - -void load_ucode_ap(void) -{ - int vendor, x86; - - if (!have_cpuid_p()) - return; - - vendor = x86_vendor(); - x86 = x86_family(); - - switch (vendor) { - case X86_VENDOR_INTEL: - if (x86 >= 6) - load_ucode_intel_ap(); - break; - case X86_VENDOR_AMD: - if (x86 >= 0x10) - load_ucode_amd_ap(); - break; - default: - break; - } -} - -int __init save_microcode_in_initrd(void) -{ - struct cpuinfo_x86 *c = &boot_cpu_data; - - switch (c->x86_vendor) { - case X86_VENDOR_INTEL: - if (c->x86 >= 6) - save_microcode_in_initrd_intel(); - break; - case X86_VENDOR_AMD: - if (c->x86 >= 0x10) - save_microcode_in_initrd_amd(); - break; - default: - break; - } - - return 0; -} diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c deleted file mode 100644 index 5fb2cebf556b..000000000000 --- a/arch/x86/kernel/microcode_intel.c +++ /dev/null @@ -1,333 +0,0 @@ -/* - * Intel CPU Microcode Update Driver for Linux - * - * Copyright (C) 2000-2006 Tigran Aivazian - * 2006 Shaohua Li - * - * This driver allows to upgrade microcode on Intel processors - * belonging to IA-32 family - PentiumPro, Pentium II, - * Pentium III, Xeon, Pentium 4, etc. - * - * Reference: Section 8.11 of Volume 3a, IA-32 Intel? Architecture - * Software Developer's Manual - * Order Number 253668 or free download from: - * - * http://developer.intel.com/Assets/PDF/manual/253668.pdf - * - * For more information, go to http://www.urbanmyth.org/microcode - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * 1.0 16 Feb 2000, Tigran Aivazian - * Initial release. - * 1.01 18 Feb 2000, Tigran Aivazian - * Added read() support + cleanups. - * 1.02 21 Feb 2000, Tigran Aivazian - * Added 'device trimming' support. open(O_WRONLY) zeroes - * and frees the saved copy of applied microcode. - * 1.03 29 Feb 2000, Tigran Aivazian - * Made to use devfs (/dev/cpu/microcode) + cleanups. - * 1.04 06 Jun 2000, Simon Trimmer - * Added misc device support (now uses both devfs and misc). - * Added MICROCODE_IOCFREE ioctl to clear memory. - * 1.05 09 Jun 2000, Simon Trimmer - * Messages for error cases (non Intel & no suitable microcode). - * 1.06 03 Aug 2000, Tigran Aivazian - * Removed ->release(). Removed exclusive open and status bitmap. - * Added microcode_rwsem to serialize read()/write()/ioctl(). - * Removed global kernel lock usage. - * 1.07 07 Sep 2000, Tigran Aivazian - * Write 0 to 0x8B msr and then cpuid before reading revision, - * so that it works even if there were no update done by the - * BIOS. Otherwise, reading from 0x8B gives junk (which happened - * to be 0 on my machine which is why it worked even when I - * disabled update by the BIOS) - * Thanks to Eric W. Biederman for the fix. - * 1.08 11 Dec 2000, Richard Schaal and - * Tigran Aivazian - * Intel Pentium 4 processor support and bugfixes. - * 1.09 30 Oct 2001, Tigran Aivazian - * Bugfix for HT (Hyper-Threading) enabled processors - * whereby processor resources are shared by all logical processors - * in a single CPU package. - * 1.10 28 Feb 2002 Asit K Mallick and - * Tigran Aivazian , - * Serialize updates as required on HT processors due to - * speculative nature of implementation. - * 1.11 22 Mar 2002 Tigran Aivazian - * Fix the panic when writing zero-length microcode chunk. - * 1.12 29 Sep 2003 Nitin Kamble , - * Jun Nakajima - * Support for the microcode updates in the new format. - * 1.13 10 Oct 2003 Tigran Aivazian - * Removed ->read() method and obsoleted MICROCODE_IOCFREE ioctl - * because we no longer hold a copy of applied microcode - * in kernel memory. - * 1.14 25 Jun 2004 Tigran Aivazian - * Fix sigmatch() macro to handle old CPUs with pf == 0. - * Thanks to Stuart Swales for pointing out this bug. - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include -#include -#include -#include -#include - -#include -#include -#include - -MODULE_DESCRIPTION("Microcode Update Driver"); -MODULE_AUTHOR("Tigran Aivazian "); -MODULE_LICENSE("GPL"); - -static int collect_cpu_info(int cpu_num, struct cpu_signature *csig) -{ - struct cpuinfo_x86 *c = &cpu_data(cpu_num); - unsigned int val[2]; - - memset(csig, 0, sizeof(*csig)); - - csig->sig = cpuid_eax(0x00000001); - - if ((c->x86_model >= 5) || (c->x86 > 6)) { - /* get processor flags from MSR 0x17 */ - rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]); - csig->pf = 1 << ((val[1] >> 18) & 7); - } - - csig->rev = c->microcode; - pr_info("CPU%d sig=0x%x, pf=0x%x, revision=0x%x\n", - cpu_num, csig->sig, csig->pf, csig->rev); - - return 0; -} - -/* - * return 0 - no update found - * return 1 - found update - */ -static int get_matching_mc(struct microcode_intel *mc_intel, int cpu) -{ - struct cpu_signature cpu_sig; - unsigned int csig, cpf, crev; - - collect_cpu_info(cpu, &cpu_sig); - - csig = cpu_sig.sig; - cpf = cpu_sig.pf; - crev = cpu_sig.rev; - - return get_matching_microcode(csig, cpf, mc_intel, crev); -} - -int apply_microcode(int cpu) -{ - struct microcode_intel *mc_intel; - struct ucode_cpu_info *uci; - unsigned int val[2]; - int cpu_num = raw_smp_processor_id(); - struct cpuinfo_x86 *c = &cpu_data(cpu_num); - - uci = ucode_cpu_info + cpu; - mc_intel = uci->mc; - - /* We should bind the task to the CPU */ - BUG_ON(cpu_num != cpu); - - if (mc_intel == NULL) - return 0; - - /* - * Microcode on this CPU could be updated earlier. Only apply the - * microcode patch in mc_intel when it is newer than the one on this - * CPU. - */ - if (get_matching_mc(mc_intel, cpu) == 0) - return 0; - - /* write microcode via MSR 0x79 */ - wrmsr(MSR_IA32_UCODE_WRITE, - (unsigned long) mc_intel->bits, - (unsigned long) mc_intel->bits >> 16 >> 16); - wrmsr(MSR_IA32_UCODE_REV, 0, 0); - - /* As documented in the SDM: Do a CPUID 1 here */ - sync_core(); - - /* get the current revision from MSR 0x8B */ - rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); - - if (val[1] != mc_intel->hdr.rev) { - pr_err("CPU%d update to revision 0x%x failed\n", - cpu_num, mc_intel->hdr.rev); - return -1; - } - pr_info("CPU%d updated to revision 0x%x, date = %04x-%02x-%02x\n", - cpu_num, val[1], - mc_intel->hdr.date & 0xffff, - mc_intel->hdr.date >> 24, - (mc_intel->hdr.date >> 16) & 0xff); - - uci->cpu_sig.rev = val[1]; - c->microcode = val[1]; - - return 0; -} - -static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, - int (*get_ucode_data)(void *, const void *, size_t)) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - u8 *ucode_ptr = data, *new_mc = NULL, *mc = NULL; - int new_rev = uci->cpu_sig.rev; - unsigned int leftover = size; - enum ucode_state state = UCODE_OK; - unsigned int curr_mc_size = 0; - unsigned int csig, cpf; - - while (leftover) { - struct microcode_header_intel mc_header; - unsigned int mc_size; - - if (get_ucode_data(&mc_header, ucode_ptr, sizeof(mc_header))) - break; - - mc_size = get_totalsize(&mc_header); - if (!mc_size || mc_size > leftover) { - pr_err("error! Bad data in microcode data file\n"); - break; - } - - /* For performance reasons, reuse mc area when possible */ - if (!mc || mc_size > curr_mc_size) { - vfree(mc); - mc = vmalloc(mc_size); - if (!mc) - break; - curr_mc_size = mc_size; - } - - if (get_ucode_data(mc, ucode_ptr, mc_size) || - microcode_sanity_check(mc, 1) < 0) { - break; - } - - csig = uci->cpu_sig.sig; - cpf = uci->cpu_sig.pf; - if (get_matching_microcode(csig, cpf, mc, new_rev)) { - vfree(new_mc); - new_rev = mc_header.rev; - new_mc = mc; - mc = NULL; /* trigger new vmalloc */ - } - - ucode_ptr += mc_size; - leftover -= mc_size; - } - - vfree(mc); - - if (leftover) { - vfree(new_mc); - state = UCODE_ERROR; - goto out; - } - - if (!new_mc) { - state = UCODE_NFOUND; - goto out; - } - - vfree(uci->mc); - uci->mc = (struct microcode_intel *)new_mc; - - /* - * If early loading microcode is supported, save this mc into - * permanent memory. So it will be loaded early when a CPU is hot added - * or resumes. - */ - save_mc_for_early(new_mc); - - pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n", - cpu, new_rev, uci->cpu_sig.rev); -out: - return state; -} - -static int get_ucode_fw(void *to, const void *from, size_t n) -{ - memcpy(to, from, n); - return 0; -} - -static enum ucode_state request_microcode_fw(int cpu, struct device *device, - bool refresh_fw) -{ - char name[30]; - struct cpuinfo_x86 *c = &cpu_data(cpu); - const struct firmware *firmware; - enum ucode_state ret; - - sprintf(name, "intel-ucode/%02x-%02x-%02x", - c->x86, c->x86_model, c->x86_mask); - - if (request_firmware(&firmware, name, device)) { - pr_debug("data file %s load failed\n", name); - return UCODE_NFOUND; - } - - ret = generic_load_microcode(cpu, (void *)firmware->data, - firmware->size, &get_ucode_fw); - - release_firmware(firmware); - - return ret; -} - -static int get_ucode_user(void *to, const void *from, size_t n) -{ - return copy_from_user(to, from, n); -} - -static enum ucode_state -request_microcode_user(int cpu, const void __user *buf, size_t size) -{ - return generic_load_microcode(cpu, (void *)buf, size, &get_ucode_user); -} - -static void microcode_fini_cpu(int cpu) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - - vfree(uci->mc); - uci->mc = NULL; -} - -static struct microcode_ops microcode_intel_ops = { - .request_microcode_user = request_microcode_user, - .request_microcode_fw = request_microcode_fw, - .collect_cpu_info = collect_cpu_info, - .apply_microcode = apply_microcode, - .microcode_fini_cpu = microcode_fini_cpu, -}; - -struct microcode_ops * __init init_intel_microcode(void) -{ - struct cpuinfo_x86 *c = &cpu_data(0); - - if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 || - cpu_has(c, X86_FEATURE_IA64)) { - pr_err("Intel CPU family 0x%x not supported\n", c->x86); - return NULL; - } - - return µcode_intel_ops; -} - diff --git a/arch/x86/kernel/microcode_intel_early.c b/arch/x86/kernel/microcode_intel_early.c deleted file mode 100644 index 18f739129e72..000000000000 --- a/arch/x86/kernel/microcode_intel_early.c +++ /dev/null @@ -1,787 +0,0 @@ -/* - * Intel CPU microcode early update for Linux - * - * Copyright (C) 2012 Fenghua Yu - * H Peter Anvin" - * - * This allows to early upgrade microcode on Intel processors - * belonging to IA-32 family - PentiumPro, Pentium II, - * Pentium III, Xeon, Pentium 4, etc. - * - * Reference: Section 9.11 of Volume 3, IA-32 Intel Architecture - * Software Developer's Manual. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -unsigned long mc_saved_in_initrd[MAX_UCODE_COUNT]; -struct mc_saved_data { - unsigned int mc_saved_count; - struct microcode_intel **mc_saved; -} mc_saved_data; - -static enum ucode_state -generic_load_microcode_early(struct microcode_intel **mc_saved_p, - unsigned int mc_saved_count, - struct ucode_cpu_info *uci) -{ - struct microcode_intel *ucode_ptr, *new_mc = NULL; - int new_rev = uci->cpu_sig.rev; - enum ucode_state state = UCODE_OK; - unsigned int mc_size; - struct microcode_header_intel *mc_header; - unsigned int csig = uci->cpu_sig.sig; - unsigned int cpf = uci->cpu_sig.pf; - int i; - - for (i = 0; i < mc_saved_count; i++) { - ucode_ptr = mc_saved_p[i]; - - mc_header = (struct microcode_header_intel *)ucode_ptr; - mc_size = get_totalsize(mc_header); - if (get_matching_microcode(csig, cpf, ucode_ptr, new_rev)) { - new_rev = mc_header->rev; - new_mc = ucode_ptr; - } - } - - if (!new_mc) { - state = UCODE_NFOUND; - goto out; - } - - uci->mc = (struct microcode_intel *)new_mc; -out: - return state; -} - -static void -microcode_pointer(struct microcode_intel **mc_saved, - unsigned long *mc_saved_in_initrd, - unsigned long initrd_start, int mc_saved_count) -{ - int i; - - for (i = 0; i < mc_saved_count; i++) - mc_saved[i] = (struct microcode_intel *) - (mc_saved_in_initrd[i] + initrd_start); -} - -#ifdef CONFIG_X86_32 -static void -microcode_phys(struct microcode_intel **mc_saved_tmp, - struct mc_saved_data *mc_saved_data) -{ - int i; - struct microcode_intel ***mc_saved; - - mc_saved = (struct microcode_intel ***) - __pa_nodebug(&mc_saved_data->mc_saved); - for (i = 0; i < mc_saved_data->mc_saved_count; i++) { - struct microcode_intel *p; - - p = *(struct microcode_intel **) - __pa_nodebug(mc_saved_data->mc_saved + i); - mc_saved_tmp[i] = (struct microcode_intel *)__pa_nodebug(p); - } -} -#endif - -static enum ucode_state -load_microcode(struct mc_saved_data *mc_saved_data, - unsigned long *mc_saved_in_initrd, - unsigned long initrd_start, - struct ucode_cpu_info *uci) -{ - struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT]; - unsigned int count = mc_saved_data->mc_saved_count; - - if (!mc_saved_data->mc_saved) { - microcode_pointer(mc_saved_tmp, mc_saved_in_initrd, - initrd_start, count); - - return generic_load_microcode_early(mc_saved_tmp, count, uci); - } else { -#ifdef CONFIG_X86_32 - microcode_phys(mc_saved_tmp, mc_saved_data); - return generic_load_microcode_early(mc_saved_tmp, count, uci); -#else - return generic_load_microcode_early(mc_saved_data->mc_saved, - count, uci); -#endif - } -} - -static u8 get_x86_family(unsigned long sig) -{ - u8 x86; - - x86 = (sig >> 8) & 0xf; - - if (x86 == 0xf) - x86 += (sig >> 20) & 0xff; - - return x86; -} - -static u8 get_x86_model(unsigned long sig) -{ - u8 x86, x86_model; - - x86 = get_x86_family(sig); - x86_model = (sig >> 4) & 0xf; - - if (x86 == 0x6 || x86 == 0xf) - x86_model += ((sig >> 16) & 0xf) << 4; - - return x86_model; -} - -/* - * Given CPU signature and a microcode patch, this function finds if the - * microcode patch has matching family and model with the CPU. - */ -static enum ucode_state -matching_model_microcode(struct microcode_header_intel *mc_header, - unsigned long sig) -{ - u8 x86, x86_model; - u8 x86_ucode, x86_model_ucode; - struct extended_sigtable *ext_header; - unsigned long total_size = get_totalsize(mc_header); - unsigned long data_size = get_datasize(mc_header); - int ext_sigcount, i; - struct extended_signature *ext_sig; - - x86 = get_x86_family(sig); - x86_model = get_x86_model(sig); - - x86_ucode = get_x86_family(mc_header->sig); - x86_model_ucode = get_x86_model(mc_header->sig); - - if (x86 == x86_ucode && x86_model == x86_model_ucode) - return UCODE_OK; - - /* Look for ext. headers: */ - if (total_size <= data_size + MC_HEADER_SIZE) - return UCODE_NFOUND; - - ext_header = (struct extended_sigtable *) - mc_header + data_size + MC_HEADER_SIZE; - ext_sigcount = ext_header->count; - ext_sig = (void *)ext_header + EXT_HEADER_SIZE; - - for (i = 0; i < ext_sigcount; i++) { - x86_ucode = get_x86_family(ext_sig->sig); - x86_model_ucode = get_x86_model(ext_sig->sig); - - if (x86 == x86_ucode && x86_model == x86_model_ucode) - return UCODE_OK; - - ext_sig++; - } - - return UCODE_NFOUND; -} - -static int -save_microcode(struct mc_saved_data *mc_saved_data, - struct microcode_intel **mc_saved_src, - unsigned int mc_saved_count) -{ - int i, j; - struct microcode_intel **mc_saved_p; - int ret; - - if (!mc_saved_count) - return -EINVAL; - - /* - * Copy new microcode data. - */ - mc_saved_p = kmalloc(mc_saved_count*sizeof(struct microcode_intel *), - GFP_KERNEL); - if (!mc_saved_p) - return -ENOMEM; - - for (i = 0; i < mc_saved_count; i++) { - struct microcode_intel *mc = mc_saved_src[i]; - struct microcode_header_intel *mc_header = &mc->hdr; - unsigned long mc_size = get_totalsize(mc_header); - mc_saved_p[i] = kmalloc(mc_size, GFP_KERNEL); - if (!mc_saved_p[i]) { - ret = -ENOMEM; - goto err; - } - if (!mc_saved_src[i]) { - ret = -EINVAL; - goto err; - } - memcpy(mc_saved_p[i], mc, mc_size); - } - - /* - * Point to newly saved microcode. - */ - mc_saved_data->mc_saved = mc_saved_p; - mc_saved_data->mc_saved_count = mc_saved_count; - - return 0; - -err: - for (j = 0; j <= i; j++) - kfree(mc_saved_p[j]); - kfree(mc_saved_p); - - return ret; -} - -/* - * A microcode patch in ucode_ptr is saved into mc_saved - * - if it has matching signature and newer revision compared to an existing - * patch mc_saved. - * - or if it is a newly discovered microcode patch. - * - * The microcode patch should have matching model with CPU. - */ -static void _save_mc(struct microcode_intel **mc_saved, u8 *ucode_ptr, - unsigned int *mc_saved_count_p) -{ - int i; - int found = 0; - unsigned int mc_saved_count = *mc_saved_count_p; - struct microcode_header_intel *mc_header; - - mc_header = (struct microcode_header_intel *)ucode_ptr; - for (i = 0; i < mc_saved_count; i++) { - unsigned int sig, pf; - unsigned int new_rev; - struct microcode_header_intel *mc_saved_header = - (struct microcode_header_intel *)mc_saved[i]; - sig = mc_saved_header->sig; - pf = mc_saved_header->pf; - new_rev = mc_header->rev; - - if (get_matching_sig(sig, pf, ucode_ptr, new_rev)) { - found = 1; - if (update_match_revision(mc_header, new_rev)) { - /* - * Found an older ucode saved before. - * Replace the older one with this newer - * one. - */ - mc_saved[i] = - (struct microcode_intel *)ucode_ptr; - break; - } - } - } - if (i >= mc_saved_count && !found) - /* - * This ucode is first time discovered in ucode file. - * Save it to memory. - */ - mc_saved[mc_saved_count++] = - (struct microcode_intel *)ucode_ptr; - - *mc_saved_count_p = mc_saved_count; -} - -/* - * Get microcode matching with BSP's model. Only CPUs with the same model as - * BSP can stay in the platform. - */ -static enum ucode_state __init -get_matching_model_microcode(int cpu, unsigned long start, - void *data, size_t size, - struct mc_saved_data *mc_saved_data, - unsigned long *mc_saved_in_initrd, - struct ucode_cpu_info *uci) -{ - u8 *ucode_ptr = data; - unsigned int leftover = size; - enum ucode_state state = UCODE_OK; - unsigned int mc_size; - struct microcode_header_intel *mc_header; - struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT]; - unsigned int mc_saved_count = mc_saved_data->mc_saved_count; - int i; - - while (leftover) { - mc_header = (struct microcode_header_intel *)ucode_ptr; - - mc_size = get_totalsize(mc_header); - if (!mc_size || mc_size > leftover || - microcode_sanity_check(ucode_ptr, 0) < 0) - break; - - leftover -= mc_size; - - /* - * Since APs with same family and model as the BSP may boot in - * the platform, we need to find and save microcode patches - * with the same family and model as the BSP. - */ - if (matching_model_microcode(mc_header, uci->cpu_sig.sig) != - UCODE_OK) { - ucode_ptr += mc_size; - continue; - } - - _save_mc(mc_saved_tmp, ucode_ptr, &mc_saved_count); - - ucode_ptr += mc_size; - } - - if (leftover) { - state = UCODE_ERROR; - goto out; - } - - if (mc_saved_count == 0) { - state = UCODE_NFOUND; - goto out; - } - - for (i = 0; i < mc_saved_count; i++) - mc_saved_in_initrd[i] = (unsigned long)mc_saved_tmp[i] - start; - - mc_saved_data->mc_saved_count = mc_saved_count; -out: - return state; -} - -static int collect_cpu_info_early(struct ucode_cpu_info *uci) -{ - unsigned int val[2]; - u8 x86, x86_model; - struct cpu_signature csig; - unsigned int eax, ebx, ecx, edx; - - csig.sig = 0; - csig.pf = 0; - csig.rev = 0; - - memset(uci, 0, sizeof(*uci)); - - eax = 0x00000001; - ecx = 0; - native_cpuid(&eax, &ebx, &ecx, &edx); - csig.sig = eax; - - x86 = get_x86_family(csig.sig); - x86_model = get_x86_model(csig.sig); - - if ((x86_model >= 5) || (x86 > 6)) { - /* get processor flags from MSR 0x17 */ - native_rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]); - csig.pf = 1 << ((val[1] >> 18) & 7); - } - native_wrmsr(MSR_IA32_UCODE_REV, 0, 0); - - /* As documented in the SDM: Do a CPUID 1 here */ - sync_core(); - - /* get the current revision from MSR 0x8B */ - native_rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); - - csig.rev = val[1]; - - uci->cpu_sig = csig; - uci->valid = 1; - - return 0; -} - -#ifdef DEBUG -static void __ref show_saved_mc(void) -{ - int i, j; - unsigned int sig, pf, rev, total_size, data_size, date; - struct ucode_cpu_info uci; - - if (mc_saved_data.mc_saved_count == 0) { - pr_debug("no micorcode data saved.\n"); - return; - } - pr_debug("Total microcode saved: %d\n", mc_saved_data.mc_saved_count); - - collect_cpu_info_early(&uci); - - sig = uci.cpu_sig.sig; - pf = uci.cpu_sig.pf; - rev = uci.cpu_sig.rev; - pr_debug("CPU%d: sig=0x%x, pf=0x%x, rev=0x%x\n", - smp_processor_id(), sig, pf, rev); - - for (i = 0; i < mc_saved_data.mc_saved_count; i++) { - struct microcode_header_intel *mc_saved_header; - struct extended_sigtable *ext_header; - int ext_sigcount; - struct extended_signature *ext_sig; - - mc_saved_header = (struct microcode_header_intel *) - mc_saved_data.mc_saved[i]; - sig = mc_saved_header->sig; - pf = mc_saved_header->pf; - rev = mc_saved_header->rev; - total_size = get_totalsize(mc_saved_header); - data_size = get_datasize(mc_saved_header); - date = mc_saved_header->date; - - pr_debug("mc_saved[%d]: sig=0x%x, pf=0x%x, rev=0x%x, toal size=0x%x, date = %04x-%02x-%02x\n", - i, sig, pf, rev, total_size, - date & 0xffff, - date >> 24, - (date >> 16) & 0xff); - - /* Look for ext. headers: */ - if (total_size <= data_size + MC_HEADER_SIZE) - continue; - - ext_header = (struct extended_sigtable *) - mc_saved_header + data_size + MC_HEADER_SIZE; - ext_sigcount = ext_header->count; - ext_sig = (void *)ext_header + EXT_HEADER_SIZE; - - for (j = 0; j < ext_sigcount; j++) { - sig = ext_sig->sig; - pf = ext_sig->pf; - - pr_debug("\tExtended[%d]: sig=0x%x, pf=0x%x\n", - j, sig, pf); - - ext_sig++; - } - - } -} -#else -static inline void show_saved_mc(void) -{ -} -#endif - -#if defined(CONFIG_MICROCODE_INTEL_EARLY) && defined(CONFIG_HOTPLUG_CPU) -static DEFINE_MUTEX(x86_cpu_microcode_mutex); -/* - * Save this mc into mc_saved_data. So it will be loaded early when a CPU is - * hot added or resumes. - * - * Please make sure this mc should be a valid microcode patch before calling - * this function. - */ -int save_mc_for_early(u8 *mc) -{ - struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT]; - unsigned int mc_saved_count_init; - unsigned int mc_saved_count; - struct microcode_intel **mc_saved; - int ret = 0; - int i; - - /* - * Hold hotplug lock so mc_saved_data is not accessed by a CPU in - * hotplug. - */ - mutex_lock(&x86_cpu_microcode_mutex); - - mc_saved_count_init = mc_saved_data.mc_saved_count; - mc_saved_count = mc_saved_data.mc_saved_count; - mc_saved = mc_saved_data.mc_saved; - - if (mc_saved && mc_saved_count) - memcpy(mc_saved_tmp, mc_saved, - mc_saved_count * sizeof(struct mirocode_intel *)); - /* - * Save the microcode patch mc in mc_save_tmp structure if it's a newer - * version. - */ - - _save_mc(mc_saved_tmp, mc, &mc_saved_count); - - /* - * Save the mc_save_tmp in global mc_saved_data. - */ - ret = save_microcode(&mc_saved_data, mc_saved_tmp, mc_saved_count); - if (ret) { - pr_err("Cannot save microcode patch.\n"); - goto out; - } - - show_saved_mc(); - - /* - * Free old saved microcod data. - */ - if (mc_saved) { - for (i = 0; i < mc_saved_count_init; i++) - kfree(mc_saved[i]); - kfree(mc_saved); - } - -out: - mutex_unlock(&x86_cpu_microcode_mutex); - - return ret; -} -EXPORT_SYMBOL_GPL(save_mc_for_early); -#endif - -static __initdata char ucode_name[] = "kernel/x86/microcode/GenuineIntel.bin"; -static __init enum ucode_state -scan_microcode(unsigned long start, unsigned long end, - struct mc_saved_data *mc_saved_data, - unsigned long *mc_saved_in_initrd, - struct ucode_cpu_info *uci) -{ - unsigned int size = end - start + 1; - struct cpio_data cd; - long offset = 0; -#ifdef CONFIG_X86_32 - char *p = (char *)__pa_nodebug(ucode_name); -#else - char *p = ucode_name; -#endif - - cd.data = NULL; - cd.size = 0; - - cd = find_cpio_data(p, (void *)start, size, &offset); - if (!cd.data) - return UCODE_ERROR; - - - return get_matching_model_microcode(0, start, cd.data, cd.size, - mc_saved_data, mc_saved_in_initrd, - uci); -} - -/* - * Print ucode update info. - */ -static void -print_ucode_info(struct ucode_cpu_info *uci, unsigned int date) -{ - int cpu = smp_processor_id(); - - pr_info("CPU%d microcode updated early to revision 0x%x, date = %04x-%02x-%02x\n", - cpu, - uci->cpu_sig.rev, - date & 0xffff, - date >> 24, - (date >> 16) & 0xff); -} - -#ifdef CONFIG_X86_32 - -static int delay_ucode_info; -static int current_mc_date; - -/* - * Print early updated ucode info after printk works. This is delayed info dump. - */ -void show_ucode_info_early(void) -{ - struct ucode_cpu_info uci; - - if (delay_ucode_info) { - collect_cpu_info_early(&uci); - print_ucode_info(&uci, current_mc_date); - delay_ucode_info = 0; - } -} - -/* - * At this point, we can not call printk() yet. Keep microcode patch number in - * mc_saved_data.mc_saved and delay printing microcode info in - * show_ucode_info_early() until printk() works. - */ -static void print_ucode(struct ucode_cpu_info *uci) -{ - struct microcode_intel *mc_intel; - int *delay_ucode_info_p; - int *current_mc_date_p; - - mc_intel = uci->mc; - if (mc_intel == NULL) - return; - - delay_ucode_info_p = (int *)__pa_nodebug(&delay_ucode_info); - current_mc_date_p = (int *)__pa_nodebug(¤t_mc_date); - - *delay_ucode_info_p = 1; - *current_mc_date_p = mc_intel->hdr.date; -} -#else - -/* - * Flush global tlb. We only do this in x86_64 where paging has been enabled - * already and PGE should be enabled as well. - */ -static inline void flush_tlb_early(void) -{ - __native_flush_tlb_global_irq_disabled(); -} - -static inline void print_ucode(struct ucode_cpu_info *uci) -{ - struct microcode_intel *mc_intel; - - mc_intel = uci->mc; - if (mc_intel == NULL) - return; - - print_ucode_info(uci, mc_intel->hdr.date); -} -#endif - -static int apply_microcode_early(struct mc_saved_data *mc_saved_data, - struct ucode_cpu_info *uci) -{ - struct microcode_intel *mc_intel; - unsigned int val[2]; - - mc_intel = uci->mc; - if (mc_intel == NULL) - return 0; - - /* write microcode via MSR 0x79 */ - native_wrmsr(MSR_IA32_UCODE_WRITE, - (unsigned long) mc_intel->bits, - (unsigned long) mc_intel->bits >> 16 >> 16); - native_wrmsr(MSR_IA32_UCODE_REV, 0, 0); - - /* As documented in the SDM: Do a CPUID 1 here */ - sync_core(); - - /* get the current revision from MSR 0x8B */ - native_rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); - if (val[1] != mc_intel->hdr.rev) - return -1; - -#ifdef CONFIG_X86_64 - /* Flush global tlb. This is precaution. */ - flush_tlb_early(); -#endif - uci->cpu_sig.rev = val[1]; - - print_ucode(uci); - - return 0; -} - -/* - * This function converts microcode patch offsets previously stored in - * mc_saved_in_initrd to pointers and stores the pointers in mc_saved_data. - */ -int __init save_microcode_in_initrd_intel(void) -{ - unsigned int count = mc_saved_data.mc_saved_count; - struct microcode_intel *mc_saved[MAX_UCODE_COUNT]; - int ret = 0; - - if (count == 0) - return ret; - - microcode_pointer(mc_saved, mc_saved_in_initrd, initrd_start, count); - ret = save_microcode(&mc_saved_data, mc_saved, count); - if (ret) - pr_err("Cannot save microcode patches from initrd.\n"); - - show_saved_mc(); - - return ret; -} - -static void __init -_load_ucode_intel_bsp(struct mc_saved_data *mc_saved_data, - unsigned long *mc_saved_in_initrd, - unsigned long initrd_start_early, - unsigned long initrd_end_early, - struct ucode_cpu_info *uci) -{ - collect_cpu_info_early(uci); - scan_microcode(initrd_start_early, initrd_end_early, mc_saved_data, - mc_saved_in_initrd, uci); - load_microcode(mc_saved_data, mc_saved_in_initrd, - initrd_start_early, uci); - apply_microcode_early(mc_saved_data, uci); -} - -void __init -load_ucode_intel_bsp(void) -{ - u64 ramdisk_image, ramdisk_size; - unsigned long initrd_start_early, initrd_end_early; - struct ucode_cpu_info uci; -#ifdef CONFIG_X86_32 - struct boot_params *boot_params_p; - - boot_params_p = (struct boot_params *)__pa_nodebug(&boot_params); - ramdisk_image = boot_params_p->hdr.ramdisk_image; - ramdisk_size = boot_params_p->hdr.ramdisk_size; - initrd_start_early = ramdisk_image; - initrd_end_early = initrd_start_early + ramdisk_size; - - _load_ucode_intel_bsp( - (struct mc_saved_data *)__pa_nodebug(&mc_saved_data), - (unsigned long *)__pa_nodebug(&mc_saved_in_initrd), - initrd_start_early, initrd_end_early, &uci); -#else - ramdisk_image = boot_params.hdr.ramdisk_image; - ramdisk_size = boot_params.hdr.ramdisk_size; - initrd_start_early = ramdisk_image + PAGE_OFFSET; - initrd_end_early = initrd_start_early + ramdisk_size; - - _load_ucode_intel_bsp(&mc_saved_data, mc_saved_in_initrd, - initrd_start_early, initrd_end_early, &uci); -#endif -} - -void load_ucode_intel_ap(void) -{ - struct mc_saved_data *mc_saved_data_p; - struct ucode_cpu_info uci; - unsigned long *mc_saved_in_initrd_p; - unsigned long initrd_start_addr; -#ifdef CONFIG_X86_32 - unsigned long *initrd_start_p; - - mc_saved_in_initrd_p = - (unsigned long *)__pa_nodebug(mc_saved_in_initrd); - mc_saved_data_p = (struct mc_saved_data *)__pa_nodebug(&mc_saved_data); - initrd_start_p = (unsigned long *)__pa_nodebug(&initrd_start); - initrd_start_addr = (unsigned long)__pa_nodebug(*initrd_start_p); -#else - mc_saved_data_p = &mc_saved_data; - mc_saved_in_initrd_p = mc_saved_in_initrd; - initrd_start_addr = initrd_start; -#endif - - /* - * If there is no valid ucode previously saved in memory, no need to - * update ucode on this AP. - */ - if (mc_saved_data_p->mc_saved_count == 0) - return; - - collect_cpu_info_early(&uci); - load_microcode(mc_saved_data_p, mc_saved_in_initrd_p, - initrd_start_addr, &uci); - apply_microcode_early(mc_saved_data_p, &uci); -} diff --git a/arch/x86/kernel/microcode_intel_lib.c b/arch/x86/kernel/microcode_intel_lib.c deleted file mode 100644 index ce69320d0179..000000000000 --- a/arch/x86/kernel/microcode_intel_lib.c +++ /dev/null @@ -1,174 +0,0 @@ -/* - * Intel CPU Microcode Update Driver for Linux - * - * Copyright (C) 2012 Fenghua Yu - * H Peter Anvin" - * - * This driver allows to upgrade microcode on Intel processors - * belonging to IA-32 family - PentiumPro, Pentium II, - * Pentium III, Xeon, Pentium 4, etc. - * - * Reference: Section 8.11 of Volume 3a, IA-32 Intel? Architecture - * Software Developer's Manual - * Order Number 253668 or free download from: - * - * http://developer.intel.com/Assets/PDF/manual/253668.pdf - * - * For more information, go to http://www.urbanmyth.org/microcode - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - */ -#include -#include -#include -#include - -#include -#include -#include - -static inline int -update_match_cpu(unsigned int csig, unsigned int cpf, - unsigned int sig, unsigned int pf) -{ - return (!sigmatch(sig, csig, pf, cpf)) ? 0 : 1; -} - -int -update_match_revision(struct microcode_header_intel *mc_header, int rev) -{ - return (mc_header->rev <= rev) ? 0 : 1; -} - -int microcode_sanity_check(void *mc, int print_err) -{ - unsigned long total_size, data_size, ext_table_size; - struct microcode_header_intel *mc_header = mc; - struct extended_sigtable *ext_header = NULL; - int sum, orig_sum, ext_sigcount = 0, i; - struct extended_signature *ext_sig; - - total_size = get_totalsize(mc_header); - data_size = get_datasize(mc_header); - - if (data_size + MC_HEADER_SIZE > total_size) { - if (print_err) - pr_err("error! Bad data size in microcode data file\n"); - return -EINVAL; - } - - if (mc_header->ldrver != 1 || mc_header->hdrver != 1) { - if (print_err) - pr_err("error! Unknown microcode update format\n"); - return -EINVAL; - } - ext_table_size = total_size - (MC_HEADER_SIZE + data_size); - if (ext_table_size) { - if ((ext_table_size < EXT_HEADER_SIZE) - || ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) { - if (print_err) - pr_err("error! Small exttable size in microcode data file\n"); - return -EINVAL; - } - ext_header = mc + MC_HEADER_SIZE + data_size; - if (ext_table_size != exttable_size(ext_header)) { - if (print_err) - pr_err("error! Bad exttable size in microcode data file\n"); - return -EFAULT; - } - ext_sigcount = ext_header->count; - } - - /* check extended table checksum */ - if (ext_table_size) { - int ext_table_sum = 0; - int *ext_tablep = (int *)ext_header; - - i = ext_table_size / DWSIZE; - while (i--) - ext_table_sum += ext_tablep[i]; - if (ext_table_sum) { - if (print_err) - pr_warn("aborting, bad extended signature table checksum\n"); - return -EINVAL; - } - } - - /* calculate the checksum */ - orig_sum = 0; - i = (MC_HEADER_SIZE + data_size) / DWSIZE; - while (i--) - orig_sum += ((int *)mc)[i]; - if (orig_sum) { - if (print_err) - pr_err("aborting, bad checksum\n"); - return -EINVAL; - } - if (!ext_table_size) - return 0; - /* check extended signature checksum */ - for (i = 0; i < ext_sigcount; i++) { - ext_sig = (void *)ext_header + EXT_HEADER_SIZE + - EXT_SIGNATURE_SIZE * i; - sum = orig_sum - - (mc_header->sig + mc_header->pf + mc_header->cksum) - + (ext_sig->sig + ext_sig->pf + ext_sig->cksum); - if (sum) { - if (print_err) - pr_err("aborting, bad checksum\n"); - return -EINVAL; - } - } - return 0; -} -EXPORT_SYMBOL_GPL(microcode_sanity_check); - -/* - * return 0 - no update found - * return 1 - found update - */ -int get_matching_sig(unsigned int csig, int cpf, void *mc, int rev) -{ - struct microcode_header_intel *mc_header = mc; - struct extended_sigtable *ext_header; - unsigned long total_size = get_totalsize(mc_header); - int ext_sigcount, i; - struct extended_signature *ext_sig; - - if (update_match_cpu(csig, cpf, mc_header->sig, mc_header->pf)) - return 1; - - /* Look for ext. headers: */ - if (total_size <= get_datasize(mc_header) + MC_HEADER_SIZE) - return 0; - - ext_header = mc + get_datasize(mc_header) + MC_HEADER_SIZE; - ext_sigcount = ext_header->count; - ext_sig = (void *)ext_header + EXT_HEADER_SIZE; - - for (i = 0; i < ext_sigcount; i++) { - if (update_match_cpu(csig, cpf, ext_sig->sig, ext_sig->pf)) - return 1; - ext_sig++; - } - return 0; -} - -/* - * return 0 - no update found - * return 1 - found update - */ -int get_matching_microcode(unsigned int csig, int cpf, void *mc, int rev) -{ - struct microcode_header_intel *mc_header = mc; - - if (!update_match_revision(mc_header, rev)) - return 0; - - return get_matching_sig(csig, cpf, mc, rev); -} -EXPORT_SYMBOL_GPL(get_matching_microcode); -- cgit v1.2.3 From d139336700a5f3a560da235e4dfcd286773025d4 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 15 Jan 2014 12:52:15 +0100 Subject: x86, cpu, amd: Fix a shadowed variable situation Having u32 and struct cpuinfo_x86 * by the same name is not very smart, although it was ok in this case due to the limited scope of u32 c and it being used only once in there. Fix this. Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/1389786735-16751-1-git-send-email-bp@alien8.de Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/amd.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 39bc78dad377..e5647ab5fc23 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -789,14 +789,10 @@ static void cpu_detect_tlb_amd(struct cpuinfo_x86 *c) } /* Handle DTLB 2M and 4M sizes, fall back to L1 if L2 is disabled */ - if (!((eax >> 16) & mask)) { - u32 a, b, c, d; - - cpuid(0x80000005, &a, &b, &c, &d); - tlb_lld_2m[ENTRIES] = (a >> 16) & 0xff; - } else { + if (!((eax >> 16) & mask)) + tlb_lld_2m[ENTRIES] = (cpuid_eax(0x80000005) >> 16) & 0xff; + else tlb_lld_2m[ENTRIES] = (eax >> 16) & mask; - } /* a 4M entry uses two 2M entries */ tlb_lld_4m[ENTRIES] = tlb_lld_2m[ENTRIES] >> 1; -- cgit v1.2.3 From bee09ed91cacdbffdbcd3b05de8409c77ec9fcd6 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 15 Jan 2014 15:57:29 +0100 Subject: perf/x86/amd/ibs: Fix waking up from S3 for AMD family 10h On AMD family 10h we see following error messages while waking up from S3 for all non-boot CPUs leading to a failed IBS initialization: Enabling non-boot CPUs ... smpboot: Booting Node 0 Processor 1 APIC 0x1 [Firmware Bug]: cpu 1, try to use APIC500 (LVT offset 0) for vector 0x400, but the register is already in use for vector 0xf9 on another cpu perf: IBS APIC setup failed on cpu #1 process: Switch to broadcast mode on CPU1 CPU1 is up ... ACPI: Waking up from system sleep state S3 Reason for this is that during suspend the LVT offset for the IBS vector gets lost and needs to be reinialized while resuming. The offset is read from the IBSCTL msr. On family 10h the offset needs to be 1 as offset 0 is used for the MCE threshold interrupt, but firmware assings it for IBS to 0 too. The kernel needs to reprogram the vector. The msr is a readonly node msr, but a new value can be written via pci config space access. The reinitialization is implemented for family 10h in setup_ibs_ctl() which is forced during IBS setup. This patch fixes IBS setup after waking up from S3 by adding resume/supend hooks for the boot cpu which does the offset reinitialization. Marking it as stable to let distros pick up this fix. Signed-off-by: Robert Richter Signed-off-by: Peter Zijlstra Cc: v3.2.. Cc: Linus Torvalds Link: http://lkml.kernel.org/r/1389797849-5565-1-git-send-email-rric.net@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_amd_ibs.c | 53 +++++++++++++++++++++++++++----- 1 file changed, 45 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c index e09f0bfb7b8f..4b8e4d3cd6ea 100644 --- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c +++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c @@ -10,6 +10,7 @@ #include #include #include +#include #include @@ -816,6 +817,18 @@ out: return ret; } +static void ibs_eilvt_setup(void) +{ + /* + * Force LVT offset assignment for family 10h: The offsets are + * not assigned by the BIOS for this family, so the OS is + * responsible for doing it. If the OS assignment fails, fall + * back to BIOS settings and try to setup this. + */ + if (boot_cpu_data.x86 == 0x10) + force_ibs_eilvt_setup(); +} + static inline int get_ibs_lvt_offset(void) { u64 val; @@ -851,6 +864,36 @@ static void clear_APIC_ibs(void *dummy) setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_FIX, 1); } +#ifdef CONFIG_PM + +static int perf_ibs_suspend(void) +{ + clear_APIC_ibs(NULL); + return 0; +} + +static void perf_ibs_resume(void) +{ + ibs_eilvt_setup(); + setup_APIC_ibs(NULL); +} + +static struct syscore_ops perf_ibs_syscore_ops = { + .resume = perf_ibs_resume, + .suspend = perf_ibs_suspend, +}; + +static void perf_ibs_pm_init(void) +{ + register_syscore_ops(&perf_ibs_syscore_ops); +} + +#else + +static inline void perf_ibs_pm_init(void) { } + +#endif + static int perf_ibs_cpu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) { @@ -877,18 +920,12 @@ static __init int amd_ibs_init(void) if (!caps) return -ENODEV; /* ibs not supported by the cpu */ - /* - * Force LVT offset assignment for family 10h: The offsets are - * not assigned by the BIOS for this family, so the OS is - * responsible for doing it. If the OS assignment fails, fall - * back to BIOS settings and try to setup this. - */ - if (boot_cpu_data.x86 == 0x10) - force_ibs_eilvt_setup(); + ibs_eilvt_setup(); if (!ibs_eilvt_valid()) goto out; + perf_ibs_pm_init(); get_online_cpus(); ibs_caps = caps; /* make ibs_caps visible to other cpus: */ -- cgit v1.2.3