Diffstat (limited to 'arch')
-rw-r--r-- | arch/arm/Kconfig | 9
-rw-r--r-- | arch/arm/Makefile | 13
-rw-r--r-- | arch/arm/boot/Makefile | 10
-rw-r--r-- | arch/arm/include/asm/dma-mapping.h | 1
-rw-r--r-- | arch/arm/include/asm/flat.h | 2
-rw-r--r-- | arch/arm/include/asm/uaccess.h | 4
-rw-r--r-- | arch/arm/kernel/irq.c | 2
-rw-r--r-- | arch/arm/kernel/kprobes-test-arm.c | 4
-rw-r--r-- | arch/arm/kernel/machine_kexec.c | 7
-rw-r--r-- | arch/arm/kernel/perf_event.c | 4
-rw-r--r-- | arch/arm/kernel/smp.c | 14
-rw-r--r-- | arch/arm/kernel/smp_twd.c | 4
-rw-r--r-- | arch/arm/lib/delay.c | 1
-rw-r--r-- | arch/arm/mm/alignment.c | 4
-rw-r--r-- | arch/arm/mm/dma-mapping.c | 2
-rw-r--r-- | arch/arm/mm/vmregion.h | 1
-rw-r--r-- | arch/arm/tools/Makefile | 2
-rw-r--r-- | arch/x86/kernel/cpu/perf_event.c | 10
-rw-r--r-- | arch/x86/kernel/cpu/perf_event_intel_uncore.c | 45
-rw-r--r-- | arch/x86/kernel/cpu/perf_event_knc.c | 93
-rw-r--r-- | arch/x86/kernel/cpu/perf_event_p6.c | 127
21 files changed, 280 insertions, 79 deletions
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 73067efd4845..ade7e924bef5 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1603,8 +1603,8 @@ config NR_CPUS
 	default "4"
 
 config HOTPLUG_CPU
-	bool "Support for hot-pluggable CPUs (EXPERIMENTAL)"
-	depends on SMP && HOTPLUG && EXPERIMENTAL
+	bool "Support for hot-pluggable CPUs"
+	depends on SMP && HOTPLUG
 	help
 	  Say Y here to experiment with turning CPUs off and on.  CPUs
 	  can be controlled through /sys/devices/system/cpu.
@@ -1645,8 +1645,8 @@ config HZ
 	default 100
 
 config THUMB2_KERNEL
-	bool "Compile the kernel in Thumb-2 mode (EXPERIMENTAL)"
-	depends on CPU_V7 && !CPU_V6 && !CPU_V6K && EXPERIMENTAL
+	bool "Compile the kernel in Thumb-2 mode"
+	depends on CPU_V7 && !CPU_V6 && !CPU_V6K
 	select AEABI
 	select ARM_ASM_UNIFIED
 	select ARM_UNWIND
@@ -1850,6 +1850,7 @@ config XEN_DOM0
 config XEN
 	bool "Xen guest support on ARM (EXPERIMENTAL)"
 	depends on EXPERIMENTAL && ARM && OF
+	depends on CPU_V7 && !CPU_V6
 	help
 	  Say Y if you want to run Linux in a Virtual Machine on Xen on ARM.
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index f023e3acdfbd..5f914fca911b 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -21,8 +21,6 @@ endif
 OBJCOPYFLAGS	:=-O binary -R .comment -S
 GZFLAGS		:=-9
 #KBUILD_CFLAGS	+=-pipe
-# Explicitly specifiy 32-bit ARM ISA since toolchain default can be -mthumb:
-KBUILD_CFLAGS	+=$(call cc-option,-marm,)
 
 # Never generate .eh_frame
 KBUILD_CFLAGS	+= $(call cc-option,-fno-dwarf2-cfi-asm)
@@ -105,17 +103,20 @@ endif
 ifeq ($(CONFIG_THUMB2_KERNEL),y)
 AFLAGS_AUTOIT	:=$(call as-option,-Wa$(comma)-mimplicit-it=always,-Wa$(comma)-mauto-it)
 AFLAGS_NOWARN	:=$(call as-option,-Wa$(comma)-mno-warn-deprecated,-Wa$(comma)-W)
-CFLAGS_THUMB2	:=-mthumb $(AFLAGS_AUTOIT) $(AFLAGS_NOWARN)
-AFLAGS_THUMB2	:=$(CFLAGS_THUMB2) -Wa$(comma)-mthumb
+CFLAGS_ISA	:=-mthumb $(AFLAGS_AUTOIT) $(AFLAGS_NOWARN)
+AFLAGS_ISA	:=$(CFLAGS_ISA) -Wa$(comma)-mthumb
 # Work around buggy relocation from gas if requested:
 ifeq ($(CONFIG_THUMB2_AVOID_R_ARM_THM_JUMP11),y)
 CFLAGS_MODULE	+=-fno-optimize-sibling-calls
 endif
+else
+CFLAGS_ISA	:=$(call cc-option,-marm,)
+AFLAGS_ISA	:=$(CFLAGS_ISA)
 endif
 
 # Need -Uarm for gcc < 3.x
-KBUILD_CFLAGS	+=$(CFLAGS_ABI) $(CFLAGS_THUMB2) $(arch-y) $(tune-y) $(call cc-option,-mshort-load-bytes,$(call cc-option,-malignment-traps,)) -msoft-float -Uarm
-KBUILD_AFLAGS	+=$(CFLAGS_ABI) $(AFLAGS_THUMB2) $(arch-y) $(tune-y) -include asm/unified.h -msoft-float
+KBUILD_CFLAGS	+=$(CFLAGS_ABI) $(CFLAGS_ISA) $(arch-y) $(tune-y) $(call cc-option,-mshort-load-bytes,$(call cc-option,-malignment-traps,)) -msoft-float -Uarm
+KBUILD_AFLAGS	+=$(CFLAGS_ABI) $(AFLAGS_ISA) $(arch-y) $(tune-y) -include asm/unified.h -msoft-float
 
 CHECKFLAGS	+= -D__arm__
diff --git a/arch/arm/boot/Makefile b/arch/arm/boot/Makefile
index 3fdab016aa5c..f2aa09eb658e 100644
--- a/arch/arm/boot/Makefile
+++ b/arch/arm/boot/Makefile
@@ -33,7 +33,7 @@ ifeq ($(CONFIG_XIP_KERNEL),y)
 
 $(obj)/xipImage: vmlinux FORCE
 	$(call if_changed,objcopy)
-	@echo '  Kernel: $@ is ready (physical address: $(CONFIG_XIP_PHYS_ADDR))'
+	$(kecho) '  Kernel: $@ is ready (physical address: $(CONFIG_XIP_PHYS_ADDR))'
 
 $(obj)/Image $(obj)/zImage: FORCE
 	@echo 'Kernel configured for XIP (CONFIG_XIP_KERNEL=y)'
@@ -48,14 +48,14 @@ $(obj)/xipImage: FORCE
 
 $(obj)/Image: vmlinux FORCE
 	$(call if_changed,objcopy)
-	@echo '  Kernel: $@ is ready'
+	$(kecho) '  Kernel: $@ is ready'
 
 $(obj)/compressed/vmlinux: $(obj)/Image FORCE
 	$(Q)$(MAKE) $(build)=$(obj)/compressed $@
 
 $(obj)/zImage:	$(obj)/compressed/vmlinux FORCE
 	$(call if_changed,objcopy)
-	@echo '  Kernel: $@ is ready'
+	$(kecho) '  Kernel: $@ is ready'
 
 endif
@@ -90,7 +90,7 @@ fi
 $(obj)/uImage:	$(obj)/zImage FORCE
 	@$(check_for_multiple_loadaddr)
 	$(call if_changed,uimage)
-	@echo '  Image $@ is ready'
+	$(kecho) '  Image $@ is ready'
 
 $(obj)/bootp/bootp: $(obj)/zImage initrd FORCE
 	$(Q)$(MAKE) $(build)=$(obj)/bootp $@
@@ -98,7 +98,7 @@ $(obj)/bootp/bootp: $(obj)/zImage initrd FORCE
 
 $(obj)/bootpImage: $(obj)/bootp/bootp FORCE
 	$(call if_changed,objcopy)
-	@echo '  Kernel: $@ is ready'
+	$(kecho) '  Kernel: $@ is ready'
 
 PHONY += initrd FORCE
 initrd:
diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h
index 23004847bb05..78d8e9b5544f 100644
--- a/arch/arm/include/asm/dma-mapping.h
+++ b/arch/arm/include/asm/dma-mapping.h
@@ -91,6 +91,7 @@ static inline dma_addr_t virt_to_dma(struct device *dev, void *addr)
  */
 static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
 {
+	debug_dma_mapping_error(dev, dma_addr);
 	return dma_addr == DMA_ERROR_CODE;
 }
diff --git a/arch/arm/include/asm/flat.h b/arch/arm/include/asm/flat.h
index 59426a4595c9..e847d23351ed 100644
--- a/arch/arm/include/asm/flat.h
+++ b/arch/arm/include/asm/flat.h
@@ -8,7 +8,7 @@
 #define	flat_argvp_envp_on_stack()		1
 #define	flat_old_ram_flag(flags)		(flags)
 #define	flat_reloc_valid(reloc, size)		((reloc) <= (size))
-#define	flat_get_addr_from_rp(rp, relval, flags, persistent) get_unaligned(rp)
+#define	flat_get_addr_from_rp(rp, relval, flags, persistent) ((void)persistent,get_unaligned(rp))
 #define	flat_put_addr_at_rp(rp, val, relval)	put_unaligned(val,rp)
 #define	flat_get_relocate_addr(rel)		(rel)
 #define	flat_set_persistent(relval, p)		0
diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h
index 77bd79f2ffdb..7e1f76027f66 100644
--- a/arch/arm/include/asm/uaccess.h
+++ b/arch/arm/include/asm/uaccess.h
@@ -200,8 +200,8 @@ extern int __put_user_8(void *, unsigned long long);
 #define USER_DS			KERNEL_DS
 
 #define segment_eq(a,b)		(1)
-#define __addr_ok(addr)		(1)
-#define __range_ok(addr,size)	(0)
+#define __addr_ok(addr)		((void)(addr),1)
+#define __range_ok(addr,size)	((void)(addr),0)
 #define get_fs()		(KERNEL_DS)
 
 static inline void set_fs(mm_segment_t fs)
diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c
index 16cedb42c0c3..896165096d6a 100644
--- a/arch/arm/kernel/irq.c
+++ b/arch/arm/kernel/irq.c
@@ -34,6 +34,7 @@
 #include <linux/list.h>
 #include <linux/kallsyms.h>
 #include <linux/proc_fs.h>
+#include <linux/export.h>
 
 #include <asm/exception.h>
 #include <asm/mach/arch.h>
@@ -109,6 +110,7 @@ void set_irq_flags(unsigned int irq, unsigned int iflags)
 	/* Order is clear bits in "clr" then set bits in "set" */
 	irq_modify_status(irq, clr, set & ~clr);
 }
+EXPORT_SYMBOL_GPL(set_irq_flags);
 
 void __init init_IRQ(void)
 {
diff --git a/arch/arm/kernel/kprobes-test-arm.c b/arch/arm/kernel/kprobes-test-arm.c
index 38c1a3b103a0..839312905067 100644
--- a/arch/arm/kernel/kprobes-test-arm.c
+++ b/arch/arm/kernel/kprobes-test-arm.c
@@ -366,7 +366,9 @@ void kprobe_arm_test_cases(void)
 	TEST_UNSUPPORTED(".word 0xe04f0392 @ umaal r0, pc, r2, r3")
 	TEST_UNSUPPORTED(".word 0xe0500090 @ undef")
 	TEST_UNSUPPORTED(".word 0xe05fff9f @ undef")
+#endif
 
+#if __LINUX_ARM_ARCH__ >= 7
 	TEST_RRR( "mls	r0, r",1, VAL1,", r",2, VAL2,", r",3, VAL3,"")
 	TEST_RRR( "mlshi	r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"")
 	TEST_RR(  "mls	lr, r",1, VAL2,", r",2, VAL3,", r13")
@@ -456,6 +458,8 @@ void kprobe_arm_test_cases(void)
 	TEST_UNSUPPORTED(".word 0xe1700090") /* Unallocated space */
 #if __LINUX_ARM_ARCH__ >= 6
 	TEST_UNSUPPORTED("ldrex	r2, [sp]")
+#endif
+#if (__LINUX_ARM_ARCH__ >= 7) || defined(CONFIG_CPU_32v6K)
 	TEST_UNSUPPORTED("strexd	r0, r2, r3, [sp]")
 	TEST_UNSUPPORTED("ldrexd	r2, r3, [sp]")
 	TEST_UNSUPPORTED("strexb	r0, r2, [sp]")
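The flat.h and uaccess.h hunks above both use the C comma operator so the macro still evaluates its argument while yielding a constant: the argument counts as "used", which silences set-but-unused warnings in callers on no-MMU configurations. A minimal user-space sketch of the idiom, with hypothetical macro names (addr_ok_old/addr_ok_new are illustrative, not kernel API):

	#include <stdio.h>

	/* Returning a bare constant can leave 'addr' unused in some callers. */
	#define addr_ok_old(addr)	(1)

	/* Evaluate the argument, discard it via the comma operator, then
	 * yield the constant; the compiler now sees 'addr' as used. */
	#define addr_ok_new(addr)	((void)(addr), 1)

	int main(void)
	{
		unsigned long addr = 0xc0000000UL;

		/* With addr_ok_old(), this could be addr's only reference and
		 * trigger -Wunused-but-set-variable. */
		if (addr_ok_new(addr))
			printf("address accepted\n");
		return 0;
	}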
diff --git a/arch/arm/kernel/machine_kexec.c b/arch/arm/kernel/machine_kexec.c
index e29c3337ca81..8ef8c9337809 100644
--- a/arch/arm/kernel/machine_kexec.c
+++ b/arch/arm/kernel/machine_kexec.c
@@ -45,10 +45,9 @@ int machine_kexec_prepare(struct kimage *image)
 	for (i = 0; i < image->nr_segments; i++) {
 		current_segment = &image->segment[i];
 
-		err = memblock_is_region_memory(current_segment->mem,
-						current_segment->memsz);
-		if (err)
-			return - EINVAL;
+		if (!memblock_is_region_memory(current_segment->mem,
+					       current_segment->memsz))
+			return -EINVAL;
 
 		err = get_user(header, (__be32*)current_segment->buf);
 		if (err)
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index 93971b1a4f0b..53c0304b734a 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -96,6 +96,10 @@ armpmu_event_set_period(struct perf_event *event,
 	s64 period = hwc->sample_period;
 	int ret = 0;
 
+	/* The period may have been changed by PERF_EVENT_IOC_PERIOD */
+	if (unlikely(period != hwc->last_period))
+		left = period - (hwc->last_period - left);
+
 	if (unlikely(left <= -period)) {
 		left = period;
 		local64_set(&hwc->period_left, left);
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 8e20754dd31d..fbc8b2623d82 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -294,18 +294,24 @@ static void percpu_timer_setup(void);
 asmlinkage void __cpuinit secondary_start_kernel(void)
 {
 	struct mm_struct *mm = &init_mm;
-	unsigned int cpu = smp_processor_id();
+	unsigned int cpu;
+
+	/*
+	 * The identity mapping is uncached (strongly ordered), so
+	 * switch away from it before attempting any exclusive accesses.
+	 */
+	cpu_switch_mm(mm->pgd, mm);
+	enter_lazy_tlb(mm, current);
+	local_flush_tlb_all();
 
 	/*
 	 * All kernel threads share the same mm context; grab a
 	 * reference and switch to it.
 	 */
+	cpu = smp_processor_id();
 	atomic_inc(&mm->mm_count);
 	current->active_mm = mm;
 	cpumask_set_cpu(cpu, mm_cpumask(mm));
-	cpu_switch_mm(mm->pgd, mm);
-	enter_lazy_tlb(mm, current);
-	local_flush_tlb_all();
 
 	printk("CPU%u: Booted secondary processor\n", cpu);
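The armpmu_event_set_period() hunk above compensates for userspace changing the sample period mid-flight via PERF_EVENT_IOC_PERIOD: the remaining count ('left') was accumulated against the old period, so the progress already made is carried over onto the new one. A standalone sketch of the arithmetic, with made-up demo values (not taken from the patch):

	#include <stdio.h>

	int main(void)
	{
		long long last_period = 1000000; /* period 'left' was computed against */
		long long period      = 500000;  /* new period set via the ioctl */
		long long left        = 250000;  /* events still to count on the old period */

		/* Events already elapsed on the old period: last_period - left.
		 * Rebase that progress onto the new period; a result <= 0 means
		 * the event has already overflowed under the new period. */
		if (period != last_period)
			left = period - (last_period - left);

		printf("rebased left = %lld\n", left); /* 500000 - 750000 = -250000 */
		return 0;
	}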
diff --git a/arch/arm/kernel/smp_twd.c b/arch/arm/kernel/smp_twd.c
index e1f906989bb8..b22d700fea27 100644
--- a/arch/arm/kernel/smp_twd.c
+++ b/arch/arm/kernel/smp_twd.c
@@ -42,10 +42,10 @@ static void twd_set_mode(enum clock_event_mode mode,
 
 	switch (mode) {
 	case CLOCK_EVT_MODE_PERIODIC:
-		/* timer load already set up */
 		ctrl = TWD_TIMER_CONTROL_ENABLE | TWD_TIMER_CONTROL_IT_ENABLE
 			| TWD_TIMER_CONTROL_PERIODIC;
-		__raw_writel(twd_timer_rate / HZ, twd_base + TWD_TIMER_LOAD);
+		__raw_writel(DIV_ROUND_CLOSEST(twd_timer_rate, HZ),
+			twd_base + TWD_TIMER_LOAD);
 		break;
 	case CLOCK_EVT_MODE_ONESHOT:
 		/* period set, and timer enabled in 'next_event' hook */
diff --git a/arch/arm/lib/delay.c b/arch/arm/lib/delay.c
index 9d0a30032d7f..0dc53854a5d8 100644
--- a/arch/arm/lib/delay.c
+++ b/arch/arm/lib/delay.c
@@ -45,6 +45,7 @@ int read_current_timer(unsigned long *timer_val)
 	*timer_val = delay_timer->read_current_timer();
 	return 0;
 }
+EXPORT_SYMBOL_GPL(read_current_timer);
 
 static void __timer_delay(unsigned long cycles)
 {
diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c
index b9f60ebe3bc4..023f443784ec 100644
--- a/arch/arm/mm/alignment.c
+++ b/arch/arm/mm/alignment.c
@@ -856,8 +856,10 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 		if (thumb2_32b) {
 			offset.un = 0;
 			handler = do_alignment_t32_to_handler(&instr, regs, &offset);
-		} else
+		} else {
+			offset.un = 0;
 			handler = do_alignment_ldmstm;
+		}
 		break;
 
 	default:
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 477a2d23ddf1..58bc3e4d3bd0 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -610,7 +610,7 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
			 gfp_t gfp, pgprot_t prot, bool is_coherent, const void *caller)
 {
 	u64 mask = get_coherent_dma_mask(dev);
-	struct page *page;
+	struct page *page = NULL;
 	void *addr;
 
 #ifdef CONFIG_DMA_API_DEBUG
diff --git a/arch/arm/mm/vmregion.h b/arch/arm/mm/vmregion.h
index bf312c354a21..0f5a5f2a2c7b 100644
--- a/arch/arm/mm/vmregion.h
+++ b/arch/arm/mm/vmregion.h
@@ -17,7 +17,6 @@ struct arm_vmregion {
 	struct list_head	vm_list;
 	unsigned long		vm_start;
 	unsigned long		vm_end;
-	void			*priv;
 	int			vm_active;
 	const void		*caller;
 };
diff --git a/arch/arm/tools/Makefile b/arch/arm/tools/Makefile
index 635cb1865e4d..cd60a81163e9 100644
--- a/arch/arm/tools/Makefile
+++ b/arch/arm/tools/Makefile
@@ -5,6 +5,6 @@
 #
 
 include/generated/mach-types.h: $(src)/gen-mach-types $(src)/mach-types
-	@echo '  Generating $@'
+	$(kecho) '  Generating $@'
 	@mkdir -p $(dir $@)
 	$(Q)$(AWK) -f $^ > $@ || { rm -f $@; /bin/false; }
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 3373f84d1397..4a3374e61a93 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -208,12 +208,14 @@ static bool check_hw_exists(void)
 	}
 
 	/*
-	 * Now write a value and read it back to see if it matches,
-	 * this is needed to detect certain hardware emulators (qemu/kvm)
-	 * that don't trap on the MSR access and always return 0s.
+	 * Read the current value, change it and read it back to see if it
+	 * matches, this is needed to detect certain hardware emulators
+	 * (qemu/kvm) that don't trap on the MSR access and always return 0s.
 	 */
-	val = 0xabcdUL;
 	reg = x86_pmu_event_addr(0);
+	if (rdmsrl_safe(reg, &val))
+		goto msr_fail;
+	val ^= 0xffffUL;
 	ret = wrmsrl_safe(reg, val);
 	ret |= rdmsrl_safe(reg, &val_new);
 	if (ret || val != val_new)
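The check_hw_exists() change above stops writing a fixed magic constant: it reads the counter MSR, flips some low bits, writes the result back and verifies the readback, so an emulated MSR that ignores writes is caught regardless of what it reads as. A user-space sketch of the same probe pattern; read_reg/write_reg are hypothetical stand-ins for rdmsrl_safe/wrmsrl_safe:

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	/* Hypothetical register backing; a faked register would ignore
	 * writes and always read back the same value. */
	static uint64_t reg_backing;

	static int read_reg(uint64_t *val)  { *val = reg_backing; return 0; }
	static int write_reg(uint64_t val)  { reg_backing = val; return 0; }

	/* Probe pattern from check_hw_exists(): read, flip, write, verify. */
	static bool reg_is_real(void)
	{
		uint64_t val, val_new = ~0ULL;

		if (read_reg(&val))
			return false;	/* access faulted: no register */
		val ^= 0xffffUL;	/* change it, whatever it held */
		if (write_reg(val) || read_reg(&val_new))
			return false;
		return val == val_new;	/* an emulator fails the readback */
	}

	int main(void)
	{
		printf("register %s\n", reg_is_real() ? "works" : "is fake/absent");
		return 0;
	}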
 	 */
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 5df8d32ba91e..3cf3d97cce3a 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -118,22 +118,24 @@ static void snbep_uncore_pci_disable_box(struct intel_uncore_box *box)
 {
 	struct pci_dev *pdev = box->pci_dev;
 	int box_ctl = uncore_pci_box_ctl(box);
-	u32 config;
+	u32 config = 0;
 
-	pci_read_config_dword(pdev, box_ctl, &config);
-	config |= SNBEP_PMON_BOX_CTL_FRZ;
-	pci_write_config_dword(pdev, box_ctl, config);
+	if (!pci_read_config_dword(pdev, box_ctl, &config)) {
+		config |= SNBEP_PMON_BOX_CTL_FRZ;
+		pci_write_config_dword(pdev, box_ctl, config);
+	}
 }
 
 static void snbep_uncore_pci_enable_box(struct intel_uncore_box *box)
 {
 	struct pci_dev *pdev = box->pci_dev;
 	int box_ctl = uncore_pci_box_ctl(box);
-	u32 config;
+	u32 config = 0;
 
-	pci_read_config_dword(pdev, box_ctl, &config);
-	config &= ~SNBEP_PMON_BOX_CTL_FRZ;
-	pci_write_config_dword(pdev, box_ctl, config);
+	if (!pci_read_config_dword(pdev, box_ctl, &config)) {
+		config &= ~SNBEP_PMON_BOX_CTL_FRZ;
+		pci_write_config_dword(pdev, box_ctl, config);
+	}
 }
 
 static void snbep_uncore_pci_enable_event(struct intel_uncore_box *box, struct perf_event *event)
@@ -156,7 +158,7 @@ static u64 snbep_uncore_pci_read_counter(struct intel_uncore_box *box, struct pe
 {
 	struct pci_dev *pdev = box->pci_dev;
 	struct hw_perf_event *hwc = &event->hw;
-	u64 count;
+	u64 count = 0;
 
 	pci_read_config_dword(pdev, hwc->event_base, (u32 *)&count);
 	pci_read_config_dword(pdev, hwc->event_base + 4, (u32 *)&count + 1);
@@ -603,11 +605,12 @@ static struct pci_driver snbep_uncore_pci_driver = {
 /*
  * build pci bus to socket mapping
  */
-static void snbep_pci2phy_map_init(void)
+static int snbep_pci2phy_map_init(void)
 {
 	struct pci_dev *ubox_dev = NULL;
 	int i, bus, nodeid;
-	u32 config;
+	int err = 0;
+	u32 config = 0;
 
 	while (1) {
 		/* find the UBOX device */
@@ -618,10 +621,14 @@ static void snbep_pci2phy_map_init(void)
 			break;
 		bus = ubox_dev->bus->number;
 		/* get the Node ID of the local register */
-		pci_read_config_dword(ubox_dev, 0x40, &config);
+		err = pci_read_config_dword(ubox_dev, 0x40, &config);
+		if (err)
+			break;
 		nodeid = config;
 		/* get the Node ID mapping */
-		pci_read_config_dword(ubox_dev, 0x54, &config);
+		err = pci_read_config_dword(ubox_dev, 0x54, &config);
+		if (err)
+			break;
 		/*
 		 * every three bits in the Node ID mapping register maps
 		 * to a particular node.
@@ -633,7 +640,11 @@ static void snbep_pci2phy_map_init(void)
 			}
 		}
 	};
-	return;
+
+	if (ubox_dev)
+		pci_dev_put(ubox_dev);
+
+	return err ? pcibios_err_to_errno(err) : 0;
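The pattern running through all of these uncore hunks is the same: a PCI config-space read can fail, so its return value must be checked before the (now zero-initialized) output is consumed, and snbep_pci2phy_map_init() additionally converts the PCI error into a negative errno for its caller. A simplified user-space sketch of that error-propagation shape; config_read, map_init and the constant are illustrative stand-ins, not the kernel's API:

	#include <stdio.h>

	#define EIO 5

	/* stand-in for pci_read_config_dword(): nonzero means the read failed */
	static int config_read(unsigned reg, unsigned *val)
	{
		if (reg > 0x100)
			return 1;	/* pretend this register doesn't exist */
		*val = 0xdeadbeef;
		return 0;
	}

	static int map_init(void)
	{
		unsigned config = 0;	/* initialized: never consumed on failure */
		int err;

		err = config_read(0x40, &config);
		if (err)
			return -EIO;	/* kernel maps via pcibios_err_to_errno() */
		/* ... use config ... */
		return 0;
	}

	int main(void)
	{
		printf("map_init() = %d\n", map_init());
		return 0;
	}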
 }
 
 /* end of Sandy Bridge-EP uncore support */
@@ -1547,7 +1558,6 @@ void nhmex_rbox_alter_er(struct intel_uncore_box *box, struct perf_event *event)
 {
 	struct hw_perf_event *hwc = &event->hw;
 	struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
-	int port;
 
 	/* adjust the main event selector and extra register index */
 	if (reg1->idx % 2) {
@@ -1559,7 +1569,6 @@ void nhmex_rbox_alter_er(struct intel_uncore_box *box, struct perf_event *event)
 	}
 
 	/* adjust extra register config */
-	port = reg1->idx / 6 + box->pmu->pmu_idx * 4;
 	switch (reg1->idx % 6) {
 	case 2:
 		/* shift the 8~15 bits to the 0~7 bits */
@@ -2578,9 +2587,11 @@ static int __init uncore_pci_init(void)
 
 	switch (boot_cpu_data.x86_model) {
 	case 45: /* Sandy Bridge-EP */
+		ret = snbep_pci2phy_map_init();
+		if (ret)
+			return ret;
 		pci_uncores = snbep_pci_uncores;
 		uncore_pci_driver = &snbep_uncore_pci_driver;
-		snbep_pci2phy_map_init();
 		break;
 	default:
 		return 0;
diff --git a/arch/x86/kernel/cpu/perf_event_knc.c b/arch/x86/kernel/cpu/perf_event_knc.c
index 7c46bfdbc373..4b7731bf23a8 100644
--- a/arch/x86/kernel/cpu/perf_event_knc.c
+++ b/arch/x86/kernel/cpu/perf_event_knc.c
@@ -3,6 +3,8 @@
 #include <linux/perf_event.h>
 #include <linux/types.h>
 
+#include <asm/hardirq.h>
+
 #include "perf_event.h"
 
 static const u64 knc_perfmon_event_map[] =
@@ -173,30 +175,100 @@ static void knc_pmu_enable_all(int added)
 
 static inline void knc_pmu_disable_event(struct perf_event *event)
 {
-	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct hw_perf_event *hwc = &event->hw;
 	u64 val;
 
 	val = hwc->config;
-	if (cpuc->enabled)
-		val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
+	val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
 
 	(void)wrmsrl_safe(hwc->config_base + hwc->idx, val);
 }
 
 static void knc_pmu_enable_event(struct perf_event *event)
 {
-	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct hw_perf_event *hwc = &event->hw;
 	u64 val;
 
 	val = hwc->config;
-	if (cpuc->enabled)
-		val |= ARCH_PERFMON_EVENTSEL_ENABLE;
+	val |= ARCH_PERFMON_EVENTSEL_ENABLE;
 
 	(void)wrmsrl_safe(hwc->config_base + hwc->idx, val);
 }
 
+static inline u64 knc_pmu_get_status(void)
+{
+	u64 status;
+
+	rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_STATUS, status);
+
+	return status;
+}
+
+static inline void knc_pmu_ack_status(u64 ack)
+{
+	wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_OVF_CONTROL, ack);
+}
+
+static int knc_pmu_handle_irq(struct pt_regs *regs)
+{
+	struct perf_sample_data data;
+	struct cpu_hw_events *cpuc;
+	int handled = 0;
+	int bit, loops;
+	u64 status;
+
+	cpuc = &__get_cpu_var(cpu_hw_events);
+
+	knc_pmu_disable_all();
+
+	status = knc_pmu_get_status();
+	if (!status) {
+		knc_pmu_enable_all(0);
+		return handled;
+	}
+
+	loops = 0;
+again:
+	knc_pmu_ack_status(status);
+	if (++loops > 100) {
+		WARN_ONCE(1, "perf: irq loop stuck!\n");
+		perf_event_print_debug();
+		goto done;
+	}
+
+	inc_irq_stat(apic_perf_irqs);
+
+	for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
+		struct perf_event *event = cpuc->events[bit];
+
+		handled++;
+
+		if (!test_bit(bit, cpuc->active_mask))
+			continue;
+
+		if (!intel_pmu_save_and_restart(event))
+			continue;
+
+		perf_sample_data_init(&data, 0, event->hw.last_period);
+
+		if (perf_event_overflow(event, &data, regs))
+			x86_pmu_stop(event, 0);
+	}
+
+	/*
+	 * Repeat if there is more work to be done:
+	 */
+	status = knc_pmu_get_status();
+	if (status)
+		goto again;
+
+done:
+	knc_pmu_enable_all(0);
+
+	return handled;
+}
+
+
 PMU_FORMAT_ATTR(event,	"config:0-7"	);
 PMU_FORMAT_ATTR(umask,	"config:8-15"	);
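knc_pmu_handle_irq() above follows the standard x86 PMI shape: freeze the counters, read the global overflow status, ack the bits being serviced, handle each overflowed counter, then re-read status and repeat (with a loop guard) before unfreezing. A toy model of that status/ack loop; every name here is an illustrative stand-in, not the kernel's:

	#include <stdint.h>
	#include <stdio.h>

	static uint64_t fake_status = 0x5;	/* counters 0 and 2 overflowed */

	static uint64_t get_status(void)     { return fake_status; }
	static void ack_status(uint64_t ack) { fake_status &= ~ack; }

	static int handle_irq(void)
	{
		int handled = 0, loops = 0;
		uint64_t status = get_status();

		while (status) {
			ack_status(status);
			if (++loops > 100) {	/* guard against a stuck IRQ */
				fprintf(stderr, "irq loop stuck!\n");
				break;
			}
			for (int bit = 0; bit < 64; bit++)
				if (status & (1ULL << bit))
					handled++;	/* service counter 'bit' */
			status = get_status();	/* repeat if more work arrived */
		}
		return handled;
	}

	int main(void)
	{
		printf("handled %d overflows\n", handle_irq());
		return 0;
	}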
"config:18" ); @@ -214,7 +286,7 @@ static struct attribute *intel_knc_formats_attr[] = { static __initconst struct x86_pmu knc_pmu = { .name = "knc", - .handle_irq = x86_pmu_handle_irq, + .handle_irq = knc_pmu_handle_irq, .disable_all = knc_pmu_disable_all, .enable_all = knc_pmu_enable_all, .enable = knc_pmu_enable_event, @@ -226,12 +298,11 @@ static __initconst struct x86_pmu knc_pmu = { .event_map = knc_pmu_event_map, .max_events = ARRAY_SIZE(knc_perfmon_event_map), .apic = 1, - .max_period = (1ULL << 31) - 1, + .max_period = (1ULL << 39) - 1, .version = 0, .num_counters = 2, - /* in theory 40 bits, early silicon is buggy though */ - .cntval_bits = 32, - .cntval_mask = (1ULL << 32) - 1, + .cntval_bits = 40, + .cntval_mask = (1ULL << 40) - 1, .get_event_constraints = x86_get_event_constraints, .event_constraints = knc_event_constraints, .format_attrs = intel_knc_formats_attr, diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c index e4dd0f7a0453..7d0270bd793e 100644 --- a/arch/x86/kernel/cpu/perf_event_p6.c +++ b/arch/x86/kernel/cpu/perf_event_p6.c @@ -8,13 +8,106 @@ */ static const u64 p6_perfmon_event_map[] = { - [PERF_COUNT_HW_CPU_CYCLES] = 0x0079, - [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, - [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e, - [PERF_COUNT_HW_CACHE_MISSES] = 0x012e, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, - [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, - [PERF_COUNT_HW_BUS_CYCLES] = 0x0062, + [PERF_COUNT_HW_CPU_CYCLES] = 0x0079, /* CPU_CLK_UNHALTED */ + [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, /* INST_RETIRED */ + [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e, /* L2_RQSTS:M:E:S:I */ + [PERF_COUNT_HW_CACHE_MISSES] = 0x012e, /* L2_RQSTS:I */ + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, /* BR_INST_RETIRED */ + [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, /* BR_MISS_PRED_RETIRED */ + [PERF_COUNT_HW_BUS_CYCLES] = 0x0062, /* BUS_DRDY_CLOCKS */ + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00a2, /* RESOURCE_STALLS */ + +}; + +static __initconst u64 p6_hw_cache_event_ids + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = +{ + [ C(L1D) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0043, /* DATA_MEM_REFS */ + [ C(RESULT_MISS) ] = 0x0045, /* DCU_LINES_IN */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0x0f29, /* L2_LD:M:E:S:I */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(L1I ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0080, /* IFU_IFETCH */ + [ C(RESULT_MISS) ] = 0x0f28, /* L2_IFETCH:M:E:S:I */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(LL ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0x0025, /* L2_M_LINES_INM */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(DTLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0043, /* DATA_MEM_REFS */ + [ C(RESULT_MISS) ] = 0, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(ITLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0080, /* IFU_IFETCH */ + [ C(RESULT_MISS) ] = 0x0085, /* ITLB_MISS */ + }, + [ C(OP_WRITE) ] = { + [ 
+			[ C(RESULT_ACCESS) ] = -1,
+			[ C(RESULT_MISS)   ] = -1,
+		},
+		[ C(OP_PREFETCH) ] = {
+			[ C(RESULT_ACCESS) ] = -1,
+			[ C(RESULT_MISS)   ] = -1,
+		},
+	},
+	[ C(BPU ) ] = {
+		[ C(OP_READ) ] = {
+			[ C(RESULT_ACCESS) ] = 0x00c4,	/* BR_INST_RETIRED */
+			[ C(RESULT_MISS)   ] = 0x00c5,	/* BR_MISS_PRED_RETIRED */
+		},
+		[ C(OP_WRITE) ] = {
+			[ C(RESULT_ACCESS) ] = -1,
+			[ C(RESULT_MISS)   ] = -1,
+		},
+		[ C(OP_PREFETCH) ] = {
+			[ C(RESULT_ACCESS) ] = -1,
+			[ C(RESULT_MISS)   ] = -1,
+		},
+	},
 };
 
 static u64 p6_pmu_event_map(int hw_event)
@@ -34,7 +127,7 @@ static struct event_constraint p6_event_constraints[] =
 {
 	INTEL_EVENT_CONSTRAINT(0xc1, 0x1),	/* FLOPS */
 	INTEL_EVENT_CONSTRAINT(0x10, 0x1),	/* FP_COMP_OPS_EXE */
-	INTEL_EVENT_CONSTRAINT(0x11, 0x1),	/* FP_ASSIST */
+	INTEL_EVENT_CONSTRAINT(0x11, 0x2),	/* FP_ASSIST */
 	INTEL_EVENT_CONSTRAINT(0x12, 0x2),	/* MUL */
 	INTEL_EVENT_CONSTRAINT(0x13, 0x2),	/* DIV */
 	INTEL_EVENT_CONSTRAINT(0x14, 0x1),	/* CYCLES_DIV_BUSY */
@@ -64,25 +157,25 @@ static void p6_pmu_enable_all(int added)
 static inline void
 p6_pmu_disable_event(struct perf_event *event)
 {
-	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct hw_perf_event *hwc = &event->hw;
 	u64 val = P6_NOP_EVENT;
 
-	if (cpuc->enabled)
-		val |= ARCH_PERFMON_EVENTSEL_ENABLE;
-
 	(void)wrmsrl_safe(hwc->config_base, val);
 }
 
 static void p6_pmu_enable_event(struct perf_event *event)
 {
-	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct hw_perf_event *hwc = &event->hw;
 	u64 val;
 
 	val = hwc->config;
-	if (cpuc->enabled)
-		val |= ARCH_PERFMON_EVENTSEL_ENABLE;
+
+	/*
+	 * p6 only has a global event enable, set on PerfEvtSel0
+	 * We "disable" events by programming P6_NOP_EVENT
+	 * and we rely on p6_pmu_enable_all() being called
+	 * to actually enable the events.
+	 */
 
 	(void)wrmsrl_safe(hwc->config_base, val);
 }
@@ -158,5 +251,9 @@ __init int p6_pmu_init(void)
 
 	x86_pmu = p6_pmu;
 
+	memcpy(hw_cache_event_ids, p6_hw_cache_event_ids,
+		sizeof(hw_cache_event_ids));
+
+
 	return 0;
 }
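The last two p6 hunks rely on a quirk of that PMU: only PerfEvtSel0 carries a working global enable bit, so per-event "disable" just programs P6_NOP_EVENT into the selector and the enable bit is flipped once for both counters by p6_pmu_enable_all(). A toy model of that split; the register layout and names below are simplified stand-ins, not the real MSR interface:

	#include <stdint.h>
	#include <stdio.h>

	#define EVENTSEL_ENABLE	(1ULL << 22)	/* global EN, honored on sel 0 only */
	#define NOP_EVENT	0x0000002EULL	/* counts nothing useful */

	static uint64_t evtsel[2];		/* PerfEvtSel0/1 */

	static void enable_all(void)  { evtsel[0] |= EVENTSEL_ENABLE; }
	static void disable_all(void) { evtsel[0] &= ~EVENTSEL_ENABLE; }

	/* Per-event enable only programs the selector; the global bit rules. */
	static void enable_event(int idx, uint64_t config) { evtsel[idx] = config; }
	static void disable_event(int idx) { evtsel[idx] = NOP_EVENT; }

	int main(void)
	{
		enable_event(0, 0x79);	/* CPU_CLK_UNHALTED */
		enable_event(1, 0xc0);	/* INST_RETIRED */
		enable_all();		/* both counters start together here */
		printf("sel0=%#llx sel1=%#llx\n",
		       (unsigned long long)evtsel[0],
		       (unsigned long long)evtsel[1]);
		disable_event(1);	/* counter 1 now counts NOP_EVENT */
		disable_all();
		return 0;
	}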