diff options
86 files changed, 1180 insertions, 1128 deletions
diff --git a/arch/arm/kernel/smp_twd.c b/arch/arm/kernel/smp_twd.c index 02d5e5e8d44c..895ae5197159 100644 --- a/arch/arm/kernel/smp_twd.c +++ b/arch/arm/kernel/smp_twd.c @@ -339,7 +339,7 @@ static int __init twd_local_timer_common_register(struct device_node *np) } cpuhp_setup_state_nocalls(CPUHP_AP_ARM_TWD_STARTING, - "AP_ARM_TWD_STARTING", + "arm/timer/twd:starting", twd_timer_starting_cpu, twd_timer_dying_cpu); twd_get_clock(np); diff --git a/arch/arm/mach-imx/mmdc.c b/arch/arm/mach-imx/mmdc.c index ba96bf979625..699157759120 100644 --- a/arch/arm/mach-imx/mmdc.c +++ b/arch/arm/mach-imx/mmdc.c @@ -60,6 +60,7 @@ #define to_mmdc_pmu(p) container_of(p, struct mmdc_pmu, pmu) +static enum cpuhp_state cpuhp_mmdc_state; static int ddr_type; struct fsl_mmdc_devtype_data { @@ -451,8 +452,8 @@ static int imx_mmdc_remove(struct platform_device *pdev) { struct mmdc_pmu *pmu_mmdc = platform_get_drvdata(pdev); + cpuhp_state_remove_instance_nocalls(cpuhp_mmdc_state, &pmu_mmdc->node); perf_pmu_unregister(&pmu_mmdc->pmu); - cpuhp_remove_state_nocalls(CPUHP_ONLINE); kfree(pmu_mmdc); return 0; } @@ -472,6 +473,18 @@ static int imx_mmdc_perf_init(struct platform_device *pdev, void __iomem *mmdc_b return -ENOMEM; } + /* The first instance registers the hotplug state */ + if (!cpuhp_mmdc_state) { + ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, + "perf/arm/mmdc:online", NULL, + mmdc_pmu_offline_cpu); + if (ret < 0) { + pr_err("cpuhp_setup_state_multi failed\n"); + goto pmu_free; + } + cpuhp_mmdc_state = ret; + } + mmdc_num = mmdc_pmu_init(pmu_mmdc, mmdc_base, &pdev->dev); if (mmdc_num == 0) name = "mmdc"; @@ -485,26 +498,23 @@ static int imx_mmdc_perf_init(struct platform_device *pdev, void __iomem *mmdc_b HRTIMER_MODE_REL); pmu_mmdc->hrtimer.function = mmdc_pmu_timer_handler; - cpuhp_state_add_instance_nocalls(CPUHP_ONLINE, - &pmu_mmdc->node); - cpumask_set_cpu(smp_processor_id(), &pmu_mmdc->cpu); - ret = cpuhp_setup_state_multi(CPUHP_AP_NOTIFY_ONLINE, - "MMDC_ONLINE", NULL, - mmdc_pmu_offline_cpu); - if (ret) { - pr_err("cpuhp_setup_state_multi failure\n"); - goto pmu_register_err; - } + cpumask_set_cpu(raw_smp_processor_id(), &pmu_mmdc->cpu); + + /* Register the pmu instance for cpu hotplug */ + cpuhp_state_add_instance_nocalls(cpuhp_mmdc_state, &pmu_mmdc->node); ret = perf_pmu_register(&(pmu_mmdc->pmu), name, -1); - platform_set_drvdata(pdev, pmu_mmdc); if (ret) goto pmu_register_err; + + platform_set_drvdata(pdev, pmu_mmdc); return 0; pmu_register_err: pr_warn("MMDC Perf PMU failed (%d), disabled\n", ret); + cpuhp_state_remove_instance_nocalls(cpuhp_mmdc_state, &pmu_mmdc->node); hrtimer_cancel(&pmu_mmdc->hrtimer); +pmu_free: kfree(pmu_mmdc); return ret; } diff --git a/arch/arm/mach-mvebu/coherency.c b/arch/arm/mach-mvebu/coherency.c index ae2a018b9305..8f8748a0c84f 100644 --- a/arch/arm/mach-mvebu/coherency.c +++ b/arch/arm/mach-mvebu/coherency.c @@ -148,7 +148,7 @@ static void __init armada_370_coherency_init(struct device_node *np) of_node_put(cpu_config_np); cpuhp_setup_state_nocalls(CPUHP_AP_ARM_MVEBU_COHERENCY, - "AP_ARM_MVEBU_COHERENCY", + "arm/mvebu/coherency:starting", armada_xp_clear_l2_starting, NULL); exit: set_cpu_coherent(); diff --git a/arch/arm/mm/cache-l2x0-pmu.c b/arch/arm/mm/cache-l2x0-pmu.c index 976d3057272e..0a1e2280141f 100644 --- a/arch/arm/mm/cache-l2x0-pmu.c +++ b/arch/arm/mm/cache-l2x0-pmu.c @@ -563,7 +563,7 @@ static __init int l2x0_pmu_init(void) cpumask_set_cpu(0, &pmu_cpu); ret = cpuhp_setup_state_nocalls(CPUHP_AP_PERF_ARM_L2X0_ONLINE, - "AP_PERF_ARM_L2X0_ONLINE", NULL, + "perf/arm/l2x0:online", NULL, l2x0_pmu_offline_cpu); if (ret) goto out_pmu; diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c index d1870c777c6e..2290be390f87 100644 --- a/arch/arm/mm/cache-l2x0.c +++ b/arch/arm/mm/cache-l2x0.c @@ -683,7 +683,7 @@ static void __init l2c310_enable(void __iomem *base, unsigned num_lock) if (aux & L310_AUX_CTRL_FULL_LINE_ZERO) cpuhp_setup_state(CPUHP_AP_ARM_L2X0_STARTING, - "AP_ARM_L2X0_STARTING", l2c310_starting_cpu, + "arm/l2x0:starting", l2c310_starting_cpu, l2c310_dying_cpu); } diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c index 0351f5645fb1..569d5a650a4a 100644 --- a/arch/arm/vfp/vfpmodule.c +++ b/arch/arm/vfp/vfpmodule.c @@ -799,7 +799,7 @@ static int __init vfp_init(void) } cpuhp_setup_state_nocalls(CPUHP_AP_ARM_VFP_STARTING, - "AP_ARM_VFP_STARTING", vfp_starting_cpu, + "arm/vfp:starting", vfp_starting_cpu, vfp_dying_cpu); vfp_vector = vfp_support_entry; diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index 4986dc0c1dff..11d9f2898b16 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -412,7 +412,7 @@ static int __init xen_guest_init(void) pvclock_gtod_register_notifier(&xen_pvclock_gtod_notifier); return cpuhp_setup_state(CPUHP_AP_ARM_XEN_STARTING, - "AP_ARM_XEN_STARTING", xen_starting_cpu, + "arm/xen:starting", xen_starting_cpu, xen_dying_cpu); } early_initcall(xen_guest_init); diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index fde04f029ec3..ecf9298a12d4 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -640,7 +640,7 @@ static int __init armv8_deprecated_init(void) } cpuhp_setup_state_nocalls(CPUHP_AP_ARM64_ISNDEP_STARTING, - "AP_ARM64_ISNDEP_STARTING", + "arm64/isndep:starting", run_all_insn_set_hw_mode, NULL); register_insn_emulation_sysctl(ctl_abi); diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 605df76f0a06..2bd426448fc1 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -140,7 +140,7 @@ static int clear_os_lock(unsigned int cpu) static int debug_monitors_init(void) { return cpuhp_setup_state(CPUHP_AP_ARM64_DEBUG_MONITORS_STARTING, - "CPUHP_AP_ARM64_DEBUG_MONITORS_STARTING", + "arm64/debug_monitors:starting", clear_os_lock, NULL); } postcore_initcall(debug_monitors_init); diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index 1b3c747fedda..0296e7924240 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -1001,7 +1001,7 @@ static int __init arch_hw_breakpoint_init(void) * debugger will leave the world in a nice state for us. */ ret = cpuhp_setup_state(CPUHP_AP_PERF_ARM_HW_BREAKPOINT_STARTING, - "CPUHP_AP_PERF_ARM_HW_BREAKPOINT_STARTING", + "perf/arm64/hw_breakpoint:starting", hw_breakpoint_reset, NULL); if (ret) pr_err("failed to register CPU hotplug notifier: %d\n", ret); diff --git a/arch/blackfin/kernel/perf_event.c b/arch/blackfin/kernel/perf_event.c index 6355e97d22b9..6a9524ad04a5 100644 --- a/arch/blackfin/kernel/perf_event.c +++ b/arch/blackfin/kernel/perf_event.c @@ -475,7 +475,7 @@ static int __init bfin_pmu_init(void) ret = perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW); if (!ret) - cpuhp_setup_state(CPUHP_PERF_BFIN, "PERF_BFIN", + cpuhp_setup_state(CPUHP_PERF_BFIN,"perf/bfin:starting", bfin_pmu_prepare_cpu, NULL); return ret; } diff --git a/arch/metag/kernel/perf/perf_event.c b/arch/metag/kernel/perf/perf_event.c index 052cba23708c..7e793eb0c1fe 100644 --- a/arch/metag/kernel/perf/perf_event.c +++ b/arch/metag/kernel/perf/perf_event.c @@ -868,7 +868,7 @@ static int __init init_hw_perf_events(void) metag_out32(0, PERF_COUNT(1)); cpuhp_setup_state(CPUHP_AP_PERF_METAG_STARTING, - "AP_PERF_METAG_STARTING", metag_pmu_starting_cpu, + "perf/metag:starting", metag_pmu_starting_cpu, NULL); ret = perf_pmu_register(&pmu, metag_pmu->name, PERF_TYPE_RAW); diff --git a/arch/mips/kernel/pm-cps.c b/arch/mips/kernel/pm-cps.c index 7cf653e21423..5f928c34c148 100644 --- a/arch/mips/kernel/pm-cps.c +++ b/arch/mips/kernel/pm-cps.c @@ -713,7 +713,7 @@ static int __init cps_pm_init(void) pr_warn("pm-cps: no CPC, clock & power gating unavailable\n"); } - return cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "AP_PM_CPS_CPU_ONLINE", + return cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "mips/cps_pm:online", cps_pm_online_cpu, NULL); } arch_initcall(cps_pm_init); diff --git a/arch/mips/oprofile/op_model_loongson3.c b/arch/mips/oprofile/op_model_loongson3.c index 40660392006f..436b1fc99f2c 100644 --- a/arch/mips/oprofile/op_model_loongson3.c +++ b/arch/mips/oprofile/op_model_loongson3.c @@ -186,7 +186,7 @@ static int __init loongson3_init(void) { on_each_cpu(reset_counters, NULL, 1); cpuhp_setup_state_nocalls(CPUHP_AP_MIPS_OP_LOONGSON3_STARTING, - "AP_MIPS_OP_LOONGSON3_STARTING", + "mips/oprofile/loongson3:starting", loongson3_starting_cpu, loongson3_dying_cpu); save_perf_irq = perf_irq; perf_irq = loongson3_perfcount_handler; diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 0cb6bd8bfccf..b1099cb2f393 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -944,7 +944,7 @@ void __init initmem_init(void) * _nocalls() + manual invocation is used because cpuhp is not yet * initialized for the boot CPU. */ - cpuhp_setup_state_nocalls(CPUHP_POWER_NUMA_PREPARE, "POWER_NUMA_PREPARE", + cpuhp_setup_state_nocalls(CPUHP_POWER_NUMA_PREPARE, "powerpc/numa:prepare", ppc_numa_cpu_prepare, ppc_numa_cpu_dead); for_each_present_cpu(cpu) numa_setup_cpu(cpu); diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 72c27b8d2cf3..fd3e4034c04d 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -2189,7 +2189,7 @@ int register_power_pmu(struct power_pmu *pmu) #endif /* CONFIG_PPC64 */ perf_pmu_register(&power_pmu, "cpu", PERF_TYPE_RAW); - cpuhp_setup_state(CPUHP_PERF_POWER, "PERF_POWER", + cpuhp_setup_state(CPUHP_PERF_POWER, "perf/powerpc:prepare", power_pmu_prepare_cpu, NULL); return 0; } diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 037c2a253ae4..1aba10e90906 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -711,7 +711,7 @@ static int __init cpumf_pmu_init(void) return rc; } return cpuhp_setup_state(CPUHP_AP_PERF_S390_CF_ONLINE, - "AP_PERF_S390_CF_ONLINE", + "perf/s390/cf:online", s390_pmu_online_cpu, s390_pmu_offline_cpu); } early_initcall(cpumf_pmu_init); diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 763dec18edcd..1c0b58545c04 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -1623,7 +1623,7 @@ static int __init init_cpum_sampling_pmu(void) goto out; } - cpuhp_setup_state(CPUHP_AP_PERF_S390_SF_ONLINE, "AP_PERF_S390_SF_ONLINE", + cpuhp_setup_state(CPUHP_AP_PERF_S390_SF_ONLINE, "perf/s390/sf:online", s390_pmu_sf_online_cpu, s390_pmu_sf_offline_cpu); out: return err; diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index 40121d14d34d..10820f6cefbf 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -371,7 +371,7 @@ static int __init init_vdso(void) /* notifier priority > KVM */ return cpuhp_setup_state(CPUHP_AP_X86_VDSO_VMA_ONLINE, - "AP_X86_VDSO_VMA_ONLINE", vgetcpu_online, NULL); + "x86/vdso/vma:online", vgetcpu_online, NULL); } subsys_initcall(init_vdso); #endif /* CONFIG_X86_64 */ diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c index b26ee32f73e8..05612a2529c8 100644 --- a/arch/x86/events/amd/ibs.c +++ b/arch/x86/events/amd/ibs.c @@ -1010,7 +1010,7 @@ static __init int amd_ibs_init(void) * all online cpus. */ cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_IBS_STARTING, - "AP_PERF_X86_AMD_IBS_STARTING", + "perf/x86/amd/ibs:STARTING", x86_pmu_amd_ibs_starting_cpu, x86_pmu_amd_ibs_dying_cpu); diff --git a/arch/x86/events/amd/power.c b/arch/x86/events/amd/power.c index 9842270ed2f2..a6eee5ac4f58 100644 --- a/arch/x86/events/amd/power.c +++ b/arch/x86/events/amd/power.c @@ -291,7 +291,7 @@ static int __init amd_power_pmu_init(void) cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_POWER_ONLINE, - "AP_PERF_X86_AMD_POWER_ONLINE", + "perf/x86/amd/power:online", power_cpu_init, power_cpu_exit); ret = perf_pmu_register(&pmu_class, "power", -1); diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c index 65577f081d07..a0b1bdb3ad42 100644 --- a/arch/x86/events/amd/uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -527,16 +527,16 @@ static int __init amd_uncore_init(void) * Install callbacks. Core will call them for each online cpu. */ if (cpuhp_setup_state(CPUHP_PERF_X86_AMD_UNCORE_PREP, - "PERF_X86_AMD_UNCORE_PREP", + "perf/x86/amd/uncore:prepare", amd_uncore_cpu_up_prepare, amd_uncore_cpu_dead)) goto fail_l2; if (cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING, - "AP_PERF_X86_AMD_UNCORE_STARTING", + "perf/x86/amd/uncore:starting", amd_uncore_cpu_starting, NULL)) goto fail_prep; if (cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE, - "AP_PERF_X86_AMD_UNCORE_ONLINE", + "perf/x86/amd/uncore:online", amd_uncore_cpu_online, amd_uncore_cpu_down_prepare)) goto fail_start; diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index f1c22584a46f..019c5887b698 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -1820,18 +1820,18 @@ static int __init init_hw_perf_events(void) * Install callbacks. Core will call them for each online * cpu. */ - err = cpuhp_setup_state(CPUHP_PERF_X86_PREPARE, "PERF_X86_PREPARE", + err = cpuhp_setup_state(CPUHP_PERF_X86_PREPARE, "perf/x86:prepare", x86_pmu_prepare_cpu, x86_pmu_dead_cpu); if (err) return err; err = cpuhp_setup_state(CPUHP_AP_PERF_X86_STARTING, - "AP_PERF_X86_STARTING", x86_pmu_starting_cpu, + "perf/x86:starting", x86_pmu_starting_cpu, x86_pmu_dying_cpu); if (err) goto out; - err = cpuhp_setup_state(CPUHP_AP_PERF_X86_ONLINE, "AP_PERF_X86_ONLINE", + err = cpuhp_setup_state(CPUHP_AP_PERF_X86_ONLINE, "perf/x86:online", x86_pmu_online_cpu, NULL); if (err) goto out1; diff --git a/arch/x86/events/intel/cqm.c b/arch/x86/events/intel/cqm.c index 0c45cc8e64ba..8c00dc09a5d2 100644 --- a/arch/x86/events/intel/cqm.c +++ b/arch/x86/events/intel/cqm.c @@ -1747,9 +1747,9 @@ static int __init intel_cqm_init(void) * is enabled to avoid notifier leak. */ cpuhp_setup_state(CPUHP_AP_PERF_X86_CQM_STARTING, - "AP_PERF_X86_CQM_STARTING", + "perf/x86/cqm:starting", intel_cqm_cpu_starting, NULL); - cpuhp_setup_state(CPUHP_AP_PERF_X86_CQM_ONLINE, "AP_PERF_X86_CQM_ONLINE", + cpuhp_setup_state(CPUHP_AP_PERF_X86_CQM_ONLINE, "perf/x86/cqm:online", NULL, intel_cqm_cpu_exit); out: diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index da51e5a3e2ff..fec8a461bdef 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -594,6 +594,9 @@ static int __init cstate_probe(const struct cstate_model *cm) static inline void cstate_cleanup(void) { + cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_CSTATE_ONLINE); + cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_CSTATE_STARTING); + if (has_cstate_core) perf_pmu_unregister(&cstate_core_pmu); @@ -606,16 +609,16 @@ static int __init cstate_init(void) int err; cpuhp_setup_state(CPUHP_AP_PERF_X86_CSTATE_STARTING, - "AP_PERF_X86_CSTATE_STARTING", cstate_cpu_init, - NULL); + "perf/x86/cstate:starting", cstate_cpu_init, NULL); cpuhp_setup_state(CPUHP_AP_PERF_X86_CSTATE_ONLINE, - "AP_PERF_X86_CSTATE_ONLINE", NULL, cstate_cpu_exit); + "perf/x86/cstate:online", NULL, cstate_cpu_exit); if (has_cstate_core) { err = perf_pmu_register(&cstate_core_pmu, cstate_core_pmu.name, -1); if (err) { has_cstate_core = false; pr_info("Failed to register cstate core pmu\n"); + cstate_cleanup(); return err; } } @@ -629,8 +632,7 @@ static int __init cstate_init(void) return err; } } - - return err; + return 0; } static int __init cstate_pmu_init(void) @@ -655,8 +657,6 @@ module_init(cstate_pmu_init); static void __exit cstate_pmu_exit(void) { - cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_CSTATE_ONLINE); - cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_CSTATE_STARTING); cstate_cleanup(); } module_exit(cstate_pmu_exit); diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c index 0a535cea8ff3..bd34124449b0 100644 --- a/arch/x86/events/intel/rapl.c +++ b/arch/x86/events/intel/rapl.c @@ -803,13 +803,13 @@ static int __init rapl_pmu_init(void) * Install callbacks. Core will call them for each online cpu. */ - ret = cpuhp_setup_state(CPUHP_PERF_X86_RAPL_PREP, "PERF_X86_RAPL_PREP", + ret = cpuhp_setup_state(CPUHP_PERF_X86_RAPL_PREP, "perf/x86/rapl:prepare", rapl_cpu_prepare, NULL); if (ret) goto out; ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_RAPL_ONLINE, - "AP_PERF_X86_RAPL_ONLINE", + "perf/x86/rapl:online", rapl_cpu_online, rapl_cpu_offline); if (ret) goto out1; diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index dbaaf7dc8373..97c246f84dea 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -1398,22 +1398,22 @@ static int __init intel_uncore_init(void) */ if (!cret) { ret = cpuhp_setup_state(CPUHP_PERF_X86_UNCORE_PREP, - "PERF_X86_UNCORE_PREP", - uncore_cpu_prepare, NULL); + "perf/x86/intel/uncore:prepare", + uncore_cpu_prepare, NULL); if (ret) goto err; } else { cpuhp_setup_state_nocalls(CPUHP_PERF_X86_UNCORE_PREP, - "PERF_X86_UNCORE_PREP", + "perf/x86/intel/uncore:prepare", uncore_cpu_prepare, NULL); } first_init = 1; cpuhp_setup_state(CPUHP_AP_PERF_X86_UNCORE_STARTING, - "AP_PERF_X86_UNCORE_STARTING", + "perf/x86/uncore:starting", uncore_cpu_starting, uncore_cpu_dying); first_init = 0; cpuhp_setup_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE, - "AP_PERF_X86_UNCORE_ONLINE", + "perf/x86/uncore:online", uncore_event_cpu_online, uncore_event_cpu_offline); return 0; diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c index 092ea664d2c6..65721dc73bd8 100644 --- a/arch/x86/kernel/apb_timer.c +++ b/arch/x86/kernel/apb_timer.c @@ -234,7 +234,7 @@ static __init int apbt_late_init(void) if (intel_mid_timer_options == INTEL_MID_TIMER_LAPIC_APBT || !apb_timer_block_enabled) return 0; - return cpuhp_setup_state(CPUHP_X86_APB_DEAD, "X86_APB_DEAD", NULL, + return cpuhp_setup_state(CPUHP_X86_APB_DEAD, "x86/apb:dead", NULL, apbt_cpu_dead); } fs_initcall(apbt_late_init); diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 200af5ae9662..5a35f208ed95 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -191,7 +191,7 @@ static int x2apic_cluster_probe(void) if (!x2apic_mode) return 0; - ret = cpuhp_setup_state(CPUHP_X2APIC_PREPARE, "X2APIC_PREPARE", + ret = cpuhp_setup_state(CPUHP_X2APIC_PREPARE, "x86/x2apic:prepare", x2apic_prepare_cpu, x2apic_dead_cpu); if (ret < 0) { pr_err("Failed to register X2APIC_PREPARE\n"); diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 367756d55980..85e87b46c318 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -1051,11 +1051,11 @@ static __init int hpet_late_init(void) return 0; /* This notifier should be called after workqueue is ready */ - ret = cpuhp_setup_state(CPUHP_AP_X86_HPET_ONLINE, "AP_X86_HPET_ONLINE", + ret = cpuhp_setup_state(CPUHP_AP_X86_HPET_ONLINE, "x86/hpet:online", hpet_cpuhp_online, NULL); if (ret) return ret; - ret = cpuhp_setup_state(CPUHP_X86_HPET_DEAD, "X86_HPET_DEAD", NULL, + ret = cpuhp_setup_state(CPUHP_X86_HPET_DEAD, "x86/hpet:dead", NULL, hpet_cpuhp_dead); if (ret) goto err_cpuhp; diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index f5e3ff835cc8..ef688804f80d 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -224,7 +224,6 @@ static int __init msr_init(void) return 0; out_class: - cpuhp_remove_state(cpuhp_msr_state); class_destroy(msr_class); out_chrdev: __unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr"); diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c index 8402907825b0..b868fa1b812b 100644 --- a/arch/x86/kernel/tboot.c +++ b/arch/x86/kernel/tboot.c @@ -408,7 +408,7 @@ static __init int tboot_late_init(void) tboot_create_trampoline(); atomic_set(&ap_wfs_count, 0); - cpuhp_setup_state(CPUHP_AP_X86_TBOOT_DYING, "AP_X86_TBOOT_DYING", NULL, + cpuhp_setup_state(CPUHP_AP_X86_TBOOT_DYING, "x86/tboot:dying", NULL, tboot_dying_cpu); #ifdef CONFIG_DEBUG_FS debugfs_create_file("tboot_log", S_IRUSR, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ed04398f52c1..51ccfe08e32f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5855,7 +5855,7 @@ static void kvm_timer_init(void) } pr_debug("kvm: max_tsc_khz = %ld\n", max_tsc_khz); - cpuhp_setup_state(CPUHP_AP_X86_KVM_CLK_ONLINE, "AP_X86_KVM_CLK_ONLINE", + cpuhp_setup_state(CPUHP_AP_X86_KVM_CLK_ONLINE, "x86/kvm/clk:online", kvmclock_cpu_online, kvmclock_cpu_down_prep); } diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index ced7027b3fbc..51ef95232725 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1529,11 +1529,11 @@ static int xen_cpuhp_setup(void) int rc; rc = cpuhp_setup_state_nocalls(CPUHP_XEN_PREPARE, - "XEN_HVM_GUEST_PREPARE", + "x86/xen/hvm_guest:prepare", xen_cpu_up_prepare, xen_cpu_dead); if (rc >= 0) { rc = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, - "XEN_HVM_GUEST_ONLINE", + "x86/xen/hvm_guest:online", xen_cpu_up_online, NULL); if (rc < 0) cpuhp_remove_state_nocalls(CPUHP_XEN_PREPARE); diff --git a/arch/xtensa/kernel/perf_event.c b/arch/xtensa/kernel/perf_event.c index 0fecc8a2c0b5..ff1d81385ed7 100644 --- a/arch/xtensa/kernel/perf_event.c +++ b/arch/xtensa/kernel/perf_event.c @@ -422,7 +422,7 @@ static int __init xtensa_pmu_init(void) int irq = irq_create_mapping(NULL, XCHAL_PROFILING_INTERRUPT); ret = cpuhp_setup_state(CPUHP_AP_PERF_XTENSA_STARTING, - "AP_PERF_XTENSA_STARTING", xtensa_pmu_setup, + "perf/xtensa:starting", xtensa_pmu_setup, NULL); if (ret) { pr_err("xtensa_pmu: failed to register CPU-hotplug.\n"); diff --git a/drivers/bus/arm-cci.c b/drivers/bus/arm-cci.c index 231633328dfa..c49da15d9790 100644 --- a/drivers/bus/arm-cci.c +++ b/drivers/bus/arm-cci.c @@ -1796,7 +1796,7 @@ static int __init cci_platform_init(void) int ret; ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_CCI_ONLINE, - "AP_PERF_ARM_CCI_ONLINE", NULL, + "perf/arm/cci:online", NULL, cci_pmu_offline_cpu); if (ret) return ret; diff --git a/drivers/bus/arm-ccn.c b/drivers/bus/arm-ccn.c index d1074d9b38ba..4d6a2b7e4d3f 100644 --- a/drivers/bus/arm-ccn.c +++ b/drivers/bus/arm-ccn.c @@ -1562,7 +1562,7 @@ static int __init arm_ccn_init(void) int i, ret; ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_CCN_ONLINE, - "AP_PERF_ARM_CCN_ONLINE", NULL, + "perf/arm/ccn:online", NULL, arm_ccn_pmu_offline_cpu); if (ret) return ret; @@ -1570,7 +1570,10 @@ static int __init arm_ccn_init(void) for (i = 0; i < ARRAY_SIZE(arm_ccn_pmu_events); i++) arm_ccn_pmu_events_attrs[i] = &arm_ccn_pmu_events[i].attr.attr; - return platform_driver_register(&arm_ccn_driver); + ret = platform_driver_register(&arm_ccn_driver); + if (ret) + cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_CCN_ONLINE); + return ret; } static void __exit arm_ccn_exit(void) diff --git a/drivers/clocksource/arc_timer.c b/drivers/clocksource/arc_timer.c index 3ea46343024f..7517f959cba7 100644 --- a/drivers/clocksource/arc_timer.c +++ b/drivers/clocksource/arc_timer.c @@ -309,7 +309,7 @@ static int __init arc_clockevent_setup(struct device_node *node) } ret = cpuhp_setup_state(CPUHP_AP_ARC_TIMER_STARTING, - "AP_ARC_TIMER_STARTING", + "clockevents/arc/timer:starting", arc_timer_starting_cpu, arc_timer_dying_cpu); if (ret) { diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index 394e417414d3..4c8c3fb2e8b2 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -738,7 +738,7 @@ static int __init arch_timer_register(void) /* Register and immediately configure the timer on the boot CPU */ err = cpuhp_setup_state(CPUHP_AP_ARM_ARCH_TIMER_STARTING, - "AP_ARM_ARCH_TIMER_STARTING", + "clockevents/arm/arch_timer:starting", arch_timer_starting_cpu, arch_timer_dying_cpu); if (err) goto out_unreg_cpupm; diff --git a/drivers/clocksource/arm_global_timer.c b/drivers/clocksource/arm_global_timer.c index 570cc58baec4..123ed20ac2ff 100644 --- a/drivers/clocksource/arm_global_timer.c +++ b/drivers/clocksource/arm_global_timer.c @@ -316,7 +316,7 @@ static int __init global_timer_of_register(struct device_node *np) goto out_irq; err = cpuhp_setup_state(CPUHP_AP_ARM_GLOBAL_TIMER_STARTING, - "AP_ARM_GLOBAL_TIMER_STARTING", + "clockevents/arm/global_timer:starting", gt_starting_cpu, gt_dying_cpu); if (err) goto out_irq; diff --git a/drivers/clocksource/dummy_timer.c b/drivers/clocksource/dummy_timer.c index 89f1c2edbe02..01f3f5a59bc6 100644 --- a/drivers/clocksource/dummy_timer.c +++ b/drivers/clocksource/dummy_timer.c @@ -34,7 +34,7 @@ static int dummy_timer_starting_cpu(unsigned int cpu) static int __init dummy_timer_register(void) { return cpuhp_setup_state(CPUHP_AP_DUMMY_TIMER_STARTING, - "AP_DUMMY_TIMER_STARTING", + "clockevents/dummy_timer:starting", dummy_timer_starting_cpu, NULL); } early_initcall(dummy_timer_register); diff --git a/drivers/clocksource/exynos_mct.c b/drivers/clocksource/exynos_mct.c index c8b9f834f4de..4da1dc2278bd 100644 --- a/drivers/clocksource/exynos_mct.c +++ b/drivers/clocksource/exynos_mct.c @@ -552,7 +552,7 @@ static int __init exynos4_timer_resources(struct device_node *np, void __iomem * /* Install hotplug callbacks which configure the timer on this CPU */ err = cpuhp_setup_state(CPUHP_AP_EXYNOS4_MCT_TIMER_STARTING, - "AP_EXYNOS4_MCT_TIMER_STARTING", + "clockevents/exynos4/mct_timer:starting", exynos4_mct_starting_cpu, exynos4_mct_dying_cpu); if (err) diff --git a/drivers/clocksource/jcore-pit.c b/drivers/clocksource/jcore-pit.c index e90a6cfcb061..7c61226f4359 100644 --- a/drivers/clocksource/jcore-pit.c +++ b/drivers/clocksource/jcore-pit.c @@ -240,7 +240,7 @@ static int __init jcore_pit_init(struct device_node *node) } cpuhp_setup_state(CPUHP_AP_JCORE_TIMER_STARTING, - "AP_JCORE_TIMER_STARTING", + "clockevents/jcore:starting", jcore_pit_local_init, NULL); return 0; diff --git a/drivers/clocksource/metag_generic.c b/drivers/clocksource/metag_generic.c index 8d06a0f7ff26..6fcf96540631 100644 --- a/drivers/clocksource/metag_generic.c +++ b/drivers/clocksource/metag_generic.c @@ -154,6 +154,6 @@ int __init metag_generic_timer_init(void) /* Hook cpu boot to configure the CPU's timers */ return cpuhp_setup_state(CPUHP_AP_METAG_TIMER_STARTING, - "AP_METAG_TIMER_STARTING", + "clockevents/metag:starting", arch_timer_starting_cpu, NULL); } diff --git a/drivers/clocksource/mips-gic-timer.c b/drivers/clocksource/mips-gic-timer.c index 7b86d07c99b4..d9ef7a61e093 100644 --- a/drivers/clocksource/mips-gic-timer.c +++ b/drivers/clocksource/mips-gic-timer.c @@ -120,8 +120,8 @@ static int gic_clockevent_init(void) } cpuhp_setup_state(CPUHP_AP_MIPS_GIC_TIMER_STARTING, - "AP_MIPS_GIC_TIMER_STARTING", gic_starting_cpu, - gic_dying_cpu); + "clockevents/mips/gic/timer:starting", + gic_starting_cpu, gic_dying_cpu); return 0; } diff --git a/drivers/clocksource/qcom-timer.c b/drivers/clocksource/qcom-timer.c index d5d048d890d4..ee358cdf4a07 100644 --- a/drivers/clocksource/qcom-timer.c +++ b/drivers/clocksource/qcom-timer.c @@ -182,7 +182,7 @@ static int __init msm_timer_init(u32 dgt_hz, int sched_bits, int irq, } else { /* Install and invoke hotplug callbacks */ res = cpuhp_setup_state(CPUHP_AP_QCOM_TIMER_STARTING, - "AP_QCOM_TIMER_STARTING", + "clockevents/qcom/timer:starting", msm_local_timer_starting_cpu, msm_local_timer_dying_cpu); if (res) { diff --git a/drivers/clocksource/time-armada-370-xp.c b/drivers/clocksource/time-armada-370-xp.c index 3c39e6f45971..4440aefc59cd 100644 --- a/drivers/clocksource/time-armada-370-xp.c +++ b/drivers/clocksource/time-armada-370-xp.c @@ -320,7 +320,7 @@ static int __init armada_370_xp_timer_common_init(struct device_node *np) } res = cpuhp_setup_state(CPUHP_AP_ARMADA_TIMER_STARTING, - "AP_ARMADA_TIMER_STARTING", + "clockevents/armada:starting", armada_370_xp_timer_starting_cpu, armada_370_xp_timer_dying_cpu); if (res) { diff --git a/drivers/clocksource/timer-atlas7.c b/drivers/clocksource/timer-atlas7.c index db0f21e7d7d2..3d8a181f0252 100644 --- a/drivers/clocksource/timer-atlas7.c +++ b/drivers/clocksource/timer-atlas7.c @@ -221,7 +221,7 @@ static int __init sirfsoc_clockevent_init(void) /* Install and invoke hotplug callbacks */ return cpuhp_setup_state(CPUHP_AP_MARCO_TIMER_STARTING, - "AP_MARCO_TIMER_STARTING", + "clockevents/marco:starting", sirfsoc_local_timer_starting_cpu, sirfsoc_local_timer_dying_cpu); } diff --git a/drivers/hwtracing/coresight/coresight-etm3x.c b/drivers/hwtracing/coresight/coresight-etm3x.c index 3fe368b23d15..a51b6b64ecdf 100644 --- a/drivers/hwtracing/coresight/coresight-etm3x.c +++ b/drivers/hwtracing/coresight/coresight-etm3x.c @@ -804,10 +804,10 @@ static int etm_probe(struct amba_device *adev, const struct amba_id *id) if (!etm_count++) { cpuhp_setup_state_nocalls(CPUHP_AP_ARM_CORESIGHT_STARTING, - "AP_ARM_CORESIGHT_STARTING", + "arm/coresight:starting", etm_starting_cpu, etm_dying_cpu); ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, - "AP_ARM_CORESIGHT_ONLINE", + "arm/coresight:online", etm_online_cpu, NULL); if (ret < 0) goto err_arch_supported; diff --git a/drivers/hwtracing/coresight/coresight-etm4x.c b/drivers/hwtracing/coresight/coresight-etm4x.c index 4db8d6a4d0cb..031480f2c34d 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x.c +++ b/drivers/hwtracing/coresight/coresight-etm4x.c @@ -986,11 +986,11 @@ static int etm4_probe(struct amba_device *adev, const struct amba_id *id) dev_err(dev, "ETM arch init failed\n"); if (!etm4_count++) { - cpuhp_setup_state_nocalls(CPUHP_AP_ARM_CORESIGHT4_STARTING, - "AP_ARM_CORESIGHT4_STARTING", + cpuhp_setup_state_nocalls(CPUHP_AP_ARM_CORESIGHT_STARTING, + "arm/coresight4:starting", etm4_starting_cpu, etm4_dying_cpu); ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, - "AP_ARM_CORESIGHT4_ONLINE", + "arm/coresight4:online", etm4_online_cpu, NULL); if (ret < 0) goto err_arch_supported; @@ -1037,7 +1037,7 @@ static int etm4_probe(struct amba_device *adev, const struct amba_id *id) err_arch_supported: if (--etm4_count == 0) { - cpuhp_remove_state_nocalls(CPUHP_AP_ARM_CORESIGHT4_STARTING); + cpuhp_remove_state_nocalls(CPUHP_AP_ARM_CORESIGHT_STARTING); if (hp_online) cpuhp_remove_state_nocalls(hp_online); } diff --git a/drivers/irqchip/irq-armada-370-xp.c b/drivers/irqchip/irq-armada-370-xp.c index 8bcee65a0b8c..eb0d4d41b156 100644 --- a/drivers/irqchip/irq-armada-370-xp.c +++ b/drivers/irqchip/irq-armada-370-xp.c @@ -578,13 +578,13 @@ static int __init armada_370_xp_mpic_of_init(struct device_node *node, #ifdef CONFIG_SMP set_smp_cross_call(armada_mpic_send_doorbell); cpuhp_setup_state_nocalls(CPUHP_AP_IRQ_ARMADA_XP_STARTING, - "AP_IRQ_ARMADA_XP_STARTING", + "irqchip/armada/ipi:starting", armada_xp_mpic_starting_cpu, NULL); #endif } else { #ifdef CONFIG_SMP - cpuhp_setup_state_nocalls(CPUHP_AP_IRQ_ARMADA_CASC_STARTING, - "AP_IRQ_ARMADA_CASC_STARTING", + cpuhp_setup_state_nocalls(CPUHP_AP_IRQ_ARMADA_XP_STARTING, + "irqchip/armada/cascade:starting", mpic_cascaded_starting_cpu, NULL); #endif irq_set_chained_handler(parent_irq, diff --git a/drivers/irqchip/irq-bcm2836.c b/drivers/irqchip/irq-bcm2836.c index d96b2c947e74..e7463e3c0814 100644 --- a/drivers/irqchip/irq-bcm2836.c +++ b/drivers/irqchip/irq-bcm2836.c @@ -245,7 +245,7 @@ bcm2836_arm_irqchip_smp_init(void) #ifdef CONFIG_SMP /* Unmask IPIs to the boot CPU. */ cpuhp_setup_state(CPUHP_AP_IRQ_BCM2836_STARTING, - "AP_IRQ_BCM2836_STARTING", bcm2836_cpu_starting, + "irqchip/bcm2836:starting", bcm2836_cpu_starting, bcm2836_cpu_dying); set_smp_cross_call(bcm2836_arm_irqchip_send_ipi); diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 26e1d7fafb1e..c132f29322cc 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -632,9 +632,9 @@ static void gic_raise_softirq(const struct cpumask *mask, unsigned int irq) static void gic_smp_init(void) { set_smp_cross_call(gic_raise_softirq); - cpuhp_setup_state_nocalls(CPUHP_AP_IRQ_GICV3_STARTING, - "AP_IRQ_GICV3_STARTING", gic_starting_cpu, - NULL); + cpuhp_setup_state_nocalls(CPUHP_AP_IRQ_GIC_STARTING, + "irqchip/arm/gicv3:starting", + gic_starting_cpu, NULL); } static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val, diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index d6c404b3584d..1b1df4f770bd 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -1191,7 +1191,7 @@ static int __init __gic_init_bases(struct gic_chip_data *gic, set_smp_cross_call(gic_raise_softirq); #endif cpuhp_setup_state_nocalls(CPUHP_AP_IRQ_GIC_STARTING, - "AP_IRQ_GIC_STARTING", + "irqchip/arm/gic:starting", gic_starting_cpu, NULL); set_handle_irq(gic_handle_irq); if (static_key_true(&supports_deactivate)) diff --git a/drivers/irqchip/irq-hip04.c b/drivers/irqchip/irq-hip04.c index 021b0e0833c1..c1b4ee955dbe 100644 --- a/drivers/irqchip/irq-hip04.c +++ b/drivers/irqchip/irq-hip04.c @@ -407,7 +407,7 @@ hip04_of_init(struct device_node *node, struct device_node *parent) set_handle_irq(hip04_handle_irq); hip04_irq_dist_init(&hip04_data); - cpuhp_setup_state(CPUHP_AP_IRQ_HIP04_STARTING, "AP_IRQ_HIP04_STARTING", + cpuhp_setup_state(CPUHP_AP_IRQ_HIP04_STARTING, "irqchip/hip04:starting", hip04_irq_starting_cpu, NULL); return 0; } diff --git a/drivers/leds/trigger/ledtrig-cpu.c b/drivers/leds/trigger/ledtrig-cpu.c index 9719caf7437c..a41896468cb3 100644 --- a/drivers/leds/trigger/ledtrig-cpu.c +++ b/drivers/leds/trigger/ledtrig-cpu.c @@ -127,7 +127,7 @@ static int __init ledtrig_cpu_init(void) register_syscore_ops(&ledtrig_cpu_syscore_ops); - ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "AP_LEDTRIG_STARTING", + ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "leds/trigger:starting", ledtrig_online_cpu, ledtrig_prepare_down_cpu); if (ret < 0) pr_err("CPU hotplug notifier for ledtrig-cpu could not be registered: %d\n", diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 5deeda61d6d3..4a105006ca63 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -2484,13 +2484,13 @@ static __init int virtio_net_driver_init(void) { int ret; - ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "AP_VIRT_NET_ONLINE", + ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "virtio/net:online", virtnet_cpu_online, virtnet_cpu_down_prep); if (ret < 0) goto out; virtionet_online = ret; - ret = cpuhp_setup_state_multi(CPUHP_VIRT_NET_DEAD, "VIRT_NET_DEAD", + ret = cpuhp_setup_state_multi(CPUHP_VIRT_NET_DEAD, "virtio/net:dead", NULL, virtnet_cpu_dead); if (ret) goto err_dead; diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index b37b57294566..6d9335865880 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -1084,7 +1084,7 @@ static int arm_pmu_hp_init(void) int ret; ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_STARTING, - "AP_PERF_ARM_STARTING", + "perf/arm/pmu:starting", arm_perf_starting_cpu, NULL); if (ret) pr_err("CPU hotplug notifier for ARM PMU could not be registered: %d\n", diff --git a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c index 0990130821fa..c639d5a02656 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c +++ b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c @@ -127,13 +127,6 @@ module_param_named(log_fka, bnx2fc_log_fka, uint, S_IRUGO|S_IWUSR); MODULE_PARM_DESC(log_fka, " Print message to kernel log when fcoe is " "initiating a FIP keep alive when debug logging is enabled."); -static int bnx2fc_cpu_callback(struct notifier_block *nfb, - unsigned long action, void *hcpu); -/* notification function for CPU hotplug events */ -static struct notifier_block bnx2fc_cpu_notifier = { - .notifier_call = bnx2fc_cpu_callback, -}; - static inline struct net_device *bnx2fc_netdev(const struct fc_lport *lport) { return ((struct bnx2fc_interface *) @@ -2622,37 +2615,19 @@ static void bnx2fc_percpu_thread_destroy(unsigned int cpu) kthread_stop(thread); } -/** - * bnx2fc_cpu_callback - Handler for CPU hotplug events - * - * @nfb: The callback data block - * @action: The event triggering the callback - * @hcpu: The index of the CPU that the event is for - * - * This creates or destroys per-CPU data for fcoe - * - * Returns NOTIFY_OK always. - */ -static int bnx2fc_cpu_callback(struct notifier_block *nfb, - unsigned long action, void *hcpu) + +static int bnx2fc_cpu_online(unsigned int cpu) { - unsigned cpu = (unsigned long)hcpu; + printk(PFX "CPU %x online: Create Rx thread\n", cpu); + bnx2fc_percpu_thread_create(cpu); + return 0; +} - switch (action) { - case CPU_ONLINE: - case CPU_ONLINE_FROZEN: - printk(PFX "CPU %x online: Create Rx thread\n", cpu); - bnx2fc_percpu_thread_create(cpu); - break; - case CPU_DEAD: - case CPU_DEAD_FROZEN: - printk(PFX "CPU %x offline: Remove Rx thread\n", cpu); - bnx2fc_percpu_thread_destroy(cpu); - break; - default: - break; - } - return NOTIFY_OK; +static int bnx2fc_cpu_dead(unsigned int cpu) +{ + printk(PFX "CPU %x offline: Remove Rx thread\n", cpu); + bnx2fc_percpu_thread_destroy(cpu); + return 0; } static int bnx2fc_slave_configure(struct scsi_device *sdev) @@ -2664,6 +2639,8 @@ static int bnx2fc_slave_configure(struct scsi_device *sdev) return 0; } +static enum cpuhp_state bnx2fc_online_state; + /** * bnx2fc_mod_init - module init entry point * @@ -2724,21 +2701,31 @@ static int __init bnx2fc_mod_init(void) spin_lock_init(&p->fp_work_lock); } - cpu_notifier_register_begin(); + get_online_cpus(); - for_each_online_cpu(cpu) { + for_each_online_cpu(cpu) bnx2fc_percpu_thread_create(cpu); - } - /* Initialize per CPU interrupt thread */ - __register_hotcpu_notifier(&bnx2fc_cpu_notifier); + rc = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, + "scsi/bnx2fc:online", + bnx2fc_cpu_online, NULL); + if (rc < 0) + goto stop_threads; + bnx2fc_online_state = rc; - cpu_notifier_register_done(); + cpuhp_setup_state_nocalls(CPUHP_SCSI_BNX2FC_DEAD, "scsi/bnx2fc:dead", + NULL, bnx2fc_cpu_dead); + put_online_cpus(); cnic_register_driver(CNIC_ULP_FCOE, &bnx2fc_cnic_cb); return 0; +stop_threads: + for_each_online_cpu(cpu) + bnx2fc_percpu_thread_destroy(cpu); + put_online_cpus(); + kthread_stop(l2_thread); free_wq: destroy_workqueue(bnx2fc_wq); release_bt: @@ -2797,16 +2784,16 @@ static void __exit bnx2fc_mod_exit(void) if (l2_thread) kthread_stop(l2_thread); - cpu_notifier_register_begin(); - + get_online_cpus(); /* Destroy per cpu threads */ for_each_online_cpu(cpu) { bnx2fc_percpu_thread_destroy(cpu); } - __unregister_hotcpu_notifier(&bnx2fc_cpu_notifier); + cpuhp_remove_state_nocalls(bnx2fc_online_state); + cpuhp_remove_state_nocalls(CPUHP_SCSI_BNX2FC_DEAD); - cpu_notifier_register_done(); + put_online_cpus(); destroy_workqueue(bnx2fc_wq); /* diff --git a/drivers/scsi/bnx2i/bnx2i_init.c b/drivers/scsi/bnx2i/bnx2i_init.c index c8b410c24cf0..86afc002814c 100644 --- a/drivers/scsi/bnx2i/bnx2i_init.c +++ b/drivers/scsi/bnx2i/bnx2i_init.c @@ -70,14 +70,6 @@ u64 iscsi_error_mask = 0x00; DEFINE_PER_CPU(struct bnx2i_percpu_s, bnx2i_percpu); -static int bnx2i_cpu_callback(struct notifier_block *nfb, - unsigned long action, void *hcpu); -/* notification function for CPU hotplug events */ -static struct notifier_block bnx2i_cpu_notifier = { - .notifier_call = bnx2i_cpu_callback, -}; - - /** * bnx2i_identify_device - identifies NetXtreme II device type * @hba: Adapter structure pointer @@ -461,41 +453,21 @@ static void bnx2i_percpu_thread_destroy(unsigned int cpu) kthread_stop(thread); } - -/** - * bnx2i_cpu_callback - Handler for CPU hotplug events - * - * @nfb: The callback data block - * @action: The event triggering the callback - * @hcpu: The index of the CPU that the event is for - * - * This creates or destroys per-CPU data for iSCSI - * - * Returns NOTIFY_OK always. - */ -static int bnx2i_cpu_callback(struct notifier_block *nfb, - unsigned long action, void *hcpu) +static int bnx2i_cpu_online(unsigned int cpu) { - unsigned cpu = (unsigned long)hcpu; + pr_info("bnx2i: CPU %x online: Create Rx thread\n", cpu); + bnx2i_percpu_thread_create(cpu); + return 0; +} - switch (action) { - case CPU_ONLINE: - case CPU_ONLINE_FROZEN: - printk(KERN_INFO "bnx2i: CPU %x online: Create Rx thread\n", - cpu); - bnx2i_percpu_thread_create(cpu); - break; - case CPU_DEAD: - case CPU_DEAD_FROZEN: - printk(KERN_INFO "CPU %x offline: Remove Rx thread\n", cpu); - bnx2i_percpu_thread_destroy(cpu); - break; - default: - break; - } - return NOTIFY_OK; +static int bnx2i_cpu_dead(unsigned int cpu) +{ + pr_info("CPU %x offline: Remove Rx thread\n", cpu); + bnx2i_percpu_thread_destroy(cpu); + return 0; } +static enum cpuhp_state bnx2i_online_state; /** * bnx2i_mod_init - module init entry point @@ -539,18 +511,28 @@ static int __init bnx2i_mod_init(void) p->iothread = NULL; } - cpu_notifier_register_begin(); + get_online_cpus(); for_each_online_cpu(cpu) bnx2i_percpu_thread_create(cpu); - /* Initialize per CPU interrupt thread */ - __register_hotcpu_notifier(&bnx2i_cpu_notifier); - - cpu_notifier_register_done(); + err = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, + "scsi/bnx2i:online", + bnx2i_cpu_online, NULL); + if (err < 0) + goto remove_threads; + bnx2i_online_state = err; + cpuhp_setup_state_nocalls(CPUHP_SCSI_BNX2I_DEAD, "scsi/bnx2i:dead", + NULL, bnx2i_cpu_dead); + put_online_cpus(); return 0; +remove_threads: + for_each_online_cpu(cpu) + bnx2i_percpu_thread_destroy(cpu); + put_online_cpus(); + cnic_unregister_driver(CNIC_ULP_ISCSI); unreg_xport: iscsi_unregister_transport(&bnx2i_iscsi_transport); out: @@ -587,14 +569,14 @@ static void __exit bnx2i_mod_exit(void) } mutex_unlock(&bnx2i_dev_lock); - cpu_notifier_register_begin(); + get_online_cpus(); for_each_online_cpu(cpu) bnx2i_percpu_thread_destroy(cpu); - __unregister_hotcpu_notifier(&bnx2i_cpu_notifier); - - cpu_notifier_register_done(); + cpuhp_remove_state_nocalls(bnx2i_online_state); + cpuhp_remove_state_nocalls(CPUHP_SCSI_BNX2I_DEAD); + put_online_cpus(); iscsi_unregister_transport(&bnx2i_iscsi_transport); cnic_unregister_driver(CNIC_ULP_ISCSI); diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c index 19ead8d17e55..5eda21d903e9 100644 --- a/drivers/scsi/qedi/qedi_main.c +++ b/drivers/scsi/qedi/qedi_main.c @@ -1612,30 +1612,29 @@ static int qedi_percpu_io_thread(void *arg) return 0; } -static void qedi_percpu_thread_create(unsigned int cpu) +static int qedi_cpu_online(unsigned int cpu) { - struct qedi_percpu_s *p; + struct qedi_percpu_s *p = this_cpu_ptr(&qedi_percpu); struct task_struct *thread; - p = &per_cpu(qedi_percpu, cpu); - thread = kthread_create_on_node(qedi_percpu_io_thread, (void *)p, cpu_to_node(cpu), "qedi_thread/%d", cpu); - if (likely(!IS_ERR(thread))) { - kthread_bind(thread, cpu); - p->iothread = thread; - wake_up_process(thread); - } + if (IS_ERR(thread)) + return PTR_ERR(thread); + + kthread_bind(thread, cpu); + p->iothread = thread; + wake_up_process(thread); + return 0; } -static void qedi_percpu_thread_destroy(unsigned int cpu) +static int qedi_cpu_offline(unsigned int cpu) { - struct qedi_percpu_s *p; - struct task_struct *thread; + struct qedi_percpu_s *p = this_cpu_ptr(&qedi_percpu); struct qedi_work *work, *tmp; + struct task_struct *thread; - p = &per_cpu(qedi_percpu, cpu); spin_lock_bh(&p->p_work_lock); thread = p->iothread; p->iothread = NULL; @@ -1650,35 +1649,9 @@ static void qedi_percpu_thread_destroy(unsigned int cpu) spin_unlock_bh(&p->p_work_lock); if (thread) kthread_stop(thread); + return 0; } -static int qedi_cpu_callback(struct notifier_block *nfb, - unsigned long action, void *hcpu) -{ - unsigned int cpu = (unsigned long)hcpu; - - switch (action) { - case CPU_ONLINE: - case CPU_ONLINE_FROZEN: - QEDI_ERR(NULL, "CPU %d online.\n", cpu); - qedi_percpu_thread_create(cpu); - break; - case CPU_DEAD: - case CPU_DEAD_FROZEN: - QEDI_ERR(NULL, "CPU %d offline.\n", cpu); - qedi_percpu_thread_destroy(cpu); - break; - default: - break; - } - - return NOTIFY_OK; -} - -static struct notifier_block qedi_cpu_notifier = { - .notifier_call = qedi_cpu_callback, -}; - void qedi_reset_host_mtu(struct qedi_ctx *qedi, u16 mtu) { struct qed_ll2_params params; @@ -2038,6 +2011,8 @@ static struct pci_device_id qedi_pci_tbl[] = { }; MODULE_DEVICE_TABLE(pci, qedi_pci_tbl); +static enum cpuhp_state qedi_cpuhp_state; + static struct pci_driver qedi_pci_driver = { .name = QEDI_MODULE_NAME, .id_table = qedi_pci_tbl, @@ -2047,16 +2022,13 @@ static struct pci_driver qedi_pci_driver = { static int __init qedi_init(void) { - int rc = 0; - int ret; struct qedi_percpu_s *p; - unsigned int cpu = 0; + int cpu, rc = 0; qedi_ops = qed_get_iscsi_ops(); if (!qedi_ops) { QEDI_ERR(NULL, "Failed to get qed iSCSI operations\n"); - rc = -EINVAL; - goto exit_qedi_init_0; + return -EINVAL; } #ifdef CONFIG_DEBUG_FS @@ -2070,15 +2042,6 @@ static int __init qedi_init(void) goto exit_qedi_init_1; } - register_hotcpu_notifier(&qedi_cpu_notifier); - - ret = pci_register_driver(&qedi_pci_driver); - if (ret) { - QEDI_ERR(NULL, "Failed to register driver\n"); - rc = -EINVAL; - goto exit_qedi_init_2; - } - for_each_possible_cpu(cpu) { p = &per_cpu(qedi_percpu, cpu); INIT_LIST_HEAD(&p->work_list); @@ -2086,11 +2049,22 @@ static int __init qedi_init(void) p->iothread = NULL; } - for_each_online_cpu(cpu) - qedi_percpu_thread_create(cpu); + rc = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "scsi/qedi:online", + qedi_cpu_online, qedi_cpu_offline); + if (rc < 0) + goto exit_qedi_init_2; + qedi_cpuhp_state = rc; - return rc; + rc = pci_register_driver(&qedi_pci_driver); + if (rc) { + QEDI_ERR(NULL, "Failed to register driver\n"); + goto exit_qedi_hp; + } + + return 0; +exit_qedi_hp: + cpuhp_remove_state(qedi_cpuhp_state); exit_qedi_init_2: iscsi_unregister_transport(&qedi_iscsi_transport); exit_qedi_init_1: @@ -2098,19 +2072,13 @@ exit_qedi_init_1: qedi_dbg_exit(); #endif qed_put_iscsi_ops(); -exit_qedi_init_0: return rc; } static void __exit qedi_cleanup(void) { - unsigned int cpu = 0; - - for_each_online_cpu(cpu) - qedi_percpu_thread_destroy(cpu); - pci_unregister_driver(&qedi_pci_driver); - unregister_hotcpu_notifier(&qedi_cpu_notifier); + cpuhp_remove_state(qedi_cpuhp_state); iscsi_unregister_transport(&qedi_iscsi_transport); #ifdef CONFIG_DEBUG_FS diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c index 6b9cf06e8df2..427e2198bb9e 100644 --- a/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c +++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c @@ -967,48 +967,38 @@ cfs_cpt_table_create_pattern(char *pattern) } #ifdef CONFIG_HOTPLUG_CPU -static int -cfs_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) -{ - unsigned int cpu = (unsigned long)hcpu; - bool warn; - - switch (action) { - case CPU_DEAD: - case CPU_DEAD_FROZEN: - case CPU_ONLINE: - case CPU_ONLINE_FROZEN: - spin_lock(&cpt_data.cpt_lock); - cpt_data.cpt_version++; - spin_unlock(&cpt_data.cpt_lock); - /* Fall through */ - default: - if (action != CPU_DEAD && action != CPU_DEAD_FROZEN) { - CDEBUG(D_INFO, "CPU changed [cpu %u action %lx]\n", - cpu, action); - break; - } +static enum cpuhp_state lustre_cpu_online; - mutex_lock(&cpt_data.cpt_mutex); - /* if all HTs in a core are offline, it may break affinity */ - cpumask_copy(cpt_data.cpt_cpumask, - topology_sibling_cpumask(cpu)); - warn = cpumask_any_and(cpt_data.cpt_cpumask, - cpu_online_mask) >= nr_cpu_ids; - mutex_unlock(&cpt_data.cpt_mutex); - CDEBUG(warn ? D_WARNING : D_INFO, - "Lustre: can't support CPU plug-out well now, performance and stability could be impacted [CPU %u action: %lx]\n", - cpu, action); - } +static void cfs_cpu_incr_cpt_version(void) +{ + spin_lock(&cpt_data.cpt_lock); + cpt_data.cpt_version++; + spin_unlock(&cpt_data.cpt_lock); +} - return NOTIFY_OK; +static int cfs_cpu_online(unsigned int cpu) +{ + cfs_cpu_incr_cpt_version(); + return 0; } -static struct notifier_block cfs_cpu_notifier = { - .notifier_call = cfs_cpu_notify, - .priority = 0 -}; +static int cfs_cpu_dead(unsigned int cpu) +{ + bool warn; + + cfs_cpu_incr_cpt_version(); + mutex_lock(&cpt_data.cpt_mutex); + /* if all HTs in a core are offline, it may break affinity */ + cpumask_copy(cpt_data.cpt_cpumask, topology_sibling_cpumask(cpu)); + warn = cpumask_any_and(cpt_data.cpt_cpumask, + cpu_online_mask) >= nr_cpu_ids; + mutex_unlock(&cpt_data.cpt_mutex); + CDEBUG(warn ? D_WARNING : D_INFO, + "Lustre: can't support CPU plug-out well now, performance and stability could be impacted [CPU %u]\n", + cpu); + return 0; +} #endif void @@ -1018,7 +1008,9 @@ cfs_cpu_fini(void) cfs_cpt_table_free(cfs_cpt_table); #ifdef CONFIG_HOTPLUG_CPU - unregister_hotcpu_notifier(&cfs_cpu_notifier); + if (lustre_cpu_online > 0) + cpuhp_remove_state_nocalls(lustre_cpu_online); + cpuhp_remove_state_nocalls(CPUHP_LUSTRE_CFS_DEAD); #endif if (cpt_data.cpt_cpumask) LIBCFS_FREE(cpt_data.cpt_cpumask, cpumask_size()); @@ -1027,6 +1019,8 @@ cfs_cpu_fini(void) int cfs_cpu_init(void) { + int ret = 0; + LASSERT(!cfs_cpt_table); memset(&cpt_data, 0, sizeof(cpt_data)); @@ -1041,8 +1035,19 @@ cfs_cpu_init(void) mutex_init(&cpt_data.cpt_mutex); #ifdef CONFIG_HOTPLUG_CPU - register_hotcpu_notifier(&cfs_cpu_notifier); + ret = cpuhp_setup_state_nocalls(CPUHP_LUSTRE_CFS_DEAD, + "staging/lustre/cfe:dead", NULL, + cfs_cpu_dead); + if (ret < 0) + goto failed; + ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, + "staging/lustre/cfe:online", + cfs_cpu_online, NULL); + if (ret < 0) + goto failed; + lustre_cpu_online = ret; #endif + ret = -EINVAL; if (*cpu_pattern) { cfs_cpt_table = cfs_cpt_table_create_pattern(cpu_pattern); @@ -1075,7 +1080,7 @@ cfs_cpu_init(void) failed: cfs_cpu_fini(); - return -1; + return ret; } #endif diff --git a/drivers/xen/events/events_fifo.c b/drivers/xen/events/events_fifo.c index 7ef27c6ed72f..c03f9c86c7e3 100644 --- a/drivers/xen/events/events_fifo.c +++ b/drivers/xen/events/events_fifo.c @@ -445,7 +445,7 @@ int __init xen_evtchn_fifo_init(void) evtchn_ops = &evtchn_ops_fifo; cpuhp_setup_state_nocalls(CPUHP_XEN_EVTCHN_PREPARE, - "CPUHP_XEN_EVTCHN_PREPARE", + "xen/evtchn:prepare", xen_evtchn_cpu_prepare, xen_evtchn_cpu_dead); out: put_cpu(); diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 09807c2ce328..21f9c74496e7 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -57,9 +57,6 @@ struct notifier_block; #define CPU_ONLINE 0x0002 /* CPU (unsigned)v is up */ #define CPU_UP_PREPARE 0x0003 /* CPU (unsigned)v coming up */ -#define CPU_UP_CANCELED 0x0004 /* CPU (unsigned)v NOT coming up */ -#define CPU_DOWN_PREPARE 0x0005 /* CPU (unsigned)v going down */ -#define CPU_DOWN_FAILED 0x0006 /* CPU (unsigned)v NOT going down */ #define CPU_DEAD 0x0007 /* CPU (unsigned)v dead */ #define CPU_POST_DEAD 0x0009 /* CPU (unsigned)v dead, cpu_hotplug * lock is dropped */ @@ -80,80 +77,14 @@ struct notifier_block; #ifdef CONFIG_SMP extern bool cpuhp_tasks_frozen; -/* Need to know about CPUs going up/down? */ -#if defined(CONFIG_HOTPLUG_CPU) || !defined(MODULE) -#define cpu_notifier(fn, pri) { \ - static struct notifier_block fn##_nb = \ - { .notifier_call = fn, .priority = pri }; \ - register_cpu_notifier(&fn##_nb); \ -} - -#define __cpu_notifier(fn, pri) { \ - static struct notifier_block fn##_nb = \ - { .notifier_call = fn, .priority = pri }; \ - __register_cpu_notifier(&fn##_nb); \ -} - -extern int register_cpu_notifier(struct notifier_block *nb); -extern int __register_cpu_notifier(struct notifier_block *nb); -extern void unregister_cpu_notifier(struct notifier_block *nb); -extern void __unregister_cpu_notifier(struct notifier_block *nb); - -#else /* #if defined(CONFIG_HOTPLUG_CPU) || !defined(MODULE) */ -#define cpu_notifier(fn, pri) do { (void)(fn); } while (0) -#define __cpu_notifier(fn, pri) do { (void)(fn); } while (0) - -static inline int register_cpu_notifier(struct notifier_block *nb) -{ - return 0; -} - -static inline int __register_cpu_notifier(struct notifier_block *nb) -{ - return 0; -} - -static inline void unregister_cpu_notifier(struct notifier_block *nb) -{ -} - -static inline void __unregister_cpu_notifier(struct notifier_block *nb) -{ -} -#endif - int cpu_up(unsigned int cpu); void notify_cpu_starting(unsigned int cpu); extern void cpu_maps_update_begin(void); extern void cpu_maps_update_done(void); -#define cpu_notifier_register_begin cpu_maps_update_begin -#define cpu_notifier_register_done cpu_maps_update_done - #else /* CONFIG_SMP */ #define cpuhp_tasks_frozen 0 -#define cpu_notifier(fn, pri) do { (void)(fn); } while (0) -#define __cpu_notifier(fn, pri) do { (void)(fn); } while (0) - -static inline int register_cpu_notifier(struct notifier_block *nb) -{ - return 0; -} - -static inline int __register_cpu_notifier(struct notifier_block *nb) -{ - return 0; -} - -static inline void unregister_cpu_notifier(struct notifier_block *nb) -{ -} - -static inline void __unregister_cpu_notifier(struct notifier_block *nb) -{ -} - static inline void cpu_maps_update_begin(void) { } @@ -162,14 +93,6 @@ static inline void cpu_maps_update_done(void) { } -static inline void cpu_notifier_register_begin(void) -{ -} - -static inline void cpu_notifier_register_done(void) -{ -} - #endif /* CONFIG_SMP */ extern struct bus_type cpu_subsys; @@ -182,12 +105,6 @@ extern void get_online_cpus(void); extern void put_online_cpus(void); extern void cpu_hotplug_disable(void); extern void cpu_hotplug_enable(void); -#define hotcpu_notifier(fn, pri) cpu_notifier(fn, pri) -#define __hotcpu_notifier(fn, pri) __cpu_notifier(fn, pri) -#define register_hotcpu_notifier(nb) register_cpu_notifier(nb) -#define __register_hotcpu_notifier(nb) __register_cpu_notifier(nb) -#define unregister_hotcpu_notifier(nb) unregister_cpu_notifier(nb) -#define __unregister_hotcpu_notifier(nb) __unregister_cpu_notifier(nb) void clear_tasks_mm_cpumask(int cpu); int cpu_down(unsigned int cpu); @@ -199,13 +116,6 @@ static inline void cpu_hotplug_done(void) {} #define put_online_cpus() do { } while (0) #define cpu_hotplug_disable() do { } while (0) #define cpu_hotplug_enable() do { } while (0) -#define hotcpu_notifier(fn, pri) do { (void)(fn); } while (0) -#define __hotcpu_notifier(fn, pri) do { (void)(fn); } while (0) -/* These aren't inline functions due to a GCC bug. */ -#define register_hotcpu_notifier(nb) ({ (void)(nb); 0; }) -#define __register_hotcpu_notifier(nb) ({ (void)(nb); 0; }) -#define unregister_hotcpu_notifier(nb) ({ (void)(nb); }) -#define __unregister_hotcpu_notifier(nb) ({ (void)(nb); }) #endif /* CONFIG_HOTPLUG_CPU */ #ifdef CONFIG_PM_SLEEP_SMP diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 2ab7bf53d529..20bfefbe7594 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -41,6 +41,9 @@ enum cpuhp_state { CPUHP_NET_DEV_DEAD, CPUHP_PCI_XGENE_DEAD, CPUHP_IOMMU_INTEL_DEAD, + CPUHP_LUSTRE_CFS_DEAD, + CPUHP_SCSI_BNX2FC_DEAD, + CPUHP_SCSI_BNX2I_DEAD, CPUHP_WORKQUEUE_PREP, CPUHP_POWER_NUMA_PREPARE, CPUHP_HRTIMERS_PREPARE, @@ -56,7 +59,6 @@ enum cpuhp_state { CPUHP_POWERPC_MMU_CTX_PREPARE, CPUHP_XEN_PREPARE, CPUHP_XEN_EVTCHN_PREPARE, - CPUHP_NOTIFY_PREPARE, CPUHP_ARM_SHMOBILE_SCU_PREPARE, CPUHP_SH_SH3X_PREPARE, CPUHP_BLK_MQ_PREPARE, @@ -71,7 +73,6 @@ enum cpuhp_state { CPUHP_KVM_PPC_BOOK3S_PREPARE, CPUHP_ZCOMP_PREPARE, CPUHP_TIMERS_DEAD, - CPUHP_NOTF_ERR_INJ_PREPARE, CPUHP_MIPS_SOC_PREPARE, CPUHP_BRINGUP_CPU, CPUHP_AP_IDLE_DEAD, @@ -79,10 +80,8 @@ enum cpuhp_state { CPUHP_AP_SCHED_STARTING, CPUHP_AP_RCUTREE_DYING, CPUHP_AP_IRQ_GIC_STARTING, - CPUHP_AP_IRQ_GICV3_STARTING, CPUHP_AP_IRQ_HIP04_STARTING, CPUHP_AP_IRQ_ARMADA_XP_STARTING, - CPUHP_AP_IRQ_ARMADA_CASC_STARTING, CPUHP_AP_IRQ_BCM2836_STARTING, CPUHP_AP_ARM_MVEBU_COHERENCY, CPUHP_AP_PERF_X86_UNCORE_STARTING, @@ -118,7 +117,6 @@ enum cpuhp_state { CPUHP_AP_DUMMY_TIMER_STARTING, CPUHP_AP_ARM_XEN_STARTING, CPUHP_AP_ARM_CORESIGHT_STARTING, - CPUHP_AP_ARM_CORESIGHT4_STARTING, CPUHP_AP_ARM64_ISNDEP_STARTING, CPUHP_AP_SMPCFD_DYING, CPUHP_AP_X86_TBOOT_DYING, @@ -142,7 +140,6 @@ enum cpuhp_state { CPUHP_AP_PERF_ARM_L2X0_ONLINE, CPUHP_AP_WORKQUEUE_ONLINE, CPUHP_AP_RCUTREE_ONLINE, - CPUHP_AP_NOTIFY_ONLINE, CPUHP_AP_ONLINE_DYN, CPUHP_AP_ONLINE_DYN_END = CPUHP_AP_ONLINE_DYN + 30, CPUHP_AP_X86_HPET_ONLINE, diff --git a/include/linux/mm.h b/include/linux/mm.h index 4424784ac374..fe6b4036664a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1758,6 +1758,8 @@ static inline spinlock_t *pmd_lock(struct mm_struct *mm, pmd_t *pmd) return ptl; } +extern void __init pagecache_init(void); + extern void free_area_init(unsigned long * zones_size); extern void free_area_init_node(int nid, unsigned long * zones_size, unsigned long zone_start_pfn, unsigned long *zholes_size); diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 74e4dda91238..c56b39890a41 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -73,6 +73,7 @@ */ enum pageflags { PG_locked, /* Page is locked. Don't touch. */ + PG_waiters, /* Page has waiters, check its waitqueue */ PG_error, PG_referenced, PG_uptodate, @@ -87,7 +88,6 @@ enum pageflags { PG_private_2, /* If pagecache, has fs aux data */ PG_writeback, /* Page is under writeback */ PG_head, /* A head page */ - PG_swapcache, /* Swap page: swp_entry_t in private */ PG_mappedtodisk, /* Has blocks allocated on-disk */ PG_reclaim, /* To be reclaimed asap */ PG_swapbacked, /* Page is backed by RAM/swap */ @@ -110,6 +110,9 @@ enum pageflags { /* Filesystems */ PG_checked = PG_owner_priv_1, + /* SwapBacked */ + PG_swapcache = PG_owner_priv_1, /* Swap page: swp_entry_t in private */ + /* Two page bits are conscripted by FS-Cache to maintain local caching * state. These bits are set on pages belonging to the netfs's inodes * when those inodes are being locally cached. @@ -167,6 +170,9 @@ static __always_inline int PageCompound(struct page *page) * for compound page all operations related to the page flag applied to * head page. * + * PF_ONLY_HEAD: + * for compound page, callers only ever operate on the head page. + * * PF_NO_TAIL: * modifications of the page flag must be done on small or head pages, * checks can be done on tail pages too. @@ -176,6 +182,9 @@ static __always_inline int PageCompound(struct page *page) */ #define PF_ANY(page, enforce) page #define PF_HEAD(page, enforce) compound_head(page) +#define PF_ONLY_HEAD(page, enforce) ({ \ + VM_BUG_ON_PGFLAGS(PageTail(page), page); \ + page;}) #define PF_NO_TAIL(page, enforce) ({ \ VM_BUG_ON_PGFLAGS(enforce && PageTail(page), page); \ compound_head(page);}) @@ -253,6 +262,7 @@ static inline int TestClearPage##uname(struct page *page) { return 0; } TESTSETFLAG_FALSE(uname) TESTCLEARFLAG_FALSE(uname) __PAGEFLAG(Locked, locked, PF_NO_TAIL) +PAGEFLAG(Waiters, waiters, PF_ONLY_HEAD) __CLEARPAGEFLAG(Waiters, waiters, PF_ONLY_HEAD) PAGEFLAG(Error, error, PF_NO_COMPOUND) TESTCLEARFLAG(Error, error, PF_NO_COMPOUND) PAGEFLAG(Referenced, referenced, PF_HEAD) TESTCLEARFLAG(Referenced, referenced, PF_HEAD) @@ -314,7 +324,13 @@ PAGEFLAG_FALSE(HighMem) #endif #ifdef CONFIG_SWAP -PAGEFLAG(SwapCache, swapcache, PF_NO_COMPOUND) +static __always_inline int PageSwapCache(struct page *page) +{ + return PageSwapBacked(page) && test_bit(PG_swapcache, &page->flags); + +} +SETPAGEFLAG(SwapCache, swapcache, PF_NO_COMPOUND) +CLEARPAGEFLAG(SwapCache, swapcache, PF_NO_COMPOUND) #else PAGEFLAG_FALSE(SwapCache) #endif @@ -701,12 +717,12 @@ static inline void ClearPageSlabPfmemalloc(struct page *page) * Flags checked when a page is freed. Pages being freed should not have * these flags set. It they are, there is a problem. */ -#define PAGE_FLAGS_CHECK_AT_FREE \ - (1UL << PG_lru | 1UL << PG_locked | \ - 1UL << PG_private | 1UL << PG_private_2 | \ - 1UL << PG_writeback | 1UL << PG_reserved | \ - 1UL << PG_slab | 1UL << PG_swapcache | 1UL << PG_active | \ - 1UL << PG_unevictable | __PG_MLOCKED) +#define PAGE_FLAGS_CHECK_AT_FREE \ + (1UL << PG_lru | 1UL << PG_locked | \ + 1UL << PG_private | 1UL << PG_private_2 | \ + 1UL << PG_writeback | 1UL << PG_reserved | \ + 1UL << PG_slab | 1UL << PG_active | \ + 1UL << PG_unevictable | __PG_MLOCKED) /* * Flags checked when a page is prepped for return by the page allocator. @@ -735,6 +751,7 @@ static inline int page_has_private(struct page *page) #undef PF_ANY #undef PF_HEAD +#undef PF_ONLY_HEAD #undef PF_NO_TAIL #undef PF_NO_COMPOUND #endif /* !__GENERATING_BOUNDS_H */ diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index f29f80f81dbf..324c8dbad1e1 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -486,22 +486,14 @@ static inline int lock_page_or_retry(struct page *page, struct mm_struct *mm, * and for filesystems which need to wait on PG_private. */ extern void wait_on_page_bit(struct page *page, int bit_nr); - extern int wait_on_page_bit_killable(struct page *page, int bit_nr); -extern int wait_on_page_bit_killable_timeout(struct page *page, - int bit_nr, unsigned long timeout); - -static inline int wait_on_page_locked_killable(struct page *page) -{ - if (!PageLocked(page)) - return 0; - return wait_on_page_bit_killable(compound_head(page), PG_locked); -} +extern void wake_up_page_bit(struct page *page, int bit_nr); -extern wait_queue_head_t *page_waitqueue(struct page *page); static inline void wake_up_page(struct page *page, int bit) { - __wake_up_bit(page_waitqueue(page), &page->flags, bit); + if (!PageWaiters(page)) + return; + wake_up_page_bit(page, bit); } /* @@ -517,6 +509,13 @@ static inline void wait_on_page_locked(struct page *page) wait_on_page_bit(compound_head(page), PG_locked); } +static inline int wait_on_page_locked_killable(struct page *page) +{ + if (!PageLocked(page)) + return 0; + return wait_on_page_bit_killable(compound_head(page), PG_locked); +} + /* * Wait for a page to complete writeback */ diff --git a/include/linux/writeback.h b/include/linux/writeback.h index c78f9f0920b5..5527d910ba3d 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -375,7 +375,6 @@ void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty); unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh); void wb_update_bandwidth(struct bdi_writeback *wb, unsigned long start_time); -void page_writeback_init(void); void balance_dirty_pages_ratelimited(struct address_space *mapping); bool wb_over_bg_thresh(struct bdi_writeback *wb); diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index 5a81ab48a2fb..9e687ca9a307 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -81,6 +81,7 @@ #define __def_pageflag_names \ {1UL << PG_locked, "locked" }, \ + {1UL << PG_waiters, "waiters" }, \ {1UL << PG_error, "error" }, \ {1UL << PG_referenced, "referenced" }, \ {1UL << PG_uptodate, "uptodate" }, \ @@ -95,7 +96,6 @@ {1UL << PG_private_2, "private_2" }, \ {1UL << PG_writeback, "writeback" }, \ {1UL << PG_head, "head" }, \ - {1UL << PG_swapcache, "swapcache" }, \ {1UL << PG_mappedtodisk, "mappedtodisk" }, \ {1UL << PG_reclaim, "reclaim" }, \ {1UL << PG_swapbacked, "swapbacked" }, \ diff --git a/init/main.c b/init/main.c index c81c9fa21bc7..b0c9d6facef9 100644 --- a/init/main.c +++ b/init/main.c @@ -647,9 +647,8 @@ asmlinkage __visible void __init start_kernel(void) security_init(); dbg_late_init(); vfs_caches_init(); + pagecache_init(); signals_init(); - /* rootfs populating might need page-writeback */ - page_writeback_init(); proc_root_init(); nsfs_init(); cpuset_init(); diff --git a/kernel/cpu.c b/kernel/cpu.c index 5339aca811d2..042fd7e8e030 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -183,23 +183,16 @@ EXPORT_SYMBOL_GPL(cpuhp_tasks_frozen); /* * The following two APIs (cpu_maps_update_begin/done) must be used when * attempting to serialize the updates to cpu_online_mask & cpu_present_mask. - * The APIs cpu_notifier_register_begin/done() must be used to protect CPU - * hotplug callback (un)registration performed using __register_cpu_notifier() - * or __unregister_cpu_notifier(). */ void cpu_maps_update_begin(void) { mutex_lock(&cpu_add_remove_lock); } -EXPORT_SYMBOL(cpu_notifier_register_begin); void cpu_maps_update_done(void) { mutex_unlock(&cpu_add_remove_lock); } -EXPORT_SYMBOL(cpu_notifier_register_done); - -static RAW_NOTIFIER_HEAD(cpu_chain); /* If set, cpu_up and cpu_down will return -EBUSY and do nothing. * Should always be manipulated under cpu_add_remove_lock @@ -349,66 +342,7 @@ void cpu_hotplug_enable(void) EXPORT_SYMBOL_GPL(cpu_hotplug_enable); #endif /* CONFIG_HOTPLUG_CPU */ -/* Need to know about CPUs going up/down? */ -int register_cpu_notifier(struct notifier_block *nb) -{ - int ret; - cpu_maps_update_begin(); - ret = raw_notifier_chain_register(&cpu_chain, nb); - cpu_maps_update_done(); - return ret; -} - -int __register_cpu_notifier(struct notifier_block *nb) -{ - return raw_notifier_chain_register(&cpu_chain, nb); -} - -static int __cpu_notify(unsigned long val, unsigned int cpu, int nr_to_call, - int *nr_calls) -{ - unsigned long mod = cpuhp_tasks_frozen ? CPU_TASKS_FROZEN : 0; - void *hcpu = (void *)(long)cpu; - - int ret; - - ret = __raw_notifier_call_chain(&cpu_chain, val | mod, hcpu, nr_to_call, - nr_calls); - - return notifier_to_errno(ret); -} - -static int cpu_notify(unsigned long val, unsigned int cpu) -{ - return __cpu_notify(val, cpu, -1, NULL); -} - -static void cpu_notify_nofail(unsigned long val, unsigned int cpu) -{ - BUG_ON(cpu_notify(val, cpu)); -} - /* Notifier wrappers for transitioning to state machine */ -static int notify_prepare(unsigned int cpu) -{ - int nr_calls = 0; - int ret; - - ret = __cpu_notify(CPU_UP_PREPARE, cpu, -1, &nr_calls); - if (ret) { - nr_calls--; - printk(KERN_WARNING "%s: attempt to bring up CPU %u failed\n", - __func__, cpu); - __cpu_notify(CPU_UP_CANCELED, cpu, nr_calls, NULL); - } - return ret; -} - -static int notify_online(unsigned int cpu) -{ - cpu_notify(CPU_ONLINE, cpu); - return 0; -} static int bringup_wait_for_ap(unsigned int cpu) { @@ -433,10 +367,8 @@ static int bringup_cpu(unsigned int cpu) /* Arch-specific enabling code. */ ret = __cpu_up(cpu, idle); irq_unlock_sparse(); - if (ret) { - cpu_notify(CPU_UP_CANCELED, cpu); + if (ret) return ret; - } ret = bringup_wait_for_ap(cpu); BUG_ON(!cpu_online(cpu)); return ret; @@ -565,11 +497,6 @@ static void cpuhp_thread_fun(unsigned int cpu) BUG_ON(st->state < CPUHP_AP_ONLINE_IDLE); undo_cpu_down(cpu, st); - /* - * This is a momentary workaround to keep the notifier users - * happy. Will go away once we got rid of the notifiers. - */ - cpu_notify_nofail(CPU_DOWN_FAILED, cpu); st->rollback = false; } else { /* Cannot happen .... */ @@ -659,22 +586,6 @@ void __init cpuhp_threads_init(void) kthread_unpark(this_cpu_read(cpuhp_state.thread)); } -EXPORT_SYMBOL(register_cpu_notifier); -EXPORT_SYMBOL(__register_cpu_notifier); -void unregister_cpu_notifier(struct notifier_block *nb) -{ - cpu_maps_update_begin(); - raw_notifier_chain_unregister(&cpu_chain, nb); - cpu_maps_update_done(); -} -EXPORT_SYMBOL(unregister_cpu_notifier); - -void __unregister_cpu_notifier(struct notifier_block *nb) -{ - raw_notifier_chain_unregister(&cpu_chain, nb); -} -EXPORT_SYMBOL(__unregister_cpu_notifier); - #ifdef CONFIG_HOTPLUG_CPU /** * clear_tasks_mm_cpumask - Safely clear tasks' mm_cpumask for a CPU @@ -741,20 +652,6 @@ static inline void check_for_tasks(int dead_cpu) read_unlock(&tasklist_lock); } -static int notify_down_prepare(unsigned int cpu) -{ - int err, nr_calls = 0; - - err = __cpu_notify(CPU_DOWN_PREPARE, cpu, -1, &nr_calls); - if (err) { - nr_calls--; - __cpu_notify(CPU_DOWN_FAILED, cpu, nr_calls, NULL); - pr_warn("%s: attempt to take down CPU %u failed\n", - __func__, cpu); - } - return err; -} - /* Take this CPU down. */ static int take_cpu_down(void *_param) { @@ -833,13 +730,6 @@ static int takedown_cpu(unsigned int cpu) return 0; } -static int notify_dead(unsigned int cpu) -{ - cpu_notify_nofail(CPU_DEAD, cpu); - check_for_tasks(cpu); - return 0; -} - static void cpuhp_complete_idle_dead(void *arg) { struct cpuhp_cpu_state *st = arg; @@ -863,9 +753,7 @@ void cpuhp_report_idle_dead(void) } #else -#define notify_down_prepare NULL #define takedown_cpu NULL -#define notify_dead NULL #endif #ifdef CONFIG_HOTPLUG_CPU @@ -924,9 +812,6 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen, hasdied = prev_state != st->state && st->state == CPUHP_OFFLINE; out: cpu_hotplug_done(); - /* This post dead nonsense must die */ - if (!ret && hasdied) - cpu_notify_nofail(CPU_POST_DEAD, cpu); return ret; } @@ -1292,17 +1177,6 @@ static struct cpuhp_step cpuhp_bp_states[] = { .teardown.single = rcutree_dead_cpu, }, /* - * Preparatory and dead notifiers. Will be replaced once the notifiers - * are converted to states. - */ - [CPUHP_NOTIFY_PREPARE] = { - .name = "notify:prepare", - .startup.single = notify_prepare, - .teardown.single = notify_dead, - .skip_onerr = true, - .cant_stop = true, - }, - /* * On the tear-down path, timers_dead_cpu() must be invoked * before blk_mq_queue_reinit_notify() from notify_dead(), * otherwise a RCU stall occurs. @@ -1391,17 +1265,6 @@ static struct cpuhp_step cpuhp_ap_states[] = { .startup.single = rcutree_online_cpu, .teardown.single = rcutree_offline_cpu, }, - - /* - * Online/down_prepare notifiers. Will be removed once the notifiers - * are converted to states. - */ - [CPUHP_AP_NOTIFY_ONLINE] = { - .name = "notify:online", - .startup.single = notify_online, - .teardown.single = notify_down_prepare, - .skip_onerr = true, - }, #endif /* * The dynamically registered state space is here @@ -1432,23 +1295,53 @@ static int cpuhp_cb_check(enum cpuhp_state state) return 0; } -static void cpuhp_store_callbacks(enum cpuhp_state state, - const char *name, - int (*startup)(unsigned int cpu), - int (*teardown)(unsigned int cpu), - bool multi_instance) +/* + * Returns a free for dynamic slot assignment of the Online state. The states + * are protected by the cpuhp_slot_states mutex and an empty slot is identified + * by having no name assigned. + */ +static int cpuhp_reserve_state(enum cpuhp_state state) +{ + enum cpuhp_state i; + + for (i = CPUHP_AP_ONLINE_DYN; i <= CPUHP_AP_ONLINE_DYN_END; i++) { + if (!cpuhp_ap_states[i].name) + return i; + } + WARN(1, "No more dynamic states available for CPU hotplug\n"); + return -ENOSPC; +} + +static int cpuhp_store_callbacks(enum cpuhp_state state, const char *name, + int (*startup)(unsigned int cpu), + int (*teardown)(unsigned int cpu), + bool multi_instance) { /* (Un)Install the callbacks for further cpu hotplug operations */ struct cpuhp_step *sp; + int ret = 0; mutex_lock(&cpuhp_state_mutex); + + if (state == CPUHP_AP_ONLINE_DYN) { + ret = cpuhp_reserve_state(state); + if (ret < 0) + goto out; + state = ret; + } sp = cpuhp_get_step(state); + if (name && sp->name) { + ret = -EBUSY; + goto out; + } sp->startup.single = startup; sp->teardown.single = teardown; sp->name = name; sp->multi_instance = multi_instance; INIT_HLIST_HEAD(&sp->list); +out: mutex_unlock(&cpuhp_state_mutex); + return ret; } static void *cpuhp_get_teardown_cb(enum cpuhp_state state) @@ -1509,29 +1402,6 @@ static void cpuhp_rollback_install(int failedcpu, enum cpuhp_state state, } } -/* - * Returns a free for dynamic slot assignment of the Online state. The states - * are protected by the cpuhp_slot_states mutex and an empty slot is identified - * by having no name assigned. - */ -static int cpuhp_reserve_state(enum cpuhp_state state) -{ - enum cpuhp_state i; - - mutex_lock(&cpuhp_state_mutex); - for (i = CPUHP_AP_ONLINE_DYN; i <= CPUHP_AP_ONLINE_DYN_END; i++) { - if (cpuhp_ap_states[i].name) - continue; - - cpuhp_ap_states[i].name = "Reserved"; - mutex_unlock(&cpuhp_state_mutex); - return i; - } - mutex_unlock(&cpuhp_state_mutex); - WARN(1, "No more dynamic states available for CPU hotplug\n"); - return -ENOSPC; -} - int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node, bool invoke) { @@ -1580,11 +1450,13 @@ EXPORT_SYMBOL_GPL(__cpuhp_state_add_instance); /** * __cpuhp_setup_state - Setup the callbacks for an hotplug machine state - * @state: The state to setup - * @invoke: If true, the startup function is invoked for cpus where - * cpu state >= @state - * @startup: startup callback function - * @teardown: teardown callback function + * @state: The state to setup + * @invoke: If true, the startup function is invoked for cpus where + * cpu state >= @state + * @startup: startup callback function + * @teardown: teardown callback function + * @multi_instance: State is set up for multiple instances which get + * added afterwards. * * Returns: * On success: @@ -1599,25 +1471,16 @@ int __cpuhp_setup_state(enum cpuhp_state state, bool multi_instance) { int cpu, ret = 0; - int dyn_state = 0; if (cpuhp_cb_check(state) || !name) return -EINVAL; get_online_cpus(); - /* currently assignments for the ONLINE state are possible */ - if (state == CPUHP_AP_ONLINE_DYN) { - dyn_state = 1; - ret = cpuhp_reserve_state(state); - if (ret < 0) - goto out; - state = ret; - } + ret = cpuhp_store_callbacks(state, name, startup, teardown, + multi_instance); - cpuhp_store_callbacks(state, name, startup, teardown, multi_instance); - - if (!invoke || !startup) + if (ret || !invoke || !startup) goto out; /* @@ -1641,7 +1504,11 @@ int __cpuhp_setup_state(enum cpuhp_state state, } out: put_online_cpus(); - if (!ret && dyn_state) + /* + * If the requested state is CPUHP_AP_ONLINE_DYN, return the + * dynamically allocated state in case of success. + */ + if (!ret && state == CPUHP_AP_ONLINE_DYN) return state; return ret; } diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index cb66a4648840..b06848a104e6 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1538,30 +1538,6 @@ config NOTIFIER_ERROR_INJECTION Say N if unsure. -config CPU_NOTIFIER_ERROR_INJECT - tristate "CPU notifier error injection module" - depends on HOTPLUG_CPU && NOTIFIER_ERROR_INJECTION - help - This option provides a kernel module that can be used to test - the error handling of the cpu notifiers by injecting artificial - errors to CPU notifier chain callbacks. It is controlled through - debugfs interface under /sys/kernel/debug/notifier-error-inject/cpu - - If the notifier call chain should be failed with some events - notified, write the error code to "actions/<notifier event>/error". - - Example: Inject CPU offline error (-1 == -EPERM) - - # cd /sys/kernel/debug/notifier-error-inject/cpu - # echo -1 > actions/CPU_DOWN_PREPARE/error - # echo 0 > /sys/devices/system/cpu/cpu1/online - bash: echo: write error: Operation not permitted - - To compile this code as a module, choose M here: the module will - be called cpu-notifier-error-inject. - - If unsure, say N. - config PM_NOTIFIER_ERROR_INJECT tristate "PM notifier error injection module" depends on PM && NOTIFIER_ERROR_INJECTION diff --git a/lib/Makefile b/lib/Makefile index 50144a3aeebd..bc4073a8cd08 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -128,7 +128,6 @@ obj-$(CONFIG_SWIOTLB) += swiotlb.o obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o iommu-common.o obj-$(CONFIG_FAULT_INJECTION) += fault-inject.o obj-$(CONFIG_NOTIFIER_ERROR_INJECTION) += notifier-error-inject.o -obj-$(CONFIG_CPU_NOTIFIER_ERROR_INJECT) += cpu-notifier-error-inject.o obj-$(CONFIG_PM_NOTIFIER_ERROR_INJECT) += pm-notifier-error-inject.o obj-$(CONFIG_NETDEV_NOTIFIER_ERROR_INJECT) += netdev-notifier-error-inject.o obj-$(CONFIG_MEMORY_NOTIFIER_ERROR_INJECT) += memory-notifier-error-inject.o diff --git a/lib/cpu-notifier-error-inject.c b/lib/cpu-notifier-error-inject.c deleted file mode 100644 index 0e2c9a1e958a..000000000000 --- a/lib/cpu-notifier-error-inject.c +++ /dev/null @@ -1,84 +0,0 @@ -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/cpu.h> - -#include "notifier-error-inject.h" - -static int priority; -module_param(priority, int, 0); -MODULE_PARM_DESC(priority, "specify cpu notifier priority"); - -#define UP_PREPARE 0 -#define UP_PREPARE_FROZEN 0 -#define DOWN_PREPARE 0 -#define DOWN_PREPARE_FROZEN 0 - -static struct notifier_err_inject cpu_notifier_err_inject = { - .actions = { - { NOTIFIER_ERR_INJECT_ACTION(UP_PREPARE) }, - { NOTIFIER_ERR_INJECT_ACTION(UP_PREPARE_FROZEN) }, - { NOTIFIER_ERR_INJECT_ACTION(DOWN_PREPARE) }, - { NOTIFIER_ERR_INJECT_ACTION(DOWN_PREPARE_FROZEN) }, - {} - } -}; - -static int notf_err_handle(struct notifier_err_inject_action *action) -{ - int ret; - - ret = action->error; - if (ret) - pr_info("Injecting error (%d) to %s\n", ret, action->name); - return ret; -} - -static int notf_err_inj_up_prepare(unsigned int cpu) -{ - if (!cpuhp_tasks_frozen) - return notf_err_handle(&cpu_notifier_err_inject.actions[0]); - else - return notf_err_handle(&cpu_notifier_err_inject.actions[1]); -} - -static int notf_err_inj_dead(unsigned int cpu) -{ - if (!cpuhp_tasks_frozen) - return notf_err_handle(&cpu_notifier_err_inject.actions[2]); - else - return notf_err_handle(&cpu_notifier_err_inject.actions[3]); -} - -static struct dentry *dir; - -static int err_inject_init(void) -{ - int err; - - dir = notifier_err_inject_init("cpu", notifier_err_inject_dir, - &cpu_notifier_err_inject, priority); - if (IS_ERR(dir)) - return PTR_ERR(dir); - - err = cpuhp_setup_state_nocalls(CPUHP_NOTF_ERR_INJ_PREPARE, - "cpu-err-notif:prepare", - notf_err_inj_up_prepare, - notf_err_inj_dead); - if (err) - debugfs_remove_recursive(dir); - - return err; -} - -static void err_inject_exit(void) -{ - cpuhp_remove_state_nocalls(CPUHP_NOTF_ERR_INJ_PREPARE); - debugfs_remove_recursive(dir); -} - -module_init(err_inject_init); -module_exit(err_inject_exit); - -MODULE_DESCRIPTION("CPU notifier error injection module"); -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Akinobu Mita <akinobu.mita@gmail.com>"); diff --git a/mm/filemap.c b/mm/filemap.c index 32be3c8f3a11..82f26cde830c 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -739,45 +739,159 @@ EXPORT_SYMBOL(__page_cache_alloc); * at a cost of "thundering herd" phenomena during rare hash * collisions. */ -wait_queue_head_t *page_waitqueue(struct page *page) +#define PAGE_WAIT_TABLE_BITS 8 +#define PAGE_WAIT_TABLE_SIZE (1 << PAGE_WAIT_TABLE_BITS) +static wait_queue_head_t page_wait_table[PAGE_WAIT_TABLE_SIZE] __cacheline_aligned; + +static wait_queue_head_t *page_waitqueue(struct page *page) { - return bit_waitqueue(page, 0); + return &page_wait_table[hash_ptr(page, PAGE_WAIT_TABLE_BITS)]; } -EXPORT_SYMBOL(page_waitqueue); -void wait_on_page_bit(struct page *page, int bit_nr) +void __init pagecache_init(void) { - DEFINE_WAIT_BIT(wait, &page->flags, bit_nr); + int i; - if (test_bit(bit_nr, &page->flags)) - __wait_on_bit(page_waitqueue(page), &wait, bit_wait_io, - TASK_UNINTERRUPTIBLE); + for (i = 0; i < PAGE_WAIT_TABLE_SIZE; i++) + init_waitqueue_head(&page_wait_table[i]); + + page_writeback_init(); } -EXPORT_SYMBOL(wait_on_page_bit); -int wait_on_page_bit_killable(struct page *page, int bit_nr) +struct wait_page_key { + struct page *page; + int bit_nr; + int page_match; +}; + +struct wait_page_queue { + struct page *page; + int bit_nr; + wait_queue_t wait; +}; + +static int wake_page_function(wait_queue_t *wait, unsigned mode, int sync, void *arg) { - DEFINE_WAIT_BIT(wait, &page->flags, bit_nr); + struct wait_page_key *key = arg; + struct wait_page_queue *wait_page + = container_of(wait, struct wait_page_queue, wait); + + if (wait_page->page != key->page) + return 0; + key->page_match = 1; - if (!test_bit(bit_nr, &page->flags)) + if (wait_page->bit_nr != key->bit_nr) + return 0; + if (test_bit(key->bit_nr, &key->page->flags)) return 0; - return __wait_on_bit(page_waitqueue(page), &wait, - bit_wait_io, TASK_KILLABLE); + return autoremove_wake_function(wait, mode, sync, key); } -int wait_on_page_bit_killable_timeout(struct page *page, - int bit_nr, unsigned long timeout) +void wake_up_page_bit(struct page *page, int bit_nr) { - DEFINE_WAIT_BIT(wait, &page->flags, bit_nr); + wait_queue_head_t *q = page_waitqueue(page); + struct wait_page_key key; + unsigned long flags; - wait.key.timeout = jiffies + timeout; - if (!test_bit(bit_nr, &page->flags)) - return 0; - return __wait_on_bit(page_waitqueue(page), &wait, - bit_wait_io_timeout, TASK_KILLABLE); + key.page = page; + key.bit_nr = bit_nr; + key.page_match = 0; + + spin_lock_irqsave(&q->lock, flags); + __wake_up_locked_key(q, TASK_NORMAL, &key); + /* + * It is possible for other pages to have collided on the waitqueue + * hash, so in that case check for a page match. That prevents a long- + * term waiter + * + * It is still possible to miss a case here, when we woke page waiters + * and removed them from the waitqueue, but there are still other + * page waiters. + */ + if (!waitqueue_active(q) || !key.page_match) { + ClearPageWaiters(page); + /* + * It's possible to miss clearing Waiters here, when we woke + * our page waiters, but the hashed waitqueue has waiters for + * other pages on it. + * + * That's okay, it's a rare case. The next waker will clear it. + */ + } + spin_unlock_irqrestore(&q->lock, flags); +} +EXPORT_SYMBOL(wake_up_page_bit); + +static inline int wait_on_page_bit_common(wait_queue_head_t *q, + struct page *page, int bit_nr, int state, bool lock) +{ + struct wait_page_queue wait_page; + wait_queue_t *wait = &wait_page.wait; + int ret = 0; + + init_wait(wait); + wait->func = wake_page_function; + wait_page.page = page; + wait_page.bit_nr = bit_nr; + + for (;;) { + spin_lock_irq(&q->lock); + + if (likely(list_empty(&wait->task_list))) { + if (lock) + __add_wait_queue_tail_exclusive(q, wait); + else + __add_wait_queue(q, wait); + SetPageWaiters(page); + } + + set_current_state(state); + + spin_unlock_irq(&q->lock); + + if (likely(test_bit(bit_nr, &page->flags))) { + io_schedule(); + if (unlikely(signal_pending_state(state, current))) { + ret = -EINTR; + break; + } + } + + if (lock) { + if (!test_and_set_bit_lock(bit_nr, &page->flags)) + break; + } else { + if (!test_bit(bit_nr, &page->flags)) + break; + } + } + + finish_wait(q, wait); + + /* + * A signal could leave PageWaiters set. Clearing it here if + * !waitqueue_active would be possible (by open-coding finish_wait), + * but still fail to catch it in the case of wait hash collision. We + * already can fail to clear wait hash collision cases, so don't + * bother with signals either. + */ + + return ret; +} + +void wait_on_page_bit(struct page *page, int bit_nr) +{ + wait_queue_head_t *q = page_waitqueue(page); + wait_on_page_bit_common(q, page, bit_nr, TASK_UNINTERRUPTIBLE, false); +} +EXPORT_SYMBOL(wait_on_page_bit); + +int wait_on_page_bit_killable(struct page *page, int bit_nr) +{ + wait_queue_head_t *q = page_waitqueue(page); + return wait_on_page_bit_common(q, page, bit_nr, TASK_KILLABLE, false); } -EXPORT_SYMBOL_GPL(wait_on_page_bit_killable_timeout); /** * add_page_wait_queue - Add an arbitrary waiter to a page's wait queue @@ -793,6 +907,7 @@ void add_page_wait_queue(struct page *page, wait_queue_t *waiter) spin_lock_irqsave(&q->lock, flags); __add_wait_queue(q, waiter); + SetPageWaiters(page); spin_unlock_irqrestore(&q->lock, flags); } EXPORT_SYMBOL_GPL(add_page_wait_queue); @@ -874,23 +989,19 @@ EXPORT_SYMBOL_GPL(page_endio); * __lock_page - get a lock on the page, assuming we need to sleep to get it * @page: the page to lock */ -void __lock_page(struct page *page) +void __lock_page(struct page *__page) { - struct page *page_head = compound_head(page); - DEFINE_WAIT_BIT(wait, &page_head->flags, PG_locked); - - __wait_on_bit_lock(page_waitqueue(page_head), &wait, bit_wait_io, - TASK_UNINTERRUPTIBLE); + struct page *page = compound_head(__page); + wait_queue_head_t *q = page_waitqueue(page); + wait_on_page_bit_common(q, page, PG_locked, TASK_UNINTERRUPTIBLE, true); } EXPORT_SYMBOL(__lock_page); -int __lock_page_killable(struct page *page) +int __lock_page_killable(struct page *__page) { - struct page *page_head = compound_head(page); - DEFINE_WAIT_BIT(wait, &page_head->flags, PG_locked); - - return __wait_on_bit_lock(page_waitqueue(page_head), &wait, - bit_wait_io, TASK_KILLABLE); + struct page *page = compound_head(__page); + wait_queue_head_t *q = page_waitqueue(page); + return wait_on_page_bit_common(q, page, PG_locked, TASK_KILLABLE, true); } EXPORT_SYMBOL_GPL(__lock_page_killable); diff --git a/mm/internal.h b/mm/internal.h index 44d68895a9b9..7aa2ea0a8623 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -36,6 +36,8 @@ /* Do not use these with a slab allocator */ #define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|~__GFP_BITS_MASK) +void page_writeback_init(void); + int do_swap_page(struct vm_fault *vmf); void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma, diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 19e796d36a62..f283c7e0a2a3 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -764,12 +764,11 @@ static int me_huge_page(struct page *p, unsigned long pfn) */ #define dirty (1UL << PG_dirty) -#define sc (1UL << PG_swapcache) +#define sc ((1UL << PG_swapcache) | (1UL << PG_swapbacked)) #define unevict (1UL << PG_unevictable) #define mlock (1UL << PG_mlocked) #define writeback (1UL << PG_writeback) #define lru (1UL << PG_lru) -#define swapbacked (1UL << PG_swapbacked) #define head (1UL << PG_head) #define slab (1UL << PG_slab) #define reserved (1UL << PG_reserved) @@ -819,7 +818,6 @@ static struct page_state { #undef mlock #undef writeback #undef lru -#undef swapbacked #undef head #undef slab #undef reserved diff --git a/mm/migrate.c b/mm/migrate.c index 0ed24b1fa77b..87f4d0f81819 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -466,13 +466,15 @@ int migrate_page_move_mapping(struct address_space *mapping, */ newpage->index = page->index; newpage->mapping = page->mapping; - if (PageSwapBacked(page)) - __SetPageSwapBacked(newpage); - get_page(newpage); /* add cache reference */ - if (PageSwapCache(page)) { - SetPageSwapCache(newpage); - set_page_private(newpage, page_private(page)); + if (PageSwapBacked(page)) { + __SetPageSwapBacked(newpage); + if (PageSwapCache(page)) { + SetPageSwapCache(newpage); + set_page_private(newpage, page_private(page)); + } + } else { + VM_BUG_ON_PAGE(PageSwapCache(page), page); } /* Move dirty while page refs frozen and newpage not yet exposed */ diff --git a/mm/swap.c b/mm/swap.c index 4dcf852e1e6d..844baedd2429 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -69,6 +69,7 @@ static void __page_cache_release(struct page *page) del_page_from_lru_list(page, lruvec, page_off_lru(page)); spin_unlock_irqrestore(zone_lru_lock(zone), flags); } + __ClearPageWaiters(page); mem_cgroup_uncharge(page); } @@ -784,6 +785,7 @@ void release_pages(struct page **pages, int nr, bool cold) /* Clear Active bit in case of parallel mark_page_accessed */ __ClearPageActive(page); + __ClearPageWaiters(page); list_add(&page->lru, &pages_to_free); } diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile index 8561e7ddca59..8792ad8dbf83 100644 --- a/tools/power/x86/turbostat/Makefile +++ b/tools/power/x86/turbostat/Makefile @@ -10,6 +10,7 @@ endif turbostat : turbostat.c CFLAGS += -Wall CFLAGS += -DMSRHEADER='"../../../../arch/x86/include/asm/msr-index.h"' +CFLAGS += -DINTEL_FAMILY_HEADER='"../../../../arch/x86/include/asm/intel-family.h"' %: %.c @mkdir -p $(BUILD_OUTPUT) diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index 492e84fbebfa..03cb639b292e 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -25,9 +25,27 @@ Some information is not available on older processors. .SS Options Options can be specified with a single or double '-', and only as much of the option name as necessary to disambiguate it from others is necessary. Note that options are case-sensitive. -\fB--Counter MSR#\fP shows the delta of the specified 64-bit MSR counter. .PP -\fB--counter MSR#\fP shows the delta of the specified 32-bit MSR counter. +\fB--add attributes\fP add column with counter having specified 'attributes'. The 'location' attribute is required, all others are optional. +.nf + location: {\fBmsrDDD\fP | \fBmsr0xXXX\fP} + msrDDD is a decimal offset, eg. msr16 + msr0xXXX is a hex offset, eg. msr0x10 + + scope: {\fBcpu\fP | \fBcore\fP | \fBpackage\fP} + sample and print the counter for every cpu, core, or package. + default: cpu + + size: {\fBu32\fP | \fBu64\fP } + MSRs are read as 64-bits, u32 truncates the displayed value to 32-bits. + default: u64 + + format: {\fBraw\fP | \fBdelta\fP | \fBpercent\fP} + 'raw' shows the MSR contents in hex. + 'delta' shows the difference in values during the measurement interval. + 'percent' shows the delta as a percentage of the cycles elapsed. + default: delta +.fi .PP \fB--Dump\fP displays the raw counter values. .PP @@ -43,10 +61,6 @@ The file is truncated if it already exists, and it is created if it does not exi .PP \fB--Joules\fP displays energy in Joules, rather than dividing Joules by time to print power in Watts. .PP -\fB--MSR MSR#\fP shows the specified 64-bit MSR value. -.PP -\fB--msr MSR#\fP shows the specified 32-bit MSR value. -.PP \fB--Package\fP limits output to the system summary plus the 1st thread in each Package. .PP \fB--processor\fP limits output to the system summary plus the 1st thread in each processor of each package. Ie. it skips hyper-threaded siblings. diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 3e199b508a96..f13f61b065c6 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -21,6 +21,7 @@ #define _GNU_SOURCE #include MSRHEADER +#include INTEL_FAMILY_HEADER #include <stdarg.h> #include <stdio.h> #include <err.h> @@ -51,8 +52,6 @@ unsigned int debug; unsigned int rapl_joules; unsigned int summary_only; unsigned int dump_only; -unsigned int skip_c0; -unsigned int skip_c1; unsigned int do_nhm_cstates; unsigned int do_snb_cstates; unsigned int do_knl_cstates; @@ -72,10 +71,6 @@ unsigned int units = 1000000; /* MHz etc */ unsigned int genuine_intel; unsigned int has_invariant_tsc; unsigned int do_nhm_platform_info; -unsigned int extra_msr_offset32; -unsigned int extra_msr_offset64; -unsigned int extra_delta_offset32; -unsigned int extra_delta_offset64; unsigned int aperf_mperf_multiplier = 1; int do_irq = 1; int do_smi; @@ -131,9 +126,8 @@ unsigned int has_hwp_pkg; /* IA32_HWP_REQUEST_PKG */ #define RAPL_DRAM_POWER_INFO (1 << 5) /* 0x61c MSR_DRAM_POWER_INFO */ -#define RAPL_CORES (1 << 6) +#define RAPL_CORES_POWER_LIMIT (1 << 6) /* 0x638 MSR_PP0_POWER_LIMIT */ - /* 0x639 MSR_PP0_ENERGY_STATUS */ #define RAPL_CORE_POLICY (1 << 7) /* 0x63a MSR_PP0_POLICY */ @@ -141,11 +135,20 @@ unsigned int has_hwp_pkg; /* IA32_HWP_REQUEST_PKG */ /* 0x640 MSR_PP1_POWER_LIMIT */ /* 0x641 MSR_PP1_ENERGY_STATUS */ /* 0x642 MSR_PP1_POLICY */ + +#define RAPL_CORES_ENERGY_STATUS (1 << 9) + /* 0x639 MSR_PP0_ENERGY_STATUS */ +#define RAPL_CORES (RAPL_CORES_ENERGY_STATUS | RAPL_CORES_POWER_LIMIT) #define TJMAX_DEFAULT 100 #define MAX(a, b) ((a) > (b) ? (a) : (b)) -int aperf_mperf_unstable; +/* + * buffer size used by sscanf() for added column names + * Usually truncated to 7 characters, but also handles 18 columns for raw 64-bit counters + */ +#define NAME_BYTES 20 + int backwards_count; char *progname; @@ -157,16 +160,13 @@ struct thread_data { unsigned long long aperf; unsigned long long mperf; unsigned long long c1; - unsigned long long extra_msr64; - unsigned long long extra_delta64; - unsigned long long extra_msr32; - unsigned long long extra_delta32; unsigned int irq_count; unsigned int smi_count; unsigned int cpu_id; unsigned int flags; #define CPU_IS_FIRST_THREAD_IN_CORE 0x2 #define CPU_IS_FIRST_CORE_IN_PACKAGE 0x4 + unsigned long long counter[1]; } *thread_even, *thread_odd; struct core_data { @@ -175,6 +175,7 @@ struct core_data { unsigned long long c7; unsigned int core_temp_c; unsigned int core_id; + unsigned long long counter[1]; } *core_even, *core_odd; struct pkg_data { @@ -199,7 +200,7 @@ struct pkg_data { unsigned int rapl_pkg_perf_status; /* MSR_PKG_PERF_STATUS */ unsigned int rapl_dram_perf_status; /* MSR_DRAM_PERF_STATUS */ unsigned int pkg_temp_c; - + unsigned long long counter[1]; } *package_even, *package_odd; #define ODD_COUNTERS thread_odd, core_odd, package_odd @@ -213,11 +214,33 @@ struct pkg_data { (core_base + (pkg_no) * topo.num_cores_per_pkg + (core_no)) #define GET_PKG(pkg_base, pkg_no) (pkg_base + pkg_no) +enum counter_scope {SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE}; +enum counter_type {COUNTER_CYCLES, COUNTER_SECONDS}; +enum counter_format {FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT}; + +struct msr_counter { + unsigned int msr_num; + char name[NAME_BYTES]; + unsigned int width; + enum counter_type type; + enum counter_format format; + struct msr_counter *next; +}; + +struct sys_counters { + unsigned int thread_counter_bytes; + unsigned int core_counter_bytes; + unsigned int package_counter_bytes; + struct msr_counter *tp; + struct msr_counter *cp; + struct msr_counter *pp; +} sys; + struct system_summary { struct thread_data threads; struct core_data cores; struct pkg_data packages; -} sum, average; +} average; struct topo_params { @@ -319,120 +342,148 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr) /* * Example Format w/ field column widths: * - * Package Core CPU Avg_MHz Bzy_MHz TSC_MHz IRQ SMI Busy% CPU_%c1 CPU_%c3 CPU_%c6 CPU_%c7 CoreTmp PkgTmp GFXMHz Pkg%pc2 Pkg%pc3 Pkg%pc6 Pkg%pc7 PkgWatt CorWatt GFXWatt + * Package Core CPU Avg_MHz Bzy_MHz TSC_MHz IRQ SMI Busy% CPU_%c1 CPU_%c3 CPU_%c6 CPU_%c7 ThreadC CoreTmp CoreCnt PkgTmp GFXMHz Pkg%pc2 Pkg%pc3 Pkg%pc6 Pkg%pc7 PkgWatt CorWatt GFXWatt PkgCnt * 12345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678 */ void print_header(void) { + struct msr_counter *mp; + if (show_pkg) - outp += sprintf(outp, " Package"); + outp += sprintf(outp, "\tPackage"); if (show_core) - outp += sprintf(outp, " Core"); + outp += sprintf(outp, "\tCore"); if (show_cpu) - outp += sprintf(outp, " CPU"); + outp += sprintf(outp, "\tCPU"); if (has_aperf) - outp += sprintf(outp, " Avg_MHz"); + outp += sprintf(outp, "\tAvg_MHz"); if (has_aperf) - outp += sprintf(outp, " Busy%%"); + outp += sprintf(outp, "\tBusy%%"); if (has_aperf) - outp += sprintf(outp, " Bzy_MHz"); - outp += sprintf(outp, " TSC_MHz"); - - if (extra_delta_offset32) - outp += sprintf(outp, " count 0x%03X", extra_delta_offset32); - if (extra_delta_offset64) - outp += sprintf(outp, " COUNT 0x%03X", extra_delta_offset64); - if (extra_msr_offset32) - outp += sprintf(outp, " MSR 0x%03X", extra_msr_offset32); - if (extra_msr_offset64) - outp += sprintf(outp, " MSR 0x%03X", extra_msr_offset64); + outp += sprintf(outp, "\tBzy_MHz"); + outp += sprintf(outp, "\tTSC_MHz"); if (!debug) goto done; if (do_irq) - outp += sprintf(outp, " IRQ"); + outp += sprintf(outp, "\tIRQ"); if (do_smi) - outp += sprintf(outp, " SMI"); + outp += sprintf(outp, "\tSMI"); if (do_nhm_cstates) - outp += sprintf(outp, " CPU%%c1"); + outp += sprintf(outp, "\tCPU%%c1"); if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates) - outp += sprintf(outp, " CPU%%c3"); + outp += sprintf(outp, "\tCPU%%c3"); if (do_nhm_cstates) - outp += sprintf(outp, " CPU%%c6"); + outp += sprintf(outp, "\tCPU%%c6"); if (do_snb_cstates) - outp += sprintf(outp, " CPU%%c7"); + outp += sprintf(outp, "\tCPU%%c7"); + + for (mp = sys.tp; mp; mp = mp->next) { + if (mp->format == FORMAT_RAW) { + if (mp->width == 64) + outp += sprintf(outp, "\t%18.18s", mp->name); + else + outp += sprintf(outp, "\t%10.10s", mp->name); + } else { + outp += sprintf(outp, "\t%-7.7s", mp->name); + } + } if (do_dts) - outp += sprintf(outp, " CoreTmp"); + outp += sprintf(outp, "\tCoreTmp"); + + for (mp = sys.cp; mp; mp = mp->next) { + if (mp->format == FORMAT_RAW) { + if (mp->width == 64) + outp += sprintf(outp, "\t%18.18s", mp->name); + else + outp += sprintf(outp, "\t%10.10s", mp->name); + } else { + outp += sprintf(outp, "\t%-7.7s", mp->name); + } + } + if (do_ptm) - outp += sprintf(outp, " PkgTmp"); + outp += sprintf(outp, "\tPkgTmp"); if (do_gfx_rc6_ms) - outp += sprintf(outp, " GFX%%rc6"); + outp += sprintf(outp, "\tGFX%%rc6"); if (do_gfx_mhz) - outp += sprintf(outp, " GFXMHz"); + outp += sprintf(outp, "\tGFXMHz"); if (do_skl_residency) { - outp += sprintf(outp, " Totl%%C0"); - outp += sprintf(outp, " Any%%C0"); - outp += sprintf(outp, " GFX%%C0"); - outp += sprintf(outp, " CPUGFX%%"); + outp += sprintf(outp, "\tTotl%%C0"); + outp += sprintf(outp, "\tAny%%C0"); + outp += sprintf(outp, "\tGFX%%C0"); + outp += sprintf(outp, "\tCPUGFX%%"); } if (do_pc2) - outp += sprintf(outp, " Pkg%%pc2"); + outp += sprintf(outp, "\tPkg%%pc2"); if (do_pc3) - outp += sprintf(outp, " Pkg%%pc3"); + outp += sprintf(outp, "\tPkg%%pc3"); if (do_pc6) - outp += sprintf(outp, " Pkg%%pc6"); + outp += sprintf(outp, "\tPkg%%pc6"); if (do_pc7) - outp += sprintf(outp, " Pkg%%pc7"); + outp += sprintf(outp, "\tPkg%%pc7"); if (do_c8_c9_c10) { - outp += sprintf(outp, " Pkg%%pc8"); - outp += sprintf(outp, " Pkg%%pc9"); - outp += sprintf(outp, " Pk%%pc10"); + outp += sprintf(outp, "\tPkg%%pc8"); + outp += sprintf(outp, "\tPkg%%pc9"); + outp += sprintf(outp, "\tPk%%pc10"); } if (do_rapl && !rapl_joules) { if (do_rapl & RAPL_PKG) - outp += sprintf(outp, " PkgWatt"); - if (do_rapl & RAPL_CORES) - outp += sprintf(outp, " CorWatt"); + outp += sprintf(outp, "\tPkgWatt"); + if (do_rapl & RAPL_CORES_ENERGY_STATUS) + outp += sprintf(outp, "\tCorWatt"); if (do_rapl & RAPL_GFX) - outp += sprintf(outp, " GFXWatt"); + outp += sprintf(outp, "\tGFXWatt"); if (do_rapl & RAPL_DRAM) - outp += sprintf(outp, " RAMWatt"); + outp += sprintf(outp, "\tRAMWatt"); if (do_rapl & RAPL_PKG_PERF_STATUS) - outp += sprintf(outp, " PKG_%%"); + outp += sprintf(outp, "\tPKG_%%"); if (do_rapl & RAPL_DRAM_PERF_STATUS) - outp += sprintf(outp, " RAM_%%"); + outp += sprintf(outp, "\tRAM_%%"); } else if (do_rapl && rapl_joules) { if (do_rapl & RAPL_PKG) - outp += sprintf(outp, " Pkg_J"); - if (do_rapl & RAPL_CORES) - outp += sprintf(outp, " Cor_J"); + outp += sprintf(outp, "\tPkg_J"); + if (do_rapl & RAPL_CORES_ENERGY_STATUS) + outp += sprintf(outp, "\tCor_J"); if (do_rapl & RAPL_GFX) - outp += sprintf(outp, " GFX_J"); + outp += sprintf(outp, "\tGFX_J"); if (do_rapl & RAPL_DRAM) - outp += sprintf(outp, " RAM_J"); + outp += sprintf(outp, "\tRAM_J"); if (do_rapl & RAPL_PKG_PERF_STATUS) - outp += sprintf(outp, " PKG_%%"); + outp += sprintf(outp, "\tPKG_%%"); if (do_rapl & RAPL_DRAM_PERF_STATUS) - outp += sprintf(outp, " RAM_%%"); - outp += sprintf(outp, " time"); - + outp += sprintf(outp, "\tRAM_%%"); } - done: + for (mp = sys.pp; mp; mp = mp->next) { + if (mp->format == FORMAT_RAW) { + if (mp->width == 64) + outp += sprintf(outp, "\t%18.18s", mp->name); + else + outp += sprintf(outp, "\t%10.10s", mp->name); + } else { + outp += sprintf(outp, "\t%-7.7s", mp->name); + } + } + +done: outp += sprintf(outp, "\n"); } int dump_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) { + int i; + struct msr_counter *mp; + outp += sprintf(outp, "t %p, c %p, p %p\n", t, c, p); if (t) { @@ -442,18 +493,16 @@ int dump_counters(struct thread_data *t, struct core_data *c, outp += sprintf(outp, "aperf: %016llX\n", t->aperf); outp += sprintf(outp, "mperf: %016llX\n", t->mperf); outp += sprintf(outp, "c1: %016llX\n", t->c1); - outp += sprintf(outp, "msr0x%x: %08llX\n", - extra_delta_offset32, t->extra_delta32); - outp += sprintf(outp, "msr0x%x: %016llX\n", - extra_delta_offset64, t->extra_delta64); - outp += sprintf(outp, "msr0x%x: %08llX\n", - extra_msr_offset32, t->extra_msr32); - outp += sprintf(outp, "msr0x%x: %016llX\n", - extra_msr_offset64, t->extra_msr64); + if (do_irq) outp += sprintf(outp, "IRQ: %08X\n", t->irq_count); if (do_smi) outp += sprintf(outp, "SMI: %08X\n", t->smi_count); + + for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { + outp += sprintf(outp, "tADDED [%d] msr0x%x: %08llX\n", + i, mp->msr_num, t->counter[i]); + } } if (c) { @@ -462,6 +511,11 @@ int dump_counters(struct thread_data *t, struct core_data *c, outp += sprintf(outp, "c6: %016llX\n", c->c6); outp += sprintf(outp, "c7: %016llX\n", c->c7); outp += sprintf(outp, "DTS: %dC\n", c->core_temp_c); + + for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { + outp += sprintf(outp, "cADDED [%d] msr0x%x: %08llX\n", + i, mp->msr_num, c->counter[i]); + } } if (p) { @@ -491,6 +545,11 @@ int dump_counters(struct thread_data *t, struct core_data *c, outp += sprintf(outp, "Throttle RAM: %0X\n", p->rapl_dram_perf_status); outp += sprintf(outp, "PTM: %dC\n", p->pkg_temp_c); + + for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { + outp += sprintf(outp, "pADDED [%d] msr0x%x: %08llX\n", + i, mp->msr_num, p->counter[i]); + } } outp += sprintf(outp, "\n"); @@ -506,6 +565,8 @@ int format_counters(struct thread_data *t, struct core_data *c, { double interval_float; char *fmt8; + int i; + struct msr_counter *mp; /* if showing only 1st thread in core and this isn't one, bail out */ if (show_core_only && !(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) @@ -520,99 +581,103 @@ int format_counters(struct thread_data *t, struct core_data *c, /* topo columns, print blanks on 1st (average) line */ if (t == &average.threads) { if (show_pkg) - outp += sprintf(outp, " -"); + outp += sprintf(outp, "\t-"); if (show_core) - outp += sprintf(outp, " -"); + outp += sprintf(outp, "\t-"); if (show_cpu) - outp += sprintf(outp, " -"); + outp += sprintf(outp, "\t-"); } else { if (show_pkg) { if (p) - outp += sprintf(outp, "%8d", p->package_id); + outp += sprintf(outp, "\t%d", p->package_id); else - outp += sprintf(outp, " -"); + outp += sprintf(outp, "\t-"); } if (show_core) { if (c) - outp += sprintf(outp, "%8d", c->core_id); + outp += sprintf(outp, "\t%d", c->core_id); else - outp += sprintf(outp, " -"); + outp += sprintf(outp, "\t-"); } if (show_cpu) - outp += sprintf(outp, "%8d", t->cpu_id); + outp += sprintf(outp, "\t%d", t->cpu_id); } /* Avg_MHz */ if (has_aperf) - outp += sprintf(outp, "%8.0f", + outp += sprintf(outp, "\t%.0f", 1.0 / units * t->aperf / interval_float); /* Busy% */ - if (has_aperf) { - if (!skip_c0) - outp += sprintf(outp, "%8.2f", 100.0 * t->mperf/t->tsc/tsc_tweak); - else - outp += sprintf(outp, "********"); - } + if (has_aperf) + outp += sprintf(outp, "\t%.2f", 100.0 * t->mperf/t->tsc/tsc_tweak); /* Bzy_MHz */ if (has_aperf) { if (has_base_hz) - outp += sprintf(outp, "%8.0f", base_hz / units * t->aperf / t->mperf); + outp += sprintf(outp, "\t%.0f", base_hz / units * t->aperf / t->mperf); else - outp += sprintf(outp, "%8.0f", + outp += sprintf(outp, "\t%.0f", 1.0 * t->tsc / units * t->aperf / t->mperf / interval_float); } /* TSC_MHz */ - outp += sprintf(outp, "%8.0f", 1.0 * t->tsc/units/interval_float); - - /* delta */ - if (extra_delta_offset32) - outp += sprintf(outp, " %11llu", t->extra_delta32); - - /* DELTA */ - if (extra_delta_offset64) - outp += sprintf(outp, " %11llu", t->extra_delta64); - /* msr */ - if (extra_msr_offset32) - outp += sprintf(outp, " 0x%08llx", t->extra_msr32); - - /* MSR */ - if (extra_msr_offset64) - outp += sprintf(outp, " 0x%016llx", t->extra_msr64); + outp += sprintf(outp, "\t%.0f", 1.0 * t->tsc/units/interval_float); if (!debug) goto done; /* IRQ */ if (do_irq) - outp += sprintf(outp, "%8d", t->irq_count); + outp += sprintf(outp, "\t%d", t->irq_count); /* SMI */ if (do_smi) - outp += sprintf(outp, "%8d", t->smi_count); + outp += sprintf(outp, "\t%d", t->smi_count); - if (do_nhm_cstates) { - if (!skip_c1) - outp += sprintf(outp, "%8.2f", 100.0 * t->c1/t->tsc); - else - outp += sprintf(outp, "********"); - } + if (do_nhm_cstates) + outp += sprintf(outp, "\t%.2f", 100.0 * t->c1/t->tsc); /* print per-core data only for 1st thread in core */ if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) goto done; if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates) - outp += sprintf(outp, "%8.2f", 100.0 * c->c3/t->tsc); + outp += sprintf(outp, "\t%.2f", 100.0 * c->c3/t->tsc); if (do_nhm_cstates) - outp += sprintf(outp, "%8.2f", 100.0 * c->c6/t->tsc); + outp += sprintf(outp, "\t%.2f", 100.0 * c->c6/t->tsc); if (do_snb_cstates) - outp += sprintf(outp, "%8.2f", 100.0 * c->c7/t->tsc); + outp += sprintf(outp, "\t%.2f", 100.0 * c->c7/t->tsc); + + for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { + if (mp->format == FORMAT_RAW) { + if (mp->width == 32) + outp += sprintf(outp, "\t0x%08lx", (unsigned long) t->counter[i]); + else + outp += sprintf(outp, "\t0x%016llx", t->counter[i]); + } else if (mp->format == FORMAT_DELTA) { + outp += sprintf(outp, "\t%8lld", t->counter[i]); + } else if (mp->format == FORMAT_PERCENT) { + outp += sprintf(outp, "\t%.2f", 100.0 * t->counter[i]/t->tsc); + } + } + if (do_dts) - outp += sprintf(outp, "%8d", c->core_temp_c); + outp += sprintf(outp, "\t%d", c->core_temp_c); + + for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { + if (mp->format == FORMAT_RAW) { + if (mp->width == 32) + outp += sprintf(outp, "\t0x%08lx", (unsigned long) c->counter[i]); + else + outp += sprintf(outp, "\t0x%016llx", c->counter[i]); + } else if (mp->format == FORMAT_DELTA) { + outp += sprintf(outp, "\t%8lld", c->counter[i]); + } else if (mp->format == FORMAT_PERCENT) { + outp += sprintf(outp, "\t%.2f", 100.0 * c->counter[i]/t->tsc); + } + } /* print per-package data only for 1st core in package */ if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) @@ -620,42 +685,42 @@ int format_counters(struct thread_data *t, struct core_data *c, /* PkgTmp */ if (do_ptm) - outp += sprintf(outp, "%8d", p->pkg_temp_c); + outp += sprintf(outp, "\t%d", p->pkg_temp_c); /* GFXrc6 */ if (do_gfx_rc6_ms) { - if (p->gfx_rc6_ms == -1) { /* detect counter reset */ - outp += sprintf(outp, " ***.**"); + if (p->gfx_rc6_ms == -1) { /* detect GFX counter reset */ + outp += sprintf(outp, "\t**.**"); } else { - outp += sprintf(outp, "%8.2f", + outp += sprintf(outp, "\t%.2f", p->gfx_rc6_ms / 10.0 / interval_float); } } /* GFXMHz */ if (do_gfx_mhz) - outp += sprintf(outp, "%8d", p->gfx_mhz); + outp += sprintf(outp, "\t%d", p->gfx_mhz); /* Totl%C0, Any%C0 GFX%C0 CPUGFX% */ if (do_skl_residency) { - outp += sprintf(outp, "%8.2f", 100.0 * p->pkg_wtd_core_c0/t->tsc); - outp += sprintf(outp, "%8.2f", 100.0 * p->pkg_any_core_c0/t->tsc); - outp += sprintf(outp, "%8.2f", 100.0 * p->pkg_any_gfxe_c0/t->tsc); - outp += sprintf(outp, "%8.2f", 100.0 * p->pkg_both_core_gfxe_c0/t->tsc); + outp += sprintf(outp, "\t%.2f", 100.0 * p->pkg_wtd_core_c0/t->tsc); + outp += sprintf(outp, "\t%.2f", 100.0 * p->pkg_any_core_c0/t->tsc); + outp += sprintf(outp, "\t%.2f", 100.0 * p->pkg_any_gfxe_c0/t->tsc); + outp += sprintf(outp, "\t%.2f", 100.0 * p->pkg_both_core_gfxe_c0/t->tsc); } if (do_pc2) - outp += sprintf(outp, "%8.2f", 100.0 * p->pc2/t->tsc); + outp += sprintf(outp, "\t%.2f", 100.0 * p->pc2/t->tsc); if (do_pc3) - outp += sprintf(outp, "%8.2f", 100.0 * p->pc3/t->tsc); + outp += sprintf(outp, "\t%.2f", 100.0 * p->pc3/t->tsc); if (do_pc6) - outp += sprintf(outp, "%8.2f", 100.0 * p->pc6/t->tsc); + outp += sprintf(outp, "\t%.2f", 100.0 * p->pc6/t->tsc); if (do_pc7) - outp += sprintf(outp, "%8.2f", 100.0 * p->pc7/t->tsc); + outp += sprintf(outp, "\t%.2f", 100.0 * p->pc7/t->tsc); if (do_c8_c9_c10) { - outp += sprintf(outp, "%8.2f", 100.0 * p->pc8/t->tsc); - outp += sprintf(outp, "%8.2f", 100.0 * p->pc9/t->tsc); - outp += sprintf(outp, "%8.2f", 100.0 * p->pc10/t->tsc); + outp += sprintf(outp, "\t%.2f", 100.0 * p->pc8/t->tsc); + outp += sprintf(outp, "\t%.2f", 100.0 * p->pc9/t->tsc); + outp += sprintf(outp, "\t%.2f", 100.0 * p->pc10/t->tsc); } /* @@ -663,14 +728,14 @@ int format_counters(struct thread_data *t, struct core_data *c, * indicate that results are suspect by printing "**" in fraction place. */ if (interval_float < rapl_joule_counter_range) - fmt8 = "%8.2f"; + fmt8 = "\t%.2f"; else - fmt8 = " %6.0f**"; + fmt8 = "%6.0f**"; if (do_rapl && !rapl_joules) { if (do_rapl & RAPL_PKG) outp += sprintf(outp, fmt8, p->energy_pkg * rapl_energy_units / interval_float); - if (do_rapl & RAPL_CORES) + if (do_rapl & RAPL_CORES_ENERGY_STATUS) outp += sprintf(outp, fmt8, p->energy_cores * rapl_energy_units / interval_float); if (do_rapl & RAPL_GFX) outp += sprintf(outp, fmt8, p->energy_gfx * rapl_energy_units / interval_float); @@ -697,9 +762,20 @@ int format_counters(struct thread_data *t, struct core_data *c, outp += sprintf(outp, fmt8, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float); if (do_rapl & RAPL_DRAM_PERF_STATUS) outp += sprintf(outp, fmt8, 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float); - - outp += sprintf(outp, fmt8, interval_float); } + for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { + if (mp->format == FORMAT_RAW) { + if (mp->width == 32) + outp += sprintf(outp, "\t0x%08lx", (unsigned long) p->counter[i]); + else + outp += sprintf(outp, "\t0x%016llx", p->counter[i]); + } else if (mp->format == FORMAT_DELTA) { + outp += sprintf(outp, "\t%8lld", p->counter[i]); + } else if (mp->format == FORMAT_PERCENT) { + outp += sprintf(outp, "\t%.2f", 100.0 * p->counter[i]/t->tsc); + } + } + done: outp += sprintf(outp, "\n"); @@ -752,9 +828,11 @@ void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_ old = 0x100000000 + new - old; \ } -void +int delta_package(struct pkg_data *new, struct pkg_data *old) { + int i; + struct msr_counter *mp; if (do_skl_residency) { old->pkg_wtd_core_c0 = new->pkg_wtd_core_c0 - old->pkg_wtd_core_c0; @@ -788,24 +866,46 @@ delta_package(struct pkg_data *new, struct pkg_data *old) DELTA_WRAP32(new->energy_dram, old->energy_dram); DELTA_WRAP32(new->rapl_pkg_perf_status, old->rapl_pkg_perf_status); DELTA_WRAP32(new->rapl_dram_perf_status, old->rapl_dram_perf_status); + + for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { + if (mp->format == FORMAT_RAW) + old->counter[i] = new->counter[i]; + else + old->counter[i] = new->counter[i] - old->counter[i]; + } + + return 0; } void delta_core(struct core_data *new, struct core_data *old) { + int i; + struct msr_counter *mp; + old->c3 = new->c3 - old->c3; old->c6 = new->c6 - old->c6; old->c7 = new->c7 - old->c7; old->core_temp_c = new->core_temp_c; + + for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { + if (mp->format == FORMAT_RAW) + old->counter[i] = new->counter[i]; + else + old->counter[i] = new->counter[i] - old->counter[i]; + } } /* * old = new - old */ -void +int delta_thread(struct thread_data *new, struct thread_data *old, struct core_data *core_delta) { + int i; + struct msr_counter *mp; + old->tsc = new->tsc - old->tsc; /* check for TSC < 1 Mcycles over interval */ @@ -821,20 +921,7 @@ delta_thread(struct thread_data *new, struct thread_data *old, old->aperf = new->aperf - old->aperf; old->mperf = new->mperf - old->mperf; } else { - - if (!aperf_mperf_unstable) { - fprintf(outf, "%s: APERF or MPERF went backwards *\n", progname); - fprintf(outf, "* Frequency results do not cover entire interval *\n"); - fprintf(outf, "* fix this by running Linux-2.6.30 or later *\n"); - - aperf_mperf_unstable = 1; - } - /* - * mperf delta is likely a huge "positive" number - * can not use it for calculating c0 time - */ - skip_c0 = 1; - skip_c1 = 1; + return -1; } } @@ -865,52 +952,53 @@ delta_thread(struct thread_data *new, struct thread_data *old, old->mperf = 1; /* divide by 0 protection */ } - old->extra_delta32 = new->extra_delta32 - old->extra_delta32; - old->extra_delta32 &= 0xFFFFFFFF; - - old->extra_delta64 = new->extra_delta64 - old->extra_delta64; - - /* - * Extra MSR is just a snapshot, simply copy latest w/o subtracting - */ - old->extra_msr32 = new->extra_msr32; - old->extra_msr64 = new->extra_msr64; - if (do_irq) old->irq_count = new->irq_count - old->irq_count; if (do_smi) old->smi_count = new->smi_count - old->smi_count; + + for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { + if (mp->format == FORMAT_RAW) + old->counter[i] = new->counter[i]; + else + old->counter[i] = new->counter[i] - old->counter[i]; + } + return 0; } int delta_cpu(struct thread_data *t, struct core_data *c, struct pkg_data *p, struct thread_data *t2, struct core_data *c2, struct pkg_data *p2) { + int retval = 0; + /* calculate core delta only for 1st thread in core */ if (t->flags & CPU_IS_FIRST_THREAD_IN_CORE) delta_core(c, c2); /* always calculate thread delta */ - delta_thread(t, t2, c2); /* c2 is core delta */ + retval = delta_thread(t, t2, c2); /* c2 is core delta */ + if (retval) + return retval; /* calculate package delta only for 1st core in package */ if (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE) - delta_package(p, p2); + retval = delta_package(p, p2); - return 0; + return retval; } void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) { + int i; + struct msr_counter *mp; + t->tsc = 0; t->aperf = 0; t->mperf = 0; t->c1 = 0; - t->extra_delta32 = 0; - t->extra_delta64 = 0; - t->irq_count = 0; t->smi_count = 0; @@ -948,21 +1036,36 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data p->gfx_rc6_ms = 0; p->gfx_mhz = 0; + + for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) + t->counter[i] = 0; + + for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) + c->counter[i] = 0; + + for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) + p->counter[i] = 0; } int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) { + int i; + struct msr_counter *mp; + average.threads.tsc += t->tsc; average.threads.aperf += t->aperf; average.threads.mperf += t->mperf; average.threads.c1 += t->c1; - average.threads.extra_delta32 += t->extra_delta32; - average.threads.extra_delta64 += t->extra_delta64; - average.threads.irq_count += t->irq_count; average.threads.smi_count += t->smi_count; + for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { + if (mp->format == FORMAT_RAW) + continue; + average.threads.counter[i] += t->counter[i]; + } + /* sum per-core values only for 1st thread in core */ if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) return 0; @@ -973,6 +1076,12 @@ int sum_counters(struct thread_data *t, struct core_data *c, average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c); + for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { + if (mp->format == FORMAT_RAW) + continue; + average.cores.counter[i] += c->counter[i]; + } + /* sum per-pkg values only for 1st core in pkg */ if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) return 0; @@ -1007,6 +1116,12 @@ int sum_counters(struct thread_data *t, struct core_data *c, average.packages.rapl_pkg_perf_status += p->rapl_pkg_perf_status; average.packages.rapl_dram_perf_status += p->rapl_dram_perf_status; + + for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { + if (mp->format == FORMAT_RAW) + continue; + average.packages.counter[i] += p->counter[i]; + } return 0; } /* @@ -1016,6 +1131,9 @@ int sum_counters(struct thread_data *t, struct core_data *c, void compute_average(struct thread_data *t, struct core_data *c, struct pkg_data *p) { + int i; + struct msr_counter *mp; + clear_counters(&average.threads, &average.cores, &average.packages); for_all_cpus(sum_counters, t, c, p); @@ -1025,11 +1143,6 @@ void compute_average(struct thread_data *t, struct core_data *c, average.threads.mperf /= topo.num_cpus; average.threads.c1 /= topo.num_cpus; - average.threads.extra_delta32 /= topo.num_cpus; - average.threads.extra_delta32 &= 0xFFFFFFFF; - - average.threads.extra_delta64 /= topo.num_cpus; - average.cores.c3 /= topo.num_cores; average.cores.c6 /= topo.num_cores; average.cores.c7 /= topo.num_cores; @@ -1052,6 +1165,22 @@ void compute_average(struct thread_data *t, struct core_data *c, average.packages.pc8 /= topo.num_packages; average.packages.pc9 /= topo.num_packages; average.packages.pc10 /= topo.num_packages; + + for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { + if (mp->format == FORMAT_RAW) + continue; + average.threads.counter[i] /= topo.num_cpus; + } + for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { + if (mp->format == FORMAT_RAW) + continue; + average.cores.counter[i] /= topo.num_cores; + } + for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { + if (mp->format == FORMAT_RAW) + continue; + average.packages.counter[i] /= topo.num_packages; + } } static unsigned long long rdtsc(void) @@ -1073,6 +1202,8 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) int cpu = t->cpu_id; unsigned long long msr; int aperf_mperf_retry_count = 0; + struct msr_counter *mp; + int i; if (cpu_migrate(cpu)) { fprintf(outf, "Could not migrate to CPU %d\n", cpu); @@ -1145,31 +1276,18 @@ retry: return -5; t->smi_count = msr & 0xFFFFFFFF; } - if (extra_delta_offset32) { - if (get_msr(cpu, extra_delta_offset32, &msr)) - return -5; - t->extra_delta32 = msr & 0xFFFFFFFF; - } - - if (extra_delta_offset64) - if (get_msr(cpu, extra_delta_offset64, &t->extra_delta64)) - return -5; - - if (extra_msr_offset32) { - if (get_msr(cpu, extra_msr_offset32, &msr)) - return -5; - t->extra_msr32 = msr & 0xFFFFFFFF; - } - - if (extra_msr_offset64) - if (get_msr(cpu, extra_msr_offset64, &t->extra_msr64)) - return -5; if (use_c1_residency_msr) { if (get_msr(cpu, MSR_CORE_C1_RES, &t->c1)) return -6; } + for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { + if (get_msr(cpu, mp->msr_num, &t->counter[i])) + return -10; + } + + /* collect core counters only for 1st thread in core */ if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) return 0; @@ -1197,6 +1315,10 @@ retry: c->core_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F); } + for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { + if (get_msr(cpu, mp->msr_num, &c->counter[i])) + return -10; + } /* collect package counters only for 1st core in package */ if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) @@ -1237,7 +1359,7 @@ retry: return -13; p->energy_pkg = msr & 0xFFFFFFFF; } - if (do_rapl & RAPL_CORES) { + if (do_rapl & RAPL_CORES_ENERGY_STATUS) { if (get_msr(cpu, MSR_PP0_ENERGY_STATUS, &msr)) return -14; p->energy_cores = msr & 0xFFFFFFFF; @@ -1274,6 +1396,11 @@ retry: if (do_gfx_mhz) p->gfx_mhz = gfx_cur_mhz; + for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { + if (get_msr(cpu, mp->msr_num, &p->counter[i])) + return -10; + } + return 0; } @@ -1310,6 +1437,7 @@ int slv_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, int amt_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; int phi_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; int bxt_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; +int skx_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; static void @@ -1638,7 +1766,7 @@ void free_fd_percpu(void) { int i; - for (i = 0; i < topo.max_cpu_num; ++i) { + for (i = 0; i < topo.max_cpu_num + 1; ++i) { if (fd_percpu[i] != 0) close(fd_percpu[i]); } @@ -2071,7 +2199,10 @@ restart: } gettimeofday(&tv_odd, (struct timezone *)NULL); timersub(&tv_odd, &tv_even, &tv_delta); - for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS); + if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS)) { + re_initialize(); + goto restart; + } compute_average(EVEN_COUNTERS); format_all_counters(EVEN_COUNTERS); flush_output_stdout(); @@ -2087,7 +2218,10 @@ restart: } gettimeofday(&tv_even, (struct timezone *)NULL); timersub(&tv_even, &tv_odd, &tv_delta); - for_all_cpus_2(delta_cpu, EVEN_COUNTERS, ODD_COUNTERS); + if (for_all_cpus_2(delta_cpu, EVEN_COUNTERS, ODD_COUNTERS)) { + re_initialize(); + goto restart; + } compute_average(ODD_COUNTERS); format_all_counters(ODD_COUNTERS); flush_output_stdout(); @@ -2174,47 +2308,51 @@ int probe_nhm_msrs(unsigned int family, unsigned int model) bclk = discover_bclk(family, model); switch (model) { - case 0x1A: /* Core i7, Xeon 5500 series - Bloomfield, Gainstown NHM-EP */ - case 0x1E: /* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */ + case INTEL_FAM6_NEHALEM_EP: /* Core i7, Xeon 5500 series - Bloomfield, Gainstown NHM-EP */ + case INTEL_FAM6_NEHALEM: /* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */ case 0x1F: /* Core i7 and i5 Processor - Nehalem */ - case 0x25: /* Westmere Client - Clarkdale, Arrandale */ - case 0x2C: /* Westmere EP - Gulftown */ - case 0x2E: /* Nehalem-EX Xeon - Beckton */ - case 0x2F: /* Westmere-EX Xeon - Eagleton */ + case INTEL_FAM6_WESTMERE: /* Westmere Client - Clarkdale, Arrandale */ + case INTEL_FAM6_WESTMERE_EP: /* Westmere EP - Gulftown */ + case INTEL_FAM6_NEHALEM_EX: /* Nehalem-EX Xeon - Beckton */ + case INTEL_FAM6_WESTMERE_EX: /* Westmere-EX Xeon - Eagleton */ pkg_cstate_limits = nhm_pkg_cstate_limits; break; - case 0x2A: /* SNB */ - case 0x2D: /* SNB Xeon */ - case 0x3A: /* IVB */ - case 0x3E: /* IVB Xeon */ + case INTEL_FAM6_SANDYBRIDGE: /* SNB */ + case INTEL_FAM6_SANDYBRIDGE_X: /* SNB Xeon */ + case INTEL_FAM6_IVYBRIDGE: /* IVB */ + case INTEL_FAM6_IVYBRIDGE_X: /* IVB Xeon */ pkg_cstate_limits = snb_pkg_cstate_limits; break; - case 0x3C: /* HSW */ - case 0x3F: /* HSX */ - case 0x45: /* HSW */ - case 0x46: /* HSW */ - case 0x3D: /* BDW */ - case 0x47: /* BDW */ - case 0x4F: /* BDX */ - case 0x56: /* BDX-DE */ - case 0x4E: /* SKL */ - case 0x5E: /* SKL */ - case 0x8E: /* KBL */ - case 0x9E: /* KBL */ - case 0x55: /* SKX */ + case INTEL_FAM6_HASWELL_CORE: /* HSW */ + case INTEL_FAM6_HASWELL_X: /* HSX */ + case INTEL_FAM6_HASWELL_ULT: /* HSW */ + case INTEL_FAM6_HASWELL_GT3E: /* HSW */ + case INTEL_FAM6_BROADWELL_CORE: /* BDW */ + case INTEL_FAM6_BROADWELL_GT3E: /* BDW */ + case INTEL_FAM6_BROADWELL_X: /* BDX */ + case INTEL_FAM6_BROADWELL_XEON_D: /* BDX-DE */ + case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */ + case INTEL_FAM6_SKYLAKE_DESKTOP: /* SKL */ + case INTEL_FAM6_KABYLAKE_MOBILE: /* KBL */ + case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */ pkg_cstate_limits = hsw_pkg_cstate_limits; break; - case 0x37: /* BYT */ - case 0x4D: /* AVN */ + case INTEL_FAM6_SKYLAKE_X: /* SKX */ + pkg_cstate_limits = skx_pkg_cstate_limits; + break; + case INTEL_FAM6_ATOM_SILVERMONT1: /* BYT */ + case INTEL_FAM6_ATOM_SILVERMONT2: /* AVN */ pkg_cstate_limits = slv_pkg_cstate_limits; break; - case 0x4C: /* AMT */ + case INTEL_FAM6_ATOM_AIRMONT: /* AMT */ pkg_cstate_limits = amt_pkg_cstate_limits; break; - case 0x57: /* PHI */ + case INTEL_FAM6_XEON_PHI_KNL: /* PHI */ + case INTEL_FAM6_XEON_PHI_KNM: pkg_cstate_limits = phi_pkg_cstate_limits; break; - case 0x5C: /* BXT */ + case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */ + case INTEL_FAM6_ATOM_DENVERTON: /* DNV */ pkg_cstate_limits = bxt_pkg_cstate_limits; break; default: @@ -2234,9 +2372,10 @@ int has_nhm_turbo_ratio_limit(unsigned int family, unsigned int model) { switch (model) { /* Nehalem compatible, but do not include turbo-ratio limit support */ - case 0x2E: /* Nehalem-EX Xeon - Beckton */ - case 0x2F: /* Westmere-EX Xeon - Eagleton */ - case 0x57: /* PHI - Knights Landing (different MSR definition) */ + case INTEL_FAM6_NEHALEM_EX: /* Nehalem-EX Xeon - Beckton */ + case INTEL_FAM6_WESTMERE_EX: /* Westmere-EX Xeon - Eagleton */ + case INTEL_FAM6_XEON_PHI_KNL: /* PHI - Knights Landing (different MSR definition) */ + case INTEL_FAM6_XEON_PHI_KNM: return 0; default: return 1; @@ -2251,8 +2390,8 @@ int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model) return 0; switch (model) { - case 0x3E: /* IVB Xeon */ - case 0x3F: /* HSW Xeon */ + case INTEL_FAM6_IVYBRIDGE_X: /* IVB Xeon */ + case INTEL_FAM6_HASWELL_X: /* HSW Xeon */ return 1; default: return 0; @@ -2267,7 +2406,7 @@ int has_hsw_turbo_ratio_limit(unsigned int family, unsigned int model) return 0; switch (model) { - case 0x3F: /* HSW Xeon */ + case INTEL_FAM6_HASWELL_X: /* HSW Xeon */ return 1; default: return 0; @@ -2283,7 +2422,8 @@ int has_knl_turbo_ratio_limit(unsigned int family, unsigned int model) return 0; switch (model) { - case 0x57: /* Knights Landing */ + case INTEL_FAM6_XEON_PHI_KNL: /* Knights Landing */ + case INTEL_FAM6_XEON_PHI_KNM: return 1; default: return 0; @@ -2298,22 +2438,23 @@ int has_config_tdp(unsigned int family, unsigned int model) return 0; switch (model) { - case 0x3A: /* IVB */ - case 0x3C: /* HSW */ - case 0x3F: /* HSX */ - case 0x45: /* HSW */ - case 0x46: /* HSW */ - case 0x3D: /* BDW */ - case 0x47: /* BDW */ - case 0x4F: /* BDX */ - case 0x56: /* BDX-DE */ - case 0x4E: /* SKL */ - case 0x5E: /* SKL */ - case 0x8E: /* KBL */ - case 0x9E: /* KBL */ - case 0x55: /* SKX */ - - case 0x57: /* Knights Landing */ + case INTEL_FAM6_IVYBRIDGE: /* IVB */ + case INTEL_FAM6_HASWELL_CORE: /* HSW */ + case INTEL_FAM6_HASWELL_X: /* HSX */ + case INTEL_FAM6_HASWELL_ULT: /* HSW */ + case INTEL_FAM6_HASWELL_GT3E: /* HSW */ + case INTEL_FAM6_BROADWELL_CORE: /* BDW */ + case INTEL_FAM6_BROADWELL_GT3E: /* BDW */ + case INTEL_FAM6_BROADWELL_X: /* BDX */ + case INTEL_FAM6_BROADWELL_XEON_D: /* BDX-DE */ + case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */ + case INTEL_FAM6_SKYLAKE_DESKTOP: /* SKL */ + case INTEL_FAM6_KABYLAKE_MOBILE: /* KBL */ + case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */ + case INTEL_FAM6_SKYLAKE_X: /* SKX */ + + case INTEL_FAM6_XEON_PHI_KNL: /* Knights Landing */ + case INTEL_FAM6_XEON_PHI_KNM: return 1; default: return 0; @@ -2593,8 +2734,8 @@ double get_tdp(unsigned int model) return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units; switch (model) { - case 0x37: - case 0x4D: + case INTEL_FAM6_ATOM_SILVERMONT1: + case INTEL_FAM6_ATOM_SILVERMONT2: return 30.0; default: return 135.0; @@ -2611,10 +2752,11 @@ rapl_dram_energy_units_probe(int model, double rapl_energy_units) /* only called for genuine_intel, family 6 */ switch (model) { - case 0x3F: /* HSX */ - case 0x4F: /* BDX */ - case 0x56: /* BDX-DE */ - case 0x57: /* KNL */ + case INTEL_FAM6_HASWELL_X: /* HSX */ + case INTEL_FAM6_BROADWELL_X: /* BDX */ + case INTEL_FAM6_BROADWELL_XEON_D: /* BDX-DE */ + case INTEL_FAM6_XEON_PHI_KNL: /* KNL */ + case INTEL_FAM6_XEON_PHI_KNM: return (rapl_dram_energy_units = 15.3 / 1000000); default: return (rapl_energy_units); @@ -2640,38 +2782,42 @@ void rapl_probe(unsigned int family, unsigned int model) return; switch (model) { - case 0x2A: - case 0x3A: - case 0x3C: /* HSW */ - case 0x45: /* HSW */ - case 0x46: /* HSW */ - case 0x3D: /* BDW */ - case 0x47: /* BDW */ + case INTEL_FAM6_SANDYBRIDGE: + case INTEL_FAM6_IVYBRIDGE: + case INTEL_FAM6_HASWELL_CORE: /* HSW */ + case INTEL_FAM6_HASWELL_ULT: /* HSW */ + case INTEL_FAM6_HASWELL_GT3E: /* HSW */ + case INTEL_FAM6_BROADWELL_CORE: /* BDW */ + case INTEL_FAM6_BROADWELL_GT3E: /* BDW */ do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_GFX | RAPL_PKG_POWER_INFO; break; - case 0x5C: /* BXT */ + case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */ do_rapl = RAPL_PKG | RAPL_PKG_POWER_INFO; break; - case 0x4E: /* SKL */ - case 0x5E: /* SKL */ - case 0x8E: /* KBL */ - case 0x9E: /* KBL */ + case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */ + case INTEL_FAM6_SKYLAKE_DESKTOP: /* SKL */ + case INTEL_FAM6_KABYLAKE_MOBILE: /* KBL */ + case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */ do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO; break; - case 0x3F: /* HSX */ - case 0x4F: /* BDX */ - case 0x56: /* BDX-DE */ - case 0x55: /* SKX */ - case 0x57: /* KNL */ + case INTEL_FAM6_HASWELL_X: /* HSX */ + case INTEL_FAM6_BROADWELL_X: /* BDX */ + case INTEL_FAM6_BROADWELL_XEON_D: /* BDX-DE */ + case INTEL_FAM6_SKYLAKE_X: /* SKX */ + case INTEL_FAM6_XEON_PHI_KNL: /* KNL */ + case INTEL_FAM6_XEON_PHI_KNM: do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO; break; - case 0x2D: - case 0x3E: + case INTEL_FAM6_SANDYBRIDGE_X: + case INTEL_FAM6_IVYBRIDGE_X: do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS | RAPL_PKG_POWER_INFO; break; - case 0x37: /* BYT */ - case 0x4D: /* AVN */ - do_rapl = RAPL_PKG | RAPL_CORES ; + case INTEL_FAM6_ATOM_SILVERMONT1: /* BYT */ + case INTEL_FAM6_ATOM_SILVERMONT2: /* AVN */ + do_rapl = RAPL_PKG | RAPL_CORES; + break; + case INTEL_FAM6_ATOM_DENVERTON: /* DNV */ + do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO | RAPL_CORES_ENERGY_STATUS; break; default: return; @@ -2682,7 +2828,7 @@ void rapl_probe(unsigned int family, unsigned int model) return; rapl_power_units = 1.0 / (1 << (msr & 0xF)); - if (model == 0x37) + if (model == INTEL_FAM6_ATOM_SILVERMONT1) rapl_energy_units = 1.0 * (1 << (msr >> 8 & 0x1F)) / 1000000; else rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F)); @@ -2713,11 +2859,11 @@ void perf_limit_reasons_probe(unsigned int family, unsigned int model) return; switch (model) { - case 0x3C: /* HSW */ - case 0x45: /* HSW */ - case 0x46: /* HSW */ + case INTEL_FAM6_HASWELL_CORE: /* HSW */ + case INTEL_FAM6_HASWELL_ULT: /* HSW */ + case INTEL_FAM6_HASWELL_GT3E: /* HSW */ do_gfx_perf_limit_reasons = 1; - case 0x3F: /* HSX */ + case INTEL_FAM6_HASWELL_X: /* HSX */ do_core_perf_limit_reasons = 1; do_ring_perf_limit_reasons = 1; default: @@ -2737,7 +2883,7 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p cpu = t->cpu_id; /* DTS is per-core, no need to print for each thread */ - if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) + if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) return 0; if (cpu_migrate(cpu)) { @@ -2886,9 +3032,8 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) fprintf(outf, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF); } } - if (do_rapl & RAPL_CORES) { + if (do_rapl & RAPL_CORES_POWER_LIMIT) { if (debug) { - if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr)) return -9; fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n", @@ -2927,24 +3072,25 @@ int has_snb_msrs(unsigned int family, unsigned int model) return 0; switch (model) { - case 0x2A: - case 0x2D: - case 0x3A: /* IVB */ - case 0x3E: /* IVB Xeon */ - case 0x3C: /* HSW */ - case 0x3F: /* HSW */ - case 0x45: /* HSW */ - case 0x46: /* HSW */ - case 0x3D: /* BDW */ - case 0x47: /* BDW */ - case 0x4F: /* BDX */ - case 0x56: /* BDX-DE */ - case 0x4E: /* SKL */ - case 0x5E: /* SKL */ - case 0x8E: /* KBL */ - case 0x9E: /* KBL */ - case 0x55: /* SKX */ - case 0x5C: /* BXT */ + case INTEL_FAM6_SANDYBRIDGE: + case INTEL_FAM6_SANDYBRIDGE_X: + case INTEL_FAM6_IVYBRIDGE: /* IVB */ + case INTEL_FAM6_IVYBRIDGE_X: /* IVB Xeon */ + case INTEL_FAM6_HASWELL_CORE: /* HSW */ + case INTEL_FAM6_HASWELL_X: /* HSW */ + case INTEL_FAM6_HASWELL_ULT: /* HSW */ + case INTEL_FAM6_HASWELL_GT3E: /* HSW */ + case INTEL_FAM6_BROADWELL_CORE: /* BDW */ + case INTEL_FAM6_BROADWELL_GT3E: /* BDW */ + case INTEL_FAM6_BROADWELL_X: /* BDX */ + case INTEL_FAM6_BROADWELL_XEON_D: /* BDX-DE */ + case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */ + case INTEL_FAM6_SKYLAKE_DESKTOP: /* SKL */ + case INTEL_FAM6_KABYLAKE_MOBILE: /* KBL */ + case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */ + case INTEL_FAM6_SKYLAKE_X: /* SKX */ + case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */ + case INTEL_FAM6_ATOM_DENVERTON: /* DNV */ return 1; } return 0; @@ -2968,13 +3114,13 @@ int has_hsw_msrs(unsigned int family, unsigned int model) return 0; switch (model) { - case 0x45: /* HSW */ - case 0x3D: /* BDW */ - case 0x4E: /* SKL */ - case 0x5E: /* SKL */ - case 0x8E: /* KBL */ - case 0x9E: /* KBL */ - case 0x5C: /* BXT */ + case INTEL_FAM6_HASWELL_ULT: /* HSW */ + case INTEL_FAM6_BROADWELL_CORE: /* BDW */ + case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */ + case INTEL_FAM6_SKYLAKE_DESKTOP: /* SKL */ + case INTEL_FAM6_KABYLAKE_MOBILE: /* KBL */ + case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */ + case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */ return 1; } return 0; @@ -2994,10 +3140,10 @@ int has_skl_msrs(unsigned int family, unsigned int model) return 0; switch (model) { - case 0x4E: /* SKL */ - case 0x5E: /* SKL */ - case 0x8E: /* KBL */ - case 0x9E: /* KBL */ + case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */ + case INTEL_FAM6_SKYLAKE_DESKTOP: /* SKL */ + case INTEL_FAM6_KABYLAKE_MOBILE: /* KBL */ + case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */ return 1; } return 0; @@ -3010,8 +3156,8 @@ int is_slm(unsigned int family, unsigned int model) if (!genuine_intel) return 0; switch (model) { - case 0x37: /* BYT */ - case 0x4D: /* AVN */ + case INTEL_FAM6_ATOM_SILVERMONT1: /* BYT */ + case INTEL_FAM6_ATOM_SILVERMONT2: /* AVN */ return 1; } return 0; @@ -3022,7 +3168,8 @@ int is_knl(unsigned int family, unsigned int model) if (!genuine_intel) return 0; switch (model) { - case 0x57: /* KNL */ + case INTEL_FAM6_XEON_PHI_KNL: /* KNL */ + case INTEL_FAM6_XEON_PHI_KNM: return 1; } return 0; @@ -3050,7 +3197,7 @@ double slm_bclk(void) i = msr & 0xf; if (i >= SLM_BCLK_FREQS) { fprintf(outf, "SLM BCLK[%d] invalid\n", i); - msr = 3; + i = 3; } freq = slm_freq_table[i]; @@ -3174,10 +3321,11 @@ void decode_misc_pwr_mgmt_msr(void) return; if (!get_msr(base_cpu, MSR_MISC_PWR_MGMT, &msr)) - fprintf(outf, "cpu%d: MSR_MISC_PWR_MGMT: 0x%08llx (%sable-EIST_Coordination %sable-EPB)\n", + fprintf(outf, "cpu%d: MSR_MISC_PWR_MGMT: 0x%08llx (%sable-EIST_Coordination %sable-EPB %sable-OOB)\n", base_cpu, msr, msr & (1 << 0) ? "DIS" : "EN", - msr & (1 << 1) ? "EN" : "DIS"); + msr & (1 << 1) ? "EN" : "DIS", + msr & (1 << 8) ? "EN" : "DIS"); } void process_cpuid() @@ -3303,16 +3451,17 @@ void process_cpuid() if (crystal_hz == 0) switch(model) { - case 0x4E: /* SKL */ - case 0x5E: /* SKL */ - case 0x8E: /* KBL */ - case 0x9E: /* KBL */ + case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */ + case INTEL_FAM6_SKYLAKE_DESKTOP: /* SKL */ + case INTEL_FAM6_KABYLAKE_MOBILE: /* KBL */ + case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */ crystal_hz = 24000000; /* 24.0 MHz */ break; - case 0x55: /* SKX */ + case INTEL_FAM6_SKYLAKE_X: /* SKX */ + case INTEL_FAM6_ATOM_DENVERTON: /* DNV */ crystal_hz = 25000000; /* 25.0 MHz */ break; - case 0x5C: /* BXT */ + case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */ crystal_hz = 19200000; /* 19.2 MHz */ break; default: @@ -3385,14 +3534,12 @@ void help() "when COMMAND completes.\n" "If no COMMAND is specified, turbostat wakes every 5-seconds\n" "to print statistics, until interrupted.\n" + "--add add a counter\n" + " eg. --add msr0x10,u64,cpu,delta,MY_TSC\n" "--debug run in \"debug\" mode\n" "--interval sec Override default 5-second measurement interval\n" "--help print this help message\n" - "--counter msr print 32-bit counter at address \"msr\"\n" - "--Counter msr print 64-bit Counter at address \"msr\"\n" "--out file create or truncate \"file\" for all output\n" - "--msr msr print 32-bit value at address \"msr\"\n" - "--MSR msr print 64-bit Value at address \"msr\"\n" "--version print version information\n" "\n" "For more help, run \"man turbostat\"\n"); @@ -3515,7 +3662,7 @@ allocate_counters(struct thread_data **t, struct core_data **c, struct pkg_data int i; *t = calloc(topo.num_threads_per_core * topo.num_cores_per_pkg * - topo.num_packages, sizeof(struct thread_data)); + topo.num_packages, sizeof(struct thread_data) + sys.thread_counter_bytes); if (*t == NULL) goto error; @@ -3524,14 +3671,14 @@ allocate_counters(struct thread_data **t, struct core_data **c, struct pkg_data (*t)[i].cpu_id = -1; *c = calloc(topo.num_cores_per_pkg * topo.num_packages, - sizeof(struct core_data)); + sizeof(struct core_data) + sys.core_counter_bytes); if (*c == NULL) goto error; for (i = 0; i < topo.num_cores_per_pkg * topo.num_packages; i++) (*c)[i].core_id = -1; - *p = calloc(topo.num_packages, sizeof(struct pkg_data)); + *p = calloc(topo.num_packages, sizeof(struct pkg_data) + sys.package_counter_bytes); if (*p == NULL) goto error; @@ -3598,7 +3745,7 @@ void allocate_output_buffer() } void allocate_fd_percpu(void) { - fd_percpu = calloc(topo.max_cpu_num, sizeof(int)); + fd_percpu = calloc(topo.max_cpu_num + 1, sizeof(int)); if (fd_percpu == NULL) err(-1, "calloc fd_percpu"); } @@ -3608,9 +3755,9 @@ void allocate_irq_buffers(void) if (irq_column_2_cpu == NULL) err(-1, "calloc %d", topo.num_cpus); - irqs_per_cpu = calloc(topo.max_cpu_num, sizeof(int)); + irqs_per_cpu = calloc(topo.max_cpu_num + 1, sizeof(int)); if (irqs_per_cpu == NULL) - err(-1, "calloc %d", topo.max_cpu_num); + err(-1, "calloc %d", topo.max_cpu_num + 1); } void setup_all_buffers(void) { @@ -3697,9 +3844,12 @@ int fork_it(char **argv) for_all_cpus(get_counters, ODD_COUNTERS); gettimeofday(&tv_odd, (struct timezone *)NULL); timersub(&tv_odd, &tv_even, &tv_delta); - for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS); - compute_average(EVEN_COUNTERS); - format_all_counters(EVEN_COUNTERS); + if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS)) + fprintf(outf, "%s: Counter reset detected\n", progname); + else { + compute_average(EVEN_COUNTERS); + format_all_counters(EVEN_COUNTERS); + } fprintf(outf, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec/1000000.0); @@ -3726,24 +3876,170 @@ int get_and_dump_counters(void) } void print_version() { - fprintf(outf, "turbostat version 4.12 5 Apr 2016" + fprintf(outf, "turbostat version 4.16 24 Dec 2016" " - Len Brown <lenb@kernel.org>\n"); } +int add_counter(unsigned int msr_num, char *name, unsigned int width, + enum counter_scope scope, enum counter_type type, + enum counter_format format) +{ + struct msr_counter *msrp; + + msrp = calloc(1, sizeof(struct msr_counter)); + if (msrp == NULL) { + perror("calloc"); + exit(1); + } + + msrp->msr_num = msr_num; + strncpy(msrp->name, name, NAME_BYTES); + msrp->width = width; + msrp->type = type; + msrp->format = format; + + switch (scope) { + + case SCOPE_CPU: + sys.thread_counter_bytes += 64; + msrp->next = sys.tp; + sys.tp = msrp; + sys.thread_counter_bytes += sizeof(unsigned long long); + break; + + case SCOPE_CORE: + sys.core_counter_bytes += 64; + msrp->next = sys.cp; + sys.cp = msrp; + sys.core_counter_bytes += sizeof(unsigned long long); + break; + + case SCOPE_PACKAGE: + sys.package_counter_bytes += 64; + msrp->next = sys.pp; + sys.pp = msrp; + sys.package_counter_bytes += sizeof(unsigned long long); + break; + } + + return 0; +} + +void parse_add_command(char *add_command) +{ + int msr_num = 0; + char name_buffer[NAME_BYTES]; + int width = 64; + int fail = 0; + enum counter_scope scope = SCOPE_CPU; + enum counter_type type = COUNTER_CYCLES; + enum counter_format format = FORMAT_DELTA; + + while (add_command) { + + if (sscanf(add_command, "msr0x%x", &msr_num) == 1) + goto next; + + if (sscanf(add_command, "msr%d", &msr_num) == 1) + goto next; + + if (sscanf(add_command, "u%d", &width) == 1) { + if ((width == 32) || (width == 64)) + goto next; + width = 64; + } + if (!strncmp(add_command, "cpu", strlen("cpu"))) { + scope = SCOPE_CPU; + goto next; + } + if (!strncmp(add_command, "core", strlen("core"))) { + scope = SCOPE_CORE; + goto next; + } + if (!strncmp(add_command, "package", strlen("package"))) { + scope = SCOPE_PACKAGE; + goto next; + } + if (!strncmp(add_command, "cycles", strlen("cycles"))) { + type = COUNTER_CYCLES; + goto next; + } + if (!strncmp(add_command, "seconds", strlen("seconds"))) { + type = COUNTER_SECONDS; + goto next; + } + if (!strncmp(add_command, "raw", strlen("raw"))) { + format = FORMAT_RAW; + goto next; + } + if (!strncmp(add_command, "delta", strlen("delta"))) { + format = FORMAT_DELTA; + goto next; + } + if (!strncmp(add_command, "percent", strlen("percent"))) { + format = FORMAT_PERCENT; + goto next; + } + + if (sscanf(add_command, "%18s,%*s", name_buffer) == 1) { /* 18 < NAME_BYTES */ + char *eos; + + eos = strchr(name_buffer, ','); + if (eos) + *eos = '\0'; + goto next; + } + +next: + add_command = strchr(add_command, ','); + if (add_command) + add_command++; + + } + if (msr_num == 0) { + fprintf(stderr, "--add: (msrDDD | msr0xXXX) required\n"); + fail++; + } + + /* generate default column header */ + if (*name_buffer == '\0') { + if (format == FORMAT_RAW) { + if (width == 32) + sprintf(name_buffer, "msr%d", msr_num); + else + sprintf(name_buffer, "MSR%d", msr_num); + } else if (format == FORMAT_DELTA) { + if (width == 32) + sprintf(name_buffer, "cnt%d", msr_num); + else + sprintf(name_buffer, "CNT%d", msr_num); + } else if (format == FORMAT_PERCENT) { + if (width == 32) + sprintf(name_buffer, "msr%d%%", msr_num); + else + sprintf(name_buffer, "MSR%d%%", msr_num); + } + } + + if (add_counter(msr_num, name_buffer, width, scope, type, format)) + fail++; + + if (fail) { + help(); + exit(1); + } +} void cmdline(int argc, char **argv) { int opt; int option_index = 0; static struct option long_options[] = { - {"Counter", required_argument, 0, 'C'}, - {"counter", required_argument, 0, 'c'}, + {"add", required_argument, 0, 'a'}, {"Dump", no_argument, 0, 'D'}, {"debug", no_argument, 0, 'd'}, {"interval", required_argument, 0, 'i'}, {"help", no_argument, 0, 'h'}, {"Joules", no_argument, 0, 'J'}, - {"MSR", required_argument, 0, 'M'}, - {"msr", required_argument, 0, 'm'}, {"out", required_argument, 0, 'o'}, {"Package", no_argument, 0, 'p'}, {"processor", no_argument, 0, 'p'}, @@ -3758,11 +4054,8 @@ void cmdline(int argc, char **argv) while ((opt = getopt_long_only(argc, argv, "+C:c:Ddhi:JM:m:o:PpST:v", long_options, &option_index)) != -1) { switch (opt) { - case 'C': - sscanf(optarg, "%x", &extra_delta_offset64); - break; - case 'c': - sscanf(optarg, "%x", &extra_delta_offset32); + case 'a': + parse_add_command(optarg); break; case 'D': dump_only++; @@ -3791,12 +4084,6 @@ void cmdline(int argc, char **argv) case 'J': rapl_joules++; break; - case 'M': - sscanf(optarg, "%x", &extra_msr_offset64); - break; - case 'm': - sscanf(optarg, "%x", &extra_msr_offset32); - break; case 'o': outf = fopen_or_die(optarg, "w"); break; diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 97b657adb3bd..a2dbbccbb6a3 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -456,7 +456,7 @@ int kvm_timer_hyp_init(void) kvm_info("virtual timer IRQ%d\n", host_vtimer_irq); cpuhp_setup_state(CPUHP_AP_KVM_ARM_TIMER_STARTING, - "AP_KVM_ARM_TIMER_STARTING", kvm_timer_starting_cpu, + "kvm/arm/timer:starting", kvm_timer_starting_cpu, kvm_timer_dying_cpu); return err; } diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c index 8cebfbc19e90..5114391b7e5a 100644 --- a/virt/kvm/arm/vgic/vgic-init.c +++ b/virt/kvm/arm/vgic/vgic-init.c @@ -428,7 +428,7 @@ int kvm_vgic_hyp_init(void) } ret = cpuhp_setup_state(CPUHP_AP_KVM_ARM_VGIC_INIT_STARTING, - "AP_KVM_ARM_VGIC_INIT_STARTING", + "kvm/arm/vgic:starting", vgic_init_cpu_starting, vgic_init_cpu_dying); if (ret) { kvm_err("Cannot register vgic CPU notifier\n"); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 994f81f8eecb..482612b4e496 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -3944,7 +3944,7 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, goto out_free_1; } - r = cpuhp_setup_state_nocalls(CPUHP_AP_KVM_STARTING, "AP_KVM_STARTING", + r = cpuhp_setup_state_nocalls(CPUHP_AP_KVM_STARTING, "kvm/cpu:starting", kvm_starting_cpu, kvm_dying_cpu); if (r) goto out_free_2; |