summaryrefslogtreecommitdiff
path: root/drivers/perf
diff options
context:
space:
mode:
authorWill Deacon <will@kernel.org>2022-12-06 14:22:48 +0300
committerWill Deacon <will@kernel.org>2022-12-06 14:22:48 +0300
commit10162e78eacc939efe8edc868aed2307cd50b675 (patch)
treedaa967109bdb4ff16bf2ab59e708e1b2eb0cd37e /drivers/perf
parentc947948f7aa4fc8ffca598866f1955fad2860b72 (diff)
parent4361251cef466839795691e2628285e3f5093a98 (diff)
downloadlinux-10162e78eacc939efe8edc868aed2307cd50b675.tar.xz
Merge branch 'for-next/perf' into for-next/core
* for-next/perf: (21 commits) arm_pmu: Drop redundant armpmu->map_event() in armpmu_event_init() drivers/perf: hisi: Add TLP filter support Documentation: perf: Indent filter options list of hisi-pcie-pmu docs: perf: Fix PMU instance name of hisi-pcie-pmu drivers/perf: hisi: Fix some event id for hisi-pcie-pmu arm64/perf: Replace PMU version number '0' with ID_AA64DFR0_EL1_PMUVer_NI perf/amlogic: Remove unused header inclusions of <linux/version.h> perf/amlogic: Fix build error for x86_64 allmodconfig dt-binding: perf: Add Amlogic DDR PMU docs/perf: Add documentation for the Amlogic G12 DDR PMU perf/amlogic: Add support for Amlogic meson G12 SoC DDR PMU driver MAINTAINERS: Update HiSilicon PMU maintainers perf: arm_cspmu: Fix module cyclic dependency perf: arm_cspmu: Fix build failure on x86_64 perf: arm_cspmu: Fix modular builds due to missing MODULE_LICENSE()s perf: arm_cspmu: Add support for NVIDIA SCF and MCF attribute perf: arm_cspmu: Add support for ARM CoreSight PMU driver perf/smmuv3: Fix hotplug callback leak in arm_smmu_pmu_init() perf/arm_dmc620: Fix hotplug callback leak in dmc620_pmu_init() drivers: perf: marvell_cn10k: Fix hotplug callback leak in tad_pmu_init() ...
Diffstat (limited to 'drivers/perf')
-rw-r--r--drivers/perf/Kconfig4
-rw-r--r--drivers/perf/Makefile2
-rw-r--r--drivers/perf/amlogic/Kconfig10
-rw-r--r--drivers/perf/amlogic/Makefile5
-rw-r--r--drivers/perf/amlogic/meson_ddr_pmu_core.c561
-rw-r--r--drivers/perf/amlogic/meson_g12_ddr_pmu.c394
-rw-r--r--drivers/perf/arm_cspmu/Kconfig13
-rw-r--r--drivers/perf/arm_cspmu/Makefile6
-rw-r--r--drivers/perf/arm_cspmu/arm_cspmu.c1303
-rw-r--r--drivers/perf/arm_cspmu/arm_cspmu.h151
-rw-r--r--drivers/perf/arm_cspmu/nvidia_cspmu.c400
-rw-r--r--drivers/perf/arm_cspmu/nvidia_cspmu.h17
-rw-r--r--drivers/perf/arm_dmc620_pmu.c8
-rw-r--r--drivers/perf/arm_dsu_pmu.c6
-rw-r--r--drivers/perf/arm_pmu.c3
-rw-r--r--drivers/perf/arm_smmuv3_pmu.c8
-rw-r--r--drivers/perf/hisilicon/hisi_pcie_pmu.c22
-rw-r--r--drivers/perf/marvell_cn10k_tad_pmu.c6
18 files changed, 2907 insertions, 12 deletions
diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
index 341010f20b77..77043bcdb33c 100644
--- a/drivers/perf/Kconfig
+++ b/drivers/perf/Kconfig
@@ -199,4 +199,8 @@ config MARVELL_CN10K_DDR_PMU
Enable perf support for Marvell DDR Performance monitoring
event on CN10K platform.
+source "drivers/perf/arm_cspmu/Kconfig"
+
+source "drivers/perf/amlogic/Kconfig"
+
endmenu
diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile
index 050d04ee19dd..13e45da61100 100644
--- a/drivers/perf/Makefile
+++ b/drivers/perf/Makefile
@@ -21,3 +21,5 @@ obj-$(CONFIG_MARVELL_CN10K_TAD_PMU) += marvell_cn10k_tad_pmu.o
obj-$(CONFIG_MARVELL_CN10K_DDR_PMU) += marvell_cn10k_ddr_pmu.o
obj-$(CONFIG_APPLE_M1_CPU_PMU) += apple_m1_cpu_pmu.o
obj-$(CONFIG_ALIBABA_UNCORE_DRW_PMU) += alibaba_uncore_drw_pmu.o
+obj-$(CONFIG_ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU) += arm_cspmu/
+obj-$(CONFIG_MESON_DDR_PMU) += amlogic/
diff --git a/drivers/perf/amlogic/Kconfig b/drivers/perf/amlogic/Kconfig
new file mode 100644
index 000000000000..f68db01a7f17
--- /dev/null
+++ b/drivers/perf/amlogic/Kconfig
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config MESON_DDR_PMU
+ tristate "Amlogic DDR Bandwidth Performance Monitor"
+ depends on ARCH_MESON || COMPILE_TEST
+ help
+ Provides support for the DDR performance monitor
+ in Amlogic SoCs, which can give information about
+ memory throughput and other related events. It
+ supports multiple channels to monitor the memory
+ bandwidth simultaneously.
diff --git a/drivers/perf/amlogic/Makefile b/drivers/perf/amlogic/Makefile
new file mode 100644
index 000000000000..d3ab2ac5353b
--- /dev/null
+++ b/drivers/perf/amlogic/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+obj-$(CONFIG_MESON_DDR_PMU) += meson_ddr_pmu_g12.o
+
+meson_ddr_pmu_g12-y := meson_ddr_pmu_core.o meson_g12_ddr_pmu.o
diff --git a/drivers/perf/amlogic/meson_ddr_pmu_core.c b/drivers/perf/amlogic/meson_ddr_pmu_core.c
new file mode 100644
index 000000000000..b84346dbac2c
--- /dev/null
+++ b/drivers/perf/amlogic/meson_ddr_pmu_core.c
@@ -0,0 +1,561 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2022 Amlogic, Inc. All rights reserved.
+ */
+
+#include <linux/bitfield.h>
+#include <linux/init.h>
+#include <linux/irqreturn.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_irq.h>
+#include <linux/perf_event.h>
+#include <linux/platform_device.h>
+#include <linux/printk.h>
+#include <linux/sysfs.h>
+#include <linux/types.h>
+
+#include <soc/amlogic/meson_ddr_pmu.h>
+
+struct ddr_pmu { /* per-device state of one DDR bandwidth PMU instance */
+ struct pmu pmu; /* perf core object; to_ddr_pmu() maps it back to this struct. NOTE(review): the cpumask/identifier show handlers read drvdata as struct ddr_pmu * while the format visibility callback reads it as struct pmu *, which only agree while this member stays first */
+ struct dmc_info info; /* hw_info ops table, register mappings and IRQ number filled by ddr_pmu_parse_dt() */
+ struct dmc_counter counters; /* counts accumulated by dmc_irq_handler(); zeroed when an event is started */
+ bool pmu_enabled; /* true between dmc_pmu_enable() and dmc_pmu_disable() */
+ struct device *dev; /* platform device, used for dev_dbg() in the IRQ handler */
+ char *name; /* name given to perf_pmu_register(); printed by the identifier attribute */
+ struct hlist_node node; /* instance node registered with the cpuhp multi-instance state */
+ enum cpuhp_state cpuhp_state; /* dynamic state returned by cpuhp_setup_state_multi() */
+ int cpu; /* CPU all events are bound to; changed by ddr_perf_offline_cpu() */
+};
+
+#define DDR_PERF_DEV_NAME "meson_ddr_bw"
+#define MAX_AXI_PORTS_OF_CHANNEL 4 /* A DMC channel can monitor max 4 axi ports */
+
+#define to_ddr_pmu(p) container_of(p, struct ddr_pmu, pmu)
+#define dmc_info_to_pmu(p) container_of(p, struct ddr_pmu, info)
+
+/*
+ * Start the hardware counters through the SoC specific enable() hook,
+ * unless the driver already recorded them as running.
+ */
+static void dmc_pmu_enable(struct ddr_pmu *pmu)
+{
+	if (pmu->pmu_enabled)
+		return;
+
+	pmu->info.hw_info->enable(&pmu->info);
+	pmu->pmu_enabled = true;
+}
+
+/*
+ * Stop the hardware counters through the SoC specific disable() hook,
+ * unless the driver already recorded them as stopped.
+ */
+static void dmc_pmu_disable(struct ddr_pmu *pmu)
+{
+	if (!pmu->pmu_enabled)
+		return;
+
+	pmu->info.hw_info->disable(&pmu->info);
+	pmu->pmu_enabled = false;
+}
+
+/*
+ * Route one AXI port to the channel selected by the event.
+ *
+ * Only per-channel events (config strictly between ALL_CHAN_COUNTER_ID and
+ * COUNTER_MAX_ID) take a filter; for any other config this is a no-op.
+ */
+static void meson_ddr_set_axi_filter(struct perf_event *event, u8 axi_id)
+{
+	struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
+	int chann;
+
+	if (event->attr.config <= ALL_CHAN_COUNTER_ID ||
+	    event->attr.config >= COUNTER_MAX_ID)
+		return;
+
+	/* channel index is zero based: CHAN1_COUNTER_ID maps to channel 0 */
+	chann = event->attr.config - CHAN1_COUNTER_ID;
+
+	pmu->info.hw_info->set_axi_filter(&pmu->info, axi_id, chann);
+}
+
+/*
+ * Store the member-wise sum of @add1 and @add2 in @sum: the two global
+ * counters plus the first @chann_nr per-channel counters.
+ */
+static void ddr_cnt_addition(struct dmc_counter *sum,
+			     struct dmc_counter *add1,
+			     struct dmc_counter *add2,
+			     int chann_nr)
+{
+	int ch;
+
+	sum->all_cnt = add1->all_cnt + add2->all_cnt;
+	sum->all_req = add1->all_req + add2->all_req;
+
+	for (ch = 0; ch < chann_nr; ch++)
+		sum->channel_cnt[ch] = add1->channel_cnt[ch] +
+				       add2->channel_cnt[ch];
+}
+
+/*
+ * perf ->read() callback: publish the current value of @event.
+ *
+ * The value is the software accumulated total (pmu->counters) plus whatever
+ * is still sitting in the hardware registers. Configs that match no known
+ * counter id report 0.
+ */
+static void meson_ddr_perf_event_update(struct perf_event *event)
+{
+	struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
+	struct dmc_counter pending = {0}, total = {0};
+	u64 count = 0;
+
+	/* counts not yet folded into pmu->counters by the IRQ handler */
+	pmu->info.hw_info->get_counters(&pmu->info, &pending);
+
+	ddr_cnt_addition(&total, &pmu->counters, &pending,
+			 pmu->info.hw_info->chann_nr);
+
+	switch (event->attr.config) {
+	case ALL_CHAN_COUNTER_ID:
+		count = total.all_cnt;
+		break;
+	case CHAN1_COUNTER_ID:
+	case CHAN2_COUNTER_ID:
+	case CHAN3_COUNTER_ID:
+	case CHAN4_COUNTER_ID:
+	case CHAN5_COUNTER_ID:
+	case CHAN6_COUNTER_ID:
+	case CHAN7_COUNTER_ID:
+	case CHAN8_COUNTER_ID:
+		count = total.channel_cnt[event->attr.config - CHAN1_COUNTER_ID];
+		break;
+	default:
+		break;
+	}
+
+	local64_set(&event->count, count);
+}
+
+/*
+ * perf ->event_init() callback.
+ *
+ * Returns -ENOENT when the event belongs to another PMU, -EOPNOTSUPP for
+ * sampling events, per-task events, events without a CPU, or per-channel
+ * events selecting more than MAX_AXI_PORTS_OF_CHANNEL AXI ports. On success
+ * the event is bound to the CPU owned by this PMU and 0 is returned.
+ */
+static int meson_ddr_perf_event_init(struct perf_event *event)
+{
+	struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
+	unsigned int nr_ports;
+
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	if (is_sampling_event(event) ||
+	    (event->attach_state & PERF_ATTACH_TASK) ||
+	    event->cpu < 0)
+		return -EOPNOTSUPP;
+
+	/* every set bit in config1/config2 selects one AXI port */
+	nr_ports = hweight64(event->attr.config1) +
+		   hweight64(event->attr.config2);
+
+	if (event->attr.config != ALL_CHAN_COUNTER_ID &&
+	    nr_ports > MAX_AXI_PORTS_OF_CHANNEL)
+		return -EOPNOTSUPP;
+
+	event->cpu = pmu->cpu;
+
+	return 0;
+}
+
+/*
+ * perf ->start() callback: discard the accumulated software counts and
+ * turn the hardware monitor on. @flags is not used.
+ */
+static void meson_ddr_perf_event_start(struct perf_event *event, int flags)
+{
+	struct ddr_pmu *ddr = to_ddr_pmu(event->pmu);
+
+	memset(&ddr->counters, 0, sizeof(ddr->counters));
+
+	dmc_pmu_enable(ddr);
+}
+
+/*
+ * perf ->add() callback: program the AXI port filters requested through
+ * config1/config2 and optionally start counting.
+ *
+ * Bit N of config1 selects AXI port N, bit N of config2 selects AXI port
+ * N + 64. Always returns 0.
+ */
+static int meson_ddr_perf_event_add(struct perf_event *event, int flags)
+{
+	u64 config1 = event->attr.config1;
+	u64 config2 = event->attr.config2;
+	int i;
+
+	/*
+	 * for_each_set_bit() takes a size in bits. Passing sizeof() would
+	 * limit the scan to the low 8 bits and silently drop every port
+	 * above 7.
+	 */
+	for_each_set_bit(i, (const unsigned long *)&config1,
+			 BITS_PER_TYPE(config1))
+		meson_ddr_set_axi_filter(event, i);
+
+	for_each_set_bit(i, (const unsigned long *)&config2,
+			 BITS_PER_TYPE(config2))
+		meson_ddr_set_axi_filter(event, i + 64);
+
+	if (flags & PERF_EF_START)
+		meson_ddr_perf_event_start(event, flags);
+
+	return 0;
+}
+
+/*
+ * perf ->stop() callback: optionally publish the final count (when
+ * PERF_EF_UPDATE is set in @flags), then turn the hardware monitor off.
+ */
+static void meson_ddr_perf_event_stop(struct perf_event *event, int flags)
+{
+	if (flags & PERF_EF_UPDATE)
+		meson_ddr_perf_event_update(event);
+
+	dmc_pmu_disable(to_ddr_pmu(event->pmu));
+}
+
+/*
+ * perf ->del() callback: stop the event and always publish its final
+ * count, whatever @flags says.
+ */
+static void meson_ddr_perf_event_del(struct perf_event *event, int flags)
+{
+	meson_ddr_perf_event_stop(event, PERF_EF_UPDATE);
+}
+
+/*
+ * sysfs "cpumask" attribute: print the single CPU this PMU counts on.
+ *
+ * The drvdata is treated as the embedded struct pmu, exactly as
+ * meson_ddr_perf_format_attr_visible() does, and converted with
+ * to_ddr_pmu() so the lookup does not depend on the member layout of
+ * struct ddr_pmu.
+ */
+static ssize_t meson_ddr_perf_cpumask_show(struct device *dev,
+					   struct device_attribute *attr,
+					   char *buf)
+{
+	struct pmu *perf_pmu = dev_get_drvdata(dev);
+	struct ddr_pmu *pmu = to_ddr_pmu(perf_pmu);
+
+	return cpumap_print_to_pagebuf(true, buf, cpumask_of(pmu->cpu));
+}
+
+static struct device_attribute meson_ddr_perf_cpumask_attr =
+__ATTR(cpumask, 0444, meson_ddr_perf_cpumask_show, NULL);
+
+static struct attribute *meson_ddr_perf_cpumask_attrs[] = {
+ &meson_ddr_perf_cpumask_attr.attr,
+ NULL,
+};
+
+static const struct attribute_group ddr_perf_cpumask_attr_group = {
+ .attrs = meson_ddr_perf_cpumask_attrs,
+};
+
+/* sysfs show handler for an event attribute: prints "event=0x<id>". */
+static ssize_t
+pmu_event_show(struct device *dev, struct device_attribute *attr,
+	       char *page)
+{
+	struct perf_pmu_events_attr *ev =
+		container_of(attr, struct perf_pmu_events_attr, attr);
+
+	return sysfs_emit(page, "event=0x%02llx\n", ev->id);
+}
+
+/* sysfs show handler for "<event>.unit": every event is reported in MB. */
+static ssize_t
+event_show_unit(struct device *dev, struct device_attribute *attr,
+		char *page)
+{
+	return sysfs_emit(page, "MB\n");
+}
+
+/*
+ * sysfs show handler for "<event>.scale".
+ *
+ * One count stands for 16 bytes, and 16 / (1024 * 1024) MB is
+ * 1.52587890625e-05 MB.
+ */
+static ssize_t
+event_show_scale(struct device *dev, struct device_attribute *attr,
+		 char *page)
+{
+	return sysfs_emit(page, "1.52587890625e-05\n");
+}
+
+#define AML_DDR_PMU_EVENT_ATTR(_name, _id) \
+{ \
+ .attr = __ATTR(_name, 0444, pmu_event_show, NULL), \
+ .id = _id, \
+}
+
+#define AML_DDR_PMU_EVENT_UNIT_ATTR(_name) \
+ __ATTR(_name.unit, 0444, event_show_unit, NULL)
+
+#define AML_DDR_PMU_EVENT_SCALE_ATTR(_name) \
+ __ATTR(_name.scale, 0444, event_show_scale, NULL)
+
+static struct device_attribute event_unit_attrs[] = {
+ AML_DDR_PMU_EVENT_UNIT_ATTR(total_rw_bytes),
+ AML_DDR_PMU_EVENT_UNIT_ATTR(chan_1_rw_bytes),
+ AML_DDR_PMU_EVENT_UNIT_ATTR(chan_2_rw_bytes),
+ AML_DDR_PMU_EVENT_UNIT_ATTR(chan_3_rw_bytes),
+ AML_DDR_PMU_EVENT_UNIT_ATTR(chan_4_rw_bytes),
+ AML_DDR_PMU_EVENT_UNIT_ATTR(chan_5_rw_bytes),
+ AML_DDR_PMU_EVENT_UNIT_ATTR(chan_6_rw_bytes),
+ AML_DDR_PMU_EVENT_UNIT_ATTR(chan_7_rw_bytes),
+ AML_DDR_PMU_EVENT_UNIT_ATTR(chan_8_rw_bytes),
+};
+
+static struct device_attribute event_scale_attrs[] = {
+ AML_DDR_PMU_EVENT_SCALE_ATTR(total_rw_bytes),
+ AML_DDR_PMU_EVENT_SCALE_ATTR(chan_1_rw_bytes),
+ AML_DDR_PMU_EVENT_SCALE_ATTR(chan_2_rw_bytes),
+ AML_DDR_PMU_EVENT_SCALE_ATTR(chan_3_rw_bytes),
+ AML_DDR_PMU_EVENT_SCALE_ATTR(chan_4_rw_bytes),
+ AML_DDR_PMU_EVENT_SCALE_ATTR(chan_5_rw_bytes),
+ AML_DDR_PMU_EVENT_SCALE_ATTR(chan_6_rw_bytes),
+ AML_DDR_PMU_EVENT_SCALE_ATTR(chan_7_rw_bytes),
+ AML_DDR_PMU_EVENT_SCALE_ATTR(chan_8_rw_bytes),
+};
+
+static struct perf_pmu_events_attr event_attrs[] = {
+ AML_DDR_PMU_EVENT_ATTR(total_rw_bytes, ALL_CHAN_COUNTER_ID),
+ AML_DDR_PMU_EVENT_ATTR(chan_1_rw_bytes, CHAN1_COUNTER_ID),
+ AML_DDR_PMU_EVENT_ATTR(chan_2_rw_bytes, CHAN2_COUNTER_ID),
+ AML_DDR_PMU_EVENT_ATTR(chan_3_rw_bytes, CHAN3_COUNTER_ID),
+ AML_DDR_PMU_EVENT_ATTR(chan_4_rw_bytes, CHAN4_COUNTER_ID),
+ AML_DDR_PMU_EVENT_ATTR(chan_5_rw_bytes, CHAN5_COUNTER_ID),
+ AML_DDR_PMU_EVENT_ATTR(chan_6_rw_bytes, CHAN6_COUNTER_ID),
+ AML_DDR_PMU_EVENT_ATTR(chan_7_rw_bytes, CHAN7_COUNTER_ID),
+ AML_DDR_PMU_EVENT_ATTR(chan_8_rw_bytes, CHAN8_COUNTER_ID),
+};
+
+/* each event is exposed through three attributes: id, unit and scale */
+static struct attribute *ddr_perf_events_attrs[COUNTER_MAX_ID * 3];
+
+static struct attribute_group ddr_perf_events_attr_group = {
+ .name = "events",
+ .attrs = ddr_perf_events_attrs,
+};
+
+/*
+ * is_visible() callback of the "format" group.
+ *
+ * The attribute's own show() routine is used to recover its format string.
+ * For "config1:<bit>" and "config2:<bit>" the attribute is shown only when
+ * that bit is set in capability[0] or capability[1] of the hardware
+ * description. Any other format string is always visible.
+ *
+ * Returns attr->mode when visible, 0 when hidden. A bit index that does
+ * not fit in a u64 capability word is hidden rather than shifted.
+ */
+static umode_t meson_ddr_perf_format_attr_visible(struct kobject *kobj,
+						  struct attribute *attr,
+						  int n)
+{
+	struct pmu *pmu = dev_get_drvdata(kobj_to_dev(kobj));
+	struct ddr_pmu *ddr_pmu = to_ddr_pmu(pmu);
+	const u64 *capability = ddr_pmu->info.hw_info->capability;
+	struct device_attribute *dev_attr;
+	int id, word;
+	char value[20] = ""; /* config1:xxx, 20 is enough */
+
+	dev_attr = container_of(attr, struct device_attribute, attr);
+	dev_attr->show(NULL, NULL, value);
+
+	if (sscanf(value, "config1:%d", &id) == 1)
+		word = 0;
+	else if (sscanf(value, "config2:%d", &id) == 1)
+		word = 1;
+	else
+		return attr->mode;
+
+	/* shifting by a negative or too large amount is undefined */
+	if (id < 0 || id >= (int)BITS_PER_TYPE(u64))
+		return 0;
+
+	return capability[word] & (1ULL << id) ? attr->mode : 0;
+}
+
+static struct attribute_group ddr_perf_format_attr_group = {
+ .name = "format",
+ .is_visible = meson_ddr_perf_format_attr_visible,
+};
+
+/*
+ * sysfs "identifier" attribute: print the name stored in the driver state.
+ *
+ * The drvdata is treated as the embedded struct pmu, exactly as
+ * meson_ddr_perf_format_attr_visible() does, and converted with
+ * to_ddr_pmu() so the lookup does not depend on the member layout of
+ * struct ddr_pmu.
+ */
+static ssize_t meson_ddr_perf_identifier_show(struct device *dev,
+					      struct device_attribute *attr,
+					      char *page)
+{
+	struct pmu *perf_pmu = dev_get_drvdata(dev);
+	struct ddr_pmu *pmu = to_ddr_pmu(perf_pmu);
+
+	return sysfs_emit(page, "%s\n", pmu->name);
+}
+
+static struct device_attribute meson_ddr_perf_identifier_attr =
+__ATTR(identifier, 0444, meson_ddr_perf_identifier_show, NULL);
+
+static struct attribute *meson_ddr_perf_identifier_attrs[] = {
+ &meson_ddr_perf_identifier_attr.attr,
+ NULL,
+};
+
+static const struct attribute_group ddr_perf_identifier_attr_group = {
+ .attrs = meson_ddr_perf_identifier_attrs,
+};
+
+static const struct attribute_group *attr_groups[] = {
+ &ddr_perf_events_attr_group,
+ &ddr_perf_format_attr_group,
+ &ddr_perf_cpumask_attr_group,
+ &ddr_perf_identifier_attr_group,
+ NULL,
+};
+
+/*
+ * Interrupt handler of the DMC monitor.
+ *
+ * Asks the SoC specific irq_handler() hook for the counts of the period
+ * that just ended, adds them to the running totals in pmu->counters and,
+ * while the PMU is still enabled, re-arms the hardware.
+ *
+ * @dev_id is the struct dmc_info embedded in the struct ddr_pmu.
+ * Always returns IRQ_HANDLED, including when the hook reports that there
+ * was nothing to collect.
+ */
+static irqreturn_t dmc_irq_handler(int irq, void *dev_id)
+{
+	struct dmc_info *info = dev_id;
+	struct ddr_pmu *pmu = dmc_info_to_pmu(info);
+	/*
+	 * Zero initialised: the hook only fills chann_nr channel entries,
+	 * while the debug print below always reads the first four.
+	 */
+	struct dmc_counter counters = {0};
+	struct dmc_counter *sum_cnter = &pmu->counters;
+	int i;
+
+	if (info->hw_info->irq_handler(info, &counters) != 0)
+		return IRQ_HANDLED;
+
+	sum_cnter->all_cnt += counters.all_cnt;
+	sum_cnter->all_req += counters.all_req;
+
+	for (i = 0; i < pmu->info.hw_info->chann_nr; i++)
+		sum_cnter->channel_cnt[i] += counters.channel_cnt[i];
+
+	if (pmu->pmu_enabled)
+		/*
+		 * The timer interrupt only supports one-shot mode, so it
+		 * has to be re-enabled in the ISR to keep counting
+		 * continuously.
+		 */
+		info->hw_info->enable(info);
+
+	dev_dbg(pmu->dev, "counts: %llu %llu %llu, %llu, %llu, %llu\t\t"
+			"sum: %llu %llu %llu, %llu, %llu, %llu\n",
+			counters.all_req,
+			counters.all_cnt,
+			counters.channel_cnt[0],
+			counters.channel_cnt[1],
+			counters.channel_cnt[2],
+			counters.channel_cnt[3],
+
+			pmu->counters.all_req,
+			pmu->counters.all_cnt,
+			pmu->counters.channel_cnt[0],
+			pmu->counters.channel_cnt[1],
+			pmu->counters.channel_cnt[2],
+			pmu->counters.channel_cnt[3]);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * CPU hotplug teardown callback.
+ *
+ * When the CPU going offline is the one this PMU counts on, move the perf
+ * context and the interrupt affinity to any other online CPU. Nothing is
+ * done when another CPU goes down or when no other CPU is online.
+ * Always returns 0.
+ */
+static int ddr_perf_offline_cpu(unsigned int cpu, struct hlist_node *node)
+{
+	struct ddr_pmu *ddr = hlist_entry_safe(node, struct ddr_pmu, node);
+	int new_cpu;
+
+	if (ddr->cpu != cpu)
+		return 0;
+
+	new_cpu = cpumask_any_but(cpu_online_mask, cpu);
+	if (new_cpu < nr_cpu_ids) {
+		perf_pmu_migrate_context(&ddr->pmu, cpu, new_cpu);
+		ddr->cpu = new_cpu;
+
+		WARN_ON(irq_set_affinity(ddr->info.irq_num,
+					 cpumask_of(ddr->cpu)));
+	}
+
+	return 0;
+}
+
+/*
+ * Populate the NULL terminated attribute list of the "events" group.
+ *
+ * Table index 0 is the all-channel event and is always added; it is
+ * followed by one entry per hardware channel. Each event contributes three
+ * attributes: the event id, its unit and its scale.
+ */
+static void fill_event_attr(struct ddr_pmu *pmu)
+{
+	struct attribute **slot = ddr_perf_events_attrs;
+	int ev = 0;
+
+	/* the body runs for ev = 0 (all channels) and then ev = 1..chann_nr */
+	do {
+		*slot++ = &event_attrs[ev].attr.attr;
+		*slot++ = &event_unit_attrs[ev].attr;
+		*slot++ = &event_scale_attrs[ev].attr;
+	} while (ev++ < pmu->info.hw_info->chann_nr);
+
+	*slot = NULL; /* mark end */
+}
+
+/* Install the SoC specific attribute list into the shared "format" group. */
+static void fmt_attr_fill(struct attribute **fmt_attr)
+{
+	ddr_perf_format_attr_group.attrs = fmt_attr;
+}
+
+/*
+ * Collect the hardware resources of the device.
+ *
+ * Looks up the SoC description attached to the matched compatible, maps
+ * one register window per DMC (resources 0 .. dmc_nr - 1) followed by the
+ * PLL register window (resource dmc_nr), and requests the interrupt.
+ *
+ * Returns 0 on success, -ENODEV when no match data is available, or the
+ * negative error code of the failing helper.
+ */
+static int ddr_pmu_parse_dt(struct platform_device *pdev,
+			    struct dmc_info *info)
+{
+	void __iomem *base;
+	int i, ret;
+
+	info->hw_info = of_device_get_match_data(&pdev->dev);
+	/* every later step dereferences hw_info, so refuse to go on without it */
+	if (!info->hw_info)
+		return -ENODEV;
+
+	for (i = 0; i < info->hw_info->dmc_nr; i++) {
+		/* resources 0 .. dmc_nr - 1 are the ddr register bases */
+		base = devm_platform_ioremap_resource(pdev, i);
+		if (IS_ERR(base))
+			return PTR_ERR(base);
+
+		info->ddr_reg[i] = base;
+	}
+
+	/* resource i for pll register base */
+	base = devm_platform_ioremap_resource(pdev, i);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	info->pll_reg = base;
+
+	ret = platform_get_irq(pdev, 0);
+	if (ret < 0)
+		return ret;
+
+	info->irq_num = ret;
+
+	ret = devm_request_irq(&pdev->dev, info->irq_num, dmc_irq_handler,
+			       IRQF_NOBALANCING, dev_name(&pdev->dev),
+			       (void *)info);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+/*
+ * Allocate, set up and register the DDR bandwidth PMU of @pdev.
+ *
+ * On success the driver state is stored as platform drvdata and 0 is
+ * returned. On failure a negative error code is returned and the CPU
+ * hotplug state set up here is torn down again; memory, mappings and the
+ * interrupt are device managed.
+ */
+int meson_ddr_pmu_create(struct platform_device *pdev)
+{
+	int ret;
+	char *name;
+	struct ddr_pmu *pmu;
+
+	pmu = devm_kzalloc(&pdev->dev, sizeof(struct ddr_pmu), GFP_KERNEL);
+	if (!pmu)
+		return -ENOMEM;
+
+	*pmu = (struct ddr_pmu) {
+		.pmu = {
+			.module = THIS_MODULE,
+			.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
+			.task_ctx_nr = perf_invalid_context,
+			.attr_groups = attr_groups,
+			.event_init = meson_ddr_perf_event_init,
+			.add = meson_ddr_perf_event_add,
+			.del = meson_ddr_perf_event_del,
+			.start = meson_ddr_perf_event_start,
+			.stop = meson_ddr_perf_event_stop,
+			.read = meson_ddr_perf_event_update,
+		},
+	};
+
+	/*
+	 * The interrupt handler prints through pmu->dev, so it has to be
+	 * valid before ddr_pmu_parse_dt() requests the interrupt.
+	 */
+	pmu->dev = &pdev->dev;
+	pmu->pmu_enabled = false;
+
+	ret = ddr_pmu_parse_dt(pdev, &pmu->info);
+	if (ret < 0)
+		return ret;
+
+	fmt_attr_fill(pmu->info.hw_info->fmt_attr);
+
+	/*
+	 * Probe runs preemptible and any online CPU will do as the first
+	 * owner, so use the raw variant: smp_processor_id() would trip the
+	 * preemption debug check here.
+	 */
+	pmu->cpu = raw_smp_processor_id();
+
+	name = devm_kasprintf(&pdev->dev, GFP_KERNEL, DDR_PERF_DEV_NAME);
+	if (!name)
+		return -ENOMEM;
+
+	/*
+	 * The "identifier" attribute prints pmu->name and can be read once
+	 * the PMU is registered, so set it first.
+	 */
+	pmu->name = name;
+
+	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, name, NULL,
+				      ddr_perf_offline_cpu);
+	if (ret < 0)
+		return ret;
+
+	pmu->cpuhp_state = ret;
+
+	/* Register the pmu instance for cpu hotplug */
+	ret = cpuhp_state_add_instance_nocalls(pmu->cpuhp_state, &pmu->node);
+	if (ret)
+		goto cpuhp_instance_err;
+
+	fill_event_attr(pmu);
+
+	ret = perf_pmu_register(&pmu->pmu, name, -1);
+	if (ret)
+		goto pmu_register_err;
+
+	platform_set_drvdata(pdev, pmu);
+
+	return 0;
+
+pmu_register_err:
+	cpuhp_state_remove_instance_nocalls(pmu->cpuhp_state, &pmu->node);
+
+cpuhp_instance_err:
+	cpuhp_remove_state(pmu->cpuhp_state);
+
+	return ret;
+}
+
+/*
+ * Undo meson_ddr_pmu_create(): unregister the PMU, then drop the hotplug
+ * instance and the hotplug state. Always returns 0.
+ */
+int meson_ddr_pmu_remove(struct platform_device *pdev)
+{
+	struct ddr_pmu *ddr = platform_get_drvdata(pdev);
+	enum cpuhp_state state = ddr->cpuhp_state;
+
+	perf_pmu_unregister(&ddr->pmu);
+
+	cpuhp_state_remove_instance_nocalls(state, &ddr->node);
+	cpuhp_remove_state(state);
+
+	return 0;
+}
diff --git a/drivers/perf/amlogic/meson_g12_ddr_pmu.c b/drivers/perf/amlogic/meson_g12_ddr_pmu.c
new file mode 100644
index 000000000000..a78fdb15e26c
--- /dev/null
+++ b/drivers/perf/amlogic/meson_g12_ddr_pmu.c
@@ -0,0 +1,394 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2022 Amlogic, Inc. All rights reserved.
+ */
+
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/perf_event.h>
+#include <linux/platform_device.h>
+#include <linux/printk.h>
+#include <linux/types.h>
+
+#include <soc/amlogic/meson_ddr_pmu.h>
+
+#define PORT_MAJOR 32
+#define DEFAULT_XTAL_FREQ 24000000UL
+
+#define DMC_QOS_IRQ BIT(30)
+
+/* DMC bandwidth monitor register address offset */
+#define DMC_MON_G12_CTRL0 (0x20 << 2)
+#define DMC_MON_G12_CTRL1 (0x21 << 2)
+#define DMC_MON_G12_CTRL2 (0x22 << 2)
+#define DMC_MON_G12_CTRL3 (0x23 << 2)
+#define DMC_MON_G12_CTRL4 (0x24 << 2)
+#define DMC_MON_G12_CTRL5 (0x25 << 2)
+#define DMC_MON_G12_CTRL6 (0x26 << 2)
+#define DMC_MON_G12_CTRL7 (0x27 << 2)
+#define DMC_MON_G12_CTRL8 (0x28 << 2)
+
+#define DMC_MON_G12_ALL_REQ_CNT (0x29 << 2)
+#define DMC_MON_G12_ALL_GRANT_CNT (0x2a << 2)
+#define DMC_MON_G12_ONE_GRANT_CNT (0x2b << 2)
+#define DMC_MON_G12_SEC_GRANT_CNT (0x2c << 2)
+#define DMC_MON_G12_THD_GRANT_CNT (0x2d << 2)
+#define DMC_MON_G12_FOR_GRANT_CNT (0x2e << 2)
+#define DMC_MON_G12_TIMER (0x2f << 2)
+
+/* Each bit represents an AXI line */
+PMU_FORMAT_ATTR(event, "config:0-7");
+PMU_FORMAT_ATTR(arm, "config1:0");
+PMU_FORMAT_ATTR(gpu, "config1:1");
+PMU_FORMAT_ATTR(pcie, "config1:2");
+PMU_FORMAT_ATTR(hdcp, "config1:3");
+PMU_FORMAT_ATTR(hevc_front, "config1:4");
+PMU_FORMAT_ATTR(usb3_0, "config1:6");
+PMU_FORMAT_ATTR(device, "config1:7");
+PMU_FORMAT_ATTR(hevc_back, "config1:8");
+PMU_FORMAT_ATTR(h265enc, "config1:9");
+PMU_FORMAT_ATTR(vpu_read1, "config1:16");
+PMU_FORMAT_ATTR(vpu_read2, "config1:17");
+PMU_FORMAT_ATTR(vpu_read3, "config1:18");
+PMU_FORMAT_ATTR(vpu_write1, "config1:19");
+PMU_FORMAT_ATTR(vpu_write2, "config1:20");
+PMU_FORMAT_ATTR(vdec, "config1:21");
+PMU_FORMAT_ATTR(hcodec, "config1:22");
+PMU_FORMAT_ATTR(ge2d, "config1:23");
+
+PMU_FORMAT_ATTR(spicc1, "config1:32");
+PMU_FORMAT_ATTR(usb0, "config1:33");
+PMU_FORMAT_ATTR(dma, "config1:34");
+PMU_FORMAT_ATTR(arb0, "config1:35");
+PMU_FORMAT_ATTR(sd_emmc_b, "config1:36");
+PMU_FORMAT_ATTR(usb1, "config1:37");
+PMU_FORMAT_ATTR(audio, "config1:38");
+PMU_FORMAT_ATTR(aififo, "config1:39");
+PMU_FORMAT_ATTR(parser, "config1:41");
+PMU_FORMAT_ATTR(ao_cpu, "config1:42");
+PMU_FORMAT_ATTR(sd_emmc_c, "config1:43");
+PMU_FORMAT_ATTR(spicc2, "config1:44");
+PMU_FORMAT_ATTR(ethernet, "config1:45");
+PMU_FORMAT_ATTR(sana, "config1:46");
+
+/* for sm1 and g12b */
+PMU_FORMAT_ATTR(nna, "config1:10");
+
+/* for g12b only */
+PMU_FORMAT_ATTR(gdc, "config1:11");
+PMU_FORMAT_ATTR(mipi_isp, "config1:12");
+PMU_FORMAT_ATTR(arm1, "config1:13");
+PMU_FORMAT_ATTR(sd_emmc_a, "config1:40");
+
+static struct attribute *g12_pmu_format_attrs[] = {
+ &format_attr_event.attr,
+ &format_attr_arm.attr,
+ &format_attr_gpu.attr,
+ &format_attr_nna.attr,
+ &format_attr_gdc.attr,
+ &format_attr_arm1.attr,
+ &format_attr_mipi_isp.attr,
+ &format_attr_sd_emmc_a.attr,
+ &format_attr_pcie.attr,
+ &format_attr_hdcp.attr,
+ &format_attr_hevc_front.attr,
+ &format_attr_usb3_0.attr,
+ &format_attr_device.attr,
+ &format_attr_hevc_back.attr,
+ &format_attr_h265enc.attr,
+ &format_attr_vpu_read1.attr,
+ &format_attr_vpu_read2.attr,
+ &format_attr_vpu_read3.attr,
+ &format_attr_vpu_write1.attr,
+ &format_attr_vpu_write2.attr,
+ &format_attr_vdec.attr,
+ &format_attr_hcodec.attr,
+ &format_attr_ge2d.attr,
+ &format_attr_spicc1.attr,
+ &format_attr_usb0.attr,
+ &format_attr_dma.attr,
+ &format_attr_arb0.attr,
+ &format_attr_sd_emmc_b.attr,
+ &format_attr_usb1.attr,
+ &format_attr_audio.attr,
+ &format_attr_aififo.attr,
+ &format_attr_parser.attr,
+ &format_attr_ao_cpu.attr,
+ &format_attr_sd_emmc_c.attr,
+ &format_attr_spicc2.attr,
+ &format_attr_ethernet.attr,
+ &format_attr_sana.attr,
+ NULL,
+};
+
+/*
+ * Calculate the ddr clock from the PLL register.
+ *
+ * Only the low 20 bits of the register are used: bits 8:0 give m, bits
+ * 14:10 give n, bits 18:16 select the od divider and bit 19 selects od1.
+ * When n is 0 the crystal frequency divided by 1000 is returned unchanged.
+ */
+static unsigned long dmc_g12_get_freq_quick(struct dmc_info *info)
+{
+	/* od divider for selector values 0..4; other values keep the default */
+	static const unsigned int od_div_tbl[] = { 2, 3, 4, 6, 8 };
+	unsigned int pll, sel, m, n, od1;
+	unsigned int od_div = 0xfff;
+	unsigned long freq;
+
+	pll = readl(info->pll_reg) & 0xfffff;
+
+	sel = (pll >> 16) & 7;
+	if (sel < ARRAY_SIZE(od_div_tbl))
+		od_div = od_div_tbl[sel];
+
+	m = pll & 0x1ff;
+	n = (pll >> 10) & 0x1f;
+	od1 = ((pll >> 19) & 0x1) ? 2 : 1;
+
+	freq = DEFAULT_XTAL_FREQ / 1000; /* avoid overflow */
+	if (n)
+		freq = ((((freq * m) / n) >> od1) / od_div) * 1000;
+
+	return freq;
+}
+
+#ifdef DEBUG
+/*
+ * Debug helper: print the nine monitor control registers, the request and
+ * grant counters and the timer of the first DMC.
+ */
+static void g12_dump_reg(struct dmc_info *db)
+{
+	int i;
+	unsigned int r;
+
+	/* CTRL0..CTRL8 are consecutive 32-bit registers */
+	for (i = 0; i < 9; i++) {
+		r = readl(db->ddr_reg[0] + (DMC_MON_G12_CTRL0 + (i << 2)));
+		pr_notice("DMC_MON_CTRL%d: %08x\n", i, r);
+	}
+	r = readl(db->ddr_reg[0] + DMC_MON_G12_ALL_REQ_CNT);
+	pr_notice("DMC_MON_ALL_REQ_CNT: %08x\n", r);
+	r = readl(db->ddr_reg[0] + DMC_MON_G12_ALL_GRANT_CNT);
+	pr_notice("DMC_MON_ALL_GRANT_CNT:%08x\n", r);
+	r = readl(db->ddr_reg[0] + DMC_MON_G12_ONE_GRANT_CNT);
+	pr_notice("DMC_MON_ONE_GRANT_CNT:%08x\n", r);
+	r = readl(db->ddr_reg[0] + DMC_MON_G12_SEC_GRANT_CNT);
+	pr_notice("DMC_MON_SEC_GRANT_CNT:%08x\n", r);
+	r = readl(db->ddr_reg[0] + DMC_MON_G12_THD_GRANT_CNT);
+	pr_notice("DMC_MON_THD_GRANT_CNT:%08x\n", r);
+	r = readl(db->ddr_reg[0] + DMC_MON_G12_FOR_GRANT_CNT);
+	pr_notice("DMC_MON_FOR_GRANT_CNT:%08x\n", r);
+	r = readl(db->ddr_reg[0] + DMC_MON_G12_TIMER);
+	pr_notice("DMC_MON_TIMER: %08x\n", r);
+}
+#endif
+
+/*
+ * Arm the monitor: load the timer with a tenth of the ddr clock rate and
+ * set the enable, timer and channel bits in CTRL0.
+ */
+static void dmc_g12_counter_enable(struct dmc_info *info)
+{
+	void __iomem *base = info->ddr_reg[0];
+	unsigned long clock_count;
+	unsigned int ctrl;
+
+	clock_count = dmc_g12_get_freq_quick(info) / 10; /* 100ms */
+	writel(clock_count, base + DMC_MON_G12_TIMER);
+
+	/* the value read here is discarded; only the register access is kept */
+	ctrl = readl(base + DMC_MON_G12_CTRL0);
+
+	/* enable all channel */
+	ctrl = BIT(31) |	/* enable bit */
+	       BIT(20) |	/* use timer */
+	       0x0f;		/* 4 channels */
+
+	writel(ctrl, base + DMC_MON_G12_CTRL0);
+
+#ifdef DEBUG
+	g12_dump_reg(info);
+#endif
+}
+
+/*
+ * Program the port filter of one monitor channel.
+ *
+ * @port < 0 clears both filter registers of @channel.
+ * @port in 0 .. PORT_MAJOR - 1 sets that bit in the port register and
+ * writes 0xffff to the sub-port register.
+ * @port in PORT_MAJOR .. 2 * PORT_MAJOR - 1 writes BIT(23) to the port
+ * register and sets bit (@port - PORT_MAJOR) in the sub-port register.
+ * Larger values are ignored: the 32-bit sub-port register has no bit for
+ * them and shifting that far would be undefined.
+ *
+ * The caller is responsible for passing a valid @channel.
+ */
+static void dmc_g12_config_fiter(struct dmc_info *info,
+				 int port, int channel)
+{
+	u32 val;
+	u32 rp[MAX_CHANNEL_NUM] = {DMC_MON_G12_CTRL1, DMC_MON_G12_CTRL3,
+				   DMC_MON_G12_CTRL5, DMC_MON_G12_CTRL7};
+	u32 rs[MAX_CHANNEL_NUM] = {DMC_MON_G12_CTRL2, DMC_MON_G12_CTRL4,
+				   DMC_MON_G12_CTRL6, DMC_MON_G12_CTRL8};
+	int subport = -1;
+
+	/* clear all port mask */
+	if (port < 0) {
+		writel(0, info->ddr_reg[0] + rp[channel]);
+		writel(0, info->ddr_reg[0] + rs[channel]);
+		return;
+	}
+
+	if (port >= 2 * PORT_MAJOR)
+		return;
+
+	if (port >= PORT_MAJOR)
+		subport = port - PORT_MAJOR;
+
+	if (subport < 0) {
+		val = readl(info->ddr_reg[0] + rp[channel]);
+		/* BIT() is unsigned, so bit 31 does not overflow a signed int */
+		val |= BIT(port);
+		writel(val, info->ddr_reg[0] + rp[channel]);
+		val = 0xffff;
+		writel(val, info->ddr_reg[0] + rs[channel]);
+	} else {
+		val = BIT(23); /* select device */
+		writel(val, info->ddr_reg[0] + rp[channel]);
+		val = readl(info->ddr_reg[0] + rs[channel]);
+		val |= BIT(subport);
+		writel(val, info->ddr_reg[0] + rs[channel]);
+	}
+}
+
+/*
+ * set_axi_filter() hook: add AXI port @axi_id to the filter of @channel.
+ *
+ * Valid channels are 0 .. chann_nr - 1. Anything else is ignored,
+ * including channel == chann_nr, which would otherwise select an unused
+ * (zero) register offset in the filter tables.
+ */
+static void dmc_g12_set_axi_filter(struct dmc_info *info, int axi_id, int channel)
+{
+	if (channel < 0 || channel >= info->hw_info->chann_nr)
+		return;
+
+	dmc_g12_config_fiter(info, axi_id, channel);
+}
+
+/*
+ * disable() hook: zero the control register, the timer and all request and
+ * grant counters, then clear the port filter of every channel.
+ */
+static void dmc_g12_counter_disable(struct dmc_info *info)
+{
+	/* registers cleared, in this order */
+	static const u32 clr_regs[] = {
+		DMC_MON_G12_CTRL0,	/* clear timer */
+		DMC_MON_G12_TIMER,
+		DMC_MON_G12_ALL_REQ_CNT,
+		DMC_MON_G12_ALL_GRANT_CNT,
+		DMC_MON_G12_ONE_GRANT_CNT,
+		DMC_MON_G12_SEC_GRANT_CNT,
+		DMC_MON_G12_THD_GRANT_CNT,
+		DMC_MON_G12_FOR_GRANT_CNT,
+	};
+	unsigned int r;
+	int ch;
+
+	for (r = 0; r < ARRAY_SIZE(clr_regs); r++)
+		writel(0, info->ddr_reg[0] + clr_regs[r]);
+
+	/* clear port channel mapping */
+	for (ch = 0; ch < info->hw_info->chann_nr; ch++)
+		dmc_g12_config_fiter(info, -1, ch);
+}
+
+/*
+ * get_counters() hook: copy the current hardware counts into @counter.
+ *
+ * Reads the all-grant and all-request counters, then one grant counter per
+ * channel; the per-channel registers are consecutive 32-bit registers
+ * starting at DMC_MON_G12_ONE_GRANT_CNT.
+ */
+static void dmc_g12_get_counters(struct dmc_info *info,
+				 struct dmc_counter *counter)
+{
+	void __iomem *base = info->ddr_reg[0];
+	int ch;
+
+	counter->all_cnt = readl(base + DMC_MON_G12_ALL_GRANT_CNT);
+	counter->all_req = readl(base + DMC_MON_G12_ALL_REQ_CNT);
+
+	for (ch = 0; ch < info->hw_info->chann_nr; ch++)
+		counter->channel_cnt[ch] =
+			readl(base + DMC_MON_G12_ONE_GRANT_CNT + (ch << 2));
+}
+
+/*
+ * irq_handler() hook.
+ *
+ * Returns -EINVAL without touching @counter when DMC_QOS_IRQ is not set in
+ * CTRL0. Otherwise reads the counters into @counter, writes the value that
+ * was read back to CTRL0 to clear the interrupt flag, and returns 0.
+ */
+static int dmc_g12_irq_handler(struct dmc_info *info,
+			       struct dmc_counter *counter)
+{
+	unsigned int ctrl = readl(info->ddr_reg[0] + DMC_MON_G12_CTRL0);
+
+	if (!(ctrl & DMC_QOS_IRQ))
+		return -EINVAL;
+
+	dmc_g12_get_counters(info, counter);
+
+	/* clear irq flags */
+	writel(ctrl, info->ddr_reg[0] + DMC_MON_G12_CTRL0);
+
+	return 0;
+}
+
+static const struct dmc_hw_info g12a_dmc_info = {
+ .enable = dmc_g12_counter_enable,
+ .disable = dmc_g12_counter_disable,
+ .irq_handler = dmc_g12_irq_handler,
+ .get_counters = dmc_g12_get_counters,
+ .set_axi_filter = dmc_g12_set_axi_filter,
+
+ .dmc_nr = 1,
+ .chann_nr = 4,
+ .capability = {0X7EFF00FF03DF, 0},
+ .fmt_attr = g12_pmu_format_attrs,
+};
+
+static const struct dmc_hw_info g12b_dmc_info = {
+ .enable = dmc_g12_counter_enable,
+ .disable = dmc_g12_counter_disable,
+ .irq_handler = dmc_g12_irq_handler,
+ .get_counters = dmc_g12_get_counters,
+ .set_axi_filter = dmc_g12_set_axi_filter,
+
+ .dmc_nr = 1,
+ .chann_nr = 4,
+ .capability = {0X7FFF00FF3FDF, 0},
+ .fmt_attr = g12_pmu_format_attrs,
+};
+
+static const struct dmc_hw_info sm1_dmc_info = {
+ .enable = dmc_g12_counter_enable,
+ .disable = dmc_g12_counter_disable,
+ .irq_handler = dmc_g12_irq_handler,
+ .get_counters = dmc_g12_get_counters,
+ .set_axi_filter = dmc_g12_set_axi_filter,
+
+ .dmc_nr = 1,
+ .chann_nr = 4,
+ .capability = {0X7EFF00FF07DF, 0},
+ .fmt_attr = g12_pmu_format_attrs,
+};
+
+/* Platform probe: hand the device to the shared DDR PMU core. */
+static int g12_ddr_pmu_probe(struct platform_device *pdev)
+{
+	return meson_ddr_pmu_create(pdev);
+}
+
+/*
+ * Platform remove: tear the PMU down through the shared core. The core's
+ * return value is not propagated; 0 is always returned.
+ */
+static int g12_ddr_pmu_remove(struct platform_device *pdev)
+{
+	meson_ddr_pmu_remove(pdev);
+
+	return 0;
+}
+
+/* Supported SoCs and the hardware description used for each of them. */
+static const struct of_device_id meson_ddr_pmu_dt_match[] = {
+	{
+		.compatible = "amlogic,g12a-ddr-pmu",
+		.data = &g12a_dmc_info,
+	},
+	{
+		.compatible = "amlogic,g12b-ddr-pmu",
+		.data = &g12b_dmc_info,
+	},
+	{
+		.compatible = "amlogic,sm1-ddr-pmu",
+		.data = &sm1_dmc_info,
+	},
+	{}
+};
+/* export the table so the module can be autoloaded when built as a module */
+MODULE_DEVICE_TABLE(of, meson_ddr_pmu_dt_match);
+
+static struct platform_driver g12_ddr_pmu_driver = {
+ .probe = g12_ddr_pmu_probe,
+ .remove = g12_ddr_pmu_remove,
+
+ .driver = {
+ .name = "meson-g12-ddr-pmu",
+ .of_match_table = meson_ddr_pmu_dt_match,
+ },
+};
+
+module_platform_driver(g12_ddr_pmu_driver);
+MODULE_AUTHOR("Jiucheng Xu");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Amlogic G12 series SoC DDR PMU");
diff --git a/drivers/perf/arm_cspmu/Kconfig b/drivers/perf/arm_cspmu/Kconfig
new file mode 100644
index 000000000000..0b316fe69a45
--- /dev/null
+++ b/drivers/perf/arm_cspmu/Kconfig
@@ -0,0 +1,13 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+config ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU
+ tristate "ARM Coresight Architecture PMU"
+ depends on ARM64 && ACPI
+ depends on ACPI_APMT || COMPILE_TEST
+ help
+ Provides support for performance monitoring unit (PMU) devices
+ based on ARM CoreSight PMU architecture. Note that this PMU
+ architecture does not have relationship with the ARM CoreSight
+ Self-Hosted Tracing.
diff --git a/drivers/perf/arm_cspmu/Makefile b/drivers/perf/arm_cspmu/Makefile
new file mode 100644
index 000000000000..fedb17df982d
--- /dev/null
+++ b/drivers/perf/arm_cspmu/Makefile
@@ -0,0 +1,6 @@
+# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# SPDX-License-Identifier: GPL-2.0
+
+obj-$(CONFIG_ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU) += arm_cspmu_module.o
+arm_cspmu_module-y := arm_cspmu.o nvidia_cspmu.o
diff --git a/drivers/perf/arm_cspmu/arm_cspmu.c b/drivers/perf/arm_cspmu/arm_cspmu.c
new file mode 100644
index 000000000000..e31302ab7e37
--- /dev/null
+++ b/drivers/perf/arm_cspmu/arm_cspmu.c
@@ -0,0 +1,1303 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ARM CoreSight Architecture PMU driver.
+ *
+ * This driver adds support for uncore PMU based on ARM CoreSight Performance
+ * Monitoring Unit Architecture. The PMU is accessible via MMIO registers and
+ * like other uncore PMUs, it does not support process specific events and
+ * cannot be used in sampling mode.
+ *
+ * This code is based on other uncore PMUs like ARM DSU PMU. It provides a
+ * generic implementation to operate the PMU according to CoreSight PMU
+ * architecture and ACPI ARM PMU table (APMT) documents below:
+ * - ARM CoreSight PMU architecture document number: ARM IHI 0091 A.a-00bet0.
+ * - APMT document number: ARM DEN0117.
+ *
+ * The user should refer to the vendor technical documentation to get details
+ * about the supported events.
+ *
+ * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ *
+ */
+
+#include <linux/acpi.h>
+#include <linux/cacheinfo.h>
+#include <linux/ctype.h>
+#include <linux/interrupt.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/module.h>
+#include <linux/perf_event.h>
+#include <linux/platform_device.h>
+#include <acpi/processor.h>
+
+#include "arm_cspmu.h"
+#include "nvidia_cspmu.h"
+
+#define PMUNAME "arm_cspmu"
+#define DRVNAME "arm-cs-arch-pmu"
+
+#define ARM_CSPMU_CPUMASK_ATTR(_name, _config) \
+ ARM_CSPMU_EXT_ATTR(_name, arm_cspmu_cpumask_show, \
+ (unsigned long)_config)
+
+/*
+ * CoreSight PMU Arch register offsets.
+ */
+#define PMEVCNTR_LO 0x0
+#define PMEVCNTR_HI 0x4
+#define PMEVTYPER 0x400
+#define PMCCFILTR 0x47C
+#define PMEVFILTR 0xA00
+#define PMCNTENSET 0xC00
+#define PMCNTENCLR 0xC20
+#define PMINTENSET 0xC40
+#define PMINTENCLR 0xC60
+#define PMOVSCLR 0xC80
+#define PMOVSSET 0xCC0
+#define PMCFGR 0xE00
+#define PMCR 0xE04
+#define PMIIDR 0xE08
+
+/* PMCFGR register field */
+#define PMCFGR_NCG GENMASK(31, 28)
+#define PMCFGR_HDBG BIT(24)
+#define PMCFGR_TRO BIT(23)
+#define PMCFGR_SS BIT(22)
+#define PMCFGR_FZO BIT(21)
+#define PMCFGR_MSI BIT(20)
+#define PMCFGR_UEN BIT(19)
+#define PMCFGR_NA BIT(17)
+#define PMCFGR_EX BIT(16)
+#define PMCFGR_CCD BIT(15)
+#define PMCFGR_CC BIT(14)
+#define PMCFGR_SIZE GENMASK(13, 8)
+#define PMCFGR_N GENMASK(7, 0)
+
+/* PMCR register field */
+#define PMCR_TRO BIT(11)
+#define PMCR_HDBG BIT(10)
+#define PMCR_FZO BIT(9)
+#define PMCR_NA BIT(8)
+#define PMCR_DP BIT(5)
+#define PMCR_X BIT(4)
+#define PMCR_D BIT(3)
+#define PMCR_C BIT(2)
+#define PMCR_P BIT(1)
+#define PMCR_E BIT(0)
+
+/* Each SET/CLR register supports up to 32 counters. */
+#define ARM_CSPMU_SET_CLR_COUNTER_SHIFT 5
+#define ARM_CSPMU_SET_CLR_COUNTER_NUM \
+ (1 << ARM_CSPMU_SET_CLR_COUNTER_SHIFT)
+
+/*
+ * Convert counter idx into SET/CLR register number.
+ * The argument is parenthesized so that expressions such as "a + b" are
+ * shifted as a whole.
+ */
+#define COUNTER_TO_SET_CLR_ID(idx)			\
+	((idx) >> ARM_CSPMU_SET_CLR_COUNTER_SHIFT)
+
+/*
+ * Convert counter idx into SET/CLR register bit.
+ * The argument is parenthesized so that compound expressions are masked as
+ * a whole.
+ */
+#define COUNTER_TO_SET_CLR_BIT(idx)			\
+	((idx) & (ARM_CSPMU_SET_CLR_COUNTER_NUM - 1))
+
+#define ARM_CSPMU_ACTIVE_CPU_MASK 0x0
+#define ARM_CSPMU_ASSOCIATED_CPU_MASK 0x1
+
+/*
+ * Check if field f in flags is set with value v.
+ * f and v are pasted onto the ACPI_APMT_FLAGS_ prefix; flags is parenthesized
+ * so that any expression can be passed safely.
+ */
+#define CHECK_APMT_FLAG(flags, f, v) \
+	(((flags) & (ACPI_APMT_FLAGS_ ## f)) == (ACPI_APMT_FLAGS_ ## f ## _ ## v))
+
+/*
+ * Check and use default if implementer doesn't provide attribute callback.
+ * The default is the file-local function named arm_cspmu_<callback>; ops is
+ * parenthesized so that any pointer expression can be passed.
+ */
+#define CHECK_DEFAULT_IMPL_OPS(ops, callback)			\
+	do {							\
+		if (!(ops)->callback)				\
+			(ops)->callback = arm_cspmu_ ## callback;	\
+	} while (0)
+
+/*
+ * Maximum poll count for reading counter value using high-low-high sequence.
+ */
+#define HILOHI_MAX_POLL 1000
+
+/* JEDEC-assigned JEP106 identification code */
+#define ARM_CSPMU_IMPL_ID_NVIDIA 0x36B
+
+static unsigned long arm_cspmu_cpuhp_state;
+
+/*
+ * In CoreSight PMU architecture, all of the MMIO registers are 32-bit except
+ * the counter registers. A counter register can be implemented as a 32-bit or
+ * 64-bit register depending on the value of the PMCFGR.SIZE field. For 64-bit
+ * access, single-copy 64-bit atomic support is implementation defined. The
+ * APMT node flag is used to identify whether the PMU supports 64-bit
+ * single-copy atomic access. If it does not, the driver treats the register
+ * as a pair of 32-bit registers.
+ */
+
+/*
+ * Read a 64-bit register as two 32-bit halves without tearing: sample the
+ * high word, then the low word, and accept the pair only if the high word
+ * still reads the same afterwards.
+ *
+ * @addr: address of the low word; the high word is read from @addr + 4.
+ * @max_poll_count: number of hi-lo-hi attempts allowed before giving up.
+ *
+ * Returns the combined value, or 0 (after logging an error) when no stable
+ * sample was obtained within @max_poll_count attempts.
+ */
+static u64 read_reg64_hilohi(const void __iomem *addr, u32 max_poll_count)
+{
+	u32 hi, lo;
+
+	for (;;) {
+		if (max_poll_count-- == 0) {
+			pr_err("ARM CSPMU: timeout hi-low-high sequence\n");
+			return 0;
+		}
+
+		hi = readl(addr + 4);
+		lo = readl(addr);
+
+		/* High word unchanged across the low-word read: pair is consistent. */
+		if (hi == readl(addr + 4))
+			break;
+	}
+
+	return ((u64)hi << 32) | lo;
+}
+
+/* Tell whether the APMT node flags mark 64-bit single copy atomic as supported. */
+static inline bool supports_64bit_atomics(const struct arm_cspmu *cspmu)
+{
+	const struct acpi_apmt_node *node = cspmu->apmt_node;
+
+	return CHECK_APMT_FLAG(node->flags, ATOMIC, SUPP);
+}
+
+/* Tell whether the cached PMCFGR value has the CC (cycle counter) bit set. */
+static inline bool supports_cycle_counter(const struct arm_cspmu *cspmu)
+{
+	return (cspmu->pmcfgr & PMCFGR_CC) != 0;
+}
+
+/* Counter width in bits: the PMCFGR.SIZE field holds the width minus one. */
+static inline u32 counter_size(const struct arm_cspmu *cspmu)
+{
+	const u32 size_field = FIELD_GET(PMCFGR_SIZE, cspmu->pmcfgr);
+
+	return size_field + 1;
+}
+
+/* Bit mask covering every valid bit of a counter (bits counter_size - 1 .. 0). */
+static inline u64 counter_mask(const struct arm_cspmu *cspmu)
+{
+	const u32 top_bit = counter_size(cspmu) - 1;
+
+	return GENMASK_ULL(top_bit, 0);
+}
+
+/* Counters wider than 32 bits are accessed as 64-bit registers. */
+static inline bool use_64b_counter_reg(const struct arm_cspmu *cspmu)
+{
+	const u32 width = counter_size(cspmu);
+
+	return width > 32;
+}
+
+/*
+ * sysfs show routine for entries of the "events" group.
+ *
+ * Event attributes are created with ARM_CSPMU_EVENT_ATTR(), i.e. they are
+ * struct perf_pmu_events_attr objects, so the event code is read from ->id
+ * (the same container type arm_cspmu_event_attr_is_visible() uses) rather
+ * than by reinterpreting the object as a struct dev_ext_attribute.
+ *
+ * Returns the number of bytes written to @buf.
+ */
+ssize_t arm_cspmu_sysfs_event_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct perf_pmu_events_attr *pmu_attr;
+
+	pmu_attr = container_of(attr, typeof(*pmu_attr), attr);
+	return sysfs_emit(buf, "event=0x%llx\n",
+			  (unsigned long long)pmu_attr->id);
+}
+EXPORT_SYMBOL_GPL(arm_cspmu_sysfs_event_show);
+
+/* Events exposed by default: just the default cycle count event. */
+static struct attribute *arm_cspmu_event_attrs[] = {
+	ARM_CSPMU_EVENT_ATTR(cycles, ARM_CSPMU_EVT_CYCLES_DEFAULT),
+	NULL,
+};
+
+/*
+ * Default get_event_attrs callback: hand out a device-managed copy of the
+ * default event list. Returns NULL if the copy cannot be allocated.
+ */
+static struct attribute **
+arm_cspmu_get_event_attrs(const struct arm_cspmu *cspmu)
+{
+	return devm_kmemdup(cspmu->dev, arm_cspmu_event_attrs,
+			    sizeof(arm_cspmu_event_attrs), GFP_KERNEL);
+}
+
+/*
+ * Default is_visible callback for the "events" group. The default cycle
+ * event is hidden (mode 0) on PMUs without a cycle counter; every other
+ * attribute keeps its own mode.
+ */
+static umode_t
+arm_cspmu_event_attr_is_visible(struct kobject *kobj,
+				struct attribute *attr, int unused)
+{
+	struct arm_cspmu *cspmu =
+		to_arm_cspmu(dev_get_drvdata(kobj_to_dev(kobj)));
+	struct perf_pmu_events_attr *pmu_attr =
+		container_of(attr, struct perf_pmu_events_attr, attr.attr);
+
+	if (pmu_attr->id == ARM_CSPMU_EVT_CYCLES_DEFAULT &&
+	    !supports_cycle_counter(cspmu))
+		return 0;
+
+	return attr->mode;
+}
+
+/*
+ * sysfs show routine for entries of the "format" group: prints the format
+ * string stored in the extended attribute's ->var.
+ */
+ssize_t arm_cspmu_sysfs_format_show(struct device *dev,
+				struct device_attribute *attr,
+				char *buf)
+{
+	struct dev_ext_attribute *ext_attr;
+
+	ext_attr = container_of(attr, struct dev_ext_attribute, attr);
+
+	return sysfs_emit(buf, "%s\n", (char *)ext_attr->var);
+}
+EXPORT_SYMBOL_GPL(arm_cspmu_sysfs_format_show);
+
+/* Format entries exposed by default: event and filter. */
+static struct attribute *arm_cspmu_format_attrs[] = {
+	ARM_CSPMU_FORMAT_EVENT_ATTR,
+	ARM_CSPMU_FORMAT_FILTER_ATTR,
+	NULL,
+};
+
+/*
+ * Default get_format_attrs callback: hand out a device-managed copy of the
+ * default format list. Returns NULL if the copy cannot be allocated.
+ */
+static struct attribute **
+arm_cspmu_get_format_attrs(const struct arm_cspmu *cspmu)
+{
+	return devm_kmemdup(cspmu->dev, arm_cspmu_format_attrs,
+			    sizeof(arm_cspmu_format_attrs), GFP_KERNEL);
+}
+
+/* Default event_type callback: attr.config masked with ARM_CSPMU_EVENT_MASK. */
+static u32 arm_cspmu_event_type(const struct perf_event *event)
+{
+	const u64 config = event->attr.config;
+
+	return config & ARM_CSPMU_EVENT_MASK;
+}
+
+/* Default callback: the event is a cycle event iff config is the default cycle code. */
+static bool arm_cspmu_is_cycle_counter_event(const struct perf_event *event)
+{
+	const u64 config = event->attr.config;
+
+	return config == ARM_CSPMU_EVT_CYCLES_DEFAULT;
+}
+
+/* Default event_filter callback: attr.config1 masked with ARM_CSPMU_FILTER_MASK. */
+static u32 arm_cspmu_event_filter(const struct perf_event *event)
+{
+	const u64 config1 = event->attr.config1;
+
+	return config1 & ARM_CSPMU_FILTER_MASK;
+}
+
+/* sysfs show routine for the read-only "identifier" attribute. */
+static ssize_t arm_cspmu_identifier_show(struct device *dev,
+					 struct device_attribute *attr,
+					 char *page)
+{
+	struct pmu *pmu = dev_get_drvdata(dev);
+	struct arm_cspmu *cspmu = to_arm_cspmu(pmu);
+
+	return sysfs_emit(page, "%s\n", cspmu->identifier);
+}
+
+static struct device_attribute arm_cspmu_identifier_attr =
+	__ATTR(identifier, 0444, arm_cspmu_identifier_show, NULL);
+
+/* Unnamed group, so "identifier" sits directly in the PMU's sysfs directory. */
+static struct attribute *arm_cspmu_identifier_attrs[] = {
+	&arm_cspmu_identifier_attr.attr,
+	NULL,
+};
+
+static struct attribute_group arm_cspmu_identifier_attr_group = {
+	.attrs = arm_cspmu_identifier_attrs,
+};
+
+/*
+ * Default get_identifier callback: the cached PMIIDR value formatted as
+ * lower-case hex in a device-managed string. Returns NULL on allocation
+ * failure.
+ */
+static const char *arm_cspmu_get_identifier(const struct arm_cspmu *cspmu)
+{
+	return devm_kasprintf(cspmu->dev, GFP_KERNEL, "%x",
+			      cspmu->impl.pmiidr);
+}
+
+/* Name fragment for each APMT node type, indexed by the node type value. */
+static const char *arm_cspmu_type_str[ACPI_APMT_NODE_TYPE_COUNT] = {
+	"mc",
+	"smmu",
+	"pcie",
+	"acpi",
+	"cache",
+};
+
+/*
+ * Default get_name callback: build the PMU name in device-managed memory.
+ *
+ * ACPI-type nodes are named "<PMUNAME>_acpi_<hid>_<uid>", with the HID string
+ * taken from inst_primary and the number from inst_secondary. Every other
+ * type is named "<PMUNAME>_<type>_<n>", where <n> comes from a per-type
+ * counter that is bumped on each call.
+ *
+ * Returns NULL for an out-of-range node type or on allocation failure.
+ */
+static const char *arm_cspmu_get_name(const struct arm_cspmu *cspmu)
+{
+	static atomic_t pmu_idx[ACPI_APMT_NODE_TYPE_COUNT] = { 0 };
+	struct acpi_apmt_node *node = cspmu->apmt_node;
+	struct device *dev = cspmu->dev;
+	char hid[ACPI_ID_LEN] = { 0 };
+	u8 type = node->type;
+
+	if (type >= ACPI_APMT_NODE_TYPE_COUNT) {
+		dev_err(dev, "unsupported PMU type-%u\n", type);
+		return NULL;
+	}
+
+	if (type != ACPI_APMT_NODE_TYPE_ACPI)
+		return devm_kasprintf(dev, GFP_KERNEL, "%s_%s_%d", PMUNAME,
+				      arm_cspmu_type_str[type],
+				      atomic_fetch_inc(&pmu_idx[type]));
+
+	/* hid is zero-filled, so the copied bytes stay NUL-terminated. */
+	memcpy(hid, &node->inst_primary, sizeof(node->inst_primary));
+
+	return devm_kasprintf(dev, GFP_KERNEL, "%s_%s_%s_%u", PMUNAME,
+			      arm_cspmu_type_str[type], hid,
+			      node->inst_secondary);
+}
+
+/*
+ * sysfs show routine shared by "cpumask" and "associated_cpus". The
+ * attribute's ->var selects which mask to print as a CPU list; an unknown
+ * selector produces empty output.
+ */
+static ssize_t arm_cspmu_cpumask_show(struct device *dev,
+				      struct device_attribute *attr,
+				      char *buf)
+{
+	struct arm_cspmu *cspmu = to_arm_cspmu(dev_get_drvdata(dev));
+	struct dev_ext_attribute *ext_attr =
+		container_of(attr, struct dev_ext_attribute, attr);
+	unsigned long which = (unsigned long)ext_attr->var;
+	const cpumask_t *mask;
+
+	if (which == ARM_CSPMU_ACTIVE_CPU_MASK)
+		mask = &cspmu->active_cpu;
+	else if (which == ARM_CSPMU_ASSOCIATED_CPU_MASK)
+		mask = &cspmu->associated_cpus;
+	else
+		return 0;
+
+	return cpumap_print_to_pagebuf(true, buf, mask);
+}
+
+static struct attribute *arm_cspmu_cpumask_attrs[] = {
+	ARM_CSPMU_CPUMASK_ATTR(cpumask, ARM_CSPMU_ACTIVE_CPU_MASK),
+	ARM_CSPMU_CPUMASK_ATTR(associated_cpus, ARM_CSPMU_ASSOCIATED_CPU_MASK),
+	NULL,
+};
+
+static struct attribute_group arm_cspmu_cpumask_attr_group = {
+	.attrs = arm_cspmu_cpumask_attrs,
+};
+
+/*
+ * One entry per implementer-specific backend: @pmiidr and @mask select the
+ * PMIIDR bits to compare, @impl_init_ops installs the backend's callbacks.
+ */
+struct impl_match {
+	u32 pmiidr;
+	u32 mask;
+	int (*impl_init_ops)(struct arm_cspmu *cspmu);
+};
+
+/* Terminated by an all-zero entry. */
+static const struct impl_match impl_match[] = {
+	{
+	  .pmiidr = ARM_CSPMU_IMPL_ID_NVIDIA,
+	  .mask = ARM_CSPMU_PMIIDR_IMPLEMENTER,
+	  .impl_init_ops = nv_cspmu_init_ops
+	},
+	{}
+};
+
+/*
+ * Determine the PMU's implementer id, let the first matching backend install
+ * its callbacks, then fill every callback still unset with the default
+ * implementation from this file.
+ *
+ * Returns 0 on success or the error reported by the backend's init routine.
+ */
+static int arm_cspmu_init_impl_ops(struct arm_cspmu *cspmu)
+{
+	struct arm_cspmu_impl_ops *impl_ops = &cspmu->impl.ops;
+	struct acpi_apmt_node *apmt_node = cspmu->apmt_node;
+	const struct impl_match *m;
+	int ret;
+
+	/*
+	 * Prefer the implementer/product id from the APMT node; fall back to
+	 * the PMIIDR register when the node does not carry one.
+	 */
+	if (apmt_node->impl_id)
+		cspmu->impl.pmiidr = apmt_node->impl_id;
+	else
+		cspmu->impl.pmiidr = readl(cspmu->base0 + PMIIDR);
+
+	/* Find implementer specific attribute ops. */
+	for (m = impl_match; m->pmiidr; m++) {
+		/* Skip entries that differ in any bit selected by the mask. */
+		if ((m->pmiidr ^ cspmu->impl.pmiidr) & m->mask)
+			continue;
+
+		ret = m->impl_init_ops(cspmu);
+		if (ret)
+			return ret;
+
+		break;
+	}
+
+	/* Use default callbacks if implementer doesn't provide one. */
+	CHECK_DEFAULT_IMPL_OPS(impl_ops, get_event_attrs);
+	CHECK_DEFAULT_IMPL_OPS(impl_ops, get_format_attrs);
+	CHECK_DEFAULT_IMPL_OPS(impl_ops, get_identifier);
+	CHECK_DEFAULT_IMPL_OPS(impl_ops, get_name);
+	CHECK_DEFAULT_IMPL_OPS(impl_ops, is_cycle_counter_event);
+	CHECK_DEFAULT_IMPL_OPS(impl_ops, event_type);
+	CHECK_DEFAULT_IMPL_OPS(impl_ops, event_filter);
+	CHECK_DEFAULT_IMPL_OPS(impl_ops, event_attr_is_visible);
+
+	return 0;
+}
+
+/*
+ * Build the device-managed "events" attribute group from the implementer
+ * callbacks. Returns NULL if the group or its attribute list cannot be
+ * obtained.
+ */
+static struct attribute_group *
+arm_cspmu_alloc_event_attr_group(struct arm_cspmu *cspmu)
+{
+	const struct arm_cspmu_impl_ops *ops = &cspmu->impl.ops;
+	struct attribute_group *grp;
+
+	grp = devm_kzalloc(cspmu->dev, sizeof(*grp), GFP_KERNEL);
+	if (!grp)
+		return NULL;
+
+	grp->name = "events";
+	grp->is_visible = ops->event_attr_is_visible;
+	grp->attrs = ops->get_event_attrs(cspmu);
+
+	return grp->attrs ? grp : NULL;
+}
+
+/*
+ * Build the device-managed "format" attribute group from the implementer
+ * callback. Returns NULL if the group or its attribute list cannot be
+ * obtained.
+ */
+static struct attribute_group *
+arm_cspmu_alloc_format_attr_group(struct arm_cspmu *cspmu)
+{
+	struct attribute_group *grp;
+
+	grp = devm_kzalloc(cspmu->dev, sizeof(*grp), GFP_KERNEL);
+	if (!grp)
+		return NULL;
+
+	grp->name = "format";
+	grp->attrs = cspmu->impl.ops.get_format_attrs(cspmu);
+
+	return grp->attrs ? grp : NULL;
+}
+
+/*
+ * Resolve the implementer callbacks, derive the PMU identifier and name, and
+ * build the NULL-terminated attribute group array (events, format,
+ * identifier, cpumask) handed to the perf core.
+ *
+ * Returns NULL if any step fails.
+ */
+static struct attribute_group **
+arm_cspmu_alloc_attr_group(struct arm_cspmu *cspmu)
+{
+	const struct arm_cspmu_impl_ops *ops = &cspmu->impl.ops;
+	struct attribute_group **groups;
+
+	if (arm_cspmu_init_impl_ops(cspmu))
+		return NULL;
+
+	cspmu->identifier = ops->get_identifier(cspmu);
+	cspmu->name = ops->get_name(cspmu);
+	if (!cspmu->name || !cspmu->identifier)
+		return NULL;
+
+	/* Four groups plus the terminating NULL slot left by kcalloc. */
+	groups = devm_kcalloc(cspmu->dev, 5, sizeof(*groups), GFP_KERNEL);
+	if (!groups)
+		return NULL;
+
+	groups[0] = arm_cspmu_alloc_event_attr_group(cspmu);
+	groups[1] = arm_cspmu_alloc_format_attr_group(cspmu);
+	groups[2] = &arm_cspmu_identifier_attr_group;
+	groups[3] = &arm_cspmu_cpumask_attr_group;
+
+	if (!groups[0] || !groups[1])
+		return NULL;
+
+	return groups;
+}
+
+/* Write PMCR with only the P and C bits set. */
+static inline void arm_cspmu_reset_counters(struct arm_cspmu *cspmu)
+{
+	const u32 pmcr = PMCR_P | PMCR_C;
+
+	writel(pmcr, cspmu->base0 + PMCR);
+}
+
+/* Write PMCR with only the E (enable) bit set. */
+static inline void arm_cspmu_start_counters(struct arm_cspmu *cspmu)
+{
+	void __iomem *pmcr = cspmu->base0 + PMCR;
+
+	writel(PMCR_E, pmcr);
+}
+
+/* Write PMCR with all bits clear, including E. */
+static inline void arm_cspmu_stop_counters(struct arm_cspmu *cspmu)
+{
+	void __iomem *pmcr = cspmu->base0 + PMCR;
+
+	writel(0, pmcr);
+}
+
+/* pmu_enable callback: start counting unless no logical counter is in use. */
+static void arm_cspmu_enable(struct pmu *pmu)
+{
+	struct arm_cspmu *cspmu = to_arm_cspmu(pmu);
+
+	if (bitmap_empty(cspmu->hw_events.used_ctrs, cspmu->num_logical_ctrs))
+		return;
+
+	arm_cspmu_start_counters(cspmu);
+}
+
+/* pmu_disable callback: unconditionally stop the counters. */
+static void arm_cspmu_disable(struct pmu *pmu)
+{
+	arm_cspmu_stop_counters(to_arm_cspmu(pmu));
+}
+
+/*
+ * Claim a logical counter for @event in @hw_events.
+ *
+ * Cycle events may only use the dedicated cycle counter slot; all other
+ * events take the lowest free slot that is not the cycle counter slot.
+ *
+ * Returns the claimed logical index, or -EAGAIN when no suitable counter is
+ * free.
+ */
+static int arm_cspmu_get_event_idx(struct arm_cspmu_hw_events *hw_events,
+				struct perf_event *event)
+{
+	struct arm_cspmu *cspmu = to_arm_cspmu(event->pmu);
+	unsigned long *used = hw_events->used_ctrs;
+	int idx;
+
+	if (!supports_cycle_counter(cspmu)) {
+		idx = find_first_zero_bit(used, cspmu->num_logical_ctrs);
+	} else if (cspmu->impl.ops.is_cycle_counter_event(event)) {
+		/* The cycle counter is the only candidate for this event. */
+		if (test_and_set_bit(cspmu->cycle_counter_logical_idx, used))
+			return -EAGAIN;
+
+		return cspmu->cycle_counter_logical_idx;
+	} else {
+		/*
+		 * Regular event: look below the cycle counter slot first,
+		 * then above it, so the cycle counter bit is never picked.
+		 */
+		idx = find_first_zero_bit(used,
+					  cspmu->cycle_counter_logical_idx);
+		if (idx >= cspmu->cycle_counter_logical_idx)
+			idx = find_next_zero_bit(
+				used, cspmu->num_logical_ctrs,
+				cspmu->cycle_counter_logical_idx + 1);
+	}
+
+	if (idx >= cspmu->num_logical_ctrs)
+		return -EAGAIN;
+
+	set_bit(idx, used);
+
+	return idx;
+}
+
+/*
+ * Check that @event could be scheduled given the counters already claimed in
+ * @hw_events. Software events always pass; events of another PMU never do
+ * (groups spanning multiple HW PMUs are rejected). A passing hardware event
+ * claims a counter in @hw_events.
+ */
+static bool arm_cspmu_validate_event(struct pmu *pmu,
+				 struct arm_cspmu_hw_events *hw_events,
+				 struct perf_event *event)
+{
+	if (is_software_event(event))
+		return true;
+
+	return event->pmu == pmu &&
+	       arm_cspmu_get_event_idx(hw_events, event) >= 0;
+}
+
+/*
+ * Verify that the whole group @event belongs to fits on the PMU at once, by
+ * allocating counters for the leader, its siblings and @event itself in a
+ * scratch counter map. A group leader on its own is always accepted.
+ */
+static bool arm_cspmu_validate_group(struct perf_event *event)
+{
+	struct perf_event *leader = event->group_leader;
+	struct arm_cspmu_hw_events scratch;
+	struct pmu *pmu = event->pmu;
+	struct perf_event *sibling;
+
+	if (leader == event)
+		return true;
+
+	memset(&scratch, 0, sizeof(scratch));
+
+	if (!arm_cspmu_validate_event(pmu, &scratch, leader))
+		return false;
+
+	for_each_sibling_event(sibling, leader) {
+		if (!arm_cspmu_validate_event(pmu, &scratch, sibling))
+			return false;
+	}
+
+	return arm_cspmu_validate_event(pmu, &scratch, event);
+}
+
+/*
+ * event_init callback: validate a new event and prepare its hardware state.
+ *
+ * Returns:
+ *   0            on success,
+ *   -ENOENT      if the event was not created for this PMU (lets the perf
+ *                core try other PMUs),
+ *   -EOPNOTSUPP  for sampling events,
+ *   -EINVAL      for per-task events, CPUs not associated with the PMU, a
+ *                PMU without an active CPU, or a group that cannot be
+ *                scheduled at once.
+ */
+static int arm_cspmu_event_init(struct perf_event *event)
+{
+	struct arm_cspmu *cspmu;
+	struct hw_perf_event *hwc = &event->hw;
+
+	/* Only claim events that were opened with this PMU's type. */
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	cspmu = to_arm_cspmu(event->pmu);
+
+	/*
+	 * Following other "uncore" PMUs, we do not support sampling mode or
+	 * attach to a task (per-process mode).
+	 */
+	if (is_sampling_event(event)) {
+		dev_dbg(cspmu->pmu.dev,
+			"Can't support sampling events\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (event->cpu < 0 || event->attach_state & PERF_ATTACH_TASK) {
+		dev_dbg(cspmu->pmu.dev,
+			"Can't support per-task counters\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * Make sure the CPU assignment is on one of the CPUs associated with
+	 * this PMU.
+	 */
+	if (!cpumask_test_cpu(event->cpu, &cspmu->associated_cpus)) {
+		dev_dbg(cspmu->pmu.dev,
+			"Requested cpu is not associated with the PMU\n");
+		return -EINVAL;
+	}
+
+	/* Enforce the current active CPU to handle the events in this PMU. */
+	event->cpu = cpumask_first(&cspmu->active_cpu);
+	if (event->cpu >= nr_cpu_ids)
+		return -EINVAL;
+
+	if (!arm_cspmu_validate_group(event))
+		return -EINVAL;
+
+	/*
+	 * The logical counter id is tracked with hw_perf_event.extra_reg.idx.
+	 * The physical counter id is tracked with hw_perf_event.idx.
+	 * We don't assign an index until we actually place the event onto
+	 * hardware. Use -1 to signify that we haven't decided where to put it
+	 * yet.
+	 */
+	hwc->idx = -1;
+	hwc->extra_reg.idx = -1;
+	hwc->config = cspmu->impl.ops.event_type(event);
+
+	return 0;
+}
+
+/* Byte offset of counter @ctr_idx when each counter register is @reg_sz bytes. */
+static inline u32 counter_offset(u32 reg_sz, u32 ctr_idx)
+{
+	const u32 stride = reg_sz * ctr_idx;
+
+	return PMEVCNTR_LO + stride;
+}
+
+/*
+ * Write @val to the physical counter assigned to @event.
+ *
+ * 64-bit counter registers are written with a single 64-bit access only when
+ * the PMU supports 64-bit single copy atomic; otherwise the register is
+ * written as a pair of 32-bit registers (low word first), mirroring how
+ * arm_cspmu_read_counter() treats such PMUs. 32-bit counter registers
+ * receive the low 32 bits of @val.
+ */
+static void arm_cspmu_write_counter(struct perf_event *event, u64 val)
+{
+	u32 offset;
+	struct arm_cspmu *cspmu = to_arm_cspmu(event->pmu);
+
+	if (use_64b_counter_reg(cspmu)) {
+		offset = counter_offset(sizeof(u64), event->hw.idx);
+
+		if (supports_64bit_atomics(cspmu))
+			writeq(val, cspmu->base1 + offset);
+		else
+			lo_hi_writeq(val, cspmu->base1 + offset);
+	} else {
+		offset = counter_offset(sizeof(u32), event->hw.idx);
+
+		writel(lower_32_bits(val), cspmu->base1 + offset);
+	}
+}
+
+/*
+ * Read the physical counter assigned to @event. 64-bit counter registers
+ * are read with one 64-bit access when the PMU supports 64-bit single copy
+ * atomic, and with the hi-lo-hi sequence otherwise.
+ */
+static u64 arm_cspmu_read_counter(struct perf_event *event)
+{
+	struct arm_cspmu *cspmu = to_arm_cspmu(event->pmu);
+	const void __iomem *counter_addr;
+
+	if (!use_64b_counter_reg(cspmu))
+		return readl(cspmu->base1 +
+			     counter_offset(sizeof(u32), event->hw.idx));
+
+	counter_addr = cspmu->base1 +
+		       counter_offset(sizeof(u64), event->hw.idx);
+
+	if (supports_64bit_atomics(cspmu))
+		return readq(counter_addr);
+
+	return read_reg64_hilohi(counter_addr, HILOHI_MAX_POLL);
+}
+
+/*
+ * arm_cspmu_set_event_period: Set the period for the counter.
+ *
+ * The counter is programmed with half of its maximum value, which leaves
+ * room to absorb extreme interrupt latency. The same value is recorded as
+ * the event's previous count.
+ */
+static void arm_cspmu_set_event_period(struct perf_event *event)
+{
+	struct arm_cspmu *cspmu = to_arm_cspmu(event->pmu);
+	const u64 half_range = counter_mask(cspmu) >> 1;
+
+	local64_set(&event->hw.prev_count, half_range);
+	arm_cspmu_write_counter(event, half_range);
+}
+
+/*
+ * Enable physical counter @idx: set its bit in the interrupt-enable SET
+ * register first, then in the counter-enable SET register.
+ */
+static void arm_cspmu_enable_counter(struct arm_cspmu *cspmu, int idx)
+{
+	const u32 reg_id = COUNTER_TO_SET_CLR_ID(idx);
+	const u32 reg_bit = COUNTER_TO_SET_CLR_BIT(idx);
+	void __iomem *inten = cspmu->base0 + PMINTENSET + (4 * reg_id);
+	void __iomem *cnten = cspmu->base0 + PMCNTENSET + (4 * reg_id);
+
+	writel(BIT(reg_bit), inten);
+	writel(BIT(reg_bit), cnten);
+}
+
+/*
+ * Disable physical counter @idx: set its bit in the counter-enable CLR
+ * register first, then in the interrupt-enable CLR register.
+ */
+static void arm_cspmu_disable_counter(struct arm_cspmu *cspmu, int idx)
+{
+	const u32 reg_id = COUNTER_TO_SET_CLR_ID(idx);
+	const u32 reg_bit = COUNTER_TO_SET_CLR_BIT(idx);
+	void __iomem *inten = cspmu->base0 + PMINTENCLR + (4 * reg_id);
+	void __iomem *cnten = cspmu->base0 + PMCNTENCLR + (4 * reg_id);
+
+	writel(BIT(reg_bit), cnten);
+	writel(BIT(reg_bit), inten);
+}
+
+/*
+ * Fold the counter's progress since the last update into event->count.
+ * The previous count is swapped for the fresh reading with cmpxchg, retrying
+ * if it changed underneath us; the delta is taken modulo the counter width.
+ */
+static void arm_cspmu_event_update(struct perf_event *event)
+{
+	struct arm_cspmu *cspmu = to_arm_cspmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	u64 prev, now;
+
+	for (;;) {
+		prev = local64_read(&hwc->prev_count);
+		now = arm_cspmu_read_counter(event);
+
+		if (local64_cmpxchg(&hwc->prev_count, prev, now) == prev)
+			break;
+	}
+
+	local64_add((now - prev) & counter_mask(cspmu), &event->count);
+}
+
+/* Program the event type register of the counter held in hwc->idx. */
+static inline void arm_cspmu_set_event(struct arm_cspmu *cspmu,
+				       struct hw_perf_event *hwc)
+{
+	const u32 evtyper = PMEVTYPER + (4 * hwc->idx);
+
+	writel(hwc->config, cspmu->base0 + evtyper);
+}
+
+/* Program the event filter register of the counter held in hwc->idx. */
+static inline void arm_cspmu_set_ev_filter(struct arm_cspmu *cspmu,
+					   struct hw_perf_event *hwc,
+					   u32 filter)
+{
+	const u32 evfiltr = PMEVFILTR + (4 * hwc->idx);
+
+	writel(filter, cspmu->base0 + evfiltr);
+}
+
+/* Program the cycle counter filter register. */
+static inline void arm_cspmu_set_cc_filter(struct arm_cspmu *cspmu, u32 filter)
+{
+	writel(filter, cspmu->base0 + PMCCFILTR);
+}
+
+/*
+ * start callback: reload the counter, program the event selection and
+ * filter (or only the cycle counter filter for the cycle counter), mark the
+ * event as running and enable its counter.
+ */
+static void arm_cspmu_start(struct perf_event *event, int pmu_flags)
+{
+	struct arm_cspmu *cspmu = to_arm_cspmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	u32 filter;
+
+	/* We always reprogram the counter */
+	if (pmu_flags & PERF_EF_RELOAD)
+		WARN_ON(!(hwc->state & PERF_HES_UPTODATE));
+
+	arm_cspmu_set_event_period(event);
+
+	filter = cspmu->impl.ops.event_filter(event);
+
+	if (hwc->extra_reg.idx != cspmu->cycle_counter_logical_idx) {
+		arm_cspmu_set_event(cspmu, hwc);
+		arm_cspmu_set_ev_filter(cspmu, hwc, filter);
+	} else {
+		arm_cspmu_set_cc_filter(cspmu, filter);
+	}
+
+	hwc->state = 0;
+
+	arm_cspmu_enable_counter(cspmu, hwc->idx);
+}
+
+/*
+ * stop callback: for an event that is not already stopped, disable its
+ * counter, fold the final reading into the event count and mark the event
+ * stopped and up to date.
+ */
+static void arm_cspmu_stop(struct perf_event *event, int pmu_flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct arm_cspmu *cspmu;
+
+	if (hwc->state & PERF_HES_STOPPED)
+		return;
+
+	cspmu = to_arm_cspmu(event->pmu);
+
+	arm_cspmu_disable_counter(cspmu, hwc->idx);
+	arm_cspmu_event_update(event);
+
+	hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
+}
+
+/*
+ * Map a logical counter index to its physical index: the logical cycle
+ * counter slot maps to ARM_CSPMU_CYCLE_CNTR_IDX, everything else is 1-to-1.
+ */
+static inline u32 to_phys_idx(struct arm_cspmu *cspmu, u32 idx)
+{
+	if (idx == cspmu->cycle_counter_logical_idx)
+		return ARM_CSPMU_CYCLE_CNTR_IDX;
+
+	return idx;
+}
+
+/*
+ * add callback: claim a counter for @event, record the logical and physical
+ * indices, and start counting right away when PERF_EF_START is set.
+ *
+ * Returns 0 on success, -ENOENT when called on a CPU that is not associated
+ * with the PMU, or the error from counter allocation.
+ */
+static int arm_cspmu_add(struct perf_event *event, int flags)
+{
+	struct arm_cspmu *cspmu = to_arm_cspmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int logical_idx;
+
+	if (WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(),
+					   &cspmu->associated_cpus)))
+		return -ENOENT;
+
+	logical_idx = arm_cspmu_get_event_idx(&cspmu->hw_events, event);
+	if (logical_idx < 0)
+		return logical_idx;
+
+	cspmu->hw_events.events[logical_idx] = event;
+	hwc->idx = to_phys_idx(cspmu, logical_idx);
+	hwc->extra_reg.idx = logical_idx;
+	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+
+	if (flags & PERF_EF_START)
+		arm_cspmu_start(event, PERF_EF_RELOAD);
+
+	/* Propagate changes to the userspace mapping. */
+	perf_event_update_userpage(event);
+
+	return 0;
+}
+
+/*
+ * del callback: stop @event (updating its count) and release the logical
+ * counter it occupied.
+ */
+static void arm_cspmu_del(struct perf_event *event, int flags)
+{
+	struct arm_cspmu *cspmu = to_arm_cspmu(event->pmu);
+	struct arm_cspmu_hw_events *hw_events = &cspmu->hw_events;
+	int logical_idx = event->hw.extra_reg.idx;
+
+	arm_cspmu_stop(event, PERF_EF_UPDATE);
+
+	hw_events->events[logical_idx] = NULL;
+	clear_bit(logical_idx, hw_events->used_ctrs);
+
+	perf_event_update_userpage(event);
+}
+
+/* read callback: bring event->count up to date with the hardware counter. */
+static void arm_cspmu_read(struct perf_event *event)
+{
+	arm_cspmu_event_update(event);
+}
+
+/*
+ * Allocate the device-managed driver state for @pdev and bind it to the APMT
+ * node passed through the device's platform data (a pointer to the node
+ * pointer).
+ *
+ * Returns the new state (also stored as driver data), or NULL when the
+ * platform data or the APMT node is missing or the allocation fails.
+ */
+static struct arm_cspmu *arm_cspmu_alloc(struct platform_device *pdev)
+{
+	struct acpi_apmt_node **platdata;
+	struct arm_cspmu *cspmu;
+	struct device *dev;
+
+	dev = &pdev->dev;
+
+	/* Platform data may be absent; check it before dereferencing. */
+	platdata = dev_get_platdata(dev);
+	if (!platdata || !*platdata) {
+		dev_err(dev, "failed to get APMT node\n");
+		return NULL;
+	}
+
+	cspmu = devm_kzalloc(dev, sizeof(*cspmu), GFP_KERNEL);
+	if (!cspmu)
+		return NULL;
+
+	cspmu->dev = dev;
+	cspmu->apmt_node = *platdata;
+
+	platform_set_drvdata(pdev, cspmu);
+
+	return cspmu;
+}
+
+/*
+ * Map the PMU register pages and derive the counter layout from PMCFGR:
+ * number of logical counters, logical index of the cycle counter, number of
+ * SET/CLR registers, and the per-counter event pointer array.
+ *
+ * Returns 0 on success, the ioremap error, or -ENOMEM.
+ */
+static int arm_cspmu_init_mmio(struct arm_cspmu *cspmu)
+{
+	struct device *dev = cspmu->dev;
+	struct platform_device *pdev = to_platform_device(dev);
+	struct acpi_apmt_node *apmt_node = cspmu->apmt_node;
+
+	/* Base address for page 0. */
+	cspmu->base0 = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(cspmu->base0)) {
+		dev_err(dev, "ioremap failed for page-0 resource\n");
+		return PTR_ERR(cspmu->base0);
+	}
+
+	/* Base address for page 1 if supported. Otherwise point to page 0. */
+	cspmu->base1 = cspmu->base0;
+	if (CHECK_APMT_FLAG(apmt_node->flags, DUAL_PAGE, SUPP)) {
+		cspmu->base1 = devm_platform_ioremap_resource(pdev, 1);
+		if (IS_ERR(cspmu->base1)) {
+			dev_err(dev, "ioremap failed for page-1 resource\n");
+			return PTR_ERR(cspmu->base1);
+		}
+	}
+
+	cspmu->pmcfgr = readl(cspmu->base0 + PMCFGR);
+	cspmu->num_logical_ctrs = FIELD_GET(PMCFGR_N, cspmu->pmcfgr) + 1;
+
+	/*
+	 * Without a cycle counter the index is parked at
+	 * ARM_CSPMU_MAX_HW_CNTRS. Otherwise the last logical counter is
+	 * mapped to the cycle counter if there is a gap between regular and
+	 * cycle counter, and logical and physical indices map 1-to-1 if not.
+	 */
+	if (!supports_cycle_counter(cspmu))
+		cspmu->cycle_counter_logical_idx = ARM_CSPMU_MAX_HW_CNTRS;
+	else if (cspmu->num_logical_ctrs <= ARM_CSPMU_CYCLE_CNTR_IDX)
+		cspmu->cycle_counter_logical_idx =
+			cspmu->num_logical_ctrs - 1;
+	else
+		cspmu->cycle_counter_logical_idx = ARM_CSPMU_CYCLE_CNTR_IDX;
+
+	cspmu->num_set_clr_reg = DIV_ROUND_UP(cspmu->num_logical_ctrs,
+					      ARM_CSPMU_SET_CLR_COUNTER_NUM);
+
+	cspmu->hw_events.events =
+		devm_kcalloc(dev, cspmu->num_logical_ctrs,
+			     sizeof(*cspmu->hw_events.events), GFP_KERNEL);
+
+	return cspmu->hw_events.events ? 0 : -ENOMEM;
+}
+
+/*
+ * Read every overflow status register into @pmovs and write the value just
+ * read back to the same PMOVSCLR register.
+ *
+ * Returns non-zero if any overflow bit was set.
+ */
+static inline int arm_cspmu_get_reset_overflow(struct arm_cspmu *cspmu,
+					       u32 *pmovs)
+{
+	u32 any_overflow = 0;
+	int reg;
+
+	for (reg = 0; reg < cspmu->num_set_clr_reg; ++reg) {
+		void __iomem *pmovsclr =
+			cspmu->base1 + PMOVSCLR + reg * sizeof(u32);
+
+		pmovs[reg] = readl(pmovsclr);
+		any_overflow |= pmovs[reg];
+		writel(pmovs[reg], pmovsclr);
+	}
+
+	return any_overflow != 0;
+}
+
+/*
+ * Overflow interrupt handler. Counting is stopped while the overflow status
+ * is collected and cleared; every in-use event whose physical counter
+ * overflowed gets its count updated and its period reprogrammed. Counting is
+ * restarted before returning.
+ *
+ * Returns IRQ_HANDLED if at least one event was serviced, IRQ_NONE otherwise.
+ */
+static irqreturn_t arm_cspmu_handle_irq(int irq_num, void *dev)
+{
+	struct arm_cspmu *cspmu = dev;
+	DECLARE_BITMAP(pmovs, ARM_CSPMU_MAX_HW_CNTRS);
+	struct perf_event *event;
+	bool handled = false;
+	int idx;
+
+	arm_cspmu_stop_counters(cspmu);
+
+	if (arm_cspmu_get_reset_overflow(cspmu, (u32 *)pmovs)) {
+		for_each_set_bit(idx, cspmu->hw_events.used_ctrs,
+				 cspmu->num_logical_ctrs) {
+			event = cspmu->hw_events.events[idx];
+
+			/* Skip empty slots and counters that did not overflow. */
+			if (!event || !test_bit(event->hw.idx, pmovs))
+				continue;
+
+			arm_cspmu_event_update(event);
+			arm_cspmu_set_event_period(event);
+
+			handled = true;
+		}
+	}
+
+	arm_cspmu_start_counters(cspmu);
+	return IRQ_RETVAL(handled);
+}
+
+/*
+ * Request the overflow interrupt, if the APMT node describes one. On success
+ * the Linux IRQ number is stored in cspmu->irq; it is left untouched when
+ * the PMU has no overflow interrupt.
+ *
+ * Returns 0 on success or the error from the IRQ lookup/request.
+ */
+static int arm_cspmu_request_irq(struct arm_cspmu *cspmu)
+{
+	struct device *dev = cspmu->dev;
+	int irq, ret;
+
+	/* Skip IRQ request if the PMU does not support overflow interrupt. */
+	if (cspmu->apmt_node->ovflw_irq == 0)
+		return 0;
+
+	irq = platform_get_irq(to_platform_device(dev), 0);
+	if (irq < 0)
+		return irq;
+
+	ret = devm_request_irq(dev, irq, arm_cspmu_handle_irq,
+			       IRQF_NOBALANCING | IRQF_NO_THREAD,
+			       dev_name(dev), cspmu);
+	if (ret) {
+		dev_err(dev, "Could not request IRQ %d\n", irq);
+		return ret;
+	}
+
+	cspmu->irq = irq;
+
+	return 0;
+}
+
+/*
+ * Check whether @cpu sits below the ACPI processor container whose UID is
+ * @container_uid, by walking up the CPU's ACPI device ancestry.
+ *
+ * Returns 0 when such a container is found, -ENODEV otherwise (including
+ * when the CPU has no device).
+ */
+static inline int arm_cspmu_find_cpu_container(int cpu, u32 container_uid)
+{
+	u32 acpi_uid;
+	const char *uid;
+	struct device *cpu_dev;
+	struct acpi_device *acpi_dev;
+
+	/* Validate cpu_dev before ACPI_COMPANION() looks inside it. */
+	cpu_dev = get_cpu_device(cpu);
+	if (!cpu_dev)
+		return -ENODEV;
+
+	acpi_dev = ACPI_COMPANION(cpu_dev);
+	while (acpi_dev) {
+		/* A device without a UID cannot match; never parse NULL. */
+		uid = acpi_device_uid(acpi_dev);
+
+		if (uid &&
+		    !strcmp(acpi_device_hid(acpi_dev),
+			    ACPI_PROCESSOR_CONTAINER_HID) &&
+		    !kstrtouint(uid, 0, &acpi_uid) &&
+		    acpi_uid == container_uid)
+			return 0;
+
+		acpi_dev = acpi_dev_parent(acpi_dev);
+	}
+
+	return -ENODEV;
+}
+
+/*
+ * Populate cspmu->associated_cpus from the APMT node affinity: either the
+ * single possible CPU whose ACPI id equals proc_affinity, or every possible
+ * CPU found below the processor container with that UID.
+ *
+ * Returns 0 on success, -ENODEV when no CPU is associated with the PMU.
+ */
+static int arm_cspmu_get_cpus(struct arm_cspmu *cspmu)
+{
+	struct device *dev;
+	struct acpi_apmt_node *apmt_node;
+	int affinity_flag;
+	int cpu;
+
+	/*
+	 * Log through the platform device: probe calls this before the PMU
+	 * is registered, so cspmu->pmu.dev is not set up yet.
+	 */
+	dev = cspmu->dev;
+	apmt_node = cspmu->apmt_node;
+	affinity_flag = apmt_node->flags & ACPI_APMT_FLAGS_AFFINITY;
+
+	if (affinity_flag == ACPI_APMT_FLAGS_AFFINITY_PROC) {
+		for_each_possible_cpu(cpu) {
+			if (apmt_node->proc_affinity ==
+			    get_acpi_id_for_cpu(cpu)) {
+				cpumask_set_cpu(cpu, &cspmu->associated_cpus);
+				break;
+			}
+		}
+	} else {
+		for_each_possible_cpu(cpu) {
+			if (arm_cspmu_find_cpu_container(
+				    cpu, apmt_node->proc_affinity))
+				continue;
+
+			cpumask_set_cpu(cpu, &cspmu->associated_cpus);
+		}
+	}
+
+	if (cpumask_empty(&cspmu->associated_cpus)) {
+		dev_dbg(dev, "No cpu associated with the PMU\n");
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+/*
+ * Build the sysfs attribute groups, add this PMU to the CPU hotplug state,
+ * fill in the struct pmu callbacks, reset the hardware counters and register
+ * with the perf core. The hotplug instance is removed again if registration
+ * fails.
+ *
+ * Returns 0 on success, -ENOMEM when the attribute groups cannot be built,
+ * or the error from the hotplug/perf registration.
+ */
+static int arm_cspmu_register_pmu(struct arm_cspmu *cspmu)
+{
+	struct attribute_group **groups;
+	int capabilities = PERF_PMU_CAP_NO_EXCLUDE;
+	int err;
+
+	groups = arm_cspmu_alloc_attr_group(cspmu);
+	if (!groups)
+		return -ENOMEM;
+
+	err = cpuhp_state_add_instance(arm_cspmu_cpuhp_state,
+				       &cspmu->cpuhp_node);
+	if (err)
+		return err;
+
+	/* No IRQ was requested: tell perf there is no overflow interrupt. */
+	if (cspmu->irq == 0)
+		capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
+
+	cspmu->pmu = (struct pmu){
+		.task_ctx_nr	= perf_invalid_context,
+		.module		= THIS_MODULE,
+		.pmu_enable	= arm_cspmu_enable,
+		.pmu_disable	= arm_cspmu_disable,
+		.event_init	= arm_cspmu_event_init,
+		.add		= arm_cspmu_add,
+		.del		= arm_cspmu_del,
+		.start		= arm_cspmu_start,
+		.stop		= arm_cspmu_stop,
+		.read		= arm_cspmu_read,
+		.attr_groups	= (const struct attribute_group **)groups,
+		.capabilities	= capabilities,
+	};
+
+	/* Hardware counter init */
+	arm_cspmu_stop_counters(cspmu);
+	arm_cspmu_reset_counters(cspmu);
+
+	err = perf_pmu_register(&cspmu->pmu, cspmu->name, -1);
+	if (err)
+		cpuhp_state_remove_instance(arm_cspmu_cpuhp_state,
+					    &cspmu->cpuhp_node);
+
+	return err;
+}
+
+/*
+ * Platform probe: allocate the driver state, then map the registers, request
+ * the IRQ, resolve the associated CPUs and register the PMU, stopping at the
+ * first step that fails.
+ */
+static int arm_cspmu_device_probe(struct platform_device *pdev)
+{
+	struct arm_cspmu *cspmu = arm_cspmu_alloc(pdev);
+	int err;
+
+	if (!cspmu)
+		return -ENOMEM;
+
+	err = arm_cspmu_init_mmio(cspmu);
+	if (!err)
+		err = arm_cspmu_request_irq(cspmu);
+	if (!err)
+		err = arm_cspmu_get_cpus(cspmu);
+	if (!err)
+		err = arm_cspmu_register_pmu(cspmu);
+
+	return err;
+}
+
+/*
+ * Platform remove: unregister the PMU from perf first, then drop its CPU
+ * hotplug instance. Always returns 0.
+ */
+static int arm_cspmu_device_remove(struct platform_device *pdev)
+{
+	struct arm_cspmu *cspmu;
+
+	cspmu = platform_get_drvdata(pdev);
+
+	perf_pmu_unregister(&cspmu->pmu);
+	cpuhp_state_remove_instance(arm_cspmu_cpuhp_state, &cspmu->cpuhp_node);
+
+	return 0;
+}
+
+/*
+ * Platform device id table. It carries the same name the driver binds by,
+ * and exporting it gives the module a "platform:" alias so it can be
+ * auto-loaded when a matching device is created.
+ */
+static const struct platform_device_id arm_cspmu_id[] = {
+	{ DRVNAME, 0 },
+	{ },
+};
+MODULE_DEVICE_TABLE(platform, arm_cspmu_id);
+
+/* Manual bind/unbind through sysfs is suppressed for this driver. */
+static struct platform_driver arm_cspmu_driver = {
+	.driver = {
+			.name = DRVNAME,
+			.suppress_bind_attrs = true,
+		},
+	.probe = arm_cspmu_device_probe,
+	.remove = arm_cspmu_device_remove,
+	.id_table = arm_cspmu_id,
+};
+
+/*
+ * Make @cpu the CPU that handles this PMU's events and, when the PMU has an
+ * overflow interrupt, steer that interrupt to the active CPU.
+ *
+ * cspmu->irq stays 0 when no overflow interrupt was requested; in that case
+ * there is no affinity to set, and attempting it would only trip the
+ * WARN_ON.
+ */
+static void arm_cspmu_set_active_cpu(int cpu, struct arm_cspmu *cspmu)
+{
+	cpumask_set_cpu(cpu, &cspmu->active_cpu);
+	if (cspmu->irq)
+		WARN_ON(irq_set_affinity(cspmu->irq, &cspmu->active_cpu));
+}
+
+/*
+ * CPU hotplug online callback: an associated CPU coming online takes over a
+ * PMU that currently has no active CPU. Always returns 0.
+ */
+static int arm_cspmu_cpu_online(unsigned int cpu, struct hlist_node *node)
+{
+	struct arm_cspmu *cspmu =
+		hlist_entry_safe(node, struct arm_cspmu, cpuhp_node);
+
+	/*
+	 * Nothing to do for a CPU that is not associated with this PMU, or
+	 * when the PMU is already managed by another CPU.
+	 */
+	if (!cpumask_test_cpu(cpu, &cspmu->associated_cpus) ||
+	    !cpumask_empty(&cspmu->active_cpu))
+		return 0;
+
+	/* Use this CPU for event counting */
+	arm_cspmu_set_active_cpu(cpu, cspmu);
+
+	return 0;
+}
+
+/*
+ * CPU hotplug teardown callback: when the departing CPU owns the PMU, move
+ * the perf context and PMU ownership to another online associated CPU, if
+ * there is one. Always returns 0.
+ */
+static int arm_cspmu_cpu_teardown(unsigned int cpu, struct hlist_node *node)
+{
+	struct arm_cspmu *cspmu =
+		hlist_entry_safe(node, struct arm_cspmu, cpuhp_node);
+	struct cpumask candidates;
+	int target;
+
+	/* Nothing to do if this CPU doesn't own the PMU */
+	if (!cpumask_test_and_clear_cpu(cpu, &cspmu->active_cpu))
+		return 0;
+
+	/* Candidates are the associated CPUs that are currently online. */
+	cpumask_and(&candidates, &cspmu->associated_cpus, cpu_online_mask);
+
+	target = cpumask_any_but(&candidates, cpu);
+	if (target >= nr_cpu_ids)
+		return 0;
+
+	/* Hand the events over, then make the target the active CPU. */
+	perf_pmu_migrate_context(&cspmu->pmu, cpu, target);
+	arm_cspmu_set_active_cpu(target, cspmu);
+
+	return 0;
+}
+
+/*
+ * Module init: set up the dynamic multi-instance CPU hotplug state, then
+ * register the platform driver. The hotplug state is released again if the
+ * driver registration fails, so a failed load leaves nothing behind.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int __init arm_cspmu_init(void)
+{
+	int ret;
+
+	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
+					"perf/arm/cspmu:online",
+					arm_cspmu_cpu_online,
+					arm_cspmu_cpu_teardown);
+	if (ret < 0)
+		return ret;
+	arm_cspmu_cpuhp_state = ret;
+
+	ret = platform_driver_register(&arm_cspmu_driver);
+	if (ret)
+		cpuhp_remove_multi_state(arm_cspmu_cpuhp_state);
+
+	return ret;
+}
+
+/*
+ * Module exit: unregister the platform driver first, then release the CPU
+ * hotplug state.
+ */
+static void __exit arm_cspmu_exit(void)
+{
+	platform_driver_unregister(&arm_cspmu_driver);
+	cpuhp_remove_multi_state(arm_cspmu_cpuhp_state);
+}
+
+module_init(arm_cspmu_init);
+module_exit(arm_cspmu_exit);
+
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/perf/arm_cspmu/arm_cspmu.h b/drivers/perf/arm_cspmu/arm_cspmu.h
new file mode 100644
index 000000000000..51323b175a4a
--- /dev/null
+++ b/drivers/perf/arm_cspmu/arm_cspmu.h
@@ -0,0 +1,151 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * ARM CoreSight Architecture PMU driver.
+ * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ *
+ */
+
+#ifndef __ARM_CSPMU_H__
+#define __ARM_CSPMU_H__
+
+#include <linux/acpi.h>
+#include <linux/bitfield.h>
+#include <linux/cpumask.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/perf_event.h>
+#include <linux/platform_device.h>
+#include <linux/types.h>
+
+/* Map a struct pmu pointer back to the struct arm_cspmu that embeds it. */
+#define to_arm_cspmu(p) (container_of(p, struct arm_cspmu, pmu))
+
+/*
+ * Create an anonymous struct dev_ext_attribute with mode 0444, _func passed
+ * as the third __ATTR() argument and _config stored in .var. The macro
+ * evaluates to the address of the embedded struct attribute.
+ */
+#define ARM_CSPMU_EXT_ATTR(_name, _func, _config) \
+ (&((struct dev_ext_attribute[]){ \
+ { \
+ .attr = __ATTR(_name, 0444, _func, NULL), \
+ .var = (void *)_config \
+ } \
+ })[0].attr.attr)
+
+/* Format attribute: _config is the format string, cast to char *. */
+#define ARM_CSPMU_FORMAT_ATTR(_name, _config) \
+ ARM_CSPMU_EXT_ATTR(_name, arm_cspmu_sysfs_format_show, (char *)_config)
+
+/* Event attribute: _config is passed to PMU_EVENT_ATTR_ID() as the event id. */
+#define ARM_CSPMU_EVENT_ATTR(_name, _config) \
+ PMU_EVENT_ATTR_ID(_name, arm_cspmu_sysfs_event_show, _config)
+
+
+/* Default event id mask */
+#define ARM_CSPMU_EVENT_MASK GENMASK_ULL(63, 0)
+
+/* Default filter value mask */
+#define ARM_CSPMU_FILTER_MASK GENMASK_ULL(63, 0)
+
+/*
+ * Default event format. The field spans bits 0-32 of config, which covers
+ * ARM_CSPMU_EVT_CYCLES_DEFAULT (bit 32).
+ */
+#define ARM_CSPMU_FORMAT_EVENT_ATTR \
+ ARM_CSPMU_FORMAT_ATTR(event, "config:0-32")
+
+/* Default filter format: bits 0-31 of config1. */
+#define ARM_CSPMU_FORMAT_FILTER_ATTR \
+ ARM_CSPMU_FORMAT_ATTR(filter, "config1:0-31")
+
+/*
+ * This is the default event number for cycle count, if supported, since the
+ * ARM Coresight PMU specification does not define a standard event code
+ * for cycle count.
+ */
+#define ARM_CSPMU_EVT_CYCLES_DEFAULT (0x1ULL << 32)
+
+/*
+ * The ARM Coresight PMU supports up to 256 event counters.
+ * If the counters are larger than 32 bits, then the PMU includes at
+ * most 128 counters.
+ */
+#define ARM_CSPMU_MAX_HW_CNTRS 256
+
+/* The cycle counter, if implemented, is located at counter[31]. */
+#define ARM_CSPMU_CYCLE_CNTR_IDX 31
+
+/* PMIIDR register fields: implementer in bits 11:0, product id in bits 31:20. */
+#define ARM_CSPMU_PMIIDR_IMPLEMENTER GENMASK(11, 0)
+#define ARM_CSPMU_PMIIDR_PRODUCTID GENMASK(31, 20)
+
+struct arm_cspmu;
+
+/* This tracks the events assigned to each counter in the PMU. */
+struct arm_cspmu_hw_events {
+ /* The events that are active on the PMU for a given logical index. */
+ struct perf_event **events;
+
+ /*
+ * Each bit indicates a logical counter is being used (or not) for an
+ * event. If cycle counter is supported and there is a gap between
+ * regular and cycle counter, the last logical counter is mapped to
+ * cycle counter. Otherwise, logical and physical have 1-to-1 mapping.
+ * The bitmap is sized for ARM_CSPMU_MAX_HW_CNTRS counters.
+ */
+ DECLARE_BITMAP(used_ctrs, ARM_CSPMU_MAX_HW_CNTRS);
+};
+
+/*
+ * Contains ops to query vendor/implementer specific attribute.
+ *
+ * The NVIDIA backend (nv_cspmu_init_ops()) sets the callbacks it does not
+ * implement to NULL in order to select the default callback.
+ */
+struct arm_cspmu_impl_ops {
+ /* Get event attributes */
+ struct attribute **(*get_event_attrs)(const struct arm_cspmu *cspmu);
+ /* Get format attributes */
+ struct attribute **(*get_format_attrs)(const struct arm_cspmu *cspmu);
+ /* Get string identifier */
+ const char *(*get_identifier)(const struct arm_cspmu *cspmu);
+ /* Get PMU name to register to core perf */
+ const char *(*get_name)(const struct arm_cspmu *cspmu);
+ /* Check if the event corresponds to cycle count event */
+ bool (*is_cycle_counter_event)(const struct perf_event *event);
+ /* Decode event type/id from configs */
+ u32 (*event_type)(const struct perf_event *event);
+ /* Decode filter value from configs */
+ u32 (*event_filter)(const struct perf_event *event);
+ /* Hide/show unsupported events; the last argument is named "unused" */
+ umode_t (*event_attr_is_visible)(struct kobject *kobj,
+ struct attribute *attr, int unused);
+};
+
+/* Vendor/implementer descriptor. */
+struct arm_cspmu_impl {
+ /* PMIIDR value; fields are extracted with the ARM_CSPMU_PMIIDR_* masks */
+ u32 pmiidr;
+ /* Vendor/implementer specific callbacks */
+ struct arm_cspmu_impl_ops ops;
+ /* Implementer private data (the NVIDIA backend stores its context here) */
+ void *ctx;
+};
+
+/* Coresight PMU descriptor. */
+struct arm_cspmu {
+ /* Embedded perf PMU; to_arm_cspmu() maps it back to this structure */
+ struct pmu pmu;
+ /* Device used, among others, for devm allocations by the NVIDIA backend */
+ struct device *dev;
+ /* NOTE(review): presumably the ACPI APMT node describing this PMU - confirm */
+ struct acpi_apmt_node *apmt_node;
+ /* NOTE(review): presumably filled from the get_name/get_identifier ops - confirm */
+ const char *name;
+ const char *identifier;
+ /* NOTE(review): presumably the mapped register pages of the PMU - confirm */
+ void __iomem *base0;
+ void __iomem *base1;
+ /* Interrupt whose affinity is set to active_cpu */
+ int irq;
+ /* CPUs that are allowed to own (manage) this PMU */
+ cpumask_t associated_cpus;
+ /* CPU currently owning the PMU; empty when the PMU is unmanaged */
+ cpumask_t active_cpu;
+ /* Node through which the CPU hotplug callbacks find this PMU */
+ struct hlist_node cpuhp_node;
+
+ /* NOTE(review): presumably a cached copy of the PMCFGR register - confirm */
+ u32 pmcfgr;
+ u32 num_logical_ctrs;
+ u32 num_set_clr_reg;
+ int cycle_counter_logical_idx;
+
+ /* Events assigned to the counters of this PMU */
+ struct arm_cspmu_hw_events hw_events;
+
+ /* Vendor/implementer specific descriptor */
+ struct arm_cspmu_impl impl;
+};
+
+/* Default function to show event attribute in sysfs. */
+ssize_t arm_cspmu_sysfs_event_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf);
+
+/* Default function to show format attribute in sysfs. */
+ssize_t arm_cspmu_sysfs_format_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf);
+
+#endif /* __ARM_CSPMU_H__ */
diff --git a/drivers/perf/arm_cspmu/nvidia_cspmu.c b/drivers/perf/arm_cspmu/nvidia_cspmu.c
new file mode 100644
index 000000000000..72ef80caa3c8
--- /dev/null
+++ b/drivers/perf/arm_cspmu/nvidia_cspmu.c
@@ -0,0 +1,400 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ *
+ */
+
+/* Support for NVIDIA specific attributes. */
+
+#include <linux/topology.h>
+
+#include "nvidia_cspmu.h"
+
+/* PCIe filter mask: one bit per port, NV_PCIE_PORT_COUNT bits in total. */
+#define NV_PCIE_PORT_COUNT 10ULL
+#define NV_PCIE_FILTER_ID_MASK GENMASK_ULL(NV_PCIE_PORT_COUNT - 1, 0)
+
+/* NVLink-C2C filter mask: NV_NVL_C2C_PORT_COUNT bits. */
+#define NV_NVL_C2C_PORT_COUNT 2ULL
+#define NV_NVL_C2C_FILTER_ID_MASK GENMASK_ULL(NV_NVL_C2C_PORT_COUNT - 1, 0)
+
+/* CNVLink filter mask: NV_CNVL_PORT_COUNT bits. */
+#define NV_CNVL_PORT_COUNT 4ULL
+#define NV_CNVL_FILTER_ID_MASK GENMASK_ULL(NV_CNVL_PORT_COUNT - 1, 0)
+
+/* Filter mask of the generic PMU: the low 32 bits. */
+#define NV_GENERIC_FILTER_ID_MASK GENMASK_ULL(31, 0)
+
+/* Mask applied to both sides when comparing product ids in nv_cspmu_init_ops(). */
+#define NV_PRODID_MASK GENMASK(31, 0)
+
+/* NOTE(review): not referenced anywhere in this file - confirm whether still needed */
+#define NV_FORMAT_NAME_GENERIC 0
+
+/* Get the NVIDIA context stored in the implementer private data of cspmu. */
+#define to_nv_cspmu_ctx(cspmu) ((struct nv_cspmu_ctx *)(cspmu->impl.ctx))
+
+/* Helper of NV_CSPMU_EVENT_ATTR_4: pastes prefix, number and suffix into one name. */
+#define NV_CSPMU_EVENT_ATTR_4_INNER(_pref, _num, _suff, _config) \
+ ARM_CSPMU_EVENT_ATTR(_pref##_num##_suff, _config)
+
+/*
+ * Expand to four event attributes named <_pref>_0_<_suff> .. <_pref>_3_<_suff>
+ * with the consecutive event ids _config .. _config + 3.
+ */
+#define NV_CSPMU_EVENT_ATTR_4(_pref, _suff, _config) \
+ NV_CSPMU_EVENT_ATTR_4_INNER(_pref, _0_, _suff, _config), \
+ NV_CSPMU_EVENT_ATTR_4_INNER(_pref, _1_, _suff, _config + 1), \
+ NV_CSPMU_EVENT_ATTR_4_INNER(_pref, _2_, _suff, _config + 2), \
+ NV_CSPMU_EVENT_ATTR_4_INNER(_pref, _3_, _suff, _config + 3)
+
+/*
+ * NVIDIA specific context of one PMU, filled by nv_cspmu_init_ops() from the
+ * matching nv_cspmu_match entry and stored in arm_cspmu.impl.ctx.
+ */
+struct nv_cspmu_ctx {
+ /* PMU name returned by the get_name callback */
+ const char *name;
+ /* Bits of config1 used as filter; 0 means filter_default_val is always used */
+ u32 filter_mask;
+ /* Filter value returned when filter_mask is 0 */
+ u32 filter_default_val;
+ /* NULL-terminated event attribute list returned by get_event_attrs */
+ struct attribute **event_attr;
+ /* NULL-terminated format attribute list returned by get_format_attrs */
+ struct attribute **format_attr;
+};
+
+/*
+ * Event list of the SCF PMU (selected by the prodid 0x2CF entry of
+ * nv_cspmu_match[]). Each NV_CSPMU_EVENT_ATTR_4() line contributes four
+ * events with consecutive ids. The list is NULL-terminated.
+ */
+static struct attribute *scf_pmu_event_attrs[] = {
+ ARM_CSPMU_EVENT_ATTR(bus_cycles, 0x1d),
+
+ ARM_CSPMU_EVENT_ATTR(scf_cache_allocate, 0xF0),
+ ARM_CSPMU_EVENT_ATTR(scf_cache_refill, 0xF1),
+ ARM_CSPMU_EVENT_ATTR(scf_cache, 0xF2),
+ ARM_CSPMU_EVENT_ATTR(scf_cache_wb, 0xF3),
+
+ NV_CSPMU_EVENT_ATTR_4(socket, rd_data, 0x101),
+ NV_CSPMU_EVENT_ATTR_4(socket, dl_rsp, 0x105),
+ NV_CSPMU_EVENT_ATTR_4(socket, wb_data, 0x109),
+ NV_CSPMU_EVENT_ATTR_4(socket, ev_rsp, 0x10d),
+ NV_CSPMU_EVENT_ATTR_4(socket, prb_data, 0x111),
+
+ NV_CSPMU_EVENT_ATTR_4(socket, rd_outstanding, 0x115),
+ NV_CSPMU_EVENT_ATTR_4(socket, dl_outstanding, 0x119),
+ NV_CSPMU_EVENT_ATTR_4(socket, wb_outstanding, 0x11d),
+ NV_CSPMU_EVENT_ATTR_4(socket, wr_outstanding, 0x121),
+ NV_CSPMU_EVENT_ATTR_4(socket, ev_outstanding, 0x125),
+ NV_CSPMU_EVENT_ATTR_4(socket, prb_outstanding, 0x129),
+
+ NV_CSPMU_EVENT_ATTR_4(socket, rd_access, 0x12d),
+ NV_CSPMU_EVENT_ATTR_4(socket, dl_access, 0x131),
+ NV_CSPMU_EVENT_ATTR_4(socket, wb_access, 0x135),
+ NV_CSPMU_EVENT_ATTR_4(socket, wr_access, 0x139),
+ NV_CSPMU_EVENT_ATTR_4(socket, ev_access, 0x13d),
+ NV_CSPMU_EVENT_ATTR_4(socket, prb_access, 0x141),
+
+ NV_CSPMU_EVENT_ATTR_4(ocu, gmem_rd_data, 0x145),
+ NV_CSPMU_EVENT_ATTR_4(ocu, gmem_rd_access, 0x149),
+ NV_CSPMU_EVENT_ATTR_4(ocu, gmem_wb_access, 0x14d),
+ NV_CSPMU_EVENT_ATTR_4(ocu, gmem_rd_outstanding, 0x151),
+ NV_CSPMU_EVENT_ATTR_4(ocu, gmem_wr_outstanding, 0x155),
+
+ NV_CSPMU_EVENT_ATTR_4(ocu, rem_rd_data, 0x159),
+ NV_CSPMU_EVENT_ATTR_4(ocu, rem_rd_access, 0x15d),
+ NV_CSPMU_EVENT_ATTR_4(ocu, rem_wb_access, 0x161),
+ NV_CSPMU_EVENT_ATTR_4(ocu, rem_rd_outstanding, 0x165),
+ NV_CSPMU_EVENT_ATTR_4(ocu, rem_wr_outstanding, 0x169),
+
+ ARM_CSPMU_EVENT_ATTR(gmem_rd_data, 0x16d),
+ ARM_CSPMU_EVENT_ATTR(gmem_rd_access, 0x16e),
+ ARM_CSPMU_EVENT_ATTR(gmem_rd_outstanding, 0x16f),
+ ARM_CSPMU_EVENT_ATTR(gmem_dl_rsp, 0x170),
+ ARM_CSPMU_EVENT_ATTR(gmem_dl_access, 0x171),
+ ARM_CSPMU_EVENT_ATTR(gmem_dl_outstanding, 0x172),
+ ARM_CSPMU_EVENT_ATTR(gmem_wb_data, 0x173),
+ ARM_CSPMU_EVENT_ATTR(gmem_wb_access, 0x174),
+ ARM_CSPMU_EVENT_ATTR(gmem_wb_outstanding, 0x175),
+ ARM_CSPMU_EVENT_ATTR(gmem_ev_rsp, 0x176),
+ ARM_CSPMU_EVENT_ATTR(gmem_ev_access, 0x177),
+ ARM_CSPMU_EVENT_ATTR(gmem_ev_outstanding, 0x178),
+ ARM_CSPMU_EVENT_ATTR(gmem_wr_data, 0x179),
+ ARM_CSPMU_EVENT_ATTR(gmem_wr_outstanding, 0x17a),
+ ARM_CSPMU_EVENT_ATTR(gmem_wr_access, 0x17b),
+
+ NV_CSPMU_EVENT_ATTR_4(socket, wr_data, 0x17c),
+
+ NV_CSPMU_EVENT_ATTR_4(ocu, gmem_wr_data, 0x180),
+ NV_CSPMU_EVENT_ATTR_4(ocu, gmem_wb_data, 0x184),
+ NV_CSPMU_EVENT_ATTR_4(ocu, gmem_wr_access, 0x188),
+ NV_CSPMU_EVENT_ATTR_4(ocu, gmem_wb_outstanding, 0x18c),
+
+ NV_CSPMU_EVENT_ATTR_4(ocu, rem_wr_data, 0x190),
+ NV_CSPMU_EVENT_ATTR_4(ocu, rem_wb_data, 0x194),
+ NV_CSPMU_EVENT_ATTR_4(ocu, rem_wr_access, 0x198),
+ NV_CSPMU_EVENT_ATTR_4(ocu, rem_wb_outstanding, 0x19c),
+
+ ARM_CSPMU_EVENT_ATTR(gmem_wr_total_bytes, 0x1a0),
+ ARM_CSPMU_EVENT_ATTR(remote_socket_wr_total_bytes, 0x1a1),
+ ARM_CSPMU_EVENT_ATTR(remote_socket_rd_data, 0x1a2),
+ ARM_CSPMU_EVENT_ATTR(remote_socket_rd_outstanding, 0x1a3),
+ ARM_CSPMU_EVENT_ATTR(remote_socket_rd_access, 0x1a4),
+
+ ARM_CSPMU_EVENT_ATTR(cmem_rd_data, 0x1a5),
+ ARM_CSPMU_EVENT_ATTR(cmem_rd_access, 0x1a6),
+ ARM_CSPMU_EVENT_ATTR(cmem_rd_outstanding, 0x1a7),
+ ARM_CSPMU_EVENT_ATTR(cmem_dl_rsp, 0x1a8),
+ ARM_CSPMU_EVENT_ATTR(cmem_dl_access, 0x1a9),
+ ARM_CSPMU_EVENT_ATTR(cmem_dl_outstanding, 0x1aa),
+ ARM_CSPMU_EVENT_ATTR(cmem_wb_data, 0x1ab),
+ ARM_CSPMU_EVENT_ATTR(cmem_wb_access, 0x1ac),
+ ARM_CSPMU_EVENT_ATTR(cmem_wb_outstanding, 0x1ad),
+ ARM_CSPMU_EVENT_ATTR(cmem_ev_rsp, 0x1ae),
+ ARM_CSPMU_EVENT_ATTR(cmem_ev_access, 0x1af),
+ ARM_CSPMU_EVENT_ATTR(cmem_ev_outstanding, 0x1b0),
+ ARM_CSPMU_EVENT_ATTR(cmem_wr_data, 0x1b1),
+ ARM_CSPMU_EVENT_ATTR(cmem_wr_outstanding, 0x1b2),
+
+ NV_CSPMU_EVENT_ATTR_4(ocu, cmem_rd_data, 0x1b3),
+ NV_CSPMU_EVENT_ATTR_4(ocu, cmem_rd_access, 0x1b7),
+ NV_CSPMU_EVENT_ATTR_4(ocu, cmem_wb_access, 0x1bb),
+ NV_CSPMU_EVENT_ATTR_4(ocu, cmem_rd_outstanding, 0x1bf),
+ NV_CSPMU_EVENT_ATTR_4(ocu, cmem_wr_outstanding, 0x1c3),
+
+ ARM_CSPMU_EVENT_ATTR(ocu_prb_access, 0x1c7),
+ ARM_CSPMU_EVENT_ATTR(ocu_prb_data, 0x1c8),
+ ARM_CSPMU_EVENT_ATTR(ocu_prb_outstanding, 0x1c9),
+
+ ARM_CSPMU_EVENT_ATTR(cmem_wr_access, 0x1ca),
+
+ NV_CSPMU_EVENT_ATTR_4(ocu, cmem_wr_access, 0x1cb),
+ NV_CSPMU_EVENT_ATTR_4(ocu, cmem_wb_data, 0x1cf),
+ NV_CSPMU_EVENT_ATTR_4(ocu, cmem_wr_data, 0x1d3),
+ NV_CSPMU_EVENT_ATTR_4(ocu, cmem_wb_outstanding, 0x1d7),
+
+ ARM_CSPMU_EVENT_ATTR(cmem_wr_total_bytes, 0x1db),
+
+ ARM_CSPMU_EVENT_ATTR(cycles, ARM_CSPMU_EVT_CYCLES_DEFAULT),
+ NULL,
+};
+
+/*
+ * Event list shared by the PCIe, NVLink-C2C and CNVLink entries of
+ * nv_cspmu_match[]. The list is NULL-terminated.
+ */
+static struct attribute *mcf_pmu_event_attrs[] = {
+ ARM_CSPMU_EVENT_ATTR(rd_bytes_loc, 0x0),
+ ARM_CSPMU_EVENT_ATTR(rd_bytes_rem, 0x1),
+ ARM_CSPMU_EVENT_ATTR(wr_bytes_loc, 0x2),
+ ARM_CSPMU_EVENT_ATTR(wr_bytes_rem, 0x3),
+ ARM_CSPMU_EVENT_ATTR(total_bytes_loc, 0x4),
+ ARM_CSPMU_EVENT_ATTR(total_bytes_rem, 0x5),
+ ARM_CSPMU_EVENT_ATTR(rd_req_loc, 0x6),
+ ARM_CSPMU_EVENT_ATTR(rd_req_rem, 0x7),
+ ARM_CSPMU_EVENT_ATTR(wr_req_loc, 0x8),
+ ARM_CSPMU_EVENT_ATTR(wr_req_rem, 0x9),
+ ARM_CSPMU_EVENT_ATTR(total_req_loc, 0xa),
+ ARM_CSPMU_EVENT_ATTR(total_req_rem, 0xb),
+ ARM_CSPMU_EVENT_ATTR(rd_cum_outs_loc, 0xc),
+ ARM_CSPMU_EVENT_ATTR(rd_cum_outs_rem, 0xd),
+ ARM_CSPMU_EVENT_ATTR(cycles, ARM_CSPMU_EVT_CYCLES_DEFAULT),
+ NULL,
+};
+
+/*
+ * Event list of the generic fallback entry of nv_cspmu_match[]: only the
+ * default cycles event is listed.
+ */
+static struct attribute *generic_pmu_event_attrs[] = {
+ ARM_CSPMU_EVENT_ATTR(cycles, ARM_CSPMU_EVT_CYCLES_DEFAULT),
+ NULL,
+};
+
+/* SCF PMU format: the default event field only, no filter field. */
+static struct attribute *scf_pmu_format_attrs[] = {
+ ARM_CSPMU_FORMAT_EVENT_ATTR,
+ NULL,
+};
+
+/* PCIe PMU format: event field plus a 10-bit root_port field in config1. */
+static struct attribute *pcie_pmu_format_attrs[] = {
+ ARM_CSPMU_FORMAT_EVENT_ATTR,
+ ARM_CSPMU_FORMAT_ATTR(root_port, "config1:0-9"),
+ NULL,
+};
+
+/* NVLink-C2C PMU format: the default event field only, no filter field. */
+static struct attribute *nvlink_c2c_pmu_format_attrs[] = {
+ ARM_CSPMU_FORMAT_EVENT_ATTR,
+ NULL,
+};
+
+/* CNVLink PMU format: event field plus a 4-bit rem_socket field in config1. */
+static struct attribute *cnvlink_pmu_format_attrs[] = {
+ ARM_CSPMU_FORMAT_EVENT_ATTR,
+ ARM_CSPMU_FORMAT_ATTR(rem_socket, "config1:0-3"),
+ NULL,
+};
+
+/* Generic PMU format: the default event and filter fields. */
+static struct attribute *generic_pmu_format_attrs[] = {
+ ARM_CSPMU_FORMAT_EVENT_ATTR,
+ ARM_CSPMU_FORMAT_FILTER_ATTR,
+ NULL,
+};
+
+/* get_event_attrs callback: return the event list stored in the NVIDIA context. */
+static struct attribute **
+nv_cspmu_get_event_attrs(const struct arm_cspmu *cspmu)
+{
+	return to_nv_cspmu_ctx(cspmu)->event_attr;
+}
+
+/* get_format_attrs callback: return the format list stored in the NVIDIA context. */
+static struct attribute **
+nv_cspmu_get_format_attrs(const struct arm_cspmu *cspmu)
+{
+	return to_nv_cspmu_ctx(cspmu)->format_attr;
+}
+
+/* get_name callback: return the PMU name stored in the NVIDIA context. */
+static const char *
+nv_cspmu_get_name(const struct arm_cspmu *cspmu)
+{
+	return to_nv_cspmu_ctx(cspmu)->name;
+}
+
+/*
+ * event_filter callback: return config1 masked with the PMU's filter mask,
+ * or the PMU's default filter value when the filter mask is 0.
+ */
+static u32 nv_cspmu_event_filter(const struct perf_event *event)
+{
+	const struct arm_cspmu *cspmu = to_arm_cspmu(event->pmu);
+	const struct nv_cspmu_ctx *nv_ctx = to_nv_cspmu_ctx(cspmu);
+	const u32 mask = nv_ctx->filter_mask;
+
+	return mask ? (event->attr.config1 & mask) : nv_ctx->filter_default_val;
+}
+
+/* Selects which number nv_cspmu_format_name() puts into the name pattern. */
+enum nv_cspmu_name_fmt {
+ /* A running index incremented for every generic PMU name created */
+ NAME_FMT_GENERIC,
+ /* The node id of the first CPU associated with the PMU */
+ NAME_FMT_SOCKET
+};
+
+/* Describes one supported NVIDIA PMU; see the nv_cspmu_match[] table. */
+struct nv_cspmu_match {
+ /* Product id to match; 0 marks the terminating generic entry */
+ u32 prodid;
+ /* Mask applied to both product ids before they are compared */
+ u32 prodid_mask;
+ /*
+ * Copied into the u32 nv_cspmu_ctx.filter_mask, so only the low 32 bits
+ * are kept.
+ */
+ u64 filter_mask;
+ /* Filter value used when filter_mask is 0 */
+ u32 filter_default_val;
+ /* printf-style pattern with one %u for the PMU name */
+ const char *name_pattern;
+ /* Selects the value substituted into name_pattern */
+ enum nv_cspmu_name_fmt name_fmt;
+ struct attribute **event_attr;
+ struct attribute **format_attr;
+};
+
+/*
+ * Table of supported PMUs, scanned in order by nv_cspmu_init_ops(). The scan
+ * stops at the last entry (prodid 0), which therefore acts as the generic
+ * fallback and must stay last.
+ */
+static const struct nv_cspmu_match nv_cspmu_match[] = {
+ {
+ .prodid = 0x103,
+ .prodid_mask = NV_PRODID_MASK,
+ .filter_mask = NV_PCIE_FILTER_ID_MASK,
+ .filter_default_val = NV_PCIE_FILTER_ID_MASK,
+ .name_pattern = "nvidia_pcie_pmu_%u",
+ .name_fmt = NAME_FMT_SOCKET,
+ .event_attr = mcf_pmu_event_attrs,
+ .format_attr = pcie_pmu_format_attrs
+ },
+ {
+ .prodid = 0x104,
+ .prodid_mask = NV_PRODID_MASK,
+ .filter_mask = 0x0,
+ .filter_default_val = NV_NVL_C2C_FILTER_ID_MASK,
+ .name_pattern = "nvidia_nvlink_c2c1_pmu_%u",
+ .name_fmt = NAME_FMT_SOCKET,
+ .event_attr = mcf_pmu_event_attrs,
+ .format_attr = nvlink_c2c_pmu_format_attrs
+ },
+ {
+ .prodid = 0x105,
+ .prodid_mask = NV_PRODID_MASK,
+ .filter_mask = 0x0,
+ .filter_default_val = NV_NVL_C2C_FILTER_ID_MASK,
+ .name_pattern = "nvidia_nvlink_c2c0_pmu_%u",
+ .name_fmt = NAME_FMT_SOCKET,
+ .event_attr = mcf_pmu_event_attrs,
+ .format_attr = nvlink_c2c_pmu_format_attrs
+ },
+ {
+ .prodid = 0x106,
+ .prodid_mask = NV_PRODID_MASK,
+ .filter_mask = NV_CNVL_FILTER_ID_MASK,
+ .filter_default_val = NV_CNVL_FILTER_ID_MASK,
+ .name_pattern = "nvidia_cnvlink_pmu_%u",
+ .name_fmt = NAME_FMT_SOCKET,
+ .event_attr = mcf_pmu_event_attrs,
+ .format_attr = cnvlink_pmu_format_attrs
+ },
+ {
+ .prodid = 0x2CF,
+ .prodid_mask = NV_PRODID_MASK,
+ .filter_mask = 0x0,
+ .filter_default_val = 0x0,
+ .name_pattern = "nvidia_scf_pmu_%u",
+ .name_fmt = NAME_FMT_SOCKET,
+ .event_attr = scf_pmu_event_attrs,
+ .format_attr = scf_pmu_format_attrs
+ },
+ {
+ /* Generic fallback; prodid 0 also terminates the table scan */
+ .prodid = 0,
+ .prodid_mask = 0,
+ .filter_mask = NV_GENERIC_FILTER_ID_MASK,
+ .filter_default_val = NV_GENERIC_FILTER_ID_MASK,
+ .name_pattern = "nvidia_uncore_pmu_%u",
+ .name_fmt = NAME_FMT_GENERIC,
+ .event_attr = generic_pmu_event_attrs,
+ .format_attr = generic_pmu_format_attrs
+ },
+};
+
+/*
+ * Build the device-managed PMU name for @match.
+ *
+ * For NAME_FMT_SOCKET the name pattern is filled with the node id of the
+ * first CPU associated with @cspmu; for NAME_FMT_GENERIC it is filled with a
+ * running index shared by all generic PMUs.
+ *
+ * Returns the allocated name, or NULL if the allocation failed or the name
+ * format is unknown.
+ */
+static char *nv_cspmu_format_name(const struct arm_cspmu *cspmu,
+				  const struct nv_cspmu_match *match)
+{
+	static atomic_t pmu_generic_idx = {0};
+	struct device *dev = cspmu->dev;
+
+	if (match->name_fmt == NAME_FMT_SOCKET) {
+		const int cpu = cpumask_first(&cspmu->associated_cpus);
+		const int socket = cpu_to_node(cpu);
+
+		return devm_kasprintf(dev, GFP_KERNEL, match->name_pattern,
+				      socket);
+	}
+
+	if (match->name_fmt == NAME_FMT_GENERIC)
+		return devm_kasprintf(dev, GFP_KERNEL, match->name_pattern,
+				      atomic_fetch_inc(&pmu_generic_idx));
+
+	return NULL;
+}
+
+/*
+ * Set up the NVIDIA specific context and callbacks of @cspmu.
+ *
+ * The product id extracted from PMIIDR is looked up in nv_cspmu_match[]. If
+ * no entry matches, the scan stops at the final entry (prodid 0), which
+ * describes the generic PMU. The matching entry provides the PMU name, the
+ * filter settings and the event/format attribute lists.
+ *
+ * Returns 0 on success, or -ENOMEM if the context or the PMU name could not
+ * be allocated.
+ */
+int nv_cspmu_init_ops(struct arm_cspmu *cspmu)
+{
+	u32 prodid;
+	struct nv_cspmu_ctx *ctx;
+	struct device *dev = cspmu->dev;
+	struct arm_cspmu_impl_ops *impl_ops = &cspmu->impl.ops;
+	const struct nv_cspmu_match *match = nv_cspmu_match;
+
+	ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	prodid = FIELD_GET(ARM_CSPMU_PMIIDR_PRODUCTID, cspmu->impl.pmiidr);
+
+	/* Find matching PMU; the prodid 0 entry ends the scan as fallback. */
+	for (; match->prodid; match++) {
+		const u32 prodid_mask = match->prodid_mask;
+
+		if ((match->prodid & prodid_mask) == (prodid & prodid_mask))
+			break;
+	}
+
+	/* The name is allocated; do not hand out a NULL name on failure. */
+	ctx->name = nv_cspmu_format_name(cspmu, match);
+	if (!ctx->name)
+		return -ENOMEM;
+
+	ctx->filter_mask = match->filter_mask;
+	ctx->filter_default_val = match->filter_default_val;
+	ctx->event_attr = match->event_attr;
+	ctx->format_attr = match->format_attr;
+
+	cspmu->impl.ctx = ctx;
+
+	/* NVIDIA specific callbacks. */
+	impl_ops->event_filter = nv_cspmu_event_filter;
+	impl_ops->get_event_attrs = nv_cspmu_get_event_attrs;
+	impl_ops->get_format_attrs = nv_cspmu_get_format_attrs;
+	impl_ops->get_name = nv_cspmu_get_name;
+
+	/* Set others to NULL to use default callback. */
+	impl_ops->event_type = NULL;
+	impl_ops->event_attr_is_visible = NULL;
+	impl_ops->get_identifier = NULL;
+	impl_ops->is_cycle_counter_event = NULL;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nv_cspmu_init_ops);
+
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/perf/arm_cspmu/nvidia_cspmu.h b/drivers/perf/arm_cspmu/nvidia_cspmu.h
new file mode 100644
index 000000000000..71e18f0dc50b
--- /dev/null
+++ b/drivers/perf/arm_cspmu/nvidia_cspmu.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ *
+ */
+
+/* Support for NVIDIA specific attributes. */
+
+#ifndef __NVIDIA_CSPMU_H__
+#define __NVIDIA_CSPMU_H__
+
+#include "arm_cspmu.h"
+
+/* Allocate NVIDIA descriptor. */
+int nv_cspmu_init_ops(struct arm_cspmu *cspmu);
+
+#endif /* __NVIDIA_CSPMU_H__ */
diff --git a/drivers/perf/arm_dmc620_pmu.c b/drivers/perf/arm_dmc620_pmu.c
index 280a6ae3e27c..54aa4658fb36 100644
--- a/drivers/perf/arm_dmc620_pmu.c
+++ b/drivers/perf/arm_dmc620_pmu.c
@@ -725,6 +725,8 @@ static struct platform_driver dmc620_pmu_driver = {
static int __init dmc620_pmu_init(void)
{
+ int ret;
+
cpuhp_state_num = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
DMC620_DRVNAME,
NULL,
@@ -732,7 +734,11 @@ static int __init dmc620_pmu_init(void)
if (cpuhp_state_num < 0)
return cpuhp_state_num;
- return platform_driver_register(&dmc620_pmu_driver);
+ ret = platform_driver_register(&dmc620_pmu_driver);
+ if (ret)
+ cpuhp_remove_multi_state(cpuhp_state_num);
+
+ return ret;
}
static void __exit dmc620_pmu_exit(void)
diff --git a/drivers/perf/arm_dsu_pmu.c b/drivers/perf/arm_dsu_pmu.c
index 4a15c86f45ef..fe2abb412c00 100644
--- a/drivers/perf/arm_dsu_pmu.c
+++ b/drivers/perf/arm_dsu_pmu.c
@@ -858,7 +858,11 @@ static int __init dsu_pmu_init(void)
if (ret < 0)
return ret;
dsu_pmu_cpuhp_state = ret;
- return platform_driver_register(&dsu_pmu_driver);
+ ret = platform_driver_register(&dsu_pmu_driver);
+ if (ret)
+ cpuhp_remove_multi_state(dsu_pmu_cpuhp_state);
+
+ return ret;
}
static void __exit dsu_pmu_exit(void)
diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
index 82a6d22e8ee2..bb56676f50ef 100644
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -514,9 +514,6 @@ static int armpmu_event_init(struct perf_event *event)
if (has_branch_stack(event))
return -EOPNOTSUPP;
- if (armpmu->map_event(event) == -ENOENT)
- return -ENOENT;
-
return __hw_perf_event_init(event);
}
diff --git a/drivers/perf/arm_smmuv3_pmu.c b/drivers/perf/arm_smmuv3_pmu.c
index 00d4c45a8017..25a269d431e4 100644
--- a/drivers/perf/arm_smmuv3_pmu.c
+++ b/drivers/perf/arm_smmuv3_pmu.c
@@ -959,6 +959,8 @@ static struct platform_driver smmu_pmu_driver = {
static int __init arm_smmu_pmu_init(void)
{
+ int ret;
+
cpuhp_state_num = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
"perf/arm/pmcg:online",
NULL,
@@ -966,7 +968,11 @@ static int __init arm_smmu_pmu_init(void)
if (cpuhp_state_num < 0)
return cpuhp_state_num;
- return platform_driver_register(&smmu_pmu_driver);
+ ret = platform_driver_register(&smmu_pmu_driver);
+ if (ret)
+ cpuhp_remove_multi_state(cpuhp_state_num);
+
+ return ret;
}
module_init(arm_smmu_pmu_init);
diff --git a/drivers/perf/hisilicon/hisi_pcie_pmu.c b/drivers/perf/hisilicon/hisi_pcie_pmu.c
index 21771708597d..6fee0b6e163b 100644
--- a/drivers/perf/hisilicon/hisi_pcie_pmu.c
+++ b/drivers/perf/hisilicon/hisi_pcie_pmu.c
@@ -47,10 +47,14 @@
#define HISI_PCIE_EVENT_M GENMASK_ULL(15, 0)
#define HISI_PCIE_THR_MODE_M GENMASK_ULL(27, 27)
#define HISI_PCIE_THR_M GENMASK_ULL(31, 28)
+#define HISI_PCIE_LEN_M GENMASK_ULL(35, 34)
#define HISI_PCIE_TARGET_M GENMASK_ULL(52, 36)
#define HISI_PCIE_TRIG_MODE_M GENMASK_ULL(53, 53)
#define HISI_PCIE_TRIG_M GENMASK_ULL(59, 56)
+/* Default config of TLP length mode, will count both TLP headers and payloads */
+#define HISI_PCIE_LEN_M_DEFAULT 3ULL
+
#define HISI_PCIE_MAX_COUNTERS 8
#define HISI_PCIE_REG_STEP 8
#define HISI_PCIE_THR_MAX_VAL 10
@@ -91,6 +95,7 @@ HISI_PCIE_PMU_FILTER_ATTR(thr_len, config1, 3, 0);
HISI_PCIE_PMU_FILTER_ATTR(thr_mode, config1, 4, 4);
HISI_PCIE_PMU_FILTER_ATTR(trig_len, config1, 8, 5);
HISI_PCIE_PMU_FILTER_ATTR(trig_mode, config1, 9, 9);
+HISI_PCIE_PMU_FILTER_ATTR(len_mode, config1, 11, 10);
HISI_PCIE_PMU_FILTER_ATTR(port, config2, 15, 0);
HISI_PCIE_PMU_FILTER_ATTR(bdf, config2, 31, 16);
@@ -215,8 +220,8 @@ static void hisi_pcie_pmu_config_filter(struct perf_event *event)
{
struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(event->pmu);
struct hw_perf_event *hwc = &event->hw;
+ u64 port, trig_len, thr_len, len_mode;
u64 reg = HISI_PCIE_INIT_SET;
- u64 port, trig_len, thr_len;
/* Config HISI_PCIE_EVENT_CTRL according to event. */
reg |= FIELD_PREP(HISI_PCIE_EVENT_M, hisi_pcie_get_real_event(event));
@@ -245,6 +250,12 @@ static void hisi_pcie_pmu_config_filter(struct perf_event *event)
reg |= HISI_PCIE_THR_EN;
}
+ len_mode = hisi_pcie_get_len_mode(event);
+ if (len_mode)
+ reg |= FIELD_PREP(HISI_PCIE_LEN_M, len_mode);
+ else
+ reg |= FIELD_PREP(HISI_PCIE_LEN_M, HISI_PCIE_LEN_M_DEFAULT);
+
hisi_pcie_pmu_writeq(pcie_pmu, HISI_PCIE_EVENT_CTRL, hwc->idx, reg);
}
@@ -693,10 +704,10 @@ static struct attribute *hisi_pcie_pmu_events_attr[] = {
HISI_PCIE_PMU_EVENT_ATTR(rx_mrd_cnt, 0x10210),
HISI_PCIE_PMU_EVENT_ATTR(tx_mrd_latency, 0x0011),
HISI_PCIE_PMU_EVENT_ATTR(tx_mrd_cnt, 0x10011),
- HISI_PCIE_PMU_EVENT_ATTR(rx_mrd_flux, 0x1005),
- HISI_PCIE_PMU_EVENT_ATTR(rx_mrd_time, 0x11005),
- HISI_PCIE_PMU_EVENT_ATTR(tx_mrd_flux, 0x2004),
- HISI_PCIE_PMU_EVENT_ATTR(tx_mrd_time, 0x12004),
+ HISI_PCIE_PMU_EVENT_ATTR(rx_mrd_flux, 0x0804),
+ HISI_PCIE_PMU_EVENT_ATTR(rx_mrd_time, 0x10804),
+ HISI_PCIE_PMU_EVENT_ATTR(tx_mrd_flux, 0x0405),
+ HISI_PCIE_PMU_EVENT_ATTR(tx_mrd_time, 0x10405),
NULL
};
@@ -711,6 +722,7 @@ static struct attribute *hisi_pcie_pmu_format_attr[] = {
HISI_PCIE_PMU_FORMAT_ATTR(thr_mode, "config1:4"),
HISI_PCIE_PMU_FORMAT_ATTR(trig_len, "config1:5-8"),
HISI_PCIE_PMU_FORMAT_ATTR(trig_mode, "config1:9"),
+ HISI_PCIE_PMU_FORMAT_ATTR(len_mode, "config1:10-11"),
HISI_PCIE_PMU_FORMAT_ATTR(port, "config2:0-15"),
HISI_PCIE_PMU_FORMAT_ATTR(bdf, "config2:16-31"),
NULL
diff --git a/drivers/perf/marvell_cn10k_tad_pmu.c b/drivers/perf/marvell_cn10k_tad_pmu.c
index 69c3050a4348..a1166afb3702 100644
--- a/drivers/perf/marvell_cn10k_tad_pmu.c
+++ b/drivers/perf/marvell_cn10k_tad_pmu.c
@@ -408,7 +408,11 @@ static int __init tad_pmu_init(void)
if (ret < 0)
return ret;
tad_pmu_cpuhp_state = ret;
- return platform_driver_register(&tad_pmu_driver);
+ ret = platform_driver_register(&tad_pmu_driver);
+ if (ret)
+ cpuhp_remove_multi_state(tad_pmu_cpuhp_state);
+
+ return ret;
}
static void __exit tad_pmu_exit(void)