summary | refs | log | tree | commit | diff
path: root/drivers/perf
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/perf')
-rw-r--r-- drivers/perf/Kconfig 20
-rw-r--r-- drivers/perf/Makefile 2
-rw-r--r-- drivers/perf/arm-ccn.c 2
-rw-r--r-- drivers/perf/arm-cmn.c 29
-rw-r--r-- drivers/perf/arm-ni.c 153
-rw-r--r-- drivers/perf/arm_brbe.c 805
-rw-r--r-- drivers/perf/arm_brbe.h 47
-rw-r--r-- drivers/perf/arm_pmu.c 16
-rw-r--r-- drivers/perf/arm_pmuv3.c 134
-rw-r--r-- drivers/perf/arm_spe_pmu.c 132
-rw-r--r-- drivers/perf/cxl_pmu.c 12
-rw-r--r-- drivers/perf/dwc_pcie_pmu.c 161
-rw-r--r-- drivers/perf/fsl_imx9_ddr_perf.c 14
-rw-r--r-- drivers/perf/fujitsu_uncore_pmu.c 613
-rw-r--r-- drivers/perf/hisilicon/Makefile 3
-rw-r--r-- drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c 354
-rw-r--r-- drivers/perf/hisilicon/hisi_uncore_hha_pmu.c 6
-rw-r--r-- drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c 528
-rw-r--r-- drivers/perf/hisilicon/hisi_uncore_mn_pmu.c 411
-rw-r--r-- drivers/perf/hisilicon/hisi_uncore_noc_pmu.c 443
-rw-r--r-- drivers/perf/hisilicon/hisi_uncore_pa_pmu.c 2
-rw-r--r-- drivers/perf/hisilicon/hisi_uncore_pmu.c 16
-rw-r--r-- drivers/perf/hisilicon/hisi_uncore_pmu.h 8
-rw-r--r-- drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c 220
-rw-r--r-- drivers/perf/riscv_pmu_sbi.c 8
25 files changed, 3635 insertions, 504 deletions
diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
index 278c929dc87a..638321fc9800 100644
--- a/drivers/perf/Kconfig
+++ b/drivers/perf/Kconfig
@@ -178,6 +178,15 @@ config FSL_IMX9_DDR_PMU
can give information about memory throughput and other related
events.
+config FUJITSU_UNCORE_PMU
+ tristate "Fujitsu Uncore PMU"
+ depends on (ARM64 && ACPI) || (COMPILE_TEST && 64BIT)
+ help
+ Provides support for the Uncore performance monitor unit (PMU)
+ in Fujitsu processors.
+ Adds the Uncore PMU into the perf events subsystem for
+ monitoring Uncore events.
+
config QCOM_L2_PMU
bool "Qualcomm Technologies L2-cache PMU"
depends on ARCH_QCOM && ARM64 && ACPI
@@ -223,6 +232,17 @@ config ARM_SPE_PMU
Extension, which provides periodic sampling of operations in
the CPU pipeline and reports this via the perf AUX interface.
+config ARM64_BRBE
+ bool "Enable support for branch stack sampling using FEAT_BRBE"
+ depends on ARM_PMUV3 && ARM64
+ default y
+ help
+ Enable perf support for Branch Record Buffer Extension (BRBE) which
+ records all branches taken in an execution path. This supports some
+ branch types and privilege based filtering. It captures additional
+ relevant information such as cycle count, misprediction and branch
+ type, branch privilege level etc.
+
config ARM_DMC620_PMU
tristate "Enable PMU support for the ARM DMC-620 memory controller"
depends on (ARM64 && ACPI) || COMPILE_TEST
diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile
index de71d2574857..ea52711a87e3 100644
--- a/drivers/perf/Makefile
+++ b/drivers/perf/Makefile
@@ -13,6 +13,7 @@ obj-$(CONFIG_ARM_XSCALE_PMU) += arm_xscale_pmu.o
obj-$(CONFIG_ARM_SMMU_V3_PMU) += arm_smmuv3_pmu.o
obj-$(CONFIG_FSL_IMX8_DDR_PMU) += fsl_imx8_ddr_perf.o
obj-$(CONFIG_FSL_IMX9_DDR_PMU) += fsl_imx9_ddr_perf.o
+obj-$(CONFIG_FUJITSU_UNCORE_PMU) += fujitsu_uncore_pmu.o
obj-$(CONFIG_HISI_PMU) += hisilicon/
obj-$(CONFIG_QCOM_L2_PMU) += qcom_l2_pmu.o
obj-$(CONFIG_QCOM_L3_PMU) += qcom_l3_pmu.o
@@ -23,6 +24,7 @@ obj-$(CONFIG_STARFIVE_STARLINK_PMU) += starfive_starlink_pmu.o
obj-$(CONFIG_THUNDERX2_PMU) += thunderx2_pmu.o
obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o
obj-$(CONFIG_ARM_SPE_PMU) += arm_spe_pmu.o
+obj-$(CONFIG_ARM64_BRBE) += arm_brbe.o
obj-$(CONFIG_ARM_DMC620_PMU) += arm_dmc620_pmu.o
obj-$(CONFIG_MARVELL_CN10K_TAD_PMU) += marvell_cn10k_tad_pmu.o
obj-$(CONFIG_MARVELL_CN10K_DDR_PMU) += marvell_cn10k_ddr_pmu.o
diff --git a/drivers/perf/arm-ccn.c b/drivers/perf/arm-ccn.c
index 1a0d0e1a2263..8af3563fdf60 100644
--- a/drivers/perf/arm-ccn.c
+++ b/drivers/perf/arm-ccn.c
@@ -565,7 +565,7 @@ module_param_named(pmu_poll_period_us, arm_ccn_pmu_poll_period_us, uint,
static ktime_t arm_ccn_pmu_timer_period(void)
{
- return ns_to_ktime((u64)arm_ccn_pmu_poll_period_us * 1000);
+ return us_to_ktime((u64)arm_ccn_pmu_poll_period_us);
}
diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
index 031d45d0fe3d..23245352a3fc 100644
--- a/drivers/perf/arm-cmn.c
+++ b/drivers/perf/arm-cmn.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (C) 2016-2020 Arm Limited
-// CMN-600 Coherent Mesh Network PMU driver
+// ARM CMN/CI interconnect PMU driver
#include <linux/acpi.h>
#include <linux/bitfield.h>
@@ -65,7 +65,7 @@
/* PMU registers occupy the 3rd 4KB page of each node's region */
#define CMN_PMU_OFFSET 0x2000
/* ...except when they don't :( */
-#define CMN_S3_DTM_OFFSET 0xa000
+#define CMN_S3_R1_DTM_OFFSET 0xa000
#define CMN_S3_PMU_OFFSET 0xd900
/* For most nodes, this is all there is */
@@ -233,6 +233,9 @@ enum cmn_revision {
REV_CMN700_R1P0,
REV_CMN700_R2P0,
REV_CMN700_R3P0,
+ REV_CMNS3_R0P0 = 0,
+ REV_CMNS3_R0P1,
+ REV_CMNS3_R1P0,
REV_CI700_R0P0 = 0,
REV_CI700_R1P0,
REV_CI700_R2P0,
@@ -425,8 +428,8 @@ static enum cmn_model arm_cmn_model(const struct arm_cmn *cmn)
static int arm_cmn_pmu_offset(const struct arm_cmn *cmn, const struct arm_cmn_node *dn)
{
if (cmn->part == PART_CMN_S3) {
- if (dn->type == CMN_TYPE_XP)
- return CMN_S3_DTM_OFFSET;
+ if (cmn->rev >= REV_CMNS3_R1P0 && dn->type == CMN_TYPE_XP)
+ return CMN_S3_R1_DTM_OFFSET;
return CMN_S3_PMU_OFFSET;
}
return CMN_PMU_OFFSET;
@@ -2245,12 +2248,11 @@ static enum cmn_node_type arm_cmn_subtype(enum cmn_node_type type)
static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
{
- void __iomem *cfg_region;
+ void __iomem *cfg_region, __iomem *xp_region;
struct arm_cmn_node cfg, *dn;
struct arm_cmn_dtm *dtm;
enum cmn_part part;
u16 child_count, child_poff;
- u32 xp_offset[CMN_MAX_XPS];
u64 reg;
int i, j;
size_t sz;
@@ -2302,11 +2304,12 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
cmn->num_dns = cmn->num_xps;
/* Pass 1: visit the XPs, enumerate their children */
+ cfg_region += child_poff;
for (i = 0; i < cmn->num_xps; i++) {
- reg = readq_relaxed(cfg_region + child_poff + i * 8);
- xp_offset[i] = reg & CMN_CHILD_NODE_ADDR;
+ reg = readq_relaxed(cfg_region + i * 8);
+ xp_region = cmn->base + (reg & CMN_CHILD_NODE_ADDR);
- reg = readq_relaxed(cmn->base + xp_offset[i] + CMN_CHILD_INFO);
+ reg = readq_relaxed(xp_region + CMN_CHILD_INFO);
cmn->num_dns += FIELD_GET(CMN_CI_CHILD_COUNT, reg);
}
@@ -2332,11 +2335,12 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
cmn->dns = dn;
cmn->dtms = dtm;
for (i = 0; i < cmn->num_xps; i++) {
- void __iomem *xp_region = cmn->base + xp_offset[i];
struct arm_cmn_node *xp = dn++;
unsigned int xp_ports = 0;
- arm_cmn_init_node_info(cmn, xp_offset[i], xp);
+ reg = readq_relaxed(cfg_region + i * 8);
+ xp_region = cmn->base + (reg & CMN_CHILD_NODE_ADDR);
+ arm_cmn_init_node_info(cmn, reg & CMN_CHILD_NODE_ADDR, xp);
/*
* Thanks to the order in which XP logical IDs seem to be
* assigned, we can handily infer the mesh X dimension by
@@ -2655,6 +2659,7 @@ static struct platform_driver arm_cmn_driver = {
.name = "arm-cmn",
.of_match_table = of_match_ptr(arm_cmn_of_match),
.acpi_match_table = ACPI_PTR(arm_cmn_acpi_match),
+ .suppress_bind_attrs = true,
},
.probe = arm_cmn_probe,
.remove = arm_cmn_remove,
@@ -2693,5 +2698,5 @@ module_init(arm_cmn_init);
module_exit(arm_cmn_exit);
MODULE_AUTHOR("Robin Murphy <robin.murphy@arm.com>");
-MODULE_DESCRIPTION("Arm CMN-600 PMU driver");
+MODULE_DESCRIPTION("Arm CMN/CI interconnect PMU driver");
MODULE_LICENSE("GPL v2");
diff --git a/drivers/perf/arm-ni.c b/drivers/perf/arm-ni.c
index de7b6cce4d68..1615a0564031 100644
--- a/drivers/perf/arm-ni.c
+++ b/drivers/perf/arm-ni.c
@@ -102,10 +102,9 @@ struct arm_ni_unit {
struct arm_ni_cd {
void __iomem *pmu_base;
u16 id;
+ s8 irq_friend;
int num_units;
int irq;
- int cpu;
- struct hlist_node cpuhp_node;
struct pmu pmu;
struct arm_ni_unit *units;
struct perf_event *evcnt[NI_NUM_COUNTERS];
@@ -117,13 +116,18 @@ struct arm_ni {
void __iomem *base;
enum ni_part part;
int id;
+ int cpu;
int num_cds;
+ struct hlist_node cpuhp_node;
struct arm_ni_cd cds[] __counted_by(num_cds);
};
#define cd_to_ni(cd) container_of((cd), struct arm_ni, cds[(cd)->id])
#define pmu_to_cd(p) container_of((p), struct arm_ni_cd, pmu)
+#define ni_for_each_cd(n, c) \
+ for (struct arm_ni_cd *c = n->cds; c < n->cds + n->num_cds; c++) if (c->pmu_base)
+
#define cd_for_each_unit(cd, u) \
for (struct arm_ni_unit *u = cd->units; u < cd->units + cd->num_units; u++)
@@ -218,9 +222,9 @@ static const struct attribute_group arm_ni_format_attrs_group = {
static ssize_t arm_ni_cpumask_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
- struct arm_ni_cd *cd = pmu_to_cd(dev_get_drvdata(dev));
+ struct arm_ni *ni = cd_to_ni(pmu_to_cd(dev_get_drvdata(dev)));
- return cpumap_print_to_pagebuf(true, buf, cpumask_of(cd->cpu));
+ return cpumap_print_to_pagebuf(true, buf, cpumask_of(ni->cpu));
}
static struct device_attribute arm_ni_cpumask_attr =
@@ -314,7 +318,7 @@ static int arm_ni_event_init(struct perf_event *event)
if (is_sampling_event(event))
return -EINVAL;
- event->cpu = cd->cpu;
+ event->cpu = cd_to_ni(cd)->cpu;
if (NI_EVENT_TYPE(event) == NI_PMU)
return arm_ni_validate_group(event);
@@ -445,33 +449,37 @@ static irqreturn_t arm_ni_handle_irq(int irq, void *dev_id)
{
struct arm_ni_cd *cd = dev_id;
irqreturn_t ret = IRQ_NONE;
- u32 reg = readl_relaxed(cd->pmu_base + NI_PMOVSCLR);
- if (reg & (1U << NI_CCNT_IDX)) {
- ret = IRQ_HANDLED;
- if (!(WARN_ON(!cd->ccnt))) {
- arm_ni_event_read(cd->ccnt);
- arm_ni_init_ccnt(cd);
+ for (;;) {
+ u32 reg = readl_relaxed(cd->pmu_base + NI_PMOVSCLR);
+
+ if (reg & (1U << NI_CCNT_IDX)) {
+ ret = IRQ_HANDLED;
+ if (!(WARN_ON(!cd->ccnt))) {
+ arm_ni_event_read(cd->ccnt);
+ arm_ni_init_ccnt(cd);
+ }
}
- }
- for (int i = 0; i < NI_NUM_COUNTERS; i++) {
- if (!(reg & (1U << i)))
- continue;
- ret = IRQ_HANDLED;
- if (!(WARN_ON(!cd->evcnt[i]))) {
- arm_ni_event_read(cd->evcnt[i]);
- arm_ni_init_evcnt(cd, i);
+ for (int i = 0; i < NI_NUM_COUNTERS; i++) {
+ if (!(reg & (1U << i)))
+ continue;
+ ret = IRQ_HANDLED;
+ if (!(WARN_ON(!cd->evcnt[i]))) {
+ arm_ni_event_read(cd->evcnt[i]);
+ arm_ni_init_evcnt(cd, i);
+ }
}
+ writel_relaxed(reg, cd->pmu_base + NI_PMOVSCLR);
+ if (!cd->irq_friend)
+ return ret;
+ cd += cd->irq_friend;
}
- writel_relaxed(reg, cd->pmu_base + NI_PMOVSCLR);
- return ret;
}
static int arm_ni_init_cd(struct arm_ni *ni, struct arm_ni_node *node, u64 res_start)
{
struct arm_ni_cd *cd = ni->cds + node->id;
const char *name;
- int err;
cd->id = node->id;
cd->num_units = node->num_components;
@@ -531,19 +539,11 @@ static int arm_ni_init_cd(struct arm_ni *ni, struct arm_ni_node *node, u64 res_s
cd->pmu_base + NI_PMCR);
writel_relaxed(U32_MAX, cd->pmu_base + NI_PMCNTENCLR);
writel_relaxed(U32_MAX, cd->pmu_base + NI_PMOVSCLR);
- writel_relaxed(U32_MAX, cd->pmu_base + NI_PMINTENSET);
cd->irq = platform_get_irq(to_platform_device(ni->dev), cd->id);
if (cd->irq < 0)
return cd->irq;
- err = devm_request_irq(ni->dev, cd->irq, arm_ni_handle_irq,
- IRQF_NOBALANCING | IRQF_NO_THREAD,
- dev_name(ni->dev), cd);
- if (err)
- return err;
-
- cd->cpu = cpumask_local_spread(0, dev_to_node(ni->dev));
cd->pmu = (struct pmu) {
.module = THIS_MODULE,
.parent = ni->dev,
@@ -564,32 +564,19 @@ static int arm_ni_init_cd(struct arm_ni *ni, struct arm_ni_node *node, u64 res_s
if (!name)
return -ENOMEM;
- err = cpuhp_state_add_instance_nocalls(arm_ni_hp_state, &cd->cpuhp_node);
- if (err)
- return err;
-
- err = perf_pmu_register(&cd->pmu, name, -1);
- if (err)
- cpuhp_state_remove_instance_nocalls(arm_ni_hp_state, &cd->cpuhp_node);
-
- return err;
+ return perf_pmu_register(&cd->pmu, name, -1);
}
static void arm_ni_remove(struct platform_device *pdev)
{
struct arm_ni *ni = platform_get_drvdata(pdev);
- for (int i = 0; i < ni->num_cds; i++) {
- struct arm_ni_cd *cd = ni->cds + i;
-
- if (!cd->pmu_base)
- continue;
-
+ ni_for_each_cd(ni, cd) {
writel_relaxed(0, cd->pmu_base + NI_PMCR);
writel_relaxed(U32_MAX, cd->pmu_base + NI_PMINTENCLR);
perf_pmu_unregister(&cd->pmu);
- cpuhp_state_remove_instance_nocalls(arm_ni_hp_state, &cd->cpuhp_node);
}
+ cpuhp_state_remove_instance_nocalls(arm_ni_hp_state, &ni->cpuhp_node);
}
static void arm_ni_probe_domain(void __iomem *base, struct arm_ni_node *node)
@@ -602,6 +589,34 @@ static void arm_ni_probe_domain(void __iomem *base, struct arm_ni_node *node)
node->num_components = readl_relaxed(base + NI_CHILD_NODE_INFO);
}
+static int arm_ni_init_irqs(struct arm_ni *ni)
+{
+ int err;
+
+ ni_for_each_cd(ni, cd) {
+ for (struct arm_ni_cd *prev = cd; prev-- > ni->cds; ) {
+ if (prev->irq == cd->irq) {
+ prev->irq_friend = cd - prev;
+ goto set_inten;
+ }
+ }
+ err = devm_request_irq(ni->dev, cd->irq, arm_ni_handle_irq,
+ IRQF_NOBALANCING | IRQF_NO_THREAD | IRQF_NO_AUTOEN,
+ dev_name(ni->dev), cd);
+ if (err)
+ return err;
+
+ irq_set_affinity(cd->irq, cpumask_of(ni->cpu));
+set_inten:
+ writel_relaxed(U32_MAX, cd->pmu_base + NI_PMINTENSET);
+ }
+
+ ni_for_each_cd(ni, cd)
+ if (!cd->irq_friend)
+ enable_irq(cd->irq);
+ return 0;
+}
+
static int arm_ni_probe(struct platform_device *pdev)
{
struct arm_ni_node cfg, vd, pd, cd;
@@ -609,7 +624,7 @@ static int arm_ni_probe(struct platform_device *pdev)
struct resource *res;
void __iomem *base;
static atomic_t id;
- int num_cds;
+ int ret, num_cds;
u32 reg, part;
/*
@@ -660,8 +675,13 @@ static int arm_ni_probe(struct platform_device *pdev)
ni->num_cds = num_cds;
ni->part = part;
ni->id = atomic_fetch_inc(&id);
+ ni->cpu = cpumask_local_spread(0, dev_to_node(ni->dev));
platform_set_drvdata(pdev, ni);
+ ret = cpuhp_state_add_instance_nocalls(arm_ni_hp_state, &ni->cpuhp_node);
+ if (ret)
+ return ret;
+
for (int v = 0; v < cfg.num_components; v++) {
reg = readl_relaxed(cfg.base + NI_CHILD_PTR(v));
arm_ni_probe_domain(base + reg, &vd);
@@ -669,8 +689,6 @@ static int arm_ni_probe(struct platform_device *pdev)
reg = readl_relaxed(vd.base + NI_CHILD_PTR(p));
arm_ni_probe_domain(base + reg, &pd);
for (int c = 0; c < pd.num_components; c++) {
- int ret;
-
reg = readl_relaxed(pd.base + NI_CHILD_PTR(c));
arm_ni_probe_domain(base + reg, &cd);
ret = arm_ni_init_cd(ni, &cd, res->start);
@@ -683,7 +701,11 @@ static int arm_ni_probe(struct platform_device *pdev)
}
}
- return 0;
+ ret = arm_ni_init_irqs(ni);
+ if (ret)
+ arm_ni_remove(pdev);
+
+ return ret;
}
#ifdef CONFIG_OF
@@ -707,47 +729,50 @@ static struct platform_driver arm_ni_driver = {
.name = "arm-ni",
.of_match_table = of_match_ptr(arm_ni_of_match),
.acpi_match_table = ACPI_PTR(arm_ni_acpi_match),
+ .suppress_bind_attrs = true,
},
.probe = arm_ni_probe,
.remove = arm_ni_remove,
};
-static void arm_ni_pmu_migrate(struct arm_ni_cd *cd, unsigned int cpu)
+static void arm_ni_pmu_migrate(struct arm_ni *ni, unsigned int cpu)
{
- perf_pmu_migrate_context(&cd->pmu, cd->cpu, cpu);
- irq_set_affinity(cd->irq, cpumask_of(cpu));
- cd->cpu = cpu;
+ ni_for_each_cd(ni, cd) {
+ perf_pmu_migrate_context(&cd->pmu, ni->cpu, cpu);
+ irq_set_affinity(cd->irq, cpumask_of(cpu));
+ }
+ ni->cpu = cpu;
}
static int arm_ni_pmu_online_cpu(unsigned int cpu, struct hlist_node *cpuhp_node)
{
- struct arm_ni_cd *cd;
+ struct arm_ni *ni;
int node;
- cd = hlist_entry_safe(cpuhp_node, struct arm_ni_cd, cpuhp_node);
- node = dev_to_node(cd_to_ni(cd)->dev);
- if (cpu_to_node(cd->cpu) != node && cpu_to_node(cpu) == node)
- arm_ni_pmu_migrate(cd, cpu);
+ ni = hlist_entry_safe(cpuhp_node, struct arm_ni, cpuhp_node);
+ node = dev_to_node(ni->dev);
+ if (cpu_to_node(ni->cpu) != node && cpu_to_node(cpu) == node)
+ arm_ni_pmu_migrate(ni, cpu);
return 0;
}
static int arm_ni_pmu_offline_cpu(unsigned int cpu, struct hlist_node *cpuhp_node)
{
- struct arm_ni_cd *cd;
+ struct arm_ni *ni;
unsigned int target;
int node;
- cd = hlist_entry_safe(cpuhp_node, struct arm_ni_cd, cpuhp_node);
- if (cpu != cd->cpu)
+ ni = hlist_entry_safe(cpuhp_node, struct arm_ni, cpuhp_node);
+ if (cpu != ni->cpu)
return 0;
- node = dev_to_node(cd_to_ni(cd)->dev);
+ node = dev_to_node(ni->dev);
target = cpumask_any_and_but(cpumask_of_node(node), cpu_online_mask, cpu);
if (target >= nr_cpu_ids)
target = cpumask_any_but(cpu_online_mask, cpu);
if (target < nr_cpu_ids)
- arm_ni_pmu_migrate(cd, target);
+ arm_ni_pmu_migrate(ni, target);
return 0;
}
diff --git a/drivers/perf/arm_brbe.c b/drivers/perf/arm_brbe.c
new file mode 100644
index 000000000000..ba554e0c846c
--- /dev/null
+++ b/drivers/perf/arm_brbe.c
@@ -0,0 +1,805 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Branch Record Buffer Extension Driver.
+ *
+ * Copyright (C) 2022-2025 ARM Limited
+ *
+ * Author: Anshuman Khandual <anshuman.khandual@arm.com>
+ */
+#include <linux/types.h>
+#include <linux/bitmap.h>
+#include <linux/perf/arm_pmu.h>
+#include "arm_brbe.h"
+
+#define BRBFCR_EL1_BRANCH_FILTERS (BRBFCR_EL1_DIRECT | \
+ BRBFCR_EL1_INDIRECT | \
+ BRBFCR_EL1_RTN | \
+ BRBFCR_EL1_INDCALL | \
+ BRBFCR_EL1_DIRCALL | \
+ BRBFCR_EL1_CONDDIR)
+
+/*
+ * BRBTS_EL1 is currently not used by the branch stack implementation,
+ * but BRBCR_ELx.TS must still hold one of its valid encodings.
+ * BRBCR_ELx_TS_VIRTUAL is selected for this.
+ */
+#define BRBCR_ELx_DEFAULT_TS FIELD_PREP(BRBCR_ELx_TS_MASK, BRBCR_ELx_TS_VIRTUAL)
+
+/*
+ * BRBE Buffer Organization
+ *
+ * The BRBE buffer is arranged as multiple banks of 32 branch record
+ * entries each. An individual branch record in a given bank is
+ * accessed by first selecting the bank in BRBFCR_EL1.BANK and then
+ * accessing the register set, i.e. [BRBSRC, BRBTGT, BRBINF], with
+ * indices [0..31].
+ *
+ * Bank 0
+ *
+ * --------------------------------- ------
+ * | 00 | BRBSRC | BRBTGT | BRBINF | | 00 |
+ * --------------------------------- ------
+ * | 01 | BRBSRC | BRBTGT | BRBINF | | 01 |
+ * --------------------------------- ------
+ * | .. | BRBSRC | BRBTGT | BRBINF | | .. |
+ * --------------------------------- ------
+ * | 31 | BRBSRC | BRBTGT | BRBINF | | 31 |
+ * --------------------------------- ------
+ *
+ * Bank 1
+ *
+ * --------------------------------- ------
+ * | 32 | BRBSRC | BRBTGT | BRBINF | | 00 |
+ * --------------------------------- ------
+ * | 33 | BRBSRC | BRBTGT | BRBINF | | 01 |
+ * --------------------------------- ------
+ * | .. | BRBSRC | BRBTGT | BRBINF | | .. |
+ * --------------------------------- ------
+ * | 63 | BRBSRC | BRBTGT | BRBINF | | 31 |
+ * --------------------------------- ------
+ */
+#define BRBE_BANK_MAX_ENTRIES 32
+
+struct brbe_regset {
+ u64 brbsrc;
+ u64 brbtgt;
+ u64 brbinf;
+};
+
+#define PERF_BR_ARM64_MAX (PERF_BR_MAX + PERF_BR_NEW_MAX)
+
+struct brbe_hw_attr {
+ int brbe_version;
+ int brbe_cc;
+ int brbe_nr;
+ int brbe_format;
+};
+
+#define BRBE_REGN_CASE(n, case_macro) \
+ case n: case_macro(n); break
+
+#define BRBE_REGN_SWITCH(x, case_macro) \
+ do { \
+ switch (x) { \
+ BRBE_REGN_CASE(0, case_macro); \
+ BRBE_REGN_CASE(1, case_macro); \
+ BRBE_REGN_CASE(2, case_macro); \
+ BRBE_REGN_CASE(3, case_macro); \
+ BRBE_REGN_CASE(4, case_macro); \
+ BRBE_REGN_CASE(5, case_macro); \
+ BRBE_REGN_CASE(6, case_macro); \
+ BRBE_REGN_CASE(7, case_macro); \
+ BRBE_REGN_CASE(8, case_macro); \
+ BRBE_REGN_CASE(9, case_macro); \
+ BRBE_REGN_CASE(10, case_macro); \
+ BRBE_REGN_CASE(11, case_macro); \
+ BRBE_REGN_CASE(12, case_macro); \
+ BRBE_REGN_CASE(13, case_macro); \
+ BRBE_REGN_CASE(14, case_macro); \
+ BRBE_REGN_CASE(15, case_macro); \
+ BRBE_REGN_CASE(16, case_macro); \
+ BRBE_REGN_CASE(17, case_macro); \
+ BRBE_REGN_CASE(18, case_macro); \
+ BRBE_REGN_CASE(19, case_macro); \
+ BRBE_REGN_CASE(20, case_macro); \
+ BRBE_REGN_CASE(21, case_macro); \
+ BRBE_REGN_CASE(22, case_macro); \
+ BRBE_REGN_CASE(23, case_macro); \
+ BRBE_REGN_CASE(24, case_macro); \
+ BRBE_REGN_CASE(25, case_macro); \
+ BRBE_REGN_CASE(26, case_macro); \
+ BRBE_REGN_CASE(27, case_macro); \
+ BRBE_REGN_CASE(28, case_macro); \
+ BRBE_REGN_CASE(29, case_macro); \
+ BRBE_REGN_CASE(30, case_macro); \
+ BRBE_REGN_CASE(31, case_macro); \
+ default: WARN(1, "Invalid BRB* index %d\n", x); \
+ } \
+ } while (0)
+
+#define RETURN_READ_BRBSRCN(n) \
+ return read_sysreg_s(SYS_BRBSRC_EL1(n))
+static inline u64 get_brbsrc_reg(int idx)
+{
+ BRBE_REGN_SWITCH(idx, RETURN_READ_BRBSRCN);
+ return 0;
+}
+
+#define RETURN_READ_BRBTGTN(n) \
+ return read_sysreg_s(SYS_BRBTGT_EL1(n))
+static u64 get_brbtgt_reg(int idx)
+{
+ BRBE_REGN_SWITCH(idx, RETURN_READ_BRBTGTN);
+ return 0;
+}
+
+#define RETURN_READ_BRBINFN(n) \
+ return read_sysreg_s(SYS_BRBINF_EL1(n))
+static u64 get_brbinf_reg(int idx)
+{
+ BRBE_REGN_SWITCH(idx, RETURN_READ_BRBINFN);
+ return 0;
+}
+
+static u64 brbe_record_valid(u64 brbinf)
+{
+ return FIELD_GET(BRBINFx_EL1_VALID_MASK, brbinf);
+}
+
+static bool brbe_invalid(u64 brbinf)
+{
+ return brbe_record_valid(brbinf) == BRBINFx_EL1_VALID_NONE;
+}
+
+static bool brbe_record_is_complete(u64 brbinf)
+{
+ return brbe_record_valid(brbinf) == BRBINFx_EL1_VALID_FULL;
+}
+
+static bool brbe_record_is_source_only(u64 brbinf)
+{
+ return brbe_record_valid(brbinf) == BRBINFx_EL1_VALID_SOURCE;
+}
+
+static bool brbe_record_is_target_only(u64 brbinf)
+{
+ return brbe_record_valid(brbinf) == BRBINFx_EL1_VALID_TARGET;
+}
+
+static int brbinf_get_in_tx(u64 brbinf)
+{
+ return FIELD_GET(BRBINFx_EL1_T_MASK, brbinf);
+}
+
+static int brbinf_get_mispredict(u64 brbinf)
+{
+ return FIELD_GET(BRBINFx_EL1_MPRED_MASK, brbinf);
+}
+
+static int brbinf_get_lastfailed(u64 brbinf)
+{
+ return FIELD_GET(BRBINFx_EL1_LASTFAILED_MASK, brbinf);
+}
+
+static u16 brbinf_get_cycles(u64 brbinf)
+{
+ u32 exp, mant, cycles;
+ /*
+ * Captured cycle count is unknown and hence
+ * should not be passed on to userspace.
+ */
+ if (brbinf & BRBINFx_EL1_CCU)
+ return 0;
+
+ exp = FIELD_GET(BRBINFx_EL1_CC_EXP_MASK, brbinf);
+ mant = FIELD_GET(BRBINFx_EL1_CC_MANT_MASK, brbinf);
+
+ if (!exp)
+ return mant;
+
+ cycles = (mant | 0x100) << (exp - 1);
+
+ return min(cycles, U16_MAX);
+}
+
+static int brbinf_get_type(u64 brbinf)
+{
+ return FIELD_GET(BRBINFx_EL1_TYPE_MASK, brbinf);
+}
+
+static int brbinf_get_el(u64 brbinf)
+{
+ return FIELD_GET(BRBINFx_EL1_EL_MASK, brbinf);
+}
+
+void brbe_invalidate(void)
+{
+ /* Ensure all branches before this point are recorded */
+ isb();
+ asm volatile(BRB_IALL_INSN);
+ /* Ensure all branch records are invalidated after this point */
+ isb();
+}
+
+static bool valid_brbe_nr(int brbe_nr)
+{
+ return brbe_nr == BRBIDR0_EL1_NUMREC_8 ||
+ brbe_nr == BRBIDR0_EL1_NUMREC_16 ||
+ brbe_nr == BRBIDR0_EL1_NUMREC_32 ||
+ brbe_nr == BRBIDR0_EL1_NUMREC_64;
+}
+
+static bool valid_brbe_cc(int brbe_cc)
+{
+ return brbe_cc == BRBIDR0_EL1_CC_20_BIT;
+}
+
+static bool valid_brbe_format(int brbe_format)
+{
+ return brbe_format == BRBIDR0_EL1_FORMAT_FORMAT_0;
+}
+
+static bool valid_brbidr(u64 brbidr)
+{
+ int brbe_format, brbe_cc, brbe_nr;
+
+ brbe_format = FIELD_GET(BRBIDR0_EL1_FORMAT_MASK, brbidr);
+ brbe_cc = FIELD_GET(BRBIDR0_EL1_CC_MASK, brbidr);
+ brbe_nr = FIELD_GET(BRBIDR0_EL1_NUMREC_MASK, brbidr);
+
+ return valid_brbe_format(brbe_format) && valid_brbe_cc(brbe_cc) && valid_brbe_nr(brbe_nr);
+}
+
+static bool valid_brbe_version(int brbe_version)
+{
+ return brbe_version == ID_AA64DFR0_EL1_BRBE_IMP ||
+ brbe_version == ID_AA64DFR0_EL1_BRBE_BRBE_V1P1;
+}
+
+static void select_brbe_bank(int bank)
+{
+ u64 brbfcr;
+
+ brbfcr = read_sysreg_s(SYS_BRBFCR_EL1);
+ brbfcr &= ~BRBFCR_EL1_BANK_MASK;
+ brbfcr |= SYS_FIELD_PREP(BRBFCR_EL1, BANK, bank);
+ write_sysreg_s(brbfcr, SYS_BRBFCR_EL1);
+ /*
+ * Arm ARM (DDI 0487K.a) D.18.4 rule PPBZP requires explicit sync
+ * between setting BANK and accessing branch records.
+ */
+ isb();
+}
+
+static bool __read_brbe_regset(struct brbe_regset *entry, int idx)
+{
+ entry->brbinf = get_brbinf_reg(idx);
+
+ if (brbe_invalid(entry->brbinf))
+ return false;
+
+ entry->brbsrc = get_brbsrc_reg(idx);
+ entry->brbtgt = get_brbtgt_reg(idx);
+ return true;
+}
+
+/*
+ * Generic perf branch filters supported on BRBE
+ *
+ * New branch filters need to be evaluated for whether they can be supported
+ * on BRBE. This ensures that such filters are not simply accepted, only to
+ * fail silently. PERF_SAMPLE_BRANCH_HV is a special case that is supported
+ * only on platforms where the kernel is in hyp mode.
+ */
+#define BRBE_EXCLUDE_BRANCH_FILTERS (PERF_SAMPLE_BRANCH_ABORT_TX | \
+ PERF_SAMPLE_BRANCH_IN_TX | \
+ PERF_SAMPLE_BRANCH_NO_TX | \
+ PERF_SAMPLE_BRANCH_CALL_STACK | \
+ PERF_SAMPLE_BRANCH_COUNTERS)
+
+#define BRBE_ALLOWED_BRANCH_TYPES (PERF_SAMPLE_BRANCH_ANY | \
+ PERF_SAMPLE_BRANCH_ANY_CALL | \
+ PERF_SAMPLE_BRANCH_ANY_RETURN | \
+ PERF_SAMPLE_BRANCH_IND_CALL | \
+ PERF_SAMPLE_BRANCH_COND | \
+ PERF_SAMPLE_BRANCH_IND_JUMP | \
+ PERF_SAMPLE_BRANCH_CALL)
+
+
+#define BRBE_ALLOWED_BRANCH_FILTERS (PERF_SAMPLE_BRANCH_USER | \
+ PERF_SAMPLE_BRANCH_KERNEL | \
+ PERF_SAMPLE_BRANCH_HV | \
+ BRBE_ALLOWED_BRANCH_TYPES | \
+ PERF_SAMPLE_BRANCH_NO_FLAGS | \
+ PERF_SAMPLE_BRANCH_NO_CYCLES | \
+ PERF_SAMPLE_BRANCH_TYPE_SAVE | \
+ PERF_SAMPLE_BRANCH_HW_INDEX | \
+ PERF_SAMPLE_BRANCH_PRIV_SAVE)
+
+#define BRBE_PERF_BRANCH_FILTERS (BRBE_ALLOWED_BRANCH_FILTERS | \
+ BRBE_EXCLUDE_BRANCH_FILTERS)
+
+/*
+ * BRBE supports the following functional branch type filters while
+ * generating branch records. These branch filters can be enabled,
+ * either individually or as a group i.e ORing multiple filters
+ * with each other.
+ *
+ * BRBFCR_EL1_CONDDIR - Conditional direct branch
+ * BRBFCR_EL1_DIRCALL - Direct call
+ * BRBFCR_EL1_INDCALL - Indirect call
+ * BRBFCR_EL1_INDIRECT - Indirect branch
+ * BRBFCR_EL1_DIRECT - Direct branch
+ * BRBFCR_EL1_RTN - Subroutine return
+ */
+static u64 branch_type_to_brbfcr(int branch_type)
+{
+ u64 brbfcr = 0;
+
+ if (branch_type & PERF_SAMPLE_BRANCH_ANY) {
+ brbfcr |= BRBFCR_EL1_BRANCH_FILTERS;
+ return brbfcr;
+ }
+
+ if (branch_type & PERF_SAMPLE_BRANCH_ANY_CALL) {
+ brbfcr |= BRBFCR_EL1_INDCALL;
+ brbfcr |= BRBFCR_EL1_DIRCALL;
+ }
+
+ if (branch_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
+ brbfcr |= BRBFCR_EL1_RTN;
+
+ if (branch_type & PERF_SAMPLE_BRANCH_IND_CALL)
+ brbfcr |= BRBFCR_EL1_INDCALL;
+
+ if (branch_type & PERF_SAMPLE_BRANCH_COND)
+ brbfcr |= BRBFCR_EL1_CONDDIR;
+
+ if (branch_type & PERF_SAMPLE_BRANCH_IND_JUMP)
+ brbfcr |= BRBFCR_EL1_INDIRECT;
+
+ if (branch_type & PERF_SAMPLE_BRANCH_CALL)
+ brbfcr |= BRBFCR_EL1_DIRCALL;
+
+ return brbfcr;
+}
+
+/*
+ * BRBE supports the following privilege mode filters while generating
+ * branch records.
+ *
+ * BRBCR_ELx_E0BRE - EL0 branch records
+ * BRBCR_ELx_ExBRE - EL1/EL2 branch records
+ *
+ * BRBE also supports the following additional functional branch type
+ * filters while generating branch records.
+ *
+ * BRBCR_ELx_EXCEPTION - Exception
+ * BRBCR_ELx_ERTN - Exception return
+ */
+static u64 branch_type_to_brbcr(int branch_type)
+{
+ u64 brbcr = BRBCR_ELx_FZP | BRBCR_ELx_DEFAULT_TS;
+
+ if (branch_type & PERF_SAMPLE_BRANCH_USER)
+ brbcr |= BRBCR_ELx_E0BRE;
+
+ /*
+ * When running in hyp mode, writing into BRBCR_EL1
+ * actually writes into BRBCR_EL2 instead. Field E2BRE
+ * is also at the same position as E1BRE.
+ */
+ if (branch_type & PERF_SAMPLE_BRANCH_KERNEL)
+ brbcr |= BRBCR_ELx_ExBRE;
+
+ if (branch_type & PERF_SAMPLE_BRANCH_HV) {
+ if (is_kernel_in_hyp_mode())
+ brbcr |= BRBCR_ELx_ExBRE;
+ }
+
+ if (!(branch_type & PERF_SAMPLE_BRANCH_NO_CYCLES))
+ brbcr |= BRBCR_ELx_CC;
+
+ if (!(branch_type & PERF_SAMPLE_BRANCH_NO_FLAGS))
+ brbcr |= BRBCR_ELx_MPRED;
+
+ /*
+ * The exception and exception return branches could be
+ * captured, irrespective of the perf event's privilege.
+ * If the perf event does not have enough privilege for
+ * a given exception level, then addresses which fall
+ * under that exception level will be reported as zero
+ * for the captured branch record, creating source only
+ * or target only records.
+ */
+ if (branch_type & PERF_SAMPLE_BRANCH_KERNEL) {
+ if (branch_type & PERF_SAMPLE_BRANCH_ANY) {
+ brbcr |= BRBCR_ELx_EXCEPTION;
+ brbcr |= BRBCR_ELx_ERTN;
+ }
+
+ if (branch_type & PERF_SAMPLE_BRANCH_ANY_CALL)
+ brbcr |= BRBCR_ELx_EXCEPTION;
+
+ if (branch_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
+ brbcr |= BRBCR_ELx_ERTN;
+ }
+ return brbcr;
+}
+
+bool brbe_branch_attr_valid(struct perf_event *event)
+{
+ u64 branch_type = event->attr.branch_sample_type;
+
+ /*
+ * Ensure that the allowed and excluded perf branch filter
+ * masks are always in sync with the generic perf ABI.
+ */
+ BUILD_BUG_ON(BRBE_PERF_BRANCH_FILTERS != (PERF_SAMPLE_BRANCH_MAX - 1));
+
+ if (branch_type & BRBE_EXCLUDE_BRANCH_FILTERS) {
+ pr_debug("requested branch filter not supported 0x%llx\n", branch_type);
+ return false;
+ }
+
+ /* Ensure at least 1 branch type is enabled */
+ if (!(branch_type & BRBE_ALLOWED_BRANCH_TYPES)) {
+ pr_debug("no branch type enabled 0x%llx\n", branch_type);
+ return false;
+ }
+
+ /*
+ * No branches are recorded in guests or nVHE hypervisors, so
+ * excluding the host or both kernel and user is invalid.
+ *
+ * Ideally we'd just require exclude_guest and exclude_hv, but setting
+ * event filters with perf for kernel or user doesn't set exclude_guest.
+ * So effectively, exclude_guest and exclude_hv are ignored.
+ */
+ if (event->attr.exclude_host || (event->attr.exclude_user && event->attr.exclude_kernel)) {
+ pr_debug("branch filter in hypervisor or guest only not supported 0x%llx\n", branch_type);
+ return false;
+ }
+
+ event->hw.branch_reg.config = branch_type_to_brbfcr(event->attr.branch_sample_type);
+ event->hw.extra_reg.config = branch_type_to_brbcr(event->attr.branch_sample_type);
+
+ return true;
+}
+
+unsigned int brbe_num_branch_records(const struct arm_pmu *armpmu)
+{
+ return FIELD_GET(BRBIDR0_EL1_NUMREC_MASK, armpmu->reg_brbidr);
+}
+
+void brbe_probe(struct arm_pmu *armpmu)
+{
+ u64 brbidr, aa64dfr0 = read_sysreg_s(SYS_ID_AA64DFR0_EL1);
+ u32 brbe;
+
+ brbe = cpuid_feature_extract_unsigned_field(aa64dfr0, ID_AA64DFR0_EL1_BRBE_SHIFT);
+ if (!valid_brbe_version(brbe))
+ return;
+
+ brbidr = read_sysreg_s(SYS_BRBIDR0_EL1);
+ if (!valid_brbidr(brbidr))
+ return;
+
+ armpmu->reg_brbidr = brbidr;
+}
+
+/*
+ * BRBE is assumed to be disabled/paused on entry
+ */
+void brbe_enable(const struct arm_pmu *arm_pmu)
+{
+ struct pmu_hw_events *cpuc = this_cpu_ptr(arm_pmu->hw_events);
+ u64 brbfcr = 0, brbcr = 0;
+
+ /*
+ * Discard existing records to avoid a discontinuity, e.g. records
+ * missed while handling an overflow.
+ */
+ brbe_invalidate();
+
+ /*
+ * Merge the permitted branch filters of all events.
+ */
+ for (int i = 0; i < ARMPMU_MAX_HWEVENTS; i++) {
+ struct perf_event *event = cpuc->events[i];
+
+ if (event && has_branch_stack(event)) {
+ brbfcr |= event->hw.branch_reg.config;
+ brbcr |= event->hw.extra_reg.config;
+ }
+ }
+
+ /*
+ * In VHE mode with MDCR_EL2.HPMN equal to PMCR_EL0.N, BRBCR_EL1.FZP
+ * controls freezing the branch records on counter overflow rather than
+ * BRBCR_EL2.FZP (which writes to BRBCR_EL1 are redirected to).
+ * The exception levels are enabled/disabled in BRBCR_EL2, so keep EL1
+ * and EL0 recording disabled for guests.
+ *
+ * As BRBCR_EL1 CC and MPRED bits also need to match, use the same
+ * value for both registers just masking the exception levels.
+ */
+ if (is_kernel_in_hyp_mode())
+ write_sysreg_s(brbcr & ~(BRBCR_ELx_ExBRE | BRBCR_ELx_E0BRE), SYS_BRBCR_EL12);
+ write_sysreg_s(brbcr, SYS_BRBCR_EL1);
+ /* Ensure BRBCR_ELx settings take effect before unpausing */
+ isb();
+
+ /* Finally write SYS_BRBFCR_EL1 to unpause BRBE */
+ write_sysreg_s(brbfcr, SYS_BRBFCR_EL1);
+ /* Synchronization in PMCR write ensures ordering WRT PMU enabling */
+}
+
+void brbe_disable(void)
+{
+ /*
+ * No need for synchronization here as synchronization in PMCR write
+ * ensures ordering and in the interrupt handler this is a NOP as
+ * we're already paused.
+ */
+ write_sysreg_s(BRBFCR_EL1_PAUSED, SYS_BRBFCR_EL1);
+ write_sysreg_s(0, SYS_BRBCR_EL1);
+}
+
+/*
+ * Map a BRBE record type (one of the BRBINFx_EL1_TYPE_* encodings) to the
+ * pair { entry->type, entry->new_type } stored by brbe_set_perf_entry_type().
+ * Fault types are reported through PERF_BR_EXTEND_ABI plus a PERF_BR_NEW_*
+ * sub-type. Encodings without an explicit initializer stay zero-initialized.
+ */
+static const int brbe_type_to_perf_type_map[BRBINFx_EL1_TYPE_DEBUG_EXIT + 1][2] = {
+ [BRBINFx_EL1_TYPE_DIRECT_UNCOND] = { PERF_BR_UNCOND, 0 },
+ [BRBINFx_EL1_TYPE_INDIRECT] = { PERF_BR_IND, 0 },
+ [BRBINFx_EL1_TYPE_DIRECT_LINK] = { PERF_BR_CALL, 0 },
+ [BRBINFx_EL1_TYPE_INDIRECT_LINK] = { PERF_BR_IND_CALL, 0 },
+ [BRBINFx_EL1_TYPE_RET] = { PERF_BR_RET, 0 },
+ [BRBINFx_EL1_TYPE_DIRECT_COND] = { PERF_BR_COND, 0 },
+ [BRBINFx_EL1_TYPE_CALL] = { PERF_BR_SYSCALL, 0 },
+ [BRBINFx_EL1_TYPE_ERET] = { PERF_BR_ERET, 0 },
+ [BRBINFx_EL1_TYPE_IRQ] = { PERF_BR_IRQ, 0 },
+ [BRBINFx_EL1_TYPE_TRAP] = { PERF_BR_IRQ, 0 },
+ [BRBINFx_EL1_TYPE_SERROR] = { PERF_BR_SERROR, 0 },
+ [BRBINFx_EL1_TYPE_ALIGN_FAULT] = { PERF_BR_EXTEND_ABI, PERF_BR_NEW_FAULT_ALGN },
+ [BRBINFx_EL1_TYPE_INSN_FAULT] = { PERF_BR_EXTEND_ABI, PERF_BR_NEW_FAULT_INST },
+ [BRBINFx_EL1_TYPE_DATA_FAULT] = { PERF_BR_EXTEND_ABI, PERF_BR_NEW_FAULT_DATA },
+};
+
+/*
+ * Fill entry->type and entry->new_type from the record type encoded in
+ * @brbinf. Encodings beyond the end of the lookup table leave @entry
+ * untouched.
+ */
+static void brbe_set_perf_entry_type(struct perf_branch_entry *entry, u64 brbinf)
+{
+	int type = brbinf_get_type(brbinf);
+
+	/* Nothing to translate for encodings the table does not cover. */
+	if (type > BRBINFx_EL1_TYPE_DEBUG_EXIT)
+		return;
+
+	entry->type = brbe_type_to_perf_type_map[type][0];
+	entry->new_type = brbe_type_to_perf_type_map[type][1];
+}
+
+/*
+ * Translate the exception level recorded in @brbinf into a PERF_BR_PRIV_*
+ * value. EL2 counts as kernel when the kernel itself runs in hyp mode and
+ * as hypervisor otherwise; an unrecognised level is reported once and
+ * mapped to PERF_BR_PRIV_UNKNOWN.
+ */
+static int brbinf_get_perf_priv(u64 brbinf)
+{
+	int el = brbinf_get_el(brbinf);
+
+	if (el == BRBINFx_EL1_EL_EL0)
+		return PERF_BR_PRIV_USER;
+
+	if (el == BRBINFx_EL1_EL_EL1)
+		return PERF_BR_PRIV_KERNEL;
+
+	if (el == BRBINFx_EL1_EL_EL2)
+		return is_kernel_in_hyp_mode() ? PERF_BR_PRIV_KERNEL :
+						 PERF_BR_PRIV_HV;
+
+	pr_warn_once("%d - unknown branch privilege captured\n", el);
+	return PERF_BR_PRIV_UNKNOWN;
+}
+
+/*
+ * Read the BRBE record at @index and convert it into the perf branch
+ * entry @entry. The caller selects the bank before calling.
+ *
+ * @index: record index passed to __read_brbe_regset().
+ * @entry: perf branch entry to fill; its bitfields are cleared first.
+ * @event: event whose attributes decide whether cycles and flags are stored.
+ *
+ * Returns false when __read_brbe_regset() fails, true once @entry has been
+ * populated.
+ */
+static bool perf_entry_from_brbe_regset(int index, struct perf_branch_entry *entry,
+ const struct perf_event *event)
+{
+ struct brbe_regset bregs;
+ u64 brbinf;
+
+ if (!__read_brbe_regset(&bregs, index))
+ return false;
+
+ brbinf = bregs.brbinf;
+ perf_clear_branch_entry_bitfields(entry);
+ /* A half record reports the missing address as 0. */
+ if (brbe_record_is_complete(brbinf)) {
+ entry->from = bregs.brbsrc;
+ entry->to = bregs.brbtgt;
+ } else if (brbe_record_is_source_only(brbinf)) {
+ entry->from = bregs.brbsrc;
+ entry->to = 0;
+ } else if (brbe_record_is_target_only(brbinf)) {
+ entry->from = 0;
+ entry->to = bregs.brbtgt;
+ }
+
+ brbe_set_perf_entry_type(entry, brbinf);
+
+ if (!branch_sample_no_cycles(event))
+ entry->cycles = brbinf_get_cycles(brbinf);
+
+ if (!branch_sample_no_flags(event)) {
+ /* Mispredict info is available for source only and complete branch records. */
+ if (!brbe_record_is_target_only(brbinf)) {
+ entry->mispred = brbinf_get_mispredict(brbinf);
+ entry->predicted = !entry->mispred;
+ }
+
+ /*
+ * Currently the TME feature is neither implemented in any
+ * hardware nor supported in the kernel. Just warn here once
+ * if TME related information shows up unexpectedly.
+ */
+ if (brbinf_get_lastfailed(brbinf) || brbinf_get_in_tx(brbinf))
+ pr_warn_once("Unknown transaction states\n");
+ }
+
+ /*
+ * Branch privilege level is available for target only and complete
+ * branch records.
+ */
+ if (!brbe_record_is_source_only(brbinf))
+ entry->priv = brbinf_get_perf_priv(brbinf);
+
+ return true;
+}
+
+/*
+ * Branch types selected by PERF_SAMPLE_BRANCH_ANY whether or not
+ * PERF_SAMPLE_BRANCH_KERNEL is set (see prepare_event_branch_type_mask()).
+ */
+#define PERF_BR_ARM64_ALL ( \
+ BIT(PERF_BR_COND) | \
+ BIT(PERF_BR_UNCOND) | \
+ BIT(PERF_BR_IND) | \
+ BIT(PERF_BR_CALL) | \
+ BIT(PERF_BR_IND_CALL) | \
+ BIT(PERF_BR_RET))
+
+/*
+ * Extra branch types that are only selected together with
+ * PERF_SAMPLE_BRANCH_KERNEL. Extended (PERF_BR_NEW_*) types are placed at
+ * bit PERF_BR_MAX + new_type, matching the lookup in filter_branch_type().
+ */
+#define PERF_BR_ARM64_ALL_KERNEL ( \
+ BIT(PERF_BR_SYSCALL) | \
+ BIT(PERF_BR_IRQ) | \
+ BIT(PERF_BR_SERROR) | \
+ BIT(PERF_BR_MAX + PERF_BR_NEW_FAULT_ALGN) | \
+ BIT(PERF_BR_MAX + PERF_BR_NEW_FAULT_DATA) | \
+ BIT(PERF_BR_MAX + PERF_BR_NEW_FAULT_INST))
+
+/*
+ * Build the bitmap of perf branch types requested by @branch_sample.
+ *
+ * @branch_sample: the event's PERF_SAMPLE_BRANCH_* flags.
+ * @event_type_mask: output bitmap, indexed by PERF_BR_* type with extended
+ * types at PERF_BR_MAX + new_type.
+ *
+ * PERF_SAMPLE_BRANCH_ANY selects every type in PERF_BR_ARM64_ALL and returns
+ * early; ERET and the PERF_BR_ARM64_ALL_KERNEL types are added only when
+ * PERF_SAMPLE_BRANCH_KERNEL is also set. Otherwise the mask starts empty and
+ * each individual filter flag adds its own types.
+ */
+static void prepare_event_branch_type_mask(u64 branch_sample,
+ unsigned long *event_type_mask)
+{
+ if (branch_sample & PERF_SAMPLE_BRANCH_ANY) {
+ if (branch_sample & PERF_SAMPLE_BRANCH_KERNEL)
+ bitmap_from_u64(event_type_mask,
+ BIT(PERF_BR_ERET) | PERF_BR_ARM64_ALL |
+ PERF_BR_ARM64_ALL_KERNEL);
+ else
+ bitmap_from_u64(event_type_mask, PERF_BR_ARM64_ALL);
+ return;
+ }
+
+ bitmap_zero(event_type_mask, PERF_BR_ARM64_MAX);
+
+ if (branch_sample & PERF_SAMPLE_BRANCH_ANY_CALL) {
+ /* Runs right after bitmap_zero(), so overwriting the mask loses nothing. */
+ if (branch_sample & PERF_SAMPLE_BRANCH_KERNEL)
+ bitmap_from_u64(event_type_mask, PERF_BR_ARM64_ALL_KERNEL);
+
+ set_bit(PERF_BR_CALL, event_type_mask);
+ set_bit(PERF_BR_IND_CALL, event_type_mask);
+ }
+
+ if (branch_sample & PERF_SAMPLE_BRANCH_IND_JUMP)
+ set_bit(PERF_BR_IND, event_type_mask);
+
+ if (branch_sample & PERF_SAMPLE_BRANCH_COND)
+ set_bit(PERF_BR_COND, event_type_mask);
+
+ if (branch_sample & PERF_SAMPLE_BRANCH_CALL)
+ set_bit(PERF_BR_CALL, event_type_mask);
+
+ if (branch_sample & PERF_SAMPLE_BRANCH_IND_CALL)
+ set_bit(PERF_BR_IND_CALL, event_type_mask);
+
+ if (branch_sample & PERF_SAMPLE_BRANCH_ANY_RETURN) {
+ set_bit(PERF_BR_RET, event_type_mask);
+
+ if (branch_sample & PERF_SAMPLE_BRANCH_KERNEL)
+ set_bit(PERF_BR_ERET, event_type_mask);
+ }
+}
+
+/*
+ * BRBE is configured with an OR of permissions from all events, so there may
+ * be events which have to be dropped or events where just the source or target
+ * address has to be zeroed.
+ *
+ * @entry: record to check; its from/to addresses may be zeroed in place.
+ * @branch_sample_type: the event's PERF_SAMPLE_BRANCH_* flags.
+ *
+ * Returns true if the record is to be kept (possibly with one address
+ * masked) and false if it must be dropped.
+ */
+static bool filter_branch_privilege(struct perf_branch_entry *entry, u64 branch_sample_type)
+{
+ bool from_user = access_ok((void __user *)(unsigned long)entry->from, 4);
+ bool to_user = access_ok((void __user *)(unsigned long)entry->to, 4);
+ /*
+ * Kernel addresses are wanted with PERF_SAMPLE_BRANCH_KERNEL, or with
+ * PERF_SAMPLE_BRANCH_HV when the kernel itself runs in hyp mode.
+ */
+ bool exclude_kernel = !((branch_sample_type & PERF_SAMPLE_BRANCH_KERNEL) ||
+ (is_kernel_in_hyp_mode() && (branch_sample_type & PERF_SAMPLE_BRANCH_HV)));
+
+ /* We can only have a half record if permissions have not been expanded */
+ if (!entry->from || !entry->to)
+ return true;
+
+ /*
+ * If record is within a single exception level, just need to either
+ * drop or keep the entire record.
+ */
+ if (from_user == to_user)
+ return ((entry->priv == PERF_BR_PRIV_KERNEL) && !exclude_kernel) ||
+ ((entry->priv == PERF_BR_PRIV_USER) &&
+ (branch_sample_type & PERF_SAMPLE_BRANCH_USER));
+
+ /*
+ * Record is across exception levels, mask addresses for the exception
+ * level we're not capturing.
+ */
+ if (!(branch_sample_type & PERF_SAMPLE_BRANCH_USER)) {
+ if (from_user)
+ entry->from = 0;
+ if (to_user)
+ entry->to = 0;
+ }
+
+ if (exclude_kernel) {
+ if (!from_user)
+ entry->from = 0;
+ if (!to_user)
+ entry->to = 0;
+ }
+
+ return true;
+}
+
+/*
+ * Report whether the branch type of @entry is selected in @event_type_mask.
+ * Extended-ABI entries are looked up by their new_type, offset past the
+ * ordinary types by PERF_BR_MAX.
+ */
+static bool filter_branch_type(struct perf_branch_entry *entry,
+			       const unsigned long *event_type_mask)
+{
+	int bit = entry->type;
+
+	if (bit == PERF_BR_EXTEND_ABI)
+		bit = PERF_BR_MAX + entry->new_type;
+
+	return test_bit(bit, event_type_mask);
+}
+
+/*
+ * Decide whether @entry is kept for an event: the branch type is checked
+ * first, and only records of a wanted type go on to the privilege filter
+ * (which may also zero one of the addresses).
+ */
+static bool filter_branch_record(struct perf_branch_entry *entry,
+				 u64 branch_sample,
+				 const unsigned long *event_type_mask)
+{
+	if (!filter_branch_type(entry, event_type_mask))
+		return false;
+
+	return filter_branch_privilege(entry, branch_sample);
+}
+
+/*
+ * Copy the BRBE records that @event is interested in into @branch_stack.
+ *
+ * Records are read bank by bank; each one is converted into the next free
+ * slot of branch_stack->entries and kept only if it passes the event's type
+ * and privilege filters. The walk stops at the first record that cannot be
+ * read. branch_stack->nr is set to the number of records kept.
+ */
+void brbe_read_filtered_entries(struct perf_branch_stack *branch_stack,
+				const struct perf_event *event)
+{
+	DECLARE_BITMAP(type_mask, PERF_BR_ARM64_MAX);
+	u64 sample_type = event->attr.branch_sample_type;
+	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+	int total = brbe_num_branch_records(cpu_pmu);
+	int banks = DIV_ROUND_UP(total, BRBE_BANK_MAX_ENTRIES);
+	int kept = 0;
+
+	prepare_event_branch_type_mask(sample_type, type_mask);
+
+	for (int bank = 0; bank < banks; bank++) {
+		int left = total - (bank * BRBE_BANK_MAX_ENTRIES);
+		int in_bank = min(left, BRBE_BANK_MAX_ENTRIES);
+
+		select_brbe_bank(bank);
+
+		for (int idx = 0; idx < in_bank; idx++) {
+			struct perf_branch_entry *slot = &branch_stack->entries[kept];
+
+			/* An unreadable record ends the whole walk. */
+			if (!perf_entry_from_brbe_regset(idx, slot, event))
+				goto out;
+
+			/* A rejected record's slot is reused by the next read. */
+			if (filter_branch_record(slot, sample_type, type_mask))
+				kept++;
+		}
+	}
+
+out:
+	branch_stack->nr = kept;
+}
diff --git a/drivers/perf/arm_brbe.h b/drivers/perf/arm_brbe.h
new file mode 100644
index 000000000000..b7c7d8796c86
--- /dev/null
+++ b/drivers/perf/arm_brbe.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Branch Record Buffer Extension Helpers.
+ *
+ * Copyright (C) 2022-2025 ARM Limited
+ *
+ * Author: Anshuman Khandual <anshuman.khandual@arm.com>
+ */
+
+#ifndef ARM_BRBE_H
+#define ARM_BRBE_H
+
+struct arm_pmu;
+struct perf_branch_stack;
+struct perf_event;
+
+#ifdef CONFIG_ARM64_BRBE
+void brbe_probe(struct arm_pmu *arm_pmu);
+unsigned int brbe_num_branch_records(const struct arm_pmu *armpmu);
+void brbe_invalidate(void);
+
+/* brbe_enable() expects BRBE to be disabled/paused on entry. */
+void brbe_enable(const struct arm_pmu *arm_pmu);
+void brbe_disable(void);
+
+bool brbe_branch_attr_valid(struct perf_event *event);
+void brbe_read_filtered_entries(struct perf_branch_stack *branch_stack,
+				const struct perf_event *event);
+#else
+/*
+ * Stubs for kernels built without BRBE support: no branch records are
+ * reported, branch-stack attributes are rejected and everything else is a
+ * no-op.
+ */
+static inline void brbe_probe(struct arm_pmu *arm_pmu) { }
+static inline unsigned int brbe_num_branch_records(const struct arm_pmu *armpmu)
+{
+	return 0;
+}
+
+static inline void brbe_invalidate(void) { }
+
+static inline void brbe_enable(const struct arm_pmu *arm_pmu) { }
+static inline void brbe_disable(void) { }
+
+static inline bool brbe_branch_attr_valid(struct perf_event *event)
+{
+	WARN_ON_ONCE(!has_branch_stack(event));
+	return false;
+}
+
+static inline void brbe_read_filtered_entries(struct perf_branch_stack *branch_stack,
+					      const struct perf_event *event)
+{
+}
+#endif /* CONFIG_ARM64_BRBE */
+
+#endif /* ARM_BRBE_H */
diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
index 2f33e69a8caf..5c310e803dd7 100644
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -99,7 +99,7 @@ static const struct pmu_irq_ops percpu_pmunmi_ops = {
.free_pmuirq = armpmu_free_percpu_pmunmi
};
-static DEFINE_PER_CPU(struct arm_pmu *, cpu_armpmu);
+DEFINE_PER_CPU(struct arm_pmu *, cpu_armpmu);
static DEFINE_PER_CPU(int, cpu_irq);
static DEFINE_PER_CPU(const struct pmu_irq_ops *, cpu_irq_ops);
@@ -318,6 +318,12 @@ armpmu_del(struct perf_event *event, int flags)
int idx = hwc->idx;
armpmu_stop(event, PERF_EF_UPDATE);
+
+ if (has_branch_stack(event)) {
+ hw_events->branch_users--;
+ perf_sched_cb_dec(event->pmu);
+ }
+
hw_events->events[idx] = NULL;
armpmu->clear_event_idx(hw_events, event);
perf_event_update_userpage(event);
@@ -345,6 +351,11 @@ armpmu_add(struct perf_event *event, int flags)
/* The newly-allocated counter should be empty */
WARN_ON_ONCE(hw_events->events[idx]);
+ if (has_branch_stack(event)) {
+ hw_events->branch_users++;
+ perf_sched_cb_inc(event->pmu);
+ }
+
event->hw.idx = idx;
hw_events->events[idx] = event;
@@ -509,8 +520,7 @@ static int armpmu_event_init(struct perf_event *event)
!cpumask_test_cpu(event->cpu, &armpmu->supported_cpus))
return -ENOENT;
- /* does not support taken branch sampling */
- if (has_branch_stack(event))
+ if (has_branch_stack(event) && !armpmu->reg_brbidr)
return -EOPNOTSUPP;
return __hw_perf_event_init(event);
diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c
index 3db9f4ed17e8..69c5cc8f5606 100644
--- a/drivers/perf/arm_pmuv3.c
+++ b/drivers/perf/arm_pmuv3.c
@@ -25,6 +25,8 @@
#include <linux/smp.h>
#include <linux/nmi.h>
+#include "arm_brbe.h"
+
/* ARMv8 Cortex-A53 specific event types. */
#define ARMV8_A53_PERFCTR_PREF_LINEFILL 0xC2
@@ -438,7 +440,19 @@ static ssize_t threshold_max_show(struct device *dev,
static DEVICE_ATTR_RO(threshold_max);
+/*
+ * sysfs "branches" capability: print the number of branch records reported
+ * by brbe_num_branch_records() for this PMU.
+ */
+static ssize_t branches_show(struct device *dev,
+			     struct device_attribute *attr, char *page)
+{
+	struct pmu *pmu = dev_get_drvdata(dev);
+	struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu);
+
+	/* brbe_num_branch_records() returns unsigned int, hence %u. */
+	return sysfs_emit(page, "%u\n", brbe_num_branch_records(cpu_pmu));
+}
+
+static DEVICE_ATTR_RO(branches);
+
static struct attribute *armv8_pmuv3_caps_attrs[] = {
+ &dev_attr_branches.attr,
&dev_attr_slots.attr,
&dev_attr_bus_slots.attr,
&dev_attr_bus_width.attr,
@@ -446,9 +460,22 @@ static struct attribute *armv8_pmuv3_caps_attrs[] = {
NULL,
};
+/*
+ * Visibility callback for the "caps" attribute group: hide the "branches"
+ * attribute when the PMU reports no branch records; every other attribute
+ * keeps its own mode.
+ */
+static umode_t caps_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+	struct device *dev = kobj_to_dev(kobj);
+	struct pmu *pmu = dev_get_drvdata(dev);
+	struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu);
+
+	/*
+	 * Identify the attribute by address rather than by its position in
+	 * armv8_pmuv3_caps_attrs[], so reordering that array cannot silently
+	 * hide the wrong entry.
+	 */
+	if (attr == &dev_attr_branches.attr)
+		return brbe_num_branch_records(cpu_pmu) ? attr->mode : 0;
+
+	return attr->mode;
+}
+
static const struct attribute_group armv8_pmuv3_caps_attr_group = {
.name = "caps",
.attrs = armv8_pmuv3_caps_attrs,
+ .is_visible = caps_is_visible,
};
/*
@@ -809,6 +836,7 @@ static void armv8pmu_disable_event(struct perf_event *event)
static void armv8pmu_start(struct arm_pmu *cpu_pmu)
{
struct perf_event_context *ctx;
+ struct pmu_hw_events *hw_events = this_cpu_ptr(cpu_pmu->hw_events);
int nr_user = 0;
ctx = perf_cpu_task_ctx();
@@ -822,16 +850,34 @@ static void armv8pmu_start(struct arm_pmu *cpu_pmu)
kvm_vcpu_pmu_resync_el0();
+ if (hw_events->branch_users)
+ brbe_enable(cpu_pmu);
+
/* Enable all counters */
armv8pmu_pmcr_write(armv8pmu_pmcr_read() | ARMV8_PMU_PMCR_E);
}
static void armv8pmu_stop(struct arm_pmu *cpu_pmu)
{
+ struct pmu_hw_events *hw_events = this_cpu_ptr(cpu_pmu->hw_events);
+
+ if (hw_events->branch_users)
+ brbe_disable();
+
/* Disable all counters */
armv8pmu_pmcr_write(armv8pmu_pmcr_read() & ~ARMV8_PMU_PMCR_E);
}
+/*
+ * Fill this CPU's branch stack buffer with the records that pass @event's
+ * filters and attach it to the sample in @data.
+ */
+static void read_branch_records(struct pmu_hw_events *cpuc,
+				struct perf_event *event,
+				struct perf_sample_data *data)
+{
+	brbe_read_filtered_entries(cpuc->branch_stack, event);
+	perf_sample_save_brstack(data, event, cpuc->branch_stack, NULL);
+}
+
static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu)
{
u64 pmovsr;
@@ -882,6 +928,9 @@ static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu)
if (!armpmu_event_set_period(event))
continue;
+ if (has_branch_stack(event))
+ read_branch_records(cpuc, event, &data);
+
/*
* Perf event overflow will queue the processing of the event as
* an irq_work which will be taken care of in the handling of
@@ -929,6 +978,32 @@ static int armv8pmu_get_chain_idx(struct pmu_hw_events *cpuc,
return -EAGAIN;
}
+/*
+ * Tell whether @event may be placed on the dedicated cycle counter
+ * (PMCCNTR_EL0) instead of a general-purpose event counter.
+ */
+static bool armv8pmu_can_use_pmccntr(struct pmu_hw_events *cpuc,
+				     struct perf_event *event)
+{
+	unsigned long evtype = event->hw.config_base & ARMV8_PMU_EVTYPE_EVENT;
+
+	/*
+	 * Only a plain CPU_CYCLES event qualifies:
+	 *  - a CPU_CYCLES event with threshold counting cannot use
+	 *    PMCCNTR_EL0 since it lacks threshold support;
+	 *  - PMCCNTR_EL0 is not affected by BRBE controls like
+	 *    BRBCR_ELx.FZP, so it is not used for branch events.
+	 */
+	return evtype == ARMV8_PMUV3_PERFCTR_CPU_CYCLES &&
+	       !armv8pmu_event_get_threshold(&event->attr) &&
+	       !has_branch_stack(event);
+}
+
static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc,
struct perf_event *event)
{
@@ -937,8 +1012,7 @@ static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc,
unsigned long evtype = hwc->config_base & ARMV8_PMU_EVTYPE_EVENT;
/* Always prefer to place a cycle counter into the cycle counter. */
- if ((evtype == ARMV8_PMUV3_PERFCTR_CPU_CYCLES) &&
- !armv8pmu_event_get_threshold(&event->attr)) {
+ if (armv8pmu_can_use_pmccntr(cpuc, event)) {
if (!test_and_set_bit(ARMV8_PMU_CYCLE_IDX, cpuc->used_mask))
return ARMV8_PMU_CYCLE_IDX;
else if (armv8pmu_event_is_64bit(event) &&
@@ -987,6 +1061,19 @@ static int armv8pmu_user_event_idx(struct perf_event *event)
return event->hw.idx + 1;
}
+/*
+ * Context-switch hook: when a task is scheduled in on a CPU that has
+ * branch-stack users, invalidate the BRBE records. Nothing is done on
+ * switch-out or when no event uses the branch stack.
+ */
+static void armv8pmu_sched_task(struct perf_event_pmu_context *pmu_ctx,
+				struct task_struct *task, bool sched_in)
+{
+	struct arm_pmu *armpmu = *this_cpu_ptr(&cpu_armpmu);
+	struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
+
+	if (sched_in && hw_events->branch_users)
+		brbe_invalidate();
+}
+
/*
* Add an event filter to a given event.
*/
@@ -1004,6 +1091,13 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event,
return -EOPNOTSUPP;
}
+ if (has_branch_stack(perf_event)) {
+ if (!brbe_num_branch_records(cpu_pmu) || !brbe_branch_attr_valid(perf_event))
+ return -EOPNOTSUPP;
+
+ perf_event->attach_state |= PERF_ATTACH_SCHED_CB;
+ }
+
/*
* If we're running in hyp mode, then we *are* the hypervisor.
* Therefore we ignore exclude_hv in this configuration, since
@@ -1070,6 +1164,11 @@ static void armv8pmu_reset(void *info)
/* Clear the counters we flip at guest entry/exit */
kvm_clr_pmu_events(mask);
+ if (brbe_num_branch_records(cpu_pmu)) {
+ brbe_disable();
+ brbe_invalidate();
+ }
+
/*
* Initialize & Reset PMNC. Request overflow interrupt for
* 64 bit cycle counter but cheat in armv8pmu_write_counter().
@@ -1238,6 +1337,25 @@ static void __armv8pmu_probe_pmu(void *info)
cpu_pmu->reg_pmmir = read_pmmir();
else
cpu_pmu->reg_pmmir = 0;
+
+ brbe_probe(cpu_pmu);
+}
+
+/*
+ * Allocate one branch stack buffer per supported CPU, each sized for the
+ * number of branch records the PMU reports.
+ *
+ * Returns 0 on success or -ENOMEM. On failure the buffers allocated so far
+ * are freed again and their pointers cleared, so nothing is left behind
+ * for the caller to clean up.
+ */
+static int branch_records_alloc(struct arm_pmu *armpmu)
+{
+	size_t size = struct_size_t(struct perf_branch_stack, entries,
+				    brbe_num_branch_records(armpmu));
+	struct pmu_hw_events *events_cpu;
+	int cpu, failed_cpu;
+
+	for_each_cpu(cpu, &armpmu->supported_cpus) {
+		events_cpu = per_cpu_ptr(armpmu->hw_events, cpu);
+		events_cpu->branch_stack = kmalloc(size, GFP_KERNEL);
+		if (!events_cpu->branch_stack)
+			goto err_free;
+	}
+	return 0;
+
+err_free:
+	/* Unwind only the CPUs visited before the one that failed. */
+	failed_cpu = cpu;
+	for_each_cpu(cpu, &armpmu->supported_cpus) {
+		if (cpu == failed_cpu)
+			break;
+
+		events_cpu = per_cpu_ptr(armpmu->hw_events, cpu);
+		kfree(events_cpu->branch_stack);
+		events_cpu->branch_stack = NULL;
+	}
+	return -ENOMEM;
 }
static int armv8pmu_probe_pmu(struct arm_pmu *cpu_pmu)
@@ -1254,7 +1372,15 @@ static int armv8pmu_probe_pmu(struct arm_pmu *cpu_pmu)
if (ret)
return ret;
- return probe.present ? 0 : -ENODEV;
+ if (!probe.present)
+ return -ENODEV;
+
+ if (brbe_num_branch_records(cpu_pmu)) {
+ ret = branch_records_alloc(cpu_pmu);
+ if (ret)
+ return ret;
+ }
+ return 0;
}
static void armv8pmu_disable_user_access_ipi(void *unused)
@@ -1313,6 +1439,8 @@ static int armv8_pmu_init(struct arm_pmu *cpu_pmu, char *name,
cpu_pmu->set_event_filter = armv8pmu_set_event_filter;
cpu_pmu->pmu.event_idx = armv8pmu_user_event_idx;
+ if (brbe_num_branch_records(cpu_pmu))
+ cpu_pmu->pmu.sched_task = armv8pmu_sched_task;
cpu_pmu->name = name;
cpu_pmu->map_event = map_event;
diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c
index 3efed8839a4e..fa50645fedda 100644
--- a/drivers/perf/arm_spe_pmu.c
+++ b/drivers/perf/arm_spe_pmu.c
@@ -86,9 +86,11 @@ struct arm_spe_pmu {
#define SPE_PMU_FEAT_ERND (1UL << 5)
#define SPE_PMU_FEAT_INV_FILT_EVT (1UL << 6)
#define SPE_PMU_FEAT_DISCARD (1UL << 7)
+#define SPE_PMU_FEAT_EFT (1UL << 8)
#define SPE_PMU_FEAT_DEV_PROBED (1UL << 63)
u64 features;
+ u64 pmsevfr_res0;
u16 max_record_sz;
u16 align;
struct perf_output_handle __percpu *handle;
@@ -97,7 +99,8 @@ struct arm_spe_pmu {
#define to_spe_pmu(p) (container_of(p, struct arm_spe_pmu, pmu))
/* Convert a free-running index from perf into an SPE buffer offset */
-#define PERF_IDX2OFF(idx, buf) ((idx) % ((buf)->nr_pages << PAGE_SHIFT))
+#define PERF_IDX2OFF(idx, buf) \
+ ((idx) % ((unsigned long)(buf)->nr_pages << PAGE_SHIFT))
/* Keep track of our dynamic hotplug state */
static enum cpuhp_state arm_spe_pmu_online;
@@ -115,6 +118,7 @@ enum arm_spe_pmu_capabilities {
SPE_PMU_CAP_FEAT_MAX,
SPE_PMU_CAP_CNT_SZ = SPE_PMU_CAP_FEAT_MAX,
SPE_PMU_CAP_MIN_IVAL,
+ SPE_PMU_CAP_EVENT_FILTER,
};
static int arm_spe_pmu_feat_caps[SPE_PMU_CAP_FEAT_MAX] = {
@@ -122,7 +126,7 @@ static int arm_spe_pmu_feat_caps[SPE_PMU_CAP_FEAT_MAX] = {
[SPE_PMU_CAP_ERND] = SPE_PMU_FEAT_ERND,
};
-static u32 arm_spe_pmu_cap_get(struct arm_spe_pmu *spe_pmu, int cap)
+static u64 arm_spe_pmu_cap_get(struct arm_spe_pmu *spe_pmu, int cap)
{
if (cap < SPE_PMU_CAP_FEAT_MAX)
return !!(spe_pmu->features & arm_spe_pmu_feat_caps[cap]);
@@ -132,6 +136,8 @@ static u32 arm_spe_pmu_cap_get(struct arm_spe_pmu *spe_pmu, int cap)
return spe_pmu->counter_sz;
case SPE_PMU_CAP_MIN_IVAL:
return spe_pmu->min_period;
+ case SPE_PMU_CAP_EVENT_FILTER:
+ return ~spe_pmu->pmsevfr_res0;
default:
WARN(1, "unknown cap %d\n", cap);
}
@@ -148,7 +154,19 @@ static ssize_t arm_spe_pmu_cap_show(struct device *dev,
container_of(attr, struct dev_ext_attribute, attr);
int cap = (long)ea->var;
- return sysfs_emit(buf, "%u\n", arm_spe_pmu_cap_get(spe_pmu, cap));
+ return sysfs_emit(buf, "%llu\n", arm_spe_pmu_cap_get(spe_pmu, cap));
+}
+
+/*
+ * sysfs show callback: print, in hexadecimal, the capability selected by
+ * the extended attribute's ->var.
+ */
+static ssize_t arm_spe_pmu_cap_show_hex(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	struct dev_ext_attribute *ext_attr =
+		container_of(attr, struct dev_ext_attribute, attr);
+	struct arm_spe_pmu *spe_pmu = dev_get_drvdata(dev);
+	int cap_id = (long)ext_attr->var;
+	u64 value = arm_spe_pmu_cap_get(spe_pmu, cap_id);
+
+	return sysfs_emit(buf, "0x%llx\n", value);
 }
#define SPE_EXT_ATTR_ENTRY(_name, _func, _var) \
@@ -158,12 +176,15 @@ static ssize_t arm_spe_pmu_cap_show(struct device *dev,
#define SPE_CAP_EXT_ATTR_ENTRY(_name, _var) \
SPE_EXT_ATTR_ENTRY(_name, arm_spe_pmu_cap_show, _var)
+#define SPE_CAP_EXT_ATTR_ENTRY_HEX(_name, _var) \
+ SPE_EXT_ATTR_ENTRY(_name, arm_spe_pmu_cap_show_hex, _var)
static struct attribute *arm_spe_pmu_cap_attr[] = {
SPE_CAP_EXT_ATTR_ENTRY(arch_inst, SPE_PMU_CAP_ARCH_INST),
SPE_CAP_EXT_ATTR_ENTRY(ernd, SPE_PMU_CAP_ERND),
SPE_CAP_EXT_ATTR_ENTRY(count_size, SPE_PMU_CAP_CNT_SZ),
SPE_CAP_EXT_ATTR_ENTRY(min_interval, SPE_PMU_CAP_MIN_IVAL),
+ SPE_CAP_EXT_ATTR_ENTRY_HEX(event_filter, SPE_PMU_CAP_EVENT_FILTER),
NULL,
};
@@ -197,6 +218,27 @@ static const struct attribute_group arm_spe_pmu_cap_group = {
#define ATTR_CFG_FLD_discard_CFG config /* PMBLIMITR_EL1.FM = DISCARD */
#define ATTR_CFG_FLD_discard_LO 35
#define ATTR_CFG_FLD_discard_HI 35
+#define ATTR_CFG_FLD_branch_filter_mask_CFG config /* PMSFCR_EL1.Bm */
+#define ATTR_CFG_FLD_branch_filter_mask_LO 36
+#define ATTR_CFG_FLD_branch_filter_mask_HI 36
+#define ATTR_CFG_FLD_load_filter_mask_CFG config /* PMSFCR_EL1.LDm */
+#define ATTR_CFG_FLD_load_filter_mask_LO 37
+#define ATTR_CFG_FLD_load_filter_mask_HI 37
+#define ATTR_CFG_FLD_store_filter_mask_CFG config /* PMSFCR_EL1.STm */
+#define ATTR_CFG_FLD_store_filter_mask_LO 38
+#define ATTR_CFG_FLD_store_filter_mask_HI 38
+#define ATTR_CFG_FLD_simd_filter_CFG config /* PMSFCR_EL1.SIMD */
+#define ATTR_CFG_FLD_simd_filter_LO 39
+#define ATTR_CFG_FLD_simd_filter_HI 39
+#define ATTR_CFG_FLD_simd_filter_mask_CFG config /* PMSFCR_EL1.SIMDm */
+#define ATTR_CFG_FLD_simd_filter_mask_LO 40
+#define ATTR_CFG_FLD_simd_filter_mask_HI 40
+#define ATTR_CFG_FLD_float_filter_CFG config /* PMSFCR_EL1.FP */
+#define ATTR_CFG_FLD_float_filter_LO 41
+#define ATTR_CFG_FLD_float_filter_HI 41
+#define ATTR_CFG_FLD_float_filter_mask_CFG config /* PMSFCR_EL1.FPm */
+#define ATTR_CFG_FLD_float_filter_mask_LO 42
+#define ATTR_CFG_FLD_float_filter_mask_HI 42
#define ATTR_CFG_FLD_event_filter_CFG config1 /* PMSEVFR_EL1 */
#define ATTR_CFG_FLD_event_filter_LO 0
@@ -215,8 +257,15 @@ GEN_PMU_FORMAT_ATTR(pa_enable);
GEN_PMU_FORMAT_ATTR(pct_enable);
GEN_PMU_FORMAT_ATTR(jitter);
GEN_PMU_FORMAT_ATTR(branch_filter);
+GEN_PMU_FORMAT_ATTR(branch_filter_mask);
GEN_PMU_FORMAT_ATTR(load_filter);
+GEN_PMU_FORMAT_ATTR(load_filter_mask);
GEN_PMU_FORMAT_ATTR(store_filter);
+GEN_PMU_FORMAT_ATTR(store_filter_mask);
+GEN_PMU_FORMAT_ATTR(simd_filter);
+GEN_PMU_FORMAT_ATTR(simd_filter_mask);
+GEN_PMU_FORMAT_ATTR(float_filter);
+GEN_PMU_FORMAT_ATTR(float_filter_mask);
GEN_PMU_FORMAT_ATTR(event_filter);
GEN_PMU_FORMAT_ATTR(inv_event_filter);
GEN_PMU_FORMAT_ATTR(min_latency);
@@ -228,8 +277,15 @@ static struct attribute *arm_spe_pmu_formats_attr[] = {
&format_attr_pct_enable.attr,
&format_attr_jitter.attr,
&format_attr_branch_filter.attr,
+ &format_attr_branch_filter_mask.attr,
&format_attr_load_filter.attr,
+ &format_attr_load_filter_mask.attr,
&format_attr_store_filter.attr,
+ &format_attr_store_filter_mask.attr,
+ &format_attr_simd_filter.attr,
+ &format_attr_simd_filter_mask.attr,
+ &format_attr_float_filter.attr,
+ &format_attr_float_filter_mask.attr,
&format_attr_event_filter.attr,
&format_attr_inv_event_filter.attr,
&format_attr_min_latency.attr,
@@ -250,6 +306,16 @@ static umode_t arm_spe_pmu_format_attr_is_visible(struct kobject *kobj,
if (attr == &format_attr_inv_event_filter.attr && !(spe_pmu->features & SPE_PMU_FEAT_INV_FILT_EVT))
return 0;
+ if ((attr == &format_attr_branch_filter_mask.attr ||
+ attr == &format_attr_load_filter_mask.attr ||
+ attr == &format_attr_store_filter_mask.attr ||
+ attr == &format_attr_simd_filter.attr ||
+ attr == &format_attr_simd_filter_mask.attr ||
+ attr == &format_attr_float_filter.attr ||
+ attr == &format_attr_float_filter_mask.attr) &&
+ !(spe_pmu->features & SPE_PMU_FEAT_EFT))
+ return 0;
+
return attr->mode;
}
@@ -308,17 +374,21 @@ static u64 arm_spe_event_to_pmscr(struct perf_event *event)
static void arm_spe_event_sanitise_period(struct perf_event *event)
{
- struct arm_spe_pmu *spe_pmu = to_spe_pmu(event->pmu);
u64 period = event->hw.sample_period;
u64 max_period = PMSIRR_EL1_INTERVAL_MASK;
- if (period < spe_pmu->min_period)
- period = spe_pmu->min_period;
- else if (period > max_period)
- period = max_period;
- else
- period &= max_period;
+ /*
+ * The PMSIDR_EL1.Interval field (stored in spe_pmu->min_period) is a
+ * recommendation for the minimum interval, not a hardware limitation.
+ *
+ * According to the Arm ARM (DDI 0487 L.a), section D24.7.12 PMSIRR_EL1,
+ * Sampling Interval Reload Register, the INTERVAL field (bits [31:8])
+ * states: "Software must set this to a nonzero value". Use 1 as the
+ * minimum value.
+ */
+ u64 min_period = FIELD_PREP(PMSIRR_EL1_INTERVAL_MASK, 1);
+ period = clamp_t(u64, period, min_period, max_period) & max_period;
event->hw.sample_period = period;
}
@@ -341,8 +411,15 @@ static u64 arm_spe_event_to_pmsfcr(struct perf_event *event)
u64 reg = 0;
reg |= FIELD_PREP(PMSFCR_EL1_LD, ATTR_CFG_GET_FLD(attr, load_filter));
+ reg |= FIELD_PREP(PMSFCR_EL1_LDm, ATTR_CFG_GET_FLD(attr, load_filter_mask));
reg |= FIELD_PREP(PMSFCR_EL1_ST, ATTR_CFG_GET_FLD(attr, store_filter));
+ reg |= FIELD_PREP(PMSFCR_EL1_STm, ATTR_CFG_GET_FLD(attr, store_filter_mask));
reg |= FIELD_PREP(PMSFCR_EL1_B, ATTR_CFG_GET_FLD(attr, branch_filter));
+ reg |= FIELD_PREP(PMSFCR_EL1_Bm, ATTR_CFG_GET_FLD(attr, branch_filter_mask));
+ reg |= FIELD_PREP(PMSFCR_EL1_SIMD, ATTR_CFG_GET_FLD(attr, simd_filter));
+ reg |= FIELD_PREP(PMSFCR_EL1_SIMDm, ATTR_CFG_GET_FLD(attr, simd_filter_mask));
+ reg |= FIELD_PREP(PMSFCR_EL1_FP, ATTR_CFG_GET_FLD(attr, float_filter));
+ reg |= FIELD_PREP(PMSFCR_EL1_FPm, ATTR_CFG_GET_FLD(attr, float_filter_mask));
if (reg)
reg |= PMSFCR_EL1_FT;
@@ -693,20 +770,6 @@ static irqreturn_t arm_spe_pmu_irq_handler(int irq, void *dev)
return IRQ_HANDLED;
}
-static u64 arm_spe_pmsevfr_res0(u16 pmsver)
-{
- switch (pmsver) {
- case ID_AA64DFR0_EL1_PMSVer_IMP:
- return PMSEVFR_EL1_RES0_IMP;
- case ID_AA64DFR0_EL1_PMSVer_V1P1:
- return PMSEVFR_EL1_RES0_V1P1;
- case ID_AA64DFR0_EL1_PMSVer_V1P2:
- /* Return the highest version we support in default */
- default:
- return PMSEVFR_EL1_RES0_V1P2;
- }
-}
-
/* Perf callbacks */
static int arm_spe_pmu_event_init(struct perf_event *event)
{
@@ -722,10 +785,10 @@ static int arm_spe_pmu_event_init(struct perf_event *event)
!cpumask_test_cpu(event->cpu, &spe_pmu->supported_cpus))
return -ENOENT;
- if (arm_spe_event_to_pmsevfr(event) & arm_spe_pmsevfr_res0(spe_pmu->pmsver))
+ if (arm_spe_event_to_pmsevfr(event) & spe_pmu->pmsevfr_res0)
return -EOPNOTSUPP;
- if (arm_spe_event_to_pmsnevfr(event) & arm_spe_pmsevfr_res0(spe_pmu->pmsver))
+ if (arm_spe_event_to_pmsnevfr(event) & spe_pmu->pmsevfr_res0)
return -EOPNOTSUPP;
if (attr->exclude_idle)
@@ -758,6 +821,16 @@ static int arm_spe_pmu_event_init(struct perf_event *event)
!(spe_pmu->features & SPE_PMU_FEAT_FILT_LAT))
return -EOPNOTSUPP;
+ if ((FIELD_GET(PMSFCR_EL1_LDm, reg) ||
+ FIELD_GET(PMSFCR_EL1_STm, reg) ||
+ FIELD_GET(PMSFCR_EL1_Bm, reg) ||
+ FIELD_GET(PMSFCR_EL1_SIMD, reg) ||
+ FIELD_GET(PMSFCR_EL1_SIMDm, reg) ||
+ FIELD_GET(PMSFCR_EL1_FP, reg) ||
+ FIELD_GET(PMSFCR_EL1_FPm, reg)) &&
+ !(spe_pmu->features & SPE_PMU_FEAT_EFT))
+ return -EOPNOTSUPP;
+
if (ATTR_CFG_GET_FLD(&event->attr, discard) &&
!(spe_pmu->features & SPE_PMU_FEAT_DISCARD))
return -EOPNOTSUPP;
@@ -1049,6 +1122,9 @@ static void __arm_spe_pmu_dev_probe(void *info)
if (spe_pmu->pmsver >= ID_AA64DFR0_EL1_PMSVer_V1P2)
spe_pmu->features |= SPE_PMU_FEAT_DISCARD;
+ if (FIELD_GET(PMSIDR_EL1_EFT, reg))
+ spe_pmu->features |= SPE_PMU_FEAT_EFT;
+
/* This field has a spaced out encoding, so just use a look-up */
fld = FIELD_GET(PMSIDR_EL1_INTERVAL, reg);
switch (fld) {
@@ -1103,6 +1179,10 @@ static void __arm_spe_pmu_dev_probe(void *info)
spe_pmu->counter_sz = 16;
}
+ /* Write all 1s and then read back. Unsupported filter bits are RAZ/WI. */
+ write_sysreg_s(U64_MAX, SYS_PMSEVFR_EL1);
+ spe_pmu->pmsevfr_res0 = ~read_sysreg_s(SYS_PMSEVFR_EL1);
+
dev_info(dev,
"probed SPEv1.%d for CPUs %*pbl [max_record_sz %u, align %u, features 0x%llx]\n",
spe_pmu->pmsver - 1, cpumask_pr_args(&spe_pmu->supported_cpus),
diff --git a/drivers/perf/cxl_pmu.c b/drivers/perf/cxl_pmu.c
index d6693519eaee..d094030220bf 100644
--- a/drivers/perf/cxl_pmu.c
+++ b/drivers/perf/cxl_pmu.c
@@ -113,7 +113,7 @@ struct cxl_pmu_info {
/*
* All CPMU counters are discoverable via the Event Capabilities Registers.
- * Each Event Capability register contains a a VID / GroupID.
+ * Each Event Capability register contains a VID / GroupID.
* A counter may then count any combination (by summing) of events in
* that group which are in the Supported Events Bitmask.
* However, there are some complexities to the scheme.
@@ -406,7 +406,7 @@ static struct attribute *cxl_pmu_event_attrs[] = {
CXL_PMU_EVENT_CXL_ATTR(s2m_bisnp_curblk, CXL_PMU_GID_S2M_BISNP, BIT(4)),
CXL_PMU_EVENT_CXL_ATTR(s2m_bisnp_datblk, CXL_PMU_GID_S2M_BISNP, BIT(5)),
CXL_PMU_EVENT_CXL_ATTR(s2m_bisnp_invblk, CXL_PMU_GID_S2M_BISNP, BIT(6)),
- /* CXL rev 3.1 Table 3-50 S2M NDR Opcopdes */
+ /* CXL rev 3.1 Table 3-50 S2M NDR Opcodes */
CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_cmp, CXL_PMU_GID_S2M_NDR, BIT(0)),
CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_cmps, CXL_PMU_GID_S2M_NDR, BIT(1)),
CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_cmpe, CXL_PMU_GID_S2M_NDR, BIT(2)),
@@ -627,7 +627,7 @@ static void cxl_pmu_event_start(struct perf_event *event, int flags)
hwc->state = 0;
/*
- * Currently only hdm filter control is implemnted, this code will
+ * Currently only hdm filter control is implemented, this code will
* want generalizing when more filters are added.
*/
if (info->filter_hdm) {
@@ -834,8 +834,8 @@ static int cxl_pmu_probe(struct device *dev)
if (rc)
return rc;
- info->hw_events = devm_kcalloc(dev, sizeof(*info->hw_events),
- info->num_counters, GFP_KERNEL);
+ info->hw_events = devm_kcalloc(dev, info->num_counters,
+ sizeof(*info->hw_events), GFP_KERNEL);
if (!info->hw_events)
return -ENOMEM;
@@ -873,7 +873,7 @@ static int cxl_pmu_probe(struct device *dev)
return rc;
irq = rc;
- irq_name = devm_kasprintf(dev, GFP_KERNEL, "%s_overflow\n", dev_name);
+ irq_name = devm_kasprintf(dev, GFP_KERNEL, "%s_overflow", dev_name);
if (!irq_name)
return -ENOMEM;
diff --git a/drivers/perf/dwc_pcie_pmu.c b/drivers/perf/dwc_pcie_pmu.c
index 146ff57813fb..22f73ac894e9 100644
--- a/drivers/perf/dwc_pcie_pmu.c
+++ b/drivers/perf/dwc_pcie_pmu.c
@@ -39,6 +39,10 @@
#define DWC_PCIE_EVENT_CLEAR GENMASK(1, 0)
#define DWC_PCIE_EVENT_PER_CLEAR 0x1
+/* Event Selection Field has two subfields */
+#define DWC_PCIE_CNT_EVENT_SEL_GROUP GENMASK(11, 8)
+#define DWC_PCIE_CNT_EVENT_SEL_EVID GENMASK(7, 0)
+
#define DWC_PCIE_EVENT_CNT_DATA 0xC
#define DWC_PCIE_TIME_BASED_ANAL_CTL 0x10
@@ -73,6 +77,10 @@ enum dwc_pcie_event_type {
DWC_PCIE_EVENT_TYPE_MAX,
};
+#define DWC_PCIE_LANE_GROUP_6 6
+#define DWC_PCIE_LANE_GROUP_7 7
+#define DWC_PCIE_LANE_MAX_EVENTS_PER_GROUP 256
+
#define DWC_PCIE_LANE_EVENT_MAX_PERIOD GENMASK_ULL(31, 0)
#define DWC_PCIE_MAX_PERIOD GENMASK_ULL(63, 0)
@@ -82,8 +90,11 @@ struct dwc_pcie_pmu {
u16 ras_des_offset;
u32 nr_lanes;
+ /* Groups #6 and #7 */
+ DECLARE_BITMAP(lane_events, 2 * DWC_PCIE_LANE_MAX_EVENTS_PER_GROUP);
+ struct perf_event *time_based_event;
+
struct hlist_node cpuhp_node;
- struct perf_event *event[DWC_PCIE_EVENT_TYPE_MAX];
int on_cpu;
};
@@ -246,19 +257,26 @@ static const struct attribute_group *dwc_pcie_attr_groups[] = {
};
static void dwc_pcie_pmu_lane_event_enable(struct dwc_pcie_pmu *pcie_pmu,
+ struct perf_event *event,
bool enable)
{
struct pci_dev *pdev = pcie_pmu->pdev;
u16 ras_des_offset = pcie_pmu->ras_des_offset;
+ int event_id = DWC_PCIE_EVENT_ID(event);
+ int lane = DWC_PCIE_EVENT_LANE(event);
+ u32 ctrl;
+
+ ctrl = FIELD_PREP(DWC_PCIE_CNT_EVENT_SEL, event_id) |
+ FIELD_PREP(DWC_PCIE_CNT_LANE_SEL, lane) |
+ FIELD_PREP(DWC_PCIE_EVENT_CLEAR, DWC_PCIE_EVENT_PER_CLEAR);
if (enable)
- pci_clear_and_set_config_dword(pdev,
- ras_des_offset + DWC_PCIE_EVENT_CNT_CTL,
- DWC_PCIE_CNT_ENABLE, DWC_PCIE_PER_EVENT_ON);
+ ctrl |= FIELD_PREP(DWC_PCIE_CNT_ENABLE, DWC_PCIE_PER_EVENT_ON);
else
- pci_clear_and_set_config_dword(pdev,
- ras_des_offset + DWC_PCIE_EVENT_CNT_CTL,
- DWC_PCIE_CNT_ENABLE, DWC_PCIE_PER_EVENT_OFF);
+ ctrl |= FIELD_PREP(DWC_PCIE_CNT_ENABLE, DWC_PCIE_PER_EVENT_OFF);
+
+ pci_write_config_dword(pdev, ras_des_offset + DWC_PCIE_EVENT_CNT_CTL,
+ ctrl);
}
static void dwc_pcie_pmu_time_based_event_enable(struct dwc_pcie_pmu *pcie_pmu,
@@ -276,11 +294,22 @@ static u64 dwc_pcie_pmu_read_lane_event_counter(struct perf_event *event)
{
struct dwc_pcie_pmu *pcie_pmu = to_dwc_pcie_pmu(event->pmu);
struct pci_dev *pdev = pcie_pmu->pdev;
+ int event_id = DWC_PCIE_EVENT_ID(event);
+ int lane = DWC_PCIE_EVENT_LANE(event);
u16 ras_des_offset = pcie_pmu->ras_des_offset;
- u32 val;
+ u32 val, ctrl;
+ ctrl = FIELD_PREP(DWC_PCIE_CNT_EVENT_SEL, event_id) |
+ FIELD_PREP(DWC_PCIE_CNT_LANE_SEL, lane) |
+ FIELD_PREP(DWC_PCIE_CNT_ENABLE, DWC_PCIE_PER_EVENT_ON);
+ pci_write_config_dword(pdev, ras_des_offset + DWC_PCIE_EVENT_CNT_CTL,
+ ctrl);
pci_read_config_dword(pdev, ras_des_offset + DWC_PCIE_EVENT_CNT_DATA, &val);
+ ctrl |= FIELD_PREP(DWC_PCIE_EVENT_CLEAR, DWC_PCIE_EVENT_PER_CLEAR);
+ pci_write_config_dword(pdev, ras_des_offset + DWC_PCIE_EVENT_CNT_CTL,
+ ctrl);
+
return val;
}
@@ -329,26 +358,77 @@ static void dwc_pcie_pmu_event_update(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
enum dwc_pcie_event_type type = DWC_PCIE_EVENT_TYPE(event);
- u64 delta, prev, now = 0;
+ u64 delta, prev, now;
+
+ if (type == DWC_PCIE_LANE_EVENT) {
+ now = dwc_pcie_pmu_read_lane_event_counter(event) &
+ DWC_PCIE_LANE_EVENT_MAX_PERIOD;
+ local64_add(now, &event->count);
+ return;
+ }
do {
prev = local64_read(&hwc->prev_count);
-
- if (type == DWC_PCIE_LANE_EVENT)
- now = dwc_pcie_pmu_read_lane_event_counter(event);
- else if (type == DWC_PCIE_TIME_BASE_EVENT)
- now = dwc_pcie_pmu_read_time_based_counter(event);
+ now = dwc_pcie_pmu_read_time_based_counter(event);
} while (local64_cmpxchg(&hwc->prev_count, prev, now) != prev);
delta = (now - prev) & DWC_PCIE_MAX_PERIOD;
- /* 32-bit counter for Lane Event Counting */
- if (type == DWC_PCIE_LANE_EVENT)
- delta &= DWC_PCIE_LANE_EVENT_MAX_PERIOD;
-
local64_add(delta, &event->count);
}
+/*
+ * Record the lane event @event in the scratch bitmap @val_lane_events.
+ *
+ * The event id is split into a group number and an event number. Only
+ * groups #6 and #7 are accepted, and every (group, event number) pair
+ * owns exactly one bit of the bitmap.
+ *
+ * Returns 0 if the bit was newly set, or -EINVAL if the group is not
+ * supported or the same event has already been recorded.
+ */
+static int dwc_pcie_pmu_validate_add_lane_event(struct perf_event *event,
+						unsigned long val_lane_events[])
+{
+	int id = DWC_PCIE_EVENT_ID(event);
+	int nr = FIELD_GET(DWC_PCIE_CNT_EVENT_SEL_EVID, id);
+	int grp = FIELD_GET(DWC_PCIE_CNT_EVENT_SEL_GROUP, id);
+	int bit;
+
+	switch (grp) {
+	case DWC_PCIE_LANE_GROUP_6:
+	case DWC_PCIE_LANE_GROUP_7:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* Group #6 maps to the first half of the bitmap, group #7 to the second */
+	bit = (grp - DWC_PCIE_LANE_GROUP_6) * DWC_PCIE_LANE_MAX_EVENTS_PER_GROUP + nr;
+
+	return test_and_set_bit(bit, val_lane_events) ? -EINVAL : 0;
+}
+
+/*
+ * Check that the group led by @event's group leader can be counted together.
+ *
+ * A group may contain at most one time based event, and no lane event may
+ * appear twice. The leader and all of its siblings are inspected.
+ *
+ * Returns 0 if the group is acceptable, -ENOSPC otherwise.
+ */
+static int dwc_pcie_pmu_validate_group(struct perf_event *event)
+{
+	struct perf_event *sibling, *leader = event->group_leader;
+	/*
+	 * The scratch bitmap must start out empty: the helper below uses
+	 * test_and_set_bit() on it, so stale stack contents would be taken
+	 * for lane events that are already claimed.
+	 */
+	DECLARE_BITMAP(val_lane_events, 2 * DWC_PCIE_LANE_MAX_EVENTS_PER_GROUP) = { 0 };
+	bool time_event = false;
+	int type;
+
+	type = DWC_PCIE_EVENT_TYPE(leader);
+	if (type == DWC_PCIE_TIME_BASE_EVENT)
+		time_event = true;
+	else if (dwc_pcie_pmu_validate_add_lane_event(leader, val_lane_events))
+		return -ENOSPC;
+
+	for_each_sibling_event(sibling, leader) {
+		type = DWC_PCIE_EVENT_TYPE(sibling);
+		if (type == DWC_PCIE_TIME_BASE_EVENT) {
+			/* Only a single time based event fits in a group */
+			if (time_event)
+				return -ENOSPC;
+
+			time_event = true;
+			continue;
+		}
+
+		if (dwc_pcie_pmu_validate_add_lane_event(sibling, val_lane_events))
+			return -ENOSPC;
+	}
+
+	return 0;
+}
+
static int dwc_pcie_pmu_event_init(struct perf_event *event)
{
struct dwc_pcie_pmu *pcie_pmu = to_dwc_pcie_pmu(event->pmu);
@@ -367,10 +447,6 @@ static int dwc_pcie_pmu_event_init(struct perf_event *event)
if (event->cpu < 0 || event->attach_state & PERF_ATTACH_TASK)
return -EINVAL;
- if (event->group_leader != event &&
- !is_software_event(event->group_leader))
- return -EINVAL;
-
for_each_sibling_event(sibling, event->group_leader) {
if (sibling->pmu != event->pmu && !is_software_event(sibling))
return -EINVAL;
@@ -385,6 +461,9 @@ static int dwc_pcie_pmu_event_init(struct perf_event *event)
return -EINVAL;
}
+ if (dwc_pcie_pmu_validate_group(event))
+ return -ENOSPC;
+
event->cpu = pcie_pmu->on_cpu;
return 0;
@@ -400,7 +479,7 @@ static void dwc_pcie_pmu_event_start(struct perf_event *event, int flags)
local64_set(&hwc->prev_count, 0);
if (type == DWC_PCIE_LANE_EVENT)
- dwc_pcie_pmu_lane_event_enable(pcie_pmu, true);
+ dwc_pcie_pmu_lane_event_enable(pcie_pmu, event, true);
else if (type == DWC_PCIE_TIME_BASE_EVENT)
dwc_pcie_pmu_time_based_event_enable(pcie_pmu, true);
}
@@ -414,12 +493,13 @@ static void dwc_pcie_pmu_event_stop(struct perf_event *event, int flags)
if (event->hw.state & PERF_HES_STOPPED)
return;
+ dwc_pcie_pmu_event_update(event);
+
if (type == DWC_PCIE_LANE_EVENT)
- dwc_pcie_pmu_lane_event_enable(pcie_pmu, false);
+ dwc_pcie_pmu_lane_event_enable(pcie_pmu, event, false);
else if (type == DWC_PCIE_TIME_BASE_EVENT)
dwc_pcie_pmu_time_based_event_enable(pcie_pmu, false);
- dwc_pcie_pmu_event_update(event);
hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
}
@@ -434,14 +514,17 @@ static int dwc_pcie_pmu_event_add(struct perf_event *event, int flags)
u16 ras_des_offset = pcie_pmu->ras_des_offset;
u32 ctrl;
- /* one counter for each type and it is in use */
- if (pcie_pmu->event[type])
- return -ENOSPC;
-
- pcie_pmu->event[type] = event;
hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
if (type == DWC_PCIE_LANE_EVENT) {
+ int event_nr = FIELD_GET(DWC_PCIE_CNT_EVENT_SEL_EVID, event_id);
+ int group = FIELD_GET(DWC_PCIE_CNT_EVENT_SEL_GROUP, event_id) -
+ DWC_PCIE_LANE_GROUP_6;
+
+ if (test_and_set_bit(group * DWC_PCIE_LANE_MAX_EVENTS_PER_GROUP + event_nr,
+ pcie_pmu->lane_events))
+ return -ENOSPC;
+
/* EVENT_COUNTER_DATA_REG needs clear manually */
ctrl = FIELD_PREP(DWC_PCIE_CNT_EVENT_SEL, event_id) |
FIELD_PREP(DWC_PCIE_CNT_LANE_SEL, lane) |
@@ -450,6 +533,11 @@ static int dwc_pcie_pmu_event_add(struct perf_event *event, int flags)
pci_write_config_dword(pdev, ras_des_offset + DWC_PCIE_EVENT_CNT_CTL,
ctrl);
} else if (type == DWC_PCIE_TIME_BASE_EVENT) {
+ if (pcie_pmu->time_based_event)
+ return -ENOSPC;
+
+ pcie_pmu->time_based_event = event;
+
/*
* TIME_BASED_ANAL_DATA_REG is a 64 bit register, we can safely
* use it with any manually controlled duration. And it is
@@ -478,7 +566,18 @@ static void dwc_pcie_pmu_event_del(struct perf_event *event, int flags)
dwc_pcie_pmu_event_stop(event, flags | PERF_EF_UPDATE);
perf_event_update_userpage(event);
- pcie_pmu->event[type] = NULL;
+
+ if (type == DWC_PCIE_TIME_BASE_EVENT) {
+ pcie_pmu->time_based_event = NULL;
+ } else {
+ int event_id = DWC_PCIE_EVENT_ID(event);
+ int event_nr = FIELD_GET(DWC_PCIE_CNT_EVENT_SEL_EVID, event_id);
+ int group = FIELD_GET(DWC_PCIE_CNT_EVENT_SEL_GROUP, event_id) -
+ DWC_PCIE_LANE_GROUP_6;
+
+ clear_bit(group * DWC_PCIE_LANE_MAX_EVENTS_PER_GROUP + event_nr,
+ pcie_pmu->lane_events);
+ }
}
static void dwc_pcie_pmu_remove_cpuhp_instance(void *hotplug_node)
diff --git a/drivers/perf/fsl_imx9_ddr_perf.c b/drivers/perf/fsl_imx9_ddr_perf.c
index 843f163e6c33..7050b48c0467 100644
--- a/drivers/perf/fsl_imx9_ddr_perf.c
+++ b/drivers/perf/fsl_imx9_ddr_perf.c
@@ -104,6 +104,11 @@ static const struct imx_ddr_devtype_data imx93_devtype_data = {
.filter_ver = DDR_PERF_AXI_FILTER_V1
};
+static const struct imx_ddr_devtype_data imx94_devtype_data = {
+ .identifier = "imx94",
+ .filter_ver = DDR_PERF_AXI_FILTER_V2
+};
+
static const struct imx_ddr_devtype_data imx95_devtype_data = {
.identifier = "imx95",
.filter_ver = DDR_PERF_AXI_FILTER_V2
@@ -122,6 +127,7 @@ static inline bool axi_filter_v2(struct ddr_pmu *pmu)
static const struct of_device_id imx_ddr_pmu_dt_ids[] = {
{ .compatible = "fsl,imx91-ddr-pmu", .data = &imx91_devtype_data },
{ .compatible = "fsl,imx93-ddr-pmu", .data = &imx93_devtype_data },
+ { .compatible = "fsl,imx94-ddr-pmu", .data = &imx94_devtype_data },
{ .compatible = "fsl,imx95-ddr-pmu", .data = &imx95_devtype_data },
{ /* sentinel */ }
};
@@ -461,9 +467,11 @@ static void imx93_ddr_perf_monitor_config(struct ddr_pmu *pmu, int event,
int counter, int axi_id, int axi_mask)
{
u32 pmcfg1, pmcfg2;
- u32 mask[] = { MX93_PMCFG1_RD_TRANS_FILT_EN,
- MX93_PMCFG1_WR_TRANS_FILT_EN,
- MX93_PMCFG1_RD_BT_FILT_EN };
+ static const u32 mask[] = {
+ MX93_PMCFG1_RD_TRANS_FILT_EN,
+ MX93_PMCFG1_WR_TRANS_FILT_EN,
+ MX93_PMCFG1_RD_BT_FILT_EN
+ };
pmcfg1 = readl_relaxed(pmu->base + PMCFG1);
diff --git a/drivers/perf/fujitsu_uncore_pmu.c b/drivers/perf/fujitsu_uncore_pmu.c
new file mode 100644
index 000000000000..c3c6f56474ad
--- /dev/null
+++ b/drivers/perf/fujitsu_uncore_pmu.c
@@ -0,0 +1,613 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Driver for the Uncore PMUs in Fujitsu chips.
+ *
+ * See Documentation/admin-guide/perf/fujitsu_uncore_pmu.rst for more details.
+ *
+ * Copyright (c) 2025 Fujitsu. All rights reserved.
+ */
+
+#include <linux/acpi.h>
+#include <linux/bitfield.h>
+#include <linux/bitops.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/list.h>
+#include <linux/mod_devicetable.h>
+#include <linux/module.h>
+#include <linux/perf_event.h>
+#include <linux/platform_device.h>
+
+/* Number of counters on each PMU */
+#define MAC_NUM_COUNTERS 8
+#define PCI_NUM_COUNTERS 8
+/* Mask for the event type field within perf_event_attr.config and EVTYPE reg */
+#define UNCORE_EVTYPE_MASK 0xFF
+
+/* Perfmon registers */
+#define PM_EVCNTR(__cntr) (0x000 + (__cntr) * 8)
+#define PM_CNTCTL(__cntr) (0x100 + (__cntr) * 8)
+#define PM_CNTCTL_RESET 0
+#define PM_EVTYPE(__cntr) (0x200 + (__cntr) * 8)
+#define PM_EVTYPE_EVSEL(__val) FIELD_GET(UNCORE_EVTYPE_MASK, __val)
+#define PM_CR 0x400
+#define PM_CR_RESET BIT(1)
+#define PM_CR_ENABLE BIT(0)
+#define PM_CNTENSET 0x410
+#define PM_CNTENSET_IDX(__cntr) BIT(__cntr)
+#define PM_CNTENCLR 0x418
+#define PM_CNTENCLR_IDX(__cntr) BIT(__cntr)
+#define PM_CNTENCLR_RESET 0xFF
+#define PM_INTENSET 0x420
+#define PM_INTENSET_IDX(__cntr) BIT(__cntr)
+#define PM_INTENCLR 0x428
+#define PM_INTENCLR_IDX(__cntr) BIT(__cntr)
+#define PM_INTENCLR_RESET 0xFF
+#define PM_OVSR 0x440
+#define PM_OVSR_OVSRCLR_RESET 0xFF
+
+enum fujitsu_uncore_pmu {
+ FUJITSU_UNCORE_PMU_MAC = 1,
+ FUJITSU_UNCORE_PMU_PCI = 2,
+};
+
+struct uncore_pmu { /* per-instance state of one Fujitsu Uncore PMU */
+ int num_counters; /* number of hardware counters on this PMU */
+ struct pmu pmu; /* perf core handle, registered in probe */
+ struct hlist_node node; /* CPU hotplug multi-instance list node */
+ void __iomem *regs; /* base of the mapped perfmon registers */
+ struct perf_event **events; /* event owning each counter, indexed by hw.idx */
+ unsigned long *used_mask; /* bitmap of counters currently allocated */
+ int cpu; /* CPU that events and the IRQ are bound to */
+ int irq; /* interrupt line requested in probe */
+ struct device *dev; /* backing platform device */
+};
+
+#define to_uncore_pmu(p) (container_of(p, struct uncore_pmu, pmu))
+
+static int uncore_pmu_cpuhp_state;
+
+/*
+ * Program and start the hardware counter that was assigned to @event.
+ *
+ * The register writes are issued in a fixed order: zero the counter,
+ * select the event type, enable interrupt generation, reset the counter
+ * control register and finally enable the counter.
+ */
+static void fujitsu_uncore_counter_start(struct perf_event *event)
+{
+	void __iomem *base = to_uncore_pmu(event->pmu)->regs;
+	int cntr = event->hw.idx;
+
+	/* Start from zero both in software (prev_count) and in hardware */
+	local64_set(&event->hw.prev_count, 0);
+	writeq_relaxed(0, base + PM_EVCNTR(cntr));
+
+	/* Select which event this counter counts */
+	writeq_relaxed(PM_EVTYPE_EVSEL(event->attr.config), base + PM_EVTYPE(cntr));
+
+	/* Let this counter raise interrupts */
+	writeq_relaxed(PM_INTENSET_IDX(cntr), base + PM_INTENSET);
+
+	/* Last step: reset the control register and switch the counter on */
+	writeq_relaxed(PM_CNTCTL_RESET, base + PM_CNTCTL(cntr));
+	writeq_relaxed(PM_CNTENSET_IDX(cntr), base + PM_CNTENSET);
+}
+
+/*
+ * Stop the hardware counter assigned to @event: the counter is disabled
+ * first, then its interrupt generation is switched off.
+ */
+static void fujitsu_uncore_counter_stop(struct perf_event *event)
+{
+	void __iomem *base = to_uncore_pmu(event->pmu)->regs;
+	int cntr = event->hw.idx;
+
+	/* Counter off */
+	writeq_relaxed(PM_CNTENCLR_IDX(cntr), base + PM_CNTENCLR);
+
+	/* Interrupt generation off */
+	writeq_relaxed(PM_INTENCLR_IDX(cntr), base + PM_INTENCLR);
+}
+
+/*
+ * Fold the current hardware counter value into @event's count.
+ *
+ * The counter register is re-read until prev_count could be swapped from
+ * the value observed before the read to the value just read; the
+ * difference between the two is then added to the event count.
+ */
+static void fujitsu_uncore_counter_update(struct perf_event *event)
+{
+	void __iomem *cntr_reg = to_uncore_pmu(event->pmu)->regs +
+				 PM_EVCNTR(event->hw.idx);
+	u64 before, now;
+
+	for (;;) {
+		before = local64_read(&event->hw.prev_count);
+		now = readq_relaxed(cntr_reg);
+		/* Done once nobody changed prev_count behind our back */
+		if (local64_cmpxchg(&event->hw.prev_count, before, now) == before)
+			break;
+	}
+
+	local64_add(now - before, &event->count);
+}
+
+/*
+ * Bring the PMU into a known state: write the reset bit to the control
+ * register, disable every counter and interrupt, clear the overflow
+ * status, reset the per-counter control and event type registers, and
+ * finally write the enable bit to the control register.
+ */
+static inline void fujitsu_uncore_init(struct uncore_pmu *uncorepmu)
+{
+	void __iomem *base = uncorepmu->regs;
+	int cntr;
+
+	writeq_relaxed(PM_CR_RESET, base + PM_CR);
+
+	writeq_relaxed(PM_CNTENCLR_RESET, base + PM_CNTENCLR);
+	writeq_relaxed(PM_INTENCLR_RESET, base + PM_INTENCLR);
+	writeq_relaxed(PM_OVSR_OVSRCLR_RESET, base + PM_OVSR);
+
+	for (cntr = 0; cntr < uncorepmu->num_counters; cntr++) {
+		writeq_relaxed(PM_CNTCTL_RESET, base + PM_CNTCTL(cntr));
+		writeq_relaxed(PM_EVTYPE_EVSEL(0), base + PM_EVTYPE(cntr));
+	}
+
+	writeq_relaxed(PM_CR_ENABLE, base + PM_CR);
+}
+
+/*
+ * Interrupt handler for one Uncore PMU instance.
+ *
+ * Reads the overflow status register, writes the same bits back to clear
+ * them, and updates the count of every event whose counter is flagged.
+ *
+ * @irq_num: interrupt number (unused)
+ * @data: the struct uncore_pmu passed when the IRQ was requested
+ *
+ * Returns IRQ_NONE when no overflow bit is set, IRQ_HANDLED otherwise.
+ */
+static irqreturn_t fujitsu_uncore_handle_irq(int irq_num, void *data)
+{
+	struct uncore_pmu *uncorepmu = data;
+	/*
+	 * Read the overflow status register. The value is walked with
+	 * for_each_set_bit(), which operates on unsigned long bitmaps, so it
+	 * is kept in an unsigned long rather than a signed long.
+	 */
+	unsigned long status = readq_relaxed(uncorepmu->regs + PM_OVSR);
+	int idx;
+
+	if (status == 0)
+		return IRQ_NONE;
+
+	/* Clear the bits we read on the overflow status register */
+	writeq_relaxed(status, uncorepmu->regs + PM_OVSR);
+
+	for_each_set_bit(idx, &status, uncorepmu->num_counters) {
+		struct perf_event *event;
+
+		/* A flagged counter may have no event attached; skip it */
+		event = uncorepmu->events[idx];
+		if (!event)
+			continue;
+
+		fujitsu_uncore_counter_update(event);
+	}
+
+	return IRQ_HANDLED;
+}
+
+/* pmu::pmu_enable callback: write the enable bit to the control register. */
+static void fujitsu_uncore_pmu_enable(struct pmu *pmu)
+{
+	struct uncore_pmu *uncorepmu = to_uncore_pmu(pmu);
+
+	writeq_relaxed(PM_CR_ENABLE, uncorepmu->regs + PM_CR);
+}
+
+/* pmu::pmu_disable callback: write zero to the control register. */
+static void fujitsu_uncore_pmu_disable(struct pmu *pmu)
+{
+	struct uncore_pmu *uncorepmu = to_uncore_pmu(pmu);
+
+	writeq_relaxed(0, uncorepmu->regs + PM_CR);
+}
+
+/*
+ * Check whether the group @event is joining can ever be scheduled.
+ *
+ * Counts @event itself plus every group member (leader and siblings) that
+ * belongs to the same PMU, and compares the total against the number of
+ * hardware counters. An event that is its own leader is always accepted.
+ */
+static bool fujitsu_uncore_validate_event_group(struct perf_event *event)
+{
+	struct perf_event *leader = event->group_leader;
+	struct perf_event *pos;
+	int needed;
+
+	if (event == leader)
+		return true;
+
+	/* One counter for @event, one more if the leader is ours as well */
+	needed = (leader->pmu == event->pmu) ? 2 : 1;
+
+	for_each_sibling_event(pos, leader) {
+		if (pos->pmu == event->pmu)
+			needed++;
+	}
+
+	/*
+	 * A group that needs more counters than the hardware provides
+	 * can never be scheduled.
+	 */
+	return needed <= to_uncore_pmu(event->pmu)->num_counters;
+}
+
+/*
+ * pmu::event_init callback.
+ *
+ * Returns -ENOENT if the event is not for this PMU, -EINVAL for sampling
+ * events, events without a CPU, or groups that do not fit the hardware.
+ * On success the counter index is marked unassigned, the event is bound
+ * to the PMU's CPU and 0 is returned.
+ */
+static int fujitsu_uncore_event_init(struct perf_event *event)
+{
+	struct uncore_pmu *uncorepmu = to_uncore_pmu(event->pmu);
+
+	/* Not one of ours */
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	/*
+	 * Rejected, in this order:
+	 *  - sampling events, since these events are not core-attributable;
+	 *  - events without a CPU: the counters run as socket counters, so
+	 *    per-task attribution is not possible;
+	 *  - groups that can never fit on the hardware.
+	 */
+	if (is_sampling_event(event) || event->cpu < 0 ||
+	    !fujitsu_uncore_validate_event_group(event))
+		return -EINVAL;
+
+	/* No counter is assigned until the event is added */
+	event->hw.idx = -1;
+	event->cpu = uncorepmu->cpu;
+
+	return 0;
+}
+
+/* pmu::start callback: clear the state flags and start the hardware counter. */
+static void fujitsu_uncore_event_start(struct perf_event *event, int flags)
+{
+	event->hw.state = 0;
+	fujitsu_uncore_counter_start(event);
+}
+
+/*
+ * pmu::stop callback: stop the hardware counter and, when PERF_EF_UPDATE
+ * is set in @flags, fold its final value into the event count. Does
+ * nothing if the event is already marked stopped.
+ */
+static void fujitsu_uncore_event_stop(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hw = &event->hw;
+
+	if (!(hw->state & PERF_HES_STOPPED)) {
+		fujitsu_uncore_counter_stop(event);
+
+		if (flags & PERF_EF_UPDATE)
+			fujitsu_uncore_counter_update(event);
+
+		hw->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
+	}
+}
+
+/*
+ * pmu::add callback: claim a free hardware counter for @event and start
+ * it right away when PERF_EF_START is set in @flags.
+ *
+ * Returns 0 on success or -EAGAIN when every counter is in use.
+ */
+static int fujitsu_uncore_event_add(struct perf_event *event, int flags)
+{
+	struct uncore_pmu *uncorepmu = to_uncore_pmu(event->pmu);
+	int slot;
+
+	/* Grab a single free bit, i.e. one counter */
+	slot = bitmap_find_free_region(uncorepmu->used_mask,
+				       uncorepmu->num_counters, 0);
+	if (slot < 0)
+		return -EAGAIN;
+
+	event->hw.idx = slot;
+	event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+	uncorepmu->events[slot] = event;
+
+	if (flags & PERF_EF_START)
+		fujitsu_uncore_event_start(event, 0);
+
+	/* Make the change visible through the userspace mapping */
+	perf_event_update_userpage(event);
+
+	return 0;
+}
+
+/*
+ * pmu::del callback: stop @event with a final count update, then give its
+ * hardware counter back to the pool.
+ */
+static void fujitsu_uncore_event_del(struct perf_event *event, int flags)
+{
+	struct uncore_pmu *uncorepmu = to_uncore_pmu(event->pmu);
+	int slot;
+
+	fujitsu_uncore_event_stop(event, flags | PERF_EF_UPDATE);
+
+	/* Detach the event from its counter and free the counter */
+	slot = event->hw.idx;
+	uncorepmu->events[slot] = NULL;
+	bitmap_release_region(uncorepmu->used_mask, slot, 0);
+
+	/* Make the change visible through the userspace mapping */
+	perf_event_update_userpage(event);
+}
+
+/* pmu::read callback: refresh the event count from the hardware counter. */
+static void fujitsu_uncore_event_read(struct perf_event *event)
+{
+	fujitsu_uncore_counter_update(event);
+}
+
+#define UNCORE_PMU_FORMAT_ATTR(_name, _config) \
+ (&((struct dev_ext_attribute[]) { \
+ { .attr = __ATTR(_name, 0444, device_show_string, NULL), \
+ .var = (void *)_config, } \
+ })[0].attr.attr)
+
+static struct attribute *fujitsu_uncore_pmu_formats[] = {
+ UNCORE_PMU_FORMAT_ATTR(event, "config:0-7"),
+ NULL
+};
+
+static const struct attribute_group fujitsu_uncore_pmu_format_group = {
+ .name = "format",
+ .attrs = fujitsu_uncore_pmu_formats,
+};
+
+/*
+ * sysfs show routine for the entries of the "events" groups: prints the
+ * event id of the attribute as "event=0x..".
+ */
+static ssize_t fujitsu_uncore_pmu_event_show(struct device *dev,
+					     struct device_attribute *attr, char *page)
+{
+	struct perf_pmu_events_attr *ev_attr =
+		container_of(attr, struct perf_pmu_events_attr, attr);
+
+	return sysfs_emit(page, "event=0x%02llx\n", ev_attr->id);
+}
+
+#define MAC_EVENT_ATTR(_name, _id) \
+ PMU_EVENT_ATTR_ID(_name, fujitsu_uncore_pmu_event_show, _id)
+
+static struct attribute *fujitsu_uncore_mac_pmu_events[] = {
+ MAC_EVENT_ATTR(cycles, 0x00),
+ MAC_EVENT_ATTR(read-count, 0x10),
+ MAC_EVENT_ATTR(read-count-request, 0x11),
+ MAC_EVENT_ATTR(read-count-return, 0x12),
+ MAC_EVENT_ATTR(read-count-request-pftgt, 0x13),
+ MAC_EVENT_ATTR(read-count-request-normal, 0x14),
+ MAC_EVENT_ATTR(read-count-return-pftgt-hit, 0x15),
+ MAC_EVENT_ATTR(read-count-return-pftgt-miss, 0x16),
+ MAC_EVENT_ATTR(read-wait, 0x17),
+ MAC_EVENT_ATTR(write-count, 0x20),
+ MAC_EVENT_ATTR(write-count-write, 0x21),
+ MAC_EVENT_ATTR(write-count-pwrite, 0x22),
+ MAC_EVENT_ATTR(memory-read-count, 0x40),
+ MAC_EVENT_ATTR(memory-write-count, 0x50),
+ MAC_EVENT_ATTR(memory-pwrite-count, 0x60),
+ MAC_EVENT_ATTR(ea-mac, 0x80),
+ MAC_EVENT_ATTR(ea-memory, 0x90),
+ MAC_EVENT_ATTR(ea-memory-mac-write, 0x92),
+ MAC_EVENT_ATTR(ea-ha, 0xa0),
+ NULL
+};
+
+#define PCI_EVENT_ATTR(_name, _id) \
+ PMU_EVENT_ATTR_ID(_name, fujitsu_uncore_pmu_event_show, _id)
+
+static struct attribute *fujitsu_uncore_pci_pmu_events[] = {
+ PCI_EVENT_ATTR(pci-port0-cycles, 0x00),
+ PCI_EVENT_ATTR(pci-port0-read-count, 0x10),
+ PCI_EVENT_ATTR(pci-port0-read-count-bus, 0x14),
+ PCI_EVENT_ATTR(pci-port0-write-count, 0x20),
+ PCI_EVENT_ATTR(pci-port0-write-count-bus, 0x24),
+ PCI_EVENT_ATTR(pci-port1-cycles, 0x40),
+ PCI_EVENT_ATTR(pci-port1-read-count, 0x50),
+ PCI_EVENT_ATTR(pci-port1-read-count-bus, 0x54),
+ PCI_EVENT_ATTR(pci-port1-write-count, 0x60),
+ PCI_EVENT_ATTR(pci-port1-write-count-bus, 0x64),
+ PCI_EVENT_ATTR(ea-pci, 0x80),
+ NULL
+};
+
+static const struct attribute_group fujitsu_uncore_mac_pmu_events_group = {
+ .name = "events",
+ .attrs = fujitsu_uncore_mac_pmu_events,
+};
+
+static const struct attribute_group fujitsu_uncore_pci_pmu_events_group = {
+ .name = "events",
+ .attrs = fujitsu_uncore_pci_pmu_events,
+};
+
+/* sysfs show routine for "cpumask": prints the CPU this PMU is bound to. */
+static ssize_t cpumask_show(struct device *dev,
+			    struct device_attribute *attr, char *buf)
+{
+	struct pmu *pmu = dev_get_drvdata(dev);
+	int bound_cpu = to_uncore_pmu(pmu)->cpu;
+
+	return cpumap_print_to_pagebuf(true, buf, cpumask_of(bound_cpu));
+}
+static DEVICE_ATTR_RO(cpumask);
+
+static struct attribute *fujitsu_uncore_pmu_cpumask_attrs[] = {
+ &dev_attr_cpumask.attr,
+ NULL
+};
+
+static const struct attribute_group fujitsu_uncore_pmu_cpumask_attr_group = {
+ .attrs = fujitsu_uncore_pmu_cpumask_attrs,
+};
+
+static const struct attribute_group *fujitsu_uncore_mac_pmu_attr_grps[] = {
+ &fujitsu_uncore_pmu_format_group,
+ &fujitsu_uncore_mac_pmu_events_group,
+ &fujitsu_uncore_pmu_cpumask_attr_group,
+ NULL
+};
+
+static const struct attribute_group *fujitsu_uncore_pci_pmu_attr_grps[] = {
+ &fujitsu_uncore_pmu_format_group,
+ &fujitsu_uncore_pci_pmu_events_group,
+ &fujitsu_uncore_pmu_cpumask_attr_group,
+ NULL
+};
+
+/*
+ * Move the PMU from its current CPU to @cpu: migrate the perf context,
+ * retarget the interrupt, then record the new CPU.
+ */
+static void fujitsu_uncore_pmu_migrate(struct uncore_pmu *uncorepmu, unsigned int cpu)
+{
+	int old_cpu = uncorepmu->cpu;
+
+	perf_pmu_migrate_context(&uncorepmu->pmu, old_cpu, cpu);
+	irq_set_affinity(uncorepmu->irq, cpumask_of(cpu));
+	uncorepmu->cpu = cpu;
+}
+
+/*
+ * CPU hotplug online callback.
+ *
+ * If the PMU is currently bound to a CPU outside its device's NUMA node
+ * and the CPU coming online is on that node, move the PMU to it.
+ * Always returns 0.
+ */
+static int fujitsu_uncore_pmu_online_cpu(unsigned int cpu, struct hlist_node *cpuhp_node)
+{
+	struct uncore_pmu *uncorepmu =
+		hlist_entry_safe(cpuhp_node, struct uncore_pmu, node);
+	int pmu_node = dev_to_node(uncorepmu->dev);
+
+	/* Already on the right node, or the new CPU would not improve things */
+	if (cpu_to_node(uncorepmu->cpu) == pmu_node || cpu_to_node(cpu) != pmu_node)
+		return 0;
+
+	fujitsu_uncore_pmu_migrate(uncorepmu, cpu);
+
+	return 0;
+}
+
+/*
+ * CPU hotplug offline callback.
+ *
+ * When the CPU going away is the one the PMU is bound to, pick a
+ * replacement: preferably another online CPU on the device's NUMA node,
+ * otherwise any other online CPU. Nothing is migrated if no other CPU is
+ * available. Always returns 0.
+ */
+static int fujitsu_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *cpuhp_node)
+{
+	struct uncore_pmu *uncorepmu =
+		hlist_entry_safe(cpuhp_node, struct uncore_pmu, node);
+	unsigned int new_cpu;
+	int pmu_node;
+
+	/* Some other CPU is going down; the PMU is not affected */
+	if (uncorepmu->cpu != cpu)
+		return 0;
+
+	pmu_node = dev_to_node(uncorepmu->dev);
+
+	/* First choice: stay on the PMU's own node */
+	new_cpu = cpumask_any_and_but(cpumask_of_node(pmu_node), cpu_online_mask, cpu);
+	if (new_cpu >= nr_cpu_ids)
+		new_cpu = cpumask_any_but(cpu_online_mask, cpu);
+
+	if (new_cpu < nr_cpu_ids)
+		fujitsu_uncore_pmu_migrate(uncorepmu, new_cpu);
+
+	return 0;
+}
+
+/*
+ * Bind one Uncore PMU instance.
+ *
+ * Reads the ACPI UID to build the PMU name, allocates the per-instance
+ * state, maps and initialises the perfmon registers, requests the
+ * interrupt and pins it to the chosen CPU, then registers the instance
+ * with CPU hotplug and the perf core.
+ *
+ * Returns 0 on success or a negative errno. Allocations, the register
+ * mapping and the IRQ are device-managed; only the hotplug instance is
+ * removed by hand when PMU registration fails.
+ */
+static int fujitsu_uncore_pmu_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	unsigned long device_type = (unsigned long)device_get_match_data(dev);
+	const struct attribute_group **attr_groups;
+	struct uncore_pmu *uncorepmu;
+	struct resource *memrc;
+	char *name;
+	int ret;
+	int irq;
+	u64 uid;
+
+	ret = acpi_dev_uid_to_integer(ACPI_COMPANION(dev), &uid);
+	if (ret)
+		return dev_err_probe(dev, ret, "unable to read ACPI uid\n");
+
+	uncorepmu = devm_kzalloc(dev, sizeof(*uncorepmu), GFP_KERNEL);
+	if (!uncorepmu)
+		return -ENOMEM;
+	uncorepmu->dev = dev;
+	uncorepmu->cpu = cpumask_local_spread(0, dev_to_node(dev));
+	platform_set_drvdata(pdev, uncorepmu);
+
+	/* The PMU name is built from nibbles of the ACPI UID */
+	switch (device_type) {
+	case FUJITSU_UNCORE_PMU_MAC:
+		uncorepmu->num_counters = MAC_NUM_COUNTERS;
+		attr_groups = fujitsu_uncore_mac_pmu_attr_grps;
+		name = devm_kasprintf(dev, GFP_KERNEL, "mac_iod%llu_mac%llu_ch%llu",
+				      (uid >> 8) & 0xF, (uid >> 4) & 0xF, uid & 0xF);
+		break;
+	case FUJITSU_UNCORE_PMU_PCI:
+		uncorepmu->num_counters = PCI_NUM_COUNTERS;
+		attr_groups = fujitsu_uncore_pci_pmu_attr_grps;
+		name = devm_kasprintf(dev, GFP_KERNEL, "pci_iod%llu_pci%llu",
+				      (uid >> 4) & 0xF, uid & 0xF);
+		break;
+	default:
+		return dev_err_probe(dev, -EINVAL, "illegal device type: %lu\n", device_type);
+	}
+	if (!name)
+		return -ENOMEM;
+
+	uncorepmu->pmu = (struct pmu) {
+		.parent		= dev,
+		.task_ctx_nr	= perf_invalid_context,
+
+		.attr_groups	= attr_groups,
+
+		.pmu_enable	= fujitsu_uncore_pmu_enable,
+		.pmu_disable	= fujitsu_uncore_pmu_disable,
+		.event_init	= fujitsu_uncore_event_init,
+		.add		= fujitsu_uncore_event_add,
+		.del		= fujitsu_uncore_event_del,
+		.start		= fujitsu_uncore_event_start,
+		.stop		= fujitsu_uncore_event_stop,
+		.read		= fujitsu_uncore_event_read,
+
+		.capabilities	= PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT,
+	};
+
+	/* One event slot per hardware counter, zero-initialised */
+	uncorepmu->events = devm_kcalloc(dev, uncorepmu->num_counters,
+					 sizeof(uncorepmu->events[0]), GFP_KERNEL);
+	if (!uncorepmu->events)
+		return -ENOMEM;
+
+	/* Counter allocation bitmap, one bit per hardware counter */
+	uncorepmu->used_mask = devm_kcalloc(dev, BITS_TO_LONGS(uncorepmu->num_counters),
+					    sizeof(uncorepmu->used_mask[0]), GFP_KERNEL);
+	if (!uncorepmu->used_mask)
+		return -ENOMEM;
+
+	uncorepmu->regs = devm_platform_get_and_ioremap_resource(pdev, 0, &memrc);
+	if (IS_ERR(uncorepmu->regs))
+		return PTR_ERR(uncorepmu->regs);
+
+	fujitsu_uncore_init(uncorepmu);
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0)
+		return irq;
+
+	ret = devm_request_irq(dev, irq, fujitsu_uncore_handle_irq,
+			       IRQF_NOBALANCING | IRQF_NO_THREAD,
+			       name, uncorepmu);
+	if (ret)
+		return dev_err_probe(dev, ret, "Failed to request IRQ:%d\n", irq);
+
+	/* Deliver the interrupt on the CPU the events are bound to */
+	ret = irq_set_affinity(irq, cpumask_of(uncorepmu->cpu));
+	if (ret)
+		return dev_err_probe(dev, ret, "Failed to set irq affinity:%d\n", irq);
+
+	uncorepmu->irq = irq;
+
+	/* Add this instance to the list used by the offline callback */
+	ret = cpuhp_state_add_instance(uncore_pmu_cpuhp_state, &uncorepmu->node);
+	if (ret)
+		return dev_err_probe(dev, ret, "Error registering hotplug\n");
+
+	ret = perf_pmu_register(&uncorepmu->pmu, name, -1);
+	if (ret < 0) {
+		cpuhp_state_remove_instance_nocalls(uncore_pmu_cpuhp_state, &uncorepmu->node);
+		return dev_err_probe(dev, ret, "Failed to register %s PMU\n", name);
+	}
+
+	dev_dbg(dev, "Registered %s, type: %d\n", name, uncorepmu->pmu.type);
+
+	return 0;
+}
+
+/*
+ * Unbind one Uncore PMU instance: write zero to the control register,
+ * unregister from the perf core and drop the CPU hotplug instance.
+ */
+static void fujitsu_uncore_pmu_remove(struct platform_device *pdev)
+{
+	struct uncore_pmu *pmu_inst = platform_get_drvdata(pdev);
+
+	writeq_relaxed(0, pmu_inst->regs + PM_CR);
+
+	perf_pmu_unregister(&pmu_inst->pmu);
+	cpuhp_state_remove_instance_nocalls(uncore_pmu_cpuhp_state, &pmu_inst->node);
+}
+
+static const struct acpi_device_id fujitsu_uncore_pmu_acpi_match[] = {
+ { "FUJI200C", FUJITSU_UNCORE_PMU_MAC },
+ { "FUJI200D", FUJITSU_UNCORE_PMU_PCI },
+ { }
+};
+MODULE_DEVICE_TABLE(acpi, fujitsu_uncore_pmu_acpi_match);
+
+static struct platform_driver fujitsu_uncore_pmu_driver = {
+ .driver = {
+ .name = "fujitsu-uncore-pmu",
+ .acpi_match_table = fujitsu_uncore_pmu_acpi_match,
+ .suppress_bind_attrs = true,
+ },
+ .probe = fujitsu_uncore_pmu_probe,
+ .remove = fujitsu_uncore_pmu_remove,
+};
+
+/*
+ * Module init: set up the dynamic multi-instance CPU hotplug state, then
+ * register the platform driver. The hotplug state is torn down again if
+ * driver registration fails.
+ */
+static int __init fujitsu_uncore_pmu_init(void)
+{
+	int state, err;
+
+	/* Install a hook to update the reader CPU in case it goes offline */
+	state = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
+					"perf/fujitsu/uncore:online",
+					fujitsu_uncore_pmu_online_cpu,
+					fujitsu_uncore_pmu_offline_cpu);
+	if (state < 0)
+		return state;
+
+	uncore_pmu_cpuhp_state = state;
+
+	err = platform_driver_register(&fujitsu_uncore_pmu_driver);
+	if (err)
+		cpuhp_remove_multi_state(uncore_pmu_cpuhp_state);
+
+	return err;
+}
+
+/*
+ * Module exit: undo fujitsu_uncore_pmu_init() in reverse order, driver
+ * first and the CPU hotplug state last.
+ */
+static void __exit fujitsu_uncore_pmu_exit(void)
+{
+	platform_driver_unregister(&fujitsu_uncore_pmu_driver);
+	cpuhp_remove_multi_state(uncore_pmu_cpuhp_state);
+}
+
+module_init(fujitsu_uncore_pmu_init);
+module_exit(fujitsu_uncore_pmu_exit);
+
+MODULE_AUTHOR("Koichi Okuno <fj2767dz@fujitsu.com>");
+MODULE_DESCRIPTION("Fujitsu Uncore PMU driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/perf/hisilicon/Makefile b/drivers/perf/hisilicon/Makefile
index 48dcc8381ea7..186be3d02238 100644
--- a/drivers/perf/hisilicon/Makefile
+++ b/drivers/perf/hisilicon/Makefile
@@ -1,7 +1,8 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_HISI_PMU) += hisi_uncore_pmu.o hisi_uncore_l3c_pmu.o \
hisi_uncore_hha_pmu.o hisi_uncore_ddrc_pmu.o hisi_uncore_sllc_pmu.o \
- hisi_uncore_pa_pmu.o hisi_uncore_cpa_pmu.o hisi_uncore_uc_pmu.o
+ hisi_uncore_pa_pmu.o hisi_uncore_cpa_pmu.o hisi_uncore_uc_pmu.o \
+ hisi_uncore_noc_pmu.o hisi_uncore_mn_pmu.o
obj-$(CONFIG_HISI_PCIE_PMU) += hisi_pcie_pmu.o
obj-$(CONFIG_HNS3_PMU) += hns3_pmu.o
diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c
index 7e490f8868f2..21c494881ca0 100644
--- a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c
@@ -43,12 +43,21 @@
#define DDRC_V2_EVENT_TYPE 0xe74
#define DDRC_V2_PERF_CTRL 0xeA0
+/* DDRC interrupt registers definition in v3 */
+#define DDRC_V3_INT_MASK 0x534
+#define DDRC_V3_INT_STATUS 0x538
+#define DDRC_V3_INT_CLEAR 0x53C
+
/* DDRC has 8-counters */
#define DDRC_NR_COUNTERS 0x8
#define DDRC_V1_PERF_CTRL_EN 0x2
#define DDRC_V2_PERF_CTRL_EN 0x1
#define DDRC_V1_NR_EVENTS 0x7
-#define DDRC_V2_NR_EVENTS 0x90
+#define DDRC_V2_NR_EVENTS 0xFF
+
+#define DDRC_EVENT_CNTn(base, n) ((base) + (n) * 8)
+#define DDRC_EVENT_TYPEn(base, n) ((base) + (n) * 4)
+#define DDRC_UNIMPLEMENTED_REG GENMASK(31, 0)
/*
* For PMU v1, there are eight-events and every event has been mapped
@@ -63,47 +72,37 @@ static const u32 ddrc_reg_off[] = {
DDRC_PRE_CMD, DDRC_ACT_CMD, DDRC_RNK_CHG, DDRC_RW_CHG
};
-/*
- * Select the counter register offset using the counter index.
- * In PMU v1, there are no programmable counter, the count
- * is read form the statistics counter register itself.
- */
-static u32 hisi_ddrc_pmu_v1_get_counter_offset(int cntr_idx)
-{
- return ddrc_reg_off[cntr_idx];
-}
-
-static u32 hisi_ddrc_pmu_v2_get_counter_offset(int cntr_idx)
-{
- return DDRC_V2_EVENT_CNT + cntr_idx * 8;
-}
+struct hisi_ddrc_pmu_regs {
+ u32 event_cnt;
+ u32 event_ctrl;
+ u32 event_type;
+ u32 perf_ctrl;
+ u32 perf_ctrl_en;
+ u32 int_mask;
+ u32 int_clear;
+ u32 int_status;
+};
-static u64 hisi_ddrc_pmu_v1_read_counter(struct hisi_pmu *ddrc_pmu,
+static u64 hisi_ddrc_pmu_read_counter(struct hisi_pmu *ddrc_pmu,
struct hw_perf_event *hwc)
{
- return readl(ddrc_pmu->base +
- hisi_ddrc_pmu_v1_get_counter_offset(hwc->idx));
-}
+ struct hisi_ddrc_pmu_regs *regs = ddrc_pmu->dev_info->private;
-static void hisi_ddrc_pmu_v1_write_counter(struct hisi_pmu *ddrc_pmu,
- struct hw_perf_event *hwc, u64 val)
-{
- writel((u32)val,
- ddrc_pmu->base + hisi_ddrc_pmu_v1_get_counter_offset(hwc->idx));
-}
+ if (regs->event_cnt == DDRC_UNIMPLEMENTED_REG)
+ return readl(ddrc_pmu->base + ddrc_reg_off[hwc->idx]);
-static u64 hisi_ddrc_pmu_v2_read_counter(struct hisi_pmu *ddrc_pmu,
- struct hw_perf_event *hwc)
-{
- return readq(ddrc_pmu->base +
- hisi_ddrc_pmu_v2_get_counter_offset(hwc->idx));
+ return readq(ddrc_pmu->base + DDRC_EVENT_CNTn(regs->event_cnt, hwc->idx));
}
-static void hisi_ddrc_pmu_v2_write_counter(struct hisi_pmu *ddrc_pmu,
- struct hw_perf_event *hwc, u64 val)
+static void hisi_ddrc_pmu_write_counter(struct hisi_pmu *ddrc_pmu,
+ struct hw_perf_event *hwc, u64 val)
{
- writeq(val,
- ddrc_pmu->base + hisi_ddrc_pmu_v2_get_counter_offset(hwc->idx));
+ struct hisi_ddrc_pmu_regs *regs = ddrc_pmu->dev_info->private;
+
+ if (regs->event_cnt == DDRC_UNIMPLEMENTED_REG)
+ writel((u32)val, ddrc_pmu->base + ddrc_reg_off[hwc->idx]);
+ else
+ writeq(val, ddrc_pmu->base + DDRC_EVENT_CNTn(regs->event_cnt, hwc->idx));
}
/*
@@ -114,54 +113,12 @@ static void hisi_ddrc_pmu_v2_write_counter(struct hisi_pmu *ddrc_pmu,
static void hisi_ddrc_pmu_write_evtype(struct hisi_pmu *ddrc_pmu, int idx,
u32 type)
{
- u32 offset;
-
- if (ddrc_pmu->identifier >= HISI_PMU_V2) {
- offset = DDRC_V2_EVENT_TYPE + 4 * idx;
- writel(type, ddrc_pmu->base + offset);
- }
-}
-
-static void hisi_ddrc_pmu_v1_start_counters(struct hisi_pmu *ddrc_pmu)
-{
- u32 val;
-
- /* Set perf_enable in DDRC_PERF_CTRL to start event counting */
- val = readl(ddrc_pmu->base + DDRC_PERF_CTRL);
- val |= DDRC_V1_PERF_CTRL_EN;
- writel(val, ddrc_pmu->base + DDRC_PERF_CTRL);
-}
-
-static void hisi_ddrc_pmu_v1_stop_counters(struct hisi_pmu *ddrc_pmu)
-{
- u32 val;
-
- /* Clear perf_enable in DDRC_PERF_CTRL to stop event counting */
- val = readl(ddrc_pmu->base + DDRC_PERF_CTRL);
- val &= ~DDRC_V1_PERF_CTRL_EN;
- writel(val, ddrc_pmu->base + DDRC_PERF_CTRL);
-}
-
-static void hisi_ddrc_pmu_v1_enable_counter(struct hisi_pmu *ddrc_pmu,
- struct hw_perf_event *hwc)
-{
- u32 val;
+ struct hisi_ddrc_pmu_regs *regs = ddrc_pmu->dev_info->private;
- /* Set counter index(event code) in DDRC_EVENT_CTRL register */
- val = readl(ddrc_pmu->base + DDRC_EVENT_CTRL);
- val |= (1 << GET_DDRC_EVENTID(hwc));
- writel(val, ddrc_pmu->base + DDRC_EVENT_CTRL);
-}
+ if (regs->event_type == DDRC_UNIMPLEMENTED_REG)
+ return;
-static void hisi_ddrc_pmu_v1_disable_counter(struct hisi_pmu *ddrc_pmu,
- struct hw_perf_event *hwc)
-{
- u32 val;
-
- /* Clear counter index(event code) in DDRC_EVENT_CTRL register */
- val = readl(ddrc_pmu->base + DDRC_EVENT_CTRL);
- val &= ~(1 << GET_DDRC_EVENTID(hwc));
- writel(val, ddrc_pmu->base + DDRC_EVENT_CTRL);
+ writel(type, ddrc_pmu->base + DDRC_EVENT_TYPEn(regs->event_type, idx));
}
static int hisi_ddrc_pmu_v1_get_event_idx(struct perf_event *event)
@@ -180,120 +137,96 @@ static int hisi_ddrc_pmu_v1_get_event_idx(struct perf_event *event)
return idx;
}
-static int hisi_ddrc_pmu_v2_get_event_idx(struct perf_event *event)
+static int hisi_ddrc_pmu_get_event_idx(struct perf_event *event)
{
+ struct hisi_pmu *ddrc_pmu = to_hisi_pmu(event->pmu);
+ struct hisi_ddrc_pmu_regs *regs = ddrc_pmu->dev_info->private;
+
+ if (regs->event_type == DDRC_UNIMPLEMENTED_REG)
+ return hisi_ddrc_pmu_v1_get_event_idx(event);
+
return hisi_uncore_pmu_get_event_idx(event);
}
-static void hisi_ddrc_pmu_v2_start_counters(struct hisi_pmu *ddrc_pmu)
+static void hisi_ddrc_pmu_start_counters(struct hisi_pmu *ddrc_pmu)
{
+ struct hisi_ddrc_pmu_regs *regs = ddrc_pmu->dev_info->private;
u32 val;
- val = readl(ddrc_pmu->base + DDRC_V2_PERF_CTRL);
- val |= DDRC_V2_PERF_CTRL_EN;
- writel(val, ddrc_pmu->base + DDRC_V2_PERF_CTRL);
+ val = readl(ddrc_pmu->base + regs->perf_ctrl);
+ val |= regs->perf_ctrl_en;
+ writel(val, ddrc_pmu->base + regs->perf_ctrl);
}
-static void hisi_ddrc_pmu_v2_stop_counters(struct hisi_pmu *ddrc_pmu)
+static void hisi_ddrc_pmu_stop_counters(struct hisi_pmu *ddrc_pmu)
{
+ struct hisi_ddrc_pmu_regs *regs = ddrc_pmu->dev_info->private;
u32 val;
- val = readl(ddrc_pmu->base + DDRC_V2_PERF_CTRL);
- val &= ~DDRC_V2_PERF_CTRL_EN;
- writel(val, ddrc_pmu->base + DDRC_V2_PERF_CTRL);
+ val = readl(ddrc_pmu->base + regs->perf_ctrl);
+ val &= ~regs->perf_ctrl_en;
+ writel(val, ddrc_pmu->base + regs->perf_ctrl);
}
-static void hisi_ddrc_pmu_v2_enable_counter(struct hisi_pmu *ddrc_pmu,
+static void hisi_ddrc_pmu_enable_counter(struct hisi_pmu *ddrc_pmu,
struct hw_perf_event *hwc)
{
+ struct hisi_ddrc_pmu_regs *regs = ddrc_pmu->dev_info->private;
u32 val;
- val = readl(ddrc_pmu->base + DDRC_V2_EVENT_CTRL);
- val |= 1 << hwc->idx;
- writel(val, ddrc_pmu->base + DDRC_V2_EVENT_CTRL);
+ val = readl(ddrc_pmu->base + regs->event_ctrl);
+ val |= BIT_ULL(hwc->idx);
+ writel(val, ddrc_pmu->base + regs->event_ctrl);
}
-static void hisi_ddrc_pmu_v2_disable_counter(struct hisi_pmu *ddrc_pmu,
+static void hisi_ddrc_pmu_disable_counter(struct hisi_pmu *ddrc_pmu,
struct hw_perf_event *hwc)
{
+ struct hisi_ddrc_pmu_regs *regs = ddrc_pmu->dev_info->private;
u32 val;
- val = readl(ddrc_pmu->base + DDRC_V2_EVENT_CTRL);
- val &= ~(1 << hwc->idx);
- writel(val, ddrc_pmu->base + DDRC_V2_EVENT_CTRL);
+ val = readl(ddrc_pmu->base + regs->event_ctrl);
+ val &= ~BIT_ULL(hwc->idx);
+ writel(val, ddrc_pmu->base + regs->event_ctrl);
}
-static void hisi_ddrc_pmu_v1_enable_counter_int(struct hisi_pmu *ddrc_pmu,
- struct hw_perf_event *hwc)
-{
- u32 val;
-
- /* Write 0 to enable interrupt */
- val = readl(ddrc_pmu->base + DDRC_INT_MASK);
- val &= ~(1 << hwc->idx);
- writel(val, ddrc_pmu->base + DDRC_INT_MASK);
-}
-
-static void hisi_ddrc_pmu_v1_disable_counter_int(struct hisi_pmu *ddrc_pmu,
- struct hw_perf_event *hwc)
-{
- u32 val;
-
- /* Write 1 to mask interrupt */
- val = readl(ddrc_pmu->base + DDRC_INT_MASK);
- val |= 1 << hwc->idx;
- writel(val, ddrc_pmu->base + DDRC_INT_MASK);
-}
-
-static void hisi_ddrc_pmu_v2_enable_counter_int(struct hisi_pmu *ddrc_pmu,
- struct hw_perf_event *hwc)
+static void hisi_ddrc_pmu_enable_counter_int(struct hisi_pmu *ddrc_pmu,
+ struct hw_perf_event *hwc)
{
+ struct hisi_ddrc_pmu_regs *regs = ddrc_pmu->dev_info->private;
u32 val;
- val = readl(ddrc_pmu->base + DDRC_V2_INT_MASK);
- val &= ~(1 << hwc->idx);
- writel(val, ddrc_pmu->base + DDRC_V2_INT_MASK);
+ val = readl(ddrc_pmu->base + regs->int_mask);
+ val &= ~BIT_ULL(hwc->idx);
+ writel(val, ddrc_pmu->base + regs->int_mask);
}
-static void hisi_ddrc_pmu_v2_disable_counter_int(struct hisi_pmu *ddrc_pmu,
- struct hw_perf_event *hwc)
+static void hisi_ddrc_pmu_disable_counter_int(struct hisi_pmu *ddrc_pmu,
+ struct hw_perf_event *hwc)
{
+ struct hisi_ddrc_pmu_regs *regs = ddrc_pmu->dev_info->private;
u32 val;
- val = readl(ddrc_pmu->base + DDRC_V2_INT_MASK);
- val |= 1 << hwc->idx;
- writel(val, ddrc_pmu->base + DDRC_V2_INT_MASK);
+ val = readl(ddrc_pmu->base + regs->int_mask);
+ val |= BIT_ULL(hwc->idx);
+ writel(val, ddrc_pmu->base + regs->int_mask);
}
-static u32 hisi_ddrc_pmu_v1_get_int_status(struct hisi_pmu *ddrc_pmu)
+static u32 hisi_ddrc_pmu_get_int_status(struct hisi_pmu *ddrc_pmu)
{
- return readl(ddrc_pmu->base + DDRC_INT_STATUS);
-}
+ struct hisi_ddrc_pmu_regs *regs = ddrc_pmu->dev_info->private;
-static void hisi_ddrc_pmu_v1_clear_int_status(struct hisi_pmu *ddrc_pmu,
- int idx)
-{
- writel(1 << idx, ddrc_pmu->base + DDRC_INT_CLEAR);
+ return readl(ddrc_pmu->base + regs->int_status);
}
-static u32 hisi_ddrc_pmu_v2_get_int_status(struct hisi_pmu *ddrc_pmu)
+static void hisi_ddrc_pmu_clear_int_status(struct hisi_pmu *ddrc_pmu,
+ int idx)
{
- return readl(ddrc_pmu->base + DDRC_V2_INT_STATUS);
-}
+ struct hisi_ddrc_pmu_regs *regs = ddrc_pmu->dev_info->private;
-static void hisi_ddrc_pmu_v2_clear_int_status(struct hisi_pmu *ddrc_pmu,
- int idx)
-{
- writel(1 << idx, ddrc_pmu->base + DDRC_V2_INT_CLEAR);
+ writel(1 << idx, ddrc_pmu->base + regs->int_clear);
}
-static const struct acpi_device_id hisi_ddrc_pmu_acpi_match[] = {
- { "HISI0233", },
- { "HISI0234", },
- {}
-};
-MODULE_DEVICE_TABLE(acpi, hisi_ddrc_pmu_acpi_match);
-
static int hisi_ddrc_pmu_init_data(struct platform_device *pdev,
struct hisi_pmu *ddrc_pmu)
{
@@ -314,6 +247,10 @@ static int hisi_ddrc_pmu_init_data(struct platform_device *pdev,
return -EINVAL;
}
+ ddrc_pmu->dev_info = device_get_match_data(&pdev->dev);
+ if (!ddrc_pmu->dev_info)
+ return -ENODEV;
+
ddrc_pmu->base = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(ddrc_pmu->base)) {
dev_err(&pdev->dev, "ioremap failed for ddrc_pmu resource\n");
@@ -396,34 +333,19 @@ static const struct attribute_group *hisi_ddrc_pmu_v2_attr_groups[] = {
NULL
};
-static const struct hisi_uncore_ops hisi_uncore_ddrc_v1_ops = {
- .write_evtype = hisi_ddrc_pmu_write_evtype,
- .get_event_idx = hisi_ddrc_pmu_v1_get_event_idx,
- .start_counters = hisi_ddrc_pmu_v1_start_counters,
- .stop_counters = hisi_ddrc_pmu_v1_stop_counters,
- .enable_counter = hisi_ddrc_pmu_v1_enable_counter,
- .disable_counter = hisi_ddrc_pmu_v1_disable_counter,
- .enable_counter_int = hisi_ddrc_pmu_v1_enable_counter_int,
- .disable_counter_int = hisi_ddrc_pmu_v1_disable_counter_int,
- .write_counter = hisi_ddrc_pmu_v1_write_counter,
- .read_counter = hisi_ddrc_pmu_v1_read_counter,
- .get_int_status = hisi_ddrc_pmu_v1_get_int_status,
- .clear_int_status = hisi_ddrc_pmu_v1_clear_int_status,
-};
-
-static const struct hisi_uncore_ops hisi_uncore_ddrc_v2_ops = {
+static const struct hisi_uncore_ops hisi_uncore_ddrc_ops = {
.write_evtype = hisi_ddrc_pmu_write_evtype,
- .get_event_idx = hisi_ddrc_pmu_v2_get_event_idx,
- .start_counters = hisi_ddrc_pmu_v2_start_counters,
- .stop_counters = hisi_ddrc_pmu_v2_stop_counters,
- .enable_counter = hisi_ddrc_pmu_v2_enable_counter,
- .disable_counter = hisi_ddrc_pmu_v2_disable_counter,
- .enable_counter_int = hisi_ddrc_pmu_v2_enable_counter_int,
- .disable_counter_int = hisi_ddrc_pmu_v2_disable_counter_int,
- .write_counter = hisi_ddrc_pmu_v2_write_counter,
- .read_counter = hisi_ddrc_pmu_v2_read_counter,
- .get_int_status = hisi_ddrc_pmu_v2_get_int_status,
- .clear_int_status = hisi_ddrc_pmu_v2_clear_int_status,
+ .get_event_idx = hisi_ddrc_pmu_get_event_idx,
+ .start_counters = hisi_ddrc_pmu_start_counters,
+ .stop_counters = hisi_ddrc_pmu_stop_counters,
+ .enable_counter = hisi_ddrc_pmu_enable_counter,
+ .disable_counter = hisi_ddrc_pmu_disable_counter,
+ .enable_counter_int = hisi_ddrc_pmu_enable_counter_int,
+ .disable_counter_int = hisi_ddrc_pmu_disable_counter_int,
+ .write_counter = hisi_ddrc_pmu_write_counter,
+ .read_counter = hisi_ddrc_pmu_read_counter,
+ .get_int_status = hisi_ddrc_pmu_get_int_status,
+ .clear_int_status = hisi_ddrc_pmu_clear_int_status,
};
static int hisi_ddrc_pmu_dev_probe(struct platform_device *pdev,
@@ -439,18 +361,10 @@ static int hisi_ddrc_pmu_dev_probe(struct platform_device *pdev,
if (ret)
return ret;
- if (ddrc_pmu->identifier >= HISI_PMU_V2) {
- ddrc_pmu->counter_bits = 48;
- ddrc_pmu->check_event = DDRC_V2_NR_EVENTS;
- ddrc_pmu->pmu_events.attr_groups = hisi_ddrc_pmu_v2_attr_groups;
- ddrc_pmu->ops = &hisi_uncore_ddrc_v2_ops;
- } else {
- ddrc_pmu->counter_bits = 32;
- ddrc_pmu->check_event = DDRC_V1_NR_EVENTS;
- ddrc_pmu->pmu_events.attr_groups = hisi_ddrc_pmu_v1_attr_groups;
- ddrc_pmu->ops = &hisi_uncore_ddrc_v1_ops;
- }
-
+ ddrc_pmu->pmu_events.attr_groups = ddrc_pmu->dev_info->attr_groups;
+ ddrc_pmu->counter_bits = ddrc_pmu->dev_info->counter_bits;
+ ddrc_pmu->check_event = ddrc_pmu->dev_info->check_event;
+ ddrc_pmu->ops = &hisi_uncore_ddrc_ops;
ddrc_pmu->num_counters = DDRC_NR_COUNTERS;
ddrc_pmu->dev = &pdev->dev;
ddrc_pmu->on_cpu = -1;
@@ -515,6 +429,68 @@ static void hisi_ddrc_pmu_remove(struct platform_device *pdev)
&ddrc_pmu->node);
}
+static struct hisi_ddrc_pmu_regs hisi_ddrc_v1_pmu_regs = {
+ .event_cnt = DDRC_UNIMPLEMENTED_REG,
+ .event_ctrl = DDRC_EVENT_CTRL,
+ .event_type = DDRC_UNIMPLEMENTED_REG,
+ .perf_ctrl = DDRC_PERF_CTRL,
+ .perf_ctrl_en = DDRC_V1_PERF_CTRL_EN,
+ .int_mask = DDRC_INT_MASK,
+ .int_clear = DDRC_INT_CLEAR,
+ .int_status = DDRC_INT_STATUS,
+};
+
+static const struct hisi_pmu_dev_info hisi_ddrc_v1 = {
+ .counter_bits = 32,
+ .check_event = DDRC_V1_NR_EVENTS,
+ .attr_groups = hisi_ddrc_pmu_v1_attr_groups,
+ .private = &hisi_ddrc_v1_pmu_regs,
+};
+
+static struct hisi_ddrc_pmu_regs hisi_ddrc_v2_pmu_regs = {
+ .event_cnt = DDRC_V2_EVENT_CNT,
+ .event_ctrl = DDRC_V2_EVENT_CTRL,
+ .event_type = DDRC_V2_EVENT_TYPE,
+ .perf_ctrl = DDRC_V2_PERF_CTRL,
+ .perf_ctrl_en = DDRC_V2_PERF_CTRL_EN,
+ .int_mask = DDRC_V2_INT_MASK,
+ .int_clear = DDRC_V2_INT_CLEAR,
+ .int_status = DDRC_V2_INT_STATUS,
+};
+
+static const struct hisi_pmu_dev_info hisi_ddrc_v2 = {
+ .counter_bits = 48,
+ .check_event = DDRC_V2_NR_EVENTS,
+ .attr_groups = hisi_ddrc_pmu_v2_attr_groups,
+ .private = &hisi_ddrc_v2_pmu_regs,
+};
+
+static struct hisi_ddrc_pmu_regs hisi_ddrc_v3_pmu_regs = {
+ .event_cnt = DDRC_V2_EVENT_CNT,
+ .event_ctrl = DDRC_V2_EVENT_CTRL,
+ .event_type = DDRC_V2_EVENT_TYPE,
+ .perf_ctrl = DDRC_V2_PERF_CTRL,
+ .perf_ctrl_en = DDRC_V2_PERF_CTRL_EN,
+ .int_mask = DDRC_V3_INT_MASK,
+ .int_clear = DDRC_V3_INT_CLEAR,
+ .int_status = DDRC_V3_INT_STATUS,
+};
+
+static const struct hisi_pmu_dev_info hisi_ddrc_v3 = {
+ .counter_bits = 48,
+ .check_event = DDRC_V2_NR_EVENTS,
+ .attr_groups = hisi_ddrc_pmu_v2_attr_groups,
+ .private = &hisi_ddrc_v3_pmu_regs,
+};
+
+static const struct acpi_device_id hisi_ddrc_pmu_acpi_match[] = {
+ { "HISI0233", (kernel_ulong_t)&hisi_ddrc_v1 },
+ { "HISI0234", (kernel_ulong_t)&hisi_ddrc_v2 },
+ { "HISI0235", (kernel_ulong_t)&hisi_ddrc_v3 },
+ {}
+};
+MODULE_DEVICE_TABLE(acpi, hisi_ddrc_pmu_acpi_match);
+
static struct platform_driver hisi_ddrc_pmu_driver = {
.driver = {
.name = "hisi_ddrc_pmu",
diff --git a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c
index ca609db86046..97cfaa586a87 100644
--- a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c
@@ -47,9 +47,9 @@
#define HHA_SRCID_CMD GENMASK(16, 6)
#define HHA_SRCID_MSK GENMASK(30, 20)
#define HHA_DATSRC_SKT_EN BIT(23)
-#define HHA_EVTYPE_NONE 0xff
+#define HHA_EVTYPE_MASK GENMASK(7, 0)
#define HHA_V1_NR_EVENT 0x65
-#define HHA_V2_NR_EVENT 0xCE
+#define HHA_V2_NR_EVENT 0xFF
HISI_PMU_EVENT_ATTR_EXTRACTOR(srcid_cmd, config1, 10, 0);
HISI_PMU_EVENT_ATTR_EXTRACTOR(srcid_msk, config1, 21, 11);
@@ -197,7 +197,7 @@ static void hisi_hha_pmu_write_evtype(struct hisi_pmu *hha_pmu, int idx,
/* Write event code to HHA_EVENT_TYPEx register */
val = readl(hha_pmu->base + reg);
- val &= ~(HHA_EVTYPE_NONE << shift);
+ val &= ~(HHA_EVTYPE_MASK << shift);
val |= (type << shift);
writel(val, hha_pmu->base + reg);
}
diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c
index 412fc3a97963..bbd81a43047d 100644
--- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c
@@ -39,6 +39,7 @@
/* L3C has 8-counters */
#define L3C_NR_COUNTERS 0x8
+#define L3C_MAX_EXT 2
#define L3C_PERF_CTRL_EN 0x10000
#define L3C_TRACETAG_EN BIT(31)
@@ -55,59 +56,152 @@
#define L3C_V1_NR_EVENTS 0x59
#define L3C_V2_NR_EVENTS 0xFF
-HISI_PMU_EVENT_ATTR_EXTRACTOR(tt_core, config1, 7, 0);
+HISI_PMU_EVENT_ATTR_EXTRACTOR(ext, config, 17, 16);
HISI_PMU_EVENT_ATTR_EXTRACTOR(tt_req, config1, 10, 8);
HISI_PMU_EVENT_ATTR_EXTRACTOR(datasrc_cfg, config1, 15, 11);
HISI_PMU_EVENT_ATTR_EXTRACTOR(datasrc_skt, config1, 16, 16);
+HISI_PMU_EVENT_ATTR_EXTRACTOR(tt_core, config2, 15, 0);
-static void hisi_l3c_pmu_config_req_tracetag(struct perf_event *event)
+struct hisi_l3c_pmu {
+ struct hisi_pmu l3c_pmu;
+
+ /* MMIO and IRQ resources for extension events */
+ void __iomem *ext_base[L3C_MAX_EXT];
+ int ext_irq[L3C_MAX_EXT];
+ int ext_num;
+};
+
+#define to_hisi_l3c_pmu(_l3c_pmu) \
+ container_of(_l3c_pmu, struct hisi_l3c_pmu, l3c_pmu)
+
+/*
+ * The hardware counter idx used in counter enable/disable,
+ * interrupt enable/disable and status check, etc.
+ */
+#define L3C_HW_IDX(_cntr_idx) ((_cntr_idx) % L3C_NR_COUNTERS)
+
+/* Half-open bit range [L, H) of ext region _ext (0-based) in the used mask. */
+#define L3C_CNTR_EXT_L(_ext) (((_ext) + 1) * L3C_NR_COUNTERS)
+#define L3C_CNTR_EXT_H(_ext) (((_ext) + 2) * L3C_NR_COUNTERS)
+
+struct hisi_l3c_pmu_ext {
+ bool support_ext;
+};
+
+static bool support_ext(struct hisi_l3c_pmu *pmu)
+{
+ struct hisi_l3c_pmu_ext *l3c_pmu_ext = pmu->l3c_pmu.dev_info->private;
+
+ return l3c_pmu_ext->support_ext;
+}
+
+static int hisi_l3c_pmu_get_event_idx(struct perf_event *event)
{
struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu);
+ struct hisi_l3c_pmu *hisi_l3c_pmu = to_hisi_l3c_pmu(l3c_pmu);
+ unsigned long *used_mask = l3c_pmu->pmu_events.used_mask;
+ int ext = hisi_get_ext(event);
+ int idx;
+
+ /*
+ * For an L3C PMU that supports extension events, we can monitor
+ * maximum 2 * num_counters to 3 * num_counters events, depending on
+ * the number of ext regions supported by hardware. Thus use bit
+ * [0, num_counters - 1] for normal events and bit
+ * [ext * num_counters, (ext + 1) * num_counters - 1] for extension
+ * events. The idx allocation will keep unchanged for normal events and
+ * we can also use the idx to distinguish whether it's an extension
+ * event or not.
+ *
+ * Since normal events and extension events are located in different
+ * address spaces, save the base address in event->hw.event_base.
+ */
+ if (ext && !support_ext(hisi_l3c_pmu))
+ return -EOPNOTSUPP;
+
+ if (ext)
+ event->hw.event_base = (unsigned long)hisi_l3c_pmu->ext_base[ext - 1];
+ else
+ event->hw.event_base = (unsigned long)l3c_pmu->base;
+
+ ext -= 1;
+ idx = find_next_zero_bit(used_mask, L3C_CNTR_EXT_H(ext), L3C_CNTR_EXT_L(ext));
+
+ if (idx >= L3C_CNTR_EXT_H(ext))
+ return -EAGAIN;
+
+ set_bit(idx, used_mask);
+
+ return idx;
+}
+
+static u32 hisi_l3c_pmu_event_readl(struct hw_perf_event *hwc, u32 reg)
+{
+ return readl((void __iomem *)hwc->event_base + reg);
+}
+
+static void hisi_l3c_pmu_event_writel(struct hw_perf_event *hwc, u32 reg, u32 val)
+{
+ writel(val, (void __iomem *)hwc->event_base + reg);
+}
+
+static u64 hisi_l3c_pmu_event_readq(struct hw_perf_event *hwc, u32 reg)
+{
+ return readq((void __iomem *)hwc->event_base + reg);
+}
+
+static void hisi_l3c_pmu_event_writeq(struct hw_perf_event *hwc, u32 reg, u64 val)
+{
+ writeq(val, (void __iomem *)hwc->event_base + reg);
+}
+
+static void hisi_l3c_pmu_config_req_tracetag(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
u32 tt_req = hisi_get_tt_req(event);
if (tt_req) {
u32 val;
/* Set request-type for tracetag */
- val = readl(l3c_pmu->base + L3C_TRACETAG_CTRL);
+ val = hisi_l3c_pmu_event_readl(hwc, L3C_TRACETAG_CTRL);
val |= tt_req << L3C_TRACETAG_REQ_SHIFT;
val |= L3C_TRACETAG_REQ_EN;
- writel(val, l3c_pmu->base + L3C_TRACETAG_CTRL);
+ hisi_l3c_pmu_event_writel(hwc, L3C_TRACETAG_CTRL, val);
/* Enable request-tracetag statistics */
- val = readl(l3c_pmu->base + L3C_PERF_CTRL);
+ val = hisi_l3c_pmu_event_readl(hwc, L3C_PERF_CTRL);
val |= L3C_TRACETAG_EN;
- writel(val, l3c_pmu->base + L3C_PERF_CTRL);
+ hisi_l3c_pmu_event_writel(hwc, L3C_PERF_CTRL, val);
}
}
static void hisi_l3c_pmu_clear_req_tracetag(struct perf_event *event)
{
- struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
u32 tt_req = hisi_get_tt_req(event);
if (tt_req) {
u32 val;
/* Clear request-type */
- val = readl(l3c_pmu->base + L3C_TRACETAG_CTRL);
+ val = hisi_l3c_pmu_event_readl(hwc, L3C_TRACETAG_CTRL);
val &= ~(tt_req << L3C_TRACETAG_REQ_SHIFT);
val &= ~L3C_TRACETAG_REQ_EN;
- writel(val, l3c_pmu->base + L3C_TRACETAG_CTRL);
+ hisi_l3c_pmu_event_writel(hwc, L3C_TRACETAG_CTRL, val);
/* Disable request-tracetag statistics */
- val = readl(l3c_pmu->base + L3C_PERF_CTRL);
+ val = hisi_l3c_pmu_event_readl(hwc, L3C_PERF_CTRL);
val &= ~L3C_TRACETAG_EN;
- writel(val, l3c_pmu->base + L3C_PERF_CTRL);
+ hisi_l3c_pmu_event_writel(hwc, L3C_PERF_CTRL, val);
}
}
static void hisi_l3c_pmu_write_ds(struct perf_event *event, u32 ds_cfg)
{
- struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu);
struct hw_perf_event *hwc = &event->hw;
u32 reg, reg_idx, shift, val;
- int idx = hwc->idx;
+ int idx = L3C_HW_IDX(hwc->idx);
/*
* Select the appropriate datasource register(L3C_DATSRC_TYPE0/1).
@@ -120,15 +214,15 @@ static void hisi_l3c_pmu_write_ds(struct perf_event *event, u32 ds_cfg)
reg_idx = idx % 4;
shift = 8 * reg_idx;
- val = readl(l3c_pmu->base + reg);
+ val = hisi_l3c_pmu_event_readl(hwc, reg);
val &= ~(L3C_DATSRC_MASK << shift);
val |= ds_cfg << shift;
- writel(val, l3c_pmu->base + reg);
+ hisi_l3c_pmu_event_writel(hwc, reg, val);
}
static void hisi_l3c_pmu_config_ds(struct perf_event *event)
{
- struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
u32 ds_cfg = hisi_get_datasrc_cfg(event);
u32 ds_skt = hisi_get_datasrc_skt(event);
@@ -138,15 +232,15 @@ static void hisi_l3c_pmu_config_ds(struct perf_event *event)
if (ds_skt) {
u32 val;
- val = readl(l3c_pmu->base + L3C_DATSRC_CTRL);
+ val = hisi_l3c_pmu_event_readl(hwc, L3C_DATSRC_CTRL);
val |= L3C_DATSRC_SKT_EN;
- writel(val, l3c_pmu->base + L3C_DATSRC_CTRL);
+ hisi_l3c_pmu_event_writel(hwc, L3C_DATSRC_CTRL, val);
}
}
static void hisi_l3c_pmu_clear_ds(struct perf_event *event)
{
- struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
u32 ds_cfg = hisi_get_datasrc_cfg(event);
u32 ds_skt = hisi_get_datasrc_skt(event);
@@ -156,57 +250,63 @@ static void hisi_l3c_pmu_clear_ds(struct perf_event *event)
if (ds_skt) {
u32 val;
- val = readl(l3c_pmu->base + L3C_DATSRC_CTRL);
+ val = hisi_l3c_pmu_event_readl(hwc, L3C_DATSRC_CTRL);
val &= ~L3C_DATSRC_SKT_EN;
- writel(val, l3c_pmu->base + L3C_DATSRC_CTRL);
+ hisi_l3c_pmu_event_writel(hwc, L3C_DATSRC_CTRL, val);
}
}
static void hisi_l3c_pmu_config_core_tracetag(struct perf_event *event)
{
- struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
u32 core = hisi_get_tt_core(event);
if (core) {
u32 val;
/* Config and enable core information */
- writel(core, l3c_pmu->base + L3C_CORE_CTRL);
- val = readl(l3c_pmu->base + L3C_PERF_CTRL);
+ hisi_l3c_pmu_event_writel(hwc, L3C_CORE_CTRL, core);
+ val = hisi_l3c_pmu_event_readl(hwc, L3C_PERF_CTRL);
val |= L3C_CORE_EN;
- writel(val, l3c_pmu->base + L3C_PERF_CTRL);
+ hisi_l3c_pmu_event_writel(hwc, L3C_PERF_CTRL, val);
/* Enable core-tracetag statistics */
- val = readl(l3c_pmu->base + L3C_TRACETAG_CTRL);
+ val = hisi_l3c_pmu_event_readl(hwc, L3C_TRACETAG_CTRL);
val |= L3C_TRACETAG_CORE_EN;
- writel(val, l3c_pmu->base + L3C_TRACETAG_CTRL);
+ hisi_l3c_pmu_event_writel(hwc, L3C_TRACETAG_CTRL, val);
}
}
static void hisi_l3c_pmu_clear_core_tracetag(struct perf_event *event)
{
- struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
u32 core = hisi_get_tt_core(event);
if (core) {
u32 val;
/* Clear core information */
- writel(L3C_COER_NONE, l3c_pmu->base + L3C_CORE_CTRL);
- val = readl(l3c_pmu->base + L3C_PERF_CTRL);
+ hisi_l3c_pmu_event_writel(hwc, L3C_CORE_CTRL, L3C_COER_NONE);
+ val = hisi_l3c_pmu_event_readl(hwc, L3C_PERF_CTRL);
val &= ~L3C_CORE_EN;
- writel(val, l3c_pmu->base + L3C_PERF_CTRL);
+ hisi_l3c_pmu_event_writel(hwc, L3C_PERF_CTRL, val);
/* Disable core-tracetag statistics */
- val = readl(l3c_pmu->base + L3C_TRACETAG_CTRL);
+ val = hisi_l3c_pmu_event_readl(hwc, L3C_TRACETAG_CTRL);
val &= ~L3C_TRACETAG_CORE_EN;
- writel(val, l3c_pmu->base + L3C_TRACETAG_CTRL);
+ hisi_l3c_pmu_event_writel(hwc, L3C_TRACETAG_CTRL, val);
}
}
+static bool hisi_l3c_pmu_have_filter(struct perf_event *event)
+{
+ return hisi_get_tt_req(event) || hisi_get_tt_core(event) ||
+ hisi_get_datasrc_cfg(event) || hisi_get_datasrc_skt(event);
+}
+
static void hisi_l3c_pmu_enable_filter(struct perf_event *event)
{
- if (event->attr.config1 != 0x0) {
+ if (hisi_l3c_pmu_have_filter(event)) {
hisi_l3c_pmu_config_req_tracetag(event);
hisi_l3c_pmu_config_core_tracetag(event);
hisi_l3c_pmu_config_ds(event);
@@ -215,38 +315,53 @@ static void hisi_l3c_pmu_enable_filter(struct perf_event *event)
static void hisi_l3c_pmu_disable_filter(struct perf_event *event)
{
- if (event->attr.config1 != 0x0) {
+ if (hisi_l3c_pmu_have_filter(event)) {
hisi_l3c_pmu_clear_ds(event);
hisi_l3c_pmu_clear_core_tracetag(event);
hisi_l3c_pmu_clear_req_tracetag(event);
}
}
+static int hisi_l3c_pmu_check_filter(struct perf_event *event)
+{
+ struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu);
+ struct hisi_l3c_pmu *hisi_l3c_pmu = to_hisi_l3c_pmu(l3c_pmu);
+ int ext = hisi_get_ext(event);
+
+ if (ext < 0 || ext > hisi_l3c_pmu->ext_num)
+ return -EINVAL;
+
+ return 0;
+}
+
/*
* Select the counter register offset using the counter index
*/
static u32 hisi_l3c_pmu_get_counter_offset(int cntr_idx)
{
- return (L3C_CNTR0_LOWER + (cntr_idx * 8));
+ return L3C_CNTR0_LOWER + L3C_HW_IDX(cntr_idx) * 8;
}
static u64 hisi_l3c_pmu_read_counter(struct hisi_pmu *l3c_pmu,
struct hw_perf_event *hwc)
{
- return readq(l3c_pmu->base + hisi_l3c_pmu_get_counter_offset(hwc->idx));
+ return hisi_l3c_pmu_event_readq(hwc, hisi_l3c_pmu_get_counter_offset(hwc->idx));
}
static void hisi_l3c_pmu_write_counter(struct hisi_pmu *l3c_pmu,
struct hw_perf_event *hwc, u64 val)
{
- writeq(val, l3c_pmu->base + hisi_l3c_pmu_get_counter_offset(hwc->idx));
+ hisi_l3c_pmu_event_writeq(hwc, hisi_l3c_pmu_get_counter_offset(hwc->idx), val);
}
static void hisi_l3c_pmu_write_evtype(struct hisi_pmu *l3c_pmu, int idx,
u32 type)
{
+ struct hw_perf_event *hwc = &l3c_pmu->pmu_events.hw_events[idx]->hw;
u32 reg, reg_idx, shift, val;
+ idx = L3C_HW_IDX(idx);
+
/*
* Select the appropriate event select register(L3C_EVENT_TYPE0/1).
* There are 2 event select registers for the 8 hardware counters.
@@ -259,36 +374,72 @@ static void hisi_l3c_pmu_write_evtype(struct hisi_pmu *l3c_pmu, int idx,
shift = 8 * reg_idx;
/* Write event code to L3C_EVENT_TYPEx Register */
- val = readl(l3c_pmu->base + reg);
+ val = hisi_l3c_pmu_event_readl(hwc, reg);
val &= ~(L3C_EVTYPE_NONE << shift);
- val |= (type << shift);
- writel(val, l3c_pmu->base + reg);
+ val |= type << shift;
+ hisi_l3c_pmu_event_writel(hwc, reg, val);
}
static void hisi_l3c_pmu_start_counters(struct hisi_pmu *l3c_pmu)
{
+ struct hisi_l3c_pmu *hisi_l3c_pmu = to_hisi_l3c_pmu(l3c_pmu);
+ unsigned long *used_mask = l3c_pmu->pmu_events.used_mask;
+ unsigned long used_cntr = find_first_bit(used_mask, l3c_pmu->num_counters);
u32 val;
+ int i;
/*
- * Set perf_enable bit in L3C_PERF_CTRL register to start counting
- * for all enabled counters.
+ * Check if any counter belongs to the normal range (instead of ext
+ * range). If so, enable it.
*/
- val = readl(l3c_pmu->base + L3C_PERF_CTRL);
- val |= L3C_PERF_CTRL_EN;
- writel(val, l3c_pmu->base + L3C_PERF_CTRL);
+ if (used_cntr < L3C_NR_COUNTERS) {
+ val = readl(l3c_pmu->base + L3C_PERF_CTRL);
+ val |= L3C_PERF_CTRL_EN;
+ writel(val, l3c_pmu->base + L3C_PERF_CTRL);
+ }
+
+ /* Also enable counting on every ext range that has a counter in use. */
+ for (i = 0; i < hisi_l3c_pmu->ext_num; i++) {
+ /* Find used counter in this ext range, skip the range if not. */
+ used_cntr = find_next_bit(used_mask, L3C_CNTR_EXT_H(i), L3C_CNTR_EXT_L(i));
+ if (used_cntr >= L3C_CNTR_EXT_H(i))
+ continue;
+
+ val = readl(hisi_l3c_pmu->ext_base[i] + L3C_PERF_CTRL);
+ val |= L3C_PERF_CTRL_EN;
+ writel(val, hisi_l3c_pmu->ext_base[i] + L3C_PERF_CTRL);
+ }
}
static void hisi_l3c_pmu_stop_counters(struct hisi_pmu *l3c_pmu)
{
+ struct hisi_l3c_pmu *hisi_l3c_pmu = to_hisi_l3c_pmu(l3c_pmu);
+ unsigned long *used_mask = l3c_pmu->pmu_events.used_mask;
+ unsigned long used_cntr = find_first_bit(used_mask, l3c_pmu->num_counters);
u32 val;
+ int i;
/*
- * Clear perf_enable bit in L3C_PERF_CTRL register to stop counting
- * for all enabled counters.
+ * Check if any counter belongs to the normal range (instead of ext
+ * range). If so, stop it.
*/
- val = readl(l3c_pmu->base + L3C_PERF_CTRL);
- val &= ~(L3C_PERF_CTRL_EN);
- writel(val, l3c_pmu->base + L3C_PERF_CTRL);
+ if (used_cntr < L3C_NR_COUNTERS) {
+ val = readl(l3c_pmu->base + L3C_PERF_CTRL);
+ val &= ~L3C_PERF_CTRL_EN;
+ writel(val, l3c_pmu->base + L3C_PERF_CTRL);
+ }
+
+ /* Also stop counting on every ext range that has a counter in use. */
+ for (i = 0; i < hisi_l3c_pmu->ext_num; i++) {
+ /* Find used counter in this ext range, skip the range if not. */
+ used_cntr = find_next_bit(used_mask, L3C_CNTR_EXT_H(i), L3C_CNTR_EXT_L(i));
+ if (used_cntr >= L3C_CNTR_EXT_H(i))
+ continue;
+
+ val = readl(hisi_l3c_pmu->ext_base[i] + L3C_PERF_CTRL);
+ val &= ~L3C_PERF_CTRL_EN;
+ writel(val, hisi_l3c_pmu->ext_base[i] + L3C_PERF_CTRL);
+ }
}
static void hisi_l3c_pmu_enable_counter(struct hisi_pmu *l3c_pmu,
@@ -297,9 +448,9 @@ static void hisi_l3c_pmu_enable_counter(struct hisi_pmu *l3c_pmu,
u32 val;
/* Enable counter index in L3C_EVENT_CTRL register */
- val = readl(l3c_pmu->base + L3C_EVENT_CTRL);
- val |= (1 << hwc->idx);
- writel(val, l3c_pmu->base + L3C_EVENT_CTRL);
+ val = hisi_l3c_pmu_event_readl(hwc, L3C_EVENT_CTRL);
+ val |= 1 << L3C_HW_IDX(hwc->idx);
+ hisi_l3c_pmu_event_writel(hwc, L3C_EVENT_CTRL, val);
}
static void hisi_l3c_pmu_disable_counter(struct hisi_pmu *l3c_pmu,
@@ -308,9 +459,9 @@ static void hisi_l3c_pmu_disable_counter(struct hisi_pmu *l3c_pmu,
u32 val;
/* Clear counter index in L3C_EVENT_CTRL register */
- val = readl(l3c_pmu->base + L3C_EVENT_CTRL);
- val &= ~(1 << hwc->idx);
- writel(val, l3c_pmu->base + L3C_EVENT_CTRL);
+ val = hisi_l3c_pmu_event_readl(hwc, L3C_EVENT_CTRL);
+ val &= ~(1 << L3C_HW_IDX(hwc->idx));
+ hisi_l3c_pmu_event_writel(hwc, L3C_EVENT_CTRL, val);
}
static void hisi_l3c_pmu_enable_counter_int(struct hisi_pmu *l3c_pmu,
@@ -318,10 +469,10 @@ static void hisi_l3c_pmu_enable_counter_int(struct hisi_pmu *l3c_pmu,
{
u32 val;
- val = readl(l3c_pmu->base + L3C_INT_MASK);
+ val = hisi_l3c_pmu_event_readl(hwc, L3C_INT_MASK);
/* Write 0 to enable interrupt */
- val &= ~(1 << hwc->idx);
- writel(val, l3c_pmu->base + L3C_INT_MASK);
+ val &= ~(1 << L3C_HW_IDX(hwc->idx));
+ hisi_l3c_pmu_event_writel(hwc, L3C_INT_MASK, val);
}
static void hisi_l3c_pmu_disable_counter_int(struct hisi_pmu *l3c_pmu,
@@ -329,28 +480,37 @@ static void hisi_l3c_pmu_disable_counter_int(struct hisi_pmu *l3c_pmu,
{
u32 val;
- val = readl(l3c_pmu->base + L3C_INT_MASK);
+ val = hisi_l3c_pmu_event_readl(hwc, L3C_INT_MASK);
/* Write 1 to mask interrupt */
- val |= (1 << hwc->idx);
- writel(val, l3c_pmu->base + L3C_INT_MASK);
+ val |= 1 << L3C_HW_IDX(hwc->idx);
+ hisi_l3c_pmu_event_writel(hwc, L3C_INT_MASK, val);
}
static u32 hisi_l3c_pmu_get_int_status(struct hisi_pmu *l3c_pmu)
{
- return readl(l3c_pmu->base + L3C_INT_STATUS);
+ struct hisi_l3c_pmu *hisi_l3c_pmu = to_hisi_l3c_pmu(l3c_pmu);
+ u32 ext_int, status, status_ext = 0;
+ int i;
+
+ status = readl(l3c_pmu->base + L3C_INT_STATUS);
+
+ if (!support_ext(hisi_l3c_pmu))
+ return status;
+
+ for (i = 0; i < hisi_l3c_pmu->ext_num; i++) {
+ ext_int = readl(hisi_l3c_pmu->ext_base[i] + L3C_INT_STATUS);
+ status_ext |= ext_int << (L3C_NR_COUNTERS * i);
+ }
+
+ return status | (status_ext << L3C_NR_COUNTERS);
}
static void hisi_l3c_pmu_clear_int_status(struct hisi_pmu *l3c_pmu, int idx)
{
- writel(1 << idx, l3c_pmu->base + L3C_INT_CLEAR);
-}
+ struct hw_perf_event *hwc = &l3c_pmu->pmu_events.hw_events[idx]->hw;
-static const struct acpi_device_id hisi_l3c_pmu_acpi_match[] = {
- { "HISI0213", },
- { "HISI0214", },
- {}
-};
-MODULE_DEVICE_TABLE(acpi, hisi_l3c_pmu_acpi_match);
+ hisi_l3c_pmu_event_writel(hwc, L3C_INT_CLEAR, 1 << L3C_HW_IDX(idx));
+}
static int hisi_l3c_pmu_init_data(struct platform_device *pdev,
struct hisi_pmu *l3c_pmu)
@@ -371,6 +531,10 @@ static int hisi_l3c_pmu_init_data(struct platform_device *pdev,
return -EINVAL;
}
+ l3c_pmu->dev_info = device_get_match_data(&pdev->dev);
+ if (!l3c_pmu->dev_info)
+ return -ENODEV;
+
l3c_pmu->base = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(l3c_pmu->base)) {
dev_err(&pdev->dev, "ioremap failed for l3c_pmu resource\n");
@@ -382,6 +546,50 @@ static int hisi_l3c_pmu_init_data(struct platform_device *pdev,
return 0;
}
+/*
+ * Set up the extension (ext) regions of an L3C PMU: for each ext region, map
+ * its MMIO resource and request its IRQ with the shared uncore PMU handler.
+ * Platform resource/IRQ index 0 is the normal part; ext region i uses index
+ * i + 1.
+ *
+ * Returns 0 on success, -ENODEV if the IRQ count is outside the supported
+ * range, -ENOMEM if the IRQ name cannot be allocated, or the error returned
+ * by the resource mapping / IRQ lookup / IRQ request helpers.
+ */
+static int hisi_l3c_pmu_init_ext(struct hisi_pmu *l3c_pmu, struct platform_device *pdev)
+{
+ struct hisi_l3c_pmu *hisi_l3c_pmu = to_hisi_l3c_pmu(l3c_pmu);
+ int ret, irq, ext_num, i;
+ char *irqname;
+
+ /*
+ * HiSilicon L3C PMU supporting ext should have more than 1 IRQ resource.
+ * It must not have more than L3C_MAX_EXT + 1 either: ext_base[] and
+ * ext_irq[] only hold L3C_MAX_EXT entries, so a larger count would make
+ * the loop below write past the end of both arrays.
+ */
+ ext_num = platform_irq_count(pdev);
+ if (ext_num < L3C_MAX_EXT || ext_num > L3C_MAX_EXT + 1)
+ return -ENODEV;
+
+ /*
+ * The number of ext regions equals the number of IRQs - 1, since one
+ * of the IRQs belongs to the normal part of the PMU.
+ */
+ hisi_l3c_pmu->ext_num = ext_num - 1;
+
+ for (i = 0; i < hisi_l3c_pmu->ext_num; i++) {
+ hisi_l3c_pmu->ext_base[i] = devm_platform_ioremap_resource(pdev, i + 1);
+ if (IS_ERR(hisi_l3c_pmu->ext_base[i]))
+ return PTR_ERR(hisi_l3c_pmu->ext_base[i]);
+
+ irq = platform_get_irq(pdev, i + 1);
+ if (irq < 0)
+ return irq;
+
+ irqname = devm_kasprintf(&pdev->dev, GFP_KERNEL, "%s ext%d",
+ dev_name(&pdev->dev), i + 1);
+ if (!irqname)
+ return -ENOMEM;
+
+ ret = devm_request_irq(&pdev->dev, irq, hisi_uncore_pmu_isr,
+ IRQF_NOBALANCING | IRQF_NO_THREAD,
+ irqname, l3c_pmu);
+ if (ret < 0)
+ return dev_err_probe(&pdev->dev, ret,
+ "Fail to request EXT IRQ: %d.\n", irq);
+
+ hisi_l3c_pmu->ext_irq[i] = irq;
+ }
+
+ return 0;
+}
+
static struct attribute *hisi_l3c_pmu_v1_format_attr[] = {
HISI_PMU_FORMAT_ATTR(event, "config:0-7"),
NULL,
@@ -394,7 +602,7 @@ static const struct attribute_group hisi_l3c_pmu_v1_format_group = {
static struct attribute *hisi_l3c_pmu_v2_format_attr[] = {
HISI_PMU_FORMAT_ATTR(event, "config:0-7"),
- HISI_PMU_FORMAT_ATTR(tt_core, "config1:0-7"),
+ HISI_PMU_FORMAT_ATTR(tt_core, "config2:0-15"),
HISI_PMU_FORMAT_ATTR(tt_req, "config1:8-10"),
HISI_PMU_FORMAT_ATTR(datasrc_cfg, "config1:11-15"),
HISI_PMU_FORMAT_ATTR(datasrc_skt, "config1:16"),
@@ -406,6 +614,19 @@ static const struct attribute_group hisi_l3c_pmu_v2_format_group = {
.attrs = hisi_l3c_pmu_v2_format_attr,
};
+static struct attribute *hisi_l3c_pmu_v3_format_attr[] = {
+ HISI_PMU_FORMAT_ATTR(event, "config:0-7"),
+ HISI_PMU_FORMAT_ATTR(ext, "config:16-17"),
+ HISI_PMU_FORMAT_ATTR(tt_req, "config1:8-10"),
+ HISI_PMU_FORMAT_ATTR(tt_core, "config2:0-15"),
+ NULL
+};
+
+static const struct attribute_group hisi_l3c_pmu_v3_format_group = {
+ .name = "format",
+ .attrs = hisi_l3c_pmu_v3_format_attr,
+};
+
static struct attribute *hisi_l3c_pmu_v1_events_attr[] = {
HISI_PMU_EVENT_ATTR(rd_cpipe, 0x00),
HISI_PMU_EVENT_ATTR(wr_cpipe, 0x01),
@@ -441,6 +662,26 @@ static const struct attribute_group hisi_l3c_pmu_v2_events_group = {
.attrs = hisi_l3c_pmu_v2_events_attr,
};
+static struct attribute *hisi_l3c_pmu_v3_events_attr[] = {
+ HISI_PMU_EVENT_ATTR(rd_spipe, 0x18),
+ HISI_PMU_EVENT_ATTR(rd_hit_spipe, 0x19),
+ HISI_PMU_EVENT_ATTR(wr_spipe, 0x1a),
+ HISI_PMU_EVENT_ATTR(wr_hit_spipe, 0x1b),
+ HISI_PMU_EVENT_ATTR(io_rd_spipe, 0x1c),
+ HISI_PMU_EVENT_ATTR(io_rd_hit_spipe, 0x1d),
+ HISI_PMU_EVENT_ATTR(io_wr_spipe, 0x1e),
+ HISI_PMU_EVENT_ATTR(io_wr_hit_spipe, 0x1f),
+ HISI_PMU_EVENT_ATTR(cycles, 0x7f),
+ HISI_PMU_EVENT_ATTR(l3c_ref, 0xbc),
+ HISI_PMU_EVENT_ATTR(l3c2ring, 0xbd),
+ NULL
+};
+
+static const struct attribute_group hisi_l3c_pmu_v3_events_group = {
+ .name = "events",
+ .attrs = hisi_l3c_pmu_v3_events_attr,
+};
+
static const struct attribute_group *hisi_l3c_pmu_v1_attr_groups[] = {
&hisi_l3c_pmu_v1_format_group,
&hisi_l3c_pmu_v1_events_group,
@@ -457,9 +698,46 @@ static const struct attribute_group *hisi_l3c_pmu_v2_attr_groups[] = {
NULL
};
+static const struct attribute_group *hisi_l3c_pmu_v3_attr_groups[] = {
+ &hisi_l3c_pmu_v3_format_group,
+ &hisi_l3c_pmu_v3_events_group,
+ &hisi_pmu_cpumask_attr_group,
+ &hisi_pmu_identifier_group,
+ NULL
+};
+
+static struct hisi_l3c_pmu_ext hisi_l3c_pmu_support_ext = {
+ .support_ext = true,
+};
+
+static struct hisi_l3c_pmu_ext hisi_l3c_pmu_not_support_ext = {
+ .support_ext = false,
+};
+
+static const struct hisi_pmu_dev_info hisi_l3c_pmu_v1 = {
+ .attr_groups = hisi_l3c_pmu_v1_attr_groups,
+ .counter_bits = 48,
+ .check_event = L3C_V1_NR_EVENTS,
+ .private = &hisi_l3c_pmu_not_support_ext,
+};
+
+static const struct hisi_pmu_dev_info hisi_l3c_pmu_v2 = {
+ .attr_groups = hisi_l3c_pmu_v2_attr_groups,
+ .counter_bits = 64,
+ .check_event = L3C_V2_NR_EVENTS,
+ .private = &hisi_l3c_pmu_not_support_ext,
+};
+
+static const struct hisi_pmu_dev_info hisi_l3c_pmu_v3 = {
+ .attr_groups = hisi_l3c_pmu_v3_attr_groups,
+ .counter_bits = 64,
+ .check_event = L3C_V2_NR_EVENTS,
+ .private = &hisi_l3c_pmu_support_ext,
+};
+
static const struct hisi_uncore_ops hisi_uncore_l3c_ops = {
.write_evtype = hisi_l3c_pmu_write_evtype,
- .get_event_idx = hisi_uncore_pmu_get_event_idx,
+ .get_event_idx = hisi_l3c_pmu_get_event_idx,
.start_counters = hisi_l3c_pmu_start_counters,
.stop_counters = hisi_l3c_pmu_stop_counters,
.enable_counter = hisi_l3c_pmu_enable_counter,
@@ -472,11 +750,14 @@ static const struct hisi_uncore_ops hisi_uncore_l3c_ops = {
.clear_int_status = hisi_l3c_pmu_clear_int_status,
.enable_filter = hisi_l3c_pmu_enable_filter,
.disable_filter = hisi_l3c_pmu_disable_filter,
+ .check_filter = hisi_l3c_pmu_check_filter,
};
static int hisi_l3c_pmu_dev_probe(struct platform_device *pdev,
struct hisi_pmu *l3c_pmu)
{
+ struct hisi_l3c_pmu *hisi_l3c_pmu = to_hisi_l3c_pmu(l3c_pmu);
+ struct hisi_l3c_pmu_ext *l3c_pmu_dev_ext;
int ret;
ret = hisi_l3c_pmu_init_data(pdev, l3c_pmu);
@@ -487,42 +768,55 @@ static int hisi_l3c_pmu_dev_probe(struct platform_device *pdev,
if (ret)
return ret;
- if (l3c_pmu->identifier >= HISI_PMU_V2) {
- l3c_pmu->counter_bits = 64;
- l3c_pmu->check_event = L3C_V2_NR_EVENTS;
- l3c_pmu->pmu_events.attr_groups = hisi_l3c_pmu_v2_attr_groups;
- } else {
- l3c_pmu->counter_bits = 48;
- l3c_pmu->check_event = L3C_V1_NR_EVENTS;
- l3c_pmu->pmu_events.attr_groups = hisi_l3c_pmu_v1_attr_groups;
- }
-
+ l3c_pmu->pmu_events.attr_groups = l3c_pmu->dev_info->attr_groups;
+ l3c_pmu->counter_bits = l3c_pmu->dev_info->counter_bits;
+ l3c_pmu->check_event = l3c_pmu->dev_info->check_event;
l3c_pmu->num_counters = L3C_NR_COUNTERS;
l3c_pmu->ops = &hisi_uncore_l3c_ops;
l3c_pmu->dev = &pdev->dev;
l3c_pmu->on_cpu = -1;
+ l3c_pmu_dev_ext = l3c_pmu->dev_info->private;
+ if (l3c_pmu_dev_ext->support_ext) {
+ ret = hisi_l3c_pmu_init_ext(l3c_pmu, pdev);
+ if (ret)
+ return ret;
+ /*
+ * Each extension has its own set of counters, as many as
+ * the normal part of the PMU has. So at most
+ * L3C_NR_COUNTERS * (ext_num + 1) events can be monitored.
+ */
+ l3c_pmu->num_counters += hisi_l3c_pmu->ext_num * L3C_NR_COUNTERS;
+ }
+
return 0;
}
static int hisi_l3c_pmu_probe(struct platform_device *pdev)
{
+ struct hisi_l3c_pmu *hisi_l3c_pmu;
struct hisi_pmu *l3c_pmu;
char *name;
int ret;
- l3c_pmu = devm_kzalloc(&pdev->dev, sizeof(*l3c_pmu), GFP_KERNEL);
- if (!l3c_pmu)
+ hisi_l3c_pmu = devm_kzalloc(&pdev->dev, sizeof(*hisi_l3c_pmu), GFP_KERNEL);
+ if (!hisi_l3c_pmu)
return -ENOMEM;
+ l3c_pmu = &hisi_l3c_pmu->l3c_pmu;
platform_set_drvdata(pdev, l3c_pmu);
ret = hisi_l3c_pmu_dev_probe(pdev, l3c_pmu);
if (ret)
return ret;
- name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%d_l3c%d",
- l3c_pmu->topo.sccl_id, l3c_pmu->topo.ccl_id);
+ if (l3c_pmu->topo.sub_id >= 0)
+ name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%d_l3c%d_%d",
+ l3c_pmu->topo.sccl_id, l3c_pmu->topo.ccl_id,
+ l3c_pmu->topo.sub_id);
+ else
+ name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%d_l3c%d",
+ l3c_pmu->topo.sccl_id, l3c_pmu->topo.ccl_id);
if (!name)
return -ENOMEM;
@@ -554,6 +848,14 @@ static void hisi_l3c_pmu_remove(struct platform_device *pdev)
&l3c_pmu->node);
}
+static const struct acpi_device_id hisi_l3c_pmu_acpi_match[] = {
+ { "HISI0213", (kernel_ulong_t)&hisi_l3c_pmu_v1 },
+ { "HISI0214", (kernel_ulong_t)&hisi_l3c_pmu_v2 },
+ { "HISI0215", (kernel_ulong_t)&hisi_l3c_pmu_v3 },
+ {}
+};
+MODULE_DEVICE_TABLE(acpi, hisi_l3c_pmu_acpi_match);
+
static struct platform_driver hisi_l3c_pmu_driver = {
.driver = {
.name = "hisi_l3c_pmu",
@@ -564,14 +866,60 @@ static struct platform_driver hisi_l3c_pmu_driver = {
.remove = hisi_l3c_pmu_remove,
};
+static int hisi_l3c_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
+{
+ struct hisi_pmu *l3c_pmu = hlist_entry_safe(node, struct hisi_pmu, node);
+ struct hisi_l3c_pmu *hisi_l3c_pmu = to_hisi_l3c_pmu(l3c_pmu);
+ int ret, i;
+
+ ret = hisi_uncore_pmu_online_cpu(cpu, node);
+ if (ret)
+ return ret;
+
+ /* A PMU without ext support has no ext IRQs to migrate. */
+ if (!support_ext(hisi_l3c_pmu))
+ return 0;
+
+ for (i = 0; i < hisi_l3c_pmu->ext_num; i++)
+ WARN_ON(irq_set_affinity(hisi_l3c_pmu->ext_irq[i],
+ cpumask_of(l3c_pmu->on_cpu)));
+
+ return 0;
+}
+
+static int hisi_l3c_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
+{
+ struct hisi_pmu *l3c_pmu = hlist_entry_safe(node, struct hisi_pmu, node);
+ struct hisi_l3c_pmu *hisi_l3c_pmu = to_hisi_l3c_pmu(l3c_pmu);
+ int ret, i;
+
+ ret = hisi_uncore_pmu_offline_cpu(cpu, node);
+ if (ret)
+ return ret;
+
+ /* If no available CPU was found (on_cpu < 0), skip IRQ migration. */
+ if (l3c_pmu->on_cpu < 0)
+ return 0;
+
+ /* A PMU without ext support has no ext IRQs to migrate. */
+ if (!support_ext(hisi_l3c_pmu))
+ return 0;
+
+ for (i = 0; i < hisi_l3c_pmu->ext_num; i++)
+ WARN_ON(irq_set_affinity(hisi_l3c_pmu->ext_irq[i],
+ cpumask_of(l3c_pmu->on_cpu)));
+
+ return 0;
+}
+
static int __init hisi_l3c_pmu_module_init(void)
{
int ret;
ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_HISI_L3_ONLINE,
"AP_PERF_ARM_HISI_L3_ONLINE",
- hisi_uncore_pmu_online_cpu,
- hisi_uncore_pmu_offline_cpu);
+ hisi_l3c_pmu_online_cpu,
+ hisi_l3c_pmu_offline_cpu);
if (ret) {
pr_err("L3C PMU: Error setup hotplug, ret = %d\n", ret);
return ret;
diff --git a/drivers/perf/hisilicon/hisi_uncore_mn_pmu.c b/drivers/perf/hisilicon/hisi_uncore_mn_pmu.c
new file mode 100644
index 000000000000..4df4eebe243e
--- /dev/null
+++ b/drivers/perf/hisilicon/hisi_uncore_mn_pmu.c
@@ -0,0 +1,411 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * HiSilicon SoC MN uncore Hardware event counters support
+ *
+ * Copyright (c) 2025 HiSilicon Technologies Co., Ltd.
+ */
+#include <linux/cpuhotplug.h>
+#include <linux/interrupt.h>
+#include <linux/iopoll.h>
+#include <linux/irq.h>
+#include <linux/list.h>
+#include <linux/mod_devicetable.h>
+#include <linux/property.h>
+
+#include "hisi_uncore_pmu.h"
+
+/* Dynamic CPU hotplug state used by MN PMU */
+static enum cpuhp_state hisi_mn_pmu_online;
+
+/* MN register definition */
+#define HISI_MN_DYNAMIC_CTRL_REG 0x400
+#define HISI_MN_DYNAMIC_CTRL_EN BIT(0)
+#define HISI_MN_PERF_CTRL_REG 0x408
+#define HISI_MN_PERF_CTRL_EN BIT(6)
+#define HISI_MN_INT_MASK_REG 0x800
+#define HISI_MN_INT_STATUS_REG 0x808
+#define HISI_MN_INT_CLEAR_REG 0x80C
+#define HISI_MN_EVENT_CTRL_REG 0x1C00
+#define HISI_MN_VERSION_REG 0x1C04
+#define HISI_MN_EVTYPE0_REG 0x1d00
+#define HISI_MN_EVTYPE_MASK GENMASK(7, 0)
+#define HISI_MN_CNTR0_REG 0x1e00
+#define HISI_MN_EVTYPE_REGn(evtype0, n) ((evtype0) + (n) * 4)
+#define HISI_MN_CNTR_REGn(cntr0, n) ((cntr0) + (n) * 8)
+
+#define HISI_MN_NR_COUNTERS 4
+#define HISI_MN_TIMEOUT_US 500U
+
+struct hisi_mn_pmu_regs {
+ u32 version;
+ u32 dyn_ctrl;
+ u32 perf_ctrl;
+ u32 int_mask;
+ u32 int_clear;
+ u32 int_status;
+ u32 event_ctrl;
+ u32 event_type0;
+ u32 event_cntr0;
+};
+
+/*
+ * Each event request takes a certain amount of time to complete. If
+ * we are counting a latency related event, we need to wait for all the
+ * requests to complete. Otherwise, the value of the counter is slightly larger.
+ */
+static void hisi_mn_pmu_counter_flush(struct hisi_pmu *mn_pmu)
+{
+ struct hisi_mn_pmu_regs *reg_info = mn_pmu->dev_info->private;
+ int ret;
+ u32 val;
+
+ val = readl(mn_pmu->base + reg_info->dyn_ctrl);
+ val |= HISI_MN_DYNAMIC_CTRL_EN;
+ writel(val, mn_pmu->base + reg_info->dyn_ctrl);
+
+ ret = readl_poll_timeout_atomic(mn_pmu->base + reg_info->dyn_ctrl,
+ val, !(val & HISI_MN_DYNAMIC_CTRL_EN),
+ 1, HISI_MN_TIMEOUT_US);
+ if (ret)
+ dev_warn(mn_pmu->dev, "Counter flush timeout\n");
+}
+
+static u64 hisi_mn_pmu_read_counter(struct hisi_pmu *mn_pmu,
+ struct hw_perf_event *hwc)
+{
+ struct hisi_mn_pmu_regs *reg_info = mn_pmu->dev_info->private;
+
+ return readq(mn_pmu->base + HISI_MN_CNTR_REGn(reg_info->event_cntr0, hwc->idx));
+}
+
+static void hisi_mn_pmu_write_counter(struct hisi_pmu *mn_pmu,
+ struct hw_perf_event *hwc, u64 val)
+{
+ struct hisi_mn_pmu_regs *reg_info = mn_pmu->dev_info->private;
+
+ writeq(val, mn_pmu->base + HISI_MN_CNTR_REGn(reg_info->event_cntr0, hwc->idx));
+}
+
+static void hisi_mn_pmu_write_evtype(struct hisi_pmu *mn_pmu, int idx, u32 type)
+{
+ struct hisi_mn_pmu_regs *reg_info = mn_pmu->dev_info->private;
+ u32 val;
+
+ /*
+ * Select the appropriate event select register.
+ * Event codes are 8 bits wide and four of them are packed into
+ * each 32-bit event select register, so the code for counter idx
+ * is in register idx / 4.
+ * NOTE(review): this comment used to mention 2 registers for 8
+ * counters, but HISI_MN_NR_COUNTERS is 4 - confirm the layout.
+ */
+ val = readl(mn_pmu->base + HISI_MN_EVTYPE_REGn(reg_info->event_type0, idx / 4));
+ val &= ~(HISI_MN_EVTYPE_MASK << HISI_PMU_EVTYPE_SHIFT(idx));
+ val |= (type << HISI_PMU_EVTYPE_SHIFT(idx));
+ writel(val, mn_pmu->base + HISI_MN_EVTYPE_REGn(reg_info->event_type0, idx / 4));
+}
+
+static void hisi_mn_pmu_start_counters(struct hisi_pmu *mn_pmu)
+{
+ struct hisi_mn_pmu_regs *reg_info = mn_pmu->dev_info->private;
+ u32 val;
+
+ val = readl(mn_pmu->base + reg_info->perf_ctrl);
+ val |= HISI_MN_PERF_CTRL_EN;
+ writel(val, mn_pmu->base + reg_info->perf_ctrl);
+}
+
+static void hisi_mn_pmu_stop_counters(struct hisi_pmu *mn_pmu)
+{
+ struct hisi_mn_pmu_regs *reg_info = mn_pmu->dev_info->private;
+ u32 val;
+
+ val = readl(mn_pmu->base + reg_info->perf_ctrl);
+ val &= ~HISI_MN_PERF_CTRL_EN;
+ writel(val, mn_pmu->base + reg_info->perf_ctrl);
+
+ hisi_mn_pmu_counter_flush(mn_pmu);
+}
+
+static void hisi_mn_pmu_enable_counter(struct hisi_pmu *mn_pmu,
+ struct hw_perf_event *hwc)
+{
+ struct hisi_mn_pmu_regs *reg_info = mn_pmu->dev_info->private;
+ u32 val;
+
+ val = readl(mn_pmu->base + reg_info->event_ctrl);
+ val |= BIT(hwc->idx);
+ writel(val, mn_pmu->base + reg_info->event_ctrl);
+}
+
+static void hisi_mn_pmu_disable_counter(struct hisi_pmu *mn_pmu,
+ struct hw_perf_event *hwc)
+{
+ struct hisi_mn_pmu_regs *reg_info = mn_pmu->dev_info->private;
+ u32 val;
+
+ val = readl(mn_pmu->base + reg_info->event_ctrl);
+ val &= ~BIT(hwc->idx);
+ writel(val, mn_pmu->base + reg_info->event_ctrl);
+}
+
+static void hisi_mn_pmu_enable_counter_int(struct hisi_pmu *mn_pmu,
+ struct hw_perf_event *hwc)
+{
+ struct hisi_mn_pmu_regs *reg_info = mn_pmu->dev_info->private;
+ u32 val;
+
+ val = readl(mn_pmu->base + reg_info->int_mask);
+ val &= ~BIT(hwc->idx);
+ writel(val, mn_pmu->base + reg_info->int_mask);
+}
+
+static void hisi_mn_pmu_disable_counter_int(struct hisi_pmu *mn_pmu,
+ struct hw_perf_event *hwc)
+{
+ struct hisi_mn_pmu_regs *reg_info = mn_pmu->dev_info->private;
+ u32 val;
+
+ val = readl(mn_pmu->base + reg_info->int_mask);
+ val |= BIT(hwc->idx);
+ writel(val, mn_pmu->base + reg_info->int_mask);
+}
+
+static u32 hisi_mn_pmu_get_int_status(struct hisi_pmu *mn_pmu)
+{
+ struct hisi_mn_pmu_regs *reg_info = mn_pmu->dev_info->private;
+
+ return readl(mn_pmu->base + reg_info->int_status);
+}
+
+static void hisi_mn_pmu_clear_int_status(struct hisi_pmu *mn_pmu, int idx)
+{
+ struct hisi_mn_pmu_regs *reg_info = mn_pmu->dev_info->private;
+
+ writel(BIT(idx), mn_pmu->base + reg_info->int_clear);
+}
+
+static struct attribute *hisi_mn_pmu_format_attr[] = {
+ HISI_PMU_FORMAT_ATTR(event, "config:0-7"),
+ NULL
+};
+
+static const struct attribute_group hisi_mn_pmu_format_group = {
+ .name = "format",
+ .attrs = hisi_mn_pmu_format_attr,
+};
+
+static struct attribute *hisi_mn_pmu_events_attr[] = {
+ HISI_PMU_EVENT_ATTR(req_eobarrier_num, 0x00),
+ HISI_PMU_EVENT_ATTR(req_ecbarrier_num, 0x01),
+ HISI_PMU_EVENT_ATTR(req_dvmop_num, 0x02),
+ HISI_PMU_EVENT_ATTR(req_dvmsync_num, 0x03),
+ HISI_PMU_EVENT_ATTR(req_retry_num, 0x04),
+ HISI_PMU_EVENT_ATTR(req_writenosnp_num, 0x05),
+ HISI_PMU_EVENT_ATTR(req_readnosnp_num, 0x06),
+ HISI_PMU_EVENT_ATTR(snp_dvm_num, 0x07),
+ HISI_PMU_EVENT_ATTR(snp_dvmsync_num, 0x08),
+ HISI_PMU_EVENT_ATTR(l3t_req_dvm_num, 0x09),
+ HISI_PMU_EVENT_ATTR(l3t_req_dvmsync_num, 0x0A),
+ HISI_PMU_EVENT_ATTR(mn_req_dvm_num, 0x0B),
+ HISI_PMU_EVENT_ATTR(mn_req_dvmsync_num, 0x0C),
+ HISI_PMU_EVENT_ATTR(pa_req_dvm_num, 0x0D),
+ HISI_PMU_EVENT_ATTR(pa_req_dvmsync_num, 0x0E),
+ HISI_PMU_EVENT_ATTR(snp_dvm_latency, 0x80),
+ HISI_PMU_EVENT_ATTR(snp_dvmsync_latency, 0x81),
+ HISI_PMU_EVENT_ATTR(l3t_req_dvm_latency, 0x82),
+ HISI_PMU_EVENT_ATTR(l3t_req_dvmsync_latency, 0x83),
+ HISI_PMU_EVENT_ATTR(mn_req_dvm_latency, 0x84),
+ HISI_PMU_EVENT_ATTR(mn_req_dvmsync_latency, 0x85),
+ HISI_PMU_EVENT_ATTR(pa_req_dvm_latency, 0x86),
+ HISI_PMU_EVENT_ATTR(pa_req_dvmsync_latency, 0x87),
+ NULL
+};
+
+static const struct attribute_group hisi_mn_pmu_events_group = {
+ .name = "events",
+ .attrs = hisi_mn_pmu_events_attr,
+};
+
+static const struct attribute_group *hisi_mn_pmu_attr_groups[] = {
+ &hisi_mn_pmu_format_group,
+ &hisi_mn_pmu_events_group,
+ &hisi_pmu_cpumask_attr_group,
+ &hisi_pmu_identifier_group,
+ NULL
+};
+
+static const struct hisi_uncore_ops hisi_uncore_mn_ops = {
+ .write_evtype = hisi_mn_pmu_write_evtype,
+ .get_event_idx = hisi_uncore_pmu_get_event_idx,
+ .start_counters = hisi_mn_pmu_start_counters,
+ .stop_counters = hisi_mn_pmu_stop_counters,
+ .enable_counter = hisi_mn_pmu_enable_counter,
+ .disable_counter = hisi_mn_pmu_disable_counter,
+ .enable_counter_int = hisi_mn_pmu_enable_counter_int,
+ .disable_counter_int = hisi_mn_pmu_disable_counter_int,
+ .write_counter = hisi_mn_pmu_write_counter,
+ .read_counter = hisi_mn_pmu_read_counter,
+ .get_int_status = hisi_mn_pmu_get_int_status,
+ .clear_int_status = hisi_mn_pmu_clear_int_status,
+};
+
+static int hisi_mn_pmu_dev_init(struct platform_device *pdev,
+ struct hisi_pmu *mn_pmu)
+{
+ struct hisi_mn_pmu_regs *reg_info;
+ int ret;
+
+ hisi_uncore_pmu_init_topology(mn_pmu, &pdev->dev);
+
+ if (mn_pmu->topo.scl_id < 0)
+ return dev_err_probe(&pdev->dev, -EINVAL,
+ "Failed to read MN scl id\n");
+
+ if (mn_pmu->topo.index_id < 0)
+ return dev_err_probe(&pdev->dev, -EINVAL,
+ "Failed to read MN index id\n");
+
+ mn_pmu->base = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(mn_pmu->base))
+ return dev_err_probe(&pdev->dev, PTR_ERR(mn_pmu->base),
+ "Failed to ioremap resource\n");
+
+ ret = hisi_uncore_pmu_init_irq(mn_pmu, pdev);
+ if (ret)
+ return ret;
+
+ mn_pmu->dev_info = device_get_match_data(&pdev->dev);
+ if (!mn_pmu->dev_info)
+ return -ENODEV;
+
+ mn_pmu->pmu_events.attr_groups = mn_pmu->dev_info->attr_groups;
+ mn_pmu->counter_bits = mn_pmu->dev_info->counter_bits;
+ mn_pmu->check_event = mn_pmu->dev_info->check_event;
+ mn_pmu->num_counters = HISI_MN_NR_COUNTERS;
+ mn_pmu->ops = &hisi_uncore_mn_ops;
+ mn_pmu->dev = &pdev->dev;
+ mn_pmu->on_cpu = -1;
+
+ reg_info = mn_pmu->dev_info->private;
+ mn_pmu->identifier = readl(mn_pmu->base + reg_info->version);
+
+ return 0;
+}
+
+static void hisi_mn_pmu_remove_cpuhp(void *hotplug_node)
+{
+ cpuhp_state_remove_instance_nocalls(hisi_mn_pmu_online, hotplug_node);
+}
+
+static void hisi_mn_pmu_unregister(void *pmu)
+{
+ perf_pmu_unregister(pmu);
+}
+
+static int hisi_mn_pmu_probe(struct platform_device *pdev)
+{
+ struct hisi_pmu *mn_pmu;
+ char *name;
+ int ret;
+
+ mn_pmu = devm_kzalloc(&pdev->dev, sizeof(*mn_pmu), GFP_KERNEL);
+ if (!mn_pmu)
+ return -ENOMEM;
+
+ platform_set_drvdata(pdev, mn_pmu);
+
+ ret = hisi_mn_pmu_dev_init(pdev, mn_pmu);
+ if (ret)
+ return ret;
+
+ name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_scl%d_mn%d",
+ mn_pmu->topo.scl_id, mn_pmu->topo.index_id);
+ if (!name)
+ return -ENOMEM;
+
+ ret = cpuhp_state_add_instance(hisi_mn_pmu_online, &mn_pmu->node);
+ if (ret)
+ return dev_err_probe(&pdev->dev, ret, "Failed to register cpu hotplug\n");
+
+ ret = devm_add_action_or_reset(&pdev->dev, hisi_mn_pmu_remove_cpuhp, &mn_pmu->node);
+ if (ret)
+ return ret;
+
+ hisi_pmu_init(mn_pmu, THIS_MODULE);
+
+ ret = perf_pmu_register(&mn_pmu->pmu, name, -1);
+ if (ret)
+ return dev_err_probe(mn_pmu->dev, ret, "Failed to register MN PMU\n");
+
+ return devm_add_action_or_reset(&pdev->dev, hisi_mn_pmu_unregister, &mn_pmu->pmu);
+}
+
+static struct hisi_mn_pmu_regs hisi_mn_v1_pmu_regs = {
+ .version = HISI_MN_VERSION_REG,
+ .dyn_ctrl = HISI_MN_DYNAMIC_CTRL_REG,
+ .perf_ctrl = HISI_MN_PERF_CTRL_REG,
+ .int_mask = HISI_MN_INT_MASK_REG,
+ .int_clear = HISI_MN_INT_CLEAR_REG,
+ .int_status = HISI_MN_INT_STATUS_REG,
+ .event_ctrl = HISI_MN_EVENT_CTRL_REG,
+ .event_type0 = HISI_MN_EVTYPE0_REG,
+ .event_cntr0 = HISI_MN_CNTR0_REG,
+};
+
+static const struct hisi_pmu_dev_info hisi_mn_v1 = {
+ .attr_groups = hisi_mn_pmu_attr_groups,
+ .counter_bits = 48,
+ .check_event = HISI_MN_EVTYPE_MASK,
+ .private = &hisi_mn_v1_pmu_regs,
+};
+
+static const struct acpi_device_id hisi_mn_pmu_acpi_match[] = {
+ { "HISI0222", (kernel_ulong_t) &hisi_mn_v1 },
+ { }
+};
+MODULE_DEVICE_TABLE(acpi, hisi_mn_pmu_acpi_match);
+
+static struct platform_driver hisi_mn_pmu_driver = {
+ .driver = {
+ .name = "hisi_mn_pmu",
+ .acpi_match_table = hisi_mn_pmu_acpi_match,
+ /*
+ * We have not worked out a safe bind/unbind process.
+ * Forcefully unbinding during sampling will lead to a
+ * kernel panic, so this is not supported yet.
+ */
+ .suppress_bind_attrs = true,
+ },
+ .probe = hisi_mn_pmu_probe,
+};
+
+static int __init hisi_mn_pmu_module_init(void)
+{
+ int ret;
+
+ ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "perf/hisi/mn:online",
+ hisi_uncore_pmu_online_cpu,
+ hisi_uncore_pmu_offline_cpu);
+ if (ret < 0) {
+ pr_err("hisi_mn_pmu: Failed to setup MN PMU hotplug: %d\n", ret);
+ return ret;
+ }
+ hisi_mn_pmu_online = ret;
+
+ ret = platform_driver_register(&hisi_mn_pmu_driver);
+ if (ret)
+ cpuhp_remove_multi_state(hisi_mn_pmu_online);
+
+ return ret;
+}
+module_init(hisi_mn_pmu_module_init);
+
+static void __exit hisi_mn_pmu_module_exit(void)
+{
+ platform_driver_unregister(&hisi_mn_pmu_driver);
+ cpuhp_remove_multi_state(hisi_mn_pmu_online);
+}
+module_exit(hisi_mn_pmu_module_exit);
+
+MODULE_IMPORT_NS("HISI_PMU");
+MODULE_DESCRIPTION("HiSilicon SoC MN uncore PMU driver");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Junhao He <hejunhao3@huawei.com>");
diff --git a/drivers/perf/hisilicon/hisi_uncore_noc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_noc_pmu.c
new file mode 100644
index 000000000000..de3b9cc7aada
--- /dev/null
+++ b/drivers/perf/hisilicon/hisi_uncore_noc_pmu.c
@@ -0,0 +1,443 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Driver for HiSilicon Uncore NoC (Network on Chip) PMU device
+ *
+ * Copyright (c) 2025 HiSilicon Technologies Co., Ltd.
+ * Author: Yicong Yang <yangyicong@hisilicon.com>
+ */
+#include <linux/bitops.h>
+#include <linux/cpuhotplug.h>
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/mod_devicetable.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/property.h>
+#include <linux/sysfs.h>
+
+#include "hisi_uncore_pmu.h"
+
+#define NOC_PMU_VERSION 0x1e00
+#define NOC_PMU_GLOBAL_CTRL 0x1e04
+#define NOC_PMU_GLOBAL_CTRL_PMU_EN BIT(0)
+#define NOC_PMU_GLOBAL_CTRL_TT_EN BIT(1)
+#define NOC_PMU_CNT_INFO 0x1e08
+#define NOC_PMU_CNT_INFO_OVERFLOW(n) BIT(n)
+#define NOC_PMU_EVENT_CTRL0 0x1e20
+#define NOC_PMU_EVENT_CTRL_TYPE GENMASK(4, 0)
+/*
+ * Note that a channel of 0x0 will reset the counter value, so don't set
+ * it before we have read out the counter.
+ */
+#define NOC_PMU_EVENT_CTRL_CHANNEL GENMASK(10, 8)
+#define NOC_PMU_EVENT_CTRL_EN BIT(11)
+#define NOC_PMU_EVENT_COUNTER0 0x1e80
+
+#define NOC_PMU_NR_COUNTERS 4
+#define NOC_PMU_CH_DEFAULT 0x7
+
+#define NOC_PMU_EVENT_CTRLn(ctrl0, n) ((ctrl0) + 4 * (n))
+#define NOC_PMU_EVENT_CNTRn(cntr0, n) ((cntr0) + 8 * (n))
+
+HISI_PMU_EVENT_ATTR_EXTRACTOR(ch, config1, 2, 0);
+HISI_PMU_EVENT_ATTR_EXTRACTOR(tt_en, config1, 3, 3);
+
+/* Dynamic CPU hotplug state used by this PMU driver */
+static enum cpuhp_state hisi_noc_pmu_cpuhp_state;
+
+struct hisi_noc_pmu_regs {
+ u32 version;
+ u32 pmu_ctrl;
+ u32 event_ctrl0;
+ u32 event_cntr0;
+ u32 overflow_status;
+};
+
+/*
+ * Tracetag filtering is not per event, so all the events must stay
+ * consistent. Return true if the newcomer matches the tracetag
+ * filtering configuration of the currently scheduled events.
+ */
+static bool hisi_noc_pmu_check_global_filter(struct perf_event *curr,
+ struct perf_event *new)
+{
+ return hisi_get_tt_en(curr) == hisi_get_tt_en(new);
+}
+
+static void hisi_noc_pmu_write_evtype(struct hisi_pmu *noc_pmu, int idx, u32 type)
+{
+ struct hisi_noc_pmu_regs *reg_info = noc_pmu->dev_info->private;
+ u32 reg;
+
+ reg = readl(noc_pmu->base + NOC_PMU_EVENT_CTRLn(reg_info->event_ctrl0, idx));
+ reg &= ~NOC_PMU_EVENT_CTRL_TYPE;
+ reg |= FIELD_PREP(NOC_PMU_EVENT_CTRL_TYPE, type);
+ writel(reg, noc_pmu->base + NOC_PMU_EVENT_CTRLn(reg_info->event_ctrl0, idx));
+}
+
+static int hisi_noc_pmu_get_event_idx(struct perf_event *event)
+{
+ struct hisi_pmu *noc_pmu = to_hisi_pmu(event->pmu);
+ struct hisi_pmu_hwevents *pmu_events = &noc_pmu->pmu_events;
+ int cur_idx;
+
+ cur_idx = find_first_bit(pmu_events->used_mask, noc_pmu->num_counters);
+ if (cur_idx != noc_pmu->num_counters &&
+ !hisi_noc_pmu_check_global_filter(pmu_events->hw_events[cur_idx], event))
+ return -EAGAIN;
+
+ return hisi_uncore_pmu_get_event_idx(event);
+}
+
+static u64 hisi_noc_pmu_read_counter(struct hisi_pmu *noc_pmu,
+ struct hw_perf_event *hwc)
+{
+ struct hisi_noc_pmu_regs *reg_info = noc_pmu->dev_info->private;
+
+ return readq(noc_pmu->base + NOC_PMU_EVENT_CNTRn(reg_info->event_cntr0, hwc->idx));
+}
+
+static void hisi_noc_pmu_write_counter(struct hisi_pmu *noc_pmu,
+ struct hw_perf_event *hwc, u64 val)
+{
+ struct hisi_noc_pmu_regs *reg_info = noc_pmu->dev_info->private;
+
+ writeq(val, noc_pmu->base + NOC_PMU_EVENT_CNTRn(reg_info->event_cntr0, hwc->idx));
+}
+
+static void hisi_noc_pmu_enable_counter(struct hisi_pmu *noc_pmu,
+ struct hw_perf_event *hwc)
+{
+ struct hisi_noc_pmu_regs *reg_info = noc_pmu->dev_info->private;
+ u32 reg;
+
+ reg = readl(noc_pmu->base + NOC_PMU_EVENT_CTRLn(reg_info->event_ctrl0, hwc->idx));
+ reg |= NOC_PMU_EVENT_CTRL_EN;
+ writel(reg, noc_pmu->base + NOC_PMU_EVENT_CTRLn(reg_info->event_ctrl0, hwc->idx));
+}
+
+static void hisi_noc_pmu_disable_counter(struct hisi_pmu *noc_pmu,
+ struct hw_perf_event *hwc)
+{
+ struct hisi_noc_pmu_regs *reg_info = noc_pmu->dev_info->private;
+ u32 reg;
+
+ reg = readl(noc_pmu->base + NOC_PMU_EVENT_CTRLn(reg_info->event_ctrl0, hwc->idx));
+ reg &= ~NOC_PMU_EVENT_CTRL_EN;
+ writel(reg, noc_pmu->base + NOC_PMU_EVENT_CTRLn(reg_info->event_ctrl0, hwc->idx));
+}
+
+static void hisi_noc_pmu_enable_counter_int(struct hisi_pmu *noc_pmu,
+ struct hw_perf_event *hwc)
+{
+ /* We don't support interrupt, so a stub here. */
+}
+
+static void hisi_noc_pmu_disable_counter_int(struct hisi_pmu *noc_pmu,
+ struct hw_perf_event *hwc)
+{
+}
+
+static void hisi_noc_pmu_start_counters(struct hisi_pmu *noc_pmu)
+{
+ struct hisi_noc_pmu_regs *reg_info = noc_pmu->dev_info->private;
+ u32 reg;
+
+ reg = readl(noc_pmu->base + reg_info->pmu_ctrl);
+ reg |= NOC_PMU_GLOBAL_CTRL_PMU_EN;
+ writel(reg, noc_pmu->base + reg_info->pmu_ctrl);
+}
+
+static void hisi_noc_pmu_stop_counters(struct hisi_pmu *noc_pmu)
+{
+ struct hisi_noc_pmu_regs *reg_info = noc_pmu->dev_info->private;
+ u32 reg;
+
+ reg = readl(noc_pmu->base + reg_info->pmu_ctrl);
+ reg &= ~NOC_PMU_GLOBAL_CTRL_PMU_EN;
+ writel(reg, noc_pmu->base + reg_info->pmu_ctrl);
+}
+
+static u32 hisi_noc_pmu_get_int_status(struct hisi_pmu *noc_pmu)
+{
+ struct hisi_noc_pmu_regs *reg_info = noc_pmu->dev_info->private;
+
+ return readl(noc_pmu->base + reg_info->overflow_status);
+}
+
+static void hisi_noc_pmu_clear_int_status(struct hisi_pmu *noc_pmu, int idx)
+{
+ struct hisi_noc_pmu_regs *reg_info = noc_pmu->dev_info->private;
+ u32 reg;
+
+ reg = readl(noc_pmu->base + reg_info->overflow_status);
+ reg &= ~NOC_PMU_CNT_INFO_OVERFLOW(idx);
+ writel(reg, noc_pmu->base + reg_info->overflow_status);
+}
+
+static void hisi_noc_pmu_enable_filter(struct perf_event *event)
+{
+ struct hisi_pmu *noc_pmu = to_hisi_pmu(event->pmu);
+ struct hisi_noc_pmu_regs *reg_info = noc_pmu->dev_info->private;
+ struct hw_perf_event *hwc = &event->hw;
+ u32 tt_en = hisi_get_tt_en(event);
+ u32 ch = hisi_get_ch(event);
+ u32 reg;
+
+ if (!ch)
+ ch = NOC_PMU_CH_DEFAULT;
+
+ reg = readl(noc_pmu->base + NOC_PMU_EVENT_CTRLn(reg_info->event_ctrl0, hwc->idx));
+ reg &= ~NOC_PMU_EVENT_CTRL_CHANNEL;
+ reg |= FIELD_PREP(NOC_PMU_EVENT_CTRL_CHANNEL, ch);
+ writel(reg, noc_pmu->base + NOC_PMU_EVENT_CTRLn(reg_info->event_ctrl0, hwc->idx));
+
+ /*
+ * Since tracetag filter applies to all the counters, don't touch it
+ * if user doesn't specify it explicitly.
+ */
+ if (tt_en) {
+ reg = readl(noc_pmu->base + reg_info->pmu_ctrl);
+ reg |= NOC_PMU_GLOBAL_CTRL_TT_EN;
+ writel(reg, noc_pmu->base + reg_info->pmu_ctrl);
+ }
+}
+
+static void hisi_noc_pmu_disable_filter(struct perf_event *event)
+{
+ struct hisi_pmu *noc_pmu = to_hisi_pmu(event->pmu);
+ struct hisi_noc_pmu_regs *reg_info = noc_pmu->dev_info->private;
+ u32 tt_en = hisi_get_tt_en(event);
+ u32 reg;
+
+ /*
+ * If we're not the last counter, don't touch the global tracetag
+ * configuration.
+ */
+ if (bitmap_weight(noc_pmu->pmu_events.used_mask, noc_pmu->num_counters) > 1)
+ return;
+
+ if (tt_en) {
+ reg = readl(noc_pmu->base + reg_info->pmu_ctrl);
+ reg &= ~NOC_PMU_GLOBAL_CTRL_TT_EN;
+ writel(reg, noc_pmu->base + reg_info->pmu_ctrl);
+ }
+}
+
+static const struct hisi_uncore_ops hisi_uncore_noc_ops = {
+ .write_evtype = hisi_noc_pmu_write_evtype,
+ .get_event_idx = hisi_noc_pmu_get_event_idx,
+ .read_counter = hisi_noc_pmu_read_counter,
+ .write_counter = hisi_noc_pmu_write_counter,
+ .enable_counter = hisi_noc_pmu_enable_counter,
+ .disable_counter = hisi_noc_pmu_disable_counter,
+ .enable_counter_int = hisi_noc_pmu_enable_counter_int,
+ .disable_counter_int = hisi_noc_pmu_disable_counter_int,
+ .start_counters = hisi_noc_pmu_start_counters,
+ .stop_counters = hisi_noc_pmu_stop_counters,
+ .get_int_status = hisi_noc_pmu_get_int_status,
+ .clear_int_status = hisi_noc_pmu_clear_int_status,
+ .enable_filter = hisi_noc_pmu_enable_filter,
+ .disable_filter = hisi_noc_pmu_disable_filter,
+};
+
+static struct attribute *hisi_noc_pmu_format_attrs[] = {
+ HISI_PMU_FORMAT_ATTR(event, "config:0-7"),
+ HISI_PMU_FORMAT_ATTR(ch, "config1:0-2"),
+ HISI_PMU_FORMAT_ATTR(tt_en, "config1:3"),
+ NULL
+};
+
+static const struct attribute_group hisi_noc_pmu_format_group = {
+ .name = "format",
+ .attrs = hisi_noc_pmu_format_attrs,
+};
+
+static struct attribute *hisi_noc_pmu_events_attrs[] = {
+ HISI_PMU_EVENT_ATTR(cycles, 0x0e),
+ /* Flux on/off the ring */
+ HISI_PMU_EVENT_ATTR(ingress_flow_sum, 0x1a),
+ HISI_PMU_EVENT_ATTR(egress_flow_sum, 0x17),
+ /* Buffer full duration on/off the ring */
+ HISI_PMU_EVENT_ATTR(ingress_buf_full, 0x19),
+ HISI_PMU_EVENT_ATTR(egress_buf_full, 0x12),
+ /* Failure packets count on/off the ring */
+ HISI_PMU_EVENT_ATTR(cw_ingress_fail, 0x01),
+ HISI_PMU_EVENT_ATTR(cc_ingress_fail, 0x09),
+ HISI_PMU_EVENT_ATTR(cw_egress_fail, 0x03),
+ HISI_PMU_EVENT_ATTR(cc_egress_fail, 0x0b),
+ /* Flux of the ring */
+ HISI_PMU_EVENT_ATTR(cw_main_flow_sum, 0x05),
+ HISI_PMU_EVENT_ATTR(cc_main_flow_sum, 0x0d),
+ NULL
+};
+
+static const struct attribute_group hisi_noc_pmu_events_group = {
+ .name = "events",
+ .attrs = hisi_noc_pmu_events_attrs,
+};
+
+static const struct attribute_group *hisi_noc_pmu_attr_groups[] = {
+ &hisi_noc_pmu_format_group,
+ &hisi_noc_pmu_events_group,
+ &hisi_pmu_cpumask_attr_group,
+ &hisi_pmu_identifier_group,
+ NULL
+};
+
+static int hisi_noc_pmu_dev_init(struct platform_device *pdev, struct hisi_pmu *noc_pmu)
+{
+ struct hisi_noc_pmu_regs *reg_info;
+
+ hisi_uncore_pmu_init_topology(noc_pmu, &pdev->dev);
+
+ if (noc_pmu->topo.scl_id < 0)
+ return dev_err_probe(&pdev->dev, -EINVAL, "failed to get scl-id\n");
+
+ if (noc_pmu->topo.index_id < 0)
+ return dev_err_probe(&pdev->dev, -EINVAL, "failed to get idx-id\n");
+
+ if (noc_pmu->topo.sub_id < 0)
+ return dev_err_probe(&pdev->dev, -EINVAL, "failed to get sub-id\n");
+
+ noc_pmu->base = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(noc_pmu->base))
+ return dev_err_probe(&pdev->dev, PTR_ERR(noc_pmu->base),
+ "fail to remap io memory\n");
+
+ noc_pmu->dev_info = device_get_match_data(&pdev->dev);
+ if (!noc_pmu->dev_info)
+ return -ENODEV;
+
+ noc_pmu->pmu_events.attr_groups = noc_pmu->dev_info->attr_groups;
+ noc_pmu->counter_bits = noc_pmu->dev_info->counter_bits;
+ noc_pmu->check_event = noc_pmu->dev_info->check_event;
+ noc_pmu->num_counters = NOC_PMU_NR_COUNTERS;
+ noc_pmu->ops = &hisi_uncore_noc_ops;
+ noc_pmu->dev = &pdev->dev;
+ noc_pmu->on_cpu = -1;
+
+ reg_info = noc_pmu->dev_info->private;
+ noc_pmu->identifier = readl(noc_pmu->base + reg_info->version);
+
+ return 0;
+}
+
+static void hisi_noc_pmu_remove_cpuhp_instance(void *hotplug_node)
+{
+ cpuhp_state_remove_instance_nocalls(hisi_noc_pmu_cpuhp_state, hotplug_node);
+}
+
+static void hisi_noc_pmu_unregister_pmu(void *pmu)
+{
+ perf_pmu_unregister(pmu);
+}
+
+static int hisi_noc_pmu_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct hisi_pmu *noc_pmu;
+ char *name;
+ int ret;
+
+ noc_pmu = devm_kzalloc(dev, sizeof(*noc_pmu), GFP_KERNEL);
+ if (!noc_pmu)
+ return -ENOMEM;
+
+ /*
+ * HiSilicon Uncore PMU framework needs to get common hisi_pmu device
+ * from device's drvdata.
+ */
+ platform_set_drvdata(pdev, noc_pmu);
+
+ ret = hisi_noc_pmu_dev_init(pdev, noc_pmu);
+ if (ret)
+ return ret;
+
+ ret = cpuhp_state_add_instance(hisi_noc_pmu_cpuhp_state, &noc_pmu->node);
+ if (ret)
+ return dev_err_probe(dev, ret, "Fail to register cpuhp instance\n");
+
+ ret = devm_add_action_or_reset(dev, hisi_noc_pmu_remove_cpuhp_instance,
+ &noc_pmu->node);
+ if (ret)
+ return ret;
+
+ hisi_pmu_init(noc_pmu, THIS_MODULE);
+
+ name = devm_kasprintf(dev, GFP_KERNEL, "hisi_scl%d_noc%d_%d",
+ noc_pmu->topo.scl_id, noc_pmu->topo.index_id,
+ noc_pmu->topo.sub_id);
+ if (!name)
+ return -ENOMEM;
+
+ ret = perf_pmu_register(&noc_pmu->pmu, name, -1);
+ if (ret)
+ return dev_err_probe(dev, ret, "Fail to register PMU\n");
+
+ return devm_add_action_or_reset(dev, hisi_noc_pmu_unregister_pmu,
+ &noc_pmu->pmu);
+}
+
+static struct hisi_noc_pmu_regs hisi_noc_v1_pmu_regs = {
+ .version = NOC_PMU_VERSION,
+ .pmu_ctrl = NOC_PMU_GLOBAL_CTRL,
+ .event_ctrl0 = NOC_PMU_EVENT_CTRL0,
+ .event_cntr0 = NOC_PMU_EVENT_COUNTER0,
+ .overflow_status = NOC_PMU_CNT_INFO,
+};
+
+static const struct hisi_pmu_dev_info hisi_noc_v1 = {
+ .attr_groups = hisi_noc_pmu_attr_groups,
+ .counter_bits = 64,
+ .check_event = NOC_PMU_EVENT_CTRL_TYPE,
+ .private = &hisi_noc_v1_pmu_regs,
+};
+
+static const struct acpi_device_id hisi_noc_pmu_ids[] = {
+ { "HISI04E0", (kernel_ulong_t) &hisi_noc_v1 },
+ { }
+};
+MODULE_DEVICE_TABLE(acpi, hisi_noc_pmu_ids);
+
+static struct platform_driver hisi_noc_pmu_driver = {
+ .driver = {
+ .name = "hisi_noc_pmu",
+ .acpi_match_table = hisi_noc_pmu_ids,
+ .suppress_bind_attrs = true,
+ },
+ .probe = hisi_noc_pmu_probe,
+};
+
+static int __init hisi_noc_pmu_module_init(void)
+{
+ int ret;
+
+ ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "perf/hisi/noc:online",
+ hisi_uncore_pmu_online_cpu,
+ hisi_uncore_pmu_offline_cpu);
+ if (ret < 0) {
+ pr_err("hisi_noc_pmu: Fail to setup cpuhp callbacks, ret = %d\n", ret);
+ return ret;
+ }
+ hisi_noc_pmu_cpuhp_state = ret;
+
+ ret = platform_driver_register(&hisi_noc_pmu_driver);
+ if (ret)
+ cpuhp_remove_multi_state(hisi_noc_pmu_cpuhp_state);
+
+ return ret;
+}
+module_init(hisi_noc_pmu_module_init);
+
+static void __exit hisi_noc_pmu_module_exit(void)
+{
+ platform_driver_unregister(&hisi_noc_pmu_driver);
+ cpuhp_remove_multi_state(hisi_noc_pmu_cpuhp_state);
+}
+module_exit(hisi_noc_pmu_module_exit);
+
+MODULE_IMPORT_NS("HISI_PMU");
+MODULE_DESCRIPTION("HiSilicon SoC Uncore NoC PMU driver");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Yicong Yang <yangyicong@hisilicon.com>");
diff --git a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c
index a0142684e379..80108c63cb60 100644
--- a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c
@@ -440,7 +440,7 @@ static int hisi_pa_pmu_dev_probe(struct platform_device *pdev,
pa_pmu->pmu_events.attr_groups = pa_pmu->dev_info->attr_groups;
pa_pmu->num_counters = PA_NR_COUNTERS;
pa_pmu->ops = &hisi_uncore_pa_ops;
- pa_pmu->check_event = 0xB0;
+ pa_pmu->check_event = PA_EVTYPE_MASK;
pa_pmu->counter_bits = 64;
pa_pmu->dev = &pdev->dev;
pa_pmu->on_cpu = -1;
diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c
index ef058b1dd509..de71dcf11653 100644
--- a/drivers/perf/hisilicon/hisi_uncore_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c
@@ -149,7 +149,7 @@ static void hisi_uncore_pmu_clear_event_idx(struct hisi_pmu *hisi_pmu, int idx)
clear_bit(idx, hisi_pmu->pmu_events.used_mask);
}
-static irqreturn_t hisi_uncore_pmu_isr(int irq, void *data)
+irqreturn_t hisi_uncore_pmu_isr(int irq, void *data)
{
struct hisi_pmu *hisi_pmu = data;
struct perf_event *event;
@@ -178,6 +178,7 @@ static irqreturn_t hisi_uncore_pmu_isr(int irq, void *data)
return IRQ_HANDLED;
}
+EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_isr, "HISI_PMU");
int hisi_uncore_pmu_init_irq(struct hisi_pmu *hisi_pmu,
struct platform_device *pdev)
@@ -234,7 +235,7 @@ int hisi_uncore_pmu_event_init(struct perf_event *event)
return -EINVAL;
hisi_pmu = to_hisi_pmu(event->pmu);
- if (event->attr.config > hisi_pmu->check_event)
+ if ((event->attr.config & HISI_EVENTID_MASK) > hisi_pmu->check_event)
return -EINVAL;
if (hisi_pmu->on_cpu == -1)
@@ -510,7 +511,9 @@ int hisi_uncore_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
return 0;
hisi_pmu->on_cpu = cpumask_local_spread(0, dev_to_node(hisi_pmu->dev));
- WARN_ON(irq_set_affinity(hisi_pmu->irq, cpumask_of(hisi_pmu->on_cpu)));
+ if (hisi_pmu->irq > 0)
+ WARN_ON(irq_set_affinity(hisi_pmu->irq,
+ cpumask_of(hisi_pmu->on_cpu)));
return 0;
}
@@ -525,7 +528,8 @@ int hisi_uncore_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
hisi_pmu->on_cpu = cpu;
/* Overflow interrupt also should use the same CPU */
- WARN_ON(irq_set_affinity(hisi_pmu->irq, cpumask_of(cpu)));
+ if (hisi_pmu->irq > 0)
+ WARN_ON(irq_set_affinity(hisi_pmu->irq, cpumask_of(cpu)));
return 0;
}
@@ -560,7 +564,9 @@ int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
perf_pmu_migrate_context(&hisi_pmu->pmu, cpu, target);
/* Use this CPU for event counting */
hisi_pmu->on_cpu = target;
- WARN_ON(irq_set_affinity(hisi_pmu->irq, cpumask_of(target)));
+
+ if (hisi_pmu->irq > 0)
+ WARN_ON(irq_set_affinity(hisi_pmu->irq, cpumask_of(target)));
return 0;
}
diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.h b/drivers/perf/hisilicon/hisi_uncore_pmu.h
index f4fed2544877..3ffe6acda653 100644
--- a/drivers/perf/hisilicon/hisi_uncore_pmu.h
+++ b/drivers/perf/hisilicon/hisi_uncore_pmu.h
@@ -24,7 +24,7 @@
#define pr_fmt(fmt) "hisi_pmu: " fmt
#define HISI_PMU_V2 0x30
-#define HISI_MAX_COUNTERS 0x10
+#define HISI_MAX_COUNTERS 0x18
#define to_hisi_pmu(p) (container_of(p, struct hisi_pmu, pmu))
#define HISI_PMU_ATTR(_name, _func, _config) \
@@ -43,7 +43,8 @@
return FIELD_GET(GENMASK_ULL(hi, lo), event->attr.config); \
}
-#define HISI_GET_EVENTID(ev) (ev->hw.config_base & 0xff)
+#define HISI_EVENTID_MASK GENMASK(7, 0) /* bits [7:0], same value as the old 0xff */
+#define HISI_GET_EVENTID(ev) ((ev)->hw.config_base & HISI_EVENTID_MASK)
#define HISI_PMU_EVTYPE_BITS 8
#define HISI_PMU_EVTYPE_SHIFT(idx) ((idx) % 4 * HISI_PMU_EVTYPE_BITS)
@@ -72,6 +73,8 @@ struct hisi_uncore_ops {
struct hisi_pmu_dev_info {
const char *name;
const struct attribute_group **attr_groups;
+ u32 counter_bits; /* e.g. 64 in hisi_noc_v1 */
+ u32 check_event; /* e.g. NOC_PMU_EVENT_CTRL_TYPE in hisi_noc_v1 */
void *private;
};
@@ -162,6 +165,7 @@ int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node);
ssize_t hisi_uncore_pmu_identifier_attr_show(struct device *dev,
struct device_attribute *attr,
char *page);
+irqreturn_t hisi_uncore_pmu_isr(int irq, void *data);
int hisi_uncore_pmu_init_irq(struct hisi_pmu *hisi_pmu,
struct platform_device *pdev);
void hisi_uncore_pmu_init_topology(struct hisi_pmu *hisi_pmu, struct device *dev);
diff --git a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c
index dbd079016fc4..cd32d606df05 100644
--- a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c
@@ -28,6 +28,18 @@
#define SLLC_VERSION 0x1cf0
#define SLLC_EVENT_CNT0_L 0x1d00
+/* SLLC registers definition in v3 */
+#define SLLC_V3_INT_MASK 0x6834
+#define SLLC_V3_INT_STATUS 0x6838
+#define SLLC_V3_INT_CLEAR 0x683c
+#define SLLC_V3_VERSION 0x6c00
+#define SLLC_V3_PERF_CTRL 0x6d00
+#define SLLC_V3_SRCID_CTRL 0x6d04
+#define SLLC_V3_TGTID_CTRL 0x6d08
+#define SLLC_V3_EVENT_CTRL 0x6d14
+#define SLLC_V3_EVENT_TYPE0 0x6d18
+#define SLLC_V3_EVENT_CNT0_L 0x6e00
+
#define SLLC_EVTYPE_MASK 0xff
#define SLLC_PERF_CTRL_EN BIT(0)
#define SLLC_FILT_EN BIT(1)
@@ -40,7 +52,14 @@
#define SLLC_TGTID_MAX_SHIFT 12
#define SLLC_SRCID_CMD_SHIFT 1
#define SLLC_SRCID_MSK_SHIFT 12
-#define SLLC_NR_EVENTS 0x80
+
+#define SLLC_V3_TGTID_MIN_SHIFT 1
+#define SLLC_V3_TGTID_MAX_SHIFT 10
+#define SLLC_V3_SRCID_CMD_SHIFT 1
+#define SLLC_V3_SRCID_MSK_SHIFT 10
+
+#define SLLC_NR_EVENTS 0xff
+#define SLLC_EVENT_CNTn(cnt0, n) ((cnt0) + (n) * 8) /* counter n is 8 * n bytes past cnt0 */
HISI_PMU_EVENT_ATTR_EXTRACTOR(tgtid_min, config1, 10, 0);
HISI_PMU_EVENT_ATTR_EXTRACTOR(tgtid_max, config1, 21, 11);
@@ -48,6 +67,23 @@ HISI_PMU_EVENT_ATTR_EXTRACTOR(srcid_cmd, config1, 32, 22);
HISI_PMU_EVENT_ATTR_EXTRACTOR(srcid_msk, config1, 43, 33);
HISI_PMU_EVENT_ATTR_EXTRACTOR(tracetag_en, config1, 44, 44);
+struct hisi_sllc_pmu_regs { /* per-version offsets and shifts; see the v2/v3 tables */
+ u32 int_mask; /* offset: per-counter interrupt mask bits */
+ u32 int_clear; /* offset: written with the counter's bit to clear it */
+ u32 int_status; /* offset: read by hisi_sllc_pmu_get_int_status() */
+ u32 perf_ctrl; /* offset: global enable and filter enable bits */
+ u32 srcid_ctrl; /* offset: receives the srcid cmd/mask value */
+ u32 srcid_cmd_shift; /* shift applied to srcid cmd */
+ u32 srcid_mask_shift; /* shift applied to srcid mask */
+ u32 tgtid_ctrl; /* offset: receives the tgtid min/max value */
+ u32 tgtid_min_shift; /* shift applied to tgtid min */
+ u32 tgtid_max_shift; /* shift applied to tgtid max */
+ u32 event_ctrl; /* offset: per-counter enable bits */
+ u32 event_type0; /* offset: first event type register */
+ u32 version; /* offset: read into sllc_pmu->identifier */
+ u32 event_cnt0; /* offset: counter 0, see SLLC_EVENT_CNTn() */
+};
+
static bool tgtid_is_valid(u32 max, u32 min)
{
return max > 0 && max >= min;
@@ -56,96 +92,104 @@ static bool tgtid_is_valid(u32 max, u32 min)
static void hisi_sllc_pmu_enable_tracetag(struct perf_event *event)
{
struct hisi_pmu *sllc_pmu = to_hisi_pmu(event->pmu);
+ struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private; /* v2 or v3 layout */
u32 tt_en = hisi_get_tracetag_en(event);
if (tt_en) {
u32 val;
- val = readl(sllc_pmu->base + SLLC_PERF_CTRL);
+ val = readl(sllc_pmu->base + regs->perf_ctrl);
val |= SLLC_TRACETAG_EN | SLLC_FILT_EN;
- writel(val, sllc_pmu->base + SLLC_PERF_CTRL);
+ writel(val, sllc_pmu->base + regs->perf_ctrl); /* read-modify-write of perf_ctrl */
}
}
static void hisi_sllc_pmu_disable_tracetag(struct perf_event *event)
{
struct hisi_pmu *sllc_pmu = to_hisi_pmu(event->pmu);
+ struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private; /* v2 or v3 layout */
u32 tt_en = hisi_get_tracetag_en(event);
if (tt_en) {
u32 val;
- val = readl(sllc_pmu->base + SLLC_PERF_CTRL);
+ val = readl(sllc_pmu->base + regs->perf_ctrl);
val &= ~(SLLC_TRACETAG_EN | SLLC_FILT_EN);
- writel(val, sllc_pmu->base + SLLC_PERF_CTRL);
+ writel(val, sllc_pmu->base + regs->perf_ctrl); /* NOTE(review): FILT_EN is shared with tgtid/srcid */
}
}
static void hisi_sllc_pmu_config_tgtid(struct perf_event *event)
{
struct hisi_pmu *sllc_pmu = to_hisi_pmu(event->pmu);
+ struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private;
u32 min = hisi_get_tgtid_min(event);
u32 max = hisi_get_tgtid_max(event);
if (tgtid_is_valid(max, min)) {
- u32 val = (max << SLLC_TGTID_MAX_SHIFT) | (min << SLLC_TGTID_MIN_SHIFT);
+ u32 val = (max << regs->tgtid_max_shift) | /* max shift: 12 on v2, 10 on v3 */
+ (min << regs->tgtid_min_shift);
- writel(val, sllc_pmu->base + SLLC_TGTID_CTRL);
+ writel(val, sllc_pmu->base + regs->tgtid_ctrl); /* program the range first */
/* Enable the tgtid */
- val = readl(sllc_pmu->base + SLLC_PERF_CTRL);
+ val = readl(sllc_pmu->base + regs->perf_ctrl);
val |= SLLC_TGTID_EN | SLLC_FILT_EN;
- writel(val, sllc_pmu->base + SLLC_PERF_CTRL);
+ writel(val, sllc_pmu->base + regs->perf_ctrl);
}
}
static void hisi_sllc_pmu_clear_tgtid(struct perf_event *event)
{
struct hisi_pmu *sllc_pmu = to_hisi_pmu(event->pmu);
+ struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private;
u32 min = hisi_get_tgtid_min(event);
u32 max = hisi_get_tgtid_max(event);
if (tgtid_is_valid(max, min)) {
u32 val;
- writel(SLLC_TGTID_NONE, sllc_pmu->base + SLLC_TGTID_CTRL);
+ writel(SLLC_TGTID_NONE, sllc_pmu->base + regs->tgtid_ctrl); /* drop the range */
/* Disable the tgtid */
- val = readl(sllc_pmu->base + SLLC_PERF_CTRL);
+ val = readl(sllc_pmu->base + regs->perf_ctrl);
val &= ~(SLLC_TGTID_EN | SLLC_FILT_EN);
- writel(val, sllc_pmu->base + SLLC_PERF_CTRL);
+ writel(val, sllc_pmu->base + regs->perf_ctrl); /* undoes the bits set by config_tgtid */
}
}
static void hisi_sllc_pmu_config_srcid(struct perf_event *event)
{
struct hisi_pmu *sllc_pmu = to_hisi_pmu(event->pmu);
+ struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private;
u32 cmd = hisi_get_srcid_cmd(event);
if (cmd) {
u32 val, msk;
msk = hisi_get_srcid_msk(event);
- val = (cmd << SLLC_SRCID_CMD_SHIFT) | (msk << SLLC_SRCID_MSK_SHIFT);
- writel(val, sllc_pmu->base + SLLC_SRCID_CTRL);
+ val = (cmd << regs->srcid_cmd_shift) | /* cmd shift: 1 on both v2 and v3 */
+ (msk << regs->srcid_mask_shift); /* mask shift: 12 on v2, 10 on v3 */
+ writel(val, sllc_pmu->base + regs->srcid_ctrl);
/* Enable the srcid */
- val = readl(sllc_pmu->base + SLLC_PERF_CTRL);
+ val = readl(sllc_pmu->base + regs->perf_ctrl);
val |= SLLC_SRCID_EN | SLLC_FILT_EN;
- writel(val, sllc_pmu->base + SLLC_PERF_CTRL);
+ writel(val, sllc_pmu->base + regs->perf_ctrl);
}
}
static void hisi_sllc_pmu_clear_srcid(struct perf_event *event)
{
struct hisi_pmu *sllc_pmu = to_hisi_pmu(event->pmu);
+ struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private;
u32 cmd = hisi_get_srcid_cmd(event);
if (cmd) {
u32 val;
- writel(SLLC_SRCID_NONE, sllc_pmu->base + SLLC_SRCID_CTRL);
+ writel(SLLC_SRCID_NONE, sllc_pmu->base + regs->srcid_ctrl); /* drop cmd/mask */
/* Disable the srcid */
- val = readl(sllc_pmu->base + SLLC_PERF_CTRL);
+ val = readl(sllc_pmu->base + regs->perf_ctrl);
val &= ~(SLLC_SRCID_EN | SLLC_FILT_EN);
- writel(val, sllc_pmu->base + SLLC_PERF_CTRL);
+ writel(val, sllc_pmu->base + regs->perf_ctrl); /* undoes the bits set by config_srcid */
}
}
@@ -167,29 +211,27 @@ static void hisi_sllc_pmu_clear_filter(struct perf_event *event)
}
}
-static u32 hisi_sllc_pmu_get_counter_offset(int idx)
-{
- return (SLLC_EVENT_CNT0_L + idx * 8);
-}
-
static u64 hisi_sllc_pmu_read_counter(struct hisi_pmu *sllc_pmu,
struct hw_perf_event *hwc)
{
- return readq(sllc_pmu->base +
- hisi_sllc_pmu_get_counter_offset(hwc->idx));
+ struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private; /* for event_cnt0 */
+
+ return readq(sllc_pmu->base + SLLC_EVENT_CNTn(regs->event_cnt0, hwc->idx));
}
static void hisi_sllc_pmu_write_counter(struct hisi_pmu *sllc_pmu,
struct hw_perf_event *hwc, u64 val)
{
- writeq(val, sllc_pmu->base +
- hisi_sllc_pmu_get_counter_offset(hwc->idx));
+ struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private; /* for event_cnt0 */
+
+ writeq(val, sllc_pmu->base + SLLC_EVENT_CNTn(regs->event_cnt0, hwc->idx));
}
static void hisi_sllc_pmu_write_evtype(struct hisi_pmu *sllc_pmu, int idx,
u32 type)
{
- u32 reg, reg_idx, shift, val;
+ struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private;
+ u32 reg, val;
/*
* Select the appropriate event select register(SLLC_EVENT_TYPE0/1).
@@ -198,96 +240,98 @@ static void hisi_sllc_pmu_write_evtype(struct hisi_pmu *sllc_pmu, int idx,
* SLLC_EVENT_TYPE0 is chosen. For the latter 4 hardware counters,
* SLLC_EVENT_TYPE1 is chosen.
*/
- reg = SLLC_EVENT_TYPE0 + (idx / 4) * 4;
- reg_idx = idx % 4;
- shift = 8 * reg_idx;
+ reg = regs->event_type0 + (idx / 4) * 4;
/* Write event code to SLLC_EVENT_TYPEx Register */
val = readl(sllc_pmu->base + reg);
- val &= ~(SLLC_EVTYPE_MASK << shift);
- val |= (type << shift);
+ val &= ~(SLLC_EVTYPE_MASK << HISI_PMU_EVTYPE_SHIFT(idx));
+ val |= (type << HISI_PMU_EVTYPE_SHIFT(idx));
writel(val, sllc_pmu->base + reg);
}
static void hisi_sllc_pmu_start_counters(struct hisi_pmu *sllc_pmu)
{
+ struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private;
u32 val;
- val = readl(sllc_pmu->base + SLLC_PERF_CTRL);
+ val = readl(sllc_pmu->base + regs->perf_ctrl);
val |= SLLC_PERF_CTRL_EN;
- writel(val, sllc_pmu->base + SLLC_PERF_CTRL);
+ writel(val, sllc_pmu->base + regs->perf_ctrl); /* SLLC_PERF_CTRL_EN (bit 0) now set */
}
static void hisi_sllc_pmu_stop_counters(struct hisi_pmu *sllc_pmu)
{
+ struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private;
u32 val;
- val = readl(sllc_pmu->base + SLLC_PERF_CTRL);
+ val = readl(sllc_pmu->base + regs->perf_ctrl);
val &= ~(SLLC_PERF_CTRL_EN);
- writel(val, sllc_pmu->base + SLLC_PERF_CTRL);
+ writel(val, sllc_pmu->base + regs->perf_ctrl); /* SLLC_PERF_CTRL_EN (bit 0) now clear */
}
static void hisi_sllc_pmu_enable_counter(struct hisi_pmu *sllc_pmu,
struct hw_perf_event *hwc)
{
+ struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private;
u32 val;
- val = readl(sllc_pmu->base + SLLC_EVENT_CTRL);
- val |= 1 << hwc->idx;
- writel(val, sllc_pmu->base + SLLC_EVENT_CTRL);
+ val = readl(sllc_pmu->base + regs->event_ctrl);
+ val |= BIT_ULL(hwc->idx); /* NOTE(review): 64-bit mask narrowed into u32 val */
+ writel(val, sllc_pmu->base + regs->event_ctrl);
}
static void hisi_sllc_pmu_disable_counter(struct hisi_pmu *sllc_pmu,
struct hw_perf_event *hwc)
{
+ struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private;
u32 val;
- val = readl(sllc_pmu->base + SLLC_EVENT_CTRL);
- val &= ~(1 << hwc->idx);
- writel(val, sllc_pmu->base + SLLC_EVENT_CTRL);
+ val = readl(sllc_pmu->base + regs->event_ctrl);
+ val &= ~BIT_ULL(hwc->idx); /* clear only this counter's enable bit */
+ writel(val, sllc_pmu->base + regs->event_ctrl);
}
static void hisi_sllc_pmu_enable_counter_int(struct hisi_pmu *sllc_pmu,
struct hw_perf_event *hwc)
{
+ struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private;
u32 val;
- val = readl(sllc_pmu->base + SLLC_INT_MASK);
- /* Write 0 to enable interrupt */
- val &= ~(1 << hwc->idx);
- writel(val, sllc_pmu->base + SLLC_INT_MASK);
+ val = readl(sllc_pmu->base + regs->int_mask);
+ val &= ~BIT_ULL(hwc->idx); /* Write 0 to enable interrupt */
+ writel(val, sllc_pmu->base + regs->int_mask);
}
static void hisi_sllc_pmu_disable_counter_int(struct hisi_pmu *sllc_pmu,
struct hw_perf_event *hwc)
{
+ struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private;
u32 val;
- val = readl(sllc_pmu->base + SLLC_INT_MASK);
- /* Write 1 to mask interrupt */
- val |= 1 << hwc->idx;
- writel(val, sllc_pmu->base + SLLC_INT_MASK);
+ val = readl(sllc_pmu->base + regs->int_mask);
+ val |= BIT_ULL(hwc->idx); /* Write 1 to mask interrupt */
+ writel(val, sllc_pmu->base + regs->int_mask);
}
static u32 hisi_sllc_pmu_get_int_status(struct hisi_pmu *sllc_pmu)
{
- return readl(sllc_pmu->base + SLLC_INT_STATUS);
+ struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private;
+
+ return readl(sllc_pmu->base + regs->int_status); /* raw register value, unmasked */
}
static void hisi_sllc_pmu_clear_int_status(struct hisi_pmu *sllc_pmu, int idx)
{
- writel(1 << idx, sllc_pmu->base + SLLC_INT_CLEAR);
-}
+ struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private;
-static const struct acpi_device_id hisi_sllc_pmu_acpi_match[] = {
- { "HISI0263", },
- {}
-};
-MODULE_DEVICE_TABLE(acpi, hisi_sllc_pmu_acpi_match);
+ writel(BIT_ULL(idx), sllc_pmu->base + regs->int_clear); /* only bit idx is written */
+}
static int hisi_sllc_pmu_init_data(struct platform_device *pdev,
struct hisi_pmu *sllc_pmu)
{
+ struct hisi_sllc_pmu_regs *regs;
+
hisi_uncore_pmu_init_topology(sllc_pmu, &pdev->dev);
/*
@@ -304,13 +348,18 @@ static int hisi_sllc_pmu_init_data(struct platform_device *pdev,
return -EINVAL;
}
+ sllc_pmu->dev_info = device_get_match_data(&pdev->dev);
+ if (!sllc_pmu->dev_info)
+ return -ENODEV;
+
sllc_pmu->base = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(sllc_pmu->base)) {
dev_err(&pdev->dev, "ioremap failed for sllc_pmu resource.\n");
return PTR_ERR(sllc_pmu->base);
}
- sllc_pmu->identifier = readl(sllc_pmu->base + SLLC_VERSION);
+ regs = sllc_pmu->dev_info->private;
+ sllc_pmu->identifier = readl(sllc_pmu->base + regs->version);
return 0;
}
@@ -352,6 +401,48 @@ static const struct attribute_group *hisi_sllc_pmu_v2_attr_groups[] = {
NULL
};
+static struct hisi_sllc_pmu_regs hisi_sllc_v2_pmu_regs = { /* the pre-existing SLLC_* values */
+ .int_mask = SLLC_INT_MASK,
+ .int_clear = SLLC_INT_CLEAR,
+ .int_status = SLLC_INT_STATUS,
+ .perf_ctrl = SLLC_PERF_CTRL,
+ .srcid_ctrl = SLLC_SRCID_CTRL,
+ .srcid_cmd_shift = SLLC_SRCID_CMD_SHIFT,
+ .srcid_mask_shift = SLLC_SRCID_MSK_SHIFT,
+ .tgtid_ctrl = SLLC_TGTID_CTRL,
+ .tgtid_min_shift = SLLC_TGTID_MIN_SHIFT,
+ .tgtid_max_shift = SLLC_TGTID_MAX_SHIFT,
+ .event_ctrl = SLLC_EVENT_CTRL,
+ .event_type0 = SLLC_EVENT_TYPE0,
+ .version = SLLC_VERSION,
+ .event_cnt0 = SLLC_EVENT_CNT0_L,
+};
+
+static const struct hisi_pmu_dev_info hisi_sllc_v2 = { /* match data for "HISI0263" */
+ .private = &hisi_sllc_v2_pmu_regs,
+};
+
+static struct hisi_sllc_pmu_regs hisi_sllc_v3_pmu_regs = { /* SLLC_V3_* offsets and shifts */
+ .int_mask = SLLC_V3_INT_MASK,
+ .int_clear = SLLC_V3_INT_CLEAR,
+ .int_status = SLLC_V3_INT_STATUS,
+ .perf_ctrl = SLLC_V3_PERF_CTRL,
+ .srcid_ctrl = SLLC_V3_SRCID_CTRL,
+ .srcid_cmd_shift = SLLC_V3_SRCID_CMD_SHIFT,
+ .srcid_mask_shift = SLLC_V3_SRCID_MSK_SHIFT,
+ .tgtid_ctrl = SLLC_V3_TGTID_CTRL,
+ .tgtid_min_shift = SLLC_V3_TGTID_MIN_SHIFT,
+ .tgtid_max_shift = SLLC_V3_TGTID_MAX_SHIFT,
+ .event_ctrl = SLLC_V3_EVENT_CTRL,
+ .event_type0 = SLLC_V3_EVENT_TYPE0,
+ .version = SLLC_V3_VERSION,
+ .event_cnt0 = SLLC_V3_EVENT_CNT0_L,
+};
+
+static const struct hisi_pmu_dev_info hisi_sllc_v3 = { /* match data for "HISI0264" */
+ .private = &hisi_sllc_v3_pmu_regs,
+};
+
static const struct hisi_uncore_ops hisi_uncore_sllc_ops = {
.write_evtype = hisi_sllc_pmu_write_evtype,
.get_event_idx = hisi_uncore_pmu_get_event_idx,
@@ -443,6 +534,13 @@ static void hisi_sllc_pmu_remove(struct platform_device *pdev)
&sllc_pmu->node);
}
+static const struct acpi_device_id hisi_sllc_pmu_acpi_match[] = {
+ { "HISI0263", (kernel_ulong_t)&hisi_sllc_v2 }, /* existing ID, now with v2 data */
+ { "HISI0264", (kernel_ulong_t)&hisi_sllc_v3 }, /* new ID using the v3 layout */
+ {} /* empty terminator entry */
+};
+MODULE_DEVICE_TABLE(acpi, hisi_sllc_pmu_acpi_match);
+
static struct platform_driver hisi_sllc_pmu_driver = {
.driver = {
.name = "hisi_sllc_pmu",
diff --git a/drivers/perf/riscv_pmu_sbi.c b/drivers/perf/riscv_pmu_sbi.c
index 698de8ddf895..3fc16bbab025 100644
--- a/drivers/perf/riscv_pmu_sbi.c
+++ b/drivers/perf/riscv_pmu_sbi.c
@@ -339,7 +339,7 @@ static bool pmu_sbi_ctr_is_fw(int cidx)
if (!info)
return false;
- return (info->type == SBI_PMU_CTR_TYPE_FW) ? true : false;
+ return info->type == SBI_PMU_CTR_TYPE_FW;
}
/*
@@ -877,8 +877,10 @@ static inline void pmu_sbi_start_ovf_ctrs_sbi(struct cpu_hw_events *cpu_hw_evt,
for (i = 0; i < BITS_TO_LONGS(RISCV_MAX_COUNTERS); i++) {
ctr_start_mask = cpu_hw_evt->used_hw_ctrs[i] & ~ctr_ovf_mask;
/* Start all the counters that did not overflow in a single shot */
- sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, i * BITS_PER_LONG, ctr_start_mask,
- 0, 0, 0, 0);
+ if (ctr_start_mask) {
+ sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, i * BITS_PER_LONG,
+ ctr_start_mask, 0, 0, 0, 0);
+ }
}
/* Reinitialize and start all the counter that overflowed */