diff options
author | Kajol Jain <kjain@linux.ibm.com> | 2021-02-09 12:52:34 +0300 |
---|---|---|
committer | Michael Ellerman <mpe@ellerman.id.au> | 2021-02-11 15:35:36 +0300 |
commit | 82d2c16b350f72aa21ac2a6860c542aa4b43a51e (patch) | |
tree | 211576b43194d8c00ed547d6c2a7be3f1bf21740 /arch/powerpc/perf/isa207-common.c | |
parent | b3abe590c80e0ba55b6fce48762232d90dbc37a5 (diff) | |
download | linux-82d2c16b350f72aa21ac2a6860c542aa4b43a51e.tar.xz |
powerpc/perf: Adds support for programming of Thresholding in P10
Thresholding, a performance monitoring unit feature, can be
used to identify marked instructions which take more than
expected cycles between start event and end event.
Threshold compare (thresh_cmp) bits are programmed in MMCRA
register. In Power9, thresh_cmp bits were part of the
event code. But in case of P10, thresh_cmp are not part of
event code due to inclusion of MMCR3 bits.
Patch here adds an option to use attr.config1 variable
to be used to pass thresh_cmp value to be programmed in
MMCRA register. A new ppmu flag called PPMU_HAS_ATTR_CONFIG1
has been added and this flag is used to notify the use of
attr.config1 variable.
Patch has extended the parameter list of 'compute_mmcr',
to include power_pmu's 'flags' element and parameter list of
get_constraint to include attr.config1 value. It also extend
parameter list of power_check_constraints inorder to pass
perf_event list.
As stated by commit ef0e3b650f8d ("powerpc/perf: Fix Threshold
Event Counter Multiplier width for P10"), constraint bits for
thresh_cmp is also needed to be increased to 11 bits, which is
handled as part of this patch. We added bit number 53 as part
of constraint bits of thresh_cmp for power10 to make it an
11 bit field.
Updated layout for p10:
/*
* Layout of constraint bits:
*
* 60 56 52 48 44 40 36 32
* | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
* [ fab_match ] [ thresh_cmp ] [ thresh_ctl ] [ ]
* | |
* [ thresh_cmp bits for p10] thresh_sel -*
*
* 28 24 20 16 12 8 4 0
* | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
* [ ] | [ ] | [ sample ] [ ] [6] [5] [4] [3] [2] [1]
* | | | | |
* BHRB IFM -* | | |*radix_scope | Count of events for each PMC.
* EBB -* | | p1, p2, p3, p4, p5, p6.
* L1 I/D qualifier -* |
* nc - number of counters -*
*
* The PMC fields P1..P6, and NC, are adder fields. As we accumulate constraints
* we want the low bit of each field to be added to any existing value.
*
* Everything else is a value field.
*/
Result:
command#: cat /sys/devices/cpu/format/thresh_cmp
config1:0-17
ex. usage:
command#: perf record -I --weight -d -e
cpu/event=0x67340101EC,thresh_cmp=500/ ./ebizzy -S 2 -t 1 -s 4096
1826636 records/s
real 2.00 s
user 2.00 s
sys 0.00 s
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.038 MB perf.data (61 samples) ]
Signed-off-by: Kajol Jain <kjain@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210209095234.837356-1-kjain@linux.ibm.com
Diffstat (limited to 'arch/powerpc/perf/isa207-common.c')
-rw-r--r-- | arch/powerpc/perf/isa207-common.c | 67 |
1 files changed, 59 insertions, 8 deletions
diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c index 6ab5b272090a..e4f577da33d8 100644 --- a/arch/powerpc/perf/isa207-common.c +++ b/arch/powerpc/perf/isa207-common.c @@ -108,12 +108,57 @@ static void mmcra_sdar_mode(u64 event, unsigned long *mmcra) *mmcra |= MMCRA_SDAR_MODE_TLB; } +static u64 p10_thresh_cmp_val(u64 value) +{ + int exp = 0; + u64 result = value; + + if (!value) + return value; + + /* + * Incase of P10, thresh_cmp value is not part of raw event code + * and provided via attr.config1 parameter. To program threshold in MMCRA, + * take a 18 bit number N and shift right 2 places and increment + * the exponent E by 1 until the upper 10 bits of N are zero. + * Write E to the threshold exponent and write the lower 8 bits of N + * to the threshold mantissa. + * The max threshold that can be written is 261120. + */ + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + if (value > 261120) + value = 261120; + while ((64 - __builtin_clzl(value)) > 8) { + exp++; + value >>= 2; + } + + /* + * Note that it is invalid to write a mantissa with the + * upper 2 bits of mantissa being zero, unless the + * exponent is also zero. + */ + if (!(value & 0xC0) && exp) + result = 0; + else + result = (exp << 8) | value; + } + return result; +} + static u64 thresh_cmp_val(u64 value) { + if (cpu_has_feature(CPU_FTR_ARCH_31)) + value = p10_thresh_cmp_val(value); + + /* + * Since location of threshold compare bits in MMCRA + * is different for p8, using different shift value. + */ if (cpu_has_feature(CPU_FTR_ARCH_300)) return value << p9_MMCRA_THR_CMP_SHIFT; - - return value << MMCRA_THR_CMP_SHIFT; + else + return value << MMCRA_THR_CMP_SHIFT; } static unsigned long combine_from_event(u64 event) @@ -141,13 +186,13 @@ static bool is_thresh_cmp_valid(u64 event) { unsigned int cmp, exp; + if (cpu_has_feature(CPU_FTR_ARCH_31)) + return p10_thresh_cmp_val(event) != 0; + /* * Check the mantissa upper two bits are not zero, unless the * exponent is also zero. See the THRESH_CMP_MANTISSA doc. - * Power10: thresh_cmp is replaced by l2_l3 event select. */ - if (cpu_has_feature(CPU_FTR_ARCH_31)) - return false; cmp = (event >> EVENT_THR_CMP_SHIFT) & EVENT_THR_CMP_MASK; exp = cmp >> 7; @@ -256,7 +301,7 @@ void isa207_get_mem_weight(u64 *weight) *weight = mantissa << (2 * exp); } -int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp) +int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp, u64 event_config1) { unsigned int unit, pmc, cache, ebb; unsigned long mask, value; @@ -355,9 +400,11 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp) } if (cpu_has_feature(CPU_FTR_ARCH_31)) { - if (event_is_threshold(event)) { + if (event_is_threshold(event) && is_thresh_cmp_valid(event_config1)) { mask |= CNST_THRESH_CTL_SEL_MASK; value |= CNST_THRESH_CTL_SEL_VAL(event >> EVENT_THRESH_SHIFT); + mask |= p10_CNST_THRESH_CMP_MASK; + value |= p10_CNST_THRESH_CMP_VAL(p10_thresh_cmp_val(event_config1)); } } else if (cpu_has_feature(CPU_FTR_ARCH_300)) { if (event_is_threshold(event) && is_thresh_cmp_valid(event)) { @@ -411,7 +458,7 @@ ebb_bhrb: int isa207_compute_mmcr(u64 event[], int n_ev, unsigned int hwc[], struct mmcr_regs *mmcr, - struct perf_event *pevents[]) + struct perf_event *pevents[], u32 flags) { unsigned long mmcra, mmcr1, mmcr2, unit, combine, psel, cache, val; unsigned long mmcr3; @@ -504,6 +551,10 @@ int isa207_compute_mmcr(u64 event[], int n_ev, val = (event[i] >> EVENT_THR_CMP_SHIFT) & EVENT_THR_CMP_MASK; mmcra |= thresh_cmp_val(val); + } else if (flags & PPMU_HAS_ATTR_CONFIG1) { + val = (pevents[i]->attr.config1 >> p10_EVENT_THR_CMP_SHIFT) & + p10_EVENT_THR_CMP_MASK; + mmcra |= thresh_cmp_val(val); } } |