Diffstat (limited to 'tools')

 -rw-r--r--  tools/perf/Documentation/perf-arm-spe.txt |  26
 -rw-r--r--  tools/testing/selftests/arm64/abi/hwcap.c | 235

2 files changed, 259 insertions(+), 2 deletions(-)
diff --git a/tools/perf/Documentation/perf-arm-spe.txt b/tools/perf/Documentation/perf-arm-spe.txt
index de2b0b479249..37afade4f1b2 100644
--- a/tools/perf/Documentation/perf-arm-spe.txt
+++ b/tools/perf/Documentation/perf-arm-spe.txt
@@ -150,6 +150,7 @@ arm_spe/load_filter=1,min_latency=10/'
   pct_enable=1 - collect physical timestamp instead of virtual timestamp (PMSCR.PCT) - requires privilege
   store_filter=1 - collect stores only (PMSFCR.ST)
   ts_enable=1 - enable timestamping with value of generic timer (PMSCR.TS)
+  discard=1 - enable SPE PMU events but don't collect sample data - see 'Discard mode' (PMBLIMITR.FM = DISCARD)
 
 +++*+++ Latency is the total latency from the point at which sampling started on that
 instruction, rather than only the execution latency.
@@ -220,6 +221,31 @@ Common errors
 
    Increase sampling interval (see above)
 
+PMU events
+~~~~~~~~~~
+
+SPE has events that can be counted on core PMUs. These are prefixed with
+SAMPLE_, for example SAMPLE_POP, SAMPLE_FEED, SAMPLE_COLLISION and
+SAMPLE_FEED_BR.
+
+These events will only count when an SPE event is running on the same core that
+the PMU event is opened on, otherwise they read as 0. There are various ways to
+ensure that the PMU event and SPE event are scheduled together depending on the
+way the event is opened. For example opening both events as per-process events
+on the same process, although it's not guaranteed that the PMU event is enabled
+first when context switching. For that reason it may be better to open the PMU
+event as a systemwide event and then open SPE on the process of interest.
+
+Discard mode
+~~~~~~~~~~~~
+
+SPE related (SAMPLE_* etc) core PMU events can be used without the overhead of
+collecting sample data if discard mode is supported (optional from Armv8.6).
+First run a system wide SPE session (or on the core of interest) using options
+to minimize output. Then run perf stat:
+
+  perf record -e arm_spe/discard/ -a -N -B --no-bpf-event -o - > /dev/null &
+  perf stat -e SAMPLE_FEED_LD
 
 SEE ALSO
 --------
diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c
index 0029ed9c5c9a..35f521e5f41c 100644
--- a/tools/testing/selftests/arm64/abi/hwcap.c
+++ b/tools/testing/selftests/arm64/abi/hwcap.c
@@ -46,6 +46,12 @@ static void atomics_sigill(void)
 	asm volatile(".inst 0xb82003ff" : : : );
 }
 
+static void cmpbr_sigill(void)
+{
+	/* Not implemented, too complicated and unreliable anyway */
+}
+
+
 static void crc32_sigill(void)
 {
 	/* CRC32W W0, W0, W1 */
@@ -82,6 +88,18 @@ static void f8fma_sigill(void)
 	asm volatile(".inst 0xec0fc00");
 }
 
+static void f8mm4_sigill(void)
+{
+	/* FMMLA V0.4SH, V0.16B, V0.16B */
+	asm volatile(".inst 0x6e00ec00");
+}
+
+static void f8mm8_sigill(void)
+{
+	/* FMMLA V0.4S, V0.16B, V0.16B */
+	asm volatile(".inst 0x6e80ec00");
+}
+
 static void faminmax_sigill(void)
 {
 	/* FAMIN V0.4H, V0.4H, V0.4H */
@@ -98,6 +116,12 @@ static void fpmr_sigill(void)
 	asm volatile("mrs x0, S3_3_C4_C4_2" : : : "x0");
 }
 
+static void fprcvt_sigill(void)
+{
+	/* FCVTAS S0, H0 */
+	asm volatile(".inst 0x1efa0000");
+}
+
 static void gcs_sigill(void)
 {
 	unsigned long *gcspr;
@@ -226,6 +250,42 @@ static void sme2p1_sigill(void)
 	asm volatile("msr S0_3_C4_C6_3, xzr" : : : );
 }
 
+static void sme2p2_sigill(void)
+{
+	/* SMSTART SM */
+	asm volatile("msr S0_3_C4_C3_3, xzr" : : : );
+
+	/* UXTB Z0.D, P0/Z, Z0.D */
+	asm volatile(".inst 0x4c1a000" : : : );
+
+	/* SMSTOP */
+	asm volatile("msr S0_3_C4_C6_3, xzr" : : : );
+}
+
+static void sme_aes_sigill(void)
+{
+	/* SMSTART SM */
+	asm volatile("msr S0_3_C4_C3_3, xzr" : : : );
+
+	/* AESD z0.b, z0.b, z0.b */
+	asm volatile(".inst 0x4522e400" : : : "z0");
+
+	/* SMSTOP */
+	asm volatile("msr S0_3_C4_C6_3, xzr" : : : );
+}
+
+static void sme_sbitperm_sigill(void)
+{
+	/* SMSTART SM */
+	asm volatile("msr S0_3_C4_C3_3, xzr" : : : );
+
+	/* BDEP Z0.B, Z0.B, Z0.B */
+	asm volatile(".inst 0x4500b400" : : : "z0");
+
+	/* SMSTOP */
+	asm volatile("msr S0_3_C4_C6_3, xzr" : : : );
+}
+
 static void smei16i32_sigill(void)
 {
 	/* SMSTART */
@@ -339,8 +399,44 @@ static void smesf8fma_sigill(void)
 	/* SMSTART */
 	asm volatile("msr S0_3_C4_C7_3, xzr" : : : );
 
-	/* FMLALB V0.8H, V0.16B, V0.16B */
-	asm volatile(".inst 0xec0fc00");
+	/* FMLALB Z0.8H, Z0.B, Z0.B */
+	asm volatile(".inst 0x64205000");
+
+	/* SMSTOP */
+	asm volatile("msr S0_3_C4_C6_3, xzr" : : : );
+}
+
+static void smesfexpa_sigill(void)
+{
+	/* SMSTART */
+	asm volatile("msr S0_3_C4_C7_3, xzr" : : : );
+
+	/* FEXPA Z0.D, Z0.D */
+	asm volatile(".inst 0x04e0b800");
+
+	/* SMSTOP */
+	asm volatile("msr S0_3_C4_C6_3, xzr" : : : );
+}
+
+static void smesmop4_sigill(void)
+{
+	/* SMSTART */
+	asm volatile("msr S0_3_C4_C7_3, xzr" : : : );
+
+	/* SMOP4A ZA0.S, Z0.B, { Z0.B - Z1.B } */
+	asm volatile(".inst 0x80108000");
+
+	/* SMSTOP */
+	asm volatile("msr S0_3_C4_C6_3, xzr" : : : );
+}
+
+static void smestmop_sigill(void)
+{
+	/* SMSTART */
+	asm volatile("msr S0_3_C4_C7_3, xzr" : : : );
+
+	/* STMOPA ZA0.S, { Z0.H - Z1.H }, Z0.H, Z20[0] */
+	asm volatile(".inst 0x80408008");
 
 	/* SMSTOP */
 	asm volatile("msr S0_3_C4_C6_3, xzr" : : : );
@@ -364,18 +460,42 @@ static void sve2p1_sigill(void)
 	asm volatile(".inst 0x65000000" : : : "z0");
 }
 
+static void sve2p2_sigill(void)
+{
+	/* NOT Z0.D, P0/Z, Z0.D */
+	asm volatile(".inst 0x4cea000" : : : "z0");
+}
+
 static void sveaes_sigill(void)
 {
 	/* AESD z0.b, z0.b, z0.b */
 	asm volatile(".inst 0x4522e400" : : : "z0");
 }
 
+static void sveaes2_sigill(void)
+{
+	/* AESD {Z0.B - Z1.B }, { Z0.B - Z1.B }, Z0.Q */
+	asm volatile(".inst 0x4522ec00" : : : "z0");
+}
+
 static void sveb16b16_sigill(void)
 {
 	/* BFADD Z0.H, Z0.H, Z0.H */
 	asm volatile(".inst 0x65000000" : : : );
 }
 
+static void svebfscale_sigill(void)
+{
+	/* BFSCALE Z0.H, P0/M, Z0.H, Z0.H */
+	asm volatile(".inst 0x65098000" : : : "z0");
+}
+
+static void svef16mm_sigill(void)
+{
+	/* FMMLA Z0.S, Z0.H, Z0.H */
+	asm volatile(".inst 0x6420e400");
+}
+
 static void svepmull_sigill(void)
 {
 	/* PMULLB Z0.Q, Z0.D, Z0.D */
@@ -394,6 +514,12 @@ static void svesha3_sigill(void)
 	asm volatile(".inst 0x4203800" : : : "z0");
 }
 
+static void sveeltperm_sigill(void)
+{
+	/* COMPACT Z0.B, P0, Z0.B */
+	asm volatile(".inst 0x5218000" : : : "x0");
+}
+
 static void svesm4_sigill(void)
 {
 	/* SM4E Z0.S, Z0.S, Z0.S */
@@ -470,6 +596,13 @@ static const struct hwcap_data {
 		.sigill_fn = aes_sigill,
 	},
 	{
+		.name = "CMPBR",
+		.at_hwcap = AT_HWCAP,
+		.hwcap_bit = HWCAP_CMPBR,
+		.cpuinfo = "cmpbr",
+		.sigill_fn = cmpbr_sigill,
+	},
+	{
 		.name = "CRC32",
 		.at_hwcap = AT_HWCAP,
 		.hwcap_bit = HWCAP_CRC32,
@@ -524,6 +657,20 @@ static const struct hwcap_data {
 		.sigill_fn = f8fma_sigill,
 	},
 	{
+		.name = "F8MM8",
+		.at_hwcap = AT_HWCAP,
+		.hwcap_bit = HWCAP_F8MM8,
+		.cpuinfo = "f8mm8",
+		.sigill_fn = f8mm8_sigill,
+	},
+	{
+		.name = "F8MM4",
+		.at_hwcap = AT_HWCAP,
+		.hwcap_bit = HWCAP_F8MM4,
+		.cpuinfo = "f8mm4",
+		.sigill_fn = f8mm4_sigill,
+	},
+	{
 		.name = "FAMINMAX",
 		.at_hwcap = AT_HWCAP2,
 		.hwcap_bit = HWCAP2_FAMINMAX,
@@ -546,6 +693,13 @@ static const struct hwcap_data {
 		.sigill_reliable = true,
 	},
 	{
+		.name = "FPRCVT",
+		.at_hwcap = AT_HWCAP,
+		.hwcap_bit = HWCAP_FPRCVT,
+		.cpuinfo = "fprcvt",
+		.sigill_fn = fprcvt_sigill,
+	},
+	{
 		.name = "GCS",
 		.at_hwcap = AT_HWCAP,
 		.hwcap_bit = HWCAP_GCS,
@@ -692,6 +846,20 @@ static const struct hwcap_data {
 		.sigill_fn = sme2p1_sigill,
 	},
 	{
+		.name = "SME 2.2",
+		.at_hwcap = AT_HWCAP,
+		.hwcap_bit = HWCAP_SME2P2,
+		.cpuinfo = "sme2p2",
+		.sigill_fn = sme2p2_sigill,
+	},
+	{
+		.name = "SME AES",
+		.at_hwcap = AT_HWCAP,
+		.hwcap_bit = HWCAP_SME_AES,
+		.cpuinfo = "smeaes",
+		.sigill_fn = sme_aes_sigill,
+	},
+	{
 		.name = "SME I16I32",
 		.at_hwcap = AT_HWCAP2,
 		.hwcap_bit = HWCAP2_SME_I16I32,
@@ -741,6 +909,13 @@ static const struct hwcap_data {
 		.sigill_fn = smelutv2_sigill,
 	},
 	{
+		.name = "SME SBITPERM",
+		.at_hwcap = AT_HWCAP,
+		.hwcap_bit = HWCAP_SME_SBITPERM,
+		.cpuinfo = "smesbitperm",
+		.sigill_fn = sme_sbitperm_sigill,
+	},
+	{
 		.name = "SME SF8FMA",
 		.at_hwcap = AT_HWCAP2,
 		.hwcap_bit = HWCAP2_SME_SF8FMA,
@@ -762,6 +937,27 @@ static const struct hwcap_data {
 		.sigill_fn = smesf8dp4_sigill,
 	},
 	{
+		.name = "SME SFEXPA",
+		.at_hwcap = AT_HWCAP,
+		.hwcap_bit = HWCAP_SME_SFEXPA,
+		.cpuinfo = "smesfexpa",
+		.sigill_fn = smesfexpa_sigill,
+	},
+	{
+		.name = "SME SMOP4",
+		.at_hwcap = AT_HWCAP,
+		.hwcap_bit = HWCAP_SME_SMOP4,
+		.cpuinfo = "smesmop4",
+		.sigill_fn = smesmop4_sigill,
+	},
+	{
+		.name = "SME STMOP",
+		.at_hwcap = AT_HWCAP,
+		.hwcap_bit = HWCAP_SME_STMOP,
+		.cpuinfo = "smestmop",
+		.sigill_fn = smestmop_sigill,
+	},
+	{
 		.name = "SVE",
 		.at_hwcap = AT_HWCAP,
 		.hwcap_bit = HWCAP_SVE,
@@ -784,6 +980,13 @@ static const struct hwcap_data {
 		.sigill_fn = sve2p1_sigill,
 	},
 	{
+		.name = "SVE 2.2",
+		.at_hwcap = AT_HWCAP,
+		.hwcap_bit = HWCAP_SVE2P2,
+		.cpuinfo = "sve2p2",
+		.sigill_fn = sve2p2_sigill,
+	},
+	{
 		.name = "SVE AES",
 		.at_hwcap = AT_HWCAP2,
 		.hwcap_bit = HWCAP2_SVEAES,
@@ -791,6 +994,34 @@ static const struct hwcap_data {
 		.sigill_fn = sveaes_sigill,
 	},
 	{
+		.name = "SVE AES2",
+		.at_hwcap = AT_HWCAP,
+		.hwcap_bit = HWCAP_SVE_AES2,
+		.cpuinfo = "sveaes2",
+		.sigill_fn = sveaes2_sigill,
+	},
+	{
+		.name = "SVE BFSCALE",
+		.at_hwcap = AT_HWCAP,
+		.hwcap_bit = HWCAP_SVE_BFSCALE,
+		.cpuinfo = "svebfscale",
+		.sigill_fn = svebfscale_sigill,
+	},
+	{
+		.name = "SVE ELTPERM",
+		.at_hwcap = AT_HWCAP,
+		.hwcap_bit = HWCAP_SVE_ELTPERM,
+		.cpuinfo = "sveeltperm",
+		.sigill_fn = sveeltperm_sigill,
+	},
+	{
+		.name = "SVE F16MM",
+		.at_hwcap = AT_HWCAP,
+		.hwcap_bit = HWCAP_SVE_F16MM,
+		.cpuinfo = "svef16mm",
+		.sigill_fn = svef16mm_sigill,
+	},
+	{
 		.name = "SVE2 B16B16",
 		.at_hwcap = AT_HWCAP2,
 		.hwcap_bit = HWCAP2_SVE_B16B16,
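For readers who want to poke at one of the new hwcaps outside the kselftest harness, the basic pattern the selftest relies on is: read the hwcap bit from the auxiliary vector, execute the probe instruction under a SIGILL handler, and check that the two agree. The standalone sketch below is illustrative only and not part of this patch. It reuses the FCVTAS encoding from fprcvt_sigill() above, assumes an arm64 toolchain, and assumes <asm/hwcap.h> comes from kernel headers new enough to define HWCAP_FPRCVT; the helper names (fprcvt_probe, sigill_handler) are made up for the example, and sigsetjmp()/siglongjmp() is a simplification of the selftest's actual handler.

/* Illustrative sketch, not part of the patch: compares the getauxval()
 * hwcap bit with whether the probe instruction raises SIGILL.
 */
#include <setjmp.h>
#include <signal.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <sys/auxv.h>
#include <asm/hwcap.h>	/* HWCAP_FPRCVT - needs recent kernel headers */

static sigjmp_buf jmpbuf;

static void sigill_handler(int sig)
{
	/* Jump back out of the faulting instruction */
	siglongjmp(jmpbuf, 1);
}

static void fprcvt_probe(void)
{
	/* FCVTAS S0, H0 - same encoding as fprcvt_sigill() in the patch */
	asm volatile(".inst 0x1efa0000");
}

int main(void)
{
	bool from_hwcap = getauxval(AT_HWCAP) & HWCAP_FPRCVT;
	bool faulted = false;
	struct sigaction sa;

	memset(&sa, 0, sizeof(sa));
	sa.sa_handler = sigill_handler;
	sigaction(SIGILL, &sa, NULL);

	if (sigsetjmp(jmpbuf, 1))
		faulted = true;	/* probe instruction was undefined */
	else
		fprcvt_probe();

	printf("HWCAP_FPRCVT %s, FCVTAS %s\n",
	       from_hwcap ? "set" : "clear",
	       faulted ? "raised SIGILL" : "executed");

	/* Pass when the hwcap bit and the instruction behaviour agree */
	return from_hwcap == !faulted ? 0 : 1;
}

Built and run natively on the target, it exits 0 when the hwcap bit and the instruction behaviour agree, which is what the selftest reports as a pass for each entry in the hwcap_data table.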