diff options
Diffstat (limited to 'tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json')
-rw-r--r-- | tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json | 25 |
1 files changed, 5 insertions, 20 deletions
diff --git a/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json b/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json index e31a4aac9f20..56e54babcc26 100644 --- a/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json +++ b/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json @@ -400,7 +400,6 @@ }, { "BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to contested accesses", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "(76 * tma_info_system_average_frequency * (MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD * (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM / (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM + OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD))) + 75.5 * tma_info_system_average_frequency * MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks", "MetricGroup": "DataSharing;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group", "MetricName": "tma_contested_accesses", @@ -421,7 +420,6 @@ }, { "BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to data-sharing accesses", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "75.5 * tma_info_system_average_frequency * (MEM_LOAD_L3_HIT_RETIRED.XSNP_NO_FWD + MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD * (1 - OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM / (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM + OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD))) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks", "MetricGroup": "Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group", "MetricName": "tma_data_sharing", @@ -449,7 +447,6 @@ }, { "BriefDescription": "This metric estimates how often the CPU was stalled on accesses to external memory (DRAM) by loads", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "(MEMORY_ACTIVITY.STALLS_L3_MISS / tma_info_thread_clks - tma_pmm_bound if #has_pmem > 0 else MEMORY_ACTIVITY.STALLS_L3_MISS / tma_info_thread_clks)", "MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group", "MetricName": "tma_dram_bound", @@ -656,7 +653,6 @@ }, { "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "(tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) * tma_info_thread_slots / BR_MISP_RETIRED.ALL_BRANCHES", "MetricGroup": "Bad;BrMispredicts;tma_issueBM", "MetricName": "tma_info_bad_spec_branch_misprediction_cost", @@ -699,7 +695,6 @@ }, { "BriefDescription": "Probability of Core Bound bottleneck hidden by SMT-profiling artifacts", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "(100 * (1 - tma_core_bound / tma_ports_utilization if tma_core_bound < tma_ports_utilization else 1) if tma_info_system_smt_2t_utilization > 0.5 else 0)", "MetricGroup": "Cor;SMT", "MetricName": "tma_info_botlnk_l0_core_bound_likely", @@ -707,7 +702,6 @@ }, { "BriefDescription": "Total pipeline cost of DSB (uop cache) misses - subset of the Instruction_Fetch_BW Bottleneck", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * (tma_fetch_latency * tma_dsb_switches / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_mite / (tma_dsb + tma_mite))", "MetricGroup": "DSBmiss;Fed;tma_issueFB", "MetricName": "tma_info_botlnk_l2_dsb_misses", @@ -716,7 +710,6 @@ }, { "BriefDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * (tma_fetch_latency * tma_icache_misses / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))", "MetricGroup": "Fed;FetchLat;IcMiss;tma_issueFL", "MetricName": "tma_info_botlnk_l2_ic_misses", @@ -725,7 +718,6 @@ }, { "BriefDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses)", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * tma_fetch_latency * (tma_itlb_misses + tma_icache_misses + tma_unknown_branches) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)", "MetricGroup": "BigFoot;Fed;Frontend;IcMiss;MemoryTLB;tma_issueBC", "MetricName": "tma_info_bottleneck_big_code", @@ -742,7 +734,6 @@ }, { "BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * (tma_frontend_bound - tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) - tma_info_bottleneck_big_code", "MetricGroup": "Fed;FetchBW;Frontend", "MetricName": "tma_info_bottleneck_instruction_fetch_bw", @@ -750,7 +741,6 @@ }, { "BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full))) + tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_fb_full / (tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk))", "MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW", "MetricName": "tma_info_bottleneck_memory_bandwidth", @@ -759,7 +749,6 @@ }, { "BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * tma_memory_bound * (tma_l1_bound / max(tma_memory_bound, tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_dtlb_load / max(tma_l1_bound, tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))", "MetricGroup": "Mem;MemoryTLB;Offcore;tma_issueTLB", "MetricName": "tma_info_bottleneck_memory_data_tlbs", @@ -768,7 +757,6 @@ }, { "BriefDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches)", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound))", "MetricGroup": "Mem;MemoryLat;Offcore;tma_issueLat", "MetricName": "tma_info_bottleneck_memory_latency", @@ -777,7 +765,6 @@ }, { "BriefDescription": "Total pipeline cost of Branch Misprediction related bottlenecks", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * (tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))", "MetricGroup": "Bad;BadSpec;BrMispredicts;tma_issueBM", "MetricName": "tma_info_bottleneck_mispredictions", @@ -1301,6 +1288,7 @@ }, { "BriefDescription": "Average latency of data read request to external memory (in nanoseconds)", + "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "1e9 * (UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD / UNC_CHA_TOR_INSERTS.IA_MISS_DRD) / (tma_info_system_socket_clks / duration_time)", "MetricGroup": "Mem;MemoryLat;SoC", "MetricName": "tma_info_system_mem_read_latency", @@ -1455,7 +1443,6 @@ }, { "BriefDescription": "This metric estimates how often the CPU was stalled due to L2 cache accesses by loads", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "(MEMORY_ACTIVITY.STALLS_L1D_MISS - MEMORY_ACTIVITY.STALLS_L2_MISS) / tma_info_thread_clks", "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group", "MetricName": "tma_l2_bound", @@ -1465,7 +1452,6 @@ }, { "BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core", - "MetricConstraint": "NO_GROUP_EVENTS_NMI", "MetricExpr": "(MEMORY_ACTIVITY.STALLS_L2_MISS - MEMORY_ACTIVITY.STALLS_L3_MISS) / tma_info_thread_clks", "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group", "MetricName": "tma_l3_bound", @@ -1538,7 +1524,6 @@ }, { "BriefDescription": "This metric represents fraction of cycles the CPU spent handling cache misses due to lock operations", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "(16 * max(0, MEM_INST_RETIRED.LOCK_LOADS - L2_RQSTS.ALL_RFO) + MEM_INST_RETIRED.LOCK_LOADS / MEM_INST_RETIRED.ALL_STORES * (10 * L2_RQSTS.RFO_HIT + min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO))) / tma_info_thread_clks", "MetricGroup": "Offcore;TopdownL4;tma_L4_group;tma_issueRFO;tma_l1_bound_group", "MetricName": "tma_lock_latency", @@ -1596,6 +1581,7 @@ }, { "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to LFENCE Instructions.", + "MetricConstraint": "NO_GROUP_EVENTS_NMI", "MetricExpr": "13 * MISC2_RETIRED.LFENCE / tma_info_thread_clks", "MetricGroup": "TopdownL6;tma_L6_group;tma_serializing_operation_group", "MetricName": "tma_memory_fence", @@ -1604,7 +1590,6 @@ }, { "BriefDescription": "This metric represents fraction of slots where the CPU was retiring memory operations -- uops for memory load or store accesses.", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "tma_light_operations * MEM_UOP_RETIRED.ANY / (tma_retiring * tma_info_thread_slots)", "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group", "MetricName": "tma_memory_operations", @@ -1676,7 +1661,6 @@ }, { "BriefDescription": "This metric represents the remaining light uops fraction the CPU has executed - remaining means not covered by other sibling nodes", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "max(0, tma_light_operations - (tma_fp_arith + tma_int_operations + tma_memory_operations + tma_fused_instructions + tma_non_fused_branches + tma_nop_instructions))", "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group", "MetricName": "tma_other_light_ops", @@ -1758,6 +1742,7 @@ }, { "BriefDescription": "This metric represents fraction of cycles CPU executed total of 2 uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)", + "MetricConstraint": "NO_GROUP_EVENTS_NMI", "MetricExpr": "EXE_ACTIVITY.2_PORTS_UTIL / tma_info_thread_clks", "MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issue2P;tma_ports_utilization_group", "MetricName": "tma_ports_utilized_2", @@ -1767,6 +1752,7 @@ }, { "BriefDescription": "This metric represents fraction of cycles CPU executed total of 3 or more uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)", + "MetricConstraint": "NO_GROUP_EVENTS_NMI", "MetricExpr": "UOPS_EXECUTED.CYCLES_GE_3 / tma_info_thread_clks", "MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group", "MetricName": "tma_ports_utilized_3m", @@ -1822,6 +1808,7 @@ }, { "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to PAUSE Instructions", + "MetricConstraint": "NO_GROUP_EVENTS_NMI", "MetricExpr": "CPU_CLK_UNHALTED.PAUSE / tma_info_thread_clks", "MetricGroup": "TopdownL6;tma_L6_group;tma_serializing_operation_group", "MetricName": "tma_slow_pause", @@ -1840,7 +1827,6 @@ }, { "BriefDescription": "This metric represents rate of split store accesses", - "MetricConstraint": "NO_GROUP_EVENTS_NMI", "MetricExpr": "MEM_INST_RETIRED.SPLIT_STORES / tma_info_core_core_clks", "MetricGroup": "TopdownL4;tma_L4_group;tma_issueSpSt;tma_store_bound_group", "MetricName": "tma_split_stores", @@ -1868,7 +1854,6 @@ }, { "BriefDescription": "This metric roughly estimates fraction of cycles when the memory subsystem had loads blocked since they could not forward data from earlier (in program order) overlapping stores", - "MetricConstraint": "NO_GROUP_EVENTS_NMI", "MetricExpr": "13 * LD_BLOCKS.STORE_FORWARD / tma_info_thread_clks", "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group", "MetricName": "tma_store_fwd_blk", |