1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
|
[
{
"BriefDescription": "Instructions Per Cycle (per logical thread)",
"MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
"MetricGroup": "TopDownL1",
"MetricName": "IPC"
},
{
"BriefDescription": "Uops Per Instruction",
"MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
"MetricGroup": "Pipeline",
"MetricName": "UPI"
},
{
"BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions",
"MetricExpr": "min( 1 , IDQ.MITE_UOPS / ( UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY * 16 * ( ICACHE.HIT + ICACHE.MISSES ) / 4.0 ) )",
"MetricGroup": "Frontend",
"MetricName": "IFetch_Line_Utilization"
},
{
"BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)",
"MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )",
"MetricGroup": "DSB; Frontend_Bandwidth",
"MetricName": "DSB_Coverage"
},
{
"BriefDescription": "Cycles Per Instruction (threaded)",
"MetricExpr": "1 / INST_RETIRED.ANY / cycles",
"MetricGroup": "Pipeline;Summary",
"MetricName": "CPI"
},
{
"BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.",
"MetricExpr": "CPU_CLK_UNHALTED.THREAD",
"MetricGroup": "Summary",
"MetricName": "CLKS"
},
{
"BriefDescription": "Total issue-pipeline slots",
"MetricExpr": "4*( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles",
"MetricGroup": "TopDownL1",
"MetricName": "SLOTS"
},
{
"BriefDescription": "Total number of retired Instructions",
"MetricExpr": "INST_RETIRED.ANY",
"MetricGroup": "Summary",
"MetricName": "Instructions"
},
{
"BriefDescription": "Instructions Per Cycle (per physical core)",
"MetricExpr": "INST_RETIRED.ANY / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles",
"MetricGroup": "SMT",
"MetricName": "CoreIPC"
},
{
"BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
"MetricExpr": "UOPS_EXECUTED.THREAD / ( cpu@uops_executed.core\\,cmask\\=1@ / 2) if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC",
"MetricGroup": "Pipeline;Ports_Utilization",
"MetricName": "ILP"
},
{
"BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)",
"MetricExpr": "2* ( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFDATA_STALL - ( 14 * ITLB_MISSES.STLB_HIT + cpu@ITLB_MISSES.WALK_DURATION\\,cmask\\=1@ + 7* ITLB_MISSES.WALK_COMPLETED ) ) / RS_EVENTS.EMPTY_END",
"MetricGroup": "Unknown_Branches",
"MetricName": "BAClear_Cost"
},
{
"BriefDescription": "Core actual clocks when any thread is active on the physical core",
"MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD",
"MetricGroup": "SMT",
"MetricName": "CORE_CLKS"
},
{
"BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads",
"MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_UOPS_RETIRED.L1_MISS + mem_load_uops_retired.hit_lfb )",
"MetricGroup": "Memory_Bound;Memory_Lat",
"MetricName": "Load_Miss_Real_Latency"
},
{
"BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)",
"MetricExpr": "L1D_PEND_MISS.PENDING / ( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES",
"MetricGroup": "Memory_Bound;Memory_BW",
"MetricName": "MLP"
},
{
"BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
"MetricExpr": "( cpu@ITLB_MISSES.WALK_DURATION\\,cmask\\=1@ + cpu@DTLB_LOAD_MISSES.WALK_DURATION\\,cmask\\=1@ + cpu@DTLB_STORE_MISSES.WALK_DURATION\\,cmask\\=1@ + 7*(DTLB_STORE_MISSES.WALK_COMPLETED+DTLB_LOAD_MISSES.WALK_COMPLETED+ITLB_MISSES.WALK_COMPLETED)) / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles",
"MetricGroup": "TLB",
"MetricName": "Page_Walks_Utilization"
},
{
"BriefDescription": "Average CPU Utilization",
"MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
"MetricGroup": "Summary",
"MetricName": "CPU_Utilization"
},
{
"BriefDescription": "Giga Floating Point Operations Per Second",
"MetricExpr": "( 1*( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2* FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4*( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8* FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE ) / 1000000000 / duration_time",
"MetricGroup": "FLOPS;Summary",
"MetricName": "GFLOPs"
},
{
"BriefDescription": "Average Frequency Utilization relative nominal frequency",
"MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC",
"MetricGroup": "Power",
"MetricName": "Turbo_Utilization"
},
{
"BriefDescription": "Fraction of cycles where both hardware threads were active",
"MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0",
"MetricGroup": "SMT;Summary",
"MetricName": "SMT_2T_Utilization"
},
{
"BriefDescription": "Fraction of cycles spent in Kernel mode",
"MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC",
"MetricGroup": "Summary",
"MetricName": "Kernel_Utilization"
},
{
"BriefDescription": "C3 residency percent per core",
"MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100",
"MetricGroup": "Power",
"MetricName": "C3_Core_Residency"
},
{
"BriefDescription": "C6 residency percent per core",
"MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100",
"MetricGroup": "Power",
"MetricName": "C6_Core_Residency"
},
{
"BriefDescription": "C7 residency percent per core",
"MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100",
"MetricGroup": "Power",
"MetricName": "C7_Core_Residency"
},
{
"BriefDescription": "C2 residency percent per package",
"MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100",
"MetricGroup": "Power",
"MetricName": "C2_Pkg_Residency"
},
{
"BriefDescription": "C3 residency percent per package",
"MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100",
"MetricGroup": "Power",
"MetricName": "C3_Pkg_Residency"
},
{
"BriefDescription": "C6 residency percent per package",
"MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100",
"MetricGroup": "Power",
"MetricName": "C6_Pkg_Residency"
},
{
"BriefDescription": "C7 residency percent per package",
"MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100",
"MetricGroup": "Power",
"MetricName": "C7_Pkg_Residency"
}
]
|