1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
|
[
{
"BriefDescription": "Number of occurrences where a microcode assist is invoked by hardware.",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc1",
"EventName": "ASSISTS.ANY",
"PEBScounters": "0,1,2,3,4,5,6,7",
"PublicDescription": "Counts the number of occurrences where a microcode assist is invoked by hardware Examples include AD (page Access Dirty), FP and AVX related assists.",
"SampleAfterValue": "100003",
"UMask": "0x7"
},
{
"BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the Non-AVX turbo schedule.",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3",
"EventCode": "0x28",
"EventName": "CORE_POWER.LVL0_TURBO_LICENSE",
"PEBScounters": "0,1,2,3",
"PublicDescription": "Counts Core cycles where the core was running with power-delivery for baseline license level 0. This includes non-AVX codes, SSE, AVX 128-bit, and low-current AVX 256-bit codes.",
"SampleAfterValue": "200003",
"UMask": "0x7"
},
{
"BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the AVX2 turbo schedule.",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3",
"EventCode": "0x28",
"EventName": "CORE_POWER.LVL1_TURBO_LICENSE",
"PEBScounters": "0,1,2,3",
"PublicDescription": "Counts Core cycles where the core was running with power-delivery for license level 1. This includes high current AVX 256-bit instructions as well as low current AVX 512-bit instructions.",
"SampleAfterValue": "200003",
"UMask": "0x18"
},
{
"BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the AVX512 turbo schedule.",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3",
"EventCode": "0x28",
"EventName": "CORE_POWER.LVL2_TURBO_LICENSE",
"PEBScounters": "0,1,2,3",
"PublicDescription": "Core cycles where the core was running with power-delivery for license level 2 (introduced in Skylake Server microarchtecture). This includes high current AVX 512-bit instructions.",
"SampleAfterValue": "200003",
"UMask": "0x20"
},
{
"BriefDescription": "Counts demand data reads that hit a cacheline in the L3 where a snoop hit in another cores caches, data forwarding is required as the data is modified.",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
"EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM",
"MSRIndex": "0x1a6,0x1a7",
"MSRValue": "0x10003C0001",
"Offcore": "1",
"PEBScounters": "0,1,2,3",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
"UMask": "0x1"
},
{
"BriefDescription": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
"EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
"MSRIndex": "0x1a6,0x1a7",
"MSRValue": "0x8003C0001",
"Offcore": "1",
"PEBScounters": "0,1,2,3",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
"UMask": "0x1"
},
{
"BriefDescription": "Counts demand reads for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that hit a cacheline in the L3 where a snoop hit in another cores caches, data forwarding is required as the data is modified.",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
"EventName": "OCR.DEMAND_RFO.L3_HIT.SNOOP_HITM",
"MSRIndex": "0x1a6,0x1a7",
"MSRValue": "0x10003C0002",
"Offcore": "1",
"PEBScounters": "0,1,2,3",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
"UMask": "0x1"
},
{
"BriefDescription": "Counts streaming stores that have any type of response.",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
"EventName": "OCR.STREAMING_WR.ANY_RESPONSE",
"MSRIndex": "0x1a6,0x1a7",
"MSRValue": "0x10800",
"Offcore": "1",
"PEBScounters": "0,1,2,3",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
"UMask": "0x1"
},
{
"BriefDescription": "Number of PREFETCHNTA instructions executed.",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3",
"EventCode": "0x32",
"EventName": "SW_PREFETCH_ACCESS.NTA",
"PEBScounters": "0,1,2,3",
"PublicDescription": "Counts the number of PREFETCHNTA instructions executed.",
"SampleAfterValue": "100003",
"UMask": "0x1"
},
{
"BriefDescription": "Number of PREFETCHW instructions executed.",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3",
"EventCode": "0x32",
"EventName": "SW_PREFETCH_ACCESS.PREFETCHW",
"PEBScounters": "0,1,2,3",
"PublicDescription": "Counts the number of PREFETCHW instructions executed.",
"SampleAfterValue": "100003",
"UMask": "0x8"
},
{
"BriefDescription": "Number of PREFETCHT0 instructions executed.",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3",
"EventCode": "0x32",
"EventName": "SW_PREFETCH_ACCESS.T0",
"PEBScounters": "0,1,2,3",
"PublicDescription": "Counts the number of PREFETCHT0 instructions executed.",
"SampleAfterValue": "100003",
"UMask": "0x2"
},
{
"BriefDescription": "Number of PREFETCHT1 or PREFETCHT2 instructions executed.",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3",
"EventCode": "0x32",
"EventName": "SW_PREFETCH_ACCESS.T1_T2",
"PEBScounters": "0,1,2,3",
"PublicDescription": "Counts the number of PREFETCHT1 or PREFETCHT2 instructions executed.",
"SampleAfterValue": "100003",
"UMask": "0x4"
},
{
"BriefDescription": "TMA slots where no uops were being issued due to lack of back-end resources.",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa4",
"EventName": "TOPDOWN.BACKEND_BOUND_SLOTS",
"PEBScounters": "0,1,2,3,4,5,6,7",
"PublicDescription": "Counts the number of Top-down Microarchitecture Analysis (TMA) method's slots where no micro-operations were being issued from front-end to back-end of the machine due to lack of back-end resources.",
"SampleAfterValue": "10000003",
"UMask": "0x2"
},
{
"BriefDescription": "TMA slots wasted due to incorrect speculation by branch mispredictions",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa4",
"EventName": "TOPDOWN.BR_MISPREDICT_SLOTS",
"PEBScounters": "0,1,2,3,4,5,6,7",
"PublicDescription": "Number of TMA slots that were wasted due to incorrect speculation by branch mispredictions. This event estimates number of operations that were issued but not retired from the specualtive path as well as the out-of-order engine recovery past a branch misprediction.",
"SampleAfterValue": "10000003",
"UMask": "0x8"
},
{
"BriefDescription": "TMA slots available for an unhalted logical processor. Fixed counter - architectural event",
"CollectPEBSRecord": "2",
"Counter": "Fixed counter 3",
"EventName": "TOPDOWN.SLOTS",
"PEBScounters": "35",
"PublicDescription": "Number of available slots for an unhalted logical processor. The event increments by machine-width of the narrowest pipeline as employed by the Top-down Microarchitecture Analysis method (TMA). The count is distributed among unhalted logical processors (hyper-threads) who share the same physical core. Software can use this event as the denominator for the top-level metrics of the TMA method. This architectural event is counted on a designated fixed counter (Fixed Counter 3).",
"SampleAfterValue": "10000003",
"UMask": "0x4"
},
{
"BriefDescription": "TMA slots available for an unhalted logical processor. General counter - architectural event",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa4",
"EventName": "TOPDOWN.SLOTS_P",
"PEBScounters": "0,1,2,3,4,5,6,7",
"PublicDescription": "Counts the number of available slots for an unhalted logical processor. The event increments by machine-width of the narrowest pipeline as employed by the Top-down Microarchitecture Analysis method. The count is distributed among unhalted logical processors (hyper-threads) who share the same physical core.",
"SampleAfterValue": "10000003",
"UMask": "0x1"
}
]
|