1// SPDX-License-Identifier: GPL-2.0
2/* Driver for Intel Xeon Phi "Knights Corner" PMU */
3
4#include <linux/perf_event.h>
5#include <linux/types.h>
6
7#include <asm/hardirq.h>
8#include <asm/msr.h>
9
10#include "../perf_event.h"
11
12static const u64 knc_perfmon_event_map[] =
13{
14 [PERF_COUNT_HW_CPU_CYCLES] = 0x002a,
15 [PERF_COUNT_HW_INSTRUCTIONS] = 0x0016,
16 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0028,
17 [PERF_COUNT_HW_CACHE_MISSES] = 0x0029,
18 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x0012,
19 [PERF_COUNT_HW_BRANCH_MISSES] = 0x002b,
20};
21
22static const u64 __initconst knc_hw_cache_event_ids
23 [PERF_COUNT_HW_CACHE_MAX]
24 [PERF_COUNT_HW_CACHE_OP_MAX]
25 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
26{
27 [ C(L1D) ] = {
28 [ C(OP_READ) ] = {
29 /* On Xeon Phi event "0" is a valid DATA_READ */
30 /* (L1 Data Cache Reads) Instruction. */
31 /* We code this as ARCH_PERFMON_EVENTSEL_INT as this */
32 /* bit will always be set in x86_pmu_hw_config(). */
33 [ C(RESULT_ACCESS) ] = ARCH_PERFMON_EVENTSEL_INT,
34 /* DATA_READ */
35 [ C(RESULT_MISS) ] = 0x0003, /* DATA_READ_MISS */
36 },
37 [ C(OP_WRITE) ] = {
38 [ C(RESULT_ACCESS) ] = 0x0001, /* DATA_WRITE */
39 [ C(RESULT_MISS) ] = 0x0004, /* DATA_WRITE_MISS */
40 },
41 [ C(OP_PREFETCH) ] = {
42 [ C(RESULT_ACCESS) ] = 0x0011, /* L1_DATA_PF1 */
43 [ C(RESULT_MISS) ] = 0x001c, /* L1_DATA_PF1_MISS */
44 },
45 },
46 [ C(L1I ) ] = {
47 [ C(OP_READ) ] = {
48 [ C(RESULT_ACCESS) ] = 0x000c, /* CODE_READ */
49 [ C(RESULT_MISS) ] = 0x000e, /* CODE_CACHE_MISS */
50 },
51 [ C(OP_WRITE) ] = {
52 [ C(RESULT_ACCESS) ] = -1,
53 [ C(RESULT_MISS) ] = -1,
54 },
55 [ C(OP_PREFETCH) ] = {
56 [ C(RESULT_ACCESS) ] = 0x0,
57 [ C(RESULT_MISS) ] = 0x0,
58 },
59 },
60 [ C(LL ) ] = {
61 [ C(OP_READ) ] = {
62 [ C(RESULT_ACCESS) ] = 0,
63 [ C(RESULT_MISS) ] = 0x10cb, /* L2_READ_MISS */
64 },
65 [ C(OP_WRITE) ] = {
66 [ C(RESULT_ACCESS) ] = 0x10cc, /* L2_WRITE_HIT */
67 [ C(RESULT_MISS) ] = 0,
68 },
69 [ C(OP_PREFETCH) ] = {
70 [ C(RESULT_ACCESS) ] = 0x10fc, /* L2_DATA_PF2 */
71 [ C(RESULT_MISS) ] = 0x10fe, /* L2_DATA_PF2_MISS */
72 },
73 },
74 [ C(DTLB) ] = {
75 [ C(OP_READ) ] = {
76 [ C(RESULT_ACCESS) ] = ARCH_PERFMON_EVENTSEL_INT,
77 /* DATA_READ */
78 /* see note on L1 OP_READ */
79 [ C(RESULT_MISS) ] = 0x0002, /* DATA_PAGE_WALK */
80 },
81 [ C(OP_WRITE) ] = {
82 [ C(RESULT_ACCESS) ] = 0x0001, /* DATA_WRITE */
83 [ C(RESULT_MISS) ] = 0x0002, /* DATA_PAGE_WALK */
84 },
85 [ C(OP_PREFETCH) ] = {
86 [ C(RESULT_ACCESS) ] = 0x0,
87 [ C(RESULT_MISS) ] = 0x0,
88 },
89 },
90 [ C(ITLB) ] = {
91 [ C(OP_READ) ] = {
92 [ C(RESULT_ACCESS) ] = 0x000c, /* CODE_READ */
93 [ C(RESULT_MISS) ] = 0x000d, /* CODE_PAGE_WALK */
94 },
95 [ C(OP_WRITE) ] = {
96 [ C(RESULT_ACCESS) ] = -1,
97 [ C(RESULT_MISS) ] = -1,
98 },
99 [ C(OP_PREFETCH) ] = {
100 [ C(RESULT_ACCESS) ] = -1,
101 [ C(RESULT_MISS) ] = -1,
102 },
103 },
104 [ C(BPU ) ] = {
105 [ C(OP_READ) ] = {
106 [ C(RESULT_ACCESS) ] = 0x0012, /* BRANCHES */
107 [ C(RESULT_MISS) ] = 0x002b, /* BRANCHES_MISPREDICTED */
108 },
109 [ C(OP_WRITE) ] = {
110 [ C(RESULT_ACCESS) ] = -1,
111 [ C(RESULT_MISS) ] = -1,
112 },
113 [ C(OP_PREFETCH) ] = {
114 [ C(RESULT_ACCESS) ] = -1,
115 [ C(RESULT_MISS) ] = -1,
116 },
117 },
118};
119
120
121static u64 knc_pmu_event_map(int hw_event)
122{
123 return knc_perfmon_event_map[hw_event];
124}
125
126static struct event_constraint knc_event_constraints[] =
127{
128 INTEL_EVENT_CONSTRAINT(0xc3, 0x1), /* HWP_L2HIT */
129 INTEL_EVENT_CONSTRAINT(0xc4, 0x1), /* HWP_L2MISS */
130 INTEL_EVENT_CONSTRAINT(0xc8, 0x1), /* L2_READ_HIT_E */
131 INTEL_EVENT_CONSTRAINT(0xc9, 0x1), /* L2_READ_HIT_M */
132 INTEL_EVENT_CONSTRAINT(0xca, 0x1), /* L2_READ_HIT_S */
133 INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* L2_READ_MISS */
134 INTEL_EVENT_CONSTRAINT(0xcc, 0x1), /* L2_WRITE_HIT */
135 INTEL_EVENT_CONSTRAINT(0xce, 0x1), /* L2_STRONGLY_ORDERED_STREAMING_VSTORES_MISS */
136 INTEL_EVENT_CONSTRAINT(0xcf, 0x1), /* L2_WEAKLY_ORDERED_STREAMING_VSTORE_MISS */
137 INTEL_EVENT_CONSTRAINT(0xd7, 0x1), /* L2_VICTIM_REQ_WITH_DATA */
138 INTEL_EVENT_CONSTRAINT(0xe3, 0x1), /* SNP_HITM_BUNIT */
139 INTEL_EVENT_CONSTRAINT(0xe6, 0x1), /* SNP_HIT_L2 */
140 INTEL_EVENT_CONSTRAINT(0xe7, 0x1), /* SNP_HITM_L2 */
141 INTEL_EVENT_CONSTRAINT(0xf1, 0x1), /* L2_DATA_READ_MISS_CACHE_FILL */
142 INTEL_EVENT_CONSTRAINT(0xf2, 0x1), /* L2_DATA_WRITE_MISS_CACHE_FILL */
143 INTEL_EVENT_CONSTRAINT(0xf6, 0x1), /* L2_DATA_READ_MISS_MEM_FILL */
144 INTEL_EVENT_CONSTRAINT(0xf7, 0x1), /* L2_DATA_WRITE_MISS_MEM_FILL */
145 INTEL_EVENT_CONSTRAINT(0xfc, 0x1), /* L2_DATA_PF2 */
146 INTEL_EVENT_CONSTRAINT(0xfd, 0x1), /* L2_DATA_PF2_DROP */
147 INTEL_EVENT_CONSTRAINT(0xfe, 0x1), /* L2_DATA_PF2_MISS */
148 INTEL_EVENT_CONSTRAINT(0xff, 0x1), /* L2_DATA_HIT_INFLIGHT_PF2 */
149 EVENT_CONSTRAINT_END
150};
151
/* KNC global performance-monitoring MSR addresses used by this driver. */
#define MSR_KNC_IA32_PERF_GLOBAL_STATUS		0x0000002d
#define MSR_KNC_IA32_PERF_GLOBAL_OVF_CONTROL	0x0000002e
#define MSR_KNC_IA32_PERF_GLOBAL_CTRL		0x0000002f

/* Per-counter enable bits toggled in MSR_KNC_IA32_PERF_GLOBAL_CTRL. */
#define KNC_ENABLE_COUNTER0			0x00000001
#define KNC_ENABLE_COUNTER1			0x00000002
158
159static void knc_pmu_disable_all(void)
160{
161 u64 val;
162
163 rdmsrq(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
164 val &= ~(KNC_ENABLE_COUNTER0|KNC_ENABLE_COUNTER1);
165 wrmsrq(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
166}
167
168static void knc_pmu_enable_all(int added)
169{
170 u64 val;
171
172 rdmsrq(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
173 val |= (KNC_ENABLE_COUNTER0|KNC_ENABLE_COUNTER1);
174 wrmsrq(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
175}
176
177static inline void
178knc_pmu_disable_event(struct perf_event *event)
179{
180 struct hw_perf_event *hwc = &event->hw;
181 u64 val;
182
183 val = hwc->config;
184 val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
185
186 (void)wrmsrq_safe(msr: hwc->config_base + hwc->idx, val);
187}
188
189static void knc_pmu_enable_event(struct perf_event *event)
190{
191 struct hw_perf_event *hwc = &event->hw;
192 u64 val;
193
194 val = hwc->config;
195 val |= ARCH_PERFMON_EVENTSEL_ENABLE;
196
197 (void)wrmsrq_safe(msr: hwc->config_base + hwc->idx, val);
198}
199
200static inline u64 knc_pmu_get_status(void)
201{
202 u64 status;
203
204 rdmsrq(MSR_KNC_IA32_PERF_GLOBAL_STATUS, status);
205
206 return status;
207}
208
209static inline void knc_pmu_ack_status(u64 ack)
210{
211 wrmsrq(MSR_KNC_IA32_PERF_GLOBAL_OVF_CONTROL, val: ack);
212}
213
214static int knc_pmu_handle_irq(struct pt_regs *regs)
215{
216 struct perf_sample_data data;
217 struct cpu_hw_events *cpuc;
218 int handled = 0;
219 int bit, loops;
220 u64 status;
221
222 cpuc = this_cpu_ptr(&cpu_hw_events);
223
224 knc_pmu_disable_all();
225
226 status = knc_pmu_get_status();
227 if (!status) {
228 knc_pmu_enable_all(added: 0);
229 return handled;
230 }
231
232 loops = 0;
233again:
234 knc_pmu_ack_status(ack: status);
235 if (++loops > 100) {
236 WARN_ONCE(1, "perf: irq loop stuck!\n");
237 perf_event_print_debug();
238 goto done;
239 }
240
241 inc_irq_stat(apic_perf_irqs);
242
243 for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
244 struct perf_event *event = cpuc->events[bit];
245 u64 last_period;
246
247 handled++;
248
249 if (!test_bit(bit, cpuc->active_mask))
250 continue;
251
252 last_period = event->hw.last_period;
253 if (!intel_pmu_save_and_restart(event))
254 continue;
255
256 perf_sample_data_init(data: &data, addr: 0, period: last_period);
257
258 perf_event_overflow(event, data: &data, regs);
259 }
260
261 /*
262 * Repeat if there is more work to be done:
263 */
264 status = knc_pmu_get_status();
265 if (status)
266 goto again;
267
268done:
269 /* Only restore PMU state when it's active. See x86_pmu_disable(). */
270 if (cpuc->enabled)
271 knc_pmu_enable_all(added: 0);
272
273 return handled;
274}
275
276
277PMU_FORMAT_ATTR(event, "config:0-7" );
278PMU_FORMAT_ATTR(umask, "config:8-15" );
279PMU_FORMAT_ATTR(edge, "config:18" );
280PMU_FORMAT_ATTR(inv, "config:23" );
281PMU_FORMAT_ATTR(cmask, "config:24-31" );
282
283static struct attribute *intel_knc_formats_attr[] = {
284 &format_attr_event.attr,
285 &format_attr_umask.attr,
286 &format_attr_edge.attr,
287 &format_attr_inv.attr,
288 &format_attr_cmask.attr,
289 NULL,
290};
291
292static const struct x86_pmu knc_pmu __initconst = {
293 .name = "knc",
294 .handle_irq = knc_pmu_handle_irq,
295 .disable_all = knc_pmu_disable_all,
296 .enable_all = knc_pmu_enable_all,
297 .enable = knc_pmu_enable_event,
298 .disable = knc_pmu_disable_event,
299 .hw_config = x86_pmu_hw_config,
300 .schedule_events = x86_schedule_events,
301 .eventsel = MSR_KNC_EVNTSEL0,
302 .perfctr = MSR_KNC_PERFCTR0,
303 .event_map = knc_pmu_event_map,
304 .max_events = ARRAY_SIZE(knc_perfmon_event_map),
305 .apic = 1,
306 .max_period = (1ULL << 39) - 1,
307 .version = 0,
308 .cntr_mask64 = 0x3,
309 .cntval_bits = 40,
310 .cntval_mask = (1ULL << 40) - 1,
311 .get_event_constraints = x86_get_event_constraints,
312 .event_constraints = knc_event_constraints,
313 .format_attrs = intel_knc_formats_attr,
314};
315
316__init int knc_pmu_init(void)
317{
318 x86_pmu = knc_pmu;
319
320 memcpy(to: hw_cache_event_ids, from: knc_hw_cache_event_ids,
321 len: sizeof(hw_cache_event_ids));
322
323 return 0;
324}
325