1// SPDX-License-Identifier: GPL-2.0
2#include <linux/perf_event.h>
3#include <linux/sysfs.h>
4#include <linux/nospec.h>
5#include <asm/cpu_device_id.h>
6#include <asm/msr.h>
7
8#include "probe.h"
9
/*
 * Event identifiers of the "msr" PMU.
 *
 * The value is the event code taken from attr.config in msr_event_init()
 * and, at the same time, the index of the matching slot in msr[].  The
 * explicit numbers mirror the "event=0x.." strings exported through sysfs.
 */
enum perf_msr_id {
	PERF_MSR_TSC		= 0,	/* msr[] slot without an MSR number */
	PERF_MSR_APERF		= 1,	/* MSR_IA32_APERF */
	PERF_MSR_MPERF		= 2,	/* MSR_IA32_MPERF */
	PERF_MSR_PPERF		= 3,	/* MSR_PPERF */
	PERF_MSR_SMI		= 4,	/* MSR_SMI_COUNT */
	PERF_MSR_PTSC		= 5,	/* MSR_F15H_PTSC */
	PERF_MSR_IRPERF		= 6,	/* MSR_F17H_IRPERF */
	PERF_MSR_THERM		= 7,	/* MSR_IA32_THERM_STATUS */
	PERF_MSR_EVENT_MAX,		/* number of ids above; not an event */
};
21
22static bool test_aperfmperf(int idx, void *data)
23{
24 return boot_cpu_has(X86_FEATURE_APERFMPERF);
25}
26
27static bool test_ptsc(int idx, void *data)
28{
29 return boot_cpu_has(X86_FEATURE_PTSC);
30}
31
32static bool test_irperf(int idx, void *data)
33{
34 return boot_cpu_has(X86_FEATURE_IRPERF);
35}
36
37static bool test_therm_status(int idx, void *data)
38{
39 return boot_cpu_has(X86_FEATURE_DTHERM);
40}
41
42static bool test_intel(int idx, void *data)
43{
44 if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
45 boot_cpu_data.x86 != 6)
46 return false;
47
48 switch (boot_cpu_data.x86_vfm) {
49 case INTEL_NEHALEM:
50 case INTEL_NEHALEM_G:
51 case INTEL_NEHALEM_EP:
52 case INTEL_NEHALEM_EX:
53
54 case INTEL_WESTMERE:
55 case INTEL_WESTMERE_EP:
56 case INTEL_WESTMERE_EX:
57
58 case INTEL_SANDYBRIDGE:
59 case INTEL_SANDYBRIDGE_X:
60
61 case INTEL_IVYBRIDGE:
62 case INTEL_IVYBRIDGE_X:
63
64 case INTEL_HASWELL:
65 case INTEL_HASWELL_X:
66 case INTEL_HASWELL_L:
67 case INTEL_HASWELL_G:
68
69 case INTEL_BROADWELL:
70 case INTEL_BROADWELL_D:
71 case INTEL_BROADWELL_G:
72 case INTEL_BROADWELL_X:
73 case INTEL_SAPPHIRERAPIDS_X:
74 case INTEL_EMERALDRAPIDS_X:
75 case INTEL_GRANITERAPIDS_X:
76 case INTEL_GRANITERAPIDS_D:
77
78 case INTEL_ATOM_SILVERMONT:
79 case INTEL_ATOM_SILVERMONT_D:
80 case INTEL_ATOM_AIRMONT:
81
82 case INTEL_ATOM_GOLDMONT:
83 case INTEL_ATOM_GOLDMONT_D:
84 case INTEL_ATOM_GOLDMONT_PLUS:
85 case INTEL_ATOM_TREMONT_D:
86 case INTEL_ATOM_TREMONT:
87 case INTEL_ATOM_TREMONT_L:
88
89 case INTEL_XEON_PHI_KNL:
90 case INTEL_XEON_PHI_KNM:
91 if (idx == PERF_MSR_SMI)
92 return true;
93 break;
94
95 case INTEL_SKYLAKE_L:
96 case INTEL_SKYLAKE:
97 case INTEL_SKYLAKE_X:
98 case INTEL_KABYLAKE_L:
99 case INTEL_KABYLAKE:
100 case INTEL_COMETLAKE_L:
101 case INTEL_COMETLAKE:
102 case INTEL_ICELAKE_L:
103 case INTEL_ICELAKE:
104 case INTEL_ICELAKE_X:
105 case INTEL_ICELAKE_D:
106 case INTEL_TIGERLAKE_L:
107 case INTEL_TIGERLAKE:
108 case INTEL_ROCKETLAKE:
109 case INTEL_ALDERLAKE:
110 case INTEL_ALDERLAKE_L:
111 case INTEL_ATOM_GRACEMONT:
112 case INTEL_RAPTORLAKE:
113 case INTEL_RAPTORLAKE_P:
114 case INTEL_RAPTORLAKE_S:
115 case INTEL_METEORLAKE:
116 case INTEL_METEORLAKE_L:
117 if (idx == PERF_MSR_SMI || idx == PERF_MSR_PPERF)
118 return true;
119 break;
120 }
121
122 return false;
123}
124
125PMU_EVENT_ATTR_STRING(tsc, attr_tsc, "event=0x00" );
126PMU_EVENT_ATTR_STRING(aperf, attr_aperf, "event=0x01" );
127PMU_EVENT_ATTR_STRING(mperf, attr_mperf, "event=0x02" );
128PMU_EVENT_ATTR_STRING(pperf, attr_pperf, "event=0x03" );
129PMU_EVENT_ATTR_STRING(smi, attr_smi, "event=0x04" );
130PMU_EVENT_ATTR_STRING(ptsc, attr_ptsc, "event=0x05" );
131PMU_EVENT_ATTR_STRING(irperf, attr_irperf, "event=0x06" );
132PMU_EVENT_ATTR_STRING(cpu_thermal_margin, attr_therm, "event=0x07" );
133PMU_EVENT_ATTR_STRING(cpu_thermal_margin.snapshot, attr_therm_snap, "1" );
134PMU_EVENT_ATTR_STRING(cpu_thermal_margin.unit, attr_therm_unit, "C" );
135
/*
 * Bitmask of usable events, one bit per enum perf_msr_id value.  Assigned
 * in msr_init() from perf_msr_probe() and tested in msr_event_init().
 */
static unsigned long msr_mask;

/*
 * Per-event "events" groups; these supply the group_<name> objects that
 * msr[] and attr_update[] point at.
 */
PMU_EVENT_GROUP(events, aperf);
PMU_EVENT_GROUP(events, mperf);
PMU_EVENT_GROUP(events, pperf);
PMU_EVENT_GROUP(events, smi);
PMU_EVENT_GROUP(events, ptsc);
PMU_EVENT_GROUP(events, irperf);
144
145static struct attribute *attrs_therm[] = {
146 &attr_therm.attr.attr,
147 &attr_therm_snap.attr.attr,
148 &attr_therm_unit.attr.attr,
149 NULL,
150};
151
152static struct attribute_group group_therm = {
153 .name = "events",
154 .attrs = attrs_therm,
155};
156
157static struct perf_msr msr[] = {
158 [PERF_MSR_TSC] = { .no_check = true, },
159 [PERF_MSR_APERF] = { MSR_IA32_APERF, &group_aperf, test_aperfmperf, },
160 [PERF_MSR_MPERF] = { MSR_IA32_MPERF, .grp: &group_mperf, .test: test_aperfmperf, },
161 [PERF_MSR_PPERF] = { MSR_PPERF, .grp: &group_pperf, .test: test_intel, },
162 [PERF_MSR_SMI] = { MSR_SMI_COUNT, .grp: &group_smi, .test: test_intel, },
163 [PERF_MSR_PTSC] = { MSR_F15H_PTSC, .grp: &group_ptsc, .test: test_ptsc, },
164 [PERF_MSR_IRPERF] = { MSR_F17H_IRPERF, .grp: &group_irperf, .test: test_irperf, },
165 [PERF_MSR_THERM] = { MSR_IA32_THERM_STATUS, .grp: &group_therm, .test: test_therm_status, },
166};
167
168static struct attribute *events_attrs[] = {
169 &attr_tsc.attr.attr,
170 NULL,
171};
172
173static struct attribute_group events_attr_group = {
174 .name = "events",
175 .attrs = events_attrs,
176};
177
178PMU_FORMAT_ATTR(event, "config:0-63");
179static struct attribute *format_attrs[] = {
180 &format_attr_event.attr,
181 NULL,
182};
183static struct attribute_group format_attr_group = {
184 .name = "format",
185 .attrs = format_attrs,
186};
187
188static const struct attribute_group *attr_groups[] = {
189 &events_attr_group,
190 &format_attr_group,
191 NULL,
192};
193
194static const struct attribute_group *attr_update[] = {
195 &group_aperf,
196 &group_mperf,
197 &group_pperf,
198 &group_smi,
199 &group_ptsc,
200 &group_irperf,
201 &group_therm,
202 NULL,
203};
204
205static int msr_event_init(struct perf_event *event)
206{
207 u64 cfg = event->attr.config;
208
209 if (event->attr.type != event->pmu->type)
210 return -ENOENT;
211
212 /* unsupported modes and filters */
213 if (event->attr.sample_period) /* no sampling */
214 return -EINVAL;
215
216 if (cfg >= PERF_MSR_EVENT_MAX)
217 return -EINVAL;
218
219 cfg = array_index_nospec((unsigned long)cfg, PERF_MSR_EVENT_MAX);
220
221 if (!(msr_mask & (1 << cfg)))
222 return -EINVAL;
223
224 event->hw.idx = -1;
225 event->hw.event_base = msr[cfg].msr;
226 event->hw.config = cfg;
227
228 return 0;
229}
230
231static inline u64 msr_read_counter(struct perf_event *event)
232{
233 u64 now;
234
235 if (event->hw.event_base)
236 rdmsrq(event->hw.event_base, now);
237 else
238 now = rdtsc_ordered();
239
240 return now;
241}
242
243static void msr_event_update(struct perf_event *event)
244{
245 u64 prev, now;
246 s64 delta;
247
248 /* Careful, an NMI might modify the previous event value: */
249 prev = local64_read(&event->hw.prev_count);
250 do {
251 now = msr_read_counter(event);
252 } while (!local64_try_cmpxchg(l: &event->hw.prev_count, old: &prev, new: now));
253
254 delta = now - prev;
255 if (unlikely(event->hw.event_base == MSR_SMI_COUNT)) {
256 delta = sign_extend64(value: delta, index: 31);
257 local64_add(delta, &event->count);
258 } else if (unlikely(event->hw.event_base == MSR_IA32_THERM_STATUS)) {
259 /* If valid, extract digital readout, otherwise set to -1: */
260 now = now & (1ULL << 31) ? (now >> 16) & 0x3f : -1;
261 local64_set(&event->count, now);
262 } else {
263 local64_add(delta, &event->count);
264 }
265}
266
267static void msr_event_start(struct perf_event *event, int flags)
268{
269 u64 now = msr_read_counter(event);
270
271 local64_set(&event->hw.prev_count, now);
272}
273
/*
 * pmu::stop callback: there is nothing to disable, so stopping only
 * brings the event count up to date.  @flags is unused.
 */
static void msr_event_stop(struct perf_event *event, int flags)
{
	msr_event_update(event);
}
278
279static void msr_event_del(struct perf_event *event, int flags)
280{
281 msr_event_stop(event, PERF_EF_UPDATE);
282}
283
284static int msr_event_add(struct perf_event *event, int flags)
285{
286 if (flags & PERF_EF_START)
287 msr_event_start(event, flags);
288
289 return 0;
290}
291
292static struct pmu pmu_msr = {
293 .task_ctx_nr = perf_sw_context,
294 .attr_groups = attr_groups,
295 .event_init = msr_event_init,
296 .add = msr_event_add,
297 .del = msr_event_del,
298 .start = msr_event_start,
299 .stop = msr_event_stop,
300 .read = msr_event_update,
301 .capabilities = PERF_PMU_CAP_NO_INTERRUPT | PERF_PMU_CAP_NO_EXCLUDE,
302 .attr_update = attr_update,
303};
304
305static int __init msr_init(void)
306{
307 if (!boot_cpu_has(X86_FEATURE_TSC)) {
308 pr_cont("no MSR PMU driver.\n");
309 return 0;
310 }
311
312 msr_mask = perf_msr_probe(msr, cnt: PERF_MSR_EVENT_MAX, no_zero: true, NULL);
313
314 perf_pmu_register(pmu: &pmu_msr, name: "msr", type: -1);
315
316 return 0;
317}
318device_initcall(msr_init);
319