/*
 * Performance events - AMD IBS
 *
 *  Copyright (C) 2011 Advanced Micro Devices, Inc., Robert Richter
 *
 *  For licencing details see kernel-base/COPYING
 */

#include <linux/perf_event.h>
#include <linux/init.h>
#include <linux/export.h>
#include <linux/pci.h>
#include <linux/ptrace.h>
#include <linux/syscore_ops.h>
#include <linux/sched/clock.h>

#include <asm/apic.h>
#include <asm/msr.h>

#include "../perf_event.h"

static u32 ibs_caps;

#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)

#include <linux/kprobes.h>
#include <linux/hardirq.h>

#include <asm/nmi.h>
#include <asm/amd/ibs.h>

/* attr.config2 */
#define IBS_SW_FILTER_MASK	1

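/*
 * Setting IBS_SW_FILTER_MASK in attr.config2 requests software filtering
 * of exclude_{user,kernel} samples in the NMI handler, since the IBS
 * hardware cannot filter by privilege level on its own.
 */
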
/*
 * IBS states:
 *
 * ENABLED; tracks the pmu::add(), pmu::del() state, when set the counter is taken
 * and any further add()s must fail.
 *
 * STARTED/STOPPING/STOPPED; deal with pmu::start(), pmu::stop() state but are
 * complicated by the fact that the IBS hardware can send late NMIs (ie. after
 * we've cleared the EN bit).
 *
 * In order to consume these late NMIs we have the STOPPED state, any NMI that
 * happens after we've cleared the EN state will clear this bit and report the
 * NMI handled (this is fundamentally racy in the face of multiple NMI sources,
 * someone else can consume our BIT and our NMI will go unhandled).
 *
 * And since we cannot set/clear this separate bit together with the EN bit,
 * there are races; if we cleared STARTED early, an NMI could land in
 * between clearing STARTED and clearing the EN bit (in fact multiple NMIs
 * could happen if the period is small enough), and consume our STOPPED bit
 * and trigger streams of unhandled NMIs.
 *
 * If, however, we clear STARTED late, an NMI can hit between clearing the
 * EN bit and clearing STARTED, still see STARTED set and process the event.
 * If this event will have the VALID bit clear, we bail properly, but this
 * is not a given. With VALID set we can end up calling pmu::stop() again
 * (the throttle logic) and trigger the WARNs in there.
 *
 * So what we do is set STOPPING before clearing EN to avoid the pmu::stop()
 * nesting, and clear STARTED late, so that we have a well defined state over
 * the clearing of the EN bit.
 *
 * XXX: we could probably be using !atomic bitops for all this.
 */

enum ibs_states {
	IBS_ENABLED	= 0,
	IBS_STARTED	= 1,
	IBS_STOPPING	= 2,
	IBS_STOPPED	= 3,

	IBS_MAX_STATES,
};

struct cpu_perf_ibs {
	struct perf_event	*event;
	unsigned long		state[BITS_TO_LONGS(IBS_MAX_STATES)];
};

struct perf_ibs {
	struct pmu			pmu;
	unsigned int			msr;
	u64				config_mask;
	u64				cnt_mask;
	u64				enable_mask;
	u64				valid_mask;
	u16				min_period;
	u64				max_period;
	unsigned long			offset_mask[1];
	int				offset_max;
	unsigned int			fetch_count_reset_broken : 1;
	unsigned int			fetch_ignore_if_zero_rip : 1;
	struct cpu_perf_ibs __percpu	*pcpu;

	u64				(*get_count)(u64 config);
};

static int
perf_event_set_period(struct hw_perf_event *hwc, u64 min, u64 max, u64 *hw_period)
{
	s64 left = local64_read(&hwc->period_left);
	s64 period = hwc->sample_period;
	int overflow = 0;

	/*
	 * If we are way outside a reasonable range then just skip forward:
	 */
	if (unlikely(left <= -period)) {
		left = period;
		local64_set(&hwc->period_left, left);
		hwc->last_period = period;
		overflow = 1;
	}

	if (unlikely(left < (s64)min)) {
		left += period;
		local64_set(&hwc->period_left, left);
		hwc->last_period = period;
		overflow = 1;
	}

	/*
	 * If the hw period that triggers the sw overflow is too short
	 * we might hit the irq handler. This biases the results.
	 * Thus we shorten the next-to-last period and set the last
	 * period to the max period.
	 */
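	/*
	 * e.g. left = max + 0x100: program the shortened 0x100 period
	 * now and take the remaining max-sized period afterwards.
	 */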
	if (left > max) {
		left -= max;
		if (left > max)
			left = max;
		else if (left < min)
			left = min;
	}

	*hw_period = (u64)left;

	return overflow;
}

static int
perf_event_try_update(struct perf_event *event, u64 new_raw_count, int width)
{
	struct hw_perf_event *hwc = &event->hw;
	int shift = 64 - width;
	u64 prev_raw_count;
	u64 delta;

	/*
	 * Careful: an NMI might modify the previous event value.
	 *
	 * Our tactic to handle this is to first atomically read and
	 * exchange a new raw count - then add that new-prev delta
	 * count to the generic event atomically:
	 */
	prev_raw_count = local64_read(&hwc->prev_count);
	if (!local64_try_cmpxchg(&hwc->prev_count,
				 &prev_raw_count, new_raw_count))
		return 0;

	/*
	 * Now we have the new raw value and have updated the prev
	 * timestamp already. We can now calculate the elapsed delta
	 * (event-)time and add that to the generic event.
	 *
	 * Careful, not all hw sign-extends above the physical width
	 * of the count.
	 */
	delta = (new_raw_count << shift) - (prev_raw_count << shift);
	delta >>= shift;

	local64_add(delta, &event->count);
	local64_sub(delta, &hwc->period_left);

	return 1;
}

static struct perf_ibs perf_ibs_fetch;
static struct perf_ibs perf_ibs_op;

static struct perf_ibs *get_ibs_pmu(int type)
{
	if (perf_ibs_fetch.pmu.type == type)
		return &perf_ibs_fetch;
	if (perf_ibs_op.pmu.type == type)
		return &perf_ibs_op;
	return NULL;
}

/*
 * core pmu config -> IBS config
 *
 *  perf record -a -e cpu-cycles:p ...    # use ibs op counting cycle count
 *  perf record -a -e r076:p ...          # same as -e cpu-cycles:p
 *  perf record -a -e r0C1:p ...          # use ibs op counting micro-ops
 *
 * IbsOpCntCtl (bit 19) of IBS Execution Control Register (IbsOpCtl,
 * MSRC001_1033) is used to select either cycle or micro-ops counting
 * mode.
 */
static int core_pmu_ibs_config(struct perf_event *event, u64 *config)
{
	switch (event->attr.type) {
	case PERF_TYPE_HARDWARE:
		switch (event->attr.config) {
		case PERF_COUNT_HW_CPU_CYCLES:
			*config = 0;
			return 0;
		}
		break;
	case PERF_TYPE_RAW:
		switch (event->attr.config) {
		case 0x0076:
			*config = 0;
			return 0;
		case 0x00C1:
			*config = IBS_OP_CNT_CTL;
			return 0;
		}
		break;
	default:
		return -ENOENT;
	}

	return -EOPNOTSUPP;
}

/*
 * The rip of IBS samples has skid 0. Thus, IBS supports precise
 * levels 1 and 2 and the PERF_EFLAGS_EXACT is set. In rare cases the
 * rip is invalid when IBS was not able to record the rip correctly.
 * We clear PERF_EFLAGS_EXACT and take the rip from pt_regs then.
 */
int forward_event_to_ibs(struct perf_event *event)
{
	u64 config = 0;

	if (!event->attr.precise_ip || event->attr.precise_ip > 2)
		return -EOPNOTSUPP;

	if (!core_pmu_ibs_config(event, &config)) {
		event->attr.type = perf_ibs_op.pmu.type;
		event->attr.config = config;
	}
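	/*
	 * Returning -ENOENT makes the core retry event init against other
	 * PMUs; with attr.type rewritten above, the IBS op PMU picks the
	 * event up.
	 */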
	return -ENOENT;
}

/*
 * Grouping of IBS events is not possible since IBS can have only
 * one event active at any point in time.
 */
static int validate_group(struct perf_event *event)
{
	struct perf_event *sibling;

	if (event->group_leader == event)
		return 0;

	if (event->group_leader->pmu == event->pmu)
		return -EINVAL;

	for_each_sibling_event(sibling, event->group_leader) {
		if (sibling->pmu == event->pmu)
			return -EINVAL;
	}
	return 0;
}

static bool perf_ibs_ldlat_event(struct perf_ibs *perf_ibs,
				 struct perf_event *event)
{
	return perf_ibs == &perf_ibs_op &&
	       (ibs_caps & IBS_CAPS_OPLDLAT) &&
	       (event->attr.config1 & 0xFFF);
}

static int perf_ibs_init(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	struct perf_ibs *perf_ibs;
	u64 config;
	int ret;

	perf_ibs = get_ibs_pmu(event->attr.type);
	if (!perf_ibs)
		return -ENOENT;

	config = event->attr.config;

	if (event->pmu != &perf_ibs->pmu)
		return -ENOENT;

	if (config & ~perf_ibs->config_mask)
		return -EINVAL;

	if (has_branch_stack(event))
		return -EOPNOTSUPP;

	/* handle exclude_{user,kernel} in the IRQ handler */
	if (event->attr.exclude_host || event->attr.exclude_guest ||
	    event->attr.exclude_idle)
		return -EINVAL;

	if (!(event->attr.config2 & IBS_SW_FILTER_MASK) &&
	    (event->attr.exclude_kernel || event->attr.exclude_user ||
	     event->attr.exclude_hv))
		return -EINVAL;

	ret = validate_group(event);
	if (ret)
		return ret;

	if (hwc->sample_period) {
		if (config & perf_ibs->cnt_mask)
			/* raw max_cnt may not be set */
			return -EINVAL;

		if (event->attr.freq) {
			hwc->sample_period = perf_ibs->min_period;
		} else {
			/* Silently mask off lower nibble. IBS hw mandates it. */
			hwc->sample_period &= ~0x0FULL;
			if (hwc->sample_period < perf_ibs->min_period)
				return -EINVAL;
		}
	} else {
		u64 period = 0;

		if (event->attr.freq)
			return -EINVAL;

		if (perf_ibs == &perf_ibs_op) {
			period = (config & IBS_OP_MAX_CNT) << 4;
			if (ibs_caps & IBS_CAPS_OPCNTEXT)
				period |= config & IBS_OP_MAX_CNT_EXT_MASK;
		} else {
			period = (config & IBS_FETCH_MAX_CNT) << 4;
		}

		config &= ~perf_ibs->cnt_mask;
		event->attr.sample_period = period;
		hwc->sample_period = period;

		if (hwc->sample_period < perf_ibs->min_period)
			return -EINVAL;
	}

	if (perf_ibs_ldlat_event(perf_ibs, event)) {
		u64 ldlat = event->attr.config1 & 0xFFF;

		if (ldlat < 128 || ldlat > 2048)
			return -EINVAL;
		ldlat >>= 7;

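		/*
		 * e.g. config1 = 0x100 (256 cycles): 256 >> 7 = 2, stored
		 * below as 2 - 1 = 1 in IbsOpCtl bits 62:59.
		 */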
		config |= (ldlat - 1) << 59;
		config |= IBS_OP_L3MISSONLY | IBS_OP_LDLAT_EN;
	}

	/*
	 * If we modify hwc->sample_period, we also need to update
	 * hwc->last_period and hwc->period_left.
	 */
	hwc->last_period = hwc->sample_period;
	local64_set(&hwc->period_left, hwc->sample_period);

	hwc->config_base = perf_ibs->msr;
	hwc->config = config;

	return 0;
}

static int perf_ibs_set_period(struct perf_ibs *perf_ibs,
			       struct hw_perf_event *hwc, u64 *period)
{
	int overflow;

	/* ignore lower 4 bits in min count: */
	overflow = perf_event_set_period(hwc, perf_ibs->min_period,
					 perf_ibs->max_period, period);
	local64_set(&hwc->prev_count, 0);

	return overflow;
}

static u64 get_ibs_fetch_count(u64 config)
{
	union ibs_fetch_ctl fetch_ctl = (union ibs_fetch_ctl)config;

	return fetch_ctl.fetch_cnt << 4;
}

static u64 get_ibs_op_count(u64 config)
{
	union ibs_op_ctl op_ctl = (union ibs_op_ctl)config;
	u64 count = 0;

	/*
	 * If the internal 27-bit counter rolled over, the count is MaxCnt
	 * and the lower 7 bits of CurCnt are randomized.
	 * Otherwise CurCnt has the full 27-bit current counter value.
	 */
	if (op_ctl.op_val) {
		count = op_ctl.opmaxcnt << 4;
		if (ibs_caps & IBS_CAPS_OPCNTEXT)
			count += op_ctl.opmaxcnt_ext << 20;
	} else if (ibs_caps & IBS_CAPS_RDWROPCNT) {
		count = op_ctl.opcurcnt;
	}

	return count;
}

static void
perf_ibs_event_update(struct perf_ibs *perf_ibs, struct perf_event *event,
		      u64 *config)
{
	u64 count = perf_ibs->get_count(*config);

	/*
	 * Set width to 64 since we do not overflow on max width but
	 * instead on max count. In perf_ibs_set_period() we clear
	 * prev count manually on overflow.
	 */
	while (!perf_event_try_update(event, count, 64)) {
		rdmsrq(event->hw.config_base, *config);
		count = perf_ibs->get_count(*config);
	}
}

static inline void perf_ibs_enable_event(struct perf_ibs *perf_ibs,
					 struct hw_perf_event *hwc, u64 config)
{
	u64 tmp = hwc->config | config;

	if (perf_ibs->fetch_count_reset_broken)
		wrmsrq(hwc->config_base, tmp & ~perf_ibs->enable_mask);

	wrmsrq(hwc->config_base, tmp | perf_ibs->enable_mask);
}

/*
 * Erratum #420 Instruction-Based Sampling Engine May Generate
 * Interrupt that Cannot Be Cleared:
 *
 * Must clear counter mask first, then clear the enable bit. See
 * Revision Guide for AMD Family 10h Processors, Publication #41322.
 */
static inline void perf_ibs_disable_event(struct perf_ibs *perf_ibs,
					  struct hw_perf_event *hwc, u64 config)
{
	config &= ~perf_ibs->cnt_mask;
	if (boot_cpu_data.x86 == 0x10)
		wrmsrq(hwc->config_base, config);
	config &= ~perf_ibs->enable_mask;
	wrmsrq(hwc->config_base, config);
}

/*
 * We cannot restore the ibs pmu state, so we always need to update
 * the event while stopping it and then reset the state when starting
 * again. Thus, we ignore the PERF_EF_RELOAD and PERF_EF_UPDATE flags
 * in perf_ibs_start()/perf_ibs_stop() and instead always do it.
 */
static void perf_ibs_start(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;
	struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
	struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
	u64 period, config = 0;

	if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
		return;

	WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
	hwc->state = 0;

	if (event->attr.freq && hwc->sample_period < perf_ibs->min_period)
		hwc->sample_period = perf_ibs->min_period;

	perf_ibs_set_period(perf_ibs, hwc, &period);
	if (perf_ibs == &perf_ibs_op && (ibs_caps & IBS_CAPS_OPCNTEXT)) {
		config |= period & IBS_OP_MAX_CNT_EXT_MASK;
		period &= ~IBS_OP_MAX_CNT_EXT_MASK;
	}
	config |= period >> 4;

	/*
	 * Set STARTED before enabling the hardware, such that a subsequent NMI
	 * must observe it.
	 */
	set_bit(IBS_STARTED,    pcpu->state);
	clear_bit(IBS_STOPPING, pcpu->state);
	perf_ibs_enable_event(perf_ibs, hwc, config);

	perf_event_update_userpage(event);
}

static void perf_ibs_stop(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;
	struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
	struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
	u64 config;
	int stopping;

	if (test_and_set_bit(IBS_STOPPING, pcpu->state))
		return;

	stopping = test_bit(IBS_STARTED, pcpu->state);

	if (!stopping && (hwc->state & PERF_HES_UPTODATE))
		return;

	rdmsrq(hwc->config_base, config);

	if (stopping) {
		/*
		 * Set STOPPED before disabling the hardware, such that it
		 * must be visible to NMIs the moment we clear the EN bit,
		 * at which point we can generate an !VALID sample which
		 * we need to consume.
		 */
		set_bit(IBS_STOPPED, pcpu->state);
		perf_ibs_disable_event(perf_ibs, hwc, config);
		/*
		 * Clear STARTED after disabling the hardware; if it were
		 * cleared before, an NMI hitting after the clear but before
		 * clearing the EN bit might think it a spurious NMI and not
		 * handle it.
		 *
		 * Clearing it after, however, creates the problem of the NMI
		 * handler seeing STARTED but not having a valid sample.
		 */
		clear_bit(IBS_STARTED, pcpu->state);
		WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
		hwc->state |= PERF_HES_STOPPED;
	}

	if (hwc->state & PERF_HES_UPTODATE)
		return;

	/*
	 * Clear valid bit to not count rollovers on update, rollovers
	 * are only updated in the irq handler.
	 */
	config &= ~perf_ibs->valid_mask;

	perf_ibs_event_update(perf_ibs, event, &config);
	hwc->state |= PERF_HES_UPTODATE;
}

static int perf_ibs_add(struct perf_event *event, int flags)
{
	struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
	struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);

	if (test_and_set_bit(IBS_ENABLED, pcpu->state))
		return -ENOSPC;

	event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;

	pcpu->event = event;

	if (flags & PERF_EF_START)
		perf_ibs_start(event, PERF_EF_RELOAD);

	return 0;
}

static void perf_ibs_del(struct perf_event *event, int flags)
{
	struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
	struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);

	if (!test_and_clear_bit(IBS_ENABLED, pcpu->state))
		return;

	perf_ibs_stop(event, PERF_EF_UPDATE);

	pcpu->event = NULL;

	perf_event_update_userpage(event);
}

static void perf_ibs_read(struct perf_event *event) { }

static int perf_ibs_check_period(struct perf_event *event, u64 value)
{
	struct perf_ibs *perf_ibs;
	u64 low_nibble;

	if (event->attr.freq)
		return 0;

	perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
	low_nibble = value & 0xFULL;

	/*
	 * This contradicts perf_ibs_init(), which allows a sample period
	 * with lower nibble bits set but silently masks them off, whereas
	 * this returns an error.
	 */
	if (low_nibble || value < perf_ibs->min_period)
		return -EINVAL;

	return 0;
}

/*
 * We need to initialize with empty group if all attributes in the
 * group are dynamic.
 */
static struct attribute *attrs_empty[] = {
	NULL,
};

static struct attribute_group empty_caps_group = {
	.name = "caps",
	.attrs = attrs_empty,
};

PMU_FORMAT_ATTR(rand_en, "config:57");
PMU_FORMAT_ATTR(cnt_ctl, "config:19");
PMU_FORMAT_ATTR(swfilt, "config2:0");
PMU_EVENT_ATTR_STRING(l3missonly, fetch_l3missonly, "config:59");
PMU_EVENT_ATTR_STRING(l3missonly, op_l3missonly, "config:16");
PMU_EVENT_ATTR_STRING(ldlat, ibs_op_ldlat_format, "config1:0-11");
PMU_EVENT_ATTR_STRING(zen4_ibs_extensions, zen4_ibs_extensions, "1");
PMU_EVENT_ATTR_STRING(ldlat, ibs_op_ldlat_cap, "1");
PMU_EVENT_ATTR_STRING(dtlb_pgsize, ibs_op_dtlb_pgsize_cap, "1");

static umode_t
zen4_ibs_extensions_is_visible(struct kobject *kobj, struct attribute *attr, int i)
{
	return ibs_caps & IBS_CAPS_ZEN4 ? attr->mode : 0;
}

static umode_t
ibs_op_ldlat_is_visible(struct kobject *kobj, struct attribute *attr, int i)
{
	return ibs_caps & IBS_CAPS_OPLDLAT ? attr->mode : 0;
}

static umode_t
ibs_op_dtlb_pgsize_is_visible(struct kobject *kobj, struct attribute *attr, int i)
{
	return ibs_caps & IBS_CAPS_OPDTLBPGSIZE ? attr->mode : 0;
}

static struct attribute *fetch_attrs[] = {
	&format_attr_rand_en.attr,
	&format_attr_swfilt.attr,
	NULL,
};

static struct attribute *fetch_l3missonly_attrs[] = {
	&fetch_l3missonly.attr.attr,
	NULL,
};

static struct attribute *zen4_ibs_extensions_attrs[] = {
	&zen4_ibs_extensions.attr.attr,
	NULL,
};

static struct attribute *ibs_op_ldlat_cap_attrs[] = {
	&ibs_op_ldlat_cap.attr.attr,
	NULL,
};

static struct attribute *ibs_op_dtlb_pgsize_cap_attrs[] = {
	&ibs_op_dtlb_pgsize_cap.attr.attr,
	NULL,
};

static struct attribute_group group_fetch_formats = {
	.name = "format",
	.attrs = fetch_attrs,
};

static struct attribute_group group_fetch_l3missonly = {
	.name = "format",
	.attrs = fetch_l3missonly_attrs,
	.is_visible = zen4_ibs_extensions_is_visible,
};

static struct attribute_group group_zen4_ibs_extensions = {
	.name = "caps",
	.attrs = zen4_ibs_extensions_attrs,
	.is_visible = zen4_ibs_extensions_is_visible,
};

static struct attribute_group group_ibs_op_ldlat_cap = {
	.name = "caps",
	.attrs = ibs_op_ldlat_cap_attrs,
	.is_visible = ibs_op_ldlat_is_visible,
};

static struct attribute_group group_ibs_op_dtlb_pgsize_cap = {
	.name = "caps",
	.attrs = ibs_op_dtlb_pgsize_cap_attrs,
	.is_visible = ibs_op_dtlb_pgsize_is_visible,
};

static const struct attribute_group *fetch_attr_groups[] = {
	&group_fetch_formats,
	&empty_caps_group,
	NULL,
};

static const struct attribute_group *fetch_attr_update[] = {
	&group_fetch_l3missonly,
	&group_zen4_ibs_extensions,
	NULL,
};

static umode_t
cnt_ctl_is_visible(struct kobject *kobj, struct attribute *attr, int i)
{
	return ibs_caps & IBS_CAPS_OPCNT ? attr->mode : 0;
}

static struct attribute *op_attrs[] = {
	&format_attr_swfilt.attr,
	NULL,
};

static struct attribute *cnt_ctl_attrs[] = {
	&format_attr_cnt_ctl.attr,
	NULL,
};

static struct attribute *op_l3missonly_attrs[] = {
	&op_l3missonly.attr.attr,
	NULL,
};

static struct attribute_group group_op_formats = {
	.name = "format",
	.attrs = op_attrs,
};

static struct attribute *ibs_op_ldlat_format_attrs[] = {
	&ibs_op_ldlat_format.attr.attr,
	NULL,
};

static struct attribute_group group_cnt_ctl = {
	.name = "format",
	.attrs = cnt_ctl_attrs,
	.is_visible = cnt_ctl_is_visible,
};

static struct attribute_group group_op_l3missonly = {
	.name = "format",
	.attrs = op_l3missonly_attrs,
	.is_visible = zen4_ibs_extensions_is_visible,
};

static const struct attribute_group *op_attr_groups[] = {
	&group_op_formats,
	&empty_caps_group,
	NULL,
};

static struct attribute_group group_ibs_op_ldlat_format = {
	.name = "format",
	.attrs = ibs_op_ldlat_format_attrs,
	.is_visible = ibs_op_ldlat_is_visible,
};

static const struct attribute_group *op_attr_update[] = {
	&group_cnt_ctl,
	&group_op_l3missonly,
	&group_zen4_ibs_extensions,
	&group_ibs_op_ldlat_cap,
	&group_ibs_op_ldlat_format,
	&group_ibs_op_dtlb_pgsize_cap,
	NULL,
};

static struct perf_ibs perf_ibs_fetch = {
	.pmu = {
		.task_ctx_nr	= perf_hw_context,

		.event_init	= perf_ibs_init,
		.add		= perf_ibs_add,
		.del		= perf_ibs_del,
		.start		= perf_ibs_start,
		.stop		= perf_ibs_stop,
		.read		= perf_ibs_read,
		.check_period	= perf_ibs_check_period,
	},
	.msr			= MSR_AMD64_IBSFETCHCTL,
	.config_mask		= IBS_FETCH_MAX_CNT | IBS_FETCH_RAND_EN,
	.cnt_mask		= IBS_FETCH_MAX_CNT,
	.enable_mask		= IBS_FETCH_ENABLE,
	.valid_mask		= IBS_FETCH_VAL,
	.min_period		= 0x10,
	.max_period		= IBS_FETCH_MAX_CNT << 4,
	.offset_mask		= { MSR_AMD64_IBSFETCH_REG_MASK },
	.offset_max		= MSR_AMD64_IBSFETCH_REG_COUNT,

	.get_count		= get_ibs_fetch_count,
};

static struct perf_ibs perf_ibs_op = {
	.pmu = {
		.task_ctx_nr	= perf_hw_context,

		.event_init	= perf_ibs_init,
		.add		= perf_ibs_add,
		.del		= perf_ibs_del,
		.start		= perf_ibs_start,
		.stop		= perf_ibs_stop,
		.read		= perf_ibs_read,
		.check_period	= perf_ibs_check_period,
	},
	.msr			= MSR_AMD64_IBSOPCTL,
	.config_mask		= IBS_OP_MAX_CNT,
	.cnt_mask		= IBS_OP_MAX_CNT | IBS_OP_CUR_CNT |
				  IBS_OP_CUR_CNT_RAND,
	.enable_mask		= IBS_OP_ENABLE,
	.valid_mask		= IBS_OP_VAL,
	.min_period		= 0x90,
	.max_period		= IBS_OP_MAX_CNT << 4,
	.offset_mask		= { MSR_AMD64_IBSOP_REG_MASK },
	.offset_max		= MSR_AMD64_IBSOP_REG_COUNT,

	.get_count		= get_ibs_op_count,
};

static void perf_ibs_get_mem_op(union ibs_op_data3 *op_data3,
				struct perf_sample_data *data)
{
	union perf_mem_data_src *data_src = &data->data_src;

	data_src->mem_op = PERF_MEM_OP_NA;

	if (op_data3->ld_op)
		data_src->mem_op = PERF_MEM_OP_LOAD;
	else if (op_data3->st_op)
		data_src->mem_op = PERF_MEM_OP_STORE;
}

/*
 * Processors having CPUID_Fn8000001B_EAX[11] aka IBS_CAPS_ZEN4 have
 * more fine-grained DataSrc encodings. Others have coarse ones.
 */
static u8 perf_ibs_data_src(union ibs_op_data2 *op_data2)
{
	if (ibs_caps & IBS_CAPS_ZEN4)
		return (op_data2->data_src_hi << 3) | op_data2->data_src_lo;

	return op_data2->data_src_lo;
}

#define	L(x)		(PERF_MEM_S(LVL, x) | PERF_MEM_S(LVL, HIT))
#define	LN(x)		PERF_MEM_S(LVLNUM, x)
#define	REM		PERF_MEM_S(REMOTE, REMOTE)
#define	HOPS(x)		PERF_MEM_S(HOPS, x)

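/* The tables below map IBS DataSrc encodings to perf_mem_data_src values. */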
static u64 g_data_src[8] = {
	[IBS_DATA_SRC_LOC_CACHE]	  = L(L3) | L(REM_CCE1) | LN(ANY_CACHE) | HOPS(0),
	[IBS_DATA_SRC_DRAM]		  = L(LOC_RAM) | LN(RAM),
	[IBS_DATA_SRC_REM_CACHE]	  = L(REM_CCE2) | LN(ANY_CACHE) | REM | HOPS(1),
	[IBS_DATA_SRC_IO]		  = L(IO) | LN(IO),
};

#define RMT_NODE_BITS			(1 << IBS_DATA_SRC_DRAM)
#define RMT_NODE_APPLICABLE(x)		(RMT_NODE_BITS & (1 << x))

static u64 g_zen4_data_src[32] = {
	[IBS_DATA_SRC_EXT_LOC_CACHE]	  = L(L3) | LN(L3),
	[IBS_DATA_SRC_EXT_NEAR_CCX_CACHE] = L(REM_CCE1) | LN(ANY_CACHE) | REM | HOPS(0),
	[IBS_DATA_SRC_EXT_DRAM]		  = L(LOC_RAM) | LN(RAM),
	[IBS_DATA_SRC_EXT_FAR_CCX_CACHE]  = L(REM_CCE2) | LN(ANY_CACHE) | REM | HOPS(1),
	[IBS_DATA_SRC_EXT_PMEM]		  = LN(PMEM),
	[IBS_DATA_SRC_EXT_IO]		  = L(IO) | LN(IO),
	[IBS_DATA_SRC_EXT_EXT_MEM]	  = LN(CXL),
};

#define ZEN4_RMT_NODE_BITS		((1 << IBS_DATA_SRC_EXT_DRAM) | \
					 (1 << IBS_DATA_SRC_EXT_PMEM) | \
					 (1 << IBS_DATA_SRC_EXT_EXT_MEM))
#define ZEN4_RMT_NODE_APPLICABLE(x)	(ZEN4_RMT_NODE_BITS & (1 << x))

static __u64 perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2,
				  union ibs_op_data3 *op_data3,
				  struct perf_sample_data *data)
{
	union perf_mem_data_src *data_src = &data->data_src;
	u8 ibs_data_src = perf_ibs_data_src(op_data2);

	data_src->mem_lvl = 0;
	data_src->mem_lvl_num = 0;

	/*
	 * DcMiss, L2Miss, DataSrc, DcMissLat etc. are all invalid for Uncached
	 * memory accesses. So, check DcUcMemAcc bit early.
	 */
	if (op_data3->dc_uc_mem_acc && ibs_data_src != IBS_DATA_SRC_EXT_IO)
		return L(UNC) | LN(UNC);

	/* L1 Hit */
	if (op_data3->dc_miss == 0)
		return L(L1) | LN(L1);

	/* L2 Hit */
	if (op_data3->l2_miss == 0) {
		/* Erratum #1293 */
		if (boot_cpu_data.x86 != 0x19 || boot_cpu_data.x86_model > 0xF ||
		    !(op_data3->sw_pf || op_data3->dc_miss_no_mab_alloc))
			return L(L2) | LN(L2);
	}

	/*
	 * OP_DATA2 is valid only for load ops. Skip all checks which
	 * use OP_DATA2[DataSrc].
	 */
	if (data_src->mem_op != PERF_MEM_OP_LOAD)
		goto check_mab;

	if (ibs_caps & IBS_CAPS_ZEN4) {
		u64 val = g_zen4_data_src[ibs_data_src];

		if (!val)
			goto check_mab;

		/* HOPS_1 because IBS doesn't provide remote socket detail */
		if (op_data2->rmt_node && ZEN4_RMT_NODE_APPLICABLE(ibs_data_src)) {
			if (ibs_data_src == IBS_DATA_SRC_EXT_DRAM)
				val = L(REM_RAM1) | LN(RAM) | REM | HOPS(1);
			else
				val |= REM | HOPS(1);
		}

		return val;
	} else {
		u64 val = g_data_src[ibs_data_src];

		if (!val)
			goto check_mab;

		/* HOPS_1 because IBS doesn't provide remote socket detail */
		if (op_data2->rmt_node && RMT_NODE_APPLICABLE(ibs_data_src)) {
			if (ibs_data_src == IBS_DATA_SRC_DRAM)
				val = L(REM_RAM1) | LN(RAM) | REM | HOPS(1);
			else
				val |= REM | HOPS(1);
		}

		return val;
	}

check_mab:
	/*
	 * MAB (Miss Address Buffer) Hit. MAB keeps track of outstanding
	 * DC misses. However, such data may come from any level in mem
	 * hierarchy. IBS provides detail about both MAB as well as actual
	 * DataSrc simultaneously. Prioritize DataSrc over MAB, i.e. set
	 * MAB only when IBS fails to provide DataSrc.
	 */
	if (op_data3->dc_miss_no_mab_alloc)
		return L(LFB) | LN(LFB);

	/* Don't set HIT with NA */
	return PERF_MEM_S(LVL, NA) | LN(NA);
}

static bool perf_ibs_cache_hit_st_valid(void)
{
	/* 0: Uninitialized, 1: Valid, -1: Invalid */
	static int cache_hit_st_valid;

	if (unlikely(!cache_hit_st_valid)) {
		if (boot_cpu_data.x86 == 0x19 &&
		    (boot_cpu_data.x86_model <= 0xF ||
		     (boot_cpu_data.x86_model >= 0x20 &&
		      boot_cpu_data.x86_model <= 0x5F))) {
			cache_hit_st_valid = -1;
		} else {
			cache_hit_st_valid = 1;
		}
	}

	return cache_hit_st_valid == 1;
}

static void perf_ibs_get_mem_snoop(union ibs_op_data2 *op_data2,
				   struct perf_sample_data *data)
{
	union perf_mem_data_src *data_src = &data->data_src;
	u8 ibs_data_src;

	data_src->mem_snoop = PERF_MEM_SNOOP_NA;

	if (!perf_ibs_cache_hit_st_valid() ||
	    data_src->mem_op != PERF_MEM_OP_LOAD ||
	    data_src->mem_lvl & PERF_MEM_LVL_L1 ||
	    data_src->mem_lvl & PERF_MEM_LVL_L2 ||
	    op_data2->cache_hit_st)
		return;

	ibs_data_src = perf_ibs_data_src(op_data2);

	if (ibs_caps & IBS_CAPS_ZEN4) {
		if (ibs_data_src == IBS_DATA_SRC_EXT_LOC_CACHE ||
		    ibs_data_src == IBS_DATA_SRC_EXT_NEAR_CCX_CACHE ||
		    ibs_data_src == IBS_DATA_SRC_EXT_FAR_CCX_CACHE)
			data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
	} else if (ibs_data_src == IBS_DATA_SRC_LOC_CACHE) {
		data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
	}
}

static void perf_ibs_get_tlb_lvl(union ibs_op_data3 *op_data3,
				 struct perf_sample_data *data)
{
	union perf_mem_data_src *data_src = &data->data_src;

	data_src->mem_dtlb = PERF_MEM_TLB_NA;

	if (!op_data3->dc_lin_addr_valid)
		return;

	if ((ibs_caps & IBS_CAPS_OPDTLBPGSIZE) &&
	    !op_data3->dc_phy_addr_valid)
		return;

	if (!op_data3->dc_l1tlb_miss) {
		data_src->mem_dtlb = PERF_MEM_TLB_L1 | PERF_MEM_TLB_HIT;
		return;
	}

	if (!op_data3->dc_l2tlb_miss) {
		data_src->mem_dtlb = PERF_MEM_TLB_L2 | PERF_MEM_TLB_HIT;
		return;
	}

	data_src->mem_dtlb = PERF_MEM_TLB_L2 | PERF_MEM_TLB_MISS;
}

static void perf_ibs_get_mem_lock(union ibs_op_data3 *op_data3,
				  struct perf_sample_data *data)
{
	union perf_mem_data_src *data_src = &data->data_src;

	data_src->mem_lock = PERF_MEM_LOCK_NA;

	if (op_data3->dc_locked_op)
		data_src->mem_lock = PERF_MEM_LOCK_LOCKED;
}

/* Be careful. Works only for contiguous MSRs. */
#define ibs_fetch_msr_idx(msr)	(msr - MSR_AMD64_IBSFETCHCTL)
#define ibs_op_msr_idx(msr)	(msr - MSR_AMD64_IBSOPCTL)

static void perf_ibs_get_data_src(struct perf_ibs_data *ibs_data,
				  struct perf_sample_data *data,
				  union ibs_op_data2 *op_data2,
				  union ibs_op_data3 *op_data3)
{
	union perf_mem_data_src *data_src = &data->data_src;

	data_src->val |= perf_ibs_get_mem_lvl(op_data2, op_data3, data);
	perf_ibs_get_mem_snoop(op_data2, data);
	perf_ibs_get_tlb_lvl(op_data3, data);
	perf_ibs_get_mem_lock(op_data3, data);
}

static __u64 perf_ibs_get_op_data2(struct perf_ibs_data *ibs_data,
				   union ibs_op_data3 *op_data3)
{
	__u64 val = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA2)];

	/* Erratum #1293 */
	if (boot_cpu_data.x86 == 0x19 && boot_cpu_data.x86_model <= 0xF &&
	    (op_data3->sw_pf || op_data3->dc_miss_no_mab_alloc)) {
		/*
		 * OP_DATA2 has only two fields on Zen3: DataSrc and RmtNode.
		 * DataSrc=0 is 'No valid status' and RmtNode is invalid when
		 * DataSrc=0.
		 */
		val = 0;
	}
	return val;
}

static void perf_ibs_parse_ld_st_data(__u64 sample_type,
				      struct perf_ibs_data *ibs_data,
				      struct perf_sample_data *data)
{
	union ibs_op_data3 op_data3;
	union ibs_op_data2 op_data2;
	union ibs_op_data op_data;

	data->data_src.val = PERF_MEM_NA;
	op_data3.val = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA3)];

	perf_ibs_get_mem_op(&op_data3, data);
	if (data->data_src.mem_op != PERF_MEM_OP_LOAD &&
	    data->data_src.mem_op != PERF_MEM_OP_STORE)
		return;

	op_data2.val = perf_ibs_get_op_data2(ibs_data, &op_data3);

	if (sample_type & PERF_SAMPLE_DATA_SRC) {
		perf_ibs_get_data_src(ibs_data, data, &op_data2, &op_data3);
		data->sample_flags |= PERF_SAMPLE_DATA_SRC;
	}

	if (sample_type & PERF_SAMPLE_WEIGHT_TYPE && op_data3.dc_miss &&
	    data->data_src.mem_op == PERF_MEM_OP_LOAD) {
		op_data.val = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA)];

		if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT) {
			data->weight.var1_dw = op_data3.dc_miss_lat;
			data->weight.var2_w = op_data.tag_to_ret_ctr;
		} else if (sample_type & PERF_SAMPLE_WEIGHT) {
			data->weight.full = op_data3.dc_miss_lat;
		}
		data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
	}

	if (sample_type & PERF_SAMPLE_ADDR && op_data3.dc_lin_addr_valid) {
		data->addr = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSDCLINAD)];
		data->sample_flags |= PERF_SAMPLE_ADDR;
	}

	if (sample_type & PERF_SAMPLE_PHYS_ADDR && op_data3.dc_phy_addr_valid) {
		data->phys_addr = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSDCPHYSAD)];
		data->sample_flags |= PERF_SAMPLE_PHYS_ADDR;
	}
}

static bool perf_ibs_is_mem_sample_type(struct perf_ibs *perf_ibs,
					struct perf_event *event)
{
	u64 sample_type = event->attr.sample_type;

	return perf_ibs == &perf_ibs_op &&
	       sample_type & (PERF_SAMPLE_DATA_SRC |
			      PERF_SAMPLE_WEIGHT_TYPE |
			      PERF_SAMPLE_ADDR |
			      PERF_SAMPLE_PHYS_ADDR);
}

static int perf_ibs_get_offset_max(struct perf_ibs *perf_ibs,
				   struct perf_event *event,
				   int check_rip)
{
	if (event->attr.sample_type & PERF_SAMPLE_RAW ||
	    perf_ibs_is_mem_sample_type(perf_ibs, event) ||
	    perf_ibs_ldlat_event(perf_ibs, event))
		return perf_ibs->offset_max;
	else if (check_rip)
		return 3;
	return 1;
}

static bool perf_ibs_is_kernel_data_addr(struct perf_event *event,
					 struct perf_ibs_data *ibs_data)
{
	u64 sample_type_mask = PERF_SAMPLE_ADDR | PERF_SAMPLE_RAW;
	union ibs_op_data3 op_data3;
	u64 dc_lin_addr;

	op_data3.val = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA3)];
	dc_lin_addr = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSDCLINAD)];

	return unlikely((event->attr.sample_type & sample_type_mask) &&
			op_data3.dc_lin_addr_valid && kernel_ip(dc_lin_addr));
}

static bool perf_ibs_is_kernel_br_target(struct perf_event *event,
					 struct perf_ibs_data *ibs_data,
					 int br_target_idx)
{
	union ibs_op_data op_data;
	u64 br_target;

	op_data.val = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA)];
	br_target = ibs_data->regs[br_target_idx];

	return unlikely((event->attr.sample_type & PERF_SAMPLE_RAW) &&
			op_data.op_brn_ret && kernel_ip(br_target));
}

static bool perf_ibs_swfilt_discard(struct perf_ibs *perf_ibs, struct perf_event *event,
				    struct pt_regs *regs, struct perf_ibs_data *ibs_data,
				    int br_target_idx)
{
	if (perf_exclude_event(event, regs))
		return true;

	if (perf_ibs != &perf_ibs_op || !event->attr.exclude_kernel)
		return false;

	if (perf_ibs_is_kernel_data_addr(event, ibs_data))
		return true;

	if (br_target_idx != -1 &&
	    perf_ibs_is_kernel_br_target(event, ibs_data, br_target_idx))
		return true;

	return false;
}

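/*
 * Zap the physical address and its valid bit (IbsOpData3 bit 18 /
 * IbsFetchCtl bit 52) from the raw sample.
 */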
static void perf_ibs_phyaddr_clear(struct perf_ibs *perf_ibs,
				   struct perf_ibs_data *ibs_data)
{
	if (perf_ibs == &perf_ibs_op) {
		ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA3)] &= ~(1ULL << 18);
		ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSDCPHYSAD)] = 0;
		return;
	}

	ibs_data->regs[ibs_fetch_msr_idx(MSR_AMD64_IBSFETCHCTL)] &= ~(1ULL << 52);
	ibs_data->regs[ibs_fetch_msr_idx(MSR_AMD64_IBSFETCHPHYSAD)] = 0;
}

static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
{
	struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
	struct perf_event *event = pcpu->event;
	struct hw_perf_event *hwc;
	struct perf_sample_data data;
	struct perf_raw_record raw;
	struct pt_regs regs;
	struct perf_ibs_data ibs_data;
	int offset, size, check_rip, offset_max, throttle = 0;
	unsigned int msr;
	u64 *buf, *config, period, new_config = 0;
	int br_target_idx = -1;

	if (!test_bit(IBS_STARTED, pcpu->state)) {
fail:
		/*
		 * Catch spurious interrupts after stopping IBS: After
		 * disabling IBS there could still be incoming NMIs
		 * with samples that even have the valid bit cleared.
		 * Mark all these NMIs as handled.
		 */
		if (test_and_clear_bit(IBS_STOPPED, pcpu->state))
			return 1;

		return 0;
	}

	if (WARN_ON_ONCE(!event))
		goto fail;

	hwc = &event->hw;
	msr = hwc->config_base;
	buf = ibs_data.regs;
	rdmsrq(msr, *buf);
	if (!(*buf++ & perf_ibs->valid_mask))
		goto fail;

	config = &ibs_data.regs[0];
	perf_ibs_event_update(perf_ibs, event, config);
	perf_sample_data_init(&data, 0, hwc->last_period);
	if (!perf_ibs_set_period(perf_ibs, hwc, &period))
		goto out;	/* no sw counter overflow */

	ibs_data.caps = ibs_caps;
	size = 1;
	offset = 1;
	check_rip = (perf_ibs == &perf_ibs_op && (ibs_caps & IBS_CAPS_RIPINVALIDCHK));

	offset_max = perf_ibs_get_offset_max(perf_ibs, event, check_rip);

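	/*
	 * Gather the rest of the IBS register set; offset_mask marks which
	 * MSRs past config_base exist for this PMU flavour.
	 */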
	do {
		rdmsrq(msr + offset, *buf++);
		size++;
		offset = find_next_bit(perf_ibs->offset_mask,
				       perf_ibs->offset_max,
				       offset + 1);
	} while (offset < offset_max);

	if (perf_ibs_ldlat_event(perf_ibs, event)) {
		union ibs_op_data3 op_data3;

		op_data3.val = ibs_data.regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA3)];
		/*
		 * Opening the event fails if the load latency threshold is
		 * outside of the [128, 2048] range. Since the event has
		 * reached the interrupt handler, we can safely assume the
		 * threshold is within [128, 2048].
		 */
		if (!op_data3.ld_op || !op_data3.dc_miss ||
		    op_data3.dc_miss_lat <= (event->attr.config1 & 0xFFF))
			goto out;
	}

	/*
	 * Read IbsBrTarget, IbsOpData4, and IbsExtdCtl separately
	 * depending on their availability.
	 * Can't add to offset_max as they are staggered.
	 */
	if (event->attr.sample_type & PERF_SAMPLE_RAW) {
		if (perf_ibs == &perf_ibs_op) {
			if (ibs_caps & IBS_CAPS_BRNTRGT) {
				rdmsrq(MSR_AMD64_IBSBRTARGET, *buf++);
				br_target_idx = size;
				size++;
			}
			if (ibs_caps & IBS_CAPS_OPDATA4) {
				rdmsrq(MSR_AMD64_IBSOPDATA4, *buf++);
				size++;
			}
		}
		if (perf_ibs == &perf_ibs_fetch && (ibs_caps & IBS_CAPS_FETCHCTLEXTD)) {
			rdmsrq(MSR_AMD64_ICIBSEXTDCTL, *buf++);
			size++;
		}
	}
	ibs_data.size = sizeof(u64) * size;

	regs = *iregs;
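	/* regs[2] is IbsOpData; its RipInvalid bit flags an unusable IbsOpRip. */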
| 1325 | if (check_rip && (ibs_data.regs[2] & IBS_RIP_INVALID)) { | 
|---|
| 1326 | regs.flags &= ~PERF_EFLAGS_EXACT; | 
|---|
| 1327 | } else { | 
|---|
| 1328 | /* Workaround for erratum #1197 */ | 
|---|
| 1329 | if (perf_ibs->fetch_ignore_if_zero_rip && !(ibs_data.regs[1])) | 
|---|
| 1330 | goto out; | 
|---|
| 1331 |  | 
|---|
| 1332 | set_linear_ip(regs: ®s, ip: ibs_data.regs[1]); | 
|---|
| 1333 | regs.flags |= PERF_EFLAGS_EXACT; | 
|---|
| 1334 | } | 
|---|
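	/*
	 * PERF_EFLAGS_EXACT tells the core that the sample ip is the precise
	 * instruction that caused the event (surfaced to userspace as
	 * PERF_RECORD_MISC_EXACT_IP); when the hardware flags the RIP as
	 * invalid we fall back to the interrupted ip without that guarantee.
	 */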

	if ((event->attr.config2 & IBS_SW_FILTER_MASK) &&
	    perf_ibs_swfilt_discard(perf_ibs, event, &regs, &ibs_data, br_target_idx)) {
		throttle = perf_event_account_interrupt(event);
		goto out;
	}
	/*
	 * Prevent leaking physical addresses to unprivileged users. Skip
	 * PERF_SAMPLE_PHYS_ADDR check since generic code prevents it for
	 * unprivileged users.
	 */
	if ((event->attr.sample_type & PERF_SAMPLE_RAW) &&
	    perf_allow_kernel()) {
		perf_ibs_phyaddr_clear(perf_ibs, &ibs_data);
	}

	if (event->attr.sample_type & PERF_SAMPLE_RAW) {
		raw = (struct perf_raw_record){
			.frag = {
				.size = sizeof(u32) + ibs_data.size,
				.data = ibs_data.data,
			},
		};
		perf_sample_save_raw_data(&data, event, &raw);
	}

	if (perf_ibs == &perf_ibs_op)
		perf_ibs_parse_ld_st_data(event->attr.sample_type, &ibs_data, &data);

	/*
	 * The rip recorded by IbsOpRip is not consistent with the rsp and
	 * rbp recorded as part of the interrupt regs. Thus we need to use
	 * the rip from the interrupt regs while unwinding the call stack.
	 */
	perf_sample_save_callchain(&data, event, iregs);

	throttle = perf_event_overflow(event, &data, &regs);

	if (event->attr.freq && hwc->sample_period < perf_ibs->min_period)
		hwc->sample_period = perf_ibs->min_period;

out:
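	/*
	 * Re-arm only if the event was not throttled. The hardware MaxCnt
	 * field holds the period in units of 16 events, hence the
	 * period >> 4 below; with OPCNTEXT the upper period bits occupy a
	 * separate, unshifted field and are split off first.
	 */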
	if (!throttle) {
		if (perf_ibs == &perf_ibs_op) {
			if (ibs_caps & IBS_CAPS_OPCNTEXT) {
				new_config = period & IBS_OP_MAX_CNT_EXT_MASK;
				period &= ~IBS_OP_MAX_CNT_EXT_MASK;
			}
			if ((ibs_caps & IBS_CAPS_RDWROPCNT) && (*config & IBS_OP_CNT_CTL))
				new_config |= *config & IBS_OP_CUR_CNT_RAND;
		}
		new_config |= period >> 4;

		perf_ibs_enable_event(perf_ibs, hwc, new_config);
	}

	perf_event_update_userpage(event);

	return 1;
}

static int
perf_ibs_nmi_handler(unsigned int cmd, struct pt_regs *regs)
{
	u64 stamp = sched_clock();
	int handled = 0;

	handled += perf_ibs_handle_irq(&perf_ibs_fetch, regs);
	handled += perf_ibs_handle_irq(&perf_ibs_op, regs);

	if (handled)
		inc_irq_stat(apic_perf_irqs);

	perf_sample_event_took(sched_clock() - stamp);

	return handled;
}
NOKPROBE_SYMBOL(perf_ibs_nmi_handler);
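/*
 * Both PMUs share one NMI vector: each handler consumes its own (possibly
 * late) NMIs and the summed count is returned so the core NMI code knows
 * whether this NMI was ours. The sched_clock() bracket feeds the dynamic
 * sample-rate throttle via perf_sample_event_took().
 */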

static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name)
{
	struct cpu_perf_ibs __percpu *pcpu;
	int ret;

	pcpu = alloc_percpu(struct cpu_perf_ibs);
	if (!pcpu)
		return -ENOMEM;

	perf_ibs->pcpu = pcpu;

	ret = perf_pmu_register(&perf_ibs->pmu, name, -1);
	if (ret) {
		perf_ibs->pcpu = NULL;
		free_percpu(pcpu);
	}

	return ret;
}
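/*
 * Passing -1 as the type asks perf_pmu_register() to allocate a dynamic PMU
 * type id; userspace discovers it via
 * /sys/bus/event_source/devices/<name>/type.
 */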

static __init int perf_ibs_fetch_init(void)
{
	/*
	 * Some chips fail to reset the fetch count when it is written; instead
	 * they need a 0-1 transition of IbsFetchEn.
	 */
	if (boot_cpu_data.x86 >= 0x16 && boot_cpu_data.x86 <= 0x18)
		perf_ibs_fetch.fetch_count_reset_broken = 1;

	if (boot_cpu_data.x86 == 0x19 && boot_cpu_data.x86_model < 0x10)
		perf_ibs_fetch.fetch_ignore_if_zero_rip = 1;

	if (ibs_caps & IBS_CAPS_ZEN4)
		perf_ibs_fetch.config_mask |= IBS_FETCH_L3MISSONLY;

	perf_ibs_fetch.pmu.attr_groups = fetch_attr_groups;
	perf_ibs_fetch.pmu.attr_update = fetch_attr_update;

	return perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch");
}

static __init int perf_ibs_op_init(void)
{
	if (ibs_caps & IBS_CAPS_OPCNT)
		perf_ibs_op.config_mask |= IBS_OP_CNT_CTL;

	if (ibs_caps & IBS_CAPS_OPCNTEXT) {
		perf_ibs_op.max_period  |= IBS_OP_MAX_CNT_EXT_MASK;
		perf_ibs_op.config_mask	|= IBS_OP_MAX_CNT_EXT_MASK;
		perf_ibs_op.cnt_mask    |= (IBS_OP_MAX_CNT_EXT_MASK |
					    IBS_OP_CUR_CNT_EXT_MASK);
	}

	if (ibs_caps & IBS_CAPS_ZEN4)
		perf_ibs_op.config_mask |= IBS_OP_L3MISSONLY;

	perf_ibs_op.pmu.attr_groups = op_attr_groups;
	perf_ibs_op.pmu.attr_update = op_attr_update;

	return perf_ibs_pmu_init(&perf_ibs_op, "ibs_op");
}

static __init int perf_event_ibs_init(void)
{
	int ret;

	ret = perf_ibs_fetch_init();
	if (ret)
		return ret;

	ret = perf_ibs_op_init();
	if (ret)
		goto err_op;

	ret = register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs");
	if (ret)
		goto err_nmi;

	pr_info("perf: AMD IBS detected (0x%08x)\n", ibs_caps);
	return 0;

err_nmi:
	perf_pmu_unregister(&perf_ibs_op.pmu);
	free_percpu(perf_ibs_op.pcpu);
	perf_ibs_op.pcpu = NULL;
err_op:
	perf_pmu_unregister(&perf_ibs_fetch.pmu);
	free_percpu(perf_ibs_fetch.pcpu);
	perf_ibs_fetch.pcpu = NULL;

	return ret;
}
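/*
 * Note the unwind order: err_nmi tears down the ibs_op PMU and then falls
 * through to err_op, which tears down ibs_fetch, i.e. cleanup runs in the
 * reverse order of the registrations above.
 */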

#else /* defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) */

static __init int perf_event_ibs_init(void)
{
	return 0;
}

#endif

/* IBS - apic initialization, for perf and oprofile */

static __init u32 __get_ibs_caps(void)
{
	u32 caps;
	unsigned int max_level;

	if (!boot_cpu_has(X86_FEATURE_IBS))
		return 0;

	/* check IBS cpuid feature flags */
	max_level = cpuid_eax(0x80000000);
	if (max_level < IBS_CPUID_FEATURES)
		return IBS_CAPS_DEFAULT;

	caps = cpuid_eax(IBS_CPUID_FEATURES);
	if (!(caps & IBS_CAPS_AVAIL))
		/* cpuid flags not valid */
		return IBS_CAPS_DEFAULT;

	return caps;
}

u32 get_ibs_caps(void)
{
	return ibs_caps;
}
EXPORT_SYMBOL(get_ibs_caps);
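/*
 * IBS_CPUID_FEATURES is CPUID leaf 0x8000001b; its EAX output forms the
 * ibs_caps word (IBS_CAPS_*) tested throughout this file. On CPUs that
 * advertise X86_FEATURE_IBS but predate that leaf, IBS_CAPS_DEFAULT is
 * assumed instead.
 */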

static inline int get_eilvt(int offset)
{
	return !setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 1);
}

static inline int put_eilvt(int offset)
{
	return !setup_APIC_eilvt(offset, 0, 0, 1);
}
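/*
 * setup_APIC_eilvt() returns 0 on success, hence the negation: get_eilvt()
 * reserves the extended-LVT entry for NMI delivery (still masked), and
 * put_eilvt() releases it again by programming the entry back to zero.
 */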

/*
 * Check and reserve APIC extended interrupt LVT offset for IBS if available.
 */
static inline int ibs_eilvt_valid(void)
{
	int offset;
	u64 val;
	int valid = 0;

	preempt_disable();

	rdmsrq(MSR_AMD64_IBSCTL, val);
	offset = val & IBSCTL_LVT_OFFSET_MASK;

	if (!(val & IBSCTL_LVT_OFFSET_VALID)) {
		pr_err(FW_BUG "cpu %d, invalid IBS interrupt offset %d (MSR%08X=0x%016llx)\n",
		       smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
		goto out;
	}

	if (!get_eilvt(offset)) {
		pr_err(FW_BUG "cpu %d, IBS interrupt offset %d not available (MSR%08X=0x%016llx)\n",
		       smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
		goto out;
	}

	valid = 1;
out:
	preempt_enable();

	return valid;
}

static int setup_ibs_ctl(int ibs_eilvt_off)
{
	struct pci_dev *cpu_cfg;
	int nodes;
	u32 value = 0;

	nodes = 0;
	cpu_cfg = NULL;
	do {
		cpu_cfg = pci_get_device(PCI_VENDOR_ID_AMD,
					 PCI_DEVICE_ID_AMD_10H_NB_MISC,
					 cpu_cfg);
		if (!cpu_cfg)
			break;
		++nodes;
		pci_write_config_dword(cpu_cfg, IBSCTL, ibs_eilvt_off
				       | IBSCTL_LVT_OFFSET_VALID);
		pci_read_config_dword(cpu_cfg, IBSCTL, &value);
		if (value != (ibs_eilvt_off | IBSCTL_LVT_OFFSET_VALID)) {
			pci_dev_put(cpu_cfg);
			pr_debug("Failed to setup IBS LVT offset, IBSCTL = 0x%08x\n",
				 value);
			return -EINVAL;
		}
	} while (1);

	if (!nodes) {
		pr_debug("No CPU node configured for IBS\n");
		return -ENODEV;
	}

	return 0;
}
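/*
 * pci_get_device() continues the search from the device passed in and drops
 * that device's reference itself, so only the early-return path needs an
 * explicit pci_dev_put(). Each northbridge (one per node) gets the same LVT
 * offset written into its IBSCTL register, and the write is read back to
 * verify it stuck.
 */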

/*
 * This runs only on the current cpu. We try to find an LVT offset and
 * set up the local APIC. For this we must disable preemption. On
 * success we initialize all nodes with this offset; this then updates
 * the offset in the per-node IBS_CTL MSR. The per-core APIC setup of
 * the IBS interrupt vector is handled by the x86_pmu_amd_ibs_starting_cpu()
 * hotplug callback, which uses the new offset.
 */
static void force_ibs_eilvt_setup(void)
{
	int offset;
	int ret;

	preempt_disable();
	/* find the next available EILVT entry, skip offset 0 */
	for (offset = 1; offset < APIC_EILVT_NR_MAX; offset++) {
		if (get_eilvt(offset))
			break;
	}
	preempt_enable();

	if (offset == APIC_EILVT_NR_MAX) {
		pr_debug("No EILVT entry available\n");
		return;
	}

	ret = setup_ibs_ctl(offset);
	if (ret)
		goto out;

	if (!ibs_eilvt_valid())
		goto out;

	pr_info("LVT offset %d assigned\n", offset);

	return;
out:
	preempt_disable();
	put_eilvt(offset);
	preempt_enable();
	return;
}

static void ibs_eilvt_setup(void)
{
	/*
	 * Force LVT offset assignment for family 10h: The offsets are
	 * not assigned by the BIOS for this family, so the OS is
	 * responsible for doing it. If the OS assignment fails, fall
	 * back to the BIOS settings and try to use those.
	 */
	if (boot_cpu_data.x86 == 0x10)
		force_ibs_eilvt_setup();
}

static inline int get_ibs_lvt_offset(void)
{
	u64 val;

	rdmsrq(MSR_AMD64_IBSCTL, val);
	if (!(val & IBSCTL_LVT_OFFSET_VALID))
		return -EINVAL;

	return val & IBSCTL_LVT_OFFSET_MASK;
}

static void setup_APIC_ibs(void)
{
	int offset;

	offset = get_ibs_lvt_offset();
	if (offset < 0)
		goto failed;

	if (!setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 0))
		return;
failed:
	pr_warn("perf: IBS APIC setup failed on cpu #%d\n",
		smp_processor_id());
}

static void clear_APIC_ibs(void)
{
	int offset;

	offset = get_ibs_lvt_offset();
	if (offset >= 0)
		setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_FIX, 1);
}
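/*
 * setup_APIC_ibs() programs the reserved extended-LVT entry for unmasked
 * NMI delivery (mask = 0); clear_APIC_ibs() parks it again as a masked
 * fixed-mode entry, which is what the suspend and CPU-offline paths below
 * rely on.
 */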

static int x86_pmu_amd_ibs_starting_cpu(unsigned int cpu)
{
	setup_APIC_ibs();
	return 0;
}

#ifdef CONFIG_PM

static int perf_ibs_suspend(void)
{
	clear_APIC_ibs();
	return 0;
}

static void perf_ibs_resume(void)
{
	ibs_eilvt_setup();
	setup_APIC_ibs();
}

static struct syscore_ops perf_ibs_syscore_ops = {
	.resume		= perf_ibs_resume,
	.suspend	= perf_ibs_suspend,
};

static void perf_ibs_pm_init(void)
{
	register_syscore_ops(&perf_ibs_syscore_ops);
}

#else

static inline void perf_ibs_pm_init(void) { }

#endif

static int x86_pmu_amd_ibs_dying_cpu(unsigned int cpu)
{
	clear_APIC_ibs();
	return 0;
}

static __init int amd_ibs_init(void)
{
	u32 caps;

	caps = __get_ibs_caps();
	if (!caps)
		return -ENODEV;	/* ibs not supported by the cpu */

	ibs_eilvt_setup();

	if (!ibs_eilvt_valid())
		return -EINVAL;

	perf_ibs_pm_init();

	ibs_caps = caps;
	/* make ibs_caps visible to other cpus: */
	smp_mb();
	/*
	 * x86_pmu_amd_ibs_starting_cpu will be called from core on
	 * all online cpus.
	 */
	cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_IBS_STARTING,
			  "perf/x86/amd/ibs:starting",
			  x86_pmu_amd_ibs_starting_cpu,
			  x86_pmu_amd_ibs_dying_cpu);

	return perf_event_ibs_init();
}
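/*
 * Ordering note: ibs_caps is published (with the barrier above) before the
 * hotplug state is installed, so x86_pmu_amd_ibs_starting_cpu() never sees
 * a stale zero, and the PMUs themselves are registered last, once every
 * online CPU has its APIC LVT entry programmed.
 */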

/* Since we need the pci subsystem to init ibs we can't do this earlier: */
device_initcall(amd_ibs_init);