| 1 | // SPDX-License-Identifier: GPL-2.0-only | 
|---|
| 2 | /* | 
|---|
| 3 | * BTS PMU driver for perf | 
|---|
| 4 | * Copyright (c) 2013-2014, Intel Corporation. | 
|---|
| 5 | */ | 
|---|
| 6 |  | 
|---|
| 7 | #undef DEBUG | 
|---|
| 8 |  | 
|---|
| 9 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | 
|---|
| 10 |  | 
|---|
| 11 | #include <linux/bitops.h> | 
|---|
| 12 | #include <linux/types.h> | 
|---|
| 13 | #include <linux/slab.h> | 
|---|
| 14 | #include <linux/debugfs.h> | 
|---|
| 15 | #include <linux/device.h> | 
|---|
| 16 | #include <linux/coredump.h> | 
|---|
| 17 |  | 
|---|
| 18 | #include <linux/sizes.h> | 
|---|
| 19 | #include <asm/perf_event.h> | 
|---|
| 20 | #include <asm/msr.h> | 
|---|
| 21 |  | 
|---|
| 22 | #include "../perf_event.h" | 
|---|
| 23 |  | 
|---|
/*
 * Per-CPU BTS driver state: the in-flight AUX transaction handle, a
 * backup of the DS-area fields that BTS tracing clobbers, and the
 * current context state machine value (BTS_STATE_*).
 */
struct bts_ctx {
	struct perf_output_handle	handle;		/* current AUX transaction */
	struct debug_store		ds_back;	/* DS fields saved by bts_event_start() */
	int				state;		/* one of BTS_STATE_* below */
};

/* BTS context states: */
enum {
	/* no ongoing AUX transactions */
	BTS_STATE_STOPPED = 0,
	/* AUX transaction is on, BTS tracing is disabled */
	BTS_STATE_INACTIVE,
	/* AUX transaction is on, BTS tracing is running */
	BTS_STATE_ACTIVE,
};
|---|
| 39 |  | 
|---|
/* Allocated in bts_init(); NULL means the driver is unavailable. */
static struct bts_ctx __percpu *bts_ctx;

#define BTS_RECORD_SIZE		24	/* bytes per hardware BTS record */
#define BTS_SAFETY_MARGIN	4080	/* headroom kept before the PMI threshold */

/* One physically contiguous chunk of the AUX buffer. */
struct bts_phys {
	struct page	*page;		/* first page of this chunk */
	unsigned long	size;		/* usable bytes; multiple of BTS_RECORD_SIZE */
	unsigned long	offset;		/* chunk offset within the whole AUX buffer */
	unsigned long	displacement;	/* leading pad to re-align to BTS_RECORD_SIZE */
};

/* Driver-private AUX buffer descriptor built by bts_buffer_setup_aux(). */
struct bts_buffer {
	size_t		real_size;	/* multiple of BTS_RECORD_SIZE */
	unsigned int	nr_pages;	/* total pages backing the AUX area */
	unsigned int	nr_bufs;	/* number of physical chunks in buf[] */
	unsigned int	cur_buf;	/* chunk currently programmed into DS */
	bool		snapshot;	/* overwrite (no-PMI) mode? */
	local_t		data_size;	/* bytes collected since last output_end */
	local_t		head;		/* write position within the AUX buffer */
	unsigned long	end;		/* limit for the current tracing run */
	void		**data_pages;	/* page vector from the AUX allocation */
	struct bts_phys	buf[] __counted_by(nr_bufs);
};

static struct pmu bts_pmu;
|---|
| 66 |  | 
|---|
/*
 * Number of pages in one AUX sub-buffer: a plain page counts as one,
 * a high-order buffer stashes its order in page_private().
 */
static int buf_nr_pages(struct page *page)
{
	return PagePrivate(page) ? 1 << page_private(page) : 1;
}
|---|
| 74 |  | 
|---|
| 75 | static size_t buf_size(struct page *page) | 
|---|
| 76 | { | 
|---|
| 77 | return buf_nr_pages(page) * PAGE_SIZE; | 
|---|
| 78 | } | 
|---|
| 79 |  | 
|---|
| 80 | static void * | 
|---|
| 81 | bts_buffer_setup_aux(struct perf_event *event, void **pages, | 
|---|
| 82 | int nr_pages, bool overwrite) | 
|---|
| 83 | { | 
|---|
| 84 | struct bts_buffer *bb; | 
|---|
| 85 | struct page *page; | 
|---|
| 86 | int cpu = event->cpu; | 
|---|
| 87 | int node = (cpu == -1) ? cpu : cpu_to_node(cpu); | 
|---|
| 88 | unsigned long offset; | 
|---|
| 89 | size_t size = nr_pages << PAGE_SHIFT; | 
|---|
| 90 | int pg, nr_buf, pad; | 
|---|
| 91 |  | 
|---|
| 92 | /* count all the high order buffers */ | 
|---|
| 93 | for (pg = 0, nr_buf = 0; pg < nr_pages;) { | 
|---|
| 94 | page = virt_to_page(pages[pg]); | 
|---|
| 95 | pg += buf_nr_pages(page); | 
|---|
| 96 | nr_buf++; | 
|---|
| 97 | } | 
|---|
| 98 |  | 
|---|
| 99 | /* | 
|---|
| 100 | * to avoid interrupts in overwrite mode, only allow one physical | 
|---|
| 101 | */ | 
|---|
| 102 | if (overwrite && nr_buf > 1) | 
|---|
| 103 | return NULL; | 
|---|
| 104 |  | 
|---|
| 105 | bb = kzalloc_node(struct_size(bb, buf, nr_buf), GFP_KERNEL, node); | 
|---|
| 106 | if (!bb) | 
|---|
| 107 | return NULL; | 
|---|
| 108 |  | 
|---|
| 109 | bb->nr_pages = nr_pages; | 
|---|
| 110 | bb->nr_bufs = nr_buf; | 
|---|
| 111 | bb->snapshot = overwrite; | 
|---|
| 112 | bb->data_pages = pages; | 
|---|
| 113 | bb->real_size = size - size % BTS_RECORD_SIZE; | 
|---|
| 114 |  | 
|---|
| 115 | for (pg = 0, nr_buf = 0, offset = 0, pad = 0; nr_buf < bb->nr_bufs; nr_buf++) { | 
|---|
| 116 | unsigned int __nr_pages; | 
|---|
| 117 |  | 
|---|
| 118 | page = virt_to_page(pages[pg]); | 
|---|
| 119 | __nr_pages = buf_nr_pages(page); | 
|---|
| 120 | bb->buf[nr_buf].page = page; | 
|---|
| 121 | bb->buf[nr_buf].offset = offset; | 
|---|
| 122 | bb->buf[nr_buf].displacement = (pad ? BTS_RECORD_SIZE - pad : 0); | 
|---|
| 123 | bb->buf[nr_buf].size = buf_size(page) - bb->buf[nr_buf].displacement; | 
|---|
| 124 | pad = bb->buf[nr_buf].size % BTS_RECORD_SIZE; | 
|---|
| 125 | bb->buf[nr_buf].size -= pad; | 
|---|
| 126 |  | 
|---|
| 127 | pg += __nr_pages; | 
|---|
| 128 | offset += __nr_pages << PAGE_SHIFT; | 
|---|
| 129 | } | 
|---|
| 130 |  | 
|---|
| 131 | return bb; | 
|---|
| 132 | } | 
|---|
| 133 |  | 
|---|
/* Free the bts_buffer allocated by bts_buffer_setup_aux(). */
static void bts_buffer_free_aux(void *data)
{
	/* was "kfree(objp: data)" — IDE parameter hint, not valid C */
	kfree(data);
}
|---|
| 138 |  | 
|---|
| 139 | static unsigned long bts_buffer_offset(struct bts_buffer *bb, unsigned int idx) | 
|---|
| 140 | { | 
|---|
| 141 | return bb->buf[idx].offset + bb->buf[idx].displacement; | 
|---|
| 142 | } | 
|---|
| 143 |  | 
|---|
/*
 * Program this CPU's DS area for the current chunk: buffer base, write
 * index, absolute maximum and PMI threshold.  In snapshot mode the
 * threshold is placed past the maximum so the interrupt never fires.
 */
static void
bts_config_buffer(struct bts_buffer *bb)
{
	int cpu = raw_smp_processor_id();
	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
	struct bts_phys *phys = &bb->buf[bb->cur_buf];
	unsigned long index, thresh = 0, end = phys->size;
	struct page *page = phys->page;

	index = local_read(&bb->head);

	if (!bb->snapshot) {
		/* clamp the run to bb->end when it falls inside this chunk */
		if (bb->end < phys->offset + buf_size(page))
			end = bb->end - phys->offset - phys->displacement;

		/* make the write index relative to this chunk's start */
		index -= phys->offset + phys->displacement;

		/* raise the PMI early enough to keep BTS_SAFETY_MARGIN of room */
		if (end - index > BTS_SAFETY_MARGIN)
			thresh = end - BTS_SAFETY_MARGIN;
		else if (end - index > BTS_RECORD_SIZE)
			thresh = end - BTS_RECORD_SIZE;
		else
			thresh = end;
	}

	ds->bts_buffer_base = (u64)(long)page_address(page) + phys->displacement;
	ds->bts_index = ds->bts_buffer_base + index;
	ds->bts_absolute_maximum = ds->bts_buffer_base + end;
	/* snapshot: threshold beyond the maximum disables the PMI */
	ds->bts_interrupt_threshold = !bb->snapshot
		? ds->bts_buffer_base + thresh
		: ds->bts_absolute_maximum + BTS_RECORD_SIZE;
}
|---|
| 176 |  | 
|---|
| 177 | static void bts_buffer_pad_out(struct bts_phys *phys, unsigned long head) | 
|---|
| 178 | { | 
|---|
| 179 | unsigned long index = head - phys->offset; | 
|---|
| 180 |  | 
|---|
| 181 | memset(page_address(phys->page) + index, c: 0, n: phys->size - index); | 
|---|
| 182 | } | 
|---|
| 183 |  | 
|---|
| 184 | static void bts_update(struct bts_ctx *bts) | 
|---|
| 185 | { | 
|---|
| 186 | int cpu = raw_smp_processor_id(); | 
|---|
| 187 | struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; | 
|---|
| 188 | struct bts_buffer *bb = perf_get_aux(handle: &bts->handle); | 
|---|
| 189 | unsigned long index = ds->bts_index - ds->bts_buffer_base, old, head; | 
|---|
| 190 |  | 
|---|
| 191 | if (!bb) | 
|---|
| 192 | return; | 
|---|
| 193 |  | 
|---|
| 194 | head = index + bts_buffer_offset(bb, idx: bb->cur_buf); | 
|---|
| 195 | old = local_xchg(l: &bb->head, n: head); | 
|---|
| 196 |  | 
|---|
| 197 | if (!bb->snapshot) { | 
|---|
| 198 | if (old == head) | 
|---|
| 199 | return; | 
|---|
| 200 |  | 
|---|
| 201 | if (ds->bts_index >= ds->bts_absolute_maximum) | 
|---|
| 202 | perf_aux_output_flag(handle: &bts->handle, | 
|---|
| 203 | PERF_AUX_FLAG_TRUNCATED); | 
|---|
| 204 |  | 
|---|
| 205 | /* | 
|---|
| 206 | * old and head are always in the same physical buffer, so we | 
|---|
| 207 | * can subtract them to get the data size. | 
|---|
| 208 | */ | 
|---|
| 209 | local_add(i: head - old, l: &bb->data_size); | 
|---|
| 210 | } else { | 
|---|
| 211 | local_set(&bb->data_size, head); | 
|---|
| 212 | } | 
|---|
| 213 |  | 
|---|
| 214 | /* | 
|---|
| 215 | * Since BTS is coherent, just add compiler barrier to ensure | 
|---|
| 216 | * BTS updating is ordered against bts::handle::event. | 
|---|
| 217 | */ | 
|---|
| 218 | barrier(); | 
|---|
| 219 | } | 
|---|
| 220 |  | 
|---|
/* Forward declaration: defined below, needed by bts_event_start(). */
static int
bts_buffer_reset(struct bts_buffer *bb, struct perf_output_handle *handle);
|---|
| 223 |  | 
|---|
| 224 | /* | 
|---|
| 225 | * Ordering PMU callbacks wrt themselves and the PMI is done by means | 
|---|
| 226 | * of bts::state, which: | 
|---|
| 227 | *  - is set when bts::handle::event is valid, that is, between | 
|---|
| 228 | *    perf_aux_output_begin() and perf_aux_output_end(); | 
|---|
| 229 | *  - is zero otherwise; | 
|---|
| 230 | *  - is ordered against bts::handle::event with a compiler barrier. | 
|---|
| 231 | */ | 
|---|
| 232 |  | 
|---|
| 233 | static void __bts_event_start(struct perf_event *event) | 
|---|
| 234 | { | 
|---|
| 235 | struct bts_ctx *bts = this_cpu_ptr(bts_ctx); | 
|---|
| 236 | struct bts_buffer *bb = perf_get_aux(handle: &bts->handle); | 
|---|
| 237 | u64 config = 0; | 
|---|
| 238 |  | 
|---|
| 239 | if (!bb->snapshot) | 
|---|
| 240 | config |= ARCH_PERFMON_EVENTSEL_INT; | 
|---|
| 241 | if (!event->attr.exclude_kernel) | 
|---|
| 242 | config |= ARCH_PERFMON_EVENTSEL_OS; | 
|---|
| 243 | if (!event->attr.exclude_user) | 
|---|
| 244 | config |= ARCH_PERFMON_EVENTSEL_USR; | 
|---|
| 245 |  | 
|---|
| 246 | bts_config_buffer(bb); | 
|---|
| 247 |  | 
|---|
| 248 | /* | 
|---|
| 249 | * local barrier to make sure that ds configuration made it | 
|---|
| 250 | * before we enable BTS and bts::state goes ACTIVE | 
|---|
| 251 | */ | 
|---|
| 252 | wmb(); | 
|---|
| 253 |  | 
|---|
| 254 | /* INACTIVE/STOPPED -> ACTIVE */ | 
|---|
| 255 | WRITE_ONCE(bts->state, BTS_STATE_ACTIVE); | 
|---|
| 256 |  | 
|---|
| 257 | intel_pmu_enable_bts(config); | 
|---|
| 258 |  | 
|---|
| 259 | } | 
|---|
| 260 |  | 
|---|
| 261 | static void bts_event_start(struct perf_event *event, int flags) | 
|---|
| 262 | { | 
|---|
| 263 | struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); | 
|---|
| 264 | struct bts_ctx *bts = this_cpu_ptr(bts_ctx); | 
|---|
| 265 | struct bts_buffer *bb; | 
|---|
| 266 |  | 
|---|
| 267 | bb = perf_aux_output_begin(handle: &bts->handle, event); | 
|---|
| 268 | if (!bb) | 
|---|
| 269 | goto fail_stop; | 
|---|
| 270 |  | 
|---|
| 271 | if (bts_buffer_reset(bb, handle: &bts->handle)) | 
|---|
| 272 | goto fail_end_stop; | 
|---|
| 273 |  | 
|---|
| 274 | bts->ds_back.bts_buffer_base = cpuc->ds->bts_buffer_base; | 
|---|
| 275 | bts->ds_back.bts_absolute_maximum = cpuc->ds->bts_absolute_maximum; | 
|---|
| 276 | bts->ds_back.bts_interrupt_threshold = cpuc->ds->bts_interrupt_threshold; | 
|---|
| 277 |  | 
|---|
| 278 | perf_event_itrace_started(event); | 
|---|
| 279 | event->hw.state = 0; | 
|---|
| 280 |  | 
|---|
| 281 | __bts_event_start(event); | 
|---|
| 282 |  | 
|---|
| 283 | return; | 
|---|
| 284 |  | 
|---|
| 285 | fail_end_stop: | 
|---|
| 286 | perf_aux_output_end(handle: &bts->handle, size: 0); | 
|---|
| 287 |  | 
|---|
| 288 | fail_stop: | 
|---|
| 289 | event->hw.state = PERF_HES_STOPPED; | 
|---|
| 290 | } | 
|---|
| 291 |  | 
|---|
/*
 * Disable BTS tracing and record the new context state: STOPPED when
 * called from ::stop(), INACTIVE when called from the PMI path.
 */
static void __bts_event_stop(struct perf_event *event, int state)
{
	struct bts_ctx *bts = this_cpu_ptr(bts_ctx);

	/* ACTIVE -> INACTIVE(PMI)/STOPPED(->stop()) */
	WRITE_ONCE(bts->state, state);

	/*
	 * No extra synchronization is mandated by the documentation to have
	 * BTS data stores globally visible.
	 */
	intel_pmu_disable_bts();
}
|---|
| 305 |  | 
|---|
| 306 | static void bts_event_stop(struct perf_event *event, int flags) | 
|---|
| 307 | { | 
|---|
| 308 | struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); | 
|---|
| 309 | struct bts_ctx *bts = this_cpu_ptr(bts_ctx); | 
|---|
| 310 | struct bts_buffer *bb = NULL; | 
|---|
| 311 | int state = READ_ONCE(bts->state); | 
|---|
| 312 |  | 
|---|
| 313 | if (state == BTS_STATE_ACTIVE) | 
|---|
| 314 | __bts_event_stop(event, state: BTS_STATE_STOPPED); | 
|---|
| 315 |  | 
|---|
| 316 | if (state != BTS_STATE_STOPPED) | 
|---|
| 317 | bb = perf_get_aux(handle: &bts->handle); | 
|---|
| 318 |  | 
|---|
| 319 | event->hw.state |= PERF_HES_STOPPED; | 
|---|
| 320 |  | 
|---|
| 321 | if (flags & PERF_EF_UPDATE) { | 
|---|
| 322 | bts_update(bts); | 
|---|
| 323 |  | 
|---|
| 324 | if (bb) { | 
|---|
| 325 | if (bb->snapshot) | 
|---|
| 326 | bts->handle.head = | 
|---|
| 327 | local_xchg(l: &bb->data_size, | 
|---|
| 328 | n: bb->nr_pages << PAGE_SHIFT); | 
|---|
| 329 | perf_aux_output_end(handle: &bts->handle, | 
|---|
| 330 | size: local_xchg(l: &bb->data_size, n: 0)); | 
|---|
| 331 | } | 
|---|
| 332 |  | 
|---|
| 333 | cpuc->ds->bts_index = bts->ds_back.bts_buffer_base; | 
|---|
| 334 | cpuc->ds->bts_buffer_base = bts->ds_back.bts_buffer_base; | 
|---|
| 335 | cpuc->ds->bts_absolute_maximum = bts->ds_back.bts_absolute_maximum; | 
|---|
| 336 | cpuc->ds->bts_interrupt_threshold = bts->ds_back.bts_interrupt_threshold; | 
|---|
| 337 | } | 
|---|
| 338 | } | 
|---|
| 339 |  | 
|---|
| 340 | void intel_bts_enable_local(void) | 
|---|
| 341 | { | 
|---|
| 342 | struct bts_ctx *bts; | 
|---|
| 343 | int state; | 
|---|
| 344 |  | 
|---|
| 345 | if (!bts_ctx) | 
|---|
| 346 | return; | 
|---|
| 347 |  | 
|---|
| 348 | bts = this_cpu_ptr(bts_ctx); | 
|---|
| 349 | state = READ_ONCE(bts->state); | 
|---|
| 350 | /* | 
|---|
| 351 | * Here we transition from INACTIVE to ACTIVE; | 
|---|
| 352 | * if we instead are STOPPED from the interrupt handler, | 
|---|
| 353 | * stay that way. Can't be ACTIVE here though. | 
|---|
| 354 | */ | 
|---|
| 355 | if (WARN_ON_ONCE(state == BTS_STATE_ACTIVE)) | 
|---|
| 356 | return; | 
|---|
| 357 |  | 
|---|
| 358 | if (state == BTS_STATE_STOPPED) | 
|---|
| 359 | return; | 
|---|
| 360 |  | 
|---|
| 361 | if (bts->handle.event) | 
|---|
| 362 | __bts_event_start(event: bts->handle.event); | 
|---|
| 363 | } | 
|---|
| 364 |  | 
|---|
| 365 | void intel_bts_disable_local(void) | 
|---|
| 366 | { | 
|---|
| 367 | struct bts_ctx *bts; | 
|---|
| 368 |  | 
|---|
| 369 | if (!bts_ctx) | 
|---|
| 370 | return; | 
|---|
| 371 |  | 
|---|
| 372 | bts = this_cpu_ptr(bts_ctx); | 
|---|
| 373 |  | 
|---|
| 374 | /* | 
|---|
| 375 | * Here we transition from ACTIVE to INACTIVE; | 
|---|
| 376 | * do nothing for STOPPED or INACTIVE. | 
|---|
| 377 | */ | 
|---|
| 378 | if (READ_ONCE(bts->state) != BTS_STATE_ACTIVE) | 
|---|
| 379 | return; | 
|---|
| 380 |  | 
|---|
| 381 | if (bts->handle.event) | 
|---|
| 382 | __bts_event_stop(event: bts->handle.event, state: BTS_STATE_INACTIVE); | 
|---|
| 383 | } | 
|---|
| 384 |  | 
|---|
| 385 | static int | 
|---|
| 386 | bts_buffer_reset(struct bts_buffer *bb, struct perf_output_handle *handle) | 
|---|
| 387 | { | 
|---|
| 388 | unsigned long head, space, next_space, pad, gap, skip, wakeup; | 
|---|
| 389 | unsigned int next_buf; | 
|---|
| 390 | struct bts_phys *phys, *next_phys; | 
|---|
| 391 | int ret; | 
|---|
| 392 |  | 
|---|
| 393 | if (bb->snapshot) | 
|---|
| 394 | return 0; | 
|---|
| 395 |  | 
|---|
| 396 | head = handle->head & ((bb->nr_pages << PAGE_SHIFT) - 1); | 
|---|
| 397 |  | 
|---|
| 398 | phys = &bb->buf[bb->cur_buf]; | 
|---|
| 399 | space = phys->offset + phys->displacement + phys->size - head; | 
|---|
| 400 | pad = space; | 
|---|
| 401 | if (space > handle->size) { | 
|---|
| 402 | space = handle->size; | 
|---|
| 403 | space -= space % BTS_RECORD_SIZE; | 
|---|
| 404 | } | 
|---|
| 405 | if (space <= BTS_SAFETY_MARGIN) { | 
|---|
| 406 | /* See if next phys buffer has more space */ | 
|---|
| 407 | next_buf = bb->cur_buf + 1; | 
|---|
| 408 | if (next_buf >= bb->nr_bufs) | 
|---|
| 409 | next_buf = 0; | 
|---|
| 410 | next_phys = &bb->buf[next_buf]; | 
|---|
| 411 | gap = buf_size(page: phys->page) - phys->displacement - phys->size + | 
|---|
| 412 | next_phys->displacement; | 
|---|
| 413 | skip = pad + gap; | 
|---|
| 414 | if (handle->size >= skip) { | 
|---|
| 415 | next_space = next_phys->size; | 
|---|
| 416 | if (next_space + skip > handle->size) { | 
|---|
| 417 | next_space = handle->size - skip; | 
|---|
| 418 | next_space -= next_space % BTS_RECORD_SIZE; | 
|---|
| 419 | } | 
|---|
| 420 | if (next_space > space || !space) { | 
|---|
| 421 | if (pad) | 
|---|
| 422 | bts_buffer_pad_out(phys, head); | 
|---|
| 423 | ret = perf_aux_output_skip(handle, size: skip); | 
|---|
| 424 | if (ret) | 
|---|
| 425 | return ret; | 
|---|
| 426 | /* Advance to next phys buffer */ | 
|---|
| 427 | phys = next_phys; | 
|---|
| 428 | space = next_space; | 
|---|
| 429 | head = phys->offset + phys->displacement; | 
|---|
| 430 | /* | 
|---|
| 431 | * After this, cur_buf and head won't match ds | 
|---|
| 432 | * anymore, so we must not be racing with | 
|---|
| 433 | * bts_update(). | 
|---|
| 434 | */ | 
|---|
| 435 | bb->cur_buf = next_buf; | 
|---|
| 436 | local_set(&bb->head, head); | 
|---|
| 437 | } | 
|---|
| 438 | } | 
|---|
| 439 | } | 
|---|
| 440 |  | 
|---|
| 441 | /* Don't go far beyond wakeup watermark */ | 
|---|
| 442 | wakeup = BTS_SAFETY_MARGIN + BTS_RECORD_SIZE + handle->wakeup - | 
|---|
| 443 | handle->head; | 
|---|
| 444 | if (space > wakeup) { | 
|---|
| 445 | space = wakeup; | 
|---|
| 446 | space -= space % BTS_RECORD_SIZE; | 
|---|
| 447 | } | 
|---|
| 448 |  | 
|---|
| 449 | bb->end = head + space; | 
|---|
| 450 |  | 
|---|
| 451 | /* | 
|---|
| 452 | * If we have no space, the lost notification would have been sent when | 
|---|
| 453 | * we hit absolute_maximum - see bts_update() | 
|---|
| 454 | */ | 
|---|
| 455 | if (!space) | 
|---|
| 456 | return -ENOSPC; | 
|---|
| 457 |  | 
|---|
| 458 | return 0; | 
|---|
| 459 | } | 
|---|
| 460 |  | 
|---|
| 461 | int intel_bts_interrupt(void) | 
|---|
| 462 | { | 
|---|
| 463 | struct debug_store *ds = this_cpu_ptr(&cpu_hw_events)->ds; | 
|---|
| 464 | struct bts_ctx *bts; | 
|---|
| 465 | struct perf_event *event; | 
|---|
| 466 | struct bts_buffer *bb; | 
|---|
| 467 | s64 old_head; | 
|---|
| 468 | int err = -ENOSPC, handled = 0; | 
|---|
| 469 |  | 
|---|
| 470 | if (!bts_ctx) | 
|---|
| 471 | return 0; | 
|---|
| 472 |  | 
|---|
| 473 | bts = this_cpu_ptr(bts_ctx); | 
|---|
| 474 | event = bts->handle.event; | 
|---|
| 475 | /* | 
|---|
| 476 | * The only surefire way of knowing if this NMI is ours is by checking | 
|---|
| 477 | * the write ptr against the PMI threshold. | 
|---|
| 478 | */ | 
|---|
| 479 | if (ds && (ds->bts_index >= ds->bts_interrupt_threshold)) | 
|---|
| 480 | handled = 1; | 
|---|
| 481 |  | 
|---|
| 482 | /* | 
|---|
| 483 | * this is wrapped in intel_bts_enable_local/intel_bts_disable_local, | 
|---|
| 484 | * so we can only be INACTIVE or STOPPED | 
|---|
| 485 | */ | 
|---|
| 486 | if (READ_ONCE(bts->state) == BTS_STATE_STOPPED) | 
|---|
| 487 | return handled; | 
|---|
| 488 |  | 
|---|
| 489 | bb = perf_get_aux(handle: &bts->handle); | 
|---|
| 490 | if (!bb) | 
|---|
| 491 | return handled; | 
|---|
| 492 |  | 
|---|
| 493 | /* | 
|---|
| 494 | * Skip snapshot counters: they don't use the interrupt, but | 
|---|
| 495 | * there's no other way of telling, because the pointer will | 
|---|
| 496 | * keep moving | 
|---|
| 497 | */ | 
|---|
| 498 | if (bb->snapshot) | 
|---|
| 499 | return 0; | 
|---|
| 500 |  | 
|---|
| 501 | old_head = local_read(&bb->head); | 
|---|
| 502 | bts_update(bts); | 
|---|
| 503 |  | 
|---|
| 504 | /* no new data */ | 
|---|
| 505 | if (old_head == local_read(&bb->head)) | 
|---|
| 506 | return handled; | 
|---|
| 507 |  | 
|---|
| 508 | perf_aux_output_end(handle: &bts->handle, size: local_xchg(l: &bb->data_size, n: 0)); | 
|---|
| 509 |  | 
|---|
| 510 | bb = perf_aux_output_begin(handle: &bts->handle, event); | 
|---|
| 511 | if (bb) | 
|---|
| 512 | err = bts_buffer_reset(bb, handle: &bts->handle); | 
|---|
| 513 |  | 
|---|
| 514 | if (err) { | 
|---|
| 515 | WRITE_ONCE(bts->state, BTS_STATE_STOPPED); | 
|---|
| 516 |  | 
|---|
| 517 | if (bb) { | 
|---|
| 518 | /* | 
|---|
| 519 | * BTS_STATE_STOPPED should be visible before | 
|---|
| 520 | * cleared handle::event | 
|---|
| 521 | */ | 
|---|
| 522 | barrier(); | 
|---|
| 523 | perf_aux_output_end(handle: &bts->handle, size: 0); | 
|---|
| 524 | } | 
|---|
| 525 | } | 
|---|
| 526 |  | 
|---|
| 527 | return 1; | 
|---|
| 528 | } | 
|---|
| 529 |  | 
|---|
/* PMU ::del callback: stop the event and flush collected data. */
static void bts_event_del(struct perf_event *event, int mode)
{
	bts_event_stop(event, PERF_EF_UPDATE);
}
|---|
| 534 |  | 
|---|
| 535 | static int bts_event_add(struct perf_event *event, int mode) | 
|---|
| 536 | { | 
|---|
| 537 | struct bts_ctx *bts = this_cpu_ptr(bts_ctx); | 
|---|
| 538 | struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); | 
|---|
| 539 | struct hw_perf_event *hwc = &event->hw; | 
|---|
| 540 |  | 
|---|
| 541 | event->hw.state = PERF_HES_STOPPED; | 
|---|
| 542 |  | 
|---|
| 543 | if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask)) | 
|---|
| 544 | return -EBUSY; | 
|---|
| 545 |  | 
|---|
| 546 | if (bts->handle.event) | 
|---|
| 547 | return -EBUSY; | 
|---|
| 548 |  | 
|---|
| 549 | if (mode & PERF_EF_START) { | 
|---|
| 550 | bts_event_start(event, flags: 0); | 
|---|
| 551 | if (hwc->state & PERF_HES_STOPPED) | 
|---|
| 552 | return -EINVAL; | 
|---|
| 553 | } | 
|---|
| 554 |  | 
|---|
| 555 | return 0; | 
|---|
| 556 | } | 
|---|
| 557 |  | 
|---|
| 558 | static void bts_event_destroy(struct perf_event *event) | 
|---|
| 559 | { | 
|---|
| 560 | x86_release_hardware(); | 
|---|
| 561 | x86_del_exclusive(what: x86_lbr_exclusive_bts); | 
|---|
| 562 | } | 
|---|
| 563 |  | 
|---|
| 564 | static int bts_event_init(struct perf_event *event) | 
|---|
| 565 | { | 
|---|
| 566 | int ret; | 
|---|
| 567 |  | 
|---|
| 568 | if (event->attr.type != bts_pmu.type) | 
|---|
| 569 | return -ENOENT; | 
|---|
| 570 |  | 
|---|
| 571 | /* | 
|---|
| 572 | * BTS leaks kernel addresses even when CPL0 tracing is | 
|---|
| 573 | * disabled, so disallow intel_bts driver for unprivileged | 
|---|
| 574 | * users on paranoid systems since it provides trace data | 
|---|
| 575 | * to the user in a zero-copy fashion. | 
|---|
| 576 | */ | 
|---|
| 577 | if (event->attr.exclude_kernel) { | 
|---|
| 578 | ret = perf_allow_kernel(); | 
|---|
| 579 | if (ret) | 
|---|
| 580 | return ret; | 
|---|
| 581 | } | 
|---|
| 582 |  | 
|---|
| 583 | if (x86_add_exclusive(what: x86_lbr_exclusive_bts)) | 
|---|
| 584 | return -EBUSY; | 
|---|
| 585 |  | 
|---|
| 586 | ret = x86_reserve_hardware(); | 
|---|
| 587 | if (ret) { | 
|---|
| 588 | x86_del_exclusive(what: x86_lbr_exclusive_bts); | 
|---|
| 589 | return ret; | 
|---|
| 590 | } | 
|---|
| 591 |  | 
|---|
| 592 | event->destroy = bts_event_destroy; | 
|---|
| 593 |  | 
|---|
| 594 | return 0; | 
|---|
| 595 | } | 
|---|
| 596 |  | 
|---|
/* PMU ::read callback: BTS has no counter value to read; nothing to do. */
static void bts_event_read(struct perf_event *event)
{
}
|---|
| 600 |  | 
|---|
| 601 | static __init int bts_init(void) | 
|---|
| 602 | { | 
|---|
| 603 | if (!boot_cpu_has(X86_FEATURE_DTES64)) | 
|---|
| 604 | return -ENODEV; | 
|---|
| 605 |  | 
|---|
| 606 | x86_pmu.bts = boot_cpu_has(X86_FEATURE_BTS); | 
|---|
| 607 | if (!x86_pmu.bts) | 
|---|
| 608 | return -ENODEV; | 
|---|
| 609 |  | 
|---|
| 610 | if (boot_cpu_has(X86_FEATURE_PTI)) { | 
|---|
| 611 | /* | 
|---|
| 612 | * BTS hardware writes through a virtual memory map we must | 
|---|
| 613 | * either use the kernel physical map, or the user mapping of | 
|---|
| 614 | * the AUX buffer. | 
|---|
| 615 | * | 
|---|
| 616 | * However, since this driver supports per-CPU and per-task inherit | 
|---|
| 617 | * we cannot use the user mapping since it will not be available | 
|---|
| 618 | * if we're not running the owning process. | 
|---|
| 619 | * | 
|---|
| 620 | * With PTI we can't use the kernel map either, because its not | 
|---|
| 621 | * there when we run userspace. | 
|---|
| 622 | * | 
|---|
| 623 | * For now, disable this driver when using PTI. | 
|---|
| 624 | */ | 
|---|
| 625 | return -ENODEV; | 
|---|
| 626 | } | 
|---|
| 627 |  | 
|---|
| 628 | bts_ctx = alloc_percpu(struct bts_ctx); | 
|---|
| 629 | if (!bts_ctx) | 
|---|
| 630 | return -ENOMEM; | 
|---|
| 631 |  | 
|---|
| 632 | bts_pmu.capabilities	= PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_ITRACE | | 
|---|
| 633 | PERF_PMU_CAP_EXCLUSIVE; | 
|---|
| 634 | bts_pmu.task_ctx_nr	= perf_sw_context; | 
|---|
| 635 | bts_pmu.event_init	= bts_event_init; | 
|---|
| 636 | bts_pmu.add		= bts_event_add; | 
|---|
| 637 | bts_pmu.del		= bts_event_del; | 
|---|
| 638 | bts_pmu.start		= bts_event_start; | 
|---|
| 639 | bts_pmu.stop		= bts_event_stop; | 
|---|
| 640 | bts_pmu.read		= bts_event_read; | 
|---|
| 641 | bts_pmu.setup_aux	= bts_buffer_setup_aux; | 
|---|
| 642 | bts_pmu.free_aux	= bts_buffer_free_aux; | 
|---|
| 643 |  | 
|---|
| 644 | return perf_pmu_register(pmu: &bts_pmu, name: "intel_bts", type: -1); | 
|---|
| 645 | } | 
|---|
| 646 | early_initcall(bts_init); | 
|---|
| 647 |  | 
|---|