// SPDX-License-Identifier: GPL-2.0+
/*
 * Copyright (C) 2007 Alan Stern
 * Copyright (C) IBM Corporation, 2009
 * Copyright (C) 2009, Frederic Weisbecker <fweisbec@gmail.com>
 *
 * Thanks to Ingo Molnar for his many suggestions.
 *
 * Authors: Alan Stern <stern@rowland.harvard.edu>
 *          K.Prasad <prasad@linux.vnet.ibm.com>
 *          Frederic Weisbecker <fweisbec@gmail.com>
 */

/*
 * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
 * using the CPU's debug registers.
 * This file contains the arch-independent routines.
 */

#include <linux/hw_breakpoint.h>

#include <linux/atomic.h>
#include <linux/bug.h>
#include <linux/cpu.h>
#include <linux/export.h>
#include <linux/init.h>
#include <linux/irqflags.h>
#include <linux/kdebug.h>
#include <linux/kernel.h>
#include <linux/mutex.h>
#include <linux/notifier.h>
#include <linux/percpu-rwsem.h>
#include <linux/percpu.h>
#include <linux/rhashtable.h>
#include <linux/sched.h>
#include <linux/slab.h>

/*
 * Datastructure to track the total uses of N slots across tasks or CPUs;
 * bp_slots_histogram::count[N] is the number of assigned N+1 breakpoint slots.
 */
struct bp_slots_histogram {
#ifdef hw_breakpoint_slots
        atomic_t count[hw_breakpoint_slots(0)];
#else
        atomic_t *count;
#endif
};

/*
 * Per-CPU constraints data.
 */
struct bp_cpuinfo {
        /* Number of pinned CPU breakpoints in a CPU. */
        unsigned int                    cpu_pinned;
        /* Histogram of pinned task breakpoints in a CPU. */
        struct bp_slots_histogram       tsk_pinned;
};

static DEFINE_PER_CPU(struct bp_cpuinfo, bp_cpuinfo[TYPE_MAX]);

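/* Return the per-CPU constraints info of @cpu for breakpoint slot @type. */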
static struct bp_cpuinfo *get_bp_info(int cpu, enum bp_type_idx type)
{
        return per_cpu_ptr(bp_cpuinfo + type, cpu);
}

/* Number of pinned CPU breakpoints globally. */
static struct bp_slots_histogram cpu_pinned[TYPE_MAX];
/* Number of pinned CPU-independent task breakpoints. */
static struct bp_slots_histogram tsk_pinned_all[TYPE_MAX];

/* Keep track of the breakpoints attached to tasks */
static struct rhltable task_bps_ht;
static const struct rhashtable_params task_bps_ht_params = {
        .head_offset = offsetof(struct hw_perf_event, bp_list),
        .key_offset = offsetof(struct hw_perf_event, target),
        .key_len = sizeof_field(struct hw_perf_event, target),
        .automatic_shrinking = true,
};

static bool constraints_initialized __ro_after_init;

/*
 * Synchronizes accesses to the per-CPU constraints; the locking rules are:
 *
 *  1. Atomic updates to bp_cpuinfo::tsk_pinned only require a held read-lock
 *     (due to bp_slots_histogram::count being atomic, no updates are lost).
 *
 *  2. Holding a write-lock is required for computations that require a
 *     stable snapshot of all bp_cpuinfo::tsk_pinned.
 *
 *  3. In all other cases, non-atomic accesses require the appropriately held
 *     lock (read-lock for read-only accesses; write-lock for reads/writes).
 */
DEFINE_STATIC_PERCPU_RWSEM(bp_cpuinfo_sem);

/*
 * Return mutex to serialize accesses to per-task lists in task_bps_ht. Since
 * rhltable synchronizes concurrent insertions/deletions, independent tasks may
 * insert/delete concurrently; therefore, a mutex per task is sufficient.
 *
 * Uses task_struct::perf_event_mutex, to avoid extending task_struct with a
 * hw_breakpoint-only mutex, which may be infrequently used. The caveat here is
 * that hw_breakpoint may contend with per-task perf event list management. The
 * assumption is that perf usecases involving hw_breakpoints are very unlikely
 * to result in unnecessary contention.
 */
static inline struct mutex *get_task_bps_mutex(struct perf_event *bp)
{
        struct task_struct *tsk = bp->hw.target;

        return tsk ? &tsk->perf_event_mutex : NULL;
}

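/*
 * Acquire the constraints lock for @bp: task-bound breakpoints take the
 * per-task mutex plus bp_cpuinfo_sem as a reader; CPU-bound breakpoints take
 * bp_cpuinfo_sem as a writer. Returns the held task mutex, if any, to be
 * passed to bp_constraints_unlock().
 */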
static struct mutex *bp_constraints_lock(struct perf_event *bp)
{
        struct mutex *tsk_mtx = get_task_bps_mutex(bp);

        if (tsk_mtx) {
                /*
                 * Fully analogous to the perf_try_init_event() nesting
                 * argument in the comment near perf_event_ctx_lock_nested();
                 * this child->perf_event_mutex cannot ever deadlock against
                 * the parent->perf_event_mutex usage from
                 * perf_event_task_{en,dis}able().
                 *
                 * Specifically, inherited events will never occur on
                 * ->perf_event_list.
                 */
                mutex_lock_nested(tsk_mtx, SINGLE_DEPTH_NESTING);
                percpu_down_read(&bp_cpuinfo_sem);
        } else {
                percpu_down_write(&bp_cpuinfo_sem);
        }

        return tsk_mtx;
}

static void bp_constraints_unlock(struct mutex *tsk_mtx)
{
        if (tsk_mtx) {
                percpu_up_read(&bp_cpuinfo_sem);
                mutex_unlock(tsk_mtx);
        } else {
                percpu_up_write(&bp_cpuinfo_sem);
        }
}

static bool bp_constraints_is_locked(struct perf_event *bp)
{
        struct mutex *tsk_mtx = get_task_bps_mutex(bp);

        return percpu_is_write_locked(&bp_cpuinfo_sem) ||
               (tsk_mtx ? mutex_is_locked(tsk_mtx) :
                          percpu_is_read_locked(&bp_cpuinfo_sem));
}

static inline void assert_bp_constraints_lock_held(struct perf_event *bp)
{
        struct mutex *tsk_mtx = get_task_bps_mutex(bp);

        if (tsk_mtx)
                lockdep_assert_held(tsk_mtx);
        lockdep_assert_held(&bp_cpuinfo_sem);
}

#ifdef hw_breakpoint_slots
/*
 * Number of breakpoint slots is constant, and the same for all types.
 */
static_assert(hw_breakpoint_slots(TYPE_INST) == hw_breakpoint_slots(TYPE_DATA));
static inline int hw_breakpoint_slots_cached(int type)  { return hw_breakpoint_slots(type); }
static inline int init_breakpoint_slots(void)           { return 0; }
#else
/*
 * Dynamic number of breakpoint slots.
 */
static int __nr_bp_slots[TYPE_MAX] __ro_after_init;

static inline int hw_breakpoint_slots_cached(int type)
{
        return __nr_bp_slots[type];
}

static __init bool
bp_slots_histogram_alloc(struct bp_slots_histogram *hist, enum bp_type_idx type)
{
        hist->count = kcalloc(hw_breakpoint_slots_cached(type), sizeof(*hist->count), GFP_KERNEL);
        return hist->count;
}

static __init void bp_slots_histogram_free(struct bp_slots_histogram *hist)
{
        kfree(hist->count);
}

static __init int init_breakpoint_slots(void)
{
        int i, cpu, err_cpu;

        for (i = 0; i < TYPE_MAX; i++)
                __nr_bp_slots[i] = hw_breakpoint_slots(i);

        for_each_possible_cpu(cpu) {
                for (i = 0; i < TYPE_MAX; i++) {
                        struct bp_cpuinfo *info = get_bp_info(cpu, i);

                        if (!bp_slots_histogram_alloc(&info->tsk_pinned, i))
                                goto err;
                }
        }
        for (i = 0; i < TYPE_MAX; i++) {
                if (!bp_slots_histogram_alloc(&cpu_pinned[i], i))
                        goto err;
                if (!bp_slots_histogram_alloc(&tsk_pinned_all[i], i))
                        goto err;
        }

        return 0;
err:
        for_each_possible_cpu(err_cpu) {
                for (i = 0; i < TYPE_MAX; i++)
                        bp_slots_histogram_free(&get_bp_info(err_cpu, i)->tsk_pinned);
                if (err_cpu == cpu)
                        break;
        }
        for (i = 0; i < TYPE_MAX; i++) {
                bp_slots_histogram_free(&cpu_pinned[i]);
                bp_slots_histogram_free(&tsk_pinned_all[i]);
        }

        return -ENOMEM;
}
#endif

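/*
 * Adjust @hist when the tracked task/CPU goes from using @old slots to
 * @old + @val slots: decrement the bucket for the old count and increment
 * the bucket for the new count (buckets are indexed by count - 1).
 */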
static inline void
bp_slots_histogram_add(struct bp_slots_histogram *hist, int old, int val)
{
        const int old_idx = old - 1;
        const int new_idx = old_idx + val;

        if (old_idx >= 0)
                WARN_ON(atomic_dec_return_relaxed(&hist->count[old_idx]) < 0);
        if (new_idx >= 0)
                WARN_ON(atomic_inc_return_relaxed(&hist->count[new_idx]) < 0);
}

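/*
 * Return the maximum number of breakpoint slots that any single tracked
 * task/CPU currently uses, i.e. the index of the highest non-empty bucket
 * plus one, or 0 if the histogram is empty.
 */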
static int
bp_slots_histogram_max(struct bp_slots_histogram *hist, enum bp_type_idx type)
{
        for (int i = hw_breakpoint_slots_cached(type) - 1; i >= 0; i--) {
                const int count = atomic_read(&hist->count[i]);

                /* Catch unexpected writers; we want a stable snapshot. */
                ASSERT_EXCLUSIVE_WRITER(hist->count[i]);
                if (count > 0)
                        return i + 1;
                WARN(count < 0, "inconsistent breakpoint slots histogram");
        }

        return 0;
}

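/*
 * As bp_slots_histogram_max(), but computed over the element-wise sum of
 * two histograms.
 */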
static int
bp_slots_histogram_max_merge(struct bp_slots_histogram *hist1, struct bp_slots_histogram *hist2,
                             enum bp_type_idx type)
{
        for (int i = hw_breakpoint_slots_cached(type) - 1; i >= 0; i--) {
                const int count1 = atomic_read(&hist1->count[i]);
                const int count2 = atomic_read(&hist2->count[i]);

                /* Catch unexpected writers; we want a stable snapshot. */
                ASSERT_EXCLUSIVE_WRITER(hist1->count[i]);
                ASSERT_EXCLUSIVE_WRITER(hist2->count[i]);
                if (count1 + count2 > 0)
                        return i + 1;
                WARN(count1 < 0, "inconsistent breakpoint slots histogram");
                WARN(count2 < 0, "inconsistent breakpoint slots histogram");
        }

        return 0;
}

#ifndef hw_breakpoint_weight
static inline int hw_breakpoint_weight(struct perf_event *bp)
{
        return 1;
}
#endif

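/* Map a breakpoint type to its slot type: data (read/write) or instruction. */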
static inline enum bp_type_idx find_slot_idx(u64 bp_type)
{
        if (bp_type & HW_BREAKPOINT_RW)
                return TYPE_DATA;

        return TYPE_INST;
}

/*
 * Return the maximum number of pinned breakpoints a task has in this CPU.
 */
static unsigned int max_task_bp_pinned(int cpu, enum bp_type_idx type)
{
        struct bp_slots_histogram *tsk_pinned = &get_bp_info(cpu, type)->tsk_pinned;

        /*
         * At this point we want to have acquired the bp_cpuinfo_sem as a
         * writer to ensure that there are no concurrent writers in
         * toggle_bp_slot() to tsk_pinned, and we get a stable snapshot.
         */
        lockdep_assert_held_write(&bp_cpuinfo_sem);
        return bp_slots_histogram_max_merge(tsk_pinned, &tsk_pinned_all[type], type);
}

/*
 * Count the number of breakpoints of the same type and same task.
 * The given event must not be on the list.
 *
 * If @cpu is -1, but the result of task_bp_pinned() is not CPU-independent,
 * returns a negative value.
 */
static int task_bp_pinned(int cpu, struct perf_event *bp, enum bp_type_idx type)
{
        struct rhlist_head *head, *pos;
        struct perf_event *iter;
        int count = 0;

        /*
         * We need a stable snapshot of the per-task breakpoint list.
         */
        assert_bp_constraints_lock_held(bp);

        rcu_read_lock();
        head = rhltable_lookup(&task_bps_ht, &bp->hw.target, task_bps_ht_params);
        if (!head)
                goto out;

        rhl_for_each_entry_rcu(iter, pos, head, hw.bp_list) {
                if (find_slot_idx(iter->attr.bp_type) != type)
                        continue;

                if (iter->cpu >= 0) {
                        if (cpu == -1) {
                                count = -1;
                                goto out;
                        } else if (cpu != iter->cpu)
                                continue;
                }

                count += hw_breakpoint_weight(iter);
        }

out:
        rcu_read_unlock();
        return count;
}

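/*
 * Return the cpumask a breakpoint is constrained to: the single CPU for a
 * CPU-bound breakpoint, or all possible CPUs otherwise.
 */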
static const struct cpumask *cpumask_of_bp(struct perf_event *bp)
{
        if (bp->cpu >= 0)
                return cpumask_of(bp->cpu);
        return cpu_possible_mask;
}

/*
 * Returns the max pinned breakpoint slots in a given
 * CPU (cpu > -1) or across all of them (cpu = -1).
 */
static int
max_bp_pinned_slots(struct perf_event *bp, enum bp_type_idx type)
{
        const struct cpumask *cpumask = cpumask_of_bp(bp);
        int pinned_slots = 0;
        int cpu;

        if (bp->hw.target && bp->cpu < 0) {
                int max_pinned = task_bp_pinned(-1, bp, type);

                if (max_pinned >= 0) {
                        /*
                         * Fast path: task_bp_pinned() is CPU-independent and
                         * returns the same value for any CPU.
                         */
                        max_pinned += bp_slots_histogram_max(&cpu_pinned[type], type);
                        return max_pinned;
                }
        }

        for_each_cpu(cpu, cpumask) {
                struct bp_cpuinfo *info = get_bp_info(cpu, type);
                int nr;

                nr = info->cpu_pinned;
                if (!bp->hw.target)
                        nr += max_task_bp_pinned(cpu, type);
                else
                        nr += task_bp_pinned(cpu, bp, type);

                pinned_slots = max(nr, pinned_slots);
        }

        return pinned_slots;
}

/*
 * Add/remove the given breakpoint in our constraint table
 */
static int
toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type, int weight)
{
        int cpu, next_tsk_pinned;

        if (!enable)
                weight = -weight;

        if (!bp->hw.target) {
                /*
                 * Update the pinned CPU slots, in per-CPU bp_cpuinfo and in the
                 * global histogram.
                 */
                struct bp_cpuinfo *info = get_bp_info(bp->cpu, type);

                lockdep_assert_held_write(&bp_cpuinfo_sem);
                bp_slots_histogram_add(&cpu_pinned[type], info->cpu_pinned, weight);
                info->cpu_pinned += weight;
                return 0;
        }

        /*
         * If bp->hw.target, tsk_pinned is only modified, but not used
         * otherwise. We can permit concurrent updates as long as there are no
         * other uses: having acquired bp_cpuinfo_sem as a reader allows
         * concurrent updates here. Uses of tsk_pinned will require acquiring
         * bp_cpuinfo_sem as a writer to stabilize tsk_pinned's value.
         */
        lockdep_assert_held_read(&bp_cpuinfo_sem);

        /*
         * Update the pinned task slots, in per-CPU bp_cpuinfo and in the global
         * histogram. We need to take care of 4 cases:
         *
         *  1. This breakpoint targets all CPUs (cpu < 0), and there may only
         *     exist other task breakpoints targeting all CPUs. In this case we
         *     can simply update the global slots histogram.
         *
         *  2. This breakpoint targets a specific CPU (cpu >= 0), but there may
         *     only exist other task breakpoints targeting all CPUs.
         *
         *     a. On enable: remove the existing breakpoints from the global
         *        slots histogram and use the per-CPU histogram.
         *
         *     b. On disable: re-insert the existing breakpoints into the global
         *        slots histogram and remove from per-CPU histogram.
         *
         *  3. Some other existing task breakpoints target specific CPUs. Only
         *     update the per-CPU slots histogram.
         */

        if (!enable) {
                /*
                 * Remove before updating histograms so we can determine if this
                 * was the last task breakpoint for a specific CPU.
                 */
                int ret = rhltable_remove(&task_bps_ht, &bp->hw.bp_list, task_bps_ht_params);

                if (ret)
                        return ret;
        }
        /*
         * Note: If !enable, next_tsk_pinned will not count the to-be-removed breakpoint.
         */
        next_tsk_pinned = task_bp_pinned(-1, bp, type);

        if (next_tsk_pinned >= 0) {
                if (bp->cpu < 0) { /* Case 1: fast path */
                        if (!enable)
                                next_tsk_pinned += hw_breakpoint_weight(bp);
                        bp_slots_histogram_add(&tsk_pinned_all[type], next_tsk_pinned, weight);
                } else if (enable) { /* Case 2.a: slow path */
                        /* Add existing to per-CPU histograms. */
                        for_each_possible_cpu(cpu) {
                                bp_slots_histogram_add(&get_bp_info(cpu, type)->tsk_pinned,
                                                       0, next_tsk_pinned);
                        }
                        /* Add this first CPU-pinned task breakpoint. */
                        bp_slots_histogram_add(&get_bp_info(bp->cpu, type)->tsk_pinned,
                                               next_tsk_pinned, weight);
                        /* Rebalance global task pinned histogram. */
                        bp_slots_histogram_add(&tsk_pinned_all[type], next_tsk_pinned,
                                               -next_tsk_pinned);
                } else { /* Case 2.b: slow path */
                        /* Remove this last CPU-pinned task breakpoint. */
                        bp_slots_histogram_add(&get_bp_info(bp->cpu, type)->tsk_pinned,
                                               next_tsk_pinned + hw_breakpoint_weight(bp), weight);
                        /* Remove all from per-CPU histograms. */
                        for_each_possible_cpu(cpu) {
                                bp_slots_histogram_add(&get_bp_info(cpu, type)->tsk_pinned,
                                                       next_tsk_pinned, -next_tsk_pinned);
                        }
                        /* Rebalance global task pinned histogram. */
                        bp_slots_histogram_add(&tsk_pinned_all[type], 0, next_tsk_pinned);
                }
        } else { /* Case 3: slow path */
                const struct cpumask *cpumask = cpumask_of_bp(bp);

                for_each_cpu(cpu, cpumask) {
                        next_tsk_pinned = task_bp_pinned(cpu, bp, type);
                        if (!enable)
                                next_tsk_pinned += hw_breakpoint_weight(bp);
                        bp_slots_histogram_add(&get_bp_info(cpu, type)->tsk_pinned,
                                               next_tsk_pinned, weight);
                }
        }

        /*
         * Readers want a stable snapshot of the per-task breakpoint list.
         */
        assert_bp_constraints_lock_held(bp);

        if (enable)
                return rhltable_insert(&task_bps_ht, &bp->hw.bp_list, task_bps_ht_params);

        return 0;
}

/*
 * Constraints to check before allowing this new breakpoint counter.
 *
 * Note: Flexible breakpoints are currently unimplemented, but outlined in the
 * below algorithm for completeness.  The implementation treats flexible as
 * pinned due to no guarantee that we currently always schedule flexible events
 * before a pinned event on the same CPU.
 *
 *  == Non-pinned counter == (Considered as pinned for now)
 *
 *   - If attached to a single cpu, check:
 *
 *       (per_cpu(info->flexible, cpu) || (per_cpu(info->cpu_pinned, cpu)
 *           + max(per_cpu(info->tsk_pinned, cpu)))) < HBP_NUM
 *
 *       -> If there are already non-pinned counters in this cpu, it means
 *          there is already a free slot for them.
 *          Otherwise, we check that the maximum number of per-task
 *          breakpoints (for this cpu) plus the number of per-cpu breakpoints
 *          (for this cpu) doesn't cover all registers.
 *
 *   - If attached to every cpu, check:
 *
 *       (per_cpu(info->flexible, *) || (max(per_cpu(info->cpu_pinned, *))
 *           + max(per_cpu(info->tsk_pinned, *)))) < HBP_NUM
 *
 *       -> This is roughly the same, except we check the number of per-cpu
 *          breakpoints for every cpu and we keep the max one. Same for the
 *          per-task breakpoints.
 *
 *
 * == Pinned counter ==
 *
 *   - If attached to a single cpu, check:
 *
 *       ((per_cpu(info->flexible, cpu) > 1) + per_cpu(info->cpu_pinned, cpu)
 *            + max(per_cpu(info->tsk_pinned, cpu))) < HBP_NUM
 *
 *       -> Same checks as before. But now the info->flexible, if any, must keep
 *          at least one register (or they will never be fed).
 *
 *   - If attached to every cpu, check:
 *
 *       ((per_cpu(info->flexible, *) > 1) + max(per_cpu(info->cpu_pinned, *))
 *            + max(per_cpu(info->tsk_pinned, *))) < HBP_NUM
 */
static int __reserve_bp_slot(struct perf_event *bp, u64 bp_type)
{
        enum bp_type_idx type;
        int max_pinned_slots;
        int weight;

        /* We couldn't initialize breakpoint constraints on boot */
        if (!constraints_initialized)
                return -ENOMEM;

        /* Basic checks */
        if (bp_type == HW_BREAKPOINT_EMPTY ||
            bp_type == HW_BREAKPOINT_INVALID)
                return -EINVAL;

        type = find_slot_idx(bp_type);
        weight = hw_breakpoint_weight(bp);

        /* Check if this new breakpoint can be satisfied across all CPUs. */
        max_pinned_slots = max_bp_pinned_slots(bp, type) + weight;
        if (max_pinned_slots > hw_breakpoint_slots_cached(type))
                return -ENOSPC;

        return toggle_bp_slot(bp, true, type, weight);
}

int reserve_bp_slot(struct perf_event *bp)
{
        struct mutex *mtx = bp_constraints_lock(bp);
        int ret = __reserve_bp_slot(bp, bp->attr.bp_type);

        bp_constraints_unlock(mtx);
        return ret;
}

static void __release_bp_slot(struct perf_event *bp, u64 bp_type)
{
        enum bp_type_idx type;
        int weight;

        type = find_slot_idx(bp_type);
        weight = hw_breakpoint_weight(bp);
        WARN_ON(toggle_bp_slot(bp, false, type, weight));
}

void release_bp_slot(struct perf_event *bp)
{
        struct mutex *mtx = bp_constraints_lock(bp);

        __release_bp_slot(bp, bp->attr.bp_type);
        bp_constraints_unlock(mtx);
}

static int __modify_bp_slot(struct perf_event *bp, u64 old_type, u64 new_type)
{
        int err;

        __release_bp_slot(bp, old_type);

        err = __reserve_bp_slot(bp, new_type);
        if (err) {
                /*
                 * Reserve the old_type slot back in case
                 * there's no space for the new type.
                 *
                 * This must succeed, because we just released
                 * the old_type slot in the __release_bp_slot
                 * call above. If not, something is broken.
                 */
                WARN_ON(__reserve_bp_slot(bp, old_type));
        }

        return err;
}

static int modify_bp_slot(struct perf_event *bp, u64 old_type, u64 new_type)
{
        struct mutex *mtx = bp_constraints_lock(bp);
        int ret = __modify_bp_slot(bp, old_type, new_type);

        bp_constraints_unlock(mtx);
        return ret;
}

/*
 * Allow the kernel debugger to reserve breakpoint slots without taking a
 * lock, using the dbg_* variants of the reserve and release breakpoint
 * slot functions.
 */
int dbg_reserve_bp_slot(struct perf_event *bp)
{
        int ret;

        if (bp_constraints_is_locked(bp))
                return -1;

        /* Locks aren't held; disable lockdep assert checking. */
        lockdep_off();
        ret = __reserve_bp_slot(bp, bp->attr.bp_type);
        lockdep_on();

        return ret;
}

int dbg_release_bp_slot(struct perf_event *bp)
{
        if (bp_constraints_is_locked(bp))
                return -1;

        /* Locks aren't held; disable lockdep assert checking. */
        lockdep_off();
        __release_bp_slot(bp, bp->attr.bp_type);
        lockdep_on();

        return 0;
}

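/*
 * Parse and validate the breakpoint attributes into @hw. Breakpoints on
 * kernel addresses are rejected if the event excludes the kernel, and
 * otherwise require CAP_SYS_ADMIN.
 */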
static int hw_breakpoint_parse(struct perf_event *bp,
                               const struct perf_event_attr *attr,
                               struct arch_hw_breakpoint *hw)
{
        int err;

        err = hw_breakpoint_arch_parse(bp, attr, hw);
        if (err)
                return err;

        if (arch_check_bp_in_kernelspace(hw)) {
                if (attr->exclude_kernel)
                        return -EINVAL;
                /*
                 * Don't let unprivileged users set a breakpoint in the trap
                 * path to avoid trap recursion attacks.
                 */
                if (!capable(CAP_SYS_ADMIN))
                        return -EPERM;
        }

        return 0;
}

int register_perf_hw_breakpoint(struct perf_event *bp)
{
        struct arch_hw_breakpoint hw = { };
        int err;

        err = reserve_bp_slot(bp);
        if (err)
                return err;

        err = hw_breakpoint_parse(bp, &bp->attr, &hw);
        if (err) {
                release_bp_slot(bp);
                return err;
        }

        bp->hw.info = hw;

        return 0;
}

/**
 * register_user_hw_breakpoint - register a hardware breakpoint for user space
 * @attr: breakpoint attributes
 * @triggered: callback to trigger when we hit the breakpoint
 * @context: context data could be used in the triggered callback
 * @tsk: pointer to 'task_struct' of the process to which the address belongs
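 *
 * Illustrative usage sketch (not taken from this file; assumes the caller
 * supplies an address @addr and a my_bp_handler() overflow callback):
 *
 *      struct perf_event_attr attr;
 *      struct perf_event *bp;
 *
 *      hw_breakpoint_init(&attr);
 *      attr.bp_addr = addr;
 *      attr.bp_len  = HW_BREAKPOINT_LEN_4;
 *      attr.bp_type = HW_BREAKPOINT_W;
 *      bp = register_user_hw_breakpoint(&attr, my_bp_handler, NULL, current);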
 */
struct perf_event *
register_user_hw_breakpoint(struct perf_event_attr *attr,
                            perf_overflow_handler_t triggered,
                            void *context,
                            struct task_struct *tsk)
{
        return perf_event_create_kernel_counter(attr, -1, tsk, triggered,
                                                context);
}
EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);

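/*
 * Copy the subset of attributes that a breakpoint modification is allowed
 * to change: address, type, length and the disabled bit.
 */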
static void hw_breakpoint_copy_attr(struct perf_event_attr *to,
                                    struct perf_event_attr *from)
{
        to->bp_addr = from->bp_addr;
        to->bp_type = from->bp_type;
        to->bp_len  = from->bp_len;
        to->disabled = from->disabled;
}

int
modify_user_hw_breakpoint_check(struct perf_event *bp, struct perf_event_attr *attr,
                                bool check)
{
        struct arch_hw_breakpoint hw = { };
        int err;

        err = hw_breakpoint_parse(bp, attr, &hw);
        if (err)
                return err;

        if (check) {
                struct perf_event_attr old_attr;

                old_attr = bp->attr;
                hw_breakpoint_copy_attr(&old_attr, attr);
                if (memcmp(&old_attr, attr, sizeof(*attr)))
                        return -EINVAL;
        }

        if (bp->attr.bp_type != attr->bp_type) {
                err = modify_bp_slot(bp, bp->attr.bp_type, attr->bp_type);
                if (err)
                        return err;
        }

        hw_breakpoint_copy_attr(&bp->attr, attr);
        bp->hw.info = hw;

        return 0;
}

/**
 * modify_user_hw_breakpoint - modify a user-space hardware breakpoint
 * @bp: the breakpoint structure to modify
 * @attr: new breakpoint attributes
 */
int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr)
{
        int err;

        /*
         * modify_user_hw_breakpoint can be invoked with IRQs disabled and hence it
         * will not be possible to raise IPIs that invoke __perf_event_disable.
         * So call the function directly after making sure we are targeting the
         * current task.
         */
        if (irqs_disabled() && bp->ctx && bp->ctx->task == current)
                perf_event_disable_local(bp);
        else
                perf_event_disable(bp);

        err = modify_user_hw_breakpoint_check(bp, attr, false);

        if (!bp->attr.disabled)
                perf_event_enable(bp);

        return err;
}
EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint);

/**
 * unregister_hw_breakpoint - unregister a user-space hardware breakpoint
 * @bp: the breakpoint structure to unregister
 */
void unregister_hw_breakpoint(struct perf_event *bp)
{
        if (!bp)
                return;
        perf_event_release_kernel(bp);
}
EXPORT_SYMBOL_GPL(unregister_hw_breakpoint);

/**
 * register_wide_hw_breakpoint - register a wide breakpoint in the kernel
 * @attr: breakpoint attributes
 * @triggered: callback to trigger when we hit the breakpoint
 * @context: context data could be used in the triggered callback
 *
 * @return a set of per_cpu pointers to perf events
 */
struct perf_event * __percpu *
register_wide_hw_breakpoint(struct perf_event_attr *attr,
                            perf_overflow_handler_t triggered,
                            void *context)
{
        struct perf_event * __percpu *cpu_events, *bp;
        long err = 0;
        int cpu;

        cpu_events = alloc_percpu(typeof(*cpu_events));
        if (!cpu_events)
                return ERR_PTR_PCPU(-ENOMEM);

        cpus_read_lock();
        for_each_online_cpu(cpu) {
                bp = perf_event_create_kernel_counter(attr, cpu, NULL,
                                                      triggered, context);
                if (IS_ERR(bp)) {
                        err = PTR_ERR(bp);
                        break;
                }

                per_cpu(*cpu_events, cpu) = bp;
        }
        cpus_read_unlock();

        if (likely(!err))
                return cpu_events;

        unregister_wide_hw_breakpoint(cpu_events);
        return ERR_PTR_PCPU(err);
}
EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint);

/**
 * unregister_wide_hw_breakpoint - unregister a wide breakpoint in the kernel
 * @cpu_events: the per cpu set of events to unregister
 */
void unregister_wide_hw_breakpoint(struct perf_event * __percpu *cpu_events)
{
        int cpu;

        for_each_possible_cpu(cpu)
                unregister_hw_breakpoint(per_cpu(*cpu_events, cpu));

        free_percpu(cpu_events);
}
EXPORT_SYMBOL_GPL(unregister_wide_hw_breakpoint);

/**
 * hw_breakpoint_is_used - check if breakpoints are currently used
 *
 * Returns: true if breakpoints are used, false otherwise.
 */
bool hw_breakpoint_is_used(void)
{
        int cpu;

        if (!constraints_initialized)
                return false;

        for_each_possible_cpu(cpu) {
                for (int type = 0; type < TYPE_MAX; ++type) {
                        struct bp_cpuinfo *info = get_bp_info(cpu, type);

                        if (info->cpu_pinned)
                                return true;

                        for (int slot = 0; slot < hw_breakpoint_slots_cached(type); ++slot) {
                                if (atomic_read(&info->tsk_pinned.count[slot]))
                                        return true;
                        }
                }
        }

        for (int type = 0; type < TYPE_MAX; ++type) {
                for (int slot = 0; slot < hw_breakpoint_slots_cached(type); ++slot) {
                        /*
                         * Warn, because if there are CPU pinned counters,
                         * should never get here; bp_cpuinfo::cpu_pinned should
                         * be consistent with the global cpu_pinned histogram.
                         */
                        if (WARN_ON(atomic_read(&cpu_pinned[type].count[slot])))
                                return true;

                        if (atomic_read(&tsk_pinned_all[type].count[slot]))
                                return true;
                }
        }

        return false;
}

static struct notifier_block hw_breakpoint_exceptions_nb = {
        .notifier_call = hw_breakpoint_exceptions_notify,
        /* we need to be notified first */
        .priority = 0x7fffffff
};

static void bp_perf_event_destroy(struct perf_event *event)
{
        release_bp_slot(event);
}

static int hw_breakpoint_event_init(struct perf_event *bp)
{
        int err;

        if (bp->attr.type != PERF_TYPE_BREAKPOINT)
                return -ENOENT;

        /*
         * Check if breakpoint type is supported before proceeding.
         * Also, no branch sampling for breakpoint events.
         */
        if (!hw_breakpoint_slots_cached(find_slot_idx(bp->attr.bp_type)) || has_branch_stack(bp))
                return -EOPNOTSUPP;

        err = register_perf_hw_breakpoint(bp);
        if (err)
                return err;

        bp->destroy = bp_perf_event_destroy;

        return 0;
}

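/*
 * pmu::add callback: install the breakpoint in this CPU's debug registers
 * when the event is scheduled in. Without PERF_EF_START the event is marked
 * stopped (PERF_HES_STOPPED) until hw_breakpoint_start().
 */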
static int hw_breakpoint_add(struct perf_event *bp, int flags)
{
        if (!(flags & PERF_EF_START))
                bp->hw.state = PERF_HES_STOPPED;

        if (is_sampling_event(bp)) {
                bp->hw.last_period = bp->hw.sample_period;
                perf_swevent_set_period(bp);
        }

        return arch_install_hw_breakpoint(bp);
}

static void hw_breakpoint_del(struct perf_event *bp, int flags)
{
        arch_uninstall_hw_breakpoint(bp);
}

static void hw_breakpoint_start(struct perf_event *bp, int flags)
{
        bp->hw.state = 0;
}

static void hw_breakpoint_stop(struct perf_event *bp, int flags)
{
        bp->hw.state = PERF_HES_STOPPED;
}

static struct pmu perf_breakpoint = {
        .task_ctx_nr    = perf_sw_context, /* could eventually get its own */

        .event_init     = hw_breakpoint_event_init,
        .add            = hw_breakpoint_add,
        .del            = hw_breakpoint_del,
        .start          = hw_breakpoint_start,
        .stop           = hw_breakpoint_stop,
        .read           = hw_breakpoint_pmu_read,
};

int __init init_hw_breakpoint(void)
{
        int ret;

        ret = rhltable_init(&task_bps_ht, &task_bps_ht_params);
        if (ret)
                return ret;

        ret = init_breakpoint_slots();
        if (ret)
                return ret;

        constraints_initialized = true;

        perf_pmu_register(&perf_breakpoint, "breakpoint", PERF_TYPE_BREAKPOINT);

        return register_die_notifier(&hw_breakpoint_exceptions_nb);
}