// SPDX-License-Identifier: GPL-2.0-only
#include <linux/errno.h>
#include <linux/numa.h>
#include <linux/slab.h>
#include <linux/rculist.h>
#include <linux/threads.h>
#include <linux/preempt.h>
#include <linux/irqflags.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/device-mapper.h>

#include "dm-core.h"
#include "dm-stats.h"

#define DM_MSG_PREFIX "stats"

static int dm_stat_need_rcu_barrier;

/*
 * Using 64-bit values to avoid overflow (which is a
 * problem that block/genhd.c's IO accounting has).
 */
struct dm_stat_percpu {
	unsigned long long sectors[2];
	unsigned long long ios[2];
	unsigned long long merges[2];
	unsigned long long ticks[2];
	unsigned long long io_ticks[2];
	unsigned long long io_ticks_total;
	unsigned long long time_in_queue;
	unsigned long long *histogram;
};

struct dm_stat_shared {
	atomic_t in_flight[2];
	unsigned long long stamp;
	struct dm_stat_percpu tmp;
};

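/*
 * One statistics region.  I/O counters are kept per CPU in stat_percpu and
 * are only folded together (into stat_shared[].tmp) when a snapshot is
 * taken; the in-flight counts must stay exact, so they live in the shared
 * atomics.
 */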
struct dm_stat {
	struct list_head list_entry;
	int id;
	unsigned int stat_flags;
	size_t n_entries;
	sector_t start;
	sector_t end;
	sector_t step;
	unsigned int n_histogram_entries;
	unsigned long long *histogram_boundaries;
	const char *program_id;
	const char *aux_data;
	struct rcu_head rcu_head;
	size_t shared_alloc_size;
	size_t percpu_alloc_size;
	size_t histogram_alloc_size;
	struct dm_stat_percpu *stat_percpu[NR_CPUS];
	struct dm_stat_shared stat_shared[] __counted_by(n_entries);
};

#define STAT_PRECISE_TIMESTAMPS		1

struct dm_stats_last_position {
	sector_t last_sector;
	unsigned int last_rw;
};

#define DM_STAT_MAX_ENTRIES		8388608
#define DM_STAT_MAX_HISTOGRAM_ENTRIES	134217728

/*
 * A typo on the command line could possibly make the kernel run out of memory
 * and crash. To prevent the crash we account all used memory. We fail if we
 * exhaust 1/4 of all memory or 1/2 of vmalloc space.
 */
#define DM_STATS_MEMORY_FACTOR		4
#define DM_STATS_VMALLOC_FACTOR		2

static DEFINE_SPINLOCK(shared_memory_lock);

static unsigned long shared_memory_amount;

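/*
 * For example (illustrative numbers): on a machine with 8 GiB of RAM and a
 * 512 MiB vmalloc area, region allocations start failing once the accounted
 * total would exceed 2 GiB (1/4 of RAM) or 256 MiB (1/2 of vmalloc space),
 * whichever limit is reached first.
 */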
static bool __check_shared_memory(size_t alloc_size)
{
	size_t a;

	a = shared_memory_amount + alloc_size;
	if (a < shared_memory_amount)
		return false;
	if (a >> PAGE_SHIFT > totalram_pages() / DM_STATS_MEMORY_FACTOR)
		return false;
#ifdef CONFIG_MMU
	if (a > (VMALLOC_END - VMALLOC_START) / DM_STATS_VMALLOC_FACTOR)
		return false;
#endif
	return true;
}

static bool check_shared_memory(size_t alloc_size)
{
	bool ret;

	spin_lock_irq(&shared_memory_lock);

	ret = __check_shared_memory(alloc_size);

	spin_unlock_irq(&shared_memory_lock);

	return ret;
}

static bool claim_shared_memory(size_t alloc_size)
{
	spin_lock_irq(&shared_memory_lock);

	if (!__check_shared_memory(alloc_size)) {
		spin_unlock_irq(&shared_memory_lock);
		return false;
	}

	shared_memory_amount += alloc_size;

	spin_unlock_irq(&shared_memory_lock);

	return true;
}

static void free_shared_memory(size_t alloc_size)
{
	unsigned long flags;

	spin_lock_irqsave(&shared_memory_lock, flags);

	if (WARN_ON_ONCE(shared_memory_amount < alloc_size)) {
		spin_unlock_irqrestore(&shared_memory_lock, flags);
		DMCRIT("Memory usage accounting bug.");
		return;
	}

	shared_memory_amount -= alloc_size;

	spin_unlock_irqrestore(&shared_memory_lock, flags);
}

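/*
 * All bulk allocations for statistics go through dm_kvzalloc()/dm_kvfree()
 * so that every byte is charged against, and later released from, the
 * shared-memory budget enforced above.
 */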
static void *dm_kvzalloc(size_t alloc_size, int node)
{
	void *p;

	if (!claim_shared_memory(alloc_size))
		return NULL;

	p = kvzalloc_node(alloc_size, GFP_KERNEL | __GFP_NOMEMALLOC, node);
	if (p)
		return p;

	free_shared_memory(alloc_size);

	return NULL;
}

static void dm_kvfree(void *ptr, size_t alloc_size)
{
	if (!ptr)
		return;

	free_shared_memory(alloc_size);

	kvfree(ptr);
}

static void dm_stat_free(struct rcu_head *head)
{
	int cpu;
	struct dm_stat *s = container_of(head, struct dm_stat, rcu_head);

	kfree(s->histogram_boundaries);
	kfree(s->program_id);
	kfree(s->aux_data);
	for_each_possible_cpu(cpu) {
		dm_kvfree(s->stat_percpu[cpu][0].histogram, s->histogram_alloc_size);
		dm_kvfree(s->stat_percpu[cpu], s->percpu_alloc_size);
	}
	dm_kvfree(s->stat_shared[0].tmp.histogram, s->histogram_alloc_size);
	dm_kvfree(s, s->shared_alloc_size);
}

static int dm_stat_in_flight(struct dm_stat_shared *shared)
{
	return atomic_read(&shared->in_flight[READ]) +
	       atomic_read(&shared->in_flight[WRITE]);
}

int dm_stats_init(struct dm_stats *stats)
{
	int cpu;
	struct dm_stats_last_position *last;

	mutex_init(&stats->mutex);
	INIT_LIST_HEAD(&stats->list);
	stats->precise_timestamps = false;
	stats->last = alloc_percpu(struct dm_stats_last_position);
	if (!stats->last)
		return -ENOMEM;

	for_each_possible_cpu(cpu) {
		last = per_cpu_ptr(stats->last, cpu);
		last->last_sector = (sector_t)ULLONG_MAX;
		last->last_rw = UINT_MAX;
	}

	return 0;
}

void dm_stats_cleanup(struct dm_stats *stats)
{
	size_t ni;
	struct dm_stat *s;
	struct dm_stat_shared *shared;

	while (!list_empty(&stats->list)) {
		s = container_of(stats->list.next, struct dm_stat, list_entry);
		list_del(&s->list_entry);
		for (ni = 0; ni < s->n_entries; ni++) {
			shared = &s->stat_shared[ni];
			if (WARN_ON(dm_stat_in_flight(shared))) {
				DMCRIT("leaked in-flight counter at index %lu "
				       "(start %llu, end %llu, step %llu): reads %d, writes %d",
				       (unsigned long)ni,
				       (unsigned long long)s->start,
				       (unsigned long long)s->end,
				       (unsigned long long)s->step,
				       atomic_read(&shared->in_flight[READ]),
				       atomic_read(&shared->in_flight[WRITE]));
			}
			cond_resched();
		}
		dm_stat_free(&s->rcu_head);
	}
	free_percpu(stats->last);
	mutex_destroy(&stats->mutex);
}

static void dm_stats_recalc_precise_timestamps(struct dm_stats *stats)
{
	struct list_head *l;
	struct dm_stat *tmp_s;
	bool precise_timestamps = false;

	list_for_each(l, &stats->list) {
		tmp_s = container_of(l, struct dm_stat, list_entry);
		if (tmp_s->stat_flags & STAT_PRECISE_TIMESTAMPS) {
			precise_timestamps = true;
			break;
		}
	}
	stats->precise_timestamps = precise_timestamps;
}

static int dm_stats_create(struct dm_stats *stats, sector_t start, sector_t end,
			   sector_t step, unsigned int stat_flags,
			   unsigned int n_histogram_entries,
			   unsigned long long *histogram_boundaries,
			   const char *program_id, const char *aux_data,
			   void (*suspend_callback)(struct mapped_device *),
			   void (*resume_callback)(struct mapped_device *),
			   struct mapped_device *md)
{
	struct list_head *l;
	struct dm_stat *s, *tmp_s;
	sector_t n_entries;
	size_t ni;
	size_t shared_alloc_size;
	size_t percpu_alloc_size;
	size_t histogram_alloc_size;
	struct dm_stat_percpu *p;
	int cpu;
	int ret_id;
	int r;

	if (end < start || !step)
		return -EINVAL;

	n_entries = end - start;
	if (dm_sector_div64(n_entries, step))
		n_entries++;

	if (n_entries != (size_t)n_entries || !(size_t)(n_entries + 1))
		return -EOVERFLOW;

	if (n_entries > DM_STAT_MAX_ENTRIES)
		return -EOVERFLOW;

	shared_alloc_size = struct_size(s, stat_shared, n_entries);
	if ((shared_alloc_size - sizeof(struct dm_stat)) / sizeof(struct dm_stat_shared) != n_entries)
		return -EOVERFLOW;

	percpu_alloc_size = (size_t)n_entries * sizeof(struct dm_stat_percpu);
	if (percpu_alloc_size / sizeof(struct dm_stat_percpu) != n_entries)
		return -EOVERFLOW;

	histogram_alloc_size = (n_histogram_entries + 1) * (size_t)n_entries * sizeof(unsigned long long);
	if (histogram_alloc_size / (n_histogram_entries + 1) != (size_t)n_entries * sizeof(unsigned long long))
		return -EOVERFLOW;

	if ((n_histogram_entries + 1) * (size_t)n_entries > DM_STAT_MAX_HISTOGRAM_ENTRIES)
		return -EOVERFLOW;

	if (!check_shared_memory(shared_alloc_size + histogram_alloc_size +
				 num_possible_cpus() * (percpu_alloc_size + histogram_alloc_size)))
		return -ENOMEM;

	s = dm_kvzalloc(shared_alloc_size, NUMA_NO_NODE);
	if (!s)
		return -ENOMEM;

	s->stat_flags = stat_flags;
	s->n_entries = n_entries;
	s->start = start;
	s->end = end;
	s->step = step;
	s->shared_alloc_size = shared_alloc_size;
	s->percpu_alloc_size = percpu_alloc_size;
	s->histogram_alloc_size = histogram_alloc_size;

	s->n_histogram_entries = n_histogram_entries;
	s->histogram_boundaries = kmemdup(histogram_boundaries,
					  s->n_histogram_entries * sizeof(unsigned long long),
					  GFP_KERNEL);
	if (!s->histogram_boundaries) {
		r = -ENOMEM;
		goto out;
	}

	s->program_id = kstrdup(program_id, GFP_KERNEL);
	if (!s->program_id) {
		r = -ENOMEM;
		goto out;
	}
	s->aux_data = kstrdup(aux_data, GFP_KERNEL);
	if (!s->aux_data) {
		r = -ENOMEM;
		goto out;
	}

	for (ni = 0; ni < n_entries; ni++) {
		atomic_set(&s->stat_shared[ni].in_flight[READ], 0);
		atomic_set(&s->stat_shared[ni].in_flight[WRITE], 0);
		cond_resched();
	}

	if (s->n_histogram_entries) {
		unsigned long long *hi;

		hi = dm_kvzalloc(s->histogram_alloc_size, NUMA_NO_NODE);
		if (!hi) {
			r = -ENOMEM;
			goto out;
		}
		for (ni = 0; ni < n_entries; ni++) {
			s->stat_shared[ni].tmp.histogram = hi;
			hi += s->n_histogram_entries + 1;
			cond_resched();
		}
	}

	for_each_possible_cpu(cpu) {
		p = dm_kvzalloc(percpu_alloc_size, cpu_to_node(cpu));
		if (!p) {
			r = -ENOMEM;
			goto out;
		}
		s->stat_percpu[cpu] = p;
		if (s->n_histogram_entries) {
			unsigned long long *hi;

			hi = dm_kvzalloc(s->histogram_alloc_size, cpu_to_node(cpu));
			if (!hi) {
				r = -ENOMEM;
				goto out;
			}
			for (ni = 0; ni < n_entries; ni++) {
				p[ni].histogram = hi;
				hi += s->n_histogram_entries + 1;
				cond_resched();
			}
		}
	}

	/*
	 * Suspend/resume to make sure there is no i/o in flight,
	 * so that newly created statistics will be exact.
	 *
	 * (note: we couldn't suspend earlier because we must not
	 * allocate memory while suspended)
	 */
	suspend_callback(md);

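	/*
	 * Find the first unused id: the list is kept sorted by id, so scan
	 * it, bumping s->id while ids match, and stop at the first gap (or
	 * at the tail); the new region is then inserted before 'l'.
	 */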
	mutex_lock(&stats->mutex);
	s->id = 0;
	list_for_each(l, &stats->list) {
		tmp_s = container_of(l, struct dm_stat, list_entry);
		if (WARN_ON(tmp_s->id < s->id)) {
			r = -EINVAL;
			goto out_unlock_resume;
		}
		if (tmp_s->id > s->id)
			break;
		if (unlikely(s->id == INT_MAX)) {
			r = -ENFILE;
			goto out_unlock_resume;
		}
		s->id++;
	}
	ret_id = s->id;
	list_add_tail_rcu(&s->list_entry, l);

	dm_stats_recalc_precise_timestamps(stats);

	if (!static_key_enabled(&stats_enabled.key))
		static_branch_enable(&stats_enabled);

	mutex_unlock(&stats->mutex);

	resume_callback(md);

	return ret_id;

out_unlock_resume:
	mutex_unlock(&stats->mutex);
	resume_callback(md);
out:
	dm_stat_free(&s->rcu_head);
	return r;
}

static struct dm_stat *__dm_stats_find(struct dm_stats *stats, int id)
{
	struct dm_stat *s;

	list_for_each_entry(s, &stats->list, list_entry) {
		if (s->id > id)
			break;
		if (s->id == id)
			return s;
	}

	return NULL;
}

static int dm_stats_delete(struct dm_stats *stats, int id)
{
	struct dm_stat *s;
	int cpu;

	mutex_lock(&stats->mutex);

	s = __dm_stats_find(stats, id);
	if (!s) {
		mutex_unlock(&stats->mutex);
		return -ENOENT;
	}

	list_del_rcu(&s->list_entry);

	dm_stats_recalc_precise_timestamps(stats);

	mutex_unlock(&stats->mutex);

	/*
	 * vfree can't be called from RCU callback
	 */
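	/*
	 * (Regions backed by vmalloc are therefore freed synchronously
	 * after an expedited grace period, instead of from call_rcu().)
	 */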
	for_each_possible_cpu(cpu)
		if (is_vmalloc_addr(s->stat_percpu) ||
		    is_vmalloc_addr(s->stat_percpu[cpu][0].histogram))
			goto do_sync_free;
	if (is_vmalloc_addr(s) ||
	    is_vmalloc_addr(s->stat_shared[0].tmp.histogram)) {
do_sync_free:
		synchronize_rcu_expedited();
		dm_stat_free(&s->rcu_head);
	} else {
		WRITE_ONCE(dm_stat_need_rcu_barrier, 1);
		call_rcu(&s->rcu_head, dm_stat_free);
	}
	return 0;
}

static int dm_stats_list(struct dm_stats *stats, const char *program,
			 char *result, unsigned int maxlen)
{
	struct dm_stat *s;
	sector_t len;
	unsigned int sz = 0;

	/*
	 * Output format:
	 *   <region_id>: <start_sector>+<length> <step> <program_id> <aux_data>
	 */
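	/*
	 * e.g. (illustrative): "0: 0+262144 512 myprog -", optionally
	 * followed by "precise_timestamps" and/or "histogram:<boundaries>".
	 */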

	mutex_lock(&stats->mutex);
	list_for_each_entry(s, &stats->list, list_entry) {
		if (!program || !strcmp(program, s->program_id)) {
			len = s->end - s->start;
			DMEMIT("%d: %llu+%llu %llu %s %s", s->id,
			       (unsigned long long)s->start,
			       (unsigned long long)len,
			       (unsigned long long)s->step,
			       s->program_id,
			       s->aux_data);
			if (s->stat_flags & STAT_PRECISE_TIMESTAMPS)
				DMEMIT(" precise_timestamps");
			if (s->n_histogram_entries) {
				unsigned int i;

				DMEMIT(" histogram:");
				for (i = 0; i < s->n_histogram_entries; i++) {
					if (i)
						DMEMIT(",");
					DMEMIT("%llu", s->histogram_boundaries[i]);
				}
			}
			DMEMIT("\n");
		}
		cond_resched();
	}
	mutex_unlock(&stats->mutex);

	return 1;
}

static void dm_stat_round(struct dm_stat *s, struct dm_stat_shared *shared,
			  struct dm_stat_percpu *p)
{
	/*
	 * This is racy, but so is part_round_stats_single.
	 */
	unsigned long long now, difference;
	unsigned int in_flight_read, in_flight_write;

	if (likely(!(s->stat_flags & STAT_PRECISE_TIMESTAMPS)))
		now = jiffies;
	else
		now = ktime_to_ns(ktime_get());

	difference = now - shared->stamp;
	if (!difference)
		return;

	in_flight_read = (unsigned int)atomic_read(&shared->in_flight[READ]);
	in_flight_write = (unsigned int)atomic_read(&shared->in_flight[WRITE]);
	if (in_flight_read)
		p->io_ticks[READ] += difference;
	if (in_flight_write)
		p->io_ticks[WRITE] += difference;
	if (in_flight_read + in_flight_write) {
		p->io_ticks_total += difference;
		p->time_in_queue += (in_flight_read + in_flight_write) * difference;
	}
	shared->stamp = now;
}

static void dm_stat_for_entry(struct dm_stat *s, size_t entry,
			      int idx, sector_t len,
			      struct dm_stats_aux *stats_aux, bool end,
			      unsigned long duration_jiffies)
{
	struct dm_stat_shared *shared = &s->stat_shared[entry];
	struct dm_stat_percpu *p;

	/*
	 * For strict correctness we should use local_irq_save/restore
	 * instead of preempt_disable/enable.
	 *
	 * preempt_disable/enable is racy if the driver finishes bios
	 * from non-interrupt context as well as from interrupt context
	 * or from more different interrupts.
	 *
	 * On 64-bit architectures the race only results in not counting some
	 * events, so it is acceptable.  On 32-bit architectures the race could
	 * cause the counter going off by 2^32, so we need to do proper locking
	 * there.
	 *
	 * part_stat_lock()/part_stat_unlock() have this race too.
	 */
#if BITS_PER_LONG == 32
	unsigned long flags;

	local_irq_save(flags);
#else
	preempt_disable();
#endif
	p = &s->stat_percpu[smp_processor_id()][entry];

	if (!end) {
		dm_stat_round(s, shared, p);
		atomic_inc(&shared->in_flight[idx]);
	} else {
		unsigned long long duration;

		dm_stat_round(s, shared, p);
		atomic_dec(&shared->in_flight[idx]);
		p->sectors[idx] += len;
		p->ios[idx] += 1;
		p->merges[idx] += stats_aux->merged;
		if (!(s->stat_flags & STAT_PRECISE_TIMESTAMPS)) {
			p->ticks[idx] += duration_jiffies;
			duration = jiffies_to_msecs(duration_jiffies);
		} else {
			p->ticks[idx] += stats_aux->duration_ns;
			duration = stats_aux->duration_ns;
		}
		if (s->n_histogram_entries) {
			unsigned int lo = 0, hi = s->n_histogram_entries + 1;

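			/*
			 * Binary search for the bucket: the invariant is
			 * boundaries[lo - 1] <= duration < boundaries[hi - 1]
			 * (with implicit -inf/+inf sentinels at the ends),
			 * so the loop terminates with 'lo' as the bin index.
			 */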
			while (lo + 1 < hi) {
				unsigned int mid = (lo + hi) / 2;

				if (s->histogram_boundaries[mid - 1] > duration)
					hi = mid;
				else
					lo = mid;
			}
			p->histogram[lo]++;
		}
	}

#if BITS_PER_LONG == 32
	local_irq_restore(flags);
#else
	preempt_enable();
#endif
}

static void __dm_stat_bio(struct dm_stat *s, int bi_rw,
			  sector_t bi_sector, sector_t end_sector,
			  bool end, unsigned long duration_jiffies,
			  struct dm_stats_aux *stats_aux)
{
	sector_t rel_sector, offset, todo, fragment_len;
	size_t entry;

	if (end_sector <= s->start || bi_sector >= s->end)
		return;
	if (unlikely(bi_sector < s->start)) {
		rel_sector = 0;
		todo = end_sector - s->start;
	} else {
		rel_sector = bi_sector - s->start;
		todo = end_sector - bi_sector;
	}
	if (unlikely(end_sector > s->end))
		todo -= (end_sector - s->end);

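	/*
	 * Walk the clipped range area by area: after dm_sector_div64(),
	 * 'entry' holds the index of the first area the bio touches and
	 * 'offset' the position within it; each iteration accounts one
	 * fragment and then advances to the start of the next area.
	 */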
	offset = dm_sector_div64(rel_sector, s->step);
	entry = rel_sector;
	do {
		if (WARN_ON_ONCE(entry >= s->n_entries)) {
			DMCRIT("Invalid area access in region id %d", s->id);
			return;
		}
		fragment_len = todo;
		if (fragment_len > s->step - offset)
			fragment_len = s->step - offset;
		dm_stat_for_entry(s, entry, bi_rw, fragment_len,
				  stats_aux, end, duration_jiffies);
		todo -= fragment_len;
		entry++;
		offset = 0;
	} while (unlikely(todo != 0));
}

void dm_stats_account_io(struct dm_stats *stats, unsigned long bi_rw,
			 sector_t bi_sector, unsigned int bi_sectors, bool end,
			 unsigned long start_time,
			 struct dm_stats_aux *stats_aux)
{
	struct dm_stat *s;
	sector_t end_sector;
	struct dm_stats_last_position *last;
	bool got_precise_time;
	unsigned long duration_jiffies = 0;

	if (unlikely(!bi_sectors))
		return;

	end_sector = bi_sector + bi_sectors;

	if (!end) {
		/*
		 * A race condition can at worst result in the merged flag being
		 * misrepresented, so we don't have to disable preemption here.
		 */
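		/*
		 * A bio counts as merged when it begins at the sector where
		 * the previous bio seen on this CPU ended and goes in the
		 * same direction (read vs. write).
		 */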
		last = raw_cpu_ptr(stats->last);
		stats_aux->merged =
			(bi_sector == READ_ONCE(last->last_sector) &&
			 ((bi_rw == WRITE) ==
			  (READ_ONCE(last->last_rw) == WRITE)));
		WRITE_ONCE(last->last_sector, end_sector);
		WRITE_ONCE(last->last_rw, bi_rw);
	} else
		duration_jiffies = jiffies - start_time;

	rcu_read_lock();

	got_precise_time = false;
	list_for_each_entry_rcu(s, &stats->list, list_entry) {
		if (s->stat_flags & STAT_PRECISE_TIMESTAMPS && !got_precise_time) {
			/* start (!end) duration_ns is set by DM core's alloc_io() */
			if (end)
				stats_aux->duration_ns = ktime_to_ns(ktime_get()) - stats_aux->duration_ns;
			got_precise_time = true;
		}
		__dm_stat_bio(s, bi_rw, bi_sector, end_sector, end, duration_jiffies, stats_aux);
	}

	rcu_read_unlock();
}

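/*
 * Snapshot one area: fold the per-CPU counters for entry 'x' into
 * shared->tmp, after rounding the in-flight time forward to 'now'.
 * The READ_ONCE() reads tolerate concurrent updates; a race only
 * slightly skews the snapshot.
 */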
static void __dm_stat_init_temporary_percpu_totals(struct dm_stat_shared *shared,
						   struct dm_stat *s, size_t x)
{
	int cpu;
	struct dm_stat_percpu *p;

	local_irq_disable();
	p = &s->stat_percpu[smp_processor_id()][x];
	dm_stat_round(s, shared, p);
	local_irq_enable();

	shared->tmp.sectors[READ] = 0;
	shared->tmp.sectors[WRITE] = 0;
	shared->tmp.ios[READ] = 0;
	shared->tmp.ios[WRITE] = 0;
	shared->tmp.merges[READ] = 0;
	shared->tmp.merges[WRITE] = 0;
	shared->tmp.ticks[READ] = 0;
	shared->tmp.ticks[WRITE] = 0;
	shared->tmp.io_ticks[READ] = 0;
	shared->tmp.io_ticks[WRITE] = 0;
	shared->tmp.io_ticks_total = 0;
	shared->tmp.time_in_queue = 0;

	if (s->n_histogram_entries)
		memset(shared->tmp.histogram, 0, (s->n_histogram_entries + 1) * sizeof(unsigned long long));

	for_each_possible_cpu(cpu) {
		p = &s->stat_percpu[cpu][x];
		shared->tmp.sectors[READ] += READ_ONCE(p->sectors[READ]);
		shared->tmp.sectors[WRITE] += READ_ONCE(p->sectors[WRITE]);
		shared->tmp.ios[READ] += READ_ONCE(p->ios[READ]);
		shared->tmp.ios[WRITE] += READ_ONCE(p->ios[WRITE]);
		shared->tmp.merges[READ] += READ_ONCE(p->merges[READ]);
		shared->tmp.merges[WRITE] += READ_ONCE(p->merges[WRITE]);
		shared->tmp.ticks[READ] += READ_ONCE(p->ticks[READ]);
		shared->tmp.ticks[WRITE] += READ_ONCE(p->ticks[WRITE]);
		shared->tmp.io_ticks[READ] += READ_ONCE(p->io_ticks[READ]);
		shared->tmp.io_ticks[WRITE] += READ_ONCE(p->io_ticks[WRITE]);
		shared->tmp.io_ticks_total += READ_ONCE(p->io_ticks_total);
		shared->tmp.time_in_queue += READ_ONCE(p->time_in_queue);
		if (s->n_histogram_entries) {
			unsigned int i;

			for (i = 0; i < s->n_histogram_entries + 1; i++)
				shared->tmp.histogram[i] += READ_ONCE(p->histogram[i]);
		}
	}
}

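/*
 * Clearing subtracts the snapshotted totals from the local CPU's counters,
 * so the sum across all CPUs drops back to (roughly) zero without having
 * to write to the other CPUs' data.
 */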
static void __dm_stat_clear(struct dm_stat *s, size_t idx_start, size_t idx_end,
			    bool init_tmp_percpu_totals)
{
	size_t x;
	struct dm_stat_shared *shared;
	struct dm_stat_percpu *p;

	for (x = idx_start; x < idx_end; x++) {
		shared = &s->stat_shared[x];
		if (init_tmp_percpu_totals)
			__dm_stat_init_temporary_percpu_totals(shared, s, x);
		local_irq_disable();
		p = &s->stat_percpu[smp_processor_id()][x];
		p->sectors[READ] -= shared->tmp.sectors[READ];
		p->sectors[WRITE] -= shared->tmp.sectors[WRITE];
		p->ios[READ] -= shared->tmp.ios[READ];
		p->ios[WRITE] -= shared->tmp.ios[WRITE];
		p->merges[READ] -= shared->tmp.merges[READ];
		p->merges[WRITE] -= shared->tmp.merges[WRITE];
		p->ticks[READ] -= shared->tmp.ticks[READ];
		p->ticks[WRITE] -= shared->tmp.ticks[WRITE];
		p->io_ticks[READ] -= shared->tmp.io_ticks[READ];
		p->io_ticks[WRITE] -= shared->tmp.io_ticks[WRITE];
		p->io_ticks_total -= shared->tmp.io_ticks_total;
		p->time_in_queue -= shared->tmp.time_in_queue;
		local_irq_enable();
		if (s->n_histogram_entries) {
			unsigned int i;

			for (i = 0; i < s->n_histogram_entries + 1; i++) {
				local_irq_disable();
				p = &s->stat_percpu[smp_processor_id()][x];
				p->histogram[i] -= shared->tmp.histogram[i];
				local_irq_enable();
			}
		}
		cond_resched();
	}
}

static int dm_stats_clear(struct dm_stats *stats, int id)
{
	struct dm_stat *s;

	mutex_lock(&stats->mutex);

	s = __dm_stats_find(stats, id);
	if (!s) {
		mutex_unlock(&stats->mutex);
		return -ENOENT;
	}

	__dm_stat_clear(s, 0, s->n_entries, true);

	mutex_unlock(&stats->mutex);

	return 1;
}

/*
 * This is like jiffies_to_msec, but works for 64-bit values.
 */
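/*
 * (jiffies_to_msecs() returns an unsigned int, so the 64-bit count is
 * converted 22 bits at a time and the partial results recombined, scaled
 * by mult = jiffies_to_msecs(1 << 22) per 2^22-jiffies step.)
 */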
static unsigned long long dm_jiffies_to_msec64(struct dm_stat *s, unsigned long long j)
{
	unsigned long long result;
	unsigned int mult;

	if (s->stat_flags & STAT_PRECISE_TIMESTAMPS)
		return j;

	result = 0;
	if (j)
		result = jiffies_to_msecs(j & 0x3fffff);
	if (j >= 1 << 22) {
		mult = jiffies_to_msecs(1 << 22);
		result += (unsigned long long)mult * (unsigned long long)jiffies_to_msecs((j >> 22) & 0x3fffff);
	}
	if (j >= 1ULL << 44)
		result += (unsigned long long)mult * (unsigned long long)mult * (unsigned long long)jiffies_to_msecs(j >> 44);

	return result;
}

static int dm_stats_print(struct dm_stats *stats, int id,
			  size_t idx_start, size_t idx_len,
			  bool clear, char *result, unsigned int maxlen)
{
	unsigned int sz = 0;
	struct dm_stat *s;
	size_t x;
	sector_t start, end, step;
	size_t idx_end;
	struct dm_stat_shared *shared;

	/*
	 * Output format:
	 *   <start_sector>+<length> counters
	 */
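	/*
	 * Each line covers one area: "<start>+<length>" followed by the
	 * counters emitted below (ios, merges, sectors and ticks for reads
	 * and then writes, the in-flight count, the total and in-flight-
	 * weighted busy times, the per-direction busy times, and the
	 * optional histogram bins).
	 */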

	mutex_lock(&stats->mutex);

	s = __dm_stats_find(stats, id);
	if (!s) {
		mutex_unlock(&stats->mutex);
		return -ENOENT;
	}

	idx_end = idx_start + idx_len;
	if (idx_end < idx_start ||
	    idx_end > s->n_entries)
		idx_end = s->n_entries;

	if (idx_start > idx_end)
		idx_start = idx_end;

	step = s->step;
	start = s->start + (step * idx_start);

	for (x = idx_start; x < idx_end; x++, start = end) {
		shared = &s->stat_shared[x];
		end = start + step;
		if (unlikely(end > s->end))
			end = s->end;

		__dm_stat_init_temporary_percpu_totals(shared, s, x);

		DMEMIT("%llu+%llu %llu %llu %llu %llu %llu %llu %llu %llu %d %llu %llu %llu %llu",
		       (unsigned long long)start,
		       (unsigned long long)step,
		       shared->tmp.ios[READ],
		       shared->tmp.merges[READ],
		       shared->tmp.sectors[READ],
		       dm_jiffies_to_msec64(s, shared->tmp.ticks[READ]),
		       shared->tmp.ios[WRITE],
		       shared->tmp.merges[WRITE],
		       shared->tmp.sectors[WRITE],
		       dm_jiffies_to_msec64(s, shared->tmp.ticks[WRITE]),
		       dm_stat_in_flight(shared),
		       dm_jiffies_to_msec64(s, shared->tmp.io_ticks_total),
		       dm_jiffies_to_msec64(s, shared->tmp.time_in_queue),
		       dm_jiffies_to_msec64(s, shared->tmp.io_ticks[READ]),
		       dm_jiffies_to_msec64(s, shared->tmp.io_ticks[WRITE]));
		if (s->n_histogram_entries) {
			unsigned int i;

			for (i = 0; i < s->n_histogram_entries + 1; i++)
				DMEMIT("%s%llu", !i ? " " : ":", shared->tmp.histogram[i]);
		}
		DMEMIT("\n");

		if (unlikely(sz + 1 >= maxlen))
			goto buffer_overflow;

		cond_resched();
	}

	if (clear)
		__dm_stat_clear(s, idx_start, idx_end, false);

buffer_overflow:
	mutex_unlock(&stats->mutex);

	return 1;
}

static int dm_stats_set_aux(struct dm_stats *stats, int id, const char *aux_data)
{
	struct dm_stat *s;
	const char *new_aux_data;

	mutex_lock(&stats->mutex);

	s = __dm_stats_find(stats, id);
	if (!s) {
		mutex_unlock(&stats->mutex);
		return -ENOENT;
	}

	new_aux_data = kstrdup(aux_data, GFP_KERNEL);
	if (!new_aux_data) {
		mutex_unlock(&stats->mutex);
		return -ENOMEM;
	}

	kfree(s->aux_data);
	s->aux_data = new_aux_data;

	mutex_unlock(&stats->mutex);

	return 0;
}

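/*
 * Parse a comma-separated, strictly increasing list of histogram boundaries;
 * e.g. "10,100,1000" yields 3 boundaries and therefore 4 bins.
 */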
static int parse_histogram(const char *h, unsigned int *n_histogram_entries,
			   unsigned long long **histogram_boundaries)
{
	const char *q;
	unsigned int n;
	unsigned long long last;

	*n_histogram_entries = 1;
	for (q = h; *q; q++)
		if (*q == ',')
			(*n_histogram_entries)++;

	*histogram_boundaries = kmalloc_array(*n_histogram_entries,
					      sizeof(unsigned long long),
					      GFP_KERNEL);
	if (!*histogram_boundaries)
		return -ENOMEM;

	n = 0;
	last = 0;
	while (1) {
		unsigned long long hi;
		int s;
		char ch;

		s = sscanf(h, "%llu%c", &hi, &ch);
		if (!s || (s == 2 && ch != ','))
			return -EINVAL;
		if (hi <= last)
			return -EINVAL;
		last = hi;
		(*histogram_boundaries)[n] = hi;
		if (s == 1)
			return 0;
		h = strchr(h, ',') + 1;
		n++;
	}
}

static int message_stats_create(struct mapped_device *md,
				unsigned int argc, char **argv,
				char *result, unsigned int maxlen)
{
	int r;
	int id;
	char dummy;
	unsigned long long start, end, len, step;
	unsigned int divisor;
	const char *program_id, *aux_data;
	unsigned int stat_flags = 0;
	unsigned int n_histogram_entries = 0;
	unsigned long long *histogram_boundaries = NULL;
	struct dm_arg_set as, as_backup;
	const char *a;
	unsigned int feature_args;

	/*
	 * Input format:
	 *   <range> <step> [<extra_parameters> <parameters>] [<program_id> [<aux_data>]]
	 */
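	/*
	 * e.g. "@stats_create - /100" divides the whole device into (about)
	 * 100 areas, while "@stats_create 0+1024 256" covers the first 1024
	 * sectors in areas of 256 sectors each.
	 */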

	if (argc < 3)
		goto ret_einval;

	as.argc = argc;
	as.argv = argv;
	dm_consume_args(&as, 1);

	a = dm_shift_arg(&as);
	if (!strcmp(a, "-")) {
		start = 0;
		len = dm_get_size(md);
		if (!len)
			len = 1;
	} else if (sscanf(a, "%llu+%llu%c", &start, &len, &dummy) != 2 ||
		   start != (sector_t)start || len != (sector_t)len)
		goto ret_einval;

	end = start + len;
	if (start >= end)
		goto ret_einval;

	a = dm_shift_arg(&as);
	if (sscanf(a, "/%u%c", &divisor, &dummy) == 1) {
		if (!divisor)
			return -EINVAL;
		step = end - start;
		if (do_div(step, divisor))
			step++;
		if (!step)
			step = 1;
	} else if (sscanf(a, "%llu%c", &step, &dummy) != 1 ||
		   step != (sector_t)step || !step)
		goto ret_einval;

	as_backup = as;
	a = dm_shift_arg(&as);
	if (a && sscanf(a, "%u%c", &feature_args, &dummy) == 1) {
		while (feature_args--) {
			a = dm_shift_arg(&as);
			if (!a)
				goto ret_einval;
			if (!strcasecmp(a, "precise_timestamps"))
				stat_flags |= STAT_PRECISE_TIMESTAMPS;
			else if (!strncasecmp(a, "histogram:", 10)) {
				if (n_histogram_entries)
					goto ret_einval;
				r = parse_histogram(a + 10, &n_histogram_entries, &histogram_boundaries);
				if (r)
					goto ret;
			} else
				goto ret_einval;
		}
	} else {
		as = as_backup;
	}

	program_id = "-";
	aux_data = "-";

	a = dm_shift_arg(&as);
	if (a)
		program_id = a;

	a = dm_shift_arg(&as);
	if (a)
		aux_data = a;

	if (as.argc)
		goto ret_einval;

	/*
	 * If a buffer overflow happens after we created the region,
	 * it's too late (the userspace would retry with a larger
	 * buffer, but the region id that caused the overflow is already
	 * leaked).  So we must detect buffer overflow in advance.
	 */
	snprintf(result, maxlen, "%d", INT_MAX);
	if (dm_message_test_buffer_overflow(result, maxlen)) {
		r = 1;
		goto ret;
	}

	id = dm_stats_create(dm_get_stats(md), start, end, step, stat_flags,
			     n_histogram_entries, histogram_boundaries, program_id, aux_data,
			     dm_internal_suspend_fast, dm_internal_resume_fast, md);
	if (id < 0) {
		r = id;
		goto ret;
	}

	snprintf(result, maxlen, "%d", id);

	r = 1;
	goto ret;

ret_einval:
	r = -EINVAL;
ret:
	kfree(histogram_boundaries);
	return r;
}

static int message_stats_delete(struct mapped_device *md,
				unsigned int argc, char **argv)
{
	int id;
	char dummy;

	if (argc != 2)
		return -EINVAL;

	if (sscanf(argv[1], "%d%c", &id, &dummy) != 1 || id < 0)
		return -EINVAL;

	return dm_stats_delete(dm_get_stats(md), id);
}

static int message_stats_clear(struct mapped_device *md,
			       unsigned int argc, char **argv)
{
	int id;
	char dummy;

	if (argc != 2)
		return -EINVAL;

	if (sscanf(argv[1], "%d%c", &id, &dummy) != 1 || id < 0)
		return -EINVAL;

	return dm_stats_clear(dm_get_stats(md), id);
}

static int message_stats_list(struct mapped_device *md,
			      unsigned int argc, char **argv,
			      char *result, unsigned int maxlen)
{
	int r;
	const char *program = NULL;

	if (argc < 1 || argc > 2)
		return -EINVAL;

	if (argc > 1) {
		program = kstrdup(argv[1], GFP_KERNEL);
		if (!program)
			return -ENOMEM;
	}

	r = dm_stats_list(dm_get_stats(md), program, result, maxlen);

	kfree(program);

	return r;
}

static int message_stats_print(struct mapped_device *md,
			       unsigned int argc, char **argv, bool clear,
			       char *result, unsigned int maxlen)
{
	int id;
	char dummy;
	unsigned long idx_start = 0, idx_len = ULONG_MAX;

	if (argc != 2 && argc != 4)
		return -EINVAL;

	if (sscanf(argv[1], "%d%c", &id, &dummy) != 1 || id < 0)
		return -EINVAL;

	if (argc > 3) {
		if (strcmp(argv[2], "-") &&
		    sscanf(argv[2], "%lu%c", &idx_start, &dummy) != 1)
			return -EINVAL;
		if (strcmp(argv[3], "-") &&
		    sscanf(argv[3], "%lu%c", &idx_len, &dummy) != 1)
			return -EINVAL;
	}

	return dm_stats_print(dm_get_stats(md), id, idx_start, idx_len, clear,
			      result, maxlen);
}

static int message_stats_set_aux(struct mapped_device *md,
				 unsigned int argc, char **argv)
{
	int id;
	char dummy;

	if (argc != 3)
		return -EINVAL;

	if (sscanf(argv[1], "%d%c", &id, &dummy) != 1 || id < 0)
		return -EINVAL;

	return dm_stats_set_aux(dm_get_stats(md), id, argv[2]);
}

int dm_stats_message(struct mapped_device *md, unsigned int argc, char **argv,
		     char *result, unsigned int maxlen)
{
	int r;

	/* All messages here must start with '@' */
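	/*
	 * These are typically issued from userspace through the target
	 * message interface, e.g. "dmsetup message <device> 0 @stats_list".
	 */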
	if (!strcasecmp(argv[0], "@stats_create"))
		r = message_stats_create(md, argc, argv, result, maxlen);
	else if (!strcasecmp(argv[0], "@stats_delete"))
		r = message_stats_delete(md, argc, argv);
	else if (!strcasecmp(argv[0], "@stats_clear"))
		r = message_stats_clear(md, argc, argv);
	else if (!strcasecmp(argv[0], "@stats_list"))
		r = message_stats_list(md, argc, argv, result, maxlen);
	else if (!strcasecmp(argv[0], "@stats_print"))
		r = message_stats_print(md, argc, argv, false, result, maxlen);
	else if (!strcasecmp(argv[0], "@stats_print_clear"))
		r = message_stats_print(md, argc, argv, true, result, maxlen);
	else if (!strcasecmp(argv[0], "@stats_set_aux"))
		r = message_stats_set_aux(md, argc, argv);
	else
		return 2; /* this wasn't a stats message */

	if (r == -EINVAL)
		DMCRIT("Invalid parameters for message %s", argv[0]);

	return r;
}

int __init dm_statistics_init(void)
{
	shared_memory_amount = 0;
	dm_stat_need_rcu_barrier = 0;
	return 0;
}

void dm_statistics_exit(void)
{
	if (dm_stat_need_rcu_barrier)
		rcu_barrier();
	if (WARN_ON(shared_memory_amount))
		DMCRIT("shared_memory_amount leaked: %lu", shared_memory_amount);
}

module_param_named(stats_current_allocated_bytes, shared_memory_amount, ulong, 0444);
MODULE_PARM_DESC(stats_current_allocated_bytes, "Memory currently used by statistics");