// SPDX-License-Identifier: GPL-2.0
/*
 * Functions related to sysfs handling
 */
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/blktrace_api.h>
#include <linux/debugfs.h>

#include "blk.h"
#include "blk-mq.h"
#include "blk-mq-debugfs.h"
#include "blk-mq-sched.h"
#include "blk-rq-qos.h"
#include "blk-wbt.h"
#include "blk-cgroup.h"
#include "blk-throttle.h"

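/*
 * One sysfs attribute under /sys/block/<disk>/queue/.  An entry provides
 * either the plain ->show/->store callbacks or the ->show_limit/->store_limit
 * variants: the latter are invoked by queue_attr_show()/queue_attr_store()
 * below with q->limits_lock held, resp. inside a queue_limits_start_update()/
 * queue_limits_commit_update_frozen() cycle, so those callbacks only have to
 * fill in new values in the passed-in queue_limits.
 */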
struct queue_sysfs_entry {
	struct attribute attr;
	ssize_t (*show)(struct gendisk *disk, char *page);
	ssize_t (*show_limit)(struct gendisk *disk, char *page);

	ssize_t (*store)(struct gendisk *disk, const char *page, size_t count);
	int (*store_limit)(struct gendisk *disk, const char *page,
			size_t count, struct queue_limits *lim);
};

static ssize_t
queue_var_show(unsigned long var, char *page)
{
	return sysfs_emit(page, "%lu\n", var);
}

static ssize_t
queue_var_store(unsigned long *var, const char *page, size_t count)
{
	int err;
	unsigned long v;

	err = kstrtoul(page, 10, &v);
	if (err || v > UINT_MAX)
		return -EINVAL;

	*var = v;

	return count;
}

static ssize_t queue_requests_show(struct gendisk *disk, char *page)
{
	ssize_t ret;

	mutex_lock(&disk->queue->elevator_lock);
	ret = queue_var_show(disk->queue->nr_requests, page);
	mutex_unlock(&disk->queue->elevator_lock);
	return ret;
}

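/*
 * Writing a decimal value to /sys/block/<disk>/queue/nr_requests resizes the
 * per-queue request pool, e.g. "echo 256 > /sys/block/sda/queue/nr_requests"
 * (device name illustrative).  Values below BLKDEV_MIN_RQ are clamped up;
 * values that do not fit the tag set (or exceed MAX_SCHED_RQ with an elevator
 * attached) are rejected with -EINVAL.
 */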
static ssize_t
queue_requests_store(struct gendisk *disk, const char *page, size_t count)
{
	struct request_queue *q = disk->queue;
	struct blk_mq_tag_set *set = q->tag_set;
	struct elevator_tags *et = NULL;
	unsigned int memflags;
	unsigned long nr;
	int ret;

	ret = queue_var_store(&nr, page, count);
	if (ret < 0)
		return ret;

	/*
	 * Serialize updates of nr_requests against concurrent
	 * queue_requests_store() callers and elevator switching.
	 */
	down_write(&set->update_nr_hwq_lock);

	if (nr == q->nr_requests)
		goto unlock;

	if (nr < BLKDEV_MIN_RQ)
		nr = BLKDEV_MIN_RQ;

	/*
	 * Switching elevator is protected by update_nr_hwq_lock:
	 * - read lock is held from the elevator sysfs attribute;
	 * - write lock is held while updating nr_hw_queues;
	 * Hence it's safe to access q->elevator here with the write lock held.
	 */
	if (nr <= set->reserved_tags ||
	    (q->elevator && nr > MAX_SCHED_RQ) ||
	    (!q->elevator && nr > set->queue_depth)) {
		ret = -EINVAL;
		goto unlock;
	}

	if (!blk_mq_is_shared_tags(set->flags) && q->elevator &&
	    nr > q->elevator->et->nr_requests) {
		/*
		 * Tags will grow, allocate memory before freezing the queue
		 * to prevent deadlock.
		 */
		et = blk_mq_alloc_sched_tags(set, q->nr_hw_queues, nr);
		if (!et) {
			ret = -ENOMEM;
			goto unlock;
		}
	}

	memflags = blk_mq_freeze_queue(q);
	mutex_lock(&q->elevator_lock);
	et = blk_mq_update_nr_requests(q, et, nr);
	mutex_unlock(&q->elevator_lock);
	blk_mq_unfreeze_queue(q, memflags);

	if (et)
		blk_mq_free_sched_tags(et, set);

unlock:
	up_write(&set->update_nr_hwq_lock);
	return ret;
}

static ssize_t queue_ra_show(struct gendisk *disk, char *page)
{
	ssize_t ret;

	mutex_lock(&disk->queue->limits_lock);
	ret = queue_var_show(disk->bdi->ra_pages << (PAGE_SHIFT - 10), page);
	mutex_unlock(&disk->queue->limits_lock);

	return ret;
}

static ssize_t
queue_ra_store(struct gendisk *disk, const char *page, size_t count)
{
	unsigned long ra_kb;
	ssize_t ret;
	unsigned int memflags;
	struct request_queue *q = disk->queue;

	ret = queue_var_store(&ra_kb, page, count);
	if (ret < 0)
		return ret;
	/*
	 * ->ra_pages is protected by ->limits_lock because it is usually
	 * calculated from the queue limits by queue_limits_commit_update.
	 */
	mutex_lock(&q->limits_lock);
	memflags = blk_mq_freeze_queue(q);
	disk->bdi->ra_pages = ra_kb >> (PAGE_SHIFT - 10);
	mutex_unlock(&q->limits_lock);
	blk_mq_unfreeze_queue(q, memflags);

	return ret;
}

#define QUEUE_SYSFS_LIMIT_SHOW(_field)					\
static ssize_t queue_##_field##_show(struct gendisk *disk, char *page)	\
{									\
	return queue_var_show(disk->queue->limits._field, page);	\
}

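/*
 * For example, QUEUE_SYSFS_LIMIT_SHOW(max_segments) expands to
 * queue_max_segments_show(), which prints disk->queue->limits.max_segments.
 */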
QUEUE_SYSFS_LIMIT_SHOW(max_segments)
QUEUE_SYSFS_LIMIT_SHOW(max_discard_segments)
QUEUE_SYSFS_LIMIT_SHOW(max_integrity_segments)
QUEUE_SYSFS_LIMIT_SHOW(max_segment_size)
QUEUE_SYSFS_LIMIT_SHOW(max_write_streams)
QUEUE_SYSFS_LIMIT_SHOW(write_stream_granularity)
QUEUE_SYSFS_LIMIT_SHOW(logical_block_size)
QUEUE_SYSFS_LIMIT_SHOW(physical_block_size)
QUEUE_SYSFS_LIMIT_SHOW(chunk_sectors)
QUEUE_SYSFS_LIMIT_SHOW(io_min)
QUEUE_SYSFS_LIMIT_SHOW(io_opt)
QUEUE_SYSFS_LIMIT_SHOW(discard_granularity)
QUEUE_SYSFS_LIMIT_SHOW(zone_write_granularity)
QUEUE_SYSFS_LIMIT_SHOW(virt_boundary_mask)
QUEUE_SYSFS_LIMIT_SHOW(dma_alignment)
QUEUE_SYSFS_LIMIT_SHOW(max_open_zones)
QUEUE_SYSFS_LIMIT_SHOW(max_active_zones)
QUEUE_SYSFS_LIMIT_SHOW(atomic_write_unit_min)
QUEUE_SYSFS_LIMIT_SHOW(atomic_write_unit_max)

#define QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(_field)			\
static ssize_t queue_##_field##_show(struct gendisk *disk, char *page)	\
{									\
	return sysfs_emit(page, "%llu\n",				\
			(unsigned long long)disk->queue->limits._field << \
			SECTOR_SHIFT);					\
}

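/*
 * These limits are kept in 512-byte sector units internally but reported to
 * userspace in bytes; the shift by SECTOR_SHIFT (9) converts sectors to bytes.
 */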
QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_discard_sectors)
QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_hw_discard_sectors)
QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_write_zeroes_sectors)
QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_hw_wzeroes_unmap_sectors)
QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_wzeroes_unmap_sectors)
QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(atomic_write_max_sectors)
QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(atomic_write_boundary_sectors)
QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_zone_append_sectors)

#define QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_KB(_field)			\
static ssize_t queue_##_field##_show(struct gendisk *disk, char *page)	\
{									\
	return queue_var_show(disk->queue->limits._field >> 1, page);	\
}

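/* Right-shifting a 512-byte sector count by one yields KiB. */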
QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_KB(max_sectors)
QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_KB(max_hw_sectors)

#define QUEUE_SYSFS_SHOW_CONST(_name, _val)				\
static ssize_t queue_##_name##_show(struct gendisk *disk, char *page)	\
{									\
	return sysfs_emit(page, "%d\n", _val);				\
}

/* deprecated fields */
QUEUE_SYSFS_SHOW_CONST(discard_zeroes_data, 0)
QUEUE_SYSFS_SHOW_CONST(write_same_max, 0)
QUEUE_SYSFS_SHOW_CONST(poll_delay, -1)

static int queue_max_discard_sectors_store(struct gendisk *disk,
		const char *page, size_t count, struct queue_limits *lim)
{
	unsigned long max_discard_bytes;
	ssize_t ret;

	ret = queue_var_store(&max_discard_bytes, page, count);
	if (ret < 0)
		return ret;

	if (max_discard_bytes & (disk->queue->limits.discard_granularity - 1))
		return -EINVAL;

	if ((max_discard_bytes >> SECTOR_SHIFT) > UINT_MAX)
		return -EINVAL;

	lim->max_user_discard_sectors = max_discard_bytes >> SECTOR_SHIFT;
	return 0;
}

static int queue_max_wzeroes_unmap_sectors_store(struct gendisk *disk,
		const char *page, size_t count, struct queue_limits *lim)
{
	unsigned long max_zeroes_bytes, max_hw_zeroes_bytes;
	ssize_t ret;

	ret = queue_var_store(&max_zeroes_bytes, page, count);
	if (ret < 0)
		return ret;

	max_hw_zeroes_bytes = lim->max_hw_wzeroes_unmap_sectors << SECTOR_SHIFT;
	if (max_zeroes_bytes != 0 && max_zeroes_bytes != max_hw_zeroes_bytes)
		return -EINVAL;

	lim->max_user_wzeroes_unmap_sectors = max_zeroes_bytes >> SECTOR_SHIFT;
	return 0;
}

static int
queue_max_sectors_store(struct gendisk *disk, const char *page, size_t count,
		struct queue_limits *lim)
{
	unsigned long max_sectors_kb;
	ssize_t ret;

	ret = queue_var_store(&max_sectors_kb, page, count);
	if (ret < 0)
		return ret;

	lim->max_user_sectors = max_sectors_kb << 1;
	return 0;
}

static ssize_t queue_feature_store(struct gendisk *disk, const char *page,
		size_t count, struct queue_limits *lim, blk_features_t feature)
{
	unsigned long val;
	ssize_t ret;

	ret = queue_var_store(&val, page, count);
	if (ret < 0)
		return ret;

	if (val)
		lim->features |= feature;
	else
		lim->features &= ~feature;
	return 0;
}

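/*
 * Generate a paired show/store handler for a boolean feature flag, e.g.
 * writing "0" to /sys/block/<disk>/queue/rotational clears
 * BLK_FEAT_ROTATIONAL in the queue limits.
 */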
#define QUEUE_SYSFS_FEATURE(_name, _feature)				\
static ssize_t queue_##_name##_show(struct gendisk *disk, char *page)	\
{									\
	return sysfs_emit(page, "%u\n",					\
		!!(disk->queue->limits.features & _feature));		\
}									\
static int queue_##_name##_store(struct gendisk *disk,			\
		const char *page, size_t count, struct queue_limits *lim) \
{									\
	return queue_feature_store(disk, page, count, lim, _feature);	\
}

QUEUE_SYSFS_FEATURE(rotational, BLK_FEAT_ROTATIONAL)
QUEUE_SYSFS_FEATURE(add_random, BLK_FEAT_ADD_RANDOM)
QUEUE_SYSFS_FEATURE(iostats, BLK_FEAT_IO_STAT)
QUEUE_SYSFS_FEATURE(stable_writes, BLK_FEAT_STABLE_WRITES)

#define QUEUE_SYSFS_FEATURE_SHOW(_name, _feature)			\
static ssize_t queue_##_name##_show(struct gendisk *disk, char *page)	\
{									\
	return sysfs_emit(page, "%u\n",					\
		!!(disk->queue->limits.features & _feature));		\
}

QUEUE_SYSFS_FEATURE_SHOW(fua, BLK_FEAT_FUA)
QUEUE_SYSFS_FEATURE_SHOW(dax, BLK_FEAT_DAX)

static ssize_t queue_poll_show(struct gendisk *disk, char *page)
{
	if (queue_is_mq(disk->queue))
		return sysfs_emit(page, "%u\n", blk_mq_can_poll(disk->queue));

	return sysfs_emit(page, "%u\n",
			!!(disk->queue->limits.features & BLK_FEAT_POLL));
}

static ssize_t queue_zoned_show(struct gendisk *disk, char *page)
{
	if (blk_queue_is_zoned(disk->queue))
		return sysfs_emit(page, "host-managed\n");
	return sysfs_emit(page, "none\n");
}

static ssize_t queue_nr_zones_show(struct gendisk *disk, char *page)
{
	return queue_var_show(disk_nr_zones(disk), page);
}

static ssize_t queue_iostats_passthrough_show(struct gendisk *disk, char *page)
{
	return queue_var_show(!!blk_queue_passthrough_stat(disk->queue), page);
}

static int queue_iostats_passthrough_store(struct gendisk *disk,
		const char *page, size_t count, struct queue_limits *lim)
{
	unsigned long ios;
	ssize_t ret;

	ret = queue_var_store(&ios, page, count);
	if (ret < 0)
		return ret;

	if (ios)
		lim->flags |= BLK_FLAG_IOSTATS_PASSTHROUGH;
	else
		lim->flags &= ~BLK_FLAG_IOSTATS_PASSTHROUGH;
	return 0;
}

static ssize_t queue_nomerges_show(struct gendisk *disk, char *page)
{
	return queue_var_show((blk_queue_nomerges(disk->queue) << 1) |
			blk_queue_noxmerges(disk->queue), page);
}

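/*
 * nomerges accepts 0 (all merging enabled), 1 (only simple one-hit merges,
 * QUEUE_FLAG_NOXMERGES) or 2 (no merging at all, QUEUE_FLAG_NOMERGES).
 */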
static ssize_t queue_nomerges_store(struct gendisk *disk, const char *page,
		size_t count)
{
	unsigned long nm;
	unsigned int memflags;
	struct request_queue *q = disk->queue;
	ssize_t ret = queue_var_store(&nm, page, count);

	if (ret < 0)
		return ret;

	memflags = blk_mq_freeze_queue(q);
	blk_queue_flag_clear(QUEUE_FLAG_NOMERGES, q);
	blk_queue_flag_clear(QUEUE_FLAG_NOXMERGES, q);
	if (nm == 2)
		blk_queue_flag_set(QUEUE_FLAG_NOMERGES, q);
	else if (nm)
		blk_queue_flag_set(QUEUE_FLAG_NOXMERGES, q);
	blk_mq_unfreeze_queue(q, memflags);

	return ret;
}

static ssize_t queue_rq_affinity_show(struct gendisk *disk, char *page)
{
	bool set = test_bit(QUEUE_FLAG_SAME_COMP, &disk->queue->queue_flags);
	bool force = test_bit(QUEUE_FLAG_SAME_FORCE, &disk->queue->queue_flags);

	return queue_var_show(set << force, page);
}

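/*
 * rq_affinity: 0 turns completion affinity off, 1 completes a request on a
 * CPU in the submitter's cache-sharing group (QUEUE_FLAG_SAME_COMP), 2 forces
 * completion onto the exact submitting CPU (QUEUE_FLAG_SAME_FORCE).
 */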
static ssize_t
queue_rq_affinity_store(struct gendisk *disk, const char *page, size_t count)
{
	ssize_t ret = -EINVAL;
#ifdef CONFIG_SMP
	struct request_queue *q = disk->queue;
	unsigned long val;
	unsigned int memflags;

	ret = queue_var_store(&val, page, count);
	if (ret < 0)
		return ret;

	/*
	 * We update two queue flags here, each with an atomic bitop.  Although
	 * updating the pair is not atomic as a whole, that is harmless because
	 * readers access each flag individually with atomic test_bit(), so no
	 * lock needs to be taken while updating them.
	 */
	memflags = blk_mq_freeze_queue(q);
	if (val == 2) {
		blk_queue_flag_set(QUEUE_FLAG_SAME_COMP, q);
		blk_queue_flag_set(QUEUE_FLAG_SAME_FORCE, q);
	} else if (val == 1) {
		blk_queue_flag_set(QUEUE_FLAG_SAME_COMP, q);
		blk_queue_flag_clear(QUEUE_FLAG_SAME_FORCE, q);
	} else if (val == 0) {
		blk_queue_flag_clear(QUEUE_FLAG_SAME_COMP, q);
		blk_queue_flag_clear(QUEUE_FLAG_SAME_FORCE, q);
	}
	blk_mq_unfreeze_queue(q, memflags);
#endif
	return ret;
}

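/* io_poll_delay is deprecated; writes are accepted but ignored. */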
static ssize_t queue_poll_delay_store(struct gendisk *disk, const char *page,
		size_t count)
{
	return count;
}

static ssize_t queue_poll_store(struct gendisk *disk, const char *page,
		size_t count)
{
	unsigned int memflags;
	ssize_t ret = count;
	struct request_queue *q = disk->queue;

	memflags = blk_mq_freeze_queue(q);
	if (!(q->limits.features & BLK_FEAT_POLL)) {
		ret = -EINVAL;
		goto out;
	}

	pr_info_ratelimited("writes to the poll attribute are ignored.\n");
	pr_info_ratelimited("please use driver specific parameters instead.\n");
out:
	blk_mq_unfreeze_queue(q, memflags);
	return ret;
}

static ssize_t queue_io_timeout_show(struct gendisk *disk, char *page)
{
	return sysfs_emit(page, "%u\n",
			jiffies_to_msecs(READ_ONCE(disk->queue->rq_timeout)));
}

static ssize_t queue_io_timeout_store(struct gendisk *disk, const char *page,
		size_t count)
{
	unsigned int val, memflags;
	int err;
	struct request_queue *q = disk->queue;

	err = kstrtou32(page, 10, &val);
	if (err || val == 0)
		return -EINVAL;

	memflags = blk_mq_freeze_queue(q);
	blk_queue_rq_timeout(q, msecs_to_jiffies(val));
	blk_mq_unfreeze_queue(q, memflags);

	return count;
}

static ssize_t queue_wc_show(struct gendisk *disk, char *page)
{
	if (blk_queue_write_cache(disk->queue))
		return sysfs_emit(page, "write back\n");
	return sysfs_emit(page, "write through\n");
}

static int queue_wc_store(struct gendisk *disk, const char *page,
		size_t count, struct queue_limits *lim)
{
	bool disable;

	if (!strncmp(page, "write back", 10)) {
		disable = false;
	} else if (!strncmp(page, "write through", 13) ||
		   !strncmp(page, "none", 4)) {
		disable = true;
	} else {
		return -EINVAL;
	}

	if (disable)
		lim->flags |= BLK_FLAG_WRITE_CACHE_DISABLED;
	else
		lim->flags &= ~BLK_FLAG_WRITE_CACHE_DISABLED;
	return 0;
}

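/*
 * Helpers declaring the struct queue_sysfs_entry instances.  The plain RO/RW
 * variants wire up ->show/->store; the _LIM_ variants wire up
 * ->show_limit/->store_limit, for which queue_attr_show()/queue_attr_store()
 * supply the limits_lock resp. queue_limits update cycle.
 */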
#define QUEUE_RO_ENTRY(_prefix, _name)					\
static struct queue_sysfs_entry _prefix##_entry = {			\
	.attr = { .name = _name, .mode = 0444 },			\
	.show = _prefix##_show,						\
};

#define QUEUE_RW_ENTRY(_prefix, _name)					\
static struct queue_sysfs_entry _prefix##_entry = {			\
	.attr = { .name = _name, .mode = 0644 },			\
	.show = _prefix##_show,						\
	.store = _prefix##_store,					\
};

#define QUEUE_LIM_RO_ENTRY(_prefix, _name)				\
static struct queue_sysfs_entry _prefix##_entry = {			\
	.attr = { .name = _name, .mode = 0444 },			\
	.show_limit = _prefix##_show,					\
}

#define QUEUE_LIM_RW_ENTRY(_prefix, _name)				\
static struct queue_sysfs_entry _prefix##_entry = {			\
	.attr = { .name = _name, .mode = 0644 },			\
	.show_limit = _prefix##_show,					\
	.store_limit = _prefix##_store,					\
}

QUEUE_RW_ENTRY(queue_requests, "nr_requests");
QUEUE_RW_ENTRY(queue_ra, "read_ahead_kb");
QUEUE_LIM_RW_ENTRY(queue_max_sectors, "max_sectors_kb");
QUEUE_LIM_RO_ENTRY(queue_max_hw_sectors, "max_hw_sectors_kb");
QUEUE_LIM_RO_ENTRY(queue_max_segments, "max_segments");
QUEUE_LIM_RO_ENTRY(queue_max_integrity_segments, "max_integrity_segments");
QUEUE_LIM_RO_ENTRY(queue_max_segment_size, "max_segment_size");
QUEUE_LIM_RO_ENTRY(queue_max_write_streams, "max_write_streams");
QUEUE_LIM_RO_ENTRY(queue_write_stream_granularity, "write_stream_granularity");
QUEUE_RW_ENTRY(elv_iosched, "scheduler");

QUEUE_LIM_RO_ENTRY(queue_logical_block_size, "logical_block_size");
QUEUE_LIM_RO_ENTRY(queue_physical_block_size, "physical_block_size");
QUEUE_LIM_RO_ENTRY(queue_chunk_sectors, "chunk_sectors");
QUEUE_LIM_RO_ENTRY(queue_io_min, "minimum_io_size");
QUEUE_LIM_RO_ENTRY(queue_io_opt, "optimal_io_size");

QUEUE_LIM_RO_ENTRY(queue_max_discard_segments, "max_discard_segments");
QUEUE_LIM_RO_ENTRY(queue_discard_granularity, "discard_granularity");
QUEUE_LIM_RO_ENTRY(queue_max_hw_discard_sectors, "discard_max_hw_bytes");
QUEUE_LIM_RW_ENTRY(queue_max_discard_sectors, "discard_max_bytes");
QUEUE_RO_ENTRY(queue_discard_zeroes_data, "discard_zeroes_data");

QUEUE_LIM_RO_ENTRY(queue_atomic_write_max_sectors, "atomic_write_max_bytes");
QUEUE_LIM_RO_ENTRY(queue_atomic_write_boundary_sectors,
		"atomic_write_boundary_bytes");
QUEUE_LIM_RO_ENTRY(queue_atomic_write_unit_max, "atomic_write_unit_max_bytes");
QUEUE_LIM_RO_ENTRY(queue_atomic_write_unit_min, "atomic_write_unit_min_bytes");

QUEUE_RO_ENTRY(queue_write_same_max, "write_same_max_bytes");
QUEUE_LIM_RO_ENTRY(queue_max_write_zeroes_sectors, "write_zeroes_max_bytes");
QUEUE_LIM_RO_ENTRY(queue_max_hw_wzeroes_unmap_sectors,
		"write_zeroes_unmap_max_hw_bytes");
QUEUE_LIM_RW_ENTRY(queue_max_wzeroes_unmap_sectors,
		"write_zeroes_unmap_max_bytes");
QUEUE_LIM_RO_ENTRY(queue_max_zone_append_sectors, "zone_append_max_bytes");
QUEUE_LIM_RO_ENTRY(queue_zone_write_granularity, "zone_write_granularity");

QUEUE_LIM_RO_ENTRY(queue_zoned, "zoned");
QUEUE_RO_ENTRY(queue_nr_zones, "nr_zones");
QUEUE_LIM_RO_ENTRY(queue_max_open_zones, "max_open_zones");
QUEUE_LIM_RO_ENTRY(queue_max_active_zones, "max_active_zones");

QUEUE_RW_ENTRY(queue_nomerges, "nomerges");
QUEUE_LIM_RW_ENTRY(queue_iostats_passthrough, "iostats_passthrough");
QUEUE_RW_ENTRY(queue_rq_affinity, "rq_affinity");
QUEUE_RW_ENTRY(queue_poll, "io_poll");
QUEUE_RW_ENTRY(queue_poll_delay, "io_poll_delay");
QUEUE_LIM_RW_ENTRY(queue_wc, "write_cache");
QUEUE_LIM_RO_ENTRY(queue_fua, "fua");
QUEUE_LIM_RO_ENTRY(queue_dax, "dax");
QUEUE_RW_ENTRY(queue_io_timeout, "io_timeout");
QUEUE_LIM_RO_ENTRY(queue_virt_boundary_mask, "virt_boundary_mask");
QUEUE_LIM_RO_ENTRY(queue_dma_alignment, "dma_alignment");

/* legacy alias for logical_block_size: */
static struct queue_sysfs_entry queue_hw_sector_size_entry = {
	.attr = { .name = "hw_sector_size", .mode = 0444 },
	.show_limit = queue_logical_block_size_show,
};

QUEUE_LIM_RW_ENTRY(queue_rotational, "rotational");
QUEUE_LIM_RW_ENTRY(queue_iostats, "iostats");
QUEUE_LIM_RW_ENTRY(queue_add_random, "add_random");
QUEUE_LIM_RW_ENTRY(queue_stable_writes, "stable_writes");

#ifdef CONFIG_BLK_WBT
static ssize_t queue_var_store64(s64 *var, const char *page)
{
	int err;
	s64 v;

	err = kstrtos64(page, 10, &v);
	if (err < 0)
		return err;

	*var = v;
	return 0;
}

static ssize_t queue_wb_lat_show(struct gendisk *disk, char *page)
{
	ssize_t ret;
	struct request_queue *q = disk->queue;

	mutex_lock(&disk->rqos_state_mutex);
	if (!wbt_rq_qos(q)) {
		ret = -EINVAL;
		goto out;
	}

	if (wbt_disabled(q)) {
		ret = sysfs_emit(page, "0\n");
		goto out;
	}

	ret = sysfs_emit(page, "%llu\n", div_u64(wbt_get_min_lat(q), 1000));
out:
	mutex_unlock(&disk->rqos_state_mutex);
	return ret;
}

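/*
 * wbt_lat_usec takes the writeback throttling latency target in microseconds;
 * writing -1 restores the device default from wbt_default_latency_nsec().
 */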
static ssize_t queue_wb_lat_store(struct gendisk *disk, const char *page,
		size_t count)
{
	struct request_queue *q = disk->queue;
	struct rq_qos *rqos;
	ssize_t ret;
	s64 val;
	unsigned int memflags;

	ret = queue_var_store64(&val, page);
	if (ret < 0)
		return ret;
	if (val < -1)
		return -EINVAL;

	/*
	 * Ensure that the queue is idled, in case the latency update
	 * ends up either enabling or disabling wbt completely. We can't
	 * have IO inflight if that happens.
	 */
	memflags = blk_mq_freeze_queue(q);

	rqos = wbt_rq_qos(q);
	if (!rqos) {
		ret = wbt_init(disk);
		if (ret)
			goto out;
	}

	ret = count;
	if (val == -1)
		val = wbt_default_latency_nsec(q);
	else if (val >= 0)
		val *= 1000ULL;

	if (wbt_get_min_lat(q) == val)
		goto out;

	blk_mq_quiesce_queue(q);

	mutex_lock(&disk->rqos_state_mutex);
	wbt_set_min_lat(q, val);
	mutex_unlock(&disk->rqos_state_mutex);

	blk_mq_unquiesce_queue(q);
out:
	blk_mq_unfreeze_queue(q, memflags);

	return ret;
}

QUEUE_RW_ENTRY(queue_wb_lat, "wbt_lat_usec");
#endif

/* Common attributes for bio-based and request-based queues. */
static struct attribute *queue_attrs[] = {
	/*
	 * Attributes which are protected with q->limits_lock.
	 */
	&queue_max_hw_sectors_entry.attr,
	&queue_max_sectors_entry.attr,
	&queue_max_segments_entry.attr,
	&queue_max_discard_segments_entry.attr,
	&queue_max_integrity_segments_entry.attr,
	&queue_max_segment_size_entry.attr,
	&queue_max_write_streams_entry.attr,
	&queue_write_stream_granularity_entry.attr,
	&queue_hw_sector_size_entry.attr,
	&queue_logical_block_size_entry.attr,
	&queue_physical_block_size_entry.attr,
	&queue_chunk_sectors_entry.attr,
	&queue_io_min_entry.attr,
	&queue_io_opt_entry.attr,
	&queue_discard_granularity_entry.attr,
	&queue_max_discard_sectors_entry.attr,
	&queue_max_hw_discard_sectors_entry.attr,
	&queue_atomic_write_max_sectors_entry.attr,
	&queue_atomic_write_boundary_sectors_entry.attr,
	&queue_atomic_write_unit_min_entry.attr,
	&queue_atomic_write_unit_max_entry.attr,
	&queue_max_write_zeroes_sectors_entry.attr,
	&queue_max_hw_wzeroes_unmap_sectors_entry.attr,
	&queue_max_wzeroes_unmap_sectors_entry.attr,
	&queue_max_zone_append_sectors_entry.attr,
	&queue_zone_write_granularity_entry.attr,
	&queue_rotational_entry.attr,
	&queue_zoned_entry.attr,
	&queue_max_open_zones_entry.attr,
	&queue_max_active_zones_entry.attr,
	&queue_iostats_passthrough_entry.attr,
	&queue_iostats_entry.attr,
	&queue_stable_writes_entry.attr,
	&queue_add_random_entry.attr,
	&queue_wc_entry.attr,
	&queue_fua_entry.attr,
	&queue_dax_entry.attr,
	&queue_virt_boundary_mask_entry.attr,
	&queue_dma_alignment_entry.attr,
	&queue_ra_entry.attr,

	/*
	 * Attributes which don't require locking.
	 */
	&queue_discard_zeroes_data_entry.attr,
	&queue_write_same_max_entry.attr,
	&queue_nr_zones_entry.attr,
	&queue_nomerges_entry.attr,
	&queue_poll_entry.attr,
	&queue_poll_delay_entry.attr,

	NULL,
};

/* Request-based queue attributes that are not relevant for bio-based queues. */
static struct attribute *blk_mq_queue_attrs[] = {
	/*
	 * Attributes which require some form of locking other than
	 * q->sysfs_lock.
	 */
	&elv_iosched_entry.attr,
	&queue_requests_entry.attr,
#ifdef CONFIG_BLK_WBT
	&queue_wb_lat_entry.attr,
#endif
	/*
	 * Attributes which don't require locking.
	 */
	&queue_rq_affinity_entry.attr,
	&queue_io_timeout_entry.attr,

	NULL,
};

static umode_t queue_attr_visible(struct kobject *kobj, struct attribute *attr,
				  int n)
{
	struct gendisk *disk = container_of(kobj, struct gendisk, queue_kobj);
	struct request_queue *q = disk->queue;

	if ((attr == &queue_max_open_zones_entry.attr ||
	     attr == &queue_max_active_zones_entry.attr) &&
	    !blk_queue_is_zoned(q))
		return 0;

	return attr->mode;
}

static umode_t blk_mq_queue_attr_visible(struct kobject *kobj,
					 struct attribute *attr, int n)
{
	struct gendisk *disk = container_of(kobj, struct gendisk, queue_kobj);
	struct request_queue *q = disk->queue;

	if (!queue_is_mq(q))
		return 0;

	if (attr == &queue_io_timeout_entry.attr && !q->mq_ops->timeout)
		return 0;

	return attr->mode;
}

static struct attribute_group queue_attr_group = {
	.attrs = queue_attrs,
	.is_visible = queue_attr_visible,
};

static struct attribute_group blk_mq_queue_attr_group = {
	.attrs = blk_mq_queue_attrs,
	.is_visible = blk_mq_queue_attr_visible,
};

#define to_queue(atr) container_of((atr), struct queue_sysfs_entry, attr)

static ssize_t
queue_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
{
	struct queue_sysfs_entry *entry = to_queue(attr);
	struct gendisk *disk = container_of(kobj, struct gendisk, queue_kobj);

	if (!entry->show && !entry->show_limit)
		return -EIO;

	if (entry->show_limit) {
		ssize_t res;

		mutex_lock(&disk->queue->limits_lock);
		res = entry->show_limit(disk, page);
		mutex_unlock(&disk->queue->limits_lock);
		return res;
	}

	return entry->show(disk, page);
}

static ssize_t
queue_attr_store(struct kobject *kobj, struct attribute *attr,
		 const char *page, size_t length)
{
	struct queue_sysfs_entry *entry = to_queue(attr);
	struct gendisk *disk = container_of(kobj, struct gendisk, queue_kobj);
	struct request_queue *q = disk->queue;

	if (!entry->store_limit && !entry->store)
		return -EIO;

	if (entry->store_limit) {
		struct queue_limits lim = queue_limits_start_update(q);
		ssize_t res;

		res = entry->store_limit(disk, page, length, &lim);
		if (res < 0) {
			queue_limits_cancel_update(q);
			return res;
		}

		res = queue_limits_commit_update_frozen(q, &lim);
		if (res)
			return res;
		return length;
	}

	return entry->store(disk, page, length);
}

static const struct sysfs_ops queue_sysfs_ops = {
	.show	= queue_attr_show,
	.store	= queue_attr_store,
};

static const struct attribute_group *blk_queue_attr_groups[] = {
	&queue_attr_group,
	&blk_mq_queue_attr_group,
	NULL
};

static void blk_queue_release(struct kobject *kobj)
{
	/* nothing to do here, all data is associated with the parent gendisk */
}

const struct kobj_type blk_queue_ktype = {
	.default_groups = blk_queue_attr_groups,
	.sysfs_ops = &queue_sysfs_ops,
	.release = blk_queue_release,
};

static void blk_debugfs_remove(struct gendisk *disk)
{
	struct request_queue *q = disk->queue;

	mutex_lock(&q->debugfs_mutex);
	blk_trace_shutdown(q);
	debugfs_remove_recursive(q->debugfs_dir);
	q->debugfs_dir = NULL;
	q->sched_debugfs_dir = NULL;
	q->rqos_debugfs_dir = NULL;
	mutex_unlock(&q->debugfs_mutex);
}

/**
 * blk_register_queue - register a block layer queue with sysfs
 * @disk: Disk of which the request queue should be registered with sysfs.
 */
int blk_register_queue(struct gendisk *disk)
{
	struct request_queue *q = disk->queue;
	int ret;

	ret = kobject_add(&disk->queue_kobj, &disk_to_dev(disk)->kobj, "queue");
	if (ret < 0)
		return ret;

	if (queue_is_mq(q)) {
		ret = blk_mq_sysfs_register(disk);
		if (ret)
			goto out_del_queue_kobj;
	}
	mutex_lock(&q->sysfs_lock);

	mutex_lock(&q->debugfs_mutex);
	q->debugfs_dir = debugfs_create_dir(disk->disk_name, blk_debugfs_root);
	if (queue_is_mq(q))
		blk_mq_debugfs_register(q);
	mutex_unlock(&q->debugfs_mutex);

	ret = disk_register_independent_access_ranges(disk);
	if (ret)
		goto out_debugfs_remove;

	ret = blk_crypto_sysfs_register(disk);
	if (ret)
		goto out_unregister_ia_ranges;

	if (queue_is_mq(q))
		elevator_set_default(q);

	blk_queue_flag_set(QUEUE_FLAG_REGISTERED, q);
	wbt_enable_default(disk);

	/* Everything is ready now; send out the KOBJ_ADD uevent. */
	kobject_uevent(&disk->queue_kobj, KOBJ_ADD);
	if (q->elevator)
		kobject_uevent(&q->elevator->kobj, KOBJ_ADD);
	mutex_unlock(&q->sysfs_lock);

	/*
	 * SCSI probing may synchronously create and destroy a lot of
	 * request_queues for non-existent devices. Shutting down a fully
	 * functional queue takes measurable wallclock time as RCU grace
	 * periods are involved. To avoid excessive latency in these
	 * cases, a request_queue starts out in a degraded mode which is
	 * faster to shut down and is made fully functional here as
	 * request_queues for non-existent devices never get registered.
	 */
	blk_queue_flag_set(QUEUE_FLAG_INIT_DONE, q);
	percpu_ref_switch_to_percpu(&q->q_usage_counter);

	return ret;

out_unregister_ia_ranges:
	disk_unregister_independent_access_ranges(disk);
out_debugfs_remove:
	blk_debugfs_remove(disk);
	mutex_unlock(&q->sysfs_lock);
	if (queue_is_mq(q))
		blk_mq_sysfs_unregister(disk);
out_del_queue_kobj:
	kobject_del(&disk->queue_kobj);
	return ret;
}

/**
 * blk_unregister_queue - counterpart of blk_register_queue()
 * @disk: Disk of which the request queue should be unregistered from sysfs.
 *
 * Note: the caller is responsible for guaranteeing that this function is called
 * after blk_register_queue() has finished.
 */
void blk_unregister_queue(struct gendisk *disk)
{
	struct request_queue *q = disk->queue;

	if (WARN_ON(!q))
		return;

	/* Return early if disk->queue was never registered. */
	if (!blk_queue_registered(q))
		return;

	/*
	 * Since sysfs_remove_dir() prevents adding new directory entries
	 * before removal of existing entries starts, protect against
	 * concurrent elv_iosched_store() calls.
	 */
	mutex_lock(&q->sysfs_lock);
	blk_queue_flag_clear(QUEUE_FLAG_REGISTERED, q);
	mutex_unlock(&q->sysfs_lock);

	/*
	 * Remove the sysfs attributes before unregistering the queue data
	 * structures that can be modified through sysfs.
	 */
	if (queue_is_mq(q))
		blk_mq_sysfs_unregister(disk);
	blk_crypto_sysfs_unregister(disk);

	mutex_lock(&q->sysfs_lock);
	disk_unregister_independent_access_ranges(disk);
	mutex_unlock(&q->sysfs_lock);

	/* Now that we've deleted all child objects, we can delete the queue. */
	kobject_uevent(&disk->queue_kobj, KOBJ_REMOVE);
	kobject_del(&disk->queue_kobj);

	if (queue_is_mq(q))
		elevator_set_none(q);

	blk_debugfs_remove(disk);
}