/* SPDX-License-Identifier: GPL-2.0 */
#ifndef BLK_MQ_H
#define BLK_MQ_H

#include <linux/blkdev.h>
#include <linux/sbitmap.h>
#include <linux/lockdep.h>
#include <linux/scatterlist.h>
#include <linux/prefetch.h>
#include <linux/srcu.h>
#include <linux/rw_hint.h>
#include <linux/rwsem.h>

struct blk_mq_tags;
struct blk_flush_queue;

#define BLKDEV_MIN_RQ	4
#define BLKDEV_DEFAULT_RQ	128

enum rq_end_io_ret {
	RQ_END_IO_NONE,
	RQ_END_IO_FREE,
};

typedef enum rq_end_io_ret (rq_end_io_fn)(struct request *, blk_status_t);

/*
 * request flags
 */
typedef __u32 __bitwise req_flags_t;

/* Keep rqf_name[] in sync with the definitions below */
enum rqf_flags {
	/* drive already may have started this one */
	__RQF_STARTED,
	/* request for flush sequence */
	__RQF_FLUSH_SEQ,
	/* merge of different types, fail separately */
	__RQF_MIXED_MERGE,
	/* don't call prep for this one */
	__RQF_DONTPREP,
	/* use hctx->sched_tags */
	__RQF_SCHED_TAGS,
	/* use an I/O scheduler for this request */
	__RQF_USE_SCHED,
	/* vaguely specified driver internal error.  Ignored by block layer */
	__RQF_FAILED,
	/* don't warn about errors */
	__RQF_QUIET,
	/* account into disk and partition IO statistics */
	__RQF_IO_STAT,
	/* runtime pm request */
	__RQF_PM,
	/* on IO scheduler merge hash */
	__RQF_HASHED,
	/* track IO completion time */
	__RQF_STATS,
	/* Look at ->special_vec for the actual data payload instead of the
	   bio chain. */
	__RQF_SPECIAL_PAYLOAD,
	/* request completion needs to be signaled to zone write plugging. */
	__RQF_ZONE_WRITE_PLUGGING,
	/* ->timeout has been called, don't expire again */
	__RQF_TIMED_OUT,
	__RQF_RESV,
	__RQF_BITS
};

#define RQF_STARTED		((__force req_flags_t)(1 << __RQF_STARTED))
#define RQF_FLUSH_SEQ		((__force req_flags_t)(1 << __RQF_FLUSH_SEQ))
#define RQF_MIXED_MERGE		((__force req_flags_t)(1 << __RQF_MIXED_MERGE))
#define RQF_DONTPREP		((__force req_flags_t)(1 << __RQF_DONTPREP))
#define RQF_SCHED_TAGS		((__force req_flags_t)(1 << __RQF_SCHED_TAGS))
#define RQF_USE_SCHED		((__force req_flags_t)(1 << __RQF_USE_SCHED))
#define RQF_FAILED		((__force req_flags_t)(1 << __RQF_FAILED))
#define RQF_QUIET		((__force req_flags_t)(1 << __RQF_QUIET))
#define RQF_IO_STAT		((__force req_flags_t)(1 << __RQF_IO_STAT))
#define RQF_PM			((__force req_flags_t)(1 << __RQF_PM))
#define RQF_HASHED		((__force req_flags_t)(1 << __RQF_HASHED))
#define RQF_STATS		((__force req_flags_t)(1 << __RQF_STATS))
#define RQF_SPECIAL_PAYLOAD	\
	((__force req_flags_t)(1 << __RQF_SPECIAL_PAYLOAD))
#define RQF_ZONE_WRITE_PLUGGING	\
	((__force req_flags_t)(1 << __RQF_ZONE_WRITE_PLUGGING))
#define RQF_TIMED_OUT		((__force req_flags_t)(1 << __RQF_TIMED_OUT))
#define RQF_RESV		((__force req_flags_t)(1 << __RQF_RESV))

/* flags that prevent us from merging requests: */
#define RQF_NOMERGE_FLAGS \
	(RQF_STARTED | RQF_FLUSH_SEQ | RQF_SPECIAL_PAYLOAD)

enum mq_rq_state {
	MQ_RQ_IDLE		= 0,
	MQ_RQ_IN_FLIGHT		= 1,
	MQ_RQ_COMPLETE		= 2,
};

/*
 * Try to put the fields that are referenced together in the same cacheline.
 *
 * If you modify this structure, make sure to update blk_rq_init() and
 * especially blk_mq_rq_ctx_init() to take care of the added fields.
 */
struct request {
	struct request_queue *q;
	struct blk_mq_ctx *mq_ctx;
	struct blk_mq_hw_ctx *mq_hctx;

	blk_opf_t cmd_flags;		/* op and common flags */
	req_flags_t rq_flags;

	int tag;
	int internal_tag;

	unsigned int timeout;

	/* the following two fields are internal, NEVER access directly */
	unsigned int __data_len;	/* total data len */
	sector_t __sector;		/* sector cursor */

	struct bio *bio;
	struct bio *biotail;

	union {
		struct list_head queuelist;
		struct request *rq_next;
	};

	struct block_device *part;
#ifdef CONFIG_BLK_RQ_ALLOC_TIME
	/* Time that the first bio started allocating this request. */
	u64 alloc_time_ns;
#endif
	/* Time that this request was allocated for this IO. */
	u64 start_time_ns;
	/* Time that I/O was submitted to the device. */
	u64 io_start_time_ns;

#ifdef CONFIG_BLK_WBT
	unsigned short wbt_flags;
#endif
	/*
	 * rq sectors used for blk stats. It has the same value as
	 * blk_rq_sectors(rq), except that it is never zeroed by
	 * completion.
	 */
	unsigned short stats_sectors;

	/*
	 * Number of scatter-gather DMA addr+len pairs after
	 * physical address coalescing is performed.
	 */
	unsigned short nr_phys_segments;
	unsigned short nr_integrity_segments;

#ifdef CONFIG_BLK_INLINE_ENCRYPTION
	struct bio_crypt_ctx *crypt_ctx;
	struct blk_crypto_keyslot *crypt_keyslot;
#endif

	enum mq_rq_state state;
	atomic_t ref;

	unsigned long deadline;

	/*
	 * The hash is used inside the scheduler, and killed once the
	 * request reaches the dispatch list. The ipi_list is only used
	 * to queue the request for softirq completion, which is long
	 * after the request has been unhashed (and even removed from
	 * the dispatch list).
	 */
	union {
		struct hlist_node hash;	/* merge hash */
		struct llist_node ipi_list;
	};

	/*
	 * The rb_node is only used inside the io scheduler, requests
	 * are pruned when moved to the dispatch queue. special_vec must
	 * only be used if RQF_SPECIAL_PAYLOAD is set, and such requests
	 * cannot be inserted into an IO scheduler.
	 */
	union {
		struct rb_node rb_node;	/* sort/lookup */
		struct bio_vec special_vec;
	};

	/*
	 * Three pointers are available for the IO schedulers, if they need
	 * more they have to dynamically allocate it.
	 */
	struct {
		struct io_cq		*icq;
		void			*priv[2];
	} elv;

	struct {
		unsigned int		seq;
		rq_end_io_fn		*saved_end_io;
	} flush;

	u64 fifo_time;

	/*
	 * completion callback.
	 */
	rq_end_io_fn *end_io;
	void *end_io_data;
};

static inline enum req_op req_op(const struct request *req)
{
	return req->cmd_flags & REQ_OP_MASK;
}

static inline bool blk_rq_is_passthrough(struct request *rq)
{
	return blk_op_is_passthrough(rq->cmd_flags);
}

static inline unsigned short req_get_ioprio(struct request *req)
{
	if (req->bio)
		return req->bio->bi_ioprio;
	return 0;
}

#define rq_data_dir(rq)		(op_is_write(req_op(rq)) ? WRITE : READ)

#define rq_dma_dir(rq) \
	(op_is_write(req_op(rq)) ? DMA_TO_DEVICE : DMA_FROM_DEVICE)

static inline int rq_list_empty(const struct rq_list *rl)
{
	return rl->head == NULL;
}

static inline void rq_list_init(struct rq_list *rl)
{
	rl->head = NULL;
	rl->tail = NULL;
}

static inline void rq_list_add_tail(struct rq_list *rl, struct request *rq)
{
	rq->rq_next = NULL;
	if (rl->tail)
		rl->tail->rq_next = rq;
	else
		rl->head = rq;
	rl->tail = rq;
}

static inline void rq_list_add_head(struct rq_list *rl, struct request *rq)
{
	rq->rq_next = rl->head;
	rl->head = rq;
	if (!rl->tail)
		rl->tail = rq;
}

static inline struct request *rq_list_pop(struct rq_list *rl)
{
	struct request *rq = rl->head;

	if (rq) {
		rl->head = rl->head->rq_next;
		if (!rl->head)
			rl->tail = NULL;
		rq->rq_next = NULL;
	}

	return rq;
}

static inline struct request *rq_list_peek(struct rq_list *rl)
{
	return rl->head;
}

#define rq_list_for_each(rl, pos)					\
	for (pos = rq_list_peek((rl)); (pos); pos = pos->rq_next)

#define rq_list_for_each_safe(rl, pos, nxt)				\
	for (pos = rq_list_peek((rl)), nxt = pos->rq_next;		\
	     pos; pos = nxt, nxt = pos ? pos->rq_next : NULL)

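/*
 * Illustrative sketch (not part of the upstream API): build up a private
 * request list and drain it with the helpers above.  "my_issue_one" and
 * "some_rq" are hypothetical driver-side names.
 *
 *	struct rq_list rl;
 *	struct request *rq;
 *
 *	rq_list_init(&rl);
 *	rq_list_add_tail(&rl, some_rq);
 *
 *	while ((rq = rq_list_pop(&rl)))
 *		my_issue_one(rq);
 */
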
/**
 * enum blk_eh_timer_return - How the timeout handler should proceed
 * @BLK_EH_DONE: The block driver completed the command or will complete it at
 *	a later time.
 * @BLK_EH_RESET_TIMER: Reset the request timer and continue waiting for the
 *	request to complete.
 */
enum blk_eh_timer_return {
	BLK_EH_DONE,
	BLK_EH_RESET_TIMER,
};

/**
 * struct blk_mq_hw_ctx - State for a hardware queue facing the hardware
 * block device
 */
struct blk_mq_hw_ctx {
	struct {
		/** @lock: Protects the dispatch list. */
		spinlock_t		lock;
		/**
		 * @dispatch: Used for requests that are ready to be
		 * dispatched to the hardware but for some reason (e.g. lack of
		 * resources) could not be sent to the hardware. As soon as the
		 * driver can send new requests, requests in this list will
		 * be sent first for a fairer dispatch.
		 */
		struct list_head	dispatch;
		/**
		 * @state: BLK_MQ_S_* flags. Defines the state of the hw
		 * queue (active, scheduled to restart, stopped).
		 */
		unsigned long		state;
	} ____cacheline_aligned_in_smp;

	/**
	 * @run_work: Used for scheduling a hardware queue run at a later time.
	 */
	struct delayed_work	run_work;
	/** @cpumask: Map of available CPUs where this hctx can run. */
	cpumask_var_t		cpumask;
	/**
	 * @next_cpu: Used by blk_mq_hctx_next_cpu() for round-robin CPU
	 * selection from @cpumask.
	 */
	int			next_cpu;
	/**
	 * @next_cpu_batch: Counter of how many works are left in the batch
	 * before changing to the next CPU.
	 */
	int			next_cpu_batch;

	/** @flags: BLK_MQ_F_* flags. Defines the behaviour of the queue. */
	unsigned long		flags;

	/**
	 * @sched_data: Pointer owned by the IO scheduler attached to a request
	 * queue. It's up to the IO scheduler how to use this pointer.
	 */
	void			*sched_data;
	/**
	 * @queue: Pointer to the request queue that owns this hardware context.
	 */
	struct request_queue	*queue;
	/** @fq: Queue of requests that need to perform a flush operation. */
	struct blk_flush_queue	*fq;

	/**
	 * @driver_data: Pointer to data owned by the block driver that created
	 * this hctx.
	 */
	void			*driver_data;

	/**
	 * @ctx_map: Bitmap for each software queue. If a bit is on, there is a
	 * pending request in that software queue.
	 */
	struct sbitmap		ctx_map;

	/**
	 * @dispatch_from: Software queue to be used when no scheduler was
	 * selected.
	 */
	struct blk_mq_ctx	*dispatch_from;
	/**
	 * @dispatch_busy: Number used by blk_mq_update_dispatch_busy() to
	 * decide if the hw_queue is busy using an Exponential Weighted Moving
	 * Average algorithm.
	 */
	unsigned int		dispatch_busy;

	/** @type: HCTX_TYPE_* flags. Type of hardware queue. */
	unsigned short		type;
	/** @nr_ctx: Number of software queues. */
	unsigned short		nr_ctx;
	/** @ctxs: Array of software queues. */
	struct blk_mq_ctx	**ctxs;

	/** @dispatch_wait_lock: Lock for dispatch_wait queue. */
	spinlock_t		dispatch_wait_lock;
	/**
	 * @dispatch_wait: Waitqueue to put requests on when there is no tag
	 * available at the moment, to wait for another try in the future.
	 */
	wait_queue_entry_t	dispatch_wait;

	/**
	 * @wait_index: Index of next available dispatch_wait queue to insert
	 * requests.
	 */
	atomic_t		wait_index;

	/**
	 * @tags: Tags owned by the block driver. A tag in this set is only
	 * assigned when a request is dispatched from a hardware queue.
	 */
	struct blk_mq_tags	*tags;
	/**
	 * @sched_tags: Tags owned by the I/O scheduler. If there is an I/O
	 * scheduler associated with a request queue, a tag is assigned when
	 * that request is allocated. Else, this member is not used.
	 */
	struct blk_mq_tags	*sched_tags;

	/** @numa_node: NUMA node the storage adapter has been connected to. */
	unsigned int		numa_node;
	/** @queue_num: Index of this hardware queue. */
	unsigned int		queue_num;

	/**
	 * @nr_active: Number of active requests. Only used when a tag set is
	 * shared across request queues.
	 */
	atomic_t		nr_active;

	/** @cpuhp_online: List to store requests if a CPU is going to die. */
	struct hlist_node	cpuhp_online;
	/** @cpuhp_dead: List to store requests if some CPU dies. */
	struct hlist_node	cpuhp_dead;
	/** @kobj: Kernel object for sysfs. */
	struct kobject		kobj;

#ifdef CONFIG_BLK_DEBUG_FS
	/**
	 * @debugfs_dir: debugfs directory for this hardware queue. Named
	 * as cpu<cpu_number>.
	 */
	struct dentry		*debugfs_dir;
	/** @sched_debugfs_dir: debugfs directory for the scheduler. */
	struct dentry		*sched_debugfs_dir;
#endif

	/**
	 * @hctx_list: if this hctx is not in use, this is an entry in
	 * q->unused_hctx_list.
	 */
	struct list_head	hctx_list;
};

/**
 * struct blk_mq_queue_map - Map software queues to hardware queues
 * @mq_map:       CPU ID to hardware queue index map. This is an array
 *	with nr_cpu_ids elements. Each element has a value in the range
 *	[@queue_offset, @queue_offset + @nr_queues).
 * @nr_queues:    Number of hardware queues to map CPU IDs onto.
 * @queue_offset: First hardware queue to map onto. Used by the PCIe NVMe
 *	driver to map each hardware queue type (enum hctx_type) onto a distinct
 *	set of hardware queues.
 */
struct blk_mq_queue_map {
	unsigned int *mq_map;
	unsigned int nr_queues;
	unsigned int queue_offset;
};

/**
 * enum hctx_type - Type of hardware queue
 * @HCTX_TYPE_DEFAULT:	All I/O not otherwise accounted for.
 * @HCTX_TYPE_READ:	Just for READ I/O.
 * @HCTX_TYPE_POLL:	Polled I/O of any kind.
 * @HCTX_MAX_TYPES:	Number of types of hctx.
 */
enum hctx_type {
	HCTX_TYPE_DEFAULT,
	HCTX_TYPE_READ,
	HCTX_TYPE_POLL,

	HCTX_MAX_TYPES,
};

/**
 * struct blk_mq_tag_set - tag set that can be shared between request queues
 * @ops:	   Pointers to functions that implement block driver behavior.
 * @map:	   One or more ctx -> hctx mappings. One map exists for each
 *		   hardware queue type (enum hctx_type) that the driver wishes
 *		   to support. There are no restrictions on maps being of the
 *		   same size, and it's perfectly legal to share maps between
 *		   types.
 * @nr_maps:	   Number of elements in the @map array. A number in the range
 *		   [1, HCTX_MAX_TYPES].
 * @nr_hw_queues:  Number of hardware queues supported by the block driver that
 *		   owns this data structure.
 * @queue_depth:   Number of tags per hardware queue, reserved tags included.
 * @reserved_tags: Number of tags to set aside for BLK_MQ_REQ_RESERVED tag
 *		   allocations.
 * @cmd_size:	   Number of additional bytes to allocate per request. The block
 *		   driver owns these additional bytes.
 * @numa_node:	   NUMA node the storage adapter has been connected to.
 * @timeout:	   Request processing timeout in jiffies.
 * @flags:	   Zero or more BLK_MQ_F_* flags.
 * @driver_data:   Pointer to data owned by the block driver that created this
 *		   tag set.
 * @tags:	   Tag sets. One tag set per hardware queue. Has @nr_hw_queues
 *		   elements.
 * @shared_tags:
 *		   Shared set of tags. Has @nr_hw_queues elements. If set,
 *		   shared by all @tags.
 * @tag_list_lock: Serializes tag_list accesses.
 * @tag_list:	   List of the request queues that use this tag set. See also
 *		   request_queue.tag_set_list.
 * @srcu:	   Use as lock when type of the request queue is blocking
 *		   (BLK_MQ_F_BLOCKING).
 * @tags_srcu:	   SRCU used to defer freeing of tags page_list to prevent
 *		   use-after-free when iterating tags.
 * @update_nr_hwq_lock:
 *		   Synchronize updating nr_hw_queues with add/del disk &
 *		   switching elevator.
 */
struct blk_mq_tag_set {
	const struct blk_mq_ops	*ops;
	struct blk_mq_queue_map	map[HCTX_MAX_TYPES];
	unsigned int		nr_maps;
	unsigned int		nr_hw_queues;
	unsigned int		queue_depth;
	unsigned int		reserved_tags;
	unsigned int		cmd_size;
	int			numa_node;
	unsigned int		timeout;
	unsigned int		flags;
	void			*driver_data;

	struct blk_mq_tags	**tags;

	struct blk_mq_tags	*shared_tags;

	struct mutex		tag_list_lock;
	struct list_head	tag_list;
	struct srcu_struct	*srcu;
	struct srcu_struct	tags_srcu;

	struct rw_semaphore	update_nr_hwq_lock;
};

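/*
 * Illustrative sketch for a hypothetical "my_drv" driver (not from this
 * header): fill in a tag set and allocate a disk from it using
 * blk_mq_alloc_tag_set() and blk_mq_alloc_disk(), both declared further
 * down.  "my_drv_mq_ops", "struct my_drv_cmd" and "my_drv" are assumed
 * driver-side names.
 *
 *	static struct blk_mq_tag_set my_set;
 *
 *	my_set.ops = &my_drv_mq_ops;
 *	my_set.nr_hw_queues = 1;
 *	my_set.queue_depth = 128;
 *	my_set.numa_node = NUMA_NO_NODE;
 *	my_set.cmd_size = sizeof(struct my_drv_cmd);
 *
 *	ret = blk_mq_alloc_tag_set(&my_set);
 *	if (ret)
 *		return ret;
 *	disk = blk_mq_alloc_disk(&my_set, NULL, my_drv);
 *	if (IS_ERR(disk)) {
 *		blk_mq_free_tag_set(&my_set);
 *		return PTR_ERR(disk);
 *	}
 */
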
/**
 * struct blk_mq_queue_data - Data about a request inserted in a queue
 *
 * @rq:   Request pointer.
 * @last: If it is the last request in the queue.
 */
struct blk_mq_queue_data {
	struct request *rq;
	bool last;
};

typedef bool (busy_tag_iter_fn)(struct request *, void *);

/**
 * struct blk_mq_ops - Callback functions that implement block driver
 * behaviour.
 */
struct blk_mq_ops {
	/**
	 * @queue_rq: Queue a new request from block IO.
	 */
	blk_status_t (*queue_rq)(struct blk_mq_hw_ctx *,
				 const struct blk_mq_queue_data *);

	/**
	 * @commit_rqs: If a driver uses bd->last to judge when to submit
	 * requests to hardware, it must define this function. In case of errors
	 * that make us stop issuing further requests, this hook serves the
	 * purpose of kicking the hardware (which the last request otherwise
	 * would have done).
	 */
	void (*commit_rqs)(struct blk_mq_hw_ctx *);

	/**
	 * @queue_rqs: Queue a list of new requests. The driver is guaranteed
	 * that each request belongs to the same queue. If the driver doesn't
	 * empty the @rqlist completely, then the rest will be queued
	 * individually by the block layer upon return.
	 */
	void (*queue_rqs)(struct rq_list *rqlist);

	/**
	 * @get_budget: Reserve budget before queueing a request. Once
	 * .queue_rq is run, it is the driver's responsibility to release the
	 * reserved budget. The failure case of .get_budget also has to be
	 * handled to avoid I/O deadlock.
	 */
	int (*get_budget)(struct request_queue *);

	/**
	 * @put_budget: Release the reserved budget.
	 */
	void (*put_budget)(struct request_queue *, int);

	/**
	 * @set_rq_budget_token: store rq's budget token
	 */
	void (*set_rq_budget_token)(struct request *, int);
	/**
	 * @get_rq_budget_token: retrieve rq's budget token
	 */
	int (*get_rq_budget_token)(struct request *);

	/**
	 * @timeout: Called on request timeout.
	 */
	enum blk_eh_timer_return (*timeout)(struct request *);

	/**
	 * @poll: Called to poll for completion of a specific tag.
	 */
	int (*poll)(struct blk_mq_hw_ctx *, struct io_comp_batch *);

	/**
	 * @complete: Mark the request as complete.
	 */
	void (*complete)(struct request *);

	/**
	 * @init_hctx: Called when the block layer side of a hardware queue has
	 * been set up, allowing the driver to allocate/init matching
	 * structures.
	 */
	int (*init_hctx)(struct blk_mq_hw_ctx *, void *, unsigned int);
	/**
	 * @exit_hctx: Ditto for exit/teardown.
	 */
	void (*exit_hctx)(struct blk_mq_hw_ctx *, unsigned int);

	/**
	 * @init_request: Called for every command allocated by the block layer
	 * to allow the driver to set up driver specific data.
	 *
	 * A tag greater than or equal to queue_depth is for setting up the
	 * flush request.
	 */
	int (*init_request)(struct blk_mq_tag_set *set, struct request *,
			    unsigned int, unsigned int);
	/**
	 * @exit_request: Ditto for exit/teardown.
	 */
	void (*exit_request)(struct blk_mq_tag_set *set, struct request *,
			     unsigned int);

	/**
	 * @cleanup_rq: Called before freeing one request which isn't completed
	 * yet, and usually for freeing the driver private data.
	 */
	void (*cleanup_rq)(struct request *);

	/**
	 * @busy: If set, returns whether or not this queue currently is busy.
	 */
	bool (*busy)(struct request_queue *);

	/**
	 * @map_queues: This allows drivers to specify their own queue mapping
	 * by overriding the setup-time function that builds the mq_map.
	 */
	void (*map_queues)(struct blk_mq_tag_set *set);

#ifdef CONFIG_BLK_DEBUG_FS
	/**
	 * @show_rq: Used by the debugfs implementation to show driver-specific
	 * information about a request.
	 */
	void (*show_rq)(struct seq_file *m, struct request *rq);
#endif
};

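/*
 * Illustrative sketch of a minimal ->queue_rq() implementation for a
 * hypothetical "my_drv" driver (my_drv_submit() is assumed driver code):
 * start the request, hand it to the hardware and complete it later from
 * the driver's completion path via blk_mq_complete_request() or
 * blk_mq_end_request().
 *
 *	static blk_status_t my_drv_queue_rq(struct blk_mq_hw_ctx *hctx,
 *					    const struct blk_mq_queue_data *bd)
 *	{
 *		struct request *rq = bd->rq;
 *
 *		blk_mq_start_request(rq);
 *		if (!my_drv_submit(hctx->driver_data, rq))
 *			return BLK_STS_RESOURCE;
 *		return BLK_STS_OK;
 *	}
 *
 *	static const struct blk_mq_ops my_drv_mq_ops = {
 *		.queue_rq	= my_drv_queue_rq,
 *	};
 */
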
/* Keep hctx_flag_name[] in sync with the definitions below */
enum {
	BLK_MQ_F_TAG_QUEUE_SHARED = 1 << 1,
	/*
	 * Set when this device requires underlying blk-mq device for
	 * completing IO:
	 */
	BLK_MQ_F_STACKING	= 1 << 2,
	BLK_MQ_F_TAG_HCTX_SHARED = 1 << 3,
	BLK_MQ_F_BLOCKING	= 1 << 4,

	/*
	 * Alloc tags on a round-robin basis instead of the first available one.
	 */
	BLK_MQ_F_TAG_RR		= 1 << 5,

	/*
	 * Select 'none' during queue registration in case of a single hwq
	 * or shared hwqs instead of 'mq-deadline'.
	 */
	BLK_MQ_F_NO_SCHED_BY_DEFAULT	= 1 << 6,

	BLK_MQ_F_MAX = 1 << 7,
};

#define BLK_MQ_MAX_DEPTH	(10240)
#define BLK_MQ_NO_HCTX_IDX	(-1U)

enum {
	/* Keep hctx_state_name[] in sync with the definitions below */
	BLK_MQ_S_STOPPED,
	BLK_MQ_S_TAG_ACTIVE,
	BLK_MQ_S_SCHED_RESTART,
	/* hw queue is inactive after all its CPUs become offline */
	BLK_MQ_S_INACTIVE,
	BLK_MQ_S_MAX
};

struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set,
		struct queue_limits *lim, void *queuedata,
		struct lock_class_key *lkclass);
#define blk_mq_alloc_disk(set, lim, queuedata)				\
({									\
	static struct lock_class_key __key;				\
									\
	__blk_mq_alloc_disk(set, lim, queuedata, &__key);		\
})
struct gendisk *blk_mq_alloc_disk_for_queue(struct request_queue *q,
		struct lock_class_key *lkclass);
struct request_queue *blk_mq_alloc_queue(struct blk_mq_tag_set *set,
		struct queue_limits *lim, void *queuedata);
int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
		struct request_queue *q);
void blk_mq_destroy_queue(struct request_queue *);

int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set);
int blk_mq_alloc_sq_tag_set(struct blk_mq_tag_set *set,
		const struct blk_mq_ops *ops, unsigned int queue_depth,
		unsigned int set_flags);
void blk_mq_free_tag_set(struct blk_mq_tag_set *set);

void blk_mq_free_request(struct request *rq);
int blk_rq_poll(struct request *rq, struct io_comp_batch *iob,
		unsigned int poll_flags);

bool blk_mq_queue_inflight(struct request_queue *q);

enum {
	/* return when out of requests */
	BLK_MQ_REQ_NOWAIT	= (__force blk_mq_req_flags_t)(1 << 0),
	/* allocate from reserved pool */
	BLK_MQ_REQ_RESERVED	= (__force blk_mq_req_flags_t)(1 << 1),
	/* set RQF_PM */
	BLK_MQ_REQ_PM		= (__force blk_mq_req_flags_t)(1 << 2),
};

struct request *blk_mq_alloc_request(struct request_queue *q, blk_opf_t opf,
		blk_mq_req_flags_t flags);
struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
		blk_opf_t opf, blk_mq_req_flags_t flags,
		unsigned int hctx_idx);

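/*
 * Illustrative sketch (assumed driver code, not from this header): allocate a
 * passthrough-style request on a queue, fill in the driver PDU and execute it
 * synchronously with blk_execute_rq(), declared further down.
 * "my_drv_fill_cmd" is a hypothetical helper.
 *
 *	struct request *rq;
 *	blk_status_t status;
 *
 *	rq = blk_mq_alloc_request(q, REQ_OP_DRV_IN, 0);
 *	if (IS_ERR(rq))
 *		return PTR_ERR(rq);
 *	my_drv_fill_cmd(blk_mq_rq_to_pdu(rq));
 *	status = blk_execute_rq(rq, false);
 *	blk_mq_free_request(rq);
 */
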
/*
 * Tag address space map.
 */
struct blk_mq_tags {
	unsigned int nr_tags;
	unsigned int nr_reserved_tags;
	unsigned int active_queues;

	struct sbitmap_queue bitmap_tags;
	struct sbitmap_queue breserved_tags;

	struct request **rqs;
	struct request **static_rqs;
	struct list_head page_list;

	/*
	 * used to clear request reference in rqs[] before freeing one
	 * request pool
	 */
	spinlock_t lock;
	struct rcu_head rcu_head;
};

static inline struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags,
					       unsigned int tag)
{
	if (tag < tags->nr_tags) {
		prefetch(tags->rqs[tag]);
		return tags->rqs[tag];
	}

	return NULL;
}

enum {
	BLK_MQ_UNIQUE_TAG_BITS = 16,
	BLK_MQ_UNIQUE_TAG_MASK = (1 << BLK_MQ_UNIQUE_TAG_BITS) - 1,
};

u32 blk_mq_unique_tag(struct request *rq);

static inline u16 blk_mq_unique_tag_to_hwq(u32 unique_tag)
{
	return unique_tag >> BLK_MQ_UNIQUE_TAG_BITS;
}

static inline u16 blk_mq_unique_tag_to_tag(u32 unique_tag)
{
	return unique_tag & BLK_MQ_UNIQUE_TAG_MASK;
}

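/*
 * Example: a driver can recover both the hardware queue index and the
 * per-queue tag from the single value returned by blk_mq_unique_tag():
 *
 *	u32 unique = blk_mq_unique_tag(rq);
 *	u16 hwq    = blk_mq_unique_tag_to_hwq(unique);
 *	u16 tag    = blk_mq_unique_tag_to_tag(unique);
 */
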
/**
 * blk_mq_rq_state() - read the current MQ_RQ_* state of a request
 * @rq: target request.
 */
static inline enum mq_rq_state blk_mq_rq_state(struct request *rq)
{
	return READ_ONCE(rq->state);
}

static inline int blk_mq_request_started(struct request *rq)
{
	return blk_mq_rq_state(rq) != MQ_RQ_IDLE;
}

static inline int blk_mq_request_completed(struct request *rq)
{
	return blk_mq_rq_state(rq) == MQ_RQ_COMPLETE;
}

/*
 * Set the state to complete when completing a request from inside ->queue_rq.
 * This is used by drivers that want to ensure special complete actions that
 * need access to the request are called on failure, e.g. by nvme for
 * multipathing.
 */
static inline void blk_mq_set_request_complete(struct request *rq)
{
	WRITE_ONCE(rq->state, MQ_RQ_COMPLETE);
}

/*
 * Complete the request directly instead of deferring it to softirq or
 * completing it on another CPU. Useful in preemptible context instead of in
 * interrupt context.
 */
static inline void blk_mq_complete_request_direct(struct request *rq,
		   void (*complete)(struct request *rq))
{
	WRITE_ONCE(rq->state, MQ_RQ_COMPLETE);
	complete(rq);
}

void blk_mq_start_request(struct request *rq);
void blk_mq_end_request(struct request *rq, blk_status_t error);
void __blk_mq_end_request(struct request *rq, blk_status_t error);
void blk_mq_end_request_batch(struct io_comp_batch *ib);

/*
 * Only need start/end time stamping if we have iostat or
 * blk stats enabled, or are using an IO scheduler.
 */
static inline bool blk_mq_need_time_stamp(struct request *rq)
{
	return (rq->rq_flags & (RQF_IO_STAT | RQF_STATS | RQF_USE_SCHED));
}

static inline bool blk_mq_is_reserved_rq(struct request *rq)
{
	return rq->rq_flags & RQF_RESV;
}

/**
 * blk_mq_add_to_batch() - add a request to the completion batch
 * @req: The request to add to the batch
 * @iob: The batch to add the request to
 * @is_error: Specify true if the request failed with an error
 * @complete: The completion handler for the request
 *
 * Batched completions only work when there is no I/O error and no special
 * ->end_io handler.
 *
 * Return: true when the request was added to the batch, otherwise false
 */
static inline bool blk_mq_add_to_batch(struct request *req,
				       struct io_comp_batch *iob, bool is_error,
				       void (*complete)(struct io_comp_batch *))
{
	/*
	 * Check various conditions that exclude batch processing:
	 * 1) No batch container
	 * 2) Has scheduler data attached
	 * 3) Not a passthrough request and end_io set
	 * 4) Not a passthrough request and failed with an error
	 */
	if (!iob)
		return false;
	if (req->rq_flags & RQF_SCHED_TAGS)
		return false;
	if (!blk_rq_is_passthrough(req)) {
		if (req->end_io)
			return false;
		if (is_error)
			return false;
	}

	if (!iob->complete)
		iob->complete = complete;
	else if (iob->complete != complete)
		return false;
	iob->need_ts |= blk_mq_need_time_stamp(req);
	rq_list_add_tail(&iob->req_list, req);
	return true;
}

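/*
 * Illustrative sketch (hypothetical driver code): in a ->poll() or interrupt
 * handler, try to defer the completion into the batch and fall back to an
 * immediate per-request completion when batching is not possible.
 *
 *	if (!blk_mq_add_to_batch(rq, iob, my_drv_rq_failed(rq),
 *				 my_drv_complete_batch))
 *		my_drv_complete_rq(rq);
 */
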
void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list);
void blk_mq_kick_requeue_list(struct request_queue *q);
void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs);
void blk_mq_complete_request(struct request *rq);
bool blk_mq_complete_request_remote(struct request *rq);
void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx);
void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx);
void blk_mq_stop_hw_queues(struct request_queue *q);
void blk_mq_start_hw_queues(struct request_queue *q);
void blk_mq_start_stopped_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async);
void blk_mq_quiesce_queue(struct request_queue *q);
void blk_mq_wait_quiesce_done(struct blk_mq_tag_set *set);
void blk_mq_quiesce_tagset(struct blk_mq_tag_set *set);
void blk_mq_unquiesce_tagset(struct blk_mq_tag_set *set);
void blk_mq_unquiesce_queue(struct request_queue *q);
void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs);
void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
void blk_mq_run_hw_queues(struct request_queue *q, bool async);
void blk_mq_delay_run_hw_queues(struct request_queue *q, unsigned long msecs);
void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset,
		busy_tag_iter_fn *fn, void *priv);
void blk_mq_tagset_wait_completed_request(struct blk_mq_tag_set *tagset);
void blk_mq_freeze_queue_nomemsave(struct request_queue *q);
void blk_mq_unfreeze_queue_nomemrestore(struct request_queue *q);
static inline unsigned int __must_check
blk_mq_freeze_queue(struct request_queue *q)
{
	unsigned int memflags = memalloc_noio_save();

	blk_mq_freeze_queue_nomemsave(q);
	return memflags;
}
static inline void
blk_mq_unfreeze_queue(struct request_queue *q, unsigned int memflags)
{
	blk_mq_unfreeze_queue_nomemrestore(q);
	memalloc_noio_restore(memflags);
}
void blk_freeze_queue_start(struct request_queue *q);
void blk_mq_freeze_queue_wait(struct request_queue *q);
int blk_mq_freeze_queue_wait_timeout(struct request_queue *q,
				     unsigned long timeout);
void blk_mq_unfreeze_queue_non_owner(struct request_queue *q);
void blk_freeze_queue_start_non_owner(struct request_queue *q);

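/*
 * Typical freeze/unfreeze pattern (sketch): block new I/O and wait for
 * in-flight requests while changing queue state, then restore the caller's
 * memory-allocation context.  "my_drv_change_queue_state" is a hypothetical
 * driver update.
 *
 *	unsigned int memflags;
 *
 *	memflags = blk_mq_freeze_queue(q);
 *	my_drv_change_queue_state(q);
 *	blk_mq_unfreeze_queue(q, memflags);
 */
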
unsigned int blk_mq_num_possible_queues(unsigned int max_queues);
unsigned int blk_mq_num_online_queues(unsigned int max_queues);
void blk_mq_map_queues(struct blk_mq_queue_map *qmap);
void blk_mq_map_hw_queues(struct blk_mq_queue_map *qmap,
			  struct device *dev, unsigned int offset);
void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues);

void blk_mq_quiesce_queue_nowait(struct request_queue *q);

unsigned int blk_mq_rq_cpu(struct request *rq);

bool __blk_should_fake_timeout(struct request_queue *q);
static inline bool blk_should_fake_timeout(struct request_queue *q)
{
	if (IS_ENABLED(CONFIG_FAIL_IO_TIMEOUT) &&
	    test_bit(QUEUE_FLAG_FAIL_IO, &q->queue_flags))
		return __blk_should_fake_timeout(q);
	return false;
}

/**
 * blk_mq_rq_from_pdu - cast a PDU to a request
 * @pdu: the PDU (Protocol Data Unit) to be cast
 *
 * Return: request
 *
 * Driver command data is immediately after the request. So subtract the
 * request size to get back to the original request.
 */
static inline struct request *blk_mq_rq_from_pdu(void *pdu)
{
	return pdu - sizeof(struct request);
}

/**
 * blk_mq_rq_to_pdu - cast a request to a PDU
 * @rq: the request to be cast
 *
 * Return: pointer to the PDU
 *
 * Driver command data is immediately after the request. So add the request to
 * get the PDU.
 */
static inline void *blk_mq_rq_to_pdu(struct request *rq)
{
	return rq + 1;
}

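/*
 * Example (sketch): with blk_mq_tag_set.cmd_size = sizeof(struct my_drv_cmd)
 * ("my_drv_cmd" being a hypothetical driver type), the block layer allocates
 * the driver PDU right behind each request, so the two helpers above convert
 * back and forth without extra allocations.
 *
 *	struct my_drv_cmd *cmd = blk_mq_rq_to_pdu(rq);
 *	struct request *rq2 = blk_mq_rq_from_pdu(cmd);	// rq2 == rq
 */
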
#define queue_for_each_hw_ctx(q, hctx, i)				\
	xa_for_each(&(q)->hctx_table, (i), (hctx))

#define hctx_for_each_ctx(hctx, ctx, i)					\
	for ((i) = 0; (i) < (hctx)->nr_ctx &&				\
	     ({ ctx = (hctx)->ctxs[(i)]; 1; }); (i)++)

static inline void blk_mq_cleanup_rq(struct request *rq)
{
	if (rq->q->mq_ops->cleanup_rq)
		rq->q->mq_ops->cleanup_rq(rq);
}

void blk_mq_hctx_set_fq_lock_class(struct blk_mq_hw_ctx *hctx,
		struct lock_class_key *key);

static inline bool rq_is_sync(struct request *rq)
{
	return op_is_sync(rq->cmd_flags);
}

void blk_rq_init(struct request_queue *q, struct request *rq);
int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
		struct bio_set *bs, gfp_t gfp_mask,
		int (*bio_ctr)(struct bio *, struct bio *, void *), void *data);
void blk_rq_unprep_clone(struct request *rq);
blk_status_t blk_insert_cloned_request(struct request *rq);

struct rq_map_data {
	struct page **pages;
	unsigned long offset;
	unsigned short page_order;
	unsigned short nr_entries;
	bool null_mapped;
	bool from_user;
};

int blk_rq_map_user(struct request_queue *, struct request *,
		struct rq_map_data *, void __user *, unsigned long, gfp_t);
int blk_rq_map_user_io(struct request *, struct rq_map_data *,
		void __user *, unsigned long, gfp_t, bool, int, bool, int);
int blk_rq_map_user_iov(struct request_queue *, struct request *,
		struct rq_map_data *, const struct iov_iter *, gfp_t);
int blk_rq_unmap_user(struct bio *);
int blk_rq_map_kern(struct request *rq, void *kbuf, unsigned int len,
		gfp_t gfp);
int blk_rq_append_bio(struct request *rq, struct bio *bio);
void blk_execute_rq_nowait(struct request *rq, bool at_head);
blk_status_t blk_execute_rq(struct request *rq, bool at_head);
bool blk_rq_is_poll(struct request *rq);

struct req_iterator {
	struct bvec_iter iter;
	struct bio *bio;
};

#define __rq_for_each_bio(_bio, rq)	\
	if ((rq->bio))			\
		for (_bio = (rq)->bio; _bio; _bio = _bio->bi_next)

#define rq_for_each_segment(bvl, _rq, _iter)			\
	__rq_for_each_bio(_iter.bio, _rq)			\
		bio_for_each_segment(bvl, _iter.bio, _iter.iter)

#define rq_for_each_bvec(bvl, _rq, _iter)			\
	__rq_for_each_bio(_iter.bio, _rq)			\
		bio_for_each_bvec(bvl, _iter.bio, _iter.iter)

#define rq_iter_last(bvec, _iter)				\
		(_iter.bio->bi_next == NULL &&			\
		 bio_iter_last(bvec, _iter.iter))

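/*
 * Illustrative sketch: walk all data segments of a request, e.g. to copy them
 * into a hypothetical device buffer ("my_drv_copy_page" is assumed driver
 * code).
 *
 *	struct req_iterator iter;
 *	struct bio_vec bvec;
 *
 *	rq_for_each_segment(bvec, rq, iter)
 *		my_drv_copy_page(bvec.bv_page, bvec.bv_offset, bvec.bv_len);
 */
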
/*
 * blk_rq_pos()			: the current sector
 * blk_rq_bytes()		: bytes left in the entire request
 * blk_rq_cur_bytes()		: bytes left in the current segment
 * blk_rq_sectors()		: sectors left in the entire request
 * blk_rq_cur_sectors()		: sectors left in the current segment
 * blk_rq_stats_sectors()	: sectors of the entire request used for stats
 */
static inline sector_t blk_rq_pos(const struct request *rq)
{
	return rq->__sector;
}

static inline unsigned int blk_rq_bytes(const struct request *rq)
{
	return rq->__data_len;
}

static inline int blk_rq_cur_bytes(const struct request *rq)
{
	if (!rq->bio)
		return 0;
	if (!bio_has_data(rq->bio))	/* dataless requests such as discard */
		return rq->bio->bi_iter.bi_size;
	return bio_iovec(rq->bio).bv_len;
}

static inline unsigned int blk_rq_sectors(const struct request *rq)
{
	return blk_rq_bytes(rq) >> SECTOR_SHIFT;
}

static inline unsigned int blk_rq_cur_sectors(const struct request *rq)
{
	return blk_rq_cur_bytes(rq) >> SECTOR_SHIFT;
}

static inline unsigned int blk_rq_stats_sectors(const struct request *rq)
{
	return rq->stats_sectors;
}

/*
 * Some commands like WRITE SAME have a payload or data transfer size which
 * is different from the size of the request.  Any driver that supports such
 * commands using the RQF_SPECIAL_PAYLOAD flag needs to use this helper to
 * calculate the data transfer size.
 */
static inline unsigned int blk_rq_payload_bytes(struct request *rq)
{
	if (rq->rq_flags & RQF_SPECIAL_PAYLOAD)
		return rq->special_vec.bv_len;
	return blk_rq_bytes(rq);
}

/*
 * Return the first full biovec in the request.  The caller needs to check that
 * there are any bvecs before calling this helper.
 */
static inline struct bio_vec req_bvec(struct request *rq)
{
	if (rq->rq_flags & RQF_SPECIAL_PAYLOAD)
		return rq->special_vec;
	return mp_bvec_iter_bvec(rq->bio->bi_io_vec, rq->bio->bi_iter);
}

static inline unsigned int blk_rq_count_bios(struct request *rq)
{
	unsigned int nr_bios = 0;
	struct bio *bio;

	__rq_for_each_bio(bio, rq)
		nr_bios++;

	return nr_bios;
}

void blk_steal_bios(struct bio_list *list, struct request *rq);

/*
 * Request completion related functions.
 *
 * blk_update_request() completes the given number of bytes and updates
 * the request without completing it.
 */
bool blk_update_request(struct request *rq, blk_status_t error,
			unsigned int nr_bytes);
void blk_abort_request(struct request *);

/*
 * Number of physical segments as sent to the device.
 *
 * Normally this is the number of discontiguous data segments sent by the
 * submitter.  But for data-less commands like discard we might have no
 * actual data segments submitted, but the driver might have to add its
 * own special payload.  In that case we still return 1 here so that this
 * special payload will be mapped.
 */
static inline unsigned short blk_rq_nr_phys_segments(struct request *rq)
{
	if (rq->rq_flags & RQF_SPECIAL_PAYLOAD)
		return 1;
	return rq->nr_phys_segments;
}

/*
 * Number of discard segments (or ranges) the driver needs to fill in.
 * Each discard bio merged into a request is counted as one segment.
 */
static inline unsigned short blk_rq_nr_discard_segments(struct request *rq)
{
	return max_t(unsigned short, rq->nr_phys_segments, 1);
}

int __blk_rq_map_sg(struct request *rq, struct scatterlist *sglist,
		struct scatterlist **last_sg);
static inline int blk_rq_map_sg(struct request *rq, struct scatterlist *sglist)
{
	struct scatterlist *last_sg = NULL;

	return __blk_rq_map_sg(rq, sglist, &last_sg);
}
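/*
 * Illustrative sketch (assumes a DMA-capable "dev" and a scatterlist "sgl"
 * sized for the queue's segment limit): build the scatterlist for a request
 * and hand it to the DMA API using the direction derived from the request.
 *
 *	int nents = blk_rq_map_sg(rq, sgl);
 *
 *	if (nents)
 *		nents = dma_map_sg(dev, sgl, nents, rq_dma_dir(rq));
 */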
void blk_dump_rq_flags(struct request *, char *);

#endif /* BLK_MQ_H */