/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Block data types and constants. Directly include this file only to
 * break include dependency loop.
 */
#ifndef __LINUX_BLK_TYPES_H
#define __LINUX_BLK_TYPES_H

#include <linux/types.h>
#include <linux/bvec.h>
#include <linux/device.h>
#include <linux/ktime.h>

struct bio_set;
struct bio;
struct bio_integrity_payload;
struct page;
struct io_context;
struct cgroup_subsys_state;
typedef void (bio_end_io_t) (struct bio *);
struct bio_crypt_ctx;

/*
 * The basic unit of block I/O is a sector. It is used in a number of contexts
 * in Linux (blk, bio, genhd). The size of one sector is 512 = 2**9
 * bytes. Variables of type sector_t represent an offset or size that is a
 * multiple of 512 bytes. Hence these two constants.
 */
#ifndef SECTOR_SHIFT
#define SECTOR_SHIFT 9
#endif
#ifndef SECTOR_SIZE
#define SECTOR_SIZE (1 << SECTOR_SHIFT)
#endif

#define PAGE_SECTORS_SHIFT	(PAGE_SHIFT - SECTOR_SHIFT)
#define PAGE_SECTORS		(1 << PAGE_SECTORS_SHIFT)
#define SECTOR_MASK		(PAGE_SECTORS - 1)
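
/*
 * Illustrative only, not part of this header: a minimal sketch showing how
 * SECTOR_SHIFT is meant to be used to convert between byte counts and
 * 512-byte sector counts. The helper names below are hypothetical.
 */
static inline sector_t example_bytes_to_sectors(u64 nr_bytes)
{
	return nr_bytes >> SECTOR_SHIFT;
}

static inline u64 example_sectors_to_bytes(sector_t nr_sectors)
{
	return (u64)nr_sectors << SECTOR_SHIFT;
}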

struct block_device {
	sector_t		bd_start_sect;
	sector_t		bd_nr_sectors;
	struct disk_stats __percpu *bd_stats;
	unsigned long		bd_stamp;
	bool			bd_read_only;	/* read-only policy */
	dev_t			bd_dev;
	int			bd_openers;
	struct inode *		bd_inode;	/* will die */
	struct super_block *	bd_super;
	void *			bd_claiming;
	struct device		bd_device;
	void *			bd_holder;
	int			bd_holders;
	bool			bd_write_holder;
	struct kobject		*bd_holder_dir;
	u8			bd_partno;
	spinlock_t		bd_size_lock;	/* for bd_inode->i_size updates */
	struct gendisk *	bd_disk;
	struct request_queue *	bd_queue;

	/* The counter of freeze processes */
	int			bd_fsfreeze_count;
	/* Mutex for freeze */
	struct mutex		bd_fsfreeze_mutex;
	struct super_block	*bd_fsfreeze_sb;

	struct partition_meta_info *bd_meta_info;
#ifdef CONFIG_FAIL_MAKE_REQUEST
	bool			bd_make_it_fail;
#endif
} __randomize_layout;

#define bdev_whole(_bdev) \
	((_bdev)->bd_disk->part0)

#define dev_to_bdev(device) \
	container_of((device), struct block_device, bd_device)

#define bdev_kobj(_bdev) \
	(&((_bdev)->bd_device.kobj))

/*
 * Block error status values. See block/blk-core:blk_errors for the details.
 * Alpha cannot write a byte atomically, so we need to use 32-bit value.
 */
#if defined(CONFIG_ALPHA) && !defined(__alpha_bwx__)
typedef u32 __bitwise blk_status_t;
#else
typedef u8 __bitwise blk_status_t;
#endif
#define BLK_STS_OK		0
#define BLK_STS_NOTSUPP		((__force blk_status_t)1)
#define BLK_STS_TIMEOUT		((__force blk_status_t)2)
#define BLK_STS_NOSPC		((__force blk_status_t)3)
#define BLK_STS_TRANSPORT	((__force blk_status_t)4)
#define BLK_STS_TARGET		((__force blk_status_t)5)
#define BLK_STS_NEXUS		((__force blk_status_t)6)
#define BLK_STS_MEDIUM		((__force blk_status_t)7)
#define BLK_STS_PROTECTION	((__force blk_status_t)8)
#define BLK_STS_RESOURCE	((__force blk_status_t)9)
#define BLK_STS_IOERR		((__force blk_status_t)10)

/* hack for device mapper, don't use elsewhere: */
#define BLK_STS_DM_REQUEUE	((__force blk_status_t)11)

#define BLK_STS_AGAIN		((__force blk_status_t)12)

/*
 * BLK_STS_DEV_RESOURCE is returned from the driver to the block layer if
 * device related resources are unavailable, but the driver can guarantee
 * that the queue will be rerun in the future once resources become
 * available again. This is typically the case for device specific
 * resources that are consumed for IO. If the driver fails allocating these
 * resources, we know that inflight (or pending) IO will free these
 * resources upon completion.
 *
 * This is different from BLK_STS_RESOURCE in that it explicitly references
 * a device specific resource. For resources of wider scope, allocation
 * failure can happen without having pending IO. This means that we can't
 * rely on request completions freeing these resources, as IO may not be in
 * flight. Examples of that are kernel memory allocations, DMA mappings, or
 * any other system wide resources.
 */
#define BLK_STS_DEV_RESOURCE	((__force blk_status_t)13)
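
/*
 * Illustrative only, hypothetical helper (not part of this header): a sketch
 * of the status a driver would pick when a resource runs out in its
 * submission path. A missing device-private resource (e.g. a hardware tag)
 * maps to BLK_STS_DEV_RESOURCE, because completion of in-flight IO is
 * guaranteed to free one; a failed system-wide allocation (memory, a DMA
 * mapping) must map to BLK_STS_RESOURCE instead.
 */
static inline blk_status_t example_resource_busy_status(bool device_private)
{
	return device_private ? BLK_STS_DEV_RESOURCE : BLK_STS_RESOURCE;
}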

/*
 * BLK_STS_ZONE_RESOURCE is returned from the driver to the block layer if zone
 * related resources are unavailable, but the driver can guarantee the queue
 * will be rerun in the future once the resources become available again.
 *
 * This is different from BLK_STS_DEV_RESOURCE in that it explicitly references
 * a zone specific resource and IO to a different zone on the same device could
 * still be served. Examples of that are zones that are write-locked, but a read
 * to the same zone could be served.
 */
#define BLK_STS_ZONE_RESOURCE	((__force blk_status_t)14)

/*
 * BLK_STS_ZONE_OPEN_RESOURCE is returned from the driver in the completion
 * path if the device returns a status indicating that too many zone resources
 * are currently open. The same command should be successful if resubmitted
 * after the number of open zones decreases below the device's limits, which is
 * reported in the request_queue's max_open_zones.
 */
#define BLK_STS_ZONE_OPEN_RESOURCE	((__force blk_status_t)15)

/*
 * BLK_STS_ZONE_ACTIVE_RESOURCE is returned from the driver in the completion
 * path if the device returns a status indicating that too many zone resources
 * are currently active. The same command should be successful if resubmitted
 * after the number of active zones decreases below the device's limits, which
 * is reported in the request_queue's max_active_zones.
 */
#define BLK_STS_ZONE_ACTIVE_RESOURCE	((__force blk_status_t)16)

/**
 * blk_path_error - returns true if error may be path related
 * @error: status the request was completed with
 *
 * Description:
 *     This classifies block error status into non-retryable errors and ones
 *     that may be successful if retried on a failover path.
 *
 * Return:
 *     %false - retrying failover path will not help
 *     %true  - may succeed if retried
 */
static inline bool blk_path_error(blk_status_t error)
{
	switch (error) {
	case BLK_STS_NOTSUPP:
	case BLK_STS_NOSPC:
	case BLK_STS_TARGET:
	case BLK_STS_NEXUS:
	case BLK_STS_MEDIUM:
	case BLK_STS_PROTECTION:
		return false;
	}

	/* Anything else could be a path failure, so should be retried */
	return true;
}
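
/*
 * Illustrative only (hypothetical caller, not part of this header): a
 * multipath-style completion handler could use blk_path_error() to decide
 * whether resubmitting a failed request on another path can possibly succeed.
 */
static inline bool example_should_retry_on_other_path(blk_status_t error)
{
	return error != BLK_STS_OK && blk_path_error(error);
}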

/*
 * From most significant bit:
 * 1 bit: reserved for other usage, see below
 * 12 bits: original size of bio
 * 51 bits: issue time of bio
 */
#define BIO_ISSUE_RES_BITS	1
#define BIO_ISSUE_SIZE_BITS	12
#define BIO_ISSUE_RES_SHIFT	(64 - BIO_ISSUE_RES_BITS)
#define BIO_ISSUE_SIZE_SHIFT	(BIO_ISSUE_RES_SHIFT - BIO_ISSUE_SIZE_BITS)
#define BIO_ISSUE_TIME_MASK	((1ULL << BIO_ISSUE_SIZE_SHIFT) - 1)
#define BIO_ISSUE_SIZE_MASK	\
	(((1ULL << BIO_ISSUE_SIZE_BITS) - 1) << BIO_ISSUE_SIZE_SHIFT)
#define BIO_ISSUE_RES_MASK	(~((1ULL << BIO_ISSUE_RES_SHIFT) - 1))

/* Reserved bit for blk-throtl */
#define BIO_ISSUE_THROTL_SKIP_LATENCY (1ULL << 63)

struct bio_issue {
	u64 value;
};

static inline u64 __bio_issue_time(u64 time)
{
	return time & BIO_ISSUE_TIME_MASK;
}

static inline u64 bio_issue_time(struct bio_issue *issue)
{
	return __bio_issue_time(issue->value);
}

static inline sector_t bio_issue_size(struct bio_issue *issue)
{
	return ((issue->value & BIO_ISSUE_SIZE_MASK) >> BIO_ISSUE_SIZE_SHIFT);
}

static inline void bio_issue_init(struct bio_issue *issue,
				  sector_t size)
{
	size &= (1ULL << BIO_ISSUE_SIZE_BITS) - 1;
	issue->value = ((issue->value & BIO_ISSUE_RES_MASK) |
			(ktime_get_ns() & BIO_ISSUE_TIME_MASK) |
			((u64)size << BIO_ISSUE_SIZE_SHIFT));
}
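
/*
 * Illustrative only (hypothetical helper, not part of this header): rq_qos
 * style code stamps a bio with bio_issue_init() at submission time and later
 * computes how long it has been in flight. Both values are truncated to the
 * 51-bit time field, so they are masked the same way before subtracting
 * (wraparound of the 51-bit field is ignored in this sketch).
 */
static inline u64 example_bio_issue_age_ns(struct bio_issue *issue)
{
	return __bio_issue_time(ktime_get_ns()) - bio_issue_time(issue);
}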

typedef unsigned int blk_qc_t;
#define BLK_QC_T_NONE		-1U

/*
 * main unit of I/O for the block layer and lower layers (ie drivers and
 * stacking drivers)
 */
struct bio {
	struct bio		*bi_next;	/* request queue link */
	struct block_device	*bi_bdev;
	unsigned int		bi_opf;		/* bottom bits REQ_OP, top bits
						 * req flags. Use accessors.
						 */
	unsigned short		bi_flags;	/* BIO_* below */
	unsigned short		bi_ioprio;
	unsigned short		bi_write_hint;
	blk_status_t		bi_status;
	atomic_t		__bi_remaining;

	struct bvec_iter	bi_iter;

	blk_qc_t		bi_cookie;
	bio_end_io_t		*bi_end_io;
	void			*bi_private;
#ifdef CONFIG_BLK_CGROUP
	/*
	 * Represents the association of the css and request_queue for the bio.
	 * If a bio goes direct to device, it will not have a blkg as it will
	 * not have a request_queue associated with it. The reference is put
	 * on release of the bio.
	 */
	struct blkcg_gq		*bi_blkg;
	struct bio_issue	bi_issue;
#ifdef CONFIG_BLK_CGROUP_IOCOST
	u64			bi_iocost_cost;
#endif
#endif

#ifdef CONFIG_BLK_INLINE_ENCRYPTION
	struct bio_crypt_ctx	*bi_crypt_context;
#endif

	union {
#if defined(CONFIG_BLK_DEV_INTEGRITY)
		struct bio_integrity_payload *bi_integrity; /* data integrity */
#endif
	};

	unsigned short		bi_vcnt;	/* how many bio_vec's */

	/*
	 * Everything starting with bi_max_vecs will be preserved by bio_reset()
	 */

	unsigned short		bi_max_vecs;	/* max bvl_vecs we can hold */

	atomic_t		__bi_cnt;	/* pin count */

	struct bio_vec		*bi_io_vec;	/* the actual vec list */

	struct bio_set		*bi_pool;

	/*
	 * We can inline a number of vecs at the end of the bio, to avoid
	 * double allocations for a small number of bio_vecs. This member
	 * MUST obviously be kept at the very end of the bio.
	 */
	struct bio_vec		bi_inline_vecs[];
};

#define BIO_RESET_BYTES		offsetof(struct bio, bi_max_vecs)
#define BIO_MAX_SECTORS		(UINT_MAX >> SECTOR_SHIFT)
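
/*
 * Illustrative only (hypothetical helper, not part of this header): because
 * bi_inline_vecs is a flexible array member at the very end of struct bio,
 * allocating a bio that carries n inline vecs means allocating
 * sizeof(struct bio) + n * sizeof(struct bio_vec) bytes.
 */
static inline size_t example_bio_with_inline_vecs_size(unsigned short nr_vecs)
{
	return sizeof(struct bio) + nr_vecs * sizeof(struct bio_vec);
}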

/*
 * bio flags
 */
enum {
	BIO_NO_PAGE_REF,	/* don't put release vec pages */
	BIO_CLONED,		/* doesn't own data */
	BIO_BOUNCED,		/* bio is a bounce bio */
	BIO_WORKINGSET,		/* contains userspace workingset pages */
	BIO_QUIET,		/* Make BIO Quiet */
	BIO_CHAIN,		/* chained bio, ->bi_remaining in effect */
	BIO_REFFED,		/* bio has elevated ->bi_cnt */
	BIO_THROTTLED,		/* This bio has already been subjected to
				 * throttling rules. Don't do it again. */
	BIO_TRACE_COMPLETION,	/* bio_endio() should trace the final completion
				 * of this bio. */
	BIO_CGROUP_ACCT,	/* has been accounted to a cgroup */
	BIO_TRACKED,		/* set if bio goes through the rq_qos path */
	BIO_REMAPPED,
	BIO_ZONE_WRITE_LOCKED,	/* Owns a zoned device zone write lock */
	BIO_PERCPU_CACHE,	/* can participate in per-cpu alloc cache */
	BIO_FLAG_LAST
};
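
/*
 * Illustrative only (not part of this header): each BIO_* value above is a
 * bit number in bio->bi_flags. The real accessors (e.g. bio_flagged() and
 * bio_set_flag()) live in <linux/bio.h>; the hypothetical helper below only
 * shows the encoding.
 */
static inline bool example_bio_flag_test(const struct bio *bio, unsigned int bit)
{
	return bio->bi_flags & (1U << bit);
}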

typedef __u32 __bitwise blk_mq_req_flags_t;

/*
 * Operations and flags common to the bio and request structures.
 * We use 8 bits for encoding the operation, and the remaining 24 for flags.
 *
 * The least significant bit of the operation number indicates the data
 * transfer direction:
 *
 *   - if the least significant bit is set transfers are TO the device
 *   - if the least significant bit is not set transfers are FROM the device
 *
 * If an operation does not transfer data the least significant bit has no
 * meaning.
 */
#define REQ_OP_BITS	8
#define REQ_OP_MASK	((1 << REQ_OP_BITS) - 1)
#define REQ_FLAG_BITS	24
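
/*
 * Illustrative only (hypothetical helpers, not part of this header): splitting
 * a bi_opf/cmd_flags value into the operation (low REQ_OP_BITS bits) and the
 * REQ_* flags (the remaining high bits).
 */
static inline unsigned int example_opf_to_op(unsigned int opf)
{
	return opf & REQ_OP_MASK;
}

static inline unsigned int example_opf_to_flags(unsigned int opf)
{
	return opf & ~REQ_OP_MASK;
}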

enum req_opf {
	/* read sectors from the device */
	REQ_OP_READ		= 0,
	/* write sectors to the device */
	REQ_OP_WRITE		= 1,
	/* flush the volatile write cache */
	REQ_OP_FLUSH		= 2,
	/* discard sectors */
	REQ_OP_DISCARD		= 3,
	/* securely erase sectors */
	REQ_OP_SECURE_ERASE	= 5,
	/* write the same sector many times */
	REQ_OP_WRITE_SAME	= 7,
	/* write the zero filled sector many times */
	REQ_OP_WRITE_ZEROES	= 9,
	/* Open a zone */
	REQ_OP_ZONE_OPEN	= 10,
	/* Close a zone */
	REQ_OP_ZONE_CLOSE	= 11,
	/* Transition a zone to full */
	REQ_OP_ZONE_FINISH	= 12,
	/* write data at the current zone write pointer */
	REQ_OP_ZONE_APPEND	= 13,
	/* reset a zone write pointer */
	REQ_OP_ZONE_RESET	= 15,
	/* reset all the zones present on the device */
	REQ_OP_ZONE_RESET_ALL	= 17,

	/* Driver private requests */
	REQ_OP_DRV_IN		= 34,
	REQ_OP_DRV_OUT		= 35,

	REQ_OP_LAST,
};

enum req_flag_bits {
	__REQ_FAILFAST_DEV =	/* no driver retries of device errors */
		REQ_OP_BITS,
	__REQ_FAILFAST_TRANSPORT, /* no driver retries of transport errors */
	__REQ_FAILFAST_DRIVER,	/* no driver retries of driver errors */
	__REQ_SYNC,		/* request is sync (sync write or read) */
	__REQ_META,		/* metadata io request */
	__REQ_PRIO,		/* boost priority in cfq */
	__REQ_NOMERGE,		/* don't touch this for merging */
	__REQ_IDLE,		/* anticipate more IO after this one */
	__REQ_INTEGRITY,	/* I/O includes block integrity payload */
	__REQ_FUA,		/* forced unit access */
	__REQ_PREFLUSH,		/* request for cache flush */
	__REQ_RAHEAD,		/* read ahead, can fail anytime */
	__REQ_BACKGROUND,	/* background IO */
	__REQ_NOWAIT,		/* Don't wait if request will block */
	/*
	 * When a shared kthread needs to issue a bio for a cgroup, doing
	 * so synchronously can lead to priority inversions as the kthread
	 * can be trapped waiting for that cgroup. CGROUP_PUNT flag makes
	 * submit_bio() punt the actual issuing to a dedicated per-blkcg
	 * work item to avoid such priority inversions.
	 */
	__REQ_CGROUP_PUNT,

	/* command specific flags for REQ_OP_WRITE_ZEROES: */
	__REQ_NOUNMAP,		/* do not free blocks when zeroing */

	__REQ_POLLED,		/* caller polls for completion using bio_poll */

	/* for driver use */
	__REQ_DRV,
	__REQ_SWAP,		/* swapping request. */
	__REQ_NR_BITS,		/* stops here */
};

#define REQ_FAILFAST_DEV	(1ULL << __REQ_FAILFAST_DEV)
#define REQ_FAILFAST_TRANSPORT	(1ULL << __REQ_FAILFAST_TRANSPORT)
#define REQ_FAILFAST_DRIVER	(1ULL << __REQ_FAILFAST_DRIVER)
#define REQ_SYNC		(1ULL << __REQ_SYNC)
#define REQ_META		(1ULL << __REQ_META)
#define REQ_PRIO		(1ULL << __REQ_PRIO)
#define REQ_NOMERGE		(1ULL << __REQ_NOMERGE)
#define REQ_IDLE		(1ULL << __REQ_IDLE)
#define REQ_INTEGRITY		(1ULL << __REQ_INTEGRITY)
#define REQ_FUA			(1ULL << __REQ_FUA)
#define REQ_PREFLUSH		(1ULL << __REQ_PREFLUSH)
#define REQ_RAHEAD		(1ULL << __REQ_RAHEAD)
#define REQ_BACKGROUND		(1ULL << __REQ_BACKGROUND)
#define REQ_NOWAIT		(1ULL << __REQ_NOWAIT)
#define REQ_CGROUP_PUNT		(1ULL << __REQ_CGROUP_PUNT)

#define REQ_NOUNMAP		(1ULL << __REQ_NOUNMAP)
#define REQ_POLLED		(1ULL << __REQ_POLLED)

#define REQ_DRV			(1ULL << __REQ_DRV)
#define REQ_SWAP		(1ULL << __REQ_SWAP)

#define REQ_FAILFAST_MASK \
	(REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER)

#define REQ_NOMERGE_FLAGS \
	(REQ_NOMERGE | REQ_PREFLUSH | REQ_FUA)

enum stat_group {
	STAT_READ,
	STAT_WRITE,
	STAT_DISCARD,
	STAT_FLUSH,

	NR_STAT_GROUPS
};

#define bio_op(bio) \
	((bio)->bi_opf & REQ_OP_MASK)

/* obsolete, don't use in new code */
static inline void bio_set_op_attrs(struct bio *bio, unsigned op,
				    unsigned op_flags)
{
	bio->bi_opf = op | op_flags;
}
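
/*
 * Preferred modern usage (illustrative): open-code the assignment instead of
 * calling bio_set_op_attrs(), e.g.
 *
 *	bio->bi_opf = REQ_OP_WRITE | REQ_SYNC;
 */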

static inline bool op_is_write(unsigned int op)
{
	return (op & 1);
}

/*
 * Check if the bio or request is one that needs special treatment in the
 * flush state machine.
 */
static inline bool op_is_flush(unsigned int op)
{
	return op & (REQ_FUA | REQ_PREFLUSH);
}

/*
 * Reads are always treated as synchronous, as are requests with the FUA or
 * PREFLUSH flag. Other operations may be marked as synchronous using the
 * REQ_SYNC flag.
 */
static inline bool op_is_sync(unsigned int op)
{
	return (op & REQ_OP_MASK) == REQ_OP_READ ||
		(op & (REQ_SYNC | REQ_FUA | REQ_PREFLUSH));
}

static inline bool op_is_discard(unsigned int op)
{
	return (op & REQ_OP_MASK) == REQ_OP_DISCARD;
}

/*
 * Check if a bio or request operation is a zone management operation, with
 * the exception of REQ_OP_ZONE_RESET_ALL which is treated as a special case
 * due to its different handling in the block layer and device response in
 * case of command failure.
 */
static inline bool op_is_zone_mgmt(enum req_opf op)
{
	switch (op & REQ_OP_MASK) {
	case REQ_OP_ZONE_RESET:
	case REQ_OP_ZONE_OPEN:
	case REQ_OP_ZONE_CLOSE:
	case REQ_OP_ZONE_FINISH:
		return true;
	default:
		return false;
	}
}

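/*
 * Relies on op_is_write() returning 0 or 1 and on STAT_READ == 0,
 * STAT_WRITE == 1 in enum stat_group above.
 */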
static inline int op_stat_group(unsigned int op)
{
	if (op_is_discard(op))
		return STAT_DISCARD;
	return op_is_write(op);
}

struct blk_rq_stat {
	u64 mean;
	u64 min;
	u64 max;
	u32 nr_samples;
	u64 batch;
};

#endif /* __LINUX_BLK_TYPES_H */