/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Block data types and constants.  Directly include this file only to
 * break an include dependency loop.
 */
#ifndef __LINUX_BLK_TYPES_H
#define __LINUX_BLK_TYPES_H

#include <linux/types.h>
#include <linux/bvec.h>
#include <linux/device.h>
#include <linux/ktime.h>

struct bio_set;
struct bio;
struct bio_integrity_payload;
struct page;
struct io_context;
struct cgroup_subsys_state;
typedef void (bio_end_io_t) (struct bio *);
struct bio_crypt_ctx;

struct block_device {
	sector_t		bd_start_sect;
	struct disk_stats __percpu *bd_stats;
	unsigned long		bd_stamp;
	bool			bd_read_only;	/* read-only policy */
	dev_t			bd_dev;
	int			bd_openers;
	struct inode		*bd_inode;	/* will die */
	struct super_block	*bd_super;
	struct mutex		bd_mutex;	/* open/close mutex */
	void			*bd_claiming;
	struct device		bd_device;
	void			*bd_holder;
	int			bd_holders;
	bool			bd_write_holder;
#ifdef CONFIG_SYSFS
	struct list_head	bd_holder_disks;
#endif
	struct kobject		*bd_holder_dir;
	u8			bd_partno;
	/* number of times partitions within this device have been opened. */
	unsigned		bd_part_count;

	spinlock_t		bd_size_lock;	/* for bd_inode->i_size updates */
	struct gendisk		*bd_disk;
	struct backing_dev_info	*bd_bdi;

	/* The counter of freeze processes */
	int			bd_fsfreeze_count;
	/* Mutex for freeze */
	struct mutex		bd_fsfreeze_mutex;
	struct super_block	*bd_fsfreeze_sb;

	struct partition_meta_info *bd_meta_info;
#ifdef CONFIG_FAIL_MAKE_REQUEST
	bool			bd_make_it_fail;
#endif
} __randomize_layout;

#define bdev_whole(_bdev) \
	((_bdev)->bd_disk->part0)

#define dev_to_bdev(device) \
	container_of((device), struct block_device, bd_device)

#define bdev_kobj(_bdev) \
	(&((_bdev)->bd_device.kobj))
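/*
 * Illustrative sketch, not part of this header: how the accessors above tie
 * the embedded struct device back to its block_device and a partition back
 * to the whole-disk device.  example_bdev_show() is a hypothetical helper
 * used only for this example.
 *
 *	static void example_bdev_show(struct device *dev)
 *	{
 *		struct block_device *bdev = dev_to_bdev(dev);
 *		struct block_device *whole = bdev_whole(bdev);
 *
 *		pr_debug("partition %d of disk %s\n",
 *			 bdev->bd_partno, whole->bd_disk->disk_name);
 *	}
 */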
/*
 * Block error status values.  See block/blk-core:blk_errors for the details.
 * Alpha cannot write a byte atomically, so we need to use a 32-bit value.
 */
#if defined(CONFIG_ALPHA) && !defined(__alpha_bwx__)
typedef u32 __bitwise blk_status_t;
#else
typedef u8 __bitwise blk_status_t;
#endif
#define	BLK_STS_OK 0
#define BLK_STS_NOTSUPP		((__force blk_status_t)1)
#define BLK_STS_TIMEOUT		((__force blk_status_t)2)
#define BLK_STS_NOSPC		((__force blk_status_t)3)
#define BLK_STS_TRANSPORT	((__force blk_status_t)4)
#define BLK_STS_TARGET		((__force blk_status_t)5)
#define BLK_STS_NEXUS		((__force blk_status_t)6)
#define BLK_STS_MEDIUM		((__force blk_status_t)7)
#define BLK_STS_PROTECTION	((__force blk_status_t)8)
#define BLK_STS_RESOURCE	((__force blk_status_t)9)
#define BLK_STS_IOERR		((__force blk_status_t)10)

/* hack for device mapper, don't use elsewhere: */
#define BLK_STS_DM_REQUEUE	((__force blk_status_t)11)

#define BLK_STS_AGAIN		((__force blk_status_t)12)

/*
 * BLK_STS_DEV_RESOURCE is returned from the driver to the block layer if
 * device related resources are unavailable, but the driver can guarantee
 * that the queue will be rerun in the future once resources become
 * available again.  This is typically the case for device specific
 * resources that are consumed for IO.  If the driver fails allocating these
 * resources, we know that inflight (or pending) IO will free these
 * resources upon completion.
 *
 * This is different from BLK_STS_RESOURCE in that it explicitly references
 * a device specific resource.  For resources of wider scope, allocation
 * failure can happen without having pending IO.  This means that we can't
 * rely on request completions freeing these resources, as IO may not be in
 * flight.  Examples of that are kernel memory allocations, DMA mappings, or
 * any other system wide resources.
 */
#define BLK_STS_DEV_RESOURCE	((__force blk_status_t)13)

/*
 * BLK_STS_ZONE_RESOURCE is returned from the driver to the block layer if zone
 * related resources are unavailable, but the driver can guarantee the queue
 * will be rerun in the future once the resources become available again.
 *
 * This is different from BLK_STS_DEV_RESOURCE in that it explicitly references
 * a zone specific resource and IO to a different zone on the same device could
 * still be served.  An example is a zone that is write-locked: writes to it
 * have to wait, but a read to the same zone can still be served.
 */
#define BLK_STS_ZONE_RESOURCE	((__force blk_status_t)14)

/*
 * BLK_STS_ZONE_OPEN_RESOURCE is returned from the driver in the completion
 * path if the device returns a status indicating that too many zone resources
 * are currently open.  The same command should be successful if resubmitted
 * after the number of open zones decreases below the device's limits, which is
 * reported in the request_queue's max_open_zones.
 */
#define BLK_STS_ZONE_OPEN_RESOURCE	((__force blk_status_t)15)

/*
 * BLK_STS_ZONE_ACTIVE_RESOURCE is returned from the driver in the completion
 * path if the device returns a status indicating that too many zone resources
 * are currently active.  The same command should be successful if resubmitted
 * after the number of active zones decreases below the device's limits, which
 * is reported in the request_queue's max_active_zones.
 */
#define BLK_STS_ZONE_ACTIVE_RESOURCE	((__force blk_status_t)16)

/**
 * blk_path_error - returns true if error may be path related
 * @error: status the request was completed with
 *
 * Description:
 *     This classifies block error status into non-retryable errors and ones
 *     that may be successful if retried on a failover path.
 *
 * Return:
 *     %false - retrying failover path will not help
 *     %true  - may succeed if retried
 */
static inline bool blk_path_error(blk_status_t error)
{
	switch (error) {
	case BLK_STS_NOTSUPP:
	case BLK_STS_NOSPC:
	case BLK_STS_TARGET:
	case BLK_STS_NEXUS:
	case BLK_STS_MEDIUM:
	case BLK_STS_PROTECTION:
		return false;
	}

	/* Anything else could be a path failure, so should be retried */
	return true;
}
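/*
 * Usage sketch, illustrative only: a failover-capable completion handler can
 * use blk_path_error() to decide whether retrying the I/O on another path
 * makes sense.  example_requeue_on_other_path() and example_end_request()
 * are hypothetical helpers used only for this example.
 *
 *	static void example_complete(struct bio *bio)
 *	{
 *		if (bio->bi_status != BLK_STS_OK &&
 *		    blk_path_error(bio->bi_status)) {
 *			example_requeue_on_other_path(bio);
 *			return;
 *		}
 *		example_end_request(bio);
 *	}
 */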
/*
 * From most significant bit:
 * 1 bit: reserved for other usage, see below
 * 12 bits: original size of bio
 * 51 bits: issue time of bio
 */
#define BIO_ISSUE_RES_BITS      1
#define BIO_ISSUE_SIZE_BITS     12
#define BIO_ISSUE_RES_SHIFT     (64 - BIO_ISSUE_RES_BITS)
#define BIO_ISSUE_SIZE_SHIFT    (BIO_ISSUE_RES_SHIFT - BIO_ISSUE_SIZE_BITS)
#define BIO_ISSUE_TIME_MASK     ((1ULL << BIO_ISSUE_SIZE_SHIFT) - 1)
#define BIO_ISSUE_SIZE_MASK     \
	(((1ULL << BIO_ISSUE_SIZE_BITS) - 1) << BIO_ISSUE_SIZE_SHIFT)
#define BIO_ISSUE_RES_MASK      (~((1ULL << BIO_ISSUE_RES_SHIFT) - 1))

/* Reserved bit for blk-throtl */
#define BIO_ISSUE_THROTL_SKIP_LATENCY (1ULL << 63)

struct bio_issue {
	u64 value;
};

static inline u64 __bio_issue_time(u64 time)
{
	return time & BIO_ISSUE_TIME_MASK;
}

static inline u64 bio_issue_time(struct bio_issue *issue)
{
	return __bio_issue_time(issue->value);
}

static inline sector_t bio_issue_size(struct bio_issue *issue)
{
	return ((issue->value & BIO_ISSUE_SIZE_MASK) >> BIO_ISSUE_SIZE_SHIFT);
}

static inline void bio_issue_init(struct bio_issue *issue,
				  sector_t size)
{
	size &= (1ULL << BIO_ISSUE_SIZE_BITS) - 1;
	issue->value = ((issue->value & BIO_ISSUE_RES_MASK) |
			(ktime_get_ns() & BIO_ISSUE_TIME_MASK) |
			((u64)size << BIO_ISSUE_SIZE_SHIFT));
}
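/*
 * Worked example, illustrative only: for a bio of 1024 sectors issued at
 * time t (in ns), bio_issue_init() stores
 *
 *	value = (value & BIO_ISSUE_RES_MASK)		reserved bit 63
 *	      | (t & BIO_ISSUE_TIME_MASK)		issue time, low 51 bits
 *	      | ((u64)1024 << BIO_ISSUE_SIZE_SHIFT)	size, 12 bits above that
 *
 * so bio_issue_size() later returns 1024 and bio_issue_time() returns t
 * truncated to 51 bits.  Sizes are stored modulo 2^BIO_ISSUE_SIZE_BITS, so a
 * 4096-sector bio would be recorded as size 0.
 */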
/*
 * main unit of I/O for the block layer and lower layers (i.e. drivers and
 * stacking drivers)
 */
struct bio {
	struct bio		*bi_next;	/* request queue link */
	struct block_device	*bi_bdev;
	unsigned int		bi_opf;		/* bottom bits REQ_OP, top bits
						 * req flags. Use accessors.
						 */
	unsigned short		bi_flags;	/* BIO_* below */
	unsigned short		bi_ioprio;
	unsigned short		bi_write_hint;
	blk_status_t		bi_status;
	atomic_t		__bi_remaining;

	struct bvec_iter	bi_iter;

	bio_end_io_t		*bi_end_io;

	void			*bi_private;
#ifdef CONFIG_BLK_CGROUP
	/*
	 * Represents the association of the css and request_queue for the bio.
	 * If a bio goes direct to the device, it will not have a blkg as it
	 * will not have a request_queue associated with it.  The reference is
	 * put on release of the bio.
	 */
	struct blkcg_gq		*bi_blkg;
	struct bio_issue	bi_issue;
#ifdef CONFIG_BLK_CGROUP_IOCOST
	u64			bi_iocost_cost;
#endif
#endif

#ifdef CONFIG_BLK_INLINE_ENCRYPTION
	struct bio_crypt_ctx	*bi_crypt_context;
#endif

	union {
#if defined(CONFIG_BLK_DEV_INTEGRITY)
		struct bio_integrity_payload *bi_integrity; /* data integrity */
#endif
	};

	unsigned short		bi_vcnt;	/* how many bio_vec's */

	/*
	 * Everything starting with bi_max_vecs will be preserved by
	 * bio_reset()
	 */

	unsigned short		bi_max_vecs;	/* max bvl_vecs we can hold */

	atomic_t		__bi_cnt;	/* pin count */

	struct bio_vec		*bi_io_vec;	/* the actual vec list */

	struct bio_set		*bi_pool;

	/*
	 * We can inline a number of vecs at the end of the bio, to avoid
	 * double allocations for a small number of bio_vecs.  This member
	 * MUST obviously be kept at the very end of the bio.
	 */
	struct bio_vec		bi_inline_vecs[];
};

#define BIO_RESET_BYTES		offsetof(struct bio, bi_max_vecs)
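/*
 * Illustrative note, based on the bio_reset() implementation in block/bio.c
 * rather than on anything defined in this header: resetting a bio is roughly
 *
 *	memset(bio, 0, BIO_RESET_BYTES);
 *
 * so every field declared before bi_max_vecs (bi_opf, bi_status, bi_iter,
 * bi_end_io, ...) is cleared, while bi_max_vecs, __bi_cnt, bi_io_vec,
 * bi_pool and the inline vecs are preserved and the bio can be reused with
 * its existing vector allocation.
 */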
/*
 * bio flags
 */
enum {
	BIO_NO_PAGE_REF,	/* don't put pages on bio release */
	BIO_CLONED,		/* doesn't own data */
	BIO_BOUNCED,		/* bio is a bounce bio */
	BIO_WORKINGSET,		/* contains userspace workingset pages */
	BIO_QUIET,		/* Make BIO Quiet */
	BIO_CHAIN,		/* chained bio, ->bi_remaining in effect */
	BIO_REFFED,		/* bio has elevated ->bi_cnt */
	BIO_THROTTLED,		/* This bio has already been subjected to
				 * throttling rules. Don't do it again. */
	BIO_TRACE_COMPLETION,	/* bio_endio() should trace the final completion
				 * of this bio. */
	BIO_CGROUP_ACCT,	/* has been accounted to a cgroup */
	BIO_TRACKED,		/* set if bio goes through the rq_qos path */
	BIO_REMAPPED,
	BIO_FLAG_LAST
};

typedef __u32 __bitwise blk_mq_req_flags_t;

/*
 * Operations and flags common to the bio and request structures.
 * We use 8 bits for encoding the operation, and the remaining 24 for flags.
 *
 * The least significant bit of the operation number indicates the data
 * transfer direction:
 *
 *   - if the least significant bit is set transfers are TO the device
 *   - if the least significant bit is not set transfers are FROM the device
 *
 * If an operation does not transfer data the least significant bit has no
 * meaning.
 */
#define REQ_OP_BITS	8
#define REQ_OP_MASK	((1 << REQ_OP_BITS) - 1)
#define REQ_FLAG_BITS	24

enum req_opf {
	/* read sectors from the device */
	REQ_OP_READ		= 0,
	/* write sectors to the device */
	REQ_OP_WRITE		= 1,
	/* flush the volatile write cache */
	REQ_OP_FLUSH		= 2,
	/* discard sectors */
	REQ_OP_DISCARD		= 3,
	/* securely erase sectors */
	REQ_OP_SECURE_ERASE	= 5,
	/* write the same sector many times */
	REQ_OP_WRITE_SAME	= 7,
	/* write the zero filled sector many times */
	REQ_OP_WRITE_ZEROES	= 9,
	/* Open a zone */
	REQ_OP_ZONE_OPEN	= 10,
	/* Close a zone */
	REQ_OP_ZONE_CLOSE	= 11,
	/* Transition a zone to full */
	REQ_OP_ZONE_FINISH	= 12,
	/* write data at the current zone write pointer */
	REQ_OP_ZONE_APPEND	= 13,
	/* reset a zone write pointer */
	REQ_OP_ZONE_RESET	= 15,
	/* reset all the zones present on the device */
	REQ_OP_ZONE_RESET_ALL	= 17,

	/* SCSI passthrough using struct scsi_request */
	REQ_OP_SCSI_IN		= 32,
	REQ_OP_SCSI_OUT		= 33,
	/* Driver private requests */
	REQ_OP_DRV_IN		= 34,
	REQ_OP_DRV_OUT		= 35,

	REQ_OP_LAST,
};

enum req_flag_bits {
	__REQ_FAILFAST_DEV =	/* no driver retries of device errors */
		REQ_OP_BITS,
	__REQ_FAILFAST_TRANSPORT, /* no driver retries of transport errors */
	__REQ_FAILFAST_DRIVER,	/* no driver retries of driver errors */
	__REQ_SYNC,		/* request is sync (sync write or read) */
	__REQ_META,		/* metadata io request */
	__REQ_PRIO,		/* boost priority in cfq */
	__REQ_NOMERGE,		/* don't touch this for merging */
	__REQ_IDLE,		/* anticipate more IO after this one */
	__REQ_INTEGRITY,	/* I/O includes block integrity payload */
	__REQ_FUA,		/* forced unit access */
	__REQ_PREFLUSH,		/* request for cache flush */
	__REQ_RAHEAD,		/* read ahead, can fail anytime */
	__REQ_BACKGROUND,	/* background IO */
	__REQ_NOWAIT,		/* Don't wait if request will block */
	/*
	 * When a shared kthread needs to issue a bio for a cgroup, doing
	 * so synchronously can lead to priority inversions as the kthread
	 * can be trapped waiting for that cgroup.  CGROUP_PUNT flag makes
	 * submit_bio() punt the actual issuing to a dedicated per-blkcg
	 * work item to avoid such priority inversions.
	 */
	__REQ_CGROUP_PUNT,

	/* command specific flags for REQ_OP_WRITE_ZEROES: */
	__REQ_NOUNMAP,		/* do not free blocks when zeroing */

	__REQ_HIPRI,

	/* for driver use */
	__REQ_DRV,
	__REQ_SWAP,		/* swapping request. */
	__REQ_NR_BITS,		/* stops here */
};
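/*
 * Illustrative sketch, not part of this header: the operation and its flags
 * share a single field, so a synchronous FUA write is built and decoded as
 *
 *	unsigned int opf = REQ_OP_WRITE | REQ_SYNC | REQ_FUA;
 *	enum req_opf op  = opf & REQ_OP_MASK;
 *
 * Here op is REQ_OP_WRITE (the low REQ_OP_BITS bits), op_is_write(opf) is
 * true because the least significant bit of the operation is set, and
 * op_is_flush(opf) is true because REQ_FUA is set.
 */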
#define REQ_FAILFAST_DEV	(1ULL << __REQ_FAILFAST_DEV)
#define REQ_FAILFAST_TRANSPORT	(1ULL << __REQ_FAILFAST_TRANSPORT)
#define REQ_FAILFAST_DRIVER	(1ULL << __REQ_FAILFAST_DRIVER)
#define REQ_SYNC		(1ULL << __REQ_SYNC)
#define REQ_META		(1ULL << __REQ_META)
#define REQ_PRIO		(1ULL << __REQ_PRIO)
#define REQ_NOMERGE		(1ULL << __REQ_NOMERGE)
#define REQ_IDLE		(1ULL << __REQ_IDLE)
#define REQ_INTEGRITY		(1ULL << __REQ_INTEGRITY)
#define REQ_FUA			(1ULL << __REQ_FUA)
#define REQ_PREFLUSH		(1ULL << __REQ_PREFLUSH)
#define REQ_RAHEAD		(1ULL << __REQ_RAHEAD)
#define REQ_BACKGROUND		(1ULL << __REQ_BACKGROUND)
#define REQ_NOWAIT		(1ULL << __REQ_NOWAIT)
#define REQ_CGROUP_PUNT		(1ULL << __REQ_CGROUP_PUNT)

#define REQ_NOUNMAP		(1ULL << __REQ_NOUNMAP)
#define REQ_HIPRI		(1ULL << __REQ_HIPRI)

#define REQ_DRV			(1ULL << __REQ_DRV)
#define REQ_SWAP		(1ULL << __REQ_SWAP)

#define REQ_FAILFAST_MASK \
	(REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER)

#define REQ_NOMERGE_FLAGS \
	(REQ_NOMERGE | REQ_PREFLUSH | REQ_FUA)

enum stat_group {
	STAT_READ,
	STAT_WRITE,
	STAT_DISCARD,
	STAT_FLUSH,

	NR_STAT_GROUPS
};

#define bio_op(bio) \
	((bio)->bi_opf & REQ_OP_MASK)
#define req_op(req) \
	((req)->cmd_flags & REQ_OP_MASK)

/* obsolete, don't use in new code */
static inline void bio_set_op_attrs(struct bio *bio, unsigned op,
				    unsigned op_flags)
{
	bio->bi_opf = op | op_flags;
}

static inline bool op_is_write(unsigned int op)
{
	return (op & 1);
}

/*
 * Check if the bio or request is one that needs special treatment in the
 * flush state machine.
 */
static inline bool op_is_flush(unsigned int op)
{
	return op & (REQ_FUA | REQ_PREFLUSH);
}

/*
 * Reads are always treated as synchronous, as are requests with the FUA or
 * PREFLUSH flag.  Other operations may be marked as synchronous using the
 * REQ_SYNC flag.
 */
static inline bool op_is_sync(unsigned int op)
{
	return (op & REQ_OP_MASK) == REQ_OP_READ ||
		(op & (REQ_SYNC | REQ_FUA | REQ_PREFLUSH));
}

static inline bool op_is_discard(unsigned int op)
{
	return (op & REQ_OP_MASK) == REQ_OP_DISCARD;
}

/*
 * Check if a bio or request operation is a zone management operation, with
 * the exception of REQ_OP_ZONE_RESET_ALL which is treated as a special case
 * due to its different handling in the block layer and device response in
 * case of command failure.
 */
static inline bool op_is_zone_mgmt(enum req_opf op)
{
	switch (op & REQ_OP_MASK) {
	case REQ_OP_ZONE_RESET:
	case REQ_OP_ZONE_OPEN:
	case REQ_OP_ZONE_CLOSE:
	case REQ_OP_ZONE_FINISH:
		return true;
	default:
		return false;
	}
}

static inline int op_stat_group(unsigned int op)
{
	if (op_is_discard(op))
		return STAT_DISCARD;
	return op_is_write(op);
}

typedef unsigned int blk_qc_t;
#define BLK_QC_T_NONE		-1U
#define BLK_QC_T_SHIFT		16
#define BLK_QC_T_INTERNAL	(1U << 31)

static inline bool blk_qc_t_valid(blk_qc_t cookie)
{
	return cookie != BLK_QC_T_NONE;
}

static inline unsigned int blk_qc_t_to_queue_num(blk_qc_t cookie)
{
	return (cookie & ~BLK_QC_T_INTERNAL) >> BLK_QC_T_SHIFT;
}

static inline unsigned int blk_qc_t_to_tag(blk_qc_t cookie)
{
	return cookie & ((1u << BLK_QC_T_SHIFT) - 1);
}

static inline bool blk_qc_t_is_internal(blk_qc_t cookie)
{
	return (cookie & BLK_QC_T_INTERNAL) != 0;
}
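/*
 * Illustrative sketch, not part of this header: a polling cookie packs the
 * hardware queue number above BLK_QC_T_SHIFT and the tag below it, e.g.
 *
 *	blk_qc_t cookie = (2U << BLK_QC_T_SHIFT) | 0x2aU;
 *
 * for which blk_qc_t_to_queue_num() returns 2, blk_qc_t_to_tag() returns
 * 0x2a, and blk_qc_t_is_internal() returns false because bit 31 is clear.
 */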
struct blk_rq_stat {
	u64 mean;
	u64 min;
	u64 max;
	u32 nr_samples;
	u64 batch;
};

#endif /* __LINUX_BLK_TYPES_H */