/* SPDX-License-Identifier: GPL-2.0 */
#ifndef BLK_INTERNAL_H
#define BLK_INTERNAL_H

#include <linux/idr.h>
#include <linux/blk-mq.h>
#include <linux/part_stat.h>
#include <xen/xen.h>
#include "blk-mq.h"
#include "blk-mq-sched.h"

/* Max future timer expiry for timeouts */
#define BLK_MAX_TIMEOUT		(5 * HZ)

#ifdef CONFIG_DEBUG_FS
extern struct dentry *blk_debugfs_root;
#endif

struct blk_flush_queue {
	unsigned int		flush_queue_delayed:1;
	unsigned int		flush_pending_idx:1;
	unsigned int		flush_running_idx:1;
	blk_status_t		rq_status;
	unsigned long		flush_pending_since;
	struct list_head	flush_queue[2];
	struct list_head	flush_data_in_flight;
	struct request		*flush_rq;

	/*
	 * flush_rq shares tag with this rq, both can't be active
	 * at the same time
	 */
	struct request		*orig_rq;
	struct lock_class_key	key;
	spinlock_t		mq_flush_lock;
};

extern struct kmem_cache *blk_requestq_cachep;
extern struct kobj_type blk_queue_ktype;
extern struct ida blk_queue_ida;

static inline struct blk_flush_queue *
blk_get_flush_queue(struct request_queue *q, struct blk_mq_ctx *ctx)
{
	return blk_mq_map_queue(q, REQ_OP_FLUSH, ctx)->fq;
}

static inline void __blk_get_queue(struct request_queue *q)
{
	kobject_get(&q->kobj);
}

static inline bool
is_flush_rq(struct request *req, struct blk_mq_hw_ctx *hctx)
{
	return hctx->fq->flush_rq == req;
}

struct blk_flush_queue *blk_alloc_flush_queue(int node, int cmd_size,
		gfp_t flags);
void blk_free_flush_queue(struct blk_flush_queue *q);

void blk_freeze_queue(struct request_queue *q);

static inline void blk_queue_enter_live(struct request_queue *q)
{
	/*
	 * Given that running in generic_make_request() context
	 * guarantees that a live reference against q_usage_counter has
	 * been established, further references under that same context
	 * need not check that the queue has been frozen (marked dead).
	 */
	percpu_ref_get(&q->q_usage_counter);
}

static inline bool biovec_phys_mergeable(struct request_queue *q,
		struct bio_vec *vec1, struct bio_vec *vec2)
{
	unsigned long mask = queue_segment_boundary(q);
	phys_addr_t addr1 = page_to_phys(vec1->bv_page) + vec1->bv_offset;
	phys_addr_t addr2 = page_to_phys(vec2->bv_page) + vec2->bv_offset;

	if (addr1 + vec1->bv_len != addr2)
		return false;
	if (xen_domain() && !xen_biovec_phys_mergeable(vec1, vec2->bv_page))
		return false;
	if ((addr1 | mask) != ((addr2 + vec2->bv_len - 1) | mask))
		return false;
	return true;
}

static inline bool __bvec_gap_to_prev(struct request_queue *q,
		struct bio_vec *bprv, unsigned int offset)
{
	return (offset & queue_virt_boundary(q)) ||
		((bprv->bv_offset + bprv->bv_len) & queue_virt_boundary(q));
}

/*
 * Check if adding a bio_vec after bprv with offset would create a gap in
 * the SG list. Most drivers don't care about this, but some do.
 */
static inline bool bvec_gap_to_prev(struct request_queue *q,
		struct bio_vec *bprv, unsigned int offset)
{
	if (!queue_virt_boundary(q))
		return false;
	return __bvec_gap_to_prev(q, bprv, offset);
}
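
/*
 * Worked example (editor's illustration, not part of the original header):
 * on a device with a 4K virt boundary, queue_virt_boundary(q) == 0xfff.
 * A previous bio_vec with bv_offset == 0 and bv_len == 0x1200 ends at
 * 0x1200, and 0x1200 & 0xfff == 0x200, so bvec_gap_to_prev() reports a
 * gap no matter where the next vector starts.  When the previous vector
 * ends exactly on the boundary, a gap is reported only if the next
 * vector's bv_offset is not itself boundary-aligned.
 */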

static inline void blk_rq_bio_prep(struct request *rq, struct bio *bio,
		unsigned int nr_segs)
{
	rq->nr_phys_segments = nr_segs;
	rq->__data_len = bio->bi_iter.bi_size;
	rq->bio = rq->biotail = bio;
	rq->ioprio = bio_prio(bio);

	if (bio->bi_disk)
		rq->rq_disk = bio->bi_disk;
}

#ifdef CONFIG_BLK_DEV_INTEGRITY
void blk_flush_integrity(void);
bool __bio_integrity_endio(struct bio *);
void bio_integrity_free(struct bio *bio);
static inline bool bio_integrity_endio(struct bio *bio)
{
	if (bio_integrity(bio))
		return __bio_integrity_endio(bio);
	return true;
}

static inline bool integrity_req_gap_back_merge(struct request *req,
		struct bio *next)
{
	struct bio_integrity_payload *bip = bio_integrity(req->bio);
	struct bio_integrity_payload *bip_next = bio_integrity(next);

	return bvec_gap_to_prev(req->q, &bip->bip_vec[bip->bip_vcnt - 1],
				bip_next->bip_vec[0].bv_offset);
}

static inline bool integrity_req_gap_front_merge(struct request *req,
		struct bio *bio)
{
	struct bio_integrity_payload *bip = bio_integrity(bio);
	struct bio_integrity_payload *bip_next = bio_integrity(req->bio);

	return bvec_gap_to_prev(req->q, &bip->bip_vec[bip->bip_vcnt - 1],
				bip_next->bip_vec[0].bv_offset);
}

void blk_integrity_add(struct gendisk *);
void blk_integrity_del(struct gendisk *);
#else /* CONFIG_BLK_DEV_INTEGRITY */
static inline bool integrity_req_gap_back_merge(struct request *req,
		struct bio *next)
{
	return false;
}
static inline bool integrity_req_gap_front_merge(struct request *req,
		struct bio *bio)
{
	return false;
}

static inline void blk_flush_integrity(void)
{
}
static inline bool bio_integrity_endio(struct bio *bio)
{
	return true;
}
static inline void bio_integrity_free(struct bio *bio)
{
}
static inline void blk_integrity_add(struct gendisk *disk)
{
}
static inline void blk_integrity_del(struct gendisk *disk)
{
}
#endif /* CONFIG_BLK_DEV_INTEGRITY */

unsigned long blk_rq_timeout(unsigned long timeout);
void blk_add_timer(struct request *req);

bool bio_attempt_front_merge(struct request *req, struct bio *bio,
		unsigned int nr_segs);
bool bio_attempt_back_merge(struct request *req, struct bio *bio,
		unsigned int nr_segs);
bool bio_attempt_discard_merge(struct request_queue *q, struct request *req,
		struct bio *bio);
bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
		unsigned int nr_segs, struct request **same_queue_rq);

void blk_account_io_start(struct request *req, bool new_io);
void blk_account_io_completion(struct request *req, unsigned int bytes);
void blk_account_io_done(struct request *req, u64 now);

/*
 * Internal elevator interface
 */
#define ELV_ON_HASH(rq) ((rq)->rq_flags & RQF_HASHED)

void blk_insert_flush(struct request *rq);

void elevator_init_mq(struct request_queue *q);
int elevator_switch_mq(struct request_queue *q,
		struct elevator_type *new_e);
void __elevator_exit(struct request_queue *, struct elevator_queue *);
int elv_register_queue(struct request_queue *q, bool uevent);
void elv_unregister_queue(struct request_queue *q);

static inline void elevator_exit(struct request_queue *q,
		struct elevator_queue *e)
{
	lockdep_assert_held(&q->sysfs_lock);

	blk_mq_sched_free_requests(q);
	__elevator_exit(q, e);
}

struct hd_struct *__disk_get_part(struct gendisk *disk, int partno);

ssize_t part_size_show(struct device *dev, struct device_attribute *attr,
		char *buf);
ssize_t part_stat_show(struct device *dev, struct device_attribute *attr,
		char *buf);
ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr,
		char *buf);
ssize_t part_fail_show(struct device *dev, struct device_attribute *attr,
		char *buf);
ssize_t part_fail_store(struct device *dev, struct device_attribute *attr,
		const char *buf, size_t count);

#ifdef CONFIG_FAIL_IO_TIMEOUT
int blk_should_fake_timeout(struct request_queue *);
ssize_t part_timeout_show(struct device *, struct device_attribute *, char *);
ssize_t part_timeout_store(struct device *, struct device_attribute *,
				const char *, size_t);
#else
static inline int blk_should_fake_timeout(struct request_queue *q)
{
	return 0;
}
#endif

void __blk_queue_split(struct request_queue *q, struct bio **bio,
		unsigned int *nr_segs);
int ll_back_merge_fn(struct request *req, struct bio *bio,
		unsigned int nr_segs);
int ll_front_merge_fn(struct request *req, struct bio *bio,
		unsigned int nr_segs);
struct request *attempt_back_merge(struct request_queue *q, struct request *rq);
struct request *attempt_front_merge(struct request_queue *q, struct request *rq);
int blk_attempt_req_merge(struct request_queue *q, struct request *rq,
				struct request *next);
unsigned int blk_recalc_rq_segments(struct request *rq);
void blk_rq_set_mixed_merge(struct request *rq);
bool blk_rq_merge_ok(struct request *rq, struct bio *bio);
enum elv_merge blk_try_merge(struct request *rq, struct bio *bio);

int blk_dev_init(void);

/*
 * Contribute to IO statistics IFF:
 *
 *	a) it's attached to a gendisk, and
 *	b) the queue had IO stats enabled when this request was started
 */
static inline bool blk_do_io_stat(struct request *rq)
{
	return rq->rq_disk && (rq->rq_flags & RQF_IO_STAT);
}

static inline void req_set_nomerge(struct request_queue *q, struct request *req)
{
	req->cmd_flags |= REQ_NOMERGE;
	if (req == q->last_merge)
		q->last_merge = NULL;
}

/*
 * The max size one bio can handle is UINT_MAX because bvec_iter.bi_size
 * is defined as 'unsigned int'; at the same time it has to be aligned to
 * the logical block size, which is the minimum unit accepted by the
 * hardware.
 */
static inline unsigned int bio_allowed_max_sectors(struct request_queue *q)
{
	return round_down(UINT_MAX, queue_logical_block_size(q)) >> 9;
}
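
/*
 * Worked example (editor's illustration, not part of the original header):
 * with a 4096-byte logical block size, round_down(UINT_MAX, 4096) ==
 * 0xfffff000 bytes, and the ">> 9" converts bytes to 512-byte sectors,
 * giving a limit of 8388600 sectors (4 GiB minus 4 KiB).  With 512-byte
 * logical blocks the limit is 8388607 sectors.
 */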

/*
 * Internal io_context interface
 */
void get_io_context(struct io_context *ioc);
struct io_cq *ioc_lookup_icq(struct io_context *ioc, struct request_queue *q);
struct io_cq *ioc_create_icq(struct io_context *ioc, struct request_queue *q,
			     gfp_t gfp_mask);
void ioc_clear_queue(struct request_queue *q);

int create_task_io_context(struct task_struct *task, gfp_t gfp_mask, int node);

/**
 * create_io_context - try to create task->io_context
 * @gfp_mask: allocation mask
 * @node: allocation node
 *
 * If %current->io_context is %NULL, allocate a new io_context and install
 * it.  Returns the current %current->io_context which may be %NULL if
 * allocation failed.
 *
 * Note that this function can't be called with IRQ disabled because
 * task_lock which protects %current->io_context is IRQ-unsafe.
 */
static inline struct io_context *create_io_context(gfp_t gfp_mask, int node)
{
	WARN_ON_ONCE(irqs_disabled());
	if (unlikely(!current->io_context))
		create_task_io_context(current, gfp_mask, node);
	return current->io_context;
}
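
/*
 * Illustrative sketch (editor's addition, not part of this header): the
 * typical call pattern for create_io_context() from process context.  The
 * helper name example_ensure_io_context() is hypothetical.
 */
static inline int example_ensure_io_context(int node)
{
	/* May sleep and takes task_lock(), so IRQs must be enabled here. */
	if (!create_io_context(GFP_KERNEL, node))
		return -ENOMEM;	/* io_context allocation failed */
	return 0;
}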

/*
 * Internal throttling interface
 */
#ifdef CONFIG_BLK_DEV_THROTTLING
extern void blk_throtl_drain(struct request_queue *q);
extern int blk_throtl_init(struct request_queue *q);
extern void blk_throtl_exit(struct request_queue *q);
extern void blk_throtl_register_queue(struct request_queue *q);
#else /* CONFIG_BLK_DEV_THROTTLING */
static inline void blk_throtl_drain(struct request_queue *q) { }
static inline int blk_throtl_init(struct request_queue *q) { return 0; }
static inline void blk_throtl_exit(struct request_queue *q) { }
static inline void blk_throtl_register_queue(struct request_queue *q) { }
#endif /* CONFIG_BLK_DEV_THROTTLING */
#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
extern ssize_t blk_throtl_sample_time_show(struct request_queue *q, char *page);
extern ssize_t blk_throtl_sample_time_store(struct request_queue *q,
	const char *page, size_t count);
extern void blk_throtl_bio_endio(struct bio *bio);
extern void blk_throtl_stat_add(struct request *rq, u64 time);
#else
static inline void blk_throtl_bio_endio(struct bio *bio) { }
static inline void blk_throtl_stat_add(struct request *rq, u64 time) { }
#endif

#ifdef CONFIG_BOUNCE
extern int init_emergency_isa_pool(void);
extern void blk_queue_bounce(struct request_queue *q, struct bio **bio);
#else
static inline int init_emergency_isa_pool(void)
{
	return 0;
}
static inline void blk_queue_bounce(struct request_queue *q, struct bio **bio)
{
}
#endif /* CONFIG_BOUNCE */

#ifdef CONFIG_BLK_CGROUP_IOLATENCY
extern int blk_iolatency_init(struct request_queue *q);
#else
static inline int blk_iolatency_init(struct request_queue *q) { return 0; }
#endif

struct bio *blk_next_bio(struct bio *bio, unsigned int nr_pages, gfp_t gfp);

#ifdef CONFIG_BLK_DEV_ZONED
void blk_queue_free_zone_bitmaps(struct request_queue *q);
#else
static inline void blk_queue_free_zone_bitmaps(struct request_queue *q) {}
#endif

void part_dec_in_flight(struct request_queue *q, struct hd_struct *part,
			int rw);
void part_inc_in_flight(struct request_queue *q, struct hd_struct *part,
			int rw);
void update_io_ticks(struct hd_struct *part, unsigned long now, bool end);
struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector);

int blk_alloc_devt(struct hd_struct *part, dev_t *devt);
void blk_free_devt(dev_t devt);
void blk_invalidate_devt(dev_t devt);
char *disk_name(struct gendisk *hd, int partno, char *buf);
#define ADDPART_FLAG_NONE	0
#define ADDPART_FLAG_RAID	1
#define ADDPART_FLAG_WHOLEDISK	2
struct hd_struct *__must_check add_partition(struct gendisk *disk, int partno,
		sector_t start, sector_t len, int flags,
		struct partition_meta_info *info);
void __delete_partition(struct percpu_ref *ref);
void delete_partition(struct gendisk *disk, int partno);
int disk_expand_part_tbl(struct gendisk *disk, int target);

static inline int hd_ref_init(struct hd_struct *part)
{
	if (percpu_ref_init(&part->ref, __delete_partition, 0,
				GFP_KERNEL))
		return -ENOMEM;
	return 0;
}

static inline void hd_struct_get(struct hd_struct *part)
{
	percpu_ref_get(&part->ref);
}

static inline int hd_struct_try_get(struct hd_struct *part)
{
	return percpu_ref_tryget_live(&part->ref);
}

static inline void hd_struct_put(struct hd_struct *part)
{
	percpu_ref_put(&part->ref);
}

static inline void hd_struct_kill(struct hd_struct *part)
{
	percpu_ref_kill(&part->ref);
}

static inline void hd_free_part(struct hd_struct *part)
{
	free_part_stats(part);
	kfree(part->info);
	percpu_ref_exit(&part->ref);
}

/*
 * Any access of part->nr_sects that is not protected by the partition's
 * bd_mutex or the gendisk's bdev bd_mutex should go through this accessor
 * function.
 *
 * Written along the lines of i_size_read() and i_size_write().  The
 * CONFIG_PREEMPTION case optimizes for a UP kernel with preemption
 * enabled.
 */
static inline sector_t part_nr_sects_read(struct hd_struct *part)
{
#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
	sector_t nr_sects;
	unsigned seq;
	do {
		seq = read_seqcount_begin(&part->nr_sects_seq);
		nr_sects = part->nr_sects;
	} while (read_seqcount_retry(&part->nr_sects_seq, seq));
	return nr_sects;
#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION)
	sector_t nr_sects;

	preempt_disable();
	nr_sects = part->nr_sects;
	preempt_enable();
	return nr_sects;
#else
	return part->nr_sects;
#endif
}

/*
 * Should be called with the partition's mutex (typically bd_mutex) held to
 * provide mutual exclusion among writers; otherwise the seqcount may be
 * left in an inconsistent state, leaving readers spinning forever.
 */
static inline void part_nr_sects_write(struct hd_struct *part, sector_t size)
{
#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
	write_seqcount_begin(&part->nr_sects_seq);
	part->nr_sects = size;
	write_seqcount_end(&part->nr_sects_seq);
#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION)
	preempt_disable();
	part->nr_sects = size;
	preempt_enable();
#else
	part->nr_sects = size;
#endif
}
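
/*
 * Illustrative sketch (editor's addition, not part of this header): the
 * intended pairing of the accessors above.  Writers serialize on the
 * partition's mutex (typically bd_mutex) around part_nr_sects_write(),
 * while readers may call part_nr_sects_read() locklessly.  The helper
 * example_set_part_size() and its lock argument are hypothetical.
 */
static inline void example_set_part_size(struct mutex *bd_mutex,
					 struct hd_struct *part,
					 sector_t nr_sects)
{
	mutex_lock(bd_mutex);			/* exclude other writers */
	part_nr_sects_write(part, nr_sects);
	mutex_unlock(bd_mutex);
}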

struct request_queue *__blk_alloc_queue(int node_id);

int __bio_add_pc_page(struct request_queue *q, struct bio *bio,
		struct page *page, unsigned int len, unsigned int offset,
		bool *same_page);

#endif /* BLK_INTERNAL_H */