/*
 *  MQ Deadline i/o scheduler - adaptation of the legacy deadline scheduler,
 *  for the blk-mq scheduling framework
 *
 *  Copyright (C) 2016 Jens Axboe <axboe@kernel.dk>
 */
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
#include <linux/elevator.h>
#include <linux/bio.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/compiler.h>
#include <linux/rbtree.h>
#include <linux/sbitmap.h>

#include "blk.h"
#include "blk-mq.h"
#include "blk-mq-tag.h"
#include "blk-mq-sched.h"

/*
 * See Documentation/block/deadline-iosched.txt
 */
static const int read_expire = HZ / 2;	/* max time before a read is submitted. */
static const int write_expire = 5 * HZ;	/* ditto for writes, these limits are SOFT! */
static const int writes_starved = 2;	/* max times reads can starve a write */
static const int fifo_batch = 16;	/* # of sequential requests treated as one
					   by the above parameters. For throughput. */

struct deadline_data {
	/*
	 * run time data
	 */

	/*
	 * requests (deadline_rq s) are present on both sort_list and fifo_list
	 */
	struct rb_root sort_list[2];
	struct list_head fifo_list[2];

	/*
	 * next in sort order. read, write or both are NULL
	 */
	struct request *next_rq[2];
	unsigned int batching;		/* number of sequential requests made */
	unsigned int starved;		/* times reads have starved writes */

	/*
	 * settings that change how the i/o scheduler behaves
	 */
	int fifo_expire[2];
	int fifo_batch;
	int writes_starved;
	int front_merges;

	spinlock_t lock;
	struct list_head dispatch;
};

static inline struct rb_root *
deadline_rb_root(struct deadline_data *dd, struct request *rq)
{
	return &dd->sort_list[rq_data_dir(rq)];
}

/*
 * get the request after `rq' in sector-sorted order
 */
static inline struct request *
deadline_latter_request(struct request *rq)
{
	struct rb_node *node = rb_next(&rq->rb_node);

	if (node)
		return rb_entry_rq(node);

	return NULL;
}

static void
deadline_add_rq_rb(struct deadline_data *dd, struct request *rq)
{
	struct rb_root *root = deadline_rb_root(dd, rq);

	elv_rb_add(root, rq);
}

static inline void
deadline_del_rq_rb(struct deadline_data *dd, struct request *rq)
{
	const int data_dir = rq_data_dir(rq);

	if (dd->next_rq[data_dir] == rq)
		dd->next_rq[data_dir] = deadline_latter_request(rq);

	elv_rb_del(deadline_rb_root(dd, rq), rq);
}
/*
 * remove rq from rbtree and fifo.
 */
static void deadline_remove_request(struct request_queue *q, struct request *rq)
{
	struct deadline_data *dd = q->elevator->elevator_data;

	list_del_init(&rq->queuelist);

	/*
	 * We might not be on the rbtree, if we are doing an insert merge
	 */
	if (!RB_EMPTY_NODE(&rq->rb_node))
		deadline_del_rq_rb(dd, rq);

	elv_rqhash_del(q, rq);
	if (q->last_merge == rq)
		q->last_merge = NULL;
}

static void dd_request_merged(struct request_queue *q, struct request *req,
			      enum elv_merge type)
{
	struct deadline_data *dd = q->elevator->elevator_data;

	/*
	 * if the merge was a front merge, we need to reposition request
	 */
	if (type == ELEVATOR_FRONT_MERGE) {
		elv_rb_del(deadline_rb_root(dd, req), req);
		deadline_add_rq_rb(dd, req);
	}
}

static void dd_merged_requests(struct request_queue *q, struct request *req,
			       struct request *next)
{
	/*
	 * if next expires before rq, assign its expire time to rq
	 * and move into next position (next will be deleted) in fifo
	 */
	if (!list_empty(&req->queuelist) && !list_empty(&next->queuelist)) {
		if (time_before((unsigned long)next->fifo_time,
				(unsigned long)req->fifo_time)) {
			list_move(&req->queuelist, &next->queuelist);
			req->fifo_time = next->fifo_time;
		}
	}

	/*
	 * kill knowledge of next, this one is a goner
	 */
	deadline_remove_request(q, next);
}

/*
 * move an entry to dispatch queue
 */
static void
deadline_move_request(struct deadline_data *dd, struct request *rq)
{
	const int data_dir = rq_data_dir(rq);

	dd->next_rq[READ] = NULL;
	dd->next_rq[WRITE] = NULL;
	dd->next_rq[data_dir] = deadline_latter_request(rq);

	/*
	 * take it off the sort and fifo list
	 */
	deadline_remove_request(rq->q, rq);
}

/*
 * deadline_check_fifo returns 0 if there are no expired requests on the fifo,
 * 1 otherwise. Requires !list_empty(&dd->fifo_list[data_dir])
 */
static inline int deadline_check_fifo(struct deadline_data *dd, int ddir)
{
	struct request *rq = rq_entry_fifo(dd->fifo_list[ddir].next);

	/*
	 * rq is expired!
	 */
	if (time_after_eq(jiffies, (unsigned long)rq->fifo_time))
		return 1;

	return 0;
}
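/*
 * Summary of the dispatch selection implemented below, in order of
 * precedence:
 *
 *  1. Requests parked on dd->dispatch (at-head or passthrough inserts) are
 *     always handed out first.
 *  2. If a batch is in progress (dd->next_rq is set and dd->batching has not
 *     reached dd->fifo_batch), the next request in sector order keeps the
 *     batch going.
 *  3. Otherwise a data direction is chosen: reads are preferred, but once
 *     dd->starved reaches dd->writes_starved, writes get a turn.
 *  4. Within the chosen direction, the FIFO head is used if its deadline has
 *     expired or there is no cached next request; otherwise dispatch
 *     continues in sector order from dd->next_rq.
 */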
/*
 * deadline_dispatch_requests selects the best request according to
 * read/write expire, fifo_batch, etc
 */
static struct request *__dd_dispatch_request(struct blk_mq_hw_ctx *hctx)
{
	struct deadline_data *dd = hctx->queue->elevator->elevator_data;
	struct request *rq;
	bool reads, writes;
	int data_dir;

	if (!list_empty(&dd->dispatch)) {
		rq = list_first_entry(&dd->dispatch, struct request, queuelist);
		list_del_init(&rq->queuelist);
		goto done;
	}

	reads = !list_empty(&dd->fifo_list[READ]);
	writes = !list_empty(&dd->fifo_list[WRITE]);

	/*
	 * batches are currently reads XOR writes
	 */
	if (dd->next_rq[WRITE])
		rq = dd->next_rq[WRITE];
	else
		rq = dd->next_rq[READ];

	if (rq && dd->batching < dd->fifo_batch)
		/* we have a next request and are still entitled to batch */
		goto dispatch_request;

	/*
	 * at this point we are not running a batch. select the appropriate
	 * data direction (read / write)
	 */

	if (reads) {
		BUG_ON(RB_EMPTY_ROOT(&dd->sort_list[READ]));

		if (writes && (dd->starved++ >= dd->writes_starved))
			goto dispatch_writes;

		data_dir = READ;

		goto dispatch_find_request;
	}

	/*
	 * there are either no reads or writes have been starved
	 */

	if (writes) {
dispatch_writes:
		BUG_ON(RB_EMPTY_ROOT(&dd->sort_list[WRITE]));

		dd->starved = 0;

		data_dir = WRITE;

		goto dispatch_find_request;
	}

	return NULL;

dispatch_find_request:
	/*
	 * we are not running a batch, find best request for selected data_dir
	 */
	if (deadline_check_fifo(dd, data_dir) || !dd->next_rq[data_dir]) {
		/*
		 * A deadline has expired, the last request was in the other
		 * direction, or we have run out of higher-sectored requests.
		 * Start again from the request with the earliest expiry time.
		 */
		rq = rq_entry_fifo(dd->fifo_list[data_dir].next);
	} else {
		/*
		 * The last req was the same dir and we have a next request in
		 * sort order. No expired requests so continue on from here.
		 */
		rq = dd->next_rq[data_dir];
	}

	dd->batching = 0;

dispatch_request:
	/*
	 * rq is the selected appropriate request.
	 */
	dd->batching++;
	deadline_move_request(dd, rq);
done:
	rq->rq_flags |= RQF_STARTED;
	return rq;
}

static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx)
{
	struct deadline_data *dd = hctx->queue->elevator->elevator_data;
	struct request *rq;

	spin_lock(&dd->lock);
	rq = __dd_dispatch_request(hctx);
	spin_unlock(&dd->lock);

	return rq;
}

static void dd_exit_queue(struct elevator_queue *e)
{
	struct deadline_data *dd = e->elevator_data;

	BUG_ON(!list_empty(&dd->fifo_list[READ]));
	BUG_ON(!list_empty(&dd->fifo_list[WRITE]));

	kfree(dd);
}
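/*
 * Default tuning applied in dd_init_queue() below: reads expire after
 * read_expire (HZ / 2, i.e. 500 ms), writes after write_expire (5 * HZ,
 * i.e. 5 s), a batch is capped at fifo_batch (16) requests, reads may
 * starve a write at most writes_starved (2) times, and front merging is
 * enabled.
 */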
/*
 * initialize elevator private data (deadline_data).
 */
static int dd_init_queue(struct request_queue *q, struct elevator_type *e)
{
	struct deadline_data *dd;
	struct elevator_queue *eq;

	eq = elevator_alloc(q, e);
	if (!eq)
		return -ENOMEM;

	dd = kzalloc_node(sizeof(*dd), GFP_KERNEL, q->node);
	if (!dd) {
		kobject_put(&eq->kobj);
		return -ENOMEM;
	}
	eq->elevator_data = dd;

	INIT_LIST_HEAD(&dd->fifo_list[READ]);
	INIT_LIST_HEAD(&dd->fifo_list[WRITE]);
	dd->sort_list[READ] = RB_ROOT;
	dd->sort_list[WRITE] = RB_ROOT;
	dd->fifo_expire[READ] = read_expire;
	dd->fifo_expire[WRITE] = write_expire;
	dd->writes_starved = writes_starved;
	dd->front_merges = 1;
	dd->fifo_batch = fifo_batch;
	spin_lock_init(&dd->lock);
	INIT_LIST_HEAD(&dd->dispatch);

	q->elevator = eq;
	return 0;
}

static int dd_request_merge(struct request_queue *q, struct request **rq,
			    struct bio *bio)
{
	struct deadline_data *dd = q->elevator->elevator_data;
	sector_t sector = bio_end_sector(bio);
	struct request *__rq;

	if (!dd->front_merges)
		return ELEVATOR_NO_MERGE;

	__rq = elv_rb_find(&dd->sort_list[bio_data_dir(bio)], sector);
	if (__rq) {
		BUG_ON(sector != blk_rq_pos(__rq));

		if (elv_bio_merge_ok(__rq, bio)) {
			*rq = __rq;
			return ELEVATOR_FRONT_MERGE;
		}
	}

	return ELEVATOR_NO_MERGE;
}

static bool dd_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio)
{
	struct request_queue *q = hctx->queue;
	struct deadline_data *dd = q->elevator->elevator_data;
	struct request *free = NULL;
	bool ret;

	spin_lock(&dd->lock);
	ret = blk_mq_sched_try_merge(q, bio, &free);
	spin_unlock(&dd->lock);

	if (free)
		blk_mq_free_request(free);

	return ret;
}

/*
 * add rq to rbtree and fifo
 */
static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
			      bool at_head)
{
	struct request_queue *q = hctx->queue;
	struct deadline_data *dd = q->elevator->elevator_data;
	const int data_dir = rq_data_dir(rq);

	if (blk_mq_sched_try_insert_merge(q, rq))
		return;

	blk_mq_sched_request_inserted(rq);

	if (at_head || blk_rq_is_passthrough(rq)) {
		if (at_head)
			list_add(&rq->queuelist, &dd->dispatch);
		else
			list_add_tail(&rq->queuelist, &dd->dispatch);
	} else {
		deadline_add_rq_rb(dd, rq);

		if (rq_mergeable(rq)) {
			elv_rqhash_add(q, rq);
			if (!q->last_merge)
				q->last_merge = rq;
		}

		/*
		 * set expire time and add to fifo list
		 */
		rq->fifo_time = jiffies + dd->fifo_expire[data_dir];
		list_add_tail(&rq->queuelist, &dd->fifo_list[data_dir]);
	}
}

static void dd_insert_requests(struct blk_mq_hw_ctx *hctx,
			       struct list_head *list, bool at_head)
{
	struct request_queue *q = hctx->queue;
	struct deadline_data *dd = q->elevator->elevator_data;

	spin_lock(&dd->lock);
	while (!list_empty(list)) {
		struct request *rq;

		rq = list_first_entry(list, struct request, queuelist);
		list_del_init(&rq->queuelist);
		dd_insert_request(hctx, rq, at_head);
	}
	spin_unlock(&dd->lock);
}

static bool dd_has_work(struct blk_mq_hw_ctx *hctx)
{
	struct deadline_data *dd = hctx->queue->elevator->elevator_data;

	return !list_empty_careful(&dd->dispatch) ||
		!list_empty_careful(&dd->fifo_list[0]) ||
		!list_empty_careful(&dd->fifo_list[1]);
}
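/*
 * The tunables below are exported per queue through sysfs while this
 * scheduler is active, e.g. (device name is just an example):
 *
 *	cat /sys/block/sda/queue/iosched/read_expire	(value in ms)
 *	echo 100 > /sys/block/sda/queue/iosched/read_expire
 *
 * read_expire and write_expire are converted between milliseconds and
 * jiffies by the show/store helpers; writes_starved, front_merges and
 * fifo_batch are stored as plain integers.
 */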
/*
 * sysfs parts below
 */
static ssize_t
deadline_var_show(int var, char *page)
{
	return sprintf(page, "%d\n", var);
}

static ssize_t
deadline_var_store(int *var, const char *page, size_t count)
{
	char *p = (char *) page;

	*var = simple_strtol(p, &p, 10);
	return count;
}

#define SHOW_FUNCTION(__FUNC, __VAR, __CONV)				\
static ssize_t __FUNC(struct elevator_queue *e, char *page)		\
{									\
	struct deadline_data *dd = e->elevator_data;			\
	int __data = __VAR;						\
	if (__CONV)							\
		__data = jiffies_to_msecs(__data);			\
	return deadline_var_show(__data, (page));			\
}
SHOW_FUNCTION(deadline_read_expire_show, dd->fifo_expire[READ], 1);
SHOW_FUNCTION(deadline_write_expire_show, dd->fifo_expire[WRITE], 1);
SHOW_FUNCTION(deadline_writes_starved_show, dd->writes_starved, 0);
SHOW_FUNCTION(deadline_front_merges_show, dd->front_merges, 0);
SHOW_FUNCTION(deadline_fifo_batch_show, dd->fifo_batch, 0);
#undef SHOW_FUNCTION

#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV)			\
static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count)	\
{									\
	struct deadline_data *dd = e->elevator_data;			\
	int __data;							\
	int ret = deadline_var_store(&__data, (page), count);		\
	if (__data < (MIN))						\
		__data = (MIN);						\
	else if (__data > (MAX))					\
		__data = (MAX);						\
	if (__CONV)							\
		*(__PTR) = msecs_to_jiffies(__data);			\
	else								\
		*(__PTR) = __data;					\
	return ret;							\
}
STORE_FUNCTION(deadline_read_expire_store, &dd->fifo_expire[READ], 0, INT_MAX, 1);
STORE_FUNCTION(deadline_write_expire_store, &dd->fifo_expire[WRITE], 0, INT_MAX, 1);
STORE_FUNCTION(deadline_writes_starved_store, &dd->writes_starved, INT_MIN, INT_MAX, 0);
STORE_FUNCTION(deadline_front_merges_store, &dd->front_merges, 0, 1, 0);
STORE_FUNCTION(deadline_fifo_batch_store, &dd->fifo_batch, 0, INT_MAX, 0);
#undef STORE_FUNCTION

#define DD_ATTR(name) \
	__ATTR(name, S_IRUGO|S_IWUSR, deadline_##name##_show, \
				      deadline_##name##_store)

static struct elv_fs_entry deadline_attrs[] = {
	DD_ATTR(read_expire),
	DD_ATTR(write_expire),
	DD_ATTR(writes_starved),
	DD_ATTR(front_merges),
	DD_ATTR(fifo_batch),
	__ATTR_NULL
};

static struct elevator_type mq_deadline = {
	.ops.mq = {
		.insert_requests	= dd_insert_requests,
		.dispatch_request	= dd_dispatch_request,
		.next_request		= elv_rb_latter_request,
		.former_request		= elv_rb_former_request,
		.bio_merge		= dd_bio_merge,
		.request_merge		= dd_request_merge,
		.requests_merged	= dd_merged_requests,
		.request_merged		= dd_request_merged,
		.has_work		= dd_has_work,
		.init_sched		= dd_init_queue,
		.exit_sched		= dd_exit_queue,
	},

	.uses_mq	= true,
	.elevator_attrs	= deadline_attrs,
	.elevator_name	= "mq-deadline",
	.elevator_owner	= THIS_MODULE,
};

static int __init deadline_init(void)
{
	return elv_register(&mq_deadline);
}

static void __exit deadline_exit(void)
{
	elv_unregister(&mq_deadline);
}

module_init(deadline_init);
module_exit(deadline_exit);

MODULE_AUTHOR("Jens Axboe");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("MQ deadline IO scheduler");
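/*
 * Usage note: on a blk-mq capable device this scheduler is selected at
 * runtime through sysfs, e.g. (device name is just an example):
 *
 *	echo mq-deadline > /sys/block/nvme0n1/queue/scheduler
 *	cat /sys/block/nvme0n1/queue/scheduler
 */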