/*
 * Common Block IO controller cgroup interface
 *
 * Based on ideas and code from CFQ, CFS and BFQ:
 * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
 *
 * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
 *		      Paolo Valente <paolo.valente@unimore.it>
 *
 * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com>
 *		      Nauman Rafique <nauman@google.com>
 */
#include <linux/ioprio.h>
#include <linux/seq_file.h>
#include <linux/kdev_t.h>
#include <linux/module.h>
#include <linux/err.h>
#include <linux/blkdev.h>
#include <linux/slab.h>
#include "blk-cgroup.h"
#include <linux/genhd.h>

#define MAX_KEY_LEN 100

static DEFINE_SPINLOCK(blkio_list_lock);
static LIST_HEAD(blkio_list);

struct blkio_cgroup blkio_root_cgroup = { .weight = 2*BLKIO_WEIGHT_DEFAULT };
EXPORT_SYMBOL_GPL(blkio_root_cgroup);

static struct cgroup_subsys_state *blkiocg_create(struct cgroup_subsys *,
						  struct cgroup *);
static int blkiocg_can_attach(struct cgroup_subsys *, struct cgroup *,
			      struct task_struct *, bool);
static void blkiocg_attach(struct cgroup_subsys *, struct cgroup *,
			   struct cgroup *, struct task_struct *, bool);
static void blkiocg_destroy(struct cgroup_subsys *, struct cgroup *);
static int blkiocg_populate(struct cgroup_subsys *, struct cgroup *);

/* for encoding cft->private value on file */
#define BLKIOFILE_PRIVATE(x, val)	(((x) << 16) | (val))
/* What policy owns the file, proportional or throttle */
#define BLKIOFILE_POLICY(val)		(((val) >> 16) & 0xffff)
#define BLKIOFILE_ATTR(val)		((val) & 0xffff)

struct cgroup_subsys blkio_subsys = {
	.name = "blkio",
	.create = blkiocg_create,
	.can_attach = blkiocg_can_attach,
	.attach = blkiocg_attach,
	.destroy = blkiocg_destroy,
	.populate = blkiocg_populate,
#ifdef CONFIG_BLK_CGROUP
	/* note: blkio_subsys_id is otherwise defined in blk-cgroup.h */
	.subsys_id = blkio_subsys_id,
#endif
	.use_id = 1,
	.module = THIS_MODULE,
};
EXPORT_SYMBOL_GPL(blkio_subsys);

static inline void blkio_policy_insert_node(struct blkio_cgroup *blkcg,
					    struct blkio_policy_node *pn)
{
	list_add(&pn->node, &blkcg->policy_list);
}

static inline bool cftype_blkg_same_policy(struct cftype *cft,
					   struct blkio_group *blkg)
{
	enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);

	if (blkg->plid == plid)
		return 1;

	return 0;
}

/* Determines if policy node matches cgroup file being accessed */
static inline bool pn_matches_cftype(struct cftype *cft,
				     struct blkio_policy_node *pn)
{
	enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
	int fileid = BLKIOFILE_ATTR(cft->private);

	return (plid == pn->plid && fileid == pn->fileid);
}

/* Must be called with blkcg->lock held */
static inline void blkio_policy_delete_node(struct blkio_policy_node *pn)
{
	list_del(&pn->node);
}

/* Must be called with blkcg->lock held */
static struct blkio_policy_node *
blkio_policy_search_node(const struct blkio_cgroup *blkcg, dev_t dev,
			 enum blkio_policy_id plid, int fileid)
{
	struct blkio_policy_node *pn;

	list_for_each_entry(pn, &blkcg->policy_list, node) {
		if (pn->dev == dev && pn->plid == plid && pn->fileid == fileid)
			return pn;
	}

	return NULL;
}

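/*
 * Illustrative note (added): each rule accepted through a per-device cgroup
 * file (blkio.weight_device, blkio.throttle.*_device) is kept on
 * blkcg->policy_list as one blkio_policy_node identified by the triplet
 * (dev, plid, fileid). Writing, for instance, "8:16 300" to
 * blkio.weight_device for a hypothetical device 8:16 results in a node with
 * dev == MKDEV(8, 16), plid == BLKIO_POLICY_PROP and
 * fileid == BLKIO_PROP_weight_device, which blkio_policy_search_node()
 * finds again on later updates and lookups.
 */
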
struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup)
{
	return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id),
			    struct blkio_cgroup, css);
}
EXPORT_SYMBOL_GPL(cgroup_to_blkio_cgroup);

static inline void
blkio_update_group_weight(struct blkio_group *blkg, unsigned int weight)
{
	struct blkio_policy_type *blkiop;

	list_for_each_entry(blkiop, &blkio_list, list) {
		/* If this policy does not own the blkg, do not send updates */
		if (blkiop->plid != blkg->plid)
			continue;
		if (blkiop->ops.blkio_update_group_weight_fn)
			blkiop->ops.blkio_update_group_weight_fn(blkg->key,
							blkg, weight);
	}
}

static inline void blkio_update_group_bps(struct blkio_group *blkg, u64 bps,
					  int fileid)
{
	struct blkio_policy_type *blkiop;

	list_for_each_entry(blkiop, &blkio_list, list) {

		/* If this policy does not own the blkg, do not send updates */
		if (blkiop->plid != blkg->plid)
			continue;

		if (fileid == BLKIO_THROTL_read_bps_device
		    && blkiop->ops.blkio_update_group_read_bps_fn)
			blkiop->ops.blkio_update_group_read_bps_fn(blkg->key,
								blkg, bps);

		if (fileid == BLKIO_THROTL_write_bps_device
		    && blkiop->ops.blkio_update_group_write_bps_fn)
			blkiop->ops.blkio_update_group_write_bps_fn(blkg->key,
								blkg, bps);
	}
}

static inline void blkio_update_group_iops(struct blkio_group *blkg,
					   unsigned int iops, int fileid)
{
	struct blkio_policy_type *blkiop;

	list_for_each_entry(blkiop, &blkio_list, list) {

		/* If this policy does not own the blkg, do not send updates */
		if (blkiop->plid != blkg->plid)
			continue;

		if (fileid == BLKIO_THROTL_read_iops_device
		    && blkiop->ops.blkio_update_group_read_iops_fn)
			blkiop->ops.blkio_update_group_read_iops_fn(blkg->key,
								blkg, iops);

		if (fileid == BLKIO_THROTL_write_iops_device
		    && blkiop->ops.blkio_update_group_write_iops_fn)
			blkiop->ops.blkio_update_group_write_iops_fn(blkg->key,
								blkg, iops);
	}
}

/*
 * Add to the appropriate stat variable depending on the request type.
 * This should be called with the blkg->stats_lock held.
 */
static void blkio_add_stat(uint64_t *stat, uint64_t add, bool direction,
			   bool sync)
{
	if (direction)
		stat[BLKIO_STAT_WRITE] += add;
	else
		stat[BLKIO_STAT_READ] += add;
	if (sync)
		stat[BLKIO_STAT_SYNC] += add;
	else
		stat[BLKIO_STAT_ASYNC] += add;
}

/*
 * Decrements the appropriate stat variable if non-zero depending on the
 * request type. Panics on value being zero.
 * This should be called with the blkg->stats_lock held.
 */
static void blkio_check_and_dec_stat(uint64_t *stat, bool direction, bool sync)
{
	if (direction) {
		BUG_ON(stat[BLKIO_STAT_WRITE] == 0);
		stat[BLKIO_STAT_WRITE]--;
	} else {
		BUG_ON(stat[BLKIO_STAT_READ] == 0);
		stat[BLKIO_STAT_READ]--;
	}
	if (sync) {
		BUG_ON(stat[BLKIO_STAT_SYNC] == 0);
		stat[BLKIO_STAT_SYNC]--;
	} else {
		BUG_ON(stat[BLKIO_STAT_ASYNC] == 0);
		stat[BLKIO_STAT_ASYNC]--;
	}
}

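/*
 * Illustrative note (added): each per-type stat row is indexed by
 * enum stat_sub_type, and every event is accounted twice: once under its
 * direction (BLKIO_STAT_READ or BLKIO_STAT_WRITE) and once under its
 * synchronicity (BLKIO_STAT_SYNC or BLKIO_STAT_ASYNC). A queued
 * synchronous read, for example, bumps both the READ and the SYNC entries
 * of stat_arr[BLKIO_STAT_QUEUED].
 */
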
#ifdef CONFIG_DEBUG_BLK_CGROUP
/* This should be called with the blkg->stats_lock held. */
static void blkio_set_start_group_wait_time(struct blkio_group *blkg,
					    struct blkio_group *curr_blkg)
{
	if (blkio_blkg_waiting(&blkg->stats))
		return;
	if (blkg == curr_blkg)
		return;
	blkg->stats.start_group_wait_time = sched_clock();
	blkio_mark_blkg_waiting(&blkg->stats);
}

/* This should be called with the blkg->stats_lock held. */
static void blkio_update_group_wait_time(struct blkio_group_stats *stats)
{
	unsigned long long now;

	if (!blkio_blkg_waiting(stats))
		return;

	now = sched_clock();
	if (time_after64(now, stats->start_group_wait_time))
		stats->group_wait_time += now - stats->start_group_wait_time;
	blkio_clear_blkg_waiting(stats);
}

/* This should be called with the blkg->stats_lock held. */
static void blkio_end_empty_time(struct blkio_group_stats *stats)
{
	unsigned long long now;

	if (!blkio_blkg_empty(stats))
		return;

	now = sched_clock();
	if (time_after64(now, stats->start_empty_time))
		stats->empty_time += now - stats->start_empty_time;
	blkio_clear_blkg_empty(stats);
}

void blkiocg_update_set_idle_time_stats(struct blkio_group *blkg)
{
	unsigned long flags;

	spin_lock_irqsave(&blkg->stats_lock, flags);
	BUG_ON(blkio_blkg_idling(&blkg->stats));
	blkg->stats.start_idle_time = sched_clock();
	blkio_mark_blkg_idling(&blkg->stats);
	spin_unlock_irqrestore(&blkg->stats_lock, flags);
}
EXPORT_SYMBOL_GPL(blkiocg_update_set_idle_time_stats);

void blkiocg_update_idle_time_stats(struct blkio_group *blkg)
{
	unsigned long flags;
	unsigned long long now;
	struct blkio_group_stats *stats;

	spin_lock_irqsave(&blkg->stats_lock, flags);
	stats = &blkg->stats;
	if (blkio_blkg_idling(stats)) {
		now = sched_clock();
		if (time_after64(now, stats->start_idle_time))
			stats->idle_time += now - stats->start_idle_time;
		blkio_clear_blkg_idling(stats);
	}
	spin_unlock_irqrestore(&blkg->stats_lock, flags);
}
EXPORT_SYMBOL_GPL(blkiocg_update_idle_time_stats);

void blkiocg_update_avg_queue_size_stats(struct blkio_group *blkg)
{
	unsigned long flags;
	struct blkio_group_stats *stats;

	spin_lock_irqsave(&blkg->stats_lock, flags);
	stats = &blkg->stats;
	stats->avg_queue_size_sum +=
			stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_READ] +
			stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_WRITE];
	stats->avg_queue_size_samples++;
	blkio_update_group_wait_time(stats);
	spin_unlock_irqrestore(&blkg->stats_lock, flags);
}
EXPORT_SYMBOL_GPL(blkiocg_update_avg_queue_size_stats);

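/*
 * Illustrative note (added): the average queue size is not stored directly;
 * the sum of currently queued requests and the number of samples are
 * accumulated above, and the average (sum / samples) is computed only when
 * the blkio.avg_queue_size file is read, in blkio_get_stat().
 */
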
void blkiocg_set_start_empty_time(struct blkio_group *blkg)
{
	unsigned long flags;
	struct blkio_group_stats *stats;

	spin_lock_irqsave(&blkg->stats_lock, flags);
	stats = &blkg->stats;

	if (stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_READ] ||
	    stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_WRITE]) {
		spin_unlock_irqrestore(&blkg->stats_lock, flags);
		return;
	}

	/*
	 * group is already marked empty. This can happen if cfqq got new
	 * request in parent group and moved to this group while being added
	 * to service tree. Just ignore the event and move on.
	 */
	if (blkio_blkg_empty(stats)) {
		spin_unlock_irqrestore(&blkg->stats_lock, flags);
		return;
	}

	stats->start_empty_time = sched_clock();
	blkio_mark_blkg_empty(stats);
	spin_unlock_irqrestore(&blkg->stats_lock, flags);
}
EXPORT_SYMBOL_GPL(blkiocg_set_start_empty_time);

void blkiocg_update_dequeue_stats(struct blkio_group *blkg,
				  unsigned long dequeue)
{
	blkg->stats.dequeue += dequeue;
}
EXPORT_SYMBOL_GPL(blkiocg_update_dequeue_stats);
#else
static inline void blkio_set_start_group_wait_time(struct blkio_group *blkg,
					struct blkio_group *curr_blkg) {}
static inline void blkio_end_empty_time(struct blkio_group_stats *stats) {}
#endif

void blkiocg_update_io_add_stats(struct blkio_group *blkg,
			struct blkio_group *curr_blkg, bool direction,
			bool sync)
{
	unsigned long flags;

	spin_lock_irqsave(&blkg->stats_lock, flags);
	blkio_add_stat(blkg->stats.stat_arr[BLKIO_STAT_QUEUED], 1, direction,
			sync);
	blkio_end_empty_time(&blkg->stats);
	blkio_set_start_group_wait_time(blkg, curr_blkg);
	spin_unlock_irqrestore(&blkg->stats_lock, flags);
}
EXPORT_SYMBOL_GPL(blkiocg_update_io_add_stats);

void blkiocg_update_io_remove_stats(struct blkio_group *blkg,
				    bool direction, bool sync)
{
	unsigned long flags;

	spin_lock_irqsave(&blkg->stats_lock, flags);
	blkio_check_and_dec_stat(blkg->stats.stat_arr[BLKIO_STAT_QUEUED],
					direction, sync);
	spin_unlock_irqrestore(&blkg->stats_lock, flags);
}
EXPORT_SYMBOL_GPL(blkiocg_update_io_remove_stats);

void blkiocg_update_timeslice_used(struct blkio_group *blkg, unsigned long time)
{
	unsigned long flags;

	spin_lock_irqsave(&blkg->stats_lock, flags);
	blkg->stats.time += time;
	spin_unlock_irqrestore(&blkg->stats_lock, flags);
}
EXPORT_SYMBOL_GPL(blkiocg_update_timeslice_used);

void blkiocg_update_dispatch_stats(struct blkio_group *blkg,
				   uint64_t bytes, bool direction, bool sync)
{
	struct blkio_group_stats *stats;
	unsigned long flags;

	spin_lock_irqsave(&blkg->stats_lock, flags);
	stats = &blkg->stats;
	stats->sectors += bytes >> 9;
	blkio_add_stat(stats->stat_arr[BLKIO_STAT_SERVICED], 1, direction,
			sync);
	blkio_add_stat(stats->stat_arr[BLKIO_STAT_SERVICE_BYTES], bytes,
			direction, sync);
	spin_unlock_irqrestore(&blkg->stats_lock, flags);
}
EXPORT_SYMBOL_GPL(blkiocg_update_dispatch_stats);

void blkiocg_update_completion_stats(struct blkio_group *blkg,
	uint64_t start_time, uint64_t io_start_time, bool direction, bool sync)
{
	struct blkio_group_stats *stats;
	unsigned long flags;
	unsigned long long now = sched_clock();

	spin_lock_irqsave(&blkg->stats_lock, flags);
	stats = &blkg->stats;
	if (time_after64(now, io_start_time))
		blkio_add_stat(stats->stat_arr[BLKIO_STAT_SERVICE_TIME],
				now - io_start_time, direction, sync);
	if (time_after64(io_start_time, start_time))
		blkio_add_stat(stats->stat_arr[BLKIO_STAT_WAIT_TIME],
				io_start_time - start_time, direction, sync);
	spin_unlock_irqrestore(&blkg->stats_lock, flags);
}
EXPORT_SYMBOL_GPL(blkiocg_update_completion_stats);

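/*
 * Illustrative note (added): given the timestamps passed in above,
 * io_wait_time accumulates io_start_time - start_time (time a request spent
 * queued before being dispatched) and io_service_time accumulates
 * now - io_start_time (time from dispatch to completion), each split into
 * the read/write and sync/async buckets by blkio_add_stat().
 */
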
void blkiocg_update_io_merged_stats(struct blkio_group *blkg, bool direction,
				    bool sync)
{
	unsigned long flags;

	spin_lock_irqsave(&blkg->stats_lock, flags);
	blkio_add_stat(blkg->stats.stat_arr[BLKIO_STAT_MERGED], 1, direction,
			sync);
	spin_unlock_irqrestore(&blkg->stats_lock, flags);
}
EXPORT_SYMBOL_GPL(blkiocg_update_io_merged_stats);

void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
		struct blkio_group *blkg, void *key, dev_t dev,
		enum blkio_policy_id plid)
{
	unsigned long flags;

	spin_lock_irqsave(&blkcg->lock, flags);
	spin_lock_init(&blkg->stats_lock);
	rcu_assign_pointer(blkg->key, key);
	blkg->blkcg_id = css_id(&blkcg->css);
	hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list);
	blkg->plid = plid;
	spin_unlock_irqrestore(&blkcg->lock, flags);
	/* Need to take css reference ? */
	cgroup_path(blkcg->css.cgroup, blkg->path, sizeof(blkg->path));
	blkg->dev = dev;
}
EXPORT_SYMBOL_GPL(blkiocg_add_blkio_group);

static void __blkiocg_del_blkio_group(struct blkio_group *blkg)
{
	hlist_del_init_rcu(&blkg->blkcg_node);
	blkg->blkcg_id = 0;
}

/*
 * returns 0 if blkio_group was still on cgroup list. Otherwise returns 1
 * indicating that blkio_group was unhashed by the time we got to it.
 */
int blkiocg_del_blkio_group(struct blkio_group *blkg)
{
	struct blkio_cgroup *blkcg;
	unsigned long flags;
	struct cgroup_subsys_state *css;
	int ret = 1;

	rcu_read_lock();
	css = css_lookup(&blkio_subsys, blkg->blkcg_id);
	if (css) {
		blkcg = container_of(css, struct blkio_cgroup, css);
		spin_lock_irqsave(&blkcg->lock, flags);
		if (!hlist_unhashed(&blkg->blkcg_node)) {
			__blkiocg_del_blkio_group(blkg);
			ret = 0;
		}
		spin_unlock_irqrestore(&blkcg->lock, flags);
	}

	rcu_read_unlock();
	return ret;
}
EXPORT_SYMBOL_GPL(blkiocg_del_blkio_group);

/* called under rcu_read_lock(). */
struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg, void *key)
{
	struct blkio_group *blkg;
	struct hlist_node *n;
	void *__key;

	hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node) {
		__key = blkg->key;
		if (__key == key)
			return blkg;
	}

	return NULL;
}
EXPORT_SYMBOL_GPL(blkiocg_lookup_group);

static int
blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val)
{
	struct blkio_cgroup *blkcg;
	struct blkio_group *blkg;
	struct blkio_group_stats *stats;
	struct hlist_node *n;
	uint64_t queued[BLKIO_STAT_TOTAL];
	int i;
#ifdef CONFIG_DEBUG_BLK_CGROUP
	bool idling, waiting, empty;
	unsigned long long now = sched_clock();
#endif

	blkcg = cgroup_to_blkio_cgroup(cgroup);
	spin_lock_irq(&blkcg->lock);
	hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
		spin_lock(&blkg->stats_lock);
		stats = &blkg->stats;
#ifdef CONFIG_DEBUG_BLK_CGROUP
		idling = blkio_blkg_idling(stats);
		waiting = blkio_blkg_waiting(stats);
		empty = blkio_blkg_empty(stats);
#endif
		for (i = 0; i < BLKIO_STAT_TOTAL; i++)
			queued[i] = stats->stat_arr[BLKIO_STAT_QUEUED][i];
		memset(stats, 0, sizeof(struct blkio_group_stats));
		for (i = 0; i < BLKIO_STAT_TOTAL; i++)
			stats->stat_arr[BLKIO_STAT_QUEUED][i] = queued[i];
#ifdef CONFIG_DEBUG_BLK_CGROUP
		if (idling) {
			blkio_mark_blkg_idling(stats);
			stats->start_idle_time = now;
		}
		if (waiting) {
			blkio_mark_blkg_waiting(stats);
			stats->start_group_wait_time = now;
		}
		if (empty) {
			blkio_mark_blkg_empty(stats);
			stats->start_empty_time = now;
		}
#endif
		spin_unlock(&blkg->stats_lock);
	}

	spin_unlock_irq(&blkcg->lock);
	return 0;
}

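/*
 * Illustrative note (added): writing any value to blkio.reset_stats, e.g.
 * "echo 1 > blkio.reset_stats", clears the accumulated counters of every
 * group in the cgroup. The counts of currently queued requests are
 * preserved across the reset, and (with CONFIG_DEBUG_BLK_CGROUP) idle,
 * group-wait and empty periods that are in progress are restarted from
 * "now" rather than lost.
 */
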
static void blkio_get_key_name(enum stat_sub_type type, dev_t dev, char *str,
			       int chars_left, bool diskname_only)
{
	snprintf(str, chars_left, "%d:%d", MAJOR(dev), MINOR(dev));
	chars_left -= strlen(str);
	if (chars_left <= 0) {
		printk(KERN_WARNING
			"Possibly incorrect cgroup stat display format");
		return;
	}
	if (diskname_only)
		return;
	switch (type) {
	case BLKIO_STAT_READ:
		strlcat(str, " Read", chars_left);
		break;
	case BLKIO_STAT_WRITE:
		strlcat(str, " Write", chars_left);
		break;
	case BLKIO_STAT_SYNC:
		strlcat(str, " Sync", chars_left);
		break;
	case BLKIO_STAT_ASYNC:
		strlcat(str, " Async", chars_left);
		break;
	case BLKIO_STAT_TOTAL:
		strlcat(str, " Total", chars_left);
		break;
	default:
		strlcat(str, " Invalid", chars_left);
	}
}

static uint64_t blkio_fill_stat(char *str, int chars_left, uint64_t val,
				struct cgroup_map_cb *cb, dev_t dev)
{
	blkio_get_key_name(0, dev, str, chars_left, true);
	cb->fill(cb, str, val);
	return val;
}

/* This should be called with blkg->stats_lock held */
static uint64_t blkio_get_stat(struct blkio_group *blkg,
		struct cgroup_map_cb *cb, dev_t dev, enum stat_type type)
{
	uint64_t disk_total;
	char key_str[MAX_KEY_LEN];
	enum stat_sub_type sub_type;

	if (type == BLKIO_STAT_TIME)
		return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
					blkg->stats.time, cb, dev);
	if (type == BLKIO_STAT_SECTORS)
		return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
					blkg->stats.sectors, cb, dev);
#ifdef CONFIG_DEBUG_BLK_CGROUP
	if (type == BLKIO_STAT_AVG_QUEUE_SIZE) {
		uint64_t sum = blkg->stats.avg_queue_size_sum;
		uint64_t samples = blkg->stats.avg_queue_size_samples;
		if (samples)
			do_div(sum, samples);
		else
			sum = 0;
		return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, sum, cb, dev);
	}
	if (type == BLKIO_STAT_GROUP_WAIT_TIME)
		return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
					blkg->stats.group_wait_time, cb, dev);
	if (type == BLKIO_STAT_IDLE_TIME)
		return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
					blkg->stats.idle_time, cb, dev);
	if (type == BLKIO_STAT_EMPTY_TIME)
		return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
					blkg->stats.empty_time, cb, dev);
	if (type == BLKIO_STAT_DEQUEUE)
		return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
					blkg->stats.dequeue, cb, dev);
#endif

	for (sub_type = BLKIO_STAT_READ; sub_type < BLKIO_STAT_TOTAL;
			sub_type++) {
		blkio_get_key_name(sub_type, dev, key_str, MAX_KEY_LEN, false);
		cb->fill(cb, key_str, blkg->stats.stat_arr[type][sub_type]);
	}
	disk_total = blkg->stats.stat_arr[type][BLKIO_STAT_READ] +
			blkg->stats.stat_arr[type][BLKIO_STAT_WRITE];
	blkio_get_key_name(BLKIO_STAT_TOTAL, dev, key_str, MAX_KEY_LEN, false);
	cb->fill(cb, key_str, disk_total);
	return disk_total;
}

static int blkio_check_dev_num(dev_t dev)
{
	int part = 0;
	struct gendisk *disk;

	disk = get_gendisk(dev, &part);
	if (!disk || part)
		return -ENODEV;

	return 0;
}

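/*
 * Illustrative note (added): rules are written as "<major>:<minor> <value>"
 * strings. For a hypothetical device 8:16 one could set a 1MB/s read limit
 * with "echo 8:16 1048576 > blkio.throttle.read_bps_device" or a
 * proportional weight with "echo 8:16 300 > blkio.weight_device"; writing a
 * value of 0 removes the corresponding per-device rule (see
 * blkio_delete_rule_command()).
 */
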
static int blkio_policy_parse_and_set(char *buf,
	struct blkio_policy_node *newpn, enum blkio_policy_id plid, int fileid)
{
	char *s[4], *p, *major_s = NULL, *minor_s = NULL;
	int ret;
	unsigned long major, minor, temp;
	int i = 0;
	dev_t dev;
	u64 bps, iops;

	memset(s, 0, sizeof(s));

	while ((p = strsep(&buf, " ")) != NULL) {
		if (!*p)
			continue;

		s[i++] = p;

		/* Prevent input of too many items */
		if (i == 3)
			break;
	}

	if (i != 2)
		return -EINVAL;

	p = strsep(&s[0], ":");
	if (p != NULL)
		major_s = p;
	else
		return -EINVAL;

	minor_s = s[0];
	if (!minor_s)
		return -EINVAL;

	ret = strict_strtoul(major_s, 10, &major);
	if (ret)
		return -EINVAL;

	ret = strict_strtoul(minor_s, 10, &minor);
	if (ret)
		return -EINVAL;

	dev = MKDEV(major, minor);

	ret = blkio_check_dev_num(dev);
	if (ret)
		return ret;

	newpn->dev = dev;

	if (s[1] == NULL)
		return -EINVAL;

	switch (plid) {
	case BLKIO_POLICY_PROP:
		ret = strict_strtoul(s[1], 10, &temp);
		if (ret || (temp < BLKIO_WEIGHT_MIN && temp > 0) ||
			    temp > BLKIO_WEIGHT_MAX)
			return -EINVAL;

		newpn->plid = plid;
		newpn->fileid = fileid;
		newpn->val.weight = temp;
		break;
	case BLKIO_POLICY_THROTL:
		switch (fileid) {
		case BLKIO_THROTL_read_bps_device:
		case BLKIO_THROTL_write_bps_device:
			ret = strict_strtoull(s[1], 10, &bps);
			if (ret)
				return -EINVAL;

			newpn->plid = plid;
			newpn->fileid = fileid;
			newpn->val.bps = bps;
			break;
		case BLKIO_THROTL_read_iops_device:
		case BLKIO_THROTL_write_iops_device:
			ret = strict_strtoull(s[1], 10, &iops);
			if (ret)
				return -EINVAL;

			if (iops > THROTL_IOPS_MAX)
				return -EINVAL;

			newpn->plid = plid;
			newpn->fileid = fileid;
			newpn->val.iops = (unsigned int)iops;
			break;
		}
		break;
	default:
		BUG();
	}

	return 0;
}

unsigned int blkcg_get_weight(struct blkio_cgroup *blkcg,
			      dev_t dev)
{
	struct blkio_policy_node *pn;

	pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_PROP,
				BLKIO_PROP_weight_device);
	if (pn)
		return pn->val.weight;
	else
		return blkcg->weight;
}
EXPORT_SYMBOL_GPL(blkcg_get_weight);

uint64_t blkcg_get_read_bps(struct blkio_cgroup *blkcg, dev_t dev)
{
	struct blkio_policy_node *pn;

	pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL,
				BLKIO_THROTL_read_bps_device);
	if (pn)
		return pn->val.bps;
	else
		return -1;
}

uint64_t blkcg_get_write_bps(struct blkio_cgroup *blkcg, dev_t dev)
{
	struct blkio_policy_node *pn;

	pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL,
				BLKIO_THROTL_write_bps_device);
	if (pn)
		return pn->val.bps;
	else
		return -1;
}

unsigned int blkcg_get_read_iops(struct blkio_cgroup *blkcg, dev_t dev)
{
	struct blkio_policy_node *pn;

	pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL,
				BLKIO_THROTL_read_iops_device);
	if (pn)
		return pn->val.iops;
	else
		return -1;
}

unsigned int blkcg_get_write_iops(struct blkio_cgroup *blkcg, dev_t dev)
{
	struct blkio_policy_node *pn;

	pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL,
				BLKIO_THROTL_write_iops_device);
	if (pn)
		return pn->val.iops;
	else
		return -1;
}

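/*
 * Illustrative note (added): the blkcg_get_*() helpers above are the
 * per-device lookups used by the policies. When no rule exists for the
 * device they fall back to the cgroup-wide weight (proportional policy) or
 * return -1 (throttling limits), the latter effectively meaning that no
 * limit is configured for that device.
 */
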
/* Checks whether user asked for deleting a policy rule */
static bool blkio_delete_rule_command(struct blkio_policy_node *pn)
{
	switch (pn->plid) {
	case BLKIO_POLICY_PROP:
		if (pn->val.weight == 0)
			return 1;
		break;
	case BLKIO_POLICY_THROTL:
		switch (pn->fileid) {
		case BLKIO_THROTL_read_bps_device:
		case BLKIO_THROTL_write_bps_device:
			if (pn->val.bps == 0)
				return 1;
			break;
		case BLKIO_THROTL_read_iops_device:
		case BLKIO_THROTL_write_iops_device:
			if (pn->val.iops == 0)
				return 1;
		}
		break;
	default:
		BUG();
	}

	return 0;
}

static void blkio_update_policy_rule(struct blkio_policy_node *oldpn,
				     struct blkio_policy_node *newpn)
{
	switch (oldpn->plid) {
	case BLKIO_POLICY_PROP:
		oldpn->val.weight = newpn->val.weight;
		break;
	case BLKIO_POLICY_THROTL:
		switch (newpn->fileid) {
		case BLKIO_THROTL_read_bps_device:
		case BLKIO_THROTL_write_bps_device:
			oldpn->val.bps = newpn->val.bps;
			break;
		case BLKIO_THROTL_read_iops_device:
		case BLKIO_THROTL_write_iops_device:
			oldpn->val.iops = newpn->val.iops;
		}
		break;
	default:
		BUG();
	}
}

/*
 * Some rules/values in blkg have changed. Propagate those to respective
 * policies.
 */
static void blkio_update_blkg_policy(struct blkio_cgroup *blkcg,
		struct blkio_group *blkg, struct blkio_policy_node *pn)
{
	unsigned int weight, iops;
	u64 bps;

	switch (pn->plid) {
	case BLKIO_POLICY_PROP:
		weight = pn->val.weight ? pn->val.weight :
				blkcg->weight;
		blkio_update_group_weight(blkg, weight);
		break;
	case BLKIO_POLICY_THROTL:
		switch (pn->fileid) {
		case BLKIO_THROTL_read_bps_device:
		case BLKIO_THROTL_write_bps_device:
			bps = pn->val.bps ? pn->val.bps : (-1);
			blkio_update_group_bps(blkg, bps, pn->fileid);
			break;
		case BLKIO_THROTL_read_iops_device:
		case BLKIO_THROTL_write_iops_device:
			iops = pn->val.iops ? pn->val.iops : (-1);
			blkio_update_group_iops(blkg, iops, pn->fileid);
			break;
		}
		break;
	default:
		BUG();
	}
}

/*
 * A policy node rule has been updated. Propagate this update to all the
 * block groups which might be affected by this update.
 */
static void blkio_update_policy_node_blkg(struct blkio_cgroup *blkcg,
					  struct blkio_policy_node *pn)
{
	struct blkio_group *blkg;
	struct hlist_node *n;

	spin_lock(&blkio_list_lock);
	spin_lock_irq(&blkcg->lock);

	hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
		if (pn->dev != blkg->dev || pn->plid != blkg->plid)
			continue;
		blkio_update_blkg_policy(blkcg, blkg, pn);
	}

	spin_unlock_irq(&blkcg->lock);
	spin_unlock(&blkio_list_lock);
}

static int blkiocg_file_write(struct cgroup *cgrp, struct cftype *cft,
			      const char *buffer)
{
	int ret = 0;
	char *buf;
	struct blkio_policy_node *newpn, *pn;
	struct blkio_cgroup *blkcg;
	int keep_newpn = 0;
	enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
	int fileid = BLKIOFILE_ATTR(cft->private);

	buf = kstrdup(buffer, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	newpn = kzalloc(sizeof(*newpn), GFP_KERNEL);
	if (!newpn) {
		ret = -ENOMEM;
		goto free_buf;
	}

	ret = blkio_policy_parse_and_set(buf, newpn, plid, fileid);
	if (ret)
		goto free_newpn;

	blkcg = cgroup_to_blkio_cgroup(cgrp);

	spin_lock_irq(&blkcg->lock);

	pn = blkio_policy_search_node(blkcg, newpn->dev, plid, fileid);
	if (!pn) {
		if (!blkio_delete_rule_command(newpn)) {
			blkio_policy_insert_node(blkcg, newpn);
			keep_newpn = 1;
		}
		spin_unlock_irq(&blkcg->lock);
		goto update_io_group;
	}

	if (blkio_delete_rule_command(newpn)) {
		blkio_policy_delete_node(pn);
		spin_unlock_irq(&blkcg->lock);
		goto update_io_group;
	}
	spin_unlock_irq(&blkcg->lock);

	blkio_update_policy_rule(pn, newpn);

update_io_group:
	blkio_update_policy_node_blkg(blkcg, newpn);

free_newpn:
	if (!keep_newpn)
		kfree(newpn);
free_buf:
	kfree(buf);
	return ret;
}

static void
blkio_print_policy_node(struct seq_file *m, struct blkio_policy_node *pn)
{
	switch (pn->plid) {
	case BLKIO_POLICY_PROP:
		if (pn->fileid == BLKIO_PROP_weight_device)
			seq_printf(m, "%u:%u\t%u\n", MAJOR(pn->dev),
				MINOR(pn->dev), pn->val.weight);
		break;
	case BLKIO_POLICY_THROTL:
		switch (pn->fileid) {
		case BLKIO_THROTL_read_bps_device:
		case BLKIO_THROTL_write_bps_device:
			seq_printf(m, "%u:%u\t%llu\n", MAJOR(pn->dev),
				MINOR(pn->dev), pn->val.bps);
			break;
		case BLKIO_THROTL_read_iops_device:
		case BLKIO_THROTL_write_iops_device:
			seq_printf(m, "%u:%u\t%u\n", MAJOR(pn->dev),
				MINOR(pn->dev), pn->val.iops);
			break;
		}
		break;
	default:
		BUG();
	}
}

/* cgroup files which read their data from policy nodes end up here */
static void blkio_read_policy_node_files(struct cftype *cft,
			struct blkio_cgroup *blkcg, struct seq_file *m)
{
	struct blkio_policy_node *pn;

	if (!list_empty(&blkcg->policy_list)) {
		spin_lock_irq(&blkcg->lock);
		list_for_each_entry(pn, &blkcg->policy_list, node) {
			if (!pn_matches_cftype(cft, pn))
				continue;
			blkio_print_policy_node(m, pn);
		}
		spin_unlock_irq(&blkcg->lock);
	}
}

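/*
 * Illustrative note (added): reading one of these policy-node-backed files
 * prints one line per matching rule in "<major>:<minor>\t<value>" form,
 * e.g. a weight rule for a hypothetical device 8:16 shows up in
 * blkio.weight_device as "8:16\t300".
 */
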
static int blkiocg_file_read(struct cgroup *cgrp, struct cftype *cft,
			     struct seq_file *m)
{
	struct blkio_cgroup *blkcg;
	enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
	int name = BLKIOFILE_ATTR(cft->private);

	blkcg = cgroup_to_blkio_cgroup(cgrp);

	switch (plid) {
	case BLKIO_POLICY_PROP:
		switch (name) {
		case BLKIO_PROP_weight_device:
			blkio_read_policy_node_files(cft, blkcg, m);
			return 0;
		default:
			BUG();
		}
		break;
	case BLKIO_POLICY_THROTL:
		switch (name) {
		case BLKIO_THROTL_read_bps_device:
		case BLKIO_THROTL_write_bps_device:
		case BLKIO_THROTL_read_iops_device:
		case BLKIO_THROTL_write_iops_device:
			blkio_read_policy_node_files(cft, blkcg, m);
			return 0;
		default:
			BUG();
		}
		break;
	default:
		BUG();
	}

	return 0;
}

static int blkio_read_blkg_stats(struct blkio_cgroup *blkcg,
		struct cftype *cft, struct cgroup_map_cb *cb, enum stat_type type,
		bool show_total)
{
	struct blkio_group *blkg;
	struct hlist_node *n;
	uint64_t cgroup_total = 0;

	rcu_read_lock();
	hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node) {
		if (blkg->dev) {
			if (!cftype_blkg_same_policy(cft, blkg))
				continue;
			spin_lock_irq(&blkg->stats_lock);
			cgroup_total += blkio_get_stat(blkg, cb, blkg->dev,
						type);
			spin_unlock_irq(&blkg->stats_lock);
		}
	}
	if (show_total)
		cb->fill(cb, "Total", cgroup_total);
	rcu_read_unlock();
	return 0;
}

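/*
 * Illustrative note (added): map-style stat files are emitted per device by
 * blkio_get_stat(). A file such as blkio.io_service_bytes prints, for a
 * hypothetical device 8:16, lines of the form "8:16 Read <n>",
 * "8:16 Write <n>", "8:16 Sync <n>", "8:16 Async <n>" and "8:16 Total <n>",
 * followed by a cgroup-wide "Total <n>" line when show_total is set.
 * Single-valued files like blkio.time and blkio.sectors print just
 * "8:16 <n>".
 */
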
/* All map kind of cgroup file get serviced by this function */
static int blkiocg_file_read_map(struct cgroup *cgrp, struct cftype *cft,
				 struct cgroup_map_cb *cb)
{
	struct blkio_cgroup *blkcg;
	enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
	int name = BLKIOFILE_ATTR(cft->private);

	blkcg = cgroup_to_blkio_cgroup(cgrp);

	switch (plid) {
	case BLKIO_POLICY_PROP:
		switch (name) {
		case BLKIO_PROP_time:
			return blkio_read_blkg_stats(blkcg, cft, cb,
						BLKIO_STAT_TIME, 0);
		case BLKIO_PROP_sectors:
			return blkio_read_blkg_stats(blkcg, cft, cb,
						BLKIO_STAT_SECTORS, 0);
		case BLKIO_PROP_io_service_bytes:
			return blkio_read_blkg_stats(blkcg, cft, cb,
						BLKIO_STAT_SERVICE_BYTES, 1);
		case BLKIO_PROP_io_serviced:
			return blkio_read_blkg_stats(blkcg, cft, cb,
						BLKIO_STAT_SERVICED, 1);
		case BLKIO_PROP_io_service_time:
			return blkio_read_blkg_stats(blkcg, cft, cb,
						BLKIO_STAT_SERVICE_TIME, 1);
		case BLKIO_PROP_io_wait_time:
			return blkio_read_blkg_stats(blkcg, cft, cb,
						BLKIO_STAT_WAIT_TIME, 1);
		case BLKIO_PROP_io_merged:
			return blkio_read_blkg_stats(blkcg, cft, cb,
						BLKIO_STAT_MERGED, 1);
		case BLKIO_PROP_io_queued:
			return blkio_read_blkg_stats(blkcg, cft, cb,
						BLKIO_STAT_QUEUED, 1);
#ifdef CONFIG_DEBUG_BLK_CGROUP
		case BLKIO_PROP_dequeue:
			return blkio_read_blkg_stats(blkcg, cft, cb,
						BLKIO_STAT_DEQUEUE, 0);
		case BLKIO_PROP_avg_queue_size:
			return blkio_read_blkg_stats(blkcg, cft, cb,
						BLKIO_STAT_AVG_QUEUE_SIZE, 0);
		case BLKIO_PROP_group_wait_time:
			return blkio_read_blkg_stats(blkcg, cft, cb,
						BLKIO_STAT_GROUP_WAIT_TIME, 0);
		case BLKIO_PROP_idle_time:
			return blkio_read_blkg_stats(blkcg, cft, cb,
						BLKIO_STAT_IDLE_TIME, 0);
		case BLKIO_PROP_empty_time:
			return blkio_read_blkg_stats(blkcg, cft, cb,
						BLKIO_STAT_EMPTY_TIME, 0);
#endif
		default:
			BUG();
		}
		break;
	case BLKIO_POLICY_THROTL:
		switch (name) {
		case BLKIO_THROTL_io_service_bytes:
			return blkio_read_blkg_stats(blkcg, cft, cb,
						BLKIO_STAT_SERVICE_BYTES, 1);
		case BLKIO_THROTL_io_serviced:
			return blkio_read_blkg_stats(blkcg, cft, cb,
						BLKIO_STAT_SERVICED, 1);
		default:
			BUG();
		}
		break;
	default:
		BUG();
	}

	return 0;
}

static int blkio_weight_write(struct blkio_cgroup *blkcg, u64 val)
{
	struct blkio_group *blkg;
	struct hlist_node *n;
	struct blkio_policy_node *pn;

	if (val < BLKIO_WEIGHT_MIN || val > BLKIO_WEIGHT_MAX)
		return -EINVAL;

	spin_lock(&blkio_list_lock);
	spin_lock_irq(&blkcg->lock);
	blkcg->weight = (unsigned int)val;

	hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
		pn = blkio_policy_search_node(blkcg, blkg->dev,
				BLKIO_POLICY_PROP, BLKIO_PROP_weight_device);
		if (pn)
			continue;

		blkio_update_group_weight(blkg, blkcg->weight);
	}
	spin_unlock_irq(&blkcg->lock);
	spin_unlock(&blkio_list_lock);
	return 0;
}

static u64 blkiocg_file_read_u64(struct cgroup *cgrp, struct cftype *cft)
{
	struct blkio_cgroup *blkcg;
	enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
	int name = BLKIOFILE_ATTR(cft->private);

	blkcg = cgroup_to_blkio_cgroup(cgrp);

	switch (plid) {
	case BLKIO_POLICY_PROP:
		switch (name) {
		case BLKIO_PROP_weight:
			return (u64)blkcg->weight;
		}
		break;
	default:
		BUG();
	}
	return 0;
}

static int
blkiocg_file_write_u64(struct cgroup *cgrp, struct cftype *cft, u64 val)
{
	struct blkio_cgroup *blkcg;
	enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
	int name = BLKIOFILE_ATTR(cft->private);

	blkcg = cgroup_to_blkio_cgroup(cgrp);

	switch (plid) {
	case BLKIO_POLICY_PROP:
		switch (name) {
		case BLKIO_PROP_weight:
			return blkio_weight_write(blkcg, val);
		}
		break;
	default:
		BUG();
	}

	return 0;
}

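/*
 * Illustrative note (added): every cftype below carries its owning policy
 * and attribute packed into ->private, e.g.
 * BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP, BLKIO_PROP_weight_device) expands to
 * (BLKIO_POLICY_PROP << 16) | BLKIO_PROP_weight_device, which the common
 * read/write handlers decode again with BLKIOFILE_POLICY() and
 * BLKIOFILE_ATTR() to dispatch to the right policy and file.
 */
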
struct cftype blkio_files[] = {
	{
		.name = "weight_device",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
				BLKIO_PROP_weight_device),
		.read_seq_string = blkiocg_file_read,
		.write_string = blkiocg_file_write,
		.max_write_len = 256,
	},
	{
		.name = "weight",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
				BLKIO_PROP_weight),
		.read_u64 = blkiocg_file_read_u64,
		.write_u64 = blkiocg_file_write_u64,
	},
	{
		.name = "time",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
				BLKIO_PROP_time),
		.read_map = blkiocg_file_read_map,
	},
	{
		.name = "sectors",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
				BLKIO_PROP_sectors),
		.read_map = blkiocg_file_read_map,
	},
	{
		.name = "io_service_bytes",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
				BLKIO_PROP_io_service_bytes),
		.read_map = blkiocg_file_read_map,
	},
	{
		.name = "io_serviced",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
				BLKIO_PROP_io_serviced),
		.read_map = blkiocg_file_read_map,
	},
	{
		.name = "io_service_time",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
				BLKIO_PROP_io_service_time),
		.read_map = blkiocg_file_read_map,
	},
	{
		.name = "io_wait_time",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
				BLKIO_PROP_io_wait_time),
		.read_map = blkiocg_file_read_map,
	},
	{
		.name = "io_merged",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
				BLKIO_PROP_io_merged),
		.read_map = blkiocg_file_read_map,
	},
	{
		.name = "io_queued",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
				BLKIO_PROP_io_queued),
		.read_map = blkiocg_file_read_map,
	},
	{
		.name = "reset_stats",
		.write_u64 = blkiocg_reset_stats,
	},
#ifdef CONFIG_BLK_DEV_THROTTLING
	{
		.name = "throttle.read_bps_device",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
				BLKIO_THROTL_read_bps_device),
		.read_seq_string = blkiocg_file_read,
		.write_string = blkiocg_file_write,
		.max_write_len = 256,
	},
	{
		.name = "throttle.write_bps_device",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
				BLKIO_THROTL_write_bps_device),
		.read_seq_string = blkiocg_file_read,
		.write_string = blkiocg_file_write,
		.max_write_len = 256,
	},
	{
		.name = "throttle.read_iops_device",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
				BLKIO_THROTL_read_iops_device),
		.read_seq_string = blkiocg_file_read,
		.write_string = blkiocg_file_write,
		.max_write_len = 256,
	},
	{
		.name = "throttle.write_iops_device",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
				BLKIO_THROTL_write_iops_device),
		.read_seq_string = blkiocg_file_read,
		.write_string = blkiocg_file_write,
		.max_write_len = 256,
	},
	{
		.name = "throttle.io_service_bytes",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
				BLKIO_THROTL_io_service_bytes),
		.read_map = blkiocg_file_read_map,
	},
	{
		.name = "throttle.io_serviced",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
				BLKIO_THROTL_io_serviced),
		.read_map = blkiocg_file_read_map,
	},
#endif /* CONFIG_BLK_DEV_THROTTLING */

#ifdef CONFIG_DEBUG_BLK_CGROUP
	{
		.name = "avg_queue_size",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
				BLKIO_PROP_avg_queue_size),
		.read_map = blkiocg_file_read_map,
	},
	{
		.name = "group_wait_time",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
				BLKIO_PROP_group_wait_time),
		.read_map = blkiocg_file_read_map,
	},
	{
		.name = "idle_time",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
				BLKIO_PROP_idle_time),
		.read_map = blkiocg_file_read_map,
	},
	{
		.name = "empty_time",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
				BLKIO_PROP_empty_time),
		.read_map = blkiocg_file_read_map,
	},
	{
		.name = "dequeue",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
				BLKIO_PROP_dequeue),
		.read_map = blkiocg_file_read_map,
	},
#endif
};

static int blkiocg_populate(struct cgroup_subsys *subsys, struct cgroup *cgroup)
{
	return cgroup_add_files(cgroup, subsys, blkio_files,
				ARRAY_SIZE(blkio_files));
}

static void blkiocg_destroy(struct cgroup_subsys *subsys, struct cgroup *cgroup)
{
	struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);
	unsigned long flags;
	struct blkio_group *blkg;
	void *key;
	struct blkio_policy_type *blkiop;
	struct blkio_policy_node *pn, *pntmp;

	rcu_read_lock();
	do {
		spin_lock_irqsave(&blkcg->lock, flags);

		if (hlist_empty(&blkcg->blkg_list)) {
			spin_unlock_irqrestore(&blkcg->lock, flags);
			break;
		}

		blkg = hlist_entry(blkcg->blkg_list.first, struct blkio_group,
					blkcg_node);
		key = rcu_dereference(blkg->key);
		__blkiocg_del_blkio_group(blkg);

		spin_unlock_irqrestore(&blkcg->lock, flags);

		/*
		 * This blkio_group is being unlinked as associated cgroup is
		 * going away. Let all the IO controlling policies know about
		 * this event.
		 */
		spin_lock(&blkio_list_lock);
		list_for_each_entry(blkiop, &blkio_list, list) {
			if (blkiop->plid != blkg->plid)
				continue;
			blkiop->ops.blkio_unlink_group_fn(key, blkg);
		}
		spin_unlock(&blkio_list_lock);
	} while (1);

	list_for_each_entry_safe(pn, pntmp, &blkcg->policy_list, node) {
		blkio_policy_delete_node(pn);
		kfree(pn);
	}

	free_css_id(&blkio_subsys, &blkcg->css);
	rcu_read_unlock();
	if (blkcg != &blkio_root_cgroup)
		kfree(blkcg);
}

static struct cgroup_subsys_state *
blkiocg_create(struct cgroup_subsys *subsys, struct cgroup *cgroup)
{
	struct blkio_cgroup *blkcg;
	struct cgroup *parent = cgroup->parent;

	if (!parent) {
		blkcg = &blkio_root_cgroup;
		goto done;
	}

	blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL);
	if (!blkcg)
		return ERR_PTR(-ENOMEM);

	blkcg->weight = BLKIO_WEIGHT_DEFAULT;
done:
	spin_lock_init(&blkcg->lock);
	INIT_HLIST_HEAD(&blkcg->blkg_list);

	INIT_LIST_HEAD(&blkcg->policy_list);
	return &blkcg->css;
}

/*
 * We cannot support shared io contexts, as we have no means to support
 * two tasks with the same ioc in two different groups without major rework
 * of the main cic data structures. For now we allow a task to change
 * its cgroup only if it's the only owner of its ioc.
 */
static int blkiocg_can_attach(struct cgroup_subsys *subsys,
			      struct cgroup *cgroup, struct task_struct *tsk,
			      bool threadgroup)
{
	struct io_context *ioc;
	int ret = 0;

	/* task_lock() is needed to avoid races with exit_io_context() */
	task_lock(tsk);
	ioc = tsk->io_context;
	if (ioc && atomic_read(&ioc->nr_tasks) > 1)
		ret = -EINVAL;
	task_unlock(tsk);

	return ret;
}

static void blkiocg_attach(struct cgroup_subsys *subsys, struct cgroup *cgroup,
			   struct cgroup *prev, struct task_struct *tsk,
			   bool threadgroup)
{
	struct io_context *ioc;

	task_lock(tsk);
	ioc = tsk->io_context;
	if (ioc)
		ioc->cgroup_changed = 1;
	task_unlock(tsk);
}

void blkio_policy_register(struct blkio_policy_type *blkiop)
{
	spin_lock(&blkio_list_lock);
	list_add_tail(&blkiop->list, &blkio_list);
	spin_unlock(&blkio_list_lock);
}
EXPORT_SYMBOL_GPL(blkio_policy_register);

void blkio_policy_unregister(struct blkio_policy_type *blkiop)
{
	spin_lock(&blkio_list_lock);
	list_del_init(&blkiop->list);
	spin_unlock(&blkio_list_lock);
}
EXPORT_SYMBOL_GPL(blkio_policy_unregister);

static int __init init_cgroup_blkio(void)
{
	return cgroup_load_subsys(&blkio_subsys);
}

static void __exit exit_cgroup_blkio(void)
{
	cgroup_unload_subsys(&blkio_subsys);
}

module_init(init_cgroup_blkio);
module_exit(exit_cgroup_blkio);
MODULE_LICENSE("GPL");