1a7b36ee6SJens Axboe #ifndef BLK_THROTTLE_H 2a7b36ee6SJens Axboe #define BLK_THROTTLE_H 3a7b36ee6SJens Axboe 4a7b36ee6SJens Axboe #include "blk-cgroup-rwstat.h" 5a7b36ee6SJens Axboe 6a7b36ee6SJens Axboe /* 7a7b36ee6SJens Axboe * To implement hierarchical throttling, throtl_grps form a tree and bios 8a7b36ee6SJens Axboe * are dispatched upwards level by level until they reach the top and get 9a7b36ee6SJens Axboe * issued. When dispatching bios from the children and local group at each 10a7b36ee6SJens Axboe * level, if the bios are dispatched into a single bio_list, there's a risk 11a7b36ee6SJens Axboe * of a local or child group which can queue many bios at once filling up 12a7b36ee6SJens Axboe * the list starving others. 13a7b36ee6SJens Axboe * 14a7b36ee6SJens Axboe * To avoid such starvation, dispatched bios are queued separately 15a7b36ee6SJens Axboe * according to where they came from. When they are again dispatched to 16a7b36ee6SJens Axboe * the parent, they're popped in round-robin order so that no single source 17a7b36ee6SJens Axboe * hogs the dispatch window. 18a7b36ee6SJens Axboe * 19a7b36ee6SJens Axboe * throtl_qnode is used to keep the queued bios separated by their sources. 20a7b36ee6SJens Axboe * Bios are queued to throtl_qnode which in turn is queued to 21a7b36ee6SJens Axboe * throtl_service_queue and then dispatched in round-robin order. 22a7b36ee6SJens Axboe * 23a7b36ee6SJens Axboe * It's also used to track the reference counts on blkg's. A qnode always 24a7b36ee6SJens Axboe * belongs to a throtl_grp and gets queued on itself or the parent, so 25a7b36ee6SJens Axboe * incrementing the reference of the associated throtl_grp when a qnode is 26a7b36ee6SJens Axboe * queued and decrementing when dequeued is enough to keep the whole blkg 27a7b36ee6SJens Axboe * tree pinned while bios are in flight. 28a7b36ee6SJens Axboe */ 29a7b36ee6SJens Axboe struct throtl_qnode { 30a7b36ee6SJens Axboe struct list_head node; /* service_queue->queued[] */ 31a7b36ee6SJens Axboe struct bio_list bios; /* queued bios */ 32a7b36ee6SJens Axboe struct throtl_grp *tg; /* tg this qnode belongs to */ 33a7b36ee6SJens Axboe }; 34a7b36ee6SJens Axboe 35a7b36ee6SJens Axboe struct throtl_service_queue { 36a7b36ee6SJens Axboe struct throtl_service_queue *parent_sq; /* the parent service_queue */ 37a7b36ee6SJens Axboe 38a7b36ee6SJens Axboe /* 39a7b36ee6SJens Axboe * Bios queued directly to this service_queue or dispatched from 40a7b36ee6SJens Axboe * children throtl_grp's. 41a7b36ee6SJens Axboe */ 42a7b36ee6SJens Axboe struct list_head queued[2]; /* throtl_qnode [READ/WRITE] */ 43a7b36ee6SJens Axboe unsigned int nr_queued[2]; /* number of queued bios */ 44a7b36ee6SJens Axboe 45a7b36ee6SJens Axboe /* 46a7b36ee6SJens Axboe * RB tree of active children throtl_grp's, which are sorted by 47a7b36ee6SJens Axboe * their ->disptime. 48a7b36ee6SJens Axboe */ 49a7b36ee6SJens Axboe struct rb_root_cached pending_tree; /* RB tree of active tgs */ 50a7b36ee6SJens Axboe unsigned int nr_pending; /* # queued in the tree */ 51a7b36ee6SJens Axboe unsigned long first_pending_disptime; /* disptime of the first tg */ 52a7b36ee6SJens Axboe struct timer_list pending_timer; /* fires on first_pending_disptime */ 53a7b36ee6SJens Axboe }; 54a7b36ee6SJens Axboe 555a93b602SMing Lei enum tg_state_flags { 565a93b602SMing Lei THROTL_TG_PENDING = 1 << 0, /* on parent's pending tree */ 575a93b602SMing Lei THROTL_TG_WAS_EMPTY = 1 << 1, /* bio_lists[] became non-empty */ 585a93b602SMing Lei THROTL_TG_HAS_IOPS_LIMIT = 1 << 2, /* tg has iops limit */ 59*8f9e7b65SYu Kuai THROTL_TG_CANCELING = 1 << 3, /* starts to cancel bio */ 605a93b602SMing Lei }; 615a93b602SMing Lei 62a7b36ee6SJens Axboe enum { 63a7b36ee6SJens Axboe LIMIT_LOW, 64a7b36ee6SJens Axboe LIMIT_MAX, 65a7b36ee6SJens Axboe LIMIT_CNT, 66a7b36ee6SJens Axboe }; 67a7b36ee6SJens Axboe 68a7b36ee6SJens Axboe struct throtl_grp { 69a7b36ee6SJens Axboe /* must be the first member */ 70a7b36ee6SJens Axboe struct blkg_policy_data pd; 71a7b36ee6SJens Axboe 72a7b36ee6SJens Axboe /* active throtl group service_queue member */ 73a7b36ee6SJens Axboe struct rb_node rb_node; 74a7b36ee6SJens Axboe 75a7b36ee6SJens Axboe /* throtl_data this group belongs to */ 76a7b36ee6SJens Axboe struct throtl_data *td; 77a7b36ee6SJens Axboe 78a7b36ee6SJens Axboe /* this group's service queue */ 79a7b36ee6SJens Axboe struct throtl_service_queue service_queue; 80a7b36ee6SJens Axboe 81a7b36ee6SJens Axboe /* 82a7b36ee6SJens Axboe * qnode_on_self is used when bios are directly queued to this 83a7b36ee6SJens Axboe * throtl_grp so that local bios compete fairly with bios 84a7b36ee6SJens Axboe * dispatched from children. qnode_on_parent is used when bios are 85a7b36ee6SJens Axboe * dispatched from this throtl_grp into its parent and will compete 86a7b36ee6SJens Axboe * with the sibling qnode_on_parents and the parent's 87a7b36ee6SJens Axboe * qnode_on_self. 88a7b36ee6SJens Axboe */ 89a7b36ee6SJens Axboe struct throtl_qnode qnode_on_self[2]; 90a7b36ee6SJens Axboe struct throtl_qnode qnode_on_parent[2]; 91a7b36ee6SJens Axboe 92a7b36ee6SJens Axboe /* 93a7b36ee6SJens Axboe * Dispatch time in jiffies. This is the estimated time when group 94a7b36ee6SJens Axboe * will unthrottle and is ready to dispatch more bio. It is used as 95a7b36ee6SJens Axboe * key to sort active groups in service tree. 96a7b36ee6SJens Axboe */ 97a7b36ee6SJens Axboe unsigned long disptime; 98a7b36ee6SJens Axboe 99a7b36ee6SJens Axboe unsigned int flags; 100a7b36ee6SJens Axboe 101a7b36ee6SJens Axboe /* are there any throtl rules between this group and td? */ 102a7b36ee6SJens Axboe bool has_rules[2]; 103a7b36ee6SJens Axboe 104a7b36ee6SJens Axboe /* internally used bytes per second rate limits */ 105a7b36ee6SJens Axboe uint64_t bps[2][LIMIT_CNT]; 106a7b36ee6SJens Axboe /* user configured bps limits */ 107a7b36ee6SJens Axboe uint64_t bps_conf[2][LIMIT_CNT]; 108a7b36ee6SJens Axboe 109a7b36ee6SJens Axboe /* internally used IOPS limits */ 110a7b36ee6SJens Axboe unsigned int iops[2][LIMIT_CNT]; 111a7b36ee6SJens Axboe /* user configured IOPS limits */ 112a7b36ee6SJens Axboe unsigned int iops_conf[2][LIMIT_CNT]; 113a7b36ee6SJens Axboe 114a7b36ee6SJens Axboe /* Number of bytes dispatched in current slice */ 115a7b36ee6SJens Axboe uint64_t bytes_disp[2]; 116a7b36ee6SJens Axboe /* Number of bio's dispatched in current slice */ 117a7b36ee6SJens Axboe unsigned int io_disp[2]; 118a7b36ee6SJens Axboe 119a7b36ee6SJens Axboe unsigned long last_low_overflow_time[2]; 120a7b36ee6SJens Axboe 121a7b36ee6SJens Axboe uint64_t last_bytes_disp[2]; 122a7b36ee6SJens Axboe unsigned int last_io_disp[2]; 123a7b36ee6SJens Axboe 124a7b36ee6SJens Axboe unsigned long last_check_time; 125a7b36ee6SJens Axboe 126a7b36ee6SJens Axboe unsigned long latency_target; /* us */ 127a7b36ee6SJens Axboe unsigned long latency_target_conf; /* us */ 128a7b36ee6SJens Axboe /* When did we start a new slice */ 129a7b36ee6SJens Axboe unsigned long slice_start[2]; 130a7b36ee6SJens Axboe unsigned long slice_end[2]; 131a7b36ee6SJens Axboe 132a7b36ee6SJens Axboe unsigned long last_finish_time; /* ns / 1024 */ 133a7b36ee6SJens Axboe unsigned long checked_last_finish_time; /* ns / 1024 */ 134a7b36ee6SJens Axboe unsigned long avg_idletime; /* ns / 1024 */ 135a7b36ee6SJens Axboe unsigned long idletime_threshold; /* us */ 136a7b36ee6SJens Axboe unsigned long idletime_threshold_conf; /* us */ 137a7b36ee6SJens Axboe 138a7b36ee6SJens Axboe unsigned int bio_cnt; /* total bios */ 139a7b36ee6SJens Axboe unsigned int bad_bio_cnt; /* bios exceeding latency threshold */ 140a7b36ee6SJens Axboe unsigned long bio_cnt_reset_time; 141a7b36ee6SJens Axboe 142a7b36ee6SJens Axboe struct blkg_rwstat stat_bytes; 143a7b36ee6SJens Axboe struct blkg_rwstat stat_ios; 144a7b36ee6SJens Axboe }; 145a7b36ee6SJens Axboe 146a7b36ee6SJens Axboe extern struct blkcg_policy blkcg_policy_throtl; 147a7b36ee6SJens Axboe 148a7b36ee6SJens Axboe static inline struct throtl_grp *pd_to_tg(struct blkg_policy_data *pd) 149a7b36ee6SJens Axboe { 150a7b36ee6SJens Axboe return pd ? container_of(pd, struct throtl_grp, pd) : NULL; 151a7b36ee6SJens Axboe } 152a7b36ee6SJens Axboe 153a7b36ee6SJens Axboe static inline struct throtl_grp *blkg_to_tg(struct blkcg_gq *blkg) 154a7b36ee6SJens Axboe { 155a7b36ee6SJens Axboe return pd_to_tg(blkg_to_pd(blkg, &blkcg_policy_throtl)); 156a7b36ee6SJens Axboe } 157a7b36ee6SJens Axboe 158a7b36ee6SJens Axboe /* 159a7b36ee6SJens Axboe * Internal throttling interface 160a7b36ee6SJens Axboe */ 161a7b36ee6SJens Axboe #ifndef CONFIG_BLK_DEV_THROTTLING 162a7b36ee6SJens Axboe static inline int blk_throtl_init(struct request_queue *q) { return 0; } 163a7b36ee6SJens Axboe static inline void blk_throtl_exit(struct request_queue *q) { } 164a7b36ee6SJens Axboe static inline void blk_throtl_register_queue(struct request_queue *q) { } 165a7b36ee6SJens Axboe static inline bool blk_throtl_bio(struct bio *bio) { return false; } 166*8f9e7b65SYu Kuai static inline void blk_throtl_cancel_bios(struct request_queue *q) { } 167a7b36ee6SJens Axboe #else /* CONFIG_BLK_DEV_THROTTLING */ 168a7b36ee6SJens Axboe int blk_throtl_init(struct request_queue *q); 169a7b36ee6SJens Axboe void blk_throtl_exit(struct request_queue *q); 170a7b36ee6SJens Axboe void blk_throtl_register_queue(struct request_queue *q); 171a7b36ee6SJens Axboe bool __blk_throtl_bio(struct bio *bio); 172*8f9e7b65SYu Kuai void blk_throtl_cancel_bios(struct request_queue *q); 173a7b36ee6SJens Axboe static inline bool blk_throtl_bio(struct bio *bio) 174a7b36ee6SJens Axboe { 175a7b36ee6SJens Axboe struct throtl_grp *tg = blkg_to_tg(bio->bi_blkg); 176a7b36ee6SJens Axboe 1775a93b602SMing Lei /* no need to throttle bps any more if the bio has been throttled */ 1785a93b602SMing Lei if (bio_flagged(bio, BIO_THROTTLED) && 1795a93b602SMing Lei !(tg->flags & THROTL_TG_HAS_IOPS_LIMIT)) 1805a93b602SMing Lei return false; 1815a93b602SMing Lei 182a7b36ee6SJens Axboe if (!tg->has_rules[bio_data_dir(bio)]) 183a7b36ee6SJens Axboe return false; 184a7b36ee6SJens Axboe 185a7b36ee6SJens Axboe return __blk_throtl_bio(bio); 186a7b36ee6SJens Axboe } 187a7b36ee6SJens Axboe #endif /* CONFIG_BLK_DEV_THROTTLING */ 188a7b36ee6SJens Axboe 189a7b36ee6SJens Axboe #endif 190