#ifndef BLK_THROTTLE_H
#define BLK_THROTTLE_H

#include "blk-cgroup-rwstat.h"

/*
 * To implement hierarchical throttling, throtl_grps form a tree and bios
 * are dispatched upwards level by level until they reach the top and get
 * issued.  When dispatching bios from the children and local group at each
 * level, if the bios are dispatched into a single bio_list, there's a risk
 * that a local or child group which can queue many bios at once will fill
 * up the list, starving the others.
 *
 * To avoid such starvation, dispatched bios are queued separately
 * according to where they came from.  When they are again dispatched to
 * the parent, they're popped in round-robin order so that no single source
 * hogs the dispatch window.
 *
 * throtl_qnode is used to keep the queued bios separated by their sources.
 * Bios are queued to throtl_qnode which in turn is queued to
 * throtl_service_queue and then dispatched in round-robin order.
 *
 * It's also used to track the reference counts on blkg's.  A qnode always
 * belongs to a throtl_grp and gets queued on itself or the parent, so
 * incrementing the reference of the associated throtl_grp when a qnode is
 * queued and decrementing when dequeued is enough to keep the whole blkg
 * tree pinned while bios are in flight.
 */
struct throtl_qnode {
	struct list_head	node;		/* service_queue->queued[] */
	struct bio_list		bios;		/* queued bios */
	struct throtl_grp	*tg;		/* tg this qnode belongs to */
};

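/*
 * A service_queue holds the bios queued for dispatch (one throtl_qnode list
 * per direction) and the RB tree of pending child groups sorted by their
 * ->disptime; pending_timer fires when the earliest pending child becomes
 * eligible for dispatch.
 */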
struct throtl_service_queue {
	struct throtl_service_queue *parent_sq;	/* the parent service_queue */

	/*
	 * Bios queued directly to this service_queue or dispatched from
	 * children throtl_grp's.
	 */
	struct list_head	queued[2];	/* throtl_qnode [READ/WRITE] */
	unsigned int		nr_queued[2];	/* number of queued bios */

	/*
	 * RB tree of active children throtl_grp's, which are sorted by
	 * their ->disptime.
	 */
	struct rb_root_cached	pending_tree;	/* RB tree of active tgs */
	unsigned int		nr_pending;	/* # queued in the tree */
	unsigned long		first_pending_disptime;	/* disptime of the first tg */
	struct timer_list	pending_timer;	/* fires on first_pending_disptime */
};

enum tg_state_flags {
	THROTL_TG_PENDING	= 1 << 0,	/* on parent's pending tree */
	THROTL_TG_WAS_EMPTY	= 1 << 1,	/* bio_lists[] became non-empty */
	THROTL_TG_CANCELING	= 1 << 2,	/* starts to cancel bios */
};

enum {
	LIMIT_LOW,
	LIMIT_MAX,
	LIMIT_CNT,
};

struct throtl_grp {
	/* must be the first member */
	struct blkg_policy_data pd;

	/* active throtl group service_queue member */
	struct rb_node rb_node;

	/* throtl_data this group belongs to */
	struct throtl_data *td;

	/* this group's service queue */
	struct throtl_service_queue service_queue;

	/*
	 * qnode_on_self is used when bios are directly queued to this
	 * throtl_grp so that local bios compete fairly with bios
	 * dispatched from children.  qnode_on_parent is used when bios are
	 * dispatched from this throtl_grp into its parent and will compete
	 * with the sibling qnode_on_parents and the parent's
	 * qnode_on_self.
	 */
	struct throtl_qnode qnode_on_self[2];
	struct throtl_qnode qnode_on_parent[2];

	/*
	 * Dispatch time in jiffies.  This is the estimated time at which the
	 * group will be unthrottled and ready to dispatch more bios.  It is
	 * used as the key to sort active groups in the service tree.
	 */
	unsigned long disptime;

	unsigned int flags;

	/* are there any throtl rules between this group and td? */
	bool has_rules_bps[2];
	bool has_rules_iops[2];

	/* internally used bytes per second rate limits */
	uint64_t bps[2][LIMIT_CNT];
	/* user configured bps limits */
	uint64_t bps_conf[2][LIMIT_CNT];

	/* internally used IOPS limits */
	unsigned int iops[2][LIMIT_CNT];
	/* user configured IOPS limits */
	unsigned int iops_conf[2][LIMIT_CNT];

	/* Number of bytes dispatched in the current slice */
	uint64_t bytes_disp[2];
	/* Number of bios dispatched in the current slice */
	unsigned int io_disp[2];

	unsigned long last_low_overflow_time[2];

	uint64_t last_bytes_disp[2];
	unsigned int last_io_disp[2];

	/*
	 * The following two fields are updated when a new configuration is
	 * submitted while some bios are still throttled.  They record how many
	 * bytes/ios have already been waited for under the previous
	 * configuration and are used to calculate the wait time under the new
	 * configuration.
	 */
	uint64_t carryover_bytes[2];
	unsigned int carryover_ios[2];

	unsigned long last_check_time;

	unsigned long latency_target; /* us */
	unsigned long latency_target_conf; /* us */
	/* When did we start a new slice */
	unsigned long slice_start[2];
	unsigned long slice_end[2];

	unsigned long last_finish_time; /* ns / 1024 */
	unsigned long checked_last_finish_time; /* ns / 1024 */
	unsigned long avg_idletime; /* ns / 1024 */
	unsigned long idletime_threshold; /* us */
	unsigned long idletime_threshold_conf; /* us */

	unsigned int bio_cnt; /* total bios */
	unsigned int bad_bio_cnt; /* bios exceeding latency threshold */
	unsigned long bio_cnt_reset_time;

	struct blkg_rwstat stat_bytes;
	struct blkg_rwstat stat_ios;
};

extern struct blkcg_policy blkcg_policy_throtl;

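/*
 * Conversion helpers: map a blkg_policy_data or a blkcg_gq back to the
 * containing throtl_grp, or NULL if no throttle policy data is attached.
 */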
static inline struct throtl_grp *pd_to_tg(struct blkg_policy_data *pd)
{
	return pd ? container_of(pd, struct throtl_grp, pd) : NULL;
}

static inline struct throtl_grp *blkg_to_tg(struct blkcg_gq *blkg)
{
	return pd_to_tg(blkg_to_pd(blkg, &blkcg_policy_throtl));
}

/*
 * Internal throttling interface
 */
#ifndef CONFIG_BLK_DEV_THROTTLING
static inline int blk_throtl_init(struct gendisk *disk) { return 0; }
static inline void blk_throtl_exit(struct gendisk *disk) { }
static inline void blk_throtl_register_queue(struct request_queue *q) { }
static inline bool blk_throtl_bio(struct bio *bio) { return false; }
static inline void blk_throtl_cancel_bios(struct request_queue *q) { }
#else /* CONFIG_BLK_DEV_THROTTLING */
int blk_throtl_init(struct gendisk *disk);
void blk_throtl_exit(struct gendisk *disk);
void blk_throtl_register_queue(struct request_queue *q);
bool __blk_throtl_bio(struct bio *bio);
void blk_throtl_cancel_bios(struct request_queue *q);

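/*
 * A bio needs throttling if its direction has an iops rule, or a bps rule
 * and the bio hasn't already been charged against a bps limit
 * (BIO_BPS_THROTTLED).
 */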
static inline bool blk_should_throtl(struct bio *bio)
{
	struct throtl_grp *tg = blkg_to_tg(bio->bi_blkg);
	int rw = bio_data_dir(bio);

	/* iops limit is always counted */
	if (tg->has_rules_iops[rw])
		return true;

	if (tg->has_rules_bps[rw] && !bio_flagged(bio, BIO_BPS_THROTTLED))
		return true;

	return false;
}

static inline bool blk_throtl_bio(struct bio *bio)
{
	if (!blk_should_throtl(bio))
		return false;

	return __blk_throtl_bio(bio);
}
#endif /* CONFIG_BLK_DEV_THROTTLING */

#endif