1*a7b36ee6SJens Axboe #ifndef BLK_THROTTLE_H 2*a7b36ee6SJens Axboe #define BLK_THROTTLE_H 3*a7b36ee6SJens Axboe 4*a7b36ee6SJens Axboe #include "blk-cgroup-rwstat.h" 5*a7b36ee6SJens Axboe 6*a7b36ee6SJens Axboe /* 7*a7b36ee6SJens Axboe * To implement hierarchical throttling, throtl_grps form a tree and bios 8*a7b36ee6SJens Axboe * are dispatched upwards level by level until they reach the top and get 9*a7b36ee6SJens Axboe * issued. When dispatching bios from the children and local group at each 10*a7b36ee6SJens Axboe * level, if the bios are dispatched into a single bio_list, there's a risk 11*a7b36ee6SJens Axboe * of a local or child group which can queue many bios at once filling up 12*a7b36ee6SJens Axboe * the list starving others. 13*a7b36ee6SJens Axboe * 14*a7b36ee6SJens Axboe * To avoid such starvation, dispatched bios are queued separately 15*a7b36ee6SJens Axboe * according to where they came from. When they are again dispatched to 16*a7b36ee6SJens Axboe * the parent, they're popped in round-robin order so that no single source 17*a7b36ee6SJens Axboe * hogs the dispatch window. 18*a7b36ee6SJens Axboe * 19*a7b36ee6SJens Axboe * throtl_qnode is used to keep the queued bios separated by their sources. 20*a7b36ee6SJens Axboe * Bios are queued to throtl_qnode which in turn is queued to 21*a7b36ee6SJens Axboe * throtl_service_queue and then dispatched in round-robin order. 22*a7b36ee6SJens Axboe * 23*a7b36ee6SJens Axboe * It's also used to track the reference counts on blkg's. A qnode always 24*a7b36ee6SJens Axboe * belongs to a throtl_grp and gets queued on itself or the parent, so 25*a7b36ee6SJens Axboe * incrementing the reference of the associated throtl_grp when a qnode is 26*a7b36ee6SJens Axboe * queued and decrementing when dequeued is enough to keep the whole blkg 27*a7b36ee6SJens Axboe * tree pinned while bios are in flight. 28*a7b36ee6SJens Axboe */ 29*a7b36ee6SJens Axboe struct throtl_qnode { 30*a7b36ee6SJens Axboe struct list_head node; /* service_queue->queued[] */ 31*a7b36ee6SJens Axboe struct bio_list bios; /* queued bios */ 32*a7b36ee6SJens Axboe struct throtl_grp *tg; /* tg this qnode belongs to */ 33*a7b36ee6SJens Axboe }; 34*a7b36ee6SJens Axboe 35*a7b36ee6SJens Axboe struct throtl_service_queue { 36*a7b36ee6SJens Axboe struct throtl_service_queue *parent_sq; /* the parent service_queue */ 37*a7b36ee6SJens Axboe 38*a7b36ee6SJens Axboe /* 39*a7b36ee6SJens Axboe * Bios queued directly to this service_queue or dispatched from 40*a7b36ee6SJens Axboe * children throtl_grp's. 41*a7b36ee6SJens Axboe */ 42*a7b36ee6SJens Axboe struct list_head queued[2]; /* throtl_qnode [READ/WRITE] */ 43*a7b36ee6SJens Axboe unsigned int nr_queued[2]; /* number of queued bios */ 44*a7b36ee6SJens Axboe 45*a7b36ee6SJens Axboe /* 46*a7b36ee6SJens Axboe * RB tree of active children throtl_grp's, which are sorted by 47*a7b36ee6SJens Axboe * their ->disptime. 48*a7b36ee6SJens Axboe */ 49*a7b36ee6SJens Axboe struct rb_root_cached pending_tree; /* RB tree of active tgs */ 50*a7b36ee6SJens Axboe unsigned int nr_pending; /* # queued in the tree */ 51*a7b36ee6SJens Axboe unsigned long first_pending_disptime; /* disptime of the first tg */ 52*a7b36ee6SJens Axboe struct timer_list pending_timer; /* fires on first_pending_disptime */ 53*a7b36ee6SJens Axboe }; 54*a7b36ee6SJens Axboe 55*a7b36ee6SJens Axboe enum { 56*a7b36ee6SJens Axboe LIMIT_LOW, 57*a7b36ee6SJens Axboe LIMIT_MAX, 58*a7b36ee6SJens Axboe LIMIT_CNT, 59*a7b36ee6SJens Axboe }; 60*a7b36ee6SJens Axboe 61*a7b36ee6SJens Axboe struct throtl_grp { 62*a7b36ee6SJens Axboe /* must be the first member */ 63*a7b36ee6SJens Axboe struct blkg_policy_data pd; 64*a7b36ee6SJens Axboe 65*a7b36ee6SJens Axboe /* active throtl group service_queue member */ 66*a7b36ee6SJens Axboe struct rb_node rb_node; 67*a7b36ee6SJens Axboe 68*a7b36ee6SJens Axboe /* throtl_data this group belongs to */ 69*a7b36ee6SJens Axboe struct throtl_data *td; 70*a7b36ee6SJens Axboe 71*a7b36ee6SJens Axboe /* this group's service queue */ 72*a7b36ee6SJens Axboe struct throtl_service_queue service_queue; 73*a7b36ee6SJens Axboe 74*a7b36ee6SJens Axboe /* 75*a7b36ee6SJens Axboe * qnode_on_self is used when bios are directly queued to this 76*a7b36ee6SJens Axboe * throtl_grp so that local bios compete fairly with bios 77*a7b36ee6SJens Axboe * dispatched from children. qnode_on_parent is used when bios are 78*a7b36ee6SJens Axboe * dispatched from this throtl_grp into its parent and will compete 79*a7b36ee6SJens Axboe * with the sibling qnode_on_parents and the parent's 80*a7b36ee6SJens Axboe * qnode_on_self. 81*a7b36ee6SJens Axboe */ 82*a7b36ee6SJens Axboe struct throtl_qnode qnode_on_self[2]; 83*a7b36ee6SJens Axboe struct throtl_qnode qnode_on_parent[2]; 84*a7b36ee6SJens Axboe 85*a7b36ee6SJens Axboe /* 86*a7b36ee6SJens Axboe * Dispatch time in jiffies. This is the estimated time when group 87*a7b36ee6SJens Axboe * will unthrottle and is ready to dispatch more bio. It is used as 88*a7b36ee6SJens Axboe * key to sort active groups in service tree. 89*a7b36ee6SJens Axboe */ 90*a7b36ee6SJens Axboe unsigned long disptime; 91*a7b36ee6SJens Axboe 92*a7b36ee6SJens Axboe unsigned int flags; 93*a7b36ee6SJens Axboe 94*a7b36ee6SJens Axboe /* are there any throtl rules between this group and td? */ 95*a7b36ee6SJens Axboe bool has_rules[2]; 96*a7b36ee6SJens Axboe 97*a7b36ee6SJens Axboe /* internally used bytes per second rate limits */ 98*a7b36ee6SJens Axboe uint64_t bps[2][LIMIT_CNT]; 99*a7b36ee6SJens Axboe /* user configured bps limits */ 100*a7b36ee6SJens Axboe uint64_t bps_conf[2][LIMIT_CNT]; 101*a7b36ee6SJens Axboe 102*a7b36ee6SJens Axboe /* internally used IOPS limits */ 103*a7b36ee6SJens Axboe unsigned int iops[2][LIMIT_CNT]; 104*a7b36ee6SJens Axboe /* user configured IOPS limits */ 105*a7b36ee6SJens Axboe unsigned int iops_conf[2][LIMIT_CNT]; 106*a7b36ee6SJens Axboe 107*a7b36ee6SJens Axboe /* Number of bytes dispatched in current slice */ 108*a7b36ee6SJens Axboe uint64_t bytes_disp[2]; 109*a7b36ee6SJens Axboe /* Number of bio's dispatched in current slice */ 110*a7b36ee6SJens Axboe unsigned int io_disp[2]; 111*a7b36ee6SJens Axboe 112*a7b36ee6SJens Axboe unsigned long last_low_overflow_time[2]; 113*a7b36ee6SJens Axboe 114*a7b36ee6SJens Axboe uint64_t last_bytes_disp[2]; 115*a7b36ee6SJens Axboe unsigned int last_io_disp[2]; 116*a7b36ee6SJens Axboe 117*a7b36ee6SJens Axboe unsigned long last_check_time; 118*a7b36ee6SJens Axboe 119*a7b36ee6SJens Axboe unsigned long latency_target; /* us */ 120*a7b36ee6SJens Axboe unsigned long latency_target_conf; /* us */ 121*a7b36ee6SJens Axboe /* When did we start a new slice */ 122*a7b36ee6SJens Axboe unsigned long slice_start[2]; 123*a7b36ee6SJens Axboe unsigned long slice_end[2]; 124*a7b36ee6SJens Axboe 125*a7b36ee6SJens Axboe unsigned long last_finish_time; /* ns / 1024 */ 126*a7b36ee6SJens Axboe unsigned long checked_last_finish_time; /* ns / 1024 */ 127*a7b36ee6SJens Axboe unsigned long avg_idletime; /* ns / 1024 */ 128*a7b36ee6SJens Axboe unsigned long idletime_threshold; /* us */ 129*a7b36ee6SJens Axboe unsigned long idletime_threshold_conf; /* us */ 130*a7b36ee6SJens Axboe 131*a7b36ee6SJens Axboe unsigned int bio_cnt; /* total bios */ 132*a7b36ee6SJens Axboe unsigned int bad_bio_cnt; /* bios exceeding latency threshold */ 133*a7b36ee6SJens Axboe unsigned long bio_cnt_reset_time; 134*a7b36ee6SJens Axboe 135*a7b36ee6SJens Axboe atomic_t io_split_cnt[2]; 136*a7b36ee6SJens Axboe atomic_t last_io_split_cnt[2]; 137*a7b36ee6SJens Axboe 138*a7b36ee6SJens Axboe struct blkg_rwstat stat_bytes; 139*a7b36ee6SJens Axboe struct blkg_rwstat stat_ios; 140*a7b36ee6SJens Axboe }; 141*a7b36ee6SJens Axboe 142*a7b36ee6SJens Axboe extern struct blkcg_policy blkcg_policy_throtl; 143*a7b36ee6SJens Axboe 144*a7b36ee6SJens Axboe static inline struct throtl_grp *pd_to_tg(struct blkg_policy_data *pd) 145*a7b36ee6SJens Axboe { 146*a7b36ee6SJens Axboe return pd ? container_of(pd, struct throtl_grp, pd) : NULL; 147*a7b36ee6SJens Axboe } 148*a7b36ee6SJens Axboe 149*a7b36ee6SJens Axboe static inline struct throtl_grp *blkg_to_tg(struct blkcg_gq *blkg) 150*a7b36ee6SJens Axboe { 151*a7b36ee6SJens Axboe return pd_to_tg(blkg_to_pd(blkg, &blkcg_policy_throtl)); 152*a7b36ee6SJens Axboe } 153*a7b36ee6SJens Axboe 154*a7b36ee6SJens Axboe /* 155*a7b36ee6SJens Axboe * Internal throttling interface 156*a7b36ee6SJens Axboe */ 157*a7b36ee6SJens Axboe #ifndef CONFIG_BLK_DEV_THROTTLING 158*a7b36ee6SJens Axboe static inline int blk_throtl_init(struct request_queue *q) { return 0; } 159*a7b36ee6SJens Axboe static inline void blk_throtl_exit(struct request_queue *q) { } 160*a7b36ee6SJens Axboe static inline void blk_throtl_register_queue(struct request_queue *q) { } 161*a7b36ee6SJens Axboe static inline void blk_throtl_charge_bio_split(struct bio *bio) { } 162*a7b36ee6SJens Axboe static inline bool blk_throtl_bio(struct bio *bio) { return false; } 163*a7b36ee6SJens Axboe #else /* CONFIG_BLK_DEV_THROTTLING */ 164*a7b36ee6SJens Axboe int blk_throtl_init(struct request_queue *q); 165*a7b36ee6SJens Axboe void blk_throtl_exit(struct request_queue *q); 166*a7b36ee6SJens Axboe void blk_throtl_register_queue(struct request_queue *q); 167*a7b36ee6SJens Axboe void blk_throtl_charge_bio_split(struct bio *bio); 168*a7b36ee6SJens Axboe bool __blk_throtl_bio(struct bio *bio); 169*a7b36ee6SJens Axboe static inline bool blk_throtl_bio(struct bio *bio) 170*a7b36ee6SJens Axboe { 171*a7b36ee6SJens Axboe struct throtl_grp *tg = blkg_to_tg(bio->bi_blkg); 172*a7b36ee6SJens Axboe 173*a7b36ee6SJens Axboe if (bio_flagged(bio, BIO_THROTTLED)) 174*a7b36ee6SJens Axboe return false; 175*a7b36ee6SJens Axboe if (!tg->has_rules[bio_data_dir(bio)]) 176*a7b36ee6SJens Axboe return false; 177*a7b36ee6SJens Axboe 178*a7b36ee6SJens Axboe return __blk_throtl_bio(bio); 179*a7b36ee6SJens Axboe } 180*a7b36ee6SJens Axboe #endif /* CONFIG_BLK_DEV_THROTTLING */ 181*a7b36ee6SJens Axboe 182*a7b36ee6SJens Axboe #endif 183