/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BLK_CGROUP_PRIVATE_H
#define _BLK_CGROUP_PRIVATE_H
/*
 * block cgroup private header
 *
 * Based on ideas and code from CFQ, CFS and BFQ:
 * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
 *
 * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
 *		      Paolo Valente <paolo.valente@unimore.it>
 *
 * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com>
 *		      Nauman Rafique <nauman@google.com>
 */

#include <linux/blk-cgroup.h>

/* percpu_counter batch for blkg_[rw]stats, per-cpu drift doesn't matter */
#define BLKG_STAT_CPU_BATCH	(INT_MAX / 2)

#ifdef CONFIG_BLK_CGROUP

/*
 * A blkcg_gq (blkg) is an association between a block cgroup (blkcg) and a
 * request_queue (q).  This is used by blkcg policies which need to track
 * information per blkcg - q pair.
 *
 * There can be multiple active blkcg policies and each blkg:policy pair is
 * represented by a blkg_policy_data which is allocated and freed by each
 * policy's pd_alloc/free_fn() methods.  A policy can allocate a private data
 * area by allocating a larger data structure which embeds blkg_policy_data
 * at the beginning (see the illustrative sketch after struct blkcg_policy).
 */
struct blkg_policy_data {
	/* the blkg and policy id this per-policy data belongs to */
	struct blkcg_gq			*blkg;
	int				plid;
};

/*
 * Policies that need to keep per-blkcg data which is independent from any
 * request_queue associated with it should implement cpd_alloc/free_fn()
 * methods.  A policy can allocate a private data area by allocating a larger
 * data structure which embeds blkcg_policy_data at the beginning.
 * cpd_init_fn() is invoked to let each policy handle per-blkcg data.
 */
struct blkcg_policy_data {
	/* the blkcg and policy id this per-policy data belongs to */
	struct blkcg			*blkcg;
	int				plid;
};

typedef struct blkcg_policy_data *(blkcg_pol_alloc_cpd_fn)(gfp_t gfp);
typedef void (blkcg_pol_init_cpd_fn)(struct blkcg_policy_data *cpd);
typedef void (blkcg_pol_free_cpd_fn)(struct blkcg_policy_data *cpd);
typedef void (blkcg_pol_bind_cpd_fn)(struct blkcg_policy_data *cpd);
typedef struct blkg_policy_data *(blkcg_pol_alloc_pd_fn)(gfp_t gfp,
				struct request_queue *q, struct blkcg *blkcg);
typedef void (blkcg_pol_init_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_online_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_offline_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_free_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkg_policy_data *pd);
typedef bool (blkcg_pol_stat_pd_fn)(struct blkg_policy_data *pd,
				struct seq_file *s);

struct blkcg_policy {
	int				plid;
	/* cgroup files for the policy */
	struct cftype			*dfl_cftypes;
	struct cftype			*legacy_cftypes;

	/* operations */
	blkcg_pol_alloc_cpd_fn		*cpd_alloc_fn;
	blkcg_pol_init_cpd_fn		*cpd_init_fn;
	blkcg_pol_free_cpd_fn		*cpd_free_fn;
	blkcg_pol_bind_cpd_fn		*cpd_bind_fn;

	blkcg_pol_alloc_pd_fn		*pd_alloc_fn;
	blkcg_pol_init_pd_fn		*pd_init_fn;
	blkcg_pol_online_pd_fn		*pd_online_fn;
	blkcg_pol_offline_pd_fn		*pd_offline_fn;
	blkcg_pol_free_pd_fn		*pd_free_fn;
	blkcg_pol_reset_pd_stats_fn	*pd_reset_stats_fn;
	blkcg_pol_stat_pd_fn		*pd_stat_fn;
};
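
/*
 * Illustrative sketch, not part of this header: a policy typically wraps
 * blkg_policy_data in its own per-blkg structure and converts between the
 * two with container_of().  All names below (example_blkg, pd_to_example,
 * example_pd_alloc/free) are hypothetical.
 *
 *	struct example_blkg {
 *		struct blkg_policy_data pd;
 *		u64 private_stat;
 *	};
 *
 *	static struct example_blkg *pd_to_example(struct blkg_policy_data *pd)
 *	{
 *		return pd ? container_of(pd, struct example_blkg, pd) : NULL;
 *	}
 *
 *	static struct blkg_policy_data *example_pd_alloc(gfp_t gfp,
 *			struct request_queue *q, struct blkcg *blkcg)
 *	{
 *		struct example_blkg *eg = kzalloc_node(sizeof(*eg), gfp,
 *						       q->node);
 *
 *		return eg ? &eg->pd : NULL;
 *	}
 *
 *	static void example_pd_free(struct blkg_policy_data *pd)
 *	{
 *		kfree(pd_to_example(pd));
 *	}
 */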

extern struct blkcg blkcg_root;
extern bool blkcg_debug_stats;

struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg,
				      struct request_queue *q, bool update_hint);
int blkcg_init_queue(struct request_queue *q);
void blkcg_exit_queue(struct request_queue *q);

/* Blkio controller policy registration */
int blkcg_policy_register(struct blkcg_policy *pol);
void blkcg_policy_unregister(struct blkcg_policy *pol);
int blkcg_activate_policy(struct request_queue *q,
			  const struct blkcg_policy *pol);
void blkcg_deactivate_policy(struct request_queue *q,
			     const struct blkcg_policy *pol);

const char *blkg_dev_name(struct blkcg_gq *blkg);
void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg,
		       u64 (*prfill)(struct seq_file *,
				     struct blkg_policy_data *, int),
		       const struct blkcg_policy *pol, int data,
		       bool show_total);
u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v);

struct blkg_conf_ctx {
	struct block_device		*bdev;
	struct blkcg_gq			*blkg;
	char				*body;
};

struct block_device *blkcg_conf_open_bdev(char **inputp);
int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
		   char *input, struct blkg_conf_ctx *ctx);
void blkg_conf_finish(struct blkg_conf_ctx *ctx);
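
/*
 * Illustrative sketch, not part of this header: a policy's cftype ->write()
 * handler typically parses "MAJ:MIN value" input with blkg_conf_prep(),
 * updates its per-blkg state, and releases whatever the prep pinned with
 * blkg_conf_finish().  example_policy, example_write() and example_update()
 * are hypothetical names.
 *
 *	static ssize_t example_write(struct kernfs_open_file *of, char *buf,
 *				     size_t nbytes, loff_t off)
 *	{
 *		struct blkcg *blkcg = css_to_blkcg(of_css(of));
 *		struct blkg_conf_ctx ctx;
 *		int ret;
 *
 *		ret = blkg_conf_prep(blkcg, &example_policy, buf, &ctx);
 *		if (ret)
 *			return ret;
 *
 *		ret = example_update(blkg_to_pd(ctx.blkg, &example_policy),
 *				     ctx.body);
 *
 *		blkg_conf_finish(&ctx);
 *		return ret ?: nbytes;
 *	}
 */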

/**
 * blkcg_css - find the current css
 *
 * Find the css associated with either the kthread or the current task.
 * This may return a dying css, so it is up to the caller to use tryget logic
 * to confirm it is alive and well.
 */
static inline struct cgroup_subsys_state *blkcg_css(void)
{
	struct cgroup_subsys_state *css;

	css = kthread_blkcg();
	if (css)
		return css;
	return task_css(current, io_cgrp_id);
}

/**
 * __bio_blkcg - internal, inconsistent version to get blkcg
 *
 * DO NOT USE.
 * This function is inconsistent and consequently is dangerous to use.  The
 * first part of the function returns a blkcg where a reference is owned by
 * the bio.  This means it does not need to be rcu protected as it cannot go
 * away with the bio owning a reference to it.  However, the latter
 * potentially gets it from task_css().  This can race against task migration
 * and the cgroup dying.  It is also semantically different as it must be
 * called rcu protected and is susceptible to failure when trying to get a
 * reference to it.  Therefore, it is not ok to assume that *_get() will
 * always succeed on the blkcg returned here.
 */
static inline struct blkcg *__bio_blkcg(struct bio *bio)
{
	if (bio && bio->bi_blkg)
		return bio->bi_blkg->blkcg;
	return css_to_blkcg(blkcg_css());
}

/**
 * bio_issue_as_root_blkg - see if this bio needs to be issued as root blkg
 * @bio: bio of interest
 *
 * In order to avoid priority inversions we sometimes need to issue a bio as
 * if it were attached to the root blkg, and then backcharge to the actual
 * owning blkg.  The idea is we do bio_blkcg() to look up the actual context
 * for the bio and attach the appropriate blkg to the bio.  Then we call this
 * helper and if it is true run with the root blkg for that queue and then do
 * any backcharging to the originating cgroup once the io is complete.
 *
 * Return: %true if this bio needs to be submitted with the root blkg context.
 */
static inline bool bio_issue_as_root_blkg(struct bio *bio)
{
	return (bio->bi_opf & (REQ_META | REQ_SWAP)) != 0;
}

/**
 * __blkg_lookup - internal version of blkg_lookup()
 * @blkcg: blkcg of interest
 * @q: request_queue of interest
 * @update_hint: whether to update lookup hint with the result or not
 *
 * This is the internal version and shouldn't be used by policy
 * implementations.  Looks up blkgs for the @blkcg - @q pair regardless of
 * @q's bypass state.  If @update_hint is %true, the caller should be
 * holding @q->queue_lock and lookup hint is updated on success.
 */
static inline struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg,
					     struct request_queue *q,
					     bool update_hint)
{
	struct blkcg_gq *blkg;

	if (blkcg == &blkcg_root)
		return q->root_blkg;

	blkg = rcu_dereference(blkcg->blkg_hint);
	if (blkg && blkg->q == q)
		return blkg;

	return blkg_lookup_slowpath(blkcg, q, update_hint);
}

/**
 * blkg_lookup - lookup blkg for the specified blkcg - q pair
 * @blkcg: blkcg of interest
 * @q: request_queue of interest
 *
 * Lookup blkg for the @blkcg - @q pair.  This function should be called
 * under RCU read lock.
 */
static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg,
					   struct request_queue *q)
{
	WARN_ON_ONCE(!rcu_read_lock_held());
	return __blkg_lookup(blkcg, q, false);
}

/**
 * blk_queue_root_blkg - return blkg for the (blkcg_root, @q) pair
 * @q: request_queue of interest
 *
 * Lookup blkg for @q at the root level.  See also blkg_lookup().
 */
static inline struct blkcg_gq *blk_queue_root_blkg(struct request_queue *q)
{
	return q->root_blkg;
}

/**
 * blkg_to_pd - get policy private data
 * @blkg: blkg of interest
 * @pol: policy of interest
 *
 * Return pointer to private data associated with the @blkg-@pol pair.
 */
static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
						  struct blkcg_policy *pol)
{
	return blkg ? blkg->pd[pol->plid] : NULL;
}

static inline struct blkcg_policy_data *blkcg_to_cpd(struct blkcg *blkcg,
						     struct blkcg_policy *pol)
{
	return blkcg ? blkcg->cpd[pol->plid] : NULL;
}

/**
 * pd_to_blkg - get blkg associated with policy private data
 * @pd: policy private data of interest
 *
 * @pd is policy private data.  Determine the blkg it's associated with.
 */
static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd)
{
	return pd ? pd->blkg : NULL;
}

static inline struct blkcg *cpd_to_blkcg(struct blkcg_policy_data *cpd)
{
	return cpd ? cpd->blkcg : NULL;
}

/**
 * blkg_path - format cgroup path of blkg
 * @blkg: blkg of interest
 * @buf: target buffer
 * @buflen: target buffer length
 *
 * Format the path of the cgroup of @blkg into @buf.
 */
static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen)
{
	return cgroup_path(blkg->blkcg->css.cgroup, buf, buflen);
}
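
/*
 * Illustrative sketch, not part of this header: looking up the per-policy
 * data for a blkcg/queue pair.  example_policy is a hypothetical name.  The
 * blkg is only guaranteed to stay around for the duration of the RCU read
 * side section; take a reference with blkg_tryget() (below) to use it past
 * that point.
 *
 *	struct blkg_policy_data *pd = NULL;
 *	struct blkcg_gq *blkg;
 *
 *	rcu_read_lock();
 *	blkg = blkg_lookup(blkcg, q);
 *	if (blkg)
 *		pd = blkg_to_pd(blkg, &example_policy);
 *	... use pd while still under rcu_read_lock() ...
 *	rcu_read_unlock();
 */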

/**
 * blkg_get - get a blkg reference
 * @blkg: blkg to get
 *
 * The caller should be holding an existing reference.
 */
static inline void blkg_get(struct blkcg_gq *blkg)
{
	percpu_ref_get(&blkg->refcnt);
}

/**
 * blkg_tryget - try and get a blkg reference
 * @blkg: blkg to get
 *
 * This is for use when doing an RCU lookup of the blkg.  We may be in the
 * midst of freeing this blkg, so we can only use it if the refcnt is not
 * zero.
 */
static inline bool blkg_tryget(struct blkcg_gq *blkg)
{
	return blkg && percpu_ref_tryget(&blkg->refcnt);
}

/**
 * blkg_put - put a blkg reference
 * @blkg: blkg to put
 */
static inline void blkg_put(struct blkcg_gq *blkg)
{
	percpu_ref_put(&blkg->refcnt);
}

/**
 * blkg_for_each_descendant_pre - pre-order walk of a blkg's descendants
 * @d_blkg: loop cursor pointing to the current descendant
 * @pos_css: used for iteration
 * @p_blkg: target blkg to walk descendants of
 *
 * Walk @d_blkg through the descendants of @p_blkg.  Must be used with RCU
 * read locked.  If called under either blkcg or queue lock, the iteration
 * is guaranteed to include all and only online blkgs.  The caller may
 * update @pos_css by calling css_rightmost_descendant() to skip a subtree.
 * @p_blkg is included in the iteration and the first node to be visited.
 */
#define blkg_for_each_descendant_pre(d_blkg, pos_css, p_blkg)		\
	css_for_each_descendant_pre((pos_css), &(p_blkg)->blkcg->css)	\
		if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css),	\
					      (p_blkg)->q, false)))

/**
 * blkg_for_each_descendant_post - post-order walk of a blkg's descendants
 * @d_blkg: loop cursor pointing to the current descendant
 * @pos_css: used for iteration
 * @p_blkg: target blkg to walk descendants of
 *
 * Similar to blkg_for_each_descendant_pre() but performs post-order
 * traversal instead.  Synchronization rules are the same.  @p_blkg is
 * included in the iteration and the last node to be visited.
 */
#define blkg_for_each_descendant_post(d_blkg, pos_css, p_blkg)		\
	css_for_each_descendant_post((pos_css), &(p_blkg)->blkcg->css)	\
		if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css),	\
					      (p_blkg)->q, false)))
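
/*
 * Illustrative sketch, not part of this header: visiting every blkg below a
 * queue's root blkg with the pre-order iterator above.  As documented, the
 * walk must run under the RCU read lock (and under the queue or blkcg lock
 * if only online blkgs may be seen).
 *
 *	struct cgroup_subsys_state *pos_css;
 *	struct blkcg_gq *blkg;
 *
 *	rcu_read_lock();
 *	blkg_for_each_descendant_pre(blkg, pos_css, q->root_blkg) {
 *		... visit blkg ...
 *	}
 *	rcu_read_unlock();
 */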

bool __blkcg_punt_bio_submit(struct bio *bio);

static inline bool blkcg_punt_bio_submit(struct bio *bio)
{
	if (bio->bi_opf & REQ_CGROUP_PUNT)
		return __blkcg_punt_bio_submit(bio);
	else
		return false;
}

static inline void blkcg_bio_issue_init(struct bio *bio)
{
	bio_issue_init(&bio->bi_issue, bio_sectors(bio));
}

static inline void blkcg_use_delay(struct blkcg_gq *blkg)
{
	if (WARN_ON_ONCE(atomic_read(&blkg->use_delay) < 0))
		return;
	if (atomic_add_return(1, &blkg->use_delay) == 1)
		atomic_inc(&blkg->blkcg->css.cgroup->congestion_count);
}

static inline int blkcg_unuse_delay(struct blkcg_gq *blkg)
{
	int old = atomic_read(&blkg->use_delay);

	if (WARN_ON_ONCE(old < 0))
		return 0;
	if (old == 0)
		return 0;

	/*
	 * We do this song and dance because we can race with somebody else
	 * adding or removing delay.  If we just did an atomic_dec we'd end up
	 * negative and we'd already be in trouble.  We need to subtract 1 and
	 * then check to see if we were the last delay so we can drop the
	 * congestion count on the cgroup.
	 */
	while (old) {
		int cur = atomic_cmpxchg(&blkg->use_delay, old, old - 1);
		if (cur == old)
			break;
		old = cur;
	}

	if (old == 0)
		return 0;
	if (old == 1)
		atomic_dec(&blkg->blkcg->css.cgroup->congestion_count);
	return 1;
}

/**
 * blkcg_set_delay - Enable allocator delay mechanism with the specified delay amount
 * @blkg: target blkg
 * @delay: delay duration in nsecs
 *
 * When enabled with this function, the delay is not decayed and must be
 * explicitly cleared with blkcg_clear_delay().  Must not be mixed with
 * blkcg_[un]use_delay() and blkcg_add_delay() usages.
 */
static inline void blkcg_set_delay(struct blkcg_gq *blkg, u64 delay)
{
	int old = atomic_read(&blkg->use_delay);

	/* We only want 1 person setting the congestion count for this blkg. */
	if (!old && atomic_cmpxchg(&blkg->use_delay, old, -1) == old)
		atomic_inc(&blkg->blkcg->css.cgroup->congestion_count);

	atomic64_set(&blkg->delay_nsec, delay);
}

/**
 * blkcg_clear_delay - Disable allocator delay mechanism
 * @blkg: target blkg
 *
 * Disable use_delay mechanism.  See blkcg_set_delay().
 */
static inline void blkcg_clear_delay(struct blkcg_gq *blkg)
{
	int old = atomic_read(&blkg->use_delay);

	/* We only want 1 person clearing the congestion count for this blkg. */
	if (old && atomic_cmpxchg(&blkg->use_delay, old, 0) == old)
		atomic_dec(&blkg->blkcg->css.cgroup->congestion_count);
}

void blk_cgroup_bio_start(struct bio *bio);
void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta);

#else	/* CONFIG_BLK_CGROUP */

struct blkg_policy_data {
};

struct blkcg_policy_data {
};

struct blkcg_policy {
};

#ifdef CONFIG_BLOCK

static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; }
static inline struct blkcg_gq *blk_queue_root_blkg(struct request_queue *q)
{ return NULL; }
static inline int blkcg_init_queue(struct request_queue *q) { return 0; }
static inline void blkcg_exit_queue(struct request_queue *q) { }
static inline int blkcg_policy_register(struct blkcg_policy *pol) { return 0; }
static inline void blkcg_policy_unregister(struct blkcg_policy *pol) { }
static inline int blkcg_activate_policy(struct request_queue *q,
					const struct blkcg_policy *pol) { return 0; }
static inline void blkcg_deactivate_policy(struct request_queue *q,
					   const struct blkcg_policy *pol) { }

static inline struct blkcg *__bio_blkcg(struct bio *bio) { return NULL; }

static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
						  struct blkcg_policy *pol) { return NULL; }
static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return NULL; }
static inline char *blkg_path(struct blkcg_gq *blkg) { return NULL; }
static inline void blkg_get(struct blkcg_gq *blkg) { }
static inline void blkg_put(struct blkcg_gq *blkg) { }

static inline bool blkcg_punt_bio_submit(struct bio *bio) { return false; }
static inline void blkcg_bio_issue_init(struct bio *bio) { }
static inline void blk_cgroup_bio_start(struct bio *bio) { }

#define blk_queue_for_each_rl(rl, q)	\
	for ((rl) = &(q)->root_rl; (rl); (rl) = NULL)

#endif	/* CONFIG_BLOCK */
#endif	/* CONFIG_BLK_CGROUP */

#endif	/* _BLK_CGROUP_PRIVATE_H */