xref: /openbmc/linux/block/blk-cgroup.h (revision 2524a578)
1 /* SPDX-License-Identifier: GPL-2.0 */
2 #ifndef _BLK_CGROUP_PRIVATE_H
3 #define _BLK_CGROUP_PRIVATE_H
4 /*
5  * block cgroup private header
6  *
7  * Based on ideas and code from CFQ, CFS and BFQ:
8  * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
9  *
10  * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
11  *		      Paolo Valente <paolo.valente@unimore.it>
12  *
13  * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com>
14  * 	              Nauman Rafique <nauman@google.com>
15  */
16 
17 #include <linux/blk-cgroup.h>
18 #include <linux/blk-mq.h>
19 
20 /* percpu_counter batch for blkg_[rw]stats, per-cpu drift doesn't matter */
21 #define BLKG_STAT_CPU_BATCH	(INT_MAX / 2)
22 
23 #ifdef CONFIG_BLK_CGROUP
24 
25 /*
26  * A blkcg_gq (blkg) is an association between a block cgroup (blkcg) and a
27  * request_queue (q).  This is used by blkcg policies which need to track
28  * information per blkcg - q pair.
29  *
30  * There can be multiple active blkcg policies and each blkg:policy pair is
31  * represented by a blkg_policy_data which is allocated and freed by each
32  * policy's pd_alloc/free_fn() methods.  A policy can allocate a private data
33  * area by allocating a larger data structure which embeds blkg_policy_data
34  * at the beginning.
35  */
36 struct blkg_policy_data {
37 	/* the blkg and policy id this per-policy data belongs to */
38 	struct blkcg_gq			*blkg;
39 	int				plid;
40 };
41 
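/*
 * Example: a policy typically wraps blkg_policy_data in its own per-blkg
 * structure, embedding it at the beginning, and converts back with
 * container_of().  This is only an illustrative sketch; "example_blkg_pd"
 * and "pd_to_example" are hypothetical names, not part of this header.
 *
 *    struct example_blkg_pd {
 *        struct blkg_policy_data pd;     // must come first
 *        u64 ios_issued;                 // policy-private state
 *    };
 *
 *    static inline struct example_blkg_pd *pd_to_example(struct blkg_policy_data *pd)
 *    {
 *        return pd ? container_of(pd, struct example_blkg_pd, pd) : NULL;
 *    }
 */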
42 /*
43  * Policies that need to keep per-blkcg data which is independent of any
44  * request_queue associated with it should implement cpd_alloc/free_fn()
45  * methods.  A policy can allocate a private data area by allocating a larger
46  * data structure which embeds blkcg_policy_data at the beginning.
47  * cpd_init() is invoked to let each policy handle per-blkcg data.
48  */
49 struct blkcg_policy_data {
50 	/* the blkcg and policy id this per-policy data belongs to */
51 	struct blkcg			*blkcg;
52 	int				plid;
53 };
54 
55 typedef struct blkcg_policy_data *(blkcg_pol_alloc_cpd_fn)(gfp_t gfp);
56 typedef void (blkcg_pol_init_cpd_fn)(struct blkcg_policy_data *cpd);
57 typedef void (blkcg_pol_free_cpd_fn)(struct blkcg_policy_data *cpd);
58 typedef void (blkcg_pol_bind_cpd_fn)(struct blkcg_policy_data *cpd);
59 typedef struct blkg_policy_data *(blkcg_pol_alloc_pd_fn)(gfp_t gfp,
60 				struct request_queue *q, struct blkcg *blkcg);
61 typedef void (blkcg_pol_init_pd_fn)(struct blkg_policy_data *pd);
62 typedef void (blkcg_pol_online_pd_fn)(struct blkg_policy_data *pd);
63 typedef void (blkcg_pol_offline_pd_fn)(struct blkg_policy_data *pd);
64 typedef void (blkcg_pol_free_pd_fn)(struct blkg_policy_data *pd);
65 typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkg_policy_data *pd);
66 typedef bool (blkcg_pol_stat_pd_fn)(struct blkg_policy_data *pd,
67 				struct seq_file *s);
68 
69 struct blkcg_policy {
70 	int				plid;
71 	/* cgroup files for the policy */
72 	struct cftype			*dfl_cftypes;
73 	struct cftype			*legacy_cftypes;
74 
75 	/* operations */
76 	blkcg_pol_alloc_cpd_fn		*cpd_alloc_fn;
77 	blkcg_pol_init_cpd_fn		*cpd_init_fn;
78 	blkcg_pol_free_cpd_fn		*cpd_free_fn;
79 	blkcg_pol_bind_cpd_fn		*cpd_bind_fn;
80 
81 	blkcg_pol_alloc_pd_fn		*pd_alloc_fn;
82 	blkcg_pol_init_pd_fn		*pd_init_fn;
83 	blkcg_pol_online_pd_fn		*pd_online_fn;
84 	blkcg_pol_offline_pd_fn		*pd_offline_fn;
85 	blkcg_pol_free_pd_fn		*pd_free_fn;
86 	blkcg_pol_reset_pd_stats_fn	*pd_reset_stats_fn;
87 	blkcg_pol_stat_pd_fn		*pd_stat_fn;
88 };
89 
90 extern struct blkcg blkcg_root;
91 extern bool blkcg_debug_stats;
92 
93 struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg,
94 				      struct request_queue *q, bool update_hint);
95 int blkcg_init_queue(struct request_queue *q);
96 void blkcg_exit_queue(struct request_queue *q);
97 
98 /* Blkio controller policy registration */
99 int blkcg_policy_register(struct blkcg_policy *pol);
100 void blkcg_policy_unregister(struct blkcg_policy *pol);
101 int blkcg_activate_policy(struct request_queue *q,
102 			  const struct blkcg_policy *pol);
103 void blkcg_deactivate_policy(struct request_queue *q,
104 			     const struct blkcg_policy *pol);
105 
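/*
 * Example: a minimal policy fills in a struct blkcg_policy with
 * pd_alloc_fn()/pd_free_fn() and registers it once at init time.
 * Illustrative sketch only, reusing the hypothetical example_blkg_pd /
 * pd_to_example names from the sketch above; none of the "example_*"
 * identifiers exist in the tree.
 *
 *    static struct blkg_policy_data *example_pd_alloc(gfp_t gfp,
 *            struct request_queue *q, struct blkcg *blkcg)
 *    {
 *        struct example_blkg_pd *epd;
 *
 *        epd = kzalloc_node(sizeof(*epd), gfp, q->node);
 *        return epd ? &epd->pd : NULL;
 *    }
 *
 *    static void example_pd_free(struct blkg_policy_data *pd)
 *    {
 *        kfree(pd_to_example(pd));
 *    }
 *
 *    static struct blkcg_policy blkcg_policy_example = {
 *        .pd_alloc_fn    = example_pd_alloc,
 *        .pd_free_fn     = example_pd_free,
 *    };
 *
 *    // at init:   blkcg_policy_register(&blkcg_policy_example);
 *    // per queue: blkcg_activate_policy(q, &blkcg_policy_example);
 */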
106 const char *blkg_dev_name(struct blkcg_gq *blkg);
107 void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg,
108 		       u64 (*prfill)(struct seq_file *,
109 				     struct blkg_policy_data *, int),
110 		       const struct blkcg_policy *pol, int data,
111 		       bool show_total);
112 u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v);
113 
114 struct blkg_conf_ctx {
115 	struct block_device		*bdev;
116 	struct blkcg_gq			*blkg;
117 	char				*body;
118 };
119 
120 struct block_device *blkcg_conf_open_bdev(char **inputp);
121 int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
122 		   char *input, struct blkg_conf_ctx *ctx);
123 void blkg_conf_finish(struct blkg_conf_ctx *ctx);
124 
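/*
 * Example: blkg_conf_prep()/blkg_conf_finish() bracket the body of a
 * cgroup file write handler.  prep parses the "MAJ:MIN" prefix, looks up
 * or creates the blkg, and returns the rest of the input via ctx.body.
 * Hedged sketch; "example_conf_write" and blkcg_policy_example are
 * hypothetical (see the sketch above).
 *
 *    static ssize_t example_conf_write(struct kernfs_open_file *of,
 *                                      char *buf, size_t nbytes, loff_t off)
 *    {
 *        struct blkcg *blkcg = css_to_blkcg(of_css(of));
 *        struct blkg_conf_ctx ctx;
 *        int ret;
 *
 *        ret = blkg_conf_prep(blkcg, &blkcg_policy_example, buf, &ctx);
 *        if (ret)
 *            return ret;
 *
 *        // parse ctx.body and apply the new setting to ctx.blkg here
 *
 *        blkg_conf_finish(&ctx);
 *        return nbytes;
 *    }
 */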
125 /**
126  * blkcg_css - find the current css
127  *
128  * Find the css associated with either the kthread or the current task.
129  * This may return a dying css, so it is up to the caller to use tryget logic
130  * to confirm it is alive and well.
131  */
132 static inline struct cgroup_subsys_state *blkcg_css(void)
133 {
134 	struct cgroup_subsys_state *css;
135 
136 	css = kthread_blkcg();
137 	if (css)
138 		return css;
139 	return task_css(current, io_cgrp_id);
140 }
141 
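/*
 * Example: since blkcg_css() may return a css that is already dying, a
 * caller that wants to hold on to it should pair the lookup with a
 * tryget.  Illustrative sketch only.
 *
 *    struct cgroup_subsys_state *css;
 *
 *    rcu_read_lock();
 *    css = blkcg_css();
 *    if (css_tryget_online(css)) {
 *        // css is pinned until the matching css_put()
 *        // ... use css ...
 *        css_put(css);
 *    }
 *    rcu_read_unlock();
 */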
142 /**
143  * bio_issue_as_root_blkg - see if this bio needs to be issued as root blkg
144  * @return: true if this bio needs to be submitted with the root blkg context.
145  *
146  * In order to avoid priority inversions we sometimes need to issue a bio as if
147  * it were attached to the root blkg, and then backcharge to the actual owning
148  * blkg.  The idea is we do bio_blkcg() to look up the actual context for the
149  * bio and attach the appropriate blkg to the bio.  Then we call this helper and
150  * if it is true run with the root blkg for that queue and then do any
151  * backcharging to the originating cgroup once the io is complete.
152  */
153 static inline bool bio_issue_as_root_blkg(struct bio *bio)
154 {
155 	return (bio->bi_opf & (REQ_META | REQ_SWAP)) != 0;
156 }
157 
158 /**
159  * __blkg_lookup - internal version of blkg_lookup()
160  * @blkcg: blkcg of interest
161  * @q: request_queue of interest
162  * @update_hint: whether to update lookup hint with the result or not
163  *
164  * This is the internal version and shouldn't be used by policy
165  * implementations.  Looks up blkgs for the @blkcg - @q pair regardless of
166  * @q's bypass state.  If @update_hint is %true, the caller should be
167  * holding @q->queue_lock and lookup hint is updated on success.
168  */
169 static inline struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg,
170 					     struct request_queue *q,
171 					     bool update_hint)
172 {
173 	struct blkcg_gq *blkg;
174 
175 	if (blkcg == &blkcg_root)
176 		return q->root_blkg;
177 
178 	blkg = rcu_dereference(blkcg->blkg_hint);
179 	if (blkg && blkg->q == q)
180 		return blkg;
181 
182 	return blkg_lookup_slowpath(blkcg, q, update_hint);
183 }
184 
185 /**
186  * blkg_lookup - lookup blkg for the specified blkcg - q pair
187  * @blkcg: blkcg of interest
188  * @q: request_queue of interest
189  *
190  * Lookup blkg for the @blkcg - @q pair.  This function should be called
191  * under RCU read lock.
192  */
193 static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg,
194 					   struct request_queue *q)
195 {
196 	WARN_ON_ONCE(!rcu_read_lock_held());
197 	return __blkg_lookup(blkcg, q, false);
198 }
199 
200 /**
201  * blk_queue_root_blkg - return blkg for the (blkcg_root, @q) pair
202  * @q: request_queue of interest
203  *
204  * Lookup blkg for @q at the root level. See also blkg_lookup().
205  */
206 static inline struct blkcg_gq *blk_queue_root_blkg(struct request_queue *q)
207 {
208 	return q->root_blkg;
209 }
210 
211 /**
212  * blkg_to_pd - get policy private data
213  * @blkg: blkg of interest
214  * @pol: policy of interest
215  *
216  * Return pointer to private data associated with the @blkg-@pol pair.
217  */
218 static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
219 						  struct blkcg_policy *pol)
220 {
221 	return blkg ? blkg->pd[pol->plid] : NULL;
222 }
223 
224 static inline struct blkcg_policy_data *blkcg_to_cpd(struct blkcg *blkcg,
225 						     struct blkcg_policy *pol)
226 {
227 	return blkcg ? blkcg->cpd[pol->plid] : NULL;
228 }
229 
230 /**
231  * pd_to_blkg - get blkg associated with policy private data
232  * @pd: policy private data of interest
233  *
234  * @pd is policy private data.  Determine the blkg it's associated with.
235  */
236 static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd)
237 {
238 	return pd ? pd->blkg : NULL;
239 }
240 
241 static inline struct blkcg *cpd_to_blkcg(struct blkcg_policy_data *cpd)
242 {
243 	return cpd ? cpd->blkcg : NULL;
244 }
245 
246 /**
247  * blkg_path - format cgroup path of blkg
248  * @blkg: blkg of interest
249  * @buf: target buffer
250  * @buflen: target buffer length
251  *
252  * Format the path of the cgroup of @blkg into @buf.
253  */
254 static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen)
255 {
256 	return cgroup_path(blkg->blkcg->css.cgroup, buf, buflen);
257 }
258 
259 /**
260  * blkg_get - get a blkg reference
261  * @blkg: blkg to get
262  *
263  * The caller should be holding an existing reference.
264  */
265 static inline void blkg_get(struct blkcg_gq *blkg)
266 {
267 	percpu_ref_get(&blkg->refcnt);
268 }
269 
270 /**
271  * blkg_tryget - try and get a blkg reference
272  * @blkg: blkg to get
273  *
274  * This is for use when doing an RCU lookup of the blkg.  We may be in the midst
275  * of freeing this blkg, so we can only use it if the refcnt is not zero.
276  */
277 static inline bool blkg_tryget(struct blkcg_gq *blkg)
278 {
279 	return blkg && percpu_ref_tryget(&blkg->refcnt);
280 }
281 
282 /**
283  * blkg_put - put a blkg reference
284  * @blkg: blkg to put
285  */
286 static inline void blkg_put(struct blkcg_gq *blkg)
287 {
288 	percpu_ref_put(&blkg->refcnt);
289 }
290 
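/*
 * Example: a typical RCU-side lookup takes a temporary reference with
 * blkg_tryget() before the RCU read section ends, so the blkg can be
 * used afterwards.  Illustrative sketch only.
 *
 *    struct blkcg_gq *blkg;
 *
 *    rcu_read_lock();
 *    blkg = blkg_lookup(blkcg, q);
 *    if (!blkg_tryget(blkg))
 *        blkg = NULL;
 *    rcu_read_unlock();
 *
 *    if (blkg) {
 *        // blkg stays valid here even after rcu_read_unlock()
 *        blkg_put(blkg);
 *    }
 */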
291 /**
292  * blkg_for_each_descendant_pre - pre-order walk of a blkg's descendants
293  * @d_blkg: loop cursor pointing to the current descendant
294  * @pos_css: used for iteration
295  * @p_blkg: target blkg to walk descendants of
296  *
297  * Walk @d_blkg through the descendants of @p_blkg.  Must be used with RCU
298  * read locked.  If called under either blkcg or queue lock, the iteration
299  * is guaranteed to include all and only online blkgs.  The caller may
300  * update @pos_css by calling css_rightmost_descendant() to skip subtree.
301  * @p_blkg is included in the iteration and the first node to be visited.
302  */
303 #define blkg_for_each_descendant_pre(d_blkg, pos_css, p_blkg)		\
304 	css_for_each_descendant_pre((pos_css), &(p_blkg)->blkcg->css)	\
305 		if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css),	\
306 					      (p_blkg)->q, false)))
307 
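/*
 * Example: walking every blkg hanging off a queue, starting from the
 * queue's root blkg, under the RCU read lock.  Illustrative sketch only;
 * "nr" is just a local counter.
 *
 *    struct cgroup_subsys_state *pos_css;
 *    struct blkcg_gq *blkg;
 *    int nr = 0;
 *
 *    rcu_read_lock();
 *    blkg_for_each_descendant_pre(blkg, pos_css, q->root_blkg)
 *        nr++;
 *    rcu_read_unlock();
 */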
308 /**
309  * blkg_for_each_descendant_post - post-order walk of a blkg's descendants
310  * @d_blkg: loop cursor pointing to the current descendant
311  * @pos_css: used for iteration
312  * @p_blkg: target blkg to walk descendants of
313  *
314  * Similar to blkg_for_each_descendant_pre() but performs post-order
315  * traversal instead.  Synchronization rules are the same.  @p_blkg is
316  * included in the iteration and the last node to be visited.
317  */
318 #define blkg_for_each_descendant_post(d_blkg, pos_css, p_blkg)		\
319 	css_for_each_descendant_post((pos_css), &(p_blkg)->blkcg->css)	\
320 		if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css),	\
321 					      (p_blkg)->q, false)))
322 
323 bool __blkcg_punt_bio_submit(struct bio *bio);
324 
325 static inline bool blkcg_punt_bio_submit(struct bio *bio)
326 {
327 	if (bio->bi_opf & REQ_CGROUP_PUNT)
328 		return __blkcg_punt_bio_submit(bio);
329 	else
330 		return false;
331 }
332 
333 static inline void blkcg_bio_issue_init(struct bio *bio)
334 {
335 	bio_issue_init(&bio->bi_issue, bio_sectors(bio));
336 }
337 
338 static inline void blkcg_use_delay(struct blkcg_gq *blkg)
339 {
340 	if (WARN_ON_ONCE(atomic_read(&blkg->use_delay) < 0))
341 		return;
342 	if (atomic_add_return(1, &blkg->use_delay) == 1)
343 		atomic_inc(&blkg->blkcg->css.cgroup->congestion_count);
344 }
345 
346 static inline int blkcg_unuse_delay(struct blkcg_gq *blkg)
347 {
348 	int old = atomic_read(&blkg->use_delay);
349 
350 	if (WARN_ON_ONCE(old < 0))
351 		return 0;
352 	if (old == 0)
353 		return 0;
354 
355 	/*
356 	 * We do this song and dance because we can race with somebody else
357 	 * adding or removing delay.  If we just did an atomic_dec we'd end up
358 	 * negative and we'd already be in trouble.  We need to subtract 1 and
359 	 * then check to see if we were the last delay so we can drop the
360 	 * congestion count on the cgroup.
361 	 */
362 	while (old) {
363 		int cur = atomic_cmpxchg(&blkg->use_delay, old, old - 1);
364 		if (cur == old)
365 			break;
366 		old = cur;
367 	}
368 
369 	if (old == 0)
370 		return 0;
371 	if (old == 1)
372 		atomic_dec(&blkg->blkcg->css.cgroup->congestion_count);
373 	return 1;
374 }
375 
376 /**
377  * blkcg_set_delay - Enable allocator delay mechanism with the specified delay amount
378  * @blkg: target blkg
379  * @delay: delay duration in nsecs
380  *
381  * When enabled with this function, the delay is not decayed and must be
382  * explicitly cleared with blkcg_clear_delay(). Must not be mixed with
383  * blkcg_[un]use_delay() and blkcg_add_delay() usages.
384  */
385 static inline void blkcg_set_delay(struct blkcg_gq *blkg, u64 delay)
386 {
387 	int old = atomic_read(&blkg->use_delay);
388 
389 	/* We only want 1 person setting the congestion count for this blkg. */
390 	if (!old && atomic_cmpxchg(&blkg->use_delay, old, -1) == old)
391 		atomic_inc(&blkg->blkcg->css.cgroup->congestion_count);
392 
393 	atomic64_set(&blkg->delay_nsec, delay);
394 }
395 
396 /**
397  * blkcg_clear_delay - Disable allocator delay mechanism
398  * @blkg: target blkg
399  *
400  * Disable use_delay mechanism. See blkcg_set_delay().
401  */
402 static inline void blkcg_clear_delay(struct blkcg_gq *blkg)
403 {
404 	int old = atomic_read(&blkg->use_delay);
405 
406 	/* We only want 1 person clearing the congestion count for this blkg. */
407 	if (old && atomic_cmpxchg(&blkg->use_delay, old, 0) == old)
408 		atomic_dec(&blkg->blkcg->css.cgroup->congestion_count);
409 }
410 
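/*
 * Example: blkcg_set_delay() and blkcg_clear_delay() are used as a pair
 * by a single owner; the delay is not decayed, so it stays in effect
 * until it is cleared explicitly.  Illustrative sketch only; the
 * "over_budget" condition is a hypothetical policy decision.
 *
 *    if (over_budget)
 *        blkcg_set_delay(blkg, NSEC_PER_MSEC);  // throttle issuers in this blkg
 *    else
 *        blkcg_clear_delay(blkg);
 */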
411 /**
412  * blk_cgroup_mergeable - Determine whether to allow or disallow merges
413  * @rq: request to merge into
414  * @bio: bio to merge
415  *
416  * @bio and @rq should belong to the same cgroup and their issue_as_root should
417  * match. The latter is necessary as we don't want to throttle e.g. a metadata
418  * update because it happens to be next to a regular IO.
419  */
420 static inline bool blk_cgroup_mergeable(struct request *rq, struct bio *bio)
421 {
422 	return rq->bio->bi_blkg == bio->bi_blkg &&
423 		bio_issue_as_root_blkg(rq->bio) == bio_issue_as_root_blkg(bio);
424 }
425 
426 void blk_cgroup_bio_start(struct bio *bio);
427 void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta);
428 #else	/* CONFIG_BLK_CGROUP */
429 
430 struct blkg_policy_data {
431 };
432 
433 struct blkcg_policy_data {
434 };
435 
436 struct blkcg_policy {
437 };
438 
439 #ifdef CONFIG_BLOCK
440 
441 static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; }
442 static inline struct blkcg_gq *blk_queue_root_blkg(struct request_queue *q)
443 { return NULL; }
444 static inline int blkcg_init_queue(struct request_queue *q) { return 0; }
445 static inline void blkcg_exit_queue(struct request_queue *q) { }
446 static inline int blkcg_policy_register(struct blkcg_policy *pol) { return 0; }
447 static inline void blkcg_policy_unregister(struct blkcg_policy *pol) { }
448 static inline int blkcg_activate_policy(struct request_queue *q,
449 					const struct blkcg_policy *pol) { return 0; }
450 static inline void blkcg_deactivate_policy(struct request_queue *q,
451 					   const struct blkcg_policy *pol) { }
452 
453 static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
454 						  struct blkcg_policy *pol) { return NULL; }
455 static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return NULL; }
456 static inline char *blkg_path(struct blkcg_gq *blkg) { return NULL; }
457 static inline void blkg_get(struct blkcg_gq *blkg) { }
458 static inline void blkg_put(struct blkcg_gq *blkg) { }
459 
460 static inline bool blkcg_punt_bio_submit(struct bio *bio) { return false; }
461 static inline void blkcg_bio_issue_init(struct bio *bio) { }
462 static inline void blk_cgroup_bio_start(struct bio *bio) { }
463 static inline bool blk_cgroup_mergeable(struct request *rq, struct bio *bio) { return true; }
464 
465 #define blk_queue_for_each_rl(rl, q)	\
466 	for ((rl) = &(q)->root_rl; (rl); (rl) = NULL)
467 
468 #endif	/* CONFIG_BLOCK */
469 #endif	/* CONFIG_BLK_CGROUP */
470 
471 #endif /* _BLK_CGROUP_PRIVATE_H */
472