/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BLK_CGROUP_PRIVATE_H
#define _BLK_CGROUP_PRIVATE_H
/*
 * block cgroup private header
 *
 * Based on ideas and code from CFQ, CFS and BFQ:
 * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
 *
 * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
 *		      Paolo Valente <paolo.valente@unimore.it>
 *
 * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com>
 *	              Nauman Rafique <nauman@google.com>
 */

#include <linux/blk-cgroup.h>

/* percpu_counter batch for blkg_[rw]stats, per-cpu drift doesn't matter */
#define BLKG_STAT_CPU_BATCH	(INT_MAX / 2)

#ifdef CONFIG_BLK_CGROUP

/*
 * A blkcg_gq (blkg) is an association between a block cgroup (blkcg) and a
 * request_queue (q).  This is used by blkcg policies which need to track
 * information per blkcg - q pair.
 *
 * There can be multiple active blkcg policies and each blkg:policy pair is
 * represented by a blkg_policy_data which is allocated and freed by each
 * policy's pd_alloc/free_fn() methods.  A policy can allocate a private data
 * area by allocating a larger data structure which embeds blkg_policy_data
 * at the beginning.
 */
struct blkg_policy_data {
	/* the blkg and policy id this per-policy data belongs to */
	struct blkcg_gq			*blkg;
	int				plid;
};
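
/*
 * Illustrative sketch (hypothetical policy "foo", not taken from an in-tree
 * policy): the per-blkg private area is declared with blkg_policy_data as
 * its first member so the policy can convert between the two:
 *
 *	struct foo_blkg_data {
 *		struct blkg_policy_data pd;
 *		u64 ios_seen;
 *	};
 *
 *	static inline struct foo_blkg_data *pd_to_foo(struct blkg_policy_data *pd)
 *	{
 *		return pd ? container_of(pd, struct foo_blkg_data, pd) : NULL;
 *	}
 *
 *	static inline struct foo_blkg_data *blkg_to_foo(struct blkcg_gq *blkg)
 *	{
 *		return pd_to_foo(blkg_to_pd(blkg, &blkcg_policy_foo));
 *	}
 */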

/*
 * Policies that need to keep per-blkcg data which is independent of any
 * request_queue associated with it should implement cpd_alloc/free_fn()
 * methods.  A policy can allocate a private data area by allocating a
 * larger data structure which embeds blkcg_policy_data at the beginning.
 * cpd_init() is invoked to let each policy handle per-blkcg data.
 */
struct blkcg_policy_data {
	/* the blkcg and policy id this per-policy data belongs to */
	struct blkcg			*blkcg;
	int				plid;
};
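
/*
 * Illustrative sketch (hypothetical policy "foo"): per-blkcg data likewise
 * embeds blkcg_policy_data first and is allocated by the policy's
 * cpd_alloc_fn():
 *
 *	struct foo_blkcg_data {
 *		struct blkcg_policy_data cpd;
 *		unsigned int dfl_weight;
 *	};
 *
 *	static struct blkcg_policy_data *foo_cpd_alloc(gfp_t gfp)
 *	{
 *		struct foo_blkcg_data *fcd = kzalloc(sizeof(*fcd), gfp);
 *
 *		return fcd ? &fcd->cpd : NULL;
 *	}
 */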

typedef struct blkcg_policy_data *(blkcg_pol_alloc_cpd_fn)(gfp_t gfp);
typedef void (blkcg_pol_init_cpd_fn)(struct blkcg_policy_data *cpd);
typedef void (blkcg_pol_free_cpd_fn)(struct blkcg_policy_data *cpd);
typedef void (blkcg_pol_bind_cpd_fn)(struct blkcg_policy_data *cpd);
typedef struct blkg_policy_data *(blkcg_pol_alloc_pd_fn)(gfp_t gfp,
				struct request_queue *q, struct blkcg *blkcg);
typedef void (blkcg_pol_init_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_online_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_offline_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_free_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkg_policy_data *pd);
typedef bool (blkcg_pol_stat_pd_fn)(struct blkg_policy_data *pd,
				struct seq_file *s);

struct blkcg_policy {
	int				plid;
	/* cgroup files for the policy */
	struct cftype			*dfl_cftypes;
	struct cftype			*legacy_cftypes;

	/* operations */
	blkcg_pol_alloc_cpd_fn		*cpd_alloc_fn;
	blkcg_pol_init_cpd_fn		*cpd_init_fn;
	blkcg_pol_free_cpd_fn		*cpd_free_fn;
	blkcg_pol_bind_cpd_fn		*cpd_bind_fn;

	blkcg_pol_alloc_pd_fn		*pd_alloc_fn;
	blkcg_pol_init_pd_fn		*pd_init_fn;
	blkcg_pol_online_pd_fn		*pd_online_fn;
	blkcg_pol_offline_pd_fn		*pd_offline_fn;
	blkcg_pol_free_pd_fn		*pd_free_fn;
	blkcg_pol_reset_pd_stats_fn	*pd_reset_stats_fn;
	blkcg_pol_stat_pd_fn		*pd_stat_fn;
};
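
/*
 * Illustrative sketch (hypothetical policy "foo"): a policy instance wires
 * up only the callbacks it needs and leaves the rest NULL; ->plid is filled
 * in by blkcg_policy_register():
 *
 *	static struct blkcg_policy blkcg_policy_foo = {
 *		.dfl_cftypes	= foo_dfl_files,
 *		.legacy_cftypes	= foo_legacy_files,
 *		.pd_alloc_fn	= foo_pd_alloc,
 *		.pd_init_fn	= foo_pd_init,
 *		.pd_free_fn	= foo_pd_free,
 *	};
 */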

extern struct blkcg blkcg_root;
extern bool blkcg_debug_stats;

struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg,
				      struct request_queue *q, bool update_hint);
int blkcg_init_queue(struct request_queue *q);
void blkcg_exit_queue(struct request_queue *q);

/* Blkio controller policy registration */
int blkcg_policy_register(struct blkcg_policy *pol);
void blkcg_policy_unregister(struct blkcg_policy *pol);
int blkcg_activate_policy(struct request_queue *q,
			  const struct blkcg_policy *pol);
void blkcg_deactivate_policy(struct request_queue *q,
			     const struct blkcg_policy *pol);
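
/*
 * Illustrative sketch (hypothetical policy "foo"): a policy is registered
 * once at init time and then activated/deactivated per request_queue:
 *
 *	static int __init foo_init(void)
 *	{
 *		return blkcg_policy_register(&blkcg_policy_foo);
 *	}
 *
 *	static int foo_enable(struct request_queue *q)
 *	{
 *		return blkcg_activate_policy(q, &blkcg_policy_foo);
 *	}
 *
 *	static void foo_disable(struct request_queue *q)
 *	{
 *		blkcg_deactivate_policy(q, &blkcg_policy_foo);
 *	}
 */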

const char *blkg_dev_name(struct blkcg_gq *blkg);
void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg,
		       u64 (*prfill)(struct seq_file *,
				     struct blkg_policy_data *, int),
		       const struct blkcg_policy *pol, int data,
		       bool show_total);
u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v);
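
/*
 * Illustrative sketch (hypothetical policy "foo", reusing pd_to_foo() from
 * the sketch above): a per-blkg stat file is typically implemented as a
 * prfill callback handed to blkcg_print_blkgs():
 *
 *	static u64 foo_prfill_ios(struct seq_file *sf,
 *				  struct blkg_policy_data *pd, int off)
 *	{
 *		return __blkg_prfill_u64(sf, pd, pd_to_foo(pd)->ios_seen);
 *	}
 *
 *	static int foo_print_ios(struct seq_file *sf, void *v)
 *	{
 *		blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
 *				  foo_prfill_ios, &blkcg_policy_foo, 0, true);
 *		return 0;
 *	}
 */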

struct blkg_conf_ctx {
	struct block_device		*bdev;
	struct blkcg_gq			*blkg;
	char				*body;
};

struct block_device *blkcg_conf_open_bdev(char **inputp);
int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
		   char *input, struct blkg_conf_ctx *ctx);
void blkg_conf_finish(struct blkg_conf_ctx *ctx);
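
/*
 * Illustrative sketch (hypothetical policy "foo"; foo_parse_and_apply() is
 * made up): a "MAJ:MIN value" style cgroup file write is usually parsed with
 * blkg_conf_prep()/blkg_conf_finish(), which resolve the device and blkg and
 * hand back the remainder of the input in ctx.body:
 *
 *	static ssize_t foo_set_limit(struct kernfs_open_file *of, char *buf,
 *				     size_t nbytes, loff_t off)
 *	{
 *		struct blkcg *blkcg = css_to_blkcg(of_css(of));
 *		struct blkg_conf_ctx ctx;
 *		int ret;
 *
 *		ret = blkg_conf_prep(blkcg, &blkcg_policy_foo, buf, &ctx);
 *		if (ret)
 *			return ret;
 *
 *		ret = foo_parse_and_apply(ctx.blkg, ctx.body);
 *
 *		blkg_conf_finish(&ctx);
 *		return ret ?: nbytes;
 *	}
 */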

/**
 * blkcg_css - find the current css
 *
 * Find the css associated with either the kthread or the current task.
 * This may return a dying css, so it is up to the caller to use tryget logic
 * to confirm it is alive and well.
 */
static inline struct cgroup_subsys_state *blkcg_css(void)
{
	struct cgroup_subsys_state *css;

	css = kthread_blkcg();
	if (css)
		return css;
	return task_css(current, io_cgrp_id);
}
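
/*
 * A minimal caller-side sketch of the tryget logic mentioned above
 * (illustrative only):
 *
 *	rcu_read_lock();
 *	css = blkcg_css();
 *	if (css_tryget(css)) {
 *		... use the css ...
 *		css_put(css);
 *	}
 *	rcu_read_unlock();
 */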

/**
 * __bio_blkcg - internal, inconsistent version to get blkcg
 *
 * DO NOT USE.
 * This function is inconsistent and consequently is dangerous to use.  The
 * first part of the function returns a blkcg where a reference is owned by
 * the bio.  This means it does not need to be rcu protected as it cannot go
 * away with the bio owning a reference to it.  However, the latter path
 * potentially gets it from task_css().  This can race against task migration
 * and the cgroup dying.  It is also semantically different as it must be
 * called rcu protected and is susceptible to failure when trying to get a
 * reference to it.  Therefore, it is not ok to assume that *_get() will
 * always succeed on the blkcg returned here.
 */
static inline struct blkcg *__bio_blkcg(struct bio *bio)
{
	if (bio && bio->bi_blkg)
		return bio->bi_blkg->blkcg;
	return css_to_blkcg(blkcg_css());
}

/**
 * bio_issue_as_root_blkg - see if this bio needs to be issued as root blkg
 * @bio: bio of interest
 *
 * In order to avoid priority inversions we sometimes need to issue a bio as if
 * it were attached to the root blkg, and then backcharge to the actual owning
 * blkg.  The idea is we do bio_blkcg() to look up the actual context for the
 * bio and attach the appropriate blkg to the bio.  Then we call this helper and
 * if it is true run with the root blkg for that queue and then do any
 * backcharging to the originating cgroup once the io is complete.
 *
 * Return: %true if this bio needs to be submitted with the root blkg context.
 */
static inline bool bio_issue_as_root_blkg(struct bio *bio)
{
	return (bio->bi_opf & (REQ_META | REQ_SWAP)) != 0;
}
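
/*
 * Sketch of how an issue path might honour this (illustrative only, not
 * lifted from an in-tree policy): after @bio has been associated with its
 * owning blkg, bios flagged here run against the root blkg of the queue and
 * are backcharged later:
 *
 *	struct blkcg_gq *blkg = bio->bi_blkg;
 *
 *	if (bio_issue_as_root_blkg(bio))
 *		blkg = blk_queue_root_blkg(blkg->q);
 */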

/**
 * __blkg_lookup - internal version of blkg_lookup()
 * @blkcg: blkcg of interest
 * @q: request_queue of interest
 * @update_hint: whether to update lookup hint with the result or not
 *
 * This is the internal version and shouldn't be used by policy
 * implementations.  Looks up blkgs for the @blkcg - @q pair regardless of
 * @q's bypass state.  If @update_hint is %true, the caller should be
 * holding @q->queue_lock and the lookup hint is updated on success.
 */
static inline struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg,
					     struct request_queue *q,
					     bool update_hint)
{
	struct blkcg_gq *blkg;

	if (blkcg == &blkcg_root)
		return q->root_blkg;

	blkg = rcu_dereference(blkcg->blkg_hint);
	if (blkg && blkg->q == q)
		return blkg;

	return blkg_lookup_slowpath(blkcg, q, update_hint);
}

/**
 * blkg_lookup - lookup blkg for the specified blkcg - q pair
 * @blkcg: blkcg of interest
 * @q: request_queue of interest
 *
 * Lookup blkg for the @blkcg - @q pair.  This function should be called
 * under RCU read lock.
 */
static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg,
					   struct request_queue *q)
{
	WARN_ON_ONCE(!rcu_read_lock_held());
	return __blkg_lookup(blkcg, q, false);
}
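
/*
 * Typical usage sketch (illustrative only): the returned pointer is only
 * valid inside the RCU read section unless a reference is taken:
 *
 *	struct blkcg_gq *blkg;
 *
 *	rcu_read_lock();
 *	blkg = blkg_lookup(blkcg, q);
 *	if (blkg)
 *		... use blkg within the RCU section ...
 *	rcu_read_unlock();
 */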

/**
 * blk_queue_root_blkg - return blkg for the (blkcg_root, @q) pair
 * @q: request_queue of interest
 *
 * Lookup blkg for @q at the root level. See also blkg_lookup().
 */
static inline struct blkcg_gq *blk_queue_root_blkg(struct request_queue *q)
{
	return q->root_blkg;
}

/**
 * blkg_to_pd - get policy private data
 * @blkg: blkg of interest
 * @pol: policy of interest
 *
 * Return pointer to private data associated with the @blkg-@pol pair.
 */
static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
						  struct blkcg_policy *pol)
{
	return blkg ? blkg->pd[pol->plid] : NULL;
}

static inline struct blkcg_policy_data *blkcg_to_cpd(struct blkcg *blkcg,
						     struct blkcg_policy *pol)
{
	return blkcg ? blkcg->cpd[pol->plid] : NULL;
}

/**
 * pd_to_blkg - get blkg associated with policy private data
 * @pd: policy private data of interest
 *
 * @pd is policy private data.  Determine the blkg it's associated with.
 */
static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd)
{
	return pd ? pd->blkg : NULL;
}

static inline struct blkcg *cpd_to_blkcg(struct blkcg_policy_data *cpd)
{
	return cpd ? cpd->blkcg : NULL;
}

/**
 * blkg_path - format cgroup path of blkg
 * @blkg: blkg of interest
 * @buf: target buffer
 * @buflen: target buffer length
 *
 * Format the path of the cgroup of @blkg into @buf.
 */
static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen)
{
	return cgroup_path(blkg->blkcg->css.cgroup, buf, buflen);
}

/**
 * blkg_get - get a blkg reference
 * @blkg: blkg to get
 *
 * The caller should be holding an existing reference.
 */
static inline void blkg_get(struct blkcg_gq *blkg)
{
	percpu_ref_get(&blkg->refcnt);
}

/**
 * blkg_tryget - try and get a blkg reference
 * @blkg: blkg to get
 *
 * This is for use when doing an RCU lookup of the blkg.  We may be in the midst
 * of freeing this blkg, so we can only use it if the refcnt is not zero.
 */
static inline bool blkg_tryget(struct blkcg_gq *blkg)
{
	return blkg && percpu_ref_tryget(&blkg->refcnt);
}

/**
 * blkg_put - put a blkg reference
 * @blkg: blkg to put
 */
static inline void blkg_put(struct blkcg_gq *blkg)
{
	percpu_ref_put(&blkg->refcnt);
}
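
/*
 * Reference sketch (illustrative only): to use a blkg found under RCU beyond
 * the read section, pin it with blkg_tryget() and drop the reference with
 * blkg_put() when done.  blkg_tryget() tolerates a NULL @blkg:
 *
 *	rcu_read_lock();
 *	blkg = blkg_lookup(blkcg, q);
 *	if (!blkg_tryget(blkg))
 *		blkg = NULL;
 *	rcu_read_unlock();
 *
 *	if (blkg) {
 *		... blkg is pinned and usable here ...
 *		blkg_put(blkg);
 *	}
 */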

/**
 * blkg_for_each_descendant_pre - pre-order walk of a blkg's descendants
 * @d_blkg: loop cursor pointing to the current descendant
 * @pos_css: used for iteration
 * @p_blkg: target blkg to walk descendants of
 *
 * Walk @d_blkg through the descendants of @p_blkg.  Must be used with RCU
 * read locked.  If called under either blkcg or queue lock, the iteration
 * is guaranteed to include all and only online blkgs.  The caller may
 * update @pos_css by calling css_rightmost_descendant() to skip a subtree.
 * @p_blkg is included in the iteration and the first node to be visited.
 */
#define blkg_for_each_descendant_pre(d_blkg, pos_css, p_blkg)		\
	css_for_each_descendant_pre((pos_css), &(p_blkg)->blkcg->css)	\
		if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css),	\
					      (p_blkg)->q, false)))

/**
 * blkg_for_each_descendant_post - post-order walk of a blkg's descendants
 * @d_blkg: loop cursor pointing to the current descendant
 * @pos_css: used for iteration
 * @p_blkg: target blkg to walk descendants of
 *
 * Similar to blkg_for_each_descendant_pre() but performs post-order
 * traversal instead.  Synchronization rules are the same.  @p_blkg is
 * included in the iteration and the last node to be visited.
 */
#define blkg_for_each_descendant_post(d_blkg, pos_css, p_blkg)		\
	css_for_each_descendant_post((pos_css), &(p_blkg)->blkcg->css)	\
		if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css),	\
					      (p_blkg)->q, false)))
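
/*
 * Iteration sketch (hypothetical per-blkg reset for policy "foo",
 * illustrative only):
 *
 *	struct cgroup_subsys_state *pos_css;
 *	struct blkcg_gq *blkg;
 *
 *	rcu_read_lock();
 *	blkg_for_each_descendant_pre(blkg, pos_css, q->root_blkg) {
 *		struct blkg_policy_data *pd = blkg_to_pd(blkg, &blkcg_policy_foo);
 *
 *		if (pd)
 *			foo_reset_blkg(pd);
 *	}
 *	rcu_read_unlock();
 */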

bool __blkcg_punt_bio_submit(struct bio *bio);

static inline bool blkcg_punt_bio_submit(struct bio *bio)
{
	if (bio->bi_opf & REQ_CGROUP_PUNT)
		return __blkcg_punt_bio_submit(bio);
	else
		return false;
}

static inline void blkcg_bio_issue_init(struct bio *bio)
{
	bio_issue_init(&bio->bi_issue, bio_sectors(bio));
}

static inline void blkcg_use_delay(struct blkcg_gq *blkg)
{
	if (WARN_ON_ONCE(atomic_read(&blkg->use_delay) < 0))
		return;
	if (atomic_add_return(1, &blkg->use_delay) == 1)
		atomic_inc(&blkg->blkcg->css.cgroup->congestion_count);
}

static inline int blkcg_unuse_delay(struct blkcg_gq *blkg)
{
	int old = atomic_read(&blkg->use_delay);

	if (WARN_ON_ONCE(old < 0))
		return 0;
	if (old == 0)
		return 0;

	/*
	 * We do this song and dance because we can race with somebody else
	 * adding or removing delay.  If we just did an atomic_dec we'd end up
	 * negative and we'd already be in trouble.  We need to subtract 1 and
	 * then check to see if we were the last delay so we can drop the
	 * congestion count on the cgroup.
	 */
	while (old) {
		int cur = atomic_cmpxchg(&blkg->use_delay, old, old - 1);
		if (cur == old)
			break;
		old = cur;
	}

	if (old == 0)
		return 0;
	if (old == 1)
		atomic_dec(&blkg->blkcg->css.cgroup->congestion_count);
	return 1;
}
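
/*
 * Pairing sketch (illustrative only; delay_ns stands for whatever the policy
 * computed): a policy that detects congestion on a blkg typically arms the
 * delay and charges some time, then disarms it once the group is back under
 * its target:
 *
 *	blkcg_use_delay(blkg);
 *	blkcg_add_delay(blkg, ktime_get_ns(), delay_ns);
 *	...
 *	blkcg_unuse_delay(blkg);
 */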

/**
 * blkcg_set_delay - Enable allocator delay mechanism with the specified delay amount
 * @blkg: target blkg
 * @delay: delay duration in nsecs
 *
 * When enabled with this function, the delay is not decayed and must be
 * explicitly cleared with blkcg_clear_delay(). Must not be mixed with
 * blkcg_[un]use_delay() and blkcg_add_delay() usages.
 */
static inline void blkcg_set_delay(struct blkcg_gq *blkg, u64 delay)
{
	int old = atomic_read(&blkg->use_delay);

	/* We only want 1 person setting the congestion count for this blkg. */
	if (!old && atomic_cmpxchg(&blkg->use_delay, old, -1) == old)
		atomic_inc(&blkg->blkcg->css.cgroup->congestion_count);

	atomic64_set(&blkg->delay_nsec, delay);
}

/**
 * blkcg_clear_delay - Disable allocator delay mechanism
 * @blkg: target blkg
 *
 * Disable use_delay mechanism. See blkcg_set_delay().
 */
static inline void blkcg_clear_delay(struct blkcg_gq *blkg)
{
	int old = atomic_read(&blkg->use_delay);

	/* We only want 1 person clearing the congestion count for this blkg. */
	if (old && atomic_cmpxchg(&blkg->use_delay, old, 0) == old)
		atomic_dec(&blkg->blkcg->css.cgroup->congestion_count);
}
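
/*
 * Pairing sketch (illustrative only): unlike the blkcg_[un]use_delay()
 * interface above, a fixed delay is armed and disarmed explicitly:
 *
 *	blkcg_set_delay(blkg, 10 * NSEC_PER_MSEC);
 *	...
 *	blkcg_clear_delay(blkg);
 */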

void blk_cgroup_bio_start(struct bio *bio);
void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta);
#else	/* CONFIG_BLK_CGROUP */

struct blkg_policy_data {
};

struct blkcg_policy_data {
};

struct blkcg_policy {
};

#ifdef CONFIG_BLOCK

static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg,
					   struct request_queue *q) { return NULL; }
static inline struct blkcg_gq *blk_queue_root_blkg(struct request_queue *q)
{ return NULL; }
static inline int blkcg_init_queue(struct request_queue *q) { return 0; }
static inline void blkcg_exit_queue(struct request_queue *q) { }
static inline int blkcg_policy_register(struct blkcg_policy *pol) { return 0; }
static inline void blkcg_policy_unregister(struct blkcg_policy *pol) { }
static inline int blkcg_activate_policy(struct request_queue *q,
					const struct blkcg_policy *pol) { return 0; }
static inline void blkcg_deactivate_policy(struct request_queue *q,
					   const struct blkcg_policy *pol) { }

static inline struct blkcg *__bio_blkcg(struct bio *bio) { return NULL; }

static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
						  struct blkcg_policy *pol) { return NULL; }
static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return NULL; }
static inline char *blkg_path(struct blkcg_gq *blkg) { return NULL; }
static inline void blkg_get(struct blkcg_gq *blkg) { }
static inline void blkg_put(struct blkcg_gq *blkg) { }

static inline bool blkcg_punt_bio_submit(struct bio *bio) { return false; }
static inline void blkcg_bio_issue_init(struct bio *bio) { }
static inline void blk_cgroup_bio_start(struct bio *bio) { }

#define blk_queue_for_each_rl(rl, q)	\
	for ((rl) = &(q)->root_rl; (rl); (rl) = NULL)

#endif	/* CONFIG_BLOCK */
#endif	/* CONFIG_BLK_CGROUP */

#endif /* _BLK_CGROUP_PRIVATE_H */