xref: /openbmc/linux/block/ioprio.c (revision f5c27da4)
1  // SPDX-License-Identifier: GPL-2.0
2  /*
3   * block/ioprio.c
4   *
5   * Copyright (C) 2004 Jens Axboe <axboe@kernel.dk>
6   *
7   * Helper functions for setting/querying io priorities of processes. The
8   * system calls closely mimic getpriority/setpriority, see the man page for
9   * those. The prio argument is a composite of prio class and prio data, where
10   * the data argument has meaning within that class. The standard scheduling
11   * classes have 8 distinct prio levels, with 0 being the highest prio and 7
12   * being the lowest.
13   *
14   * IOW, setting BE scheduling class with prio 2 is done ala:
15   *
16   * unsigned int prio = (IOPRIO_CLASS_BE << IOPRIO_CLASS_SHIFT) | 2;
17   *
18   * ioprio_set(PRIO_PROCESS, pid, prio);
19   *
20   * See also Documentation/block/ioprio.rst
21   *
22   */
23  #include <linux/gfp.h>
24  #include <linux/kernel.h>
25  #include <linux/ioprio.h>
26  #include <linux/cred.h>
27  #include <linux/blkdev.h>
28  #include <linux/capability.h>
29  #include <linux/syscalls.h>
30  #include <linux/security.h>
31  #include <linux/pid_namespace.h>
32  
33  int ioprio_check_cap(int ioprio)
34  {
35  	int class = IOPRIO_PRIO_CLASS(ioprio);
36  	int data = IOPRIO_PRIO_DATA(ioprio);
37  
38  	switch (class) {
39  		case IOPRIO_CLASS_RT:
40  			/*
41  			 * Originally this only checked for CAP_SYS_ADMIN,
42  			 * which was implicitly allowed for pid 0 by security
43  			 * modules such as SELinux. Make sure we check
44  			 * CAP_SYS_ADMIN first to avoid a denial/avc for
45  			 * possibly missing CAP_SYS_NICE permission.
46  			 */
47  			if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_NICE))
48  				return -EPERM;
49  			fallthrough;
50  			/* rt has prio field too */
51  		case IOPRIO_CLASS_BE:
52  			if (data >= IOPRIO_NR_LEVELS || data < 0)
53  				return -EINVAL;
54  			break;
55  		case IOPRIO_CLASS_IDLE:
56  			break;
57  		case IOPRIO_CLASS_NONE:
58  			if (data)
59  				return -EINVAL;
60  			break;
61  		default:
62  			return -EINVAL;
63  	}
64  
65  	return 0;
66  }
67  
68  SYSCALL_DEFINE3(ioprio_set, int, which, int, who, int, ioprio)
69  {
70  	struct task_struct *p, *g;
71  	struct user_struct *user;
72  	struct pid *pgrp;
73  	kuid_t uid;
74  	int ret;
75  
76  	ret = ioprio_check_cap(ioprio);
77  	if (ret)
78  		return ret;
79  
80  	ret = -ESRCH;
81  	rcu_read_lock();
82  	switch (which) {
83  		case IOPRIO_WHO_PROCESS:
84  			if (!who)
85  				p = current;
86  			else
87  				p = find_task_by_vpid(who);
88  			if (p)
89  				ret = set_task_ioprio(p, ioprio);
90  			break;
91  		case IOPRIO_WHO_PGRP:
92  			if (!who)
93  				pgrp = task_pgrp(current);
94  			else
95  				pgrp = find_vpid(who);
96  
97  			read_lock(&tasklist_lock);
98  			do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
99  				ret = set_task_ioprio(p, ioprio);
100  				if (ret) {
101  					read_unlock(&tasklist_lock);
102  					goto out;
103  				}
104  			} while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
105  			read_unlock(&tasklist_lock);
106  
107  			break;
108  		case IOPRIO_WHO_USER:
109  			uid = make_kuid(current_user_ns(), who);
110  			if (!uid_valid(uid))
111  				break;
112  			if (!who)
113  				user = current_user();
114  			else
115  				user = find_user(uid);
116  
117  			if (!user)
118  				break;
119  
120  			for_each_process_thread(g, p) {
121  				if (!uid_eq(task_uid(p), uid) ||
122  				    !task_pid_vnr(p))
123  					continue;
124  				ret = set_task_ioprio(p, ioprio);
125  				if (ret)
126  					goto free_uid;
127  			}
128  free_uid:
129  			if (who)
130  				free_uid(user);
131  			break;
132  		default:
133  			ret = -EINVAL;
134  	}
135  
136  out:
137  	rcu_read_unlock();
138  	return ret;
139  }
140  
141  /*
142   * If the task has set an I/O priority, use that. Otherwise, return
143   * the default I/O priority.
144   *
145   * Expected to be called for current task or with task_lock() held to keep
146   * io_context stable.
147   */
148  int __get_task_ioprio(struct task_struct *p)
149  {
150  	struct io_context *ioc = p->io_context;
151  	int prio;
152  
153  	if (p != current)
154  		lockdep_assert_held(&p->alloc_lock);
155  	if (ioc)
156  		prio = ioc->ioprio;
157  	else
158  		prio = IOPRIO_DEFAULT;
159  
160  	if (IOPRIO_PRIO_CLASS(prio) == IOPRIO_CLASS_NONE)
161  		prio = IOPRIO_PRIO_VALUE(task_nice_ioclass(p),
162  					 task_nice_ioprio(p));
163  	return prio;
164  }
165  EXPORT_SYMBOL_GPL(__get_task_ioprio);
166  
/*
 * Security-checked wrapper around __get_task_ioprio().
 *
 * Returns the non-zero result of security_task_getioprio() if the hook
 * rejects the query; otherwise the effective priority of @p, read with
 * task_lock() held.
 */
static int get_task_ioprio(struct task_struct *p)
{
	int prio = security_task_getioprio(p);

	if (prio)
		return prio;

	task_lock(p);
	prio = __get_task_ioprio(p);
	task_unlock(p);

	return prio;
}
180  
181  /*
182   * Return raw IO priority value as set by userspace. We use this for
183   * ioprio_get(pid, IOPRIO_WHO_PROCESS) so that we keep historical behavior and
184   * also so that userspace can distinguish unset IO priority (which just gets
185   * overriden based on task's nice value) from IO priority set to some value.
186   */
187  static int get_task_raw_ioprio(struct task_struct *p)
188  {
189  	int ret;
190  
191  	ret = security_task_getioprio(p);
192  	if (ret)
193  		goto out;
194  	task_lock(p);
195  	if (p->io_context)
196  		ret = p->io_context->ioprio;
197  	else
198  		ret = IOPRIO_DEFAULT;
199  	task_unlock(p);
200  out:
201  	return ret;
202  }
203  
/*
 * Pick the numerically smaller of two composite priority values; callers
 * treat the smaller value as the better priority.
 */
static int ioprio_best(unsigned short aprio, unsigned short bprio)
{
	return aprio < bprio ? aprio : bprio;
}
208  
209  SYSCALL_DEFINE2(ioprio_get, int, which, int, who)
210  {
211  	struct task_struct *g, *p;
212  	struct user_struct *user;
213  	struct pid *pgrp;
214  	kuid_t uid;
215  	int ret = -ESRCH;
216  	int tmpio;
217  
218  	rcu_read_lock();
219  	switch (which) {
220  		case IOPRIO_WHO_PROCESS:
221  			if (!who)
222  				p = current;
223  			else
224  				p = find_task_by_vpid(who);
225  			if (p)
226  				ret = get_task_raw_ioprio(p);
227  			break;
228  		case IOPRIO_WHO_PGRP:
229  			if (!who)
230  				pgrp = task_pgrp(current);
231  			else
232  				pgrp = find_vpid(who);
233  			read_lock(&tasklist_lock);
234  			do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
235  				tmpio = get_task_ioprio(p);
236  				if (tmpio < 0)
237  					continue;
238  				if (ret == -ESRCH)
239  					ret = tmpio;
240  				else
241  					ret = ioprio_best(ret, tmpio);
242  			} while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
243  			read_unlock(&tasklist_lock);
244  
245  			break;
246  		case IOPRIO_WHO_USER:
247  			uid = make_kuid(current_user_ns(), who);
248  			if (!who)
249  				user = current_user();
250  			else
251  				user = find_user(uid);
252  
253  			if (!user)
254  				break;
255  
256  			for_each_process_thread(g, p) {
257  				if (!uid_eq(task_uid(p), user->uid) ||
258  				    !task_pid_vnr(p))
259  					continue;
260  				tmpio = get_task_ioprio(p);
261  				if (tmpio < 0)
262  					continue;
263  				if (ret == -ESRCH)
264  					ret = tmpio;
265  				else
266  					ret = ioprio_best(ret, tmpio);
267  			}
268  
269  			if (who)
270  				free_uid(user);
271  			break;
272  		default:
273  			ret = -EINVAL;
274  	}
275  
276  	rcu_read_unlock();
277  	return ret;
278  }
279