1 // SPDX-License-Identifier: GPL-2.0 2 3 /* 4 * Auto-group scheduling implementation: 5 */ 6 7 unsigned int __read_mostly sysctl_sched_autogroup_enabled = 1; 8 static struct autogroup autogroup_default; 9 static atomic_t autogroup_seq_nr; 10 11 #ifdef CONFIG_SYSCTL 12 static struct ctl_table sched_autogroup_sysctls[] = { 13 { 14 .procname = "sched_autogroup_enabled", 15 .data = &sysctl_sched_autogroup_enabled, 16 .maxlen = sizeof(unsigned int), 17 .mode = 0644, 18 .proc_handler = proc_dointvec_minmax, 19 .extra1 = SYSCTL_ZERO, 20 .extra2 = SYSCTL_ONE, 21 }, 22 {} 23 }; 24 25 static void __init sched_autogroup_sysctl_init(void) 26 { 27 register_sysctl_init("kernel", sched_autogroup_sysctls); 28 } 29 #else 30 #define sched_autogroup_sysctl_init() do { } while (0) 31 #endif 32 33 void __init autogroup_init(struct task_struct *init_task) 34 { 35 autogroup_default.tg = &root_task_group; 36 kref_init(&autogroup_default.kref); 37 init_rwsem(&autogroup_default.lock); 38 init_task->signal->autogroup = &autogroup_default; 39 sched_autogroup_sysctl_init(); 40 } 41 42 void autogroup_free(struct task_group *tg) 43 { 44 kfree(tg->autogroup); 45 } 46 47 static inline void autogroup_destroy(struct kref *kref) 48 { 49 struct autogroup *ag = container_of(kref, struct autogroup, kref); 50 51 #ifdef CONFIG_RT_GROUP_SCHED 52 /* We've redirected RT tasks to the root task group... */ 53 ag->tg->rt_se = NULL; 54 ag->tg->rt_rq = NULL; 55 #endif 56 sched_release_group(ag->tg); 57 sched_destroy_group(ag->tg); 58 } 59 60 static inline void autogroup_kref_put(struct autogroup *ag) 61 { 62 kref_put(&ag->kref, autogroup_destroy); 63 } 64 65 static inline struct autogroup *autogroup_kref_get(struct autogroup *ag) 66 { 67 kref_get(&ag->kref); 68 return ag; 69 } 70 71 static inline struct autogroup *autogroup_task_get(struct task_struct *p) 72 { 73 struct autogroup *ag; 74 unsigned long flags; 75 76 if (!lock_task_sighand(p, &flags)) 77 return autogroup_kref_get(&autogroup_default); 78 79 ag = autogroup_kref_get(p->signal->autogroup); 80 unlock_task_sighand(p, &flags); 81 82 return ag; 83 } 84 85 static inline struct autogroup *autogroup_create(void) 86 { 87 struct autogroup *ag = kzalloc(sizeof(*ag), GFP_KERNEL); 88 struct task_group *tg; 89 90 if (!ag) 91 goto out_fail; 92 93 tg = sched_create_group(&root_task_group); 94 if (IS_ERR(tg)) 95 goto out_free; 96 97 kref_init(&ag->kref); 98 init_rwsem(&ag->lock); 99 ag->id = atomic_inc_return(&autogroup_seq_nr); 100 ag->tg = tg; 101 #ifdef CONFIG_RT_GROUP_SCHED 102 /* 103 * Autogroup RT tasks are redirected to the root task group 104 * so we don't have to move tasks around upon policy change, 105 * or flail around trying to allocate bandwidth on the fly. 106 * A bandwidth exception in __sched_setscheduler() allows 107 * the policy change to proceed. 108 */ 109 free_rt_sched_group(tg); 110 tg->rt_se = root_task_group.rt_se; 111 tg->rt_rq = root_task_group.rt_rq; 112 #endif 113 tg->autogroup = ag; 114 115 sched_online_group(tg, &root_task_group); 116 return ag; 117 118 out_free: 119 kfree(ag); 120 out_fail: 121 if (printk_ratelimit()) { 122 printk(KERN_WARNING "autogroup_create: %s failure.\n", 123 ag ? "sched_create_group()" : "kzalloc()"); 124 } 125 126 return autogroup_kref_get(&autogroup_default); 127 } 128 129 bool task_wants_autogroup(struct task_struct *p, struct task_group *tg) 130 { 131 if (tg != &root_task_group) 132 return false; 133 /* 134 * If we race with autogroup_move_group() the caller can use the old 135 * value of signal->autogroup but in this case sched_move_task() will 136 * be called again before autogroup_kref_put(). 137 * 138 * However, there is no way sched_autogroup_exit_task() could tell us 139 * to avoid autogroup->tg, so we abuse PF_EXITING flag for this case. 140 */ 141 if (p->flags & PF_EXITING) 142 return false; 143 144 return true; 145 } 146 147 void sched_autogroup_exit_task(struct task_struct *p) 148 { 149 /* 150 * We are going to call exit_notify() and autogroup_move_group() can't 151 * see this thread after that: we can no longer use signal->autogroup. 152 * See the PF_EXITING check in task_wants_autogroup(). 153 */ 154 sched_move_task(p); 155 } 156 157 static void 158 autogroup_move_group(struct task_struct *p, struct autogroup *ag) 159 { 160 struct autogroup *prev; 161 struct task_struct *t; 162 unsigned long flags; 163 164 BUG_ON(!lock_task_sighand(p, &flags)); 165 166 prev = p->signal->autogroup; 167 if (prev == ag) { 168 unlock_task_sighand(p, &flags); 169 return; 170 } 171 172 p->signal->autogroup = autogroup_kref_get(ag); 173 /* 174 * We can't avoid sched_move_task() after we changed signal->autogroup, 175 * this process can already run with task_group() == prev->tg or we can 176 * race with cgroup code which can read autogroup = prev under rq->lock. 177 * In the latter case for_each_thread() can not miss a migrating thread, 178 * cpu_cgroup_attach() must not be possible after cgroup_exit() and it 179 * can't be removed from thread list, we hold ->siglock. 180 * 181 * If an exiting thread was already removed from thread list we rely on 182 * sched_autogroup_exit_task(). 183 */ 184 for_each_thread(p, t) 185 sched_move_task(t); 186 187 unlock_task_sighand(p, &flags); 188 autogroup_kref_put(prev); 189 } 190 191 /* Allocates GFP_KERNEL, cannot be called under any spinlock: */ 192 void sched_autogroup_create_attach(struct task_struct *p) 193 { 194 struct autogroup *ag = autogroup_create(); 195 196 autogroup_move_group(p, ag); 197 198 /* Drop extra reference added by autogroup_create(): */ 199 autogroup_kref_put(ag); 200 } 201 EXPORT_SYMBOL(sched_autogroup_create_attach); 202 203 /* Cannot be called under siglock. Currently has no users: */ 204 void sched_autogroup_detach(struct task_struct *p) 205 { 206 autogroup_move_group(p, &autogroup_default); 207 } 208 EXPORT_SYMBOL(sched_autogroup_detach); 209 210 void sched_autogroup_fork(struct signal_struct *sig) 211 { 212 sig->autogroup = autogroup_task_get(current); 213 } 214 215 void sched_autogroup_exit(struct signal_struct *sig) 216 { 217 autogroup_kref_put(sig->autogroup); 218 } 219 220 static int __init setup_autogroup(char *str) 221 { 222 sysctl_sched_autogroup_enabled = 0; 223 224 return 1; 225 } 226 __setup("noautogroup", setup_autogroup); 227 228 #ifdef CONFIG_PROC_FS 229 230 int proc_sched_autogroup_set_nice(struct task_struct *p, int nice) 231 { 232 static unsigned long next = INITIAL_JIFFIES; 233 struct autogroup *ag; 234 unsigned long shares; 235 int err, idx; 236 237 if (nice < MIN_NICE || nice > MAX_NICE) 238 return -EINVAL; 239 240 err = security_task_setnice(current, nice); 241 if (err) 242 return err; 243 244 if (nice < 0 && !can_nice(current, nice)) 245 return -EPERM; 246 247 /* This is a heavy operation, taking global locks.. */ 248 if (!capable(CAP_SYS_ADMIN) && time_before(jiffies, next)) 249 return -EAGAIN; 250 251 next = HZ / 10 + jiffies; 252 ag = autogroup_task_get(p); 253 254 idx = array_index_nospec(nice + 20, 40); 255 shares = scale_load(sched_prio_to_weight[idx]); 256 257 down_write(&ag->lock); 258 err = sched_group_set_shares(ag->tg, shares); 259 if (!err) 260 ag->nice = nice; 261 up_write(&ag->lock); 262 263 autogroup_kref_put(ag); 264 265 return err; 266 } 267 268 void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m) 269 { 270 struct autogroup *ag = autogroup_task_get(p); 271 272 if (!task_group_is_autogroup(ag->tg)) 273 goto out; 274 275 down_read(&ag->lock); 276 seq_printf(m, "/autogroup-%ld nice %d\n", ag->id, ag->nice); 277 up_read(&ag->lock); 278 279 out: 280 autogroup_kref_put(ag); 281 } 282 #endif /* CONFIG_PROC_FS */ 283 284 int autogroup_path(struct task_group *tg, char *buf, int buflen) 285 { 286 if (!task_group_is_autogroup(tg)) 287 return 0; 288 289 return snprintf(buf, buflen, "%s-%ld", "/autogroup", tg->autogroup->id); 290 } 291