1 /*
2  *  linux/kernel/sys.c
3  *
4  *  Copyright (C) 1991, 1992  Linus Torvalds
5  */
6 
7 #include <linux/config.h>
8 #include <linux/module.h>
9 #include <linux/mm.h>
10 #include <linux/utsname.h>
11 #include <linux/mman.h>
12 #include <linux/smp_lock.h>
13 #include <linux/notifier.h>
14 #include <linux/reboot.h>
15 #include <linux/prctl.h>
16 #include <linux/init.h>
17 #include <linux/highuid.h>
18 #include <linux/fs.h>
19 #include <linux/kernel.h>
20 #include <linux/kexec.h>
21 #include <linux/workqueue.h>
22 #include <linux/device.h>
23 #include <linux/key.h>
24 #include <linux/times.h>
25 #include <linux/posix-timers.h>
26 #include <linux/security.h>
27 #include <linux/dcookies.h>
28 #include <linux/suspend.h>
29 #include <linux/tty.h>
30 #include <linux/signal.h>
31 #include <linux/cn_proc.h>
32 
33 #include <linux/compat.h>
34 #include <linux/syscalls.h>
35 
36 #include <asm/uaccess.h>
37 #include <asm/io.h>
38 #include <asm/unistd.h>
39 
40 #ifndef SET_UNALIGN_CTL
41 # define SET_UNALIGN_CTL(a,b)	(-EINVAL)
42 #endif
43 #ifndef GET_UNALIGN_CTL
44 # define GET_UNALIGN_CTL(a,b)	(-EINVAL)
45 #endif
46 #ifndef SET_FPEMU_CTL
47 # define SET_FPEMU_CTL(a,b)	(-EINVAL)
48 #endif
49 #ifndef GET_FPEMU_CTL
50 # define GET_FPEMU_CTL(a,b)	(-EINVAL)
51 #endif
52 #ifndef SET_FPEXC_CTL
53 # define SET_FPEXC_CTL(a,b)	(-EINVAL)
54 #endif
55 #ifndef GET_FPEXC_CTL
56 # define GET_FPEXC_CTL(a,b)	(-EINVAL)
57 #endif
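/*
 * These fallbacks back the PR_{SET,GET}_{UNALIGN,FPEMU,FPEXC} cases in
 * sys_prctl() below: an architecture that does not override them makes
 * the corresponding prctl() fail with -EINVAL.  A userspace probe might
 * look like this (illustrative sketch only):
 *
 *	#include <stdio.h>
 *	#include <errno.h>
 *	#include <sys/prctl.h>
 *
 *	unsigned int val;
 *	if (prctl(PR_GET_UNALIGN, &val) < 0 && errno == EINVAL)
 *		puts("unaligned-access control not supported here");
 */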
58 
59 /*
60  * this is where the system-wide overflow UID and GID are defined, for
61  * architectures that now have 32-bit UID/GID but didn't in the past
62  */
63 
64 int overflowuid = DEFAULT_OVERFLOWUID;
65 int overflowgid = DEFAULT_OVERFLOWGID;
66 
67 #ifdef CONFIG_UID16
68 EXPORT_SYMBOL(overflowuid);
69 EXPORT_SYMBOL(overflowgid);
70 #endif
71 
72 /*
73  * the same as above, but for filesystems which can only store a 16-bit
74  * UID and GID. As such, this is needed on all architectures.
75  */
76 
77 int fs_overflowuid = DEFAULT_FS_OVERFLOWUID;
78 int fs_overflowgid = DEFAULT_FS_OVERFLOWGID;
79 
80 EXPORT_SYMBOL(fs_overflowuid);
81 EXPORT_SYMBOL(fs_overflowgid);
82 
83 /*
84  * this indicates whether you can reboot with ctrl-alt-del: the default is yes
85  */
86 
87 int C_A_D = 1;
88 int cad_pid = 1;
89 
90 /*
91  *	Notifier list for kernel code which wants to be called
92  *	at shutdown. This is used to stop any idling DMA operations
93  *	and the like.
94  */
95 
96 static struct notifier_block *reboot_notifier_list;
97 static DEFINE_RWLOCK(notifier_lock);
98 
99 /**
100  *	notifier_chain_register	- Add notifier to a notifier chain
101  *	@list: Pointer to root list pointer
102  *	@n: New entry in notifier chain
103  *
104  *	Adds a notifier to a notifier chain.
105  *
106  *	Currently always returns zero.
107  */
108 
109 int notifier_chain_register(struct notifier_block **list, struct notifier_block *n)
110 {
111 	write_lock(&notifier_lock);
112 	while (*list)
113 	{
114 		if (n->priority > (*list)->priority)
115 			break;
116 		list = &((*list)->next);
117 	}
118 	n->next = *list;
119 	*list = n;
120 	write_unlock(&notifier_lock);
121 	return 0;
122 }
123 
124 EXPORT_SYMBOL(notifier_chain_register);
125 
126 /**
127  *	notifier_chain_unregister - Remove notifier from a notifier chain
128  *	@nl: Pointer to root list pointer
129  *	@n: Entry to remove from notifier chain
130  *
131  *	Removes a notifier from a notifier chain.
132  *
133  *	Returns zero on success, or %-ENOENT on failure.
134  */
135 
136 int notifier_chain_unregister(struct notifier_block **nl, struct notifier_block *n)
137 {
138 	write_lock(&notifier_lock);
139 	while ((*nl) != NULL)
140 	{
141 		if ((*nl) == n)
142 		{
143 			*nl = n->next;
144 			write_unlock(&notifier_lock);
145 			return 0;
146 		}
147 		nl = &((*nl)->next);
148 	}
149 	write_unlock(&notifier_lock);
150 	return -ENOENT;
151 }
152 
153 EXPORT_SYMBOL(notifier_chain_unregister);
154 
155 /**
156  *	notifier_call_chain - Call functions in a notifier chain
157  *	@n: Pointer to root pointer of notifier chain
158  *	@val: Value passed unmodified to notifier function
159  *	@v: Pointer passed unmodified to notifier function
160  *
161  *	Calls each function in a notifier chain in turn.
162  *
163  *	If the return value of the notifier can be and'd
164  *	with %NOTIFY_STOP_MASK, then notifier_call_chain
165  *	will return immediately, with the return value of
166  *	the notifier function which halted execution.
167  *	Otherwise, the return value is the return value
168  *	of the last notifier function called.
169  */
170 
171 int notifier_call_chain(struct notifier_block **n, unsigned long val, void *v)
172 {
173 	int ret = NOTIFY_DONE;
174 	struct notifier_block *nb = *n;
175 
176 	while (nb)
177 	{
178 		ret = nb->notifier_call(nb, val, v);
179 		if (ret & NOTIFY_STOP_MASK)
180 		{
181 			return ret;
182 		}
183 		nb = nb->next;
184 	}
185 	return ret;
186 }
187 
188 EXPORT_SYMBOL(notifier_call_chain);
189 
190 /**
191  *	register_reboot_notifier - Register function to be called at reboot time
192  *	@nb: Info about notifier function to be called
193  *
194  *	Registers a function with the list of functions
195  *	to be called at reboot time.
196  *
197  *	Currently always returns zero, as notifier_chain_register
198  *	always returns zero.
199  */
200 
201 int register_reboot_notifier(struct notifier_block * nb)
202 {
203 	return notifier_chain_register(&reboot_notifier_list, nb);
204 }
205 
206 EXPORT_SYMBOL(register_reboot_notifier);
207 
208 /**
209  *	unregister_reboot_notifier - Unregister previously registered reboot notifier
210  *	@nb: Hook to be unregistered
211  *
212  *	Unregisters a previously registered reboot
213  *	notifier function.
214  *
215  *	Returns zero on success, or %-ENOENT on failure.
216  */
217 
218 int unregister_reboot_notifier(struct notifier_block * nb)
219 {
220 	return notifier_chain_unregister(&reboot_notifier_list, nb);
221 }
222 
223 EXPORT_SYMBOL(unregister_reboot_notifier);
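/*
 * A typical user of the reboot chain supplies a struct notifier_block
 * and registers it from init code; for instance (illustrative sketch,
 * names are hypothetical):
 *
 *	static int example_reboot_event(struct notifier_block *nb,
 *					unsigned long code, void *cmd)
 *	{
 *		// code is SYS_RESTART, SYS_HALT or SYS_POWER_OFF
 *		return NOTIFY_DONE;
 *	}
 *
 *	static struct notifier_block example_reboot_nb = {
 *		.notifier_call	= example_reboot_event,
 *	};
 *
 *	register_reboot_notifier(&example_reboot_nb);
 */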
224 
225 static int set_one_prio(struct task_struct *p, int niceval, int error)
226 {
227 	int no_nice;
228 
229 	if (p->uid != current->euid &&
230 		p->euid != current->euid && !capable(CAP_SYS_NICE)) {
231 		error = -EPERM;
232 		goto out;
233 	}
234 	if (niceval < task_nice(p) && !can_nice(p, niceval)) {
235 		error = -EACCES;
236 		goto out;
237 	}
238 	no_nice = security_task_setnice(p, niceval);
239 	if (no_nice) {
240 		error = no_nice;
241 		goto out;
242 	}
243 	if (error == -ESRCH)
244 		error = 0;
245 	set_user_nice(p, niceval);
246 out:
247 	return error;
248 }
249 
250 asmlinkage long sys_setpriority(int which, int who, int niceval)
251 {
252 	struct task_struct *g, *p;
253 	struct user_struct *user;
254 	int error = -EINVAL;
255 
256 	if (which > 2 || which < 0)
257 		goto out;
258 
259 	/* normalize: avoid signed division (rounding problems) */
260 	error = -ESRCH;
261 	if (niceval < -20)
262 		niceval = -20;
263 	if (niceval > 19)
264 		niceval = 19;
265 
266 	read_lock(&tasklist_lock);
267 	switch (which) {
268 		case PRIO_PROCESS:
269 			if (!who)
270 				who = current->pid;
271 			p = find_task_by_pid(who);
272 			if (p)
273 				error = set_one_prio(p, niceval, error);
274 			break;
275 		case PRIO_PGRP:
276 			if (!who)
277 				who = process_group(current);
278 			do_each_task_pid(who, PIDTYPE_PGID, p) {
279 				error = set_one_prio(p, niceval, error);
280 			} while_each_task_pid(who, PIDTYPE_PGID, p);
281 			break;
282 		case PRIO_USER:
283 			user = current->user;
284 			if (!who)
285 				who = current->uid;
286 			else
287 				if ((who != current->uid) && !(user = find_user(who)))
288 					goto out_unlock;	/* No processes for this user */
289 
290 			do_each_thread(g, p)
291 				if (p->uid == who)
292 					error = set_one_prio(p, niceval, error);
293 			while_each_thread(g, p);
294 			if (who != current->uid)
295 				free_uid(user);		/* For find_user() */
296 			break;
297 	}
298 out_unlock:
299 	read_unlock(&tasklist_lock);
300 out:
301 	return error;
302 }
303 
304 /*
305  * Ugh. To avoid negative return values, "getpriority()" will
306  * not return the normal nice-value, but a negated value that
307  * has been offset by 20 (i.e. it returns 40..1 instead of -20..19)
308  * to stay compatible.
309  */
310 asmlinkage long sys_getpriority(int which, int who)
311 {
312 	struct task_struct *g, *p;
313 	struct user_struct *user;
314 	long niceval, retval = -ESRCH;
315 
316 	if (which > 2 || which < 0)
317 		return -EINVAL;
318 
319 	read_lock(&tasklist_lock);
320 	switch (which) {
321 		case PRIO_PROCESS:
322 			if (!who)
323 				who = current->pid;
324 			p = find_task_by_pid(who);
325 			if (p) {
326 				niceval = 20 - task_nice(p);
327 				if (niceval > retval)
328 					retval = niceval;
329 			}
330 			break;
331 		case PRIO_PGRP:
332 			if (!who)
333 				who = process_group(current);
334 			do_each_task_pid(who, PIDTYPE_PGID, p) {
335 				niceval = 20 - task_nice(p);
336 				if (niceval > retval)
337 					retval = niceval;
338 			} while_each_task_pid(who, PIDTYPE_PGID, p);
339 			break;
340 		case PRIO_USER:
341 			user = current->user;
342 			if (!who)
343 				who = current->uid;
344 			else
345 				if ((who != current->uid) && !(user = find_user(who)))
346 					goto out_unlock;	/* No processes for this user */
347 
348 			do_each_thread(g, p)
349 				if (p->uid == who) {
350 					niceval = 20 - task_nice(p);
351 					if (niceval > retval)
352 						retval = niceval;
353 				}
354 			while_each_thread(g, p);
355 			if (who != current->uid)
356 				free_uid(user);		/* for find_user() */
357 			break;
358 	}
359 out_unlock:
360 	read_unlock(&tasklist_lock);
361 
362 	return retval;
363 }
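/*
 * Worked example of the encoding above: a task at nice -20 yields
 * 20 - (-20) = 40 and a task at nice 19 yields 1, so every success is
 * positive and userspace (e.g. the glibc wrapper) recovers the nice
 * value as 20 - ret.
 */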
364 
365 /**
366  *	emergency_restart - reboot the system
367  *
368  *	Reboot the system without shutting down any hardware or
369  *	taking any locks.  This is called when we know we are in
370  *	trouble so this is our best effort to reboot.  This is
371  *	safe to call in interrupt context.
372  */
373 void emergency_restart(void)
374 {
375 	machine_emergency_restart();
376 }
377 EXPORT_SYMBOL_GPL(emergency_restart);
378 
379 void kernel_restart_prepare(char *cmd)
380 {
381 	notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd);
382 	system_state = SYSTEM_RESTART;
383 	device_shutdown();
384 }
385 
386 /**
387  *	kernel_restart - reboot the system
388  *	@cmd: pointer to buffer containing command to execute for restart
389  *		or NULL
390  *
391  *	Shut down everything and perform a clean reboot.
392  *	This is not safe to call in interrupt context.
393  */
394 void kernel_restart(char *cmd)
395 {
396 	kernel_restart_prepare(cmd);
397 	if (!cmd) {
398 		printk(KERN_EMERG "Restarting system.\n");
399 	} else {
400 		printk(KERN_EMERG "Restarting system with command '%s'.\n", cmd);
401 	}
402 	printk(".\n");
403 	machine_restart(cmd);
404 }
405 EXPORT_SYMBOL_GPL(kernel_restart);
406 
407 /**
408  *	kernel_kexec - reboot the system
409  *
410  *	Move into place and start executing a preloaded standalone
411  *	executable.  If no image has been preloaded, simply return.
412  */
413 void kernel_kexec(void)
414 {
415 #ifdef CONFIG_KEXEC
416 	struct kimage *image;
417 	image = xchg(&kexec_image, NULL);
418 	if (!image) {
419 		return;
420 	}
421 	kernel_restart_prepare(NULL);
422 	printk(KERN_EMERG "Starting new kernel\n");
423 	machine_shutdown();
424 	machine_kexec(image);
425 #endif
426 }
427 EXPORT_SYMBOL_GPL(kernel_kexec);
428 
429 /**
430  *	kernel_halt - halt the system
431  *
432  *	Shut down everything and perform a clean system halt.
433  */
434 void kernel_halt_prepare(void)
435 {
436 	notifier_call_chain(&reboot_notifier_list, SYS_HALT, NULL);
437 	system_state = SYSTEM_HALT;
438 	device_shutdown();
439 }
440 void kernel_halt(void)
441 {
442 	kernel_halt_prepare();
443 	printk(KERN_EMERG "System halted.\n");
444 	machine_halt();
445 }
446 EXPORT_SYMBOL_GPL(kernel_halt);
447 
448 /**
449  *	kernel_power_off - power off the system
450  *
451  *	Shut down everything and perform a clean system power off.
452  */
453 void kernel_power_off_prepare(void)
454 {
455 	notifier_call_chain(&reboot_notifier_list, SYS_POWER_OFF, NULL);
456 	system_state = SYSTEM_POWER_OFF;
457 	device_shutdown();
458 }
459 void kernel_power_off(void)
460 {
461 	kernel_power_off_prepare();
462 	printk(KERN_EMERG "Power down.\n");
463 	machine_power_off();
464 }
465 EXPORT_SYMBOL_GPL(kernel_power_off);
466 
467 /*
468  * Reboot system call: for obvious reasons only root may call it,
469  * and even root needs to set up some magic numbers in the registers
470  * so that some mistake won't make this reboot the whole machine.
471  * You can also set the meaning of the ctrl-alt-del-key here.
472  *
473  * reboot doesn't sync: do that yourself before calling this.
474  */
475 asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd, void __user * arg)
476 {
477 	char buffer[256];
478 
479 	/* We only trust the superuser with rebooting the system. */
480 	if (!capable(CAP_SYS_BOOT))
481 		return -EPERM;
482 
483 	/* For safety, we require "magic" arguments. */
484 	if (magic1 != LINUX_REBOOT_MAGIC1 ||
485 	    (magic2 != LINUX_REBOOT_MAGIC2 &&
486 	     magic2 != LINUX_REBOOT_MAGIC2A &&
487 	     magic2 != LINUX_REBOOT_MAGIC2B &&
488 	     magic2 != LINUX_REBOOT_MAGIC2C))
489 		return -EINVAL;
490 
491 	lock_kernel();
492 	switch (cmd) {
493 	case LINUX_REBOOT_CMD_RESTART:
494 		kernel_restart(NULL);
495 		break;
496 
497 	case LINUX_REBOOT_CMD_CAD_ON:
498 		C_A_D = 1;
499 		break;
500 
501 	case LINUX_REBOOT_CMD_CAD_OFF:
502 		C_A_D = 0;
503 		break;
504 
505 	case LINUX_REBOOT_CMD_HALT:
506 		kernel_halt();
507 		unlock_kernel();
508 		do_exit(0);
509 		break;
510 
511 	case LINUX_REBOOT_CMD_POWER_OFF:
512 		kernel_power_off();
513 		unlock_kernel();
514 		do_exit(0);
515 		break;
516 
517 	case LINUX_REBOOT_CMD_RESTART2:
518 		if (strncpy_from_user(&buffer[0], arg, sizeof(buffer) - 1) < 0) {
519 			unlock_kernel();
520 			return -EFAULT;
521 		}
522 		buffer[sizeof(buffer) - 1] = '\0';
523 
524 		kernel_restart(buffer);
525 		break;
526 
527 	case LINUX_REBOOT_CMD_KEXEC:
528 		kernel_kexec();
529 		unlock_kernel();
530 		return -EINVAL;
531 
532 #ifdef CONFIG_SOFTWARE_SUSPEND
533 	case LINUX_REBOOT_CMD_SW_SUSPEND:
534 		{
535 			int ret = software_suspend();
536 			unlock_kernel();
537 			return ret;
538 		}
539 #endif
540 
541 	default:
542 		unlock_kernel();
543 		return -EINVAL;
544 	}
545 	unlock_kernel();
546 	return 0;
547 }
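/*
 * Illustrative userspace invocation (the glibc reboot() wrapper passes
 * the magic numbers itself; this sketch uses the raw syscall):
 *
 *	#include <unistd.h>
 *	#include <sys/syscall.h>
 *	#include <linux/reboot.h>
 *
 *	sync();		// sys_reboot does not sync, see above
 *	syscall(SYS_reboot, LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2,
 *		LINUX_REBOOT_CMD_RESTART, NULL);
 */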
548 
549 static void deferred_cad(void *dummy)
550 {
551 	kernel_restart(NULL);
552 }
553 
554 /*
555  * This function gets called by ctrl-alt-del, i.e. the keyboard interrupt.
556  * As it's called within an interrupt, it may NOT sync: the only choice
557  * is whether to reboot at once, or just ignore the ctrl-alt-del.
558  */
559 void ctrl_alt_del(void)
560 {
561 	static DECLARE_WORK(cad_work, deferred_cad, NULL);
562 
563 	if (C_A_D)
564 		schedule_work(&cad_work);
565 	else
566 		kill_proc(cad_pid, SIGINT, 1);
567 }
568 
569 
570 /*
571  * Unprivileged users may change the real gid to the effective gid
572  * or vice versa.  (BSD-style)
573  *
574  * If you set the real gid at all, or set the effective gid to a value not
575  * equal to the real gid, then the saved gid is set to the new effective gid.
576  *
577  * This makes it possible for a setgid program to completely drop its
578  * privileges, which is often a useful assertion to make when you are doing
579  * a security audit of a program.
580  *
581  * The general idea is that a program which uses just setregid() will be
582  * 100% compatible with BSD.  A program which uses just setgid() will be
583  * 100% compatible with POSIX with saved IDs.
584  *
585  * SMP: There are no races; the GIDs are checked only by filesystem
586  *      operations (as far as semantic preservation is concerned).
587  */
588 asmlinkage long sys_setregid(gid_t rgid, gid_t egid)
589 {
590 	int old_rgid = current->gid;
591 	int old_egid = current->egid;
592 	int new_rgid = old_rgid;
593 	int new_egid = old_egid;
594 	int retval;
595 
596 	retval = security_task_setgid(rgid, egid, (gid_t)-1, LSM_SETID_RE);
597 	if (retval)
598 		return retval;
599 
600 	if (rgid != (gid_t) -1) {
601 		if ((old_rgid == rgid) ||
602 	    (current->egid == rgid) ||
603 		    capable(CAP_SETGID))
604 			new_rgid = rgid;
605 		else
606 			return -EPERM;
607 	}
608 	if (egid != (gid_t) -1) {
609 		if ((old_rgid == egid) ||
610 		    (current->egid == egid) ||
611 		    (current->sgid == egid) ||
612 		    capable(CAP_SETGID))
613 			new_egid = egid;
614 		else {
615 			return -EPERM;
616 		}
617 	}
618 	if (new_egid != old_egid)
619 	{
620 		current->mm->dumpable = suid_dumpable;
621 		smp_wmb();
622 	}
623 	if (rgid != (gid_t) -1 ||
624 	    (egid != (gid_t) -1 && egid != old_rgid))
625 		current->sgid = new_egid;
626 	current->fsgid = new_egid;
627 	current->egid = new_egid;
628 	current->gid = new_rgid;
629 	key_fsgid_changed(current);
630 	proc_id_connector(current, PROC_EVENT_GID);
631 	return 0;
632 }
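/*
 * Illustrative sketch of the BSD-style privilege drop described above,
 * as a setgid program would do it:
 *
 *	#include <unistd.h>
 *	#include <stdlib.h>
 *
 *	// setting the real gid also resets the saved gid, so the
 *	// privileged group is gone for good
 *	if (setregid(getgid(), getgid()) < 0)
 *		abort();
 */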
633 
634 /*
635  * setgid() is implemented like SysV w/ SAVED_IDS
636  *
637  * SMP: Same implicit races as above.
638  */
639 asmlinkage long sys_setgid(gid_t gid)
640 {
641 	int old_egid = current->egid;
642 	int retval;
643 
644 	retval = security_task_setgid(gid, (gid_t)-1, (gid_t)-1, LSM_SETID_ID);
645 	if (retval)
646 		return retval;
647 
648 	if (capable(CAP_SETGID))
649 	{
650 		if (old_egid != gid)
651 		{
652 			current->mm->dumpable = suid_dumpable;
653 			smp_wmb();
654 		}
655 		current->gid = current->egid = current->sgid = current->fsgid = gid;
656 	}
657 	else if ((gid == current->gid) || (gid == current->sgid))
658 	{
659 		if (old_egid != gid)
660 		{
661 			current->mm->dumpable = suid_dumpable;
662 			smp_wmb();
663 		}
664 		current->egid = current->fsgid = gid;
665 	}
666 	else
667 		return -EPERM;
668 
669 	key_fsgid_changed(current);
670 	proc_id_connector(current, PROC_EVENT_GID);
671 	return 0;
672 }
673 
674 static int set_user(uid_t new_ruid, int dumpclear)
675 {
676 	struct user_struct *new_user;
677 
678 	new_user = alloc_uid(new_ruid);
679 	if (!new_user)
680 		return -EAGAIN;
681 
682 	if (atomic_read(&new_user->processes) >=
683 				current->signal->rlim[RLIMIT_NPROC].rlim_cur &&
684 			new_user != &root_user) {
685 		free_uid(new_user);
686 		return -EAGAIN;
687 	}
688 
689 	switch_uid(new_user);
690 
691 	if (dumpclear)
692 	{
693 		current->mm->dumpable = suid_dumpable;
694 		smp_wmb();
695 	}
696 	current->uid = new_ruid;
697 	return 0;
698 }
699 
700 /*
701  * Unprivileged users may change the real uid to the effective uid
702  * or vice versa.  (BSD-style)
703  *
704  * If you set the real uid at all, or set the effective uid to a value not
705  * equal to the real uid, then the saved uid is set to the new effective uid.
706  *
707  * This makes it possible for a setuid program to completely drop its
708  * privileges, which is often a useful assertion to make when you are doing
709  * a security audit of a program.
710  *
711  * The general idea is that a program which uses just setreuid() will be
712  * 100% compatible with BSD.  A program which uses just setuid() will be
713  * 100% compatible with POSIX with saved IDs.
714  */
715 asmlinkage long sys_setreuid(uid_t ruid, uid_t euid)
716 {
717 	int old_ruid, old_euid, old_suid, new_ruid, new_euid;
718 	int retval;
719 
720 	retval = security_task_setuid(ruid, euid, (uid_t)-1, LSM_SETID_RE);
721 	if (retval)
722 		return retval;
723 
724 	new_ruid = old_ruid = current->uid;
725 	new_euid = old_euid = current->euid;
726 	old_suid = current->suid;
727 
728 	if (ruid != (uid_t) -1) {
729 		new_ruid = ruid;
730 		if ((old_ruid != ruid) &&
731 		    (current->euid != ruid) &&
732 		    !capable(CAP_SETUID))
733 			return -EPERM;
734 	}
735 
736 	if (euid != (uid_t) -1) {
737 		new_euid = euid;
738 		if ((old_ruid != euid) &&
739 		    (current->euid != euid) &&
740 		    (current->suid != euid) &&
741 		    !capable(CAP_SETUID))
742 			return -EPERM;
743 	}
744 
745 	if (new_ruid != old_ruid && set_user(new_ruid, new_euid != old_euid) < 0)
746 		return -EAGAIN;
747 
748 	if (new_euid != old_euid)
749 	{
750 		current->mm->dumpable = suid_dumpable;
751 		smp_wmb();
752 	}
753 	current->euid = new_euid;
754 	if (ruid != (uid_t) -1 ||
755 	    (euid != (uid_t) -1 && euid != old_ruid))
756 		current->suid = current->euid;
757 	current->fsuid = current->euid;
758 
759 	key_fsuid_changed(current);
760 	proc_id_connector(current, PROC_EVENT_UID);
761 
762 	return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_RE);
763 }
764 
765 
766 
767 /*
768  * setuid() is implemented like SysV with SAVED_IDS
769  *
770  * Note that SAVED_ID's is deficient in that a setuid root program
771  * like sendmail, for example, cannot set its uid to be a normal
772  * user and then switch back, because if you're root, setuid() sets
773  * the saved uid too.  If you don't like this, blame the bright people
774  * in the POSIX committee and/or USG.  Note that the BSD-style setreuid()
775  * will allow a root program to temporarily drop privileges and be able to
776  * regain them by swapping the real and effective uid.
777  */
778 asmlinkage long sys_setuid(uid_t uid)
779 {
780 	int old_euid = current->euid;
781 	int old_ruid, old_suid, new_ruid, new_suid;
782 	int retval;
783 
784 	retval = security_task_setuid(uid, (uid_t)-1, (uid_t)-1, LSM_SETID_ID);
785 	if (retval)
786 		return retval;
787 
788 	old_ruid = new_ruid = current->uid;
789 	old_suid = current->suid;
790 	new_suid = old_suid;
791 
792 	if (capable(CAP_SETUID)) {
793 		if (uid != old_ruid && set_user(uid, old_euid != uid) < 0)
794 			return -EAGAIN;
795 		new_suid = uid;
796 	} else if ((uid != current->uid) && (uid != new_suid))
797 		return -EPERM;
798 
799 	if (old_euid != uid)
800 	{
801 		current->mm->dumpable = suid_dumpable;
802 		smp_wmb();
803 	}
804 	current->fsuid = current->euid = uid;
805 	current->suid = new_suid;
806 
807 	key_fsuid_changed(current);
808 	proc_id_connector(current, PROC_EVENT_UID);
809 
810 	return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_ID);
811 }
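/*
 * Illustrative sketch of the BSD-style swap mentioned above: a
 * setuid-root program can drop privilege temporarily and regain it by
 * exchanging the real and effective uids:
 *
 *	#include <unistd.h>
 *
 *	setreuid(geteuid(), getuid());	// drop: euid becomes the user
 *	// ... do unprivileged work ...
 *	setreuid(geteuid(), getuid());	// swap back: euid is root again
 */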
812 
813 
814 /*
815  * This function implements a generic ability to update ruid, euid,
816  * and suid.  This allows you to implement the 4.4 compatible seteuid().
817  */
818 asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid)
819 {
820 	int old_ruid = current->uid;
821 	int old_euid = current->euid;
822 	int old_suid = current->suid;
823 	int retval;
824 
825 	retval = security_task_setuid(ruid, euid, suid, LSM_SETID_RES);
826 	if (retval)
827 		return retval;
828 
829 	if (!capable(CAP_SETUID)) {
830 		if ((ruid != (uid_t) -1) && (ruid != current->uid) &&
831 		    (ruid != current->euid) && (ruid != current->suid))
832 			return -EPERM;
833 		if ((euid != (uid_t) -1) && (euid != current->uid) &&
834 		    (euid != current->euid) && (euid != current->suid))
835 			return -EPERM;
836 		if ((suid != (uid_t) -1) && (suid != current->uid) &&
837 		    (suid != current->euid) && (suid != current->suid))
838 			return -EPERM;
839 	}
840 	if (ruid != (uid_t) -1) {
841 		if (ruid != current->uid && set_user(ruid, euid != current->euid) < 0)
842 			return -EAGAIN;
843 	}
844 	if (euid != (uid_t) -1) {
845 		if (euid != current->euid)
846 		{
847 			current->mm->dumpable = suid_dumpable;
848 			smp_wmb();
849 		}
850 		current->euid = euid;
851 	}
852 	current->fsuid = current->euid;
853 	if (suid != (uid_t) -1)
854 		current->suid = suid;
855 
856 	key_fsuid_changed(current);
857 	proc_id_connector(current, PROC_EVENT_UID);
858 
859 	return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_RES);
860 }
861 
862 asmlinkage long sys_getresuid(uid_t __user *ruid, uid_t __user *euid, uid_t __user *suid)
863 {
864 	int retval;
865 
866 	if (!(retval = put_user(current->uid, ruid)) &&
867 	    !(retval = put_user(current->euid, euid)))
868 		retval = put_user(current->suid, suid);
869 
870 	return retval;
871 }
872 
873 /*
874  * Same as above, but for rgid, egid, sgid.
875  */
876 asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid)
877 {
878 	int retval;
879 
880 	retval = security_task_setgid(rgid, egid, sgid, LSM_SETID_RES);
881 	if (retval)
882 		return retval;
883 
884 	if (!capable(CAP_SETGID)) {
885 		if ((rgid != (gid_t) -1) && (rgid != current->gid) &&
886 		    (rgid != current->egid) && (rgid != current->sgid))
887 			return -EPERM;
888 		if ((egid != (gid_t) -1) && (egid != current->gid) &&
889 		    (egid != current->egid) && (egid != current->sgid))
890 			return -EPERM;
891 		if ((sgid != (gid_t) -1) && (sgid != current->gid) &&
892 		    (sgid != current->egid) && (sgid != current->sgid))
893 			return -EPERM;
894 	}
895 	if (egid != (gid_t) -1) {
896 		if (egid != current->egid)
897 		{
898 			current->mm->dumpable = suid_dumpable;
899 			smp_wmb();
900 		}
901 		current->egid = egid;
902 	}
903 	current->fsgid = current->egid;
904 	if (rgid != (gid_t) -1)
905 		current->gid = rgid;
906 	if (sgid != (gid_t) -1)
907 		current->sgid = sgid;
908 
909 	key_fsgid_changed(current);
910 	proc_id_connector(current, PROC_EVENT_GID);
911 	return 0;
912 }
913 
914 asmlinkage long sys_getresgid(gid_t __user *rgid, gid_t __user *egid, gid_t __user *sgid)
915 {
916 	int retval;
917 
918 	if (!(retval = put_user(current->gid, rgid)) &&
919 	    !(retval = put_user(current->egid, egid)))
920 		retval = put_user(current->sgid, sgid);
921 
922 	return retval;
923 }
924 
925 
926 /*
927  * "setfsuid()" sets the fsuid - the uid used for filesystem checks. This
928  * is used for "access()" and for the NFS daemon (letting nfsd stay at
929  * whatever uid it wants to). It normally shadows "euid", except when
930  * explicitly set by setfsuid() or for access().
931  */
932 asmlinkage long sys_setfsuid(uid_t uid)
933 {
934 	int old_fsuid;
935 
936 	old_fsuid = current->fsuid;
937 	if (security_task_setuid(uid, (uid_t)-1, (uid_t)-1, LSM_SETID_FS))
938 		return old_fsuid;
939 
940 	if (uid == current->uid || uid == current->euid ||
941 	    uid == current->suid || uid == current->fsuid ||
942 	    capable(CAP_SETUID))
943 	{
944 		if (uid != old_fsuid)
945 		{
946 			current->mm->dumpable = suid_dumpable;
947 			smp_wmb();
948 		}
949 		current->fsuid = uid;
950 	}
951 
952 	key_fsuid_changed(current);
953 	proc_id_connector(current, PROC_EVENT_UID);
954 
955 	security_task_post_setuid(old_fsuid, (uid_t)-1, (uid_t)-1, LSM_SETID_FS);
956 
957 	return old_fsuid;
958 }
959 
960 /*
961  * Same as above, but for the filesystem gid.
962  */
963 asmlinkage long sys_setfsgid(gid_t gid)
964 {
965 	int old_fsgid;
966 
967 	old_fsgid = current->fsgid;
968 	if (security_task_setgid(gid, (gid_t)-1, (gid_t)-1, LSM_SETID_FS))
969 		return old_fsgid;
970 
971 	if (gid == current->gid || gid == current->egid ||
972 	    gid == current->sgid || gid == current->fsgid ||
973 	    capable(CAP_SETGID))
974 	{
975 		if (gid != old_fsgid)
976 		{
977 			current->mm->dumpable = suid_dumpable;
978 			smp_wmb();
979 		}
980 		current->fsgid = gid;
981 		key_fsgid_changed(current);
982 		proc_id_connector(current, PROC_EVENT_GID);
983 	}
984 	return old_fsgid;
985 }
986 
987 asmlinkage long sys_times(struct tms __user * tbuf)
988 {
989 	/*
990 	 *	In the SMP world we might just be unlucky and have one of
991 	 *	the times increment as we use it. Since the value is an
992 	 *	atomically safe type this is just fine. Conceptually it's
993 	 *	as if the syscall took an instant longer to occur.
994 	 */
995 	if (tbuf) {
996 		struct tms tmp;
997 		cputime_t utime, stime, cutime, cstime;
998 
999 #ifdef CONFIG_SMP
1000 		if (thread_group_empty(current)) {
1001 			/*
1002 			 * Single thread case without the use of any locks.
1003 			 *
1004 			 * We may race with release_task if two threads are
1005 			 * executing. However, release_task first adds up the
1006 			 * counters (__exit_signal) before removing the task
1007 			 * from the process tasklist (__unhash_process).
1008 			 * __exit_signal also acquires and releases the
1009 			 * siglock which results in the proper memory ordering
1010 			 * so that the list modifications are always visible
1011 			 * after the counters have been updated.
1012 			 *
1013 			 * If the counters have been updated by the second thread
1014 			 * but the thread has not yet been removed from the list
1015 			 * then the other branch will be executing which will
1016 			 * block on tasklist_lock until the exit handling of the
1017 			 * other task is finished.
1018 			 *
1019 			 * This also implies that the sighand->siglock cannot
1020 			 * be held by another processor. So we can also
1021 			 * skip acquiring that lock.
1022 			 */
1023 			utime = cputime_add(current->signal->utime, current->utime);
1024 		stime = cputime_add(current->signal->stime, current->stime);
1025 			cutime = current->signal->cutime;
1026 			cstime = current->signal->cstime;
1027 		} else
1028 #endif
1029 		{
1030 
1031 			/* Process with multiple threads */
1032 			struct task_struct *tsk = current;
1033 			struct task_struct *t;
1034 
1035 			read_lock(&tasklist_lock);
1036 			utime = tsk->signal->utime;
1037 			stime = tsk->signal->stime;
1038 			t = tsk;
1039 			do {
1040 				utime = cputime_add(utime, t->utime);
1041 				stime = cputime_add(stime, t->stime);
1042 				t = next_thread(t);
1043 			} while (t != tsk);
1044 
1045 			/*
1046 			 * While we have tasklist_lock read-locked, no dying thread
1047 			 * can be updating current->signal->[us]time.  Instead,
1048 			 * we got their counts included in the live thread loop.
1049 			 * However, another thread can come in right now and
1050 			 * do a wait call that updates current->signal->c[us]time.
1051 			 * To make sure we always see that pair updated atomically,
1052 			 * we take the siglock around fetching them.
1053 			 */
1054 			spin_lock_irq(&tsk->sighand->siglock);
1055 			cutime = tsk->signal->cutime;
1056 			cstime = tsk->signal->cstime;
1057 			spin_unlock_irq(&tsk->sighand->siglock);
1058 			read_unlock(&tasklist_lock);
1059 		}
1060 		tmp.tms_utime = cputime_to_clock_t(utime);
1061 		tmp.tms_stime = cputime_to_clock_t(stime);
1062 		tmp.tms_cutime = cputime_to_clock_t(cutime);
1063 		tmp.tms_cstime = cputime_to_clock_t(cstime);
1064 		if (copy_to_user(tbuf, &tmp, sizeof(struct tms)))
1065 			return -EFAULT;
1066 	}
1067 	return (long) jiffies_64_to_clock_t(get_jiffies_64());
1068 }
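/*
 * Illustrative userspace use: both the return value and the struct tms
 * fields are in clock ticks, converted via sysconf():
 *
 *	#include <sys/times.h>
 *	#include <unistd.h>
 *
 *	struct tms t;
 *	clock_t start = times(&t);
 *	double user_secs = (double)t.tms_utime / sysconf(_SC_CLK_TCK);
 */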
1069 
1070 /*
1071  * This needs some heavy checking ...
1072  * I just haven't the stomach for it. I also don't fully
1073  * understand sessions/pgrp etc. Let somebody who does explain it.
1074  *
1075  * OK, I think I have the protection semantics right.... this is really
1076  * only important on a multi-user system anyway, to make sure one user
1077  * can't send a signal to a process owned by another.  -TYT, 12/12/91
1078  *
1079  * Ouch. Had to add the 'did_exec' flag to conform completely to POSIX.
1080  * LBT 04.03.94
1081  */
1082 
1083 asmlinkage long sys_setpgid(pid_t pid, pid_t pgid)
1084 {
1085 	struct task_struct *p;
1086 	int err = -EINVAL;
1087 
1088 	if (!pid)
1089 		pid = current->pid;
1090 	if (!pgid)
1091 		pgid = pid;
1092 	if (pgid < 0)
1093 		return -EINVAL;
1094 
1095 	/* From this point forward we keep holding onto the tasklist lock
1096 	 * so that our parent does not change from under us. -DaveM
1097 	 */
1098 	write_lock_irq(&tasklist_lock);
1099 
1100 	err = -ESRCH;
1101 	p = find_task_by_pid(pid);
1102 	if (!p)
1103 		goto out;
1104 
1105 	err = -EINVAL;
1106 	if (!thread_group_leader(p))
1107 		goto out;
1108 
1109 	if (p->parent == current || p->real_parent == current) {
1110 		err = -EPERM;
1111 		if (p->signal->session != current->signal->session)
1112 			goto out;
1113 		err = -EACCES;
1114 		if (p->did_exec)
1115 			goto out;
1116 	} else {
1117 		err = -ESRCH;
1118 		if (p != current)
1119 			goto out;
1120 	}
1121 
1122 	err = -EPERM;
1123 	if (p->signal->leader)
1124 		goto out;
1125 
1126 	if (pgid != pid) {
1127 		struct task_struct *p;
1128 
1129 		do_each_task_pid(pgid, PIDTYPE_PGID, p) {
1130 			if (p->signal->session == current->signal->session)
1131 				goto ok_pgid;
1132 		} while_each_task_pid(pgid, PIDTYPE_PGID, p);
1133 		goto out;
1134 	}
1135 
1136 ok_pgid:
1137 	err = security_task_setpgid(p, pgid);
1138 	if (err)
1139 		goto out;
1140 
1141 	if (process_group(p) != pgid) {
1142 		detach_pid(p, PIDTYPE_PGID);
1143 		p->signal->pgrp = pgid;
1144 		attach_pid(p, PIDTYPE_PGID, pgid);
1145 	}
1146 
1147 	err = 0;
1148 out:
1149 	/* All paths lead to here, thus we are safe. -DaveM */
1150 	write_unlock_irq(&tasklist_lock);
1151 	return err;
1152 }
1153 
1154 asmlinkage long sys_getpgid(pid_t pid)
1155 {
1156 	if (!pid) {
1157 		return process_group(current);
1158 	} else {
1159 		int retval;
1160 		struct task_struct *p;
1161 
1162 		read_lock(&tasklist_lock);
1163 		p = find_task_by_pid(pid);
1164 
1165 		retval = -ESRCH;
1166 		if (p) {
1167 			retval = security_task_getpgid(p);
1168 			if (!retval)
1169 				retval = process_group(p);
1170 		}
1171 		read_unlock(&tasklist_lock);
1172 		return retval;
1173 	}
1174 }
1175 
1176 #ifdef __ARCH_WANT_SYS_GETPGRP
1177 
1178 asmlinkage long sys_getpgrp(void)
1179 {
1180 	/* SMP - assuming writes are word atomic this is fine */
1181 	return process_group(current);
1182 }
1183 
1184 #endif
1185 
1186 asmlinkage long sys_getsid(pid_t pid)
1187 {
1188 	if (!pid) {
1189 		return current->signal->session;
1190 	} else {
1191 		int retval;
1192 		struct task_struct *p;
1193 
1194 		read_lock(&tasklist_lock);
1195 		p = find_task_by_pid(pid);
1196 
1197 		retval = -ESRCH;
1198 		if (p) {
1199 			retval = security_task_getsid(p);
1200 			if (!retval)
1201 				retval = p->signal->session;
1202 		}
1203 		read_unlock(&tasklist_lock);
1204 		return retval;
1205 	}
1206 }
1207 
1208 asmlinkage long sys_setsid(void)
1209 {
1210 	struct pid *pid;
1211 	int err = -EPERM;
1212 
1213 	if (!thread_group_leader(current))
1214 		return -EINVAL;
1215 
1216 	down(&tty_sem);
1217 	write_lock_irq(&tasklist_lock);
1218 
1219 	pid = find_pid(PIDTYPE_PGID, current->pid);
1220 	if (pid)
1221 		goto out;
1222 
1223 	current->signal->leader = 1;
1224 	__set_special_pids(current->pid, current->pid);
1225 	current->signal->tty = NULL;
1226 	current->signal->tty_old_pgrp = 0;
1227 	err = process_group(current);
1228 out:
1229 	write_unlock_irq(&tasklist_lock);
1230 	up(&tty_sem);
1231 	return err;
1232 }
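/*
 * Illustrative sketch: the classic daemonizing idiom forks first, so
 * the child is no longer a process group leader and setsid() succeeds:
 *
 *	#include <unistd.h>
 *
 *	if (fork() > 0)
 *		_exit(0);	// parent exits
 *	setsid();		// child: new session, no controlling tty
 */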
1233 
1234 /*
1235  * Supplementary group IDs
1236  */
1237 
1238 /* init to 2 - one for init_task, one to ensure it is never freed */
1239 struct group_info init_groups = { .usage = ATOMIC_INIT(2) };
1240 
1241 struct group_info *groups_alloc(int gidsetsize)
1242 {
1243 	struct group_info *group_info;
1244 	int nblocks;
1245 	int i;
1246 
1247 	nblocks = (gidsetsize + NGROUPS_PER_BLOCK - 1) / NGROUPS_PER_BLOCK;
1248 	/* Make sure we always allocate at least one indirect block pointer */
1249 	nblocks = nblocks ? : 1;
1250 	group_info = kmalloc(sizeof(*group_info) + nblocks*sizeof(gid_t *), GFP_USER);
1251 	if (!group_info)
1252 		return NULL;
1253 	group_info->ngroups = gidsetsize;
1254 	group_info->nblocks = nblocks;
1255 	atomic_set(&group_info->usage, 1);
1256 
1257 	if (gidsetsize <= NGROUPS_SMALL) {
1258 		group_info->blocks[0] = group_info->small_block;
1259 	} else {
1260 		for (i = 0; i < nblocks; i++) {
1261 			gid_t *b;
1262 			b = (void *)__get_free_page(GFP_USER);
1263 			if (!b)
1264 				goto out_undo_partial_alloc;
1265 			group_info->blocks[i] = b;
1266 		}
1267 	}
1268 	return group_info;
1269 
1270 out_undo_partial_alloc:
1271 	while (--i >= 0) {
1272 		free_page((unsigned long)group_info->blocks[i]);
1273 	}
1274 	kfree(group_info);
1275 	return NULL;
1276 }
1277 
1278 EXPORT_SYMBOL(groups_alloc);
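/*
 * Worked example (assuming a 4 KiB page and 4-byte gid_t, so
 * NGROUPS_PER_BLOCK == 1024): gidsetsize == 3000 gives
 * nblocks == (3000 + 1023) / 1024 == 3 page-sized blocks, while a set
 * with gidsetsize <= NGROUPS_SMALL reuses the embedded small_block and
 * allocates no pages at all.
 */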
1279 
1280 void groups_free(struct group_info *group_info)
1281 {
1282 	if (group_info->blocks[0] != group_info->small_block) {
1283 		int i;
1284 		for (i = 0; i < group_info->nblocks; i++)
1285 			free_page((unsigned long)group_info->blocks[i]);
1286 	}
1287 	kfree(group_info);
1288 }
1289 
1290 EXPORT_SYMBOL(groups_free);
1291 
1292 /* export the group_info to a user-space array */
1293 static int groups_to_user(gid_t __user *grouplist,
1294     struct group_info *group_info)
1295 {
1296 	int i;
1297 	int count = group_info->ngroups;
1298 
1299 	for (i = 0; i < group_info->nblocks; i++) {
1300 		int cp_count = min(NGROUPS_PER_BLOCK, count);
1301 		int off = i * NGROUPS_PER_BLOCK;
1302 		int len = cp_count * sizeof(*grouplist);
1303 
1304 		if (copy_to_user(grouplist+off, group_info->blocks[i], len))
1305 			return -EFAULT;
1306 
1307 		count -= cp_count;
1308 	}
1309 	return 0;
1310 }
1311 
1312 /* fill a group_info from a user-space array - it must be allocated already */
1313 static int groups_from_user(struct group_info *group_info,
1314     gid_t __user *grouplist)
1315 {
1316 	int i;
1317 	int count = group_info->ngroups;
1318 
1319 	for (i = 0; i < group_info->nblocks; i++) {
1320 		int cp_count = min(NGROUPS_PER_BLOCK, count);
1321 		int off = i * NGROUPS_PER_BLOCK;
1322 		int len = cp_count * sizeof(*grouplist);
1323 
1324 		if (copy_from_user(group_info->blocks[i], grouplist+off, len))
1325 			return -EFAULT;
1326 
1327 		count -= cp_count;
1328 	}
1329 	return 0;
1330 }
1331 
1332 /* a simple Shell sort */
1333 static void groups_sort(struct group_info *group_info)
1334 {
1335 	int base, max, stride;
1336 	int gidsetsize = group_info->ngroups;
1337 
1338 	for (stride = 1; stride < gidsetsize; stride = 3 * stride + 1)
1339 		; /* nothing */
1340 	stride /= 3;
1341 
1342 	while (stride) {
1343 		max = gidsetsize - stride;
1344 		for (base = 0; base < max; base++) {
1345 			int left = base;
1346 			int right = left + stride;
1347 			gid_t tmp = GROUP_AT(group_info, right);
1348 
1349 			while (left >= 0 && GROUP_AT(group_info, left) > tmp) {
1350 				GROUP_AT(group_info, right) =
1351 				    GROUP_AT(group_info, left);
1352 				right = left;
1353 				left -= stride;
1354 			}
1355 			GROUP_AT(group_info, right) = tmp;
1356 		}
1357 		stride /= 3;
1358 	}
1359 }
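/*
 * Worked example: the strides are Knuth's 3h+1 sequence.  For
 * gidsetsize == 20 the first loop computes 1, 4, 13, 40, the divide
 * steps back to 13, and the passes then run with strides 13, 4 and
 * finally 1 (a plain insertion sort).
 */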
1360 
1361 /* a simple bsearch */
1362 int groups_search(struct group_info *group_info, gid_t grp)
1363 {
1364 	int left, right;
1365 
1366 	if (!group_info)
1367 		return 0;
1368 
1369 	left = 0;
1370 	right = group_info->ngroups;
1371 	while (left < right) {
1372 		int mid = (left+right)/2;
1373 		int cmp = grp - GROUP_AT(group_info, mid);
1374 		if (cmp > 0)
1375 			left = mid + 1;
1376 		else if (cmp < 0)
1377 			right = mid;
1378 		else
1379 			return 1;
1380 	}
1381 	return 0;
1382 }
1383 
1384 /* validate and set current->group_info */
1385 int set_current_groups(struct group_info *group_info)
1386 {
1387 	int retval;
1388 	struct group_info *old_info;
1389 
1390 	retval = security_task_setgroups(group_info);
1391 	if (retval)
1392 		return retval;
1393 
1394 	groups_sort(group_info);
1395 	get_group_info(group_info);
1396 
1397 	task_lock(current);
1398 	old_info = current->group_info;
1399 	current->group_info = group_info;
1400 	task_unlock(current);
1401 
1402 	put_group_info(old_info);
1403 
1404 	return 0;
1405 }
1406 
1407 EXPORT_SYMBOL(set_current_groups);
1408 
1409 asmlinkage long sys_getgroups(int gidsetsize, gid_t __user *grouplist)
1410 {
1411 	int i = 0;
1412 
1413 	/*
1414 	 *	SMP: Nobody else can change our grouplist. Thus we are
1415 	 *	safe.
1416 	 */
1417 
1418 	if (gidsetsize < 0)
1419 		return -EINVAL;
1420 
1421 	/* no need to grab task_lock here; it cannot change */
1422 	get_group_info(current->group_info);
1423 	i = current->group_info->ngroups;
1424 	if (gidsetsize) {
1425 		if (i > gidsetsize) {
1426 			i = -EINVAL;
1427 			goto out;
1428 		}
1429 		if (groups_to_user(grouplist, current->group_info)) {
1430 			i = -EFAULT;
1431 			goto out;
1432 		}
1433 	}
1434 out:
1435 	put_group_info(current->group_info);
1436 	return i;
1437 }
1438 
1439 /*
1440  *	SMP: Our groups are copy-on-write. We can set them safely
1441  *	without another task interfering.
1442  */
1443 
1444 asmlinkage long sys_setgroups(int gidsetsize, gid_t __user *grouplist)
1445 {
1446 	struct group_info *group_info;
1447 	int retval;
1448 
1449 	if (!capable(CAP_SETGID))
1450 		return -EPERM;
1451 	if ((unsigned)gidsetsize > NGROUPS_MAX)
1452 		return -EINVAL;
1453 
1454 	group_info = groups_alloc(gidsetsize);
1455 	if (!group_info)
1456 		return -ENOMEM;
1457 	retval = groups_from_user(group_info, grouplist);
1458 	if (retval) {
1459 		put_group_info(group_info);
1460 		return retval;
1461 	}
1462 
1463 	retval = set_current_groups(group_info);
1464 	put_group_info(group_info);
1465 
1466 	return retval;
1467 }
1468 
1469 /*
1470  * Check whether we're fsgid/egid or in the supplemental group..
1471  */
1472 int in_group_p(gid_t grp)
1473 {
1474 	int retval = 1;
1475 	if (grp != current->fsgid) {
1476 		get_group_info(current->group_info);
1477 		retval = groups_search(current->group_info, grp);
1478 		put_group_info(current->group_info);
1479 	}
1480 	return retval;
1481 }
1482 
1483 EXPORT_SYMBOL(in_group_p);
1484 
1485 int in_egroup_p(gid_t grp)
1486 {
1487 	int retval = 1;
1488 	if (grp != current->egid) {
1489 		get_group_info(current->group_info);
1490 		retval = groups_search(current->group_info, grp);
1491 		put_group_info(current->group_info);
1492 	}
1493 	return retval;
1494 }
1495 
1496 EXPORT_SYMBOL(in_egroup_p);
1497 
1498 DECLARE_RWSEM(uts_sem);
1499 
1500 EXPORT_SYMBOL(uts_sem);
1501 
1502 asmlinkage long sys_newuname(struct new_utsname __user * name)
1503 {
1504 	int errno = 0;
1505 
1506 	down_read(&uts_sem);
1507 	if (copy_to_user(name, &system_utsname, sizeof *name))
1508 		errno = -EFAULT;
1509 	up_read(&uts_sem);
1510 	return errno;
1511 }
1512 
1513 asmlinkage long sys_sethostname(char __user *name, int len)
1514 {
1515 	int errno;
1516 	char tmp[__NEW_UTS_LEN];
1517 
1518 	if (!capable(CAP_SYS_ADMIN))
1519 		return -EPERM;
1520 	if (len < 0 || len > __NEW_UTS_LEN)
1521 		return -EINVAL;
1522 	down_write(&uts_sem);
1523 	errno = -EFAULT;
1524 	if (!copy_from_user(tmp, name, len)) {
1525 		memcpy(system_utsname.nodename, tmp, len);
1526 		system_utsname.nodename[len] = 0;
1527 		errno = 0;
1528 	}
1529 	up_write(&uts_sem);
1530 	return errno;
1531 }
1532 
1533 #ifdef __ARCH_WANT_SYS_GETHOSTNAME
1534 
1535 asmlinkage long sys_gethostname(char __user *name, int len)
1536 {
1537 	int i, errno;
1538 
1539 	if (len < 0)
1540 		return -EINVAL;
1541 	down_read(&uts_sem);
1542 	i = 1 + strlen(system_utsname.nodename);
1543 	if (i > len)
1544 		i = len;
1545 	errno = 0;
1546 	if (copy_to_user(name, system_utsname.nodename, i))
1547 		errno = -EFAULT;
1548 	up_read(&uts_sem);
1549 	return errno;
1550 }
1551 
1552 #endif
1553 
1554 /*
1555  * Only setdomainname; getdomainname can be implemented by calling
1556  * uname()
1557  */
1558 asmlinkage long sys_setdomainname(char __user *name, int len)
1559 {
1560 	int errno;
1561 	char tmp[__NEW_UTS_LEN];
1562 
1563 	if (!capable(CAP_SYS_ADMIN))
1564 		return -EPERM;
1565 	if (len < 0 || len > __NEW_UTS_LEN)
1566 		return -EINVAL;
1567 
1568 	down_write(&uts_sem);
1569 	errno = -EFAULT;
1570 	if (!copy_from_user(tmp, name, len)) {
1571 		memcpy(system_utsname.domainname, tmp, len);
1572 		system_utsname.domainname[len] = 0;
1573 		errno = 0;
1574 	}
1575 	up_write(&uts_sem);
1576 	return errno;
1577 }
1578 
1579 asmlinkage long sys_getrlimit(unsigned int resource, struct rlimit __user *rlim)
1580 {
1581 	if (resource >= RLIM_NLIMITS)
1582 		return -EINVAL;
1583 	else {
1584 		struct rlimit value;
1585 		task_lock(current->group_leader);
1586 		value = current->signal->rlim[resource];
1587 		task_unlock(current->group_leader);
1588 		return copy_to_user(rlim, &value, sizeof(*rlim)) ? -EFAULT : 0;
1589 	}
1590 }
1591 
1592 #ifdef __ARCH_WANT_SYS_OLD_GETRLIMIT
1593 
1594 /*
1595  *	Back compatibility for getrlimit. Needed for some apps.
1596  */
1597 
1598 asmlinkage long sys_old_getrlimit(unsigned int resource, struct rlimit __user *rlim)
1599 {
1600 	struct rlimit x;
1601 	if (resource >= RLIM_NLIMITS)
1602 		return -EINVAL;
1603 
1604 	task_lock(current->group_leader);
1605 	x = current->signal->rlim[resource];
1606 	task_unlock(current->group_leader);
1607 	if (x.rlim_cur > 0x7FFFFFFF)
1608 		x.rlim_cur = 0x7FFFFFFF;
1609 	if (x.rlim_max > 0x7FFFFFFF)
1610 		x.rlim_max = 0x7FFFFFFF;
1611 	return copy_to_user(rlim, &x, sizeof(x)) ? -EFAULT : 0;
1612 }
1613 
1614 #endif
1615 
1616 asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim)
1617 {
1618 	struct rlimit new_rlim, *old_rlim;
1619 	int retval;
1620 
1621 	if (resource >= RLIM_NLIMITS)
1622 		return -EINVAL;
1623 	if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
1624 		return -EFAULT;
1625 	if (new_rlim.rlim_cur > new_rlim.rlim_max)
1626 		return -EINVAL;
1627 	old_rlim = current->signal->rlim + resource;
1628 	if ((new_rlim.rlim_max > old_rlim->rlim_max) &&
1629 	    !capable(CAP_SYS_RESOURCE))
1630 		return -EPERM;
1631 	if (resource == RLIMIT_NOFILE && new_rlim.rlim_max > NR_OPEN)
1632 		return -EPERM;
1633 
1634 	retval = security_task_setrlimit(resource, &new_rlim);
1635 	if (retval)
1636 		return retval;
1637 
1638 	task_lock(current->group_leader);
1639 	*old_rlim = new_rlim;
1640 	task_unlock(current->group_leader);
1641 
1642 	if (resource == RLIMIT_CPU && new_rlim.rlim_cur != RLIM_INFINITY &&
1643 	    (cputime_eq(current->signal->it_prof_expires, cputime_zero) ||
1644 	     new_rlim.rlim_cur <= cputime_to_secs(
1645 		     current->signal->it_prof_expires))) {
1646 		cputime_t cputime = secs_to_cputime(new_rlim.rlim_cur);
1647 		read_lock(&tasklist_lock);
1648 		spin_lock_irq(&current->sighand->siglock);
1649 		set_process_cpu_timer(current, CPUCLOCK_PROF,
1650 				      &cputime, NULL);
1651 		spin_unlock_irq(&current->sighand->siglock);
1652 		read_unlock(&tasklist_lock);
1653 	}
1654 
1655 	return 0;
1656 }
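/*
 * Illustrative userspace use: raising the soft fd limit up to the hard
 * limit requires no capability:
 *
 *	#include <sys/resource.h>
 *
 *	struct rlimit rl;
 *	getrlimit(RLIMIT_NOFILE, &rl);
 *	rl.rlim_cur = rl.rlim_max;
 *	setrlimit(RLIMIT_NOFILE, &rl);
 */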
1657 
1658 /*
1659  * It would make sense to put struct rusage in the task_struct,
1660  * except that would make the task_struct be *really big*.  After
1661  * task_struct gets moved into malloc'ed memory, it would
1662  * make sense to do this.  It will make moving the rest of the information
1663  * a lot simpler!  (Which we're not doing right now because we're not
1664  * measuring them yet).
1665  *
1666  * This expects to be called with tasklist_lock read-locked or better,
1667  * and the siglock not locked.  It may momentarily take the siglock.
1668  *
1669  * When sampling multiple threads for RUSAGE_SELF, under SMP we might have
1670  * races with threads incrementing their own counters.  But since word
1671  * reads are atomic, we either get new values or old values and we don't
1672  * care which for the sums.  We always take the siglock to protect reading
1673  * the c* fields from p->signal from races with exit.c updating those
1674  * fields when reaping, so a sample either gets all the additions of a
1675  * given child after it's reaped, or none so this sample is before reaping.
1676  */
1677 
1678 static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
1679 {
1680 	struct task_struct *t;
1681 	unsigned long flags;
1682 	cputime_t utime, stime;
1683 
1684 	memset((char *) r, 0, sizeof *r);
1685 
1686 	if (unlikely(!p->signal))
1687 		return;
1688 
1689 	switch (who) {
1690 		case RUSAGE_CHILDREN:
1691 			spin_lock_irqsave(&p->sighand->siglock, flags);
1692 			utime = p->signal->cutime;
1693 			stime = p->signal->cstime;
1694 			r->ru_nvcsw = p->signal->cnvcsw;
1695 			r->ru_nivcsw = p->signal->cnivcsw;
1696 			r->ru_minflt = p->signal->cmin_flt;
1697 			r->ru_majflt = p->signal->cmaj_flt;
1698 			spin_unlock_irqrestore(&p->sighand->siglock, flags);
1699 			cputime_to_timeval(utime, &r->ru_utime);
1700 			cputime_to_timeval(stime, &r->ru_stime);
1701 			break;
1702 		case RUSAGE_SELF:
1703 			spin_lock_irqsave(&p->sighand->siglock, flags);
1704 			utime = stime = cputime_zero;
1705 			goto sum_group;
1706 		case RUSAGE_BOTH:
1707 			spin_lock_irqsave(&p->sighand->siglock, flags);
1708 			utime = p->signal->cutime;
1709 			stime = p->signal->cstime;
1710 			r->ru_nvcsw = p->signal->cnvcsw;
1711 			r->ru_nivcsw = p->signal->cnivcsw;
1712 			r->ru_minflt = p->signal->cmin_flt;
1713 			r->ru_majflt = p->signal->cmaj_flt;
1714 		sum_group:
1715 			utime = cputime_add(utime, p->signal->utime);
1716 			stime = cputime_add(stime, p->signal->stime);
1717 			r->ru_nvcsw += p->signal->nvcsw;
1718 			r->ru_nivcsw += p->signal->nivcsw;
1719 			r->ru_minflt += p->signal->min_flt;
1720 			r->ru_majflt += p->signal->maj_flt;
1721 			t = p;
1722 			do {
1723 				utime = cputime_add(utime, t->utime);
1724 				stime = cputime_add(stime, t->stime);
1725 				r->ru_nvcsw += t->nvcsw;
1726 				r->ru_nivcsw += t->nivcsw;
1727 				r->ru_minflt += t->min_flt;
1728 				r->ru_majflt += t->maj_flt;
1729 				t = next_thread(t);
1730 			} while (t != p);
1731 			spin_unlock_irqrestore(&p->sighand->siglock, flags);
1732 			cputime_to_timeval(utime, &r->ru_utime);
1733 			cputime_to_timeval(stime, &r->ru_stime);
1734 			break;
1735 		default:
1736 			BUG();
1737 	}
1738 }
1739 
1740 int getrusage(struct task_struct *p, int who, struct rusage __user *ru)
1741 {
1742 	struct rusage r;
1743 	read_lock(&tasklist_lock);
1744 	k_getrusage(p, who, &r);
1745 	read_unlock(&tasklist_lock);
1746 	return copy_to_user(ru, &r, sizeof(r)) ? -EFAULT : 0;
1747 }
1748 
1749 asmlinkage long sys_getrusage(int who, struct rusage __user *ru)
1750 {
1751 	if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN)
1752 		return -EINVAL;
1753 	return getrusage(current, who, ru);
1754 }
1755 
1756 asmlinkage long sys_umask(int mask)
1757 {
1758 	mask = xchg(&current->fs->umask, mask & S_IRWXUGO);
1759 	return mask;
1760 }
1761 
1762 asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3,
1763 			  unsigned long arg4, unsigned long arg5)
1764 {
1765 	long error;
1766 
1767 	error = security_task_prctl(option, arg2, arg3, arg4, arg5);
1768 	if (error)
1769 		return error;
1770 
1771 	switch (option) {
1772 		case PR_SET_PDEATHSIG:
1773 			if (!valid_signal(arg2)) {
1774 				error = -EINVAL;
1775 				break;
1776 			}
1777 			current->pdeath_signal = arg2;
1778 			break;
1779 		case PR_GET_PDEATHSIG:
1780 			error = put_user(current->pdeath_signal, (int __user *)arg2);
1781 			break;
1782 		case PR_GET_DUMPABLE:
1783 			error = current->mm->dumpable;
1784 			break;
1785 		case PR_SET_DUMPABLE:
1786 			if (arg2 > 2) {
1787 				error = -EINVAL;
1788 				break;
1789 			}
1790 			current->mm->dumpable = arg2;
1791 			break;
1792 
1793 		case PR_SET_UNALIGN:
1794 			error = SET_UNALIGN_CTL(current, arg2);
1795 			break;
1796 		case PR_GET_UNALIGN:
1797 			error = GET_UNALIGN_CTL(current, arg2);
1798 			break;
1799 		case PR_SET_FPEMU:
1800 			error = SET_FPEMU_CTL(current, arg2);
1801 			break;
1802 		case PR_GET_FPEMU:
1803 			error = GET_FPEMU_CTL(current, arg2);
1804 			break;
1805 		case PR_SET_FPEXC:
1806 			error = SET_FPEXC_CTL(current, arg2);
1807 			break;
1808 		case PR_GET_FPEXC:
1809 			error = GET_FPEXC_CTL(current, arg2);
1810 			break;
1811 		case PR_GET_TIMING:
1812 			error = PR_TIMING_STATISTICAL;
1813 			break;
1814 		case PR_SET_TIMING:
1815 			if (arg2 == PR_TIMING_STATISTICAL)
1816 				error = 0;
1817 			else
1818 				error = -EINVAL;
1819 			break;
1820 
1821 		case PR_GET_KEEPCAPS:
1822 			if (current->keep_capabilities)
1823 				error = 1;
1824 			break;
1825 		case PR_SET_KEEPCAPS:
1826 			if (arg2 != 0 && arg2 != 1) {
1827 				error = -EINVAL;
1828 				break;
1829 			}
1830 			current->keep_capabilities = arg2;
1831 			break;
1832 		case PR_SET_NAME: {
1833 			struct task_struct *me = current;
1834 			unsigned char ncomm[sizeof(me->comm)];
1835 
1836 			ncomm[sizeof(me->comm)-1] = 0;
1837 			if (strncpy_from_user(ncomm, (char __user *)arg2,
1838 						sizeof(me->comm)-1) < 0)
1839 				return -EFAULT;
1840 			set_task_comm(me, ncomm);
1841 			return 0;
1842 		}
1843 		case PR_GET_NAME: {
1844 			struct task_struct *me = current;
1845 			unsigned char tcomm[sizeof(me->comm)];
1846 
1847 			get_task_comm(tcomm, me);
1848 			if (copy_to_user((char __user *)arg2, tcomm, sizeof(tcomm)))
1849 				return -EFAULT;
1850 			return 0;
1851 		}
1852 		default:
1853 			error = -EINVAL;
1854 			break;
1855 	}
1856 	return error;
1857 }
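/*
 * Illustrative userspace use of the PR_SET_NAME/PR_GET_NAME pair above
 * (the task name is at most 16 bytes including the terminator):
 *
 *	#include <sys/prctl.h>
 *
 *	char name[16];
 *	prctl(PR_SET_NAME, "worker");
 *	prctl(PR_GET_NAME, name);
 */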
1858