xref: /openbmc/linux/arch/x86/kernel/process_64.c (revision 732a675a)
1 /*
2  *  Copyright (C) 1995  Linus Torvalds
3  *
4  *  Pentium III FXSR, SSE support
5  *	Gareth Hughes <gareth@valinux.com>, May 2000
6  *
7  *  X86-64 port
8  *	Andi Kleen.
9  *
10  *	CPU hotplug support - ashok.raj@intel.com
11  */
12 
13 /*
14  * This file handles the architecture-dependent parts of process handling..
15  */
16 
17 #include <stdarg.h>
18 
19 #include <linux/cpu.h>
20 #include <linux/errno.h>
21 #include <linux/sched.h>
22 #include <linux/fs.h>
23 #include <linux/kernel.h>
24 #include <linux/mm.h>
25 #include <linux/elfcore.h>
26 #include <linux/smp.h>
27 #include <linux/slab.h>
28 #include <linux/user.h>
29 #include <linux/interrupt.h>
30 #include <linux/utsname.h>
31 #include <linux/delay.h>
32 #include <linux/module.h>
33 #include <linux/ptrace.h>
34 #include <linux/random.h>
35 #include <linux/notifier.h>
36 #include <linux/kprobes.h>
37 #include <linux/kdebug.h>
38 #include <linux/tick.h>
39 #include <linux/prctl.h>
40 
41 #include <asm/uaccess.h>
42 #include <asm/pgtable.h>
43 #include <asm/system.h>
44 #include <asm/io.h>
45 #include <asm/processor.h>
46 #include <asm/i387.h>
47 #include <asm/mmu_context.h>
48 #include <asm/pda.h>
49 #include <asm/prctl.h>
50 #include <asm/desc.h>
51 #include <asm/proto.h>
52 #include <asm/ia32.h>
53 #include <asm/idle.h>
54 
55 asmlinkage extern void ret_from_fork(void);
56 
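/* Clone flags for kernel threads: share the VM and do not allow forced ptrace */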
57 unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;
58 
59 unsigned long boot_option_idle_override = 0;
60 EXPORT_SYMBOL(boot_option_idle_override);
61 
62 /*
63  * Power management idle function, if any..
64  */
65 void (*pm_idle)(void);
66 EXPORT_SYMBOL(pm_idle);
67 
68 static ATOMIC_NOTIFIER_HEAD(idle_notifier);
69 
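/* Register a callback to be run on IDLE_START / IDLE_END transitions */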
70 void idle_notifier_register(struct notifier_block *n)
71 {
72 	atomic_notifier_chain_register(&idle_notifier, n);
73 }
74 
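/* Mark this CPU idle in the PDA and notify the idle notifier chain */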
75 void enter_idle(void)
76 {
77 	write_pda(isidle, 1);
78 	atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
79 }
80 
81 static void __exit_idle(void)
82 {
83 	if (test_and_clear_bit_pda(0, isidle) == 0)
84 		return;
85 	atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
86 }
87 
88 /* Called from interrupts to signify idle end */
89 void exit_idle(void)
90 {
91 	/* idle loop has pid 0 */
92 	if (current->pid)
93 		return;
94 	__exit_idle();
95 }
96 
97 /*
98  * We use this if we don't have any better
99  * idle routine..
100  */
101 void default_idle(void)
102 {
103 	current_thread_info()->status &= ~TS_POLLING;
104 	/*
105 	 * TS_POLLING-cleared state must be visible before we
106 	 * test NEED_RESCHED:
107 	 */
108 	smp_mb();
109 	if (!need_resched())
110 		safe_halt();	/* enables interrupts racelessly */
111 	else
112 		local_irq_enable();
113 	current_thread_info()->status |= TS_POLLING;
114 }
115 
116 #ifdef CONFIG_HOTPLUG_CPU
117 DECLARE_PER_CPU(int, cpu_state);
118 
119 #include <asm/nmi.h>
120 /* We halt the CPU with physical CPU hotplug */
121 static inline void play_dead(void)
122 {
123 	idle_task_exit();
124 	wbinvd();
125 	mb();
126 	/* Ack it */
127 	__get_cpu_var(cpu_state) = CPU_DEAD;
128 
129 	local_irq_disable();
130 	while (1)
131 		halt();
132 }
133 #else
134 static inline void play_dead(void)
135 {
136 	BUG();
137 }
138 #endif /* CONFIG_HOTPLUG_CPU */
139 
140 /*
141  * The idle thread. There's no useful work to be
142  * done, so just try to conserve power and have a
143  * low exit latency (ie sit in a loop waiting for
144  * somebody to say that they'd like to reschedule)
145  */
146 void cpu_idle(void)
147 {
148 	current_thread_info()->status |= TS_POLLING;
149 	/* endless idle loop with no priority at all */
150 	while (1) {
151 		tick_nohz_stop_sched_tick();
152 		while (!need_resched()) {
153 			void (*idle)(void);
154 
155 			rmb();
156 			idle = pm_idle;
157 			if (!idle)
158 				idle = default_idle;
159 			if (cpu_is_offline(smp_processor_id()))
160 				play_dead();
161 			/*
162 			 * Idle routines should keep interrupts disabled
163 			 * from here on, until they go to idle.
164 			 * Otherwise, idle callbacks can misfire.
165 			 */
166 			local_irq_disable();
167 			enter_idle();
168 			idle();
169 			/* In many cases the interrupt that ended idle
170 			   has already called exit_idle. But some idle
171 			   loops can be woken up without interrupt. */
172 			__exit_idle();
173 		}
174 
175 		tick_nohz_restart_sched_tick();
176 		preempt_enable_no_resched();
177 		schedule();
178 		preempt_disable();
179 	}
180 }
181 
182 /* Prints also some state that isn't saved in the pt_regs */
183 void __show_regs(struct pt_regs * regs)
184 {
185 	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
186 	unsigned long d0, d1, d2, d3, d6, d7;
187 	unsigned int fsindex, gsindex;
188 	unsigned int ds, cs, es;
189 
190 	printk("\n");
191 	print_modules();
192 	printk("Pid: %d, comm: %.20s %s %s %.*s\n",
193 		current->pid, current->comm, print_tainted(),
194 		init_utsname()->release,
195 		(int)strcspn(init_utsname()->version, " "),
196 		init_utsname()->version);
197 	printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
198 	printk_address(regs->ip, 1);
199 	printk("RSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss, regs->sp,
200 		regs->flags);
201 	printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
202 	       regs->ax, regs->bx, regs->cx);
203 	printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
204 	       regs->dx, regs->si, regs->di);
205 	printk("RBP: %016lx R08: %016lx R09: %016lx\n",
206 	       regs->bp, regs->r8, regs->r9);
207 	printk("R10: %016lx R11: %016lx R12: %016lx\n",
208 	       regs->r10, regs->r11, regs->r12);
209 	printk("R13: %016lx R14: %016lx R15: %016lx\n",
210 	       regs->r13, regs->r14, regs->r15);
211 
212 	asm("movl %%ds,%0" : "=r" (ds));
213 	asm("movl %%cs,%0" : "=r" (cs));
214 	asm("movl %%es,%0" : "=r" (es));
215 	asm("movl %%fs,%0" : "=r" (fsindex));
216 	asm("movl %%gs,%0" : "=r" (gsindex));
217 
218 	rdmsrl(MSR_FS_BASE, fs);
219 	rdmsrl(MSR_GS_BASE, gs);
220 	rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
221 
222 	cr0 = read_cr0();
223 	cr2 = read_cr2();
224 	cr3 = read_cr3();
225 	cr4 = read_cr4();
226 
227 	printk("FS:  %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
228 	       fs, fsindex, gs, gsindex, shadowgs);
229 	printk("CS:  %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0);
230 	printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4);
231 
232 	get_debugreg(d0, 0);
233 	get_debugreg(d1, 1);
234 	get_debugreg(d2, 2);
235 	printk("DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
236 	get_debugreg(d3, 3);
237 	get_debugreg(d6, 6);
238 	get_debugreg(d7, 7);
239 	printk("DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
240 }
241 
242 void show_regs(struct pt_regs *regs)
243 {
244 	printk("CPU %d:", smp_processor_id());
245 	__show_regs(regs);
246 	show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
247 }
248 
249 /*
250  * Free current thread data structures etc..
251  */
252 void exit_thread(void)
253 {
254 	struct task_struct *me = current;
255 	struct thread_struct *t = &me->thread;
256 
257 	if (me->thread.io_bitmap_ptr) {
258 		struct tss_struct *tss = &per_cpu(init_tss, get_cpu());
259 
260 		kfree(t->io_bitmap_ptr);
261 		t->io_bitmap_ptr = NULL;
262 		clear_thread_flag(TIF_IO_BITMAP);
263 		/*
264 		 * Careful, clear this in the TSS too:
265 		 */
266 		memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
267 		t->io_bitmap_max = 0;
268 		put_cpu();
269 	}
270 }
271 
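/*
 * Reset the thread state: resolve a pending 32/64-bit ABI switch and
 * clear the debug registers, TLS slots and FPU state.
 */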
272 void flush_thread(void)
273 {
274 	struct task_struct *tsk = current;
275 
276 	if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) {
277 		clear_tsk_thread_flag(tsk, TIF_ABI_PENDING);
278 		if (test_tsk_thread_flag(tsk, TIF_IA32)) {
279 			clear_tsk_thread_flag(tsk, TIF_IA32);
280 		} else {
281 			set_tsk_thread_flag(tsk, TIF_IA32);
282 			current_thread_info()->status |= TS_COMPAT;
283 		}
284 	}
285 	clear_tsk_thread_flag(tsk, TIF_DEBUG);
286 
287 	tsk->thread.debugreg0 = 0;
288 	tsk->thread.debugreg1 = 0;
289 	tsk->thread.debugreg2 = 0;
290 	tsk->thread.debugreg3 = 0;
291 	tsk->thread.debugreg6 = 0;
292 	tsk->thread.debugreg7 = 0;
293 	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
294 	/*
295 	 * Forget coprocessor state..
296 	 */
297 	clear_fpu(tsk);
298 	clear_used_math();
299 }
300 
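/* A dead task must not leave an LDT behind; complain loudly if it does */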
301 void release_thread(struct task_struct *dead_task)
302 {
303 	if (dead_task->mm) {
304 		if (dead_task->mm->context.size) {
305 			printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
306 					dead_task->comm,
307 					dead_task->mm->context.ldt,
308 					dead_task->mm->context.size);
309 			BUG();
310 		}
311 	}
312 }
313 
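/* Install a 32-bit flat segment based at addr into the given TLS slot */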
314 static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
315 {
316 	struct user_desc ud = {
317 		.base_addr = addr,
318 		.limit = 0xfffff,
319 		.seg_32bit = 1,
320 		.limit_in_pages = 1,
321 		.useable = 1,
322 	};
323 	struct desc_struct *desc = t->thread.tls_array;
324 	desc += tls;
325 	fill_ldt(desc, &ud);
326 }
327 
328 static inline u32 read_32bit_tls(struct task_struct *t, int tls)
329 {
330 	return get_desc_base(&t->thread.tls_array[tls]);
331 }
332 
333 /*
334  * This gets called before we allocate a new thread and copy
335  * the current task into it.
336  */
337 void prepare_to_copy(struct task_struct *tsk)
338 {
339 	unlazy_fpu(tsk);
340 }
341 
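/*
 * Set up the child at fork/clone time: kernel stack, register frame,
 * segment state, a copy of the I/O bitmap and (optionally) a new TLS.
 */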
342 int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
343 		unsigned long unused,
344 	struct task_struct * p, struct pt_regs * regs)
345 {
346 	int err;
347 	struct pt_regs * childregs;
348 	struct task_struct *me = current;
349 
350 	childregs = ((struct pt_regs *)
351 			(THREAD_SIZE + task_stack_page(p))) - 1;
352 	*childregs = *regs;
353 
354 	childregs->ax = 0;
355 	childregs->sp = sp;
356 	if (sp == ~0UL)
357 		childregs->sp = (unsigned long)childregs;
358 
359 	p->thread.sp = (unsigned long) childregs;
360 	p->thread.sp0 = (unsigned long) (childregs+1);
361 	p->thread.usersp = me->thread.usersp;
362 
363 	set_tsk_thread_flag(p, TIF_FORK);
364 
365 	p->thread.fs = me->thread.fs;
366 	p->thread.gs = me->thread.gs;
367 
368 	asm("mov %%gs,%0" : "=m" (p->thread.gsindex));
369 	asm("mov %%fs,%0" : "=m" (p->thread.fsindex));
370 	asm("mov %%es,%0" : "=m" (p->thread.es));
371 	asm("mov %%ds,%0" : "=m" (p->thread.ds));
372 
373 	if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
374 		p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
375 		if (!p->thread.io_bitmap_ptr) {
376 			p->thread.io_bitmap_max = 0;
377 			return -ENOMEM;
378 		}
379 		memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
380 				IO_BITMAP_BYTES);
381 		set_tsk_thread_flag(p, TIF_IO_BITMAP);
382 	}
383 
384 	/*
385 	 * Set a new TLS for the child thread?
386 	 */
387 	if (clone_flags & CLONE_SETTLS) {
388 #ifdef CONFIG_IA32_EMULATION
389 		if (test_thread_flag(TIF_IA32))
390 			err = do_set_thread_area(p, -1,
391 				(struct user_desc __user *)childregs->si, 0);
392 		else
393 #endif
394 			err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
395 		if (err)
396 			goto out;
397 	}
398 	err = 0;
399 out:
400 	if (err && p->thread.io_bitmap_ptr) {
401 		kfree(p->thread.io_bitmap_ptr);
402 		p->thread.io_bitmap_max = 0;
403 	}
404 	return err;
405 }
406 
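/* Set up the register and segment state to enter a new 64-bit user program */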
407 void
408 start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
409 {
410 	asm volatile("movl %0, %%fs; movl %0, %%es; movl %0, %%ds" :: "r"(0));
411 	load_gs_index(0);
412 	regs->ip		= new_ip;
413 	regs->sp		= new_sp;
414 	write_pda(oldrsp, new_sp);
415 	regs->cs		= __USER_CS;
416 	regs->ss		= __USER_DS;
417 	regs->flags		= 0x200;	/* X86_EFLAGS_IF: interrupts enabled */
418 	set_fs(USER_DS);
419 	/*
420 	 * Free the old FP and other extended state
421 	 */
422 	free_thread_xstate(current);
423 }
424 EXPORT_SYMBOL_GPL(start_thread);
425 
426 static void hard_disable_TSC(void)
427 {
428 	write_cr4(read_cr4() | X86_CR4_TSD);
429 }
430 
431 void disable_TSC(void)
432 {
433 	preempt_disable();
434 	if (!test_and_set_thread_flag(TIF_NOTSC))
435 		/*
436 		 * Must flip the CPU state synchronously with
437 		 * TIF_NOTSC in the current running context.
438 		 */
439 		hard_disable_TSC();
440 	preempt_enable();
441 }
442 
443 static void hard_enable_TSC(void)
444 {
445 	write_cr4(read_cr4() & ~X86_CR4_TSD);
446 }
447 
448 static void enable_TSC(void)
449 {
450 	preempt_disable();
451 	if (test_and_clear_thread_flag(TIF_NOTSC))
452 		/*
453 		 * Must flip the CPU state synchronously with
454 		 * TIF_NOTSC in the current running context.
455 		 */
456 		hard_enable_TSC();
457 	preempt_enable();
458 }
459 
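/* Report the thread's RDTSC mode (PR_TSC_ENABLE or PR_TSC_SIGSEGV) to user space */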
460 int get_tsc_mode(unsigned long adr)
461 {
462 	unsigned int val;
463 
464 	if (test_thread_flag(TIF_NOTSC))
465 		val = PR_TSC_SIGSEGV;
466 	else
467 		val = PR_TSC_ENABLE;
468 
469 	return put_user(val, (unsigned int __user *)adr);
470 }
471 
472 int set_tsc_mode(unsigned int val)
473 {
474 	if (val == PR_TSC_SIGSEGV)
475 		disable_TSC();
476 	else if (val == PR_TSC_ENABLE)
477 		enable_TSC();
478 	else
479 		return -EINVAL;
480 
481 	return 0;
482 }
483 
484 /*
485  * This special macro can be used to load a debugging register
486  */
487 #define loaddebug(thread, r) set_debugreg(thread->debugreg ## r, r)
488 
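/*
 * Slow-path context switch work: DS area and debugctl MSRs, debug
 * registers, the TSC disable bit and the TSS I/O bitmap.
 */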
489 static inline void __switch_to_xtra(struct task_struct *prev_p,
490 				    struct task_struct *next_p,
491 				    struct tss_struct *tss)
492 {
493 	struct thread_struct *prev, *next;
494 	unsigned long debugctl;
495 
496 	prev = &prev_p->thread;
497 	next = &next_p->thread;
498 
499 	debugctl = prev->debugctlmsr;
500 	if (next->ds_area_msr != prev->ds_area_msr) {
501 		/* we clear debugctl to make sure DS
502 		 * is not in use when we change it */
503 		debugctl = 0;
504 		update_debugctlmsr(0);
505 		wrmsrl(MSR_IA32_DS_AREA, next->ds_area_msr);
506 	}
507 
508 	if (next->debugctlmsr != debugctl)
509 		update_debugctlmsr(next->debugctlmsr);
510 
511 	if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
512 		loaddebug(next, 0);
513 		loaddebug(next, 1);
514 		loaddebug(next, 2);
515 		loaddebug(next, 3);
516 		/* no 4 and 5 */
517 		loaddebug(next, 6);
518 		loaddebug(next, 7);
519 	}
520 
521 	if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
522 	    test_tsk_thread_flag(next_p, TIF_NOTSC)) {
523 		/* prev and next are different */
524 		if (test_tsk_thread_flag(next_p, TIF_NOTSC))
525 			hard_disable_TSC();
526 		else
527 			hard_enable_TSC();
528 	}
529 
530 	if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
531 		/*
532 		 * Copy the relevant range of the IO bitmap.
533 		 * Normally this is 128 bytes or less:
534 		 */
535 		memcpy(tss->io_bitmap, next->io_bitmap_ptr,
536 		       max(prev->io_bitmap_max, next->io_bitmap_max));
537 	} else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
538 		/*
539 		 * Clear any possible leftover bits:
540 		 */
541 		memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
542 	}
543 
544 #ifdef X86_BTS
545 	if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
546 		ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);
547 
548 	if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
549 		ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
550 #endif
551 }
552 
553 /*
554  *	switch_to(x,y) should switch tasks from x to y.
555  *
556  * This could still be optimized:
557  * - fold all the options into a flag word and test it with a single test.
558  * - could test fs/gs bitsliced
559  *
560  * Kprobes not supported here. Set the probe on schedule instead.
561  */
562 struct task_struct *
563 __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
564 {
565 	struct thread_struct *prev = &prev_p->thread,
566 				 *next = &next_p->thread;
567 	int cpu = smp_processor_id();
568 	struct tss_struct *tss = &per_cpu(init_tss, cpu);
569 
570 	/* we're going to use this soon, after a few expensive things */
571 	if (next_p->fpu_counter > 5)
572 		prefetch(next->xstate);
573 
574 	/*
575 	 * Reload esp0, LDT and the page table pointer:
576 	 */
577 	load_sp0(tss, next);
578 
579 	/*
580 	 * Switch DS and ES.
581 	 * This won't pick up thread selector changes, but I guess that is ok.
582 	 */
583 	asm volatile("mov %%es,%0" : "=m" (prev->es));
584 	if (unlikely(next->es | prev->es))
585 		loadsegment(es, next->es);
586 
587 	asm volatile ("mov %%ds,%0" : "=m" (prev->ds));
588 	if (unlikely(next->ds | prev->ds))
589 		loadsegment(ds, next->ds);
590 
591 	load_TLS(next, cpu);
592 
593 	/*
594 	 * Switch FS and GS.
595 	 */
596 	{
597 		unsigned fsindex;
598 		asm volatile("movl %%fs,%0" : "=r" (fsindex));
599 		/* segment register != 0 always requires a reload.
600 		   also reload when it has changed.
601 		   when prev process used 64bit base always reload
602 		   to avoid an information leak. */
603 		if (unlikely(fsindex | next->fsindex | prev->fs)) {
604 			loadsegment(fs, next->fsindex);
605 			/* check if the user used a selector != 0
606 			 * if yes clear 64bit base, since overloaded base
607 			 * is always mapped to the Null selector
608 			 */
609 			if (fsindex)
610 				prev->fs = 0;
611 		}
612 		/* when next process has a 64bit base use it */
613 		if (next->fs)
614 			wrmsrl(MSR_FS_BASE, next->fs);
615 		prev->fsindex = fsindex;
616 	}
617 	{
618 		unsigned gsindex;
619 		asm volatile("movl %%gs,%0" : "=r" (gsindex));
620 		if (unlikely(gsindex | next->gsindex | prev->gs)) {
621 			load_gs_index(next->gsindex);
622 			if (gsindex)
623 				prev->gs = 0;
624 		}
625 		if (next->gs)
626 			wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
627 		prev->gsindex = gsindex;
628 	}
629 
630 	/* Must be after DS reload */
631 	unlazy_fpu(prev_p);
632 
633 	/*
634 	 * Switch the PDA and FPU contexts.
635 	 */
636 	prev->usersp = read_pda(oldrsp);
637 	write_pda(oldrsp, next->usersp);
638 	write_pda(pcurrent, next_p);
639 
640 	write_pda(kernelstack,
641 		  (unsigned long)task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET);
642 #ifdef CONFIG_CC_STACKPROTECTOR
643 	write_pda(stack_canary, next_p->stack_canary);
644 	/*
645 	 * Build time only check to make sure the stack_canary is at
646 	 * offset 40 in the pda; this is a gcc ABI requirement
647 	 */
648 	BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
649 #endif
650 
651 	/*
652 	 * Now maybe reload the debug registers and handle I/O bitmaps
653 	 */
654 	if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
655 		     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
656 		__switch_to_xtra(prev_p, next_p, tss);
657 
658 	/* If the task has used fpu the last 5 timeslices, just do a full
659 	 * restore of the math state immediately to avoid the trap; the
660 	 * chances of needing FPU soon are obviously high now
661 	 *
662 	 * tsk_used_math() checks prevent calling math_state_restore(),
663 	 * which can sleep in the case of !tsk_used_math()
664 	 */
665 	if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
666 		math_state_restore();
667 	return prev_p;
668 }
669 
670 /*
671  * sys_execve() executes a new program.
672  */
673 asmlinkage
674 long sys_execve(char __user *name, char __user * __user *argv,
675 		char __user * __user *envp, struct pt_regs *regs)
676 {
677 	long error;
678 	char * filename;
679 
680 	filename = getname(name);
681 	error = PTR_ERR(filename);
682 	if (IS_ERR(filename))
683 		return error;
684 	error = do_execve(filename, argv, envp, regs);
685 	putname(filename);
686 	return error;
687 }
688 
689 void set_personality_64bit(void)
690 {
691 	/* inherit personality from parent */
692 
693 	/* Make sure to be in 64bit mode */
694 	clear_thread_flag(TIF_IA32);
695 
696 	/* TBD: overwrites user setup. Should have two bits.
697 	   But 64bit processes have always behaved this way,
698 	   so it's not too bad. The main problem is just that
699 	   32bit children are affected again. */
700 	current->personality &= ~READ_IMPLIES_EXEC;
701 }
702 
703 asmlinkage long sys_fork(struct pt_regs *regs)
704 {
705 	return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
706 }
707 
708 asmlinkage long
709 sys_clone(unsigned long clone_flags, unsigned long newsp,
710 	  void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
711 {
712 	if (!newsp)
713 		newsp = regs->sp;
714 	return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
715 }
716 
717 /*
718  * This is trivial, and on the face of it looks like it
719  * could equally well be done in user mode.
720  *
721  * Not so, for quite unobvious reasons - register pressure.
722  * In user mode vfork() cannot have a stack frame, and if
723  * done by calling the "clone()" system call directly, you
724  * do not have enough call-clobbered registers to hold all
725  * the information you need.
726  */
727 asmlinkage long sys_vfork(struct pt_regs *regs)
728 {
729 	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0,
730 		    NULL, NULL);
731 }
732 
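/*
 * Follow the sleeping task's frame pointers to find the first return
 * address outside the scheduler, i.e. where the task is blocked.
 */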
733 unsigned long get_wchan(struct task_struct *p)
734 {
735 	unsigned long stack;
736 	u64 fp, ip;
737 	int count = 0;
738 
739 	if (!p || p == current || p->state == TASK_RUNNING)
740 		return 0;
741 	stack = (unsigned long)task_stack_page(p);
742 	if (p->thread.sp < stack || p->thread.sp > stack+THREAD_SIZE)
743 		return 0;
744 	fp = *(u64 *)(p->thread.sp);
745 	do {
746 		if (fp < (unsigned long)stack ||
747 		    fp > (unsigned long)stack+THREAD_SIZE)
748 			return 0;
749 		ip = *(u64 *)(fp+8);
750 		if (!in_sched_functions(ip))
751 			return ip;
752 		fp = *(u64 *)fp;
753 	} while (count++ < 16);
754 	return 0;
755 }
756 
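/*
 * arch_prctl() backend: get or set the FS/GS base of a task.  Small
 * bases are handled through a GDT (TLS) slot, large ones via the MSRs.
 */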
757 long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
758 {
759 	int ret = 0;
760 	int doit = task == current;
761 	int cpu;
762 
763 	switch (code) {
764 	case ARCH_SET_GS:
765 		if (addr >= TASK_SIZE_OF(task))
766 			return -EPERM;
767 		cpu = get_cpu();
768 		/* handle small bases via the GDT because that's faster to
769 		   switch. */
770 		if (addr <= 0xffffffff) {
771 			set_32bit_tls(task, GS_TLS, addr);
772 			if (doit) {
773 				load_TLS(&task->thread, cpu);
774 				load_gs_index(GS_TLS_SEL);
775 			}
776 			task->thread.gsindex = GS_TLS_SEL;
777 			task->thread.gs = 0;
778 		} else {
779 			task->thread.gsindex = 0;
780 			task->thread.gs = addr;
781 			if (doit) {
782 				load_gs_index(0);
783 				ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
784 			}
785 		}
786 		put_cpu();
787 		break;
788 	case ARCH_SET_FS:
789 		/* Not strictly needed for fs, but do it for symmetry
790 		   with gs */
791 		if (addr >= TASK_SIZE_OF(task))
792 			return -EPERM;
793 		cpu = get_cpu();
794 		/* handle small bases via the GDT because that's faster to
795 		   switch. */
796 		if (addr <= 0xffffffff) {
797 			set_32bit_tls(task, FS_TLS, addr);
798 			if (doit) {
799 				load_TLS(&task->thread, cpu);
800 				asm volatile("movl %0,%%fs" :: "r"(FS_TLS_SEL));
801 			}
802 			task->thread.fsindex = FS_TLS_SEL;
803 			task->thread.fs = 0;
804 		} else {
805 			task->thread.fsindex = 0;
806 			task->thread.fs = addr;
807 			if (doit) {
808 				/* set the selector to 0 to not confuse
809 				   __switch_to */
810 				asm volatile("movl %0,%%fs" :: "r" (0));
811 				ret = checking_wrmsrl(MSR_FS_BASE, addr);
812 			}
813 		}
814 		put_cpu();
815 		break;
816 	case ARCH_GET_FS: {
817 		unsigned long base;
818 		if (task->thread.fsindex == FS_TLS_SEL)
819 			base = read_32bit_tls(task, FS_TLS);
820 		else if (doit)
821 			rdmsrl(MSR_FS_BASE, base);
822 		else
823 			base = task->thread.fs;
824 		ret = put_user(base, (unsigned long __user *)addr);
825 		break;
826 	}
827 	case ARCH_GET_GS: {
828 		unsigned long base;
829 		unsigned gsindex;
830 		if (task->thread.gsindex == GS_TLS_SEL)
831 			base = read_32bit_tls(task, GS_TLS);
832 		else if (doit) {
833 			asm("movl %%gs,%0" : "=r" (gsindex));
834 			if (gsindex)
835 				rdmsrl(MSR_KERNEL_GS_BASE, base);
836 			else
837 				base = task->thread.gs;
838 		}
839 		else
840 			base = task->thread.gs;
841 		ret = put_user(base, (unsigned long __user *)addr);
842 		break;
843 	}
844 
845 	default:
846 		ret = -EINVAL;
847 		break;
848 	}
849 
850 	return ret;
851 }
852 
853 long sys_arch_prctl(int code, unsigned long addr)
854 {
855 	return do_arch_prctl(current, code, addr);
856 }
857 
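/* Randomize the start-of-stack by up to 8k (if enabled) and align it to 16 bytes */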
858 unsigned long arch_align_stack(unsigned long sp)
859 {
860 	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
861 		sp -= get_random_int() % 8192;
862 	return sp & ~0xf;
863 }
864 
865 unsigned long arch_randomize_brk(struct mm_struct *mm)
866 {
867 	unsigned long range_end = mm->brk + 0x02000000;
868 	return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
869 }
870