xref: /openbmc/linux/arch/x86/kernel/process_64.c (revision f15cbe6f1a4b4d9df59142fc8e4abb973302cf44)
/*
 *  Copyright (C) 1995  Linus Torvalds
 *
 *  Pentium III FXSR, SSE support
 *	Gareth Hughes <gareth@valinux.com>, May 2000
 *
 *  X86-64 port
 *	Andi Kleen.
 *
 *	CPU hotplug support - ashok.raj@intel.com
 */

/*
 * This file handles the architecture-dependent parts of process handling..
 */

#include <stdarg.h>

#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/elfcore.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/interrupt.h>
#include <linux/utsname.h>
#include <linux/delay.h>
#include <linux/module.h>
#include <linux/ptrace.h>
#include <linux/random.h>
#include <linux/notifier.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/tick.h>
#include <linux/prctl.h>

#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/processor.h>
#include <asm/i387.h>
#include <asm/mmu_context.h>
#include <asm/pda.h>
#include <asm/prctl.h>
#include <asm/desc.h>
#include <asm/proto.h>
#include <asm/ia32.h>
#include <asm/idle.h>

asmlinkage extern void ret_from_fork(void);

unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;

static ATOMIC_NOTIFIER_HEAD(idle_notifier);

void idle_notifier_register(struct notifier_block *n)
{
	atomic_notifier_chain_register(&idle_notifier, n);
}

void enter_idle(void)
{
	write_pda(isidle, 1);
	atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
}

static void __exit_idle(void)
{
	if (test_and_clear_bit_pda(0, isidle) == 0)
		return;
	atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
}

/* Called from interrupts to signify idle end */
void exit_idle(void)
{
	/* idle loop has pid 0 */
	if (current->pid)
		return;
	__exit_idle();
}
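
/*
 * A minimal sketch (not part of this file) of how the idle notifier chain
 * above can be consumed; the callback and its name are hypothetical.  The
 * chain delivers IDLE_START from enter_idle() and IDLE_END from exit_idle():
 *
 *	static int my_idle_notify(struct notifier_block *nb,
 *				  unsigned long action, void *data)
 *	{
 *		if (action == IDLE_START)
 *			;	// this CPU is entering the idle loop
 *		else if (action == IDLE_END)
 *			;	// this CPU left idle (e.g. via an interrupt)
 *		return NOTIFY_OK;
 *	}
 *
 *	static struct notifier_block my_idle_nb = {
 *		.notifier_call = my_idle_notify,
 *	};
 *
 *	// somewhere in driver init:
 *	//	idle_notifier_register(&my_idle_nb);
 */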

#ifdef CONFIG_HOTPLUG_CPU
DECLARE_PER_CPU(int, cpu_state);

#include <asm/nmi.h>
/* We halt the CPU with physical CPU hotplug */
static inline void play_dead(void)
{
	idle_task_exit();
	wbinvd();
	mb();
	/* Ack it */
	__get_cpu_var(cpu_state) = CPU_DEAD;

	local_irq_disable();
	while (1)
		halt();
}
#else
static inline void play_dead(void)
{
	BUG();
}
#endif /* CONFIG_HOTPLUG_CPU */

/*
 * The idle thread. There's no useful work to be
 * done, so just try to conserve power and have a
 * low exit latency (ie sit in a loop waiting for
 * somebody to say that they'd like to reschedule)
 */
void cpu_idle(void)
{
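	/*
	 * TS_POLLING tells the scheduler that this CPU polls need_resched()
	 * while idle, so a remote wakeup can usually just set the flag
	 * instead of sending a reschedule IPI.
	 */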
	current_thread_info()->status |= TS_POLLING;
	/* endless idle loop with no priority at all */
	while (1) {
		tick_nohz_stop_sched_tick(1);
		while (!need_resched()) {

			rmb();

			if (cpu_is_offline(smp_processor_id()))
				play_dead();
			/*
			 * Idle routines should keep interrupts disabled
			 * from here on, until they go to idle.
			 * Otherwise, idle callbacks can misfire.
			 */
			local_irq_disable();
			enter_idle();
			/* Don't trace irqs off for idle */
			stop_critical_timings();
			pm_idle();
			start_critical_timings();
			/* In many cases the interrupt that ended idle
			   has already called exit_idle. But some idle
			   loops can be woken up without interrupt. */
			__exit_idle();
		}

		tick_nohz_restart_sched_tick();
		preempt_enable_no_resched();
		schedule();
		preempt_disable();
	}
}

/* Also prints some state that isn't saved in the pt_regs */
void __show_regs(struct pt_regs * regs)
{
	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
	unsigned long d0, d1, d2, d3, d6, d7;
	unsigned int fsindex, gsindex;
	unsigned int ds, cs, es;

	printk("\n");
	print_modules();
	printk("Pid: %d, comm: %.20s %s %s %.*s\n",
		current->pid, current->comm, print_tainted(),
		init_utsname()->release,
		(int)strcspn(init_utsname()->version, " "),
		init_utsname()->version);
	printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
	printk_address(regs->ip, 1);
	printk("RSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss, regs->sp,
		regs->flags);
	printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
	       regs->ax, regs->bx, regs->cx);
	printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
	       regs->dx, regs->si, regs->di);
	printk("RBP: %016lx R08: %016lx R09: %016lx\n",
	       regs->bp, regs->r8, regs->r9);
	printk("R10: %016lx R11: %016lx R12: %016lx\n",
	       regs->r10, regs->r11, regs->r12);
	printk("R13: %016lx R14: %016lx R15: %016lx\n",
	       regs->r13, regs->r14, regs->r15);

	asm("movl %%ds,%0" : "=r" (ds));
	asm("movl %%cs,%0" : "=r" (cs));
	asm("movl %%es,%0" : "=r" (es));
	asm("movl %%fs,%0" : "=r" (fsindex));
	asm("movl %%gs,%0" : "=r" (gsindex));

	rdmsrl(MSR_FS_BASE, fs);
	rdmsrl(MSR_GS_BASE, gs);
	rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);

	cr0 = read_cr0();
	cr2 = read_cr2();
	cr3 = read_cr3();
	cr4 = read_cr4();

	printk("FS:  %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
	       fs,fsindex,gs,gsindex,shadowgs);
	printk("CS:  %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0);
	printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4);

	get_debugreg(d0, 0);
	get_debugreg(d1, 1);
	get_debugreg(d2, 2);
	printk("DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
	get_debugreg(d3, 3);
	get_debugreg(d6, 6);
	get_debugreg(d7, 7);
	printk("DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
}

void show_regs(struct pt_regs *regs)
{
	printk("CPU %d:", smp_processor_id());
	__show_regs(regs);
	show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
}

/*
 * Free current thread data structures etc..
 */
void exit_thread(void)
{
	struct task_struct *me = current;
	struct thread_struct *t = &me->thread;

	if (me->thread.io_bitmap_ptr) {
		struct tss_struct *tss = &per_cpu(init_tss, get_cpu());

		kfree(t->io_bitmap_ptr);
		t->io_bitmap_ptr = NULL;
		clear_thread_flag(TIF_IO_BITMAP);
		/*
		 * Careful, clear this in the TSS too:
		 */
		memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
		t->io_bitmap_max = 0;
		put_cpu();
	}
}

void flush_thread(void)
{
	struct task_struct *tsk = current;

	if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) {
		clear_tsk_thread_flag(tsk, TIF_ABI_PENDING);
		if (test_tsk_thread_flag(tsk, TIF_IA32)) {
			clear_tsk_thread_flag(tsk, TIF_IA32);
		} else {
			set_tsk_thread_flag(tsk, TIF_IA32);
			current_thread_info()->status |= TS_COMPAT;
		}
	}
	clear_tsk_thread_flag(tsk, TIF_DEBUG);

	tsk->thread.debugreg0 = 0;
	tsk->thread.debugreg1 = 0;
	tsk->thread.debugreg2 = 0;
	tsk->thread.debugreg3 = 0;
	tsk->thread.debugreg6 = 0;
	tsk->thread.debugreg7 = 0;
	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
	/*
	 * Forget coprocessor state..
	 */
	tsk->fpu_counter = 0;
	clear_fpu(tsk);
	clear_used_math();
}

void release_thread(struct task_struct *dead_task)
{
	if (dead_task->mm) {
		if (dead_task->mm->context.size) {
			printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
					dead_task->comm,
					dead_task->mm->context.ldt,
					dead_task->mm->context.size);
			BUG();
		}
	}
}

static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
{
	struct user_desc ud = {
		.base_addr = addr,
		.limit = 0xfffff,
		.seg_32bit = 1,
		.limit_in_pages = 1,
		.useable = 1,
	};
	struct desc_struct *desc = t->thread.tls_array;
	desc += tls;
	fill_ldt(desc, &ud);
}

static inline u32 read_32bit_tls(struct task_struct *t, int tls)
{
	return get_desc_base(&t->thread.tls_array[tls]);
}
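
/*
 * These two helpers back the "small base" fast path in do_arch_prctl()
 * below: a base that fits in 32 bits is encoded as a flat 32-bit GDT TLS
 * descriptor (FS_TLS/GS_TLS), so switching it only needs a segment reload
 * instead of a wrmsr of MSR_FS_BASE/MSR_KERNEL_GS_BASE.
 */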

/*
 * This gets called before we allocate a new thread and copy
 * the current task into it.
 */
void prepare_to_copy(struct task_struct *tsk)
{
	unlazy_fpu(tsk);
}

int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
		unsigned long unused,
	struct task_struct * p, struct pt_regs * regs)
{
	int err;
	struct pt_regs * childregs;
	struct task_struct *me = current;

	childregs = ((struct pt_regs *)
			(THREAD_SIZE + task_stack_page(p))) - 1;
	*childregs = *regs;

	childregs->ax = 0;
	childregs->sp = sp;
	if (sp == ~0UL)
		childregs->sp = (unsigned long)childregs;

	p->thread.sp = (unsigned long) childregs;
	p->thread.sp0 = (unsigned long) (childregs+1);
	p->thread.usersp = me->thread.usersp;

	set_tsk_thread_flag(p, TIF_FORK);

	p->thread.fs = me->thread.fs;
	p->thread.gs = me->thread.gs;

	savesegment(gs, p->thread.gsindex);
	savesegment(fs, p->thread.fsindex);
	savesegment(es, p->thread.es);
	savesegment(ds, p->thread.ds);

	if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
		p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
		if (!p->thread.io_bitmap_ptr) {
			p->thread.io_bitmap_max = 0;
			return -ENOMEM;
		}
		memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
				IO_BITMAP_BYTES);
		set_tsk_thread_flag(p, TIF_IO_BITMAP);
	}

	/*
	 * Set a new TLS for the child thread?
	 */
	if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_IA32_EMULATION
		if (test_thread_flag(TIF_IA32))
			err = do_set_thread_area(p, -1,
				(struct user_desc __user *)childregs->si, 0);
		else
#endif
			err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
		if (err)
			goto out;
	}
	err = 0;
out:
	if (err && p->thread.io_bitmap_ptr) {
		kfree(p->thread.io_bitmap_ptr);
		p->thread.io_bitmap_max = 0;
	}
	return err;
}
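
/*
 * The CLONE_SETTLS handling above picks the tls argument straight out of the
 * child's saved registers: a native 64-bit clone() passes the tls value as
 * the fifth syscall argument, which arrives in %r8 (childregs->r8), while a
 * 32-bit caller under IA32 emulation passes a struct user_desc pointer in
 * %esi (childregs->si), matching the i386 clone ABI.
 */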

void
start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
{
	loadsegment(fs, 0);
	loadsegment(es, 0);
	loadsegment(ds, 0);
	load_gs_index(0);
	regs->ip		= new_ip;
	regs->sp		= new_sp;
	write_pda(oldrsp, new_sp);
	regs->cs		= __USER_CS;
	regs->ss		= __USER_DS;
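	/* 0x200 is X86_EFLAGS_IF: the new program starts with interrupts enabled. */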
	regs->flags		= 0x200;
	set_fs(USER_DS);
	/*
	 * Free the old FP and other extended state
	 */
	free_thread_xstate(current);
}
EXPORT_SYMBOL_GPL(start_thread);

static void hard_disable_TSC(void)
{
	write_cr4(read_cr4() | X86_CR4_TSD);
}

void disable_TSC(void)
{
	preempt_disable();
	if (!test_and_set_thread_flag(TIF_NOTSC))
		/*
		 * Must flip the CPU state synchronously with
		 * TIF_NOTSC in the current running context.
		 */
		hard_disable_TSC();
	preempt_enable();
}

static void hard_enable_TSC(void)
{
	write_cr4(read_cr4() & ~X86_CR4_TSD);
}

static void enable_TSC(void)
{
	preempt_disable();
	if (test_and_clear_thread_flag(TIF_NOTSC))
		/*
		 * Must flip the CPU state synchronously with
		 * TIF_NOTSC in the current running context.
		 */
		hard_enable_TSC();
	preempt_enable();
}

int get_tsc_mode(unsigned long adr)
{
	unsigned int val;

	if (test_thread_flag(TIF_NOTSC))
		val = PR_TSC_SIGSEGV;
	else
		val = PR_TSC_ENABLE;

	return put_user(val, (unsigned int __user *)adr);
}

int set_tsc_mode(unsigned int val)
{
	if (val == PR_TSC_SIGSEGV)
		disable_TSC();
	else if (val == PR_TSC_ENABLE)
		enable_TSC();
	else
		return -EINVAL;

	return 0;
}
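
/*
 * get_tsc_mode()/set_tsc_mode() are reached via the PR_GET_TSC/PR_SET_TSC
 * prctl(2) commands.  A rough userspace sketch (assuming <sys/prctl.h>):
 *
 *	prctl(PR_SET_TSC, PR_TSC_SIGSEGV, 0, 0, 0);
 *		// rdtsc in this task now faults (CR4.TSD set) -> SIGSEGV
 *	prctl(PR_SET_TSC, PR_TSC_ENABLE, 0, 0, 0);
 *		// rdtsc is allowed again
 */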

/*
 * This special macro can be used to load a debugging register
 */
#define loaddebug(thread, r) set_debugreg(thread->debugreg ## r, r)

static inline void __switch_to_xtra(struct task_struct *prev_p,
				    struct task_struct *next_p,
				    struct tss_struct *tss)
{
	struct thread_struct *prev, *next;
	unsigned long debugctl;

	prev = &prev_p->thread;
	next = &next_p->thread;

	debugctl = prev->debugctlmsr;
	if (next->ds_area_msr != prev->ds_area_msr) {
		/* we clear debugctl to make sure DS
		 * is not in use when we change it */
		debugctl = 0;
		update_debugctlmsr(0);
		wrmsrl(MSR_IA32_DS_AREA, next->ds_area_msr);
	}

	if (next->debugctlmsr != debugctl)
		update_debugctlmsr(next->debugctlmsr);

	if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
		loaddebug(next, 0);
		loaddebug(next, 1);
		loaddebug(next, 2);
		loaddebug(next, 3);
		/* no 4 and 5 */
		loaddebug(next, 6);
		loaddebug(next, 7);
	}

	if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
	    test_tsk_thread_flag(next_p, TIF_NOTSC)) {
		/* prev and next are different */
		if (test_tsk_thread_flag(next_p, TIF_NOTSC))
			hard_disable_TSC();
		else
			hard_enable_TSC();
	}

	if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
		/*
		 * Copy the relevant range of the IO bitmap.
		 * Normally this is 128 bytes or less:
		 */
		memcpy(tss->io_bitmap, next->io_bitmap_ptr,
		       max(prev->io_bitmap_max, next->io_bitmap_max));
	} else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
		/*
		 * Clear any possible leftover bits:
		 */
		memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
	}

#ifdef X86_BTS
	if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
		ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);

	if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
		ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
#endif
}

/*
 *	switch_to(x,y) should switch tasks from x to y.
 *
 * This could still be optimized:
 * - fold all the options into a flag word and test it with a single test.
 * - could test fs/gs bitsliced
 *
 * Kprobes not supported here. Set the probe on schedule instead.
 */
struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
	struct thread_struct *prev = &prev_p->thread;
	struct thread_struct *next = &next_p->thread;
	int cpu = smp_processor_id();
	struct tss_struct *tss = &per_cpu(init_tss, cpu);
	unsigned fsindex, gsindex;

	/* we're going to use this soon, after a few expensive things */
	if (next_p->fpu_counter>5)
		prefetch(next->xstate);

	/*
	 * Reload esp0, LDT and the page table pointer:
	 */
	load_sp0(tss, next);

	/*
	 * Switch DS and ES.
	 * This won't pick up thread selector changes, but I guess that is ok.
	 */
	savesegment(es, prev->es);
	if (unlikely(next->es | prev->es))
		loadsegment(es, next->es);

	savesegment(ds, prev->ds);
	if (unlikely(next->ds | prev->ds))
		loadsegment(ds, next->ds);


	/* We must save %fs and %gs before load_TLS() because
	 * %fs and %gs may be cleared by load_TLS().
	 *
	 * (e.g. xen_load_tls())
	 */
	savesegment(fs, fsindex);
	savesegment(gs, gsindex);

	load_TLS(next, cpu);

	/*
	 * Leave lazy mode, flushing any hypercalls made here.
	 * This must be done before restoring TLS segments so
	 * the GDT and LDT are properly updated, and must be
	 * done before math_state_restore, so the TS bit is up
	 * to date.
	 */
	arch_leave_lazy_cpu_mode();

	/*
	 * Switch FS and GS.
	 *
	 * Segment register != 0 always requires a reload.  Also
	 * reload when it has changed.  When prev process used 64bit
	 * base always reload to avoid an information leak.
	 */
	if (unlikely(fsindex | next->fsindex | prev->fs)) {
		loadsegment(fs, next->fsindex);
		/*
		 * Check if the user used a selector != 0; if yes
		 *  clear 64bit base, since overloaded base is always
		 *  mapped to the Null selector
		 */
		if (fsindex)
			prev->fs = 0;
	}
	/* when next process has a 64bit base use it */
	if (next->fs)
		wrmsrl(MSR_FS_BASE, next->fs);
	prev->fsindex = fsindex;

	if (unlikely(gsindex | next->gsindex | prev->gs)) {
		load_gs_index(next->gsindex);
		if (gsindex)
			prev->gs = 0;
	}
	if (next->gs)
		wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
	prev->gsindex = gsindex;

	/* Must be after DS reload */
	unlazy_fpu(prev_p);

	/*
	 * Switch the PDA and FPU contexts.
	 */
	prev->usersp = read_pda(oldrsp);
	write_pda(oldrsp, next->usersp);
	write_pda(pcurrent, next_p);

	write_pda(kernelstack,
		  (unsigned long)task_stack_page(next_p) +
		  THREAD_SIZE - PDA_STACKOFFSET);
#ifdef CONFIG_CC_STACKPROTECTOR
	write_pda(stack_canary, next_p->stack_canary);
	/*
	 * Build time only check to make sure the stack_canary is at
	 * offset 40 in the pda; this is a gcc ABI requirement
	 */
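	/* (with -fstack-protector, gcc reads the canary from %gs:40, i.e. this PDA slot) */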
	BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
#endif

	/*
	 * Now maybe reload the debug registers and handle I/O bitmaps
	 */
	if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
		     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
		__switch_to_xtra(prev_p, next_p, tss);

	/* If the task has used fpu the last 5 timeslices, just do a full
	 * restore of the math state immediately to avoid the trap; the
	 * chances of needing FPU soon are obviously high now
	 *
	 * tsk_used_math() checks prevent calling math_state_restore(),
	 * which can sleep in the case of !tsk_used_math()
	 */
	if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
		math_state_restore();
	return prev_p;
}

/*
 * sys_execve() executes a new program.
 */
asmlinkage
long sys_execve(char __user *name, char __user * __user *argv,
		char __user * __user *envp, struct pt_regs *regs)
{
	long error;
	char * filename;

	filename = getname(name);
	error = PTR_ERR(filename);
	if (IS_ERR(filename))
		return error;
	error = do_execve(filename, argv, envp, regs);
	putname(filename);
	return error;
}

void set_personality_64bit(void)
{
	/* inherit personality from parent */

	/* Make sure to be in 64bit mode */
	clear_thread_flag(TIF_IA32);

	/* TBD: overwrites user setup. Should have two bits.
	   But 64bit processes have always behaved this way,
	   so it's not too bad. The main problem is just that
	   32bit children are affected again. */
	current->personality &= ~READ_IMPLIES_EXEC;
}

asmlinkage long sys_fork(struct pt_regs *regs)
{
	return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
}

asmlinkage long
sys_clone(unsigned long clone_flags, unsigned long newsp,
	  void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
{
	if (!newsp)
		newsp = regs->sp;
	return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
}

/*
 * This is trivial, and on the face of it looks like it
 * could equally well be done in user mode.
 *
 * Not so, for quite unobvious reasons - register pressure.
 * In user mode vfork() cannot have a stack frame, and if
 * done by calling the "clone()" system call directly, you
 * do not have enough call-clobbered registers to hold all
 * the information you need.
 */
asmlinkage long sys_vfork(struct pt_regs *regs)
{
	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0,
		    NULL, NULL);
}

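/*
 * get_wchan() below does a simple frame-pointer walk of a sleeping task's
 * kernel stack: p->thread.sp points at the saved frame pointer, each frame
 * stores the caller's frame pointer at [fp] and the return address at
 * [fp + 8], and the walk stops at the first return address outside the
 * scheduler (the "wait channel").
 */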
unsigned long get_wchan(struct task_struct *p)
{
	unsigned long stack;
	u64 fp,ip;
	int count = 0;

	if (!p || p == current || p->state==TASK_RUNNING)
		return 0;
	stack = (unsigned long)task_stack_page(p);
	if (p->thread.sp < stack || p->thread.sp > stack+THREAD_SIZE)
		return 0;
	fp = *(u64 *)(p->thread.sp);
	do {
		if (fp < (unsigned long)stack ||
		    fp > (unsigned long)stack+THREAD_SIZE)
			return 0;
		ip = *(u64 *)(fp+8);
		if (!in_sched_functions(ip))
			return ip;
		fp = *(u64 *)fp;
	} while (count++ < 16);
	return 0;
}

long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
{
	int ret = 0;
	int doit = task == current;
	int cpu;

	switch (code) {
	case ARCH_SET_GS:
		if (addr >= TASK_SIZE_OF(task))
			return -EPERM;
		cpu = get_cpu();
		/* handle small bases via the GDT because that's faster to
		   switch. */
		if (addr <= 0xffffffff) {
			set_32bit_tls(task, GS_TLS, addr);
			if (doit) {
				load_TLS(&task->thread, cpu);
				load_gs_index(GS_TLS_SEL);
			}
			task->thread.gsindex = GS_TLS_SEL;
			task->thread.gs = 0;
		} else {
			task->thread.gsindex = 0;
			task->thread.gs = addr;
			if (doit) {
				load_gs_index(0);
				ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
			}
		}
		put_cpu();
		break;
	case ARCH_SET_FS:
		/* Not strictly needed for fs, but do it for symmetry
		   with gs */
		if (addr >= TASK_SIZE_OF(task))
			return -EPERM;
		cpu = get_cpu();
		/* handle small bases via the GDT because that's faster to
		   switch. */
		if (addr <= 0xffffffff) {
			set_32bit_tls(task, FS_TLS, addr);
			if (doit) {
				load_TLS(&task->thread, cpu);
				loadsegment(fs, FS_TLS_SEL);
			}
			task->thread.fsindex = FS_TLS_SEL;
			task->thread.fs = 0;
		} else {
			task->thread.fsindex = 0;
			task->thread.fs = addr;
			if (doit) {
				/* set the selector to 0 to not confuse
				   __switch_to */
				loadsegment(fs, 0);
				ret = checking_wrmsrl(MSR_FS_BASE, addr);
			}
		}
		put_cpu();
		break;
	case ARCH_GET_FS: {
		unsigned long base;
		if (task->thread.fsindex == FS_TLS_SEL)
			base = read_32bit_tls(task, FS_TLS);
		else if (doit)
			rdmsrl(MSR_FS_BASE, base);
		else
			base = task->thread.fs;
		ret = put_user(base, (unsigned long __user *)addr);
		break;
	}
	case ARCH_GET_GS: {
		unsigned long base;
		unsigned gsindex;
		if (task->thread.gsindex == GS_TLS_SEL)
			base = read_32bit_tls(task, GS_TLS);
		else if (doit) {
			savesegment(gs, gsindex);
			if (gsindex)
				rdmsrl(MSR_KERNEL_GS_BASE, base);
			else
				base = task->thread.gs;
		}
		else
			base = task->thread.gs;
		ret = put_user(base, (unsigned long __user *)addr);
		break;
	}

	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}

long sys_arch_prctl(int code, unsigned long addr)
{
	return do_arch_prctl(current, code, addr);
}
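
/*
 * A rough userspace sketch of this interface (assuming <asm/prctl.h> for the
 * ARCH_* codes; the call is typically made via syscall(2)):
 *
 *	unsigned long base;
 *
 *	syscall(SYS_arch_prctl, ARCH_SET_GS, some_addr);  // set the GS base
 *	syscall(SYS_arch_prctl, ARCH_GET_FS, &base);      // read back the FS base
 *
 * Note that addr is a plain value for ARCH_SET_* but a user pointer for
 * ARCH_GET_*, as the put_user() calls above show.
 */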

unsigned long arch_align_stack(unsigned long sp)
{
	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
		sp -= get_random_int() % 8192;
	return sp & ~0xf;
}

unsigned long arch_randomize_brk(struct mm_struct *mm)
{
	unsigned long range_end = mm->brk + 0x02000000;
	return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
}
859