xref: /openbmc/linux/arch/x86/kernel/process_64.c (revision efe4a1ac)
/*
 *  Copyright (C) 1995  Linus Torvalds
 *
 *  Pentium III FXSR, SSE support
 *	Gareth Hughes <gareth@valinux.com>, May 2000
 *
 *  X86-64 port
 *	Andi Kleen.
 *
 *	CPU hotplug support - ashok.raj@intel.com
 */

/*
 * This file handles the architecture-dependent parts of process handling.
 */

#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/sched/task.h>
#include <linux/sched/task_stack.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/elfcore.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/export.h>
#include <linux/ptrace.h>
#include <linux/notifier.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/prctl.h>
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/ftrace.h>
#include <linux/syscalls.h>

#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/fpu/internal.h>
#include <asm/mmu_context.h>
#include <asm/prctl.h>
#include <asm/desc.h>
#include <asm/proto.h>
#include <asm/ia32.h>
#include <asm/syscalls.h>
#include <asm/debugreg.h>
#include <asm/switch_to.h>
#include <asm/xen/hypervisor.h>
#include <asm/vdso.h>
#include <asm/intel_rdt.h>
#include <asm/unistd.h>
#ifdef CONFIG_IA32_EMULATION
/* Not included via unistd.h */
#include <asm/unistd_32_ia32.h>
#endif

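/*
 * Per-CPU scratch slot used by the SYSCALL entry code to stash the user
 * stack pointer while switching to the kernel stack; SYSCALL itself does
 * not switch stacks.
 */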
__visible DEFINE_PER_CPU(unsigned long, rsp_scratch);

/* Prints also some state that isn't saved in the pt_regs */
void __show_regs(struct pt_regs *regs, int all)
{
	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
	unsigned long d0, d1, d2, d3, d6, d7;
	unsigned int fsindex, gsindex;
	unsigned int ds, cs, es;

	printk(KERN_DEFAULT "RIP: %04lx:%pS\n", regs->cs & 0xffff,
		(void *)regs->ip);
	printk(KERN_DEFAULT "RSP: %04lx:%016lx EFLAGS: %08lx", regs->ss,
		regs->sp, regs->flags);
	if (regs->orig_ax != -1)
		pr_cont(" ORIG_RAX: %016lx\n", regs->orig_ax);
	else
		pr_cont("\n");

	printk(KERN_DEFAULT "RAX: %016lx RBX: %016lx RCX: %016lx\n",
	       regs->ax, regs->bx, regs->cx);
	printk(KERN_DEFAULT "RDX: %016lx RSI: %016lx RDI: %016lx\n",
	       regs->dx, regs->si, regs->di);
	printk(KERN_DEFAULT "RBP: %016lx R08: %016lx R09: %016lx\n",
	       regs->bp, regs->r8, regs->r9);
	printk(KERN_DEFAULT "R10: %016lx R11: %016lx R12: %016lx\n",
	       regs->r10, regs->r11, regs->r12);
	printk(KERN_DEFAULT "R13: %016lx R14: %016lx R15: %016lx\n",
	       regs->r13, regs->r14, regs->r15);

	asm("movl %%ds,%0" : "=r" (ds));
	asm("movl %%cs,%0" : "=r" (cs));
	asm("movl %%es,%0" : "=r" (es));
	asm("movl %%fs,%0" : "=r" (fsindex));
	asm("movl %%gs,%0" : "=r" (gsindex));

	rdmsrl(MSR_FS_BASE, fs);
	rdmsrl(MSR_GS_BASE, gs);
	rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);

	if (!all)
		return;

	cr0 = read_cr0();
	cr2 = read_cr2();
	cr3 = read_cr3();
	cr4 = __read_cr4();

	printk(KERN_DEFAULT "FS:  %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
	       fs, fsindex, gs, gsindex, shadowgs);
	printk(KERN_DEFAULT "CS:  %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds,
			es, cr0);
	printk(KERN_DEFAULT "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3,
			cr4);

	get_debugreg(d0, 0);
	get_debugreg(d1, 1);
	get_debugreg(d2, 2);
	get_debugreg(d3, 3);
	get_debugreg(d6, 6);
	get_debugreg(d7, 7);

	/* Only print out debug registers if they are in their non-default state. */
	if (!((d0 == 0) && (d1 == 0) && (d2 == 0) && (d3 == 0) &&
	    (d6 == DR6_RESERVED) && (d7 == 0x400))) {
		printk(KERN_DEFAULT "DR0: %016lx DR1: %016lx DR2: %016lx\n",
		       d0, d1, d2);
		printk(KERN_DEFAULT "DR3: %016lx DR6: %016lx DR7: %016lx\n",
		       d3, d6, d7);
	}

	if (boot_cpu_has(X86_FEATURE_OSPKE))
		printk(KERN_DEFAULT "PKRU: %08x\n", read_pkru());
}

void release_thread(struct task_struct *dead_task)
{
	if (dead_task->mm) {
#ifdef CONFIG_MODIFY_LDT_SYSCALL
		if (dead_task->mm->context.ldt) {
			pr_warn("WARNING: dead process %s still has LDT? <%p/%d>\n",
				dead_task->comm,
				dead_task->mm->context.ldt->entries,
				dead_task->mm->context.ldt->size);
			BUG();
		}
#endif
	}
}

int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
		unsigned long arg, struct task_struct *p, unsigned long tls)
{
	int err;
	struct pt_regs *childregs;
	struct fork_frame *fork_frame;
	struct inactive_task_frame *frame;
	struct task_struct *me = current;

	p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE;
	childregs = task_pt_regs(p);
	fork_frame = container_of(childregs, struct fork_frame, regs);
	frame = &fork_frame->frame;
	frame->bp = 0;
	frame->ret_addr = (unsigned long) ret_from_fork;
	p->thread.sp = (unsigned long) fork_frame;
	p->thread.io_bitmap_ptr = NULL;

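	/*
	 * A nonzero selector means the base will be reloaded from the
	 * GDT/LDT descriptor on the next segment load, so the parent's
	 * cached base is only inherited when the selector is zero.
	 */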
	savesegment(gs, p->thread.gsindex);
	p->thread.gsbase = p->thread.gsindex ? 0 : me->thread.gsbase;
	savesegment(fs, p->thread.fsindex);
	p->thread.fsbase = p->thread.fsindex ? 0 : me->thread.fsbase;
	savesegment(es, p->thread.es);
	savesegment(ds, p->thread.ds);
	memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));

	if (unlikely(p->flags & PF_KTHREAD)) {
		/*
		 * Kernel thread: ret_from_fork treats frame->bx as a
		 * function pointer and calls it with frame->r12 as its
		 * argument.
		 */
		memset(childregs, 0, sizeof(struct pt_regs));
		frame->bx = sp;		/* function */
		frame->r12 = arg;
		return 0;
	}
	frame->bx = 0;
	*childregs = *current_pt_regs();

	childregs->ax = 0;
	if (sp)
		childregs->sp = sp;

	err = -ENOMEM;
	if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
		p->thread.io_bitmap_ptr = kmemdup(me->thread.io_bitmap_ptr,
						  IO_BITMAP_BYTES, GFP_KERNEL);
		if (!p->thread.io_bitmap_ptr) {
			p->thread.io_bitmap_max = 0;
			return -ENOMEM;
		}
		set_tsk_thread_flag(p, TIF_IO_BITMAP);
	}

	/*
	 * Set a new TLS for the child thread?
	 */
	if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_IA32_EMULATION
		if (in_ia32_syscall())
			err = do_set_thread_area(p, -1,
				(struct user_desc __user *)tls, 0);
		else
#endif
			err = do_arch_prctl_64(p, ARCH_SET_FS, tls);
		if (err)
			goto out;
	}
	err = 0;
out:
	if (err && p->thread.io_bitmap_ptr) {
		kfree(p->thread.io_bitmap_ptr);
		p->thread.io_bitmap_max = 0;
	}

	return err;
}

static void
start_thread_common(struct pt_regs *regs, unsigned long new_ip,
		    unsigned long new_sp,
		    unsigned int _cs, unsigned int _ss, unsigned int _ds)
{
	loadsegment(fs, 0);
	loadsegment(es, _ds);
	loadsegment(ds, _ds);
	load_gs_index(0);
	regs->ip		= new_ip;
	regs->sp		= new_sp;
	regs->cs		= _cs;
	regs->ss		= _ss;
	regs->flags		= X86_EFLAGS_IF;
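	/*
	 * Force the slow IRET exit path, rather than the SYSRET fast
	 * path, so that the new CS/SS and flags set up above are fully
	 * reloaded on the return to user mode.
	 */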
	force_iret();
}

void
start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
{
	start_thread_common(regs, new_ip, new_sp,
			    __USER_CS, __USER_DS, 0);
}

#ifdef CONFIG_COMPAT
void compat_start_thread(struct pt_regs *regs, u32 new_ip, u32 new_sp)
{
	start_thread_common(regs, new_ip, new_sp,
			    test_thread_flag(TIF_X32)
			    ? __USER_CS : __USER32_CS,
			    __USER_DS, __USER_DS);
}
#endif

/*
 *	switch_to(x,y) should switch tasks from x to y.
 *
 * This could still be optimized:
 * - fold all the options into a flag word and test it with a single test.
 * - could test fs/gs bitsliced
 *
 * Kprobes not supported here. Set the probe on schedule instead.
 * The function graph tracer is not supported either.
 */
__visible __notrace_funcgraph struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
	struct thread_struct *prev = &prev_p->thread;
	struct thread_struct *next = &next_p->thread;
	struct fpu *prev_fpu = &prev->fpu;
	struct fpu *next_fpu = &next->fpu;
	int cpu = smp_processor_id();
	struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
	unsigned prev_fsindex, prev_gsindex;

	switch_fpu_prepare(prev_fpu, cpu);

	/*
	 * We must save %fs and %gs before load_TLS() because
	 * %fs and %gs may be cleared by load_TLS().
	 *
	 * (e.g. xen_load_tls())
	 */
	savesegment(fs, prev_fsindex);
	savesegment(gs, prev_gsindex);

	/*
	 * Load TLS before restoring any segments so that segment loads
	 * reference the correct GDT entries.
	 */
	load_TLS(next, cpu);

	/*
	 * Leave lazy mode, flushing any hypercalls made here.  This
	 * must be done after loading TLS entries in the GDT but before
	 * loading segments that might reference them, and it must be
	 * done before fpu__restore(), so the TS bit is up to date.
	 */
	arch_end_context_switch(next_p);

	/*
	 * Switch DS and ES.
	 *
	 * Reading them only returns the selectors, but writing them (if
	 * nonzero) loads the full descriptor from the GDT or LDT.  The
	 * LDT for next is loaded in switch_mm, and the GDT is loaded
	 * above.
	 *
	 * We therefore need to write new values to the segment
	 * registers on every context switch unless both the new and old
	 * values are zero.
	 *
	 * Note that we don't need to do anything for CS and SS, as
	 * those are saved and restored as part of pt_regs.
	 */
	savesegment(es, prev->es);
	if (unlikely(next->es | prev->es))
		loadsegment(es, next->es);

	savesegment(ds, prev->ds);
	if (unlikely(next->ds | prev->ds))
		loadsegment(ds, next->ds);

	/*
	 * Switch FS and GS.
	 *
	 * These are even more complicated than DS and ES: they have
	 * 64-bit bases that are controlled by arch_prctl.  The bases
	 * don't necessarily match the selectors, as user code can do
	 * any number of things to cause them to be inconsistent.
	 *
	 * We don't promise to preserve the bases if the selectors are
	 * nonzero.  We also don't promise to preserve the base if the
	 * selector is zero and the base doesn't match whatever was
	 * most recently passed to ARCH_SET_FS/GS.  (If/when the
	 * FSGSBASE instructions are enabled, we'll need to offer
	 * stronger guarantees.)
	 *
	 * As an invariant,
	 * (fsbase != 0 && fsindex != 0) || (gsbase != 0 && gsindex != 0) is
	 * impossible.
	 */
	if (next->fsindex) {
		/* Loading a nonzero value into FS sets the index and base. */
		loadsegment(fs, next->fsindex);
	} else {
		if (next->fsbase) {
			/* Next index is zero but next base is nonzero. */
			if (prev_fsindex)
				loadsegment(fs, 0);
			wrmsrl(MSR_FS_BASE, next->fsbase);
		} else {
			/* Next base and index are both zero. */
			if (static_cpu_has_bug(X86_BUG_NULL_SEG)) {
				/*
				 * We don't know the previous base and can't
				 * find out without RDMSR.  Forcibly clear it.
				 */
				loadsegment(fs, __USER_DS);
				loadsegment(fs, 0);
			} else {
				/*
				 * If the previous index is zero and ARCH_SET_FS
				 * didn't change the base, then the base is
				 * also zero and we don't need to do anything.
				 */
				if (prev->fsbase || prev_fsindex)
					loadsegment(fs, 0);
			}
		}
	}
	/*
	 * Save the old state and preserve the invariant.
	 * NB: if prev_fsindex == 0, then we can't reliably learn the base
	 * without RDMSR because Intel user code can zero it without telling
	 * us and AMD user code can program any 32-bit value without telling
	 * us.
	 */
	if (prev_fsindex)
		prev->fsbase = 0;
	prev->fsindex = prev_fsindex;

	if (next->gsindex) {
		/* Loading a nonzero value into GS sets the index and base. */
		load_gs_index(next->gsindex);
	} else {
		if (next->gsbase) {
			/* Next index is zero but next base is nonzero. */
			if (prev_gsindex)
				load_gs_index(0);
			wrmsrl(MSR_KERNEL_GS_BASE, next->gsbase);
		} else {
			/* Next base and index are both zero. */
			if (static_cpu_has_bug(X86_BUG_NULL_SEG)) {
				/*
				 * We don't know the previous base and can't
				 * find out without RDMSR.  Forcibly clear it.
				 *
				 * This contains a pointless SWAPGS pair.
				 * Fixing it would involve an explicit check
				 * for Xen or a new pvop.
				 */
				load_gs_index(__USER_DS);
				load_gs_index(0);
			} else {
				/*
				 * If the previous index is zero and ARCH_SET_GS
				 * didn't change the base, then the base is
				 * also zero and we don't need to do anything.
				 */
				if (prev->gsbase || prev_gsindex)
					load_gs_index(0);
			}
		}
	}
	/*
	 * Save the old state and preserve the invariant.
	 * NB: if prev_gsindex == 0, then we can't reliably learn the base
	 * without RDMSR because Intel user code can zero it without telling
	 * us and AMD user code can program any 32-bit value without telling
	 * us.
	 */
	if (prev_gsindex)
		prev->gsbase = 0;
	prev->gsindex = prev_gsindex;

	switch_fpu_finish(next_fpu, cpu);

	/*
	 * Switch the per-CPU current task pointer.
	 */
	this_cpu_write(current_task, next_p);

	/* Reload sp0 so entries from user mode use the new task's stack. */
	load_sp0(tss, next);

	/*
	 * Now maybe reload the debug registers and handle I/O bitmaps
	 */
	if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
		     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
		__switch_to_xtra(prev_p, next_p, tss);

#ifdef CONFIG_XEN_PV
	/*
	 * On Xen PV, IOPL bits in pt_regs->flags have no effect, and
	 * current_pt_regs()->flags may not match the current task's
	 * intended IOPL.  We need to switch it manually.
	 */
	if (unlikely(static_cpu_has(X86_FEATURE_XENPV) &&
		     prev->iopl != next->iopl))
		xen_set_iopl_mask(next->iopl);
#endif

	if (static_cpu_has_bug(X86_BUG_SYSRET_SS_ATTRS)) {
		/*
		 * AMD CPUs have a misfeature: SYSRET sets the SS selector but
		 * does not update the cached descriptor.  As a result, if we
		 * do SYSRET while SS is NULL, we'll end up in user mode with
		 * SS apparently equal to __USER_DS but actually unusable.
		 *
		 * The straightforward workaround would be to fix it up just
		 * before SYSRET, but that would slow down the system call
		 * fast paths.  Instead, we ensure that SS is never NULL in
		 * system call context.  We do this by replacing NULL SS
		 * selectors at every context switch.  SYSCALL sets up a valid
		 * SS, so the only way to get NULL is to re-enter the kernel
		 * from CPL 3 through an interrupt.  Since that can't happen
		 * in the same task as a running syscall, we are guaranteed to
		 * context switch between every interrupt vector entry and a
		 * subsequent SYSRET.
		 *
		 * We read SS first because SS reads are much faster than
		 * writes.  Out of caution, we force SS to __KERNEL_DS even if
		 * it previously had a different non-NULL value.
		 */
		unsigned short ss_sel;
		savesegment(ss, ss_sel);
		if (ss_sel != __KERNEL_DS)
			loadsegment(ss, __KERNEL_DS);
	}

	/* Load the Intel cache allocation PQR MSR. */
	intel_rdt_sched_in();

	return prev_p;
}

void set_personality_64bit(void)
{
	/* inherit personality from parent */

	/* Make sure to be in 64bit mode */
	clear_thread_flag(TIF_IA32);
	clear_thread_flag(TIF_ADDR32);
	clear_thread_flag(TIF_X32);
	/* Pretend that this comes from a 64bit execve */
	task_pt_regs(current)->orig_ax = __NR_execve;

	/* Ensure the corresponding mm is not marked. */
	if (current->mm)
		current->mm->context.ia32_compat = 0;

	/*
	 * TBD: this overwrites the user's setup.  Should have two bits.
	 * But 64-bit processes have always behaved this way, so it's
	 * not too bad.  The main problem is just that 32-bit children
	 * are affected again.
	 */
	current->personality &= ~READ_IMPLIES_EXEC;
}

static void __set_personality_x32(void)
{
#ifdef CONFIG_X86_X32
	clear_thread_flag(TIF_IA32);
	set_thread_flag(TIF_X32);
	if (current->mm)
		current->mm->context.ia32_compat = TIF_X32;
	current->personality &= ~READ_IMPLIES_EXEC;
	/*
	 * in_compat_syscall() uses the presence of the x32 syscall bit
	 * flag to determine compat status.  The x86 mmap() code relies on
	 * the syscall bitness, so set the x32 syscall bit right here to
	 * make in_compat_syscall() work during exec().
	 *
	 * Pretend to come from an x32 execve.
	 */
	task_pt_regs(current)->orig_ax = __NR_x32_execve | __X32_SYSCALL_BIT;
	current->thread.status &= ~TS_COMPAT;
#endif
}

static void __set_personality_ia32(void)
{
#ifdef CONFIG_IA32_EMULATION
	set_thread_flag(TIF_IA32);
	clear_thread_flag(TIF_X32);
	if (current->mm)
		current->mm->context.ia32_compat = TIF_IA32;
	current->personality |= force_personality32;
	/* Prepare the first "return" to user space */
	task_pt_regs(current)->orig_ax = __NR_ia32_execve;
	current->thread.status |= TS_COMPAT;
#endif
}

void set_personality_ia32(bool x32)
{
	/* Make sure to be in 32bit mode */
	set_thread_flag(TIF_ADDR32);

	if (x32)
		__set_personality_x32();
	else
		__set_personality_ia32();
}
EXPORT_SYMBOL_GPL(set_personality_ia32);

#ifdef CONFIG_CHECKPOINT_RESTORE
static long prctl_map_vdso(const struct vdso_image *image, unsigned long addr)
{
	int ret;

	ret = map_vdso_once(image, addr);
	if (ret)
		return ret;

	return (long)image->size;
}
#endif

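/*
 * Handle the 64-bit-only arch_prctl() options, primarily getting and
 * setting the FS and GS base addresses for @task.
 */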
long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2)
{
	int ret = 0;
	int doit = task == current;
	int cpu;

	switch (option) {
	case ARCH_SET_GS:
		if (arg2 >= TASK_SIZE_MAX)
			return -EPERM;
		cpu = get_cpu();
		task->thread.gsindex = 0;
		task->thread.gsbase = arg2;
		if (doit) {
			load_gs_index(0);
			ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, arg2);
		}
		put_cpu();
		break;
	case ARCH_SET_FS:
		/*
		 * Not strictly needed for %fs, but do it for symmetry
		 * with %gs.
		 */
		if (arg2 >= TASK_SIZE_MAX)
			return -EPERM;
		cpu = get_cpu();
		task->thread.fsindex = 0;
		task->thread.fsbase = arg2;
		if (doit) {
			/* set the selector to 0 to not confuse __switch_to */
			loadsegment(fs, 0);
			ret = wrmsrl_safe(MSR_FS_BASE, arg2);
		}
		put_cpu();
		break;
	case ARCH_GET_FS: {
		unsigned long base;

		if (doit)
			rdmsrl(MSR_FS_BASE, base);
		else
			base = task->thread.fsbase;
		ret = put_user(base, (unsigned long __user *)arg2);
		break;
	}
	case ARCH_GET_GS: {
		unsigned long base;

		if (doit)
			rdmsrl(MSR_KERNEL_GS_BASE, base);
		else
			base = task->thread.gsbase;
		ret = put_user(base, (unsigned long __user *)arg2);
		break;
	}

#ifdef CONFIG_CHECKPOINT_RESTORE
# ifdef CONFIG_X86_X32_ABI
	case ARCH_MAP_VDSO_X32:
		return prctl_map_vdso(&vdso_image_x32, arg2);
# endif
# if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
	case ARCH_MAP_VDSO_32:
		return prctl_map_vdso(&vdso_image_32, arg2);
# endif
	case ARCH_MAP_VDSO_64:
		return prctl_map_vdso(&vdso_image_64, arg2);
#endif

	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}

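/*
 * Userspace reaches do_arch_prctl_64() via the arch_prctl(2) system
 * call.  A minimal sketch of reading the current FS base, assuming
 * glibc's syscall(2) wrapper and the constants from <asm/prctl.h>:
 *
 *	unsigned long base;
 *	syscall(SYS_arch_prctl, ARCH_GET_FS, &base);
 */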
SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
{
	long ret;

	ret = do_arch_prctl_64(current, option, arg2);
	if (ret == -EINVAL)
		ret = do_arch_prctl_common(current, option, arg2);

	return ret;
}

#ifdef CONFIG_IA32_EMULATION
COMPAT_SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
{
	return do_arch_prctl_common(current, option, arg2);
}
#endif

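/*
 * Return the user stack pointer of a task, as saved in its pt_regs on
 * kernel entry.
 */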
unsigned long KSTK_ESP(struct task_struct *task)
{
	return task_pt_regs(task)->sp;
}