xref: /openbmc/linux/arch/x86/kernel/ptrace.c (revision c819e2cf)
1 /* By Ross Biro 1/23/92 */
2 /*
3  * Pentium III FXSR, SSE support
4  *	Gareth Hughes <gareth@valinux.com>, May 2000
5  */
6 
7 #include <linux/kernel.h>
8 #include <linux/sched.h>
9 #include <linux/mm.h>
10 #include <linux/smp.h>
11 #include <linux/errno.h>
12 #include <linux/slab.h>
13 #include <linux/ptrace.h>
14 #include <linux/regset.h>
15 #include <linux/tracehook.h>
16 #include <linux/user.h>
17 #include <linux/elf.h>
18 #include <linux/security.h>
19 #include <linux/audit.h>
20 #include <linux/seccomp.h>
21 #include <linux/signal.h>
22 #include <linux/perf_event.h>
23 #include <linux/hw_breakpoint.h>
24 #include <linux/rcupdate.h>
25 #include <linux/export.h>
26 #include <linux/context_tracking.h>
27 
28 #include <asm/uaccess.h>
29 #include <asm/pgtable.h>
30 #include <asm/processor.h>
31 #include <asm/i387.h>
32 #include <asm/fpu-internal.h>
33 #include <asm/debugreg.h>
34 #include <asm/ldt.h>
35 #include <asm/desc.h>
36 #include <asm/prctl.h>
37 #include <asm/proto.h>
38 #include <asm/hw_breakpoint.h>
39 #include <asm/traps.h>
40 
41 #include "tls.h"
42 
43 #define CREATE_TRACE_POINTS
44 #include <trace/events/syscalls.h>
45 
/*
 * Identifiers for the register sets handled in this file.
 * REGSET_IOPERM64 is an alias for the same numeric slot as REGSET_XFP.
 */
enum x86_regset {
	REGSET_GENERAL	= 0,
	REGSET_FP	= 1,
	REGSET_XFP	= 2,
	REGSET_IOPERM64	= REGSET_XFP,
	REGSET_XSTATE	= 3,
	REGSET_TLS	= 4,
	REGSET_IOPERM32	= 5,
};
55 
/* One (register name, byte offset inside struct pt_regs) pair. */
struct pt_regs_offset {
	const char *name;	/* register name as a string; NULL ends a table */
	int offset;		/* offsetof() the field in struct pt_regs */
};

/* Build a table entry for the struct pt_regs field called @r. */
#define REG_OFFSET_NAME(r)					\
	{ .name = #r, .offset = offsetof(struct pt_regs, r) }

/* Sentinel entry that terminates a table of struct pt_regs_offset. */
#define REG_OFFSET_END						\
	{ .name = NULL, .offset = 0 }
63 
64 static const struct pt_regs_offset regoffset_table[] = {
65 #ifdef CONFIG_X86_64
66 	REG_OFFSET_NAME(r15),
67 	REG_OFFSET_NAME(r14),
68 	REG_OFFSET_NAME(r13),
69 	REG_OFFSET_NAME(r12),
70 	REG_OFFSET_NAME(r11),
71 	REG_OFFSET_NAME(r10),
72 	REG_OFFSET_NAME(r9),
73 	REG_OFFSET_NAME(r8),
74 #endif
75 	REG_OFFSET_NAME(bx),
76 	REG_OFFSET_NAME(cx),
77 	REG_OFFSET_NAME(dx),
78 	REG_OFFSET_NAME(si),
79 	REG_OFFSET_NAME(di),
80 	REG_OFFSET_NAME(bp),
81 	REG_OFFSET_NAME(ax),
82 #ifdef CONFIG_X86_32
83 	REG_OFFSET_NAME(ds),
84 	REG_OFFSET_NAME(es),
85 	REG_OFFSET_NAME(fs),
86 	REG_OFFSET_NAME(gs),
87 #endif
88 	REG_OFFSET_NAME(orig_ax),
89 	REG_OFFSET_NAME(ip),
90 	REG_OFFSET_NAME(cs),
91 	REG_OFFSET_NAME(flags),
92 	REG_OFFSET_NAME(sp),
93 	REG_OFFSET_NAME(ss),
94 	REG_OFFSET_END,
95 };
96 
97 /**
98  * regs_query_register_offset() - query register offset from its name
99  * @name:	the name of a register
100  *
101  * regs_query_register_offset() returns the offset of a register in struct
102  * pt_regs from its name. If the name is invalid, this returns -EINVAL;
103  */
104 int regs_query_register_offset(const char *name)
105 {
106 	const struct pt_regs_offset *roff;
107 	for (roff = regoffset_table; roff->name != NULL; roff++)
108 		if (!strcmp(roff->name, name))
109 			return roff->offset;
110 	return -EINVAL;
111 }
112 
113 /**
114  * regs_query_register_name() - query register name from its offset
115  * @offset:	the offset of a register in struct pt_regs.
116  *
117  * regs_query_register_name() returns the name of a register from its
118  * offset in struct pt_regs. If the @offset is invalid, this returns NULL;
119  */
120 const char *regs_query_register_name(unsigned int offset)
121 {
122 	const struct pt_regs_offset *roff;
123 	for (roff = regoffset_table; roff->name != NULL; roff++)
124 		if (roff->offset == offset)
125 			return roff->name;
126 	return NULL;
127 }
128 
129 static const int arg_offs_table[] = {
130 #ifdef CONFIG_X86_32
131 	[0] = offsetof(struct pt_regs, ax),
132 	[1] = offsetof(struct pt_regs, dx),
133 	[2] = offsetof(struct pt_regs, cx)
134 #else /* CONFIG_X86_64 */
135 	[0] = offsetof(struct pt_regs, di),
136 	[1] = offsetof(struct pt_regs, si),
137 	[2] = offsetof(struct pt_regs, dx),
138 	[3] = offsetof(struct pt_regs, cx),
139 	[4] = offsetof(struct pt_regs, r8),
140 	[5] = offsetof(struct pt_regs, r9)
141 #endif
142 };
143 
144 /*
145  * does not yet catch signals sent when the child dies.
146  * in exit.c or in signal.c.
147  */
148 
/*
 * Bit mask of the EFLAGS bits a ptrace user may modify: a set bit means
 * the user controls it, a clear bit means the existing value is kept.
 */
#define FLAG_MASK_32						\
	((unsigned long)(X86_EFLAGS_CF | X86_EFLAGS_PF |	\
			 X86_EFLAGS_AF | X86_EFLAGS_ZF |	\
			 X86_EFLAGS_SF | X86_EFLAGS_TF |	\
			 X86_EFLAGS_DF | X86_EFLAGS_OF |	\
			 X86_EFLAGS_RF | X86_EFLAGS_AC))
158 
159 /*
160  * Determines whether a value may be installed in a segment register.
161  */
162 static inline bool invalid_selector(u16 value)
163 {
164 	return unlikely(value != 0 && (value & SEGMENT_RPL_MASK) != USER_RPL);
165 }
166 
167 #ifdef CONFIG_X86_32
168 
/* 32-bit build: the user-modifiable flag set is exactly FLAG_MASK_32. */
#define FLAG_MASK		FLAG_MASK_32
170 
171 /*
172  * X86_32 CPUs don't save ss and esp if the CPU is already in kernel mode
173  * when it traps.  The previous stack will be directly underneath the saved
174  * registers, and 'sp/ss' won't even have been saved. Thus the '&regs->sp'.
175  *
176  * Now, if the stack is empty, '&regs->sp' is out of range. In this
177  * case we try to take the previous stack. To always return a non-null
178  * stack pointer we fall back to regs as stack if no previous stack
179  * exists.
180  *
181  * This is valid only for kernel mode traps.
182  */
183 unsigned long kernel_stack_pointer(struct pt_regs *regs)
184 {
185 	unsigned long context = (unsigned long)regs & ~(THREAD_SIZE - 1);
186 	unsigned long sp = (unsigned long)&regs->sp;
187 	u32 *prev_esp;
188 
189 	if (context == (sp & ~(THREAD_SIZE - 1)))
190 		return sp;
191 
192 	prev_esp = (u32 *)(context);
193 	if (prev_esp)
194 		return (unsigned long)prev_esp;
195 
196 	return (unsigned long)regs;
197 }
198 EXPORT_SYMBOL_GPL(kernel_stack_pointer);
199 
200 static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno)
201 {
202 	BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0);
203 	return &regs->bx + (regno >> 2);
204 }
205 
206 static u16 get_segment_reg(struct task_struct *task, unsigned long offset)
207 {
208 	/*
209 	 * Returning the value truncates it to 16 bits.
210 	 */
211 	unsigned int retval;
212 	if (offset != offsetof(struct user_regs_struct, gs))
213 		retval = *pt_regs_access(task_pt_regs(task), offset);
214 	else {
215 		if (task == current)
216 			retval = get_user_gs(task_pt_regs(task));
217 		else
218 			retval = task_user_gs(task);
219 	}
220 	return retval;
221 }
222 
223 static int set_segment_reg(struct task_struct *task,
224 			   unsigned long offset, u16 value)
225 {
226 	/*
227 	 * The value argument was already truncated to 16 bits.
228 	 */
229 	if (invalid_selector(value))
230 		return -EIO;
231 
232 	/*
233 	 * For %cs and %ss we cannot permit a null selector.
234 	 * We can permit a bogus selector as long as it has USER_RPL.
235 	 * Null selectors are fine for other segment registers, but
236 	 * we will never get back to user mode with invalid %cs or %ss
237 	 * and will take the trap in iret instead.  Much code relies
238 	 * on user_mode() to distinguish a user trap frame (which can
239 	 * safely use invalid selectors) from a kernel trap frame.
240 	 */
241 	switch (offset) {
242 	case offsetof(struct user_regs_struct, cs):
243 	case offsetof(struct user_regs_struct, ss):
244 		if (unlikely(value == 0))
245 			return -EIO;
246 
247 	default:
248 		*pt_regs_access(task_pt_regs(task), offset) = value;
249 		break;
250 
251 	case offsetof(struct user_regs_struct, gs):
252 		if (task == current)
253 			set_user_gs(task_pt_regs(task), value);
254 		else
255 			task_user_gs(task) = value;
256 	}
257 
258 	return 0;
259 }
260 
261 #else  /* CONFIG_X86_64 */
262 
/* 64-bit build: the user may additionally modify the NT flag. */
#define FLAG_MASK		(FLAG_MASK_32 | X86_EFLAGS_NT)
264 
265 static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long offset)
266 {
267 	BUILD_BUG_ON(offsetof(struct pt_regs, r15) != 0);
268 	return &regs->r15 + (offset / sizeof(regs->r15));
269 }
270 
271 static u16 get_segment_reg(struct task_struct *task, unsigned long offset)
272 {
273 	/*
274 	 * Returning the value truncates it to 16 bits.
275 	 */
276 	unsigned int seg;
277 
278 	switch (offset) {
279 	case offsetof(struct user_regs_struct, fs):
280 		if (task == current) {
281 			/* Older gas can't assemble movq %?s,%r?? */
282 			asm("movl %%fs,%0" : "=r" (seg));
283 			return seg;
284 		}
285 		return task->thread.fsindex;
286 	case offsetof(struct user_regs_struct, gs):
287 		if (task == current) {
288 			asm("movl %%gs,%0" : "=r" (seg));
289 			return seg;
290 		}
291 		return task->thread.gsindex;
292 	case offsetof(struct user_regs_struct, ds):
293 		if (task == current) {
294 			asm("movl %%ds,%0" : "=r" (seg));
295 			return seg;
296 		}
297 		return task->thread.ds;
298 	case offsetof(struct user_regs_struct, es):
299 		if (task == current) {
300 			asm("movl %%es,%0" : "=r" (seg));
301 			return seg;
302 		}
303 		return task->thread.es;
304 
305 	case offsetof(struct user_regs_struct, cs):
306 	case offsetof(struct user_regs_struct, ss):
307 		break;
308 	}
309 	return *pt_regs_access(task_pt_regs(task), offset);
310 }
311 
312 static int set_segment_reg(struct task_struct *task,
313 			   unsigned long offset, u16 value)
314 {
315 	/*
316 	 * The value argument was already truncated to 16 bits.
317 	 */
318 	if (invalid_selector(value))
319 		return -EIO;
320 
321 	switch (offset) {
322 	case offsetof(struct user_regs_struct,fs):
323 		/*
324 		 * If this is setting fs as for normal 64-bit use but
325 		 * setting fs_base has implicitly changed it, leave it.
326 		 */
327 		if ((value == FS_TLS_SEL && task->thread.fsindex == 0 &&
328 		     task->thread.fs != 0) ||
329 		    (value == 0 && task->thread.fsindex == FS_TLS_SEL &&
330 		     task->thread.fs == 0))
331 			break;
332 		task->thread.fsindex = value;
333 		if (task == current)
334 			loadsegment(fs, task->thread.fsindex);
335 		break;
336 	case offsetof(struct user_regs_struct,gs):
337 		/*
338 		 * If this is setting gs as for normal 64-bit use but
339 		 * setting gs_base has implicitly changed it, leave it.
340 		 */
341 		if ((value == GS_TLS_SEL && task->thread.gsindex == 0 &&
342 		     task->thread.gs != 0) ||
343 		    (value == 0 && task->thread.gsindex == GS_TLS_SEL &&
344 		     task->thread.gs == 0))
345 			break;
346 		task->thread.gsindex = value;
347 		if (task == current)
348 			load_gs_index(task->thread.gsindex);
349 		break;
350 	case offsetof(struct user_regs_struct,ds):
351 		task->thread.ds = value;
352 		if (task == current)
353 			loadsegment(ds, task->thread.ds);
354 		break;
355 	case offsetof(struct user_regs_struct,es):
356 		task->thread.es = value;
357 		if (task == current)
358 			loadsegment(es, task->thread.es);
359 		break;
360 
361 		/*
362 		 * Can't actually change these in 64-bit mode.
363 		 */
364 	case offsetof(struct user_regs_struct,cs):
365 		if (unlikely(value == 0))
366 			return -EIO;
367 #ifdef CONFIG_IA32_EMULATION
368 		if (test_tsk_thread_flag(task, TIF_IA32))
369 			task_pt_regs(task)->cs = value;
370 #endif
371 		break;
372 	case offsetof(struct user_regs_struct,ss):
373 		if (unlikely(value == 0))
374 			return -EIO;
375 #ifdef CONFIG_IA32_EMULATION
376 		if (test_tsk_thread_flag(task, TIF_IA32))
377 			task_pt_regs(task)->ss = value;
378 #endif
379 		break;
380 	}
381 
382 	return 0;
383 }
384 
385 #endif	/* CONFIG_X86_32 */
386 
387 static unsigned long get_flags(struct task_struct *task)
388 {
389 	unsigned long retval = task_pt_regs(task)->flags;
390 
391 	/*
392 	 * If the debugger set TF, hide it from the readout.
393 	 */
394 	if (test_tsk_thread_flag(task, TIF_FORCED_TF))
395 		retval &= ~X86_EFLAGS_TF;
396 
397 	return retval;
398 }
399 
400 static int set_flags(struct task_struct *task, unsigned long value)
401 {
402 	struct pt_regs *regs = task_pt_regs(task);
403 
404 	/*
405 	 * If the user value contains TF, mark that
406 	 * it was not "us" (the debugger) that set it.
407 	 * If not, make sure it stays set if we had.
408 	 */
409 	if (value & X86_EFLAGS_TF)
410 		clear_tsk_thread_flag(task, TIF_FORCED_TF);
411 	else if (test_tsk_thread_flag(task, TIF_FORCED_TF))
412 		value |= X86_EFLAGS_TF;
413 
414 	regs->flags = (regs->flags & ~FLAG_MASK) | (value & FLAG_MASK);
415 
416 	return 0;
417 }
418 
419 static int putreg(struct task_struct *child,
420 		  unsigned long offset, unsigned long value)
421 {
422 	switch (offset) {
423 	case offsetof(struct user_regs_struct, cs):
424 	case offsetof(struct user_regs_struct, ds):
425 	case offsetof(struct user_regs_struct, es):
426 	case offsetof(struct user_regs_struct, fs):
427 	case offsetof(struct user_regs_struct, gs):
428 	case offsetof(struct user_regs_struct, ss):
429 		return set_segment_reg(child, offset, value);
430 
431 	case offsetof(struct user_regs_struct, flags):
432 		return set_flags(child, value);
433 
434 #ifdef CONFIG_X86_64
435 	case offsetof(struct user_regs_struct,fs_base):
436 		if (value >= TASK_SIZE_OF(child))
437 			return -EIO;
438 		/*
439 		 * When changing the segment base, use do_arch_prctl
440 		 * to set either thread.fs or thread.fsindex and the
441 		 * corresponding GDT slot.
442 		 */
443 		if (child->thread.fs != value)
444 			return do_arch_prctl(child, ARCH_SET_FS, value);
445 		return 0;
446 	case offsetof(struct user_regs_struct,gs_base):
447 		/*
448 		 * Exactly the same here as the %fs handling above.
449 		 */
450 		if (value >= TASK_SIZE_OF(child))
451 			return -EIO;
452 		if (child->thread.gs != value)
453 			return do_arch_prctl(child, ARCH_SET_GS, value);
454 		return 0;
455 #endif
456 	}
457 
458 	*pt_regs_access(task_pt_regs(child), offset) = value;
459 	return 0;
460 }
461 
462 static unsigned long getreg(struct task_struct *task, unsigned long offset)
463 {
464 	switch (offset) {
465 	case offsetof(struct user_regs_struct, cs):
466 	case offsetof(struct user_regs_struct, ds):
467 	case offsetof(struct user_regs_struct, es):
468 	case offsetof(struct user_regs_struct, fs):
469 	case offsetof(struct user_regs_struct, gs):
470 	case offsetof(struct user_regs_struct, ss):
471 		return get_segment_reg(task, offset);
472 
473 	case offsetof(struct user_regs_struct, flags):
474 		return get_flags(task);
475 
476 #ifdef CONFIG_X86_64
477 	case offsetof(struct user_regs_struct, fs_base): {
478 		/*
479 		 * do_arch_prctl may have used a GDT slot instead of
480 		 * the MSR.  To userland, it appears the same either
481 		 * way, except the %fs segment selector might not be 0.
482 		 */
483 		unsigned int seg = task->thread.fsindex;
484 		if (task->thread.fs != 0)
485 			return task->thread.fs;
486 		if (task == current)
487 			asm("movl %%fs,%0" : "=r" (seg));
488 		if (seg != FS_TLS_SEL)
489 			return 0;
490 		return get_desc_base(&task->thread.tls_array[FS_TLS]);
491 	}
492 	case offsetof(struct user_regs_struct, gs_base): {
493 		/*
494 		 * Exactly the same here as the %fs handling above.
495 		 */
496 		unsigned int seg = task->thread.gsindex;
497 		if (task->thread.gs != 0)
498 			return task->thread.gs;
499 		if (task == current)
500 			asm("movl %%gs,%0" : "=r" (seg));
501 		if (seg != GS_TLS_SEL)
502 			return 0;
503 		return get_desc_base(&task->thread.tls_array[GS_TLS]);
504 	}
505 #endif
506 	}
507 
508 	return *pt_regs_access(task_pt_regs(task), offset);
509 }
510 
511 static int genregs_get(struct task_struct *target,
512 		       const struct user_regset *regset,
513 		       unsigned int pos, unsigned int count,
514 		       void *kbuf, void __user *ubuf)
515 {
516 	if (kbuf) {
517 		unsigned long *k = kbuf;
518 		while (count >= sizeof(*k)) {
519 			*k++ = getreg(target, pos);
520 			count -= sizeof(*k);
521 			pos += sizeof(*k);
522 		}
523 	} else {
524 		unsigned long __user *u = ubuf;
525 		while (count >= sizeof(*u)) {
526 			if (__put_user(getreg(target, pos), u++))
527 				return -EFAULT;
528 			count -= sizeof(*u);
529 			pos += sizeof(*u);
530 		}
531 	}
532 
533 	return 0;
534 }
535 
536 static int genregs_set(struct task_struct *target,
537 		       const struct user_regset *regset,
538 		       unsigned int pos, unsigned int count,
539 		       const void *kbuf, const void __user *ubuf)
540 {
541 	int ret = 0;
542 	if (kbuf) {
543 		const unsigned long *k = kbuf;
544 		while (count >= sizeof(*k) && !ret) {
545 			ret = putreg(target, pos, *k++);
546 			count -= sizeof(*k);
547 			pos += sizeof(*k);
548 		}
549 	} else {
550 		const unsigned long  __user *u = ubuf;
551 		while (count >= sizeof(*u) && !ret) {
552 			unsigned long word;
553 			ret = __get_user(word, u++);
554 			if (ret)
555 				break;
556 			ret = putreg(target, pos, word);
557 			count -= sizeof(*u);
558 			pos += sizeof(*u);
559 		}
560 	}
561 	return ret;
562 }
563 
564 static void ptrace_triggered(struct perf_event *bp,
565 			     struct perf_sample_data *data,
566 			     struct pt_regs *regs)
567 {
568 	int i;
569 	struct thread_struct *thread = &(current->thread);
570 
571 	/*
572 	 * Store in the virtual DR6 register the fact that the breakpoint
573 	 * was hit so the thread's debugger will see it.
574 	 */
575 	for (i = 0; i < HBP_NUM; i++) {
576 		if (thread->ptrace_bps[i] == bp)
577 			break;
578 	}
579 
580 	thread->debugreg6 |= (DR_TRAP0 << i);
581 }
582 
583 /*
584  * Walk through every ptrace breakpoints for this thread and
585  * build the dr7 value on top of their attributes.
586  *
587  */
588 static unsigned long ptrace_get_dr7(struct perf_event *bp[])
589 {
590 	int i;
591 	int dr7 = 0;
592 	struct arch_hw_breakpoint *info;
593 
594 	for (i = 0; i < HBP_NUM; i++) {
595 		if (bp[i] && !bp[i]->attr.disabled) {
596 			info = counter_arch_bp(bp[i]);
597 			dr7 |= encode_dr7(i, info->len, info->type);
598 		}
599 	}
600 
601 	return dr7;
602 }
603 
604 static int ptrace_fill_bp_fields(struct perf_event_attr *attr,
605 					int len, int type, bool disabled)
606 {
607 	int err, bp_len, bp_type;
608 
609 	err = arch_bp_generic_fields(len, type, &bp_len, &bp_type);
610 	if (!err) {
611 		attr->bp_len = bp_len;
612 		attr->bp_type = bp_type;
613 		attr->disabled = disabled;
614 	}
615 
616 	return err;
617 }
618 
619 static struct perf_event *
620 ptrace_register_breakpoint(struct task_struct *tsk, int len, int type,
621 				unsigned long addr, bool disabled)
622 {
623 	struct perf_event_attr attr;
624 	int err;
625 
626 	ptrace_breakpoint_init(&attr);
627 	attr.bp_addr = addr;
628 
629 	err = ptrace_fill_bp_fields(&attr, len, type, disabled);
630 	if (err)
631 		return ERR_PTR(err);
632 
633 	return register_user_hw_breakpoint(&attr, ptrace_triggered,
634 						 NULL, tsk);
635 }
636 
637 static int ptrace_modify_breakpoint(struct perf_event *bp, int len, int type,
638 					int disabled)
639 {
640 	struct perf_event_attr attr = bp->attr;
641 	int err;
642 
643 	err = ptrace_fill_bp_fields(&attr, len, type, disabled);
644 	if (err)
645 		return err;
646 
647 	return modify_user_hw_breakpoint(bp, &attr);
648 }
649 
650 /*
651  * Handle ptrace writes to debug register 7.
652  */
653 static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data)
654 {
655 	struct thread_struct *thread = &tsk->thread;
656 	unsigned long old_dr7;
657 	bool second_pass = false;
658 	int i, rc, ret = 0;
659 
660 	data &= ~DR_CONTROL_RESERVED;
661 	old_dr7 = ptrace_get_dr7(thread->ptrace_bps);
662 
663 restore:
664 	rc = 0;
665 	for (i = 0; i < HBP_NUM; i++) {
666 		unsigned len, type;
667 		bool disabled = !decode_dr7(data, i, &len, &type);
668 		struct perf_event *bp = thread->ptrace_bps[i];
669 
670 		if (!bp) {
671 			if (disabled)
672 				continue;
673 
674 			bp = ptrace_register_breakpoint(tsk,
675 					len, type, 0, disabled);
676 			if (IS_ERR(bp)) {
677 				rc = PTR_ERR(bp);
678 				break;
679 			}
680 
681 			thread->ptrace_bps[i] = bp;
682 			continue;
683 		}
684 
685 		rc = ptrace_modify_breakpoint(bp, len, type, disabled);
686 		if (rc)
687 			break;
688 	}
689 
690 	/* Restore if the first pass failed, second_pass shouldn't fail. */
691 	if (rc && !WARN_ON(second_pass)) {
692 		ret = rc;
693 		data = old_dr7;
694 		second_pass = true;
695 		goto restore;
696 	}
697 
698 	return ret;
699 }
700 
701 /*
702  * Handle PTRACE_PEEKUSR calls for the debug register area.
703  */
704 static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n)
705 {
706 	struct thread_struct *thread = &tsk->thread;
707 	unsigned long val = 0;
708 
709 	if (n < HBP_NUM) {
710 		struct perf_event *bp = thread->ptrace_bps[n];
711 
712 		if (bp)
713 			val = bp->hw.info.address;
714 	} else if (n == 6) {
715 		val = thread->debugreg6;
716 	} else if (n == 7) {
717 		val = thread->ptrace_dr7;
718 	}
719 	return val;
720 }
721 
722 static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr,
723 				      unsigned long addr)
724 {
725 	struct thread_struct *t = &tsk->thread;
726 	struct perf_event *bp = t->ptrace_bps[nr];
727 	int err = 0;
728 
729 	if (!bp) {
730 		/*
731 		 * Put stub len and type to create an inactive but correct bp.
732 		 *
733 		 * CHECKME: the previous code returned -EIO if the addr wasn't
734 		 * a valid task virtual addr. The new one will return -EINVAL in
735 		 *  this case.
736 		 * -EINVAL may be what we want for in-kernel breakpoints users,
737 		 * but -EIO looks better for ptrace, since we refuse a register
738 		 * writing for the user. And anyway this is the previous
739 		 * behaviour.
740 		 */
741 		bp = ptrace_register_breakpoint(tsk,
742 				X86_BREAKPOINT_LEN_1, X86_BREAKPOINT_WRITE,
743 				addr, true);
744 		if (IS_ERR(bp))
745 			err = PTR_ERR(bp);
746 		else
747 			t->ptrace_bps[nr] = bp;
748 	} else {
749 		struct perf_event_attr attr = bp->attr;
750 
751 		attr.bp_addr = addr;
752 		err = modify_user_hw_breakpoint(bp, &attr);
753 	}
754 
755 	return err;
756 }
757 
758 /*
759  * Handle PTRACE_POKEUSR calls for the debug register area.
760  */
761 static int ptrace_set_debugreg(struct task_struct *tsk, int n,
762 			       unsigned long val)
763 {
764 	struct thread_struct *thread = &tsk->thread;
765 	/* There are no DR4 or DR5 registers */
766 	int rc = -EIO;
767 
768 	if (n < HBP_NUM) {
769 		rc = ptrace_set_breakpoint_addr(tsk, n, val);
770 	} else if (n == 6) {
771 		thread->debugreg6 = val;
772 		rc = 0;
773 	} else if (n == 7) {
774 		rc = ptrace_write_dr7(tsk, val);
775 		if (!rc)
776 			thread->ptrace_dr7 = val;
777 	}
778 	return rc;
779 }
780 
781 /*
782  * These access the current or another (stopped) task's io permission
783  * bitmap for debugging or core dump.
784  */
785 static int ioperm_active(struct task_struct *target,
786 			 const struct user_regset *regset)
787 {
788 	return target->thread.io_bitmap_max / regset->size;
789 }
790 
791 static int ioperm_get(struct task_struct *target,
792 		      const struct user_regset *regset,
793 		      unsigned int pos, unsigned int count,
794 		      void *kbuf, void __user *ubuf)
795 {
796 	if (!target->thread.io_bitmap_ptr)
797 		return -ENXIO;
798 
799 	return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
800 				   target->thread.io_bitmap_ptr,
801 				   0, IO_BITMAP_BYTES);
802 }
803 
/*
 * Called by kernel/ptrace.c when detaching.
 *
 * Turns single-stepping off for @child and, where the architecture
 * defines TIF_SYSCALL_EMU, clears that thread flag as well.
 */
void ptrace_disable(struct task_struct *child)
{
#ifdef TIF_SYSCALL_EMU
	user_disable_single_step(child);
	clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
#else
	user_disable_single_step(child);
#endif
}
816 
#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
/* Forward declaration; arch_ptrace() needs it before it is initialized. */
static const struct user_regset_view user_x86_32_view;
#endif
820 
821 long arch_ptrace(struct task_struct *child, long request,
822 		 unsigned long addr, unsigned long data)
823 {
824 	int ret;
825 	unsigned long __user *datap = (unsigned long __user *)data;
826 
827 	switch (request) {
828 	/* read the word at location addr in the USER area. */
829 	case PTRACE_PEEKUSR: {
830 		unsigned long tmp;
831 
832 		ret = -EIO;
833 		if ((addr & (sizeof(data) - 1)) || addr >= sizeof(struct user))
834 			break;
835 
836 		tmp = 0;  /* Default return condition */
837 		if (addr < sizeof(struct user_regs_struct))
838 			tmp = getreg(child, addr);
839 		else if (addr >= offsetof(struct user, u_debugreg[0]) &&
840 			 addr <= offsetof(struct user, u_debugreg[7])) {
841 			addr -= offsetof(struct user, u_debugreg[0]);
842 			tmp = ptrace_get_debugreg(child, addr / sizeof(data));
843 		}
844 		ret = put_user(tmp, datap);
845 		break;
846 	}
847 
848 	case PTRACE_POKEUSR: /* write the word at location addr in the USER area */
849 		ret = -EIO;
850 		if ((addr & (sizeof(data) - 1)) || addr >= sizeof(struct user))
851 			break;
852 
853 		if (addr < sizeof(struct user_regs_struct))
854 			ret = putreg(child, addr, data);
855 		else if (addr >= offsetof(struct user, u_debugreg[0]) &&
856 			 addr <= offsetof(struct user, u_debugreg[7])) {
857 			addr -= offsetof(struct user, u_debugreg[0]);
858 			ret = ptrace_set_debugreg(child,
859 						  addr / sizeof(data), data);
860 		}
861 		break;
862 
863 	case PTRACE_GETREGS:	/* Get all gp regs from the child. */
864 		return copy_regset_to_user(child,
865 					   task_user_regset_view(current),
866 					   REGSET_GENERAL,
867 					   0, sizeof(struct user_regs_struct),
868 					   datap);
869 
870 	case PTRACE_SETREGS:	/* Set all gp regs in the child. */
871 		return copy_regset_from_user(child,
872 					     task_user_regset_view(current),
873 					     REGSET_GENERAL,
874 					     0, sizeof(struct user_regs_struct),
875 					     datap);
876 
877 	case PTRACE_GETFPREGS:	/* Get the child FPU state. */
878 		return copy_regset_to_user(child,
879 					   task_user_regset_view(current),
880 					   REGSET_FP,
881 					   0, sizeof(struct user_i387_struct),
882 					   datap);
883 
884 	case PTRACE_SETFPREGS:	/* Set the child FPU state. */
885 		return copy_regset_from_user(child,
886 					     task_user_regset_view(current),
887 					     REGSET_FP,
888 					     0, sizeof(struct user_i387_struct),
889 					     datap);
890 
891 #ifdef CONFIG_X86_32
892 	case PTRACE_GETFPXREGS:	/* Get the child extended FPU state. */
893 		return copy_regset_to_user(child, &user_x86_32_view,
894 					   REGSET_XFP,
895 					   0, sizeof(struct user_fxsr_struct),
896 					   datap) ? -EIO : 0;
897 
898 	case PTRACE_SETFPXREGS:	/* Set the child extended FPU state. */
899 		return copy_regset_from_user(child, &user_x86_32_view,
900 					     REGSET_XFP,
901 					     0, sizeof(struct user_fxsr_struct),
902 					     datap) ? -EIO : 0;
903 #endif
904 
905 #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
906 	case PTRACE_GET_THREAD_AREA:
907 		if ((int) addr < 0)
908 			return -EIO;
909 		ret = do_get_thread_area(child, addr,
910 					(struct user_desc __user *)data);
911 		break;
912 
913 	case PTRACE_SET_THREAD_AREA:
914 		if ((int) addr < 0)
915 			return -EIO;
916 		ret = do_set_thread_area(child, addr,
917 					(struct user_desc __user *)data, 0);
918 		break;
919 #endif
920 
921 #ifdef CONFIG_X86_64
922 		/* normal 64bit interface to access TLS data.
923 		   Works just like arch_prctl, except that the arguments
924 		   are reversed. */
925 	case PTRACE_ARCH_PRCTL:
926 		ret = do_arch_prctl(child, data, addr);
927 		break;
928 #endif
929 
930 	default:
931 		ret = ptrace_request(child, request, addr, data);
932 		break;
933 	}
934 
935 	return ret;
936 }
937 
938 #ifdef CONFIG_IA32_EMULATION
939 
940 #include <linux/compat.h>
941 #include <linux/syscalls.h>
942 #include <asm/ia32.h>
943 #include <asm/user32.h>
944 
945 #define R32(l,q)							\
946 	case offsetof(struct user32, regs.l):				\
947 		regs->q = value; break
948 
949 #define SEG32(rs)							\
950 	case offsetof(struct user32, regs.rs):				\
951 		return set_segment_reg(child,				\
952 				       offsetof(struct user_regs_struct, rs), \
953 				       value);				\
954 		break
955 
956 static int putreg32(struct task_struct *child, unsigned regno, u32 value)
957 {
958 	struct pt_regs *regs = task_pt_regs(child);
959 
960 	switch (regno) {
961 
962 	SEG32(cs);
963 	SEG32(ds);
964 	SEG32(es);
965 	SEG32(fs);
966 	SEG32(gs);
967 	SEG32(ss);
968 
969 	R32(ebx, bx);
970 	R32(ecx, cx);
971 	R32(edx, dx);
972 	R32(edi, di);
973 	R32(esi, si);
974 	R32(ebp, bp);
975 	R32(eax, ax);
976 	R32(eip, ip);
977 	R32(esp, sp);
978 
979 	case offsetof(struct user32, regs.orig_eax):
980 		/*
981 		 * A 32-bit debugger setting orig_eax means to restore
982 		 * the state of the task restarting a 32-bit syscall.
983 		 * Make sure we interpret the -ERESTART* codes correctly
984 		 * in case the task is not actually still sitting at the
985 		 * exit from a 32-bit syscall with TS_COMPAT still set.
986 		 */
987 		regs->orig_ax = value;
988 		if (syscall_get_nr(child, regs) >= 0)
989 			task_thread_info(child)->status |= TS_COMPAT;
990 		break;
991 
992 	case offsetof(struct user32, regs.eflags):
993 		return set_flags(child, value);
994 
995 	case offsetof(struct user32, u_debugreg[0]) ...
996 		offsetof(struct user32, u_debugreg[7]):
997 		regno -= offsetof(struct user32, u_debugreg[0]);
998 		return ptrace_set_debugreg(child, regno / 4, value);
999 
1000 	default:
1001 		if (regno > sizeof(struct user32) || (regno & 3))
1002 			return -EIO;
1003 
1004 		/*
1005 		 * Other dummy fields in the virtual user structure
1006 		 * are ignored
1007 		 */
1008 		break;
1009 	}
1010 	return 0;
1011 }
1012 
1013 #undef R32
1014 #undef SEG32
1015 
1016 #define R32(l,q)							\
1017 	case offsetof(struct user32, regs.l):				\
1018 		*val = regs->q; break
1019 
1020 #define SEG32(rs)							\
1021 	case offsetof(struct user32, regs.rs):				\
1022 		*val = get_segment_reg(child,				\
1023 				       offsetof(struct user_regs_struct, rs)); \
1024 		break
1025 
1026 static int getreg32(struct task_struct *child, unsigned regno, u32 *val)
1027 {
1028 	struct pt_regs *regs = task_pt_regs(child);
1029 
1030 	switch (regno) {
1031 
1032 	SEG32(ds);
1033 	SEG32(es);
1034 	SEG32(fs);
1035 	SEG32(gs);
1036 
1037 	R32(cs, cs);
1038 	R32(ss, ss);
1039 	R32(ebx, bx);
1040 	R32(ecx, cx);
1041 	R32(edx, dx);
1042 	R32(edi, di);
1043 	R32(esi, si);
1044 	R32(ebp, bp);
1045 	R32(eax, ax);
1046 	R32(orig_eax, orig_ax);
1047 	R32(eip, ip);
1048 	R32(esp, sp);
1049 
1050 	case offsetof(struct user32, regs.eflags):
1051 		*val = get_flags(child);
1052 		break;
1053 
1054 	case offsetof(struct user32, u_debugreg[0]) ...
1055 		offsetof(struct user32, u_debugreg[7]):
1056 		regno -= offsetof(struct user32, u_debugreg[0]);
1057 		*val = ptrace_get_debugreg(child, regno / 4);
1058 		break;
1059 
1060 	default:
1061 		if (regno > sizeof(struct user32) || (regno & 3))
1062 			return -EIO;
1063 
1064 		/*
1065 		 * Other dummy fields in the virtual user structure
1066 		 * are ignored
1067 		 */
1068 		*val = 0;
1069 		break;
1070 	}
1071 	return 0;
1072 }
1073 
1074 #undef R32
1075 #undef SEG32
1076 
1077 static int genregs32_get(struct task_struct *target,
1078 			 const struct user_regset *regset,
1079 			 unsigned int pos, unsigned int count,
1080 			 void *kbuf, void __user *ubuf)
1081 {
1082 	if (kbuf) {
1083 		compat_ulong_t *k = kbuf;
1084 		while (count >= sizeof(*k)) {
1085 			getreg32(target, pos, k++);
1086 			count -= sizeof(*k);
1087 			pos += sizeof(*k);
1088 		}
1089 	} else {
1090 		compat_ulong_t __user *u = ubuf;
1091 		while (count >= sizeof(*u)) {
1092 			compat_ulong_t word;
1093 			getreg32(target, pos, &word);
1094 			if (__put_user(word, u++))
1095 				return -EFAULT;
1096 			count -= sizeof(*u);
1097 			pos += sizeof(*u);
1098 		}
1099 	}
1100 
1101 	return 0;
1102 }
1103 
1104 static int genregs32_set(struct task_struct *target,
1105 			 const struct user_regset *regset,
1106 			 unsigned int pos, unsigned int count,
1107 			 const void *kbuf, const void __user *ubuf)
1108 {
1109 	int ret = 0;
1110 	if (kbuf) {
1111 		const compat_ulong_t *k = kbuf;
1112 		while (count >= sizeof(*k) && !ret) {
1113 			ret = putreg32(target, pos, *k++);
1114 			count -= sizeof(*k);
1115 			pos += sizeof(*k);
1116 		}
1117 	} else {
1118 		const compat_ulong_t __user *u = ubuf;
1119 		while (count >= sizeof(*u) && !ret) {
1120 			compat_ulong_t word;
1121 			ret = __get_user(word, u++);
1122 			if (ret)
1123 				break;
1124 			ret = putreg32(target, pos, word);
1125 			count -= sizeof(*u);
1126 			pos += sizeof(*u);
1127 		}
1128 	}
1129 	return ret;
1130 }
1131 
1132 #ifdef CONFIG_X86_X32_ABI
1133 static long x32_arch_ptrace(struct task_struct *child,
1134 			    compat_long_t request, compat_ulong_t caddr,
1135 			    compat_ulong_t cdata)
1136 {
1137 	unsigned long addr = caddr;
1138 	unsigned long data = cdata;
1139 	void __user *datap = compat_ptr(data);
1140 	int ret;
1141 
1142 	switch (request) {
1143 	/* Read 32bits at location addr in the USER area.  Only allow
1144 	   to return the lower 32bits of segment and debug registers.  */
1145 	case PTRACE_PEEKUSR: {
1146 		u32 tmp;
1147 
1148 		ret = -EIO;
1149 		if ((addr & (sizeof(data) - 1)) || addr >= sizeof(struct user) ||
1150 		    addr < offsetof(struct user_regs_struct, cs))
1151 			break;
1152 
1153 		tmp = 0;  /* Default return condition */
1154 		if (addr < sizeof(struct user_regs_struct))
1155 			tmp = getreg(child, addr);
1156 		else if (addr >= offsetof(struct user, u_debugreg[0]) &&
1157 			 addr <= offsetof(struct user, u_debugreg[7])) {
1158 			addr -= offsetof(struct user, u_debugreg[0]);
1159 			tmp = ptrace_get_debugreg(child, addr / sizeof(data));
1160 		}
1161 		ret = put_user(tmp, (__u32 __user *)datap);
1162 		break;
1163 	}
1164 
1165 	/* Write the word at location addr in the USER area.  Only allow
1166 	   to update segment and debug registers with the upper 32bits
1167 	   zero-extended. */
1168 	case PTRACE_POKEUSR:
1169 		ret = -EIO;
1170 		if ((addr & (sizeof(data) - 1)) || addr >= sizeof(struct user) ||
1171 		    addr < offsetof(struct user_regs_struct, cs))
1172 			break;
1173 
1174 		if (addr < sizeof(struct user_regs_struct))
1175 			ret = putreg(child, addr, data);
1176 		else if (addr >= offsetof(struct user, u_debugreg[0]) &&
1177 			 addr <= offsetof(struct user, u_debugreg[7])) {
1178 			addr -= offsetof(struct user, u_debugreg[0]);
1179 			ret = ptrace_set_debugreg(child,
1180 						  addr / sizeof(data), data);
1181 		}
1182 		break;
1183 
1184 	case PTRACE_GETREGS:	/* Get all gp regs from the child. */
1185 		return copy_regset_to_user(child,
1186 					   task_user_regset_view(current),
1187 					   REGSET_GENERAL,
1188 					   0, sizeof(struct user_regs_struct),
1189 					   datap);
1190 
1191 	case PTRACE_SETREGS:	/* Set all gp regs in the child. */
1192 		return copy_regset_from_user(child,
1193 					     task_user_regset_view(current),
1194 					     REGSET_GENERAL,
1195 					     0, sizeof(struct user_regs_struct),
1196 					     datap);
1197 
1198 	case PTRACE_GETFPREGS:	/* Get the child FPU state. */
1199 		return copy_regset_to_user(child,
1200 					   task_user_regset_view(current),
1201 					   REGSET_FP,
1202 					   0, sizeof(struct user_i387_struct),
1203 					   datap);
1204 
1205 	case PTRACE_SETFPREGS:	/* Set the child FPU state. */
1206 		return copy_regset_from_user(child,
1207 					     task_user_regset_view(current),
1208 					     REGSET_FP,
1209 					     0, sizeof(struct user_i387_struct),
1210 					     datap);
1211 
1212 	default:
1213 		return compat_ptrace_request(child, request, addr, data);
1214 	}
1215 
1216 	return ret;
1217 }
1218 #endif
1219 
1220 long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
1221 			compat_ulong_t caddr, compat_ulong_t cdata)
1222 {
1223 	unsigned long addr = caddr;
1224 	unsigned long data = cdata;
1225 	void __user *datap = compat_ptr(data);
1226 	int ret;
1227 	__u32 val;
1228 
1229 #ifdef CONFIG_X86_X32_ABI
1230 	if (!is_ia32_task())
1231 		return x32_arch_ptrace(child, request, caddr, cdata);
1232 #endif
1233 
1234 	switch (request) {
1235 	case PTRACE_PEEKUSR:
1236 		ret = getreg32(child, addr, &val);
1237 		if (ret == 0)
1238 			ret = put_user(val, (__u32 __user *)datap);
1239 		break;
1240 
1241 	case PTRACE_POKEUSR:
1242 		ret = putreg32(child, addr, data);
1243 		break;
1244 
1245 	case PTRACE_GETREGS:	/* Get all gp regs from the child. */
1246 		return copy_regset_to_user(child, &user_x86_32_view,
1247 					   REGSET_GENERAL,
1248 					   0, sizeof(struct user_regs_struct32),
1249 					   datap);
1250 
1251 	case PTRACE_SETREGS:	/* Set all gp regs in the child. */
1252 		return copy_regset_from_user(child, &user_x86_32_view,
1253 					     REGSET_GENERAL, 0,
1254 					     sizeof(struct user_regs_struct32),
1255 					     datap);
1256 
1257 	case PTRACE_GETFPREGS:	/* Get the child FPU state. */
1258 		return copy_regset_to_user(child, &user_x86_32_view,
1259 					   REGSET_FP, 0,
1260 					   sizeof(struct user_i387_ia32_struct),
1261 					   datap);
1262 
1263 	case PTRACE_SETFPREGS:	/* Set the child FPU state. */
1264 		return copy_regset_from_user(
1265 			child, &user_x86_32_view, REGSET_FP,
1266 			0, sizeof(struct user_i387_ia32_struct), datap);
1267 
1268 	case PTRACE_GETFPXREGS:	/* Get the child extended FPU state. */
1269 		return copy_regset_to_user(child, &user_x86_32_view,
1270 					   REGSET_XFP, 0,
1271 					   sizeof(struct user32_fxsr_struct),
1272 					   datap);
1273 
1274 	case PTRACE_SETFPXREGS:	/* Set the child extended FPU state. */
1275 		return copy_regset_from_user(child, &user_x86_32_view,
1276 					     REGSET_XFP, 0,
1277 					     sizeof(struct user32_fxsr_struct),
1278 					     datap);
1279 
1280 	case PTRACE_GET_THREAD_AREA:
1281 	case PTRACE_SET_THREAD_AREA:
1282 		return arch_ptrace(child, request, addr, data);
1283 
1284 	default:
1285 		return compat_ptrace_request(child, request, addr, data);
1286 	}
1287 
1288 	return ret;
1289 }
1290 
1291 #endif	/* CONFIG_IA32_EMULATION */
1292 
1293 #ifdef CONFIG_X86_64
1294 
1295 static struct user_regset x86_64_regsets[] __read_mostly = {
1296 	[REGSET_GENERAL] = {
1297 		.core_note_type = NT_PRSTATUS,
1298 		.n = sizeof(struct user_regs_struct) / sizeof(long),
1299 		.size = sizeof(long), .align = sizeof(long),
1300 		.get = genregs_get, .set = genregs_set
1301 	},
1302 	[REGSET_FP] = {
1303 		.core_note_type = NT_PRFPREG,
1304 		.n = sizeof(struct user_i387_struct) / sizeof(long),
1305 		.size = sizeof(long), .align = sizeof(long),
1306 		.active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set
1307 	},
1308 	[REGSET_XSTATE] = {
1309 		.core_note_type = NT_X86_XSTATE,
1310 		.size = sizeof(u64), .align = sizeof(u64),
1311 		.active = xstateregs_active, .get = xstateregs_get,
1312 		.set = xstateregs_set
1313 	},
1314 	[REGSET_IOPERM64] = {
1315 		.core_note_type = NT_386_IOPERM,
1316 		.n = IO_BITMAP_LONGS,
1317 		.size = sizeof(long), .align = sizeof(long),
1318 		.active = ioperm_active, .get = ioperm_get
1319 	},
1320 };
1321 
1322 static const struct user_regset_view user_x86_64_view = {
1323 	.name = "x86_64", .e_machine = EM_X86_64,
1324 	.regsets = x86_64_regsets, .n = ARRAY_SIZE(x86_64_regsets)
1325 };
1326 
1327 #else  /* CONFIG_X86_32 */
1328 
1329 #define user_regs_struct32	user_regs_struct
1330 #define genregs32_get		genregs_get
1331 #define genregs32_set		genregs_set
1332 
1333 #endif	/* CONFIG_X86_64 */
1334 
1335 #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
1336 static struct user_regset x86_32_regsets[] __read_mostly = {
1337 	[REGSET_GENERAL] = {
1338 		.core_note_type = NT_PRSTATUS,
1339 		.n = sizeof(struct user_regs_struct32) / sizeof(u32),
1340 		.size = sizeof(u32), .align = sizeof(u32),
1341 		.get = genregs32_get, .set = genregs32_set
1342 	},
1343 	[REGSET_FP] = {
1344 		.core_note_type = NT_PRFPREG,
1345 		.n = sizeof(struct user_i387_ia32_struct) / sizeof(u32),
1346 		.size = sizeof(u32), .align = sizeof(u32),
1347 		.active = fpregs_active, .get = fpregs_get, .set = fpregs_set
1348 	},
1349 	[REGSET_XFP] = {
1350 		.core_note_type = NT_PRXFPREG,
1351 		.n = sizeof(struct user32_fxsr_struct) / sizeof(u32),
1352 		.size = sizeof(u32), .align = sizeof(u32),
1353 		.active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set
1354 	},
1355 	[REGSET_XSTATE] = {
1356 		.core_note_type = NT_X86_XSTATE,
1357 		.size = sizeof(u64), .align = sizeof(u64),
1358 		.active = xstateregs_active, .get = xstateregs_get,
1359 		.set = xstateregs_set
1360 	},
1361 	[REGSET_TLS] = {
1362 		.core_note_type = NT_386_TLS,
1363 		.n = GDT_ENTRY_TLS_ENTRIES, .bias = GDT_ENTRY_TLS_MIN,
1364 		.size = sizeof(struct user_desc),
1365 		.align = sizeof(struct user_desc),
1366 		.active = regset_tls_active,
1367 		.get = regset_tls_get, .set = regset_tls_set
1368 	},
1369 	[REGSET_IOPERM32] = {
1370 		.core_note_type = NT_386_IOPERM,
1371 		.n = IO_BITMAP_BYTES / sizeof(u32),
1372 		.size = sizeof(u32), .align = sizeof(u32),
1373 		.active = ioperm_active, .get = ioperm_get
1374 	},
1375 };
1376 
1377 static const struct user_regset_view user_x86_32_view = {
1378 	.name = "i386", .e_machine = EM_386,
1379 	.regsets = x86_32_regsets, .n = ARRAY_SIZE(x86_32_regsets)
1380 };
1381 #endif
1382 
1383 /*
1384  * This represents bytes 464..511 in the memory layout exported through
1385  * the REGSET_XSTATE interface.
1386  */
1387 u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
1388 
1389 void update_regset_xstate_info(unsigned int size, u64 xstate_mask)
1390 {
1391 #ifdef CONFIG_X86_64
1392 	x86_64_regsets[REGSET_XSTATE].n = size / sizeof(u64);
1393 #endif
1394 #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
1395 	x86_32_regsets[REGSET_XSTATE].n = size / sizeof(u64);
1396 #endif
1397 	xstate_fx_sw_bytes[USER_XSTATE_XCR0_WORD] = xstate_mask;
1398 }
1399 
/*
 * Select the regset view that describes @task.
 *
 * With CONFIG_IA32_EMULATION the 32-bit view is returned for tasks that
 * have TIF_IA32 set; a CONFIG_X86_32 build always returns the 32-bit
 * view.  In every other case a CONFIG_X86_64 build returns the 64-bit
 * view.
 */
const struct user_regset_view *task_user_regset_view(struct task_struct *task)
{
#ifdef CONFIG_IA32_EMULATION
	if (test_tsk_thread_flag(task, TIF_IA32))
		return &user_x86_32_view;
#elif defined(CONFIG_X86_32)
	return &user_x86_32_view;
#endif

#ifdef CONFIG_X86_64
	return &user_x86_64_view;
#endif
}
1412 
1413 static void fill_sigtrap_info(struct task_struct *tsk,
1414 				struct pt_regs *regs,
1415 				int error_code, int si_code,
1416 				struct siginfo *info)
1417 {
1418 	tsk->thread.trap_nr = X86_TRAP_DB;
1419 	tsk->thread.error_code = error_code;
1420 
1421 	memset(info, 0, sizeof(*info));
1422 	info->si_signo = SIGTRAP;
1423 	info->si_code = si_code;
1424 	info->si_addr = user_mode_vm(regs) ? (void __user *)regs->ip : NULL;
1425 }
1426 
1427 void user_single_step_siginfo(struct task_struct *tsk,
1428 				struct pt_regs *regs,
1429 				struct siginfo *info)
1430 {
1431 	fill_sigtrap_info(tsk, regs, 0, TRAP_BRKPT, info);
1432 }
1433 
1434 void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs,
1435 					 int error_code, int si_code)
1436 {
1437 	struct siginfo info;
1438 
1439 	fill_sigtrap_info(tsk, regs, error_code, si_code, &info);
1440 	/* Send us the fake SIGTRAP */
1441 	force_sig_info(SIGTRAP, &info, tsk);
1442 }
1443 
1444 static void do_audit_syscall_entry(struct pt_regs *regs, u32 arch)
1445 {
1446 #ifdef CONFIG_X86_64
1447 	if (arch == AUDIT_ARCH_X86_64) {
1448 		audit_syscall_entry(regs->orig_ax, regs->di,
1449 				    regs->si, regs->dx, regs->r10);
1450 	} else
1451 #endif
1452 	{
1453 		audit_syscall_entry(regs->orig_ax, regs->bx,
1454 				    regs->cx, regs->dx, regs->si);
1455 	}
1456 }
1457 
1458 /*
1459  * We can return 0 to resume the syscall or anything else to go to phase
1460  * 2.  If we resume the syscall, we need to put something appropriate in
1461  * regs->orig_ax.
1462  *
1463  * NB: We don't have full pt_regs here, but regs->orig_ax and regs->ax
1464  * are fully functional.
1465  *
1466  * For phase 2's benefit, our return value is:
1467  * 0:			resume the syscall
1468  * 1:			go to phase 2; no seccomp phase 2 needed
1469  * anything else:	go to phase 2; pass return value to seccomp
1470  */
1471 unsigned long syscall_trace_enter_phase1(struct pt_regs *regs, u32 arch)
1472 {
1473 	unsigned long ret = 0;
1474 	u32 work;
1475 
1476 	BUG_ON(regs != task_pt_regs(current));
1477 
1478 	work = ACCESS_ONCE(current_thread_info()->flags) &
1479 		_TIF_WORK_SYSCALL_ENTRY;
1480 
1481 	/*
1482 	 * If TIF_NOHZ is set, we are required to call user_exit() before
1483 	 * doing anything that could touch RCU.
1484 	 */
1485 	if (work & _TIF_NOHZ) {
1486 		user_exit();
1487 		work &= ~_TIF_NOHZ;
1488 	}
1489 
1490 #ifdef CONFIG_SECCOMP
1491 	/*
1492 	 * Do seccomp first -- it should minimize exposure of other
1493 	 * code, and keeping seccomp fast is probably more valuable
1494 	 * than the rest of this.
1495 	 */
1496 	if (work & _TIF_SECCOMP) {
1497 		struct seccomp_data sd;
1498 
1499 		sd.arch = arch;
1500 		sd.nr = regs->orig_ax;
1501 		sd.instruction_pointer = regs->ip;
1502 #ifdef CONFIG_X86_64
1503 		if (arch == AUDIT_ARCH_X86_64) {
1504 			sd.args[0] = regs->di;
1505 			sd.args[1] = regs->si;
1506 			sd.args[2] = regs->dx;
1507 			sd.args[3] = regs->r10;
1508 			sd.args[4] = regs->r8;
1509 			sd.args[5] = regs->r9;
1510 		} else
1511 #endif
1512 		{
1513 			sd.args[0] = regs->bx;
1514 			sd.args[1] = regs->cx;
1515 			sd.args[2] = regs->dx;
1516 			sd.args[3] = regs->si;
1517 			sd.args[4] = regs->di;
1518 			sd.args[5] = regs->bp;
1519 		}
1520 
1521 		BUILD_BUG_ON(SECCOMP_PHASE1_OK != 0);
1522 		BUILD_BUG_ON(SECCOMP_PHASE1_SKIP != 1);
1523 
1524 		ret = seccomp_phase1(&sd);
1525 		if (ret == SECCOMP_PHASE1_SKIP) {
1526 			regs->orig_ax = -1;
1527 			ret = 0;
1528 		} else if (ret != SECCOMP_PHASE1_OK) {
1529 			return ret;  /* Go directly to phase 2 */
1530 		}
1531 
1532 		work &= ~_TIF_SECCOMP;
1533 	}
1534 #endif
1535 
1536 	/* Do our best to finish without phase 2. */
1537 	if (work == 0)
1538 		return ret;  /* seccomp and/or nohz only (ret == 0 here) */
1539 
1540 #ifdef CONFIG_AUDITSYSCALL
1541 	if (work == _TIF_SYSCALL_AUDIT) {
1542 		/*
1543 		 * If there is no more work to be done except auditing,
1544 		 * then audit in phase 1.  Phase 2 always audits, so, if
1545 		 * we audit here, then we can't go on to phase 2.
1546 		 */
1547 		do_audit_syscall_entry(regs, arch);
1548 		return 0;
1549 	}
1550 #endif
1551 
1552 	return 1;  /* Something is enabled that we can't handle in phase 1 */
1553 }
1554 
1555 /* Returns the syscall nr to run (which should match regs->orig_ax). */
1556 long syscall_trace_enter_phase2(struct pt_regs *regs, u32 arch,
1557 				unsigned long phase1_result)
1558 {
1559 	long ret = 0;
1560 	u32 work = ACCESS_ONCE(current_thread_info()->flags) &
1561 		_TIF_WORK_SYSCALL_ENTRY;
1562 
1563 	BUG_ON(regs != task_pt_regs(current));
1564 
1565 	/*
1566 	 * If we stepped into a sysenter/syscall insn, it trapped in
1567 	 * kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP.
1568 	 * If user-mode had set TF itself, then it's still clear from
1569 	 * do_debug() and we need to set it again to restore the user
1570 	 * state.  If we entered on the slow path, TF was already set.
1571 	 */
1572 	if (work & _TIF_SINGLESTEP)
1573 		regs->flags |= X86_EFLAGS_TF;
1574 
1575 #ifdef CONFIG_SECCOMP
1576 	/*
1577 	 * Call seccomp_phase2 before running the other hooks so that
1578 	 * they can see any changes made by a seccomp tracer.
1579 	 */
1580 	if (phase1_result > 1 && seccomp_phase2(phase1_result)) {
1581 		/* seccomp failures shouldn't expose any additional code. */
1582 		return -1;
1583 	}
1584 #endif
1585 
1586 	if (unlikely(work & _TIF_SYSCALL_EMU))
1587 		ret = -1L;
1588 
1589 	if ((ret || test_thread_flag(TIF_SYSCALL_TRACE)) &&
1590 	    tracehook_report_syscall_entry(regs))
1591 		ret = -1L;
1592 
1593 	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
1594 		trace_sys_enter(regs, regs->orig_ax);
1595 
1596 	do_audit_syscall_entry(regs, arch);
1597 
1598 	return ret ?: regs->orig_ax;
1599 }
1600 
1601 long syscall_trace_enter(struct pt_regs *regs)
1602 {
1603 	u32 arch = is_ia32_task() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
1604 	unsigned long phase1_result = syscall_trace_enter_phase1(regs, arch);
1605 
1606 	if (phase1_result == 0)
1607 		return regs->orig_ax;
1608 	else
1609 		return syscall_trace_enter_phase2(regs, arch, phase1_result);
1610 }
1611 
1612 void syscall_trace_leave(struct pt_regs *regs)
1613 {
1614 	bool step;
1615 
1616 	/*
1617 	 * We may come here right after calling schedule_user()
1618 	 * or do_notify_resume(), in which case we can be in RCU
1619 	 * user mode.
1620 	 */
1621 	user_exit();
1622 
1623 	audit_syscall_exit(regs);
1624 
1625 	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
1626 		trace_sys_exit(regs, regs->ax);
1627 
1628 	/*
1629 	 * If TIF_SYSCALL_EMU is set, we only get here because of
1630 	 * TIF_SINGLESTEP (i.e. this is PTRACE_SYSEMU_SINGLESTEP).
1631 	 * We already reported this syscall instruction in
1632 	 * syscall_trace_enter().
1633 	 */
1634 	step = unlikely(test_thread_flag(TIF_SINGLESTEP)) &&
1635 			!test_thread_flag(TIF_SYSCALL_EMU);
1636 	if (step || test_thread_flag(TIF_SYSCALL_TRACE))
1637 		tracehook_report_syscall_exit(regs, step);
1638 
1639 	user_enter();
1640 }
1641