1 /* By Ross Biro 1/23/92 */ 2 /* 3 * Pentium III FXSR, SSE support 4 * Gareth Hughes <gareth@valinux.com>, May 2000 5 */ 6 7 #include <linux/kernel.h> 8 #include <linux/sched.h> 9 #include <linux/mm.h> 10 #include <linux/smp.h> 11 #include <linux/errno.h> 12 #include <linux/slab.h> 13 #include <linux/ptrace.h> 14 #include <linux/regset.h> 15 #include <linux/tracehook.h> 16 #include <linux/user.h> 17 #include <linux/elf.h> 18 #include <linux/security.h> 19 #include <linux/audit.h> 20 #include <linux/seccomp.h> 21 #include <linux/signal.h> 22 #include <linux/perf_event.h> 23 #include <linux/hw_breakpoint.h> 24 #include <linux/rcupdate.h> 25 #include <linux/export.h> 26 #include <linux/context_tracking.h> 27 28 #include <asm/uaccess.h> 29 #include <asm/pgtable.h> 30 #include <asm/processor.h> 31 #include <asm/i387.h> 32 #include <asm/fpu-internal.h> 33 #include <asm/debugreg.h> 34 #include <asm/ldt.h> 35 #include <asm/desc.h> 36 #include <asm/prctl.h> 37 #include <asm/proto.h> 38 #include <asm/hw_breakpoint.h> 39 #include <asm/traps.h> 40 41 #include "tls.h" 42 43 #define CREATE_TRACE_POINTS 44 #include <trace/events/syscalls.h> 45 46 enum x86_regset { 47 REGSET_GENERAL, 48 REGSET_FP, 49 REGSET_XFP, 50 REGSET_IOPERM64 = REGSET_XFP, 51 REGSET_XSTATE, 52 REGSET_TLS, 53 REGSET_IOPERM32, 54 }; 55 56 struct pt_regs_offset { 57 const char *name; 58 int offset; 59 }; 60 61 #define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, r)} 62 #define REG_OFFSET_END {.name = NULL, .offset = 0} 63 64 static const struct pt_regs_offset regoffset_table[] = { 65 #ifdef CONFIG_X86_64 66 REG_OFFSET_NAME(r15), 67 REG_OFFSET_NAME(r14), 68 REG_OFFSET_NAME(r13), 69 REG_OFFSET_NAME(r12), 70 REG_OFFSET_NAME(r11), 71 REG_OFFSET_NAME(r10), 72 REG_OFFSET_NAME(r9), 73 REG_OFFSET_NAME(r8), 74 #endif 75 REG_OFFSET_NAME(bx), 76 REG_OFFSET_NAME(cx), 77 REG_OFFSET_NAME(dx), 78 REG_OFFSET_NAME(si), 79 REG_OFFSET_NAME(di), 80 REG_OFFSET_NAME(bp), 81 REG_OFFSET_NAME(ax), 82 #ifdef 
CONFIG_X86_32 83 REG_OFFSET_NAME(ds), 84 REG_OFFSET_NAME(es), 85 REG_OFFSET_NAME(fs), 86 REG_OFFSET_NAME(gs), 87 #endif 88 REG_OFFSET_NAME(orig_ax), 89 REG_OFFSET_NAME(ip), 90 REG_OFFSET_NAME(cs), 91 REG_OFFSET_NAME(flags), 92 REG_OFFSET_NAME(sp), 93 REG_OFFSET_NAME(ss), 94 REG_OFFSET_END, 95 }; 96 97 /** 98 * regs_query_register_offset() - query register offset from its name 99 * @name: the name of a register 100 * 101 * regs_query_register_offset() returns the offset of a register in struct 102 * pt_regs from its name. If the name is invalid, this returns -EINVAL; 103 */ 104 int regs_query_register_offset(const char *name) 105 { 106 const struct pt_regs_offset *roff; 107 for (roff = regoffset_table; roff->name != NULL; roff++) 108 if (!strcmp(roff->name, name)) 109 return roff->offset; 110 return -EINVAL; 111 } 112 113 /** 114 * regs_query_register_name() - query register name from its offset 115 * @offset: the offset of a register in struct pt_regs. 116 * 117 * regs_query_register_name() returns the name of a register from its 118 * offset in struct pt_regs. If the @offset is invalid, this returns NULL; 119 */ 120 const char *regs_query_register_name(unsigned int offset) 121 { 122 const struct pt_regs_offset *roff; 123 for (roff = regoffset_table; roff->name != NULL; roff++) 124 if (roff->offset == offset) 125 return roff->name; 126 return NULL; 127 } 128 129 static const int arg_offs_table[] = { 130 #ifdef CONFIG_X86_32 131 [0] = offsetof(struct pt_regs, ax), 132 [1] = offsetof(struct pt_regs, dx), 133 [2] = offsetof(struct pt_regs, cx) 134 #else /* CONFIG_X86_64 */ 135 [0] = offsetof(struct pt_regs, di), 136 [1] = offsetof(struct pt_regs, si), 137 [2] = offsetof(struct pt_regs, dx), 138 [3] = offsetof(struct pt_regs, cx), 139 [4] = offsetof(struct pt_regs, r8), 140 [5] = offsetof(struct pt_regs, r9) 141 #endif 142 }; 143 144 /* 145 * does not yet catch signals sent when the child dies. 146 * in exit.c or in signal.c. 
147 */ 148 149 /* 150 * Determines which flags the user has access to [1 = access, 0 = no access]. 151 */ 152 #define FLAG_MASK_32 ((unsigned long) \ 153 (X86_EFLAGS_CF | X86_EFLAGS_PF | \ 154 X86_EFLAGS_AF | X86_EFLAGS_ZF | \ 155 X86_EFLAGS_SF | X86_EFLAGS_TF | \ 156 X86_EFLAGS_DF | X86_EFLAGS_OF | \ 157 X86_EFLAGS_RF | X86_EFLAGS_AC)) 158 159 /* 160 * Determines whether a value may be installed in a segment register. 161 */ 162 static inline bool invalid_selector(u16 value) 163 { 164 return unlikely(value != 0 && (value & SEGMENT_RPL_MASK) != USER_RPL); 165 } 166 167 #ifdef CONFIG_X86_32 168 169 #define FLAG_MASK FLAG_MASK_32 170 171 /* 172 * X86_32 CPUs don't save ss and esp if the CPU is already in kernel mode 173 * when it traps. The previous stack will be directly underneath the saved 174 * registers, and 'sp/ss' won't even have been saved. Thus the '®s->sp'. 175 * 176 * Now, if the stack is empty, '®s->sp' is out of range. In this 177 * case we try to take the previous stack. To always return a non-null 178 * stack pointer we fall back to regs as stack if no previous stack 179 * exists. 180 * 181 * This is valid only for kernel mode traps. 182 */ 183 unsigned long kernel_stack_pointer(struct pt_regs *regs) 184 { 185 unsigned long context = (unsigned long)regs & ~(THREAD_SIZE - 1); 186 unsigned long sp = (unsigned long)®s->sp; 187 u32 *prev_esp; 188 189 if (context == (sp & ~(THREAD_SIZE - 1))) 190 return sp; 191 192 prev_esp = (u32 *)(context); 193 if (prev_esp) 194 return (unsigned long)prev_esp; 195 196 return (unsigned long)regs; 197 } 198 EXPORT_SYMBOL_GPL(kernel_stack_pointer); 199 200 static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno) 201 { 202 BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0); 203 return ®s->bx + (regno >> 2); 204 } 205 206 static u16 get_segment_reg(struct task_struct *task, unsigned long offset) 207 { 208 /* 209 * Returning the value truncates it to 16 bits. 
210 */ 211 unsigned int retval; 212 if (offset != offsetof(struct user_regs_struct, gs)) 213 retval = *pt_regs_access(task_pt_regs(task), offset); 214 else { 215 if (task == current) 216 retval = get_user_gs(task_pt_regs(task)); 217 else 218 retval = task_user_gs(task); 219 } 220 return retval; 221 } 222 223 static int set_segment_reg(struct task_struct *task, 224 unsigned long offset, u16 value) 225 { 226 /* 227 * The value argument was already truncated to 16 bits. 228 */ 229 if (invalid_selector(value)) 230 return -EIO; 231 232 /* 233 * For %cs and %ss we cannot permit a null selector. 234 * We can permit a bogus selector as long as it has USER_RPL. 235 * Null selectors are fine for other segment registers, but 236 * we will never get back to user mode with invalid %cs or %ss 237 * and will take the trap in iret instead. Much code relies 238 * on user_mode() to distinguish a user trap frame (which can 239 * safely use invalid selectors) from a kernel trap frame. 240 */ 241 switch (offset) { 242 case offsetof(struct user_regs_struct, cs): 243 case offsetof(struct user_regs_struct, ss): 244 if (unlikely(value == 0)) 245 return -EIO; 246 247 default: 248 *pt_regs_access(task_pt_regs(task), offset) = value; 249 break; 250 251 case offsetof(struct user_regs_struct, gs): 252 if (task == current) 253 set_user_gs(task_pt_regs(task), value); 254 else 255 task_user_gs(task) = value; 256 } 257 258 return 0; 259 } 260 261 #else /* CONFIG_X86_64 */ 262 263 #define FLAG_MASK (FLAG_MASK_32 | X86_EFLAGS_NT) 264 265 static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long offset) 266 { 267 BUILD_BUG_ON(offsetof(struct pt_regs, r15) != 0); 268 return ®s->r15 + (offset / sizeof(regs->r15)); 269 } 270 271 static u16 get_segment_reg(struct task_struct *task, unsigned long offset) 272 { 273 /* 274 * Returning the value truncates it to 16 bits. 
275 */ 276 unsigned int seg; 277 278 switch (offset) { 279 case offsetof(struct user_regs_struct, fs): 280 if (task == current) { 281 /* Older gas can't assemble movq %?s,%r?? */ 282 asm("movl %%fs,%0" : "=r" (seg)); 283 return seg; 284 } 285 return task->thread.fsindex; 286 case offsetof(struct user_regs_struct, gs): 287 if (task == current) { 288 asm("movl %%gs,%0" : "=r" (seg)); 289 return seg; 290 } 291 return task->thread.gsindex; 292 case offsetof(struct user_regs_struct, ds): 293 if (task == current) { 294 asm("movl %%ds,%0" : "=r" (seg)); 295 return seg; 296 } 297 return task->thread.ds; 298 case offsetof(struct user_regs_struct, es): 299 if (task == current) { 300 asm("movl %%es,%0" : "=r" (seg)); 301 return seg; 302 } 303 return task->thread.es; 304 305 case offsetof(struct user_regs_struct, cs): 306 case offsetof(struct user_regs_struct, ss): 307 break; 308 } 309 return *pt_regs_access(task_pt_regs(task), offset); 310 } 311 312 static int set_segment_reg(struct task_struct *task, 313 unsigned long offset, u16 value) 314 { 315 /* 316 * The value argument was already truncated to 16 bits. 317 */ 318 if (invalid_selector(value)) 319 return -EIO; 320 321 switch (offset) { 322 case offsetof(struct user_regs_struct,fs): 323 /* 324 * If this is setting fs as for normal 64-bit use but 325 * setting fs_base has implicitly changed it, leave it. 326 */ 327 if ((value == FS_TLS_SEL && task->thread.fsindex == 0 && 328 task->thread.fs != 0) || 329 (value == 0 && task->thread.fsindex == FS_TLS_SEL && 330 task->thread.fs == 0)) 331 break; 332 task->thread.fsindex = value; 333 if (task == current) 334 loadsegment(fs, task->thread.fsindex); 335 break; 336 case offsetof(struct user_regs_struct,gs): 337 /* 338 * If this is setting gs as for normal 64-bit use but 339 * setting gs_base has implicitly changed it, leave it. 
340 */ 341 if ((value == GS_TLS_SEL && task->thread.gsindex == 0 && 342 task->thread.gs != 0) || 343 (value == 0 && task->thread.gsindex == GS_TLS_SEL && 344 task->thread.gs == 0)) 345 break; 346 task->thread.gsindex = value; 347 if (task == current) 348 load_gs_index(task->thread.gsindex); 349 break; 350 case offsetof(struct user_regs_struct,ds): 351 task->thread.ds = value; 352 if (task == current) 353 loadsegment(ds, task->thread.ds); 354 break; 355 case offsetof(struct user_regs_struct,es): 356 task->thread.es = value; 357 if (task == current) 358 loadsegment(es, task->thread.es); 359 break; 360 361 /* 362 * Can't actually change these in 64-bit mode. 363 */ 364 case offsetof(struct user_regs_struct,cs): 365 if (unlikely(value == 0)) 366 return -EIO; 367 #ifdef CONFIG_IA32_EMULATION 368 if (test_tsk_thread_flag(task, TIF_IA32)) 369 task_pt_regs(task)->cs = value; 370 #endif 371 break; 372 case offsetof(struct user_regs_struct,ss): 373 if (unlikely(value == 0)) 374 return -EIO; 375 #ifdef CONFIG_IA32_EMULATION 376 if (test_tsk_thread_flag(task, TIF_IA32)) 377 task_pt_regs(task)->ss = value; 378 #endif 379 break; 380 } 381 382 return 0; 383 } 384 385 #endif /* CONFIG_X86_32 */ 386 387 static unsigned long get_flags(struct task_struct *task) 388 { 389 unsigned long retval = task_pt_regs(task)->flags; 390 391 /* 392 * If the debugger set TF, hide it from the readout. 393 */ 394 if (test_tsk_thread_flag(task, TIF_FORCED_TF)) 395 retval &= ~X86_EFLAGS_TF; 396 397 return retval; 398 } 399 400 static int set_flags(struct task_struct *task, unsigned long value) 401 { 402 struct pt_regs *regs = task_pt_regs(task); 403 404 /* 405 * If the user value contains TF, mark that 406 * it was not "us" (the debugger) that set it. 407 * If not, make sure it stays set if we had. 
408 */ 409 if (value & X86_EFLAGS_TF) 410 clear_tsk_thread_flag(task, TIF_FORCED_TF); 411 else if (test_tsk_thread_flag(task, TIF_FORCED_TF)) 412 value |= X86_EFLAGS_TF; 413 414 regs->flags = (regs->flags & ~FLAG_MASK) | (value & FLAG_MASK); 415 416 return 0; 417 } 418 419 static int putreg(struct task_struct *child, 420 unsigned long offset, unsigned long value) 421 { 422 switch (offset) { 423 case offsetof(struct user_regs_struct, cs): 424 case offsetof(struct user_regs_struct, ds): 425 case offsetof(struct user_regs_struct, es): 426 case offsetof(struct user_regs_struct, fs): 427 case offsetof(struct user_regs_struct, gs): 428 case offsetof(struct user_regs_struct, ss): 429 return set_segment_reg(child, offset, value); 430 431 case offsetof(struct user_regs_struct, flags): 432 return set_flags(child, value); 433 434 #ifdef CONFIG_X86_64 435 case offsetof(struct user_regs_struct,fs_base): 436 if (value >= TASK_SIZE_OF(child)) 437 return -EIO; 438 /* 439 * When changing the segment base, use do_arch_prctl 440 * to set either thread.fs or thread.fsindex and the 441 * corresponding GDT slot. 442 */ 443 if (child->thread.fs != value) 444 return do_arch_prctl(child, ARCH_SET_FS, value); 445 return 0; 446 case offsetof(struct user_regs_struct,gs_base): 447 /* 448 * Exactly the same here as the %fs handling above. 
449 */ 450 if (value >= TASK_SIZE_OF(child)) 451 return -EIO; 452 if (child->thread.gs != value) 453 return do_arch_prctl(child, ARCH_SET_GS, value); 454 return 0; 455 #endif 456 } 457 458 *pt_regs_access(task_pt_regs(child), offset) = value; 459 return 0; 460 } 461 462 static unsigned long getreg(struct task_struct *task, unsigned long offset) 463 { 464 switch (offset) { 465 case offsetof(struct user_regs_struct, cs): 466 case offsetof(struct user_regs_struct, ds): 467 case offsetof(struct user_regs_struct, es): 468 case offsetof(struct user_regs_struct, fs): 469 case offsetof(struct user_regs_struct, gs): 470 case offsetof(struct user_regs_struct, ss): 471 return get_segment_reg(task, offset); 472 473 case offsetof(struct user_regs_struct, flags): 474 return get_flags(task); 475 476 #ifdef CONFIG_X86_64 477 case offsetof(struct user_regs_struct, fs_base): { 478 /* 479 * do_arch_prctl may have used a GDT slot instead of 480 * the MSR. To userland, it appears the same either 481 * way, except the %fs segment selector might not be 0. 482 */ 483 unsigned int seg = task->thread.fsindex; 484 if (task->thread.fs != 0) 485 return task->thread.fs; 486 if (task == current) 487 asm("movl %%fs,%0" : "=r" (seg)); 488 if (seg != FS_TLS_SEL) 489 return 0; 490 return get_desc_base(&task->thread.tls_array[FS_TLS]); 491 } 492 case offsetof(struct user_regs_struct, gs_base): { 493 /* 494 * Exactly the same here as the %fs handling above. 
495 */ 496 unsigned int seg = task->thread.gsindex; 497 if (task->thread.gs != 0) 498 return task->thread.gs; 499 if (task == current) 500 asm("movl %%gs,%0" : "=r" (seg)); 501 if (seg != GS_TLS_SEL) 502 return 0; 503 return get_desc_base(&task->thread.tls_array[GS_TLS]); 504 } 505 #endif 506 } 507 508 return *pt_regs_access(task_pt_regs(task), offset); 509 } 510 511 static int genregs_get(struct task_struct *target, 512 const struct user_regset *regset, 513 unsigned int pos, unsigned int count, 514 void *kbuf, void __user *ubuf) 515 { 516 if (kbuf) { 517 unsigned long *k = kbuf; 518 while (count >= sizeof(*k)) { 519 *k++ = getreg(target, pos); 520 count -= sizeof(*k); 521 pos += sizeof(*k); 522 } 523 } else { 524 unsigned long __user *u = ubuf; 525 while (count >= sizeof(*u)) { 526 if (__put_user(getreg(target, pos), u++)) 527 return -EFAULT; 528 count -= sizeof(*u); 529 pos += sizeof(*u); 530 } 531 } 532 533 return 0; 534 } 535 536 static int genregs_set(struct task_struct *target, 537 const struct user_regset *regset, 538 unsigned int pos, unsigned int count, 539 const void *kbuf, const void __user *ubuf) 540 { 541 int ret = 0; 542 if (kbuf) { 543 const unsigned long *k = kbuf; 544 while (count >= sizeof(*k) && !ret) { 545 ret = putreg(target, pos, *k++); 546 count -= sizeof(*k); 547 pos += sizeof(*k); 548 } 549 } else { 550 const unsigned long __user *u = ubuf; 551 while (count >= sizeof(*u) && !ret) { 552 unsigned long word; 553 ret = __get_user(word, u++); 554 if (ret) 555 break; 556 ret = putreg(target, pos, word); 557 count -= sizeof(*u); 558 pos += sizeof(*u); 559 } 560 } 561 return ret; 562 } 563 564 static void ptrace_triggered(struct perf_event *bp, 565 struct perf_sample_data *data, 566 struct pt_regs *regs) 567 { 568 int i; 569 struct thread_struct *thread = &(current->thread); 570 571 /* 572 * Store in the virtual DR6 register the fact that the breakpoint 573 * was hit so the thread's debugger will see it. 
574 */ 575 for (i = 0; i < HBP_NUM; i++) { 576 if (thread->ptrace_bps[i] == bp) 577 break; 578 } 579 580 thread->debugreg6 |= (DR_TRAP0 << i); 581 } 582 583 /* 584 * Walk through every ptrace breakpoints for this thread and 585 * build the dr7 value on top of their attributes. 586 * 587 */ 588 static unsigned long ptrace_get_dr7(struct perf_event *bp[]) 589 { 590 int i; 591 int dr7 = 0; 592 struct arch_hw_breakpoint *info; 593 594 for (i = 0; i < HBP_NUM; i++) { 595 if (bp[i] && !bp[i]->attr.disabled) { 596 info = counter_arch_bp(bp[i]); 597 dr7 |= encode_dr7(i, info->len, info->type); 598 } 599 } 600 601 return dr7; 602 } 603 604 static int ptrace_fill_bp_fields(struct perf_event_attr *attr, 605 int len, int type, bool disabled) 606 { 607 int err, bp_len, bp_type; 608 609 err = arch_bp_generic_fields(len, type, &bp_len, &bp_type); 610 if (!err) { 611 attr->bp_len = bp_len; 612 attr->bp_type = bp_type; 613 attr->disabled = disabled; 614 } 615 616 return err; 617 } 618 619 static struct perf_event * 620 ptrace_register_breakpoint(struct task_struct *tsk, int len, int type, 621 unsigned long addr, bool disabled) 622 { 623 struct perf_event_attr attr; 624 int err; 625 626 ptrace_breakpoint_init(&attr); 627 attr.bp_addr = addr; 628 629 err = ptrace_fill_bp_fields(&attr, len, type, disabled); 630 if (err) 631 return ERR_PTR(err); 632 633 return register_user_hw_breakpoint(&attr, ptrace_triggered, 634 NULL, tsk); 635 } 636 637 static int ptrace_modify_breakpoint(struct perf_event *bp, int len, int type, 638 int disabled) 639 { 640 struct perf_event_attr attr = bp->attr; 641 int err; 642 643 err = ptrace_fill_bp_fields(&attr, len, type, disabled); 644 if (err) 645 return err; 646 647 return modify_user_hw_breakpoint(bp, &attr); 648 } 649 650 /* 651 * Handle ptrace writes to debug register 7. 
652 */ 653 static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data) 654 { 655 struct thread_struct *thread = &tsk->thread; 656 unsigned long old_dr7; 657 bool second_pass = false; 658 int i, rc, ret = 0; 659 660 data &= ~DR_CONTROL_RESERVED; 661 old_dr7 = ptrace_get_dr7(thread->ptrace_bps); 662 663 restore: 664 rc = 0; 665 for (i = 0; i < HBP_NUM; i++) { 666 unsigned len, type; 667 bool disabled = !decode_dr7(data, i, &len, &type); 668 struct perf_event *bp = thread->ptrace_bps[i]; 669 670 if (!bp) { 671 if (disabled) 672 continue; 673 674 bp = ptrace_register_breakpoint(tsk, 675 len, type, 0, disabled); 676 if (IS_ERR(bp)) { 677 rc = PTR_ERR(bp); 678 break; 679 } 680 681 thread->ptrace_bps[i] = bp; 682 continue; 683 } 684 685 rc = ptrace_modify_breakpoint(bp, len, type, disabled); 686 if (rc) 687 break; 688 } 689 690 /* Restore if the first pass failed, second_pass shouldn't fail. */ 691 if (rc && !WARN_ON(second_pass)) { 692 ret = rc; 693 data = old_dr7; 694 second_pass = true; 695 goto restore; 696 } 697 698 return ret; 699 } 700 701 /* 702 * Handle PTRACE_PEEKUSR calls for the debug register area. 703 */ 704 static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n) 705 { 706 struct thread_struct *thread = &tsk->thread; 707 unsigned long val = 0; 708 709 if (n < HBP_NUM) { 710 struct perf_event *bp = thread->ptrace_bps[n]; 711 712 if (bp) 713 val = bp->hw.info.address; 714 } else if (n == 6) { 715 val = thread->debugreg6; 716 } else if (n == 7) { 717 val = thread->ptrace_dr7; 718 } 719 return val; 720 } 721 722 static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr, 723 unsigned long addr) 724 { 725 struct thread_struct *t = &tsk->thread; 726 struct perf_event *bp = t->ptrace_bps[nr]; 727 int err = 0; 728 729 if (!bp) { 730 /* 731 * Put stub len and type to create an inactive but correct bp. 732 * 733 * CHECKME: the previous code returned -EIO if the addr wasn't 734 * a valid task virtual addr. 
The new one will return -EINVAL in 735 * this case. 736 * -EINVAL may be what we want for in-kernel breakpoints users, 737 * but -EIO looks better for ptrace, since we refuse a register 738 * writing for the user. And anyway this is the previous 739 * behaviour. 740 */ 741 bp = ptrace_register_breakpoint(tsk, 742 X86_BREAKPOINT_LEN_1, X86_BREAKPOINT_WRITE, 743 addr, true); 744 if (IS_ERR(bp)) 745 err = PTR_ERR(bp); 746 else 747 t->ptrace_bps[nr] = bp; 748 } else { 749 struct perf_event_attr attr = bp->attr; 750 751 attr.bp_addr = addr; 752 err = modify_user_hw_breakpoint(bp, &attr); 753 } 754 755 return err; 756 } 757 758 /* 759 * Handle PTRACE_POKEUSR calls for the debug register area. 760 */ 761 static int ptrace_set_debugreg(struct task_struct *tsk, int n, 762 unsigned long val) 763 { 764 struct thread_struct *thread = &tsk->thread; 765 /* There are no DR4 or DR5 registers */ 766 int rc = -EIO; 767 768 if (n < HBP_NUM) { 769 rc = ptrace_set_breakpoint_addr(tsk, n, val); 770 } else if (n == 6) { 771 thread->debugreg6 = val; 772 rc = 0; 773 } else if (n == 7) { 774 rc = ptrace_write_dr7(tsk, val); 775 if (!rc) 776 thread->ptrace_dr7 = val; 777 } 778 return rc; 779 } 780 781 /* 782 * These access the current or another (stopped) task's io permission 783 * bitmap for debugging or core dump. 784 */ 785 static int ioperm_active(struct task_struct *target, 786 const struct user_regset *regset) 787 { 788 return target->thread.io_bitmap_max / regset->size; 789 } 790 791 static int ioperm_get(struct task_struct *target, 792 const struct user_regset *regset, 793 unsigned int pos, unsigned int count, 794 void *kbuf, void __user *ubuf) 795 { 796 if (!target->thread.io_bitmap_ptr) 797 return -ENXIO; 798 799 return user_regset_copyout(&pos, &count, &kbuf, &ubuf, 800 target->thread.io_bitmap_ptr, 801 0, IO_BITMAP_BYTES); 802 } 803 804 /* 805 * Called by kernel/ptrace.c when detaching.. 806 * 807 * Make sure the single step bit is not set. 
808 */ 809 void ptrace_disable(struct task_struct *child) 810 { 811 user_disable_single_step(child); 812 #ifdef TIF_SYSCALL_EMU 813 clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); 814 #endif 815 } 816 817 #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION 818 static const struct user_regset_view user_x86_32_view; /* Initialized below. */ 819 #endif 820 821 long arch_ptrace(struct task_struct *child, long request, 822 unsigned long addr, unsigned long data) 823 { 824 int ret; 825 unsigned long __user *datap = (unsigned long __user *)data; 826 827 switch (request) { 828 /* read the word at location addr in the USER area. */ 829 case PTRACE_PEEKUSR: { 830 unsigned long tmp; 831 832 ret = -EIO; 833 if ((addr & (sizeof(data) - 1)) || addr >= sizeof(struct user)) 834 break; 835 836 tmp = 0; /* Default return condition */ 837 if (addr < sizeof(struct user_regs_struct)) 838 tmp = getreg(child, addr); 839 else if (addr >= offsetof(struct user, u_debugreg[0]) && 840 addr <= offsetof(struct user, u_debugreg[7])) { 841 addr -= offsetof(struct user, u_debugreg[0]); 842 tmp = ptrace_get_debugreg(child, addr / sizeof(data)); 843 } 844 ret = put_user(tmp, datap); 845 break; 846 } 847 848 case PTRACE_POKEUSR: /* write the word at location addr in the USER area */ 849 ret = -EIO; 850 if ((addr & (sizeof(data) - 1)) || addr >= sizeof(struct user)) 851 break; 852 853 if (addr < sizeof(struct user_regs_struct)) 854 ret = putreg(child, addr, data); 855 else if (addr >= offsetof(struct user, u_debugreg[0]) && 856 addr <= offsetof(struct user, u_debugreg[7])) { 857 addr -= offsetof(struct user, u_debugreg[0]); 858 ret = ptrace_set_debugreg(child, 859 addr / sizeof(data), data); 860 } 861 break; 862 863 case PTRACE_GETREGS: /* Get all gp regs from the child. */ 864 return copy_regset_to_user(child, 865 task_user_regset_view(current), 866 REGSET_GENERAL, 867 0, sizeof(struct user_regs_struct), 868 datap); 869 870 case PTRACE_SETREGS: /* Set all gp regs in the child. 
*/ 871 return copy_regset_from_user(child, 872 task_user_regset_view(current), 873 REGSET_GENERAL, 874 0, sizeof(struct user_regs_struct), 875 datap); 876 877 case PTRACE_GETFPREGS: /* Get the child FPU state. */ 878 return copy_regset_to_user(child, 879 task_user_regset_view(current), 880 REGSET_FP, 881 0, sizeof(struct user_i387_struct), 882 datap); 883 884 case PTRACE_SETFPREGS: /* Set the child FPU state. */ 885 return copy_regset_from_user(child, 886 task_user_regset_view(current), 887 REGSET_FP, 888 0, sizeof(struct user_i387_struct), 889 datap); 890 891 #ifdef CONFIG_X86_32 892 case PTRACE_GETFPXREGS: /* Get the child extended FPU state. */ 893 return copy_regset_to_user(child, &user_x86_32_view, 894 REGSET_XFP, 895 0, sizeof(struct user_fxsr_struct), 896 datap) ? -EIO : 0; 897 898 case PTRACE_SETFPXREGS: /* Set the child extended FPU state. */ 899 return copy_regset_from_user(child, &user_x86_32_view, 900 REGSET_XFP, 901 0, sizeof(struct user_fxsr_struct), 902 datap) ? -EIO : 0; 903 #endif 904 905 #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION 906 case PTRACE_GET_THREAD_AREA: 907 if ((int) addr < 0) 908 return -EIO; 909 ret = do_get_thread_area(child, addr, 910 (struct user_desc __user *)data); 911 break; 912 913 case PTRACE_SET_THREAD_AREA: 914 if ((int) addr < 0) 915 return -EIO; 916 ret = do_set_thread_area(child, addr, 917 (struct user_desc __user *)data, 0); 918 break; 919 #endif 920 921 #ifdef CONFIG_X86_64 922 /* normal 64bit interface to access TLS data. 923 Works just like arch_prctl, except that the arguments 924 are reversed. 
*/ 925 case PTRACE_ARCH_PRCTL: 926 ret = do_arch_prctl(child, data, addr); 927 break; 928 #endif 929 930 default: 931 ret = ptrace_request(child, request, addr, data); 932 break; 933 } 934 935 return ret; 936 } 937 938 #ifdef CONFIG_IA32_EMULATION 939 940 #include <linux/compat.h> 941 #include <linux/syscalls.h> 942 #include <asm/ia32.h> 943 #include <asm/user32.h> 944 945 #define R32(l,q) \ 946 case offsetof(struct user32, regs.l): \ 947 regs->q = value; break 948 949 #define SEG32(rs) \ 950 case offsetof(struct user32, regs.rs): \ 951 return set_segment_reg(child, \ 952 offsetof(struct user_regs_struct, rs), \ 953 value); \ 954 break 955 956 static int putreg32(struct task_struct *child, unsigned regno, u32 value) 957 { 958 struct pt_regs *regs = task_pt_regs(child); 959 960 switch (regno) { 961 962 SEG32(cs); 963 SEG32(ds); 964 SEG32(es); 965 SEG32(fs); 966 SEG32(gs); 967 SEG32(ss); 968 969 R32(ebx, bx); 970 R32(ecx, cx); 971 R32(edx, dx); 972 R32(edi, di); 973 R32(esi, si); 974 R32(ebp, bp); 975 R32(eax, ax); 976 R32(eip, ip); 977 R32(esp, sp); 978 979 case offsetof(struct user32, regs.orig_eax): 980 /* 981 * A 32-bit debugger setting orig_eax means to restore 982 * the state of the task restarting a 32-bit syscall. 983 * Make sure we interpret the -ERESTART* codes correctly 984 * in case the task is not actually still sitting at the 985 * exit from a 32-bit syscall with TS_COMPAT still set. 986 */ 987 regs->orig_ax = value; 988 if (syscall_get_nr(child, regs) >= 0) 989 task_thread_info(child)->status |= TS_COMPAT; 990 break; 991 992 case offsetof(struct user32, regs.eflags): 993 return set_flags(child, value); 994 995 case offsetof(struct user32, u_debugreg[0]) ... 
996 offsetof(struct user32, u_debugreg[7]): 997 regno -= offsetof(struct user32, u_debugreg[0]); 998 return ptrace_set_debugreg(child, regno / 4, value); 999 1000 default: 1001 if (regno > sizeof(struct user32) || (regno & 3)) 1002 return -EIO; 1003 1004 /* 1005 * Other dummy fields in the virtual user structure 1006 * are ignored 1007 */ 1008 break; 1009 } 1010 return 0; 1011 } 1012 1013 #undef R32 1014 #undef SEG32 1015 1016 #define R32(l,q) \ 1017 case offsetof(struct user32, regs.l): \ 1018 *val = regs->q; break 1019 1020 #define SEG32(rs) \ 1021 case offsetof(struct user32, regs.rs): \ 1022 *val = get_segment_reg(child, \ 1023 offsetof(struct user_regs_struct, rs)); \ 1024 break 1025 1026 static int getreg32(struct task_struct *child, unsigned regno, u32 *val) 1027 { 1028 struct pt_regs *regs = task_pt_regs(child); 1029 1030 switch (regno) { 1031 1032 SEG32(ds); 1033 SEG32(es); 1034 SEG32(fs); 1035 SEG32(gs); 1036 1037 R32(cs, cs); 1038 R32(ss, ss); 1039 R32(ebx, bx); 1040 R32(ecx, cx); 1041 R32(edx, dx); 1042 R32(edi, di); 1043 R32(esi, si); 1044 R32(ebp, bp); 1045 R32(eax, ax); 1046 R32(orig_eax, orig_ax); 1047 R32(eip, ip); 1048 R32(esp, sp); 1049 1050 case offsetof(struct user32, regs.eflags): 1051 *val = get_flags(child); 1052 break; 1053 1054 case offsetof(struct user32, u_debugreg[0]) ... 
1055 offsetof(struct user32, u_debugreg[7]): 1056 regno -= offsetof(struct user32, u_debugreg[0]); 1057 *val = ptrace_get_debugreg(child, regno / 4); 1058 break; 1059 1060 default: 1061 if (regno > sizeof(struct user32) || (regno & 3)) 1062 return -EIO; 1063 1064 /* 1065 * Other dummy fields in the virtual user structure 1066 * are ignored 1067 */ 1068 *val = 0; 1069 break; 1070 } 1071 return 0; 1072 } 1073 1074 #undef R32 1075 #undef SEG32 1076 1077 static int genregs32_get(struct task_struct *target, 1078 const struct user_regset *regset, 1079 unsigned int pos, unsigned int count, 1080 void *kbuf, void __user *ubuf) 1081 { 1082 if (kbuf) { 1083 compat_ulong_t *k = kbuf; 1084 while (count >= sizeof(*k)) { 1085 getreg32(target, pos, k++); 1086 count -= sizeof(*k); 1087 pos += sizeof(*k); 1088 } 1089 } else { 1090 compat_ulong_t __user *u = ubuf; 1091 while (count >= sizeof(*u)) { 1092 compat_ulong_t word; 1093 getreg32(target, pos, &word); 1094 if (__put_user(word, u++)) 1095 return -EFAULT; 1096 count -= sizeof(*u); 1097 pos += sizeof(*u); 1098 } 1099 } 1100 1101 return 0; 1102 } 1103 1104 static int genregs32_set(struct task_struct *target, 1105 const struct user_regset *regset, 1106 unsigned int pos, unsigned int count, 1107 const void *kbuf, const void __user *ubuf) 1108 { 1109 int ret = 0; 1110 if (kbuf) { 1111 const compat_ulong_t *k = kbuf; 1112 while (count >= sizeof(*k) && !ret) { 1113 ret = putreg32(target, pos, *k++); 1114 count -= sizeof(*k); 1115 pos += sizeof(*k); 1116 } 1117 } else { 1118 const compat_ulong_t __user *u = ubuf; 1119 while (count >= sizeof(*u) && !ret) { 1120 compat_ulong_t word; 1121 ret = __get_user(word, u++); 1122 if (ret) 1123 break; 1124 ret = putreg32(target, pos, word); 1125 count -= sizeof(*u); 1126 pos += sizeof(*u); 1127 } 1128 } 1129 return ret; 1130 } 1131 1132 #ifdef CONFIG_X86_X32_ABI 1133 static long x32_arch_ptrace(struct task_struct *child, 1134 compat_long_t request, compat_ulong_t caddr, 1135 compat_ulong_t cdata) 
1136 { 1137 unsigned long addr = caddr; 1138 unsigned long data = cdata; 1139 void __user *datap = compat_ptr(data); 1140 int ret; 1141 1142 switch (request) { 1143 /* Read 32bits at location addr in the USER area. Only allow 1144 to return the lower 32bits of segment and debug registers. */ 1145 case PTRACE_PEEKUSR: { 1146 u32 tmp; 1147 1148 ret = -EIO; 1149 if ((addr & (sizeof(data) - 1)) || addr >= sizeof(struct user) || 1150 addr < offsetof(struct user_regs_struct, cs)) 1151 break; 1152 1153 tmp = 0; /* Default return condition */ 1154 if (addr < sizeof(struct user_regs_struct)) 1155 tmp = getreg(child, addr); 1156 else if (addr >= offsetof(struct user, u_debugreg[0]) && 1157 addr <= offsetof(struct user, u_debugreg[7])) { 1158 addr -= offsetof(struct user, u_debugreg[0]); 1159 tmp = ptrace_get_debugreg(child, addr / sizeof(data)); 1160 } 1161 ret = put_user(tmp, (__u32 __user *)datap); 1162 break; 1163 } 1164 1165 /* Write the word at location addr in the USER area. Only allow 1166 to update segment and debug registers with the upper 32bits 1167 zero-extended. */ 1168 case PTRACE_POKEUSR: 1169 ret = -EIO; 1170 if ((addr & (sizeof(data) - 1)) || addr >= sizeof(struct user) || 1171 addr < offsetof(struct user_regs_struct, cs)) 1172 break; 1173 1174 if (addr < sizeof(struct user_regs_struct)) 1175 ret = putreg(child, addr, data); 1176 else if (addr >= offsetof(struct user, u_debugreg[0]) && 1177 addr <= offsetof(struct user, u_debugreg[7])) { 1178 addr -= offsetof(struct user, u_debugreg[0]); 1179 ret = ptrace_set_debugreg(child, 1180 addr / sizeof(data), data); 1181 } 1182 break; 1183 1184 case PTRACE_GETREGS: /* Get all gp regs from the child. */ 1185 return copy_regset_to_user(child, 1186 task_user_regset_view(current), 1187 REGSET_GENERAL, 1188 0, sizeof(struct user_regs_struct), 1189 datap); 1190 1191 case PTRACE_SETREGS: /* Set all gp regs in the child. 
*/ 1192 return copy_regset_from_user(child, 1193 task_user_regset_view(current), 1194 REGSET_GENERAL, 1195 0, sizeof(struct user_regs_struct), 1196 datap); 1197 1198 case PTRACE_GETFPREGS: /* Get the child FPU state. */ 1199 return copy_regset_to_user(child, 1200 task_user_regset_view(current), 1201 REGSET_FP, 1202 0, sizeof(struct user_i387_struct), 1203 datap); 1204 1205 case PTRACE_SETFPREGS: /* Set the child FPU state. */ 1206 return copy_regset_from_user(child, 1207 task_user_regset_view(current), 1208 REGSET_FP, 1209 0, sizeof(struct user_i387_struct), 1210 datap); 1211 1212 default: 1213 return compat_ptrace_request(child, request, addr, data); 1214 } 1215 1216 return ret; 1217 } 1218 #endif 1219 1220 long compat_arch_ptrace(struct task_struct *child, compat_long_t request, 1221 compat_ulong_t caddr, compat_ulong_t cdata) 1222 { 1223 unsigned long addr = caddr; 1224 unsigned long data = cdata; 1225 void __user *datap = compat_ptr(data); 1226 int ret; 1227 __u32 val; 1228 1229 #ifdef CONFIG_X86_X32_ABI 1230 if (!is_ia32_task()) 1231 return x32_arch_ptrace(child, request, caddr, cdata); 1232 #endif 1233 1234 switch (request) { 1235 case PTRACE_PEEKUSR: 1236 ret = getreg32(child, addr, &val); 1237 if (ret == 0) 1238 ret = put_user(val, (__u32 __user *)datap); 1239 break; 1240 1241 case PTRACE_POKEUSR: 1242 ret = putreg32(child, addr, data); 1243 break; 1244 1245 case PTRACE_GETREGS: /* Get all gp regs from the child. */ 1246 return copy_regset_to_user(child, &user_x86_32_view, 1247 REGSET_GENERAL, 1248 0, sizeof(struct user_regs_struct32), 1249 datap); 1250 1251 case PTRACE_SETREGS: /* Set all gp regs in the child. */ 1252 return copy_regset_from_user(child, &user_x86_32_view, 1253 REGSET_GENERAL, 0, 1254 sizeof(struct user_regs_struct32), 1255 datap); 1256 1257 case PTRACE_GETFPREGS: /* Get the child FPU state. 
*/ 1258 return copy_regset_to_user(child, &user_x86_32_view, 1259 REGSET_FP, 0, 1260 sizeof(struct user_i387_ia32_struct), 1261 datap); 1262 1263 case PTRACE_SETFPREGS: /* Set the child FPU state. */ 1264 return copy_regset_from_user( 1265 child, &user_x86_32_view, REGSET_FP, 1266 0, sizeof(struct user_i387_ia32_struct), datap); 1267 1268 case PTRACE_GETFPXREGS: /* Get the child extended FPU state. */ 1269 return copy_regset_to_user(child, &user_x86_32_view, 1270 REGSET_XFP, 0, 1271 sizeof(struct user32_fxsr_struct), 1272 datap); 1273 1274 case PTRACE_SETFPXREGS: /* Set the child extended FPU state. */ 1275 return copy_regset_from_user(child, &user_x86_32_view, 1276 REGSET_XFP, 0, 1277 sizeof(struct user32_fxsr_struct), 1278 datap); 1279 1280 case PTRACE_GET_THREAD_AREA: 1281 case PTRACE_SET_THREAD_AREA: 1282 return arch_ptrace(child, request, addr, data); 1283 1284 default: 1285 return compat_ptrace_request(child, request, addr, data); 1286 } 1287 1288 return ret; 1289 } 1290 1291 #endif /* CONFIG_IA32_EMULATION */ 1292 1293 #ifdef CONFIG_X86_64 1294 1295 static struct user_regset x86_64_regsets[] __read_mostly = { 1296 [REGSET_GENERAL] = { 1297 .core_note_type = NT_PRSTATUS, 1298 .n = sizeof(struct user_regs_struct) / sizeof(long), 1299 .size = sizeof(long), .align = sizeof(long), 1300 .get = genregs_get, .set = genregs_set 1301 }, 1302 [REGSET_FP] = { 1303 .core_note_type = NT_PRFPREG, 1304 .n = sizeof(struct user_i387_struct) / sizeof(long), 1305 .size = sizeof(long), .align = sizeof(long), 1306 .active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set 1307 }, 1308 [REGSET_XSTATE] = { 1309 .core_note_type = NT_X86_XSTATE, 1310 .size = sizeof(u64), .align = sizeof(u64), 1311 .active = xstateregs_active, .get = xstateregs_get, 1312 .set = xstateregs_set 1313 }, 1314 [REGSET_IOPERM64] = { 1315 .core_note_type = NT_386_IOPERM, 1316 .n = IO_BITMAP_LONGS, 1317 .size = sizeof(long), .align = sizeof(long), 1318 .active = ioperm_active, .get = ioperm_get 1319 }, 
};

/* Regset view handed out for 64-bit tasks (see task_user_regset_view()). */
static const struct user_regset_view user_x86_64_view = {
	.name = "x86_64", .e_machine = EM_X86_64,
	.regsets = x86_64_regsets, .n = ARRAY_SIZE(x86_64_regsets)
};

#else  /* CONFIG_X86_32 */

/*
 * On a 32-bit kernel the "32" names used by the table below are simply
 * aliases for the native general-register structure and accessors.
 */
#define user_regs_struct32	user_regs_struct
#define genregs32_get		genregs_get
#define genregs32_set		genregs_set

#endif	/* CONFIG_X86_64 */

#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
/*
 * Register sets exported for 32-bit tasks, indexed by enum x86_regset.
 * The XSTATE entry has no static .n; update_regset_xstate_info() fills
 * it in.  The IOPERM entry provides no .set handler.
 */
static struct user_regset x86_32_regsets[] __read_mostly = {
	[REGSET_GENERAL] = {
		.core_note_type = NT_PRSTATUS,
		.n = sizeof(struct user_regs_struct32) / sizeof(u32),
		.size = sizeof(u32), .align = sizeof(u32),
		.get = genregs32_get, .set = genregs32_set
	},
	[REGSET_FP] = {
		.core_note_type = NT_PRFPREG,
		.n = sizeof(struct user_i387_ia32_struct) / sizeof(u32),
		.size = sizeof(u32), .align = sizeof(u32),
		.active = fpregs_active, .get = fpregs_get, .set = fpregs_set
	},
	[REGSET_XFP] = {
		.core_note_type = NT_PRXFPREG,
		.n = sizeof(struct user32_fxsr_struct) / sizeof(u32),
		.size = sizeof(u32), .align = sizeof(u32),
		.active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set
	},
	[REGSET_XSTATE] = {
		.core_note_type = NT_X86_XSTATE,
		.size = sizeof(u64), .align = sizeof(u64),
		.active = xstateregs_active, .get = xstateregs_get,
		.set = xstateregs_set
	},
	[REGSET_TLS] = {
		.core_note_type = NT_386_TLS,
		.n = GDT_ENTRY_TLS_ENTRIES, .bias = GDT_ENTRY_TLS_MIN,
		.size = sizeof(struct user_desc),
		.align = sizeof(struct user_desc),
		.active = regset_tls_active,
		.get = regset_tls_get, .set = regset_tls_set
	},
	[REGSET_IOPERM32] = {
		.core_note_type = NT_386_IOPERM,
		.n = IO_BITMAP_BYTES / sizeof(u32),
		.size = sizeof(u32), .align = sizeof(u32),
		.active = ioperm_active, .get = ioperm_get
	},
};

/* Regset view handed out for 32-bit tasks (see task_user_regset_view()). */
static const struct user_regset_view user_x86_32_view = {
	.name = "i386", .e_machine = EM_386,
	.regsets = x86_32_regsets, .n = ARRAY_SIZE(x86_32_regsets)
};
#endif

/*
 * This represents bytes 464..511 in the memory layout exported through
 * the REGSET_XSTATE interface.
 */
u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];

/*
 * Record the xstate geometry in the regset tables.
 *
 * @size:        xstate area size in bytes; stored as a count of u64 words
 *               in the REGSET_XSTATE entry of every table that is built.
 * @xstate_mask: value stored in the USER_XSTATE_XCR0_WORD slot of
 *               xstate_fx_sw_bytes[].
 */
void update_regset_xstate_info(unsigned int size, u64 xstate_mask)
{
#ifdef CONFIG_X86_64
	x86_64_regsets[REGSET_XSTATE].n = size / sizeof(u64);
#endif
#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
	x86_32_regsets[REGSET_XSTATE].n = size / sizeof(u64);
#endif
	xstate_fx_sw_bytes[USER_XSTATE_XCR0_WORD] = xstate_mask;
}

/*
 * Pick the regset view that matches @task.
 *
 * The preprocessor blocks below combine into one of three shapes:
 *   - 32-bit kernel:                 always the i386 view;
 *   - 64-bit with IA32 emulation:    i386 view if TIF_IA32 is set on
 *                                    @task, else the x86_64 view;
 *   - 64-bit without IA32 emulation: always the x86_64 view.
 */
const struct user_regset_view *task_user_regset_view(struct task_struct *task)
{
#ifdef CONFIG_IA32_EMULATION
	if (test_tsk_thread_flag(task, TIF_IA32))
#endif
#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
		return &user_x86_32_view;
#endif
#ifdef CONFIG_X86_64
	return &user_x86_64_view;
#endif
}

/*
 * Prepare a SIGTRAP siginfo for @tsk.
 *
 * Records X86_TRAP_DB and @error_code in tsk->thread, zeroes @info and
 * fills in si_signo, si_code and si_addr.  si_addr is regs->ip when
 * user_mode_vm(@regs) is true and NULL otherwise.  Nothing is sent here.
 */
static void fill_sigtrap_info(struct task_struct *tsk,
				struct pt_regs *regs,
				int error_code, int si_code,
				struct siginfo *info)
{
	tsk->thread.trap_nr = X86_TRAP_DB;
	tsk->thread.error_code = error_code;

	memset(info, 0, sizeof(*info));
	info->si_signo = SIGTRAP;
	info->si_code = si_code;
	info->si_addr = user_mode_vm(regs) ?
(void __user *)regs->ip : NULL; 1425 } 1426 1427 void user_single_step_siginfo(struct task_struct *tsk, 1428 struct pt_regs *regs, 1429 struct siginfo *info) 1430 { 1431 fill_sigtrap_info(tsk, regs, 0, TRAP_BRKPT, info); 1432 } 1433 1434 void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, 1435 int error_code, int si_code) 1436 { 1437 struct siginfo info; 1438 1439 fill_sigtrap_info(tsk, regs, error_code, si_code, &info); 1440 /* Send us the fake SIGTRAP */ 1441 force_sig_info(SIGTRAP, &info, tsk); 1442 } 1443 1444 static void do_audit_syscall_entry(struct pt_regs *regs, u32 arch) 1445 { 1446 #ifdef CONFIG_X86_64 1447 if (arch == AUDIT_ARCH_X86_64) { 1448 audit_syscall_entry(regs->orig_ax, regs->di, 1449 regs->si, regs->dx, regs->r10); 1450 } else 1451 #endif 1452 { 1453 audit_syscall_entry(regs->orig_ax, regs->bx, 1454 regs->cx, regs->dx, regs->si); 1455 } 1456 } 1457 1458 /* 1459 * We can return 0 to resume the syscall or anything else to go to phase 1460 * 2. If we resume the syscall, we need to put something appropriate in 1461 * regs->orig_ax. 1462 * 1463 * NB: We don't have full pt_regs here, but regs->orig_ax and regs->ax 1464 * are fully functional. 1465 * 1466 * For phase 2's benefit, our return value is: 1467 * 0: resume the syscall 1468 * 1: go to phase 2; no seccomp phase 2 needed 1469 * anything else: go to phase 2; pass return value to seccomp 1470 */ 1471 unsigned long syscall_trace_enter_phase1(struct pt_regs *regs, u32 arch) 1472 { 1473 unsigned long ret = 0; 1474 u32 work; 1475 1476 BUG_ON(regs != task_pt_regs(current)); 1477 1478 work = ACCESS_ONCE(current_thread_info()->flags) & 1479 _TIF_WORK_SYSCALL_ENTRY; 1480 1481 /* 1482 * If TIF_NOHZ is set, we are required to call user_exit() before 1483 * doing anything that could touch RCU. 
1484 */ 1485 if (work & _TIF_NOHZ) { 1486 user_exit(); 1487 work &= ~TIF_NOHZ; 1488 } 1489 1490 #ifdef CONFIG_SECCOMP 1491 /* 1492 * Do seccomp first -- it should minimize exposure of other 1493 * code, and keeping seccomp fast is probably more valuable 1494 * than the rest of this. 1495 */ 1496 if (work & _TIF_SECCOMP) { 1497 struct seccomp_data sd; 1498 1499 sd.arch = arch; 1500 sd.nr = regs->orig_ax; 1501 sd.instruction_pointer = regs->ip; 1502 #ifdef CONFIG_X86_64 1503 if (arch == AUDIT_ARCH_X86_64) { 1504 sd.args[0] = regs->di; 1505 sd.args[1] = regs->si; 1506 sd.args[2] = regs->dx; 1507 sd.args[3] = regs->r10; 1508 sd.args[4] = regs->r8; 1509 sd.args[5] = regs->r9; 1510 } else 1511 #endif 1512 { 1513 sd.args[0] = regs->bx; 1514 sd.args[1] = regs->cx; 1515 sd.args[2] = regs->dx; 1516 sd.args[3] = regs->si; 1517 sd.args[4] = regs->di; 1518 sd.args[5] = regs->bp; 1519 } 1520 1521 BUILD_BUG_ON(SECCOMP_PHASE1_OK != 0); 1522 BUILD_BUG_ON(SECCOMP_PHASE1_SKIP != 1); 1523 1524 ret = seccomp_phase1(&sd); 1525 if (ret == SECCOMP_PHASE1_SKIP) { 1526 regs->orig_ax = -1; 1527 ret = 0; 1528 } else if (ret != SECCOMP_PHASE1_OK) { 1529 return ret; /* Go directly to phase 2 */ 1530 } 1531 1532 work &= ~_TIF_SECCOMP; 1533 } 1534 #endif 1535 1536 /* Do our best to finish without phase 2. */ 1537 if (work == 0) 1538 return ret; /* seccomp and/or nohz only (ret == 0 here) */ 1539 1540 #ifdef CONFIG_AUDITSYSCALL 1541 if (work == _TIF_SYSCALL_AUDIT) { 1542 /* 1543 * If there is no more work to be done except auditing, 1544 * then audit in phase 1. Phase 2 always audits, so, if 1545 * we audit here, then we can't go on to phase 2. 1546 */ 1547 do_audit_syscall_entry(regs, arch); 1548 return 0; 1549 } 1550 #endif 1551 1552 return 1; /* Something is enabled that we can't handle in phase 1 */ 1553 } 1554 1555 /* Returns the syscall nr to run (which should match regs->orig_ax). 
*/ 1556 long syscall_trace_enter_phase2(struct pt_regs *regs, u32 arch, 1557 unsigned long phase1_result) 1558 { 1559 long ret = 0; 1560 u32 work = ACCESS_ONCE(current_thread_info()->flags) & 1561 _TIF_WORK_SYSCALL_ENTRY; 1562 1563 BUG_ON(regs != task_pt_regs(current)); 1564 1565 /* 1566 * If we stepped into a sysenter/syscall insn, it trapped in 1567 * kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP. 1568 * If user-mode had set TF itself, then it's still clear from 1569 * do_debug() and we need to set it again to restore the user 1570 * state. If we entered on the slow path, TF was already set. 1571 */ 1572 if (work & _TIF_SINGLESTEP) 1573 regs->flags |= X86_EFLAGS_TF; 1574 1575 #ifdef CONFIG_SECCOMP 1576 /* 1577 * Call seccomp_phase2 before running the other hooks so that 1578 * they can see any changes made by a seccomp tracer. 1579 */ 1580 if (phase1_result > 1 && seccomp_phase2(phase1_result)) { 1581 /* seccomp failures shouldn't expose any additional code. */ 1582 return -1; 1583 } 1584 #endif 1585 1586 if (unlikely(work & _TIF_SYSCALL_EMU)) 1587 ret = -1L; 1588 1589 if ((ret || test_thread_flag(TIF_SYSCALL_TRACE)) && 1590 tracehook_report_syscall_entry(regs)) 1591 ret = -1L; 1592 1593 if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) 1594 trace_sys_enter(regs, regs->orig_ax); 1595 1596 do_audit_syscall_entry(regs, arch); 1597 1598 return ret ?: regs->orig_ax; 1599 } 1600 1601 long syscall_trace_enter(struct pt_regs *regs) 1602 { 1603 u32 arch = is_ia32_task() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64; 1604 unsigned long phase1_result = syscall_trace_enter_phase1(regs, arch); 1605 1606 if (phase1_result == 0) 1607 return regs->orig_ax; 1608 else 1609 return syscall_trace_enter_phase2(regs, arch, phase1_result); 1610 } 1611 1612 void syscall_trace_leave(struct pt_regs *regs) 1613 { 1614 bool step; 1615 1616 /* 1617 * We may come here right after calling schedule_user() 1618 * or do_notify_resume(), in which case we can be in RCU 1619 * user mode. 
1620 */ 1621 user_exit(); 1622 1623 audit_syscall_exit(regs); 1624 1625 if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) 1626 trace_sys_exit(regs, regs->ax); 1627 1628 /* 1629 * If TIF_SYSCALL_EMU is set, we only get here because of 1630 * TIF_SINGLESTEP (i.e. this is PTRACE_SYSEMU_SINGLESTEP). 1631 * We already reported this syscall instruction in 1632 * syscall_trace_enter(). 1633 */ 1634 step = unlikely(test_thread_flag(TIF_SINGLESTEP)) && 1635 !test_thread_flag(TIF_SYSCALL_EMU); 1636 if (step || test_thread_flag(TIF_SYSCALL_TRACE)) 1637 tracehook_report_syscall_exit(regs, step); 1638 1639 user_enter(); 1640 } 1641