1fa1e03eaSRoland McGrath /* 2fa1e03eaSRoland McGrath * x86 single-step support code, common to 32-bit and 64-bit. 3fa1e03eaSRoland McGrath */ 4fa1e03eaSRoland McGrath #include <linux/sched.h> 5fa1e03eaSRoland McGrath #include <linux/mm.h> 6fa1e03eaSRoland McGrath #include <linux/ptrace.h> 7fa1e03eaSRoland McGrath 87122ec81SRoland McGrath #ifdef CONFIG_X86_32 91379a5ceSHarvey Harrison #include <linux/uaccess.h> 101379a5ceSHarvey Harrison 111379a5ceSHarvey Harrison #include <asm/desc.h> 121379a5ceSHarvey Harrison 131379a5ceSHarvey Harrison /* 141379a5ceSHarvey Harrison * Return EIP plus the CS segment base. The segment limit is also 151379a5ceSHarvey Harrison * adjusted, clamped to the kernel/user address space (whichever is 161379a5ceSHarvey Harrison * appropriate), and returned in *eip_limit. 171379a5ceSHarvey Harrison * 181379a5ceSHarvey Harrison * The segment is checked, because it might have been changed by another 191379a5ceSHarvey Harrison * task between the original faulting instruction and here. 201379a5ceSHarvey Harrison * 211379a5ceSHarvey Harrison * If CS is no longer a valid code segment, or if EIP is beyond the 221379a5ceSHarvey Harrison * limit, or if it is a kernel address when CS is not a kernel segment, 231379a5ceSHarvey Harrison * then the returned value will be greater than *eip_limit. 241379a5ceSHarvey Harrison * 251379a5ceSHarvey Harrison * This is slow, but is very rarely executed. 261379a5ceSHarvey Harrison */ 271379a5ceSHarvey Harrison unsigned long get_segment_eip(struct pt_regs *regs, 281379a5ceSHarvey Harrison unsigned long *eip_limit) 291379a5ceSHarvey Harrison { 301379a5ceSHarvey Harrison unsigned long ip = regs->ip; 311379a5ceSHarvey Harrison unsigned seg = regs->cs & 0xffff; 321379a5ceSHarvey Harrison u32 seg_ar, seg_limit, base, *desc; 331379a5ceSHarvey Harrison 341379a5ceSHarvey Harrison /* Unlikely, but must come before segment checks. */ 351379a5ceSHarvey Harrison if (unlikely(regs->flags & VM_MASK)) { 361379a5ceSHarvey Harrison base = seg << 4; 371379a5ceSHarvey Harrison *eip_limit = base + 0xffff; 381379a5ceSHarvey Harrison return base + (ip & 0xffff); 391379a5ceSHarvey Harrison } 401379a5ceSHarvey Harrison 411379a5ceSHarvey Harrison /* The standard kernel/user address space limit. */ 421379a5ceSHarvey Harrison *eip_limit = user_mode(regs) ? USER_DS.seg : KERNEL_DS.seg; 431379a5ceSHarvey Harrison 441379a5ceSHarvey Harrison /* By far the most common cases. */ 451379a5ceSHarvey Harrison if (likely(SEGMENT_IS_FLAT_CODE(seg))) 461379a5ceSHarvey Harrison return ip; 471379a5ceSHarvey Harrison 481379a5ceSHarvey Harrison /* Check the segment exists, is within the current LDT/GDT size, 491379a5ceSHarvey Harrison that kernel/user (ring 0..3) has the appropriate privilege, 501379a5ceSHarvey Harrison that it's a code segment, and get the limit. */ 511379a5ceSHarvey Harrison __asm__("larl %3,%0; lsll %3,%1" 521379a5ceSHarvey Harrison : "=&r" (seg_ar), "=r" (seg_limit) : "0" (0), "rm" (seg)); 531379a5ceSHarvey Harrison if ((~seg_ar & 0x9800) || ip > seg_limit) { 541379a5ceSHarvey Harrison *eip_limit = 0; 551379a5ceSHarvey Harrison return 1; /* So that returned ip > *eip_limit. */ 561379a5ceSHarvey Harrison } 571379a5ceSHarvey Harrison 581379a5ceSHarvey Harrison /* Get the GDT/LDT descriptor base. 591379a5ceSHarvey Harrison When you look for races in this code remember that 601379a5ceSHarvey Harrison LDT and other horrors are only used in user space. */ 611379a5ceSHarvey Harrison if (seg & (1<<2)) { 621379a5ceSHarvey Harrison /* Must lock the LDT while reading it. */ 631379a5ceSHarvey Harrison mutex_lock(¤t->mm->context.lock); 641379a5ceSHarvey Harrison desc = current->mm->context.ldt; 651379a5ceSHarvey Harrison desc = (void *)desc + (seg & ~7); 661379a5ceSHarvey Harrison } else { 671379a5ceSHarvey Harrison /* Must disable preemption while reading the GDT. */ 681379a5ceSHarvey Harrison desc = (u32 *)get_cpu_gdt_table(get_cpu()); 691379a5ceSHarvey Harrison desc = (void *)desc + (seg & ~7); 701379a5ceSHarvey Harrison } 711379a5ceSHarvey Harrison 721379a5ceSHarvey Harrison /* Decode the code segment base from the descriptor */ 731379a5ceSHarvey Harrison base = get_desc_base((struct desc_struct *)desc); 741379a5ceSHarvey Harrison 751379a5ceSHarvey Harrison if (seg & (1<<2)) 761379a5ceSHarvey Harrison mutex_unlock(¤t->mm->context.lock); 771379a5ceSHarvey Harrison else 781379a5ceSHarvey Harrison put_cpu(); 791379a5ceSHarvey Harrison 801379a5ceSHarvey Harrison /* Adjust EIP and segment limit, and clamp at the kernel limit. 811379a5ceSHarvey Harrison It's legitimate for segments to wrap at 0xffffffff. */ 821379a5ceSHarvey Harrison seg_limit += base; 831379a5ceSHarvey Harrison if (seg_limit < *eip_limit && seg_limit >= base) 841379a5ceSHarvey Harrison *eip_limit = seg_limit; 851379a5ceSHarvey Harrison return ip + base; 861379a5ceSHarvey Harrison } 871379a5ceSHarvey Harrison #endif 881379a5ceSHarvey Harrison 891379a5ceSHarvey Harrison #ifdef CONFIG_X86_32 907122ec81SRoland McGrath static 917122ec81SRoland McGrath #endif 92*37cd9cf3SHarvey Harrison unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs) 93fa1e03eaSRoland McGrath { 94fa1e03eaSRoland McGrath unsigned long addr, seg; 95fa1e03eaSRoland McGrath 9665ea5b03SH. Peter Anvin addr = regs->ip; 97fa1e03eaSRoland McGrath seg = regs->cs & 0xffff; 9865ea5b03SH. Peter Anvin if (v8086_mode(regs)) { 997122ec81SRoland McGrath addr = (addr & 0xffff) + (seg << 4); 1007122ec81SRoland McGrath return addr; 1017122ec81SRoland McGrath } 102fa1e03eaSRoland McGrath 103fa1e03eaSRoland McGrath /* 104fa1e03eaSRoland McGrath * We'll assume that the code segments in the GDT 105fa1e03eaSRoland McGrath * are all zero-based. That is largely true: the 106fa1e03eaSRoland McGrath * TLS segments are used for data, and the PNPBIOS 107fa1e03eaSRoland McGrath * and APM bios ones we just ignore here. 108fa1e03eaSRoland McGrath */ 1093f80c1adSRoland McGrath if ((seg & SEGMENT_TI_MASK) == SEGMENT_LDT) { 110fa1e03eaSRoland McGrath u32 *desc; 111fa1e03eaSRoland McGrath unsigned long base; 112fa1e03eaSRoland McGrath 113fa1e03eaSRoland McGrath seg &= ~7UL; 114fa1e03eaSRoland McGrath 115fa1e03eaSRoland McGrath mutex_lock(&child->mm->context.lock); 116fa1e03eaSRoland McGrath if (unlikely((seg >> 3) >= child->mm->context.size)) 117fa1e03eaSRoland McGrath addr = -1L; /* bogus selector, access would fault */ 118fa1e03eaSRoland McGrath else { 119fa1e03eaSRoland McGrath desc = child->mm->context.ldt + seg; 120fa1e03eaSRoland McGrath base = ((desc[0] >> 16) | 121fa1e03eaSRoland McGrath ((desc[1] & 0xff) << 16) | 122fa1e03eaSRoland McGrath (desc[1] & 0xff000000)); 123fa1e03eaSRoland McGrath 124fa1e03eaSRoland McGrath /* 16-bit code segment? */ 125fa1e03eaSRoland McGrath if (!((desc[1] >> 22) & 1)) 126fa1e03eaSRoland McGrath addr &= 0xffff; 127fa1e03eaSRoland McGrath addr += base; 128fa1e03eaSRoland McGrath } 129fa1e03eaSRoland McGrath mutex_unlock(&child->mm->context.lock); 130fa1e03eaSRoland McGrath } 131fa1e03eaSRoland McGrath 132fa1e03eaSRoland McGrath return addr; 133fa1e03eaSRoland McGrath } 134fa1e03eaSRoland McGrath 135fa1e03eaSRoland McGrath static int is_setting_trap_flag(struct task_struct *child, struct pt_regs *regs) 136fa1e03eaSRoland McGrath { 137fa1e03eaSRoland McGrath int i, copied; 138fa1e03eaSRoland McGrath unsigned char opcode[15]; 139*37cd9cf3SHarvey Harrison unsigned long addr = convert_ip_to_linear(child, regs); 140fa1e03eaSRoland McGrath 141fa1e03eaSRoland McGrath copied = access_process_vm(child, addr, opcode, sizeof(opcode), 0); 142fa1e03eaSRoland McGrath for (i = 0; i < copied; i++) { 143fa1e03eaSRoland McGrath switch (opcode[i]) { 144fa1e03eaSRoland McGrath /* popf and iret */ 145fa1e03eaSRoland McGrath case 0x9d: case 0xcf: 146fa1e03eaSRoland McGrath return 1; 147fa1e03eaSRoland McGrath 148fa1e03eaSRoland McGrath /* CHECKME: 64 65 */ 149fa1e03eaSRoland McGrath 150fa1e03eaSRoland McGrath /* opcode and address size prefixes */ 151fa1e03eaSRoland McGrath case 0x66: case 0x67: 152fa1e03eaSRoland McGrath continue; 153fa1e03eaSRoland McGrath /* irrelevant prefixes (segment overrides and repeats) */ 154fa1e03eaSRoland McGrath case 0x26: case 0x2e: 155fa1e03eaSRoland McGrath case 0x36: case 0x3e: 156fa1e03eaSRoland McGrath case 0x64: case 0x65: 1575f76cb1fSRoland McGrath case 0xf0: case 0xf2: case 0xf3: 158fa1e03eaSRoland McGrath continue; 159fa1e03eaSRoland McGrath 1607122ec81SRoland McGrath #ifdef CONFIG_X86_64 161fa1e03eaSRoland McGrath case 0x40 ... 0x4f: 162fa1e03eaSRoland McGrath if (regs->cs != __USER_CS) 163fa1e03eaSRoland McGrath /* 32-bit mode: register increment */ 164fa1e03eaSRoland McGrath return 0; 165fa1e03eaSRoland McGrath /* 64-bit mode: REX prefix */ 166fa1e03eaSRoland McGrath continue; 1677122ec81SRoland McGrath #endif 168fa1e03eaSRoland McGrath 169fa1e03eaSRoland McGrath /* CHECKME: f2, f3 */ 170fa1e03eaSRoland McGrath 171fa1e03eaSRoland McGrath /* 172fa1e03eaSRoland McGrath * pushf: NOTE! We should probably not let 173fa1e03eaSRoland McGrath * the user see the TF bit being set. But 174fa1e03eaSRoland McGrath * it's more pain than it's worth to avoid 175fa1e03eaSRoland McGrath * it, and a debugger could emulate this 176fa1e03eaSRoland McGrath * all in user space if it _really_ cares. 177fa1e03eaSRoland McGrath */ 178fa1e03eaSRoland McGrath case 0x9c: 179fa1e03eaSRoland McGrath default: 180fa1e03eaSRoland McGrath return 0; 181fa1e03eaSRoland McGrath } 182fa1e03eaSRoland McGrath } 183fa1e03eaSRoland McGrath return 0; 184fa1e03eaSRoland McGrath } 185fa1e03eaSRoland McGrath 18610faa81eSRoland McGrath /* 18710faa81eSRoland McGrath * Enable single-stepping. Return nonzero if user mode is not using TF itself. 18810faa81eSRoland McGrath */ 18910faa81eSRoland McGrath static int enable_single_step(struct task_struct *child) 190fa1e03eaSRoland McGrath { 191fa1e03eaSRoland McGrath struct pt_regs *regs = task_pt_regs(child); 192fa1e03eaSRoland McGrath 193fa1e03eaSRoland McGrath /* 194fa1e03eaSRoland McGrath * Always set TIF_SINGLESTEP - this guarantees that 195fa1e03eaSRoland McGrath * we single-step system calls etc.. This will also 196fa1e03eaSRoland McGrath * cause us to set TF when returning to user mode. 197fa1e03eaSRoland McGrath */ 198fa1e03eaSRoland McGrath set_tsk_thread_flag(child, TIF_SINGLESTEP); 199fa1e03eaSRoland McGrath 200fa1e03eaSRoland McGrath /* 201fa1e03eaSRoland McGrath * If TF was already set, don't do anything else 202fa1e03eaSRoland McGrath */ 20365ea5b03SH. Peter Anvin if (regs->flags & X86_EFLAGS_TF) 20410faa81eSRoland McGrath return 0; 205fa1e03eaSRoland McGrath 206fa1e03eaSRoland McGrath /* Set TF on the kernel stack.. */ 20765ea5b03SH. Peter Anvin regs->flags |= X86_EFLAGS_TF; 208fa1e03eaSRoland McGrath 209fa1e03eaSRoland McGrath /* 210fa1e03eaSRoland McGrath * ..but if TF is changed by the instruction we will trace, 211fa1e03eaSRoland McGrath * don't mark it as being "us" that set it, so that we 212fa1e03eaSRoland McGrath * won't clear it by hand later. 213fa1e03eaSRoland McGrath */ 214fa1e03eaSRoland McGrath if (is_setting_trap_flag(child, regs)) 21510faa81eSRoland McGrath return 0; 216fa1e03eaSRoland McGrath 217e1f28773SRoland McGrath set_tsk_thread_flag(child, TIF_FORCED_TF); 21810faa81eSRoland McGrath 21910faa81eSRoland McGrath return 1; 22010faa81eSRoland McGrath } 22110faa81eSRoland McGrath 22210faa81eSRoland McGrath /* 22310faa81eSRoland McGrath * Install this value in MSR_IA32_DEBUGCTLMSR whenever child is running. 22410faa81eSRoland McGrath */ 22510faa81eSRoland McGrath static void write_debugctlmsr(struct task_struct *child, unsigned long val) 22610faa81eSRoland McGrath { 22710faa81eSRoland McGrath child->thread.debugctlmsr = val; 22810faa81eSRoland McGrath 22910faa81eSRoland McGrath if (child != current) 23010faa81eSRoland McGrath return; 23110faa81eSRoland McGrath 23210faa81eSRoland McGrath wrmsrl(MSR_IA32_DEBUGCTLMSR, val); 23310faa81eSRoland McGrath } 23410faa81eSRoland McGrath 23510faa81eSRoland McGrath /* 23610faa81eSRoland McGrath * Enable single or block step. 23710faa81eSRoland McGrath */ 23810faa81eSRoland McGrath static void enable_step(struct task_struct *child, bool block) 23910faa81eSRoland McGrath { 24010faa81eSRoland McGrath /* 24110faa81eSRoland McGrath * Make sure block stepping (BTF) is not enabled unless it should be. 24210faa81eSRoland McGrath * Note that we don't try to worry about any is_setting_trap_flag() 24310faa81eSRoland McGrath * instructions after the first when using block stepping. 24410faa81eSRoland McGrath * So noone should try to use debugger block stepping in a program 24510faa81eSRoland McGrath * that uses user-mode single stepping itself. 24610faa81eSRoland McGrath */ 24710faa81eSRoland McGrath if (enable_single_step(child) && block) { 24810faa81eSRoland McGrath set_tsk_thread_flag(child, TIF_DEBUGCTLMSR); 249eee3af4aSMarkus Metzger write_debugctlmsr(child, 250eee3af4aSMarkus Metzger child->thread.debugctlmsr | DEBUGCTLMSR_BTF); 251eee3af4aSMarkus Metzger } else { 252eee3af4aSMarkus Metzger write_debugctlmsr(child, 253eee3af4aSMarkus Metzger child->thread.debugctlmsr & ~TIF_DEBUGCTLMSR); 254eee3af4aSMarkus Metzger 255eee3af4aSMarkus Metzger if (!child->thread.debugctlmsr) 256eee3af4aSMarkus Metzger clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); 25710faa81eSRoland McGrath } 25810faa81eSRoland McGrath } 25910faa81eSRoland McGrath 26010faa81eSRoland McGrath void user_enable_single_step(struct task_struct *child) 26110faa81eSRoland McGrath { 26210faa81eSRoland McGrath enable_step(child, 0); 26310faa81eSRoland McGrath } 26410faa81eSRoland McGrath 26510faa81eSRoland McGrath void user_enable_block_step(struct task_struct *child) 26610faa81eSRoland McGrath { 26710faa81eSRoland McGrath enable_step(child, 1); 268fa1e03eaSRoland McGrath } 269fa1e03eaSRoland McGrath 270fa1e03eaSRoland McGrath void user_disable_single_step(struct task_struct *child) 271fa1e03eaSRoland McGrath { 27210faa81eSRoland McGrath /* 27310faa81eSRoland McGrath * Make sure block stepping (BTF) is disabled. 27410faa81eSRoland McGrath */ 275eee3af4aSMarkus Metzger write_debugctlmsr(child, 276eee3af4aSMarkus Metzger child->thread.debugctlmsr & ~TIF_DEBUGCTLMSR); 277eee3af4aSMarkus Metzger 278eee3af4aSMarkus Metzger if (!child->thread.debugctlmsr) 279eee3af4aSMarkus Metzger clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); 28010faa81eSRoland McGrath 281fa1e03eaSRoland McGrath /* Always clear TIF_SINGLESTEP... */ 282fa1e03eaSRoland McGrath clear_tsk_thread_flag(child, TIF_SINGLESTEP); 283fa1e03eaSRoland McGrath 284fa1e03eaSRoland McGrath /* But touch TF only if it was set by us.. */ 285e1f28773SRoland McGrath if (test_and_clear_tsk_thread_flag(child, TIF_FORCED_TF)) 28665ea5b03SH. Peter Anvin task_pt_regs(child)->flags &= ~X86_EFLAGS_TF; 287fa1e03eaSRoland McGrath } 288