xref: /openbmc/linux/arch/x86/kernel/step.c (revision 37cd9cf3dafed82f7cf905785883300f6ff7c818)
/*
 * x86 single-step support code, common to 32-bit and 64-bit.
 */
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/ptrace.h>

#ifdef CONFIG_X86_32
#include <linux/uaccess.h>

#include <asm/desc.h>

/*
 * Return EIP plus the CS segment base.  The segment limit is also
 * adjusted, clamped to the kernel/user address space (whichever is
 * appropriate), and returned in *eip_limit.
 *
 * The segment is checked, because it might have been changed by another
 * task between the original faulting instruction and here.
 *
 * If CS is no longer a valid code segment, or if EIP is beyond the
 * limit, or if it is a kernel address when CS is not a kernel segment,
 * then the returned value will be greater than *eip_limit.
 *
 * This is slow, but is very rarely executed.
 */
unsigned long get_segment_eip(struct pt_regs *regs,
					    unsigned long *eip_limit)
{
	unsigned long ip = regs->ip;
	unsigned seg = regs->cs & 0xffff;
	u32 seg_ar, seg_limit, base, *desc;

	/* Unlikely, but must come before segment checks. */
	if (unlikely(regs->flags & VM_MASK)) {
		base = seg << 4;
		*eip_limit = base + 0xffff;
		return base + (ip & 0xffff);
	}

	/* The standard kernel/user address space limit. */
	*eip_limit = user_mode(regs) ? USER_DS.seg : KERNEL_DS.seg;

	/* By far the most common cases. */
	if (likely(SEGMENT_IS_FLAT_CODE(seg)))
		return ip;

	/* Check the segment exists, is within the current LDT/GDT size,
	   that kernel/user (ring 0..3) has the appropriate privilege,
	   that it's a code segment, and get the limit. */
	__asm__("larl %3,%0; lsll %3,%1"
		 : "=&r" (seg_ar), "=r" (seg_limit) : "0" (0), "rm" (seg));
	if ((~seg_ar & 0x9800) || ip > seg_limit) {
		*eip_limit = 0;
		return 1;	 /* So that returned ip > *eip_limit. */
	}

	/* Get the GDT/LDT descriptor base.
	   When you look for races in this code remember that
	   LDT and other horrors are only used in user space. */
	if (seg & (1<<2)) {
		/* Must lock the LDT while reading it. */
		mutex_lock(&current->mm->context.lock);
		desc = current->mm->context.ldt;
		desc = (void *)desc + (seg & ~7);
	} else {
		/* Must disable preemption while reading the GDT. */
		desc = (u32 *)get_cpu_gdt_table(get_cpu());
		desc = (void *)desc + (seg & ~7);
	}

	/* Decode the code segment base from the descriptor */
	base = get_desc_base((struct desc_struct *)desc);

	if (seg & (1<<2))
		mutex_unlock(&current->mm->context.lock);
	else
		put_cpu();

	/* Adjust EIP and segment limit, and clamp at the kernel limit.
	   It's legitimate for segments to wrap at 0xffffffff. */
	seg_limit += base;
	if (seg_limit < *eip_limit && seg_limit >= base)
		*eip_limit = seg_limit;
	return ip + base;
}
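
/*
 * Illustrative sketch (an assumed caller, not part of this file): the
 * error contract above -- on failure the returned value is greater
 * than *eip_limit -- lets a caller validate CS:EIP with a single
 * compare before decoding instruction bytes:
 *
 *	unsigned long limit;
 *	unsigned long instr = get_segment_eip(regs, &limit);
 *
 *	if (instr > limit)
 *		return 0;	// bogus CS/EIP, nothing to decode
 *	// otherwise bytes in [instr, limit] lie within the segment
 */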
#endif

#ifdef CONFIG_X86_32
static
#endif
unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs)
{
	unsigned long addr, seg;

	addr = regs->ip;
	seg = regs->cs & 0xffff;
	if (v8086_mode(regs)) {
		addr = (addr & 0xffff) + (seg << 4);
		return addr;
	}

	/*
	 * We'll assume that the code segments in the GDT
	 * are all zero-based. That is largely true: the
	 * TLS segments are used for data, and the PNPBIOS
	 * and APM bios ones we just ignore here.
	 */
	if ((seg & SEGMENT_TI_MASK) == SEGMENT_LDT) {
		u32 *desc;
		unsigned long base;

		seg &= ~7UL;

		mutex_lock(&child->mm->context.lock);
		if (unlikely((seg >> 3) >= child->mm->context.size))
			addr = -1L; /* bogus selector, access would fault */
		else {
			desc = child->mm->context.ldt + seg;
			base = ((desc[0] >> 16) |
				((desc[1] & 0xff) << 16) |
				(desc[1] & 0xff000000));

			/* 16-bit code segment? */
			if (!((desc[1] >> 22) & 1))
				addr &= 0xffff;
			addr += base;
		}
		mutex_unlock(&child->mm->context.lock);
	}

	return addr;
}
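
/*
 * Worked example of the base decode above (standard i386 descriptor
 * layout, not anything specific to this file).  The 32-bit base is
 * scattered across the two descriptor words:
 *
 *	desc[0] bits 31..16  ->  base bits 15..0
 *	desc[1] bits  7..0   ->  base bits 23..16
 *	desc[1] bits 31..24  ->  base bits 31..24
 *
 * So desc[0] == 0x12340000, desc[1] == 0xab0000cd decodes to base
 * 0xabcd1234.  Bit 22 of desc[1] is the D/B flag: when it is clear the
 * segment is 16-bit code, which is why EIP is masked to 16 bits first.
 */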

static int is_setting_trap_flag(struct task_struct *child, struct pt_regs *regs)
{
	int i, copied;
	unsigned char opcode[15];
	unsigned long addr = convert_ip_to_linear(child, regs);

	copied = access_process_vm(child, addr, opcode, sizeof(opcode), 0);
	for (i = 0; i < copied; i++) {
		switch (opcode[i]) {
		/* popf and iret */
		case 0x9d: case 0xcf:
			return 1;

			/* CHECKME: 64 65 */

		/* operand-size and address-size prefixes */
		case 0x66: case 0x67:
			continue;
		/* irrelevant prefixes (segment overrides and repeats) */
		case 0x26: case 0x2e:
		case 0x36: case 0x3e:
		case 0x64: case 0x65:
		case 0xf0: case 0xf2: case 0xf3:
			continue;

#ifdef CONFIG_X86_64
		case 0x40 ... 0x4f:
			if (regs->cs != __USER_CS)
				/* 32-bit mode: inc/dec register */
				return 0;
			/* 64-bit mode: REX prefix */
			continue;
#endif

			/* CHECKME: f2, f3 */

		/*
		 * pushf: NOTE! We should probably not let
		 * the user see the TF bit being set. But
		 * it's more pain than it's worth to avoid
		 * it, and a debugger could emulate this
		 * all in user space if it _really_ cares.
		 */
		case 0x9c:
		default:
			return 0;
		}
	}
	return 0;
}
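
/*
 * Example of what the scan above is catching (user-space sketch, not
 * part of this file): a tracee can set TF on its own with a plain
 * popf, e.g.
 *
 *	pushf
 *	orl $0x100, (%esp)	// X86_EFLAGS_TF
 *	popf			// TF set; traps after the next insn
 *
 * When the instruction about to be stepped is such a popf (or an
 * iret), TF afterwards belongs to the tracee, so the caller must not
 * claim it via TIF_FORCED_TF.
 */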

/*
 * Enable single-stepping.  Return nonzero if user mode is not using TF itself.
 */
static int enable_single_step(struct task_struct *child)
{
	struct pt_regs *regs = task_pt_regs(child);

	/*
	 * Always set TIF_SINGLESTEP - this guarantees that
	 * we single-step system calls etc..  This will also
	 * cause us to set TF when returning to user mode.
	 */
	set_tsk_thread_flag(child, TIF_SINGLESTEP);

	/*
	 * If TF was already set, don't do anything else
	 */
	if (regs->flags & X86_EFLAGS_TF)
		return 0;

	/* Set TF on the kernel stack.. */
	regs->flags |= X86_EFLAGS_TF;

	/*
	 * ..but if TF is changed by the instruction we will trace,
	 * don't mark it as being "us" that set it, so that we
	 * won't clear it by hand later.
	 */
	if (is_setting_trap_flag(child, regs))
		return 0;

	set_tsk_thread_flag(child, TIF_FORCED_TF);

	return 1;
}
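
/*
 * For context, a sketch of the usual path here (user-space side, not
 * part of this file): a ptracing debugger requests a single step,
 * which reaches user_enable_single_step() below before the tracee is
 * resumed:
 *
 *	ptrace(PTRACE_SINGLESTEP, pid, 0, 0);
 *	waitpid(pid, &status, 0);	// tracee stops with SIGTRAP
 *					// after one instruction
 */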

/*
 * Install this value in MSR_IA32_DEBUGCTLMSR whenever child is running.
 */
static void write_debugctlmsr(struct task_struct *child, unsigned long val)
{
	child->thread.debugctlmsr = val;

	if (child != current)
		return;

	wrmsrl(MSR_IA32_DEBUGCTLMSR, val);
}
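
/*
 * When the child is not running, the write above only updates the
 * saved thread.debugctlmsr image; the context-switch path is relied
 * on to load that image into the real MSR when the child is next
 * scheduled in (an assumption about code outside this file, i.e. the
 * __switch_to() machinery).
 */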

/*
 * Enable single or block step.
 */
static void enable_step(struct task_struct *child, bool block)
{
	/*
	 * Make sure block stepping (BTF) is not enabled unless it should be.
	 * Note that we don't try to worry about any is_setting_trap_flag()
	 * instructions after the first when using block stepping.
	 * So no one should try to use debugger block stepping in a program
	 * that uses user-mode single stepping itself.
	 */
	if (enable_single_step(child) && block) {
		set_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
		write_debugctlmsr(child,
				  child->thread.debugctlmsr | DEBUGCTLMSR_BTF);
	} else {
		write_debugctlmsr(child,
				  child->thread.debugctlmsr & ~DEBUGCTLMSR_BTF);

		if (!child->thread.debugctlmsr)
			clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
	}
}

void user_enable_single_step(struct task_struct *child)
{
	enable_step(child, 0);
}

void user_enable_block_step(struct task_struct *child)
{
	enable_step(child, 1);
}
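
/*
 * Usage sketch (user-space side, not part of this file): block step
 * is requested the same way with PTRACE_SINGLEBLOCK, stopping at the
 * next branch or interrupt rather than after every instruction:
 *
 *	ptrace(PTRACE_SINGLEBLOCK, pid, 0, 0);
 *	waitpid(pid, &status, 0);	// tracee stops at the next branch
 */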

void user_disable_single_step(struct task_struct *child)
{
	/*
	 * Make sure block stepping (BTF) is disabled.
	 */
	write_debugctlmsr(child,
			  child->thread.debugctlmsr & ~DEBUGCTLMSR_BTF);

	if (!child->thread.debugctlmsr)
		clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);

	/* Always clear TIF_SINGLESTEP... */
	clear_tsk_thread_flag(child, TIF_SINGLESTEP);

	/* But touch TF only if it was set by us.. */
	if (test_and_clear_tsk_thread_flag(child, TIF_FORCED_TF))
		task_pt_regs(child)->flags &= ~X86_EFLAGS_TF;
}