xref: /openbmc/linux/arch/x86/kernel/hw_breakpoint.c (revision fdef24df)
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2007 Alan Stern
 * Copyright (C) 2009 IBM Corporation
 * Copyright (C) 2009 Frederic Weisbecker <fweisbec@gmail.com>
 *
 * Authors: Alan Stern <stern@rowland.harvard.edu>
 *          K.Prasad <prasad@linux.vnet.ibm.com>
 *          Frederic Weisbecker <fweisbec@gmail.com>
 */

/*
 * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
 * using the CPU's debug registers.
 */

#include <linux/perf_event.h>
#include <linux/hw_breakpoint.h>
#include <linux/irqflags.h>
#include <linux/notifier.h>
#include <linux/kallsyms.h>
#include <linux/kprobes.h>
#include <linux/percpu.h>
#include <linux/kdebug.h>
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/smp.h>

#include <asm/hw_breakpoint.h>
#include <asm/processor.h>
#include <asm/debugreg.h>
#include <asm/user.h>
#include <asm/desc.h>
#include <asm/tlbflush.h>

/* Per cpu debug control register value */
DEFINE_PER_CPU(unsigned long, cpu_dr7);
EXPORT_PER_CPU_SYMBOL(cpu_dr7);

/* Per cpu debug address registers values */
static DEFINE_PER_CPU(unsigned long, cpu_debugreg[HBP_NUM]);

/*
 * Stores the breakpoints currently in use on each breakpoint address
 * register for each CPU
 */
static DEFINE_PER_CPU(struct perf_event *, bp_per_reg[HBP_NUM]);


static inline unsigned long
__encode_dr7(int drnum, unsigned int len, unsigned int type)
{
	unsigned long bp_info;

	bp_info = (len | type) & 0xf;
	bp_info <<= (DR_CONTROL_SHIFT + drnum * DR_CONTROL_SIZE);
	bp_info |= (DR_GLOBAL_ENABLE << (drnum * DR_ENABLE_SIZE));

	return bp_info;
}

/*
 * Encode the length, type, Exact, and Enable bits for a particular breakpoint
 * as stored in debug register 7.
 */
unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type)
{
	return __encode_dr7(drnum, len, type) | DR_GLOBAL_SLOWDOWN;
}

/*
 * Decode the length and type bits for a particular breakpoint as
 * stored in debug register 7.  Return the "enabled" status.
 */
int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, unsigned *type)
{
	int bp_info = dr7 >> (DR_CONTROL_SHIFT + bpnum * DR_CONTROL_SIZE);

	*len = (bp_info & 0xc) | 0x40;
	*type = (bp_info & 0x3) | 0x80;

	return (dr7 >> (bpnum * DR_ENABLE_SIZE)) & 0x3;
}
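
/*
 * Worked example (illustration only; the constants come from
 * asm/debugreg.h and asm/hw_breakpoint.h, where DR_CONTROL_SHIFT == 16,
 * DR_CONTROL_SIZE == 4, X86_BREAKPOINT_LEN_4 == 0x4c and
 * X86_BREAKPOINT_WRITE == 0x81):
 *
 *	encode_dr7(1, X86_BREAKPOINT_LEN_4, X86_BREAKPOINT_WRITE)
 *	  = ((0x4c | 0x81) & 0xf) << (16 + 1 * 4)	// 0x00d00000
 *	  | DR_GLOBAL_ENABLE << (1 * 2)			// 0x00000008
 *	  | DR_GLOBAL_SLOWDOWN				// 0x00000200
 *	  = 0x00d00208
 *
 * decode_dr7(0x00d00208, 1, &len, &type) reverses this:
 * len = (0xd & 0xc) | 0x40 == 0x4c (X86_BREAKPOINT_LEN_4), type =
 * (0xd & 0x3) | 0x80 == 0x81 (X86_BREAKPOINT_WRITE), and the return
 * value (0x00d00208 >> 2) & 0x3 == 0x2 reports the global-enable bit.
 */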

/*
 * Install a perf counter breakpoint.
 *
 * We seek a free debug address register and use it for this
 * breakpoint.  We then enable it in the debug control register.
 *
 * Atomic: we hold the counter->ctx->lock and we only handle variables
 * and registers local to this cpu.
 */
int arch_install_hw_breakpoint(struct perf_event *bp)
{
	struct arch_hw_breakpoint *info = counter_arch_bp(bp);
	unsigned long *dr7;
	int i;

	for (i = 0; i < HBP_NUM; i++) {
		struct perf_event **slot = this_cpu_ptr(&bp_per_reg[i]);

		if (!*slot) {
			*slot = bp;
			break;
		}
	}

	if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot"))
		return -EBUSY;

	set_debugreg(info->address, i);
	__this_cpu_write(cpu_debugreg[i], info->address);

	dr7 = this_cpu_ptr(&cpu_dr7);
	*dr7 |= encode_dr7(i, info->len, info->type);

	set_debugreg(*dr7, 7);
	if (info->mask)
		set_dr_addr_mask(info->mask, i);

	return 0;
}
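
/*
 * For context: callers never invoke arch_install_hw_breakpoint()
 * directly; they go through the generic hw_breakpoint/perf layer,
 * which calls it on each target CPU.  A minimal kernel-side sketch,
 * modeled on samples/hw_breakpoint/data_breakpoint.c (the watched
 * symbol and the handler name are illustrative):
 *
 *	struct perf_event_attr attr;
 *	struct perf_event * __percpu *hbp;
 *
 *	hw_breakpoint_init(&attr);
 *	attr.bp_addr = kallsyms_lookup_name("pid_max");
 *	attr.bp_len  = HW_BREAKPOINT_LEN_4;
 *	attr.bp_type = HW_BREAKPOINT_W;
 *
 *	hbp = register_wide_hw_breakpoint(&attr, my_hbp_handler, NULL);
 *	if (IS_ERR((void __force *)hbp))
 *		return PTR_ERR((void __force *)hbp);
 */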

/*
 * Uninstall the breakpoint contained in the given counter.
 *
 * First we search for the debug address register it uses and then we
 * disable it.
 *
 * Atomic: we hold the counter->ctx->lock and we only handle variables
 * and registers local to this cpu.
 */
void arch_uninstall_hw_breakpoint(struct perf_event *bp)
{
	struct arch_hw_breakpoint *info = counter_arch_bp(bp);
	unsigned long *dr7;
	int i;

	for (i = 0; i < HBP_NUM; i++) {
		struct perf_event **slot = this_cpu_ptr(&bp_per_reg[i]);

		if (*slot == bp) {
			*slot = NULL;
			break;
		}
	}

	if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot"))
		return;

	dr7 = this_cpu_ptr(&cpu_dr7);
	*dr7 &= ~__encode_dr7(i, info->len, info->type);

	set_debugreg(*dr7, 7);
	if (info->mask)
		set_dr_addr_mask(0, i);
}

static int arch_bp_generic_len(int x86_len)
{
	switch (x86_len) {
	case X86_BREAKPOINT_LEN_1:
		return HW_BREAKPOINT_LEN_1;
	case X86_BREAKPOINT_LEN_2:
		return HW_BREAKPOINT_LEN_2;
	case X86_BREAKPOINT_LEN_4:
		return HW_BREAKPOINT_LEN_4;
#ifdef CONFIG_X86_64
	case X86_BREAKPOINT_LEN_8:
		return HW_BREAKPOINT_LEN_8;
#endif
	default:
		return -EINVAL;
	}
}

int arch_bp_generic_fields(int x86_len, int x86_type,
			   int *gen_len, int *gen_type)
{
	int len;

	/* Type */
	switch (x86_type) {
	case X86_BREAKPOINT_EXECUTE:
		if (x86_len != X86_BREAKPOINT_LEN_X)
			return -EINVAL;

		*gen_type = HW_BREAKPOINT_X;
		*gen_len = sizeof(long);
		return 0;
	case X86_BREAKPOINT_WRITE:
		*gen_type = HW_BREAKPOINT_W;
		break;
	case X86_BREAKPOINT_RW:
		*gen_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R;
		break;
	default:
		return -EINVAL;
	}

	/* Len */
	len = arch_bp_generic_len(x86_len);
	if (len < 0)
		return -EINVAL;
	*gen_len = len;

	return 0;
}

/*
 * Check for virtual address in kernel space.
 */
int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw)
{
	unsigned long va;
	int len;

	va = hw->address;
	len = arch_bp_generic_len(hw->len);
	WARN_ON_ONCE(len < 0);

	/*
	 * We don't need to worry about va + len - 1 overflowing:
	 * we already require that va is aligned to a multiple of len.
	 */
	return (va >= TASK_SIZE_MAX) || ((va + len - 1) >= TASK_SIZE_MAX);
}
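
/*
 * Example (illustrative): for va == TASK_SIZE_MAX - 2 and len == 4, the
 * second test fires because va + len - 1 lands above TASK_SIZE_MAX, so
 * the breakpoint is treated as a kernel one even though it starts in
 * user space.  The alignment requirement makes such straddling ranges
 * impossible in practice, so the second test is purely defensive.
 */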

/*
 * Checks whether the range [addr, end] overlaps the area [base, base + size).
 */
static inline bool within_area(unsigned long addr, unsigned long end,
			       unsigned long base, unsigned long size)
{
	return end >= base && addr < (base + size);
}

/*
 * Checks whether the range from addr to end, inclusive, overlaps the fixed
 * mapped CPU entry area range or other ranges used for CPU entry.
 */
static inline bool within_cpu_entry(unsigned long addr, unsigned long end)
{
	int cpu;

	/* CPU entry area is always used for CPU entry */
	if (within_area(addr, end, CPU_ENTRY_AREA_BASE,
			CPU_ENTRY_AREA_TOTAL_SIZE))
		return true;

	for_each_possible_cpu(cpu) {
		/* The original rw GDT is being used after load_direct_gdt() */
		if (within_area(addr, end, (unsigned long)get_cpu_gdt_rw(cpu),
				GDT_SIZE))
			return true;

		/*
		 * cpu_tss_rw is not directly referenced by hardware, but
		 * cpu_tss_rw is also used in CPU entry code.
		 */
		if (within_area(addr, end,
				(unsigned long)&per_cpu(cpu_tss_rw, cpu),
				sizeof(struct tss_struct)))
			return true;

		/*
		 * cpu_tlbstate.user_pcid_flush_mask is used for CPU entry.
		 * A data breakpoint on it will cause an unwanted #DB.
		 * Protect the full cpu_tlbstate structure to be sure.
		 */
		if (within_area(addr, end,
				(unsigned long)&per_cpu(cpu_tlbstate, cpu),
				sizeof(struct tlb_state)))
			return true;
	}

	return false;
}

static int arch_build_bp_info(struct perf_event *bp,
			      const struct perf_event_attr *attr,
			      struct arch_hw_breakpoint *hw)
{
	unsigned long bp_end;

	bp_end = attr->bp_addr + attr->bp_len - 1;
	if (bp_end < attr->bp_addr)
		return -EINVAL;

	/*
	 * Prevent any breakpoint of any type that overlaps the CPU
	 * entry area and data.  This protects the IST stacks and also
	 * reduces the chance that we ever find out what happens if
	 * there's a data breakpoint on the GDT, IDT, or TSS.
	 */
	if (within_cpu_entry(attr->bp_addr, bp_end))
		return -EINVAL;

	hw->address = attr->bp_addr;
	hw->mask = 0;

	/* Type */
	switch (attr->bp_type) {
	case HW_BREAKPOINT_W:
		hw->type = X86_BREAKPOINT_WRITE;
		break;
	case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
		hw->type = X86_BREAKPOINT_RW;
		break;
	case HW_BREAKPOINT_X:
		/*
		 * We don't allow kernel breakpoints in places that are not
		 * acceptable for kprobes.  On non-kprobes kernels, we don't
		 * allow kernel breakpoints at all.
		 */
		if (attr->bp_addr >= TASK_SIZE_MAX) {
			if (within_kprobe_blacklist(attr->bp_addr))
				return -EINVAL;
		}

		hw->type = X86_BREAKPOINT_EXECUTE;
		/*
		 * x86 instruction breakpoints need to have a specific
		 * undefined len.  But we still need to check that userspace
		 * is not trying to set up an unsupported length, to get a
		 * range breakpoint for example.
		 */
		if (attr->bp_len == sizeof(long)) {
			hw->len = X86_BREAKPOINT_LEN_X;
			return 0;
		}
		/* fall through */
	default:
		return -EINVAL;
	}

	/* Len */
	switch (attr->bp_len) {
	case HW_BREAKPOINT_LEN_1:
		hw->len = X86_BREAKPOINT_LEN_1;
		break;
	case HW_BREAKPOINT_LEN_2:
		hw->len = X86_BREAKPOINT_LEN_2;
		break;
	case HW_BREAKPOINT_LEN_4:
		hw->len = X86_BREAKPOINT_LEN_4;
		break;
#ifdef CONFIG_X86_64
	case HW_BREAKPOINT_LEN_8:
		hw->len = X86_BREAKPOINT_LEN_8;
		break;
#endif
	default:
		/* AMD range breakpoint */
		if (!is_power_of_2(attr->bp_len))
			return -EINVAL;
		if (attr->bp_addr & (attr->bp_len - 1))
			return -EINVAL;

		if (!boot_cpu_has(X86_FEATURE_BPEXT))
			return -EOPNOTSUPP;

		/*
		 * It's impossible to use a range breakpoint to fake out
		 * user vs kernel detection because bp_len - 1 can't
		 * have the high bit set.  If we ever allow range instruction
		 * breakpoints, then we'll have to check for kprobe-blacklisted
		 * addresses anywhere in the range.
		 */
		hw->mask = attr->bp_len - 1;
		hw->len = X86_BREAKPOINT_LEN_1;
	}

	return 0;
}
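
/*
 * Example of the AMD range-breakpoint path above (illustrative; it
 * requires X86_FEATURE_BPEXT): attr->bp_len == 32 with a 32-byte
 * aligned attr->bp_addr passes the power-of-2 and alignment checks
 * and yields hw->mask == 0x1f with hw->len == X86_BREAKPOINT_LEN_1.
 * DR7 then encodes a 1-byte breakpoint while the per-register address
 * mask, programmed via set_dr_addr_mask(), widens the match to the
 * whole 32-byte region.
 */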

/*
 * Validate the arch-specific HW Breakpoint register settings
 */
int hw_breakpoint_arch_parse(struct perf_event *bp,
			     const struct perf_event_attr *attr,
			     struct arch_hw_breakpoint *hw)
{
	unsigned int align;
	int ret;

	ret = arch_build_bp_info(bp, attr, hw);
	if (ret)
		return ret;

	switch (hw->len) {
	case X86_BREAKPOINT_LEN_1:
		align = 0;
		if (hw->mask)
			align = hw->mask;
		break;
	case X86_BREAKPOINT_LEN_2:
		align = 1;
		break;
	case X86_BREAKPOINT_LEN_4:
		align = 3;
		break;
#ifdef CONFIG_X86_64
	case X86_BREAKPOINT_LEN_8:
		align = 7;
		break;
#endif
	default:
		WARN_ON_ONCE(1);
		return -EINVAL;
	}

	/*
	 * Check that the low-order bits of the address are appropriate
	 * for the alignment implied by len.
	 */
	if (hw->address & align)
		return -EINVAL;

	return 0;
}
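
/*
 * Example (illustrative): a 4-byte breakpoint at 0x1002 is rejected
 * here because 0x1002 & 3 != 0, while 0x1000 or 0x1004 would pass.
 * For an AMD range breakpoint, hw->mask itself serves as the alignment
 * mask, so a mask of 0x1f demands a 32-byte-aligned address.
 */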

/*
 * Dump the debug register contents to the user.
 * We can't dump our per-cpu values because they
 * may contain cpu-wide breakpoints that don't
 * belong to the current task.
 *
 * TODO: include non-ptrace user breakpoints (perf)
 */
void aout_dump_debugregs(struct user *dump)
{
	int i;
	int dr7 = 0;
	struct perf_event *bp;
	struct arch_hw_breakpoint *info;
	struct thread_struct *thread = &current->thread;

	for (i = 0; i < HBP_NUM; i++) {
		bp = thread->ptrace_bps[i];

		if (bp && !bp->attr.disabled) {
			dump->u_debugreg[i] = bp->attr.bp_addr;
			info = counter_arch_bp(bp);
			dr7 |= encode_dr7(i, info->len, info->type);
		} else {
			dump->u_debugreg[i] = 0;
		}
	}

	dump->u_debugreg[4] = 0;
	dump->u_debugreg[5] = 0;
	dump->u_debugreg[6] = current->thread.debugreg6;

	dump->u_debugreg[7] = dr7;
}
EXPORT_SYMBOL_GPL(aout_dump_debugregs);

/*
 * Release the user breakpoints used by ptrace
 */
void flush_ptrace_hw_breakpoint(struct task_struct *tsk)
{
	int i;
	struct thread_struct *t = &tsk->thread;

	for (i = 0; i < HBP_NUM; i++) {
		unregister_hw_breakpoint(t->ptrace_bps[i]);
		t->ptrace_bps[i] = NULL;
	}

	t->debugreg6 = 0;
	t->ptrace_dr7 = 0;
}

void hw_breakpoint_restore(void)
{
	set_debugreg(__this_cpu_read(cpu_debugreg[0]), 0);
	set_debugreg(__this_cpu_read(cpu_debugreg[1]), 1);
	set_debugreg(__this_cpu_read(cpu_debugreg[2]), 2);
	set_debugreg(__this_cpu_read(cpu_debugreg[3]), 3);
	set_debugreg(current->thread.debugreg6, 6);
	set_debugreg(__this_cpu_read(cpu_dr7), 7);
}
EXPORT_SYMBOL_GPL(hw_breakpoint_restore);
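
/*
 * hw_breakpoint_restore() is exported for paths that clobber the debug
 * registers wholesale and must reinstate the per-CPU shadow copies
 * afterwards; KVM, for instance, calls it after a guest exit when
 * hw_breakpoint_active() reports live host breakpoints.
 */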

/*
 * Handle debug exception notifications.
 *
 * Return value is either NOTIFY_STOP or NOTIFY_DONE as explained below.
 *
 * NOTIFY_DONE is returned if one of the following conditions is true:
 * i) the causative address is from user space and the exception is a
 * valid one, i.e. not triggered as a result of lazy debug register
 * switching;
 * ii) bits other than trap<n> are set in the DR6 register (such as BD,
 * BS or BT), indicating that more than one debug condition is met and
 * further action is required in do_debug().
 *
 * NOTIFY_STOP is returned for all other cases.
 */
static int hw_breakpoint_handler(struct die_args *args)
{
	int i, cpu, rc = NOTIFY_STOP;
	struct perf_event *bp;
	unsigned long dr6;
	unsigned long *dr6_p;

	/* args->err holds a pointer to the DR6 value */
	dr6_p = (unsigned long *)ERR_PTR(args->err);
	dr6 = *dr6_p;

	/* If it's a single step, TRAP bits are random */
	if (dr6 & DR_STEP)
		return NOTIFY_DONE;

	/* Do an early return if no trap bits are set in DR6 */
	if ((dr6 & DR_TRAP_BITS) == 0)
		return NOTIFY_DONE;

	/*
	 * Local interrupts are disabled here.
	 * Reset the DRn bits in the virtualized register value.
	 * The ptrace trigger routine will add in whatever is needed.
	 */
	current->thread.debugreg6 &= ~DR_TRAP_BITS;
	cpu = get_cpu();

	/* Handle all the breakpoints that were triggered */
	for (i = 0; i < HBP_NUM; ++i) {
		if (likely(!(dr6 & (DR_TRAP0 << i))))
			continue;

		/*
		 * The counter may be concurrently released but that can only
		 * occur from a call_rcu() path.  We can then safely fetch
		 * the breakpoint, use its callback and touch its counter
		 * while we are inside an rcu_read_lock() section.
		 */
		rcu_read_lock();

		bp = per_cpu(bp_per_reg[i], cpu);
		/*
		 * Reset the 'i'th TRAP bit in dr6 to denote completion of
		 * exception handling.
		 */
		(*dr6_p) &= ~(DR_TRAP0 << i);
		/*
		 * bp can be NULL due to lazy debug register switching
		 * or due to concurrent perf counter removal.
		 */
		if (!bp) {
			rcu_read_unlock();
			break;
		}

		perf_bp_event(bp, args->regs);

		/*
		 * Set the resume flag to avoid breakpoint recursion when
		 * returning to the origin of the trap.
		 */
		if (bp->hw.info.type == X86_BREAKPOINT_EXECUTE)
			args->regs->flags |= X86_EFLAGS_RF;

		rcu_read_unlock();
	}
	/*
	 * Further processing in do_debug() is needed for a) user-space
	 * breakpoints (to generate signals) and b) when the system has
	 * taken an exception due to multiple causes.
	 */
	if ((current->thread.debugreg6 & DR_TRAP_BITS) ||
	    (dr6 & (~DR_TRAP_BITS)))
		rc = NOTIFY_DONE;

	put_cpu();

	return rc;
}

/*
 * Handle debug exception notifications.
 */
int hw_breakpoint_exceptions_notify(
		struct notifier_block *unused, unsigned long val, void *data)
{
	if (val != DIE_DEBUG)
		return NOTIFY_DONE;

	return hw_breakpoint_handler(data);
}

void hw_breakpoint_pmu_read(struct perf_event *bp)
{
	/* TODO */
}