// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *
 * Copyright (C) 2007 Alan Stern
 * Copyright (C) 2009 IBM Corporation
 * Copyright (C) 2009 Frederic Weisbecker <fweisbec@gmail.com>
 *
 * Authors: Alan Stern <stern@rowland.harvard.edu>
 *          K.Prasad <prasad@linux.vnet.ibm.com>
 *          Frederic Weisbecker <fweisbec@gmail.com>
 */

/*
 * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
 * using the CPU's debug registers.
 */

#include <linux/perf_event.h>
#include <linux/hw_breakpoint.h>
#include <linux/irqflags.h>
#include <linux/notifier.h>
#include <linux/kallsyms.h>
#include <linux/kprobes.h>
#include <linux/percpu.h>
#include <linux/kdebug.h>
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/smp.h>

#include <asm/hw_breakpoint.h>
#include <asm/processor.h>
#include <asm/debugreg.h>
#include <asm/user.h>

/* Per-CPU debug control register value */
DEFINE_PER_CPU(unsigned long, cpu_dr7);
EXPORT_PER_CPU_SYMBOL(cpu_dr7);

/* Per-CPU debug address register values */
static DEFINE_PER_CPU(unsigned long, cpu_debugreg[HBP_NUM]);

/*
 * Stores the breakpoints currently in use on each breakpoint address
 * register for each CPU
 */
static DEFINE_PER_CPU(struct perf_event *, bp_per_reg[HBP_NUM]);


static inline unsigned long
__encode_dr7(int drnum, unsigned int len, unsigned int type)
{
        unsigned long bp_info;

        bp_info = (len | type) & 0xf;
        bp_info <<= (DR_CONTROL_SHIFT + drnum * DR_CONTROL_SIZE);
        bp_info |= (DR_GLOBAL_ENABLE << (drnum * DR_ENABLE_SIZE));

        return bp_info;
}

/*
 * Encode the length, type, Exact, and Enable bits for a particular breakpoint
 * as stored in debug register 7.
 */
unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type)
{
        return __encode_dr7(drnum, len, type) | DR_GLOBAL_SLOWDOWN;
}

/*
 * Decode the length and type bits for a particular breakpoint as
 * stored in debug register 7.  Return the "enabled" status.
 */
int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, unsigned *type)
{
        int bp_info = dr7 >> (DR_CONTROL_SHIFT + bpnum * DR_CONTROL_SIZE);

        *len = (bp_info & 0xc) | 0x40;
        *type = (bp_info & 0x3) | 0x80;

        return (dr7 >> (bpnum * DR_ENABLE_SIZE)) & 0x3;
}

/*
 * Install a perf counter breakpoint.
 *
 * We seek a free debug address register and use it for this
 * breakpoint.  Finally, we enable it in the debug control register.
 *
 * Atomic: we hold the counter->ctx->lock and we only handle variables
 * and registers local to this cpu.
 */
int arch_install_hw_breakpoint(struct perf_event *bp)
{
        struct arch_hw_breakpoint *info = counter_arch_bp(bp);
        unsigned long *dr7;
        int i;

        for (i = 0; i < HBP_NUM; i++) {
                struct perf_event **slot = this_cpu_ptr(&bp_per_reg[i]);

                if (!*slot) {
                        *slot = bp;
                        break;
                }
        }

        if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot"))
                return -EBUSY;

        set_debugreg(info->address, i);
        __this_cpu_write(cpu_debugreg[i], info->address);

        dr7 = this_cpu_ptr(&cpu_dr7);
        *dr7 |= encode_dr7(i, info->len, info->type);

        set_debugreg(*dr7, 7);
        if (info->mask)
                set_dr_addr_mask(info->mask, i);

        return 0;
}
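
/*
 * Worked example (illustrative only): installing a 4-byte write
 * breakpoint in slot 0 computes encode_dr7(0, X86_BREAKPOINT_LEN_4,
 * X86_BREAKPOINT_WRITE).  With LEN_4 == 0x4c and WRITE == 0x81, the
 * tag bits are masked off: (0x4c | 0x81) & 0xf == 0xd, i.e. LEN0 = 0b11
 * (4 bytes) and R/W0 = 0b01 (write).  Shifted into bits 16-19 and OR'ed
 * with G0 (bit 1) and GE (DR_GLOBAL_SLOWDOWN, bit 9), DR7 becomes
 * 0x000d0202.  decode_dr7() reverses this, re-adding the 0x40/0x80 tag
 * bits so the results compare equal to the X86_BREAKPOINT_* constants.
 */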
/*
 * Uninstall the breakpoint contained in the given counter.
 *
 * First we find the debug address register it uses and then we
 * disable it.
 *
 * Atomic: we hold the counter->ctx->lock and we only handle variables
 * and registers local to this cpu.
 */
void arch_uninstall_hw_breakpoint(struct perf_event *bp)
{
        struct arch_hw_breakpoint *info = counter_arch_bp(bp);
        unsigned long *dr7;
        int i;

        for (i = 0; i < HBP_NUM; i++) {
                struct perf_event **slot = this_cpu_ptr(&bp_per_reg[i]);

                if (*slot == bp) {
                        *slot = NULL;
                        break;
                }
        }

        if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot"))
                return;

        dr7 = this_cpu_ptr(&cpu_dr7);
        *dr7 &= ~__encode_dr7(i, info->len, info->type);

        set_debugreg(*dr7, 7);
        if (info->mask)
                set_dr_addr_mask(0, i);
}

static int arch_bp_generic_len(int x86_len)
{
        switch (x86_len) {
        case X86_BREAKPOINT_LEN_1:
                return HW_BREAKPOINT_LEN_1;
        case X86_BREAKPOINT_LEN_2:
                return HW_BREAKPOINT_LEN_2;
        case X86_BREAKPOINT_LEN_4:
                return HW_BREAKPOINT_LEN_4;
#ifdef CONFIG_X86_64
        case X86_BREAKPOINT_LEN_8:
                return HW_BREAKPOINT_LEN_8;
#endif
        default:
                return -EINVAL;
        }
}

int arch_bp_generic_fields(int x86_len, int x86_type,
                           int *gen_len, int *gen_type)
{
        int len;

        /* Type */
        switch (x86_type) {
        case X86_BREAKPOINT_EXECUTE:
                if (x86_len != X86_BREAKPOINT_LEN_X)
                        return -EINVAL;

                *gen_type = HW_BREAKPOINT_X;
                *gen_len = sizeof(long);
                return 0;
        case X86_BREAKPOINT_WRITE:
                *gen_type = HW_BREAKPOINT_W;
                break;
        case X86_BREAKPOINT_RW:
                *gen_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R;
                break;
        default:
                return -EINVAL;
        }

        /* Len */
        len = arch_bp_generic_len(x86_len);
        if (len < 0)
                return -EINVAL;
        *gen_len = len;

        return 0;
}

/*
 * Check for virtual address in kernel space.
 */
int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw)
{
        unsigned long va;
        int len;

        va = hw->address;
        len = arch_bp_generic_len(hw->len);
        WARN_ON_ONCE(len < 0);

        /*
         * We don't need to worry about va + len - 1 overflowing:
         * we already require that va is aligned to a multiple of len.
         */
        return (va >= TASK_SIZE_MAX) || ((va + len - 1) >= TASK_SIZE_MAX);
}

/*
 * Checks whether the range from addr to end, inclusive, overlaps the CPU
 * entry area range.
 */
static inline bool within_cpu_entry_area(unsigned long addr, unsigned long end)
{
        return end >= CPU_ENTRY_AREA_BASE &&
               addr < (CPU_ENTRY_AREA_BASE + CPU_ENTRY_AREA_TOTAL_SIZE);
}
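
/*
 * Note on the bounds (illustrative): [addr, end] overlaps
 * [CPU_ENTRY_AREA_BASE, CPU_ENTRY_AREA_BASE + CPU_ENTRY_AREA_TOTAL_SIZE)
 * exactly when the breakpoint neither ends before the area begins
 * (end >= BASE) nor begins at or after the area's end (addr < BASE + SIZE).
 * E.g. a 1-byte breakpoint ending at CPU_ENTRY_AREA_BASE - 1 is allowed,
 * while one starting at CPU_ENTRY_AREA_BASE is rejected by
 * arch_build_bp_info() below.
 */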
static int arch_build_bp_info(struct perf_event *bp,
                              const struct perf_event_attr *attr,
                              struct arch_hw_breakpoint *hw)
{
        unsigned long bp_end;

        bp_end = attr->bp_addr + attr->bp_len - 1;
        if (bp_end < attr->bp_addr)
                return -EINVAL;

        /*
         * Prevent any breakpoint of any type that overlaps the
         * cpu_entry_area.  This protects the IST stacks and also
         * reduces the chance that we ever find out what happens if
         * there's a data breakpoint on the GDT, IDT, or TSS.
         */
        if (within_cpu_entry_area(attr->bp_addr, bp_end))
                return -EINVAL;

        hw->address = attr->bp_addr;
        hw->mask = 0;

        /* Type */
        switch (attr->bp_type) {
        case HW_BREAKPOINT_W:
                hw->type = X86_BREAKPOINT_WRITE;
                break;
        case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
                hw->type = X86_BREAKPOINT_RW;
                break;
        case HW_BREAKPOINT_X:
                /*
                 * We don't allow kernel breakpoints in places that are not
                 * acceptable for kprobes.  On non-kprobes kernels, we don't
                 * allow kernel breakpoints at all.
                 */
                if (attr->bp_addr >= TASK_SIZE_MAX) {
                        if (within_kprobe_blacklist(attr->bp_addr))
                                return -EINVAL;
                }

                hw->type = X86_BREAKPOINT_EXECUTE;
                /*
                 * x86 instruction breakpoints need to have a specific
                 * undefined len.  But we still need to check that userspace
                 * is not trying to set up an unsupported length, to get a
                 * range breakpoint for example.
                 */
                if (attr->bp_len == sizeof(long)) {
                        hw->len = X86_BREAKPOINT_LEN_X;
                        return 0;
                }
                /* fall through */
        default:
                return -EINVAL;
        }

        /* Len */
        switch (attr->bp_len) {
        case HW_BREAKPOINT_LEN_1:
                hw->len = X86_BREAKPOINT_LEN_1;
                break;
        case HW_BREAKPOINT_LEN_2:
                hw->len = X86_BREAKPOINT_LEN_2;
                break;
        case HW_BREAKPOINT_LEN_4:
                hw->len = X86_BREAKPOINT_LEN_4;
                break;
#ifdef CONFIG_X86_64
        case HW_BREAKPOINT_LEN_8:
                hw->len = X86_BREAKPOINT_LEN_8;
                break;
#endif
        default:
                /* AMD range breakpoint */
                if (!is_power_of_2(attr->bp_len))
                        return -EINVAL;
                if (attr->bp_addr & (attr->bp_len - 1))
                        return -EINVAL;

                if (!boot_cpu_has(X86_FEATURE_BPEXT))
                        return -EOPNOTSUPP;

                /*
                 * It's impossible to use a range breakpoint to fake out
                 * user vs kernel detection because bp_len - 1 can't
                 * have the high bit set.  If we ever allow range instruction
                 * breakpoints, then we'll have to check for kprobe-blacklisted
                 * addresses anywhere in the range.
                 */
                hw->mask = attr->bp_len - 1;
                hw->len = X86_BREAKPOINT_LEN_1;
        }

        return 0;
}

/*
 * Validate the arch-specific HW Breakpoint register settings
 */
int hw_breakpoint_arch_parse(struct perf_event *bp,
                             const struct perf_event_attr *attr,
                             struct arch_hw_breakpoint *hw)
{
        unsigned int align;
        int ret;

        ret = arch_build_bp_info(bp, attr, hw);
        if (ret)
                return ret;

        switch (hw->len) {
        case X86_BREAKPOINT_LEN_1:
                align = 0;
                if (hw->mask)
                        align = hw->mask;
                break;
        case X86_BREAKPOINT_LEN_2:
                align = 1;
                break;
        case X86_BREAKPOINT_LEN_4:
                align = 3;
                break;
#ifdef CONFIG_X86_64
        case X86_BREAKPOINT_LEN_8:
                align = 7;
                break;
#endif
        default:
                WARN_ON_ONCE(1);
                return -EINVAL;
        }

        /*
         * Check that the low-order bits of the address are appropriate
         * for the alignment implied by len.
         */
        if (hw->address & align)
                return -EINVAL;

        return 0;
}
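
/*
 * Alignment example (illustrative only): a 4-byte data breakpoint at
 * address 0x1003 is rejected above because 0x1003 & 3 != 0.  The DR7
 * length encodings assume the watched region is naturally aligned, so
 * callers must align bp_addr to a multiple of bp_len themselves.
 */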
/*
 * Dump the debug register contents to the user.
 * We can't dump our per-cpu values because they
 * may contain a cpu-wide breakpoint, something that
 * doesn't belong to the current task.
 *
 * TODO: include non-ptrace user breakpoints (perf)
 */
void aout_dump_debugregs(struct user *dump)
{
        int i;
        int dr7 = 0;
        struct perf_event *bp;
        struct arch_hw_breakpoint *info;
        struct thread_struct *thread = &current->thread;

        for (i = 0; i < HBP_NUM; i++) {
                bp = thread->ptrace_bps[i];

                if (bp && !bp->attr.disabled) {
                        dump->u_debugreg[i] = bp->attr.bp_addr;
                        info = counter_arch_bp(bp);
                        dr7 |= encode_dr7(i, info->len, info->type);
                } else {
                        dump->u_debugreg[i] = 0;
                }
        }

        dump->u_debugreg[4] = 0;
        dump->u_debugreg[5] = 0;
        dump->u_debugreg[6] = current->thread.debugreg6;

        dump->u_debugreg[7] = dr7;
}
EXPORT_SYMBOL_GPL(aout_dump_debugregs);

/*
 * Release the user breakpoints used by ptrace
 */
void flush_ptrace_hw_breakpoint(struct task_struct *tsk)
{
        int i;
        struct thread_struct *t = &tsk->thread;

        for (i = 0; i < HBP_NUM; i++) {
                unregister_hw_breakpoint(t->ptrace_bps[i]);
                t->ptrace_bps[i] = NULL;
        }

        t->debugreg6 = 0;
        t->ptrace_dr7 = 0;
}

void hw_breakpoint_restore(void)
{
        set_debugreg(__this_cpu_read(cpu_debugreg[0]), 0);
        set_debugreg(__this_cpu_read(cpu_debugreg[1]), 1);
        set_debugreg(__this_cpu_read(cpu_debugreg[2]), 2);
        set_debugreg(__this_cpu_read(cpu_debugreg[3]), 3);
        set_debugreg(current->thread.debugreg6, 6);
        set_debugreg(__this_cpu_read(cpu_dr7), 7);
}
EXPORT_SYMBOL_GPL(hw_breakpoint_restore);
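
/*
 * DR6 quick reference for the handler below (constants from
 * asm/debugreg.h): DR_TRAP0..DR_TRAP3 are bits 0-3 (together
 * DR_TRAP_BITS == 0xf) and DR_STEP is bit 14 (0x4000).  For example,
 * dr6 == 0x4 means only the breakpoint in debug register 2 fired.
 */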
/*
 * Handle debug exception notifications.
 *
 * Return value is either NOTIFY_STOP or NOTIFY_DONE as explained below.
 *
 * NOTIFY_DONE is returned if one of the following conditions is true:
 * i) the causative address is from user-space and the exception
 * is a valid one, i.e. not triggered as a result of lazy debug register
 * switching
 * ii) there are more bits than trap<n> set in the DR6 register (such
 * as BD, BS or BT), indicating that more than one debug condition is
 * met and requires some more action in do_debug().
 *
 * NOTIFY_STOP is returned for all other cases.
 */
static int hw_breakpoint_handler(struct die_args *args)
{
        int i, cpu, rc = NOTIFY_STOP;
        struct perf_event *bp;
        unsigned long dr6;
        unsigned long *dr6_p;

        /* The DR6 value is pointed to by args->err */
        dr6_p = (unsigned long *)ERR_PTR(args->err);
        dr6 = *dr6_p;

        /* If it's a single step, TRAP bits are random */
        if (dr6 & DR_STEP)
                return NOTIFY_DONE;

        /* Do an early return if no trap bits are set in DR6 */
        if ((dr6 & DR_TRAP_BITS) == 0)
                return NOTIFY_DONE;

        /*
         * Local interrupts are disabled here.
         * Reset the DRn bits in the virtualized register value.
         * The ptrace trigger routine will add in whatever is needed.
         */
        current->thread.debugreg6 &= ~DR_TRAP_BITS;
        cpu = get_cpu();

        /* Handle all the breakpoints that were triggered */
        for (i = 0; i < HBP_NUM; ++i) {
                if (likely(!(dr6 & (DR_TRAP0 << i))))
                        continue;

                /*
                 * The counter may be concurrently released, but that can
                 * only occur from a call_rcu() path.  We can then safely
                 * fetch the breakpoint, use its callback and touch its
                 * counter while we are in an rcu_read_lock() path.
                 */
                rcu_read_lock();

                bp = per_cpu(bp_per_reg[i], cpu);
                /*
                 * Reset the 'i'th TRAP bit in dr6 to denote completion of
                 * exception handling
                 */
                (*dr6_p) &= ~(DR_TRAP0 << i);
                /*
                 * bp can be NULL due to lazy debug register switching
                 * or due to concurrent perf counter removal.
                 */
                if (!bp) {
                        rcu_read_unlock();
                        break;
                }

                perf_bp_event(bp, args->regs);

                /*
                 * Set up the resume flag to avoid breakpoint recursion when
                 * returning to the origin.
                 */
                if (bp->hw.info.type == X86_BREAKPOINT_EXECUTE)
                        args->regs->flags |= X86_EFLAGS_RF;

                rcu_read_unlock();
        }
        /*
         * Further processing in do_debug() is needed for a) user-space
         * breakpoints (to generate signals) and b) when the system has
         * taken an exception due to multiple causes.
         */
        if ((current->thread.debugreg6 & DR_TRAP_BITS) ||
            (dr6 & (~DR_TRAP_BITS)))
                rc = NOTIFY_DONE;

        put_cpu();

        return rc;
}

/*
 * Handle debug exception notifications.
 */
int hw_breakpoint_exceptions_notify(
                struct notifier_block *unused, unsigned long val, void *data)
{
        if (val != DIE_DEBUG)
                return NOTIFY_DONE;

        return hw_breakpoint_handler(data);
}

void hw_breakpoint_pmu_read(struct perf_event *bp)
{
        /* TODO */
}
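
/*
 * Usage sketch (illustrative, not compiled here; see
 * samples/hw_breakpoint/data_breakpoint.c for the complete pattern):
 * a kernel-wide write watchpoint that ends up in the arch hooks above
 * can be registered through the generic API:
 *
 *        struct perf_event_attr attr;
 *        struct perf_event * __percpu *wp;
 *
 *        hw_breakpoint_init(&attr);
 *        attr.bp_addr = kallsyms_lookup_name("pid_max");
 *        attr.bp_len  = HW_BREAKPOINT_LEN_4;
 *        attr.bp_type = HW_BREAKPOINT_W;
 *
 *        wp = register_wide_hw_breakpoint(&attr, wp_handler, NULL);
 *
 * where wp_handler is a perf_overflow_handler_t callback invoked via
 * perf_bp_event() in hw_breakpoint_handler() above, and "pid_max" is
 * just an example symbol.
 */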