1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * 4 * Copyright (C) 2007 Alan Stern 5 * Copyright (C) 2009 IBM Corporation 6 * Copyright (C) 2009 Frederic Weisbecker <fweisbec@gmail.com> 7 * 8 * Authors: Alan Stern <stern@rowland.harvard.edu> 9 * K.Prasad <prasad@linux.vnet.ibm.com> 10 * Frederic Weisbecker <fweisbec@gmail.com> 11 */ 12 13 /* 14 * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility, 15 * using the CPU's debug registers. 16 */ 17 18 #include <linux/perf_event.h> 19 #include <linux/hw_breakpoint.h> 20 #include <linux/irqflags.h> 21 #include <linux/notifier.h> 22 #include <linux/kallsyms.h> 23 #include <linux/kprobes.h> 24 #include <linux/percpu.h> 25 #include <linux/kdebug.h> 26 #include <linux/kernel.h> 27 #include <linux/export.h> 28 #include <linux/sched.h> 29 #include <linux/smp.h> 30 31 #include <asm/hw_breakpoint.h> 32 #include <asm/processor.h> 33 #include <asm/debugreg.h> 34 #include <asm/user.h> 35 #include <asm/desc.h> 36 #include <asm/tlbflush.h> 37 38 /* Per cpu debug control register value */ 39 DEFINE_PER_CPU(unsigned long, cpu_dr7); 40 EXPORT_PER_CPU_SYMBOL(cpu_dr7); 41 42 /* Per cpu debug address registers values */ 43 static DEFINE_PER_CPU(unsigned long, cpu_debugreg[HBP_NUM]); 44 45 /* 46 * Stores the breakpoints currently in use on each breakpoint address 47 * register for each cpus 48 */ 49 static DEFINE_PER_CPU(struct perf_event *, bp_per_reg[HBP_NUM]); 50 51 52 static inline unsigned long 53 __encode_dr7(int drnum, unsigned int len, unsigned int type) 54 { 55 unsigned long bp_info; 56 57 bp_info = (len | type) & 0xf; 58 bp_info <<= (DR_CONTROL_SHIFT + drnum * DR_CONTROL_SIZE); 59 bp_info |= (DR_GLOBAL_ENABLE << (drnum * DR_ENABLE_SIZE)); 60 61 return bp_info; 62 } 63 64 /* 65 * Encode the length, type, Exact, and Enable bits for a particular breakpoint 66 * as stored in debug register 7. 67 */ 68 unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type) 69 { 70 return __encode_dr7(drnum, len, type) | DR_GLOBAL_SLOWDOWN; 71 } 72 73 /* 74 * Decode the length and type bits for a particular breakpoint as 75 * stored in debug register 7. Return the "enabled" status. 76 */ 77 int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, unsigned *type) 78 { 79 int bp_info = dr7 >> (DR_CONTROL_SHIFT + bpnum * DR_CONTROL_SIZE); 80 81 *len = (bp_info & 0xc) | 0x40; 82 *type = (bp_info & 0x3) | 0x80; 83 84 return (dr7 >> (bpnum * DR_ENABLE_SIZE)) & 0x3; 85 } 86 87 /* 88 * Install a perf counter breakpoint. 89 * 90 * We seek a free debug address register and use it for this 91 * breakpoint. Eventually we enable it in the debug control register. 92 * 93 * Atomic: we hold the counter->ctx->lock and we only handle variables 94 * and registers local to this cpu. 95 */ 96 int arch_install_hw_breakpoint(struct perf_event *bp) 97 { 98 struct arch_hw_breakpoint *info = counter_arch_bp(bp); 99 unsigned long *dr7; 100 int i; 101 102 for (i = 0; i < HBP_NUM; i++) { 103 struct perf_event **slot = this_cpu_ptr(&bp_per_reg[i]); 104 105 if (!*slot) { 106 *slot = bp; 107 break; 108 } 109 } 110 111 if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot")) 112 return -EBUSY; 113 114 set_debugreg(info->address, i); 115 __this_cpu_write(cpu_debugreg[i], info->address); 116 117 dr7 = this_cpu_ptr(&cpu_dr7); 118 *dr7 |= encode_dr7(i, info->len, info->type); 119 120 set_debugreg(*dr7, 7); 121 if (info->mask) 122 set_dr_addr_mask(info->mask, i); 123 124 return 0; 125 } 126 127 /* 128 * Uninstall the breakpoint contained in the given counter. 129 * 130 * First we search the debug address register it uses and then we disable 131 * it. 132 * 133 * Atomic: we hold the counter->ctx->lock and we only handle variables 134 * and registers local to this cpu. 135 */ 136 void arch_uninstall_hw_breakpoint(struct perf_event *bp) 137 { 138 struct arch_hw_breakpoint *info = counter_arch_bp(bp); 139 unsigned long *dr7; 140 int i; 141 142 for (i = 0; i < HBP_NUM; i++) { 143 struct perf_event **slot = this_cpu_ptr(&bp_per_reg[i]); 144 145 if (*slot == bp) { 146 *slot = NULL; 147 break; 148 } 149 } 150 151 if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot")) 152 return; 153 154 dr7 = this_cpu_ptr(&cpu_dr7); 155 *dr7 &= ~__encode_dr7(i, info->len, info->type); 156 157 set_debugreg(*dr7, 7); 158 if (info->mask) 159 set_dr_addr_mask(0, i); 160 } 161 162 static int arch_bp_generic_len(int x86_len) 163 { 164 switch (x86_len) { 165 case X86_BREAKPOINT_LEN_1: 166 return HW_BREAKPOINT_LEN_1; 167 case X86_BREAKPOINT_LEN_2: 168 return HW_BREAKPOINT_LEN_2; 169 case X86_BREAKPOINT_LEN_4: 170 return HW_BREAKPOINT_LEN_4; 171 #ifdef CONFIG_X86_64 172 case X86_BREAKPOINT_LEN_8: 173 return HW_BREAKPOINT_LEN_8; 174 #endif 175 default: 176 return -EINVAL; 177 } 178 } 179 180 int arch_bp_generic_fields(int x86_len, int x86_type, 181 int *gen_len, int *gen_type) 182 { 183 int len; 184 185 /* Type */ 186 switch (x86_type) { 187 case X86_BREAKPOINT_EXECUTE: 188 if (x86_len != X86_BREAKPOINT_LEN_X) 189 return -EINVAL; 190 191 *gen_type = HW_BREAKPOINT_X; 192 *gen_len = sizeof(long); 193 return 0; 194 case X86_BREAKPOINT_WRITE: 195 *gen_type = HW_BREAKPOINT_W; 196 break; 197 case X86_BREAKPOINT_RW: 198 *gen_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R; 199 break; 200 default: 201 return -EINVAL; 202 } 203 204 /* Len */ 205 len = arch_bp_generic_len(x86_len); 206 if (len < 0) 207 return -EINVAL; 208 *gen_len = len; 209 210 return 0; 211 } 212 213 /* 214 * Check for virtual address in kernel space. 215 */ 216 int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw) 217 { 218 unsigned long va; 219 int len; 220 221 va = hw->address; 222 len = arch_bp_generic_len(hw->len); 223 WARN_ON_ONCE(len < 0); 224 225 /* 226 * We don't need to worry about va + len - 1 overflowing: 227 * we already require that va is aligned to a multiple of len. 228 */ 229 return (va >= TASK_SIZE_MAX) || ((va + len - 1) >= TASK_SIZE_MAX); 230 } 231 232 /* 233 * Checks whether the range [addr, end], overlaps the area [base, base + size). 234 */ 235 static inline bool within_area(unsigned long addr, unsigned long end, 236 unsigned long base, unsigned long size) 237 { 238 return end >= base && addr < (base + size); 239 } 240 241 /* 242 * Checks whether the range from addr to end, inclusive, overlaps the fixed 243 * mapped CPU entry area range or other ranges used for CPU entry. 244 */ 245 static inline bool within_cpu_entry(unsigned long addr, unsigned long end) 246 { 247 int cpu; 248 249 /* CPU entry erea is always used for CPU entry */ 250 if (within_area(addr, end, CPU_ENTRY_AREA_BASE, 251 CPU_ENTRY_AREA_TOTAL_SIZE)) 252 return true; 253 254 for_each_possible_cpu(cpu) { 255 /* The original rw GDT is being used after load_direct_gdt() */ 256 if (within_area(addr, end, (unsigned long)get_cpu_gdt_rw(cpu), 257 GDT_SIZE)) 258 return true; 259 260 /* 261 * cpu_tss_rw is not directly referenced by hardware, but 262 * cpu_tss_rw is also used in CPU entry code, 263 */ 264 if (within_area(addr, end, 265 (unsigned long)&per_cpu(cpu_tss_rw, cpu), 266 sizeof(struct tss_struct))) 267 return true; 268 269 /* 270 * cpu_tlbstate.user_pcid_flush_mask is used for CPU entry. 271 * If a data breakpoint on it, it will cause an unwanted #DB. 272 * Protect the full cpu_tlbstate structure to be sure. 273 */ 274 if (within_area(addr, end, 275 (unsigned long)&per_cpu(cpu_tlbstate, cpu), 276 sizeof(struct tlb_state))) 277 return true; 278 } 279 280 return false; 281 } 282 283 static int arch_build_bp_info(struct perf_event *bp, 284 const struct perf_event_attr *attr, 285 struct arch_hw_breakpoint *hw) 286 { 287 unsigned long bp_end; 288 289 bp_end = attr->bp_addr + attr->bp_len - 1; 290 if (bp_end < attr->bp_addr) 291 return -EINVAL; 292 293 /* 294 * Prevent any breakpoint of any type that overlaps the CPU 295 * entry area and data. This protects the IST stacks and also 296 * reduces the chance that we ever find out what happens if 297 * there's a data breakpoint on the GDT, IDT, or TSS. 298 */ 299 if (within_cpu_entry(attr->bp_addr, bp_end)) 300 return -EINVAL; 301 302 hw->address = attr->bp_addr; 303 hw->mask = 0; 304 305 /* Type */ 306 switch (attr->bp_type) { 307 case HW_BREAKPOINT_W: 308 hw->type = X86_BREAKPOINT_WRITE; 309 break; 310 case HW_BREAKPOINT_W | HW_BREAKPOINT_R: 311 hw->type = X86_BREAKPOINT_RW; 312 break; 313 case HW_BREAKPOINT_X: 314 /* 315 * We don't allow kernel breakpoints in places that are not 316 * acceptable for kprobes. On non-kprobes kernels, we don't 317 * allow kernel breakpoints at all. 318 */ 319 if (attr->bp_addr >= TASK_SIZE_MAX) { 320 if (within_kprobe_blacklist(attr->bp_addr)) 321 return -EINVAL; 322 } 323 324 hw->type = X86_BREAKPOINT_EXECUTE; 325 /* 326 * x86 inst breakpoints need to have a specific undefined len. 327 * But we still need to check userspace is not trying to setup 328 * an unsupported length, to get a range breakpoint for example. 329 */ 330 if (attr->bp_len == sizeof(long)) { 331 hw->len = X86_BREAKPOINT_LEN_X; 332 return 0; 333 } 334 /* fall through */ 335 default: 336 return -EINVAL; 337 } 338 339 /* Len */ 340 switch (attr->bp_len) { 341 case HW_BREAKPOINT_LEN_1: 342 hw->len = X86_BREAKPOINT_LEN_1; 343 break; 344 case HW_BREAKPOINT_LEN_2: 345 hw->len = X86_BREAKPOINT_LEN_2; 346 break; 347 case HW_BREAKPOINT_LEN_4: 348 hw->len = X86_BREAKPOINT_LEN_4; 349 break; 350 #ifdef CONFIG_X86_64 351 case HW_BREAKPOINT_LEN_8: 352 hw->len = X86_BREAKPOINT_LEN_8; 353 break; 354 #endif 355 default: 356 /* AMD range breakpoint */ 357 if (!is_power_of_2(attr->bp_len)) 358 return -EINVAL; 359 if (attr->bp_addr & (attr->bp_len - 1)) 360 return -EINVAL; 361 362 if (!boot_cpu_has(X86_FEATURE_BPEXT)) 363 return -EOPNOTSUPP; 364 365 /* 366 * It's impossible to use a range breakpoint to fake out 367 * user vs kernel detection because bp_len - 1 can't 368 * have the high bit set. If we ever allow range instruction 369 * breakpoints, then we'll have to check for kprobe-blacklisted 370 * addresses anywhere in the range. 371 */ 372 hw->mask = attr->bp_len - 1; 373 hw->len = X86_BREAKPOINT_LEN_1; 374 } 375 376 return 0; 377 } 378 379 /* 380 * Validate the arch-specific HW Breakpoint register settings 381 */ 382 int hw_breakpoint_arch_parse(struct perf_event *bp, 383 const struct perf_event_attr *attr, 384 struct arch_hw_breakpoint *hw) 385 { 386 unsigned int align; 387 int ret; 388 389 390 ret = arch_build_bp_info(bp, attr, hw); 391 if (ret) 392 return ret; 393 394 switch (hw->len) { 395 case X86_BREAKPOINT_LEN_1: 396 align = 0; 397 if (hw->mask) 398 align = hw->mask; 399 break; 400 case X86_BREAKPOINT_LEN_2: 401 align = 1; 402 break; 403 case X86_BREAKPOINT_LEN_4: 404 align = 3; 405 break; 406 #ifdef CONFIG_X86_64 407 case X86_BREAKPOINT_LEN_8: 408 align = 7; 409 break; 410 #endif 411 default: 412 WARN_ON_ONCE(1); 413 return -EINVAL; 414 } 415 416 /* 417 * Check that the low-order bits of the address are appropriate 418 * for the alignment implied by len. 419 */ 420 if (hw->address & align) 421 return -EINVAL; 422 423 return 0; 424 } 425 426 /* 427 * Dump the debug register contents to the user. 428 * We can't dump our per cpu values because it 429 * may contain cpu wide breakpoint, something that 430 * doesn't belong to the current task. 431 * 432 * TODO: include non-ptrace user breakpoints (perf) 433 */ 434 void aout_dump_debugregs(struct user *dump) 435 { 436 int i; 437 int dr7 = 0; 438 struct perf_event *bp; 439 struct arch_hw_breakpoint *info; 440 struct thread_struct *thread = ¤t->thread; 441 442 for (i = 0; i < HBP_NUM; i++) { 443 bp = thread->ptrace_bps[i]; 444 445 if (bp && !bp->attr.disabled) { 446 dump->u_debugreg[i] = bp->attr.bp_addr; 447 info = counter_arch_bp(bp); 448 dr7 |= encode_dr7(i, info->len, info->type); 449 } else { 450 dump->u_debugreg[i] = 0; 451 } 452 } 453 454 dump->u_debugreg[4] = 0; 455 dump->u_debugreg[5] = 0; 456 dump->u_debugreg[6] = current->thread.debugreg6; 457 458 dump->u_debugreg[7] = dr7; 459 } 460 EXPORT_SYMBOL_GPL(aout_dump_debugregs); 461 462 /* 463 * Release the user breakpoints used by ptrace 464 */ 465 void flush_ptrace_hw_breakpoint(struct task_struct *tsk) 466 { 467 int i; 468 struct thread_struct *t = &tsk->thread; 469 470 for (i = 0; i < HBP_NUM; i++) { 471 unregister_hw_breakpoint(t->ptrace_bps[i]); 472 t->ptrace_bps[i] = NULL; 473 } 474 475 t->debugreg6 = 0; 476 t->ptrace_dr7 = 0; 477 } 478 479 void hw_breakpoint_restore(void) 480 { 481 set_debugreg(__this_cpu_read(cpu_debugreg[0]), 0); 482 set_debugreg(__this_cpu_read(cpu_debugreg[1]), 1); 483 set_debugreg(__this_cpu_read(cpu_debugreg[2]), 2); 484 set_debugreg(__this_cpu_read(cpu_debugreg[3]), 3); 485 set_debugreg(current->thread.debugreg6, 6); 486 set_debugreg(__this_cpu_read(cpu_dr7), 7); 487 } 488 EXPORT_SYMBOL_GPL(hw_breakpoint_restore); 489 490 /* 491 * Handle debug exception notifications. 492 * 493 * Return value is either NOTIFY_STOP or NOTIFY_DONE as explained below. 494 * 495 * NOTIFY_DONE returned if one of the following conditions is true. 496 * i) When the causative address is from user-space and the exception 497 * is a valid one, i.e. not triggered as a result of lazy debug register 498 * switching 499 * ii) When there are more bits than trap<n> set in DR6 register (such 500 * as BD, BS or BT) indicating that more than one debug condition is 501 * met and requires some more action in do_debug(). 502 * 503 * NOTIFY_STOP returned for all other cases 504 * 505 */ 506 static int hw_breakpoint_handler(struct die_args *args) 507 { 508 int i, cpu, rc = NOTIFY_STOP; 509 struct perf_event *bp; 510 unsigned long dr6; 511 unsigned long *dr6_p; 512 513 /* The DR6 value is pointed by args->err */ 514 dr6_p = (unsigned long *)ERR_PTR(args->err); 515 dr6 = *dr6_p; 516 517 /* If it's a single step, TRAP bits are random */ 518 if (dr6 & DR_STEP) 519 return NOTIFY_DONE; 520 521 /* Do an early return if no trap bits are set in DR6 */ 522 if ((dr6 & DR_TRAP_BITS) == 0) 523 return NOTIFY_DONE; 524 525 /* 526 * Assert that local interrupts are disabled 527 * Reset the DRn bits in the virtualized register value. 528 * The ptrace trigger routine will add in whatever is needed. 529 */ 530 current->thread.debugreg6 &= ~DR_TRAP_BITS; 531 cpu = get_cpu(); 532 533 /* Handle all the breakpoints that were triggered */ 534 for (i = 0; i < HBP_NUM; ++i) { 535 if (likely(!(dr6 & (DR_TRAP0 << i)))) 536 continue; 537 538 /* 539 * The counter may be concurrently released but that can only 540 * occur from a call_rcu() path. We can then safely fetch 541 * the breakpoint, use its callback, touch its counter 542 * while we are in an rcu_read_lock() path. 543 */ 544 rcu_read_lock(); 545 546 bp = per_cpu(bp_per_reg[i], cpu); 547 /* 548 * Reset the 'i'th TRAP bit in dr6 to denote completion of 549 * exception handling 550 */ 551 (*dr6_p) &= ~(DR_TRAP0 << i); 552 /* 553 * bp can be NULL due to lazy debug register switching 554 * or due to concurrent perf counter removing. 555 */ 556 if (!bp) { 557 rcu_read_unlock(); 558 break; 559 } 560 561 perf_bp_event(bp, args->regs); 562 563 /* 564 * Set up resume flag to avoid breakpoint recursion when 565 * returning back to origin. 566 */ 567 if (bp->hw.info.type == X86_BREAKPOINT_EXECUTE) 568 args->regs->flags |= X86_EFLAGS_RF; 569 570 rcu_read_unlock(); 571 } 572 /* 573 * Further processing in do_debug() is needed for a) user-space 574 * breakpoints (to generate signals) and b) when the system has 575 * taken exception due to multiple causes 576 */ 577 if ((current->thread.debugreg6 & DR_TRAP_BITS) || 578 (dr6 & (~DR_TRAP_BITS))) 579 rc = NOTIFY_DONE; 580 581 put_cpu(); 582 583 return rc; 584 } 585 586 /* 587 * Handle debug exception notifications. 588 */ 589 int hw_breakpoint_exceptions_notify( 590 struct notifier_block *unused, unsigned long val, void *data) 591 { 592 if (val != DIE_DEBUG) 593 return NOTIFY_DONE; 594 595 return hw_breakpoint_handler(data); 596 } 597 598 void hw_breakpoint_pmu_read(struct perf_event *bp) 599 { 600 /* TODO */ 601 } 602