// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *
 * Copyright (C) 2007 Alan Stern
 * Copyright (C) 2009 IBM Corporation
 * Copyright (C) 2009 Frederic Weisbecker <fweisbec@gmail.com>
 *
 * Authors: Alan Stern <stern@rowland.harvard.edu>
 *          K.Prasad <prasad@linux.vnet.ibm.com>
 *          Frederic Weisbecker <fweisbec@gmail.com>
 */

/*
 * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
 * using the CPU's debug registers.
 */

#include <linux/perf_event.h>
#include <linux/hw_breakpoint.h>
#include <linux/irqflags.h>
#include <linux/notifier.h>
#include <linux/kallsyms.h>
#include <linux/kprobes.h>
#include <linux/percpu.h>
#include <linux/kdebug.h>
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/smp.h>

#include <asm/hw_breakpoint.h>
#include <asm/processor.h>
#include <asm/debugreg.h>
#include <asm/user.h>
#include <asm/desc.h>

/* Per cpu debug control register value */
DEFINE_PER_CPU(unsigned long, cpu_dr7);
EXPORT_PER_CPU_SYMBOL(cpu_dr7);

/* Per cpu debug address registers values */
static DEFINE_PER_CPU(unsigned long, cpu_debugreg[HBP_NUM]);

/*
 * Stores the breakpoints currently in use on each breakpoint address
 * register for each CPU
 */
static DEFINE_PER_CPU(struct perf_event *, bp_per_reg[HBP_NUM]);


static inline unsigned long
__encode_dr7(int drnum, unsigned int len, unsigned int type)
{
	unsigned long bp_info;

	bp_info = (len | type) & 0xf;
	bp_info <<= (DR_CONTROL_SHIFT + drnum * DR_CONTROL_SIZE);
	bp_info |= (DR_GLOBAL_ENABLE << (drnum * DR_ENABLE_SIZE));

	return bp_info;
}

/*
 * Encode the length, type, Exact, and Enable bits for a particular breakpoint
 * as stored in debug register 7.
 */
unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type)
{
	return __encode_dr7(drnum, len, type) | DR_GLOBAL_SLOWDOWN;
}

/*
 * Decode the length and type bits for a particular breakpoint as
 * stored in debug register 7. Return the "enabled" status.
 */
int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, unsigned *type)
{
	int bp_info = dr7 >> (DR_CONTROL_SHIFT + bpnum * DR_CONTROL_SIZE);

	*len = (bp_info & 0xc) | 0x40;
	*type = (bp_info & 0x3) | 0x80;

	return (dr7 >> (bpnum * DR_ENABLE_SIZE)) & 0x3;
}

/*
 * Install a perf counter breakpoint.
 *
 * We seek a free debug address register and use it for this
 * breakpoint. Eventually we enable it in the debug control register.
 *
 * Atomic: we hold the counter->ctx->lock and we only handle variables
 * and registers local to this cpu.
 */
int arch_install_hw_breakpoint(struct perf_event *bp)
{
	struct arch_hw_breakpoint *info = counter_arch_bp(bp);
	unsigned long *dr7;
	int i;

	for (i = 0; i < HBP_NUM; i++) {
		struct perf_event **slot = this_cpu_ptr(&bp_per_reg[i]);

		if (!*slot) {
			*slot = bp;
			break;
		}
	}

	if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot"))
		return -EBUSY;

	set_debugreg(info->address, i);
	__this_cpu_write(cpu_debugreg[i], info->address);

	dr7 = this_cpu_ptr(&cpu_dr7);
	*dr7 |= encode_dr7(i, info->len, info->type);

	set_debugreg(*dr7, 7);
	if (info->mask)
		set_dr_addr_mask(info->mask, i);

	return 0;
}

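/*
 * Illustrative sketch, not part of the original file: a minimal kernel-side
 * user of the generic hw_breakpoint API, loosely modeled on
 * samples/hw_breakpoint/data_breakpoint.c. register_wide_hw_breakpoint()
 * creates one perf event per CPU and eventually reaches
 * arch_install_hw_breakpoint() above. The watched symbol and the handler
 * name are hypothetical.
 */
#if 0	/* example only */
static struct perf_event * __percpu *sample_hbp;

static void sample_hbp_handler(struct perf_event *bp,
			       struct perf_sample_data *data,
			       struct pt_regs *regs)
{
	pr_info("hw-breakpoint: write to %#lx\n",
		(unsigned long)bp->attr.bp_addr);
	dump_stack();
}

static int __init sample_hbp_init(void)
{
	struct perf_event_attr attr;

	hw_breakpoint_init(&attr);
	attr.bp_addr = kallsyms_lookup_name("jiffies");	/* hypothetical target */
	attr.bp_len  = HW_BREAKPOINT_LEN_4;
	attr.bp_type = HW_BREAKPOINT_W;

	sample_hbp = register_wide_hw_breakpoint(&attr, sample_hbp_handler, NULL);
	return PTR_ERR_OR_ZERO(sample_hbp);
}

static void __exit sample_hbp_exit(void)
{
	unregister_wide_hw_breakpoint(sample_hbp);
}
#endif
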
/*
 * Uninstall the breakpoint contained in the given counter.
 *
 * First we search the debug address register it uses and then we disable
 * it.
 *
 * Atomic: we hold the counter->ctx->lock and we only handle variables
 * and registers local to this cpu.
 */
void arch_uninstall_hw_breakpoint(struct perf_event *bp)
{
	struct arch_hw_breakpoint *info = counter_arch_bp(bp);
	unsigned long *dr7;
	int i;

	for (i = 0; i < HBP_NUM; i++) {
		struct perf_event **slot = this_cpu_ptr(&bp_per_reg[i]);

		if (*slot == bp) {
			*slot = NULL;
			break;
		}
	}

	if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot"))
		return;

	dr7 = this_cpu_ptr(&cpu_dr7);
	*dr7 &= ~__encode_dr7(i, info->len, info->type);

	set_debugreg(*dr7, 7);
	if (info->mask)
		set_dr_addr_mask(0, i);
}

static int arch_bp_generic_len(int x86_len)
{
	switch (x86_len) {
	case X86_BREAKPOINT_LEN_1:
		return HW_BREAKPOINT_LEN_1;
	case X86_BREAKPOINT_LEN_2:
		return HW_BREAKPOINT_LEN_2;
	case X86_BREAKPOINT_LEN_4:
		return HW_BREAKPOINT_LEN_4;
#ifdef CONFIG_X86_64
	case X86_BREAKPOINT_LEN_8:
		return HW_BREAKPOINT_LEN_8;
#endif
	default:
		return -EINVAL;
	}
}

int arch_bp_generic_fields(int x86_len, int x86_type,
			   int *gen_len, int *gen_type)
{
	int len;

	/* Type */
	switch (x86_type) {
	case X86_BREAKPOINT_EXECUTE:
		if (x86_len != X86_BREAKPOINT_LEN_X)
			return -EINVAL;

		*gen_type = HW_BREAKPOINT_X;
		*gen_len = sizeof(long);
		return 0;
	case X86_BREAKPOINT_WRITE:
		*gen_type = HW_BREAKPOINT_W;
		break;
	case X86_BREAKPOINT_RW:
		*gen_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R;
		break;
	default:
		return -EINVAL;
	}

	/* Len */
	len = arch_bp_generic_len(x86_len);
	if (len < 0)
		return -EINVAL;
	*gen_len = len;

	return 0;
}

/*
 * Check for virtual address in kernel space.
 */
int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw)
{
	unsigned long va;
	int len;

	va = hw->address;
	len = arch_bp_generic_len(hw->len);
	WARN_ON_ONCE(len < 0);

	/*
	 * We don't need to worry about va + len - 1 overflowing:
	 * we already require that va is aligned to a multiple of len.
	 */
	return (va >= TASK_SIZE_MAX) || ((va + len - 1) >= TASK_SIZE_MAX);
}

/*
 * Checks whether the range [addr, end] overlaps the area [base, base + size).
 */
static inline bool within_area(unsigned long addr, unsigned long end,
			       unsigned long base, unsigned long size)
{
	return end >= base && addr < (base + size);
}

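/*
 * Worked example (illustrative, with hypothetical numbers): within_area()
 * treats [addr, end] as inclusive and [base, base + size) as half-open.
 * With base = 0x1000 and size = 0x1000:
 *
 *	within_area(0x0ff0, 0x0fff, 0x1000, 0x1000) == false  // ends below base
 *	within_area(0x0ff0, 0x1000, 0x1000, 0x1000) == true   // last byte hits base
 *	within_area(0x1fff, 0x2100, 0x1000, 0x1000) == true   // starts inside the area
 *	within_area(0x2000, 0x2010, 0x1000, 0x1000) == false  // starts at base + size
 */
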
/*
 * Checks whether the range from addr to end, inclusive, overlaps the fixed
 * mapped CPU entry area range or other ranges used for CPU entry.
 */
static inline bool within_cpu_entry(unsigned long addr, unsigned long end)
{
	int cpu;

	/* CPU entry area is always used for CPU entry */
	if (within_area(addr, end, CPU_ENTRY_AREA_BASE,
			CPU_ENTRY_AREA_TOTAL_SIZE))
		return true;

	for_each_possible_cpu(cpu) {
		/* The original rw GDT is being used after load_direct_gdt() */
		if (within_area(addr, end, (unsigned long)get_cpu_gdt_rw(cpu),
				GDT_SIZE))
			return true;

		/*
		 * cpu_tss_rw is not directly referenced by hardware, but
		 * cpu_tss_rw is also used in CPU entry code.
		 */
		if (within_area(addr, end,
				(unsigned long)&per_cpu(cpu_tss_rw, cpu),
				sizeof(struct tss_struct)))
			return true;
	}

	return false;
}

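/*
 * Illustrative sketch, not part of the original file: a request that overlaps
 * one of the ranges above is refused by arch_build_bp_info() below, so a
 * caller such as the one sketched here gets ERR_PTR(-EINVAL) back. The
 * function name is hypothetical; register_user_hw_breakpoint() is the generic
 * per-task registration helper.
 */
#if 0	/* example only */
static struct perf_event *try_bp_on_cpu_entry_area(struct task_struct *tsk)
{
	struct perf_event_attr attr;

	hw_breakpoint_init(&attr);
	attr.bp_addr = CPU_ENTRY_AREA_BASE;	/* overlaps the CPU entry area */
	attr.bp_len  = HW_BREAKPOINT_LEN_1;
	attr.bp_type = HW_BREAKPOINT_W;

	/* Expected to fail with ERR_PTR(-EINVAL) because of within_cpu_entry() */
	return register_user_hw_breakpoint(&attr, NULL, NULL, tsk);
}
#endif
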
static int arch_build_bp_info(struct perf_event *bp,
			      const struct perf_event_attr *attr,
			      struct arch_hw_breakpoint *hw)
{
	unsigned long bp_end;

	bp_end = attr->bp_addr + attr->bp_len - 1;
	if (bp_end < attr->bp_addr)
		return -EINVAL;

	/*
	 * Prevent any breakpoint of any type that overlaps the CPU
	 * entry area and data. This protects the IST stacks and also
	 * reduces the chance that we ever find out what happens if
	 * there's a data breakpoint on the GDT, IDT, or TSS.
	 */
	if (within_cpu_entry(attr->bp_addr, bp_end))
		return -EINVAL;

	hw->address = attr->bp_addr;
	hw->mask = 0;

	/* Type */
	switch (attr->bp_type) {
	case HW_BREAKPOINT_W:
		hw->type = X86_BREAKPOINT_WRITE;
		break;
	case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
		hw->type = X86_BREAKPOINT_RW;
		break;
	case HW_BREAKPOINT_X:
		/*
		 * We don't allow kernel breakpoints in places that are not
		 * acceptable for kprobes. On non-kprobes kernels, we don't
		 * allow kernel breakpoints at all.
		 */
		if (attr->bp_addr >= TASK_SIZE_MAX) {
			if (within_kprobe_blacklist(attr->bp_addr))
				return -EINVAL;
		}

		hw->type = X86_BREAKPOINT_EXECUTE;
		/*
		 * x86 inst breakpoints need to have a specific undefined len.
		 * But we still need to check userspace is not trying to setup
		 * an unsupported length, to get a range breakpoint for example.
		 */
		if (attr->bp_len == sizeof(long)) {
			hw->len = X86_BREAKPOINT_LEN_X;
			return 0;
		}
		/* fall through */
	default:
		return -EINVAL;
	}

	/* Len */
	switch (attr->bp_len) {
	case HW_BREAKPOINT_LEN_1:
		hw->len = X86_BREAKPOINT_LEN_1;
		break;
	case HW_BREAKPOINT_LEN_2:
		hw->len = X86_BREAKPOINT_LEN_2;
		break;
	case HW_BREAKPOINT_LEN_4:
		hw->len = X86_BREAKPOINT_LEN_4;
		break;
#ifdef CONFIG_X86_64
	case HW_BREAKPOINT_LEN_8:
		hw->len = X86_BREAKPOINT_LEN_8;
		break;
#endif
	default:
		/* AMD range breakpoint */
		if (!is_power_of_2(attr->bp_len))
			return -EINVAL;
		if (attr->bp_addr & (attr->bp_len - 1))
			return -EINVAL;

		if (!boot_cpu_has(X86_FEATURE_BPEXT))
			return -EOPNOTSUPP;

		/*
		 * It's impossible to use a range breakpoint to fake out
		 * user vs kernel detection because bp_len - 1 can't
		 * have the high bit set. If we ever allow range instruction
		 * breakpoints, then we'll have to check for kprobe-blacklisted
		 * addresses anywhere in the range.
		 */
		hw->mask = attr->bp_len - 1;
		hw->len = X86_BREAKPOINT_LEN_1;
	}

	return 0;
}

/*
 * Validate the arch-specific HW Breakpoint register settings
 */
int hw_breakpoint_arch_parse(struct perf_event *bp,
			     const struct perf_event_attr *attr,
			     struct arch_hw_breakpoint *hw)
{
	unsigned int align;
	int ret;

	ret = arch_build_bp_info(bp, attr, hw);
	if (ret)
		return ret;

	switch (hw->len) {
	case X86_BREAKPOINT_LEN_1:
		align = 0;
		if (hw->mask)
			align = hw->mask;
		break;
	case X86_BREAKPOINT_LEN_2:
		align = 1;
		break;
	case X86_BREAKPOINT_LEN_4:
		align = 3;
		break;
#ifdef CONFIG_X86_64
	case X86_BREAKPOINT_LEN_8:
		align = 7;
		break;
#endif
	default:
		WARN_ON_ONCE(1);
		return -EINVAL;
	}

	/*
	 * Check that the low-order bits of the address are appropriate
	 * for the alignment implied by len.
	 */
	if (hw->address & align)
		return -EINVAL;

	return 0;
}

/*
 * Dump the debug register contents to the user.
 * We can't dump our per cpu values because they
 * may contain cpu wide breakpoints, something that
 * doesn't belong to the current task.
 *
 * TODO: include non-ptrace user breakpoints (perf)
 */
void aout_dump_debugregs(struct user *dump)
{
	int i;
	int dr7 = 0;
	struct perf_event *bp;
	struct arch_hw_breakpoint *info;
	struct thread_struct *thread = &current->thread;

	for (i = 0; i < HBP_NUM; i++) {
		bp = thread->ptrace_bps[i];

		if (bp && !bp->attr.disabled) {
			dump->u_debugreg[i] = bp->attr.bp_addr;
			info = counter_arch_bp(bp);
			dr7 |= encode_dr7(i, info->len, info->type);
		} else {
			dump->u_debugreg[i] = 0;
		}
	}

	dump->u_debugreg[4] = 0;
	dump->u_debugreg[5] = 0;
	dump->u_debugreg[6] = current->thread.debugreg6;

	dump->u_debugreg[7] = dr7;
}
EXPORT_SYMBOL_GPL(aout_dump_debugregs);

/*
 * Release the user breakpoints used by ptrace
 */
void flush_ptrace_hw_breakpoint(struct task_struct *tsk)
{
	int i;
	struct thread_struct *t = &tsk->thread;

	for (i = 0; i < HBP_NUM; i++) {
		unregister_hw_breakpoint(t->ptrace_bps[i]);
		t->ptrace_bps[i] = NULL;
	}

	t->debugreg6 = 0;
	t->ptrace_dr7 = 0;
}

void hw_breakpoint_restore(void)
{
	set_debugreg(__this_cpu_read(cpu_debugreg[0]), 0);
	set_debugreg(__this_cpu_read(cpu_debugreg[1]), 1);
	set_debugreg(__this_cpu_read(cpu_debugreg[2]), 2);
	set_debugreg(__this_cpu_read(cpu_debugreg[3]), 3);
	set_debugreg(current->thread.debugreg6, 6);
	set_debugreg(__this_cpu_read(cpu_dr7), 7);
}
EXPORT_SYMBOL_GPL(hw_breakpoint_restore);

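/*
 * Usage note (illustrative, an assumption about callers rather than code from
 * this file): code that temporarily clobbers the hardware debug registers,
 * e.g. around a low-power state or a guest run, can resynchronize them with
 * the per-CPU copies kept above (and the task's saved DR6) like this:
 *
 *	local_irq_disable();
 *	...				// DR0-DR3, DR6 and DR7 were trashed
 *	hw_breakpoint_restore();	// rewrite them from cpu_debugreg/cpu_dr7
 *	local_irq_enable();
 */
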
/*
 * Handle debug exception notifications.
 *
 * Return value is either NOTIFY_STOP or NOTIFY_DONE as explained below.
 *
 * NOTIFY_DONE returned if one of the following conditions is true.
 * i) When the causative address is from user-space and the exception
 * is a valid one, i.e. not triggered as a result of lazy debug register
 * switching
 * ii) When there are more bits than trap<n> set in the DR6 register (such
 * as BD, BS or BT) indicating that more than one debug condition is
 * met and requires some more action in do_debug().
 *
 * NOTIFY_STOP returned for all other cases
 *
 */
static int hw_breakpoint_handler(struct die_args *args)
{
	int i, cpu, rc = NOTIFY_STOP;
	struct perf_event *bp;
	unsigned long dr6;
	unsigned long *dr6_p;

	/* The DR6 value is pointed to by args->err */
	dr6_p = (unsigned long *)ERR_PTR(args->err);
	dr6 = *dr6_p;

	/* If it's a single step, TRAP bits are random */
	if (dr6 & DR_STEP)
		return NOTIFY_DONE;

	/* Do an early return if no trap bits are set in DR6 */
	if ((dr6 & DR_TRAP_BITS) == 0)
		return NOTIFY_DONE;

	/*
	 * Assert that local interrupts are disabled.
	 * Reset the DRn bits in the virtualized register value.
	 * The ptrace trigger routine will add in whatever is needed.
	 */
	current->thread.debugreg6 &= ~DR_TRAP_BITS;
	cpu = get_cpu();

	/* Handle all the breakpoints that were triggered */
	for (i = 0; i < HBP_NUM; ++i) {
		if (likely(!(dr6 & (DR_TRAP0 << i))))
			continue;

		/*
		 * The counter may be concurrently released but that can only
		 * occur from a call_rcu() path. We can then safely fetch
		 * the breakpoint, use its callback, touch its counter
		 * while we are in an rcu_read_lock() path.
		 */
		rcu_read_lock();

		bp = per_cpu(bp_per_reg[i], cpu);
		/*
		 * Reset the 'i'th TRAP bit in dr6 to denote completion of
		 * exception handling
		 */
		(*dr6_p) &= ~(DR_TRAP0 << i);
		/*
		 * bp can be NULL due to lazy debug register switching
		 * or due to concurrent perf counter removing.
		 */
		if (!bp) {
			rcu_read_unlock();
			break;
		}

		perf_bp_event(bp, args->regs);

		/*
		 * Set up resume flag to avoid breakpoint recursion when
		 * returning back to origin.
		 */
		if (bp->hw.info.type == X86_BREAKPOINT_EXECUTE)
			args->regs->flags |= X86_EFLAGS_RF;

		rcu_read_unlock();
	}
	/*
	 * Further processing in do_debug() is needed for a) user-space
	 * breakpoints (to generate signals) and b) when the system has
	 * taken exception due to multiple causes
	 */
	if ((current->thread.debugreg6 & DR_TRAP_BITS) ||
	    (dr6 & (~DR_TRAP_BITS)))
		rc = NOTIFY_DONE;

	put_cpu();

	return rc;
}

/*
 * Handle debug exception notifications.
 */
int hw_breakpoint_exceptions_notify(
		struct notifier_block *unused, unsigned long val, void *data)
{
	if (val != DIE_DEBUG)
		return NOTIFY_DONE;

	return hw_breakpoint_handler(data);
}

void hw_breakpoint_pmu_read(struct perf_event *bp)
{
	/* TODO */
}

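/*
 * Illustrative user-space counterpart, not part of the kernel source: a
 * perf_event_open() based watchpoint whose attributes are validated by
 * hw_breakpoint_arch_parse() and whose hits are delivered through
 * hw_breakpoint_handler() above. Error handling is omitted and the watched
 * variable is hypothetical.
 */
#if 0	/* example only, user-space code */
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>
#include <linux/hw_breakpoint.h>

static int open_watchpoint(void *addr)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_BREAKPOINT;
	attr.bp_type = HW_BREAKPOINT_W;
	attr.bp_addr = (unsigned long)addr;
	attr.bp_len = HW_BREAKPOINT_LEN_4;
	attr.sample_period = 1;

	/* pid = 0 (this task), cpu = -1 (any CPU), group_fd = -1, flags = 0 */
	return syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0);
}
#endif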