1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * 4 * Copyright (C) 2007 Alan Stern 5 * Copyright (C) 2009 IBM Corporation 6 * Copyright (C) 2009 Frederic Weisbecker <fweisbec@gmail.com> 7 * 8 * Authors: Alan Stern <stern@rowland.harvard.edu> 9 * K.Prasad <prasad@linux.vnet.ibm.com> 10 * Frederic Weisbecker <fweisbec@gmail.com> 11 */ 12 13 /* 14 * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility, 15 * using the CPU's debug registers. 16 */ 17 18 #include <linux/perf_event.h> 19 #include <linux/hw_breakpoint.h> 20 #include <linux/irqflags.h> 21 #include <linux/notifier.h> 22 #include <linux/kallsyms.h> 23 #include <linux/kprobes.h> 24 #include <linux/percpu.h> 25 #include <linux/kdebug.h> 26 #include <linux/kernel.h> 27 #include <linux/export.h> 28 #include <linux/sched.h> 29 #include <linux/smp.h> 30 31 #include <asm/hw_breakpoint.h> 32 #include <asm/processor.h> 33 #include <asm/debugreg.h> 34 #include <asm/user.h> 35 #include <asm/desc.h> 36 37 /* Per cpu debug control register value */ 38 DEFINE_PER_CPU(unsigned long, cpu_dr7); 39 EXPORT_PER_CPU_SYMBOL(cpu_dr7); 40 41 /* Per cpu debug address registers values */ 42 static DEFINE_PER_CPU(unsigned long, cpu_debugreg[HBP_NUM]); 43 44 /* 45 * Stores the breakpoints currently in use on each breakpoint address 46 * register for each cpus 47 */ 48 static DEFINE_PER_CPU(struct perf_event *, bp_per_reg[HBP_NUM]); 49 50 51 static inline unsigned long 52 __encode_dr7(int drnum, unsigned int len, unsigned int type) 53 { 54 unsigned long bp_info; 55 56 bp_info = (len | type) & 0xf; 57 bp_info <<= (DR_CONTROL_SHIFT + drnum * DR_CONTROL_SIZE); 58 bp_info |= (DR_GLOBAL_ENABLE << (drnum * DR_ENABLE_SIZE)); 59 60 return bp_info; 61 } 62 63 /* 64 * Encode the length, type, Exact, and Enable bits for a particular breakpoint 65 * as stored in debug register 7. 
66 */ 67 unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type) 68 { 69 return __encode_dr7(drnum, len, type) | DR_GLOBAL_SLOWDOWN; 70 } 71 72 /* 73 * Decode the length and type bits for a particular breakpoint as 74 * stored in debug register 7. Return the "enabled" status. 75 */ 76 int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, unsigned *type) 77 { 78 int bp_info = dr7 >> (DR_CONTROL_SHIFT + bpnum * DR_CONTROL_SIZE); 79 80 *len = (bp_info & 0xc) | 0x40; 81 *type = (bp_info & 0x3) | 0x80; 82 83 return (dr7 >> (bpnum * DR_ENABLE_SIZE)) & 0x3; 84 } 85 86 /* 87 * Install a perf counter breakpoint. 88 * 89 * We seek a free debug address register and use it for this 90 * breakpoint. Eventually we enable it in the debug control register. 91 * 92 * Atomic: we hold the counter->ctx->lock and we only handle variables 93 * and registers local to this cpu. 94 */ 95 int arch_install_hw_breakpoint(struct perf_event *bp) 96 { 97 struct arch_hw_breakpoint *info = counter_arch_bp(bp); 98 unsigned long *dr7; 99 int i; 100 101 for (i = 0; i < HBP_NUM; i++) { 102 struct perf_event **slot = this_cpu_ptr(&bp_per_reg[i]); 103 104 if (!*slot) { 105 *slot = bp; 106 break; 107 } 108 } 109 110 if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot")) 111 return -EBUSY; 112 113 set_debugreg(info->address, i); 114 __this_cpu_write(cpu_debugreg[i], info->address); 115 116 dr7 = this_cpu_ptr(&cpu_dr7); 117 *dr7 |= encode_dr7(i, info->len, info->type); 118 119 set_debugreg(*dr7, 7); 120 if (info->mask) 121 set_dr_addr_mask(info->mask, i); 122 123 return 0; 124 } 125 126 /* 127 * Uninstall the breakpoint contained in the given counter. 128 * 129 * First we search the debug address register it uses and then we disable 130 * it. 131 * 132 * Atomic: we hold the counter->ctx->lock and we only handle variables 133 * and registers local to this cpu. 
134 */ 135 void arch_uninstall_hw_breakpoint(struct perf_event *bp) 136 { 137 struct arch_hw_breakpoint *info = counter_arch_bp(bp); 138 unsigned long *dr7; 139 int i; 140 141 for (i = 0; i < HBP_NUM; i++) { 142 struct perf_event **slot = this_cpu_ptr(&bp_per_reg[i]); 143 144 if (*slot == bp) { 145 *slot = NULL; 146 break; 147 } 148 } 149 150 if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot")) 151 return; 152 153 dr7 = this_cpu_ptr(&cpu_dr7); 154 *dr7 &= ~__encode_dr7(i, info->len, info->type); 155 156 set_debugreg(*dr7, 7); 157 if (info->mask) 158 set_dr_addr_mask(0, i); 159 } 160 161 static int arch_bp_generic_len(int x86_len) 162 { 163 switch (x86_len) { 164 case X86_BREAKPOINT_LEN_1: 165 return HW_BREAKPOINT_LEN_1; 166 case X86_BREAKPOINT_LEN_2: 167 return HW_BREAKPOINT_LEN_2; 168 case X86_BREAKPOINT_LEN_4: 169 return HW_BREAKPOINT_LEN_4; 170 #ifdef CONFIG_X86_64 171 case X86_BREAKPOINT_LEN_8: 172 return HW_BREAKPOINT_LEN_8; 173 #endif 174 default: 175 return -EINVAL; 176 } 177 } 178 179 int arch_bp_generic_fields(int x86_len, int x86_type, 180 int *gen_len, int *gen_type) 181 { 182 int len; 183 184 /* Type */ 185 switch (x86_type) { 186 case X86_BREAKPOINT_EXECUTE: 187 if (x86_len != X86_BREAKPOINT_LEN_X) 188 return -EINVAL; 189 190 *gen_type = HW_BREAKPOINT_X; 191 *gen_len = sizeof(long); 192 return 0; 193 case X86_BREAKPOINT_WRITE: 194 *gen_type = HW_BREAKPOINT_W; 195 break; 196 case X86_BREAKPOINT_RW: 197 *gen_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R; 198 break; 199 default: 200 return -EINVAL; 201 } 202 203 /* Len */ 204 len = arch_bp_generic_len(x86_len); 205 if (len < 0) 206 return -EINVAL; 207 *gen_len = len; 208 209 return 0; 210 } 211 212 /* 213 * Check for virtual address in kernel space. 
 */
int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw)
{
	unsigned long va;
	int len;

	va = hw->address;
	len = arch_bp_generic_len(hw->len);
	/* hw->len was validated earlier, so a negative result is a bug. */
	WARN_ON_ONCE(len < 0);

	/*
	 * We don't need to worry about va + len - 1 overflowing:
	 * we already require that va is aligned to a multiple of len.
	 */
	return (va >= TASK_SIZE_MAX) || ((va + len - 1) >= TASK_SIZE_MAX);
}

/*
 * Checks whether the range [addr, end] (inclusive) overlaps the area
 * [base, base + size).
 */
static inline bool within_area(unsigned long addr, unsigned long end,
			       unsigned long base, unsigned long size)
{
	return end >= base && addr < (base + size);
}

/*
 * Checks whether the range from addr to end, inclusive, overlaps the fixed
 * mapped CPU entry area range or other ranges used for CPU entry.
 */
static inline bool within_cpu_entry(unsigned long addr, unsigned long end)
{
	int cpu;

	/* CPU entry area is always used for CPU entry */
	if (within_area(addr, end, CPU_ENTRY_AREA_BASE,
			CPU_ENTRY_AREA_TOTAL_SIZE))
		return true;

	for_each_possible_cpu(cpu) {
		/* The original rw GDT is being used after load_direct_gdt() */
		if (within_area(addr, end, (unsigned long)get_cpu_gdt_rw(cpu),
				GDT_SIZE))
			return true;
	}

	return false;
}

/*
 * Translate the generic perf_event_attr breakpoint description into the
 * arch-specific form (address, type, len, and the AMD range mask).
 * Returns 0 on success or a negative errno on an invalid/unsupported
 * request.
 */
static int arch_build_bp_info(struct perf_event *bp,
			      const struct perf_event_attr *attr,
			      struct arch_hw_breakpoint *hw)
{
	unsigned long bp_end;

	/* Reject a range whose end wraps around the address space. */
	bp_end = attr->bp_addr + attr->bp_len - 1;
	if (bp_end < attr->bp_addr)
		return -EINVAL;

	/*
	 * Prevent any breakpoint of any type that overlaps the CPU
	 * entry area and data.  This protects the IST stacks and also
	 * reduces the chance that we ever find out what happens if
	 * there's a data breakpoint on the GDT, IDT, or TSS.
	 */
	if (within_cpu_entry(attr->bp_addr, bp_end))
		return -EINVAL;

	hw->address = attr->bp_addr;
	hw->mask = 0;

	/* Type */
	switch (attr->bp_type) {
	case HW_BREAKPOINT_W:
		hw->type = X86_BREAKPOINT_WRITE;
		break;
	case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
		hw->type = X86_BREAKPOINT_RW;
		break;
	case HW_BREAKPOINT_X:
		/*
		 * We don't allow kernel breakpoints in places that are not
		 * acceptable for kprobes.  On non-kprobes kernels, we don't
		 * allow kernel breakpoints at all.
		 */
		if (attr->bp_addr >= TASK_SIZE_MAX) {
			if (within_kprobe_blacklist(attr->bp_addr))
				return -EINVAL;
		}

		hw->type = X86_BREAKPOINT_EXECUTE;
		/*
		 * x86 inst breakpoints need to have a specific undefined len.
		 * But we still need to check userspace is not trying to setup
		 * an unsupported length, to get a range breakpoint for example.
		 */
		if (attr->bp_len == sizeof(long)) {
			hw->len = X86_BREAKPOINT_LEN_X;
			return 0;
		}
		/* fall through */
	default:
		return -EINVAL;
	}

	/* Len */
	switch (attr->bp_len) {
	case HW_BREAKPOINT_LEN_1:
		hw->len = X86_BREAKPOINT_LEN_1;
		break;
	case HW_BREAKPOINT_LEN_2:
		hw->len = X86_BREAKPOINT_LEN_2;
		break;
	case HW_BREAKPOINT_LEN_4:
		hw->len = X86_BREAKPOINT_LEN_4;
		break;
#ifdef CONFIG_X86_64
	case HW_BREAKPOINT_LEN_8:
		hw->len = X86_BREAKPOINT_LEN_8;
		break;
#endif
	default:
		/* AMD range breakpoint */
		if (!is_power_of_2(attr->bp_len))
			return -EINVAL;
		if (attr->bp_addr & (attr->bp_len - 1))
			return -EINVAL;

		if (!boot_cpu_has(X86_FEATURE_BPEXT))
			return -EOPNOTSUPP;

		/*
		 * It's impossible to use a range breakpoint to fake out
		 * user vs kernel detection because bp_len - 1 can't
		 * have the high bit set.  If we ever allow range instruction
		 * breakpoints, then we'll have to check for kprobe-blacklisted
		 * addresses anywhere in the range.
		 */
		hw->mask = attr->bp_len - 1;
		hw->len = X86_BREAKPOINT_LEN_1;
	}

	return 0;
}

/*
 * Validate the arch-specific HW Breakpoint register settings
 */
int hw_breakpoint_arch_parse(struct perf_event *bp,
			     const struct perf_event_attr *attr,
			     struct arch_hw_breakpoint *hw)
{
	unsigned int align;
	int ret;

	ret = arch_build_bp_info(bp, attr, hw);
	if (ret)
		return ret;

	/* Derive the alignment mask implied by the breakpoint length. */
	switch (hw->len) {
	case X86_BREAKPOINT_LEN_1:
		align = 0;
		/* AMD range breakpoints must be aligned to the range mask. */
		if (hw->mask)
			align = hw->mask;
		break;
	case X86_BREAKPOINT_LEN_2:
		align = 1;
		break;
	case X86_BREAKPOINT_LEN_4:
		align = 3;
		break;
#ifdef CONFIG_X86_64
	case X86_BREAKPOINT_LEN_8:
		align = 7;
		break;
#endif
	default:
		WARN_ON_ONCE(1);
		return -EINVAL;
	}

	/*
	 * Check that the low-order bits of the address are appropriate
	 * for the alignment implied by len.
	 */
	if (hw->address & align)
		return -EINVAL;

	return 0;
}

/*
 * Dump the debug register contents to the user.
 * We can't dump our per cpu values because it
 * may contain cpu wide breakpoint, something that
 * doesn't belong to the current task.
411 * 412 * TODO: include non-ptrace user breakpoints (perf) 413 */ 414 void aout_dump_debugregs(struct user *dump) 415 { 416 int i; 417 int dr7 = 0; 418 struct perf_event *bp; 419 struct arch_hw_breakpoint *info; 420 struct thread_struct *thread = ¤t->thread; 421 422 for (i = 0; i < HBP_NUM; i++) { 423 bp = thread->ptrace_bps[i]; 424 425 if (bp && !bp->attr.disabled) { 426 dump->u_debugreg[i] = bp->attr.bp_addr; 427 info = counter_arch_bp(bp); 428 dr7 |= encode_dr7(i, info->len, info->type); 429 } else { 430 dump->u_debugreg[i] = 0; 431 } 432 } 433 434 dump->u_debugreg[4] = 0; 435 dump->u_debugreg[5] = 0; 436 dump->u_debugreg[6] = current->thread.debugreg6; 437 438 dump->u_debugreg[7] = dr7; 439 } 440 EXPORT_SYMBOL_GPL(aout_dump_debugregs); 441 442 /* 443 * Release the user breakpoints used by ptrace 444 */ 445 void flush_ptrace_hw_breakpoint(struct task_struct *tsk) 446 { 447 int i; 448 struct thread_struct *t = &tsk->thread; 449 450 for (i = 0; i < HBP_NUM; i++) { 451 unregister_hw_breakpoint(t->ptrace_bps[i]); 452 t->ptrace_bps[i] = NULL; 453 } 454 455 t->debugreg6 = 0; 456 t->ptrace_dr7 = 0; 457 } 458 459 void hw_breakpoint_restore(void) 460 { 461 set_debugreg(__this_cpu_read(cpu_debugreg[0]), 0); 462 set_debugreg(__this_cpu_read(cpu_debugreg[1]), 1); 463 set_debugreg(__this_cpu_read(cpu_debugreg[2]), 2); 464 set_debugreg(__this_cpu_read(cpu_debugreg[3]), 3); 465 set_debugreg(current->thread.debugreg6, 6); 466 set_debugreg(__this_cpu_read(cpu_dr7), 7); 467 } 468 EXPORT_SYMBOL_GPL(hw_breakpoint_restore); 469 470 /* 471 * Handle debug exception notifications. 472 * 473 * Return value is either NOTIFY_STOP or NOTIFY_DONE as explained below. 474 * 475 * NOTIFY_DONE returned if one of the following conditions is true. 476 * i) When the causative address is from user-space and the exception 477 * is a valid one, i.e. 
 * not triggered as a result of lazy debug register
 * switching
 * ii) When there are more bits than trap<n> set in DR6 register (such
 * as BD, BS or BT) indicating that more than one debug condition is
 * met and requires some more action in do_debug().
 *
 * NOTIFY_STOP returned for all other cases
 *
 */
static int hw_breakpoint_handler(struct die_args *args)
{
	int i, cpu, rc = NOTIFY_STOP;
	struct perf_event *bp;
	unsigned long dr6;
	unsigned long *dr6_p;

	/*
	 * The DR6 value is pointed by args->err: the caller smuggles a
	 * pointer to its DR6 copy through the error-code field.
	 */
	dr6_p = (unsigned long *)ERR_PTR(args->err);
	dr6 = *dr6_p;

	/* If it's a single step, TRAP bits are random */
	if (dr6 & DR_STEP)
		return NOTIFY_DONE;

	/* Do an early return if no trap bits are set in DR6 */
	if ((dr6 & DR_TRAP_BITS) == 0)
		return NOTIFY_DONE;

	/*
	 * Assert that local interrupts are disabled
	 * Reset the DRn bits in the virtualized register value.
	 * The ptrace trigger routine will add in whatever is needed.
	 */
	current->thread.debugreg6 &= ~DR_TRAP_BITS;
	cpu = get_cpu();

	/* Handle all the breakpoints that were triggered */
	for (i = 0; i < HBP_NUM; ++i) {
		if (likely(!(dr6 & (DR_TRAP0 << i))))
			continue;

		/*
		 * The counter may be concurrently released but that can only
		 * occur from a call_rcu() path. We can then safely fetch
		 * the breakpoint, use its callback, touch its counter
		 * while we are in an rcu_read_lock() path.
		 */
		rcu_read_lock();

		bp = per_cpu(bp_per_reg[i], cpu);
		/*
		 * Reset the 'i'th TRAP bit in dr6 to denote completion of
		 * exception handling
		 */
		(*dr6_p) &= ~(DR_TRAP0 << i);
		/*
		 * bp can be NULL due to lazy debug register switching
		 * or due to concurrent perf counter removing.
		 */
		if (!bp) {
			rcu_read_unlock();
			break;
		}

		perf_bp_event(bp, args->regs);

		/*
		 * Set up resume flag to avoid breakpoint recursion when
		 * returning back to origin.
		 */
		if (bp->hw.info.type == X86_BREAKPOINT_EXECUTE)
			args->regs->flags |= X86_EFLAGS_RF;

		rcu_read_unlock();
	}
	/*
	 * Further processing in do_debug() is needed for a) user-space
	 * breakpoints (to generate signals) and b) when the system has
	 * taken exception due to multiple causes
	 */
	if ((current->thread.debugreg6 & DR_TRAP_BITS) ||
	    (dr6 & (~DR_TRAP_BITS)))
		rc = NOTIFY_DONE;

	put_cpu();

	return rc;
}

/*
 * Handle debug exception notifications.
 * die-notifier entry point; dispatches DIE_DEBUG events to
 * hw_breakpoint_handler() and ignores everything else.
 */
int hw_breakpoint_exceptions_notify(
		struct notifier_block *unused, unsigned long val, void *data)
{
	if (val != DIE_DEBUG)
		return NOTIFY_DONE;

	return hw_breakpoint_handler(data);
}

/* perf PMU read callback: intentionally empty for breakpoints. */
void hw_breakpoint_pmu_read(struct perf_event *bp)
{
	/* TODO */
}