// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *
 * Copyright (C) 2007 Alan Stern
 * Copyright (C) 2009 IBM Corporation
 * Copyright (C) 2009 Frederic Weisbecker <fweisbec@gmail.com>
 *
 * Authors: Alan Stern <stern@rowland.harvard.edu>
 *          K.Prasad <prasad@linux.vnet.ibm.com>
 *          Frederic Weisbecker <fweisbec@gmail.com>
 */

/*
 * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
 * using the CPU's debug registers.
 */

#include <linux/perf_event.h>
#include <linux/hw_breakpoint.h>
#include <linux/irqflags.h>
#include <linux/notifier.h>
#include <linux/kallsyms.h>
#include <linux/kprobes.h>
#include <linux/percpu.h>
#include <linux/kdebug.h>
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/smp.h>

#include <asm/hw_breakpoint.h>
#include <asm/processor.h>
#include <asm/debugreg.h>
#include <asm/user.h>

/* Per cpu debug control register value */
DEFINE_PER_CPU(unsigned long, cpu_dr7);
EXPORT_PER_CPU_SYMBOL(cpu_dr7);

/* Per cpu debug address registers values */
static DEFINE_PER_CPU(unsigned long, cpu_debugreg[HBP_NUM]);

/*
 * Stores the breakpoints currently in use on each breakpoint address
 * register for each cpus
 */
static DEFINE_PER_CPU(struct perf_event *, bp_per_reg[HBP_NUM]);


/*
 * Build the DR7 control bits for breakpoint slot @drnum: the len/type
 * nibble in the slot's control field plus the slot's global-enable bit.
 * Does NOT include DR_GLOBAL_SLOWDOWN; see encode_dr7() for that.
 */
static inline unsigned long
__encode_dr7(int drnum, unsigned int len, unsigned int type)
{
	unsigned long bp_info;

	/* Low 4 bits select R/W type and length for this slot. */
	bp_info = (len | type) & 0xf;
	bp_info <<= (DR_CONTROL_SHIFT + drnum * DR_CONTROL_SIZE);
	/* Set the global-enable bit for this slot. */
	bp_info |= (DR_GLOBAL_ENABLE << (drnum * DR_ENABLE_SIZE));

	return bp_info;
}

/*
 * Encode the length, type, Exact, and Enable bits for a particular breakpoint
 * as stored in debug register 7.
 */
unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type)
{
	/* Like __encode_dr7(), plus the GE (global exact) slowdown bit. */
	return __encode_dr7(drnum, len, type) | DR_GLOBAL_SLOWDOWN;
}

/*
 * Decode the length and type bits for a particular breakpoint as
 * stored in debug register 7.  Return the "enabled" status.
 */
int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, unsigned *type)
{
	int bp_info = dr7 >> (DR_CONTROL_SHIFT + bpnum * DR_CONTROL_SIZE);

	/*
	 * Recreate the X86_BREAKPOINT_* encodings from the raw control
	 * field: len values carry 0x40, type values carry 0x80.
	 */
	*len = (bp_info & 0xc) | 0x40;
	*type = (bp_info & 0x3) | 0x80;

	/* Local/global enable bits for this slot. */
	return (dr7 >> (bpnum * DR_ENABLE_SIZE)) & 0x3;
}

/*
 * Install a perf counter breakpoint.
 *
 * We seek a free debug address register and use it for this
 * breakpoint. Eventually we enable it in the debug control register.
 *
 * Atomic: we hold the counter->ctx->lock and we only handle variables
 * and registers local to this cpu.
 */
int arch_install_hw_breakpoint(struct perf_event *bp)
{
	struct arch_hw_breakpoint *info = counter_arch_bp(bp);
	unsigned long *dr7;
	int i;

	/* Claim the first free slot on this CPU. */
	for (i = 0; i < HBP_NUM; i++) {
		struct perf_event **slot = this_cpu_ptr(&bp_per_reg[i]);

		if (!*slot) {
			*slot = bp;
			break;
		}
	}

	if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot"))
		return -EBUSY;

	/* Program DR<i> and keep a shadow copy for hw_breakpoint_restore(). */
	set_debugreg(info->address, i);
	__this_cpu_write(cpu_debugreg[i], info->address);

	dr7 = this_cpu_ptr(&cpu_dr7);
	*dr7 |= encode_dr7(i, info->len, info->type);

	set_debugreg(*dr7, 7);
	if (info->mask)
		set_dr_addr_mask(info->mask, i);

	return 0;
}

/*
 * Uninstall the breakpoint contained in the given counter.
 *
 * First we search the debug address register it uses and then we disable
 * it.
 *
 * Atomic: we hold the counter->ctx->lock and we only handle variables
 * and registers local to this cpu.
 */
void arch_uninstall_hw_breakpoint(struct perf_event *bp)
{
	struct arch_hw_breakpoint *info = counter_arch_bp(bp);
	unsigned long *dr7;
	int i;

	/* Find the slot this breakpoint occupies on this CPU and clear it. */
	for (i = 0; i < HBP_NUM; i++) {
		struct perf_event **slot = this_cpu_ptr(&bp_per_reg[i]);

		if (*slot == bp) {
			*slot = NULL;
			break;
		}
	}

	if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot"))
		return;

	/*
	 * Drop this slot's control bits only; __encode_dr7() (without the
	 * shared DR_GLOBAL_SLOWDOWN bit) is used so other slots stay intact.
	 */
	dr7 = this_cpu_ptr(&cpu_dr7);
	*dr7 &= ~__encode_dr7(i, info->len, info->type);

	set_debugreg(*dr7, 7);
	if (info->mask)
		set_dr_addr_mask(0, i);
}

/* Map an X86_BREAKPOINT_LEN_* value to HW_BREAKPOINT_LEN_*, or -EINVAL. */
static int arch_bp_generic_len(int x86_len)
{
	switch (x86_len) {
	case X86_BREAKPOINT_LEN_1:
		return HW_BREAKPOINT_LEN_1;
	case X86_BREAKPOINT_LEN_2:
		return HW_BREAKPOINT_LEN_2;
	case X86_BREAKPOINT_LEN_4:
		return HW_BREAKPOINT_LEN_4;
#ifdef CONFIG_X86_64
	case X86_BREAKPOINT_LEN_8:
		return HW_BREAKPOINT_LEN_8;
#endif
	default:
		return -EINVAL;
	}
}

/*
 * Convert the arch-specific len/type pair into the generic
 * HW_BREAKPOINT_* equivalents via @gen_len/@gen_type.
 * Returns 0 on success, -EINVAL on an invalid combination.
 */
int arch_bp_generic_fields(int x86_len, int x86_type,
			   int *gen_len, int *gen_type)
{
	int len;

	/* Type */
	switch (x86_type) {
	case X86_BREAKPOINT_EXECUTE:
		/* Execute breakpoints use the special LEN_X pseudo-length. */
		if (x86_len != X86_BREAKPOINT_LEN_X)
			return -EINVAL;

		*gen_type = HW_BREAKPOINT_X;
		*gen_len = sizeof(long);
		return 0;
	case X86_BREAKPOINT_WRITE:
		*gen_type = HW_BREAKPOINT_W;
		break;
	case X86_BREAKPOINT_RW:
		*gen_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R;
		break;
	default:
		return -EINVAL;
	}

	/* Len */
	len = arch_bp_generic_len(x86_len);
	if (len < 0)
		return -EINVAL;
	*gen_len = len;

	return 0;
}

/*
 * Check for virtual address in kernel space.
 */
int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw)
{
	unsigned long va;
	int len;

	va = hw->address;
	len = arch_bp_generic_len(hw->len);
	WARN_ON_ONCE(len < 0);

	/*
	 * We don't need to worry about va + len - 1 overflowing:
	 * we already require that va is aligned to a multiple of len.
	 */
	return (va >= TASK_SIZE_MAX) || ((va + len - 1) >= TASK_SIZE_MAX);
}

/*
 * Checks whether the range [addr, end], overlaps the area [base, base + size).
 */
static inline bool within_area(unsigned long addr, unsigned long end,
			       unsigned long base, unsigned long size)
{
	return end >= base && addr < (base + size);
}

/*
 * Checks whether the range from addr to end, inclusive, overlaps the CPU
 * entry area range.
 */
static inline bool within_cpu_entry_area(unsigned long addr, unsigned long end)
{
	return within_area(addr, end, CPU_ENTRY_AREA_BASE,
			   CPU_ENTRY_AREA_TOTAL_SIZE);
}

/*
 * Translate a generic perf_event_attr breakpoint request into the
 * arch_hw_breakpoint fields (address, mask, type, len).
 * Returns 0 on success or a negative errno on an invalid request.
 */
static int arch_build_bp_info(struct perf_event *bp,
			      const struct perf_event_attr *attr,
			      struct arch_hw_breakpoint *hw)
{
	unsigned long bp_end;

	/* Reject ranges whose inclusive end wraps around the address space. */
	bp_end = attr->bp_addr + attr->bp_len - 1;
	if (bp_end < attr->bp_addr)
		return -EINVAL;

	/*
	 * Prevent any breakpoint of any type that overlaps the
	 * cpu_entry_area. This protects the IST stacks and also
	 * reduces the chance that we ever find out what happens if
	 * there's a data breakpoint on the GDT, IDT, or TSS.
	 */
	if (within_cpu_entry_area(attr->bp_addr, bp_end))
		return -EINVAL;

	hw->address = attr->bp_addr;
	hw->mask = 0;

	/* Type */
	switch (attr->bp_type) {
	case HW_BREAKPOINT_W:
		hw->type = X86_BREAKPOINT_WRITE;
		break;
	case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
		hw->type = X86_BREAKPOINT_RW;
		break;
	case HW_BREAKPOINT_X:
		/*
		 * We don't allow kernel breakpoints in places that are not
		 * acceptable for kprobes.  On non-kprobes kernels, we don't
		 * allow kernel breakpoints at all.
		 */
		if (attr->bp_addr >= TASK_SIZE_MAX) {
			if (within_kprobe_blacklist(attr->bp_addr))
				return -EINVAL;
		}

		hw->type = X86_BREAKPOINT_EXECUTE;
		/*
		 * x86 inst breakpoints need to have a specific undefined len.
		 * But we still need to check userspace is not trying to setup
		 * an unsupported length, to get a range breakpoint for example.
		 */
		if (attr->bp_len == sizeof(long)) {
			hw->len = X86_BREAKPOINT_LEN_X;
			return 0;
		}
		/* fall through */
	default:
		return -EINVAL;
	}

	/* Len */
	switch (attr->bp_len) {
	case HW_BREAKPOINT_LEN_1:
		hw->len = X86_BREAKPOINT_LEN_1;
		break;
	case HW_BREAKPOINT_LEN_2:
		hw->len = X86_BREAKPOINT_LEN_2;
		break;
	case HW_BREAKPOINT_LEN_4:
		hw->len = X86_BREAKPOINT_LEN_4;
		break;
#ifdef CONFIG_X86_64
	case HW_BREAKPOINT_LEN_8:
		hw->len = X86_BREAKPOINT_LEN_8;
		break;
#endif
	default:
		/* AMD range breakpoint */
		if (!is_power_of_2(attr->bp_len))
			return -EINVAL;
		if (attr->bp_addr & (attr->bp_len - 1))
			return -EINVAL;

		if (!boot_cpu_has(X86_FEATURE_BPEXT))
			return -EOPNOTSUPP;

		/*
		 * It's impossible to use a range breakpoint to fake out
		 * user vs kernel detection because bp_len - 1 can't
		 * have the high bit set.  If we ever allow range instruction
		 * breakpoints, then we'll have to check for kprobe-blacklisted
		 * addresses anywhere in the range.
		 */
		hw->mask = attr->bp_len - 1;
		hw->len = X86_BREAKPOINT_LEN_1;
	}

	return 0;
}

/*
 * Validate the arch-specific HW Breakpoint register settings
 */
int hw_breakpoint_arch_parse(struct perf_event *bp,
			     const struct perf_event_attr *attr,
			     struct arch_hw_breakpoint *hw)
{
	unsigned int align;
	int ret;


	ret = arch_build_bp_info(bp, attr, hw);
	if (ret)
		return ret;

	switch (hw->len) {
	case X86_BREAKPOINT_LEN_1:
		align = 0;
		/* AMD range breakpoints align to their (power-of-2 - 1) mask. */
		if (hw->mask)
			align = hw->mask;
		break;
	case X86_BREAKPOINT_LEN_2:
		align = 1;
		break;
	case X86_BREAKPOINT_LEN_4:
		align = 3;
		break;
#ifdef CONFIG_X86_64
	case X86_BREAKPOINT_LEN_8:
		align = 7;
		break;
#endif
	default:
		WARN_ON_ONCE(1);
		return -EINVAL;
	}

	/*
	 * Check that the low-order bits of the address are appropriate
	 * for the alignment implied by len.
	 */
	if (hw->address & align)
		return -EINVAL;

	return 0;
}

/*
 * Dump the debug register contents to the user.
 * We can't dump our per cpu values because it
 * may contain cpu wide breakpoint, something that
 * doesn't belong to the current task.
397 * 398 * TODO: include non-ptrace user breakpoints (perf) 399 */ 400 void aout_dump_debugregs(struct user *dump) 401 { 402 int i; 403 int dr7 = 0; 404 struct perf_event *bp; 405 struct arch_hw_breakpoint *info; 406 struct thread_struct *thread = ¤t->thread; 407 408 for (i = 0; i < HBP_NUM; i++) { 409 bp = thread->ptrace_bps[i]; 410 411 if (bp && !bp->attr.disabled) { 412 dump->u_debugreg[i] = bp->attr.bp_addr; 413 info = counter_arch_bp(bp); 414 dr7 |= encode_dr7(i, info->len, info->type); 415 } else { 416 dump->u_debugreg[i] = 0; 417 } 418 } 419 420 dump->u_debugreg[4] = 0; 421 dump->u_debugreg[5] = 0; 422 dump->u_debugreg[6] = current->thread.debugreg6; 423 424 dump->u_debugreg[7] = dr7; 425 } 426 EXPORT_SYMBOL_GPL(aout_dump_debugregs); 427 428 /* 429 * Release the user breakpoints used by ptrace 430 */ 431 void flush_ptrace_hw_breakpoint(struct task_struct *tsk) 432 { 433 int i; 434 struct thread_struct *t = &tsk->thread; 435 436 for (i = 0; i < HBP_NUM; i++) { 437 unregister_hw_breakpoint(t->ptrace_bps[i]); 438 t->ptrace_bps[i] = NULL; 439 } 440 441 t->debugreg6 = 0; 442 t->ptrace_dr7 = 0; 443 } 444 445 void hw_breakpoint_restore(void) 446 { 447 set_debugreg(__this_cpu_read(cpu_debugreg[0]), 0); 448 set_debugreg(__this_cpu_read(cpu_debugreg[1]), 1); 449 set_debugreg(__this_cpu_read(cpu_debugreg[2]), 2); 450 set_debugreg(__this_cpu_read(cpu_debugreg[3]), 3); 451 set_debugreg(current->thread.debugreg6, 6); 452 set_debugreg(__this_cpu_read(cpu_dr7), 7); 453 } 454 EXPORT_SYMBOL_GPL(hw_breakpoint_restore); 455 456 /* 457 * Handle debug exception notifications. 458 * 459 * Return value is either NOTIFY_STOP or NOTIFY_DONE as explained below. 460 * 461 * NOTIFY_DONE returned if one of the following conditions is true. 462 * i) When the causative address is from user-space and the exception 463 * is a valid one, i.e. 
 * not triggered as a result of lazy debug register
 * switching
 * ii) When there are more bits than trap<n> set in DR6 register (such
 * as BD, BS or BT) indicating that more than one debug condition is
 * met and requires some more action in do_debug().
 *
 * NOTIFY_STOP returned for all other cases
 *
 */
static int hw_breakpoint_handler(struct die_args *args)
{
	int i, cpu, rc = NOTIFY_STOP;
	struct perf_event *bp;
	unsigned long dr6;
	unsigned long *dr6_p;

	/* The DR6 value is pointed by args->err */
	dr6_p = (unsigned long *)ERR_PTR(args->err);
	dr6 = *dr6_p;

	/* If it's a single step, TRAP bits are random */
	if (dr6 & DR_STEP)
		return NOTIFY_DONE;

	/* Do an early return if no trap bits are set in DR6 */
	if ((dr6 & DR_TRAP_BITS) == 0)
		return NOTIFY_DONE;

	/*
	 * Assert that local interrupts are disabled
	 * Reset the DRn bits in the virtualized register value.
	 * The ptrace trigger routine will add in whatever is needed.
	 */
	current->thread.debugreg6 &= ~DR_TRAP_BITS;
	cpu = get_cpu();

	/* Handle all the breakpoints that were triggered */
	for (i = 0; i < HBP_NUM; ++i) {
		if (likely(!(dr6 & (DR_TRAP0 << i))))
			continue;

		/*
		 * The counter may be concurrently released but that can only
		 * occur from a call_rcu() path. We can then safely fetch
		 * the breakpoint, use its callback, touch its counter
		 * while we are in an rcu_read_lock() path.
		 */
		rcu_read_lock();

		bp = per_cpu(bp_per_reg[i], cpu);
		/*
		 * Reset the 'i'th TRAP bit in dr6 to denote completion of
		 * exception handling
		 */
		(*dr6_p) &= ~(DR_TRAP0 << i);
		/*
		 * bp can be NULL due to lazy debug register switching
		 * or due to concurrent perf counter removing.
		 */
		if (!bp) {
			rcu_read_unlock();
			break;
		}

		perf_bp_event(bp, args->regs);

		/*
		 * Set up resume flag to avoid breakpoint recursion when
		 * returning back to origin.
		 */
		if (bp->hw.info.type == X86_BREAKPOINT_EXECUTE)
			args->regs->flags |= X86_EFLAGS_RF;

		rcu_read_unlock();
	}
	/*
	 * Further processing in do_debug() is needed for a) user-space
	 * breakpoints (to generate signals) and b) when the system has
	 * taken exception due to multiple causes
	 */
	if ((current->thread.debugreg6 & DR_TRAP_BITS) ||
	    (dr6 & (~DR_TRAP_BITS)))
		rc = NOTIFY_DONE;

	put_cpu();

	return rc;
}

/*
 * Handle debug exception notifications.
 * Thin die-notifier wrapper: only DIE_DEBUG events are forwarded to
 * hw_breakpoint_handler(); everything else returns NOTIFY_DONE.
 */
int hw_breakpoint_exceptions_notify(
		struct notifier_block *unused, unsigned long val, void *data)
{
	if (val != DIE_DEBUG)
		return NOTIFY_DONE;

	return hw_breakpoint_handler(data);
}

/* perf pmu->read() hook: nothing to read for breakpoint events yet. */
void hw_breakpoint_pmu_read(struct perf_event *bp)
{
	/* TODO */
}