// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *
 * Copyright (C) 2007 Alan Stern
 * Copyright (C) 2009 IBM Corporation
 * Copyright (C) 2009 Frederic Weisbecker <fweisbec@gmail.com>
 *
 * Authors: Alan Stern <stern@rowland.harvard.edu>
 *          K.Prasad <prasad@linux.vnet.ibm.com>
 *          Frederic Weisbecker <fweisbec@gmail.com>
 */

/*
 * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
 * using the CPU's debug registers.
 */

#include <linux/perf_event.h>
#include <linux/hw_breakpoint.h>
#include <linux/irqflags.h>
#include <linux/notifier.h>
#include <linux/kallsyms.h>
#include <linux/kprobes.h>
#include <linux/percpu.h>
#include <linux/kdebug.h>
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/smp.h>

#include <asm/hw_breakpoint.h>
#include <asm/processor.h>
#include <asm/debugreg.h>
#include <asm/user.h>

/* Per cpu debug control register value */
DEFINE_PER_CPU(unsigned long, cpu_dr7);
EXPORT_PER_CPU_SYMBOL(cpu_dr7);

/* Per cpu debug address registers values */
static DEFINE_PER_CPU(unsigned long, cpu_debugreg[HBP_NUM]);

/*
 * Stores the breakpoint currently in use on each breakpoint address
 * register, for each CPU.
 */
static DEFINE_PER_CPU(struct perf_event *, bp_per_reg[HBP_NUM]);


static inline unsigned long
__encode_dr7(int drnum, unsigned int len, unsigned int type)
{
        unsigned long bp_info;

        bp_info = (len | type) & 0xf;
        bp_info <<= (DR_CONTROL_SHIFT + drnum * DR_CONTROL_SIZE);
        bp_info |= (DR_GLOBAL_ENABLE << (drnum * DR_ENABLE_SIZE));

        return bp_info;
}

/*
 * Encode the length, type, Exact, and Enable bits for a particular breakpoint
 * as stored in debug register 7.
 */
unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type)
{
        return __encode_dr7(drnum, len, type) | DR_GLOBAL_SLOWDOWN;
}

/*
 * Decode the length and type bits for a particular breakpoint as
 * stored in debug register 7.  Return the "enabled" status.
 */
int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, unsigned *type)
{
        int bp_info = dr7 >> (DR_CONTROL_SHIFT + bpnum * DR_CONTROL_SIZE);

        *len = (bp_info & 0xc) | 0x40;
        *type = (bp_info & 0x3) | 0x80;

        return (dr7 >> (bpnum * DR_ENABLE_SIZE)) & 0x3;
}

/*
 * Install a perf counter breakpoint.
 *
 * We seek a free debug address register and use it for this
 * breakpoint.  Finally we enable it in the debug control register.
 *
 * Atomic: we hold the counter->ctx->lock and we only handle variables
 * and registers local to this cpu.
 */
int arch_install_hw_breakpoint(struct perf_event *bp)
{
        struct arch_hw_breakpoint *info = counter_arch_bp(bp);
        unsigned long *dr7;
        int i;

        for (i = 0; i < HBP_NUM; i++) {
                struct perf_event **slot = this_cpu_ptr(&bp_per_reg[i]);

                if (!*slot) {
                        *slot = bp;
                        break;
                }
        }

        if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot"))
                return -EBUSY;

        set_debugreg(info->address, i);
        __this_cpu_write(cpu_debugreg[i], info->address);

        dr7 = this_cpu_ptr(&cpu_dr7);
        *dr7 |= encode_dr7(i, info->len, info->type);

        set_debugreg(*dr7, 7);
        if (info->mask)
                set_dr_addr_mask(info->mask, i);

        return 0;
}
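
/*
 * Worked example of the DR7 encoding used above (a sketch; the constant
 * values are taken from asm/debugreg.h and asm/hw_breakpoint.h):
 *
 *	encode_dr7(1, X86_BREAKPOINT_LEN_4, X86_BREAKPOINT_WRITE)
 *
 * (0x4c | 0x81) & 0xf = 0xd, i.e. LEN = 0b11 (4 bytes) and R/W = 0b01
 * (write).  Shifted into the DR1 control nibble (bits 20-23, since
 * DR_CONTROL_SHIFT = 16 and DR_CONTROL_SIZE = 4) this gives 0x00d00000.
 * OR-ing in the G1 enable bit (DR_GLOBAL_ENABLE << 2 = 0x8) and
 * DR_GLOBAL_SLOWDOWN (0x200, the GE bit) yields a final DR7
 * contribution of 0x00d00208.
 */
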
/*
 * Uninstall the breakpoint contained in the given counter.
 *
 * First we search for the debug address register it uses and then we
 * disable it.
 *
 * Atomic: we hold the counter->ctx->lock and we only handle variables
 * and registers local to this cpu.
 */
void arch_uninstall_hw_breakpoint(struct perf_event *bp)
{
        struct arch_hw_breakpoint *info = counter_arch_bp(bp);
        unsigned long *dr7;
        int i;

        for (i = 0; i < HBP_NUM; i++) {
                struct perf_event **slot = this_cpu_ptr(&bp_per_reg[i]);

                if (*slot == bp) {
                        *slot = NULL;
                        break;
                }
        }

        if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot"))
                return;

        dr7 = this_cpu_ptr(&cpu_dr7);
        *dr7 &= ~__encode_dr7(i, info->len, info->type);

        set_debugreg(*dr7, 7);
        if (info->mask)
                set_dr_addr_mask(0, i);
}

static int arch_bp_generic_len(int x86_len)
{
        switch (x86_len) {
        case X86_BREAKPOINT_LEN_1:
                return HW_BREAKPOINT_LEN_1;
        case X86_BREAKPOINT_LEN_2:
                return HW_BREAKPOINT_LEN_2;
        case X86_BREAKPOINT_LEN_4:
                return HW_BREAKPOINT_LEN_4;
#ifdef CONFIG_X86_64
        case X86_BREAKPOINT_LEN_8:
                return HW_BREAKPOINT_LEN_8;
#endif
        default:
                return -EINVAL;
        }
}

int arch_bp_generic_fields(int x86_len, int x86_type,
                           int *gen_len, int *gen_type)
{
        int len;

        /* Type */
        switch (x86_type) {
        case X86_BREAKPOINT_EXECUTE:
                if (x86_len != X86_BREAKPOINT_LEN_X)
                        return -EINVAL;

                *gen_type = HW_BREAKPOINT_X;
                *gen_len = sizeof(long);
                return 0;
        case X86_BREAKPOINT_WRITE:
                *gen_type = HW_BREAKPOINT_W;
                break;
        case X86_BREAKPOINT_RW:
                *gen_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R;
                break;
        default:
                return -EINVAL;
        }

        /* Len */
        len = arch_bp_generic_len(x86_len);
        if (len < 0)
                return -EINVAL;
        *gen_len = len;

        return 0;
}

/*
 * Check for a virtual address in kernel space.
 */
int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw)
{
        unsigned long va;
        int len;

        va = hw->address;
        len = arch_bp_generic_len(hw->len);
        WARN_ON_ONCE(len < 0);

        /*
         * We don't need to worry about va + len - 1 overflowing:
         * we already require that va is aligned to a multiple of len.
         */
        return (va >= TASK_SIZE_MAX) || ((va + len - 1) >= TASK_SIZE_MAX);
}
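
/*
 * A worked example of the check above (a sketch, assuming a 64-bit
 * kernel): a 4-byte breakpoint at TASK_SIZE_MAX - 2 starts in user
 * space, but its last byte (va + len - 1) lands at TASK_SIZE_MAX + 1,
 * so the second test reports it as touching kernel space.
 */
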
static int arch_build_bp_info(struct perf_event *bp,
                              const struct perf_event_attr *attr,
                              struct arch_hw_breakpoint *hw)
{
        hw->address = attr->bp_addr;
        hw->mask = 0;

        /* Type */
        switch (attr->bp_type) {
        case HW_BREAKPOINT_W:
                hw->type = X86_BREAKPOINT_WRITE;
                break;
        case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
                hw->type = X86_BREAKPOINT_RW;
                break;
        case HW_BREAKPOINT_X:
                /*
                 * We don't allow kernel breakpoints in places that are not
                 * acceptable for kprobes.  On non-kprobes kernels, we don't
                 * allow kernel breakpoints at all.
                 */
                if (attr->bp_addr >= TASK_SIZE_MAX) {
                        if (within_kprobe_blacklist(attr->bp_addr))
                                return -EINVAL;
                }

                hw->type = X86_BREAKPOINT_EXECUTE;
                /*
                 * x86 instruction breakpoints need to have a specific
                 * undefined len.  But we still need to check that userspace
                 * is not trying to set up an unsupported length, to get a
                 * range breakpoint for example.
                 */
                if (attr->bp_len == sizeof(long)) {
                        hw->len = X86_BREAKPOINT_LEN_X;
                        return 0;
                }
                /* fall through */
        default:
                return -EINVAL;
        }

        /* Len */
        switch (attr->bp_len) {
        case HW_BREAKPOINT_LEN_1:
                hw->len = X86_BREAKPOINT_LEN_1;
                break;
        case HW_BREAKPOINT_LEN_2:
                hw->len = X86_BREAKPOINT_LEN_2;
                break;
        case HW_BREAKPOINT_LEN_4:
                hw->len = X86_BREAKPOINT_LEN_4;
                break;
#ifdef CONFIG_X86_64
        case HW_BREAKPOINT_LEN_8:
                hw->len = X86_BREAKPOINT_LEN_8;
                break;
#endif
        default:
                /* AMD range breakpoint */
                if (!is_power_of_2(attr->bp_len))
                        return -EINVAL;
                if (attr->bp_addr & (attr->bp_len - 1))
                        return -EINVAL;

                if (!boot_cpu_has(X86_FEATURE_BPEXT))
                        return -EOPNOTSUPP;

                /*
                 * It's impossible to use a range breakpoint to fake out
                 * user vs kernel detection because bp_len - 1 can't
                 * have the high bit set.  If we ever allow range instruction
                 * breakpoints, then we'll have to check for kprobe-blacklisted
                 * addresses anywhere in the range.
                 */
                hw->mask = attr->bp_len - 1;
                hw->len = X86_BREAKPOINT_LEN_1;
        }

        return 0;
}

/*
 * Validate the arch-specific HW Breakpoint register settings.
 */
int hw_breakpoint_arch_parse(struct perf_event *bp,
                             const struct perf_event_attr *attr,
                             struct arch_hw_breakpoint *hw)
{
        unsigned int align;
        int ret;

        ret = arch_build_bp_info(bp, attr, hw);
        if (ret)
                return ret;

        switch (hw->len) {
        case X86_BREAKPOINT_LEN_1:
                align = 0;
                if (hw->mask)
                        align = hw->mask;
                break;
        case X86_BREAKPOINT_LEN_2:
                align = 1;
                break;
        case X86_BREAKPOINT_LEN_4:
                align = 3;
                break;
#ifdef CONFIG_X86_64
        case X86_BREAKPOINT_LEN_8:
                align = 7;
                break;
#endif
        default:
                WARN_ON_ONCE(1);
                return -EINVAL;
        }

        /*
         * Check that the low-order bits of the address are appropriate
         * for the alignment implied by len.
         */
        if (hw->address & align)
                return -EINVAL;

        return 0;
}
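
/*
 * For example (a sketch of the alignment rule above): a
 * HW_BREAKPOINT_LEN_4 watchpoint at address 0x1003 is rejected because
 * 0x1003 & 3 != 0, while the same watchpoint at 0x1004 passes.  For an
 * AMD range breakpoint with bp_len = 16, hw->mask is 0xf, so the base
 * address must be 16-byte aligned instead.
 */
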
/*
 * Dump the debug register contents to the user.
 *
 * We can't dump our per-cpu values because they may contain a
 * cpu-wide breakpoint, something that doesn't belong to the
 * current task.
 *
 * TODO: include non-ptrace user breakpoints (perf)
 */
void aout_dump_debugregs(struct user *dump)
{
        int i;
        int dr7 = 0;
        struct perf_event *bp;
        struct arch_hw_breakpoint *info;
        struct thread_struct *thread = &current->thread;

        for (i = 0; i < HBP_NUM; i++) {
                bp = thread->ptrace_bps[i];

                if (bp && !bp->attr.disabled) {
                        dump->u_debugreg[i] = bp->attr.bp_addr;
                        info = counter_arch_bp(bp);
                        dr7 |= encode_dr7(i, info->len, info->type);
                } else {
                        dump->u_debugreg[i] = 0;
                }
        }

        dump->u_debugreg[4] = 0;
        dump->u_debugreg[5] = 0;
        dump->u_debugreg[6] = current->thread.debugreg6;

        dump->u_debugreg[7] = dr7;
}
EXPORT_SYMBOL_GPL(aout_dump_debugregs);

/*
 * Release the user breakpoints used by ptrace.
 */
void flush_ptrace_hw_breakpoint(struct task_struct *tsk)
{
        int i;
        struct thread_struct *t = &tsk->thread;

        for (i = 0; i < HBP_NUM; i++) {
                unregister_hw_breakpoint(t->ptrace_bps[i]);
                t->ptrace_bps[i] = NULL;
        }

        t->debugreg6 = 0;
        t->ptrace_dr7 = 0;
}

void hw_breakpoint_restore(void)
{
        set_debugreg(__this_cpu_read(cpu_debugreg[0]), 0);
        set_debugreg(__this_cpu_read(cpu_debugreg[1]), 1);
        set_debugreg(__this_cpu_read(cpu_debugreg[2]), 2);
        set_debugreg(__this_cpu_read(cpu_debugreg[3]), 3);
        set_debugreg(current->thread.debugreg6, 6);
        set_debugreg(__this_cpu_read(cpu_dr7), 7);
}
EXPORT_SYMBOL_GPL(hw_breakpoint_restore);
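
/*
 * Note: hw_breakpoint_restore() above rewrites the hardware debug
 * registers from the per-cpu shadow copies kept in this file.  It is
 * exported for callers such as the S3 resume path and KVM, which need
 * to restore breakpoints after the registers have been clobbered (the
 * exact call sites vary between kernel versions).
 */
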
/*
 * Handle debug exception notifications.
 *
 * Return value is either NOTIFY_STOP or NOTIFY_DONE as explained below.
 *
 * NOTIFY_DONE is returned if one of the following conditions is true:
 * i) the causative address is from user space and the exception is a
 * valid one, i.e. not triggered as a result of lazy debug register
 * switching;
 * ii) more bits than trap<n> are set in the DR6 register (such as BD,
 * BS or BT), indicating that more than one debug condition is met and
 * requires some more action in do_debug().
 *
 * NOTIFY_STOP is returned for all other cases.
 */
static int hw_breakpoint_handler(struct die_args *args)
{
        int i, cpu, rc = NOTIFY_STOP;
        struct perf_event *bp;
        unsigned long dr7, dr6;
        unsigned long *dr6_p;

        /* The DR6 value is pointed to by args->err */
        dr6_p = (unsigned long *)ERR_PTR(args->err);
        dr6 = *dr6_p;

        /* If it's a single step, the TRAP bits are random */
        if (dr6 & DR_STEP)
                return NOTIFY_DONE;

        /* Do an early return if no trap bits are set in DR6 */
        if ((dr6 & DR_TRAP_BITS) == 0)
                return NOTIFY_DONE;

        get_debugreg(dr7, 7);
        /* Disable breakpoints during exception handling */
        set_debugreg(0UL, 7);
        /*
         * Assert that local interrupts are disabled.
         * Reset the DRn bits in the virtualized register value.
         * The ptrace trigger routine will add in whatever is needed.
         */
        current->thread.debugreg6 &= ~DR_TRAP_BITS;
        cpu = get_cpu();

        /* Handle all the breakpoints that were triggered */
        for (i = 0; i < HBP_NUM; ++i) {
                if (likely(!(dr6 & (DR_TRAP0 << i))))
                        continue;

                /*
                 * The counter may be concurrently released but that can only
                 * occur from a call_rcu() path.  We can then safely fetch
                 * the breakpoint, use its callback and touch its counter
                 * while we are in an rcu_read_lock() path.
                 */
                rcu_read_lock();

                bp = per_cpu(bp_per_reg[i], cpu);
                /*
                 * Reset the 'i'th TRAP bit in dr6 to denote completion of
                 * exception handling.
                 */
                (*dr6_p) &= ~(DR_TRAP0 << i);
                /*
                 * bp can be NULL due to lazy debug register switching
                 * or due to concurrent perf counter removal.
                 */
                if (!bp) {
                        rcu_read_unlock();
                        break;
                }

                perf_bp_event(bp, args->regs);

                /*
                 * Set up the resume flag to avoid breakpoint recursion when
                 * returning to the origin.
                 */
                if (bp->hw.info.type == X86_BREAKPOINT_EXECUTE)
                        args->regs->flags |= X86_EFLAGS_RF;

                rcu_read_unlock();
        }
        /*
         * Further processing in do_debug() is needed for a) user-space
         * breakpoints (to generate signals) and b) when the system has
         * taken an exception due to multiple causes.
         */
        if ((current->thread.debugreg6 & DR_TRAP_BITS) ||
            (dr6 & (~DR_TRAP_BITS)))
                rc = NOTIFY_DONE;

        set_debugreg(dr7, 7);
        put_cpu();

        return rc;
}

/*
 * Handle debug exception notifications.
 */
int hw_breakpoint_exceptions_notify(
                struct notifier_block *unused, unsigned long val, void *data)
{
        if (val != DIE_DEBUG)
                return NOTIFY_DONE;

        return hw_breakpoint_handler(data);
}

void hw_breakpoint_pmu_read(struct perf_event *bp)
{
        /* TODO */
}
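
/*
 * Usage sketch (not part of this file's logic): a kernel-side consumer
 * goes through the generic <linux/hw_breakpoint.h> API rather than
 * calling the arch_* hooks above directly.  Modeled on
 * samples/hw_breakpoint/data_breakpoint.c; the handler and symbol
 * names here are illustrative only:
 *
 *	struct perf_event_attr attr;
 *	struct perf_event * __percpu *wp;
 *
 *	hw_breakpoint_init(&attr);
 *	attr.bp_addr = kallsyms_lookup_name("some_symbol");
 *	attr.bp_len  = HW_BREAKPOINT_LEN_4;
 *	attr.bp_type = HW_BREAKPOINT_W;
 *
 *	wp = register_wide_hw_breakpoint(&attr, my_wp_handler, NULL);
 *	if (IS_ERR((void __force *)wp))
 *		return PTR_ERR((void __force *)wp);
 *	...
 *	unregister_wide_hw_breakpoint(wp);
 */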