/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) 2007 Alan Stern
 * Copyright (C) IBM Corporation, 2009
 * Copyright (C) 2009, Frederic Weisbecker <fweisbec@gmail.com>
 *
 * Thanks to Ingo Molnar for his many suggestions.
 *
 * Authors: Alan Stern <stern@rowland.harvard.edu>
 *          K.Prasad <prasad@linux.vnet.ibm.com>
 *          Frederic Weisbecker <fweisbec@gmail.com>
 */

/*
 * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
 * using the CPU's debug registers.
 * This file contains the arch-independent routines.
 */

#include <linux/irqflags.h>
#include <linux/kallsyms.h>
#include <linux/notifier.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/cpu.h>
#include <linux/smp.h>

#include <linux/hw_breakpoint.h>


/*
 * Constraints data
 */

/* Number of pinned cpu breakpoints in a cpu */
static DEFINE_PER_CPU(unsigned int, nr_cpu_bp_pinned[TYPE_MAX]);

/* Number of pinned task breakpoints in a cpu */
static DEFINE_PER_CPU(unsigned int *, nr_task_bp_pinned[TYPE_MAX]);

/* Number of non-pinned cpu/task breakpoints in a cpu */
static DEFINE_PER_CPU(unsigned int, nr_bp_flexible[TYPE_MAX]);

static int nr_slots[TYPE_MAX];

/* Keep track of the breakpoints attached to tasks */
static LIST_HEAD(bp_task_head);

static int constraints_initialized;

/* Gather the number of total pinned and un-pinned bp in a cpuset */
struct bp_busy_slots {
	unsigned int pinned;
	unsigned int flexible;
};

/* Serialize accesses to the above constraints */
static DEFINE_MUTEX(nr_bp_mutex);

__weak int hw_breakpoint_weight(struct perf_event *bp)
{
	return 1;
}

static inline enum bp_type_idx find_slot_idx(struct perf_event *bp)
{
	if (bp->attr.bp_type & HW_BREAKPOINT_RW)
		return TYPE_DATA;

	return TYPE_INST;
}

/*
 * Report the maximum number of pinned breakpoints any single task
 * has on this cpu.
 */
static unsigned int max_task_bp_pinned(int cpu, enum bp_type_idx type)
{
	int i;
	unsigned int *tsk_pinned = per_cpu(nr_task_bp_pinned[type], cpu);

	for (i = nr_slots[type] - 1; i >= 0; i--) {
		if (tsk_pinned[i] > 0)
			return i + 1;
	}

	return 0;
}
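/*
 * Worked example (illustrative only): per_cpu(nr_task_bp_pinned[type], cpu)[n]
 * counts the tasks that currently have n + 1 pinned breakpoints of that type
 * on the cpu.  Suppose two tasks each pin one data breakpoint there and a
 * third task pins three: tsk_pinned[0] == 2 and tsk_pinned[2] == 1, so the
 * scan above finds the highest non-empty bucket at i == 2 and reports
 * 2 + 1 == 3, the load of the busiest task on that cpu.
 */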
/*
 * Count the number of breakpoints of the same type and of the same task.
 * The given event must not be on the list.
 */
static int task_bp_pinned(int cpu, struct perf_event *bp, enum bp_type_idx type)
{
	struct task_struct *tsk = bp->hw.bp_target;
	struct perf_event *iter;
	int count = 0;

	list_for_each_entry(iter, &bp_task_head, hw.bp_list) {
		if (iter->hw.bp_target == tsk &&
		    find_slot_idx(iter) == type &&
		    cpu == iter->cpu)
			count += hw_breakpoint_weight(iter);
	}

	return count;
}

/*
 * Report the number of pinned/un-pinned breakpoints we have in
 * a given cpu (cpu > -1) or in all of them (cpu = -1).
 */
static void
fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp,
		    enum bp_type_idx type)
{
	int cpu = bp->cpu;
	struct task_struct *tsk = bp->hw.bp_target;

	if (cpu >= 0) {
		slots->pinned = per_cpu(nr_cpu_bp_pinned[type], cpu);
		if (!tsk)
			slots->pinned += max_task_bp_pinned(cpu, type);
		else
			slots->pinned += task_bp_pinned(cpu, bp, type);
		slots->flexible = per_cpu(nr_bp_flexible[type], cpu);

		return;
	}

	for_each_online_cpu(cpu) {
		unsigned int nr;

		nr = per_cpu(nr_cpu_bp_pinned[type], cpu);
		if (!tsk)
			nr += max_task_bp_pinned(cpu, type);
		else
			nr += task_bp_pinned(cpu, bp, type);

		if (nr > slots->pinned)
			slots->pinned = nr;

		nr = per_cpu(nr_bp_flexible[type], cpu);

		if (nr > slots->flexible)
			slots->flexible = nr;
	}
}

/*
 * For now, continue to consider flexible as pinned, until we can
 * ensure no flexible event can ever be scheduled before a pinned event
 * in a same cpu.
 */
static void
fetch_this_slot(struct bp_busy_slots *slots, int weight)
{
	slots->pinned += weight;
}

/*
 * Add a pinned breakpoint for the given task in our constraint table
 */
static void toggle_bp_task_slot(struct perf_event *bp, int cpu, bool enable,
				enum bp_type_idx type, int weight)
{
	unsigned int *tsk_pinned;
	int old_count = 0;
	int old_idx = 0;
	int idx = 0;

	old_count = task_bp_pinned(cpu, bp, type);
	old_idx = old_count - 1;
	idx = old_idx + weight;

	/* tsk_pinned[n] is the number of tasks having n + 1 breakpoints */
	tsk_pinned = per_cpu(nr_task_bp_pinned[type], cpu);
	if (enable) {
		tsk_pinned[idx]++;
		if (old_count > 0)
			tsk_pinned[old_idx]--;
	} else {
		tsk_pinned[idx]--;
		if (old_count > 0)
			tsk_pinned[old_idx]++;
	}
}

/*
 * Add/remove the given breakpoint in our constraint table
 */
static void
toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type,
	       int weight)
{
	int cpu = bp->cpu;
	struct task_struct *tsk = bp->hw.bp_target;

	/* Pinned counter cpu profiling */
	if (!tsk) {

		if (enable)
			per_cpu(nr_cpu_bp_pinned[type], bp->cpu) += weight;
		else
			per_cpu(nr_cpu_bp_pinned[type], bp->cpu) -= weight;
		return;
	}

	/* Pinned counter task profiling */

	if (!enable)
		list_del(&bp->hw.bp_list);

	if (cpu >= 0) {
		toggle_bp_task_slot(bp, cpu, enable, type, weight);
	} else {
		for_each_online_cpu(cpu)
			toggle_bp_task_slot(bp, cpu, enable, type, weight);
	}

	if (enable)
		list_add_tail(&bp->hw.bp_list, &bp_task_head);
}
/*
 * Function to perform processor-specific cleanup during unregistration
 */
__weak void arch_unregister_hw_breakpoint(struct perf_event *bp)
{
	/*
	 * A weak stub function here for those archs that don't define
	 * it inside arch/.../kernel/hw_breakpoint.c
	 */
}

/*
 * Constraints to check before allowing this new breakpoint counter:
 *
 *  == Non-pinned counter == (Considered as pinned for now)
 *
 *   - If attached to a single cpu, check:
 *
 *       (per_cpu(nr_bp_flexible, cpu) || (per_cpu(nr_cpu_bp_pinned, cpu)
 *           + max(per_cpu(nr_task_bp_pinned, cpu)))) < HBP_NUM
 *
 *       -> If there are already non-pinned counters in this cpu, it means
 *          there is already a free slot for them.
 *          Otherwise, we check that the maximum number of per task
 *          breakpoints (for this cpu) plus the number of per cpu breakpoints
 *          (for this cpu) doesn't cover every register.
 *
 *   - If attached to every cpu, check:
 *
 *       (per_cpu(nr_bp_flexible, *) || (max(per_cpu(nr_cpu_bp_pinned, *))
 *           + max(per_cpu(nr_task_bp_pinned, *)))) < HBP_NUM
 *
 *       -> This is roughly the same, except we check the number of per cpu
 *          bp for every cpu and we keep the max one. Same for the per task
 *          breakpoints.
 *
 *
 * == Pinned counter ==
 *
 *   - If attached to a single cpu, check:
 *
 *       ((per_cpu(nr_bp_flexible, cpu) > 1) + per_cpu(nr_cpu_bp_pinned, cpu)
 *            + max(per_cpu(nr_task_bp_pinned, cpu))) < HBP_NUM
 *
 *       -> Same checks as before. But now the flexible counters, if any,
 *          must keep at least one register (or they will never be
 *          scheduled in).
 *
 *   - If attached to every cpu, check:
 *
 *       ((per_cpu(nr_bp_flexible, *) > 1) + max(per_cpu(nr_cpu_bp_pinned, *))
 *            + max(per_cpu(nr_task_bp_pinned, *))) < HBP_NUM
 */
static int __reserve_bp_slot(struct perf_event *bp)
{
	struct bp_busy_slots slots = {0};
	enum bp_type_idx type;
	int weight;

	/* We couldn't initialize breakpoint constraints on boot */
	if (!constraints_initialized)
		return -ENOMEM;

	/* Basic checks */
	if (bp->attr.bp_type == HW_BREAKPOINT_EMPTY ||
	    bp->attr.bp_type == HW_BREAKPOINT_INVALID)
		return -EINVAL;

	type = find_slot_idx(bp);
	weight = hw_breakpoint_weight(bp);

	fetch_bp_busy_slots(&slots, bp, type);
	/*
	 * Simulate the addition of this breakpoint to the constraints
	 * and see the result.
	 */
	fetch_this_slot(&slots, weight);

	/* Flexible counters need to keep at least one slot */
	if (slots.pinned + (!!slots.flexible) > nr_slots[type])
		return -ENOSPC;

	toggle_bp_slot(bp, true, type, weight);

	return 0;
}

int reserve_bp_slot(struct perf_event *bp)
{
	int ret;

	mutex_lock(&nr_bp_mutex);

	ret = __reserve_bp_slot(bp);

	mutex_unlock(&nr_bp_mutex);

	return ret;
}
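/*
 * Worked example of the __reserve_bp_slot() check, assuming an arch with
 * four data breakpoint slots (nr_slots[TYPE_DATA] == 4, as on x86): a cpu
 * already has two pinned per-cpu data breakpoints, its busiest task pins
 * one more and one flexible event is around.  A new per-cpu request of
 * weight 1 then yields slots.pinned == 2 + 1 + 1 == 4 and
 * slots.flexible == 1, so 4 + !!1 == 5 > 4 and the reservation fails with
 * -ENOSPC, keeping one debug register free for the flexible event.
 */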
static void __release_bp_slot(struct perf_event *bp)
{
	enum bp_type_idx type;
	int weight;

	type = find_slot_idx(bp);
	weight = hw_breakpoint_weight(bp);
	toggle_bp_slot(bp, false, type, weight);
}

void release_bp_slot(struct perf_event *bp)
{
	mutex_lock(&nr_bp_mutex);

	arch_unregister_hw_breakpoint(bp);
	__release_bp_slot(bp);

	mutex_unlock(&nr_bp_mutex);
}

/*
 * Allow the kernel debugger to reserve breakpoint slots without
 * taking a lock, using the dbg_* variants of the reserve and
 * release breakpoint slot functions.
 */
int dbg_reserve_bp_slot(struct perf_event *bp)
{
	if (mutex_is_locked(&nr_bp_mutex))
		return -1;

	return __reserve_bp_slot(bp);
}

int dbg_release_bp_slot(struct perf_event *bp)
{
	if (mutex_is_locked(&nr_bp_mutex))
		return -1;

	__release_bp_slot(bp);

	return 0;
}

static int validate_hw_breakpoint(struct perf_event *bp)
{
	int ret;

	ret = arch_validate_hwbkpt_settings(bp);
	if (ret)
		return ret;

	if (arch_check_bp_in_kernelspace(bp)) {
		if (bp->attr.exclude_kernel)
			return -EINVAL;
		/*
		 * Don't let unprivileged users set a breakpoint in the trap
		 * path to avoid trap recursion attacks.
		 */
		if (!capable(CAP_SYS_ADMIN))
			return -EPERM;
	}

	return 0;
}

int register_perf_hw_breakpoint(struct perf_event *bp)
{
	int ret;

	ret = reserve_bp_slot(bp);
	if (ret)
		return ret;

	ret = validate_hw_breakpoint(bp);

	/* if validate_hw_breakpoint() fails then release bp slot */
	if (ret)
		release_bp_slot(bp);

	return ret;
}

/**
 * register_user_hw_breakpoint - register a hardware breakpoint for user space
 * @attr: breakpoint attributes
 * @triggered: callback to trigger when we hit the breakpoint
 * @context: context data for the @triggered callback
 * @tsk: pointer to 'task_struct' of the process to which the address belongs
 */
struct perf_event *
register_user_hw_breakpoint(struct perf_event_attr *attr,
			    perf_overflow_handler_t triggered,
			    void *context,
			    struct task_struct *tsk)
{
	return perf_event_create_kernel_counter(attr, -1, tsk, triggered,
						context);
}
EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);
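/*
 * Usage sketch (names are illustrative only): a kernel-side caller that
 * wants to watch a 4-byte user-space location addr in task tsk might do:
 *
 *	static void my_wp_handler(struct perf_event *bp,
 *				  struct perf_sample_data *data,
 *				  struct pt_regs *regs)
 *	{
 *		pr_info("watched address touched\n");
 *	}
 *
 *	struct perf_event_attr attr;
 *	struct perf_event *wp;
 *
 *	hw_breakpoint_init(&attr);
 *	attr.bp_addr = addr;
 *	attr.bp_len  = HW_BREAKPOINT_LEN_4;
 *	attr.bp_type = HW_BREAKPOINT_W;
 *
 *	wp = register_user_hw_breakpoint(&attr, my_wp_handler, NULL, tsk);
 *	if (IS_ERR(wp))
 *		return PTR_ERR(wp);
 *
 * and later tear it down with unregister_hw_breakpoint(wp).
 */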
/**
 * modify_user_hw_breakpoint - modify a user-space hardware breakpoint
 * @bp: the breakpoint structure to modify
 * @attr: new breakpoint attributes
 */
int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr)
{
	u64 old_addr = bp->attr.bp_addr;
	u64 old_len = bp->attr.bp_len;
	int old_type = bp->attr.bp_type;
	int err = 0;

	/*
	 * modify_user_hw_breakpoint can be invoked with IRQs disabled and hence it
	 * will not be possible to raise IPIs that invoke __perf_event_disable.
	 * So call the function directly after making sure we are targeting the
	 * current task.
	 */
	if (irqs_disabled() && bp->ctx && bp->ctx->task == current)
		__perf_event_disable(bp);
	else
		perf_event_disable(bp);

	bp->attr.bp_addr = attr->bp_addr;
	bp->attr.bp_type = attr->bp_type;
	bp->attr.bp_len = attr->bp_len;

	if (attr->disabled)
		goto end;

	err = validate_hw_breakpoint(bp);
	if (!err)
		perf_event_enable(bp);

	if (err) {
		bp->attr.bp_addr = old_addr;
		bp->attr.bp_type = old_type;
		bp->attr.bp_len = old_len;
		if (!bp->attr.disabled)
			perf_event_enable(bp);

		return err;
	}

end:
	bp->attr.disabled = attr->disabled;

	return 0;
}
EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint);

/**
 * unregister_hw_breakpoint - unregister a user-space hardware breakpoint
 * @bp: the breakpoint structure to unregister
 */
void unregister_hw_breakpoint(struct perf_event *bp)
{
	if (!bp)
		return;
	perf_event_release_kernel(bp);
}
EXPORT_SYMBOL_GPL(unregister_hw_breakpoint);

/**
 * register_wide_hw_breakpoint - register a wide breakpoint in the kernel
 * @attr: breakpoint attributes
 * @triggered: callback to trigger when we hit the breakpoint
 * @context: context data for the @triggered callback
 *
 * @return a set of per_cpu pointers to perf events
 */
struct perf_event * __percpu *
register_wide_hw_breakpoint(struct perf_event_attr *attr,
			    perf_overflow_handler_t triggered,
			    void *context)
{
	struct perf_event * __percpu *cpu_events, **pevent, *bp;
	long err;
	int cpu;

	cpu_events = alloc_percpu(typeof(*cpu_events));
	if (!cpu_events)
		return (void __percpu __force *)ERR_PTR(-ENOMEM);

	get_online_cpus();
	for_each_online_cpu(cpu) {
		pevent = per_cpu_ptr(cpu_events, cpu);
		bp = perf_event_create_kernel_counter(attr, cpu, NULL,
						      triggered, context);

		*pevent = bp;

		if (IS_ERR(bp)) {
			err = PTR_ERR(bp);
			goto fail;
		}
	}
	put_online_cpus();

	return cpu_events;

fail:
	for_each_online_cpu(cpu) {
		pevent = per_cpu_ptr(cpu_events, cpu);
		if (IS_ERR(*pevent))
			break;
		unregister_hw_breakpoint(*pevent);
	}
	put_online_cpus();

	free_percpu(cpu_events);
	return (void __percpu __force *)ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint);

/**
 * unregister_wide_hw_breakpoint - unregister a wide breakpoint in the kernel
 * @cpu_events: the per cpu set of events to unregister
 */
void unregister_wide_hw_breakpoint(struct perf_event * __percpu *cpu_events)
{
	int cpu;
	struct perf_event **pevent;

	for_each_possible_cpu(cpu) {
		pevent = per_cpu_ptr(cpu_events, cpu);
		unregister_hw_breakpoint(*pevent);
	}
	free_percpu(cpu_events);
}
EXPORT_SYMBOL_GPL(unregister_wide_hw_breakpoint);
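/*
 * Usage sketch (names are illustrative; see
 * samples/hw_breakpoint/data_breakpoint.c for a complete example module):
 * watch read/write accesses to a kernel symbol on every online cpu:
 *
 *	struct perf_event_attr attr;
 *	struct perf_event * __percpu *wp;
 *
 *	hw_breakpoint_init(&attr);
 *	attr.bp_addr = kallsyms_lookup_name("pid_max");
 *	attr.bp_len  = HW_BREAKPOINT_LEN_4;
 *	attr.bp_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R;
 *
 *	wp = register_wide_hw_breakpoint(&attr, my_wp_handler, NULL);
 *	if (IS_ERR((void __force *)wp))
 *		return PTR_ERR((void __force *)wp);
 *
 * and release all the per-cpu events at once with
 * unregister_wide_hw_breakpoint(wp).
 */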
static struct notifier_block hw_breakpoint_exceptions_nb = {
	.notifier_call = hw_breakpoint_exceptions_notify,
	/* we need to be notified first */
	.priority = 0x7fffffff
};

static void bp_perf_event_destroy(struct perf_event *event)
{
	release_bp_slot(event);
}

static int hw_breakpoint_event_init(struct perf_event *bp)
{
	int err;

	if (bp->attr.type != PERF_TYPE_BREAKPOINT)
		return -ENOENT;

	/*
	 * no branch sampling for breakpoint events
	 */
	if (has_branch_stack(bp))
		return -EOPNOTSUPP;

	err = register_perf_hw_breakpoint(bp);
	if (err)
		return err;

	bp->destroy = bp_perf_event_destroy;

	return 0;
}

static int hw_breakpoint_add(struct perf_event *bp, int flags)
{
	if (!(flags & PERF_EF_START))
		bp->hw.state = PERF_HES_STOPPED;

	return arch_install_hw_breakpoint(bp);
}

static void hw_breakpoint_del(struct perf_event *bp, int flags)
{
	arch_uninstall_hw_breakpoint(bp);
}

static void hw_breakpoint_start(struct perf_event *bp, int flags)
{
	bp->hw.state = 0;
}

static void hw_breakpoint_stop(struct perf_event *bp, int flags)
{
	bp->hw.state = PERF_HES_STOPPED;
}

static int hw_breakpoint_event_idx(struct perf_event *bp)
{
	return 0;
}

static struct pmu perf_breakpoint = {
	.task_ctx_nr	= perf_sw_context, /* could eventually get its own */

	.event_init	= hw_breakpoint_event_init,
	.add		= hw_breakpoint_add,
	.del		= hw_breakpoint_del,
	.start		= hw_breakpoint_start,
	.stop		= hw_breakpoint_stop,
	.read		= hw_breakpoint_pmu_read,

	.event_idx	= hw_breakpoint_event_idx,
};

int __init init_hw_breakpoint(void)
{
	unsigned int **task_bp_pinned;
	int cpu, err_cpu;
	int i;

	for (i = 0; i < TYPE_MAX; i++)
		nr_slots[i] = hw_breakpoint_slots(i);

	for_each_possible_cpu(cpu) {
		for (i = 0; i < TYPE_MAX; i++) {
			task_bp_pinned = &per_cpu(nr_task_bp_pinned[i], cpu);
			*task_bp_pinned = kzalloc(sizeof(int) * nr_slots[i],
						  GFP_KERNEL);
			if (!*task_bp_pinned)
				goto err_alloc;
		}
	}

	constraints_initialized = 1;

	perf_pmu_register(&perf_breakpoint, "breakpoint", PERF_TYPE_BREAKPOINT);

	return register_die_notifier(&hw_breakpoint_exceptions_nb);

err_alloc:
	for_each_possible_cpu(err_cpu) {
		for (i = 0; i < TYPE_MAX; i++)
			kfree(per_cpu(nr_task_bp_pinned[i], err_cpu));
		if (err_cpu == cpu)
			break;
	}

	return -ENOMEM;
}