1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Kernel Probes (KProbes) 4 * kernel/kprobes.c 5 * 6 * Copyright (C) IBM Corporation, 2002, 2004 7 * 8 * 2002-Oct Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel 9 * Probes initial implementation (includes suggestions from 10 * Rusty Russell). 11 * 2004-Aug Updated by Prasanna S Panchamukhi <prasanna@in.ibm.com> with 12 * hlists and exceptions notifier as suggested by Andi Kleen. 13 * 2004-July Suparna Bhattacharya <suparna@in.ibm.com> added jumper probes 14 * interface to access function arguments. 15 * 2004-Sep Prasanna S Panchamukhi <prasanna@in.ibm.com> Changed Kprobes 16 * exceptions notifier to be first on the priority list. 17 * 2005-May Hien Nguyen <hien@us.ibm.com>, Jim Keniston 18 * <jkenisto@us.ibm.com> and Prasanna S Panchamukhi 19 * <prasanna@in.ibm.com> added function-return probes. 20 */ 21 #include <linux/kprobes.h> 22 #include <linux/hash.h> 23 #include <linux/init.h> 24 #include <linux/slab.h> 25 #include <linux/stddef.h> 26 #include <linux/export.h> 27 #include <linux/moduleloader.h> 28 #include <linux/kallsyms.h> 29 #include <linux/freezer.h> 30 #include <linux/seq_file.h> 31 #include <linux/debugfs.h> 32 #include <linux/sysctl.h> 33 #include <linux/kdebug.h> 34 #include <linux/memory.h> 35 #include <linux/ftrace.h> 36 #include <linux/cpu.h> 37 #include <linux/jump_label.h> 38 #include <linux/perf_event.h> 39 40 #include <asm/sections.h> 41 #include <asm/cacheflush.h> 42 #include <asm/errno.h> 43 #include <linux/uaccess.h> 44 45 #define KPROBE_HASH_BITS 6 46 #define KPROBE_TABLE_SIZE (1 << KPROBE_HASH_BITS) 47 48 49 static int kprobes_initialized; 50 /* kprobe_table can be accessed by 51 * - Normal hlist traversal and RCU add/del under kprobe_mutex is held. 
52 * Or 53 * - RCU hlist traversal under disabling preempt (breakpoint handlers) 54 */ 55 static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE]; 56 57 /* NOTE: change this value only with kprobe_mutex held */ 58 static bool kprobes_all_disarmed; 59 60 /* This protects kprobe_table and optimizing_list */ 61 static DEFINE_MUTEX(kprobe_mutex); 62 static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL; 63 64 kprobe_opcode_t * __weak kprobe_lookup_name(const char *name, 65 unsigned int __unused) 66 { 67 return ((kprobe_opcode_t *)(kallsyms_lookup_name(name))); 68 } 69 70 /* Blacklist -- list of struct kprobe_blacklist_entry */ 71 static LIST_HEAD(kprobe_blacklist); 72 73 #ifdef __ARCH_WANT_KPROBES_INSN_SLOT 74 /* 75 * kprobe->ainsn.insn points to the copy of the instruction to be 76 * single-stepped. x86_64, POWER4 and above have no-exec support and 77 * stepping on the instruction on a vmalloced/kmalloced/data page 78 * is a recipe for disaster 79 */ 80 struct kprobe_insn_page { 81 struct list_head list; 82 kprobe_opcode_t *insns; /* Page of instruction slots */ 83 struct kprobe_insn_cache *cache; 84 int nused; 85 int ngarbage; 86 char slot_used[]; 87 }; 88 89 #define KPROBE_INSN_PAGE_SIZE(slots) \ 90 (offsetof(struct kprobe_insn_page, slot_used) + \ 91 (sizeof(char) * (slots))) 92 93 static int slots_per_page(struct kprobe_insn_cache *c) 94 { 95 return PAGE_SIZE/(c->insn_size * sizeof(kprobe_opcode_t)); 96 } 97 98 enum kprobe_slot_state { 99 SLOT_CLEAN = 0, 100 SLOT_DIRTY = 1, 101 SLOT_USED = 2, 102 }; 103 104 void __weak *alloc_insn_page(void) 105 { 106 return module_alloc(PAGE_SIZE); 107 } 108 109 void __weak free_insn_page(void *page) 110 { 111 module_memfree(page); 112 } 113 114 struct kprobe_insn_cache kprobe_insn_slots = { 115 .mutex = __MUTEX_INITIALIZER(kprobe_insn_slots.mutex), 116 .alloc = alloc_insn_page, 117 .free = free_insn_page, 118 .sym = KPROBE_INSN_PAGE_SYM, 119 .pages = LIST_HEAD_INIT(kprobe_insn_slots.pages), 120 .insn_size = 
MAX_INSN_SIZE, 121 .nr_garbage = 0, 122 }; 123 static int collect_garbage_slots(struct kprobe_insn_cache *c); 124 125 /** 126 * __get_insn_slot() - Find a slot on an executable page for an instruction. 127 * We allocate an executable page if there's no room on existing ones. 128 */ 129 kprobe_opcode_t *__get_insn_slot(struct kprobe_insn_cache *c) 130 { 131 struct kprobe_insn_page *kip; 132 kprobe_opcode_t *slot = NULL; 133 134 /* Since the slot array is not protected by rcu, we need a mutex */ 135 mutex_lock(&c->mutex); 136 retry: 137 rcu_read_lock(); 138 list_for_each_entry_rcu(kip, &c->pages, list) { 139 if (kip->nused < slots_per_page(c)) { 140 int i; 141 for (i = 0; i < slots_per_page(c); i++) { 142 if (kip->slot_used[i] == SLOT_CLEAN) { 143 kip->slot_used[i] = SLOT_USED; 144 kip->nused++; 145 slot = kip->insns + (i * c->insn_size); 146 rcu_read_unlock(); 147 goto out; 148 } 149 } 150 /* kip->nused is broken. Fix it. */ 151 kip->nused = slots_per_page(c); 152 WARN_ON(1); 153 } 154 } 155 rcu_read_unlock(); 156 157 /* If there are any garbage slots, collect it and try again. */ 158 if (c->nr_garbage && collect_garbage_slots(c) == 0) 159 goto retry; 160 161 /* All out of space. Need to allocate a new page. */ 162 kip = kmalloc(KPROBE_INSN_PAGE_SIZE(slots_per_page(c)), GFP_KERNEL); 163 if (!kip) 164 goto out; 165 166 /* 167 * Use module_alloc so this page is within +/- 2GB of where the 168 * kernel image and loaded module images reside. This is required 169 * so x86_64 can correctly handle the %rip-relative fixups. 
170 */ 171 kip->insns = c->alloc(); 172 if (!kip->insns) { 173 kfree(kip); 174 goto out; 175 } 176 INIT_LIST_HEAD(&kip->list); 177 memset(kip->slot_used, SLOT_CLEAN, slots_per_page(c)); 178 kip->slot_used[0] = SLOT_USED; 179 kip->nused = 1; 180 kip->ngarbage = 0; 181 kip->cache = c; 182 list_add_rcu(&kip->list, &c->pages); 183 slot = kip->insns; 184 185 /* Record the perf ksymbol register event after adding the page */ 186 perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_OOL, (unsigned long)kip->insns, 187 PAGE_SIZE, false, c->sym); 188 out: 189 mutex_unlock(&c->mutex); 190 return slot; 191 } 192 193 /* Return 1 if all garbages are collected, otherwise 0. */ 194 static int collect_one_slot(struct kprobe_insn_page *kip, int idx) 195 { 196 kip->slot_used[idx] = SLOT_CLEAN; 197 kip->nused--; 198 if (kip->nused == 0) { 199 /* 200 * Page is no longer in use. Free it unless 201 * it's the last one. We keep the last one 202 * so as not to have to set it up again the 203 * next time somebody inserts a probe. 204 */ 205 if (!list_is_singular(&kip->list)) { 206 /* 207 * Record perf ksymbol unregister event before removing 208 * the page. 
209 */ 210 perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_OOL, 211 (unsigned long)kip->insns, PAGE_SIZE, true, 212 kip->cache->sym); 213 list_del_rcu(&kip->list); 214 synchronize_rcu(); 215 kip->cache->free(kip->insns); 216 kfree(kip); 217 } 218 return 1; 219 } 220 return 0; 221 } 222 223 static int collect_garbage_slots(struct kprobe_insn_cache *c) 224 { 225 struct kprobe_insn_page *kip, *next; 226 227 /* Ensure no-one is interrupted on the garbages */ 228 synchronize_rcu(); 229 230 list_for_each_entry_safe(kip, next, &c->pages, list) { 231 int i; 232 if (kip->ngarbage == 0) 233 continue; 234 kip->ngarbage = 0; /* we will collect all garbages */ 235 for (i = 0; i < slots_per_page(c); i++) { 236 if (kip->slot_used[i] == SLOT_DIRTY && collect_one_slot(kip, i)) 237 break; 238 } 239 } 240 c->nr_garbage = 0; 241 return 0; 242 } 243 244 void __free_insn_slot(struct kprobe_insn_cache *c, 245 kprobe_opcode_t *slot, int dirty) 246 { 247 struct kprobe_insn_page *kip; 248 long idx; 249 250 mutex_lock(&c->mutex); 251 rcu_read_lock(); 252 list_for_each_entry_rcu(kip, &c->pages, list) { 253 idx = ((long)slot - (long)kip->insns) / 254 (c->insn_size * sizeof(kprobe_opcode_t)); 255 if (idx >= 0 && idx < slots_per_page(c)) 256 goto out; 257 } 258 /* Could not find this slot. */ 259 WARN_ON(1); 260 kip = NULL; 261 out: 262 rcu_read_unlock(); 263 /* Mark and sweep: this may sleep */ 264 if (kip) { 265 /* Check double free */ 266 WARN_ON(kip->slot_used[idx] != SLOT_USED); 267 if (dirty) { 268 kip->slot_used[idx] = SLOT_DIRTY; 269 kip->ngarbage++; 270 if (++c->nr_garbage > slots_per_page(c)) 271 collect_garbage_slots(c); 272 } else { 273 collect_one_slot(kip, idx); 274 } 275 } 276 mutex_unlock(&c->mutex); 277 } 278 279 /* 280 * Check given address is on the page of kprobe instruction slots. 281 * This will be used for checking whether the address on a stack 282 * is on a text area or not. 
283 */ 284 bool __is_insn_slot_addr(struct kprobe_insn_cache *c, unsigned long addr) 285 { 286 struct kprobe_insn_page *kip; 287 bool ret = false; 288 289 rcu_read_lock(); 290 list_for_each_entry_rcu(kip, &c->pages, list) { 291 if (addr >= (unsigned long)kip->insns && 292 addr < (unsigned long)kip->insns + PAGE_SIZE) { 293 ret = true; 294 break; 295 } 296 } 297 rcu_read_unlock(); 298 299 return ret; 300 } 301 302 int kprobe_cache_get_kallsym(struct kprobe_insn_cache *c, unsigned int *symnum, 303 unsigned long *value, char *type, char *sym) 304 { 305 struct kprobe_insn_page *kip; 306 int ret = -ERANGE; 307 308 rcu_read_lock(); 309 list_for_each_entry_rcu(kip, &c->pages, list) { 310 if ((*symnum)--) 311 continue; 312 strlcpy(sym, c->sym, KSYM_NAME_LEN); 313 *type = 't'; 314 *value = (unsigned long)kip->insns; 315 ret = 0; 316 break; 317 } 318 rcu_read_unlock(); 319 320 return ret; 321 } 322 323 #ifdef CONFIG_OPTPROBES 324 /* For optimized_kprobe buffer */ 325 struct kprobe_insn_cache kprobe_optinsn_slots = { 326 .mutex = __MUTEX_INITIALIZER(kprobe_optinsn_slots.mutex), 327 .alloc = alloc_insn_page, 328 .free = free_insn_page, 329 .sym = KPROBE_OPTINSN_PAGE_SYM, 330 .pages = LIST_HEAD_INIT(kprobe_optinsn_slots.pages), 331 /* .insn_size is initialized later */ 332 .nr_garbage = 0, 333 }; 334 #endif 335 #endif 336 337 /* We have preemption disabled.. 
so it is safe to use __ versions */ 338 static inline void set_kprobe_instance(struct kprobe *kp) 339 { 340 __this_cpu_write(kprobe_instance, kp); 341 } 342 343 static inline void reset_kprobe_instance(void) 344 { 345 __this_cpu_write(kprobe_instance, NULL); 346 } 347 348 /* 349 * This routine is called either: 350 * - under the kprobe_mutex - during kprobe_[un]register() 351 * OR 352 * - with preemption disabled - from arch/xxx/kernel/kprobes.c 353 */ 354 struct kprobe *get_kprobe(void *addr) 355 { 356 struct hlist_head *head; 357 struct kprobe *p; 358 359 head = &kprobe_table[hash_ptr(addr, KPROBE_HASH_BITS)]; 360 hlist_for_each_entry_rcu(p, head, hlist, 361 lockdep_is_held(&kprobe_mutex)) { 362 if (p->addr == addr) 363 return p; 364 } 365 366 return NULL; 367 } 368 NOKPROBE_SYMBOL(get_kprobe); 369 370 static int aggr_pre_handler(struct kprobe *p, struct pt_regs *regs); 371 372 /* Return true if the kprobe is an aggregator */ 373 static inline int kprobe_aggrprobe(struct kprobe *p) 374 { 375 return p->pre_handler == aggr_pre_handler; 376 } 377 378 /* Return true(!0) if the kprobe is unused */ 379 static inline int kprobe_unused(struct kprobe *p) 380 { 381 return kprobe_aggrprobe(p) && kprobe_disabled(p) && 382 list_empty(&p->list); 383 } 384 385 /* 386 * Keep all fields in the kprobe consistent 387 */ 388 static inline void copy_kprobe(struct kprobe *ap, struct kprobe *p) 389 { 390 memcpy(&p->opcode, &ap->opcode, sizeof(kprobe_opcode_t)); 391 memcpy(&p->ainsn, &ap->ainsn, sizeof(struct arch_specific_insn)); 392 } 393 394 #ifdef CONFIG_OPTPROBES 395 /* NOTE: change this value only with kprobe_mutex held */ 396 static bool kprobes_allow_optimization; 397 398 /* 399 * Call all pre_handler on the list, but ignores its return value. 400 * This must be called from arch-dep optimized caller. 
401 */ 402 void opt_pre_handler(struct kprobe *p, struct pt_regs *regs) 403 { 404 struct kprobe *kp; 405 406 list_for_each_entry_rcu(kp, &p->list, list) { 407 if (kp->pre_handler && likely(!kprobe_disabled(kp))) { 408 set_kprobe_instance(kp); 409 kp->pre_handler(kp, regs); 410 } 411 reset_kprobe_instance(); 412 } 413 } 414 NOKPROBE_SYMBOL(opt_pre_handler); 415 416 /* Free optimized instructions and optimized_kprobe */ 417 static void free_aggr_kprobe(struct kprobe *p) 418 { 419 struct optimized_kprobe *op; 420 421 op = container_of(p, struct optimized_kprobe, kp); 422 arch_remove_optimized_kprobe(op); 423 arch_remove_kprobe(p); 424 kfree(op); 425 } 426 427 /* Return true(!0) if the kprobe is ready for optimization. */ 428 static inline int kprobe_optready(struct kprobe *p) 429 { 430 struct optimized_kprobe *op; 431 432 if (kprobe_aggrprobe(p)) { 433 op = container_of(p, struct optimized_kprobe, kp); 434 return arch_prepared_optinsn(&op->optinsn); 435 } 436 437 return 0; 438 } 439 440 /* Return true(!0) if the kprobe is disarmed. Note: p must be on hash list */ 441 static inline int kprobe_disarmed(struct kprobe *p) 442 { 443 struct optimized_kprobe *op; 444 445 /* If kprobe is not aggr/opt probe, just return kprobe is disabled */ 446 if (!kprobe_aggrprobe(p)) 447 return kprobe_disabled(p); 448 449 op = container_of(p, struct optimized_kprobe, kp); 450 451 return kprobe_disabled(p) && list_empty(&op->list); 452 } 453 454 /* Return true(!0) if the probe is queued on (un)optimizing lists */ 455 static int kprobe_queued(struct kprobe *p) 456 { 457 struct optimized_kprobe *op; 458 459 if (kprobe_aggrprobe(p)) { 460 op = container_of(p, struct optimized_kprobe, kp); 461 if (!list_empty(&op->list)) 462 return 1; 463 } 464 return 0; 465 } 466 467 /* 468 * Return an optimized kprobe whose optimizing code replaces 469 * instructions including addr (exclude breakpoint). 
470 */ 471 static struct kprobe *get_optimized_kprobe(unsigned long addr) 472 { 473 int i; 474 struct kprobe *p = NULL; 475 struct optimized_kprobe *op; 476 477 /* Don't check i == 0, since that is a breakpoint case. */ 478 for (i = 1; !p && i < MAX_OPTIMIZED_LENGTH; i++) 479 p = get_kprobe((void *)(addr - i)); 480 481 if (p && kprobe_optready(p)) { 482 op = container_of(p, struct optimized_kprobe, kp); 483 if (arch_within_optimized_kprobe(op, addr)) 484 return p; 485 } 486 487 return NULL; 488 } 489 490 /* Optimization staging list, protected by kprobe_mutex */ 491 static LIST_HEAD(optimizing_list); 492 static LIST_HEAD(unoptimizing_list); 493 static LIST_HEAD(freeing_list); 494 495 static void kprobe_optimizer(struct work_struct *work); 496 static DECLARE_DELAYED_WORK(optimizing_work, kprobe_optimizer); 497 #define OPTIMIZE_DELAY 5 498 499 /* 500 * Optimize (replace a breakpoint with a jump) kprobes listed on 501 * optimizing_list. 502 */ 503 static void do_optimize_kprobes(void) 504 { 505 lockdep_assert_held(&text_mutex); 506 /* 507 * The optimization/unoptimization refers online_cpus via 508 * stop_machine() and cpu-hotplug modifies online_cpus. 509 * And same time, text_mutex will be held in cpu-hotplug and here. 510 * This combination can cause a deadlock (cpu-hotplug try to lock 511 * text_mutex but stop_machine can not be done because online_cpus 512 * has been changed) 513 * To avoid this deadlock, caller must have locked cpu hotplug 514 * for preventing cpu-hotplug outside of text_mutex locking. 515 */ 516 lockdep_assert_cpus_held(); 517 518 /* Optimization never be done when disarmed */ 519 if (kprobes_all_disarmed || !kprobes_allow_optimization || 520 list_empty(&optimizing_list)) 521 return; 522 523 arch_optimize_kprobes(&optimizing_list); 524 } 525 526 /* 527 * Unoptimize (replace a jump with a breakpoint and remove the breakpoint 528 * if need) kprobes listed on unoptimizing_list. 
529 */ 530 static void do_unoptimize_kprobes(void) 531 { 532 struct optimized_kprobe *op, *tmp; 533 534 lockdep_assert_held(&text_mutex); 535 /* See comment in do_optimize_kprobes() */ 536 lockdep_assert_cpus_held(); 537 538 /* Unoptimization must be done anytime */ 539 if (list_empty(&unoptimizing_list)) 540 return; 541 542 arch_unoptimize_kprobes(&unoptimizing_list, &freeing_list); 543 /* Loop free_list for disarming */ 544 list_for_each_entry_safe(op, tmp, &freeing_list, list) { 545 /* Switching from detour code to origin */ 546 op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED; 547 /* Disarm probes if marked disabled */ 548 if (kprobe_disabled(&op->kp)) 549 arch_disarm_kprobe(&op->kp); 550 if (kprobe_unused(&op->kp)) { 551 /* 552 * Remove unused probes from hash list. After waiting 553 * for synchronization, these probes are reclaimed. 554 * (reclaiming is done by do_free_cleaned_kprobes.) 555 */ 556 hlist_del_rcu(&op->kp.hlist); 557 } else 558 list_del_init(&op->list); 559 } 560 } 561 562 /* Reclaim all kprobes on the free_list */ 563 static void do_free_cleaned_kprobes(void) 564 { 565 struct optimized_kprobe *op, *tmp; 566 567 list_for_each_entry_safe(op, tmp, &freeing_list, list) { 568 list_del_init(&op->list); 569 if (WARN_ON_ONCE(!kprobe_unused(&op->kp))) { 570 /* 571 * This must not happen, but if there is a kprobe 572 * still in use, keep it on kprobes hash list. 573 */ 574 continue; 575 } 576 free_aggr_kprobe(&op->kp); 577 } 578 } 579 580 /* Start optimizer after OPTIMIZE_DELAY passed */ 581 static void kick_kprobe_optimizer(void) 582 { 583 schedule_delayed_work(&optimizing_work, OPTIMIZE_DELAY); 584 } 585 586 /* Kprobe jump optimizer */ 587 static void kprobe_optimizer(struct work_struct *work) 588 { 589 mutex_lock(&kprobe_mutex); 590 cpus_read_lock(); 591 mutex_lock(&text_mutex); 592 593 /* 594 * Step 1: Unoptimize kprobes and collect cleaned (unused and disarmed) 595 * kprobes before waiting for quiesence period. 
596 */ 597 do_unoptimize_kprobes(); 598 599 /* 600 * Step 2: Wait for quiesence period to ensure all potentially 601 * preempted tasks to have normally scheduled. Because optprobe 602 * may modify multiple instructions, there is a chance that Nth 603 * instruction is preempted. In that case, such tasks can return 604 * to 2nd-Nth byte of jump instruction. This wait is for avoiding it. 605 * Note that on non-preemptive kernel, this is transparently converted 606 * to synchronoze_sched() to wait for all interrupts to have completed. 607 */ 608 synchronize_rcu_tasks(); 609 610 /* Step 3: Optimize kprobes after quiesence period */ 611 do_optimize_kprobes(); 612 613 /* Step 4: Free cleaned kprobes after quiesence period */ 614 do_free_cleaned_kprobes(); 615 616 mutex_unlock(&text_mutex); 617 cpus_read_unlock(); 618 619 /* Step 5: Kick optimizer again if needed */ 620 if (!list_empty(&optimizing_list) || !list_empty(&unoptimizing_list)) 621 kick_kprobe_optimizer(); 622 623 mutex_unlock(&kprobe_mutex); 624 } 625 626 /* Wait for completing optimization and unoptimization */ 627 void wait_for_kprobe_optimizer(void) 628 { 629 mutex_lock(&kprobe_mutex); 630 631 while (!list_empty(&optimizing_list) || !list_empty(&unoptimizing_list)) { 632 mutex_unlock(&kprobe_mutex); 633 634 /* this will also make optimizing_work execute immmediately */ 635 flush_delayed_work(&optimizing_work); 636 /* @optimizing_work might not have been queued yet, relax */ 637 cpu_relax(); 638 639 mutex_lock(&kprobe_mutex); 640 } 641 642 mutex_unlock(&kprobe_mutex); 643 } 644 645 static bool optprobe_queued_unopt(struct optimized_kprobe *op) 646 { 647 struct optimized_kprobe *_op; 648 649 list_for_each_entry(_op, &unoptimizing_list, list) { 650 if (op == _op) 651 return true; 652 } 653 654 return false; 655 } 656 657 /* Optimize kprobe if p is ready to be optimized */ 658 static void optimize_kprobe(struct kprobe *p) 659 { 660 struct optimized_kprobe *op; 661 662 /* Check if the kprobe is disabled or not 
ready for optimization. */ 663 if (!kprobe_optready(p) || !kprobes_allow_optimization || 664 (kprobe_disabled(p) || kprobes_all_disarmed)) 665 return; 666 667 /* kprobes with post_handler can not be optimized */ 668 if (p->post_handler) 669 return; 670 671 op = container_of(p, struct optimized_kprobe, kp); 672 673 /* Check there is no other kprobes at the optimized instructions */ 674 if (arch_check_optimized_kprobe(op) < 0) 675 return; 676 677 /* Check if it is already optimized. */ 678 if (op->kp.flags & KPROBE_FLAG_OPTIMIZED) { 679 if (optprobe_queued_unopt(op)) { 680 /* This is under unoptimizing. Just dequeue the probe */ 681 list_del_init(&op->list); 682 } 683 return; 684 } 685 op->kp.flags |= KPROBE_FLAG_OPTIMIZED; 686 687 /* On unoptimizing/optimizing_list, op must have OPTIMIZED flag */ 688 if (WARN_ON_ONCE(!list_empty(&op->list))) 689 return; 690 691 list_add(&op->list, &optimizing_list); 692 kick_kprobe_optimizer(); 693 } 694 695 /* Short cut to direct unoptimizing */ 696 static void force_unoptimize_kprobe(struct optimized_kprobe *op) 697 { 698 lockdep_assert_cpus_held(); 699 arch_unoptimize_kprobe(op); 700 op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED; 701 } 702 703 /* Unoptimize a kprobe if p is optimized */ 704 static void unoptimize_kprobe(struct kprobe *p, bool force) 705 { 706 struct optimized_kprobe *op; 707 708 if (!kprobe_aggrprobe(p) || kprobe_disarmed(p)) 709 return; /* This is not an optprobe nor optimized */ 710 711 op = container_of(p, struct optimized_kprobe, kp); 712 if (!kprobe_optimized(p)) 713 return; 714 715 if (!list_empty(&op->list)) { 716 if (optprobe_queued_unopt(op)) { 717 /* Queued in unoptimizing queue */ 718 if (force) { 719 /* 720 * Forcibly unoptimize the kprobe here, and queue it 721 * in the freeing list for release afterwards. 
722 */ 723 force_unoptimize_kprobe(op); 724 list_move(&op->list, &freeing_list); 725 } 726 } else { 727 /* Dequeue from the optimizing queue */ 728 list_del_init(&op->list); 729 op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED; 730 } 731 return; 732 } 733 734 /* Optimized kprobe case */ 735 if (force) { 736 /* Forcibly update the code: this is a special case */ 737 force_unoptimize_kprobe(op); 738 } else { 739 list_add(&op->list, &unoptimizing_list); 740 kick_kprobe_optimizer(); 741 } 742 } 743 744 /* Cancel unoptimizing for reusing */ 745 static int reuse_unused_kprobe(struct kprobe *ap) 746 { 747 struct optimized_kprobe *op; 748 749 /* 750 * Unused kprobe MUST be on the way of delayed unoptimizing (means 751 * there is still a relative jump) and disabled. 752 */ 753 op = container_of(ap, struct optimized_kprobe, kp); 754 WARN_ON_ONCE(list_empty(&op->list)); 755 /* Enable the probe again */ 756 ap->flags &= ~KPROBE_FLAG_DISABLED; 757 /* Optimize it again (remove from op->list) */ 758 if (!kprobe_optready(ap)) 759 return -EINVAL; 760 761 optimize_kprobe(ap); 762 return 0; 763 } 764 765 /* Remove optimized instructions */ 766 static void kill_optimized_kprobe(struct kprobe *p) 767 { 768 struct optimized_kprobe *op; 769 770 op = container_of(p, struct optimized_kprobe, kp); 771 if (!list_empty(&op->list)) 772 /* Dequeue from the (un)optimization queue */ 773 list_del_init(&op->list); 774 op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED; 775 776 if (kprobe_unused(p)) { 777 /* Enqueue if it is unused */ 778 list_add(&op->list, &freeing_list); 779 /* 780 * Remove unused probes from the hash list. After waiting 781 * for synchronization, this probe is reclaimed. 782 * (reclaiming is done by do_free_cleaned_kprobes().) 783 */ 784 hlist_del_rcu(&op->kp.hlist); 785 } 786 787 /* Don't touch the code, because it is already freed. 
*/ 788 arch_remove_optimized_kprobe(op); 789 } 790 791 static inline 792 void __prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p) 793 { 794 if (!kprobe_ftrace(p)) 795 arch_prepare_optimized_kprobe(op, p); 796 } 797 798 /* Try to prepare optimized instructions */ 799 static void prepare_optimized_kprobe(struct kprobe *p) 800 { 801 struct optimized_kprobe *op; 802 803 op = container_of(p, struct optimized_kprobe, kp); 804 __prepare_optimized_kprobe(op, p); 805 } 806 807 /* Allocate new optimized_kprobe and try to prepare optimized instructions */ 808 static struct kprobe *alloc_aggr_kprobe(struct kprobe *p) 809 { 810 struct optimized_kprobe *op; 811 812 op = kzalloc(sizeof(struct optimized_kprobe), GFP_KERNEL); 813 if (!op) 814 return NULL; 815 816 INIT_LIST_HEAD(&op->list); 817 op->kp.addr = p->addr; 818 __prepare_optimized_kprobe(op, p); 819 820 return &op->kp; 821 } 822 823 static void init_aggr_kprobe(struct kprobe *ap, struct kprobe *p); 824 825 /* 826 * Prepare an optimized_kprobe and optimize it 827 * NOTE: p must be a normal registered kprobe 828 */ 829 static void try_to_optimize_kprobe(struct kprobe *p) 830 { 831 struct kprobe *ap; 832 struct optimized_kprobe *op; 833 834 /* Impossible to optimize ftrace-based kprobe */ 835 if (kprobe_ftrace(p)) 836 return; 837 838 /* For preparing optimization, jump_label_text_reserved() is called */ 839 cpus_read_lock(); 840 jump_label_lock(); 841 mutex_lock(&text_mutex); 842 843 ap = alloc_aggr_kprobe(p); 844 if (!ap) 845 goto out; 846 847 op = container_of(ap, struct optimized_kprobe, kp); 848 if (!arch_prepared_optinsn(&op->optinsn)) { 849 /* If failed to setup optimizing, fallback to kprobe */ 850 arch_remove_optimized_kprobe(op); 851 kfree(op); 852 goto out; 853 } 854 855 init_aggr_kprobe(ap, p); 856 optimize_kprobe(ap); /* This just kicks optimizer thread */ 857 858 out: 859 mutex_unlock(&text_mutex); 860 jump_label_unlock(); 861 cpus_read_unlock(); 862 } 863 864 #ifdef CONFIG_SYSCTL 865 static 
void optimize_all_kprobes(void) 866 { 867 struct hlist_head *head; 868 struct kprobe *p; 869 unsigned int i; 870 871 mutex_lock(&kprobe_mutex); 872 /* If optimization is already allowed, just return */ 873 if (kprobes_allow_optimization) 874 goto out; 875 876 cpus_read_lock(); 877 kprobes_allow_optimization = true; 878 for (i = 0; i < KPROBE_TABLE_SIZE; i++) { 879 head = &kprobe_table[i]; 880 hlist_for_each_entry(p, head, hlist) 881 if (!kprobe_disabled(p)) 882 optimize_kprobe(p); 883 } 884 cpus_read_unlock(); 885 printk(KERN_INFO "Kprobes globally optimized\n"); 886 out: 887 mutex_unlock(&kprobe_mutex); 888 } 889 890 static void unoptimize_all_kprobes(void) 891 { 892 struct hlist_head *head; 893 struct kprobe *p; 894 unsigned int i; 895 896 mutex_lock(&kprobe_mutex); 897 /* If optimization is already prohibited, just return */ 898 if (!kprobes_allow_optimization) { 899 mutex_unlock(&kprobe_mutex); 900 return; 901 } 902 903 cpus_read_lock(); 904 kprobes_allow_optimization = false; 905 for (i = 0; i < KPROBE_TABLE_SIZE; i++) { 906 head = &kprobe_table[i]; 907 hlist_for_each_entry(p, head, hlist) { 908 if (!kprobe_disabled(p)) 909 unoptimize_kprobe(p, false); 910 } 911 } 912 cpus_read_unlock(); 913 mutex_unlock(&kprobe_mutex); 914 915 /* Wait for unoptimizing completion */ 916 wait_for_kprobe_optimizer(); 917 printk(KERN_INFO "Kprobes globally unoptimized\n"); 918 } 919 920 static DEFINE_MUTEX(kprobe_sysctl_mutex); 921 int sysctl_kprobes_optimization; 922 int proc_kprobes_optimization_handler(struct ctl_table *table, int write, 923 void *buffer, size_t *length, 924 loff_t *ppos) 925 { 926 int ret; 927 928 mutex_lock(&kprobe_sysctl_mutex); 929 sysctl_kprobes_optimization = kprobes_allow_optimization ? 
1 : 0; 930 ret = proc_dointvec_minmax(table, write, buffer, length, ppos); 931 932 if (sysctl_kprobes_optimization) 933 optimize_all_kprobes(); 934 else 935 unoptimize_all_kprobes(); 936 mutex_unlock(&kprobe_sysctl_mutex); 937 938 return ret; 939 } 940 #endif /* CONFIG_SYSCTL */ 941 942 /* Put a breakpoint for a probe. Must be called with text_mutex locked */ 943 static void __arm_kprobe(struct kprobe *p) 944 { 945 struct kprobe *_p; 946 947 /* Check collision with other optimized kprobes */ 948 _p = get_optimized_kprobe((unsigned long)p->addr); 949 if (unlikely(_p)) 950 /* Fallback to unoptimized kprobe */ 951 unoptimize_kprobe(_p, true); 952 953 arch_arm_kprobe(p); 954 optimize_kprobe(p); /* Try to optimize (add kprobe to a list) */ 955 } 956 957 /* Remove the breakpoint of a probe. Must be called with text_mutex locked */ 958 static void __disarm_kprobe(struct kprobe *p, bool reopt) 959 { 960 struct kprobe *_p; 961 962 /* Try to unoptimize */ 963 unoptimize_kprobe(p, kprobes_all_disarmed); 964 965 if (!kprobe_queued(p)) { 966 arch_disarm_kprobe(p); 967 /* If another kprobe was blocked, optimize it. 
*/ 968 _p = get_optimized_kprobe((unsigned long)p->addr); 969 if (unlikely(_p) && reopt) 970 optimize_kprobe(_p); 971 } 972 /* TODO: reoptimize others after unoptimized this probe */ 973 } 974 975 #else /* !CONFIG_OPTPROBES */ 976 977 #define optimize_kprobe(p) do {} while (0) 978 #define unoptimize_kprobe(p, f) do {} while (0) 979 #define kill_optimized_kprobe(p) do {} while (0) 980 #define prepare_optimized_kprobe(p) do {} while (0) 981 #define try_to_optimize_kprobe(p) do {} while (0) 982 #define __arm_kprobe(p) arch_arm_kprobe(p) 983 #define __disarm_kprobe(p, o) arch_disarm_kprobe(p) 984 #define kprobe_disarmed(p) kprobe_disabled(p) 985 #define wait_for_kprobe_optimizer() do {} while (0) 986 987 static int reuse_unused_kprobe(struct kprobe *ap) 988 { 989 /* 990 * If the optimized kprobe is NOT supported, the aggr kprobe is 991 * released at the same time that the last aggregated kprobe is 992 * unregistered. 993 * Thus there should be no chance to reuse unused kprobe. 994 */ 995 printk(KERN_ERR "Error: There should be no unused kprobe here.\n"); 996 return -EINVAL; 997 } 998 999 static void free_aggr_kprobe(struct kprobe *p) 1000 { 1001 arch_remove_kprobe(p); 1002 kfree(p); 1003 } 1004 1005 static struct kprobe *alloc_aggr_kprobe(struct kprobe *p) 1006 { 1007 return kzalloc(sizeof(struct kprobe), GFP_KERNEL); 1008 } 1009 #endif /* CONFIG_OPTPROBES */ 1010 1011 #ifdef CONFIG_KPROBES_ON_FTRACE 1012 static struct ftrace_ops kprobe_ftrace_ops __read_mostly = { 1013 .func = kprobe_ftrace_handler, 1014 .flags = FTRACE_OPS_FL_SAVE_REGS, 1015 }; 1016 1017 static struct ftrace_ops kprobe_ipmodify_ops __read_mostly = { 1018 .func = kprobe_ftrace_handler, 1019 .flags = FTRACE_OPS_FL_SAVE_REGS | FTRACE_OPS_FL_IPMODIFY, 1020 }; 1021 1022 static int kprobe_ipmodify_enabled; 1023 static int kprobe_ftrace_enabled; 1024 1025 /* Must ensure p->addr is really on ftrace */ 1026 static int prepare_kprobe(struct kprobe *p) 1027 { 1028 if (!kprobe_ftrace(p)) 1029 return 
arch_prepare_kprobe(p); 1030 1031 return arch_prepare_kprobe_ftrace(p); 1032 } 1033 1034 /* Caller must lock kprobe_mutex */ 1035 static int __arm_kprobe_ftrace(struct kprobe *p, struct ftrace_ops *ops, 1036 int *cnt) 1037 { 1038 int ret = 0; 1039 1040 ret = ftrace_set_filter_ip(ops, (unsigned long)p->addr, 0, 0); 1041 if (ret) { 1042 pr_debug("Failed to arm kprobe-ftrace at %pS (%d)\n", 1043 p->addr, ret); 1044 return ret; 1045 } 1046 1047 if (*cnt == 0) { 1048 ret = register_ftrace_function(ops); 1049 if (ret) { 1050 pr_debug("Failed to init kprobe-ftrace (%d)\n", ret); 1051 goto err_ftrace; 1052 } 1053 } 1054 1055 (*cnt)++; 1056 return ret; 1057 1058 err_ftrace: 1059 /* 1060 * At this point, sinec ops is not registered, we should be sefe from 1061 * registering empty filter. 1062 */ 1063 ftrace_set_filter_ip(ops, (unsigned long)p->addr, 1, 0); 1064 return ret; 1065 } 1066 1067 static int arm_kprobe_ftrace(struct kprobe *p) 1068 { 1069 bool ipmodify = (p->post_handler != NULL); 1070 1071 return __arm_kprobe_ftrace(p, 1072 ipmodify ? &kprobe_ipmodify_ops : &kprobe_ftrace_ops, 1073 ipmodify ? &kprobe_ipmodify_enabled : &kprobe_ftrace_enabled); 1074 } 1075 1076 /* Caller must lock kprobe_mutex */ 1077 static int __disarm_kprobe_ftrace(struct kprobe *p, struct ftrace_ops *ops, 1078 int *cnt) 1079 { 1080 int ret = 0; 1081 1082 if (*cnt == 1) { 1083 ret = unregister_ftrace_function(ops); 1084 if (WARN(ret < 0, "Failed to unregister kprobe-ftrace (%d)\n", ret)) 1085 return ret; 1086 } 1087 1088 (*cnt)--; 1089 1090 ret = ftrace_set_filter_ip(ops, (unsigned long)p->addr, 1, 0); 1091 WARN_ONCE(ret < 0, "Failed to disarm kprobe-ftrace at %pS (%d)\n", 1092 p->addr, ret); 1093 return ret; 1094 } 1095 1096 static int disarm_kprobe_ftrace(struct kprobe *p) 1097 { 1098 bool ipmodify = (p->post_handler != NULL); 1099 1100 return __disarm_kprobe_ftrace(p, 1101 ipmodify ? &kprobe_ipmodify_ops : &kprobe_ftrace_ops, 1102 ipmodify ? 
&kprobe_ipmodify_enabled : &kprobe_ftrace_enabled); 1103 } 1104 #else /* !CONFIG_KPROBES_ON_FTRACE */ 1105 static inline int prepare_kprobe(struct kprobe *p) 1106 { 1107 return arch_prepare_kprobe(p); 1108 } 1109 1110 static inline int arm_kprobe_ftrace(struct kprobe *p) 1111 { 1112 return -ENODEV; 1113 } 1114 1115 static inline int disarm_kprobe_ftrace(struct kprobe *p) 1116 { 1117 return -ENODEV; 1118 } 1119 #endif 1120 1121 /* Arm a kprobe with text_mutex */ 1122 static int arm_kprobe(struct kprobe *kp) 1123 { 1124 if (unlikely(kprobe_ftrace(kp))) 1125 return arm_kprobe_ftrace(kp); 1126 1127 cpus_read_lock(); 1128 mutex_lock(&text_mutex); 1129 __arm_kprobe(kp); 1130 mutex_unlock(&text_mutex); 1131 cpus_read_unlock(); 1132 1133 return 0; 1134 } 1135 1136 /* Disarm a kprobe with text_mutex */ 1137 static int disarm_kprobe(struct kprobe *kp, bool reopt) 1138 { 1139 if (unlikely(kprobe_ftrace(kp))) 1140 return disarm_kprobe_ftrace(kp); 1141 1142 cpus_read_lock(); 1143 mutex_lock(&text_mutex); 1144 __disarm_kprobe(kp, reopt); 1145 mutex_unlock(&text_mutex); 1146 cpus_read_unlock(); 1147 1148 return 0; 1149 } 1150 1151 /* 1152 * Aggregate handlers for multiple kprobes support - these handlers 1153 * take care of invoking the individual kprobe handlers on p->list 1154 */ 1155 static int aggr_pre_handler(struct kprobe *p, struct pt_regs *regs) 1156 { 1157 struct kprobe *kp; 1158 1159 list_for_each_entry_rcu(kp, &p->list, list) { 1160 if (kp->pre_handler && likely(!kprobe_disabled(kp))) { 1161 set_kprobe_instance(kp); 1162 if (kp->pre_handler(kp, regs)) 1163 return 1; 1164 } 1165 reset_kprobe_instance(); 1166 } 1167 return 0; 1168 } 1169 NOKPROBE_SYMBOL(aggr_pre_handler); 1170 1171 static void aggr_post_handler(struct kprobe *p, struct pt_regs *regs, 1172 unsigned long flags) 1173 { 1174 struct kprobe *kp; 1175 1176 list_for_each_entry_rcu(kp, &p->list, list) { 1177 if (kp->post_handler && likely(!kprobe_disabled(kp))) { 1178 set_kprobe_instance(kp); 1179 
kp->post_handler(kp, regs, flags); 1180 reset_kprobe_instance(); 1181 } 1182 } 1183 } 1184 NOKPROBE_SYMBOL(aggr_post_handler); 1185 /* Aggregate fault handler: forwards the fault to the fault_handler of the probe recorded in this CPU's kprobe_instance, if any; returns 1 only when that handler returns nonzero */ 1186 static int aggr_fault_handler(struct kprobe *p, struct pt_regs *regs, 1187 int trapnr) 1188 { 1189 struct kprobe *cur = __this_cpu_read(kprobe_instance); 1190 1191 /* 1192 * if we faulted "during" the execution of a user specified 1193 * probe handler, invoke just that probe's fault handler 1194 */ 1195 if (cur && cur->fault_handler) { 1196 if (cur->fault_handler(cur, regs, trapnr)) 1197 return 1; 1198 } 1199 return 0; 1200 } 1201 NOKPROBE_SYMBOL(aggr_fault_handler); 1202 1203 /* Walks the list and increments nmissed count for multiprobe case; a non-aggregate probe just has its own nmissed incremented */ 1204 void kprobes_inc_nmissed_count(struct kprobe *p) 1205 { 1206 struct kprobe *kp; 1207 if (!kprobe_aggrprobe(p)) { 1208 p->nmissed++; 1209 } else { 1210 list_for_each_entry_rcu(kp, &p->list, list) 1211 kp->nmissed++; 1212 } 1213 return; 1214 } 1215 NOKPROBE_SYMBOL(kprobes_inc_nmissed_count); 1216 /* RCU callback: drop the instance's reference on its holder (freeing the holder on the last reference), then free the instance itself */ 1217 static void free_rp_inst_rcu(struct rcu_head *head) 1218 { 1219 struct kretprobe_instance *ri = container_of(head, struct kretprobe_instance, rcu); 1220 1221 if (refcount_dec_and_test(&ri->rph->ref)) 1222 kfree(ri->rph); 1223 kfree(ri); 1224 } 1225 NOKPROBE_SYMBOL(free_rp_inst_rcu); 1226 /* Give @ri back to its kretprobe's freelist, or free it through call_rcu() when get_kretprobe() returns no kretprobe */ 1227 static void recycle_rp_inst(struct kretprobe_instance *ri) 1228 { 1229 struct kretprobe *rp = get_kretprobe(ri); 1230 1231 if (likely(rp)) { 1232 freelist_add(&ri->freelist, &rp->freelist); 1233 } else 1234 call_rcu(&ri->rcu, free_rp_inst_rcu); 1235 } 1236 NOKPROBE_SYMBOL(recycle_rp_inst); 1237 /* Placeholder kprobe installed as current_kprobe by kprobe_busy_begin() */ 1238 static struct kprobe kprobe_busy = { 1239 .addr = (void *) get_kprobe, 1240 }; 1241 /* Disable preemption, make kprobe_busy this CPU's current_kprobe and set the control block status to KPROBE_HIT_ACTIVE */ 1242 void kprobe_busy_begin(void) 1243 { 1244 struct kprobe_ctlblk *kcb; 1245 1246 preempt_disable(); 1247 __this_cpu_write(current_kprobe, &kprobe_busy); 1248 kcb = get_kprobe_ctlblk(); 1249 kcb->kprobe_status = KPROBE_HIT_ACTIVE; 1250 } 1251 /* Undo kprobe_busy_begin(): clear current_kprobe and re-enable preemption */ 1252 void kprobe_busy_end(void) 1253 { 1254 __this_cpu_write(current_kprobe, 
NULL); 1255 preempt_enable(); 1256 } 1257 1258 /* 1259 * This function is called from finish_task_switch when task tk becomes dead, 1260 * so that we can recycle any function-return probe instances associated 1261 * with this task. These left over instances represent probed functions 1262 * that have been called but will never return. 1263 */ 1264 void kprobe_flush_task(struct task_struct *tk) 1265 { 1266 struct kretprobe_instance *ri; 1267 struct llist_node *node; 1268 1269 /* Early boot, not yet initialized. */ 1270 if (unlikely(!kprobes_initialized)) 1271 return; 1272 1273 kprobe_busy_begin(); 1274 /* Detach the whole per-task list first, then recycle each instance; node->next is read before recycling because the instance may be reused or freed */ 1275 node = __llist_del_all(&tk->kretprobe_instances); 1276 while (node) { 1277 ri = container_of(node, struct kretprobe_instance, llist); 1278 node = node->next; 1279 1280 recycle_rp_inst(ri); 1281 } 1282 1283 kprobe_busy_end(); 1284 } 1285 NOKPROBE_SYMBOL(kprobe_flush_task); 1286 /* Free every instance sitting on rp->freelist and drop that many references on rp->rph; the holder is freed and rp->rph cleared when the count reaches zero */ 1287 static inline void free_rp_inst(struct kretprobe *rp) 1288 { 1289 struct kretprobe_instance *ri; 1290 struct freelist_node *node; 1291 int count = 0; 1292 1293 node = rp->freelist.head; 1294 while (node) { 1295 ri = container_of(node, struct kretprobe_instance, freelist); 1296 node = node->next; 1297 1298 kfree(ri); 1299 count++; 1300 } 1301 1302 if (refcount_sub_and_test(count, &rp->rph->ref)) { 1303 kfree(rp->rph); 1304 rp->rph = NULL; 1305 } 1306 } 1307 1308 /* Add the new probe to ap->list; a probe with a post_handler first forces ap back to a normal kprobe and makes ap dispatch post handlers. Always returns 0 */ 1309 static int add_new_kprobe(struct kprobe *ap, struct kprobe *p) 1310 { 1311 if (p->post_handler) 1312 unoptimize_kprobe(ap, true); /* Fall back to normal kprobe */ 1313 1314 list_add_rcu(&p->list, &ap->list); 1315 if (p->post_handler && !ap->post_handler) 1316 ap->post_handler = aggr_post_handler; 1317 1318 return 0; 1319 } 1320 1321 /* 1322 * Fill in the required fields of the "manager kprobe". 
Replace the 1323 * earlier kprobe in the hlist with the manager kprobe 1324 */ 1325 static void init_aggr_kprobe(struct kprobe *ap, struct kprobe *p) 1326 { 1327 /* Copy p's insn slot to ap */ 1328 copy_kprobe(p, ap); 1329 flush_insn_slot(ap); 1330 ap->addr = p->addr; 1331 ap->flags = p->flags & ~KPROBE_FLAG_OPTIMIZED; 1332 ap->pre_handler = aggr_pre_handler; 1333 ap->fault_handler = aggr_fault_handler; 1334 /* We don't care about a kprobe which has gone. */ 1335 if (p->post_handler && !kprobe_gone(p)) 1336 ap->post_handler = aggr_post_handler; 1337 1338 INIT_LIST_HEAD(&ap->list); 1339 INIT_HLIST_NODE(&ap->hlist); 1340 /* p becomes the first child of ap, and ap takes over p's place in the hash table */ 1341 list_add_rcu(&p->list, &ap->list); 1342 hlist_replace_rcu(&p->hlist, &ap->hlist); 1343 } 1344 1345 /* 1346 * This is the second or subsequent kprobe at the address - handle 1347 * the intricacies 1348 */ 1349 static int register_aggr_kprobe(struct kprobe *orig_p, struct kprobe *p) 1350 { 1351 int ret = 0; 1352 struct kprobe *ap = orig_p; 1353 1354 cpus_read_lock(); 1355 1356 /* For preparing optimization, jump_label_text_reserved() is called */ 1357 jump_label_lock(); 1358 mutex_lock(&text_mutex); 1359 1360 if (!kprobe_aggrprobe(orig_p)) { 1361 /* If orig_p is not an aggr_kprobe, create new aggr_kprobe. */ 1362 ap = alloc_aggr_kprobe(orig_p); 1363 if (!ap) { 1364 ret = -ENOMEM; 1365 goto out; 1366 } 1367 init_aggr_kprobe(ap, orig_p); 1368 } else if (kprobe_unused(ap)) { 1369 /* This probe is going to die. Rescue it */ 1370 ret = reuse_unused_kprobe(ap); 1371 if (ret) 1372 goto out; 1373 } 1374 1375 if (kprobe_gone(ap)) { 1376 /* 1377 * Attempting to insert new probe at the same location that 1378 * had a probe in the module vaddr area which was already 1379 * freed. So, the instruction slot has already been 1380 * released. We need a new slot for the new probe. 1381 */ 1382 ret = arch_prepare_kprobe(ap); 1383 if (ret) 1384 /* 1385 * Even if fail to allocate new slot, don't need to 1386 * free aggr_probe. 
It will be used next time, or 1387 * freed by unregister_kprobe. 1388 */ 1389 goto out; 1390 1391 /* Prepare optimized instructions if possible. */ 1392 prepare_optimized_kprobe(ap); 1393 1394 /* 1395 * Clear gone flag to prevent allocating new slot again, and 1396 * set disabled flag because it is not armed yet. 1397 */ 1398 ap->flags = (ap->flags & ~KPROBE_FLAG_GONE) 1399 | KPROBE_FLAG_DISABLED; 1400 } 1401 1402 /* Copy ap's insn slot to p */ 1403 copy_kprobe(ap, p); 1404 ret = add_new_kprobe(ap, p); 1405 1406 out: 1407 mutex_unlock(&text_mutex); 1408 jump_label_unlock(); 1409 cpus_read_unlock(); 1410 /* If ap was disabled but the newly added probe is enabled, ap has to be enabled (and armed, unless everything is disarmed) as well */ 1411 if (ret == 0 && kprobe_disabled(ap) && !kprobe_disabled(p)) { 1412 ap->flags &= ~KPROBE_FLAG_DISABLED; 1413 if (!kprobes_all_disarmed) { 1414 /* Arm the breakpoint again. */ 1415 ret = arm_kprobe(ap); 1416 if (ret) { /* arming failed: mark ap disabled again and take p back off the list */ 1417 ap->flags |= KPROBE_FLAG_DISABLED; 1418 list_del_rcu(&p->list); 1419 synchronize_rcu(); 1420 } 1421 } 1422 } 1423 return ret; 1424 } 1425 /* Default (weak) arch check: true when @addr lies in [__kprobes_text_start, __kprobes_text_end) */ 1426 bool __weak arch_within_kprobe_blacklist(unsigned long addr) 1427 { 1428 /* The __kprobes marked functions and entry code must not be probed */ 1429 return addr >= (unsigned long)__kprobes_text_start && 1430 addr < (unsigned long)__kprobes_text_end; 1431 } 1432 /* True when @addr is rejected by the arch check or falls inside [start_addr, end_addr) of any kprobe_blacklist entry */ 1433 static bool __within_kprobe_blacklist(unsigned long addr) 1434 { 1435 struct kprobe_blacklist_entry *ent; 1436 1437 if (arch_within_kprobe_blacklist(addr)) 1438 return true; 1439 /* 1440 * If there exists a kprobe_blacklist, verify and 1441 * fail any probe registration in the prohibited area 1442 */ 1443 list_for_each_entry(ent, &kprobe_blacklist, list) { 1444 if (addr >= ent->start_addr && addr < ent->end_addr) 1445 return true; 1446 } 1447 return false; 1448 } 1449 /* Blacklist check that also covers suffixed symbols: when @addr itself is not blacklisted, the address of the symbol name with its '.' suffix stripped is checked too */ 1450 bool within_kprobe_blacklist(unsigned long addr) 1451 { 1452 char symname[KSYM_NAME_LEN], *p; 1453 1454 if (__within_kprobe_blacklist(addr)) 1455 return true; 1456 1457 /* Check if the address is on a suffixed-symbol */ 1458 if (!lookup_symbol_name(addr, symname)) { 1459 p 
= strchr(symname, '.'); 1460 if (!p) 1461 return false; 1462 *p = '\0'; /* cut the name at the first '.' and retry with the base symbol */ 1463 addr = (unsigned long)kprobe_lookup_name(symname, 0); 1464 if (addr) 1465 return __within_kprobe_blacklist(addr); 1466 } 1467 return false; 1468 } 1469 1470 /* 1471 * If we have a symbol_name argument, look it up and add the offset field 1472 * to it. This way, we can specify a relative address to a symbol. 1473 * This returns encoded errors if it fails to look up symbol or if given an invalid 1474 * combination of parameters: exactly one of @symbol_name and @addr must be set (-EINVAL otherwise), and an unknown symbol yields -ENOENT. 1475 */ 1476 static kprobe_opcode_t *_kprobe_addr(kprobe_opcode_t *addr, 1477 const char *symbol_name, unsigned int offset) 1478 { 1479 if ((symbol_name && addr) || (!symbol_name && !addr)) 1480 goto invalid; 1481 1482 if (symbol_name) { 1483 addr = kprobe_lookup_name(symbol_name, offset); 1484 if (!addr) 1485 return ERR_PTR(-ENOENT); 1486 } 1487 /* offset is applied in bytes, hence the char * arithmetic */ 1488 addr = (kprobe_opcode_t *)(((char *)addr) + offset); 1489 if (addr) 1490 return addr; 1491 1492 invalid: 1493 return ERR_PTR(-EINVAL); 1494 } 1495 /* Resolve the probe address from p->addr, p->symbol_name and p->offset; returns an ERR_PTR() on failure */ 1496 static kprobe_opcode_t *kprobe_addr(struct kprobe *p) 1497 { 1498 return _kprobe_addr(p->addr, p->symbol_name, p->offset); 1499 } 1500 1501 /* Check passed kprobe is valid and return kprobe in kprobe_table. 
*/ 1502 static struct kprobe *__get_valid_kprobe(struct kprobe *p) 1503 { 1504 struct kprobe *ap, *list_p; 1505 1506 lockdep_assert_held(&kprobe_mutex); 1507 1508 ap = get_kprobe(p->addr); 1509 if (unlikely(!ap)) 1510 return NULL; 1511 /* p is valid if it is the table entry itself or one of the children on that entry's list */ 1512 if (p != ap) { 1513 list_for_each_entry(list_p, &ap->list, list) 1514 if (list_p == p) 1515 /* kprobe p is a valid probe */ 1516 goto valid; 1517 return NULL; 1518 } 1519 valid: 1520 return ap; 1521 } 1522 1523 /* Return error (-EINVAL) if the kprobe is being re-registered, i.e. it is already reachable from kprobe_table; takes kprobe_mutex itself */ 1524 static inline int check_kprobe_rereg(struct kprobe *p) 1525 { 1526 int ret = 0; 1527 1528 mutex_lock(&kprobe_mutex); 1529 if (__get_valid_kprobe(p)) 1530 ret = -EINVAL; 1531 mutex_unlock(&kprobe_mutex); 1532 1533 return ret; 1534 } 1535 /* If p->addr is an ftrace location: with CONFIG_KPROBES_ON_FTRACE, flag the probe as ftrace-based (or fail with -EILSEQ when p->addr differs from the address ftrace reports); without it, reject the probe with -EINVAL. Returns 0 for non-ftrace addresses */ 1536 int __weak arch_check_ftrace_location(struct kprobe *p) 1537 { 1538 unsigned long ftrace_addr; 1539 1540 ftrace_addr = ftrace_location((unsigned long)p->addr); 1541 if (ftrace_addr) { 1542 #ifdef CONFIG_KPROBES_ON_FTRACE 1543 /* Given address is not on the instruction boundary */ 1544 if ((unsigned long)p->addr != ftrace_addr) 1545 return -EILSEQ; 1546 p->flags |= KPROBE_FLAG_FTRACE; 1547 #else /* !CONFIG_KPROBES_ON_FTRACE */ 1548 return -EINVAL; 1549 #endif 1550 } 1551 return 0; 1552 } 1553 /* Validate p->addr for probing; on success *probed_mod holds the owning module (with a reference taken) when the address is module text */ 1554 static int check_kprobe_address_safe(struct kprobe *p, 1555 struct module **probed_mod) 1556 { 1557 int ret; 1558 1559 ret = arch_check_ftrace_location(p); 1560 if (ret) 1561 return ret; 1562 jump_label_lock(); 1563 preempt_disable(); 1564 1565 /* Ensure it is not in reserved area nor out of text */ 1566 if (!kernel_text_address((unsigned long) p->addr) || 1567 within_kprobe_blacklist((unsigned long) p->addr) || 1568 jump_label_text_reserved(p->addr, p->addr) || 1569 find_bug((unsigned long)p->addr)) { 1570 ret = -EINVAL; 1571 goto out; 1572 } 1573 1574 /* Check whether we are probing a module */ 1575 *probed_mod = __module_text_address((unsigned long) p->addr); 1576 if (*probed_mod) { 1577 /* 1578 * We must hold a refcount of the probed 
module while updating 1579 * its code to prohibit unexpected unloading. 1580 */ 1581 if (unlikely(!try_module_get(*probed_mod))) { 1582 ret = -ENOENT; 1583 goto out; 1584 } 1585 1586 /* 1587 * If the module freed .init.text, we couldn't insert 1588 * kprobes in there. 1589 */ 1590 if (within_module_init((unsigned long)p->addr, *probed_mod) && 1591 (*probed_mod)->state != MODULE_STATE_COMING) { 1592 module_put(*probed_mod); 1593 *probed_mod = NULL; 1594 ret = -ENOENT; 1595 } 1596 } 1597 out: 1598 preempt_enable(); 1599 jump_label_unlock(); 1600 1601 return ret; 1602 } 1603 /* Register @p: resolve its address, reject re-registration, validate the address, then take kprobe_mutex to install it. Only KPROBE_FLAG_DISABLED is kept from the caller-supplied flags. Returns 0 or a negative errno */ 1604 int register_kprobe(struct kprobe *p) 1605 { 1606 int ret; 1607 struct kprobe *old_p; 1608 struct module *probed_mod; 1609 kprobe_opcode_t *addr; 1610 1611 /* Adjust probe address from symbol */ 1612 addr = kprobe_addr(p); 1613 if (IS_ERR(addr)) 1614 return PTR_ERR(addr); 1615 p->addr = addr; 1616 1617 ret = check_kprobe_rereg(p); 1618 if (ret) 1619 return ret; 1620 1621 /* User can pass only KPROBE_FLAG_DISABLED to register_kprobe */ 1622 p->flags &= KPROBE_FLAG_DISABLED; 1623 p->nmissed = 0; 1624 INIT_LIST_HEAD(&p->list); 1625 1626 ret = check_kprobe_address_safe(p, &probed_mod); 1627 if (ret) 1628 return ret; 1629 1630 mutex_lock(&kprobe_mutex); 1631 /* A probe already at this address means p joins (or creates) an aggrprobe instead of being hashed itself */ 1632 old_p = get_kprobe(p->addr); 1633 if (old_p) { 1634 /* Since this may unoptimize old_p, locking text_mutex. 
*/ 1635 ret = register_aggr_kprobe(old_p, p); 1636 goto out; 1637 } 1638 1639 cpus_read_lock(); 1640 /* Prevent text modification */ 1641 mutex_lock(&text_mutex); 1642 ret = prepare_kprobe(p); 1643 mutex_unlock(&text_mutex); 1644 cpus_read_unlock(); 1645 if (ret) 1646 goto out; 1647 1648 INIT_HLIST_NODE(&p->hlist); 1649 hlist_add_head_rcu(&p->hlist, 1650 &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]); 1651 1652 if (!kprobes_all_disarmed && !kprobe_disabled(p)) { 1653 ret = arm_kprobe(p); 1654 if (ret) { 1655 hlist_del_rcu(&p->hlist); 1656 synchronize_rcu(); 1657 goto out; 1658 } 1659 } 1660 1661 /* Try to optimize kprobe */ 1662 try_to_optimize_kprobe(p); 1663 out: 1664 mutex_unlock(&kprobe_mutex); 1665 1666 if (probed_mod) 1667 module_put(probed_mod); 1668 1669 return ret; 1670 } 1671 EXPORT_SYMBOL_GPL(register_kprobe); 1672 1673 /* Check if all probes on the aggrprobe are disabled */ 1674 static int aggr_kprobe_disabled(struct kprobe *ap) 1675 { 1676 struct kprobe *kp; 1677 1678 lockdep_assert_held(&kprobe_mutex); 1679 1680 list_for_each_entry(kp, &ap->list, list) 1681 if (!kprobe_disabled(kp)) 1682 /* 1683 * There is an active probe on the list. 1684 * We can't disable this ap. 1685 */ 1686 return 0; 1687 1688 return 1; 1689 } 1690 1691 /* Disable one kprobe: Make sure called under kprobe_mutex is locked */ 1692 static struct kprobe *__disable_kprobe(struct kprobe *p) 1693 { 1694 struct kprobe *orig_p; 1695 int ret; 1696 1697 /* Get an original kprobe for return */ 1698 orig_p = __get_valid_kprobe(p); 1699 if (unlikely(orig_p == NULL)) 1700 return ERR_PTR(-EINVAL); 1701 1702 if (!kprobe_disabled(p)) { 1703 /* Disable probe if it is a child probe */ 1704 if (p != orig_p) 1705 p->flags |= KPROBE_FLAG_DISABLED; 1706 1707 /* Try to disarm and disable this/parent probe */ 1708 if (p == orig_p || aggr_kprobe_disabled(orig_p)) { 1709 /* 1710 * If kprobes_all_disarmed is set, orig_p 1711 * should have already been disarmed, so 1712 * skip unneed disarming process. 
1713 */ 1714 if (!kprobes_all_disarmed) { 1715 ret = disarm_kprobe(orig_p, true); 1716 if (ret) { 1717 p->flags &= ~KPROBE_FLAG_DISABLED; 1718 return ERR_PTR(ret); 1719 } 1720 } 1721 orig_p->flags |= KPROBE_FLAG_DISABLED; 1722 } 1723 } 1724 1725 return orig_p; 1726 } 1727 1728 /* 1729 * Unregister a kprobe without a scheduler synchronization. 1730 */ 1731 static int __unregister_kprobe_top(struct kprobe *p) 1732 { 1733 struct kprobe *ap, *list_p; 1734 1735 /* Disable kprobe. This will disarm it if needed. */ 1736 ap = __disable_kprobe(p); 1737 if (IS_ERR(ap)) 1738 return PTR_ERR(ap); 1739 1740 if (ap == p) 1741 /* 1742 * This probe is an independent(and non-optimized) kprobe 1743 * (not an aggrprobe). Remove from the hash list. 1744 */ 1745 goto disarmed; 1746 1747 /* Following process expects this probe is an aggrprobe */ 1748 WARN_ON(!kprobe_aggrprobe(ap)); 1749 1750 if (list_is_singular(&ap->list) && kprobe_disarmed(ap)) 1751 /* 1752 * !disarmed could be happen if the probe is under delayed 1753 * unoptimizing. 1754 */ 1755 goto disarmed; 1756 else { 1757 /* If disabling probe has special handlers, update aggrprobe */ 1758 if (p->post_handler && !kprobe_gone(p)) { 1759 list_for_each_entry(list_p, &ap->list, list) { 1760 if ((list_p != p) && (list_p->post_handler)) 1761 goto noclean; 1762 } 1763 ap->post_handler = NULL; 1764 } 1765 noclean: 1766 /* 1767 * Remove from the aggrprobe: this path will do nothing in 1768 * __unregister_kprobe_bottom(). 1769 */ 1770 list_del_rcu(&p->list); 1771 if (!kprobe_disabled(ap) && !kprobes_all_disarmed) 1772 /* 1773 * Try to optimize this probe again, because post 1774 * handler may have been changed. 
1775 */ 1776 optimize_kprobe(ap); 1777 } 1778 return 0; 1779 1780 disarmed: 1781 hlist_del_rcu(&ap->hlist); 1782 return 0; 1783 } 1784 /* Second half of unregistration: release the arch resources of an independent probe, or free the aggrprobe when @p was its last child */ 1785 static void __unregister_kprobe_bottom(struct kprobe *p) 1786 { 1787 struct kprobe *ap; 1788 1789 if (list_empty(&p->list)) 1790 /* This is an independent kprobe */ 1791 arch_remove_kprobe(p); 1792 else if (list_is_singular(&p->list)) { 1793 /* This is the last child of an aggrprobe */ 1794 ap = list_entry(p->list.next, struct kprobe, list); 1795 list_del(&p->list); 1796 free_aggr_kprobe(ap); 1797 } 1798 /* Otherwise, do nothing. */ 1799 } 1800 /* Register @num probes from @kps; on the first failure the probes registered so far are unregistered and the error is returned. @num <= 0 yields -EINVAL */ 1801 int register_kprobes(struct kprobe **kps, int num) 1802 { 1803 int i, ret = 0; 1804 1805 if (num <= 0) 1806 return -EINVAL; 1807 for (i = 0; i < num; i++) { 1808 ret = register_kprobe(kps[i]); 1809 if (ret < 0) { 1810 if (i > 0) 1811 unregister_kprobes(kps, i); 1812 break; 1813 } 1814 } 1815 return ret; 1816 } 1817 EXPORT_SYMBOL_GPL(register_kprobes); 1818 /* Single-probe convenience wrapper around unregister_kprobes() */ 1819 void unregister_kprobe(struct kprobe *p) 1820 { 1821 unregister_kprobes(&p, 1); 1822 } 1823 EXPORT_SYMBOL_GPL(unregister_kprobe); 1824 /* Unregister @num probes in two passes: the top half under kprobe_mutex (a probe whose top half fails gets addr = NULL and is skipped later), then, after synchronize_rcu(), the bottom half for the remaining probes */ 1825 void unregister_kprobes(struct kprobe **kps, int num) 1826 { 1827 int i; 1828 1829 if (num <= 0) 1830 return; 1831 mutex_lock(&kprobe_mutex); 1832 for (i = 0; i < num; i++) 1833 if (__unregister_kprobe_top(kps[i]) < 0) 1834 kps[i]->addr = NULL; 1835 mutex_unlock(&kprobe_mutex); 1836 1837 synchronize_rcu(); 1838 for (i = 0; i < num; i++) 1839 if (kps[i]->addr) 1840 __unregister_kprobe_bottom(kps[i]); 1841 } 1842 EXPORT_SYMBOL_GPL(unregister_kprobes); 1843 /* Default (weak) exception notifier: does nothing and returns NOTIFY_DONE */ 1844 int __weak kprobe_exceptions_notify(struct notifier_block *self, 1845 unsigned long val, void *data) 1846 { 1847 return NOTIFY_DONE; 1848 } 1849 NOKPROBE_SYMBOL(kprobe_exceptions_notify); 1850 1851 static struct notifier_block kprobe_exceptions_nb = { 1852 .notifier_call = kprobe_exceptions_notify, 1853 .priority = 0x7fffffff /* we need to be notified first */ 1854 }; 1855 1856 unsigned long __weak arch_deref_entry_point(void *entry) 
1857 { 1858 return (unsigned long)entry; 1859 } 1860 1861 #ifdef CONFIG_KRETPROBES 1862 /* Walk current's kretprobe instances up to the first one whose ret_addr is not the trampoline, unlink that run from the task, call each instance's handler with the real return address and recycle it. Returns the real return address; BUGs if none is found or a frame pointer mismatches */ 1863 unsigned long __kretprobe_trampoline_handler(struct pt_regs *regs, 1864 void *trampoline_address, 1865 void *frame_pointer) 1866 { 1867 kprobe_opcode_t *correct_ret_addr = NULL; 1868 struct kretprobe_instance *ri = NULL; 1869 struct llist_node *first, *node; 1870 struct kretprobe *rp; 1871 1872 /* Find all nodes for this frame. */ 1873 first = node = current->kretprobe_instances.first; 1874 while (node) { 1875 ri = container_of(node, struct kretprobe_instance, llist); 1876 1877 BUG_ON(ri->fp != frame_pointer); 1878 1879 if (ri->ret_addr != trampoline_address) { 1880 correct_ret_addr = ri->ret_addr; 1881 /* 1882 * This is the real return address. Any other 1883 * instances associated with this task are for 1884 * other calls deeper on the call stack 1885 */ 1886 goto found; 1887 } 1888 1889 node = node->next; 1890 } 1891 pr_err("Oops! Kretprobe fails to find correct return address.\n"); 1892 BUG_ON(1); 1893 1894 found: 1895 /* Unlink all nodes for this frame. */ 1896 current->kretprobe_instances.first = node->next; 1897 node->next = NULL; 1898 1899 /* Run them.. */ 1900 while (first) { 1901 ri = container_of(first, struct kretprobe_instance, llist); 1902 first = first->next; 1903 1904 rp = get_kretprobe(ri); 1905 if (rp && rp->handler) { 1906 struct kprobe *prev = kprobe_running(); 1907 /* expose rp->kp as the current kprobe while the handler runs, then restore the previous one */ 1908 __this_cpu_write(current_kprobe, &rp->kp); 1909 ri->ret_addr = correct_ret_addr; 1910 rp->handler(ri, regs); 1911 __this_cpu_write(current_kprobe, prev); 1912 } 1913 1914 recycle_rp_inst(ri); 1915 } 1916 1917 return (unsigned long)correct_ret_addr; 1918 } 1919 NOKPROBE_SYMBOL(__kretprobe_trampoline_handler) 1920 1921 /* 1922 * This kprobe pre_handler is registered with every kretprobe. When probe 1923 * hits it will set up the return probe. 
1924 */ 1925 static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs) 1926 { 1927 struct kretprobe *rp = container_of(p, struct kretprobe, kp); 1928 struct kretprobe_instance *ri; 1929 struct freelist_node *fn; 1930 1931 fn = freelist_try_get(&rp->freelist); 1932 if (!fn) { 1933 rp->nmissed++; 1934 return 0; 1935 } 1936 1937 ri = container_of(fn, struct kretprobe_instance, freelist); 1938 1939 if (rp->entry_handler && rp->entry_handler(ri, regs)) { 1940 freelist_add(&ri->freelist, &rp->freelist); 1941 return 0; 1942 } 1943 1944 arch_prepare_kretprobe(ri, regs); 1945 1946 __llist_add(&ri->llist, ¤t->kretprobe_instances); 1947 1948 return 0; 1949 } 1950 NOKPROBE_SYMBOL(pre_handler_kretprobe); 1951 1952 bool __weak arch_kprobe_on_func_entry(unsigned long offset) 1953 { 1954 return !offset; 1955 } 1956 1957 bool kprobe_on_func_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset) 1958 { 1959 kprobe_opcode_t *kp_addr = _kprobe_addr(addr, sym, offset); 1960 1961 if (IS_ERR(kp_addr)) 1962 return false; 1963 1964 if (!kallsyms_lookup_size_offset((unsigned long)kp_addr, NULL, &offset) || 1965 !arch_kprobe_on_func_entry(offset)) 1966 return false; 1967 1968 return true; 1969 } 1970 1971 int register_kretprobe(struct kretprobe *rp) 1972 { 1973 int ret = 0; 1974 struct kretprobe_instance *inst; 1975 int i; 1976 void *addr; 1977 1978 if (!kprobe_on_func_entry(rp->kp.addr, rp->kp.symbol_name, rp->kp.offset)) 1979 return -EINVAL; 1980 1981 if (kretprobe_blacklist_size) { 1982 addr = kprobe_addr(&rp->kp); 1983 if (IS_ERR(addr)) 1984 return PTR_ERR(addr); 1985 1986 for (i = 0; kretprobe_blacklist[i].name != NULL; i++) { 1987 if (kretprobe_blacklist[i].addr == addr) 1988 return -EINVAL; 1989 } 1990 } 1991 1992 rp->kp.pre_handler = pre_handler_kretprobe; 1993 rp->kp.post_handler = NULL; 1994 rp->kp.fault_handler = NULL; 1995 1996 /* Pre-allocate memory for max kretprobe instances */ 1997 if (rp->maxactive <= 0) { 1998 #ifdef CONFIG_PREEMPTION 1999 
rp->maxactive = max_t(unsigned int, 10, 2*num_possible_cpus()); 2000 #else 2001 rp->maxactive = num_possible_cpus(); 2002 #endif 2003 } 2004 rp->freelist.head = NULL; 2005 rp->rph = kzalloc(sizeof(struct kretprobe_holder), GFP_KERNEL); 2006 if (!rp->rph) 2007 return -ENOMEM; 2008 2009 rp->rph->rp = rp; 2010 for (i = 0; i < rp->maxactive; i++) { 2011 inst = kzalloc(sizeof(struct kretprobe_instance) + 2012 rp->data_size, GFP_KERNEL); 2013 if (inst == NULL) { 2014 refcount_set(&rp->rph->ref, i); 2015 free_rp_inst(rp); 2016 return -ENOMEM; 2017 } 2018 inst->rph = rp->rph; 2019 freelist_add(&inst->freelist, &rp->freelist); 2020 } 2021 refcount_set(&rp->rph->ref, i); 2022 2023 rp->nmissed = 0; 2024 /* Establish function entry probe point */ 2025 ret = register_kprobe(&rp->kp); 2026 if (ret != 0) 2027 free_rp_inst(rp); 2028 return ret; 2029 } 2030 EXPORT_SYMBOL_GPL(register_kretprobe); 2031 2032 int register_kretprobes(struct kretprobe **rps, int num) 2033 { 2034 int ret = 0, i; 2035 2036 if (num <= 0) 2037 return -EINVAL; 2038 for (i = 0; i < num; i++) { 2039 ret = register_kretprobe(rps[i]); 2040 if (ret < 0) { 2041 if (i > 0) 2042 unregister_kretprobes(rps, i); 2043 break; 2044 } 2045 } 2046 return ret; 2047 } 2048 EXPORT_SYMBOL_GPL(register_kretprobes); 2049 2050 void unregister_kretprobe(struct kretprobe *rp) 2051 { 2052 unregister_kretprobes(&rp, 1); 2053 } 2054 EXPORT_SYMBOL_GPL(unregister_kretprobe); 2055 2056 void unregister_kretprobes(struct kretprobe **rps, int num) 2057 { 2058 int i; 2059 2060 if (num <= 0) 2061 return; 2062 mutex_lock(&kprobe_mutex); 2063 for (i = 0; i < num; i++) { 2064 if (__unregister_kprobe_top(&rps[i]->kp) < 0) 2065 rps[i]->kp.addr = NULL; 2066 rps[i]->rph->rp = NULL; 2067 } 2068 mutex_unlock(&kprobe_mutex); 2069 2070 synchronize_rcu(); 2071 for (i = 0; i < num; i++) { 2072 if (rps[i]->kp.addr) { 2073 __unregister_kprobe_bottom(&rps[i]->kp); 2074 free_rp_inst(rps[i]); 2075 } 2076 } 2077 } 2078 EXPORT_SYMBOL_GPL(unregister_kretprobes); 
2079 2080 #else /* CONFIG_KRETPROBES */ 2081 int register_kretprobe(struct kretprobe *rp) 2082 { 2083 return -ENOSYS; 2084 } 2085 EXPORT_SYMBOL_GPL(register_kretprobe); 2086 2087 int register_kretprobes(struct kretprobe **rps, int num) 2088 { 2089 return -ENOSYS; 2090 } 2091 EXPORT_SYMBOL_GPL(register_kretprobes); 2092 2093 void unregister_kretprobe(struct kretprobe *rp) 2094 { 2095 } 2096 EXPORT_SYMBOL_GPL(unregister_kretprobe); 2097 2098 void unregister_kretprobes(struct kretprobe **rps, int num) 2099 { 2100 } 2101 EXPORT_SYMBOL_GPL(unregister_kretprobes); 2102 2103 static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs) 2104 { 2105 return 0; 2106 } 2107 NOKPROBE_SYMBOL(pre_handler_kretprobe); 2108 2109 #endif /* CONFIG_KRETPROBES */ 2110 2111 /* Set the kprobe gone and remove its instruction buffer. */ 2112 static void kill_kprobe(struct kprobe *p) 2113 { 2114 struct kprobe *kp; 2115 2116 lockdep_assert_held(&kprobe_mutex); 2117 2118 p->flags |= KPROBE_FLAG_GONE; 2119 if (kprobe_aggrprobe(p)) { 2120 /* 2121 * If this is an aggr_kprobe, we have to list all the 2122 * chained probes and mark them GONE. 2123 */ 2124 list_for_each_entry(kp, &p->list, list) 2125 kp->flags |= KPROBE_FLAG_GONE; 2126 p->post_handler = NULL; 2127 kill_optimized_kprobe(p); 2128 } 2129 /* 2130 * Here, we can remove insn_slot safely, because no thread calls 2131 * the original probed function (which will be freed soon) any more. 2132 */ 2133 arch_remove_kprobe(p); 2134 2135 /* 2136 * The module is going away. We should disarm the kprobe which 2137 * is using ftrace, because ftrace framework is still available at 2138 * MODULE_STATE_GOING notification. 
2139 */ 2140 if (kprobe_ftrace(p) && !kprobe_disabled(p) && !kprobes_all_disarmed) 2141 disarm_kprobe_ftrace(p); 2142 } 2143 2144 /* Disable one kprobe */ 2145 int disable_kprobe(struct kprobe *kp) 2146 { 2147 int ret = 0; 2148 struct kprobe *p; 2149 2150 mutex_lock(&kprobe_mutex); 2151 2152 /* Disable this kprobe */ 2153 p = __disable_kprobe(kp); 2154 if (IS_ERR(p)) 2155 ret = PTR_ERR(p); 2156 2157 mutex_unlock(&kprobe_mutex); 2158 return ret; 2159 } 2160 EXPORT_SYMBOL_GPL(disable_kprobe); 2161 2162 /* Enable one kprobe */ 2163 int enable_kprobe(struct kprobe *kp) 2164 { 2165 int ret = 0; 2166 struct kprobe *p; 2167 2168 mutex_lock(&kprobe_mutex); 2169 2170 /* Check whether specified probe is valid. */ 2171 p = __get_valid_kprobe(kp); 2172 if (unlikely(p == NULL)) { 2173 ret = -EINVAL; 2174 goto out; 2175 } 2176 2177 if (kprobe_gone(kp)) { 2178 /* This kprobe has gone, we couldn't enable it. */ 2179 ret = -EINVAL; 2180 goto out; 2181 } 2182 2183 if (p != kp) 2184 kp->flags &= ~KPROBE_FLAG_DISABLED; 2185 2186 if (!kprobes_all_disarmed && kprobe_disabled(p)) { 2187 p->flags &= ~KPROBE_FLAG_DISABLED; 2188 ret = arm_kprobe(p); 2189 if (ret) 2190 p->flags |= KPROBE_FLAG_DISABLED; 2191 } 2192 out: 2193 mutex_unlock(&kprobe_mutex); 2194 return ret; 2195 } 2196 EXPORT_SYMBOL_GPL(enable_kprobe); 2197 2198 /* Caller must NOT call this in usual path. 
This is only for critical case */ 2199 void dump_kprobe(struct kprobe *kp) 2200 { 2201 pr_err("Dumping kprobe:\n"); 2202 pr_err("Name: %s\nOffset: %x\nAddress: %pS\n", 2203 kp->symbol_name, kp->offset, kp->addr); 2204 } 2205 NOKPROBE_SYMBOL(dump_kprobe); 2206 /* Append a blacklist entry covering [entry, entry + size), where size is what kallsyms reports for @entry. Returns that size, -EINVAL when @entry is not kernel text or unknown to kallsyms, or -ENOMEM */ 2207 int kprobe_add_ksym_blacklist(unsigned long entry) 2208 { 2209 struct kprobe_blacklist_entry *ent; 2210 unsigned long offset = 0, size = 0; 2211 2212 if (!kernel_text_address(entry) || 2213 !kallsyms_lookup_size_offset(entry, &size, &offset)) 2214 return -EINVAL; 2215 2216 ent = kmalloc(sizeof(*ent), GFP_KERNEL); 2217 if (!ent) 2218 return -ENOMEM; 2219 ent->start_addr = entry; 2220 ent->end_addr = entry + size; 2221 INIT_LIST_HEAD(&ent->list); 2222 list_add_tail(&ent->list, &kprobe_blacklist); 2223 2224 return (int)size; 2225 } 2226 2227 /* Add all symbols in given area into kprobe blacklist; the walk advances by the size each added symbol reports and stops at the first error */ 2228 int kprobe_add_area_blacklist(unsigned long start, unsigned long end) 2229 { 2230 unsigned long entry; 2231 int ret = 0; 2232 2233 for (entry = start; entry < end; entry += ret) { 2234 ret = kprobe_add_ksym_blacklist(entry); 2235 if (ret < 0) 2236 return ret; 2237 if (ret == 0) /* In case of alias symbol */ 2238 ret = 1; 2239 } 2240 return 0; 2241 } 2242 2243 /* Remove all symbols in given area from kprobe blacklist: every entry whose start_addr lies in [start, end) is unlinked and freed */ 2244 static void kprobe_remove_area_blacklist(unsigned long start, unsigned long end) 2245 { 2246 struct kprobe_blacklist_entry *ent, *n; 2247 2248 list_for_each_entry_safe(ent, n, &kprobe_blacklist, list) { 2249 if (ent->start_addr < start || ent->start_addr >= end) 2250 continue; 2251 list_del(&ent->list); 2252 kfree(ent); 2253 } 2254 } 2255 /* Remove the blacklist entries whose start address is exactly @entry */ 2256 static void kprobe_remove_ksym_blacklist(unsigned long entry) 2257 { 2258 kprobe_remove_area_blacklist(entry, entry + 1); 2259 } 2260 /* Default (weak) arch hook: reports no symbols (-ERANGE) */ 2261 int __weak arch_kprobe_get_kallsym(unsigned int *symnum, unsigned long *value, 2262 char *type, char *sym) 2263 { 2264 return -ERANGE; 2265 } 2266 2267 int kprobe_get_kallsym(unsigned int symnum, unsigned long 
*value, char *type, 2268 char *sym) 2269 { /* Try the insn-slot caches (when configured) and then the arch hook; each is handed &symnum, and -ERANGE is returned when none of them reports a symbol */ 2270 #ifdef __ARCH_WANT_KPROBES_INSN_SLOT 2271 if (!kprobe_cache_get_kallsym(&kprobe_insn_slots, &symnum, value, type, sym)) 2272 return 0; 2273 #ifdef CONFIG_OPTPROBES 2274 if (!kprobe_cache_get_kallsym(&kprobe_optinsn_slots, &symnum, value, type, sym)) 2275 return 0; 2276 #endif 2277 #endif 2278 if (!arch_kprobe_get_kallsym(&symnum, value, type, sym)) 2279 return 0; 2280 return -ERANGE; 2281 } 2282 /* Default (weak) arch hook: nothing extra to blacklist */ 2283 int __init __weak arch_populate_kprobe_blacklist(void) 2284 { 2285 return 0; 2286 } 2287 2288 /* 2289 * Lookup and populate the kprobe_blacklist. 2290 * 2291 * Unlike the kretprobe blacklist, we'll need to determine 2292 * the range of addresses that belong to the said functions, 2293 * since a kprobe need not necessarily be at the beginning 2294 * of a function. 2295 */ 2296 static int __init populate_kprobe_blacklist(unsigned long *start, 2297 unsigned long *end) 2298 { 2299 unsigned long entry; 2300 unsigned long *iter; 2301 int ret; 2302 2303 for (iter = start; iter < end; iter++) { 2304 entry = arch_deref_entry_point((void *)*iter); 2305 ret = kprobe_add_ksym_blacklist(entry); 2306 if (ret == -EINVAL) /* entries that cannot be resolved are skipped, not treated as fatal */ 2307 continue; 2308 if (ret < 0) 2309 return ret; 2310 } 2311 2312 /* Symbols in __kprobes_text are blacklisted */ 2313 ret = kprobe_add_area_blacklist((unsigned long)__kprobes_text_start, 2314 (unsigned long)__kprobes_text_end); 2315 if (ret) 2316 return ret; 2317 2318 /* Symbols in noinstr section are blacklisted */ 2319 ret = kprobe_add_area_blacklist((unsigned long)__noinstr_text_start, 2320 (unsigned long)__noinstr_text_end); 2321 2322 return ret ? 
: arch_populate_kprobe_blacklist(); 2323 } 2324 /* Blacklist the symbols listed in mod->kprobe_blacklist plus the module's kprobes text and noinstr text areas; errors from the add helpers are ignored here */ 2325 static void add_module_kprobe_blacklist(struct module *mod) 2326 { 2327 unsigned long start, end; 2328 int i; 2329 2330 if (mod->kprobe_blacklist) { 2331 for (i = 0; i < mod->num_kprobe_blacklist; i++) 2332 kprobe_add_ksym_blacklist(mod->kprobe_blacklist[i]); 2333 } 2334 2335 start = (unsigned long)mod->kprobes_text_start; 2336 if (start) { 2337 end = start + mod->kprobes_text_size; 2338 kprobe_add_area_blacklist(start, end); 2339 } 2340 2341 start = (unsigned long)mod->noinstr_text_start; 2342 if (start) { 2343 end = start + mod->noinstr_text_size; 2344 kprobe_add_area_blacklist(start, end); 2345 } 2346 } 2347 /* Mirror of add_module_kprobe_blacklist(): drop the blacklist entries for the module's listed symbols and its kprobes/noinstr text areas */ 2348 static void remove_module_kprobe_blacklist(struct module *mod) 2349 { 2350 unsigned long start, end; 2351 int i; 2352 2353 if (mod->kprobe_blacklist) { 2354 for (i = 0; i < mod->num_kprobe_blacklist; i++) 2355 kprobe_remove_ksym_blacklist(mod->kprobe_blacklist[i]); 2356 } 2357 2358 start = (unsigned long)mod->kprobes_text_start; 2359 if (start) { 2360 end = start + mod->kprobes_text_size; 2361 kprobe_remove_area_blacklist(start, end); 2362 } 2363 2364 start = (unsigned long)mod->noinstr_text_start; 2365 if (start) { 2366 end = start + mod->noinstr_text_size; 2367 kprobe_remove_area_blacklist(start, end); 2368 } 2369 } 2370 2371 /* Module notifier call back, checking kprobes on the module */ 2372 static int kprobes_module_callback(struct notifier_block *nb, 2373 unsigned long val, void *data) 2374 { 2375 struct module *mod = data; 2376 struct hlist_head *head; 2377 struct kprobe *p; 2378 unsigned int i; 2379 int checkcore = (val == MODULE_STATE_GOING); 2380 /* A module that is coming gets its blacklist registered first */ 2381 if (val == MODULE_STATE_COMING) { 2382 mutex_lock(&kprobe_mutex); 2383 add_module_kprobe_blacklist(mod); 2384 mutex_unlock(&kprobe_mutex); 2385 } 2386 if (val != MODULE_STATE_GOING && val != MODULE_STATE_LIVE) 2387 return NOTIFY_DONE; 2388 2389 /* 2390 * When MODULE_STATE_GOING was notified, both of module .text and 2391 * .init.text sections 
would be freed. When MODULE_STATE_LIVE was 2392 * notified, only .init.text section would be freed. We need to 2393 * disable kprobes which have been inserted in the sections. 2394 */ 2395 mutex_lock(&kprobe_mutex); 2396 for (i = 0; i < KPROBE_TABLE_SIZE; i++) { 2397 head = &kprobe_table[i]; 2398 hlist_for_each_entry(p, head, hlist) 2399 if (within_module_init((unsigned long)p->addr, mod) || 2400 (checkcore && 2401 within_module_core((unsigned long)p->addr, mod))) { 2402 /* 2403 * The vaddr this probe is installed will soon 2404 * be vfreed buy not synced to disk. Hence, 2405 * disarming the breakpoint isn't needed. 2406 * 2407 * Note, this will also move any optimized probes 2408 * that are pending to be removed from their 2409 * corresponding lists to the freeing_list and 2410 * will not be touched by the delayed 2411 * kprobe_optimizer work handler. 2412 */ 2413 kill_kprobe(p); 2414 } 2415 } 2416 if (val == MODULE_STATE_GOING) 2417 remove_module_kprobe_blacklist(mod); 2418 mutex_unlock(&kprobe_mutex); 2419 return NOTIFY_DONE; 2420 } 2421 2422 static struct notifier_block kprobe_module_nb = { 2423 .notifier_call = kprobes_module_callback, 2424 .priority = 0 2425 }; 2426 2427 /* Markers of _kprobe_blacklist section */ 2428 extern unsigned long __start_kprobe_blacklist[]; 2429 extern unsigned long __stop_kprobe_blacklist[]; 2430 2431 void kprobe_free_init_mem(void) 2432 { 2433 void *start = (void *)(&__init_begin); 2434 void *end = (void *)(&__init_end); 2435 struct hlist_head *head; 2436 struct kprobe *p; 2437 int i; 2438 2439 mutex_lock(&kprobe_mutex); 2440 2441 /* Kill all kprobes on initmem */ 2442 for (i = 0; i < KPROBE_TABLE_SIZE; i++) { 2443 head = &kprobe_table[i]; 2444 hlist_for_each_entry(p, head, hlist) { 2445 if (start <= (void *)p->addr && (void *)p->addr < end) 2446 kill_kprobe(p); 2447 } 2448 } 2449 2450 mutex_unlock(&kprobe_mutex); 2451 } 2452 2453 static int __init init_kprobes(void) 2454 { 2455 int i, err = 0; 2456 2457 /* FIXME allocate the probe 
table, currently defined statically */ 2458 /* initialize all list heads */ 2459 for (i = 0; i < KPROBE_TABLE_SIZE; i++) 2460 INIT_HLIST_HEAD(&kprobe_table[i]); 2461 2462 err = populate_kprobe_blacklist(__start_kprobe_blacklist, 2463 __stop_kprobe_blacklist); 2464 if (err) { 2465 pr_err("kprobes: failed to populate blacklist: %d\n", err); 2466 pr_err("Please take care of using kprobes.\n"); 2467 } 2468 2469 if (kretprobe_blacklist_size) { 2470 /* lookup the function address from its name */ 2471 for (i = 0; kretprobe_blacklist[i].name != NULL; i++) { 2472 kretprobe_blacklist[i].addr = 2473 kprobe_lookup_name(kretprobe_blacklist[i].name, 0); 2474 if (!kretprobe_blacklist[i].addr) 2475 printk("kretprobe: lookup failed: %s\n", 2476 kretprobe_blacklist[i].name); 2477 } 2478 } 2479 2480 #if defined(CONFIG_OPTPROBES) 2481 #if defined(__ARCH_WANT_KPROBES_INSN_SLOT) 2482 /* Init kprobe_optinsn_slots */ 2483 kprobe_optinsn_slots.insn_size = MAX_OPTINSN_SIZE; 2484 #endif 2485 /* By default, kprobes can be optimized */ 2486 kprobes_allow_optimization = true; 2487 #endif 2488 2489 /* By default, kprobes are armed */ 2490 kprobes_all_disarmed = false; 2491 2492 err = arch_init_kprobes(); 2493 if (!err) 2494 err = register_die_notifier(&kprobe_exceptions_nb); 2495 if (!err) 2496 err = register_module_notifier(&kprobe_module_nb); 2497 2498 kprobes_initialized = (err == 0); 2499 2500 if (!err) 2501 init_test_probes(); 2502 return err; 2503 } 2504 early_initcall(init_kprobes); 2505 2506 #ifdef CONFIG_DEBUG_FS 2507 static void report_probe(struct seq_file *pi, struct kprobe *p, 2508 const char *sym, int offset, char *modname, struct kprobe *pp) 2509 { 2510 char *kprobe_type; 2511 void *addr = p->addr; 2512 2513 if (p->pre_handler == pre_handler_kretprobe) 2514 kprobe_type = "r"; 2515 else 2516 kprobe_type = "k"; 2517 2518 if (!kallsyms_show_value(pi->file->f_cred)) 2519 addr = NULL; 2520 2521 if (sym) 2522 seq_printf(pi, "%px %s %s+0x%x %s ", 2523 addr, kprobe_type, sym, offset, 
2524 (modname ? modname : " ")); 2525 else /* try to use %pS */ 2526 seq_printf(pi, "%px %s %pS ", 2527 addr, kprobe_type, p->addr); 2528 2529 if (!pp) 2530 pp = p; 2531 seq_printf(pi, "%s%s%s%s\n", 2532 (kprobe_gone(p) ? "[GONE]" : ""), 2533 ((kprobe_disabled(p) && !kprobe_gone(p)) ? "[DISABLED]" : ""), 2534 (kprobe_optimized(pp) ? "[OPTIMIZED]" : ""), 2535 (kprobe_ftrace(pp) ? "[FTRACE]" : "")); 2536 } 2537 2538 static void *kprobe_seq_start(struct seq_file *f, loff_t *pos) 2539 { 2540 return (*pos < KPROBE_TABLE_SIZE) ? pos : NULL; 2541 } 2542 2543 static void *kprobe_seq_next(struct seq_file *f, void *v, loff_t *pos) 2544 { 2545 (*pos)++; 2546 if (*pos >= KPROBE_TABLE_SIZE) 2547 return NULL; 2548 return pos; 2549 } 2550 2551 static void kprobe_seq_stop(struct seq_file *f, void *v) 2552 { 2553 /* Nothing to do */ 2554 } 2555 2556 static int show_kprobe_addr(struct seq_file *pi, void *v) 2557 { 2558 struct hlist_head *head; 2559 struct kprobe *p, *kp; 2560 const char *sym = NULL; 2561 unsigned int i = *(loff_t *) v; 2562 unsigned long offset = 0; 2563 char *modname, namebuf[KSYM_NAME_LEN]; 2564 2565 head = &kprobe_table[i]; 2566 preempt_disable(); 2567 hlist_for_each_entry_rcu(p, head, hlist) { 2568 sym = kallsyms_lookup((unsigned long)p->addr, NULL, 2569 &offset, &modname, namebuf); 2570 if (kprobe_aggrprobe(p)) { 2571 list_for_each_entry_rcu(kp, &p->list, list) 2572 report_probe(pi, kp, sym, offset, modname, p); 2573 } else 2574 report_probe(pi, p, sym, offset, modname, NULL); 2575 } 2576 preempt_enable(); 2577 return 0; 2578 } 2579 2580 static const struct seq_operations kprobes_sops = { 2581 .start = kprobe_seq_start, 2582 .next = kprobe_seq_next, 2583 .stop = kprobe_seq_stop, 2584 .show = show_kprobe_addr 2585 }; 2586 2587 DEFINE_SEQ_ATTRIBUTE(kprobes); 2588 2589 /* kprobes/blacklist -- shows which functions can not be probed */ 2590 static void *kprobe_blacklist_seq_start(struct seq_file *m, loff_t *pos) 2591 { 2592 mutex_lock(&kprobe_mutex); 2593 return 
seq_list_start(&kprobe_blacklist, *pos); 2594 } 2595 2596 static void *kprobe_blacklist_seq_next(struct seq_file *m, void *v, loff_t *pos) 2597 { 2598 return seq_list_next(v, &kprobe_blacklist, pos); 2599 } 2600 2601 static int kprobe_blacklist_seq_show(struct seq_file *m, void *v) 2602 { 2603 struct kprobe_blacklist_entry *ent = 2604 list_entry(v, struct kprobe_blacklist_entry, list); 2605 2606 /* 2607 * If /proc/kallsyms is not showing kernel address, we won't 2608 * show them here either. 2609 */ 2610 if (!kallsyms_show_value(m->file->f_cred)) 2611 seq_printf(m, "0x%px-0x%px\t%ps\n", NULL, NULL, 2612 (void *)ent->start_addr); 2613 else 2614 seq_printf(m, "0x%px-0x%px\t%ps\n", (void *)ent->start_addr, 2615 (void *)ent->end_addr, (void *)ent->start_addr); 2616 return 0; 2617 } 2618 2619 static void kprobe_blacklist_seq_stop(struct seq_file *f, void *v) 2620 { 2621 mutex_unlock(&kprobe_mutex); 2622 } 2623 2624 static const struct seq_operations kprobe_blacklist_sops = { 2625 .start = kprobe_blacklist_seq_start, 2626 .next = kprobe_blacklist_seq_next, 2627 .stop = kprobe_blacklist_seq_stop, 2628 .show = kprobe_blacklist_seq_show, 2629 }; 2630 DEFINE_SEQ_ATTRIBUTE(kprobe_blacklist); 2631 2632 static int arm_all_kprobes(void) 2633 { 2634 struct hlist_head *head; 2635 struct kprobe *p; 2636 unsigned int i, total = 0, errors = 0; 2637 int err, ret = 0; 2638 2639 mutex_lock(&kprobe_mutex); 2640 2641 /* If kprobes are armed, just return */ 2642 if (!kprobes_all_disarmed) 2643 goto already_enabled; 2644 2645 /* 2646 * optimize_kprobe() called by arm_kprobe() checks 2647 * kprobes_all_disarmed, so set kprobes_all_disarmed before 2648 * arm_kprobe. 
2649 */ 2650 kprobes_all_disarmed = false; 2651 /* Arming kprobes doesn't optimize kprobe itself */ 2652 for (i = 0; i < KPROBE_TABLE_SIZE; i++) { 2653 head = &kprobe_table[i]; 2654 /* Arm all kprobes on a best-effort basis */ 2655 hlist_for_each_entry(p, head, hlist) { 2656 if (!kprobe_disabled(p)) { 2657 err = arm_kprobe(p); 2658 if (err) { 2659 errors++; 2660 ret = err; 2661 } 2662 total++; 2663 } 2664 } 2665 } 2666 2667 if (errors) 2668 pr_warn("Kprobes globally enabled, but failed to arm %d out of %d probes\n", 2669 errors, total); 2670 else 2671 pr_info("Kprobes globally enabled\n"); 2672 2673 already_enabled: 2674 mutex_unlock(&kprobe_mutex); 2675 return ret; 2676 } 2677 2678 static int disarm_all_kprobes(void) 2679 { 2680 struct hlist_head *head; 2681 struct kprobe *p; 2682 unsigned int i, total = 0, errors = 0; 2683 int err, ret = 0; 2684 2685 mutex_lock(&kprobe_mutex); 2686 2687 /* If kprobes are already disarmed, just return */ 2688 if (kprobes_all_disarmed) { 2689 mutex_unlock(&kprobe_mutex); 2690 return 0; 2691 } 2692 2693 kprobes_all_disarmed = true; 2694 2695 for (i = 0; i < KPROBE_TABLE_SIZE; i++) { 2696 head = &kprobe_table[i]; 2697 /* Disarm all kprobes on a best-effort basis */ 2698 hlist_for_each_entry(p, head, hlist) { 2699 if (!arch_trampoline_kprobe(p) && !kprobe_disabled(p)) { 2700 err = disarm_kprobe(p, false); 2701 if (err) { 2702 errors++; 2703 ret = err; 2704 } 2705 total++; 2706 } 2707 } 2708 } 2709 2710 if (errors) 2711 pr_warn("Kprobes globally disabled, but failed to disarm %d out of %d probes\n", 2712 errors, total); 2713 else 2714 pr_info("Kprobes globally disabled\n"); 2715 2716 mutex_unlock(&kprobe_mutex); 2717 2718 /* Wait for disarming all kprobes by optimizer */ 2719 wait_for_kprobe_optimizer(); 2720 2721 return ret; 2722 } 2723 2724 /* 2725 * XXX: The debugfs bool file interface doesn't allow for callbacks 2726 * when the bool state is switched. 
We can reuse that facility when 2727 * available 2728 */ 2729 static ssize_t read_enabled_file_bool(struct file *file, 2730 char __user *user_buf, size_t count, loff_t *ppos) 2731 { 2732 char buf[3]; 2733 2734 if (!kprobes_all_disarmed) 2735 buf[0] = '1'; 2736 else 2737 buf[0] = '0'; 2738 buf[1] = '\n'; 2739 buf[2] = 0x00; 2740 return simple_read_from_buffer(user_buf, count, ppos, buf, 2); 2741 } 2742 2743 static ssize_t write_enabled_file_bool(struct file *file, 2744 const char __user *user_buf, size_t count, loff_t *ppos) 2745 { 2746 char buf[32]; 2747 size_t buf_size; 2748 int ret = 0; 2749 2750 buf_size = min(count, (sizeof(buf)-1)); 2751 if (copy_from_user(buf, user_buf, buf_size)) 2752 return -EFAULT; 2753 2754 buf[buf_size] = '\0'; 2755 switch (buf[0]) { 2756 case 'y': 2757 case 'Y': 2758 case '1': 2759 ret = arm_all_kprobes(); 2760 break; 2761 case 'n': 2762 case 'N': 2763 case '0': 2764 ret = disarm_all_kprobes(); 2765 break; 2766 default: 2767 return -EINVAL; 2768 } 2769 2770 if (ret) 2771 return ret; 2772 2773 return count; 2774 } 2775 2776 static const struct file_operations fops_kp = { 2777 .read = read_enabled_file_bool, 2778 .write = write_enabled_file_bool, 2779 .llseek = default_llseek, 2780 }; 2781 2782 static int __init debugfs_kprobe_init(void) 2783 { 2784 struct dentry *dir; 2785 unsigned int value = 1; 2786 2787 dir = debugfs_create_dir("kprobes", NULL); 2788 2789 debugfs_create_file("list", 0400, dir, NULL, &kprobes_fops); 2790 2791 debugfs_create_file("enabled", 0600, dir, &value, &fops_kp); 2792 2793 debugfs_create_file("blacklist", 0400, dir, NULL, 2794 &kprobe_blacklist_fops); 2795 2796 return 0; 2797 } 2798 2799 late_initcall(debugfs_kprobe_init); 2800 #endif /* CONFIG_DEBUG_FS */ 2801