/*
 * Kprobes-based tracing events
 *
 * Created by Masami Hiramatsu <mhiramat@redhat.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#include <linux/module.h>
#include <linux/uaccess.h>

#include "trace_probe.h"

#define KPROBE_EVENT_SYSTEM "kprobes"

/*
 * Kprobe event core functions
 */
struct trace_probe {
	struct list_head	list;
	struct kretprobe	rp;	/* Use rp.kp for kprobe use */
	unsigned long		nhit;
	unsigned int		flags;	/* For TP_FLAG_* */
	const char		*symbol;	/* symbol name */
	struct ftrace_event_class	class;
	struct ftrace_event_call	call;
	struct list_head	files;
	ssize_t			size;	/* trace entry size */
	unsigned int		nr_args;
	struct probe_arg	args[];
};
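
/*
 * One event_file_link is kept per ftrace_event_file that has enabled a
 * probe (a rough summary of the code below): links are added and removed
 * under probe_lock and walked under RCU by the probe handlers, which is
 * why removal goes through list_del_rcu() + synchronize_sched().
 */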
struct event_file_link {
	struct ftrace_event_file	*file;
	struct list_head		list;
};

#define SIZEOF_TRACE_PROBE(n)			\
	(offsetof(struct trace_probe, args) +	\
	(sizeof(struct probe_arg) * (n)))


static __kprobes bool trace_probe_is_return(struct trace_probe *tp)
{
	return tp->rp.handler != NULL;
}

static __kprobes const char *trace_probe_symbol(struct trace_probe *tp)
{
	return tp->symbol ? tp->symbol : "unknown";
}

static __kprobes unsigned long trace_probe_offset(struct trace_probe *tp)
{
	return tp->rp.kp.offset;
}

static __kprobes bool trace_probe_is_enabled(struct trace_probe *tp)
{
	return !!(tp->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE));
}

static __kprobes bool trace_probe_is_registered(struct trace_probe *tp)
{
	return !!(tp->flags & TP_FLAG_REGISTERED);
}

static __kprobes bool trace_probe_has_gone(struct trace_probe *tp)
{
	return !!(kprobe_gone(&tp->rp.kp));
}

static __kprobes bool trace_probe_within_module(struct trace_probe *tp,
						struct module *mod)
{
	int len = strlen(mod->name);
	const char *name = trace_probe_symbol(tp);
	return strncmp(mod->name, name, len) == 0 && name[len] == ':';
}

static __kprobes bool trace_probe_is_on_module(struct trace_probe *tp)
{
	return !!strchr(trace_probe_symbol(tp), ':');
}

static int register_probe_event(struct trace_probe *tp);
static int unregister_probe_event(struct trace_probe *tp);

static DEFINE_MUTEX(probe_lock);
static LIST_HEAD(probe_list);

static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs);
static int kretprobe_dispatcher(struct kretprobe_instance *ri,
				struct pt_regs *regs);

/*
 * Allocate new trace_probe and initialize it (including kprobes).
 */
static struct trace_probe *alloc_trace_probe(const char *group,
					     const char *event,
					     void *addr,
					     const char *symbol,
					     unsigned long offs,
					     int nargs, bool is_return)
{
	struct trace_probe *tp;
	int ret = -ENOMEM;

	tp = kzalloc(SIZEOF_TRACE_PROBE(nargs), GFP_KERNEL);
	if (!tp)
		return ERR_PTR(ret);

	if (symbol) {
		tp->symbol = kstrdup(symbol, GFP_KERNEL);
		if (!tp->symbol)
			goto error;
		tp->rp.kp.symbol_name = tp->symbol;
		tp->rp.kp.offset = offs;
	} else
		tp->rp.kp.addr = addr;

	if (is_return)
		tp->rp.handler = kretprobe_dispatcher;
	else
		tp->rp.kp.pre_handler = kprobe_dispatcher;

	if (!event || !is_good_name(event)) {
		ret = -EINVAL;
		goto error;
	}

	tp->call.class = &tp->class;
	tp->call.name = kstrdup(event, GFP_KERNEL);
	if (!tp->call.name)
		goto error;

	if (!group || !is_good_name(group)) {
		ret = -EINVAL;
		goto error;
	}

	tp->class.system = kstrdup(group, GFP_KERNEL);
	if (!tp->class.system)
		goto error;

	INIT_LIST_HEAD(&tp->list);
	INIT_LIST_HEAD(&tp->files);
	return tp;
error:
	kfree(tp->call.name);
	kfree(tp->symbol);
	kfree(tp);
	return ERR_PTR(ret);
}

static void free_trace_probe(struct trace_probe *tp)
{
	int i;

	for (i = 0; i < tp->nr_args; i++)
		traceprobe_free_probe_arg(&tp->args[i]);

	kfree(tp->call.class->system);
	kfree(tp->call.name);
	kfree(tp->symbol);
	kfree(tp);
}

static struct trace_probe *find_trace_probe(const char *event,
					    const char *group)
{
	struct trace_probe *tp;

	list_for_each_entry(tp, &probe_list, list)
		if (strcmp(tp->call.name, event) == 0 &&
		    strcmp(tp->call.class->system, group) == 0)
			return tp;
	return NULL;
}

/*
 * Enable trace_probe:
 * enable the "trace" handler for @file, or the "perf" handler if @file
 * is NULL.
 */
static int
enable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file)
{
	int ret = 0;

	if (file) {
		struct event_file_link *link;

		link = kmalloc(sizeof(*link), GFP_KERNEL);
		if (!link) {
			ret = -ENOMEM;
			goto out;
		}

		link->file = file;
		list_add_tail_rcu(&link->list, &tp->files);

		tp->flags |= TP_FLAG_TRACE;
	} else
		tp->flags |= TP_FLAG_PROFILE;

	if (trace_probe_is_registered(tp) && !trace_probe_has_gone(tp)) {
		if (trace_probe_is_return(tp))
			ret = enable_kretprobe(&tp->rp);
		else
			ret = enable_kprobe(&tp->rp.kp);
	}
out:
	return ret;
}
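
/*
 * Several ftrace instances may enable the same probe, one
 * event_file_link each; the underlying k*probe is only disabled again
 * once the last link is gone (see disable_trace_probe() below).
 */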
static struct event_file_link *
find_event_file_link(struct trace_probe *tp, struct ftrace_event_file *file)
{
	struct event_file_link *link;

	list_for_each_entry(link, &tp->files, list)
		if (link->file == file)
			return link;

	return NULL;
}

/*
 * Disable trace_probe:
 * disable the "trace" handler for @file, or the "perf" handler if @file
 * is NULL.
 */
static int
disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file)
{
	struct event_file_link *link = NULL;
	int wait = 0;
	int ret = 0;

	if (file) {
		link = find_event_file_link(tp, file);
		if (!link) {
			ret = -EINVAL;
			goto out;
		}

		list_del_rcu(&link->list);
		wait = 1;
		if (!list_empty(&tp->files))
			goto out;

		tp->flags &= ~TP_FLAG_TRACE;
	} else
		tp->flags &= ~TP_FLAG_PROFILE;

	if (!trace_probe_is_enabled(tp) && trace_probe_is_registered(tp)) {
		if (trace_probe_is_return(tp))
			disable_kretprobe(&tp->rp);
		else
			disable_kprobe(&tp->rp.kp);
		wait = 1;
	}
out:
	if (wait) {
		/*
		 * Synchronize with kprobe_trace_func/kretprobe_trace_func
		 * to ensure disabled (all running handlers are finished).
		 * This is not only for kfree(), but also the caller,
		 * trace_remove_event_call() supposes it for releasing
		 * event_call related objects, which will be accessed in
		 * the kprobe_trace_func/kretprobe_trace_func.
		 */
		synchronize_sched();
		kfree(link);	/* Ignored if link == NULL */
	}

	return ret;
}
/* Internal register function - just handle k*probes and flags */
static int __register_trace_probe(struct trace_probe *tp)
{
	int i, ret;

	if (trace_probe_is_registered(tp))
		return -EINVAL;

	for (i = 0; i < tp->nr_args; i++)
		traceprobe_update_arg(&tp->args[i]);

	/* Set/clear disabled flag according to tp->flag */
	if (trace_probe_is_enabled(tp))
		tp->rp.kp.flags &= ~KPROBE_FLAG_DISABLED;
	else
		tp->rp.kp.flags |= KPROBE_FLAG_DISABLED;

	if (trace_probe_is_return(tp))
		ret = register_kretprobe(&tp->rp);
	else
		ret = register_kprobe(&tp->rp.kp);

	if (ret == 0)
		tp->flags |= TP_FLAG_REGISTERED;
	else {
		pr_warning("Could not insert probe at %s+%lu: %d\n",
			   trace_probe_symbol(tp), trace_probe_offset(tp), ret);
		if (ret == -ENOENT && trace_probe_is_on_module(tp)) {
			pr_warning("This probe might be able to register after "
				   "target module is loaded. Continue.\n");
			ret = 0;
		} else if (ret == -EILSEQ) {
			pr_warning("Probing address(0x%p) is not an "
				   "instruction boundary.\n",
				   tp->rp.kp.addr);
			ret = -EINVAL;
		}
	}

	return ret;
}

/* Internal unregister function - just handle k*probes and flags */
static void __unregister_trace_probe(struct trace_probe *tp)
{
	if (trace_probe_is_registered(tp)) {
		if (trace_probe_is_return(tp))
			unregister_kretprobe(&tp->rp);
		else
			unregister_kprobe(&tp->rp.kp);
		tp->flags &= ~TP_FLAG_REGISTERED;
		/* Cleanup kprobe for reuse */
		if (tp->rp.kp.symbol_name)
			tp->rp.kp.addr = NULL;
	}
}

/* Unregister a trace_probe and probe_event: call with probe_lock held */
static int unregister_trace_probe(struct trace_probe *tp)
{
	/* An enabled event cannot be unregistered */
	if (trace_probe_is_enabled(tp))
		return -EBUSY;

	/* Will fail if probe is being used by ftrace or perf */
	if (unregister_probe_event(tp))
		return -EBUSY;

	__unregister_trace_probe(tp);
	list_del(&tp->list);

	return 0;
}

/* Register a trace_probe and probe_event */
static int register_trace_probe(struct trace_probe *tp)
{
	struct trace_probe *old_tp;
	int ret;

	mutex_lock(&probe_lock);

	/* Delete the old event if one with the same name exists */
	old_tp = find_trace_probe(tp->call.name, tp->call.class->system);
	if (old_tp) {
		ret = unregister_trace_probe(old_tp);
		if (ret < 0)
			goto end;
		free_trace_probe(old_tp);
	}

	/* Register new event */
	ret = register_probe_event(tp);
	if (ret) {
		pr_warning("Failed to register probe event(%d)\n", ret);
		goto end;
	}

	/* Register k*probe */
	ret = __register_trace_probe(tp);
	if (ret < 0)
		unregister_probe_event(tp);
	else
		list_add_tail(&tp->list, &probe_list);

end:
	mutex_unlock(&probe_lock);
	return ret;
}
/* Module notifier callback, checking events on the coming module */
static int trace_probe_module_callback(struct notifier_block *nb,
				       unsigned long val, void *data)
{
	struct module *mod = data;
	struct trace_probe *tp;
	int ret;

	if (val != MODULE_STATE_COMING)
		return NOTIFY_DONE;

	/* Update probes on coming module */
	mutex_lock(&probe_lock);
	list_for_each_entry(tp, &probe_list, list) {
		if (trace_probe_within_module(tp, mod)) {
			/* Don't need to check busy - this should have gone. */
			__unregister_trace_probe(tp);
			ret = __register_trace_probe(tp);
			if (ret)
				pr_warning("Failed to re-register probe %s on "
					   "%s: %d\n",
					   tp->call.name, mod->name, ret);
		}
	}
	mutex_unlock(&probe_lock);

	return NOTIFY_DONE;
}

static struct notifier_block trace_probe_module_nb = {
	.notifier_call = trace_probe_module_callback,
	.priority = 1	/* Invoked after kprobe module callback */
};

static int create_trace_probe(int argc, char **argv)
{
	/*
	 * Argument syntax:
	 *  - Add kprobe: p[:[GRP/]EVENT] [MOD:]KSYM[+OFFS]|KADDR [FETCHARGS]
	 *  - Add kretprobe: r[:[GRP/]EVENT] [MOD:]KSYM[+0] [FETCHARGS]
	 * Fetch args:
	 *  $retval	: fetch return value
	 *  $stack	: fetch stack address
	 *  $stackN	: fetch Nth of stack (N:0-)
	 *  @ADDR	: fetch memory at ADDR (ADDR should be in kernel)
	 *  @SYM[+|-offs] : fetch memory at SYM +|- offs (SYM is a data symbol)
	 *  %REG	: fetch register REG
	 * Dereferencing memory fetch:
	 *  +|-offs(ARG) : fetch memory at ARG +|- offs address.
	 * Alias name of args:
	 *  NAME=FETCHARG : set NAME as alias of FETCHARG.
	 * Type of args:
	 *  FETCHARG:TYPE : use TYPE instead of unsigned long.
	 */
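	/*
	 * Illustrative command lines (cf. Documentation/trace/kprobetrace.txt;
	 * the register names here are x86-64 specific):
	 *
	 *  echo 'p:myprobe do_sys_open dfd=%ax filename=%dx flags=%cx' \
	 *	> /sys/kernel/debug/tracing/kprobe_events
	 *  echo 'r:myretprobe do_sys_open $retval' \
	 *	>> /sys/kernel/debug/tracing/kprobe_events
	 *  echo '-:myprobe' >> /sys/kernel/debug/tracing/kprobe_events
	 */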
	struct trace_probe *tp;
	int i, ret = 0;
	bool is_return = false, is_delete = false;
	char *symbol = NULL, *event = NULL, *group = NULL;
	char *arg;
	unsigned long offset = 0;
	void *addr = NULL;
	char buf[MAX_EVENT_NAME_LEN];

	/* argc must be >= 1 */
	if (argv[0][0] == 'p')
		is_return = false;
	else if (argv[0][0] == 'r')
		is_return = true;
	else if (argv[0][0] == '-')
		is_delete = true;
	else {
		pr_info("Probe definition must start with 'p', 'r' or"
			" '-'.\n");
		return -EINVAL;
	}

	if (argv[0][1] == ':') {
		event = &argv[0][2];
		if (strchr(event, '/')) {
			group = event;
			event = strchr(group, '/') + 1;
			event[-1] = '\0';
			if (strlen(group) == 0) {
				pr_info("Group name is not specified\n");
				return -EINVAL;
			}
		}
		if (strlen(event) == 0) {
			pr_info("Event name is not specified\n");
			return -EINVAL;
		}
	}
	if (!group)
		group = KPROBE_EVENT_SYSTEM;

	if (is_delete) {
		if (!event) {
			pr_info("Delete command needs an event name.\n");
			return -EINVAL;
		}
		mutex_lock(&probe_lock);
		tp = find_trace_probe(event, group);
		if (!tp) {
			mutex_unlock(&probe_lock);
			pr_info("Event %s/%s doesn't exist.\n", group, event);
			return -ENOENT;
		}
		/* delete an event */
		ret = unregister_trace_probe(tp);
		if (ret == 0)
			free_trace_probe(tp);
		mutex_unlock(&probe_lock);
		return ret;
	}

	if (argc < 2) {
		pr_info("Probe point is not specified.\n");
		return -EINVAL;
	}
	if (isdigit(argv[1][0])) {
		if (is_return) {
			pr_info("Return probe point must be a symbol.\n");
			return -EINVAL;
		}
		/* an address specified */
		ret = kstrtoul(&argv[1][0], 0, (unsigned long *)&addr);
		if (ret) {
			pr_info("Failed to parse address.\n");
			return ret;
		}
	} else {
		/* a symbol specified */
		symbol = argv[1];
		/* TODO: support .init module functions */
		ret = traceprobe_split_symbol_offset(symbol, &offset);
		if (ret) {
			pr_info("Failed to parse symbol.\n");
			return ret;
		}
		if (offset && is_return) {
			pr_info("Return probe must be used without offset.\n");
			return -EINVAL;
		}
	}
	argc -= 2; argv += 2;

	/* setup a probe */
	if (!event) {
		/* Make a new event name */
		if (symbol)
			snprintf(buf, MAX_EVENT_NAME_LEN, "%c_%s_%ld",
				 is_return ? 'r' : 'p', symbol, offset);
		else
			snprintf(buf, MAX_EVENT_NAME_LEN, "%c_0x%p",
				 is_return ? 'r' : 'p', addr);
		event = buf;
	}
	tp = alloc_trace_probe(group, event, addr, symbol, offset, argc,
			       is_return);
	if (IS_ERR(tp)) {
		pr_info("Failed to allocate trace_probe.(%d)\n",
			(int)PTR_ERR(tp));
		return PTR_ERR(tp);
	}

	/* parse arguments */
	ret = 0;
	for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
		/* Increment count for freeing args in error case */
		tp->nr_args++;

		/* Parse argument name */
		arg = strchr(argv[i], '=');
		if (arg) {
			*arg++ = '\0';
			tp->args[i].name = kstrdup(argv[i], GFP_KERNEL);
		} else {
			arg = argv[i];
			/* If argument name is omitted, set "argN" */
			snprintf(buf, MAX_EVENT_NAME_LEN, "arg%d", i + 1);
			tp->args[i].name = kstrdup(buf, GFP_KERNEL);
		}

		if (!tp->args[i].name) {
			pr_info("Failed to allocate argument[%d] name.\n", i);
			ret = -ENOMEM;
			goto error;
		}

		if (!is_good_name(tp->args[i].name)) {
			pr_info("Invalid argument[%d] name: %s\n",
				i, tp->args[i].name);
			ret = -EINVAL;
			goto error;
		}

		if (traceprobe_conflict_field_name(tp->args[i].name,
						   tp->args, i)) {
			pr_info("Argument[%d] name '%s' conflicts with "
				"another field.\n", i, argv[i]);
			ret = -EINVAL;
			goto error;
		}

		/* Parse fetch argument */
		ret = traceprobe_parse_probe_arg(arg, &tp->size, &tp->args[i],
						 is_return, true);
		if (ret) {
			pr_info("Parse error at argument[%d]. (%d)\n", i, ret);
			goto error;
		}
	}

	ret = register_trace_probe(tp);
	if (ret)
		goto error;
	return 0;

error:
	free_trace_probe(tp);
	return ret;
}

static int release_all_trace_probes(void)
{
	struct trace_probe *tp;
	int ret = 0;

	mutex_lock(&probe_lock);
	/* Ensure no probe is in use. */
	list_for_each_entry(tp, &probe_list, list)
		if (trace_probe_is_enabled(tp)) {
			ret = -EBUSY;
			goto end;
		}
	/* TODO: Use batch unregistration */
	while (!list_empty(&probe_list)) {
		tp = list_entry(probe_list.next, struct trace_probe, list);
		ret = unregister_trace_probe(tp);
		if (ret)
			goto end;
		free_trace_probe(tp);
	}

end:
	mutex_unlock(&probe_lock);

	return ret;
}

/* Probes listing interfaces */
static void *probes_seq_start(struct seq_file *m, loff_t *pos)
{
	mutex_lock(&probe_lock);
	return seq_list_start(&probe_list, *pos);
}

static void *probes_seq_next(struct seq_file *m, void *v, loff_t *pos)
{
	return seq_list_next(v, &probe_list, pos);
}

static void probes_seq_stop(struct seq_file *m, void *v)
{
	mutex_unlock(&probe_lock);
}

static int probes_seq_show(struct seq_file *m, void *v)
{
	struct trace_probe *tp = v;
	int i;

	seq_printf(m, "%c", trace_probe_is_return(tp) ? 'r' : 'p');
	seq_printf(m, ":%s/%s", tp->call.class->system, tp->call.name);

	if (!tp->symbol)
		seq_printf(m, " 0x%p", tp->rp.kp.addr);
	else if (tp->rp.kp.offset)
		seq_printf(m, " %s+%u", trace_probe_symbol(tp),
			   tp->rp.kp.offset);
	else
		seq_printf(m, " %s", trace_probe_symbol(tp));

	for (i = 0; i < tp->nr_args; i++)
		seq_printf(m, " %s=%s", tp->args[i].name, tp->args[i].comm);
	seq_printf(m, "\n");

	return 0;
}
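
/*
 * A registered probe shows up in the listing roughly as, e.g.:
 *
 *	p:kprobes/myprobe do_sys_open dfd=%ax filename=%dx flags=%cx
 *	r:kprobes/myretprobe do_sys_open arg1=$retval
 */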
static const struct seq_operations probes_seq_op = {
	.start  = probes_seq_start,
	.next   = probes_seq_next,
	.stop   = probes_seq_stop,
	.show   = probes_seq_show
};

static int probes_open(struct inode *inode, struct file *file)
{
	int ret;

	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
		ret = release_all_trace_probes();
		if (ret < 0)
			return ret;
	}

	return seq_open(file, &probes_seq_op);
}

static ssize_t probes_write(struct file *file, const char __user *buffer,
			    size_t count, loff_t *ppos)
{
	return traceprobe_probes_write(file, buffer, count, ppos,
			create_trace_probe);
}

static const struct file_operations kprobe_events_ops = {
	.owner          = THIS_MODULE,
	.open           = probes_open,
	.read           = seq_read,
	.llseek         = seq_lseek,
	.release        = seq_release,
	.write		= probes_write,
};

/* Probes profiling interfaces */
static int probes_profile_seq_show(struct seq_file *m, void *v)
{
	struct trace_probe *tp = v;

	seq_printf(m, "  %-44s %15lu %15lu\n", tp->call.name, tp->nhit,
		   tp->rp.kp.nmissed);

	return 0;
}

static const struct seq_operations profile_seq_op = {
	.start  = probes_seq_start,
	.next   = probes_seq_next,
	.stop   = probes_seq_stop,
	.show   = probes_profile_seq_show
};

static int profile_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &profile_seq_op);
}

static const struct file_operations kprobe_profile_ops = {
	.owner          = THIS_MODULE,
	.open           = profile_open,
	.read           = seq_read,
	.llseek         = seq_lseek,
	.release        = seq_release,
};

/* Sum up total data length for dynamic arrays (strings) */
static __kprobes int __get_data_size(struct trace_probe *tp,
				     struct pt_regs *regs)
{
	int i, ret = 0;
	u32 len;

	for (i = 0; i < tp->nr_args; i++)
		if (unlikely(tp->args[i].fetch_size.fn)) {
			call_fetch(&tp->args[i].fetch_size, regs, &len);
			ret += len;
		}

	return ret;
}

/* Store the value of each argument */
static __kprobes void store_trace_args(int ent_size, struct trace_probe *tp,
				       struct pt_regs *regs,
				       u8 *data, int maxlen)
{
	int i;
	u32 end = tp->size;
	u32 *dl;	/* Data (relative) location */

	for (i = 0; i < tp->nr_args; i++) {
		if (unlikely(tp->args[i].fetch_size.fn)) {
			/*
			 * First, we set the relative location and
			 * maximum data length to *dl
			 */
			dl = (u32 *)(data + tp->args[i].offset);
			*dl = make_data_rloc(maxlen, end - tp->args[i].offset);
			/* Then try to fetch string or dynamic array data */
			call_fetch(&tp->args[i].fetch, regs, dl);
			/* Reduce maximum length */
			end += get_rloc_len(*dl);
			maxlen -= get_rloc_len(*dl);
			/* Trick here, convert data_rloc to data_loc */
			*dl = convert_rloc_to_loc(*dl,
				 ent_size + tp->args[i].offset);
		} else
			/* Just fetching data normally */
			call_fetch(&tp->args[i].fetch, regs,
				   data + tp->args[i].offset);
	}
}
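
/*
 * For reference (assuming the helpers in trace_probe.h): a data
 * (relative) location is a u32 packing "length << 16 | offset", where
 * the offset is relative to the fixed-size argument area; converting it
 * to a data_loc just rebases that offset onto the whole trace entry.
 */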
/* Kprobe handler */
static __kprobes void
__kprobe_trace_func(struct trace_probe *tp, struct pt_regs *regs,
		    struct ftrace_event_file *ftrace_file)
{
	struct kprobe_trace_entry_head *entry;
	struct ring_buffer_event *event;
	struct ring_buffer *buffer;
	int size, dsize, pc;
	unsigned long irq_flags;
	struct ftrace_event_call *call = &tp->call;

	WARN_ON(call != ftrace_file->event_call);

	if (test_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &ftrace_file->flags))
		return;

	local_save_flags(irq_flags);
	pc = preempt_count();

	dsize = __get_data_size(tp, regs);
	size = sizeof(*entry) + tp->size + dsize;

	event = trace_event_buffer_lock_reserve(&buffer, ftrace_file,
						call->event.type,
						size, irq_flags, pc);
	if (!event)
		return;

	entry = ring_buffer_event_data(event);
	entry->ip = (unsigned long)tp->rp.kp.addr;
	store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);

	if (!filter_check_discard(ftrace_file, entry, buffer, event))
		trace_buffer_unlock_commit_regs(buffer, event,
						irq_flags, pc, regs);
}

static __kprobes void
kprobe_trace_func(struct trace_probe *tp, struct pt_regs *regs)
{
	struct event_file_link *link;

	list_for_each_entry_rcu(link, &tp->files, list)
		__kprobe_trace_func(tp, regs, link->file);
}

/* Kretprobe handler */
static __kprobes void
__kretprobe_trace_func(struct trace_probe *tp, struct kretprobe_instance *ri,
		       struct pt_regs *regs,
		       struct ftrace_event_file *ftrace_file)
{
	struct kretprobe_trace_entry_head *entry;
	struct ring_buffer_event *event;
	struct ring_buffer *buffer;
	int size, pc, dsize;
	unsigned long irq_flags;
	struct ftrace_event_call *call = &tp->call;

	WARN_ON(call != ftrace_file->event_call);

	if (test_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &ftrace_file->flags))
		return;

	local_save_flags(irq_flags);
	pc = preempt_count();

	dsize = __get_data_size(tp, regs);
	size = sizeof(*entry) + tp->size + dsize;

	event = trace_event_buffer_lock_reserve(&buffer, ftrace_file,
						call->event.type,
						size, irq_flags, pc);
	if (!event)
		return;

	entry = ring_buffer_event_data(event);
	entry->func = (unsigned long)tp->rp.kp.addr;
	entry->ret_ip = (unsigned long)ri->ret_addr;
	store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);

	if (!filter_check_discard(ftrace_file, entry, buffer, event))
		trace_buffer_unlock_commit_regs(buffer, event,
						irq_flags, pc, regs);
}

static __kprobes void
kretprobe_trace_func(struct trace_probe *tp, struct kretprobe_instance *ri,
		     struct pt_regs *regs)
{
	struct event_file_link *link;

	list_for_each_entry_rcu(link, &tp->files, list)
		__kretprobe_trace_func(tp, ri, regs, link->file);
}

/* Event entry printers */
static enum print_line_t
print_kprobe_event(struct trace_iterator *iter, int flags,
		   struct trace_event *event)
{
	struct kprobe_trace_entry_head *field;
	struct trace_seq *s = &iter->seq;
	struct trace_probe *tp;
	u8 *data;
	int i;

	field = (struct kprobe_trace_entry_head *)iter->ent;
	tp = container_of(event, struct trace_probe, call.event);

	if (!trace_seq_printf(s, "%s: (", tp->call.name))
		goto partial;

	if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET))
		goto partial;

	if (!trace_seq_puts(s, ")"))
		goto partial;

	data = (u8 *)&field[1];
	for (i = 0; i < tp->nr_args; i++)
		if (!tp->args[i].type->print(s, tp->args[i].name,
					     data + tp->args[i].offset, field))
			goto partial;

	if (!trace_seq_puts(s, "\n"))
		goto partial;

	return TRACE_TYPE_HANDLED;
partial:
	return TRACE_TYPE_PARTIAL_LINE;
}

static enum print_line_t
print_kretprobe_event(struct trace_iterator *iter, int flags,
		      struct trace_event *event)
{
	struct kretprobe_trace_entry_head *field;
	struct trace_seq *s = &iter->seq;
	struct trace_probe *tp;
	u8 *data;
	int i;

	field = (struct kretprobe_trace_entry_head *)iter->ent;
	tp = container_of(event, struct trace_probe, call.event);

	if (!trace_seq_printf(s, "%s: (", tp->call.name))
		goto partial;

	if (!seq_print_ip_sym(s, field->ret_ip, flags | TRACE_ITER_SYM_OFFSET))
		goto partial;

	if (!trace_seq_puts(s, " <- "))
		goto partial;

	if (!seq_print_ip_sym(s, field->func, flags & ~TRACE_ITER_SYM_OFFSET))
		goto partial;

	if (!trace_seq_puts(s, ")"))
		goto partial;

	data = (u8 *)&field[1];
	for (i = 0; i < tp->nr_args; i++)
		if (!tp->args[i].type->print(s, tp->args[i].name,
					     data + tp->args[i].offset, field))
			goto partial;

	if (!trace_seq_puts(s, "\n"))
		goto partial;

	return TRACE_TYPE_HANDLED;
partial:
	return TRACE_TYPE_PARTIAL_LINE;
}
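
/*
 * The printers above yield trace lines roughly of the form (example
 * values only):
 *
 *	myprobe: (do_sys_open+0x0/0xd6) dfd=3 filename=7fffd1ec4440
 *	myretprobe: (sys_open+0x1b/0x1d <- do_sys_open) arg1=fffffffffffffffe
 */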
static int kprobe_event_define_fields(struct ftrace_event_call *event_call)
{
	int ret, i;
	struct kprobe_trace_entry_head field;
	struct trace_probe *tp = (struct trace_probe *)event_call->data;

	DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0);
	/* Set argument names as fields */
	for (i = 0; i < tp->nr_args; i++) {
		ret = trace_define_field(event_call, tp->args[i].type->fmttype,
					 tp->args[i].name,
					 sizeof(field) + tp->args[i].offset,
					 tp->args[i].type->size,
					 tp->args[i].type->is_signed,
					 FILTER_OTHER);
		if (ret)
			return ret;
	}
	return 0;
}

static int kretprobe_event_define_fields(struct ftrace_event_call *event_call)
{
	int ret, i;
	struct kretprobe_trace_entry_head field;
	struct trace_probe *tp = (struct trace_probe *)event_call->data;

	DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0);
	DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0);
	/* Set argument names as fields */
	for (i = 0; i < tp->nr_args; i++) {
		ret = trace_define_field(event_call, tp->args[i].type->fmttype,
					 tp->args[i].name,
					 sizeof(field) + tp->args[i].offset,
					 tp->args[i].type->size,
					 tp->args[i].type->is_signed,
					 FILTER_OTHER);
		if (ret)
			return ret;
	}
	return 0;
}

static int __set_print_fmt(struct trace_probe *tp, char *buf, int len)
{
	int i;
	int pos = 0;

	const char *fmt, *arg;

	if (!trace_probe_is_return(tp)) {
		fmt = "(%lx)";
		arg = "REC->" FIELD_STRING_IP;
	} else {
		fmt = "(%lx <- %lx)";
		arg = "REC->" FIELD_STRING_FUNC ", REC->" FIELD_STRING_RETIP;
	}

	/* When len=0, we just calculate the needed length */
#define LEN_OR_ZERO (len ? len - pos : 0)

	pos += snprintf(buf + pos, LEN_OR_ZERO, "\"%s", fmt);

	for (i = 0; i < tp->nr_args; i++) {
		pos += snprintf(buf + pos, LEN_OR_ZERO, " %s=%s",
				tp->args[i].name, tp->args[i].type->fmt);
	}

	pos += snprintf(buf + pos, LEN_OR_ZERO, "\", %s", arg);

	for (i = 0; i < tp->nr_args; i++) {
		if (strcmp(tp->args[i].type->name, "string") == 0)
			pos += snprintf(buf + pos, LEN_OR_ZERO,
					", __get_str(%s)",
					tp->args[i].name);
		else
			pos += snprintf(buf + pos, LEN_OR_ZERO, ", REC->%s",
					tp->args[i].name);
	}

#undef LEN_OR_ZERO

	/* return the length of print_fmt */
	return pos;
}

static int set_print_fmt(struct trace_probe *tp)
{
	int len;
	char *print_fmt;

	/* First: called with 0 length to calculate the needed length */
	len = __set_print_fmt(tp, NULL, 0);
	print_fmt = kmalloc(len + 1, GFP_KERNEL);
	if (!print_fmt)
		return -ENOMEM;

	/* Second: actually write the @print_fmt */
	__set_print_fmt(tp, print_fmt, len + 1);
	tp->call.print_fmt = print_fmt;

	return 0;
}
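
/*
 * For example, a kprobe event with a single unsigned long argument named
 * "arg1" would get a print_fmt like (assuming FIELD_STRING_IP is
 * "__probe_ip"):
 *
 *	"(%lx) arg1=%lx", REC->__probe_ip, REC->arg1
 */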
#ifdef CONFIG_PERF_EVENTS

/* Kprobe profile handler */
static __kprobes void
kprobe_perf_func(struct trace_probe *tp, struct pt_regs *regs)
{
	struct ftrace_event_call *call = &tp->call;
	struct kprobe_trace_entry_head *entry;
	struct hlist_head *head;
	int size, __size, dsize;
	int rctx;

	head = this_cpu_ptr(call->perf_events);
	if (hlist_empty(head))
		return;

	dsize = __get_data_size(tp, regs);
	__size = sizeof(*entry) + tp->size + dsize;
	size = ALIGN(__size + sizeof(u32), sizeof(u64));
	size -= sizeof(u32);

	entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx);
	if (!entry)
		return;

	entry->ip = (unsigned long)tp->rp.kp.addr;
	memset(&entry[1], 0, dsize);
	store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
	perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL);
}
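
/*
 * Both perf handlers here size the record the same way: __size is
 * rounded up so that the record plus a u32 stays u64-aligned, keeping
 * entries in the perf ring buffer 8-byte aligned (a sketch of the
 * intent; see perf_trace_buf_prepare()).
 */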
/* Kretprobe profile handler */
static __kprobes void
kretprobe_perf_func(struct trace_probe *tp, struct kretprobe_instance *ri,
		    struct pt_regs *regs)
{
	struct ftrace_event_call *call = &tp->call;
	struct kretprobe_trace_entry_head *entry;
	struct hlist_head *head;
	int size, __size, dsize;
	int rctx;

	head = this_cpu_ptr(call->perf_events);
	if (hlist_empty(head))
		return;

	dsize = __get_data_size(tp, regs);
	__size = sizeof(*entry) + tp->size + dsize;
	size = ALIGN(__size + sizeof(u32), sizeof(u64));
	size -= sizeof(u32);

	entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx);
	if (!entry)
		return;

	entry->func = (unsigned long)tp->rp.kp.addr;
	entry->ret_ip = (unsigned long)ri->ret_addr;
	store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
	perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL);
}
#endif	/* CONFIG_PERF_EVENTS */

/*
 * called by perf_trace_init() or __ftrace_set_clr_event() under event_mutex.
 *
 * kprobe_trace_self_tests_init() does enable_trace_probe/disable_trace_probe
 * lockless, but we can't race with this __init function.
 */
static __kprobes
int kprobe_register(struct ftrace_event_call *event,
		    enum trace_reg type, void *data)
{
	struct trace_probe *tp = (struct trace_probe *)event->data;
	struct ftrace_event_file *file = data;

	switch (type) {
	case TRACE_REG_REGISTER:
		return enable_trace_probe(tp, file);
	case TRACE_REG_UNREGISTER:
		return disable_trace_probe(tp, file);

#ifdef CONFIG_PERF_EVENTS
	case TRACE_REG_PERF_REGISTER:
		return enable_trace_probe(tp, NULL);
	case TRACE_REG_PERF_UNREGISTER:
		return disable_trace_probe(tp, NULL);
	case TRACE_REG_PERF_OPEN:
	case TRACE_REG_PERF_CLOSE:
	case TRACE_REG_PERF_ADD:
	case TRACE_REG_PERF_DEL:
		return 0;
#endif
	}
	return 0;
}

static __kprobes
int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
{
	struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);

	tp->nhit++;

	if (tp->flags & TP_FLAG_TRACE)
		kprobe_trace_func(tp, regs);
#ifdef CONFIG_PERF_EVENTS
	if (tp->flags & TP_FLAG_PROFILE)
		kprobe_perf_func(tp, regs);
#endif
	return 0;	/* We don't tweak the kernel, so just return 0 */
}

static __kprobes
int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
{
	struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);

	tp->nhit++;

	if (tp->flags & TP_FLAG_TRACE)
		kretprobe_trace_func(tp, ri, regs);
#ifdef CONFIG_PERF_EVENTS
	if (tp->flags & TP_FLAG_PROFILE)
		kretprobe_perf_func(tp, ri, regs);
#endif
	return 0;	/* We don't tweak the kernel, so just return 0 */
}

static struct trace_event_functions kretprobe_funcs = {
	.trace		= print_kretprobe_event
};

static struct trace_event_functions kprobe_funcs = {
	.trace		= print_kprobe_event
};

static int register_probe_event(struct trace_probe *tp)
{
	struct ftrace_event_call *call = &tp->call;
	int ret;

	/* Initialize ftrace_event_call */
	INIT_LIST_HEAD(&call->class->fields);
	if (trace_probe_is_return(tp)) {
		call->event.funcs = &kretprobe_funcs;
		call->class->define_fields = kretprobe_event_define_fields;
	} else {
		call->event.funcs = &kprobe_funcs;
		call->class->define_fields = kprobe_event_define_fields;
	}
	if (set_print_fmt(tp) < 0)
		return -ENOMEM;
	ret = register_ftrace_event(&call->event);
	if (!ret) {
		kfree(call->print_fmt);
		return -ENODEV;
	}
	call->flags = 0;
	call->class->reg = kprobe_register;
	call->data = tp;
	ret = trace_add_event_call(call);
	if (ret) {
		pr_info("Failed to register kprobe event: %s\n", call->name);
		kfree(call->print_fmt);
		unregister_ftrace_event(&call->event);
	}
	return ret;
}

static int unregister_probe_event(struct trace_probe *tp)
{
	int ret;

	/* tp->event is unregistered in trace_remove_event_call() */
	ret = trace_remove_event_call(&tp->call);
	if (!ret)
		kfree(tp->call.print_fmt);
	return ret;
}
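
/*
 * The files below typically appear as
 * /sys/kernel/debug/tracing/kprobe_events (read/write probe definitions;
 * opening it with O_TRUNC clears all probes, see probes_open()) and
 * /sys/kernel/debug/tracing/kprobe_profile (per-probe hit/miss counts).
 */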
/* Make a debugfs interface for controlling probe points */
static __init int init_kprobe_trace(void)
{
	struct dentry *d_tracer;
	struct dentry *entry;

	if (register_module_notifier(&trace_probe_module_nb))
		return -EINVAL;

	d_tracer = tracing_init_dentry();
	if (!d_tracer)
		return 0;

	/* Event list interface */
	entry = debugfs_create_file("kprobe_events", 0644, d_tracer,
				    NULL, &kprobe_events_ops);

	if (!entry)
		pr_warning("Could not create debugfs "
			   "'kprobe_events' entry\n");

	/* Profile interface */
	entry = debugfs_create_file("kprobe_profile", 0444, d_tracer,
				    NULL, &kprobe_profile_ops);

	if (!entry)
		pr_warning("Could not create debugfs "
			   "'kprobe_profile' entry\n");
	return 0;
}
fs_initcall(init_kprobe_trace);


#ifdef CONFIG_FTRACE_STARTUP_TEST

/*
 * The "__used" keeps gcc from removing the function symbol
 * from the kallsyms table.
 */
static __used int kprobe_trace_selftest_target(int a1, int a2, int a3,
					       int a4, int a5, int a6)
{
	return a1 + a2 + a3 + a4 + a5 + a6;
}

static struct ftrace_event_file *
find_trace_probe_file(struct trace_probe *tp, struct trace_array *tr)
{
	struct ftrace_event_file *file;

	list_for_each_entry(file, &tr->events, list)
		if (file->event_call == &tp->call)
			return file;

	return NULL;
}

/*
 * Nobody but us can call enable_trace_probe/disable_trace_probe at this
 * stage, so we can do this lockless.
 */
static __init int kprobe_trace_self_tests_init(void)
{
	int ret, warn = 0;
	int (*target)(int, int, int, int, int, int);
	struct trace_probe *tp;
	struct ftrace_event_file *file;

	target = kprobe_trace_selftest_target;

	pr_info("Testing kprobe tracing: ");

	ret = traceprobe_command("p:testprobe kprobe_trace_selftest_target "
				 "$stack $stack0 +0($stack)",
				 create_trace_probe);
	if (WARN_ON_ONCE(ret)) {
		pr_warn("error on probing function entry.\n");
		warn++;
	} else {
		/* Enable trace point */
		tp = find_trace_probe("testprobe", KPROBE_EVENT_SYSTEM);
		if (WARN_ON_ONCE(tp == NULL)) {
			pr_warn("error on getting new probe.\n");
			warn++;
		} else {
			file = find_trace_probe_file(tp, top_trace_array());
			if (WARN_ON_ONCE(file == NULL)) {
				pr_warn("error on getting probe file.\n");
				warn++;
			} else
				enable_trace_probe(tp, file);
		}
	}

	ret = traceprobe_command("r:testprobe2 kprobe_trace_selftest_target "
				 "$retval", create_trace_probe);
	if (WARN_ON_ONCE(ret)) {
		pr_warn("error on probing function return.\n");
		warn++;
	} else {
		/* Enable trace point */
		tp = find_trace_probe("testprobe2", KPROBE_EVENT_SYSTEM);
		if (WARN_ON_ONCE(tp == NULL)) {
			pr_warn("error on getting 2nd new probe.\n");
			warn++;
		} else {
			file = find_trace_probe_file(tp, top_trace_array());
			if (WARN_ON_ONCE(file == NULL)) {
				pr_warn("error on getting probe file.\n");
				warn++;
			} else
				enable_trace_probe(tp, file);
		}
	}

	if (warn)
		goto end;

	ret = target(1, 2, 3, 4, 5, 6);

	/* Disable trace points before removing them */
	tp = find_trace_probe("testprobe", KPROBE_EVENT_SYSTEM);
	if (WARN_ON_ONCE(tp == NULL)) {
		pr_warn("error on getting test probe.\n");
		warn++;
	} else {
		file = find_trace_probe_file(tp, top_trace_array());
		if (WARN_ON_ONCE(file == NULL)) {
			pr_warn("error on getting probe file.\n");
			warn++;
		} else
			disable_trace_probe(tp, file);
	}

	tp = find_trace_probe("testprobe2", KPROBE_EVENT_SYSTEM);
	if (WARN_ON_ONCE(tp == NULL)) {
		pr_warn("error on getting 2nd test probe.\n");
		warn++;
	} else {
		file = find_trace_probe_file(tp, top_trace_array());
		if (WARN_ON_ONCE(file == NULL)) {
			pr_warn("error on getting probe file.\n");
			warn++;
		} else
			disable_trace_probe(tp, file);
	}

	ret = traceprobe_command("-:testprobe", create_trace_probe);
	if (WARN_ON_ONCE(ret)) {
		pr_warn("error on deleting a probe.\n");
		warn++;
	}

	ret = traceprobe_command("-:testprobe2", create_trace_probe);
	if (WARN_ON_ONCE(ret)) {
		pr_warn("error on deleting a probe.\n");
		warn++;
	}

end:
	release_all_trace_probes();
	if (warn)
		pr_cont("NG: Some tests failed. Please check them.\n");
	else
		pr_cont("OK\n");
	return 0;
}

late_initcall(kprobe_trace_self_tests_init);

#endif