1 /* 2 * Kprobes-based tracing events 3 * 4 * Created by Masami Hiramatsu <mhiramat@redhat.com> 5 * 6 * This program is free software; you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License version 2 as 8 * published by the Free Software Foundation. 9 * 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * 15 * You should have received a copy of the GNU General Public License 16 * along with this program; if not, write to the Free Software 17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 18 */ 19 20 #include <linux/module.h> 21 #include <linux/uaccess.h> 22 #include <linux/kprobes.h> 23 #include <linux/seq_file.h> 24 #include <linux/slab.h> 25 #include <linux/smp.h> 26 #include <linux/debugfs.h> 27 #include <linux/types.h> 28 #include <linux/string.h> 29 #include <linux/ctype.h> 30 #include <linux/ptrace.h> 31 #include <linux/perf_event.h> 32 #include <linux/stringify.h> 33 #include <linux/limits.h> 34 #include <asm/bitsperlong.h> 35 36 #include "trace.h" 37 #include "trace_output.h" 38 39 #define MAX_TRACE_ARGS 128 40 #define MAX_ARGSTR_LEN 63 41 #define MAX_EVENT_NAME_LEN 64 42 #define MAX_STRING_SIZE PATH_MAX 43 #define KPROBE_EVENT_SYSTEM "kprobes" 44 45 /* Reserved field names */ 46 #define FIELD_STRING_IP "__probe_ip" 47 #define FIELD_STRING_RETIP "__probe_ret_ip" 48 #define FIELD_STRING_FUNC "__probe_func" 49 50 const char *reserved_field_names[] = { 51 "common_type", 52 "common_flags", 53 "common_preempt_count", 54 "common_pid", 55 "common_tgid", 56 FIELD_STRING_IP, 57 FIELD_STRING_RETIP, 58 FIELD_STRING_FUNC, 59 }; 60 61 /* Printing function type */ 62 typedef int (*print_type_func_t)(struct trace_seq *, const char *, void *, 63 void *); 64 #define PRINT_TYPE_FUNC_NAME(type) print_type_##type 65 #define PRINT_TYPE_FMT_NAME(type) print_type_format_##type 66 67 /* Printing in basic type function template */ 68 #define DEFINE_BASIC_PRINT_TYPE_FUNC(type, fmt, cast) \ 69 static __kprobes int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s, \ 70 const char *name, \ 71 void *data, void *ent)\ 72 { \ 73 return trace_seq_printf(s, " %s=" fmt, name, (cast)*(type *)data);\ 74 } \ 75 static const char PRINT_TYPE_FMT_NAME(type)[] = fmt; 76 77 DEFINE_BASIC_PRINT_TYPE_FUNC(u8, "%x", unsigned int) 78 DEFINE_BASIC_PRINT_TYPE_FUNC(u16, "%x", unsigned int) 79 DEFINE_BASIC_PRINT_TYPE_FUNC(u32, "%lx", unsigned long) 80 DEFINE_BASIC_PRINT_TYPE_FUNC(u64, "%llx", unsigned long long) 81 DEFINE_BASIC_PRINT_TYPE_FUNC(s8, "%d", int) 82 DEFINE_BASIC_PRINT_TYPE_FUNC(s16, "%d", int) 83 DEFINE_BASIC_PRINT_TYPE_FUNC(s32, "%ld", long) 84 DEFINE_BASIC_PRINT_TYPE_FUNC(s64, "%lld", long long) 85 86 /* data_rloc: data relative location, compatible with u32 */ 87 #define make_data_rloc(len, roffs) \ 88 (((u32)(len) << 16) | ((u32)(roffs) & 0xffff)) 89 #define get_rloc_len(dl) ((u32)(dl) >> 16) 90 #define get_rloc_offs(dl) ((u32)(dl) & 0xffff) 91 92 static inline void *get_rloc_data(u32 *dl) 93 { 94 return (u8 *)dl + get_rloc_offs(*dl); 95 } 96 97 /* For data_loc conversion */ 98 static inline void *get_loc_data(u32 *dl, void *ent) 99 { 100 return (u8 *)ent + get_rloc_offs(*dl); 101 } 102 103 /* 104 * Convert data_rloc to data_loc: 105 * data_rloc stores the offset from data_rloc itself, but data_loc 106 * stores the offset from event entry. 107 */ 108 #define convert_rloc_to_loc(dl, offs) ((u32)(dl) + (offs)) 109 110 /* For defining macros, define string/string_size types */ 111 typedef u32 string; 112 typedef u32 string_size; 113 114 /* Print type function for string type */ 115 static __kprobes int PRINT_TYPE_FUNC_NAME(string)(struct trace_seq *s, 116 const char *name, 117 void *data, void *ent) 118 { 119 int len = *(u32 *)data >> 16; 120 121 if (!len) 122 return trace_seq_printf(s, " %s=(fault)", name); 123 else 124 return trace_seq_printf(s, " %s=\"%s\"", name, 125 (const char *)get_loc_data(data, ent)); 126 } 127 static const char PRINT_TYPE_FMT_NAME(string)[] = "\\\"%s\\\""; 128 129 /* Data fetch function type */ 130 typedef void (*fetch_func_t)(struct pt_regs *, void *, void *); 131 132 struct fetch_param { 133 fetch_func_t fn; 134 void *data; 135 }; 136 137 static __kprobes void call_fetch(struct fetch_param *fprm, 138 struct pt_regs *regs, void *dest) 139 { 140 return fprm->fn(regs, fprm->data, dest); 141 } 142 143 #define FETCH_FUNC_NAME(method, type) fetch_##method##_##type 144 /* 145 * Define macro for basic types - we don't need to define s* types, because 146 * we have to care only about bitwidth at recording time. 147 */ 148 #define DEFINE_BASIC_FETCH_FUNCS(method) \ 149 DEFINE_FETCH_##method(u8) \ 150 DEFINE_FETCH_##method(u16) \ 151 DEFINE_FETCH_##method(u32) \ 152 DEFINE_FETCH_##method(u64) 153 154 #define CHECK_FETCH_FUNCS(method, fn) \ 155 (((FETCH_FUNC_NAME(method, u8) == fn) || \ 156 (FETCH_FUNC_NAME(method, u16) == fn) || \ 157 (FETCH_FUNC_NAME(method, u32) == fn) || \ 158 (FETCH_FUNC_NAME(method, u64) == fn) || \ 159 (FETCH_FUNC_NAME(method, string) == fn) || \ 160 (FETCH_FUNC_NAME(method, string_size) == fn)) \ 161 && (fn != NULL)) 162 163 /* Data fetch function templates */ 164 #define DEFINE_FETCH_reg(type) \ 165 static __kprobes void FETCH_FUNC_NAME(reg, type)(struct pt_regs *regs, \ 166 void *offset, void *dest) \ 167 { \ 168 *(type *)dest = (type)regs_get_register(regs, \ 169 (unsigned int)((unsigned long)offset)); \ 170 } 171 DEFINE_BASIC_FETCH_FUNCS(reg) 172 /* No string on the register */ 173 #define fetch_reg_string NULL 174 #define fetch_reg_string_size NULL 175 176 #define DEFINE_FETCH_stack(type) \ 177 static __kprobes void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs,\ 178 void *offset, void *dest) \ 179 { \ 180 *(type *)dest = (type)regs_get_kernel_stack_nth(regs, \ 181 (unsigned int)((unsigned long)offset)); \ 182 } 183 DEFINE_BASIC_FETCH_FUNCS(stack) 184 /* No string on the stack entry */ 185 #define fetch_stack_string NULL 186 #define fetch_stack_string_size NULL 187 188 #define DEFINE_FETCH_retval(type) \ 189 static __kprobes void FETCH_FUNC_NAME(retval, type)(struct pt_regs *regs,\ 190 void *dummy, void *dest) \ 191 { \ 192 *(type *)dest = (type)regs_return_value(regs); \ 193 } 194 DEFINE_BASIC_FETCH_FUNCS(retval) 195 /* No string on the retval */ 196 #define fetch_retval_string NULL 197 #define fetch_retval_string_size NULL 198 199 #define DEFINE_FETCH_memory(type) \ 200 static __kprobes void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs,\ 201 void *addr, void *dest) \ 202 { \ 203 type retval; \ 204 if (probe_kernel_address(addr, retval)) \ 205 *(type *)dest = 0; \ 206 else \ 207 *(type *)dest = retval; \ 208 } 209 DEFINE_BASIC_FETCH_FUNCS(memory) 210 /* 211 * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max 212 * length and relative data location. 213 */ 214 static __kprobes void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs, 215 void *addr, void *dest) 216 { 217 long ret; 218 int maxlen = get_rloc_len(*(u32 *)dest); 219 u8 *dst = get_rloc_data(dest); 220 u8 *src = addr; 221 mm_segment_t old_fs = get_fs(); 222 if (!maxlen) 223 return; 224 /* 225 * Try to get string again, since the string can be changed while 226 * probing. 227 */ 228 set_fs(KERNEL_DS); 229 pagefault_disable(); 230 do 231 ret = __copy_from_user_inatomic(dst++, src++, 1); 232 while (dst[-1] && ret == 0 && src - (u8 *)addr < maxlen); 233 dst[-1] = '\0'; 234 pagefault_enable(); 235 set_fs(old_fs); 236 237 if (ret < 0) { /* Failed to fetch string */ 238 ((u8 *)get_rloc_data(dest))[0] = '\0'; 239 *(u32 *)dest = make_data_rloc(0, get_rloc_offs(*(u32 *)dest)); 240 } else 241 *(u32 *)dest = make_data_rloc(src - (u8 *)addr, 242 get_rloc_offs(*(u32 *)dest)); 243 } 244 /* Return the length of string -- including null terminal byte */ 245 static __kprobes void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs, 246 void *addr, void *dest) 247 { 248 int ret, len = 0; 249 u8 c; 250 mm_segment_t old_fs = get_fs(); 251 252 set_fs(KERNEL_DS); 253 pagefault_disable(); 254 do { 255 ret = __copy_from_user_inatomic(&c, (u8 *)addr + len, 1); 256 len++; 257 } while (c && ret == 0 && len < MAX_STRING_SIZE); 258 pagefault_enable(); 259 set_fs(old_fs); 260 261 if (ret < 0) /* Failed to check the length */ 262 *(u32 *)dest = 0; 263 else 264 *(u32 *)dest = len; 265 } 266 267 /* Memory fetching by symbol */ 268 struct symbol_cache { 269 char *symbol; 270 long offset; 271 unsigned long addr; 272 }; 273 274 static unsigned long update_symbol_cache(struct symbol_cache *sc) 275 { 276 sc->addr = (unsigned long)kallsyms_lookup_name(sc->symbol); 277 if (sc->addr) 278 sc->addr += sc->offset; 279 return sc->addr; 280 } 281 282 static void free_symbol_cache(struct symbol_cache *sc) 283 { 284 kfree(sc->symbol); 285 kfree(sc); 286 } 287 288 static struct symbol_cache *alloc_symbol_cache(const char *sym, long offset) 289 { 290 struct symbol_cache *sc; 291 292 if (!sym || strlen(sym) == 0) 293 return NULL; 294 sc = kzalloc(sizeof(struct symbol_cache), GFP_KERNEL); 295 if (!sc) 296 return NULL; 297 298 sc->symbol = kstrdup(sym, GFP_KERNEL); 299 if (!sc->symbol) { 300 kfree(sc); 301 return NULL; 302 } 303 sc->offset = offset; 304 305 update_symbol_cache(sc); 306 return sc; 307 } 308 309 #define DEFINE_FETCH_symbol(type) \ 310 static __kprobes void FETCH_FUNC_NAME(symbol, type)(struct pt_regs *regs,\ 311 void *data, void *dest) \ 312 { \ 313 struct symbol_cache *sc = data; \ 314 if (sc->addr) \ 315 fetch_memory_##type(regs, (void *)sc->addr, dest); \ 316 else \ 317 *(type *)dest = 0; \ 318 } 319 DEFINE_BASIC_FETCH_FUNCS(symbol) 320 DEFINE_FETCH_symbol(string) 321 DEFINE_FETCH_symbol(string_size) 322 323 /* Dereference memory access function */ 324 struct deref_fetch_param { 325 struct fetch_param orig; 326 long offset; 327 }; 328 329 #define DEFINE_FETCH_deref(type) \ 330 static __kprobes void FETCH_FUNC_NAME(deref, type)(struct pt_regs *regs,\ 331 void *data, void *dest) \ 332 { \ 333 struct deref_fetch_param *dprm = data; \ 334 unsigned long addr; \ 335 call_fetch(&dprm->orig, regs, &addr); \ 336 if (addr) { \ 337 addr += dprm->offset; \ 338 fetch_memory_##type(regs, (void *)addr, dest); \ 339 } else \ 340 *(type *)dest = 0; \ 341 } 342 DEFINE_BASIC_FETCH_FUNCS(deref) 343 DEFINE_FETCH_deref(string) 344 DEFINE_FETCH_deref(string_size) 345 346 static __kprobes void free_deref_fetch_param(struct deref_fetch_param *data) 347 { 348 if (CHECK_FETCH_FUNCS(deref, data->orig.fn)) 349 free_deref_fetch_param(data->orig.data); 350 else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn)) 351 free_symbol_cache(data->orig.data); 352 kfree(data); 353 } 354 355 /* Bitfield fetch function */ 356 struct bitfield_fetch_param { 357 struct fetch_param orig; 358 unsigned char hi_shift; 359 unsigned char low_shift; 360 }; 361 362 #define DEFINE_FETCH_bitfield(type) \ 363 static __kprobes void FETCH_FUNC_NAME(bitfield, type)(struct pt_regs *regs,\ 364 void *data, void *dest) \ 365 { \ 366 struct bitfield_fetch_param *bprm = data; \ 367 type buf = 0; \ 368 call_fetch(&bprm->orig, regs, &buf); \ 369 if (buf) { \ 370 buf <<= bprm->hi_shift; \ 371 buf >>= bprm->low_shift; \ 372 } \ 373 *(type *)dest = buf; \ 374 } 375 DEFINE_BASIC_FETCH_FUNCS(bitfield) 376 #define fetch_bitfield_string NULL 377 #define fetch_bitfield_string_size NULL 378 379 static __kprobes void 380 free_bitfield_fetch_param(struct bitfield_fetch_param *data) 381 { 382 /* 383 * Don't check the bitfield itself, because this must be the 384 * last fetch function. 385 */ 386 if (CHECK_FETCH_FUNCS(deref, data->orig.fn)) 387 free_deref_fetch_param(data->orig.data); 388 else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn)) 389 free_symbol_cache(data->orig.data); 390 kfree(data); 391 } 392 /* Default (unsigned long) fetch type */ 393 #define __DEFAULT_FETCH_TYPE(t) u##t 394 #define _DEFAULT_FETCH_TYPE(t) __DEFAULT_FETCH_TYPE(t) 395 #define DEFAULT_FETCH_TYPE _DEFAULT_FETCH_TYPE(BITS_PER_LONG) 396 #define DEFAULT_FETCH_TYPE_STR __stringify(DEFAULT_FETCH_TYPE) 397 398 /* Fetch types */ 399 enum { 400 FETCH_MTD_reg = 0, 401 FETCH_MTD_stack, 402 FETCH_MTD_retval, 403 FETCH_MTD_memory, 404 FETCH_MTD_symbol, 405 FETCH_MTD_deref, 406 FETCH_MTD_bitfield, 407 FETCH_MTD_END, 408 }; 409 410 #define ASSIGN_FETCH_FUNC(method, type) \ 411 [FETCH_MTD_##method] = FETCH_FUNC_NAME(method, type) 412 413 #define __ASSIGN_FETCH_TYPE(_name, ptype, ftype, _size, sign, _fmttype) \ 414 {.name = _name, \ 415 .size = _size, \ 416 .is_signed = sign, \ 417 .print = PRINT_TYPE_FUNC_NAME(ptype), \ 418 .fmt = PRINT_TYPE_FMT_NAME(ptype), \ 419 .fmttype = _fmttype, \ 420 .fetch = { \ 421 ASSIGN_FETCH_FUNC(reg, ftype), \ 422 ASSIGN_FETCH_FUNC(stack, ftype), \ 423 ASSIGN_FETCH_FUNC(retval, ftype), \ 424 ASSIGN_FETCH_FUNC(memory, ftype), \ 425 ASSIGN_FETCH_FUNC(symbol, ftype), \ 426 ASSIGN_FETCH_FUNC(deref, ftype), \ 427 ASSIGN_FETCH_FUNC(bitfield, ftype), \ 428 } \ 429 } 430 431 #define ASSIGN_FETCH_TYPE(ptype, ftype, sign) \ 432 __ASSIGN_FETCH_TYPE(#ptype, ptype, ftype, sizeof(ftype), sign, #ptype) 433 434 #define FETCH_TYPE_STRING 0 435 #define FETCH_TYPE_STRSIZE 1 436 437 /* Fetch type information table */ 438 static const struct fetch_type { 439 const char *name; /* Name of type */ 440 size_t size; /* Byte size of type */ 441 int is_signed; /* Signed flag */ 442 print_type_func_t print; /* Print functions */ 443 const char *fmt; /* Fromat string */ 444 const char *fmttype; /* Name in format file */ 445 /* Fetch functions */ 446 fetch_func_t fetch[FETCH_MTD_END]; 447 } fetch_type_table[] = { 448 /* Special types */ 449 [FETCH_TYPE_STRING] = __ASSIGN_FETCH_TYPE("string", string, string, 450 sizeof(u32), 1, "__data_loc char[]"), 451 [FETCH_TYPE_STRSIZE] = __ASSIGN_FETCH_TYPE("string_size", u32, 452 string_size, sizeof(u32), 0, "u32"), 453 /* Basic types */ 454 ASSIGN_FETCH_TYPE(u8, u8, 0), 455 ASSIGN_FETCH_TYPE(u16, u16, 0), 456 ASSIGN_FETCH_TYPE(u32, u32, 0), 457 ASSIGN_FETCH_TYPE(u64, u64, 0), 458 ASSIGN_FETCH_TYPE(s8, u8, 1), 459 ASSIGN_FETCH_TYPE(s16, u16, 1), 460 ASSIGN_FETCH_TYPE(s32, u32, 1), 461 ASSIGN_FETCH_TYPE(s64, u64, 1), 462 }; 463 464 static const struct fetch_type *find_fetch_type(const char *type) 465 { 466 int i; 467 468 if (!type) 469 type = DEFAULT_FETCH_TYPE_STR; 470 471 /* Special case: bitfield */ 472 if (*type == 'b') { 473 unsigned long bs; 474 type = strchr(type, '/'); 475 if (!type) 476 goto fail; 477 type++; 478 if (strict_strtoul(type, 0, &bs)) 479 goto fail; 480 switch (bs) { 481 case 8: 482 return find_fetch_type("u8"); 483 case 16: 484 return find_fetch_type("u16"); 485 case 32: 486 return find_fetch_type("u32"); 487 case 64: 488 return find_fetch_type("u64"); 489 default: 490 goto fail; 491 } 492 } 493 494 for (i = 0; i < ARRAY_SIZE(fetch_type_table); i++) 495 if (strcmp(type, fetch_type_table[i].name) == 0) 496 return &fetch_type_table[i]; 497 fail: 498 return NULL; 499 } 500 501 /* Special function : only accept unsigned long */ 502 static __kprobes void fetch_stack_address(struct pt_regs *regs, 503 void *dummy, void *dest) 504 { 505 *(unsigned long *)dest = kernel_stack_pointer(regs); 506 } 507 508 static fetch_func_t get_fetch_size_function(const struct fetch_type *type, 509 fetch_func_t orig_fn) 510 { 511 int i; 512 513 if (type != &fetch_type_table[FETCH_TYPE_STRING]) 514 return NULL; /* Only string type needs size function */ 515 for (i = 0; i < FETCH_MTD_END; i++) 516 if (type->fetch[i] == orig_fn) 517 return fetch_type_table[FETCH_TYPE_STRSIZE].fetch[i]; 518 519 WARN_ON(1); /* This should not happen */ 520 return NULL; 521 } 522 523 /** 524 * Kprobe event core functions 525 */ 526 527 struct probe_arg { 528 struct fetch_param fetch; 529 struct fetch_param fetch_size; 530 unsigned int offset; /* Offset from argument entry */ 531 const char *name; /* Name of this argument */ 532 const char *comm; /* Command of this argument */ 533 const struct fetch_type *type; /* Type of this argument */ 534 }; 535 536 /* Flags for trace_probe */ 537 #define TP_FLAG_TRACE 1 538 #define TP_FLAG_PROFILE 2 539 540 struct trace_probe { 541 struct list_head list; 542 struct kretprobe rp; /* Use rp.kp for kprobe use */ 543 unsigned long nhit; 544 unsigned int flags; /* For TP_FLAG_* */ 545 const char *symbol; /* symbol name */ 546 struct ftrace_event_class class; 547 struct ftrace_event_call call; 548 ssize_t size; /* trace entry size */ 549 unsigned int nr_args; 550 struct probe_arg args[]; 551 }; 552 553 #define SIZEOF_TRACE_PROBE(n) \ 554 (offsetof(struct trace_probe, args) + \ 555 (sizeof(struct probe_arg) * (n))) 556 557 558 static __kprobes int probe_is_return(struct trace_probe *tp) 559 { 560 return tp->rp.handler != NULL; 561 } 562 563 static __kprobes const char *probe_symbol(struct trace_probe *tp) 564 { 565 return tp->symbol ? tp->symbol : "unknown"; 566 } 567 568 static int register_probe_event(struct trace_probe *tp); 569 static void unregister_probe_event(struct trace_probe *tp); 570 571 static DEFINE_MUTEX(probe_lock); 572 static LIST_HEAD(probe_list); 573 574 static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs); 575 static int kretprobe_dispatcher(struct kretprobe_instance *ri, 576 struct pt_regs *regs); 577 578 /* Check the name is good for event/group/fields */ 579 static int is_good_name(const char *name) 580 { 581 if (!isalpha(*name) && *name != '_') 582 return 0; 583 while (*++name != '\0') { 584 if (!isalpha(*name) && !isdigit(*name) && *name != '_') 585 return 0; 586 } 587 return 1; 588 } 589 590 /* 591 * Allocate new trace_probe and initialize it (including kprobes). 592 */ 593 static struct trace_probe *alloc_trace_probe(const char *group, 594 const char *event, 595 void *addr, 596 const char *symbol, 597 unsigned long offs, 598 int nargs, int is_return) 599 { 600 struct trace_probe *tp; 601 int ret = -ENOMEM; 602 603 tp = kzalloc(SIZEOF_TRACE_PROBE(nargs), GFP_KERNEL); 604 if (!tp) 605 return ERR_PTR(ret); 606 607 if (symbol) { 608 tp->symbol = kstrdup(symbol, GFP_KERNEL); 609 if (!tp->symbol) 610 goto error; 611 tp->rp.kp.symbol_name = tp->symbol; 612 tp->rp.kp.offset = offs; 613 } else 614 tp->rp.kp.addr = addr; 615 616 if (is_return) 617 tp->rp.handler = kretprobe_dispatcher; 618 else 619 tp->rp.kp.pre_handler = kprobe_dispatcher; 620 621 if (!event || !is_good_name(event)) { 622 ret = -EINVAL; 623 goto error; 624 } 625 626 tp->call.class = &tp->class; 627 tp->call.name = kstrdup(event, GFP_KERNEL); 628 if (!tp->call.name) 629 goto error; 630 631 if (!group || !is_good_name(group)) { 632 ret = -EINVAL; 633 goto error; 634 } 635 636 tp->class.system = kstrdup(group, GFP_KERNEL); 637 if (!tp->class.system) 638 goto error; 639 640 INIT_LIST_HEAD(&tp->list); 641 return tp; 642 error: 643 kfree(tp->call.name); 644 kfree(tp->symbol); 645 kfree(tp); 646 return ERR_PTR(ret); 647 } 648 649 static void free_probe_arg(struct probe_arg *arg) 650 { 651 if (CHECK_FETCH_FUNCS(bitfield, arg->fetch.fn)) 652 free_bitfield_fetch_param(arg->fetch.data); 653 else if (CHECK_FETCH_FUNCS(deref, arg->fetch.fn)) 654 free_deref_fetch_param(arg->fetch.data); 655 else if (CHECK_FETCH_FUNCS(symbol, arg->fetch.fn)) 656 free_symbol_cache(arg->fetch.data); 657 kfree(arg->name); 658 kfree(arg->comm); 659 } 660 661 static void free_trace_probe(struct trace_probe *tp) 662 { 663 int i; 664 665 for (i = 0; i < tp->nr_args; i++) 666 free_probe_arg(&tp->args[i]); 667 668 kfree(tp->call.class->system); 669 kfree(tp->call.name); 670 kfree(tp->symbol); 671 kfree(tp); 672 } 673 674 static struct trace_probe *find_probe_event(const char *event, 675 const char *group) 676 { 677 struct trace_probe *tp; 678 679 list_for_each_entry(tp, &probe_list, list) 680 if (strcmp(tp->call.name, event) == 0 && 681 strcmp(tp->call.class->system, group) == 0) 682 return tp; 683 return NULL; 684 } 685 686 /* Unregister a trace_probe and probe_event: call with locking probe_lock */ 687 static void unregister_trace_probe(struct trace_probe *tp) 688 { 689 if (probe_is_return(tp)) 690 unregister_kretprobe(&tp->rp); 691 else 692 unregister_kprobe(&tp->rp.kp); 693 list_del(&tp->list); 694 unregister_probe_event(tp); 695 } 696 697 /* Register a trace_probe and probe_event */ 698 static int register_trace_probe(struct trace_probe *tp) 699 { 700 struct trace_probe *old_tp; 701 int ret; 702 703 mutex_lock(&probe_lock); 704 705 /* register as an event */ 706 old_tp = find_probe_event(tp->call.name, tp->call.class->system); 707 if (old_tp) { 708 /* delete old event */ 709 unregister_trace_probe(old_tp); 710 free_trace_probe(old_tp); 711 } 712 ret = register_probe_event(tp); 713 if (ret) { 714 pr_warning("Failed to register probe event(%d)\n", ret); 715 goto end; 716 } 717 718 tp->rp.kp.flags |= KPROBE_FLAG_DISABLED; 719 if (probe_is_return(tp)) 720 ret = register_kretprobe(&tp->rp); 721 else 722 ret = register_kprobe(&tp->rp.kp); 723 724 if (ret) { 725 pr_warning("Could not insert probe(%d)\n", ret); 726 if (ret == -EILSEQ) { 727 pr_warning("Probing address(0x%p) is not an " 728 "instruction boundary.\n", 729 tp->rp.kp.addr); 730 ret = -EINVAL; 731 } 732 unregister_probe_event(tp); 733 } else 734 list_add_tail(&tp->list, &probe_list); 735 end: 736 mutex_unlock(&probe_lock); 737 return ret; 738 } 739 740 /* Split symbol and offset. */ 741 static int split_symbol_offset(char *symbol, unsigned long *offset) 742 { 743 char *tmp; 744 int ret; 745 746 if (!offset) 747 return -EINVAL; 748 749 tmp = strchr(symbol, '+'); 750 if (tmp) { 751 /* skip sign because strict_strtol doesn't accept '+' */ 752 ret = strict_strtoul(tmp + 1, 0, offset); 753 if (ret) 754 return ret; 755 *tmp = '\0'; 756 } else 757 *offset = 0; 758 return 0; 759 } 760 761 #define PARAM_MAX_ARGS 16 762 #define PARAM_MAX_STACK (THREAD_SIZE / sizeof(unsigned long)) 763 764 static int parse_probe_vars(char *arg, const struct fetch_type *t, 765 struct fetch_param *f, int is_return) 766 { 767 int ret = 0; 768 unsigned long param; 769 770 if (strcmp(arg, "retval") == 0) { 771 if (is_return) 772 f->fn = t->fetch[FETCH_MTD_retval]; 773 else 774 ret = -EINVAL; 775 } else if (strncmp(arg, "stack", 5) == 0) { 776 if (arg[5] == '\0') { 777 if (strcmp(t->name, DEFAULT_FETCH_TYPE_STR) == 0) 778 f->fn = fetch_stack_address; 779 else 780 ret = -EINVAL; 781 } else if (isdigit(arg[5])) { 782 ret = strict_strtoul(arg + 5, 10, ¶m); 783 if (ret || param > PARAM_MAX_STACK) 784 ret = -EINVAL; 785 else { 786 f->fn = t->fetch[FETCH_MTD_stack]; 787 f->data = (void *)param; 788 } 789 } else 790 ret = -EINVAL; 791 } else 792 ret = -EINVAL; 793 return ret; 794 } 795 796 /* Recursive argument parser */ 797 static int __parse_probe_arg(char *arg, const struct fetch_type *t, 798 struct fetch_param *f, int is_return) 799 { 800 int ret = 0; 801 unsigned long param; 802 long offset; 803 char *tmp; 804 805 switch (arg[0]) { 806 case '$': 807 ret = parse_probe_vars(arg + 1, t, f, is_return); 808 break; 809 case '%': /* named register */ 810 ret = regs_query_register_offset(arg + 1); 811 if (ret >= 0) { 812 f->fn = t->fetch[FETCH_MTD_reg]; 813 f->data = (void *)(unsigned long)ret; 814 ret = 0; 815 } 816 break; 817 case '@': /* memory or symbol */ 818 if (isdigit(arg[1])) { 819 ret = strict_strtoul(arg + 1, 0, ¶m); 820 if (ret) 821 break; 822 f->fn = t->fetch[FETCH_MTD_memory]; 823 f->data = (void *)param; 824 } else { 825 ret = split_symbol_offset(arg + 1, &offset); 826 if (ret) 827 break; 828 f->data = alloc_symbol_cache(arg + 1, offset); 829 if (f->data) 830 f->fn = t->fetch[FETCH_MTD_symbol]; 831 } 832 break; 833 case '+': /* deref memory */ 834 arg++; /* Skip '+', because strict_strtol() rejects it. */ 835 case '-': 836 tmp = strchr(arg, '('); 837 if (!tmp) 838 break; 839 *tmp = '\0'; 840 ret = strict_strtol(arg, 0, &offset); 841 if (ret) 842 break; 843 arg = tmp + 1; 844 tmp = strrchr(arg, ')'); 845 if (tmp) { 846 struct deref_fetch_param *dprm; 847 const struct fetch_type *t2 = find_fetch_type(NULL); 848 *tmp = '\0'; 849 dprm = kzalloc(sizeof(struct deref_fetch_param), 850 GFP_KERNEL); 851 if (!dprm) 852 return -ENOMEM; 853 dprm->offset = offset; 854 ret = __parse_probe_arg(arg, t2, &dprm->orig, 855 is_return); 856 if (ret) 857 kfree(dprm); 858 else { 859 f->fn = t->fetch[FETCH_MTD_deref]; 860 f->data = (void *)dprm; 861 } 862 } 863 break; 864 } 865 if (!ret && !f->fn) { /* Parsed, but do not find fetch method */ 866 pr_info("%s type has no corresponding fetch method.\n", 867 t->name); 868 ret = -EINVAL; 869 } 870 return ret; 871 } 872 873 #define BYTES_TO_BITS(nb) ((BITS_PER_LONG * (nb)) / sizeof(long)) 874 875 /* Bitfield type needs to be parsed into a fetch function */ 876 static int __parse_bitfield_probe_arg(const char *bf, 877 const struct fetch_type *t, 878 struct fetch_param *f) 879 { 880 struct bitfield_fetch_param *bprm; 881 unsigned long bw, bo; 882 char *tail; 883 884 if (*bf != 'b') 885 return 0; 886 887 bprm = kzalloc(sizeof(*bprm), GFP_KERNEL); 888 if (!bprm) 889 return -ENOMEM; 890 bprm->orig = *f; 891 f->fn = t->fetch[FETCH_MTD_bitfield]; 892 f->data = (void *)bprm; 893 894 bw = simple_strtoul(bf + 1, &tail, 0); /* Use simple one */ 895 if (bw == 0 || *tail != '@') 896 return -EINVAL; 897 898 bf = tail + 1; 899 bo = simple_strtoul(bf, &tail, 0); 900 if (tail == bf || *tail != '/') 901 return -EINVAL; 902 903 bprm->hi_shift = BYTES_TO_BITS(t->size) - (bw + bo); 904 bprm->low_shift = bprm->hi_shift + bo; 905 return (BYTES_TO_BITS(t->size) < (bw + bo)) ? -EINVAL : 0; 906 } 907 908 /* String length checking wrapper */ 909 static int parse_probe_arg(char *arg, struct trace_probe *tp, 910 struct probe_arg *parg, int is_return) 911 { 912 const char *t; 913 int ret; 914 915 if (strlen(arg) > MAX_ARGSTR_LEN) { 916 pr_info("Argument is too long.: %s\n", arg); 917 return -ENOSPC; 918 } 919 parg->comm = kstrdup(arg, GFP_KERNEL); 920 if (!parg->comm) { 921 pr_info("Failed to allocate memory for command '%s'.\n", arg); 922 return -ENOMEM; 923 } 924 t = strchr(parg->comm, ':'); 925 if (t) { 926 arg[t - parg->comm] = '\0'; 927 t++; 928 } 929 parg->type = find_fetch_type(t); 930 if (!parg->type) { 931 pr_info("Unsupported type: %s\n", t); 932 return -EINVAL; 933 } 934 parg->offset = tp->size; 935 tp->size += parg->type->size; 936 ret = __parse_probe_arg(arg, parg->type, &parg->fetch, is_return); 937 if (ret >= 0 && t != NULL) 938 ret = __parse_bitfield_probe_arg(t, parg->type, &parg->fetch); 939 if (ret >= 0) { 940 parg->fetch_size.fn = get_fetch_size_function(parg->type, 941 parg->fetch.fn); 942 parg->fetch_size.data = parg->fetch.data; 943 } 944 return ret; 945 } 946 947 /* Return 1 if name is reserved or already used by another argument */ 948 static int conflict_field_name(const char *name, 949 struct probe_arg *args, int narg) 950 { 951 int i; 952 for (i = 0; i < ARRAY_SIZE(reserved_field_names); i++) 953 if (strcmp(reserved_field_names[i], name) == 0) 954 return 1; 955 for (i = 0; i < narg; i++) 956 if (strcmp(args[i].name, name) == 0) 957 return 1; 958 return 0; 959 } 960 961 static int create_trace_probe(int argc, char **argv) 962 { 963 /* 964 * Argument syntax: 965 * - Add kprobe: p[:[GRP/]EVENT] KSYM[+OFFS]|KADDR [FETCHARGS] 966 * - Add kretprobe: r[:[GRP/]EVENT] KSYM[+0] [FETCHARGS] 967 * Fetch args: 968 * $retval : fetch return value 969 * $stack : fetch stack address 970 * $stackN : fetch Nth of stack (N:0-) 971 * @ADDR : fetch memory at ADDR (ADDR should be in kernel) 972 * @SYM[+|-offs] : fetch memory at SYM +|- offs (SYM is a data symbol) 973 * %REG : fetch register REG 974 * Dereferencing memory fetch: 975 * +|-offs(ARG) : fetch memory at ARG +|- offs address. 976 * Alias name of args: 977 * NAME=FETCHARG : set NAME as alias of FETCHARG. 978 * Type of args: 979 * FETCHARG:TYPE : use TYPE instead of unsigned long. 980 */ 981 struct trace_probe *tp; 982 int i, ret = 0; 983 int is_return = 0, is_delete = 0; 984 char *symbol = NULL, *event = NULL, *group = NULL; 985 char *arg; 986 unsigned long offset = 0; 987 void *addr = NULL; 988 char buf[MAX_EVENT_NAME_LEN]; 989 990 /* argc must be >= 1 */ 991 if (argv[0][0] == 'p') 992 is_return = 0; 993 else if (argv[0][0] == 'r') 994 is_return = 1; 995 else if (argv[0][0] == '-') 996 is_delete = 1; 997 else { 998 pr_info("Probe definition must be started with 'p', 'r' or" 999 " '-'.\n"); 1000 return -EINVAL; 1001 } 1002 1003 if (argv[0][1] == ':') { 1004 event = &argv[0][2]; 1005 if (strchr(event, '/')) { 1006 group = event; 1007 event = strchr(group, '/') + 1; 1008 event[-1] = '\0'; 1009 if (strlen(group) == 0) { 1010 pr_info("Group name is not specified\n"); 1011 return -EINVAL; 1012 } 1013 } 1014 if (strlen(event) == 0) { 1015 pr_info("Event name is not specified\n"); 1016 return -EINVAL; 1017 } 1018 } 1019 if (!group) 1020 group = KPROBE_EVENT_SYSTEM; 1021 1022 if (is_delete) { 1023 if (!event) { 1024 pr_info("Delete command needs an event name.\n"); 1025 return -EINVAL; 1026 } 1027 mutex_lock(&probe_lock); 1028 tp = find_probe_event(event, group); 1029 if (!tp) { 1030 mutex_unlock(&probe_lock); 1031 pr_info("Event %s/%s doesn't exist.\n", group, event); 1032 return -ENOENT; 1033 } 1034 /* delete an event */ 1035 unregister_trace_probe(tp); 1036 free_trace_probe(tp); 1037 mutex_unlock(&probe_lock); 1038 return 0; 1039 } 1040 1041 if (argc < 2) { 1042 pr_info("Probe point is not specified.\n"); 1043 return -EINVAL; 1044 } 1045 if (isdigit(argv[1][0])) { 1046 if (is_return) { 1047 pr_info("Return probe point must be a symbol.\n"); 1048 return -EINVAL; 1049 } 1050 /* an address specified */ 1051 ret = strict_strtoul(&argv[1][0], 0, (unsigned long *)&addr); 1052 if (ret) { 1053 pr_info("Failed to parse address.\n"); 1054 return ret; 1055 } 1056 } else { 1057 /* a symbol specified */ 1058 symbol = argv[1]; 1059 /* TODO: support .init module functions */ 1060 ret = split_symbol_offset(symbol, &offset); 1061 if (ret) { 1062 pr_info("Failed to parse symbol.\n"); 1063 return ret; 1064 } 1065 if (offset && is_return) { 1066 pr_info("Return probe must be used without offset.\n"); 1067 return -EINVAL; 1068 } 1069 } 1070 argc -= 2; argv += 2; 1071 1072 /* setup a probe */ 1073 if (!event) { 1074 /* Make a new event name */ 1075 if (symbol) 1076 snprintf(buf, MAX_EVENT_NAME_LEN, "%c_%s_%ld", 1077 is_return ? 'r' : 'p', symbol, offset); 1078 else 1079 snprintf(buf, MAX_EVENT_NAME_LEN, "%c_0x%p", 1080 is_return ? 'r' : 'p', addr); 1081 event = buf; 1082 } 1083 tp = alloc_trace_probe(group, event, addr, symbol, offset, argc, 1084 is_return); 1085 if (IS_ERR(tp)) { 1086 pr_info("Failed to allocate trace_probe.(%d)\n", 1087 (int)PTR_ERR(tp)); 1088 return PTR_ERR(tp); 1089 } 1090 1091 /* parse arguments */ 1092 ret = 0; 1093 for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) { 1094 /* Increment count for freeing args in error case */ 1095 tp->nr_args++; 1096 1097 /* Parse argument name */ 1098 arg = strchr(argv[i], '='); 1099 if (arg) { 1100 *arg++ = '\0'; 1101 tp->args[i].name = kstrdup(argv[i], GFP_KERNEL); 1102 } else { 1103 arg = argv[i]; 1104 /* If argument name is omitted, set "argN" */ 1105 snprintf(buf, MAX_EVENT_NAME_LEN, "arg%d", i + 1); 1106 tp->args[i].name = kstrdup(buf, GFP_KERNEL); 1107 } 1108 1109 if (!tp->args[i].name) { 1110 pr_info("Failed to allocate argument[%d] name.\n", i); 1111 ret = -ENOMEM; 1112 goto error; 1113 } 1114 1115 if (!is_good_name(tp->args[i].name)) { 1116 pr_info("Invalid argument[%d] name: %s\n", 1117 i, tp->args[i].name); 1118 ret = -EINVAL; 1119 goto error; 1120 } 1121 1122 if (conflict_field_name(tp->args[i].name, tp->args, i)) { 1123 pr_info("Argument[%d] name '%s' conflicts with " 1124 "another field.\n", i, argv[i]); 1125 ret = -EINVAL; 1126 goto error; 1127 } 1128 1129 /* Parse fetch argument */ 1130 ret = parse_probe_arg(arg, tp, &tp->args[i], is_return); 1131 if (ret) { 1132 pr_info("Parse error at argument[%d]. (%d)\n", i, ret); 1133 goto error; 1134 } 1135 } 1136 1137 ret = register_trace_probe(tp); 1138 if (ret) 1139 goto error; 1140 return 0; 1141 1142 error: 1143 free_trace_probe(tp); 1144 return ret; 1145 } 1146 1147 static void cleanup_all_probes(void) 1148 { 1149 struct trace_probe *tp; 1150 1151 mutex_lock(&probe_lock); 1152 /* TODO: Use batch unregistration */ 1153 while (!list_empty(&probe_list)) { 1154 tp = list_entry(probe_list.next, struct trace_probe, list); 1155 unregister_trace_probe(tp); 1156 free_trace_probe(tp); 1157 } 1158 mutex_unlock(&probe_lock); 1159 } 1160 1161 1162 /* Probes listing interfaces */ 1163 static void *probes_seq_start(struct seq_file *m, loff_t *pos) 1164 { 1165 mutex_lock(&probe_lock); 1166 return seq_list_start(&probe_list, *pos); 1167 } 1168 1169 static void *probes_seq_next(struct seq_file *m, void *v, loff_t *pos) 1170 { 1171 return seq_list_next(v, &probe_list, pos); 1172 } 1173 1174 static void probes_seq_stop(struct seq_file *m, void *v) 1175 { 1176 mutex_unlock(&probe_lock); 1177 } 1178 1179 static int probes_seq_show(struct seq_file *m, void *v) 1180 { 1181 struct trace_probe *tp = v; 1182 int i; 1183 1184 seq_printf(m, "%c", probe_is_return(tp) ? 'r' : 'p'); 1185 seq_printf(m, ":%s/%s", tp->call.class->system, tp->call.name); 1186 1187 if (!tp->symbol) 1188 seq_printf(m, " 0x%p", tp->rp.kp.addr); 1189 else if (tp->rp.kp.offset) 1190 seq_printf(m, " %s+%u", probe_symbol(tp), tp->rp.kp.offset); 1191 else 1192 seq_printf(m, " %s", probe_symbol(tp)); 1193 1194 for (i = 0; i < tp->nr_args; i++) 1195 seq_printf(m, " %s=%s", tp->args[i].name, tp->args[i].comm); 1196 seq_printf(m, "\n"); 1197 1198 return 0; 1199 } 1200 1201 static const struct seq_operations probes_seq_op = { 1202 .start = probes_seq_start, 1203 .next = probes_seq_next, 1204 .stop = probes_seq_stop, 1205 .show = probes_seq_show 1206 }; 1207 1208 static int probes_open(struct inode *inode, struct file *file) 1209 { 1210 if ((file->f_mode & FMODE_WRITE) && 1211 (file->f_flags & O_TRUNC)) 1212 cleanup_all_probes(); 1213 1214 return seq_open(file, &probes_seq_op); 1215 } 1216 1217 static int command_trace_probe(const char *buf) 1218 { 1219 char **argv; 1220 int argc = 0, ret = 0; 1221 1222 argv = argv_split(GFP_KERNEL, buf, &argc); 1223 if (!argv) 1224 return -ENOMEM; 1225 1226 if (argc) 1227 ret = create_trace_probe(argc, argv); 1228 1229 argv_free(argv); 1230 return ret; 1231 } 1232 1233 #define WRITE_BUFSIZE 4096 1234 1235 static ssize_t probes_write(struct file *file, const char __user *buffer, 1236 size_t count, loff_t *ppos) 1237 { 1238 char *kbuf, *tmp; 1239 int ret; 1240 size_t done; 1241 size_t size; 1242 1243 kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL); 1244 if (!kbuf) 1245 return -ENOMEM; 1246 1247 ret = done = 0; 1248 while (done < count) { 1249 size = count - done; 1250 if (size >= WRITE_BUFSIZE) 1251 size = WRITE_BUFSIZE - 1; 1252 if (copy_from_user(kbuf, buffer + done, size)) { 1253 ret = -EFAULT; 1254 goto out; 1255 } 1256 kbuf[size] = '\0'; 1257 tmp = strchr(kbuf, '\n'); 1258 if (tmp) { 1259 *tmp = '\0'; 1260 size = tmp - kbuf + 1; 1261 } else if (done + size < count) { 1262 pr_warning("Line length is too long: " 1263 "Should be less than %d.", WRITE_BUFSIZE); 1264 ret = -EINVAL; 1265 goto out; 1266 } 1267 done += size; 1268 /* Remove comments */ 1269 tmp = strchr(kbuf, '#'); 1270 if (tmp) 1271 *tmp = '\0'; 1272 1273 ret = command_trace_probe(kbuf); 1274 if (ret) 1275 goto out; 1276 } 1277 ret = done; 1278 out: 1279 kfree(kbuf); 1280 return ret; 1281 } 1282 1283 static const struct file_operations kprobe_events_ops = { 1284 .owner = THIS_MODULE, 1285 .open = probes_open, 1286 .read = seq_read, 1287 .llseek = seq_lseek, 1288 .release = seq_release, 1289 .write = probes_write, 1290 }; 1291 1292 /* Probes profiling interfaces */ 1293 static int probes_profile_seq_show(struct seq_file *m, void *v) 1294 { 1295 struct trace_probe *tp = v; 1296 1297 seq_printf(m, " %-44s %15lu %15lu\n", tp->call.name, tp->nhit, 1298 tp->rp.kp.nmissed); 1299 1300 return 0; 1301 } 1302 1303 static const struct seq_operations profile_seq_op = { 1304 .start = probes_seq_start, 1305 .next = probes_seq_next, 1306 .stop = probes_seq_stop, 1307 .show = probes_profile_seq_show 1308 }; 1309 1310 static int profile_open(struct inode *inode, struct file *file) 1311 { 1312 return seq_open(file, &profile_seq_op); 1313 } 1314 1315 static const struct file_operations kprobe_profile_ops = { 1316 .owner = THIS_MODULE, 1317 .open = profile_open, 1318 .read = seq_read, 1319 .llseek = seq_lseek, 1320 .release = seq_release, 1321 }; 1322 1323 /* Sum up total data length for dynamic arraies (strings) */ 1324 static __kprobes int __get_data_size(struct trace_probe *tp, 1325 struct pt_regs *regs) 1326 { 1327 int i, ret = 0; 1328 u32 len; 1329 1330 for (i = 0; i < tp->nr_args; i++) 1331 if (unlikely(tp->args[i].fetch_size.fn)) { 1332 call_fetch(&tp->args[i].fetch_size, regs, &len); 1333 ret += len; 1334 } 1335 1336 return ret; 1337 } 1338 1339 /* Store the value of each argument */ 1340 static __kprobes void store_trace_args(int ent_size, struct trace_probe *tp, 1341 struct pt_regs *regs, 1342 u8 *data, int maxlen) 1343 { 1344 int i; 1345 u32 end = tp->size; 1346 u32 *dl; /* Data (relative) location */ 1347 1348 for (i = 0; i < tp->nr_args; i++) { 1349 if (unlikely(tp->args[i].fetch_size.fn)) { 1350 /* 1351 * First, we set the relative location and 1352 * maximum data length to *dl 1353 */ 1354 dl = (u32 *)(data + tp->args[i].offset); 1355 *dl = make_data_rloc(maxlen, end - tp->args[i].offset); 1356 /* Then try to fetch string or dynamic array data */ 1357 call_fetch(&tp->args[i].fetch, regs, dl); 1358 /* Reduce maximum length */ 1359 end += get_rloc_len(*dl); 1360 maxlen -= get_rloc_len(*dl); 1361 /* Trick here, convert data_rloc to data_loc */ 1362 *dl = convert_rloc_to_loc(*dl, 1363 ent_size + tp->args[i].offset); 1364 } else 1365 /* Just fetching data normally */ 1366 call_fetch(&tp->args[i].fetch, regs, 1367 data + tp->args[i].offset); 1368 } 1369 } 1370 1371 /* Kprobe handler */ 1372 static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs) 1373 { 1374 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); 1375 struct kprobe_trace_entry_head *entry; 1376 struct ring_buffer_event *event; 1377 struct ring_buffer *buffer; 1378 int size, dsize, pc; 1379 unsigned long irq_flags; 1380 struct ftrace_event_call *call = &tp->call; 1381 1382 tp->nhit++; 1383 1384 local_save_flags(irq_flags); 1385 pc = preempt_count(); 1386 1387 dsize = __get_data_size(tp, regs); 1388 size = sizeof(*entry) + tp->size + dsize; 1389 1390 event = trace_current_buffer_lock_reserve(&buffer, call->event.type, 1391 size, irq_flags, pc); 1392 if (!event) 1393 return; 1394 1395 entry = ring_buffer_event_data(event); 1396 entry->ip = (unsigned long)kp->addr; 1397 store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); 1398 1399 if (!filter_current_check_discard(buffer, call, entry, event)) 1400 trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc); 1401 } 1402 1403 /* Kretprobe handler */ 1404 static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri, 1405 struct pt_regs *regs) 1406 { 1407 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); 1408 struct kretprobe_trace_entry_head *entry; 1409 struct ring_buffer_event *event; 1410 struct ring_buffer *buffer; 1411 int size, pc, dsize; 1412 unsigned long irq_flags; 1413 struct ftrace_event_call *call = &tp->call; 1414 1415 local_save_flags(irq_flags); 1416 pc = preempt_count(); 1417 1418 dsize = __get_data_size(tp, regs); 1419 size = sizeof(*entry) + tp->size + dsize; 1420 1421 event = trace_current_buffer_lock_reserve(&buffer, call->event.type, 1422 size, irq_flags, pc); 1423 if (!event) 1424 return; 1425 1426 entry = ring_buffer_event_data(event); 1427 entry->func = (unsigned long)tp->rp.kp.addr; 1428 entry->ret_ip = (unsigned long)ri->ret_addr; 1429 store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); 1430 1431 if (!filter_current_check_discard(buffer, call, entry, event)) 1432 trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc); 1433 } 1434 1435 /* Event entry printers */ 1436 enum print_line_t 1437 print_kprobe_event(struct trace_iterator *iter, int flags, 1438 struct trace_event *event) 1439 { 1440 struct kprobe_trace_entry_head *field; 1441 struct trace_seq *s = &iter->seq; 1442 struct trace_probe *tp; 1443 u8 *data; 1444 int i; 1445 1446 field = (struct kprobe_trace_entry_head *)iter->ent; 1447 tp = container_of(event, struct trace_probe, call.event); 1448 1449 if (!trace_seq_printf(s, "%s: (", tp->call.name)) 1450 goto partial; 1451 1452 if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET)) 1453 goto partial; 1454 1455 if (!trace_seq_puts(s, ")")) 1456 goto partial; 1457 1458 data = (u8 *)&field[1]; 1459 for (i = 0; i < tp->nr_args; i++) 1460 if (!tp->args[i].type->print(s, tp->args[i].name, 1461 data + tp->args[i].offset, field)) 1462 goto partial; 1463 1464 if (!trace_seq_puts(s, "\n")) 1465 goto partial; 1466 1467 return TRACE_TYPE_HANDLED; 1468 partial: 1469 return TRACE_TYPE_PARTIAL_LINE; 1470 } 1471 1472 enum print_line_t 1473 print_kretprobe_event(struct trace_iterator *iter, int flags, 1474 struct trace_event *event) 1475 { 1476 struct kretprobe_trace_entry_head *field; 1477 struct trace_seq *s = &iter->seq; 1478 struct trace_probe *tp; 1479 u8 *data; 1480 int i; 1481 1482 field = (struct kretprobe_trace_entry_head *)iter->ent; 1483 tp = container_of(event, struct trace_probe, call.event); 1484 1485 if (!trace_seq_printf(s, "%s: (", tp->call.name)) 1486 goto partial; 1487 1488 if (!seq_print_ip_sym(s, field->ret_ip, flags | TRACE_ITER_SYM_OFFSET)) 1489 goto partial; 1490 1491 if (!trace_seq_puts(s, " <- ")) 1492 goto partial; 1493 1494 if (!seq_print_ip_sym(s, field->func, flags & ~TRACE_ITER_SYM_OFFSET)) 1495 goto partial; 1496 1497 if (!trace_seq_puts(s, ")")) 1498 goto partial; 1499 1500 data = (u8 *)&field[1]; 1501 for (i = 0; i < tp->nr_args; i++) 1502 if (!tp->args[i].type->print(s, tp->args[i].name, 1503 data + tp->args[i].offset, field)) 1504 goto partial; 1505 1506 if (!trace_seq_puts(s, "\n")) 1507 goto partial; 1508 1509 return TRACE_TYPE_HANDLED; 1510 partial: 1511 return TRACE_TYPE_PARTIAL_LINE; 1512 } 1513 1514 static int probe_event_enable(struct ftrace_event_call *call) 1515 { 1516 struct trace_probe *tp = (struct trace_probe *)call->data; 1517 1518 tp->flags |= TP_FLAG_TRACE; 1519 if (probe_is_return(tp)) 1520 return enable_kretprobe(&tp->rp); 1521 else 1522 return enable_kprobe(&tp->rp.kp); 1523 } 1524 1525 static void probe_event_disable(struct ftrace_event_call *call) 1526 { 1527 struct trace_probe *tp = (struct trace_probe *)call->data; 1528 1529 tp->flags &= ~TP_FLAG_TRACE; 1530 if (!(tp->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE))) { 1531 if (probe_is_return(tp)) 1532 disable_kretprobe(&tp->rp); 1533 else 1534 disable_kprobe(&tp->rp.kp); 1535 } 1536 } 1537 1538 #undef DEFINE_FIELD 1539 #define DEFINE_FIELD(type, item, name, is_signed) \ 1540 do { \ 1541 ret = trace_define_field(event_call, #type, name, \ 1542 offsetof(typeof(field), item), \ 1543 sizeof(field.item), is_signed, \ 1544 FILTER_OTHER); \ 1545 if (ret) \ 1546 return ret; \ 1547 } while (0) 1548 1549 static int kprobe_event_define_fields(struct ftrace_event_call *event_call) 1550 { 1551 int ret, i; 1552 struct kprobe_trace_entry_head field; 1553 struct trace_probe *tp = (struct trace_probe *)event_call->data; 1554 1555 DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0); 1556 /* Set argument names as fields */ 1557 for (i = 0; i < tp->nr_args; i++) { 1558 ret = trace_define_field(event_call, tp->args[i].type->fmttype, 1559 tp->args[i].name, 1560 sizeof(field) + tp->args[i].offset, 1561 tp->args[i].type->size, 1562 tp->args[i].type->is_signed, 1563 FILTER_OTHER); 1564 if (ret) 1565 return ret; 1566 } 1567 return 0; 1568 } 1569 1570 static int kretprobe_event_define_fields(struct ftrace_event_call *event_call) 1571 { 1572 int ret, i; 1573 struct kretprobe_trace_entry_head field; 1574 struct trace_probe *tp = (struct trace_probe *)event_call->data; 1575 1576 DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0); 1577 DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0); 1578 /* Set argument names as fields */ 1579 for (i = 0; i < tp->nr_args; i++) { 1580 ret = trace_define_field(event_call, tp->args[i].type->fmttype, 1581 tp->args[i].name, 1582 sizeof(field) + tp->args[i].offset, 1583 tp->args[i].type->size, 1584 tp->args[i].type->is_signed, 1585 FILTER_OTHER); 1586 if (ret) 1587 return ret; 1588 } 1589 return 0; 1590 } 1591 1592 static int __set_print_fmt(struct trace_probe *tp, char *buf, int len) 1593 { 1594 int i; 1595 int pos = 0; 1596 1597 const char *fmt, *arg; 1598 1599 if (!probe_is_return(tp)) { 1600 fmt = "(%lx)"; 1601 arg = "REC->" FIELD_STRING_IP; 1602 } else { 1603 fmt = "(%lx <- %lx)"; 1604 arg = "REC->" FIELD_STRING_FUNC ", REC->" FIELD_STRING_RETIP; 1605 } 1606 1607 /* When len=0, we just calculate the needed length */ 1608 #define LEN_OR_ZERO (len ? len - pos : 0) 1609 1610 pos += snprintf(buf + pos, LEN_OR_ZERO, "\"%s", fmt); 1611 1612 for (i = 0; i < tp->nr_args; i++) { 1613 pos += snprintf(buf + pos, LEN_OR_ZERO, " %s=%s", 1614 tp->args[i].name, tp->args[i].type->fmt); 1615 } 1616 1617 pos += snprintf(buf + pos, LEN_OR_ZERO, "\", %s", arg); 1618 1619 for (i = 0; i < tp->nr_args; i++) { 1620 if (strcmp(tp->args[i].type->name, "string") == 0) 1621 pos += snprintf(buf + pos, LEN_OR_ZERO, 1622 ", __get_str(%s)", 1623 tp->args[i].name); 1624 else 1625 pos += snprintf(buf + pos, LEN_OR_ZERO, ", REC->%s", 1626 tp->args[i].name); 1627 } 1628 1629 #undef LEN_OR_ZERO 1630 1631 /* return the length of print_fmt */ 1632 return pos; 1633 } 1634 1635 static int set_print_fmt(struct trace_probe *tp) 1636 { 1637 int len; 1638 char *print_fmt; 1639 1640 /* First: called with 0 length to calculate the needed length */ 1641 len = __set_print_fmt(tp, NULL, 0); 1642 print_fmt = kmalloc(len + 1, GFP_KERNEL); 1643 if (!print_fmt) 1644 return -ENOMEM; 1645 1646 /* Second: actually write the @print_fmt */ 1647 __set_print_fmt(tp, print_fmt, len + 1); 1648 tp->call.print_fmt = print_fmt; 1649 1650 return 0; 1651 } 1652 1653 #ifdef CONFIG_PERF_EVENTS 1654 1655 /* Kprobe profile handler */ 1656 static __kprobes void kprobe_perf_func(struct kprobe *kp, 1657 struct pt_regs *regs) 1658 { 1659 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); 1660 struct ftrace_event_call *call = &tp->call; 1661 struct kprobe_trace_entry_head *entry; 1662 struct hlist_head *head; 1663 int size, __size, dsize; 1664 int rctx; 1665 1666 dsize = __get_data_size(tp, regs); 1667 __size = sizeof(*entry) + tp->size + dsize; 1668 size = ALIGN(__size + sizeof(u32), sizeof(u64)); 1669 size -= sizeof(u32); 1670 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, 1671 "profile buffer not large enough")) 1672 return; 1673 1674 entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx); 1675 if (!entry) 1676 return; 1677 1678 entry->ip = (unsigned long)kp->addr; 1679 memset(&entry[1], 0, dsize); 1680 store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); 1681 1682 head = this_cpu_ptr(call->perf_events); 1683 perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head); 1684 } 1685 1686 /* Kretprobe profile handler */ 1687 static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri, 1688 struct pt_regs *regs) 1689 { 1690 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); 1691 struct ftrace_event_call *call = &tp->call; 1692 struct kretprobe_trace_entry_head *entry; 1693 struct hlist_head *head; 1694 int size, __size, dsize; 1695 int rctx; 1696 1697 dsize = __get_data_size(tp, regs); 1698 __size = sizeof(*entry) + tp->size + dsize; 1699 size = ALIGN(__size + sizeof(u32), sizeof(u64)); 1700 size -= sizeof(u32); 1701 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, 1702 "profile buffer not large enough")) 1703 return; 1704 1705 entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx); 1706 if (!entry) 1707 return; 1708 1709 entry->func = (unsigned long)tp->rp.kp.addr; 1710 entry->ret_ip = (unsigned long)ri->ret_addr; 1711 store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); 1712 1713 head = this_cpu_ptr(call->perf_events); 1714 perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, regs, head); 1715 } 1716 1717 static int probe_perf_enable(struct ftrace_event_call *call) 1718 { 1719 struct trace_probe *tp = (struct trace_probe *)call->data; 1720 1721 tp->flags |= TP_FLAG_PROFILE; 1722 1723 if (probe_is_return(tp)) 1724 return enable_kretprobe(&tp->rp); 1725 else 1726 return enable_kprobe(&tp->rp.kp); 1727 } 1728 1729 static void probe_perf_disable(struct ftrace_event_call *call) 1730 { 1731 struct trace_probe *tp = (struct trace_probe *)call->data; 1732 1733 tp->flags &= ~TP_FLAG_PROFILE; 1734 1735 if (!(tp->flags & TP_FLAG_TRACE)) { 1736 if (probe_is_return(tp)) 1737 disable_kretprobe(&tp->rp); 1738 else 1739 disable_kprobe(&tp->rp.kp); 1740 } 1741 } 1742 #endif /* CONFIG_PERF_EVENTS */ 1743 1744 static __kprobes 1745 int kprobe_register(struct ftrace_event_call *event, enum trace_reg type) 1746 { 1747 switch (type) { 1748 case TRACE_REG_REGISTER: 1749 return probe_event_enable(event); 1750 case TRACE_REG_UNREGISTER: 1751 probe_event_disable(event); 1752 return 0; 1753 1754 #ifdef CONFIG_PERF_EVENTS 1755 case TRACE_REG_PERF_REGISTER: 1756 return probe_perf_enable(event); 1757 case TRACE_REG_PERF_UNREGISTER: 1758 probe_perf_disable(event); 1759 return 0; 1760 #endif 1761 } 1762 return 0; 1763 } 1764 1765 static __kprobes 1766 int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs) 1767 { 1768 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); 1769 1770 if (tp->flags & TP_FLAG_TRACE) 1771 kprobe_trace_func(kp, regs); 1772 #ifdef CONFIG_PERF_EVENTS 1773 if (tp->flags & TP_FLAG_PROFILE) 1774 kprobe_perf_func(kp, regs); 1775 #endif 1776 return 0; /* We don't tweek kernel, so just return 0 */ 1777 } 1778 1779 static __kprobes 1780 int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs) 1781 { 1782 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); 1783 1784 if (tp->flags & TP_FLAG_TRACE) 1785 kretprobe_trace_func(ri, regs); 1786 #ifdef CONFIG_PERF_EVENTS 1787 if (tp->flags & TP_FLAG_PROFILE) 1788 kretprobe_perf_func(ri, regs); 1789 #endif 1790 return 0; /* We don't tweek kernel, so just return 0 */ 1791 } 1792 1793 static struct trace_event_functions kretprobe_funcs = { 1794 .trace = print_kretprobe_event 1795 }; 1796 1797 static struct trace_event_functions kprobe_funcs = { 1798 .trace = print_kprobe_event 1799 }; 1800 1801 static int register_probe_event(struct trace_probe *tp) 1802 { 1803 struct ftrace_event_call *call = &tp->call; 1804 int ret; 1805 1806 /* Initialize ftrace_event_call */ 1807 INIT_LIST_HEAD(&call->class->fields); 1808 if (probe_is_return(tp)) { 1809 call->event.funcs = &kretprobe_funcs; 1810 call->class->define_fields = kretprobe_event_define_fields; 1811 } else { 1812 call->event.funcs = &kprobe_funcs; 1813 call->class->define_fields = kprobe_event_define_fields; 1814 } 1815 if (set_print_fmt(tp) < 0) 1816 return -ENOMEM; 1817 ret = register_ftrace_event(&call->event); 1818 if (!ret) { 1819 kfree(call->print_fmt); 1820 return -ENODEV; 1821 } 1822 call->flags = 0; 1823 call->class->reg = kprobe_register; 1824 call->data = tp; 1825 ret = trace_add_event_call(call); 1826 if (ret) { 1827 pr_info("Failed to register kprobe event: %s\n", call->name); 1828 kfree(call->print_fmt); 1829 unregister_ftrace_event(&call->event); 1830 } 1831 return ret; 1832 } 1833 1834 static void unregister_probe_event(struct trace_probe *tp) 1835 { 1836 /* tp->event is unregistered in trace_remove_event_call() */ 1837 trace_remove_event_call(&tp->call); 1838 kfree(tp->call.print_fmt); 1839 } 1840 1841 /* Make a debugfs interface for controlling probe points */ 1842 static __init int init_kprobe_trace(void) 1843 { 1844 struct dentry *d_tracer; 1845 struct dentry *entry; 1846 1847 d_tracer = tracing_init_dentry(); 1848 if (!d_tracer) 1849 return 0; 1850 1851 entry = debugfs_create_file("kprobe_events", 0644, d_tracer, 1852 NULL, &kprobe_events_ops); 1853 1854 /* Event list interface */ 1855 if (!entry) 1856 pr_warning("Could not create debugfs " 1857 "'kprobe_events' entry\n"); 1858 1859 /* Profile interface */ 1860 entry = debugfs_create_file("kprobe_profile", 0444, d_tracer, 1861 NULL, &kprobe_profile_ops); 1862 1863 if (!entry) 1864 pr_warning("Could not create debugfs " 1865 "'kprobe_profile' entry\n"); 1866 return 0; 1867 } 1868 fs_initcall(init_kprobe_trace); 1869 1870 1871 #ifdef CONFIG_FTRACE_STARTUP_TEST 1872 1873 static int kprobe_trace_selftest_target(int a1, int a2, int a3, 1874 int a4, int a5, int a6) 1875 { 1876 return a1 + a2 + a3 + a4 + a5 + a6; 1877 } 1878 1879 static __init int kprobe_trace_self_tests_init(void) 1880 { 1881 int ret, warn = 0; 1882 int (*target)(int, int, int, int, int, int); 1883 struct trace_probe *tp; 1884 1885 target = kprobe_trace_selftest_target; 1886 1887 pr_info("Testing kprobe tracing: "); 1888 1889 ret = command_trace_probe("p:testprobe kprobe_trace_selftest_target " 1890 "$stack $stack0 +0($stack)"); 1891 if (WARN_ON_ONCE(ret)) { 1892 pr_warning("error on probing function entry.\n"); 1893 warn++; 1894 } else { 1895 /* Enable trace point */ 1896 tp = find_probe_event("testprobe", KPROBE_EVENT_SYSTEM); 1897 if (WARN_ON_ONCE(tp == NULL)) { 1898 pr_warning("error on getting new probe.\n"); 1899 warn++; 1900 } else 1901 probe_event_enable(&tp->call); 1902 } 1903 1904 ret = command_trace_probe("r:testprobe2 kprobe_trace_selftest_target " 1905 "$retval"); 1906 if (WARN_ON_ONCE(ret)) { 1907 pr_warning("error on probing function return.\n"); 1908 warn++; 1909 } else { 1910 /* Enable trace point */ 1911 tp = find_probe_event("testprobe2", KPROBE_EVENT_SYSTEM); 1912 if (WARN_ON_ONCE(tp == NULL)) { 1913 pr_warning("error on getting new probe.\n"); 1914 warn++; 1915 } else 1916 probe_event_enable(&tp->call); 1917 } 1918 1919 if (warn) 1920 goto end; 1921 1922 ret = target(1, 2, 3, 4, 5, 6); 1923 1924 ret = command_trace_probe("-:testprobe"); 1925 if (WARN_ON_ONCE(ret)) { 1926 pr_warning("error on deleting a probe.\n"); 1927 warn++; 1928 } 1929 1930 ret = command_trace_probe("-:testprobe2"); 1931 if (WARN_ON_ONCE(ret)) { 1932 pr_warning("error on deleting a probe.\n"); 1933 warn++; 1934 } 1935 1936 end: 1937 cleanup_all_probes(); 1938 if (warn) 1939 pr_cont("NG: Some tests are failed. Please check them.\n"); 1940 else 1941 pr_cont("OK\n"); 1942 return 0; 1943 } 1944 1945 late_initcall(kprobe_trace_self_tests_init); 1946 1947 #endif 1948