1 /* 2 * Kprobes-based tracing events 3 * 4 * Created by Masami Hiramatsu <mhiramat@redhat.com> 5 * 6 * This program is free software; you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License version 2 as 8 * published by the Free Software Foundation. 9 * 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * 15 * You should have received a copy of the GNU General Public License 16 * along with this program; if not, write to the Free Software 17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 18 */ 19 20 #include <linux/module.h> 21 #include <linux/uaccess.h> 22 #include <linux/kprobes.h> 23 #include <linux/seq_file.h> 24 #include <linux/slab.h> 25 #include <linux/smp.h> 26 #include <linux/debugfs.h> 27 #include <linux/types.h> 28 #include <linux/string.h> 29 #include <linux/ctype.h> 30 #include <linux/ptrace.h> 31 #include <linux/perf_event.h> 32 #include <linux/stringify.h> 33 #include <linux/limits.h> 34 #include <asm/bitsperlong.h> 35 36 #include "trace.h" 37 #include "trace_output.h" 38 39 #define MAX_TRACE_ARGS 128 40 #define MAX_ARGSTR_LEN 63 41 #define MAX_EVENT_NAME_LEN 64 42 #define MAX_STRING_SIZE PATH_MAX 43 #define KPROBE_EVENT_SYSTEM "kprobes" 44 45 /* Reserved field names */ 46 #define FIELD_STRING_IP "__probe_ip" 47 #define FIELD_STRING_RETIP "__probe_ret_ip" 48 #define FIELD_STRING_FUNC "__probe_func" 49 50 const char *reserved_field_names[] = { 51 "common_type", 52 "common_flags", 53 "common_preempt_count", 54 "common_pid", 55 "common_tgid", 56 "common_lock_depth", 57 FIELD_STRING_IP, 58 FIELD_STRING_RETIP, 59 FIELD_STRING_FUNC, 60 }; 61 62 /* Printing function type */ 63 typedef int (*print_type_func_t)(struct trace_seq *, const char *, void *, 64 void *); 65 #define PRINT_TYPE_FUNC_NAME(type) print_type_##type 66 #define PRINT_TYPE_FMT_NAME(type) print_type_format_##type 67 68 /* Printing in basic type function template */ 69 #define DEFINE_BASIC_PRINT_TYPE_FUNC(type, fmt, cast) \ 70 static __kprobes int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s, \ 71 const char *name, \ 72 void *data, void *ent)\ 73 { \ 74 return trace_seq_printf(s, " %s=" fmt, name, (cast)*(type *)data);\ 75 } \ 76 static const char PRINT_TYPE_FMT_NAME(type)[] = fmt; 77 78 DEFINE_BASIC_PRINT_TYPE_FUNC(u8, "%x", unsigned int) 79 DEFINE_BASIC_PRINT_TYPE_FUNC(u16, "%x", unsigned int) 80 DEFINE_BASIC_PRINT_TYPE_FUNC(u32, "%lx", unsigned long) 81 DEFINE_BASIC_PRINT_TYPE_FUNC(u64, "%llx", unsigned long long) 82 DEFINE_BASIC_PRINT_TYPE_FUNC(s8, "%d", int) 83 DEFINE_BASIC_PRINT_TYPE_FUNC(s16, "%d", int) 84 DEFINE_BASIC_PRINT_TYPE_FUNC(s32, "%ld", long) 85 DEFINE_BASIC_PRINT_TYPE_FUNC(s64, "%lld", long long) 86 87 /* data_rloc: data relative location, compatible with u32 */ 88 #define make_data_rloc(len, roffs) \ 89 (((u32)(len) << 16) | ((u32)(roffs) & 0xffff)) 90 #define get_rloc_len(dl) ((u32)(dl) >> 16) 91 #define get_rloc_offs(dl) ((u32)(dl) & 0xffff) 92 93 static inline void *get_rloc_data(u32 *dl) 94 { 95 return (u8 *)dl + get_rloc_offs(*dl); 96 } 97 98 /* For data_loc conversion */ 99 static inline void *get_loc_data(u32 *dl, void *ent) 100 { 101 return (u8 *)ent + get_rloc_offs(*dl); 102 } 103 104 /* 105 * Convert data_rloc to data_loc: 106 * data_rloc stores the offset from data_rloc itself, but data_loc 107 * stores the offset from event entry. 108 */ 109 #define convert_rloc_to_loc(dl, offs) ((u32)(dl) + (offs)) 110 111 /* For defining macros, define string/string_size types */ 112 typedef u32 string; 113 typedef u32 string_size; 114 115 /* Print type function for string type */ 116 static __kprobes int PRINT_TYPE_FUNC_NAME(string)(struct trace_seq *s, 117 const char *name, 118 void *data, void *ent) 119 { 120 int len = *(u32 *)data >> 16; 121 122 if (!len) 123 return trace_seq_printf(s, " %s=(fault)", name); 124 else 125 return trace_seq_printf(s, " %s=\"%s\"", name, 126 (const char *)get_loc_data(data, ent)); 127 } 128 static const char PRINT_TYPE_FMT_NAME(string)[] = "\\\"%s\\\""; 129 130 /* Data fetch function type */ 131 typedef void (*fetch_func_t)(struct pt_regs *, void *, void *); 132 133 struct fetch_param { 134 fetch_func_t fn; 135 void *data; 136 }; 137 138 static __kprobes void call_fetch(struct fetch_param *fprm, 139 struct pt_regs *regs, void *dest) 140 { 141 return fprm->fn(regs, fprm->data, dest); 142 } 143 144 #define FETCH_FUNC_NAME(method, type) fetch_##method##_##type 145 /* 146 * Define macro for basic types - we don't need to define s* types, because 147 * we have to care only about bitwidth at recording time. 148 */ 149 #define DEFINE_BASIC_FETCH_FUNCS(method) \ 150 DEFINE_FETCH_##method(u8) \ 151 DEFINE_FETCH_##method(u16) \ 152 DEFINE_FETCH_##method(u32) \ 153 DEFINE_FETCH_##method(u64) 154 155 #define CHECK_FETCH_FUNCS(method, fn) \ 156 (((FETCH_FUNC_NAME(method, u8) == fn) || \ 157 (FETCH_FUNC_NAME(method, u16) == fn) || \ 158 (FETCH_FUNC_NAME(method, u32) == fn) || \ 159 (FETCH_FUNC_NAME(method, u64) == fn) || \ 160 (FETCH_FUNC_NAME(method, string) == fn) || \ 161 (FETCH_FUNC_NAME(method, string_size) == fn)) \ 162 && (fn != NULL)) 163 164 /* Data fetch function templates */ 165 #define DEFINE_FETCH_reg(type) \ 166 static __kprobes void FETCH_FUNC_NAME(reg, type)(struct pt_regs *regs, \ 167 void *offset, void *dest) \ 168 { \ 169 *(type *)dest = (type)regs_get_register(regs, \ 170 (unsigned int)((unsigned long)offset)); \ 171 } 172 DEFINE_BASIC_FETCH_FUNCS(reg) 173 /* No string on the register */ 174 #define fetch_reg_string NULL 175 #define fetch_reg_string_size NULL 176 177 #define DEFINE_FETCH_stack(type) \ 178 static __kprobes void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs,\ 179 void *offset, void *dest) \ 180 { \ 181 *(type *)dest = (type)regs_get_kernel_stack_nth(regs, \ 182 (unsigned int)((unsigned long)offset)); \ 183 } 184 DEFINE_BASIC_FETCH_FUNCS(stack) 185 /* No string on the stack entry */ 186 #define fetch_stack_string NULL 187 #define fetch_stack_string_size NULL 188 189 #define DEFINE_FETCH_retval(type) \ 190 static __kprobes void FETCH_FUNC_NAME(retval, type)(struct pt_regs *regs,\ 191 void *dummy, void *dest) \ 192 { \ 193 *(type *)dest = (type)regs_return_value(regs); \ 194 } 195 DEFINE_BASIC_FETCH_FUNCS(retval) 196 /* No string on the retval */ 197 #define fetch_retval_string NULL 198 #define fetch_retval_string_size NULL 199 200 #define DEFINE_FETCH_memory(type) \ 201 static __kprobes void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs,\ 202 void *addr, void *dest) \ 203 { \ 204 type retval; \ 205 if (probe_kernel_address(addr, retval)) \ 206 *(type *)dest = 0; \ 207 else \ 208 *(type *)dest = retval; \ 209 } 210 DEFINE_BASIC_FETCH_FUNCS(memory) 211 /* 212 * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max 213 * length and relative data location. 214 */ 215 static __kprobes void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs, 216 void *addr, void *dest) 217 { 218 long ret; 219 int maxlen = get_rloc_len(*(u32 *)dest); 220 u8 *dst = get_rloc_data(dest); 221 u8 *src = addr; 222 mm_segment_t old_fs = get_fs(); 223 if (!maxlen) 224 return; 225 /* 226 * Try to get string again, since the string can be changed while 227 * probing. 228 */ 229 set_fs(KERNEL_DS); 230 pagefault_disable(); 231 do 232 ret = __copy_from_user_inatomic(dst++, src++, 1); 233 while (dst[-1] && ret == 0 && src - (u8 *)addr < maxlen); 234 dst[-1] = '\0'; 235 pagefault_enable(); 236 set_fs(old_fs); 237 238 if (ret < 0) { /* Failed to fetch string */ 239 ((u8 *)get_rloc_data(dest))[0] = '\0'; 240 *(u32 *)dest = make_data_rloc(0, get_rloc_offs(*(u32 *)dest)); 241 } else 242 *(u32 *)dest = make_data_rloc(src - (u8 *)addr, 243 get_rloc_offs(*(u32 *)dest)); 244 } 245 /* Return the length of string -- including null terminal byte */ 246 static __kprobes void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs, 247 void *addr, void *dest) 248 { 249 int ret, len = 0; 250 u8 c; 251 mm_segment_t old_fs = get_fs(); 252 253 set_fs(KERNEL_DS); 254 pagefault_disable(); 255 do { 256 ret = __copy_from_user_inatomic(&c, (u8 *)addr + len, 1); 257 len++; 258 } while (c && ret == 0 && len < MAX_STRING_SIZE); 259 pagefault_enable(); 260 set_fs(old_fs); 261 262 if (ret < 0) /* Failed to check the length */ 263 *(u32 *)dest = 0; 264 else 265 *(u32 *)dest = len; 266 } 267 268 /* Memory fetching by symbol */ 269 struct symbol_cache { 270 char *symbol; 271 long offset; 272 unsigned long addr; 273 }; 274 275 static unsigned long update_symbol_cache(struct symbol_cache *sc) 276 { 277 sc->addr = (unsigned long)kallsyms_lookup_name(sc->symbol); 278 if (sc->addr) 279 sc->addr += sc->offset; 280 return sc->addr; 281 } 282 283 static void free_symbol_cache(struct symbol_cache *sc) 284 { 285 kfree(sc->symbol); 286 kfree(sc); 287 } 288 289 static struct symbol_cache *alloc_symbol_cache(const char *sym, long offset) 290 { 291 struct symbol_cache *sc; 292 293 if (!sym || strlen(sym) == 0) 294 return NULL; 295 sc = kzalloc(sizeof(struct symbol_cache), GFP_KERNEL); 296 if (!sc) 297 return NULL; 298 299 sc->symbol = kstrdup(sym, GFP_KERNEL); 300 if (!sc->symbol) { 301 kfree(sc); 302 return NULL; 303 } 304 sc->offset = offset; 305 306 update_symbol_cache(sc); 307 return sc; 308 } 309 310 #define DEFINE_FETCH_symbol(type) \ 311 static __kprobes void FETCH_FUNC_NAME(symbol, type)(struct pt_regs *regs,\ 312 void *data, void *dest) \ 313 { \ 314 struct symbol_cache *sc = data; \ 315 if (sc->addr) \ 316 fetch_memory_##type(regs, (void *)sc->addr, dest); \ 317 else \ 318 *(type *)dest = 0; \ 319 } 320 DEFINE_BASIC_FETCH_FUNCS(symbol) 321 DEFINE_FETCH_symbol(string) 322 DEFINE_FETCH_symbol(string_size) 323 324 /* Dereference memory access function */ 325 struct deref_fetch_param { 326 struct fetch_param orig; 327 long offset; 328 }; 329 330 #define DEFINE_FETCH_deref(type) \ 331 static __kprobes void FETCH_FUNC_NAME(deref, type)(struct pt_regs *regs,\ 332 void *data, void *dest) \ 333 { \ 334 struct deref_fetch_param *dprm = data; \ 335 unsigned long addr; \ 336 call_fetch(&dprm->orig, regs, &addr); \ 337 if (addr) { \ 338 addr += dprm->offset; \ 339 fetch_memory_##type(regs, (void *)addr, dest); \ 340 } else \ 341 *(type *)dest = 0; \ 342 } 343 DEFINE_BASIC_FETCH_FUNCS(deref) 344 DEFINE_FETCH_deref(string) 345 DEFINE_FETCH_deref(string_size) 346 347 static __kprobes void free_deref_fetch_param(struct deref_fetch_param *data) 348 { 349 if (CHECK_FETCH_FUNCS(deref, data->orig.fn)) 350 free_deref_fetch_param(data->orig.data); 351 else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn)) 352 free_symbol_cache(data->orig.data); 353 kfree(data); 354 } 355 356 /* Bitfield fetch function */ 357 struct bitfield_fetch_param { 358 struct fetch_param orig; 359 unsigned char hi_shift; 360 unsigned char low_shift; 361 }; 362 363 #define DEFINE_FETCH_bitfield(type) \ 364 static __kprobes void FETCH_FUNC_NAME(bitfield, type)(struct pt_regs *regs,\ 365 void *data, void *dest) \ 366 { \ 367 struct bitfield_fetch_param *bprm = data; \ 368 type buf = 0; \ 369 call_fetch(&bprm->orig, regs, &buf); \ 370 if (buf) { \ 371 buf <<= bprm->hi_shift; \ 372 buf >>= bprm->low_shift; \ 373 } \ 374 *(type *)dest = buf; \ 375 } 376 DEFINE_BASIC_FETCH_FUNCS(bitfield) 377 #define fetch_bitfield_string NULL 378 #define fetch_bitfield_string_size NULL 379 380 static __kprobes void 381 free_bitfield_fetch_param(struct bitfield_fetch_param *data) 382 { 383 /* 384 * Don't check the bitfield itself, because this must be the 385 * last fetch function. 386 */ 387 if (CHECK_FETCH_FUNCS(deref, data->orig.fn)) 388 free_deref_fetch_param(data->orig.data); 389 else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn)) 390 free_symbol_cache(data->orig.data); 391 kfree(data); 392 } 393 /* Default (unsigned long) fetch type */ 394 #define __DEFAULT_FETCH_TYPE(t) u##t 395 #define _DEFAULT_FETCH_TYPE(t) __DEFAULT_FETCH_TYPE(t) 396 #define DEFAULT_FETCH_TYPE _DEFAULT_FETCH_TYPE(BITS_PER_LONG) 397 #define DEFAULT_FETCH_TYPE_STR __stringify(DEFAULT_FETCH_TYPE) 398 399 /* Fetch types */ 400 enum { 401 FETCH_MTD_reg = 0, 402 FETCH_MTD_stack, 403 FETCH_MTD_retval, 404 FETCH_MTD_memory, 405 FETCH_MTD_symbol, 406 FETCH_MTD_deref, 407 FETCH_MTD_bitfield, 408 FETCH_MTD_END, 409 }; 410 411 #define ASSIGN_FETCH_FUNC(method, type) \ 412 [FETCH_MTD_##method] = FETCH_FUNC_NAME(method, type) 413 414 #define __ASSIGN_FETCH_TYPE(_name, ptype, ftype, _size, sign, _fmttype) \ 415 {.name = _name, \ 416 .size = _size, \ 417 .is_signed = sign, \ 418 .print = PRINT_TYPE_FUNC_NAME(ptype), \ 419 .fmt = PRINT_TYPE_FMT_NAME(ptype), \ 420 .fmttype = _fmttype, \ 421 .fetch = { \ 422 ASSIGN_FETCH_FUNC(reg, ftype), \ 423 ASSIGN_FETCH_FUNC(stack, ftype), \ 424 ASSIGN_FETCH_FUNC(retval, ftype), \ 425 ASSIGN_FETCH_FUNC(memory, ftype), \ 426 ASSIGN_FETCH_FUNC(symbol, ftype), \ 427 ASSIGN_FETCH_FUNC(deref, ftype), \ 428 ASSIGN_FETCH_FUNC(bitfield, ftype), \ 429 } \ 430 } 431 432 #define ASSIGN_FETCH_TYPE(ptype, ftype, sign) \ 433 __ASSIGN_FETCH_TYPE(#ptype, ptype, ftype, sizeof(ftype), sign, #ptype) 434 435 #define FETCH_TYPE_STRING 0 436 #define FETCH_TYPE_STRSIZE 1 437 438 /* Fetch type information table */ 439 static const struct fetch_type { 440 const char *name; /* Name of type */ 441 size_t size; /* Byte size of type */ 442 int is_signed; /* Signed flag */ 443 print_type_func_t print; /* Print functions */ 444 const char *fmt; /* Fromat string */ 445 const char *fmttype; /* Name in format file */ 446 /* Fetch functions */ 447 fetch_func_t fetch[FETCH_MTD_END]; 448 } fetch_type_table[] = { 449 /* Special types */ 450 [FETCH_TYPE_STRING] = __ASSIGN_FETCH_TYPE("string", string, string, 451 sizeof(u32), 1, "__data_loc char[]"), 452 [FETCH_TYPE_STRSIZE] = __ASSIGN_FETCH_TYPE("string_size", u32, 453 string_size, sizeof(u32), 0, "u32"), 454 /* Basic types */ 455 ASSIGN_FETCH_TYPE(u8, u8, 0), 456 ASSIGN_FETCH_TYPE(u16, u16, 0), 457 ASSIGN_FETCH_TYPE(u32, u32, 0), 458 ASSIGN_FETCH_TYPE(u64, u64, 0), 459 ASSIGN_FETCH_TYPE(s8, u8, 1), 460 ASSIGN_FETCH_TYPE(s16, u16, 1), 461 ASSIGN_FETCH_TYPE(s32, u32, 1), 462 ASSIGN_FETCH_TYPE(s64, u64, 1), 463 }; 464 465 static const struct fetch_type *find_fetch_type(const char *type) 466 { 467 int i; 468 469 if (!type) 470 type = DEFAULT_FETCH_TYPE_STR; 471 472 /* Special case: bitfield */ 473 if (*type == 'b') { 474 unsigned long bs; 475 type = strchr(type, '/'); 476 if (!type) 477 goto fail; 478 type++; 479 if (strict_strtoul(type, 0, &bs)) 480 goto fail; 481 switch (bs) { 482 case 8: 483 return find_fetch_type("u8"); 484 case 16: 485 return find_fetch_type("u16"); 486 case 32: 487 return find_fetch_type("u32"); 488 case 64: 489 return find_fetch_type("u64"); 490 default: 491 goto fail; 492 } 493 } 494 495 for (i = 0; i < ARRAY_SIZE(fetch_type_table); i++) 496 if (strcmp(type, fetch_type_table[i].name) == 0) 497 return &fetch_type_table[i]; 498 fail: 499 return NULL; 500 } 501 502 /* Special function : only accept unsigned long */ 503 static __kprobes void fetch_stack_address(struct pt_regs *regs, 504 void *dummy, void *dest) 505 { 506 *(unsigned long *)dest = kernel_stack_pointer(regs); 507 } 508 509 static fetch_func_t get_fetch_size_function(const struct fetch_type *type, 510 fetch_func_t orig_fn) 511 { 512 int i; 513 514 if (type != &fetch_type_table[FETCH_TYPE_STRING]) 515 return NULL; /* Only string type needs size function */ 516 for (i = 0; i < FETCH_MTD_END; i++) 517 if (type->fetch[i] == orig_fn) 518 return fetch_type_table[FETCH_TYPE_STRSIZE].fetch[i]; 519 520 WARN_ON(1); /* This should not happen */ 521 return NULL; 522 } 523 524 /** 525 * Kprobe event core functions 526 */ 527 528 struct probe_arg { 529 struct fetch_param fetch; 530 struct fetch_param fetch_size; 531 unsigned int offset; /* Offset from argument entry */ 532 const char *name; /* Name of this argument */ 533 const char *comm; /* Command of this argument */ 534 const struct fetch_type *type; /* Type of this argument */ 535 }; 536 537 /* Flags for trace_probe */ 538 #define TP_FLAG_TRACE 1 539 #define TP_FLAG_PROFILE 2 540 541 struct trace_probe { 542 struct list_head list; 543 struct kretprobe rp; /* Use rp.kp for kprobe use */ 544 unsigned long nhit; 545 unsigned int flags; /* For TP_FLAG_* */ 546 const char *symbol; /* symbol name */ 547 struct ftrace_event_class class; 548 struct ftrace_event_call call; 549 ssize_t size; /* trace entry size */ 550 unsigned int nr_args; 551 struct probe_arg args[]; 552 }; 553 554 #define SIZEOF_TRACE_PROBE(n) \ 555 (offsetof(struct trace_probe, args) + \ 556 (sizeof(struct probe_arg) * (n))) 557 558 559 static __kprobes int probe_is_return(struct trace_probe *tp) 560 { 561 return tp->rp.handler != NULL; 562 } 563 564 static __kprobes const char *probe_symbol(struct trace_probe *tp) 565 { 566 return tp->symbol ? tp->symbol : "unknown"; 567 } 568 569 static int register_probe_event(struct trace_probe *tp); 570 static void unregister_probe_event(struct trace_probe *tp); 571 572 static DEFINE_MUTEX(probe_lock); 573 static LIST_HEAD(probe_list); 574 575 static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs); 576 static int kretprobe_dispatcher(struct kretprobe_instance *ri, 577 struct pt_regs *regs); 578 579 /* Check the name is good for event/group/fields */ 580 static int is_good_name(const char *name) 581 { 582 if (!isalpha(*name) && *name != '_') 583 return 0; 584 while (*++name != '\0') { 585 if (!isalpha(*name) && !isdigit(*name) && *name != '_') 586 return 0; 587 } 588 return 1; 589 } 590 591 /* 592 * Allocate new trace_probe and initialize it (including kprobes). 593 */ 594 static struct trace_probe *alloc_trace_probe(const char *group, 595 const char *event, 596 void *addr, 597 const char *symbol, 598 unsigned long offs, 599 int nargs, int is_return) 600 { 601 struct trace_probe *tp; 602 int ret = -ENOMEM; 603 604 tp = kzalloc(SIZEOF_TRACE_PROBE(nargs), GFP_KERNEL); 605 if (!tp) 606 return ERR_PTR(ret); 607 608 if (symbol) { 609 tp->symbol = kstrdup(symbol, GFP_KERNEL); 610 if (!tp->symbol) 611 goto error; 612 tp->rp.kp.symbol_name = tp->symbol; 613 tp->rp.kp.offset = offs; 614 } else 615 tp->rp.kp.addr = addr; 616 617 if (is_return) 618 tp->rp.handler = kretprobe_dispatcher; 619 else 620 tp->rp.kp.pre_handler = kprobe_dispatcher; 621 622 if (!event || !is_good_name(event)) { 623 ret = -EINVAL; 624 goto error; 625 } 626 627 tp->call.class = &tp->class; 628 tp->call.name = kstrdup(event, GFP_KERNEL); 629 if (!tp->call.name) 630 goto error; 631 632 if (!group || !is_good_name(group)) { 633 ret = -EINVAL; 634 goto error; 635 } 636 637 tp->class.system = kstrdup(group, GFP_KERNEL); 638 if (!tp->class.system) 639 goto error; 640 641 INIT_LIST_HEAD(&tp->list); 642 return tp; 643 error: 644 kfree(tp->call.name); 645 kfree(tp->symbol); 646 kfree(tp); 647 return ERR_PTR(ret); 648 } 649 650 static void free_probe_arg(struct probe_arg *arg) 651 { 652 if (CHECK_FETCH_FUNCS(bitfield, arg->fetch.fn)) 653 free_bitfield_fetch_param(arg->fetch.data); 654 else if (CHECK_FETCH_FUNCS(deref, arg->fetch.fn)) 655 free_deref_fetch_param(arg->fetch.data); 656 else if (CHECK_FETCH_FUNCS(symbol, arg->fetch.fn)) 657 free_symbol_cache(arg->fetch.data); 658 kfree(arg->name); 659 kfree(arg->comm); 660 } 661 662 static void free_trace_probe(struct trace_probe *tp) 663 { 664 int i; 665 666 for (i = 0; i < tp->nr_args; i++) 667 free_probe_arg(&tp->args[i]); 668 669 kfree(tp->call.class->system); 670 kfree(tp->call.name); 671 kfree(tp->symbol); 672 kfree(tp); 673 } 674 675 static struct trace_probe *find_probe_event(const char *event, 676 const char *group) 677 { 678 struct trace_probe *tp; 679 680 list_for_each_entry(tp, &probe_list, list) 681 if (strcmp(tp->call.name, event) == 0 && 682 strcmp(tp->call.class->system, group) == 0) 683 return tp; 684 return NULL; 685 } 686 687 /* Unregister a trace_probe and probe_event: call with locking probe_lock */ 688 static void unregister_trace_probe(struct trace_probe *tp) 689 { 690 if (probe_is_return(tp)) 691 unregister_kretprobe(&tp->rp); 692 else 693 unregister_kprobe(&tp->rp.kp); 694 list_del(&tp->list); 695 unregister_probe_event(tp); 696 } 697 698 /* Register a trace_probe and probe_event */ 699 static int register_trace_probe(struct trace_probe *tp) 700 { 701 struct trace_probe *old_tp; 702 int ret; 703 704 mutex_lock(&probe_lock); 705 706 /* register as an event */ 707 old_tp = find_probe_event(tp->call.name, tp->call.class->system); 708 if (old_tp) { 709 /* delete old event */ 710 unregister_trace_probe(old_tp); 711 free_trace_probe(old_tp); 712 } 713 ret = register_probe_event(tp); 714 if (ret) { 715 pr_warning("Failed to register probe event(%d)\n", ret); 716 goto end; 717 } 718 719 tp->rp.kp.flags |= KPROBE_FLAG_DISABLED; 720 if (probe_is_return(tp)) 721 ret = register_kretprobe(&tp->rp); 722 else 723 ret = register_kprobe(&tp->rp.kp); 724 725 if (ret) { 726 pr_warning("Could not insert probe(%d)\n", ret); 727 if (ret == -EILSEQ) { 728 pr_warning("Probing address(0x%p) is not an " 729 "instruction boundary.\n", 730 tp->rp.kp.addr); 731 ret = -EINVAL; 732 } 733 unregister_probe_event(tp); 734 } else 735 list_add_tail(&tp->list, &probe_list); 736 end: 737 mutex_unlock(&probe_lock); 738 return ret; 739 } 740 741 /* Split symbol and offset. */ 742 static int split_symbol_offset(char *symbol, unsigned long *offset) 743 { 744 char *tmp; 745 int ret; 746 747 if (!offset) 748 return -EINVAL; 749 750 tmp = strchr(symbol, '+'); 751 if (tmp) { 752 /* skip sign because strict_strtol doesn't accept '+' */ 753 ret = strict_strtoul(tmp + 1, 0, offset); 754 if (ret) 755 return ret; 756 *tmp = '\0'; 757 } else 758 *offset = 0; 759 return 0; 760 } 761 762 #define PARAM_MAX_ARGS 16 763 #define PARAM_MAX_STACK (THREAD_SIZE / sizeof(unsigned long)) 764 765 static int parse_probe_vars(char *arg, const struct fetch_type *t, 766 struct fetch_param *f, int is_return) 767 { 768 int ret = 0; 769 unsigned long param; 770 771 if (strcmp(arg, "retval") == 0) { 772 if (is_return) 773 f->fn = t->fetch[FETCH_MTD_retval]; 774 else 775 ret = -EINVAL; 776 } else if (strncmp(arg, "stack", 5) == 0) { 777 if (arg[5] == '\0') { 778 if (strcmp(t->name, DEFAULT_FETCH_TYPE_STR) == 0) 779 f->fn = fetch_stack_address; 780 else 781 ret = -EINVAL; 782 } else if (isdigit(arg[5])) { 783 ret = strict_strtoul(arg + 5, 10, ¶m); 784 if (ret || param > PARAM_MAX_STACK) 785 ret = -EINVAL; 786 else { 787 f->fn = t->fetch[FETCH_MTD_stack]; 788 f->data = (void *)param; 789 } 790 } else 791 ret = -EINVAL; 792 } else 793 ret = -EINVAL; 794 return ret; 795 } 796 797 /* Recursive argument parser */ 798 static int __parse_probe_arg(char *arg, const struct fetch_type *t, 799 struct fetch_param *f, int is_return) 800 { 801 int ret = 0; 802 unsigned long param; 803 long offset; 804 char *tmp; 805 806 switch (arg[0]) { 807 case '$': 808 ret = parse_probe_vars(arg + 1, t, f, is_return); 809 break; 810 case '%': /* named register */ 811 ret = regs_query_register_offset(arg + 1); 812 if (ret >= 0) { 813 f->fn = t->fetch[FETCH_MTD_reg]; 814 f->data = (void *)(unsigned long)ret; 815 ret = 0; 816 } 817 break; 818 case '@': /* memory or symbol */ 819 if (isdigit(arg[1])) { 820 ret = strict_strtoul(arg + 1, 0, ¶m); 821 if (ret) 822 break; 823 f->fn = t->fetch[FETCH_MTD_memory]; 824 f->data = (void *)param; 825 } else { 826 ret = split_symbol_offset(arg + 1, &offset); 827 if (ret) 828 break; 829 f->data = alloc_symbol_cache(arg + 1, offset); 830 if (f->data) 831 f->fn = t->fetch[FETCH_MTD_symbol]; 832 } 833 break; 834 case '+': /* deref memory */ 835 arg++; /* Skip '+', because strict_strtol() rejects it. */ 836 case '-': 837 tmp = strchr(arg, '('); 838 if (!tmp) 839 break; 840 *tmp = '\0'; 841 ret = strict_strtol(arg, 0, &offset); 842 if (ret) 843 break; 844 arg = tmp + 1; 845 tmp = strrchr(arg, ')'); 846 if (tmp) { 847 struct deref_fetch_param *dprm; 848 const struct fetch_type *t2 = find_fetch_type(NULL); 849 *tmp = '\0'; 850 dprm = kzalloc(sizeof(struct deref_fetch_param), 851 GFP_KERNEL); 852 if (!dprm) 853 return -ENOMEM; 854 dprm->offset = offset; 855 ret = __parse_probe_arg(arg, t2, &dprm->orig, 856 is_return); 857 if (ret) 858 kfree(dprm); 859 else { 860 f->fn = t->fetch[FETCH_MTD_deref]; 861 f->data = (void *)dprm; 862 } 863 } 864 break; 865 } 866 if (!ret && !f->fn) { /* Parsed, but do not find fetch method */ 867 pr_info("%s type has no corresponding fetch method.\n", 868 t->name); 869 ret = -EINVAL; 870 } 871 return ret; 872 } 873 874 #define BYTES_TO_BITS(nb) ((BITS_PER_LONG * (nb)) / sizeof(long)) 875 876 /* Bitfield type needs to be parsed into a fetch function */ 877 static int __parse_bitfield_probe_arg(const char *bf, 878 const struct fetch_type *t, 879 struct fetch_param *f) 880 { 881 struct bitfield_fetch_param *bprm; 882 unsigned long bw, bo; 883 char *tail; 884 885 if (*bf != 'b') 886 return 0; 887 888 bprm = kzalloc(sizeof(*bprm), GFP_KERNEL); 889 if (!bprm) 890 return -ENOMEM; 891 bprm->orig = *f; 892 f->fn = t->fetch[FETCH_MTD_bitfield]; 893 f->data = (void *)bprm; 894 895 bw = simple_strtoul(bf + 1, &tail, 0); /* Use simple one */ 896 if (bw == 0 || *tail != '@') 897 return -EINVAL; 898 899 bf = tail + 1; 900 bo = simple_strtoul(bf, &tail, 0); 901 if (tail == bf || *tail != '/') 902 return -EINVAL; 903 904 bprm->hi_shift = BYTES_TO_BITS(t->size) - (bw + bo); 905 bprm->low_shift = bprm->hi_shift + bo; 906 return (BYTES_TO_BITS(t->size) < (bw + bo)) ? -EINVAL : 0; 907 } 908 909 /* String length checking wrapper */ 910 static int parse_probe_arg(char *arg, struct trace_probe *tp, 911 struct probe_arg *parg, int is_return) 912 { 913 const char *t; 914 int ret; 915 916 if (strlen(arg) > MAX_ARGSTR_LEN) { 917 pr_info("Argument is too long.: %s\n", arg); 918 return -ENOSPC; 919 } 920 parg->comm = kstrdup(arg, GFP_KERNEL); 921 if (!parg->comm) { 922 pr_info("Failed to allocate memory for command '%s'.\n", arg); 923 return -ENOMEM; 924 } 925 t = strchr(parg->comm, ':'); 926 if (t) { 927 arg[t - parg->comm] = '\0'; 928 t++; 929 } 930 parg->type = find_fetch_type(t); 931 if (!parg->type) { 932 pr_info("Unsupported type: %s\n", t); 933 return -EINVAL; 934 } 935 parg->offset = tp->size; 936 tp->size += parg->type->size; 937 ret = __parse_probe_arg(arg, parg->type, &parg->fetch, is_return); 938 if (ret >= 0 && t != NULL) 939 ret = __parse_bitfield_probe_arg(t, parg->type, &parg->fetch); 940 if (ret >= 0) { 941 parg->fetch_size.fn = get_fetch_size_function(parg->type, 942 parg->fetch.fn); 943 parg->fetch_size.data = parg->fetch.data; 944 } 945 return ret; 946 } 947 948 /* Return 1 if name is reserved or already used by another argument */ 949 static int conflict_field_name(const char *name, 950 struct probe_arg *args, int narg) 951 { 952 int i; 953 for (i = 0; i < ARRAY_SIZE(reserved_field_names); i++) 954 if (strcmp(reserved_field_names[i], name) == 0) 955 return 1; 956 for (i = 0; i < narg; i++) 957 if (strcmp(args[i].name, name) == 0) 958 return 1; 959 return 0; 960 } 961 962 static int create_trace_probe(int argc, char **argv) 963 { 964 /* 965 * Argument syntax: 966 * - Add kprobe: p[:[GRP/]EVENT] KSYM[+OFFS]|KADDR [FETCHARGS] 967 * - Add kretprobe: r[:[GRP/]EVENT] KSYM[+0] [FETCHARGS] 968 * Fetch args: 969 * $retval : fetch return value 970 * $stack : fetch stack address 971 * $stackN : fetch Nth of stack (N:0-) 972 * @ADDR : fetch memory at ADDR (ADDR should be in kernel) 973 * @SYM[+|-offs] : fetch memory at SYM +|- offs (SYM is a data symbol) 974 * %REG : fetch register REG 975 * Dereferencing memory fetch: 976 * +|-offs(ARG) : fetch memory at ARG +|- offs address. 977 * Alias name of args: 978 * NAME=FETCHARG : set NAME as alias of FETCHARG. 979 * Type of args: 980 * FETCHARG:TYPE : use TYPE instead of unsigned long. 981 */ 982 struct trace_probe *tp; 983 int i, ret = 0; 984 int is_return = 0, is_delete = 0; 985 char *symbol = NULL, *event = NULL, *group = NULL; 986 char *arg; 987 unsigned long offset = 0; 988 void *addr = NULL; 989 char buf[MAX_EVENT_NAME_LEN]; 990 991 /* argc must be >= 1 */ 992 if (argv[0][0] == 'p') 993 is_return = 0; 994 else if (argv[0][0] == 'r') 995 is_return = 1; 996 else if (argv[0][0] == '-') 997 is_delete = 1; 998 else { 999 pr_info("Probe definition must be started with 'p', 'r' or" 1000 " '-'.\n"); 1001 return -EINVAL; 1002 } 1003 1004 if (argv[0][1] == ':') { 1005 event = &argv[0][2]; 1006 if (strchr(event, '/')) { 1007 group = event; 1008 event = strchr(group, '/') + 1; 1009 event[-1] = '\0'; 1010 if (strlen(group) == 0) { 1011 pr_info("Group name is not specified\n"); 1012 return -EINVAL; 1013 } 1014 } 1015 if (strlen(event) == 0) { 1016 pr_info("Event name is not specified\n"); 1017 return -EINVAL; 1018 } 1019 } 1020 if (!group) 1021 group = KPROBE_EVENT_SYSTEM; 1022 1023 if (is_delete) { 1024 if (!event) { 1025 pr_info("Delete command needs an event name.\n"); 1026 return -EINVAL; 1027 } 1028 mutex_lock(&probe_lock); 1029 tp = find_probe_event(event, group); 1030 if (!tp) { 1031 mutex_unlock(&probe_lock); 1032 pr_info("Event %s/%s doesn't exist.\n", group, event); 1033 return -ENOENT; 1034 } 1035 /* delete an event */ 1036 unregister_trace_probe(tp); 1037 free_trace_probe(tp); 1038 mutex_unlock(&probe_lock); 1039 return 0; 1040 } 1041 1042 if (argc < 2) { 1043 pr_info("Probe point is not specified.\n"); 1044 return -EINVAL; 1045 } 1046 if (isdigit(argv[1][0])) { 1047 if (is_return) { 1048 pr_info("Return probe point must be a symbol.\n"); 1049 return -EINVAL; 1050 } 1051 /* an address specified */ 1052 ret = strict_strtoul(&argv[1][0], 0, (unsigned long *)&addr); 1053 if (ret) { 1054 pr_info("Failed to parse address.\n"); 1055 return ret; 1056 } 1057 } else { 1058 /* a symbol specified */ 1059 symbol = argv[1]; 1060 /* TODO: support .init module functions */ 1061 ret = split_symbol_offset(symbol, &offset); 1062 if (ret) { 1063 pr_info("Failed to parse symbol.\n"); 1064 return ret; 1065 } 1066 if (offset && is_return) { 1067 pr_info("Return probe must be used without offset.\n"); 1068 return -EINVAL; 1069 } 1070 } 1071 argc -= 2; argv += 2; 1072 1073 /* setup a probe */ 1074 if (!event) { 1075 /* Make a new event name */ 1076 if (symbol) 1077 snprintf(buf, MAX_EVENT_NAME_LEN, "%c_%s_%ld", 1078 is_return ? 'r' : 'p', symbol, offset); 1079 else 1080 snprintf(buf, MAX_EVENT_NAME_LEN, "%c_0x%p", 1081 is_return ? 'r' : 'p', addr); 1082 event = buf; 1083 } 1084 tp = alloc_trace_probe(group, event, addr, symbol, offset, argc, 1085 is_return); 1086 if (IS_ERR(tp)) { 1087 pr_info("Failed to allocate trace_probe.(%d)\n", 1088 (int)PTR_ERR(tp)); 1089 return PTR_ERR(tp); 1090 } 1091 1092 /* parse arguments */ 1093 ret = 0; 1094 for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) { 1095 /* Increment count for freeing args in error case */ 1096 tp->nr_args++; 1097 1098 /* Parse argument name */ 1099 arg = strchr(argv[i], '='); 1100 if (arg) { 1101 *arg++ = '\0'; 1102 tp->args[i].name = kstrdup(argv[i], GFP_KERNEL); 1103 } else { 1104 arg = argv[i]; 1105 /* If argument name is omitted, set "argN" */ 1106 snprintf(buf, MAX_EVENT_NAME_LEN, "arg%d", i + 1); 1107 tp->args[i].name = kstrdup(buf, GFP_KERNEL); 1108 } 1109 1110 if (!tp->args[i].name) { 1111 pr_info("Failed to allocate argument[%d] name.\n", i); 1112 ret = -ENOMEM; 1113 goto error; 1114 } 1115 1116 if (!is_good_name(tp->args[i].name)) { 1117 pr_info("Invalid argument[%d] name: %s\n", 1118 i, tp->args[i].name); 1119 ret = -EINVAL; 1120 goto error; 1121 } 1122 1123 if (conflict_field_name(tp->args[i].name, tp->args, i)) { 1124 pr_info("Argument[%d] name '%s' conflicts with " 1125 "another field.\n", i, argv[i]); 1126 ret = -EINVAL; 1127 goto error; 1128 } 1129 1130 /* Parse fetch argument */ 1131 ret = parse_probe_arg(arg, tp, &tp->args[i], is_return); 1132 if (ret) { 1133 pr_info("Parse error at argument[%d]. (%d)\n", i, ret); 1134 goto error; 1135 } 1136 } 1137 1138 ret = register_trace_probe(tp); 1139 if (ret) 1140 goto error; 1141 return 0; 1142 1143 error: 1144 free_trace_probe(tp); 1145 return ret; 1146 } 1147 1148 static void cleanup_all_probes(void) 1149 { 1150 struct trace_probe *tp; 1151 1152 mutex_lock(&probe_lock); 1153 /* TODO: Use batch unregistration */ 1154 while (!list_empty(&probe_list)) { 1155 tp = list_entry(probe_list.next, struct trace_probe, list); 1156 unregister_trace_probe(tp); 1157 free_trace_probe(tp); 1158 } 1159 mutex_unlock(&probe_lock); 1160 } 1161 1162 1163 /* Probes listing interfaces */ 1164 static void *probes_seq_start(struct seq_file *m, loff_t *pos) 1165 { 1166 mutex_lock(&probe_lock); 1167 return seq_list_start(&probe_list, *pos); 1168 } 1169 1170 static void *probes_seq_next(struct seq_file *m, void *v, loff_t *pos) 1171 { 1172 return seq_list_next(v, &probe_list, pos); 1173 } 1174 1175 static void probes_seq_stop(struct seq_file *m, void *v) 1176 { 1177 mutex_unlock(&probe_lock); 1178 } 1179 1180 static int probes_seq_show(struct seq_file *m, void *v) 1181 { 1182 struct trace_probe *tp = v; 1183 int i; 1184 1185 seq_printf(m, "%c", probe_is_return(tp) ? 'r' : 'p'); 1186 seq_printf(m, ":%s/%s", tp->call.class->system, tp->call.name); 1187 1188 if (!tp->symbol) 1189 seq_printf(m, " 0x%p", tp->rp.kp.addr); 1190 else if (tp->rp.kp.offset) 1191 seq_printf(m, " %s+%u", probe_symbol(tp), tp->rp.kp.offset); 1192 else 1193 seq_printf(m, " %s", probe_symbol(tp)); 1194 1195 for (i = 0; i < tp->nr_args; i++) 1196 seq_printf(m, " %s=%s", tp->args[i].name, tp->args[i].comm); 1197 seq_printf(m, "\n"); 1198 1199 return 0; 1200 } 1201 1202 static const struct seq_operations probes_seq_op = { 1203 .start = probes_seq_start, 1204 .next = probes_seq_next, 1205 .stop = probes_seq_stop, 1206 .show = probes_seq_show 1207 }; 1208 1209 static int probes_open(struct inode *inode, struct file *file) 1210 { 1211 if ((file->f_mode & FMODE_WRITE) && 1212 (file->f_flags & O_TRUNC)) 1213 cleanup_all_probes(); 1214 1215 return seq_open(file, &probes_seq_op); 1216 } 1217 1218 static int command_trace_probe(const char *buf) 1219 { 1220 char **argv; 1221 int argc = 0, ret = 0; 1222 1223 argv = argv_split(GFP_KERNEL, buf, &argc); 1224 if (!argv) 1225 return -ENOMEM; 1226 1227 if (argc) 1228 ret = create_trace_probe(argc, argv); 1229 1230 argv_free(argv); 1231 return ret; 1232 } 1233 1234 #define WRITE_BUFSIZE 4096 1235 1236 static ssize_t probes_write(struct file *file, const char __user *buffer, 1237 size_t count, loff_t *ppos) 1238 { 1239 char *kbuf, *tmp; 1240 int ret; 1241 size_t done; 1242 size_t size; 1243 1244 kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL); 1245 if (!kbuf) 1246 return -ENOMEM; 1247 1248 ret = done = 0; 1249 while (done < count) { 1250 size = count - done; 1251 if (size >= WRITE_BUFSIZE) 1252 size = WRITE_BUFSIZE - 1; 1253 if (copy_from_user(kbuf, buffer + done, size)) { 1254 ret = -EFAULT; 1255 goto out; 1256 } 1257 kbuf[size] = '\0'; 1258 tmp = strchr(kbuf, '\n'); 1259 if (tmp) { 1260 *tmp = '\0'; 1261 size = tmp - kbuf + 1; 1262 } else if (done + size < count) { 1263 pr_warning("Line length is too long: " 1264 "Should be less than %d.", WRITE_BUFSIZE); 1265 ret = -EINVAL; 1266 goto out; 1267 } 1268 done += size; 1269 /* Remove comments */ 1270 tmp = strchr(kbuf, '#'); 1271 if (tmp) 1272 *tmp = '\0'; 1273 1274 ret = command_trace_probe(kbuf); 1275 if (ret) 1276 goto out; 1277 } 1278 ret = done; 1279 out: 1280 kfree(kbuf); 1281 return ret; 1282 } 1283 1284 static const struct file_operations kprobe_events_ops = { 1285 .owner = THIS_MODULE, 1286 .open = probes_open, 1287 .read = seq_read, 1288 .llseek = seq_lseek, 1289 .release = seq_release, 1290 .write = probes_write, 1291 }; 1292 1293 /* Probes profiling interfaces */ 1294 static int probes_profile_seq_show(struct seq_file *m, void *v) 1295 { 1296 struct trace_probe *tp = v; 1297 1298 seq_printf(m, " %-44s %15lu %15lu\n", tp->call.name, tp->nhit, 1299 tp->rp.kp.nmissed); 1300 1301 return 0; 1302 } 1303 1304 static const struct seq_operations profile_seq_op = { 1305 .start = probes_seq_start, 1306 .next = probes_seq_next, 1307 .stop = probes_seq_stop, 1308 .show = probes_profile_seq_show 1309 }; 1310 1311 static int profile_open(struct inode *inode, struct file *file) 1312 { 1313 return seq_open(file, &profile_seq_op); 1314 } 1315 1316 static const struct file_operations kprobe_profile_ops = { 1317 .owner = THIS_MODULE, 1318 .open = profile_open, 1319 .read = seq_read, 1320 .llseek = seq_lseek, 1321 .release = seq_release, 1322 }; 1323 1324 /* Sum up total data length for dynamic arraies (strings) */ 1325 static __kprobes int __get_data_size(struct trace_probe *tp, 1326 struct pt_regs *regs) 1327 { 1328 int i, ret = 0; 1329 u32 len; 1330 1331 for (i = 0; i < tp->nr_args; i++) 1332 if (unlikely(tp->args[i].fetch_size.fn)) { 1333 call_fetch(&tp->args[i].fetch_size, regs, &len); 1334 ret += len; 1335 } 1336 1337 return ret; 1338 } 1339 1340 /* Store the value of each argument */ 1341 static __kprobes void store_trace_args(int ent_size, struct trace_probe *tp, 1342 struct pt_regs *regs, 1343 u8 *data, int maxlen) 1344 { 1345 int i; 1346 u32 end = tp->size; 1347 u32 *dl; /* Data (relative) location */ 1348 1349 for (i = 0; i < tp->nr_args; i++) { 1350 if (unlikely(tp->args[i].fetch_size.fn)) { 1351 /* 1352 * First, we set the relative location and 1353 * maximum data length to *dl 1354 */ 1355 dl = (u32 *)(data + tp->args[i].offset); 1356 *dl = make_data_rloc(maxlen, end - tp->args[i].offset); 1357 /* Then try to fetch string or dynamic array data */ 1358 call_fetch(&tp->args[i].fetch, regs, dl); 1359 /* Reduce maximum length */ 1360 end += get_rloc_len(*dl); 1361 maxlen -= get_rloc_len(*dl); 1362 /* Trick here, convert data_rloc to data_loc */ 1363 *dl = convert_rloc_to_loc(*dl, 1364 ent_size + tp->args[i].offset); 1365 } else 1366 /* Just fetching data normally */ 1367 call_fetch(&tp->args[i].fetch, regs, 1368 data + tp->args[i].offset); 1369 } 1370 } 1371 1372 /* Kprobe handler */ 1373 static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs) 1374 { 1375 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); 1376 struct kprobe_trace_entry_head *entry; 1377 struct ring_buffer_event *event; 1378 struct ring_buffer *buffer; 1379 int size, dsize, pc; 1380 unsigned long irq_flags; 1381 struct ftrace_event_call *call = &tp->call; 1382 1383 tp->nhit++; 1384 1385 local_save_flags(irq_flags); 1386 pc = preempt_count(); 1387 1388 dsize = __get_data_size(tp, regs); 1389 size = sizeof(*entry) + tp->size + dsize; 1390 1391 event = trace_current_buffer_lock_reserve(&buffer, call->event.type, 1392 size, irq_flags, pc); 1393 if (!event) 1394 return; 1395 1396 entry = ring_buffer_event_data(event); 1397 entry->ip = (unsigned long)kp->addr; 1398 store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); 1399 1400 if (!filter_current_check_discard(buffer, call, entry, event)) 1401 trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc); 1402 } 1403 1404 /* Kretprobe handler */ 1405 static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri, 1406 struct pt_regs *regs) 1407 { 1408 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); 1409 struct kretprobe_trace_entry_head *entry; 1410 struct ring_buffer_event *event; 1411 struct ring_buffer *buffer; 1412 int size, pc, dsize; 1413 unsigned long irq_flags; 1414 struct ftrace_event_call *call = &tp->call; 1415 1416 local_save_flags(irq_flags); 1417 pc = preempt_count(); 1418 1419 dsize = __get_data_size(tp, regs); 1420 size = sizeof(*entry) + tp->size + dsize; 1421 1422 event = trace_current_buffer_lock_reserve(&buffer, call->event.type, 1423 size, irq_flags, pc); 1424 if (!event) 1425 return; 1426 1427 entry = ring_buffer_event_data(event); 1428 entry->func = (unsigned long)tp->rp.kp.addr; 1429 entry->ret_ip = (unsigned long)ri->ret_addr; 1430 store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); 1431 1432 if (!filter_current_check_discard(buffer, call, entry, event)) 1433 trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc); 1434 } 1435 1436 /* Event entry printers */ 1437 enum print_line_t 1438 print_kprobe_event(struct trace_iterator *iter, int flags, 1439 struct trace_event *event) 1440 { 1441 struct kprobe_trace_entry_head *field; 1442 struct trace_seq *s = &iter->seq; 1443 struct trace_probe *tp; 1444 u8 *data; 1445 int i; 1446 1447 field = (struct kprobe_trace_entry_head *)iter->ent; 1448 tp = container_of(event, struct trace_probe, call.event); 1449 1450 if (!trace_seq_printf(s, "%s: (", tp->call.name)) 1451 goto partial; 1452 1453 if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET)) 1454 goto partial; 1455 1456 if (!trace_seq_puts(s, ")")) 1457 goto partial; 1458 1459 data = (u8 *)&field[1]; 1460 for (i = 0; i < tp->nr_args; i++) 1461 if (!tp->args[i].type->print(s, tp->args[i].name, 1462 data + tp->args[i].offset, field)) 1463 goto partial; 1464 1465 if (!trace_seq_puts(s, "\n")) 1466 goto partial; 1467 1468 return TRACE_TYPE_HANDLED; 1469 partial: 1470 return TRACE_TYPE_PARTIAL_LINE; 1471 } 1472 1473 enum print_line_t 1474 print_kretprobe_event(struct trace_iterator *iter, int flags, 1475 struct trace_event *event) 1476 { 1477 struct kretprobe_trace_entry_head *field; 1478 struct trace_seq *s = &iter->seq; 1479 struct trace_probe *tp; 1480 u8 *data; 1481 int i; 1482 1483 field = (struct kretprobe_trace_entry_head *)iter->ent; 1484 tp = container_of(event, struct trace_probe, call.event); 1485 1486 if (!trace_seq_printf(s, "%s: (", tp->call.name)) 1487 goto partial; 1488 1489 if (!seq_print_ip_sym(s, field->ret_ip, flags | TRACE_ITER_SYM_OFFSET)) 1490 goto partial; 1491 1492 if (!trace_seq_puts(s, " <- ")) 1493 goto partial; 1494 1495 if (!seq_print_ip_sym(s, field->func, flags & ~TRACE_ITER_SYM_OFFSET)) 1496 goto partial; 1497 1498 if (!trace_seq_puts(s, ")")) 1499 goto partial; 1500 1501 data = (u8 *)&field[1]; 1502 for (i = 0; i < tp->nr_args; i++) 1503 if (!tp->args[i].type->print(s, tp->args[i].name, 1504 data + tp->args[i].offset, field)) 1505 goto partial; 1506 1507 if (!trace_seq_puts(s, "\n")) 1508 goto partial; 1509 1510 return TRACE_TYPE_HANDLED; 1511 partial: 1512 return TRACE_TYPE_PARTIAL_LINE; 1513 } 1514 1515 static int probe_event_enable(struct ftrace_event_call *call) 1516 { 1517 struct trace_probe *tp = (struct trace_probe *)call->data; 1518 1519 tp->flags |= TP_FLAG_TRACE; 1520 if (probe_is_return(tp)) 1521 return enable_kretprobe(&tp->rp); 1522 else 1523 return enable_kprobe(&tp->rp.kp); 1524 } 1525 1526 static void probe_event_disable(struct ftrace_event_call *call) 1527 { 1528 struct trace_probe *tp = (struct trace_probe *)call->data; 1529 1530 tp->flags &= ~TP_FLAG_TRACE; 1531 if (!(tp->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE))) { 1532 if (probe_is_return(tp)) 1533 disable_kretprobe(&tp->rp); 1534 else 1535 disable_kprobe(&tp->rp.kp); 1536 } 1537 } 1538 1539 #undef DEFINE_FIELD 1540 #define DEFINE_FIELD(type, item, name, is_signed) \ 1541 do { \ 1542 ret = trace_define_field(event_call, #type, name, \ 1543 offsetof(typeof(field), item), \ 1544 sizeof(field.item), is_signed, \ 1545 FILTER_OTHER); \ 1546 if (ret) \ 1547 return ret; \ 1548 } while (0) 1549 1550 static int kprobe_event_define_fields(struct ftrace_event_call *event_call) 1551 { 1552 int ret, i; 1553 struct kprobe_trace_entry_head field; 1554 struct trace_probe *tp = (struct trace_probe *)event_call->data; 1555 1556 DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0); 1557 /* Set argument names as fields */ 1558 for (i = 0; i < tp->nr_args; i++) { 1559 ret = trace_define_field(event_call, tp->args[i].type->fmttype, 1560 tp->args[i].name, 1561 sizeof(field) + tp->args[i].offset, 1562 tp->args[i].type->size, 1563 tp->args[i].type->is_signed, 1564 FILTER_OTHER); 1565 if (ret) 1566 return ret; 1567 } 1568 return 0; 1569 } 1570 1571 static int kretprobe_event_define_fields(struct ftrace_event_call *event_call) 1572 { 1573 int ret, i; 1574 struct kretprobe_trace_entry_head field; 1575 struct trace_probe *tp = (struct trace_probe *)event_call->data; 1576 1577 DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0); 1578 DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0); 1579 /* Set argument names as fields */ 1580 for (i = 0; i < tp->nr_args; i++) { 1581 ret = trace_define_field(event_call, tp->args[i].type->fmttype, 1582 tp->args[i].name, 1583 sizeof(field) + tp->args[i].offset, 1584 tp->args[i].type->size, 1585 tp->args[i].type->is_signed, 1586 FILTER_OTHER); 1587 if (ret) 1588 return ret; 1589 } 1590 return 0; 1591 } 1592 1593 static int __set_print_fmt(struct trace_probe *tp, char *buf, int len) 1594 { 1595 int i; 1596 int pos = 0; 1597 1598 const char *fmt, *arg; 1599 1600 if (!probe_is_return(tp)) { 1601 fmt = "(%lx)"; 1602 arg = "REC->" FIELD_STRING_IP; 1603 } else { 1604 fmt = "(%lx <- %lx)"; 1605 arg = "REC->" FIELD_STRING_FUNC ", REC->" FIELD_STRING_RETIP; 1606 } 1607 1608 /* When len=0, we just calculate the needed length */ 1609 #define LEN_OR_ZERO (len ? len - pos : 0) 1610 1611 pos += snprintf(buf + pos, LEN_OR_ZERO, "\"%s", fmt); 1612 1613 for (i = 0; i < tp->nr_args; i++) { 1614 pos += snprintf(buf + pos, LEN_OR_ZERO, " %s=%s", 1615 tp->args[i].name, tp->args[i].type->fmt); 1616 } 1617 1618 pos += snprintf(buf + pos, LEN_OR_ZERO, "\", %s", arg); 1619 1620 for (i = 0; i < tp->nr_args; i++) { 1621 if (strcmp(tp->args[i].type->name, "string") == 0) 1622 pos += snprintf(buf + pos, LEN_OR_ZERO, 1623 ", __get_str(%s)", 1624 tp->args[i].name); 1625 else 1626 pos += snprintf(buf + pos, LEN_OR_ZERO, ", REC->%s", 1627 tp->args[i].name); 1628 } 1629 1630 #undef LEN_OR_ZERO 1631 1632 /* return the length of print_fmt */ 1633 return pos; 1634 } 1635 1636 static int set_print_fmt(struct trace_probe *tp) 1637 { 1638 int len; 1639 char *print_fmt; 1640 1641 /* First: called with 0 length to calculate the needed length */ 1642 len = __set_print_fmt(tp, NULL, 0); 1643 print_fmt = kmalloc(len + 1, GFP_KERNEL); 1644 if (!print_fmt) 1645 return -ENOMEM; 1646 1647 /* Second: actually write the @print_fmt */ 1648 __set_print_fmt(tp, print_fmt, len + 1); 1649 tp->call.print_fmt = print_fmt; 1650 1651 return 0; 1652 } 1653 1654 #ifdef CONFIG_PERF_EVENTS 1655 1656 /* Kprobe profile handler */ 1657 static __kprobes void kprobe_perf_func(struct kprobe *kp, 1658 struct pt_regs *regs) 1659 { 1660 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); 1661 struct ftrace_event_call *call = &tp->call; 1662 struct kprobe_trace_entry_head *entry; 1663 struct hlist_head *head; 1664 int size, __size, dsize; 1665 int rctx; 1666 1667 dsize = __get_data_size(tp, regs); 1668 __size = sizeof(*entry) + tp->size + dsize; 1669 size = ALIGN(__size + sizeof(u32), sizeof(u64)); 1670 size -= sizeof(u32); 1671 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, 1672 "profile buffer not large enough")) 1673 return; 1674 1675 entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx); 1676 if (!entry) 1677 return; 1678 1679 entry->ip = (unsigned long)kp->addr; 1680 memset(&entry[1], 0, dsize); 1681 store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); 1682 1683 head = this_cpu_ptr(call->perf_events); 1684 perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head); 1685 } 1686 1687 /* Kretprobe profile handler */ 1688 static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri, 1689 struct pt_regs *regs) 1690 { 1691 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); 1692 struct ftrace_event_call *call = &tp->call; 1693 struct kretprobe_trace_entry_head *entry; 1694 struct hlist_head *head; 1695 int size, __size, dsize; 1696 int rctx; 1697 1698 dsize = __get_data_size(tp, regs); 1699 __size = sizeof(*entry) + tp->size + dsize; 1700 size = ALIGN(__size + sizeof(u32), sizeof(u64)); 1701 size -= sizeof(u32); 1702 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, 1703 "profile buffer not large enough")) 1704 return; 1705 1706 entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx); 1707 if (!entry) 1708 return; 1709 1710 entry->func = (unsigned long)tp->rp.kp.addr; 1711 entry->ret_ip = (unsigned long)ri->ret_addr; 1712 store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); 1713 1714 head = this_cpu_ptr(call->perf_events); 1715 perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, regs, head); 1716 } 1717 1718 static int probe_perf_enable(struct ftrace_event_call *call) 1719 { 1720 struct trace_probe *tp = (struct trace_probe *)call->data; 1721 1722 tp->flags |= TP_FLAG_PROFILE; 1723 1724 if (probe_is_return(tp)) 1725 return enable_kretprobe(&tp->rp); 1726 else 1727 return enable_kprobe(&tp->rp.kp); 1728 } 1729 1730 static void probe_perf_disable(struct ftrace_event_call *call) 1731 { 1732 struct trace_probe *tp = (struct trace_probe *)call->data; 1733 1734 tp->flags &= ~TP_FLAG_PROFILE; 1735 1736 if (!(tp->flags & TP_FLAG_TRACE)) { 1737 if (probe_is_return(tp)) 1738 disable_kretprobe(&tp->rp); 1739 else 1740 disable_kprobe(&tp->rp.kp); 1741 } 1742 } 1743 #endif /* CONFIG_PERF_EVENTS */ 1744 1745 static __kprobes 1746 int kprobe_register(struct ftrace_event_call *event, enum trace_reg type) 1747 { 1748 switch (type) { 1749 case TRACE_REG_REGISTER: 1750 return probe_event_enable(event); 1751 case TRACE_REG_UNREGISTER: 1752 probe_event_disable(event); 1753 return 0; 1754 1755 #ifdef CONFIG_PERF_EVENTS 1756 case TRACE_REG_PERF_REGISTER: 1757 return probe_perf_enable(event); 1758 case TRACE_REG_PERF_UNREGISTER: 1759 probe_perf_disable(event); 1760 return 0; 1761 #endif 1762 } 1763 return 0; 1764 } 1765 1766 static __kprobes 1767 int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs) 1768 { 1769 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); 1770 1771 if (tp->flags & TP_FLAG_TRACE) 1772 kprobe_trace_func(kp, regs); 1773 #ifdef CONFIG_PERF_EVENTS 1774 if (tp->flags & TP_FLAG_PROFILE) 1775 kprobe_perf_func(kp, regs); 1776 #endif 1777 return 0; /* We don't tweek kernel, so just return 0 */ 1778 } 1779 1780 static __kprobes 1781 int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs) 1782 { 1783 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); 1784 1785 if (tp->flags & TP_FLAG_TRACE) 1786 kretprobe_trace_func(ri, regs); 1787 #ifdef CONFIG_PERF_EVENTS 1788 if (tp->flags & TP_FLAG_PROFILE) 1789 kretprobe_perf_func(ri, regs); 1790 #endif 1791 return 0; /* We don't tweek kernel, so just return 0 */ 1792 } 1793 1794 static struct trace_event_functions kretprobe_funcs = { 1795 .trace = print_kretprobe_event 1796 }; 1797 1798 static struct trace_event_functions kprobe_funcs = { 1799 .trace = print_kprobe_event 1800 }; 1801 1802 static int register_probe_event(struct trace_probe *tp) 1803 { 1804 struct ftrace_event_call *call = &tp->call; 1805 int ret; 1806 1807 /* Initialize ftrace_event_call */ 1808 INIT_LIST_HEAD(&call->class->fields); 1809 if (probe_is_return(tp)) { 1810 call->event.funcs = &kretprobe_funcs; 1811 call->class->define_fields = kretprobe_event_define_fields; 1812 } else { 1813 call->event.funcs = &kprobe_funcs; 1814 call->class->define_fields = kprobe_event_define_fields; 1815 } 1816 if (set_print_fmt(tp) < 0) 1817 return -ENOMEM; 1818 ret = register_ftrace_event(&call->event); 1819 if (!ret) { 1820 kfree(call->print_fmt); 1821 return -ENODEV; 1822 } 1823 call->flags = 0; 1824 call->class->reg = kprobe_register; 1825 call->data = tp; 1826 ret = trace_add_event_call(call); 1827 if (ret) { 1828 pr_info("Failed to register kprobe event: %s\n", call->name); 1829 kfree(call->print_fmt); 1830 unregister_ftrace_event(&call->event); 1831 } 1832 return ret; 1833 } 1834 1835 static void unregister_probe_event(struct trace_probe *tp) 1836 { 1837 /* tp->event is unregistered in trace_remove_event_call() */ 1838 trace_remove_event_call(&tp->call); 1839 kfree(tp->call.print_fmt); 1840 } 1841 1842 /* Make a debugfs interface for controling probe points */ 1843 static __init int init_kprobe_trace(void) 1844 { 1845 struct dentry *d_tracer; 1846 struct dentry *entry; 1847 1848 d_tracer = tracing_init_dentry(); 1849 if (!d_tracer) 1850 return 0; 1851 1852 entry = debugfs_create_file("kprobe_events", 0644, d_tracer, 1853 NULL, &kprobe_events_ops); 1854 1855 /* Event list interface */ 1856 if (!entry) 1857 pr_warning("Could not create debugfs " 1858 "'kprobe_events' entry\n"); 1859 1860 /* Profile interface */ 1861 entry = debugfs_create_file("kprobe_profile", 0444, d_tracer, 1862 NULL, &kprobe_profile_ops); 1863 1864 if (!entry) 1865 pr_warning("Could not create debugfs " 1866 "'kprobe_profile' entry\n"); 1867 return 0; 1868 } 1869 fs_initcall(init_kprobe_trace); 1870 1871 1872 #ifdef CONFIG_FTRACE_STARTUP_TEST 1873 1874 static int kprobe_trace_selftest_target(int a1, int a2, int a3, 1875 int a4, int a5, int a6) 1876 { 1877 return a1 + a2 + a3 + a4 + a5 + a6; 1878 } 1879 1880 static __init int kprobe_trace_self_tests_init(void) 1881 { 1882 int ret, warn = 0; 1883 int (*target)(int, int, int, int, int, int); 1884 struct trace_probe *tp; 1885 1886 target = kprobe_trace_selftest_target; 1887 1888 pr_info("Testing kprobe tracing: "); 1889 1890 ret = command_trace_probe("p:testprobe kprobe_trace_selftest_target " 1891 "$stack $stack0 +0($stack)"); 1892 if (WARN_ON_ONCE(ret)) { 1893 pr_warning("error on probing function entry.\n"); 1894 warn++; 1895 } else { 1896 /* Enable trace point */ 1897 tp = find_probe_event("testprobe", KPROBE_EVENT_SYSTEM); 1898 if (WARN_ON_ONCE(tp == NULL)) { 1899 pr_warning("error on getting new probe.\n"); 1900 warn++; 1901 } else 1902 probe_event_enable(&tp->call); 1903 } 1904 1905 ret = command_trace_probe("r:testprobe2 kprobe_trace_selftest_target " 1906 "$retval"); 1907 if (WARN_ON_ONCE(ret)) { 1908 pr_warning("error on probing function return.\n"); 1909 warn++; 1910 } else { 1911 /* Enable trace point */ 1912 tp = find_probe_event("testprobe2", KPROBE_EVENT_SYSTEM); 1913 if (WARN_ON_ONCE(tp == NULL)) { 1914 pr_warning("error on getting new probe.\n"); 1915 warn++; 1916 } else 1917 probe_event_enable(&tp->call); 1918 } 1919 1920 if (warn) 1921 goto end; 1922 1923 ret = target(1, 2, 3, 4, 5, 6); 1924 1925 ret = command_trace_probe("-:testprobe"); 1926 if (WARN_ON_ONCE(ret)) { 1927 pr_warning("error on deleting a probe.\n"); 1928 warn++; 1929 } 1930 1931 ret = command_trace_probe("-:testprobe2"); 1932 if (WARN_ON_ONCE(ret)) { 1933 pr_warning("error on deleting a probe.\n"); 1934 warn++; 1935 } 1936 1937 end: 1938 cleanup_all_probes(); 1939 if (warn) 1940 pr_cont("NG: Some tests are failed. Please check them.\n"); 1941 else 1942 pr_cont("OK\n"); 1943 return 0; 1944 } 1945 1946 late_initcall(kprobe_trace_self_tests_init); 1947 1948 #endif 1949