1 #include <trace/syscall.h> 2 #include <trace/events/syscalls.h> 3 #include <linux/slab.h> 4 #include <linux/kernel.h> 5 #include <linux/module.h> /* for MODULE_NAME_LEN via KSYM_SYMBOL_LEN */ 6 #include <linux/ftrace.h> 7 #include <linux/perf_event.h> 8 #include <asm/syscall.h> 9 10 #include "trace_output.h" 11 #include "trace.h" 12 13 static DEFINE_MUTEX(syscall_trace_lock); 14 static int sys_refcount_enter; 15 static int sys_refcount_exit; 16 static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls); 17 static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls); 18 19 static int syscall_enter_register(struct ftrace_event_call *event, 20 enum trace_reg type, void *data); 21 static int syscall_exit_register(struct ftrace_event_call *event, 22 enum trace_reg type, void *data); 23 24 static int syscall_enter_define_fields(struct ftrace_event_call *call); 25 static int syscall_exit_define_fields(struct ftrace_event_call *call); 26 27 static struct list_head * 28 syscall_get_enter_fields(struct ftrace_event_call *call) 29 { 30 struct syscall_metadata *entry = call->data; 31 32 return &entry->enter_fields; 33 } 34 35 struct trace_event_functions enter_syscall_print_funcs = { 36 .trace = print_syscall_enter, 37 }; 38 39 struct trace_event_functions exit_syscall_print_funcs = { 40 .trace = print_syscall_exit, 41 }; 42 43 struct ftrace_event_class event_class_syscall_enter = { 44 .system = "syscalls", 45 .reg = syscall_enter_register, 46 .define_fields = syscall_enter_define_fields, 47 .get_fields = syscall_get_enter_fields, 48 .raw_init = init_syscall_trace, 49 }; 50 51 struct ftrace_event_class event_class_syscall_exit = { 52 .system = "syscalls", 53 .reg = syscall_exit_register, 54 .define_fields = syscall_exit_define_fields, 55 .fields = LIST_HEAD_INIT(event_class_syscall_exit.fields), 56 .raw_init = init_syscall_trace, 57 }; 58 59 extern struct syscall_metadata *__start_syscalls_metadata[]; 60 extern struct syscall_metadata *__stop_syscalls_metadata[]; 61 62 static struct syscall_metadata **syscalls_metadata; 63 64 #ifndef ARCH_HAS_SYSCALL_MATCH_SYM_NAME 65 static inline bool arch_syscall_match_sym_name(const char *sym, const char *name) 66 { 67 /* 68 * Only compare after the "sys" prefix. Archs that use 69 * syscall wrappers may have syscalls symbols aliases prefixed 70 * with "SyS" instead of "sys", leading to an unwanted 71 * mismatch. 72 */ 73 return !strcmp(sym + 3, name + 3); 74 } 75 #endif 76 77 static __init struct syscall_metadata * 78 find_syscall_meta(unsigned long syscall) 79 { 80 struct syscall_metadata **start; 81 struct syscall_metadata **stop; 82 char str[KSYM_SYMBOL_LEN]; 83 84 85 start = __start_syscalls_metadata; 86 stop = __stop_syscalls_metadata; 87 kallsyms_lookup(syscall, NULL, NULL, NULL, str); 88 89 if (arch_syscall_match_sym_name(str, "sys_ni_syscall")) 90 return NULL; 91 92 for ( ; start < stop; start++) { 93 if ((*start)->name && arch_syscall_match_sym_name(str, (*start)->name)) 94 return *start; 95 } 96 return NULL; 97 } 98 99 static struct syscall_metadata *syscall_nr_to_meta(int nr) 100 { 101 if (!syscalls_metadata || nr >= NR_syscalls || nr < 0) 102 return NULL; 103 104 return syscalls_metadata[nr]; 105 } 106 107 enum print_line_t 108 print_syscall_enter(struct trace_iterator *iter, int flags, 109 struct trace_event *event) 110 { 111 struct trace_seq *s = &iter->seq; 112 struct trace_entry *ent = iter->ent; 113 struct syscall_trace_enter *trace; 114 struct syscall_metadata *entry; 115 int i, ret, syscall; 116 117 trace = (typeof(trace))ent; 118 syscall = trace->nr; 119 entry = syscall_nr_to_meta(syscall); 120 121 if (!entry) 122 goto end; 123 124 if (entry->enter_event->event.type != ent->type) { 125 WARN_ON_ONCE(1); 126 goto end; 127 } 128 129 ret = trace_seq_printf(s, "%s(", entry->name); 130 if (!ret) 131 return TRACE_TYPE_PARTIAL_LINE; 132 133 for (i = 0; i < entry->nb_args; i++) { 134 /* parameter types */ 135 if (trace_flags & TRACE_ITER_VERBOSE) { 136 ret = trace_seq_printf(s, "%s ", entry->types[i]); 137 if (!ret) 138 return TRACE_TYPE_PARTIAL_LINE; 139 } 140 /* parameter values */ 141 ret = trace_seq_printf(s, "%s: %lx%s", entry->args[i], 142 trace->args[i], 143 i == entry->nb_args - 1 ? "" : ", "); 144 if (!ret) 145 return TRACE_TYPE_PARTIAL_LINE; 146 } 147 148 ret = trace_seq_putc(s, ')'); 149 if (!ret) 150 return TRACE_TYPE_PARTIAL_LINE; 151 152 end: 153 ret = trace_seq_putc(s, '\n'); 154 if (!ret) 155 return TRACE_TYPE_PARTIAL_LINE; 156 157 return TRACE_TYPE_HANDLED; 158 } 159 160 enum print_line_t 161 print_syscall_exit(struct trace_iterator *iter, int flags, 162 struct trace_event *event) 163 { 164 struct trace_seq *s = &iter->seq; 165 struct trace_entry *ent = iter->ent; 166 struct syscall_trace_exit *trace; 167 int syscall; 168 struct syscall_metadata *entry; 169 int ret; 170 171 trace = (typeof(trace))ent; 172 syscall = trace->nr; 173 entry = syscall_nr_to_meta(syscall); 174 175 if (!entry) { 176 trace_seq_printf(s, "\n"); 177 return TRACE_TYPE_HANDLED; 178 } 179 180 if (entry->exit_event->event.type != ent->type) { 181 WARN_ON_ONCE(1); 182 return TRACE_TYPE_UNHANDLED; 183 } 184 185 ret = trace_seq_printf(s, "%s -> 0x%lx\n", entry->name, 186 trace->ret); 187 if (!ret) 188 return TRACE_TYPE_PARTIAL_LINE; 189 190 return TRACE_TYPE_HANDLED; 191 } 192 193 extern char *__bad_type_size(void); 194 195 #define SYSCALL_FIELD(type, name) \ 196 sizeof(type) != sizeof(trace.name) ? \ 197 __bad_type_size() : \ 198 #type, #name, offsetof(typeof(trace), name), \ 199 sizeof(trace.name), is_signed_type(type) 200 201 static 202 int __set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len) 203 { 204 int i; 205 int pos = 0; 206 207 /* When len=0, we just calculate the needed length */ 208 #define LEN_OR_ZERO (len ? len - pos : 0) 209 210 pos += snprintf(buf + pos, LEN_OR_ZERO, "\""); 211 for (i = 0; i < entry->nb_args; i++) { 212 pos += snprintf(buf + pos, LEN_OR_ZERO, "%s: 0x%%0%zulx%s", 213 entry->args[i], sizeof(unsigned long), 214 i == entry->nb_args - 1 ? "" : ", "); 215 } 216 pos += snprintf(buf + pos, LEN_OR_ZERO, "\""); 217 218 for (i = 0; i < entry->nb_args; i++) { 219 pos += snprintf(buf + pos, LEN_OR_ZERO, 220 ", ((unsigned long)(REC->%s))", entry->args[i]); 221 } 222 223 #undef LEN_OR_ZERO 224 225 /* return the length of print_fmt */ 226 return pos; 227 } 228 229 static int set_syscall_print_fmt(struct ftrace_event_call *call) 230 { 231 char *print_fmt; 232 int len; 233 struct syscall_metadata *entry = call->data; 234 235 if (entry->enter_event != call) { 236 call->print_fmt = "\"0x%lx\", REC->ret"; 237 return 0; 238 } 239 240 /* First: called with 0 length to calculate the needed length */ 241 len = __set_enter_print_fmt(entry, NULL, 0); 242 243 print_fmt = kmalloc(len + 1, GFP_KERNEL); 244 if (!print_fmt) 245 return -ENOMEM; 246 247 /* Second: actually write the @print_fmt */ 248 __set_enter_print_fmt(entry, print_fmt, len + 1); 249 call->print_fmt = print_fmt; 250 251 return 0; 252 } 253 254 static void free_syscall_print_fmt(struct ftrace_event_call *call) 255 { 256 struct syscall_metadata *entry = call->data; 257 258 if (entry->enter_event == call) 259 kfree(call->print_fmt); 260 } 261 262 static int syscall_enter_define_fields(struct ftrace_event_call *call) 263 { 264 struct syscall_trace_enter trace; 265 struct syscall_metadata *meta = call->data; 266 int ret; 267 int i; 268 int offset = offsetof(typeof(trace), args); 269 270 ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER); 271 if (ret) 272 return ret; 273 274 for (i = 0; i < meta->nb_args; i++) { 275 ret = trace_define_field(call, meta->types[i], 276 meta->args[i], offset, 277 sizeof(unsigned long), 0, 278 FILTER_OTHER); 279 offset += sizeof(unsigned long); 280 } 281 282 return ret; 283 } 284 285 static int syscall_exit_define_fields(struct ftrace_event_call *call) 286 { 287 struct syscall_trace_exit trace; 288 int ret; 289 290 ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER); 291 if (ret) 292 return ret; 293 294 ret = trace_define_field(call, SYSCALL_FIELD(long, ret), 295 FILTER_OTHER); 296 297 return ret; 298 } 299 300 void ftrace_syscall_enter(void *ignore, struct pt_regs *regs, long id) 301 { 302 struct syscall_trace_enter *entry; 303 struct syscall_metadata *sys_data; 304 struct ring_buffer_event *event; 305 struct ring_buffer *buffer; 306 int size; 307 int syscall_nr; 308 309 syscall_nr = syscall_get_nr(current, regs); 310 if (syscall_nr < 0) 311 return; 312 if (!test_bit(syscall_nr, enabled_enter_syscalls)) 313 return; 314 315 sys_data = syscall_nr_to_meta(syscall_nr); 316 if (!sys_data) 317 return; 318 319 size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args; 320 321 event = trace_current_buffer_lock_reserve(&buffer, 322 sys_data->enter_event->event.type, size, 0, 0); 323 if (!event) 324 return; 325 326 entry = ring_buffer_event_data(event); 327 entry->nr = syscall_nr; 328 syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args); 329 330 if (!filter_current_check_discard(buffer, sys_data->enter_event, 331 entry, event)) 332 trace_current_buffer_unlock_commit(buffer, event, 0, 0); 333 } 334 335 void ftrace_syscall_exit(void *ignore, struct pt_regs *regs, long ret) 336 { 337 struct syscall_trace_exit *entry; 338 struct syscall_metadata *sys_data; 339 struct ring_buffer_event *event; 340 struct ring_buffer *buffer; 341 int syscall_nr; 342 343 syscall_nr = syscall_get_nr(current, regs); 344 if (syscall_nr < 0) 345 return; 346 if (!test_bit(syscall_nr, enabled_exit_syscalls)) 347 return; 348 349 sys_data = syscall_nr_to_meta(syscall_nr); 350 if (!sys_data) 351 return; 352 353 event = trace_current_buffer_lock_reserve(&buffer, 354 sys_data->exit_event->event.type, sizeof(*entry), 0, 0); 355 if (!event) 356 return; 357 358 entry = ring_buffer_event_data(event); 359 entry->nr = syscall_nr; 360 entry->ret = syscall_get_return_value(current, regs); 361 362 if (!filter_current_check_discard(buffer, sys_data->exit_event, 363 entry, event)) 364 trace_current_buffer_unlock_commit(buffer, event, 0, 0); 365 } 366 367 int reg_event_syscall_enter(struct ftrace_event_call *call) 368 { 369 int ret = 0; 370 int num; 371 372 num = ((struct syscall_metadata *)call->data)->syscall_nr; 373 if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls)) 374 return -ENOSYS; 375 mutex_lock(&syscall_trace_lock); 376 if (!sys_refcount_enter) 377 ret = register_trace_sys_enter(ftrace_syscall_enter, NULL); 378 if (!ret) { 379 set_bit(num, enabled_enter_syscalls); 380 sys_refcount_enter++; 381 } 382 mutex_unlock(&syscall_trace_lock); 383 return ret; 384 } 385 386 void unreg_event_syscall_enter(struct ftrace_event_call *call) 387 { 388 int num; 389 390 num = ((struct syscall_metadata *)call->data)->syscall_nr; 391 if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls)) 392 return; 393 mutex_lock(&syscall_trace_lock); 394 sys_refcount_enter--; 395 clear_bit(num, enabled_enter_syscalls); 396 if (!sys_refcount_enter) 397 unregister_trace_sys_enter(ftrace_syscall_enter, NULL); 398 mutex_unlock(&syscall_trace_lock); 399 } 400 401 int reg_event_syscall_exit(struct ftrace_event_call *call) 402 { 403 int ret = 0; 404 int num; 405 406 num = ((struct syscall_metadata *)call->data)->syscall_nr; 407 if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls)) 408 return -ENOSYS; 409 mutex_lock(&syscall_trace_lock); 410 if (!sys_refcount_exit) 411 ret = register_trace_sys_exit(ftrace_syscall_exit, NULL); 412 if (!ret) { 413 set_bit(num, enabled_exit_syscalls); 414 sys_refcount_exit++; 415 } 416 mutex_unlock(&syscall_trace_lock); 417 return ret; 418 } 419 420 void unreg_event_syscall_exit(struct ftrace_event_call *call) 421 { 422 int num; 423 424 num = ((struct syscall_metadata *)call->data)->syscall_nr; 425 if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls)) 426 return; 427 mutex_lock(&syscall_trace_lock); 428 sys_refcount_exit--; 429 clear_bit(num, enabled_exit_syscalls); 430 if (!sys_refcount_exit) 431 unregister_trace_sys_exit(ftrace_syscall_exit, NULL); 432 mutex_unlock(&syscall_trace_lock); 433 } 434 435 int init_syscall_trace(struct ftrace_event_call *call) 436 { 437 int id; 438 int num; 439 440 num = ((struct syscall_metadata *)call->data)->syscall_nr; 441 if (num < 0 || num >= NR_syscalls) { 442 pr_debug("syscall %s metadata not mapped, disabling ftrace event\n", 443 ((struct syscall_metadata *)call->data)->name); 444 return -ENOSYS; 445 } 446 447 if (set_syscall_print_fmt(call) < 0) 448 return -ENOMEM; 449 450 id = trace_event_raw_init(call); 451 452 if (id < 0) { 453 free_syscall_print_fmt(call); 454 return id; 455 } 456 457 return id; 458 } 459 460 unsigned long __init __weak arch_syscall_addr(int nr) 461 { 462 return (unsigned long)sys_call_table[nr]; 463 } 464 465 int __init init_ftrace_syscalls(void) 466 { 467 struct syscall_metadata *meta; 468 unsigned long addr; 469 int i; 470 471 syscalls_metadata = kcalloc(NR_syscalls, sizeof(*syscalls_metadata), 472 GFP_KERNEL); 473 if (!syscalls_metadata) { 474 WARN_ON(1); 475 return -ENOMEM; 476 } 477 478 for (i = 0; i < NR_syscalls; i++) { 479 addr = arch_syscall_addr(i); 480 meta = find_syscall_meta(addr); 481 if (!meta) 482 continue; 483 484 meta->syscall_nr = i; 485 syscalls_metadata[i] = meta; 486 } 487 488 return 0; 489 } 490 core_initcall(init_ftrace_syscalls); 491 492 #ifdef CONFIG_PERF_EVENTS 493 494 static DECLARE_BITMAP(enabled_perf_enter_syscalls, NR_syscalls); 495 static DECLARE_BITMAP(enabled_perf_exit_syscalls, NR_syscalls); 496 static int sys_perf_refcount_enter; 497 static int sys_perf_refcount_exit; 498 499 static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) 500 { 501 struct syscall_metadata *sys_data; 502 struct syscall_trace_enter *rec; 503 struct hlist_head *head; 504 int syscall_nr; 505 int rctx; 506 int size; 507 508 syscall_nr = syscall_get_nr(current, regs); 509 if (syscall_nr < 0) 510 return; 511 if (!test_bit(syscall_nr, enabled_perf_enter_syscalls)) 512 return; 513 514 sys_data = syscall_nr_to_meta(syscall_nr); 515 if (!sys_data) 516 return; 517 518 /* get the size after alignment with the u32 buffer size field */ 519 size = sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec); 520 size = ALIGN(size + sizeof(u32), sizeof(u64)); 521 size -= sizeof(u32); 522 523 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, 524 "perf buffer not large enough")) 525 return; 526 527 rec = (struct syscall_trace_enter *)perf_trace_buf_prepare(size, 528 sys_data->enter_event->event.type, regs, &rctx); 529 if (!rec) 530 return; 531 532 rec->nr = syscall_nr; 533 syscall_get_arguments(current, regs, 0, sys_data->nb_args, 534 (unsigned long *)&rec->args); 535 536 head = this_cpu_ptr(sys_data->enter_event->perf_events); 537 perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head, NULL); 538 } 539 540 int perf_sysenter_enable(struct ftrace_event_call *call) 541 { 542 int ret = 0; 543 int num; 544 545 num = ((struct syscall_metadata *)call->data)->syscall_nr; 546 547 mutex_lock(&syscall_trace_lock); 548 if (!sys_perf_refcount_enter) 549 ret = register_trace_sys_enter(perf_syscall_enter, NULL); 550 if (ret) { 551 pr_info("event trace: Could not activate" 552 "syscall entry trace point"); 553 } else { 554 set_bit(num, enabled_perf_enter_syscalls); 555 sys_perf_refcount_enter++; 556 } 557 mutex_unlock(&syscall_trace_lock); 558 return ret; 559 } 560 561 void perf_sysenter_disable(struct ftrace_event_call *call) 562 { 563 int num; 564 565 num = ((struct syscall_metadata *)call->data)->syscall_nr; 566 567 mutex_lock(&syscall_trace_lock); 568 sys_perf_refcount_enter--; 569 clear_bit(num, enabled_perf_enter_syscalls); 570 if (!sys_perf_refcount_enter) 571 unregister_trace_sys_enter(perf_syscall_enter, NULL); 572 mutex_unlock(&syscall_trace_lock); 573 } 574 575 static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret) 576 { 577 struct syscall_metadata *sys_data; 578 struct syscall_trace_exit *rec; 579 struct hlist_head *head; 580 int syscall_nr; 581 int rctx; 582 int size; 583 584 syscall_nr = syscall_get_nr(current, regs); 585 if (syscall_nr < 0) 586 return; 587 if (!test_bit(syscall_nr, enabled_perf_exit_syscalls)) 588 return; 589 590 sys_data = syscall_nr_to_meta(syscall_nr); 591 if (!sys_data) 592 return; 593 594 /* We can probably do that at build time */ 595 size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64)); 596 size -= sizeof(u32); 597 598 /* 599 * Impossible, but be paranoid with the future 600 * How to put this check outside runtime? 601 */ 602 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, 603 "exit event has grown above perf buffer size")) 604 return; 605 606 rec = (struct syscall_trace_exit *)perf_trace_buf_prepare(size, 607 sys_data->exit_event->event.type, regs, &rctx); 608 if (!rec) 609 return; 610 611 rec->nr = syscall_nr; 612 rec->ret = syscall_get_return_value(current, regs); 613 614 head = this_cpu_ptr(sys_data->exit_event->perf_events); 615 perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head, NULL); 616 } 617 618 int perf_sysexit_enable(struct ftrace_event_call *call) 619 { 620 int ret = 0; 621 int num; 622 623 num = ((struct syscall_metadata *)call->data)->syscall_nr; 624 625 mutex_lock(&syscall_trace_lock); 626 if (!sys_perf_refcount_exit) 627 ret = register_trace_sys_exit(perf_syscall_exit, NULL); 628 if (ret) { 629 pr_info("event trace: Could not activate" 630 "syscall exit trace point"); 631 } else { 632 set_bit(num, enabled_perf_exit_syscalls); 633 sys_perf_refcount_exit++; 634 } 635 mutex_unlock(&syscall_trace_lock); 636 return ret; 637 } 638 639 void perf_sysexit_disable(struct ftrace_event_call *call) 640 { 641 int num; 642 643 num = ((struct syscall_metadata *)call->data)->syscall_nr; 644 645 mutex_lock(&syscall_trace_lock); 646 sys_perf_refcount_exit--; 647 clear_bit(num, enabled_perf_exit_syscalls); 648 if (!sys_perf_refcount_exit) 649 unregister_trace_sys_exit(perf_syscall_exit, NULL); 650 mutex_unlock(&syscall_trace_lock); 651 } 652 653 #endif /* CONFIG_PERF_EVENTS */ 654 655 static int syscall_enter_register(struct ftrace_event_call *event, 656 enum trace_reg type, void *data) 657 { 658 switch (type) { 659 case TRACE_REG_REGISTER: 660 return reg_event_syscall_enter(event); 661 case TRACE_REG_UNREGISTER: 662 unreg_event_syscall_enter(event); 663 return 0; 664 665 #ifdef CONFIG_PERF_EVENTS 666 case TRACE_REG_PERF_REGISTER: 667 return perf_sysenter_enable(event); 668 case TRACE_REG_PERF_UNREGISTER: 669 perf_sysenter_disable(event); 670 return 0; 671 case TRACE_REG_PERF_OPEN: 672 case TRACE_REG_PERF_CLOSE: 673 case TRACE_REG_PERF_ADD: 674 case TRACE_REG_PERF_DEL: 675 return 0; 676 #endif 677 } 678 return 0; 679 } 680 681 static int syscall_exit_register(struct ftrace_event_call *event, 682 enum trace_reg type, void *data) 683 { 684 switch (type) { 685 case TRACE_REG_REGISTER: 686 return reg_event_syscall_exit(event); 687 case TRACE_REG_UNREGISTER: 688 unreg_event_syscall_exit(event); 689 return 0; 690 691 #ifdef CONFIG_PERF_EVENTS 692 case TRACE_REG_PERF_REGISTER: 693 return perf_sysexit_enable(event); 694 case TRACE_REG_PERF_UNREGISTER: 695 perf_sysexit_disable(event); 696 return 0; 697 case TRACE_REG_PERF_OPEN: 698 case TRACE_REG_PERF_CLOSE: 699 case TRACE_REG_PERF_ADD: 700 case TRACE_REG_PERF_DEL: 701 return 0; 702 #endif 703 } 704 return 0; 705 } 706