#include <trace/syscall.h>
#include <trace/events/syscalls.h>
#include <linux/slab.h>
#include <linux/kernel.h>
#include <linux/ftrace.h>
#include <linux/perf_event.h>
#include <asm/syscall.h>

#include "trace_output.h"
#include "trace.h"

static DEFINE_MUTEX(syscall_trace_lock);
static int sys_refcount_enter;
static int sys_refcount_exit;
static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls);
static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls);

static int syscall_enter_register(struct ftrace_event_call *event,
				  enum trace_reg type);
static int syscall_exit_register(struct ftrace_event_call *event,
				 enum trace_reg type);

static int syscall_enter_define_fields(struct ftrace_event_call *call);
static int syscall_exit_define_fields(struct ftrace_event_call *call);

/* All syscall exit events have the same fields */
static LIST_HEAD(syscall_exit_fields);

static struct list_head *
syscall_get_enter_fields(struct ftrace_event_call *call)
{
	struct syscall_metadata *entry = call->data;

	return &entry->enter_fields;
}

static struct list_head *
syscall_get_exit_fields(struct ftrace_event_call *call)
{
	return &syscall_exit_fields;
}

struct trace_event_functions enter_syscall_print_funcs = {
	.trace		= print_syscall_enter,
};

struct trace_event_functions exit_syscall_print_funcs = {
	.trace		= print_syscall_exit,
};

struct ftrace_event_class event_class_syscall_enter = {
	.system		= "syscalls",
	.reg		= syscall_enter_register,
	.define_fields	= syscall_enter_define_fields,
	.get_fields	= syscall_get_enter_fields,
	.raw_init	= init_syscall_trace,
};

struct ftrace_event_class event_class_syscall_exit = {
	.system		= "syscalls",
	.reg		= syscall_exit_register,
	.define_fields	= syscall_exit_define_fields,
	.get_fields	= syscall_get_exit_fields,
	.raw_init	= init_syscall_trace,
};

extern unsigned long __start_syscalls_metadata[];
extern unsigned long __stop_syscalls_metadata[];

static struct syscall_metadata **syscalls_metadata;
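
/*
 * __start_syscalls_metadata and __stop_syscalls_metadata bound the
 * linker section that the SYSCALL_METADATA()/SYSCALL_DEFINEx() macros
 * populate with one struct syscall_metadata per defined syscall.
 * find_syscall_meta() below scans that section to map a syscall's text
 * address (via its kallsyms name) back to its metadata.
 */
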
static struct syscall_metadata *find_syscall_meta(unsigned long syscall)
{
	struct syscall_metadata *start;
	struct syscall_metadata *stop;
	char str[KSYM_SYMBOL_LEN];

	start = (struct syscall_metadata *)__start_syscalls_metadata;
	stop = (struct syscall_metadata *)__stop_syscalls_metadata;
	kallsyms_lookup(syscall, NULL, NULL, NULL, str);

	for ( ; start < stop; start++) {
		/*
		 * Only compare after the "sys" prefix. Archs that use
		 * syscall wrappers may have syscall symbol aliases prefixed
		 * with "SyS" instead of "sys", leading to an unwanted
		 * mismatch.
		 */
		if (start->name && !strcmp(start->name + 3, str + 3))
			return start;
	}
	return NULL;
}

static struct syscall_metadata *syscall_nr_to_meta(int nr)
{
	if (!syscalls_metadata || nr >= NR_syscalls || nr < 0)
		return NULL;

	return syscalls_metadata[nr];
}

enum print_line_t
print_syscall_enter(struct trace_iterator *iter, int flags,
		    struct trace_event *event)
{
	struct trace_seq *s = &iter->seq;
	struct trace_entry *ent = iter->ent;
	struct syscall_trace_enter *trace;
	struct syscall_metadata *entry;
	int i, ret, syscall;

	trace = (typeof(trace))ent;
	syscall = trace->nr;
	entry = syscall_nr_to_meta(syscall);

	if (!entry)
		goto end;

	if (entry->enter_event->event.type != ent->type) {
		WARN_ON_ONCE(1);
		goto end;
	}

	ret = trace_seq_printf(s, "%s(", entry->name);
	if (!ret)
		return TRACE_TYPE_PARTIAL_LINE;

	for (i = 0; i < entry->nb_args; i++) {
		/* parameter types */
		if (trace_flags & TRACE_ITER_VERBOSE) {
			ret = trace_seq_printf(s, "%s ", entry->types[i]);
			if (!ret)
				return TRACE_TYPE_PARTIAL_LINE;
		}
		/* parameter values */
		ret = trace_seq_printf(s, "%s: %lx%s", entry->args[i],
				       trace->args[i],
				       i == entry->nb_args - 1 ? "" : ", ");
		if (!ret)
			return TRACE_TYPE_PARTIAL_LINE;
	}

	ret = trace_seq_putc(s, ')');
	if (!ret)
		return TRACE_TYPE_PARTIAL_LINE;

end:
	ret = trace_seq_putc(s, '\n');
	if (!ret)
		return TRACE_TYPE_PARTIAL_LINE;

	return TRACE_TYPE_HANDLED;
}

enum print_line_t
print_syscall_exit(struct trace_iterator *iter, int flags,
		   struct trace_event *event)
{
	struct trace_seq *s = &iter->seq;
	struct trace_entry *ent = iter->ent;
	struct syscall_trace_exit *trace;
	int syscall;
	struct syscall_metadata *entry;
	int ret;

	trace = (typeof(trace))ent;
	syscall = trace->nr;
	entry = syscall_nr_to_meta(syscall);

	if (!entry) {
		trace_seq_printf(s, "\n");
		return TRACE_TYPE_HANDLED;
	}

	if (entry->exit_event->event.type != ent->type) {
		WARN_ON_ONCE(1);
		return TRACE_TYPE_UNHANDLED;
	}

	ret = trace_seq_printf(s, "%s -> 0x%lx\n", entry->name,
				trace->ret);
	if (!ret)
		return TRACE_TYPE_PARTIAL_LINE;

	return TRACE_TYPE_HANDLED;
}

/*
 * Illustrative (not verbatim) event text produced by the two printers
 * above for a read(2) call, as seen in the "trace" file after the usual
 * comm/PID/CPU/timestamp columns:
 *
 *	sys_read(fd: 3, buf: 7fffb8c0, count: 400)
 *	sys_read -> 0x400
 *
 * With the "verbose" trace option set, each value is preceded by its
 * parameter type, e.g. "unsigned int fd: 3".
 */

extern char *__bad_type_size(void);

#define SYSCALL_FIELD(type, name)					\
	sizeof(type) != sizeof(trace.name) ?				\
		__bad_type_size() :					\
		#type, #name, offsetof(typeof(trace), name),		\
		sizeof(trace.name), is_signed_type(type)

static
int __set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len)
{
	int i;
	int pos = 0;

	/* When len=0, we just calculate the needed length */
#define LEN_OR_ZERO (len ? len - pos : 0)

	pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
	for (i = 0; i < entry->nb_args; i++) {
		pos += snprintf(buf + pos, LEN_OR_ZERO, "%s: 0x%%0%zulx%s",
				entry->args[i], sizeof(unsigned long),
				i == entry->nb_args - 1 ? "" : ", ");
	}
	pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");

	for (i = 0; i < entry->nb_args; i++) {
		pos += snprintf(buf + pos, LEN_OR_ZERO,
				", ((unsigned long)(REC->%s))", entry->args[i]);
	}

#undef LEN_OR_ZERO

	/* return the length of print_fmt */
	return pos;
}

/*
 * For illustration: on a 64-bit arch, __set_enter_print_fmt() builds the
 * print_fmt for sys_read roughly as
 *
 *	"fd: 0x%08lx, buf: 0x%08lx, count: 0x%08lx",
 *	((unsigned long)(REC->fd)), ((unsigned long)(REC->buf)),
 *	((unsigned long)(REC->count))
 *
 * i.e. a quoted format string followed by one REC-> argument per syscall
 * parameter, which is what the event's "format" file exposes.
 */

static int set_syscall_print_fmt(struct ftrace_event_call *call)
{
	char *print_fmt;
	int len;
	struct syscall_metadata *entry = call->data;

	if (entry->enter_event != call) {
		call->print_fmt = "\"0x%lx\", REC->ret";
		return 0;
	}

	/* First: called with 0 length to calculate the needed length */
	len = __set_enter_print_fmt(entry, NULL, 0);

	print_fmt = kmalloc(len + 1, GFP_KERNEL);
	if (!print_fmt)
		return -ENOMEM;

	/* Second: actually write the @print_fmt */
	__set_enter_print_fmt(entry, print_fmt, len + 1);
	call->print_fmt = print_fmt;

	return 0;
}

static void free_syscall_print_fmt(struct ftrace_event_call *call)
{
	struct syscall_metadata *entry = call->data;

	if (entry->enter_event == call)
		kfree(call->print_fmt);
}

static int syscall_enter_define_fields(struct ftrace_event_call *call)
{
	struct syscall_trace_enter trace;
	struct syscall_metadata *meta = call->data;
	int ret;
	int i;
	int offset = offsetof(typeof(trace), args);

	ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
	if (ret)
		return ret;

	for (i = 0; i < meta->nb_args; i++) {
		ret = trace_define_field(call, meta->types[i],
					 meta->args[i], offset,
					 sizeof(unsigned long), 0,
					 FILTER_OTHER);
		offset += sizeof(unsigned long);
	}

	return ret;
}

static int syscall_exit_define_fields(struct ftrace_event_call *call)
{
	struct syscall_trace_exit trace;
	int ret;

	ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
	if (ret)
		return ret;

	ret = trace_define_field(call, SYSCALL_FIELD(long, ret),
				 FILTER_OTHER);

	return ret;
}

void ftrace_syscall_enter(void *ignore, struct pt_regs *regs, long id)
{
	struct syscall_trace_enter *entry;
	struct syscall_metadata *sys_data;
	struct ring_buffer_event *event;
	struct ring_buffer *buffer;
	int size;
	int syscall_nr;

	syscall_nr = syscall_get_nr(current, regs);
	if (syscall_nr < 0)
		return;
	if (!test_bit(syscall_nr, enabled_enter_syscalls))
		return;

	sys_data = syscall_nr_to_meta(syscall_nr);
	if (!sys_data)
		return;

	size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;

	event = trace_current_buffer_lock_reserve(&buffer,
			sys_data->enter_event->event.type, size, 0, 0);
	if (!event)
		return;

	entry = ring_buffer_event_data(event);
	entry->nr = syscall_nr;
	syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args);

	if (!filter_current_check_discard(buffer, sys_data->enter_event,
					  entry, event))
		trace_current_buffer_unlock_commit(buffer, event, 0, 0);
}
void ftrace_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
{
	struct syscall_trace_exit *entry;
	struct syscall_metadata *sys_data;
	struct ring_buffer_event *event;
	struct ring_buffer *buffer;
	int syscall_nr;

	syscall_nr = syscall_get_nr(current, regs);
	if (syscall_nr < 0)
		return;
	if (!test_bit(syscall_nr, enabled_exit_syscalls))
		return;

	sys_data = syscall_nr_to_meta(syscall_nr);
	if (!sys_data)
		return;

	event = trace_current_buffer_lock_reserve(&buffer,
			sys_data->exit_event->event.type, sizeof(*entry), 0, 0);
	if (!event)
		return;

	entry = ring_buffer_event_data(event);
	entry->nr = syscall_nr;
	entry->ret = syscall_get_return_value(current, regs);

	if (!filter_current_check_discard(buffer, sys_data->exit_event,
					  entry, event))
		trace_current_buffer_unlock_commit(buffer, event, 0, 0);
}

int reg_event_syscall_enter(struct ftrace_event_call *call)
{
	int ret = 0;
	int num;

	num = ((struct syscall_metadata *)call->data)->syscall_nr;
	if (num < 0 || num >= NR_syscalls)
		return -ENOSYS;
	mutex_lock(&syscall_trace_lock);
	if (!sys_refcount_enter)
		ret = register_trace_sys_enter(ftrace_syscall_enter, NULL);
	if (!ret) {
		set_bit(num, enabled_enter_syscalls);
		sys_refcount_enter++;
	}
	mutex_unlock(&syscall_trace_lock);
	return ret;
}

void unreg_event_syscall_enter(struct ftrace_event_call *call)
{
	int num;

	num = ((struct syscall_metadata *)call->data)->syscall_nr;
	if (num < 0 || num >= NR_syscalls)
		return;
	mutex_lock(&syscall_trace_lock);
	sys_refcount_enter--;
	clear_bit(num, enabled_enter_syscalls);
	if (!sys_refcount_enter)
		unregister_trace_sys_enter(ftrace_syscall_enter, NULL);
	mutex_unlock(&syscall_trace_lock);
}

int reg_event_syscall_exit(struct ftrace_event_call *call)
{
	int ret = 0;
	int num;

	num = ((struct syscall_metadata *)call->data)->syscall_nr;
	if (num < 0 || num >= NR_syscalls)
		return -ENOSYS;
	mutex_lock(&syscall_trace_lock);
	if (!sys_refcount_exit)
		ret = register_trace_sys_exit(ftrace_syscall_exit, NULL);
	if (!ret) {
		set_bit(num, enabled_exit_syscalls);
		sys_refcount_exit++;
	}
	mutex_unlock(&syscall_trace_lock);
	return ret;
}

void unreg_event_syscall_exit(struct ftrace_event_call *call)
{
	int num;

	num = ((struct syscall_metadata *)call->data)->syscall_nr;
	if (num < 0 || num >= NR_syscalls)
		return;
	mutex_lock(&syscall_trace_lock);
	sys_refcount_exit--;
	clear_bit(num, enabled_exit_syscalls);
	if (!sys_refcount_exit)
		unregister_trace_sys_exit(ftrace_syscall_exit, NULL);
	mutex_unlock(&syscall_trace_lock);
}

int init_syscall_trace(struct ftrace_event_call *call)
{
	int id;

	if (set_syscall_print_fmt(call) < 0)
		return -ENOMEM;

	id = trace_event_raw_init(call);

	if (id < 0) {
		free_syscall_print_fmt(call);
		return id;
	}

	return id;
}

unsigned long __init arch_syscall_addr(int nr)
{
	return (unsigned long)sys_call_table[nr];
}

int __init init_ftrace_syscalls(void)
{
	struct syscall_metadata *meta;
	unsigned long addr;
	int i;

	syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) *
					NR_syscalls, GFP_KERNEL);
	if (!syscalls_metadata) {
		WARN_ON(1);
		return -ENOMEM;
	}

	for (i = 0; i < NR_syscalls; i++) {
		addr = arch_syscall_addr(i);
		meta = find_syscall_meta(addr);
		if (!meta)
			continue;

		meta->syscall_nr = i;
		syscalls_metadata[i] = meta;
	}

	return 0;
}
core_initcall(init_ftrace_syscalls);
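
/*
 * The perf path below mirrors the ftrace path above: a global refcount
 * registers the sys_enter/sys_exit tracepoint probe once, a per-syscall
 * bitmap selects which syscalls are actually sampled, and records are
 * delivered to the per-cpu hlist of perf events attached to the ftrace
 * event rather than to the ftrace ring buffer.
 */
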
#ifdef CONFIG_PERF_EVENTS

static DECLARE_BITMAP(enabled_perf_enter_syscalls, NR_syscalls);
static DECLARE_BITMAP(enabled_perf_exit_syscalls, NR_syscalls);
static int sys_perf_refcount_enter;
static int sys_perf_refcount_exit;

static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
{
	struct syscall_metadata *sys_data;
	struct syscall_trace_enter *rec;
	struct hlist_head *head;
	int syscall_nr;
	int rctx;
	int size;

	syscall_nr = syscall_get_nr(current, regs);
	/* syscall_get_nr() can return -1; bail out as the ftrace handlers do */
	if (syscall_nr < 0)
		return;
	if (!test_bit(syscall_nr, enabled_perf_enter_syscalls))
		return;

	sys_data = syscall_nr_to_meta(syscall_nr);
	if (!sys_data)
		return;

	/* get the size after alignment with the u32 buffer size field */
	size = sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec);
	size = ALIGN(size + sizeof(u32), sizeof(u64));
	size -= sizeof(u32);

	if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
		      "perf buffer not large enough"))
		return;

	rec = (struct syscall_trace_enter *)perf_trace_buf_prepare(size,
				sys_data->enter_event->event.type, regs, &rctx);
	if (!rec)
		return;

	rec->nr = syscall_nr;
	syscall_get_arguments(current, regs, 0, sys_data->nb_args,
			      (unsigned long *)&rec->args);

	head = this_cpu_ptr(sys_data->enter_event->perf_events);
	perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head);
}

int perf_sysenter_enable(struct ftrace_event_call *call)
{
	int ret = 0;
	int num;

	num = ((struct syscall_metadata *)call->data)->syscall_nr;

	mutex_lock(&syscall_trace_lock);
	if (!sys_perf_refcount_enter)
		ret = register_trace_sys_enter(perf_syscall_enter, NULL);
	if (ret) {
		pr_info("event trace: Could not activate "
			"syscall entry trace point\n");
	} else {
		set_bit(num, enabled_perf_enter_syscalls);
		sys_perf_refcount_enter++;
	}
	mutex_unlock(&syscall_trace_lock);
	return ret;
}

void perf_sysenter_disable(struct ftrace_event_call *call)
{
	int num;

	num = ((struct syscall_metadata *)call->data)->syscall_nr;

	mutex_lock(&syscall_trace_lock);
	sys_perf_refcount_enter--;
	clear_bit(num, enabled_perf_enter_syscalls);
	if (!sys_perf_refcount_enter)
		unregister_trace_sys_enter(perf_syscall_enter, NULL);
	mutex_unlock(&syscall_trace_lock);
}
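
/*
 * Worked example of the size rounding used by both perf handlers: on a
 * 64-bit arch, a six-argument sys_enter record needs sizeof(*rec) +
 * 6 * sizeof(unsigned long) bytes of payload.  perf prepends a u32 size
 * field to each sample, so the handlers round payload + sizeof(u32) up
 * to a multiple of u64 and subtract the u32 again, keeping the whole
 * sample 8-byte aligned in the perf buffer.
 */
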
static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
{
	struct syscall_metadata *sys_data;
	struct syscall_trace_exit *rec;
	struct hlist_head *head;
	int syscall_nr;
	int rctx;
	int size;

	syscall_nr = syscall_get_nr(current, regs);
	/* syscall_get_nr() can return -1; bail out as the ftrace handlers do */
	if (syscall_nr < 0)
		return;
	if (!test_bit(syscall_nr, enabled_perf_exit_syscalls))
		return;

	sys_data = syscall_nr_to_meta(syscall_nr);
	if (!sys_data)
		return;

	/* We can probably compute this at build time */
	size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64));
	size -= sizeof(u32);

	/*
	 * Impossible, but be paranoid about the future.
	 * How do we move this check out of the runtime path?
	 */
	if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
		      "exit event has grown above perf buffer size"))
		return;

	rec = (struct syscall_trace_exit *)perf_trace_buf_prepare(size,
				sys_data->exit_event->event.type, regs, &rctx);
	if (!rec)
		return;

	rec->nr = syscall_nr;
	rec->ret = syscall_get_return_value(current, regs);

	head = this_cpu_ptr(sys_data->exit_event->perf_events);
	perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head);
}

int perf_sysexit_enable(struct ftrace_event_call *call)
{
	int ret = 0;
	int num;

	num = ((struct syscall_metadata *)call->data)->syscall_nr;

	mutex_lock(&syscall_trace_lock);
	if (!sys_perf_refcount_exit)
		ret = register_trace_sys_exit(perf_syscall_exit, NULL);
	if (ret) {
		pr_info("event trace: Could not activate "
			"syscall exit trace point\n");
	} else {
		set_bit(num, enabled_perf_exit_syscalls);
		sys_perf_refcount_exit++;
	}
	mutex_unlock(&syscall_trace_lock);
	return ret;
}

void perf_sysexit_disable(struct ftrace_event_call *call)
{
	int num;

	num = ((struct syscall_metadata *)call->data)->syscall_nr;

	mutex_lock(&syscall_trace_lock);
	sys_perf_refcount_exit--;
	clear_bit(num, enabled_perf_exit_syscalls);
	if (!sys_perf_refcount_exit)
		unregister_trace_sys_exit(perf_syscall_exit, NULL);
	mutex_unlock(&syscall_trace_lock);
}

#endif /* CONFIG_PERF_EVENTS */

static int syscall_enter_register(struct ftrace_event_call *event,
				  enum trace_reg type)
{
	switch (type) {
	case TRACE_REG_REGISTER:
		return reg_event_syscall_enter(event);
	case TRACE_REG_UNREGISTER:
		unreg_event_syscall_enter(event);
		return 0;

#ifdef CONFIG_PERF_EVENTS
	case TRACE_REG_PERF_REGISTER:
		return perf_sysenter_enable(event);
	case TRACE_REG_PERF_UNREGISTER:
		perf_sysenter_disable(event);
		return 0;
#endif
	}
	return 0;
}

static int syscall_exit_register(struct ftrace_event_call *event,
				 enum trace_reg type)
{
	switch (type) {
	case TRACE_REG_REGISTER:
		return reg_event_syscall_exit(event);
	case TRACE_REG_UNREGISTER:
		unreg_event_syscall_exit(event);
		return 0;

#ifdef CONFIG_PERF_EVENTS
	case TRACE_REG_PERF_REGISTER:
		return perf_sysexit_enable(event);
	case TRACE_REG_PERF_UNREGISTER:
		perf_sysexit_disable(event);
		return 0;
#endif
	}
	return 0;
}