1 #include <trace/syscall.h> 2 #include <trace/events/syscalls.h> 3 #include <linux/slab.h> 4 #include <linux/kernel.h> 5 #include <linux/ftrace.h> 6 #include <linux/perf_event.h> 7 #include <asm/syscall.h> 8 9 #include "trace_output.h" 10 #include "trace.h" 11 12 static DEFINE_MUTEX(syscall_trace_lock); 13 static int sys_refcount_enter; 14 static int sys_refcount_exit; 15 static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls); 16 static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls); 17 18 static int syscall_enter_register(struct ftrace_event_call *event, 19 enum trace_reg type); 20 static int syscall_exit_register(struct ftrace_event_call *event, 21 enum trace_reg type); 22 23 static int syscall_enter_define_fields(struct ftrace_event_call *call); 24 static int syscall_exit_define_fields(struct ftrace_event_call *call); 25 26 static struct list_head * 27 syscall_get_enter_fields(struct ftrace_event_call *call) 28 { 29 struct syscall_metadata *entry = call->data; 30 31 return &entry->enter_fields; 32 } 33 34 struct trace_event_functions enter_syscall_print_funcs = { 35 .trace = print_syscall_enter, 36 }; 37 38 struct trace_event_functions exit_syscall_print_funcs = { 39 .trace = print_syscall_exit, 40 }; 41 42 struct ftrace_event_class event_class_syscall_enter = { 43 .system = "syscalls", 44 .reg = syscall_enter_register, 45 .define_fields = syscall_enter_define_fields, 46 .get_fields = syscall_get_enter_fields, 47 .raw_init = init_syscall_trace, 48 }; 49 50 struct ftrace_event_class event_class_syscall_exit = { 51 .system = "syscalls", 52 .reg = syscall_exit_register, 53 .define_fields = syscall_exit_define_fields, 54 .fields = LIST_HEAD_INIT(event_class_syscall_exit.fields), 55 .raw_init = init_syscall_trace, 56 }; 57 58 extern struct syscall_metadata *__start_syscalls_metadata[]; 59 extern struct syscall_metadata *__stop_syscalls_metadata[]; 60 61 static struct syscall_metadata **syscalls_metadata; 62 63 static __init struct syscall_metadata * 64 find_syscall_meta(unsigned long syscall) 65 { 66 struct syscall_metadata **start; 67 struct syscall_metadata **stop; 68 char str[KSYM_SYMBOL_LEN]; 69 70 71 start = __start_syscalls_metadata; 72 stop = __stop_syscalls_metadata; 73 kallsyms_lookup(syscall, NULL, NULL, NULL, str); 74 75 for ( ; start < stop; start++) { 76 /* 77 * Only compare after the "sys" prefix. Archs that use 78 * syscall wrappers may have syscalls symbols aliases prefixed 79 * with "SyS" instead of "sys", leading to an unwanted 80 * mismatch. 81 */ 82 if ((*start)->name && !strcmp((*start)->name + 3, str + 3)) 83 return *start; 84 } 85 return NULL; 86 } 87 88 static struct syscall_metadata *syscall_nr_to_meta(int nr) 89 { 90 if (!syscalls_metadata || nr >= NR_syscalls || nr < 0) 91 return NULL; 92 93 return syscalls_metadata[nr]; 94 } 95 96 enum print_line_t 97 print_syscall_enter(struct trace_iterator *iter, int flags, 98 struct trace_event *event) 99 { 100 struct trace_seq *s = &iter->seq; 101 struct trace_entry *ent = iter->ent; 102 struct syscall_trace_enter *trace; 103 struct syscall_metadata *entry; 104 int i, ret, syscall; 105 106 trace = (typeof(trace))ent; 107 syscall = trace->nr; 108 entry = syscall_nr_to_meta(syscall); 109 110 if (!entry) 111 goto end; 112 113 if (entry->enter_event->event.type != ent->type) { 114 WARN_ON_ONCE(1); 115 goto end; 116 } 117 118 ret = trace_seq_printf(s, "%s(", entry->name); 119 if (!ret) 120 return TRACE_TYPE_PARTIAL_LINE; 121 122 for (i = 0; i < entry->nb_args; i++) { 123 /* parameter types */ 124 if (trace_flags & TRACE_ITER_VERBOSE) { 125 ret = trace_seq_printf(s, "%s ", entry->types[i]); 126 if (!ret) 127 return TRACE_TYPE_PARTIAL_LINE; 128 } 129 /* parameter values */ 130 ret = trace_seq_printf(s, "%s: %lx%s", entry->args[i], 131 trace->args[i], 132 i == entry->nb_args - 1 ? "" : ", "); 133 if (!ret) 134 return TRACE_TYPE_PARTIAL_LINE; 135 } 136 137 ret = trace_seq_putc(s, ')'); 138 if (!ret) 139 return TRACE_TYPE_PARTIAL_LINE; 140 141 end: 142 ret = trace_seq_putc(s, '\n'); 143 if (!ret) 144 return TRACE_TYPE_PARTIAL_LINE; 145 146 return TRACE_TYPE_HANDLED; 147 } 148 149 enum print_line_t 150 print_syscall_exit(struct trace_iterator *iter, int flags, 151 struct trace_event *event) 152 { 153 struct trace_seq *s = &iter->seq; 154 struct trace_entry *ent = iter->ent; 155 struct syscall_trace_exit *trace; 156 int syscall; 157 struct syscall_metadata *entry; 158 int ret; 159 160 trace = (typeof(trace))ent; 161 syscall = trace->nr; 162 entry = syscall_nr_to_meta(syscall); 163 164 if (!entry) { 165 trace_seq_printf(s, "\n"); 166 return TRACE_TYPE_HANDLED; 167 } 168 169 if (entry->exit_event->event.type != ent->type) { 170 WARN_ON_ONCE(1); 171 return TRACE_TYPE_UNHANDLED; 172 } 173 174 ret = trace_seq_printf(s, "%s -> 0x%lx\n", entry->name, 175 trace->ret); 176 if (!ret) 177 return TRACE_TYPE_PARTIAL_LINE; 178 179 return TRACE_TYPE_HANDLED; 180 } 181 182 extern char *__bad_type_size(void); 183 184 #define SYSCALL_FIELD(type, name) \ 185 sizeof(type) != sizeof(trace.name) ? \ 186 __bad_type_size() : \ 187 #type, #name, offsetof(typeof(trace), name), \ 188 sizeof(trace.name), is_signed_type(type) 189 190 static 191 int __set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len) 192 { 193 int i; 194 int pos = 0; 195 196 /* When len=0, we just calculate the needed length */ 197 #define LEN_OR_ZERO (len ? len - pos : 0) 198 199 pos += snprintf(buf + pos, LEN_OR_ZERO, "\""); 200 for (i = 0; i < entry->nb_args; i++) { 201 pos += snprintf(buf + pos, LEN_OR_ZERO, "%s: 0x%%0%zulx%s", 202 entry->args[i], sizeof(unsigned long), 203 i == entry->nb_args - 1 ? "" : ", "); 204 } 205 pos += snprintf(buf + pos, LEN_OR_ZERO, "\""); 206 207 for (i = 0; i < entry->nb_args; i++) { 208 pos += snprintf(buf + pos, LEN_OR_ZERO, 209 ", ((unsigned long)(REC->%s))", entry->args[i]); 210 } 211 212 #undef LEN_OR_ZERO 213 214 /* return the length of print_fmt */ 215 return pos; 216 } 217 218 static int set_syscall_print_fmt(struct ftrace_event_call *call) 219 { 220 char *print_fmt; 221 int len; 222 struct syscall_metadata *entry = call->data; 223 224 if (entry->enter_event != call) { 225 call->print_fmt = "\"0x%lx\", REC->ret"; 226 return 0; 227 } 228 229 /* First: called with 0 length to calculate the needed length */ 230 len = __set_enter_print_fmt(entry, NULL, 0); 231 232 print_fmt = kmalloc(len + 1, GFP_KERNEL); 233 if (!print_fmt) 234 return -ENOMEM; 235 236 /* Second: actually write the @print_fmt */ 237 __set_enter_print_fmt(entry, print_fmt, len + 1); 238 call->print_fmt = print_fmt; 239 240 return 0; 241 } 242 243 static void free_syscall_print_fmt(struct ftrace_event_call *call) 244 { 245 struct syscall_metadata *entry = call->data; 246 247 if (entry->enter_event == call) 248 kfree(call->print_fmt); 249 } 250 251 static int syscall_enter_define_fields(struct ftrace_event_call *call) 252 { 253 struct syscall_trace_enter trace; 254 struct syscall_metadata *meta = call->data; 255 int ret; 256 int i; 257 int offset = offsetof(typeof(trace), args); 258 259 ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER); 260 if (ret) 261 return ret; 262 263 for (i = 0; i < meta->nb_args; i++) { 264 ret = trace_define_field(call, meta->types[i], 265 meta->args[i], offset, 266 sizeof(unsigned long), 0, 267 FILTER_OTHER); 268 offset += sizeof(unsigned long); 269 } 270 271 return ret; 272 } 273 274 static int syscall_exit_define_fields(struct ftrace_event_call *call) 275 { 276 struct syscall_trace_exit trace; 277 int ret; 278 279 ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER); 280 if (ret) 281 return ret; 282 283 ret = trace_define_field(call, SYSCALL_FIELD(long, ret), 284 FILTER_OTHER); 285 286 return ret; 287 } 288 289 void ftrace_syscall_enter(void *ignore, struct pt_regs *regs, long id) 290 { 291 struct syscall_trace_enter *entry; 292 struct syscall_metadata *sys_data; 293 struct ring_buffer_event *event; 294 struct ring_buffer *buffer; 295 int size; 296 int syscall_nr; 297 298 syscall_nr = syscall_get_nr(current, regs); 299 if (syscall_nr < 0) 300 return; 301 if (!test_bit(syscall_nr, enabled_enter_syscalls)) 302 return; 303 304 sys_data = syscall_nr_to_meta(syscall_nr); 305 if (!sys_data) 306 return; 307 308 size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args; 309 310 event = trace_current_buffer_lock_reserve(&buffer, 311 sys_data->enter_event->event.type, size, 0, 0); 312 if (!event) 313 return; 314 315 entry = ring_buffer_event_data(event); 316 entry->nr = syscall_nr; 317 syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args); 318 319 if (!filter_current_check_discard(buffer, sys_data->enter_event, 320 entry, event)) 321 trace_current_buffer_unlock_commit(buffer, event, 0, 0); 322 } 323 324 void ftrace_syscall_exit(void *ignore, struct pt_regs *regs, long ret) 325 { 326 struct syscall_trace_exit *entry; 327 struct syscall_metadata *sys_data; 328 struct ring_buffer_event *event; 329 struct ring_buffer *buffer; 330 int syscall_nr; 331 332 syscall_nr = syscall_get_nr(current, regs); 333 if (syscall_nr < 0) 334 return; 335 if (!test_bit(syscall_nr, enabled_exit_syscalls)) 336 return; 337 338 sys_data = syscall_nr_to_meta(syscall_nr); 339 if (!sys_data) 340 return; 341 342 event = trace_current_buffer_lock_reserve(&buffer, 343 sys_data->exit_event->event.type, sizeof(*entry), 0, 0); 344 if (!event) 345 return; 346 347 entry = ring_buffer_event_data(event); 348 entry->nr = syscall_nr; 349 entry->ret = syscall_get_return_value(current, regs); 350 351 if (!filter_current_check_discard(buffer, sys_data->exit_event, 352 entry, event)) 353 trace_current_buffer_unlock_commit(buffer, event, 0, 0); 354 } 355 356 int reg_event_syscall_enter(struct ftrace_event_call *call) 357 { 358 int ret = 0; 359 int num; 360 361 num = ((struct syscall_metadata *)call->data)->syscall_nr; 362 if (num < 0 || num >= NR_syscalls) 363 return -ENOSYS; 364 mutex_lock(&syscall_trace_lock); 365 if (!sys_refcount_enter) 366 ret = register_trace_sys_enter(ftrace_syscall_enter, NULL); 367 if (!ret) { 368 set_bit(num, enabled_enter_syscalls); 369 sys_refcount_enter++; 370 } 371 mutex_unlock(&syscall_trace_lock); 372 return ret; 373 } 374 375 void unreg_event_syscall_enter(struct ftrace_event_call *call) 376 { 377 int num; 378 379 num = ((struct syscall_metadata *)call->data)->syscall_nr; 380 if (num < 0 || num >= NR_syscalls) 381 return; 382 mutex_lock(&syscall_trace_lock); 383 sys_refcount_enter--; 384 clear_bit(num, enabled_enter_syscalls); 385 if (!sys_refcount_enter) 386 unregister_trace_sys_enter(ftrace_syscall_enter, NULL); 387 mutex_unlock(&syscall_trace_lock); 388 } 389 390 int reg_event_syscall_exit(struct ftrace_event_call *call) 391 { 392 int ret = 0; 393 int num; 394 395 num = ((struct syscall_metadata *)call->data)->syscall_nr; 396 if (num < 0 || num >= NR_syscalls) 397 return -ENOSYS; 398 mutex_lock(&syscall_trace_lock); 399 if (!sys_refcount_exit) 400 ret = register_trace_sys_exit(ftrace_syscall_exit, NULL); 401 if (!ret) { 402 set_bit(num, enabled_exit_syscalls); 403 sys_refcount_exit++; 404 } 405 mutex_unlock(&syscall_trace_lock); 406 return ret; 407 } 408 409 void unreg_event_syscall_exit(struct ftrace_event_call *call) 410 { 411 int num; 412 413 num = ((struct syscall_metadata *)call->data)->syscall_nr; 414 if (num < 0 || num >= NR_syscalls) 415 return; 416 mutex_lock(&syscall_trace_lock); 417 sys_refcount_exit--; 418 clear_bit(num, enabled_exit_syscalls); 419 if (!sys_refcount_exit) 420 unregister_trace_sys_exit(ftrace_syscall_exit, NULL); 421 mutex_unlock(&syscall_trace_lock); 422 } 423 424 int init_syscall_trace(struct ftrace_event_call *call) 425 { 426 int id; 427 428 if (set_syscall_print_fmt(call) < 0) 429 return -ENOMEM; 430 431 id = trace_event_raw_init(call); 432 433 if (id < 0) { 434 free_syscall_print_fmt(call); 435 return id; 436 } 437 438 return id; 439 } 440 441 unsigned long __init arch_syscall_addr(int nr) 442 { 443 return (unsigned long)sys_call_table[nr]; 444 } 445 446 int __init init_ftrace_syscalls(void) 447 { 448 struct syscall_metadata *meta; 449 unsigned long addr; 450 int i; 451 452 syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) * 453 NR_syscalls, GFP_KERNEL); 454 if (!syscalls_metadata) { 455 WARN_ON(1); 456 return -ENOMEM; 457 } 458 459 for (i = 0; i < NR_syscalls; i++) { 460 addr = arch_syscall_addr(i); 461 meta = find_syscall_meta(addr); 462 if (!meta) 463 continue; 464 465 meta->syscall_nr = i; 466 syscalls_metadata[i] = meta; 467 } 468 469 return 0; 470 } 471 core_initcall(init_ftrace_syscalls); 472 473 #ifdef CONFIG_PERF_EVENTS 474 475 static DECLARE_BITMAP(enabled_perf_enter_syscalls, NR_syscalls); 476 static DECLARE_BITMAP(enabled_perf_exit_syscalls, NR_syscalls); 477 static int sys_perf_refcount_enter; 478 static int sys_perf_refcount_exit; 479 480 static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) 481 { 482 struct syscall_metadata *sys_data; 483 struct syscall_trace_enter *rec; 484 struct hlist_head *head; 485 int syscall_nr; 486 int rctx; 487 int size; 488 489 syscall_nr = syscall_get_nr(current, regs); 490 if (!test_bit(syscall_nr, enabled_perf_enter_syscalls)) 491 return; 492 493 sys_data = syscall_nr_to_meta(syscall_nr); 494 if (!sys_data) 495 return; 496 497 /* get the size after alignment with the u32 buffer size field */ 498 size = sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec); 499 size = ALIGN(size + sizeof(u32), sizeof(u64)); 500 size -= sizeof(u32); 501 502 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, 503 "perf buffer not large enough")) 504 return; 505 506 rec = (struct syscall_trace_enter *)perf_trace_buf_prepare(size, 507 sys_data->enter_event->event.type, regs, &rctx); 508 if (!rec) 509 return; 510 511 rec->nr = syscall_nr; 512 syscall_get_arguments(current, regs, 0, sys_data->nb_args, 513 (unsigned long *)&rec->args); 514 515 head = this_cpu_ptr(sys_data->enter_event->perf_events); 516 perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head); 517 } 518 519 int perf_sysenter_enable(struct ftrace_event_call *call) 520 { 521 int ret = 0; 522 int num; 523 524 num = ((struct syscall_metadata *)call->data)->syscall_nr; 525 526 mutex_lock(&syscall_trace_lock); 527 if (!sys_perf_refcount_enter) 528 ret = register_trace_sys_enter(perf_syscall_enter, NULL); 529 if (ret) { 530 pr_info("event trace: Could not activate" 531 "syscall entry trace point"); 532 } else { 533 set_bit(num, enabled_perf_enter_syscalls); 534 sys_perf_refcount_enter++; 535 } 536 mutex_unlock(&syscall_trace_lock); 537 return ret; 538 } 539 540 void perf_sysenter_disable(struct ftrace_event_call *call) 541 { 542 int num; 543 544 num = ((struct syscall_metadata *)call->data)->syscall_nr; 545 546 mutex_lock(&syscall_trace_lock); 547 sys_perf_refcount_enter--; 548 clear_bit(num, enabled_perf_enter_syscalls); 549 if (!sys_perf_refcount_enter) 550 unregister_trace_sys_enter(perf_syscall_enter, NULL); 551 mutex_unlock(&syscall_trace_lock); 552 } 553 554 static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret) 555 { 556 struct syscall_metadata *sys_data; 557 struct syscall_trace_exit *rec; 558 struct hlist_head *head; 559 int syscall_nr; 560 int rctx; 561 int size; 562 563 syscall_nr = syscall_get_nr(current, regs); 564 if (!test_bit(syscall_nr, enabled_perf_exit_syscalls)) 565 return; 566 567 sys_data = syscall_nr_to_meta(syscall_nr); 568 if (!sys_data) 569 return; 570 571 /* We can probably do that at build time */ 572 size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64)); 573 size -= sizeof(u32); 574 575 /* 576 * Impossible, but be paranoid with the future 577 * How to put this check outside runtime? 578 */ 579 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, 580 "exit event has grown above perf buffer size")) 581 return; 582 583 rec = (struct syscall_trace_exit *)perf_trace_buf_prepare(size, 584 sys_data->exit_event->event.type, regs, &rctx); 585 if (!rec) 586 return; 587 588 rec->nr = syscall_nr; 589 rec->ret = syscall_get_return_value(current, regs); 590 591 head = this_cpu_ptr(sys_data->exit_event->perf_events); 592 perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head); 593 } 594 595 int perf_sysexit_enable(struct ftrace_event_call *call) 596 { 597 int ret = 0; 598 int num; 599 600 num = ((struct syscall_metadata *)call->data)->syscall_nr; 601 602 mutex_lock(&syscall_trace_lock); 603 if (!sys_perf_refcount_exit) 604 ret = register_trace_sys_exit(perf_syscall_exit, NULL); 605 if (ret) { 606 pr_info("event trace: Could not activate" 607 "syscall exit trace point"); 608 } else { 609 set_bit(num, enabled_perf_exit_syscalls); 610 sys_perf_refcount_exit++; 611 } 612 mutex_unlock(&syscall_trace_lock); 613 return ret; 614 } 615 616 void perf_sysexit_disable(struct ftrace_event_call *call) 617 { 618 int num; 619 620 num = ((struct syscall_metadata *)call->data)->syscall_nr; 621 622 mutex_lock(&syscall_trace_lock); 623 sys_perf_refcount_exit--; 624 clear_bit(num, enabled_perf_exit_syscalls); 625 if (!sys_perf_refcount_exit) 626 unregister_trace_sys_exit(perf_syscall_exit, NULL); 627 mutex_unlock(&syscall_trace_lock); 628 } 629 630 #endif /* CONFIG_PERF_EVENTS */ 631 632 static int syscall_enter_register(struct ftrace_event_call *event, 633 enum trace_reg type) 634 { 635 switch (type) { 636 case TRACE_REG_REGISTER: 637 return reg_event_syscall_enter(event); 638 case TRACE_REG_UNREGISTER: 639 unreg_event_syscall_enter(event); 640 return 0; 641 642 #ifdef CONFIG_PERF_EVENTS 643 case TRACE_REG_PERF_REGISTER: 644 return perf_sysenter_enable(event); 645 case TRACE_REG_PERF_UNREGISTER: 646 perf_sysenter_disable(event); 647 return 0; 648 #endif 649 } 650 return 0; 651 } 652 653 static int syscall_exit_register(struct ftrace_event_call *event, 654 enum trace_reg type) 655 { 656 switch (type) { 657 case TRACE_REG_REGISTER: 658 return reg_event_syscall_exit(event); 659 case TRACE_REG_UNREGISTER: 660 unreg_event_syscall_exit(event); 661 return 0; 662 663 #ifdef CONFIG_PERF_EVENTS 664 case TRACE_REG_PERF_REGISTER: 665 return perf_sysexit_enable(event); 666 case TRACE_REG_PERF_UNREGISTER: 667 perf_sysexit_disable(event); 668 return 0; 669 #endif 670 } 671 return 0; 672 } 673