#include <trace/syscall.h>
#include <trace/events/syscalls.h>
#include <linux/kernel.h>
#include <linux/ftrace.h>
#include <linux/perf_event.h>
#include <asm/syscall.h>

#include "trace_output.h"
#include "trace.h"

static DEFINE_MUTEX(syscall_trace_lock);
static int sys_refcount_enter;
static int sys_refcount_exit;
static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls);
static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls);

extern unsigned long __start_syscalls_metadata[];
extern unsigned long __stop_syscalls_metadata[];

static struct syscall_metadata **syscalls_metadata;

static struct syscall_metadata *find_syscall_meta(unsigned long syscall)
{
	struct syscall_metadata *start;
	struct syscall_metadata *stop;
	char str[KSYM_SYMBOL_LEN];

	start = (struct syscall_metadata *)__start_syscalls_metadata;
	stop = (struct syscall_metadata *)__stop_syscalls_metadata;
	kallsyms_lookup(syscall, NULL, NULL, NULL, str);

	for ( ; start < stop; start++) {
		/*
		 * Only compare after the "sys" prefix. Archs that use
		 * syscall wrappers may have syscall symbol aliases prefixed
		 * with "SyS" instead of "sys", leading to an unwanted
		 * mismatch.
		 */
		if (start->name && !strcmp(start->name + 3, str + 3))
			return start;
	}
	return NULL;
}

static struct syscall_metadata *syscall_nr_to_meta(int nr)
{
	if (!syscalls_metadata || nr >= NR_syscalls || nr < 0)
		return NULL;

	return syscalls_metadata[nr];
}

enum print_line_t
print_syscall_enter(struct trace_iterator *iter, int flags)
{
	struct trace_seq *s = &iter->seq;
	struct trace_entry *ent = iter->ent;
	struct syscall_trace_enter *trace;
	struct syscall_metadata *entry;
	int i, ret, syscall;

	trace = (typeof(trace))ent;
	syscall = trace->nr;
	entry = syscall_nr_to_meta(syscall);

	if (!entry)
		goto end;

	if (entry->enter_event->id != ent->type) {
		WARN_ON_ONCE(1);
		goto end;
	}

	ret = trace_seq_printf(s, "%s(", entry->name);
	if (!ret)
		return TRACE_TYPE_PARTIAL_LINE;

	for (i = 0; i < entry->nb_args; i++) {
		/* parameter types */
		if (trace_flags & TRACE_ITER_VERBOSE) {
			ret = trace_seq_printf(s, "%s ", entry->types[i]);
			if (!ret)
				return TRACE_TYPE_PARTIAL_LINE;
		}
		/* parameter values */
		ret = trace_seq_printf(s, "%s: %lx%s", entry->args[i],
				       trace->args[i],
				       i == entry->nb_args - 1 ?
				       "" : ", ");
		if (!ret)
			return TRACE_TYPE_PARTIAL_LINE;
	}

	ret = trace_seq_putc(s, ')');
	if (!ret)
		return TRACE_TYPE_PARTIAL_LINE;

end:
	ret = trace_seq_putc(s, '\n');
	if (!ret)
		return TRACE_TYPE_PARTIAL_LINE;

	return TRACE_TYPE_HANDLED;
}

enum print_line_t
print_syscall_exit(struct trace_iterator *iter, int flags)
{
	struct trace_seq *s = &iter->seq;
	struct trace_entry *ent = iter->ent;
	struct syscall_trace_exit *trace;
	int syscall;
	struct syscall_metadata *entry;
	int ret;

	trace = (typeof(trace))ent;
	syscall = trace->nr;
	entry = syscall_nr_to_meta(syscall);

	if (!entry) {
		trace_seq_printf(s, "\n");
		return TRACE_TYPE_HANDLED;
	}

	if (entry->exit_event->id != ent->type) {
		WARN_ON_ONCE(1);
		return TRACE_TYPE_UNHANDLED;
	}

	ret = trace_seq_printf(s, "%s -> 0x%lx\n", entry->name,
				trace->ret);
	if (!ret)
		return TRACE_TYPE_PARTIAL_LINE;

	return TRACE_TYPE_HANDLED;
}

extern char *__bad_type_size(void);

/*
 * Expands to the type string, name, offset, size and signedness of a
 * syscall event field. On a size mismatch the ternary references the
 * undefined __bad_type_size(), so the build fails at link time instead
 * of silently emitting a bogus format.
 */
#define SYSCALL_FIELD(type, name) \
	sizeof(type) != sizeof(trace.name) ? \
		__bad_type_size() : \
		#type, #name, offsetof(typeof(trace), name), \
		sizeof(trace.name), is_signed_type(type)

int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s)
{
	int i;
	int ret;
	struct syscall_metadata *entry = call->data;
	struct syscall_trace_enter trace;
	int offset = offsetof(struct syscall_trace_enter, args);

	ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
			       "\tsigned:%u;\n",
			       SYSCALL_FIELD(int, nr));
	if (!ret)
		return 0;

	for (i = 0; i < entry->nb_args; i++) {
		ret = trace_seq_printf(s, "\tfield:%s %s;", entry->types[i],
				       entry->args[i]);
		if (!ret)
			return 0;
		ret = trace_seq_printf(s, "\toffset:%d;\tsize:%zu;"
				       "\tsigned:%u;\n", offset,
				       sizeof(unsigned long),
				       is_signed_type(unsigned long));
		if (!ret)
			return 0;
		offset += sizeof(unsigned long);
	}

	trace_seq_puts(s, "\nprint fmt: \"");
	for (i = 0; i < entry->nb_args; i++) {
		ret = trace_seq_printf(s, "%s: 0x%%0%zulx%s", entry->args[i],
				       sizeof(unsigned long),
				       i == entry->nb_args - 1 ?
				       "" : ", ");
		if (!ret)
			return 0;
	}
	trace_seq_putc(s, '"');

	for (i = 0; i < entry->nb_args; i++) {
		ret = trace_seq_printf(s, ", ((unsigned long)(REC->%s))",
				       entry->args[i]);
		if (!ret)
			return 0;
	}

	return trace_seq_putc(s, '\n');
}

int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s)
{
	int ret;
	struct syscall_trace_exit trace;

	ret = trace_seq_printf(s,
			       "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
			       "\tsigned:%u;\n"
			       "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
			       "\tsigned:%u;\n",
			       SYSCALL_FIELD(int, nr),
			       SYSCALL_FIELD(long, ret));
	if (!ret)
		return 0;

	return trace_seq_printf(s, "\nprint fmt: \"0x%%lx\", REC->ret\n");
}

int syscall_enter_define_fields(struct ftrace_event_call *call)
{
	struct syscall_trace_enter trace;
	struct syscall_metadata *meta = call->data;
	int ret;
	int i;
	int offset = offsetof(typeof(trace), args);

	ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
	if (ret)
		return ret;

	for (i = 0; i < meta->nb_args; i++) {
		ret = trace_define_field(call, meta->types[i],
					 meta->args[i], offset,
					 sizeof(unsigned long), 0,
					 FILTER_OTHER);
		/* don't let a later success mask an earlier failure */
		if (ret)
			break;
		offset += sizeof(unsigned long);
	}

	return ret;
}

int syscall_exit_define_fields(struct ftrace_event_call *call)
{
	struct syscall_trace_exit trace;
	int ret;

	ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
	if (ret)
		return ret;

	ret = trace_define_field(call, SYSCALL_FIELD(long, ret),
				 FILTER_OTHER);

	return ret;
}

void ftrace_syscall_enter(struct pt_regs *regs, long id)
{
	struct syscall_trace_enter *entry;
	struct syscall_metadata *sys_data;
	struct ring_buffer_event *event;
	struct ring_buffer *buffer;
	int size;
	int syscall_nr;

	syscall_nr = syscall_get_nr(current, regs);
	if (syscall_nr < 0)
		return;
	if (!test_bit(syscall_nr, enabled_enter_syscalls))
		return;

	sys_data = syscall_nr_to_meta(syscall_nr);
	if (!sys_data)
		return;

	size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;

	event = trace_current_buffer_lock_reserve(&buffer,
			sys_data->enter_event->id, size, 0, 0);
	if (!event)
		return;

	entry = ring_buffer_event_data(event);
	entry->nr = syscall_nr;
	syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args);

	if (!filter_current_check_discard(buffer, sys_data->enter_event,
					  entry, event))
		trace_current_buffer_unlock_commit(buffer, event, 0, 0);
}

void ftrace_syscall_exit(struct pt_regs *regs, long ret)
{
	struct syscall_trace_exit *entry;
	struct syscall_metadata *sys_data;
	struct ring_buffer_event *event;
	struct ring_buffer *buffer;
	int syscall_nr;

	syscall_nr = syscall_get_nr(current, regs);
	if (syscall_nr < 0)
		return;
	if (!test_bit(syscall_nr, enabled_exit_syscalls))
		return;

	sys_data = syscall_nr_to_meta(syscall_nr);
	if (!sys_data)
		return;

	event = trace_current_buffer_lock_reserve(&buffer,
			sys_data->exit_event->id, sizeof(*entry), 0, 0);
	if (!event)
		return;

	entry = ring_buffer_event_data(event);
	entry->nr = syscall_nr;
	entry->ret = syscall_get_return_value(current, regs);

	if (!filter_current_check_discard(buffer, sys_data->exit_event,
					  entry, event))
		trace_current_buffer_unlock_commit(buffer, event, 0, 0);
}

int reg_event_syscall_enter(struct ftrace_event_call *call)
{
	int ret = 0;
	int num;

	num = ((struct syscall_metadata *)call->data)->syscall_nr;
	if (num < 0 || num >= NR_syscalls)
		return -ENOSYS;
	mutex_lock(&syscall_trace_lock);
	if (!sys_refcount_enter)
		ret = register_trace_sys_enter(ftrace_syscall_enter);
	if (!ret) {
		set_bit(num, enabled_enter_syscalls);
		sys_refcount_enter++;
	}
	mutex_unlock(&syscall_trace_lock);
	return ret;
}

void unreg_event_syscall_enter(struct ftrace_event_call *call)
{
	int num;

	num = ((struct syscall_metadata *)call->data)->syscall_nr;
	if (num < 0 || num >= NR_syscalls)
		return;
	mutex_lock(&syscall_trace_lock);
	sys_refcount_enter--;
	clear_bit(num, enabled_enter_syscalls);
	if (!sys_refcount_enter)
		unregister_trace_sys_enter(ftrace_syscall_enter);
	mutex_unlock(&syscall_trace_lock);
}

int reg_event_syscall_exit(struct ftrace_event_call *call)
{
	int ret = 0;
	int num;

	num = ((struct syscall_metadata *)call->data)->syscall_nr;
	if (num < 0 || num >= NR_syscalls)
		return -ENOSYS;
	mutex_lock(&syscall_trace_lock);
	if (!sys_refcount_exit)
		ret = register_trace_sys_exit(ftrace_syscall_exit);
	if (!ret) {
		set_bit(num, enabled_exit_syscalls);
		sys_refcount_exit++;
	}
	mutex_unlock(&syscall_trace_lock);
	return ret;
}

void unreg_event_syscall_exit(struct ftrace_event_call *call)
{
	int num;

	num = ((struct syscall_metadata *)call->data)->syscall_nr;
	if (num < 0 || num >= NR_syscalls)
		return;
	mutex_lock(&syscall_trace_lock);
	sys_refcount_exit--;
	clear_bit(num, enabled_exit_syscalls);
	if (!sys_refcount_exit)
		unregister_trace_sys_exit(ftrace_syscall_exit);
	mutex_unlock(&syscall_trace_lock);
}

int init_syscall_trace(struct ftrace_event_call *call)
{
	int id;

	id = register_ftrace_event(call->event);
	if (!id)
		return -ENODEV;
	call->id = id;
	INIT_LIST_HEAD(&call->fields);
	return 0;
}

int __init init_ftrace_syscalls(void)
{
	struct syscall_metadata *meta;
	unsigned long addr;
	int i;

	syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) *
					NR_syscalls, GFP_KERNEL);
	if (!syscalls_metadata) {
		WARN_ON(1);
		return -ENOMEM;
	}

	for (i = 0; i < NR_syscalls; i++) {
		addr = arch_syscall_addr(i);
		meta = find_syscall_meta(addr);
		if (!meta)
			continue;

		meta->syscall_nr = i;
		syscalls_metadata[i] = meta;
	}

	return 0;
}
core_initcall(init_ftrace_syscalls);

#ifdef CONFIG_EVENT_PROFILE

static DECLARE_BITMAP(enabled_prof_enter_syscalls, NR_syscalls);
static DECLARE_BITMAP(enabled_prof_exit_syscalls, NR_syscalls);
static int sys_prof_refcount_enter;
static int sys_prof_refcount_exit;

static void prof_syscall_enter(struct pt_regs *regs, long id)
{
	struct syscall_metadata *sys_data;
	struct syscall_trace_enter *rec;
	unsigned long flags;
	char *trace_buf;
	char *raw_data;
	int syscall_nr;
	int rctx;
	int size;
	int cpu;

	syscall_nr = syscall_get_nr(current, regs);
	/* syscall_get_nr() returns -1 if not in a syscall */
	if (syscall_nr < 0)
		return;
	if (!test_bit(syscall_nr, enabled_prof_enter_syscalls))
		return;

	sys_data = syscall_nr_to_meta(syscall_nr);
	if (!sys_data)
		return;

	/*
	 * get the size after alignment with the u32 buffer size
	 * field
	 */
	size = sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec);
	size = ALIGN(size + sizeof(u32), sizeof(u64));
	size -= sizeof(u32);

	if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
		      "profile buffer not large enough"))
		return;

	/* Protect the per cpu buffer, begin the rcu read side */
	local_irq_save(flags);

	rctx = perf_swevent_get_recursion_context();
	if (rctx < 0)
		goto end_recursion;

	cpu = smp_processor_id();

	trace_buf = rcu_dereference(perf_trace_buf);

	if (!trace_buf)
		goto end;

	raw_data = per_cpu_ptr(trace_buf, cpu);

	/* zero the dead bytes from align to not leak stack to user */
	*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;

	rec = (struct syscall_trace_enter *) raw_data;
	tracing_generic_entry_update(&rec->ent, 0, 0);
	rec->ent.type = sys_data->enter_event->id;
	rec->nr = syscall_nr;
	syscall_get_arguments(current, regs, 0, sys_data->nb_args,
			       (unsigned long *)&rec->args);
	perf_tp_event(sys_data->enter_event->id, 0, 1, rec, size);

end:
	perf_swevent_put_recursion_context(rctx);
end_recursion:
	local_irq_restore(flags);
}

int prof_sysenter_enable(struct ftrace_event_call *call)
{
	int ret = 0;
	int num;

	num = ((struct syscall_metadata *)call->data)->syscall_nr;

	mutex_lock(&syscall_trace_lock);
	if (!sys_prof_refcount_enter)
		ret = register_trace_sys_enter(prof_syscall_enter);
	if (ret) {
		pr_info("event trace: Could not activate "
			"syscall entry trace point");
	} else {
		set_bit(num, enabled_prof_enter_syscalls);
		sys_prof_refcount_enter++;
	}
	mutex_unlock(&syscall_trace_lock);
	return ret;
}

void prof_sysenter_disable(struct ftrace_event_call *call)
{
	int num;

	num = ((struct syscall_metadata *)call->data)->syscall_nr;

	mutex_lock(&syscall_trace_lock);
	sys_prof_refcount_enter--;
	clear_bit(num, enabled_prof_enter_syscalls);
	if (!sys_prof_refcount_enter)
		unregister_trace_sys_enter(prof_syscall_enter);
	mutex_unlock(&syscall_trace_lock);
}

static void prof_syscall_exit(struct pt_regs *regs, long ret)
{
	struct syscall_metadata *sys_data;
	struct syscall_trace_exit *rec;
	unsigned long flags;
	int syscall_nr;
	char *trace_buf;
	char *raw_data;
	int rctx;
	int size;
	int cpu;

	syscall_nr = syscall_get_nr(current, regs);
	/* syscall_get_nr() returns -1 if not in a syscall */
	if (syscall_nr < 0)
		return;
	if (!test_bit(syscall_nr, enabled_prof_exit_syscalls))
		return;

	sys_data = syscall_nr_to_meta(syscall_nr);
	if (!sys_data)
		return;

	/* We can probably do that at build time */
	size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64));
	size -= sizeof(u32);

	/*
	 * Impossible, but be paranoid with the future.
	 * How to put this check outside runtime?
	 */
	if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
		      "exit event has grown above profile buffer size"))
		return;

	/* Protect the per cpu buffer, begin the rcu read side */
	local_irq_save(flags);

	rctx = perf_swevent_get_recursion_context();
	if (rctx < 0)
		goto end_recursion;

	cpu = smp_processor_id();

	trace_buf = rcu_dereference(perf_trace_buf);

	if (!trace_buf)
		goto end;

	raw_data = per_cpu_ptr(trace_buf, cpu);

	/* zero the dead bytes from align to not leak stack to user */
	*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;

	rec = (struct syscall_trace_exit *)raw_data;

	tracing_generic_entry_update(&rec->ent, 0, 0);
	rec->ent.type = sys_data->exit_event->id;
	rec->nr = syscall_nr;
	rec->ret = syscall_get_return_value(current, regs);

	perf_tp_event(sys_data->exit_event->id, 0, 1, rec, size);

end:
	perf_swevent_put_recursion_context(rctx);
end_recursion:
	local_irq_restore(flags);
}

int prof_sysexit_enable(struct ftrace_event_call *call)
{
	int ret = 0;
	int num;

	num = ((struct syscall_metadata *)call->data)->syscall_nr;

	mutex_lock(&syscall_trace_lock);
	if (!sys_prof_refcount_exit)
		ret = register_trace_sys_exit(prof_syscall_exit);
	if (ret) {
		pr_info("event trace: Could not activate "
			"syscall exit trace point");
	} else {
		set_bit(num, enabled_prof_exit_syscalls);
		sys_prof_refcount_exit++;
	}
	mutex_unlock(&syscall_trace_lock);
	return ret;
}

void prof_sysexit_disable(struct ftrace_event_call *call)
{
	int num;

	num = ((struct syscall_metadata *)call->data)->syscall_nr;

	mutex_lock(&syscall_trace_lock);
	sys_prof_refcount_exit--;
	clear_bit(num, enabled_prof_exit_syscalls);
	if (!sys_prof_refcount_exit)
		unregister_trace_sys_exit(prof_syscall_exit);
	mutex_unlock(&syscall_trace_lock);
}

#endif /* CONFIG_EVENT_PROFILE */