#include <trace/syscall.h>
#include <trace/events/syscalls.h>
#include <linux/kernel.h>
#include <linux/ftrace.h>
#include <linux/perf_event.h>
#include <asm/syscall.h>

#include "trace_output.h"
#include "trace.h"

static DEFINE_MUTEX(syscall_trace_lock);
static int sys_refcount_enter;
static int sys_refcount_exit;
static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls);
static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls);

enum print_line_t
print_syscall_enter(struct trace_iterator *iter, int flags)
{
	struct trace_seq *s = &iter->seq;
	struct trace_entry *ent = iter->ent;
	struct syscall_trace_enter *trace;
	struct syscall_metadata *entry;
	int i, ret, syscall;

	trace = (typeof(trace))ent;
	syscall = trace->nr;
	entry = syscall_nr_to_meta(syscall);

	if (!entry)
		goto end;

	if (entry->enter_id != ent->type) {
		WARN_ON_ONCE(1);
		goto end;
	}

	ret = trace_seq_printf(s, "%s(", entry->name);
	if (!ret)
		return TRACE_TYPE_PARTIAL_LINE;

	for (i = 0; i < entry->nb_args; i++) {
		/* parameter types */
		if (trace_flags & TRACE_ITER_VERBOSE) {
			ret = trace_seq_printf(s, "%s ", entry->types[i]);
			if (!ret)
				return TRACE_TYPE_PARTIAL_LINE;
		}
		/* parameter values */
		ret = trace_seq_printf(s, "%s: %lx%s", entry->args[i],
				       trace->args[i],
				       i == entry->nb_args - 1 ? "" : ", ");
		if (!ret)
			return TRACE_TYPE_PARTIAL_LINE;
	}

	ret = trace_seq_putc(s, ')');
	if (!ret)
		return TRACE_TYPE_PARTIAL_LINE;

end:
	ret = trace_seq_putc(s, '\n');
	if (!ret)
		return TRACE_TYPE_PARTIAL_LINE;

	return TRACE_TYPE_HANDLED;
}

enum print_line_t
print_syscall_exit(struct trace_iterator *iter, int flags)
{
	struct trace_seq *s = &iter->seq;
	struct trace_entry *ent = iter->ent;
	struct syscall_trace_exit *trace;
	int syscall;
	struct syscall_metadata *entry;
	int ret;

	trace = (typeof(trace))ent;
	syscall = trace->nr;
	entry = syscall_nr_to_meta(syscall);

	if (!entry) {
		trace_seq_printf(s, "\n");
		return TRACE_TYPE_HANDLED;
	}

	if (entry->exit_id != ent->type) {
		WARN_ON_ONCE(1);
		return TRACE_TYPE_UNHANDLED;
	}

	ret = trace_seq_printf(s, "%s -> 0x%lx\n", entry->name,
			       trace->ret);
	if (!ret)
		return TRACE_TYPE_PARTIAL_LINE;

	return TRACE_TYPE_HANDLED;
}
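
/*
 * SYSCALL_FIELD expands to the (type string, name string, offset, size)
 * argument list expected by the "field:..." format strings below.  The
 * sizeof() comparison is a compile-time sanity check: __bad_type_size()
 * is declared but never defined, so if the declared type does not match
 * the size of the actual struct member, the leftover call to it fails
 * at link time; when the sizes match, the compiler folds the ternary
 * away and the reference never survives.
 */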
extern char *__bad_type_size(void);

#define SYSCALL_FIELD(type, name) \
	sizeof(type) != sizeof(trace.name) ? \
		__bad_type_size() : \
		#type, #name, offsetof(typeof(trace), name), sizeof(trace.name)

int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s)
{
	int i;
	int nr;
	int ret;
	struct syscall_metadata *entry;
	struct syscall_trace_enter trace;
	int offset = offsetof(struct syscall_trace_enter, args);

	nr = syscall_name_to_nr(call->data);
	entry = syscall_nr_to_meta(nr);

	if (!entry)
		return 0;

	ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n",
			       SYSCALL_FIELD(int, nr));
	if (!ret)
		return 0;

	for (i = 0; i < entry->nb_args; i++) {
		ret = trace_seq_printf(s, "\tfield:%s %s;", entry->types[i],
				       entry->args[i]);
		if (!ret)
			return 0;
		ret = trace_seq_printf(s, "\toffset:%d;\tsize:%zu;\n", offset,
				       sizeof(unsigned long));
		if (!ret)
			return 0;
		offset += sizeof(unsigned long);
	}

	trace_seq_puts(s, "\nprint fmt: \"");
	for (i = 0; i < entry->nb_args; i++) {
		ret = trace_seq_printf(s, "%s: 0x%%0%zulx%s", entry->args[i],
				       sizeof(unsigned long),
				       i == entry->nb_args - 1 ? "" : ", ");
		if (!ret)
			return 0;
	}
	trace_seq_putc(s, '"');

	for (i = 0; i < entry->nb_args; i++) {
		ret = trace_seq_printf(s, ", ((unsigned long)(REC->%s))",
				       entry->args[i]);
		if (!ret)
			return 0;
	}

	return trace_seq_putc(s, '\n');
}

int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s)
{
	int ret;
	struct syscall_trace_exit trace;

	ret = trace_seq_printf(s,
			       "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
			       "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n",
			       SYSCALL_FIELD(int, nr),
			       SYSCALL_FIELD(unsigned long, ret));
	if (!ret)
		return 0;

	return trace_seq_printf(s, "\nprint fmt: \"0x%%lx\", REC->ret\n");
}

int syscall_enter_define_fields(struct ftrace_event_call *call)
{
	struct syscall_trace_enter trace;
	struct syscall_metadata *meta;
	int ret;
	int nr;
	int i;
	int offset = offsetof(typeof(trace), args);

	nr = syscall_name_to_nr(call->data);
	meta = syscall_nr_to_meta(nr);

	if (!meta)
		return 0;

	ret = trace_define_common_fields(call);
	if (ret)
		return ret;

	for (i = 0; i < meta->nb_args; i++) {
		ret = trace_define_field(call, meta->types[i],
					 meta->args[i], offset,
					 sizeof(unsigned long), 0,
					 FILTER_OTHER);
		offset += sizeof(unsigned long);
	}

	return ret;
}

int syscall_exit_define_fields(struct ftrace_event_call *call)
{
	struct syscall_trace_exit trace;
	int ret;

	ret = trace_define_common_fields(call);
	if (ret)
		return ret;

	ret = trace_define_field(call, SYSCALL_FIELD(unsigned long, ret), 0,
				 FILTER_OTHER);

	return ret;
}
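
/*
 * Tracepoint probes for the sys_enter/sys_exit tracepoints.  Each probe
 * checks the per-syscall enabled bitmap, reserves a ring buffer event
 * sized for the record (enter records carry nb_args unsigned longs of
 * arguments, exit records carry the return value), fills it in, and
 * commits it unless the event filter discards it.
 */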
void ftrace_syscall_enter(struct pt_regs *regs, long id)
{
	struct syscall_trace_enter *entry;
	struct syscall_metadata *sys_data;
	struct ring_buffer_event *event;
	struct ring_buffer *buffer;
	int size;
	int syscall_nr;

	syscall_nr = syscall_get_nr(current, regs);
	if (syscall_nr < 0)
		return;
	if (!test_bit(syscall_nr, enabled_enter_syscalls))
		return;

	sys_data = syscall_nr_to_meta(syscall_nr);
	if (!sys_data)
		return;

	size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;

	event = trace_current_buffer_lock_reserve(&buffer, sys_data->enter_id,
						  size, 0, 0);
	if (!event)
		return;

	entry = ring_buffer_event_data(event);
	entry->nr = syscall_nr;
	syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args);

	if (!filter_current_check_discard(buffer, sys_data->enter_event,
					  entry, event))
		trace_current_buffer_unlock_commit(buffer, event, 0, 0);
}

void ftrace_syscall_exit(struct pt_regs *regs, long ret)
{
	struct syscall_trace_exit *entry;
	struct syscall_metadata *sys_data;
	struct ring_buffer_event *event;
	struct ring_buffer *buffer;
	int syscall_nr;

	syscall_nr = syscall_get_nr(current, regs);
	if (syscall_nr < 0)
		return;
	if (!test_bit(syscall_nr, enabled_exit_syscalls))
		return;

	sys_data = syscall_nr_to_meta(syscall_nr);
	if (!sys_data)
		return;

	event = trace_current_buffer_lock_reserve(&buffer, sys_data->exit_id,
						  sizeof(*entry), 0, 0);
	if (!event)
		return;

	entry = ring_buffer_event_data(event);
	entry->nr = syscall_nr;
	entry->ret = syscall_get_return_value(current, regs);

	if (!filter_current_check_discard(buffer, sys_data->exit_event,
					  entry, event))
		trace_current_buffer_unlock_commit(buffer, event, 0, 0);
}

int reg_event_syscall_enter(void *ptr)
{
	int ret = 0;
	int num;
	char *name;

	name = (char *)ptr;
	num = syscall_name_to_nr(name);
	if (num < 0 || num >= NR_syscalls)
		return -ENOSYS;
	mutex_lock(&syscall_trace_lock);
	if (!sys_refcount_enter)
		ret = register_trace_sys_enter(ftrace_syscall_enter);
	if (ret) {
		pr_info("event trace: Could not activate "
			"syscall entry trace point\n");
	} else {
		set_bit(num, enabled_enter_syscalls);
		sys_refcount_enter++;
	}
	mutex_unlock(&syscall_trace_lock);
	return ret;
}

void unreg_event_syscall_enter(void *ptr)
{
	int num;
	char *name;

	name = (char *)ptr;
	num = syscall_name_to_nr(name);
	if (num < 0 || num >= NR_syscalls)
		return;
	mutex_lock(&syscall_trace_lock);
	sys_refcount_enter--;
	clear_bit(num, enabled_enter_syscalls);
	if (!sys_refcount_enter)
		unregister_trace_sys_enter(ftrace_syscall_enter);
	mutex_unlock(&syscall_trace_lock);
}

int reg_event_syscall_exit(void *ptr)
{
	int ret = 0;
	int num;
	char *name;

	name = (char *)ptr;
	num = syscall_name_to_nr(name);
	if (num < 0 || num >= NR_syscalls)
		return -ENOSYS;
	mutex_lock(&syscall_trace_lock);
	if (!sys_refcount_exit)
		ret = register_trace_sys_exit(ftrace_syscall_exit);
	if (ret) {
		pr_info("event trace: Could not activate "
			"syscall exit trace point\n");
	} else {
		set_bit(num, enabled_exit_syscalls);
		sys_refcount_exit++;
	}
	mutex_unlock(&syscall_trace_lock);
	return ret;
}

void unreg_event_syscall_exit(void *ptr)
{
	int num;
	char *name;

	name = (char *)ptr;
	num = syscall_name_to_nr(name);
	if (num < 0 || num >= NR_syscalls)
		return;
	mutex_lock(&syscall_trace_lock);
	sys_refcount_exit--;
	clear_bit(num, enabled_exit_syscalls);
	if (!sys_refcount_exit)
		unregister_trace_sys_exit(ftrace_syscall_exit);
	mutex_unlock(&syscall_trace_lock);
}

struct trace_event event_syscall_enter = {
	.trace			= print_syscall_enter,
};

struct trace_event event_syscall_exit = {
	.trace			= print_syscall_exit,
};
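
/*
 * The CONFIG_EVENT_PROFILE side feeds the same syscall records to perf
 * instead of the ftrace ring buffer: records are built in a per-cpu
 * scratch buffer (with a separate buffer for NMI context, since an NMI
 * can interrupt a probe already using the regular one) and handed to
 * perf_tp_event().
 */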
#ifdef CONFIG_EVENT_PROFILE

static DECLARE_BITMAP(enabled_prof_enter_syscalls, NR_syscalls);
static DECLARE_BITMAP(enabled_prof_exit_syscalls, NR_syscalls);
static int sys_prof_refcount_enter;
static int sys_prof_refcount_exit;

static void prof_syscall_enter(struct pt_regs *regs, long id)
{
	struct syscall_metadata *sys_data;
	struct syscall_trace_enter *rec;
	unsigned long flags;
	char *raw_data;
	int syscall_nr;
	int size;
	int cpu;

	syscall_nr = syscall_get_nr(current, regs);
	if (syscall_nr < 0)
		return;
	if (!test_bit(syscall_nr, enabled_prof_enter_syscalls))
		return;

	sys_data = syscall_nr_to_meta(syscall_nr);
	if (!sys_data)
		return;

	/* get the size after alignment with the u32 buffer size field */
	size = sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec);
	size = ALIGN(size + sizeof(u32), sizeof(u64));
	size -= sizeof(u32);

	if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
		      "profile buffer not large enough"))
		return;

	/* Protect the per cpu buffer, begin the rcu read side */
	local_irq_save(flags);

	cpu = smp_processor_id();

	if (in_nmi())
		raw_data = rcu_dereference(trace_profile_buf_nmi);
	else
		raw_data = rcu_dereference(trace_profile_buf);

	if (!raw_data)
		goto end;

	raw_data = per_cpu_ptr(raw_data, cpu);

	/* zero the dead bytes from align to not leak stack to user */
	*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;

	rec = (struct syscall_trace_enter *) raw_data;
	tracing_generic_entry_update(&rec->ent, 0, 0);
	rec->ent.type = sys_data->enter_id;
	rec->nr = syscall_nr;
	syscall_get_arguments(current, regs, 0, sys_data->nb_args,
			      (unsigned long *)&rec->args);
	perf_tp_event(sys_data->enter_id, 0, 1, rec, size);

end:
	local_irq_restore(flags);
}

int reg_prof_syscall_enter(char *name)
{
	int ret = 0;
	int num;

	num = syscall_name_to_nr(name);
	if (num < 0 || num >= NR_syscalls)
		return -ENOSYS;

	mutex_lock(&syscall_trace_lock);
	if (!sys_prof_refcount_enter)
		ret = register_trace_sys_enter(prof_syscall_enter);
	if (ret) {
		pr_info("event trace: Could not activate "
			"syscall entry trace point\n");
	} else {
		set_bit(num, enabled_prof_enter_syscalls);
		sys_prof_refcount_enter++;
	}
	mutex_unlock(&syscall_trace_lock);
	return ret;
}

void unreg_prof_syscall_enter(char *name)
{
	int num;

	num = syscall_name_to_nr(name);
	if (num < 0 || num >= NR_syscalls)
		return;

	mutex_lock(&syscall_trace_lock);
	sys_prof_refcount_enter--;
	clear_bit(num, enabled_prof_enter_syscalls);
	if (!sys_prof_refcount_enter)
		unregister_trace_sys_enter(prof_syscall_enter);
	mutex_unlock(&syscall_trace_lock);
}
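
/*
 * Mirror of prof_syscall_enter() for the exit tracepoint: same per-cpu
 * buffer handling, but the record is a fixed-size syscall_trace_exit
 * carrying only the syscall number and return value.
 */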
static void prof_syscall_exit(struct pt_regs *regs, long ret)
{
	struct syscall_metadata *sys_data;
	struct syscall_trace_exit *rec;
	unsigned long flags;
	int syscall_nr;
	char *raw_data;
	int size;
	int cpu;

	syscall_nr = syscall_get_nr(current, regs);
	if (syscall_nr < 0)
		return;
	if (!test_bit(syscall_nr, enabled_prof_exit_syscalls))
		return;

	sys_data = syscall_nr_to_meta(syscall_nr);
	if (!sys_data)
		return;

	/* We can probably do that at build time */
	size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64));
	size -= sizeof(u32);

	/*
	 * Impossible, but be paranoid with the future
	 * How to put this check outside runtime?
	 */
	if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
		      "exit event has grown above profile buffer size"))
		return;

	/* Protect the per cpu buffer, begin the rcu read side */
	local_irq_save(flags);
	cpu = smp_processor_id();

	if (in_nmi())
		raw_data = rcu_dereference(trace_profile_buf_nmi);
	else
		raw_data = rcu_dereference(trace_profile_buf);

	if (!raw_data)
		goto end;

	raw_data = per_cpu_ptr(raw_data, cpu);

	/* zero the dead bytes from align to not leak stack to user */
	*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;

	rec = (struct syscall_trace_exit *)raw_data;

	tracing_generic_entry_update(&rec->ent, 0, 0);
	rec->ent.type = sys_data->exit_id;
	rec->nr = syscall_nr;
	rec->ret = syscall_get_return_value(current, regs);

	perf_tp_event(sys_data->exit_id, 0, 1, rec, size);

end:
	local_irq_restore(flags);
}

int reg_prof_syscall_exit(char *name)
{
	int ret = 0;
	int num;

	num = syscall_name_to_nr(name);
	if (num < 0 || num >= NR_syscalls)
		return -ENOSYS;

	mutex_lock(&syscall_trace_lock);
	if (!sys_prof_refcount_exit)
		ret = register_trace_sys_exit(prof_syscall_exit);
	if (ret) {
		pr_info("event trace: Could not activate "
			"syscall exit trace point\n");
	} else {
		set_bit(num, enabled_prof_exit_syscalls);
		sys_prof_refcount_exit++;
	}
	mutex_unlock(&syscall_trace_lock);
	return ret;
}

void unreg_prof_syscall_exit(char *name)
{
	int num;

	num = syscall_name_to_nr(name);
	if (num < 0 || num >= NR_syscalls)
		return;

	mutex_lock(&syscall_trace_lock);
	sys_prof_refcount_exit--;
	clear_bit(num, enabled_prof_exit_syscalls);
	if (!sys_prof_refcount_exit)
		unregister_trace_sys_exit(prof_syscall_exit);
	mutex_unlock(&syscall_trace_lock);
}

#endif