1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (C) 2017, 2018, 2019, 2021 BMW Car IT GmbH 4 * Author: Viktor Rosendahl (viktor.rosendahl@bmw.de) 5 */ 6 7 #define _GNU_SOURCE 8 #define _POSIX_C_SOURCE 200809L 9 10 #include <ctype.h> 11 #include <stdbool.h> 12 #include <stdio.h> 13 #include <stdlib.h> 14 #include <string.h> 15 16 #include <err.h> 17 #include <errno.h> 18 #include <fcntl.h> 19 #include <getopt.h> 20 #include <sched.h> 21 #include <linux/unistd.h> 22 #include <signal.h> 23 #include <sys/inotify.h> 24 #include <unistd.h> 25 #include <pthread.h> 26 #include <tracefs.h> 27 28 static const char *prg_name; 29 static const char *prg_unknown = "unknown program name"; 30 31 static int fd_stdout; 32 33 static int sched_policy; 34 static bool sched_policy_set; 35 36 static int sched_pri; 37 static bool sched_pri_set; 38 39 static bool trace_enable = true; 40 static bool setup_ftrace = true; 41 static bool use_random_sleep; 42 43 #define TRACE_OPTS \ 44 C(FUNC_TR, "function-trace"), \ 45 C(DISP_GR, "display-graph"), \ 46 C(NR, NULL) 47 48 #undef C 49 #define C(a, b) OPTIDX_##a 50 51 enum traceopt { 52 TRACE_OPTS 53 }; 54 55 #undef C 56 #define C(a, b) b 57 58 static const char *const optstr[] = { 59 TRACE_OPTS 60 }; 61 62 enum errhandling { 63 ERR_EXIT = 0, 64 ERR_WARN, 65 ERR_CLEANUP, 66 }; 67 68 static bool use_options[OPTIDX_NR]; 69 70 static char inotify_buffer[655360]; 71 72 #define likely(x) __builtin_expect(!!(x), 1) 73 #define unlikely(x) __builtin_expect(!!(x), 0) 74 #define bool2str(x) (x ? "true":"false") 75 76 #define DEFAULT_NR_PRINTER_THREADS (3) 77 static unsigned int nr_threads = DEFAULT_NR_PRINTER_THREADS; 78 79 #define DEFAULT_TABLE_SIZE (2) 80 static unsigned int table_startsize = DEFAULT_TABLE_SIZE; 81 82 static int verbosity; 83 84 #define verbose_sizechange() (verbosity >= 1) 85 #define verbose_lostevent() (verbosity >= 2) 86 #define verbose_ftrace() (verbosity >= 1) 87 88 #define was_changed(ORIG, CUR) (strcmp(ORIG, CUR) != 0) 89 #define needs_change(CUR, WANTED) (strcmp(CUR, WANTED) != 0) 90 91 static const char *debug_tracefile; 92 static const char *debug_tracefile_dflt; 93 static const char *debug_maxlat; 94 static const char *debug_maxlat_dflt; 95 static const char * const DEBUG_NOFILE = "[file not found]"; 96 97 static const char * const TR_MAXLAT = "tracing_max_latency"; 98 static const char * const TR_THRESH = "tracing_thresh"; 99 static const char * const TR_CURRENT = "current_tracer"; 100 static const char * const TR_OPTIONS = "trace_options"; 101 102 static const char * const NOP_TRACER = "nop"; 103 104 static const char * const OPT_NO_PREFIX = "no"; 105 106 #define DFLT_THRESHOLD_US "0" 107 static const char *threshold = DFLT_THRESHOLD_US; 108 109 #define DEV_URANDOM "/dev/urandom" 110 #define RT_DEFAULT_PRI (99) 111 #define DEFAULT_PRI (0) 112 113 #define USEC_PER_MSEC (1000L) 114 #define NSEC_PER_USEC (1000L) 115 #define NSEC_PER_MSEC (USEC_PER_MSEC * NSEC_PER_USEC) 116 117 #define MSEC_PER_SEC (1000L) 118 #define USEC_PER_SEC (USEC_PER_MSEC * MSEC_PER_SEC) 119 #define NSEC_PER_SEC (NSEC_PER_MSEC * MSEC_PER_SEC) 120 121 #define SLEEP_TIME_MS_DEFAULT (1000L) 122 #define TRY_PRINTMUTEX_MS (1000) 123 124 static long sleep_time = (USEC_PER_MSEC * SLEEP_TIME_MS_DEFAULT); 125 126 static const char * const queue_full_warning = 127 "Could not queue trace for printing. It is likely that events happen faster\n" 128 "than what they can be printed. Probably partly because of random sleeping\n"; 129 130 static const char * const no_tracer_msg = 131 "Could not find any tracers! Running this program as root may help!\n"; 132 133 static const char * const no_latency_tr_msg = 134 "No latency tracers are supported by your kernel!\n"; 135 136 struct policy { 137 const char *name; 138 int policy; 139 int default_pri; 140 }; 141 142 static const struct policy policies[] = { 143 { "other", SCHED_OTHER, DEFAULT_PRI }, 144 { "batch", SCHED_BATCH, DEFAULT_PRI }, 145 { "idle", SCHED_IDLE, DEFAULT_PRI }, 146 { "rr", SCHED_RR, RT_DEFAULT_PRI }, 147 { "fifo", SCHED_FIFO, RT_DEFAULT_PRI }, 148 { NULL, 0, DEFAULT_PRI } 149 }; 150 151 /* 152 * The default tracer will be the first on this list that is supported by the 153 * currently running Linux kernel. 154 */ 155 static const char * const relevant_tracers[] = { 156 "preemptirqsoff", 157 "preemptoff", 158 "irqsoff", 159 "wakeup", 160 "wakeup_rt", 161 "wakeup_dl", 162 NULL 163 }; 164 165 /* This is the list of tracers for which random sleep makes sense */ 166 static const char * const random_tracers[] = { 167 "preemptirqsoff", 168 "preemptoff", 169 "irqsoff", 170 NULL 171 }; 172 173 static const char *current_tracer; 174 static bool force_tracer; 175 176 struct ftrace_state { 177 char *tracer; 178 char *thresh; 179 bool opt[OPTIDX_NR]; 180 bool opt_valid[OPTIDX_NR]; 181 pthread_mutex_t mutex; 182 }; 183 184 struct entry { 185 int ticket; 186 int ticket_completed_ref; 187 }; 188 189 struct print_state { 190 int ticket_counter; 191 int ticket_completed; 192 pthread_mutex_t mutex; 193 pthread_cond_t cond; 194 int cnt; 195 pthread_mutex_t cnt_mutex; 196 }; 197 198 struct short_msg { 199 char buf[160]; 200 int len; 201 }; 202 203 static struct print_state printstate; 204 static struct ftrace_state save_state; 205 volatile sig_atomic_t signal_flag; 206 207 #define PROB_TABLE_MAX_SIZE (1000) 208 209 int probabilities[PROB_TABLE_MAX_SIZE]; 210 211 struct sleep_table { 212 int *table; 213 int size; 214 pthread_mutex_t mutex; 215 }; 216 217 static struct sleep_table sleeptable; 218 219 #define QUEUE_SIZE (10) 220 221 struct queue { 222 struct entry entries[QUEUE_SIZE]; 223 int next_prod_idx; 224 int next_cons_idx; 225 pthread_mutex_t mutex; 226 pthread_cond_t cond; 227 }; 228 229 #define MAX_THREADS (40) 230 231 struct queue printqueue; 232 pthread_t printthread[MAX_THREADS]; 233 pthread_mutex_t print_mtx; 234 #define PRINT_BUFFER_SIZE (16 * 1024 * 1024) 235 236 static void cleanup_exit(int status); 237 static int set_trace_opt(const char *opt, bool value); 238 239 static __always_inline void *malloc_or_die(size_t size) 240 { 241 void *ptr = malloc(size); 242 243 if (unlikely(ptr == NULL)) { 244 warn("malloc() failed"); 245 cleanup_exit(EXIT_FAILURE); 246 } 247 return ptr; 248 } 249 250 static __always_inline void *malloc_or_die_nocleanup(size_t size) 251 { 252 void *ptr = malloc(size); 253 254 if (unlikely(ptr == NULL)) 255 err(0, "malloc() failed"); 256 return ptr; 257 } 258 259 static __always_inline void write_or_die(int fd, const char *buf, size_t count) 260 { 261 ssize_t r; 262 263 do { 264 r = write(fd, buf, count); 265 if (unlikely(r < 0)) { 266 if (errno == EINTR) 267 continue; 268 warn("write() failed"); 269 cleanup_exit(EXIT_FAILURE); 270 } 271 count -= r; 272 buf += r; 273 } while (count > 0); 274 } 275 276 static __always_inline void clock_gettime_or_die(clockid_t clk_id, 277 struct timespec *tp) 278 { 279 int r = clock_gettime(clk_id, tp); 280 281 if (unlikely(r != 0)) 282 err(EXIT_FAILURE, "clock_gettime() failed"); 283 } 284 285 static __always_inline void sigemptyset_or_die(sigset_t *s) 286 { 287 if (unlikely(sigemptyset(s) != 0)) { 288 warn("sigemptyset() failed"); 289 cleanup_exit(EXIT_FAILURE); 290 } 291 } 292 293 static __always_inline void sigaddset_or_die(sigset_t *s, int signum) 294 { 295 if (unlikely(sigaddset(s, signum) != 0)) { 296 warn("sigemptyset() failed"); 297 cleanup_exit(EXIT_FAILURE); 298 } 299 } 300 301 static __always_inline void sigaction_or_die(int signum, 302 const struct sigaction *act, 303 struct sigaction *oldact) 304 { 305 if (unlikely(sigaction(signum, act, oldact) != 0)) { 306 warn("sigaction() failed"); 307 cleanup_exit(EXIT_FAILURE); 308 } 309 } 310 311 static void open_stdout(void) 312 { 313 if (setvbuf(stdout, NULL, _IONBF, 0) != 0) 314 err(EXIT_FAILURE, "setvbuf() failed"); 315 fd_stdout = fileno(stdout); 316 if (fd_stdout < 0) 317 err(EXIT_FAILURE, "fileno() failed"); 318 } 319 320 /* 321 * It's not worth it to call cleanup_exit() from mutex functions because 322 * cleanup_exit() uses mutexes. 323 */ 324 static __always_inline void mutex_lock(pthread_mutex_t *mtx) 325 { 326 errno = pthread_mutex_lock(mtx); 327 if (unlikely(errno)) 328 err(EXIT_FAILURE, "pthread_mutex_lock() failed"); 329 } 330 331 332 static __always_inline void mutex_unlock(pthread_mutex_t *mtx) 333 { 334 errno = pthread_mutex_unlock(mtx); 335 if (unlikely(errno)) 336 err(EXIT_FAILURE, "pthread_mutex_unlock() failed"); 337 } 338 339 static __always_inline void cond_signal(pthread_cond_t *cond) 340 { 341 errno = pthread_cond_signal(cond); 342 if (unlikely(errno)) 343 err(EXIT_FAILURE, "pthread_cond_signal() failed"); 344 } 345 346 static __always_inline void cond_wait(pthread_cond_t *restrict cond, 347 pthread_mutex_t *restrict mutex) 348 { 349 errno = pthread_cond_wait(cond, mutex); 350 if (unlikely(errno)) 351 err(EXIT_FAILURE, "pthread_cond_wait() failed"); 352 } 353 354 static __always_inline void cond_broadcast(pthread_cond_t *cond) 355 { 356 errno = pthread_cond_broadcast(cond); 357 if (unlikely(errno)) 358 err(EXIT_FAILURE, "pthread_cond_broadcast() failed"); 359 } 360 361 static __always_inline void 362 mutex_init(pthread_mutex_t *mutex, 363 const pthread_mutexattr_t *attr) 364 { 365 errno = pthread_mutex_init(mutex, attr); 366 if (errno) 367 err(EXIT_FAILURE, "pthread_mutex_init() failed"); 368 } 369 370 static __always_inline void mutexattr_init(pthread_mutexattr_t *attr) 371 { 372 errno = pthread_mutexattr_init(attr); 373 if (errno) 374 err(EXIT_FAILURE, "pthread_mutexattr_init() failed"); 375 } 376 377 static __always_inline void mutexattr_destroy(pthread_mutexattr_t *attr) 378 { 379 errno = pthread_mutexattr_destroy(attr); 380 if (errno) 381 err(EXIT_FAILURE, "pthread_mutexattr_destroy() failed"); 382 } 383 384 static __always_inline void mutexattr_settype(pthread_mutexattr_t *attr, 385 int type) 386 { 387 errno = pthread_mutexattr_settype(attr, type); 388 if (errno) 389 err(EXIT_FAILURE, "pthread_mutexattr_settype() failed"); 390 } 391 392 static __always_inline void condattr_init(pthread_condattr_t *attr) 393 { 394 errno = pthread_condattr_init(attr); 395 if (errno) 396 err(EXIT_FAILURE, "pthread_condattr_init() failed"); 397 } 398 399 static __always_inline void condattr_destroy(pthread_condattr_t *attr) 400 { 401 errno = pthread_condattr_destroy(attr); 402 if (errno) 403 err(EXIT_FAILURE, "pthread_condattr_destroy() failed"); 404 } 405 406 static __always_inline void condattr_setclock(pthread_condattr_t *attr, 407 clockid_t clock_id) 408 { 409 errno = pthread_condattr_setclock(attr, clock_id); 410 if (unlikely(errno)) 411 err(EXIT_FAILURE, "pthread_condattr_setclock() failed"); 412 } 413 414 static __always_inline void cond_init(pthread_cond_t *cond, 415 const pthread_condattr_t *attr) 416 { 417 errno = pthread_cond_init(cond, attr); 418 if (errno) 419 err(EXIT_FAILURE, "pthread_cond_init() failed"); 420 } 421 422 static __always_inline int 423 cond_timedwait(pthread_cond_t *restrict cond, 424 pthread_mutex_t *restrict mutex, 425 const struct timespec *restrict abstime) 426 { 427 errno = pthread_cond_timedwait(cond, mutex, abstime); 428 if (errno && errno != ETIMEDOUT) 429 err(EXIT_FAILURE, "pthread_cond_timedwait() failed"); 430 return errno; 431 } 432 433 static void init_printstate(void) 434 { 435 pthread_condattr_t cattr; 436 437 printstate.ticket_counter = 0; 438 printstate.ticket_completed = 0; 439 printstate.cnt = 0; 440 441 mutex_init(&printstate.mutex, NULL); 442 443 condattr_init(&cattr); 444 condattr_setclock(&cattr, CLOCK_MONOTONIC); 445 cond_init(&printstate.cond, &cattr); 446 condattr_destroy(&cattr); 447 } 448 449 static void init_print_mtx(void) 450 { 451 pthread_mutexattr_t mattr; 452 453 mutexattr_init(&mattr); 454 mutexattr_settype(&mattr, PTHREAD_MUTEX_RECURSIVE); 455 mutex_init(&print_mtx, &mattr); 456 mutexattr_destroy(&mattr); 457 458 } 459 460 static void signal_blocking(int how) 461 { 462 sigset_t s; 463 464 sigemptyset_or_die(&s); 465 sigaddset_or_die(&s, SIGHUP); 466 sigaddset_or_die(&s, SIGTERM); 467 sigaddset_or_die(&s, SIGINT); 468 469 errno = pthread_sigmask(how, &s, NULL); 470 if (unlikely(errno)) { 471 warn("pthread_sigmask() failed"); 472 cleanup_exit(EXIT_FAILURE); 473 } 474 } 475 476 static void signal_handler(int num) 477 { 478 signal_flag = num; 479 } 480 481 static void setup_sig_handler(void) 482 { 483 struct sigaction sa; 484 485 memset(&sa, 0, sizeof(sa)); 486 sa.sa_handler = signal_handler; 487 488 sigaction_or_die(SIGHUP, &sa, NULL); 489 sigaction_or_die(SIGTERM, &sa, NULL); 490 sigaction_or_die(SIGINT, &sa, NULL); 491 } 492 493 static void process_signal(int signal) 494 { 495 char *name; 496 497 name = strsignal(signal); 498 if (name == NULL) 499 printf("Received signal %d\n", signal); 500 else 501 printf("Received signal %d (%s)\n", signal, name); 502 cleanup_exit(EXIT_SUCCESS); 503 } 504 505 static __always_inline void check_signals(void) 506 { 507 int signal = signal_flag; 508 509 if (unlikely(signal)) 510 process_signal(signal); 511 } 512 513 static __always_inline void get_time_in_future(struct timespec *future, 514 long time_us) 515 { 516 long nsec; 517 518 clock_gettime_or_die(CLOCK_MONOTONIC, future); 519 future->tv_sec += time_us / USEC_PER_SEC; 520 nsec = future->tv_nsec + (time_us * NSEC_PER_USEC) % NSEC_PER_SEC; 521 if (nsec >= NSEC_PER_SEC) { 522 future->tv_nsec = nsec % NSEC_PER_SEC; 523 future->tv_sec += 1; 524 } 525 } 526 527 static __always_inline bool time_has_passed(const struct timespec *time) 528 { 529 struct timespec now; 530 531 clock_gettime_or_die(CLOCK_MONOTONIC, &now); 532 if (now.tv_sec > time->tv_sec) 533 return true; 534 if (now.tv_sec < time->tv_sec) 535 return false; 536 return (now.tv_nsec >= time->tv_nsec); 537 } 538 539 static bool mutex_trylock_limit(pthread_mutex_t *mutex, int time_ms) 540 { 541 long time_us = time_ms * USEC_PER_MSEC; 542 struct timespec limit; 543 544 get_time_in_future(&limit, time_us); 545 do { 546 errno = pthread_mutex_trylock(mutex); 547 if (errno && errno != EBUSY) 548 err(EXIT_FAILURE, "pthread_mutex_trylock() failed"); 549 } while (errno && !time_has_passed(&limit)); 550 return errno == 0; 551 } 552 553 static void restore_trace_opts(const struct ftrace_state *state, 554 const bool *cur) 555 { 556 int i; 557 int r; 558 559 for (i = 0; i < OPTIDX_NR; i++) 560 if (state->opt_valid[i] && state->opt[i] != cur[i]) { 561 r = set_trace_opt(optstr[i], state->opt[i]); 562 if (r < 0) 563 warnx("Failed to restore the %s option to %s", 564 optstr[i], bool2str(state->opt[i])); 565 else if (verbose_ftrace()) 566 printf("Restored the %s option in %s to %s\n", 567 optstr[i], TR_OPTIONS, 568 bool2str(state->opt[i])); 569 } 570 } 571 572 static char *read_file(const char *file, enum errhandling h) 573 { 574 int psize; 575 char *r; 576 static const char *emsg = "Failed to read the %s file"; 577 578 r = tracefs_instance_file_read(NULL, file, &psize); 579 if (!r) { 580 if (h) { 581 warn(emsg, file); 582 if (h == ERR_CLEANUP) 583 cleanup_exit(EXIT_FAILURE); 584 } else 585 errx(EXIT_FAILURE, emsg, file); 586 } 587 588 if (r && r[psize - 1] == '\n') 589 r[psize - 1] = '\0'; 590 return r; 591 } 592 593 static void restore_file(const char *file, char **saved, const char *cur) 594 { 595 if (*saved && was_changed(*saved, cur)) { 596 if (tracefs_instance_file_write(NULL, file, *saved) < 0) 597 warnx("Failed to restore %s to %s!", file, *saved); 598 else if (verbose_ftrace()) 599 printf("Restored %s to %s\n", file, *saved); 600 free(*saved); 601 *saved = NULL; 602 } 603 } 604 605 static void restore_ftrace(void) 606 { 607 mutex_lock(&save_state.mutex); 608 609 restore_file(TR_CURRENT, &save_state.tracer, current_tracer); 610 restore_file(TR_THRESH, &save_state.thresh, threshold); 611 restore_trace_opts(&save_state, use_options); 612 613 mutex_unlock(&save_state.mutex); 614 } 615 616 static void cleanup_exit(int status) 617 { 618 char *maxlat; 619 620 if (!setup_ftrace) 621 exit(status); 622 623 /* 624 * We try the print_mtx for 1 sec in order to avoid garbled 625 * output if possible, but if it cannot be obtained we proceed anyway. 626 */ 627 mutex_trylock_limit(&print_mtx, TRY_PRINTMUTEX_MS); 628 629 maxlat = read_file(TR_MAXLAT, ERR_WARN); 630 if (maxlat) { 631 printf("The maximum detected latency was: %sus\n", maxlat); 632 free(maxlat); 633 } 634 635 restore_ftrace(); 636 /* 637 * We do not need to unlock the print_mtx here because we will exit at 638 * the end of this function. Unlocking print_mtx causes problems if a 639 * print thread happens to be waiting for the mutex because we have 640 * just changed the ftrace settings to the original and thus the 641 * print thread would output incorrect data from ftrace. 642 */ 643 exit(status); 644 } 645 646 static void init_save_state(void) 647 { 648 pthread_mutexattr_t mattr; 649 650 mutexattr_init(&mattr); 651 mutexattr_settype(&mattr, PTHREAD_MUTEX_RECURSIVE); 652 mutex_init(&save_state.mutex, &mattr); 653 mutexattr_destroy(&mattr); 654 655 save_state.tracer = NULL; 656 save_state.thresh = NULL; 657 save_state.opt_valid[OPTIDX_FUNC_TR] = false; 658 save_state.opt_valid[OPTIDX_DISP_GR] = false; 659 } 660 661 static int printstate_next_ticket(struct entry *req) 662 { 663 int r; 664 665 r = ++(printstate.ticket_counter); 666 req->ticket = r; 667 req->ticket_completed_ref = printstate.ticket_completed; 668 cond_broadcast(&printstate.cond); 669 return r; 670 } 671 672 static __always_inline 673 void printstate_mark_req_completed(const struct entry *req) 674 { 675 if (req->ticket > printstate.ticket_completed) 676 printstate.ticket_completed = req->ticket; 677 } 678 679 static __always_inline 680 bool printstate_has_new_req_arrived(const struct entry *req) 681 { 682 return (printstate.ticket_counter != req->ticket); 683 } 684 685 static __always_inline int printstate_cnt_inc(void) 686 { 687 int value; 688 689 mutex_lock(&printstate.cnt_mutex); 690 value = ++printstate.cnt; 691 mutex_unlock(&printstate.cnt_mutex); 692 return value; 693 } 694 695 static __always_inline int printstate_cnt_dec(void) 696 { 697 int value; 698 699 mutex_lock(&printstate.cnt_mutex); 700 value = --printstate.cnt; 701 mutex_unlock(&printstate.cnt_mutex); 702 return value; 703 } 704 705 static __always_inline int printstate_cnt_read(void) 706 { 707 int value; 708 709 mutex_lock(&printstate.cnt_mutex); 710 value = printstate.cnt; 711 mutex_unlock(&printstate.cnt_mutex); 712 return value; 713 } 714 715 static __always_inline 716 bool prev_req_won_race(const struct entry *req) 717 { 718 return (printstate.ticket_completed != req->ticket_completed_ref); 719 } 720 721 static void sleeptable_resize(int size, bool printout, struct short_msg *msg) 722 { 723 int bytes; 724 725 if (printout) { 726 msg->len = 0; 727 if (unlikely(size > PROB_TABLE_MAX_SIZE)) 728 bytes = snprintf(msg->buf, sizeof(msg->buf), 729 "Cannot increase probability table to %d (maximum size reached)\n", size); 730 else 731 bytes = snprintf(msg->buf, sizeof(msg->buf), 732 "Increasing probability table to %d\n", size); 733 if (bytes < 0) 734 warn("snprintf() failed"); 735 else 736 msg->len = bytes; 737 } 738 739 if (unlikely(size < 0)) { 740 /* Should never happen */ 741 warnx("Bad program state at %s:%d", __FILE__, __LINE__); 742 cleanup_exit(EXIT_FAILURE); 743 return; 744 } 745 sleeptable.size = size; 746 sleeptable.table = &probabilities[PROB_TABLE_MAX_SIZE - size]; 747 } 748 749 static void init_probabilities(void) 750 { 751 int i; 752 int j = 1000; 753 754 for (i = 0; i < PROB_TABLE_MAX_SIZE; i++) { 755 probabilities[i] = 1000 / j; 756 j--; 757 } 758 mutex_init(&sleeptable.mutex, NULL); 759 } 760 761 static int table_get_probability(const struct entry *req, 762 struct short_msg *msg) 763 { 764 int diff = req->ticket - req->ticket_completed_ref; 765 int rval = 0; 766 767 msg->len = 0; 768 diff--; 769 /* Should never happen...*/ 770 if (unlikely(diff < 0)) { 771 warnx("Programmer assumption error at %s:%d\n", __FILE__, 772 __LINE__); 773 cleanup_exit(EXIT_FAILURE); 774 } 775 mutex_lock(&sleeptable.mutex); 776 if (diff >= (sleeptable.size - 1)) { 777 rval = sleeptable.table[sleeptable.size - 1]; 778 sleeptable_resize(sleeptable.size + 1, verbose_sizechange(), 779 msg); 780 } else { 781 rval = sleeptable.table[diff]; 782 } 783 mutex_unlock(&sleeptable.mutex); 784 return rval; 785 } 786 787 static void init_queue(struct queue *q) 788 { 789 q->next_prod_idx = 0; 790 q->next_cons_idx = 0; 791 mutex_init(&q->mutex, NULL); 792 errno = pthread_cond_init(&q->cond, NULL); 793 if (errno) 794 err(EXIT_FAILURE, "pthread_cond_init() failed"); 795 } 796 797 static __always_inline int queue_len(const struct queue *q) 798 { 799 if (q->next_prod_idx >= q->next_cons_idx) 800 return q->next_prod_idx - q->next_cons_idx; 801 else 802 return QUEUE_SIZE - q->next_cons_idx + q->next_prod_idx; 803 } 804 805 static __always_inline int queue_nr_free(const struct queue *q) 806 { 807 int nr_free = QUEUE_SIZE - queue_len(q); 808 809 /* 810 * If there is only one slot left we will anyway lie and claim that the 811 * queue is full because adding an element will make it appear empty 812 */ 813 if (nr_free == 1) 814 nr_free = 0; 815 return nr_free; 816 } 817 818 static __always_inline void queue_idx_inc(int *idx) 819 { 820 *idx = (*idx + 1) % QUEUE_SIZE; 821 } 822 823 static __always_inline void queue_push_to_back(struct queue *q, 824 const struct entry *e) 825 { 826 q->entries[q->next_prod_idx] = *e; 827 queue_idx_inc(&q->next_prod_idx); 828 } 829 830 static __always_inline struct entry queue_pop_from_front(struct queue *q) 831 { 832 struct entry e = q->entries[q->next_cons_idx]; 833 834 queue_idx_inc(&q->next_cons_idx); 835 return e; 836 } 837 838 static __always_inline void queue_cond_signal(struct queue *q) 839 { 840 cond_signal(&q->cond); 841 } 842 843 static __always_inline void queue_cond_wait(struct queue *q) 844 { 845 cond_wait(&q->cond, &q->mutex); 846 } 847 848 static __always_inline int queue_try_to_add_entry(struct queue *q, 849 const struct entry *e) 850 { 851 int r = 0; 852 853 mutex_lock(&q->mutex); 854 if (queue_nr_free(q) > 0) { 855 queue_push_to_back(q, e); 856 cond_signal(&q->cond); 857 } else 858 r = -1; 859 mutex_unlock(&q->mutex); 860 return r; 861 } 862 863 static struct entry queue_wait_for_entry(struct queue *q) 864 { 865 struct entry e; 866 867 mutex_lock(&q->mutex); 868 while (true) { 869 if (queue_len(&printqueue) > 0) { 870 e = queue_pop_from_front(q); 871 break; 872 } 873 queue_cond_wait(q); 874 } 875 mutex_unlock(&q->mutex); 876 877 return e; 878 } 879 880 static const struct policy *policy_from_name(const char *name) 881 { 882 const struct policy *p = &policies[0]; 883 884 while (p->name != NULL) { 885 if (!strcmp(name, p->name)) 886 return p; 887 p++; 888 } 889 return NULL; 890 } 891 892 static const char *policy_name(int policy) 893 { 894 const struct policy *p = &policies[0]; 895 static const char *rval = "unknown"; 896 897 while (p->name != NULL) { 898 if (p->policy == policy) 899 return p->name; 900 p++; 901 } 902 return rval; 903 } 904 905 static bool is_relevant_tracer(const char *name) 906 { 907 unsigned int i; 908 909 for (i = 0; relevant_tracers[i]; i++) 910 if (!strcmp(name, relevant_tracers[i])) 911 return true; 912 return false; 913 } 914 915 static bool random_makes_sense(const char *name) 916 { 917 unsigned int i; 918 919 for (i = 0; random_tracers[i]; i++) 920 if (!strcmp(name, random_tracers[i])) 921 return true; 922 return false; 923 } 924 925 static void show_available(void) 926 { 927 char **tracers; 928 int found = 0; 929 int i; 930 931 tracers = tracefs_tracers(NULL); 932 for (i = 0; tracers && tracers[i]; i++) { 933 if (is_relevant_tracer(tracers[i])) 934 found++; 935 } 936 937 if (!tracers) { 938 warnx(no_tracer_msg); 939 return; 940 } 941 942 if (!found) { 943 warnx(no_latency_tr_msg); 944 tracefs_list_free(tracers); 945 return; 946 } 947 948 printf("The following latency tracers are available on your system:\n"); 949 for (i = 0; tracers[i]; i++) { 950 if (is_relevant_tracer(tracers[i])) 951 printf("%s\n", tracers[i]); 952 } 953 tracefs_list_free(tracers); 954 } 955 956 static bool tracer_valid(const char *name, bool *notracer) 957 { 958 char **tracers; 959 int i; 960 bool rval = false; 961 962 *notracer = false; 963 tracers = tracefs_tracers(NULL); 964 if (!tracers) { 965 *notracer = true; 966 return false; 967 } 968 for (i = 0; tracers[i]; i++) 969 if (!strcmp(tracers[i], name)) { 970 rval = true; 971 break; 972 } 973 tracefs_list_free(tracers); 974 return rval; 975 } 976 977 static const char *find_default_tracer(void) 978 { 979 int i; 980 bool notracer; 981 bool valid; 982 983 for (i = 0; relevant_tracers[i]; i++) { 984 valid = tracer_valid(relevant_tracers[i], ¬racer); 985 if (notracer) 986 errx(EXIT_FAILURE, no_tracer_msg); 987 if (valid) 988 return relevant_tracers[i]; 989 } 990 return NULL; 991 } 992 993 static bool toss_coin(struct drand48_data *buffer, unsigned int prob) 994 { 995 long r; 996 997 if (unlikely(lrand48_r(buffer, &r))) { 998 warnx("lrand48_r() failed"); 999 cleanup_exit(EXIT_FAILURE); 1000 } 1001 r = r % 1000L; 1002 if (r < prob) 1003 return true; 1004 else 1005 return false; 1006 } 1007 1008 1009 static long go_to_sleep(const struct entry *req) 1010 { 1011 struct timespec future; 1012 long delay = sleep_time; 1013 1014 get_time_in_future(&future, delay); 1015 1016 mutex_lock(&printstate.mutex); 1017 while (!printstate_has_new_req_arrived(req)) { 1018 cond_timedwait(&printstate.cond, &printstate.mutex, &future); 1019 if (time_has_passed(&future)) 1020 break; 1021 } 1022 1023 if (printstate_has_new_req_arrived(req)) 1024 delay = -1; 1025 mutex_unlock(&printstate.mutex); 1026 1027 return delay; 1028 } 1029 1030 1031 static void set_priority(void) 1032 { 1033 int r; 1034 pid_t pid; 1035 struct sched_param param; 1036 1037 memset(¶m, 0, sizeof(param)); 1038 param.sched_priority = sched_pri; 1039 1040 pid = getpid(); 1041 r = sched_setscheduler(pid, sched_policy, ¶m); 1042 1043 if (r != 0) 1044 err(EXIT_FAILURE, "sched_setscheduler() failed"); 1045 } 1046 1047 pid_t latency_collector_gettid(void) 1048 { 1049 return (pid_t) syscall(__NR_gettid); 1050 } 1051 1052 static void print_priority(void) 1053 { 1054 pid_t tid; 1055 int policy; 1056 int r; 1057 struct sched_param param; 1058 1059 tid = latency_collector_gettid(); 1060 r = pthread_getschedparam(pthread_self(), &policy, ¶m); 1061 if (r != 0) { 1062 warn("pthread_getschedparam() failed"); 1063 cleanup_exit(EXIT_FAILURE); 1064 } 1065 mutex_lock(&print_mtx); 1066 printf("Thread %d runs with scheduling policy %s and priority %d\n", 1067 tid, policy_name(policy), param.sched_priority); 1068 mutex_unlock(&print_mtx); 1069 } 1070 1071 static __always_inline 1072 void __print_skipmessage(const struct short_msg *resize_msg, 1073 const struct timespec *timestamp, char *buffer, 1074 size_t bufspace, const struct entry *req, bool excuse, 1075 const char *str) 1076 { 1077 ssize_t bytes = 0; 1078 char *p = &buffer[0]; 1079 long us, sec; 1080 int r; 1081 1082 sec = timestamp->tv_sec; 1083 us = timestamp->tv_nsec / 1000; 1084 1085 if (resize_msg != NULL && resize_msg->len > 0) { 1086 strncpy(p, resize_msg->buf, resize_msg->len); 1087 bytes += resize_msg->len; 1088 p += resize_msg->len; 1089 bufspace -= resize_msg->len; 1090 } 1091 1092 if (excuse) 1093 r = snprintf(p, bufspace, 1094 "%ld.%06ld Latency %d printout skipped due to %s\n", 1095 sec, us, req->ticket, str); 1096 else 1097 r = snprintf(p, bufspace, "%ld.%06ld Latency %d detected\n", 1098 sec, us, req->ticket); 1099 1100 if (r < 0) 1101 warn("snprintf() failed"); 1102 else 1103 bytes += r; 1104 1105 /* These prints could happen concurrently */ 1106 mutex_lock(&print_mtx); 1107 write_or_die(fd_stdout, buffer, bytes); 1108 mutex_unlock(&print_mtx); 1109 } 1110 1111 static void print_skipmessage(const struct short_msg *resize_msg, 1112 const struct timespec *timestamp, char *buffer, 1113 size_t bufspace, const struct entry *req, 1114 bool excuse) 1115 { 1116 __print_skipmessage(resize_msg, timestamp, buffer, bufspace, req, 1117 excuse, "random delay"); 1118 } 1119 1120 static void print_lostmessage(const struct timespec *timestamp, char *buffer, 1121 size_t bufspace, const struct entry *req, 1122 const char *reason) 1123 { 1124 __print_skipmessage(NULL, timestamp, buffer, bufspace, req, true, 1125 reason); 1126 } 1127 1128 static void print_tracefile(const struct short_msg *resize_msg, 1129 const struct timespec *timestamp, char *buffer, 1130 size_t bufspace, long slept, 1131 const struct entry *req) 1132 { 1133 static const int reserve = 256; 1134 char *p = &buffer[0]; 1135 ssize_t bytes = 0; 1136 ssize_t bytes_tot = 0; 1137 long us, sec; 1138 long slept_ms; 1139 int trace_fd; 1140 1141 /* Save some space for the final string and final null char */ 1142 bufspace = bufspace - reserve - 1; 1143 1144 if (resize_msg != NULL && resize_msg->len > 0) { 1145 bytes = resize_msg->len; 1146 strncpy(p, resize_msg->buf, bytes); 1147 bytes_tot += bytes; 1148 p += bytes; 1149 bufspace -= bytes; 1150 } 1151 1152 trace_fd = open(debug_tracefile, O_RDONLY); 1153 1154 if (trace_fd < 0) { 1155 warn("open() failed on %s", debug_tracefile); 1156 return; 1157 } 1158 1159 sec = timestamp->tv_sec; 1160 us = timestamp->tv_nsec / 1000; 1161 1162 if (slept != 0) { 1163 slept_ms = slept / 1000; 1164 bytes = snprintf(p, bufspace, 1165 "%ld.%06ld Latency %d randomly sleep for %ld ms before print\n", 1166 sec, us, req->ticket, slept_ms); 1167 } else { 1168 bytes = snprintf(p, bufspace, 1169 "%ld.%06ld Latency %d immediate print\n", sec, 1170 us, req->ticket); 1171 } 1172 1173 if (bytes < 0) { 1174 warn("snprintf() failed"); 1175 return; 1176 } 1177 p += bytes; 1178 bufspace -= bytes; 1179 bytes_tot += bytes; 1180 1181 bytes = snprintf(p, bufspace, 1182 ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> BEGIN <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n" 1183 ); 1184 1185 if (bytes < 0) { 1186 warn("snprintf() failed"); 1187 return; 1188 } 1189 1190 p += bytes; 1191 bufspace -= bytes; 1192 bytes_tot += bytes; 1193 1194 do { 1195 bytes = read(trace_fd, p, bufspace); 1196 if (bytes < 0) { 1197 if (errno == EINTR) 1198 continue; 1199 warn("read() failed on %s", debug_tracefile); 1200 if (unlikely(close(trace_fd) != 0)) 1201 warn("close() failed on %s", debug_tracefile); 1202 return; 1203 } 1204 if (bytes == 0) 1205 break; 1206 p += bytes; 1207 bufspace -= bytes; 1208 bytes_tot += bytes; 1209 } while (true); 1210 1211 if (unlikely(close(trace_fd) != 0)) 1212 warn("close() failed on %s", debug_tracefile); 1213 1214 printstate_cnt_dec(); 1215 /* Add the reserve space back to the budget for the final string */ 1216 bufspace += reserve; 1217 1218 bytes = snprintf(p, bufspace, 1219 ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> END <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n\n"); 1220 1221 if (bytes < 0) { 1222 warn("snprintf() failed"); 1223 return; 1224 } 1225 1226 bytes_tot += bytes; 1227 1228 /* These prints could happen concurrently */ 1229 mutex_lock(&print_mtx); 1230 write_or_die(fd_stdout, buffer, bytes_tot); 1231 mutex_unlock(&print_mtx); 1232 } 1233 1234 static char *get_no_opt(const char *opt) 1235 { 1236 char *no_opt; 1237 int s; 1238 1239 s = strlen(opt) + strlen(OPT_NO_PREFIX) + 1; 1240 /* We may be called from cleanup_exit() via set_trace_opt() */ 1241 no_opt = malloc_or_die_nocleanup(s); 1242 strcpy(no_opt, OPT_NO_PREFIX); 1243 strcat(no_opt, opt); 1244 return no_opt; 1245 } 1246 1247 static char *find_next_optstr(const char *allopt, const char **next) 1248 { 1249 const char *begin; 1250 const char *end; 1251 char *r; 1252 int s = 0; 1253 1254 if (allopt == NULL) 1255 return NULL; 1256 1257 for (begin = allopt; *begin != '\0'; begin++) { 1258 if (isgraph(*begin)) 1259 break; 1260 } 1261 1262 if (*begin == '\0') 1263 return NULL; 1264 1265 for (end = begin; *end != '\0' && isgraph(*end); end++) 1266 s++; 1267 1268 r = malloc_or_die_nocleanup(s + 1); 1269 strncpy(r, begin, s); 1270 r[s] = '\0'; 1271 *next = begin + s; 1272 return r; 1273 } 1274 1275 static bool get_trace_opt(const char *allopt, const char *opt, bool *found) 1276 { 1277 *found = false; 1278 char *no_opt; 1279 char *str; 1280 const char *next = allopt; 1281 bool rval = false; 1282 1283 no_opt = get_no_opt(opt); 1284 1285 do { 1286 str = find_next_optstr(next, &next); 1287 if (str == NULL) 1288 break; 1289 if (!strcmp(str, opt)) { 1290 *found = true; 1291 rval = true; 1292 free(str); 1293 break; 1294 } 1295 if (!strcmp(str, no_opt)) { 1296 *found = true; 1297 rval = false; 1298 free(str); 1299 break; 1300 } 1301 free(str); 1302 } while (true); 1303 free(no_opt); 1304 1305 return rval; 1306 } 1307 1308 static int set_trace_opt(const char *opt, bool value) 1309 { 1310 char *str; 1311 int r; 1312 1313 if (value) 1314 str = strdup(opt); 1315 else 1316 str = get_no_opt(opt); 1317 1318 r = tracefs_instance_file_write(NULL, TR_OPTIONS, str); 1319 free(str); 1320 return r; 1321 } 1322 1323 void save_trace_opts(struct ftrace_state *state) 1324 { 1325 char *allopt; 1326 int psize; 1327 int i; 1328 1329 allopt = tracefs_instance_file_read(NULL, TR_OPTIONS, &psize); 1330 if (!allopt) 1331 errx(EXIT_FAILURE, "Failed to read the %s file\n", TR_OPTIONS); 1332 1333 for (i = 0; i < OPTIDX_NR; i++) 1334 state->opt[i] = get_trace_opt(allopt, optstr[i], 1335 &state->opt_valid[i]); 1336 1337 free(allopt); 1338 } 1339 1340 static void write_file(const char *file, const char *cur, const char *new, 1341 enum errhandling h) 1342 { 1343 int r; 1344 static const char *emsg = "Failed to write to the %s file!"; 1345 1346 /* Do nothing if we now that the current and new value are equal */ 1347 if (cur && !needs_change(cur, new)) 1348 return; 1349 1350 r = tracefs_instance_file_write(NULL, file, new); 1351 if (r < 0) { 1352 if (h) { 1353 warnx(emsg, file); 1354 if (h == ERR_CLEANUP) 1355 cleanup_exit(EXIT_FAILURE); 1356 } else 1357 errx(EXIT_FAILURE, emsg, file); 1358 } 1359 if (verbose_ftrace()) { 1360 mutex_lock(&print_mtx); 1361 printf("%s was set to %s\n", file, new); 1362 mutex_unlock(&print_mtx); 1363 } 1364 } 1365 1366 static void reset_max_latency(void) 1367 { 1368 write_file(TR_MAXLAT, NULL, "0", ERR_CLEANUP); 1369 } 1370 1371 static void save_and_disable_tracer(void) 1372 { 1373 char *orig_th; 1374 char *tracer; 1375 bool need_nop = false; 1376 1377 mutex_lock(&save_state.mutex); 1378 1379 save_trace_opts(&save_state); 1380 tracer = read_file(TR_CURRENT, ERR_EXIT); 1381 orig_th = read_file(TR_THRESH, ERR_EXIT); 1382 1383 if (needs_change(tracer, NOP_TRACER)) { 1384 mutex_lock(&print_mtx); 1385 if (force_tracer) { 1386 printf( 1387 "The %s tracer is already in use but proceeding anyway!\n", 1388 tracer); 1389 } else { 1390 printf( 1391 "The %s tracer is already in use, cowardly bailing out!\n" 1392 "This could indicate that another program or instance is tracing.\n" 1393 "Use the -F [--force] option to disregard the current tracer.\n", tracer); 1394 exit(0); 1395 } 1396 mutex_unlock(&print_mtx); 1397 need_nop = true; 1398 } 1399 1400 save_state.tracer = tracer; 1401 save_state.thresh = orig_th; 1402 1403 if (need_nop) 1404 write_file(TR_CURRENT, NULL, NOP_TRACER, ERR_EXIT); 1405 1406 mutex_unlock(&save_state.mutex); 1407 } 1408 1409 void set_trace_opts(struct ftrace_state *state, bool *new) 1410 { 1411 int i; 1412 int r; 1413 1414 /* 1415 * We only set options if we earlier detected that the option exists in 1416 * the trace_options file and that the wanted setting is different from 1417 * the one we saw in save_and_disable_tracer() 1418 */ 1419 for (i = 0; i < OPTIDX_NR; i++) 1420 if (state->opt_valid[i] && 1421 state->opt[i] != new[i]) { 1422 r = set_trace_opt(optstr[i], new[i]); 1423 if (r < 0) { 1424 warnx("Failed to set the %s option to %s", 1425 optstr[i], bool2str(new[i])); 1426 cleanup_exit(EXIT_FAILURE); 1427 } 1428 if (verbose_ftrace()) { 1429 mutex_lock(&print_mtx); 1430 printf("%s in %s was set to %s\n", optstr[i], 1431 TR_OPTIONS, bool2str(new[i])); 1432 mutex_unlock(&print_mtx); 1433 } 1434 } 1435 } 1436 1437 static void enable_tracer(void) 1438 { 1439 mutex_lock(&save_state.mutex); 1440 set_trace_opts(&save_state, use_options); 1441 1442 write_file(TR_THRESH, save_state.thresh, threshold, ERR_CLEANUP); 1443 write_file(TR_CURRENT, NOP_TRACER, current_tracer, ERR_CLEANUP); 1444 1445 mutex_unlock(&save_state.mutex); 1446 } 1447 1448 static void tracing_loop(void) 1449 { 1450 int ifd = inotify_init(); 1451 int wd; 1452 const ssize_t bufsize = sizeof(inotify_buffer); 1453 const ssize_t istructsize = sizeof(struct inotify_event); 1454 char *buf = &inotify_buffer[0]; 1455 ssize_t nr_read; 1456 char *p; 1457 int modified; 1458 struct inotify_event *event; 1459 struct entry req; 1460 char *buffer; 1461 const size_t bufspace = PRINT_BUFFER_SIZE; 1462 struct timespec timestamp; 1463 1464 print_priority(); 1465 1466 buffer = malloc_or_die(bufspace); 1467 1468 if (ifd < 0) 1469 err(EXIT_FAILURE, "inotify_init() failed!"); 1470 1471 1472 if (setup_ftrace) { 1473 /* 1474 * We must disable the tracer before resetting the max_latency 1475 */ 1476 save_and_disable_tracer(); 1477 /* 1478 * We must reset the max_latency before the inotify_add_watch() 1479 * call. 1480 */ 1481 reset_max_latency(); 1482 } 1483 1484 wd = inotify_add_watch(ifd, debug_maxlat, IN_MODIFY); 1485 if (wd < 0) 1486 err(EXIT_FAILURE, "inotify_add_watch() failed!"); 1487 1488 if (setup_ftrace) 1489 enable_tracer(); 1490 1491 signal_blocking(SIG_UNBLOCK); 1492 1493 while (true) { 1494 modified = 0; 1495 check_signals(); 1496 nr_read = read(ifd, buf, bufsize); 1497 check_signals(); 1498 if (nr_read < 0) { 1499 if (errno == EINTR) 1500 continue; 1501 warn("read() failed on inotify fd!"); 1502 cleanup_exit(EXIT_FAILURE); 1503 } 1504 if (nr_read == bufsize) 1505 warnx("inotify() buffer filled, skipping events"); 1506 if (nr_read < istructsize) { 1507 warnx("read() returned too few bytes on inotify fd"); 1508 cleanup_exit(EXIT_FAILURE); 1509 } 1510 1511 for (p = buf; p < buf + nr_read;) { 1512 event = (struct inotify_event *) p; 1513 if ((event->mask & IN_MODIFY) != 0) 1514 modified++; 1515 p += istructsize + event->len; 1516 } 1517 while (modified > 0) { 1518 check_signals(); 1519 mutex_lock(&printstate.mutex); 1520 check_signals(); 1521 printstate_next_ticket(&req); 1522 if (printstate_cnt_read() > 0) { 1523 printstate_mark_req_completed(&req); 1524 mutex_unlock(&printstate.mutex); 1525 if (verbose_lostevent()) { 1526 clock_gettime_or_die(CLOCK_MONOTONIC, 1527 ×tamp); 1528 print_lostmessage(×tamp, buffer, 1529 bufspace, &req, 1530 "inotify loop"); 1531 } 1532 break; 1533 } 1534 mutex_unlock(&printstate.mutex); 1535 if (queue_try_to_add_entry(&printqueue, &req) != 0) { 1536 /* These prints could happen concurrently */ 1537 check_signals(); 1538 mutex_lock(&print_mtx); 1539 check_signals(); 1540 write_or_die(fd_stdout, queue_full_warning, 1541 strlen(queue_full_warning)); 1542 mutex_unlock(&print_mtx); 1543 } 1544 modified--; 1545 } 1546 } 1547 } 1548 1549 static void *do_printloop(void *arg) 1550 { 1551 const size_t bufspace = PRINT_BUFFER_SIZE; 1552 char *buffer; 1553 long *rseed = (long *) arg; 1554 struct drand48_data drandbuf; 1555 long slept = 0; 1556 struct entry req; 1557 int prob = 0; 1558 struct timespec timestamp; 1559 struct short_msg resize_msg; 1560 1561 print_priority(); 1562 1563 if (srand48_r(*rseed, &drandbuf) != 0) { 1564 warn("srand48_r() failed!\n"); 1565 cleanup_exit(EXIT_FAILURE); 1566 } 1567 1568 buffer = malloc_or_die(bufspace); 1569 1570 while (true) { 1571 req = queue_wait_for_entry(&printqueue); 1572 clock_gettime_or_die(CLOCK_MONOTONIC, ×tamp); 1573 mutex_lock(&printstate.mutex); 1574 if (prev_req_won_race(&req)) { 1575 printstate_mark_req_completed(&req); 1576 mutex_unlock(&printstate.mutex); 1577 if (verbose_lostevent()) 1578 print_lostmessage(×tamp, buffer, bufspace, 1579 &req, "print loop"); 1580 continue; 1581 } 1582 mutex_unlock(&printstate.mutex); 1583 1584 /* 1585 * Toss a coin to decide if we want to sleep before printing 1586 * out the backtrace. The reason for this is that opening 1587 * /sys/kernel/tracing/trace will cause a blackout of 1588 * hundreds of ms, where no latencies will be noted by the 1589 * latency tracer. Thus by randomly sleeping we try to avoid 1590 * missing traces systematically due to this. With this option 1591 * we will sometimes get the first latency, some other times 1592 * some of the later ones, in case of closely spaced traces. 1593 */ 1594 if (trace_enable && use_random_sleep) { 1595 slept = 0; 1596 prob = table_get_probability(&req, &resize_msg); 1597 if (!toss_coin(&drandbuf, prob)) 1598 slept = go_to_sleep(&req); 1599 if (slept >= 0) { 1600 /* A print is ongoing */ 1601 printstate_cnt_inc(); 1602 /* 1603 * We will do the printout below so we have to 1604 * mark it as completed while we still have the 1605 * mutex. 1606 */ 1607 mutex_lock(&printstate.mutex); 1608 printstate_mark_req_completed(&req); 1609 mutex_unlock(&printstate.mutex); 1610 } 1611 } 1612 if (trace_enable) { 1613 /* 1614 * slept < 0 means that we detected another 1615 * notification in go_to_sleep() above 1616 */ 1617 if (slept >= 0) 1618 /* 1619 * N.B. printstate_cnt_dec(); will be called 1620 * inside print_tracefile() 1621 */ 1622 print_tracefile(&resize_msg, ×tamp, buffer, 1623 bufspace, slept, &req); 1624 else 1625 print_skipmessage(&resize_msg, ×tamp, 1626 buffer, bufspace, &req, true); 1627 } else { 1628 print_skipmessage(&resize_msg, ×tamp, buffer, 1629 bufspace, &req, false); 1630 } 1631 } 1632 return NULL; 1633 } 1634 1635 static void start_printthread(void) 1636 { 1637 unsigned int i; 1638 long *seed; 1639 int ufd; 1640 1641 ufd = open(DEV_URANDOM, O_RDONLY); 1642 if (nr_threads > MAX_THREADS) { 1643 warnx( 1644 "Number of requested print threads was %d, max number is %d\n", 1645 nr_threads, MAX_THREADS); 1646 nr_threads = MAX_THREADS; 1647 } 1648 for (i = 0; i < nr_threads; i++) { 1649 seed = malloc_or_die(sizeof(*seed)); 1650 if (ufd < 0 || 1651 read(ufd, seed, sizeof(*seed)) != sizeof(*seed)) { 1652 printf( 1653 "Warning! Using trivial random number seed, since %s not available\n", 1654 DEV_URANDOM); 1655 fflush(stdout); 1656 *seed = i; 1657 } 1658 errno = pthread_create(&printthread[i], NULL, do_printloop, 1659 seed); 1660 if (errno) 1661 err(EXIT_FAILURE, "pthread_create()"); 1662 } 1663 if (ufd > 0 && close(ufd) != 0) 1664 warn("close() failed"); 1665 } 1666 1667 static void show_usage(void) 1668 { 1669 printf( 1670 "Usage: %s [OPTION]...\n\n" 1671 "Collect closely occurring latencies from %s\n" 1672 "with any of the following tracers: preemptirqsoff, preemptoff, irqsoff, " 1673 "wakeup,\nwakeup_dl, or wakeup_rt.\n\n" 1674 1675 "The occurrence of a latency is detected by monitoring the file\n" 1676 "%s with inotify.\n\n" 1677 1678 "The following options are supported:\n\n" 1679 1680 "-l, --list\t\tList the latency tracers that are supported by the\n" 1681 "\t\t\tcurrently running Linux kernel. If you don't see the\n" 1682 "\t\t\ttracer that you want, you will probably need to\n" 1683 "\t\t\tchange your kernel config and build a new kernel.\n\n" 1684 1685 "-t, --tracer TR\t\tUse the tracer TR. The default is to use the first\n" 1686 "\t\t\ttracer that is supported by the kernel in the following\n" 1687 "\t\t\torder of precedence:\n\n" 1688 "\t\t\tpreemptirqsoff\n" 1689 "\t\t\tpreemptoff\n" 1690 "\t\t\tirqsoff\n" 1691 "\t\t\twakeup\n" 1692 "\t\t\twakeup_rt\n" 1693 "\t\t\twakeup_dl\n" 1694 "\n" 1695 "\t\t\tIf TR is not on the list above, then a warning will be\n" 1696 "\t\t\tprinted.\n\n" 1697 1698 "-F, --force\t\tProceed even if another ftrace tracer is active. Without\n" 1699 "\t\t\tthis option, the program will refuse to start tracing if\n" 1700 "\t\t\tany other tracer than the nop tracer is active.\n\n" 1701 1702 "-s, --threshold TH\tConfigure ftrace to use a threshold of TH microseconds\n" 1703 "\t\t\tfor the tracer. The default is 0, which means that\n" 1704 "\t\t\ttracing_max_latency will be used. tracing_max_latency is\n" 1705 "\t\t\tset to 0 when the program is started and contains the\n" 1706 "\t\t\tmaximum of the latencies that have been encountered.\n\n" 1707 1708 "-f, --function\t\tEnable the function-trace option in trace_options. With\n" 1709 "\t\t\tthis option, ftrace will trace the functions that are\n" 1710 "\t\t\texecuted during a latency, without it we only get the\n" 1711 "\t\t\tbeginning, end, and backtrace.\n\n" 1712 1713 "-g, --graph\t\tEnable the display-graph option in trace_option. This\n" 1714 "\t\t\toption causes ftrace to show the graph of how functions\n" 1715 "\t\t\tare calling other functions.\n\n" 1716 1717 "-c, --policy POL\tRun the program with scheduling policy POL. POL can be\n" 1718 "\t\t\tother, batch, idle, rr or fifo. The default is rr. When\n" 1719 "\t\t\tusing rr or fifo, remember that these policies may cause\n" 1720 "\t\t\tother tasks to experience latencies.\n\n" 1721 1722 "-p, --priority PRI\tRun the program with priority PRI. The acceptable range\n" 1723 "\t\t\tof PRI depends on the scheduling policy.\n\n" 1724 1725 "-n, --notrace\t\tIf latency is detected, do not print out the content of\n" 1726 "\t\t\tthe trace file to standard output\n\n" 1727 1728 "-t, --threads NRTHR\tRun NRTHR threads for printing. Default is %d.\n\n" 1729 1730 "-r, --random\t\tArbitrarily sleep a certain amount of time, default\n" 1731 "\t\t\t%ld ms, before reading the trace file. The\n" 1732 "\t\t\tprobabilities for sleep are chosen so that the\n" 1733 "\t\t\tprobability of obtaining any of a cluster of closely\n" 1734 "\t\t\toccurring latencies are equal, i.e. we will randomly\n" 1735 "\t\t\tchoose which one we collect from the trace file.\n\n" 1736 "\t\t\tThis option is probably only useful with the irqsoff,\n" 1737 "\t\t\tpreemptoff, and preemptirqsoff tracers.\n\n" 1738 1739 "-a, --nrlat NRLAT\tFor the purpose of arbitrary delay, assume that there\n" 1740 "\t\t\tare no more than NRLAT clustered latencies. If NRLAT\n" 1741 "\t\t\tlatencies are detected during a run, this value will\n" 1742 "\t\t\tautomatically be increased to NRLAT + 1 and then to\n" 1743 "\t\t\tNRLAT + 2 and so on. The default is %d. This option\n" 1744 "\t\t\timplies -r. We need to know this number in order to\n" 1745 "\t\t\tbe able to calculate the probabilities of sleeping.\n" 1746 "\t\t\tSpecifically, the probabilities of not sleeping, i.e. to\n" 1747 "\t\t\tdo an immediate printout will be:\n\n" 1748 "\t\t\t1/NRLAT 1/(NRLAT - 1) ... 1/3 1/2 1\n\n" 1749 "\t\t\tThe probability of sleeping will be:\n\n" 1750 "\t\t\t1 - P, where P is from the series above\n\n" 1751 "\t\t\tThis descending probability will cause us to choose\n" 1752 "\t\t\tan occurrence at random. Observe that the final\n" 1753 "\t\t\tprobability is 0, it is when we reach this probability\n" 1754 "\t\t\tthat we increase NRLAT automatically. As an example,\n" 1755 "\t\t\twith the default value of 2, the probabilities will be:\n\n" 1756 "\t\t\t1/2 0\n\n" 1757 "\t\t\tThis means, when a latency is detected we will sleep\n" 1758 "\t\t\twith 50%% probability. If we ever detect another latency\n" 1759 "\t\t\tduring the sleep period, then the probability of sleep\n" 1760 "\t\t\twill be 0%% and the table will be expanded to:\n\n" 1761 "\t\t\t1/3 1/2 0\n\n" 1762 1763 "-v, --verbose\t\tIncrease the verbosity. If this option is given once,\n" 1764 "\t\t\tthen print a message every time that the NRLAT value\n" 1765 "\t\t\tis automatically increased. It also causes a message to\n" 1766 "\t\t\tbe printed when the ftrace settings are changed. If this\n" 1767 "\t\t\toption is given at least twice, then also print a\n" 1768 "\t\t\twarning for lost events.\n\n" 1769 1770 "-u, --time TIME\t\tArbitrarily sleep for a specified time TIME ms before\n" 1771 "\t\t\tprinting out the trace from the trace file. The default\n" 1772 "\t\t\tis %ld ms. This option implies -r.\n\n" 1773 1774 "-x, --no-ftrace\t\tDo not configure ftrace. This assume that the user\n" 1775 "\t\t\tconfigures the ftrace files in sysfs such as\n" 1776 "\t\t\t/sys/kernel/tracing/current_tracer or equivalent.\n\n" 1777 1778 "-i, --tracefile FILE\tUse FILE as trace file. The default is\n" 1779 "\t\t\t%s.\n" 1780 "\t\t\tThis options implies -x\n\n" 1781 1782 "-m, --max-lat FILE\tUse FILE as tracing_max_latency file. The default is\n" 1783 "\t\t\t%s.\n" 1784 "\t\t\tThis options implies -x\n\n" 1785 , 1786 prg_name, debug_tracefile_dflt, debug_maxlat_dflt, DEFAULT_NR_PRINTER_THREADS, 1787 SLEEP_TIME_MS_DEFAULT, DEFAULT_TABLE_SIZE, SLEEP_TIME_MS_DEFAULT, 1788 debug_tracefile_dflt, debug_maxlat_dflt); 1789 } 1790 1791 static void find_tracefiles(void) 1792 { 1793 debug_tracefile_dflt = tracefs_get_tracing_file("trace"); 1794 if (debug_tracefile_dflt == NULL) { 1795 /* This is needed in show_usage() */ 1796 debug_tracefile_dflt = DEBUG_NOFILE; 1797 } 1798 1799 debug_maxlat_dflt = tracefs_get_tracing_file("tracing_max_latency"); 1800 if (debug_maxlat_dflt == NULL) { 1801 /* This is needed in show_usage() */ 1802 debug_maxlat_dflt = DEBUG_NOFILE; 1803 } 1804 1805 debug_tracefile = debug_tracefile_dflt; 1806 debug_maxlat = debug_maxlat_dflt; 1807 } 1808 1809 bool alldigits(const char *s) 1810 { 1811 for (; *s != '\0'; s++) 1812 if (!isdigit(*s)) 1813 return false; 1814 return true; 1815 } 1816 1817 void check_alldigits(const char *optarg, const char *argname) 1818 { 1819 if (!alldigits(optarg)) 1820 errx(EXIT_FAILURE, 1821 "The %s parameter expects a decimal argument\n", argname); 1822 } 1823 1824 static void scan_arguments(int argc, char *argv[]) 1825 { 1826 int c; 1827 int i; 1828 int option_idx = 0; 1829 1830 static struct option long_options[] = { 1831 { "list", no_argument, 0, 'l' }, 1832 { "tracer", required_argument, 0, 't' }, 1833 { "force", no_argument, 0, 'F' }, 1834 { "threshold", required_argument, 0, 's' }, 1835 { "function", no_argument, 0, 'f' }, 1836 { "graph", no_argument, 0, 'g' }, 1837 { "policy", required_argument, 0, 'c' }, 1838 { "priority", required_argument, 0, 'p' }, 1839 { "help", no_argument, 0, 'h' }, 1840 { "notrace", no_argument, 0, 'n' }, 1841 { "random", no_argument, 0, 'r' }, 1842 { "nrlat", required_argument, 0, 'a' }, 1843 { "threads", required_argument, 0, 'e' }, 1844 { "time", required_argument, 0, 'u' }, 1845 { "verbose", no_argument, 0, 'v' }, 1846 { "no-ftrace", no_argument, 0, 'x' }, 1847 { "tracefile", required_argument, 0, 'i' }, 1848 { "max-lat", required_argument, 0, 'm' }, 1849 { 0, 0, 0, 0 } 1850 }; 1851 const struct policy *p; 1852 int max, min; 1853 int value; 1854 bool notracer, valid; 1855 1856 /* 1857 * We must do this before parsing the arguments because show_usage() 1858 * needs to display these. 1859 */ 1860 find_tracefiles(); 1861 1862 while (true) { 1863 c = getopt_long(argc, argv, "lt:Fs:fgc:p:hnra:e:u:vxi:m:", 1864 long_options, &option_idx); 1865 if (c == -1) 1866 break; 1867 1868 switch (c) { 1869 case 'l': 1870 show_available(); 1871 exit(0); 1872 break; 1873 case 't': 1874 current_tracer = strdup(optarg); 1875 if (!is_relevant_tracer(current_tracer)) { 1876 warnx("%s is not a known latency tracer!\n", 1877 current_tracer); 1878 } 1879 valid = tracer_valid(current_tracer, ¬racer); 1880 if (notracer) 1881 errx(EXIT_FAILURE, no_tracer_msg); 1882 if (!valid) 1883 errx(EXIT_FAILURE, 1884 "The tracer %s is not supported by your kernel!\n", current_tracer); 1885 break; 1886 case 'F': 1887 force_tracer = true; 1888 break; 1889 case 's': 1890 check_alldigits(optarg, "-s [--threshold]"); 1891 threshold = strdup(optarg); 1892 break; 1893 case 'f': 1894 use_options[OPTIDX_FUNC_TR] = true; 1895 break; 1896 case 'g': 1897 use_options[OPTIDX_DISP_GR] = true; 1898 break; 1899 case 'c': 1900 p = policy_from_name(optarg); 1901 if (p != NULL) { 1902 sched_policy = p->policy; 1903 sched_policy_set = true; 1904 if (!sched_pri_set) { 1905 sched_pri = p->default_pri; 1906 sched_pri_set = true; 1907 } 1908 } else { 1909 warnx("Unknown scheduling %s\n", optarg); 1910 show_usage(); 1911 exit(0); 1912 } 1913 break; 1914 case 'p': 1915 check_alldigits(optarg, "-p [--priority]"); 1916 sched_pri = atoi(optarg); 1917 sched_pri_set = true; 1918 break; 1919 case 'h': 1920 show_usage(); 1921 exit(0); 1922 break; 1923 case 'n': 1924 trace_enable = false; 1925 use_random_sleep = false; 1926 break; 1927 case 'e': 1928 check_alldigits(optarg, "-e [--threads]"); 1929 value = atoi(optarg); 1930 if (value > 0) 1931 nr_threads = value; 1932 else { 1933 warnx("NRTHR must be > 0\n"); 1934 show_usage(); 1935 exit(0); 1936 } 1937 break; 1938 case 'u': 1939 check_alldigits(optarg, "-u [--time]"); 1940 value = atoi(optarg); 1941 if (value < 0) { 1942 warnx("TIME must be >= 0\n"); 1943 show_usage(); 1944 exit(0); 1945 } 1946 trace_enable = true; 1947 use_random_sleep = true; 1948 sleep_time = value * USEC_PER_MSEC; 1949 break; 1950 case 'v': 1951 verbosity++; 1952 break; 1953 case 'r': 1954 trace_enable = true; 1955 use_random_sleep = true; 1956 break; 1957 case 'a': 1958 check_alldigits(optarg, "-a [--nrlat]"); 1959 value = atoi(optarg); 1960 if (value <= 0) { 1961 warnx("NRLAT must be > 0\n"); 1962 show_usage(); 1963 exit(0); 1964 } 1965 trace_enable = true; 1966 use_random_sleep = true; 1967 table_startsize = value; 1968 break; 1969 case 'x': 1970 setup_ftrace = false; 1971 break; 1972 case 'i': 1973 setup_ftrace = false; 1974 debug_tracefile = strdup(optarg); 1975 break; 1976 case 'm': 1977 setup_ftrace = false; 1978 debug_maxlat = strdup(optarg); 1979 break; 1980 default: 1981 show_usage(); 1982 exit(0); 1983 break; 1984 } 1985 } 1986 1987 if (setup_ftrace) { 1988 if (!current_tracer) { 1989 current_tracer = find_default_tracer(); 1990 if (!current_tracer) 1991 errx(EXIT_FAILURE, 1992 "No default tracer found and tracer not specified\n"); 1993 } 1994 1995 if (use_random_sleep && !random_makes_sense(current_tracer)) { 1996 warnx("WARNING: The tracer is %s and random sleep has", 1997 current_tracer); 1998 fprintf(stderr, 1999 "been enabled. Random sleep is intended for the following tracers:\n"); 2000 for (i = 0; random_tracers[i]; i++) 2001 fprintf(stderr, "%s\n", random_tracers[i]); 2002 fprintf(stderr, "\n"); 2003 } 2004 } 2005 2006 if (debug_tracefile == DEBUG_NOFILE || 2007 debug_maxlat == DEBUG_NOFILE) 2008 errx(EXIT_FAILURE, 2009 "Could not find tracing directory e.g. /sys/kernel/tracing\n"); 2010 2011 if (!sched_policy_set) { 2012 sched_policy = SCHED_RR; 2013 sched_policy_set = true; 2014 if (!sched_pri_set) { 2015 sched_pri = RT_DEFAULT_PRI; 2016 sched_pri_set = true; 2017 } 2018 } 2019 2020 max = sched_get_priority_max(sched_policy); 2021 min = sched_get_priority_min(sched_policy); 2022 2023 if (sched_pri < min) { 2024 printf( 2025 "ATTENTION: Increasing priority to minimum, which is %d\n", min); 2026 sched_pri = min; 2027 } 2028 if (sched_pri > max) { 2029 printf( 2030 "ATTENTION: Reducing priority to maximum, which is %d\n", max); 2031 sched_pri = max; 2032 } 2033 } 2034 2035 static void show_params(void) 2036 { 2037 printf( 2038 "\n" 2039 "Running with scheduling policy %s and priority %d. Using %d print threads.\n", 2040 policy_name(sched_policy), sched_pri, nr_threads); 2041 if (trace_enable) { 2042 if (use_random_sleep) { 2043 printf( 2044 "%s will be printed with random delay\n" 2045 "Start size of the probability table:\t\t\t%d\n" 2046 "Print a message when the prob. table changes size:\t%s\n" 2047 "Print a warning when an event has been lost:\t\t%s\n" 2048 "Sleep time is:\t\t\t\t\t\t%ld ms\n", 2049 debug_tracefile, 2050 table_startsize, 2051 bool2str(verbose_sizechange()), 2052 bool2str(verbose_lostevent()), 2053 sleep_time / USEC_PER_MSEC); 2054 } else { 2055 printf("%s will be printed immediately\n", 2056 debug_tracefile); 2057 } 2058 } else { 2059 printf("%s will not be printed\n", 2060 debug_tracefile); 2061 } 2062 if (setup_ftrace) { 2063 printf("Tracer:\t\t\t\t\t\t\t%s\n" 2064 "%s option:\t\t\t\t\t%s\n" 2065 "%s option:\t\t\t\t\t%s\n", 2066 current_tracer, 2067 optstr[OPTIDX_FUNC_TR], 2068 bool2str(use_options[OPTIDX_FUNC_TR]), 2069 optstr[OPTIDX_DISP_GR], 2070 bool2str(use_options[OPTIDX_DISP_GR])); 2071 if (!strcmp(threshold, "0")) 2072 printf("Threshold:\t\t\t\t\t\ttracing_max_latency\n"); 2073 else 2074 printf("Threshold:\t\t\t\t\t\t%s\n", threshold); 2075 } 2076 printf("\n"); 2077 } 2078 2079 int main(int argc, char *argv[]) 2080 { 2081 init_save_state(); 2082 signal_blocking(SIG_BLOCK); 2083 setup_sig_handler(); 2084 open_stdout(); 2085 2086 if (argc >= 1) 2087 prg_name = argv[0]; 2088 else 2089 prg_name = prg_unknown; 2090 2091 scan_arguments(argc, argv); 2092 show_params(); 2093 2094 init_printstate(); 2095 init_print_mtx(); 2096 if (use_random_sleep) { 2097 init_probabilities(); 2098 if (verbose_sizechange()) 2099 printf("Initializing probability table to %d\n", 2100 table_startsize); 2101 sleeptable_resize(table_startsize, false, NULL); 2102 } 2103 set_priority(); 2104 init_queue(&printqueue); 2105 start_printthread(); 2106 tracing_loop(); 2107 return 0; 2108 } 2109