1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2017, 2018, 2019, 2021 BMW Car IT GmbH
4  * Author: Viktor Rosendahl (viktor.rosendahl@bmw.de)
5  */
6 
7 #define _GNU_SOURCE
8 #define _POSIX_C_SOURCE 200809L
9 
10 #include <ctype.h>
11 #include <stdbool.h>
12 #include <stdio.h>
13 #include <stdlib.h>
14 #include <string.h>
15 
16 #include <err.h>
17 #include <errno.h>
18 #include <fcntl.h>
19 #include <getopt.h>
20 #include <sched.h>
21 #include <linux/unistd.h>
22 #include <signal.h>
23 #include <sys/inotify.h>
24 #include <unistd.h>
25 #include <pthread.h>
26 #include <tracefs.h>
27 
28 static const char *prg_name;
29 static const char *prg_unknown = "unknown program name";
30 
31 static int fd_stdout;
32 
33 static int sched_policy;
34 static bool sched_policy_set;
35 
36 static int sched_pri;
37 static bool sched_pri_set;
38 
39 static bool trace_enable = true;
40 static bool setup_ftrace = true;
41 static bool use_random_sleep;
42 
43 #define TRACE_OPTS				\
44 	C(FUNC_TR, "function-trace"),		\
45 	C(DISP_GR, "display-graph"),		\
46 	C(NR,       NULL)
47 
48 #undef C
49 #define C(a, b) OPTIDX_##a
50 
51 enum traceopt {
52 	TRACE_OPTS
53 };
54 
55 #undef C
56 #define C(a, b)  b
57 
58 static const char *const optstr[] = {
59 	TRACE_OPTS
60 };
61 
62 enum errhandling {
63 	ERR_EXIT = 0,
64 	ERR_WARN,
65 	ERR_CLEANUP,
66 };
67 
68 static bool use_options[OPTIDX_NR];
69 
70 static char inotify_buffer[655360];
71 
72 #define likely(x)      __builtin_expect(!!(x), 1)
73 #define unlikely(x)    __builtin_expect(!!(x), 0)
74 #define bool2str(x)    (x ? "true":"false")
75 
76 #define DEFAULT_NR_PRINTER_THREADS (3)
77 static unsigned int nr_threads = DEFAULT_NR_PRINTER_THREADS;
78 
79 #define DEFAULT_TABLE_SIZE (2)
80 static unsigned int table_startsize = DEFAULT_TABLE_SIZE;
81 
82 static int verbosity;
83 
84 #define verbose_sizechange() (verbosity >= 1)
85 #define verbose_lostevent()  (verbosity >= 2)
86 #define verbose_ftrace()     (verbosity >= 1)
87 
88 #define was_changed(ORIG, CUR) (strcmp(ORIG, CUR) != 0)
89 #define needs_change(CUR, WANTED) (strcmp(CUR, WANTED) != 0)
90 
91 static const char *debug_tracefile;
92 static const char *debug_tracefile_dflt;
93 static const char *debug_maxlat;
94 static const char *debug_maxlat_dflt;
95 static const char * const DEBUG_NOFILE = "[file not found]";
96 
97 static const char * const TR_MAXLAT  = "tracing_max_latency";
98 static const char * const TR_THRESH  = "tracing_thresh";
99 static const char * const TR_CURRENT = "current_tracer";
100 static const char * const TR_OPTIONS = "trace_options";
101 
102 static const char * const NOP_TRACER = "nop";
103 
104 static const char * const OPT_NO_PREFIX = "no";
105 
106 #define DFLT_THRESHOLD_US "0"
107 static const char *threshold = DFLT_THRESHOLD_US;
108 
109 #define DEV_URANDOM     "/dev/urandom"
110 #define RT_DEFAULT_PRI (99)
111 #define DEFAULT_PRI    (0)
112 
113 #define USEC_PER_MSEC (1000L)
114 #define NSEC_PER_USEC (1000L)
115 #define NSEC_PER_MSEC (USEC_PER_MSEC * NSEC_PER_USEC)
116 
117 #define MSEC_PER_SEC (1000L)
118 #define USEC_PER_SEC (USEC_PER_MSEC * MSEC_PER_SEC)
119 #define NSEC_PER_SEC (NSEC_PER_MSEC * MSEC_PER_SEC)
120 
121 #define SLEEP_TIME_MS_DEFAULT (1000L)
122 #define TRY_PRINTMUTEX_MS (1000)
123 
124 static long sleep_time = (USEC_PER_MSEC * SLEEP_TIME_MS_DEFAULT);
125 
126 static const char * const queue_full_warning =
127 "Could not queue trace for printing. It is likely that events happen faster\n"
128 "than what they can be printed. Probably partly because of random sleeping\n";
129 
130 static const char * const no_tracer_msg =
131 "Could not find any tracers! Running this program as root may help!\n";
132 
133 static const char * const no_latency_tr_msg =
134 "No latency tracers are supported by your kernel!\n";
135 
136 struct policy {
137 	const char *name;
138 	int policy;
139 	int default_pri;
140 };
141 
142 static const struct policy policies[] = {
143 	{ "other", SCHED_OTHER, DEFAULT_PRI    },
144 	{ "batch", SCHED_BATCH, DEFAULT_PRI    },
145 	{ "idle",  SCHED_IDLE,  DEFAULT_PRI    },
146 	{ "rr",    SCHED_RR,    RT_DEFAULT_PRI },
147 	{ "fifo",  SCHED_FIFO,  RT_DEFAULT_PRI },
148 	{ NULL,    0,           DEFAULT_PRI    }
149 };
150 
151 /*
152  * The default tracer will be the first on this list that is supported by the
153  * currently running Linux kernel.
154  */
155 static const char * const relevant_tracers[] = {
156 	"preemptirqsoff",
157 	"preemptoff",
158 	"irqsoff",
159 	"wakeup",
160 	"wakeup_rt",
161 	"wakeup_dl",
162 	NULL
163 };
164 
165 /* This is the list of tracers for which random sleep makes sense */
166 static const char * const random_tracers[] = {
167 	"preemptirqsoff",
168 	"preemptoff",
169 	"irqsoff",
170 	NULL
171 };
172 
173 static const char *current_tracer;
174 static bool force_tracer;
175 
176 struct ftrace_state {
177 	char *tracer;
178 	char *thresh;
179 	bool opt[OPTIDX_NR];
180 	bool opt_valid[OPTIDX_NR];
181 	pthread_mutex_t mutex;
182 };
183 
184 struct entry {
185 	int ticket;
186 	int ticket_completed_ref;
187 };
188 
189 struct print_state {
190 	int ticket_counter;
191 	int ticket_completed;
192 	pthread_mutex_t mutex;
193 	pthread_cond_t cond;
194 	int cnt;
195 	pthread_mutex_t cnt_mutex;
196 };
197 
198 struct short_msg {
199 	char buf[160];
200 	int len;
201 };
202 
203 static struct print_state printstate;
204 static struct ftrace_state save_state;
205 volatile sig_atomic_t signal_flag;
206 
207 #define PROB_TABLE_MAX_SIZE (1000)
208 
209 int probabilities[PROB_TABLE_MAX_SIZE];
210 
211 struct sleep_table {
212 	int *table;
213 	int size;
214 	pthread_mutex_t mutex;
215 };
216 
217 static struct sleep_table sleeptable;
218 
219 #define QUEUE_SIZE (10)
220 
221 struct queue {
222 	struct entry entries[QUEUE_SIZE];
223 	int next_prod_idx;
224 	int next_cons_idx;
225 	pthread_mutex_t mutex;
226 	pthread_cond_t cond;
227 };
228 
229 #define MAX_THREADS (40)
230 
231 struct queue printqueue;
232 pthread_t printthread[MAX_THREADS];
233 pthread_mutex_t print_mtx;
234 #define PRINT_BUFFER_SIZE (16 * 1024 * 1024)
235 
236 static void cleanup_exit(int status);
237 static int set_trace_opt(const char *opt, bool value);
238 
239 static __always_inline void *malloc_or_die(size_t size)
240 {
241 	void *ptr = malloc(size);
242 
243 	if (unlikely(ptr == NULL)) {
244 		warn("malloc() failed");
245 		cleanup_exit(EXIT_FAILURE);
246 	}
247 	return ptr;
248 }
249 
250 static __always_inline void *malloc_or_die_nocleanup(size_t size)
251 {
252 	void *ptr = malloc(size);
253 
254 	if (unlikely(ptr == NULL))
255 		err(0, "malloc() failed");
256 	return ptr;
257 }
258 
259 static __always_inline void write_or_die(int fd, const char *buf, size_t count)
260 {
261 	ssize_t r;
262 
263 	do {
264 		r = write(fd, buf, count);
265 		if (unlikely(r < 0)) {
266 			if (errno == EINTR)
267 				continue;
268 			warn("write() failed");
269 			cleanup_exit(EXIT_FAILURE);
270 		}
271 		count -= r;
272 		buf += r;
273 	} while (count > 0);
274 }
275 
276 static __always_inline void clock_gettime_or_die(clockid_t clk_id,
277 						 struct timespec *tp)
278 {
279 	int r = clock_gettime(clk_id, tp);
280 
281 	if (unlikely(r != 0))
282 		err(EXIT_FAILURE, "clock_gettime() failed");
283 }
284 
285 static __always_inline void sigemptyset_or_die(sigset_t *s)
286 {
287 	if (unlikely(sigemptyset(s) != 0)) {
288 		warn("sigemptyset() failed");
289 		cleanup_exit(EXIT_FAILURE);
290 	}
291 }
292 
293 static __always_inline void sigaddset_or_die(sigset_t *s, int signum)
294 {
295 	if (unlikely(sigaddset(s, signum) != 0)) {
296 		warn("sigemptyset() failed");
297 		cleanup_exit(EXIT_FAILURE);
298 	}
299 }
300 
301 static __always_inline void sigaction_or_die(int signum,
302 					     const struct sigaction *act,
303 					     struct sigaction *oldact)
304 {
305 	if (unlikely(sigaction(signum, act, oldact) != 0)) {
306 		warn("sigaction() failed");
307 		cleanup_exit(EXIT_FAILURE);
308 	}
309 }
310 
311 static void open_stdout(void)
312 {
313 	if (setvbuf(stdout, NULL, _IONBF, 0) != 0)
314 		err(EXIT_FAILURE, "setvbuf() failed");
315 	fd_stdout = fileno(stdout);
316 	if (fd_stdout < 0)
317 		err(EXIT_FAILURE, "fileno() failed");
318 }
319 
320 /*
321  * It's not worth it to call cleanup_exit() from mutex functions because
322  * cleanup_exit() uses mutexes.
323  */
324 static __always_inline void mutex_lock(pthread_mutex_t *mtx)
325 {
326 	errno = pthread_mutex_lock(mtx);
327 	if (unlikely(errno))
328 		err(EXIT_FAILURE, "pthread_mutex_lock() failed");
329 }
330 
331 
332 static __always_inline void mutex_unlock(pthread_mutex_t *mtx)
333 {
334 	errno = pthread_mutex_unlock(mtx);
335 	if (unlikely(errno))
336 		err(EXIT_FAILURE, "pthread_mutex_unlock() failed");
337 }
338 
339 static __always_inline void cond_signal(pthread_cond_t *cond)
340 {
341 	errno = pthread_cond_signal(cond);
342 	if (unlikely(errno))
343 		err(EXIT_FAILURE, "pthread_cond_signal() failed");
344 }
345 
346 static __always_inline void cond_wait(pthread_cond_t *restrict cond,
347 				      pthread_mutex_t *restrict mutex)
348 {
349 	errno = pthread_cond_wait(cond, mutex);
350 	if (unlikely(errno))
351 		err(EXIT_FAILURE, "pthread_cond_wait() failed");
352 }
353 
354 static __always_inline void cond_broadcast(pthread_cond_t *cond)
355 {
356 	errno = pthread_cond_broadcast(cond);
357 	if (unlikely(errno))
358 		err(EXIT_FAILURE, "pthread_cond_broadcast() failed");
359 }
360 
361 static __always_inline void
362 mutex_init(pthread_mutex_t *mutex,
363 	   const pthread_mutexattr_t *attr)
364 {
365 	errno = pthread_mutex_init(mutex, attr);
366 	if (errno)
367 		err(EXIT_FAILURE, "pthread_mutex_init() failed");
368 }
369 
370 static __always_inline void mutexattr_init(pthread_mutexattr_t *attr)
371 {
372 	errno = pthread_mutexattr_init(attr);
373 	if (errno)
374 		err(EXIT_FAILURE, "pthread_mutexattr_init() failed");
375 }
376 
377 static __always_inline void mutexattr_destroy(pthread_mutexattr_t *attr)
378 {
379 	errno = pthread_mutexattr_destroy(attr);
380 	if (errno)
381 		err(EXIT_FAILURE, "pthread_mutexattr_destroy() failed");
382 }
383 
384 static __always_inline void mutexattr_settype(pthread_mutexattr_t *attr,
385 					      int type)
386 {
387 	errno = pthread_mutexattr_settype(attr, type);
388 	if (errno)
389 		err(EXIT_FAILURE, "pthread_mutexattr_settype() failed");
390 }
391 
392 static __always_inline void condattr_init(pthread_condattr_t *attr)
393 {
394 	errno = pthread_condattr_init(attr);
395 	if (errno)
396 		err(EXIT_FAILURE, "pthread_condattr_init() failed");
397 }
398 
399 static __always_inline void condattr_destroy(pthread_condattr_t *attr)
400 {
401 	errno = pthread_condattr_destroy(attr);
402 	if (errno)
403 		err(EXIT_FAILURE, "pthread_condattr_destroy() failed");
404 }
405 
406 static __always_inline void condattr_setclock(pthread_condattr_t *attr,
407 					      clockid_t clock_id)
408 {
409 	errno = pthread_condattr_setclock(attr, clock_id);
410 	if (unlikely(errno))
411 		err(EXIT_FAILURE, "pthread_condattr_setclock() failed");
412 }
413 
414 static __always_inline void cond_init(pthread_cond_t *cond,
415 				      const pthread_condattr_t *attr)
416 {
417 	errno = pthread_cond_init(cond, attr);
418 	if (errno)
419 		err(EXIT_FAILURE, "pthread_cond_init() failed");
420 }
421 
422 static __always_inline int
423 cond_timedwait(pthread_cond_t *restrict cond,
424 	       pthread_mutex_t *restrict mutex,
425 	       const struct timespec *restrict abstime)
426 {
427 	errno = pthread_cond_timedwait(cond, mutex, abstime);
428 	if (errno && errno != ETIMEDOUT)
429 		err(EXIT_FAILURE, "pthread_cond_timedwait() failed");
430 	return errno;
431 }
432 
433 static void init_printstate(void)
434 {
435 	pthread_condattr_t cattr;
436 
437 	printstate.ticket_counter = 0;
438 	printstate.ticket_completed = 0;
439 	printstate.cnt = 0;
440 
441 	mutex_init(&printstate.mutex, NULL);
442 
443 	condattr_init(&cattr);
444 	condattr_setclock(&cattr, CLOCK_MONOTONIC);
445 	cond_init(&printstate.cond, &cattr);
446 	condattr_destroy(&cattr);
447 }
448 
449 static void init_print_mtx(void)
450 {
451 	pthread_mutexattr_t mattr;
452 
453 	mutexattr_init(&mattr);
454 	mutexattr_settype(&mattr, PTHREAD_MUTEX_RECURSIVE);
455 	mutex_init(&print_mtx, &mattr);
456 	mutexattr_destroy(&mattr);
457 
458 }
459 
460 static void signal_blocking(int how)
461 {
462 	sigset_t s;
463 
464 	sigemptyset_or_die(&s);
465 	sigaddset_or_die(&s, SIGHUP);
466 	sigaddset_or_die(&s, SIGTERM);
467 	sigaddset_or_die(&s, SIGINT);
468 
469 	errno = pthread_sigmask(how, &s, NULL);
470 	if (unlikely(errno)) {
471 		warn("pthread_sigmask() failed");
472 		cleanup_exit(EXIT_FAILURE);
473 	}
474 }
475 
476 static void signal_handler(int num)
477 {
478 	signal_flag = num;
479 }
480 
481 static void setup_sig_handler(void)
482 {
483 	struct sigaction sa;
484 
485 	memset(&sa, 0, sizeof(sa));
486 	sa.sa_handler = signal_handler;
487 
488 	sigaction_or_die(SIGHUP, &sa, NULL);
489 	sigaction_or_die(SIGTERM, &sa, NULL);
490 	sigaction_or_die(SIGINT, &sa, NULL);
491 }
492 
493 static void process_signal(int signal)
494 {
495 	char *name;
496 
497 	name = strsignal(signal);
498 	if (name == NULL)
499 		printf("Received signal %d\n", signal);
500 	else
501 		printf("Received signal %d (%s)\n", signal, name);
502 	cleanup_exit(EXIT_SUCCESS);
503 }
504 
505 static __always_inline void check_signals(void)
506 {
507 	int signal = signal_flag;
508 
509 	if (unlikely(signal))
510 		process_signal(signal);
511 }
512 
513 static __always_inline void get_time_in_future(struct timespec *future,
514 					       long time_us)
515 {
516 	long nsec;
517 
518 	clock_gettime_or_die(CLOCK_MONOTONIC, future);
519 	future->tv_sec += time_us / USEC_PER_SEC;
520 	nsec = future->tv_nsec + (time_us * NSEC_PER_USEC) % NSEC_PER_SEC;
521 	if (nsec >= NSEC_PER_SEC) {
522 		future->tv_nsec = nsec % NSEC_PER_SEC;
523 		future->tv_sec += 1;
524 	}
525 }
526 
527 static __always_inline bool time_has_passed(const struct timespec *time)
528 {
529 	struct timespec now;
530 
531 	clock_gettime_or_die(CLOCK_MONOTONIC, &now);
532 	if (now.tv_sec > time->tv_sec)
533 		return true;
534 	if (now.tv_sec < time->tv_sec)
535 		return false;
536 	return (now.tv_nsec >= time->tv_nsec);
537 }
538 
539 static bool mutex_trylock_limit(pthread_mutex_t *mutex, int time_ms)
540 {
541 	long time_us = time_ms * USEC_PER_MSEC;
542 	struct timespec limit;
543 
544 	get_time_in_future(&limit, time_us);
545 	do {
546 		errno =  pthread_mutex_trylock(mutex);
547 		if (errno && errno != EBUSY)
548 			err(EXIT_FAILURE, "pthread_mutex_trylock() failed");
549 	} while (errno && !time_has_passed(&limit));
550 	return errno == 0;
551 }
552 
553 static void restore_trace_opts(const struct ftrace_state *state,
554 				const bool *cur)
555 {
556 	int i;
557 	int r;
558 
559 	for (i = 0; i < OPTIDX_NR; i++)
560 		if (state->opt_valid[i] && state->opt[i] != cur[i]) {
561 			r = set_trace_opt(optstr[i], state->opt[i]);
562 			if (r < 0)
563 				warnx("Failed to restore the %s option to %s",
564 				      optstr[i], bool2str(state->opt[i]));
565 			else if (verbose_ftrace())
566 				printf("Restored the %s option in %s to %s\n",
567 				       optstr[i], TR_OPTIONS,
568 				       bool2str(state->opt[i]));
569 		}
570 }
571 
572 static char *read_file(const char *file, enum errhandling h)
573 {
574 	int psize;
575 	char *r;
576 	static const char *emsg = "Failed to read the %s file";
577 
578 	r = tracefs_instance_file_read(NULL, file, &psize);
579 	if (!r) {
580 		if (h) {
581 			warn(emsg, file);
582 			if (h == ERR_CLEANUP)
583 				cleanup_exit(EXIT_FAILURE);
584 		} else
585 			errx(EXIT_FAILURE, emsg, file);
586 	}
587 
588 	if (r && r[psize - 1] == '\n')
589 		r[psize - 1] = '\0';
590 	return r;
591 }
592 
593 static void restore_file(const char *file, char **saved, const char *cur)
594 {
595 	if (*saved && was_changed(*saved, cur)) {
596 		if (tracefs_instance_file_write(NULL, file, *saved) < 0)
597 			warnx("Failed to restore %s to %s!", file, *saved);
598 		else if (verbose_ftrace())
599 			printf("Restored %s to %s\n", file, *saved);
600 		free(*saved);
601 		*saved = NULL;
602 	}
603 }
604 
605 static void restore_ftrace(void)
606 {
607 	mutex_lock(&save_state.mutex);
608 
609 	restore_file(TR_CURRENT, &save_state.tracer, current_tracer);
610 	restore_file(TR_THRESH, &save_state.thresh, threshold);
611 	restore_trace_opts(&save_state, use_options);
612 
613 	mutex_unlock(&save_state.mutex);
614 }
615 
616 static void cleanup_exit(int status)
617 {
618 	char *maxlat;
619 
620 	if (!setup_ftrace)
621 		exit(status);
622 
623 	/*
624 	 * We try the print_mtx for 1 sec in order to avoid garbled
625 	 * output if possible, but if it cannot be obtained we proceed anyway.
626 	 */
627 	mutex_trylock_limit(&print_mtx, TRY_PRINTMUTEX_MS);
628 
629 	maxlat = read_file(TR_MAXLAT, ERR_WARN);
630 	if (maxlat) {
631 		printf("The maximum detected latency was: %sus\n", maxlat);
632 		free(maxlat);
633 	}
634 
635 	restore_ftrace();
636 	/*
637 	 * We do not need to unlock the print_mtx here because we will exit at
638 	 * the end of this function. Unlocking print_mtx causes problems if a
639 	 * print thread happens to be waiting for the mutex because we have
640 	 * just changed the ftrace settings to the original and thus the
641 	 * print thread would output incorrect data from ftrace.
642 	 */
643 	exit(status);
644 }
645 
646 static void init_save_state(void)
647 {
648 	pthread_mutexattr_t mattr;
649 
650 	mutexattr_init(&mattr);
651 	mutexattr_settype(&mattr, PTHREAD_MUTEX_RECURSIVE);
652 	mutex_init(&save_state.mutex, &mattr);
653 	mutexattr_destroy(&mattr);
654 
655 	save_state.tracer = NULL;
656 	save_state.thresh = NULL;
657 	save_state.opt_valid[OPTIDX_FUNC_TR] = false;
658 	save_state.opt_valid[OPTIDX_DISP_GR] = false;
659 }
660 
661 static int printstate_next_ticket(struct entry *req)
662 {
663 	int r;
664 
665 	r = ++(printstate.ticket_counter);
666 	req->ticket = r;
667 	req->ticket_completed_ref = printstate.ticket_completed;
668 	cond_broadcast(&printstate.cond);
669 	return r;
670 }
671 
672 static __always_inline
673 void printstate_mark_req_completed(const struct entry *req)
674 {
675 	if (req->ticket > printstate.ticket_completed)
676 		printstate.ticket_completed = req->ticket;
677 }
678 
679 static __always_inline
680 bool printstate_has_new_req_arrived(const struct entry *req)
681 {
682 	return (printstate.ticket_counter != req->ticket);
683 }
684 
685 static __always_inline int printstate_cnt_inc(void)
686 {
687 	int value;
688 
689 	mutex_lock(&printstate.cnt_mutex);
690 	value = ++printstate.cnt;
691 	mutex_unlock(&printstate.cnt_mutex);
692 	return value;
693 }
694 
695 static __always_inline int printstate_cnt_dec(void)
696 {
697 	int value;
698 
699 	mutex_lock(&printstate.cnt_mutex);
700 	value = --printstate.cnt;
701 	mutex_unlock(&printstate.cnt_mutex);
702 	return value;
703 }
704 
705 static __always_inline int printstate_cnt_read(void)
706 {
707 	int value;
708 
709 	mutex_lock(&printstate.cnt_mutex);
710 	value = printstate.cnt;
711 	mutex_unlock(&printstate.cnt_mutex);
712 	return value;
713 }
714 
715 static __always_inline
716 bool prev_req_won_race(const struct entry *req)
717 {
718 	return (printstate.ticket_completed != req->ticket_completed_ref);
719 }
720 
721 static void sleeptable_resize(int size, bool printout, struct short_msg *msg)
722 {
723 	int bytes;
724 
725 	if (printout) {
726 		msg->len = 0;
727 		if (unlikely(size > PROB_TABLE_MAX_SIZE))
728 			bytes = snprintf(msg->buf, sizeof(msg->buf),
729 "Cannot increase probability table to %d (maximum size reached)\n", size);
730 		else
731 			bytes = snprintf(msg->buf, sizeof(msg->buf),
732 "Increasing probability table to %d\n", size);
733 		if (bytes < 0)
734 			warn("snprintf() failed");
735 		else
736 			msg->len = bytes;
737 	}
738 
739 	if (unlikely(size < 0)) {
740 		/* Should never happen */
741 		warnx("Bad program state at %s:%d", __FILE__, __LINE__);
742 		cleanup_exit(EXIT_FAILURE);
743 		return;
744 	}
745 	sleeptable.size = size;
746 	sleeptable.table = &probabilities[PROB_TABLE_MAX_SIZE - size];
747 }
748 
749 static void init_probabilities(void)
750 {
751 	int i;
752 	int j = 1000;
753 
754 	for (i = 0; i < PROB_TABLE_MAX_SIZE; i++) {
755 		probabilities[i] = 1000 / j;
756 		j--;
757 	}
758 	mutex_init(&sleeptable.mutex, NULL);
759 }
760 
761 static int table_get_probability(const struct entry *req,
762 				 struct short_msg *msg)
763 {
764 	int diff = req->ticket - req->ticket_completed_ref;
765 	int rval = 0;
766 
767 	msg->len = 0;
768 	diff--;
769 	/* Should never happen...*/
770 	if (unlikely(diff < 0)) {
771 		warnx("Programmer assumption error at %s:%d\n", __FILE__,
772 		      __LINE__);
773 		cleanup_exit(EXIT_FAILURE);
774 	}
775 	mutex_lock(&sleeptable.mutex);
776 	if (diff >= (sleeptable.size - 1)) {
777 		rval = sleeptable.table[sleeptable.size - 1];
778 		sleeptable_resize(sleeptable.size + 1, verbose_sizechange(),
779 				  msg);
780 	} else {
781 		rval = sleeptable.table[diff];
782 	}
783 	mutex_unlock(&sleeptable.mutex);
784 	return rval;
785 }
786 
787 static void init_queue(struct queue *q)
788 {
789 	q->next_prod_idx = 0;
790 	q->next_cons_idx = 0;
791 	mutex_init(&q->mutex, NULL);
792 	errno = pthread_cond_init(&q->cond, NULL);
793 	if (errno)
794 		err(EXIT_FAILURE, "pthread_cond_init() failed");
795 }
796 
797 static __always_inline int queue_len(const struct queue *q)
798 {
799 	if (q->next_prod_idx >= q->next_cons_idx)
800 		return q->next_prod_idx - q->next_cons_idx;
801 	else
802 		return QUEUE_SIZE - q->next_cons_idx + q->next_prod_idx;
803 }
804 
805 static __always_inline int queue_nr_free(const struct queue *q)
806 {
807 	int nr_free = QUEUE_SIZE - queue_len(q);
808 
809 	/*
810 	 * If there is only one slot left we will anyway lie and claim that the
811 	 * queue is full because adding an element will make it appear empty
812 	 */
813 	if (nr_free == 1)
814 		nr_free = 0;
815 	return nr_free;
816 }
817 
818 static __always_inline void queue_idx_inc(int *idx)
819 {
820 	*idx = (*idx + 1) % QUEUE_SIZE;
821 }
822 
823 static __always_inline void queue_push_to_back(struct queue *q,
824 					      const struct entry *e)
825 {
826 	q->entries[q->next_prod_idx] = *e;
827 	queue_idx_inc(&q->next_prod_idx);
828 }
829 
830 static __always_inline struct entry queue_pop_from_front(struct queue *q)
831 {
832 	struct entry e = q->entries[q->next_cons_idx];
833 
834 	queue_idx_inc(&q->next_cons_idx);
835 	return e;
836 }
837 
838 static __always_inline void queue_cond_signal(struct queue *q)
839 {
840 	cond_signal(&q->cond);
841 }
842 
843 static __always_inline void queue_cond_wait(struct queue *q)
844 {
845 	cond_wait(&q->cond, &q->mutex);
846 }
847 
848 static __always_inline int queue_try_to_add_entry(struct queue *q,
849 						  const struct entry *e)
850 {
851 	int r = 0;
852 
853 	mutex_lock(&q->mutex);
854 	if (queue_nr_free(q) > 0) {
855 		queue_push_to_back(q, e);
856 		cond_signal(&q->cond);
857 	} else
858 		r = -1;
859 	mutex_unlock(&q->mutex);
860 	return r;
861 }
862 
863 static struct entry queue_wait_for_entry(struct queue *q)
864 {
865 	struct entry e;
866 
867 	mutex_lock(&q->mutex);
868 	while (true) {
869 		if (queue_len(&printqueue) > 0) {
870 			e = queue_pop_from_front(q);
871 			break;
872 		}
873 		queue_cond_wait(q);
874 	}
875 	mutex_unlock(&q->mutex);
876 
877 	return e;
878 }
879 
880 static const struct policy *policy_from_name(const char *name)
881 {
882 	const struct policy *p = &policies[0];
883 
884 	while (p->name != NULL) {
885 		if (!strcmp(name, p->name))
886 			return p;
887 		p++;
888 	}
889 	return NULL;
890 }
891 
892 static const char *policy_name(int policy)
893 {
894 	const struct policy *p = &policies[0];
895 	static const char *rval = "unknown";
896 
897 	while (p->name != NULL) {
898 		if (p->policy == policy)
899 			return p->name;
900 		p++;
901 	}
902 	return rval;
903 }
904 
905 static bool is_relevant_tracer(const char *name)
906 {
907 	unsigned int i;
908 
909 	for (i = 0; relevant_tracers[i]; i++)
910 		if (!strcmp(name, relevant_tracers[i]))
911 			return true;
912 	return false;
913 }
914 
915 static bool random_makes_sense(const char *name)
916 {
917 	unsigned int i;
918 
919 	for (i = 0; random_tracers[i]; i++)
920 		if (!strcmp(name, random_tracers[i]))
921 			return true;
922 	return false;
923 }
924 
925 static void show_available(void)
926 {
927 	char **tracers;
928 	int found = 0;
929 	int i;
930 
931 	tracers = tracefs_tracers(NULL);
932 	for (i = 0; tracers && tracers[i]; i++) {
933 		if (is_relevant_tracer(tracers[i]))
934 			found++;
935 	}
936 
937 	if (!tracers) {
938 		warnx(no_tracer_msg);
939 		return;
940 	}
941 
942 	if (!found) {
943 		warnx(no_latency_tr_msg);
944 		tracefs_list_free(tracers);
945 		return;
946 	}
947 
948 	printf("The following latency tracers are available on your system:\n");
949 	for (i = 0; tracers[i]; i++) {
950 		if (is_relevant_tracer(tracers[i]))
951 			printf("%s\n", tracers[i]);
952 	}
953 	tracefs_list_free(tracers);
954 }
955 
956 static bool tracer_valid(const char *name, bool *notracer)
957 {
958 	char **tracers;
959 	int i;
960 	bool rval = false;
961 
962 	*notracer = false;
963 	tracers = tracefs_tracers(NULL);
964 	if (!tracers) {
965 		*notracer = true;
966 		return false;
967 	}
968 	for (i = 0; tracers[i]; i++)
969 		if (!strcmp(tracers[i], name)) {
970 			rval = true;
971 			break;
972 		}
973 	tracefs_list_free(tracers);
974 	return rval;
975 }
976 
977 static const char *find_default_tracer(void)
978 {
979 	int i;
980 	bool notracer;
981 	bool valid;
982 
983 	for (i = 0; relevant_tracers[i]; i++) {
984 		valid = tracer_valid(relevant_tracers[i], &notracer);
985 		if (notracer)
986 			errx(EXIT_FAILURE, no_tracer_msg);
987 		if (valid)
988 			return relevant_tracers[i];
989 	}
990 	return NULL;
991 }
992 
993 static bool toss_coin(struct drand48_data *buffer, unsigned int prob)
994 {
995 	long r;
996 
997 	if (unlikely(lrand48_r(buffer, &r))) {
998 		warnx("lrand48_r() failed");
999 		cleanup_exit(EXIT_FAILURE);
1000 	}
1001 	r = r % 1000L;
1002 	if (r < prob)
1003 		return true;
1004 	else
1005 		return false;
1006 }
1007 
1008 
1009 static long go_to_sleep(const struct entry *req)
1010 {
1011 	struct timespec future;
1012 	long delay = sleep_time;
1013 
1014 	get_time_in_future(&future, delay);
1015 
1016 	mutex_lock(&printstate.mutex);
1017 	while (!printstate_has_new_req_arrived(req)) {
1018 		cond_timedwait(&printstate.cond, &printstate.mutex, &future);
1019 		if (time_has_passed(&future))
1020 			break;
1021 	}
1022 
1023 	if (printstate_has_new_req_arrived(req))
1024 		delay = -1;
1025 	mutex_unlock(&printstate.mutex);
1026 
1027 	return delay;
1028 }
1029 
1030 
1031 static void set_priority(void)
1032 {
1033 	int r;
1034 	pid_t pid;
1035 	struct sched_param param;
1036 
1037 	memset(&param, 0, sizeof(param));
1038 	param.sched_priority = sched_pri;
1039 
1040 	pid = getpid();
1041 	r = sched_setscheduler(pid, sched_policy, &param);
1042 
1043 	if (r != 0)
1044 		err(EXIT_FAILURE, "sched_setscheduler() failed");
1045 }
1046 
1047 pid_t latency_collector_gettid(void)
1048 {
1049 	return (pid_t) syscall(__NR_gettid);
1050 }
1051 
1052 static void print_priority(void)
1053 {
1054 	pid_t tid;
1055 	int policy;
1056 	int r;
1057 	struct sched_param param;
1058 
1059 	tid = latency_collector_gettid();
1060 	r = pthread_getschedparam(pthread_self(), &policy, &param);
1061 	if (r != 0) {
1062 		warn("pthread_getschedparam() failed");
1063 		cleanup_exit(EXIT_FAILURE);
1064 	}
1065 	mutex_lock(&print_mtx);
1066 	printf("Thread %d runs with scheduling policy %s and priority %d\n",
1067 	       tid, policy_name(policy), param.sched_priority);
1068 	mutex_unlock(&print_mtx);
1069 }
1070 
1071 static __always_inline
1072 void __print_skipmessage(const struct short_msg *resize_msg,
1073 			 const struct timespec *timestamp, char *buffer,
1074 			 size_t bufspace, const struct entry *req, bool excuse,
1075 			 const char *str)
1076 {
1077 	ssize_t bytes = 0;
1078 	char *p = &buffer[0];
1079 	long us, sec;
1080 	int r;
1081 
1082 	sec = timestamp->tv_sec;
1083 	us = timestamp->tv_nsec / 1000;
1084 
1085 	if (resize_msg != NULL && resize_msg->len > 0) {
1086 		strncpy(p, resize_msg->buf, resize_msg->len);
1087 		bytes += resize_msg->len;
1088 		p += resize_msg->len;
1089 		bufspace -= resize_msg->len;
1090 	}
1091 
1092 	if (excuse)
1093 		r = snprintf(p, bufspace,
1094 "%ld.%06ld Latency %d printout skipped due to %s\n",
1095 			     sec, us, req->ticket, str);
1096 	else
1097 		r = snprintf(p, bufspace, "%ld.%06ld Latency %d detected\n",
1098 			    sec, us, req->ticket);
1099 
1100 	if (r < 0)
1101 		warn("snprintf() failed");
1102 	else
1103 		bytes += r;
1104 
1105 	/* These prints could happen concurrently */
1106 	mutex_lock(&print_mtx);
1107 	write_or_die(fd_stdout, buffer, bytes);
1108 	mutex_unlock(&print_mtx);
1109 }
1110 
1111 static void print_skipmessage(const struct short_msg *resize_msg,
1112 			      const struct timespec *timestamp, char *buffer,
1113 			      size_t bufspace, const struct entry *req,
1114 			      bool excuse)
1115 {
1116 	__print_skipmessage(resize_msg, timestamp, buffer, bufspace, req,
1117 			    excuse, "random delay");
1118 }
1119 
1120 static void print_lostmessage(const struct timespec *timestamp, char *buffer,
1121 			      size_t bufspace, const struct entry *req,
1122 			      const char *reason)
1123 {
1124 	__print_skipmessage(NULL, timestamp, buffer, bufspace, req, true,
1125 			    reason);
1126 }
1127 
1128 static void print_tracefile(const struct short_msg *resize_msg,
1129 			    const struct timespec *timestamp, char *buffer,
1130 			    size_t bufspace, long slept,
1131 			    const struct entry *req)
1132 {
1133 	static const int reserve = 256;
1134 	char *p = &buffer[0];
1135 	ssize_t bytes = 0;
1136 	ssize_t bytes_tot = 0;
1137 	long us, sec;
1138 	long slept_ms;
1139 	int trace_fd;
1140 
1141 	/* Save some space for the final string and final null char */
1142 	bufspace = bufspace - reserve - 1;
1143 
1144 	if (resize_msg != NULL && resize_msg->len > 0) {
1145 		bytes = resize_msg->len;
1146 		strncpy(p, resize_msg->buf, bytes);
1147 		bytes_tot += bytes;
1148 		p += bytes;
1149 		bufspace -= bytes;
1150 	}
1151 
1152 	trace_fd = open(debug_tracefile, O_RDONLY);
1153 
1154 	if (trace_fd < 0) {
1155 		warn("open() failed on %s", debug_tracefile);
1156 		return;
1157 	}
1158 
1159 	sec = timestamp->tv_sec;
1160 	us = timestamp->tv_nsec / 1000;
1161 
1162 	if (slept != 0) {
1163 		slept_ms = slept / 1000;
1164 		bytes = snprintf(p, bufspace,
1165 "%ld.%06ld Latency %d randomly sleep for %ld ms before print\n",
1166 				 sec, us, req->ticket, slept_ms);
1167 	} else {
1168 		bytes = snprintf(p, bufspace,
1169 				 "%ld.%06ld Latency %d immediate print\n", sec,
1170 				 us, req->ticket);
1171 	}
1172 
1173 	if (bytes < 0) {
1174 		warn("snprintf() failed");
1175 		return;
1176 	}
1177 	p += bytes;
1178 	bufspace -= bytes;
1179 	bytes_tot += bytes;
1180 
1181 	bytes = snprintf(p, bufspace,
1182 ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> BEGIN <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n"
1183 		);
1184 
1185 	if (bytes < 0) {
1186 		warn("snprintf() failed");
1187 		return;
1188 	}
1189 
1190 	p += bytes;
1191 	bufspace -= bytes;
1192 	bytes_tot += bytes;
1193 
1194 	do {
1195 		bytes = read(trace_fd, p, bufspace);
1196 		if (bytes < 0) {
1197 			if (errno == EINTR)
1198 				continue;
1199 			warn("read() failed on %s", debug_tracefile);
1200 			if (unlikely(close(trace_fd) != 0))
1201 				warn("close() failed on %s", debug_tracefile);
1202 			return;
1203 		}
1204 		if (bytes == 0)
1205 			break;
1206 		p += bytes;
1207 		bufspace -= bytes;
1208 		bytes_tot += bytes;
1209 	} while (true);
1210 
1211 	if (unlikely(close(trace_fd) != 0))
1212 		warn("close() failed on %s", debug_tracefile);
1213 
1214 	printstate_cnt_dec();
1215 	/* Add the reserve space back to the budget for the final string */
1216 	bufspace += reserve;
1217 
1218 	bytes = snprintf(p, bufspace,
1219 			 ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> END <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n\n");
1220 
1221 	if (bytes < 0) {
1222 		warn("snprintf() failed");
1223 		return;
1224 	}
1225 
1226 	bytes_tot += bytes;
1227 
1228 	/* These prints could happen concurrently */
1229 	mutex_lock(&print_mtx);
1230 	write_or_die(fd_stdout, buffer, bytes_tot);
1231 	mutex_unlock(&print_mtx);
1232 }
1233 
1234 static char *get_no_opt(const char *opt)
1235 {
1236 	char *no_opt;
1237 	int s;
1238 
1239 	s = strlen(opt) + strlen(OPT_NO_PREFIX) + 1;
1240 	/* We may be called from cleanup_exit() via set_trace_opt() */
1241 	no_opt = malloc_or_die_nocleanup(s);
1242 	strcpy(no_opt, OPT_NO_PREFIX);
1243 	strcat(no_opt, opt);
1244 	return no_opt;
1245 }
1246 
1247 static char *find_next_optstr(const char *allopt, const char **next)
1248 {
1249 	const char *begin;
1250 	const char *end;
1251 	char *r;
1252 	int s = 0;
1253 
1254 	if (allopt == NULL)
1255 		return NULL;
1256 
1257 	for (begin = allopt; *begin != '\0'; begin++) {
1258 		if (isgraph(*begin))
1259 			break;
1260 	}
1261 
1262 	if (*begin == '\0')
1263 		return NULL;
1264 
1265 	for (end = begin; *end != '\0' && isgraph(*end); end++)
1266 		s++;
1267 
1268 	r = malloc_or_die_nocleanup(s + 1);
1269 	strncpy(r, begin, s);
1270 	r[s] = '\0';
1271 	*next = begin + s;
1272 	return r;
1273 }
1274 
1275 static bool get_trace_opt(const char *allopt, const char *opt, bool *found)
1276 {
1277 	*found = false;
1278 	char *no_opt;
1279 	char *str;
1280 	const char *next = allopt;
1281 	bool rval = false;
1282 
1283 	no_opt = get_no_opt(opt);
1284 
1285 	do {
1286 		str = find_next_optstr(next, &next);
1287 		if (str == NULL)
1288 			break;
1289 		if (!strcmp(str, opt)) {
1290 			*found = true;
1291 			rval = true;
1292 			free(str);
1293 			break;
1294 		}
1295 		if (!strcmp(str, no_opt)) {
1296 			*found = true;
1297 			rval = false;
1298 			free(str);
1299 			break;
1300 		}
1301 		free(str);
1302 	} while (true);
1303 	free(no_opt);
1304 
1305 	return rval;
1306 }
1307 
1308 static int set_trace_opt(const char *opt, bool value)
1309 {
1310 	char *str;
1311 	int r;
1312 
1313 	if (value)
1314 		str = strdup(opt);
1315 	else
1316 		str = get_no_opt(opt);
1317 
1318 	r = tracefs_instance_file_write(NULL, TR_OPTIONS, str);
1319 	free(str);
1320 	return r;
1321 }
1322 
1323 void save_trace_opts(struct ftrace_state *state)
1324 {
1325 	char *allopt;
1326 	int psize;
1327 	int i;
1328 
1329 	allopt = tracefs_instance_file_read(NULL, TR_OPTIONS, &psize);
1330 	if (!allopt)
1331 		errx(EXIT_FAILURE, "Failed to read the %s file\n", TR_OPTIONS);
1332 
1333 	for (i = 0; i < OPTIDX_NR; i++)
1334 		state->opt[i] = get_trace_opt(allopt, optstr[i],
1335 					      &state->opt_valid[i]);
1336 
1337 	free(allopt);
1338 }
1339 
1340 static void write_file(const char *file, const char *cur, const char *new,
1341 		       enum errhandling h)
1342 {
1343 	int r;
1344 	static const char *emsg = "Failed to write to the %s file!";
1345 
1346 	/* Do nothing if we now that the current and new value are equal */
1347 	if (cur && !needs_change(cur, new))
1348 		return;
1349 
1350 	r = tracefs_instance_file_write(NULL, file, new);
1351 	if (r < 0) {
1352 		if (h) {
1353 			warnx(emsg, file);
1354 			if (h == ERR_CLEANUP)
1355 				cleanup_exit(EXIT_FAILURE);
1356 		} else
1357 			errx(EXIT_FAILURE, emsg, file);
1358 	}
1359 	if (verbose_ftrace()) {
1360 		mutex_lock(&print_mtx);
1361 		printf("%s was set to %s\n", file, new);
1362 		mutex_unlock(&print_mtx);
1363 	}
1364 }
1365 
1366 static void reset_max_latency(void)
1367 {
1368 	write_file(TR_MAXLAT, NULL, "0", ERR_CLEANUP);
1369 }
1370 
1371 static void save_and_disable_tracer(void)
1372 {
1373 	char *orig_th;
1374 	char *tracer;
1375 	bool need_nop = false;
1376 
1377 	mutex_lock(&save_state.mutex);
1378 
1379 	save_trace_opts(&save_state);
1380 	tracer = read_file(TR_CURRENT, ERR_EXIT);
1381 	orig_th = read_file(TR_THRESH, ERR_EXIT);
1382 
1383 	if (needs_change(tracer, NOP_TRACER)) {
1384 		mutex_lock(&print_mtx);
1385 		if (force_tracer) {
1386 			printf(
1387 				"The %s tracer is already in use but proceeding anyway!\n",
1388 				tracer);
1389 		} else {
1390 			printf(
1391 				"The %s tracer is already in use, cowardly bailing out!\n"
1392 				"This could indicate that another program or instance is tracing.\n"
1393 				"Use the -F [--force] option to disregard the current tracer.\n", tracer);
1394 			exit(0);
1395 		}
1396 		mutex_unlock(&print_mtx);
1397 		need_nop = true;
1398 	}
1399 
1400 	save_state.tracer =  tracer;
1401 	save_state.thresh = orig_th;
1402 
1403 	if (need_nop)
1404 		write_file(TR_CURRENT, NULL, NOP_TRACER, ERR_EXIT);
1405 
1406 	mutex_unlock(&save_state.mutex);
1407 }
1408 
1409 void set_trace_opts(struct ftrace_state *state, bool *new)
1410 {
1411 	int i;
1412 	int r;
1413 
1414 	/*
1415 	 * We only set options if we earlier detected that the option exists in
1416 	 * the trace_options file and that the wanted setting is different from
1417 	 * the one we saw in save_and_disable_tracer()
1418 	 */
1419 	for (i = 0; i < OPTIDX_NR; i++)
1420 		if (state->opt_valid[i] &&
1421 		    state->opt[i] != new[i]) {
1422 			r = set_trace_opt(optstr[i], new[i]);
1423 			if (r < 0) {
1424 				warnx("Failed to set the %s option to %s",
1425 				      optstr[i], bool2str(new[i]));
1426 				cleanup_exit(EXIT_FAILURE);
1427 			}
1428 			if (verbose_ftrace()) {
1429 				mutex_lock(&print_mtx);
1430 				printf("%s in %s was set to %s\n", optstr[i],
1431 				       TR_OPTIONS, bool2str(new[i]));
1432 				mutex_unlock(&print_mtx);
1433 			}
1434 		}
1435 }
1436 
1437 static void enable_tracer(void)
1438 {
1439 	mutex_lock(&save_state.mutex);
1440 	set_trace_opts(&save_state, use_options);
1441 
1442 	write_file(TR_THRESH, save_state.thresh, threshold, ERR_CLEANUP);
1443 	write_file(TR_CURRENT, NOP_TRACER, current_tracer, ERR_CLEANUP);
1444 
1445 	mutex_unlock(&save_state.mutex);
1446 }
1447 
1448 static void tracing_loop(void)
1449 {
1450 	int ifd = inotify_init();
1451 	int wd;
1452 	const ssize_t bufsize = sizeof(inotify_buffer);
1453 	const ssize_t istructsize = sizeof(struct inotify_event);
1454 	char *buf = &inotify_buffer[0];
1455 	ssize_t nr_read;
1456 	char *p;
1457 	int modified;
1458 	struct inotify_event *event;
1459 	struct entry req;
1460 	char *buffer;
1461 	const size_t bufspace = PRINT_BUFFER_SIZE;
1462 	struct timespec timestamp;
1463 
1464 	print_priority();
1465 
1466 	buffer = malloc_or_die(bufspace);
1467 
1468 	if (ifd < 0)
1469 		err(EXIT_FAILURE, "inotify_init() failed!");
1470 
1471 
1472 	if (setup_ftrace) {
1473 		/*
1474 		 * We must disable the tracer before resetting the max_latency
1475 		 */
1476 		save_and_disable_tracer();
1477 		/*
1478 		 * We must reset the max_latency before the inotify_add_watch()
1479 		 * call.
1480 		 */
1481 		reset_max_latency();
1482 	}
1483 
1484 	wd = inotify_add_watch(ifd, debug_maxlat, IN_MODIFY);
1485 	if (wd < 0)
1486 		err(EXIT_FAILURE, "inotify_add_watch() failed!");
1487 
1488 	if (setup_ftrace)
1489 		enable_tracer();
1490 
1491 	signal_blocking(SIG_UNBLOCK);
1492 
1493 	while (true) {
1494 		modified = 0;
1495 		check_signals();
1496 		nr_read = read(ifd, buf, bufsize);
1497 		check_signals();
1498 		if (nr_read < 0) {
1499 			if (errno == EINTR)
1500 				continue;
1501 			warn("read() failed on inotify fd!");
1502 			cleanup_exit(EXIT_FAILURE);
1503 		}
1504 		if (nr_read == bufsize)
1505 			warnx("inotify() buffer filled, skipping events");
1506 		if (nr_read < istructsize) {
1507 			warnx("read() returned too few bytes on inotify fd");
1508 			cleanup_exit(EXIT_FAILURE);
1509 		}
1510 
1511 		for (p = buf; p < buf + nr_read;) {
1512 			event = (struct inotify_event *) p;
1513 			if ((event->mask & IN_MODIFY) != 0)
1514 				modified++;
1515 			p += istructsize + event->len;
1516 		}
1517 		while (modified > 0) {
1518 			check_signals();
1519 			mutex_lock(&printstate.mutex);
1520 			check_signals();
1521 			printstate_next_ticket(&req);
1522 			if (printstate_cnt_read() > 0) {
1523 				printstate_mark_req_completed(&req);
1524 				mutex_unlock(&printstate.mutex);
1525 				if (verbose_lostevent()) {
1526 					clock_gettime_or_die(CLOCK_MONOTONIC,
1527 							     &timestamp);
1528 					print_lostmessage(&timestamp, buffer,
1529 							  bufspace, &req,
1530 							  "inotify loop");
1531 				}
1532 				break;
1533 			}
1534 			mutex_unlock(&printstate.mutex);
1535 			if (queue_try_to_add_entry(&printqueue, &req) != 0) {
1536 				/* These prints could happen concurrently */
1537 				check_signals();
1538 				mutex_lock(&print_mtx);
1539 				check_signals();
1540 				write_or_die(fd_stdout, queue_full_warning,
1541 					     strlen(queue_full_warning));
1542 				mutex_unlock(&print_mtx);
1543 			}
1544 			modified--;
1545 		}
1546 	}
1547 }
1548 
1549 static void *do_printloop(void *arg)
1550 {
1551 	const size_t bufspace = PRINT_BUFFER_SIZE;
1552 	char *buffer;
1553 	long *rseed = (long *) arg;
1554 	struct drand48_data drandbuf;
1555 	long slept = 0;
1556 	struct entry req;
1557 	int prob = 0;
1558 	struct timespec timestamp;
1559 	struct short_msg resize_msg;
1560 
1561 	print_priority();
1562 
1563 	if (srand48_r(*rseed, &drandbuf) != 0) {
1564 		warn("srand48_r() failed!\n");
1565 		cleanup_exit(EXIT_FAILURE);
1566 	}
1567 
1568 	buffer = malloc_or_die(bufspace);
1569 
1570 	while (true) {
1571 		req = queue_wait_for_entry(&printqueue);
1572 		clock_gettime_or_die(CLOCK_MONOTONIC, &timestamp);
1573 		mutex_lock(&printstate.mutex);
1574 		if (prev_req_won_race(&req)) {
1575 			printstate_mark_req_completed(&req);
1576 			mutex_unlock(&printstate.mutex);
1577 			if (verbose_lostevent())
1578 				print_lostmessage(&timestamp, buffer, bufspace,
1579 						  &req, "print loop");
1580 			continue;
1581 		}
1582 		mutex_unlock(&printstate.mutex);
1583 
1584 		/*
1585 		 * Toss a coin to decide if we want to sleep before printing
1586 		 * out the backtrace. The reason for this is that opening
1587 		 * /sys/kernel/debug/tracing/trace will cause a blackout of
1588 		 * hundreds of ms, where no latencies will be noted by the
1589 		 * latency tracer. Thus by randomly sleeping we try to avoid
1590 		 * missing traces systematically due to this. With this option
1591 		 * we will sometimes get the first latency, some other times
1592 		 * some of the later ones, in case of closely spaced traces.
1593 		 */
1594 		if (trace_enable && use_random_sleep) {
1595 			slept = 0;
1596 			prob = table_get_probability(&req, &resize_msg);
1597 			if (!toss_coin(&drandbuf, prob))
1598 				slept = go_to_sleep(&req);
1599 			if (slept >= 0) {
1600 				/* A print is ongoing */
1601 				printstate_cnt_inc();
1602 				/*
1603 				 * We will do the printout below so we have to
1604 				 * mark it as completed while we still have the
1605 				 * mutex.
1606 				 */
1607 				mutex_lock(&printstate.mutex);
1608 				printstate_mark_req_completed(&req);
1609 				mutex_unlock(&printstate.mutex);
1610 			}
1611 		}
1612 		if (trace_enable) {
1613 			/*
1614 			 * slept < 0  means that we detected another
1615 			 * notification in go_to_sleep() above
1616 			 */
1617 			if (slept >= 0)
1618 				/*
1619 				 * N.B. printstate_cnt_dec(); will be called
1620 				 * inside print_tracefile()
1621 				 */
1622 				print_tracefile(&resize_msg, &timestamp, buffer,
1623 						bufspace, slept, &req);
1624 			else
1625 				print_skipmessage(&resize_msg, &timestamp,
1626 						  buffer, bufspace, &req, true);
1627 		} else {
1628 			print_skipmessage(&resize_msg, &timestamp, buffer,
1629 					  bufspace, &req, false);
1630 		}
1631 	}
1632 	return NULL;
1633 }
1634 
1635 static void start_printthread(void)
1636 {
1637 	unsigned int i;
1638 	long *seed;
1639 	int ufd;
1640 
1641 	ufd = open(DEV_URANDOM, O_RDONLY);
1642 	if (nr_threads > MAX_THREADS) {
1643 		warnx(
1644 "Number of requested print threads was %d, max number is %d\n",
1645 		      nr_threads, MAX_THREADS);
1646 		nr_threads = MAX_THREADS;
1647 	}
1648 	for (i = 0; i < nr_threads; i++) {
1649 		seed = malloc_or_die(sizeof(*seed));
1650 		if (ufd <  0 ||
1651 		    read(ufd, seed, sizeof(*seed)) != sizeof(*seed)) {
1652 			printf(
1653 "Warning! Using trivial random number seed, since %s not available\n",
1654 			DEV_URANDOM);
1655 			fflush(stdout);
1656 			*seed = i;
1657 		}
1658 		errno = pthread_create(&printthread[i], NULL, do_printloop,
1659 				       seed);
1660 		if (errno)
1661 			err(EXIT_FAILURE, "pthread_create()");
1662 	}
1663 	if (ufd > 0 && close(ufd) != 0)
1664 		warn("close() failed");
1665 }
1666 
1667 static void show_usage(void)
1668 {
1669 	printf(
1670 "Usage: %s [OPTION]...\n\n"
1671 "Collect closely occurring latencies from %s\n"
1672 "with any of the following tracers: preemptirqsoff, preemptoff, irqsoff, "
1673 "wakeup,\nwakeup_dl, or wakeup_rt.\n\n"
1674 
1675 "The occurrence of a latency is detected by monitoring the file\n"
1676 "%s with inotify.\n\n"
1677 
1678 "The following options are supported:\n\n"
1679 
1680 "-l, --list\t\tList the latency tracers that are supported by the\n"
1681 "\t\t\tcurrently running Linux kernel. If you don't see the\n"
1682 "\t\t\ttracer that you want, you will probably need to\n"
1683 "\t\t\tchange your kernel config and build a new kernel.\n\n"
1684 
1685 "-t, --tracer TR\t\tUse the tracer TR. The default is to use the first\n"
1686 "\t\t\ttracer that is supported by the kernel in the following\n"
1687 "\t\t\torder of precedence:\n\n"
1688 "\t\t\tpreemptirqsoff\n"
1689 "\t\t\tpreemptoff\n"
1690 "\t\t\tirqsoff\n"
1691 "\t\t\twakeup\n"
1692 "\t\t\twakeup_rt\n"
1693 "\t\t\twakeup_dl\n"
1694 "\n"
1695 "\t\t\tIf TR is not on the list above, then a warning will be\n"
1696 "\t\t\tprinted.\n\n"
1697 
1698 "-F, --force\t\tProceed even if another ftrace tracer is active. Without\n"
1699 "\t\t\tthis option, the program will refuse to start tracing if\n"
1700 "\t\t\tany other tracer than the nop tracer is active.\n\n"
1701 
1702 "-s, --threshold TH\tConfigure ftrace to use a threshold of TH microseconds\n"
1703 "\t\t\tfor the tracer. The default is 0, which means that\n"
1704 "\t\t\ttracing_max_latency will be used. tracing_max_latency is\n"
1705 "\t\t\tset to 0 when the program is started and contains the\n"
1706 "\t\t\tmaximum of the latencies that have been encountered.\n\n"
1707 
1708 "-f, --function\t\tEnable the function-trace option in trace_options. With\n"
1709 "\t\t\tthis option, ftrace will trace the functions that are\n"
1710 "\t\t\texecuted during a latency, without it we only get the\n"
1711 "\t\t\tbeginning, end, and backtrace.\n\n"
1712 
1713 "-g, --graph\t\tEnable the display-graph option in trace_option. This\n"
1714 "\t\t\toption causes ftrace to show the graph of how functions\n"
1715 "\t\t\tare calling other functions.\n\n"
1716 
1717 "-c, --policy POL\tRun the program with scheduling policy POL. POL can be\n"
1718 "\t\t\tother, batch, idle, rr or fifo. The default is rr. When\n"
1719 "\t\t\tusing rr or fifo, remember that these policies may cause\n"
1720 "\t\t\tother tasks to experience latencies.\n\n"
1721 
1722 "-p, --priority PRI\tRun the program with priority PRI. The acceptable range\n"
1723 "\t\t\tof PRI depends on the scheduling policy.\n\n"
1724 
1725 "-n, --notrace\t\tIf latency is detected, do not print out the content of\n"
1726 "\t\t\tthe trace file to standard output\n\n"
1727 
1728 "-t, --threads NRTHR\tRun NRTHR threads for printing. Default is %d.\n\n"
1729 
1730 "-r, --random\t\tArbitrarily sleep a certain amount of time, default\n"
1731 "\t\t\t%ld ms, before reading the trace file. The\n"
1732 "\t\t\tprobabilities for sleep are chosen so that the\n"
1733 "\t\t\tprobability of obtaining any of a cluster of closely\n"
1734 "\t\t\toccurring latencies are equal, i.e. we will randomly\n"
1735 "\t\t\tchoose which one we collect from the trace file.\n\n"
1736 "\t\t\tThis option is probably only useful with the irqsoff,\n"
1737 "\t\t\tpreemptoff, and preemptirqsoff tracers.\n\n"
1738 
1739 "-a, --nrlat NRLAT\tFor the purpose of arbitrary delay, assume that there\n"
1740 "\t\t\tare no more than NRLAT clustered latencies. If NRLAT\n"
1741 "\t\t\tlatencies are detected during a run, this value will\n"
1742 "\t\t\tautomatically be increased to NRLAT + 1 and then to\n"
1743 "\t\t\tNRLAT + 2 and so on. The default is %d. This option\n"
1744 "\t\t\timplies -r. We need to know this number in order to\n"
1745 "\t\t\tbe able to calculate the probabilities of sleeping.\n"
1746 "\t\t\tSpecifically, the probabilities of not sleeping, i.e. to\n"
1747 "\t\t\tdo an immediate printout will be:\n\n"
1748 "\t\t\t1/NRLAT  1/(NRLAT - 1) ... 1/3  1/2  1\n\n"
1749 "\t\t\tThe probability of sleeping will be:\n\n"
1750 "\t\t\t1 - P, where P is from the series above\n\n"
1751 "\t\t\tThis descending probability will cause us to choose\n"
1752 "\t\t\tan occurrence at random. Observe that the final\n"
1753 "\t\t\tprobability is 0, it is when we reach this probability\n"
1754 "\t\t\tthat we increase NRLAT automatically. As an example,\n"
1755 "\t\t\twith the default value of 2, the probabilities will be:\n\n"
1756 "\t\t\t1/2  0\n\n"
1757 "\t\t\tThis means, when a latency is detected we will sleep\n"
1758 "\t\t\twith 50%% probability. If we ever detect another latency\n"
1759 "\t\t\tduring the sleep period, then the probability of sleep\n"
1760 "\t\t\twill be 0%% and the table will be expanded to:\n\n"
1761 "\t\t\t1/3  1/2  0\n\n"
1762 
1763 "-v, --verbose\t\tIncrease the verbosity. If this option is given once,\n"
1764 "\t\t\tthen print a message every time that the NRLAT value\n"
1765 "\t\t\tis automatically increased. It also causes a message to\n"
1766 "\t\t\tbe printed when the ftrace settings are changed. If this\n"
1767 "\t\t\toption is given at least twice, then also print a\n"
1768 "\t\t\twarning for lost events.\n\n"
1769 
1770 "-u, --time TIME\t\tArbitrarily sleep for a specified time TIME ms before\n"
1771 "\t\t\tprinting out the trace from the trace file. The default\n"
1772 "\t\t\tis %ld ms. This option implies -r.\n\n"
1773 
1774 "-x, --no-ftrace\t\tDo not configure ftrace. This assume that the user\n"
1775 "\t\t\tconfigures the ftrace files in sysfs such as\n"
1776 "\t\t\t/sys/kernel/tracing/current_tracer or equivalent.\n\n"
1777 
1778 "-i, --tracefile FILE\tUse FILE as trace file. The default is\n"
1779 "\t\t\t%s.\n"
1780 "\t\t\tThis options implies -x\n\n"
1781 
1782 "-m, --max-lat FILE\tUse FILE as tracing_max_latency file. The default is\n"
1783 "\t\t\t%s.\n"
1784 "\t\t\tThis options implies -x\n\n"
1785 ,
1786 prg_name, debug_tracefile_dflt, debug_maxlat_dflt, DEFAULT_NR_PRINTER_THREADS,
1787 SLEEP_TIME_MS_DEFAULT, DEFAULT_TABLE_SIZE, SLEEP_TIME_MS_DEFAULT,
1788 debug_tracefile_dflt, debug_maxlat_dflt);
1789 }
1790 
1791 static void find_tracefiles(void)
1792 {
1793 	debug_tracefile_dflt = tracefs_get_tracing_file("trace");
1794 	if (debug_tracefile_dflt == NULL) {
1795 		/* This is needed in show_usage() */
1796 		debug_tracefile_dflt = DEBUG_NOFILE;
1797 	}
1798 
1799 	debug_maxlat_dflt = tracefs_get_tracing_file("tracing_max_latency");
1800 	if (debug_maxlat_dflt == NULL) {
1801 		/* This is needed in show_usage() */
1802 		debug_maxlat_dflt = DEBUG_NOFILE;
1803 	}
1804 
1805 	debug_tracefile = debug_tracefile_dflt;
1806 	debug_maxlat = debug_maxlat_dflt;
1807 }
1808 
1809 bool alldigits(const char *s)
1810 {
1811 	for (; *s != '\0'; s++)
1812 		if (!isdigit(*s))
1813 			return false;
1814 	return true;
1815 }
1816 
1817 void check_alldigits(const char *optarg, const char *argname)
1818 {
1819 	if (!alldigits(optarg))
1820 		errx(EXIT_FAILURE,
1821 		     "The %s parameter expects a decimal argument\n", argname);
1822 }
1823 
1824 static void scan_arguments(int argc, char *argv[])
1825 {
1826 	int c;
1827 	int i;
1828 	int option_idx = 0;
1829 
1830 	static struct option long_options[] = {
1831 		{ "list",       no_argument,            0, 'l' },
1832 		{ "tracer",	required_argument,	0, 't' },
1833 		{ "force",      no_argument,            0, 'F' },
1834 		{ "threshold",  required_argument,      0, 's' },
1835 		{ "function",   no_argument,            0, 'f' },
1836 		{ "graph",      no_argument,            0, 'g' },
1837 		{ "policy",	required_argument,	0, 'c' },
1838 		{ "priority",	required_argument,	0, 'p' },
1839 		{ "help",	no_argument,		0, 'h' },
1840 		{ "notrace",	no_argument,		0, 'n' },
1841 		{ "random",	no_argument,		0, 'r' },
1842 		{ "nrlat",	required_argument,	0, 'a' },
1843 		{ "threads",	required_argument,	0, 'e' },
1844 		{ "time",	required_argument,	0, 'u' },
1845 		{ "verbose",	no_argument,		0, 'v' },
1846 		{ "no-ftrace",  no_argument,            0, 'x' },
1847 		{ "tracefile",	required_argument,	0, 'i' },
1848 		{ "max-lat",	required_argument,	0, 'm' },
1849 		{ 0,		0,			0,  0  }
1850 	};
1851 	const struct policy *p;
1852 	int max, min;
1853 	int value;
1854 	bool notracer, valid;
1855 
1856 	/*
1857 	 * We must do this before parsing the arguments because show_usage()
1858 	 * needs to display these.
1859 	 */
1860 	find_tracefiles();
1861 
1862 	while (true) {
1863 		c = getopt_long(argc, argv, "lt:Fs:fgc:p:hnra:e:u:vxi:m:",
1864 				long_options, &option_idx);
1865 		if (c == -1)
1866 			break;
1867 
1868 		switch (c) {
1869 		case 'l':
1870 			show_available();
1871 			exit(0);
1872 			break;
1873 		case 't':
1874 			current_tracer = strdup(optarg);
1875 			if (!is_relevant_tracer(current_tracer)) {
1876 				warnx("%s is not a known latency tracer!\n",
1877 				      current_tracer);
1878 			}
1879 			valid = tracer_valid(current_tracer, &notracer);
1880 			if (notracer)
1881 				errx(EXIT_FAILURE, no_tracer_msg);
1882 			if (!valid)
1883 				errx(EXIT_FAILURE,
1884 "The tracer %s is not supported by your kernel!\n", current_tracer);
1885 			break;
1886 		case 'F':
1887 			force_tracer = true;
1888 			break;
1889 		case 's':
1890 			check_alldigits(optarg, "-s [--threshold]");
1891 			threshold = strdup(optarg);
1892 			break;
1893 		case 'f':
1894 			use_options[OPTIDX_FUNC_TR] = true;
1895 			break;
1896 		case 'g':
1897 			use_options[OPTIDX_DISP_GR] = true;
1898 			break;
1899 		case 'c':
1900 			p = policy_from_name(optarg);
1901 			if (p != NULL) {
1902 				sched_policy = p->policy;
1903 				sched_policy_set = true;
1904 				if (!sched_pri_set) {
1905 					sched_pri = p->default_pri;
1906 					sched_pri_set = true;
1907 				}
1908 			} else {
1909 				warnx("Unknown scheduling %s\n", optarg);
1910 				show_usage();
1911 				exit(0);
1912 			}
1913 			break;
1914 		case 'p':
1915 			check_alldigits(optarg, "-p [--priority]");
1916 			sched_pri = atoi(optarg);
1917 			sched_pri_set = true;
1918 			break;
1919 		case 'h':
1920 			show_usage();
1921 			exit(0);
1922 			break;
1923 		case 'n':
1924 			trace_enable = false;
1925 			use_random_sleep = false;
1926 			break;
1927 		case 'e':
1928 			check_alldigits(optarg, "-e [--threads]");
1929 			value = atoi(optarg);
1930 			if (value > 0)
1931 				nr_threads = value;
1932 			else {
1933 				warnx("NRTHR must be > 0\n");
1934 				show_usage();
1935 				exit(0);
1936 			}
1937 			break;
1938 		case 'u':
1939 			check_alldigits(optarg, "-u [--time]");
1940 			value = atoi(optarg);
1941 			if (value < 0) {
1942 				warnx("TIME must be >= 0\n");
1943 				show_usage();
1944 				exit(0);
1945 			}
1946 			trace_enable = true;
1947 			use_random_sleep = true;
1948 			sleep_time = value * USEC_PER_MSEC;
1949 			break;
1950 		case 'v':
1951 			verbosity++;
1952 			break;
1953 		case 'r':
1954 			trace_enable = true;
1955 			use_random_sleep = true;
1956 			break;
1957 		case 'a':
1958 			check_alldigits(optarg, "-a [--nrlat]");
1959 			value = atoi(optarg);
1960 			if (value <= 0) {
1961 				warnx("NRLAT must be > 0\n");
1962 				show_usage();
1963 				exit(0);
1964 			}
1965 			trace_enable = true;
1966 			use_random_sleep = true;
1967 			table_startsize = value;
1968 			break;
1969 		case 'x':
1970 			setup_ftrace = false;
1971 			break;
1972 		case 'i':
1973 			setup_ftrace = false;
1974 			debug_tracefile = strdup(optarg);
1975 			break;
1976 		case 'm':
1977 			setup_ftrace = false;
1978 			debug_maxlat = strdup(optarg);
1979 			break;
1980 		default:
1981 			show_usage();
1982 			exit(0);
1983 			break;
1984 		}
1985 	}
1986 
1987 	if (setup_ftrace) {
1988 		if (!current_tracer) {
1989 			current_tracer = find_default_tracer();
1990 			if (!current_tracer)
1991 				errx(EXIT_FAILURE,
1992 "No default tracer found and tracer not specified\n");
1993 		}
1994 
1995 		if (use_random_sleep && !random_makes_sense(current_tracer)) {
1996 			warnx("WARNING: The tracer is %s and random sleep has",
1997 			      current_tracer);
1998 			fprintf(stderr,
1999 "been enabled. Random sleep is intended for the following tracers:\n");
2000 			for (i = 0; random_tracers[i]; i++)
2001 				fprintf(stderr, "%s\n", random_tracers[i]);
2002 			fprintf(stderr, "\n");
2003 		}
2004 	}
2005 
2006 	if (debug_tracefile == DEBUG_NOFILE ||
2007 	    debug_maxlat == DEBUG_NOFILE)
2008 		errx(EXIT_FAILURE,
2009 "Could not find tracing directory e.g. /sys/kernel/tracing\n");
2010 
2011 	if (!sched_policy_set) {
2012 		sched_policy = SCHED_RR;
2013 		sched_policy_set = true;
2014 		if (!sched_pri_set) {
2015 			sched_pri = RT_DEFAULT_PRI;
2016 			sched_pri_set = true;
2017 		}
2018 	}
2019 
2020 	max = sched_get_priority_max(sched_policy);
2021 	min = sched_get_priority_min(sched_policy);
2022 
2023 	if (sched_pri < min) {
2024 		printf(
2025 "ATTENTION: Increasing priority to minimum, which is %d\n", min);
2026 		sched_pri = min;
2027 	}
2028 	if (sched_pri > max) {
2029 		printf(
2030 "ATTENTION: Reducing priority to maximum, which is %d\n", max);
2031 		sched_pri = max;
2032 	}
2033 }
2034 
2035 static void show_params(void)
2036 {
2037 	printf(
2038 "\n"
2039 "Running with scheduling policy %s and priority %d. Using %d print threads.\n",
2040 		policy_name(sched_policy), sched_pri, nr_threads);
2041 	if (trace_enable) {
2042 		if (use_random_sleep) {
2043 			printf(
2044 "%s will be printed with random delay\n"
2045 "Start size of the probability table:\t\t\t%d\n"
2046 "Print a message when the prob. table changes size:\t%s\n"
2047 "Print a warning when an event has been lost:\t\t%s\n"
2048 "Sleep time is:\t\t\t\t\t\t%ld ms\n",
2049 debug_tracefile,
2050 table_startsize,
2051 bool2str(verbose_sizechange()),
2052 bool2str(verbose_lostevent()),
2053 sleep_time / USEC_PER_MSEC);
2054 		} else {
2055 			printf("%s will be printed immediately\n",
2056 			       debug_tracefile);
2057 		}
2058 	} else {
2059 		printf("%s will not be printed\n",
2060 		       debug_tracefile);
2061 	}
2062 	if (setup_ftrace) {
2063 		printf("Tracer:\t\t\t\t\t\t\t%s\n"
2064 		       "%s option:\t\t\t\t\t%s\n"
2065 		       "%s option:\t\t\t\t\t%s\n",
2066 		       current_tracer,
2067 		       optstr[OPTIDX_FUNC_TR],
2068 		       bool2str(use_options[OPTIDX_FUNC_TR]),
2069 		       optstr[OPTIDX_DISP_GR],
2070 		       bool2str(use_options[OPTIDX_DISP_GR]));
2071 		if (!strcmp(threshold, "0"))
2072 			printf("Threshold:\t\t\t\t\t\ttracing_max_latency\n");
2073 		else
2074 			printf("Threshold:\t\t\t\t\t\t%s\n", threshold);
2075 	}
2076 	printf("\n");
2077 }
2078 
2079 int main(int argc, char *argv[])
2080 {
2081 	init_save_state();
2082 	signal_blocking(SIG_BLOCK);
2083 	setup_sig_handler();
2084 	open_stdout();
2085 
2086 	if (argc >= 1)
2087 		prg_name = argv[0];
2088 	else
2089 		prg_name = prg_unknown;
2090 
2091 	scan_arguments(argc, argv);
2092 	show_params();
2093 
2094 	init_printstate();
2095 	init_print_mtx();
2096 	if (use_random_sleep) {
2097 		init_probabilities();
2098 		if (verbose_sizechange())
2099 			printf("Initializing probability table to %d\n",
2100 			       table_startsize);
2101 		sleeptable_resize(table_startsize, false, NULL);
2102 	}
2103 	set_priority();
2104 	init_queue(&printqueue);
2105 	start_printthread();
2106 	tracing_loop();
2107 	return 0;
2108 }
2109