1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2023 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
4  */
5 
6 #include <stdlib.h>
7 #include <errno.h>
8 #include "utils.h"
9 #include "osnoise.h"
10 #include "timerlat.h"
11 #include <unistd.h>
12 
/*
 * Per-cpu state machine driven by the timerlat tracepoints: after a
 * thread event the CPU waits for the next timerlat IRQ; after the IRQ
 * it waits for the thread activation.
 * note: "timelat" (sic) is the original spelling of the tag.
 */
enum timelat_state {
	TIMERLAT_INIT = 0,		/* no timerlat event seen on this CPU yet */
	TIMERLAT_WAITING_IRQ,		/* thread event handled, waiting for the IRQ */
	TIMERLAT_WAITING_THREAD,	/* IRQ event handled, waiting for the thread */
};

/* Size of the buffers used to store task comm names */
#define MAX_COMM		24
20 
21 /*
22  * Per-cpu data statistics and data.
23  */
24 struct timerlat_aa_data {
25 	/* Current CPU state */
26 	int			curr_state;
27 
28 	/* timerlat IRQ latency */
29 	unsigned long long	tlat_irq_seqnum;
30 	unsigned long long	tlat_irq_latency;
31 	unsigned long long	tlat_irq_timstamp;
32 
33 	/* timerlat Thread latency */
34 	unsigned long long	tlat_thread_seqnum;
35 	unsigned long long	tlat_thread_latency;
36 	unsigned long long	tlat_thread_timstamp;
37 
38 	/*
39 	 * Information about the thread running when the IRQ
40 	 * arrived.
41 	 *
42 	 * This can be blocking or interference, depending on the
43 	 * priority of the thread. Assuming timerlat is the highest
44 	 * prio, it is blocking. If timerlat has a lower prio, it is
45 	 * interference.
46 	 * note: "unsigned long long" because they are fetch using tep_get_field_val();
47 	 */
48 	unsigned long long	run_thread_pid;
49 	char			run_thread_comm[MAX_COMM];
50 	unsigned long long	thread_blocking_duration;
51 	unsigned long long	max_exit_idle_latency;
52 
53 	/* Information about the timerlat timer irq */
54 	unsigned long long	timer_irq_start_time;
55 	unsigned long long	timer_irq_start_delay;
56 	unsigned long long	timer_irq_duration;
57 	unsigned long long	timer_exit_from_idle;
58 
59 	/*
60 	 * Information about the last IRQ before the timerlat irq
61 	 * arrived.
62 	 *
63 	 * If now - timestamp is <= latency, it might have influenced
64 	 * in the timerlat irq latency. Otherwise, ignore it.
65 	 */
66 	unsigned long long	prev_irq_duration;
67 	unsigned long long	prev_irq_timstamp;
68 
69 	/*
70 	 * Interference sum.
71 	 */
72 	unsigned long long	thread_nmi_sum;
73 	unsigned long long	thread_irq_sum;
74 	unsigned long long	thread_softirq_sum;
75 	unsigned long long	thread_thread_sum;
76 
77 	/*
78 	 * Interference task information.
79 	 */
80 	struct trace_seq	*prev_irqs_seq;
81 	struct trace_seq	*nmi_seq;
82 	struct trace_seq	*irqs_seq;
83 	struct trace_seq	*softirqs_seq;
84 	struct trace_seq	*threads_seq;
85 	struct trace_seq	*stack_seq;
86 
87 	/*
88 	 * Current thread.
89 	 */
90 	char			current_comm[MAX_COMM];
91 	unsigned long long	current_pid;
92 
93 	/*
94 	 * Is the system running a kworker?
95 	 */
96 	unsigned long long	kworker;
97 	unsigned long long	kworker_func;
98 };
99 
100 /*
101  * The analysis context and system wide view
102  */
103 struct timerlat_aa_context {
104 	int nr_cpus;
105 	int dump_tasks;
106 
107 	/* per CPU data */
108 	struct timerlat_aa_data *taa_data;
109 
110 	/*
111 	 * required to translate function names and register
112 	 * events.
113 	 */
114 	struct osnoise_tool *tool;
115 };
116 
117 /*
118  * The data is stored as a local variable, but accessed via a helper function.
119  *
120  * It could be stored inside the trace context. But every access would
121  * require container_of() + a series of pointers. Do we need it? Not sure.
122  *
123  * For now keep it simple. If needed, store it in the tool, add the *context
124  * as a parameter in timerlat_aa_get_ctx() and do the magic there.
125  */
126 static struct timerlat_aa_context *__timerlat_aa_ctx;
127 
128 static struct timerlat_aa_context *timerlat_aa_get_ctx(void)
129 {
130 	return __timerlat_aa_ctx;
131 }
132 
133 /*
134  * timerlat_aa_get_data - Get the per-cpu data from the timerlat context
135  */
136 static struct timerlat_aa_data
137 *timerlat_aa_get_data(struct timerlat_aa_context *taa_ctx, int cpu)
138 {
139 	return &taa_ctx->taa_data[cpu];
140 }
141 
142 /*
143  * timerlat_aa_irq_latency - Handles timerlat IRQ event
144  */
145 static int timerlat_aa_irq_latency(struct timerlat_aa_data *taa_data,
146 				   struct trace_seq *s, struct tep_record *record,
147 				   struct tep_event *event)
148 {
149 	/*
150 	 * For interference, we start now looking for things that can delay
151 	 * the thread.
152 	 */
153 	taa_data->curr_state = TIMERLAT_WAITING_THREAD;
154 	taa_data->tlat_irq_timstamp = record->ts;
155 
156 	/*
157 	 * Zero values.
158 	 */
159 	taa_data->thread_nmi_sum = 0;
160 	taa_data->thread_irq_sum = 0;
161 	taa_data->thread_softirq_sum = 0;
162 	taa_data->thread_blocking_duration = 0;
163 	taa_data->timer_irq_start_time = 0;
164 	taa_data->timer_irq_duration = 0;
165 	taa_data->timer_exit_from_idle = 0;
166 
167 	/*
168 	 * Zero interference tasks.
169 	 */
170 	trace_seq_reset(taa_data->nmi_seq);
171 	trace_seq_reset(taa_data->irqs_seq);
172 	trace_seq_reset(taa_data->softirqs_seq);
173 	trace_seq_reset(taa_data->threads_seq);
174 
175 	/* IRQ latency values */
176 	tep_get_field_val(s, event, "timer_latency", record, &taa_data->tlat_irq_latency, 1);
177 	tep_get_field_val(s, event, "seqnum", record, &taa_data->tlat_irq_seqnum, 1);
178 
179 	/* The thread that can cause blocking */
180 	tep_get_common_field_val(s, event, "common_pid", record, &taa_data->run_thread_pid, 1);
181 
182 	/*
183 	 * Get exit from idle case.
184 	 *
185 	 * If it is not idle thread:
186 	 */
187 	if (taa_data->run_thread_pid)
188 		return 0;
189 
190 	/*
191 	 * if the latency is shorter than the known exit from idle:
192 	 */
193 	if (taa_data->tlat_irq_latency < taa_data->max_exit_idle_latency)
194 		return 0;
195 
196 	/*
197 	 * To be safe, ignore the cases in which an IRQ/NMI could have
198 	 * interfered with the timerlat IRQ.
199 	 */
200 	if (taa_data->tlat_irq_timstamp - taa_data->tlat_irq_latency
201 	    < taa_data->prev_irq_timstamp + taa_data->prev_irq_duration)
202 		return 0;
203 
204 	taa_data->max_exit_idle_latency = taa_data->tlat_irq_latency;
205 
206 	return 0;
207 }
208 
209 /*
210  * timerlat_aa_thread_latency - Handles timerlat thread event
211  */
212 static int timerlat_aa_thread_latency(struct timerlat_aa_data *taa_data,
213 				      struct trace_seq *s, struct tep_record *record,
214 				      struct tep_event *event)
215 {
216 	/*
217 	 * For interference, we start now looking for things that can delay
218 	 * the IRQ of the next cycle.
219 	 */
220 	taa_data->curr_state = TIMERLAT_WAITING_IRQ;
221 	taa_data->tlat_thread_timstamp = record->ts;
222 
223 	/* Thread latency values */
224 	tep_get_field_val(s, event, "timer_latency", record, &taa_data->tlat_thread_latency, 1);
225 	tep_get_field_val(s, event, "seqnum", record, &taa_data->tlat_thread_seqnum, 1);
226 
227 	return 0;
228 }
229 
230 /*
231  * timerlat_aa_handler - Handle timerlat events
232  *
233  * This function is called to handle timerlat events recording statistics.
234  *
235  * Returns 0 on success, -1 otherwise.
236  */
237 static int timerlat_aa_handler(struct trace_seq *s, struct tep_record *record,
238 			struct tep_event *event, void *context)
239 {
240 	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
241 	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
242 	unsigned long long thread;
243 
244 	if (!taa_data)
245 		return -1;
246 
247 	tep_get_field_val(s, event, "context", record, &thread, 1);
248 	if (!thread)
249 		return timerlat_aa_irq_latency(taa_data, s, record, event);
250 	else
251 		return timerlat_aa_thread_latency(taa_data, s, record, event);
252 }
253 
254 /*
255  * timerlat_aa_nmi_handler - Handles NMI noise
256  *
257  * It is used to collect information about interferences from NMI. It is
258  * hooked to the osnoise:nmi_noise event.
259  */
260 static int timerlat_aa_nmi_handler(struct trace_seq *s, struct tep_record *record,
261 				   struct tep_event *event, void *context)
262 {
263 	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
264 	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
265 	unsigned long long duration;
266 	unsigned long long start;
267 
268 	tep_get_field_val(s, event, "duration", record, &duration, 1);
269 	tep_get_field_val(s, event, "start", record, &start, 1);
270 
271 	if (taa_data->curr_state == TIMERLAT_WAITING_IRQ) {
272 		taa_data->prev_irq_duration = duration;
273 		taa_data->prev_irq_timstamp = start;
274 
275 		trace_seq_reset(taa_data->prev_irqs_seq);
276 		trace_seq_printf(taa_data->prev_irqs_seq, "\t%24s	\t\t\t%9.2f us\n",
277 			 "nmi", ns_to_usf(duration));
278 		return 0;
279 	}
280 
281 	taa_data->thread_nmi_sum += duration;
282 	trace_seq_printf(taa_data->nmi_seq, "	%24s	\t\t\t%9.2f us\n",
283 		 "nmi", ns_to_usf(duration));
284 
285 	return 0;
286 }
287 
288 /*
289  * timerlat_aa_irq_handler - Handles IRQ noise
290  *
291  * It is used to collect information about interferences from IRQ. It is
292  * hooked to the osnoise:irq_noise event.
293  *
294  * It is a little bit more complex than the other because it measures:
295  *	- The IRQs that can delay the timer IRQ before it happened.
296  *	- The Timerlat IRQ handler
297  *	- The IRQs that happened between the timerlat IRQ and the timerlat thread
298  *	  (IRQ interference).
299  */
300 static int timerlat_aa_irq_handler(struct trace_seq *s, struct tep_record *record,
301 				   struct tep_event *event, void *context)
302 {
303 	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
304 	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
305 	unsigned long long expected_start;
306 	unsigned long long duration;
307 	unsigned long long vector;
308 	unsigned long long start;
309 	char *desc;
310 	int val;
311 
312 	tep_get_field_val(s, event, "duration", record, &duration, 1);
313 	tep_get_field_val(s, event, "start", record, &start, 1);
314 	tep_get_field_val(s, event, "vector", record, &vector, 1);
315 	desc = tep_get_field_raw(s, event, "desc", record, &val, 1);
316 
317 	/*
318 	 * Before the timerlat IRQ.
319 	 */
320 	if (taa_data->curr_state == TIMERLAT_WAITING_IRQ) {
321 		taa_data->prev_irq_duration = duration;
322 		taa_data->prev_irq_timstamp = start;
323 
324 		trace_seq_reset(taa_data->prev_irqs_seq);
325 		trace_seq_printf(taa_data->prev_irqs_seq, "\t%24s:%-3llu	\t\t%9.2f us\n",
326 				 desc, vector, ns_to_usf(duration));
327 		return 0;
328 	}
329 
330 	/*
331 	 * The timerlat IRQ: taa_data->timer_irq_start_time is zeroed at
332 	 * the timerlat irq handler.
333 	 */
334 	if (!taa_data->timer_irq_start_time) {
335 		expected_start = taa_data->tlat_irq_timstamp - taa_data->tlat_irq_latency;
336 
337 		taa_data->timer_irq_start_time = start;
338 		taa_data->timer_irq_duration = duration;
339 
340 		taa_data->timer_irq_start_delay = taa_data->timer_irq_start_time - expected_start;
341 
342 		/*
343 		 * not exit from idle.
344 		 */
345 		if (taa_data->run_thread_pid)
346 			return 0;
347 
348 		if (expected_start > taa_data->prev_irq_timstamp + taa_data->prev_irq_duration)
349 			taa_data->timer_exit_from_idle = taa_data->timer_irq_start_delay;
350 
351 		return 0;
352 	}
353 
354 	/*
355 	 * IRQ interference.
356 	 */
357 	taa_data->thread_irq_sum += duration;
358 	trace_seq_printf(taa_data->irqs_seq, "	%24s:%-3llu	\t	%9.2f us\n",
359 			 desc, vector, ns_to_usf(duration));
360 
361 	return 0;
362 }
363 
364 static char *softirq_name[] = { "HI", "TIMER",	"NET_TX", "NET_RX", "BLOCK",
365 				"IRQ_POLL", "TASKLET", "SCHED", "HRTIMER", "RCU" };
366 
367 
368 /*
369  * timerlat_aa_softirq_handler - Handles Softirq noise
370  *
371  * It is used to collect information about interferences from Softirq. It is
372  * hooked to the osnoise:softirq_noise event.
373  *
374  * It is only printed in the non-rt kernel, as softirqs become thread on RT.
375  */
376 static int timerlat_aa_softirq_handler(struct trace_seq *s, struct tep_record *record,
377 				       struct tep_event *event, void *context)
378 {
379 	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
380 	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
381 	unsigned long long duration;
382 	unsigned long long vector;
383 	unsigned long long start;
384 
385 	if (taa_data->curr_state == TIMERLAT_WAITING_IRQ)
386 		return 0;
387 
388 	tep_get_field_val(s, event, "duration", record, &duration, 1);
389 	tep_get_field_val(s, event, "start", record, &start, 1);
390 	tep_get_field_val(s, event, "vector", record, &vector, 1);
391 
392 	taa_data->thread_softirq_sum += duration;
393 
394 	trace_seq_printf(taa_data->softirqs_seq, "\t%24s:%-3llu	\t	%9.2f us\n",
395 			 softirq_name[vector], vector, ns_to_usf(duration));
396 	return 0;
397 }
398 
399 /*
400  * timerlat_aa_softirq_handler - Handles thread noise
401  *
402  * It is used to collect information about interferences from threads. It is
403  * hooked to the osnoise:thread_noise event.
404  *
405  * Note: if you see thread noise, your timerlat thread was not the highest prio one.
406  */
407 static int timerlat_aa_thread_handler(struct trace_seq *s, struct tep_record *record,
408 				      struct tep_event *event, void *context)
409 {
410 	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
411 	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
412 	unsigned long long duration;
413 	unsigned long long start;
414 	unsigned long long pid;
415 	const char *comm;
416 	int val;
417 
418 	if (taa_data->curr_state == TIMERLAT_WAITING_IRQ)
419 		return 0;
420 
421 	tep_get_field_val(s, event, "duration", record, &duration, 1);
422 	tep_get_field_val(s, event, "start", record, &start, 1);
423 
424 	tep_get_common_field_val(s, event, "common_pid", record, &pid, 1);
425 	comm = tep_get_field_raw(s, event, "comm", record, &val, 1);
426 
427 	if (pid == taa_data->run_thread_pid && !taa_data->thread_blocking_duration) {
428 		taa_data->thread_blocking_duration = duration;
429 
430 		if (comm)
431 			strncpy(taa_data->run_thread_comm, comm, MAX_COMM);
432 		else
433 			sprintf(taa_data->run_thread_comm, "<...>");
434 
435 	} else {
436 		taa_data->thread_thread_sum += duration;
437 
438 		trace_seq_printf(taa_data->threads_seq, "\t%24s:%-3llu	\t\t%9.2f us\n",
439 			 comm, pid, ns_to_usf(duration));
440 	}
441 
442 	return 0;
443 }
444 
445 /*
446  * timerlat_aa_stack_handler - Handles timerlat IRQ stack trace
447  *
448  * Saves and parse the stack trace generated by the timerlat IRQ.
449  */
450 static int timerlat_aa_stack_handler(struct trace_seq *s, struct tep_record *record,
451 			      struct tep_event *event, void *context)
452 {
453 	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
454 	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
455 	unsigned long *caller;
456 	const char *function;
457 	int val, i;
458 
459 	trace_seq_reset(taa_data->stack_seq);
460 
461 	trace_seq_printf(taa_data->stack_seq, "    Blocking thread stack trace\n");
462 	caller = tep_get_field_raw(s, event, "caller", record, &val, 1);
463 	if (caller) {
464 		for (i = 0; ; i++) {
465 			function = tep_find_function(taa_ctx->tool->trace.tep, caller[i]);
466 			if (!function)
467 				break;
468 			trace_seq_printf(taa_data->stack_seq, "\t\t-> %s\n", function);
469 		}
470 	}
471 	return 0;
472 }
473 
474 /*
475  * timerlat_aa_sched_switch_handler - Tracks the current thread running on the CPU
476  *
477  * Handles the sched:sched_switch event to trace the current thread running on the
478  * CPU. It is used to display the threads running on the other CPUs when the trace
479  * stops.
480  */
481 static int timerlat_aa_sched_switch_handler(struct trace_seq *s, struct tep_record *record,
482 					    struct tep_event *event, void *context)
483 {
484 	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
485 	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
486 	const char *comm;
487 	int val;
488 
489 	tep_get_field_val(s, event, "next_pid", record, &taa_data->current_pid, 1);
490 	comm = tep_get_field_raw(s, event, "next_comm", record, &val, 1);
491 
492 	strncpy(taa_data->current_comm, comm, MAX_COMM);
493 
494 	/*
495 	 * If this was a kworker, clean the last kworkers that ran.
496 	 */
497 	taa_data->kworker = 0;
498 	taa_data->kworker_func = 0;
499 
500 	return 0;
501 }
502 
503 /*
504  * timerlat_aa_kworker_start_handler - Tracks a kworker running on the CPU
505  *
506  * Handles workqueue:workqueue_execute_start event, keeping track of
507  * the job that a kworker could be doing in the CPU.
508  *
509  * We already catch problems of hardware related latencies caused by work queues
510  * running driver code that causes hardware stall. For example, with DRM drivers.
511  */
512 static int timerlat_aa_kworker_start_handler(struct trace_seq *s, struct tep_record *record,
513 					     struct tep_event *event, void *context)
514 {
515 	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
516 	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
517 
518 	tep_get_field_val(s, event, "work", record, &taa_data->kworker, 1);
519 	tep_get_field_val(s, event, "function", record, &taa_data->kworker_func, 1);
520 	return 0;
521 }
522 
523 /*
524  * timerlat_thread_analysis - Prints the analysis of a CPU that hit a stop tracing
525  *
526  * This is the core of the analysis.
527  */
528 static void timerlat_thread_analysis(struct timerlat_aa_data *taa_data, int cpu,
529 				     int irq_thresh, int thread_thresh)
530 {
531 	unsigned long long exp_irq_ts;
532 	int total;
533 	int irq;
534 
535 	/*
536 	 * IRQ latency or Thread latency?
537 	 */
538 	if (taa_data->tlat_irq_seqnum > taa_data->tlat_thread_seqnum) {
539 		irq = 1;
540 		total = taa_data->tlat_irq_latency;
541 	} else {
542 		irq = 0;
543 		total = taa_data->tlat_thread_latency;
544 	}
545 
546 	/*
547 	 * Expected IRQ arrival time using the trace clock as the base.
548 	 */
549 	exp_irq_ts = taa_data->timer_irq_start_time - taa_data->timer_irq_start_delay;
550 
551 	if (exp_irq_ts < taa_data->prev_irq_timstamp + taa_data->prev_irq_duration)
552 		printf("  Previous IRQ interference:	\t\t up to  %9.2f us\n",
553 			ns_to_usf(taa_data->prev_irq_duration));
554 
555 	/*
556 	 * The delay that the IRQ suffered before starting.
557 	 */
558 	printf("  IRQ handler delay:		%16s	%9.2f us (%.2f %%)\n",
559 		(ns_to_usf(taa_data->timer_exit_from_idle) > 10) ? "(exit from idle)" : "",
560 		ns_to_usf(taa_data->timer_irq_start_delay),
561 		ns_to_per(total, taa_data->timer_irq_start_delay));
562 
563 	/*
564 	 * Timerlat IRQ.
565 	 */
566 	printf("  IRQ latency:	\t\t\t\t	%9.2f us\n",
567 		ns_to_usf(taa_data->tlat_irq_latency));
568 
569 	if (irq) {
570 		/*
571 		 * If the trace stopped due to IRQ, the other events will not happen
572 		 * because... the trace stopped :-).
573 		 *
574 		 * That is all folks, the stack trace was printed before the stop,
575 		 * so it will be displayed, it is the key.
576 		 */
577 		printf("  Blocking thread:\n");
578 		printf("	%24s:%-9llu\n",
579 			taa_data->run_thread_comm, taa_data->run_thread_pid);
580 	} else  {
581 		/*
582 		 * The duration of the IRQ handler that handled the timerlat IRQ.
583 		 */
584 		printf("  Timerlat IRQ duration:	\t\t	%9.2f us (%.2f %%)\n",
585 			ns_to_usf(taa_data->timer_irq_duration),
586 			ns_to_per(total, taa_data->timer_irq_duration));
587 
588 		/*
589 		 * The amount of time that the current thread postponed the scheduler.
590 		 *
591 		 * Recalling that it is net from NMI/IRQ/Softirq interference, so there
592 		 * is no need to compute values here.
593 		 */
594 		printf("  Blocking thread:	\t\t\t	%9.2f us (%.2f %%)\n",
595 			ns_to_usf(taa_data->thread_blocking_duration),
596 			ns_to_per(total, taa_data->thread_blocking_duration));
597 
598 		printf("	%24s:%-9llu		%9.2f us\n",
599 			taa_data->run_thread_comm, taa_data->run_thread_pid,
600 			ns_to_usf(taa_data->thread_blocking_duration));
601 	}
602 
603 	/*
604 	 * Print the stack trace!
605 	 */
606 	trace_seq_do_printf(taa_data->stack_seq);
607 
608 	/*
609 	 * NMIs can happen during the IRQ, so they are always possible.
610 	 */
611 	if (taa_data->thread_nmi_sum)
612 		printf("  NMI interference	\t\t\t	%9.2f us (%.2f %%)\n",
613 			ns_to_usf(taa_data->thread_nmi_sum),
614 			ns_to_per(total, taa_data->thread_nmi_sum));
615 
616 	/*
617 	 * If it is an IRQ latency, the other factors can be skipped.
618 	 */
619 	if (irq)
620 		goto print_total;
621 
622 	/*
623 	 * Prints the interference caused by IRQs to the thread latency.
624 	 */
625 	if (taa_data->thread_irq_sum) {
626 		printf("  IRQ interference	\t\t\t	%9.2f us (%.2f %%)\n",
627 			ns_to_usf(taa_data->thread_irq_sum),
628 			ns_to_per(total, taa_data->thread_irq_sum));
629 
630 		trace_seq_do_printf(taa_data->irqs_seq);
631 	}
632 
633 	/*
634 	 * Prints the interference caused by Softirqs to the thread latency.
635 	 */
636 	if (taa_data->thread_softirq_sum) {
637 		printf("  Softirq interference	\t\t\t	%9.2f us (%.2f %%)\n",
638 			ns_to_usf(taa_data->thread_softirq_sum),
639 			ns_to_per(total, taa_data->thread_softirq_sum));
640 
641 		trace_seq_do_printf(taa_data->softirqs_seq);
642 	}
643 
644 	/*
645 	 * Prints the interference caused by other threads to the thread latency.
646 	 *
647 	 * If this happens, your timerlat is not the highest prio. OK, migration
648 	 * thread can happen. But otherwise, you are not measuring the "scheduling
649 	 * latency" only, and here is the difference from scheduling latency and
650 	 * timer handling latency.
651 	 */
652 	if (taa_data->thread_thread_sum) {
653 		printf("  Thread interference	\t\t\t	%9.2f us (%.2f %%)\n",
654 			ns_to_usf(taa_data->thread_thread_sum),
655 			ns_to_per(total, taa_data->thread_thread_sum));
656 
657 		trace_seq_do_printf(taa_data->threads_seq);
658 	}
659 
660 	/*
661 	 * Done.
662 	 */
663 print_total:
664 	printf("------------------------------------------------------------------------\n");
665 	printf("  %s latency:	\t\t\t	%9.2f us (100%%)\n", irq ? "IRQ" : "Thread",
666 		ns_to_usf(total));
667 }
668 
669 static int timerlat_auto_analysis_collect_trace(struct timerlat_aa_context *taa_ctx)
670 {
671 	struct trace_instance *trace = &taa_ctx->tool->trace;
672 	int retval;
673 
674 	retval = tracefs_iterate_raw_events(trace->tep,
675 					    trace->inst,
676 					    NULL,
677 					    0,
678 					    collect_registered_events,
679 					    trace);
680 		if (retval < 0) {
681 			err_msg("Error iterating on events\n");
682 			return 0;
683 		}
684 
685 	return 1;
686 }
687 
688 /**
689  * timerlat_auto_analysis - Analyze the collected data
690  */
691 void timerlat_auto_analysis(int irq_thresh, int thread_thresh)
692 {
693 	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
694 	unsigned long long max_exit_from_idle = 0;
695 	struct timerlat_aa_data *taa_data;
696 	int max_exit_from_idle_cpu;
697 	struct tep_handle *tep;
698 	int cpu;
699 
700 	timerlat_auto_analysis_collect_trace(taa_ctx);
701 
702 	/* bring stop tracing to the ns scale */
703 	irq_thresh = irq_thresh * 1000;
704 	thread_thresh = thread_thresh * 1000;
705 
706 	for (cpu = 0; cpu < taa_ctx->nr_cpus; cpu++) {
707 		taa_data = timerlat_aa_get_data(taa_ctx, cpu);
708 
709 		if (irq_thresh && taa_data->tlat_irq_latency >= irq_thresh) {
710 			printf("## CPU %d hit stop tracing, analyzing it ##\n", cpu);
711 			timerlat_thread_analysis(taa_data, cpu, irq_thresh, thread_thresh);
712 		} else if (thread_thresh && (taa_data->tlat_thread_latency) >= thread_thresh) {
713 			printf("## CPU %d hit stop tracing, analyzing it ##\n", cpu);
714 			timerlat_thread_analysis(taa_data, cpu, irq_thresh, thread_thresh);
715 		}
716 
717 		if (taa_data->max_exit_idle_latency > max_exit_from_idle) {
718 			max_exit_from_idle = taa_data->max_exit_idle_latency;
719 			max_exit_from_idle_cpu = cpu;
720 		}
721 
722 	}
723 
724 	if (max_exit_from_idle) {
725 		printf("\n");
726 		printf("Max timerlat IRQ latency from idle: %.2f us in cpu %d\n",
727 			ns_to_usf(max_exit_from_idle), max_exit_from_idle_cpu);
728 	}
729 	if (!taa_ctx->dump_tasks)
730 		return;
731 
732 	printf("\n");
733 	printf("Printing CPU tasks:\n");
734 	for (cpu = 0; cpu < taa_ctx->nr_cpus; cpu++) {
735 		taa_data = timerlat_aa_get_data(taa_ctx, cpu);
736 		tep = taa_ctx->tool->trace.tep;
737 
738 		printf("    [%.3d] %24s:%llu", cpu, taa_data->current_comm, taa_data->current_pid);
739 
740 		if (taa_data->kworker_func)
741 			printf(" kworker:%s:%s",
742 				tep_find_function(tep, taa_data->kworker) ? : "<...>",
743 				tep_find_function(tep, taa_data->kworker_func));
744 		printf("\n");
745 	}
746 
747 }
748 
749 /*
750  * timerlat_aa_destroy_seqs - Destroy seq files used to store parsed data
751  */
752 static void timerlat_aa_destroy_seqs(struct timerlat_aa_context *taa_ctx)
753 {
754 	struct timerlat_aa_data *taa_data;
755 	int i;
756 
757 	if (!taa_ctx->taa_data)
758 		return;
759 
760 	for (i = 0; i < taa_ctx->nr_cpus; i++) {
761 		taa_data = timerlat_aa_get_data(taa_ctx, i);
762 
763 		if (taa_data->prev_irqs_seq) {
764 			trace_seq_destroy(taa_data->prev_irqs_seq);
765 			free(taa_data->prev_irqs_seq);
766 		}
767 
768 		if (taa_data->nmi_seq) {
769 			trace_seq_destroy(taa_data->nmi_seq);
770 			free(taa_data->nmi_seq);
771 		}
772 
773 		if (taa_data->irqs_seq) {
774 			trace_seq_destroy(taa_data->irqs_seq);
775 			free(taa_data->irqs_seq);
776 		}
777 
778 		if (taa_data->softirqs_seq) {
779 			trace_seq_destroy(taa_data->softirqs_seq);
780 			free(taa_data->softirqs_seq);
781 		}
782 
783 		if (taa_data->threads_seq) {
784 			trace_seq_destroy(taa_data->threads_seq);
785 			free(taa_data->threads_seq);
786 		}
787 
788 		if (taa_data->stack_seq) {
789 			trace_seq_destroy(taa_data->stack_seq);
790 			free(taa_data->stack_seq);
791 		}
792 	}
793 }
794 
795 /*
796  * timerlat_aa_init_seqs - Init seq files used to store parsed information
797  *
798  * Instead of keeping data structures to store raw data, use seq files to
799  * store parsed data.
800  *
801  * Allocates and initialize seq files.
802  *
803  * Returns 0 on success, -1 otherwise.
804  */
805 static int timerlat_aa_init_seqs(struct timerlat_aa_context *taa_ctx)
806 {
807 	struct timerlat_aa_data *taa_data;
808 	int i;
809 
810 	for (i = 0; i < taa_ctx->nr_cpus; i++) {
811 
812 		taa_data = timerlat_aa_get_data(taa_ctx, i);
813 
814 		taa_data->prev_irqs_seq = calloc(1, sizeof(*taa_data->prev_irqs_seq));
815 		if (!taa_data->prev_irqs_seq)
816 			goto out_err;
817 
818 		trace_seq_init(taa_data->prev_irqs_seq);
819 
820 		taa_data->nmi_seq = calloc(1, sizeof(*taa_data->nmi_seq));
821 		if (!taa_data->nmi_seq)
822 			goto out_err;
823 
824 		trace_seq_init(taa_data->nmi_seq);
825 
826 		taa_data->irqs_seq = calloc(1, sizeof(*taa_data->irqs_seq));
827 		if (!taa_data->irqs_seq)
828 			goto out_err;
829 
830 		trace_seq_init(taa_data->irqs_seq);
831 
832 		taa_data->softirqs_seq = calloc(1, sizeof(*taa_data->softirqs_seq));
833 		if (!taa_data->softirqs_seq)
834 			goto out_err;
835 
836 		trace_seq_init(taa_data->softirqs_seq);
837 
838 		taa_data->threads_seq = calloc(1, sizeof(*taa_data->threads_seq));
839 		if (!taa_data->threads_seq)
840 			goto out_err;
841 
842 		trace_seq_init(taa_data->threads_seq);
843 
844 		taa_data->stack_seq = calloc(1, sizeof(*taa_data->stack_seq));
845 		if (!taa_data->stack_seq)
846 			goto out_err;
847 
848 		trace_seq_init(taa_data->stack_seq);
849 	}
850 
851 	return 0;
852 
853 out_err:
854 	timerlat_aa_destroy_seqs(taa_ctx);
855 	return -1;
856 }
857 
858 /*
859  * timerlat_aa_unregister_events - Unregister events used in the auto-analysis
860  */
861 static void timerlat_aa_unregister_events(struct osnoise_tool *tool, int dump_tasks)
862 {
863 
864 	tep_unregister_event_handler(tool->trace.tep, -1, "ftrace", "timerlat",
865 				     timerlat_aa_handler, tool);
866 
867 	tracefs_event_disable(tool->trace.inst, "osnoise", NULL);
868 
869 	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "nmi_noise",
870 				     timerlat_aa_nmi_handler, tool);
871 
872 	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "irq_noise",
873 				     timerlat_aa_irq_handler, tool);
874 
875 	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "softirq_noise",
876 				     timerlat_aa_softirq_handler, tool);
877 
878 	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "thread_noise",
879 				     timerlat_aa_thread_handler, tool);
880 
881 	tep_unregister_event_handler(tool->trace.tep, -1, "ftrace", "kernel_stack",
882 				     timerlat_aa_stack_handler, tool);
883 	if (!dump_tasks)
884 		return;
885 
886 	tracefs_event_disable(tool->trace.inst, "sched", "sched_switch");
887 	tep_unregister_event_handler(tool->trace.tep, -1, "sched", "sched_switch",
888 				     timerlat_aa_sched_switch_handler, tool);
889 
890 	tracefs_event_disable(tool->trace.inst, "workqueue", "workqueue_execute_start");
891 	tep_unregister_event_handler(tool->trace.tep, -1, "workqueue", "workqueue_execute_start",
892 				     timerlat_aa_kworker_start_handler, tool);
893 }
894 
895 /*
896  * timerlat_aa_register_events - Register events used in the auto-analysis
897  *
898  * Returns 0 on success, -1 otherwise.
899  */
900 static int timerlat_aa_register_events(struct osnoise_tool *tool, int dump_tasks)
901 {
902 	int retval;
903 
904 	tep_register_event_handler(tool->trace.tep, -1, "ftrace", "timerlat",
905 				timerlat_aa_handler, tool);
906 
907 
908 	/*
909 	 * register auto-analysis handlers.
910 	 */
911 	retval = tracefs_event_enable(tool->trace.inst, "osnoise", NULL);
912 	if (retval < 0 && !errno) {
913 		err_msg("Could not find osnoise events\n");
914 		goto out_err;
915 	}
916 
917 	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "nmi_noise",
918 				   timerlat_aa_nmi_handler, tool);
919 
920 	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "irq_noise",
921 				   timerlat_aa_irq_handler, tool);
922 
923 	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "softirq_noise",
924 				   timerlat_aa_softirq_handler, tool);
925 
926 	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "thread_noise",
927 				   timerlat_aa_thread_handler, tool);
928 
929 	tep_register_event_handler(tool->trace.tep, -1, "ftrace", "kernel_stack",
930 				   timerlat_aa_stack_handler, tool);
931 
932 	if (!dump_tasks)
933 		return 0;
934 
935 	/*
936 	 * Dump task events.
937 	 */
938 	retval = tracefs_event_enable(tool->trace.inst, "sched", "sched_switch");
939 	if (retval < 0 && !errno) {
940 		err_msg("Could not find sched_switch\n");
941 		goto out_err;
942 	}
943 
944 	tep_register_event_handler(tool->trace.tep, -1, "sched", "sched_switch",
945 				   timerlat_aa_sched_switch_handler, tool);
946 
947 	retval = tracefs_event_enable(tool->trace.inst, "workqueue", "workqueue_execute_start");
948 	if (retval < 0 && !errno) {
949 		err_msg("Could not find workqueue_execute_start\n");
950 		goto out_err;
951 	}
952 
953 	tep_register_event_handler(tool->trace.tep, -1, "workqueue", "workqueue_execute_start",
954 				   timerlat_aa_kworker_start_handler, tool);
955 
956 	return 0;
957 
958 out_err:
959 	timerlat_aa_unregister_events(tool, dump_tasks);
960 	return -1;
961 }
962 
963 /**
964  * timerlat_aa_destroy - Destroy timerlat auto-analysis
965  */
966 void timerlat_aa_destroy(void)
967 {
968 	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
969 
970 	if (!taa_ctx)
971 		return;
972 
973 	if (!taa_ctx->taa_data)
974 		goto out_ctx;
975 
976 	timerlat_aa_unregister_events(taa_ctx->tool, taa_ctx->dump_tasks);
977 	timerlat_aa_destroy_seqs(taa_ctx);
978 	free(taa_ctx->taa_data);
979 out_ctx:
980 	free(taa_ctx);
981 }
982 
983 /**
984  * timerlat_aa_init - Initialize timerlat auto-analysis
985  *
986  * Returns 0 on success, -1 otherwise.
987  */
988 int timerlat_aa_init(struct osnoise_tool *tool, int dump_tasks)
989 {
990 	int nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
991 	struct timerlat_aa_context *taa_ctx;
992 	int retval;
993 
994 	taa_ctx = calloc(1, sizeof(*taa_ctx));
995 	if (!taa_ctx)
996 		return -1;
997 
998 	__timerlat_aa_ctx = taa_ctx;
999 
1000 	taa_ctx->nr_cpus = nr_cpus;
1001 	taa_ctx->tool = tool;
1002 	taa_ctx->dump_tasks = dump_tasks;
1003 
1004 	taa_ctx->taa_data = calloc(nr_cpus, sizeof(*taa_ctx->taa_data));
1005 	if (!taa_ctx->taa_data)
1006 		goto out_err;
1007 
1008 	retval = timerlat_aa_init_seqs(taa_ctx);
1009 	if (retval)
1010 		goto out_err;
1011 
1012 	retval = timerlat_aa_register_events(tool, dump_tasks);
1013 	if (retval)
1014 		goto out_err;
1015 
1016 	return 0;
1017 
1018 out_err:
1019 	timerlat_aa_destroy();
1020 	return -1;
1021 }
1022