1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2023 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
4  */
5 
6 #include <stdlib.h>
7 #include <errno.h>
8 #include "utils.h"
9 #include "osnoise.h"
10 #include "timerlat.h"
11 
12 enum timelat_state {
13 	TIMERLAT_INIT = 0,
14 	TIMERLAT_WAITING_IRQ,
15 	TIMERLAT_WAITING_THREAD,
16 };
17 
18 #define MAX_COMM		24
19 
20 /*
21  * Per-cpu data statistics and data.
22  */
23 struct timerlat_aa_data {
24 	/* Current CPU state */
25 	int			curr_state;
26 
27 	/* timerlat IRQ latency */
28 	unsigned long long	tlat_irq_seqnum;
29 	unsigned long long	tlat_irq_latency;
30 	unsigned long long	tlat_irq_timstamp;
31 
32 	/* timerlat Thread latency */
33 	unsigned long long	tlat_thread_seqnum;
34 	unsigned long long	tlat_thread_latency;
35 	unsigned long long	tlat_thread_timstamp;
36 
37 	/*
38 	 * Information about the thread running when the IRQ
39 	 * arrived.
40 	 *
41 	 * This can be blocking or interference, depending on the
42 	 * priority of the thread. Assuming timerlat is the highest
43 	 * prio, it is blocking. If timerlat has a lower prio, it is
44 	 * interference.
45 	 * note: "unsigned long long" because they are fetch using tep_get_field_val();
46 	 */
47 	unsigned long long	run_thread_pid;
48 	char			run_thread_comm[MAX_COMM];
49 	unsigned long long	thread_blocking_duration;
50 	unsigned long long	max_exit_idle_latency;
51 
52 	/* Information about the timerlat timer irq */
53 	unsigned long long	timer_irq_start_time;
54 	unsigned long long	timer_irq_start_delay;
55 	unsigned long long	timer_irq_duration;
56 	unsigned long long	timer_exit_from_idle;
57 
58 	/*
59 	 * Information about the last IRQ before the timerlat irq
60 	 * arrived.
61 	 *
62 	 * If now - timestamp is <= latency, it might have influenced
63 	 * in the timerlat irq latency. Otherwise, ignore it.
64 	 */
65 	unsigned long long	prev_irq_duration;
66 	unsigned long long	prev_irq_timstamp;
67 
68 	/*
69 	 * Interference sum.
70 	 */
71 	unsigned long long	thread_nmi_sum;
72 	unsigned long long	thread_irq_sum;
73 	unsigned long long	thread_softirq_sum;
74 	unsigned long long	thread_thread_sum;
75 
76 	/*
77 	 * Interference task information.
78 	 */
79 	struct trace_seq	*prev_irqs_seq;
80 	struct trace_seq	*nmi_seq;
81 	struct trace_seq	*irqs_seq;
82 	struct trace_seq	*softirqs_seq;
83 	struct trace_seq	*threads_seq;
84 	struct trace_seq	*stack_seq;
85 
86 	/*
87 	 * Current thread.
88 	 */
89 	char			current_comm[MAX_COMM];
90 	unsigned long long	current_pid;
91 
92 	/*
93 	 * Is the system running a kworker?
94 	 */
95 	unsigned long long	kworker;
96 	unsigned long long	kworker_func;
97 };
98 
99 /*
100  * The analysis context and system wide view
101  */
102 struct timerlat_aa_context {
103 	int nr_cpus;
104 	int dump_tasks;
105 
106 	/* per CPU data */
107 	struct timerlat_aa_data *taa_data;
108 
109 	/*
110 	 * required to translate function names and register
111 	 * events.
112 	 */
113 	struct osnoise_tool *tool;
114 };
115 
116 /*
117  * The data is stored as a local variable, but accessed via a helper function.
118  *
119  * It could be stored inside the trace context. But every access would
120  * require container_of() + a series of pointers. Do we need it? Not sure.
121  *
122  * For now keep it simple. If needed, store it in the tool, add the *context
123  * as a parameter in timerlat_aa_get_ctx() and do the magic there.
124  */
125 static struct timerlat_aa_context *__timerlat_aa_ctx;
126 
127 static struct timerlat_aa_context *timerlat_aa_get_ctx(void)
128 {
129 	return __timerlat_aa_ctx;
130 }
131 
132 /*
133  * timerlat_aa_get_data - Get the per-cpu data from the timerlat context
134  */
135 static struct timerlat_aa_data
136 *timerlat_aa_get_data(struct timerlat_aa_context *taa_ctx, int cpu)
137 {
138 	return &taa_ctx->taa_data[cpu];
139 }
140 
141 /*
142  * timerlat_aa_irq_latency - Handles timerlat IRQ event
143  */
144 static int timerlat_aa_irq_latency(struct timerlat_aa_data *taa_data,
145 				   struct trace_seq *s, struct tep_record *record,
146 				   struct tep_event *event)
147 {
148 	/*
149 	 * For interference, we start now looking for things that can delay
150 	 * the thread.
151 	 */
152 	taa_data->curr_state = TIMERLAT_WAITING_THREAD;
153 	taa_data->tlat_irq_timstamp = record->ts;
154 
155 	/*
156 	 * Zero values.
157 	 */
158 	taa_data->thread_nmi_sum = 0;
159 	taa_data->thread_irq_sum = 0;
160 	taa_data->thread_softirq_sum = 0;
161 	taa_data->thread_blocking_duration = 0;
162 	taa_data->timer_irq_start_time = 0;
163 	taa_data->timer_irq_duration = 0;
164 	taa_data->timer_exit_from_idle = 0;
165 
166 	/*
167 	 * Zero interference tasks.
168 	 */
169 	trace_seq_reset(taa_data->nmi_seq);
170 	trace_seq_reset(taa_data->irqs_seq);
171 	trace_seq_reset(taa_data->softirqs_seq);
172 	trace_seq_reset(taa_data->threads_seq);
173 
174 	/* IRQ latency values */
175 	tep_get_field_val(s, event, "timer_latency", record, &taa_data->tlat_irq_latency, 1);
176 	tep_get_field_val(s, event, "seqnum", record, &taa_data->tlat_irq_seqnum, 1);
177 
178 	/* The thread that can cause blocking */
179 	tep_get_common_field_val(s, event, "common_pid", record, &taa_data->run_thread_pid, 1);
180 
181 	/*
182 	 * Get exit from idle case.
183 	 *
184 	 * If it is not idle thread:
185 	 */
186 	if (taa_data->run_thread_pid)
187 		return 0;
188 
189 	/*
190 	 * if the latency is shorter than the known exit from idle:
191 	 */
192 	if (taa_data->tlat_irq_latency < taa_data->max_exit_idle_latency)
193 		return 0;
194 
195 	/*
196 	 * To be safe, ignore the cases in which an IRQ/NMI could have
197 	 * interfered with the timerlat IRQ.
198 	 */
199 	if (taa_data->tlat_irq_timstamp - taa_data->tlat_irq_latency
200 	    < taa_data->prev_irq_timstamp + taa_data->prev_irq_duration)
201 		return 0;
202 
203 	taa_data->max_exit_idle_latency = taa_data->tlat_irq_latency;
204 
205 	return 0;
206 }
207 
208 /*
209  * timerlat_aa_thread_latency - Handles timerlat thread event
210  */
211 static int timerlat_aa_thread_latency(struct timerlat_aa_data *taa_data,
212 				      struct trace_seq *s, struct tep_record *record,
213 				      struct tep_event *event)
214 {
215 	/*
216 	 * For interference, we start now looking for things that can delay
217 	 * the IRQ of the next cycle.
218 	 */
219 	taa_data->curr_state = TIMERLAT_WAITING_IRQ;
220 	taa_data->tlat_thread_timstamp = record->ts;
221 
222 	/* Thread latency values */
223 	tep_get_field_val(s, event, "timer_latency", record, &taa_data->tlat_thread_latency, 1);
224 	tep_get_field_val(s, event, "seqnum", record, &taa_data->tlat_thread_seqnum, 1);
225 
226 	return 0;
227 }
228 
229 /*
230  * timerlat_aa_handler - Handle timerlat events
231  *
232  * This function is called to handle timerlat events recording statistics.
233  *
234  * Returns 0 on success, -1 otherwise.
235  */
236 int timerlat_aa_handler(struct trace_seq *s, struct tep_record *record,
237 			struct tep_event *event, void *context)
238 {
239 	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
240 	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
241 	unsigned long long thread;
242 
243 	if (!taa_data)
244 		return -1;
245 
246 	tep_get_field_val(s, event, "context", record, &thread, 1);
247 	if (!thread)
248 		return timerlat_aa_irq_latency(taa_data, s, record, event);
249 	else
250 		return timerlat_aa_thread_latency(taa_data, s, record, event);
251 }
252 
253 /*
254  * timerlat_aa_nmi_handler - Handles NMI noise
255  *
256  * It is used to collect information about interferences from NMI. It is
257  * hooked to the osnoise:nmi_noise event.
258  */
259 static int timerlat_aa_nmi_handler(struct trace_seq *s, struct tep_record *record,
260 				   struct tep_event *event, void *context)
261 {
262 	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
263 	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
264 	unsigned long long duration;
265 	unsigned long long start;
266 
267 	tep_get_field_val(s, event, "duration", record, &duration, 1);
268 	tep_get_field_val(s, event, "start", record, &start, 1);
269 
270 	if (taa_data->curr_state == TIMERLAT_WAITING_IRQ) {
271 		taa_data->prev_irq_duration = duration;
272 		taa_data->prev_irq_timstamp = start;
273 
274 		trace_seq_reset(taa_data->prev_irqs_seq);
275 		trace_seq_printf(taa_data->prev_irqs_seq, "\t%24s	\t\t\t%9.2f us\n",
276 			 "nmi", ns_to_usf(duration));
277 		return 0;
278 	}
279 
280 	taa_data->thread_nmi_sum += duration;
281 	trace_seq_printf(taa_data->nmi_seq, "	%24s	\t\t\t%9.2f us\n",
282 		 "nmi", ns_to_usf(duration));
283 
284 	return 0;
285 }
286 
287 /*
288  * timerlat_aa_irq_handler - Handles IRQ noise
289  *
290  * It is used to collect information about interferences from IRQ. It is
291  * hooked to the osnoise:irq_noise event.
292  *
293  * It is a little bit more complex than the other because it measures:
294  *	- The IRQs that can delay the timer IRQ before it happened.
295  *	- The Timerlat IRQ handler
296  *	- The IRQs that happened between the timerlat IRQ and the timerlat thread
297  *	  (IRQ interference).
298  */
299 static int timerlat_aa_irq_handler(struct trace_seq *s, struct tep_record *record,
300 				   struct tep_event *event, void *context)
301 {
302 	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
303 	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
304 	unsigned long long expected_start;
305 	unsigned long long duration;
306 	unsigned long long vector;
307 	unsigned long long start;
308 	char *desc;
309 	int val;
310 
311 	tep_get_field_val(s, event, "duration", record, &duration, 1);
312 	tep_get_field_val(s, event, "start", record, &start, 1);
313 	tep_get_field_val(s, event, "vector", record, &vector, 1);
314 	desc = tep_get_field_raw(s, event, "desc", record, &val, 1);
315 
316 	/*
317 	 * Before the timerlat IRQ.
318 	 */
319 	if (taa_data->curr_state == TIMERLAT_WAITING_IRQ) {
320 		taa_data->prev_irq_duration = duration;
321 		taa_data->prev_irq_timstamp = start;
322 
323 		trace_seq_reset(taa_data->prev_irqs_seq);
324 		trace_seq_printf(taa_data->prev_irqs_seq, "\t%24s:%-3llu	\t\t%9.2f us\n",
325 				 desc, vector, ns_to_usf(duration));
326 		return 0;
327 	}
328 
329 	/*
330 	 * The timerlat IRQ: taa_data->timer_irq_start_time is zeroed at
331 	 * the timerlat irq handler.
332 	 */
333 	if (!taa_data->timer_irq_start_time) {
334 		expected_start = taa_data->tlat_irq_timstamp - taa_data->tlat_irq_latency;
335 
336 		taa_data->timer_irq_start_time = start;
337 		taa_data->timer_irq_duration = duration;
338 
339 		taa_data->timer_irq_start_delay = taa_data->timer_irq_start_time - expected_start;
340 
341 		/*
342 		 * not exit from idle.
343 		 */
344 		if (taa_data->run_thread_pid)
345 			return 0;
346 
347 		if (expected_start > taa_data->prev_irq_timstamp + taa_data->prev_irq_duration)
348 			taa_data->timer_exit_from_idle = taa_data->timer_irq_start_delay;
349 
350 		return 0;
351 	}
352 
353 	/*
354 	 * IRQ interference.
355 	 */
356 	taa_data->thread_irq_sum += duration;
357 	trace_seq_printf(taa_data->irqs_seq, "	%24s:%-3llu	\t	%9.2f us\n",
358 			 desc, vector, ns_to_usf(duration));
359 
360 	return 0;
361 }
362 
363 static char *softirq_name[] = { "HI", "TIMER",	"NET_TX", "NET_RX", "BLOCK",
364 				"IRQ_POLL", "TASKLET", "SCHED", "HRTIMER", "RCU" };
365 
366 
367 /*
368  * timerlat_aa_softirq_handler - Handles Softirq noise
369  *
370  * It is used to collect information about interferences from Softirq. It is
371  * hooked to the osnoise:softirq_noise event.
372  *
373  * It is only printed in the non-rt kernel, as softirqs become thread on RT.
374  */
375 static int timerlat_aa_softirq_handler(struct trace_seq *s, struct tep_record *record,
376 				       struct tep_event *event, void *context)
377 {
378 	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
379 	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
380 	unsigned long long duration;
381 	unsigned long long vector;
382 	unsigned long long start;
383 
384 	if (taa_data->curr_state == TIMERLAT_WAITING_IRQ)
385 		return 0;
386 
387 	tep_get_field_val(s, event, "duration", record, &duration, 1);
388 	tep_get_field_val(s, event, "start", record, &start, 1);
389 	tep_get_field_val(s, event, "vector", record, &vector, 1);
390 
391 	taa_data->thread_softirq_sum += duration;
392 
393 	trace_seq_printf(taa_data->softirqs_seq, "\t%24s:%-3llu	\t	%9.2f us\n",
394 			 softirq_name[vector], vector, ns_to_usf(duration));
395 	return 0;
396 }
397 
398 /*
399  * timerlat_aa_softirq_handler - Handles thread noise
400  *
401  * It is used to collect information about interferences from threads. It is
402  * hooked to the osnoise:thread_noise event.
403  *
404  * Note: if you see thread noise, your timerlat thread was not the highest prio one.
405  */
406 static int timerlat_aa_thread_handler(struct trace_seq *s, struct tep_record *record,
407 				      struct tep_event *event, void *context)
408 {
409 	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
410 	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
411 	unsigned long long duration;
412 	unsigned long long start;
413 	unsigned long long pid;
414 	const char *comm;
415 	int val;
416 
417 	if (taa_data->curr_state == TIMERLAT_WAITING_IRQ)
418 		return 0;
419 
420 	tep_get_field_val(s, event, "duration", record, &duration, 1);
421 	tep_get_field_val(s, event, "start", record, &start, 1);
422 
423 	tep_get_common_field_val(s, event, "common_pid", record, &pid, 1);
424 	comm = tep_get_field_raw(s, event, "comm", record, &val, 1);
425 
426 	if (pid == taa_data->run_thread_pid && !taa_data->thread_blocking_duration) {
427 		taa_data->thread_blocking_duration = duration;
428 
429 		if (comm)
430 			strncpy(taa_data->run_thread_comm, comm, MAX_COMM);
431 		else
432 			sprintf(taa_data->run_thread_comm, "<...>");
433 
434 	} else {
435 		taa_data->thread_thread_sum += duration;
436 
437 		trace_seq_printf(taa_data->threads_seq, "\t%24s:%-3llu	\t\t%9.2f us\n",
438 			 comm, pid, ns_to_usf(duration));
439 	}
440 
441 	return 0;
442 }
443 
444 /*
445  * timerlat_aa_stack_handler - Handles timerlat IRQ stack trace
446  *
447  * Saves and parse the stack trace generated by the timerlat IRQ.
448  */
449 static int timerlat_aa_stack_handler(struct trace_seq *s, struct tep_record *record,
450 			      struct tep_event *event, void *context)
451 {
452 	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
453 	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
454 	unsigned long *caller;
455 	const char *function;
456 	int val, i;
457 
458 	trace_seq_reset(taa_data->stack_seq);
459 
460 	trace_seq_printf(taa_data->stack_seq, "    Blocking thread stack trace\n");
461 	caller = tep_get_field_raw(s, event, "caller", record, &val, 1);
462 	if (caller) {
463 		for (i = 0; ; i++) {
464 			function = tep_find_function(taa_ctx->tool->trace.tep, caller[i]);
465 			if (!function)
466 				break;
467 			trace_seq_printf(taa_data->stack_seq, "\t\t-> %s\n", function);
468 		}
469 	}
470 	return 0;
471 }
472 
473 /*
474  * timerlat_aa_sched_switch_handler - Tracks the current thread running on the CPU
475  *
476  * Handles the sched:sched_switch event to trace the current thread running on the
477  * CPU. It is used to display the threads running on the other CPUs when the trace
478  * stops.
479  */
480 static int timerlat_aa_sched_switch_handler(struct trace_seq *s, struct tep_record *record,
481 					    struct tep_event *event, void *context)
482 {
483 	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
484 	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
485 	const char *comm;
486 	int val;
487 
488 	tep_get_field_val(s, event, "next_pid", record, &taa_data->current_pid, 1);
489 	comm = tep_get_field_raw(s, event, "next_comm", record, &val, 1);
490 
491 	strncpy(taa_data->current_comm, comm, MAX_COMM);
492 
493 	/*
494 	 * If this was a kworker, clean the last kworkers that ran.
495 	 */
496 	taa_data->kworker = 0;
497 	taa_data->kworker_func = 0;
498 
499 	return 0;
500 }
501 
502 /*
503  * timerlat_aa_kworker_start_handler - Tracks a kworker running on the CPU
504  *
505  * Handles workqueue:workqueue_execute_start event, keeping track of
506  * the job that a kworker could be doing in the CPU.
507  *
508  * We already catch problems of hardware related latencies caused by work queues
509  * running driver code that causes hardware stall. For example, with DRM drivers.
510  */
511 static int timerlat_aa_kworker_start_handler(struct trace_seq *s, struct tep_record *record,
512 					     struct tep_event *event, void *context)
513 {
514 	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
515 	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
516 
517 	tep_get_field_val(s, event, "work", record, &taa_data->kworker, 1);
518 	tep_get_field_val(s, event, "function", record, &taa_data->kworker_func, 1);
519 	return 0;
520 }
521 
522 /*
523  * timerlat_thread_analysis - Prints the analysis of a CPU that hit a stop tracing
524  *
525  * This is the core of the analysis.
526  */
527 static void timerlat_thread_analysis(struct timerlat_aa_data *taa_data, int cpu,
528 				     int irq_thresh, int thread_thresh)
529 {
530 	unsigned long long exp_irq_ts;
531 	int total;
532 	int irq;
533 
534 	/*
535 	 * IRQ latency or Thread latency?
536 	 */
537 	if (taa_data->tlat_irq_seqnum > taa_data->tlat_thread_seqnum) {
538 		irq = 1;
539 		total = taa_data->tlat_irq_latency;
540 	} else {
541 		irq = 0;
542 		total = taa_data->tlat_thread_latency;
543 	}
544 
545 	/*
546 	 * Expected IRQ arrival time using the trace clock as the base.
547 	 */
548 	exp_irq_ts = taa_data->timer_irq_start_time - taa_data->timer_irq_start_delay;
549 
550 	if (exp_irq_ts < taa_data->prev_irq_timstamp + taa_data->prev_irq_duration)
551 		printf("  Previous IRQ interference:	\t\t up to  %9.2f us\n",
552 			ns_to_usf(taa_data->prev_irq_duration));
553 
554 	/*
555 	 * The delay that the IRQ suffered before starting.
556 	 */
557 	printf("  IRQ handler delay:		%16s	%9.2f us (%.2f %%)\n",
558 		(ns_to_usf(taa_data->timer_exit_from_idle) > 10) ? "(exit from idle)" : "",
559 		ns_to_usf(taa_data->timer_irq_start_delay),
560 		ns_to_per(total, taa_data->timer_irq_start_delay));
561 
562 	/*
563 	 * Timerlat IRQ.
564 	 */
565 	printf("  IRQ latency:	\t\t\t\t	%9.2f us\n",
566 		ns_to_usf(taa_data->tlat_irq_latency));
567 
568 	if (irq) {
569 		/*
570 		 * If the trace stopped due to IRQ, the other events will not happen
571 		 * because... the trace stopped :-).
572 		 *
573 		 * That is all folks, the stack trace was printed before the stop,
574 		 * so it will be displayed, it is the key.
575 		 */
576 		printf("  Blocking thread:\n");
577 		printf("	%24s:%-9llu\n",
578 			taa_data->run_thread_comm, taa_data->run_thread_pid);
579 	} else  {
580 		/*
581 		 * The duration of the IRQ handler that handled the timerlat IRQ.
582 		 */
583 		printf("  Timerlat IRQ duration:	\t\t	%9.2f us (%.2f %%)\n",
584 			ns_to_usf(taa_data->timer_irq_duration),
585 			ns_to_per(total, taa_data->timer_irq_duration));
586 
587 		/*
588 		 * The amount of time that the current thread postponed the scheduler.
589 		 *
590 		 * Recalling that it is net from NMI/IRQ/Softirq interference, so there
591 		 * is no need to compute values here.
592 		 */
593 		printf("  Blocking thread:	\t\t\t	%9.2f us (%.2f %%)\n",
594 			ns_to_usf(taa_data->thread_blocking_duration),
595 			ns_to_per(total, taa_data->thread_blocking_duration));
596 
597 		printf("	%24s:%-9llu		%9.2f us\n",
598 			taa_data->run_thread_comm, taa_data->run_thread_pid,
599 			ns_to_usf(taa_data->thread_blocking_duration));
600 	}
601 
602 	/*
603 	 * Print the stack trace!
604 	 */
605 	trace_seq_do_printf(taa_data->stack_seq);
606 
607 	/*
608 	 * NMIs can happen during the IRQ, so they are always possible.
609 	 */
610 	if (taa_data->thread_nmi_sum)
611 		printf("  NMI interference	\t\t\t	%9.2f us (%.2f %%)\n",
612 			ns_to_usf(taa_data->thread_nmi_sum),
613 			ns_to_per(total, taa_data->thread_nmi_sum));
614 
615 	/*
616 	 * If it is an IRQ latency, the other factors can be skipped.
617 	 */
618 	if (irq)
619 		goto print_total;
620 
621 	/*
622 	 * Prints the interference caused by IRQs to the thread latency.
623 	 */
624 	if (taa_data->thread_irq_sum) {
625 		printf("  IRQ interference	\t\t\t	%9.2f us (%.2f %%)\n",
626 			ns_to_usf(taa_data->thread_irq_sum),
627 			ns_to_per(total, taa_data->thread_irq_sum));
628 
629 		trace_seq_do_printf(taa_data->irqs_seq);
630 	}
631 
632 	/*
633 	 * Prints the interference caused by Softirqs to the thread latency.
634 	 */
635 	if (taa_data->thread_softirq_sum) {
636 		printf("  Softirq interference	\t\t\t	%9.2f us (%.2f %%)\n",
637 			ns_to_usf(taa_data->thread_softirq_sum),
638 			ns_to_per(total, taa_data->thread_softirq_sum));
639 
640 		trace_seq_do_printf(taa_data->softirqs_seq);
641 	}
642 
643 	/*
644 	 * Prints the interference caused by other threads to the thread latency.
645 	 *
646 	 * If this happens, your timerlat is not the highest prio. OK, migration
647 	 * thread can happen. But otherwise, you are not measuring the "scheduling
648 	 * latency" only, and here is the difference from scheduling latency and
649 	 * timer handling latency.
650 	 */
651 	if (taa_data->thread_thread_sum) {
652 		printf("  Thread interference	\t\t\t	%9.2f us (%.2f %%)\n",
653 			ns_to_usf(taa_data->thread_thread_sum),
654 			ns_to_per(total, taa_data->thread_thread_sum));
655 
656 		trace_seq_do_printf(taa_data->threads_seq);
657 	}
658 
659 	/*
660 	 * Done.
661 	 */
662 print_total:
663 	printf("------------------------------------------------------------------------\n");
664 	printf("  %s latency:	\t\t\t	%9.2f us (100%%)\n", irq ? "IRQ" : "Thread",
665 		ns_to_usf(total));
666 }
667 
668 /**
669  * timerlat_auto_analysis - Analyze the collected data
670  */
671 void timerlat_auto_analysis(int irq_thresh, int thread_thresh)
672 {
673 	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
674 	unsigned long long max_exit_from_idle = 0;
675 	struct timerlat_aa_data *taa_data;
676 	int max_exit_from_idle_cpu;
677 	struct tep_handle *tep;
678 	int cpu;
679 
680 	/* bring stop tracing to the ns scale */
681 	irq_thresh = irq_thresh * 1000;
682 	thread_thresh = thread_thresh * 1000;
683 
684 	for (cpu = 0; cpu < taa_ctx->nr_cpus; cpu++) {
685 		taa_data = timerlat_aa_get_data(taa_ctx, cpu);
686 
687 		if (irq_thresh && taa_data->tlat_irq_latency >= irq_thresh) {
688 			printf("## CPU %d hit stop tracing, analyzing it ##\n", cpu);
689 			timerlat_thread_analysis(taa_data, cpu, irq_thresh, thread_thresh);
690 		} else if (thread_thresh && (taa_data->tlat_thread_latency) >= thread_thresh) {
691 			printf("## CPU %d hit stop tracing, analyzing it ##\n", cpu);
692 			timerlat_thread_analysis(taa_data, cpu, irq_thresh, thread_thresh);
693 		}
694 
695 		if (taa_data->max_exit_idle_latency > max_exit_from_idle) {
696 			max_exit_from_idle = taa_data->max_exit_idle_latency;
697 			max_exit_from_idle_cpu = cpu;
698 		}
699 
700 	}
701 
702 	if (max_exit_from_idle) {
703 		printf("\n");
704 		printf("Max timerlat IRQ latency from idle: %.2f us in cpu %d\n",
705 			ns_to_usf(max_exit_from_idle), max_exit_from_idle_cpu);
706 	}
707 	if (!taa_ctx->dump_tasks)
708 		return;
709 
710 	printf("\n");
711 	printf("Printing CPU tasks:\n");
712 	for (cpu = 0; cpu < taa_ctx->nr_cpus; cpu++) {
713 		taa_data = timerlat_aa_get_data(taa_ctx, cpu);
714 		tep = taa_ctx->tool->trace.tep;
715 
716 		printf("    [%.3d] %24s:%llu", cpu, taa_data->current_comm, taa_data->current_pid);
717 
718 		if (taa_data->kworker_func)
719 			printf(" kworker:%s:%s",
720 				tep_find_function(tep, taa_data->kworker) ? : "<...>",
721 				tep_find_function(tep, taa_data->kworker_func));
722 		printf("\n");
723 	}
724 
725 }
726 
727 /*
728  * timerlat_aa_destroy_seqs - Destroy seq files used to store parsed data
729  */
730 static void timerlat_aa_destroy_seqs(struct timerlat_aa_context *taa_ctx)
731 {
732 	struct timerlat_aa_data *taa_data;
733 	int i;
734 
735 	if (!taa_ctx->taa_data)
736 		return;
737 
738 	for (i = 0; i < taa_ctx->nr_cpus; i++) {
739 		taa_data = timerlat_aa_get_data(taa_ctx, i);
740 
741 		if (taa_data->prev_irqs_seq) {
742 			trace_seq_destroy(taa_data->prev_irqs_seq);
743 			free(taa_data->prev_irqs_seq);
744 		}
745 
746 		if (taa_data->nmi_seq) {
747 			trace_seq_destroy(taa_data->nmi_seq);
748 			free(taa_data->nmi_seq);
749 		}
750 
751 		if (taa_data->irqs_seq) {
752 			trace_seq_destroy(taa_data->irqs_seq);
753 			free(taa_data->irqs_seq);
754 		}
755 
756 		if (taa_data->softirqs_seq) {
757 			trace_seq_destroy(taa_data->softirqs_seq);
758 			free(taa_data->softirqs_seq);
759 		}
760 
761 		if (taa_data->threads_seq) {
762 			trace_seq_destroy(taa_data->threads_seq);
763 			free(taa_data->threads_seq);
764 		}
765 
766 		if (taa_data->stack_seq) {
767 			trace_seq_destroy(taa_data->stack_seq);
768 			free(taa_data->stack_seq);
769 		}
770 	}
771 }
772 
773 /*
774  * timerlat_aa_init_seqs - Init seq files used to store parsed information
775  *
776  * Instead of keeping data structures to store raw data, use seq files to
777  * store parsed data.
778  *
779  * Allocates and initialize seq files.
780  *
781  * Returns 0 on success, -1 otherwise.
782  */
783 static int timerlat_aa_init_seqs(struct timerlat_aa_context *taa_ctx)
784 {
785 	struct timerlat_aa_data *taa_data;
786 	int i;
787 
788 	for (i = 0; i < taa_ctx->nr_cpus; i++) {
789 
790 		taa_data = timerlat_aa_get_data(taa_ctx, i);
791 
792 		taa_data->prev_irqs_seq = calloc(1, sizeof(*taa_data->prev_irqs_seq));
793 		if (!taa_data->prev_irqs_seq)
794 			goto out_err;
795 
796 		trace_seq_init(taa_data->prev_irqs_seq);
797 
798 		taa_data->nmi_seq = calloc(1, sizeof(*taa_data->nmi_seq));
799 		if (!taa_data->nmi_seq)
800 			goto out_err;
801 
802 		trace_seq_init(taa_data->nmi_seq);
803 
804 		taa_data->irqs_seq = calloc(1, sizeof(*taa_data->irqs_seq));
805 		if (!taa_data->irqs_seq)
806 			goto out_err;
807 
808 		trace_seq_init(taa_data->irqs_seq);
809 
810 		taa_data->softirqs_seq = calloc(1, sizeof(*taa_data->softirqs_seq));
811 		if (!taa_data->softirqs_seq)
812 			goto out_err;
813 
814 		trace_seq_init(taa_data->softirqs_seq);
815 
816 		taa_data->threads_seq = calloc(1, sizeof(*taa_data->threads_seq));
817 		if (!taa_data->threads_seq)
818 			goto out_err;
819 
820 		trace_seq_init(taa_data->threads_seq);
821 
822 		taa_data->stack_seq = calloc(1, sizeof(*taa_data->stack_seq));
823 		if (!taa_data->stack_seq)
824 			goto out_err;
825 
826 		trace_seq_init(taa_data->stack_seq);
827 	}
828 
829 	return 0;
830 
831 out_err:
832 	timerlat_aa_destroy_seqs(taa_ctx);
833 	return -1;
834 }
835 
836 /*
837  * timerlat_aa_unregister_events - Unregister events used in the auto-analysis
838  */
839 static void timerlat_aa_unregister_events(struct osnoise_tool *tool, int dump_tasks)
840 {
841 	tracefs_event_disable(tool->trace.inst, "osnoise", NULL);
842 
843 	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "nmi_noise",
844 				     timerlat_aa_nmi_handler, tool);
845 
846 	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "irq_noise",
847 				     timerlat_aa_irq_handler, tool);
848 
849 	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "softirq_noise",
850 				     timerlat_aa_softirq_handler, tool);
851 
852 	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "thread_noise",
853 				     timerlat_aa_thread_handler, tool);
854 
855 	tep_unregister_event_handler(tool->trace.tep, -1, "ftrace", "kernel_stack",
856 				     timerlat_aa_stack_handler, tool);
857 	if (!dump_tasks)
858 		return;
859 
860 	tracefs_event_disable(tool->trace.inst, "sched", "sched_switch");
861 	tep_unregister_event_handler(tool->trace.tep, -1, "sched", "sched_switch",
862 				     timerlat_aa_sched_switch_handler, tool);
863 
864 	tracefs_event_disable(tool->trace.inst, "workqueue", "workqueue_execute_start");
865 	tep_unregister_event_handler(tool->trace.tep, -1, "workqueue", "workqueue_execute_start",
866 				     timerlat_aa_kworker_start_handler, tool);
867 }
868 
869 /*
870  * timerlat_aa_register_events - Register events used in the auto-analysis
871  *
872  * Returns 0 on success, -1 otherwise.
873  */
874 static int timerlat_aa_register_events(struct osnoise_tool *tool, int dump_tasks)
875 {
876 	int retval;
877 
878 	/*
879 	 * register auto-analysis handlers.
880 	 */
881 	retval = tracefs_event_enable(tool->trace.inst, "osnoise", NULL);
882 	if (retval < 0 && !errno) {
883 		err_msg("Could not find osnoise events\n");
884 		goto out_err;
885 	}
886 
887 	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "nmi_noise",
888 				   timerlat_aa_nmi_handler, tool);
889 
890 	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "irq_noise",
891 				   timerlat_aa_irq_handler, tool);
892 
893 	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "softirq_noise",
894 				   timerlat_aa_softirq_handler, tool);
895 
896 	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "thread_noise",
897 				   timerlat_aa_thread_handler, tool);
898 
899 	tep_register_event_handler(tool->trace.tep, -1, "ftrace", "kernel_stack",
900 				   timerlat_aa_stack_handler, tool);
901 
902 	if (!dump_tasks)
903 		return 0;
904 
905 	/*
906 	 * Dump task events.
907 	 */
908 	retval = tracefs_event_enable(tool->trace.inst, "sched", "sched_switch");
909 	if (retval < 0 && !errno) {
910 		err_msg("Could not find sched_switch\n");
911 		goto out_err;
912 	}
913 
914 	tep_register_event_handler(tool->trace.tep, -1, "sched", "sched_switch",
915 				   timerlat_aa_sched_switch_handler, tool);
916 
917 	retval = tracefs_event_enable(tool->trace.inst, "workqueue", "workqueue_execute_start");
918 	if (retval < 0 && !errno) {
919 		err_msg("Could not find workqueue_execute_start\n");
920 		goto out_err;
921 	}
922 
923 	tep_register_event_handler(tool->trace.tep, -1, "workqueue", "workqueue_execute_start",
924 				   timerlat_aa_kworker_start_handler, tool);
925 
926 	return 0;
927 
928 out_err:
929 	timerlat_aa_unregister_events(tool, dump_tasks);
930 	return -1;
931 }
932 
933 /**
934  * timerlat_aa_destroy - Destroy timerlat auto-analysis
935  */
936 void timerlat_aa_destroy(void)
937 {
938 	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
939 
940 	if (!taa_ctx)
941 		return;
942 
943 	if (!taa_ctx->taa_data)
944 		goto out_ctx;
945 
946 	timerlat_aa_unregister_events(taa_ctx->tool, taa_ctx->dump_tasks);
947 	timerlat_aa_destroy_seqs(taa_ctx);
948 	free(taa_ctx->taa_data);
949 out_ctx:
950 	free(taa_ctx);
951 }
952 
953 /**
954  * timerlat_aa_init - Initialize timerlat auto-analysis
955  *
956  * Returns 0 on success, -1 otherwise.
957  */
958 int timerlat_aa_init(struct osnoise_tool *tool, int nr_cpus, int dump_tasks)
959 {
960 	struct timerlat_aa_context *taa_ctx;
961 	int retval;
962 
963 	taa_ctx = calloc(1, sizeof(*taa_ctx));
964 	if (!taa_ctx)
965 		return -1;
966 
967 	__timerlat_aa_ctx = taa_ctx;
968 
969 	taa_ctx->nr_cpus = nr_cpus;
970 	taa_ctx->tool = tool;
971 	taa_ctx->dump_tasks = dump_tasks;
972 
973 	taa_ctx->taa_data = calloc(nr_cpus, sizeof(*taa_ctx->taa_data));
974 	if (!taa_ctx->taa_data)
975 		goto out_err;
976 
977 	retval = timerlat_aa_init_seqs(taa_ctx);
978 	if (retval)
979 		goto out_err;
980 
981 	retval = timerlat_aa_register_events(tool, dump_tasks);
982 	if (retval)
983 		goto out_err;
984 
985 	return 0;
986 
987 out_err:
988 	timerlat_aa_destroy();
989 	return -1;
990 }
991