// SPDX-License-Identifier: GPL-2.0
#include <errno.h>
#include <inttypes.h>
#include "builtin.h"
#include "perf.h"

#include "util/evlist.h" // for struct evsel_str_handler
#include "util/evsel.h"
#include "util/symbol.h"
#include "util/thread.h"
#include "util/header.h"
#include "util/target.h"
#include "util/callchain.h"
#include "util/lock-contention.h"
#include "util/bpf_skel/lock_data.h"

#include <subcmd/pager.h>
#include <subcmd/parse-options.h>
#include "util/trace-event.h"
#include "util/tracepoint.h"

#include "util/debug.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/data.h"
#include "util/string2.h"
#include "util/map.h"
#include "util/util.h"

#include <stdio.h>
#include <sys/types.h>
#include <sys/prctl.h>
#include <semaphore.h>
#include <math.h>
#include <limits.h>
#include <ctype.h>

#include <linux/list.h>
#include <linux/hash.h>
#include <linux/kernel.h>
#include <linux/zalloc.h>
#include <linux/err.h>
#include <linux/stringify.h>

static struct perf_session *session;
static struct target target;

/* based on kernel/lockdep.c */
#define LOCKHASH_BITS		12
#define LOCKHASH_SIZE		(1UL << LOCKHASH_BITS)

static struct hlist_head *lockhash_table;

#define __lockhashfn(key)	hash_long((unsigned long)key, LOCKHASH_BITS)
#define lockhashentry(key)	(lockhash_table + __lockhashfn((key)))
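
/*
 * All lock_stat entries live in lockhash_table, hashed by lock address:
 * e.g. the stats for a lock at address 'addr' sit in the bucket
 * lockhashentry(addr), whose hlist is walked by lock_stat_find() and
 * lock_stat_findnew() below.
 */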

static struct rb_root		thread_stats;

static bool combine_locks;
static bool show_thread_stats;
static bool show_lock_addrs;
static bool show_lock_owner;
static bool use_bpf;
static unsigned long bpf_map_entries = MAX_ENTRIES;
static int max_stack_depth = CONTENTION_STACK_DEPTH;
static int stack_skip = CONTENTION_STACK_SKIP;
static int print_nr_entries = INT_MAX / 2;
static LIST_HEAD(callstack_filters);
static const char *output_name = NULL;
static FILE *lock_output;

struct callstack_filter {
	struct list_head list;
	char name[];
};

static struct lock_filter filters;

static enum lock_aggr_mode aggr_mode = LOCK_AGGR_ADDR;

static bool needs_callstack(void)
{
	return !list_empty(&callstack_filters);
}

static struct thread_stat *thread_stat_find(u32 tid)
{
	struct rb_node *node;
	struct thread_stat *st;

	node = thread_stats.rb_node;
	while (node) {
		st = container_of(node, struct thread_stat, rb);
		if (st->tid == tid)
			return st;
		else if (tid < st->tid)
			node = node->rb_left;
		else
			node = node->rb_right;
	}

	return NULL;
}

static void thread_stat_insert(struct thread_stat *new)
{
	struct rb_node **rb = &thread_stats.rb_node;
	struct rb_node *parent = NULL;
	struct thread_stat *p;

	while (*rb) {
		p = container_of(*rb, struct thread_stat, rb);
		parent = *rb;

		if (new->tid < p->tid)
			rb = &(*rb)->rb_left;
		else if (new->tid > p->tid)
			rb = &(*rb)->rb_right;
		else
			BUG_ON("inserting invalid thread_stat\n");
	}

	rb_link_node(&new->rb, parent, rb);
	rb_insert_color(&new->rb, &thread_stats);
}

static struct thread_stat *thread_stat_findnew_after_first(u32 tid)
{
	struct thread_stat *st;

	st = thread_stat_find(tid);
	if (st)
		return st;

	st = zalloc(sizeof(struct thread_stat));
	if (!st) {
		pr_err("memory allocation failed\n");
		return NULL;
	}

	st->tid = tid;
	INIT_LIST_HEAD(&st->seq_list);

	thread_stat_insert(st);

	return st;
}

static struct thread_stat *thread_stat_findnew_first(u32 tid);
static struct thread_stat *(*thread_stat_findnew)(u32 tid) =
	thread_stat_findnew_first;

static struct thread_stat *thread_stat_findnew_first(u32 tid)
{
	struct thread_stat *st;

	st = zalloc(sizeof(struct thread_stat));
	if (!st) {
		pr_err("memory allocation failed\n");
		return NULL;
	}
	st->tid = tid;
	INIT_LIST_HEAD(&st->seq_list);

	rb_link_node(&st->rb, NULL, &thread_stats.rb_node);
	rb_insert_color(&st->rb, &thread_stats);

	thread_stat_findnew = thread_stat_findnew_after_first;
	return st;
}
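
/*
 * Note the self-switching function pointer above: the very first call
 * runs thread_stat_findnew_first(), which inserts the root node without
 * searching and then redirects thread_stat_findnew to
 * thread_stat_findnew_after_first(), the regular lookup-then-insert
 * variant.
 */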

/* build a simple key comparison function: returns whether one is bigger than two */
#define SINGLE_KEY(member)						\
	static int lock_stat_key_ ## member(struct lock_stat *one,	\
					 struct lock_stat *two)		\
	{								\
		return one->member > two->member;			\
	}

SINGLE_KEY(nr_acquired)
SINGLE_KEY(nr_contended)
SINGLE_KEY(avg_wait_time)
SINGLE_KEY(wait_time_total)
SINGLE_KEY(wait_time_max)

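/*
 * For example, SINGLE_KEY(nr_acquired) expands to:
 *
 *	static int lock_stat_key_nr_acquired(struct lock_stat *one,
 *					 struct lock_stat *two)
 *	{
 *		return one->nr_acquired > two->nr_acquired;
 *	}
 */
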
static int lock_stat_key_wait_time_min(struct lock_stat *one,
					struct lock_stat *two)
{
	u64 s1 = one->wait_time_min;
	u64 s2 = two->wait_time_min;
	if (s1 == ULLONG_MAX)
		s1 = 0;
	if (s2 == ULLONG_MAX)
		s2 = 0;
	return s1 > s2;
}

struct lock_key {
	/*
	 * name: the value specified by the user
	 * it should be simpler than the raw name of the member
	 * e.g. nr_acquired -> acquired, wait_time_total -> wait_total
	 */
	const char		*name;
	/* header: the string printed on the header line */
	const char		*header;
	/* len: the printing width of the field */
	int			len;
	/* key: a pointer to a function that compares two lock stats for sorting */
	int			(*key)(struct lock_stat*, struct lock_stat*);
	/* print: a pointer to a function that prints a given lock stat */
	void			(*print)(struct lock_key*, struct lock_stat*);
	/* list: list entry to link this */
	struct list_head	list;
};

static void lock_stat_key_print_time(unsigned long long nsec, int len)
{
	static const struct {
		float base;
		const char *unit;
	} table[] = {
		{ 1e9 * 3600, "h " },
		{ 1e9 * 60, "m " },
		{ 1e9, "s " },
		{ 1e6, "ms" },
		{ 1e3, "us" },
		{ 0, NULL },
	};

	/* for CSV output */
	if (len == 0) {
		fprintf(lock_output, "%llu", nsec);
		return;
	}

	for (int i = 0; table[i].unit; i++) {
		if (nsec < table[i].base)
			continue;

		fprintf(lock_output, "%*.2f %s", len - 3, nsec / table[i].base, table[i].unit);
		return;
	}

	fprintf(lock_output, "%*llu %s", len - 3, nsec, "ns");
}

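/*
 * e.g. with len == 12, nsec == 1234567 prints as "     1.23 ms",
 * while len == 0 (CSV mode) prints the raw value "1234567".
 */
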
#define PRINT_KEY(member)						\
static void lock_stat_key_print_ ## member(struct lock_key *key,	\
					   struct lock_stat *ls)	\
{									\
	fprintf(lock_output, "%*llu", key->len, (unsigned long long)ls->member);\
}

#define PRINT_TIME(member)						\
static void lock_stat_key_print_ ## member(struct lock_key *key,	\
					   struct lock_stat *ls)	\
{									\
	lock_stat_key_print_time((unsigned long long)ls->member, key->len);	\
}

PRINT_KEY(nr_acquired)
PRINT_KEY(nr_contended)
PRINT_TIME(avg_wait_time)
PRINT_TIME(wait_time_total)
PRINT_TIME(wait_time_max)

static void lock_stat_key_print_wait_time_min(struct lock_key *key,
					      struct lock_stat *ls)
{
	u64 wait_time = ls->wait_time_min;

	if (wait_time == ULLONG_MAX)
		wait_time = 0;

	lock_stat_key_print_time(wait_time, key->len);
}


static const char		*sort_key = "acquired";

static int			(*compare)(struct lock_stat *, struct lock_stat *);

static struct rb_root		sorted; /* place to store intermediate data */
static struct rb_root		result;	/* place to store sorted data */

static LIST_HEAD(lock_keys);
static const char		*output_fields;

#define DEF_KEY_LOCK(name, header, fn_suffix, len)			\
	{ #name, header, len, lock_stat_key_ ## fn_suffix, lock_stat_key_print_ ## fn_suffix, {} }
static struct lock_key report_keys[] = {
	DEF_KEY_LOCK(acquired, "acquired", nr_acquired, 10),
	DEF_KEY_LOCK(contended, "contended", nr_contended, 10),
	DEF_KEY_LOCK(avg_wait, "avg wait", avg_wait_time, 12),
	DEF_KEY_LOCK(wait_total, "total wait", wait_time_total, 12),
	DEF_KEY_LOCK(wait_max, "max wait", wait_time_max, 12),
	DEF_KEY_LOCK(wait_min, "min wait", wait_time_min, 12),

	/* more complicated comparisons should go here */
	{ }
};

static struct lock_key contention_keys[] = {
	DEF_KEY_LOCK(contended, "contended", nr_contended, 10),
	DEF_KEY_LOCK(wait_total, "total wait", wait_time_total, 12),
	DEF_KEY_LOCK(wait_max, "max wait", wait_time_max, 12),
	DEF_KEY_LOCK(wait_min, "min wait", wait_time_min, 12),
	DEF_KEY_LOCK(avg_wait, "avg wait", avg_wait_time, 12),

	/* more complicated comparisons should go here */
	{ }
};

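/*
 * Sketch of how these tables are used, assuming the usual perf lock
 * command line (option parsing lives outside this excerpt):
 *
 *	$ perf lock report -k wait_total -F acquired,contended,wait_total
 *
 * would make select_key() point 'compare' at
 * lock_stat_key_wait_time_total and setup_output_field() link only
 * those three keys into lock_keys.
 */
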
static int select_key(bool contention)
{
	int i;
	struct lock_key *keys = report_keys;

	if (contention)
		keys = contention_keys;

	for (i = 0; keys[i].name; i++) {
		if (!strcmp(keys[i].name, sort_key)) {
			compare = keys[i].key;

			/* selected key should be in the output fields */
			if (list_empty(&keys[i].list))
				list_add_tail(&keys[i].list, &lock_keys);

			return 0;
		}
	}

	pr_err("Unknown compare key: %s\n", sort_key);
	return -1;
}

static int add_output_field(bool contention, char *name)
{
	int i;
	struct lock_key *keys = report_keys;

	if (contention)
		keys = contention_keys;

	for (i = 0; keys[i].name; i++) {
		if (strcmp(keys[i].name, name))
			continue;

		/* prevent double link */
		if (list_empty(&keys[i].list))
			list_add_tail(&keys[i].list, &lock_keys);

		return 0;
	}

	pr_err("Unknown output field: %s\n", name);
	return -1;
}

static int setup_output_field(bool contention, const char *str)
{
	char *tok, *tmp, *orig;
	int i, ret = 0;
	struct lock_key *keys = report_keys;

	if (contention)
		keys = contention_keys;

	/* no output field given: use all of them */
	if (str == NULL) {
		for (i = 0; keys[i].name; i++)
			list_add_tail(&keys[i].list, &lock_keys);
		return 0;
	}

	for (i = 0; keys[i].name; i++)
		INIT_LIST_HEAD(&keys[i].list);

	orig = tmp = strdup(str);
	if (orig == NULL)
		return -ENOMEM;

	while ((tok = strsep(&tmp, ",")) != NULL) {
		ret = add_output_field(contention, tok);
		if (ret < 0)
			break;
	}
	free(orig);

	return ret;
}

static void combine_lock_stats(struct lock_stat *st)
{
	struct rb_node **rb = &sorted.rb_node;
	struct rb_node *parent = NULL;
	struct lock_stat *p;
	int ret;

	while (*rb) {
		p = container_of(*rb, struct lock_stat, rb);
		parent = *rb;

		if (st->name && p->name)
			ret = strcmp(st->name, p->name);
		else
			ret = !!st->name - !!p->name;

		if (ret == 0) {
			p->nr_acquired += st->nr_acquired;
			p->nr_contended += st->nr_contended;
			p->wait_time_total += st->wait_time_total;

			if (p->nr_contended)
				p->avg_wait_time = p->wait_time_total / p->nr_contended;

			if (p->wait_time_min > st->wait_time_min)
				p->wait_time_min = st->wait_time_min;
			if (p->wait_time_max < st->wait_time_max)
				p->wait_time_max = st->wait_time_max;

			p->broken |= st->broken;
			st->combined = 1;
			return;
		}

		if (ret < 0)
			rb = &(*rb)->rb_left;
		else
			rb = &(*rb)->rb_right;
	}

	rb_link_node(&st->rb, parent, rb);
	rb_insert_color(&st->rb, &sorted);
}
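
/*
 * Note: combine_lock_stats() merges entries keyed by the lock name, so
 * when combine_locks is set the report shows one line per lock class
 * (e.g. all instances with the same name together) instead of one per
 * lock address.
 */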

static void insert_to_result(struct lock_stat *st,
			     int (*bigger)(struct lock_stat *, struct lock_stat *))
{
	struct rb_node **rb = &result.rb_node;
	struct rb_node *parent = NULL;
	struct lock_stat *p;

	if (combine_locks && st->combined)
		return;

	while (*rb) {
		p = container_of(*rb, struct lock_stat, rb);
		parent = *rb;

		if (bigger(st, p))
			rb = &(*rb)->rb_left;
		else
			rb = &(*rb)->rb_right;
	}

	rb_link_node(&st->rb, parent, rb);
	rb_insert_color(&st->rb, &result);
}

/* returns the leftmost element of result, and erases it */
static struct lock_stat *pop_from_result(void)
{
	struct rb_node *node = result.rb_node;

	if (!node)
		return NULL;

	while (node->rb_left)
		node = node->rb_left;

	rb_erase(node, &result);
	return container_of(node, struct lock_stat, rb);
}

struct lock_stat *lock_stat_find(u64 addr)
{
	struct hlist_head *entry = lockhashentry(addr);
	struct lock_stat *ret;

	hlist_for_each_entry(ret, entry, hash_entry) {
		if (ret->addr == addr)
			return ret;
	}
	return NULL;
}

struct lock_stat *lock_stat_findnew(u64 addr, const char *name, int flags)
{
	struct hlist_head *entry = lockhashentry(addr);
	struct lock_stat *ret, *new;

	hlist_for_each_entry(ret, entry, hash_entry) {
		if (ret->addr == addr)
			return ret;
	}

	new = zalloc(sizeof(struct lock_stat));
	if (!new)
		goto alloc_failed;

	new->addr = addr;
	new->name = strdup(name);
	if (!new->name) {
		free(new);
		goto alloc_failed;
	}

	new->flags = flags;
	new->wait_time_min = ULLONG_MAX;

	hlist_add_head(&new->hash_entry, entry);
	return new;

alloc_failed:
	pr_err("memory allocation failed\n");
	return NULL;
}

bool match_callstack_filter(struct machine *machine, u64 *callstack)
{
	struct map *kmap;
	struct symbol *sym;
	u64 ip;
	const char *arch = perf_env__arch(machine->env);

	if (list_empty(&callstack_filters))
		return true;

	for (int i = 0; i < max_stack_depth; i++) {
		struct callstack_filter *filter;

		/*
		 * On powerpc, the callchain saved by the kernel always
		 * includes, as its first three entries, the NIP (next
		 * instruction pointer), the LR (link register), and the
		 * contents of the LR save area in the second stack frame.
		 * In certain scenarios it's possible to have invalid kernel
		 * instruction addresses in either the LR or the second stack
		 * frame's LR; in that case, the kernel stores that address
		 * as zero.
		 *
		 * The check below keeps looking into the callstack in case
		 * the first or second callstack index entry has a zero
		 * address on powerpc.
		 */
		if (!callstack || (!callstack[i] && (strcmp(arch, "powerpc") ||
						(i != 1 && i != 2))))
			break;

		ip = callstack[i];
		sym = machine__find_kernel_symbol(machine, ip, &kmap);
		if (sym == NULL)
			continue;

		list_for_each_entry(filter, &callstack_filters, list) {
			if (strstr(sym->name, filter->name))
				return true;
		}
	}
	return false;
}
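
/*
 * Example: with a callstack filter string of "rwsem_down", only events
 * whose resolved callstack contains a kernel symbol whose name includes
 * "rwsem_down" (substring match via strstr) pass; the contention
 * handlers skip everything else.
 */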

struct trace_lock_handler {
	/* it's used when CONFIG_LOCKDEP is on */
	int (*acquire_event)(struct evsel *evsel,
			     struct perf_sample *sample);

	/* it's used when CONFIG_LOCKDEP and CONFIG_LOCK_STAT are on */
	int (*acquired_event)(struct evsel *evsel,
			      struct perf_sample *sample);

	/* it's used when CONFIG_LOCKDEP and CONFIG_LOCK_STAT are on */
	int (*contended_event)(struct evsel *evsel,
			       struct perf_sample *sample);

	/* it's used when CONFIG_LOCKDEP is on */
	int (*release_event)(struct evsel *evsel,
			     struct perf_sample *sample);

	/* it's used when CONFIG_LOCKDEP is off */
	int (*contention_begin_event)(struct evsel *evsel,
				      struct perf_sample *sample);

	/* it's used when CONFIG_LOCKDEP is off */
	int (*contention_end_event)(struct evsel *evsel,
				    struct perf_sample *sample);
};

static struct lock_seq_stat *get_seq(struct thread_stat *ts, u64 addr)
{
	struct lock_seq_stat *seq;

	list_for_each_entry(seq, &ts->seq_list, list) {
		if (seq->addr == addr)
			return seq;
	}

	seq = zalloc(sizeof(struct lock_seq_stat));
	if (!seq) {
		pr_err("memory allocation failed\n");
		return NULL;
	}
	seq->state = SEQ_STATE_UNINITIALIZED;
	seq->addr = addr;

	list_add(&seq->list, &ts->seq_list);
	return seq;
}

enum broken_state {
	BROKEN_ACQUIRE,
	BROKEN_ACQUIRED,
	BROKEN_CONTENDED,
	BROKEN_RELEASE,
	BROKEN_MAX,
};

static int bad_hist[BROKEN_MAX];

enum acquire_flags {
	TRY_LOCK = 1,
	READ_LOCK = 2,
};

static int get_key_by_aggr_mode_simple(u64 *key, u64 addr, u32 tid)
{
	switch (aggr_mode) {
	case LOCK_AGGR_ADDR:
		*key = addr;
		break;
	case LOCK_AGGR_TASK:
		*key = tid;
		break;
	case LOCK_AGGR_CALLER:
	default:
		pr_err("Invalid aggregation mode: %d\n", aggr_mode);
		return -EINVAL;
	}
	return 0;
}

static u64 callchain_id(struct evsel *evsel, struct perf_sample *sample);

static int get_key_by_aggr_mode(u64 *key, u64 addr, struct evsel *evsel,
				 struct perf_sample *sample)
{
	if (aggr_mode == LOCK_AGGR_CALLER) {
		*key = callchain_id(evsel, sample);
		return 0;
	}
	return get_key_by_aggr_mode_simple(key, addr, sample->tid);
}
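
/*
 * Each (thread, lock address) pair has a lock_seq_stat tracking a small
 * state machine across the handlers below:
 *
 *   UNINITIALIZED/RELEASED --acquire--> ACQUIRING --acquired--> ACQUIRED
 *   ACQUIRING --contended--> CONTENDED --acquired--> ACQUIRED
 *   ACQUIRED --release--> (sequence freed)
 *
 * Read locks sit in READ_ACQUIRED with a read_count instead.  Any other
 * transition means lost or out-of-order events: the lock is marked
 * broken, counted in bad_hist[], and the sequence is dropped.
 */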

static int report_lock_acquire_event(struct evsel *evsel,
				     struct perf_sample *sample)
{
	struct lock_stat *ls;
	struct thread_stat *ts;
	struct lock_seq_stat *seq;
	const char *name = evsel__strval(evsel, sample, "name");
	u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
	int flag = evsel__intval(evsel, sample, "flags");
	u64 key;
	int ret;

	ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid);
	if (ret < 0)
		return ret;

	ls = lock_stat_findnew(key, name, 0);
	if (!ls)
		return -ENOMEM;

	ts = thread_stat_findnew(sample->tid);
	if (!ts)
		return -ENOMEM;

	seq = get_seq(ts, addr);
	if (!seq)
		return -ENOMEM;

	switch (seq->state) {
	case SEQ_STATE_UNINITIALIZED:
	case SEQ_STATE_RELEASED:
		if (!flag) {
			seq->state = SEQ_STATE_ACQUIRING;
		} else {
			if (flag & TRY_LOCK)
				ls->nr_trylock++;
			if (flag & READ_LOCK)
				ls->nr_readlock++;
			seq->state = SEQ_STATE_READ_ACQUIRED;
			seq->read_count = 1;
			ls->nr_acquired++;
		}
		break;
	case SEQ_STATE_READ_ACQUIRED:
		if (flag & READ_LOCK) {
			seq->read_count++;
			ls->nr_acquired++;
			goto end;
		} else {
			goto broken;
		}
		break;
	case SEQ_STATE_ACQUIRED:
	case SEQ_STATE_ACQUIRING:
	case SEQ_STATE_CONTENDED:
broken:
		/* broken lock sequence */
		if (!ls->broken) {
			ls->broken = 1;
			bad_hist[BROKEN_ACQUIRE]++;
		}
		list_del_init(&seq->list);
		free(seq);
		goto end;
	default:
		BUG_ON("Unknown state of lock sequence found!\n");
		break;
	}

	ls->nr_acquire++;
	seq->prev_event_time = sample->time;
end:
	return 0;
}

static int report_lock_acquired_event(struct evsel *evsel,
				      struct perf_sample *sample)
{
	struct lock_stat *ls;
	struct thread_stat *ts;
	struct lock_seq_stat *seq;
	u64 contended_term;
	const char *name = evsel__strval(evsel, sample, "name");
	u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
	u64 key;
	int ret;

	ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid);
	if (ret < 0)
		return ret;

	ls = lock_stat_findnew(key, name, 0);
	if (!ls)
		return -ENOMEM;

	ts = thread_stat_findnew(sample->tid);
	if (!ts)
		return -ENOMEM;

	seq = get_seq(ts, addr);
	if (!seq)
		return -ENOMEM;

	switch (seq->state) {
	case SEQ_STATE_UNINITIALIZED:
		/* orphan event, do nothing */
		return 0;
	case SEQ_STATE_ACQUIRING:
		break;
	case SEQ_STATE_CONTENDED:
		contended_term = sample->time - seq->prev_event_time;
		ls->wait_time_total += contended_term;
		if (contended_term < ls->wait_time_min)
			ls->wait_time_min = contended_term;
		if (ls->wait_time_max < contended_term)
			ls->wait_time_max = contended_term;
		break;
	case SEQ_STATE_RELEASED:
	case SEQ_STATE_ACQUIRED:
	case SEQ_STATE_READ_ACQUIRED:
		/* broken lock sequence */
		if (!ls->broken) {
			ls->broken = 1;
			bad_hist[BROKEN_ACQUIRED]++;
		}
		list_del_init(&seq->list);
		free(seq);
		goto end;
	default:
		BUG_ON("Unknown state of lock sequence found!\n");
		break;
	}

	seq->state = SEQ_STATE_ACQUIRED;
	ls->nr_acquired++;
	ls->avg_wait_time = ls->nr_contended ? ls->wait_time_total/ls->nr_contended : 0;
	seq->prev_event_time = sample->time;
end:
	return 0;
}

static int report_lock_contended_event(struct evsel *evsel,
				       struct perf_sample *sample)
{
	struct lock_stat *ls;
	struct thread_stat *ts;
	struct lock_seq_stat *seq;
	const char *name = evsel__strval(evsel, sample, "name");
	u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
	u64 key;
	int ret;

	ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid);
	if (ret < 0)
		return ret;

	ls = lock_stat_findnew(key, name, 0);
	if (!ls)
		return -ENOMEM;

	ts = thread_stat_findnew(sample->tid);
	if (!ts)
		return -ENOMEM;

	seq = get_seq(ts, addr);
	if (!seq)
		return -ENOMEM;

	switch (seq->state) {
	case SEQ_STATE_UNINITIALIZED:
		/* orphan event, do nothing */
		return 0;
	case SEQ_STATE_ACQUIRING:
		break;
	case SEQ_STATE_RELEASED:
	case SEQ_STATE_ACQUIRED:
	case SEQ_STATE_READ_ACQUIRED:
	case SEQ_STATE_CONTENDED:
		/* broken lock sequence */
		if (!ls->broken) {
			ls->broken = 1;
			bad_hist[BROKEN_CONTENDED]++;
		}
		list_del_init(&seq->list);
		free(seq);
		goto end;
	default:
		BUG_ON("Unknown state of lock sequence found!\n");
		break;
	}

	seq->state = SEQ_STATE_CONTENDED;
	ls->nr_contended++;
	ls->avg_wait_time = ls->wait_time_total/ls->nr_contended;
	seq->prev_event_time = sample->time;
end:
	return 0;
}

static int report_lock_release_event(struct evsel *evsel,
				     struct perf_sample *sample)
{
	struct lock_stat *ls;
	struct thread_stat *ts;
	struct lock_seq_stat *seq;
	const char *name = evsel__strval(evsel, sample, "name");
	u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
	u64 key;
	int ret;

	ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid);
	if (ret < 0)
		return ret;

	ls = lock_stat_findnew(key, name, 0);
	if (!ls)
		return -ENOMEM;

	ts = thread_stat_findnew(sample->tid);
	if (!ts)
		return -ENOMEM;

	seq = get_seq(ts, addr);
	if (!seq)
		return -ENOMEM;

	switch (seq->state) {
	case SEQ_STATE_UNINITIALIZED:
		goto end;
	case SEQ_STATE_ACQUIRED:
		break;
	case SEQ_STATE_READ_ACQUIRED:
		seq->read_count--;
		BUG_ON(seq->read_count < 0);
		if (seq->read_count) {
			ls->nr_release++;
			goto end;
		}
		break;
	case SEQ_STATE_ACQUIRING:
	case SEQ_STATE_CONTENDED:
	case SEQ_STATE_RELEASED:
		/* broken lock sequence */
		if (!ls->broken) {
			ls->broken = 1;
			bad_hist[BROKEN_RELEASE]++;
		}
		goto free_seq;
	default:
		BUG_ON("Unknown state of lock sequence found!\n");
		break;
	}

	ls->nr_release++;
free_seq:
	list_del_init(&seq->list);
	free(seq);
end:
	return 0;
}

static int get_symbol_name_offset(struct map *map, struct symbol *sym, u64 ip,
				  char *buf, int size)
{
	u64 offset;

	if (map == NULL || sym == NULL) {
		buf[0] = '\0';
		return 0;
	}

	offset = map__map_ip(map, ip) - sym->start;

	if (offset)
		return scnprintf(buf, size, "%s+%#lx", sym->name, offset);
	else
		return strlcpy(buf, sym->name, size);
}

static int lock_contention_caller(struct evsel *evsel, struct perf_sample *sample,
				  char *buf, int size)
{
	struct thread *thread;
	struct callchain_cursor *cursor;
	struct machine *machine = &session->machines.host;
	struct symbol *sym;
	int skip = 0;
	int ret;

	/* lock names will be replaced with task names later */
	if (show_thread_stats)
		return -1;

	thread = machine__findnew_thread(machine, -1, sample->pid);
	if (thread == NULL)
		return -1;

	cursor = get_tls_callchain_cursor();

	/* use the caller function name from the callchain */
	ret = thread__resolve_callchain(thread, cursor, evsel, sample,
					NULL, NULL, max_stack_depth);
	if (ret != 0) {
		thread__put(thread);
		return -1;
	}

	callchain_cursor_commit(cursor);
	thread__put(thread);

	while (true) {
		struct callchain_cursor_node *node;

		node = callchain_cursor_current(cursor);
		if (node == NULL)
			break;

		/* skip the first few entries - for lock functions */
		if (++skip <= stack_skip)
			goto next;

		sym = node->ms.sym;
		if (sym && !machine__is_lock_function(machine, node->ip)) {
			get_symbol_name_offset(node->ms.map, sym, node->ip,
					       buf, size);
			return 0;
		}

next:
		callchain_cursor_advance(cursor);
	}
	return -1;
}

static u64 callchain_id(struct evsel *evsel, struct perf_sample *sample)
{
	struct callchain_cursor *cursor;
	struct machine *machine = &session->machines.host;
	struct thread *thread;
	u64 hash = 0;
	int skip = 0;
	int ret;

	thread = machine__findnew_thread(machine, -1, sample->pid);
	if (thread == NULL)
		return -1;

	cursor = get_tls_callchain_cursor();
	/* use the caller function name from the callchain */
	ret = thread__resolve_callchain(thread, cursor, evsel, sample,
					NULL, NULL, max_stack_depth);
	thread__put(thread);

	if (ret != 0)
		return -1;

	callchain_cursor_commit(cursor);

	while (true) {
		struct callchain_cursor_node *node;

		node = callchain_cursor_current(cursor);
		if (node == NULL)
			break;

		/* skip the first few entries - for lock functions */
		if (++skip <= stack_skip)
			goto next;

		if (node->ms.sym && machine__is_lock_function(machine, node->ip))
			goto next;

		hash ^= hash_long((unsigned long)node->ip, 64);

next:
		callchain_cursor_advance(cursor);
	}
	return hash;
}
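
/*
 * e.g. two contention events whose callchains (after stack_skip and the
 * lock-function filter) resolve to the same sequence of IPs XOR-fold to
 * the same 64-bit hash and are therefore aggregated under a single
 * LOCK_AGGR_CALLER key.
 */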

static u64 *get_callstack(struct perf_sample *sample, int max_stack)
{
	u64 *callstack;
	u64 i;
	int c;

	callstack = calloc(max_stack, sizeof(*callstack));
	if (callstack == NULL)
		return NULL;

	for (i = 0, c = 0; i < sample->callchain->nr && c < max_stack; i++) {
		u64 ip = sample->callchain->ips[i];

		if (ip >= PERF_CONTEXT_MAX)
			continue;

		callstack[c++] = ip;
	}
	return callstack;
}

static int report_lock_contention_begin_event(struct evsel *evsel,
					      struct perf_sample *sample)
{
	struct lock_stat *ls;
	struct thread_stat *ts;
	struct lock_seq_stat *seq;
	u64 addr = evsel__intval(evsel, sample, "lock_addr");
	unsigned int flags = evsel__intval(evsel, sample, "flags");
	u64 key;
	int i, ret;
	static bool kmap_loaded;
	struct machine *machine = &session->machines.host;
	struct map *kmap;
	struct symbol *sym;

	ret = get_key_by_aggr_mode(&key, addr, evsel, sample);
	if (ret < 0)
		return ret;

	if (!kmap_loaded) {
		unsigned long *addrs;

		/* make sure it loads the kernel map to find lock symbols */
		map__load(machine__kernel_map(machine));
		kmap_loaded = true;

		/* convert (kernel) symbols to addresses */
		for (i = 0; i < filters.nr_syms; i++) {
			sym = machine__find_kernel_symbol_by_name(machine,
								  filters.syms[i],
								  &kmap);
			if (sym == NULL) {
				pr_warning("ignore unknown symbol: %s\n",
					   filters.syms[i]);
				continue;
			}

			addrs = realloc(filters.addrs,
					(filters.nr_addrs + 1) * sizeof(*addrs));
			if (addrs == NULL) {
				pr_warning("memory allocation failure\n");
				return -ENOMEM;
			}

			addrs[filters.nr_addrs++] = map__unmap_ip(kmap, sym->start);
			filters.addrs = addrs;
		}
	}

	ls = lock_stat_find(key);
	if (!ls) {
		char buf[128];
		const char *name = "";

		switch (aggr_mode) {
		case LOCK_AGGR_ADDR:
			sym = machine__find_kernel_symbol(machine, key, &kmap);
			if (sym)
				name = sym->name;
			break;
		case LOCK_AGGR_CALLER:
			name = buf;
			if (lock_contention_caller(evsel, sample, buf, sizeof(buf)) < 0)
				name = "Unknown";
			break;
		case LOCK_AGGR_TASK:
		default:
			break;
		}

		ls = lock_stat_findnew(key, name, flags);
		if (!ls)
			return -ENOMEM;
	}

	if (filters.nr_types) {
		bool found = false;

		for (i = 0; i < filters.nr_types; i++) {
			if (flags == filters.types[i]) {
				found = true;
				break;
			}
		}

		if (!found)
			return 0;
	}

	if (filters.nr_addrs) {
		bool found = false;

		for (i = 0; i < filters.nr_addrs; i++) {
			if (addr == filters.addrs[i]) {
				found = true;
				break;
			}
		}

		if (!found)
			return 0;
	}

	if (needs_callstack()) {
		u64 *callstack = get_callstack(sample, max_stack_depth);
		if (callstack == NULL)
			return -ENOMEM;

		if (!match_callstack_filter(machine, callstack)) {
			free(callstack);
			return 0;
		}

		if (ls->callstack == NULL)
			ls->callstack = callstack;
		else
			free(callstack);
	}

	ts = thread_stat_findnew(sample->tid);
	if (!ts)
		return -ENOMEM;

	seq = get_seq(ts, addr);
	if (!seq)
		return -ENOMEM;

	switch (seq->state) {
	case SEQ_STATE_UNINITIALIZED:
	case SEQ_STATE_ACQUIRED:
		break;
	case SEQ_STATE_CONTENDED:
		/*
		 * Contention can nest, e.g. a second contention_begin fired
		 * while a mutex spins optimistically; use the original
		 * contention begin event and ignore the second one.
		 */
		goto end;
	case SEQ_STATE_ACQUIRING:
	case SEQ_STATE_READ_ACQUIRED:
	case SEQ_STATE_RELEASED:
		/* broken lock sequence */
		if (!ls->broken) {
			ls->broken = 1;
			bad_hist[BROKEN_CONTENDED]++;
		}
		list_del_init(&seq->list);
		free(seq);
		goto end;
	default:
		BUG_ON("Unknown state of lock sequence found!\n");
		break;
	}

	if (seq->state != SEQ_STATE_CONTENDED) {
		seq->state = SEQ_STATE_CONTENDED;
		seq->prev_event_time = sample->time;
		ls->nr_contended++;
	}
end:
	return 0;
}
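
/*
 * Ordering note: the lock_stat is looked up or created before the
 * type/addr/callstack filters run, so a filtered-out lock may still
 * have an entry in the hash table but never accumulates contention
 * counts.
 */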

static int report_lock_contention_end_event(struct evsel *evsel,
					    struct perf_sample *sample)
{
	struct lock_stat *ls;
	struct thread_stat *ts;
	struct lock_seq_stat *seq;
	u64 contended_term;
	u64 addr = evsel__intval(evsel, sample, "lock_addr");
	u64 key;
	int ret;

	ret = get_key_by_aggr_mode(&key, addr, evsel, sample);
	if (ret < 0)
		return ret;

	ls = lock_stat_find(key);
	if (!ls)
		return 0;

	ts = thread_stat_find(sample->tid);
	if (!ts)
		return 0;

	seq = get_seq(ts, addr);
	if (!seq)
		return -ENOMEM;

	switch (seq->state) {
	case SEQ_STATE_UNINITIALIZED:
		goto end;
	case SEQ_STATE_CONTENDED:
		contended_term = sample->time - seq->prev_event_time;
		ls->wait_time_total += contended_term;
		if (contended_term < ls->wait_time_min)
			ls->wait_time_min = contended_term;
		if (ls->wait_time_max < contended_term)
			ls->wait_time_max = contended_term;
		break;
	case SEQ_STATE_ACQUIRING:
	case SEQ_STATE_ACQUIRED:
	case SEQ_STATE_READ_ACQUIRED:
	case SEQ_STATE_RELEASED:
		/* broken lock sequence */
		if (!ls->broken) {
			ls->broken = 1;
			bad_hist[BROKEN_ACQUIRED]++;
		}
		list_del_init(&seq->list);
		free(seq);
		goto end;
	default:
		BUG_ON("Unknown state of lock sequence found!\n");
		break;
	}

	seq->state = SEQ_STATE_ACQUIRED;
	ls->nr_acquired++;
	ls->avg_wait_time = ls->wait_time_total/ls->nr_acquired;
end:
	return 0;
}

/* lock oriented handlers */
/* TODO: handlers for CPU oriented, thread oriented */
static struct trace_lock_handler report_lock_ops  = {
	.acquire_event		= report_lock_acquire_event,
	.acquired_event		= report_lock_acquired_event,
	.contended_event	= report_lock_contended_event,
	.release_event		= report_lock_release_event,
	.contention_begin_event	= report_lock_contention_begin_event,
	.contention_end_event	= report_lock_contention_end_event,
};

static struct trace_lock_handler contention_lock_ops  = {
	.contention_begin_event	= report_lock_contention_begin_event,
	.contention_end_event	= report_lock_contention_end_event,
};


static struct trace_lock_handler *trace_handler;

static int evsel__process_lock_acquire(struct evsel *evsel, struct perf_sample *sample)
{
	if (trace_handler->acquire_event)
		return trace_handler->acquire_event(evsel, sample);
	return 0;
}

static int evsel__process_lock_acquired(struct evsel *evsel, struct perf_sample *sample)
{
	if (trace_handler->acquired_event)
		return trace_handler->acquired_event(evsel, sample);
	return 0;
}

static int evsel__process_lock_contended(struct evsel *evsel, struct perf_sample *sample)
{
	if (trace_handler->contended_event)
		return trace_handler->contended_event(evsel, sample);
	return 0;
}

static int evsel__process_lock_release(struct evsel *evsel, struct perf_sample *sample)
{
	if (trace_handler->release_event)
		return trace_handler->release_event(evsel, sample);
	return 0;
}

static int evsel__process_contention_begin(struct evsel *evsel, struct perf_sample *sample)
{
	if (trace_handler->contention_begin_event)
		return trace_handler->contention_begin_event(evsel, sample);
	return 0;
}

static int evsel__process_contention_end(struct evsel *evsel, struct perf_sample *sample)
{
	if (trace_handler->contention_end_event)
		return trace_handler->contention_end_event(evsel, sample);
	return 0;
}

static void print_bad_events(int bad, int total)
{
	/* Output for debug; this has to be removed eventually */
	int i;
	int broken = 0;
	const char *name[4] =
		{ "acquire", "acquired", "contended", "release" };

	for (i = 0; i < BROKEN_MAX; i++)
		broken += bad_hist[i];

	if (quiet || total == 0 || (broken == 0 && verbose <= 0))
		return;

	fprintf(lock_output, "\n=== output for debug ===\n\n");
	fprintf(lock_output, "bad: %d, total: %d\n", bad, total);
	fprintf(lock_output, "bad rate: %.2f %%\n", (double)bad / (double)total * 100);
	fprintf(lock_output, "histogram of events that caused bad sequences\n");
	for (i = 0; i < BROKEN_MAX; i++)
		fprintf(lock_output, " %10s: %d\n", name[i], bad_hist[i]);
}

/* TODO: various ways to print, coloring, nano or milli sec */
static void print_result(void)
{
	struct lock_stat *st;
	struct lock_key *key;
	char cut_name[20];
	int bad, total, printed;

	if (!quiet) {
		fprintf(lock_output, "%20s ", "Name");
		list_for_each_entry(key, &lock_keys, list)
			fprintf(lock_output, "%*s ", key->len, key->header);
		fprintf(lock_output, "\n\n");
	}

	bad = total = printed = 0;
	while ((st = pop_from_result())) {
		total++;
		if (st->broken)
			bad++;
		if (!st->nr_acquired)
			continue;

		bzero(cut_name, 20);

		if (strlen(st->name) < 20) {
			/* output the raw name */
			const char *name = st->name;

			if (show_thread_stats) {
				struct thread *t;

				/* st->addr contains the tid of the thread */
				t = perf_session__findnew(session, st->addr);
				name = thread__comm_str(t);
			}

			fprintf(lock_output, "%20s ", name);
		} else {
			strncpy(cut_name, st->name, 16);
			cut_name[16] = '.';
			cut_name[17] = '.';
			cut_name[18] = '.';
			cut_name[19] = '\0';
			/* truncate the name to preserve the output layout */
			fprintf(lock_output, "%20s ", cut_name);
		}

		list_for_each_entry(key, &lock_keys, list) {
			key->print(key, st);
			fprintf(lock_output, " ");
		}
		fprintf(lock_output, "\n");

		if (++printed >= print_nr_entries)
			break;
	}

	print_bad_events(bad, total);
}

static bool info_threads, info_map;

static void dump_threads(void)
{
	struct thread_stat *st;
	struct rb_node *node;
	struct thread *t;

	fprintf(lock_output, "%10s: comm\n", "Thread ID");

	node = rb_first(&thread_stats);
	while (node) {
		st = container_of(node, struct thread_stat, rb);
		t = perf_session__findnew(session, st->tid);
		fprintf(lock_output, "%10d: %s\n", st->tid, thread__comm_str(t));
		node = rb_next(node);
		thread__put(t);
	}
}

static int compare_maps(struct lock_stat *a, struct lock_stat *b)
{
	int ret;

	if (a->name && b->name)
		ret = strcmp(a->name, b->name);
	else
		ret = !!a->name - !!b->name;

	if (!ret)
		return a->addr < b->addr;
	else
		return ret < 0;
}

static void dump_map(void)
{
	unsigned int i;
	struct lock_stat *st;

	fprintf(lock_output, "Address of instance: name of class\n");
	for (i = 0; i < LOCKHASH_SIZE; i++) {
		hlist_for_each_entry(st, &lockhash_table[i], hash_entry) {
			insert_to_result(st, compare_maps);
		}
	}

	while ((st = pop_from_result()))
		fprintf(lock_output, " %#llx: %s\n", (unsigned long long)st->addr, st->name);
}

static int dump_info(void)
{
	int rc = 0;

	if (info_threads)
		dump_threads();
	else if (info_map)
		dump_map();
	else {
		rc = -1;
		pr_err("Unknown type of information\n");
	}

	return rc;
}

static const struct evsel_str_handler lock_tracepoints[] = {
	{ "lock:lock_acquire",	 evsel__process_lock_acquire,   }, /* CONFIG_LOCKDEP */
	{ "lock:lock_acquired",	 evsel__process_lock_acquired,  }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */
	{ "lock:lock_contended", evsel__process_lock_contended, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */
	{ "lock:lock_release",	 evsel__process_lock_release,   }, /* CONFIG_LOCKDEP */
};

static const struct evsel_str_handler contention_tracepoints[] = {
	{ "lock:contention_begin", evsel__process_contention_begin, },
	{ "lock:contention_end",   evsel__process_contention_end,   },
};
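
/*
 * As the comments in trace_lock_handler note, the lock:lock_*
 * tracepoints above need CONFIG_LOCKDEP (plus CONFIG_LOCK_STAT for
 * acquired/contended), while lock:contention_begin/end carry the lock
 * address and flags themselves and work without lockdep.
 */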

static int process_event_update(struct perf_tool *tool,
				union perf_event *event,
				struct evlist **pevlist)
{
	int ret;

	ret = perf_event__process_event_update(tool, event, pevlist);
	if (ret < 0)
		return ret;

	/* this can return -EEXIST since we call it for each evsel */
	perf_session__set_tracepoints_handlers(session, lock_tracepoints);
	perf_session__set_tracepoints_handlers(session, contention_tracepoints);
	return 0;
}

typedef int (*tracepoint_handler)(struct evsel *evsel,
				  struct perf_sample *sample);

static int process_sample_event(struct perf_tool *tool __maybe_unused,
				union perf_event *event,
				struct perf_sample *sample,
				struct evsel *evsel,
				struct machine *machine)
{
	int err = 0;
	struct thread *thread = machine__findnew_thread(machine, sample->pid,
							sample->tid);

	if (thread == NULL) {
		pr_debug("problem processing %d event, skipping it.\n",
			event->header.type);
		return -1;
	}

	if (evsel->handler != NULL) {
		tracepoint_handler f = evsel->handler;
		err = f(evsel, sample);
	}

	thread__put(thread);

	return err;
}

static void combine_result(void)
{
	unsigned int i;
	struct lock_stat *st;

	if (!combine_locks)
		return;

	for (i = 0; i < LOCKHASH_SIZE; i++) {
		hlist_for_each_entry(st, &lockhash_table[i], hash_entry) {
			combine_lock_stats(st);
		}
	}
}

static void sort_result(void)
{
	unsigned int i;
	struct lock_stat *st;

	for (i = 0; i < LOCKHASH_SIZE; i++) {
		hlist_for_each_entry(st, &lockhash_table[i], hash_entry) {
			insert_to_result(st, compare);
		}
	}
}

static const struct {
	unsigned int flags;
	const char *str;
	const char *name;
} lock_type_table[] = {
	{ 0,				"semaphore",	"semaphore" },
	{ LCB_F_SPIN,			"spinlock",	"spinlock" },
	{ LCB_F_SPIN | LCB_F_READ,	"rwlock:R",	"rwlock" },
	{ LCB_F_SPIN | LCB_F_WRITE,	"rwlock:W",	"rwlock" },
	{ LCB_F_READ,			"rwsem:R",	"rwsem" },
	{ LCB_F_WRITE,			"rwsem:W",	"rwsem" },
	{ LCB_F_RT,			"rt-mutex",	"rt-mutex" },
	{ LCB_F_RT | LCB_F_READ,	"rwlock-rt:R",	"rwlock-rt" },
	{ LCB_F_RT | LCB_F_WRITE,	"rwlock-rt:W",	"rwlock-rt" },
	{ LCB_F_PERCPU | LCB_F_READ,	"pcpu-sem:R",	"percpu-rwsem" },
	{ LCB_F_PERCPU | LCB_F_WRITE,	"pcpu-sem:W",	"percpu-rwsem" },
	{ LCB_F_MUTEX,			"mutex",	"mutex" },
	{ LCB_F_MUTEX | LCB_F_SPIN,	"mutex",	"mutex" },
	/* alias for optimistic spinning only */
	{ LCB_F_MUTEX | LCB_F_SPIN,	"mutex:spin",	"mutex-spin" },
};
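
/*
 * e.g. a contention event with flags == (LCB_F_SPIN | LCB_F_READ) is
 * shown as "rwlock:R" in the type column (get_type_str) and counted as
 * type name "rwlock" (get_type_name).  The first matching entry wins,
 * so LCB_F_MUTEX | LCB_F_SPIN resolves to plain "mutex" here, not to
 * the "mutex:spin" alias below it.
 */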

static const char *get_type_str(unsigned int flags)
{
	flags &= LCB_F_MAX_FLAGS - 1;

	for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) {
		if (lock_type_table[i].flags == flags)
			return lock_type_table[i].str;
	}
	return "unknown";
}

static const char *get_type_name(unsigned int flags)
{
	flags &= LCB_F_MAX_FLAGS - 1;

	for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) {
		if (lock_type_table[i].flags == flags)
			return lock_type_table[i].name;
	}
	return "unknown";
}

static void lock_filter_finish(void)
{
	zfree(&filters.types);
	filters.nr_types = 0;

	zfree(&filters.addrs);
	filters.nr_addrs = 0;

	for (int i = 0; i < filters.nr_syms; i++)
		free(filters.syms[i]);

	zfree(&filters.syms);
	filters.nr_syms = 0;
}

static void sort_contention_result(void)
{
	sort_result();
}

static void print_header_stdio(void)
{
	struct lock_key *key;

	list_for_each_entry(key, &lock_keys, list)
		fprintf(lock_output, "%*s ", key->len, key->header);

	switch (aggr_mode) {
	case LOCK_AGGR_TASK:
		fprintf(lock_output, "  %10s   %s\n\n", "pid",
			show_lock_owner ? "owner" : "comm");
		break;
	case LOCK_AGGR_CALLER:
		fprintf(lock_output, "  %10s   %s\n\n", "type", "caller");
		break;
	case LOCK_AGGR_ADDR:
		fprintf(lock_output, "  %16s   %s\n\n", "address", "symbol");
		break;
	default:
		break;
	}
}

static void print_header_csv(const char *sep)
{
	struct lock_key *key;

	fprintf(lock_output, "# output: ");
	list_for_each_entry(key, &lock_keys, list)
		fprintf(lock_output, "%s%s ", key->header, sep);

	switch (aggr_mode) {
	case LOCK_AGGR_TASK:
		fprintf(lock_output, "%s%s %s\n", "pid", sep,
			show_lock_owner ? "owner" : "comm");
		break;
	case LOCK_AGGR_CALLER:
		fprintf(lock_output, "%s%s %s", "type", sep, "caller");
		if (verbose > 0)
			fprintf(lock_output, "%s %s", sep, "stacktrace");
		fprintf(lock_output, "\n");
		break;
	case LOCK_AGGR_ADDR:
		fprintf(lock_output, "%s%s %s%s %s\n", "address", sep, "symbol", sep, "type");
		break;
	default:
		break;
	}
}

static void print_header(void)
{
	if (!quiet) {
		if (symbol_conf.field_sep)
			print_header_csv(symbol_conf.field_sep);
		else
			print_header_stdio();
	}
}

static void print_lock_stat_stdio(struct lock_contention *con, struct lock_stat *st)
{
	struct lock_key *key;
	struct thread *t;
	int pid;

	list_for_each_entry(key, &lock_keys, list) {
		key->print(key, st);
		fprintf(lock_output, " ");
	}

	switch (aggr_mode) {
	case LOCK_AGGR_CALLER:
		fprintf(lock_output, "  %10s   %s\n", get_type_str(st->flags), st->name);
		break;
	case LOCK_AGGR_TASK:
		pid = st->addr;
		t = perf_session__findnew(session, pid);
		fprintf(lock_output, "  %10d   %s\n",
			pid, pid == -1 ? "Unknown" : thread__comm_str(t));
		break;
	case LOCK_AGGR_ADDR:
		fprintf(lock_output, "  %016llx   %s (%s)\n", (unsigned long long)st->addr,
			st->name, get_type_name(st->flags));
		break;
	default:
		break;
	}

	if (aggr_mode == LOCK_AGGR_CALLER && verbose > 0) {
		struct map *kmap;
		struct symbol *sym;
		char buf[128];
		u64 ip;

		for (int i = 0; i < max_stack_depth; i++) {
			if (!st->callstack || !st->callstack[i])
				break;

			ip = st->callstack[i];
			sym = machine__find_kernel_symbol(con->machine, ip, &kmap);
			get_symbol_name_offset(kmap, sym, ip, buf, sizeof(buf));
			fprintf(lock_output, "\t\t\t%#lx  %s\n", (unsigned long)ip, buf);
		}
	}
}

static void print_lock_stat_csv(struct lock_contention *con, struct lock_stat *st,
				const char *sep)
{
	struct lock_key *key;
	struct thread *t;
	int pid;

	list_for_each_entry(key, &lock_keys, list) {
		key->print(key, st);
		fprintf(lock_output, "%s ", sep);
	}

	switch (aggr_mode) {
	case LOCK_AGGR_CALLER:
		fprintf(lock_output, "%s%s %s", get_type_str(st->flags), sep, st->name);
		if (verbose <= 0)
			fprintf(lock_output, "\n");
		break;
	case LOCK_AGGR_TASK:
		pid = st->addr;
		t = perf_session__findnew(session, pid);
		fprintf(lock_output, "%d%s %s\n", pid, sep,
			pid == -1 ? "Unknown" : thread__comm_str(t));
		break;
	case LOCK_AGGR_ADDR:
		fprintf(lock_output, "%llx%s %s%s %s\n", (unsigned long long)st->addr, sep,
			st->name, sep, get_type_name(st->flags));
		break;
	default:
		break;
	}

	if (aggr_mode == LOCK_AGGR_CALLER && verbose > 0) {
		struct map *kmap;
		struct symbol *sym;
		char buf[128];
		u64 ip;

		for (int i = 0; i < max_stack_depth; i++) {
			if (!st->callstack || !st->callstack[i])
				break;

			ip = st->callstack[i];
			sym = machine__find_kernel_symbol(con->machine, ip, &kmap);
			get_symbol_name_offset(kmap, sym, ip, buf, sizeof(buf));
			fprintf(lock_output, "%s %#lx %s", i ? ":" : sep, (unsigned long) ip, buf);
		}
		fprintf(lock_output, "\n");
	}
}

static void print_lock_stat(struct lock_contention *con, struct lock_stat *st)
{
	if (symbol_conf.field_sep)
		print_lock_stat_csv(con, st, symbol_conf.field_sep);
	else
		print_lock_stat_stdio(con, st);
}

static void print_footer_stdio(int total, int bad, struct lock_contention_fails *fails)
{
	/* Output for debug; this has to be removed eventually */
	int broken = fails->task + fails->stack + fails->time + fails->data;

	if (!use_bpf)
		print_bad_events(bad, total);

	if (quiet || total == 0 || (broken == 0 && verbose <= 0))
		return;

	total += broken;
	fprintf(lock_output, "\n=== output for debug ===\n\n");
	fprintf(lock_output, "bad: %d, total: %d\n", broken, total);
	fprintf(lock_output, "bad rate: %.2f %%\n", 100.0 * broken / total);

	fprintf(lock_output, "histogram of failure reasons\n");
	fprintf(lock_output, " %10s: %d\n", "task", fails->task);
	fprintf(lock_output, " %10s: %d\n", "stack", fails->stack);
	fprintf(lock_output, " %10s: %d\n", "time", fails->time);
	fprintf(lock_output, " %10s: %d\n", "data", fails->data);
}

static void print_footer_csv(int total, int bad, struct lock_contention_fails *fails,
			     const char *sep)
{
	/* Output for debug; this has to be removed eventually */
	if (use_bpf)
		bad = fails->task + fails->stack + fails->time + fails->data;

	if (quiet || total == 0 || (bad == 0 && verbose <= 0))
		return;

	total += bad;
	fprintf(lock_output, "# debug: total=%d%s bad=%d", total, sep, bad);

	if (use_bpf) {
		fprintf(lock_output, "%s bad_%s=%d", sep, "task", fails->task);
		fprintf(lock_output, "%s bad_%s=%d", sep, "stack", fails->stack);
		fprintf(lock_output, "%s bad_%s=%d", sep, "time", fails->time);
		fprintf(lock_output, "%s bad_%s=%d", sep, "data", fails->data);
	} else {
		int i;
		const char *name[4] = { "acquire", "acquired", "contended", "release" };

		for (i = 0; i < BROKEN_MAX; i++)
			fprintf(lock_output, "%s bad_%s=%d", sep, name[i], bad_hist[i]);
	}
	fprintf(lock_output, "\n");
}

static void print_footer(int total, int bad, struct lock_contention_fails *fails)
{
	if (symbol_conf.field_sep)
		print_footer_csv(total, bad, fails, symbol_conf.field_sep);
	else
		print_footer_stdio(total, bad, fails);
}

static void print_contention_result(struct lock_contention *con)
{
	struct lock_stat *st;
	int bad, total, printed;

	if (!quiet)
		print_header();

	bad = total = printed = 0;

	while ((st = pop_from_result())) {
		total += use_bpf ? st->nr_contended : 1;
		if (st->broken)
			bad++;

		if (!st->wait_time_total)
			continue;

		print_lock_stat(con, st);

		if (++printed >= print_nr_entries)
			break;
	}

	if (print_nr_entries) {
		/* update the total/bad stats */
		while ((st = pop_from_result())) {
			total += use_bpf ? st->nr_contended : 1;
			if (st->broken)
				bad++;
		}
	}
	/* some entries are collected but hidden by the callstack filter */
	total += con->nr_filtered;

	print_footer(total, bad, &con->fails);
}

static bool force;

static int __cmd_report(bool display_info)
{
	int err = -EINVAL;
	struct perf_tool eops = {
		.attr		 = perf_event__process_attr,
		.event_update	 = process_event_update,
		.sample		 = process_sample_event,
		.comm		 = perf_event__process_comm,
		.mmap		 = perf_event__process_mmap,
		.namespaces	 = perf_event__process_namespaces,
		.tracing_data	 = perf_event__process_tracing_data,
		.ordered_events	 = true,
	};
	struct perf_data data = {
		.path  = input_name,
		.mode  = PERF_DATA_MODE_READ,
		.force = force,
	};

	session = perf_session__new(&data, &eops);
	if (IS_ERR(session)) {
		pr_err("Initializing perf session failed\n");
		return PTR_ERR(session);
	}

	symbol_conf.allow_aliases = true;
	symbol__init(&session->header.env);

	if (!data.is_pipe) {
		if (!perf_session__has_traces(session, "lock record"))
			goto out_delete;

		if (perf_session__set_tracepoints_handlers(session, lock_tracepoints)) {
			pr_err("Initializing perf session tracepoint handlers failed\n");
			goto out_delete;
		}

		if (perf_session__set_tracepoints_handlers(session, contention_tracepoints)) {
			pr_err("Initializing perf session tracepoint handlers failed\n");
			goto out_delete;
		}
	}

	if (setup_output_field(false, output_fields))
		goto out_delete;

	if (select_key(false))
		goto out_delete;

	if (show_thread_stats)
		aggr_mode = LOCK_AGGR_TASK;

	err = perf_session__process_events(session);
	if (err)
		goto out_delete;

	setup_pager();
	if (display_info) /* used for the info subcommand */
		err = dump_info();
	else {
		combine_result();
		sort_result();
		print_result();
	}

out_delete:
	perf_session__delete(session);
	return err;
}
1974 
1975 static void sighandler(int sig __maybe_unused)
1976 {
1977 }
1978 
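/*
 * Reject option combinations that cannot work together, e.g.:
 *
 *   perf lock contention -t -l    # thread and address aggregation clash
 *   perf lock contention -o      # owner tracking needs -b (BPF)
 *
 * It also refuses CSV separators containing ':', '+' or '.', since those
 * characters legitimately appear in the output fields.
 */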
1979 static int check_lock_contention_options(const struct option *options,
1980 					 const char * const *usage)
1981 
1982 {
1983 	if (show_thread_stats && show_lock_addrs) {
1984 		pr_err("Cannot use thread and addr mode together\n");
1985 		parse_options_usage(usage, options, "threads", 0);
1986 		parse_options_usage(NULL, options, "lock-addr", 0);
1987 		return -1;
1988 	}
1989 
1990 	if (show_lock_owner && !use_bpf) {
1991 		pr_err("Lock owners are available only with BPF\n");
1992 		parse_options_usage(usage, options, "lock-owner", 0);
1993 		parse_options_usage(NULL, options, "use-bpf", 0);
1994 		return -1;
1995 	}
1996 
1997 	if (show_lock_owner && show_lock_addrs) {
1998 		pr_err("Cannot use owner and addr mode together\n");
1999 		parse_options_usage(usage, options, "lock-owner", 0);
2000 		parse_options_usage(NULL, options, "lock-addr", 0);
2001 		return -1;
2002 	}
2003 
2004 	if (symbol_conf.field_sep) {
2005 		if (strstr(symbol_conf.field_sep, ":") || /* part of type flags */
2006 		    strstr(symbol_conf.field_sep, "+") || /* part of caller offset */
2007 		    strstr(symbol_conf.field_sep, ".")) { /* can be in a symbol name */
2008 			pr_err("Cannot use a separator that also appears in the output\n");
2009 			parse_options_usage(usage, options, "x", 1);
2010 			return -1;
2011 		}
2012 	}
2013 
2014 	if (show_lock_owner)
2015 		show_thread_stats = true;
2016 
2017 	return 0;
2018 }
2019 
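/*
 * Body of 'perf lock contention'.  Two modes share this function:
 *
 *  - BPF mode (-b): set up the lock contention BPF skeleton, optionally
 *    start a workload, wait for a signal, then read the stats back from
 *    the BPF maps.
 *  - perf.data mode: process previously recorded
 *    lock:contention_begin/end tracepoint samples.
 */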
2020 static int __cmd_contention(int argc, const char **argv)
2021 {
2022 	int err = -EINVAL;
2023 	struct perf_tool eops = {
2024 		.attr		 = perf_event__process_attr,
2025 		.event_update	 = process_event_update,
2026 		.sample		 = process_sample_event,
2027 		.comm		 = perf_event__process_comm,
2028 		.mmap		 = perf_event__process_mmap,
2029 		.tracing_data	 = perf_event__process_tracing_data,
2030 		.ordered_events	 = true,
2031 	};
2032 	struct perf_data data = {
2033 		.path  = input_name,
2034 		.mode  = PERF_DATA_MODE_READ,
2035 		.force = force,
2036 	};
2037 	struct lock_contention con = {
2038 		.target = &target,
2039 		.map_nr_entries = bpf_map_entries,
2040 		.max_stack = max_stack_depth,
2041 		.stack_skip = stack_skip,
2042 		.filters = &filters,
2043 		.save_callstack = needs_callstack(),
2044 		.owner = show_lock_owner,
2045 	};
2046 
2047 	lockhash_table = calloc(LOCKHASH_SIZE, sizeof(*lockhash_table));
2048 	if (!lockhash_table)
2049 		return -ENOMEM;
2050 
2051 	con.result = &lockhash_table[0];
2052 
2053 	session = perf_session__new(use_bpf ? NULL : &data, &eops);
2054 	if (IS_ERR(session)) {
2055 		pr_err("Initializing perf session failed\n");
2056 		err = PTR_ERR(session);
2057 		session = NULL;
2058 		goto out_delete;
2059 	}
2060 
2061 	con.machine = &session->machines.host;
2062 
2063 	con.aggr_mode = aggr_mode = show_thread_stats ? LOCK_AGGR_TASK :
2064 		show_lock_addrs ? LOCK_AGGR_ADDR : LOCK_AGGR_CALLER;
2065 
2066 	if (con.aggr_mode == LOCK_AGGR_CALLER)
2067 		con.save_callstack = true;
2068 
2069 	symbol_conf.allow_aliases = true;
2070 	symbol__init(&session->header.env);
2071 
2072 	if (use_bpf) {
2073 		err = target__validate(&target);
2074 		if (err) {
2075 			char errbuf[512];
2076 
2077 			target__strerror(&target, err, errbuf, 512);
2078 			pr_err("%s\n", errbuf);
2079 			goto out_delete;
2080 		}
2081 
2082 		signal(SIGINT, sighandler);
2083 		signal(SIGCHLD, sighandler);
2084 		signal(SIGTERM, sighandler);
2085 
2086 		con.evlist = evlist__new();
2087 		if (con.evlist == NULL) {
2088 			err = -ENOMEM;
2089 			goto out_delete;
2090 		}
2091 
2092 		err = evlist__create_maps(con.evlist, &target);
2093 		if (err < 0)
2094 			goto out_delete;
2095 
2096 		if (argc) {
2097 			err = evlist__prepare_workload(con.evlist, &target,
2098 						       argv, false, NULL);
2099 			if (err < 0)
2100 				goto out_delete;
2101 		}
2102 
2103 		if (lock_contention_prepare(&con) < 0) {
2104 			pr_err("lock contention BPF setup failed\n");
2105 			goto out_delete;
2106 		}
2107 	} else if (!data.is_pipe) {
2108 		if (!perf_session__has_traces(session, "lock record"))
2109 			goto out_delete;
2110 
2111 		if (!evlist__find_evsel_by_str(session->evlist,
2112 					       "lock:contention_begin")) {
2113 			pr_err("lock contention evsel not found\n");
2114 			goto out_delete;
2115 		}
2116 
2117 		if (perf_session__set_tracepoints_handlers(session,
2118 						contention_tracepoints)) {
2119 			pr_err("Initializing perf session tracepoint handlers failed\n");
2120 			goto out_delete;
2121 		}
2122 	}
2123 
2124 	if (setup_output_field(true, output_fields))
2125 		goto out_delete;
2126 
2127 	if (select_key(true))
2128 		goto out_delete;
2129 
2130 	if (symbol_conf.field_sep) {
2131 		int i;
2132 		struct lock_key *keys = contention_keys;
2133 
2134 		/* do not align output in CSV format */
2135 		for (i = 0; keys[i].name; i++)
2136 			keys[i].len = 0;
2137 	}
2138 
2139 	if (use_bpf) {
2140 		lock_contention_start();
2141 		if (argc)
2142 			evlist__start_workload(con.evlist);
2143 
2144 		/* wait for signal */
2145 		pause();
2146 
2147 		lock_contention_stop();
2148 		lock_contention_read(&con);
2149 	} else {
2150 		err = perf_session__process_events(session);
2151 		if (err)
2152 			goto out_delete;
2153 	}
2154 
2155 	setup_pager();
2156 
2157 	sort_contention_result();
2158 	print_contention_result(&con);
2159 
2160 out_delete:
2161 	lock_filter_finish();
2162 	evlist__delete(con.evlist);
2163 	lock_contention_finish();
2164 	perf_session__delete(session);
2165 	zfree(&lockhash_table);
2166 	return err;
2167 }
2168 
2169 
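/*
 * Build an argv for 'perf record' and hand it to cmd_record().  With
 * CONFIG_LOCK_STAT the legacy lock tracepoints are recorded; otherwise it
 * falls back to the contention tracepoints plus frame-pointer call graphs.
 * The synthesized command line is roughly (a sketch of the fallback case):
 *
 *   perf record -R -m 1024 -c 1 --synth task \
 *       -e lock:contention_begin -e lock:contention_end \
 *       --call-graph fp,<CONTENTION_STACK_DEPTH> <user args>
 */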
2170 static int __cmd_record(int argc, const char **argv)
2171 {
2172 	const char *record_args[] = {
2173 		"record", "-R", "-m", "1024", "-c", "1", "--synth", "task",
2174 	};
2175 	const char *callgraph_args[] = {
2176 		"--call-graph", "fp," __stringify(CONTENTION_STACK_DEPTH),
2177 	};
2178 	unsigned int rec_argc, i, j, ret;
2179 	unsigned int nr_tracepoints;
2180 	unsigned int nr_callgraph_args = 0;
2181 	const char **rec_argv;
2182 	bool has_lock_stat = true;
2183 
2184 	for (i = 0; i < ARRAY_SIZE(lock_tracepoints); i++) {
2185 		if (!is_valid_tracepoint(lock_tracepoints[i].name)) {
2186 			pr_debug("tracepoint %s is not enabled. "
2187 				 "Are CONFIG_LOCKDEP and CONFIG_LOCK_STAT enabled?\n",
2188 				 lock_tracepoints[i].name);
2189 			has_lock_stat = false;
2190 			break;
2191 		}
2192 	}
2193 
2194 	if (has_lock_stat)
2195 		goto setup_args;
2196 
2197 	for (i = 0; i < ARRAY_SIZE(contention_tracepoints); i++) {
2198 		if (!is_valid_tracepoint(contention_tracepoints[i].name)) {
2199 			pr_err("tracepoint %s is not enabled.\n",
2200 			       contention_tracepoints[i].name);
2201 			return 1;
2202 		}
2203 	}
2204 
2205 	nr_callgraph_args = ARRAY_SIZE(callgraph_args);
2206 
2207 setup_args:
2208 	rec_argc = ARRAY_SIZE(record_args) + nr_callgraph_args + argc - 1;
2209 
2210 	if (has_lock_stat)
2211 		nr_tracepoints = ARRAY_SIZE(lock_tracepoints);
2212 	else
2213 		nr_tracepoints = ARRAY_SIZE(contention_tracepoints);
2214 
2215 	/* factor of 2 is for -e in front of each tracepoint */
2216 	rec_argc += 2 * nr_tracepoints;
2217 
2218 	rec_argv = calloc(rec_argc + 1, sizeof(char *));
2219 	if (!rec_argv)
2220 		return -ENOMEM;
2221 
2222 	for (i = 0; i < ARRAY_SIZE(record_args); i++)
2223 		rec_argv[i] = strdup(record_args[i]);
2224 
2225 	for (j = 0; j < nr_tracepoints; j++) {
2226 		const char *ev_name;
2227 
2228 		if (has_lock_stat)
2229 			ev_name = strdup(lock_tracepoints[j].name);
2230 		else
2231 			ev_name = strdup(contention_tracepoints[j].name);
2232 
2233 		if (!ev_name)
2234 			return -ENOMEM;
2235 
2236 		rec_argv[i++] = "-e";
2237 		rec_argv[i++] = ev_name;
2238 	}
2239 
2240 	for (j = 0; j < nr_callgraph_args; j++, i++)
2241 		rec_argv[i] = callgraph_args[j];
2242 
2243 	for (j = 1; j < (unsigned int)argc; j++, i++)
2244 		rec_argv[i] = argv[j];
2245 
2246 	BUG_ON(i != rec_argc);
2247 
2248 	ret = cmd_record(i, rec_argv);
2249 	free(rec_argv);
2250 	return ret;
2251 }
2252 
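/* -M/--map-nr-entries: plain number parser for the BPF map size. */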
2253 static int parse_map_entry(const struct option *opt, const char *str,
2254 			    int unset __maybe_unused)
2255 {
2256 	unsigned long *len = (unsigned long *)opt->value;
2257 	unsigned long val;
2258 	char *endptr;
2259 
2260 	errno = 0;
2261 	val = strtoul(str, &endptr, 0);
2262 	if (*endptr != '\0' || errno != 0) {
2263 		pr_err("invalid BPF map length: %s\n", str);
2264 		return -1;
2265 	}
2266 
2267 	*len = val;
2268 	return 0;
2269 }
2270 
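/*
 * --max-stack: like parse_map_entry() above, but the value is additionally
 * bounded by sysctl__max_stack().
 */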
2271 static int parse_max_stack(const struct option *opt, const char *str,
2272 			   int unset __maybe_unused)
2273 {
2274 	unsigned long *len = (unsigned long *)opt->value;
2275 	long val;
2276 	char *endptr;
2277 
2278 	errno = 0;
2279 	val = strtol(str, &endptr, 0);
2280 	if (*endptr != '\0' || errno != 0) {
2281 		pr_err("invalid max stack depth: %s\n", str);
2282 		return -1;
2283 	}
2284 
2285 	if (val < 0 || val > sysctl__max_stack()) {
2286 		pr_err("invalid max stack depth: %ld\n", val);
2287 		return -1;
2288 	}
2289 
2290 	*len = val;
2291 	return 0;
2292 }
2293 
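/* Grow the -Y/--type-filter flags array by one entry; false on OOM. */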
2294 static bool add_lock_type(unsigned int flags)
2295 {
2296 	unsigned int *tmp;
2297 
2298 	tmp = realloc(filters.types, (filters.nr_types + 1) * sizeof(*filters.types));
2299 	if (tmp == NULL)
2300 		return false;
2301 
2302 	tmp[filters.nr_types++] = flags;
2303 	filters.types = tmp;
2304 	return true;
2305 }
2306 
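/*
 * -Y/--type-filter: comma/space separated list.  A token containing ':'
 * (e.g. "rwlock:R") must match one lock_type_table flag string exactly;
 * a bare name (e.g. "rwlock") may match several table entries, and all of
 * their flags are added.
 */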
2307 static int parse_lock_type(const struct option *opt __maybe_unused, const char *str,
2308 			   int unset __maybe_unused)
2309 {
2310 	char *s, *tmp, *tok;
2311 
2312 	s = strdup(str);
2313 	if (s == NULL)
2314 		return -1;
2315 
2316 	for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) {
2317 		bool found = false;
2318 
2319 		/* `tok` is `str` in `lock_type_table` if it contains ':'. */
2320 		if (strchr(tok, ':')) {
2321 			for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) {
2322 				if (!strcmp(lock_type_table[i].str, tok) &&
2323 				    add_lock_type(lock_type_table[i].flags)) {
2324 					found = true;
2325 					break;
2326 				}
2327 			}
2328 
2329 			if (!found) {
2330 				pr_err("Unknown lock flags name: %s\n", tok);
2331 				free(s);
2332 				return -1;
2333 			}
2334 
2335 			continue;
2336 		}
2337 
2338 		/*
2339 		 * Otherwise `tok` is `name` in `lock_type_table`.
2340 		 * A single lock name can map to multiple flag values.
2341 		 */
2342 		for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) {
2343 			if (!strcmp(lock_type_table[i].name, tok)) {
2344 				if (add_lock_type(lock_type_table[i].flags)) {
2345 					found = true;
2346 				} else {
2347 					free(s);
2348 					return -1;
2349 				}
2350 			}
2351 		}
2352 
2353 		if (!found) {
2354 			pr_err("Unknown lock name: %s\n", tok);
2355 			free(s);
2356 			return -1;
2357 		}
2358 
2359 	}
2360 
2361 	free(s);
2362 	return 0;
2363 }
2364 
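/*
 * Helpers for -L/--lock-filter: numeric addresses go straight into
 * filters.addrs, while names are kept in filters.syms to be resolved to
 * addresses once kernel symbols are loaded.
 */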
2365 static bool add_lock_addr(unsigned long addr)
2366 {
2367 	unsigned long *tmp;
2368 
2369 	tmp = realloc(filters.addrs, (filters.nr_addrs + 1) * sizeof(*filters.addrs));
2370 	if (tmp == NULL) {
2371 		pr_err("Memory allocation failure\n");
2372 		return false;
2373 	}
2374 
2375 	tmp[filters.nr_addrs++] = addr;
2376 	filters.addrs = tmp;
2377 	return true;
2378 }
2379 
2380 static bool add_lock_sym(char *name)
2381 {
2382 	char **tmp;
2383 	char *sym = strdup(name);
2384 
2385 	if (sym == NULL) {
2386 		pr_err("Memory allocation failure\n");
2387 		return false;
2388 	}
2389 
2390 	tmp = realloc(filters.syms, (filters.nr_syms + 1) * sizeof(*filters.syms));
2391 	if (tmp == NULL) {
2392 		pr_err("Memory allocation failure\n");
2393 		free(sym);
2394 		return false;
2395 	}
2396 
2397 	tmp[filters.nr_syms++] = sym;
2398 	filters.syms = tmp;
2399 	return true;
2400 }
2401 
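/*
 * -L/--lock-filter: each token is first tried as a hex address, then falls
 * back to being treated as a symbol name, e.g. (hypothetical values):
 *
 *   perf lock contention -b -L 0xffff92e14d431c80,rcu_state
 */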
2402 static int parse_lock_addr(const struct option *opt __maybe_unused, const char *str,
2403 			   int unset __maybe_unused)
2404 {
2405 	char *s, *tmp, *tok;
2406 	int ret = 0;
2407 	u64 addr;
2408 
2409 	s = strdup(str);
2410 	if (s == NULL)
2411 		return -1;
2412 
2413 	for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) {
2414 		char *end;
2415 
2416 		addr = strtoul(tok, &end, 16);
2417 		if (*end == '\0') {
2418 			if (!add_lock_addr(addr)) {
2419 				ret = -1;
2420 				break;
2421 			}
2422 			continue;
2423 		}
2424 
2425 		/*
2426 		 * At this moment, we don't have kernel symbols.  Save the symbols
2427 		 * in a separate list and resolve them to addresses later.
2428 		 */
2429 		if (!add_lock_sym(tok)) {
2430 			ret = -1;
2431 			break;
2432 		}
2433 	}
2434 
2435 	free(s);
2436 	return ret;
2437 }
2438 
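/*
 * -S/--callstack-filter: remember function names; results whose callstack
 * contains none of them are hidden later (cf. needs_callstack() and
 * con->nr_filtered above).
 */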
2439 static int parse_call_stack(const struct option *opt __maybe_unused, const char *str,
2440 			   int unset __maybe_unused)
2441 {
2442 	char *s, *tmp, *tok;
2443 	int ret = 0;
2444 
2445 	s = strdup(str);
2446 	if (s == NULL)
2447 		return -1;
2448 
2449 	for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) {
2450 		struct callstack_filter *entry;
2451 
2452 		entry = malloc(sizeof(*entry) + strlen(tok) + 1);
2453 		if (entry == NULL) {
2454 			pr_err("Memory allocation failure\n");
			free(s);	/* avoid leaking the strdup()'d copy of str */
2455 			return -1;
2456 		}
2457 
2458 		strcpy(entry->name, tok);
2459 		list_add_tail(&entry->list, &callstack_filters);
2460 	}
2461 
2462 	free(s);
2463 	return ret;
2464 }
2465 
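/*
 * --output: redirect result printing from the default (stderr, set in
 * cmd_lock() below) to the named file.
 */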
2466 static int parse_output(const struct option *opt __maybe_unused, const char *str,
2467 			int unset __maybe_unused)
2468 {
2469 	const char **name = (const char **)opt->value;
2470 
2471 	if (str == NULL)
2472 		return -1;
2473 
2474 	lock_output = fopen(str, "w");
2475 	if (lock_output == NULL) {
2476 		pr_err("Cannot open %s\n", str);
2477 		return -1;
2478 	}
2479 
2480 	*name = str;
2481 	return 0;
2482 }
2483 
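/*
 * Entry point: dispatch to the record/report/script/info/contention
 * subcommands.  Typical invocations (a sketch):
 *
 *   perf lock record -- sleep 1        # record lock events into perf.data
 *   perf lock report -k acquired      # report sorted by acquire count
 *   perf lock contention -a -b sleep 3   # system-wide BPF contention
 */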
2484 int cmd_lock(int argc, const char **argv)
2485 {
2486 	const struct option lock_options[] = {
2487 	OPT_STRING('i', "input", &input_name, "file", "input file name"),
2488 	OPT_CALLBACK(0, "output", &output_name, "file", "output file name", parse_output),
2489 	OPT_INCR('v', "verbose", &verbose, "be more verbose (show symbol address, etc)"),
2490 	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"),
2491 	OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
2492 	OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
2493 		   "file", "vmlinux pathname"),
2494 	OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name,
2495 		   "file", "kallsyms pathname"),
2496 	OPT_BOOLEAN('q', "quiet", &quiet, "Do not show any warnings or messages"),
2497 	OPT_END()
2498 	};
2499 
2500 	const struct option info_options[] = {
2501 	OPT_BOOLEAN('t', "threads", &info_threads,
2502 		    "dump thread list in perf.data"),
2503 	OPT_BOOLEAN('m', "map", &info_map,
2504 		    "map of lock instances (address:name table)"),
2505 	OPT_PARENT(lock_options)
2506 	};
2507 
2508 	const struct option report_options[] = {
2509 	OPT_STRING('k', "key", &sort_key, "acquired",
2510 		    "key for sorting (acquired / contended / avg_wait / wait_total / wait_max / wait_min)"),
2511 	OPT_STRING('F', "field", &output_fields, NULL,
2512 		    "output fields (acquired / contended / avg_wait / wait_total / wait_max / wait_min)"),
2513 	/* TODO: type */
2514 	OPT_BOOLEAN('c', "combine-locks", &combine_locks,
2515 		    "combine locks in the same class"),
2516 	OPT_BOOLEAN('t', "threads", &show_thread_stats,
2517 		    "show per-thread lock stats"),
2518 	OPT_INTEGER('E', "entries", &print_nr_entries, "display this many functions"),
2519 	OPT_PARENT(lock_options)
2520 	};
2521 
2522 	struct option contention_options[] = {
2523 	OPT_STRING('k', "key", &sort_key, "wait_total",
2524 		    "key for sorting (contended / wait_total / wait_max / wait_min / avg_wait)"),
2525 	OPT_STRING('F', "field", &output_fields, "contended,wait_total,wait_max,avg_wait",
2526 		    "output fields (contended / wait_total / wait_max / wait_min / avg_wait)"),
2527 	OPT_BOOLEAN('t', "threads", &show_thread_stats,
2528 		    "show per-thread lock stats"),
2529 	OPT_BOOLEAN('b', "use-bpf", &use_bpf, "use BPF program to collect lock contention stats"),
2530 	OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
2531 		    "System-wide collection from all CPUs"),
2532 	OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
2533 		    "List of cpus to monitor"),
2534 	OPT_STRING('p', "pid", &target.pid, "pid",
2535 		   "Trace on existing process id"),
2536 	OPT_STRING(0, "tid", &target.tid, "tid",
2537 		   "Trace on existing thread id (exclusive to --pid)"),
2538 	OPT_CALLBACK('M', "map-nr-entries", &bpf_map_entries, "num",
2539 		     "Max number of BPF map entries", parse_map_entry),
2540 	OPT_CALLBACK(0, "max-stack", &max_stack_depth, "num",
2541 		     "Set the maximum stack depth when collecting lock contention, "
2542 		     "Default: " __stringify(CONTENTION_STACK_DEPTH), parse_max_stack),
2543 	OPT_INTEGER(0, "stack-skip", &stack_skip,
2544 		    "Set the number of stack depth to skip when finding a lock caller, "
2545 		    "Default: " __stringify(CONTENTION_STACK_SKIP)),
2546 	OPT_INTEGER('E', "entries", &print_nr_entries, "display this many functions"),
2547 	OPT_BOOLEAN('l', "lock-addr", &show_lock_addrs, "show lock stats by address"),
2548 	OPT_CALLBACK('Y', "type-filter", NULL, "FLAGS",
2549 		     "Filter specific type of locks", parse_lock_type),
2550 	OPT_CALLBACK('L', "lock-filter", NULL, "ADDRS/NAMES",
2551 		     "Filter specific address/symbol of locks", parse_lock_addr),
2552 	OPT_CALLBACK('S', "callstack-filter", NULL, "NAMES",
2553 		     "Filter specific function in the callstack", parse_call_stack),
2554 	OPT_BOOLEAN('o', "lock-owner", &show_lock_owner, "show lock owners instead of waiters"),
2555 	OPT_STRING_NOEMPTY('x', "field-separator", &symbol_conf.field_sep, "separator",
2556 		   "print result in CSV format with custom separator"),
2557 	OPT_PARENT(lock_options)
2558 	};
2559 
2560 	const char * const info_usage[] = {
2561 		"perf lock info [<options>]",
2562 		NULL
2563 	};
2564 	const char *const lock_subcommands[] = { "record", "report", "script",
2565 						 "info", "contention", NULL };
2566 	const char *lock_usage[] = {
2567 		NULL,
2568 		NULL
2569 	};
2570 	const char * const report_usage[] = {
2571 		"perf lock report [<options>]",
2572 		NULL
2573 	};
2574 	const char * const contention_usage[] = {
2575 		"perf lock contention [<options>]",
2576 		NULL
2577 	};
2578 	unsigned int i;
2579 	int rc = 0;
2580 
2581 	lockhash_table = calloc(LOCKHASH_SIZE, sizeof(*lockhash_table));
2582 	if (!lockhash_table)
2583 		return -ENOMEM;
2584 
2585 	for (i = 0; i < LOCKHASH_SIZE; i++)
2586 		INIT_HLIST_HEAD(lockhash_table + i);
2587 
2588 	lock_output = stderr;
2589 	argc = parse_options_subcommand(argc, argv, lock_options, lock_subcommands,
2590 					lock_usage, PARSE_OPT_STOP_AT_NON_OPTION);
2591 	if (!argc)
2592 		usage_with_options(lock_usage, lock_options);
2593 
2594 	if (strlen(argv[0]) > 2 && strstarts("record", argv[0])) {
2595 		return __cmd_record(argc, argv);
2596 	} else if (strlen(argv[0]) > 2 && strstarts("report", argv[0])) {
2597 		trace_handler = &report_lock_ops;
2598 		if (argc) {
2599 			argc = parse_options(argc, argv,
2600 					     report_options, report_usage, 0);
2601 			if (argc)
2602 				usage_with_options(report_usage, report_options);
2603 		}
2604 		rc = __cmd_report(false);
2605 	} else if (!strcmp(argv[0], "script")) {
2606 		/* Aliased to 'perf script' */
2607 		rc = cmd_script(argc, argv);
2608 	} else if (!strcmp(argv[0], "info")) {
2609 		if (argc) {
2610 			argc = parse_options(argc, argv,
2611 					     info_options, info_usage, 0);
2612 			if (argc)
2613 				usage_with_options(info_usage, info_options);
2614 		}
2615 		/* recycling report_lock_ops */
2616 		trace_handler = &report_lock_ops;
2617 		rc = __cmd_report(true);
2618 	} else if (strlen(argv[0]) > 2 && strstarts("contention", argv[0])) {
2619 		trace_handler = &contention_lock_ops;
2620 		sort_key = "wait_total";
2621 		output_fields = "contended,wait_total,wait_max,avg_wait";
2622 
2623 #ifndef HAVE_BPF_SKEL
2624 		set_option_nobuild(contention_options, 'b', "use-bpf",
2625 				   "no BUILD_BPF_SKEL=1", false);
2626 #endif
2627 		if (argc) {
2628 			argc = parse_options(argc, argv, contention_options,
2629 					     contention_usage, 0);
2630 		}
2631 
2632 		if (check_lock_contention_options(contention_options,
2633 						  contention_usage) < 0)
2634 			return -1;
2635 
2636 		rc = __cmd_contention(argc, argv);
2637 	} else {
2638 		usage_with_options(lock_usage, lock_options);
2639 	}
2640 
2641 	/* free usage string allocated by parse_options_subcommand */
2642 	free((void *)lock_usage[0]);
2643 
2644 	zfree(&lockhash_table);
2645 	return rc;
2646 }
2647