/* xref: /openbmc/linux/tools/perf/builtin-lock.c (revision eca949b2b4addd941d369d4c2014b87f3c3e203b) */
// SPDX-License-Identifier: GPL-2.0
#include <errno.h>
#include <inttypes.h>
#include "builtin.h"
#include "perf.h"

#include "util/evlist.h" // for struct evsel_str_handler
#include "util/evsel.h"
#include "util/symbol.h"
#include "util/thread.h"
#include "util/header.h"
#include "util/target.h"
#include "util/callchain.h"
#include "util/lock-contention.h"
#include "util/bpf_skel/lock_data.h"

#include <subcmd/pager.h>
#include <subcmd/parse-options.h>
#include "util/trace-event.h"
#include "util/tracepoint.h"

#include "util/debug.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/data.h"
#include "util/string2.h"
#include "util/map.h"
#include "util/util.h"

#include <sys/types.h>
#include <sys/prctl.h>
#include <semaphore.h>
#include <math.h>
#include <limits.h>

#include <linux/list.h>
#include <linux/hash.h>
#include <linux/kernel.h>
#include <linux/zalloc.h>
#include <linux/err.h>
#include <linux/stringify.h>

static struct perf_session *session;
static struct target target;

/* based on kernel/lockdep.c */
#define LOCKHASH_BITS		12
#define LOCKHASH_SIZE		(1UL << LOCKHASH_BITS)

static struct hlist_head lockhash_table[LOCKHASH_SIZE];

#define __lockhashfn(key)	hash_long((unsigned long)key, LOCKHASH_BITS)
#define lockhashentry(key)	(lockhash_table + __lockhashfn((key)))

static struct rb_root		thread_stats;

static bool combine_locks;
static bool show_thread_stats;
static bool use_bpf;
static unsigned long bpf_map_entries = 10240;
static int max_stack_depth = CONTENTION_STACK_DEPTH;
static int stack_skip = CONTENTION_STACK_SKIP;
static int print_nr_entries = INT_MAX / 2;

static enum lock_aggr_mode aggr_mode = LOCK_AGGR_ADDR;

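/* look up the thread_stat for @tid, or NULL if the thread was not seen yet */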
static struct thread_stat *thread_stat_find(u32 tid)
{
	struct rb_node *node;
	struct thread_stat *st;

	node = thread_stats.rb_node;
	while (node) {
		st = container_of(node, struct thread_stat, rb);
		if (st->tid == tid)
			return st;
		else if (tid < st->tid)
			node = node->rb_left;
		else
			node = node->rb_right;
	}

	return NULL;
}

static void thread_stat_insert(struct thread_stat *new)
{
	struct rb_node **rb = &thread_stats.rb_node;
	struct rb_node *parent = NULL;
	struct thread_stat *p;

	while (*rb) {
		p = container_of(*rb, struct thread_stat, rb);
		parent = *rb;

		if (new->tid < p->tid)
			rb = &(*rb)->rb_left;
		else if (new->tid > p->tid)
			rb = &(*rb)->rb_right;
		else
			BUG_ON("inserting invalid thread_stat\n");
	}

	rb_link_node(&new->rb, parent, rb);
	rb_insert_color(&new->rb, &thread_stats);
}

static struct thread_stat *thread_stat_findnew_after_first(u32 tid)
{
	struct thread_stat *st;

	st = thread_stat_find(tid);
	if (st)
		return st;

	st = zalloc(sizeof(struct thread_stat));
	if (!st) {
		pr_err("memory allocation failed\n");
		return NULL;
	}

	st->tid = tid;
	INIT_LIST_HEAD(&st->seq_list);

	thread_stat_insert(st);

	return st;
}

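/*
 * thread_stat_findnew starts out as thread_stat_findnew_first, which
 * inserts the very first entry without searching the (empty) rb-tree
 * and then redirects the function pointer to
 * thread_stat_findnew_after_first for all later lookups.
 */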
static struct thread_stat *thread_stat_findnew_first(u32 tid);
static struct thread_stat *(*thread_stat_findnew)(u32 tid) =
	thread_stat_findnew_first;

static struct thread_stat *thread_stat_findnew_first(u32 tid)
{
	struct thread_stat *st;

	st = zalloc(sizeof(struct thread_stat));
	if (!st) {
		pr_err("memory allocation failed\n");
		return NULL;
	}
	st->tid = tid;
	INIT_LIST_HEAD(&st->seq_list);

	rb_link_node(&st->rb, NULL, &thread_stats.rb_node);
	rb_insert_color(&st->rb, &thread_stats);

	thread_stat_findnew = thread_stat_findnew_after_first;
	return st;
}

/* build a simple key compare function: return whether 'one' is bigger than 'two' */
#define SINGLE_KEY(member)						\
	static int lock_stat_key_ ## member(struct lock_stat *one,	\
					 struct lock_stat *two)		\
	{								\
		return one->member > two->member;			\
	}

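/*
 * For example, SINGLE_KEY(nr_acquired) expands to:
 *
 *	static int lock_stat_key_nr_acquired(struct lock_stat *one,
 *					     struct lock_stat *two)
 *	{
 *		return one->nr_acquired > two->nr_acquired;
 *	}
 */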
SINGLE_KEY(nr_acquired)
SINGLE_KEY(nr_contended)
SINGLE_KEY(avg_wait_time)
SINGLE_KEY(wait_time_total)
SINGLE_KEY(wait_time_max)

static int lock_stat_key_wait_time_min(struct lock_stat *one,
					struct lock_stat *two)
{
	u64 s1 = one->wait_time_min;
	u64 s2 = two->wait_time_min;
	if (s1 == ULLONG_MAX)
		s1 = 0;
	if (s2 == ULLONG_MAX)
		s2 = 0;
	return s1 > s2;
}

struct lock_key {
	/*
	 * name: the key name specified by the user;
	 * it should be simpler than the raw name of the member,
	 * e.g. nr_acquired -> acquired, wait_time_total -> wait_total
	 */
	const char		*name;
	/* header: the string printed on the header line */
	const char		*header;
	/* len: the printing width of the field */
	int			len;
	/* key: a pointer to a function comparing two lock stats for sorting */
	int			(*key)(struct lock_stat*, struct lock_stat*);
	/* print: a pointer to a function printing a given lock stat */
	void			(*print)(struct lock_key*, struct lock_stat*);
	/* list: list entry to link this */
	struct list_head	list;
};

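/*
 * Print a time in the largest unit that fits: for example 1500000 nsec
 * comes out as "1.50 ms" and 90000000000 nsec as "1.50 m ".
 */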
static void lock_stat_key_print_time(unsigned long long nsec, int len)
{
	static const struct {
		float base;
		const char *unit;
	} table[] = {
		{ 1e9 * 3600, "h " },
		{ 1e9 * 60, "m " },
		{ 1e9, "s " },
		{ 1e6, "ms" },
		{ 1e3, "us" },
		{ 0, NULL },
	};

	for (int i = 0; table[i].unit; i++) {
		if (nsec < table[i].base)
			continue;

		pr_info("%*.2f %s", len - 3, nsec / table[i].base, table[i].unit);
		return;
	}

	pr_info("%*llu %s", len - 3, nsec, "ns");
}

#define PRINT_KEY(member)						\
static void lock_stat_key_print_ ## member(struct lock_key *key,	\
					   struct lock_stat *ls)	\
{									\
	pr_info("%*llu", key->len, (unsigned long long)ls->member);	\
}

#define PRINT_TIME(member)						\
static void lock_stat_key_print_ ## member(struct lock_key *key,	\
					   struct lock_stat *ls)	\
{									\
	lock_stat_key_print_time((unsigned long long)ls->member, key->len);	\
}

PRINT_KEY(nr_acquired)
PRINT_KEY(nr_contended)
PRINT_TIME(avg_wait_time)
PRINT_TIME(wait_time_total)
PRINT_TIME(wait_time_max)

static void lock_stat_key_print_wait_time_min(struct lock_key *key,
					      struct lock_stat *ls)
{
	u64 wait_time = ls->wait_time_min;

	if (wait_time == ULLONG_MAX)
		wait_time = 0;

	lock_stat_key_print_time(wait_time, key->len);
}


static const char		*sort_key = "acquired";

static int			(*compare)(struct lock_stat *, struct lock_stat *);

static struct rb_root		sorted; /* place to store intermediate data */
static struct rb_root		result;	/* place to store sorted data */

static LIST_HEAD(lock_keys);
static const char		*output_fields;

#define DEF_KEY_LOCK(name, header, fn_suffix, len)			\
	{ #name, header, len, lock_stat_key_ ## fn_suffix, lock_stat_key_print_ ## fn_suffix, {} }
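/*
 * e.g. DEF_KEY_LOCK(acquired, "acquired", nr_acquired, 10) yields
 * { "acquired", "acquired", 10, lock_stat_key_nr_acquired,
 *   lock_stat_key_print_nr_acquired, {} }
 */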
static struct lock_key report_keys[] = {
	DEF_KEY_LOCK(acquired, "acquired", nr_acquired, 10),
	DEF_KEY_LOCK(contended, "contended", nr_contended, 10),
	DEF_KEY_LOCK(avg_wait, "avg wait", avg_wait_time, 12),
	DEF_KEY_LOCK(wait_total, "total wait", wait_time_total, 12),
	DEF_KEY_LOCK(wait_max, "max wait", wait_time_max, 12),
	DEF_KEY_LOCK(wait_min, "min wait", wait_time_min, 12),

	/* more complicated comparisons should go here */
	{ }
};

static struct lock_key contention_keys[] = {
	DEF_KEY_LOCK(contended, "contended", nr_contended, 10),
	DEF_KEY_LOCK(wait_total, "total wait", wait_time_total, 12),
	DEF_KEY_LOCK(wait_max, "max wait", wait_time_max, 12),
	DEF_KEY_LOCK(wait_min, "min wait", wait_time_min, 12),
	DEF_KEY_LOCK(avg_wait, "avg wait", avg_wait_time, 12),

	/* more complicated comparisons should go here */
	{ }
};

static int select_key(bool contention)
{
	int i;
	struct lock_key *keys = report_keys;

	if (contention)
		keys = contention_keys;

	for (i = 0; keys[i].name; i++) {
		if (!strcmp(keys[i].name, sort_key)) {
			compare = keys[i].key;

			/* selected key should be in the output fields */
			if (list_empty(&keys[i].list))
				list_add_tail(&keys[i].list, &lock_keys);

			return 0;
		}
	}

	pr_err("Unknown compare key: %s\n", sort_key);
	return -1;
}

static int add_output_field(bool contention, char *name)
{
	int i;
	struct lock_key *keys = report_keys;

	if (contention)
		keys = contention_keys;

	for (i = 0; keys[i].name; i++) {
		if (strcmp(keys[i].name, name))
			continue;

		/* prevent double link */
		if (list_empty(&keys[i].list))
			list_add_tail(&keys[i].list, &lock_keys);

		return 0;
	}

	pr_err("Unknown output field: %s\n", name);
	return -1;
}

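/*
 * Parse a comma-separated field list, e.g. "contended,wait_total",
 * and link the matching keys into lock_keys in the given order.
 */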
static int setup_output_field(bool contention, const char *str)
{
	char *tok, *tmp, *orig;
	int i, ret = 0;
	struct lock_key *keys = report_keys;

	if (contention)
		keys = contention_keys;

	/* no output field given: use all of them */
	if (str == NULL) {
		for (i = 0; keys[i].name; i++)
			list_add_tail(&keys[i].list, &lock_keys);
		return 0;
	}

	for (i = 0; keys[i].name; i++)
		INIT_LIST_HEAD(&keys[i].list);

	orig = tmp = strdup(str);
	if (orig == NULL)
		return -ENOMEM;

	while ((tok = strsep(&tmp, ",")) != NULL) {
		ret = add_output_field(contention, tok);
		if (ret < 0)
			break;
	}
	free(orig);

	return ret;
}

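/*
 * Merge stats of locks that share a name into a single entry in the
 * 'sorted' tree.  An entry merged into another is marked ->combined so
 * that insert_to_result() will skip it later.
 */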
static void combine_lock_stats(struct lock_stat *st)
{
	struct rb_node **rb = &sorted.rb_node;
	struct rb_node *parent = NULL;
	struct lock_stat *p;
	int ret;

	while (*rb) {
		p = container_of(*rb, struct lock_stat, rb);
		parent = *rb;

		if (st->name && p->name)
			ret = strcmp(st->name, p->name);
		else
			ret = !!st->name - !!p->name;

		if (ret == 0) {
			p->nr_acquired += st->nr_acquired;
			p->nr_contended += st->nr_contended;
			p->wait_time_total += st->wait_time_total;

			if (p->nr_contended)
				p->avg_wait_time = p->wait_time_total / p->nr_contended;

			if (p->wait_time_min > st->wait_time_min)
				p->wait_time_min = st->wait_time_min;
			if (p->wait_time_max < st->wait_time_max)
				p->wait_time_max = st->wait_time_max;

			p->broken |= st->broken;
			st->combined = 1;
			return;
		}

		if (ret < 0)
			rb = &(*rb)->rb_left;
		else
			rb = &(*rb)->rb_right;
	}

	rb_link_node(&st->rb, parent, rb);
	rb_insert_color(&st->rb, &sorted);
}

static void insert_to_result(struct lock_stat *st,
			     int (*bigger)(struct lock_stat *, struct lock_stat *))
{
	struct rb_node **rb = &result.rb_node;
	struct rb_node *parent = NULL;
	struct lock_stat *p;

	if (combine_locks && st->combined)
		return;

	while (*rb) {
		p = container_of(*rb, struct lock_stat, rb);
		parent = *rb;

		if (bigger(st, p))
			rb = &(*rb)->rb_left;
		else
			rb = &(*rb)->rb_right;
	}

	rb_link_node(&st->rb, parent, rb);
	rb_insert_color(&st->rb, &result);
}

/* return the leftmost element of result, and erase it */
static struct lock_stat *pop_from_result(void)
{
	struct rb_node *node = result.rb_node;

	if (!node)
		return NULL;

	while (node->rb_left)
		node = node->rb_left;

	rb_erase(node, &result);
	return container_of(node, struct lock_stat, rb);
}

static struct lock_stat *lock_stat_find(u64 addr)
{
	struct hlist_head *entry = lockhashentry(addr);
	struct lock_stat *ret;

	hlist_for_each_entry(ret, entry, hash_entry) {
		if (ret->addr == addr)
			return ret;
	}
	return NULL;
}

static struct lock_stat *lock_stat_findnew(u64 addr, const char *name, int flags)
{
	struct hlist_head *entry = lockhashentry(addr);
	struct lock_stat *ret, *new;

	hlist_for_each_entry(ret, entry, hash_entry) {
		if (ret->addr == addr)
			return ret;
	}

	new = zalloc(sizeof(struct lock_stat));
	if (!new)
		goto alloc_failed;

	new->addr = addr;
	new->name = strdup(name);
	if (!new->name) {
		free(new);
		goto alloc_failed;
	}

	new->flags = flags;
	new->wait_time_min = ULLONG_MAX;

	hlist_add_head(&new->hash_entry, entry);
	return new;

alloc_failed:
	pr_err("memory allocation failed\n");
	return NULL;
}

struct trace_lock_handler {
	/* used when CONFIG_LOCKDEP is enabled */
	int (*acquire_event)(struct evsel *evsel,
			     struct perf_sample *sample);

	/* used when CONFIG_LOCKDEP and CONFIG_LOCK_STAT are enabled */
	int (*acquired_event)(struct evsel *evsel,
			      struct perf_sample *sample);

	/* used when CONFIG_LOCKDEP and CONFIG_LOCK_STAT are enabled */
	int (*contended_event)(struct evsel *evsel,
			       struct perf_sample *sample);

	/* used when CONFIG_LOCKDEP is enabled */
	int (*release_event)(struct evsel *evsel,
			     struct perf_sample *sample);

	/* used when CONFIG_LOCKDEP is off */
	int (*contention_begin_event)(struct evsel *evsel,
				      struct perf_sample *sample);

	/* used when CONFIG_LOCKDEP is off */
	int (*contention_end_event)(struct evsel *evsel,
				    struct perf_sample *sample);
};

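/* find or create the per-thread lock sequence state for @addr */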
static struct lock_seq_stat *get_seq(struct thread_stat *ts, u64 addr)
{
	struct lock_seq_stat *seq;

	list_for_each_entry(seq, &ts->seq_list, list) {
		if (seq->addr == addr)
			return seq;
	}

	seq = zalloc(sizeof(struct lock_seq_stat));
	if (!seq) {
		pr_err("memory allocation failed\n");
		return NULL;
	}
	seq->state = SEQ_STATE_UNINITIALIZED;
	seq->addr = addr;

	list_add(&seq->list, &ts->seq_list);
	return seq;
}

enum broken_state {
	BROKEN_ACQUIRE,
	BROKEN_ACQUIRED,
	BROKEN_CONTENDED,
	BROKEN_RELEASE,
	BROKEN_MAX,
};

static int bad_hist[BROKEN_MAX];

enum acquire_flags {
	TRY_LOCK = 1,
	READ_LOCK = 2,
};

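/* the aggregation key is the lock address or the owning tid, depending on aggr_mode */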
static int get_key_by_aggr_mode_simple(u64 *key, u64 addr, u32 tid)
{
	switch (aggr_mode) {
	case LOCK_AGGR_ADDR:
		*key = addr;
		break;
	case LOCK_AGGR_TASK:
		*key = tid;
		break;
	case LOCK_AGGR_CALLER:
	default:
		pr_err("Invalid aggregation mode: %d\n", aggr_mode);
		return -EINVAL;
	}
	return 0;
}

static u64 callchain_id(struct evsel *evsel, struct perf_sample *sample);

static int get_key_by_aggr_mode(u64 *key, u64 addr, struct evsel *evsel,
				 struct perf_sample *sample)
{
	if (aggr_mode == LOCK_AGGR_CALLER) {
		*key = callchain_id(evsel, sample);
		return 0;
	}
	return get_key_by_aggr_mode_simple(key, addr, sample->tid);
}

static int report_lock_acquire_event(struct evsel *evsel,
				     struct perf_sample *sample)
{
	struct lock_stat *ls;
	struct thread_stat *ts;
	struct lock_seq_stat *seq;
	const char *name = evsel__strval(evsel, sample, "name");
	u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
	int flag = evsel__intval(evsel, sample, "flags");
	u64 key;
	int ret;

	ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid);
	if (ret < 0)
		return ret;

	ls = lock_stat_findnew(key, name, 0);
	if (!ls)
		return -ENOMEM;

	ts = thread_stat_findnew(sample->tid);
	if (!ts)
		return -ENOMEM;

	seq = get_seq(ts, addr);
	if (!seq)
		return -ENOMEM;

	switch (seq->state) {
	case SEQ_STATE_UNINITIALIZED:
	case SEQ_STATE_RELEASED:
		if (!flag) {
			seq->state = SEQ_STATE_ACQUIRING;
		} else {
			if (flag & TRY_LOCK)
				ls->nr_trylock++;
			if (flag & READ_LOCK)
				ls->nr_readlock++;
			seq->state = SEQ_STATE_READ_ACQUIRED;
			seq->read_count = 1;
			ls->nr_acquired++;
		}
		break;
	case SEQ_STATE_READ_ACQUIRED:
		if (flag & READ_LOCK) {
			seq->read_count++;
			ls->nr_acquired++;
			goto end;
		} else {
			goto broken;
		}
		break;
	case SEQ_STATE_ACQUIRED:
	case SEQ_STATE_ACQUIRING:
	case SEQ_STATE_CONTENDED:
broken:
		/* broken lock sequence */
		if (!ls->broken) {
			ls->broken = 1;
			bad_hist[BROKEN_ACQUIRE]++;
		}
		list_del_init(&seq->list);
		free(seq);
		goto end;
	default:
		BUG_ON("Unknown state of lock sequence found!\n");
		break;
	}

	ls->nr_acquire++;
	seq->prev_event_time = sample->time;
end:
	return 0;
}

static int report_lock_acquired_event(struct evsel *evsel,
				      struct perf_sample *sample)
{
	struct lock_stat *ls;
	struct thread_stat *ts;
	struct lock_seq_stat *seq;
	u64 contended_term;
	const char *name = evsel__strval(evsel, sample, "name");
	u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
	u64 key;
	int ret;

	ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid);
	if (ret < 0)
		return ret;

	ls = lock_stat_findnew(key, name, 0);
	if (!ls)
		return -ENOMEM;

	ts = thread_stat_findnew(sample->tid);
	if (!ts)
		return -ENOMEM;

	seq = get_seq(ts, addr);
	if (!seq)
		return -ENOMEM;

	switch (seq->state) {
	case SEQ_STATE_UNINITIALIZED:
		/* orphan event, do nothing */
		return 0;
	case SEQ_STATE_ACQUIRING:
		break;
	case SEQ_STATE_CONTENDED:
		contended_term = sample->time - seq->prev_event_time;
		ls->wait_time_total += contended_term;
		if (contended_term < ls->wait_time_min)
			ls->wait_time_min = contended_term;
		if (ls->wait_time_max < contended_term)
			ls->wait_time_max = contended_term;
		break;
	case SEQ_STATE_RELEASED:
	case SEQ_STATE_ACQUIRED:
	case SEQ_STATE_READ_ACQUIRED:
		/* broken lock sequence */
		if (!ls->broken) {
			ls->broken = 1;
			bad_hist[BROKEN_ACQUIRED]++;
		}
		list_del_init(&seq->list);
		free(seq);
		goto end;
	default:
		BUG_ON("Unknown state of lock sequence found!\n");
		break;
	}

	seq->state = SEQ_STATE_ACQUIRED;
	ls->nr_acquired++;
	ls->avg_wait_time = ls->nr_contended ? ls->wait_time_total/ls->nr_contended : 0;
	seq->prev_event_time = sample->time;
end:
	return 0;
}

static int report_lock_contended_event(struct evsel *evsel,
				       struct perf_sample *sample)
{
	struct lock_stat *ls;
	struct thread_stat *ts;
	struct lock_seq_stat *seq;
	const char *name = evsel__strval(evsel, sample, "name");
	u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
	u64 key;
	int ret;

	ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid);
	if (ret < 0)
		return ret;

	ls = lock_stat_findnew(key, name, 0);
	if (!ls)
		return -ENOMEM;

	ts = thread_stat_findnew(sample->tid);
	if (!ts)
		return -ENOMEM;

	seq = get_seq(ts, addr);
	if (!seq)
		return -ENOMEM;

	switch (seq->state) {
	case SEQ_STATE_UNINITIALIZED:
		/* orphan event, do nothing */
		return 0;
	case SEQ_STATE_ACQUIRING:
		break;
	case SEQ_STATE_RELEASED:
	case SEQ_STATE_ACQUIRED:
	case SEQ_STATE_READ_ACQUIRED:
	case SEQ_STATE_CONTENDED:
		/* broken lock sequence */
		if (!ls->broken) {
			ls->broken = 1;
			bad_hist[BROKEN_CONTENDED]++;
		}
		list_del_init(&seq->list);
		free(seq);
		goto end;
	default:
		BUG_ON("Unknown state of lock sequence found!\n");
		break;
	}

	seq->state = SEQ_STATE_CONTENDED;
	ls->nr_contended++;
	ls->avg_wait_time = ls->wait_time_total/ls->nr_contended;
	seq->prev_event_time = sample->time;
end:
	return 0;
}

static int report_lock_release_event(struct evsel *evsel,
				     struct perf_sample *sample)
{
	struct lock_stat *ls;
	struct thread_stat *ts;
	struct lock_seq_stat *seq;
	const char *name = evsel__strval(evsel, sample, "name");
	u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
	u64 key;
	int ret;

	ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid);
	if (ret < 0)
		return ret;

	ls = lock_stat_findnew(key, name, 0);
	if (!ls)
		return -ENOMEM;

	ts = thread_stat_findnew(sample->tid);
	if (!ts)
		return -ENOMEM;

	seq = get_seq(ts, addr);
	if (!seq)
		return -ENOMEM;

	switch (seq->state) {
	case SEQ_STATE_UNINITIALIZED:
		goto end;
	case SEQ_STATE_ACQUIRED:
		break;
	case SEQ_STATE_READ_ACQUIRED:
		seq->read_count--;
		BUG_ON(seq->read_count < 0);
		if (seq->read_count) {
			ls->nr_release++;
			goto end;
		}
		break;
	case SEQ_STATE_ACQUIRING:
	case SEQ_STATE_CONTENDED:
	case SEQ_STATE_RELEASED:
		/* broken lock sequence */
		if (!ls->broken) {
			ls->broken = 1;
			bad_hist[BROKEN_RELEASE]++;
		}
		goto free_seq;
	default:
		BUG_ON("Unknown state of lock sequence found!\n");
		break;
	}

	ls->nr_release++;
free_seq:
	list_del_init(&seq->list);
	free(seq);
end:
	return 0;
}

static int get_symbol_name_offset(struct map *map, struct symbol *sym, u64 ip,
				  char *buf, int size)
{
	u64 offset;

	if (map == NULL || sym == NULL) {
		buf[0] = '\0';
		return 0;
	}

	offset = map->map_ip(map, ip) - sym->start;

	if (offset)
		return scnprintf(buf, size, "%s+%#lx", sym->name, offset);
	else
		return strlcpy(buf, sym->name, size);
}

static int lock_contention_caller(struct evsel *evsel, struct perf_sample *sample,
				  char *buf, int size)
{
	struct thread *thread;
	struct callchain_cursor *cursor = &callchain_cursor;
	struct machine *machine = &session->machines.host;
	struct symbol *sym;
	int skip = 0;
	int ret;

	/* lock names will be replaced with task names later */
	if (show_thread_stats)
		return -1;

	thread = machine__findnew_thread(machine, -1, sample->pid);
	if (thread == NULL)
		return -1;

	/* use caller function name from the callchain */
	ret = thread__resolve_callchain(thread, cursor, evsel, sample,
					NULL, NULL, max_stack_depth);
	if (ret != 0) {
		thread__put(thread);
		return -1;
	}

	callchain_cursor_commit(cursor);
	thread__put(thread);

	while (true) {
		struct callchain_cursor_node *node;

		node = callchain_cursor_current(cursor);
		if (node == NULL)
			break;

		/* skip first few entries - for lock functions */
		if (++skip <= stack_skip)
			goto next;

		sym = node->ms.sym;
		if (sym && !machine__is_lock_function(machine, node->ip)) {
			get_symbol_name_offset(node->ms.map, sym, node->ip,
					       buf, size);
			return 0;
		}

next:
		callchain_cursor_advance(cursor);
	}
	return -1;
}

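/*
 * Build a stable id for a contention callchain by xor-ing the hashed
 * IPs of the entries below the skipped lock-function prologue.
 */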
static u64 callchain_id(struct evsel *evsel, struct perf_sample *sample)
{
	struct callchain_cursor *cursor = &callchain_cursor;
	struct machine *machine = &session->machines.host;
	struct thread *thread;
	u64 hash = 0;
	int skip = 0;
	int ret;

	thread = machine__findnew_thread(machine, -1, sample->pid);
	if (thread == NULL)
		return -1;

	/* use caller function name from the callchain */
	ret = thread__resolve_callchain(thread, cursor, evsel, sample,
					NULL, NULL, max_stack_depth);
	thread__put(thread);

	if (ret != 0)
		return -1;

	callchain_cursor_commit(cursor);

	while (true) {
		struct callchain_cursor_node *node;

		node = callchain_cursor_current(cursor);
		if (node == NULL)
			break;

		/* skip first few entries - for lock functions */
		if (++skip <= stack_skip)
			goto next;

		if (node->ms.sym && machine__is_lock_function(machine, node->ip))
			goto next;

		hash ^= hash_long((unsigned long)node->ip, 64);

next:
		callchain_cursor_advance(cursor);
	}
	return hash;
}

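/*
 * Copy up to @max_stack real IPs out of the sample's callchain,
 * skipping PERF_CONTEXT_* marker entries.
 */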
static u64 *get_callstack(struct perf_sample *sample, int max_stack)
{
	u64 *callstack;
	u64 i;
	int c;

	callstack = calloc(max_stack, sizeof(*callstack));
	if (callstack == NULL)
		return NULL;

	for (i = 0, c = 0; i < sample->callchain->nr && c < max_stack; i++) {
		u64 ip = sample->callchain->ips[i];

		if (ip >= PERF_CONTEXT_MAX)
			continue;

		callstack[c++] = ip;
	}
	return callstack;
}

static int report_lock_contention_begin_event(struct evsel *evsel,
					      struct perf_sample *sample)
{
	struct lock_stat *ls;
	struct thread_stat *ts;
	struct lock_seq_stat *seq;
	u64 addr = evsel__intval(evsel, sample, "lock_addr");
	u64 key;
	int ret;

	ret = get_key_by_aggr_mode(&key, addr, evsel, sample);
	if (ret < 0)
		return ret;

	ls = lock_stat_find(key);
	if (!ls) {
		char buf[128];
		const char *caller = buf;
		unsigned int flags = evsel__intval(evsel, sample, "flags");

		if (lock_contention_caller(evsel, sample, buf, sizeof(buf)) < 0)
			caller = "Unknown";

		ls = lock_stat_findnew(key, caller, flags);
		if (!ls)
			return -ENOMEM;

		if (aggr_mode == LOCK_AGGR_CALLER && verbose) {
			ls->callstack = get_callstack(sample, max_stack_depth);
			if (ls->callstack == NULL)
				return -ENOMEM;
		}
	}

	ts = thread_stat_findnew(sample->tid);
	if (!ts)
		return -ENOMEM;

	seq = get_seq(ts, addr);
	if (!seq)
		return -ENOMEM;

	switch (seq->state) {
	case SEQ_STATE_UNINITIALIZED:
	case SEQ_STATE_ACQUIRED:
		break;
	case SEQ_STATE_CONTENDED:
		/*
		 * Contention can nest, e.g. when a mutex spins before
		 * sleeping; keep the original contention begin event and
		 * ignore the nested one.
		 */
		goto end;
	case SEQ_STATE_ACQUIRING:
	case SEQ_STATE_READ_ACQUIRED:
	case SEQ_STATE_RELEASED:
		/* broken lock sequence */
		if (!ls->broken) {
			ls->broken = 1;
			bad_hist[BROKEN_CONTENDED]++;
		}
		list_del_init(&seq->list);
		free(seq);
		goto end;
	default:
		BUG_ON("Unknown state of lock sequence found!\n");
		break;
	}

	if (seq->state != SEQ_STATE_CONTENDED) {
		seq->state = SEQ_STATE_CONTENDED;
		seq->prev_event_time = sample->time;
		ls->nr_contended++;
	}
end:
	return 0;
}

static int report_lock_contention_end_event(struct evsel *evsel,
					    struct perf_sample *sample)
{
	struct lock_stat *ls;
	struct thread_stat *ts;
	struct lock_seq_stat *seq;
	u64 contended_term;
	u64 addr = evsel__intval(evsel, sample, "lock_addr");
	u64 key;
	int ret;

	ret = get_key_by_aggr_mode(&key, addr, evsel, sample);
	if (ret < 0)
		return ret;

	ls = lock_stat_find(key);
	if (!ls)
		return 0;

	ts = thread_stat_find(sample->tid);
	if (!ts)
		return 0;

	seq = get_seq(ts, addr);
	if (!seq)
		return -ENOMEM;

	switch (seq->state) {
	case SEQ_STATE_UNINITIALIZED:
		goto end;
	case SEQ_STATE_CONTENDED:
		contended_term = sample->time - seq->prev_event_time;
		ls->wait_time_total += contended_term;
		if (contended_term < ls->wait_time_min)
			ls->wait_time_min = contended_term;
		if (ls->wait_time_max < contended_term)
			ls->wait_time_max = contended_term;
		break;
	case SEQ_STATE_ACQUIRING:
	case SEQ_STATE_ACQUIRED:
	case SEQ_STATE_READ_ACQUIRED:
	case SEQ_STATE_RELEASED:
		/* broken lock sequence */
		if (!ls->broken) {
			ls->broken = 1;
			bad_hist[BROKEN_ACQUIRED]++;
		}
		list_del_init(&seq->list);
		free(seq);
		goto end;
	default:
		BUG_ON("Unknown state of lock sequence found!\n");
		break;
	}

	seq->state = SEQ_STATE_ACQUIRED;
	ls->nr_acquired++;
	ls->avg_wait_time = ls->wait_time_total/ls->nr_acquired;
end:
	return 0;
}

/* lock oriented handlers */
/* TODO: handlers for CPU oriented, thread oriented */
static struct trace_lock_handler report_lock_ops  = {
	.acquire_event		= report_lock_acquire_event,
	.acquired_event		= report_lock_acquired_event,
	.contended_event	= report_lock_contended_event,
	.release_event		= report_lock_release_event,
	.contention_begin_event	= report_lock_contention_begin_event,
	.contention_end_event	= report_lock_contention_end_event,
};

static struct trace_lock_handler contention_lock_ops  = {
	.contention_begin_event	= report_lock_contention_begin_event,
	.contention_end_event	= report_lock_contention_end_event,
};


static struct trace_lock_handler *trace_handler;

static int evsel__process_lock_acquire(struct evsel *evsel, struct perf_sample *sample)
{
	if (trace_handler->acquire_event)
		return trace_handler->acquire_event(evsel, sample);
	return 0;
}

static int evsel__process_lock_acquired(struct evsel *evsel, struct perf_sample *sample)
{
	if (trace_handler->acquired_event)
		return trace_handler->acquired_event(evsel, sample);
	return 0;
}

static int evsel__process_lock_contended(struct evsel *evsel, struct perf_sample *sample)
{
	if (trace_handler->contended_event)
		return trace_handler->contended_event(evsel, sample);
	return 0;
}

static int evsel__process_lock_release(struct evsel *evsel, struct perf_sample *sample)
{
	if (trace_handler->release_event)
		return trace_handler->release_event(evsel, sample);
	return 0;
}

static int evsel__process_contention_begin(struct evsel *evsel, struct perf_sample *sample)
{
	if (trace_handler->contention_begin_event)
		return trace_handler->contention_begin_event(evsel, sample);
	return 0;
}

static int evsel__process_contention_end(struct evsel *evsel, struct perf_sample *sample)
{
	if (trace_handler->contention_end_event)
		return trace_handler->contention_end_event(evsel, sample);
	return 0;
}

static void print_bad_events(int bad, int total)
{
	/* debug output; this has to be removed eventually */
	int i;
	int broken = 0;
	const char *name[4] =
		{ "acquire", "acquired", "contended", "release" };

	for (i = 0; i < BROKEN_MAX; i++)
		broken += bad_hist[i];

	if (quiet || (broken == 0 && !verbose))
		return;

	pr_info("\n=== output for debug ===\n\n");
	pr_info("bad: %d, total: %d\n", bad, total);
	pr_info("bad rate: %.2f %%\n", (double)bad / (double)total * 100);
	pr_info("histogram of events that caused bad sequences\n");
	for (i = 0; i < BROKEN_MAX; i++)
		pr_info(" %10s: %d\n", name[i], bad_hist[i]);
}

/* TODO: various ways to print, coloring, nano or milli sec */
static void print_result(void)
{
	struct lock_stat *st;
	struct lock_key *key;
	char cut_name[20];
	int bad, total, printed;

	if (!quiet) {
		pr_info("%20s ", "Name");
		list_for_each_entry(key, &lock_keys, list)
			pr_info("%*s ", key->len, key->header);
		pr_info("\n\n");
	}

	bad = total = printed = 0;
	while ((st = pop_from_result())) {
		total++;
		if (st->broken)
			bad++;
		if (!st->nr_acquired)
			continue;

		bzero(cut_name, 20);

		if (strlen(st->name) < 20) {
			/* output raw name */
			const char *name = st->name;

			if (show_thread_stats) {
				struct thread *t;

				/* st->addr contains tid of thread */
				t = perf_session__findnew(session, st->addr);
				name = thread__comm_str(t);
			}

			pr_info("%20s ", name);
		} else {
			strncpy(cut_name, st->name, 16);
			cut_name[16] = '.';
			cut_name[17] = '.';
			cut_name[18] = '.';
			cut_name[19] = '\0';
			/* cut off the name to preserve the output alignment */
			pr_info("%20s ", cut_name);
		}

		list_for_each_entry(key, &lock_keys, list) {
			key->print(key, st);
			pr_info(" ");
		}
		pr_info("\n");

		if (++printed >= print_nr_entries)
			break;
	}

	print_bad_events(bad, total);
}

static bool info_threads, info_map;

static void dump_threads(void)
{
	struct thread_stat *st;
	struct rb_node *node;
	struct thread *t;

	pr_info("%10s: comm\n", "Thread ID");

	node = rb_first(&thread_stats);
	while (node) {
		st = container_of(node, struct thread_stat, rb);
		t = perf_session__findnew(session, st->tid);
		pr_info("%10d: %s\n", st->tid, thread__comm_str(t));
		node = rb_next(node);
		thread__put(t);
	}
}

static int compare_maps(struct lock_stat *a, struct lock_stat *b)
{
	int ret;

	if (a->name && b->name)
		ret = strcmp(a->name, b->name);
	else
		ret = !!a->name - !!b->name;

	if (!ret)
		return a->addr < b->addr;
	else
		return ret < 0;
}

static void dump_map(void)
{
	unsigned int i;
	struct lock_stat *st;

	pr_info("Address of instance: name of class\n");
	for (i = 0; i < LOCKHASH_SIZE; i++) {
		hlist_for_each_entry(st, &lockhash_table[i], hash_entry) {
			insert_to_result(st, compare_maps);
		}
	}

	while ((st = pop_from_result()))
		pr_info(" %#llx: %s\n", (unsigned long long)st->addr, st->name);
}

static int dump_info(void)
{
	int rc = 0;

	if (info_threads)
		dump_threads();
	else if (info_map)
		dump_map();
	else {
		rc = -1;
		pr_err("Unknown type of information\n");
	}

	return rc;
}

static const struct evsel_str_handler lock_tracepoints[] = {
	{ "lock:lock_acquire",	 evsel__process_lock_acquire,   }, /* CONFIG_LOCKDEP */
	{ "lock:lock_acquired",	 evsel__process_lock_acquired,  }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */
	{ "lock:lock_contended", evsel__process_lock_contended, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */
	{ "lock:lock_release",	 evsel__process_lock_release,   }, /* CONFIG_LOCKDEP */
};

static const struct evsel_str_handler contention_tracepoints[] = {
	{ "lock:contention_begin", evsel__process_contention_begin, },
	{ "lock:contention_end",   evsel__process_contention_end,   },
};

static int process_event_update(struct perf_tool *tool,
				union perf_event *event,
				struct evlist **pevlist)
{
	int ret;

	ret = perf_event__process_event_update(tool, event, pevlist);
	if (ret < 0)
		return ret;

	/* this can return -EEXIST since we call it for each evsel */
	perf_session__set_tracepoints_handlers(session, lock_tracepoints);
	perf_session__set_tracepoints_handlers(session, contention_tracepoints);
	return 0;
}

typedef int (*tracepoint_handler)(struct evsel *evsel,
				  struct perf_sample *sample);

static int process_sample_event(struct perf_tool *tool __maybe_unused,
				union perf_event *event,
				struct perf_sample *sample,
				struct evsel *evsel,
				struct machine *machine)
{
	int err = 0;
	struct thread *thread = machine__findnew_thread(machine, sample->pid,
							sample->tid);

	if (thread == NULL) {
		pr_debug("problem processing %d event, skipping it.\n",
			event->header.type);
		return -1;
	}

	if (evsel->handler != NULL) {
		tracepoint_handler f = evsel->handler;
		err = f(evsel, sample);
	}

	thread__put(thread);

	return err;
}

static void combine_result(void)
{
	unsigned int i;
	struct lock_stat *st;

	if (!combine_locks)
		return;

	for (i = 0; i < LOCKHASH_SIZE; i++) {
		hlist_for_each_entry(st, &lockhash_table[i], hash_entry) {
			combine_lock_stats(st);
		}
	}
}

static void sort_result(void)
{
	unsigned int i;
	struct lock_stat *st;

	for (i = 0; i < LOCKHASH_SIZE; i++) {
		hlist_for_each_entry(st, &lockhash_table[i], hash_entry) {
			insert_to_result(st, compare);
		}
	}
}

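/*
 * Map LCB_F_* flag combinations to a human-readable lock type,
 * e.g. LCB_F_SPIN | LCB_F_READ maps to "rwlock:R".
 */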
static const char *get_type_str(struct lock_stat *st)
{
	static const struct {
		unsigned int flags;
		const char *name;
	} table[] = {
		{ 0,				"semaphore" },
		{ LCB_F_SPIN,			"spinlock" },
		{ LCB_F_SPIN | LCB_F_READ,	"rwlock:R" },
		{ LCB_F_SPIN | LCB_F_WRITE,	"rwlock:W" },
		{ LCB_F_READ,			"rwsem:R" },
		{ LCB_F_WRITE,			"rwsem:W" },
		{ LCB_F_RT,			"rtmutex" },
		{ LCB_F_RT | LCB_F_READ,	"rwlock-rt:R" },
		{ LCB_F_RT | LCB_F_WRITE,	"rwlock-rt:W" },
		{ LCB_F_PERCPU | LCB_F_READ,	"pcpu-sem:R" },
		{ LCB_F_PERCPU | LCB_F_WRITE,	"pcpu-sem:W" },
		{ LCB_F_MUTEX,			"mutex" },
		{ LCB_F_MUTEX | LCB_F_SPIN,	"mutex" },
	};

	for (unsigned int i = 0; i < ARRAY_SIZE(table); i++) {
		if (table[i].flags == st->flags)
			return table[i].name;
	}
	return "unknown";
}

static void sort_contention_result(void)
{
	sort_result();
}

static void print_contention_result(struct lock_contention *con)
{
	struct lock_stat *st;
	struct lock_key *key;
	int bad, total, printed;

	if (!quiet) {
		list_for_each_entry(key, &lock_keys, list)
			pr_info("%*s ", key->len, key->header);

		if (show_thread_stats)
			pr_info("  %10s   %s\n\n", "pid", "comm");
		else
			pr_info("  %10s   %s\n\n", "type", "caller");
	}

	bad = total = printed = 0;
	if (use_bpf)
		bad = bad_hist[BROKEN_CONTENDED];

	while ((st = pop_from_result())) {
		total += use_bpf ? st->nr_contended : 1;
		if (st->broken)
			bad++;

		list_for_each_entry(key, &lock_keys, list) {
			key->print(key, st);
			pr_info(" ");
		}

		if (show_thread_stats) {
			struct thread *t;
			int pid = st->addr;

			/* st->addr contains tid of thread */
			t = perf_session__findnew(session, pid);
			pr_info("  %10d   %s\n", pid, thread__comm_str(t));
			goto next;
		}

		pr_info("  %10s   %s\n", get_type_str(st), st->name);
		if (verbose) {
			struct map *kmap;
			struct symbol *sym;
			char buf[128];
			u64 ip;

			for (int i = 0; i < max_stack_depth; i++) {
				if (!st->callstack || !st->callstack[i])
					break;

				ip = st->callstack[i];
				sym = machine__find_kernel_symbol(con->machine, ip, &kmap);
				get_symbol_name_offset(kmap, sym, ip, buf, sizeof(buf));
				pr_info("\t\t\t%#lx  %s\n", (unsigned long)ip, buf);
			}
		}

next:
		if (++printed >= print_nr_entries)
			break;
	}

	print_bad_events(bad, total);
}

static bool force;

static int __cmd_report(bool display_info)
{
	int err = -EINVAL;
	struct perf_tool eops = {
		.attr		 = perf_event__process_attr,
		.event_update	 = process_event_update,
		.sample		 = process_sample_event,
		.comm		 = perf_event__process_comm,
		.mmap		 = perf_event__process_mmap,
		.namespaces	 = perf_event__process_namespaces,
		.tracing_data	 = perf_event__process_tracing_data,
		.ordered_events	 = true,
	};
	struct perf_data data = {
		.path  = input_name,
		.mode  = PERF_DATA_MODE_READ,
		.force = force,
	};

	session = perf_session__new(&data, &eops);
	if (IS_ERR(session)) {
		pr_err("Initializing perf session failed\n");
		return PTR_ERR(session);
	}

	/* for lock function check */
	symbol_conf.sort_by_name = true;
	symbol__init(&session->header.env);

	if (!data.is_pipe) {
		if (!perf_session__has_traces(session, "lock record"))
			goto out_delete;

		if (perf_session__set_tracepoints_handlers(session, lock_tracepoints)) {
			pr_err("Initializing perf session tracepoint handlers failed\n");
			goto out_delete;
		}

		if (perf_session__set_tracepoints_handlers(session, contention_tracepoints)) {
			pr_err("Initializing perf session tracepoint handlers failed\n");
			goto out_delete;
		}
	}

	if (setup_output_field(false, output_fields))
		goto out_delete;

	if (select_key(false))
		goto out_delete;

	if (show_thread_stats)
		aggr_mode = LOCK_AGGR_TASK;

	err = perf_session__process_events(session);
	if (err)
		goto out_delete;

	setup_pager();
	if (display_info) /* used for info subcommand */
		err = dump_info();
	else {
		combine_result();
		sort_result();
		print_result();
	}

out_delete:
	perf_session__delete(session);
	return err;
}

static void sighandler(int sig __maybe_unused)
{
}

static int __cmd_contention(int argc, const char **argv)
{
	int err = -EINVAL;
	struct perf_tool eops = {
		.attr		 = perf_event__process_attr,
		.event_update	 = process_event_update,
		.sample		 = process_sample_event,
		.comm		 = perf_event__process_comm,
		.mmap		 = perf_event__process_mmap,
		.tracing_data	 = perf_event__process_tracing_data,
		.ordered_events	 = true,
	};
	struct perf_data data = {
		.path  = input_name,
		.mode  = PERF_DATA_MODE_READ,
		.force = force,
	};
	struct lock_contention con = {
		.target = &target,
		.result = &lockhash_table[0],
		.map_nr_entries = bpf_map_entries,
		.max_stack = max_stack_depth,
		.stack_skip = stack_skip,
		.aggr_mode = show_thread_stats ? LOCK_AGGR_TASK : LOCK_AGGR_CALLER,
	};

	session = perf_session__new(use_bpf ? NULL : &data, &eops);
	if (IS_ERR(session)) {
		pr_err("Initializing perf session failed\n");
		return PTR_ERR(session);
	}

	con.machine = &session->machines.host;

	/* for lock function check */
	symbol_conf.sort_by_name = true;
	symbol__init(&session->header.env);

	if (use_bpf) {
		err = target__validate(&target);
		if (err) {
			char errbuf[512];

			target__strerror(&target, err, errbuf, 512);
			pr_err("%s\n", errbuf);
			goto out_delete;
		}

		signal(SIGINT, sighandler);
		signal(SIGCHLD, sighandler);
		signal(SIGTERM, sighandler);

		con.evlist = evlist__new();
		if (con.evlist == NULL) {
			err = -ENOMEM;
			goto out_delete;
		}

		err = evlist__create_maps(con.evlist, &target);
		if (err < 0)
			goto out_delete;

		if (argc) {
			err = evlist__prepare_workload(con.evlist, &target,
						       argv, false, NULL);
			if (err < 0)
				goto out_delete;
		}

		if (lock_contention_prepare(&con) < 0) {
			pr_err("lock contention BPF setup failed\n");
			goto out_delete;
		}
	} else if (!data.is_pipe) {
		if (!perf_session__has_traces(session, "lock record"))
			goto out_delete;

		if (!evlist__find_evsel_by_str(session->evlist,
					       "lock:contention_begin")) {
			pr_err("lock contention evsel not found\n");
			goto out_delete;
		}

		if (perf_session__set_tracepoints_handlers(session,
						contention_tracepoints)) {
			pr_err("Initializing perf session tracepoint handlers failed\n");
			goto out_delete;
		}
	}

	if (setup_output_field(true, output_fields))
		goto out_delete;

	if (select_key(true))
		goto out_delete;

	if (use_bpf) {
		lock_contention_start();
		if (argc)
			evlist__start_workload(con.evlist);

		/* wait for signal */
		pause();

		lock_contention_stop();
		lock_contention_read(&con);

		/* abuse bad hist stats for lost entries */
		bad_hist[BROKEN_CONTENDED] = con.lost;
	} else {
		err = perf_session__process_events(session);
		if (err)
			goto out_delete;
	}

	setup_pager();

	sort_contention_result();
	print_contention_result(&con);

out_delete:
	evlist__delete(con.evlist);
	lock_contention_finish();
	perf_session__delete(session);
	return err;
}

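/*
 * Forward to 'perf record' with the right tracepoints.  When the
 * lock_stat tracepoints are available, the command becomes roughly:
 *
 *   perf record -R -m 1024 -c 1 --synth task \
 *	-e lock:lock_acquire -e lock:lock_acquired \
 *	-e lock:lock_contended -e lock:lock_release <user args>
 *
 * Otherwise it falls back to the two lock:contention_* tracepoints
 * plus "--call-graph fp,<CONTENTION_STACK_DEPTH>".
 */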
static int __cmd_record(int argc, const char **argv)
{
	const char *record_args[] = {
		"record", "-R", "-m", "1024", "-c", "1", "--synth", "task",
	};
	const char *callgraph_args[] = {
		"--call-graph", "fp," __stringify(CONTENTION_STACK_DEPTH),
	};
	unsigned int rec_argc, i, j, ret;
	unsigned int nr_tracepoints;
	unsigned int nr_callgraph_args = 0;
	const char **rec_argv;
	bool has_lock_stat = true;

	for (i = 0; i < ARRAY_SIZE(lock_tracepoints); i++) {
		if (!is_valid_tracepoint(lock_tracepoints[i].name)) {
			pr_debug("tracepoint %s is not enabled. "
				 "Are CONFIG_LOCKDEP and CONFIG_LOCK_STAT enabled?\n",
				 lock_tracepoints[i].name);
			has_lock_stat = false;
			break;
		}
	}

	if (has_lock_stat)
		goto setup_args;

	for (i = 0; i < ARRAY_SIZE(contention_tracepoints); i++) {
		if (!is_valid_tracepoint(contention_tracepoints[i].name)) {
			pr_err("tracepoint %s is not enabled.\n",
			       contention_tracepoints[i].name);
			return 1;
		}
	}

	nr_callgraph_args = ARRAY_SIZE(callgraph_args);

setup_args:
	rec_argc = ARRAY_SIZE(record_args) + nr_callgraph_args + argc - 1;

	if (has_lock_stat)
		nr_tracepoints = ARRAY_SIZE(lock_tracepoints);
	else
		nr_tracepoints = ARRAY_SIZE(contention_tracepoints);

	/* factor of 2 is for -e in front of each tracepoint */
	rec_argc += 2 * nr_tracepoints;

	rec_argv = calloc(rec_argc + 1, sizeof(char *));
	if (!rec_argv)
		return -ENOMEM;

	for (i = 0; i < ARRAY_SIZE(record_args); i++)
		rec_argv[i] = strdup(record_args[i]);

	for (j = 0; j < nr_tracepoints; j++) {
		const char *ev_name;

		if (has_lock_stat)
			ev_name = strdup(lock_tracepoints[j].name);
		else
			ev_name = strdup(contention_tracepoints[j].name);

		if (!ev_name)
			return -ENOMEM;

		rec_argv[i++] = "-e";
		rec_argv[i++] = ev_name;
	}

	for (j = 0; j < nr_callgraph_args; j++, i++)
		rec_argv[i] = callgraph_args[j];

	for (j = 1; j < (unsigned int)argc; j++, i++)
		rec_argv[i] = argv[j];

	BUG_ON(i != rec_argc);

	ret = cmd_record(i, rec_argv);
	free(rec_argv);
	return ret;
}

static int parse_map_entry(const struct option *opt, const char *str,
			    int unset __maybe_unused)
{
	unsigned long *len = (unsigned long *)opt->value;
	unsigned long val;
	char *endptr;

	errno = 0;
	val = strtoul(str, &endptr, 0);
	if (*endptr != '\0' || errno != 0) {
		pr_err("invalid BPF map length: %s\n", str);
		return -1;
	}

	*len = val;
	return 0;
}

static int parse_max_stack(const struct option *opt, const char *str,
			   int unset __maybe_unused)
{
	unsigned long *len = (unsigned long *)opt->value;
	long val;
	char *endptr;

	errno = 0;
	val = strtol(str, &endptr, 0);
	if (*endptr != '\0' || errno != 0) {
		pr_err("invalid max stack depth: %s\n", str);
		return -1;
	}

	if (val < 0 || val > sysctl__max_stack()) {
		pr_err("invalid max stack depth: %ld\n", val);
		return -1;
	}

	*len = val;
	return 0;
}

int cmd_lock(int argc, const char **argv)
{
	const struct option lock_options[] = {
	OPT_STRING('i', "input", &input_name, "file", "input file name"),
	OPT_INCR('v', "verbose", &verbose, "be more verbose (show symbol address, etc)"),
	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"),
	OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
	OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
		   "file", "vmlinux pathname"),
	OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name,
		   "file", "kallsyms pathname"),
	OPT_BOOLEAN('q', "quiet", &quiet, "Do not show any warnings or messages"),
	OPT_END()
	};

	const struct option info_options[] = {
	OPT_BOOLEAN('t', "threads", &info_threads,
		    "dump thread list in perf.data"),
	OPT_BOOLEAN('m', "map", &info_map,
		    "map of lock instances (address:name table)"),
	OPT_PARENT(lock_options)
	};

	const struct option report_options[] = {
	OPT_STRING('k', "key", &sort_key, "acquired",
		    "key for sorting (acquired / contended / avg_wait / wait_total / wait_max / wait_min)"),
	OPT_STRING('F', "field", &output_fields, NULL,
		    "output fields (acquired / contended / avg_wait / wait_total / wait_max / wait_min)"),
	/* TODO: type */
	OPT_BOOLEAN('c', "combine-locks", &combine_locks,
		    "combine locks in the same class"),
	OPT_BOOLEAN('t', "threads", &show_thread_stats,
		    "show per-thread lock stats"),
	OPT_INTEGER('E', "entries", &print_nr_entries, "display this many functions"),
	OPT_PARENT(lock_options)
	};

	struct option contention_options[] = {
	OPT_STRING('k', "key", &sort_key, "wait_total",
		    "key for sorting (contended / wait_total / wait_max / wait_min / avg_wait)"),
	OPT_STRING('F', "field", &output_fields, "contended,wait_total,wait_max,avg_wait",
		    "output fields (contended / wait_total / wait_max / wait_min / avg_wait)"),
	OPT_BOOLEAN('t', "threads", &show_thread_stats,
		    "show per-thread lock stats"),
	OPT_BOOLEAN('b', "use-bpf", &use_bpf, "use BPF program to collect lock contention stats"),
	OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
		    "System-wide collection from all CPUs"),
	OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
		    "List of cpus to monitor"),
	OPT_STRING('p', "pid", &target.pid, "pid",
		   "Trace on existing process id"),
	OPT_STRING(0, "tid", &target.tid, "tid",
		   "Trace on existing thread id (exclusive to --pid)"),
	OPT_CALLBACK(0, "map-nr-entries", &bpf_map_entries, "num",
		     "Max number of BPF map entries", parse_map_entry),
	OPT_CALLBACK(0, "max-stack", &max_stack_depth, "num",
		     "Set the maximum stack depth when collecting lock contention, "
		     "Default: " __stringify(CONTENTION_STACK_DEPTH), parse_max_stack),
	OPT_INTEGER(0, "stack-skip", &stack_skip,
		    "Set the number of stack entries to skip when finding a lock caller, "
		    "Default: " __stringify(CONTENTION_STACK_SKIP)),
	OPT_INTEGER('E', "entries", &print_nr_entries, "display this many functions"),
	OPT_PARENT(lock_options)
	};

	const char * const info_usage[] = {
		"perf lock info [<options>]",
		NULL
	};
	const char *const lock_subcommands[] = { "record", "report", "script",
						 "info", "contention", NULL };
	const char *lock_usage[] = {
		NULL,
		NULL
	};
	const char * const report_usage[] = {
		"perf lock report [<options>]",
		NULL
	};
	const char * const contention_usage[] = {
		"perf lock contention [<options>]",
		NULL
	};
	unsigned int i;
	int rc = 0;

	for (i = 0; i < LOCKHASH_SIZE; i++)
		INIT_HLIST_HEAD(lockhash_table + i);

	argc = parse_options_subcommand(argc, argv, lock_options, lock_subcommands,
					lock_usage, PARSE_OPT_STOP_AT_NON_OPTION);
	if (!argc)
		usage_with_options(lock_usage, lock_options);

	if (strlen(argv[0]) > 2 && strstarts("record", argv[0])) {
		return __cmd_record(argc, argv);
	} else if (strlen(argv[0]) > 2 && strstarts("report", argv[0])) {
		trace_handler = &report_lock_ops;
		if (argc) {
			argc = parse_options(argc, argv,
					     report_options, report_usage, 0);
			if (argc)
				usage_with_options(report_usage, report_options);
		}
		rc = __cmd_report(false);
	} else if (!strcmp(argv[0], "script")) {
		/* Aliased to 'perf script' */
		return cmd_script(argc, argv);
	} else if (!strcmp(argv[0], "info")) {
		if (argc) {
			argc = parse_options(argc, argv,
					     info_options, info_usage, 0);
			if (argc)
				usage_with_options(info_usage, info_options);
		}
		/* recycling report_lock_ops */
		trace_handler = &report_lock_ops;
		rc = __cmd_report(true);
	} else if (strlen(argv[0]) > 2 && strstarts("contention", argv[0])) {
		trace_handler = &contention_lock_ops;
		sort_key = "wait_total";
		output_fields = "contended,wait_total,wait_max,avg_wait";

#ifndef HAVE_BPF_SKEL
		set_option_nobuild(contention_options, 'b', "use-bpf",
				   "no BUILD_BPF_SKEL=1", false);
#endif
		if (argc) {
			argc = parse_options(argc, argv, contention_options,
					     contention_usage, 0);
		}
		rc = __cmd_contention(argc, argv);
	} else {
		usage_with_options(lock_usage, lock_options);
	}

	return rc;
}