xref: /openbmc/linux/tools/perf/builtin-lock.c (revision 060f35a317ef09101b128f399dce7ed13d019461)
1  // SPDX-License-Identifier: GPL-2.0
2  #include <errno.h>
3  #include <inttypes.h>
4  #include "builtin.h"
5  #include "perf.h"
6  
7  #include "util/evlist.h" // for struct evsel_str_handler
8  #include "util/evsel.h"
9  #include "util/symbol.h"
10  #include "util/thread.h"
11  #include "util/header.h"
12  #include "util/target.h"
13  #include "util/callchain.h"
14  #include "util/lock-contention.h"
15  #include "util/bpf_skel/lock_data.h"
16  
17  #include <subcmd/pager.h>
18  #include <subcmd/parse-options.h>
19  #include "util/trace-event.h"
20  #include "util/tracepoint.h"
21  
22  #include "util/debug.h"
23  #include "util/session.h"
24  #include "util/tool.h"
25  #include "util/data.h"
26  #include "util/string2.h"
27  #include "util/map.h"
28  #include "util/util.h"
29  
30  #include <stdio.h>
31  #include <sys/types.h>
32  #include <sys/prctl.h>
33  #include <semaphore.h>
34  #include <math.h>
35  #include <limits.h>
36  #include <ctype.h>
37  
38  #include <linux/list.h>
39  #include <linux/hash.h>
40  #include <linux/kernel.h>
41  #include <linux/zalloc.h>
42  #include <linux/err.h>
43  #include <linux/stringify.h>
44  
static struct perf_session *session;
static struct target target;

/* based on kernel/lockdep.c */
#define LOCKHASH_BITS		12
#define LOCKHASH_SIZE		(1UL << LOCKHASH_BITS)

/* hash table of lock_stat entries, keyed by lock address/aggr key */
static struct hlist_head *lockhash_table;

#define __lockhashfn(key)	hash_long((unsigned long)key, LOCKHASH_BITS)
#define lockhashentry(key)	(lockhash_table + __lockhashfn((key)))

/* rbtree of per-thread stats, ordered by tid (see thread_stat_find) */
static struct rb_root		thread_stats;

/* behavior toggles and tunables; presumably set from command-line options elsewhere in this file */
static bool combine_locks;
static bool show_thread_stats;
static bool show_lock_addrs;
static bool show_lock_owner;
static bool use_bpf;
static unsigned long bpf_map_entries = MAX_ENTRIES;
static int max_stack_depth = CONTENTION_STACK_DEPTH;
static int stack_skip = CONTENTION_STACK_SKIP;
static int print_nr_entries = INT_MAX / 2;
static LIST_HEAD(callstack_filters);
static const char *output_name = NULL;
static FILE *lock_output;

/* one entry per user-given callstack filter string */
struct callstack_filter {
	struct list_head list;
	char name[];	/* flexible array: substring matched against symbol names */
};

static struct lock_filter filters;

static enum lock_aggr_mode aggr_mode = LOCK_AGGR_ADDR;
80  
needs_callstack(void)81  static bool needs_callstack(void)
82  {
83  	return !list_empty(&callstack_filters);
84  }
85  
/* Look up the per-thread stat for @tid in the thread_stats rbtree. */
static struct thread_stat *thread_stat_find(u32 tid)
{
	struct rb_node *node = thread_stats.rb_node;

	while (node != NULL) {
		struct thread_stat *st = container_of(node, struct thread_stat, rb);

		if (tid == st->tid)
			return st;

		node = (tid < st->tid) ? node->rb_left : node->rb_right;
	}

	return NULL;
}
104  
thread_stat_insert(struct thread_stat * new)105  static void thread_stat_insert(struct thread_stat *new)
106  {
107  	struct rb_node **rb = &thread_stats.rb_node;
108  	struct rb_node *parent = NULL;
109  	struct thread_stat *p;
110  
111  	while (*rb) {
112  		p = container_of(*rb, struct thread_stat, rb);
113  		parent = *rb;
114  
115  		if (new->tid < p->tid)
116  			rb = &(*rb)->rb_left;
117  		else if (new->tid > p->tid)
118  			rb = &(*rb)->rb_right;
119  		else
120  			BUG_ON("inserting invalid thread_stat\n");
121  	}
122  
123  	rb_link_node(&new->rb, parent, rb);
124  	rb_insert_color(&new->rb, &thread_stats);
125  }
126  
/*
 * Find the stat for @tid, allocating and inserting a fresh one when it
 * does not exist yet.  Returns NULL on allocation failure.
 */
static struct thread_stat *thread_stat_findnew_after_first(u32 tid)
{
	struct thread_stat *st = thread_stat_find(tid);

	if (st != NULL)
		return st;

	st = zalloc(sizeof(*st));
	if (st == NULL) {
		pr_err("memory allocation failed\n");
		return NULL;
	}

	st->tid = tid;
	INIT_LIST_HEAD(&st->seq_list);
	thread_stat_insert(st);

	return st;
}
148  
static struct thread_stat *thread_stat_findnew_first(u32 tid);
/*
 * Self-switching indirection: the first call runs the "_first" variant,
 * which redirects this pointer to the searching "_after_first" variant
 * for every subsequent lookup.
 */
static struct thread_stat *(*thread_stat_findnew)(u32 tid) =
	thread_stat_findnew_first;
152  
/*
 * First-ever lookup: the tree is known to be empty, so allocate the
 * stat and link it directly as the root, then switch the findnew
 * pointer over to the searching variant.
 */
static struct thread_stat *thread_stat_findnew_first(u32 tid)
{
	struct thread_stat *st = zalloc(sizeof(*st));

	if (st == NULL) {
		pr_err("memory allocation failed\n");
		return NULL;
	}

	st->tid = tid;
	INIT_LIST_HEAD(&st->seq_list);

	/* empty tree: this node becomes the root */
	rb_link_node(&st->rb, NULL, &thread_stats.rb_node);
	rb_insert_color(&st->rb, &thread_stats);

	thread_stat_findnew = thread_stat_findnew_after_first;
	return st;
}
171  
/* build simple key function one is bigger than two */
#define SINGLE_KEY(member)						\
	static int lock_stat_key_ ## member(struct lock_stat *one,	\
					 struct lock_stat *two)		\
	{								\
		return one->member > two->member;			\
	}

/* comparators for the sortable columns in the key tables below */
SINGLE_KEY(nr_acquired)
SINGLE_KEY(nr_contended)
SINGLE_KEY(avg_wait_time)
SINGLE_KEY(wait_time_total)
SINGLE_KEY(wait_time_max)
185  
186  static int lock_stat_key_wait_time_min(struct lock_stat *one,
187  					struct lock_stat *two)
188  {
189  	u64 s1 = one->wait_time_min;
190  	u64 s2 = two->wait_time_min;
191  	if (s1 == ULLONG_MAX)
192  		s1 = 0;
193  	if (s2 == ULLONG_MAX)
194  		s2 = 0;
195  	return s1 > s2;
196  }
197  
/*
 * One sortable/printable column of the lock statistics output.  The
 * available columns live in report_keys[]/contention_keys[]; selected
 * ones are linked into the lock_keys list.
 */
struct lock_key {
	/*
	 * name: the value for specify by user
	 * this should be simpler than raw name of member
	 * e.g. nr_acquired -> acquired, wait_time_total -> wait_total
	 */
	const char		*name;
	/* header: the string printed on the header line */
	const char		*header;
	/* len: the printing width of the field */
	int			len;
	/* key: a pointer to function to compare two lock stats for sorting */
	int			(*key)(struct lock_stat*, struct lock_stat*);
	/* print: a pointer to function to print a given lock stats */
	void			(*print)(struct lock_key*, struct lock_stat*);
	/* list: list entry to link this */
	struct list_head	list;
};
216  
lock_stat_key_print_time(unsigned long long nsec,int len)217  static void lock_stat_key_print_time(unsigned long long nsec, int len)
218  {
219  	static const struct {
220  		float base;
221  		const char *unit;
222  	} table[] = {
223  		{ 1e9 * 3600, "h " },
224  		{ 1e9 * 60, "m " },
225  		{ 1e9, "s " },
226  		{ 1e6, "ms" },
227  		{ 1e3, "us" },
228  		{ 0, NULL },
229  	};
230  
231  	/* for CSV output */
232  	if (len == 0) {
233  		fprintf(lock_output, "%llu", nsec);
234  		return;
235  	}
236  
237  	for (int i = 0; table[i].unit; i++) {
238  		if (nsec < table[i].base)
239  			continue;
240  
241  		fprintf(lock_output, "%*.2f %s", len - 3, nsec / table[i].base, table[i].unit);
242  		return;
243  	}
244  
245  	fprintf(lock_output, "%*llu %s", len - 3, nsec, "ns");
246  }
247  
/* generate a printer showing the member as a plain right-aligned integer */
#define PRINT_KEY(member)						\
static void lock_stat_key_print_ ## member(struct lock_key *key,	\
					   struct lock_stat *ls)	\
{									\
	fprintf(lock_output, "%*llu", key->len, (unsigned long long)ls->member);\
}

/* generate a printer showing the member as a human-readable time */
#define PRINT_TIME(member)						\
static void lock_stat_key_print_ ## member(struct lock_key *key,	\
					   struct lock_stat *ls)	\
{									\
	lock_stat_key_print_time((unsigned long long)ls->member, key->len);	\
}

PRINT_KEY(nr_acquired)
PRINT_KEY(nr_contended)
PRINT_TIME(avg_wait_time)
PRINT_TIME(wait_time_total)
PRINT_TIME(wait_time_max)
267  
268  static void lock_stat_key_print_wait_time_min(struct lock_key *key,
269  					      struct lock_stat *ls)
270  {
271  	u64 wait_time = ls->wait_time_min;
272  
273  	if (wait_time == ULLONG_MAX)
274  		wait_time = 0;
275  
276  	lock_stat_key_print_time(wait_time, key->len);
277  }
278  
279  
/* name of the column to sort by; "acquired" by default */
static const char		*sort_key = "acquired";

/* comparator picked by select_key() from the tables below */
static int			(*compare)(struct lock_stat *, struct lock_stat *);

static struct rb_root		sorted; /* place to store intermediate data */
static struct rb_root		result;	/* place to store sorted data */

/* lock_key entries actually selected for output */
static LIST_HEAD(lock_keys);
static const char		*output_fields;

/* fill one lock_key table entry; list head is linked in later */
#define DEF_KEY_LOCK(name, header, fn_suffix, len)			\
	{ #name, header, len, lock_stat_key_ ## fn_suffix, lock_stat_key_print_ ## fn_suffix, {} }
/* columns available in report mode (select_key(false)) */
static struct lock_key report_keys[] = {
	DEF_KEY_LOCK(acquired, "acquired", nr_acquired, 10),
	DEF_KEY_LOCK(contended, "contended", nr_contended, 10),
	DEF_KEY_LOCK(avg_wait, "avg wait", avg_wait_time, 12),
	DEF_KEY_LOCK(wait_total, "total wait", wait_time_total, 12),
	DEF_KEY_LOCK(wait_max, "max wait", wait_time_max, 12),
	DEF_KEY_LOCK(wait_min, "min wait", wait_time_min, 12),

	/* extra comparisons much complicated should be here */
	{ }
};

/* columns available in contention mode (select_key(true)) */
static struct lock_key contention_keys[] = {
	DEF_KEY_LOCK(contended, "contended", nr_contended, 10),
	DEF_KEY_LOCK(wait_total, "total wait", wait_time_total, 12),
	DEF_KEY_LOCK(wait_max, "max wait", wait_time_max, 12),
	DEF_KEY_LOCK(wait_min, "min wait", wait_time_min, 12),
	DEF_KEY_LOCK(avg_wait, "avg wait", avg_wait_time, 12),

	/* extra comparisons much complicated should be here */
	{ }
};
314  
/*
 * Resolve the user's sort_key name against the mode's key table, set
 * the global comparator, and make sure the chosen key is also printed.
 * Returns 0 on success, -1 for an unknown key name.
 */
static int select_key(bool contention)
{
	struct lock_key *keys = contention ? contention_keys : report_keys;

	for (int i = 0; keys[i].name; i++) {
		if (strcmp(keys[i].name, sort_key) != 0)
			continue;

		compare = keys[i].key;

		/* selected key should be in the output fields */
		if (list_empty(&keys[i].list))
			list_add_tail(&keys[i].list, &lock_keys);

		return 0;
	}

	pr_err("Unknown compare key: %s\n", sort_key);
	return -1;
}
338  
/*
 * Append the column called @name to the output field list.
 * Returns 0 on success, -1 if the name is not in the mode's key table.
 */
static int add_output_field(bool contention, char *name)
{
	struct lock_key *keys = contention ? contention_keys : report_keys;

	for (int i = 0; keys[i].name; i++) {
		if (!strcmp(keys[i].name, name)) {
			/* prevent double link */
			if (list_empty(&keys[i].list))
				list_add_tail(&keys[i].list, &lock_keys);
			return 0;
		}
	}

	pr_err("Unknown output field: %s\n", name);
	return -1;
}
361  
/*
 * Parse the comma-separated field list @str and build lock_keys in that
 * order.  A NULL @str selects every available column.  Returns 0 on
 * success, -ENOMEM on allocation failure, or the add_output_field()
 * error for an unknown field name.
 */
static int setup_output_field(bool contention, const char *str)
{
	struct lock_key *keys = contention ? contention_keys : report_keys;
	char *copy, *pos, *field;
	int ret = 0;

	/* no output field given: use all of them */
	if (str == NULL) {
		for (int i = 0; keys[i].name; i++)
			list_add_tail(&keys[i].list, &lock_keys);
		return 0;
	}

	for (int i = 0; keys[i].name; i++)
		INIT_LIST_HEAD(&keys[i].list);

	copy = pos = strdup(str);
	if (copy == NULL)
		return -ENOMEM;

	while ((field = strsep(&pos, ",")) != NULL) {
		ret = add_output_field(contention, field);
		if (ret < 0)
			break;
	}
	free(copy);

	return ret;
}
394  
/*
 * Merge @st into the name-ordered 'sorted' tree.  When a stat with the
 * same name already exists there, its counters absorb @st's and @st is
 * marked combined so insert_to_result() will skip it; otherwise @st
 * itself is linked into the tree.
 */
static void combine_lock_stats(struct lock_stat *st)
{
	struct rb_node **rb = &sorted.rb_node;
	struct rb_node *parent = NULL;
	struct lock_stat *p;
	int ret;

	while (*rb) {
		p = container_of(*rb, struct lock_stat, rb);
		parent = *rb;

		if (st->name && p->name)
			ret = strcmp(st->name, p->name);
		else
			/* order a named stat relative to an unnamed one */
			ret = !!st->name - !!p->name;

		if (ret == 0) {
			/* same name: fold @st into the existing node */
			p->nr_acquired += st->nr_acquired;
			p->nr_contended += st->nr_contended;
			p->wait_time_total += st->wait_time_total;

			if (p->nr_contended)
				p->avg_wait_time = p->wait_time_total / p->nr_contended;

			/* keep the tighter min and the larger max */
			if (p->wait_time_min > st->wait_time_min)
				p->wait_time_min = st->wait_time_min;
			if (p->wait_time_max < st->wait_time_max)
				p->wait_time_max = st->wait_time_max;

			p->broken |= st->broken;
			st->combined = 1;
			return;
		}

		if (ret < 0)
			rb = &(*rb)->rb_left;
		else
			rb = &(*rb)->rb_right;
	}

	rb_link_node(&st->rb, parent, rb);
	rb_insert_color(&st->rb, &sorted);
}
438  
insert_to_result(struct lock_stat * st,int (* bigger)(struct lock_stat *,struct lock_stat *))439  static void insert_to_result(struct lock_stat *st,
440  			     int (*bigger)(struct lock_stat *, struct lock_stat *))
441  {
442  	struct rb_node **rb = &result.rb_node;
443  	struct rb_node *parent = NULL;
444  	struct lock_stat *p;
445  
446  	if (combine_locks && st->combined)
447  		return;
448  
449  	while (*rb) {
450  		p = container_of(*rb, struct lock_stat, rb);
451  		parent = *rb;
452  
453  		if (bigger(st, p))
454  			rb = &(*rb)->rb_left;
455  		else
456  			rb = &(*rb)->rb_right;
457  	}
458  
459  	rb_link_node(&st->rb, parent, rb);
460  	rb_insert_color(&st->rb, &result);
461  }
462  
463  /* returns left most element of result, and erase it */
pop_from_result(void)464  static struct lock_stat *pop_from_result(void)
465  {
466  	struct rb_node *node = result.rb_node;
467  
468  	if (!node)
469  		return NULL;
470  
471  	while (node->rb_left)
472  		node = node->rb_left;
473  
474  	rb_erase(node, &result);
475  	return container_of(node, struct lock_stat, rb);
476  }
477  
/* Look up the lock_stat for @addr in the lock hash table, or NULL. */
struct lock_stat *lock_stat_find(u64 addr)
{
	struct hlist_head *bucket = lockhashentry(addr);
	struct lock_stat *st;

	hlist_for_each_entry(st, bucket, hash_entry) {
		if (st->addr == addr)
			return st;
	}
	return NULL;
}
489  
/*
 * Find the lock_stat for @addr, creating it with the given @name and
 * @flags when absent.  Returns NULL on allocation failure.
 */
struct lock_stat *lock_stat_findnew(u64 addr, const char *name, int flags)
{
	struct hlist_head *bucket = lockhashentry(addr);
	struct lock_stat *st;

	hlist_for_each_entry(st, bucket, hash_entry) {
		if (st->addr == addr)
			return st;
	}

	st = zalloc(sizeof(*st));
	if (st == NULL)
		goto alloc_failed;

	st->addr = addr;
	st->name = strdup(name);
	if (st->name == NULL) {
		free(st);
		goto alloc_failed;
	}

	st->flags = flags;
	/* sentinel: no contention observed yet */
	st->wait_time_min = ULLONG_MAX;

	hlist_add_head(&st->hash_entry, bucket);
	return st;

alloc_failed:
	pr_err("memory allocation failed\n");
	return NULL;
}
521  
/*
 * Return true when any kernel symbol in @callstack matches (substring)
 * one of the user-given callstack filters.  An empty filter list
 * accepts every callstack.
 */
bool match_callstack_filter(struct machine *machine, u64 *callstack)
{
	struct map *kmap;
	struct symbol *sym;
	u64 ip;
	const char *arch = perf_env__arch(machine->env);

	if (list_empty(&callstack_filters))
		return true;

	for (int i = 0; i < max_stack_depth; i++) {
		struct callstack_filter *filter;

		/*
		 * In powerpc, the callchain saved by kernel always includes
		 * first three entries as the NIP (next instruction pointer),
		 * LR (link register), and the contents of LR save area in the
		 * second stack frame. In certain scenarios its possible to have
		 * invalid kernel instruction addresses in either LR or the second
		 * stack frame's LR. In that case, kernel will store that address as
		 * zero.
		 *
		 * The below check will continue to look into callstack,
		 * incase first or second callstack index entry has 0
		 * address for powerpc.
		 */
		if (!callstack || (!callstack[i] && (strcmp(arch, "powerpc") ||
						(i != 1 && i != 2))))
			break;

		ip = callstack[i];
		sym = machine__find_kernel_symbol(machine, ip, &kmap);
		if (sym == NULL)
			continue;

		list_for_each_entry(filter, &callstack_filters, list) {
			if (strstr(sym->name, filter->name))
				return true;
		}
	}
	return false;
}
564  
/*
 * Set of tracepoint callbacks; which members are populated depends on
 * the kernel config the events come from (see per-member notes).
 */
struct trace_lock_handler {
	/* it's used on CONFIG_LOCKDEP */
	int (*acquire_event)(struct evsel *evsel,
			     struct perf_sample *sample);

	/* it's used on CONFIG_LOCKDEP && CONFIG_LOCK_STAT */
	int (*acquired_event)(struct evsel *evsel,
			      struct perf_sample *sample);

	/* it's used on CONFIG_LOCKDEP && CONFIG_LOCK_STAT */
	int (*contended_event)(struct evsel *evsel,
			       struct perf_sample *sample);

	/* it's used on CONFIG_LOCKDEP */
	int (*release_event)(struct evsel *evsel,
			     struct perf_sample *sample);

	/* it's used when CONFIG_LOCKDEP is off */
	int (*contention_begin_event)(struct evsel *evsel,
				      struct perf_sample *sample);

	/* it's used when CONFIG_LOCKDEP is off */
	int (*contention_end_event)(struct evsel *evsel,
				    struct perf_sample *sample);
};
590  
/*
 * Get the event sequence tracker for lock @addr within thread @ts,
 * creating a fresh one (state UNINITIALIZED) on first sight.
 * Returns NULL on allocation failure.
 */
static struct lock_seq_stat *get_seq(struct thread_stat *ts, u64 addr)
{
	struct lock_seq_stat *seq;

	list_for_each_entry(seq, &ts->seq_list, list) {
		if (seq->addr == addr)
			return seq;
	}

	seq = zalloc(sizeof(*seq));
	if (seq == NULL) {
		pr_err("memory allocation failed\n");
		return NULL;
	}

	seq->state = SEQ_STATE_UNINITIALIZED;
	seq->addr = addr;
	list_add(&seq->list, &ts->seq_list);

	return seq;
}
611  
/* categories of broken (out-of-order) lock event sequences */
enum broken_state {
	BROKEN_ACQUIRE,
	BROKEN_ACQUIRED,
	BROKEN_CONTENDED,
	BROKEN_RELEASE,
	BROKEN_MAX,
};

/* counts of broken sequences seen, indexed by enum broken_state */
static int bad_hist[BROKEN_MAX];

/* bits of the lock_acquire tracepoint's "flags" field */
enum acquire_flags {
	TRY_LOCK = 1,
	READ_LOCK = 2,
};
626  
/*
 * Derive the aggregation key from @addr or @tid depending on the
 * current aggregation mode.  LOCK_AGGR_CALLER is not supported here
 * (callers needing it use get_key_by_aggr_mode()).
 */
static int get_key_by_aggr_mode_simple(u64 *key, u64 addr, u32 tid)
{
	switch (aggr_mode) {
	case LOCK_AGGR_ADDR:
		*key = addr;
		return 0;
	case LOCK_AGGR_TASK:
		*key = tid;
		return 0;
	case LOCK_AGGR_CALLER:
	default:
		pr_err("Invalid aggregation mode: %d\n", aggr_mode);
		return -EINVAL;
	}
}
643  
644  static u64 callchain_id(struct evsel *evsel, struct perf_sample *sample);
645  
/*
 * Like get_key_by_aggr_mode_simple() but additionally supports
 * LOCK_AGGR_CALLER by hashing the sample's callchain.
 */
static int get_key_by_aggr_mode(u64 *key, u64 addr, struct evsel *evsel,
				 struct perf_sample *sample)
{
	if (aggr_mode != LOCK_AGGR_CALLER)
		return get_key_by_aggr_mode_simple(key, addr, sample->tid);

	*key = callchain_id(evsel, sample);
	return 0;
}
655  
/*
 * Handle a lock_acquire tracepoint: start (or extend) a per-thread
 * sequence for the lock.  A plain acquire moves to ACQUIRING; try/read
 * acquires count as an immediate (read-)acquisition.  Out-of-order
 * events mark the lock broken and drop the sequence.
 */
static int report_lock_acquire_event(struct evsel *evsel,
				     struct perf_sample *sample)
{
	struct lock_stat *ls;
	struct thread_stat *ts;
	struct lock_seq_stat *seq;
	const char *name = evsel__strval(evsel, sample, "name");
	u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
	int flag = evsel__intval(evsel, sample, "flags");
	u64 key;
	int ret;

	ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid);
	if (ret < 0)
		return ret;

	ls = lock_stat_findnew(key, name, 0);
	if (!ls)
		return -ENOMEM;

	ts = thread_stat_findnew(sample->tid);
	if (!ts)
		return -ENOMEM;

	seq = get_seq(ts, addr);
	if (!seq)
		return -ENOMEM;

	switch (seq->state) {
	case SEQ_STATE_UNINITIALIZED:
	case SEQ_STATE_RELEASED:
		if (!flag) {
			/* normal acquire: wait for acquired/contended */
			seq->state = SEQ_STATE_ACQUIRING;
		} else {
			/* trylock/readlock succeed immediately */
			if (flag & TRY_LOCK)
				ls->nr_trylock++;
			if (flag & READ_LOCK)
				ls->nr_readlock++;
			seq->state = SEQ_STATE_READ_ACQUIRED;
			seq->read_count = 1;
			ls->nr_acquired++;
		}
		break;
	case SEQ_STATE_READ_ACQUIRED:
		if (flag & READ_LOCK) {
			/* read locks nest: just bump the depth */
			seq->read_count++;
			ls->nr_acquired++;
			goto end;
		} else {
			goto broken;
		}
		break;
	case SEQ_STATE_ACQUIRED:
	case SEQ_STATE_ACQUIRING:
	case SEQ_STATE_CONTENDED:
broken:
		/* broken lock sequence */
		if (!ls->broken) {
			ls->broken = 1;
			bad_hist[BROKEN_ACQUIRE]++;
		}
		list_del_init(&seq->list);
		free(seq);
		goto end;
	default:
		BUG_ON("Unknown state of lock sequence found!\n");
		break;
	}

	ls->nr_acquire++;
	seq->prev_event_time = sample->time;
end:
	return 0;
}
730  
/*
 * Handle a lock_acquired tracepoint: the lock was obtained.  If the
 * sequence was CONTENDED, the time since the contended event is folded
 * into the wait-time statistics.  Out-of-order events mark the lock
 * broken and drop the sequence.
 */
static int report_lock_acquired_event(struct evsel *evsel,
				      struct perf_sample *sample)
{
	struct lock_stat *ls;
	struct thread_stat *ts;
	struct lock_seq_stat *seq;
	u64 contended_term;
	const char *name = evsel__strval(evsel, sample, "name");
	u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
	u64 key;
	int ret;

	ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid);
	if (ret < 0)
		return ret;

	ls = lock_stat_findnew(key, name, 0);
	if (!ls)
		return -ENOMEM;

	ts = thread_stat_findnew(sample->tid);
	if (!ts)
		return -ENOMEM;

	seq = get_seq(ts, addr);
	if (!seq)
		return -ENOMEM;

	switch (seq->state) {
	case SEQ_STATE_UNINITIALIZED:
		/* orphan event, do nothing */
		return 0;
	case SEQ_STATE_ACQUIRING:
		break;
	case SEQ_STATE_CONTENDED:
		/* time spent waiting since the contended event */
		contended_term = sample->time - seq->prev_event_time;
		ls->wait_time_total += contended_term;
		if (contended_term < ls->wait_time_min)
			ls->wait_time_min = contended_term;
		if (ls->wait_time_max < contended_term)
			ls->wait_time_max = contended_term;
		break;
	case SEQ_STATE_RELEASED:
	case SEQ_STATE_ACQUIRED:
	case SEQ_STATE_READ_ACQUIRED:
		/* broken lock sequence */
		if (!ls->broken) {
			ls->broken = 1;
			bad_hist[BROKEN_ACQUIRED]++;
		}
		list_del_init(&seq->list);
		free(seq);
		goto end;
	default:
		BUG_ON("Unknown state of lock sequence found!\n");
		break;
	}

	seq->state = SEQ_STATE_ACQUIRED;
	ls->nr_acquired++;
	/* average is recomputed from the running totals */
	ls->avg_wait_time = ls->nr_contended ? ls->wait_time_total/ls->nr_contended : 0;
	seq->prev_event_time = sample->time;
end:
	return 0;
}
796  
/*
 * Handle a lock_contended tracepoint: the acquiring thread had to wait.
 * Only valid after ACQUIRING; any other state marks the lock broken and
 * drops the sequence.
 */
static int report_lock_contended_event(struct evsel *evsel,
				       struct perf_sample *sample)
{
	struct lock_stat *ls;
	struct thread_stat *ts;
	struct lock_seq_stat *seq;
	const char *name = evsel__strval(evsel, sample, "name");
	u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
	u64 key;
	int ret;

	ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid);
	if (ret < 0)
		return ret;

	ls = lock_stat_findnew(key, name, 0);
	if (!ls)
		return -ENOMEM;

	ts = thread_stat_findnew(sample->tid);
	if (!ts)
		return -ENOMEM;

	seq = get_seq(ts, addr);
	if (!seq)
		return -ENOMEM;

	switch (seq->state) {
	case SEQ_STATE_UNINITIALIZED:
		/* orphan event, do nothing */
		return 0;
	case SEQ_STATE_ACQUIRING:
		break;
	case SEQ_STATE_RELEASED:
	case SEQ_STATE_ACQUIRED:
	case SEQ_STATE_READ_ACQUIRED:
	case SEQ_STATE_CONTENDED:
		/* broken lock sequence */
		if (!ls->broken) {
			ls->broken = 1;
			bad_hist[BROKEN_CONTENDED]++;
		}
		list_del_init(&seq->list);
		free(seq);
		goto end;
	default:
		BUG_ON("Unknown state of lock sequence found!\n");
		break;
	}

	seq->state = SEQ_STATE_CONTENDED;
	ls->nr_contended++;
	/* nr_contended was just incremented, so it is non-zero here */
	ls->avg_wait_time = ls->wait_time_total/ls->nr_contended;
	seq->prev_event_time = sample->time;
end:
	return 0;
}
854  
/*
 * Handle a lock_release tracepoint: end the per-thread sequence.  For
 * nested read locks the sequence survives until the depth drops to
 * zero.  Releases from an unexpected state mark the lock broken.
 */
static int report_lock_release_event(struct evsel *evsel,
				     struct perf_sample *sample)
{
	struct lock_stat *ls;
	struct thread_stat *ts;
	struct lock_seq_stat *seq;
	const char *name = evsel__strval(evsel, sample, "name");
	u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
	u64 key;
	int ret;

	ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid);
	if (ret < 0)
		return ret;

	ls = lock_stat_findnew(key, name, 0);
	if (!ls)
		return -ENOMEM;

	ts = thread_stat_findnew(sample->tid);
	if (!ts)
		return -ENOMEM;

	seq = get_seq(ts, addr);
	if (!seq)
		return -ENOMEM;

	switch (seq->state) {
	case SEQ_STATE_UNINITIALIZED:
		/* orphan event: nothing to tear down */
		goto end;
	case SEQ_STATE_ACQUIRED:
		break;
	case SEQ_STATE_READ_ACQUIRED:
		seq->read_count--;
		BUG_ON(seq->read_count < 0);
		if (seq->read_count) {
			/* still held as a nested read lock */
			ls->nr_release++;
			goto end;
		}
		break;
	case SEQ_STATE_ACQUIRING:
	case SEQ_STATE_CONTENDED:
	case SEQ_STATE_RELEASED:
		/* broken lock sequence */
		if (!ls->broken) {
			ls->broken = 1;
			bad_hist[BROKEN_RELEASE]++;
		}
		goto free_seq;
	default:
		BUG_ON("Unknown state of lock sequence found!\n");
		break;
	}

	ls->nr_release++;
free_seq:
	list_del_init(&seq->list);
	free(seq);
end:
	return 0;
}
916  
/*
 * Format "symbol+offset" (or just "symbol" when the offset is zero) for
 * @ip into @buf.  With no map or symbol, @buf becomes an empty string.
 * Returns the formatted length as reported by scnprintf()/strlcpy().
 */
static int get_symbol_name_offset(struct map *map, struct symbol *sym, u64 ip,
				  char *buf, int size)
{
	u64 offset;

	if (map == NULL || sym == NULL) {
		buf[0] = '\0';
		return 0;
	}

	offset = map__map_ip(map, ip) - sym->start;

	if (offset)
		/* PRIx64: offset is u64, so %lx would be wrong on 32-bit */
		return scnprintf(buf, size, "%s+%#" PRIx64, sym->name, offset);
	else
		return strlcpy(buf, sym->name, size);
}
/*
 * Resolve a readable caller name for a contended lock: walk the
 * sample's callchain, skip the first stack_skip entries and any known
 * lock-internal function, and format the first remaining symbol as
 * "name+offset" into @buf.  Returns 0 on success, -1 when no caller is
 * found (or in per-thread mode, where names come from tasks instead).
 */
static int lock_contention_caller(struct evsel *evsel, struct perf_sample *sample,
				  char *buf, int size)
{
	struct thread *thread;
	struct callchain_cursor *cursor;
	struct machine *machine = &session->machines.host;
	struct symbol *sym;
	int skip = 0;
	int ret;

	/* lock names will be replaced to task name later */
	if (show_thread_stats)
		return -1;

	thread = machine__findnew_thread(machine, -1, sample->pid);
	if (thread == NULL)
		return -1;

	cursor = get_tls_callchain_cursor();

	/* use caller function name from the callchain */
	ret = thread__resolve_callchain(thread, cursor, evsel, sample,
					NULL, NULL, max_stack_depth);
	if (ret != 0) {
		thread__put(thread);
		return -1;
	}

	callchain_cursor_commit(cursor);
	thread__put(thread);

	while (true) {
		struct callchain_cursor_node *node;

		node = callchain_cursor_current(cursor);
		if (node == NULL)
			break;

		/* skip first few entries - for lock functions */
		if (++skip <= stack_skip)
			goto next;

		sym = node->ms.sym;
		if (sym && !machine__is_lock_function(machine, node->ip)) {
			get_symbol_name_offset(node->ms.map, sym, node->ip,
					       buf, size);
			return 0;
		}

next:
		callchain_cursor_advance(cursor);
	}
	return -1;
}
988  
/*
 * Build a stable identifier for a contention callchain by XOR-folding
 * hashes of the IPs that are neither skipped entries nor known lock
 * functions.  Used as the aggregation key in LOCK_AGGR_CALLER mode.
 * Returns (u64)-1 when the callchain cannot be resolved.
 */
static u64 callchain_id(struct evsel *evsel, struct perf_sample *sample)
{
	struct callchain_cursor *cursor;
	struct machine *machine = &session->machines.host;
	struct thread *thread;
	u64 hash = 0;
	int skip = 0;
	int ret;

	thread = machine__findnew_thread(machine, -1, sample->pid);
	if (thread == NULL)
		return -1;

	cursor = get_tls_callchain_cursor();
	/* use caller function name from the callchain */
	ret = thread__resolve_callchain(thread, cursor, evsel, sample,
					NULL, NULL, max_stack_depth);
	thread__put(thread);

	if (ret != 0)
		return -1;

	callchain_cursor_commit(cursor);

	while (true) {
		struct callchain_cursor_node *node;

		node = callchain_cursor_current(cursor);
		if (node == NULL)
			break;

		/* skip first few entries - for lock functions */
		if (++skip <= stack_skip)
			goto next;

		if (node->ms.sym && machine__is_lock_function(machine, node->ip))
			goto next;

		hash ^= hash_long((unsigned long)node->ip, 64);

next:
		callchain_cursor_advance(cursor);
	}
	return hash;
}
1034  
get_callstack(struct perf_sample * sample,int max_stack)1035  static u64 *get_callstack(struct perf_sample *sample, int max_stack)
1036  {
1037  	u64 *callstack;
1038  	u64 i;
1039  	int c;
1040  
1041  	callstack = calloc(max_stack, sizeof(*callstack));
1042  	if (callstack == NULL)
1043  		return NULL;
1044  
1045  	for (i = 0, c = 0; i < sample->callchain->nr && c < max_stack; i++) {
1046  		u64 ip = sample->callchain->ips[i];
1047  
1048  		if (ip >= PERF_CONTEXT_MAX)
1049  			continue;
1050  
1051  		callstack[c++] = ip;
1052  	}
1053  	return callstack;
1054  }
1055  
/*
 * Handle a lock:contention_begin sample.
 *
 * Resolves the aggregation key (lock address, task, or caller depending
 * on aggr_mode), lazily converts the configured symbol filters into
 * kernel addresses on the first event, applies the type/address/
 * callstack filters, and finally marks this thread's lock sequence as
 * SEQ_STATE_CONTENDED so the matching contention_end event can compute
 * the wait time.
 *
 * Returns 0 on success (including samples dropped by a filter),
 * negative errno on allocation or lookup failure.
 */
static int report_lock_contention_begin_event(struct evsel *evsel,
					      struct perf_sample *sample)
{
	struct lock_stat *ls;
	struct thread_stat *ts;
	struct lock_seq_stat *seq;
	u64 addr = evsel__intval(evsel, sample, "lock_addr");
	unsigned int flags = evsel__intval(evsel, sample, "flags");
	u64 key;
	int i, ret;
	static bool kmap_loaded;	/* one-shot kernel map load below */
	struct machine *machine = &session->machines.host;
	struct map *kmap;
	struct symbol *sym;

	ret = get_key_by_aggr_mode(&key, addr, evsel, sample);
	if (ret < 0)
		return ret;

	if (!kmap_loaded) {
		unsigned long *addrs;

		/* make sure it loads the kernel map to find lock symbols */
		map__load(machine__kernel_map(machine));
		kmap_loaded = true;

		/* convert (kernel) symbols to addresses */
		for (i = 0; i < filters.nr_syms; i++) {
			sym = machine__find_kernel_symbol_by_name(machine,
								  filters.syms[i],
								  &kmap);
			if (sym == NULL) {
				pr_warning("ignore unknown symbol: %s\n",
					   filters.syms[i]);
				continue;
			}

			/* grow the address filter list by one entry */
			addrs = realloc(filters.addrs,
					(filters.nr_addrs + 1) * sizeof(*addrs));
			if (addrs == NULL) {
				pr_warning("memory allocation failure\n");
				return -ENOMEM;
			}

			addrs[filters.nr_addrs++] = map__unmap_ip(kmap, sym->start);
			filters.addrs = addrs;
		}
	}

	ls = lock_stat_find(key);
	if (!ls) {
		char buf[128];
		const char *name = "";

		/* pick a display name for the new stat entry */
		switch (aggr_mode) {
		case LOCK_AGGR_ADDR:
			sym = machine__find_kernel_symbol(machine, key, &kmap);
			if (sym)
				name = sym->name;
			break;
		case LOCK_AGGR_CALLER:
			name = buf;
			if (lock_contention_caller(evsel, sample, buf, sizeof(buf)) < 0)
				name = "Unknown";
			break;
		case LOCK_AGGR_TASK:
		default:
			break;
		}

		ls = lock_stat_findnew(key, name, flags);
		if (!ls)
			return -ENOMEM;
	}

	/* lock-type filter: drop samples of other lock kinds */
	if (filters.nr_types) {
		bool found = false;

		for (i = 0; i < filters.nr_types; i++) {
			if (flags == filters.types[i]) {
				found = true;
				break;
			}
		}

		if (!found)
			return 0;
	}

	/* lock-address filter: drop samples for other lock instances */
	if (filters.nr_addrs) {
		bool found = false;

		for (i = 0; i < filters.nr_addrs; i++) {
			if (addr == filters.addrs[i]) {
				found = true;
				break;
			}
		}

		if (!found)
			return 0;
	}

	if (needs_callstack()) {
		u64 *callstack = get_callstack(sample, max_stack_depth);
		if (callstack == NULL)
			return -ENOMEM;

		if (!match_callstack_filter(machine, callstack)) {
			free(callstack);
			return 0;
		}

		/* keep only the first callstack seen for this lock */
		if (ls->callstack == NULL)
			ls->callstack = callstack;
		else
			free(callstack);
	}

	ts = thread_stat_findnew(sample->tid);
	if (!ts)
		return -ENOMEM;

	seq = get_seq(ts, addr);
	if (!seq)
		return -ENOMEM;

	switch (seq->state) {
	case SEQ_STATE_UNINITIALIZED:
	case SEQ_STATE_ACQUIRED:
		break;
	case SEQ_STATE_CONTENDED:
		/*
		 * It can have nested contention begin with mutex spinning,
		 * then we would use the original contention begin event and
		 * ignore the second one.
		 */
		goto end;
	case SEQ_STATE_ACQUIRING:
	case SEQ_STATE_READ_ACQUIRED:
	case SEQ_STATE_RELEASED:
		/* broken lock sequence */
		if (!ls->broken) {
			ls->broken = 1;
			bad_hist[BROKEN_CONTENDED]++;
		}
		/* discard the broken sequence and start over */
		list_del_init(&seq->list);
		free(seq);
		goto end;
	default:
		BUG_ON("Unknown state of lock sequence found!\n");
		break;
	}

	if (seq->state != SEQ_STATE_CONTENDED) {
		seq->state = SEQ_STATE_CONTENDED;
		seq->prev_event_time = sample->time;
		ls->nr_contended++;
	}
end:
	return 0;
}
1218  
/*
 * Handle a lock:contention_end sample: close the SEQ_STATE_CONTENDED
 * window opened by the matching contention_begin event and account the
 * wait time (total/min/max/avg) to the lock's statistics.
 *
 * Returns 0 on success or when there is nothing to account, negative
 * errno on allocation failure.
 */
static int report_lock_contention_end_event(struct evsel *evsel,
					    struct perf_sample *sample)
{
	struct lock_stat *ls;
	struct thread_stat *ts;
	struct lock_seq_stat *seq;
	u64 contended_term;
	u64 addr = evsel__intval(evsel, sample, "lock_addr");
	u64 key;
	int ret;

	ret = get_key_by_aggr_mode(&key, addr, evsel, sample);
	if (ret < 0)
		return ret;

	/* no stat entry: the begin event was never seen (or filtered out) */
	ls = lock_stat_find(key);
	if (!ls)
		return 0;

	ts = thread_stat_find(sample->tid);
	if (!ts)
		return 0;

	seq = get_seq(ts, addr);
	if (!seq)
		return -ENOMEM;

	switch (seq->state) {
	case SEQ_STATE_UNINITIALIZED:
		/* end with no recorded begin: nothing to measure */
		goto end;
	case SEQ_STATE_CONTENDED:
		/* wait time is the span since the begin event */
		contended_term = sample->time - seq->prev_event_time;
		ls->wait_time_total += contended_term;
		if (contended_term < ls->wait_time_min)
			ls->wait_time_min = contended_term;
		if (ls->wait_time_max < contended_term)
			ls->wait_time_max = contended_term;
		break;
	case SEQ_STATE_ACQUIRING:
	case SEQ_STATE_ACQUIRED:
	case SEQ_STATE_READ_ACQUIRED:
	case SEQ_STATE_RELEASED:
		/* broken lock sequence */
		if (!ls->broken) {
			ls->broken = 1;
			bad_hist[BROKEN_ACQUIRED]++;
		}
		/* discard the broken sequence and start over */
		list_del_init(&seq->list);
		free(seq);
		goto end;
	default:
		BUG_ON("Unknown state of lock sequence found!\n");
		break;
	}

	seq->state = SEQ_STATE_ACQUIRED;
	ls->nr_acquired++;
	ls->avg_wait_time = ls->wait_time_total/ls->nr_acquired;
end:
	return 0;
}
1280  
/* lock oriented handlers */
/* TODO: handlers for CPU oriented, thread oriented */
/* full handler table used by 'perf lock report' (lockdep tracepoints) */
static struct trace_lock_handler report_lock_ops  = {
	.acquire_event		= report_lock_acquire_event,
	.acquired_event		= report_lock_acquired_event,
	.contended_event	= report_lock_contended_event,
	.release_event		= report_lock_release_event,
	.contention_begin_event	= report_lock_contention_begin_event,
	.contention_end_event	= report_lock_contention_end_event,
};

/* contention-only mode: just the begin/end pair */
static struct trace_lock_handler contention_lock_ops  = {
	.contention_begin_event	= report_lock_contention_begin_event,
	.contention_end_event	= report_lock_contention_end_event,
};


/* active handler table — presumably set to one of the above by the subcommand setup */
static struct trace_lock_handler *trace_handler;
1299  
evsel__process_lock_acquire(struct evsel * evsel,struct perf_sample * sample)1300  static int evsel__process_lock_acquire(struct evsel *evsel, struct perf_sample *sample)
1301  {
1302  	if (trace_handler->acquire_event)
1303  		return trace_handler->acquire_event(evsel, sample);
1304  	return 0;
1305  }
1306  
evsel__process_lock_acquired(struct evsel * evsel,struct perf_sample * sample)1307  static int evsel__process_lock_acquired(struct evsel *evsel, struct perf_sample *sample)
1308  {
1309  	if (trace_handler->acquired_event)
1310  		return trace_handler->acquired_event(evsel, sample);
1311  	return 0;
1312  }
1313  
evsel__process_lock_contended(struct evsel * evsel,struct perf_sample * sample)1314  static int evsel__process_lock_contended(struct evsel *evsel, struct perf_sample *sample)
1315  {
1316  	if (trace_handler->contended_event)
1317  		return trace_handler->contended_event(evsel, sample);
1318  	return 0;
1319  }
1320  
evsel__process_lock_release(struct evsel * evsel,struct perf_sample * sample)1321  static int evsel__process_lock_release(struct evsel *evsel, struct perf_sample *sample)
1322  {
1323  	if (trace_handler->release_event)
1324  		return trace_handler->release_event(evsel, sample);
1325  	return 0;
1326  }
1327  
evsel__process_contention_begin(struct evsel * evsel,struct perf_sample * sample)1328  static int evsel__process_contention_begin(struct evsel *evsel, struct perf_sample *sample)
1329  {
1330  	if (trace_handler->contention_begin_event)
1331  		return trace_handler->contention_begin_event(evsel, sample);
1332  	return 0;
1333  }
1334  
evsel__process_contention_end(struct evsel * evsel,struct perf_sample * sample)1335  static int evsel__process_contention_end(struct evsel *evsel, struct perf_sample *sample)
1336  {
1337  	if (trace_handler->contention_end_event)
1338  		return trace_handler->contention_end_event(evsel, sample);
1339  	return 0;
1340  }
1341  
print_bad_events(int bad,int total)1342  static void print_bad_events(int bad, int total)
1343  {
1344  	/* Output for debug, this have to be removed */
1345  	int i;
1346  	int broken = 0;
1347  	const char *name[4] =
1348  		{ "acquire", "acquired", "contended", "release" };
1349  
1350  	for (i = 0; i < BROKEN_MAX; i++)
1351  		broken += bad_hist[i];
1352  
1353  	if (quiet || total == 0 || (broken == 0 && verbose <= 0))
1354  		return;
1355  
1356  	fprintf(lock_output, "\n=== output for debug ===\n\n");
1357  	fprintf(lock_output, "bad: %d, total: %d\n", bad, total);
1358  	fprintf(lock_output, "bad rate: %.2f %%\n", (double)bad / (double)total * 100);
1359  	fprintf(lock_output, "histogram of events caused bad sequence\n");
1360  	for (i = 0; i < BROKEN_MAX; i++)
1361  		fprintf(lock_output, " %10s: %d\n", name[i], bad_hist[i]);
1362  }
1363  
/* TODO: various way to print, coloring, nano or milli sec */
/*
 * Print the sorted per-lock statistics for 'perf lock report': a header
 * row, one line per lock with the selected key columns, then the
 * bad-event summary.
 */
static void print_result(void)
{
	struct lock_stat *st;
	struct lock_key *key;
	char cut_name[20];	/* truncated display name, max 19 chars + NUL */
	int bad, total, printed;

	if (!quiet) {
		/* header: one column per selected output key */
		fprintf(lock_output, "%20s ", "Name");
		list_for_each_entry(key, &lock_keys, list)
			fprintf(lock_output, "%*s ", key->len, key->header);
		fprintf(lock_output, "\n\n");
	}

	bad = total = printed = 0;
	while ((st = pop_from_result())) {
		total++;
		if (st->broken)
			bad++;
		/* skip locks that were never successfully acquired */
		if (!st->nr_acquired)
			continue;

		bzero(cut_name, 20);

		if (strlen(st->name) < 20) {
			/* output raw name */
			const char *name = st->name;

			if (show_thread_stats) {
				struct thread *t;

				/* st->addr contains tid of thread */
				t = perf_session__findnew(session, st->addr);
				name = thread__comm_str(t);
				/*
				 * NOTE(review): no thread__put() here, unlike
				 * dump_threads(); presumably the reference is
				 * held so 'name' stays valid while printed —
				 * verify this is not a refcount leak.
				 */
			}

			fprintf(lock_output, "%20s ", name);
		} else {
			/* name too wide for the column: keep 16 chars + "..." */
			strncpy(cut_name, st->name, 16);
			cut_name[16] = '.';
			cut_name[17] = '.';
			cut_name[18] = '.';
			cut_name[19] = '\0';
			/* cut off name for saving output style */
			fprintf(lock_output, "%20s ", cut_name);
		}

		list_for_each_entry(key, &lock_keys, list) {
			key->print(key, st);
			fprintf(lock_output, " ");
		}
		fprintf(lock_output, "\n");

		if (++printed >= print_nr_entries)
			break;
	}

	print_bad_events(bad, total);
}
1424  
1425  static bool info_threads, info_map;
1426  
dump_threads(void)1427  static void dump_threads(void)
1428  {
1429  	struct thread_stat *st;
1430  	struct rb_node *node;
1431  	struct thread *t;
1432  
1433  	fprintf(lock_output, "%10s: comm\n", "Thread ID");
1434  
1435  	node = rb_first(&thread_stats);
1436  	while (node) {
1437  		st = container_of(node, struct thread_stat, rb);
1438  		t = perf_session__findnew(session, st->tid);
1439  		fprintf(lock_output, "%10d: %s\n", st->tid, thread__comm_str(t));
1440  		node = rb_next(node);
1441  		thread__put(t);
1442  	}
1443  }
1444  
compare_maps(struct lock_stat * a,struct lock_stat * b)1445  static int compare_maps(struct lock_stat *a, struct lock_stat *b)
1446  {
1447  	int ret;
1448  
1449  	if (a->name && b->name)
1450  		ret = strcmp(a->name, b->name);
1451  	else
1452  		ret = !!a->name - !!b->name;
1453  
1454  	if (!ret)
1455  		return a->addr < b->addr;
1456  	else
1457  		return ret < 0;
1458  }
1459  
dump_map(void)1460  static void dump_map(void)
1461  {
1462  	unsigned int i;
1463  	struct lock_stat *st;
1464  
1465  	fprintf(lock_output, "Address of instance: name of class\n");
1466  	for (i = 0; i < LOCKHASH_SIZE; i++) {
1467  		hlist_for_each_entry(st, &lockhash_table[i], hash_entry) {
1468  			insert_to_result(st, compare_maps);
1469  		}
1470  	}
1471  
1472  	while ((st = pop_from_result()))
1473  		fprintf(lock_output, " %#llx: %s\n", (unsigned long long)st->addr, st->name);
1474  }
1475  
dump_info(void)1476  static int dump_info(void)
1477  {
1478  	int rc = 0;
1479  
1480  	if (info_threads)
1481  		dump_threads();
1482  	else if (info_map)
1483  		dump_map();
1484  	else {
1485  		rc = -1;
1486  		pr_err("Unknown type of information\n");
1487  	}
1488  
1489  	return rc;
1490  }
1491  
/* lockdep/lockstat tracepoints handled by the report-mode handlers */
static const struct evsel_str_handler lock_tracepoints[] = {
	{ "lock:lock_acquire",	 evsel__process_lock_acquire,   }, /* CONFIG_LOCKDEP */
	{ "lock:lock_acquired",	 evsel__process_lock_acquired,  }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */
	{ "lock:lock_contended", evsel__process_lock_contended, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */
	{ "lock:lock_release",	 evsel__process_lock_release,   }, /* CONFIG_LOCKDEP */
};

/* contention begin/end tracepoints used by the contention handlers */
static const struct evsel_str_handler contention_tracepoints[] = {
	{ "lock:contention_begin", evsel__process_contention_begin, },
	{ "lock:contention_end",   evsel__process_contention_end,   },
};
1503  
process_event_update(struct perf_tool * tool,union perf_event * event,struct evlist ** pevlist)1504  static int process_event_update(struct perf_tool *tool,
1505  				union perf_event *event,
1506  				struct evlist **pevlist)
1507  {
1508  	int ret;
1509  
1510  	ret = perf_event__process_event_update(tool, event, pevlist);
1511  	if (ret < 0)
1512  		return ret;
1513  
1514  	/* this can return -EEXIST since we call it for each evsel */
1515  	perf_session__set_tracepoints_handlers(session, lock_tracepoints);
1516  	perf_session__set_tracepoints_handlers(session, contention_tracepoints);
1517  	return 0;
1518  }
1519  
/* common signature of the per-tracepoint handler callbacks above */
typedef int (*tracepoint_handler)(struct evsel *evsel,
				  struct perf_sample *sample);

/*
 * Per-sample tool callback: make sure the sample's thread exists in the
 * machine, then dispatch the sample to the evsel's tracepoint handler
 * (set via the evsel_str_handler tables), if any.
 * Returns the handler's result, 0 with no handler, -1 on a missing thread.
 */
static int process_sample_event(struct perf_tool *tool __maybe_unused,
				union perf_event *event,
				struct perf_sample *sample,
				struct evsel *evsel,
				struct machine *machine)
{
	int err = 0;
	struct thread *thread = machine__findnew_thread(machine, sample->pid,
							sample->tid);

	if (thread == NULL) {
		pr_debug("problem processing %d event, skipping it.\n",
			event->header.type);
		return -1;
	}

	if (evsel->handler != NULL) {
		tracepoint_handler f = evsel->handler;
		err = f(evsel, sample);
	}

	/* drop the reference taken by machine__findnew_thread() */
	thread__put(thread);

	return err;
}
1548  
combine_result(void)1549  static void combine_result(void)
1550  {
1551  	unsigned int i;
1552  	struct lock_stat *st;
1553  
1554  	if (!combine_locks)
1555  		return;
1556  
1557  	for (i = 0; i < LOCKHASH_SIZE; i++) {
1558  		hlist_for_each_entry(st, &lockhash_table[i], hash_entry) {
1559  			combine_lock_stats(st);
1560  		}
1561  	}
1562  }
1563  
sort_result(void)1564  static void sort_result(void)
1565  {
1566  	unsigned int i;
1567  	struct lock_stat *st;
1568  
1569  	for (i = 0; i < LOCKHASH_SIZE; i++) {
1570  		hlist_for_each_entry(st, &lockhash_table[i], hash_entry) {
1571  			insert_to_result(st, compare);
1572  		}
1573  	}
1574  }
1575  
/*
 * Mapping from LCB_F_* flag combinations to display strings: 'str' is
 * the detailed per-entry form (with :R/:W variants) returned by
 * get_type_str(), 'name' the generic class name returned by
 * get_type_name().  Both helpers return the FIRST matching row, so the
 * trailing "mutex:spin" alias is shadowed by the earlier
 * LCB_F_MUTEX | LCB_F_SPIN row for them — presumably it is consumed
 * elsewhere (e.g. name-based type filtering); verify before removing.
 */
static const struct {
	unsigned int flags;
	const char *str;
	const char *name;
} lock_type_table[] = {
	{ 0,				"semaphore",	"semaphore" },
	{ LCB_F_SPIN,			"spinlock",	"spinlock" },
	{ LCB_F_SPIN | LCB_F_READ,	"rwlock:R",	"rwlock" },
	{ LCB_F_SPIN | LCB_F_WRITE,	"rwlock:W",	"rwlock" },
	{ LCB_F_READ,			"rwsem:R",	"rwsem" },
	{ LCB_F_WRITE,			"rwsem:W",	"rwsem" },
	{ LCB_F_RT,			"rt-mutex",	"rt-mutex" },
	{ LCB_F_RT | LCB_F_READ,	"rwlock-rt:R",	"rwlock-rt" },
	{ LCB_F_RT | LCB_F_WRITE,	"rwlock-rt:W",	"rwlock-rt" },
	{ LCB_F_PERCPU | LCB_F_READ,	"pcpu-sem:R",	"percpu-rwsem" },
	{ LCB_F_PERCPU | LCB_F_WRITE,	"pcpu-sem:W",	"percpu-rwsem" },
	{ LCB_F_MUTEX,			"mutex",	"mutex" },
	{ LCB_F_MUTEX | LCB_F_SPIN,	"mutex",	"mutex" },
	/* alias for optimistic spinning only */
	{ LCB_F_MUTEX | LCB_F_SPIN,	"mutex:spin",	"mutex-spin" },
};
1597  
get_type_str(unsigned int flags)1598  static const char *get_type_str(unsigned int flags)
1599  {
1600  	flags &= LCB_F_MAX_FLAGS - 1;
1601  
1602  	for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) {
1603  		if (lock_type_table[i].flags == flags)
1604  			return lock_type_table[i].str;
1605  	}
1606  	return "unknown";
1607  }
1608  
get_type_name(unsigned int flags)1609  static const char *get_type_name(unsigned int flags)
1610  {
1611  	flags &= LCB_F_MAX_FLAGS - 1;
1612  
1613  	for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) {
1614  		if (lock_type_table[i].flags == flags)
1615  			return lock_type_table[i].name;
1616  	}
1617  	return "unknown";
1618  }
1619  
lock_filter_finish(void)1620  static void lock_filter_finish(void)
1621  {
1622  	zfree(&filters.types);
1623  	filters.nr_types = 0;
1624  
1625  	zfree(&filters.addrs);
1626  	filters.nr_addrs = 0;
1627  
1628  	for (int i = 0; i < filters.nr_syms; i++)
1629  		free(filters.syms[i]);
1630  
1631  	zfree(&filters.syms);
1632  	filters.nr_syms = 0;
1633  }
1634  
/* Contention mode reuses the generic result sorting as-is. */
static void sort_contention_result(void)
{
	sort_result();
}
1639  
print_header_stdio(void)1640  static void print_header_stdio(void)
1641  {
1642  	struct lock_key *key;
1643  
1644  	list_for_each_entry(key, &lock_keys, list)
1645  		fprintf(lock_output, "%*s ", key->len, key->header);
1646  
1647  	switch (aggr_mode) {
1648  	case LOCK_AGGR_TASK:
1649  		fprintf(lock_output, "  %10s   %s\n\n", "pid",
1650  			show_lock_owner ? "owner" : "comm");
1651  		break;
1652  	case LOCK_AGGR_CALLER:
1653  		fprintf(lock_output, "  %10s   %s\n\n", "type", "caller");
1654  		break;
1655  	case LOCK_AGGR_ADDR:
1656  		fprintf(lock_output, "  %16s   %s\n\n", "address", "symbol");
1657  		break;
1658  	default:
1659  		break;
1660  	}
1661  }
1662  
print_header_csv(const char * sep)1663  static void print_header_csv(const char *sep)
1664  {
1665  	struct lock_key *key;
1666  
1667  	fprintf(lock_output, "# output: ");
1668  	list_for_each_entry(key, &lock_keys, list)
1669  		fprintf(lock_output, "%s%s ", key->header, sep);
1670  
1671  	switch (aggr_mode) {
1672  	case LOCK_AGGR_TASK:
1673  		fprintf(lock_output, "%s%s %s\n", "pid", sep,
1674  			show_lock_owner ? "owner" : "comm");
1675  		break;
1676  	case LOCK_AGGR_CALLER:
1677  		fprintf(lock_output, "%s%s %s", "type", sep, "caller");
1678  		if (verbose > 0)
1679  			fprintf(lock_output, "%s %s", sep, "stacktrace");
1680  		fprintf(lock_output, "\n");
1681  		break;
1682  	case LOCK_AGGR_ADDR:
1683  		fprintf(lock_output, "%s%s %s%s %s\n", "address", sep, "symbol", sep, "type");
1684  		break;
1685  	default:
1686  		break;
1687  	}
1688  }
1689  
print_header(void)1690  static void print_header(void)
1691  {
1692  	if (!quiet) {
1693  		if (symbol_conf.field_sep)
1694  			print_header_csv(symbol_conf.field_sep);
1695  		else
1696  			print_header_stdio();
1697  	}
1698  }
1699  
/*
 * Print one contention result row in the human-readable format: the
 * selected key columns, the aggregation-specific identity columns, and
 * (in verbose caller mode) the saved callstack, one symbol per line.
 */
static void print_lock_stat_stdio(struct lock_contention *con, struct lock_stat *st)
{
	struct lock_key *key;
	struct thread *t;
	int pid;

	list_for_each_entry(key, &lock_keys, list) {
		key->print(key, st);
		fprintf(lock_output, " ");
	}

	switch (aggr_mode) {
	case LOCK_AGGR_CALLER:
		fprintf(lock_output, "  %10s   %s\n", get_type_str(st->flags), st->name);
		break;
	case LOCK_AGGR_TASK:
		/* in task mode st->addr holds the tid */
		pid = st->addr;
		t = perf_session__findnew(session, pid);
		/*
		 * NOTE(review): no thread__put() after findnew here;
		 * presumably intentional so the comm string stays valid —
		 * verify against refcount expectations.
		 */
		fprintf(lock_output, "  %10d   %s\n",
			pid, pid == -1 ? "Unknown" : thread__comm_str(t));
		break;
	case LOCK_AGGR_ADDR:
		fprintf(lock_output, "  %016llx   %s (%s)\n", (unsigned long long)st->addr,
			st->name, get_type_name(st->flags));
		break;
	default:
		break;
	}

	if (aggr_mode == LOCK_AGGR_CALLER && verbose > 0) {
		struct map *kmap;
		struct symbol *sym;
		char buf[128];
		u64 ip;

		/* callstack is zero-terminated, at most max_stack_depth deep */
		for (int i = 0; i < max_stack_depth; i++) {
			if (!st->callstack || !st->callstack[i])
				break;

			ip = st->callstack[i];
			sym = machine__find_kernel_symbol(con->machine, ip, &kmap);
			get_symbol_name_offset(kmap, sym, ip, buf, sizeof(buf));
			fprintf(lock_output, "\t\t\t%#lx  %s\n", (unsigned long)ip, buf);
		}
	}
}
1746  
/*
 * Print one contention result row in separator-delimited form; like
 * print_lock_stat_stdio() but the verbose callstack is appended on the
 * same line, frames joined with ':'.
 */
static void print_lock_stat_csv(struct lock_contention *con, struct lock_stat *st,
				const char *sep)
{
	struct lock_key *key;
	struct thread *t;
	int pid;

	list_for_each_entry(key, &lock_keys, list) {
		key->print(key, st);
		fprintf(lock_output, "%s ", sep);
	}

	switch (aggr_mode) {
	case LOCK_AGGR_CALLER:
		fprintf(lock_output, "%s%s %s", get_type_str(st->flags), sep, st->name);
		/* newline is deferred to the verbose stacktrace block below */
		if (verbose <= 0)
			fprintf(lock_output, "\n");
		break;
	case LOCK_AGGR_TASK:
		/* in task mode st->addr holds the tid */
		pid = st->addr;
		t = perf_session__findnew(session, pid);
		/*
		 * NOTE(review): no thread__put() after findnew here;
		 * presumably intentional so the comm string stays valid —
		 * verify against refcount expectations.
		 */
		fprintf(lock_output, "%d%s %s\n", pid, sep,
			pid == -1 ? "Unknown" : thread__comm_str(t));
		break;
	case LOCK_AGGR_ADDR:
		fprintf(lock_output, "%llx%s %s%s %s\n", (unsigned long long)st->addr, sep,
			st->name, sep, get_type_name(st->flags));
		break;
	default:
		break;
	}

	if (aggr_mode == LOCK_AGGR_CALLER && verbose > 0) {
		struct map *kmap;
		struct symbol *sym;
		char buf[128];
		u64 ip;

		/* callstack is zero-terminated, at most max_stack_depth deep */
		for (int i = 0; i < max_stack_depth; i++) {
			if (!st->callstack || !st->callstack[i])
				break;

			ip = st->callstack[i];
			sym = machine__find_kernel_symbol(con->machine, ip, &kmap);
			get_symbol_name_offset(kmap, sym, ip, buf, sizeof(buf));
			fprintf(lock_output, "%s %#lx %s", i ? ":" : sep, (unsigned long) ip, buf);
		}
		fprintf(lock_output, "\n");
	}
}
1797  
print_lock_stat(struct lock_contention * con,struct lock_stat * st)1798  static void print_lock_stat(struct lock_contention *con, struct lock_stat *st)
1799  {
1800  	if (symbol_conf.field_sep)
1801  		print_lock_stat_csv(con, st, symbol_conf.field_sep);
1802  	else
1803  		print_lock_stat_stdio(con, st);
1804  }
1805  
print_footer_stdio(int total,int bad,struct lock_contention_fails * fails)1806  static void print_footer_stdio(int total, int bad, struct lock_contention_fails *fails)
1807  {
1808  	/* Output for debug, this have to be removed */
1809  	int broken = fails->task + fails->stack + fails->time + fails->data;
1810  
1811  	if (!use_bpf)
1812  		print_bad_events(bad, total);
1813  
1814  	if (quiet || total == 0 || (broken == 0 && verbose <= 0))
1815  		return;
1816  
1817  	total += broken;
1818  	fprintf(lock_output, "\n=== output for debug ===\n\n");
1819  	fprintf(lock_output, "bad: %d, total: %d\n", broken, total);
1820  	fprintf(lock_output, "bad rate: %.2f %%\n", 100.0 * broken / total);
1821  
1822  	fprintf(lock_output, "histogram of failure reasons\n");
1823  	fprintf(lock_output, " %10s: %d\n", "task", fails->task);
1824  	fprintf(lock_output, " %10s: %d\n", "stack", fails->stack);
1825  	fprintf(lock_output, " %10s: %d\n", "time", fails->time);
1826  	fprintf(lock_output, " %10s: %d\n", "data", fails->data);
1827  }
1828  
print_footer_csv(int total,int bad,struct lock_contention_fails * fails,const char * sep)1829  static void print_footer_csv(int total, int bad, struct lock_contention_fails *fails,
1830  			     const char *sep)
1831  {
1832  	/* Output for debug, this have to be removed */
1833  	if (use_bpf)
1834  		bad = fails->task + fails->stack + fails->time + fails->data;
1835  
1836  	if (quiet || total == 0 || (bad == 0 && verbose <= 0))
1837  		return;
1838  
1839  	total += bad;
1840  	fprintf(lock_output, "# debug: total=%d%s bad=%d", total, sep, bad);
1841  
1842  	if (use_bpf) {
1843  		fprintf(lock_output, "%s bad_%s=%d", sep, "task", fails->task);
1844  		fprintf(lock_output, "%s bad_%s=%d", sep, "stack", fails->stack);
1845  		fprintf(lock_output, "%s bad_%s=%d", sep, "time", fails->time);
1846  		fprintf(lock_output, "%s bad_%s=%d", sep, "data", fails->data);
1847  	} else {
1848  		int i;
1849  		const char *name[4] = { "acquire", "acquired", "contended", "release" };
1850  
1851  		for (i = 0; i < BROKEN_MAX; i++)
1852  			fprintf(lock_output, "%s bad_%s=%d", sep, name[i], bad_hist[i]);
1853  	}
1854  	fprintf(lock_output, "\n");
1855  }
1856  
print_footer(int total,int bad,struct lock_contention_fails * fails)1857  static void print_footer(int total, int bad, struct lock_contention_fails *fails)
1858  {
1859  	if (symbol_conf.field_sep)
1860  		print_footer_csv(total, bad, fails, symbol_conf.field_sep);
1861  	else
1862  		print_footer_stdio(total, bad, fails);
1863  }
1864  
print_contention_result(struct lock_contention * con)1865  static void print_contention_result(struct lock_contention *con)
1866  {
1867  	struct lock_stat *st;
1868  	int bad, total, printed;
1869  
1870  	if (!quiet)
1871  		print_header();
1872  
1873  	bad = total = printed = 0;
1874  
1875  	while ((st = pop_from_result())) {
1876  		total += use_bpf ? st->nr_contended : 1;
1877  		if (st->broken)
1878  			bad++;
1879  
1880  		if (!st->wait_time_total)
1881  			continue;
1882  
1883  		print_lock_stat(con, st);
1884  
1885  		if (++printed >= print_nr_entries)
1886  			break;
1887  	}
1888  
1889  	if (print_nr_entries) {
1890  		/* update the total/bad stats */
1891  		while ((st = pop_from_result())) {
1892  			total += use_bpf ? st->nr_contended : 1;
1893  			if (st->broken)
1894  				bad++;
1895  		}
1896  	}
1897  	/* some entries are collected but hidden by the callstack filter */
1898  	total += con->nr_filtered;
1899  
1900  	print_footer(total, bad, &con->fails);
1901  }
1902  
/* allow opening perf.data files owned by another uid (--force) */
static bool force;

/*
 * Implementation of 'perf lock report' and (via display_info) 'perf
 * lock info': replay a perf.data file through the tracepoint handlers,
 * then print either aggregated lock statistics or the requested info
 * dump.  Returns 0 on success, negative error code otherwise.
 */
static int __cmd_report(bool display_info)
{
	int err = -EINVAL;
	struct perf_tool eops = {
		.attr		 = perf_event__process_attr,
		.event_update	 = process_event_update,
		.sample		 = process_sample_event,
		.comm		 = perf_event__process_comm,
		.mmap		 = perf_event__process_mmap,
		.namespaces	 = perf_event__process_namespaces,
		.tracing_data	 = perf_event__process_tracing_data,
		.ordered_events	 = true,
	};
	struct perf_data data = {
		.path  = input_name,
		.mode  = PERF_DATA_MODE_READ,
		.force = force,
	};

	session = perf_session__new(&data, &eops);
	if (IS_ERR(session)) {
		pr_err("Initializing perf session failed\n");
		return PTR_ERR(session);
	}

	symbol_conf.allow_aliases = true;
	symbol__init(&session->header.env);

	/* in pipe mode the handlers are set from process_event_update() instead */
	if (!data.is_pipe) {
		if (!perf_session__has_traces(session, "lock record"))
			goto out_delete;

		if (perf_session__set_tracepoints_handlers(session, lock_tracepoints)) {
			pr_err("Initializing perf session tracepoint handlers failed\n");
			goto out_delete;
		}

		if (perf_session__set_tracepoints_handlers(session, contention_tracepoints)) {
			pr_err("Initializing perf session tracepoint handlers failed\n");
			goto out_delete;
		}
	}

	if (setup_output_field(false, output_fields))
		goto out_delete;

	if (select_key(false))
		goto out_delete;

	if (show_thread_stats)
		aggr_mode = LOCK_AGGR_TASK;

	err = perf_session__process_events(session);
	if (err)
		goto out_delete;

	setup_pager();
	if (display_info) /* used for info subcommand */
		err = dump_info();
	else {
		combine_result();
		sort_result();
		print_result();
	}

out_delete:
	perf_session__delete(session);
	return err;
}
1974  
/*
 * Do-nothing handler installed for SIGINT/SIGCHLD/SIGTERM so that
 * pause() in __cmd_contention() returns instead of the default signal
 * action terminating the process.
 */
static void sighandler(int sig __maybe_unused)
{
}
1978  
check_lock_contention_options(const struct option * options,const char * const * usage)1979  static int check_lock_contention_options(const struct option *options,
1980  					 const char * const *usage)
1981  
1982  {
1983  	if (show_thread_stats && show_lock_addrs) {
1984  		pr_err("Cannot use thread and addr mode together\n");
1985  		parse_options_usage(usage, options, "threads", 0);
1986  		parse_options_usage(NULL, options, "lock-addr", 0);
1987  		return -1;
1988  	}
1989  
1990  	if (show_lock_owner && !use_bpf) {
1991  		pr_err("Lock owners are available only with BPF\n");
1992  		parse_options_usage(usage, options, "lock-owner", 0);
1993  		parse_options_usage(NULL, options, "use-bpf", 0);
1994  		return -1;
1995  	}
1996  
1997  	if (show_lock_owner && show_lock_addrs) {
1998  		pr_err("Cannot use owner and addr mode together\n");
1999  		parse_options_usage(usage, options, "lock-owner", 0);
2000  		parse_options_usage(NULL, options, "lock-addr", 0);
2001  		return -1;
2002  	}
2003  
2004  	if (symbol_conf.field_sep) {
2005  		if (strstr(symbol_conf.field_sep, ":") || /* part of type flags */
2006  		    strstr(symbol_conf.field_sep, "+") || /* part of caller offset */
2007  		    strstr(symbol_conf.field_sep, ".")) { /* can be in a symbol name */
2008  			pr_err("Cannot use the separator that is already used\n");
2009  			parse_options_usage(usage, options, "x", 1);
2010  			return -1;
2011  		}
2012  	}
2013  
2014  	if (show_lock_owner)
2015  		show_thread_stats = true;
2016  
2017  	return 0;
2018  }
2019  
/*
 * Implementation of 'perf lock contention': either attach BPF programs
 * live (optionally running a workload from argv) or replay a perf.data
 * file, then sort and print the contention results.
 * Returns 0 on success, negative error code otherwise.
 */
static int __cmd_contention(int argc, const char **argv)
{
	int err = -EINVAL;
	struct perf_tool eops = {
		.attr		 = perf_event__process_attr,
		.event_update	 = process_event_update,
		.sample		 = process_sample_event,
		.comm		 = perf_event__process_comm,
		.mmap		 = perf_event__process_mmap,
		.tracing_data	 = perf_event__process_tracing_data,
		.ordered_events	 = true,
	};
	struct perf_data data = {
		.path  = input_name,
		.mode  = PERF_DATA_MODE_READ,
		.force = force,
	};
	struct lock_contention con = {
		.target = &target,
		.map_nr_entries = bpf_map_entries,
		.max_stack = max_stack_depth,
		.stack_skip = stack_skip,
		.filters = &filters,
		.save_callstack = needs_callstack(),
		.owner = show_lock_owner,
	};

	lockhash_table = calloc(LOCKHASH_SIZE, sizeof(*lockhash_table));
	if (!lockhash_table)
		return -ENOMEM;

	con.result = &lockhash_table[0];

	/* in BPF mode no perf.data file is read */
	session = perf_session__new(use_bpf ? NULL : &data, &eops);
	if (IS_ERR(session)) {
		pr_err("Initializing perf session failed\n");
		err = PTR_ERR(session);
		session = NULL;	/* keep the cleanup path safe */
		goto out_delete;
	}

	con.machine = &session->machines.host;

	con.aggr_mode = aggr_mode = show_thread_stats ? LOCK_AGGR_TASK :
		show_lock_addrs ? LOCK_AGGR_ADDR : LOCK_AGGR_CALLER;

	/* caller aggregation needs the callstack regardless of filters */
	if (con.aggr_mode == LOCK_AGGR_CALLER)
		con.save_callstack = true;

	symbol_conf.allow_aliases = true;
	symbol__init(&session->header.env);

	if (use_bpf) {
		err = target__validate(&target);
		if (err) {
			char errbuf[512];

			target__strerror(&target, err, errbuf, 512);
			pr_err("%s\n", errbuf);
			goto out_delete;
		}

		/* let pause() below return on these signals */
		signal(SIGINT, sighandler);
		signal(SIGCHLD, sighandler);
		signal(SIGTERM, sighandler);

		con.evlist = evlist__new();
		if (con.evlist == NULL) {
			err = -ENOMEM;
			goto out_delete;
		}

		err = evlist__create_maps(con.evlist, &target);
		if (err < 0)
			goto out_delete;

		/* remaining argv is a workload to run while tracing */
		if (argc) {
			err = evlist__prepare_workload(con.evlist, &target,
						       argv, false, NULL);
			if (err < 0)
				goto out_delete;
		}

		if (lock_contention_prepare(&con) < 0) {
			pr_err("lock contention BPF setup failed\n");
			goto out_delete;
		}
	} else if (!data.is_pipe) {
		if (!perf_session__has_traces(session, "lock record"))
			goto out_delete;

		if (!evlist__find_evsel_by_str(session->evlist,
					       "lock:contention_begin")) {
			pr_err("lock contention evsel not found\n");
			goto out_delete;
		}

		if (perf_session__set_tracepoints_handlers(session,
						contention_tracepoints)) {
			pr_err("Initializing perf session tracepoint handlers failed\n");
			goto out_delete;
		}
	}

	if (setup_output_field(true, output_fields))
		goto out_delete;

	if (select_key(true))
		goto out_delete;

	if (symbol_conf.field_sep) {
		int i;
		struct lock_key *keys = contention_keys;

		/* do not align output in CSV format */
		for (i = 0; keys[i].name; i++)
			keys[i].len = 0;
	}

	if (use_bpf) {
		lock_contention_start();
		if (argc)
			evlist__start_workload(con.evlist);

		/* wait for signal */
		pause();

		lock_contention_stop();
		lock_contention_read(&con);
	} else {
		err = perf_session__process_events(session);
		if (err)
			goto out_delete;
	}

	setup_pager();

	sort_contention_result();
	print_contention_result(&con);

out_delete:
	lock_filter_finish();
	evlist__delete(con.evlist);
	lock_contention_finish();
	perf_session__delete(session);
	zfree(&lockhash_table);
	return err;
}
2168  
2169  
__cmd_record(int argc,const char ** argv)2170  static int __cmd_record(int argc, const char **argv)
2171  {
2172  	const char *record_args[] = {
2173  		"record", "-R", "-m", "1024", "-c", "1", "--synth", "task",
2174  	};
2175  	const char *callgraph_args[] = {
2176  		"--call-graph", "fp," __stringify(CONTENTION_STACK_DEPTH),
2177  	};
2178  	unsigned int rec_argc, i, j, ret;
2179  	unsigned int nr_tracepoints;
2180  	unsigned int nr_callgraph_args = 0;
2181  	const char **rec_argv;
2182  	bool has_lock_stat = true;
2183  
2184  	for (i = 0; i < ARRAY_SIZE(lock_tracepoints); i++) {
2185  		if (!is_valid_tracepoint(lock_tracepoints[i].name)) {
2186  			pr_debug("tracepoint %s is not enabled. "
2187  				 "Are CONFIG_LOCKDEP and CONFIG_LOCK_STAT enabled?\n",
2188  				 lock_tracepoints[i].name);
2189  			has_lock_stat = false;
2190  			break;
2191  		}
2192  	}
2193  
2194  	if (has_lock_stat)
2195  		goto setup_args;
2196  
2197  	for (i = 0; i < ARRAY_SIZE(contention_tracepoints); i++) {
2198  		if (!is_valid_tracepoint(contention_tracepoints[i].name)) {
2199  			pr_err("tracepoint %s is not enabled.\n",
2200  			       contention_tracepoints[i].name);
2201  			return 1;
2202  		}
2203  	}
2204  
2205  	nr_callgraph_args = ARRAY_SIZE(callgraph_args);
2206  
2207  setup_args:
2208  	rec_argc = ARRAY_SIZE(record_args) + nr_callgraph_args + argc - 1;
2209  
2210  	if (has_lock_stat)
2211  		nr_tracepoints = ARRAY_SIZE(lock_tracepoints);
2212  	else
2213  		nr_tracepoints = ARRAY_SIZE(contention_tracepoints);
2214  
2215  	/* factor of 2 is for -e in front of each tracepoint */
2216  	rec_argc += 2 * nr_tracepoints;
2217  
2218  	rec_argv = calloc(rec_argc + 1, sizeof(char *));
2219  	if (!rec_argv)
2220  		return -ENOMEM;
2221  
2222  	for (i = 0; i < ARRAY_SIZE(record_args); i++)
2223  		rec_argv[i] = strdup(record_args[i]);
2224  
2225  	for (j = 0; j < nr_tracepoints; j++) {
2226  		const char *ev_name;
2227  
2228  		if (has_lock_stat)
2229  			ev_name = strdup(lock_tracepoints[j].name);
2230  		else
2231  			ev_name = strdup(contention_tracepoints[j].name);
2232  
2233  		if (!ev_name)
2234  			return -ENOMEM;
2235  
2236  		rec_argv[i++] = "-e";
2237  		rec_argv[i++] = ev_name;
2238  	}
2239  
2240  	for (j = 0; j < nr_callgraph_args; j++, i++)
2241  		rec_argv[i] = callgraph_args[j];
2242  
2243  	for (j = 1; j < (unsigned int)argc; j++, i++)
2244  		rec_argv[i] = argv[j];
2245  
2246  	BUG_ON(i != rec_argc);
2247  
2248  	ret = cmd_record(i, rec_argv);
2249  	free(rec_argv);
2250  	return ret;
2251  }
2252  
/*
 * Option callback for --map-nr-entries: parse a non-negative number
 * (any base accepted by strtoul) into the unsigned long the option
 * points at.  Returns 0 on success, -1 on malformed input.
 */
static int parse_map_entry(const struct option *opt, const char *str,
			    int unset __maybe_unused)
{
	unsigned long *slot = (unsigned long *)opt->value;
	char *end = NULL;
	unsigned long parsed;

	errno = 0;
	parsed = strtoul(str, &end, 0);
	/* reject trailing garbage as well as range errors */
	if (errno != 0 || *end != '\0') {
		pr_err("invalid BPF map length: %s\n", str);
		return -1;
	}

	*slot = parsed;
	return 0;
}
2270  
/*
 * Option callback for --max-stack: parse and range-check the stack depth.
 *
 * The option value points at 'static int max_stack_depth', so it must be
 * written through an int pointer.  The previous code cast it to
 * 'unsigned long *' and stored a long, overwriting memory past the int
 * on 64-bit hosts.
 *
 * Returns 0 on success, -1 on malformed or out-of-range input.
 */
static int parse_max_stack(const struct option *opt, const char *str,
			   int unset __maybe_unused)
{
	int *len = (int *)opt->value;
	long val;
	char *endptr;

	errno = 0;
	val = strtol(str, &endptr, 0);
	if (*endptr != '\0' || errno != 0) {
		pr_err("invalid max stack depth: %s\n", str);
		return -1;
	}

	/* depth must fit within the kernel's perf_event_max_stack limit */
	if (val < 0 || val > sysctl__max_stack()) {
		pr_err("invalid max stack depth: %ld\n", val);
		return -1;
	}

	*len = val;
	return 0;
}
2293  
add_lock_type(unsigned int flags)2294  static bool add_lock_type(unsigned int flags)
2295  {
2296  	unsigned int *tmp;
2297  
2298  	tmp = realloc(filters.types, (filters.nr_types + 1) * sizeof(*filters.types));
2299  	if (tmp == NULL)
2300  		return false;
2301  
2302  	tmp[filters.nr_types++] = flags;
2303  	filters.types = tmp;
2304  	return true;
2305  }
2306  
/*
 * Option callback for --type-filter: accept a comma/space separated list
 * of lock type names ("rwlock") or flag strings containing ':' and add
 * their flag values to the filter.  Returns 0 on success, -1 on an
 * unknown name or allocation failure.
 */
static int parse_lock_type(const struct option *opt __maybe_unused, const char *str,
			   int unset __maybe_unused)
{
	char *s, *tmp, *tok;

	s = strdup(str);
	if (s == NULL)
		return -1;

	for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) {
		bool found = false;

		/* `tok` is `str` in `lock_type_table` if it contains ':'. */
		if (strchr(tok, ':')) {
			for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) {
				if (strcmp(lock_type_table[i].str, tok))
					continue;

				/*
				 * A matched name with a failed add is an OOM
				 * condition, not an unknown flag; don't print
				 * the misleading "Unknown" message for it.
				 */
				if (!add_lock_type(lock_type_table[i].flags)) {
					free(s);
					return -1;
				}
				found = true;
				break;
			}

			if (!found) {
				pr_err("Unknown lock flags name: %s\n", tok);
				free(s);
				return -1;
			}

			continue;
		}

		/*
		 * Otherwise `tok` is `name` in `lock_type_table`.
		 * Single lock name could contain multiple flags.
		 */
		for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) {
			if (!strcmp(lock_type_table[i].name, tok)) {
				if (add_lock_type(lock_type_table[i].flags)) {
					found = true;
				} else {
					free(s);
					return -1;
				}
			}
		}

		if (!found) {
			pr_err("Unknown lock name: %s\n", tok);
			free(s);
			return -1;
		}

	}

	free(s);
	return 0;
}
2364  
add_lock_addr(unsigned long addr)2365  static bool add_lock_addr(unsigned long addr)
2366  {
2367  	unsigned long *tmp;
2368  
2369  	tmp = realloc(filters.addrs, (filters.nr_addrs + 1) * sizeof(*filters.addrs));
2370  	if (tmp == NULL) {
2371  		pr_err("Memory allocation failure\n");
2372  		return false;
2373  	}
2374  
2375  	tmp[filters.nr_addrs++] = addr;
2376  	filters.addrs = tmp;
2377  	return true;
2378  }
2379  
add_lock_sym(char * name)2380  static bool add_lock_sym(char *name)
2381  {
2382  	char **tmp;
2383  	char *sym = strdup(name);
2384  
2385  	if (sym == NULL) {
2386  		pr_err("Memory allocation failure\n");
2387  		return false;
2388  	}
2389  
2390  	tmp = realloc(filters.syms, (filters.nr_syms + 1) * sizeof(*filters.syms));
2391  	if (tmp == NULL) {
2392  		pr_err("Memory allocation failure\n");
2393  		free(sym);
2394  		return false;
2395  	}
2396  
2397  	tmp[filters.nr_syms++] = sym;
2398  	filters.syms = tmp;
2399  	return true;
2400  }
2401  
/*
 * Option callback for --lock-filter: accept a comma/space separated list
 * of hex addresses and/or symbol names.  Tokens that parse fully as hex
 * become address filters; anything else is saved as a symbol name to be
 * resolved once kernel symbols are loaded.  Returns 0 on success, -1 on
 * allocation failure.
 */
static int parse_lock_addr(const struct option *opt __maybe_unused, const char *str,
			   int unset __maybe_unused)
{
	char *s, *tmp, *tok;
	int ret = 0;
	u64 addr;

	s = strdup(str);
	if (s == NULL)
		return -1;

	for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) {
		char *end;

		/*
		 * Use strtoull: addr is u64 and kernel addresses would be
		 * truncated by strtoul on 32-bit hosts.
		 */
		addr = strtoull(tok, &end, 16);
		if (*end == '\0') {
			if (!add_lock_addr(addr)) {
				ret = -1;
				break;
			}
			continue;
		}

		/*
		 * At this moment, we don't have kernel symbols.  Save the symbols
		 * in a separate list and resolve them to addresses later.
		 */
		if (!add_lock_sym(tok)) {
			ret = -1;
			break;
		}
	}

	free(s);
	return ret;
}
2438  
/*
 * Option callback for --callstack-filter: accept a comma/space separated
 * list of function names and queue each one on callstack_filters.
 * Returns 0 on success, -1 on allocation failure.
 */
static int parse_call_stack(const struct option *opt __maybe_unused, const char *str,
			   int unset __maybe_unused)
{
	char *s, *tmp, *tok;
	int ret = 0;

	s = strdup(str);
	if (s == NULL)
		return -1;

	for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) {
		struct callstack_filter *entry;

		/* entry->name is a flexible array holding the copied token */
		entry = malloc(sizeof(*entry) + strlen(tok) + 1);
		if (entry == NULL) {
			pr_err("Memory allocation failure\n");
			free(s);	/* was leaked on this error path */
			return -1;
		}

		strcpy(entry->name, tok);
		list_add_tail(&entry->list, &callstack_filters);
	}

	free(s);
	return ret;
}
2465  
/*
 * Option callback for --output: open the named file for writing and
 * redirect lock_output to it, recording the name in the option's value.
 * Returns 0 on success, -1 when no name was given or the file cannot
 * be opened.
 */
static int parse_output(const struct option *opt __maybe_unused, const char *str,
			int unset __maybe_unused)
{
	const char **name = (const char **)opt->value;

	if (!str)
		return -1;

	lock_output = fopen(str, "w");
	if (!lock_output) {
		pr_err("Cannot open %s\n", str);
		return -1;
	}

	*name = str;
	return 0;
}
2483  
cmd_lock(int argc,const char ** argv)2484  int cmd_lock(int argc, const char **argv)
2485  {
2486  	const struct option lock_options[] = {
2487  	OPT_STRING('i', "input", &input_name, "file", "input file name"),
2488  	OPT_CALLBACK(0, "output", &output_name, "file", "output file name", parse_output),
2489  	OPT_INCR('v', "verbose", &verbose, "be more verbose (show symbol address, etc)"),
2490  	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"),
2491  	OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
2492  	OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
2493  		   "file", "vmlinux pathname"),
2494  	OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name,
2495  		   "file", "kallsyms pathname"),
2496  	OPT_BOOLEAN('q', "quiet", &quiet, "Do not show any warnings or messages"),
2497  	OPT_END()
2498  	};
2499  
2500  	const struct option info_options[] = {
2501  	OPT_BOOLEAN('t', "threads", &info_threads,
2502  		    "dump thread list in perf.data"),
2503  	OPT_BOOLEAN('m', "map", &info_map,
2504  		    "map of lock instances (address:name table)"),
2505  	OPT_PARENT(lock_options)
2506  	};
2507  
2508  	const struct option report_options[] = {
2509  	OPT_STRING('k', "key", &sort_key, "acquired",
2510  		    "key for sorting (acquired / contended / avg_wait / wait_total / wait_max / wait_min)"),
2511  	OPT_STRING('F', "field", &output_fields, NULL,
2512  		    "output fields (acquired / contended / avg_wait / wait_total / wait_max / wait_min)"),
2513  	/* TODO: type */
2514  	OPT_BOOLEAN('c', "combine-locks", &combine_locks,
2515  		    "combine locks in the same class"),
2516  	OPT_BOOLEAN('t', "threads", &show_thread_stats,
2517  		    "show per-thread lock stats"),
2518  	OPT_INTEGER('E', "entries", &print_nr_entries, "display this many functions"),
2519  	OPT_PARENT(lock_options)
2520  	};
2521  
2522  	struct option contention_options[] = {
2523  	OPT_STRING('k', "key", &sort_key, "wait_total",
2524  		    "key for sorting (contended / wait_total / wait_max / wait_min / avg_wait)"),
2525  	OPT_STRING('F', "field", &output_fields, "contended,wait_total,wait_max,avg_wait",
2526  		    "output fields (contended / wait_total / wait_max / wait_min / avg_wait)"),
2527  	OPT_BOOLEAN('t', "threads", &show_thread_stats,
2528  		    "show per-thread lock stats"),
2529  	OPT_BOOLEAN('b', "use-bpf", &use_bpf, "use BPF program to collect lock contention stats"),
2530  	OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
2531  		    "System-wide collection from all CPUs"),
2532  	OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
2533  		    "List of cpus to monitor"),
2534  	OPT_STRING('p', "pid", &target.pid, "pid",
2535  		   "Trace on existing process id"),
2536  	OPT_STRING(0, "tid", &target.tid, "tid",
2537  		   "Trace on existing thread id (exclusive to --pid)"),
2538  	OPT_CALLBACK('M', "map-nr-entries", &bpf_map_entries, "num",
2539  		     "Max number of BPF map entries", parse_map_entry),
2540  	OPT_CALLBACK(0, "max-stack", &max_stack_depth, "num",
2541  		     "Set the maximum stack depth when collecting lock contention, "
2542  		     "Default: " __stringify(CONTENTION_STACK_DEPTH), parse_max_stack),
2543  	OPT_INTEGER(0, "stack-skip", &stack_skip,
2544  		    "Set the number of stack depth to skip when finding a lock caller, "
2545  		    "Default: " __stringify(CONTENTION_STACK_SKIP)),
2546  	OPT_INTEGER('E', "entries", &print_nr_entries, "display this many functions"),
2547  	OPT_BOOLEAN('l', "lock-addr", &show_lock_addrs, "show lock stats by address"),
2548  	OPT_CALLBACK('Y', "type-filter", NULL, "FLAGS",
2549  		     "Filter specific type of locks", parse_lock_type),
2550  	OPT_CALLBACK('L', "lock-filter", NULL, "ADDRS/NAMES",
2551  		     "Filter specific address/symbol of locks", parse_lock_addr),
2552  	OPT_CALLBACK('S', "callstack-filter", NULL, "NAMES",
2553  		     "Filter specific function in the callstack", parse_call_stack),
2554  	OPT_BOOLEAN('o', "lock-owner", &show_lock_owner, "show lock owners instead of waiters"),
2555  	OPT_STRING_NOEMPTY('x', "field-separator", &symbol_conf.field_sep, "separator",
2556  		   "print result in CSV format with custom separator"),
2557  	OPT_PARENT(lock_options)
2558  	};
2559  
2560  	const char * const info_usage[] = {
2561  		"perf lock info [<options>]",
2562  		NULL
2563  	};
2564  	const char *const lock_subcommands[] = { "record", "report", "script",
2565  						 "info", "contention", NULL };
2566  	const char *lock_usage[] = {
2567  		NULL,
2568  		NULL
2569  	};
2570  	const char * const report_usage[] = {
2571  		"perf lock report [<options>]",
2572  		NULL
2573  	};
2574  	const char * const contention_usage[] = {
2575  		"perf lock contention [<options>]",
2576  		NULL
2577  	};
2578  	unsigned int i;
2579  	int rc = 0;
2580  
2581  	lockhash_table = calloc(LOCKHASH_SIZE, sizeof(*lockhash_table));
2582  	if (!lockhash_table)
2583  		return -ENOMEM;
2584  
2585  	for (i = 0; i < LOCKHASH_SIZE; i++)
2586  		INIT_HLIST_HEAD(lockhash_table + i);
2587  
2588  	lock_output = stderr;
2589  	argc = parse_options_subcommand(argc, argv, lock_options, lock_subcommands,
2590  					lock_usage, PARSE_OPT_STOP_AT_NON_OPTION);
2591  	if (!argc)
2592  		usage_with_options(lock_usage, lock_options);
2593  
2594  	if (strlen(argv[0]) > 2 && strstarts("record", argv[0])) {
2595  		return __cmd_record(argc, argv);
2596  	} else if (strlen(argv[0]) > 2 && strstarts("report", argv[0])) {
2597  		trace_handler = &report_lock_ops;
2598  		if (argc) {
2599  			argc = parse_options(argc, argv,
2600  					     report_options, report_usage, 0);
2601  			if (argc)
2602  				usage_with_options(report_usage, report_options);
2603  		}
2604  		rc = __cmd_report(false);
2605  	} else if (!strcmp(argv[0], "script")) {
2606  		/* Aliased to 'perf script' */
2607  		rc = cmd_script(argc, argv);
2608  	} else if (!strcmp(argv[0], "info")) {
2609  		if (argc) {
2610  			argc = parse_options(argc, argv,
2611  					     info_options, info_usage, 0);
2612  			if (argc)
2613  				usage_with_options(info_usage, info_options);
2614  		}
2615  		/* recycling report_lock_ops */
2616  		trace_handler = &report_lock_ops;
2617  		rc = __cmd_report(true);
2618  	} else if (strlen(argv[0]) > 2 && strstarts("contention", argv[0])) {
2619  		trace_handler = &contention_lock_ops;
2620  		sort_key = "wait_total";
2621  		output_fields = "contended,wait_total,wait_max,avg_wait";
2622  
2623  #ifndef HAVE_BPF_SKEL
2624  		set_option_nobuild(contention_options, 'b', "use-bpf",
2625  				   "no BUILD_BPF_SKEL=1", false);
2626  #endif
2627  		if (argc) {
2628  			argc = parse_options(argc, argv, contention_options,
2629  					     contention_usage, 0);
2630  		}
2631  
2632  		if (check_lock_contention_options(contention_options,
2633  						  contention_usage) < 0)
2634  			return -1;
2635  
2636  		rc = __cmd_contention(argc, argv);
2637  	} else {
2638  		usage_with_options(lock_usage, lock_options);
2639  	}
2640  
2641  	/* free usage string allocated by parse_options_subcommand */
2642  	free((void *)lock_usage[0]);
2643  
2644  	zfree(&lockhash_table);
2645  	return rc;
2646  }
2647