// SPDX-License-Identifier: GPL-2.0
#include <errno.h>
#include <inttypes.h>
#include "builtin.h"
#include "perf.h"

#include "util/evlist.h" // for struct evsel_str_handler
#include "util/evsel.h"
#include "util/symbol.h"
#include "util/thread.h"
#include "util/header.h"
#include "util/target.h"
#include "util/callchain.h"
#include "util/lock-contention.h"

#include <subcmd/pager.h>
#include <subcmd/parse-options.h>
#include "util/trace-event.h"
#include "util/tracepoint.h"

#include "util/debug.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/data.h"
#include "util/string2.h"
#include "util/map.h"

#include <sys/types.h>
#include <sys/prctl.h>
#include <semaphore.h>
#include <pthread.h>
#include <math.h>
#include <limits.h>

#include <linux/list.h>
#include <linux/hash.h>
#include <linux/kernel.h>
#include <linux/zalloc.h>
#include <linux/err.h>
#include <linux/stringify.h>

static struct perf_session *session;
static struct target target;

/* based on kernel/lockdep.c */
#define LOCKHASH_BITS		12
#define LOCKHASH_SIZE		(1UL << LOCKHASH_BITS)

static struct hlist_head lockhash_table[LOCKHASH_SIZE];

#define __lockhashfn(key)	hash_long((unsigned long)key, LOCKHASH_BITS)
#define lockhashentry(key)	(lockhash_table + __lockhashfn((key)))

static struct rb_root thread_stats;

static bool combine_locks;
static bool show_thread_stats;
static bool use_bpf;
static unsigned long bpf_map_entries = 10240;

static enum {
	LOCK_AGGR_ADDR,
	LOCK_AGGR_TASK,
	LOCK_AGGR_CALLER,
} aggr_mode = LOCK_AGGR_ADDR;

static u64 sched_text_start;
static u64 sched_text_end;
static u64 lock_text_start;
static u64 lock_text_end;

static struct thread_stat *thread_stat_find(u32 tid)
{
	struct rb_node *node;
	struct thread_stat *st;

	node = thread_stats.rb_node;
	while (node) {
		st = container_of(node, struct thread_stat, rb);
		if (st->tid == tid)
			return st;
		else if (tid < st->tid)
			node = node->rb_left;
		else
			node = node->rb_right;
	}

	return NULL;
}

static void thread_stat_insert(struct thread_stat *new)
{
	struct rb_node **rb = &thread_stats.rb_node;
	struct rb_node *parent = NULL;
	struct thread_stat *p;

	while (*rb) {
		p = container_of(*rb, struct thread_stat, rb);
		parent = *rb;

		if (new->tid < p->tid)
			rb = &(*rb)->rb_left;
		else if (new->tid > p->tid)
			rb = &(*rb)->rb_right;
		else
			BUG_ON("inserting invalid thread_stat\n");
	}

	rb_link_node(&new->rb, parent, rb);
	rb_insert_color(&new->rb, &thread_stats);
}

static struct thread_stat *thread_stat_findnew_after_first(u32 tid)
{
	struct thread_stat *st;

	st = thread_stat_find(tid);
	if (st)
		return st;

	st = zalloc(sizeof(struct thread_stat));
	if (!st) {
		pr_err("memory allocation failed\n");
		return NULL;
	}

	st->tid = tid;
	INIT_LIST_HEAD(&st->seq_list);

	thread_stat_insert(st);

	return st;
}

static struct thread_stat *thread_stat_findnew_first(u32 tid);
static struct thread_stat *(*thread_stat_findnew)(u32 tid) =
	thread_stat_findnew_first;

static struct thread_stat *thread_stat_findnew_first(u32 tid)
{
	struct thread_stat *st;

	st = zalloc(sizeof(struct thread_stat));
	if (!st) {
		pr_err("memory allocation failed\n");
		return NULL;
	}
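
	/*
	 * This is the first lookup: seed the rbtree with this thread and
	 * switch thread_stat_findnew() to the regular find-or-insert path.
	 */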
	st->tid = tid;
	INIT_LIST_HEAD(&st->seq_list);

	rb_link_node(&st->rb, NULL, &thread_stats.rb_node);
	rb_insert_color(&st->rb, &thread_stats);

	thread_stat_findnew = thread_stat_findnew_after_first;
	return st;
}

/* build simple key functions: return whether "one" is bigger than "two" */
#define SINGLE_KEY(member)						\
	static int lock_stat_key_ ## member(struct lock_stat *one,	\
					    struct lock_stat *two)	\
	{								\
		return one->member > two->member;			\
	}

SINGLE_KEY(nr_acquired)
SINGLE_KEY(nr_contended)
SINGLE_KEY(avg_wait_time)
SINGLE_KEY(wait_time_total)
SINGLE_KEY(wait_time_max)

static int lock_stat_key_wait_time_min(struct lock_stat *one,
				       struct lock_stat *two)
{
	u64 s1 = one->wait_time_min;
	u64 s2 = two->wait_time_min;

	if (s1 == ULLONG_MAX)
		s1 = 0;
	if (s2 == ULLONG_MAX)
		s2 = 0;
	return s1 > s2;
}

struct lock_key {
	/*
	 * name: the key name specified by the user;
	 * this should be simpler than the raw name of the member,
	 * e.g. nr_acquired -> acquired, wait_time_total -> wait_total
	 */
	const char *name;
	/* header: the string printed on the header line */
	const char *header;
	/* len: the printing width of the field */
	int len;
	/* key: a pointer to a function to compare two lock stats for sorting */
	int (*key)(struct lock_stat*, struct lock_stat*);
	/* print: a pointer to a function to print a given lock stat */
	void (*print)(struct lock_key*, struct lock_stat*);
	/* list: list entry to link this */
	struct list_head list;
};

static void lock_stat_key_print_time(unsigned long long nsec, int len)
{
	static const struct {
		float base;
		const char *unit;
	} table[] = {
		{ 1e9 * 3600, "h " },
		{ 1e9 * 60, "m " },
		{ 1e9, "s " },
		{ 1e6, "ms" },
		{ 1e3, "us" },
		{ 0, NULL },
	};

	for (int i = 0; table[i].unit; i++) {
		if (nsec < table[i].base)
			continue;

		pr_info("%*.2f %s", len - 3, nsec / table[i].base, table[i].unit);
		return;
	}

	pr_info("%*llu %s", len - 3, nsec, "ns");
}

#define PRINT_KEY(member)						\
static void lock_stat_key_print_ ## member(struct lock_key *key,	\
					   struct lock_stat *ls)	\
{									\
	pr_info("%*llu", key->len, (unsigned long long)ls->member);	\
}

#define PRINT_TIME(member)						\
static void lock_stat_key_print_ ## member(struct lock_key *key,	\
					   struct lock_stat *ls)	\
{									\
	lock_stat_key_print_time((unsigned long long)ls->member, key->len); \
}

PRINT_KEY(nr_acquired)
PRINT_KEY(nr_contended)
PRINT_TIME(avg_wait_time)
PRINT_TIME(wait_time_total)
PRINT_TIME(wait_time_max)

static void lock_stat_key_print_wait_time_min(struct lock_key *key,
					      struct lock_stat *ls)
{
	u64 wait_time = ls->wait_time_min;

	if (wait_time == ULLONG_MAX)
		wait_time = 0;

	lock_stat_key_print_time(wait_time, key->len);
}

static const char *sort_key = "acquired";

static int (*compare)(struct lock_stat *, struct lock_stat *);

static struct rb_root sorted; /* place to store intermediate data */
static struct rb_root result; /* place to store sorted data */

static LIST_HEAD(lock_keys);
static const char *output_fields;

#define DEF_KEY_LOCK(name, header, fn_suffix, len)			\
	{ #name, header, len, lock_stat_key_ ## fn_suffix, lock_stat_key_print_ ## fn_suffix, {} }
static struct lock_key report_keys[] = {
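	/*
	 * Each entry is selectable as the sort key (-k) and as an output
	 * field (-F) of 'perf lock report'.
	 */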
	DEF_KEY_LOCK(acquired, "acquired", nr_acquired, 10),
	DEF_KEY_LOCK(contended, "contended", nr_contended, 10),
	DEF_KEY_LOCK(avg_wait, "avg wait", avg_wait_time, 12),
	DEF_KEY_LOCK(wait_total, "total wait", wait_time_total, 12),
	DEF_KEY_LOCK(wait_max, "max wait", wait_time_max, 12),
	DEF_KEY_LOCK(wait_min, "min wait", wait_time_min, 12),

	/* extra comparisons that are more complicated should go here */
	{ }
};

static struct lock_key contention_keys[] = {
	DEF_KEY_LOCK(contended, "contended", nr_contended, 10),
	DEF_KEY_LOCK(wait_total, "total wait", wait_time_total, 12),
	DEF_KEY_LOCK(wait_max, "max wait", wait_time_max, 12),
	DEF_KEY_LOCK(wait_min, "min wait", wait_time_min, 12),
	DEF_KEY_LOCK(avg_wait, "avg wait", avg_wait_time, 12),

	/* extra comparisons that are more complicated should go here */
	{ }
};

static int select_key(bool contention)
{
	int i;
	struct lock_key *keys = report_keys;

	if (contention)
		keys = contention_keys;

	for (i = 0; keys[i].name; i++) {
		if (!strcmp(keys[i].name, sort_key)) {
			compare = keys[i].key;

			/* selected key should be in the output fields */
			if (list_empty(&keys[i].list))
				list_add_tail(&keys[i].list, &lock_keys);

			return 0;
		}
	}

	pr_err("Unknown compare key: %s\n", sort_key);
	return -1;
}

static int add_output_field(bool contention, char *name)
{
	int i;
	struct lock_key *keys = report_keys;

	if (contention)
		keys = contention_keys;

	for (i = 0; keys[i].name; i++) {
		if (strcmp(keys[i].name, name))
			continue;

		/* prevent double link */
		if (list_empty(&keys[i].list))
			list_add_tail(&keys[i].list, &lock_keys);

		return 0;
	}

	pr_err("Unknown output field: %s\n", name);
	return -1;
}

static int setup_output_field(bool contention, const char *str)
{
	char *tok, *tmp, *orig;
	int i, ret = 0;
	struct lock_key *keys = report_keys;

	if (contention)
		keys = contention_keys;

	/* no output field given: use all of them */
	if (str == NULL) {
		for (i = 0; keys[i].name; i++)
			list_add_tail(&keys[i].list, &lock_keys);
		return 0;
	}

	for (i = 0; keys[i].name; i++)
		INIT_LIST_HEAD(&keys[i].list);

	orig = tmp = strdup(str);
	if (orig == NULL)
		return -ENOMEM;

	while ((tok = strsep(&tmp, ",")) != NULL) {
		ret = add_output_field(contention, tok);
		if (ret < 0)
			break;
	}
	free(orig);

	return ret;
}

static void combine_lock_stats(struct lock_stat *st)
{
	struct rb_node **rb = &sorted.rb_node;
	struct rb_node *parent = NULL;
	struct lock_stat *p;
	int ret;

	while (*rb) {
		p = container_of(*rb, struct lock_stat, rb);
		parent = *rb;

		if (st->name && p->name)
			ret = strcmp(st->name, p->name);
		else
			ret = !!st->name - !!p->name;

		if (ret == 0) {
			p->nr_acquired += st->nr_acquired;
			p->nr_contended += st->nr_contended;
			p->wait_time_total += st->wait_time_total;

			if (p->nr_contended)
				p->avg_wait_time = p->wait_time_total / p->nr_contended;

			if (p->wait_time_min > st->wait_time_min)
				p->wait_time_min = st->wait_time_min;
			if (p->wait_time_max < st->wait_time_max)
				p->wait_time_max = st->wait_time_max;

			p->broken |= st->broken;
			st->combined = 1;
			return;
		}

		if (ret < 0)
			rb = &(*rb)->rb_left;
		else
			rb = &(*rb)->rb_right;
	}

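	/* no stat with the same name exists: link this one as a new node */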
	rb_link_node(&st->rb, parent, rb);
	rb_insert_color(&st->rb, &sorted);
}

static void insert_to_result(struct lock_stat *st,
			     int (*bigger)(struct lock_stat *, struct lock_stat *))
{
	struct rb_node **rb = &result.rb_node;
	struct rb_node *parent = NULL;
	struct lock_stat *p;

	if (combine_locks && st->combined)
		return;

	while (*rb) {
		p = container_of(*rb, struct lock_stat, rb);
		parent = *rb;

		if (bigger(st, p))
			rb = &(*rb)->rb_left;
		else
			rb = &(*rb)->rb_right;
	}

	rb_link_node(&st->rb, parent, rb);
	rb_insert_color(&st->rb, &result);
}

/* returns the leftmost element of result, and erases it */
static struct lock_stat *pop_from_result(void)
{
	struct rb_node *node = result.rb_node;

	if (!node)
		return NULL;

	while (node->rb_left)
		node = node->rb_left;

	rb_erase(node, &result);
	return container_of(node, struct lock_stat, rb);
}

static struct lock_stat *lock_stat_find(u64 addr)
{
	struct hlist_head *entry = lockhashentry(addr);
	struct lock_stat *ret;

	hlist_for_each_entry(ret, entry, hash_entry) {
		if (ret->addr == addr)
			return ret;
	}
	return NULL;
}

static struct lock_stat *lock_stat_findnew(u64 addr, const char *name, int flags)
{
	struct hlist_head *entry = lockhashentry(addr);
	struct lock_stat *ret, *new;

	hlist_for_each_entry(ret, entry, hash_entry) {
		if (ret->addr == addr)
			return ret;
	}

	new = zalloc(sizeof(struct lock_stat));
	if (!new)
		goto alloc_failed;

	new->addr = addr;
	new->name = strdup(name);
	if (!new->name) {
		free(new);
		goto alloc_failed;
	}

	new->flags = flags;
	new->wait_time_min = ULLONG_MAX;

	hlist_add_head(&new->hash_entry, entry);
	return new;

alloc_failed:
	pr_err("memory allocation failed\n");
	return NULL;
}

struct trace_lock_handler {
	/* used when CONFIG_LOCKDEP is enabled */
	int (*acquire_event)(struct evsel *evsel,
			     struct perf_sample *sample);

	/* used when CONFIG_LOCKDEP && CONFIG_LOCK_STAT are enabled */
	int (*acquired_event)(struct evsel *evsel,
			      struct perf_sample *sample);

	/* used when CONFIG_LOCKDEP && CONFIG_LOCK_STAT are enabled */
	int (*contended_event)(struct evsel *evsel,
			       struct perf_sample *sample);

	/* used when CONFIG_LOCKDEP is enabled */
	int (*release_event)(struct evsel *evsel,
			     struct perf_sample *sample);

	/* used when CONFIG_LOCKDEP is off */
	int (*contention_begin_event)(struct evsel *evsel,
				      struct perf_sample *sample);

	/* used when CONFIG_LOCKDEP is off */
	int (*contention_end_event)(struct evsel *evsel,
				    struct perf_sample *sample);
};

static struct lock_seq_stat *get_seq(struct thread_stat *ts, u64 addr)
{
	struct lock_seq_stat *seq;

	list_for_each_entry(seq, &ts->seq_list, list) {
		if (seq->addr == addr)
			return seq;
	}

	seq = zalloc(sizeof(struct lock_seq_stat));
	if (!seq) {
		pr_err("memory allocation failed\n");
		return NULL;
	}
	seq->state = SEQ_STATE_UNINITIALIZED;
	seq->addr = addr;

	list_add(&seq->list, &ts->seq_list);
	return seq;
}

enum broken_state {
	BROKEN_ACQUIRE,
	BROKEN_ACQUIRED,
	BROKEN_CONTENDED,
	BROKEN_RELEASE,
	BROKEN_MAX,
};

static int bad_hist[BROKEN_MAX];

enum acquire_flags {
	TRY_LOCK = 1,
	READ_LOCK = 2,
};

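/*
 * The handlers below drive a per-thread, per-lock-address state machine
 * (struct lock_seq_stat); events that arrive out of order are counted in
 * bad_hist[] and the offending sequence is dropped.
 */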
static int report_lock_acquire_event(struct evsel *evsel,
				     struct perf_sample *sample)
{
	struct lock_stat *ls;
	struct thread_stat *ts;
	struct lock_seq_stat *seq;
	const char *name = evsel__strval(evsel, sample, "name");
	u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
	int flag = evsel__intval(evsel, sample, "flags");
	u64 key;

	switch (aggr_mode) {
	case LOCK_AGGR_ADDR:
		key = addr;
		break;
	case LOCK_AGGR_TASK:
		key = sample->tid;
		break;
	case LOCK_AGGR_CALLER:
	default:
		pr_err("Invalid aggregation mode: %d\n", aggr_mode);
		return -EINVAL;
	}

	ls = lock_stat_findnew(key, name, 0);
	if (!ls)
		return -ENOMEM;

	ts = thread_stat_findnew(sample->tid);
	if (!ts)
		return -ENOMEM;

	seq = get_seq(ts, addr);
	if (!seq)
		return -ENOMEM;

	switch (seq->state) {
	case SEQ_STATE_UNINITIALIZED:
	case SEQ_STATE_RELEASED:
		if (!flag) {
			seq->state = SEQ_STATE_ACQUIRING;
		} else {
			if (flag & TRY_LOCK)
				ls->nr_trylock++;
			if (flag & READ_LOCK)
				ls->nr_readlock++;
			seq->state = SEQ_STATE_READ_ACQUIRED;
			seq->read_count = 1;
			ls->nr_acquired++;
		}
		break;
	case SEQ_STATE_READ_ACQUIRED:
		if (flag & READ_LOCK) {
			seq->read_count++;
			ls->nr_acquired++;
			goto end;
		} else {
			goto broken;
		}
		break;
	case SEQ_STATE_ACQUIRED:
	case SEQ_STATE_ACQUIRING:
	case SEQ_STATE_CONTENDED:
broken:
		/* broken lock sequence */
		if (!ls->broken) {
			ls->broken = 1;
			bad_hist[BROKEN_ACQUIRE]++;
		}
		list_del_init(&seq->list);
		free(seq);
		goto end;
	default:
		BUG_ON("Unknown state of lock sequence found!\n");
		break;
	}

	ls->nr_acquire++;
	seq->prev_event_time = sample->time;
end:
	return 0;
}

static int report_lock_acquired_event(struct evsel *evsel,
				      struct perf_sample *sample)
{
	struct lock_stat *ls;
	struct thread_stat *ts;
	struct lock_seq_stat *seq;
	u64 contended_term;
	const char *name = evsel__strval(evsel, sample, "name");
	u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
	u64 key;

	switch (aggr_mode) {
	case LOCK_AGGR_ADDR:
		key = addr;
		break;
	case LOCK_AGGR_TASK:
		key = sample->tid;
		break;
	case LOCK_AGGR_CALLER:
	default:
		pr_err("Invalid aggregation mode: %d\n", aggr_mode);
		return -EINVAL;
	}

	ls = lock_stat_findnew(key, name, 0);
	if (!ls)
		return -ENOMEM;

	ts = thread_stat_findnew(sample->tid);
	if (!ts)
		return -ENOMEM;

	seq = get_seq(ts, addr);
	if (!seq)
		return -ENOMEM;

	switch (seq->state) {
	case SEQ_STATE_UNINITIALIZED:
		/* orphan event, do nothing */
		return 0;
	case SEQ_STATE_ACQUIRING:
		break;
	case SEQ_STATE_CONTENDED:
		contended_term = sample->time - seq->prev_event_time;
		ls->wait_time_total += contended_term;
		if (contended_term < ls->wait_time_min)
			ls->wait_time_min = contended_term;
		if (ls->wait_time_max < contended_term)
			ls->wait_time_max = contended_term;
		break;
	case SEQ_STATE_RELEASED:
	case SEQ_STATE_ACQUIRED:
	case SEQ_STATE_READ_ACQUIRED:
		/* broken lock sequence */
		if (!ls->broken) {
			ls->broken = 1;
			bad_hist[BROKEN_ACQUIRED]++;
		}
		list_del_init(&seq->list);
		free(seq);
		goto end;
	default:
		BUG_ON("Unknown state of lock sequence found!\n");
		break;
	}

	seq->state = SEQ_STATE_ACQUIRED;
	ls->nr_acquired++;
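	/* keep the average in sync; nr_contended may still be zero here */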
	ls->avg_wait_time = ls->nr_contended ? ls->wait_time_total/ls->nr_contended : 0;
	seq->prev_event_time = sample->time;
end:
	return 0;
}

static int report_lock_contended_event(struct evsel *evsel,
				       struct perf_sample *sample)
{
	struct lock_stat *ls;
	struct thread_stat *ts;
	struct lock_seq_stat *seq;
	const char *name = evsel__strval(evsel, sample, "name");
	u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
	u64 key;

	switch (aggr_mode) {
	case LOCK_AGGR_ADDR:
		key = addr;
		break;
	case LOCK_AGGR_TASK:
		key = sample->tid;
		break;
	case LOCK_AGGR_CALLER:
	default:
		pr_err("Invalid aggregation mode: %d\n", aggr_mode);
		return -EINVAL;
	}

	ls = lock_stat_findnew(key, name, 0);
	if (!ls)
		return -ENOMEM;

	ts = thread_stat_findnew(sample->tid);
	if (!ts)
		return -ENOMEM;

	seq = get_seq(ts, addr);
	if (!seq)
		return -ENOMEM;

	switch (seq->state) {
	case SEQ_STATE_UNINITIALIZED:
		/* orphan event, do nothing */
		return 0;
	case SEQ_STATE_ACQUIRING:
		break;
	case SEQ_STATE_RELEASED:
	case SEQ_STATE_ACQUIRED:
	case SEQ_STATE_READ_ACQUIRED:
	case SEQ_STATE_CONTENDED:
		/* broken lock sequence */
		if (!ls->broken) {
			ls->broken = 1;
			bad_hist[BROKEN_CONTENDED]++;
		}
		list_del_init(&seq->list);
		free(seq);
		goto end;
	default:
		BUG_ON("Unknown state of lock sequence found!\n");
		break;
	}

	seq->state = SEQ_STATE_CONTENDED;
	ls->nr_contended++;
	ls->avg_wait_time = ls->wait_time_total/ls->nr_contended;
	seq->prev_event_time = sample->time;
end:
	return 0;
}

static int report_lock_release_event(struct evsel *evsel,
				     struct perf_sample *sample)
{
	struct lock_stat *ls;
	struct thread_stat *ts;
	struct lock_seq_stat *seq;
	const char *name = evsel__strval(evsel, sample, "name");
	u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
	u64 key;

	switch (aggr_mode) {
	case LOCK_AGGR_ADDR:
		key = addr;
		break;
	case LOCK_AGGR_TASK:
		key = sample->tid;
		break;
	case LOCK_AGGR_CALLER:
	default:
		pr_err("Invalid aggregation mode: %d\n", aggr_mode);
		return -EINVAL;
	}

	ls = lock_stat_findnew(key, name, 0);
	if (!ls)
		return -ENOMEM;

	ts = thread_stat_findnew(sample->tid);
	if (!ts)
		return -ENOMEM;

	seq = get_seq(ts, addr);
	if (!seq)
		return -ENOMEM;

	switch (seq->state) {
	case SEQ_STATE_UNINITIALIZED:
		goto end;
	case SEQ_STATE_ACQUIRED:
		break;
	case SEQ_STATE_READ_ACQUIRED:
		seq->read_count--;
		BUG_ON(seq->read_count < 0);
		if (seq->read_count) {
			ls->nr_release++;
			goto end;
		}
		break;
	case SEQ_STATE_ACQUIRING:
	case SEQ_STATE_CONTENDED:
	case SEQ_STATE_RELEASED:
		/* broken lock sequence */
		if (!ls->broken) {
			ls->broken = 1;
			bad_hist[BROKEN_RELEASE]++;
		}
		goto free_seq;
	default:
		BUG_ON("Unknown state of lock sequence found!\n");
		break;
	}

	ls->nr_release++;
free_seq:
	list_del_init(&seq->list);
	free(seq);
end:
	return 0;
}

bool is_lock_function(struct machine *machine, u64 addr)
{
	if (!sched_text_start) {
		struct map *kmap;
		struct symbol *sym;

		sym = machine__find_kernel_symbol_by_name(machine,
							  "__sched_text_start",
							  &kmap);
		if (!sym) {
			/* to avoid retry */
			sched_text_start = 1;
			return false;
		}
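
		/* translate the map-relative symbol address back to a kernel address */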
		sched_text_start = kmap->unmap_ip(kmap, sym->start);

		/* should not fail from here */
		sym = machine__find_kernel_symbol_by_name(machine,
							  "__sched_text_end",
							  &kmap);
		sched_text_end = kmap->unmap_ip(kmap, sym->start);

		sym = machine__find_kernel_symbol_by_name(machine,
							  "__lock_text_start",
							  &kmap);
		lock_text_start = kmap->unmap_ip(kmap, sym->start);

		sym = machine__find_kernel_symbol_by_name(machine,
							  "__lock_text_end",
							  &kmap);
		lock_text_end = kmap->unmap_ip(kmap, sym->start);
	}

	/* failed to get kernel symbols */
	if (sched_text_start == 1)
		return false;

	/* mutex and rwsem functions are in the sched text section */
	if (sched_text_start <= addr && addr < sched_text_end)
		return true;

	/* spinlock functions are in the lock text section */
	if (lock_text_start <= addr && addr < lock_text_end)
		return true;

	return false;
}

static int lock_contention_caller(struct evsel *evsel, struct perf_sample *sample,
				  char *buf, int size)
{
	struct thread *thread;
	struct callchain_cursor *cursor = &callchain_cursor;
	struct machine *machine = &session->machines.host;
	struct symbol *sym;
	int skip = 0;
	int ret;

	/* lock names will be replaced with the task name later */
	if (show_thread_stats)
		return -1;

	thread = machine__findnew_thread(machine, -1, sample->pid);
	if (thread == NULL)
		return -1;

	/* use caller function name from the callchain */
	ret = thread__resolve_callchain(thread, cursor, evsel, sample,
					NULL, NULL, CONTENTION_STACK_DEPTH);
	if (ret != 0) {
		thread__put(thread);
		return -1;
	}

	callchain_cursor_commit(cursor);
	thread__put(thread);

	while (true) {
		struct callchain_cursor_node *node;

		node = callchain_cursor_current(cursor);
		if (node == NULL)
			break;

		/* skip first few entries - for lock functions */
		if (++skip <= CONTENTION_STACK_SKIP)
			goto next;

		sym = node->ms.sym;
		if (sym && !is_lock_function(machine, node->ip)) {
			struct map *map = node->ms.map;
			u64 offset;

			offset = map->map_ip(map, node->ip) - sym->start;

			if (offset)
				scnprintf(buf, size, "%s+%#lx", sym->name, offset);
			else
				strlcpy(buf, sym->name, size);
			return 0;
		}

next:
		callchain_cursor_advance(cursor);
	}
	return -1;
}

static u64 callchain_id(struct evsel *evsel, struct perf_sample *sample)
{
	struct callchain_cursor *cursor = &callchain_cursor;
	struct machine *machine = &session->machines.host;
	struct thread *thread;
	u64 hash = 0;
	int skip = 0;
	int ret;

	thread = machine__findnew_thread(machine, -1, sample->pid);
	if (thread == NULL)
		return -1;

	/* use caller function name from the callchain */
	ret = thread__resolve_callchain(thread, cursor, evsel, sample,
					NULL, NULL, CONTENTION_STACK_DEPTH);
	thread__put(thread);

	if (ret != 0)
		return -1;

	callchain_cursor_commit(cursor);

	while (true) {
		struct callchain_cursor_node *node;

		node = callchain_cursor_current(cursor);
		if (node == NULL)
			break;

		/* skip first few entries - for lock functions */
		if (++skip <= CONTENTION_STACK_SKIP)
			goto next;

		if (node->ms.sym && is_lock_function(machine, node->ip))
			goto next;

		hash ^= hash_long((unsigned long)node->ip, 64);

next:
		callchain_cursor_advance(cursor);
	}
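
	/* the XOR over caller IPs serves as the id of this contention path */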
	return hash;
}

static int report_lock_contention_begin_event(struct evsel *evsel,
					      struct perf_sample *sample)
{
	struct lock_stat *ls;
	struct thread_stat *ts;
	struct lock_seq_stat *seq;
	u64 addr = evsel__intval(evsel, sample, "lock_addr");
	u64 key;

	switch (aggr_mode) {
	case LOCK_AGGR_ADDR:
		key = addr;
		break;
	case LOCK_AGGR_TASK:
		key = sample->tid;
		break;
	case LOCK_AGGR_CALLER:
		key = callchain_id(evsel, sample);
		break;
	default:
		pr_err("Invalid aggregation mode: %d\n", aggr_mode);
		return -EINVAL;
	}

	ls = lock_stat_find(key);
	if (!ls) {
		char buf[128];
		const char *caller = buf;
		unsigned int flags = evsel__intval(evsel, sample, "flags");

		if (lock_contention_caller(evsel, sample, buf, sizeof(buf)) < 0)
			caller = "Unknown";

		ls = lock_stat_findnew(key, caller, flags);
		if (!ls)
			return -ENOMEM;
	}

	ts = thread_stat_findnew(sample->tid);
	if (!ts)
		return -ENOMEM;

	seq = get_seq(ts, addr);
	if (!seq)
		return -ENOMEM;

	switch (seq->state) {
	case SEQ_STATE_UNINITIALIZED:
	case SEQ_STATE_ACQUIRED:
		break;
	case SEQ_STATE_CONTENDED:
		/*
		 * A mutex can generate a nested contention_begin event while
		 * spinning; keep using the original contention_begin event
		 * and ignore the second one.
		 */
		goto end;
	case SEQ_STATE_ACQUIRING:
	case SEQ_STATE_READ_ACQUIRED:
	case SEQ_STATE_RELEASED:
		/* broken lock sequence */
		if (!ls->broken) {
			ls->broken = 1;
			bad_hist[BROKEN_CONTENDED]++;
		}
		list_del_init(&seq->list);
		free(seq);
		goto end;
	default:
		BUG_ON("Unknown state of lock sequence found!\n");
		break;
	}

	if (seq->state != SEQ_STATE_CONTENDED) {
		seq->state = SEQ_STATE_CONTENDED;
		seq->prev_event_time = sample->time;
		ls->nr_contended++;
	}
end:
	return 0;
}

static int report_lock_contention_end_event(struct evsel *evsel,
					    struct perf_sample *sample)
{
	struct lock_stat *ls;
	struct thread_stat *ts;
	struct lock_seq_stat *seq;
	u64 contended_term;
	u64 addr = evsel__intval(evsel, sample, "lock_addr");
	u64 key;

	switch (aggr_mode) {
	case LOCK_AGGR_ADDR:
		key = addr;
		break;
	case LOCK_AGGR_TASK:
		key = sample->tid;
		break;
	case LOCK_AGGR_CALLER:
		key = callchain_id(evsel, sample);
		break;
	default:
		pr_err("Invalid aggregation mode: %d\n", aggr_mode);
		return -EINVAL;
	}

	ls = lock_stat_find(key);
	if (!ls)
		return 0;

	ts = thread_stat_find(sample->tid);
	if (!ts)
		return 0;

	seq = get_seq(ts, addr);
	if (!seq)
		return -ENOMEM;

	switch (seq->state) {
	case SEQ_STATE_UNINITIALIZED:
		goto end;
	case SEQ_STATE_CONTENDED:
		contended_term = sample->time - seq->prev_event_time;
		ls->wait_time_total += contended_term;
		if (contended_term < ls->wait_time_min)
			ls->wait_time_min = contended_term;
		if (ls->wait_time_max < contended_term)
			ls->wait_time_max = contended_term;
		break;
	case SEQ_STATE_ACQUIRING:
	case SEQ_STATE_ACQUIRED:
	case SEQ_STATE_READ_ACQUIRED:
	case SEQ_STATE_RELEASED:
		/* broken lock sequence */
		if (!ls->broken) {
			ls->broken = 1;
			bad_hist[BROKEN_ACQUIRED]++;
		}
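		/* drop the broken sequence; a new one starts with the next event */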
		list_del_init(&seq->list);
		free(seq);
		goto end;
	default:
		BUG_ON("Unknown state of lock sequence found!\n");
		break;
	}

	seq->state = SEQ_STATE_ACQUIRED;
	ls->nr_acquired++;
	ls->avg_wait_time = ls->wait_time_total/ls->nr_acquired;
end:
	return 0;
}

/* lock oriented handlers */
/* TODO: handlers for CPU oriented, thread oriented */
static struct trace_lock_handler report_lock_ops = {
	.acquire_event		= report_lock_acquire_event,
	.acquired_event		= report_lock_acquired_event,
	.contended_event	= report_lock_contended_event,
	.release_event		= report_lock_release_event,
	.contention_begin_event	= report_lock_contention_begin_event,
	.contention_end_event	= report_lock_contention_end_event,
};

static struct trace_lock_handler contention_lock_ops = {
	.contention_begin_event	= report_lock_contention_begin_event,
	.contention_end_event	= report_lock_contention_end_event,
};

static struct trace_lock_handler *trace_handler;

static int evsel__process_lock_acquire(struct evsel *evsel, struct perf_sample *sample)
{
	if (trace_handler->acquire_event)
		return trace_handler->acquire_event(evsel, sample);
	return 0;
}

static int evsel__process_lock_acquired(struct evsel *evsel, struct perf_sample *sample)
{
	if (trace_handler->acquired_event)
		return trace_handler->acquired_event(evsel, sample);
	return 0;
}

static int evsel__process_lock_contended(struct evsel *evsel, struct perf_sample *sample)
{
	if (trace_handler->contended_event)
		return trace_handler->contended_event(evsel, sample);
	return 0;
}

static int evsel__process_lock_release(struct evsel *evsel, struct perf_sample *sample)
{
	if (trace_handler->release_event)
		return trace_handler->release_event(evsel, sample);
	return 0;
}

static int evsel__process_contention_begin(struct evsel *evsel, struct perf_sample *sample)
{
	if (trace_handler->contention_begin_event)
		return trace_handler->contention_begin_event(evsel, sample);
	return 0;
}

static int evsel__process_contention_end(struct evsel *evsel, struct perf_sample *sample)
{
	if (trace_handler->contention_end_event)
		return trace_handler->contention_end_event(evsel, sample);
	return 0;
}

static void print_bad_events(int bad, int total)
{
	/* debug output; this has to be removed eventually */
	int i;
	int broken = 0;
	const char *name[4] =
		{ "acquire", "acquired", "contended", "release" };

	for (i = 0; i < BROKEN_MAX; i++)
		broken += bad_hist[i];

	if (broken == 0 && !verbose)
		return;

	pr_info("\n=== output for debug ===\n\n");
	pr_info("bad: %d, total: %d\n", bad, total);
	pr_info("bad rate: %.2f %%\n", total ? (double)bad / (double)total * 100 : 0.0);
	pr_info("histogram of events that caused bad sequences\n");
	for (i = 0; i < BROKEN_MAX; i++)
		pr_info(" %10s: %d\n", name[i], bad_hist[i]);
}

/* TODO: various ways to print, coloring, nano or milli sec */
static void print_result(void)
{
	struct lock_stat *st;
	struct lock_key *key;
	char cut_name[20];
	int bad, total;

	pr_info("%20s ", "Name");
	list_for_each_entry(key, &lock_keys, list)
		pr_info("%*s ", key->len, key->header);
	pr_info("\n\n");

	bad = total = 0;
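	/* pop stats in sort order: insert_to_result() keeps bigger keys on the left */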
	while ((st = pop_from_result())) {
		total++;
		if (st->broken)
			bad++;
		if (!st->nr_acquired)
			continue;

		bzero(cut_name, 20);

		if (strlen(st->name) < 20) {
			/* output raw name */
			const char *name = st->name;

			if (show_thread_stats) {
				struct thread *t;

				/* st->addr contains tid of thread */
				t = perf_session__findnew(session, st->addr);
				name = thread__comm_str(t);
			}

			pr_info("%20s ", name);
		} else {
			strncpy(cut_name, st->name, 16);
			cut_name[16] = '.';
			cut_name[17] = '.';
			cut_name[18] = '.';
			cut_name[19] = '\0';
			/* cut off the name to preserve the output column width */
			pr_info("%20s ", cut_name);
		}

		list_for_each_entry(key, &lock_keys, list) {
			key->print(key, st);
			pr_info(" ");
		}
		pr_info("\n");
	}

	print_bad_events(bad, total);
}

static bool info_threads, info_map;

static void dump_threads(void)
{
	struct thread_stat *st;
	struct rb_node *node;
	struct thread *t;

	pr_info("%10s: comm\n", "Thread ID");

	node = rb_first(&thread_stats);
	while (node) {
		st = container_of(node, struct thread_stat, rb);
		t = perf_session__findnew(session, st->tid);
		pr_info("%10d: %s\n", st->tid, thread__comm_str(t));
		node = rb_next(node);
		thread__put(t);
	}
}

static int compare_maps(struct lock_stat *a, struct lock_stat *b)
{
	int ret;

	if (a->name && b->name)
		ret = strcmp(a->name, b->name);
	else
		ret = !!a->name - !!b->name;

	if (!ret)
		return a->addr < b->addr;
	else
		return ret < 0;
}

static void dump_map(void)
{
	unsigned int i;
	struct lock_stat *st;

	pr_info("Address of instance: name of class\n");
	for (i = 0; i < LOCKHASH_SIZE; i++) {
		hlist_for_each_entry(st, &lockhash_table[i], hash_entry) {
			insert_to_result(st, compare_maps);
		}
	}

	while ((st = pop_from_result()))
		pr_info(" %#llx: %s\n", (unsigned long long)st->addr, st->name);
}

static int dump_info(void)
{
	int rc = 0;

	if (info_threads)
		dump_threads();
	else if (info_map)
		dump_map();
	else {
		rc = -1;
		pr_err("Unknown type of information\n");
	}

	return rc;
}

typedef int (*tracepoint_handler)(struct evsel *evsel,
				  struct perf_sample *sample);

static int process_sample_event(struct perf_tool *tool __maybe_unused,
				union perf_event *event,
				struct perf_sample *sample,
				struct evsel *evsel,
				struct machine *machine)
{
	int err = 0;
	struct thread *thread = machine__findnew_thread(machine, sample->pid,
							sample->tid);

	if (thread == NULL) {
		pr_debug("problem processing %d event, skipping it.\n",
			 event->header.type);
		return -1;
	}

	if (evsel->handler != NULL) {
		tracepoint_handler f = evsel->handler;
		err = f(evsel, sample);
	}

	thread__put(thread);

	return err;
}

static void combine_result(void)
{
	unsigned int i;
	struct lock_stat *st;

	if (!combine_locks)
		return;

	for (i = 0; i < LOCKHASH_SIZE; i++) {
		hlist_for_each_entry(st, &lockhash_table[i], hash_entry) {
			combine_lock_stats(st);
		}
	}
}

static void sort_result(void)
{
	unsigned int i;
	struct lock_stat *st;

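	/* move every lock stat from the hash table into the result tree */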
	for (i = 0; i < LOCKHASH_SIZE; i++) {
		hlist_for_each_entry(st, &lockhash_table[i], hash_entry) {
			insert_to_result(st, compare);
		}
	}
}

static const char *get_type_str(struct lock_stat *st)
{
	static const struct {
		unsigned int flags;
		const char *name;
	} table[] = {
		{ 0,				"semaphore" },
		{ LCB_F_SPIN,			"spinlock" },
		{ LCB_F_SPIN | LCB_F_READ,	"rwlock:R" },
		{ LCB_F_SPIN | LCB_F_WRITE,	"rwlock:W" },
		{ LCB_F_READ,			"rwsem:R" },
		{ LCB_F_WRITE,			"rwsem:W" },
		{ LCB_F_RT,			"rtmutex" },
		{ LCB_F_RT | LCB_F_READ,	"rwlock-rt:R" },
		{ LCB_F_RT | LCB_F_WRITE,	"rwlock-rt:W" },
		{ LCB_F_PERCPU | LCB_F_READ,	"pcpu-sem:R" },
		{ LCB_F_PERCPU | LCB_F_WRITE,	"pcpu-sem:W" },
		{ LCB_F_MUTEX,			"mutex" },
		{ LCB_F_MUTEX | LCB_F_SPIN,	"mutex" },
	};

	for (unsigned int i = 0; i < ARRAY_SIZE(table); i++) {
		if (table[i].flags == st->flags)
			return table[i].name;
	}
	return "unknown";
}

static void sort_contention_result(void)
{
	sort_result();
}

static void print_contention_result(void)
{
	struct lock_stat *st;
	struct lock_key *key;
	int bad, total;

	list_for_each_entry(key, &lock_keys, list)
		pr_info("%*s ", key->len, key->header);

	if (show_thread_stats)
		pr_info(" %10s %s\n\n", "pid", "comm");
	else
		pr_info(" %10s %s\n\n", "type", "caller");

	bad = total = 0;
	if (use_bpf)
		bad = bad_hist[BROKEN_CONTENDED];

	while ((st = pop_from_result())) {
		total += use_bpf ? st->nr_contended : 1;
		if (st->broken)
			bad++;

		list_for_each_entry(key, &lock_keys, list) {
			key->print(key, st);
			pr_info(" ");
		}

		if (show_thread_stats) {
			struct thread *t;
			int pid = st->addr;

			/* st->addr contains tid of thread */
			t = perf_session__findnew(session, pid);
			pr_info(" %10d %s\n", pid, thread__comm_str(t));
			continue;
		}

		pr_info(" %10s %s\n", get_type_str(st), st->name);
	}

	print_bad_events(bad, total);
}

static const struct evsel_str_handler lock_tracepoints[] = {
	{ "lock:lock_acquire",	 evsel__process_lock_acquire,   }, /* CONFIG_LOCKDEP */
	{ "lock:lock_acquired",	 evsel__process_lock_acquired,  }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */
	{ "lock:lock_contended", evsel__process_lock_contended, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */
	{ "lock:lock_release",	 evsel__process_lock_release,   }, /* CONFIG_LOCKDEP */
};

static const struct evsel_str_handler contention_tracepoints[] = {
	{ "lock:contention_begin", evsel__process_contention_begin, },
	{ "lock:contention_end",   evsel__process_contention_end,   },
};

static bool force;

static int __cmd_report(bool display_info)
{
	int err = -EINVAL;
	struct perf_tool eops = {
		.sample		 = process_sample_event,
		.comm		 = perf_event__process_comm,
		.mmap		 = perf_event__process_mmap,
		.namespaces	 = perf_event__process_namespaces,
		.ordered_events	 = true,
	};
	struct perf_data data = {
		.path  = input_name,
		.mode  = PERF_DATA_MODE_READ,
		.force = force,
	};

	session = perf_session__new(&data, &eops);
	if (IS_ERR(session)) {
		pr_err("Initializing perf session failed\n");
		return PTR_ERR(session);
	}

	/* for lock function check */
	symbol_conf.sort_by_name = true;
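	/* kernel symbols are needed by is_lock_function() for caller checks */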
	symbol__init(&session->header.env);

	if (!perf_session__has_traces(session, "lock record"))
		goto out_delete;

	if (perf_session__set_tracepoints_handlers(session, lock_tracepoints)) {
		pr_err("Initializing perf session tracepoint handlers failed\n");
		goto out_delete;
	}

	if (perf_session__set_tracepoints_handlers(session, contention_tracepoints)) {
		pr_err("Initializing perf session tracepoint handlers failed\n");
		goto out_delete;
	}

	if (setup_output_field(false, output_fields))
		goto out_delete;

	if (select_key(false))
		goto out_delete;

	if (show_thread_stats)
		aggr_mode = LOCK_AGGR_TASK;

	err = perf_session__process_events(session);
	if (err)
		goto out_delete;

	setup_pager();
	if (display_info) /* used for info subcommand */
		err = dump_info();
	else {
		combine_result();
		sort_result();
		print_result();
	}

out_delete:
	perf_session__delete(session);
	return err;
}

static void sighandler(int sig __maybe_unused)
{
}

static int __cmd_contention(int argc, const char **argv)
{
	int err = -EINVAL;
	struct perf_tool eops = {
		.sample		 = process_sample_event,
		.comm		 = perf_event__process_comm,
		.mmap		 = perf_event__process_mmap,
		.ordered_events	 = true,
	};
	struct perf_data data = {
		.path  = input_name,
		.mode  = PERF_DATA_MODE_READ,
		.force = force,
	};
	struct lock_contention con = {
		.target = &target,
		.result = &lockhash_table[0],
		.map_nr_entries = bpf_map_entries,
	};

	session = perf_session__new(use_bpf ? NULL : &data, &eops);
	if (IS_ERR(session)) {
		pr_err("Initializing perf session failed\n");
		return PTR_ERR(session);
	}

	/* for lock function check */
	symbol_conf.sort_by_name = true;
	symbol__init(&session->header.env);

	if (use_bpf) {
		err = target__validate(&target);
		if (err) {
			char errbuf[512];

			target__strerror(&target, err, errbuf, 512);
			pr_err("%s\n", errbuf);
			goto out_delete;
		}

		signal(SIGINT, sighandler);
		signal(SIGCHLD, sighandler);
		signal(SIGTERM, sighandler);

		con.machine = &session->machines.host;

		con.evlist = evlist__new();
		if (con.evlist == NULL) {
			err = -ENOMEM;
			goto out_delete;
		}

		err = evlist__create_maps(con.evlist, &target);
		if (err < 0)
			goto out_delete;

		if (argc) {
			err = evlist__prepare_workload(con.evlist, &target,
						       argv, false, NULL);
			if (err < 0)
				goto out_delete;
		}

		if (lock_contention_prepare(&con) < 0) {
			pr_err("lock contention BPF setup failed\n");
			goto out_delete;
		}
	} else {
		if (!perf_session__has_traces(session, "lock record"))
			goto out_delete;

		if (!evlist__find_evsel_by_str(session->evlist,
					       "lock:contention_begin")) {
			pr_err("lock contention evsel not found\n");
			goto out_delete;
		}

		if (perf_session__set_tracepoints_handlers(session,
						contention_tracepoints)) {
			pr_err("Initializing perf session tracepoint handlers failed\n");
			goto out_delete;
		}
	}

	if (setup_output_field(true, output_fields))
		goto out_delete;

	if (select_key(true))
		goto out_delete;

	if (show_thread_stats)
		aggr_mode = LOCK_AGGR_TASK;
	else
		aggr_mode = LOCK_AGGR_CALLER;

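	/*
	 * BPF mode aggregates contention stats in the kernel: start the
	 * program, optionally kick off the workload, wait for a signal,
	 * then read the results back.  Non-BPF mode instead replays the
	 * events recorded in the perf.data file.
	 */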
	if (use_bpf) {
		lock_contention_start();
		if (argc)
			evlist__start_workload(con.evlist);

		/* wait for signal */
		pause();

		lock_contention_stop();
		lock_contention_read(&con);

		/* abuse bad hist stats for lost entries */
		bad_hist[BROKEN_CONTENDED] = con.lost;
	} else {
		err = perf_session__process_events(session);
		if (err)
			goto out_delete;
	}

	setup_pager();

	sort_contention_result();
	print_contention_result();

out_delete:
	evlist__delete(con.evlist);
	lock_contention_finish();
	perf_session__delete(session);
	return err;
}

static int __cmd_record(int argc, const char **argv)
{
	const char *record_args[] = {
		"record", "-R", "-m", "1024", "-c", "1", "--synth", "task",
	};
	const char *callgraph_args[] = {
		"--call-graph", "fp," __stringify(CONTENTION_STACK_DEPTH),
	};
	unsigned int rec_argc, i, j, ret;
	unsigned int nr_tracepoints;
	unsigned int nr_callgraph_args = 0;
	const char **rec_argv;
	bool has_lock_stat = true;

	for (i = 0; i < ARRAY_SIZE(lock_tracepoints); i++) {
		if (!is_valid_tracepoint(lock_tracepoints[i].name)) {
			pr_debug("tracepoint %s is not enabled. "
				 "Are CONFIG_LOCKDEP and CONFIG_LOCK_STAT enabled?\n",
				 lock_tracepoints[i].name);
			has_lock_stat = false;
			break;
		}
	}

	if (has_lock_stat)
		goto setup_args;

	for (i = 0; i < ARRAY_SIZE(contention_tracepoints); i++) {
		if (!is_valid_tracepoint(contention_tracepoints[i].name)) {
			pr_err("tracepoint %s is not enabled.\n",
			       contention_tracepoints[i].name);
			return 1;
		}
	}

	nr_callgraph_args = ARRAY_SIZE(callgraph_args);

setup_args:
	rec_argc = ARRAY_SIZE(record_args) + nr_callgraph_args + argc - 1;

	if (has_lock_stat)
		nr_tracepoints = ARRAY_SIZE(lock_tracepoints);
	else
		nr_tracepoints = ARRAY_SIZE(contention_tracepoints);

	/* factor of 2 is for -e in front of each tracepoint */
	rec_argc += 2 * nr_tracepoints;

	rec_argv = calloc(rec_argc + 1, sizeof(char *));
	if (!rec_argv)
		return -ENOMEM;

	for (i = 0; i < ARRAY_SIZE(record_args); i++)
		rec_argv[i] = strdup(record_args[i]);

	for (j = 0; j < nr_tracepoints; j++) {
		const char *ev_name;

		if (has_lock_stat)
			ev_name = strdup(lock_tracepoints[j].name);
		else
			ev_name = strdup(contention_tracepoints[j].name);

		if (!ev_name)
			return -ENOMEM;

		rec_argv[i++] = "-e";
		rec_argv[i++] = ev_name;
	}

	for (j = 0; j < nr_callgraph_args; j++, i++)
		rec_argv[i] = callgraph_args[j];

	for (j = 1; j < (unsigned int)argc; j++, i++)
		rec_argv[i] = argv[j];

	BUG_ON(i != rec_argc);

	ret = cmd_record(i, rec_argv);
	free(rec_argv);
	return ret;
}

static int parse_map_entry(const struct option *opt, const char *str,
			   int unset __maybe_unused)
{
	unsigned long *len = (unsigned long *)opt->value;
	unsigned long val;
	char *endptr;

	errno = 0;
	val = strtoul(str, &endptr, 0);
	if (*endptr != '\0' || errno != 0) {
		pr_err("invalid BPF map length: %s\n", str);
		return -1;
	}

	*len = val;
	return 0;
}

int cmd_lock(int argc, const char **argv)
{
	const struct option lock_options[] = {
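	/* common options, inherited by the subcommand tables via OPT_PARENT */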
"file", "input file name"), 1820 OPT_INCR('v', "verbose", &verbose, "be more verbose (show symbol address, etc)"), 1821 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"), 1822 OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), 1823 OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name, 1824 "file", "vmlinux pathname"), 1825 OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, 1826 "file", "kallsyms pathname"), 1827 OPT_END() 1828 }; 1829 1830 const struct option info_options[] = { 1831 OPT_BOOLEAN('t', "threads", &info_threads, 1832 "dump thread list in perf.data"), 1833 OPT_BOOLEAN('m', "map", &info_map, 1834 "map of lock instances (address:name table)"), 1835 OPT_PARENT(lock_options) 1836 }; 1837 1838 const struct option report_options[] = { 1839 OPT_STRING('k', "key", &sort_key, "acquired", 1840 "key for sorting (acquired / contended / avg_wait / wait_total / wait_max / wait_min)"), 1841 OPT_STRING('F', "field", &output_fields, NULL, 1842 "output fields (acquired / contended / avg_wait / wait_total / wait_max / wait_min)"), 1843 /* TODO: type */ 1844 OPT_BOOLEAN('c', "combine-locks", &combine_locks, 1845 "combine locks in the same class"), 1846 OPT_BOOLEAN('t', "threads", &show_thread_stats, 1847 "show per-thread lock stats"), 1848 OPT_PARENT(lock_options) 1849 }; 1850 1851 struct option contention_options[] = { 1852 OPT_STRING('k', "key", &sort_key, "wait_total", 1853 "key for sorting (contended / wait_total / wait_max / wait_min / avg_wait)"), 1854 OPT_STRING('F', "field", &output_fields, "contended,wait_total,wait_max,avg_wait", 1855 "output fields (contended / wait_total / wait_max / wait_min / avg_wait)"), 1856 OPT_BOOLEAN('t', "threads", &show_thread_stats, 1857 "show per-thread lock stats"), 1858 OPT_BOOLEAN('b', "use-bpf", &use_bpf, "use BPF program to collect lock contention stats"), 1859 OPT_BOOLEAN('a', "all-cpus", &target.system_wide, 1860 "System-wide collection from all CPUs"), 1861 OPT_STRING('C', "cpu", &target.cpu_list, "cpu", 1862 "List of cpus to monitor"), 1863 OPT_STRING('p', "pid", &target.pid, "pid", 1864 "Trace on existing process id"), 1865 OPT_STRING(0, "tid", &target.tid, "tid", 1866 "Trace on existing thread id (exclusive to --pid)"), 1867 OPT_CALLBACK(0, "map-nr-entries", &bpf_map_entries, "num", 1868 "Max number of BPF map entries", parse_map_entry), 1869 OPT_PARENT(lock_options) 1870 }; 1871 1872 const char * const info_usage[] = { 1873 "perf lock info [<options>]", 1874 NULL 1875 }; 1876 const char *const lock_subcommands[] = { "record", "report", "script", 1877 "info", "contention", NULL }; 1878 const char *lock_usage[] = { 1879 NULL, 1880 NULL 1881 }; 1882 const char * const report_usage[] = { 1883 "perf lock report [<options>]", 1884 NULL 1885 }; 1886 const char * const contention_usage[] = { 1887 "perf lock contention [<options>]", 1888 NULL 1889 }; 1890 unsigned int i; 1891 int rc = 0; 1892 1893 for (i = 0; i < LOCKHASH_SIZE; i++) 1894 INIT_HLIST_HEAD(lockhash_table + i); 1895 1896 argc = parse_options_subcommand(argc, argv, lock_options, lock_subcommands, 1897 lock_usage, PARSE_OPT_STOP_AT_NON_OPTION); 1898 if (!argc) 1899 usage_with_options(lock_usage, lock_options); 1900 1901 if (strlen(argv[0]) > 2 && strstarts("record", argv[0])) { 1902 return __cmd_record(argc, argv); 1903 } else if (strlen(argv[0]) > 2 && strstarts("report", argv[0])) { 1904 trace_handler = &report_lock_ops; 1905 if (argc) { 1906 argc = parse_options(argc, argv, 1907 report_options, report_usage, 0); 1908 if (argc) 1909 
				usage_with_options(report_usage, report_options);
		}
		rc = __cmd_report(false);
	} else if (!strcmp(argv[0], "script")) {
		/* Aliased to 'perf script' */
		return cmd_script(argc, argv);
	} else if (!strcmp(argv[0], "info")) {
		if (argc) {
			argc = parse_options(argc, argv,
					     info_options, info_usage, 0);
			if (argc)
				usage_with_options(info_usage, info_options);
		}
		/* recycling report_lock_ops */
		trace_handler = &report_lock_ops;
		rc = __cmd_report(true);
	} else if (strlen(argv[0]) > 2 && strstarts("contention", argv[0])) {
		trace_handler = &contention_lock_ops;
		sort_key = "wait_total";
		output_fields = "contended,wait_total,wait_max,avg_wait";

#ifndef HAVE_BPF_SKEL
		set_option_nobuild(contention_options, 'b', "use-bpf",
				   "no BUILD_BPF_SKEL=1", false);
#endif
		if (argc) {
			argc = parse_options(argc, argv, contention_options,
					     contention_usage, 0);
		}
		rc = __cmd_contention(argc, argv);
	} else {
		usage_with_options(lock_usage, lock_options);
	}

	return rc;
}