// SPDX-License-Identifier: GPL-2.0
#include <errno.h>
#include <inttypes.h>
#include "builtin.h"
#include "perf.h"

#include "util/evlist.h" // for struct evsel_str_handler
#include "util/evsel.h"
#include "util/symbol.h"
#include "util/thread.h"
#include "util/header.h"
#include "util/target.h"
#include "util/callchain.h"
#include "util/lock-contention.h"
#include "util/bpf_skel/lock_data.h"

#include <subcmd/pager.h>
#include <subcmd/parse-options.h>
#include "util/trace-event.h"
#include "util/tracepoint.h"

#include "util/debug.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/data.h"
#include "util/string2.h"
#include "util/map.h"
#include "util/util.h"

#include <sys/types.h>
#include <sys/prctl.h>
#include <semaphore.h>
#include <math.h>
#include <limits.h>

#include <linux/list.h>
#include <linux/hash.h>
#include <linux/kernel.h>
#include <linux/zalloc.h>
#include <linux/err.h>
#include <linux/stringify.h>

static struct perf_session *session;
static struct target target;

/* based on kernel/lockdep.c */
#define LOCKHASH_BITS		12
#define LOCKHASH_SIZE		(1UL << LOCKHASH_BITS)

static struct hlist_head lockhash_table[LOCKHASH_SIZE];

#define __lockhashfn(key)	hash_long((unsigned long)key, LOCKHASH_BITS)
#define lockhashentry(key)	(lockhash_table + __lockhashfn((key)))

static struct rb_root thread_stats;

static bool combine_locks;
static bool show_thread_stats;
static bool use_bpf;
static unsigned long bpf_map_entries = 10240;
static int max_stack_depth = CONTENTION_STACK_DEPTH;
static int stack_skip = CONTENTION_STACK_SKIP;
static int print_nr_entries = INT_MAX / 2;

static enum lock_aggr_mode aggr_mode = LOCK_AGGR_ADDR;

static struct thread_stat *thread_stat_find(u32 tid)
{
	struct rb_node *node;
	struct thread_stat *st;

	node = thread_stats.rb_node;
	while (node) {
		st = container_of(node, struct thread_stat, rb);
		if (st->tid == tid)
			return st;
		else if (tid < st->tid)
			node = node->rb_left;
		else
			node = node->rb_right;
	}

	return NULL;
}

static void thread_stat_insert(struct thread_stat *new)
{
	struct rb_node **rb = &thread_stats.rb_node;
	struct rb_node *parent = NULL;
	struct thread_stat *p;

	while (*rb) {
		p = container_of(*rb, struct thread_stat, rb);
		parent = *rb;

		if (new->tid < p->tid)
			rb = &(*rb)->rb_left;
		else if (new->tid > p->tid)
			rb = &(*rb)->rb_right;
		else
			BUG_ON("inserting invalid thread_stat\n");
	}

	rb_link_node(&new->rb, parent, rb);
	rb_insert_color(&new->rb, &thread_stats);
}

static struct thread_stat *thread_stat_findnew_after_first(u32 tid)
{
	struct thread_stat *st;

	st = thread_stat_find(tid);
	if (st)
		return st;

	st = zalloc(sizeof(struct thread_stat));
	if (!st) {
		pr_err("memory allocation failed\n");
		return NULL;
	}

	st->tid = tid;
	INIT_LIST_HEAD(&st->seq_list);

	thread_stat_insert(st);

	return st;
}

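/*
 * thread_stat_findnew starts out pointing at thread_stat_findnew_first,
 * which handles only the very first lookup: the tree is known to be
 * empty, so it links the new node directly at the root and then swaps
 * the function pointer to thread_stat_findnew_after_first for all later
 * lookups.  This avoids an empty-tree check on every call.
 */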
static struct thread_stat *thread_stat_findnew_first(u32 tid);
static struct thread_stat *(*thread_stat_findnew)(u32 tid) =
	thread_stat_findnew_first;

static struct thread_stat *thread_stat_findnew_first(u32 tid)
{
	struct thread_stat *st;

	st = zalloc(sizeof(struct thread_stat));
	if (!st) {
		pr_err("memory allocation failed\n");
		return NULL;
	}
	st->tid = tid;
	INIT_LIST_HEAD(&st->seq_list);

	rb_link_node(&st->rb, NULL, &thread_stats.rb_node);
	rb_insert_color(&st->rb, &thread_stats);

	thread_stat_findnew = thread_stat_findnew_after_first;
	return st;
}

/* build simple key functions: each returns true if 'one' is bigger than 'two' */
#define SINGLE_KEY(member)						\
	static int lock_stat_key_ ## member(struct lock_stat *one,	\
					    struct lock_stat *two)	\
	{								\
		return one->member > two->member;			\
	}

SINGLE_KEY(nr_acquired)
SINGLE_KEY(nr_contended)
SINGLE_KEY(avg_wait_time)
SINGLE_KEY(wait_time_total)
SINGLE_KEY(wait_time_max)

static int lock_stat_key_wait_time_min(struct lock_stat *one,
					struct lock_stat *two)
{
	u64 s1 = one->wait_time_min;
	u64 s2 = two->wait_time_min;

	if (s1 == ULLONG_MAX)
		s1 = 0;
	if (s2 == ULLONG_MAX)
		s2 = 0;
	return s1 > s2;
}

struct lock_key {
	/*
	 * name: the key name specified by the user;
	 * this should be simpler than the raw member name,
	 * e.g. nr_acquired -> acquired, wait_time_total -> wait_total
	 */
	const char *name;
	/* header: the string printed on the header line */
	const char *header;
	/* len: the printing width of the field */
	int len;
	/* key: a pointer to a function comparing two lock stats for sorting */
	int (*key)(struct lock_stat*, struct lock_stat*);
	/* print: a pointer to a function printing a given lock stat */
	void (*print)(struct lock_key*, struct lock_stat*);
	/* list: list entry to link this */
	struct list_head list;
};

static void lock_stat_key_print_time(unsigned long long nsec, int len)
{
	static const struct {
		float base;
		const char *unit;
	} table[] = {
		{ 1e9 * 3600, "h " },
		{ 1e9 * 60, "m " },
		{ 1e9, "s " },
		{ 1e6, "ms" },
		{ 1e3, "us" },
		{ 0, NULL },
	};

	for (int i = 0; table[i].unit; i++) {
		if (nsec < table[i].base)
			continue;

		pr_info("%*.2f %s", len - 3, nsec / table[i].base, table[i].unit);
		return;
	}

	pr_info("%*llu %s", len - 3, nsec, "ns");
}

#define PRINT_KEY(member)						\
static void lock_stat_key_print_ ## member(struct lock_key *key,	\
					   struct lock_stat *ls)	\
{									\
	pr_info("%*llu", key->len, (unsigned long long)ls->member);	\
}

#define PRINT_TIME(member)						\
static void lock_stat_key_print_ ## member(struct lock_key *key,	\
					   struct lock_stat *ls)	\
{									\
	lock_stat_key_print_time((unsigned long long)ls->member, key->len); \
}

PRINT_KEY(nr_acquired)
PRINT_KEY(nr_contended)
PRINT_TIME(avg_wait_time)
PRINT_TIME(wait_time_total)
PRINT_TIME(wait_time_max)

static void lock_stat_key_print_wait_time_min(struct lock_key *key,
					      struct lock_stat *ls)
{
	u64 wait_time = ls->wait_time_min;

	if (wait_time == ULLONG_MAX)
		wait_time = 0;

	lock_stat_key_print_time(wait_time, key->len);
}

static const char *sort_key = "acquired";

static int (*compare)(struct lock_stat *, struct lock_stat *);

static struct rb_root sorted; /* place to store intermediate data */
static struct rb_root result; /* place to store sorted data */

static LIST_HEAD(lock_keys);
static const char *output_fields;

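/*
 * DEF_KEY_LOCK ties a user-visible key name to the comparison and print
 * helpers generated above.  For example,
 *
 *	DEF_KEY_LOCK(acquired, "acquired", nr_acquired, 10)
 *
 * expands (roughly) to:
 *
 *	{ "acquired", "acquired", 10,
 *	  lock_stat_key_nr_acquired, lock_stat_key_print_nr_acquired, {} }
 */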
#define DEF_KEY_LOCK(name, header, fn_suffix, len)			\
	{ #name, header, len, lock_stat_key_ ## fn_suffix, lock_stat_key_print_ ## fn_suffix, {} }
static struct lock_key report_keys[] = {
	DEF_KEY_LOCK(acquired, "acquired", nr_acquired, 10),
	DEF_KEY_LOCK(contended, "contended", nr_contended, 10),
	DEF_KEY_LOCK(avg_wait, "avg wait", avg_wait_time, 12),
	DEF_KEY_LOCK(wait_total, "total wait", wait_time_total, 12),
	DEF_KEY_LOCK(wait_max, "max wait", wait_time_max, 12),
	DEF_KEY_LOCK(wait_min, "min wait", wait_time_min, 12),

	/* more complicated comparisons should go here */
	{ }
};

static struct lock_key contention_keys[] = {
	DEF_KEY_LOCK(contended, "contended", nr_contended, 10),
	DEF_KEY_LOCK(wait_total, "total wait", wait_time_total, 12),
	DEF_KEY_LOCK(wait_max, "max wait", wait_time_max, 12),
	DEF_KEY_LOCK(wait_min, "min wait", wait_time_min, 12),
	DEF_KEY_LOCK(avg_wait, "avg wait", avg_wait_time, 12),

	/* more complicated comparisons should go here */
	{ }
};

static int select_key(bool contention)
{
	int i;
	struct lock_key *keys = report_keys;

	if (contention)
		keys = contention_keys;

	for (i = 0; keys[i].name; i++) {
		if (!strcmp(keys[i].name, sort_key)) {
			compare = keys[i].key;

			/* selected key should be in the output fields */
			if (list_empty(&keys[i].list))
				list_add_tail(&keys[i].list, &lock_keys);

			return 0;
		}
	}

	pr_err("Unknown compare key: %s\n", sort_key);
	return -1;
}

static int add_output_field(bool contention, char *name)
{
	int i;
	struct lock_key *keys = report_keys;

	if (contention)
		keys = contention_keys;

	for (i = 0; keys[i].name; i++) {
		if (strcmp(keys[i].name, name))
			continue;

		/* prevent double link */
		if (list_empty(&keys[i].list))
			list_add_tail(&keys[i].list, &lock_keys);

		return 0;
	}

	pr_err("Unknown output field: %s\n", name);
	return -1;
}

static int setup_output_field(bool contention, const char *str)
{
	char *tok, *tmp, *orig;
	int i, ret = 0;
	struct lock_key *keys = report_keys;

	if (contention)
		keys = contention_keys;

	/* no output field given: use all of them */
	if (str == NULL) {
		for (i = 0; keys[i].name; i++)
			list_add_tail(&keys[i].list, &lock_keys);
		return 0;
	}

	for (i = 0; keys[i].name; i++)
		INIT_LIST_HEAD(&keys[i].list);

	orig = tmp = strdup(str);
	if (orig == NULL)
		return -ENOMEM;

	while ((tok = strsep(&tmp, ",")) != NULL) {
		ret = add_output_field(contention, tok);
		if (ret < 0)
			break;
	}
	free(orig);

	return ret;
}

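/*
 * Merge stats of locks that share a class name into a single entry in the
 * 'sorted' tree.  Counts and total wait time are summed, min/max/avg are
 * recomputed, and the absorbed entry is marked 'combined' so that
 * insert_to_result() will skip it later.
 */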
static void combine_lock_stats(struct lock_stat *st)
{
	struct rb_node **rb = &sorted.rb_node;
	struct rb_node *parent = NULL;
	struct lock_stat *p;
	int ret;

	while (*rb) {
		p = container_of(*rb, struct lock_stat, rb);
		parent = *rb;

		if (st->name && p->name)
			ret = strcmp(st->name, p->name);
		else
			ret = !!st->name - !!p->name;

		if (ret == 0) {
			p->nr_acquired += st->nr_acquired;
			p->nr_contended += st->nr_contended;
			p->wait_time_total += st->wait_time_total;

			if (p->nr_contended)
				p->avg_wait_time = p->wait_time_total / p->nr_contended;

			if (p->wait_time_min > st->wait_time_min)
				p->wait_time_min = st->wait_time_min;
			if (p->wait_time_max < st->wait_time_max)
				p->wait_time_max = st->wait_time_max;

			p->broken |= st->broken;
			st->combined = 1;
			return;
		}

		if (ret < 0)
			rb = &(*rb)->rb_left;
		else
			rb = &(*rb)->rb_right;
	}

	rb_link_node(&st->rb, parent, rb);
	rb_insert_color(&st->rb, &sorted);
}

static void insert_to_result(struct lock_stat *st,
			     int (*bigger)(struct lock_stat *, struct lock_stat *))
{
	struct rb_node **rb = &result.rb_node;
	struct rb_node *parent = NULL;
	struct lock_stat *p;

	if (combine_locks && st->combined)
		return;

	while (*rb) {
		p = container_of(*rb, struct lock_stat, rb);
		parent = *rb;

		if (bigger(st, p))
			rb = &(*rb)->rb_left;
		else
			rb = &(*rb)->rb_right;
	}

	rb_link_node(&st->rb, parent, rb);
	rb_insert_color(&st->rb, &result);
}

/* return the leftmost element of result, and erase it */
static struct lock_stat *pop_from_result(void)
{
	struct rb_node *node = result.rb_node;

	if (!node)
		return NULL;

	while (node->rb_left)
		node = node->rb_left;

	rb_erase(node, &result);
	return container_of(node, struct lock_stat, rb);
}

static struct lock_stat *lock_stat_find(u64 addr)
{
	struct hlist_head *entry = lockhashentry(addr);
	struct lock_stat *ret;

	hlist_for_each_entry(ret, entry, hash_entry) {
		if (ret->addr == addr)
			return ret;
	}
	return NULL;
}

static struct lock_stat *lock_stat_findnew(u64 addr, const char *name, int flags)
{
	struct hlist_head *entry = lockhashentry(addr);
	struct lock_stat *ret, *new;

	hlist_for_each_entry(ret, entry, hash_entry) {
		if (ret->addr == addr)
			return ret;
	}

	new = zalloc(sizeof(struct lock_stat));
	if (!new)
		goto alloc_failed;

	new->addr = addr;
	new->name = strdup(name);
	if (!new->name) {
		free(new);
		goto alloc_failed;
	}

	new->flags = flags;
	new->wait_time_min = ULLONG_MAX;

	hlist_add_head(&new->hash_entry, entry);
	return new;

alloc_failed:
	pr_err("memory allocation failed\n");
	return NULL;
}

struct trace_lock_handler {
	/* used when CONFIG_LOCKDEP is enabled */
	int (*acquire_event)(struct evsel *evsel,
			     struct perf_sample *sample);

	/* used when CONFIG_LOCKDEP and CONFIG_LOCK_STAT are enabled */
	int (*acquired_event)(struct evsel *evsel,
			      struct perf_sample *sample);

	/* used when CONFIG_LOCKDEP and CONFIG_LOCK_STAT are enabled */
	int (*contended_event)(struct evsel *evsel,
			       struct perf_sample *sample);

	/* used when CONFIG_LOCKDEP is enabled */
	int (*release_event)(struct evsel *evsel,
			     struct perf_sample *sample);

	/* used when CONFIG_LOCKDEP is off */
	int (*contention_begin_event)(struct evsel *evsel,
				      struct perf_sample *sample);

	/* used when CONFIG_LOCKDEP is off */
	int (*contention_end_event)(struct evsel *evsel,
				    struct perf_sample *sample);
};

static struct lock_seq_stat *get_seq(struct thread_stat *ts, u64 addr)
{
	struct lock_seq_stat *seq;

	list_for_each_entry(seq, &ts->seq_list, list) {
		if (seq->addr == addr)
			return seq;
	}

	seq = zalloc(sizeof(struct lock_seq_stat));
	if (!seq) {
		pr_err("memory allocation failed\n");
		return NULL;
	}
	seq->state = SEQ_STATE_UNINITIALIZED;
	seq->addr = addr;

	list_add(&seq->list, &ts->seq_list);
	return seq;
}

enum broken_state {
	BROKEN_ACQUIRE,
	BROKEN_ACQUIRED,
	BROKEN_CONTENDED,
	BROKEN_RELEASE,
	BROKEN_MAX,
};

static int bad_hist[BROKEN_MAX];

enum acquire_flags {
	TRY_LOCK = 1,
	READ_LOCK = 2,
};

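/*
 * The aggregation mode decides what a lock_stat entry is keyed by:
 * LOCK_AGGR_ADDR uses the lock instance address, LOCK_AGGR_TASK uses the
 * owning thread's tid, and LOCK_AGGR_CALLER uses a hash of the contention
 * callchain (see callchain_id() below).
 */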
static int get_key_by_aggr_mode_simple(u64 *key, u64 addr, u32 tid)
{
	switch (aggr_mode) {
	case LOCK_AGGR_ADDR:
		*key = addr;
		break;
	case LOCK_AGGR_TASK:
		*key = tid;
		break;
	case LOCK_AGGR_CALLER:
	default:
		pr_err("Invalid aggregation mode: %d\n", aggr_mode);
		return -EINVAL;
	}
	return 0;
}

static u64 callchain_id(struct evsel *evsel, struct perf_sample *sample);

static int get_key_by_aggr_mode(u64 *key, u64 addr, struct evsel *evsel,
				struct perf_sample *sample)
{
	if (aggr_mode == LOCK_AGGR_CALLER) {
		*key = callchain_id(evsel, sample);
		return 0;
	}
	return get_key_by_aggr_mode_simple(key, addr, sample->tid);
}

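/*
 * The handlers below run a small state machine per (thread, lock address)
 * pair, kept in a lock_seq_stat.  The expected sequence is:
 *
 *	UNINITIALIZED/RELEASED -> ACQUIRING -> (CONTENDED ->) ACQUIRED
 *		-> RELEASED
 *
 * with READ_ACQUIRED counting nested readers.  Any transition that cannot
 * happen with sane input (e.g. acquired without acquiring, typically due
 * to lost events) marks the lock as broken, bumps bad_hist[] and throws
 * the sequence away.
 */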
static int report_lock_acquire_event(struct evsel *evsel,
				     struct perf_sample *sample)
{
	struct lock_stat *ls;
	struct thread_stat *ts;
	struct lock_seq_stat *seq;
	const char *name = evsel__strval(evsel, sample, "name");
	u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
	int flag = evsel__intval(evsel, sample, "flags");
	u64 key;
	int ret;

	ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid);
	if (ret < 0)
		return ret;

	ls = lock_stat_findnew(key, name, 0);
	if (!ls)
		return -ENOMEM;

	ts = thread_stat_findnew(sample->tid);
	if (!ts)
		return -ENOMEM;

	seq = get_seq(ts, addr);
	if (!seq)
		return -ENOMEM;

	switch (seq->state) {
	case SEQ_STATE_UNINITIALIZED:
	case SEQ_STATE_RELEASED:
		if (!flag) {
			seq->state = SEQ_STATE_ACQUIRING;
		} else {
			if (flag & TRY_LOCK)
				ls->nr_trylock++;
			if (flag & READ_LOCK)
				ls->nr_readlock++;
			seq->state = SEQ_STATE_READ_ACQUIRED;
			seq->read_count = 1;
			ls->nr_acquired++;
		}
		break;
	case SEQ_STATE_READ_ACQUIRED:
		if (flag & READ_LOCK) {
			seq->read_count++;
			ls->nr_acquired++;
			goto end;
		} else {
			goto broken;
		}
		break;
	case SEQ_STATE_ACQUIRED:
	case SEQ_STATE_ACQUIRING:
	case SEQ_STATE_CONTENDED:
broken:
		/* broken lock sequence */
		if (!ls->broken) {
			ls->broken = 1;
			bad_hist[BROKEN_ACQUIRE]++;
		}
		list_del_init(&seq->list);
		free(seq);
		goto end;
	default:
		BUG_ON("Unknown state of lock sequence found!\n");
		break;
	}

	ls->nr_acquire++;
	seq->prev_event_time = sample->time;
end:
	return 0;
}

static int report_lock_acquired_event(struct evsel *evsel,
				      struct perf_sample *sample)
{
	struct lock_stat *ls;
	struct thread_stat *ts;
	struct lock_seq_stat *seq;
	u64 contended_term;
	const char *name = evsel__strval(evsel, sample, "name");
	u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
	u64 key;
	int ret;

	ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid);
	if (ret < 0)
		return ret;

	ls = lock_stat_findnew(key, name, 0);
	if (!ls)
		return -ENOMEM;

	ts = thread_stat_findnew(sample->tid);
	if (!ts)
		return -ENOMEM;

	seq = get_seq(ts, addr);
	if (!seq)
		return -ENOMEM;

	switch (seq->state) {
	case SEQ_STATE_UNINITIALIZED:
		/* orphan event, do nothing */
		return 0;
	case SEQ_STATE_ACQUIRING:
		break;
	case SEQ_STATE_CONTENDED:
		contended_term = sample->time - seq->prev_event_time;
		ls->wait_time_total += contended_term;
		if (contended_term < ls->wait_time_min)
			ls->wait_time_min = contended_term;
		if (ls->wait_time_max < contended_term)
			ls->wait_time_max = contended_term;
		break;
	case SEQ_STATE_RELEASED:
	case SEQ_STATE_ACQUIRED:
	case SEQ_STATE_READ_ACQUIRED:
		/* broken lock sequence */
		if (!ls->broken) {
			ls->broken = 1;
			bad_hist[BROKEN_ACQUIRED]++;
		}
		list_del_init(&seq->list);
		free(seq);
		goto end;
	default:
		BUG_ON("Unknown state of lock sequence found!\n");
		break;
	}

	seq->state = SEQ_STATE_ACQUIRED;
	ls->nr_acquired++;
	ls->avg_wait_time = ls->nr_contended ? ls->wait_time_total/ls->nr_contended : 0;
	seq->prev_event_time = sample->time;
end:
	return 0;
}

static int report_lock_contended_event(struct evsel *evsel,
				       struct perf_sample *sample)
{
	struct lock_stat *ls;
	struct thread_stat *ts;
	struct lock_seq_stat *seq;
	const char *name = evsel__strval(evsel, sample, "name");
	u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
	u64 key;
	int ret;

	ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid);
	if (ret < 0)
		return ret;

	ls = lock_stat_findnew(key, name, 0);
	if (!ls)
		return -ENOMEM;

	ts = thread_stat_findnew(sample->tid);
	if (!ts)
		return -ENOMEM;

	seq = get_seq(ts, addr);
	if (!seq)
		return -ENOMEM;

	switch (seq->state) {
	case SEQ_STATE_UNINITIALIZED:
		/* orphan event, do nothing */
		return 0;
	case SEQ_STATE_ACQUIRING:
		break;
	case SEQ_STATE_RELEASED:
	case SEQ_STATE_ACQUIRED:
	case SEQ_STATE_READ_ACQUIRED:
	case SEQ_STATE_CONTENDED:
		/* broken lock sequence */
		if (!ls->broken) {
			ls->broken = 1;
			bad_hist[BROKEN_CONTENDED]++;
		}
		list_del_init(&seq->list);
		free(seq);
		goto end;
	default:
		BUG_ON("Unknown state of lock sequence found!\n");
		break;
	}

	seq->state = SEQ_STATE_CONTENDED;
	ls->nr_contended++;
	ls->avg_wait_time = ls->wait_time_total/ls->nr_contended;
	seq->prev_event_time = sample->time;
end:
	return 0;
}

static int report_lock_release_event(struct evsel *evsel,
				     struct perf_sample *sample)
{
	struct lock_stat *ls;
	struct thread_stat *ts;
	struct lock_seq_stat *seq;
	const char *name = evsel__strval(evsel, sample, "name");
	u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
	u64 key;
	int ret;

	ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid);
	if (ret < 0)
		return ret;

	ls = lock_stat_findnew(key, name, 0);
	if (!ls)
		return -ENOMEM;

	ts = thread_stat_findnew(sample->tid);
	if (!ts)
		return -ENOMEM;

	seq = get_seq(ts, addr);
	if (!seq)
		return -ENOMEM;

	switch (seq->state) {
	case SEQ_STATE_UNINITIALIZED:
		goto end;
	case SEQ_STATE_ACQUIRED:
		break;
	case SEQ_STATE_READ_ACQUIRED:
		seq->read_count--;
		BUG_ON(seq->read_count < 0);
		if (seq->read_count) {
			ls->nr_release++;
			goto end;
		}
		break;
	case SEQ_STATE_ACQUIRING:
	case SEQ_STATE_CONTENDED:
	case SEQ_STATE_RELEASED:
		/* broken lock sequence */
		if (!ls->broken) {
			ls->broken = 1;
			bad_hist[BROKEN_RELEASE]++;
		}
		goto free_seq;
	default:
		BUG_ON("Unknown state of lock sequence found!\n");
		break;
	}

	ls->nr_release++;
free_seq:
	list_del_init(&seq->list);
	free(seq);
end:
	return 0;
}

static int get_symbol_name_offset(struct map *map, struct symbol *sym, u64 ip,
				  char *buf, int size)
{
	u64 offset;

	if (map == NULL || sym == NULL) {
		buf[0] = '\0';
		return 0;
	}

	offset = map->map_ip(map, ip) - sym->start;

	if (offset)
		return scnprintf(buf, size, "%s+%#lx", sym->name, offset);
	else
		return strlcpy(buf, sym->name, size);
}

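/*
 * Callchain helpers for the contention path: lock_contention_caller()
 * resolves the sample's callchain and returns the first symbol that is
 * not a known lock function (skipping the first stack_skip entries),
 * callchain_id() folds the same entries into a 64-bit hash used as the
 * aggregation key, and get_callstack() copies the raw IPs, dropping
 * PERF_CONTEXT_* marker entries.
 */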
static int lock_contention_caller(struct evsel *evsel, struct perf_sample *sample,
				  char *buf, int size)
{
	struct thread *thread;
	struct callchain_cursor *cursor = &callchain_cursor;
	struct machine *machine = &session->machines.host;
	struct symbol *sym;
	int skip = 0;
	int ret;

	/* lock names will be replaced with task names later */
	if (show_thread_stats)
		return -1;

	thread = machine__findnew_thread(machine, -1, sample->pid);
	if (thread == NULL)
		return -1;

	/* use caller function name from the callchain */
	ret = thread__resolve_callchain(thread, cursor, evsel, sample,
					NULL, NULL, max_stack_depth);
	if (ret != 0) {
		thread__put(thread);
		return -1;
	}

	callchain_cursor_commit(cursor);
	thread__put(thread);

	while (true) {
		struct callchain_cursor_node *node;

		node = callchain_cursor_current(cursor);
		if (node == NULL)
			break;

		/* skip first few entries - for lock functions */
		if (++skip <= stack_skip)
			goto next;

		sym = node->ms.sym;
		if (sym && !machine__is_lock_function(machine, node->ip)) {
			get_symbol_name_offset(node->ms.map, sym, node->ip,
					       buf, size);
			return 0;
		}

next:
		callchain_cursor_advance(cursor);
	}
	return -1;
}

static u64 callchain_id(struct evsel *evsel, struct perf_sample *sample)
{
	struct callchain_cursor *cursor = &callchain_cursor;
	struct machine *machine = &session->machines.host;
	struct thread *thread;
	u64 hash = 0;
	int skip = 0;
	int ret;

	thread = machine__findnew_thread(machine, -1, sample->pid);
	if (thread == NULL)
		return -1;

	/* use caller function name from the callchain */
	ret = thread__resolve_callchain(thread, cursor, evsel, sample,
					NULL, NULL, max_stack_depth);
	thread__put(thread);

	if (ret != 0)
		return -1;

	callchain_cursor_commit(cursor);

	while (true) {
		struct callchain_cursor_node *node;

		node = callchain_cursor_current(cursor);
		if (node == NULL)
			break;

		/* skip first few entries - for lock functions */
		if (++skip <= stack_skip)
			goto next;

		if (node->ms.sym && machine__is_lock_function(machine, node->ip))
			goto next;

		hash ^= hash_long((unsigned long)node->ip, 64);

next:
		callchain_cursor_advance(cursor);
	}
	return hash;
}

static u64 *get_callstack(struct perf_sample *sample, int max_stack)
{
	u64 *callstack;
	u64 i;
	int c;

	callstack = calloc(max_stack, sizeof(*callstack));
	if (callstack == NULL)
		return NULL;

	for (i = 0, c = 0; i < sample->callchain->nr && c < max_stack; i++) {
		u64 ip = sample->callchain->ips[i];

		if (ip >= PERF_CONTEXT_MAX)
			continue;

		callstack[c++] = ip;
	}
	return callstack;
}

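/*
 * The contention_begin/end handlers don't know lock names: a lock_stat is
 * created on first contention, named after the caller resolved from the
 * callchain (or "Unknown"), with the tracepoint's flags recording the lock
 * type.  When aggregating by caller in verbose mode, the raw callstack is
 * saved too so it can be printed alongside the result.
 */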
static int report_lock_contention_begin_event(struct evsel *evsel,
					      struct perf_sample *sample)
{
	struct lock_stat *ls;
	struct thread_stat *ts;
	struct lock_seq_stat *seq;
	u64 addr = evsel__intval(evsel, sample, "lock_addr");
	u64 key;
	int ret;

	ret = get_key_by_aggr_mode(&key, addr, evsel, sample);
	if (ret < 0)
		return ret;

	ls = lock_stat_find(key);
	if (!ls) {
		char buf[128];
		const char *caller = buf;
		unsigned int flags = evsel__intval(evsel, sample, "flags");

		if (lock_contention_caller(evsel, sample, buf, sizeof(buf)) < 0)
			caller = "Unknown";

		ls = lock_stat_findnew(key, caller, flags);
		if (!ls)
			return -ENOMEM;

		if (aggr_mode == LOCK_AGGR_CALLER && verbose) {
			ls->callstack = get_callstack(sample, max_stack_depth);
			if (ls->callstack == NULL)
				return -ENOMEM;
		}
	}

	ts = thread_stat_findnew(sample->tid);
	if (!ts)
		return -ENOMEM;

	seq = get_seq(ts, addr);
	if (!seq)
		return -ENOMEM;

	switch (seq->state) {
	case SEQ_STATE_UNINITIALIZED:
	case SEQ_STATE_ACQUIRED:
		break;
	case SEQ_STATE_CONTENDED:
		/*
		 * A contention_begin event can be nested because of mutex
		 * spinning; keep the original contention begin event and
		 * ignore the second one.
		 */
		goto end;
	case SEQ_STATE_ACQUIRING:
	case SEQ_STATE_READ_ACQUIRED:
	case SEQ_STATE_RELEASED:
		/* broken lock sequence */
		if (!ls->broken) {
			ls->broken = 1;
			bad_hist[BROKEN_CONTENDED]++;
		}
		list_del_init(&seq->list);
		free(seq);
		goto end;
	default:
		BUG_ON("Unknown state of lock sequence found!\n");
		break;
	}

	if (seq->state != SEQ_STATE_CONTENDED) {
		seq->state = SEQ_STATE_CONTENDED;
		seq->prev_event_time = sample->time;
		ls->nr_contended++;
	}
end:
	return 0;
}

static int report_lock_contention_end_event(struct evsel *evsel,
					    struct perf_sample *sample)
{
	struct lock_stat *ls;
	struct thread_stat *ts;
	struct lock_seq_stat *seq;
	u64 contended_term;
	u64 addr = evsel__intval(evsel, sample, "lock_addr");
	u64 key;
	int ret;

	ret = get_key_by_aggr_mode(&key, addr, evsel, sample);
	if (ret < 0)
		return ret;

	ls = lock_stat_find(key);
	if (!ls)
		return 0;

	ts = thread_stat_find(sample->tid);
	if (!ts)
		return 0;

	seq = get_seq(ts, addr);
	if (!seq)
		return -ENOMEM;

	switch (seq->state) {
	case SEQ_STATE_UNINITIALIZED:
		goto end;
	case SEQ_STATE_CONTENDED:
		contended_term = sample->time - seq->prev_event_time;
		ls->wait_time_total += contended_term;
		if (contended_term < ls->wait_time_min)
			ls->wait_time_min = contended_term;
		if (ls->wait_time_max < contended_term)
			ls->wait_time_max = contended_term;
		break;
	case SEQ_STATE_ACQUIRING:
	case SEQ_STATE_ACQUIRED:
	case SEQ_STATE_READ_ACQUIRED:
	case SEQ_STATE_RELEASED:
		/* broken lock sequence */
		if (!ls->broken) {
			ls->broken = 1;
			bad_hist[BROKEN_ACQUIRED]++;
		}
		list_del_init(&seq->list);
		free(seq);
		goto end;
	default:
		BUG_ON("Unknown state of lock sequence found!\n");
		break;
	}

	seq->state = SEQ_STATE_ACQUIRED;
	ls->nr_acquired++;
	ls->avg_wait_time = ls->wait_time_total/ls->nr_acquired;
end:
	return 0;
}

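/*
 * Each subcommand installs one of the handler tables below: 'report'
 * understands both the lockdep tracepoints and the newer contention
 * tracepoints, while 'contention' only consumes contention_begin/end.
 */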
/* lock oriented handlers */
/* TODO: handlers for CPU oriented, thread oriented */
static struct trace_lock_handler report_lock_ops = {
	.acquire_event = report_lock_acquire_event,
	.acquired_event = report_lock_acquired_event,
	.contended_event = report_lock_contended_event,
	.release_event = report_lock_release_event,
	.contention_begin_event = report_lock_contention_begin_event,
	.contention_end_event = report_lock_contention_end_event,
};

static struct trace_lock_handler contention_lock_ops = {
	.contention_begin_event = report_lock_contention_begin_event,
	.contention_end_event = report_lock_contention_end_event,
};

static struct trace_lock_handler *trace_handler;

static int evsel__process_lock_acquire(struct evsel *evsel, struct perf_sample *sample)
{
	if (trace_handler->acquire_event)
		return trace_handler->acquire_event(evsel, sample);
	return 0;
}

static int evsel__process_lock_acquired(struct evsel *evsel, struct perf_sample *sample)
{
	if (trace_handler->acquired_event)
		return trace_handler->acquired_event(evsel, sample);
	return 0;
}

static int evsel__process_lock_contended(struct evsel *evsel, struct perf_sample *sample)
{
	if (trace_handler->contended_event)
		return trace_handler->contended_event(evsel, sample);
	return 0;
}

static int evsel__process_lock_release(struct evsel *evsel, struct perf_sample *sample)
{
	if (trace_handler->release_event)
		return trace_handler->release_event(evsel, sample);
	return 0;
}

static int evsel__process_contention_begin(struct evsel *evsel, struct perf_sample *sample)
{
	if (trace_handler->contention_begin_event)
		return trace_handler->contention_begin_event(evsel, sample);
	return 0;
}

static int evsel__process_contention_end(struct evsel *evsel, struct perf_sample *sample)
{
	if (trace_handler->contention_end_event)
		return trace_handler->contention_end_event(evsel, sample);
	return 0;
}

static void print_bad_events(int bad, int total)
{
	/* debug output; this should eventually be removed */
	int i;
	int broken = 0;
	const char *name[4] =
		{ "acquire", "acquired", "contended", "release" };

	for (i = 0; i < BROKEN_MAX; i++)
		broken += bad_hist[i];

	if (quiet || (broken == 0 && !verbose))
		return;

	pr_info("\n=== output for debug ===\n\n");
	pr_info("bad: %d, total: %d\n", bad, total);
	pr_info("bad rate: %.2f %%\n", (double)bad / (double)total * 100);
	pr_info("histogram of events that caused bad sequences\n");
	for (i = 0; i < BROKEN_MAX; i++)
		pr_info(" %10s: %d\n", name[i], bad_hist[i]);
}

/* TODO: various ways to print, coloring, nano or milli sec */
static void print_result(void)
{
	struct lock_stat *st;
	struct lock_key *key;
	char cut_name[20];
	int bad, total, printed;

	if (!quiet) {
		pr_info("%20s ", "Name");
		list_for_each_entry(key, &lock_keys, list)
			pr_info("%*s ", key->len, key->header);
		pr_info("\n\n");
	}

	bad = total = printed = 0;
	while ((st = pop_from_result())) {
		total++;
		if (st->broken)
			bad++;
		if (!st->nr_acquired)
			continue;

		bzero(cut_name, 20);

		if (strlen(st->name) < 20) {
			/* output raw name */
			const char *name = st->name;

			if (show_thread_stats) {
				struct thread *t;

				/* st->addr contains tid of thread */
				t = perf_session__findnew(session, st->addr);
				name = thread__comm_str(t);
			}

			pr_info("%20s ", name);
		} else {
			strncpy(cut_name, st->name, 16);
			cut_name[16] = '.';
			cut_name[17] = '.';
			cut_name[18] = '.';
			cut_name[19] = '\0';
			/* cut off the name to keep the output aligned */
			pr_info("%20s ", cut_name);
		}

		list_for_each_entry(key, &lock_keys, list) {
			key->print(key, st);
			pr_info(" ");
		}
		pr_info("\n");

		if (++printed >= print_nr_entries)
			break;
	}

	print_bad_events(bad, total);
}

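/*
 * 'perf lock info' helpers: dump_threads() lists every traced thread with
 * its comm, dump_map() prints the lock address -> class name table.
 */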
static bool info_threads, info_map;

static void dump_threads(void)
{
	struct thread_stat *st;
	struct rb_node *node;
	struct thread *t;

	pr_info("%10s: comm\n", "Thread ID");

	node = rb_first(&thread_stats);
	while (node) {
		st = container_of(node, struct thread_stat, rb);
		t = perf_session__findnew(session, st->tid);
		pr_info("%10d: %s\n", st->tid, thread__comm_str(t));
		node = rb_next(node);
		thread__put(t);
	}
}

static int compare_maps(struct lock_stat *a, struct lock_stat *b)
{
	int ret;

	if (a->name && b->name)
		ret = strcmp(a->name, b->name);
	else
		ret = !!a->name - !!b->name;

	if (!ret)
		return a->addr < b->addr;
	else
		return ret < 0;
}

static void dump_map(void)
{
	unsigned int i;
	struct lock_stat *st;

	pr_info("Address of instance: name of class\n");
	for (i = 0; i < LOCKHASH_SIZE; i++) {
		hlist_for_each_entry(st, &lockhash_table[i], hash_entry) {
			insert_to_result(st, compare_maps);
		}
	}

	while ((st = pop_from_result()))
		pr_info(" %#llx: %s\n", (unsigned long long)st->addr, st->name);
}

static int dump_info(void)
{
	int rc = 0;

	if (info_threads)
		dump_threads();
	else if (info_map)
		dump_map();
	else {
		rc = -1;
		pr_err("Unknown type of information\n");
	}

	return rc;
}

static const struct evsel_str_handler lock_tracepoints[] = {
	{ "lock:lock_acquire", evsel__process_lock_acquire, }, /* CONFIG_LOCKDEP */
	{ "lock:lock_acquired", evsel__process_lock_acquired, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */
	{ "lock:lock_contended", evsel__process_lock_contended, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */
	{ "lock:lock_release", evsel__process_lock_release, }, /* CONFIG_LOCKDEP */
};

static const struct evsel_str_handler contention_tracepoints[] = {
	{ "lock:contention_begin", evsel__process_contention_begin, },
	{ "lock:contention_end", evsel__process_contention_end, },
};

static int process_event_update(struct perf_tool *tool,
				union perf_event *event,
				struct evlist **pevlist)
{
	int ret;

	ret = perf_event__process_event_update(tool, event, pevlist);
	if (ret < 0)
		return ret;

	/* this can return -EEXIST since we call it for each evsel */
	perf_session__set_tracepoints_handlers(session, lock_tracepoints);
	perf_session__set_tracepoints_handlers(session, contention_tracepoints);
	return 0;
}

typedef int (*tracepoint_handler)(struct evsel *evsel,
				  struct perf_sample *sample);

static int process_sample_event(struct perf_tool *tool __maybe_unused,
				union perf_event *event,
				struct perf_sample *sample,
				struct evsel *evsel,
				struct machine *machine)
{
	int err = 0;
	struct thread *thread = machine__findnew_thread(machine, sample->pid,
							sample->tid);

	if (thread == NULL) {
		pr_debug("problem processing %d event, skipping it.\n",
			 event->header.type);
		return -1;
	}

	if (evsel->handler != NULL) {
		tracepoint_handler f = evsel->handler;
		err = f(evsel, sample);
	}

	thread__put(thread);

	return err;
}

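/*
 * Once all events are processed, the per-lock stats still live in
 * lockhash_table.  combine_result() optionally merges entries of the same
 * class, sort_result() moves everything into the 'result' rbtree ordered
 * by the selected compare key, and the printers then drain it with
 * pop_from_result().
 */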
static void combine_result(void)
{
	unsigned int i;
	struct lock_stat *st;

	if (!combine_locks)
		return;

	for (i = 0; i < LOCKHASH_SIZE; i++) {
		hlist_for_each_entry(st, &lockhash_table[i], hash_entry) {
			combine_lock_stats(st);
		}
	}
}

static void sort_result(void)
{
	unsigned int i;
	struct lock_stat *st;

	for (i = 0; i < LOCKHASH_SIZE; i++) {
		hlist_for_each_entry(st, &lockhash_table[i], hash_entry) {
			insert_to_result(st, compare);
		}
	}
}

static const char *get_type_str(struct lock_stat *st)
{
	static const struct {
		unsigned int flags;
		const char *name;
	} table[] = {
		{ 0, "semaphore" },
		{ LCB_F_SPIN, "spinlock" },
		{ LCB_F_SPIN | LCB_F_READ, "rwlock:R" },
		{ LCB_F_SPIN | LCB_F_WRITE, "rwlock:W" },
		{ LCB_F_READ, "rwsem:R" },
		{ LCB_F_WRITE, "rwsem:W" },
		{ LCB_F_RT, "rtmutex" },
		{ LCB_F_RT | LCB_F_READ, "rwlock-rt:R" },
		{ LCB_F_RT | LCB_F_WRITE, "rwlock-rt:W" },
		{ LCB_F_PERCPU | LCB_F_READ, "pcpu-sem:R" },
		{ LCB_F_PERCPU | LCB_F_WRITE, "pcpu-sem:W" },
		{ LCB_F_MUTEX, "mutex" },
		{ LCB_F_MUTEX | LCB_F_SPIN, "mutex" },
	};

	for (unsigned int i = 0; i < ARRAY_SIZE(table); i++) {
		if (table[i].flags == st->flags)
			return table[i].name;
	}
	return "unknown";
}

static void sort_contention_result(void)
{
	sort_result();
}

static void print_contention_result(struct lock_contention *con)
{
	struct lock_stat *st;
	struct lock_key *key;
	int bad, total, printed;

	if (!quiet) {
		list_for_each_entry(key, &lock_keys, list)
			pr_info("%*s ", key->len, key->header);

		if (show_thread_stats)
			pr_info(" %10s %s\n\n", "pid", "comm");
		else
			pr_info(" %10s %s\n\n", "type", "caller");
	}

	bad = total = printed = 0;
	if (use_bpf)
		bad = bad_hist[BROKEN_CONTENDED];

	while ((st = pop_from_result())) {
		total += use_bpf ? st->nr_contended : 1;
		if (st->broken)
			bad++;

		list_for_each_entry(key, &lock_keys, list) {
			key->print(key, st);
			pr_info(" ");
		}

		if (show_thread_stats) {
			struct thread *t;
			int pid = st->addr;

			/* st->addr contains tid of thread */
			t = perf_session__findnew(session, pid);
			pr_info(" %10d %s\n", pid, thread__comm_str(t));
			goto next;
		}

		pr_info(" %10s %s\n", get_type_str(st), st->name);
		if (verbose) {
			struct map *kmap;
			struct symbol *sym;
			char buf[128];
			u64 ip;

			for (int i = 0; i < max_stack_depth; i++) {
				if (!st->callstack || !st->callstack[i])
					break;

				ip = st->callstack[i];
				sym = machine__find_kernel_symbol(con->machine, ip, &kmap);
				get_symbol_name_offset(kmap, sym, ip, buf, sizeof(buf));
				pr_info("\t\t\t%#lx %s\n", (unsigned long)ip, buf);
			}
		}

next:
		if (++printed >= print_nr_entries)
			break;
	}

	print_bad_events(bad, total);
}

static bool force;

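/*
 * __cmd_report(): open the perf.data file, install the tracepoint
 * handlers, replay the events to build up lock_stat entries, then sort
 * and print them.  With display_info set, it prints the 'perf lock info'
 * tables instead.
 */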
static int __cmd_report(bool display_info)
{
	int err = -EINVAL;
	struct perf_tool eops = {
		.attr = perf_event__process_attr,
		.event_update = process_event_update,
		.sample = process_sample_event,
		.comm = perf_event__process_comm,
		.mmap = perf_event__process_mmap,
		.namespaces = perf_event__process_namespaces,
		.tracing_data = perf_event__process_tracing_data,
		.ordered_events = true,
	};
	struct perf_data data = {
		.path = input_name,
		.mode = PERF_DATA_MODE_READ,
		.force = force,
	};

	session = perf_session__new(&data, &eops);
	if (IS_ERR(session)) {
		pr_err("Initializing perf session failed\n");
		return PTR_ERR(session);
	}

	/* for lock function check */
	symbol_conf.sort_by_name = true;
	symbol__init(&session->header.env);

	if (!data.is_pipe) {
		if (!perf_session__has_traces(session, "lock record"))
			goto out_delete;

		if (perf_session__set_tracepoints_handlers(session, lock_tracepoints)) {
			pr_err("Initializing perf session tracepoint handlers failed\n");
			goto out_delete;
		}

		if (perf_session__set_tracepoints_handlers(session, contention_tracepoints)) {
			pr_err("Initializing perf session tracepoint handlers failed\n");
			goto out_delete;
		}
	}

	if (setup_output_field(false, output_fields))
		goto out_delete;

	if (select_key(false))
		goto out_delete;

	if (show_thread_stats)
		aggr_mode = LOCK_AGGR_TASK;

	err = perf_session__process_events(session);
	if (err)
		goto out_delete;

	setup_pager();
	if (display_info) /* used for info subcommand */
		err = dump_info();
	else {
		combine_result();
		sort_result();
		print_result();
	}

out_delete:
	perf_session__delete(session);
	return err;
}

static void sighandler(int sig __maybe_unused)
{
}

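/*
 * __cmd_contention() works in two modes: with -b/--use-bpf it attaches a
 * BPF program and aggregates contention in kernel-side maps until a
 * signal arrives, otherwise it replays lock:contention_begin/end events
 * from a recorded perf.data file.
 */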
static int __cmd_contention(int argc, const char **argv)
{
	int err = -EINVAL;
	struct perf_tool eops = {
		.attr = perf_event__process_attr,
		.event_update = process_event_update,
		.sample = process_sample_event,
		.comm = perf_event__process_comm,
		.mmap = perf_event__process_mmap,
		.tracing_data = perf_event__process_tracing_data,
		.ordered_events = true,
	};
	struct perf_data data = {
		.path = input_name,
		.mode = PERF_DATA_MODE_READ,
		.force = force,
	};
	struct lock_contention con = {
		.target = &target,
		.result = &lockhash_table[0],
		.map_nr_entries = bpf_map_entries,
		.max_stack = max_stack_depth,
		.stack_skip = stack_skip,
		.aggr_mode = show_thread_stats ? LOCK_AGGR_TASK : LOCK_AGGR_CALLER,
	};

	session = perf_session__new(use_bpf ? NULL : &data, &eops);
	if (IS_ERR(session)) {
		pr_err("Initializing perf session failed\n");
		return PTR_ERR(session);
	}

	con.machine = &session->machines.host;

	/* for lock function check */
	symbol_conf.sort_by_name = true;
	symbol__init(&session->header.env);

	if (use_bpf) {
		err = target__validate(&target);
		if (err) {
			char errbuf[512];

			target__strerror(&target, err, errbuf, 512);
			pr_err("%s\n", errbuf);
			goto out_delete;
		}

		signal(SIGINT, sighandler);
		signal(SIGCHLD, sighandler);
		signal(SIGTERM, sighandler);

		con.evlist = evlist__new();
		if (con.evlist == NULL) {
			err = -ENOMEM;
			goto out_delete;
		}

		err = evlist__create_maps(con.evlist, &target);
		if (err < 0)
			goto out_delete;

		if (argc) {
			err = evlist__prepare_workload(con.evlist, &target,
						       argv, false, NULL);
			if (err < 0)
				goto out_delete;
		}

		if (lock_contention_prepare(&con) < 0) {
			pr_err("lock contention BPF setup failed\n");
			goto out_delete;
		}
	} else if (!data.is_pipe) {
		if (!perf_session__has_traces(session, "lock record"))
			goto out_delete;

		if (!evlist__find_evsel_by_str(session->evlist,
					       "lock:contention_begin")) {
			pr_err("lock contention evsel not found\n");
			goto out_delete;
		}

		if (perf_session__set_tracepoints_handlers(session,
						contention_tracepoints)) {
			pr_err("Initializing perf session tracepoint handlers failed\n");
			goto out_delete;
		}
	}

	if (setup_output_field(true, output_fields))
		goto out_delete;

	if (select_key(true))
		goto out_delete;

	if (use_bpf) {
		lock_contention_start();
		if (argc)
			evlist__start_workload(con.evlist);

		/* wait for signal */
		pause();

		lock_contention_stop();
		lock_contention_read(&con);

		/* abuse bad hist stats for lost entries */
		bad_hist[BROKEN_CONTENDED] = con.lost;
	} else {
		err = perf_session__process_events(session);
		if (err)
			goto out_delete;
	}

	setup_pager();

	sort_contention_result();
	print_contention_result(&con);

out_delete:
	evlist__delete(con.evlist);
	lock_contention_finish();
	perf_session__delete(session);
	return err;
}

static int __cmd_record(int argc, const char **argv)
{
	const char *record_args[] = {
		"record", "-R", "-m", "1024", "-c", "1", "--synth", "task",
	};
	const char *callgraph_args[] = {
		"--call-graph", "fp," __stringify(CONTENTION_STACK_DEPTH),
	};
	unsigned int rec_argc, i, j, ret;
	unsigned int nr_tracepoints;
	unsigned int nr_callgraph_args = 0;
	const char **rec_argv;
	bool has_lock_stat = true;

	for (i = 0; i < ARRAY_SIZE(lock_tracepoints); i++) {
		if (!is_valid_tracepoint(lock_tracepoints[i].name)) {
			pr_debug("tracepoint %s is not enabled. "
				 "Are CONFIG_LOCKDEP and CONFIG_LOCK_STAT enabled?\n",
				 lock_tracepoints[i].name);
			has_lock_stat = false;
			break;
		}
	}

	if (has_lock_stat)
		goto setup_args;

	for (i = 0; i < ARRAY_SIZE(contention_tracepoints); i++) {
		if (!is_valid_tracepoint(contention_tracepoints[i].name)) {
			pr_err("tracepoint %s is not enabled.\n",
			       contention_tracepoints[i].name);
			return 1;
		}
	}

	nr_callgraph_args = ARRAY_SIZE(callgraph_args);

setup_args:
	rec_argc = ARRAY_SIZE(record_args) + nr_callgraph_args + argc - 1;

	if (has_lock_stat)
		nr_tracepoints = ARRAY_SIZE(lock_tracepoints);
	else
		nr_tracepoints = ARRAY_SIZE(contention_tracepoints);

	/* factor of 2 is for -e in front of each tracepoint */
	rec_argc += 2 * nr_tracepoints;

	rec_argv = calloc(rec_argc + 1, sizeof(char *));
	if (!rec_argv)
		return -ENOMEM;

	for (i = 0; i < ARRAY_SIZE(record_args); i++)
		rec_argv[i] = strdup(record_args[i]);

	for (j = 0; j < nr_tracepoints; j++) {
		const char *ev_name;

		if (has_lock_stat)
			ev_name = strdup(lock_tracepoints[j].name);
		else
			ev_name = strdup(contention_tracepoints[j].name);

		if (!ev_name)
			return -ENOMEM;

		rec_argv[i++] = "-e";
		rec_argv[i++] = ev_name;
	}

	for (j = 0; j < nr_callgraph_args; j++, i++)
		rec_argv[i] = callgraph_args[j];

	for (j = 1; j < (unsigned int)argc; j++, i++)
		rec_argv[i] = argv[j];

	BUG_ON(i != rec_argc);

	ret = cmd_record(i, rec_argv);
	free(rec_argv);
	return ret;
}

static int parse_map_entry(const struct option *opt, const char *str,
			   int unset __maybe_unused)
{
	unsigned long *len = (unsigned long *)opt->value;
	unsigned long val;
	char *endptr;

	errno = 0;
	val = strtoul(str, &endptr, 0);
	if (*endptr != '\0' || errno != 0) {
		pr_err("invalid BPF map length: %s\n", str);
		return -1;
	}

	*len = val;
	return 0;
}

static int parse_max_stack(const struct option *opt, const char *str,
			   int unset __maybe_unused)
{
	unsigned long *len = (unsigned long *)opt->value;
	long val;
	char *endptr;

	errno = 0;
	val = strtol(str, &endptr, 0);
	if (*endptr != '\0' || errno != 0) {
		pr_err("invalid max stack depth: %s\n", str);
		return -1;
	}

	if (val < 0 || val > sysctl__max_stack()) {
		pr_err("invalid max stack depth: %ld\n", val);
		return -1;
	}

	*len = val;
	return 0;
}

int cmd_lock(int argc, const char **argv)
{
	const struct option lock_options[] = {
	OPT_STRING('i', "input", &input_name, "file", "input file name"),
	OPT_INCR('v', "verbose", &verbose, "be more verbose (show symbol address, etc)"),
	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"),
	OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
	OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
		   "file", "vmlinux pathname"),
	OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name,
		   "file", "kallsyms pathname"),
	OPT_BOOLEAN('q', "quiet", &quiet, "Do not show any warnings or messages"),
	OPT_END()
	};

	const struct option info_options[] = {
	OPT_BOOLEAN('t', "threads", &info_threads,
		    "dump thread list in perf.data"),
	OPT_BOOLEAN('m', "map", &info_map,
		    "map of lock instances (address:name table)"),
	OPT_PARENT(lock_options)
	};

	const struct option report_options[] = {
	OPT_STRING('k', "key", &sort_key, "acquired",
		   "key for sorting (acquired / contended / avg_wait / wait_total / wait_max / wait_min)"),
	OPT_STRING('F', "field", &output_fields, NULL,
		   "output fields (acquired / contended / avg_wait / wait_total / wait_max / wait_min)"),
	/* TODO: type */
	OPT_BOOLEAN('c', "combine-locks", &combine_locks,
		    "combine locks in the same class"),
	OPT_BOOLEAN('t', "threads", &show_thread_stats,
		    "show per-thread lock stats"),
	OPT_INTEGER('E', "entries", &print_nr_entries, "display this many functions"),
	OPT_PARENT(lock_options)
	};

	struct option contention_options[] = {
	OPT_STRING('k', "key", &sort_key, "wait_total",
		   "key for sorting (contended / wait_total / wait_max / wait_min / avg_wait)"),
	OPT_STRING('F', "field", &output_fields, "contended,wait_total,wait_max,avg_wait",
		   "output fields (contended / wait_total / wait_max / wait_min / avg_wait)"),
	OPT_BOOLEAN('t', "threads", &show_thread_stats,
		    "show per-thread lock stats"),
	OPT_BOOLEAN('b', "use-bpf", &use_bpf, "use BPF program to collect lock contention stats"),
	OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
		    "System-wide collection from all CPUs"),
	OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
		   "List of cpus to monitor"),
	OPT_STRING('p', "pid", &target.pid, "pid",
		   "Trace on existing process id"),
	OPT_STRING(0, "tid", &target.tid, "tid",
		   "Trace on existing thread id (exclusive to --pid)"),
	OPT_CALLBACK(0, "map-nr-entries", &bpf_map_entries, "num",
		     "Max number of BPF map entries", parse_map_entry),
	OPT_CALLBACK(0, "max-stack", &max_stack_depth, "num",
		     "Set the maximum stack depth when collecting lock contention, "
		     "Default: " __stringify(CONTENTION_STACK_DEPTH), parse_max_stack),
	OPT_INTEGER(0, "stack-skip", &stack_skip,
		    "Set the number of stack entries to skip when finding a lock caller, "
		    "Default: " __stringify(CONTENTION_STACK_SKIP)),
	OPT_INTEGER('E', "entries", &print_nr_entries, "display this many functions"),
	OPT_PARENT(lock_options)
	};

	const char * const info_usage[] = {
		"perf lock info [<options>]",
		NULL
	};
	const char *const lock_subcommands[] = { "record", "report", "script",
						 "info", "contention", NULL };
	const char *lock_usage[] = {
		NULL,
		NULL
	};
	const char * const report_usage[] = {
		"perf lock report [<options>]",
		NULL
	};
	const char * const contention_usage[] = {
		"perf lock contention [<options>]",
		NULL
	};
	unsigned int i;
	int rc = 0;

	for (i = 0; i < LOCKHASH_SIZE; i++)
		INIT_HLIST_HEAD(lockhash_table + i);

	argc = parse_options_subcommand(argc, argv, lock_options, lock_subcommands,
					lock_usage, PARSE_OPT_STOP_AT_NON_OPTION);
	if (!argc)
		usage_with_options(lock_usage, lock_options);

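	/*
	 * Sub-commands may be abbreviated: strstarts() checks that argv[0]
	 * is a prefix of the command name, and at least three characters
	 * are required to keep "record" and "report" unambiguous.
	 */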
	if (strlen(argv[0]) > 2 && strstarts("record", argv[0])) {
		return __cmd_record(argc, argv);
	} else if (strlen(argv[0]) > 2 && strstarts("report", argv[0])) {
		trace_handler = &report_lock_ops;
		if (argc) {
			argc = parse_options(argc, argv,
					     report_options, report_usage, 0);
			if (argc)
				usage_with_options(report_usage, report_options);
		}
		rc = __cmd_report(false);
	} else if (!strcmp(argv[0], "script")) {
		/* Aliased to 'perf script' */
		return cmd_script(argc, argv);
	} else if (!strcmp(argv[0], "info")) {
		if (argc) {
			argc = parse_options(argc, argv,
					     info_options, info_usage, 0);
			if (argc)
				usage_with_options(info_usage, info_options);
		}
		/* recycling report_lock_ops */
		trace_handler = &report_lock_ops;
		rc = __cmd_report(true);
	} else if (strlen(argv[0]) > 2 && strstarts("contention", argv[0])) {
		trace_handler = &contention_lock_ops;
		sort_key = "wait_total";
		output_fields = "contended,wait_total,wait_max,avg_wait";

#ifndef HAVE_BPF_SKEL
		set_option_nobuild(contention_options, 'b', "use-bpf",
				   "no BUILD_BPF_SKEL=1", false);
#endif
		if (argc) {
			argc = parse_options(argc, argv, contention_options,
					     contention_usage, 0);
		}
		rc = __cmd_contention(argc, argv);
	} else {
		usage_with_options(lock_usage, lock_options);
	}

	return rc;
}