1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * builtin-kwork.c 4 * 5 * Copyright (c) 2022 Huawei Inc, Yang Jihong <yangjihong1@huawei.com> 6 */ 7 8 #include "builtin.h" 9 10 #include "util/data.h" 11 #include "util/kwork.h" 12 #include "util/debug.h" 13 #include "util/symbol.h" 14 #include "util/thread.h" 15 #include "util/string2.h" 16 #include "util/callchain.h" 17 #include "util/evsel_fprintf.h" 18 19 #include <subcmd/pager.h> 20 #include <subcmd/parse-options.h> 21 22 #include <errno.h> 23 #include <inttypes.h> 24 #include <linux/err.h> 25 #include <linux/time64.h> 26 #include <linux/zalloc.h> 27 28 /* 29 * report header elements width 30 */ 31 #define PRINT_CPU_WIDTH 4 32 #define PRINT_COUNT_WIDTH 9 33 #define PRINT_RUNTIME_WIDTH 10 34 #define PRINT_LATENCY_WIDTH 10 35 #define PRINT_TIMESTAMP_WIDTH 17 36 #define PRINT_KWORK_NAME_WIDTH 30 37 #define RPINT_DECIMAL_WIDTH 3 38 #define PRINT_TIME_UNIT_SEC_WIDTH 2 39 #define PRINT_TIME_UNIT_MESC_WIDTH 3 40 #define PRINT_RUNTIME_HEADER_WIDTH (PRINT_RUNTIME_WIDTH + PRINT_TIME_UNIT_MESC_WIDTH) 41 #define PRINT_LATENCY_HEADER_WIDTH (PRINT_LATENCY_WIDTH + PRINT_TIME_UNIT_MESC_WIDTH) 42 #define PRINT_TIMESTAMP_HEADER_WIDTH (PRINT_TIMESTAMP_WIDTH + PRINT_TIME_UNIT_SEC_WIDTH) 43 44 struct sort_dimension { 45 const char *name; 46 int (*cmp)(struct kwork_work *l, struct kwork_work *r); 47 struct list_head list; 48 }; 49 50 static int id_cmp(struct kwork_work *l, struct kwork_work *r) 51 { 52 if (l->cpu > r->cpu) 53 return 1; 54 if (l->cpu < r->cpu) 55 return -1; 56 57 if (l->id > r->id) 58 return 1; 59 if (l->id < r->id) 60 return -1; 61 62 return 0; 63 } 64 65 static int count_cmp(struct kwork_work *l, struct kwork_work *r) 66 { 67 if (l->nr_atoms > r->nr_atoms) 68 return 1; 69 if (l->nr_atoms < r->nr_atoms) 70 return -1; 71 72 return 0; 73 } 74 75 static int runtime_cmp(struct kwork_work *l, struct kwork_work *r) 76 { 77 if (l->total_runtime > r->total_runtime) 78 return 1; 79 if (l->total_runtime < r->total_runtime) 80 return -1; 81 82 return 0; 83 } 84 85 static int max_runtime_cmp(struct kwork_work *l, struct kwork_work *r) 86 { 87 if (l->max_runtime > r->max_runtime) 88 return 1; 89 if (l->max_runtime < r->max_runtime) 90 return -1; 91 92 return 0; 93 } 94 95 static int avg_latency_cmp(struct kwork_work *l, struct kwork_work *r) 96 { 97 u64 avgl, avgr; 98 99 if (!r->nr_atoms) 100 return 1; 101 if (!l->nr_atoms) 102 return -1; 103 104 avgl = l->total_latency / l->nr_atoms; 105 avgr = r->total_latency / r->nr_atoms; 106 107 if (avgl > avgr) 108 return 1; 109 if (avgl < avgr) 110 return -1; 111 112 return 0; 113 } 114 115 static int max_latency_cmp(struct kwork_work *l, struct kwork_work *r) 116 { 117 if (l->max_latency > r->max_latency) 118 return 1; 119 if (l->max_latency < r->max_latency) 120 return -1; 121 122 return 0; 123 } 124 125 static int sort_dimension__add(struct perf_kwork *kwork __maybe_unused, 126 const char *tok, struct list_head *list) 127 { 128 size_t i; 129 static struct sort_dimension max_sort_dimension = { 130 .name = "max", 131 .cmp = max_runtime_cmp, 132 }; 133 static struct sort_dimension id_sort_dimension = { 134 .name = "id", 135 .cmp = id_cmp, 136 }; 137 static struct sort_dimension runtime_sort_dimension = { 138 .name = "runtime", 139 .cmp = runtime_cmp, 140 }; 141 static struct sort_dimension count_sort_dimension = { 142 .name = "count", 143 .cmp = count_cmp, 144 }; 145 static struct sort_dimension avg_sort_dimension = { 146 .name = "avg", 147 .cmp = avg_latency_cmp, 148 }; 149 struct sort_dimension *available_sorts[] = { 150 &id_sort_dimension, 151 &max_sort_dimension, 152 &count_sort_dimension, 153 &runtime_sort_dimension, 154 &avg_sort_dimension, 155 }; 156 157 if (kwork->report == KWORK_REPORT_LATENCY) 158 max_sort_dimension.cmp = max_latency_cmp; 159 160 for (i = 0; i < ARRAY_SIZE(available_sorts); i++) { 161 if (!strcmp(available_sorts[i]->name, tok)) { 162 list_add_tail(&available_sorts[i]->list, list); 163 return 0; 164 } 165 } 166 167 return -1; 168 } 169 170 static void setup_sorting(struct perf_kwork *kwork, 171 const struct option *options, 172 const char * const usage_msg[]) 173 { 174 char *tmp, *tok, *str = strdup(kwork->sort_order); 175 176 for (tok = strtok_r(str, ", ", &tmp); 177 tok; tok = strtok_r(NULL, ", ", &tmp)) { 178 if (sort_dimension__add(kwork, tok, &kwork->sort_list) < 0) 179 usage_with_options_msg(usage_msg, options, 180 "Unknown --sort key: `%s'", tok); 181 } 182 183 pr_debug("Sort order: %s\n", kwork->sort_order); 184 free(str); 185 } 186 187 static struct kwork_atom *atom_new(struct perf_kwork *kwork, 188 struct perf_sample *sample) 189 { 190 unsigned long i; 191 struct kwork_atom_page *page; 192 struct kwork_atom *atom = NULL; 193 194 list_for_each_entry(page, &kwork->atom_page_list, list) { 195 if (!bitmap_full(page->bitmap, NR_ATOM_PER_PAGE)) { 196 i = find_first_zero_bit(page->bitmap, NR_ATOM_PER_PAGE); 197 BUG_ON(i >= NR_ATOM_PER_PAGE); 198 atom = &page->atoms[i]; 199 goto found_atom; 200 } 201 } 202 203 /* 204 * new page 205 */ 206 page = zalloc(sizeof(*page)); 207 if (page == NULL) { 208 pr_err("Failed to zalloc kwork atom page\n"); 209 return NULL; 210 } 211 212 i = 0; 213 atom = &page->atoms[0]; 214 list_add_tail(&page->list, &kwork->atom_page_list); 215 216 found_atom: 217 set_bit(i, page->bitmap); 218 atom->time = sample->time; 219 atom->prev = NULL; 220 atom->page_addr = page; 221 atom->bit_inpage = i; 222 return atom; 223 } 224 225 static void atom_free(struct kwork_atom *atom) 226 { 227 if (atom->prev != NULL) 228 atom_free(atom->prev); 229 230 clear_bit(atom->bit_inpage, 231 ((struct kwork_atom_page *)atom->page_addr)->bitmap); 232 } 233 234 static void atom_del(struct kwork_atom *atom) 235 { 236 list_del(&atom->list); 237 atom_free(atom); 238 } 239 240 static int work_cmp(struct list_head *list, 241 struct kwork_work *l, struct kwork_work *r) 242 { 243 int ret = 0; 244 struct sort_dimension *sort; 245 246 BUG_ON(list_empty(list)); 247 248 list_for_each_entry(sort, list, list) { 249 ret = sort->cmp(l, r); 250 if (ret) 251 return ret; 252 } 253 254 return ret; 255 } 256 257 static struct kwork_work *work_search(struct rb_root_cached *root, 258 struct kwork_work *key, 259 struct list_head *sort_list) 260 { 261 int cmp; 262 struct kwork_work *work; 263 struct rb_node *node = root->rb_root.rb_node; 264 265 while (node) { 266 work = container_of(node, struct kwork_work, node); 267 cmp = work_cmp(sort_list, key, work); 268 if (cmp > 0) 269 node = node->rb_left; 270 else if (cmp < 0) 271 node = node->rb_right; 272 else { 273 if (work->name == NULL) 274 work->name = key->name; 275 return work; 276 } 277 } 278 return NULL; 279 } 280 281 static void work_insert(struct rb_root_cached *root, 282 struct kwork_work *key, struct list_head *sort_list) 283 { 284 int cmp; 285 bool leftmost = true; 286 struct kwork_work *cur; 287 struct rb_node **new = &(root->rb_root.rb_node), *parent = NULL; 288 289 while (*new) { 290 cur = container_of(*new, struct kwork_work, node); 291 parent = *new; 292 cmp = work_cmp(sort_list, key, cur); 293 294 if (cmp > 0) 295 new = &((*new)->rb_left); 296 else { 297 new = &((*new)->rb_right); 298 leftmost = false; 299 } 300 } 301 302 rb_link_node(&key->node, parent, new); 303 rb_insert_color_cached(&key->node, root, leftmost); 304 } 305 306 static struct kwork_work *work_new(struct kwork_work *key) 307 { 308 int i; 309 struct kwork_work *work = zalloc(sizeof(*work)); 310 311 if (work == NULL) { 312 pr_err("Failed to zalloc kwork work\n"); 313 return NULL; 314 } 315 316 for (i = 0; i < KWORK_TRACE_MAX; i++) 317 INIT_LIST_HEAD(&work->atom_list[i]); 318 319 work->id = key->id; 320 work->cpu = key->cpu; 321 work->name = key->name; 322 work->class = key->class; 323 return work; 324 } 325 326 static struct kwork_work *work_findnew(struct rb_root_cached *root, 327 struct kwork_work *key, 328 struct list_head *sort_list) 329 { 330 struct kwork_work *work = NULL; 331 332 work = work_search(root, key, sort_list); 333 if (work != NULL) 334 return work; 335 336 work = work_new(key); 337 if (work == NULL) 338 return NULL; 339 340 work_insert(root, work, sort_list); 341 return work; 342 } 343 344 static void profile_update_timespan(struct perf_kwork *kwork, 345 struct perf_sample *sample) 346 { 347 if (!kwork->summary) 348 return; 349 350 if ((kwork->timestart == 0) || (kwork->timestart > sample->time)) 351 kwork->timestart = sample->time; 352 353 if (kwork->timeend < sample->time) 354 kwork->timeend = sample->time; 355 } 356 357 static bool profile_event_match(struct perf_kwork *kwork, 358 struct kwork_work *work, 359 struct perf_sample *sample) 360 { 361 int cpu = work->cpu; 362 u64 time = sample->time; 363 struct perf_time_interval *ptime = &kwork->ptime; 364 365 if ((kwork->cpu_list != NULL) && !test_bit(cpu, kwork->cpu_bitmap)) 366 return false; 367 368 if (((ptime->start != 0) && (ptime->start > time)) || 369 ((ptime->end != 0) && (ptime->end < time))) 370 return false; 371 372 if ((kwork->profile_name != NULL) && 373 (work->name != NULL) && 374 (strcmp(work->name, kwork->profile_name) != 0)) 375 return false; 376 377 profile_update_timespan(kwork, sample); 378 return true; 379 } 380 381 static int work_push_atom(struct perf_kwork *kwork, 382 struct kwork_class *class, 383 enum kwork_trace_type src_type, 384 enum kwork_trace_type dst_type, 385 struct evsel *evsel, 386 struct perf_sample *sample, 387 struct machine *machine, 388 struct kwork_work **ret_work) 389 { 390 struct kwork_atom *atom, *dst_atom; 391 struct kwork_work *work, key; 392 393 BUG_ON(class->work_init == NULL); 394 class->work_init(class, &key, evsel, sample, machine); 395 396 atom = atom_new(kwork, sample); 397 if (atom == NULL) 398 return -1; 399 400 work = work_findnew(&class->work_root, &key, &kwork->cmp_id); 401 if (work == NULL) { 402 free(atom); 403 return -1; 404 } 405 406 if (!profile_event_match(kwork, work, sample)) 407 return 0; 408 409 if (dst_type < KWORK_TRACE_MAX) { 410 dst_atom = list_last_entry_or_null(&work->atom_list[dst_type], 411 struct kwork_atom, list); 412 if (dst_atom != NULL) { 413 atom->prev = dst_atom; 414 list_del(&dst_atom->list); 415 } 416 } 417 418 if (ret_work != NULL) 419 *ret_work = work; 420 421 list_add_tail(&atom->list, &work->atom_list[src_type]); 422 423 return 0; 424 } 425 426 static struct kwork_atom *work_pop_atom(struct perf_kwork *kwork, 427 struct kwork_class *class, 428 enum kwork_trace_type src_type, 429 enum kwork_trace_type dst_type, 430 struct evsel *evsel, 431 struct perf_sample *sample, 432 struct machine *machine, 433 struct kwork_work **ret_work) 434 { 435 struct kwork_atom *atom, *src_atom; 436 struct kwork_work *work, key; 437 438 BUG_ON(class->work_init == NULL); 439 class->work_init(class, &key, evsel, sample, machine); 440 441 work = work_findnew(&class->work_root, &key, &kwork->cmp_id); 442 if (ret_work != NULL) 443 *ret_work = work; 444 445 if (work == NULL) 446 return NULL; 447 448 if (!profile_event_match(kwork, work, sample)) 449 return NULL; 450 451 atom = list_last_entry_or_null(&work->atom_list[dst_type], 452 struct kwork_atom, list); 453 if (atom != NULL) 454 return atom; 455 456 src_atom = atom_new(kwork, sample); 457 if (src_atom != NULL) 458 list_add_tail(&src_atom->list, &work->atom_list[src_type]); 459 else { 460 if (ret_work != NULL) 461 *ret_work = NULL; 462 } 463 464 return NULL; 465 } 466 467 static void report_update_exit_event(struct kwork_work *work, 468 struct kwork_atom *atom, 469 struct perf_sample *sample) 470 { 471 u64 delta; 472 u64 exit_time = sample->time; 473 u64 entry_time = atom->time; 474 475 if ((entry_time != 0) && (exit_time >= entry_time)) { 476 delta = exit_time - entry_time; 477 if ((delta > work->max_runtime) || 478 (work->max_runtime == 0)) { 479 work->max_runtime = delta; 480 work->max_runtime_start = entry_time; 481 work->max_runtime_end = exit_time; 482 } 483 work->total_runtime += delta; 484 work->nr_atoms++; 485 } 486 } 487 488 static int report_entry_event(struct perf_kwork *kwork, 489 struct kwork_class *class, 490 struct evsel *evsel, 491 struct perf_sample *sample, 492 struct machine *machine) 493 { 494 return work_push_atom(kwork, class, KWORK_TRACE_ENTRY, 495 KWORK_TRACE_MAX, evsel, sample, 496 machine, NULL); 497 } 498 499 static int report_exit_event(struct perf_kwork *kwork, 500 struct kwork_class *class, 501 struct evsel *evsel, 502 struct perf_sample *sample, 503 struct machine *machine) 504 { 505 struct kwork_atom *atom = NULL; 506 struct kwork_work *work = NULL; 507 508 atom = work_pop_atom(kwork, class, KWORK_TRACE_EXIT, 509 KWORK_TRACE_ENTRY, evsel, sample, 510 machine, &work); 511 if (work == NULL) 512 return -1; 513 514 if (atom != NULL) { 515 report_update_exit_event(work, atom, sample); 516 atom_del(atom); 517 } 518 519 return 0; 520 } 521 522 static void latency_update_entry_event(struct kwork_work *work, 523 struct kwork_atom *atom, 524 struct perf_sample *sample) 525 { 526 u64 delta; 527 u64 entry_time = sample->time; 528 u64 raise_time = atom->time; 529 530 if ((raise_time != 0) && (entry_time >= raise_time)) { 531 delta = entry_time - raise_time; 532 if ((delta > work->max_latency) || 533 (work->max_latency == 0)) { 534 work->max_latency = delta; 535 work->max_latency_start = raise_time; 536 work->max_latency_end = entry_time; 537 } 538 work->total_latency += delta; 539 work->nr_atoms++; 540 } 541 } 542 543 static int latency_raise_event(struct perf_kwork *kwork, 544 struct kwork_class *class, 545 struct evsel *evsel, 546 struct perf_sample *sample, 547 struct machine *machine) 548 { 549 return work_push_atom(kwork, class, KWORK_TRACE_RAISE, 550 KWORK_TRACE_MAX, evsel, sample, 551 machine, NULL); 552 } 553 554 static int latency_entry_event(struct perf_kwork *kwork, 555 struct kwork_class *class, 556 struct evsel *evsel, 557 struct perf_sample *sample, 558 struct machine *machine) 559 { 560 struct kwork_atom *atom = NULL; 561 struct kwork_work *work = NULL; 562 563 atom = work_pop_atom(kwork, class, KWORK_TRACE_ENTRY, 564 KWORK_TRACE_RAISE, evsel, sample, 565 machine, &work); 566 if (work == NULL) 567 return -1; 568 569 if (atom != NULL) { 570 latency_update_entry_event(work, atom, sample); 571 atom_del(atom); 572 } 573 574 return 0; 575 } 576 577 static struct kwork_class kwork_irq; 578 static int process_irq_handler_entry_event(struct perf_tool *tool, 579 struct evsel *evsel, 580 struct perf_sample *sample, 581 struct machine *machine) 582 { 583 struct perf_kwork *kwork = container_of(tool, struct perf_kwork, tool); 584 585 if (kwork->tp_handler->entry_event) 586 return kwork->tp_handler->entry_event(kwork, &kwork_irq, 587 evsel, sample, machine); 588 return 0; 589 } 590 591 static int process_irq_handler_exit_event(struct perf_tool *tool, 592 struct evsel *evsel, 593 struct perf_sample *sample, 594 struct machine *machine) 595 { 596 struct perf_kwork *kwork = container_of(tool, struct perf_kwork, tool); 597 598 if (kwork->tp_handler->exit_event) 599 return kwork->tp_handler->exit_event(kwork, &kwork_irq, 600 evsel, sample, machine); 601 return 0; 602 } 603 604 const struct evsel_str_handler irq_tp_handlers[] = { 605 { "irq:irq_handler_entry", process_irq_handler_entry_event, }, 606 { "irq:irq_handler_exit", process_irq_handler_exit_event, }, 607 }; 608 609 static int irq_class_init(struct kwork_class *class, 610 struct perf_session *session) 611 { 612 if (perf_session__set_tracepoints_handlers(session, irq_tp_handlers)) { 613 pr_err("Failed to set irq tracepoints handlers\n"); 614 return -1; 615 } 616 617 class->work_root = RB_ROOT_CACHED; 618 return 0; 619 } 620 621 static void irq_work_init(struct kwork_class *class, 622 struct kwork_work *work, 623 struct evsel *evsel, 624 struct perf_sample *sample, 625 struct machine *machine __maybe_unused) 626 { 627 work->class = class; 628 work->cpu = sample->cpu; 629 work->id = evsel__intval(evsel, sample, "irq"); 630 work->name = evsel__strval(evsel, sample, "name"); 631 } 632 633 static void irq_work_name(struct kwork_work *work, char *buf, int len) 634 { 635 snprintf(buf, len, "%s:%" PRIu64 "", work->name, work->id); 636 } 637 638 static struct kwork_class kwork_irq = { 639 .name = "irq", 640 .type = KWORK_CLASS_IRQ, 641 .nr_tracepoints = 2, 642 .tp_handlers = irq_tp_handlers, 643 .class_init = irq_class_init, 644 .work_init = irq_work_init, 645 .work_name = irq_work_name, 646 }; 647 648 static struct kwork_class kwork_softirq; 649 static int process_softirq_entry_event(struct perf_tool *tool, 650 struct evsel *evsel, 651 struct perf_sample *sample, 652 struct machine *machine) 653 { 654 struct perf_kwork *kwork = container_of(tool, struct perf_kwork, tool); 655 656 if (kwork->tp_handler->entry_event) 657 return kwork->tp_handler->entry_event(kwork, &kwork_softirq, 658 evsel, sample, machine); 659 660 return 0; 661 } 662 663 static int process_softirq_exit_event(struct perf_tool *tool, 664 struct evsel *evsel, 665 struct perf_sample *sample, 666 struct machine *machine) 667 { 668 struct perf_kwork *kwork = container_of(tool, struct perf_kwork, tool); 669 670 if (kwork->tp_handler->exit_event) 671 return kwork->tp_handler->exit_event(kwork, &kwork_softirq, 672 evsel, sample, machine); 673 674 return 0; 675 } 676 677 const struct evsel_str_handler softirq_tp_handlers[] = { 678 { "irq:softirq_raise", NULL, }, 679 { "irq:softirq_entry", process_softirq_entry_event, }, 680 { "irq:softirq_exit", process_softirq_exit_event, }, 681 }; 682 683 static int softirq_class_init(struct kwork_class *class, 684 struct perf_session *session) 685 { 686 if (perf_session__set_tracepoints_handlers(session, 687 softirq_tp_handlers)) { 688 pr_err("Failed to set softirq tracepoints handlers\n"); 689 return -1; 690 } 691 692 class->work_root = RB_ROOT_CACHED; 693 return 0; 694 } 695 696 static char *evsel__softirq_name(struct evsel *evsel, u64 num) 697 { 698 char *name = NULL; 699 bool found = false; 700 struct tep_print_flag_sym *sym = NULL; 701 struct tep_print_arg *args = evsel->tp_format->print_fmt.args; 702 703 if ((args == NULL) || (args->next == NULL)) 704 return NULL; 705 706 /* skip softirq field: "REC->vec" */ 707 for (sym = args->next->symbol.symbols; sym != NULL; sym = sym->next) { 708 if ((eval_flag(sym->value) == (unsigned long long)num) && 709 (strlen(sym->str) != 0)) { 710 found = true; 711 break; 712 } 713 } 714 715 if (!found) 716 return NULL; 717 718 name = strdup(sym->str); 719 if (name == NULL) { 720 pr_err("Failed to copy symbol name\n"); 721 return NULL; 722 } 723 return name; 724 } 725 726 static void softirq_work_init(struct kwork_class *class, 727 struct kwork_work *work, 728 struct evsel *evsel, 729 struct perf_sample *sample, 730 struct machine *machine __maybe_unused) 731 { 732 u64 num = evsel__intval(evsel, sample, "vec"); 733 734 work->id = num; 735 work->class = class; 736 work->cpu = sample->cpu; 737 work->name = evsel__softirq_name(evsel, num); 738 } 739 740 static void softirq_work_name(struct kwork_work *work, char *buf, int len) 741 { 742 snprintf(buf, len, "(s)%s:%" PRIu64 "", work->name, work->id); 743 } 744 745 static struct kwork_class kwork_softirq = { 746 .name = "softirq", 747 .type = KWORK_CLASS_SOFTIRQ, 748 .nr_tracepoints = 3, 749 .tp_handlers = softirq_tp_handlers, 750 .class_init = softirq_class_init, 751 .work_init = softirq_work_init, 752 .work_name = softirq_work_name, 753 }; 754 755 static struct kwork_class kwork_workqueue; 756 static int process_workqueue_execute_start_event(struct perf_tool *tool, 757 struct evsel *evsel, 758 struct perf_sample *sample, 759 struct machine *machine) 760 { 761 struct perf_kwork *kwork = container_of(tool, struct perf_kwork, tool); 762 763 if (kwork->tp_handler->entry_event) 764 return kwork->tp_handler->entry_event(kwork, &kwork_workqueue, 765 evsel, sample, machine); 766 767 return 0; 768 } 769 770 static int process_workqueue_execute_end_event(struct perf_tool *tool, 771 struct evsel *evsel, 772 struct perf_sample *sample, 773 struct machine *machine) 774 { 775 struct perf_kwork *kwork = container_of(tool, struct perf_kwork, tool); 776 777 if (kwork->tp_handler->exit_event) 778 return kwork->tp_handler->exit_event(kwork, &kwork_workqueue, 779 evsel, sample, machine); 780 781 return 0; 782 } 783 784 const struct evsel_str_handler workqueue_tp_handlers[] = { 785 { "workqueue:workqueue_activate_work", NULL, }, 786 { "workqueue:workqueue_execute_start", process_workqueue_execute_start_event, }, 787 { "workqueue:workqueue_execute_end", process_workqueue_execute_end_event, }, 788 }; 789 790 static int workqueue_class_init(struct kwork_class *class, 791 struct perf_session *session) 792 { 793 if (perf_session__set_tracepoints_handlers(session, 794 workqueue_tp_handlers)) { 795 pr_err("Failed to set workqueue tracepoints handlers\n"); 796 return -1; 797 } 798 799 class->work_root = RB_ROOT_CACHED; 800 return 0; 801 } 802 803 static void workqueue_work_init(struct kwork_class *class, 804 struct kwork_work *work, 805 struct evsel *evsel, 806 struct perf_sample *sample, 807 struct machine *machine) 808 { 809 char *modp = NULL; 810 unsigned long long function_addr = evsel__intval(evsel, 811 sample, "function"); 812 813 work->class = class; 814 work->cpu = sample->cpu; 815 work->id = evsel__intval(evsel, sample, "work"); 816 work->name = function_addr == 0 ? NULL : 817 machine__resolve_kernel_addr(machine, &function_addr, &modp); 818 } 819 820 static void workqueue_work_name(struct kwork_work *work, char *buf, int len) 821 { 822 if (work->name != NULL) 823 snprintf(buf, len, "(w)%s", work->name); 824 else 825 snprintf(buf, len, "(w)0x%" PRIx64, work->id); 826 } 827 828 static struct kwork_class kwork_workqueue = { 829 .name = "workqueue", 830 .type = KWORK_CLASS_WORKQUEUE, 831 .nr_tracepoints = 3, 832 .tp_handlers = workqueue_tp_handlers, 833 .class_init = workqueue_class_init, 834 .work_init = workqueue_work_init, 835 .work_name = workqueue_work_name, 836 }; 837 838 static struct kwork_class *kwork_class_supported_list[KWORK_CLASS_MAX] = { 839 [KWORK_CLASS_IRQ] = &kwork_irq, 840 [KWORK_CLASS_SOFTIRQ] = &kwork_softirq, 841 [KWORK_CLASS_WORKQUEUE] = &kwork_workqueue, 842 }; 843 844 static void print_separator(int len) 845 { 846 printf(" %.*s\n", len, graph_dotted_line); 847 } 848 849 static int report_print_work(struct perf_kwork *kwork, struct kwork_work *work) 850 { 851 int ret = 0; 852 char kwork_name[PRINT_KWORK_NAME_WIDTH]; 853 char max_runtime_start[32], max_runtime_end[32]; 854 char max_latency_start[32], max_latency_end[32]; 855 856 printf(" "); 857 858 /* 859 * kwork name 860 */ 861 if (work->class && work->class->work_name) { 862 work->class->work_name(work, kwork_name, 863 PRINT_KWORK_NAME_WIDTH); 864 ret += printf(" %-*s |", PRINT_KWORK_NAME_WIDTH, kwork_name); 865 } else { 866 ret += printf(" %-*s |", PRINT_KWORK_NAME_WIDTH, ""); 867 } 868 869 /* 870 * cpu 871 */ 872 ret += printf(" %0*d |", PRINT_CPU_WIDTH, work->cpu); 873 874 /* 875 * total runtime 876 */ 877 if (kwork->report == KWORK_REPORT_RUNTIME) { 878 ret += printf(" %*.*f ms |", 879 PRINT_RUNTIME_WIDTH, RPINT_DECIMAL_WIDTH, 880 (double)work->total_runtime / NSEC_PER_MSEC); 881 } else if (kwork->report == KWORK_REPORT_LATENCY) { // avg delay 882 ret += printf(" %*.*f ms |", 883 PRINT_LATENCY_WIDTH, RPINT_DECIMAL_WIDTH, 884 (double)work->total_latency / 885 work->nr_atoms / NSEC_PER_MSEC); 886 } 887 888 /* 889 * count 890 */ 891 ret += printf(" %*" PRIu64 " |", PRINT_COUNT_WIDTH, work->nr_atoms); 892 893 /* 894 * max runtime, max runtime start, max runtime end 895 */ 896 if (kwork->report == KWORK_REPORT_RUNTIME) { 897 timestamp__scnprintf_usec(work->max_runtime_start, 898 max_runtime_start, 899 sizeof(max_runtime_start)); 900 timestamp__scnprintf_usec(work->max_runtime_end, 901 max_runtime_end, 902 sizeof(max_runtime_end)); 903 ret += printf(" %*.*f ms | %*s s | %*s s |", 904 PRINT_RUNTIME_WIDTH, RPINT_DECIMAL_WIDTH, 905 (double)work->max_runtime / NSEC_PER_MSEC, 906 PRINT_TIMESTAMP_WIDTH, max_runtime_start, 907 PRINT_TIMESTAMP_WIDTH, max_runtime_end); 908 } 909 /* 910 * max delay, max delay start, max delay end 911 */ 912 else if (kwork->report == KWORK_REPORT_LATENCY) { 913 timestamp__scnprintf_usec(work->max_latency_start, 914 max_latency_start, 915 sizeof(max_latency_start)); 916 timestamp__scnprintf_usec(work->max_latency_end, 917 max_latency_end, 918 sizeof(max_latency_end)); 919 ret += printf(" %*.*f ms | %*s s | %*s s |", 920 PRINT_LATENCY_WIDTH, RPINT_DECIMAL_WIDTH, 921 (double)work->max_latency / NSEC_PER_MSEC, 922 PRINT_TIMESTAMP_WIDTH, max_latency_start, 923 PRINT_TIMESTAMP_WIDTH, max_latency_end); 924 } 925 926 printf("\n"); 927 return ret; 928 } 929 930 static int report_print_header(struct perf_kwork *kwork) 931 { 932 int ret; 933 934 printf("\n "); 935 ret = printf(" %-*s | %-*s |", 936 PRINT_KWORK_NAME_WIDTH, "Kwork Name", 937 PRINT_CPU_WIDTH, "Cpu"); 938 939 if (kwork->report == KWORK_REPORT_RUNTIME) { 940 ret += printf(" %-*s |", 941 PRINT_RUNTIME_HEADER_WIDTH, "Total Runtime"); 942 } else if (kwork->report == KWORK_REPORT_LATENCY) { 943 ret += printf(" %-*s |", 944 PRINT_LATENCY_HEADER_WIDTH, "Avg delay"); 945 } 946 947 ret += printf(" %-*s |", PRINT_COUNT_WIDTH, "Count"); 948 949 if (kwork->report == KWORK_REPORT_RUNTIME) { 950 ret += printf(" %-*s | %-*s | %-*s |", 951 PRINT_RUNTIME_HEADER_WIDTH, "Max runtime", 952 PRINT_TIMESTAMP_HEADER_WIDTH, "Max runtime start", 953 PRINT_TIMESTAMP_HEADER_WIDTH, "Max runtime end"); 954 } else if (kwork->report == KWORK_REPORT_LATENCY) { 955 ret += printf(" %-*s | %-*s | %-*s |", 956 PRINT_LATENCY_HEADER_WIDTH, "Max delay", 957 PRINT_TIMESTAMP_HEADER_WIDTH, "Max delay start", 958 PRINT_TIMESTAMP_HEADER_WIDTH, "Max delay end"); 959 } 960 961 printf("\n"); 962 print_separator(ret); 963 return ret; 964 } 965 966 static void print_summary(struct perf_kwork *kwork) 967 { 968 u64 time = kwork->timeend - kwork->timestart; 969 970 printf(" Total count : %9" PRIu64 "\n", kwork->all_count); 971 printf(" Total runtime (msec) : %9.3f (%.3f%% load average)\n", 972 (double)kwork->all_runtime / NSEC_PER_MSEC, 973 time == 0 ? 0 : (double)kwork->all_runtime / time); 974 printf(" Total time span (msec) : %9.3f\n", 975 (double)time / NSEC_PER_MSEC); 976 } 977 978 static unsigned long long nr_list_entry(struct list_head *head) 979 { 980 struct list_head *pos; 981 unsigned long long n = 0; 982 983 list_for_each(pos, head) 984 n++; 985 986 return n; 987 } 988 989 static void print_skipped_events(struct perf_kwork *kwork) 990 { 991 int i; 992 const char *const kwork_event_str[] = { 993 [KWORK_TRACE_RAISE] = "raise", 994 [KWORK_TRACE_ENTRY] = "entry", 995 [KWORK_TRACE_EXIT] = "exit", 996 }; 997 998 if ((kwork->nr_skipped_events[KWORK_TRACE_MAX] != 0) && 999 (kwork->nr_events != 0)) { 1000 printf(" INFO: %.3f%% skipped events (%" PRIu64 " including ", 1001 (double)kwork->nr_skipped_events[KWORK_TRACE_MAX] / 1002 (double)kwork->nr_events * 100.0, 1003 kwork->nr_skipped_events[KWORK_TRACE_MAX]); 1004 1005 for (i = 0; i < KWORK_TRACE_MAX; i++) { 1006 printf("%" PRIu64 " %s%s", 1007 kwork->nr_skipped_events[i], 1008 kwork_event_str[i], 1009 (i == KWORK_TRACE_MAX - 1) ? ")\n" : ", "); 1010 } 1011 } 1012 1013 if (verbose > 0) 1014 printf(" INFO: use %lld atom pages\n", 1015 nr_list_entry(&kwork->atom_page_list)); 1016 } 1017 1018 static void print_bad_events(struct perf_kwork *kwork) 1019 { 1020 if ((kwork->nr_lost_events != 0) && (kwork->nr_events != 0)) { 1021 printf(" INFO: %.3f%% lost events (%ld out of %ld, in %ld chunks)\n", 1022 (double)kwork->nr_lost_events / 1023 (double)kwork->nr_events * 100.0, 1024 kwork->nr_lost_events, kwork->nr_events, 1025 kwork->nr_lost_chunks); 1026 } 1027 } 1028 1029 static void work_sort(struct perf_kwork *kwork, struct kwork_class *class) 1030 { 1031 struct rb_node *node; 1032 struct kwork_work *data; 1033 struct rb_root_cached *root = &class->work_root; 1034 1035 pr_debug("Sorting %s ...\n", class->name); 1036 for (;;) { 1037 node = rb_first_cached(root); 1038 if (!node) 1039 break; 1040 1041 rb_erase_cached(node, root); 1042 data = rb_entry(node, struct kwork_work, node); 1043 work_insert(&kwork->sorted_work_root, 1044 data, &kwork->sort_list); 1045 } 1046 } 1047 1048 static void perf_kwork__sort(struct perf_kwork *kwork) 1049 { 1050 struct kwork_class *class; 1051 1052 list_for_each_entry(class, &kwork->class_list, list) 1053 work_sort(kwork, class); 1054 } 1055 1056 static int perf_kwork__check_config(struct perf_kwork *kwork, 1057 struct perf_session *session) 1058 { 1059 int ret; 1060 struct kwork_class *class; 1061 1062 static struct trace_kwork_handler report_ops = { 1063 .entry_event = report_entry_event, 1064 .exit_event = report_exit_event, 1065 }; 1066 static struct trace_kwork_handler latency_ops = { 1067 .raise_event = latency_raise_event, 1068 .entry_event = latency_entry_event, 1069 }; 1070 1071 switch (kwork->report) { 1072 case KWORK_REPORT_RUNTIME: 1073 kwork->tp_handler = &report_ops; 1074 break; 1075 case KWORK_REPORT_LATENCY: 1076 kwork->tp_handler = &latency_ops; 1077 break; 1078 default: 1079 pr_debug("Invalid report type %d\n", kwork->report); 1080 return -1; 1081 } 1082 1083 list_for_each_entry(class, &kwork->class_list, list) 1084 if ((class->class_init != NULL) && 1085 (class->class_init(class, session) != 0)) 1086 return -1; 1087 1088 if (kwork->cpu_list != NULL) { 1089 ret = perf_session__cpu_bitmap(session, 1090 kwork->cpu_list, 1091 kwork->cpu_bitmap); 1092 if (ret < 0) { 1093 pr_err("Invalid cpu bitmap\n"); 1094 return -1; 1095 } 1096 } 1097 1098 if (kwork->time_str != NULL) { 1099 ret = perf_time__parse_str(&kwork->ptime, kwork->time_str); 1100 if (ret != 0) { 1101 pr_err("Invalid time span\n"); 1102 return -1; 1103 } 1104 } 1105 1106 return 0; 1107 } 1108 1109 static int perf_kwork__read_events(struct perf_kwork *kwork) 1110 { 1111 int ret = -1; 1112 struct perf_session *session = NULL; 1113 1114 struct perf_data data = { 1115 .path = input_name, 1116 .mode = PERF_DATA_MODE_READ, 1117 .force = kwork->force, 1118 }; 1119 1120 session = perf_session__new(&data, &kwork->tool); 1121 if (IS_ERR(session)) { 1122 pr_debug("Error creating perf session\n"); 1123 return PTR_ERR(session); 1124 } 1125 1126 symbol__init(&session->header.env); 1127 1128 if (perf_kwork__check_config(kwork, session) != 0) 1129 goto out_delete; 1130 1131 if (session->tevent.pevent && 1132 tep_set_function_resolver(session->tevent.pevent, 1133 machine__resolve_kernel_addr, 1134 &session->machines.host) < 0) { 1135 pr_err("Failed to set libtraceevent function resolver\n"); 1136 goto out_delete; 1137 } 1138 1139 ret = perf_session__process_events(session); 1140 if (ret) { 1141 pr_debug("Failed to process events, error %d\n", ret); 1142 goto out_delete; 1143 } 1144 1145 kwork->nr_events = session->evlist->stats.nr_events[0]; 1146 kwork->nr_lost_events = session->evlist->stats.total_lost; 1147 kwork->nr_lost_chunks = session->evlist->stats.nr_events[PERF_RECORD_LOST]; 1148 1149 out_delete: 1150 perf_session__delete(session); 1151 return ret; 1152 } 1153 1154 static void process_skipped_events(struct perf_kwork *kwork, 1155 struct kwork_work *work) 1156 { 1157 int i; 1158 unsigned long long count; 1159 1160 for (i = 0; i < KWORK_TRACE_MAX; i++) { 1161 count = nr_list_entry(&work->atom_list[i]); 1162 kwork->nr_skipped_events[i] += count; 1163 kwork->nr_skipped_events[KWORK_TRACE_MAX] += count; 1164 } 1165 } 1166 1167 static int perf_kwork__report(struct perf_kwork *kwork) 1168 { 1169 int ret; 1170 struct rb_node *next; 1171 struct kwork_work *work; 1172 1173 ret = perf_kwork__read_events(kwork); 1174 if (ret != 0) 1175 return -1; 1176 1177 perf_kwork__sort(kwork); 1178 1179 setup_pager(); 1180 1181 ret = report_print_header(kwork); 1182 next = rb_first_cached(&kwork->sorted_work_root); 1183 while (next) { 1184 work = rb_entry(next, struct kwork_work, node); 1185 process_skipped_events(kwork, work); 1186 1187 if (work->nr_atoms != 0) { 1188 report_print_work(kwork, work); 1189 if (kwork->summary) { 1190 kwork->all_runtime += work->total_runtime; 1191 kwork->all_count += work->nr_atoms; 1192 } 1193 } 1194 next = rb_next(next); 1195 } 1196 print_separator(ret); 1197 1198 if (kwork->summary) { 1199 print_summary(kwork); 1200 print_separator(ret); 1201 } 1202 1203 print_bad_events(kwork); 1204 print_skipped_events(kwork); 1205 printf("\n"); 1206 1207 return 0; 1208 } 1209 1210 typedef int (*tracepoint_handler)(struct perf_tool *tool, 1211 struct evsel *evsel, 1212 struct perf_sample *sample, 1213 struct machine *machine); 1214 1215 static int perf_kwork__process_tracepoint_sample(struct perf_tool *tool, 1216 union perf_event *event __maybe_unused, 1217 struct perf_sample *sample, 1218 struct evsel *evsel, 1219 struct machine *machine) 1220 { 1221 int err = 0; 1222 1223 if (evsel->handler != NULL) { 1224 tracepoint_handler f = evsel->handler; 1225 1226 err = f(tool, evsel, sample, machine); 1227 } 1228 1229 return err; 1230 } 1231 1232 static void setup_event_list(struct perf_kwork *kwork, 1233 const struct option *options, 1234 const char * const usage_msg[]) 1235 { 1236 int i; 1237 struct kwork_class *class; 1238 char *tmp, *tok, *str; 1239 1240 if (kwork->event_list_str == NULL) 1241 goto null_event_list_str; 1242 1243 str = strdup(kwork->event_list_str); 1244 for (tok = strtok_r(str, ", ", &tmp); 1245 tok; tok = strtok_r(NULL, ", ", &tmp)) { 1246 for (i = 0; i < KWORK_CLASS_MAX; i++) { 1247 class = kwork_class_supported_list[i]; 1248 if (strcmp(tok, class->name) == 0) { 1249 list_add_tail(&class->list, &kwork->class_list); 1250 break; 1251 } 1252 } 1253 if (i == KWORK_CLASS_MAX) { 1254 usage_with_options_msg(usage_msg, options, 1255 "Unknown --event key: `%s'", tok); 1256 } 1257 } 1258 free(str); 1259 1260 null_event_list_str: 1261 /* 1262 * config all kwork events if not specified 1263 */ 1264 if (list_empty(&kwork->class_list)) { 1265 for (i = 0; i < KWORK_CLASS_MAX; i++) { 1266 list_add_tail(&kwork_class_supported_list[i]->list, 1267 &kwork->class_list); 1268 } 1269 } 1270 1271 pr_debug("Config event list:"); 1272 list_for_each_entry(class, &kwork->class_list, list) 1273 pr_debug(" %s", class->name); 1274 pr_debug("\n"); 1275 } 1276 1277 static int perf_kwork__record(struct perf_kwork *kwork, 1278 int argc, const char **argv) 1279 { 1280 const char **rec_argv; 1281 unsigned int rec_argc, i, j; 1282 struct kwork_class *class; 1283 1284 const char *const record_args[] = { 1285 "record", 1286 "-a", 1287 "-R", 1288 "-m", "1024", 1289 "-c", "1", 1290 }; 1291 1292 rec_argc = ARRAY_SIZE(record_args) + argc - 1; 1293 1294 list_for_each_entry(class, &kwork->class_list, list) 1295 rec_argc += 2 * class->nr_tracepoints; 1296 1297 rec_argv = calloc(rec_argc + 1, sizeof(char *)); 1298 if (rec_argv == NULL) 1299 return -ENOMEM; 1300 1301 for (i = 0; i < ARRAY_SIZE(record_args); i++) 1302 rec_argv[i] = strdup(record_args[i]); 1303 1304 list_for_each_entry(class, &kwork->class_list, list) { 1305 for (j = 0; j < class->nr_tracepoints; j++) { 1306 rec_argv[i++] = strdup("-e"); 1307 rec_argv[i++] = strdup(class->tp_handlers[j].name); 1308 } 1309 } 1310 1311 for (j = 1; j < (unsigned int)argc; j++, i++) 1312 rec_argv[i] = argv[j]; 1313 1314 BUG_ON(i != rec_argc); 1315 1316 pr_debug("record comm: "); 1317 for (j = 0; j < rec_argc; j++) 1318 pr_debug("%s ", rec_argv[j]); 1319 pr_debug("\n"); 1320 1321 return cmd_record(i, rec_argv); 1322 } 1323 1324 int cmd_kwork(int argc, const char **argv) 1325 { 1326 static struct perf_kwork kwork = { 1327 .class_list = LIST_HEAD_INIT(kwork.class_list), 1328 .tool = { 1329 .mmap = perf_event__process_mmap, 1330 .mmap2 = perf_event__process_mmap2, 1331 .sample = perf_kwork__process_tracepoint_sample, 1332 }, 1333 .atom_page_list = LIST_HEAD_INIT(kwork.atom_page_list), 1334 .sort_list = LIST_HEAD_INIT(kwork.sort_list), 1335 .cmp_id = LIST_HEAD_INIT(kwork.cmp_id), 1336 .sorted_work_root = RB_ROOT_CACHED, 1337 .tp_handler = NULL, 1338 .profile_name = NULL, 1339 .cpu_list = NULL, 1340 .time_str = NULL, 1341 .force = false, 1342 .event_list_str = NULL, 1343 .summary = false, 1344 .sort_order = NULL, 1345 .timestart = 0, 1346 .timeend = 0, 1347 .nr_events = 0, 1348 .nr_lost_chunks = 0, 1349 .nr_lost_events = 0, 1350 .all_runtime = 0, 1351 .all_count = 0, 1352 .nr_skipped_events = { 0 }, 1353 }; 1354 static const char default_report_sort_order[] = "runtime, max, count"; 1355 static const char default_latency_sort_order[] = "avg, max, count"; 1356 const struct option kwork_options[] = { 1357 OPT_INCR('v', "verbose", &verbose, 1358 "be more verbose (show symbol address, etc)"), 1359 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, 1360 "dump raw trace in ASCII"), 1361 OPT_STRING('k', "kwork", &kwork.event_list_str, "kwork", 1362 "list of kwork to profile (irq, softirq, workqueue, etc)"), 1363 OPT_BOOLEAN('f', "force", &kwork.force, "don't complain, do it"), 1364 OPT_END() 1365 }; 1366 const struct option report_options[] = { 1367 OPT_STRING('s', "sort", &kwork.sort_order, "key[,key2...]", 1368 "sort by key(s): runtime, max, count"), 1369 OPT_STRING('C', "cpu", &kwork.cpu_list, "cpu", 1370 "list of cpus to profile"), 1371 OPT_STRING('n', "name", &kwork.profile_name, "name", 1372 "event name to profile"), 1373 OPT_STRING(0, "time", &kwork.time_str, "str", 1374 "Time span for analysis (start,stop)"), 1375 OPT_STRING('i', "input", &input_name, "file", 1376 "input file name"), 1377 OPT_BOOLEAN('S', "with-summary", &kwork.summary, 1378 "Show summary with statistics"), 1379 OPT_PARENT(kwork_options) 1380 }; 1381 const struct option latency_options[] = { 1382 OPT_STRING('s', "sort", &kwork.sort_order, "key[,key2...]", 1383 "sort by key(s): avg, max, count"), 1384 OPT_STRING('C', "cpu", &kwork.cpu_list, "cpu", 1385 "list of cpus to profile"), 1386 OPT_STRING('n', "name", &kwork.profile_name, "name", 1387 "event name to profile"), 1388 OPT_STRING(0, "time", &kwork.time_str, "str", 1389 "Time span for analysis (start,stop)"), 1390 OPT_STRING('i', "input", &input_name, "file", 1391 "input file name"), 1392 OPT_PARENT(kwork_options) 1393 }; 1394 const char *kwork_usage[] = { 1395 NULL, 1396 NULL 1397 }; 1398 const char * const report_usage[] = { 1399 "perf kwork report [<options>]", 1400 NULL 1401 }; 1402 const char * const latency_usage[] = { 1403 "perf kwork latency [<options>]", 1404 NULL 1405 }; 1406 const char *const kwork_subcommands[] = { 1407 "record", "report", "latency", NULL 1408 }; 1409 1410 argc = parse_options_subcommand(argc, argv, kwork_options, 1411 kwork_subcommands, kwork_usage, 1412 PARSE_OPT_STOP_AT_NON_OPTION); 1413 if (!argc) 1414 usage_with_options(kwork_usage, kwork_options); 1415 1416 setup_event_list(&kwork, kwork_options, kwork_usage); 1417 sort_dimension__add(&kwork, "id", &kwork.cmp_id); 1418 1419 if (strlen(argv[0]) > 2 && strstarts("record", argv[0])) 1420 return perf_kwork__record(&kwork, argc, argv); 1421 else if (strlen(argv[0]) > 2 && strstarts("report", argv[0])) { 1422 kwork.sort_order = default_report_sort_order; 1423 if (argc > 1) { 1424 argc = parse_options(argc, argv, report_options, report_usage, 0); 1425 if (argc) 1426 usage_with_options(report_usage, report_options); 1427 } 1428 kwork.report = KWORK_REPORT_RUNTIME; 1429 setup_sorting(&kwork, report_options, report_usage); 1430 return perf_kwork__report(&kwork); 1431 } else if (strlen(argv[0]) > 2 && strstarts("latency", argv[0])) { 1432 kwork.sort_order = default_latency_sort_order; 1433 if (argc > 1) { 1434 argc = parse_options(argc, argv, latency_options, latency_usage, 0); 1435 if (argc) 1436 usage_with_options(latency_usage, latency_options); 1437 } 1438 kwork.report = KWORK_REPORT_LATENCY; 1439 setup_sorting(&kwork, latency_options, latency_usage); 1440 return perf_kwork__report(&kwork); 1441 } else 1442 usage_with_options(kwork_usage, kwork_options); 1443 1444 return 0; 1445 } 1446