#include "builtin.h"
#include "perf.h"

#include "util/evlist.h"
#include "util/evsel.h"
#include "util/util.h"
#include "util/cache.h"
#include "util/symbol.h"
#include "util/thread.h"
#include "util/header.h"
#include "util/session.h"
#include "util/tool.h"

#include "util/parse-options.h"
#include "util/trace-event.h"

#include "util/debug.h"

#include <linux/rbtree.h>

struct alloc_stat;
typedef int (*sort_fn_t)(struct alloc_stat *, struct alloc_stat *);

static int alloc_flag;
static int caller_flag;

static int alloc_lines = -1;
static int caller_lines = -1;

static bool raw_ip;

static int *cpunode_map;
static int max_cpu_num;

struct alloc_stat {
	u64	call_site;
	u64	ptr;
	u64	bytes_req;
	u64	bytes_alloc;
	u32	hit;
	u32	pingpong;

	short	alloc_cpu;

	struct rb_node node;
};

static struct rb_root root_alloc_stat;
static struct rb_root root_alloc_sorted;
static struct rb_root root_caller_stat;
static struct rb_root root_caller_sorted;

static unsigned long total_requested, total_allocated;
static unsigned long nr_allocs, nr_cross_allocs;

#define PATH_SYS_NODE	"/sys/devices/system/node"

static int init_cpunode_map(void)
{
	FILE *fp;
	int i, err = -1;

	fp = fopen("/sys/devices/system/cpu/kernel_max", "r");
	if (!fp) {
		max_cpu_num = 4096;
		return 0;
	}

	if (fscanf(fp, "%d", &max_cpu_num) < 1) {
		pr_err("Failed to read 'kernel_max' from sysfs");
		goto out_close;
	}

	max_cpu_num++;

	cpunode_map = calloc(max_cpu_num, sizeof(int));
	if (!cpunode_map) {
		pr_err("%s: calloc failed\n", __func__);
		goto out_close;
	}

	for (i = 0; i < max_cpu_num; i++)
		cpunode_map[i] = -1;

	err = 0;
out_close:
	fclose(fp);
	return err;
}

static int setup_cpunode_map(void)
{
	struct dirent *dent1, *dent2;
	DIR *dir1, *dir2;
	unsigned int cpu, mem;
	char buf[PATH_MAX];

	if (init_cpunode_map())
		return -1;

	dir1 = opendir(PATH_SYS_NODE);
	if (!dir1)
		return -1;

	while ((dent1 = readdir(dir1)) != NULL) {
		if (dent1->d_type != DT_DIR ||
		    sscanf(dent1->d_name, "node%u", &mem) < 1)
			continue;

		snprintf(buf, PATH_MAX, "%s/%s", PATH_SYS_NODE, dent1->d_name);
		dir2 = opendir(buf);
		if (!dir2)
			continue;
		while ((dent2 = readdir(dir2)) != NULL) {
			if (dent2->d_type != DT_LNK ||
			    sscanf(dent2->d_name, "cpu%u", &cpu) < 1)
				continue;
			cpunode_map[cpu] = mem;
		}
		closedir(dir2);
	}
	closedir(dir1);
	return 0;
}

static int insert_alloc_stat(unsigned long call_site, unsigned long ptr,
			     int bytes_req, int bytes_alloc, int cpu)
{
	struct rb_node **node = &root_alloc_stat.rb_node;
	struct rb_node *parent = NULL;
	struct alloc_stat *data = NULL;

	while (*node) {
		parent = *node;
		data = rb_entry(*node, struct alloc_stat, node);

		if (ptr > data->ptr)
			node = &(*node)->rb_right;
		else if (ptr < data->ptr)
			node = &(*node)->rb_left;
		else
			break;
	}

	if (data && data->ptr == ptr) {
		data->hit++;
		data->bytes_req += bytes_req;
		data->bytes_alloc += bytes_alloc;
	} else {
		data = malloc(sizeof(*data));
		if (!data) {
			pr_err("%s: malloc failed\n", __func__);
			return -1;
		}
		data->ptr = ptr;
		data->pingpong = 0;
		data->hit = 1;
		data->bytes_req = bytes_req;
		data->bytes_alloc = bytes_alloc;

		rb_link_node(&data->node, parent, node);
		rb_insert_color(&data->node, &root_alloc_stat);
	}
	data->call_site = call_site;
	data->alloc_cpu = cpu;
	return 0;
}

static int insert_caller_stat(unsigned long call_site,
			      int bytes_req, int bytes_alloc)
{
	struct rb_node **node = &root_caller_stat.rb_node;
	struct rb_node *parent = NULL;
	struct alloc_stat *data = NULL;

	while (*node) {
		parent = *node;
		data = rb_entry(*node, struct alloc_stat, node);

		if (call_site > data->call_site)
			node = &(*node)->rb_right;
		else if (call_site < data->call_site)
			node = &(*node)->rb_left;
		else
			break;
	}

	if (data && data->call_site == call_site) {
		data->hit++;
		data->bytes_req += bytes_req;
		data->bytes_alloc += bytes_alloc;
	} else {
		data = malloc(sizeof(*data));
		if (!data) {
			pr_err("%s: malloc failed\n", __func__);
			return -1;
		}
		data->call_site = call_site;
		data->pingpong = 0;
		data->hit = 1;
		data->bytes_req = bytes_req;
		data->bytes_alloc = bytes_alloc;

		rb_link_node(&data->node, parent, node);
		rb_insert_color(&data->node, &root_caller_stat);
	}

	return 0;
}

static int perf_evsel__process_alloc_event(struct perf_evsel *evsel,
					   struct perf_sample *sample)
{
	unsigned long ptr = perf_evsel__intval(evsel, sample, "ptr"),
		      call_site = perf_evsel__intval(evsel, sample, "call_site");
	int bytes_req = perf_evsel__intval(evsel, sample, "bytes_req"),
	    bytes_alloc = perf_evsel__intval(evsel, sample, "bytes_alloc");

	if (insert_alloc_stat(call_site, ptr, bytes_req, bytes_alloc, sample->cpu) ||
	    insert_caller_stat(call_site, bytes_req, bytes_alloc))
		return -1;

	total_requested += bytes_req;
	total_allocated += bytes_alloc;

	nr_allocs++;
	return 0;
}

static int perf_evsel__process_alloc_node_event(struct perf_evsel *evsel,
						struct perf_sample *sample)
{
	int ret = perf_evsel__process_alloc_event(evsel, sample);

	if (!ret) {
		int node1 = cpunode_map[sample->cpu],
		    node2 = perf_evsel__intval(evsel, sample, "node");

		if (node1 != node2)
			nr_cross_allocs++;
	}

	return ret;
}

static int ptr_cmp(struct alloc_stat *, struct alloc_stat *);
static int callsite_cmp(struct alloc_stat *, struct alloc_stat *);

static struct alloc_stat *search_alloc_stat(unsigned long ptr,
					    unsigned long call_site,
					    struct rb_root *root,
					    sort_fn_t sort_fn)
{
	struct rb_node *node = root->rb_node;
	struct alloc_stat key = { .ptr = ptr, .call_site = call_site };

	while (node) {
		struct alloc_stat *data;
		int cmp;

		data = rb_entry(node, struct alloc_stat, node);

		cmp = sort_fn(&key, data);
		if (cmp < 0)
			node = node->rb_left;
		else if (cmp > 0)
			node = node->rb_right;
		else
			return data;
	}
	return NULL;
}

/*
 * A free seen on a CPU other than the one that did the allocation is
 * counted as a "ping-pong" against both the pointer and its call site.
 */
static int perf_evsel__process_free_event(struct perf_evsel *evsel,
					  struct perf_sample *sample)
{
	unsigned long ptr = perf_evsel__intval(evsel, sample, "ptr");
	struct alloc_stat *s_alloc, *s_caller;

	s_alloc = search_alloc_stat(ptr, 0, &root_alloc_stat, ptr_cmp);
	if (!s_alloc)
		return 0;

	if ((short)sample->cpu != s_alloc->alloc_cpu) {
		s_alloc->pingpong++;

		s_caller = search_alloc_stat(0, s_alloc->call_site,
					     &root_caller_stat, callsite_cmp);
		if (!s_caller)
			return -1;
		s_caller->pingpong++;
	}
	s_alloc->alloc_cpu = -1;

	return 0;
}

typedef int (*tracepoint_handler)(struct perf_evsel *evsel,
				  struct perf_sample *sample);

static int process_sample_event(struct perf_tool *tool __maybe_unused,
				union perf_event *event,
				struct perf_sample *sample,
				struct perf_evsel *evsel,
				struct machine *machine)
{
	struct thread *thread = machine__findnew_thread(machine, event->ip.pid);

	if (thread == NULL) {
		pr_debug("problem processing %d event, skipping it.\n",
			 event->header.type);
		return -1;
	}

	dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);

	if (evsel->handler.func != NULL) {
		tracepoint_handler f = evsel->handler.func;
		return f(evsel, sample);
	}

	return 0;
}

static struct perf_tool perf_kmem = {
	.sample			= process_sample_event,
	.comm			= perf_event__process_comm,
	.ordered_samples	= true,
};

static double fragmentation(unsigned long n_req, unsigned long n_alloc)
{
	if (n_alloc == 0)
		return 0.0;
	else
		return 100.0 - (100.0 * n_req / n_alloc);
}

static void __print_result(struct rb_root *root, struct perf_session *session,
			   int n_lines, int is_caller)
{
	struct rb_node *next;
	struct machine *machine;

	printf("%.102s\n", graph_dotted_line);
	printf(" %-34s |", is_caller ? "Callsite": "Alloc Ptr");
	printf(" Total_alloc/Per | Total_req/Per   | Hit      | Ping-pong | Frag\n");
	printf("%.102s\n", graph_dotted_line);

	next = rb_first(root);

	machine = perf_session__find_host_machine(session);
	if (!machine) {
		pr_err("__print_result: couldn't find kernel information\n");
		return;
	}
	while (next && n_lines--) {
		struct alloc_stat *data = rb_entry(next, struct alloc_stat,
						   node);
		struct symbol *sym = NULL;
		struct map *map;
		char buf[BUFSIZ];
		u64 addr;

		if (is_caller) {
			addr = data->call_site;
			if (!raw_ip)
				sym = machine__find_kernel_function(machine, addr, &map, NULL);
		} else
			addr = data->ptr;

		if (sym != NULL)
			snprintf(buf, sizeof(buf), "%s+%" PRIx64 "", sym->name,
				 addr - map->unmap_ip(map, sym->start));
		else
			snprintf(buf, sizeof(buf), "%#" PRIx64 "", addr);
		printf(" %-34s |", buf);

		printf(" %9llu/%-5lu | %9llu/%-5lu | %8lu | %8lu | %6.3f%%\n",
		       (unsigned long long)data->bytes_alloc,
		       (unsigned long)data->bytes_alloc / data->hit,
		       (unsigned long long)data->bytes_req,
		       (unsigned long)data->bytes_req / data->hit,
		       (unsigned long)data->hit,
		       (unsigned long)data->pingpong,
		       fragmentation(data->bytes_req, data->bytes_alloc));

		next = rb_next(next);
	}

	if (n_lines == -1)
		printf(" ...                                | ...             | ...           | ...      | ...       | ...   \n");

	printf("%.102s\n", graph_dotted_line);
}

static void print_summary(void)
{
	printf("\nSUMMARY\n=======\n");
	printf("Total bytes requested: %lu\n", total_requested);
	printf("Total bytes allocated: %lu\n", total_allocated);
	printf("Total bytes wasted on internal fragmentation: %lu\n",
	       total_allocated - total_requested);
	printf("Internal fragmentation: %f%%\n",
	       fragmentation(total_requested, total_allocated));
	printf("Cross CPU allocations: %lu/%lu\n", nr_cross_allocs, nr_allocs);
}

static void print_result(struct perf_session *session)
{
	if (caller_flag)
		__print_result(&root_caller_sorted, session, caller_lines, 1);
	if (alloc_flag)
		__print_result(&root_alloc_sorted, session, alloc_lines, 0);
	print_summary();
}

struct sort_dimension {
	const char	name[20];
	sort_fn_t	cmp;
	struct list_head list;
};

static LIST_HEAD(caller_sort);
static LIST_HEAD(alloc_sort);

static void sort_insert(struct rb_root *root, struct alloc_stat *data,
			struct list_head *sort_list)
{
	struct rb_node **new = &(root->rb_node);
	struct rb_node *parent = NULL;
	struct sort_dimension *sort;

	while (*new) {
		struct alloc_stat *this;
		int cmp = 0;

		this = rb_entry(*new, struct alloc_stat, node);
		parent = *new;

		list_for_each_entry(sort, sort_list, list) {
			cmp = sort->cmp(data, this);
			if (cmp)
				break;
		}

		if (cmp > 0)
			new = &((*new)->rb_left);
		else
			new = &((*new)->rb_right);
	}

	rb_link_node(&data->node, parent, new);
	rb_insert_color(&data->node, root);
}

static void __sort_result(struct rb_root *root, struct rb_root *root_sorted,
			  struct list_head *sort_list)
{
	struct rb_node *node;
	struct alloc_stat *data;

	for (;;) {
		node = rb_first(root);
		if (!node)
			break;

		rb_erase(node, root);
		data = rb_entry(node, struct alloc_stat, node);
		sort_insert(root_sorted, data, sort_list);
	}
}

static void sort_result(void)
{
	__sort_result(&root_alloc_stat, &root_alloc_sorted, &alloc_sort);
	__sort_result(&root_caller_stat, &root_caller_sorted, &caller_sort);
}

static int __cmd_kmem(const char *input_name)
{
	int err = -EINVAL;
	struct perf_session *session;
	const struct perf_evsel_str_handler kmem_tracepoints[] = {
		{ "kmem:kmalloc",		perf_evsel__process_alloc_event, },
		{ "kmem:kmem_cache_alloc",	perf_evsel__process_alloc_event, },
		{ "kmem:kmalloc_node",		perf_evsel__process_alloc_node_event, },
		{ "kmem:kmem_cache_alloc_node",	perf_evsel__process_alloc_node_event, },
		{ "kmem:kfree",			perf_evsel__process_free_event, },
		{ "kmem:kmem_cache_free",	perf_evsel__process_free_event, },
	};

	session = perf_session__new(input_name, O_RDONLY, 0, false, &perf_kmem);
	if (session == NULL)
		return -ENOMEM;

	if (perf_session__create_kernel_maps(session) < 0)
		goto out_delete;

	if (!perf_session__has_traces(session, "kmem record"))
		goto out_delete;

	if (perf_session__set_tracepoints_handlers(session, kmem_tracepoints)) {
		pr_err("Initializing perf session tracepoint handlers failed\n");
		return -1;
	}

	setup_pager();
	err = perf_session__process_events(session, &perf_kmem);
	if (err != 0)
		goto out_delete;
	sort_result();
	print_result(session);
out_delete:
	perf_session__delete(session);
	return err;
}

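/*
 * Sort dimensions: one comparator per supported sort key.  setup_sorting()
 * turns the --sort string into a list of these, and sort_insert() applies
 * them in order until one of them breaks the tie.
 */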
static int ptr_cmp(struct alloc_stat *l, struct alloc_stat *r)
{
	if (l->ptr < r->ptr)
		return -1;
	else if (l->ptr > r->ptr)
		return 1;
	return 0;
}

static struct sort_dimension ptr_sort_dimension = {
	.name	= "ptr",
	.cmp	= ptr_cmp,
};

static int callsite_cmp(struct alloc_stat *l, struct alloc_stat *r)
{
	if (l->call_site < r->call_site)
		return -1;
	else if (l->call_site > r->call_site)
		return 1;
	return 0;
}

static struct sort_dimension callsite_sort_dimension = {
	.name	= "callsite",
	.cmp	= callsite_cmp,
};

static int hit_cmp(struct alloc_stat *l, struct alloc_stat *r)
{
	if (l->hit < r->hit)
		return -1;
	else if (l->hit > r->hit)
		return 1;
	return 0;
}

static struct sort_dimension hit_sort_dimension = {
	.name	= "hit",
	.cmp	= hit_cmp,
};

static int bytes_cmp(struct alloc_stat *l, struct alloc_stat *r)
{
	if (l->bytes_alloc < r->bytes_alloc)
		return -1;
	else if (l->bytes_alloc > r->bytes_alloc)
		return 1;
	return 0;
}

static struct sort_dimension bytes_sort_dimension = {
	.name	= "bytes",
	.cmp	= bytes_cmp,
};

static int frag_cmp(struct alloc_stat *l, struct alloc_stat *r)
{
	double x, y;

	x = fragmentation(l->bytes_req, l->bytes_alloc);
	y = fragmentation(r->bytes_req, r->bytes_alloc);

	if (x < y)
		return -1;
	else if (x > y)
		return 1;
	return 0;
}

static struct sort_dimension frag_sort_dimension = {
	.name	= "frag",
	.cmp	= frag_cmp,
};

static int pingpong_cmp(struct alloc_stat *l, struct alloc_stat *r)
{
	if (l->pingpong < r->pingpong)
		return -1;
	else if (l->pingpong > r->pingpong)
		return 1;
	return 0;
}

static struct sort_dimension pingpong_sort_dimension = {
	.name	= "pingpong",
	.cmp	= pingpong_cmp,
};

static struct sort_dimension *avail_sorts[] = {
	&ptr_sort_dimension,
	&callsite_sort_dimension,
	&hit_sort_dimension,
	&bytes_sort_dimension,
	&frag_sort_dimension,
	&pingpong_sort_dimension,
};

#define NUM_AVAIL_SORTS	\
	(int)(sizeof(avail_sorts) / sizeof(struct sort_dimension *))

static int sort_dimension__add(const char *tok, struct list_head *list)
{
	struct sort_dimension *sort;
	int i;

	for (i = 0; i < NUM_AVAIL_SORTS; i++) {
		if (!strcmp(avail_sorts[i]->name, tok)) {
			sort = malloc(sizeof(*sort));
			if (!sort) {
				pr_err("%s: malloc failed\n", __func__);
				return -1;
			}
			memcpy(sort, avail_sorts[i], sizeof(*sort));
			list_add_tail(&sort->list, list);
			return 0;
		}
	}

	return -1;
}

static int setup_sorting(struct list_head *sort_list, const char *arg)
{
	char *tok;
	char *str = strdup(arg);
	char *pos = str;	/* strsep() consumes this cursor, not str */

	if (!str) {
		pr_err("%s: strdup failed\n", __func__);
		return -1;
	}

	while (true) {
		tok = strsep(&pos, ",");
		if (!tok)
			break;
		if (sort_dimension__add(tok, sort_list) < 0) {
			error("Unknown --sort key: '%s'", tok);
			free(str);
			return -1;
		}
	}

	free(str);
	return 0;
}

static int parse_sort_opt(const struct option *opt __maybe_unused,
			  const char *arg, int unset __maybe_unused)
{
	if (!arg)
		return -1;

	if (caller_flag > alloc_flag)
		return setup_sorting(&caller_sort, arg);
	else
		return setup_sorting(&alloc_sort, arg);
}

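/*
 * Option callbacks: --caller and --alloc select which report(s) to print.
 * Whichever of the two was given most recently also decides whether a
 * following --sort or --line applies to the caller or the alloc report.
 */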
static int parse_caller_opt(const struct option *opt __maybe_unused,
			    const char *arg __maybe_unused,
			    int unset __maybe_unused)
{
	caller_flag = (alloc_flag + 1);
	return 0;
}

static int parse_alloc_opt(const struct option *opt __maybe_unused,
			   const char *arg __maybe_unused,
			   int unset __maybe_unused)
{
	alloc_flag = (caller_flag + 1);
	return 0;
}

static int parse_line_opt(const struct option *opt __maybe_unused,
			  const char *arg, int unset __maybe_unused)
{
	int lines;

	if (!arg)
		return -1;

	lines = strtoul(arg, NULL, 10);

	if (caller_flag > alloc_flag)
		caller_lines = lines;
	else
		alloc_lines = lines;

	return 0;
}

static int __cmd_record(int argc, const char **argv)
{
	const char * const record_args[] = {
		"record", "-a", "-R", "-f", "-c", "1",
		"-e", "kmem:kmalloc",
		"-e", "kmem:kmalloc_node",
		"-e", "kmem:kfree",
		"-e", "kmem:kmem_cache_alloc",
		"-e", "kmem:kmem_cache_alloc_node",
		"-e", "kmem:kmem_cache_free",
	};
	unsigned int rec_argc, i, j;
	const char **rec_argv;

	rec_argc = ARRAY_SIZE(record_args) + argc - 1;
	rec_argv = calloc(rec_argc + 1, sizeof(char *));

	if (rec_argv == NULL)
		return -ENOMEM;

	for (i = 0; i < ARRAY_SIZE(record_args); i++)
		rec_argv[i] = strdup(record_args[i]);

	for (j = 1; j < (unsigned int)argc; j++, i++)
		rec_argv[i] = argv[j];

	return cmd_record(i, rec_argv, NULL);
}

int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused)
{
	const char * const default_sort_order = "frag,hit,bytes";
	const char *input_name = NULL;
	const struct option kmem_options[] = {
	OPT_STRING('i', "input", &input_name, "file", "input file name"),
	OPT_CALLBACK_NOOPT(0, "caller", NULL, NULL,
			   "show per-callsite statistics", parse_caller_opt),
	OPT_CALLBACK_NOOPT(0, "alloc", NULL, NULL,
			   "show per-allocation statistics", parse_alloc_opt),
	OPT_CALLBACK('s', "sort", NULL, "key[,key2...]",
		     "sort by keys: ptr, call_site, bytes, hit, pingpong, frag",
		     parse_sort_opt),
	OPT_CALLBACK('l', "line", NULL, "num", "show n lines", parse_line_opt),
	OPT_BOOLEAN(0, "raw-ip", &raw_ip, "show raw ip instead of symbol"),
	OPT_END()
	};
	const char * const kmem_usage[] = {
		"perf kmem [<options>] {record|stat}",
		NULL
	};
	argc = parse_options(argc, argv, kmem_options, kmem_usage, 0);

	if (!argc)
		usage_with_options(kmem_usage, kmem_options);

	symbol__init();

	if (!strncmp(argv[0], "rec", 3)) {
		return __cmd_record(argc, argv);
	} else if (!strcmp(argv[0], "stat")) {
		if (setup_cpunode_map())
			return -1;

		if (list_empty(&caller_sort))
			setup_sorting(&caller_sort, default_sort_order);
		if (list_empty(&alloc_sort))
			setup_sorting(&alloc_sort, default_sort_order);

		return __cmd_kmem(input_name);
	} else
		usage_with_options(kmem_usage, kmem_options);

	return 0;
}