1 #include "builtin.h" 2 #include "perf.h" 3 4 #include "util/util.h" 5 #include "util/cache.h" 6 #include "util/symbol.h" 7 #include "util/thread.h" 8 #include "util/header.h" 9 #include "util/session.h" 10 #include "util/tool.h" 11 12 #include "util/parse-options.h" 13 #include "util/trace-event.h" 14 15 #include "util/debug.h" 16 17 #include <linux/rbtree.h> 18 19 struct alloc_stat; 20 typedef int (*sort_fn_t)(struct alloc_stat *, struct alloc_stat *); 21 22 static const char *input_name; 23 24 static int alloc_flag; 25 static int caller_flag; 26 27 static int alloc_lines = -1; 28 static int caller_lines = -1; 29 30 static bool raw_ip; 31 32 static char default_sort_order[] = "frag,hit,bytes"; 33 34 static int *cpunode_map; 35 static int max_cpu_num; 36 37 struct alloc_stat { 38 u64 call_site; 39 u64 ptr; 40 u64 bytes_req; 41 u64 bytes_alloc; 42 u32 hit; 43 u32 pingpong; 44 45 short alloc_cpu; 46 47 struct rb_node node; 48 }; 49 50 static struct rb_root root_alloc_stat; 51 static struct rb_root root_alloc_sorted; 52 static struct rb_root root_caller_stat; 53 static struct rb_root root_caller_sorted; 54 55 static unsigned long total_requested, total_allocated; 56 static unsigned long nr_allocs, nr_cross_allocs; 57 58 #define PATH_SYS_NODE "/sys/devices/system/node" 59 60 struct perf_kmem { 61 struct perf_tool tool; 62 struct perf_session *session; 63 }; 64 65 static void init_cpunode_map(void) 66 { 67 FILE *fp; 68 int i; 69 70 fp = fopen("/sys/devices/system/cpu/kernel_max", "r"); 71 if (!fp) { 72 max_cpu_num = 4096; 73 return; 74 } 75 76 if (fscanf(fp, "%d", &max_cpu_num) < 1) 77 die("Failed to read 'kernel_max' from sysfs"); 78 max_cpu_num++; 79 80 cpunode_map = calloc(max_cpu_num, sizeof(int)); 81 if (!cpunode_map) 82 die("calloc"); 83 for (i = 0; i < max_cpu_num; i++) 84 cpunode_map[i] = -1; 85 fclose(fp); 86 } 87 88 static void setup_cpunode_map(void) 89 { 90 struct dirent *dent1, *dent2; 91 DIR *dir1, *dir2; 92 unsigned int cpu, mem; 93 char buf[PATH_MAX]; 94 95 init_cpunode_map(); 96 97 dir1 = opendir(PATH_SYS_NODE); 98 if (!dir1) 99 return; 100 101 while ((dent1 = readdir(dir1)) != NULL) { 102 if (dent1->d_type != DT_DIR || 103 sscanf(dent1->d_name, "node%u", &mem) < 1) 104 continue; 105 106 snprintf(buf, PATH_MAX, "%s/%s", PATH_SYS_NODE, dent1->d_name); 107 dir2 = opendir(buf); 108 if (!dir2) 109 continue; 110 while ((dent2 = readdir(dir2)) != NULL) { 111 if (dent2->d_type != DT_LNK || 112 sscanf(dent2->d_name, "cpu%u", &cpu) < 1) 113 continue; 114 cpunode_map[cpu] = mem; 115 } 116 closedir(dir2); 117 } 118 closedir(dir1); 119 } 120 121 static void insert_alloc_stat(unsigned long call_site, unsigned long ptr, 122 int bytes_req, int bytes_alloc, int cpu) 123 { 124 struct rb_node **node = &root_alloc_stat.rb_node; 125 struct rb_node *parent = NULL; 126 struct alloc_stat *data = NULL; 127 128 while (*node) { 129 parent = *node; 130 data = rb_entry(*node, struct alloc_stat, node); 131 132 if (ptr > data->ptr) 133 node = &(*node)->rb_right; 134 else if (ptr < data->ptr) 135 node = &(*node)->rb_left; 136 else 137 break; 138 } 139 140 if (data && data->ptr == ptr) { 141 data->hit++; 142 data->bytes_req += bytes_req; 143 data->bytes_alloc += bytes_alloc; 144 } else { 145 data = malloc(sizeof(*data)); 146 if (!data) 147 die("malloc"); 148 data->ptr = ptr; 149 data->pingpong = 0; 150 data->hit = 1; 151 data->bytes_req = bytes_req; 152 data->bytes_alloc = bytes_alloc; 153 154 rb_link_node(&data->node, parent, node); 155 rb_insert_color(&data->node, &root_alloc_stat); 156 } 157 data->call_site = call_site; 158 data->alloc_cpu = cpu; 159 } 160 161 static void insert_caller_stat(unsigned long call_site, 162 int bytes_req, int bytes_alloc) 163 { 164 struct rb_node **node = &root_caller_stat.rb_node; 165 struct rb_node *parent = NULL; 166 struct alloc_stat *data = NULL; 167 168 while (*node) { 169 parent = *node; 170 data = rb_entry(*node, struct alloc_stat, node); 171 172 if (call_site > data->call_site) 173 node = &(*node)->rb_right; 174 else if (call_site < data->call_site) 175 node = &(*node)->rb_left; 176 else 177 break; 178 } 179 180 if (data && data->call_site == call_site) { 181 data->hit++; 182 data->bytes_req += bytes_req; 183 data->bytes_alloc += bytes_alloc; 184 } else { 185 data = malloc(sizeof(*data)); 186 if (!data) 187 die("malloc"); 188 data->call_site = call_site; 189 data->pingpong = 0; 190 data->hit = 1; 191 data->bytes_req = bytes_req; 192 data->bytes_alloc = bytes_alloc; 193 194 rb_link_node(&data->node, parent, node); 195 rb_insert_color(&data->node, &root_caller_stat); 196 } 197 } 198 199 static void process_alloc_event(void *data, 200 struct event_format *event, 201 int cpu, 202 u64 timestamp __used, 203 struct thread *thread __used, 204 int node) 205 { 206 unsigned long call_site; 207 unsigned long ptr; 208 int bytes_req; 209 int bytes_alloc; 210 int node1, node2; 211 212 ptr = raw_field_value(event, "ptr", data); 213 call_site = raw_field_value(event, "call_site", data); 214 bytes_req = raw_field_value(event, "bytes_req", data); 215 bytes_alloc = raw_field_value(event, "bytes_alloc", data); 216 217 insert_alloc_stat(call_site, ptr, bytes_req, bytes_alloc, cpu); 218 insert_caller_stat(call_site, bytes_req, bytes_alloc); 219 220 total_requested += bytes_req; 221 total_allocated += bytes_alloc; 222 223 if (node) { 224 node1 = cpunode_map[cpu]; 225 node2 = raw_field_value(event, "node", data); 226 if (node1 != node2) 227 nr_cross_allocs++; 228 } 229 nr_allocs++; 230 } 231 232 static int ptr_cmp(struct alloc_stat *, struct alloc_stat *); 233 static int callsite_cmp(struct alloc_stat *, struct alloc_stat *); 234 235 static struct alloc_stat *search_alloc_stat(unsigned long ptr, 236 unsigned long call_site, 237 struct rb_root *root, 238 sort_fn_t sort_fn) 239 { 240 struct rb_node *node = root->rb_node; 241 struct alloc_stat key = { .ptr = ptr, .call_site = call_site }; 242 243 while (node) { 244 struct alloc_stat *data; 245 int cmp; 246 247 data = rb_entry(node, struct alloc_stat, node); 248 249 cmp = sort_fn(&key, data); 250 if (cmp < 0) 251 node = node->rb_left; 252 else if (cmp > 0) 253 node = node->rb_right; 254 else 255 return data; 256 } 257 return NULL; 258 } 259 260 static void process_free_event(void *data, 261 struct event_format *event, 262 int cpu, 263 u64 timestamp __used, 264 struct thread *thread __used) 265 { 266 unsigned long ptr; 267 struct alloc_stat *s_alloc, *s_caller; 268 269 ptr = raw_field_value(event, "ptr", data); 270 271 s_alloc = search_alloc_stat(ptr, 0, &root_alloc_stat, ptr_cmp); 272 if (!s_alloc) 273 return; 274 275 if (cpu != s_alloc->alloc_cpu) { 276 s_alloc->pingpong++; 277 278 s_caller = search_alloc_stat(0, s_alloc->call_site, 279 &root_caller_stat, callsite_cmp); 280 assert(s_caller); 281 s_caller->pingpong++; 282 } 283 s_alloc->alloc_cpu = -1; 284 } 285 286 static void process_raw_event(struct perf_tool *tool, 287 union perf_event *raw_event __used, void *data, 288 int cpu, u64 timestamp, struct thread *thread) 289 { 290 struct perf_kmem *kmem = container_of(tool, struct perf_kmem, tool); 291 struct event_format *event; 292 int type; 293 294 type = trace_parse_common_type(kmem->session->pevent, data); 295 event = pevent_find_event(kmem->session->pevent, type); 296 297 if (!strcmp(event->name, "kmalloc") || 298 !strcmp(event->name, "kmem_cache_alloc")) { 299 process_alloc_event(data, event, cpu, timestamp, thread, 0); 300 return; 301 } 302 303 if (!strcmp(event->name, "kmalloc_node") || 304 !strcmp(event->name, "kmem_cache_alloc_node")) { 305 process_alloc_event(data, event, cpu, timestamp, thread, 1); 306 return; 307 } 308 309 if (!strcmp(event->name, "kfree") || 310 !strcmp(event->name, "kmem_cache_free")) { 311 process_free_event(data, event, cpu, timestamp, thread); 312 return; 313 } 314 } 315 316 static int process_sample_event(struct perf_tool *tool, 317 union perf_event *event, 318 struct perf_sample *sample, 319 struct perf_evsel *evsel __used, 320 struct machine *machine) 321 { 322 struct thread *thread = machine__findnew_thread(machine, event->ip.pid); 323 324 if (thread == NULL) { 325 pr_debug("problem processing %d event, skipping it.\n", 326 event->header.type); 327 return -1; 328 } 329 330 dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid); 331 332 process_raw_event(tool, event, sample->raw_data, sample->cpu, 333 sample->time, thread); 334 335 return 0; 336 } 337 338 static struct perf_kmem perf_kmem = { 339 .tool = { 340 .sample = process_sample_event, 341 .comm = perf_event__process_comm, 342 .ordered_samples = true, 343 }, 344 }; 345 346 static double fragmentation(unsigned long n_req, unsigned long n_alloc) 347 { 348 if (n_alloc == 0) 349 return 0.0; 350 else 351 return 100.0 - (100.0 * n_req / n_alloc); 352 } 353 354 static void __print_result(struct rb_root *root, struct perf_session *session, 355 int n_lines, int is_caller) 356 { 357 struct rb_node *next; 358 struct machine *machine; 359 360 printf("%.102s\n", graph_dotted_line); 361 printf(" %-34s |", is_caller ? "Callsite": "Alloc Ptr"); 362 printf(" Total_alloc/Per | Total_req/Per | Hit | Ping-pong | Frag\n"); 363 printf("%.102s\n", graph_dotted_line); 364 365 next = rb_first(root); 366 367 machine = perf_session__find_host_machine(session); 368 if (!machine) { 369 pr_err("__print_result: couldn't find kernel information\n"); 370 return; 371 } 372 while (next && n_lines--) { 373 struct alloc_stat *data = rb_entry(next, struct alloc_stat, 374 node); 375 struct symbol *sym = NULL; 376 struct map *map; 377 char buf[BUFSIZ]; 378 u64 addr; 379 380 if (is_caller) { 381 addr = data->call_site; 382 if (!raw_ip) 383 sym = machine__find_kernel_function(machine, addr, &map, NULL); 384 } else 385 addr = data->ptr; 386 387 if (sym != NULL) 388 snprintf(buf, sizeof(buf), "%s+%" PRIx64 "", sym->name, 389 addr - map->unmap_ip(map, sym->start)); 390 else 391 snprintf(buf, sizeof(buf), "%#" PRIx64 "", addr); 392 printf(" %-34s |", buf); 393 394 printf(" %9llu/%-5lu | %9llu/%-5lu | %8lu | %8lu | %6.3f%%\n", 395 (unsigned long long)data->bytes_alloc, 396 (unsigned long)data->bytes_alloc / data->hit, 397 (unsigned long long)data->bytes_req, 398 (unsigned long)data->bytes_req / data->hit, 399 (unsigned long)data->hit, 400 (unsigned long)data->pingpong, 401 fragmentation(data->bytes_req, data->bytes_alloc)); 402 403 next = rb_next(next); 404 } 405 406 if (n_lines == -1) 407 printf(" ... | ... | ... | ... | ... | ... \n"); 408 409 printf("%.102s\n", graph_dotted_line); 410 } 411 412 static void print_summary(void) 413 { 414 printf("\nSUMMARY\n=======\n"); 415 printf("Total bytes requested: %lu\n", total_requested); 416 printf("Total bytes allocated: %lu\n", total_allocated); 417 printf("Total bytes wasted on internal fragmentation: %lu\n", 418 total_allocated - total_requested); 419 printf("Internal fragmentation: %f%%\n", 420 fragmentation(total_requested, total_allocated)); 421 printf("Cross CPU allocations: %lu/%lu\n", nr_cross_allocs, nr_allocs); 422 } 423 424 static void print_result(struct perf_session *session) 425 { 426 if (caller_flag) 427 __print_result(&root_caller_sorted, session, caller_lines, 1); 428 if (alloc_flag) 429 __print_result(&root_alloc_sorted, session, alloc_lines, 0); 430 print_summary(); 431 } 432 433 struct sort_dimension { 434 const char name[20]; 435 sort_fn_t cmp; 436 struct list_head list; 437 }; 438 439 static LIST_HEAD(caller_sort); 440 static LIST_HEAD(alloc_sort); 441 442 static void sort_insert(struct rb_root *root, struct alloc_stat *data, 443 struct list_head *sort_list) 444 { 445 struct rb_node **new = &(root->rb_node); 446 struct rb_node *parent = NULL; 447 struct sort_dimension *sort; 448 449 while (*new) { 450 struct alloc_stat *this; 451 int cmp = 0; 452 453 this = rb_entry(*new, struct alloc_stat, node); 454 parent = *new; 455 456 list_for_each_entry(sort, sort_list, list) { 457 cmp = sort->cmp(data, this); 458 if (cmp) 459 break; 460 } 461 462 if (cmp > 0) 463 new = &((*new)->rb_left); 464 else 465 new = &((*new)->rb_right); 466 } 467 468 rb_link_node(&data->node, parent, new); 469 rb_insert_color(&data->node, root); 470 } 471 472 static void __sort_result(struct rb_root *root, struct rb_root *root_sorted, 473 struct list_head *sort_list) 474 { 475 struct rb_node *node; 476 struct alloc_stat *data; 477 478 for (;;) { 479 node = rb_first(root); 480 if (!node) 481 break; 482 483 rb_erase(node, root); 484 data = rb_entry(node, struct alloc_stat, node); 485 sort_insert(root_sorted, data, sort_list); 486 } 487 } 488 489 static void sort_result(void) 490 { 491 __sort_result(&root_alloc_stat, &root_alloc_sorted, &alloc_sort); 492 __sort_result(&root_caller_stat, &root_caller_sorted, &caller_sort); 493 } 494 495 static int __cmd_kmem(void) 496 { 497 int err = -EINVAL; 498 struct perf_session *session; 499 500 session = perf_session__new(input_name, O_RDONLY, 0, false, 501 &perf_kmem.tool); 502 if (session == NULL) 503 return -ENOMEM; 504 505 perf_kmem.session = session; 506 507 if (perf_session__create_kernel_maps(session) < 0) 508 goto out_delete; 509 510 if (!perf_session__has_traces(session, "kmem record")) 511 goto out_delete; 512 513 setup_pager(); 514 err = perf_session__process_events(session, &perf_kmem.tool); 515 if (err != 0) 516 goto out_delete; 517 sort_result(); 518 print_result(session); 519 out_delete: 520 perf_session__delete(session); 521 return err; 522 } 523 524 static const char * const kmem_usage[] = { 525 "perf kmem [<options>] {record|stat}", 526 NULL 527 }; 528 529 static int ptr_cmp(struct alloc_stat *l, struct alloc_stat *r) 530 { 531 if (l->ptr < r->ptr) 532 return -1; 533 else if (l->ptr > r->ptr) 534 return 1; 535 return 0; 536 } 537 538 static struct sort_dimension ptr_sort_dimension = { 539 .name = "ptr", 540 .cmp = ptr_cmp, 541 }; 542 543 static int callsite_cmp(struct alloc_stat *l, struct alloc_stat *r) 544 { 545 if (l->call_site < r->call_site) 546 return -1; 547 else if (l->call_site > r->call_site) 548 return 1; 549 return 0; 550 } 551 552 static struct sort_dimension callsite_sort_dimension = { 553 .name = "callsite", 554 .cmp = callsite_cmp, 555 }; 556 557 static int hit_cmp(struct alloc_stat *l, struct alloc_stat *r) 558 { 559 if (l->hit < r->hit) 560 return -1; 561 else if (l->hit > r->hit) 562 return 1; 563 return 0; 564 } 565 566 static struct sort_dimension hit_sort_dimension = { 567 .name = "hit", 568 .cmp = hit_cmp, 569 }; 570 571 static int bytes_cmp(struct alloc_stat *l, struct alloc_stat *r) 572 { 573 if (l->bytes_alloc < r->bytes_alloc) 574 return -1; 575 else if (l->bytes_alloc > r->bytes_alloc) 576 return 1; 577 return 0; 578 } 579 580 static struct sort_dimension bytes_sort_dimension = { 581 .name = "bytes", 582 .cmp = bytes_cmp, 583 }; 584 585 static int frag_cmp(struct alloc_stat *l, struct alloc_stat *r) 586 { 587 double x, y; 588 589 x = fragmentation(l->bytes_req, l->bytes_alloc); 590 y = fragmentation(r->bytes_req, r->bytes_alloc); 591 592 if (x < y) 593 return -1; 594 else if (x > y) 595 return 1; 596 return 0; 597 } 598 599 static struct sort_dimension frag_sort_dimension = { 600 .name = "frag", 601 .cmp = frag_cmp, 602 }; 603 604 static int pingpong_cmp(struct alloc_stat *l, struct alloc_stat *r) 605 { 606 if (l->pingpong < r->pingpong) 607 return -1; 608 else if (l->pingpong > r->pingpong) 609 return 1; 610 return 0; 611 } 612 613 static struct sort_dimension pingpong_sort_dimension = { 614 .name = "pingpong", 615 .cmp = pingpong_cmp, 616 }; 617 618 static struct sort_dimension *avail_sorts[] = { 619 &ptr_sort_dimension, 620 &callsite_sort_dimension, 621 &hit_sort_dimension, 622 &bytes_sort_dimension, 623 &frag_sort_dimension, 624 &pingpong_sort_dimension, 625 }; 626 627 #define NUM_AVAIL_SORTS \ 628 (int)(sizeof(avail_sorts) / sizeof(struct sort_dimension *)) 629 630 static int sort_dimension__add(const char *tok, struct list_head *list) 631 { 632 struct sort_dimension *sort; 633 int i; 634 635 for (i = 0; i < NUM_AVAIL_SORTS; i++) { 636 if (!strcmp(avail_sorts[i]->name, tok)) { 637 sort = malloc(sizeof(*sort)); 638 if (!sort) 639 die("malloc"); 640 memcpy(sort, avail_sorts[i], sizeof(*sort)); 641 list_add_tail(&sort->list, list); 642 return 0; 643 } 644 } 645 646 return -1; 647 } 648 649 static int setup_sorting(struct list_head *sort_list, const char *arg) 650 { 651 char *tok; 652 char *str = strdup(arg); 653 654 if (!str) 655 die("strdup"); 656 657 while (true) { 658 tok = strsep(&str, ","); 659 if (!tok) 660 break; 661 if (sort_dimension__add(tok, sort_list) < 0) { 662 error("Unknown --sort key: '%s'", tok); 663 free(str); 664 return -1; 665 } 666 } 667 668 free(str); 669 return 0; 670 } 671 672 static int parse_sort_opt(const struct option *opt __used, 673 const char *arg, int unset __used) 674 { 675 if (!arg) 676 return -1; 677 678 if (caller_flag > alloc_flag) 679 return setup_sorting(&caller_sort, arg); 680 else 681 return setup_sorting(&alloc_sort, arg); 682 683 return 0; 684 } 685 686 static int parse_caller_opt(const struct option *opt __used, 687 const char *arg __used, int unset __used) 688 { 689 caller_flag = (alloc_flag + 1); 690 return 0; 691 } 692 693 static int parse_alloc_opt(const struct option *opt __used, 694 const char *arg __used, int unset __used) 695 { 696 alloc_flag = (caller_flag + 1); 697 return 0; 698 } 699 700 static int parse_line_opt(const struct option *opt __used, 701 const char *arg, int unset __used) 702 { 703 int lines; 704 705 if (!arg) 706 return -1; 707 708 lines = strtoul(arg, NULL, 10); 709 710 if (caller_flag > alloc_flag) 711 caller_lines = lines; 712 else 713 alloc_lines = lines; 714 715 return 0; 716 } 717 718 static const struct option kmem_options[] = { 719 OPT_STRING('i', "input", &input_name, "file", 720 "input file name"), 721 OPT_CALLBACK_NOOPT(0, "caller", NULL, NULL, 722 "show per-callsite statistics", 723 parse_caller_opt), 724 OPT_CALLBACK_NOOPT(0, "alloc", NULL, NULL, 725 "show per-allocation statistics", 726 parse_alloc_opt), 727 OPT_CALLBACK('s', "sort", NULL, "key[,key2...]", 728 "sort by keys: ptr, call_site, bytes, hit, pingpong, frag", 729 parse_sort_opt), 730 OPT_CALLBACK('l', "line", NULL, "num", 731 "show n lines", 732 parse_line_opt), 733 OPT_BOOLEAN(0, "raw-ip", &raw_ip, "show raw ip instead of symbol"), 734 OPT_END() 735 }; 736 737 static const char *record_args[] = { 738 "record", 739 "-a", 740 "-R", 741 "-f", 742 "-c", "1", 743 "-e", "kmem:kmalloc", 744 "-e", "kmem:kmalloc_node", 745 "-e", "kmem:kfree", 746 "-e", "kmem:kmem_cache_alloc", 747 "-e", "kmem:kmem_cache_alloc_node", 748 "-e", "kmem:kmem_cache_free", 749 }; 750 751 static int __cmd_record(int argc, const char **argv) 752 { 753 unsigned int rec_argc, i, j; 754 const char **rec_argv; 755 756 rec_argc = ARRAY_SIZE(record_args) + argc - 1; 757 rec_argv = calloc(rec_argc + 1, sizeof(char *)); 758 759 if (rec_argv == NULL) 760 return -ENOMEM; 761 762 for (i = 0; i < ARRAY_SIZE(record_args); i++) 763 rec_argv[i] = strdup(record_args[i]); 764 765 for (j = 1; j < (unsigned int)argc; j++, i++) 766 rec_argv[i] = argv[j]; 767 768 return cmd_record(i, rec_argv, NULL); 769 } 770 771 int cmd_kmem(int argc, const char **argv, const char *prefix __used) 772 { 773 argc = parse_options(argc, argv, kmem_options, kmem_usage, 0); 774 775 if (!argc) 776 usage_with_options(kmem_usage, kmem_options); 777 778 symbol__init(); 779 780 if (!strncmp(argv[0], "rec", 3)) { 781 return __cmd_record(argc, argv); 782 } else if (!strcmp(argv[0], "stat")) { 783 setup_cpunode_map(); 784 785 if (list_empty(&caller_sort)) 786 setup_sorting(&caller_sort, default_sort_order); 787 if (list_empty(&alloc_sort)) 788 setup_sorting(&alloc_sort, default_sort_order); 789 790 return __cmd_kmem(); 791 } else 792 usage_with_options(kmem_usage, kmem_options); 793 794 return 0; 795 } 796 797