1 // SPDX-License-Identifier: GPL-2.0 2 #include "builtin.h" 3 #include "perf.h" 4 5 #include "util/dso.h" 6 #include "util/evlist.h" 7 #include "util/evsel.h" 8 #include "util/config.h" 9 #include "util/map.h" 10 #include "util/symbol.h" 11 #include "util/thread.h" 12 #include "util/header.h" 13 #include "util/session.h" 14 #include "util/tool.h" 15 #include "util/callchain.h" 16 #include "util/time-utils.h" 17 #include <linux/err.h> 18 19 #include <subcmd/pager.h> 20 #include <subcmd/parse-options.h> 21 #include "util/trace-event.h" 22 #include "util/data.h" 23 #include "util/cpumap.h" 24 25 #include "util/debug.h" 26 #include "util/string2.h" 27 28 #include <linux/kernel.h> 29 #include <linux/rbtree.h> 30 #include <linux/string.h> 31 #include <linux/zalloc.h> 32 #include <errno.h> 33 #include <inttypes.h> 34 #include <locale.h> 35 #include <regex.h> 36 37 #include <linux/ctype.h> 38 39 static int kmem_slab; 40 static int kmem_page; 41 42 static long kmem_page_size; 43 static enum { 44 KMEM_SLAB, 45 KMEM_PAGE, 46 } kmem_default = KMEM_SLAB; /* for backward compatibility */ 47 48 struct alloc_stat; 49 typedef int (*sort_fn_t)(void *, void *); 50 51 static int alloc_flag; 52 static int caller_flag; 53 54 static int alloc_lines = -1; 55 static int caller_lines = -1; 56 57 static bool raw_ip; 58 59 struct alloc_stat { 60 u64 call_site; 61 u64 ptr; 62 u64 bytes_req; 63 u64 bytes_alloc; 64 u64 last_alloc; 65 u32 hit; 66 u32 pingpong; 67 68 short alloc_cpu; 69 70 struct rb_node node; 71 }; 72 73 static struct rb_root root_alloc_stat; 74 static struct rb_root root_alloc_sorted; 75 static struct rb_root root_caller_stat; 76 static struct rb_root root_caller_sorted; 77 78 static unsigned long total_requested, total_allocated, total_freed; 79 static unsigned long nr_allocs, nr_cross_allocs; 80 81 /* filters for controlling start and stop of time of analysis */ 82 static struct perf_time_interval ptime; 83 const char *time_str; 84 85 static int insert_alloc_stat(unsigned long call_site, unsigned long ptr, 86 int bytes_req, int bytes_alloc, int cpu) 87 { 88 struct rb_node **node = &root_alloc_stat.rb_node; 89 struct rb_node *parent = NULL; 90 struct alloc_stat *data = NULL; 91 92 while (*node) { 93 parent = *node; 94 data = rb_entry(*node, struct alloc_stat, node); 95 96 if (ptr > data->ptr) 97 node = &(*node)->rb_right; 98 else if (ptr < data->ptr) 99 node = &(*node)->rb_left; 100 else 101 break; 102 } 103 104 if (data && data->ptr == ptr) { 105 data->hit++; 106 data->bytes_req += bytes_req; 107 data->bytes_alloc += bytes_alloc; 108 } else { 109 data = malloc(sizeof(*data)); 110 if (!data) { 111 pr_err("%s: malloc failed\n", __func__); 112 return -1; 113 } 114 data->ptr = ptr; 115 data->pingpong = 0; 116 data->hit = 1; 117 data->bytes_req = bytes_req; 118 data->bytes_alloc = bytes_alloc; 119 120 rb_link_node(&data->node, parent, node); 121 rb_insert_color(&data->node, &root_alloc_stat); 122 } 123 data->call_site = call_site; 124 data->alloc_cpu = cpu; 125 data->last_alloc = bytes_alloc; 126 127 return 0; 128 } 129 130 static int insert_caller_stat(unsigned long call_site, 131 int bytes_req, int bytes_alloc) 132 { 133 struct rb_node **node = &root_caller_stat.rb_node; 134 struct rb_node *parent = NULL; 135 struct alloc_stat *data = NULL; 136 137 while (*node) { 138 parent = *node; 139 data = rb_entry(*node, struct alloc_stat, node); 140 141 if (call_site > data->call_site) 142 node = &(*node)->rb_right; 143 else if (call_site < data->call_site) 144 node = &(*node)->rb_left; 145 else 146 break; 147 } 148 149 if (data && data->call_site == call_site) { 150 data->hit++; 151 data->bytes_req += bytes_req; 152 data->bytes_alloc += bytes_alloc; 153 } else { 154 data = malloc(sizeof(*data)); 155 if (!data) { 156 pr_err("%s: malloc failed\n", __func__); 157 return -1; 158 } 159 data->call_site = call_site; 160 data->pingpong = 0; 161 data->hit = 1; 162 data->bytes_req = bytes_req; 163 data->bytes_alloc = bytes_alloc; 164 165 rb_link_node(&data->node, parent, node); 166 rb_insert_color(&data->node, &root_caller_stat); 167 } 168 169 return 0; 170 } 171 172 static int perf_evsel__process_alloc_event(struct evsel *evsel, 173 struct perf_sample *sample) 174 { 175 unsigned long ptr = perf_evsel__intval(evsel, sample, "ptr"), 176 call_site = perf_evsel__intval(evsel, sample, "call_site"); 177 int bytes_req = perf_evsel__intval(evsel, sample, "bytes_req"), 178 bytes_alloc = perf_evsel__intval(evsel, sample, "bytes_alloc"); 179 180 if (insert_alloc_stat(call_site, ptr, bytes_req, bytes_alloc, sample->cpu) || 181 insert_caller_stat(call_site, bytes_req, bytes_alloc)) 182 return -1; 183 184 total_requested += bytes_req; 185 total_allocated += bytes_alloc; 186 187 nr_allocs++; 188 return 0; 189 } 190 191 static int perf_evsel__process_alloc_node_event(struct evsel *evsel, 192 struct perf_sample *sample) 193 { 194 int ret = perf_evsel__process_alloc_event(evsel, sample); 195 196 if (!ret) { 197 int node1 = cpu__get_node(sample->cpu), 198 node2 = perf_evsel__intval(evsel, sample, "node"); 199 200 if (node1 != node2) 201 nr_cross_allocs++; 202 } 203 204 return ret; 205 } 206 207 static int ptr_cmp(void *, void *); 208 static int slab_callsite_cmp(void *, void *); 209 210 static struct alloc_stat *search_alloc_stat(unsigned long ptr, 211 unsigned long call_site, 212 struct rb_root *root, 213 sort_fn_t sort_fn) 214 { 215 struct rb_node *node = root->rb_node; 216 struct alloc_stat key = { .ptr = ptr, .call_site = call_site }; 217 218 while (node) { 219 struct alloc_stat *data; 220 int cmp; 221 222 data = rb_entry(node, struct alloc_stat, node); 223 224 cmp = sort_fn(&key, data); 225 if (cmp < 0) 226 node = node->rb_left; 227 else if (cmp > 0) 228 node = node->rb_right; 229 else 230 return data; 231 } 232 return NULL; 233 } 234 235 static int perf_evsel__process_free_event(struct evsel *evsel, 236 struct perf_sample *sample) 237 { 238 unsigned long ptr = perf_evsel__intval(evsel, sample, "ptr"); 239 struct alloc_stat *s_alloc, *s_caller; 240 241 s_alloc = search_alloc_stat(ptr, 0, &root_alloc_stat, ptr_cmp); 242 if (!s_alloc) 243 return 0; 244 245 total_freed += s_alloc->last_alloc; 246 247 if ((short)sample->cpu != s_alloc->alloc_cpu) { 248 s_alloc->pingpong++; 249 250 s_caller = search_alloc_stat(0, s_alloc->call_site, 251 &root_caller_stat, 252 slab_callsite_cmp); 253 if (!s_caller) 254 return -1; 255 s_caller->pingpong++; 256 } 257 s_alloc->alloc_cpu = -1; 258 259 return 0; 260 } 261 262 static u64 total_page_alloc_bytes; 263 static u64 total_page_free_bytes; 264 static u64 total_page_nomatch_bytes; 265 static u64 total_page_fail_bytes; 266 static unsigned long nr_page_allocs; 267 static unsigned long nr_page_frees; 268 static unsigned long nr_page_fails; 269 static unsigned long nr_page_nomatch; 270 271 static bool use_pfn; 272 static bool live_page; 273 static struct perf_session *kmem_session; 274 275 #define MAX_MIGRATE_TYPES 6 276 #define MAX_PAGE_ORDER 11 277 278 static int order_stats[MAX_PAGE_ORDER][MAX_MIGRATE_TYPES]; 279 280 struct page_stat { 281 struct rb_node node; 282 u64 page; 283 u64 callsite; 284 int order; 285 unsigned gfp_flags; 286 unsigned migrate_type; 287 u64 alloc_bytes; 288 u64 free_bytes; 289 int nr_alloc; 290 int nr_free; 291 }; 292 293 static struct rb_root page_live_tree; 294 static struct rb_root page_alloc_tree; 295 static struct rb_root page_alloc_sorted; 296 static struct rb_root page_caller_tree; 297 static struct rb_root page_caller_sorted; 298 299 struct alloc_func { 300 u64 start; 301 u64 end; 302 char *name; 303 }; 304 305 static int nr_alloc_funcs; 306 static struct alloc_func *alloc_func_list; 307 308 static int funcmp(const void *a, const void *b) 309 { 310 const struct alloc_func *fa = a; 311 const struct alloc_func *fb = b; 312 313 if (fa->start > fb->start) 314 return 1; 315 else 316 return -1; 317 } 318 319 static int callcmp(const void *a, const void *b) 320 { 321 const struct alloc_func *fa = a; 322 const struct alloc_func *fb = b; 323 324 if (fb->start <= fa->start && fa->end < fb->end) 325 return 0; 326 327 if (fa->start > fb->start) 328 return 1; 329 else 330 return -1; 331 } 332 333 static int build_alloc_func_list(void) 334 { 335 int ret; 336 struct map *kernel_map; 337 struct symbol *sym; 338 struct rb_node *node; 339 struct alloc_func *func; 340 struct machine *machine = &kmem_session->machines.host; 341 regex_t alloc_func_regex; 342 static const char pattern[] = "^_?_?(alloc|get_free|get_zeroed)_pages?"; 343 344 ret = regcomp(&alloc_func_regex, pattern, REG_EXTENDED); 345 if (ret) { 346 char err[BUFSIZ]; 347 348 regerror(ret, &alloc_func_regex, err, sizeof(err)); 349 pr_err("Invalid regex: %s\n%s", pattern, err); 350 return -EINVAL; 351 } 352 353 kernel_map = machine__kernel_map(machine); 354 if (map__load(kernel_map) < 0) { 355 pr_err("cannot load kernel map\n"); 356 return -ENOENT; 357 } 358 359 map__for_each_symbol(kernel_map, sym, node) { 360 if (regexec(&alloc_func_regex, sym->name, 0, NULL, 0)) 361 continue; 362 363 func = realloc(alloc_func_list, 364 (nr_alloc_funcs + 1) * sizeof(*func)); 365 if (func == NULL) 366 return -ENOMEM; 367 368 pr_debug("alloc func: %s\n", sym->name); 369 func[nr_alloc_funcs].start = sym->start; 370 func[nr_alloc_funcs].end = sym->end; 371 func[nr_alloc_funcs].name = sym->name; 372 373 alloc_func_list = func; 374 nr_alloc_funcs++; 375 } 376 377 qsort(alloc_func_list, nr_alloc_funcs, sizeof(*func), funcmp); 378 379 regfree(&alloc_func_regex); 380 return 0; 381 } 382 383 /* 384 * Find first non-memory allocation function from callchain. 385 * The allocation functions are in the 'alloc_func_list'. 386 */ 387 static u64 find_callsite(struct evsel *evsel, struct perf_sample *sample) 388 { 389 struct addr_location al; 390 struct machine *machine = &kmem_session->machines.host; 391 struct callchain_cursor_node *node; 392 393 if (alloc_func_list == NULL) { 394 if (build_alloc_func_list() < 0) 395 goto out; 396 } 397 398 al.thread = machine__findnew_thread(machine, sample->pid, sample->tid); 399 sample__resolve_callchain(sample, &callchain_cursor, NULL, evsel, &al, 16); 400 401 callchain_cursor_commit(&callchain_cursor); 402 while (true) { 403 struct alloc_func key, *caller; 404 u64 addr; 405 406 node = callchain_cursor_current(&callchain_cursor); 407 if (node == NULL) 408 break; 409 410 key.start = key.end = node->ip; 411 caller = bsearch(&key, alloc_func_list, nr_alloc_funcs, 412 sizeof(key), callcmp); 413 if (!caller) { 414 /* found */ 415 if (node->map) 416 addr = map__unmap_ip(node->map, node->ip); 417 else 418 addr = node->ip; 419 420 return addr; 421 } else 422 pr_debug3("skipping alloc function: %s\n", caller->name); 423 424 callchain_cursor_advance(&callchain_cursor); 425 } 426 427 out: 428 pr_debug2("unknown callsite: %"PRIx64 "\n", sample->ip); 429 return sample->ip; 430 } 431 432 struct sort_dimension { 433 const char name[20]; 434 sort_fn_t cmp; 435 struct list_head list; 436 }; 437 438 static LIST_HEAD(page_alloc_sort_input); 439 static LIST_HEAD(page_caller_sort_input); 440 441 static struct page_stat * 442 __page_stat__findnew_page(struct page_stat *pstat, bool create) 443 { 444 struct rb_node **node = &page_live_tree.rb_node; 445 struct rb_node *parent = NULL; 446 struct page_stat *data; 447 448 while (*node) { 449 s64 cmp; 450 451 parent = *node; 452 data = rb_entry(*node, struct page_stat, node); 453 454 cmp = data->page - pstat->page; 455 if (cmp < 0) 456 node = &parent->rb_left; 457 else if (cmp > 0) 458 node = &parent->rb_right; 459 else 460 return data; 461 } 462 463 if (!create) 464 return NULL; 465 466 data = zalloc(sizeof(*data)); 467 if (data != NULL) { 468 data->page = pstat->page; 469 data->order = pstat->order; 470 data->gfp_flags = pstat->gfp_flags; 471 data->migrate_type = pstat->migrate_type; 472 473 rb_link_node(&data->node, parent, node); 474 rb_insert_color(&data->node, &page_live_tree); 475 } 476 477 return data; 478 } 479 480 static struct page_stat *page_stat__find_page(struct page_stat *pstat) 481 { 482 return __page_stat__findnew_page(pstat, false); 483 } 484 485 static struct page_stat *page_stat__findnew_page(struct page_stat *pstat) 486 { 487 return __page_stat__findnew_page(pstat, true); 488 } 489 490 static struct page_stat * 491 __page_stat__findnew_alloc(struct page_stat *pstat, bool create) 492 { 493 struct rb_node **node = &page_alloc_tree.rb_node; 494 struct rb_node *parent = NULL; 495 struct page_stat *data; 496 struct sort_dimension *sort; 497 498 while (*node) { 499 int cmp = 0; 500 501 parent = *node; 502 data = rb_entry(*node, struct page_stat, node); 503 504 list_for_each_entry(sort, &page_alloc_sort_input, list) { 505 cmp = sort->cmp(pstat, data); 506 if (cmp) 507 break; 508 } 509 510 if (cmp < 0) 511 node = &parent->rb_left; 512 else if (cmp > 0) 513 node = &parent->rb_right; 514 else 515 return data; 516 } 517 518 if (!create) 519 return NULL; 520 521 data = zalloc(sizeof(*data)); 522 if (data != NULL) { 523 data->page = pstat->page; 524 data->order = pstat->order; 525 data->gfp_flags = pstat->gfp_flags; 526 data->migrate_type = pstat->migrate_type; 527 528 rb_link_node(&data->node, parent, node); 529 rb_insert_color(&data->node, &page_alloc_tree); 530 } 531 532 return data; 533 } 534 535 static struct page_stat *page_stat__find_alloc(struct page_stat *pstat) 536 { 537 return __page_stat__findnew_alloc(pstat, false); 538 } 539 540 static struct page_stat *page_stat__findnew_alloc(struct page_stat *pstat) 541 { 542 return __page_stat__findnew_alloc(pstat, true); 543 } 544 545 static struct page_stat * 546 __page_stat__findnew_caller(struct page_stat *pstat, bool create) 547 { 548 struct rb_node **node = &page_caller_tree.rb_node; 549 struct rb_node *parent = NULL; 550 struct page_stat *data; 551 struct sort_dimension *sort; 552 553 while (*node) { 554 int cmp = 0; 555 556 parent = *node; 557 data = rb_entry(*node, struct page_stat, node); 558 559 list_for_each_entry(sort, &page_caller_sort_input, list) { 560 cmp = sort->cmp(pstat, data); 561 if (cmp) 562 break; 563 } 564 565 if (cmp < 0) 566 node = &parent->rb_left; 567 else if (cmp > 0) 568 node = &parent->rb_right; 569 else 570 return data; 571 } 572 573 if (!create) 574 return NULL; 575 576 data = zalloc(sizeof(*data)); 577 if (data != NULL) { 578 data->callsite = pstat->callsite; 579 data->order = pstat->order; 580 data->gfp_flags = pstat->gfp_flags; 581 data->migrate_type = pstat->migrate_type; 582 583 rb_link_node(&data->node, parent, node); 584 rb_insert_color(&data->node, &page_caller_tree); 585 } 586 587 return data; 588 } 589 590 static struct page_stat *page_stat__find_caller(struct page_stat *pstat) 591 { 592 return __page_stat__findnew_caller(pstat, false); 593 } 594 595 static struct page_stat *page_stat__findnew_caller(struct page_stat *pstat) 596 { 597 return __page_stat__findnew_caller(pstat, true); 598 } 599 600 static bool valid_page(u64 pfn_or_page) 601 { 602 if (use_pfn && pfn_or_page == -1UL) 603 return false; 604 if (!use_pfn && pfn_or_page == 0) 605 return false; 606 return true; 607 } 608 609 struct gfp_flag { 610 unsigned int flags; 611 char *compact_str; 612 char *human_readable; 613 }; 614 615 static struct gfp_flag *gfps; 616 static int nr_gfps; 617 618 static int gfpcmp(const void *a, const void *b) 619 { 620 const struct gfp_flag *fa = a; 621 const struct gfp_flag *fb = b; 622 623 return fa->flags - fb->flags; 624 } 625 626 /* see include/trace/events/mmflags.h */ 627 static const struct { 628 const char *original; 629 const char *compact; 630 } gfp_compact_table[] = { 631 { "GFP_TRANSHUGE", "THP" }, 632 { "GFP_TRANSHUGE_LIGHT", "THL" }, 633 { "GFP_HIGHUSER_MOVABLE", "HUM" }, 634 { "GFP_HIGHUSER", "HU" }, 635 { "GFP_USER", "U" }, 636 { "GFP_KERNEL_ACCOUNT", "KAC" }, 637 { "GFP_KERNEL", "K" }, 638 { "GFP_NOFS", "NF" }, 639 { "GFP_ATOMIC", "A" }, 640 { "GFP_NOIO", "NI" }, 641 { "GFP_NOWAIT", "NW" }, 642 { "GFP_DMA", "D" }, 643 { "__GFP_HIGHMEM", "HM" }, 644 { "GFP_DMA32", "D32" }, 645 { "__GFP_HIGH", "H" }, 646 { "__GFP_ATOMIC", "_A" }, 647 { "__GFP_IO", "I" }, 648 { "__GFP_FS", "F" }, 649 { "__GFP_NOWARN", "NWR" }, 650 { "__GFP_RETRY_MAYFAIL", "R" }, 651 { "__GFP_NOFAIL", "NF" }, 652 { "__GFP_NORETRY", "NR" }, 653 { "__GFP_COMP", "C" }, 654 { "__GFP_ZERO", "Z" }, 655 { "__GFP_NOMEMALLOC", "NMA" }, 656 { "__GFP_MEMALLOC", "MA" }, 657 { "__GFP_HARDWALL", "HW" }, 658 { "__GFP_THISNODE", "TN" }, 659 { "__GFP_RECLAIMABLE", "RC" }, 660 { "__GFP_MOVABLE", "M" }, 661 { "__GFP_ACCOUNT", "AC" }, 662 { "__GFP_WRITE", "WR" }, 663 { "__GFP_RECLAIM", "R" }, 664 { "__GFP_DIRECT_RECLAIM", "DR" }, 665 { "__GFP_KSWAPD_RECLAIM", "KR" }, 666 }; 667 668 static size_t max_gfp_len; 669 670 static char *compact_gfp_flags(char *gfp_flags) 671 { 672 char *orig_flags = strdup(gfp_flags); 673 char *new_flags = NULL; 674 char *str, *pos = NULL; 675 size_t len = 0; 676 677 if (orig_flags == NULL) 678 return NULL; 679 680 str = strtok_r(orig_flags, "|", &pos); 681 while (str) { 682 size_t i; 683 char *new; 684 const char *cpt; 685 686 for (i = 0; i < ARRAY_SIZE(gfp_compact_table); i++) { 687 if (strcmp(gfp_compact_table[i].original, str)) 688 continue; 689 690 cpt = gfp_compact_table[i].compact; 691 new = realloc(new_flags, len + strlen(cpt) + 2); 692 if (new == NULL) { 693 free(new_flags); 694 free(orig_flags); 695 return NULL; 696 } 697 698 new_flags = new; 699 700 if (!len) { 701 strcpy(new_flags, cpt); 702 } else { 703 strcat(new_flags, "|"); 704 strcat(new_flags, cpt); 705 len++; 706 } 707 708 len += strlen(cpt); 709 } 710 711 str = strtok_r(NULL, "|", &pos); 712 } 713 714 if (max_gfp_len < len) 715 max_gfp_len = len; 716 717 free(orig_flags); 718 return new_flags; 719 } 720 721 static char *compact_gfp_string(unsigned long gfp_flags) 722 { 723 struct gfp_flag key = { 724 .flags = gfp_flags, 725 }; 726 struct gfp_flag *gfp; 727 728 gfp = bsearch(&key, gfps, nr_gfps, sizeof(*gfps), gfpcmp); 729 if (gfp) 730 return gfp->compact_str; 731 732 return NULL; 733 } 734 735 static int parse_gfp_flags(struct evsel *evsel, struct perf_sample *sample, 736 unsigned int gfp_flags) 737 { 738 struct tep_record record = { 739 .cpu = sample->cpu, 740 .data = sample->raw_data, 741 .size = sample->raw_size, 742 }; 743 struct trace_seq seq; 744 char *str, *pos = NULL; 745 746 if (nr_gfps) { 747 struct gfp_flag key = { 748 .flags = gfp_flags, 749 }; 750 751 if (bsearch(&key, gfps, nr_gfps, sizeof(*gfps), gfpcmp)) 752 return 0; 753 } 754 755 trace_seq_init(&seq); 756 tep_print_event(evsel->tp_format->tep, 757 &seq, &record, "%s", TEP_PRINT_INFO); 758 759 str = strtok_r(seq.buffer, " ", &pos); 760 while (str) { 761 if (!strncmp(str, "gfp_flags=", 10)) { 762 struct gfp_flag *new; 763 764 new = realloc(gfps, (nr_gfps + 1) * sizeof(*gfps)); 765 if (new == NULL) 766 return -ENOMEM; 767 768 gfps = new; 769 new += nr_gfps++; 770 771 new->flags = gfp_flags; 772 new->human_readable = strdup(str + 10); 773 new->compact_str = compact_gfp_flags(str + 10); 774 if (!new->human_readable || !new->compact_str) 775 return -ENOMEM; 776 777 qsort(gfps, nr_gfps, sizeof(*gfps), gfpcmp); 778 } 779 780 str = strtok_r(NULL, " ", &pos); 781 } 782 783 trace_seq_destroy(&seq); 784 return 0; 785 } 786 787 static int perf_evsel__process_page_alloc_event(struct evsel *evsel, 788 struct perf_sample *sample) 789 { 790 u64 page; 791 unsigned int order = perf_evsel__intval(evsel, sample, "order"); 792 unsigned int gfp_flags = perf_evsel__intval(evsel, sample, "gfp_flags"); 793 unsigned int migrate_type = perf_evsel__intval(evsel, sample, 794 "migratetype"); 795 u64 bytes = kmem_page_size << order; 796 u64 callsite; 797 struct page_stat *pstat; 798 struct page_stat this = { 799 .order = order, 800 .gfp_flags = gfp_flags, 801 .migrate_type = migrate_type, 802 }; 803 804 if (use_pfn) 805 page = perf_evsel__intval(evsel, sample, "pfn"); 806 else 807 page = perf_evsel__intval(evsel, sample, "page"); 808 809 nr_page_allocs++; 810 total_page_alloc_bytes += bytes; 811 812 if (!valid_page(page)) { 813 nr_page_fails++; 814 total_page_fail_bytes += bytes; 815 816 return 0; 817 } 818 819 if (parse_gfp_flags(evsel, sample, gfp_flags) < 0) 820 return -1; 821 822 callsite = find_callsite(evsel, sample); 823 824 /* 825 * This is to find the current page (with correct gfp flags and 826 * migrate type) at free event. 827 */ 828 this.page = page; 829 pstat = page_stat__findnew_page(&this); 830 if (pstat == NULL) 831 return -ENOMEM; 832 833 pstat->nr_alloc++; 834 pstat->alloc_bytes += bytes; 835 pstat->callsite = callsite; 836 837 if (!live_page) { 838 pstat = page_stat__findnew_alloc(&this); 839 if (pstat == NULL) 840 return -ENOMEM; 841 842 pstat->nr_alloc++; 843 pstat->alloc_bytes += bytes; 844 pstat->callsite = callsite; 845 } 846 847 this.callsite = callsite; 848 pstat = page_stat__findnew_caller(&this); 849 if (pstat == NULL) 850 return -ENOMEM; 851 852 pstat->nr_alloc++; 853 pstat->alloc_bytes += bytes; 854 855 order_stats[order][migrate_type]++; 856 857 return 0; 858 } 859 860 static int perf_evsel__process_page_free_event(struct evsel *evsel, 861 struct perf_sample *sample) 862 { 863 u64 page; 864 unsigned int order = perf_evsel__intval(evsel, sample, "order"); 865 u64 bytes = kmem_page_size << order; 866 struct page_stat *pstat; 867 struct page_stat this = { 868 .order = order, 869 }; 870 871 if (use_pfn) 872 page = perf_evsel__intval(evsel, sample, "pfn"); 873 else 874 page = perf_evsel__intval(evsel, sample, "page"); 875 876 nr_page_frees++; 877 total_page_free_bytes += bytes; 878 879 this.page = page; 880 pstat = page_stat__find_page(&this); 881 if (pstat == NULL) { 882 pr_debug2("missing free at page %"PRIx64" (order: %d)\n", 883 page, order); 884 885 nr_page_nomatch++; 886 total_page_nomatch_bytes += bytes; 887 888 return 0; 889 } 890 891 this.gfp_flags = pstat->gfp_flags; 892 this.migrate_type = pstat->migrate_type; 893 this.callsite = pstat->callsite; 894 895 rb_erase(&pstat->node, &page_live_tree); 896 free(pstat); 897 898 if (live_page) { 899 order_stats[this.order][this.migrate_type]--; 900 } else { 901 pstat = page_stat__find_alloc(&this); 902 if (pstat == NULL) 903 return -ENOMEM; 904 905 pstat->nr_free++; 906 pstat->free_bytes += bytes; 907 } 908 909 pstat = page_stat__find_caller(&this); 910 if (pstat == NULL) 911 return -ENOENT; 912 913 pstat->nr_free++; 914 pstat->free_bytes += bytes; 915 916 if (live_page) { 917 pstat->nr_alloc--; 918 pstat->alloc_bytes -= bytes; 919 920 if (pstat->nr_alloc == 0) { 921 rb_erase(&pstat->node, &page_caller_tree); 922 free(pstat); 923 } 924 } 925 926 return 0; 927 } 928 929 static bool perf_kmem__skip_sample(struct perf_sample *sample) 930 { 931 /* skip sample based on time? */ 932 if (perf_time__skip_sample(&ptime, sample->time)) 933 return true; 934 935 return false; 936 } 937 938 typedef int (*tracepoint_handler)(struct evsel *evsel, 939 struct perf_sample *sample); 940 941 static int process_sample_event(struct perf_tool *tool __maybe_unused, 942 union perf_event *event, 943 struct perf_sample *sample, 944 struct evsel *evsel, 945 struct machine *machine) 946 { 947 int err = 0; 948 struct thread *thread = machine__findnew_thread(machine, sample->pid, 949 sample->tid); 950 951 if (thread == NULL) { 952 pr_debug("problem processing %d event, skipping it.\n", 953 event->header.type); 954 return -1; 955 } 956 957 if (perf_kmem__skip_sample(sample)) 958 return 0; 959 960 dump_printf(" ... thread: %s:%d\n", thread__comm_str(thread), thread->tid); 961 962 if (evsel->handler != NULL) { 963 tracepoint_handler f = evsel->handler; 964 err = f(evsel, sample); 965 } 966 967 thread__put(thread); 968 969 return err; 970 } 971 972 static struct perf_tool perf_kmem = { 973 .sample = process_sample_event, 974 .comm = perf_event__process_comm, 975 .mmap = perf_event__process_mmap, 976 .mmap2 = perf_event__process_mmap2, 977 .namespaces = perf_event__process_namespaces, 978 .ordered_events = true, 979 }; 980 981 static double fragmentation(unsigned long n_req, unsigned long n_alloc) 982 { 983 if (n_alloc == 0) 984 return 0.0; 985 else 986 return 100.0 - (100.0 * n_req / n_alloc); 987 } 988 989 static void __print_slab_result(struct rb_root *root, 990 struct perf_session *session, 991 int n_lines, int is_caller) 992 { 993 struct rb_node *next; 994 struct machine *machine = &session->machines.host; 995 996 printf("%.105s\n", graph_dotted_line); 997 printf(" %-34s |", is_caller ? "Callsite": "Alloc Ptr"); 998 printf(" Total_alloc/Per | Total_req/Per | Hit | Ping-pong | Frag\n"); 999 printf("%.105s\n", graph_dotted_line); 1000 1001 next = rb_first(root); 1002 1003 while (next && n_lines--) { 1004 struct alloc_stat *data = rb_entry(next, struct alloc_stat, 1005 node); 1006 struct symbol *sym = NULL; 1007 struct map *map; 1008 char buf[BUFSIZ]; 1009 u64 addr; 1010 1011 if (is_caller) { 1012 addr = data->call_site; 1013 if (!raw_ip) 1014 sym = machine__find_kernel_symbol(machine, addr, &map); 1015 } else 1016 addr = data->ptr; 1017 1018 if (sym != NULL) 1019 snprintf(buf, sizeof(buf), "%s+%" PRIx64 "", sym->name, 1020 addr - map->unmap_ip(map, sym->start)); 1021 else 1022 snprintf(buf, sizeof(buf), "%#" PRIx64 "", addr); 1023 printf(" %-34s |", buf); 1024 1025 printf(" %9llu/%-5lu | %9llu/%-5lu | %8lu | %9lu | %6.3f%%\n", 1026 (unsigned long long)data->bytes_alloc, 1027 (unsigned long)data->bytes_alloc / data->hit, 1028 (unsigned long long)data->bytes_req, 1029 (unsigned long)data->bytes_req / data->hit, 1030 (unsigned long)data->hit, 1031 (unsigned long)data->pingpong, 1032 fragmentation(data->bytes_req, data->bytes_alloc)); 1033 1034 next = rb_next(next); 1035 } 1036 1037 if (n_lines == -1) 1038 printf(" ... | ... | ... | ... | ... | ... \n"); 1039 1040 printf("%.105s\n", graph_dotted_line); 1041 } 1042 1043 static const char * const migrate_type_str[] = { 1044 "UNMOVABL", 1045 "RECLAIM", 1046 "MOVABLE", 1047 "RESERVED", 1048 "CMA/ISLT", 1049 "UNKNOWN", 1050 }; 1051 1052 static void __print_page_alloc_result(struct perf_session *session, int n_lines) 1053 { 1054 struct rb_node *next = rb_first(&page_alloc_sorted); 1055 struct machine *machine = &session->machines.host; 1056 const char *format; 1057 int gfp_len = max(strlen("GFP flags"), max_gfp_len); 1058 1059 printf("\n%.105s\n", graph_dotted_line); 1060 printf(" %-16s | %5s alloc (KB) | Hits | Order | Mig.type | %-*s | Callsite\n", 1061 use_pfn ? "PFN" : "Page", live_page ? "Live" : "Total", 1062 gfp_len, "GFP flags"); 1063 printf("%.105s\n", graph_dotted_line); 1064 1065 if (use_pfn) 1066 format = " %16llu | %'16llu | %'9d | %5d | %8s | %-*s | %s\n"; 1067 else 1068 format = " %016llx | %'16llu | %'9d | %5d | %8s | %-*s | %s\n"; 1069 1070 while (next && n_lines--) { 1071 struct page_stat *data; 1072 struct symbol *sym; 1073 struct map *map; 1074 char buf[32]; 1075 char *caller = buf; 1076 1077 data = rb_entry(next, struct page_stat, node); 1078 sym = machine__find_kernel_symbol(machine, data->callsite, &map); 1079 if (sym) 1080 caller = sym->name; 1081 else 1082 scnprintf(buf, sizeof(buf), "%"PRIx64, data->callsite); 1083 1084 printf(format, (unsigned long long)data->page, 1085 (unsigned long long)data->alloc_bytes / 1024, 1086 data->nr_alloc, data->order, 1087 migrate_type_str[data->migrate_type], 1088 gfp_len, compact_gfp_string(data->gfp_flags), caller); 1089 1090 next = rb_next(next); 1091 } 1092 1093 if (n_lines == -1) { 1094 printf(" ... | ... | ... | ... | ... | %-*s | ...\n", 1095 gfp_len, "..."); 1096 } 1097 1098 printf("%.105s\n", graph_dotted_line); 1099 } 1100 1101 static void __print_page_caller_result(struct perf_session *session, int n_lines) 1102 { 1103 struct rb_node *next = rb_first(&page_caller_sorted); 1104 struct machine *machine = &session->machines.host; 1105 int gfp_len = max(strlen("GFP flags"), max_gfp_len); 1106 1107 printf("\n%.105s\n", graph_dotted_line); 1108 printf(" %5s alloc (KB) | Hits | Order | Mig.type | %-*s | Callsite\n", 1109 live_page ? "Live" : "Total", gfp_len, "GFP flags"); 1110 printf("%.105s\n", graph_dotted_line); 1111 1112 while (next && n_lines--) { 1113 struct page_stat *data; 1114 struct symbol *sym; 1115 struct map *map; 1116 char buf[32]; 1117 char *caller = buf; 1118 1119 data = rb_entry(next, struct page_stat, node); 1120 sym = machine__find_kernel_symbol(machine, data->callsite, &map); 1121 if (sym) 1122 caller = sym->name; 1123 else 1124 scnprintf(buf, sizeof(buf), "%"PRIx64, data->callsite); 1125 1126 printf(" %'16llu | %'9d | %5d | %8s | %-*s | %s\n", 1127 (unsigned long long)data->alloc_bytes / 1024, 1128 data->nr_alloc, data->order, 1129 migrate_type_str[data->migrate_type], 1130 gfp_len, compact_gfp_string(data->gfp_flags), caller); 1131 1132 next = rb_next(next); 1133 } 1134 1135 if (n_lines == -1) { 1136 printf(" ... | ... | ... | ... | %-*s | ...\n", 1137 gfp_len, "..."); 1138 } 1139 1140 printf("%.105s\n", graph_dotted_line); 1141 } 1142 1143 static void print_gfp_flags(void) 1144 { 1145 int i; 1146 1147 printf("#\n"); 1148 printf("# GFP flags\n"); 1149 printf("# ---------\n"); 1150 for (i = 0; i < nr_gfps; i++) { 1151 printf("# %08x: %*s: %s\n", gfps[i].flags, 1152 (int) max_gfp_len, gfps[i].compact_str, 1153 gfps[i].human_readable); 1154 } 1155 } 1156 1157 static void print_slab_summary(void) 1158 { 1159 printf("\nSUMMARY (SLAB allocator)"); 1160 printf("\n========================\n"); 1161 printf("Total bytes requested: %'lu\n", total_requested); 1162 printf("Total bytes allocated: %'lu\n", total_allocated); 1163 printf("Total bytes freed: %'lu\n", total_freed); 1164 if (total_allocated > total_freed) { 1165 printf("Net total bytes allocated: %'lu\n", 1166 total_allocated - total_freed); 1167 } 1168 printf("Total bytes wasted on internal fragmentation: %'lu\n", 1169 total_allocated - total_requested); 1170 printf("Internal fragmentation: %f%%\n", 1171 fragmentation(total_requested, total_allocated)); 1172 printf("Cross CPU allocations: %'lu/%'lu\n", nr_cross_allocs, nr_allocs); 1173 } 1174 1175 static void print_page_summary(void) 1176 { 1177 int o, m; 1178 u64 nr_alloc_freed = nr_page_frees - nr_page_nomatch; 1179 u64 total_alloc_freed_bytes = total_page_free_bytes - total_page_nomatch_bytes; 1180 1181 printf("\nSUMMARY (page allocator)"); 1182 printf("\n========================\n"); 1183 printf("%-30s: %'16lu [ %'16"PRIu64" KB ]\n", "Total allocation requests", 1184 nr_page_allocs, total_page_alloc_bytes / 1024); 1185 printf("%-30s: %'16lu [ %'16"PRIu64" KB ]\n", "Total free requests", 1186 nr_page_frees, total_page_free_bytes / 1024); 1187 printf("\n"); 1188 1189 printf("%-30s: %'16"PRIu64" [ %'16"PRIu64" KB ]\n", "Total alloc+freed requests", 1190 nr_alloc_freed, (total_alloc_freed_bytes) / 1024); 1191 printf("%-30s: %'16"PRIu64" [ %'16"PRIu64" KB ]\n", "Total alloc-only requests", 1192 nr_page_allocs - nr_alloc_freed, 1193 (total_page_alloc_bytes - total_alloc_freed_bytes) / 1024); 1194 printf("%-30s: %'16lu [ %'16"PRIu64" KB ]\n", "Total free-only requests", 1195 nr_page_nomatch, total_page_nomatch_bytes / 1024); 1196 printf("\n"); 1197 1198 printf("%-30s: %'16lu [ %'16"PRIu64" KB ]\n", "Total allocation failures", 1199 nr_page_fails, total_page_fail_bytes / 1024); 1200 printf("\n"); 1201 1202 printf("%5s %12s %12s %12s %12s %12s\n", "Order", "Unmovable", 1203 "Reclaimable", "Movable", "Reserved", "CMA/Isolated"); 1204 printf("%.5s %.12s %.12s %.12s %.12s %.12s\n", graph_dotted_line, 1205 graph_dotted_line, graph_dotted_line, graph_dotted_line, 1206 graph_dotted_line, graph_dotted_line); 1207 1208 for (o = 0; o < MAX_PAGE_ORDER; o++) { 1209 printf("%5d", o); 1210 for (m = 0; m < MAX_MIGRATE_TYPES - 1; m++) { 1211 if (order_stats[o][m]) 1212 printf(" %'12d", order_stats[o][m]); 1213 else 1214 printf(" %12c", '.'); 1215 } 1216 printf("\n"); 1217 } 1218 } 1219 1220 static void print_slab_result(struct perf_session *session) 1221 { 1222 if (caller_flag) 1223 __print_slab_result(&root_caller_sorted, session, caller_lines, 1); 1224 if (alloc_flag) 1225 __print_slab_result(&root_alloc_sorted, session, alloc_lines, 0); 1226 print_slab_summary(); 1227 } 1228 1229 static void print_page_result(struct perf_session *session) 1230 { 1231 if (caller_flag || alloc_flag) 1232 print_gfp_flags(); 1233 if (caller_flag) 1234 __print_page_caller_result(session, caller_lines); 1235 if (alloc_flag) 1236 __print_page_alloc_result(session, alloc_lines); 1237 print_page_summary(); 1238 } 1239 1240 static void print_result(struct perf_session *session) 1241 { 1242 if (kmem_slab) 1243 print_slab_result(session); 1244 if (kmem_page) 1245 print_page_result(session); 1246 } 1247 1248 static LIST_HEAD(slab_caller_sort); 1249 static LIST_HEAD(slab_alloc_sort); 1250 static LIST_HEAD(page_caller_sort); 1251 static LIST_HEAD(page_alloc_sort); 1252 1253 static void sort_slab_insert(struct rb_root *root, struct alloc_stat *data, 1254 struct list_head *sort_list) 1255 { 1256 struct rb_node **new = &(root->rb_node); 1257 struct rb_node *parent = NULL; 1258 struct sort_dimension *sort; 1259 1260 while (*new) { 1261 struct alloc_stat *this; 1262 int cmp = 0; 1263 1264 this = rb_entry(*new, struct alloc_stat, node); 1265 parent = *new; 1266 1267 list_for_each_entry(sort, sort_list, list) { 1268 cmp = sort->cmp(data, this); 1269 if (cmp) 1270 break; 1271 } 1272 1273 if (cmp > 0) 1274 new = &((*new)->rb_left); 1275 else 1276 new = &((*new)->rb_right); 1277 } 1278 1279 rb_link_node(&data->node, parent, new); 1280 rb_insert_color(&data->node, root); 1281 } 1282 1283 static void __sort_slab_result(struct rb_root *root, struct rb_root *root_sorted, 1284 struct list_head *sort_list) 1285 { 1286 struct rb_node *node; 1287 struct alloc_stat *data; 1288 1289 for (;;) { 1290 node = rb_first(root); 1291 if (!node) 1292 break; 1293 1294 rb_erase(node, root); 1295 data = rb_entry(node, struct alloc_stat, node); 1296 sort_slab_insert(root_sorted, data, sort_list); 1297 } 1298 } 1299 1300 static void sort_page_insert(struct rb_root *root, struct page_stat *data, 1301 struct list_head *sort_list) 1302 { 1303 struct rb_node **new = &root->rb_node; 1304 struct rb_node *parent = NULL; 1305 struct sort_dimension *sort; 1306 1307 while (*new) { 1308 struct page_stat *this; 1309 int cmp = 0; 1310 1311 this = rb_entry(*new, struct page_stat, node); 1312 parent = *new; 1313 1314 list_for_each_entry(sort, sort_list, list) { 1315 cmp = sort->cmp(data, this); 1316 if (cmp) 1317 break; 1318 } 1319 1320 if (cmp > 0) 1321 new = &parent->rb_left; 1322 else 1323 new = &parent->rb_right; 1324 } 1325 1326 rb_link_node(&data->node, parent, new); 1327 rb_insert_color(&data->node, root); 1328 } 1329 1330 static void __sort_page_result(struct rb_root *root, struct rb_root *root_sorted, 1331 struct list_head *sort_list) 1332 { 1333 struct rb_node *node; 1334 struct page_stat *data; 1335 1336 for (;;) { 1337 node = rb_first(root); 1338 if (!node) 1339 break; 1340 1341 rb_erase(node, root); 1342 data = rb_entry(node, struct page_stat, node); 1343 sort_page_insert(root_sorted, data, sort_list); 1344 } 1345 } 1346 1347 static void sort_result(void) 1348 { 1349 if (kmem_slab) { 1350 __sort_slab_result(&root_alloc_stat, &root_alloc_sorted, 1351 &slab_alloc_sort); 1352 __sort_slab_result(&root_caller_stat, &root_caller_sorted, 1353 &slab_caller_sort); 1354 } 1355 if (kmem_page) { 1356 if (live_page) 1357 __sort_page_result(&page_live_tree, &page_alloc_sorted, 1358 &page_alloc_sort); 1359 else 1360 __sort_page_result(&page_alloc_tree, &page_alloc_sorted, 1361 &page_alloc_sort); 1362 1363 __sort_page_result(&page_caller_tree, &page_caller_sorted, 1364 &page_caller_sort); 1365 } 1366 } 1367 1368 static int __cmd_kmem(struct perf_session *session) 1369 { 1370 int err = -EINVAL; 1371 struct evsel *evsel; 1372 const struct evsel_str_handler kmem_tracepoints[] = { 1373 /* slab allocator */ 1374 { "kmem:kmalloc", perf_evsel__process_alloc_event, }, 1375 { "kmem:kmem_cache_alloc", perf_evsel__process_alloc_event, }, 1376 { "kmem:kmalloc_node", perf_evsel__process_alloc_node_event, }, 1377 { "kmem:kmem_cache_alloc_node", perf_evsel__process_alloc_node_event, }, 1378 { "kmem:kfree", perf_evsel__process_free_event, }, 1379 { "kmem:kmem_cache_free", perf_evsel__process_free_event, }, 1380 /* page allocator */ 1381 { "kmem:mm_page_alloc", perf_evsel__process_page_alloc_event, }, 1382 { "kmem:mm_page_free", perf_evsel__process_page_free_event, }, 1383 }; 1384 1385 if (!perf_session__has_traces(session, "kmem record")) 1386 goto out; 1387 1388 if (perf_session__set_tracepoints_handlers(session, kmem_tracepoints)) { 1389 pr_err("Initializing perf session tracepoint handlers failed\n"); 1390 goto out; 1391 } 1392 1393 evlist__for_each_entry(session->evlist, evsel) { 1394 if (!strcmp(perf_evsel__name(evsel), "kmem:mm_page_alloc") && 1395 perf_evsel__field(evsel, "pfn")) { 1396 use_pfn = true; 1397 break; 1398 } 1399 } 1400 1401 setup_pager(); 1402 err = perf_session__process_events(session); 1403 if (err != 0) { 1404 pr_err("error during process events: %d\n", err); 1405 goto out; 1406 } 1407 sort_result(); 1408 print_result(session); 1409 out: 1410 return err; 1411 } 1412 1413 /* slab sort keys */ 1414 static int ptr_cmp(void *a, void *b) 1415 { 1416 struct alloc_stat *l = a; 1417 struct alloc_stat *r = b; 1418 1419 if (l->ptr < r->ptr) 1420 return -1; 1421 else if (l->ptr > r->ptr) 1422 return 1; 1423 return 0; 1424 } 1425 1426 static struct sort_dimension ptr_sort_dimension = { 1427 .name = "ptr", 1428 .cmp = ptr_cmp, 1429 }; 1430 1431 static int slab_callsite_cmp(void *a, void *b) 1432 { 1433 struct alloc_stat *l = a; 1434 struct alloc_stat *r = b; 1435 1436 if (l->call_site < r->call_site) 1437 return -1; 1438 else if (l->call_site > r->call_site) 1439 return 1; 1440 return 0; 1441 } 1442 1443 static struct sort_dimension callsite_sort_dimension = { 1444 .name = "callsite", 1445 .cmp = slab_callsite_cmp, 1446 }; 1447 1448 static int hit_cmp(void *a, void *b) 1449 { 1450 struct alloc_stat *l = a; 1451 struct alloc_stat *r = b; 1452 1453 if (l->hit < r->hit) 1454 return -1; 1455 else if (l->hit > r->hit) 1456 return 1; 1457 return 0; 1458 } 1459 1460 static struct sort_dimension hit_sort_dimension = { 1461 .name = "hit", 1462 .cmp = hit_cmp, 1463 }; 1464 1465 static int bytes_cmp(void *a, void *b) 1466 { 1467 struct alloc_stat *l = a; 1468 struct alloc_stat *r = b; 1469 1470 if (l->bytes_alloc < r->bytes_alloc) 1471 return -1; 1472 else if (l->bytes_alloc > r->bytes_alloc) 1473 return 1; 1474 return 0; 1475 } 1476 1477 static struct sort_dimension bytes_sort_dimension = { 1478 .name = "bytes", 1479 .cmp = bytes_cmp, 1480 }; 1481 1482 static int frag_cmp(void *a, void *b) 1483 { 1484 double x, y; 1485 struct alloc_stat *l = a; 1486 struct alloc_stat *r = b; 1487 1488 x = fragmentation(l->bytes_req, l->bytes_alloc); 1489 y = fragmentation(r->bytes_req, r->bytes_alloc); 1490 1491 if (x < y) 1492 return -1; 1493 else if (x > y) 1494 return 1; 1495 return 0; 1496 } 1497 1498 static struct sort_dimension frag_sort_dimension = { 1499 .name = "frag", 1500 .cmp = frag_cmp, 1501 }; 1502 1503 static int pingpong_cmp(void *a, void *b) 1504 { 1505 struct alloc_stat *l = a; 1506 struct alloc_stat *r = b; 1507 1508 if (l->pingpong < r->pingpong) 1509 return -1; 1510 else if (l->pingpong > r->pingpong) 1511 return 1; 1512 return 0; 1513 } 1514 1515 static struct sort_dimension pingpong_sort_dimension = { 1516 .name = "pingpong", 1517 .cmp = pingpong_cmp, 1518 }; 1519 1520 /* page sort keys */ 1521 static int page_cmp(void *a, void *b) 1522 { 1523 struct page_stat *l = a; 1524 struct page_stat *r = b; 1525 1526 if (l->page < r->page) 1527 return -1; 1528 else if (l->page > r->page) 1529 return 1; 1530 return 0; 1531 } 1532 1533 static struct sort_dimension page_sort_dimension = { 1534 .name = "page", 1535 .cmp = page_cmp, 1536 }; 1537 1538 static int page_callsite_cmp(void *a, void *b) 1539 { 1540 struct page_stat *l = a; 1541 struct page_stat *r = b; 1542 1543 if (l->callsite < r->callsite) 1544 return -1; 1545 else if (l->callsite > r->callsite) 1546 return 1; 1547 return 0; 1548 } 1549 1550 static struct sort_dimension page_callsite_sort_dimension = { 1551 .name = "callsite", 1552 .cmp = page_callsite_cmp, 1553 }; 1554 1555 static int page_hit_cmp(void *a, void *b) 1556 { 1557 struct page_stat *l = a; 1558 struct page_stat *r = b; 1559 1560 if (l->nr_alloc < r->nr_alloc) 1561 return -1; 1562 else if (l->nr_alloc > r->nr_alloc) 1563 return 1; 1564 return 0; 1565 } 1566 1567 static struct sort_dimension page_hit_sort_dimension = { 1568 .name = "hit", 1569 .cmp = page_hit_cmp, 1570 }; 1571 1572 static int page_bytes_cmp(void *a, void *b) 1573 { 1574 struct page_stat *l = a; 1575 struct page_stat *r = b; 1576 1577 if (l->alloc_bytes < r->alloc_bytes) 1578 return -1; 1579 else if (l->alloc_bytes > r->alloc_bytes) 1580 return 1; 1581 return 0; 1582 } 1583 1584 static struct sort_dimension page_bytes_sort_dimension = { 1585 .name = "bytes", 1586 .cmp = page_bytes_cmp, 1587 }; 1588 1589 static int page_order_cmp(void *a, void *b) 1590 { 1591 struct page_stat *l = a; 1592 struct page_stat *r = b; 1593 1594 if (l->order < r->order) 1595 return -1; 1596 else if (l->order > r->order) 1597 return 1; 1598 return 0; 1599 } 1600 1601 static struct sort_dimension page_order_sort_dimension = { 1602 .name = "order", 1603 .cmp = page_order_cmp, 1604 }; 1605 1606 static int migrate_type_cmp(void *a, void *b) 1607 { 1608 struct page_stat *l = a; 1609 struct page_stat *r = b; 1610 1611 /* for internal use to find free'd page */ 1612 if (l->migrate_type == -1U) 1613 return 0; 1614 1615 if (l->migrate_type < r->migrate_type) 1616 return -1; 1617 else if (l->migrate_type > r->migrate_type) 1618 return 1; 1619 return 0; 1620 } 1621 1622 static struct sort_dimension migrate_type_sort_dimension = { 1623 .name = "migtype", 1624 .cmp = migrate_type_cmp, 1625 }; 1626 1627 static int gfp_flags_cmp(void *a, void *b) 1628 { 1629 struct page_stat *l = a; 1630 struct page_stat *r = b; 1631 1632 /* for internal use to find free'd page */ 1633 if (l->gfp_flags == -1U) 1634 return 0; 1635 1636 if (l->gfp_flags < r->gfp_flags) 1637 return -1; 1638 else if (l->gfp_flags > r->gfp_flags) 1639 return 1; 1640 return 0; 1641 } 1642 1643 static struct sort_dimension gfp_flags_sort_dimension = { 1644 .name = "gfp", 1645 .cmp = gfp_flags_cmp, 1646 }; 1647 1648 static struct sort_dimension *slab_sorts[] = { 1649 &ptr_sort_dimension, 1650 &callsite_sort_dimension, 1651 &hit_sort_dimension, 1652 &bytes_sort_dimension, 1653 &frag_sort_dimension, 1654 &pingpong_sort_dimension, 1655 }; 1656 1657 static struct sort_dimension *page_sorts[] = { 1658 &page_sort_dimension, 1659 &page_callsite_sort_dimension, 1660 &page_hit_sort_dimension, 1661 &page_bytes_sort_dimension, 1662 &page_order_sort_dimension, 1663 &migrate_type_sort_dimension, 1664 &gfp_flags_sort_dimension, 1665 }; 1666 1667 static int slab_sort_dimension__add(const char *tok, struct list_head *list) 1668 { 1669 struct sort_dimension *sort; 1670 int i; 1671 1672 for (i = 0; i < (int)ARRAY_SIZE(slab_sorts); i++) { 1673 if (!strcmp(slab_sorts[i]->name, tok)) { 1674 sort = memdup(slab_sorts[i], sizeof(*slab_sorts[i])); 1675 if (!sort) { 1676 pr_err("%s: memdup failed\n", __func__); 1677 return -1; 1678 } 1679 list_add_tail(&sort->list, list); 1680 return 0; 1681 } 1682 } 1683 1684 return -1; 1685 } 1686 1687 static int page_sort_dimension__add(const char *tok, struct list_head *list) 1688 { 1689 struct sort_dimension *sort; 1690 int i; 1691 1692 for (i = 0; i < (int)ARRAY_SIZE(page_sorts); i++) { 1693 if (!strcmp(page_sorts[i]->name, tok)) { 1694 sort = memdup(page_sorts[i], sizeof(*page_sorts[i])); 1695 if (!sort) { 1696 pr_err("%s: memdup failed\n", __func__); 1697 return -1; 1698 } 1699 list_add_tail(&sort->list, list); 1700 return 0; 1701 } 1702 } 1703 1704 return -1; 1705 } 1706 1707 static int setup_slab_sorting(struct list_head *sort_list, const char *arg) 1708 { 1709 char *tok; 1710 char *str = strdup(arg); 1711 char *pos = str; 1712 1713 if (!str) { 1714 pr_err("%s: strdup failed\n", __func__); 1715 return -1; 1716 } 1717 1718 while (true) { 1719 tok = strsep(&pos, ","); 1720 if (!tok) 1721 break; 1722 if (slab_sort_dimension__add(tok, sort_list) < 0) { 1723 pr_err("Unknown slab --sort key: '%s'", tok); 1724 free(str); 1725 return -1; 1726 } 1727 } 1728 1729 free(str); 1730 return 0; 1731 } 1732 1733 static int setup_page_sorting(struct list_head *sort_list, const char *arg) 1734 { 1735 char *tok; 1736 char *str = strdup(arg); 1737 char *pos = str; 1738 1739 if (!str) { 1740 pr_err("%s: strdup failed\n", __func__); 1741 return -1; 1742 } 1743 1744 while (true) { 1745 tok = strsep(&pos, ","); 1746 if (!tok) 1747 break; 1748 if (page_sort_dimension__add(tok, sort_list) < 0) { 1749 pr_err("Unknown page --sort key: '%s'", tok); 1750 free(str); 1751 return -1; 1752 } 1753 } 1754 1755 free(str); 1756 return 0; 1757 } 1758 1759 static int parse_sort_opt(const struct option *opt __maybe_unused, 1760 const char *arg, int unset __maybe_unused) 1761 { 1762 if (!arg) 1763 return -1; 1764 1765 if (kmem_page > kmem_slab || 1766 (kmem_page == 0 && kmem_slab == 0 && kmem_default == KMEM_PAGE)) { 1767 if (caller_flag > alloc_flag) 1768 return setup_page_sorting(&page_caller_sort, arg); 1769 else 1770 return setup_page_sorting(&page_alloc_sort, arg); 1771 } else { 1772 if (caller_flag > alloc_flag) 1773 return setup_slab_sorting(&slab_caller_sort, arg); 1774 else 1775 return setup_slab_sorting(&slab_alloc_sort, arg); 1776 } 1777 1778 return 0; 1779 } 1780 1781 static int parse_caller_opt(const struct option *opt __maybe_unused, 1782 const char *arg __maybe_unused, 1783 int unset __maybe_unused) 1784 { 1785 caller_flag = (alloc_flag + 1); 1786 return 0; 1787 } 1788 1789 static int parse_alloc_opt(const struct option *opt __maybe_unused, 1790 const char *arg __maybe_unused, 1791 int unset __maybe_unused) 1792 { 1793 alloc_flag = (caller_flag + 1); 1794 return 0; 1795 } 1796 1797 static int parse_slab_opt(const struct option *opt __maybe_unused, 1798 const char *arg __maybe_unused, 1799 int unset __maybe_unused) 1800 { 1801 kmem_slab = (kmem_page + 1); 1802 return 0; 1803 } 1804 1805 static int parse_page_opt(const struct option *opt __maybe_unused, 1806 const char *arg __maybe_unused, 1807 int unset __maybe_unused) 1808 { 1809 kmem_page = (kmem_slab + 1); 1810 return 0; 1811 } 1812 1813 static int parse_line_opt(const struct option *opt __maybe_unused, 1814 const char *arg, int unset __maybe_unused) 1815 { 1816 int lines; 1817 1818 if (!arg) 1819 return -1; 1820 1821 lines = strtoul(arg, NULL, 10); 1822 1823 if (caller_flag > alloc_flag) 1824 caller_lines = lines; 1825 else 1826 alloc_lines = lines; 1827 1828 return 0; 1829 } 1830 1831 static int __cmd_record(int argc, const char **argv) 1832 { 1833 const char * const record_args[] = { 1834 "record", "-a", "-R", "-c", "1", 1835 }; 1836 const char * const slab_events[] = { 1837 "-e", "kmem:kmalloc", 1838 "-e", "kmem:kmalloc_node", 1839 "-e", "kmem:kfree", 1840 "-e", "kmem:kmem_cache_alloc", 1841 "-e", "kmem:kmem_cache_alloc_node", 1842 "-e", "kmem:kmem_cache_free", 1843 }; 1844 const char * const page_events[] = { 1845 "-e", "kmem:mm_page_alloc", 1846 "-e", "kmem:mm_page_free", 1847 }; 1848 unsigned int rec_argc, i, j; 1849 const char **rec_argv; 1850 1851 rec_argc = ARRAY_SIZE(record_args) + argc - 1; 1852 if (kmem_slab) 1853 rec_argc += ARRAY_SIZE(slab_events); 1854 if (kmem_page) 1855 rec_argc += ARRAY_SIZE(page_events) + 1; /* for -g */ 1856 1857 rec_argv = calloc(rec_argc + 1, sizeof(char *)); 1858 1859 if (rec_argv == NULL) 1860 return -ENOMEM; 1861 1862 for (i = 0; i < ARRAY_SIZE(record_args); i++) 1863 rec_argv[i] = strdup(record_args[i]); 1864 1865 if (kmem_slab) { 1866 for (j = 0; j < ARRAY_SIZE(slab_events); j++, i++) 1867 rec_argv[i] = strdup(slab_events[j]); 1868 } 1869 if (kmem_page) { 1870 rec_argv[i++] = strdup("-g"); 1871 1872 for (j = 0; j < ARRAY_SIZE(page_events); j++, i++) 1873 rec_argv[i] = strdup(page_events[j]); 1874 } 1875 1876 for (j = 1; j < (unsigned int)argc; j++, i++) 1877 rec_argv[i] = argv[j]; 1878 1879 return cmd_record(i, rec_argv); 1880 } 1881 1882 static int kmem_config(const char *var, const char *value, void *cb __maybe_unused) 1883 { 1884 if (!strcmp(var, "kmem.default")) { 1885 if (!strcmp(value, "slab")) 1886 kmem_default = KMEM_SLAB; 1887 else if (!strcmp(value, "page")) 1888 kmem_default = KMEM_PAGE; 1889 else 1890 pr_err("invalid default value ('slab' or 'page' required): %s\n", 1891 value); 1892 return 0; 1893 } 1894 1895 return 0; 1896 } 1897 1898 int cmd_kmem(int argc, const char **argv) 1899 { 1900 const char * const default_slab_sort = "frag,hit,bytes"; 1901 const char * const default_page_sort = "bytes,hit"; 1902 struct perf_data data = { 1903 .mode = PERF_DATA_MODE_READ, 1904 }; 1905 const struct option kmem_options[] = { 1906 OPT_STRING('i', "input", &input_name, "file", "input file name"), 1907 OPT_INCR('v', "verbose", &verbose, 1908 "be more verbose (show symbol address, etc)"), 1909 OPT_CALLBACK_NOOPT(0, "caller", NULL, NULL, 1910 "show per-callsite statistics", parse_caller_opt), 1911 OPT_CALLBACK_NOOPT(0, "alloc", NULL, NULL, 1912 "show per-allocation statistics", parse_alloc_opt), 1913 OPT_CALLBACK('s', "sort", NULL, "key[,key2...]", 1914 "sort by keys: ptr, callsite, bytes, hit, pingpong, frag, " 1915 "page, order, migtype, gfp", parse_sort_opt), 1916 OPT_CALLBACK('l', "line", NULL, "num", "show n lines", parse_line_opt), 1917 OPT_BOOLEAN(0, "raw-ip", &raw_ip, "show raw ip instead of symbol"), 1918 OPT_BOOLEAN('f', "force", &data.force, "don't complain, do it"), 1919 OPT_CALLBACK_NOOPT(0, "slab", NULL, NULL, "Analyze slab allocator", 1920 parse_slab_opt), 1921 OPT_CALLBACK_NOOPT(0, "page", NULL, NULL, "Analyze page allocator", 1922 parse_page_opt), 1923 OPT_BOOLEAN(0, "live", &live_page, "Show live page stat"), 1924 OPT_STRING(0, "time", &time_str, "str", 1925 "Time span of interest (start,stop)"), 1926 OPT_END() 1927 }; 1928 const char *const kmem_subcommands[] = { "record", "stat", NULL }; 1929 const char *kmem_usage[] = { 1930 NULL, 1931 NULL 1932 }; 1933 struct perf_session *session; 1934 static const char errmsg[] = "No %s allocation events found. Have you run 'perf kmem record --%s'?\n"; 1935 int ret = perf_config(kmem_config, NULL); 1936 1937 if (ret) 1938 return ret; 1939 1940 argc = parse_options_subcommand(argc, argv, kmem_options, 1941 kmem_subcommands, kmem_usage, 0); 1942 1943 if (!argc) 1944 usage_with_options(kmem_usage, kmem_options); 1945 1946 if (kmem_slab == 0 && kmem_page == 0) { 1947 if (kmem_default == KMEM_SLAB) 1948 kmem_slab = 1; 1949 else 1950 kmem_page = 1; 1951 } 1952 1953 if (!strncmp(argv[0], "rec", 3)) { 1954 symbol__init(NULL); 1955 return __cmd_record(argc, argv); 1956 } 1957 1958 data.path = input_name; 1959 1960 kmem_session = session = perf_session__new(&data, false, &perf_kmem); 1961 if (IS_ERR(session)) 1962 return PTR_ERR(session); 1963 1964 ret = -1; 1965 1966 if (kmem_slab) { 1967 if (!perf_evlist__find_tracepoint_by_name(session->evlist, 1968 "kmem:kmalloc")) { 1969 pr_err(errmsg, "slab", "slab"); 1970 goto out_delete; 1971 } 1972 } 1973 1974 if (kmem_page) { 1975 struct evsel *evsel; 1976 1977 evsel = perf_evlist__find_tracepoint_by_name(session->evlist, 1978 "kmem:mm_page_alloc"); 1979 if (evsel == NULL) { 1980 pr_err(errmsg, "page", "page"); 1981 goto out_delete; 1982 } 1983 1984 kmem_page_size = tep_get_page_size(evsel->tp_format->tep); 1985 symbol_conf.use_callchain = true; 1986 } 1987 1988 symbol__init(&session->header.env); 1989 1990 if (perf_time__parse_str(&ptime, time_str) != 0) { 1991 pr_err("Invalid time string\n"); 1992 ret = -EINVAL; 1993 goto out_delete; 1994 } 1995 1996 if (!strcmp(argv[0], "stat")) { 1997 setlocale(LC_ALL, ""); 1998 1999 if (cpu__setup_cpunode_map()) 2000 goto out_delete; 2001 2002 if (list_empty(&slab_caller_sort)) 2003 setup_slab_sorting(&slab_caller_sort, default_slab_sort); 2004 if (list_empty(&slab_alloc_sort)) 2005 setup_slab_sorting(&slab_alloc_sort, default_slab_sort); 2006 if (list_empty(&page_caller_sort)) 2007 setup_page_sorting(&page_caller_sort, default_page_sort); 2008 if (list_empty(&page_alloc_sort)) 2009 setup_page_sorting(&page_alloc_sort, default_page_sort); 2010 2011 if (kmem_page) { 2012 setup_page_sorting(&page_alloc_sort_input, 2013 "page,order,migtype,gfp"); 2014 setup_page_sorting(&page_caller_sort_input, 2015 "callsite,order,migtype,gfp"); 2016 } 2017 ret = __cmd_kmem(session); 2018 } else 2019 usage_with_options(kmem_usage, kmem_options); 2020 2021 out_delete: 2022 perf_session__delete(session); 2023 2024 return ret; 2025 } 2026 2027