1 // SPDX-License-Identifier: GPL-2.0 2 #include "builtin.h" 3 #include "perf.h" 4 5 #include "util/dso.h" 6 #include "util/evlist.h" 7 #include "util/evsel.h" 8 #include "util/config.h" 9 #include "util/map.h" 10 #include "util/symbol.h" 11 #include "util/thread.h" 12 #include "util/header.h" 13 #include "util/session.h" 14 #include "util/tool.h" 15 #include "util/callchain.h" 16 #include "util/time-utils.h" 17 #include <linux/err.h> 18 19 #include <subcmd/pager.h> 20 #include <subcmd/parse-options.h> 21 #include "util/trace-event.h" 22 #include "util/data.h" 23 #include "util/cpumap.h" 24 25 #include "util/debug.h" 26 #include "util/string2.h" 27 28 #include <linux/kernel.h> 29 #include <linux/rbtree.h> 30 #include <linux/string.h> 31 #include <linux/zalloc.h> 32 #include <errno.h> 33 #include <inttypes.h> 34 #include <locale.h> 35 #include <regex.h> 36 37 #include <linux/ctype.h> 38 39 static int kmem_slab; 40 static int kmem_page; 41 42 static long kmem_page_size; 43 static enum { 44 KMEM_SLAB, 45 KMEM_PAGE, 46 } kmem_default = KMEM_SLAB; /* for backward compatibility */ 47 48 struct alloc_stat; 49 typedef int (*sort_fn_t)(void *, void *); 50 51 static int alloc_flag; 52 static int caller_flag; 53 54 static int alloc_lines = -1; 55 static int caller_lines = -1; 56 57 static bool raw_ip; 58 59 struct alloc_stat { 60 u64 call_site; 61 u64 ptr; 62 u64 bytes_req; 63 u64 bytes_alloc; 64 u64 last_alloc; 65 u32 hit; 66 u32 pingpong; 67 68 short alloc_cpu; 69 70 struct rb_node node; 71 }; 72 73 static struct rb_root root_alloc_stat; 74 static struct rb_root root_alloc_sorted; 75 static struct rb_root root_caller_stat; 76 static struct rb_root root_caller_sorted; 77 78 static unsigned long total_requested, total_allocated, total_freed; 79 static unsigned long nr_allocs, nr_cross_allocs; 80 81 /* filters for controlling start and stop of time of analysis */ 82 static struct perf_time_interval ptime; 83 const char *time_str; 84 85 static int insert_alloc_stat(unsigned long call_site, unsigned long ptr, 86 int bytes_req, int bytes_alloc, int cpu) 87 { 88 struct rb_node **node = &root_alloc_stat.rb_node; 89 struct rb_node *parent = NULL; 90 struct alloc_stat *data = NULL; 91 92 while (*node) { 93 parent = *node; 94 data = rb_entry(*node, struct alloc_stat, node); 95 96 if (ptr > data->ptr) 97 node = &(*node)->rb_right; 98 else if (ptr < data->ptr) 99 node = &(*node)->rb_left; 100 else 101 break; 102 } 103 104 if (data && data->ptr == ptr) { 105 data->hit++; 106 data->bytes_req += bytes_req; 107 data->bytes_alloc += bytes_alloc; 108 } else { 109 data = malloc(sizeof(*data)); 110 if (!data) { 111 pr_err("%s: malloc failed\n", __func__); 112 return -1; 113 } 114 data->ptr = ptr; 115 data->pingpong = 0; 116 data->hit = 1; 117 data->bytes_req = bytes_req; 118 data->bytes_alloc = bytes_alloc; 119 120 rb_link_node(&data->node, parent, node); 121 rb_insert_color(&data->node, &root_alloc_stat); 122 } 123 data->call_site = call_site; 124 data->alloc_cpu = cpu; 125 data->last_alloc = bytes_alloc; 126 127 return 0; 128 } 129 130 static int insert_caller_stat(unsigned long call_site, 131 int bytes_req, int bytes_alloc) 132 { 133 struct rb_node **node = &root_caller_stat.rb_node; 134 struct rb_node *parent = NULL; 135 struct alloc_stat *data = NULL; 136 137 while (*node) { 138 parent = *node; 139 data = rb_entry(*node, struct alloc_stat, node); 140 141 if (call_site > data->call_site) 142 node = &(*node)->rb_right; 143 else if (call_site < data->call_site) 144 node = &(*node)->rb_left; 145 else 146 break; 147 } 148 149 if (data && data->call_site == call_site) { 150 data->hit++; 151 data->bytes_req += bytes_req; 152 data->bytes_alloc += bytes_alloc; 153 } else { 154 data = malloc(sizeof(*data)); 155 if (!data) { 156 pr_err("%s: malloc failed\n", __func__); 157 return -1; 158 } 159 data->call_site = call_site; 160 data->pingpong = 0; 161 data->hit = 1; 162 data->bytes_req = bytes_req; 163 data->bytes_alloc = bytes_alloc; 164 165 rb_link_node(&data->node, parent, node); 166 rb_insert_color(&data->node, &root_caller_stat); 167 } 168 169 return 0; 170 } 171 172 static int perf_evsel__process_alloc_event(struct evsel *evsel, 173 struct perf_sample *sample) 174 { 175 unsigned long ptr = perf_evsel__intval(evsel, sample, "ptr"), 176 call_site = perf_evsel__intval(evsel, sample, "call_site"); 177 int bytes_req = perf_evsel__intval(evsel, sample, "bytes_req"), 178 bytes_alloc = perf_evsel__intval(evsel, sample, "bytes_alloc"); 179 180 if (insert_alloc_stat(call_site, ptr, bytes_req, bytes_alloc, sample->cpu) || 181 insert_caller_stat(call_site, bytes_req, bytes_alloc)) 182 return -1; 183 184 total_requested += bytes_req; 185 total_allocated += bytes_alloc; 186 187 nr_allocs++; 188 return 0; 189 } 190 191 static int perf_evsel__process_alloc_node_event(struct evsel *evsel, 192 struct perf_sample *sample) 193 { 194 int ret = perf_evsel__process_alloc_event(evsel, sample); 195 196 if (!ret) { 197 int node1 = cpu__get_node(sample->cpu), 198 node2 = perf_evsel__intval(evsel, sample, "node"); 199 200 if (node1 != node2) 201 nr_cross_allocs++; 202 } 203 204 return ret; 205 } 206 207 static int ptr_cmp(void *, void *); 208 static int slab_callsite_cmp(void *, void *); 209 210 static struct alloc_stat *search_alloc_stat(unsigned long ptr, 211 unsigned long call_site, 212 struct rb_root *root, 213 sort_fn_t sort_fn) 214 { 215 struct rb_node *node = root->rb_node; 216 struct alloc_stat key = { .ptr = ptr, .call_site = call_site }; 217 218 while (node) { 219 struct alloc_stat *data; 220 int cmp; 221 222 data = rb_entry(node, struct alloc_stat, node); 223 224 cmp = sort_fn(&key, data); 225 if (cmp < 0) 226 node = node->rb_left; 227 else if (cmp > 0) 228 node = node->rb_right; 229 else 230 return data; 231 } 232 return NULL; 233 } 234 235 static int perf_evsel__process_free_event(struct evsel *evsel, 236 struct perf_sample *sample) 237 { 238 unsigned long ptr = perf_evsel__intval(evsel, sample, "ptr"); 239 struct alloc_stat *s_alloc, *s_caller; 240 241 s_alloc = search_alloc_stat(ptr, 0, &root_alloc_stat, ptr_cmp); 242 if (!s_alloc) 243 return 0; 244 245 total_freed += s_alloc->last_alloc; 246 247 if ((short)sample->cpu != s_alloc->alloc_cpu) { 248 s_alloc->pingpong++; 249 250 s_caller = search_alloc_stat(0, s_alloc->call_site, 251 &root_caller_stat, 252 slab_callsite_cmp); 253 if (!s_caller) 254 return -1; 255 s_caller->pingpong++; 256 } 257 s_alloc->alloc_cpu = -1; 258 259 return 0; 260 } 261 262 static u64 total_page_alloc_bytes; 263 static u64 total_page_free_bytes; 264 static u64 total_page_nomatch_bytes; 265 static u64 total_page_fail_bytes; 266 static unsigned long nr_page_allocs; 267 static unsigned long nr_page_frees; 268 static unsigned long nr_page_fails; 269 static unsigned long nr_page_nomatch; 270 271 static bool use_pfn; 272 static bool live_page; 273 static struct perf_session *kmem_session; 274 275 #define MAX_MIGRATE_TYPES 6 276 #define MAX_PAGE_ORDER 11 277 278 static int order_stats[MAX_PAGE_ORDER][MAX_MIGRATE_TYPES]; 279 280 struct page_stat { 281 struct rb_node node; 282 u64 page; 283 u64 callsite; 284 int order; 285 unsigned gfp_flags; 286 unsigned migrate_type; 287 u64 alloc_bytes; 288 u64 free_bytes; 289 int nr_alloc; 290 int nr_free; 291 }; 292 293 static struct rb_root page_live_tree; 294 static struct rb_root page_alloc_tree; 295 static struct rb_root page_alloc_sorted; 296 static struct rb_root page_caller_tree; 297 static struct rb_root page_caller_sorted; 298 299 struct alloc_func { 300 u64 start; 301 u64 end; 302 char *name; 303 }; 304 305 static int nr_alloc_funcs; 306 static struct alloc_func *alloc_func_list; 307 308 static int funcmp(const void *a, const void *b) 309 { 310 const struct alloc_func *fa = a; 311 const struct alloc_func *fb = b; 312 313 if (fa->start > fb->start) 314 return 1; 315 else 316 return -1; 317 } 318 319 static int callcmp(const void *a, const void *b) 320 { 321 const struct alloc_func *fa = a; 322 const struct alloc_func *fb = b; 323 324 if (fb->start <= fa->start && fa->end < fb->end) 325 return 0; 326 327 if (fa->start > fb->start) 328 return 1; 329 else 330 return -1; 331 } 332 333 static int build_alloc_func_list(void) 334 { 335 int ret; 336 struct map *kernel_map; 337 struct symbol *sym; 338 struct rb_node *node; 339 struct alloc_func *func; 340 struct machine *machine = &kmem_session->machines.host; 341 regex_t alloc_func_regex; 342 static const char pattern[] = "^_?_?(alloc|get_free|get_zeroed)_pages?"; 343 344 ret = regcomp(&alloc_func_regex, pattern, REG_EXTENDED); 345 if (ret) { 346 char err[BUFSIZ]; 347 348 regerror(ret, &alloc_func_regex, err, sizeof(err)); 349 pr_err("Invalid regex: %s\n%s", pattern, err); 350 return -EINVAL; 351 } 352 353 kernel_map = machine__kernel_map(machine); 354 if (map__load(kernel_map) < 0) { 355 pr_err("cannot load kernel map\n"); 356 return -ENOENT; 357 } 358 359 map__for_each_symbol(kernel_map, sym, node) { 360 if (regexec(&alloc_func_regex, sym->name, 0, NULL, 0)) 361 continue; 362 363 func = realloc(alloc_func_list, 364 (nr_alloc_funcs + 1) * sizeof(*func)); 365 if (func == NULL) 366 return -ENOMEM; 367 368 pr_debug("alloc func: %s\n", sym->name); 369 func[nr_alloc_funcs].start = sym->start; 370 func[nr_alloc_funcs].end = sym->end; 371 func[nr_alloc_funcs].name = sym->name; 372 373 alloc_func_list = func; 374 nr_alloc_funcs++; 375 } 376 377 qsort(alloc_func_list, nr_alloc_funcs, sizeof(*func), funcmp); 378 379 regfree(&alloc_func_regex); 380 return 0; 381 } 382 383 /* 384 * Find first non-memory allocation function from callchain. 385 * The allocation functions are in the 'alloc_func_list'. 386 */ 387 static u64 find_callsite(struct evsel *evsel, struct perf_sample *sample) 388 { 389 struct addr_location al; 390 struct machine *machine = &kmem_session->machines.host; 391 struct callchain_cursor_node *node; 392 393 if (alloc_func_list == NULL) { 394 if (build_alloc_func_list() < 0) 395 goto out; 396 } 397 398 al.thread = machine__findnew_thread(machine, sample->pid, sample->tid); 399 sample__resolve_callchain(sample, &callchain_cursor, NULL, evsel, &al, 16); 400 401 callchain_cursor_commit(&callchain_cursor); 402 while (true) { 403 struct alloc_func key, *caller; 404 u64 addr; 405 406 node = callchain_cursor_current(&callchain_cursor); 407 if (node == NULL) 408 break; 409 410 key.start = key.end = node->ip; 411 caller = bsearch(&key, alloc_func_list, nr_alloc_funcs, 412 sizeof(key), callcmp); 413 if (!caller) { 414 /* found */ 415 if (node->map) 416 addr = map__unmap_ip(node->map, node->ip); 417 else 418 addr = node->ip; 419 420 return addr; 421 } else 422 pr_debug3("skipping alloc function: %s\n", caller->name); 423 424 callchain_cursor_advance(&callchain_cursor); 425 } 426 427 out: 428 pr_debug2("unknown callsite: %"PRIx64 "\n", sample->ip); 429 return sample->ip; 430 } 431 432 struct sort_dimension { 433 const char name[20]; 434 sort_fn_t cmp; 435 struct list_head list; 436 }; 437 438 static LIST_HEAD(page_alloc_sort_input); 439 static LIST_HEAD(page_caller_sort_input); 440 441 static struct page_stat * 442 __page_stat__findnew_page(struct page_stat *pstat, bool create) 443 { 444 struct rb_node **node = &page_live_tree.rb_node; 445 struct rb_node *parent = NULL; 446 struct page_stat *data; 447 448 while (*node) { 449 s64 cmp; 450 451 parent = *node; 452 data = rb_entry(*node, struct page_stat, node); 453 454 cmp = data->page - pstat->page; 455 if (cmp < 0) 456 node = &parent->rb_left; 457 else if (cmp > 0) 458 node = &parent->rb_right; 459 else 460 return data; 461 } 462 463 if (!create) 464 return NULL; 465 466 data = zalloc(sizeof(*data)); 467 if (data != NULL) { 468 data->page = pstat->page; 469 data->order = pstat->order; 470 data->gfp_flags = pstat->gfp_flags; 471 data->migrate_type = pstat->migrate_type; 472 473 rb_link_node(&data->node, parent, node); 474 rb_insert_color(&data->node, &page_live_tree); 475 } 476 477 return data; 478 } 479 480 static struct page_stat *page_stat__find_page(struct page_stat *pstat) 481 { 482 return __page_stat__findnew_page(pstat, false); 483 } 484 485 static struct page_stat *page_stat__findnew_page(struct page_stat *pstat) 486 { 487 return __page_stat__findnew_page(pstat, true); 488 } 489 490 static struct page_stat * 491 __page_stat__findnew_alloc(struct page_stat *pstat, bool create) 492 { 493 struct rb_node **node = &page_alloc_tree.rb_node; 494 struct rb_node *parent = NULL; 495 struct page_stat *data; 496 struct sort_dimension *sort; 497 498 while (*node) { 499 int cmp = 0; 500 501 parent = *node; 502 data = rb_entry(*node, struct page_stat, node); 503 504 list_for_each_entry(sort, &page_alloc_sort_input, list) { 505 cmp = sort->cmp(pstat, data); 506 if (cmp) 507 break; 508 } 509 510 if (cmp < 0) 511 node = &parent->rb_left; 512 else if (cmp > 0) 513 node = &parent->rb_right; 514 else 515 return data; 516 } 517 518 if (!create) 519 return NULL; 520 521 data = zalloc(sizeof(*data)); 522 if (data != NULL) { 523 data->page = pstat->page; 524 data->order = pstat->order; 525 data->gfp_flags = pstat->gfp_flags; 526 data->migrate_type = pstat->migrate_type; 527 528 rb_link_node(&data->node, parent, node); 529 rb_insert_color(&data->node, &page_alloc_tree); 530 } 531 532 return data; 533 } 534 535 static struct page_stat *page_stat__find_alloc(struct page_stat *pstat) 536 { 537 return __page_stat__findnew_alloc(pstat, false); 538 } 539 540 static struct page_stat *page_stat__findnew_alloc(struct page_stat *pstat) 541 { 542 return __page_stat__findnew_alloc(pstat, true); 543 } 544 545 static struct page_stat * 546 __page_stat__findnew_caller(struct page_stat *pstat, bool create) 547 { 548 struct rb_node **node = &page_caller_tree.rb_node; 549 struct rb_node *parent = NULL; 550 struct page_stat *data; 551 struct sort_dimension *sort; 552 553 while (*node) { 554 int cmp = 0; 555 556 parent = *node; 557 data = rb_entry(*node, struct page_stat, node); 558 559 list_for_each_entry(sort, &page_caller_sort_input, list) { 560 cmp = sort->cmp(pstat, data); 561 if (cmp) 562 break; 563 } 564 565 if (cmp < 0) 566 node = &parent->rb_left; 567 else if (cmp > 0) 568 node = &parent->rb_right; 569 else 570 return data; 571 } 572 573 if (!create) 574 return NULL; 575 576 data = zalloc(sizeof(*data)); 577 if (data != NULL) { 578 data->callsite = pstat->callsite; 579 data->order = pstat->order; 580 data->gfp_flags = pstat->gfp_flags; 581 data->migrate_type = pstat->migrate_type; 582 583 rb_link_node(&data->node, parent, node); 584 rb_insert_color(&data->node, &page_caller_tree); 585 } 586 587 return data; 588 } 589 590 static struct page_stat *page_stat__find_caller(struct page_stat *pstat) 591 { 592 return __page_stat__findnew_caller(pstat, false); 593 } 594 595 static struct page_stat *page_stat__findnew_caller(struct page_stat *pstat) 596 { 597 return __page_stat__findnew_caller(pstat, true); 598 } 599 600 static bool valid_page(u64 pfn_or_page) 601 { 602 if (use_pfn && pfn_or_page == -1UL) 603 return false; 604 if (!use_pfn && pfn_or_page == 0) 605 return false; 606 return true; 607 } 608 609 struct gfp_flag { 610 unsigned int flags; 611 char *compact_str; 612 char *human_readable; 613 }; 614 615 static struct gfp_flag *gfps; 616 static int nr_gfps; 617 618 static int gfpcmp(const void *a, const void *b) 619 { 620 const struct gfp_flag *fa = a; 621 const struct gfp_flag *fb = b; 622 623 return fa->flags - fb->flags; 624 } 625 626 /* see include/trace/events/mmflags.h */ 627 static const struct { 628 const char *original; 629 const char *compact; 630 } gfp_compact_table[] = { 631 { "GFP_TRANSHUGE", "THP" }, 632 { "GFP_TRANSHUGE_LIGHT", "THL" }, 633 { "GFP_HIGHUSER_MOVABLE", "HUM" }, 634 { "GFP_HIGHUSER", "HU" }, 635 { "GFP_USER", "U" }, 636 { "GFP_KERNEL_ACCOUNT", "KAC" }, 637 { "GFP_KERNEL", "K" }, 638 { "GFP_NOFS", "NF" }, 639 { "GFP_ATOMIC", "A" }, 640 { "GFP_NOIO", "NI" }, 641 { "GFP_NOWAIT", "NW" }, 642 { "GFP_DMA", "D" }, 643 { "__GFP_HIGHMEM", "HM" }, 644 { "GFP_DMA32", "D32" }, 645 { "__GFP_HIGH", "H" }, 646 { "__GFP_ATOMIC", "_A" }, 647 { "__GFP_IO", "I" }, 648 { "__GFP_FS", "F" }, 649 { "__GFP_NOWARN", "NWR" }, 650 { "__GFP_RETRY_MAYFAIL", "R" }, 651 { "__GFP_NOFAIL", "NF" }, 652 { "__GFP_NORETRY", "NR" }, 653 { "__GFP_COMP", "C" }, 654 { "__GFP_ZERO", "Z" }, 655 { "__GFP_NOMEMALLOC", "NMA" }, 656 { "__GFP_MEMALLOC", "MA" }, 657 { "__GFP_HARDWALL", "HW" }, 658 { "__GFP_THISNODE", "TN" }, 659 { "__GFP_RECLAIMABLE", "RC" }, 660 { "__GFP_MOVABLE", "M" }, 661 { "__GFP_ACCOUNT", "AC" }, 662 { "__GFP_WRITE", "WR" }, 663 { "__GFP_RECLAIM", "R" }, 664 { "__GFP_DIRECT_RECLAIM", "DR" }, 665 { "__GFP_KSWAPD_RECLAIM", "KR" }, 666 }; 667 668 static size_t max_gfp_len; 669 670 static char *compact_gfp_flags(char *gfp_flags) 671 { 672 char *orig_flags = strdup(gfp_flags); 673 char *new_flags = NULL; 674 char *str, *pos = NULL; 675 size_t len = 0; 676 677 if (orig_flags == NULL) 678 return NULL; 679 680 str = strtok_r(orig_flags, "|", &pos); 681 while (str) { 682 size_t i; 683 char *new; 684 const char *cpt; 685 686 for (i = 0; i < ARRAY_SIZE(gfp_compact_table); i++) { 687 if (strcmp(gfp_compact_table[i].original, str)) 688 continue; 689 690 cpt = gfp_compact_table[i].compact; 691 new = realloc(new_flags, len + strlen(cpt) + 2); 692 if (new == NULL) { 693 free(new_flags); 694 return NULL; 695 } 696 697 new_flags = new; 698 699 if (!len) { 700 strcpy(new_flags, cpt); 701 } else { 702 strcat(new_flags, "|"); 703 strcat(new_flags, cpt); 704 len++; 705 } 706 707 len += strlen(cpt); 708 } 709 710 str = strtok_r(NULL, "|", &pos); 711 } 712 713 if (max_gfp_len < len) 714 max_gfp_len = len; 715 716 free(orig_flags); 717 return new_flags; 718 } 719 720 static char *compact_gfp_string(unsigned long gfp_flags) 721 { 722 struct gfp_flag key = { 723 .flags = gfp_flags, 724 }; 725 struct gfp_flag *gfp; 726 727 gfp = bsearch(&key, gfps, nr_gfps, sizeof(*gfps), gfpcmp); 728 if (gfp) 729 return gfp->compact_str; 730 731 return NULL; 732 } 733 734 static int parse_gfp_flags(struct evsel *evsel, struct perf_sample *sample, 735 unsigned int gfp_flags) 736 { 737 struct tep_record record = { 738 .cpu = sample->cpu, 739 .data = sample->raw_data, 740 .size = sample->raw_size, 741 }; 742 struct trace_seq seq; 743 char *str, *pos = NULL; 744 745 if (nr_gfps) { 746 struct gfp_flag key = { 747 .flags = gfp_flags, 748 }; 749 750 if (bsearch(&key, gfps, nr_gfps, sizeof(*gfps), gfpcmp)) 751 return 0; 752 } 753 754 trace_seq_init(&seq); 755 tep_print_event(evsel->tp_format->tep, 756 &seq, &record, "%s", TEP_PRINT_INFO); 757 758 str = strtok_r(seq.buffer, " ", &pos); 759 while (str) { 760 if (!strncmp(str, "gfp_flags=", 10)) { 761 struct gfp_flag *new; 762 763 new = realloc(gfps, (nr_gfps + 1) * sizeof(*gfps)); 764 if (new == NULL) 765 return -ENOMEM; 766 767 gfps = new; 768 new += nr_gfps++; 769 770 new->flags = gfp_flags; 771 new->human_readable = strdup(str + 10); 772 new->compact_str = compact_gfp_flags(str + 10); 773 if (!new->human_readable || !new->compact_str) 774 return -ENOMEM; 775 776 qsort(gfps, nr_gfps, sizeof(*gfps), gfpcmp); 777 } 778 779 str = strtok_r(NULL, " ", &pos); 780 } 781 782 trace_seq_destroy(&seq); 783 return 0; 784 } 785 786 static int perf_evsel__process_page_alloc_event(struct evsel *evsel, 787 struct perf_sample *sample) 788 { 789 u64 page; 790 unsigned int order = perf_evsel__intval(evsel, sample, "order"); 791 unsigned int gfp_flags = perf_evsel__intval(evsel, sample, "gfp_flags"); 792 unsigned int migrate_type = perf_evsel__intval(evsel, sample, 793 "migratetype"); 794 u64 bytes = kmem_page_size << order; 795 u64 callsite; 796 struct page_stat *pstat; 797 struct page_stat this = { 798 .order = order, 799 .gfp_flags = gfp_flags, 800 .migrate_type = migrate_type, 801 }; 802 803 if (use_pfn) 804 page = perf_evsel__intval(evsel, sample, "pfn"); 805 else 806 page = perf_evsel__intval(evsel, sample, "page"); 807 808 nr_page_allocs++; 809 total_page_alloc_bytes += bytes; 810 811 if (!valid_page(page)) { 812 nr_page_fails++; 813 total_page_fail_bytes += bytes; 814 815 return 0; 816 } 817 818 if (parse_gfp_flags(evsel, sample, gfp_flags) < 0) 819 return -1; 820 821 callsite = find_callsite(evsel, sample); 822 823 /* 824 * This is to find the current page (with correct gfp flags and 825 * migrate type) at free event. 826 */ 827 this.page = page; 828 pstat = page_stat__findnew_page(&this); 829 if (pstat == NULL) 830 return -ENOMEM; 831 832 pstat->nr_alloc++; 833 pstat->alloc_bytes += bytes; 834 pstat->callsite = callsite; 835 836 if (!live_page) { 837 pstat = page_stat__findnew_alloc(&this); 838 if (pstat == NULL) 839 return -ENOMEM; 840 841 pstat->nr_alloc++; 842 pstat->alloc_bytes += bytes; 843 pstat->callsite = callsite; 844 } 845 846 this.callsite = callsite; 847 pstat = page_stat__findnew_caller(&this); 848 if (pstat == NULL) 849 return -ENOMEM; 850 851 pstat->nr_alloc++; 852 pstat->alloc_bytes += bytes; 853 854 order_stats[order][migrate_type]++; 855 856 return 0; 857 } 858 859 static int perf_evsel__process_page_free_event(struct evsel *evsel, 860 struct perf_sample *sample) 861 { 862 u64 page; 863 unsigned int order = perf_evsel__intval(evsel, sample, "order"); 864 u64 bytes = kmem_page_size << order; 865 struct page_stat *pstat; 866 struct page_stat this = { 867 .order = order, 868 }; 869 870 if (use_pfn) 871 page = perf_evsel__intval(evsel, sample, "pfn"); 872 else 873 page = perf_evsel__intval(evsel, sample, "page"); 874 875 nr_page_frees++; 876 total_page_free_bytes += bytes; 877 878 this.page = page; 879 pstat = page_stat__find_page(&this); 880 if (pstat == NULL) { 881 pr_debug2("missing free at page %"PRIx64" (order: %d)\n", 882 page, order); 883 884 nr_page_nomatch++; 885 total_page_nomatch_bytes += bytes; 886 887 return 0; 888 } 889 890 this.gfp_flags = pstat->gfp_flags; 891 this.migrate_type = pstat->migrate_type; 892 this.callsite = pstat->callsite; 893 894 rb_erase(&pstat->node, &page_live_tree); 895 free(pstat); 896 897 if (live_page) { 898 order_stats[this.order][this.migrate_type]--; 899 } else { 900 pstat = page_stat__find_alloc(&this); 901 if (pstat == NULL) 902 return -ENOMEM; 903 904 pstat->nr_free++; 905 pstat->free_bytes += bytes; 906 } 907 908 pstat = page_stat__find_caller(&this); 909 if (pstat == NULL) 910 return -ENOENT; 911 912 pstat->nr_free++; 913 pstat->free_bytes += bytes; 914 915 if (live_page) { 916 pstat->nr_alloc--; 917 pstat->alloc_bytes -= bytes; 918 919 if (pstat->nr_alloc == 0) { 920 rb_erase(&pstat->node, &page_caller_tree); 921 free(pstat); 922 } 923 } 924 925 return 0; 926 } 927 928 static bool perf_kmem__skip_sample(struct perf_sample *sample) 929 { 930 /* skip sample based on time? */ 931 if (perf_time__skip_sample(&ptime, sample->time)) 932 return true; 933 934 return false; 935 } 936 937 typedef int (*tracepoint_handler)(struct evsel *evsel, 938 struct perf_sample *sample); 939 940 static int process_sample_event(struct perf_tool *tool __maybe_unused, 941 union perf_event *event, 942 struct perf_sample *sample, 943 struct evsel *evsel, 944 struct machine *machine) 945 { 946 int err = 0; 947 struct thread *thread = machine__findnew_thread(machine, sample->pid, 948 sample->tid); 949 950 if (thread == NULL) { 951 pr_debug("problem processing %d event, skipping it.\n", 952 event->header.type); 953 return -1; 954 } 955 956 if (perf_kmem__skip_sample(sample)) 957 return 0; 958 959 dump_printf(" ... thread: %s:%d\n", thread__comm_str(thread), thread->tid); 960 961 if (evsel->handler != NULL) { 962 tracepoint_handler f = evsel->handler; 963 err = f(evsel, sample); 964 } 965 966 thread__put(thread); 967 968 return err; 969 } 970 971 static struct perf_tool perf_kmem = { 972 .sample = process_sample_event, 973 .comm = perf_event__process_comm, 974 .mmap = perf_event__process_mmap, 975 .mmap2 = perf_event__process_mmap2, 976 .namespaces = perf_event__process_namespaces, 977 .ordered_events = true, 978 }; 979 980 static double fragmentation(unsigned long n_req, unsigned long n_alloc) 981 { 982 if (n_alloc == 0) 983 return 0.0; 984 else 985 return 100.0 - (100.0 * n_req / n_alloc); 986 } 987 988 static void __print_slab_result(struct rb_root *root, 989 struct perf_session *session, 990 int n_lines, int is_caller) 991 { 992 struct rb_node *next; 993 struct machine *machine = &session->machines.host; 994 995 printf("%.105s\n", graph_dotted_line); 996 printf(" %-34s |", is_caller ? "Callsite": "Alloc Ptr"); 997 printf(" Total_alloc/Per | Total_req/Per | Hit | Ping-pong | Frag\n"); 998 printf("%.105s\n", graph_dotted_line); 999 1000 next = rb_first(root); 1001 1002 while (next && n_lines--) { 1003 struct alloc_stat *data = rb_entry(next, struct alloc_stat, 1004 node); 1005 struct symbol *sym = NULL; 1006 struct map *map; 1007 char buf[BUFSIZ]; 1008 u64 addr; 1009 1010 if (is_caller) { 1011 addr = data->call_site; 1012 if (!raw_ip) 1013 sym = machine__find_kernel_symbol(machine, addr, &map); 1014 } else 1015 addr = data->ptr; 1016 1017 if (sym != NULL) 1018 snprintf(buf, sizeof(buf), "%s+%" PRIx64 "", sym->name, 1019 addr - map->unmap_ip(map, sym->start)); 1020 else 1021 snprintf(buf, sizeof(buf), "%#" PRIx64 "", addr); 1022 printf(" %-34s |", buf); 1023 1024 printf(" %9llu/%-5lu | %9llu/%-5lu | %8lu | %9lu | %6.3f%%\n", 1025 (unsigned long long)data->bytes_alloc, 1026 (unsigned long)data->bytes_alloc / data->hit, 1027 (unsigned long long)data->bytes_req, 1028 (unsigned long)data->bytes_req / data->hit, 1029 (unsigned long)data->hit, 1030 (unsigned long)data->pingpong, 1031 fragmentation(data->bytes_req, data->bytes_alloc)); 1032 1033 next = rb_next(next); 1034 } 1035 1036 if (n_lines == -1) 1037 printf(" ... | ... | ... | ... | ... | ... \n"); 1038 1039 printf("%.105s\n", graph_dotted_line); 1040 } 1041 1042 static const char * const migrate_type_str[] = { 1043 "UNMOVABL", 1044 "RECLAIM", 1045 "MOVABLE", 1046 "RESERVED", 1047 "CMA/ISLT", 1048 "UNKNOWN", 1049 }; 1050 1051 static void __print_page_alloc_result(struct perf_session *session, int n_lines) 1052 { 1053 struct rb_node *next = rb_first(&page_alloc_sorted); 1054 struct machine *machine = &session->machines.host; 1055 const char *format; 1056 int gfp_len = max(strlen("GFP flags"), max_gfp_len); 1057 1058 printf("\n%.105s\n", graph_dotted_line); 1059 printf(" %-16s | %5s alloc (KB) | Hits | Order | Mig.type | %-*s | Callsite\n", 1060 use_pfn ? "PFN" : "Page", live_page ? "Live" : "Total", 1061 gfp_len, "GFP flags"); 1062 printf("%.105s\n", graph_dotted_line); 1063 1064 if (use_pfn) 1065 format = " %16llu | %'16llu | %'9d | %5d | %8s | %-*s | %s\n"; 1066 else 1067 format = " %016llx | %'16llu | %'9d | %5d | %8s | %-*s | %s\n"; 1068 1069 while (next && n_lines--) { 1070 struct page_stat *data; 1071 struct symbol *sym; 1072 struct map *map; 1073 char buf[32]; 1074 char *caller = buf; 1075 1076 data = rb_entry(next, struct page_stat, node); 1077 sym = machine__find_kernel_symbol(machine, data->callsite, &map); 1078 if (sym) 1079 caller = sym->name; 1080 else 1081 scnprintf(buf, sizeof(buf), "%"PRIx64, data->callsite); 1082 1083 printf(format, (unsigned long long)data->page, 1084 (unsigned long long)data->alloc_bytes / 1024, 1085 data->nr_alloc, data->order, 1086 migrate_type_str[data->migrate_type], 1087 gfp_len, compact_gfp_string(data->gfp_flags), caller); 1088 1089 next = rb_next(next); 1090 } 1091 1092 if (n_lines == -1) { 1093 printf(" ... | ... | ... | ... | ... | %-*s | ...\n", 1094 gfp_len, "..."); 1095 } 1096 1097 printf("%.105s\n", graph_dotted_line); 1098 } 1099 1100 static void __print_page_caller_result(struct perf_session *session, int n_lines) 1101 { 1102 struct rb_node *next = rb_first(&page_caller_sorted); 1103 struct machine *machine = &session->machines.host; 1104 int gfp_len = max(strlen("GFP flags"), max_gfp_len); 1105 1106 printf("\n%.105s\n", graph_dotted_line); 1107 printf(" %5s alloc (KB) | Hits | Order | Mig.type | %-*s | Callsite\n", 1108 live_page ? "Live" : "Total", gfp_len, "GFP flags"); 1109 printf("%.105s\n", graph_dotted_line); 1110 1111 while (next && n_lines--) { 1112 struct page_stat *data; 1113 struct symbol *sym; 1114 struct map *map; 1115 char buf[32]; 1116 char *caller = buf; 1117 1118 data = rb_entry(next, struct page_stat, node); 1119 sym = machine__find_kernel_symbol(machine, data->callsite, &map); 1120 if (sym) 1121 caller = sym->name; 1122 else 1123 scnprintf(buf, sizeof(buf), "%"PRIx64, data->callsite); 1124 1125 printf(" %'16llu | %'9d | %5d | %8s | %-*s | %s\n", 1126 (unsigned long long)data->alloc_bytes / 1024, 1127 data->nr_alloc, data->order, 1128 migrate_type_str[data->migrate_type], 1129 gfp_len, compact_gfp_string(data->gfp_flags), caller); 1130 1131 next = rb_next(next); 1132 } 1133 1134 if (n_lines == -1) { 1135 printf(" ... | ... | ... | ... | %-*s | ...\n", 1136 gfp_len, "..."); 1137 } 1138 1139 printf("%.105s\n", graph_dotted_line); 1140 } 1141 1142 static void print_gfp_flags(void) 1143 { 1144 int i; 1145 1146 printf("#\n"); 1147 printf("# GFP flags\n"); 1148 printf("# ---------\n"); 1149 for (i = 0; i < nr_gfps; i++) { 1150 printf("# %08x: %*s: %s\n", gfps[i].flags, 1151 (int) max_gfp_len, gfps[i].compact_str, 1152 gfps[i].human_readable); 1153 } 1154 } 1155 1156 static void print_slab_summary(void) 1157 { 1158 printf("\nSUMMARY (SLAB allocator)"); 1159 printf("\n========================\n"); 1160 printf("Total bytes requested: %'lu\n", total_requested); 1161 printf("Total bytes allocated: %'lu\n", total_allocated); 1162 printf("Total bytes freed: %'lu\n", total_freed); 1163 if (total_allocated > total_freed) { 1164 printf("Net total bytes allocated: %'lu\n", 1165 total_allocated - total_freed); 1166 } 1167 printf("Total bytes wasted on internal fragmentation: %'lu\n", 1168 total_allocated - total_requested); 1169 printf("Internal fragmentation: %f%%\n", 1170 fragmentation(total_requested, total_allocated)); 1171 printf("Cross CPU allocations: %'lu/%'lu\n", nr_cross_allocs, nr_allocs); 1172 } 1173 1174 static void print_page_summary(void) 1175 { 1176 int o, m; 1177 u64 nr_alloc_freed = nr_page_frees - nr_page_nomatch; 1178 u64 total_alloc_freed_bytes = total_page_free_bytes - total_page_nomatch_bytes; 1179 1180 printf("\nSUMMARY (page allocator)"); 1181 printf("\n========================\n"); 1182 printf("%-30s: %'16lu [ %'16"PRIu64" KB ]\n", "Total allocation requests", 1183 nr_page_allocs, total_page_alloc_bytes / 1024); 1184 printf("%-30s: %'16lu [ %'16"PRIu64" KB ]\n", "Total free requests", 1185 nr_page_frees, total_page_free_bytes / 1024); 1186 printf("\n"); 1187 1188 printf("%-30s: %'16"PRIu64" [ %'16"PRIu64" KB ]\n", "Total alloc+freed requests", 1189 nr_alloc_freed, (total_alloc_freed_bytes) / 1024); 1190 printf("%-30s: %'16"PRIu64" [ %'16"PRIu64" KB ]\n", "Total alloc-only requests", 1191 nr_page_allocs - nr_alloc_freed, 1192 (total_page_alloc_bytes - total_alloc_freed_bytes) / 1024); 1193 printf("%-30s: %'16lu [ %'16"PRIu64" KB ]\n", "Total free-only requests", 1194 nr_page_nomatch, total_page_nomatch_bytes / 1024); 1195 printf("\n"); 1196 1197 printf("%-30s: %'16lu [ %'16"PRIu64" KB ]\n", "Total allocation failures", 1198 nr_page_fails, total_page_fail_bytes / 1024); 1199 printf("\n"); 1200 1201 printf("%5s %12s %12s %12s %12s %12s\n", "Order", "Unmovable", 1202 "Reclaimable", "Movable", "Reserved", "CMA/Isolated"); 1203 printf("%.5s %.12s %.12s %.12s %.12s %.12s\n", graph_dotted_line, 1204 graph_dotted_line, graph_dotted_line, graph_dotted_line, 1205 graph_dotted_line, graph_dotted_line); 1206 1207 for (o = 0; o < MAX_PAGE_ORDER; o++) { 1208 printf("%5d", o); 1209 for (m = 0; m < MAX_MIGRATE_TYPES - 1; m++) { 1210 if (order_stats[o][m]) 1211 printf(" %'12d", order_stats[o][m]); 1212 else 1213 printf(" %12c", '.'); 1214 } 1215 printf("\n"); 1216 } 1217 } 1218 1219 static void print_slab_result(struct perf_session *session) 1220 { 1221 if (caller_flag) 1222 __print_slab_result(&root_caller_sorted, session, caller_lines, 1); 1223 if (alloc_flag) 1224 __print_slab_result(&root_alloc_sorted, session, alloc_lines, 0); 1225 print_slab_summary(); 1226 } 1227 1228 static void print_page_result(struct perf_session *session) 1229 { 1230 if (caller_flag || alloc_flag) 1231 print_gfp_flags(); 1232 if (caller_flag) 1233 __print_page_caller_result(session, caller_lines); 1234 if (alloc_flag) 1235 __print_page_alloc_result(session, alloc_lines); 1236 print_page_summary(); 1237 } 1238 1239 static void print_result(struct perf_session *session) 1240 { 1241 if (kmem_slab) 1242 print_slab_result(session); 1243 if (kmem_page) 1244 print_page_result(session); 1245 } 1246 1247 static LIST_HEAD(slab_caller_sort); 1248 static LIST_HEAD(slab_alloc_sort); 1249 static LIST_HEAD(page_caller_sort); 1250 static LIST_HEAD(page_alloc_sort); 1251 1252 static void sort_slab_insert(struct rb_root *root, struct alloc_stat *data, 1253 struct list_head *sort_list) 1254 { 1255 struct rb_node **new = &(root->rb_node); 1256 struct rb_node *parent = NULL; 1257 struct sort_dimension *sort; 1258 1259 while (*new) { 1260 struct alloc_stat *this; 1261 int cmp = 0; 1262 1263 this = rb_entry(*new, struct alloc_stat, node); 1264 parent = *new; 1265 1266 list_for_each_entry(sort, sort_list, list) { 1267 cmp = sort->cmp(data, this); 1268 if (cmp) 1269 break; 1270 } 1271 1272 if (cmp > 0) 1273 new = &((*new)->rb_left); 1274 else 1275 new = &((*new)->rb_right); 1276 } 1277 1278 rb_link_node(&data->node, parent, new); 1279 rb_insert_color(&data->node, root); 1280 } 1281 1282 static void __sort_slab_result(struct rb_root *root, struct rb_root *root_sorted, 1283 struct list_head *sort_list) 1284 { 1285 struct rb_node *node; 1286 struct alloc_stat *data; 1287 1288 for (;;) { 1289 node = rb_first(root); 1290 if (!node) 1291 break; 1292 1293 rb_erase(node, root); 1294 data = rb_entry(node, struct alloc_stat, node); 1295 sort_slab_insert(root_sorted, data, sort_list); 1296 } 1297 } 1298 1299 static void sort_page_insert(struct rb_root *root, struct page_stat *data, 1300 struct list_head *sort_list) 1301 { 1302 struct rb_node **new = &root->rb_node; 1303 struct rb_node *parent = NULL; 1304 struct sort_dimension *sort; 1305 1306 while (*new) { 1307 struct page_stat *this; 1308 int cmp = 0; 1309 1310 this = rb_entry(*new, struct page_stat, node); 1311 parent = *new; 1312 1313 list_for_each_entry(sort, sort_list, list) { 1314 cmp = sort->cmp(data, this); 1315 if (cmp) 1316 break; 1317 } 1318 1319 if (cmp > 0) 1320 new = &parent->rb_left; 1321 else 1322 new = &parent->rb_right; 1323 } 1324 1325 rb_link_node(&data->node, parent, new); 1326 rb_insert_color(&data->node, root); 1327 } 1328 1329 static void __sort_page_result(struct rb_root *root, struct rb_root *root_sorted, 1330 struct list_head *sort_list) 1331 { 1332 struct rb_node *node; 1333 struct page_stat *data; 1334 1335 for (;;) { 1336 node = rb_first(root); 1337 if (!node) 1338 break; 1339 1340 rb_erase(node, root); 1341 data = rb_entry(node, struct page_stat, node); 1342 sort_page_insert(root_sorted, data, sort_list); 1343 } 1344 } 1345 1346 static void sort_result(void) 1347 { 1348 if (kmem_slab) { 1349 __sort_slab_result(&root_alloc_stat, &root_alloc_sorted, 1350 &slab_alloc_sort); 1351 __sort_slab_result(&root_caller_stat, &root_caller_sorted, 1352 &slab_caller_sort); 1353 } 1354 if (kmem_page) { 1355 if (live_page) 1356 __sort_page_result(&page_live_tree, &page_alloc_sorted, 1357 &page_alloc_sort); 1358 else 1359 __sort_page_result(&page_alloc_tree, &page_alloc_sorted, 1360 &page_alloc_sort); 1361 1362 __sort_page_result(&page_caller_tree, &page_caller_sorted, 1363 &page_caller_sort); 1364 } 1365 } 1366 1367 static int __cmd_kmem(struct perf_session *session) 1368 { 1369 int err = -EINVAL; 1370 struct evsel *evsel; 1371 const struct evsel_str_handler kmem_tracepoints[] = { 1372 /* slab allocator */ 1373 { "kmem:kmalloc", perf_evsel__process_alloc_event, }, 1374 { "kmem:kmem_cache_alloc", perf_evsel__process_alloc_event, }, 1375 { "kmem:kmalloc_node", perf_evsel__process_alloc_node_event, }, 1376 { "kmem:kmem_cache_alloc_node", perf_evsel__process_alloc_node_event, }, 1377 { "kmem:kfree", perf_evsel__process_free_event, }, 1378 { "kmem:kmem_cache_free", perf_evsel__process_free_event, }, 1379 /* page allocator */ 1380 { "kmem:mm_page_alloc", perf_evsel__process_page_alloc_event, }, 1381 { "kmem:mm_page_free", perf_evsel__process_page_free_event, }, 1382 }; 1383 1384 if (!perf_session__has_traces(session, "kmem record")) 1385 goto out; 1386 1387 if (perf_session__set_tracepoints_handlers(session, kmem_tracepoints)) { 1388 pr_err("Initializing perf session tracepoint handlers failed\n"); 1389 goto out; 1390 } 1391 1392 evlist__for_each_entry(session->evlist, evsel) { 1393 if (!strcmp(perf_evsel__name(evsel), "kmem:mm_page_alloc") && 1394 perf_evsel__field(evsel, "pfn")) { 1395 use_pfn = true; 1396 break; 1397 } 1398 } 1399 1400 setup_pager(); 1401 err = perf_session__process_events(session); 1402 if (err != 0) { 1403 pr_err("error during process events: %d\n", err); 1404 goto out; 1405 } 1406 sort_result(); 1407 print_result(session); 1408 out: 1409 return err; 1410 } 1411 1412 /* slab sort keys */ 1413 static int ptr_cmp(void *a, void *b) 1414 { 1415 struct alloc_stat *l = a; 1416 struct alloc_stat *r = b; 1417 1418 if (l->ptr < r->ptr) 1419 return -1; 1420 else if (l->ptr > r->ptr) 1421 return 1; 1422 return 0; 1423 } 1424 1425 static struct sort_dimension ptr_sort_dimension = { 1426 .name = "ptr", 1427 .cmp = ptr_cmp, 1428 }; 1429 1430 static int slab_callsite_cmp(void *a, void *b) 1431 { 1432 struct alloc_stat *l = a; 1433 struct alloc_stat *r = b; 1434 1435 if (l->call_site < r->call_site) 1436 return -1; 1437 else if (l->call_site > r->call_site) 1438 return 1; 1439 return 0; 1440 } 1441 1442 static struct sort_dimension callsite_sort_dimension = { 1443 .name = "callsite", 1444 .cmp = slab_callsite_cmp, 1445 }; 1446 1447 static int hit_cmp(void *a, void *b) 1448 { 1449 struct alloc_stat *l = a; 1450 struct alloc_stat *r = b; 1451 1452 if (l->hit < r->hit) 1453 return -1; 1454 else if (l->hit > r->hit) 1455 return 1; 1456 return 0; 1457 } 1458 1459 static struct sort_dimension hit_sort_dimension = { 1460 .name = "hit", 1461 .cmp = hit_cmp, 1462 }; 1463 1464 static int bytes_cmp(void *a, void *b) 1465 { 1466 struct alloc_stat *l = a; 1467 struct alloc_stat *r = b; 1468 1469 if (l->bytes_alloc < r->bytes_alloc) 1470 return -1; 1471 else if (l->bytes_alloc > r->bytes_alloc) 1472 return 1; 1473 return 0; 1474 } 1475 1476 static struct sort_dimension bytes_sort_dimension = { 1477 .name = "bytes", 1478 .cmp = bytes_cmp, 1479 }; 1480 1481 static int frag_cmp(void *a, void *b) 1482 { 1483 double x, y; 1484 struct alloc_stat *l = a; 1485 struct alloc_stat *r = b; 1486 1487 x = fragmentation(l->bytes_req, l->bytes_alloc); 1488 y = fragmentation(r->bytes_req, r->bytes_alloc); 1489 1490 if (x < y) 1491 return -1; 1492 else if (x > y) 1493 return 1; 1494 return 0; 1495 } 1496 1497 static struct sort_dimension frag_sort_dimension = { 1498 .name = "frag", 1499 .cmp = frag_cmp, 1500 }; 1501 1502 static int pingpong_cmp(void *a, void *b) 1503 { 1504 struct alloc_stat *l = a; 1505 struct alloc_stat *r = b; 1506 1507 if (l->pingpong < r->pingpong) 1508 return -1; 1509 else if (l->pingpong > r->pingpong) 1510 return 1; 1511 return 0; 1512 } 1513 1514 static struct sort_dimension pingpong_sort_dimension = { 1515 .name = "pingpong", 1516 .cmp = pingpong_cmp, 1517 }; 1518 1519 /* page sort keys */ 1520 static int page_cmp(void *a, void *b) 1521 { 1522 struct page_stat *l = a; 1523 struct page_stat *r = b; 1524 1525 if (l->page < r->page) 1526 return -1; 1527 else if (l->page > r->page) 1528 return 1; 1529 return 0; 1530 } 1531 1532 static struct sort_dimension page_sort_dimension = { 1533 .name = "page", 1534 .cmp = page_cmp, 1535 }; 1536 1537 static int page_callsite_cmp(void *a, void *b) 1538 { 1539 struct page_stat *l = a; 1540 struct page_stat *r = b; 1541 1542 if (l->callsite < r->callsite) 1543 return -1; 1544 else if (l->callsite > r->callsite) 1545 return 1; 1546 return 0; 1547 } 1548 1549 static struct sort_dimension page_callsite_sort_dimension = { 1550 .name = "callsite", 1551 .cmp = page_callsite_cmp, 1552 }; 1553 1554 static int page_hit_cmp(void *a, void *b) 1555 { 1556 struct page_stat *l = a; 1557 struct page_stat *r = b; 1558 1559 if (l->nr_alloc < r->nr_alloc) 1560 return -1; 1561 else if (l->nr_alloc > r->nr_alloc) 1562 return 1; 1563 return 0; 1564 } 1565 1566 static struct sort_dimension page_hit_sort_dimension = { 1567 .name = "hit", 1568 .cmp = page_hit_cmp, 1569 }; 1570 1571 static int page_bytes_cmp(void *a, void *b) 1572 { 1573 struct page_stat *l = a; 1574 struct page_stat *r = b; 1575 1576 if (l->alloc_bytes < r->alloc_bytes) 1577 return -1; 1578 else if (l->alloc_bytes > r->alloc_bytes) 1579 return 1; 1580 return 0; 1581 } 1582 1583 static struct sort_dimension page_bytes_sort_dimension = { 1584 .name = "bytes", 1585 .cmp = page_bytes_cmp, 1586 }; 1587 1588 static int page_order_cmp(void *a, void *b) 1589 { 1590 struct page_stat *l = a; 1591 struct page_stat *r = b; 1592 1593 if (l->order < r->order) 1594 return -1; 1595 else if (l->order > r->order) 1596 return 1; 1597 return 0; 1598 } 1599 1600 static struct sort_dimension page_order_sort_dimension = { 1601 .name = "order", 1602 .cmp = page_order_cmp, 1603 }; 1604 1605 static int migrate_type_cmp(void *a, void *b) 1606 { 1607 struct page_stat *l = a; 1608 struct page_stat *r = b; 1609 1610 /* for internal use to find free'd page */ 1611 if (l->migrate_type == -1U) 1612 return 0; 1613 1614 if (l->migrate_type < r->migrate_type) 1615 return -1; 1616 else if (l->migrate_type > r->migrate_type) 1617 return 1; 1618 return 0; 1619 } 1620 1621 static struct sort_dimension migrate_type_sort_dimension = { 1622 .name = "migtype", 1623 .cmp = migrate_type_cmp, 1624 }; 1625 1626 static int gfp_flags_cmp(void *a, void *b) 1627 { 1628 struct page_stat *l = a; 1629 struct page_stat *r = b; 1630 1631 /* for internal use to find free'd page */ 1632 if (l->gfp_flags == -1U) 1633 return 0; 1634 1635 if (l->gfp_flags < r->gfp_flags) 1636 return -1; 1637 else if (l->gfp_flags > r->gfp_flags) 1638 return 1; 1639 return 0; 1640 } 1641 1642 static struct sort_dimension gfp_flags_sort_dimension = { 1643 .name = "gfp", 1644 .cmp = gfp_flags_cmp, 1645 }; 1646 1647 static struct sort_dimension *slab_sorts[] = { 1648 &ptr_sort_dimension, 1649 &callsite_sort_dimension, 1650 &hit_sort_dimension, 1651 &bytes_sort_dimension, 1652 &frag_sort_dimension, 1653 &pingpong_sort_dimension, 1654 }; 1655 1656 static struct sort_dimension *page_sorts[] = { 1657 &page_sort_dimension, 1658 &page_callsite_sort_dimension, 1659 &page_hit_sort_dimension, 1660 &page_bytes_sort_dimension, 1661 &page_order_sort_dimension, 1662 &migrate_type_sort_dimension, 1663 &gfp_flags_sort_dimension, 1664 }; 1665 1666 static int slab_sort_dimension__add(const char *tok, struct list_head *list) 1667 { 1668 struct sort_dimension *sort; 1669 int i; 1670 1671 for (i = 0; i < (int)ARRAY_SIZE(slab_sorts); i++) { 1672 if (!strcmp(slab_sorts[i]->name, tok)) { 1673 sort = memdup(slab_sorts[i], sizeof(*slab_sorts[i])); 1674 if (!sort) { 1675 pr_err("%s: memdup failed\n", __func__); 1676 return -1; 1677 } 1678 list_add_tail(&sort->list, list); 1679 return 0; 1680 } 1681 } 1682 1683 return -1; 1684 } 1685 1686 static int page_sort_dimension__add(const char *tok, struct list_head *list) 1687 { 1688 struct sort_dimension *sort; 1689 int i; 1690 1691 for (i = 0; i < (int)ARRAY_SIZE(page_sorts); i++) { 1692 if (!strcmp(page_sorts[i]->name, tok)) { 1693 sort = memdup(page_sorts[i], sizeof(*page_sorts[i])); 1694 if (!sort) { 1695 pr_err("%s: memdup failed\n", __func__); 1696 return -1; 1697 } 1698 list_add_tail(&sort->list, list); 1699 return 0; 1700 } 1701 } 1702 1703 return -1; 1704 } 1705 1706 static int setup_slab_sorting(struct list_head *sort_list, const char *arg) 1707 { 1708 char *tok; 1709 char *str = strdup(arg); 1710 char *pos = str; 1711 1712 if (!str) { 1713 pr_err("%s: strdup failed\n", __func__); 1714 return -1; 1715 } 1716 1717 while (true) { 1718 tok = strsep(&pos, ","); 1719 if (!tok) 1720 break; 1721 if (slab_sort_dimension__add(tok, sort_list) < 0) { 1722 pr_err("Unknown slab --sort key: '%s'", tok); 1723 free(str); 1724 return -1; 1725 } 1726 } 1727 1728 free(str); 1729 return 0; 1730 } 1731 1732 static int setup_page_sorting(struct list_head *sort_list, const char *arg) 1733 { 1734 char *tok; 1735 char *str = strdup(arg); 1736 char *pos = str; 1737 1738 if (!str) { 1739 pr_err("%s: strdup failed\n", __func__); 1740 return -1; 1741 } 1742 1743 while (true) { 1744 tok = strsep(&pos, ","); 1745 if (!tok) 1746 break; 1747 if (page_sort_dimension__add(tok, sort_list) < 0) { 1748 pr_err("Unknown page --sort key: '%s'", tok); 1749 free(str); 1750 return -1; 1751 } 1752 } 1753 1754 free(str); 1755 return 0; 1756 } 1757 1758 static int parse_sort_opt(const struct option *opt __maybe_unused, 1759 const char *arg, int unset __maybe_unused) 1760 { 1761 if (!arg) 1762 return -1; 1763 1764 if (kmem_page > kmem_slab || 1765 (kmem_page == 0 && kmem_slab == 0 && kmem_default == KMEM_PAGE)) { 1766 if (caller_flag > alloc_flag) 1767 return setup_page_sorting(&page_caller_sort, arg); 1768 else 1769 return setup_page_sorting(&page_alloc_sort, arg); 1770 } else { 1771 if (caller_flag > alloc_flag) 1772 return setup_slab_sorting(&slab_caller_sort, arg); 1773 else 1774 return setup_slab_sorting(&slab_alloc_sort, arg); 1775 } 1776 1777 return 0; 1778 } 1779 1780 static int parse_caller_opt(const struct option *opt __maybe_unused, 1781 const char *arg __maybe_unused, 1782 int unset __maybe_unused) 1783 { 1784 caller_flag = (alloc_flag + 1); 1785 return 0; 1786 } 1787 1788 static int parse_alloc_opt(const struct option *opt __maybe_unused, 1789 const char *arg __maybe_unused, 1790 int unset __maybe_unused) 1791 { 1792 alloc_flag = (caller_flag + 1); 1793 return 0; 1794 } 1795 1796 static int parse_slab_opt(const struct option *opt __maybe_unused, 1797 const char *arg __maybe_unused, 1798 int unset __maybe_unused) 1799 { 1800 kmem_slab = (kmem_page + 1); 1801 return 0; 1802 } 1803 1804 static int parse_page_opt(const struct option *opt __maybe_unused, 1805 const char *arg __maybe_unused, 1806 int unset __maybe_unused) 1807 { 1808 kmem_page = (kmem_slab + 1); 1809 return 0; 1810 } 1811 1812 static int parse_line_opt(const struct option *opt __maybe_unused, 1813 const char *arg, int unset __maybe_unused) 1814 { 1815 int lines; 1816 1817 if (!arg) 1818 return -1; 1819 1820 lines = strtoul(arg, NULL, 10); 1821 1822 if (caller_flag > alloc_flag) 1823 caller_lines = lines; 1824 else 1825 alloc_lines = lines; 1826 1827 return 0; 1828 } 1829 1830 static int __cmd_record(int argc, const char **argv) 1831 { 1832 const char * const record_args[] = { 1833 "record", "-a", "-R", "-c", "1", 1834 }; 1835 const char * const slab_events[] = { 1836 "-e", "kmem:kmalloc", 1837 "-e", "kmem:kmalloc_node", 1838 "-e", "kmem:kfree", 1839 "-e", "kmem:kmem_cache_alloc", 1840 "-e", "kmem:kmem_cache_alloc_node", 1841 "-e", "kmem:kmem_cache_free", 1842 }; 1843 const char * const page_events[] = { 1844 "-e", "kmem:mm_page_alloc", 1845 "-e", "kmem:mm_page_free", 1846 }; 1847 unsigned int rec_argc, i, j; 1848 const char **rec_argv; 1849 1850 rec_argc = ARRAY_SIZE(record_args) + argc - 1; 1851 if (kmem_slab) 1852 rec_argc += ARRAY_SIZE(slab_events); 1853 if (kmem_page) 1854 rec_argc += ARRAY_SIZE(page_events) + 1; /* for -g */ 1855 1856 rec_argv = calloc(rec_argc + 1, sizeof(char *)); 1857 1858 if (rec_argv == NULL) 1859 return -ENOMEM; 1860 1861 for (i = 0; i < ARRAY_SIZE(record_args); i++) 1862 rec_argv[i] = strdup(record_args[i]); 1863 1864 if (kmem_slab) { 1865 for (j = 0; j < ARRAY_SIZE(slab_events); j++, i++) 1866 rec_argv[i] = strdup(slab_events[j]); 1867 } 1868 if (kmem_page) { 1869 rec_argv[i++] = strdup("-g"); 1870 1871 for (j = 0; j < ARRAY_SIZE(page_events); j++, i++) 1872 rec_argv[i] = strdup(page_events[j]); 1873 } 1874 1875 for (j = 1; j < (unsigned int)argc; j++, i++) 1876 rec_argv[i] = argv[j]; 1877 1878 return cmd_record(i, rec_argv); 1879 } 1880 1881 static int kmem_config(const char *var, const char *value, void *cb __maybe_unused) 1882 { 1883 if (!strcmp(var, "kmem.default")) { 1884 if (!strcmp(value, "slab")) 1885 kmem_default = KMEM_SLAB; 1886 else if (!strcmp(value, "page")) 1887 kmem_default = KMEM_PAGE; 1888 else 1889 pr_err("invalid default value ('slab' or 'page' required): %s\n", 1890 value); 1891 return 0; 1892 } 1893 1894 return 0; 1895 } 1896 1897 int cmd_kmem(int argc, const char **argv) 1898 { 1899 const char * const default_slab_sort = "frag,hit,bytes"; 1900 const char * const default_page_sort = "bytes,hit"; 1901 struct perf_data data = { 1902 .mode = PERF_DATA_MODE_READ, 1903 }; 1904 const struct option kmem_options[] = { 1905 OPT_STRING('i', "input", &input_name, "file", "input file name"), 1906 OPT_INCR('v', "verbose", &verbose, 1907 "be more verbose (show symbol address, etc)"), 1908 OPT_CALLBACK_NOOPT(0, "caller", NULL, NULL, 1909 "show per-callsite statistics", parse_caller_opt), 1910 OPT_CALLBACK_NOOPT(0, "alloc", NULL, NULL, 1911 "show per-allocation statistics", parse_alloc_opt), 1912 OPT_CALLBACK('s', "sort", NULL, "key[,key2...]", 1913 "sort by keys: ptr, callsite, bytes, hit, pingpong, frag, " 1914 "page, order, migtype, gfp", parse_sort_opt), 1915 OPT_CALLBACK('l', "line", NULL, "num", "show n lines", parse_line_opt), 1916 OPT_BOOLEAN(0, "raw-ip", &raw_ip, "show raw ip instead of symbol"), 1917 OPT_BOOLEAN('f', "force", &data.force, "don't complain, do it"), 1918 OPT_CALLBACK_NOOPT(0, "slab", NULL, NULL, "Analyze slab allocator", 1919 parse_slab_opt), 1920 OPT_CALLBACK_NOOPT(0, "page", NULL, NULL, "Analyze page allocator", 1921 parse_page_opt), 1922 OPT_BOOLEAN(0, "live", &live_page, "Show live page stat"), 1923 OPT_STRING(0, "time", &time_str, "str", 1924 "Time span of interest (start,stop)"), 1925 OPT_END() 1926 }; 1927 const char *const kmem_subcommands[] = { "record", "stat", NULL }; 1928 const char *kmem_usage[] = { 1929 NULL, 1930 NULL 1931 }; 1932 struct perf_session *session; 1933 static const char errmsg[] = "No %s allocation events found. Have you run 'perf kmem record --%s'?\n"; 1934 int ret = perf_config(kmem_config, NULL); 1935 1936 if (ret) 1937 return ret; 1938 1939 argc = parse_options_subcommand(argc, argv, kmem_options, 1940 kmem_subcommands, kmem_usage, 0); 1941 1942 if (!argc) 1943 usage_with_options(kmem_usage, kmem_options); 1944 1945 if (kmem_slab == 0 && kmem_page == 0) { 1946 if (kmem_default == KMEM_SLAB) 1947 kmem_slab = 1; 1948 else 1949 kmem_page = 1; 1950 } 1951 1952 if (!strncmp(argv[0], "rec", 3)) { 1953 symbol__init(NULL); 1954 return __cmd_record(argc, argv); 1955 } 1956 1957 data.path = input_name; 1958 1959 kmem_session = session = perf_session__new(&data, false, &perf_kmem); 1960 if (IS_ERR(session)) 1961 return PTR_ERR(session); 1962 1963 ret = -1; 1964 1965 if (kmem_slab) { 1966 if (!perf_evlist__find_tracepoint_by_name(session->evlist, 1967 "kmem:kmalloc")) { 1968 pr_err(errmsg, "slab", "slab"); 1969 goto out_delete; 1970 } 1971 } 1972 1973 if (kmem_page) { 1974 struct evsel *evsel; 1975 1976 evsel = perf_evlist__find_tracepoint_by_name(session->evlist, 1977 "kmem:mm_page_alloc"); 1978 if (evsel == NULL) { 1979 pr_err(errmsg, "page", "page"); 1980 goto out_delete; 1981 } 1982 1983 kmem_page_size = tep_get_page_size(evsel->tp_format->tep); 1984 symbol_conf.use_callchain = true; 1985 } 1986 1987 symbol__init(&session->header.env); 1988 1989 if (perf_time__parse_str(&ptime, time_str) != 0) { 1990 pr_err("Invalid time string\n"); 1991 ret = -EINVAL; 1992 goto out_delete; 1993 } 1994 1995 if (!strcmp(argv[0], "stat")) { 1996 setlocale(LC_ALL, ""); 1997 1998 if (cpu__setup_cpunode_map()) 1999 goto out_delete; 2000 2001 if (list_empty(&slab_caller_sort)) 2002 setup_slab_sorting(&slab_caller_sort, default_slab_sort); 2003 if (list_empty(&slab_alloc_sort)) 2004 setup_slab_sorting(&slab_alloc_sort, default_slab_sort); 2005 if (list_empty(&page_caller_sort)) 2006 setup_page_sorting(&page_caller_sort, default_page_sort); 2007 if (list_empty(&page_alloc_sort)) 2008 setup_page_sorting(&page_alloc_sort, default_page_sort); 2009 2010 if (kmem_page) { 2011 setup_page_sorting(&page_alloc_sort_input, 2012 "page,order,migtype,gfp"); 2013 setup_page_sorting(&page_caller_sort_input, 2014 "callsite,order,migtype,gfp"); 2015 } 2016 ret = __cmd_kmem(session); 2017 } else 2018 usage_with_options(kmem_usage, kmem_options); 2019 2020 out_delete: 2021 perf_session__delete(session); 2022 2023 return ret; 2024 } 2025 2026