1 #define _GNU_SOURCE 2 #include <ctype.h> 3 #include <errno.h> 4 #include <fcntl.h> 5 #include <limits.h> 6 #include <dirent.h> 7 #include <signal.h> 8 #include <stdio.h> 9 #include <stdlib.h> 10 #include <stdbool.h> 11 #include <string.h> 12 #include <unistd.h> 13 14 #include <sys/mman.h> 15 #include <sys/wait.h> 16 #include <sys/types.h> 17 #include <sys/stat.h> 18 #include <sys/sysmacros.h> 19 #include <sys/vfs.h> 20 21 #include "linux/magic.h" 22 23 #include "vm_util.h" 24 25 #ifndef MADV_PAGEOUT 26 #define MADV_PAGEOUT 21 27 #endif 28 #ifndef MADV_POPULATE_READ 29 #define MADV_POPULATE_READ 22 30 #endif 31 #ifndef MADV_COLLAPSE 32 #define MADV_COLLAPSE 25 33 #endif 34 35 #define BASE_ADDR ((void *)(1UL << 30)) 36 static unsigned long hpage_pmd_size; 37 static unsigned long page_size; 38 static int hpage_pmd_nr; 39 40 #define THP_SYSFS "/sys/kernel/mm/transparent_hugepage/" 41 #define PID_SMAPS "/proc/self/smaps" 42 #define TEST_FILE "collapse_test_file" 43 44 #define MAX_LINE_LENGTH 500 45 46 enum vma_type { 47 VMA_ANON, 48 VMA_FILE, 49 VMA_SHMEM, 50 }; 51 52 struct mem_ops { 53 void *(*setup_area)(int nr_hpages); 54 void (*cleanup_area)(void *p, unsigned long size); 55 void (*fault)(void *p, unsigned long start, unsigned long end); 56 bool (*check_huge)(void *addr, int nr_hpages); 57 const char *name; 58 }; 59 60 static struct mem_ops *file_ops; 61 static struct mem_ops *anon_ops; 62 static struct mem_ops *shmem_ops; 63 64 struct collapse_context { 65 void (*collapse)(const char *msg, char *p, int nr_hpages, 66 struct mem_ops *ops, bool expect); 67 bool enforce_pte_scan_limits; 68 const char *name; 69 }; 70 71 static struct collapse_context *khugepaged_context; 72 static struct collapse_context *madvise_context; 73 74 struct file_info { 75 const char *dir; 76 char path[PATH_MAX]; 77 enum vma_type type; 78 int fd; 79 char dev_queue_read_ahead_path[PATH_MAX]; 80 }; 81 82 static struct file_info finfo; 83 84 enum thp_enabled { 85 THP_ALWAYS, 86 THP_MADVISE, 87 THP_NEVER, 88 }; 89 90 static const char *thp_enabled_strings[] = { 91 "always", 92 "madvise", 93 "never", 94 NULL 95 }; 96 97 enum thp_defrag { 98 THP_DEFRAG_ALWAYS, 99 THP_DEFRAG_DEFER, 100 THP_DEFRAG_DEFER_MADVISE, 101 THP_DEFRAG_MADVISE, 102 THP_DEFRAG_NEVER, 103 }; 104 105 static const char *thp_defrag_strings[] = { 106 "always", 107 "defer", 108 "defer+madvise", 109 "madvise", 110 "never", 111 NULL 112 }; 113 114 enum shmem_enabled { 115 SHMEM_ALWAYS, 116 SHMEM_WITHIN_SIZE, 117 SHMEM_ADVISE, 118 SHMEM_NEVER, 119 SHMEM_DENY, 120 SHMEM_FORCE, 121 }; 122 123 static const char *shmem_enabled_strings[] = { 124 "always", 125 "within_size", 126 "advise", 127 "never", 128 "deny", 129 "force", 130 NULL 131 }; 132 133 struct khugepaged_settings { 134 bool defrag; 135 unsigned int alloc_sleep_millisecs; 136 unsigned int scan_sleep_millisecs; 137 unsigned int max_ptes_none; 138 unsigned int max_ptes_swap; 139 unsigned int max_ptes_shared; 140 unsigned long pages_to_scan; 141 }; 142 143 struct settings { 144 enum thp_enabled thp_enabled; 145 enum thp_defrag thp_defrag; 146 enum shmem_enabled shmem_enabled; 147 bool use_zero_page; 148 struct khugepaged_settings khugepaged; 149 unsigned long read_ahead_kb; 150 }; 151 152 static struct settings saved_settings; 153 static bool skip_settings_restore; 154 155 static int exit_status; 156 157 static void success(const char *msg) 158 { 159 printf(" \e[32m%s\e[0m\n", msg); 160 } 161 162 static void fail(const char *msg) 163 { 164 printf(" \e[31m%s\e[0m\n", msg); 165 exit_status++; 166 } 167 168 static void skip(const char *msg) 169 { 170 printf(" \e[33m%s\e[0m\n", msg); 171 } 172 173 static int read_file(const char *path, char *buf, size_t buflen) 174 { 175 int fd; 176 ssize_t numread; 177 178 fd = open(path, O_RDONLY); 179 if (fd == -1) 180 return 0; 181 182 numread = read(fd, buf, buflen - 1); 183 if (numread < 1) { 184 close(fd); 185 return 0; 186 } 187 188 buf[numread] = '\0'; 189 close(fd); 190 191 return (unsigned int) numread; 192 } 193 194 static int write_file(const char *path, const char *buf, size_t buflen) 195 { 196 int fd; 197 ssize_t numwritten; 198 199 fd = open(path, O_WRONLY); 200 if (fd == -1) { 201 printf("open(%s)\n", path); 202 exit(EXIT_FAILURE); 203 return 0; 204 } 205 206 numwritten = write(fd, buf, buflen - 1); 207 close(fd); 208 if (numwritten < 1) { 209 printf("write(%s)\n", buf); 210 exit(EXIT_FAILURE); 211 return 0; 212 } 213 214 return (unsigned int) numwritten; 215 } 216 217 static int read_string(const char *name, const char *strings[]) 218 { 219 char path[PATH_MAX]; 220 char buf[256]; 221 char *c; 222 int ret; 223 224 ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name); 225 if (ret >= PATH_MAX) { 226 printf("%s: Pathname is too long\n", __func__); 227 exit(EXIT_FAILURE); 228 } 229 230 if (!read_file(path, buf, sizeof(buf))) { 231 perror(path); 232 exit(EXIT_FAILURE); 233 } 234 235 c = strchr(buf, '['); 236 if (!c) { 237 printf("%s: Parse failure\n", __func__); 238 exit(EXIT_FAILURE); 239 } 240 241 c++; 242 memmove(buf, c, sizeof(buf) - (c - buf)); 243 244 c = strchr(buf, ']'); 245 if (!c) { 246 printf("%s: Parse failure\n", __func__); 247 exit(EXIT_FAILURE); 248 } 249 *c = '\0'; 250 251 ret = 0; 252 while (strings[ret]) { 253 if (!strcmp(strings[ret], buf)) 254 return ret; 255 ret++; 256 } 257 258 printf("Failed to parse %s\n", name); 259 exit(EXIT_FAILURE); 260 } 261 262 static void write_string(const char *name, const char *val) 263 { 264 char path[PATH_MAX]; 265 int ret; 266 267 ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name); 268 if (ret >= PATH_MAX) { 269 printf("%s: Pathname is too long\n", __func__); 270 exit(EXIT_FAILURE); 271 } 272 273 if (!write_file(path, val, strlen(val) + 1)) { 274 perror(path); 275 exit(EXIT_FAILURE); 276 } 277 } 278 279 static const unsigned long _read_num(const char *path) 280 { 281 char buf[21]; 282 283 if (read_file(path, buf, sizeof(buf)) < 0) { 284 perror("read_file(read_num)"); 285 exit(EXIT_FAILURE); 286 } 287 288 return strtoul(buf, NULL, 10); 289 } 290 291 static const unsigned long read_num(const char *name) 292 { 293 char path[PATH_MAX]; 294 int ret; 295 296 ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name); 297 if (ret >= PATH_MAX) { 298 printf("%s: Pathname is too long\n", __func__); 299 exit(EXIT_FAILURE); 300 } 301 return _read_num(path); 302 } 303 304 static void _write_num(const char *path, unsigned long num) 305 { 306 char buf[21]; 307 308 sprintf(buf, "%ld", num); 309 if (!write_file(path, buf, strlen(buf) + 1)) { 310 perror(path); 311 exit(EXIT_FAILURE); 312 } 313 } 314 315 static void write_num(const char *name, unsigned long num) 316 { 317 char path[PATH_MAX]; 318 int ret; 319 320 ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name); 321 if (ret >= PATH_MAX) { 322 printf("%s: Pathname is too long\n", __func__); 323 exit(EXIT_FAILURE); 324 } 325 _write_num(path, num); 326 } 327 328 static void write_settings(struct settings *settings) 329 { 330 struct khugepaged_settings *khugepaged = &settings->khugepaged; 331 332 write_string("enabled", thp_enabled_strings[settings->thp_enabled]); 333 write_string("defrag", thp_defrag_strings[settings->thp_defrag]); 334 write_string("shmem_enabled", 335 shmem_enabled_strings[settings->shmem_enabled]); 336 write_num("use_zero_page", settings->use_zero_page); 337 338 write_num("khugepaged/defrag", khugepaged->defrag); 339 write_num("khugepaged/alloc_sleep_millisecs", 340 khugepaged->alloc_sleep_millisecs); 341 write_num("khugepaged/scan_sleep_millisecs", 342 khugepaged->scan_sleep_millisecs); 343 write_num("khugepaged/max_ptes_none", khugepaged->max_ptes_none); 344 write_num("khugepaged/max_ptes_swap", khugepaged->max_ptes_swap); 345 write_num("khugepaged/max_ptes_shared", khugepaged->max_ptes_shared); 346 write_num("khugepaged/pages_to_scan", khugepaged->pages_to_scan); 347 348 if (file_ops && finfo.type == VMA_FILE) 349 _write_num(finfo.dev_queue_read_ahead_path, 350 settings->read_ahead_kb); 351 } 352 353 #define MAX_SETTINGS_DEPTH 4 354 static struct settings settings_stack[MAX_SETTINGS_DEPTH]; 355 static int settings_index; 356 357 static struct settings *current_settings(void) 358 { 359 if (!settings_index) { 360 printf("Fail: No settings set"); 361 exit(EXIT_FAILURE); 362 } 363 return settings_stack + settings_index - 1; 364 } 365 366 static void push_settings(struct settings *settings) 367 { 368 if (settings_index >= MAX_SETTINGS_DEPTH) { 369 printf("Fail: Settings stack exceeded"); 370 exit(EXIT_FAILURE); 371 } 372 settings_stack[settings_index++] = *settings; 373 write_settings(current_settings()); 374 } 375 376 static void pop_settings(void) 377 { 378 if (settings_index <= 0) { 379 printf("Fail: Settings stack empty"); 380 exit(EXIT_FAILURE); 381 } 382 --settings_index; 383 write_settings(current_settings()); 384 } 385 386 static void restore_settings(int sig) 387 { 388 if (skip_settings_restore) 389 goto out; 390 391 printf("Restore THP and khugepaged settings..."); 392 write_settings(&saved_settings); 393 success("OK"); 394 if (sig) 395 exit(EXIT_FAILURE); 396 out: 397 exit(exit_status); 398 } 399 400 static void save_settings(void) 401 { 402 printf("Save THP and khugepaged settings..."); 403 saved_settings = (struct settings) { 404 .thp_enabled = read_string("enabled", thp_enabled_strings), 405 .thp_defrag = read_string("defrag", thp_defrag_strings), 406 .shmem_enabled = 407 read_string("shmem_enabled", shmem_enabled_strings), 408 .use_zero_page = read_num("use_zero_page"), 409 }; 410 saved_settings.khugepaged = (struct khugepaged_settings) { 411 .defrag = read_num("khugepaged/defrag"), 412 .alloc_sleep_millisecs = 413 read_num("khugepaged/alloc_sleep_millisecs"), 414 .scan_sleep_millisecs = 415 read_num("khugepaged/scan_sleep_millisecs"), 416 .max_ptes_none = read_num("khugepaged/max_ptes_none"), 417 .max_ptes_swap = read_num("khugepaged/max_ptes_swap"), 418 .max_ptes_shared = read_num("khugepaged/max_ptes_shared"), 419 .pages_to_scan = read_num("khugepaged/pages_to_scan"), 420 }; 421 if (file_ops && finfo.type == VMA_FILE) 422 saved_settings.read_ahead_kb = 423 _read_num(finfo.dev_queue_read_ahead_path); 424 425 success("OK"); 426 427 signal(SIGTERM, restore_settings); 428 signal(SIGINT, restore_settings); 429 signal(SIGHUP, restore_settings); 430 signal(SIGQUIT, restore_settings); 431 } 432 433 static void get_finfo(const char *dir) 434 { 435 struct stat path_stat; 436 struct statfs fs; 437 char buf[1 << 10]; 438 char path[PATH_MAX]; 439 char *str, *end; 440 441 finfo.dir = dir; 442 stat(finfo.dir, &path_stat); 443 if (!S_ISDIR(path_stat.st_mode)) { 444 printf("%s: Not a directory (%s)\n", __func__, finfo.dir); 445 exit(EXIT_FAILURE); 446 } 447 if (snprintf(finfo.path, sizeof(finfo.path), "%s/" TEST_FILE, 448 finfo.dir) >= sizeof(finfo.path)) { 449 printf("%s: Pathname is too long\n", __func__); 450 exit(EXIT_FAILURE); 451 } 452 if (statfs(finfo.dir, &fs)) { 453 perror("statfs()"); 454 exit(EXIT_FAILURE); 455 } 456 finfo.type = fs.f_type == TMPFS_MAGIC ? VMA_SHMEM : VMA_FILE; 457 if (finfo.type == VMA_SHMEM) 458 return; 459 460 /* Find owning device's queue/read_ahead_kb control */ 461 if (snprintf(path, sizeof(path), "/sys/dev/block/%d:%d/uevent", 462 major(path_stat.st_dev), minor(path_stat.st_dev)) 463 >= sizeof(path)) { 464 printf("%s: Pathname is too long\n", __func__); 465 exit(EXIT_FAILURE); 466 } 467 if (read_file(path, buf, sizeof(buf)) < 0) { 468 perror("read_file(read_num)"); 469 exit(EXIT_FAILURE); 470 } 471 if (strstr(buf, "DEVTYPE=disk")) { 472 /* Found it */ 473 if (snprintf(finfo.dev_queue_read_ahead_path, 474 sizeof(finfo.dev_queue_read_ahead_path), 475 "/sys/dev/block/%d:%d/queue/read_ahead_kb", 476 major(path_stat.st_dev), minor(path_stat.st_dev)) 477 >= sizeof(finfo.dev_queue_read_ahead_path)) { 478 printf("%s: Pathname is too long\n", __func__); 479 exit(EXIT_FAILURE); 480 } 481 return; 482 } 483 if (!strstr(buf, "DEVTYPE=partition")) { 484 printf("%s: Unknown device type: %s\n", __func__, path); 485 exit(EXIT_FAILURE); 486 } 487 /* 488 * Partition of block device - need to find actual device. 489 * Using naming convention that devnameN is partition of 490 * device devname. 491 */ 492 str = strstr(buf, "DEVNAME="); 493 if (!str) { 494 printf("%s: Could not read: %s", __func__, path); 495 exit(EXIT_FAILURE); 496 } 497 str += 8; 498 end = str; 499 while (*end) { 500 if (isdigit(*end)) { 501 *end = '\0'; 502 if (snprintf(finfo.dev_queue_read_ahead_path, 503 sizeof(finfo.dev_queue_read_ahead_path), 504 "/sys/block/%s/queue/read_ahead_kb", 505 str) >= sizeof(finfo.dev_queue_read_ahead_path)) { 506 printf("%s: Pathname is too long\n", __func__); 507 exit(EXIT_FAILURE); 508 } 509 return; 510 } 511 ++end; 512 } 513 printf("%s: Could not read: %s\n", __func__, path); 514 exit(EXIT_FAILURE); 515 } 516 517 static bool check_swap(void *addr, unsigned long size) 518 { 519 bool swap = false; 520 int ret; 521 FILE *fp; 522 char buffer[MAX_LINE_LENGTH]; 523 char addr_pattern[MAX_LINE_LENGTH]; 524 525 ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "%08lx-", 526 (unsigned long) addr); 527 if (ret >= MAX_LINE_LENGTH) { 528 printf("%s: Pattern is too long\n", __func__); 529 exit(EXIT_FAILURE); 530 } 531 532 533 fp = fopen(PID_SMAPS, "r"); 534 if (!fp) { 535 printf("%s: Failed to open file %s\n", __func__, PID_SMAPS); 536 exit(EXIT_FAILURE); 537 } 538 if (!check_for_pattern(fp, addr_pattern, buffer, sizeof(buffer))) 539 goto err_out; 540 541 ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "Swap:%19ld kB", 542 size >> 10); 543 if (ret >= MAX_LINE_LENGTH) { 544 printf("%s: Pattern is too long\n", __func__); 545 exit(EXIT_FAILURE); 546 } 547 /* 548 * Fetch the Swap: in the same block and check whether it got 549 * the expected number of hugeepages next. 550 */ 551 if (!check_for_pattern(fp, "Swap:", buffer, sizeof(buffer))) 552 goto err_out; 553 554 if (strncmp(buffer, addr_pattern, strlen(addr_pattern))) 555 goto err_out; 556 557 swap = true; 558 err_out: 559 fclose(fp); 560 return swap; 561 } 562 563 static void *alloc_mapping(int nr) 564 { 565 void *p; 566 567 p = mmap(BASE_ADDR, nr * hpage_pmd_size, PROT_READ | PROT_WRITE, 568 MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); 569 if (p != BASE_ADDR) { 570 printf("Failed to allocate VMA at %p\n", BASE_ADDR); 571 exit(EXIT_FAILURE); 572 } 573 574 return p; 575 } 576 577 static void fill_memory(int *p, unsigned long start, unsigned long end) 578 { 579 int i; 580 581 for (i = start / page_size; i < end / page_size; i++) 582 p[i * page_size / sizeof(*p)] = i + 0xdead0000; 583 } 584 585 /* 586 * MADV_COLLAPSE is a best-effort request and may fail if an internal 587 * resource is temporarily unavailable, in which case it will set errno to 588 * EAGAIN. In such a case, immediately reattempt the operation one more 589 * time. 590 */ 591 static int madvise_collapse_retry(void *p, unsigned long size) 592 { 593 bool retry = true; 594 int ret; 595 596 retry: 597 ret = madvise(p, size, MADV_COLLAPSE); 598 if (ret && errno == EAGAIN && retry) { 599 retry = false; 600 goto retry; 601 } 602 return ret; 603 } 604 605 /* 606 * Returns pmd-mapped hugepage in VMA marked VM_HUGEPAGE, filled with 607 * validate_memory()'able contents. 608 */ 609 static void *alloc_hpage(struct mem_ops *ops) 610 { 611 void *p = ops->setup_area(1); 612 613 ops->fault(p, 0, hpage_pmd_size); 614 615 /* 616 * VMA should be neither VM_HUGEPAGE nor VM_NOHUGEPAGE. 617 * The latter is ineligible for collapse by MADV_COLLAPSE 618 * while the former might cause MADV_COLLAPSE to race with 619 * khugepaged on low-load system (like a test machine), which 620 * would cause MADV_COLLAPSE to fail with EAGAIN. 621 */ 622 printf("Allocate huge page..."); 623 if (madvise_collapse_retry(p, hpage_pmd_size)) { 624 perror("madvise(MADV_COLLAPSE)"); 625 exit(EXIT_FAILURE); 626 } 627 if (!ops->check_huge(p, 1)) { 628 perror("madvise(MADV_COLLAPSE)"); 629 exit(EXIT_FAILURE); 630 } 631 if (madvise(p, hpage_pmd_size, MADV_HUGEPAGE)) { 632 perror("madvise(MADV_HUGEPAGE)"); 633 exit(EXIT_FAILURE); 634 } 635 success("OK"); 636 return p; 637 } 638 639 static void validate_memory(int *p, unsigned long start, unsigned long end) 640 { 641 int i; 642 643 for (i = start / page_size; i < end / page_size; i++) { 644 if (p[i * page_size / sizeof(*p)] != i + 0xdead0000) { 645 printf("Page %d is corrupted: %#x\n", 646 i, p[i * page_size / sizeof(*p)]); 647 exit(EXIT_FAILURE); 648 } 649 } 650 } 651 652 static void *anon_setup_area(int nr_hpages) 653 { 654 return alloc_mapping(nr_hpages); 655 } 656 657 static void anon_cleanup_area(void *p, unsigned long size) 658 { 659 munmap(p, size); 660 } 661 662 static void anon_fault(void *p, unsigned long start, unsigned long end) 663 { 664 fill_memory(p, start, end); 665 } 666 667 static bool anon_check_huge(void *addr, int nr_hpages) 668 { 669 return check_huge_anon(addr, nr_hpages, hpage_pmd_size); 670 } 671 672 static void *file_setup_area(int nr_hpages) 673 { 674 int fd; 675 void *p; 676 unsigned long size; 677 678 unlink(finfo.path); /* Cleanup from previous failed tests */ 679 printf("Creating %s for collapse%s...", finfo.path, 680 finfo.type == VMA_SHMEM ? " (tmpfs)" : ""); 681 fd = open(finfo.path, O_DSYNC | O_CREAT | O_RDWR | O_TRUNC | O_EXCL, 682 777); 683 if (fd < 0) { 684 perror("open()"); 685 exit(EXIT_FAILURE); 686 } 687 688 size = nr_hpages * hpage_pmd_size; 689 p = alloc_mapping(nr_hpages); 690 fill_memory(p, 0, size); 691 write(fd, p, size); 692 close(fd); 693 munmap(p, size); 694 success("OK"); 695 696 printf("Opening %s read only for collapse...", finfo.path); 697 finfo.fd = open(finfo.path, O_RDONLY, 777); 698 if (finfo.fd < 0) { 699 perror("open()"); 700 exit(EXIT_FAILURE); 701 } 702 p = mmap(BASE_ADDR, size, PROT_READ | PROT_EXEC, 703 MAP_PRIVATE, finfo.fd, 0); 704 if (p == MAP_FAILED || p != BASE_ADDR) { 705 perror("mmap()"); 706 exit(EXIT_FAILURE); 707 } 708 709 /* Drop page cache */ 710 write_file("/proc/sys/vm/drop_caches", "3", 2); 711 success("OK"); 712 return p; 713 } 714 715 static void file_cleanup_area(void *p, unsigned long size) 716 { 717 munmap(p, size); 718 close(finfo.fd); 719 unlink(finfo.path); 720 } 721 722 static void file_fault(void *p, unsigned long start, unsigned long end) 723 { 724 if (madvise(((char *)p) + start, end - start, MADV_POPULATE_READ)) { 725 perror("madvise(MADV_POPULATE_READ"); 726 exit(EXIT_FAILURE); 727 } 728 } 729 730 static bool file_check_huge(void *addr, int nr_hpages) 731 { 732 switch (finfo.type) { 733 case VMA_FILE: 734 return check_huge_file(addr, nr_hpages, hpage_pmd_size); 735 case VMA_SHMEM: 736 return check_huge_shmem(addr, nr_hpages, hpage_pmd_size); 737 default: 738 exit(EXIT_FAILURE); 739 return false; 740 } 741 } 742 743 static void *shmem_setup_area(int nr_hpages) 744 { 745 void *p; 746 unsigned long size = nr_hpages * hpage_pmd_size; 747 748 finfo.fd = memfd_create("khugepaged-selftest-collapse-shmem", 0); 749 if (finfo.fd < 0) { 750 perror("memfd_create()"); 751 exit(EXIT_FAILURE); 752 } 753 if (ftruncate(finfo.fd, size)) { 754 perror("ftruncate()"); 755 exit(EXIT_FAILURE); 756 } 757 p = mmap(BASE_ADDR, size, PROT_READ | PROT_WRITE, MAP_SHARED, finfo.fd, 758 0); 759 if (p != BASE_ADDR) { 760 perror("mmap()"); 761 exit(EXIT_FAILURE); 762 } 763 return p; 764 } 765 766 static void shmem_cleanup_area(void *p, unsigned long size) 767 { 768 munmap(p, size); 769 close(finfo.fd); 770 } 771 772 static bool shmem_check_huge(void *addr, int nr_hpages) 773 { 774 return check_huge_shmem(addr, nr_hpages, hpage_pmd_size); 775 } 776 777 static struct mem_ops __anon_ops = { 778 .setup_area = &anon_setup_area, 779 .cleanup_area = &anon_cleanup_area, 780 .fault = &anon_fault, 781 .check_huge = &anon_check_huge, 782 .name = "anon", 783 }; 784 785 static struct mem_ops __file_ops = { 786 .setup_area = &file_setup_area, 787 .cleanup_area = &file_cleanup_area, 788 .fault = &file_fault, 789 .check_huge = &file_check_huge, 790 .name = "file", 791 }; 792 793 static struct mem_ops __shmem_ops = { 794 .setup_area = &shmem_setup_area, 795 .cleanup_area = &shmem_cleanup_area, 796 .fault = &anon_fault, 797 .check_huge = &shmem_check_huge, 798 .name = "shmem", 799 }; 800 801 static void __madvise_collapse(const char *msg, char *p, int nr_hpages, 802 struct mem_ops *ops, bool expect) 803 { 804 int ret; 805 struct settings settings = *current_settings(); 806 807 printf("%s...", msg); 808 809 /* 810 * Prevent khugepaged interference and tests that MADV_COLLAPSE 811 * ignores /sys/kernel/mm/transparent_hugepage/enabled 812 */ 813 settings.thp_enabled = THP_NEVER; 814 settings.shmem_enabled = SHMEM_NEVER; 815 push_settings(&settings); 816 817 /* Clear VM_NOHUGEPAGE */ 818 madvise(p, nr_hpages * hpage_pmd_size, MADV_HUGEPAGE); 819 ret = madvise_collapse_retry(p, nr_hpages * hpage_pmd_size); 820 if (((bool)ret) == expect) 821 fail("Fail: Bad return value"); 822 else if (!ops->check_huge(p, expect ? nr_hpages : 0)) 823 fail("Fail: check_huge()"); 824 else 825 success("OK"); 826 827 pop_settings(); 828 } 829 830 static void madvise_collapse(const char *msg, char *p, int nr_hpages, 831 struct mem_ops *ops, bool expect) 832 { 833 /* Sanity check */ 834 if (!ops->check_huge(p, 0)) { 835 printf("Unexpected huge page\n"); 836 exit(EXIT_FAILURE); 837 } 838 __madvise_collapse(msg, p, nr_hpages, ops, expect); 839 } 840 841 #define TICK 500000 842 static bool wait_for_scan(const char *msg, char *p, int nr_hpages, 843 struct mem_ops *ops) 844 { 845 int full_scans; 846 int timeout = 6; /* 3 seconds */ 847 848 /* Sanity check */ 849 if (!ops->check_huge(p, 0)) { 850 printf("Unexpected huge page\n"); 851 exit(EXIT_FAILURE); 852 } 853 854 madvise(p, nr_hpages * hpage_pmd_size, MADV_HUGEPAGE); 855 856 /* Wait until the second full_scan completed */ 857 full_scans = read_num("khugepaged/full_scans") + 2; 858 859 printf("%s...", msg); 860 while (timeout--) { 861 if (ops->check_huge(p, nr_hpages)) 862 break; 863 if (read_num("khugepaged/full_scans") >= full_scans) 864 break; 865 printf("."); 866 usleep(TICK); 867 } 868 869 madvise(p, nr_hpages * hpage_pmd_size, MADV_NOHUGEPAGE); 870 871 return timeout == -1; 872 } 873 874 static void khugepaged_collapse(const char *msg, char *p, int nr_hpages, 875 struct mem_ops *ops, bool expect) 876 { 877 if (wait_for_scan(msg, p, nr_hpages, ops)) { 878 if (expect) 879 fail("Timeout"); 880 else 881 success("OK"); 882 return; 883 } 884 885 /* 886 * For file and shmem memory, khugepaged only retracts pte entries after 887 * putting the new hugepage in the page cache. The hugepage must be 888 * subsequently refaulted to install the pmd mapping for the mm. 889 */ 890 if (ops != &__anon_ops) 891 ops->fault(p, 0, nr_hpages * hpage_pmd_size); 892 893 if (ops->check_huge(p, expect ? nr_hpages : 0)) 894 success("OK"); 895 else 896 fail("Fail"); 897 } 898 899 static struct collapse_context __khugepaged_context = { 900 .collapse = &khugepaged_collapse, 901 .enforce_pte_scan_limits = true, 902 .name = "khugepaged", 903 }; 904 905 static struct collapse_context __madvise_context = { 906 .collapse = &madvise_collapse, 907 .enforce_pte_scan_limits = false, 908 .name = "madvise", 909 }; 910 911 static bool is_tmpfs(struct mem_ops *ops) 912 { 913 return ops == &__file_ops && finfo.type == VMA_SHMEM; 914 } 915 916 static void alloc_at_fault(void) 917 { 918 struct settings settings = *current_settings(); 919 char *p; 920 921 settings.thp_enabled = THP_ALWAYS; 922 push_settings(&settings); 923 924 p = alloc_mapping(1); 925 *p = 1; 926 printf("Allocate huge page on fault..."); 927 if (check_huge_anon(p, 1, hpage_pmd_size)) 928 success("OK"); 929 else 930 fail("Fail"); 931 932 pop_settings(); 933 934 madvise(p, page_size, MADV_DONTNEED); 935 printf("Split huge PMD on MADV_DONTNEED..."); 936 if (check_huge_anon(p, 0, hpage_pmd_size)) 937 success("OK"); 938 else 939 fail("Fail"); 940 munmap(p, hpage_pmd_size); 941 } 942 943 static void collapse_full(struct collapse_context *c, struct mem_ops *ops) 944 { 945 void *p; 946 int nr_hpages = 4; 947 unsigned long size = nr_hpages * hpage_pmd_size; 948 949 p = ops->setup_area(nr_hpages); 950 ops->fault(p, 0, size); 951 c->collapse("Collapse multiple fully populated PTE table", p, nr_hpages, 952 ops, true); 953 validate_memory(p, 0, size); 954 ops->cleanup_area(p, size); 955 } 956 957 static void collapse_empty(struct collapse_context *c, struct mem_ops *ops) 958 { 959 void *p; 960 961 p = ops->setup_area(1); 962 c->collapse("Do not collapse empty PTE table", p, 1, ops, false); 963 ops->cleanup_area(p, hpage_pmd_size); 964 } 965 966 static void collapse_single_pte_entry(struct collapse_context *c, struct mem_ops *ops) 967 { 968 void *p; 969 970 p = ops->setup_area(1); 971 ops->fault(p, 0, page_size); 972 c->collapse("Collapse PTE table with single PTE entry present", p, 973 1, ops, true); 974 ops->cleanup_area(p, hpage_pmd_size); 975 } 976 977 static void collapse_max_ptes_none(struct collapse_context *c, struct mem_ops *ops) 978 { 979 int max_ptes_none = hpage_pmd_nr / 2; 980 struct settings settings = *current_settings(); 981 void *p; 982 983 settings.khugepaged.max_ptes_none = max_ptes_none; 984 push_settings(&settings); 985 986 p = ops->setup_area(1); 987 988 if (is_tmpfs(ops)) { 989 /* shmem pages always in the page cache */ 990 printf("tmpfs..."); 991 skip("Skip"); 992 goto skip; 993 } 994 995 ops->fault(p, 0, (hpage_pmd_nr - max_ptes_none - 1) * page_size); 996 c->collapse("Maybe collapse with max_ptes_none exceeded", p, 1, 997 ops, !c->enforce_pte_scan_limits); 998 validate_memory(p, 0, (hpage_pmd_nr - max_ptes_none - 1) * page_size); 999 1000 if (c->enforce_pte_scan_limits) { 1001 ops->fault(p, 0, (hpage_pmd_nr - max_ptes_none) * page_size); 1002 c->collapse("Collapse with max_ptes_none PTEs empty", p, 1, ops, 1003 true); 1004 validate_memory(p, 0, 1005 (hpage_pmd_nr - max_ptes_none) * page_size); 1006 } 1007 skip: 1008 ops->cleanup_area(p, hpage_pmd_size); 1009 pop_settings(); 1010 } 1011 1012 static void collapse_swapin_single_pte(struct collapse_context *c, struct mem_ops *ops) 1013 { 1014 void *p; 1015 1016 p = ops->setup_area(1); 1017 ops->fault(p, 0, hpage_pmd_size); 1018 1019 printf("Swapout one page..."); 1020 if (madvise(p, page_size, MADV_PAGEOUT)) { 1021 perror("madvise(MADV_PAGEOUT)"); 1022 exit(EXIT_FAILURE); 1023 } 1024 if (check_swap(p, page_size)) { 1025 success("OK"); 1026 } else { 1027 fail("Fail"); 1028 goto out; 1029 } 1030 1031 c->collapse("Collapse with swapping in single PTE entry", p, 1, ops, 1032 true); 1033 validate_memory(p, 0, hpage_pmd_size); 1034 out: 1035 ops->cleanup_area(p, hpage_pmd_size); 1036 } 1037 1038 static void collapse_max_ptes_swap(struct collapse_context *c, struct mem_ops *ops) 1039 { 1040 int max_ptes_swap = read_num("khugepaged/max_ptes_swap"); 1041 void *p; 1042 1043 p = ops->setup_area(1); 1044 ops->fault(p, 0, hpage_pmd_size); 1045 1046 printf("Swapout %d of %d pages...", max_ptes_swap + 1, hpage_pmd_nr); 1047 if (madvise(p, (max_ptes_swap + 1) * page_size, MADV_PAGEOUT)) { 1048 perror("madvise(MADV_PAGEOUT)"); 1049 exit(EXIT_FAILURE); 1050 } 1051 if (check_swap(p, (max_ptes_swap + 1) * page_size)) { 1052 success("OK"); 1053 } else { 1054 fail("Fail"); 1055 goto out; 1056 } 1057 1058 c->collapse("Maybe collapse with max_ptes_swap exceeded", p, 1, ops, 1059 !c->enforce_pte_scan_limits); 1060 validate_memory(p, 0, hpage_pmd_size); 1061 1062 if (c->enforce_pte_scan_limits) { 1063 ops->fault(p, 0, hpage_pmd_size); 1064 printf("Swapout %d of %d pages...", max_ptes_swap, 1065 hpage_pmd_nr); 1066 if (madvise(p, max_ptes_swap * page_size, MADV_PAGEOUT)) { 1067 perror("madvise(MADV_PAGEOUT)"); 1068 exit(EXIT_FAILURE); 1069 } 1070 if (check_swap(p, max_ptes_swap * page_size)) { 1071 success("OK"); 1072 } else { 1073 fail("Fail"); 1074 goto out; 1075 } 1076 1077 c->collapse("Collapse with max_ptes_swap pages swapped out", p, 1078 1, ops, true); 1079 validate_memory(p, 0, hpage_pmd_size); 1080 } 1081 out: 1082 ops->cleanup_area(p, hpage_pmd_size); 1083 } 1084 1085 static void collapse_single_pte_entry_compound(struct collapse_context *c, struct mem_ops *ops) 1086 { 1087 void *p; 1088 1089 p = alloc_hpage(ops); 1090 1091 if (is_tmpfs(ops)) { 1092 /* MADV_DONTNEED won't evict tmpfs pages */ 1093 printf("tmpfs..."); 1094 skip("Skip"); 1095 goto skip; 1096 } 1097 1098 madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE); 1099 printf("Split huge page leaving single PTE mapping compound page..."); 1100 madvise(p + page_size, hpage_pmd_size - page_size, MADV_DONTNEED); 1101 if (ops->check_huge(p, 0)) 1102 success("OK"); 1103 else 1104 fail("Fail"); 1105 1106 c->collapse("Collapse PTE table with single PTE mapping compound page", 1107 p, 1, ops, true); 1108 validate_memory(p, 0, page_size); 1109 skip: 1110 ops->cleanup_area(p, hpage_pmd_size); 1111 } 1112 1113 static void collapse_full_of_compound(struct collapse_context *c, struct mem_ops *ops) 1114 { 1115 void *p; 1116 1117 p = alloc_hpage(ops); 1118 printf("Split huge page leaving single PTE page table full of compound pages..."); 1119 madvise(p, page_size, MADV_NOHUGEPAGE); 1120 madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE); 1121 if (ops->check_huge(p, 0)) 1122 success("OK"); 1123 else 1124 fail("Fail"); 1125 1126 c->collapse("Collapse PTE table full of compound pages", p, 1, ops, 1127 true); 1128 validate_memory(p, 0, hpage_pmd_size); 1129 ops->cleanup_area(p, hpage_pmd_size); 1130 } 1131 1132 static void collapse_compound_extreme(struct collapse_context *c, struct mem_ops *ops) 1133 { 1134 void *p; 1135 int i; 1136 1137 p = ops->setup_area(1); 1138 for (i = 0; i < hpage_pmd_nr; i++) { 1139 printf("\rConstruct PTE page table full of different PTE-mapped compound pages %3d/%d...", 1140 i + 1, hpage_pmd_nr); 1141 1142 madvise(BASE_ADDR, hpage_pmd_size, MADV_HUGEPAGE); 1143 ops->fault(BASE_ADDR, 0, hpage_pmd_size); 1144 if (!ops->check_huge(BASE_ADDR, 1)) { 1145 printf("Failed to allocate huge page\n"); 1146 exit(EXIT_FAILURE); 1147 } 1148 madvise(BASE_ADDR, hpage_pmd_size, MADV_NOHUGEPAGE); 1149 1150 p = mremap(BASE_ADDR - i * page_size, 1151 i * page_size + hpage_pmd_size, 1152 (i + 1) * page_size, 1153 MREMAP_MAYMOVE | MREMAP_FIXED, 1154 BASE_ADDR + 2 * hpage_pmd_size); 1155 if (p == MAP_FAILED) { 1156 perror("mremap+unmap"); 1157 exit(EXIT_FAILURE); 1158 } 1159 1160 p = mremap(BASE_ADDR + 2 * hpage_pmd_size, 1161 (i + 1) * page_size, 1162 (i + 1) * page_size + hpage_pmd_size, 1163 MREMAP_MAYMOVE | MREMAP_FIXED, 1164 BASE_ADDR - (i + 1) * page_size); 1165 if (p == MAP_FAILED) { 1166 perror("mremap+alloc"); 1167 exit(EXIT_FAILURE); 1168 } 1169 } 1170 1171 ops->cleanup_area(BASE_ADDR, hpage_pmd_size); 1172 ops->fault(p, 0, hpage_pmd_size); 1173 if (!ops->check_huge(p, 1)) 1174 success("OK"); 1175 else 1176 fail("Fail"); 1177 1178 c->collapse("Collapse PTE table full of different compound pages", p, 1, 1179 ops, true); 1180 1181 validate_memory(p, 0, hpage_pmd_size); 1182 ops->cleanup_area(p, hpage_pmd_size); 1183 } 1184 1185 static void collapse_fork(struct collapse_context *c, struct mem_ops *ops) 1186 { 1187 int wstatus; 1188 void *p; 1189 1190 p = ops->setup_area(1); 1191 1192 printf("Allocate small page..."); 1193 ops->fault(p, 0, page_size); 1194 if (ops->check_huge(p, 0)) 1195 success("OK"); 1196 else 1197 fail("Fail"); 1198 1199 printf("Share small page over fork()..."); 1200 if (!fork()) { 1201 /* Do not touch settings on child exit */ 1202 skip_settings_restore = true; 1203 exit_status = 0; 1204 1205 if (ops->check_huge(p, 0)) 1206 success("OK"); 1207 else 1208 fail("Fail"); 1209 1210 ops->fault(p, page_size, 2 * page_size); 1211 c->collapse("Collapse PTE table with single page shared with parent process", 1212 p, 1, ops, true); 1213 1214 validate_memory(p, 0, page_size); 1215 ops->cleanup_area(p, hpage_pmd_size); 1216 exit(exit_status); 1217 } 1218 1219 wait(&wstatus); 1220 exit_status += WEXITSTATUS(wstatus); 1221 1222 printf("Check if parent still has small page..."); 1223 if (ops->check_huge(p, 0)) 1224 success("OK"); 1225 else 1226 fail("Fail"); 1227 validate_memory(p, 0, page_size); 1228 ops->cleanup_area(p, hpage_pmd_size); 1229 } 1230 1231 static void collapse_fork_compound(struct collapse_context *c, struct mem_ops *ops) 1232 { 1233 int wstatus; 1234 void *p; 1235 1236 p = alloc_hpage(ops); 1237 printf("Share huge page over fork()..."); 1238 if (!fork()) { 1239 /* Do not touch settings on child exit */ 1240 skip_settings_restore = true; 1241 exit_status = 0; 1242 1243 if (ops->check_huge(p, 1)) 1244 success("OK"); 1245 else 1246 fail("Fail"); 1247 1248 printf("Split huge page PMD in child process..."); 1249 madvise(p, page_size, MADV_NOHUGEPAGE); 1250 madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE); 1251 if (ops->check_huge(p, 0)) 1252 success("OK"); 1253 else 1254 fail("Fail"); 1255 ops->fault(p, 0, page_size); 1256 1257 write_num("khugepaged/max_ptes_shared", hpage_pmd_nr - 1); 1258 c->collapse("Collapse PTE table full of compound pages in child", 1259 p, 1, ops, true); 1260 write_num("khugepaged/max_ptes_shared", 1261 current_settings()->khugepaged.max_ptes_shared); 1262 1263 validate_memory(p, 0, hpage_pmd_size); 1264 ops->cleanup_area(p, hpage_pmd_size); 1265 exit(exit_status); 1266 } 1267 1268 wait(&wstatus); 1269 exit_status += WEXITSTATUS(wstatus); 1270 1271 printf("Check if parent still has huge page..."); 1272 if (ops->check_huge(p, 1)) 1273 success("OK"); 1274 else 1275 fail("Fail"); 1276 validate_memory(p, 0, hpage_pmd_size); 1277 ops->cleanup_area(p, hpage_pmd_size); 1278 } 1279 1280 static void collapse_max_ptes_shared(struct collapse_context *c, struct mem_ops *ops) 1281 { 1282 int max_ptes_shared = read_num("khugepaged/max_ptes_shared"); 1283 int wstatus; 1284 void *p; 1285 1286 p = alloc_hpage(ops); 1287 printf("Share huge page over fork()..."); 1288 if (!fork()) { 1289 /* Do not touch settings on child exit */ 1290 skip_settings_restore = true; 1291 exit_status = 0; 1292 1293 if (ops->check_huge(p, 1)) 1294 success("OK"); 1295 else 1296 fail("Fail"); 1297 1298 printf("Trigger CoW on page %d of %d...", 1299 hpage_pmd_nr - max_ptes_shared - 1, hpage_pmd_nr); 1300 ops->fault(p, 0, (hpage_pmd_nr - max_ptes_shared - 1) * page_size); 1301 if (ops->check_huge(p, 0)) 1302 success("OK"); 1303 else 1304 fail("Fail"); 1305 1306 c->collapse("Maybe collapse with max_ptes_shared exceeded", p, 1307 1, ops, !c->enforce_pte_scan_limits); 1308 1309 if (c->enforce_pte_scan_limits) { 1310 printf("Trigger CoW on page %d of %d...", 1311 hpage_pmd_nr - max_ptes_shared, hpage_pmd_nr); 1312 ops->fault(p, 0, (hpage_pmd_nr - max_ptes_shared) * 1313 page_size); 1314 if (ops->check_huge(p, 0)) 1315 success("OK"); 1316 else 1317 fail("Fail"); 1318 1319 c->collapse("Collapse with max_ptes_shared PTEs shared", 1320 p, 1, ops, true); 1321 } 1322 1323 validate_memory(p, 0, hpage_pmd_size); 1324 ops->cleanup_area(p, hpage_pmd_size); 1325 exit(exit_status); 1326 } 1327 1328 wait(&wstatus); 1329 exit_status += WEXITSTATUS(wstatus); 1330 1331 printf("Check if parent still has huge page..."); 1332 if (ops->check_huge(p, 1)) 1333 success("OK"); 1334 else 1335 fail("Fail"); 1336 validate_memory(p, 0, hpage_pmd_size); 1337 ops->cleanup_area(p, hpage_pmd_size); 1338 } 1339 1340 static void madvise_collapse_existing_thps(struct collapse_context *c, 1341 struct mem_ops *ops) 1342 { 1343 void *p; 1344 1345 p = ops->setup_area(1); 1346 ops->fault(p, 0, hpage_pmd_size); 1347 c->collapse("Collapse fully populated PTE table...", p, 1, ops, true); 1348 validate_memory(p, 0, hpage_pmd_size); 1349 1350 /* c->collapse() will find a hugepage and complain - call directly. */ 1351 __madvise_collapse("Re-collapse PMD-mapped hugepage", p, 1, ops, true); 1352 validate_memory(p, 0, hpage_pmd_size); 1353 ops->cleanup_area(p, hpage_pmd_size); 1354 } 1355 1356 /* 1357 * Test race with khugepaged where page tables have been retracted and 1358 * pmd cleared. 1359 */ 1360 static void madvise_retracted_page_tables(struct collapse_context *c, 1361 struct mem_ops *ops) 1362 { 1363 void *p; 1364 int nr_hpages = 1; 1365 unsigned long size = nr_hpages * hpage_pmd_size; 1366 1367 p = ops->setup_area(nr_hpages); 1368 ops->fault(p, 0, size); 1369 1370 /* Let khugepaged collapse and leave pmd cleared */ 1371 if (wait_for_scan("Collapse and leave PMD cleared", p, nr_hpages, 1372 ops)) { 1373 fail("Timeout"); 1374 return; 1375 } 1376 success("OK"); 1377 c->collapse("Install huge PMD from page cache", p, nr_hpages, ops, 1378 true); 1379 validate_memory(p, 0, size); 1380 ops->cleanup_area(p, size); 1381 } 1382 1383 static void usage(void) 1384 { 1385 fprintf(stderr, "\nUsage: ./khugepaged <test type> [dir]\n\n"); 1386 fprintf(stderr, "\t<test type>\t: <context>:<mem_type>\n"); 1387 fprintf(stderr, "\t<context>\t: [all|khugepaged|madvise]\n"); 1388 fprintf(stderr, "\t<mem_type>\t: [all|anon|file|shmem]\n"); 1389 fprintf(stderr, "\n\t\"file,all\" mem_type requires [dir] argument\n"); 1390 fprintf(stderr, "\n\t\"file,all\" mem_type requires kernel built with\n"); 1391 fprintf(stderr, "\tCONFIG_READ_ONLY_THP_FOR_FS=y\n"); 1392 fprintf(stderr, "\n\tif [dir] is a (sub)directory of a tmpfs mount, tmpfs must be\n"); 1393 fprintf(stderr, "\tmounted with huge=madvise option for khugepaged tests to work\n"); 1394 exit(1); 1395 } 1396 1397 static void parse_test_type(int argc, const char **argv) 1398 { 1399 char *buf; 1400 const char *token; 1401 1402 if (argc == 1) { 1403 /* Backwards compatibility */ 1404 khugepaged_context = &__khugepaged_context; 1405 madvise_context = &__madvise_context; 1406 anon_ops = &__anon_ops; 1407 return; 1408 } 1409 1410 buf = strdup(argv[1]); 1411 token = strsep(&buf, ":"); 1412 1413 if (!strcmp(token, "all")) { 1414 khugepaged_context = &__khugepaged_context; 1415 madvise_context = &__madvise_context; 1416 } else if (!strcmp(token, "khugepaged")) { 1417 khugepaged_context = &__khugepaged_context; 1418 } else if (!strcmp(token, "madvise")) { 1419 madvise_context = &__madvise_context; 1420 } else { 1421 usage(); 1422 } 1423 1424 if (!buf) 1425 usage(); 1426 1427 if (!strcmp(buf, "all")) { 1428 file_ops = &__file_ops; 1429 anon_ops = &__anon_ops; 1430 shmem_ops = &__shmem_ops; 1431 } else if (!strcmp(buf, "anon")) { 1432 anon_ops = &__anon_ops; 1433 } else if (!strcmp(buf, "file")) { 1434 file_ops = &__file_ops; 1435 } else if (!strcmp(buf, "shmem")) { 1436 shmem_ops = &__shmem_ops; 1437 } else { 1438 usage(); 1439 } 1440 1441 if (!file_ops) 1442 return; 1443 1444 if (argc != 3) 1445 usage(); 1446 } 1447 1448 int main(int argc, const char **argv) 1449 { 1450 struct settings default_settings = { 1451 .thp_enabled = THP_MADVISE, 1452 .thp_defrag = THP_DEFRAG_ALWAYS, 1453 .shmem_enabled = SHMEM_ADVISE, 1454 .use_zero_page = 0, 1455 .khugepaged = { 1456 .defrag = 1, 1457 .alloc_sleep_millisecs = 10, 1458 .scan_sleep_millisecs = 10, 1459 }, 1460 /* 1461 * When testing file-backed memory, the collapse path 1462 * looks at how many pages are found in the page cache, not 1463 * what pages are mapped. Disable read ahead optimization so 1464 * pages don't find their way into the page cache unless 1465 * we mem_ops->fault() them in. 1466 */ 1467 .read_ahead_kb = 0, 1468 }; 1469 1470 parse_test_type(argc, argv); 1471 1472 if (file_ops) 1473 get_finfo(argv[2]); 1474 1475 setbuf(stdout, NULL); 1476 1477 page_size = getpagesize(); 1478 hpage_pmd_size = read_pmd_pagesize(); 1479 if (!hpage_pmd_size) { 1480 printf("Reading PMD pagesize failed"); 1481 exit(EXIT_FAILURE); 1482 } 1483 hpage_pmd_nr = hpage_pmd_size / page_size; 1484 1485 default_settings.khugepaged.max_ptes_none = hpage_pmd_nr - 1; 1486 default_settings.khugepaged.max_ptes_swap = hpage_pmd_nr / 8; 1487 default_settings.khugepaged.max_ptes_shared = hpage_pmd_nr / 2; 1488 default_settings.khugepaged.pages_to_scan = hpage_pmd_nr * 8; 1489 1490 save_settings(); 1491 push_settings(&default_settings); 1492 1493 alloc_at_fault(); 1494 1495 #define TEST(t, c, o) do { \ 1496 if (c && o) { \ 1497 printf("\nRun test: " #t " (%s:%s)\n", c->name, o->name); \ 1498 t(c, o); \ 1499 } \ 1500 } while (0) 1501 1502 TEST(collapse_full, khugepaged_context, anon_ops); 1503 TEST(collapse_full, khugepaged_context, file_ops); 1504 TEST(collapse_full, khugepaged_context, shmem_ops); 1505 TEST(collapse_full, madvise_context, anon_ops); 1506 TEST(collapse_full, madvise_context, file_ops); 1507 TEST(collapse_full, madvise_context, shmem_ops); 1508 1509 TEST(collapse_empty, khugepaged_context, anon_ops); 1510 TEST(collapse_empty, madvise_context, anon_ops); 1511 1512 TEST(collapse_single_pte_entry, khugepaged_context, anon_ops); 1513 TEST(collapse_single_pte_entry, khugepaged_context, file_ops); 1514 TEST(collapse_single_pte_entry, khugepaged_context, shmem_ops); 1515 TEST(collapse_single_pte_entry, madvise_context, anon_ops); 1516 TEST(collapse_single_pte_entry, madvise_context, file_ops); 1517 TEST(collapse_single_pte_entry, madvise_context, shmem_ops); 1518 1519 TEST(collapse_max_ptes_none, khugepaged_context, anon_ops); 1520 TEST(collapse_max_ptes_none, khugepaged_context, file_ops); 1521 TEST(collapse_max_ptes_none, madvise_context, anon_ops); 1522 TEST(collapse_max_ptes_none, madvise_context, file_ops); 1523 1524 TEST(collapse_single_pte_entry_compound, khugepaged_context, anon_ops); 1525 TEST(collapse_single_pte_entry_compound, khugepaged_context, file_ops); 1526 TEST(collapse_single_pte_entry_compound, madvise_context, anon_ops); 1527 TEST(collapse_single_pte_entry_compound, madvise_context, file_ops); 1528 1529 TEST(collapse_full_of_compound, khugepaged_context, anon_ops); 1530 TEST(collapse_full_of_compound, khugepaged_context, file_ops); 1531 TEST(collapse_full_of_compound, khugepaged_context, shmem_ops); 1532 TEST(collapse_full_of_compound, madvise_context, anon_ops); 1533 TEST(collapse_full_of_compound, madvise_context, file_ops); 1534 TEST(collapse_full_of_compound, madvise_context, shmem_ops); 1535 1536 TEST(collapse_compound_extreme, khugepaged_context, anon_ops); 1537 TEST(collapse_compound_extreme, madvise_context, anon_ops); 1538 1539 TEST(collapse_swapin_single_pte, khugepaged_context, anon_ops); 1540 TEST(collapse_swapin_single_pte, madvise_context, anon_ops); 1541 1542 TEST(collapse_max_ptes_swap, khugepaged_context, anon_ops); 1543 TEST(collapse_max_ptes_swap, madvise_context, anon_ops); 1544 1545 TEST(collapse_fork, khugepaged_context, anon_ops); 1546 TEST(collapse_fork, madvise_context, anon_ops); 1547 1548 TEST(collapse_fork_compound, khugepaged_context, anon_ops); 1549 TEST(collapse_fork_compound, madvise_context, anon_ops); 1550 1551 TEST(collapse_max_ptes_shared, khugepaged_context, anon_ops); 1552 TEST(collapse_max_ptes_shared, madvise_context, anon_ops); 1553 1554 TEST(madvise_collapse_existing_thps, madvise_context, anon_ops); 1555 TEST(madvise_collapse_existing_thps, madvise_context, file_ops); 1556 TEST(madvise_collapse_existing_thps, madvise_context, shmem_ops); 1557 1558 TEST(madvise_retracted_page_tables, madvise_context, file_ops); 1559 TEST(madvise_retracted_page_tables, madvise_context, shmem_ops); 1560 1561 restore_settings(0); 1562 } 1563