/*
 * linux/mm/compaction.c
 *
 * Memory compaction for the reduction of external fragmentation. Note that
 * this heavily depends upon page migration to do all the real heavy
 * lifting
 *
 * Copyright IBM Corp. 2007-2010 Mel Gorman <mel@csn.ul.ie>
 */
#include <linux/cpu.h>
#include <linux/swap.h>
#include <linux/migrate.h>
#include <linux/compaction.h>
#include <linux/mm_inline.h>
#include <linux/sched/signal.h>
#include <linux/backing-dev.h>
#include <linux/sysctl.h>
#include <linux/sysfs.h>
#include <linux/page-isolation.h>
#include <linux/kasan.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
#include <linux/page_owner.h>
#include "internal.h"

#ifdef CONFIG_COMPACTION
static inline void count_compact_event(enum vm_event_item item)
{
	count_vm_event(item);
}

static inline void count_compact_events(enum vm_event_item item, long delta)
{
	count_vm_events(item, delta);
}
#else
#define count_compact_event(item) do { } while (0)
#define count_compact_events(item, delta) do { } while (0)
#endif

#if defined CONFIG_COMPACTION || defined CONFIG_CMA

#define CREATE_TRACE_POINTS
#include <trace/events/compaction.h>

#define block_start_pfn(pfn, order)	round_down(pfn, 1UL << (order))
#define block_end_pfn(pfn, order)	ALIGN((pfn) + 1, 1UL << (order))
#define pageblock_start_pfn(pfn)	block_start_pfn(pfn, pageblock_order)
#define pageblock_end_pfn(pfn)		block_end_pfn(pfn, pageblock_order)

static unsigned long release_freepages(struct list_head *freelist)
{
	struct page *page, *next;
	unsigned long high_pfn = 0;

	list_for_each_entry_safe(page, next, freelist, lru) {
		unsigned long pfn = page_to_pfn(page);
		list_del(&page->lru);
		__free_page(page);
		if (pfn > high_pfn)
			high_pfn = pfn;
	}

	return high_pfn;
}

static void map_pages(struct list_head *list)
{
	unsigned int i, order, nr_pages;
	struct page *page, *next;
	LIST_HEAD(tmp_list);

	list_for_each_entry_safe(page, next, list, lru) {
		list_del(&page->lru);

		order = page_private(page);
		nr_pages = 1 << order;

		post_alloc_hook(page, order, __GFP_MOVABLE);
		if (order)
			split_page(page, order);

		for (i = 0; i < nr_pages; i++) {
			list_add(&page->lru, &tmp_list);
			page++;
		}
	}

	list_splice(&tmp_list, list);
}

static inline bool migrate_async_suitable(int migratetype)
{
	return is_migrate_cma(migratetype) || migratetype == MIGRATE_MOVABLE;
}

#ifdef CONFIG_COMPACTION

int PageMovable(struct page *page)
{
	struct address_space *mapping;

	VM_BUG_ON_PAGE(!PageLocked(page), page);
	if (!__PageMovable(page))
		return 0;

	mapping = page_mapping(page);
	if (mapping && mapping->a_ops && mapping->a_ops->isolate_page)
		return 1;

	return 0;
}
EXPORT_SYMBOL(PageMovable);

void __SetPageMovable(struct page *page, struct address_space *mapping)
{
	VM_BUG_ON_PAGE(!PageLocked(page), page);
	VM_BUG_ON_PAGE((unsigned long)mapping & PAGE_MAPPING_MOVABLE, page);
	page->mapping = (void *)((unsigned long)mapping | PAGE_MAPPING_MOVABLE);
}
EXPORT_SYMBOL(__SetPageMovable);
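/*
 * Editorial example (a sketch of the intended driver usage, not taken from
 * this file): a driver that implements the ->isolate_page() and related
 * migration address_space operations tags each of its pages, under the page
 * lock, with
 *
 *	__SetPageMovable(page, mapping);
 *
 * which stores the address_space pointer with the low PAGE_MAPPING_MOVABLE
 * bit set in page->mapping. PageMovable() above then recognises such pages
 * by that bit together with the presence of the isolate_page callback.
 */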
void __ClearPageMovable(struct page *page)
{
	VM_BUG_ON_PAGE(!PageLocked(page), page);
	VM_BUG_ON_PAGE(!PageMovable(page), page);
	/*
	 * Clear registered address_space val with keeping PAGE_MAPPING_MOVABLE
	 * flag so that VM can catch up released page by driver after isolation.
	 * With it, VM migration doesn't try to put it back.
	 */
	page->mapping = (void *)((unsigned long)page->mapping &
				PAGE_MAPPING_MOVABLE);
}
EXPORT_SYMBOL(__ClearPageMovable);

/* Do not skip compaction more than 64 times */
#define COMPACT_MAX_DEFER_SHIFT 6

/*
 * Compaction is deferred when compaction fails to result in a page
 * allocation success. 1 << compact_defer_shift compactions are skipped up
 * to a limit of 1 << COMPACT_MAX_DEFER_SHIFT
 */
void defer_compaction(struct zone *zone, int order)
{
	zone->compact_considered = 0;
	zone->compact_defer_shift++;

	if (order < zone->compact_order_failed)
		zone->compact_order_failed = order;

	if (zone->compact_defer_shift > COMPACT_MAX_DEFER_SHIFT)
		zone->compact_defer_shift = COMPACT_MAX_DEFER_SHIFT;

	trace_mm_compaction_defer_compaction(zone, order);
}

/* Returns true if compaction should be skipped this time */
bool compaction_deferred(struct zone *zone, int order)
{
	unsigned long defer_limit = 1UL << zone->compact_defer_shift;

	if (order < zone->compact_order_failed)
		return false;

	/* Avoid possible overflow */
	if (++zone->compact_considered > defer_limit)
		zone->compact_considered = defer_limit;

	if (zone->compact_considered >= defer_limit)
		return false;

	trace_mm_compaction_deferred(zone, order);

	return true;
}

/*
 * Update defer tracking counters after successful compaction of given order,
 * which means an allocation either succeeded (alloc_success == true) or is
 * expected to succeed.
 */
void compaction_defer_reset(struct zone *zone, int order,
		bool alloc_success)
{
	if (alloc_success) {
		zone->compact_considered = 0;
		zone->compact_defer_shift = 0;
	}
	if (order >= zone->compact_order_failed)
		zone->compact_order_failed = order + 1;

	trace_mm_compaction_defer_reset(zone, order);
}

/* Returns true if restarting compaction after many failures */
bool compaction_restarting(struct zone *zone, int order)
{
	if (order < zone->compact_order_failed)
		return false;

	return zone->compact_defer_shift == COMPACT_MAX_DEFER_SHIFT &&
		zone->compact_considered >= 1UL << zone->compact_defer_shift;
}
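/*
 * Editorial illustration (not from the original source): after a failed
 * compaction, defer_compaction() bumps compact_defer_shift, and
 * compaction_deferred() then lets roughly 1 << compact_defer_shift allocation
 * attempts go by before compaction is tried again. Once the shift is capped
 * at COMPACT_MAX_DEFER_SHIFT, only about every 64th attempt retries
 * compaction, and compaction_restarting() reports this fully backed-off state
 * so the pageblock skip bits can be reset before the retry.
 */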
/* Returns true if the pageblock should be scanned for pages to isolate. */
static inline bool isolation_suitable(struct compact_control *cc,
					struct page *page)
{
	if (cc->ignore_skip_hint)
		return true;

	return !get_pageblock_skip(page);
}

static void reset_cached_positions(struct zone *zone)
{
	zone->compact_cached_migrate_pfn[0] = zone->zone_start_pfn;
	zone->compact_cached_migrate_pfn[1] = zone->zone_start_pfn;
	zone->compact_cached_free_pfn =
				pageblock_start_pfn(zone_end_pfn(zone) - 1);
}

/*
 * This function is called to clear all cached information on pageblocks that
 * should be skipped for page isolation when the migrate and free page scanner
 * meet.
 */
static void __reset_isolation_suitable(struct zone *zone)
{
	unsigned long start_pfn = zone->zone_start_pfn;
	unsigned long end_pfn = zone_end_pfn(zone);
	unsigned long pfn;

	zone->compact_blockskip_flush = false;

	/* Walk the zone and mark every pageblock as suitable for isolation */
	for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
		struct page *page;

		cond_resched();

		if (!pfn_valid(pfn))
			continue;

		page = pfn_to_page(pfn);
		if (zone != page_zone(page))
			continue;

		clear_pageblock_skip(page);
	}

	reset_cached_positions(zone);
}

void reset_isolation_suitable(pg_data_t *pgdat)
{
	int zoneid;

	for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) {
		struct zone *zone = &pgdat->node_zones[zoneid];
		if (!populated_zone(zone))
			continue;

		/* Only flush if a full compaction finished recently */
		if (zone->compact_blockskip_flush)
			__reset_isolation_suitable(zone);
	}
}

/*
 * If no pages were isolated then mark this pageblock to be skipped in the
 * future. The information is later cleared by __reset_isolation_suitable().
 */
static void update_pageblock_skip(struct compact_control *cc,
			struct page *page, unsigned long nr_isolated,
			bool migrate_scanner)
{
	struct zone *zone = cc->zone;
	unsigned long pfn;

	if (cc->ignore_skip_hint)
		return;

	if (!page)
		return;

	if (nr_isolated)
		return;

	set_pageblock_skip(page);

	pfn = page_to_pfn(page);

	/* Update where async and sync compaction should restart */
	if (migrate_scanner) {
		if (pfn > zone->compact_cached_migrate_pfn[0])
			zone->compact_cached_migrate_pfn[0] = pfn;
		if (cc->mode != MIGRATE_ASYNC &&
		    pfn > zone->compact_cached_migrate_pfn[1])
			zone->compact_cached_migrate_pfn[1] = pfn;
	} else {
		if (pfn < zone->compact_cached_free_pfn)
			zone->compact_cached_free_pfn = pfn;
	}
}
#else
static inline bool isolation_suitable(struct compact_control *cc,
					struct page *page)
{
	return true;
}

static void update_pageblock_skip(struct compact_control *cc,
			struct page *page, unsigned long nr_isolated,
			bool migrate_scanner)
{
}
#endif /* CONFIG_COMPACTION */

/*
 * Compaction requires the taking of some coarse locks that are potentially
 * very heavily contended. For async compaction, back out if the lock cannot
 * be taken immediately. For sync compaction, spin on the lock if needed.
 *
 * Returns true if the lock is held
 * Returns false if the lock is not held and compaction should abort
 */
static bool compact_trylock_irqsave(spinlock_t *lock, unsigned long *flags,
						struct compact_control *cc)
{
	if (cc->mode == MIGRATE_ASYNC) {
		if (!spin_trylock_irqsave(lock, *flags)) {
			cc->contended = true;
			return false;
		}
	} else {
		spin_lock_irqsave(lock, *flags);
	}

	return true;
}

/*
 * Compaction requires the taking of some coarse locks that are potentially
 * very heavily contended. The lock should be periodically unlocked to avoid
 * having disabled IRQs for a long time, even when there is nobody waiting on
 * the lock. It might also be that allowing the IRQs will result in
 * need_resched() becoming true. If scheduling is needed, async compaction
 * aborts. Sync compaction schedules.
 * Either compaction type will also abort if a fatal signal is pending.
 * In either case if the lock was locked, it is dropped and not regained.
 *
 * Returns true if compaction should abort due to fatal signal pending, or
 * async compaction due to need_resched()
 * Returns false when compaction can continue (sync compaction might have
 * scheduled)
 */
static bool compact_unlock_should_abort(spinlock_t *lock,
		unsigned long flags, bool *locked, struct compact_control *cc)
{
	if (*locked) {
		spin_unlock_irqrestore(lock, flags);
		*locked = false;
	}

	if (fatal_signal_pending(current)) {
		cc->contended = true;
		return true;
	}

	if (need_resched()) {
		if (cc->mode == MIGRATE_ASYNC) {
			cc->contended = true;
			return true;
		}
		cond_resched();
	}

	return false;
}

/*
 * Aside from avoiding lock contention, compaction also periodically checks
 * need_resched() and either schedules in sync compaction or aborts async
 * compaction. This is similar to what compact_unlock_should_abort() does, but
 * is used where no lock is concerned.
 *
 * Returns false when no scheduling was needed, or sync compaction scheduled.
 * Returns true when async compaction should abort.
 */
static inline bool compact_should_abort(struct compact_control *cc)
{
	/* async compaction aborts if contended */
	if (need_resched()) {
		if (cc->mode == MIGRATE_ASYNC) {
			cc->contended = true;
			return true;
		}

		cond_resched();
	}

	return false;
}

/*
 * Isolate free pages onto a private freelist. If @strict is true, will abort
 * returning 0 on any invalid PFNs or non-free pages inside of the pageblock
 * (even though it may still end up isolating some pages).
 */
static unsigned long isolate_freepages_block(struct compact_control *cc,
				unsigned long *start_pfn,
				unsigned long end_pfn,
				struct list_head *freelist,
				bool strict)
{
	int nr_scanned = 0, total_isolated = 0;
	struct page *cursor, *valid_page = NULL;
	unsigned long flags = 0;
	bool locked = false;
	unsigned long blockpfn = *start_pfn;
	unsigned int order;

	cursor = pfn_to_page(blockpfn);

	/* Isolate free pages. */
	for (; blockpfn < end_pfn; blockpfn++, cursor++) {
		int isolated;
		struct page *page = cursor;

		/*
		 * Periodically drop the lock (if held) regardless of its
		 * contention, to give a chance to IRQs. Abort if fatal signal
		 * pending or async compaction detects need_resched()
		 */
		if (!(blockpfn % SWAP_CLUSTER_MAX)
		    && compact_unlock_should_abort(&cc->zone->lock, flags,
								&locked, cc))
			break;

		nr_scanned++;
		if (!pfn_valid_within(blockpfn))
			goto isolate_fail;

		if (!valid_page)
			valid_page = page;

		/*
		 * For compound pages such as THP and hugetlbfs, we can save
		 * potentially a lot of iterations if we skip them at once.
		 * The check is racy, but we can consider only valid values
		 * and the only danger is skipping too much.
		 */
		if (PageCompound(page)) {
			unsigned int comp_order = compound_order(page);

			if (likely(comp_order < MAX_ORDER)) {
				blockpfn += (1UL << comp_order) - 1;
				cursor += (1UL << comp_order) - 1;
			}

			goto isolate_fail;
		}

		if (!PageBuddy(page))
			goto isolate_fail;

		/*
		 * If we already hold the lock, we can skip some rechecking.
		 * Note that if we hold the lock now, checked_pageblock was
		 * already set in some previous iteration (or strict is true),
		 * so it is correct to skip the suitable migration target
		 * recheck as well.
		 */
		if (!locked) {
			/*
			 * The zone lock must be held to isolate freepages.
			 * Unfortunately this is a very coarse lock and can be
			 * heavily contended if there are parallel allocations
			 * or parallel compactions. For async compaction do not
			 * spin on the lock and we acquire the lock as late as
			 * possible.
			 */
			locked = compact_trylock_irqsave(&cc->zone->lock,
								&flags, cc);
			if (!locked)
				break;

			/* Recheck this is a buddy page under lock */
			if (!PageBuddy(page))
				goto isolate_fail;
		}

		/* Found a free page, will break it into order-0 pages */
		order = page_order(page);
		isolated = __isolate_free_page(page, order);
		if (!isolated)
			break;
		set_page_private(page, order);

		total_isolated += isolated;
		cc->nr_freepages += isolated;
		list_add_tail(&page->lru, freelist);

		if (!strict && cc->nr_migratepages <= cc->nr_freepages) {
			blockpfn += isolated;
			break;
		}
		/* Advance to the end of split page */
		blockpfn += isolated - 1;
		cursor += isolated - 1;
		continue;

isolate_fail:
		if (strict)
			break;
		else
			continue;

	}

	if (locked)
		spin_unlock_irqrestore(&cc->zone->lock, flags);

	/*
	 * There is a tiny chance that we have read bogus compound_order(),
	 * so be careful to not go outside of the pageblock.
	 */
	if (unlikely(blockpfn > end_pfn))
		blockpfn = end_pfn;

	trace_mm_compaction_isolate_freepages(*start_pfn, blockpfn,
					nr_scanned, total_isolated);

	/* Record how far we have got within the block */
	*start_pfn = blockpfn;

	/*
	 * If strict isolation is requested by CMA then check that all the
	 * pages requested were isolated. If there were any failures, 0 is
	 * returned and CMA will fail.
	 */
	if (strict && blockpfn < end_pfn)
		total_isolated = 0;

	/* Update the pageblock-skip if the whole pageblock was scanned */
	if (blockpfn == end_pfn)
		update_pageblock_skip(cc, valid_page, total_isolated, false);

	cc->total_free_scanned += nr_scanned;
	if (total_isolated)
		count_compact_events(COMPACTISOLATED, total_isolated);
	return total_isolated;
}
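/*
 * Editorial note (illustration only): __isolate_free_page() returns the
 * number of base pages it detached from the buddy allocator, and the buddy
 * order is stashed via set_page_private() above. map_pages() later reads
 * that order back with page_private(), runs post_alloc_hook() and
 * split_page(), so consumers of the freelist always see order-0 pages.
 */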
/**
 * isolate_freepages_range() - isolate free pages.
 * @start_pfn: The first PFN to start isolating.
 * @end_pfn:   The one-past-last PFN.
 *
 * Non-free pages, invalid PFNs, or zone boundaries within the
 * [start_pfn, end_pfn) range are considered errors, cause function to
 * undo its actions and return zero.
 *
 * Otherwise, function returns one-past-the-last PFN of isolated page
 * (which may be greater than end_pfn if end fell in a middle of
 * a free page).
 */
unsigned long
isolate_freepages_range(struct compact_control *cc,
			unsigned long start_pfn, unsigned long end_pfn)
{
	unsigned long isolated, pfn, block_start_pfn, block_end_pfn;
	LIST_HEAD(freelist);

	pfn = start_pfn;
	block_start_pfn = pageblock_start_pfn(pfn);
	if (block_start_pfn < cc->zone->zone_start_pfn)
		block_start_pfn = cc->zone->zone_start_pfn;
	block_end_pfn = pageblock_end_pfn(pfn);

	for (; pfn < end_pfn; pfn += isolated,
				block_start_pfn = block_end_pfn,
				block_end_pfn += pageblock_nr_pages) {
		/* Protect pfn from changing by isolate_freepages_block */
		unsigned long isolate_start_pfn = pfn;

		block_end_pfn = min(block_end_pfn, end_pfn);

		/*
		 * pfn could pass the block_end_pfn if isolated freepage
		 * is more than pageblock order. In this case, we adjust
		 * scanning range to right one.
		 */
		if (pfn >= block_end_pfn) {
			block_start_pfn = pageblock_start_pfn(pfn);
			block_end_pfn = pageblock_end_pfn(pfn);
			block_end_pfn = min(block_end_pfn, end_pfn);
		}

		if (!pageblock_pfn_to_page(block_start_pfn,
					block_end_pfn, cc->zone))
			break;

		isolated = isolate_freepages_block(cc, &isolate_start_pfn,
						block_end_pfn, &freelist, true);

		/*
		 * In strict mode, isolate_freepages_block() returns 0 if
		 * there are any holes in the block (ie. invalid PFNs or
		 * non-free pages).
		 */
		if (!isolated)
			break;

		/*
		 * If we managed to isolate pages, it is always (1 << n) *
		 * pageblock_nr_pages for some non-negative n. (Max order
		 * page may span two pageblocks).
		 */
	}

	/* __isolate_free_page() does not map the pages */
	map_pages(&freelist);

	if (pfn < end_pfn) {
		/* Loop terminated early, cleanup. */
		release_freepages(&freelist);
		return 0;
	}

	/* We don't use freelists for anything. */
	return pfn;
}

/* Similar to reclaim, but different enough that they don't share logic */
static bool too_many_isolated(struct zone *zone)
{
	unsigned long active, inactive, isolated;

	inactive = node_page_state(zone->zone_pgdat, NR_INACTIVE_FILE) +
			node_page_state(zone->zone_pgdat, NR_INACTIVE_ANON);
	active = node_page_state(zone->zone_pgdat, NR_ACTIVE_FILE) +
			node_page_state(zone->zone_pgdat, NR_ACTIVE_ANON);
	isolated = node_page_state(zone->zone_pgdat, NR_ISOLATED_FILE) +
			node_page_state(zone->zone_pgdat, NR_ISOLATED_ANON);

	return isolated > (inactive + active) / 2;
}

/**
 * isolate_migratepages_block() - isolate all migrate-able pages within
 *				  a single pageblock
 * @cc:		Compaction control structure.
 * @low_pfn:	The first PFN to isolate
 * @end_pfn:	The one-past-the-last PFN to isolate, within same pageblock
 * @isolate_mode: Isolation mode to be used.
 *
 * Isolate all pages that can be migrated from the range specified by
 * [low_pfn, end_pfn). The range is expected to be within same pageblock.
 * Returns zero if there is a fatal signal pending, otherwise PFN of the
 * first page that was not scanned (which may be both less, equal to or more
 * than end_pfn).
 *
 * The pages are isolated on cc->migratepages list (not required to be empty),
 * and cc->nr_migratepages is updated accordingly. The cc->migrate_pfn field
 * is neither read nor updated.
 */
static unsigned long
isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
			unsigned long end_pfn, isolate_mode_t isolate_mode)
{
	struct zone *zone = cc->zone;
	unsigned long nr_scanned = 0, nr_isolated = 0;
	struct lruvec *lruvec;
	unsigned long flags = 0;
	bool locked = false;
	struct page *page = NULL, *valid_page = NULL;
	unsigned long start_pfn = low_pfn;
	bool skip_on_failure = false;
	unsigned long next_skip_pfn = 0;

	/*
	 * Ensure that there are not too many pages isolated from the LRU
	 * list by either parallel reclaimers or compaction. If there are,
	 * delay for some time until fewer pages are isolated
	 */
	while (unlikely(too_many_isolated(zone))) {
		/* async migration should just abort */
		if (cc->mode == MIGRATE_ASYNC)
			return 0;

		congestion_wait(BLK_RW_ASYNC, HZ/10);

		if (fatal_signal_pending(current))
			return 0;
	}

	if (compact_should_abort(cc))
		return 0;

	if (cc->direct_compaction && (cc->mode == MIGRATE_ASYNC)) {
		skip_on_failure = true;
		next_skip_pfn = block_end_pfn(low_pfn, cc->order);
	}

	/* Time to isolate some pages for migration */
	for (; low_pfn < end_pfn; low_pfn++) {

		if (skip_on_failure && low_pfn >= next_skip_pfn) {
			/*
			 * We have isolated all migration candidates in the
			 * previous order-aligned block, and did not skip it due
			 * to failure. We should migrate the pages now and
			 * hopefully succeed compaction.
			 */
			if (nr_isolated)
				break;

			/*
			 * We failed to isolate in the previous order-aligned
			 * block. Set the new boundary to the end of the
			 * current block. Note we can't simply increase
			 * next_skip_pfn by 1 << order, as low_pfn might have
			 * been incremented by a higher number due to skipping
			 * a compound or a high-order buddy page in the
			 * previous loop iteration.
			 */
			next_skip_pfn = block_end_pfn(low_pfn, cc->order);
		}

		/*
		 * Periodically drop the lock (if held) regardless of its
		 * contention, to give a chance to IRQs. Abort async compaction
		 * if contended.
		 */
		if (!(low_pfn % SWAP_CLUSTER_MAX)
		    && compact_unlock_should_abort(zone_lru_lock(zone), flags,
								&locked, cc))
			break;

		if (!pfn_valid_within(low_pfn))
			goto isolate_fail;
		nr_scanned++;

		page = pfn_to_page(low_pfn);

		if (!valid_page)
			valid_page = page;

		/*
		 * Skip if free. We read page order here without zone lock
		 * which is generally unsafe, but the race window is small and
		 * the worst thing that can happen is that we skip some
		 * potential isolation targets.
		 */
		if (PageBuddy(page)) {
			unsigned long freepage_order = page_order_unsafe(page);

			/*
			 * Without lock, we cannot be sure that what we got is
			 * a valid page order. Consider only values in the
			 * valid order range to prevent low_pfn overflow.
			 */
			if (freepage_order > 0 && freepage_order < MAX_ORDER)
				low_pfn += (1UL << freepage_order) - 1;
			continue;
		}

		/*
		 * Regardless of being on LRU, compound pages such as THP and
		 * hugetlbfs are not to be compacted. We can potentially save
		 * a lot of iterations if we skip them at once. The check is
		 * racy, but we can consider only valid values and the only
		 * danger is skipping too much.
		 */
		if (PageCompound(page)) {
			unsigned int comp_order = compound_order(page);

			if (likely(comp_order < MAX_ORDER))
				low_pfn += (1UL << comp_order) - 1;

			goto isolate_fail;
		}

		/*
		 * Check may be lockless but that's ok as we recheck later.
		 * It's possible to migrate LRU and non-lru movable pages.
		 * Skip any other type of page
		 */
		if (!PageLRU(page)) {
			/*
			 * __PageMovable can return false positive so we need
			 * to verify it under page_lock.
			 */
			if (unlikely(__PageMovable(page)) &&
					!PageIsolated(page)) {
				if (locked) {
					spin_unlock_irqrestore(zone_lru_lock(zone),
									flags);
					locked = false;
				}

				if (!isolate_movable_page(page, isolate_mode))
					goto isolate_success;
			}

			goto isolate_fail;
		}

		/*
		 * Migration will fail if an anonymous page is pinned in memory,
		 * so avoid taking lru_lock and isolating it unnecessarily in an
		 * admittedly racy check.
		 */
		if (!page_mapping(page) &&
		    page_count(page) > page_mapcount(page))
			goto isolate_fail;

		/*
		 * Only allow to migrate anonymous pages in GFP_NOFS context
		 * because those do not depend on fs locks.
		 */
		if (!(cc->gfp_mask & __GFP_FS) && page_mapping(page))
			goto isolate_fail;

		/* If we already hold the lock, we can skip some rechecking */
		if (!locked) {
			locked = compact_trylock_irqsave(zone_lru_lock(zone),
								&flags, cc);
			if (!locked)
				break;

			/* Recheck PageLRU and PageCompound under lock */
			if (!PageLRU(page))
				goto isolate_fail;

			/*
			 * Page became compound since the non-locked check,
			 * and it's on LRU. It can only be a THP so the order
			 * is safe to read and it's 0 for tail pages.
			 */
			if (unlikely(PageCompound(page))) {
				low_pfn += (1UL << compound_order(page)) - 1;
				goto isolate_fail;
			}
		}

		lruvec = mem_cgroup_page_lruvec(page, zone->zone_pgdat);

		/* Try isolate the page */
		if (__isolate_lru_page(page, isolate_mode) != 0)
			goto isolate_fail;

		VM_BUG_ON_PAGE(PageCompound(page), page);

		/* Successfully isolated */
		del_page_from_lru_list(page, lruvec, page_lru(page));
		inc_node_page_state(page,
				NR_ISOLATED_ANON + page_is_file_cache(page));

isolate_success:
		list_add(&page->lru, &cc->migratepages);
		cc->nr_migratepages++;
		nr_isolated++;

		/*
		 * Record where we could have freed pages by migration and not
		 * yet flushed them to buddy allocator.
		 * - this is the lowest page that was isolated and likely be
		 * then freed by migration.
		 */
		if (!cc->last_migrated_pfn)
			cc->last_migrated_pfn = low_pfn;

		/* Avoid isolating too much */
		if (cc->nr_migratepages == COMPACT_CLUSTER_MAX) {
			++low_pfn;
			break;
		}

		continue;
isolate_fail:
		if (!skip_on_failure)
			continue;

		/*
		 * We have isolated some pages, but then failed. Release them
		 * instead of migrating, as we cannot form the cc->order buddy
		 * page anyway.
		 */
		if (nr_isolated) {
			if (locked) {
				spin_unlock_irqrestore(zone_lru_lock(zone), flags);
				locked = false;
			}
			putback_movable_pages(&cc->migratepages);
			cc->nr_migratepages = 0;
			cc->last_migrated_pfn = 0;
			nr_isolated = 0;
		}

		if (low_pfn < next_skip_pfn) {
			low_pfn = next_skip_pfn - 1;
			/*
			 * The check near the loop beginning would have updated
			 * next_skip_pfn too, but this is a bit simpler.
			 */
			next_skip_pfn += 1UL << cc->order;
		}
	}

	/*
	 * The PageBuddy() check could have potentially brought us outside
	 * the range to be scanned.
	 */
	if (unlikely(low_pfn > end_pfn))
		low_pfn = end_pfn;

	if (locked)
		spin_unlock_irqrestore(zone_lru_lock(zone), flags);

	/*
	 * Update the pageblock-skip information and cached scanner pfn,
	 * if the whole pageblock was scanned without isolating any page.
	 */
	if (low_pfn == end_pfn)
		update_pageblock_skip(cc, valid_page, nr_isolated, true);

	trace_mm_compaction_isolate_migratepages(start_pfn, low_pfn,
						nr_scanned, nr_isolated);

	cc->total_migrate_scanned += nr_scanned;
	if (nr_isolated)
		count_compact_events(COMPACTISOLATED, nr_isolated);

	return low_pfn;
}

/**
 * isolate_migratepages_range() - isolate migrate-able pages in a PFN range
 * @cc:        Compaction control structure.
 * @start_pfn: The first PFN to start isolating.
 * @end_pfn:   The one-past-last PFN.
 *
 * Returns zero if isolation fails fatally due to e.g. pending signal.
 * Otherwise, function returns one-past-the-last PFN of isolated page
 * (which may be greater than end_pfn if end fell in a middle of a THP page).
 */
unsigned long
isolate_migratepages_range(struct compact_control *cc, unsigned long start_pfn,
							unsigned long end_pfn)
{
	unsigned long pfn, block_start_pfn, block_end_pfn;

	/* Scan block by block. First and last block may be incomplete */
	pfn = start_pfn;
	block_start_pfn = pageblock_start_pfn(pfn);
	if (block_start_pfn < cc->zone->zone_start_pfn)
		block_start_pfn = cc->zone->zone_start_pfn;
	block_end_pfn = pageblock_end_pfn(pfn);

	for (; pfn < end_pfn; pfn = block_end_pfn,
				block_start_pfn = block_end_pfn,
				block_end_pfn += pageblock_nr_pages) {

		block_end_pfn = min(block_end_pfn, end_pfn);

		if (!pageblock_pfn_to_page(block_start_pfn,
					block_end_pfn, cc->zone))
			continue;

		pfn = isolate_migratepages_block(cc, pfn, block_end_pfn,
							ISOLATE_UNEVICTABLE);

		if (!pfn)
			break;

		if (cc->nr_migratepages == COMPACT_CLUSTER_MAX)
			break;
	}

	return pfn;
}

#endif /* CONFIG_COMPACTION || CONFIG_CMA */
#ifdef CONFIG_COMPACTION

/* Returns true if the page is within a block suitable for migration to */
static bool suitable_migration_target(struct compact_control *cc,
							struct page *page)
{
	if (cc->ignore_block_suitable)
		return true;

	/* If the page is a large free page, then disallow migration */
	if (PageBuddy(page)) {
		/*
		 * We are checking page_order without zone->lock taken. But
		 * the only small danger is that we skip a potentially suitable
		 * pageblock, so it's not worth to check order for valid range.
		 */
		if (page_order_unsafe(page) >= pageblock_order)
			return false;
	}

	/* If the block is MIGRATE_MOVABLE or MIGRATE_CMA, allow migration */
	if (migrate_async_suitable(get_pageblock_migratetype(page)))
		return true;

	/* Otherwise skip the block */
	return false;
}

/*
 * Test whether the free scanner has reached the same or lower pageblock than
 * the migration scanner, and compaction should thus terminate.
 */
static inline bool compact_scanners_met(struct compact_control *cc)
{
	return (cc->free_pfn >> pageblock_order)
		<= (cc->migrate_pfn >> pageblock_order);
}

/*
 * Based on information in the current compact_control, find blocks
 * suitable for isolating free pages from and then isolate them.
 */
static void isolate_freepages(struct compact_control *cc)
{
	struct zone *zone = cc->zone;
	struct page *page;
	unsigned long block_start_pfn;	/* start of current pageblock */
	unsigned long isolate_start_pfn; /* exact pfn we start at */
	unsigned long block_end_pfn;	/* end of current pageblock */
	unsigned long low_pfn;	     /* lowest pfn scanner is able to scan */
	struct list_head *freelist = &cc->freepages;

	/*
	 * Initialise the free scanner. The starting point is where we last
	 * successfully isolated from, zone-cached value, or the end of the
	 * zone when isolating for the first time. For looping we also need
	 * this pfn aligned down to the pageblock boundary, because we do
	 * block_start_pfn -= pageblock_nr_pages in the for loop.
	 * For ending point, take care when isolating in last pageblock of a
	 * zone which ends in the middle of a pageblock.
	 * The low boundary is the end of the pageblock the migration scanner
	 * is using.
	 */
	isolate_start_pfn = cc->free_pfn;
	block_start_pfn = pageblock_start_pfn(cc->free_pfn);
	block_end_pfn = min(block_start_pfn + pageblock_nr_pages,
						zone_end_pfn(zone));
	low_pfn = pageblock_end_pfn(cc->migrate_pfn);

	/*
	 * Isolate free pages until enough are available to migrate the
	 * pages on cc->migratepages. We stop searching if the migrate
	 * and free page scanners meet or enough free pages are isolated.
	 */
	for (; block_start_pfn >= low_pfn;
				block_end_pfn = block_start_pfn,
				block_start_pfn -= pageblock_nr_pages,
				isolate_start_pfn = block_start_pfn) {
		/*
		 * This can iterate a massively long zone without finding any
		 * suitable migration targets, so periodically check if we need
		 * to schedule, or even abort async compaction.
		 */
		if (!(block_start_pfn % (SWAP_CLUSTER_MAX * pageblock_nr_pages))
						&& compact_should_abort(cc))
			break;

		page = pageblock_pfn_to_page(block_start_pfn, block_end_pfn,
									zone);
		if (!page)
			continue;

		/* Check the block is suitable for migration */
		if (!suitable_migration_target(cc, page))
			continue;

		/* If isolation recently failed, do not retry */
		if (!isolation_suitable(cc, page))
			continue;

		/* Found a block suitable for isolating free pages from. */
		isolate_freepages_block(cc, &isolate_start_pfn, block_end_pfn,
					freelist, false);

		/*
		 * If we isolated enough freepages, or aborted due to lock
		 * contention, terminate.
		 */
		if ((cc->nr_freepages >= cc->nr_migratepages)
							|| cc->contended) {
			if (isolate_start_pfn >= block_end_pfn) {
				/*
				 * Restart at previous pageblock if more
				 * freepages can be isolated next time.
				 */
				isolate_start_pfn =
					block_start_pfn - pageblock_nr_pages;
			}
			break;
		} else if (isolate_start_pfn < block_end_pfn) {
			/*
			 * If isolation failed early, do not continue
			 * needlessly.
			 */
			break;
		}
	}

	/* __isolate_free_page() does not map the pages */
	map_pages(freelist);

	/*
	 * Record where the free scanner will restart next time. Either we
	 * broke from the loop and set isolate_start_pfn based on the last
	 * call to isolate_freepages_block(), or we met the migration scanner
	 * and the loop terminated due to isolate_start_pfn < low_pfn
	 */
	cc->free_pfn = isolate_start_pfn;
}

/*
 * This is a migrate-callback that "allocates" freepages by taking pages
 * from the isolated freelists in the block we are migrating to.
 */
static struct page *compaction_alloc(struct page *migratepage,
					unsigned long data,
					int **result)
{
	struct compact_control *cc = (struct compact_control *)data;
	struct page *freepage;

	/*
	 * Isolate free pages if necessary, and if we are not aborting due to
	 * contention.
	 */
	if (list_empty(&cc->freepages)) {
		if (!cc->contended)
			isolate_freepages(cc);

		if (list_empty(&cc->freepages))
			return NULL;
	}

	freepage = list_entry(cc->freepages.next, struct page, lru);
	list_del(&freepage->lru);
	cc->nr_freepages--;

	return freepage;
}

/*
 * This is a migrate-callback that "frees" freepages back to the isolated
 * freelist. All pages on the freelist are from the same zone, so there is no
 * special handling needed for NUMA.
 */
static void compaction_free(struct page *page, unsigned long data)
{
	struct compact_control *cc = (struct compact_control *)data;

	list_add(&page->lru, &cc->freepages);
	cc->nr_freepages++;
}

/* possible outcome of isolate_migratepages */
typedef enum {
	ISOLATE_ABORT,		/* Abort compaction now */
	ISOLATE_NONE,		/* No pages isolated, continue scanning */
	ISOLATE_SUCCESS,	/* Pages isolated, migrate */
} isolate_migrate_t;

/*
 * Allow userspace to control policy on scanning the unevictable LRU for
 * compactable pages.
 */
int sysctl_compact_unevictable_allowed __read_mostly = 1;

/*
 * Isolate all pages that can be migrated from the first suitable block,
 * starting at the block pointed to by the migrate scanner pfn within
 * compact_control.
 */
static isolate_migrate_t isolate_migratepages(struct zone *zone,
					struct compact_control *cc)
{
	unsigned long block_start_pfn;
	unsigned long block_end_pfn;
	unsigned long low_pfn;
	struct page *page;
	const isolate_mode_t isolate_mode =
		(sysctl_compact_unevictable_allowed ? ISOLATE_UNEVICTABLE : 0) |
		(cc->mode != MIGRATE_SYNC ? ISOLATE_ASYNC_MIGRATE : 0);

	/*
	 * Start at where we last stopped, or beginning of the zone as
	 * initialized by compact_zone()
	 */
	low_pfn = cc->migrate_pfn;
	block_start_pfn = pageblock_start_pfn(low_pfn);
	if (block_start_pfn < zone->zone_start_pfn)
		block_start_pfn = zone->zone_start_pfn;

	/* Only scan within a pageblock boundary */
	block_end_pfn = pageblock_end_pfn(low_pfn);

	/*
	 * Iterate over whole pageblocks until we find the first suitable.
	 * Do not cross the free scanner.
	 */
	for (; block_end_pfn <= cc->free_pfn;
			low_pfn = block_end_pfn,
			block_start_pfn = block_end_pfn,
			block_end_pfn += pageblock_nr_pages) {

		/*
		 * This can potentially iterate a massively long zone with
		 * many pageblocks unsuitable, so periodically check if we
		 * need to schedule, or even abort async compaction.
		 */
		if (!(low_pfn % (SWAP_CLUSTER_MAX * pageblock_nr_pages))
						&& compact_should_abort(cc))
			break;

		page = pageblock_pfn_to_page(block_start_pfn, block_end_pfn,
									zone);
		if (!page)
			continue;

		/* If isolation recently failed, do not retry */
		if (!isolation_suitable(cc, page))
			continue;

		/*
		 * For async compaction, also only scan in MOVABLE blocks.
		 * Async compaction is optimistic to see if the minimum amount
		 * of work satisfies the allocation.
		 */
		if (cc->mode == MIGRATE_ASYNC &&
		    !migrate_async_suitable(get_pageblock_migratetype(page)))
			continue;

		/* Perform the isolation */
		low_pfn = isolate_migratepages_block(cc, low_pfn,
						block_end_pfn, isolate_mode);

		if (!low_pfn || cc->contended)
			return ISOLATE_ABORT;

		/*
		 * Either we isolated something and proceed with migration. Or
		 * we failed and compact_zone should decide if we should
		 * continue or not.
		 */
		break;
	}

	/* Record where migration scanner will be restarted. */
	cc->migrate_pfn = low_pfn;

	return cc->nr_migratepages ? ISOLATE_SUCCESS : ISOLATE_NONE;
}

/*
 * order == -1 is expected when compacting via
 * /proc/sys/vm/compact_memory
 */
static inline bool is_via_compact_memory(int order)
{
	return order == -1;
}

static enum compact_result __compact_finished(struct zone *zone, struct compact_control *cc,
			const int migratetype)
{
	unsigned int order;
	unsigned long watermark;

	if (cc->contended || fatal_signal_pending(current))
		return COMPACT_CONTENDED;

	/* Compaction run completes if the migrate and free scanner meet */
	if (compact_scanners_met(cc)) {
		/* Let the next compaction start anew. */
		reset_cached_positions(zone);

		/*
		 * Mark that the PG_migrate_skip information should be cleared
		 * by kswapd when it goes to sleep. kcompactd does not set the
		 * flag itself as the decision to be clear should be directly
		 * based on an allocation request.
		 */
		if (cc->direct_compaction)
			zone->compact_blockskip_flush = true;

		if (cc->whole_zone)
			return COMPACT_COMPLETE;
		else
			return COMPACT_PARTIAL_SKIPPED;
	}

	if (is_via_compact_memory(cc->order))
		return COMPACT_CONTINUE;

	/* Compaction run is not finished if the watermark is not met */
	watermark = zone->watermark[cc->alloc_flags & ALLOC_WMARK_MASK];

	if (!zone_watermark_ok(zone, cc->order, watermark, cc->classzone_idx,
							cc->alloc_flags))
		return COMPACT_CONTINUE;

	/* Direct compactor: Is a suitable page free? */
	for (order = cc->order; order < MAX_ORDER; order++) {
		struct free_area *area = &zone->free_area[order];
		bool can_steal;

		/* Job done if page is free of the right migratetype */
		if (!list_empty(&area->free_list[migratetype]))
			return COMPACT_SUCCESS;

#ifdef CONFIG_CMA
		/* MIGRATE_MOVABLE can fallback on MIGRATE_CMA */
		if (migratetype == MIGRATE_MOVABLE &&
			!list_empty(&area->free_list[MIGRATE_CMA]))
			return COMPACT_SUCCESS;
#endif
		/*
		 * Job done if allocation would steal freepages from
		 * other migratetype buddy lists.
		 */
		if (find_suitable_fallback(area, order, migratetype,
						true, &can_steal) != -1)
			return COMPACT_SUCCESS;
	}

	return COMPACT_NO_SUITABLE_PAGE;
}

static enum compact_result compact_finished(struct zone *zone,
			struct compact_control *cc,
			const int migratetype)
{
	int ret;

	ret = __compact_finished(zone, cc, migratetype);
	trace_mm_compaction_finished(zone, cc->order, ret);
	if (ret == COMPACT_NO_SUITABLE_PAGE)
		ret = COMPACT_CONTINUE;

	return ret;
}

/*
 * compaction_suitable: Is this suitable to run compaction on this zone now?
 * Returns
 *   COMPACT_SKIPPED  - If there are too few free pages for compaction
 *   COMPACT_SUCCESS  - If the allocation would succeed without compaction
 *   COMPACT_CONTINUE - If compaction should run now
 */
static enum compact_result __compaction_suitable(struct zone *zone, int order,
					unsigned int alloc_flags,
					int classzone_idx,
					unsigned long wmark_target)
{
	unsigned long watermark;

	if (is_via_compact_memory(order))
		return COMPACT_CONTINUE;

	watermark = zone->watermark[alloc_flags & ALLOC_WMARK_MASK];
	/*
	 * If watermarks for high-order allocation are already met, there
	 * should be no need for compaction at all.
	 */
	if (zone_watermark_ok(zone, order, watermark, classzone_idx,
								alloc_flags))
		return COMPACT_SUCCESS;

	/*
	 * Watermarks for order-0 must be met for compaction to be able to
	 * isolate free pages for migration targets. This means that the
	 * watermark and alloc_flags have to match, or be more pessimistic than
	 * the check in __isolate_free_page(). We don't use the direct
	 * compactor's alloc_flags, as they are not relevant for freepage
	 * isolation. We however do use the direct compactor's classzone_idx to
	 * skip over zones where lowmem reserves would prevent allocation even
	 * if compaction succeeds.
	 * For costly orders, we require low watermark instead of min for
	 * compaction to proceed to increase its chances.
	 * ALLOC_CMA is used, as pages in CMA pageblocks are considered
	 * suitable migration targets
	 */
	watermark = (order > PAGE_ALLOC_COSTLY_ORDER) ?
				low_wmark_pages(zone) : min_wmark_pages(zone);
	watermark += compact_gap(order);
	if (!__zone_watermark_ok(zone, 0, watermark, classzone_idx,
						ALLOC_CMA, wmark_target))
		return COMPACT_SKIPPED;

	return COMPACT_CONTINUE;
}

enum compact_result compaction_suitable(struct zone *zone, int order,
					unsigned int alloc_flags,
					int classzone_idx)
{
	enum compact_result ret;
	int fragindex;

	ret = __compaction_suitable(zone, order, alloc_flags, classzone_idx,
				    zone_page_state(zone, NR_FREE_PAGES));
	/*
	 * fragmentation index determines if allocation failures are due to
	 * low memory or external fragmentation
	 *
	 * index of -1000 would imply allocations might succeed depending on
	 * watermarks, but we already failed the high-order watermark check
	 * index towards 0 implies failure is due to lack of memory
	 * index towards 1000 implies failure is due to fragmentation
	 *
	 * Only compact if a failure would be due to fragmentation. Also
	 * ignore fragindex for non-costly orders where the alternative to
	 * a successful reclaim/compaction is OOM. Fragindex and the
	 * vm.extfrag_threshold sysctl is meant as a heuristic to prevent
	 * excessive compaction for costly orders, but it should not be at the
	 * expense of system stability.
	 */
	if (ret == COMPACT_CONTINUE && (order > PAGE_ALLOC_COSTLY_ORDER)) {
		fragindex = fragmentation_index(zone, order);
		if (fragindex >= 0 && fragindex <= sysctl_extfrag_threshold)
			ret = COMPACT_NOT_SUITABLE_ZONE;
	}

	trace_mm_compaction_suitable(zone, order, ret);
	if (ret == COMPACT_NOT_SUITABLE_ZONE)
		ret = COMPACT_SKIPPED;

	return ret;
}
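/*
 * Illustrative example (editorial, not from the original source): with the
 * default sysctl_extfrag_threshold of 500, a costly-order request in a zone
 * whose fragmentation_index() comes back at, say, 300 is treated as failing
 * for lack of memory, so compaction is skipped in favour of reclaim; an
 * index of 800 instead points at external fragmentation and compaction is
 * allowed to continue.
 */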
bool compaction_zonelist_suitable(struct alloc_context *ac, int order,
		int alloc_flags)
{
	struct zone *zone;
	struct zoneref *z;

	/*
	 * Make sure at least one zone would pass __compaction_suitable if we continue
	 * retrying the reclaim.
	 */
	for_each_zone_zonelist_nodemask(zone, z, ac->zonelist, ac->high_zoneidx,
					ac->nodemask) {
		unsigned long available;
		enum compact_result compact_result;

		/*
		 * Do not consider all the reclaimable memory because we do not
		 * want to thrash just for a single high order allocation which
		 * is not even guaranteed to appear even if __compaction_suitable
		 * is happy about the watermark check.
		 */
		available = zone_reclaimable_pages(zone) / order;
		available += zone_page_state_snapshot(zone, NR_FREE_PAGES);
		compact_result = __compaction_suitable(zone, order, alloc_flags,
				ac_classzone_idx(ac), available);
		if (compact_result != COMPACT_SKIPPED)
			return true;
	}

	return false;
}

static enum compact_result compact_zone(struct zone *zone, struct compact_control *cc)
{
	enum compact_result ret;
	unsigned long start_pfn = zone->zone_start_pfn;
	unsigned long end_pfn = zone_end_pfn(zone);
	const int migratetype = gfpflags_to_migratetype(cc->gfp_mask);
	const bool sync = cc->mode != MIGRATE_ASYNC;

	ret = compaction_suitable(zone, cc->order, cc->alloc_flags,
							cc->classzone_idx);
	/* Compaction is likely to fail */
	if (ret == COMPACT_SUCCESS || ret == COMPACT_SKIPPED)
		return ret;

	/* huh, compaction_suitable is returning something unexpected */
	VM_BUG_ON(ret != COMPACT_CONTINUE);

	/*
	 * Clear pageblock skip if there were failures recently and compaction
	 * is about to be retried after being deferred.
	 */
	if (compaction_restarting(zone, cc->order))
		__reset_isolation_suitable(zone);

	/*
	 * Setup to move all movable pages to the end of the zone. Use cached
	 * information on where the scanners should start (unless we explicitly
	 * want to compact the whole zone), but check that it is initialised
	 * by ensuring the values are within zone boundaries.
	 */
	if (cc->whole_zone) {
		cc->migrate_pfn = start_pfn;
		cc->free_pfn = pageblock_start_pfn(end_pfn - 1);
	} else {
		cc->migrate_pfn = zone->compact_cached_migrate_pfn[sync];
		cc->free_pfn = zone->compact_cached_free_pfn;
		if (cc->free_pfn < start_pfn || cc->free_pfn >= end_pfn) {
			cc->free_pfn = pageblock_start_pfn(end_pfn - 1);
			zone->compact_cached_free_pfn = cc->free_pfn;
		}
		if (cc->migrate_pfn < start_pfn || cc->migrate_pfn >= end_pfn) {
			cc->migrate_pfn = start_pfn;
			zone->compact_cached_migrate_pfn[0] = cc->migrate_pfn;
			zone->compact_cached_migrate_pfn[1] = cc->migrate_pfn;
		}

		if (cc->migrate_pfn == start_pfn)
			cc->whole_zone = true;
	}

	cc->last_migrated_pfn = 0;

	trace_mm_compaction_begin(start_pfn, cc->migrate_pfn,
				cc->free_pfn, end_pfn, sync);

	migrate_prep_local();

	while ((ret = compact_finished(zone, cc, migratetype)) ==
						COMPACT_CONTINUE) {
		int err;

		switch (isolate_migratepages(zone, cc)) {
		case ISOLATE_ABORT:
			ret = COMPACT_CONTENDED;
			putback_movable_pages(&cc->migratepages);
			cc->nr_migratepages = 0;
			goto out;
		case ISOLATE_NONE:
			/*
			 * We haven't isolated and migrated anything, but
			 * there might still be unflushed migrations from
			 * previous cc->order aligned block.
			 */
			goto check_drain;
		case ISOLATE_SUCCESS:
			;
		}

		err = migrate_pages(&cc->migratepages, compaction_alloc,
				compaction_free, (unsigned long)cc, cc->mode,
				MR_COMPACTION);

		trace_mm_compaction_migratepages(cc->nr_migratepages, err,
							&cc->migratepages);

		/* All pages were either migrated or will be released */
		cc->nr_migratepages = 0;
		if (err) {
			putback_movable_pages(&cc->migratepages);
			/*
			 * migrate_pages() may return -ENOMEM when scanners meet
			 * and we want compact_finished() to detect it
			 */
			if (err == -ENOMEM && !compact_scanners_met(cc)) {
				ret = COMPACT_CONTENDED;
				goto out;
			}
			/*
			 * We failed to migrate at least one page in the current
			 * order-aligned block, so skip the rest of it.
			 */
			if (cc->direct_compaction &&
						(cc->mode == MIGRATE_ASYNC)) {
				cc->migrate_pfn = block_end_pfn(
						cc->migrate_pfn - 1, cc->order);
				/* Draining pcplists is useless in this case */
				cc->last_migrated_pfn = 0;

			}
		}

check_drain:
		/*
		 * Has the migration scanner moved away from the previous
		 * cc->order aligned block where we migrated from? If yes,
		 * flush the pages that were freed, so that they can merge and
		 * compact_finished() can detect immediately if allocation
		 * would succeed.
		 */
		if (cc->order > 0 && cc->last_migrated_pfn) {
			int cpu;
			unsigned long current_block_start =
				block_start_pfn(cc->migrate_pfn, cc->order);

			if (cc->last_migrated_pfn < current_block_start) {
				cpu = get_cpu();
				lru_add_drain_cpu(cpu);
				drain_local_pages(zone);
				put_cpu();
				/* No more flushing until we migrate again */
				cc->last_migrated_pfn = 0;
			}
		}

	}

out:
	/*
	 * Release free pages and update where the free scanner should restart,
	 * so we don't leave any returned pages behind in the next attempt.
	 */
	if (cc->nr_freepages > 0) {
		unsigned long free_pfn = release_freepages(&cc->freepages);

		cc->nr_freepages = 0;
		VM_BUG_ON(free_pfn == 0);
		/* The cached pfn is always the first in a pageblock */
		free_pfn = pageblock_start_pfn(free_pfn);
		/*
		 * Only go back, not forward. The cached pfn might have been
		 * already reset to zone end in compact_finished()
		 */
		if (free_pfn > zone->compact_cached_free_pfn)
			zone->compact_cached_free_pfn = free_pfn;
	}

	count_compact_events(COMPACTMIGRATE_SCANNED, cc->total_migrate_scanned);
	count_compact_events(COMPACTFREE_SCANNED, cc->total_free_scanned);

	trace_mm_compaction_end(start_pfn, cc->migrate_pfn,
				cc->free_pfn, end_pfn, sync, ret);

	return ret;
}

static enum compact_result compact_zone_order(struct zone *zone, int order,
		gfp_t gfp_mask, enum compact_priority prio,
		unsigned int alloc_flags, int classzone_idx)
{
	enum compact_result ret;
	struct compact_control cc = {
		.nr_freepages = 0,
		.nr_migratepages = 0,
		.total_migrate_scanned = 0,
		.total_free_scanned = 0,
		.order = order,
		.gfp_mask = gfp_mask,
		.zone = zone,
		.mode = (prio == COMPACT_PRIO_ASYNC) ?
					MIGRATE_ASYNC : MIGRATE_SYNC_LIGHT,
		.alloc_flags = alloc_flags,
		.classzone_idx = classzone_idx,
		.direct_compaction = true,
		.whole_zone = (prio == MIN_COMPACT_PRIORITY),
		.ignore_skip_hint = (prio == MIN_COMPACT_PRIORITY),
		.ignore_block_suitable = (prio == MIN_COMPACT_PRIORITY)
	};
	INIT_LIST_HEAD(&cc.freepages);
	INIT_LIST_HEAD(&cc.migratepages);

	ret = compact_zone(zone, &cc);

	VM_BUG_ON(!list_empty(&cc.freepages));
	VM_BUG_ON(!list_empty(&cc.migratepages));

	return ret;
}

int sysctl_extfrag_threshold = 500;

/**
 * try_to_compact_pages - Direct compact to satisfy a high-order allocation
 * @gfp_mask: The GFP mask of the current allocation
 * @order: The order of the current allocation
 * @alloc_flags: The allocation flags of the current allocation
 * @ac: The context of current allocation
 * @prio: The compaction priority, which determines the migration mode
 *	  (async, sync light, or sync)
 *
 * This is the main entry point for direct page compaction.
 */
enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order,
		unsigned int alloc_flags, const struct alloc_context *ac,
		enum compact_priority prio)
{
	int may_perform_io = gfp_mask & __GFP_IO;
	struct zoneref *z;
	struct zone *zone;
	enum compact_result rc = COMPACT_SKIPPED;

	/*
	 * Check if the GFP flags allow compaction - GFP_NOIO is really
	 * tricky context because the migration might require IO
	 */
	if (!may_perform_io)
		return COMPACT_SKIPPED;

	trace_mm_compaction_try_to_compact_pages(order, gfp_mask, prio);

	/* Compact each zone in the list */
	for_each_zone_zonelist_nodemask(zone, z, ac->zonelist, ac->high_zoneidx,
								ac->nodemask) {
		enum compact_result status;

		if (prio > MIN_COMPACT_PRIORITY
					&& compaction_deferred(zone, order)) {
			rc = max_t(enum compact_result, COMPACT_DEFERRED, rc);
			continue;
		}

		status = compact_zone_order(zone, order, gfp_mask, prio,
					alloc_flags, ac_classzone_idx(ac));
		rc = max(status, rc);

		/* The allocation should succeed, stop compacting */
		if (status == COMPACT_SUCCESS) {
			/*
			 * We think the allocation will succeed in this zone,
			 * but it is not certain, hence the false. The caller
			 * will repeat this with true if allocation indeed
			 * succeeds in this zone.
			 */
			compaction_defer_reset(zone, order, false);

			break;
		}

		if (prio != COMPACT_PRIO_ASYNC && (status == COMPACT_COMPLETE ||
					status == COMPACT_PARTIAL_SKIPPED))
			/*
			 * We think that allocation won't succeed in this zone
			 * so we defer compaction there. If it ends up
			 * succeeding after all, it will be reset.
			 */
			defer_compaction(zone, order);

		/*
		 * We might have stopped compacting due to need_resched() in
		 * async compaction, or due to a fatal signal detected. In that
		 * case do not try further zones
		 */
		if ((prio == COMPACT_PRIO_ASYNC && need_resched())
					|| fatal_signal_pending(current))
			break;
	}

	return rc;
}

/* Compact all zones within a node */
static void compact_node(int nid)
{
	pg_data_t *pgdat = NODE_DATA(nid);
	int zoneid;
	struct zone *zone;
	struct compact_control cc = {
		.order = -1,
		.total_migrate_scanned = 0,
		.total_free_scanned = 0,
		.mode = MIGRATE_SYNC,
		.ignore_skip_hint = true,
		.whole_zone = true,
		.gfp_mask = GFP_KERNEL,
	};

	for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) {

		zone = &pgdat->node_zones[zoneid];
		if (!populated_zone(zone))
			continue;

		cc.nr_freepages = 0;
		cc.nr_migratepages = 0;
		cc.zone = zone;
		INIT_LIST_HEAD(&cc.freepages);
		INIT_LIST_HEAD(&cc.migratepages);

		compact_zone(zone, &cc);

		VM_BUG_ON(!list_empty(&cc.freepages));
		VM_BUG_ON(!list_empty(&cc.migratepages));
	}
}

/* Compact all nodes in the system */
static void compact_nodes(void)
{
	int nid;

	/* Flush pending updates to the LRU lists */
	lru_add_drain_all();

	for_each_online_node(nid)
		compact_node(nid);
}

/* The written value is actually unused, all memory is compacted */
int sysctl_compact_memory;

/*
 * This is the entry point for compacting all nodes via
 * /proc/sys/vm/compact_memory
 */
int sysctl_compaction_handler(struct ctl_table *table, int write,
			void __user *buffer, size_t *length, loff_t *ppos)
{
	if (write)
		compact_nodes();

	return 0;
}

int sysctl_extfrag_handler(struct ctl_table *table, int write,
			void __user *buffer, size_t *length, loff_t *ppos)
{
	proc_dointvec_minmax(table, write, buffer, length, ppos);

	return 0;
}

#if defined(CONFIG_SYSFS) && defined(CONFIG_NUMA)
static ssize_t sysfs_compact_node(struct device *dev,
			struct device_attribute *attr,
			const char *buf, size_t count)
{
	int nid = dev->id;

	if (nid >= 0 && nid < nr_node_ids && node_online(nid)) {
		/* Flush pending updates to the LRU lists */
		lru_add_drain_all();

		compact_node(nid);
	}

	return count;
}
static DEVICE_ATTR(compact, S_IWUSR, NULL, sysfs_compact_node);

int compaction_register_node(struct node *node)
{
	return device_create_file(&node->dev, &dev_attr_compact);
}

void compaction_unregister_node(struct node *node)
{
	return device_remove_file(&node->dev, &dev_attr_compact);
}
#endif /* CONFIG_SYSFS && CONFIG_NUMA */

static inline bool kcompactd_work_requested(pg_data_t *pgdat)
{
	return pgdat->kcompactd_max_order > 0 || kthread_should_stop();
}

static bool kcompactd_node_suitable(pg_data_t *pgdat)
{
	int zoneid;
	struct zone *zone;
	enum zone_type classzone_idx = pgdat->kcompactd_classzone_idx;

	for (zoneid = 0; zoneid <= classzone_idx; zoneid++) {
		zone = &pgdat->node_zones[zoneid];

		if (!populated_zone(zone))
			continue;

		if (compaction_suitable(zone, pgdat->kcompactd_max_order, 0,
					classzone_idx) == COMPACT_CONTINUE)
			return true;
	}

	return false;
}

static void kcompactd_do_work(pg_data_t *pgdat)
{
	/*
	 * With no special task, compact all zones so that a page of requested

static inline bool kcompactd_work_requested(pg_data_t *pgdat)
{
	return pgdat->kcompactd_max_order > 0 || kthread_should_stop();
}

static bool kcompactd_node_suitable(pg_data_t *pgdat)
{
	int zoneid;
	struct zone *zone;
	enum zone_type classzone_idx = pgdat->kcompactd_classzone_idx;

	for (zoneid = 0; zoneid <= classzone_idx; zoneid++) {
		zone = &pgdat->node_zones[zoneid];

		if (!populated_zone(zone))
			continue;

		if (compaction_suitable(zone, pgdat->kcompactd_max_order, 0,
					classzone_idx) == COMPACT_CONTINUE)
			return true;
	}

	return false;
}

static void kcompactd_do_work(pg_data_t *pgdat)
{
	/*
	 * With no special task, compact all zones so that a page of requested
	 * order is allocatable.
	 */
	int zoneid;
	struct zone *zone;
	struct compact_control cc = {
		.order = pgdat->kcompactd_max_order,
		.total_migrate_scanned = 0,
		.total_free_scanned = 0,
		.classzone_idx = pgdat->kcompactd_classzone_idx,
		.mode = MIGRATE_SYNC_LIGHT,
		.ignore_skip_hint = true,
		.gfp_mask = GFP_KERNEL,
	};
	trace_mm_compaction_kcompactd_wake(pgdat->node_id, cc.order,
							cc.classzone_idx);
	count_compact_event(KCOMPACTD_WAKE);

	for (zoneid = 0; zoneid <= cc.classzone_idx; zoneid++) {
		int status;

		zone = &pgdat->node_zones[zoneid];
		if (!populated_zone(zone))
			continue;

		if (compaction_deferred(zone, cc.order))
			continue;

		if (compaction_suitable(zone, cc.order, 0, zoneid) !=
							COMPACT_CONTINUE)
			continue;

		cc.nr_freepages = 0;
		cc.nr_migratepages = 0;
		cc.total_migrate_scanned = 0;
		cc.total_free_scanned = 0;
		cc.zone = zone;
		INIT_LIST_HEAD(&cc.freepages);
		INIT_LIST_HEAD(&cc.migratepages);

		if (kthread_should_stop())
			return;
		status = compact_zone(zone, &cc);

		if (status == COMPACT_SUCCESS) {
			compaction_defer_reset(zone, cc.order, false);
		} else if (status == COMPACT_PARTIAL_SKIPPED || status == COMPACT_COMPLETE) {
			/*
			 * We use sync migration mode here, so we defer like
			 * sync direct compaction does.
			 */
			defer_compaction(zone, cc.order);
		}

		count_compact_events(KCOMPACTD_MIGRATE_SCANNED,
				     cc.total_migrate_scanned);
		count_compact_events(KCOMPACTD_FREE_SCANNED,
				     cc.total_free_scanned);

		VM_BUG_ON(!list_empty(&cc.freepages));
		VM_BUG_ON(!list_empty(&cc.migratepages));
	}

	/*
	 * Regardless of success, we are done until woken up next. But remember
	 * the requested order/classzone_idx in case it was higher/tighter than
	 * our current ones.
	 */
	if (pgdat->kcompactd_max_order <= cc.order)
		pgdat->kcompactd_max_order = 0;
	if (pgdat->kcompactd_classzone_idx >= cc.classzone_idx)
		pgdat->kcompactd_classzone_idx = pgdat->nr_zones - 1;
}

void wakeup_kcompactd(pg_data_t *pgdat, int order, int classzone_idx)
{
	if (!order)
		return;

	if (pgdat->kcompactd_max_order < order)
		pgdat->kcompactd_max_order = order;

	/*
	 * Pairs with implicit barrier in wait_event_freezable()
	 * such that wakeups are not missed in the lockless
	 * waitqueue_active() call.
	 */
	smp_acquire__after_ctrl_dep();

	if (pgdat->kcompactd_classzone_idx > classzone_idx)
		pgdat->kcompactd_classzone_idx = classzone_idx;

	if (!waitqueue_active(&pgdat->kcompactd_wait))
		return;

	if (!kcompactd_node_suitable(pgdat))
		return;

	trace_mm_compaction_wakeup_kcompactd(pgdat->node_id, order,
							classzone_idx);
	wake_up_interruptible(&pgdat->kcompactd_wait);
}
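
/*
 * Note on the wakeup handshake above (descriptive summary, not new behaviour):
 * pgdat->kcompactd_max_order is the work request. A waker in the reclaim path
 * (typically kswapd, before it goes to sleep) calls wakeup_kcompactd(), which
 * records the request in kcompactd_max_order / kcompactd_classzone_idx and
 * wakes the per-node thread only if the waitqueue is active and at least one
 * eligible zone looks compactable. kcompactd_do_work() then consumes the
 * request and resets both fields once it has caught up with the highest
 * order seen, so a new write is needed to wake it again.
 */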

/*
 * The background compaction daemon, started as a kernel thread
 * from the init process.
 */
static int kcompactd(void *p)
{
	pg_data_t *pgdat = (pg_data_t*)p;
	struct task_struct *tsk = current;

	const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);

	if (!cpumask_empty(cpumask))
		set_cpus_allowed_ptr(tsk, cpumask);

	set_freezable();

	pgdat->kcompactd_max_order = 0;
	pgdat->kcompactd_classzone_idx = pgdat->nr_zones - 1;

	while (!kthread_should_stop()) {
		trace_mm_compaction_kcompactd_sleep(pgdat->node_id);
		wait_event_freezable(pgdat->kcompactd_wait,
				kcompactd_work_requested(pgdat));

		kcompactd_do_work(pgdat);
	}

	return 0;
}

/*
 * This kcompactd start function will be called by init and node-hot-add.
 * On node-hot-add, kcompactd will be moved to the proper cpus if cpus are
 * hot-added.
 */
int kcompactd_run(int nid)
{
	pg_data_t *pgdat = NODE_DATA(nid);
	int ret = 0;

	if (pgdat->kcompactd)
		return 0;

	pgdat->kcompactd = kthread_run(kcompactd, pgdat, "kcompactd%d", nid);
	if (IS_ERR(pgdat->kcompactd)) {
		pr_err("Failed to start kcompactd on node %d\n", nid);
		ret = PTR_ERR(pgdat->kcompactd);
		pgdat->kcompactd = NULL;
	}
	return ret;
}

/*
 * Called by memory hotplug when all memory in a node is offlined. Caller must
 * hold mem_hotplug_begin/end().
 */
void kcompactd_stop(int nid)
{
	struct task_struct *kcompactd = NODE_DATA(nid)->kcompactd;

	if (kcompactd) {
		kthread_stop(kcompactd);
		NODE_DATA(nid)->kcompactd = NULL;
	}
}

/*
 * It's optimal to keep kcompactd threads on the same CPUs as their memory, but
 * it is not required for correctness. So if the last cpu in a node goes away,
 * kcompactd loses its binding and may run anywhere; when the first cpu of that
 * node comes back online, restore its cpu binding.
 */
static int kcompactd_cpu_online(unsigned int cpu)
{
	int nid;

	for_each_node_state(nid, N_MEMORY) {
		pg_data_t *pgdat = NODE_DATA(nid);
		const struct cpumask *mask;

		mask = cpumask_of_node(pgdat->node_id);

		if (cpumask_any_and(cpu_online_mask, mask) < nr_cpu_ids)
			/* One of our CPUs online: restore mask */
			set_cpus_allowed_ptr(pgdat->kcompactd, mask);
	}
	return 0;
}

static int __init kcompactd_init(void)
{
	int nid;
	int ret;

	ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
					"mm/compaction:online",
					kcompactd_cpu_online, NULL);
	if (ret < 0) {
		pr_err("kcompactd: failed to register hotplug callbacks.\n");
		return ret;
	}

	for_each_node_state(nid, N_MEMORY)
		kcompactd_run(nid);
	return 0;
}
subsys_initcall(kcompactd_init)

#endif /* CONFIG_COMPACTION */
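
/*
 * Illustrative sketch (hypothetical helper, not called anywhere): how a
 * node-hotplug path is expected to pair kcompactd_run()/kcompactd_stop()
 * when a node gains its first or loses its last memory. The real call sites
 * are in the memory hotplug code; when CONFIG_COMPACTION is disabled, the
 * stubs from <linux/compaction.h> make both calls no-ops.
 */
static void __maybe_unused kcompactd_hotplug_sketch(int nid, bool has_memory)
{
	if (has_memory)
		kcompactd_run(nid);	/* idempotent: returns early if already running */
	else
		kcompactd_stop(nid);	/* stops the thread and clears pgdat->kcompactd */
}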