/*
 * linux/mm/compaction.c
 *
 * Memory compaction for the reduction of external fragmentation. Note that
 * this heavily depends upon page migration to do all the real heavy
 * lifting.
 *
 * Copyright IBM Corp. 2007-2010 Mel Gorman <mel@csn.ul.ie>
 */
#include <linux/swap.h>
#include <linux/migrate.h>
#include <linux/compaction.h>
#include <linux/mm_inline.h>
#include <linux/backing-dev.h>
#include <linux/sysctl.h>
#include <linux/sysfs.h>
#include <linux/balloon_compaction.h>
#include <linux/page-isolation.h>
#include "internal.h"

#ifdef CONFIG_COMPACTION
static inline void count_compact_event(enum vm_event_item item)
{
	count_vm_event(item);
}

static inline void count_compact_events(enum vm_event_item item, long delta)
{
	count_vm_events(item, delta);
}
#else
#define count_compact_event(item) do { } while (0)
#define count_compact_events(item, delta) do { } while (0)
#endif

#if defined CONFIG_COMPACTION || defined CONFIG_CMA

#define CREATE_TRACE_POINTS
#include <trace/events/compaction.h>

static unsigned long release_freepages(struct list_head *freelist)
{
	struct page *page, *next;
	unsigned long count = 0;

	list_for_each_entry_safe(page, next, freelist, lru) {
		list_del(&page->lru);
		__free_page(page);
		count++;
	}

	return count;
}

static void map_pages(struct list_head *list)
{
	struct page *page;

	list_for_each_entry(page, list, lru) {
		arch_alloc_page(page, 0);
		kernel_map_pages(page, 1, 1);
	}
}

static inline bool migrate_async_suitable(int migratetype)
{
	return is_migrate_cma(migratetype) || migratetype == MIGRATE_MOVABLE;
}

/*
 * Check that the whole (or subset of) a pageblock given by the interval of
 * [start_pfn, end_pfn) is valid and within the same zone, before scanning it
 * with the migration or free compaction scanner. The scanners then need to
 * use only pfn_valid_within() check for arches that allow holes within
 * pageblocks.
 *
 * Return struct page pointer of start_pfn, or NULL if checks were not passed.
 *
 * It's possible on some configurations to have a setup like node0 node1 node0
 * i.e. it's possible that all pages within a zone's range of pages do not
 * belong to a single zone. We assume that a border between node0 and node1
 * can occur within a single pageblock, but not a node0 node1 node0
 * interleaving within a single pageblock. It is therefore sufficient to check
 * the first and last page of a pageblock and avoid checking each individual
 * page in a pageblock.
 */
static struct page *pageblock_pfn_to_page(unsigned long start_pfn,
				unsigned long end_pfn, struct zone *zone)
{
	struct page *start_page;
	struct page *end_page;

	/* end_pfn is one past the range we are checking */
	end_pfn--;

	if (!pfn_valid(start_pfn) || !pfn_valid(end_pfn))
		return NULL;

	start_page = pfn_to_page(start_pfn);

	if (page_zone(start_page) != zone)
		return NULL;

	end_page = pfn_to_page(end_pfn);

	/* This gives a shorter code than deriving page_zone(end_page) */
	if (page_zone_id(start_page) != page_zone_id(end_page))
		return NULL;

	return start_page;
}

#ifdef CONFIG_COMPACTION
/*
 * Returns true if the pageblock should be scanned for pages to isolate.
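 * The per-pageblock skip bit is set by update_pageblock_skip() when a scan
 * of the block isolated nothing; cc->ignore_skip_hint overrides it, e.g.
 * when compaction is triggered explicitly via sysfs or /proc.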
 */
static inline bool isolation_suitable(struct compact_control *cc,
					struct page *page)
{
	if (cc->ignore_skip_hint)
		return true;

	return !get_pageblock_skip(page);
}

/*
 * This function is called to clear all cached information on pageblocks that
 * should be skipped for page isolation when the migrate and free page scanner
 * meet.
 */
static void __reset_isolation_suitable(struct zone *zone)
{
	unsigned long start_pfn = zone->zone_start_pfn;
	unsigned long end_pfn = zone_end_pfn(zone);
	unsigned long pfn;

	zone->compact_cached_migrate_pfn[0] = start_pfn;
	zone->compact_cached_migrate_pfn[1] = start_pfn;
	zone->compact_cached_free_pfn = end_pfn;
	zone->compact_blockskip_flush = false;

	/* Walk the zone and mark every pageblock as suitable for isolation */
	for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
		struct page *page;

		cond_resched();

		if (!pfn_valid(pfn))
			continue;

		page = pfn_to_page(pfn);
		if (zone != page_zone(page))
			continue;

		clear_pageblock_skip(page);
	}
}

void reset_isolation_suitable(pg_data_t *pgdat)
{
	int zoneid;

	for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) {
		struct zone *zone = &pgdat->node_zones[zoneid];
		if (!populated_zone(zone))
			continue;

		/* Only flush if a full compaction finished recently */
		if (zone->compact_blockskip_flush)
			__reset_isolation_suitable(zone);
	}
}

/*
 * If no pages were isolated then mark this pageblock to be skipped in the
 * future. The information is later cleared by __reset_isolation_suitable().
 */
static void update_pageblock_skip(struct compact_control *cc,
			struct page *page, unsigned long nr_isolated,
			bool migrate_scanner)
{
	struct zone *zone = cc->zone;
	unsigned long pfn;

	if (cc->ignore_skip_hint)
		return;

	if (!page)
		return;

	if (nr_isolated)
		return;

	set_pageblock_skip(page);

	pfn = page_to_pfn(page);

	/* Update where async and sync compaction should restart */
	if (migrate_scanner) {
		if (cc->finished_update_migrate)
			return;
		if (pfn > zone->compact_cached_migrate_pfn[0])
			zone->compact_cached_migrate_pfn[0] = pfn;
		if (cc->mode != MIGRATE_ASYNC &&
		    pfn > zone->compact_cached_migrate_pfn[1])
			zone->compact_cached_migrate_pfn[1] = pfn;
	} else {
		if (cc->finished_update_free)
			return;
		if (pfn < zone->compact_cached_free_pfn)
			zone->compact_cached_free_pfn = pfn;
	}
}
#else
static inline bool isolation_suitable(struct compact_control *cc,
					struct page *page)
{
	return true;
}

static void update_pageblock_skip(struct compact_control *cc,
			struct page *page, unsigned long nr_isolated,
			bool migrate_scanner)
{
}
#endif /* CONFIG_COMPACTION */

/*
 * Compaction requires the taking of some coarse locks that are potentially
 * very heavily contended. For async compaction, back out if the lock cannot
 * be taken immediately. For sync compaction, spin on the lock if needed.
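 * When async compaction backs out, cc->contended is set to
 * COMPACT_CONTENDED_LOCK so that the caller can tell why it aborted.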
 *
 * Returns true if the lock is held
 * Returns false if the lock is not held and compaction should abort
 */
static bool compact_trylock_irqsave(spinlock_t *lock, unsigned long *flags,
						struct compact_control *cc)
{
	if (cc->mode == MIGRATE_ASYNC) {
		if (!spin_trylock_irqsave(lock, *flags)) {
			cc->contended = COMPACT_CONTENDED_LOCK;
			return false;
		}
	} else {
		spin_lock_irqsave(lock, *flags);
	}

	return true;
}

/*
 * Compaction requires the taking of some coarse locks that are potentially
 * very heavily contended. The lock should be periodically unlocked to avoid
 * having disabled IRQs for a long time, even when there is nobody waiting on
 * the lock. It might also be that allowing the IRQs will result in
 * need_resched() becoming true. If scheduling is needed, async compaction
 * aborts. Sync compaction schedules.
 * Either compaction type will also abort if a fatal signal is pending.
 * In either case if the lock was locked, it is dropped and not regained.
 *
 * Returns true if compaction should abort due to fatal signal pending, or
 * async compaction due to need_resched()
 * Returns false when compaction can continue (sync compaction might have
 * scheduled)
 */
static bool compact_unlock_should_abort(spinlock_t *lock,
		unsigned long flags, bool *locked, struct compact_control *cc)
{
	if (*locked) {
		spin_unlock_irqrestore(lock, flags);
		*locked = false;
	}

	if (fatal_signal_pending(current)) {
		cc->contended = COMPACT_CONTENDED_SCHED;
		return true;
	}

	if (need_resched()) {
		if (cc->mode == MIGRATE_ASYNC) {
			cc->contended = COMPACT_CONTENDED_SCHED;
			return true;
		}
		cond_resched();
	}

	return false;
}

/*
 * Aside from avoiding lock contention, compaction also periodically checks
 * need_resched() and either schedules in sync compaction or aborts async
 * compaction. This is similar to what compact_unlock_should_abort() does, but
 * is used where no lock is concerned.
 *
 * Returns false when no scheduling was needed, or sync compaction scheduled.
 * Returns true when async compaction should abort.
 */
static inline bool compact_should_abort(struct compact_control *cc)
{
	/* async compaction aborts if contended */
	if (need_resched()) {
		if (cc->mode == MIGRATE_ASYNC) {
			cc->contended = COMPACT_CONTENDED_SCHED;
			return true;
		}

		cond_resched();
	}

	return false;
}

/* Returns true if the page is within a block suitable for migration to */
static bool suitable_migration_target(struct page *page)
{
	/* If the page is a large free page, then disallow migration */
	if (PageBuddy(page)) {
		/*
		 * We are checking page_order without zone->lock taken. But
		 * the only small danger is that we skip a potentially suitable
		 * pageblock, so it's not worth checking the order against a
		 * valid range.
		 */
		if (page_order_unsafe(page) >= pageblock_order)
			return false;
	}

	/* If the block is MIGRATE_MOVABLE or MIGRATE_CMA, allow migration */
	if (migrate_async_suitable(get_pageblock_migratetype(page)))
		return true;

	/* Otherwise skip the block */
	return false;
}

/*
 * Isolate free pages onto a private freelist.
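 * Each free page found is split into order-0 pages by split_free_page()
 * before being added to the freelist.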
 * If @strict is true, will abort returning 0 on any invalid PFNs or non-free
 * pages inside of the pageblock (even though it may still end up isolating
 * some pages).
 */
static unsigned long isolate_freepages_block(struct compact_control *cc,
				unsigned long *start_pfn,
				unsigned long end_pfn,
				struct list_head *freelist,
				bool strict)
{
	int nr_scanned = 0, total_isolated = 0;
	struct page *cursor, *valid_page = NULL;
	unsigned long flags = 0;
	bool locked = false;
	unsigned long blockpfn = *start_pfn;

	cursor = pfn_to_page(blockpfn);

	/* Isolate free pages. */
	for (; blockpfn < end_pfn; blockpfn++, cursor++) {
		int isolated, i;
		struct page *page = cursor;

		/*
		 * Periodically drop the lock (if held) regardless of its
		 * contention, to give a chance to IRQs. Abort if fatal signal
		 * pending or async compaction detects need_resched()
		 */
		if (!(blockpfn % SWAP_CLUSTER_MAX)
		    && compact_unlock_should_abort(&cc->zone->lock, flags,
								&locked, cc))
			break;

		nr_scanned++;
		if (!pfn_valid_within(blockpfn))
			goto isolate_fail;

		if (!valid_page)
			valid_page = page;
		if (!PageBuddy(page))
			goto isolate_fail;

		/*
		 * If we already hold the lock, we can skip some rechecking.
		 * Note that if we hold the lock now, checked_pageblock was
		 * already set in some previous iteration (or strict is true),
		 * so it is correct to skip the suitable migration target
		 * recheck as well.
		 */
		if (!locked) {
			/*
			 * The zone lock must be held to isolate freepages.
			 * Unfortunately this is a very coarse lock and can be
			 * heavily contended if there are parallel allocations
			 * or parallel compactions. For async compaction do not
			 * spin on the lock and we acquire the lock as late as
			 * possible.
			 */
			locked = compact_trylock_irqsave(&cc->zone->lock,
								&flags, cc);
			if (!locked)
				break;

			/* Recheck this is a buddy page under lock */
			if (!PageBuddy(page))
				goto isolate_fail;
		}

		/* Found a free page, break it into order-0 pages */
		isolated = split_free_page(page);
		total_isolated += isolated;
		for (i = 0; i < isolated; i++) {
			list_add(&page->lru, freelist);
			page++;
		}

		/* If a page was split, advance to the end of it */
		if (isolated) {
			blockpfn += isolated - 1;
			cursor += isolated - 1;
			continue;
		}

isolate_fail:
		if (strict)
			break;
		else
			continue;

	}

	/* Record how far we have got within the block */
	*start_pfn = blockpfn;

	trace_mm_compaction_isolate_freepages(nr_scanned, total_isolated);

	/*
	 * If strict isolation is requested by CMA then check that all the
	 * pages requested were isolated. If there were any failures, 0 is
	 * returned and CMA will fail.
	 */
	if (strict && blockpfn < end_pfn)
		total_isolated = 0;

	if (locked)
		spin_unlock_irqrestore(&cc->zone->lock, flags);

	/* Update the pageblock-skip if the whole pageblock was scanned */
	if (blockpfn == end_pfn)
		update_pageblock_skip(cc, valid_page, total_isolated, false);

	count_compact_events(COMPACTFREE_SCANNED, nr_scanned);
	if (total_isolated)
		count_compact_events(COMPACTISOLATED, total_isolated);
	return total_isolated;
}

/**
 * isolate_freepages_range() - isolate free pages.
 * @start_pfn: The first PFN to start isolating.
 * @end_pfn: The one-past-last PFN.
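 * @cc: Compaction control structure.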
 *
 * Non-free pages, invalid PFNs, or zone boundaries within the
 * [start_pfn, end_pfn) range are considered errors, and cause the function to
 * undo its actions and return zero.
 *
 * Otherwise, the function returns the one-past-the-last PFN of the isolated
 * pages (which may be greater than end_pfn if the end fell in the middle of
 * a free page).
 */
unsigned long
isolate_freepages_range(struct compact_control *cc,
			unsigned long start_pfn, unsigned long end_pfn)
{
	unsigned long isolated, pfn, block_end_pfn;
	LIST_HEAD(freelist);

	pfn = start_pfn;
	block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages);

	for (; pfn < end_pfn; pfn += isolated,
				block_end_pfn += pageblock_nr_pages) {
		/* Protect pfn from changing by isolate_freepages_block */
		unsigned long isolate_start_pfn = pfn;

		block_end_pfn = min(block_end_pfn, end_pfn);

		/*
		 * pfn could pass the block_end_pfn if isolated freepage
		 * is more than pageblock order. In this case, we adjust
		 * scanning range to right one.
		 */
		if (pfn >= block_end_pfn) {
			block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages);
			block_end_pfn = min(block_end_pfn, end_pfn);
		}

		if (!pageblock_pfn_to_page(pfn, block_end_pfn, cc->zone))
			break;

		isolated = isolate_freepages_block(cc, &isolate_start_pfn,
						block_end_pfn, &freelist, true);

		/*
		 * In strict mode, isolate_freepages_block() returns 0 if
		 * there are any holes in the block (ie. invalid PFNs or
		 * non-free pages).
		 */
		if (!isolated)
			break;

		/*
		 * If we managed to isolate pages, it is always (1 << n) *
		 * pageblock_nr_pages for some non-negative n. (Max order
		 * page may span two pageblocks).
		 */
	}

	/* split_free_page does not map the pages */
	map_pages(&freelist);

	if (pfn < end_pfn) {
		/* Loop terminated early, cleanup. */
		release_freepages(&freelist);
		return 0;
	}

	/* We don't use freelists for anything. */
	return pfn;
}

/* Update the number of anon and file isolated pages in the zone */
static void acct_isolated(struct zone *zone, struct compact_control *cc)
{
	struct page *page;
	unsigned int count[2] = { 0, };

	if (list_empty(&cc->migratepages))
		return;

	list_for_each_entry(page, &cc->migratepages, lru)
		count[!!page_is_file_cache(page)]++;

	mod_zone_page_state(zone, NR_ISOLATED_ANON, count[0]);
	mod_zone_page_state(zone, NR_ISOLATED_FILE, count[1]);
}

/* Similar to reclaim, but different enough that they don't share logic */
static bool too_many_isolated(struct zone *zone)
{
	unsigned long active, inactive, isolated;

	inactive = zone_page_state(zone, NR_INACTIVE_FILE) +
					zone_page_state(zone, NR_INACTIVE_ANON);
	active = zone_page_state(zone, NR_ACTIVE_FILE) +
					zone_page_state(zone, NR_ACTIVE_ANON);
	isolated = zone_page_state(zone, NR_ISOLATED_FILE) +
					zone_page_state(zone, NR_ISOLATED_ANON);

	return isolated > (inactive + active) / 2;
}

/**
 * isolate_migratepages_block() - isolate all migrate-able pages within
 *				  a single pageblock
 * @cc: Compaction control structure.
 * @low_pfn: The first PFN to isolate
 * @end_pfn: The one-past-the-last PFN to isolate, within same pageblock
 * @isolate_mode: Isolation mode to be used.
 *
 * Isolate all pages that can be migrated from the range specified by
 * [low_pfn, end_pfn).
 * The range is expected to be within the same pageblock.
 * Returns zero if there is a fatal signal pending, otherwise the PFN of the
 * first page that was not scanned (which may be less than, equal to, or more
 * than end_pfn).
 *
 * The pages are isolated on cc->migratepages list (not required to be empty),
 * and cc->nr_migratepages is updated accordingly. The cc->migrate_pfn field
 * is neither read nor updated.
 */
static unsigned long
isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
			unsigned long end_pfn, isolate_mode_t isolate_mode)
{
	struct zone *zone = cc->zone;
	unsigned long nr_scanned = 0, nr_isolated = 0;
	struct list_head *migratelist = &cc->migratepages;
	struct lruvec *lruvec;
	unsigned long flags = 0;
	bool locked = false;
	struct page *page = NULL, *valid_page = NULL;

	/*
	 * Ensure that there are not too many pages isolated from the LRU
	 * list by either parallel reclaimers or compaction. If there are,
	 * delay for some time until fewer pages are isolated
	 */
	while (unlikely(too_many_isolated(zone))) {
		/* async migration should just abort */
		if (cc->mode == MIGRATE_ASYNC)
			return 0;

		congestion_wait(BLK_RW_ASYNC, HZ/10);

		if (fatal_signal_pending(current))
			return 0;
	}

	if (compact_should_abort(cc))
		return 0;

	/* Time to isolate some pages for migration */
	for (; low_pfn < end_pfn; low_pfn++) {
		/*
		 * Periodically drop the lock (if held) regardless of its
		 * contention, to give a chance to IRQs. Abort async compaction
		 * if contended.
		 */
		if (!(low_pfn % SWAP_CLUSTER_MAX)
		    && compact_unlock_should_abort(&zone->lru_lock, flags,
								&locked, cc))
			break;

		if (!pfn_valid_within(low_pfn))
			continue;
		nr_scanned++;

		page = pfn_to_page(low_pfn);

		if (!valid_page)
			valid_page = page;

		/*
		 * Skip if free. We read page order here without zone lock
		 * which is generally unsafe, but the race window is small and
		 * the worst thing that can happen is that we skip some
		 * potential isolation targets.
		 */
		if (PageBuddy(page)) {
			unsigned long freepage_order = page_order_unsafe(page);

			/*
			 * Without lock, we cannot be sure that what we got is
			 * a valid page order. Consider only values in the
			 * valid order range to prevent low_pfn overflow.
			 */
			if (freepage_order > 0 && freepage_order < MAX_ORDER)
				low_pfn += (1UL << freepage_order) - 1;
			continue;
		}

		/*
		 * Check may be lockless but that's ok as we recheck later.
		 * It's possible to migrate LRU pages and balloon pages;
		 * skip any other type of page.
		 */
		if (!PageLRU(page)) {
			if (unlikely(balloon_page_movable(page))) {
				if (balloon_page_isolate(page)) {
					/* Successfully isolated */
					goto isolate_success;
				}
			}
			continue;
		}

		/*
		 * PageLRU is set. lru_lock normally excludes isolation
		 * splitting and collapsing (collapsing has already happened
		 * if PageLRU is set) but the lock is not necessarily taken
		 * here and it is wasteful to take it just to check transhuge.
		 * Check TransHuge without lock and skip the whole pageblock if
		 * it's either a transhuge or hugetlbfs page, as calling
		 * compound_order() without preventing THP from splitting the
		 * page underneath us may return surprising results.
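		 * Without the lock compound_order() cannot be trusted, so the
		 * unlocked case below conservatively skips to the end of the
		 * pageblock, while the locked case skips just the compound
		 * page itself.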
		 */
		if (PageTransHuge(page)) {
			if (!locked)
				low_pfn = ALIGN(low_pfn + 1,
						pageblock_nr_pages) - 1;
			else
				low_pfn += (1 << compound_order(page)) - 1;

			continue;
		}

		/*
		 * Migration will fail if an anonymous page is pinned in memory,
		 * so avoid taking lru_lock and isolating it unnecessarily in an
		 * admittedly racy check.
		 */
		if (!page_mapping(page) &&
		    page_count(page) > page_mapcount(page))
			continue;

		/* If we already hold the lock, we can skip some rechecking */
		if (!locked) {
			locked = compact_trylock_irqsave(&zone->lru_lock,
								&flags, cc);
			if (!locked)
				break;

			/* Recheck PageLRU and PageTransHuge under lock */
			if (!PageLRU(page))
				continue;
			if (PageTransHuge(page)) {
				low_pfn += (1 << compound_order(page)) - 1;
				continue;
			}
		}

		lruvec = mem_cgroup_page_lruvec(page, zone);

		/* Try isolate the page */
		if (__isolate_lru_page(page, isolate_mode) != 0)
			continue;

		VM_BUG_ON_PAGE(PageTransCompound(page), page);

		/* Successfully isolated */
		del_page_from_lru_list(page, lruvec, page_lru(page));

isolate_success:
		cc->finished_update_migrate = true;
		list_add(&page->lru, migratelist);
		cc->nr_migratepages++;
		nr_isolated++;

		/* Avoid isolating too much */
		if (cc->nr_migratepages == COMPACT_CLUSTER_MAX) {
			++low_pfn;
			break;
		}
	}

	/*
	 * The PageBuddy() check could have potentially brought us outside
	 * the range to be scanned.
	 */
	if (unlikely(low_pfn > end_pfn))
		low_pfn = end_pfn;

	if (locked)
		spin_unlock_irqrestore(&zone->lru_lock, flags);

	/*
	 * Update the pageblock-skip information and cached scanner pfn,
	 * if the whole pageblock was scanned without isolating any page.
	 */
	if (low_pfn == end_pfn)
		update_pageblock_skip(cc, valid_page, nr_isolated, true);

	trace_mm_compaction_isolate_migratepages(nr_scanned, nr_isolated);

	count_compact_events(COMPACTMIGRATE_SCANNED, nr_scanned);
	if (nr_isolated)
		count_compact_events(COMPACTISOLATED, nr_isolated);

	return low_pfn;
}

/**
 * isolate_migratepages_range() - isolate migrate-able pages in a PFN range
 * @cc: Compaction control structure.
 * @start_pfn: The first PFN to start isolating.
 * @end_pfn: The one-past-last PFN.
 *
 * Returns zero if isolation fails fatally due to e.g. pending signal.
 * Otherwise, the function returns the one-past-the-last PFN of the isolated
 * pages (which may be greater than end_pfn if the end fell in the middle of
 * a THP page).
 */
unsigned long
isolate_migratepages_range(struct compact_control *cc, unsigned long start_pfn,
			unsigned long end_pfn)
{
	unsigned long pfn, block_end_pfn;

	/* Scan block by block. First and last block may be incomplete */
	pfn = start_pfn;
	block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages);

	for (; pfn < end_pfn; pfn = block_end_pfn,
				block_end_pfn += pageblock_nr_pages) {

		block_end_pfn = min(block_end_pfn, end_pfn);

		if (!pageblock_pfn_to_page(pfn, block_end_pfn, cc->zone))
			continue;

		pfn = isolate_migratepages_block(cc, pfn, block_end_pfn,
							ISOLATE_UNEVICTABLE);

		/*
		 * In case of fatal failure, release everything that might
		 * have been isolated in the previous iteration, and signal
		 * the failure back to caller.
		 */
		if (!pfn) {
			putback_movable_pages(&cc->migratepages);
			cc->nr_migratepages = 0;
			break;
		}

		if (cc->nr_migratepages == COMPACT_CLUSTER_MAX)
			break;
	}
	acct_isolated(cc->zone, cc);

	return pfn;
}

#endif /* CONFIG_COMPACTION || CONFIG_CMA */
#ifdef CONFIG_COMPACTION
/*
 * Based on information in the current compact_control, find blocks
 * suitable for isolating free pages from and then isolate them.
 */
static void isolate_freepages(struct compact_control *cc)
{
	struct zone *zone = cc->zone;
	struct page *page;
	unsigned long block_start_pfn;	/* start of current pageblock */
	unsigned long isolate_start_pfn; /* exact pfn we start at */
	unsigned long block_end_pfn;	/* end of current pageblock */
	unsigned long low_pfn;	/* lowest pfn scanner is able to scan */
	int nr_freepages = cc->nr_freepages;
	struct list_head *freelist = &cc->freepages;

	/*
	 * Initialise the free scanner. The starting point is where we last
	 * successfully isolated from, zone-cached value, or the end of the
	 * zone when isolating for the first time. For looping we also need
	 * this pfn aligned down to the pageblock boundary, because we do
	 * block_start_pfn -= pageblock_nr_pages in the for loop.
	 * For ending point, take care when isolating in the last pageblock
	 * of a zone which ends in the middle of a pageblock.
	 * The low boundary is the end of the pageblock the migration scanner
	 * is using.
	 */
	isolate_start_pfn = cc->free_pfn;
	block_start_pfn = cc->free_pfn & ~(pageblock_nr_pages-1);
	block_end_pfn = min(block_start_pfn + pageblock_nr_pages,
						zone_end_pfn(zone));
	low_pfn = ALIGN(cc->migrate_pfn + 1, pageblock_nr_pages);

	/*
	 * Isolate free pages until enough are available to migrate the
	 * pages on cc->migratepages. We stop searching if the migrate
	 * and free page scanners meet or enough free pages are isolated.
	 */
	for (; block_start_pfn >= low_pfn && cc->nr_migratepages > nr_freepages;
				block_end_pfn = block_start_pfn,
				block_start_pfn -= pageblock_nr_pages,
				isolate_start_pfn = block_start_pfn) {
		unsigned long isolated;

		/*
		 * This can iterate a massively long zone without finding any
		 * suitable migration targets, so periodically check if we need
		 * to schedule, or even abort async compaction.
		 */
		if (!(block_start_pfn % (SWAP_CLUSTER_MAX * pageblock_nr_pages))
						&& compact_should_abort(cc))
			break;

		page = pageblock_pfn_to_page(block_start_pfn, block_end_pfn,
									zone);
		if (!page)
			continue;

		/* Check the block is suitable for migration */
		if (!suitable_migration_target(page))
			continue;

		/* If isolation recently failed, do not retry */
		if (!isolation_suitable(cc, page))
			continue;

		/* Found a block suitable for isolating free pages from. */
		isolated = isolate_freepages_block(cc, &isolate_start_pfn,
					block_end_pfn, freelist, false);
		nr_freepages += isolated;

		/*
		 * Remember where the free scanner should restart next time,
		 * which is where isolate_freepages_block() left off.
		 * But if it scanned the whole pageblock, isolate_start_pfn
		 * now points at block_end_pfn, which is the start of the next
		 * pageblock.
		 * In that case we will however want to restart at the start
		 * of the previous pageblock.
		 */
		cc->free_pfn = (isolate_start_pfn < block_end_pfn) ?
				isolate_start_pfn :
				block_start_pfn - pageblock_nr_pages;

		/*
		 * Set a flag that we successfully isolated in this pageblock.
		 * In the next loop iteration, zone->compact_cached_free_pfn
		 * will not be updated and thus it will effectively contain the
		 * highest pageblock we isolated pages from.
		 */
		if (isolated)
			cc->finished_update_free = true;

		/*
		 * isolate_freepages_block() might have aborted due to async
		 * compaction being contended
		 */
		if (cc->contended)
			break;
	}

	/* split_free_page does not map the pages */
	map_pages(freelist);

	/*
	 * If we crossed the migrate scanner, we want to keep it that way
	 * so that compact_finished() may detect this
	 */
	if (block_start_pfn < low_pfn)
		cc->free_pfn = cc->migrate_pfn;

	cc->nr_freepages = nr_freepages;
}

/*
 * This is a migrate-callback that "allocates" freepages by taking pages
 * from the isolated freelists in the block we are migrating to.
 */
static struct page *compaction_alloc(struct page *migratepage,
					unsigned long data,
					int **result)
{
	struct compact_control *cc = (struct compact_control *)data;
	struct page *freepage;

	/*
	 * Isolate free pages if necessary, and if we are not aborting due to
	 * contention.
	 */
	if (list_empty(&cc->freepages)) {
		if (!cc->contended)
			isolate_freepages(cc);

		if (list_empty(&cc->freepages))
			return NULL;
	}

	freepage = list_entry(cc->freepages.next, struct page, lru);
	list_del(&freepage->lru);
	cc->nr_freepages--;

	return freepage;
}

/*
 * This is a migrate-callback that "frees" freepages back to the isolated
 * freelist. All pages on the freelist are from the same zone, so there is no
 * special handling needed for NUMA.
 */
static void compaction_free(struct page *page, unsigned long data)
{
	struct compact_control *cc = (struct compact_control *)data;

	list_add(&page->lru, &cc->freepages);
	cc->nr_freepages++;
}

/* possible outcome of isolate_migratepages */
typedef enum {
	ISOLATE_ABORT,		/* Abort compaction now */
	ISOLATE_NONE,		/* No pages isolated, continue scanning */
	ISOLATE_SUCCESS,	/* Pages isolated, migrate */
} isolate_migrate_t;

/*
 * Isolate all pages that can be migrated from the first suitable block,
 * starting at the block pointed to by the migrate scanner pfn within
 * compact_control.
 */
static isolate_migrate_t isolate_migratepages(struct zone *zone,
					struct compact_control *cc)
{
	unsigned long low_pfn, end_pfn;
	struct page *page;
	const isolate_mode_t isolate_mode =
		(cc->mode == MIGRATE_ASYNC ? ISOLATE_ASYNC_MIGRATE : 0);

	/*
	 * Start at where we last stopped, or beginning of the zone as
	 * initialized by compact_zone()
	 */
	low_pfn = cc->migrate_pfn;

	/* Only scan within a pageblock boundary */
	end_pfn = ALIGN(low_pfn + 1, pageblock_nr_pages);

	/*
	 * Iterate over whole pageblocks until we find the first suitable.
	 * Do not cross the free scanner.
	 */
	for (; end_pfn <= cc->free_pfn;
			low_pfn = end_pfn, end_pfn += pageblock_nr_pages) {

		/*
		 * This can potentially iterate a massively long zone with
		 * many pageblocks unsuitable, so periodically check if we
		 * need to schedule, or even abort async compaction.
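		 * The check is done roughly once every SWAP_CLUSTER_MAX
		 * pageblocks scanned.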
		 */
		if (!(low_pfn % (SWAP_CLUSTER_MAX * pageblock_nr_pages))
						&& compact_should_abort(cc))
			break;

		page = pageblock_pfn_to_page(low_pfn, end_pfn, zone);
		if (!page)
			continue;

		/* If isolation recently failed, do not retry */
		if (!isolation_suitable(cc, page))
			continue;

		/*
		 * For async compaction, also only scan in MOVABLE blocks.
		 * Async compaction is optimistic to see if the minimum amount
		 * of work satisfies the allocation.
		 */
		if (cc->mode == MIGRATE_ASYNC &&
		    !migrate_async_suitable(get_pageblock_migratetype(page)))
			continue;

		/* Perform the isolation */
		low_pfn = isolate_migratepages_block(cc, low_pfn, end_pfn,
								isolate_mode);

		if (!low_pfn || cc->contended)
			return ISOLATE_ABORT;

		/*
		 * Either we isolated something and proceed with migration. Or
		 * we failed and compact_zone should decide if we should
		 * continue or not.
		 */
		break;
	}

	acct_isolated(zone, cc);
	/*
	 * Record where migration scanner will be restarted. If we end up in
	 * the same pageblock as the free scanner, make the scanners fully
	 * meet so that compact_finished() terminates compaction.
	 */
	cc->migrate_pfn = (end_pfn <= cc->free_pfn) ? low_pfn : cc->free_pfn;

	return cc->nr_migratepages ? ISOLATE_SUCCESS : ISOLATE_NONE;
}

static int compact_finished(struct zone *zone, struct compact_control *cc,
			    const int migratetype)
{
	unsigned int order;
	unsigned long watermark;

	if (cc->contended || fatal_signal_pending(current))
		return COMPACT_PARTIAL;

	/* Compaction run completes if the migrate and free scanner meet */
	if (cc->free_pfn <= cc->migrate_pfn) {
		/* Let the next compaction start anew. */
		zone->compact_cached_migrate_pfn[0] = zone->zone_start_pfn;
		zone->compact_cached_migrate_pfn[1] = zone->zone_start_pfn;
		zone->compact_cached_free_pfn = zone_end_pfn(zone);

		/*
		 * Mark that the PG_migrate_skip information should be cleared
		 * by kswapd when it goes to sleep. kswapd does not set the
		 * flag itself as the decision to clear the flag should be
		 * based directly on an allocation request.
		 */
		if (!current_is_kswapd())
			zone->compact_blockskip_flush = true;

		return COMPACT_COMPLETE;
	}

	/*
	 * order == -1 is expected when compacting via
	 * /proc/sys/vm/compact_memory
	 */
	if (cc->order == -1)
		return COMPACT_CONTINUE;

	/* Compaction run is not finished if the watermark is not met */
	watermark = low_wmark_pages(zone);
	watermark += (1 << cc->order);

	if (!zone_watermark_ok(zone, cc->order, watermark, 0, 0))
		return COMPACT_CONTINUE;

	/* Direct compactor: Is a suitable page free? */
	for (order = cc->order; order < MAX_ORDER; order++) {
		struct free_area *area = &zone->free_area[order];

		/* Job done if page is free of the right migratetype */
		if (!list_empty(&area->free_list[migratetype]))
			return COMPACT_PARTIAL;

		/* Job done if allocation would set block type */
		if (cc->order >= pageblock_order && area->nr_free)
			return COMPACT_PARTIAL;
	}

	return COMPACT_CONTINUE;
}

/*
 * compaction_suitable: Is this suitable to run compaction on this zone now?
 * Returns
 *   COMPACT_SKIPPED  - If there are too few free pages for compaction
 *   COMPACT_PARTIAL  - If the allocation would succeed without compaction
 *   COMPACT_CONTINUE - If compaction should run now
 */
unsigned long compaction_suitable(struct zone *zone, int order)
{
	int fragindex;
	unsigned long watermark;

	/*
	 * order == -1 is expected when compacting via
	 * /proc/sys/vm/compact_memory
	 */
	if (order == -1)
		return COMPACT_CONTINUE;

	/*
	 * Watermarks for order-0 must be met for compaction. Note the 2UL.
	 * This is because during migration, copies of pages need to be
	 * allocated and for a short time, the footprint is higher
	 */
	watermark = low_wmark_pages(zone) + (2UL << order);
	if (!zone_watermark_ok(zone, 0, watermark, 0, 0))
		return COMPACT_SKIPPED;

	/*
	 * fragmentation index determines if allocation failures are due to
	 * low memory or external fragmentation
	 *
	 * index of -1000 implies allocations might succeed depending on
	 * watermarks
	 * index towards 0 implies failure is due to lack of memory
	 * index towards 1000 implies failure is due to fragmentation
	 *
	 * Only compact if a failure would be due to fragmentation.
	 */
	fragindex = fragmentation_index(zone, order);
	if (fragindex >= 0 && fragindex <= sysctl_extfrag_threshold)
		return COMPACT_SKIPPED;

	if (fragindex == -1000 && zone_watermark_ok(zone, order, watermark,
						    0, 0))
		return COMPACT_PARTIAL;

	return COMPACT_CONTINUE;
}

static int compact_zone(struct zone *zone, struct compact_control *cc)
{
	int ret;
	unsigned long start_pfn = zone->zone_start_pfn;
	unsigned long end_pfn = zone_end_pfn(zone);
	const int migratetype = gfpflags_to_migratetype(cc->gfp_mask);
	const bool sync = cc->mode != MIGRATE_ASYNC;

	ret = compaction_suitable(zone, cc->order);
	switch (ret) {
	case COMPACT_PARTIAL:
	case COMPACT_SKIPPED:
		/* Compaction is likely to fail */
		return ret;
	case COMPACT_CONTINUE:
		/* Fall through to compaction */
		;
	}

	/*
	 * Clear pageblock skip if there were failures recently and compaction
	 * is about to be retried after being deferred. kswapd does not do
	 * this reset as it'll reset the cached information when going to sleep.
	 */
	if (compaction_restarting(zone, cc->order) && !current_is_kswapd())
		__reset_isolation_suitable(zone);

	/*
	 * Setup to move all movable pages to the end of the zone. Use cached
	 * information on where the scanners should start but check that it
	 * is initialised by ensuring the values are within zone boundaries.
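	 * Separate migrate pfns are cached for async and sync compaction
	 * (compact_cached_migrate_pfn[0] and [1] respectively).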
	 */
	cc->migrate_pfn = zone->compact_cached_migrate_pfn[sync];
	cc->free_pfn = zone->compact_cached_free_pfn;
	if (cc->free_pfn < start_pfn || cc->free_pfn > end_pfn) {
		cc->free_pfn = end_pfn & ~(pageblock_nr_pages-1);
		zone->compact_cached_free_pfn = cc->free_pfn;
	}
	if (cc->migrate_pfn < start_pfn || cc->migrate_pfn > end_pfn) {
		cc->migrate_pfn = start_pfn;
		zone->compact_cached_migrate_pfn[0] = cc->migrate_pfn;
		zone->compact_cached_migrate_pfn[1] = cc->migrate_pfn;
	}

	trace_mm_compaction_begin(start_pfn, cc->migrate_pfn, cc->free_pfn, end_pfn);

	migrate_prep_local();

	while ((ret = compact_finished(zone, cc, migratetype)) ==
						COMPACT_CONTINUE) {
		int err;

		switch (isolate_migratepages(zone, cc)) {
		case ISOLATE_ABORT:
			ret = COMPACT_PARTIAL;
			putback_movable_pages(&cc->migratepages);
			cc->nr_migratepages = 0;
			goto out;
		case ISOLATE_NONE:
			continue;
		case ISOLATE_SUCCESS:
			;
		}

		err = migrate_pages(&cc->migratepages, compaction_alloc,
				compaction_free, (unsigned long)cc, cc->mode,
				MR_COMPACTION);

		trace_mm_compaction_migratepages(cc->nr_migratepages, err,
							&cc->migratepages);

		/* All pages were either migrated or will be released */
		cc->nr_migratepages = 0;
		if (err) {
			putback_movable_pages(&cc->migratepages);
			/*
			 * migrate_pages() may return -ENOMEM when scanners meet
			 * and we want compact_finished() to detect it
			 */
			if (err == -ENOMEM && cc->free_pfn > cc->migrate_pfn) {
				ret = COMPACT_PARTIAL;
				goto out;
			}
		}
	}

out:
	/* Release free pages and check accounting */
	cc->nr_freepages -= release_freepages(&cc->freepages);
	VM_BUG_ON(cc->nr_freepages != 0);

	trace_mm_compaction_end(ret);

	return ret;
}

static unsigned long compact_zone_order(struct zone *zone, int order,
		gfp_t gfp_mask, enum migrate_mode mode, int *contended)
{
	unsigned long ret;
	struct compact_control cc = {
		.nr_freepages = 0,
		.nr_migratepages = 0,
		.order = order,
		.gfp_mask = gfp_mask,
		.zone = zone,
		.mode = mode,
	};
	INIT_LIST_HEAD(&cc.freepages);
	INIT_LIST_HEAD(&cc.migratepages);

	ret = compact_zone(zone, &cc);

	VM_BUG_ON(!list_empty(&cc.freepages));
	VM_BUG_ON(!list_empty(&cc.migratepages));

	*contended = cc.contended;
	return ret;
}

int sysctl_extfrag_threshold = 500;

/**
 * try_to_compact_pages - Direct compact to satisfy a high-order allocation
 * @zonelist: The zonelist used for the current allocation
 * @order: The order of the current allocation
 * @gfp_mask: The GFP mask of the current allocation
 * @nodemask: The allowed nodes to allocate from
 * @mode: The migration mode for async, sync light, or sync migration
 * @contended: Return value that determines if compaction was aborted due to
 *	       need_resched() or lock contention
 * @candidate_zone: Return the zone where we think allocation should succeed
 *
 * This is the main entry point for direct page compaction.
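 * Returns COMPACT_SKIPPED when the GFP flags do not allow compaction,
 * otherwise the maximum of the per-zone compact_zone_order() results
 * (COMPACT_DEFERRED when every zone was skipped as deferred).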
 */
unsigned long try_to_compact_pages(struct zonelist *zonelist,
			int order, gfp_t gfp_mask, nodemask_t *nodemask,
			enum migrate_mode mode, int *contended,
			struct zone **candidate_zone)
{
	enum zone_type high_zoneidx = gfp_zone(gfp_mask);
	int may_enter_fs = gfp_mask & __GFP_FS;
	int may_perform_io = gfp_mask & __GFP_IO;
	struct zoneref *z;
	struct zone *zone;
	int rc = COMPACT_DEFERRED;
	int alloc_flags = 0;
	int all_zones_contended = COMPACT_CONTENDED_LOCK; /* init for &= op */

	*contended = COMPACT_CONTENDED_NONE;

	/* Check if the GFP flags allow compaction */
	if (!order || !may_enter_fs || !may_perform_io)
		return COMPACT_SKIPPED;

#ifdef CONFIG_CMA
	if (gfpflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE)
		alloc_flags |= ALLOC_CMA;
#endif
	/* Compact each zone in the list */
	for_each_zone_zonelist_nodemask(zone, z, zonelist, high_zoneidx,
								nodemask) {
		int status;
		int zone_contended;

		if (compaction_deferred(zone, order))
			continue;

		status = compact_zone_order(zone, order, gfp_mask, mode,
							&zone_contended);
		rc = max(status, rc);
		/*
		 * It takes at least one zone that wasn't lock contended
		 * to clear all_zones_contended.
		 */
		all_zones_contended &= zone_contended;

		/* If a normal allocation would succeed, stop compacting */
		if (zone_watermark_ok(zone, order, low_wmark_pages(zone), 0,
				      alloc_flags)) {
			*candidate_zone = zone;
			/*
			 * We think the allocation will succeed in this zone,
			 * but it is not certain, hence the false. The caller
			 * will repeat this with true if allocation indeed
			 * succeeds in this zone.
			 */
			compaction_defer_reset(zone, order, false);
			/*
			 * It is possible that async compaction aborted due to
			 * need_resched() and the watermarks were ok thanks to
			 * somebody else freeing memory. The allocation can
			 * however still fail so we better signal the
			 * need_resched() contention anyway (this will not
			 * prevent the allocation attempt).
			 */
			if (zone_contended == COMPACT_CONTENDED_SCHED)
				*contended = COMPACT_CONTENDED_SCHED;

			goto break_loop;
		}

		if (mode != MIGRATE_ASYNC) {
			/*
			 * We think that allocation won't succeed in this zone
			 * so we defer compaction there. If it ends up
			 * succeeding after all, it will be reset.
			 */
			defer_compaction(zone, order);
		}

		/*
		 * We might have stopped compacting due to need_resched() in
		 * async compaction, or due to a fatal signal detected. In that
		 * case do not try further zones and signal need_resched()
		 * contention.
		 */
		if ((zone_contended == COMPACT_CONTENDED_SCHED)
					|| fatal_signal_pending(current)) {
			*contended = COMPACT_CONTENDED_SCHED;
			goto break_loop;
		}

		continue;
break_loop:
		/*
		 * We might not have tried all the zones, so be conservative
		 * and assume they are not all lock contended.
		 */
		all_zones_contended = 0;
		break;
	}

	/*
	 * If at least one zone wasn't deferred or skipped, we report if all
	 * zones that were tried were lock contended.
	 */
	if (rc > COMPACT_SKIPPED && all_zones_contended)
		*contended = COMPACT_CONTENDED_LOCK;

	return rc;
}


/* Compact all zones within a node */
static void __compact_pgdat(pg_data_t *pgdat, struct compact_control *cc)
{
	int zoneid;
	struct zone *zone;

	for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) {

		zone = &pgdat->node_zones[zoneid];
		if (!populated_zone(zone))
			continue;

		cc->nr_freepages = 0;
		cc->nr_migratepages = 0;
		cc->zone = zone;
		INIT_LIST_HEAD(&cc->freepages);
		INIT_LIST_HEAD(&cc->migratepages);

		if (cc->order == -1 || !compaction_deferred(zone, cc->order))
			compact_zone(zone, cc);

		if (cc->order > 0) {
			if (zone_watermark_ok(zone, cc->order,
						low_wmark_pages(zone), 0, 0))
				compaction_defer_reset(zone, cc->order, false);
		}

		VM_BUG_ON(!list_empty(&cc->freepages));
		VM_BUG_ON(!list_empty(&cc->migratepages));
	}
}

void compact_pgdat(pg_data_t *pgdat, int order)
{
	struct compact_control cc = {
		.order = order,
		.mode = MIGRATE_ASYNC,
	};

	if (!order)
		return;

	__compact_pgdat(pgdat, &cc);
}

static void compact_node(int nid)
{
	struct compact_control cc = {
		.order = -1,
		.mode = MIGRATE_SYNC,
		.ignore_skip_hint = true,
	};

	__compact_pgdat(NODE_DATA(nid), &cc);
}

/* Compact all nodes in the system */
static void compact_nodes(void)
{
	int nid;

	/* Flush pending updates to the LRU lists */
	lru_add_drain_all();

	for_each_online_node(nid)
		compact_node(nid);
}

/* The written value is actually unused, all memory is compacted */
int sysctl_compact_memory;

/* This is the entry point for compacting all nodes via /proc/sys/vm */
int sysctl_compaction_handler(struct ctl_table *table, int write,
			void __user *buffer, size_t *length, loff_t *ppos)
{
	if (write)
		compact_nodes();

	return 0;
}

int sysctl_extfrag_handler(struct ctl_table *table, int write,
			void __user *buffer, size_t *length, loff_t *ppos)
{
	proc_dointvec_minmax(table, write, buffer, length, ppos);

	return 0;
}

#if defined(CONFIG_SYSFS) && defined(CONFIG_NUMA)
static ssize_t sysfs_compact_node(struct device *dev,
			struct device_attribute *attr,
			const char *buf, size_t count)
{
	int nid = dev->id;

	if (nid >= 0 && nid < nr_node_ids && node_online(nid)) {
		/* Flush pending updates to the LRU lists */
		lru_add_drain_all();

		compact_node(nid);
	}

	return count;
}
static DEVICE_ATTR(compact, S_IWUSR, NULL, sysfs_compact_node);

int compaction_register_node(struct node *node)
{
	return device_create_file(&node->dev, &dev_attr_compact);
}

void compaction_unregister_node(struct node *node)
{
	return device_remove_file(&node->dev, &dev_attr_compact);
}
#endif /* CONFIG_SYSFS && CONFIG_NUMA */

#endif /* CONFIG_COMPACTION */