vmscan.c: diff between revisions 818b930bc15077fc00ff16bb22c5df1857f05afa and 48fb2e240c4275c6ba4f53c9397f5fd6f350c3a7
1/*
2 * linux/mm/vmscan.c
3 *
4 * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
5 *
6 * Swap reorganised 29.12.95, Stephen Tweedie.
7 * kswapd added: 7.1.96 sct
8 * Removed kswapd_ctl limits, and swap out as many pages as needed

--- 1665 unchanged lines hidden ---

1674
1675 anon = get_lru_size(lruvec, LRU_ACTIVE_ANON) +
1676 get_lru_size(lruvec, LRU_INACTIVE_ANON);
1677 file = get_lru_size(lruvec, LRU_ACTIVE_FILE) +
1678 get_lru_size(lruvec, LRU_INACTIVE_FILE);
1679
1680 if (global_reclaim(sc)) {
1681 free = zone_page_state(zone, NR_FREE_PAGES);
1682 /* If we have very few page cache pages,
1683 force-scan anon pages. */
1684 if (unlikely(file + free <= high_wmark_pages(zone))) {
1682 if (unlikely(file + free <= high_wmark_pages(zone))) {
1683 /*
1684 * If we have very few page cache pages, force-scan
1685 * anon pages.
1686 */
1685 fraction[0] = 1;
1686 fraction[1] = 0;
1687 denominator = 1;
1688 goto out;
1687 fraction[0] = 1;
1688 fraction[1] = 0;
1689 denominator = 1;
1690 goto out;
1691 } else if (!inactive_file_is_low_global(zone)) {
1692 /*
1693 * There is enough inactive page cache, do not
1694 * reclaim anything from the working set right now.
1695 */
1696 fraction[0] = 0;
1697 fraction[1] = 1;
1698 denominator = 1;
1699 goto out;
1689 }
1690 }
1691
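For orientation, the fraction[] and denominator values chosen in the hunk above feed the per-LRU loop in the hidden tail of get_scan_count(): each list's scan target is scaled by roughly fraction[file] / denominator, so the file-only case (fraction[0] = 0, fraction[1] = 1) skips the anon lists entirely, while the low-page-cache case does the opposite. A minimal standalone sketch of that scaling, with made-up LRU sizes and a made-up priority shift rather than kernel values, and simplified relative to the hidden loop:

#include <stdio.h>

int main(void)
{
	/* index 0 = anon, 1 = file, mirroring fraction[0]/fraction[1] above */
	unsigned long long lru_size[2] = { 80000, 240000 };	/* illustrative sizes */
	unsigned long long fraction[2] = { 0, 1 };		/* the "file only" case */
	unsigned long long denominator = 1;

	for (int file = 0; file <= 1; file++) {
		unsigned long long scan = lru_size[file] >> 3;	/* stand-in for the priority shift */

		scan = scan * fraction[file] / denominator;
		printf("%s: scan %llu pages\n", file ? "file" : "anon", scan);
	}
	return 0;
}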
1692 /*
1693 * With swappiness at 100, anonymous and file have the same priority.
1694 * This scanning priority is essentially the inverse of IO cost.
1695 */
1696 anon_prio = vmscan_swappiness(sc);

--- 50 unchanged lines hidden ---

1747 }
1748 nr[lru] = scan;
1749 }
1750}
1751
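To put numbers on the swappiness comment above: vmscan_swappiness() supplies anon_prio, and the hidden lines derive file_prio from it (conventionally 200 - swappiness in this era of the code; an assumption here rather than a quote). Swappiness 100 therefore gives anon_prio = file_prio = 100, the equal-priority case the comment describes, while the default swappiness of 60 weights the split 60 to 140 in favour of scanning page cache.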
1752/* Use reclaim/compaction for costly allocs or under memory pressure */
1753static bool in_reclaim_compaction(struct scan_control *sc)
1754{
1755 if (COMPACTION_BUILD && sc->order &&
1766 if (IS_ENABLED(CONFIG_COMPACTION) && sc->order &&
1756 (sc->order > PAGE_ALLOC_COSTLY_ORDER ||
1757 sc->priority < DEF_PRIORITY - 2))
1758 return true;
1759
1760 return false;
1761}
1762
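The change to in_reclaim_compaction() above swaps the older COMPACTION_BUILD convenience macro for IS_ENABLED(CONFIG_COMPACTION), the generic helper from include/linux/kconfig.h that evaluates to 1 when an option is built-in or modular and to 0 otherwise. Because IS_ENABLED() is an ordinary C expression rather than an #ifdef, both arms of the condition remain visible to the compiler and the disabled one is simply optimised away. A standalone sketch of the idiom, where CONFIG_DEMO and the one-line IS_ENABLED stand-in are illustrative rather than the kernel definitions:

#include <stdio.h>

#define CONFIG_DEMO 1		/* pretend Kconfig set this option to 'y' */
#define IS_ENABLED(opt) (opt)	/* simplified stand-in for <linux/kconfig.h> */

static int want_reclaim_compaction(int order, int priority)
{
	/* Same shape as in_reclaim_compaction(): a plain C condition, no #ifdef,
	 * so both branches are parsed and type-checked even when disabled. */
	if (IS_ENABLED(CONFIG_DEMO) && order &&
	    (order > 3 || priority < 10))
		return 1;
	return 0;
}

int main(void)
{
	printf("%d %d\n",
	       want_reclaim_compaction(4, 12),
	       want_reclaim_compaction(0, 5));
	return 0;
}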
1763#ifdef CONFIG_COMPACTION
1764/*
1774/*
1765 * If compaction is deferred for sc->order then scale the number of pages
1766 * reclaimed based on the number of consecutive allocation failures
1767 */
1768static unsigned long scale_for_compaction(unsigned long pages_for_compaction,
1769 struct lruvec *lruvec, struct scan_control *sc)
1770{
1771 struct zone *zone = lruvec_zone(lruvec);
1772
1773 if (zone->compact_order_failed <= sc->order)
1774 pages_for_compaction <<= zone->compact_defer_shift;
1775 return pages_for_compaction;
1776}
1777#else
1778static unsigned long scale_for_compaction(unsigned long pages_for_compaction,
1779 struct lruvec *lruvec, struct scan_control *sc)
1780{
1781 return pages_for_compaction;
1782}
1783#endif
1784
1785/*
1786 * Reclaim/compaction is used for high-order allocation requests. It reclaims
1787 * order-0 pages before compacting the zone. should_continue_reclaim() returns
1788 * true if more pages should be reclaimed such that when the page allocator
1789 * calls try_to_compact_zone() that it will have enough free pages to succeed.
1790 * It will give up earlier than that if there is difficulty reclaiming pages.
1791 */
1792static inline bool should_continue_reclaim(struct lruvec *lruvec,
1793 unsigned long nr_reclaimed,

--- 30 unchanged lines hidden ---

1824 return false;
1825 }
1826
1827 /*
1828 * If we have not reclaimed enough pages for compaction and the
1829 * inactive lists are large enough, continue reclaiming
1830 */
1831 pages_for_compaction = (2UL << sc->order);
1832
1833 pages_for_compaction = scale_for_compaction(pages_for_compaction,
1834 lruvec, sc);
1835 inactive_lru_pages = get_lru_size(lruvec, LRU_INACTIVE_FILE);
1836 if (nr_swap_pages > 0)
1837 inactive_lru_pages += get_lru_size(lruvec, LRU_INACTIVE_ANON);
1838 if (sc->nr_reclaimed < pages_for_compaction &&
1839 inactive_lru_pages > pages_for_compaction)
1840 return true;
1841
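For scale: with an order-9 request (a 2 MiB transparent huge page on x86_64 with 4 KiB base pages), pages_for_compaction starts at 2UL << 9 = 1024 pages, i.e. 4 MiB of reclaim headroom, and the older scale_for_compaction() path above would further shift that by compact_defer_shift, so a deferral shift of, say, 4 would raise the target sixteen-fold to 16384 pages.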
1842 /* If compaction would go ahead or the allocation would succeed, stop */

--- 182 unchanged lines hidden ---

2025 * to global LRU.
2026 */
2027 if (global_reclaim(sc)) {
2028 if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
2029 continue;
2030 if (zone->all_unreclaimable &&
2031 sc->priority != DEF_PRIORITY)
2032 continue; /* Let kswapd poll it */
2033 if (COMPACTION_BUILD) {
2019 if (IS_ENABLED(CONFIG_COMPACTION)) {
2034 /*
2035 * If we already have plenty of memory free for
2036 * compaction in this zone, don't free any more.
2037 * Even though compaction is invoked for any
2038 * non-zero order, only frequent costly order
2039 * reclamation is disruptive enough to become a
2040 * noticeable problem, like transparent huge
2041 * page allocations.

--- 185 unchanged lines hidden ---

2227
2228 return wmark_ok;
2229}
2230
2231/*
2232 * Throttle direct reclaimers if backing storage is backed by the network
2233 * and the PFMEMALLOC reserve for the preferred node is getting dangerously
2234 * depleted. kswapd will continue to make progress and wake the processes
2235 * when the low watermark is reached
2221 * when the low watermark is reached.
2222 *
2223 * Returns true if a fatal signal was delivered during throttling. If this
2224 * happens, the page allocator should not consider triggering the OOM killer.
2236 */
2225 */
2237static void throttle_direct_reclaim(gfp_t gfp_mask, struct zonelist *zonelist,
2226static bool throttle_direct_reclaim(gfp_t gfp_mask, struct zonelist *zonelist,
2238 nodemask_t *nodemask)
2239{
2240 struct zone *zone;
2241 int high_zoneidx = gfp_zone(gfp_mask);
2242 pg_data_t *pgdat;
2243
2244 /*
2245 * Kernel threads should not be throttled as they may be indirectly
2246 * responsible for cleaning pages necessary for reclaim to make forward
2247 * progress. kjournald for example may enter direct reclaim while
2248 * committing a transaction where throttling it could force other
2249 * processes to block on log_wait_commit().
2250 */
2251 if (current->flags & PF_KTHREAD)
2252 return;
2241 goto out;
2253
2242
2243 /*
2244 * If a fatal signal is pending, this process should not throttle.
2245 * It should return quickly so it can exit and free its memory
2246 */
2247 if (fatal_signal_pending(current))
2248 goto out;
2249
2254 /* Check if the pfmemalloc reserves are ok */
2255 first_zones_zonelist(zonelist, high_zoneidx, NULL, &zone);
2256 pgdat = zone->zone_pgdat;
2257 if (pfmemalloc_watermark_ok(pgdat))
2258 return;
2254 goto out;
2259
2260 /* Account for the throttling */
2261 count_vm_event(PGSCAN_DIRECT_THROTTLE);
2262
2263 /*
2264 * If the caller cannot enter the filesystem, it's possible that it
2265 * is due to the caller holding an FS lock or performing a journal
2266 * transaction in the case of a filesystem like ext[3|4]. In this case,
2267 * it is not safe to block on pfmemalloc_wait as kswapd could be
2268 * blocked waiting on the same lock. Instead, throttle for up to a
2269 * second before continuing.
2270 */
2271 if (!(gfp_mask & __GFP_FS)) {
2272 wait_event_interruptible_timeout(pgdat->pfmemalloc_wait,
2273 pfmemalloc_watermark_ok(pgdat), HZ);
2274 return;
2270
2271 goto check_pending;
2275 }
2276
2277 /* Throttle until kswapd wakes the process */
2278 wait_event_killable(zone->zone_pgdat->pfmemalloc_wait,
2279 pfmemalloc_watermark_ok(pgdat));
2277
2278check_pending:
2279 if (fatal_signal_pending(current))
2280 return true;
2281
2282out:
2283 return false;
2280}
2281
2282unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
2283 gfp_t gfp_mask, nodemask_t *nodemask)
2284{
2285 unsigned long nr_reclaimed;
2286 struct scan_control sc = {
2287 .gfp_mask = gfp_mask,

--- 5 unchanged lines hidden ---

2293 .priority = DEF_PRIORITY,
2294 .target_mem_cgroup = NULL,
2295 .nodemask = nodemask,
2296 };
2297 struct shrink_control shrink = {
2298 .gfp_mask = sc.gfp_mask,
2299 };
2300
2301 throttle_direct_reclaim(gfp_mask, zonelist, nodemask);
2302
2303 /*
2305 /*
2304 * Do not enter reclaim if fatal signal is pending. 1 is returned so
2305 * that the page allocator does not consider triggering OOM
2306 * Do not enter reclaim if fatal signal was delivered while throttled.
2307 * 1 is returned so that the page allocator does not OOM kill at this
2308 * point.
2306 */
2309 */
2307 if (fatal_signal_pending(current))
2310 if (throttle_direct_reclaim(gfp_mask, zonelist, nodemask))
2308 return 1;
2309
2310 trace_mm_vmscan_direct_reclaim_begin(order,
2311 sc.may_writepage,
2312 gfp_mask);
2313
2314 nr_reclaimed = do_try_to_free_pages(zonelist, &sc, &shrink);
2315

--- 101 unchanged lines hidden ---

2417 if (inactive_anon_is_low(lruvec))
2418 shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
2419 sc, LRU_ACTIVE_ANON);
2420
2421 memcg = mem_cgroup_iter(NULL, memcg, NULL);
2422 } while (memcg);
2423}
2424
2428static bool zone_balanced(struct zone *zone, int order,
2429 unsigned long balance_gap, int classzone_idx)
2430{
2431 if (!zone_watermark_ok_safe(zone, order, high_wmark_pages(zone) +
2432 balance_gap, classzone_idx, 0))
2433 return false;
2434
2435 if (IS_ENABLED(CONFIG_COMPACTION) && order &&
2436 !compaction_suitable(zone, order))
2437 return false;
2438
2439 return true;
2440}
2441
2425/*
2426 * pgdat_balanced is used when checking if a node is balanced for high-order
2427 * allocations. Only zones that meet watermarks and are in a zone allowed
2428 * by the caller's classzone_idx are added to balanced_pages. The total of
2429 * balanced pages must be at least 25% of the zones allowed by classzone_idx
2430 * for the node to be considered balanced. Forcing all zones to be balanced
2431 * for high orders can cause excessive reclaim when there are imbalanced zones.
2432 * The choice of 25% is due to

--- 62 unchanged lines hidden ---

2495 * they must be considered balanced here as well if kswapd
2496 * is to sleep
2497 */
2498 if (zone->all_unreclaimable) {
2499 balanced += zone->present_pages;
2500 continue;
2501 }
2502
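As a worked example of the 25% rule described above: if the zones at or below the caller's classzone_idx hold 4,000,000 pages in total, the node counts as balanced for a high-order request once zones holding at least 1,000,000 of those pages meet their watermarks, rather than requiring every zone individually to pass.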
2503 if (!zone_watermark_ok_safe(zone, order, high_wmark_pages(zone),
2504 i, 0))
2520 if (!zone_balanced(zone, order, 0, i))
2505 all_zones_ok = false;
2506 else
2507 balanced += zone->present_pages;
2508 }
2509
2510 /*
2511 * For high-order requests, the balanced zones must contain at least
2512 * 25% of the node's pages for kswapd to sleep. For order-0, all zones

--- 92 unchanged lines hidden ---

2605 * has a highmem zone, force kswapd to reclaim from
2606 * it to relieve lowmem pressure.
2607 */
2608 if (buffer_heads_over_limit && is_highmem_idx(i)) {
2609 end_zone = i;
2610 break;
2611 }
2612
2613 if (!zone_watermark_ok_safe(zone, order,
2614 high_wmark_pages(zone), 0, 0)) {
2629 if (!zone_balanced(zone, order, 0, 0)) {
2615 end_zone = i;
2616 break;
2617 } else {
2618 /* If balanced, clear the congested flag */
2619 zone_clear_flag(zone, ZONE_CONGESTED);
2620 }
2621 }
2622 if (i < 0)

--- 53 unchanged lines hidden ---

2676 /*
2677 * Kswapd reclaims only single pages with compaction
2678 * enabled. Trying too hard to reclaim until contiguous
2679 * free pages have become available can hurt performance
2680 * by evicting too much useful data from memory.
2681 * Do not reclaim more than needed for compaction.
2682 */
2683 testorder = order;
2684 if (COMPACTION_BUILD && order &&
2699 if (IS_ENABLED(CONFIG_COMPACTION) && order &&
2685 compaction_suitable(zone, order) !=
2686 COMPACT_SKIPPED)
2687 testorder = 0;
2688
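The net effect of the testorder handling above and in the watermark checks below: when compaction is enabled, the order is non-zero, and compaction_suitable() reports the zone is not COMPACT_SKIPPED, kswapd evaluates the balance test as if for order-0, topping up base pages and leaving the assembly of the contiguous block to compaction instead of reclaiming until a high-order page happens to fall out.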
2689 if ((buffer_heads_over_limit && is_highmem_idx(i)) ||
2690 !zone_watermark_ok_safe(zone, testorder,
2691 high_wmark_pages(zone) + balance_gap,
2692 end_zone, 0)) {
2705 !zone_balanced(zone, testorder,
2706 balance_gap, end_zone)) {
2693 shrink_zone(zone, &sc);
2694
2695 reclaim_state->reclaimed_slab = 0;
2696 nr_slab = shrink_slab(&shrink, sc.nr_scanned, lru_pages);
2697 sc.nr_reclaimed += reclaim_state->reclaimed_slab;
2698 total_scanned += sc.nr_scanned;
2699
2700 if (nr_slab == 0 && !zone_reclaimable(zone))

--- 10 unchanged lines hidden ---

2711 sc.may_writepage = 1;
2712
2713 if (zone->all_unreclaimable) {
2714 if (end_zone && end_zone == i)
2715 end_zone--;
2716 continue;
2717 }
2718
2719 if (!zone_watermark_ok_safe(zone, testorder,
2720 high_wmark_pages(zone), end_zone, 0)) {
2733 if (!zone_balanced(zone, testorder, 0, end_zone)) {
2721 all_zones_ok = 0;
2722 /*
2723 * We are still under min water mark. This
2724 * means that we have a GFP_ATOMIC allocation
2725 * failure risk. Hurry up!
2726 */
2727 if (!zone_watermark_ok_safe(zone, order,
2728 min_wmark_pages(zone), end_zone, 0))

--- 88 unchanged lines hidden ---

2817 int zones_need_compaction = 1;
2818
2819 for (i = 0; i <= end_zone; i++) {
2820 struct zone *zone = pgdat->node_zones + i;
2821
2822 if (!populated_zone(zone))
2823 continue;
2824
2825 if (zone->all_unreclaimable &&
2826 sc.priority != DEF_PRIORITY)
2827 continue;
2828
2829 /* Would compaction fail due to lack of free memory? */
2830 if (COMPACTION_BUILD &&
2831 compaction_suitable(zone, order) == COMPACT_SKIPPED)
2832 goto loop_again;
2833
2834 /* Confirm the zone is balanced for order-0 */
2835 if (!zone_watermark_ok(zone, 0,
2836 high_wmark_pages(zone), 0, 0)) {
2837 order = sc.order = 0;
2838 goto loop_again;
2839 }
2840
2841 /* Check if the memory needs to be defragmented. */
2842 if (zone_watermark_ok(zone, order,
2843 low_wmark_pages(zone), *classzone_idx, 0))
2844 zones_need_compaction = 0;
2845
2846 /* If balanced, clear the congested flag */
2847 zone_clear_flag(zone, ZONE_CONGESTED);
2848 }
2849
2850 if (zones_need_compaction)
2851 compact_pgdat(pgdat, order);
2852 }
2853
2854 /*
2855 * Return the order we were reclaiming at so prepare_kswapd_sleep()

--- 108 unchanged lines hidden ---

2964 tsk->flags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;
2965 set_freezable();
2966
2967 order = new_order = 0;
2968 balanced_order = 0;
2969 classzone_idx = new_classzone_idx = pgdat->nr_zones - 1;
2970 balanced_classzone_idx = classzone_idx;
2971 for ( ; ; ) {
2972 int ret;
2966 bool ret;
2973
2974 /*
2975 * If the last balance_pgdat was unsuccessful it's unlikely a
2976 * new request of a similar or harder type will succeed soon
2977 * so consider going to sleep on the basis we reclaimed at
2978 */
2979 if (balanced_classzone_idx >= new_classzone_idx &&
2980 balanced_order == new_order) {

--- 151 unchanged lines hidden ---

3132 away, we get changed to run anywhere: as the first one comes back,
3133 restore their cpu bindings. */
3134static int __devinit cpu_callback(struct notifier_block *nfb,
3135 unsigned long action, void *hcpu)
3136{
3137 int nid;
3138
3139 if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN) {
3140 for_each_node_state(nid, N_HIGH_MEMORY) {
3134 for_each_node_state(nid, N_MEMORY) {
3141 pg_data_t *pgdat = NODE_DATA(nid);
3142 const struct cpumask *mask;
3143
3144 mask = cpumask_of_node(pgdat->node_id);
3145
3146 if (cpumask_any_and(cpu_online_mask, mask) < nr_cpu_ids)
3147 /* One of our CPUs online: restore mask */
3148 set_cpus_allowed_ptr(pgdat->kswapd, mask);

--- 39 unchanged lines hidden ---

3188 }
3189}
3190
3191static int __init kswapd_init(void)
3192{
3193 int nid;
3194
3195 swap_setup();
3196 for_each_node_state(nid, N_HIGH_MEMORY)
3190 for_each_node_state(nid, N_MEMORY)
3197 kswapd_run(nid);
3198 hotcpu_notifier(cpu_callback, 0);
3199 return 0;
3200}
3201
3202module_init(kswapd_init)
3203
3204#ifdef CONFIG_NUMA

--- 344 unchanged lines hidden ---