--- vmscan.c (a579086c99ed70cc4bfc104348dbe3dd8f2787e6)
+++ vmscan.c (7348cc91821b0cb24dfb00e578047f68299a50ab)

 // SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
  *
  * Swap reorganised 29.12.95, Stephen Tweedie.
  * kswapd added: 7.1.96 sct
  * Removed kswapd_ctl limits, and swap out as many pages as needed
  * to bring the system back to freepages.high: 2.4.97, Rik van Riel.

--- 123 unchanged lines hidden (both versions resume at line 132) ---

 	/* The file folios on the current node are dangerously low */
 	unsigned int file_is_tiny:1;

 	/* Always discard instead of demoting to lower tier memory */
 	unsigned int no_demotion:1;

 #ifdef CONFIG_LRU_GEN
 	/* help kswapd make better choices among multiple memcgs */
-	unsigned int memcgs_need_aging:1;
 	unsigned long last_reclaimed;
 #endif

 	/* Allocation order */
 	s8 order;

 	/* Scan (total_size >> priority) pages at once */
 	s8 priority;

--- 4314 unchanged lines hidden (old resumes at line 4463, new at line 4462) ---

 	inc_max_seq(lruvec, can_swap, force_scan);
 	/* either this sees any waiters or they will see updated max_seq */
 	if (wq_has_sleeper(&lruvec->mm_state.wait))
 		wake_up_all(&lruvec->mm_state.wait);

 	return true;
 }

-static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq, unsigned long *min_seq,
+static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq,
 			     struct scan_control *sc, bool can_swap, unsigned long *nr_to_scan)
 {
 	int gen, type, zone;
 	unsigned long old = 0;
 	unsigned long young = 0;
 	unsigned long total = 0;
 	struct lru_gen_folio *lrugen = &lruvec->lrugen;
 	struct mem_cgroup *memcg = lruvec_memcg(lruvec);
+	DEFINE_MIN_SEQ(lruvec);

+	/* whether this lruvec is completely out of cold folios */
+	if (min_seq[!can_swap] + MIN_NR_GENS > max_seq) {
+		*nr_to_scan = 0;
+		return true;
+	}
+
 	for (type = !can_swap; type < ANON_AND_FILE; type++) {
 		unsigned long seq;

 		for (seq = min_seq[type]; seq <= max_seq; seq++) {
 			unsigned long size = 0;

 			gen = lru_gen_from_seq(seq);

--- 11 unchanged lines hidden (old resumes at line 4500, new at line 4506) ---

 	/* try to scrape all its memory if this memcg was deleted */
 	*nr_to_scan = mem_cgroup_online(memcg) ? (total >> sc->priority) : total;

 	/*
 	 * The aging tries to be lazy to reduce the overhead, while the eviction
 	 * stalls when the number of generations reaches MIN_NR_GENS. Hence, the
 	 * ideal number of generations is MIN_NR_GENS+1.
 	 */
-	if (min_seq[!can_swap] + MIN_NR_GENS > max_seq)
-		return true;
 	if (min_seq[!can_swap] + MIN_NR_GENS < max_seq)
 		return false;

 	/*
 	 * It's also ideal to spread pages out evenly, i.e., 1/(MIN_NR_GENS+1)
 	 * of the total number of pages for each generation. A reasonable range
 	 * for this average portion is [1/MIN_NR_GENS, 1/(MIN_NR_GENS+2)]. The
 	 * aging cares about the upper bound of hot pages, while the eviction
 	 * cares about the lower bound of cold pages.
 	 */
 	if (young * MIN_NR_GENS > total)
 		return true;
 	if (old * (MIN_NR_GENS + 2) < total)
 		return true;

 	return false;
 }

-static bool age_lruvec(struct lruvec *lruvec, struct scan_control *sc, unsigned long min_ttl)
+static bool lruvec_is_sizable(struct lruvec *lruvec, struct scan_control *sc)
 {
-	bool need_aging;
-	unsigned long nr_to_scan;
-	int swappiness = get_swappiness(lruvec, sc);
+	int gen, type, zone;
+	unsigned long total = 0;
+	bool can_swap = get_swappiness(lruvec, sc);
+	struct lru_gen_folio *lrugen = &lruvec->lrugen;
 	struct mem_cgroup *memcg = lruvec_memcg(lruvec);
 	DEFINE_MAX_SEQ(lruvec);
 	DEFINE_MIN_SEQ(lruvec);

-	VM_WARN_ON_ONCE(sc->memcg_low_reclaim);
+	for (type = !can_swap; type < ANON_AND_FILE; type++) {
+		unsigned long seq;

-	mem_cgroup_calculate_protection(NULL, memcg);
+		for (seq = min_seq[type]; seq <= max_seq; seq++) {
+			gen = lru_gen_from_seq(seq);

-	if (mem_cgroup_below_min(NULL, memcg))
-		return false;
+			for (zone = 0; zone < MAX_NR_ZONES; zone++)
+				total += max(READ_ONCE(lrugen->nr_pages[gen][type][zone]), 0L);
+		}
+	}

-	need_aging = should_run_aging(lruvec, max_seq, min_seq, sc, swappiness, &nr_to_scan);
+	/* whether the size is big enough to be helpful */
+	return mem_cgroup_online(memcg) ? (total >> sc->priority) : total;
+}

-	if (min_ttl) {
-		int gen = lru_gen_from_seq(min_seq[LRU_GEN_FILE]);
-		unsigned long birth = READ_ONCE(lruvec->lrugen.timestamps[gen]);
+static bool lruvec_is_reclaimable(struct lruvec *lruvec, struct scan_control *sc,
+				  unsigned long min_ttl)
+{
+	int gen;
+	unsigned long birth;
+	struct mem_cgroup *memcg = lruvec_memcg(lruvec);
+	DEFINE_MIN_SEQ(lruvec);

-		if (time_is_after_jiffies(birth + min_ttl))
-			return false;
+	VM_WARN_ON_ONCE(sc->memcg_low_reclaim);

-		/* the size is likely too small to be helpful */
-		if (!nr_to_scan && sc->priority != DEF_PRIORITY)
-			return false;
-	}
+	/* see the comment on lru_gen_folio */
+	gen = lru_gen_from_seq(min_seq[LRU_GEN_FILE]);
+	birth = READ_ONCE(lruvec->lrugen.timestamps[gen]);

-	if (need_aging)
-		try_to_inc_max_seq(lruvec, max_seq, sc, swappiness, false);
+	if (time_is_after_jiffies(birth + min_ttl))
+		return false;

-	return true;
+	if (!lruvec_is_sizable(lruvec, sc))
+		return false;
+
+	mem_cgroup_calculate_protection(NULL, memcg);
+
+	return !mem_cgroup_below_min(NULL, memcg);
 }

 /* to protect the working set of the last N jiffies */
 static unsigned long lru_gen_min_ttl __read_mostly;

 static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc)
 {
 	struct mem_cgroup *memcg;
-	bool success = false;
 	unsigned long min_ttl = READ_ONCE(lru_gen_min_ttl);

 	VM_WARN_ON_ONCE(!current_is_kswapd());

 	sc->last_reclaimed = sc->nr_reclaimed;

-	/*
-	 * To reduce the chance of going into the aging path, which can be
-	 * costly, optimistically skip it if the flag below was cleared in the
-	 * eviction path. This improves the overall performance when multiple
-	 * memcgs are available.
-	 */
-	if (!sc->memcgs_need_aging) {
-		sc->memcgs_need_aging = true;
+	/* check the order to exclude compaction-induced reclaim */
+	if (!min_ttl || sc->order || sc->priority == DEF_PRIORITY)
 		return;
-	}

-	set_mm_walk(pgdat);
-
 	memcg = mem_cgroup_iter(NULL, NULL, NULL);
 	do {
 		struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat);

-		if (age_lruvec(lruvec, sc, min_ttl))
-			success = true;
+		if (lruvec_is_reclaimable(lruvec, sc, min_ttl)) {
+			mem_cgroup_iter_break(NULL, memcg);
+			return;
+		}

 		cond_resched();
 	} while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)));

-	clear_mm_walk();
-
-	/* check the order to exclude compaction-induced reclaim */
-	if (success || !min_ttl || sc->order)
-		return;
-
 	/*
 	 * The main goal is to OOM kill if every generation from all memcgs is
 	 * younger than min_ttl. However, another possibility is all memcgs are
-	 * either below min or empty.
+	 * either too small or below min.
 	 */
 	if (mutex_trylock(&oom_lock)) {
 		struct oom_control oc = {
 			.gfp_mask = sc->gfp_mask,
 		};

 		out_of_memory(&oc);

--- 491 unchanged lines hidden (old resumes at line 5109, new at line 5113) ---

 }

 /*
  * For future optimizations:
  * 1. Defer try_to_inc_max_seq() to workqueues to reduce latency for memcg
  *    reclaim.
  */
 static unsigned long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc,
-				    bool can_swap, bool *need_aging)
+				    bool can_swap)
 {
 	unsigned long nr_to_scan;
 	struct mem_cgroup *memcg = lruvec_memcg(lruvec);
 	DEFINE_MAX_SEQ(lruvec);
-	DEFINE_MIN_SEQ(lruvec);

 	if (mem_cgroup_below_min(sc->target_mem_cgroup, memcg) ||
 	    (mem_cgroup_below_low(sc->target_mem_cgroup, memcg) &&
 	     !sc->memcg_low_reclaim))
 		return 0;

-	*need_aging = should_run_aging(lruvec, max_seq, min_seq, sc, can_swap, &nr_to_scan);
-	if (!*need_aging)
+	if (!should_run_aging(lruvec, max_seq, sc, can_swap, &nr_to_scan))
 		return nr_to_scan;

 	/* skip the aging path at the default priority */
 	if (sc->priority == DEF_PRIORITY)
-		goto done;
+		return nr_to_scan;

-	/* leave the work to lru_gen_age_node() */
-	if (current_is_kswapd())
-		return 0;
+	try_to_inc_max_seq(lruvec, max_seq, sc, can_swap, false);

-	if (try_to_inc_max_seq(lruvec, max_seq, sc, can_swap, false))
-		return nr_to_scan;
-done:
-	return min_seq[!can_swap] + MIN_NR_GENS <= max_seq ? nr_to_scan : 0;
+	/* skip this lruvec as it's low on cold folios */
+	return 0;
 }

 static unsigned long get_nr_to_reclaim(struct scan_control *sc)
 {
 	/* don't abort memcg reclaim to ensure fairness */
 	if (!global_reclaim(sc))
 		return -1;

 	/* discount the previous progress for kswapd */
 	if (current_is_kswapd())
 		return sc->nr_to_reclaim + sc->last_reclaimed;

 	return max(sc->nr_to_reclaim, compact_gap(sc->order));
 }

 static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
 {
 	struct blk_plug plug;
-	bool need_aging = false;
 	unsigned long scanned = 0;
-	unsigned long reclaimed = sc->nr_reclaimed;
 	unsigned long nr_to_reclaim = get_nr_to_reclaim(sc);

 	lru_add_drain();

 	blk_start_plug(&plug);

 	set_mm_walk(lruvec_pgdat(lruvec));

--- 4 unchanged lines hidden (old resumes at line 5178, new at line 5174) ---

 		if (sc->may_swap)
 			swappiness = get_swappiness(lruvec, sc);
 		else if (!cgroup_reclaim(sc) && get_swappiness(lruvec, sc))
 			swappiness = 1;
 		else
 			swappiness = 0;

-		nr_to_scan = get_nr_to_scan(lruvec, sc, swappiness, &need_aging);
+		nr_to_scan = get_nr_to_scan(lruvec, sc, swappiness);
 		if (!nr_to_scan)
-			goto done;
+			break;

 		delta = evict_folios(lruvec, sc, swappiness);
 		if (!delta)
-			goto done;
+			break;

 		scanned += delta;
 		if (scanned >= nr_to_scan)
 			break;

 		if (sc->nr_reclaimed >= nr_to_reclaim)
 			break;

 		cond_resched();
 	}

-	/* see the comment in lru_gen_age_node() */
-	if (sc->nr_reclaimed - reclaimed >= MIN_LRU_BATCH && !need_aging)
-		sc->memcgs_need_aging = false;
-done:
 	clear_mm_walk();

 	blk_finish_plug(&plug);
 }

 /******************************************************************************
  *                          state change
  ******************************************************************************/

--- 2544 unchanged lines hidden ---