13f49584bSSeongJae Park // SPDX-License-Identifier: GPL-2.0
23f49584bSSeongJae Park /*
33f49584bSSeongJae Park * DAMON Primitives for Virtual Address Spaces
43f49584bSSeongJae Park *
53f49584bSSeongJae Park * Author: SeongJae Park <sjpark@amazon.de>
63f49584bSSeongJae Park */
73f49584bSSeongJae Park
83f49584bSSeongJae Park #define pr_fmt(fmt) "damon-va: " fmt
93f49584bSSeongJae Park
106dea8addSSeongJae Park #include <asm-generic/mman-common.h>
113f49584bSSeongJae Park #include <linux/highmem.h>
1246c3a0acSSeongJae Park #include <linux/hugetlb.h>
1346c3a0acSSeongJae Park #include <linux/mmu_notifier.h>
143f49584bSSeongJae Park #include <linux/page_idle.h>
153f49584bSSeongJae Park #include <linux/pagewalk.h>
168581fd40SJakub Kicinski #include <linux/sched/mm.h>
1746c3a0acSSeongJae Park
18f7d911c3SSeongJae Park #include "ops-common.h"
193f49584bSSeongJae Park
2017ccae8bSSeongJae Park #ifdef CONFIG_DAMON_VADDR_KUNIT_TEST
2117ccae8bSSeongJae Park #undef DAMON_MIN_REGION
2217ccae8bSSeongJae Park #define DAMON_MIN_REGION 1
2317ccae8bSSeongJae Park #endif
2417ccae8bSSeongJae Park
253f49584bSSeongJae Park /*
261971bd63SSeongJae Park * 't->pid' should be the pointer to the relevant 'struct pid' having reference
273f49584bSSeongJae Park * count. Caller must put the returned task, unless it is NULL.
283f49584bSSeongJae Park */
damon_get_task_struct(struct damon_target * t)2988f86dcfSSeongJae Park static inline struct task_struct *damon_get_task_struct(struct damon_target *t)
3088f86dcfSSeongJae Park {
311971bd63SSeongJae Park return get_pid_task(t->pid, PIDTYPE_PID);
3288f86dcfSSeongJae Park }
333f49584bSSeongJae Park
343f49584bSSeongJae Park /*
353f49584bSSeongJae Park * Get the mm_struct of the given target
363f49584bSSeongJae Park *
373f49584bSSeongJae Park * Caller _must_ put the mm_struct after use, unless it is NULL.
383f49584bSSeongJae Park *
393f49584bSSeongJae Park * Returns the mm_struct of the target on success, NULL on failure
403f49584bSSeongJae Park */
damon_get_mm(struct damon_target * t)413f49584bSSeongJae Park static struct mm_struct *damon_get_mm(struct damon_target *t)
423f49584bSSeongJae Park {
433f49584bSSeongJae Park struct task_struct *task;
443f49584bSSeongJae Park struct mm_struct *mm;
453f49584bSSeongJae Park
463f49584bSSeongJae Park task = damon_get_task_struct(t);
473f49584bSSeongJae Park if (!task)
483f49584bSSeongJae Park return NULL;
493f49584bSSeongJae Park
503f49584bSSeongJae Park mm = get_task_mm(task);
513f49584bSSeongJae Park put_task_struct(task);
523f49584bSSeongJae Park return mm;
533f49584bSSeongJae Park }
543f49584bSSeongJae Park
553f49584bSSeongJae Park /*
563f49584bSSeongJae Park * Functions for the initial monitoring target regions construction
573f49584bSSeongJae Park */
583f49584bSSeongJae Park
593f49584bSSeongJae Park /*
603f49584bSSeongJae Park * Size-evenly split a region into 'nr_pieces' small regions
613f49584bSSeongJae Park *
623f49584bSSeongJae Park * Returns 0 on success, or negative error code otherwise.
633f49584bSSeongJae Park */
damon_va_evenly_split_region(struct damon_target * t,struct damon_region * r,unsigned int nr_pieces)643f49584bSSeongJae Park static int damon_va_evenly_split_region(struct damon_target *t,
653f49584bSSeongJae Park struct damon_region *r, unsigned int nr_pieces)
663f49584bSSeongJae Park {
673f49584bSSeongJae Park unsigned long sz_orig, sz_piece, orig_end;
683f49584bSSeongJae Park struct damon_region *n = NULL, *next;
693f49584bSSeongJae Park unsigned long start;
70*1af5e8b1SZheng Yejian unsigned int i;
713f49584bSSeongJae Park
723f49584bSSeongJae Park if (!r || !nr_pieces)
733f49584bSSeongJae Park return -EINVAL;
743f49584bSSeongJae Park
753f49584bSSeongJae Park orig_end = r->ar.end;
76ab63f63fSXin Hao sz_orig = damon_sz_region(r);
773f49584bSSeongJae Park sz_piece = ALIGN_DOWN(sz_orig / nr_pieces, DAMON_MIN_REGION);
783f49584bSSeongJae Park
793f49584bSSeongJae Park if (!sz_piece)
803f49584bSSeongJae Park return -EINVAL;
813f49584bSSeongJae Park
823f49584bSSeongJae Park r->ar.end = r->ar.start + sz_piece;
833f49584bSSeongJae Park next = damon_next_region(r);
84*1af5e8b1SZheng Yejian for (start = r->ar.end, i = 1; i < nr_pieces; start += sz_piece, i++) {
853f49584bSSeongJae Park n = damon_new_region(start, start + sz_piece);
863f49584bSSeongJae Park if (!n)
873f49584bSSeongJae Park return -ENOMEM;
883f49584bSSeongJae Park damon_insert_region(n, r, next, t);
893f49584bSSeongJae Park r = n;
903f49584bSSeongJae Park }
913f49584bSSeongJae Park /* complement last region for possible rounding error */
923f49584bSSeongJae Park if (n)
933f49584bSSeongJae Park n->ar.end = orig_end;
943f49584bSSeongJae Park
953f49584bSSeongJae Park return 0;
963f49584bSSeongJae Park }
973f49584bSSeongJae Park
sz_range(struct damon_addr_range * r)983f49584bSSeongJae Park static unsigned long sz_range(struct damon_addr_range *r)
993f49584bSSeongJae Park {
1003f49584bSSeongJae Park return r->end - r->start;
1013f49584bSSeongJae Park }
1023f49584bSSeongJae Park
1033f49584bSSeongJae Park /*
1043f49584bSSeongJae Park * Find three regions separated by two biggest unmapped regions
1053f49584bSSeongJae Park *
1063f49584bSSeongJae Park * vma the head vma of the target address space
1073f49584bSSeongJae Park * regions an array of three address ranges that results will be saved
1083f49584bSSeongJae Park *
1093f49584bSSeongJae Park * This function receives an address space and finds three regions in it which
1103f49584bSSeongJae Park * separated by the two biggest unmapped regions in the space. Please refer to
1113f49584bSSeongJae Park * below comments of '__damon_va_init_regions()' function to know why this is
1123f49584bSSeongJae Park * necessary.
1133f49584bSSeongJae Park *
1143f49584bSSeongJae Park * Returns 0 if success, or negative error code otherwise.
1153f49584bSSeongJae Park */
__damon_va_three_regions(struct mm_struct * mm,struct damon_addr_range regions[3])116d0cf3dd4SLiam R. Howlett static int __damon_va_three_regions(struct mm_struct *mm,
1173f49584bSSeongJae Park struct damon_addr_range regions[3])
1183f49584bSSeongJae Park {
119d0cf3dd4SLiam R. Howlett struct damon_addr_range first_gap = {0}, second_gap = {0};
120d0cf3dd4SLiam R. Howlett VMA_ITERATOR(vmi, mm, 0);
121d0cf3dd4SLiam R. Howlett struct vm_area_struct *vma, *prev = NULL;
122d0cf3dd4SLiam R. Howlett unsigned long start;
1233f49584bSSeongJae Park
124d0cf3dd4SLiam R. Howlett /*
125d0cf3dd4SLiam R. Howlett * Find the two biggest gaps so that first_gap > second_gap > others.
126d0cf3dd4SLiam R. Howlett * If this is too slow, it can be optimised to examine the maple
127d0cf3dd4SLiam R. Howlett * tree gaps.
128d0cf3dd4SLiam R. Howlett */
129b35a42bdSLiam R. Howlett rcu_read_lock();
130d0cf3dd4SLiam R. Howlett for_each_vma(vmi, vma) {
131d0cf3dd4SLiam R. Howlett unsigned long gap;
132d0cf3dd4SLiam R. Howlett
133d0cf3dd4SLiam R. Howlett if (!prev) {
1343f49584bSSeongJae Park start = vma->vm_start;
1353f49584bSSeongJae Park goto next;
1363f49584bSSeongJae Park }
137d0cf3dd4SLiam R. Howlett gap = vma->vm_start - prev->vm_end;
1383f49584bSSeongJae Park
139d0cf3dd4SLiam R. Howlett if (gap > sz_range(&first_gap)) {
140d0cf3dd4SLiam R. Howlett second_gap = first_gap;
141d0cf3dd4SLiam R. Howlett first_gap.start = prev->vm_end;
142d0cf3dd4SLiam R. Howlett first_gap.end = vma->vm_start;
143d0cf3dd4SLiam R. Howlett } else if (gap > sz_range(&second_gap)) {
144d0cf3dd4SLiam R. Howlett second_gap.start = prev->vm_end;
145d0cf3dd4SLiam R. Howlett second_gap.end = vma->vm_start;
1463f49584bSSeongJae Park }
1473f49584bSSeongJae Park next:
148d0cf3dd4SLiam R. Howlett prev = vma;
1493f49584bSSeongJae Park }
150b35a42bdSLiam R. Howlett rcu_read_unlock();
1513f49584bSSeongJae Park
1523f49584bSSeongJae Park if (!sz_range(&second_gap) || !sz_range(&first_gap))
1533f49584bSSeongJae Park return -EINVAL;
1543f49584bSSeongJae Park
1553f49584bSSeongJae Park /* Sort the two biggest gaps by address */
1563f49584bSSeongJae Park if (first_gap.start > second_gap.start)
1578bd0b9daSYihao Han swap(first_gap, second_gap);
1583f49584bSSeongJae Park
1593f49584bSSeongJae Park /* Store the result */
1603f49584bSSeongJae Park regions[0].start = ALIGN(start, DAMON_MIN_REGION);
1613f49584bSSeongJae Park regions[0].end = ALIGN(first_gap.start, DAMON_MIN_REGION);
1623f49584bSSeongJae Park regions[1].start = ALIGN(first_gap.end, DAMON_MIN_REGION);
1633f49584bSSeongJae Park regions[1].end = ALIGN(second_gap.start, DAMON_MIN_REGION);
1643f49584bSSeongJae Park regions[2].start = ALIGN(second_gap.end, DAMON_MIN_REGION);
165d0cf3dd4SLiam R. Howlett regions[2].end = ALIGN(prev->vm_end, DAMON_MIN_REGION);
1663f49584bSSeongJae Park
1673f49584bSSeongJae Park return 0;
1683f49584bSSeongJae Park }
1693f49584bSSeongJae Park
1703f49584bSSeongJae Park /*
1713f49584bSSeongJae Park * Get the three regions in the given target (task)
1723f49584bSSeongJae Park *
1733f49584bSSeongJae Park * Returns 0 on success, negative error code otherwise.
1743f49584bSSeongJae Park */
damon_va_three_regions(struct damon_target * t,struct damon_addr_range regions[3])1753f49584bSSeongJae Park static int damon_va_three_regions(struct damon_target *t,
1763f49584bSSeongJae Park struct damon_addr_range regions[3])
1773f49584bSSeongJae Park {
1783f49584bSSeongJae Park struct mm_struct *mm;
1793f49584bSSeongJae Park int rc;
1803f49584bSSeongJae Park
1813f49584bSSeongJae Park mm = damon_get_mm(t);
1823f49584bSSeongJae Park if (!mm)
1833f49584bSSeongJae Park return -EINVAL;
1843f49584bSSeongJae Park
1853f49584bSSeongJae Park mmap_read_lock(mm);
186d0cf3dd4SLiam R. Howlett rc = __damon_va_three_regions(mm, regions);
1873f49584bSSeongJae Park mmap_read_unlock(mm);
1883f49584bSSeongJae Park
1893f49584bSSeongJae Park mmput(mm);
1903f49584bSSeongJae Park return rc;
1913f49584bSSeongJae Park }
1923f49584bSSeongJae Park
1933f49584bSSeongJae Park /*
1943f49584bSSeongJae Park * Initialize the monitoring target regions for the given target (task)
1953f49584bSSeongJae Park *
1963f49584bSSeongJae Park * t the given target
1973f49584bSSeongJae Park *
1983f49584bSSeongJae Park * Because only a number of small portions of the entire address space
1993f49584bSSeongJae Park * is actually mapped to the memory and accessed, monitoring the unmapped
2003f49584bSSeongJae Park * regions is wasteful. That said, because we can deal with small noises,
2013f49584bSSeongJae Park * tracking every mapping is not strictly required but could even incur a high
2023f49584bSSeongJae Park * overhead if the mapping frequently changes or the number of mappings is
2033f49584bSSeongJae Park * high. The adaptive regions adjustment mechanism will further help to deal
2043f49584bSSeongJae Park * with the noise by simply identifying the unmapped areas as a region that
2053f49584bSSeongJae Park * has no access. Moreover, applying the real mappings that would have many
2063f49584bSSeongJae Park * unmapped areas inside will make the adaptive mechanism quite complex. That
2073f49584bSSeongJae Park * said, too huge unmapped areas inside the monitoring target should be removed
2083f49584bSSeongJae Park * to not take the time for the adaptive mechanism.
2093f49584bSSeongJae Park *
2103f49584bSSeongJae Park * For the reason, we convert the complex mappings to three distinct regions
2113f49584bSSeongJae Park * that cover every mapped area of the address space. Also the two gaps
2123f49584bSSeongJae Park * between the three regions are the two biggest unmapped areas in the given
2133f49584bSSeongJae Park * address space. In detail, this function first identifies the start and the
2143f49584bSSeongJae Park * end of the mappings and the two biggest unmapped areas of the address space.
2153f49584bSSeongJae Park * Then, it constructs the three regions as below:
2163f49584bSSeongJae Park *
2173f49584bSSeongJae Park * [mappings[0]->start, big_two_unmapped_areas[0]->start)
2183f49584bSSeongJae Park * [big_two_unmapped_areas[0]->end, big_two_unmapped_areas[1]->start)
2193f49584bSSeongJae Park * [big_two_unmapped_areas[1]->end, mappings[nr_mappings - 1]->end)
2203f49584bSSeongJae Park *
2213f49584bSSeongJae Park * As usual memory map of processes is as below, the gap between the heap and
2223f49584bSSeongJae Park * the uppermost mmap()-ed region, and the gap between the lowermost mmap()-ed
2233f49584bSSeongJae Park * region and the stack will be two biggest unmapped regions. Because these
2243f49584bSSeongJae Park * gaps are exceptionally huge areas in usual address space, excluding these
2253f49584bSSeongJae Park * two biggest unmapped regions will be sufficient to make a trade-off.
2263f49584bSSeongJae Park *
2273f49584bSSeongJae Park * <heap>
2283f49584bSSeongJae Park * <BIG UNMAPPED REGION 1>
2293f49584bSSeongJae Park * <uppermost mmap()-ed region>
2303f49584bSSeongJae Park * (other mmap()-ed regions and small unmapped regions)
2313f49584bSSeongJae Park * <lowermost mmap()-ed region>
2323f49584bSSeongJae Park * <BIG UNMAPPED REGION 2>
2333f49584bSSeongJae Park * <stack>
2343f49584bSSeongJae Park */
__damon_va_init_regions(struct damon_ctx * ctx,struct damon_target * t)2353f49584bSSeongJae Park static void __damon_va_init_regions(struct damon_ctx *ctx,
2363f49584bSSeongJae Park struct damon_target *t)
2373f49584bSSeongJae Park {
238962fe7a6SSeongJae Park struct damon_target *ti;
2393f49584bSSeongJae Park struct damon_region *r;
2403f49584bSSeongJae Park struct damon_addr_range regions[3];
2413f49584bSSeongJae Park unsigned long sz = 0, nr_pieces;
242962fe7a6SSeongJae Park int i, tidx = 0;
2433f49584bSSeongJae Park
2443f49584bSSeongJae Park if (damon_va_three_regions(t, regions)) {
245962fe7a6SSeongJae Park damon_for_each_target(ti, ctx) {
246962fe7a6SSeongJae Park if (ti == t)
247962fe7a6SSeongJae Park break;
248962fe7a6SSeongJae Park tidx++;
249962fe7a6SSeongJae Park }
250962fe7a6SSeongJae Park pr_debug("Failed to get three regions of %dth target\n", tidx);
2513f49584bSSeongJae Park return;
2523f49584bSSeongJae Park }
2533f49584bSSeongJae Park
2543f49584bSSeongJae Park for (i = 0; i < 3; i++)
2553f49584bSSeongJae Park sz += regions[i].end - regions[i].start;
256cbeaa77bSSeongJae Park if (ctx->attrs.min_nr_regions)
257cbeaa77bSSeongJae Park sz /= ctx->attrs.min_nr_regions;
2583f49584bSSeongJae Park if (sz < DAMON_MIN_REGION)
2593f49584bSSeongJae Park sz = DAMON_MIN_REGION;
2603f49584bSSeongJae Park
2613f49584bSSeongJae Park /* Set the initial three regions of the target */
2623f49584bSSeongJae Park for (i = 0; i < 3; i++) {
2633f49584bSSeongJae Park r = damon_new_region(regions[i].start, regions[i].end);
2643f49584bSSeongJae Park if (!r) {
2653f49584bSSeongJae Park pr_err("%d'th init region creation failed\n", i);
2663f49584bSSeongJae Park return;
2673f49584bSSeongJae Park }
2683f49584bSSeongJae Park damon_add_region(r, t);
2693f49584bSSeongJae Park
2703f49584bSSeongJae Park nr_pieces = (regions[i].end - regions[i].start) / sz;
2713f49584bSSeongJae Park damon_va_evenly_split_region(t, r, nr_pieces);
2723f49584bSSeongJae Park }
2733f49584bSSeongJae Park }
2743f49584bSSeongJae Park
2753f49584bSSeongJae Park /* Initialize '->regions_list' of every target (task) */
damon_va_init(struct damon_ctx * ctx)276cdeed009SXin Hao static void damon_va_init(struct damon_ctx *ctx)
2773f49584bSSeongJae Park {
2783f49584bSSeongJae Park struct damon_target *t;
2793f49584bSSeongJae Park
2803f49584bSSeongJae Park damon_for_each_target(t, ctx) {
2813f49584bSSeongJae Park /* the user may set the target regions as they want */
2823f49584bSSeongJae Park if (!damon_nr_regions(t))
2833f49584bSSeongJae Park __damon_va_init_regions(ctx, t);
2843f49584bSSeongJae Park }
2853f49584bSSeongJae Park }
2863f49584bSSeongJae Park
2873f49584bSSeongJae Park /*
2883f49584bSSeongJae Park * Update regions for current memory mappings
2893f49584bSSeongJae Park */
damon_va_update(struct damon_ctx * ctx)290cdeed009SXin Hao static void damon_va_update(struct damon_ctx *ctx)
2913f49584bSSeongJae Park {
2923f49584bSSeongJae Park struct damon_addr_range three_regions[3];
2933f49584bSSeongJae Park struct damon_target *t;
2943f49584bSSeongJae Park
2953f49584bSSeongJae Park damon_for_each_target(t, ctx) {
2963f49584bSSeongJae Park if (damon_va_three_regions(t, three_regions))
2973f49584bSSeongJae Park continue;
298dae0087aSSeongJae Park damon_set_regions(t, three_regions, 3);
2993f49584bSSeongJae Park }
3003f49584bSSeongJae Park }
3013f49584bSSeongJae Park
/*
 * Page walk callback that marks the entry mapping 'addr' as old.
 *
 * If the PMD is a transparent huge page, damon_pmdp_mkold() is applied to it
 * under the PMD lock.  Otherwise, damon_ptep_mkold() is applied to the PTE
 * for 'addr' under the PTE lock.  Non-present entries are left untouched.
 *
 * Always returns 0.
 */
static int damon_mkold_pmd_entry(pmd_t *pmd, unsigned long addr,
		unsigned long next, struct mm_walk *walk)
{
	spinlock_t *ptl;
	pte_t *pte;

	if (pmd_trans_huge(pmdp_get(pmd))) {
		pmd_t pmde;
		bool present, huge;

		/* the PMD may have changed; check it again under the lock */
		ptl = pmd_lock(walk->mm, pmd);
		pmde = pmdp_get(pmd);
		present = pmd_present(pmde);
		huge = present && pmd_trans_huge(pmde);

		if (huge)
			damon_pmdp_mkold(pmd, walk->vma, addr);
		spin_unlock(ptl);

		/* only a present but not-huge PMD falls back to the PTE */
		if (!present || huge)
			return 0;
	}

	pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
	if (!pte) {
		/* no page table could be mapped; ask the walker to retry */
		walk->action = ACTION_AGAIN;
		return 0;
	}
	if (pte_present(ptep_get(pte)))
		damon_ptep_mkold(pte, walk->vma, addr);
	pte_unmap_unlock(pte, ptl);

	return 0;
}
3383f49584bSSeongJae Park
33949f4203aSBaolin Wang #ifdef CONFIG_HUGETLB_PAGE
/*
 * Mark the hugetlb page mapped by 'pte' at 'addr' as old and idle.
 *
 * The young bit of the entry is cleared if it was set, and the young state
 * of the mmu notifiers is cleared for the huge page range.  If either of
 * the two reported a reference, the folio is marked young so that the
 * reference is not lost.  The folio is marked idle in any case.
 */
static void damon_hugetlb_mkold(pte_t *pte, struct mm_struct *mm,
				struct vm_area_struct *vma, unsigned long addr)
{
	pte_t entry = huge_ptep_get(pte);
	struct folio *folio = pfn_folio(pte_pfn(entry));
	unsigned long psize = huge_page_size(hstate_vma(vma));
	bool referenced = pte_young(entry);

	/* hold the folio while its flags are updated */
	folio_get(folio);

	if (referenced)
		set_huge_pte_at(mm, addr, pte, pte_mkold(entry), psize);

#ifdef CONFIG_MMU_NOTIFIER
	if (mmu_notifier_clear_young(mm, addr, addr + psize))
		referenced = true;
#endif /* CONFIG_MMU_NOTIFIER */

	if (referenced)
		folio_set_young(folio);

	folio_set_idle(folio);
	folio_put(folio);
}
36849f4203aSBaolin Wang
/*
 * Page walk callback for hugetlb entries.
 *
 * Applies damon_hugetlb_mkold() to the entry for 'addr' under the huge PTE
 * lock, if the entry is present.
 *
 * Always returns 0.
 */
static int damon_mkold_hugetlb_entry(pte_t *pte, unsigned long hmask,
				     unsigned long addr, unsigned long end,
				     struct mm_walk *walk)
{
	struct hstate *h = hstate_vma(walk->vma);
	spinlock_t *ptl = huge_pte_lock(h, walk->mm, pte);

	if (pte_present(huge_ptep_get(pte)))
		damon_hugetlb_mkold(pte, walk->mm, walk->vma, addr);

	spin_unlock(ptl);
	return 0;
}
38849f4203aSBaolin Wang #else
38949f4203aSBaolin Wang #define damon_mkold_hugetlb_entry NULL
39049f4203aSBaolin Wang #endif /* CONFIG_HUGETLB_PAGE */
39149f4203aSBaolin Wang
392199b50f4SRikard Falkeborn static const struct mm_walk_ops damon_mkold_ops = {
3933f49584bSSeongJae Park .pmd_entry = damon_mkold_pmd_entry,
39449f4203aSBaolin Wang .hugetlb_entry = damon_mkold_hugetlb_entry,
39549b06385SSuren Baghdasaryan .walk_lock = PGWALK_RDLOCK,
3963f49584bSSeongJae Park };
3973f49584bSSeongJae Park
damon_va_mkold(struct mm_struct * mm,unsigned long addr)3983f49584bSSeongJae Park static void damon_va_mkold(struct mm_struct *mm, unsigned long addr)
3993f49584bSSeongJae Park {
4003f49584bSSeongJae Park mmap_read_lock(mm);
4013f49584bSSeongJae Park walk_page_range(mm, addr, addr + 1, &damon_mkold_ops, NULL);
4023f49584bSSeongJae Park mmap_read_unlock(mm);
4033f49584bSSeongJae Park }
4043f49584bSSeongJae Park
4053f49584bSSeongJae Park /*
4063f49584bSSeongJae Park * Functions for the access checking of the regions
4073f49584bSSeongJae Park */
4083f49584bSSeongJae Park
__damon_va_prepare_access_check(struct mm_struct * mm,struct damon_region * r)4098ef4d5caSKaixu Xia static void __damon_va_prepare_access_check(struct mm_struct *mm,
4108ef4d5caSKaixu Xia struct damon_region *r)
4113f49584bSSeongJae Park {
4123f49584bSSeongJae Park r->sampling_addr = damon_rand(r->ar.start, r->ar.end);
4133f49584bSSeongJae Park
4143f49584bSSeongJae Park damon_va_mkold(mm, r->sampling_addr);
4153f49584bSSeongJae Park }
4163f49584bSSeongJae Park
damon_va_prepare_access_checks(struct damon_ctx * ctx)417cdeed009SXin Hao static void damon_va_prepare_access_checks(struct damon_ctx *ctx)
4183f49584bSSeongJae Park {
4193f49584bSSeongJae Park struct damon_target *t;
4203f49584bSSeongJae Park struct mm_struct *mm;
4213f49584bSSeongJae Park struct damon_region *r;
4223f49584bSSeongJae Park
4233f49584bSSeongJae Park damon_for_each_target(t, ctx) {
4243f49584bSSeongJae Park mm = damon_get_mm(t);
4253f49584bSSeongJae Park if (!mm)
4263f49584bSSeongJae Park continue;
4273f49584bSSeongJae Park damon_for_each_region(r, t)
4288ef4d5caSKaixu Xia __damon_va_prepare_access_check(mm, r);
4293f49584bSSeongJae Park mmput(mm);
4303f49584bSSeongJae Park }
4313f49584bSSeongJae Park }
4323f49584bSSeongJae Park
4333f49584bSSeongJae Park struct damon_young_walk_private {
434fc8c7d23SSeongJae Park /* size of the folio for the access checked virtual memory address */
435fc8c7d23SSeongJae Park unsigned long *folio_sz;
4363f49584bSSeongJae Park bool young;
4373f49584bSSeongJae Park };
4383f49584bSSeongJae Park
/*
 * Page walk callback that checks whether the page for 'addr' was accessed.
 *
 * 'walk->private' should point to a 'struct damon_young_walk_private'.  If
 * the entry for 'addr' is present and its folio can be gotten, the size of
 * the folio is stored via '->folio_sz', and '->young' is set if the entry is
 * young, the folio is not idle, or the mmu notifiers report it as young.
 *
 * Always returns 0.
 */
static int damon_young_pmd_entry(pmd_t *pmd, unsigned long addr,
		unsigned long next, struct mm_walk *walk)
{
	struct damon_young_walk_private *priv = walk->private;
	struct folio *folio;
	spinlock_t *ptl;
	pte_t *pte;
	pte_t ptent;

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	if (pmd_trans_huge(pmdp_get(pmd))) {
		pmd_t pmde;
		bool present, huge;

		/* the PMD may have changed; check it again under the lock */
		ptl = pmd_lock(walk->mm, pmd);
		pmde = pmdp_get(pmd);
		present = pmd_present(pmde);
		huge = present && pmd_trans_huge(pmde);

		if (huge) {
			folio = damon_get_folio(pmd_pfn(pmde));
			if (folio) {
				if (pmd_young(pmde) ||
				    !folio_test_idle(folio) ||
				    mmu_notifier_test_young(walk->mm, addr))
					priv->young = true;
				*priv->folio_sz = HPAGE_PMD_SIZE;
				folio_put(folio);
			}
		}
		spin_unlock(ptl);

		/* only a present but not-huge PMD falls back to the PTE */
		if (!present || huge)
			return 0;
	}
#endif	/* CONFIG_TRANSPARENT_HUGEPAGE */

	pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
	if (!pte) {
		/* no page table could be mapped; ask the walker to retry */
		walk->action = ACTION_AGAIN;
		return 0;
	}

	ptent = ptep_get(pte);
	if (pte_present(ptent)) {
		folio = damon_get_folio(pte_pfn(ptent));
		if (folio) {
			if (pte_young(ptent) || !folio_test_idle(folio) ||
			    mmu_notifier_test_young(walk->mm, addr))
				priv->young = true;
			*priv->folio_sz = folio_size(folio);
			folio_put(folio);
		}
	}
	pte_unmap_unlock(pte, ptl);

	return 0;
}
5013f49584bSSeongJae Park
50249f4203aSBaolin Wang #ifdef CONFIG_HUGETLB_PAGE
/*
 * Page walk callback that checks whether the hugetlb page for 'addr' was
 * accessed.
 *
 * 'walk->private' should point to a 'struct damon_young_walk_private'.  If
 * the entry is present, the huge page size of the VMA is stored via
 * '->folio_sz', and '->young' is set if the entry is young, the folio is not
 * idle, or the mmu notifiers report it as young.
 *
 * Always returns 0.
 */
static int damon_young_hugetlb_entry(pte_t *pte, unsigned long hmask,
				     unsigned long addr, unsigned long end,
				     struct mm_walk *walk)
{
	struct damon_young_walk_private *priv = walk->private;
	struct hstate *h = hstate_vma(walk->vma);
	spinlock_t *ptl = huge_pte_lock(h, walk->mm, pte);
	pte_t entry = huge_ptep_get(pte);

	if (pte_present(entry)) {
		struct folio *folio = pfn_folio(pte_pfn(entry));

		/* hold the folio while its flags are read */
		folio_get(folio);

		if (pte_young(entry) || !folio_test_idle(folio) ||
		    mmu_notifier_test_young(walk->mm, addr))
			priv->young = true;
		*priv->folio_sz = huge_page_size(h);

		folio_put(folio);
	}

	spin_unlock(ptl);
	return 0;
}
53249f4203aSBaolin Wang #else
53349f4203aSBaolin Wang #define damon_young_hugetlb_entry NULL
53449f4203aSBaolin Wang #endif /* CONFIG_HUGETLB_PAGE */
53549f4203aSBaolin Wang
536199b50f4SRikard Falkeborn static const struct mm_walk_ops damon_young_ops = {
5373f49584bSSeongJae Park .pmd_entry = damon_young_pmd_entry,
53849f4203aSBaolin Wang .hugetlb_entry = damon_young_hugetlb_entry,
53949b06385SSuren Baghdasaryan .walk_lock = PGWALK_RDLOCK,
5403f49584bSSeongJae Park };
5413f49584bSSeongJae Park
damon_va_young(struct mm_struct * mm,unsigned long addr,unsigned long * folio_sz)5423f49584bSSeongJae Park static bool damon_va_young(struct mm_struct *mm, unsigned long addr,
543fc8c7d23SSeongJae Park unsigned long *folio_sz)
5443f49584bSSeongJae Park {
5453f49584bSSeongJae Park struct damon_young_walk_private arg = {
546fc8c7d23SSeongJae Park .folio_sz = folio_sz,
5473f49584bSSeongJae Park .young = false,
5483f49584bSSeongJae Park };
5493f49584bSSeongJae Park
5503f49584bSSeongJae Park mmap_read_lock(mm);
5513f49584bSSeongJae Park walk_page_range(mm, addr, addr + 1, &damon_young_ops, &arg);
5523f49584bSSeongJae Park mmap_read_unlock(mm);
5533f49584bSSeongJae Park return arg.young;
5543f49584bSSeongJae Park }
5553f49584bSSeongJae Park
5563f49584bSSeongJae Park /*
5573f49584bSSeongJae Park * Check whether the region was accessed after the last preparation
5583f49584bSSeongJae Park *
5593f49584bSSeongJae Park * mm 'mm_struct' for the given virtual address space
5603f49584bSSeongJae Park * r the region to be checked
5613f49584bSSeongJae Park */
__damon_va_check_access(struct mm_struct * mm,struct damon_region * r,bool same_target)56209876ae7SKaixu Xia static void __damon_va_check_access(struct mm_struct *mm,
56395cd2522SKaixu Xia struct damon_region *r, bool same_target)
5643f49584bSSeongJae Park {
5653f49584bSSeongJae Park static unsigned long last_addr;
566fc8c7d23SSeongJae Park static unsigned long last_folio_sz = PAGE_SIZE;
5673f49584bSSeongJae Park static bool last_accessed;
5683f49584bSSeongJae Park
5693f49584bSSeongJae Park /* If the region is in the last checked page, reuse the result */
570fc8c7d23SSeongJae Park if (same_target && (ALIGN_DOWN(last_addr, last_folio_sz) ==
571fc8c7d23SSeongJae Park ALIGN_DOWN(r->sampling_addr, last_folio_sz))) {
5723f49584bSSeongJae Park if (last_accessed)
5733f49584bSSeongJae Park r->nr_accesses++;
5743f49584bSSeongJae Park return;
5753f49584bSSeongJae Park }
5763f49584bSSeongJae Park
577fc8c7d23SSeongJae Park last_accessed = damon_va_young(mm, r->sampling_addr, &last_folio_sz);
5783f49584bSSeongJae Park if (last_accessed)
5793f49584bSSeongJae Park r->nr_accesses++;
5803f49584bSSeongJae Park
5813f49584bSSeongJae Park last_addr = r->sampling_addr;
5823f49584bSSeongJae Park }
5833f49584bSSeongJae Park
damon_va_check_accesses(struct damon_ctx * ctx)584cdeed009SXin Hao static unsigned int damon_va_check_accesses(struct damon_ctx *ctx)
5853f49584bSSeongJae Park {
5863f49584bSSeongJae Park struct damon_target *t;
5873f49584bSSeongJae Park struct mm_struct *mm;
5883f49584bSSeongJae Park struct damon_region *r;
5893f49584bSSeongJae Park unsigned int max_nr_accesses = 0;
59095cd2522SKaixu Xia bool same_target;
5913f49584bSSeongJae Park
5923f49584bSSeongJae Park damon_for_each_target(t, ctx) {
5933f49584bSSeongJae Park mm = damon_get_mm(t);
5943f49584bSSeongJae Park if (!mm)
5953f49584bSSeongJae Park continue;
59695cd2522SKaixu Xia same_target = false;
5973f49584bSSeongJae Park damon_for_each_region(r, t) {
59895cd2522SKaixu Xia __damon_va_check_access(mm, r, same_target);
5993f49584bSSeongJae Park max_nr_accesses = max(r->nr_accesses, max_nr_accesses);
60095cd2522SKaixu Xia same_target = true;
6013f49584bSSeongJae Park }
6023f49584bSSeongJae Park mmput(mm);
6033f49584bSSeongJae Park }
6043f49584bSSeongJae Park
6053f49584bSSeongJae Park return max_nr_accesses;
6063f49584bSSeongJae Park }
6073f49584bSSeongJae Park
6083f49584bSSeongJae Park /*
6093f49584bSSeongJae Park * Functions for the target validity check and cleanup
6103f49584bSSeongJae Park */
6113f49584bSSeongJae Park
damon_va_target_valid(struct damon_target * t)61216bc1b0fSKaixu Xia static bool damon_va_target_valid(struct damon_target *t)
6133f49584bSSeongJae Park {
6143f49584bSSeongJae Park struct task_struct *task;
6153f49584bSSeongJae Park
6163f49584bSSeongJae Park task = damon_get_task_struct(t);
6173f49584bSSeongJae Park if (task) {
6183f49584bSSeongJae Park put_task_struct(task);
6193f49584bSSeongJae Park return true;
6203f49584bSSeongJae Park }
6213f49584bSSeongJae Park
6223f49584bSSeongJae Park return false;
6233f49584bSSeongJae Park }
6243f49584bSSeongJae Park
#ifdef CONFIG_ADVISE_SYSCALLS
/*
 * Apply the madvise() 'behavior' to the address range of region 'r' in the
 * address space of 'target'.
 *
 * Both the start address and the size of the region are rounded up with
 * PAGE_ALIGN() before being handed to do_madvise().
 *
 * Returns the page-aligned length if do_madvise() returned zero; 0 if it
 * returned non-zero or if the mm_struct of the target could not be obtained.
 */
static unsigned long damos_madvise(struct damon_target *target,
		struct damon_region *r, int behavior)
{
	unsigned long start = PAGE_ALIGN(r->ar.start);
	unsigned long len = PAGE_ALIGN(damon_sz_region(r));
	unsigned long applied = 0;
	struct mm_struct *mm;

	mm = damon_get_mm(target);
	if (!mm)
		return 0;

	if (!do_madvise(mm, start, len, behavior))
		applied = len;

	/* Drop the reference taken by damon_get_mm() */
	mmput(mm);

	return applied;
}
#else
/* Without CONFIG_ADVISE_SYSCALLS nothing is applied; always returns 0. */
static unsigned long damos_madvise(struct damon_target *target,
		struct damon_region *r, int behavior)
{
	return 0;
}
#endif /* CONFIG_ADVISE_SYSCALLS */
6506dea8addSSeongJae Park
damon_va_apply_scheme(struct damon_ctx * ctx,struct damon_target * t,struct damon_region * r,struct damos * scheme)6510e92c2eeSSeongJae Park static unsigned long damon_va_apply_scheme(struct damon_ctx *ctx,
6520e92c2eeSSeongJae Park struct damon_target *t, struct damon_region *r,
6530e92c2eeSSeongJae Park struct damos *scheme)
6546dea8addSSeongJae Park {
6556dea8addSSeongJae Park int madv_action;
6566dea8addSSeongJae Park
6576dea8addSSeongJae Park switch (scheme->action) {
6586dea8addSSeongJae Park case DAMOS_WILLNEED:
6596dea8addSSeongJae Park madv_action = MADV_WILLNEED;
6606dea8addSSeongJae Park break;
6616dea8addSSeongJae Park case DAMOS_COLD:
6626dea8addSSeongJae Park madv_action = MADV_COLD;
6636dea8addSSeongJae Park break;
6646dea8addSSeongJae Park case DAMOS_PAGEOUT:
6656dea8addSSeongJae Park madv_action = MADV_PAGEOUT;
6666dea8addSSeongJae Park break;
6676dea8addSSeongJae Park case DAMOS_HUGEPAGE:
6686dea8addSSeongJae Park madv_action = MADV_HUGEPAGE;
6696dea8addSSeongJae Park break;
6706dea8addSSeongJae Park case DAMOS_NOHUGEPAGE:
6716dea8addSSeongJae Park madv_action = MADV_NOHUGEPAGE;
6726dea8addSSeongJae Park break;
6732f0b548cSSeongJae Park case DAMOS_STAT:
6742f0b548cSSeongJae Park return 0;
6756dea8addSSeongJae Park default:
6765934ec13SKaixu Xia /*
6775934ec13SKaixu Xia * DAMOS actions that are not yet supported by 'vaddr'.
6785934ec13SKaixu Xia */
6790e92c2eeSSeongJae Park return 0;
6806dea8addSSeongJae Park }
6816dea8addSSeongJae Park
6826dea8addSSeongJae Park return damos_madvise(t, r, madv_action);
6836dea8addSSeongJae Park }
6846dea8addSSeongJae Park
damon_va_scheme_score(struct damon_ctx * context,struct damon_target * t,struct damon_region * r,struct damos * scheme)685cdeed009SXin Hao static int damon_va_scheme_score(struct damon_ctx *context,
686cdeed009SXin Hao struct damon_target *t, struct damon_region *r,
687cdeed009SXin Hao struct damos *scheme)
688198f0f4cSSeongJae Park {
689198f0f4cSSeongJae Park
690198f0f4cSSeongJae Park switch (scheme->action) {
691198f0f4cSSeongJae Park case DAMOS_PAGEOUT:
692e3e486e6SKaixu Xia return damon_cold_score(context, r, scheme);
693198f0f4cSSeongJae Park default:
694198f0f4cSSeongJae Park break;
695198f0f4cSSeongJae Park }
696198f0f4cSSeongJae Park
697198f0f4cSSeongJae Park return DAMOS_MAX_SCORE;
698198f0f4cSSeongJae Park }
699198f0f4cSSeongJae Park
damon_va_initcall(void)7007752925fSSeongJae Park static int __init damon_va_initcall(void)
7017752925fSSeongJae Park {
7027752925fSSeongJae Park struct damon_operations ops = {
7037752925fSSeongJae Park .id = DAMON_OPS_VADDR,
7047752925fSSeongJae Park .init = damon_va_init,
7057752925fSSeongJae Park .update = damon_va_update,
7067752925fSSeongJae Park .prepare_access_checks = damon_va_prepare_access_checks,
7077752925fSSeongJae Park .check_accesses = damon_va_check_accesses,
7087752925fSSeongJae Park .reset_aggregated = NULL,
7097752925fSSeongJae Park .target_valid = damon_va_target_valid,
7107752925fSSeongJae Park .cleanup = NULL,
7117752925fSSeongJae Park .apply_scheme = damon_va_apply_scheme,
7127752925fSSeongJae Park .get_scheme_score = damon_va_scheme_score,
7137752925fSSeongJae Park };
714de6d0154SSeongJae Park /* ops for fixed virtual address ranges */
715de6d0154SSeongJae Park struct damon_operations ops_fvaddr = ops;
716de6d0154SSeongJae Park int err;
7177752925fSSeongJae Park
718de6d0154SSeongJae Park /* Don't set the monitoring target regions for the entire mapping */
719de6d0154SSeongJae Park ops_fvaddr.id = DAMON_OPS_FVADDR;
720de6d0154SSeongJae Park ops_fvaddr.init = NULL;
721de6d0154SSeongJae Park ops_fvaddr.update = NULL;
722de6d0154SSeongJae Park
723de6d0154SSeongJae Park err = damon_register_ops(&ops);
724de6d0154SSeongJae Park if (err)
725de6d0154SSeongJae Park return err;
726de6d0154SSeongJae Park return damon_register_ops(&ops_fvaddr);
7277752925fSSeongJae Park };
7287752925fSSeongJae Park
7297752925fSSeongJae Park subsys_initcall(damon_va_initcall);
7307752925fSSeongJae Park
73117ccae8bSSeongJae Park #include "vaddr-test.h"
732