13f49584bSSeongJae Park // SPDX-License-Identifier: GPL-2.0 23f49584bSSeongJae Park /* 33f49584bSSeongJae Park * DAMON Primitives for Virtual Address Spaces 43f49584bSSeongJae Park * 53f49584bSSeongJae Park * Author: SeongJae Park <sjpark@amazon.de> 63f49584bSSeongJae Park */ 73f49584bSSeongJae Park 83f49584bSSeongJae Park #define pr_fmt(fmt) "damon-va: " fmt 93f49584bSSeongJae Park 106dea8addSSeongJae Park #include <asm-generic/mman-common.h> 113f49584bSSeongJae Park #include <linux/highmem.h> 1246c3a0acSSeongJae Park #include <linux/hugetlb.h> 1346c3a0acSSeongJae Park #include <linux/mmu_notifier.h> 143f49584bSSeongJae Park #include <linux/page_idle.h> 153f49584bSSeongJae Park #include <linux/pagewalk.h> 168581fd40SJakub Kicinski #include <linux/sched/mm.h> 1746c3a0acSSeongJae Park 18f7d911c3SSeongJae Park #include "ops-common.h" 193f49584bSSeongJae Park 2017ccae8bSSeongJae Park #ifdef CONFIG_DAMON_VADDR_KUNIT_TEST 2117ccae8bSSeongJae Park #undef DAMON_MIN_REGION 2217ccae8bSSeongJae Park #define DAMON_MIN_REGION 1 2317ccae8bSSeongJae Park #endif 2417ccae8bSSeongJae Park 253f49584bSSeongJae Park /* 261971bd63SSeongJae Park * 't->pid' should be the pointer to the relevant 'struct pid' having reference 273f49584bSSeongJae Park * count. Caller must put the returned task, unless it is NULL. 283f49584bSSeongJae Park */ 2988f86dcfSSeongJae Park static inline struct task_struct *damon_get_task_struct(struct damon_target *t) 3088f86dcfSSeongJae Park { 311971bd63SSeongJae Park return get_pid_task(t->pid, PIDTYPE_PID); 3288f86dcfSSeongJae Park } 333f49584bSSeongJae Park 343f49584bSSeongJae Park /* 353f49584bSSeongJae Park * Get the mm_struct of the given target 363f49584bSSeongJae Park * 373f49584bSSeongJae Park * Caller _must_ put the mm_struct after use, unless it is NULL. 383f49584bSSeongJae Park * 393f49584bSSeongJae Park * Returns the mm_struct of the target on success, NULL on failure 403f49584bSSeongJae Park */ 413f49584bSSeongJae Park static struct mm_struct *damon_get_mm(struct damon_target *t) 423f49584bSSeongJae Park { 433f49584bSSeongJae Park struct task_struct *task; 443f49584bSSeongJae Park struct mm_struct *mm; 453f49584bSSeongJae Park 463f49584bSSeongJae Park task = damon_get_task_struct(t); 473f49584bSSeongJae Park if (!task) 483f49584bSSeongJae Park return NULL; 493f49584bSSeongJae Park 503f49584bSSeongJae Park mm = get_task_mm(task); 513f49584bSSeongJae Park put_task_struct(task); 523f49584bSSeongJae Park return mm; 533f49584bSSeongJae Park } 543f49584bSSeongJae Park 553f49584bSSeongJae Park /* 563f49584bSSeongJae Park * Functions for the initial monitoring target regions construction 573f49584bSSeongJae Park */ 583f49584bSSeongJae Park 593f49584bSSeongJae Park /* 603f49584bSSeongJae Park * Size-evenly split a region into 'nr_pieces' small regions 613f49584bSSeongJae Park * 623f49584bSSeongJae Park * Returns 0 on success, or negative error code otherwise. 633f49584bSSeongJae Park */ 643f49584bSSeongJae Park static int damon_va_evenly_split_region(struct damon_target *t, 653f49584bSSeongJae Park struct damon_region *r, unsigned int nr_pieces) 663f49584bSSeongJae Park { 673f49584bSSeongJae Park unsigned long sz_orig, sz_piece, orig_end; 683f49584bSSeongJae Park struct damon_region *n = NULL, *next; 693f49584bSSeongJae Park unsigned long start; 703f49584bSSeongJae Park 713f49584bSSeongJae Park if (!r || !nr_pieces) 723f49584bSSeongJae Park return -EINVAL; 733f49584bSSeongJae Park 743f49584bSSeongJae Park orig_end = r->ar.end; 75ab63f63fSXin Hao sz_orig = damon_sz_region(r); 763f49584bSSeongJae Park sz_piece = ALIGN_DOWN(sz_orig / nr_pieces, DAMON_MIN_REGION); 773f49584bSSeongJae Park 783f49584bSSeongJae Park if (!sz_piece) 793f49584bSSeongJae Park return -EINVAL; 803f49584bSSeongJae Park 813f49584bSSeongJae Park r->ar.end = r->ar.start + sz_piece; 823f49584bSSeongJae Park next = damon_next_region(r); 833f49584bSSeongJae Park for (start = r->ar.end; start + sz_piece <= orig_end; 843f49584bSSeongJae Park start += sz_piece) { 853f49584bSSeongJae Park n = damon_new_region(start, start + sz_piece); 863f49584bSSeongJae Park if (!n) 873f49584bSSeongJae Park return -ENOMEM; 883f49584bSSeongJae Park damon_insert_region(n, r, next, t); 893f49584bSSeongJae Park r = n; 903f49584bSSeongJae Park } 913f49584bSSeongJae Park /* complement last region for possible rounding error */ 923f49584bSSeongJae Park if (n) 933f49584bSSeongJae Park n->ar.end = orig_end; 943f49584bSSeongJae Park 953f49584bSSeongJae Park return 0; 963f49584bSSeongJae Park } 973f49584bSSeongJae Park 983f49584bSSeongJae Park static unsigned long sz_range(struct damon_addr_range *r) 993f49584bSSeongJae Park { 1003f49584bSSeongJae Park return r->end - r->start; 1013f49584bSSeongJae Park } 1023f49584bSSeongJae Park 1033f49584bSSeongJae Park /* 1043f49584bSSeongJae Park * Find three regions separated by two biggest unmapped regions 1053f49584bSSeongJae Park * 1063f49584bSSeongJae Park * vma the head vma of the target address space 1073f49584bSSeongJae Park * regions an array of three address ranges that results will be saved 1083f49584bSSeongJae Park * 1093f49584bSSeongJae Park * This function receives an address space and finds three regions in it which 1103f49584bSSeongJae Park * separated by the two biggest unmapped regions in the space. Please refer to 1113f49584bSSeongJae Park * below comments of '__damon_va_init_regions()' function to know why this is 1123f49584bSSeongJae Park * necessary. 1133f49584bSSeongJae Park * 1143f49584bSSeongJae Park * Returns 0 if success, or negative error code otherwise. 1153f49584bSSeongJae Park */ 116d0cf3dd4SLiam R. Howlett static int __damon_va_three_regions(struct mm_struct *mm, 1173f49584bSSeongJae Park struct damon_addr_range regions[3]) 1183f49584bSSeongJae Park { 119d0cf3dd4SLiam R. Howlett struct damon_addr_range first_gap = {0}, second_gap = {0}; 120d0cf3dd4SLiam R. Howlett VMA_ITERATOR(vmi, mm, 0); 121d0cf3dd4SLiam R. Howlett struct vm_area_struct *vma, *prev = NULL; 122d0cf3dd4SLiam R. Howlett unsigned long start; 1233f49584bSSeongJae Park 124d0cf3dd4SLiam R. Howlett /* 125d0cf3dd4SLiam R. Howlett * Find the two biggest gaps so that first_gap > second_gap > others. 126d0cf3dd4SLiam R. Howlett * If this is too slow, it can be optimised to examine the maple 127d0cf3dd4SLiam R. Howlett * tree gaps. 128d0cf3dd4SLiam R. Howlett */ 129d0cf3dd4SLiam R. Howlett for_each_vma(vmi, vma) { 130d0cf3dd4SLiam R. Howlett unsigned long gap; 131d0cf3dd4SLiam R. Howlett 132d0cf3dd4SLiam R. Howlett if (!prev) { 1333f49584bSSeongJae Park start = vma->vm_start; 1343f49584bSSeongJae Park goto next; 1353f49584bSSeongJae Park } 136d0cf3dd4SLiam R. Howlett gap = vma->vm_start - prev->vm_end; 1373f49584bSSeongJae Park 138d0cf3dd4SLiam R. Howlett if (gap > sz_range(&first_gap)) { 139d0cf3dd4SLiam R. Howlett second_gap = first_gap; 140d0cf3dd4SLiam R. Howlett first_gap.start = prev->vm_end; 141d0cf3dd4SLiam R. Howlett first_gap.end = vma->vm_start; 142d0cf3dd4SLiam R. Howlett } else if (gap > sz_range(&second_gap)) { 143d0cf3dd4SLiam R. Howlett second_gap.start = prev->vm_end; 144d0cf3dd4SLiam R. Howlett second_gap.end = vma->vm_start; 1453f49584bSSeongJae Park } 1463f49584bSSeongJae Park next: 147d0cf3dd4SLiam R. Howlett prev = vma; 1483f49584bSSeongJae Park } 1493f49584bSSeongJae Park 1503f49584bSSeongJae Park if (!sz_range(&second_gap) || !sz_range(&first_gap)) 1513f49584bSSeongJae Park return -EINVAL; 1523f49584bSSeongJae Park 1533f49584bSSeongJae Park /* Sort the two biggest gaps by address */ 1543f49584bSSeongJae Park if (first_gap.start > second_gap.start) 1558bd0b9daSYihao Han swap(first_gap, second_gap); 1563f49584bSSeongJae Park 1573f49584bSSeongJae Park /* Store the result */ 1583f49584bSSeongJae Park regions[0].start = ALIGN(start, DAMON_MIN_REGION); 1593f49584bSSeongJae Park regions[0].end = ALIGN(first_gap.start, DAMON_MIN_REGION); 1603f49584bSSeongJae Park regions[1].start = ALIGN(first_gap.end, DAMON_MIN_REGION); 1613f49584bSSeongJae Park regions[1].end = ALIGN(second_gap.start, DAMON_MIN_REGION); 1623f49584bSSeongJae Park regions[2].start = ALIGN(second_gap.end, DAMON_MIN_REGION); 163d0cf3dd4SLiam R. Howlett regions[2].end = ALIGN(prev->vm_end, DAMON_MIN_REGION); 1643f49584bSSeongJae Park 1653f49584bSSeongJae Park return 0; 1663f49584bSSeongJae Park } 1673f49584bSSeongJae Park 1683f49584bSSeongJae Park /* 1693f49584bSSeongJae Park * Get the three regions in the given target (task) 1703f49584bSSeongJae Park * 1713f49584bSSeongJae Park * Returns 0 on success, negative error code otherwise. 1723f49584bSSeongJae Park */ 1733f49584bSSeongJae Park static int damon_va_three_regions(struct damon_target *t, 1743f49584bSSeongJae Park struct damon_addr_range regions[3]) 1753f49584bSSeongJae Park { 1763f49584bSSeongJae Park struct mm_struct *mm; 1773f49584bSSeongJae Park int rc; 1783f49584bSSeongJae Park 1793f49584bSSeongJae Park mm = damon_get_mm(t); 1803f49584bSSeongJae Park if (!mm) 1813f49584bSSeongJae Park return -EINVAL; 1823f49584bSSeongJae Park 1833f49584bSSeongJae Park mmap_read_lock(mm); 184d0cf3dd4SLiam R. Howlett rc = __damon_va_three_regions(mm, regions); 1853f49584bSSeongJae Park mmap_read_unlock(mm); 1863f49584bSSeongJae Park 1873f49584bSSeongJae Park mmput(mm); 1883f49584bSSeongJae Park return rc; 1893f49584bSSeongJae Park } 1903f49584bSSeongJae Park 1913f49584bSSeongJae Park /* 1923f49584bSSeongJae Park * Initialize the monitoring target regions for the given target (task) 1933f49584bSSeongJae Park * 1943f49584bSSeongJae Park * t the given target 1953f49584bSSeongJae Park * 1963f49584bSSeongJae Park * Because only a number of small portions of the entire address space 1973f49584bSSeongJae Park * is actually mapped to the memory and accessed, monitoring the unmapped 1983f49584bSSeongJae Park * regions is wasteful. That said, because we can deal with small noises, 1993f49584bSSeongJae Park * tracking every mapping is not strictly required but could even incur a high 2003f49584bSSeongJae Park * overhead if the mapping frequently changes or the number of mappings is 2013f49584bSSeongJae Park * high. The adaptive regions adjustment mechanism will further help to deal 2023f49584bSSeongJae Park * with the noise by simply identifying the unmapped areas as a region that 2033f49584bSSeongJae Park * has no access. Moreover, applying the real mappings that would have many 2043f49584bSSeongJae Park * unmapped areas inside will make the adaptive mechanism quite complex. That 2053f49584bSSeongJae Park * said, too huge unmapped areas inside the monitoring target should be removed 2063f49584bSSeongJae Park * to not take the time for the adaptive mechanism. 2073f49584bSSeongJae Park * 2083f49584bSSeongJae Park * For the reason, we convert the complex mappings to three distinct regions 2093f49584bSSeongJae Park * that cover every mapped area of the address space. Also the two gaps 2103f49584bSSeongJae Park * between the three regions are the two biggest unmapped areas in the given 2113f49584bSSeongJae Park * address space. In detail, this function first identifies the start and the 2123f49584bSSeongJae Park * end of the mappings and the two biggest unmapped areas of the address space. 2133f49584bSSeongJae Park * Then, it constructs the three regions as below: 2143f49584bSSeongJae Park * 2153f49584bSSeongJae Park * [mappings[0]->start, big_two_unmapped_areas[0]->start) 2163f49584bSSeongJae Park * [big_two_unmapped_areas[0]->end, big_two_unmapped_areas[1]->start) 2173f49584bSSeongJae Park * [big_two_unmapped_areas[1]->end, mappings[nr_mappings - 1]->end) 2183f49584bSSeongJae Park * 2193f49584bSSeongJae Park * As usual memory map of processes is as below, the gap between the heap and 2203f49584bSSeongJae Park * the uppermost mmap()-ed region, and the gap between the lowermost mmap()-ed 2213f49584bSSeongJae Park * region and the stack will be two biggest unmapped regions. Because these 2223f49584bSSeongJae Park * gaps are exceptionally huge areas in usual address space, excluding these 2233f49584bSSeongJae Park * two biggest unmapped regions will be sufficient to make a trade-off. 2243f49584bSSeongJae Park * 2253f49584bSSeongJae Park * <heap> 2263f49584bSSeongJae Park * <BIG UNMAPPED REGION 1> 2273f49584bSSeongJae Park * <uppermost mmap()-ed region> 2283f49584bSSeongJae Park * (other mmap()-ed regions and small unmapped regions) 2293f49584bSSeongJae Park * <lowermost mmap()-ed region> 2303f49584bSSeongJae Park * <BIG UNMAPPED REGION 2> 2313f49584bSSeongJae Park * <stack> 2323f49584bSSeongJae Park */ 2333f49584bSSeongJae Park static void __damon_va_init_regions(struct damon_ctx *ctx, 2343f49584bSSeongJae Park struct damon_target *t) 2353f49584bSSeongJae Park { 236962fe7a6SSeongJae Park struct damon_target *ti; 2373f49584bSSeongJae Park struct damon_region *r; 2383f49584bSSeongJae Park struct damon_addr_range regions[3]; 2393f49584bSSeongJae Park unsigned long sz = 0, nr_pieces; 240962fe7a6SSeongJae Park int i, tidx = 0; 2413f49584bSSeongJae Park 2423f49584bSSeongJae Park if (damon_va_three_regions(t, regions)) { 243962fe7a6SSeongJae Park damon_for_each_target(ti, ctx) { 244962fe7a6SSeongJae Park if (ti == t) 245962fe7a6SSeongJae Park break; 246962fe7a6SSeongJae Park tidx++; 247962fe7a6SSeongJae Park } 248962fe7a6SSeongJae Park pr_debug("Failed to get three regions of %dth target\n", tidx); 2493f49584bSSeongJae Park return; 2503f49584bSSeongJae Park } 2513f49584bSSeongJae Park 2523f49584bSSeongJae Park for (i = 0; i < 3; i++) 2533f49584bSSeongJae Park sz += regions[i].end - regions[i].start; 254cbeaa77bSSeongJae Park if (ctx->attrs.min_nr_regions) 255cbeaa77bSSeongJae Park sz /= ctx->attrs.min_nr_regions; 2563f49584bSSeongJae Park if (sz < DAMON_MIN_REGION) 2573f49584bSSeongJae Park sz = DAMON_MIN_REGION; 2583f49584bSSeongJae Park 2593f49584bSSeongJae Park /* Set the initial three regions of the target */ 2603f49584bSSeongJae Park for (i = 0; i < 3; i++) { 2613f49584bSSeongJae Park r = damon_new_region(regions[i].start, regions[i].end); 2623f49584bSSeongJae Park if (!r) { 2633f49584bSSeongJae Park pr_err("%d'th init region creation failed\n", i); 2643f49584bSSeongJae Park return; 2653f49584bSSeongJae Park } 2663f49584bSSeongJae Park damon_add_region(r, t); 2673f49584bSSeongJae Park 2683f49584bSSeongJae Park nr_pieces = (regions[i].end - regions[i].start) / sz; 2693f49584bSSeongJae Park damon_va_evenly_split_region(t, r, nr_pieces); 2703f49584bSSeongJae Park } 2713f49584bSSeongJae Park } 2723f49584bSSeongJae Park 2733f49584bSSeongJae Park /* Initialize '->regions_list' of every target (task) */ 274cdeed009SXin Hao static void damon_va_init(struct damon_ctx *ctx) 2753f49584bSSeongJae Park { 2763f49584bSSeongJae Park struct damon_target *t; 2773f49584bSSeongJae Park 2783f49584bSSeongJae Park damon_for_each_target(t, ctx) { 2793f49584bSSeongJae Park /* the user may set the target regions as they want */ 2803f49584bSSeongJae Park if (!damon_nr_regions(t)) 2813f49584bSSeongJae Park __damon_va_init_regions(ctx, t); 2823f49584bSSeongJae Park } 2833f49584bSSeongJae Park } 2843f49584bSSeongJae Park 2853f49584bSSeongJae Park /* 2863f49584bSSeongJae Park * Update regions for current memory mappings 2873f49584bSSeongJae Park */ 288cdeed009SXin Hao static void damon_va_update(struct damon_ctx *ctx) 2893f49584bSSeongJae Park { 2903f49584bSSeongJae Park struct damon_addr_range three_regions[3]; 2913f49584bSSeongJae Park struct damon_target *t; 2923f49584bSSeongJae Park 2933f49584bSSeongJae Park damon_for_each_target(t, ctx) { 2943f49584bSSeongJae Park if (damon_va_three_regions(t, three_regions)) 2953f49584bSSeongJae Park continue; 296dae0087aSSeongJae Park damon_set_regions(t, three_regions, 3); 2973f49584bSSeongJae Park } 2983f49584bSSeongJae Park } 2993f49584bSSeongJae Park 3003f49584bSSeongJae Park static int damon_mkold_pmd_entry(pmd_t *pmd, unsigned long addr, 3013f49584bSSeongJae Park unsigned long next, struct mm_walk *walk) 3023f49584bSSeongJae Park { 3033f49584bSSeongJae Park pte_t *pte; 3043f49584bSSeongJae Park spinlock_t *ptl; 3053f49584bSSeongJae Park 30672c33ef4SBaolin Wang if (pmd_trans_huge(*pmd)) { 3073f49584bSSeongJae Park ptl = pmd_lock(walk->mm, pmd); 308c8b9aff4SBaolin Wang if (!pmd_present(*pmd)) { 309c8b9aff4SBaolin Wang spin_unlock(ptl); 310c8b9aff4SBaolin Wang return 0; 311c8b9aff4SBaolin Wang } 312c8b9aff4SBaolin Wang 31372c33ef4SBaolin Wang if (pmd_trans_huge(*pmd)) { 3143f49584bSSeongJae Park damon_pmdp_mkold(pmd, walk->mm, addr); 3153f49584bSSeongJae Park spin_unlock(ptl); 3163f49584bSSeongJae Park return 0; 3173f49584bSSeongJae Park } 3183f49584bSSeongJae Park spin_unlock(ptl); 3193f49584bSSeongJae Park } 3203f49584bSSeongJae Park 3213f49584bSSeongJae Park if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) 3223f49584bSSeongJae Park return 0; 3233f49584bSSeongJae Park pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); 3243f49584bSSeongJae Park if (!pte_present(*pte)) 3253f49584bSSeongJae Park goto out; 3263f49584bSSeongJae Park damon_ptep_mkold(pte, walk->mm, addr); 3273f49584bSSeongJae Park out: 3283f49584bSSeongJae Park pte_unmap_unlock(pte, ptl); 3293f49584bSSeongJae Park return 0; 3303f49584bSSeongJae Park } 3313f49584bSSeongJae Park 33249f4203aSBaolin Wang #ifdef CONFIG_HUGETLB_PAGE 33349f4203aSBaolin Wang static void damon_hugetlb_mkold(pte_t *pte, struct mm_struct *mm, 33449f4203aSBaolin Wang struct vm_area_struct *vma, unsigned long addr) 33549f4203aSBaolin Wang { 33649f4203aSBaolin Wang bool referenced = false; 33749f4203aSBaolin Wang pte_t entry = huge_ptep_get(pte); 338*6b7cea90SKefeng Wang struct folio *folio = pfn_folio(pte_pfn(entry)); 33949f4203aSBaolin Wang 340*6b7cea90SKefeng Wang folio_get(folio); 34149f4203aSBaolin Wang 34249f4203aSBaolin Wang if (pte_young(entry)) { 34349f4203aSBaolin Wang referenced = true; 34449f4203aSBaolin Wang entry = pte_mkold(entry); 345ed1523a8SBaolin Wang set_huge_pte_at(mm, addr, pte, entry); 34649f4203aSBaolin Wang } 34749f4203aSBaolin Wang 34849f4203aSBaolin Wang #ifdef CONFIG_MMU_NOTIFIER 34949f4203aSBaolin Wang if (mmu_notifier_clear_young(mm, addr, 35049f4203aSBaolin Wang addr + huge_page_size(hstate_vma(vma)))) 35149f4203aSBaolin Wang referenced = true; 35249f4203aSBaolin Wang #endif /* CONFIG_MMU_NOTIFIER */ 35349f4203aSBaolin Wang 35449f4203aSBaolin Wang if (referenced) 355*6b7cea90SKefeng Wang folio_set_young(folio); 35649f4203aSBaolin Wang 357*6b7cea90SKefeng Wang folio_set_idle(folio); 358*6b7cea90SKefeng Wang folio_put(folio); 35949f4203aSBaolin Wang } 36049f4203aSBaolin Wang 36149f4203aSBaolin Wang static int damon_mkold_hugetlb_entry(pte_t *pte, unsigned long hmask, 36249f4203aSBaolin Wang unsigned long addr, unsigned long end, 36349f4203aSBaolin Wang struct mm_walk *walk) 36449f4203aSBaolin Wang { 36549f4203aSBaolin Wang struct hstate *h = hstate_vma(walk->vma); 36649f4203aSBaolin Wang spinlock_t *ptl; 36749f4203aSBaolin Wang pte_t entry; 36849f4203aSBaolin Wang 36949f4203aSBaolin Wang ptl = huge_pte_lock(h, walk->mm, pte); 37049f4203aSBaolin Wang entry = huge_ptep_get(pte); 37149f4203aSBaolin Wang if (!pte_present(entry)) 37249f4203aSBaolin Wang goto out; 37349f4203aSBaolin Wang 37449f4203aSBaolin Wang damon_hugetlb_mkold(pte, walk->mm, walk->vma, addr); 37549f4203aSBaolin Wang 37649f4203aSBaolin Wang out: 37749f4203aSBaolin Wang spin_unlock(ptl); 37849f4203aSBaolin Wang return 0; 37949f4203aSBaolin Wang } 38049f4203aSBaolin Wang #else 38149f4203aSBaolin Wang #define damon_mkold_hugetlb_entry NULL 38249f4203aSBaolin Wang #endif /* CONFIG_HUGETLB_PAGE */ 38349f4203aSBaolin Wang 384199b50f4SRikard Falkeborn static const struct mm_walk_ops damon_mkold_ops = { 3853f49584bSSeongJae Park .pmd_entry = damon_mkold_pmd_entry, 38649f4203aSBaolin Wang .hugetlb_entry = damon_mkold_hugetlb_entry, 3873f49584bSSeongJae Park }; 3883f49584bSSeongJae Park 3893f49584bSSeongJae Park static void damon_va_mkold(struct mm_struct *mm, unsigned long addr) 3903f49584bSSeongJae Park { 3913f49584bSSeongJae Park mmap_read_lock(mm); 3923f49584bSSeongJae Park walk_page_range(mm, addr, addr + 1, &damon_mkold_ops, NULL); 3933f49584bSSeongJae Park mmap_read_unlock(mm); 3943f49584bSSeongJae Park } 3953f49584bSSeongJae Park 3963f49584bSSeongJae Park /* 3973f49584bSSeongJae Park * Functions for the access checking of the regions 3983f49584bSSeongJae Park */ 3993f49584bSSeongJae Park 4008ef4d5caSKaixu Xia static void __damon_va_prepare_access_check(struct mm_struct *mm, 4018ef4d5caSKaixu Xia struct damon_region *r) 4023f49584bSSeongJae Park { 4033f49584bSSeongJae Park r->sampling_addr = damon_rand(r->ar.start, r->ar.end); 4043f49584bSSeongJae Park 4053f49584bSSeongJae Park damon_va_mkold(mm, r->sampling_addr); 4063f49584bSSeongJae Park } 4073f49584bSSeongJae Park 408cdeed009SXin Hao static void damon_va_prepare_access_checks(struct damon_ctx *ctx) 4093f49584bSSeongJae Park { 4103f49584bSSeongJae Park struct damon_target *t; 4113f49584bSSeongJae Park struct mm_struct *mm; 4123f49584bSSeongJae Park struct damon_region *r; 4133f49584bSSeongJae Park 4143f49584bSSeongJae Park damon_for_each_target(t, ctx) { 4153f49584bSSeongJae Park mm = damon_get_mm(t); 4163f49584bSSeongJae Park if (!mm) 4173f49584bSSeongJae Park continue; 4183f49584bSSeongJae Park damon_for_each_region(r, t) 4198ef4d5caSKaixu Xia __damon_va_prepare_access_check(mm, r); 4203f49584bSSeongJae Park mmput(mm); 4213f49584bSSeongJae Park } 4223f49584bSSeongJae Park } 4233f49584bSSeongJae Park 4243f49584bSSeongJae Park struct damon_young_walk_private { 4253f49584bSSeongJae Park unsigned long *page_sz; 4263f49584bSSeongJae Park bool young; 4273f49584bSSeongJae Park }; 4283f49584bSSeongJae Park 4293f49584bSSeongJae Park static int damon_young_pmd_entry(pmd_t *pmd, unsigned long addr, 4303f49584bSSeongJae Park unsigned long next, struct mm_walk *walk) 4313f49584bSSeongJae Park { 4323f49584bSSeongJae Park pte_t *pte; 4333f49584bSSeongJae Park spinlock_t *ptl; 434dc1b7866SKefeng Wang struct folio *folio; 4353f49584bSSeongJae Park struct damon_young_walk_private *priv = walk->private; 4363f49584bSSeongJae Park 4373f49584bSSeongJae Park #ifdef CONFIG_TRANSPARENT_HUGEPAGE 43872c33ef4SBaolin Wang if (pmd_trans_huge(*pmd)) { 4393f49584bSSeongJae Park ptl = pmd_lock(walk->mm, pmd); 440c8b9aff4SBaolin Wang if (!pmd_present(*pmd)) { 441c8b9aff4SBaolin Wang spin_unlock(ptl); 442c8b9aff4SBaolin Wang return 0; 443c8b9aff4SBaolin Wang } 444c8b9aff4SBaolin Wang 44572c33ef4SBaolin Wang if (!pmd_trans_huge(*pmd)) { 4463f49584bSSeongJae Park spin_unlock(ptl); 4473f49584bSSeongJae Park goto regular_page; 4483f49584bSSeongJae Park } 449dc1b7866SKefeng Wang folio = damon_get_folio(pmd_pfn(*pmd)); 450dc1b7866SKefeng Wang if (!folio) 4513f49584bSSeongJae Park goto huge_out; 452dc1b7866SKefeng Wang if (pmd_young(*pmd) || !folio_test_idle(folio) || 4533f49584bSSeongJae Park mmu_notifier_test_young(walk->mm, 4543f49584bSSeongJae Park addr)) { 45502e34fffSKefeng Wang *priv->page_sz = HPAGE_PMD_SIZE; 4563f49584bSSeongJae Park priv->young = true; 4573f49584bSSeongJae Park } 458dc1b7866SKefeng Wang folio_put(folio); 4593f49584bSSeongJae Park huge_out: 4603f49584bSSeongJae Park spin_unlock(ptl); 4613f49584bSSeongJae Park return 0; 4623f49584bSSeongJae Park } 4633f49584bSSeongJae Park 4643f49584bSSeongJae Park regular_page: 4653f49584bSSeongJae Park #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ 4663f49584bSSeongJae Park 4673f49584bSSeongJae Park if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) 4683f49584bSSeongJae Park return -EINVAL; 4693f49584bSSeongJae Park pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); 4703f49584bSSeongJae Park if (!pte_present(*pte)) 4713f49584bSSeongJae Park goto out; 472dc1b7866SKefeng Wang folio = damon_get_folio(pte_pfn(*pte)); 473dc1b7866SKefeng Wang if (!folio) 4743f49584bSSeongJae Park goto out; 475dc1b7866SKefeng Wang if (pte_young(*pte) || !folio_test_idle(folio) || 4763f49584bSSeongJae Park mmu_notifier_test_young(walk->mm, addr)) { 4773f49584bSSeongJae Park *priv->page_sz = PAGE_SIZE; 4783f49584bSSeongJae Park priv->young = true; 4793f49584bSSeongJae Park } 480dc1b7866SKefeng Wang folio_put(folio); 4813f49584bSSeongJae Park out: 4823f49584bSSeongJae Park pte_unmap_unlock(pte, ptl); 4833f49584bSSeongJae Park return 0; 4843f49584bSSeongJae Park } 4853f49584bSSeongJae Park 48649f4203aSBaolin Wang #ifdef CONFIG_HUGETLB_PAGE 48749f4203aSBaolin Wang static int damon_young_hugetlb_entry(pte_t *pte, unsigned long hmask, 48849f4203aSBaolin Wang unsigned long addr, unsigned long end, 48949f4203aSBaolin Wang struct mm_walk *walk) 49049f4203aSBaolin Wang { 49149f4203aSBaolin Wang struct damon_young_walk_private *priv = walk->private; 49249f4203aSBaolin Wang struct hstate *h = hstate_vma(walk->vma); 493*6b7cea90SKefeng Wang struct folio *folio; 49449f4203aSBaolin Wang spinlock_t *ptl; 49549f4203aSBaolin Wang pte_t entry; 49649f4203aSBaolin Wang 49749f4203aSBaolin Wang ptl = huge_pte_lock(h, walk->mm, pte); 49849f4203aSBaolin Wang entry = huge_ptep_get(pte); 49949f4203aSBaolin Wang if (!pte_present(entry)) 50049f4203aSBaolin Wang goto out; 50149f4203aSBaolin Wang 502*6b7cea90SKefeng Wang folio = pfn_folio(pte_pfn(entry)); 503*6b7cea90SKefeng Wang folio_get(folio); 50449f4203aSBaolin Wang 505*6b7cea90SKefeng Wang if (pte_young(entry) || !folio_test_idle(folio) || 50649f4203aSBaolin Wang mmu_notifier_test_young(walk->mm, addr)) { 50749f4203aSBaolin Wang *priv->page_sz = huge_page_size(h); 50849f4203aSBaolin Wang priv->young = true; 50949f4203aSBaolin Wang } 51049f4203aSBaolin Wang 511*6b7cea90SKefeng Wang folio_put(folio); 51249f4203aSBaolin Wang 51349f4203aSBaolin Wang out: 51449f4203aSBaolin Wang spin_unlock(ptl); 51549f4203aSBaolin Wang return 0; 51649f4203aSBaolin Wang } 51749f4203aSBaolin Wang #else 51849f4203aSBaolin Wang #define damon_young_hugetlb_entry NULL 51949f4203aSBaolin Wang #endif /* CONFIG_HUGETLB_PAGE */ 52049f4203aSBaolin Wang 521199b50f4SRikard Falkeborn static const struct mm_walk_ops damon_young_ops = { 5223f49584bSSeongJae Park .pmd_entry = damon_young_pmd_entry, 52349f4203aSBaolin Wang .hugetlb_entry = damon_young_hugetlb_entry, 5243f49584bSSeongJae Park }; 5253f49584bSSeongJae Park 5263f49584bSSeongJae Park static bool damon_va_young(struct mm_struct *mm, unsigned long addr, 5273f49584bSSeongJae Park unsigned long *page_sz) 5283f49584bSSeongJae Park { 5293f49584bSSeongJae Park struct damon_young_walk_private arg = { 5303f49584bSSeongJae Park .page_sz = page_sz, 5313f49584bSSeongJae Park .young = false, 5323f49584bSSeongJae Park }; 5333f49584bSSeongJae Park 5343f49584bSSeongJae Park mmap_read_lock(mm); 5353f49584bSSeongJae Park walk_page_range(mm, addr, addr + 1, &damon_young_ops, &arg); 5363f49584bSSeongJae Park mmap_read_unlock(mm); 5373f49584bSSeongJae Park return arg.young; 5383f49584bSSeongJae Park } 5393f49584bSSeongJae Park 5403f49584bSSeongJae Park /* 5413f49584bSSeongJae Park * Check whether the region was accessed after the last preparation 5423f49584bSSeongJae Park * 5433f49584bSSeongJae Park * mm 'mm_struct' for the given virtual address space 5443f49584bSSeongJae Park * r the region to be checked 5453f49584bSSeongJae Park */ 54609876ae7SKaixu Xia static void __damon_va_check_access(struct mm_struct *mm, 54795cd2522SKaixu Xia struct damon_region *r, bool same_target) 5483f49584bSSeongJae Park { 5493f49584bSSeongJae Park static unsigned long last_addr; 5503f49584bSSeongJae Park static unsigned long last_page_sz = PAGE_SIZE; 5513f49584bSSeongJae Park static bool last_accessed; 5523f49584bSSeongJae Park 5533f49584bSSeongJae Park /* If the region is in the last checked page, reuse the result */ 55495cd2522SKaixu Xia if (same_target && (ALIGN_DOWN(last_addr, last_page_sz) == 5553f49584bSSeongJae Park ALIGN_DOWN(r->sampling_addr, last_page_sz))) { 5563f49584bSSeongJae Park if (last_accessed) 5573f49584bSSeongJae Park r->nr_accesses++; 5583f49584bSSeongJae Park return; 5593f49584bSSeongJae Park } 5603f49584bSSeongJae Park 5613f49584bSSeongJae Park last_accessed = damon_va_young(mm, r->sampling_addr, &last_page_sz); 5623f49584bSSeongJae Park if (last_accessed) 5633f49584bSSeongJae Park r->nr_accesses++; 5643f49584bSSeongJae Park 5653f49584bSSeongJae Park last_addr = r->sampling_addr; 5663f49584bSSeongJae Park } 5673f49584bSSeongJae Park 568cdeed009SXin Hao static unsigned int damon_va_check_accesses(struct damon_ctx *ctx) 5693f49584bSSeongJae Park { 5703f49584bSSeongJae Park struct damon_target *t; 5713f49584bSSeongJae Park struct mm_struct *mm; 5723f49584bSSeongJae Park struct damon_region *r; 5733f49584bSSeongJae Park unsigned int max_nr_accesses = 0; 57495cd2522SKaixu Xia bool same_target; 5753f49584bSSeongJae Park 5763f49584bSSeongJae Park damon_for_each_target(t, ctx) { 5773f49584bSSeongJae Park mm = damon_get_mm(t); 5783f49584bSSeongJae Park if (!mm) 5793f49584bSSeongJae Park continue; 58095cd2522SKaixu Xia same_target = false; 5813f49584bSSeongJae Park damon_for_each_region(r, t) { 58295cd2522SKaixu Xia __damon_va_check_access(mm, r, same_target); 5833f49584bSSeongJae Park max_nr_accesses = max(r->nr_accesses, max_nr_accesses); 58495cd2522SKaixu Xia same_target = true; 5853f49584bSSeongJae Park } 5863f49584bSSeongJae Park mmput(mm); 5873f49584bSSeongJae Park } 5883f49584bSSeongJae Park 5893f49584bSSeongJae Park return max_nr_accesses; 5903f49584bSSeongJae Park } 5913f49584bSSeongJae Park 5923f49584bSSeongJae Park /* 5933f49584bSSeongJae Park * Functions for the target validity check and cleanup 5943f49584bSSeongJae Park */ 5953f49584bSSeongJae Park 59616bc1b0fSKaixu Xia static bool damon_va_target_valid(struct damon_target *t) 5973f49584bSSeongJae Park { 5983f49584bSSeongJae Park struct task_struct *task; 5993f49584bSSeongJae Park 6003f49584bSSeongJae Park task = damon_get_task_struct(t); 6013f49584bSSeongJae Park if (task) { 6023f49584bSSeongJae Park put_task_struct(task); 6033f49584bSSeongJae Park return true; 6043f49584bSSeongJae Park } 6053f49584bSSeongJae Park 6063f49584bSSeongJae Park return false; 6073f49584bSSeongJae Park } 6083f49584bSSeongJae Park 6096dea8addSSeongJae Park #ifndef CONFIG_ADVISE_SYSCALLS 6100e92c2eeSSeongJae Park static unsigned long damos_madvise(struct damon_target *target, 6110e92c2eeSSeongJae Park struct damon_region *r, int behavior) 6126dea8addSSeongJae Park { 6130e92c2eeSSeongJae Park return 0; 6146dea8addSSeongJae Park } 6156dea8addSSeongJae Park #else 6160e92c2eeSSeongJae Park static unsigned long damos_madvise(struct damon_target *target, 6170e92c2eeSSeongJae Park struct damon_region *r, int behavior) 6186dea8addSSeongJae Park { 6196dea8addSSeongJae Park struct mm_struct *mm; 6200e92c2eeSSeongJae Park unsigned long start = PAGE_ALIGN(r->ar.start); 621ab63f63fSXin Hao unsigned long len = PAGE_ALIGN(damon_sz_region(r)); 6220e92c2eeSSeongJae Park unsigned long applied; 6236dea8addSSeongJae Park 6246dea8addSSeongJae Park mm = damon_get_mm(target); 6256dea8addSSeongJae Park if (!mm) 6260e92c2eeSSeongJae Park return 0; 6276dea8addSSeongJae Park 6280e92c2eeSSeongJae Park applied = do_madvise(mm, start, len, behavior) ? 0 : len; 6296dea8addSSeongJae Park mmput(mm); 6300e92c2eeSSeongJae Park 6310e92c2eeSSeongJae Park return applied; 6326dea8addSSeongJae Park } 6336dea8addSSeongJae Park #endif /* CONFIG_ADVISE_SYSCALLS */ 6346dea8addSSeongJae Park 6350e92c2eeSSeongJae Park static unsigned long damon_va_apply_scheme(struct damon_ctx *ctx, 6360e92c2eeSSeongJae Park struct damon_target *t, struct damon_region *r, 6370e92c2eeSSeongJae Park struct damos *scheme) 6386dea8addSSeongJae Park { 6396dea8addSSeongJae Park int madv_action; 6406dea8addSSeongJae Park 6416dea8addSSeongJae Park switch (scheme->action) { 6426dea8addSSeongJae Park case DAMOS_WILLNEED: 6436dea8addSSeongJae Park madv_action = MADV_WILLNEED; 6446dea8addSSeongJae Park break; 6456dea8addSSeongJae Park case DAMOS_COLD: 6466dea8addSSeongJae Park madv_action = MADV_COLD; 6476dea8addSSeongJae Park break; 6486dea8addSSeongJae Park case DAMOS_PAGEOUT: 6496dea8addSSeongJae Park madv_action = MADV_PAGEOUT; 6506dea8addSSeongJae Park break; 6516dea8addSSeongJae Park case DAMOS_HUGEPAGE: 6526dea8addSSeongJae Park madv_action = MADV_HUGEPAGE; 6536dea8addSSeongJae Park break; 6546dea8addSSeongJae Park case DAMOS_NOHUGEPAGE: 6556dea8addSSeongJae Park madv_action = MADV_NOHUGEPAGE; 6566dea8addSSeongJae Park break; 6572f0b548cSSeongJae Park case DAMOS_STAT: 6582f0b548cSSeongJae Park return 0; 6596dea8addSSeongJae Park default: 6605934ec13SKaixu Xia /* 6615934ec13SKaixu Xia * DAMOS actions that are not yet supported by 'vaddr'. 6625934ec13SKaixu Xia */ 6630e92c2eeSSeongJae Park return 0; 6646dea8addSSeongJae Park } 6656dea8addSSeongJae Park 6666dea8addSSeongJae Park return damos_madvise(t, r, madv_action); 6676dea8addSSeongJae Park } 6686dea8addSSeongJae Park 669cdeed009SXin Hao static int damon_va_scheme_score(struct damon_ctx *context, 670cdeed009SXin Hao struct damon_target *t, struct damon_region *r, 671cdeed009SXin Hao struct damos *scheme) 672198f0f4cSSeongJae Park { 673198f0f4cSSeongJae Park 674198f0f4cSSeongJae Park switch (scheme->action) { 675198f0f4cSSeongJae Park case DAMOS_PAGEOUT: 676e3e486e6SKaixu Xia return damon_cold_score(context, r, scheme); 677198f0f4cSSeongJae Park default: 678198f0f4cSSeongJae Park break; 679198f0f4cSSeongJae Park } 680198f0f4cSSeongJae Park 681198f0f4cSSeongJae Park return DAMOS_MAX_SCORE; 682198f0f4cSSeongJae Park } 683198f0f4cSSeongJae Park 6847752925fSSeongJae Park static int __init damon_va_initcall(void) 6857752925fSSeongJae Park { 6867752925fSSeongJae Park struct damon_operations ops = { 6877752925fSSeongJae Park .id = DAMON_OPS_VADDR, 6887752925fSSeongJae Park .init = damon_va_init, 6897752925fSSeongJae Park .update = damon_va_update, 6907752925fSSeongJae Park .prepare_access_checks = damon_va_prepare_access_checks, 6917752925fSSeongJae Park .check_accesses = damon_va_check_accesses, 6927752925fSSeongJae Park .reset_aggregated = NULL, 6937752925fSSeongJae Park .target_valid = damon_va_target_valid, 6947752925fSSeongJae Park .cleanup = NULL, 6957752925fSSeongJae Park .apply_scheme = damon_va_apply_scheme, 6967752925fSSeongJae Park .get_scheme_score = damon_va_scheme_score, 6977752925fSSeongJae Park }; 698de6d0154SSeongJae Park /* ops for fixed virtual address ranges */ 699de6d0154SSeongJae Park struct damon_operations ops_fvaddr = ops; 700de6d0154SSeongJae Park int err; 7017752925fSSeongJae Park 702de6d0154SSeongJae Park /* Don't set the monitoring target regions for the entire mapping */ 703de6d0154SSeongJae Park ops_fvaddr.id = DAMON_OPS_FVADDR; 704de6d0154SSeongJae Park ops_fvaddr.init = NULL; 705de6d0154SSeongJae Park ops_fvaddr.update = NULL; 706de6d0154SSeongJae Park 707de6d0154SSeongJae Park err = damon_register_ops(&ops); 708de6d0154SSeongJae Park if (err) 709de6d0154SSeongJae Park return err; 710de6d0154SSeongJae Park return damon_register_ops(&ops_fvaddr); 7117752925fSSeongJae Park }; 7127752925fSSeongJae Park 7137752925fSSeongJae Park subsys_initcall(damon_va_initcall); 7147752925fSSeongJae Park 71517ccae8bSSeongJae Park #include "vaddr-test.h" 716