13f49584bSSeongJae Park // SPDX-License-Identifier: GPL-2.0 23f49584bSSeongJae Park /* 33f49584bSSeongJae Park * DAMON Primitives for Virtual Address Spaces 43f49584bSSeongJae Park * 53f49584bSSeongJae Park * Author: SeongJae Park <sjpark@amazon.de> 63f49584bSSeongJae Park */ 73f49584bSSeongJae Park 83f49584bSSeongJae Park #define pr_fmt(fmt) "damon-va: " fmt 93f49584bSSeongJae Park 106dea8addSSeongJae Park #include <asm-generic/mman-common.h> 113f49584bSSeongJae Park #include <linux/damon.h> 123f49584bSSeongJae Park #include <linux/hugetlb.h> 133f49584bSSeongJae Park #include <linux/mm.h> 143f49584bSSeongJae Park #include <linux/mmu_notifier.h> 153f49584bSSeongJae Park #include <linux/highmem.h> 163f49584bSSeongJae Park #include <linux/page_idle.h> 173f49584bSSeongJae Park #include <linux/pagewalk.h> 183f49584bSSeongJae Park #include <linux/random.h> 193f49584bSSeongJae Park #include <linux/sched/mm.h> 203f49584bSSeongJae Park #include <linux/slab.h> 213f49584bSSeongJae Park 2217ccae8bSSeongJae Park #ifdef CONFIG_DAMON_VADDR_KUNIT_TEST 2317ccae8bSSeongJae Park #undef DAMON_MIN_REGION 2417ccae8bSSeongJae Park #define DAMON_MIN_REGION 1 2517ccae8bSSeongJae Park #endif 2617ccae8bSSeongJae Park 273f49584bSSeongJae Park /* Get a random number in [l, r) */ 283f49584bSSeongJae Park #define damon_rand(l, r) (l + prandom_u32_max(r - l)) 293f49584bSSeongJae Park 303f49584bSSeongJae Park /* 313f49584bSSeongJae Park * 't->id' should be the pointer to the relevant 'struct pid' having reference 323f49584bSSeongJae Park * count. Caller must put the returned task, unless it is NULL. 333f49584bSSeongJae Park */ 343f49584bSSeongJae Park #define damon_get_task_struct(t) \ 353f49584bSSeongJae Park (get_pid_task((struct pid *)t->id, PIDTYPE_PID)) 363f49584bSSeongJae Park 373f49584bSSeongJae Park /* 383f49584bSSeongJae Park * Get the mm_struct of the given target 393f49584bSSeongJae Park * 403f49584bSSeongJae Park * Caller _must_ put the mm_struct after use, unless it is NULL. 413f49584bSSeongJae Park * 423f49584bSSeongJae Park * Returns the mm_struct of the target on success, NULL on failure 433f49584bSSeongJae Park */ 443f49584bSSeongJae Park static struct mm_struct *damon_get_mm(struct damon_target *t) 453f49584bSSeongJae Park { 463f49584bSSeongJae Park struct task_struct *task; 473f49584bSSeongJae Park struct mm_struct *mm; 483f49584bSSeongJae Park 493f49584bSSeongJae Park task = damon_get_task_struct(t); 503f49584bSSeongJae Park if (!task) 513f49584bSSeongJae Park return NULL; 523f49584bSSeongJae Park 533f49584bSSeongJae Park mm = get_task_mm(task); 543f49584bSSeongJae Park put_task_struct(task); 553f49584bSSeongJae Park return mm; 563f49584bSSeongJae Park } 573f49584bSSeongJae Park 583f49584bSSeongJae Park /* 593f49584bSSeongJae Park * Functions for the initial monitoring target regions construction 603f49584bSSeongJae Park */ 613f49584bSSeongJae Park 623f49584bSSeongJae Park /* 633f49584bSSeongJae Park * Size-evenly split a region into 'nr_pieces' small regions 643f49584bSSeongJae Park * 653f49584bSSeongJae Park * Returns 0 on success, or negative error code otherwise. 663f49584bSSeongJae Park */ 673f49584bSSeongJae Park static int damon_va_evenly_split_region(struct damon_target *t, 683f49584bSSeongJae Park struct damon_region *r, unsigned int nr_pieces) 693f49584bSSeongJae Park { 703f49584bSSeongJae Park unsigned long sz_orig, sz_piece, orig_end; 713f49584bSSeongJae Park struct damon_region *n = NULL, *next; 723f49584bSSeongJae Park unsigned long start; 733f49584bSSeongJae Park 743f49584bSSeongJae Park if (!r || !nr_pieces) 753f49584bSSeongJae Park return -EINVAL; 763f49584bSSeongJae Park 773f49584bSSeongJae Park orig_end = r->ar.end; 783f49584bSSeongJae Park sz_orig = r->ar.end - r->ar.start; 793f49584bSSeongJae Park sz_piece = ALIGN_DOWN(sz_orig / nr_pieces, DAMON_MIN_REGION); 803f49584bSSeongJae Park 813f49584bSSeongJae Park if (!sz_piece) 823f49584bSSeongJae Park return -EINVAL; 833f49584bSSeongJae Park 843f49584bSSeongJae Park r->ar.end = r->ar.start + sz_piece; 853f49584bSSeongJae Park next = damon_next_region(r); 863f49584bSSeongJae Park for (start = r->ar.end; start + sz_piece <= orig_end; 873f49584bSSeongJae Park start += sz_piece) { 883f49584bSSeongJae Park n = damon_new_region(start, start + sz_piece); 893f49584bSSeongJae Park if (!n) 903f49584bSSeongJae Park return -ENOMEM; 913f49584bSSeongJae Park damon_insert_region(n, r, next, t); 923f49584bSSeongJae Park r = n; 933f49584bSSeongJae Park } 943f49584bSSeongJae Park /* complement last region for possible rounding error */ 953f49584bSSeongJae Park if (n) 963f49584bSSeongJae Park n->ar.end = orig_end; 973f49584bSSeongJae Park 983f49584bSSeongJae Park return 0; 993f49584bSSeongJae Park } 1003f49584bSSeongJae Park 1013f49584bSSeongJae Park static unsigned long sz_range(struct damon_addr_range *r) 1023f49584bSSeongJae Park { 1033f49584bSSeongJae Park return r->end - r->start; 1043f49584bSSeongJae Park } 1053f49584bSSeongJae Park 1063f49584bSSeongJae Park static void swap_ranges(struct damon_addr_range *r1, 1073f49584bSSeongJae Park struct damon_addr_range *r2) 1083f49584bSSeongJae Park { 1093f49584bSSeongJae Park struct damon_addr_range tmp; 1103f49584bSSeongJae Park 1113f49584bSSeongJae Park tmp = *r1; 1123f49584bSSeongJae Park *r1 = *r2; 1133f49584bSSeongJae Park *r2 = tmp; 1143f49584bSSeongJae Park } 1153f49584bSSeongJae Park 1163f49584bSSeongJae Park /* 1173f49584bSSeongJae Park * Find three regions separated by two biggest unmapped regions 1183f49584bSSeongJae Park * 1193f49584bSSeongJae Park * vma the head vma of the target address space 1203f49584bSSeongJae Park * regions an array of three address ranges that results will be saved 1213f49584bSSeongJae Park * 1223f49584bSSeongJae Park * This function receives an address space and finds three regions in it which 1233f49584bSSeongJae Park * separated by the two biggest unmapped regions in the space. Please refer to 1243f49584bSSeongJae Park * below comments of '__damon_va_init_regions()' function to know why this is 1253f49584bSSeongJae Park * necessary. 1263f49584bSSeongJae Park * 1273f49584bSSeongJae Park * Returns 0 if success, or negative error code otherwise. 1283f49584bSSeongJae Park */ 1293f49584bSSeongJae Park static int __damon_va_three_regions(struct vm_area_struct *vma, 1303f49584bSSeongJae Park struct damon_addr_range regions[3]) 1313f49584bSSeongJae Park { 1323f49584bSSeongJae Park struct damon_addr_range gap = {0}, first_gap = {0}, second_gap = {0}; 1333f49584bSSeongJae Park struct vm_area_struct *last_vma = NULL; 1343f49584bSSeongJae Park unsigned long start = 0; 1353f49584bSSeongJae Park struct rb_root rbroot; 1363f49584bSSeongJae Park 1373f49584bSSeongJae Park /* Find two biggest gaps so that first_gap > second_gap > others */ 1383f49584bSSeongJae Park for (; vma; vma = vma->vm_next) { 1393f49584bSSeongJae Park if (!last_vma) { 1403f49584bSSeongJae Park start = vma->vm_start; 1413f49584bSSeongJae Park goto next; 1423f49584bSSeongJae Park } 1433f49584bSSeongJae Park 1443f49584bSSeongJae Park if (vma->rb_subtree_gap <= sz_range(&second_gap)) { 1453f49584bSSeongJae Park rbroot.rb_node = &vma->vm_rb; 1463f49584bSSeongJae Park vma = rb_entry(rb_last(&rbroot), 1473f49584bSSeongJae Park struct vm_area_struct, vm_rb); 1483f49584bSSeongJae Park goto next; 1493f49584bSSeongJae Park } 1503f49584bSSeongJae Park 1513f49584bSSeongJae Park gap.start = last_vma->vm_end; 1523f49584bSSeongJae Park gap.end = vma->vm_start; 1533f49584bSSeongJae Park if (sz_range(&gap) > sz_range(&second_gap)) { 1543f49584bSSeongJae Park swap_ranges(&gap, &second_gap); 1553f49584bSSeongJae Park if (sz_range(&second_gap) > sz_range(&first_gap)) 1563f49584bSSeongJae Park swap_ranges(&second_gap, &first_gap); 1573f49584bSSeongJae Park } 1583f49584bSSeongJae Park next: 1593f49584bSSeongJae Park last_vma = vma; 1603f49584bSSeongJae Park } 1613f49584bSSeongJae Park 1623f49584bSSeongJae Park if (!sz_range(&second_gap) || !sz_range(&first_gap)) 1633f49584bSSeongJae Park return -EINVAL; 1643f49584bSSeongJae Park 1653f49584bSSeongJae Park /* Sort the two biggest gaps by address */ 1663f49584bSSeongJae Park if (first_gap.start > second_gap.start) 1673f49584bSSeongJae Park swap_ranges(&first_gap, &second_gap); 1683f49584bSSeongJae Park 1693f49584bSSeongJae Park /* Store the result */ 1703f49584bSSeongJae Park regions[0].start = ALIGN(start, DAMON_MIN_REGION); 1713f49584bSSeongJae Park regions[0].end = ALIGN(first_gap.start, DAMON_MIN_REGION); 1723f49584bSSeongJae Park regions[1].start = ALIGN(first_gap.end, DAMON_MIN_REGION); 1733f49584bSSeongJae Park regions[1].end = ALIGN(second_gap.start, DAMON_MIN_REGION); 1743f49584bSSeongJae Park regions[2].start = ALIGN(second_gap.end, DAMON_MIN_REGION); 1753f49584bSSeongJae Park regions[2].end = ALIGN(last_vma->vm_end, DAMON_MIN_REGION); 1763f49584bSSeongJae Park 1773f49584bSSeongJae Park return 0; 1783f49584bSSeongJae Park } 1793f49584bSSeongJae Park 1803f49584bSSeongJae Park /* 1813f49584bSSeongJae Park * Get the three regions in the given target (task) 1823f49584bSSeongJae Park * 1833f49584bSSeongJae Park * Returns 0 on success, negative error code otherwise. 1843f49584bSSeongJae Park */ 1853f49584bSSeongJae Park static int damon_va_three_regions(struct damon_target *t, 1863f49584bSSeongJae Park struct damon_addr_range regions[3]) 1873f49584bSSeongJae Park { 1883f49584bSSeongJae Park struct mm_struct *mm; 1893f49584bSSeongJae Park int rc; 1903f49584bSSeongJae Park 1913f49584bSSeongJae Park mm = damon_get_mm(t); 1923f49584bSSeongJae Park if (!mm) 1933f49584bSSeongJae Park return -EINVAL; 1943f49584bSSeongJae Park 1953f49584bSSeongJae Park mmap_read_lock(mm); 1963f49584bSSeongJae Park rc = __damon_va_three_regions(mm->mmap, regions); 1973f49584bSSeongJae Park mmap_read_unlock(mm); 1983f49584bSSeongJae Park 1993f49584bSSeongJae Park mmput(mm); 2003f49584bSSeongJae Park return rc; 2013f49584bSSeongJae Park } 2023f49584bSSeongJae Park 2033f49584bSSeongJae Park /* 2043f49584bSSeongJae Park * Initialize the monitoring target regions for the given target (task) 2053f49584bSSeongJae Park * 2063f49584bSSeongJae Park * t the given target 2073f49584bSSeongJae Park * 2083f49584bSSeongJae Park * Because only a number of small portions of the entire address space 2093f49584bSSeongJae Park * is actually mapped to the memory and accessed, monitoring the unmapped 2103f49584bSSeongJae Park * regions is wasteful. That said, because we can deal with small noises, 2113f49584bSSeongJae Park * tracking every mapping is not strictly required but could even incur a high 2123f49584bSSeongJae Park * overhead if the mapping frequently changes or the number of mappings is 2133f49584bSSeongJae Park * high. The adaptive regions adjustment mechanism will further help to deal 2143f49584bSSeongJae Park * with the noise by simply identifying the unmapped areas as a region that 2153f49584bSSeongJae Park * has no access. Moreover, applying the real mappings that would have many 2163f49584bSSeongJae Park * unmapped areas inside will make the adaptive mechanism quite complex. That 2173f49584bSSeongJae Park * said, too huge unmapped areas inside the monitoring target should be removed 2183f49584bSSeongJae Park * to not take the time for the adaptive mechanism. 2193f49584bSSeongJae Park * 2203f49584bSSeongJae Park * For the reason, we convert the complex mappings to three distinct regions 2213f49584bSSeongJae Park * that cover every mapped area of the address space. Also the two gaps 2223f49584bSSeongJae Park * between the three regions are the two biggest unmapped areas in the given 2233f49584bSSeongJae Park * address space. In detail, this function first identifies the start and the 2243f49584bSSeongJae Park * end of the mappings and the two biggest unmapped areas of the address space. 2253f49584bSSeongJae Park * Then, it constructs the three regions as below: 2263f49584bSSeongJae Park * 2273f49584bSSeongJae Park * [mappings[0]->start, big_two_unmapped_areas[0]->start) 2283f49584bSSeongJae Park * [big_two_unmapped_areas[0]->end, big_two_unmapped_areas[1]->start) 2293f49584bSSeongJae Park * [big_two_unmapped_areas[1]->end, mappings[nr_mappings - 1]->end) 2303f49584bSSeongJae Park * 2313f49584bSSeongJae Park * As usual memory map of processes is as below, the gap between the heap and 2323f49584bSSeongJae Park * the uppermost mmap()-ed region, and the gap between the lowermost mmap()-ed 2333f49584bSSeongJae Park * region and the stack will be two biggest unmapped regions. Because these 2343f49584bSSeongJae Park * gaps are exceptionally huge areas in usual address space, excluding these 2353f49584bSSeongJae Park * two biggest unmapped regions will be sufficient to make a trade-off. 2363f49584bSSeongJae Park * 2373f49584bSSeongJae Park * <heap> 2383f49584bSSeongJae Park * <BIG UNMAPPED REGION 1> 2393f49584bSSeongJae Park * <uppermost mmap()-ed region> 2403f49584bSSeongJae Park * (other mmap()-ed regions and small unmapped regions) 2413f49584bSSeongJae Park * <lowermost mmap()-ed region> 2423f49584bSSeongJae Park * <BIG UNMAPPED REGION 2> 2433f49584bSSeongJae Park * <stack> 2443f49584bSSeongJae Park */ 2453f49584bSSeongJae Park static void __damon_va_init_regions(struct damon_ctx *ctx, 2463f49584bSSeongJae Park struct damon_target *t) 2473f49584bSSeongJae Park { 2483f49584bSSeongJae Park struct damon_region *r; 2493f49584bSSeongJae Park struct damon_addr_range regions[3]; 2503f49584bSSeongJae Park unsigned long sz = 0, nr_pieces; 2513f49584bSSeongJae Park int i; 2523f49584bSSeongJae Park 2533f49584bSSeongJae Park if (damon_va_three_regions(t, regions)) { 2543f49584bSSeongJae Park pr_err("Failed to get three regions of target %lu\n", t->id); 2553f49584bSSeongJae Park return; 2563f49584bSSeongJae Park } 2573f49584bSSeongJae Park 2583f49584bSSeongJae Park for (i = 0; i < 3; i++) 2593f49584bSSeongJae Park sz += regions[i].end - regions[i].start; 2603f49584bSSeongJae Park if (ctx->min_nr_regions) 2613f49584bSSeongJae Park sz /= ctx->min_nr_regions; 2623f49584bSSeongJae Park if (sz < DAMON_MIN_REGION) 2633f49584bSSeongJae Park sz = DAMON_MIN_REGION; 2643f49584bSSeongJae Park 2653f49584bSSeongJae Park /* Set the initial three regions of the target */ 2663f49584bSSeongJae Park for (i = 0; i < 3; i++) { 2673f49584bSSeongJae Park r = damon_new_region(regions[i].start, regions[i].end); 2683f49584bSSeongJae Park if (!r) { 2693f49584bSSeongJae Park pr_err("%d'th init region creation failed\n", i); 2703f49584bSSeongJae Park return; 2713f49584bSSeongJae Park } 2723f49584bSSeongJae Park damon_add_region(r, t); 2733f49584bSSeongJae Park 2743f49584bSSeongJae Park nr_pieces = (regions[i].end - regions[i].start) / sz; 2753f49584bSSeongJae Park damon_va_evenly_split_region(t, r, nr_pieces); 2763f49584bSSeongJae Park } 2773f49584bSSeongJae Park } 2783f49584bSSeongJae Park 2793f49584bSSeongJae Park /* Initialize '->regions_list' of every target (task) */ 2803f49584bSSeongJae Park void damon_va_init(struct damon_ctx *ctx) 2813f49584bSSeongJae Park { 2823f49584bSSeongJae Park struct damon_target *t; 2833f49584bSSeongJae Park 2843f49584bSSeongJae Park damon_for_each_target(t, ctx) { 2853f49584bSSeongJae Park /* the user may set the target regions as they want */ 2863f49584bSSeongJae Park if (!damon_nr_regions(t)) 2873f49584bSSeongJae Park __damon_va_init_regions(ctx, t); 2883f49584bSSeongJae Park } 2893f49584bSSeongJae Park } 2903f49584bSSeongJae Park 2913f49584bSSeongJae Park /* 2923f49584bSSeongJae Park * Functions for the dynamic monitoring target regions update 2933f49584bSSeongJae Park */ 2943f49584bSSeongJae Park 2953f49584bSSeongJae Park /* 2963f49584bSSeongJae Park * Check whether a region is intersecting an address range 2973f49584bSSeongJae Park * 2983f49584bSSeongJae Park * Returns true if it is. 2993f49584bSSeongJae Park */ 3003f49584bSSeongJae Park static bool damon_intersect(struct damon_region *r, struct damon_addr_range *re) 3013f49584bSSeongJae Park { 3023f49584bSSeongJae Park return !(r->ar.end <= re->start || re->end <= r->ar.start); 3033f49584bSSeongJae Park } 3043f49584bSSeongJae Park 3053f49584bSSeongJae Park /* 3063f49584bSSeongJae Park * Update damon regions for the three big regions of the given target 3073f49584bSSeongJae Park * 3083f49584bSSeongJae Park * t the given target 3093f49584bSSeongJae Park * bregions the three big regions of the target 3103f49584bSSeongJae Park */ 3113f49584bSSeongJae Park static void damon_va_apply_three_regions(struct damon_target *t, 3123f49584bSSeongJae Park struct damon_addr_range bregions[3]) 3133f49584bSSeongJae Park { 3143f49584bSSeongJae Park struct damon_region *r, *next; 3153f49584bSSeongJae Park unsigned int i = 0; 3163f49584bSSeongJae Park 3173f49584bSSeongJae Park /* Remove regions which are not in the three big regions now */ 3183f49584bSSeongJae Park damon_for_each_region_safe(r, next, t) { 3193f49584bSSeongJae Park for (i = 0; i < 3; i++) { 3203f49584bSSeongJae Park if (damon_intersect(r, &bregions[i])) 3213f49584bSSeongJae Park break; 3223f49584bSSeongJae Park } 3233f49584bSSeongJae Park if (i == 3) 3243f49584bSSeongJae Park damon_destroy_region(r, t); 3253f49584bSSeongJae Park } 3263f49584bSSeongJae Park 3273f49584bSSeongJae Park /* Adjust intersecting regions to fit with the three big regions */ 3283f49584bSSeongJae Park for (i = 0; i < 3; i++) { 3293f49584bSSeongJae Park struct damon_region *first = NULL, *last; 3303f49584bSSeongJae Park struct damon_region *newr; 3313f49584bSSeongJae Park struct damon_addr_range *br; 3323f49584bSSeongJae Park 3333f49584bSSeongJae Park br = &bregions[i]; 3343f49584bSSeongJae Park /* Get the first and last regions which intersects with br */ 3353f49584bSSeongJae Park damon_for_each_region(r, t) { 3363f49584bSSeongJae Park if (damon_intersect(r, br)) { 3373f49584bSSeongJae Park if (!first) 3383f49584bSSeongJae Park first = r; 3393f49584bSSeongJae Park last = r; 3403f49584bSSeongJae Park } 3413f49584bSSeongJae Park if (r->ar.start >= br->end) 3423f49584bSSeongJae Park break; 3433f49584bSSeongJae Park } 3443f49584bSSeongJae Park if (!first) { 3453f49584bSSeongJae Park /* no damon_region intersects with this big region */ 3463f49584bSSeongJae Park newr = damon_new_region( 3473f49584bSSeongJae Park ALIGN_DOWN(br->start, 3483f49584bSSeongJae Park DAMON_MIN_REGION), 3493f49584bSSeongJae Park ALIGN(br->end, DAMON_MIN_REGION)); 3503f49584bSSeongJae Park if (!newr) 3513f49584bSSeongJae Park continue; 3523f49584bSSeongJae Park damon_insert_region(newr, damon_prev_region(r), r, t); 3533f49584bSSeongJae Park } else { 3543f49584bSSeongJae Park first->ar.start = ALIGN_DOWN(br->start, 3553f49584bSSeongJae Park DAMON_MIN_REGION); 3563f49584bSSeongJae Park last->ar.end = ALIGN(br->end, DAMON_MIN_REGION); 3573f49584bSSeongJae Park } 3583f49584bSSeongJae Park } 3593f49584bSSeongJae Park } 3603f49584bSSeongJae Park 3613f49584bSSeongJae Park /* 3623f49584bSSeongJae Park * Update regions for current memory mappings 3633f49584bSSeongJae Park */ 3643f49584bSSeongJae Park void damon_va_update(struct damon_ctx *ctx) 3653f49584bSSeongJae Park { 3663f49584bSSeongJae Park struct damon_addr_range three_regions[3]; 3673f49584bSSeongJae Park struct damon_target *t; 3683f49584bSSeongJae Park 3693f49584bSSeongJae Park damon_for_each_target(t, ctx) { 3703f49584bSSeongJae Park if (damon_va_three_regions(t, three_regions)) 3713f49584bSSeongJae Park continue; 3723f49584bSSeongJae Park damon_va_apply_three_regions(t, three_regions); 3733f49584bSSeongJae Park } 3743f49584bSSeongJae Park } 3753f49584bSSeongJae Park 3763f49584bSSeongJae Park /* 3773f49584bSSeongJae Park * Get an online page for a pfn if it's in the LRU list. Otherwise, returns 3783f49584bSSeongJae Park * NULL. 3793f49584bSSeongJae Park * 3803f49584bSSeongJae Park * The body of this function is stolen from the 'page_idle_get_page()'. We 3813f49584bSSeongJae Park * steal rather than reuse it because the code is quite simple. 3823f49584bSSeongJae Park */ 3833f49584bSSeongJae Park static struct page *damon_get_page(unsigned long pfn) 3843f49584bSSeongJae Park { 3853f49584bSSeongJae Park struct page *page = pfn_to_online_page(pfn); 3863f49584bSSeongJae Park 3873f49584bSSeongJae Park if (!page || !PageLRU(page) || !get_page_unless_zero(page)) 3883f49584bSSeongJae Park return NULL; 3893f49584bSSeongJae Park 3903f49584bSSeongJae Park if (unlikely(!PageLRU(page))) { 3913f49584bSSeongJae Park put_page(page); 3923f49584bSSeongJae Park page = NULL; 3933f49584bSSeongJae Park } 3943f49584bSSeongJae Park return page; 3953f49584bSSeongJae Park } 3963f49584bSSeongJae Park 3973f49584bSSeongJae Park static void damon_ptep_mkold(pte_t *pte, struct mm_struct *mm, 3983f49584bSSeongJae Park unsigned long addr) 3993f49584bSSeongJae Park { 4003f49584bSSeongJae Park bool referenced = false; 4013f49584bSSeongJae Park struct page *page = damon_get_page(pte_pfn(*pte)); 4023f49584bSSeongJae Park 4033f49584bSSeongJae Park if (!page) 4043f49584bSSeongJae Park return; 4053f49584bSSeongJae Park 4063f49584bSSeongJae Park if (pte_young(*pte)) { 4073f49584bSSeongJae Park referenced = true; 4083f49584bSSeongJae Park *pte = pte_mkold(*pte); 4093f49584bSSeongJae Park } 4103f49584bSSeongJae Park 4113f49584bSSeongJae Park #ifdef CONFIG_MMU_NOTIFIER 4123f49584bSSeongJae Park if (mmu_notifier_clear_young(mm, addr, addr + PAGE_SIZE)) 4133f49584bSSeongJae Park referenced = true; 4143f49584bSSeongJae Park #endif /* CONFIG_MMU_NOTIFIER */ 4153f49584bSSeongJae Park 4163f49584bSSeongJae Park if (referenced) 4173f49584bSSeongJae Park set_page_young(page); 4183f49584bSSeongJae Park 4193f49584bSSeongJae Park set_page_idle(page); 4203f49584bSSeongJae Park put_page(page); 4213f49584bSSeongJae Park } 4223f49584bSSeongJae Park 4233f49584bSSeongJae Park static void damon_pmdp_mkold(pmd_t *pmd, struct mm_struct *mm, 4243f49584bSSeongJae Park unsigned long addr) 4253f49584bSSeongJae Park { 4263f49584bSSeongJae Park #ifdef CONFIG_TRANSPARENT_HUGEPAGE 4273f49584bSSeongJae Park bool referenced = false; 4283f49584bSSeongJae Park struct page *page = damon_get_page(pmd_pfn(*pmd)); 4293f49584bSSeongJae Park 4303f49584bSSeongJae Park if (!page) 4313f49584bSSeongJae Park return; 4323f49584bSSeongJae Park 4333f49584bSSeongJae Park if (pmd_young(*pmd)) { 4343f49584bSSeongJae Park referenced = true; 4353f49584bSSeongJae Park *pmd = pmd_mkold(*pmd); 4363f49584bSSeongJae Park } 4373f49584bSSeongJae Park 4383f49584bSSeongJae Park #ifdef CONFIG_MMU_NOTIFIER 4393f49584bSSeongJae Park if (mmu_notifier_clear_young(mm, addr, 4403f49584bSSeongJae Park addr + ((1UL) << HPAGE_PMD_SHIFT))) 4413f49584bSSeongJae Park referenced = true; 4423f49584bSSeongJae Park #endif /* CONFIG_MMU_NOTIFIER */ 4433f49584bSSeongJae Park 4443f49584bSSeongJae Park if (referenced) 4453f49584bSSeongJae Park set_page_young(page); 4463f49584bSSeongJae Park 4473f49584bSSeongJae Park set_page_idle(page); 4483f49584bSSeongJae Park put_page(page); 4493f49584bSSeongJae Park #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ 4503f49584bSSeongJae Park } 4513f49584bSSeongJae Park 4523f49584bSSeongJae Park static int damon_mkold_pmd_entry(pmd_t *pmd, unsigned long addr, 4533f49584bSSeongJae Park unsigned long next, struct mm_walk *walk) 4543f49584bSSeongJae Park { 4553f49584bSSeongJae Park pte_t *pte; 4563f49584bSSeongJae Park spinlock_t *ptl; 4573f49584bSSeongJae Park 4583f49584bSSeongJae Park if (pmd_huge(*pmd)) { 4593f49584bSSeongJae Park ptl = pmd_lock(walk->mm, pmd); 4603f49584bSSeongJae Park if (pmd_huge(*pmd)) { 4613f49584bSSeongJae Park damon_pmdp_mkold(pmd, walk->mm, addr); 4623f49584bSSeongJae Park spin_unlock(ptl); 4633f49584bSSeongJae Park return 0; 4643f49584bSSeongJae Park } 4653f49584bSSeongJae Park spin_unlock(ptl); 4663f49584bSSeongJae Park } 4673f49584bSSeongJae Park 4683f49584bSSeongJae Park if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) 4693f49584bSSeongJae Park return 0; 4703f49584bSSeongJae Park pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); 4713f49584bSSeongJae Park if (!pte_present(*pte)) 4723f49584bSSeongJae Park goto out; 4733f49584bSSeongJae Park damon_ptep_mkold(pte, walk->mm, addr); 4743f49584bSSeongJae Park out: 4753f49584bSSeongJae Park pte_unmap_unlock(pte, ptl); 4763f49584bSSeongJae Park return 0; 4773f49584bSSeongJae Park } 4783f49584bSSeongJae Park 4793f49584bSSeongJae Park static struct mm_walk_ops damon_mkold_ops = { 4803f49584bSSeongJae Park .pmd_entry = damon_mkold_pmd_entry, 4813f49584bSSeongJae Park }; 4823f49584bSSeongJae Park 4833f49584bSSeongJae Park static void damon_va_mkold(struct mm_struct *mm, unsigned long addr) 4843f49584bSSeongJae Park { 4853f49584bSSeongJae Park mmap_read_lock(mm); 4863f49584bSSeongJae Park walk_page_range(mm, addr, addr + 1, &damon_mkold_ops, NULL); 4873f49584bSSeongJae Park mmap_read_unlock(mm); 4883f49584bSSeongJae Park } 4893f49584bSSeongJae Park 4903f49584bSSeongJae Park /* 4913f49584bSSeongJae Park * Functions for the access checking of the regions 4923f49584bSSeongJae Park */ 4933f49584bSSeongJae Park 4943f49584bSSeongJae Park static void damon_va_prepare_access_check(struct damon_ctx *ctx, 4953f49584bSSeongJae Park struct mm_struct *mm, struct damon_region *r) 4963f49584bSSeongJae Park { 4973f49584bSSeongJae Park r->sampling_addr = damon_rand(r->ar.start, r->ar.end); 4983f49584bSSeongJae Park 4993f49584bSSeongJae Park damon_va_mkold(mm, r->sampling_addr); 5003f49584bSSeongJae Park } 5013f49584bSSeongJae Park 5023f49584bSSeongJae Park void damon_va_prepare_access_checks(struct damon_ctx *ctx) 5033f49584bSSeongJae Park { 5043f49584bSSeongJae Park struct damon_target *t; 5053f49584bSSeongJae Park struct mm_struct *mm; 5063f49584bSSeongJae Park struct damon_region *r; 5073f49584bSSeongJae Park 5083f49584bSSeongJae Park damon_for_each_target(t, ctx) { 5093f49584bSSeongJae Park mm = damon_get_mm(t); 5103f49584bSSeongJae Park if (!mm) 5113f49584bSSeongJae Park continue; 5123f49584bSSeongJae Park damon_for_each_region(r, t) 5133f49584bSSeongJae Park damon_va_prepare_access_check(ctx, mm, r); 5143f49584bSSeongJae Park mmput(mm); 5153f49584bSSeongJae Park } 5163f49584bSSeongJae Park } 5173f49584bSSeongJae Park 5183f49584bSSeongJae Park struct damon_young_walk_private { 5193f49584bSSeongJae Park unsigned long *page_sz; 5203f49584bSSeongJae Park bool young; 5213f49584bSSeongJae Park }; 5223f49584bSSeongJae Park 5233f49584bSSeongJae Park static int damon_young_pmd_entry(pmd_t *pmd, unsigned long addr, 5243f49584bSSeongJae Park unsigned long next, struct mm_walk *walk) 5253f49584bSSeongJae Park { 5263f49584bSSeongJae Park pte_t *pte; 5273f49584bSSeongJae Park spinlock_t *ptl; 5283f49584bSSeongJae Park struct page *page; 5293f49584bSSeongJae Park struct damon_young_walk_private *priv = walk->private; 5303f49584bSSeongJae Park 5313f49584bSSeongJae Park #ifdef CONFIG_TRANSPARENT_HUGEPAGE 5323f49584bSSeongJae Park if (pmd_huge(*pmd)) { 5333f49584bSSeongJae Park ptl = pmd_lock(walk->mm, pmd); 5343f49584bSSeongJae Park if (!pmd_huge(*pmd)) { 5353f49584bSSeongJae Park spin_unlock(ptl); 5363f49584bSSeongJae Park goto regular_page; 5373f49584bSSeongJae Park } 5383f49584bSSeongJae Park page = damon_get_page(pmd_pfn(*pmd)); 5393f49584bSSeongJae Park if (!page) 5403f49584bSSeongJae Park goto huge_out; 5413f49584bSSeongJae Park if (pmd_young(*pmd) || !page_is_idle(page) || 5423f49584bSSeongJae Park mmu_notifier_test_young(walk->mm, 5433f49584bSSeongJae Park addr)) { 5443f49584bSSeongJae Park *priv->page_sz = ((1UL) << HPAGE_PMD_SHIFT); 5453f49584bSSeongJae Park priv->young = true; 5463f49584bSSeongJae Park } 5473f49584bSSeongJae Park put_page(page); 5483f49584bSSeongJae Park huge_out: 5493f49584bSSeongJae Park spin_unlock(ptl); 5503f49584bSSeongJae Park return 0; 5513f49584bSSeongJae Park } 5523f49584bSSeongJae Park 5533f49584bSSeongJae Park regular_page: 5543f49584bSSeongJae Park #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ 5553f49584bSSeongJae Park 5563f49584bSSeongJae Park if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) 5573f49584bSSeongJae Park return -EINVAL; 5583f49584bSSeongJae Park pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); 5593f49584bSSeongJae Park if (!pte_present(*pte)) 5603f49584bSSeongJae Park goto out; 5613f49584bSSeongJae Park page = damon_get_page(pte_pfn(*pte)); 5623f49584bSSeongJae Park if (!page) 5633f49584bSSeongJae Park goto out; 5643f49584bSSeongJae Park if (pte_young(*pte) || !page_is_idle(page) || 5653f49584bSSeongJae Park mmu_notifier_test_young(walk->mm, addr)) { 5663f49584bSSeongJae Park *priv->page_sz = PAGE_SIZE; 5673f49584bSSeongJae Park priv->young = true; 5683f49584bSSeongJae Park } 5693f49584bSSeongJae Park put_page(page); 5703f49584bSSeongJae Park out: 5713f49584bSSeongJae Park pte_unmap_unlock(pte, ptl); 5723f49584bSSeongJae Park return 0; 5733f49584bSSeongJae Park } 5743f49584bSSeongJae Park 5753f49584bSSeongJae Park static struct mm_walk_ops damon_young_ops = { 5763f49584bSSeongJae Park .pmd_entry = damon_young_pmd_entry, 5773f49584bSSeongJae Park }; 5783f49584bSSeongJae Park 5793f49584bSSeongJae Park static bool damon_va_young(struct mm_struct *mm, unsigned long addr, 5803f49584bSSeongJae Park unsigned long *page_sz) 5813f49584bSSeongJae Park { 5823f49584bSSeongJae Park struct damon_young_walk_private arg = { 5833f49584bSSeongJae Park .page_sz = page_sz, 5843f49584bSSeongJae Park .young = false, 5853f49584bSSeongJae Park }; 5863f49584bSSeongJae Park 5873f49584bSSeongJae Park mmap_read_lock(mm); 5883f49584bSSeongJae Park walk_page_range(mm, addr, addr + 1, &damon_young_ops, &arg); 5893f49584bSSeongJae Park mmap_read_unlock(mm); 5903f49584bSSeongJae Park return arg.young; 5913f49584bSSeongJae Park } 5923f49584bSSeongJae Park 5933f49584bSSeongJae Park /* 5943f49584bSSeongJae Park * Check whether the region was accessed after the last preparation 5953f49584bSSeongJae Park * 5963f49584bSSeongJae Park * mm 'mm_struct' for the given virtual address space 5973f49584bSSeongJae Park * r the region to be checked 5983f49584bSSeongJae Park */ 5993f49584bSSeongJae Park static void damon_va_check_access(struct damon_ctx *ctx, 6003f49584bSSeongJae Park struct mm_struct *mm, struct damon_region *r) 6013f49584bSSeongJae Park { 6023f49584bSSeongJae Park static struct mm_struct *last_mm; 6033f49584bSSeongJae Park static unsigned long last_addr; 6043f49584bSSeongJae Park static unsigned long last_page_sz = PAGE_SIZE; 6053f49584bSSeongJae Park static bool last_accessed; 6063f49584bSSeongJae Park 6073f49584bSSeongJae Park /* If the region is in the last checked page, reuse the result */ 6083f49584bSSeongJae Park if (mm == last_mm && (ALIGN_DOWN(last_addr, last_page_sz) == 6093f49584bSSeongJae Park ALIGN_DOWN(r->sampling_addr, last_page_sz))) { 6103f49584bSSeongJae Park if (last_accessed) 6113f49584bSSeongJae Park r->nr_accesses++; 6123f49584bSSeongJae Park return; 6133f49584bSSeongJae Park } 6143f49584bSSeongJae Park 6153f49584bSSeongJae Park last_accessed = damon_va_young(mm, r->sampling_addr, &last_page_sz); 6163f49584bSSeongJae Park if (last_accessed) 6173f49584bSSeongJae Park r->nr_accesses++; 6183f49584bSSeongJae Park 6193f49584bSSeongJae Park last_mm = mm; 6203f49584bSSeongJae Park last_addr = r->sampling_addr; 6213f49584bSSeongJae Park } 6223f49584bSSeongJae Park 6233f49584bSSeongJae Park unsigned int damon_va_check_accesses(struct damon_ctx *ctx) 6243f49584bSSeongJae Park { 6253f49584bSSeongJae Park struct damon_target *t; 6263f49584bSSeongJae Park struct mm_struct *mm; 6273f49584bSSeongJae Park struct damon_region *r; 6283f49584bSSeongJae Park unsigned int max_nr_accesses = 0; 6293f49584bSSeongJae Park 6303f49584bSSeongJae Park damon_for_each_target(t, ctx) { 6313f49584bSSeongJae Park mm = damon_get_mm(t); 6323f49584bSSeongJae Park if (!mm) 6333f49584bSSeongJae Park continue; 6343f49584bSSeongJae Park damon_for_each_region(r, t) { 6353f49584bSSeongJae Park damon_va_check_access(ctx, mm, r); 6363f49584bSSeongJae Park max_nr_accesses = max(r->nr_accesses, max_nr_accesses); 6373f49584bSSeongJae Park } 6383f49584bSSeongJae Park mmput(mm); 6393f49584bSSeongJae Park } 6403f49584bSSeongJae Park 6413f49584bSSeongJae Park return max_nr_accesses; 6423f49584bSSeongJae Park } 6433f49584bSSeongJae Park 6443f49584bSSeongJae Park /* 6453f49584bSSeongJae Park * Functions for the target validity check and cleanup 6463f49584bSSeongJae Park */ 6473f49584bSSeongJae Park 6483f49584bSSeongJae Park bool damon_va_target_valid(void *target) 6493f49584bSSeongJae Park { 6503f49584bSSeongJae Park struct damon_target *t = target; 6513f49584bSSeongJae Park struct task_struct *task; 6523f49584bSSeongJae Park 6533f49584bSSeongJae Park task = damon_get_task_struct(t); 6543f49584bSSeongJae Park if (task) { 6553f49584bSSeongJae Park put_task_struct(task); 6563f49584bSSeongJae Park return true; 6573f49584bSSeongJae Park } 6583f49584bSSeongJae Park 6593f49584bSSeongJae Park return false; 6603f49584bSSeongJae Park } 6613f49584bSSeongJae Park 6626dea8addSSeongJae Park #ifndef CONFIG_ADVISE_SYSCALLS 6636dea8addSSeongJae Park static int damos_madvise(struct damon_target *target, struct damon_region *r, 6646dea8addSSeongJae Park int behavior) 6656dea8addSSeongJae Park { 6666dea8addSSeongJae Park return -EINVAL; 6676dea8addSSeongJae Park } 6686dea8addSSeongJae Park #else 6696dea8addSSeongJae Park static int damos_madvise(struct damon_target *target, struct damon_region *r, 6706dea8addSSeongJae Park int behavior) 6716dea8addSSeongJae Park { 6726dea8addSSeongJae Park struct mm_struct *mm; 6736dea8addSSeongJae Park int ret = -ENOMEM; 6746dea8addSSeongJae Park 6756dea8addSSeongJae Park mm = damon_get_mm(target); 6766dea8addSSeongJae Park if (!mm) 6776dea8addSSeongJae Park goto out; 6786dea8addSSeongJae Park 6796dea8addSSeongJae Park ret = do_madvise(mm, PAGE_ALIGN(r->ar.start), 6806dea8addSSeongJae Park PAGE_ALIGN(r->ar.end - r->ar.start), behavior); 6816dea8addSSeongJae Park mmput(mm); 6826dea8addSSeongJae Park out: 6836dea8addSSeongJae Park return ret; 6846dea8addSSeongJae Park } 6856dea8addSSeongJae Park #endif /* CONFIG_ADVISE_SYSCALLS */ 6866dea8addSSeongJae Park 6876dea8addSSeongJae Park int damon_va_apply_scheme(struct damon_ctx *ctx, struct damon_target *t, 6886dea8addSSeongJae Park struct damon_region *r, struct damos *scheme) 6896dea8addSSeongJae Park { 6906dea8addSSeongJae Park int madv_action; 6916dea8addSSeongJae Park 6926dea8addSSeongJae Park switch (scheme->action) { 6936dea8addSSeongJae Park case DAMOS_WILLNEED: 6946dea8addSSeongJae Park madv_action = MADV_WILLNEED; 6956dea8addSSeongJae Park break; 6966dea8addSSeongJae Park case DAMOS_COLD: 6976dea8addSSeongJae Park madv_action = MADV_COLD; 6986dea8addSSeongJae Park break; 6996dea8addSSeongJae Park case DAMOS_PAGEOUT: 7006dea8addSSeongJae Park madv_action = MADV_PAGEOUT; 7016dea8addSSeongJae Park break; 7026dea8addSSeongJae Park case DAMOS_HUGEPAGE: 7036dea8addSSeongJae Park madv_action = MADV_HUGEPAGE; 7046dea8addSSeongJae Park break; 7056dea8addSSeongJae Park case DAMOS_NOHUGEPAGE: 7066dea8addSSeongJae Park madv_action = MADV_NOHUGEPAGE; 7076dea8addSSeongJae Park break; 708*2f0b548cSSeongJae Park case DAMOS_STAT: 709*2f0b548cSSeongJae Park return 0; 7106dea8addSSeongJae Park default: 7116dea8addSSeongJae Park pr_warn("Wrong action %d\n", scheme->action); 7126dea8addSSeongJae Park return -EINVAL; 7136dea8addSSeongJae Park } 7146dea8addSSeongJae Park 7156dea8addSSeongJae Park return damos_madvise(t, r, madv_action); 7166dea8addSSeongJae Park } 7176dea8addSSeongJae Park 7183f49584bSSeongJae Park void damon_va_set_primitives(struct damon_ctx *ctx) 7193f49584bSSeongJae Park { 7203f49584bSSeongJae Park ctx->primitive.init = damon_va_init; 7213f49584bSSeongJae Park ctx->primitive.update = damon_va_update; 7223f49584bSSeongJae Park ctx->primitive.prepare_access_checks = damon_va_prepare_access_checks; 7233f49584bSSeongJae Park ctx->primitive.check_accesses = damon_va_check_accesses; 7243f49584bSSeongJae Park ctx->primitive.reset_aggregated = NULL; 7253f49584bSSeongJae Park ctx->primitive.target_valid = damon_va_target_valid; 7263f49584bSSeongJae Park ctx->primitive.cleanup = NULL; 7276dea8addSSeongJae Park ctx->primitive.apply_scheme = damon_va_apply_scheme; 7283f49584bSSeongJae Park } 72917ccae8bSSeongJae Park 73017ccae8bSSeongJae Park #include "vaddr-test.h" 731