// SPDX-License-Identifier: GPL-2.0
/*
 *	linux/mm/mincore.c
 *
 * Copyright (C) 1994-2006  Linus Torvalds
 */

/*
 * The mincore() system call.
 */
#include <linux/pagemap.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/syscalls.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/shmem_fs.h>
#include <linux/hugetlb.h>

#include <linux/uaccess.h>
#include <asm/pgtable.h>

static int mincore_hugetlb(pte_t *pte, unsigned long hmask, unsigned long addr,
			unsigned long end, struct mm_walk *walk)
{
#ifdef CONFIG_HUGETLB_PAGE
	unsigned char present;
	unsigned char *vec = walk->private;

	/*
	 * Huge pages mapped by a user process are always resident in RAM
	 * and never swapped out, but in principle this still needs to be
	 * checked.
	 */
	present = pte && !huge_pte_none(huge_ptep_get(pte));
	for (; addr != end; vec++, addr += PAGE_SIZE)
		*vec = present;
	walk->private = vec;
#else
	BUG();
#endif
	return 0;
}

static int mincore_unmapped_range(unsigned long addr, unsigned long end,
				  struct mm_walk *walk)
{
	unsigned char *vec = walk->private;
	unsigned long nr = (end - addr) >> PAGE_SHIFT;

	/* Unmapped ranges are reported as not resident. */
	memset(vec, 0, nr);
	walk->private += nr;
	return 0;
}

static int mincore_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
			     struct mm_walk *walk)
{
	spinlock_t *ptl;
	struct vm_area_struct *vma = walk->vma;
	pte_t *ptep;
	unsigned char *vec = walk->private;
	int nr = (end - addr) >> PAGE_SHIFT;

	/* A mapped huge PMD covers the whole range: report it resident. */
	ptl = pmd_trans_huge_lock(pmd, vma);
	if (ptl) {
		memset(vec, 1, nr);
		spin_unlock(ptl);
		goto out;
	}

	/* We'll consider a THP page under construction to be there */
	if (pmd_trans_unstable(pmd)) {
		memset(vec, 1, nr);
		goto out;
	}

	ptep = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
	for (; addr != end; ptep++, addr += PAGE_SIZE) {
		pte_t pte = *ptep;

		if (pte_none(pte))
			*vec = 0;
		else if (pte_present(pte))
			*vec = 1;
		else { /* pte is a swap entry */
			swp_entry_t entry = pte_to_swp_entry(pte);

			/*
			 * migration or hwpoison entries are always
			 * uptodate
			 */
			*vec = !!non_swap_entry(entry);
		}
		vec++;
	}
	pte_unmap_unlock(ptep - 1, ptl);
out:
	walk->private += nr;
	cond_resched();
	return 0;
}

/*
 * Do a chunk of "sys_mincore()". We've already checked
 * all the arguments, we hold the mmap semaphore: we should
 * just return the amount of info we're asked for.
 */
static long do_mincore(unsigned long addr, unsigned long pages, unsigned char *vec)
{
	struct vm_area_struct *vma;
	unsigned long end;
	int err;
	struct mm_walk mincore_walk = {
		.pmd_entry = mincore_pte_range,
		.pte_hole = mincore_unmapped_range,
		.hugetlb_entry = mincore_hugetlb,
		.private = vec,
	};

	vma = find_vma(current->mm, addr);
	if (!vma || addr < vma->vm_start)
		return -ENOMEM;
	mincore_walk.mm = vma->vm_mm;
	/* Never walk past the end of this VMA; the caller loops per VMA. */
	end = min(vma->vm_end, addr + (pages << PAGE_SHIFT));
	err = walk_page_range(addr, end, &mincore_walk);
	if (err < 0)
		return err;
	return (end - addr) >> PAGE_SHIFT;
}
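
/*
 * Worked example of the chunking contract above (illustrative numbers,
 * not taken from the source): suppose sys_mincore() requests
 * pages == 512, but addr lies three pages before the end of its VMA.
 * do_mincore() clamps end to vma->vm_end, fills exactly three vector
 * bytes, and returns 3. The caller copies those three bytes out,
 * advances start by 3 << PAGE_SHIFT and vec by 3, and calls again; if
 * the next address is unmapped, find_vma() yields no covering VMA and
 * -ENOMEM is returned.
 */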
/*
 * The mincore(2) system call.
 *
 * mincore() returns the memory residency status of the pages in the
 * current process's address space specified by [addr, addr + len).
 * The status is returned in a vector of bytes. The least significant
 * bit of each byte is 1 if the referenced page is in memory, otherwise
 * it is zero.
 *
 * Because the status of a page can change after mincore() checks it
 * but before it returns to the application, the returned vector may
 * contain stale information. Only locked pages are guaranteed to
 * remain in memory.
 *
 * return values:
 *  zero    - success
 *  -EFAULT - vec points to an illegal address
 *  -EINVAL - addr is not a multiple of PAGE_SIZE
 *  -ENOMEM - Addresses in the range [addr, addr + len) are
 *		invalid for the address space of this process, or
 *		specify one or more pages which are not currently
 *		mapped
 *  -EAGAIN - A kernel resource was temporarily unavailable.
 */
SYSCALL_DEFINE3(mincore, unsigned long, start, size_t, len,
		unsigned char __user *, vec)
{
	long retval;
	unsigned long pages;
	unsigned char *tmp;

	/* Check the start address: needs to be page-aligned.. */
	if (start & ~PAGE_MASK)
		return -EINVAL;

	/* ..and we need to be passed a valid user-space range */
	if (!access_ok((void __user *) start, len))
		return -ENOMEM;

	/* This also avoids any overflows on PAGE_ALIGN */
	pages = len >> PAGE_SHIFT;
	pages += (offset_in_page(len)) != 0;

	if (!access_ok(vec, pages))
		return -EFAULT;

	tmp = (void *) __get_free_page(GFP_USER);
	if (!tmp)
		return -EAGAIN;

	retval = 0;
	while (pages) {
		/*
		 * Do at most PAGE_SIZE entries per iteration, due to
		 * the temporary buffer size.
		 */
		down_read(&current->mm->mmap_sem);
		retval = do_mincore(start, min(pages, PAGE_SIZE), tmp);
		up_read(&current->mm->mmap_sem);

		if (retval <= 0)
			break;
		if (copy_to_user(vec, tmp, retval)) {
			retval = -EFAULT;
			break;
		}
		pages -= retval;
		vec += retval;
		start += retval << PAGE_SHIFT;
		retval = 0;
	}
	free_page((unsigned long) tmp);
	return retval;
}
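
/*
 * Illustrative user-space sketch of calling mincore(2), guarded by
 * #if 0 so it is not built with the kernel. It demonstrates the
 * contract documented above: one vector byte per page of the mapping,
 * with only the least significant bit meaningful. The file name
 * "data.bin" is a placeholder and error handling is abbreviated; this
 * is a minimal sketch, not a reference implementation.
 */
#if 0
#define _DEFAULT_SOURCE		/* glibc: expose the mincore() prototype */
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>

int main(void)
{
	long page = sysconf(_SC_PAGESIZE);
	struct stat st;
	int fd = open("data.bin", O_RDONLY);	/* placeholder file */

	if (fd < 0 || fstat(fd, &st) < 0 || st.st_size == 0)
		return EXIT_FAILURE;

	size_t len = st.st_size;
	size_t npages = (len + page - 1) / page; /* round up, one byte per page */
	unsigned char *vec = malloc(npages);
	void *map = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);

	if (!vec || map == MAP_FAILED)
		return EXIT_FAILURE;

	/* start must be page-aligned (-EINVAL otherwise); mmap() guarantees that */
	if (mincore(map, len, vec) != 0) {
		perror("mincore");
		return EXIT_FAILURE;
	}

	/* Only the least significant bit of each byte is meaningful. */
	for (size_t i = 0; i < npages; i++)
		printf("page %zu: %s\n", i,
		       (vec[i] & 1) ? "resident" : "not resident");

	free(vec);
	munmap(map, len);
	close(fd);
	return EXIT_SUCCESS;
}
#endif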