/*
 *  mm/mprotect.c
 *
 *  (C) Copyright 1994 Linus Torvalds
 *  (C) Copyright 2002 Christoph Hellwig
 *
 *  Address space accounting code	<alan@lxorguk.ukuu.org.uk>
 *  (C) Copyright 2002 Red Hat Inc, All Rights Reserved
 */

#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/shm.h>
#include <linux/mman.h>
#include <linux/fs.h>
#include <linux/highmem.h>
#include <linux/security.h>
#include <linux/mempolicy.h>
#include <linux/personality.h>
#include <linux/syscalls.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/mmu_notifier.h>
#include <linux/migrate.h>
#include <linux/perf_event.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>

#ifndef pgprot_modify
static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
{
        return newprot;
}
#endif

static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
                unsigned long addr, unsigned long end, pgprot_t newprot,
                int dirty_accountable, int prot_numa, bool *ret_all_same_node)
{
        struct mm_struct *mm = vma->vm_mm;
        pte_t *pte, oldpte;
        spinlock_t *ptl;
        unsigned long pages = 0;
        bool all_same_node = true;
        int last_nid = -1;

        pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
        arch_enter_lazy_mmu_mode();
        do {
                oldpte = *pte;
                if (pte_present(oldpte)) {
                        pte_t ptent;
                        bool updated = false;

                        ptent = ptep_modify_prot_start(mm, addr, pte);
                        if (!prot_numa) {
                                ptent = pte_modify(ptent, newprot);
                                updated = true;
                        } else {
                                struct page *page;

                                page = vm_normal_page(vma, addr, oldpte);
                                if (page) {
                                        int this_nid = page_to_nid(page);
                                        if (last_nid == -1)
                                                last_nid = this_nid;
                                        if (last_nid != this_nid)
                                                all_same_node = false;

                                        /* only check non-shared pages */
                                        if (!pte_numa(oldpte) &&
                                            page_mapcount(page) == 1) {
                                                ptent = pte_mknuma(ptent);
                                                updated = true;
                                        }
                                }
                        }

                        /*
                         * Avoid taking write faults for pages we know to be
                         * dirty.
                         */
                        if (dirty_accountable && pte_dirty(ptent)) {
                                ptent = pte_mkwrite(ptent);
                                updated = true;
                        }

                        if (updated)
                                pages++;
                        ptep_modify_prot_commit(mm, addr, pte, ptent);
                } else if (IS_ENABLED(CONFIG_MIGRATION) && !pte_file(oldpte)) {
                        swp_entry_t entry = pte_to_swp_entry(oldpte);

                        if (is_write_migration_entry(entry)) {
                                /*
                                 * A protection check is difficult so
                                 * just be safe and disable write
                                 */
                                make_migration_entry_read(&entry);
                                set_pte_at(mm, addr, pte,
                                           swp_entry_to_pte(entry));
                        }
                        pages++;
                }
        } while (pte++, addr += PAGE_SIZE, addr != end);
        arch_leave_lazy_mmu_mode();
        pte_unmap_unlock(pte - 1, ptl);

        *ret_all_same_node = all_same_node;
        return pages;
}

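/*
 * Mark the PMD entry itself pmd_numa so that a later NUMA hinting fault
 * in its range is taken once per PMD rather than once per PTE; the
 * !CONFIG_NUMA_BALANCING stub must never be called.
 */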
#ifdef CONFIG_NUMA_BALANCING
static inline void change_pmd_protnuma(struct mm_struct *mm, unsigned long addr,
                                       pmd_t *pmd)
{
        spin_lock(&mm->page_table_lock);
        set_pmd_at(mm, addr & PMD_MASK, pmd, pmd_mknuma(*pmd));
        spin_unlock(&mm->page_table_lock);
}
#else
static inline void change_pmd_protnuma(struct mm_struct *mm, unsigned long addr,
                                       pmd_t *pmd)
{
        BUG();
}
#endif /* CONFIG_NUMA_BALANCING */

static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
                pud_t *pud, unsigned long addr, unsigned long end,
                pgprot_t newprot, int dirty_accountable, int prot_numa)
{
        pmd_t *pmd;
        unsigned long next;
        unsigned long pages = 0;
        bool all_same_node;

        pmd = pmd_offset(pud, addr);
        do {
                next = pmd_addr_end(addr, end);
                if (pmd_trans_huge(*pmd)) {
                        if (next - addr != HPAGE_PMD_SIZE)
                                split_huge_page_pmd(vma, addr, pmd);
                        else if (change_huge_pmd(vma, pmd, addr, newprot,
                                                 prot_numa)) {
                                pages += HPAGE_PMD_NR;
                                continue;
                        }
                        /* fall through */
                }
                if (pmd_none_or_clear_bad(pmd))
                        continue;
                pages += change_pte_range(vma, pmd, addr, next, newprot,
                                dirty_accountable, prot_numa, &all_same_node);

                /*
                 * If we are changing protections for NUMA hinting faults then
                 * set pmd_numa if the examined pages were all on the same
                 * node. This allows a regular PMD to be handled as one fault
                 * and effectively batches the taking of the PTL
                 */
                if (prot_numa && all_same_node)
                        change_pmd_protnuma(vma->vm_mm, addr, pmd);
        } while (pmd++, addr = next, addr != end);

        return pages;
}

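/*
 * Walk the PUD entries covering [addr, end); empty or bad entries are
 * skipped, everything else is handed down to change_pmd_range().
 */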
static inline unsigned long change_pud_range(struct vm_area_struct *vma,
                pgd_t *pgd, unsigned long addr, unsigned long end,
                pgprot_t newprot, int dirty_accountable, int prot_numa)
{
        pud_t *pud;
        unsigned long next;
        unsigned long pages = 0;

        pud = pud_offset(pgd, addr);
        do {
                next = pud_addr_end(addr, end);
                if (pud_none_or_clear_bad(pud))
                        continue;
                pages += change_pmd_range(vma, pud, addr, next, newprot,
                                dirty_accountable, prot_numa);
        } while (pud++, addr = next, addr != end);

        return pages;
}

static unsigned long change_protection_range(struct vm_area_struct *vma,
                unsigned long addr, unsigned long end, pgprot_t newprot,
                int dirty_accountable, int prot_numa)
{
        struct mm_struct *mm = vma->vm_mm;
        pgd_t *pgd;
        unsigned long next;
        unsigned long start = addr;
        unsigned long pages = 0;

        BUG_ON(addr >= end);
        pgd = pgd_offset(mm, addr);
        flush_cache_range(vma, addr, end);
        do {
                next = pgd_addr_end(addr, end);
                if (pgd_none_or_clear_bad(pgd))
                        continue;
                pages += change_pud_range(vma, pgd, addr, next, newprot,
                                dirty_accountable, prot_numa);
        } while (pgd++, addr = next, addr != end);

        /* Only flush the TLB if we actually modified any entries: */
        if (pages)
                flush_tlb_range(vma, start, end);

        return pages;
}

unsigned long change_protection(struct vm_area_struct *vma, unsigned long start,
                unsigned long end, pgprot_t newprot,
                int dirty_accountable, int prot_numa)
{
        struct mm_struct *mm = vma->vm_mm;
        unsigned long pages;

        mmu_notifier_invalidate_range_start(mm, start, end);
        if (is_vm_hugetlb_page(vma))
                pages = hugetlb_change_protection(vma, start, end, newprot);
        else
                pages = change_protection_range(vma, start, end, newprot,
                                dirty_accountable, prot_numa);
        mmu_notifier_invalidate_range_end(mm, start, end);

        return pages;
}

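/*
 * Apply @newflags to the range [start, end) of @vma, merging with the
 * neighbouring VMAs when possible and splitting @vma when the range does
 * not cover it entirely.  *pprev is set to the VMA that now covers @start.
 * Called with mmap_sem held for writing; returns 0 or a negative errno.
 */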
int
mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
        unsigned long start, unsigned long end, unsigned long newflags)
{
        struct mm_struct *mm = vma->vm_mm;
        unsigned long oldflags = vma->vm_flags;
        long nrpages = (end - start) >> PAGE_SHIFT;
        unsigned long charged = 0;
        pgoff_t pgoff;
        int error;
        int dirty_accountable = 0;

        if (newflags == oldflags) {
                *pprev = vma;
                return 0;
        }

        /*
         * If we make a private mapping writable we increase our commit;
         * but (without finer accounting) cannot reduce our commit if we
         * make it unwritable again. hugetlb mappings were accounted for
         * even if read-only, so there is no need to account for them here.
         */
        if (newflags & VM_WRITE) {
                if (!(oldflags & (VM_ACCOUNT|VM_WRITE|VM_HUGETLB|
                                  VM_SHARED|VM_NORESERVE))) {
                        charged = nrpages;
                        if (security_vm_enough_memory_mm(mm, charged))
                                return -ENOMEM;
                        newflags |= VM_ACCOUNT;
                }
        }

        /*
         * First try to merge with previous and/or next vma.
         */
        pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
        *pprev = vma_merge(mm, *pprev, start, end, newflags,
                           vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma));
        if (*pprev) {
                vma = *pprev;
                goto success;
        }

        *pprev = vma;

        if (start != vma->vm_start) {
                error = split_vma(mm, vma, start, 1);
                if (error)
                        goto fail;
        }

        if (end != vma->vm_end) {
                error = split_vma(mm, vma, end, 0);
                if (error)
                        goto fail;
        }

success:
        /*
         * vm_flags and vm_page_prot are protected by the mmap_sem
         * held in write mode.
         */
        vma->vm_flags = newflags;
        vma->vm_page_prot = pgprot_modify(vma->vm_page_prot,
                                          vm_get_page_prot(newflags));

        if (vma_wants_writenotify(vma)) {
                vma->vm_page_prot = vm_get_page_prot(newflags & ~VM_SHARED);
                dirty_accountable = 1;
        }

        change_protection(vma, start, end, vma->vm_page_prot,
                          dirty_accountable, 0);

        vm_stat_account(mm, oldflags, vma->vm_file, -nrpages);
        vm_stat_account(mm, newflags, vma->vm_file, nrpages);
        perf_event_mmap(vma);
        return 0;

fail:
        vm_unacct_memory(charged);
        return error;
}

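/*
 * mprotect(2): change the protection of the pages in [start, start + len).
 * PROT_GROWSDOWN and PROT_GROWSUP extend the change to the growable side
 * of a stack-like mapping; requesting both at once is rejected.
 */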
SYSCALL_DEFINE3(mprotect, unsigned long, start, size_t, len,
                unsigned long, prot)
{
        unsigned long vm_flags, nstart, end, tmp, reqprot;
        struct vm_area_struct *vma, *prev;
        int error = -EINVAL;
        const int grows = prot & (PROT_GROWSDOWN|PROT_GROWSUP);
        prot &= ~(PROT_GROWSDOWN|PROT_GROWSUP);
        if (grows == (PROT_GROWSDOWN|PROT_GROWSUP)) /* can't be both */
                return -EINVAL;

        if (start & ~PAGE_MASK)
                return -EINVAL;
        if (!len)
                return 0;
        len = PAGE_ALIGN(len);
        end = start + len;
        if (end <= start)
                return -ENOMEM;
        if (!arch_validate_prot(prot))
                return -EINVAL;

        reqprot = prot;
        /*
         * Does the application expect PROT_READ to imply PROT_EXEC:
         */
        if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC))
                prot |= PROT_EXEC;

        vm_flags = calc_vm_prot_bits(prot);

        down_write(&current->mm->mmap_sem);

        vma = find_vma(current->mm, start);
        error = -ENOMEM;
        if (!vma)
                goto out;
        prev = vma->vm_prev;
        if (unlikely(grows & PROT_GROWSDOWN)) {
                if (vma->vm_start >= end)
                        goto out;
                start = vma->vm_start;
                error = -EINVAL;
                if (!(vma->vm_flags & VM_GROWSDOWN))
                        goto out;
        } else {
                if (vma->vm_start > start)
                        goto out;
                if (unlikely(grows & PROT_GROWSUP)) {
                        end = vma->vm_end;
                        error = -EINVAL;
                        if (!(vma->vm_flags & VM_GROWSUP))
                                goto out;
                }
        }
        if (start > vma->vm_start)
                prev = vma;

        for (nstart = start ; ; ) {
                unsigned long newflags;

                /* Here we know that vma->vm_start <= nstart < vma->vm_end. */

                newflags = vm_flags;
                newflags |= (vma->vm_flags & ~(VM_READ | VM_WRITE | VM_EXEC));

                /* newflags >> 4 shifts VM_MAY% into the place of VM_% */
                if ((newflags & ~(newflags >> 4)) & (VM_READ | VM_WRITE | VM_EXEC)) {
                        error = -EACCES;
                        goto out;
                }

                error = security_file_mprotect(vma, reqprot, prot);
                if (error)
                        goto out;

                tmp = vma->vm_end;
                if (tmp > end)
                        tmp = end;
                error = mprotect_fixup(vma, &prev, nstart, tmp, newflags);
                if (error)
                        goto out;
                nstart = tmp;

                if (nstart < prev->vm_end)
                        nstart = prev->vm_end;
                if (nstart >= end)
                        goto out;

                vma = prev->vm_next;
                if (!vma || vma->vm_start != nstart) {
                        error = -ENOMEM;
                        goto out;
                }
        }
out:
        up_write(&current->mm->mmap_sem);
        return error;
}