/*
 *  mm/mprotect.c
 *
 *  (C) Copyright 1994 Linus Torvalds
 *  (C) Copyright 2002 Christoph Hellwig
 *
 *  Address space accounting code	<alan@lxorguk.ukuu.org.uk>
 *  (C) Copyright 2002 Red Hat Inc, All Rights Reserved
 */

#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/shm.h>
#include <linux/mman.h>
#include <linux/fs.h>
#include <linux/highmem.h>
#include <linux/security.h>
#include <linux/mempolicy.h>
#include <linux/personality.h>
#include <linux/syscalls.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/mmu_notifier.h>
#include <linux/migrate.h>
#include <linux/perf_event.h>
#include <linux/ksm.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>

#ifndef pgprot_modify
static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
{
	return newprot;
}
#endif

static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
		unsigned long addr, unsigned long end, pgprot_t newprot,
		int dirty_accountable, int prot_numa)
{
	struct mm_struct *mm = vma->vm_mm;
	pte_t *pte, oldpte;
	spinlock_t *ptl;
	unsigned long pages = 0;

	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
	arch_enter_lazy_mmu_mode();
	do {
		oldpte = *pte;
		if (pte_present(oldpte)) {
			pte_t ptent;
			bool updated = false;

			if (!prot_numa) {
				ptent = ptep_modify_prot_start(mm, addr, pte);
				if (pte_numa(ptent))
					ptent = pte_mknonnuma(ptent);
				ptent = pte_modify(ptent, newprot);
				/*
				 * Avoid taking write faults for pages we
				 * know to be dirty.
				 */
				if (dirty_accountable && pte_dirty(ptent))
					ptent = pte_mkwrite(ptent);
				ptep_modify_prot_commit(mm, addr, pte, ptent);
				updated = true;
			} else {
				struct page *page;

				page = vm_normal_page(vma, addr, oldpte);
				if (page && !PageKsm(page)) {
					if (!pte_numa(oldpte)) {
						ptep_set_numa(mm, addr, pte);
						updated = true;
					}
				}
			}
			if (updated)
				pages++;
		} else if (IS_ENABLED(CONFIG_MIGRATION) && !pte_file(oldpte)) {
			swp_entry_t entry = pte_to_swp_entry(oldpte);

			if (is_write_migration_entry(entry)) {
				pte_t newpte;
				/*
				 * A protection check is difficult so
				 * just be safe and disable write
				 */
				make_migration_entry_read(&entry);
				newpte = swp_entry_to_pte(entry);
				if (pte_swp_soft_dirty(oldpte))
					newpte = pte_swp_mksoft_dirty(newpte);
				set_pte_at(mm, addr, pte, newpte);

				pages++;
			}
		}
	} while (pte++, addr += PAGE_SIZE, addr != end);
	arch_leave_lazy_mmu_mode();
	pte_unmap_unlock(pte - 1, ptl);

	return pages;
}

static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
		pud_t *pud, unsigned long addr, unsigned long end,
		pgprot_t newprot, int dirty_accountable, int prot_numa)
{
	pmd_t *pmd;
	unsigned long next;
	unsigned long pages = 0;
	unsigned long nr_huge_updates = 0;

	pmd = pmd_offset(pud, addr);
	do {
		unsigned long this_pages;

		next = pmd_addr_end(addr, end);
		if (pmd_trans_huge(*pmd)) {
			if (next - addr != HPAGE_PMD_SIZE)
				split_huge_page_pmd(vma, addr, pmd);
			else {
				int nr_ptes = change_huge_pmd(vma, pmd, addr,
						newprot, prot_numa);

				if (nr_ptes) {
					if (nr_ptes == HPAGE_PMD_NR) {
						pages += HPAGE_PMD_NR;
						nr_huge_updates++;
					}
					continue;
				}
			}
			/* fall through */
		}
		if (pmd_none_or_clear_bad(pmd))
			continue;
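		/*
		 * Update the individual ptes mapped by this pmd;
		 * change_pte_range() returns how many entries it
		 * actually changed.
		 */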
		this_pages = change_pte_range(vma, pmd, addr, next, newprot,
				 dirty_accountable, prot_numa);
		pages += this_pages;
	} while (pmd++, addr = next, addr != end);

	if (nr_huge_updates)
		count_vm_numa_events(NUMA_HUGE_PTE_UPDATES, nr_huge_updates);
	return pages;
}

static inline unsigned long change_pud_range(struct vm_area_struct *vma,
		pgd_t *pgd, unsigned long addr, unsigned long end,
		pgprot_t newprot, int dirty_accountable, int prot_numa)
{
	pud_t *pud;
	unsigned long next;
	unsigned long pages = 0;

	pud = pud_offset(pgd, addr);
	do {
		next = pud_addr_end(addr, end);
		if (pud_none_or_clear_bad(pud))
			continue;
		pages += change_pmd_range(vma, pud, addr, next, newprot,
				 dirty_accountable, prot_numa);
	} while (pud++, addr = next, addr != end);

	return pages;
}

static unsigned long change_protection_range(struct vm_area_struct *vma,
		unsigned long addr, unsigned long end, pgprot_t newprot,
		int dirty_accountable, int prot_numa)
{
	struct mm_struct *mm = vma->vm_mm;
	pgd_t *pgd;
	unsigned long next;
	unsigned long start = addr;
	unsigned long pages = 0;

	BUG_ON(addr >= end);
	pgd = pgd_offset(mm, addr);
	flush_cache_range(vma, addr, end);
	set_tlb_flush_pending(mm);
	do {
		next = pgd_addr_end(addr, end);
		if (pgd_none_or_clear_bad(pgd))
			continue;
		pages += change_pud_range(vma, pgd, addr, next, newprot,
				 dirty_accountable, prot_numa);
	} while (pgd++, addr = next, addr != end);

	/* Only flush the TLB if we actually modified any entries: */
	if (pages)
		flush_tlb_range(vma, start, end);
	clear_tlb_flush_pending(mm);

	return pages;
}

unsigned long change_protection(struct vm_area_struct *vma, unsigned long start,
		       unsigned long end, pgprot_t newprot,
		       int dirty_accountable, int prot_numa)
{
	struct mm_struct *mm = vma->vm_mm;
	unsigned long pages;

	mmu_notifier_invalidate_range_start(mm, start, end);
	if (is_vm_hugetlb_page(vma))
		pages = hugetlb_change_protection(vma, start, end, newprot);
	else
		pages = change_protection_range(vma, start, end, newprot,
				dirty_accountable, prot_numa);
	mmu_notifier_invalidate_range_end(mm, start, end);

	return pages;
}

int
mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
	unsigned long start, unsigned long end, unsigned long newflags)
{
	struct mm_struct *mm = vma->vm_mm;
	unsigned long oldflags = vma->vm_flags;
	long nrpages = (end - start) >> PAGE_SHIFT;
	unsigned long charged = 0;
	pgoff_t pgoff;
	int error;
	int dirty_accountable = 0;

	if (newflags == oldflags) {
		*pprev = vma;
		return 0;
	}

	/*
	 * If we make a private mapping writable we increase our commit;
	 * but (without finer accounting) cannot reduce our commit if we
	 * make it unwritable again. hugetlb mappings were accounted for
	 * even if read-only, so there is no need to account for them here.
	 */
	if (newflags & VM_WRITE) {
		if (!(oldflags & (VM_ACCOUNT|VM_WRITE|VM_HUGETLB|
						VM_SHARED|VM_NORESERVE))) {
			charged = nrpages;
			if (security_vm_enough_memory_mm(mm, charged))
				return -ENOMEM;
			newflags |= VM_ACCOUNT;
		}
	}

	/*
	 * First try to merge with previous and/or next vma.
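	 * A successful merge means a neighbouring vma already carries the
	 * requested flags, so the range needs no split and we can go
	 * straight to "success" with the merged vma.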
	 */
	pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
	*pprev = vma_merge(mm, *pprev, start, end, newflags,
			vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma));
	if (*pprev) {
		vma = *pprev;
		goto success;
	}

	*pprev = vma;

	if (start != vma->vm_start) {
		error = split_vma(mm, vma, start, 1);
		if (error)
			goto fail;
	}

	if (end != vma->vm_end) {
		error = split_vma(mm, vma, end, 0);
		if (error)
			goto fail;
	}

success:
	/*
	 * vm_flags and vm_page_prot are protected by the mmap_sem
	 * held in write mode.
	 */
	vma->vm_flags = newflags;
	vma->vm_page_prot = pgprot_modify(vma->vm_page_prot,
					  vm_get_page_prot(newflags));

	if (vma_wants_writenotify(vma)) {
		vma->vm_page_prot = vm_get_page_prot(newflags & ~VM_SHARED);
		dirty_accountable = 1;
	}

	change_protection(vma, start, end, vma->vm_page_prot,
			  dirty_accountable, 0);

	vm_stat_account(mm, oldflags, vma->vm_file, -nrpages);
	vm_stat_account(mm, newflags, vma->vm_file, nrpages);
	perf_event_mmap(vma);
	return 0;

fail:
	vm_unacct_memory(charged);
	return error;
}

SYSCALL_DEFINE3(mprotect, unsigned long, start, size_t, len,
		unsigned long, prot)
{
	unsigned long vm_flags, nstart, end, tmp, reqprot;
	struct vm_area_struct *vma, *prev;
	int error = -EINVAL;
	const int grows = prot & (PROT_GROWSDOWN|PROT_GROWSUP);
	prot &= ~(PROT_GROWSDOWN|PROT_GROWSUP);
	if (grows == (PROT_GROWSDOWN|PROT_GROWSUP)) /* can't be both */
		return -EINVAL;

	if (start & ~PAGE_MASK)
		return -EINVAL;
	if (!len)
		return 0;
	len = PAGE_ALIGN(len);
	end = start + len;
	if (end <= start)
		return -ENOMEM;
	if (!arch_validate_prot(prot))
		return -EINVAL;

	reqprot = prot;
	/*
	 * Does the application expect PROT_READ to imply PROT_EXEC:
	 */
	if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC))
		prot |= PROT_EXEC;

	vm_flags = calc_vm_prot_bits(prot);

	down_write(&current->mm->mmap_sem);

	vma = find_vma(current->mm, start);
	error = -ENOMEM;
	if (!vma)
		goto out;
	prev = vma->vm_prev;
	if (unlikely(grows & PROT_GROWSDOWN)) {
		if (vma->vm_start >= end)
			goto out;
		start = vma->vm_start;
		error = -EINVAL;
		if (!(vma->vm_flags & VM_GROWSDOWN))
			goto out;
	} else {
		if (vma->vm_start > start)
			goto out;
		if (unlikely(grows & PROT_GROWSUP)) {
			end = vma->vm_end;
			error = -EINVAL;
			if (!(vma->vm_flags & VM_GROWSUP))
				goto out;
		}
	}
	if (start > vma->vm_start)
		prev = vma;

	for (nstart = start ; ; ) {
		unsigned long newflags;

		/* Here we know that vma->vm_start <= nstart < vma->vm_end. */
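		/*
		 * newflags takes the requested protection bits and keeps
		 * every other flag of the current vma; the VM_MAY* check
		 * below rejects protections the vma is not allowed to gain.
		 */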

		newflags = vm_flags;
		newflags |= (vma->vm_flags & ~(VM_READ | VM_WRITE | VM_EXEC));

		/* newflags >> 4 shifts VM_MAY% in place of VM_% */
		if ((newflags & ~(newflags >> 4)) & (VM_READ | VM_WRITE | VM_EXEC)) {
			error = -EACCES;
			goto out;
		}

		error = security_file_mprotect(vma, reqprot, prot);
		if (error)
			goto out;

		tmp = vma->vm_end;
		if (tmp > end)
			tmp = end;
		error = mprotect_fixup(vma, &prev, nstart, tmp, newflags);
		if (error)
			goto out;
		nstart = tmp;

		if (nstart < prev->vm_end)
			nstart = prev->vm_end;
		if (nstart >= end)
			goto out;

		vma = prev->vm_next;
		if (!vma || vma->vm_start != nstart) {
			error = -ENOMEM;
			goto out;
		}
	}
out:
	up_write(&current->mm->mmap_sem);
	return error;
}