/*
 *  mm/mprotect.c
 *
 *  (C) Copyright 1994 Linus Torvalds
 *  (C) Copyright 2002 Christoph Hellwig
 *
 *  Address space accounting code	<alan@lxorguk.ukuu.org.uk>
 *  (C) Copyright 2002 Red Hat Inc, All Rights Reserved
 */

#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/shm.h>
#include <linux/mman.h>
#include <linux/fs.h>
#include <linux/highmem.h>
#include <linux/security.h>
#include <linux/mempolicy.h>
#include <linux/personality.h>
#include <linux/syscalls.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/mmu_notifier.h>
#include <linux/migrate.h>
#include <linux/perf_event.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>

#ifndef pgprot_modify
static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
{
	return newprot;
}
#endif

static unsigned long change_pte_range(struct mm_struct *mm, pmd_t *pmd,
		unsigned long addr, unsigned long end, pgprot_t newprot,
		int dirty_accountable)
{
	pte_t *pte, oldpte;
	spinlock_t *ptl;
	unsigned long pages = 0;

	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
	arch_enter_lazy_mmu_mode();
	do {
		oldpte = *pte;
		if (pte_present(oldpte)) {
			pte_t ptent;

			ptent = ptep_modify_prot_start(mm, addr, pte);
			ptent = pte_modify(ptent, newprot);

			/*
			 * Avoid taking write faults for pages we know to be
			 * dirty.
			 */
			if (dirty_accountable && pte_dirty(ptent))
				ptent = pte_mkwrite(ptent);

			ptep_modify_prot_commit(mm, addr, pte, ptent);
			pages++;
		} else if (IS_ENABLED(CONFIG_MIGRATION) && !pte_file(oldpte)) {
			swp_entry_t entry = pte_to_swp_entry(oldpte);

			if (is_write_migration_entry(entry)) {
				/*
				 * A protection check is difficult so
				 * just be safe and disable write
				 */
				make_migration_entry_read(&entry);
				set_pte_at(mm, addr, pte,
					swp_entry_to_pte(entry));
			}
			pages++;
		}
	} while (pte++, addr += PAGE_SIZE, addr != end);
	arch_leave_lazy_mmu_mode();
	pte_unmap_unlock(pte - 1, ptl);

	return pages;
}

static inline unsigned long change_pmd_range(struct vm_area_struct *vma, pud_t *pud,
		unsigned long addr, unsigned long end, pgprot_t newprot,
		int dirty_accountable)
{
	pmd_t *pmd;
	unsigned long next;
	unsigned long pages = 0;

	pmd = pmd_offset(pud, addr);
	do {
		next = pmd_addr_end(addr, end);
		if (pmd_trans_huge(*pmd)) {
			if (next - addr != HPAGE_PMD_SIZE)
				split_huge_page_pmd(vma->vm_mm, pmd);
			else if (change_huge_pmd(vma, pmd, addr, newprot)) {
				pages += HPAGE_PMD_NR;
				continue;
			}
			/* fall through */
		}
		if (pmd_none_or_clear_bad(pmd))
			continue;
		pages += change_pte_range(vma->vm_mm, pmd, addr, next, newprot,
					  dirty_accountable);
	} while (pmd++, addr = next, addr != end);

	return pages;
}

static inline unsigned long change_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
		unsigned long addr, unsigned long end, pgprot_t newprot,
		int dirty_accountable)
{
	pud_t *pud;
	unsigned long next;
	unsigned long pages = 0;

	pud = pud_offset(pgd, addr);
	do {
		next = pud_addr_end(addr, end);
		if (pud_none_or_clear_bad(pud))
			continue;
		pages += change_pmd_range(vma, pud, addr, next, newprot,
					  dirty_accountable);
	} while (pud++, addr = next, addr != end);

	return pages;
}
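/*
 * change_protection_range() drives the helpers above, walking the page
 * tables top-down (pgd -> pud -> pmd -> pte) over [addr, end).  Each level
 * propagates a count of updated entries back up, so the final TLB flush
 * can be skipped when nothing was actually modified.
 */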
static unsigned long change_protection_range(struct vm_area_struct *vma,
		unsigned long addr, unsigned long end, pgprot_t newprot,
		int dirty_accountable)
{
	struct mm_struct *mm = vma->vm_mm;
	pgd_t *pgd;
	unsigned long next;
	unsigned long start = addr;
	unsigned long pages = 0;

	BUG_ON(addr >= end);
	pgd = pgd_offset(mm, addr);
	flush_cache_range(vma, addr, end);
	do {
		next = pgd_addr_end(addr, end);
		if (pgd_none_or_clear_bad(pgd))
			continue;
		pages += change_pud_range(vma, pgd, addr, next, newprot,
					  dirty_accountable);
	} while (pgd++, addr = next, addr != end);

	/* Only flush the TLB if we actually modified any entries: */
	if (pages)
		flush_tlb_range(vma, start, end);

	return pages;
}

unsigned long change_protection(struct vm_area_struct *vma, unsigned long start,
		unsigned long end, pgprot_t newprot,
		int dirty_accountable)
{
	struct mm_struct *mm = vma->vm_mm;
	unsigned long pages;

	mmu_notifier_invalidate_range_start(mm, start, end);
	if (is_vm_hugetlb_page(vma))
		pages = hugetlb_change_protection(vma, start, end, newprot);
	else
		pages = change_protection_range(vma, start, end, newprot,
						dirty_accountable);
	mmu_notifier_invalidate_range_end(mm, start, end);

	return pages;
}
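/*
 * mprotect_fixup() applies @newflags to the range [start, end) of @vma:
 * merge with or split the surrounding vmas as needed, charge the range
 * against the commit limit when a private mapping becomes writable, then
 * rewrite the page protections via change_protection().  Called with
 * mmap_sem held for writing.
 */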
int
mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
	unsigned long start, unsigned long end, unsigned long newflags)
{
	struct mm_struct *mm = vma->vm_mm;
	unsigned long oldflags = vma->vm_flags;
	long nrpages = (end - start) >> PAGE_SHIFT;
	unsigned long charged = 0;
	pgoff_t pgoff;
	int error;
	int dirty_accountable = 0;

	if (newflags == oldflags) {
		*pprev = vma;
		return 0;
	}

	/*
	 * If we make a private mapping writable we increase our commit;
	 * but (without finer accounting) cannot reduce our commit if we
	 * make it unwritable again. hugetlb mappings were accounted for
	 * even if read-only, so there is no need to account for them here.
	 */
	if (newflags & VM_WRITE) {
		if (!(oldflags & (VM_ACCOUNT|VM_WRITE|VM_HUGETLB|
						VM_SHARED|VM_NORESERVE))) {
			charged = nrpages;
			if (security_vm_enough_memory_mm(mm, charged))
				return -ENOMEM;
			newflags |= VM_ACCOUNT;
		}
	}

	/*
	 * First try to merge with previous and/or next vma.
	 */
	pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
	*pprev = vma_merge(mm, *pprev, start, end, newflags,
			vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma));
	if (*pprev) {
		vma = *pprev;
		goto success;
	}

	*pprev = vma;

	if (start != vma->vm_start) {
		error = split_vma(mm, vma, start, 1);
		if (error)
			goto fail;
	}

	if (end != vma->vm_end) {
		error = split_vma(mm, vma, end, 0);
		if (error)
			goto fail;
	}

success:
	/*
	 * vm_flags and vm_page_prot are protected by the mmap_sem
	 * held in write mode.
	 */
	vma->vm_flags = newflags;
	vma->vm_page_prot = pgprot_modify(vma->vm_page_prot,
					  vm_get_page_prot(newflags));

	if (vma_wants_writenotify(vma)) {
		vma->vm_page_prot = vm_get_page_prot(newflags & ~VM_SHARED);
		dirty_accountable = 1;
	}

	change_protection(vma, start, end, vma->vm_page_prot, dirty_accountable);

	vm_stat_account(mm, oldflags, vma->vm_file, -nrpages);
	vm_stat_account(mm, newflags, vma->vm_file, nrpages);
	perf_event_mmap(vma);
	return 0;

fail:
	vm_unacct_memory(charged);
	return error;
}

SYSCALL_DEFINE3(mprotect, unsigned long, start, size_t, len,
		unsigned long, prot)
{
	unsigned long vm_flags, nstart, end, tmp, reqprot;
	struct vm_area_struct *vma, *prev;
	int error = -EINVAL;
	const int grows = prot & (PROT_GROWSDOWN|PROT_GROWSUP);
	prot &= ~(PROT_GROWSDOWN|PROT_GROWSUP);
	if (grows == (PROT_GROWSDOWN|PROT_GROWSUP)) /* can't be both */
		return -EINVAL;

	if (start & ~PAGE_MASK)
		return -EINVAL;
	if (!len)
		return 0;
	len = PAGE_ALIGN(len);
	end = start + len;
	if (end <= start)
		return -ENOMEM;
	if (!arch_validate_prot(prot))
		return -EINVAL;

	reqprot = prot;
	/*
	 * Does the application expect PROT_READ to imply PROT_EXEC:
	 */
	if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC))
		prot |= PROT_EXEC;

	vm_flags = calc_vm_prot_bits(prot);

	down_write(&current->mm->mmap_sem);

	vma = find_vma(current->mm, start);
	error = -ENOMEM;
	if (!vma)
		goto out;
	prev = vma->vm_prev;
	if (unlikely(grows & PROT_GROWSDOWN)) {
		if (vma->vm_start >= end)
			goto out;
		start = vma->vm_start;
		error = -EINVAL;
		if (!(vma->vm_flags & VM_GROWSDOWN))
			goto out;
	} else {
		if (vma->vm_start > start)
			goto out;
		if (unlikely(grows & PROT_GROWSUP)) {
			end = vma->vm_end;
			error = -EINVAL;
			if (!(vma->vm_flags & VM_GROWSUP))
				goto out;
		}
	}
	if (start > vma->vm_start)
		prev = vma;

	for (nstart = start ; ; ) {
		unsigned long newflags;

		/* Here we know that vma->vm_start <= nstart < vma->vm_end. */

		newflags = vm_flags | (vma->vm_flags & ~(VM_READ | VM_WRITE | VM_EXEC));

		/* newflags >> 4 shift VM_MAY% in place of VM_% */
		if ((newflags & ~(newflags >> 4)) & (VM_READ | VM_WRITE | VM_EXEC)) {
			error = -EACCES;
			goto out;
		}

		error = security_file_mprotect(vma, reqprot, prot);
		if (error)
			goto out;

		tmp = vma->vm_end;
		if (tmp > end)
			tmp = end;
		error = mprotect_fixup(vma, &prev, nstart, tmp, newflags);
		if (error)
			goto out;
		nstart = tmp;

		if (nstart < prev->vm_end)
			nstart = prev->vm_end;
		if (nstart >= end)
			goto out;

		vma = prev->vm_next;
		if (!vma || vma->vm_start != nstart) {
			error = -ENOMEM;
			goto out;
		}
	}
out:
	up_write(&current->mm->mmap_sem);
	return error;
}
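/*
 * Illustrative userspace sketch (not part of this file): a typical caller
 * maps an anonymous page read-write and then revokes write access through
 * the mprotect() syscall defined above.
 *
 *	#include <sys/mman.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		long sz = sysconf(_SC_PAGESIZE);
 *		char *p = mmap(NULL, sz, PROT_READ | PROT_WRITE,
 *			       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 *
 *		if (p == MAP_FAILED)
 *			return 1;
 *		p[0] = 1;			// still writable here
 *		if (mprotect(p, sz, PROT_READ))	// drop PROT_WRITE
 *			return 1;
 *		// any further store through p now faults with SIGSEGV
 *		return 0;
 *	}
 */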