mprotect.c (old: 6f84981772535e670e4e2df051a672af229b6694) | mprotect.c (new: 7d4a8be0c4b2b7ffb367929d2b352651f083806b) |
---|---|
1// SPDX-License-Identifier: GPL-2.0 2/* 3 * mm/mprotect.c 4 * 5 * (C) Copyright 1994 Linus Torvalds 6 * (C) Copyright 2002 Christoph Hellwig 7 * 8 * Address space accounting code <alan@lxorguk.ukuu.org.uk> --- 66 unchanged lines hidden (view full) --- 75 * needs a real write-fault for writenotify 76 * (see vma_wants_writenotify()). If "dirty", the assumption is that the 77 * FS was already notified and we can simply mark the PTE writable 78 * just like the write-fault handler would do. 79 */ 80 return pte_dirty(pte); 81} 82 | 1// SPDX-License-Identifier: GPL-2.0 2/* 3 * mm/mprotect.c 4 * 5 * (C) Copyright 1994 Linus Torvalds 6 * (C) Copyright 2002 Christoph Hellwig 7 * 8 * Address space accounting code <alan@lxorguk.ukuu.org.uk> --- 66 unchanged lines hidden (view full) --- 75 * needs a real write-fault for writenotify 76 * (see vma_wants_writenotify()). If "dirty", the assumption is that the 77 * FS was already notified and we can simply mark the PTE writable 78 * just like the write-fault handler would do. 79 */ 80 return pte_dirty(pte); 81} 82 |
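The headline change in this revision of the walk is the switch from `unsigned long` to `long` return values throughout, from change_pte_range() up through change_protection(), so that a pagetable allocation failure on the uffd-wp path can be reported as `-ENOMEM` rather than only warned about: a non-negative return is still the number of pages whose protection was updated, a negative return is an errno-style error. Two smaller tweaks inside change_pte_range() are also visible below: the explicit pte_wrprotect() before pte_mkuffd_wp() is dropped (pte_mkuffd_wp() is relied on to leave the PTE write-protected), and the pte-marker branch is broadened from uffd-wp markers to all markers so that swapin-error entries can be skipped unconditionally, since any access to them raises SIGBUS anyway. A minimal userspace sketch of the new return convention, with an invented helper standing in for the walk functions:

```c
/*
 * Minimal userspace sketch (not kernel code) of the signed-return
 * convention adopted here: a non-negative value is a count of updated
 * pages, a negative value is an errno-style error.  update_range() is
 * an invented stand-in for change_pte_range() and its callers.
 */
#include <errno.h>
#include <stdio.h>

static long update_range(long nr_pages, int simulate_oom)
{
	if (simulate_oom)
		return -ENOMEM;		/* pagetable allocation failed */
	return nr_pages;		/* number of pages actually changed */
}

int main(void)
{
	long ret = update_range(16, 0);

	if (ret < 0)
		printf("error: %ld\n", ret);
	else
		printf("updated %ld pages\n", ret);	/* prints 16 */

	ret = update_range(16, 1);
	if (ret < 0)
		printf("error: %ld\n", ret);		/* prints -12 (-ENOMEM) */
	return 0;
}
```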
83static unsigned long change_pte_range(struct mmu_gather *tlb, | 83static long change_pte_range(struct mmu_gather *tlb, |
84 struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, 85 unsigned long end, pgprot_t newprot, unsigned long cp_flags) 86{ 87 pte_t *pte, oldpte; 88 spinlock_t *ptl; | 84 struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, 85 unsigned long end, pgprot_t newprot, unsigned long cp_flags) 86{ 87 pte_t *pte, oldpte; 88 spinlock_t *ptl; |
89 unsigned long pages = 0; | 89 long pages = 0; |
90 int target_node = NUMA_NO_NODE; 91 bool prot_numa = cp_flags & MM_CP_PROT_NUMA; 92 bool uffd_wp = cp_flags & MM_CP_UFFD_WP; 93 bool uffd_wp_resolve = cp_flags & MM_CP_UFFD_WP_RESOLVE; 94 95 tlb_change_page_size(tlb, PAGE_SIZE); 96 97 /* --- 74 unchanged lines hidden (view full) --- 172 !toptier) 173 xchg_page_access_time(page, 174 jiffies_to_msecs(jiffies)); 175 } 176 177 oldpte = ptep_modify_prot_start(vma, addr, pte); 178 ptent = pte_modify(oldpte, newprot); 179 | 90 int target_node = NUMA_NO_NODE; 91 bool prot_numa = cp_flags & MM_CP_PROT_NUMA; 92 bool uffd_wp = cp_flags & MM_CP_UFFD_WP; 93 bool uffd_wp_resolve = cp_flags & MM_CP_UFFD_WP_RESOLVE; 94 95 tlb_change_page_size(tlb, PAGE_SIZE); 96 97 /* --- 74 unchanged lines hidden (view full) --- 172 !toptier) 173 xchg_page_access_time(page, 174 jiffies_to_msecs(jiffies)); 175 } 176 177 oldpte = ptep_modify_prot_start(vma, addr, pte); 178 ptent = pte_modify(oldpte, newprot); 179 |
180 if (uffd_wp) { 181 ptent = pte_wrprotect(ptent); | 180 if (uffd_wp) |
182 ptent = pte_mkuffd_wp(ptent); | 181 ptent = pte_mkuffd_wp(ptent); |
183 } else if (uffd_wp_resolve) { | 182 else if (uffd_wp_resolve) |
184 ptent = pte_clear_uffd_wp(ptent); | 183 ptent = pte_clear_uffd_wp(ptent); |
185 } | |
186 187 /* 188 * In some writable, shared mappings, we might want 189 * to catch actual write access -- see 190 * vma_wants_writenotify(). 191 * 192 * In all writable, private mappings, we have to 193 * properly handle COW. --- 46 unchanged lines hidden (view full) --- 240 } else if (is_writable_device_exclusive_entry(entry)) { 241 entry = make_readable_device_exclusive_entry( 242 swp_offset(entry)); 243 newpte = swp_entry_to_pte(entry); 244 if (pte_swp_soft_dirty(oldpte)) 245 newpte = pte_swp_mksoft_dirty(newpte); 246 if (pte_swp_uffd_wp(oldpte)) 247 newpte = pte_swp_mkuffd_wp(newpte); | 184 185 /* 186 * In some writable, shared mappings, we might want 187 * to catch actual write access -- see 188 * vma_wants_writenotify(). 189 * 190 * In all writable, private mappings, we have to 191 * properly handle COW. --- 46 unchanged lines hidden (view full) --- 238 } else if (is_writable_device_exclusive_entry(entry)) { 239 entry = make_readable_device_exclusive_entry( 240 swp_offset(entry)); 241 newpte = swp_entry_to_pte(entry); 242 if (pte_swp_soft_dirty(oldpte)) 243 newpte = pte_swp_mksoft_dirty(newpte); 244 if (pte_swp_uffd_wp(oldpte)) 245 newpte = pte_swp_mkuffd_wp(newpte); |
248 } else if (pte_marker_entry_uffd_wp(entry)) { | 246 } else if (is_pte_marker_entry(entry)) { |
249 /* | 247 /* |
248 * Ignore swapin errors unconditionally, 249 * because any access should sigbus anyway. 250 */ 251 if (is_swapin_error_entry(entry)) 252 continue; 253 /* |
|
250 * If this is uffd-wp pte marker and we'd like 251 * to unprotect it, drop it; the next page 252 * fault will trigger without uffd trapping. 253 */ 254 if (uffd_wp_resolve) { 255 pte_clear(vma->vm_mm, addr, pte); 256 pages++; 257 } --- 63 unchanged lines hidden (view full) --- 321uffd_wp_protect_file(struct vm_area_struct *vma, unsigned long cp_flags) 322{ 323 return (cp_flags & MM_CP_UFFD_WP) && !vma_is_anonymous(vma); 324} 325 326/* 327 * If wr-protecting the range for file-backed, populate pgtable for the case 328 * when pgtable is empty but page cache exists. When {pte|pmd|...}_alloc() | 254 * If this is uffd-wp pte marker and we'd like 255 * to unprotect it, drop it; the next page 256 * fault will trigger without uffd trapping. 257 */ 258 if (uffd_wp_resolve) { 259 pte_clear(vma->vm_mm, addr, pte); 260 pages++; 261 } --- 63 unchanged lines hidden (view full) --- 325uffd_wp_protect_file(struct vm_area_struct *vma, unsigned long cp_flags) 326{ 327 return (cp_flags & MM_CP_UFFD_WP) && !vma_is_anonymous(vma); 328} 329 330/* 331 * If wr-protecting the range for file-backed, populate pgtable for the case 332 * when pgtable is empty but page cache exists. When {pte|pmd|...}_alloc() |
329 * failed it means no memory, we don't have a better option but stop. | 333 * failed we treat it the same way as pgtable allocation failures during 334 * page faults by kicking OOM and returning error. |
330 */ 331#define change_pmd_prepare(vma, pmd, cp_flags) \ | 335 */ 336#define change_pmd_prepare(vma, pmd, cp_flags) \ |
332 do { \ | 337 ({ \ 338 long err = 0; \ |
333 if (unlikely(uffd_wp_protect_file(vma, cp_flags))) { \ | 339 if (unlikely(uffd_wp_protect_file(vma, cp_flags))) { \ |
334 if (WARN_ON_ONCE(pte_alloc(vma->vm_mm, pmd))) \ 335 break; \ | 340 if (pte_alloc(vma->vm_mm, pmd)) \ 341 err = -ENOMEM; \ |
336 } \ | 342 } \ |
337 } while (0) | 343 err; \ 344 }) 345 |
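The macro rework above is what makes the error propagation possible: a `do { ... } while (0)` body cannot produce a value, so the old version could only `WARN_ON_ONCE()` and `break`, then continue as if nothing had happened. The new version is a GNU C statement expression, `({ ... })`, whose last statement becomes the value of the whole construct, letting the caller collect `-ENOMEM`. A toy userspace demonstration of the idiom (requires GCC or Clang; `prepare_thing()` and `fake_alloc()` are made up for illustration):

```c
#include <errno.h>
#include <stdio.h>

static int fake_alloc(int fail)
{
	return fail ? -1 : 0;	/* nonzero models an allocation failure */
}

#define prepare_thing(fail)				\
({							\
	long __err = 0;					\
	if (fake_alloc(fail))				\
		__err = -ENOMEM;			\
	__err; /* last expression = macro's value */	\
})

int main(void)
{
	long ret = prepare_thing(0);

	printf("ok path:  %ld\n", ret);		/* prints 0 */
	ret = prepare_thing(1);
	printf("oom path: %ld\n", ret);		/* prints -12 (-ENOMEM) on Linux */
	return 0;
}
```

Statement expressions are a GCC extension rather than ISO C, which is why the pattern shows up in kernel macros but not in portable code.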
338/* 339 * This is the general pud/p4d/pgd version of change_pmd_prepare(). We need to 340 * have separate change_pmd_prepare() because pte_alloc() returns 0 on success, 341 * while {pmd|pud|p4d}_alloc() returns the valid pointer on success. 342 */ 343#define change_prepare(vma, high, low, addr, cp_flags) \ | 346/* 347 * This is the general pud/p4d/pgd version of change_pmd_prepare(). We need to 348 * have separate change_pmd_prepare() because pte_alloc() returns 0 on success, 349 * while {pmd|pud|p4d}_alloc() returns the valid pointer on success. 350 */ 351#define change_prepare(vma, high, low, addr, cp_flags) \ |
344 do { \ | 352 ({ \ 353 long err = 0; \ |
345 if (unlikely(uffd_wp_protect_file(vma, cp_flags))) { \ 346 low##_t *p = low##_alloc(vma->vm_mm, high, addr); \ | 354 if (unlikely(uffd_wp_protect_file(vma, cp_flags))) { \ 355 low##_t *p = low##_alloc(vma->vm_mm, high, addr); \ |
347 if (WARN_ON_ONCE(p == NULL)) \ 348 break; \ | 356 if (p == NULL) \ 357 err = -ENOMEM; \ |
349 } \ | 358 } \ |
350 } while (0) | 359 err; \ 360 }) |
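For readers following the token pasting: `change_prepare(vma, pud, pmd, addr, cp_flags)`, as called from change_pud_range() below, expands to roughly the following, with `low##_t` / `low##_alloc` selecting the pmd level (an approximate preprocessor expansion of the kernel code above, not standalone code):

```c
({
	long err = 0;
	if (unlikely(uffd_wp_protect_file(vma, cp_flags))) {
		pmd_t *p = pmd_alloc(vma->vm_mm, pud, addr);
		if (p == NULL)
			err = -ENOMEM;
	}
	err;
})
```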
351 | 361 |
352static inline unsigned long change_pmd_range(struct mmu_gather *tlb, | 362static inline long change_pmd_range(struct mmu_gather *tlb, |
353 struct vm_area_struct *vma, pud_t *pud, unsigned long addr, 354 unsigned long end, pgprot_t newprot, unsigned long cp_flags) 355{ 356 pmd_t *pmd; 357 unsigned long next; | 363 struct vm_area_struct *vma, pud_t *pud, unsigned long addr, 364 unsigned long end, pgprot_t newprot, unsigned long cp_flags) 365{ 366 pmd_t *pmd; 367 unsigned long next; |
358 unsigned long pages = 0; | 368 long pages = 0; |
359 unsigned long nr_huge_updates = 0; 360 struct mmu_notifier_range range; 361 362 range.start = 0; 363 364 pmd = pmd_offset(pud, addr); 365 do { | 369 unsigned long nr_huge_updates = 0; 370 struct mmu_notifier_range range; 371 372 range.start = 0; 373 374 pmd = pmd_offset(pud, addr); 375 do { |
366 unsigned long this_pages; | 376 long ret; |
367 368 next = pmd_addr_end(addr, end); 369 | 377 378 next = pmd_addr_end(addr, end); 379 |
370 change_pmd_prepare(vma, pmd, cp_flags); | 380 ret = change_pmd_prepare(vma, pmd, cp_flags); 381 if (ret) { 382 pages = ret; 383 break; 384 } |
371 /* 372 * Automatic NUMA balancing walks the tables with mmap_lock 373 * held for read. It's possible a parallel update to occur 374 * between pmd_trans_huge() and a pmd_none_or_clear_bad() 375 * check leading to a false positive and clearing. 376 * Hence, it's necessary to atomically read the PMD value 377 * for all the checks. 378 */ 379 if (!is_swap_pmd(*pmd) && !pmd_devmap(*pmd) && 380 pmd_none_or_clear_bad_unless_trans_huge(pmd)) 381 goto next; 382 383 /* invoke the mmu notifier if the pmd is populated */ 384 if (!range.start) { 385 mmu_notifier_range_init(&range, 386 MMU_NOTIFY_PROTECTION_VMA, 0, | 385 /* 386 * Automatic NUMA balancing walks the tables with mmap_lock 387 * held for read. It's possible a parallel update to occur 388 * between pmd_trans_huge() and a pmd_none_or_clear_bad() 389 * check leading to a false positive and clearing. 390 * Hence, it's necessary to atomically read the PMD value 391 * for all the checks. 392 */ 393 if (!is_swap_pmd(*pmd) && !pmd_devmap(*pmd) && 394 pmd_none_or_clear_bad_unless_trans_huge(pmd)) 395 goto next; 396 397 /* invoke the mmu notifier if the pmd is populated */ 398 if (!range.start) { 399 mmu_notifier_range_init(&range, 400 MMU_NOTIFY_PROTECTION_VMA, 0, |
387 vma, vma->vm_mm, addr, end); | 401 vma->vm_mm, addr, end); |
388 mmu_notifier_invalidate_range_start(&range); 389 } 390 391 if (is_swap_pmd(*pmd) || pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) { 392 if ((next - addr != HPAGE_PMD_SIZE) || 393 uffd_wp_protect_file(vma, cp_flags)) { 394 __split_huge_pmd(vma, pmd, addr, false, NULL); 395 /* 396 * For file-backed, the pmd could have been 397 * cleared; make sure pmd populated if 398 * necessary, then fall-through to pte level. 399 */ | 402 mmu_notifier_invalidate_range_start(&range); 403 } 404 405 if (is_swap_pmd(*pmd) || pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) { 406 if ((next - addr != HPAGE_PMD_SIZE) || 407 uffd_wp_protect_file(vma, cp_flags)) { 408 __split_huge_pmd(vma, pmd, addr, false, NULL); 409 /* 410 * For file-backed, the pmd could have been 411 * cleared; make sure pmd populated if 412 * necessary, then fall-through to pte level. 413 */ |
400 change_pmd_prepare(vma, pmd, cp_flags); | 414 ret = change_pmd_prepare(vma, pmd, cp_flags); 415 if (ret) { 416 pages = ret; 417 break; 418 } |
401 } else { 402 /* 403 * change_huge_pmd() does not defer TLB flushes, 404 * so no need to propagate the tlb argument. 405 */ 406 int nr_ptes = change_huge_pmd(tlb, vma, pmd, 407 addr, newprot, cp_flags); 408 --- 4 unchanged lines hidden (view full) --- 413 } 414 415 /* huge pmd was handled */ 416 goto next; 417 } 418 } 419 /* fall through, the trans huge pmd just split */ 420 } | 419 } else { 420 /* 421 * change_huge_pmd() does not defer TLB flushes, 422 * so no need to propagate the tlb argument. 423 */ 424 int nr_ptes = change_huge_pmd(tlb, vma, pmd, 425 addr, newprot, cp_flags); 426 --- 4 unchanged lines hidden (view full) --- 431 } 432 433 /* huge pmd was handled */ 434 goto next; 435 } 436 } 437 /* fall through, the trans huge pmd just split */ 438 } |
421 this_pages = change_pte_range(tlb, vma, pmd, addr, next, 422 newprot, cp_flags); 423 pages += this_pages; | 439 pages += change_pte_range(tlb, vma, pmd, addr, next, 440 newprot, cp_flags); |
424next: 425 cond_resched(); 426 } while (pmd++, addr = next, addr != end); 427 428 if (range.start) 429 mmu_notifier_invalidate_range_end(&range); 430 431 if (nr_huge_updates) 432 count_vm_numa_events(NUMA_HUGE_PTE_UPDATES, nr_huge_updates); 433 return pages; 434} 435 | 441next: 442 cond_resched(); 443 } while (pmd++, addr = next, addr != end); 444 445 if (range.start) 446 mmu_notifier_invalidate_range_end(&range); 447 448 if (nr_huge_updates) 449 count_vm_numa_events(NUMA_HUGE_PTE_UPDATES, nr_huge_updates); 450 return pages; 451} 452 |
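change_pmd_range() now checks the value of both change_pmd_prepare() invocations; on failure it replaces the running page count with the negative error and breaks out of the loop, so that mmu_notifier_invalidate_range_end() still runs if the range had been started. A compact userspace sketch of that accumulate-or-bail pattern, with invented chunk helpers standing in for the prepare and update steps:

```c
#include <errno.h>
#include <stdio.h>

/* invented stand-ins for change_pmd_prepare() / change_pte_range() */
static long chunk_prepare(int i)
{
	return (i == 3) ? -ENOMEM : 0;	/* pretend the 4th chunk hits OOM */
}

static long chunk_update(int i)
{
	(void)i;
	return 4;			/* pretend 4 pages were changed */
}

int main(void)
{
	long pages = 0;

	for (int i = 0; i < 8; i++) {
		long ret = chunk_prepare(i);

		if (ret) {
			pages = ret;	/* report the error, not the partial count */
			break;
		}
		pages += chunk_update(i);
	}
	printf("result: %ld\n", pages);	/* -12: the walk stopped early */
	return 0;
}
```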
436static inline unsigned long change_pud_range(struct mmu_gather *tlb, | 453static inline long change_pud_range(struct mmu_gather *tlb, |
437 struct vm_area_struct *vma, p4d_t *p4d, unsigned long addr, 438 unsigned long end, pgprot_t newprot, unsigned long cp_flags) 439{ 440 pud_t *pud; 441 unsigned long next; | 454 struct vm_area_struct *vma, p4d_t *p4d, unsigned long addr, 455 unsigned long end, pgprot_t newprot, unsigned long cp_flags) 456{ 457 pud_t *pud; 458 unsigned long next; |
442 unsigned long pages = 0; | 459 long pages = 0, ret; |
443 444 pud = pud_offset(p4d, addr); 445 do { 446 next = pud_addr_end(addr, end); | 460 461 pud = pud_offset(p4d, addr); 462 do { 463 next = pud_addr_end(addr, end); |
447 change_prepare(vma, pud, pmd, addr, cp_flags); | 464 ret = change_prepare(vma, pud, pmd, addr, cp_flags); 465 if (ret) 466 return ret; |
448 if (pud_none_or_clear_bad(pud)) 449 continue; 450 pages += change_pmd_range(tlb, vma, pud, addr, next, newprot, 451 cp_flags); 452 } while (pud++, addr = next, addr != end); 453 454 return pages; 455} 456 | 467 if (pud_none_or_clear_bad(pud)) 468 continue; 469 pages += change_pmd_range(tlb, vma, pud, addr, next, newprot, 470 cp_flags); 471 } while (pud++, addr = next, addr != end); 472 473 return pages; 474} 475 |
457static inline unsigned long change_p4d_range(struct mmu_gather *tlb, | 476static inline long change_p4d_range(struct mmu_gather *tlb, |
458 struct vm_area_struct *vma, pgd_t *pgd, unsigned long addr, 459 unsigned long end, pgprot_t newprot, unsigned long cp_flags) 460{ 461 p4d_t *p4d; 462 unsigned long next; | 477 struct vm_area_struct *vma, pgd_t *pgd, unsigned long addr, 478 unsigned long end, pgprot_t newprot, unsigned long cp_flags) 479{ 480 p4d_t *p4d; 481 unsigned long next; |
463 unsigned long pages = 0; | 482 long pages = 0, ret; |
464 465 p4d = p4d_offset(pgd, addr); 466 do { 467 next = p4d_addr_end(addr, end); | 483 484 p4d = p4d_offset(pgd, addr); 485 do { 486 next = p4d_addr_end(addr, end); |
468 change_prepare(vma, p4d, pud, addr, cp_flags); | 487 ret = change_prepare(vma, p4d, pud, addr, cp_flags); 488 if (ret) 489 return ret; |
469 if (p4d_none_or_clear_bad(p4d)) 470 continue; 471 pages += change_pud_range(tlb, vma, p4d, addr, next, newprot, 472 cp_flags); 473 } while (p4d++, addr = next, addr != end); 474 475 return pages; 476} 477 | 490 if (p4d_none_or_clear_bad(p4d)) 491 continue; 492 pages += change_pud_range(tlb, vma, p4d, addr, next, newprot, 493 cp_flags); 494 } while (p4d++, addr = next, addr != end); 495 496 return pages; 497} 498 |
478static unsigned long change_protection_range(struct mmu_gather *tlb, | 499static long change_protection_range(struct mmu_gather *tlb, |
479 struct vm_area_struct *vma, unsigned long addr, 480 unsigned long end, pgprot_t newprot, unsigned long cp_flags) 481{ 482 struct mm_struct *mm = vma->vm_mm; 483 pgd_t *pgd; 484 unsigned long next; | 500 struct vm_area_struct *vma, unsigned long addr, 501 unsigned long end, pgprot_t newprot, unsigned long cp_flags) 502{ 503 struct mm_struct *mm = vma->vm_mm; 504 pgd_t *pgd; 505 unsigned long next; |
485 unsigned long pages = 0; | 506 long pages = 0, ret; |
486 487 BUG_ON(addr >= end); 488 pgd = pgd_offset(mm, addr); 489 tlb_start_vma(tlb, vma); 490 do { 491 next = pgd_addr_end(addr, end); | 507 508 BUG_ON(addr >= end); 509 pgd = pgd_offset(mm, addr); 510 tlb_start_vma(tlb, vma); 511 do { 512 next = pgd_addr_end(addr, end); |
492 change_prepare(vma, pgd, p4d, addr, cp_flags); | 513 ret = change_prepare(vma, pgd, p4d, addr, cp_flags); 514 if (ret) { 515 pages = ret; 516 break; 517 } |
493 if (pgd_none_or_clear_bad(pgd)) 494 continue; 495 pages += change_p4d_range(tlb, vma, pgd, addr, next, newprot, 496 cp_flags); 497 } while (pgd++, addr = next, addr != end); 498 499 tlb_end_vma(tlb, vma); 500 501 return pages; 502} 503 | 518 if (pgd_none_or_clear_bad(pgd)) 519 continue; 520 pages += change_p4d_range(tlb, vma, pgd, addr, next, newprot, 521 cp_flags); 522 } while (pgd++, addr = next, addr != end); 523 524 tlb_end_vma(tlb, vma); 525 526 return pages; 527} 528 |
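One asymmetry worth noting across the levels above: change_pud_range() and change_p4d_range() simply `return ret;` on a prepare failure, while change_pmd_range() and change_protection_range() `break` out of their loops instead, so that their respective cleanups (mmu_notifier_invalidate_range_end() and tlb_end_vma()) still run before the error propagates.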
504unsigned long change_protection(struct mmu_gather *tlb, | 529long change_protection(struct mmu_gather *tlb, |
505 struct vm_area_struct *vma, unsigned long start, | 530 struct vm_area_struct *vma, unsigned long start, |
506 unsigned long end, pgprot_t newprot, 507 unsigned long cp_flags) | 531 unsigned long end, unsigned long cp_flags) |
508{ | 532{ |
509 unsigned long pages; | 533 pgprot_t newprot = vma->vm_page_prot; 534 long pages; |
510 511 BUG_ON((cp_flags & MM_CP_UFFD_WP_ALL) == MM_CP_UFFD_WP_ALL); 512 | 535 536 BUG_ON((cp_flags & MM_CP_UFFD_WP_ALL) == MM_CP_UFFD_WP_ALL); 537 |
538#ifdef CONFIG_NUMA_BALANCING 539 /* 540 * Ordinary protection updates (mprotect, uffd-wp, softdirty tracking) 541 * are expected to reflect their requirements via VMA flags such that 542 * vma_set_page_prot() will adjust vma->vm_page_prot accordingly. 543 */ 544 if (cp_flags & MM_CP_PROT_NUMA) 545 newprot = PAGE_NONE; 546#else 547 WARN_ON_ONCE(cp_flags & MM_CP_PROT_NUMA); 548#endif 549 |
|
513 if (is_vm_hugetlb_page(vma)) 514 pages = hugetlb_change_protection(vma, start, end, newprot, 515 cp_flags); 516 else 517 pages = change_protection_range(tlb, vma, start, end, newprot, 518 cp_flags); 519 520 return pages; --- 112 unchanged lines hidden (view full) --- 633 * vm_flags and vm_page_prot are protected by the mmap_lock 634 * held in write mode. 635 */ 636 vma->vm_flags = newflags; 637 if (vma_wants_manual_pte_write_upgrade(vma)) 638 mm_cp_flags |= MM_CP_TRY_CHANGE_WRITABLE; 639 vma_set_page_prot(vma); 640 | 550 if (is_vm_hugetlb_page(vma)) 551 pages = hugetlb_change_protection(vma, start, end, newprot, 552 cp_flags); 553 else 554 pages = change_protection_range(tlb, vma, start, end, newprot, 555 cp_flags); 556 557 return pages; --- 112 unchanged lines hidden (view full) --- 670 * vm_flags and vm_page_prot are protected by the mmap_lock 671 * held in write mode. 672 */ 673 vma->vm_flags = newflags; 674 if (vma_wants_manual_pte_write_upgrade(vma)) 675 mm_cp_flags |= MM_CP_TRY_CHANGE_WRITABLE; 676 vma_set_page_prot(vma); 677 |
641 change_protection(tlb, vma, start, end, vma->vm_page_prot, mm_cp_flags); | 678 change_protection(tlb, vma, start, end, mm_cp_flags); |
642 643 /* 644 * Private VM_LOCKED VMA becoming writable: trigger COW to avoid major 645 * fault on access. 646 */ 647 if ((oldflags & (VM_WRITE | VM_SHARED | VM_LOCKED)) == VM_LOCKED && 648 (newflags & VM_WRITE)) { 649 populate_vma_page_range(vma, start, end, NULL); --- 219 unchanged lines hidden --- | 679 680 /* 681 * Private VM_LOCKED VMA becoming writable: trigger COW to avoid major 682 * fault on access. 683 */ 684 if ((oldflags & (VM_WRITE | VM_SHARED | VM_LOCKED)) == VM_LOCKED && 685 (newflags & VM_WRITE)) { 686 populate_vma_page_range(vma, start, end, NULL); --- 219 unchanged lines hidden --- |
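The other visible interface change is at change_protection() itself: the pgprot_t parameter is gone. Per the new comment, ordinary callers are expected to have refreshed vma->vm_page_prot via vma_set_page_prot(), as mprotect_fixup() does just before its call, and the NUMA-balancing case derives PAGE_NONE internally. Combined with the new `long` return type, the call shape changes roughly as sketched below; the error check is illustrative only, since mprotect_fixup() as shown above does not inspect the return value:

```c
/* before: the caller supplied the page protection explicitly */
pages = change_protection(tlb, vma, start, end, vma->vm_page_prot, mm_cp_flags);

/* after: only cp_flags is passed, and a negative return now signals an error */
pages = change_protection(tlb, vma, start, end, mm_cp_flags);
if (pages < 0)
	return pages;	/* e.g. -ENOMEM from pgtable allocation (uffd-wp on file-backed) */
```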