--- mprotect.c (6f84981772535e670e4e2df051a672af229b6694)
+++ mprotect.c (7d4a8be0c4b2b7ffb367929d2b352651f083806b)

 // SPDX-License-Identifier: GPL-2.0
 /*
  * mm/mprotect.c
  *
  * (C) Copyright 1994 Linus Torvalds
  * (C) Copyright 2002 Christoph Hellwig
  *
  * Address space accounting code <alan@lxorguk.ukuu.org.uk>

[... 66 unchanged lines hidden ...]

  * needs a real write-fault for writenotify
  * (see vma_wants_writenotify()). If "dirty", the assumption is that the
  * FS was already notified and we can simply mark the PTE writable
  * just like the write-fault handler would do.
  */
         return pte_dirty(pte);
 }

-static unsigned long change_pte_range(struct mmu_gather *tlb,
+static long change_pte_range(struct mmu_gather *tlb,
                 struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr,
                 unsigned long end, pgprot_t newprot, unsigned long cp_flags)
 {
         pte_t *pte, oldpte;
         spinlock_t *ptl;
-        unsigned long pages = 0;
+        long pages = 0;
         int target_node = NUMA_NO_NODE;
         bool prot_numa = cp_flags & MM_CP_PROT_NUMA;
         bool uffd_wp = cp_flags & MM_CP_UFFD_WP;
         bool uffd_wp_resolve = cp_flags & MM_CP_UFFD_WP_RESOLVE;

         tlb_change_page_size(tlb, PAGE_SIZE);

         /*

[... 74 unchanged lines hidden ...]

                             !toptier)
                                 xchg_page_access_time(page,
                                                       jiffies_to_msecs(jiffies));
                 }

                 oldpte = ptep_modify_prot_start(vma, addr, pte);
                 ptent = pte_modify(oldpte, newprot);

-                if (uffd_wp) {
-                        ptent = pte_wrprotect(ptent);
+                if (uffd_wp)
                         ptent = pte_mkuffd_wp(ptent);
-                } else if (uffd_wp_resolve) {
+                else if (uffd_wp_resolve)
                         ptent = pte_clear_uffd_wp(ptent);
-                }

                 /*
                  * In some writable, shared mappings, we might want
                  * to catch actual write access -- see
                  * vma_wants_writenotify().
                  *
                  * In all writable, private mappings, we have to
                  * properly handle COW.

[... 46 unchanged lines hidden ...]

                 } else if (is_writable_device_exclusive_entry(entry)) {
                         entry = make_readable_device_exclusive_entry(
                                                 swp_offset(entry));
                         newpte = swp_entry_to_pte(entry);
                         if (pte_swp_soft_dirty(oldpte))
                                 newpte = pte_swp_mksoft_dirty(newpte);
                         if (pte_swp_uffd_wp(oldpte))
                                 newpte = pte_swp_mkuffd_wp(newpte);
-                } else if (pte_marker_entry_uffd_wp(entry)) {
+                } else if (is_pte_marker_entry(entry)) {
+                        /*
+                         * Ignore swapin errors unconditionally,
+                         * because any access should sigbus anyway.
+                         */
+                        if (is_swapin_error_entry(entry))
+                                continue;
                         /*
                          * If this is uffd-wp pte marker and we'd like
                          * to unprotect it, drop it; the next page
                          * fault will trigger without uffd trapping.
                          */
                         if (uffd_wp_resolve) {
                                 pte_clear(vma->vm_mm, addr, pte);
                                 pages++;
                         }

[... 63 unchanged lines hidden ...]

 uffd_wp_protect_file(struct vm_area_struct *vma, unsigned long cp_flags)
 {
         return (cp_flags & MM_CP_UFFD_WP) && !vma_is_anonymous(vma);
 }

 /*
  * If wr-protecting the range for file-backed, populate pgtable for the case
  * when pgtable is empty but page cache exists. When {pte|pmd|...}_alloc()
- * failed it means no memory, we don't have a better option but stop.
+ * failed we treat it the same way as pgtable allocation failures during
+ * page faults by kicking OOM and returning error.
  */
 #define change_pmd_prepare(vma, pmd, cp_flags)                          \
-        do {                                                            \
+        ({                                                              \
+                long err = 0;                                           \
                 if (unlikely(uffd_wp_protect_file(vma, cp_flags))) {    \
-                        if (WARN_ON_ONCE(pte_alloc(vma->vm_mm, pmd)))   \
-                                break;                                  \
+                        if (pte_alloc(vma->vm_mm, pmd))                 \
+                                err = -ENOMEM;                          \
                 }                                                       \
-        } while (0)
+                err;                                                    \
+        })
+
 /*
  * This is the general pud/p4d/pgd version of change_pmd_prepare(). We need to
  * have separate change_pmd_prepare() because pte_alloc() returns 0 on success,
  * while {pmd|pud|p4d}_alloc() returns the valid pointer on success.
  */
 #define change_prepare(vma, high, low, addr, cp_flags)                  \
-        do {                                                            \
+        ({                                                              \
+                long err = 0;                                           \
                 if (unlikely(uffd_wp_protect_file(vma, cp_flags))) {    \
                         low##_t *p = low##_alloc(vma->vm_mm, high, addr); \
-                        if (WARN_ON_ONCE(p == NULL))                    \
-                                break;                                  \
+                        if (p == NULL)                                  \
+                                err = -ENOMEM;                          \
                 }                                                       \
-        } while (0)
+                err;                                                    \
+        })

-static inline unsigned long change_pmd_range(struct mmu_gather *tlb,
+static inline long change_pmd_range(struct mmu_gather *tlb,
                 struct vm_area_struct *vma, pud_t *pud, unsigned long addr,
                 unsigned long end, pgprot_t newprot, unsigned long cp_flags)
 {
         pmd_t *pmd;
         unsigned long next;
-        unsigned long pages = 0;
+        long pages = 0;
         unsigned long nr_huge_updates = 0;
         struct mmu_notifier_range range;

         range.start = 0;

         pmd = pmd_offset(pud, addr);
         do {
-                unsigned long this_pages;
+                long ret;

                 next = pmd_addr_end(addr, end);

-                change_pmd_prepare(vma, pmd, cp_flags);
+                ret = change_pmd_prepare(vma, pmd, cp_flags);
+                if (ret) {
+                        pages = ret;
+                        break;
+                }
                 /*
                  * Automatic NUMA balancing walks the tables with mmap_lock
                  * held for read. It's possible a parallel update to occur
                  * between pmd_trans_huge() and a pmd_none_or_clear_bad()
                  * check leading to a false positive and clearing.
                  * Hence, it's necessary to atomically read the PMD value
                  * for all the checks.
                  */
                 if (!is_swap_pmd(*pmd) && !pmd_devmap(*pmd) &&
                     pmd_none_or_clear_bad_unless_trans_huge(pmd))
                         goto next;

                 /* invoke the mmu notifier if the pmd is populated */
                 if (!range.start) {
                         mmu_notifier_range_init(&range,
                                 MMU_NOTIFY_PROTECTION_VMA, 0,
-                                vma, vma->vm_mm, addr, end);
+                                vma->vm_mm, addr, end);
                         mmu_notifier_invalidate_range_start(&range);
                 }

                 if (is_swap_pmd(*pmd) || pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) {
                         if ((next - addr != HPAGE_PMD_SIZE) ||
                             uffd_wp_protect_file(vma, cp_flags)) {
                                 __split_huge_pmd(vma, pmd, addr, false, NULL);
                                 /*
                                  * For file-backed, the pmd could have been
                                  * cleared; make sure pmd populated if
                                  * necessary, then fall-through to pte level.
                                  */
-                                change_pmd_prepare(vma, pmd, cp_flags);
+                                ret = change_pmd_prepare(vma, pmd, cp_flags);
+                                if (ret) {
+                                        pages = ret;
+                                        break;
+                                }
                         } else {
                                 /*
                                  * change_huge_pmd() does not defer TLB flushes,
                                  * so no need to propagate the tlb argument.
                                  */
                                 int nr_ptes = change_huge_pmd(tlb, vma, pmd,
                                                 addr, newprot, cp_flags);

[... 4 unchanged lines hidden ...]

                                 }

                                 /* huge pmd was handled */
                                 goto next;
                         }
                 }
                 /* fall through, the trans huge pmd just split */
-                this_pages = change_pte_range(tlb, vma, pmd, addr, next,
-                                              newprot, cp_flags);
-                pages += this_pages;
+                pages += change_pte_range(tlb, vma, pmd, addr, next,
+                                          newprot, cp_flags);
 next:
                 cond_resched();
         } while (pmd++, addr = next, addr != end);

         if (range.start)
                 mmu_notifier_invalidate_range_end(&range);

         if (nr_huge_updates)
                 count_vm_numa_events(NUMA_HUGE_PTE_UPDATES, nr_huge_updates);
         return pages;
 }

-static inline unsigned long change_pud_range(struct mmu_gather *tlb,
+static inline long change_pud_range(struct mmu_gather *tlb,
                 struct vm_area_struct *vma, p4d_t *p4d, unsigned long addr,
                 unsigned long end, pgprot_t newprot, unsigned long cp_flags)
 {
         pud_t *pud;
         unsigned long next;
-        unsigned long pages = 0;
+        long pages = 0, ret;

         pud = pud_offset(p4d, addr);
         do {
                 next = pud_addr_end(addr, end);
-                change_prepare(vma, pud, pmd, addr, cp_flags);
+                ret = change_prepare(vma, pud, pmd, addr, cp_flags);
+                if (ret)
+                        return ret;
                 if (pud_none_or_clear_bad(pud))
                         continue;
                 pages += change_pmd_range(tlb, vma, pud, addr, next, newprot,
                                           cp_flags);
         } while (pud++, addr = next, addr != end);

         return pages;
 }

-static inline unsigned long change_p4d_range(struct mmu_gather *tlb,
+static inline long change_p4d_range(struct mmu_gather *tlb,
                 struct vm_area_struct *vma, pgd_t *pgd, unsigned long addr,
                 unsigned long end, pgprot_t newprot, unsigned long cp_flags)
 {
         p4d_t *p4d;
         unsigned long next;
-        unsigned long pages = 0;
+        long pages = 0, ret;

         p4d = p4d_offset(pgd, addr);
         do {
                 next = p4d_addr_end(addr, end);
-                change_prepare(vma, p4d, pud, addr, cp_flags);
+                ret = change_prepare(vma, p4d, pud, addr, cp_flags);
+                if (ret)
+                        return ret;
                 if (p4d_none_or_clear_bad(p4d))
                         continue;
                 pages += change_pud_range(tlb, vma, p4d, addr, next, newprot,
                                           cp_flags);
         } while (p4d++, addr = next, addr != end);

         return pages;
 }

-static unsigned long change_protection_range(struct mmu_gather *tlb,
+static long change_protection_range(struct mmu_gather *tlb,
                 struct vm_area_struct *vma, unsigned long addr,
                 unsigned long end, pgprot_t newprot, unsigned long cp_flags)
 {
         struct mm_struct *mm = vma->vm_mm;
         pgd_t *pgd;
         unsigned long next;
-        unsigned long pages = 0;
+        long pages = 0, ret;

         BUG_ON(addr >= end);
         pgd = pgd_offset(mm, addr);
         tlb_start_vma(tlb, vma);
         do {
                 next = pgd_addr_end(addr, end);
-                change_prepare(vma, pgd, p4d, addr, cp_flags);
+                ret = change_prepare(vma, pgd, p4d, addr, cp_flags);
+                if (ret) {
+                        pages = ret;
+                        break;
+                }
                 if (pgd_none_or_clear_bad(pgd))
                         continue;
                 pages += change_p4d_range(tlb, vma, pgd, addr, next, newprot,
                                           cp_flags);
         } while (pgd++, addr = next, addr != end);

         tlb_end_vma(tlb, vma);

         return pages;
 }

-unsigned long change_protection(struct mmu_gather *tlb,
+long change_protection(struct mmu_gather *tlb,
                 struct vm_area_struct *vma, unsigned long start,
-                unsigned long end, pgprot_t newprot,
-                unsigned long cp_flags)
+                unsigned long end, unsigned long cp_flags)
 {
-        unsigned long pages;
+        pgprot_t newprot = vma->vm_page_prot;
+        long pages;

         BUG_ON((cp_flags & MM_CP_UFFD_WP_ALL) == MM_CP_UFFD_WP_ALL);

+#ifdef CONFIG_NUMA_BALANCING
+        /*
+         * Ordinary protection updates (mprotect, uffd-wp, softdirty tracking)
+         * are expected to reflect their requirements via VMA flags such that
+         * vma_set_page_prot() will adjust vma->vm_page_prot accordingly.
+         */
+        if (cp_flags & MM_CP_PROT_NUMA)
+                newprot = PAGE_NONE;
+#else
+        WARN_ON_ONCE(cp_flags & MM_CP_PROT_NUMA);
+#endif
+
         if (is_vm_hugetlb_page(vma))
                 pages = hugetlb_change_protection(vma, start, end, newprot,
                                                   cp_flags);
         else
                 pages = change_protection_range(tlb, vma, start, end, newprot,
                                                 cp_flags);

         return pages;

[... 112 unchanged lines hidden ...]

          * vm_flags and vm_page_prot are protected by the mmap_lock
          * held in write mode.
          */
         vma->vm_flags = newflags;
         if (vma_wants_manual_pte_write_upgrade(vma))
                 mm_cp_flags |= MM_CP_TRY_CHANGE_WRITABLE;
         vma_set_page_prot(vma);

-        change_protection(tlb, vma, start, end, vma->vm_page_prot, mm_cp_flags);
+        change_protection(tlb, vma, start, end, mm_cp_flags);

         /*
          * Private VM_LOCKED VMA becoming writable: trigger COW to avoid major
          * fault on access.
          */
         if ((oldflags & (VM_WRITE | VM_SHARED | VM_LOCKED)) == VM_LOCKED &&
             (newflags & VM_WRITE)) {
                 populate_vma_page_range(vma, start, end, NULL);

[... 219 unchanged lines hidden ...]
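
Note on the macro rework in this diff: change_pmd_prepare() and change_prepare() switch from do { ... } while (0) wrappers to GNU C statement expressions so the macro body can hand an error code back to its caller, and the page-table walkers now carry that value upward as a negative long. The self-contained snippet below only illustrates that statement-expression idiom; the macro name prepare_or_err() and the malloc()-based check are invented for the example and are not part of mprotect.c.

/*
 * Standalone illustration of the ({ ... }) statement-expression idiom:
 * the block evaluates to its last expression, so a macro can "return"
 * 0 on success or a negative errno on failure, which is the shape the
 * reworked change_pmd_prepare()/change_prepare() macros rely on.
 * Builds with GCC or Clang (statement expressions are a GNU extension).
 */
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

#define prepare_or_err(ptr)                     \
        ({                                      \
                long err = 0;                   \
                if ((ptr) == NULL)              \
                        err = -ENOMEM;          \
                err;                            \
        })

int main(void)
{
        void *p = malloc(16);

        printf("populated: %ld\n", prepare_or_err(p));    /* prints 0 */
        printf("failed:    %ld\n", prepare_or_err(NULL)); /* prints -ENOMEM (-12 on Linux) */

        free(p);
        return 0;
}

The caller-side consequence shows up throughout the hunks above: change_pte_range() through change_protection() now return long, where a negative value propagates the -ENOMEM from these macros and a non-negative value is still the number of entries updated. Separately, change_protection() now derives newprot from vma->vm_page_prot (or PAGE_NONE for NUMA hinting) instead of taking a pgprot_t argument, which is why the call in mprotect_fixup() drops that parameter.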