// SPDX-License-Identifier: GPL-2.0-only
/*
 * mm/userfaultfd.c
 *
 * Copyright (C) 2015 Red Hat, Inc.
 */

#include <linux/mm.h>
#include <linux/sched/signal.h>
#include <linux/pagemap.h>
#include <linux/rmap.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/userfaultfd_k.h>
#include <linux/mmu_notifier.h>
#include <linux/hugetlb.h>
#include <linux/shmem_fs.h>
#include <asm/tlbflush.h>
#include <asm/tlb.h>
#include "internal.h"

static __always_inline
struct vm_area_struct *find_dst_vma(struct mm_struct *dst_mm,
				    unsigned long dst_start,
				    unsigned long len)
{
	/*
	 * Make sure that the dst range is both valid and fully within a
	 * single existing vma.
	 */
	struct vm_area_struct *dst_vma;

	dst_vma = find_vma(dst_mm, dst_start);
	if (!range_in_vma(dst_vma, dst_start, dst_start + len))
		return NULL;

	/*
	 * Check the vma is registered in uffd, this is required to
	 * enforce the VM_MAYWRITE check done at uffd registration
	 * time.
	 */
	if (!dst_vma->vm_userfaultfd_ctx.ctx)
		return NULL;

	return dst_vma;
}

/*
 * Install PTEs, to map dst_addr (within dst_vma) to page.
 *
 * This function handles both MCOPY_ATOMIC_NORMAL and _CONTINUE for both shmem
 * and anon, and for both shared and private VMAs.
 */
int mfill_atomic_install_pte(pmd_t *dst_pmd,
			     struct vm_area_struct *dst_vma,
			     unsigned long dst_addr, struct page *page,
			     bool newly_allocated, uffd_flags_t flags)
{
	int ret;
	struct mm_struct *dst_mm = dst_vma->vm_mm;
	pte_t _dst_pte, *dst_pte;
	bool writable = dst_vma->vm_flags & VM_WRITE;
	bool vm_shared = dst_vma->vm_flags & VM_SHARED;
	bool page_in_cache = page_mapping(page);
	spinlock_t *ptl;
	struct folio *folio;
	struct inode *inode;
	pgoff_t offset, max_off;

	_dst_pte = mk_pte(page, dst_vma->vm_page_prot);
	_dst_pte = pte_mkdirty(_dst_pte);
	if (page_in_cache && !vm_shared)
		writable = false;
	if (writable)
		_dst_pte = pte_mkwrite(_dst_pte);
	if (flags & MFILL_ATOMIC_WP)
		_dst_pte = pte_mkuffd_wp(_dst_pte);

	ret = -EAGAIN;
	dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);
	if (!dst_pte)
		goto out;

	if (vma_is_shmem(dst_vma)) {
		/* serialize against truncate with the page table lock */
		inode = dst_vma->vm_file->f_inode;
		offset = linear_page_index(dst_vma, dst_addr);
		max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
		ret = -EFAULT;
		if (unlikely(offset >= max_off))
			goto out_unlock;
	}

	ret = -EEXIST;
	/*
	 * We allow overwriting a pte marker: consider the case where both
	 * MISSING|WP are registered; we first wr-protect a none pte which
	 * has no page cache page backing it, then access the page.
	 */
	if (!pte_none_mostly(ptep_get(dst_pte)))
		goto out_unlock;

	folio = page_folio(page);
	if (page_in_cache) {
		/* Usually, cache pages are already added to LRU */
		if (newly_allocated)
			folio_add_lru(folio);
		page_add_file_rmap(page, dst_vma, false);
	} else {
		page_add_new_anon_rmap(page, dst_vma, dst_addr);
		folio_add_lru_vma(folio, dst_vma);
	}

	/*
	 * Must happen after rmap, as mm_counter() checks mapping (via
	 * PageAnon()), which is set by __page_set_anon_rmap().
	 */
	inc_mm_counter(dst_mm, mm_counter(page));

	set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);

	/* No need to invalidate - it was non-present before */
	update_mmu_cache(dst_vma, dst_addr, dst_pte);
	ret = 0;
out_unlock:
	pte_unmap_unlock(dst_pte, ptl);
out:
	return ret;
}

static int mfill_atomic_pte_copy(pmd_t *dst_pmd,
				 struct vm_area_struct *dst_vma,
				 unsigned long dst_addr,
				 unsigned long src_addr,
				 uffd_flags_t flags,
				 struct folio **foliop)
{
	void *kaddr;
	int ret;
	struct folio *folio;

	if (!*foliop) {
		ret = -ENOMEM;
		folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0, dst_vma,
					dst_addr, false);
		if (!folio)
			goto out;

		kaddr = kmap_local_folio(folio, 0);
		/*
		 * The read mmap_lock is held here. Despite the
		 * mmap_lock being read recursive a deadlock is still
		 * possible if a writer has taken a lock. For example:
		 *
		 * process A thread 1 takes read lock on own mmap_lock
		 * process A thread 2 calls mmap, blocks taking write lock
		 * process B thread 1 takes page fault, read lock on own mmap lock
		 * process B thread 2 calls mmap, blocks taking write lock
		 * process A thread 1 blocks taking read lock on process B
		 * process B thread 1 blocks taking read lock on process A
		 *
		 * Disable page faults to prevent potential deadlock
		 * and retry the copy outside the mmap_lock.
		 */
		pagefault_disable();
		ret = copy_from_user(kaddr, (const void __user *) src_addr,
				     PAGE_SIZE);
		pagefault_enable();
		kunmap_local(kaddr);

		/* fallback to copy_from_user outside mmap_lock */
		if (unlikely(ret)) {
			ret = -ENOENT;
			*foliop = folio;
			/* don't free the page */
			goto out;
		}

		flush_dcache_folio(folio);
	} else {
		folio = *foliop;
		*foliop = NULL;
	}

	/*
	 * The memory barrier inside __folio_mark_uptodate makes sure that
	 * preceding stores to the page contents become visible before
	 * the set_pte_at() write.
	 */
	__folio_mark_uptodate(folio);

	ret = -ENOMEM;
	if (mem_cgroup_charge(folio, dst_vma->vm_mm, GFP_KERNEL))
		goto out_release;

	ret = mfill_atomic_install_pte(dst_pmd, dst_vma, dst_addr,
				       &folio->page, true, flags);
	if (ret)
		goto out_release;
out:
	return ret;
out_release:
	folio_put(folio);
	goto out;
}

/* Handles MFILL_ATOMIC_ZEROPAGE by installing a pte for the zero page. */
static int mfill_atomic_pte_zeropage(pmd_t *dst_pmd,
				     struct vm_area_struct *dst_vma,
				     unsigned long dst_addr)
{
	pte_t _dst_pte, *dst_pte;
	spinlock_t *ptl;
	int ret;
	pgoff_t offset, max_off;
	struct inode *inode;

	_dst_pte = pte_mkspecial(pfn_pte(my_zero_pfn(dst_addr),
					 dst_vma->vm_page_prot));
	ret = -EAGAIN;
	dst_pte = pte_offset_map_lock(dst_vma->vm_mm, dst_pmd, dst_addr, &ptl);
	if (!dst_pte)
		goto out;
	if (dst_vma->vm_file) {
		/* the shmem MAP_PRIVATE case requires checking the i_size */
		inode = dst_vma->vm_file->f_inode;
		offset = linear_page_index(dst_vma, dst_addr);
		max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
		ret = -EFAULT;
		if (unlikely(offset >= max_off))
			goto out_unlock;
	}
	ret = -EEXIST;
	if (!pte_none(ptep_get(dst_pte)))
		goto out_unlock;
	set_pte_at(dst_vma->vm_mm, dst_addr, dst_pte, _dst_pte);
	/* No need to invalidate - it was non-present before */
	update_mmu_cache(dst_vma, dst_addr, dst_pte);
	ret = 0;
out_unlock:
	pte_unmap_unlock(dst_pte, ptl);
out:
	return ret;
}

/* Handles UFFDIO_CONTINUE for all shmem VMAs (shared or private). */
static int mfill_atomic_pte_continue(pmd_t *dst_pmd,
				     struct vm_area_struct *dst_vma,
				     unsigned long dst_addr,
				     uffd_flags_t flags)
{
	struct inode *inode = file_inode(dst_vma->vm_file);
	pgoff_t pgoff = linear_page_index(dst_vma, dst_addr);
	struct folio *folio;
	struct page *page;
	int ret;

	ret = shmem_get_folio(inode, pgoff, &folio, SGP_NOALLOC);
	/* Our caller expects us to return -EFAULT if we failed to find folio */
	if (ret == -ENOENT)
		ret = -EFAULT;
	if (ret)
		goto out;
	if (!folio) {
		ret = -EFAULT;
		goto out;
	}

	page = folio_file_page(folio, pgoff);
	if (PageHWPoison(page)) {
		ret = -EIO;
		goto out_release;
	}

	ret = mfill_atomic_install_pte(dst_pmd, dst_vma, dst_addr,
				       page, false, flags);
	if (ret)
		goto out_release;

	folio_unlock(folio);
	ret = 0;
out:
	return ret;
out_release:
	folio_unlock(folio);
	folio_put(folio);
	goto out;
}

/* Walk and, if necessary, allocate page tables down to the pmd for @address. */
static pmd_t *mm_alloc_pmd(struct mm_struct *mm, unsigned long address)
{
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;

	pgd = pgd_offset(mm, address);
	p4d = p4d_alloc(mm, pgd, address);
	if (!p4d)
		return NULL;
	pud = pud_alloc(mm, p4d, address);
	if (!pud)
		return NULL;
	/*
	 * Note that we don't run this only because the pmd was missing;
	 * the *pmd may already be established and in turn may be a
	 * trans_huge_pmd.
	 */
	return pmd_alloc(mm, pud, address);
}

#ifdef CONFIG_HUGETLB_PAGE
/*
 * mfill_atomic processing for HUGETLB vmas.  Note that this routine is
 * called with mmap_lock held, it will release mmap_lock before returning.
 */
static __always_inline ssize_t mfill_atomic_hugetlb(
					      struct vm_area_struct *dst_vma,
					      unsigned long dst_start,
					      unsigned long src_start,
					      unsigned long len,
					      uffd_flags_t flags)
{
	struct mm_struct *dst_mm = dst_vma->vm_mm;
	int vm_shared = dst_vma->vm_flags & VM_SHARED;
	ssize_t err;
	pte_t *dst_pte;
	unsigned long src_addr, dst_addr;
	long copied;
	struct folio *folio;
	unsigned long vma_hpagesize;
	pgoff_t idx;
	u32 hash;
	struct address_space *mapping;

	/*
	 * There is no default zero huge page for all huge page sizes as
	 * supported by hugetlb.  A PMD_SIZE huge page may exist as used
	 * by THP.  Since we cannot reliably insert a zero page, this
	 * feature is not supported.
	 */
	if (uffd_flags_mode_is(flags, MFILL_ATOMIC_ZEROPAGE)) {
		mmap_read_unlock(dst_mm);
		return -EINVAL;
	}

	src_addr = src_start;
	dst_addr = dst_start;
	copied = 0;
	folio = NULL;
	vma_hpagesize = vma_kernel_pagesize(dst_vma);

	/*
	 * Validate alignment based on huge page size
	 */
	err = -EINVAL;
	if (dst_start & (vma_hpagesize - 1) || len & (vma_hpagesize - 1))
		goto out_unlock;

retry:
	/*
	 * On routine entry dst_vma is set.  If we had to drop mmap_lock and
	 * retry, dst_vma will be set to NULL and we must look it up again.
	 */
	if (!dst_vma) {
		err = -ENOENT;
		dst_vma = find_dst_vma(dst_mm, dst_start, len);
		if (!dst_vma || !is_vm_hugetlb_page(dst_vma))
			goto out_unlock;

		err = -EINVAL;
		if (vma_hpagesize != vma_kernel_pagesize(dst_vma))
			goto out_unlock;

		vm_shared = dst_vma->vm_flags & VM_SHARED;
	}

	/*
	 * If not shared, ensure the dst_vma has an anon_vma.
	 */
	err = -ENOMEM;
	if (!vm_shared) {
		if (unlikely(anon_vma_prepare(dst_vma)))
			goto out_unlock;
	}

	while (src_addr < src_start + len) {
		BUG_ON(dst_addr >= dst_start + len);

		/*
		 * Serialize via vma_lock and hugetlb_fault_mutex.
		 * vma_lock ensures the dst_pte remains valid even
		 * in the case of shared pmds.  fault mutex prevents
		 * races with other faulting threads.
		 */
		idx = linear_page_index(dst_vma, dst_addr);
		mapping = dst_vma->vm_file->f_mapping;
		hash = hugetlb_fault_mutex_hash(mapping, idx);
		mutex_lock(&hugetlb_fault_mutex_table[hash]);
		hugetlb_vma_lock_read(dst_vma);

		err = -ENOMEM;
		dst_pte = huge_pte_alloc(dst_mm, dst_vma, dst_addr, vma_hpagesize);
		if (!dst_pte) {
			hugetlb_vma_unlock_read(dst_vma);
			mutex_unlock(&hugetlb_fault_mutex_table[hash]);
			goto out_unlock;
		}

		if (!uffd_flags_mode_is(flags, MFILL_ATOMIC_CONTINUE) &&
		    !huge_pte_none_mostly(huge_ptep_get(dst_pte))) {
			err = -EEXIST;
			hugetlb_vma_unlock_read(dst_vma);
			mutex_unlock(&hugetlb_fault_mutex_table[hash]);
			goto out_unlock;
		}

		err = hugetlb_mfill_atomic_pte(dst_pte, dst_vma, dst_addr,
					       src_addr, flags, &folio);

		hugetlb_vma_unlock_read(dst_vma);
		mutex_unlock(&hugetlb_fault_mutex_table[hash]);

		cond_resched();

		if (unlikely(err == -ENOENT)) {
			mmap_read_unlock(dst_mm);
			BUG_ON(!folio);

			err = copy_folio_from_user(folio,
						   (const void __user *)src_addr, true);
			if (unlikely(err)) {
				err = -EFAULT;
				goto out;
			}
			mmap_read_lock(dst_mm);

			dst_vma = NULL;
			goto retry;
		} else
			BUG_ON(folio);

		if (!err) {
			dst_addr += vma_hpagesize;
			src_addr += vma_hpagesize;
			copied += vma_hpagesize;

			if (fatal_signal_pending(current))
				err = -EINTR;
		}
		if (err)
			break;
	}

out_unlock:
	mmap_read_unlock(dst_mm);
out:
	if (folio)
		folio_put(folio);
	BUG_ON(copied < 0);
	BUG_ON(err > 0);
	BUG_ON(!copied && !err);
	return copied ? copied : err;
}
#else /* !CONFIG_HUGETLB_PAGE */
/* fail at build time if gcc attempts to use this */
extern ssize_t mfill_atomic_hugetlb(struct vm_area_struct *dst_vma,
				    unsigned long dst_start,
				    unsigned long src_start,
				    unsigned long len,
				    uffd_flags_t flags);
#endif /* CONFIG_HUGETLB_PAGE */

static __always_inline ssize_t mfill_atomic_pte(pmd_t *dst_pmd,
						struct vm_area_struct *dst_vma,
						unsigned long dst_addr,
						unsigned long src_addr,
						uffd_flags_t flags,
						struct folio **foliop)
{
	ssize_t err;

	if (uffd_flags_mode_is(flags, MFILL_ATOMIC_CONTINUE)) {
		return mfill_atomic_pte_continue(dst_pmd, dst_vma,
						 dst_addr, flags);
	}

	/*
	 * The normal page fault path for a shmem will invoke the
	 * fault, fill the hole in the file and COW it right away. The
	 * result generates plain anonymous memory. So when we are
	 * asked to fill a hole in a MAP_PRIVATE shmem mapping, we'll
	 * generate anonymous memory directly without actually filling
	 * the hole. For the MAP_PRIVATE case the robustness check
	 * only happens in the pagetable (to verify it's still none)
	 * and not in the radix tree.
	 */
	if (!(dst_vma->vm_flags & VM_SHARED)) {
		if (uffd_flags_mode_is(flags, MFILL_ATOMIC_COPY))
			err = mfill_atomic_pte_copy(dst_pmd, dst_vma,
						    dst_addr, src_addr,
						    flags, foliop);
		else
			err = mfill_atomic_pte_zeropage(dst_pmd,
							dst_vma, dst_addr);
	} else {
		err = shmem_mfill_atomic_pte(dst_pmd, dst_vma,
					     dst_addr, src_addr,
					     flags, foliop);
	}

	return err;
}

static __always_inline ssize_t mfill_atomic(struct mm_struct *dst_mm,
					    unsigned long dst_start,
					    unsigned long src_start,
					    unsigned long len,
					    atomic_t *mmap_changing,
					    uffd_flags_t flags)
{
	struct vm_area_struct *dst_vma;
	ssize_t err;
	pmd_t *dst_pmd;
	unsigned long src_addr, dst_addr;
	long copied;
	struct folio *folio;

	/*
	 * Sanitize the command parameters:
	 */
	BUG_ON(dst_start & ~PAGE_MASK);
	BUG_ON(len & ~PAGE_MASK);

	/* Does the address range wrap, or is the span zero-sized? */
	BUG_ON(src_start + len <= src_start);
	BUG_ON(dst_start + len <= dst_start);

	src_addr = src_start;
	dst_addr = dst_start;
	copied = 0;
	folio = NULL;
retry:
	mmap_read_lock(dst_mm);

	/*
	 * If memory mappings are changing because of non-cooperative
	 * operation (e.g. mremap) running in parallel, bail out and
	 * request the user to retry later
	 */
	err = -EAGAIN;
	if (mmap_changing && atomic_read(mmap_changing))
		goto out_unlock;

	/*
	 * Make sure the vma is not shared, that the dst range is
	 * both valid and fully within a single existing vma.
	 */
	err = -ENOENT;
	dst_vma = find_dst_vma(dst_mm, dst_start, len);
	if (!dst_vma)
		goto out_unlock;

	err = -EINVAL;
	/*
	 * shmem_zero_setup is invoked in mmap for MAP_ANONYMOUS|MAP_SHARED but
	 * it will overwrite vm_ops, so vma_is_anonymous must return false.
	 */
	if (WARN_ON_ONCE(vma_is_anonymous(dst_vma) &&
	    dst_vma->vm_flags & VM_SHARED))
		goto out_unlock;

	/*
	 * validate 'mode' now that we know the dst_vma: don't allow
	 * a wrprotect copy if the userfaultfd didn't register as WP.
	 */
	if ((flags & MFILL_ATOMIC_WP) && !(dst_vma->vm_flags & VM_UFFD_WP))
		goto out_unlock;

	/*
	 * If this is a HUGETLB vma, pass off to appropriate routine
	 */
	if (is_vm_hugetlb_page(dst_vma))
		return mfill_atomic_hugetlb(dst_vma, dst_start,
					    src_start, len, flags);

	if (!vma_is_anonymous(dst_vma) && !vma_is_shmem(dst_vma))
		goto out_unlock;
	if (!vma_is_shmem(dst_vma) &&
	    uffd_flags_mode_is(flags, MFILL_ATOMIC_CONTINUE))
		goto out_unlock;

	/*
	 * Ensure the dst_vma has an anon_vma or this page
	 * would get a NULL anon_vma when moved in the
	 * dst_vma.
	 */
	err = -ENOMEM;
	if (!(dst_vma->vm_flags & VM_SHARED) &&
	    unlikely(anon_vma_prepare(dst_vma)))
		goto out_unlock;

	while (src_addr < src_start + len) {
		pmd_t dst_pmdval;

		BUG_ON(dst_addr >= dst_start + len);

		dst_pmd = mm_alloc_pmd(dst_mm, dst_addr);
		if (unlikely(!dst_pmd)) {
			err = -ENOMEM;
			break;
		}

		dst_pmdval = pmdp_get_lockless(dst_pmd);
		/*
		 * If the dst_pmd is mapped as a THP, don't
		 * override it and just be strict.
		 */
		if (unlikely(pmd_trans_huge(dst_pmdval))) {
			err = -EEXIST;
			break;
		}
		if (unlikely(pmd_none(dst_pmdval)) &&
		    unlikely(__pte_alloc(dst_mm, dst_pmd))) {
			err = -ENOMEM;
			break;
		}
		/* If a huge pmd materialized from under us, fail */
		if (unlikely(pmd_trans_huge(*dst_pmd))) {
			err = -EFAULT;
			break;
		}

		BUG_ON(pmd_none(*dst_pmd));
		BUG_ON(pmd_trans_huge(*dst_pmd));

		err = mfill_atomic_pte(dst_pmd, dst_vma, dst_addr,
				       src_addr, flags, &folio);
		cond_resched();

		if (unlikely(err == -ENOENT)) {
			void *kaddr;

			mmap_read_unlock(dst_mm);
			BUG_ON(!folio);

			kaddr = kmap_local_folio(folio, 0);
			err = copy_from_user(kaddr,
					     (const void __user *) src_addr,
					     PAGE_SIZE);
			kunmap_local(kaddr);
			if (unlikely(err)) {
				err = -EFAULT;
				goto out;
			}
			flush_dcache_folio(folio);
			goto retry;
		} else
			BUG_ON(folio);

		if (!err) {
			dst_addr += PAGE_SIZE;
			src_addr += PAGE_SIZE;
			copied += PAGE_SIZE;

			if (fatal_signal_pending(current))
				err = -EINTR;
		}
		if (err)
			break;
	}

out_unlock:
	mmap_read_unlock(dst_mm);
out:
	if (folio)
		folio_put(folio);
	BUG_ON(copied < 0);
	BUG_ON(err > 0);
	BUG_ON(!copied && !err);
	return copied ? copied : err;
}

ssize_t mfill_atomic_copy(struct mm_struct *dst_mm, unsigned long dst_start,
			  unsigned long src_start, unsigned long len,
			  atomic_t *mmap_changing, uffd_flags_t flags)
{
	return mfill_atomic(dst_mm, dst_start, src_start, len, mmap_changing,
			    uffd_flags_set_mode(flags, MFILL_ATOMIC_COPY));
}

ssize_t mfill_atomic_zeropage(struct mm_struct *dst_mm, unsigned long start,
			      unsigned long len, atomic_t *mmap_changing)
{
	return mfill_atomic(dst_mm, start, 0, len, mmap_changing,
			    uffd_flags_set_mode(0, MFILL_ATOMIC_ZEROPAGE));
}

ssize_t mfill_atomic_continue(struct mm_struct *dst_mm, unsigned long start,
			      unsigned long len, atomic_t *mmap_changing,
			      uffd_flags_t flags)
{
	return mfill_atomic(dst_mm, start, 0, len, mmap_changing,
			    uffd_flags_set_mode(flags, MFILL_ATOMIC_CONTINUE));
}
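
/*
 * Illustrative sketch, not kernel code: the three wrappers above are called
 * from the UFFDIO_COPY, UFFDIO_ZEROPAGE and UFFDIO_CONTINUE ioctl handlers
 * (see fs/userfaultfd.c).  Assuming a userfaultfd 'uffd' already registered
 * over the faulting range, a userspace fault handler resolves a MISSING
 * fault roughly like this ('fault_addr', 'src_buf' and 'page_size' are
 * placeholders, error handling omitted):
 *
 *	struct uffdio_copy copy = {
 *		.dst  = fault_addr & ~(page_size - 1),
 *		.src  = (unsigned long)src_buf,
 *		.len  = page_size,
 *		.mode = 0,
 *	};
 *	ioctl(uffd, UFFDIO_COPY, &copy);
 *
 * 'mode' may also carry UFFDIO_COPY_MODE_WP or UFFDIO_COPY_MODE_DONTWAKE.
 * On return, copy.copy holds the number of bytes copied or a negative errno;
 * -EAGAIN (reported e.g. while mmap_changing is set) means userspace should
 * simply retry, matching the retry semantics implemented in mfill_atomic().
 */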

long uffd_wp_range(struct vm_area_struct *dst_vma,
		   unsigned long start, unsigned long len, bool enable_wp)
{
	unsigned int mm_cp_flags;
	struct mmu_gather tlb;
	long ret;

	VM_WARN_ONCE(start < dst_vma->vm_start || start + len > dst_vma->vm_end,
			"The address range exceeds VMA boundary.\n");
	if (enable_wp)
		mm_cp_flags = MM_CP_UFFD_WP;
	else
		mm_cp_flags = MM_CP_UFFD_WP_RESOLVE;

	/*
	 * vma->vm_page_prot already reflects that uffd-wp is enabled for this
	 * VMA (see userfaultfd_set_vm_flags()) and that all PTEs are supposed
	 * to be write-protected by default whenever protection changes.
	 * Try upgrading write permissions manually.
	 */
	if (!enable_wp && vma_wants_manual_pte_write_upgrade(dst_vma))
		mm_cp_flags |= MM_CP_TRY_CHANGE_WRITABLE;
	tlb_gather_mmu(&tlb, dst_vma->vm_mm);
	ret = change_protection(&tlb, dst_vma, start, start + len, mm_cp_flags);
	tlb_finish_mmu(&tlb);

	return ret;
}

/* Toggle uffd-wp write protection across the VMAs covering [start, start + len). */
int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start,
			unsigned long len, bool enable_wp,
			atomic_t *mmap_changing)
{
	unsigned long end = start + len;
	unsigned long _start, _end;
	struct vm_area_struct *dst_vma;
	unsigned long page_mask;
	long err;
	VMA_ITERATOR(vmi, dst_mm, start);

	/*
	 * Sanitize the command parameters:
	 */
	BUG_ON(start & ~PAGE_MASK);
	BUG_ON(len & ~PAGE_MASK);

	/* Does the address range wrap, or is the span zero-sized? */
	BUG_ON(start + len <= start);

	mmap_read_lock(dst_mm);

	/*
	 * If memory mappings are changing because of non-cooperative
	 * operation (e.g. mremap) running in parallel, bail out and
	 * request the user to retry later
	 */
	err = -EAGAIN;
	if (mmap_changing && atomic_read(mmap_changing))
		goto out_unlock;

	err = -ENOENT;
	for_each_vma_range(vmi, dst_vma, end) {

		if (!userfaultfd_wp(dst_vma)) {
			err = -ENOENT;
			break;
		}

		if (is_vm_hugetlb_page(dst_vma)) {
			err = -EINVAL;
			page_mask = vma_kernel_pagesize(dst_vma) - 1;
			if ((start & page_mask) || (len & page_mask))
				break;
		}

		_start = max(dst_vma->vm_start, start);
		_end = min(dst_vma->vm_end, end);

		err = uffd_wp_range(dst_vma, _start, _end - _start, enable_wp);

		/* Return 0 on success, <0 on failures */
		if (err < 0)
			break;
		err = 0;
	}
out_unlock:
	mmap_read_unlock(dst_mm);
	return err;
}
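
/*
 * Illustrative sketch, not kernel code: mwriteprotect_range() above backs the
 * UFFDIO_WRITEPROTECT ioctl (see fs/userfaultfd.c).  For a range registered
 * with UFFDIO_REGISTER_MODE_WP, userspace toggles write protection roughly
 * like this ('uffd', 'addr' and 'len' are placeholders, error handling
 * omitted):
 *
 *	struct uffdio_writeprotect wp = {
 *		.range = { .start = addr, .len = len },
 *		.mode  = UFFDIO_WRITEPROTECT_MODE_WP,
 *	};
 *	ioctl(uffd, UFFDIO_WRITEPROTECT, &wp);
 *
 * Passing mode without UFFDIO_WRITEPROTECT_MODE_WP clears the protection
 * again; that corresponds to enable_wp == false here, which selects
 * MM_CP_UFFD_WP_RESOLVE and may restore write permission via
 * MM_CP_TRY_CHANGE_WRITABLE.
 */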