// SPDX-License-Identifier: GPL-2.0
/*
 * mm/mprotect.c
 *
 * (C) Copyright 1994 Linus Torvalds
 * (C) Copyright 2002 Christoph Hellwig
 *
 * Address space accounting code	<alan@lxorguk.ukuu.org.uk>
 * (C) Copyright 2002 Red Hat Inc, All Rights Reserved
 */

#include <linux/pagewalk.h>
#include <linux/hugetlb.h>
#include <linux/shm.h>
#include <linux/mman.h>
#include <linux/fs.h>
#include <linux/highmem.h>
#include <linux/security.h>
#include <linux/mempolicy.h>
#include <linux/personality.h>
#include <linux/syscalls.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/mmu_notifier.h>
#include <linux/migrate.h>
#include <linux/perf_event.h>
#include <linux/pkeys.h>
#include <linux/ksm.h>
#include <linux/uaccess.h>
#include <linux/mm_inline.h>
#include <linux/pgtable.h>
#include <linux/sched/sysctl.h>
#include <asm/cacheflush.h>
#include <asm/mmu_context.h>
#include <asm/tlbflush.h>
#include <asm/tlb.h>

#include "internal.h"

static unsigned long change_pte_range(struct mmu_gather *tlb,
		struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr,
		unsigned long end, pgprot_t newprot, unsigned long cp_flags)
{
	pte_t *pte, oldpte;
	spinlock_t *ptl;
	unsigned long pages = 0;
	int target_node = NUMA_NO_NODE;
	bool dirty_accountable = cp_flags & MM_CP_DIRTY_ACCT;
	bool prot_numa = cp_flags & MM_CP_PROT_NUMA;
	bool uffd_wp = cp_flags & MM_CP_UFFD_WP;
	bool uffd_wp_resolve = cp_flags & MM_CP_UFFD_WP_RESOLVE;

	tlb_change_page_size(tlb, PAGE_SIZE);

	/*
	 * Can be called with only the mmap_lock for reading by
	 * prot_numa so we must check the pmd isn't constantly
	 * changing from under us from pmd_none to pmd_trans_huge
	 * and/or the other way around.
	 */
	if (pmd_trans_unstable(pmd))
		return 0;

	/*
	 * The pmd points to a regular pte so the pmd can't change
	 * from under us even if the mmap_lock is only held for
	 * reading.
	 */
	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);

	/* Get target node for single threaded private VMAs */
	if (prot_numa && !(vma->vm_flags & VM_SHARED) &&
	    atomic_read(&vma->vm_mm->mm_users) == 1)
		target_node = numa_node_id();

	flush_tlb_batched_pending(vma->vm_mm);
	arch_enter_lazy_mmu_mode();
	do {
		oldpte = *pte;
		if (pte_present(oldpte)) {
			pte_t ptent;
			bool preserve_write = prot_numa && pte_write(oldpte);

			/*
			 * Avoid trapping faults against the zero or KSM
			 * pages. See similar comment in change_huge_pmd.
			 */
			if (prot_numa) {
				struct page *page;
				int nid;

				/* Avoid TLB flush if possible */
				if (pte_protnone(oldpte))
					continue;

				page = vm_normal_page(vma, addr, oldpte);
				if (!page || PageKsm(page))
					continue;

				/* Also skip shared copy-on-write pages */
				if (is_cow_mapping(vma->vm_flags) &&
				    page_count(page) != 1)
					continue;

				/*
				 * While migration can move some dirty pages,
				 * it cannot move them all from MIGRATE_ASYNC
				 * context.
				 */
				if (page_is_file_lru(page) && PageDirty(page))
					continue;

				/*
				 * Don't mess with PTEs if page is already on the node
				 * a single-threaded process is running on.
				 */
				nid = page_to_nid(page);
				if (target_node == nid)
					continue;

				/*
				 * Skip scanning top tier node if normal numa
				 * balancing is disabled
				 */
				if (!(sysctl_numa_balancing_mode & NUMA_BALANCING_NORMAL) &&
				    node_is_toptier(nid))
					continue;
			}

			oldpte = ptep_modify_prot_start(vma, addr, pte);
			ptent = pte_modify(oldpte, newprot);
			if (preserve_write)
				ptent = pte_mk_savedwrite(ptent);

			if (uffd_wp) {
				ptent = pte_wrprotect(ptent);
				ptent = pte_mkuffd_wp(ptent);
			} else if (uffd_wp_resolve) {
				/*
				 * Leave the write bit to be handled by the
				 * page fault handler so that things like
				 * COW can be handled properly.
				 */
				ptent = pte_clear_uffd_wp(ptent);
			}

			/* Avoid taking write faults for known dirty pages */
			if (dirty_accountable && pte_dirty(ptent) &&
			    (pte_soft_dirty(ptent) ||
			     !(vma->vm_flags & VM_SOFTDIRTY))) {
				ptent = pte_mkwrite(ptent);
			}
			ptep_modify_prot_commit(vma, addr, pte, oldpte, ptent);
			if (pte_needs_flush(oldpte, ptent))
				tlb_flush_pte_range(tlb, addr, PAGE_SIZE);
			pages++;
		} else if (is_swap_pte(oldpte)) {
			swp_entry_t entry = pte_to_swp_entry(oldpte);
			struct page *page = pfn_swap_entry_to_page(entry);
			pte_t newpte;

			if (is_writable_migration_entry(entry)) {
				/*
				 * A protection check is difficult so
				 * just be safe and disable write
				 */
				if (PageAnon(page))
					entry = make_readable_exclusive_migration_entry(
							     swp_offset(entry));
				else
					entry = make_readable_migration_entry(swp_offset(entry));
				newpte = swp_entry_to_pte(entry);
				if (pte_swp_soft_dirty(oldpte))
					newpte = pte_swp_mksoft_dirty(newpte);
				if (pte_swp_uffd_wp(oldpte))
					newpte = pte_swp_mkuffd_wp(newpte);
			} else if (is_writable_device_private_entry(entry)) {
				/*
				 * We do not preserve soft-dirtiness. See
				 * copy_one_pte() for explanation.
				 */
				entry = make_readable_device_private_entry(
							swp_offset(entry));
				newpte = swp_entry_to_pte(entry);
				if (pte_swp_uffd_wp(oldpte))
					newpte = pte_swp_mkuffd_wp(newpte);
			} else if (is_writable_device_exclusive_entry(entry)) {
				entry = make_readable_device_exclusive_entry(
							swp_offset(entry));
				newpte = swp_entry_to_pte(entry);
				if (pte_swp_soft_dirty(oldpte))
					newpte = pte_swp_mksoft_dirty(newpte);
				if (pte_swp_uffd_wp(oldpte))
					newpte = pte_swp_mkuffd_wp(newpte);
			} else {
				newpte = oldpte;
			}

			if (uffd_wp)
				newpte = pte_swp_mkuffd_wp(newpte);
			else if (uffd_wp_resolve)
				newpte = pte_swp_clear_uffd_wp(newpte);

			if (!pte_same(oldpte, newpte)) {
				set_pte_at(vma->vm_mm, addr, pte, newpte);
				pages++;
			}
		}
	} while (pte++, addr += PAGE_SIZE, addr != end);
	arch_leave_lazy_mmu_mode();
	pte_unmap_unlock(pte - 1, ptl);

	return pages;
}

/*
 * Used when setting automatic NUMA hinting protection where it is
 * critical that a numa hinting PMD is not confused with a bad PMD.
 */
static inline int pmd_none_or_clear_bad_unless_trans_huge(pmd_t *pmd)
{
	pmd_t pmdval = pmd_read_atomic(pmd);

	/* See pmd_none_or_trans_huge_or_clear_bad for info on barrier */
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	barrier();
#endif

	if (pmd_none(pmdval))
		return 1;
	if (pmd_trans_huge(pmdval))
		return 0;
	if (unlikely(pmd_bad(pmdval))) {
		pmd_clear_bad(pmd);
		return 1;
	}

	return 0;
}

static inline unsigned long change_pmd_range(struct mmu_gather *tlb,
		struct vm_area_struct *vma, pud_t *pud, unsigned long addr,
		unsigned long end, pgprot_t newprot, unsigned long cp_flags)
{
	pmd_t *pmd;
	unsigned long next;
	unsigned long pages = 0;
	unsigned long nr_huge_updates = 0;
	struct mmu_notifier_range range;

	range.start = 0;

	pmd = pmd_offset(pud, addr);
	do {
		unsigned long this_pages;

		next = pmd_addr_end(addr, end);

		/*
		 * Automatic NUMA balancing walks the tables with mmap_lock
		 * held for read. It's possible for a parallel update to occur
		 * between pmd_trans_huge() and a pmd_none_or_clear_bad()
		 * check, leading to a false positive and clearing.
		 * Hence, it's necessary to atomically read the PMD value
		 * for all the checks.
		 */
		if (!is_swap_pmd(*pmd) && !pmd_devmap(*pmd) &&
		     pmd_none_or_clear_bad_unless_trans_huge(pmd))
			goto next;

		/* invoke the mmu notifier if the pmd is populated */
		if (!range.start) {
			mmu_notifier_range_init(&range,
				MMU_NOTIFY_PROTECTION_VMA, 0,
				vma, vma->vm_mm, addr, end);
			mmu_notifier_invalidate_range_start(&range);
		}

		if (is_swap_pmd(*pmd) || pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) {
			if (next - addr != HPAGE_PMD_SIZE) {
				__split_huge_pmd(vma, pmd, addr, false, NULL);
			} else {
				/*
				 * change_huge_pmd() does not defer TLB flushes,
				 * so no need to propagate the tlb argument.
				 */
				int nr_ptes = change_huge_pmd(tlb, vma, pmd,
						addr, newprot, cp_flags);

				if (nr_ptes) {
					if (nr_ptes == HPAGE_PMD_NR) {
						pages += HPAGE_PMD_NR;
						nr_huge_updates++;
					}

					/* huge pmd was handled */
					goto next;
				}
			}
			/* fall through, the trans huge pmd just split */
		}
		this_pages = change_pte_range(tlb, vma, pmd, addr, next,
					      newprot, cp_flags);
		pages += this_pages;
next:
		cond_resched();
	} while (pmd++, addr = next, addr != end);

	if (range.start)
		mmu_notifier_invalidate_range_end(&range);

	if (nr_huge_updates)
		count_vm_numa_events(NUMA_HUGE_PTE_UPDATES, nr_huge_updates);
	return pages;
}

static inline unsigned long change_pud_range(struct mmu_gather *tlb,
		struct vm_area_struct *vma, p4d_t *p4d, unsigned long addr,
		unsigned long end, pgprot_t newprot, unsigned long cp_flags)
{
	pud_t *pud;
	unsigned long next;
	unsigned long pages = 0;

	pud = pud_offset(p4d, addr);
	do {
		next = pud_addr_end(addr, end);
		if (pud_none_or_clear_bad(pud))
			continue;
		pages += change_pmd_range(tlb, vma, pud, addr, next, newprot,
					  cp_flags);
	} while (pud++, addr = next, addr != end);

	return pages;
}

static inline unsigned long change_p4d_range(struct mmu_gather *tlb,
		struct vm_area_struct *vma, pgd_t *pgd, unsigned long addr,
		unsigned long end, pgprot_t newprot, unsigned long cp_flags)
{
	p4d_t *p4d;
	unsigned long next;
	unsigned long pages = 0;

	p4d = p4d_offset(pgd, addr);
	do {
		next = p4d_addr_end(addr, end);
		if (p4d_none_or_clear_bad(p4d))
			continue;
		pages += change_pud_range(tlb, vma, p4d, addr, next, newprot,
					  cp_flags);
	} while (p4d++, addr = next, addr != end);

	return pages;
}

static unsigned long change_protection_range(struct mmu_gather *tlb,
		struct vm_area_struct *vma, unsigned long addr,
		unsigned long end, pgprot_t newprot, unsigned long cp_flags)
{
	struct mm_struct *mm = vma->vm_mm;
	pgd_t *pgd;
	unsigned long next;
	unsigned long pages = 0;

	BUG_ON(addr >= end);
	pgd = pgd_offset(mm, addr);
	tlb_start_vma(tlb, vma);
	do {
		next = pgd_addr_end(addr, end);
		if (pgd_none_or_clear_bad(pgd))
			continue;
		pages += change_p4d_range(tlb, vma, pgd, addr, next, newprot,
					  cp_flags);
	} while (pgd++, addr = next, addr != end);

	tlb_end_vma(tlb, vma);

	return pages;
}

unsigned long change_protection(struct mmu_gather *tlb,
		       struct vm_area_struct *vma, unsigned long start,
		       unsigned long end, pgprot_t newprot,
		       unsigned long cp_flags)
{
	unsigned long pages;

	BUG_ON((cp_flags & MM_CP_UFFD_WP_ALL) == MM_CP_UFFD_WP_ALL);

	if (is_vm_hugetlb_page(vma))
		pages = hugetlb_change_protection(vma, start, end, newprot);
	else
		pages = change_protection_range(tlb, vma, start, end, newprot,
						cp_flags);

	return pages;
}

static int prot_none_pte_entry(pte_t *pte, unsigned long addr,
			       unsigned long next, struct mm_walk *walk)
{
	return pfn_modify_allowed(pte_pfn(*pte), *(pgprot_t *)(walk->private)) ?
		0 : -EACCES;
}

static int prot_none_hugetlb_entry(pte_t *pte, unsigned long hmask,
				   unsigned long addr, unsigned long next,
				   struct mm_walk *walk)
{
	return pfn_modify_allowed(pte_pfn(*pte), *(pgprot_t *)(walk->private)) ?
		0 : -EACCES;
}

static int prot_none_test(unsigned long addr, unsigned long next,
			  struct mm_walk *walk)
{
	return 0;
}

static const struct mm_walk_ops prot_none_walk_ops = {
	.pte_entry		= prot_none_pte_entry,
	.hugetlb_entry		= prot_none_hugetlb_entry,
	.test_walk		= prot_none_test,
};

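/*
 * Note (editorial, descriptive only): the walk ops above are used by
 * mprotect_fixup() below. When a VM_PFNMAP/VM_MIXEDMAP mapping is being
 * stripped of all access bits (a PROT_NONE-style change) on architectures
 * where arch_has_pfn_modify_check() is true, every mapped PFN is vetted
 * with pfn_modify_allowed() before any VMA state is modified.
 */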
int
mprotect_fixup(struct mmu_gather *tlb, struct vm_area_struct *vma,
	       struct vm_area_struct **pprev, unsigned long start,
	       unsigned long end, unsigned long newflags)
{
	struct mm_struct *mm = vma->vm_mm;
	unsigned long oldflags = vma->vm_flags;
	long nrpages = (end - start) >> PAGE_SHIFT;
	unsigned long charged = 0;
	pgoff_t pgoff;
	int error;
	int dirty_accountable = 0;

	if (newflags == oldflags) {
		*pprev = vma;
		return 0;
	}

	/*
	 * Do PROT_NONE PFN permission checks here when we can still
	 * bail out without undoing a lot of state. This is a rather
	 * uncommon case, so doesn't need to be very optimized.
	 */
	if (arch_has_pfn_modify_check() &&
	    (vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) &&
	    (newflags & VM_ACCESS_FLAGS) == 0) {
		pgprot_t new_pgprot = vm_get_page_prot(newflags);

		error = walk_page_range(current->mm, start, end,
				&prot_none_walk_ops, &new_pgprot);
		if (error)
			return error;
	}

	/*
	 * If we make a private mapping writable we increase our commit;
	 * but (without finer accounting) cannot reduce our commit if we
	 * make it unwritable again. hugetlb mappings were accounted for
	 * even if read-only, so there is no need to account for them here.
	 */
	if (newflags & VM_WRITE) {
		/* Check space limits when area turns into data. */
		if (!may_expand_vm(mm, newflags, nrpages) &&
				may_expand_vm(mm, oldflags, nrpages))
			return -ENOMEM;
		if (!(oldflags & (VM_ACCOUNT|VM_WRITE|VM_HUGETLB|
						VM_SHARED|VM_NORESERVE))) {
			charged = nrpages;
			if (security_vm_enough_memory_mm(mm, charged))
				return -ENOMEM;
			newflags |= VM_ACCOUNT;
		}
	}

	/*
	 * First try to merge with previous and/or next vma.
	 */
	pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
	*pprev = vma_merge(mm, *pprev, start, end, newflags,
			   vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma),
			   vma->vm_userfaultfd_ctx, anon_vma_name(vma));
	if (*pprev) {
		vma = *pprev;
		VM_WARN_ON((vma->vm_flags ^ newflags) & ~VM_SOFTDIRTY);
		goto success;
	}

	*pprev = vma;

	if (start != vma->vm_start) {
		error = split_vma(mm, vma, start, 1);
		if (error)
			goto fail;
	}

	if (end != vma->vm_end) {
		error = split_vma(mm, vma, end, 0);
		if (error)
			goto fail;
	}

success:
	/*
	 * vm_flags and vm_page_prot are protected by the mmap_lock
	 * held in write mode.
	 */
	vma->vm_flags = newflags;
	dirty_accountable = vma_wants_writenotify(vma, vma->vm_page_prot);
	vma_set_page_prot(vma);

	change_protection(tlb, vma, start, end, vma->vm_page_prot,
			  dirty_accountable ? MM_CP_DIRTY_ACCT : 0);

	/*
	 * Private VM_LOCKED VMA becoming writable: trigger COW to avoid major
	 * fault on access.
	 */
	if ((oldflags & (VM_WRITE | VM_SHARED | VM_LOCKED)) == VM_LOCKED &&
			(newflags & VM_WRITE)) {
		populate_vma_page_range(vma, start, end, NULL);
	}

	vm_stat_account(mm, oldflags, -nrpages);
	vm_stat_account(mm, newflags, nrpages);
	perf_event_mmap(vma);
	return 0;

fail:
	vm_unacct_memory(charged);
	return error;
}

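/*
 * Overview (editorial, descriptive only): do_mprotect_pkey() below is the
 * common implementation behind both mprotect() and pkey_mprotect(). It
 * takes the mmap lock for writing, walks every VMA intersecting
 * [start, end), applies mprotect_fixup() to each piece, and batches TLB
 * invalidation in a single mmu_gather finished by tlb_finish_mmu().
 */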
/*
 * pkey==-1 when doing a legacy mprotect()
 */
static int do_mprotect_pkey(unsigned long start, size_t len,
		unsigned long prot, int pkey)
{
	unsigned long nstart, end, tmp, reqprot;
	struct vm_area_struct *vma, *prev;
	int error = -EINVAL;
	const int grows = prot & (PROT_GROWSDOWN|PROT_GROWSUP);
	const bool rier = (current->personality & READ_IMPLIES_EXEC) &&
				(prot & PROT_READ);
	struct mmu_gather tlb;

	start = untagged_addr(start);

	prot &= ~(PROT_GROWSDOWN|PROT_GROWSUP);
	if (grows == (PROT_GROWSDOWN|PROT_GROWSUP)) /* can't be both */
		return -EINVAL;

	if (start & ~PAGE_MASK)
		return -EINVAL;
	if (!len)
		return 0;
	len = PAGE_ALIGN(len);
	end = start + len;
	if (end <= start)
		return -ENOMEM;
	if (!arch_validate_prot(prot, start))
		return -EINVAL;

	reqprot = prot;

	if (mmap_write_lock_killable(current->mm))
		return -EINTR;

	/*
	 * If userspace did not allocate the pkey, do not let
	 * them use it here.
	 */
	error = -EINVAL;
	if ((pkey != -1) && !mm_pkey_is_allocated(current->mm, pkey))
		goto out;

	vma = find_vma(current->mm, start);
	error = -ENOMEM;
	if (!vma)
		goto out;

	if (unlikely(grows & PROT_GROWSDOWN)) {
		if (vma->vm_start >= end)
			goto out;
		start = vma->vm_start;
		error = -EINVAL;
		if (!(vma->vm_flags & VM_GROWSDOWN))
			goto out;
	} else {
		if (vma->vm_start > start)
			goto out;
		if (unlikely(grows & PROT_GROWSUP)) {
			end = vma->vm_end;
			error = -EINVAL;
			if (!(vma->vm_flags & VM_GROWSUP))
				goto out;
		}
	}

	if (start > vma->vm_start)
		prev = vma;
	else
		prev = vma->vm_prev;

	tlb_gather_mmu(&tlb, current->mm);
	for (nstart = start ; ; ) {
		unsigned long mask_off_old_flags;
		unsigned long newflags;
		int new_vma_pkey;

		/* Here we know that vma->vm_start <= nstart < vma->vm_end. */

		/* Does the application expect PROT_READ to imply PROT_EXEC? */
		if (rier && (vma->vm_flags & VM_MAYEXEC))
			prot |= PROT_EXEC;

		/*
		 * Each mprotect() call explicitly passes r/w/x permissions.
		 * If a permission is not passed to mprotect(), it must be
		 * cleared from the VMA.
		 */
		mask_off_old_flags = VM_READ | VM_WRITE | VM_EXEC |
					VM_FLAGS_CLEAR;

		new_vma_pkey = arch_override_mprotect_pkey(vma, prot, pkey);
		newflags = calc_vm_prot_bits(prot, new_vma_pkey);
		newflags |= (vma->vm_flags & ~mask_off_old_flags);

		/* newflags >> 4 shifts VM_MAY% in place of VM_% */
		if ((newflags & ~(newflags >> 4)) & VM_ACCESS_FLAGS) {
			error = -EACCES;
			break;
		}

		/* Allow architectures to sanity-check the new flags */
		if (!arch_validate_flags(newflags)) {
			error = -EINVAL;
			break;
		}

		error = security_file_mprotect(vma, reqprot, prot);
		if (error)
			break;

		tmp = vma->vm_end;
		if (tmp > end)
			tmp = end;

		if (vma->vm_ops && vma->vm_ops->mprotect) {
			error = vma->vm_ops->mprotect(vma, nstart, tmp, newflags);
			if (error)
				break;
		}

		error = mprotect_fixup(&tlb, vma, &prev, nstart, tmp, newflags);
		if (error)
			break;

		nstart = tmp;

		if (nstart < prev->vm_end)
			nstart = prev->vm_end;
		if (nstart >= end)
			break;

		vma = prev->vm_next;
		if (!vma || vma->vm_start != nstart) {
			error = -ENOMEM;
			break;
		}
		prot = reqprot;
	}
	tlb_finish_mmu(&tlb);
out:
	mmap_write_unlock(current->mm);
	return error;
}

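/*
 * Illustrative userspace usage (editorial sketch, not part of this file):
 * start must be page-aligned and len is rounded up to a whole number of
 * pages, so a buffer obtained from mmap() can be write-protected directly:
 *
 *	void *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 *	if (buf != MAP_FAILED && mprotect(buf, len, PROT_READ) != 0)
 *		perror("mprotect");
 */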
SYSCALL_DEFINE3(mprotect, unsigned long, start, size_t, len,
		unsigned long, prot)
{
	return do_mprotect_pkey(start, len, prot, -1);
}

#ifdef CONFIG_ARCH_HAS_PKEYS

SYSCALL_DEFINE4(pkey_mprotect, unsigned long, start, size_t, len,
		unsigned long, prot, int, pkey)
{
	return do_mprotect_pkey(start, len, prot, pkey);
}

SYSCALL_DEFINE2(pkey_alloc, unsigned long, flags, unsigned long, init_val)
{
	int pkey;
	int ret;

	/* No flags supported yet. */
	if (flags)
		return -EINVAL;
	/* check for unsupported init values */
	if (init_val & ~PKEY_ACCESS_MASK)
		return -EINVAL;

	mmap_write_lock(current->mm);
	pkey = mm_pkey_alloc(current->mm);

	ret = -ENOSPC;
	if (pkey == -1)
		goto out;

	ret = arch_set_user_pkey_access(current, pkey, init_val);
	if (ret) {
		mm_pkey_free(current->mm, pkey);
		goto out;
	}
	ret = pkey;
out:
	mmap_write_unlock(current->mm);
	return ret;
}

SYSCALL_DEFINE1(pkey_free, int, pkey)
{
	int ret;

	mmap_write_lock(current->mm);
	ret = mm_pkey_free(current->mm, pkey);
	mmap_write_unlock(current->mm);

	/*
	 * We could provide warnings or errors if any VMA still
	 * has the pkey set here.
	 */
	return ret;
}

#endif /* CONFIG_ARCH_HAS_PKEYS */