// SPDX-License-Identifier: GPL-2.0
/*
 * linux/mm/mlock.c
 *
 *  (C) Copyright 1995 Linus Torvalds
 *  (C) Copyright 2002 Christoph Hellwig
 */

#include <linux/capability.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/sched/user.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/pagewalk.h>
#include <linux/mempolicy.h>
#include <linux/syscalls.h>
#include <linux/sched.h>
#include <linux/export.h>
#include <linux/rmap.h>
#include <linux/mmzone.h>
#include <linux/hugetlb.h>
#include <linux/memcontrol.h>
#include <linux/mm_inline.h>
#include <linux/secretmem.h>

#include "internal.h"

struct mlock_pvec {
	local_lock_t lock;
	struct pagevec vec;
};

static DEFINE_PER_CPU(struct mlock_pvec, mlock_pvec) = {
	.lock = INIT_LOCAL_LOCK(lock),
};

bool can_do_mlock(void)
{
	if (rlimit(RLIMIT_MEMLOCK) != 0)
		return true;
	if (capable(CAP_IPC_LOCK))
		return true;
	return false;
}
EXPORT_SYMBOL(can_do_mlock);

/*
 * Mlocked pages are marked with PageMlocked() flag for efficient testing
 * in vmscan and, possibly, the fault path; and to support semi-accurate
 * statistics.
 *
 * An mlocked page [PageMlocked(page)] is unevictable.  As such, it will
 * be placed on the LRU "unevictable" list, rather than the [in]active lists.
 * The unevictable list is an LRU sibling list to the [in]active lists.
 * PageUnevictable is set to indicate the unevictable state.
 */

static struct lruvec *__mlock_page(struct page *page, struct lruvec *lruvec)
{
	/* There is nothing more we can do while it's off LRU */
	if (!TestClearPageLRU(page))
		return lruvec;

	lruvec = folio_lruvec_relock_irq(page_folio(page), lruvec);

	if (unlikely(page_evictable(page))) {
		/*
		 * This is a little surprising, but quite possible:
		 * PageMlocked must have got cleared already by another CPU.
		 * Could this page be on the Unevictable LRU?  I'm not sure,
		 * but move it now if so.
		 */
		if (PageUnevictable(page)) {
			del_page_from_lru_list(page, lruvec);
			ClearPageUnevictable(page);
			add_page_to_lru_list(page, lruvec);
			__count_vm_events(UNEVICTABLE_PGRESCUED,
					  thp_nr_pages(page));
		}
		goto out;
	}

	if (PageUnevictable(page)) {
		if (PageMlocked(page))
			page->mlock_count++;
		goto out;
	}

	del_page_from_lru_list(page, lruvec);
	ClearPageActive(page);
	SetPageUnevictable(page);
	page->mlock_count = !!PageMlocked(page);
	add_page_to_lru_list(page, lruvec);
	__count_vm_events(UNEVICTABLE_PGCULLED, thp_nr_pages(page));
out:
	SetPageLRU(page);
	return lruvec;
}

static struct lruvec *__mlock_new_page(struct page *page, struct lruvec *lruvec)
{
	VM_BUG_ON_PAGE(PageLRU(page), page);

	lruvec = folio_lruvec_relock_irq(page_folio(page), lruvec);

	/* As above, this is a little surprising, but possible */
	if (unlikely(page_evictable(page)))
		goto out;

	SetPageUnevictable(page);
	page->mlock_count = !!PageMlocked(page);
	__count_vm_events(UNEVICTABLE_PGCULLED, thp_nr_pages(page));
out:
	add_page_to_lru_list(page, lruvec);
	SetPageLRU(page);
	return lruvec;
}

static struct lruvec *__munlock_page(struct page *page, struct lruvec *lruvec)
{
	int nr_pages = thp_nr_pages(page);
	bool isolated = false;

	if (!TestClearPageLRU(page))
		goto munlock;

	isolated = true;
	lruvec = folio_lruvec_relock_irq(page_folio(page), lruvec);

	if (PageUnevictable(page)) {
		/* Then mlock_count is maintained, but might undercount */
		if (page->mlock_count)
			page->mlock_count--;
		if (page->mlock_count)
			goto out;
	}
	/* else assume that was the last mlock: reclaim will fix it if not */

munlock:
	if (TestClearPageMlocked(page)) {
		__mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages);
		if (isolated || !PageUnevictable(page))
			__count_vm_events(UNEVICTABLE_PGMUNLOCKED, nr_pages);
		else
			__count_vm_events(UNEVICTABLE_PGSTRANDED, nr_pages);
	}

	/* page_evictable() has to be checked *after* clearing Mlocked */
	if (isolated && PageUnevictable(page) && page_evictable(page)) {
		del_page_from_lru_list(page, lruvec);
		ClearPageUnevictable(page);
		add_page_to_lru_list(page, lruvec);
		__count_vm_events(UNEVICTABLE_PGRESCUED, nr_pages);
	}
out:
	if (isolated)
		SetPageLRU(page);
	return lruvec;
}

/*
 * Flags held in the low bits of a struct page pointer on the mlock_pvec.
 */
#define LRU_PAGE 0x1
#define NEW_PAGE 0x2
static inline struct page *mlock_lru(struct page *page)
{
	return (struct page *)((unsigned long)page + LRU_PAGE);
}

static inline struct page *mlock_new(struct page *page)
{
	return (struct page *)((unsigned long)page + NEW_PAGE);
}

/*
 * mlock_pagevec() is derived from pagevec_lru_move_fn():
 * perhaps that can make use of such page pointer flags in future,
 * but for now just keep it for mlock.  We could use three separate
 * pagevecs instead, but one feels better (munlocking a full pagevec
 * does not need to drain mlocking pagevecs first).
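 *
 * The LRU_PAGE and NEW_PAGE tags rely on struct page pointers being at
 * least word-aligned, so the two low bits are always clear: a tagged
 * pointer says "already on the LRU" or "new, not yet on the LRU", while a
 * plain, untagged pointer means munlock.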
 */
static void mlock_pagevec(struct pagevec *pvec)
{
	struct lruvec *lruvec = NULL;
	unsigned long mlock;
	struct page *page;
	int i;

	for (i = 0; i < pagevec_count(pvec); i++) {
		page = pvec->pages[i];
		mlock = (unsigned long)page & (LRU_PAGE | NEW_PAGE);
		page = (struct page *)((unsigned long)page - mlock);
		pvec->pages[i] = page;

		if (mlock & LRU_PAGE)
			lruvec = __mlock_page(page, lruvec);
		else if (mlock & NEW_PAGE)
			lruvec = __mlock_new_page(page, lruvec);
		else
			lruvec = __munlock_page(page, lruvec);
	}

	if (lruvec)
		unlock_page_lruvec_irq(lruvec);
	release_pages(pvec->pages, pvec->nr);
	pagevec_reinit(pvec);
}

void mlock_page_drain_local(void)
{
	struct pagevec *pvec;

	local_lock(&mlock_pvec.lock);
	pvec = this_cpu_ptr(&mlock_pvec.vec);
	if (pagevec_count(pvec))
		mlock_pagevec(pvec);
	local_unlock(&mlock_pvec.lock);
}

void mlock_page_drain_remote(int cpu)
{
	struct pagevec *pvec;

	WARN_ON_ONCE(cpu_online(cpu));
	pvec = &per_cpu(mlock_pvec.vec, cpu);
	if (pagevec_count(pvec))
		mlock_pagevec(pvec);
}

bool need_mlock_page_drain(int cpu)
{
	return pagevec_count(&per_cpu(mlock_pvec.vec, cpu));
}

/**
 * mlock_folio - mlock a folio already on (or temporarily off) LRU
 * @folio: folio to be mlocked.
 */
void mlock_folio(struct folio *folio)
{
	struct pagevec *pvec;

	local_lock(&mlock_pvec.lock);
	pvec = this_cpu_ptr(&mlock_pvec.vec);

	if (!folio_test_set_mlocked(folio)) {
		int nr_pages = folio_nr_pages(folio);

		zone_stat_mod_folio(folio, NR_MLOCK, nr_pages);
		__count_vm_events(UNEVICTABLE_PGMLOCKED, nr_pages);
	}

	folio_get(folio);
	if (!pagevec_add(pvec, mlock_lru(&folio->page)) ||
	    folio_test_large(folio) || lru_cache_disabled())
		mlock_pagevec(pvec);
	local_unlock(&mlock_pvec.lock);
}

/**
 * mlock_new_page - mlock a newly allocated page not yet on LRU
 * @page: page to be mlocked, either a normal page or a THP head.
 */
void mlock_new_page(struct page *page)
{
	struct pagevec *pvec;
	int nr_pages = thp_nr_pages(page);

	local_lock(&mlock_pvec.lock);
	pvec = this_cpu_ptr(&mlock_pvec.vec);
	SetPageMlocked(page);
	mod_zone_page_state(page_zone(page), NR_MLOCK, nr_pages);
	__count_vm_events(UNEVICTABLE_PGMLOCKED, nr_pages);

	get_page(page);
	if (!pagevec_add(pvec, mlock_new(page)) ||
	    PageHead(page) || lru_cache_disabled())
		mlock_pagevec(pvec);
	local_unlock(&mlock_pvec.lock);
}

/**
 * munlock_page - munlock a page
 * @page: page to be munlocked, either a normal page or a THP head.
 */
void munlock_page(struct page *page)
{
	struct pagevec *pvec;

	local_lock(&mlock_pvec.lock);
	pvec = this_cpu_ptr(&mlock_pvec.vec);
	/*
	 * TestClearPageMlocked(page) must be left to __munlock_page(),
	 * which will check whether the page is multiply mlocked.
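	 * (A page can be mlocked through more than one VM_LOCKED vma: while
	 * it sits on the unevictable list, page->mlock_count roughly tracks
	 * how many such mlocks are outstanding, and __munlock_page() only
	 * clears the Mlocked flag once that count has dropped to zero.)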
	 */

	get_page(page);
	if (!pagevec_add(pvec, page) ||
	    PageHead(page) || lru_cache_disabled())
		mlock_pagevec(pvec);
	local_unlock(&mlock_pvec.lock);
}

static int mlock_pte_range(pmd_t *pmd, unsigned long addr,
			   unsigned long end, struct mm_walk *walk)
{
	struct vm_area_struct *vma = walk->vma;
	spinlock_t *ptl;
	pte_t *start_pte, *pte;
	struct page *page;

	ptl = pmd_trans_huge_lock(pmd, vma);
	if (ptl) {
		if (!pmd_present(*pmd))
			goto out;
		if (is_huge_zero_pmd(*pmd))
			goto out;
		page = pmd_page(*pmd);
		if (vma->vm_flags & VM_LOCKED)
			mlock_folio(page_folio(page));
		else
			munlock_page(page);
		goto out;
	}

	start_pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
	for (pte = start_pte; addr != end; pte++, addr += PAGE_SIZE) {
		if (!pte_present(*pte))
			continue;
		page = vm_normal_page(vma, addr, *pte);
		if (!page || is_zone_device_page(page))
			continue;
		if (PageTransCompound(page))
			continue;
		if (vma->vm_flags & VM_LOCKED)
			mlock_folio(page_folio(page));
		else
			munlock_page(page);
	}
	pte_unmap(start_pte);
out:
	spin_unlock(ptl);
	cond_resched();
	return 0;
}

/*
 * mlock_vma_pages_range() - mlock any pages already in the range,
 *                           or munlock all pages in the range.
 * @vma - vma containing range to be mlock()ed or munlock()ed
 * @start - start address in @vma of the range
 * @end - end of range in @vma
 * @newflags - the new set of flags for @vma.
 *
 * Called for mlock(), mlock2() and mlockall(), to set @vma VM_LOCKED;
 * called for munlock() and munlockall(), to clear VM_LOCKED from @vma.
 */
static void mlock_vma_pages_range(struct vm_area_struct *vma,
	unsigned long start, unsigned long end, vm_flags_t newflags)
{
	static const struct mm_walk_ops mlock_walk_ops = {
		.pmd_entry = mlock_pte_range,
	};

	/*
	 * There is a slight chance that concurrent page migration,
	 * or page reclaim finding a page of this now-VM_LOCKED vma,
	 * will call mlock_vma_page() and raise page's mlock_count:
	 * double counting, leaving the page unevictable indefinitely.
	 * Communicate this danger to mlock_vma_page() with VM_IO,
	 * which is a VM_SPECIAL flag not allowed on VM_LOCKED vmas.
	 * mmap_lock is held in write mode here, so this weird
	 * combination should not be visible to other mmap_lock users;
	 * but WRITE_ONCE so rmap walkers must see VM_IO if VM_LOCKED.
	 */
	if (newflags & VM_LOCKED)
		newflags |= VM_IO;
	WRITE_ONCE(vma->vm_flags, newflags);

	lru_add_drain();
	walk_page_range(vma->vm_mm, start, end, &mlock_walk_ops, NULL);
	lru_add_drain();

	if (newflags & VM_IO) {
		newflags &= ~VM_IO;
		WRITE_ONCE(vma->vm_flags, newflags);
	}
}

/*
 * mlock_fixup - handle mlock[all]/munlock[all] requests.
 *
 * Filters out "special" vmas -- VM_LOCKED never gets set for these, and
 * munlock is a no-op.  However, for some special vmas, we go ahead and
 * populate the ptes.
 *
 * For vmas that pass the filters, merge/split as appropriate.
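 *
 * For example (illustrative only): mlock()ing just the middle pages of a
 * larger vma first tries vma_merge(); if that fails, split_vma() carves
 * off the uncovered head and/or tail, so that VM_LOCKED is set (and
 * mm->locked_vm adjusted) only for the vma exactly covering the range.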
 */
static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev,
	unsigned long start, unsigned long end, vm_flags_t newflags)
{
	struct mm_struct *mm = vma->vm_mm;
	pgoff_t pgoff;
	int nr_pages;
	int ret = 0;
	vm_flags_t oldflags = vma->vm_flags;

	if (newflags == oldflags || (oldflags & VM_SPECIAL) ||
	    is_vm_hugetlb_page(vma) || vma == get_gate_vma(current->mm) ||
	    vma_is_dax(vma) || vma_is_secretmem(vma))
		/* don't set VM_LOCKED or VM_LOCKONFAULT and don't count */
		goto out;

	pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
	*prev = vma_merge(mm, *prev, start, end, newflags, vma->anon_vma,
			  vma->vm_file, pgoff, vma_policy(vma),
			  vma->vm_userfaultfd_ctx, anon_vma_name(vma));
	if (*prev) {
		vma = *prev;
		goto success;
	}

	if (start != vma->vm_start) {
		ret = split_vma(mm, vma, start, 1);
		if (ret)
			goto out;
	}

	if (end != vma->vm_end) {
		ret = split_vma(mm, vma, end, 0);
		if (ret)
			goto out;
	}

success:
	/*
	 * Keep track of amount of locked VM.
	 */
	nr_pages = (end - start) >> PAGE_SHIFT;
	if (!(newflags & VM_LOCKED))
		nr_pages = -nr_pages;
	else if (oldflags & VM_LOCKED)
		nr_pages = 0;
	mm->locked_vm += nr_pages;

	/*
	 * vm_flags is protected by the mmap_lock held in write mode.
	 * It's okay if try_to_unmap_one unmaps a page just after we
	 * set VM_LOCKED, populate_vma_page_range will bring it back.
	 */

	if ((newflags & VM_LOCKED) && (oldflags & VM_LOCKED)) {
		/* No work to do, and mlocking twice would be wrong */
		vma->vm_flags = newflags;
	} else {
		mlock_vma_pages_range(vma, start, end, newflags);
	}
out:
	*prev = vma;
	return ret;
}

static int apply_vma_lock_flags(unsigned long start, size_t len,
				vm_flags_t flags)
{
	unsigned long nstart, end, tmp;
	struct vm_area_struct *vma, *prev;
	int error;
	MA_STATE(mas, &current->mm->mm_mt, start, start);

	VM_BUG_ON(offset_in_page(start));
	VM_BUG_ON(len != PAGE_ALIGN(len));
	end = start + len;
	if (end < start)
		return -EINVAL;
	if (end == start)
		return 0;
	vma = mas_walk(&mas);
	if (!vma)
		return -ENOMEM;

	if (start > vma->vm_start)
		prev = vma;
	else
		prev = mas_prev(&mas, 0);

	for (nstart = start ; ; ) {
		vm_flags_t newflags = vma->vm_flags & VM_LOCKED_CLEAR_MASK;

		newflags |= flags;

		/* Here we know that  vma->vm_start <= nstart < vma->vm_end. */
		tmp = vma->vm_end;
		if (tmp > end)
			tmp = end;
		error = mlock_fixup(vma, &prev, nstart, tmp, newflags);
		if (error)
			break;
		nstart = tmp;
		if (nstart < prev->vm_end)
			nstart = prev->vm_end;
		if (nstart >= end)
			break;

		vma = find_vma(prev->vm_mm, prev->vm_end);
		if (!vma || vma->vm_start != nstart) {
			error = -ENOMEM;
			break;
		}
	}
	return error;
}

/*
 * Go through the vma areas and sum up the size of the mlocked vma pages.
 * Note that the deferred memory locking case (mlock2() with MLOCK_ONFAULT)
 * is also counted.
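 *
 * For example (illustrative numbers only): with one VM_LOCKED vma spanning
 * [0x1000, 0x9000) and a request covering [0x3000, 0x6000), only the three
 * overlapping pages are counted, not the whole vma.
 *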
 * Return value: previously mlocked page counts
 */
static unsigned long count_mm_mlocked_page_nr(struct mm_struct *mm,
		unsigned long start, size_t len)
{
	struct vm_area_struct *vma;
	unsigned long count = 0;
	unsigned long end;
	VMA_ITERATOR(vmi, mm, start);

	/* Don't overflow past ULONG_MAX */
	if (unlikely(ULONG_MAX - len < start))
		end = ULONG_MAX;
	else
		end = start + len;

	for_each_vma_range(vmi, vma, end) {
		if (vma->vm_flags & VM_LOCKED) {
			if (start > vma->vm_start)
				count -= (start - vma->vm_start);
			if (end < vma->vm_end) {
				count += end - vma->vm_start;
				break;
			}
			count += vma->vm_end - vma->vm_start;
		}
	}

	return count >> PAGE_SHIFT;
}

/*
 * convert get_user_pages() return value to posix mlock() error
 */
static int __mlock_posix_error_return(long retval)
{
	if (retval == -EFAULT)
		retval = -ENOMEM;
	else if (retval == -ENOMEM)
		retval = -EAGAIN;
	return retval;
}

static __must_check int do_mlock(unsigned long start, size_t len, vm_flags_t flags)
{
	unsigned long locked;
	unsigned long lock_limit;
	int error = -ENOMEM;

	start = untagged_addr(start);

	if (!can_do_mlock())
		return -EPERM;

	len = PAGE_ALIGN(len + (offset_in_page(start)));
	start &= PAGE_MASK;

	lock_limit = rlimit(RLIMIT_MEMLOCK);
	lock_limit >>= PAGE_SHIFT;
	locked = len >> PAGE_SHIFT;

	if (mmap_write_lock_killable(current->mm))
		return -EINTR;

	locked += current->mm->locked_vm;
	if ((locked > lock_limit) && (!capable(CAP_IPC_LOCK))) {
		/*
		 * It is possible that the regions requested intersect with
		 * previously mlocked areas; that part is already accounted
		 * for in "mm->locked_vm" and should not be counted again
		 * towards the new mlock total, so check and adjust the
		 * locked count if necessary.
		 */
		locked -= count_mm_mlocked_page_nr(current->mm,
				start, len);
	}

	/* check against resource limits */
	if ((locked <= lock_limit) || capable(CAP_IPC_LOCK))
		error = apply_vma_lock_flags(start, len, flags);

	mmap_write_unlock(current->mm);
	if (error)
		return error;

	error = __mm_populate(start, len, 0);
	if (error)
		return __mlock_posix_error_return(error);
	return 0;
}

SYSCALL_DEFINE2(mlock, unsigned long, start, size_t, len)
{
	return do_mlock(start, len, VM_LOCKED);
}

SYSCALL_DEFINE3(mlock2, unsigned long, start, size_t, len, int, flags)
{
	vm_flags_t vm_flags = VM_LOCKED;

	if (flags & ~MLOCK_ONFAULT)
		return -EINVAL;

	if (flags & MLOCK_ONFAULT)
		vm_flags |= VM_LOCKONFAULT;

	return do_mlock(start, len, vm_flags);
}

SYSCALL_DEFINE2(munlock, unsigned long, start, size_t, len)
{
	int ret;

	start = untagged_addr(start);

	len = PAGE_ALIGN(len + (offset_in_page(start)));
	start &= PAGE_MASK;

	if (mmap_write_lock_killable(current->mm))
		return -EINTR;
	ret = apply_vma_lock_flags(start, len, 0);
	mmap_write_unlock(current->mm);

	return ret;
}

/*
 * Take the MCL_* flags passed into mlockall (or 0 if called from munlockall)
 * and translate into the appropriate modifications to mm->def_flags and/or the
 * flags for all current VMAs.
 *
 * There are a couple of subtleties with this.  If mlockall() is called multiple
 * times with different flags, the values do not necessarily stack.
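 * For example (userspace usage, illustrative only):
 *
 *	mlockall(MCL_CURRENT | MCL_FUTURE);
 *	mlockall(MCL_CURRENT);
 *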
 * If mlockall() is called once including the MCL_FUTURE flag and then a
 * second time without it, VM_LOCKED and VM_LOCKONFAULT will be cleared from
 * mm->def_flags.
 */
static int apply_mlockall_flags(int flags)
{
	MA_STATE(mas, &current->mm->mm_mt, 0, 0);
	struct vm_area_struct *vma, *prev = NULL;
	vm_flags_t to_add = 0;

	current->mm->def_flags &= VM_LOCKED_CLEAR_MASK;
	if (flags & MCL_FUTURE) {
		current->mm->def_flags |= VM_LOCKED;

		if (flags & MCL_ONFAULT)
			current->mm->def_flags |= VM_LOCKONFAULT;

		if (!(flags & MCL_CURRENT))
			goto out;
	}

	if (flags & MCL_CURRENT) {
		to_add |= VM_LOCKED;
		if (flags & MCL_ONFAULT)
			to_add |= VM_LOCKONFAULT;
	}

	mas_for_each(&mas, vma, ULONG_MAX) {
		vm_flags_t newflags;

		newflags = vma->vm_flags & VM_LOCKED_CLEAR_MASK;
		newflags |= to_add;

		/* Ignore errors */
		mlock_fixup(vma, &prev, vma->vm_start, vma->vm_end, newflags);
		mas_pause(&mas);
		cond_resched();
	}
out:
	return 0;
}

SYSCALL_DEFINE1(mlockall, int, flags)
{
	unsigned long lock_limit;
	int ret;

	if (!flags || (flags & ~(MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT)) ||
	    flags == MCL_ONFAULT)
		return -EINVAL;

	if (!can_do_mlock())
		return -EPERM;

	lock_limit = rlimit(RLIMIT_MEMLOCK);
	lock_limit >>= PAGE_SHIFT;

	if (mmap_write_lock_killable(current->mm))
		return -EINTR;

	ret = -ENOMEM;
	if (!(flags & MCL_CURRENT) || (current->mm->total_vm <= lock_limit) ||
	    capable(CAP_IPC_LOCK))
		ret = apply_mlockall_flags(flags);
	mmap_write_unlock(current->mm);
	if (!ret && (flags & MCL_CURRENT))
		mm_populate(0, TASK_SIZE);

	return ret;
}

SYSCALL_DEFINE0(munlockall)
{
	int ret;

	if (mmap_write_lock_killable(current->mm))
		return -EINTR;
	ret = apply_mlockall_flags(0);
	mmap_write_unlock(current->mm);
	return ret;
}

/*
 * Objects with lifetimes different from processes (SHM_LOCK and SHM_HUGETLB
 * shm segments) get accounted against the user_struct instead.
 */
static DEFINE_SPINLOCK(shmlock_user_lock);

int user_shm_lock(size_t size, struct ucounts *ucounts)
{
	unsigned long lock_limit, locked;
	long memlock;
	int allowed = 0;

	locked = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
	lock_limit = rlimit(RLIMIT_MEMLOCK);
	if (lock_limit != RLIM_INFINITY)
		lock_limit >>= PAGE_SHIFT;
	spin_lock(&shmlock_user_lock);
	memlock = inc_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_MEMLOCK, locked);

	if ((memlock == LONG_MAX || memlock > lock_limit) && !capable(CAP_IPC_LOCK)) {
		dec_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_MEMLOCK, locked);
		goto out;
	}
	if (!get_ucounts(ucounts)) {
		dec_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_MEMLOCK, locked);
		allowed = 0;
		goto out;
	}
	allowed = 1;
out:
	spin_unlock(&shmlock_user_lock);
	return allowed;
}

void user_shm_unlock(size_t size, struct ucounts *ucounts)
{
	spin_lock(&shmlock_user_lock);
	dec_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_MEMLOCK, (size + PAGE_SIZE - 1) >> PAGE_SHIFT);
	spin_unlock(&shmlock_user_lock);
	put_ucounts(ucounts);
}