/*
 *	linux/mm/madvise.c
 *
 * Copyright (C) 1999  Linus Torvalds
 * Copyright (C) 2002  Christoph Hellwig
 */

#include <linux/mman.h>
#include <linux/pagemap.h>
#include <linux/syscalls.h>
#include <linux/mempolicy.h>
#include <linux/page-isolation.h>
#include <linux/userfaultfd_k.h>
#include <linux/hugetlb.h>
#include <linux/falloc.h>
#include <linux/sched.h>
#include <linux/ksm.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/mmu_notifier.h>

#include <asm/tlb.h>

/*
 * Any behaviour which results in changes to the vma->vm_flags needs to
 * take mmap_sem for writing. Others, which simply traverse vmas, need
 * to only take it for reading.
 */
static int madvise_need_mmap_write(int behavior)
{
	switch (behavior) {
	case MADV_REMOVE:
	case MADV_WILLNEED:
	case MADV_DONTNEED:
	case MADV_FREE:
		return 0;
	default:
		/* be safe, default to 1. list exceptions explicitly */
		return 1;
	}
}

/*
 * We can potentially split a vm area into separate
 * areas, each area with its own behavior.
 */
static long madvise_behavior(struct vm_area_struct *vma,
		     struct vm_area_struct **prev,
		     unsigned long start, unsigned long end, int behavior)
{
	struct mm_struct *mm = vma->vm_mm;
	int error = 0;
	pgoff_t pgoff;
	unsigned long new_flags = vma->vm_flags;

	switch (behavior) {
	case MADV_NORMAL:
		new_flags = new_flags & ~VM_RAND_READ & ~VM_SEQ_READ;
		break;
	case MADV_SEQUENTIAL:
		new_flags = (new_flags & ~VM_RAND_READ) | VM_SEQ_READ;
		break;
	case MADV_RANDOM:
		new_flags = (new_flags & ~VM_SEQ_READ) | VM_RAND_READ;
		break;
	case MADV_DONTFORK:
		new_flags |= VM_DONTCOPY;
		break;
	case MADV_DOFORK:
		if (vma->vm_flags & VM_IO) {
			error = -EINVAL;
			goto out;
		}
		new_flags &= ~VM_DONTCOPY;
		break;
	case MADV_DONTDUMP:
		new_flags |= VM_DONTDUMP;
		break;
	case MADV_DODUMP:
		if (new_flags & VM_SPECIAL) {
			error = -EINVAL;
			goto out;
		}
		new_flags &= ~VM_DONTDUMP;
		break;
	case MADV_MERGEABLE:
	case MADV_UNMERGEABLE:
		error = ksm_madvise(vma, start, end, behavior, &new_flags);
		if (error)
			goto out;
		break;
	case MADV_HUGEPAGE:
	case MADV_NOHUGEPAGE:
		error = hugepage_madvise(vma, &new_flags, behavior);
		if (error)
			goto out;
		break;
	}

	if (new_flags == vma->vm_flags) {
		*prev = vma;
		goto out;
	}

	pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
	*prev = vma_merge(mm, *prev, start, end, new_flags, vma->anon_vma,
			  vma->vm_file, pgoff, vma_policy(vma),
			  vma->vm_userfaultfd_ctx);
	if (*prev) {
		vma = *prev;
		goto success;
	}

	*prev = vma;

	if (start != vma->vm_start) {
		error = split_vma(mm, vma, start, 1);
		if (error)
			goto out;
	}

	if (end != vma->vm_end) {
		error = split_vma(mm, vma, end, 0);
		if (error)
			goto out;
	}

success:
	/*
	 * vm_flags is protected by the mmap_sem held in write mode.
	 */
	vma->vm_flags = new_flags;

out:
	if (error == -ENOMEM)
		error = -EAGAIN;
	return error;
}

#ifdef CONFIG_SWAP
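/*
 * pmd_entry callback for force_swapin_readahead(): sample each pte under
 * the page table lock and, for every swap entry found, kick off an
 * asynchronous read into the swap cache so that a later access faults the
 * page in without waiting for I/O.
 */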
static int swapin_walk_pmd_entry(pmd_t *pmd, unsigned long start,
	unsigned long end, struct mm_walk *walk)
{
	pte_t *orig_pte;
	struct vm_area_struct *vma = walk->private;
	unsigned long index;

	if (pmd_none_or_trans_huge_or_clear_bad(pmd))
		return 0;

	for (index = start; index != end; index += PAGE_SIZE) {
		pte_t pte;
		swp_entry_t entry;
		struct page *page;
		spinlock_t *ptl;

		orig_pte = pte_offset_map_lock(vma->vm_mm, pmd, start, &ptl);
		pte = *(orig_pte + ((index - start) / PAGE_SIZE));
		pte_unmap_unlock(orig_pte, ptl);

		if (pte_present(pte) || pte_none(pte))
			continue;
		entry = pte_to_swp_entry(pte);
		if (unlikely(non_swap_entry(entry)))
			continue;

		page = read_swap_cache_async(entry, GFP_HIGHUSER_MOVABLE,
								vma, index);
		if (page)
			put_page(page);
	}

	return 0;
}

static void force_swapin_readahead(struct vm_area_struct *vma,
		unsigned long start, unsigned long end)
{
	struct mm_walk walk = {
		.mm = vma->vm_mm,
		.pmd_entry = swapin_walk_pmd_entry,
		.private = vma,
	};

	walk_page_range(start, end, &walk);

	lru_add_drain();	/* Push any new pages onto the LRU now */
}

static void force_shm_swapin_readahead(struct vm_area_struct *vma,
		unsigned long start, unsigned long end,
		struct address_space *mapping)
{
	pgoff_t index;
	struct page *page;
	swp_entry_t swap;

	for (; start < end; start += PAGE_SIZE) {
		index = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;

		page = find_get_entry(mapping, index);
		if (!radix_tree_exceptional_entry(page)) {
			if (page)
				put_page(page);
			continue;
		}
		swap = radix_to_swp_entry(page);
		page = read_swap_cache_async(swap, GFP_HIGHUSER_MOVABLE,
								NULL, 0);
		if (page)
			put_page(page);
	}

	lru_add_drain();	/* Push any new pages onto the LRU now */
}
#endif	/* CONFIG_SWAP */

/*
 * Schedule all required I/O operations.  Do not wait for completion.
 */
static long madvise_willneed(struct vm_area_struct *vma,
			     struct vm_area_struct **prev,
			     unsigned long start, unsigned long end)
{
	struct file *file = vma->vm_file;

#ifdef CONFIG_SWAP
	if (!file) {
		*prev = vma;
		force_swapin_readahead(vma, start, end);
		return 0;
	}

	if (shmem_mapping(file->f_mapping)) {
		*prev = vma;
		force_shm_swapin_readahead(vma, start, end,
				file->f_mapping);
		return 0;
	}
#else
	if (!file)
		return -EBADF;
#endif

	if (IS_DAX(file_inode(file))) {
		/* no bad return value, but ignore advice */
		return 0;
	}

	*prev = vma;
	start = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
	if (end > vma->vm_end)
		end = vma->vm_end;
	end = ((end - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;

	force_page_cache_readahead(file->f_mapping, file, start, end - start);
	return 0;
}
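/*
 * pmd_entry callback for MADV_FREE: release swap entries outright, split a
 * THP that is mapped only by this process, and mark resident pages clean
 * and old so that reclaim can discard them instead of swapping them out.
 */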
static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
				unsigned long end, struct mm_walk *walk)
{
	struct mmu_gather *tlb = walk->private;
	struct mm_struct *mm = tlb->mm;
	struct vm_area_struct *vma = walk->vma;
	spinlock_t *ptl;
	pte_t *orig_pte, *pte, ptent;
	struct page *page;
	int nr_swap = 0;
	unsigned long next;

	next = pmd_addr_end(addr, end);
	if (pmd_trans_huge(*pmd))
		if (madvise_free_huge_pmd(tlb, vma, pmd, addr, next))
			goto next;

	if (pmd_trans_unstable(pmd))
		return 0;

	tlb_remove_check_page_size_change(tlb, PAGE_SIZE);
	orig_pte = pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
	arch_enter_lazy_mmu_mode();
	for (; addr != end; pte++, addr += PAGE_SIZE) {
		ptent = *pte;

		if (pte_none(ptent))
			continue;
		/*
		 * If the pte holds a swap entry, just clear the page table
		 * entry: swapping the page back in is more expensive than
		 * (page allocation + zeroing).
		 */
		if (!pte_present(ptent)) {
			swp_entry_t entry;

			entry = pte_to_swp_entry(ptent);
			if (non_swap_entry(entry))
				continue;
			nr_swap--;
			free_swap_and_cache(entry);
			pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
			continue;
		}

		page = vm_normal_page(vma, addr, ptent);
		if (!page)
			continue;

		/*
		 * If the pmd isn't transhuge but the page is a THP owned
		 * only by this process, split it and deactivate all of its
		 * pages.
		 */
		if (PageTransCompound(page)) {
			if (page_mapcount(page) != 1)
				goto out;
			get_page(page);
			if (!trylock_page(page)) {
				put_page(page);
				goto out;
			}
			pte_unmap_unlock(orig_pte, ptl);
			if (split_huge_page(page)) {
				unlock_page(page);
				put_page(page);
				pte_offset_map_lock(mm, pmd, addr, &ptl);
				goto out;
			}
			unlock_page(page);
			put_page(page);
			pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
			pte--;
			addr -= PAGE_SIZE;
			continue;
		}

		VM_BUG_ON_PAGE(PageTransCompound(page), page);

		if (PageSwapCache(page) || PageDirty(page)) {
			if (!trylock_page(page))
				continue;
			/*
			 * If the page is shared with others, we can't clear
			 * its PG_dirty flag.
			 */
			if (page_mapcount(page) != 1) {
				unlock_page(page);
				continue;
			}

			if (PageSwapCache(page) && !try_to_free_swap(page)) {
				unlock_page(page);
				continue;
			}

			ClearPageDirty(page);
			unlock_page(page);
		}

		if (pte_young(ptent) || pte_dirty(ptent)) {
			/*
			 * Some architectures (e.g. PPC) don't update the TLB
			 * with set_pte_at() and tlb_remove_tlb_entry(), so
			 * for portability remap the pte as old and clean
			 * after clearing it.
			 */
			ptent = ptep_get_and_clear_full(mm, addr, pte,
							tlb->fullmm);

			ptent = pte_mkold(ptent);
			ptent = pte_mkclean(ptent);
			set_pte_at(mm, addr, pte, ptent);
			if (PageActive(page))
				deactivate_page(page);
			tlb_remove_tlb_entry(tlb, pte, addr);
		}
	}
out:
	if (nr_swap) {
		if (current->mm == mm)
			sync_mm_rss(mm);

		add_mm_counter(mm, MM_SWAPENTS, nr_swap);
	}
	arch_leave_lazy_mmu_mode();
	pte_unmap_unlock(orig_pte, ptl);
	cond_resched();
next:
	return 0;
}

static void madvise_free_page_range(struct mmu_gather *tlb,
			     struct vm_area_struct *vma,
			     unsigned long addr, unsigned long end)
{
	struct mm_walk free_walk = {
		.pmd_entry = madvise_free_pte_range,
		.mm = vma->vm_mm,
		.private = tlb,
	};

	tlb_start_vma(tlb, vma);
	walk_page_range(addr, end, &free_walk);
	tlb_end_vma(tlb, vma);
}
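/*
 * Apply MADV_FREE to a single anonymous vma: clamp the range to the vma,
 * then walk it under an mmu_gather with mmu notifiers so that secondary
 * MMUs and the TLB observe the cleared and cleaned ptes.
 */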
static int madvise_free_single_vma(struct vm_area_struct *vma,
			unsigned long start_addr, unsigned long end_addr)
{
	unsigned long start, end;
	struct mm_struct *mm = vma->vm_mm;
	struct mmu_gather tlb;

	if (vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP))
		return -EINVAL;

	/* MADV_FREE works only for anonymous vmas at the moment */
	if (!vma_is_anonymous(vma))
		return -EINVAL;

	start = max(vma->vm_start, start_addr);
	if (start >= vma->vm_end)
		return -EINVAL;
	end = min(vma->vm_end, end_addr);
	if (end <= vma->vm_start)
		return -EINVAL;

	lru_add_drain();
	tlb_gather_mmu(&tlb, mm, start, end);
	update_hiwater_rss(mm);

	mmu_notifier_invalidate_range_start(mm, start, end);
	madvise_free_page_range(&tlb, vma, start, end);
	mmu_notifier_invalidate_range_end(mm, start, end);
	tlb_finish_mmu(&tlb, start, end);

	return 0;
}

static long madvise_free(struct vm_area_struct *vma,
			 struct vm_area_struct **prev,
			 unsigned long start, unsigned long end)
{
	*prev = vma;
	return madvise_free_single_vma(vma, start, end);
}

/*
 * Application no longer needs these pages.  If the pages are dirty,
 * it's OK to just throw them away.  The app will be more careful about
 * data it wants to keep.  Be sure to free swap resources too.  The
 * zap_page_range call sets things up for shrink_active_list to actually free
 * these pages later if no one else has touched them in the meantime,
 * although we could add these pages to a global reuse list for
 * shrink_active_list to pick up before reclaiming other pages.
 *
 * NB: This interface discards data rather than pushes it out to swap,
 * as some implementations do.  This has performance implications for
 * applications like large transactional databases which want to discard
 * pages in anonymous maps after committing to backing store the data
 * that was kept in them.  There is no reason to write this data out to
 * the swap area if the application is discarding it.
 *
 * An interface that causes the system to free clean pages and flush
 * dirty pages is already available as msync(MS_INVALIDATE).
 */
static long madvise_dontneed(struct vm_area_struct *vma,
			     struct vm_area_struct **prev,
			     unsigned long start, unsigned long end)
{
	*prev = vma;
	if (vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP))
		return -EINVAL;

	zap_page_range(vma, start, end - start, NULL);
	madvise_userfault_dontneed(vma, prev, start, end);
	return 0;
}

/*
 * Application wants to free up the pages and associated backing store.
 * This is effectively punching a hole into the middle of a file.
 */
static long madvise_remove(struct vm_area_struct *vma,
				struct vm_area_struct **prev,
				unsigned long start, unsigned long end)
{
	loff_t offset;
	int error;
	struct file *f;

	*prev = NULL;	/* tell sys_madvise we drop mmap_sem */

	if (vma->vm_flags & VM_LOCKED)
		return -EINVAL;

	f = vma->vm_file;

	if (!f || !f->f_mapping || !f->f_mapping->host)
		return -EINVAL;

	if ((vma->vm_flags & (VM_SHARED|VM_WRITE)) != (VM_SHARED|VM_WRITE))
		return -EACCES;

	offset = (loff_t)(start - vma->vm_start)
			+ ((loff_t)vma->vm_pgoff << PAGE_SHIFT);

	/*
	 * Filesystem's fallocate may need to take i_mutex.  We need to
	 * explicitly grab a reference because the vma (and hence the
	 * vma's reference to the file) can go away as soon as we drop
	 * mmap_sem.
	 */
	get_file(f);
	up_read(&current->mm->mmap_sem);
	error = vfs_fallocate(f,
				FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
				offset, end - start);
	fput(f);
	down_read(&current->mm->mmap_sem);
	return error;
}

#ifdef CONFIG_MEMORY_FAILURE
/*
 * Error injection support for memory error handling.
 */
static int madvise_hwpoison(int bhv, unsigned long start, unsigned long end)
{
	struct page *p;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;
	for (; start < end; start += PAGE_SIZE <<
				compound_order(compound_head(p))) {
		int ret;

		ret = get_user_pages_fast(start, 1, 0, &p);
		if (ret != 1)
			return ret;

		if (PageHWPoison(p)) {
			put_page(p);
			continue;
		}
		if (bhv == MADV_SOFT_OFFLINE) {
			pr_info("Soft offlining page %#lx at %#lx\n",
				page_to_pfn(p), start);
			ret = soft_offline_page(p, MF_COUNT_INCREASED);
			if (ret)
				return ret;
			continue;
		}
		pr_info("Injecting memory failure for page %#lx at %#lx\n",
			page_to_pfn(p), start);
		ret = memory_failure(page_to_pfn(p), 0, MF_COUNT_INCREASED);
		if (ret)
			return ret;
	}
	return 0;
}
#endif
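/*
 * Dispatch one vma's worth of advice to the matching handler.  *prev lets
 * the caller continue its walk; madvise_remove() sets it to NULL to signal
 * that mmap_sem was dropped and the vma list must be looked up again.
 */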
static long
madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev,
		unsigned long start, unsigned long end, int behavior)
{
	switch (behavior) {
	case MADV_REMOVE:
		return madvise_remove(vma, prev, start, end);
	case MADV_WILLNEED:
		return madvise_willneed(vma, prev, start, end);
	case MADV_FREE:
		/*
		 * XXX: In this implementation, MADV_FREE works like
		 * MADV_DONTNEED on a swapless system or when swap is full.
		 */
		if (get_nr_swap_pages() > 0)
			return madvise_free(vma, prev, start, end);
		/* passthrough */
	case MADV_DONTNEED:
		return madvise_dontneed(vma, prev, start, end);
	default:
		return madvise_behavior(vma, prev, start, end, behavior);
	}
}
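/*
 * Only advice values supported by this kernel configuration are accepted;
 * anything else makes sys_madvise() return -EINVAL.
 */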
static bool
madvise_behavior_valid(int behavior)
{
	switch (behavior) {
	case MADV_DOFORK:
	case MADV_DONTFORK:
	case MADV_NORMAL:
	case MADV_SEQUENTIAL:
	case MADV_RANDOM:
	case MADV_REMOVE:
	case MADV_WILLNEED:
	case MADV_DONTNEED:
	case MADV_FREE:
#ifdef CONFIG_KSM
	case MADV_MERGEABLE:
	case MADV_UNMERGEABLE:
#endif
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	case MADV_HUGEPAGE:
	case MADV_NOHUGEPAGE:
#endif
	case MADV_DONTDUMP:
	case MADV_DODUMP:
		return true;

	default:
		return false;
	}
}

/*
 * The madvise(2) system call.
 *
 * Applications can use madvise() to advise the kernel how it should
 * handle paging I/O in this VM area.  The idea is to help the kernel
 * use appropriate read-ahead and caching techniques.  The information
 * provided is advisory only, and can be safely disregarded by the
 * kernel without affecting the correct operation of the application.
 *
 * behavior values:
 *  MADV_NORMAL - the default behavior is to read clusters.  This
 *		results in some read-ahead and read-behind.
 *  MADV_RANDOM - the system should read the minimum amount of data
 *		on any access, since it is unlikely that the appli-
 *		cation will need more than what it asks for.
 *  MADV_SEQUENTIAL - pages in the given range will probably be accessed
 *		once, so they can be aggressively read ahead, and
 *		can be freed soon after they are accessed.
 *  MADV_WILLNEED - the application is notifying the system to read
 *		some pages ahead.
 *  MADV_DONTNEED - the application is finished with the given range,
 *		so the kernel can free resources associated with it.
 *  MADV_FREE - the application marks pages in the given range as lazy free,
 *		where actual purges are postponed until memory pressure happens.
 *  MADV_REMOVE - the application wants to free up the given range of
 *		pages and associated backing store.
 *  MADV_DONTFORK - omit this area from child's address space when forking:
 *		typically, to avoid COWing pages pinned by get_user_pages().
 *  MADV_DOFORK - cancel MADV_DONTFORK: no longer omit this area when forking.
 *  MADV_HWPOISON - trigger memory error handler as if the given memory range
 *		were corrupted by unrecoverable hardware memory failure.
 *  MADV_SOFT_OFFLINE - try to soft-offline the given range of memory.
 *  MADV_MERGEABLE - the application recommends that KSM try to merge pages in
 *		this area with pages of identical content from other such areas.
 *  MADV_UNMERGEABLE - cancel MADV_MERGEABLE: no longer merge pages with others.
 *  MADV_HUGEPAGE - the application wants to back the given range by transparent
 *		huge pages in the future. Existing pages might be coalesced and
 *		new pages might be allocated as THP.
 *  MADV_NOHUGEPAGE - mark the given range as not worth being backed by
 *		transparent huge pages so the existing pages will not be
 *		coalesced into THP and new pages will not be allocated as THP.
 *  MADV_DONTDUMP - the application wants to prevent pages in the given range
 *		from being included in its core dump.
 *  MADV_DODUMP - cancel MADV_DONTDUMP: no longer exclude from core dump.
 *
 * return values:
 *  zero    - success
 *  -EINVAL - start + len < 0, start is not page-aligned,
 *		"behavior" is not a valid value, or application
 *		is attempting to release locked or shared pages.
 *  -ENOMEM - addresses in the specified range are not currently
 *		mapped, or are outside the AS of the process.
 *  -EIO    - an I/O error occurred while paging in data.
 *  -EBADF  - map exists, but area maps something that isn't a file.
 *  -EAGAIN - a kernel resource was temporarily unavailable.
 */
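/*
 * Illustrative userspace usage only (not kernel code): an application that
 * is done with an anonymous scratch buffer can keep the mapping but let the
 * kernel reclaim the pages lazily:
 *
 *	buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 *	...fill and use buf...
 *	madvise(buf, len, MADV_FREE);	// or MADV_DONTNEED to drop it now
 *
 * With MADV_FREE the pages are freed only under memory pressure; a write
 * to them before reclaim cancels the lazy free and keeps the old contents.
 */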
SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior)
{
	unsigned long end, tmp;
	struct vm_area_struct *vma, *prev;
	int unmapped_error = 0;
	int error = -EINVAL;
	int write;
	size_t len;
	struct blk_plug plug;

#ifdef CONFIG_MEMORY_FAILURE
	if (behavior == MADV_HWPOISON || behavior == MADV_SOFT_OFFLINE)
		return madvise_hwpoison(behavior, start, start + len_in);
#endif
	if (!madvise_behavior_valid(behavior))
		return error;

	if (start & ~PAGE_MASK)
		return error;
	len = (len_in + ~PAGE_MASK) & PAGE_MASK;

	/* Check to see whether len was rounded up from small -ve to zero */
	if (len_in && !len)
		return error;

	end = start + len;
	if (end < start)
		return error;

	error = 0;
	if (end == start)
		return error;

	write = madvise_need_mmap_write(behavior);
	if (write) {
		if (down_write_killable(&current->mm->mmap_sem))
			return -EINTR;
	} else {
		down_read(&current->mm->mmap_sem);
	}

	/*
	 * If the interval [start,end) covers some unmapped address
	 * ranges, just ignore them, but return -ENOMEM at the end.
	 * - different from the way of handling in mlock etc.
	 */
	vma = find_vma_prev(current->mm, start, &prev);
	if (vma && start > vma->vm_start)
		prev = vma;

	blk_start_plug(&plug);
	for (;;) {
		/* Still start < end. */
		error = -ENOMEM;
		if (!vma)
			goto out;

		/* Here start < (end|vma->vm_end). */
		if (start < vma->vm_start) {
			unmapped_error = -ENOMEM;
			start = vma->vm_start;
			if (start >= end)
				goto out;
		}

		/* Here vma->vm_start <= start < (end|vma->vm_end) */
		tmp = vma->vm_end;
		if (end < tmp)
			tmp = end;

		/* Here vma->vm_start <= start < tmp <= (end|vma->vm_end). */
		error = madvise_vma(vma, &prev, start, tmp, behavior);
		if (error)
			goto out;
		start = tmp;
		if (prev && start < prev->vm_end)
			start = prev->vm_end;
		error = unmapped_error;
		if (start >= end)
			goto out;
		if (prev)
			vma = prev->vm_next;
		else	/* madvise_remove dropped mmap_sem */
			vma = find_vma(current->mm, start);
	}
out:
	blk_finish_plug(&plug);
	if (write)
		up_write(&current->mm->mmap_sem);
	else
		up_read(&current->mm->mmap_sem);

	return error;
}