madvise.c (57a30218fa25c469ed507964bbf028b7a064309a) madvise.c (7d4a8be0c4b2b7ffb367929d2b352651f083806b)
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * linux/mm/madvise.c
4 *
5 * Copyright (C) 1999 Linus Torvalds
6 * Copyright (C) 2002 Christoph Hellwig
7 */
8

--- 331 unchanged lines hidden (view full) ---

340{
341 struct madvise_walk_private *private = walk->private;
342 struct mmu_gather *tlb = private->tlb;
343 bool pageout = private->pageout;
344 struct mm_struct *mm = tlb->mm;
345 struct vm_area_struct *vma = walk->vma;
346 pte_t *orig_pte, *pte, ptent;
347 spinlock_t *ptl;
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * linux/mm/madvise.c
4 *
5 * Copyright (C) 1999 Linus Torvalds
6 * Copyright (C) 2002 Christoph Hellwig
7 */
8

--- 331 unchanged lines hidden (view full) ---

340{
341 struct madvise_walk_private *private = walk->private;
342 struct mmu_gather *tlb = private->tlb;
343 bool pageout = private->pageout;
344 struct mm_struct *mm = tlb->mm;
345 struct vm_area_struct *vma = walk->vma;
346 pte_t *orig_pte, *pte, ptent;
347 spinlock_t *ptl;
348 struct page *page = NULL;
349 LIST_HEAD(page_list);
348 struct folio *folio = NULL;
349 LIST_HEAD(folio_list);
350 bool pageout_anon_only_filter;
351
352 if (fatal_signal_pending(current))
353 return -EINTR;
354
355 pageout_anon_only_filter = pageout && !vma_is_anonymous(vma) &&
356 !can_do_file_pageout(vma);
357

--- 12 unchanged lines hidden (view full) ---

370 goto huge_unlock;
371
372 if (unlikely(!pmd_present(orig_pmd))) {
373 VM_BUG_ON(thp_migration_supported() &&
374 !is_pmd_migration_entry(orig_pmd));
375 goto huge_unlock;
376 }
377
350 bool pageout_anon_only_filter;
351
352 if (fatal_signal_pending(current))
353 return -EINTR;
354
355 pageout_anon_only_filter = pageout && !vma_is_anonymous(vma) &&
356 !can_do_file_pageout(vma);
357

--- 12 unchanged lines hidden (view full) ---

370 goto huge_unlock;
371
372 if (unlikely(!pmd_present(orig_pmd))) {
373 VM_BUG_ON(thp_migration_supported() &&
374 !is_pmd_migration_entry(orig_pmd));
375 goto huge_unlock;
376 }
377
378 page = pmd_page(orig_pmd);
378 folio = pfn_folio(pmd_pfn(orig_pmd));
379
379
380 /* Do not interfere with other mappings of this page */
381 if (page_mapcount(page) != 1)
380 /* Do not interfere with other mappings of this folio */
381 if (folio_mapcount(folio) != 1)
382 goto huge_unlock;
383
382 goto huge_unlock;
383
384 if (pageout_anon_only_filter && !PageAnon(page))
384 if (pageout_anon_only_filter && !folio_test_anon(folio))
385 goto huge_unlock;
386
387 if (next - addr != HPAGE_PMD_SIZE) {
388 int err;
389
385 goto huge_unlock;
386
387 if (next - addr != HPAGE_PMD_SIZE) {
388 int err;
389
390 get_page(page);
390 folio_get(folio);
391 spin_unlock(ptl);
391 spin_unlock(ptl);
392 lock_page(page);
393 err = split_huge_page(page);
394 unlock_page(page);
395 put_page(page);
392 folio_lock(folio);
393 err = split_folio(folio);
394 folio_unlock(folio);
395 folio_put(folio);
396 if (!err)
396 if (!err)
397 goto regular_page;
397 goto regular_folio;
398 return 0;
399 }
400
401 if (pmd_young(orig_pmd)) {
402 pmdp_invalidate(vma, addr, pmd);
403 orig_pmd = pmd_mkold(orig_pmd);
404
405 set_pmd_at(mm, addr, pmd, orig_pmd);
406 tlb_remove_pmd_tlb_entry(tlb, pmd, addr);
407 }
408
398 return 0;
399 }
400
401 if (pmd_young(orig_pmd)) {
402 pmdp_invalidate(vma, addr, pmd);
403 orig_pmd = pmd_mkold(orig_pmd);
404
405 set_pmd_at(mm, addr, pmd, orig_pmd);
406 tlb_remove_pmd_tlb_entry(tlb, pmd, addr);
407 }
408
409 ClearPageReferenced(page);
410 test_and_clear_page_young(page);
409 folio_clear_referenced(folio);
410 folio_test_clear_young(folio);
411 if (pageout) {
411 if (pageout) {
412 if (!isolate_lru_page(page)) {
413 if (PageUnevictable(page))
414 putback_lru_page(page);
412 if (!folio_isolate_lru(folio)) {
413 if (folio_test_unevictable(folio))
414 folio_putback_lru(folio);
415 else
415 else
416 list_add(&page->lru, &page_list);
416 list_add(&folio->lru, &folio_list);
417 }
418 } else
417 }
418 } else
419 deactivate_page(page);
419 folio_deactivate(folio);
420huge_unlock:
421 spin_unlock(ptl);
422 if (pageout)
420huge_unlock:
421 spin_unlock(ptl);
422 if (pageout)
423 reclaim_pages(&page_list);
423 reclaim_pages(&folio_list);
424 return 0;
425 }
426
424 return 0;
425 }
426
427regular_page:
427regular_folio:
428 if (pmd_trans_unstable(pmd))
429 return 0;
430#endif
431 tlb_change_page_size(tlb, PAGE_SIZE);
432 orig_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
433 flush_tlb_batched_pending(mm);
434 arch_enter_lazy_mmu_mode();
435 for (; addr < end; pte++, addr += PAGE_SIZE) {
436 ptent = *pte;
437
438 if (pte_none(ptent))
439 continue;
440
441 if (!pte_present(ptent))
442 continue;
443
428 if (pmd_trans_unstable(pmd))
429 return 0;
430#endif
431 tlb_change_page_size(tlb, PAGE_SIZE);
432 orig_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
433 flush_tlb_batched_pending(mm);
434 arch_enter_lazy_mmu_mode();
435 for (; addr < end; pte++, addr += PAGE_SIZE) {
436 ptent = *pte;
437
438 if (pte_none(ptent))
439 continue;
440
441 if (!pte_present(ptent))
442 continue;
443
444 page = vm_normal_page(vma, addr, ptent);
445 if (!page || is_zone_device_page(page))
444 folio = vm_normal_folio(vma, addr, ptent);
445 if (!folio || folio_is_zone_device(folio))
446 continue;
447
448 /*
449 * Creating a THP page is expensive so split it only if we
450 * are sure it's worth. Split it if we are only owner.
451 */
446 continue;
447
448 /*
449 * Creating a THP page is expensive so split it only if we
450 * are sure it's worth. Split it if we are only owner.
451 */
452 if (PageTransCompound(page)) {
453 if (page_mapcount(page) != 1)
452 if (folio_test_large(folio)) {
453 if (folio_mapcount(folio) != 1)
454 break;
454 break;
455 if (pageout_anon_only_filter && !PageAnon(page))
455 if (pageout_anon_only_filter && !folio_test_anon(folio))
456 break;
456 break;
457 get_page(page);
458 if (!trylock_page(page)) {
459 put_page(page);
457 folio_get(folio);
458 if (!folio_trylock(folio)) {
459 folio_put(folio);
460 break;
461 }
462 pte_unmap_unlock(orig_pte, ptl);
460 break;
461 }
462 pte_unmap_unlock(orig_pte, ptl);
463 if (split_huge_page(page)) {
464 unlock_page(page);
465 put_page(page);
463 if (split_folio(folio)) {
464 folio_unlock(folio);
465 folio_put(folio);
466 orig_pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
467 break;
468 }
466 orig_pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
467 break;
468 }
469 unlock_page(page);
470 put_page(page);
469 folio_unlock(folio);
470 folio_put(folio);
471 orig_pte = pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
472 pte--;
473 addr -= PAGE_SIZE;
474 continue;
475 }
476
477 /*
471 orig_pte = pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
472 pte--;
473 addr -= PAGE_SIZE;
474 continue;
475 }
476
477 /*
478 * Do not interfere with other mappings of this page and
479 * non-LRU page.
478 * Do not interfere with other mappings of this folio and
479 * non-LRU folio.
480 */
480 */
481 if (!PageLRU(page) || page_mapcount(page) != 1)
481 if (!folio_test_lru(folio) || folio_mapcount(folio) != 1)
482 continue;
483
482 continue;
483
484 if (pageout_anon_only_filter && !PageAnon(page))
484 if (pageout_anon_only_filter && !folio_test_anon(folio))
485 continue;
486
485 continue;
486
487 VM_BUG_ON_PAGE(PageTransCompound(page), page);
487 VM_BUG_ON_FOLIO(folio_test_large(folio), folio);
488
489 if (pte_young(ptent)) {
490 ptent = ptep_get_and_clear_full(mm, addr, pte,
491 tlb->fullmm);
492 ptent = pte_mkold(ptent);
493 set_pte_at(mm, addr, pte, ptent);
494 tlb_remove_tlb_entry(tlb, pte, addr);
495 }
496
497 /*
488
489 if (pte_young(ptent)) {
490 ptent = ptep_get_and_clear_full(mm, addr, pte,
491 tlb->fullmm);
492 ptent = pte_mkold(ptent);
493 set_pte_at(mm, addr, pte, ptent);
494 tlb_remove_tlb_entry(tlb, pte, addr);
495 }
496
497 /*
498 * We are deactivating a page for accelerating reclaiming.
499 * VM couldn't reclaim the page unless we clear PG_young.
498 * We are deactivating a folio for accelerating reclaiming.
499 * VM couldn't reclaim the folio unless we clear PG_young.
500 * As a side effect, it makes confuse idle-page tracking
501 * because they will miss recent referenced history.
502 */
500 * As a side effect, it makes confuse idle-page tracking
501 * because they will miss recent referenced history.
502 */
503 ClearPageReferenced(page);
504 test_and_clear_page_young(page);
503 folio_clear_referenced(folio);
504 folio_test_clear_young(folio);
505 if (pageout) {
505 if (pageout) {
506 if (!isolate_lru_page(page)) {
507 if (PageUnevictable(page))
508 putback_lru_page(page);
506 if (!folio_isolate_lru(folio)) {
507 if (folio_test_unevictable(folio))
508 folio_putback_lru(folio);
509 else
509 else
510 list_add(&page->lru, &page_list);
510 list_add(&folio->lru, &folio_list);
511 }
512 } else
511 }
512 } else
513 deactivate_page(page);
513 folio_deactivate(folio);
514 }
515
516 arch_leave_lazy_mmu_mode();
517 pte_unmap_unlock(orig_pte, ptl);
518 if (pageout)
514 }
515
516 arch_leave_lazy_mmu_mode();
517 pte_unmap_unlock(orig_pte, ptl);
518 if (pageout)
519 reclaim_pages(&page_list);
519 reclaim_pages(&folio_list);
520 cond_resched();
521
522 return 0;
523}
524
/*
 * mm_walk_ops table whose only callback set here is .pmd_entry, bound to
 * madvise_cold_or_pageout_pte_range. All other callbacks are left unset.
 * NOTE(review): presumably handed to the page-table walker by the
 * MADV_COLD / MADV_PAGEOUT paths; the users of this table are not visible
 * in this excerpt - confirm against the callers.
 */
525static const struct mm_walk_ops cold_walk_ops = {
526 .pmd_entry = madvise_cold_or_pageout_pte_range,
527};

--- 195 unchanged lines hidden (view full) ---

723 ptent = ptep_get_and_clear_full(mm, addr, pte,
724 tlb->fullmm);
725
726 ptent = pte_mkold(ptent);
727 ptent = pte_mkclean(ptent);
728 set_pte_at(mm, addr, pte, ptent);
729 tlb_remove_tlb_entry(tlb, pte, addr);
730 }
520 cond_resched();
521
522 return 0;
523}
524
/*
 * mm_walk_ops table whose only callback set here is .pmd_entry, bound to
 * madvise_cold_or_pageout_pte_range. All other callbacks are left unset.
 * NOTE(review): presumably handed to the page-table walker by the
 * MADV_COLD / MADV_PAGEOUT paths; the users of this table are not visible
 * in this excerpt - confirm against the callers.
 */
525static const struct mm_walk_ops cold_walk_ops = {
526 .pmd_entry = madvise_cold_or_pageout_pte_range,
527};

--- 195 unchanged lines hidden (view full) ---

723 ptent = ptep_get_and_clear_full(mm, addr, pte,
724 tlb->fullmm);
725
726 ptent = pte_mkold(ptent);
727 ptent = pte_mkclean(ptent);
728 set_pte_at(mm, addr, pte, ptent);
729 tlb_remove_tlb_entry(tlb, pte, addr);
730 }
731 mark_page_lazyfree(&folio->page);
731 folio_mark_lazyfree(folio);
732 }
733out:
734 if (nr_swap) {
735 if (current->mm == mm)
736 sync_mm_rss(mm);
737
738 add_mm_counter(mm, MM_SWAPENTS, nr_swap);
739 }

--- 20 unchanged lines hidden (view full) ---

760 return -EINVAL;
761
762 range.start = max(vma->vm_start, start_addr);
763 if (range.start >= vma->vm_end)
764 return -EINVAL;
765 range.end = min(vma->vm_end, end_addr);
766 if (range.end <= vma->vm_start)
767 return -EINVAL;
732 }
733out:
734 if (nr_swap) {
735 if (current->mm == mm)
736 sync_mm_rss(mm);
737
738 add_mm_counter(mm, MM_SWAPENTS, nr_swap);
739 }

--- 20 unchanged lines hidden (view full) ---

760 return -EINVAL;
761
762 range.start = max(vma->vm_start, start_addr);
763 if (range.start >= vma->vm_end)
764 return -EINVAL;
765 range.end = min(vma->vm_end, end_addr);
766 if (range.end <= vma->vm_start)
767 return -EINVAL;
768 mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, mm,
768 mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, mm,
769 range.start, range.end);
770
771 lru_add_drain();
772 tlb_gather_mmu(&tlb, mm);
773 update_hiwater_rss(mm);
774
775 mmu_notifier_invalidate_range_start(&range);
776 tlb_start_vma(&tlb, vma);

--- 753 unchanged lines hidden ---
769 range.start, range.end);
770
771 lru_add_drain();
772 tlb_gather_mmu(&tlb, mm);
773 update_hiwater_rss(mm);
774
775 mmu_notifier_invalidate_range_start(&range);
776 tlb_start_vma(&tlb, vma);

--- 753 unchanged lines hidden ---