linux/mm/mmu_gather.c

196d9d8bSPeter Zijlstra#include <linux/gfp.h>
196d9d8bSPeter Zijlstra#include <linux/highmem.h>
196d9d8bSPeter Zijlstra#include <linux/kernel.h>
196d9d8bSPeter Zijlstra#include <linux/mmdebug.h>
196d9d8bSPeter Zijlstra#include <linux/mm_types.h>
36090defSArnd Bergmann#include <linux/mm_inline.h>
196d9d8bSPeter Zijlstra#include <linux/pagemap.h>
196d9d8bSPeter Zijlstra#include <linux/rcupdate.h>
196d9d8bSPeter Zijlstra#include <linux/smp.h>
196d9d8bSPeter Zijlstra#include <linux/swap.h>
5df397deSLinus Torvalds#include <linux/rmap.h>
196d9d8bSPeter Zijlstra
196d9d8bSPeter Zijlstra#include <asm/pgalloc.h>
196d9d8bSPeter Zijlstra#include <asm/tlb.h>
196d9d8bSPeter Zijlstra
580a586cSPeter Zijlstra#ifndef CONFIG_MMU_GATHER_NO_GATHER
952a31c9SMartin Schwidefsky
196d9d8bSPeter Zijlstrastatic bool tlb_next_batch(struct mmu_gather *tlb)
196d9d8bSPeter Zijlstra{
196d9d8bSPeter Zijlstra	struct mmu_gather_batch *batch;
196d9d8bSPeter Zijlstra
c4745482SLinus Torvalds	/* Limit batching if we have delayed rmaps pending */
c4745482SLinus Torvalds	if (tlb->delayed_rmap && tlb->active != &tlb->local)
5df397deSLinus Torvalds		return false;
5df397deSLinus Torvalds
196d9d8bSPeter Zijlstra	batch = tlb->active;
196d9d8bSPeter Zijlstra	if (batch->next) {
196d9d8bSPeter Zijlstra		tlb->active = batch->next;
196d9d8bSPeter Zijlstra		return true;
196d9d8bSPeter Zijlstra	}
196d9d8bSPeter Zijlstra
196d9d8bSPeter Zijlstra	if (tlb->batch_count == MAX_GATHER_BATCH_COUNT)
196d9d8bSPeter Zijlstra		return false;
196d9d8bSPeter Zijlstra
dcc1be11SLorenzo Stoakes	batch = (void *)__get_free_page(GFP_NOWAIT | __GFP_NOWARN);
196d9d8bSPeter Zijlstra	if (!batch)
196d9d8bSPeter Zijlstra		return false;
196d9d8bSPeter Zijlstra
196d9d8bSPeter Zijlstra	tlb->batch_count++;
196d9d8bSPeter Zijlstra	batch->next = NULL;
196d9d8bSPeter Zijlstra	batch->nr   = 0;
196d9d8bSPeter Zijlstra	batch->max  = MAX_GATHER_BATCH;
196d9d8bSPeter Zijlstra
196d9d8bSPeter Zijlstra	tlb->active->next = batch;
196d9d8bSPeter Zijlstra	tlb->active = batch;
196d9d8bSPeter Zijlstra
196d9d8bSPeter Zijlstra	return true;
196d9d8bSPeter Zijlstra}
196d9d8bSPeter Zijlstra
5df397deSLinus Torvalds#ifdef CONFIG_SMP
c4745482SLinus Torvaldsstatic void tlb_flush_rmap_batch(struct mmu_gather_batch *batch, struct vm_area_struct *vma)
5df397deSLinus Torvalds{
5df397deSLinus Torvalds	for (int i = 0; i < batch->nr; i++) {
5df397deSLinus Torvalds		struct encoded_page *enc = batch->encoded_pages[i];
5df397deSLinus Torvalds
5df397deSLinus Torvalds		if (encoded_page_flags(enc)) {
5df397deSLinus Torvalds			struct page *page = encoded_page_ptr(enc);
5df397deSLinus Torvalds			page_remove_rmap(page, vma, false);
5df397deSLinus Torvalds		}
5df397deSLinus Torvalds	}
c4745482SLinus Torvalds}
5df397deSLinus Torvalds
c4745482SLinus Torvalds/**
c4745482SLinus Torvalds * tlb_flush_rmaps - do pending rmap removals after we have flushed the TLB
c4745482SLinus Torvalds * @tlb: the current mmu_gather
*19134bc2SMatthew Wilcox (Oracle) * @vma: The memory area from which the pages are being removed.
c4745482SLinus Torvalds *
c4745482SLinus Torvalds * Note that because of how tlb_next_batch() above works, we will
c4745482SLinus Torvalds * never start multiple new batches with pending delayed rmaps, so
c4745482SLinus Torvalds * we only need to walk through the current active batch and the
c4745482SLinus Torvalds * original local one.
c4745482SLinus Torvalds */
c4745482SLinus Torvaldsvoid tlb_flush_rmaps(struct mmu_gather *tlb, struct vm_area_struct *vma)
c4745482SLinus Torvalds{
c4745482SLinus Torvalds	if (!tlb->delayed_rmap)
c4745482SLinus Torvalds		return;
c4745482SLinus Torvalds
c4745482SLinus Torvalds	tlb_flush_rmap_batch(&tlb->local, vma);
c4745482SLinus Torvalds	if (tlb->active != &tlb->local)
c4745482SLinus Torvalds		tlb_flush_rmap_batch(tlb->active, vma);
5df397deSLinus Torvalds	tlb->delayed_rmap = 0;
5df397deSLinus Torvalds}
5df397deSLinus Torvalds#endif
5df397deSLinus Torvalds
952a31c9SMartin Schwidefskystatic void tlb_batch_pages_flush(struct mmu_gather *tlb)
196d9d8bSPeter Zijlstra{
196d9d8bSPeter Zijlstra	struct mmu_gather_batch *batch;
196d9d8bSPeter Zijlstra
196d9d8bSPeter Zijlstra	for (batch = &tlb->local; batch && batch->nr; batch = batch->next) {
7cc8f9c7SLinus Torvalds		struct encoded_page **pages = batch->encoded_pages;
b191c9bcSJianxing Wang
b191c9bcSJianxing Wang		do {
b191c9bcSJianxing Wang			/*
b191c9bcSJianxing Wang			 * limit free batch count when PAGE_SIZE > 4K
b191c9bcSJianxing Wang			 */
b191c9bcSJianxing Wang			unsigned int nr = min(512U, batch->nr);
b191c9bcSJianxing Wang
b191c9bcSJianxing Wang			free_pages_and_swap_cache(pages, nr);
b191c9bcSJianxing Wang			pages += nr;
b191c9bcSJianxing Wang			batch->nr -= nr;
b191c9bcSJianxing Wang
b191c9bcSJianxing Wang			cond_resched();
b191c9bcSJianxing Wang		} while (batch->nr);
196d9d8bSPeter Zijlstra	}
196d9d8bSPeter Zijlstra	tlb->active = &tlb->local;
196d9d8bSPeter Zijlstra}
196d9d8bSPeter Zijlstra
952a31c9SMartin Schwidefskystatic void tlb_batch_list_free(struct mmu_gather *tlb)
196d9d8bSPeter Zijlstra{
196d9d8bSPeter Zijlstra	struct mmu_gather_batch *batch, *next;
196d9d8bSPeter Zijlstra
196d9d8bSPeter Zijlstra	for (batch = tlb->local.next; batch; batch = next) {
196d9d8bSPeter Zijlstra		next = batch->next;
196d9d8bSPeter Zijlstra		free_pages((unsigned long)batch, 0);
196d9d8bSPeter Zijlstra	}
196d9d8bSPeter Zijlstra	tlb->local.next = NULL;
196d9d8bSPeter Zijlstra}
196d9d8bSPeter Zijlstra
7cc8f9c7SLinus Torvaldsbool __tlb_remove_page_size(struct mmu_gather *tlb, struct encoded_page *page, int page_size)
196d9d8bSPeter Zijlstra{
196d9d8bSPeter Zijlstra	struct mmu_gather_batch *batch;
196d9d8bSPeter Zijlstra
196d9d8bSPeter Zijlstra	VM_BUG_ON(!tlb->end);
ed6a7935SPeter Zijlstra
3af4bd03SPeter Zijlstra#ifdef CONFIG_MMU_GATHER_PAGE_SIZE
196d9d8bSPeter Zijlstra	VM_WARN_ON(tlb->page_size != page_size);
ed6a7935SPeter Zijlstra#endif
196d9d8bSPeter Zijlstra
196d9d8bSPeter Zijlstra	batch = tlb->active;
196d9d8bSPeter Zijlstra	/*
196d9d8bSPeter Zijlstra	 * Add the page and check if we are full. If so
196d9d8bSPeter Zijlstra	 * force a flush.
196d9d8bSPeter Zijlstra	 */
7cc8f9c7SLinus Torvalds	batch->encoded_pages[batch->nr++] = page;
196d9d8bSPeter Zijlstra	if (batch->nr == batch->max) {
196d9d8bSPeter Zijlstra		if (!tlb_next_batch(tlb))
196d9d8bSPeter Zijlstra			return true;
196d9d8bSPeter Zijlstra		batch = tlb->active;
196d9d8bSPeter Zijlstra	}
7cc8f9c7SLinus Torvalds	VM_BUG_ON_PAGE(batch->nr > batch->max, encoded_page_ptr(page));
196d9d8bSPeter Zijlstra
196d9d8bSPeter Zijlstra	return false;
196d9d8bSPeter Zijlstra}
196d9d8bSPeter Zijlstra
580a586cSPeter Zijlstra#endif /* MMU_GATHER_NO_GATHER */
952a31c9SMartin Schwidefsky
0d6e24d4SPeter Zijlstra#ifdef CONFIG_MMU_GATHER_TABLE_FREE
0d6e24d4SPeter Zijlstra
0d6e24d4SPeter Zijlstrastatic void __tlb_remove_table_free(struct mmu_table_batch *batch)
0d6e24d4SPeter Zijlstra{
0d6e24d4SPeter Zijlstra	int i;
0d6e24d4SPeter Zijlstra
0d6e24d4SPeter Zijlstra	for (i = 0; i < batch->nr; i++)
0d6e24d4SPeter Zijlstra		__tlb_remove_table(batch->tables[i]);
0d6e24d4SPeter Zijlstra
0d6e24d4SPeter Zijlstra	free_page((unsigned long)batch);
0d6e24d4SPeter Zijlstra}
0d6e24d4SPeter Zijlstra
ff2e6d72SPeter Zijlstra#ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE
196d9d8bSPeter Zijlstra
/*
 * Semi RCU freeing of the page directories.
 *
 * This is needed by some architectures to implement software pagetable walkers.
 *
 * gup_fast() and other software pagetable walkers do a lockless page-table
 * walk and therefore need some synchronization with the freeing of the page
 * directories. The chosen means to accomplish that is by disabling IRQs over
 * the walk.
 *
 * Architectures that use IPIs to flush TLBs will then automagically DTRT,
 * since we unlink the page, flush TLBs, free the page. Since the disabling of
 * IRQs delays the completion of the TLB flush we can never observe an already
 * freed page.
 *
 * Architectures that do not have this (PPC) need to delay the freeing by some
 * other means, this is that means.
 *
 * What we do is batch the freed directory pages (tables) and RCU free them.
 * We use the sched RCU variant, as that guarantees that IRQ/preempt disabling
 * holds off grace periods.
 *
 * However, in order to batch these pages we need to allocate storage, this
 * allocation is deep inside the MM code and can thus easily fail on memory
 * pressure. To guarantee progress we fall back to single table freeing, see
 * the implementation of tlb_remove_table_one().
 *
 */
196d9d8bSPeter Zijlstra
/*
 * Callback passed to smp_call_function() by tlb_remove_table_sync_one().
 * It deliberately has an empty body and never touches @arg.
 */
static void tlb_remove_table_smp_sync(void *arg)
{
	/* Simply deliver the interrupt */
}
0d6e24d4SPeter Zijlstra
2ba99c5eSJann Hornvoid tlb_remove_table_sync_one(void)
0d6e24d4SPeter Zijlstra{
0d6e24d4SPeter Zijlstra	/*
0d6e24d4SPeter Zijlstra	 * This isn't an RCU grace period and hence the page-tables cannot be
0d6e24d4SPeter Zijlstra	 * assumed to be actually RCU-freed.
0d6e24d4SPeter Zijlstra	 *
0d6e24d4SPeter Zijlstra	 * It is however sufficient for software page-table walkers that rely on
0d6e24d4SPeter Zijlstra	 * IRQ disabling.
0d6e24d4SPeter Zijlstra	 */
0d6e24d4SPeter Zijlstra	smp_call_function(tlb_remove_table_smp_sync, NULL, 1);
0d6e24d4SPeter Zijlstra}
0d6e24d4SPeter Zijlstra
0d6e24d4SPeter Zijlstrastatic void tlb_remove_table_rcu(struct rcu_head *head)
0d6e24d4SPeter Zijlstra{
0d6e24d4SPeter Zijlstra	__tlb_remove_table_free(container_of(head, struct mmu_table_batch, rcu));
0d6e24d4SPeter Zijlstra}
0d6e24d4SPeter Zijlstra
0d6e24d4SPeter Zijlstrastatic void tlb_remove_table_free(struct mmu_table_batch *batch)
0d6e24d4SPeter Zijlstra{
0d6e24d4SPeter Zijlstra	call_rcu(&batch->rcu, tlb_remove_table_rcu);
0d6e24d4SPeter Zijlstra}
0d6e24d4SPeter Zijlstra
0d6e24d4SPeter Zijlstra#else /* !CONFIG_MMU_GATHER_RCU_TABLE_FREE */
0d6e24d4SPeter Zijlstra
/*
 * Variant used without CONFIG_MMU_GATHER_RCU_TABLE_FREE: free @batch and
 * its tables right away instead of deferring through RCU.
 */
static void tlb_remove_table_free(struct mmu_table_batch *batch)
{
	__tlb_remove_table_free(batch);
}
0d6e24d4SPeter Zijlstra
0d6e24d4SPeter Zijlstra#endif /* CONFIG_MMU_GATHER_RCU_TABLE_FREE */
0d6e24d4SPeter Zijlstra
/*
 * If we want tlb_remove_table() to imply TLB invalidates.
 *
 * Issues tlb_flush_mmu_tlbonly() when tlb_needs_table_invalidate() says so
 * and does nothing otherwise.
 */
static inline void tlb_table_invalidate(struct mmu_gather *tlb)
{
	if (tlb_needs_table_invalidate()) {
		/*
		 * Invalidate page-table caches used by hardware walkers. Then
		 * we still need to RCU-sched wait while freeing the pages
		 * because software walkers can still be in-flight.
		 */
		tlb_flush_mmu_tlbonly(tlb);
	}
}
196d9d8bSPeter Zijlstra
/*
 * Free a single @table without batching. tlb_remove_table() uses this as
 * its fallback when no batch page can be allocated. The synchronisation
 * step runs first, then the table is released.
 */
static void tlb_remove_table_one(void *table)
{
	tlb_remove_table_sync_one();
	__tlb_remove_table(table);
}
196d9d8bSPeter Zijlstra
0a8caf21SPeter Zijlstrastatic void tlb_table_flush(struct mmu_gather *tlb)
196d9d8bSPeter Zijlstra{
196d9d8bSPeter Zijlstra	struct mmu_table_batch **batch = &tlb->batch;
196d9d8bSPeter Zijlstra
196d9d8bSPeter Zijlstra	if (*batch) {
196d9d8bSPeter Zijlstra		tlb_table_invalidate(tlb);
0d6e24d4SPeter Zijlstra		tlb_remove_table_free(*batch);
196d9d8bSPeter Zijlstra		*batch = NULL;
196d9d8bSPeter Zijlstra	}
196d9d8bSPeter Zijlstra}
196d9d8bSPeter Zijlstra
196d9d8bSPeter Zijlstravoid tlb_remove_table(struct mmu_gather *tlb, void *table)
196d9d8bSPeter Zijlstra{
196d9d8bSPeter Zijlstra	struct mmu_table_batch **batch = &tlb->batch;
196d9d8bSPeter Zijlstra
196d9d8bSPeter Zijlstra	if (*batch == NULL) {
196d9d8bSPeter Zijlstra		*batch = (struct mmu_table_batch *)__get_free_page(GFP_NOWAIT | __GFP_NOWARN);
196d9d8bSPeter Zijlstra		if (*batch == NULL) {
196d9d8bSPeter Zijlstra			tlb_table_invalidate(tlb);
196d9d8bSPeter Zijlstra			tlb_remove_table_one(table);
196d9d8bSPeter Zijlstra			return;
196d9d8bSPeter Zijlstra		}
196d9d8bSPeter Zijlstra		(*batch)->nr = 0;
196d9d8bSPeter Zijlstra	}
196d9d8bSPeter Zijlstra
196d9d8bSPeter Zijlstra	(*batch)->tables[(*batch)->nr++] = table;
196d9d8bSPeter Zijlstra	if ((*batch)->nr == MAX_TABLE_BATCH)
196d9d8bSPeter Zijlstra		tlb_table_flush(tlb);
196d9d8bSPeter Zijlstra}
196d9d8bSPeter Zijlstra
0d6e24d4SPeter Zijlstrastatic inline void tlb_table_init(struct mmu_gather *tlb)
0d6e24d4SPeter Zijlstra{
0d6e24d4SPeter Zijlstra	tlb->batch = NULL;
0d6e24d4SPeter Zijlstra}
0d6e24d4SPeter Zijlstra
0d6e24d4SPeter Zijlstra#else /* !CONFIG_MMU_GATHER_TABLE_FREE */
0d6e24d4SPeter Zijlstra
/*
 * Without CONFIG_MMU_GATHER_TABLE_FREE there is no table batch, so flushing
 * and initialising it are both no-ops.
 */
static inline void tlb_table_flush(struct mmu_gather *tlb) { }
static inline void tlb_table_init(struct mmu_gather *tlb) { }
0d6e24d4SPeter Zijlstra
0d6e24d4SPeter Zijlstra#endif /* CONFIG_MMU_GATHER_TABLE_FREE */
196d9d8bSPeter Zijlstra
/*
 * Release what @tlb has gathered: first the pending table batch, then,
 * unless CONFIG_MMU_GATHER_NO_GATHER is set, the batched pages.
 */
static void tlb_flush_mmu_free(struct mmu_gather *tlb)
{
	tlb_table_flush(tlb);
#ifndef CONFIG_MMU_GATHER_NO_GATHER
	tlb_batch_pages_flush(tlb);
#endif
}
0a8caf21SPeter Zijlstra
/*
 * Flush the TLB for @tlb and then free everything gathered so far. The TLB
 * flush is issued before any of the gathered memory is released.
 */
void tlb_flush_mmu(struct mmu_gather *tlb)
{
	tlb_flush_mmu_tlbonly(tlb);
	tlb_flush_mmu_free(tlb);
}
0a8caf21SPeter Zijlstra
d8b45053SWill Deaconstatic void __tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
a72afd87SWill Deacon			     bool fullmm)
196d9d8bSPeter Zijlstra{
1808d65bSPeter Zijlstra	tlb->mm = mm;
a72afd87SWill Deacon	tlb->fullmm = fullmm;
1808d65bSPeter Zijlstra
580a586cSPeter Zijlstra#ifndef CONFIG_MMU_GATHER_NO_GATHER
1808d65bSPeter Zijlstra	tlb->need_flush_all = 0;
1808d65bSPeter Zijlstra	tlb->local.next = NULL;
1808d65bSPeter Zijlstra	tlb->local.nr   = 0;
1808d65bSPeter Zijlstra	tlb->local.max  = ARRAY_SIZE(tlb->__pages);
1808d65bSPeter Zijlstra	tlb->active     = &tlb->local;
1808d65bSPeter Zijlstra	tlb->batch_count = 0;
1808d65bSPeter Zijlstra#endif
5df397deSLinus Torvalds	tlb->delayed_rmap = 0;
1808d65bSPeter Zijlstra
0d6e24d4SPeter Zijlstra	tlb_table_init(tlb);
3af4bd03SPeter Zijlstra#ifdef CONFIG_MMU_GATHER_PAGE_SIZE
1808d65bSPeter Zijlstra	tlb->page_size = 0;
1808d65bSPeter Zijlstra#endif
1808d65bSPeter Zijlstra
1808d65bSPeter Zijlstra	__tlb_reset_range(tlb);
196d9d8bSPeter Zijlstra	inc_tlb_flush_pending(tlb->mm);
196d9d8bSPeter Zijlstra}
196d9d8bSPeter Zijlstra
/**
 * tlb_gather_mmu - initialize an mmu_gather structure for page-table tear-down
 * @tlb: the mmu_gather structure to initialize
 * @mm: the mm_struct of the target address space
 *
 * Called to initialize an (on-stack) mmu_gather structure for page-table
 * tear-down from @mm. The structure is set up with fullmm cleared.
 */
void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm)
{
	__tlb_gather_mmu(tlb, mm, false);
}
d8b45053SWill Deacon
/**
 * tlb_gather_mmu_fullmm - initialize an mmu_gather structure for page-table tear-down
 * @tlb: the mmu_gather structure to initialize
 * @mm: the mm_struct of the target address space
 *
 * In this case, @mm is without users and we're going to destroy the
 * full address space (exit/execve).
 *
 * Called to initialize an (on-stack) mmu_gather structure for page-table
 * tear-down from @mm. The structure is set up with fullmm set.
 */
void tlb_gather_mmu_fullmm(struct mmu_gather *tlb, struct mm_struct *mm)
{
	__tlb_gather_mmu(tlb, mm, true);
}
d8b45053SWill Deacon
1808d65bSPeter Zijlstra/**
1808d65bSPeter Zijlstra * tlb_finish_mmu - finish an mmu_gather structure
1808d65bSPeter Zijlstra * @tlb: the mmu_gather structure to finish
1808d65bSPeter Zijlstra *
1808d65bSPeter Zijlstra * Called at the end of the shootdown operation to free up any resources that
1808d65bSPeter Zijlstra * were required.
1808d65bSPeter Zijlstra */
ae8eba8bSWill Deaconvoid tlb_finish_mmu(struct mmu_gather *tlb)
196d9d8bSPeter Zijlstra{
196d9d8bSPeter Zijlstra	/*
196d9d8bSPeter Zijlstra	 * If there are parallel threads are doing PTE changes on same range
c1e8d7c6SMichel Lespinasse	 * under non-exclusive lock (e.g., mmap_lock read-side) but defer TLB
7a30df49SYang Shi	 * flush by batching, one thread may end up seeing inconsistent PTEs
7a30df49SYang Shi	 * and result in having stale TLB entries.  So flush TLB forcefully
7a30df49SYang Shi	 * if we detect parallel PTE batching threads.
7a30df49SYang Shi	 *
7a30df49SYang Shi	 * However, some syscalls, e.g. munmap(), may free page tables, this
7a30df49SYang Shi	 * needs force flush everything in the given range. Otherwise this
7a30df49SYang Shi	 * may result in having stale TLB entries for some architectures,
7a30df49SYang Shi	 * e.g. aarch64, that could specify flush what level TLB.
196d9d8bSPeter Zijlstra	 */
1808d65bSPeter Zijlstra	if (mm_tlb_flush_nested(tlb->mm)) {
7a30df49SYang Shi		/*
7a30df49SYang Shi		 * The aarch64 yields better performance with fullmm by
7a30df49SYang Shi		 * avoiding multiple CPUs spamming TLBI messages at the
7a30df49SYang Shi		 * same time.
7a30df49SYang Shi		 *
7a30df49SYang Shi		 * On x86 non-fullmm doesn't yield significant difference
7a30df49SYang Shi		 * against fullmm.
7a30df49SYang Shi		 */
7a30df49SYang Shi		tlb->fullmm = 1;
1808d65bSPeter Zijlstra		__tlb_reset_range(tlb);
7a30df49SYang Shi		tlb->freed_tables = 1;
1808d65bSPeter Zijlstra	}
196d9d8bSPeter Zijlstra
1808d65bSPeter Zijlstra	tlb_flush_mmu(tlb);
1808d65bSPeter Zijlstra
580a586cSPeter Zijlstra#ifndef CONFIG_MMU_GATHER_NO_GATHER
1808d65bSPeter Zijlstra	tlb_batch_list_free(tlb);
1808d65bSPeter Zijlstra#endif
196d9d8bSPeter Zijlstra	dec_tlb_flush_pending(tlb->mm);
196d9d8bSPeter Zijlstra}