// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * This file contains the routines for flushing entries from the
 * TLB and MMU hash table.
 *
 *  Derived from arch/ppc64/mm/init.c:
 *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
 *
 *  Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
 *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
 *    Copyright (C) 1996 Paul Mackerras
 *
 *  Derived from "arch/i386/mm/init.c"
 *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 *
 *  Dave Engebretsen <engebret@us.ibm.com>
 *      Rework for PPC64 port.
 */

#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/percpu.h>
#include <linux/hardirq.h>
#include <asm/tlbflush.h>
#include <asm/tlb.h>
#include <asm/bug.h>
#include <asm/pte-walk.h>

#include <trace/events/thp.h>

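/*
 * Per-CPU batch of hash PTE invalidations.  Entries are queued by
 * hpte_need_flush() and flushed by __flush_tlb_pending(), either when
 * the batch fills up or when the batch is terminated.
 */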
DEFINE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch);

/*
 * A linux PTE was changed and the corresponding hash table entry
 * needs to be flushed. This function will either perform the flush
 * immediately or will batch it up if the current CPU has an active
 * batch on it.
 */
void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, unsigned long pte, int huge)
{
	unsigned long vpn;
	struct ppc64_tlb_batch *batch = &get_cpu_var(ppc64_tlb_batch);
	unsigned long vsid;
	unsigned int psize;
	int ssize;
	real_pte_t rpte;
	int i, offset;

	i = batch->index;

	/*
	 * Get page size (maybe move back to caller).
	 *
	 * NOTE: when using special 64K mappings in a 4K environment like
	 * for SPEs, we obtain the page size from the slice, which thus
	 * must still exist (and thus the VMA not reused) at the time
	 * of this call.
	 */
	if (huge) {
#ifdef CONFIG_HUGETLB_PAGE
		psize = get_slice_psize(mm, addr);
		/* Mask the address for the correct page size */
		addr &= ~((1UL << mmu_psize_defs[psize].shift) - 1);
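		/*
		 * 16G huge pages are mapped at the PUD level, all other
		 * huge page sizes at the PMD level, hence the different
		 * offsets passed to __real_pte() below.
		 */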
		if (unlikely(psize == MMU_PAGE_16G))
			offset = PTRS_PER_PUD;
		else
			offset = PTRS_PER_PMD;
#else
		BUG();
		psize = pte_pagesize_index(mm, addr, pte); /* shutup gcc */
#endif
	} else {
		psize = pte_pagesize_index(mm, addr, pte);
		/*
		 * Mask the address for the standard page size.  If we
		 * have a 64k page kernel, but the hardware does not
		 * support 64k pages, this might be different from the
		 * hardware page size encoded in the slice table.
		 */
		addr &= PAGE_MASK;
		offset = PTRS_PER_PTE;
	}

	/* Build full vaddr */
	if (!is_kernel_addr(addr)) {
		ssize = user_segment_size(addr);
		vsid = get_user_vsid(&mm->context, addr, ssize);
	} else {
		vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
		ssize = mmu_kernel_ssize;
	}
	WARN_ON(vsid == 0);
	vpn = hpt_vpn(addr, vsid, ssize);
	rpte = __real_pte(__pte(pte), ptep, offset);

	/*
	 * Check if we have an active batch on this CPU. If not, just
	 * flush now and return.
	 */
	if (!batch->active) {
		flush_hash_page(vpn, rpte, psize, ssize, mm_is_thread_local(mm));
		put_cpu_var(ppc64_tlb_batch);
		return;
	}

	/*
	 * This can happen when we are in the middle of a TLB batch and
	 * we encounter memory pressure (e.g. copy_page_range when it tries
	 * to allocate a new pte). If we have to reclaim memory and end
	 * up scanning and resetting referenced bits then our batch context
	 * will change mid-stream.
	 *
	 * We also need to ensure only one page size is present in a given
	 * batch.
	 */
	if (i != 0 && (mm != batch->mm || batch->psize != psize ||
		       batch->ssize != ssize)) {
		__flush_tlb_pending(batch);
		i = 0;
	}
	if (i == 0) {
		batch->mm = mm;
		batch->psize = psize;
		batch->ssize = ssize;
	}
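	/* Queue this entry; flush the whole batch once it is full. */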
	batch->pte[i] = rpte;
	batch->vpn[i] = vpn;
	batch->index = ++i;
	if (i >= PPC64_TLB_BATCH_NR)
		__flush_tlb_pending(batch);
	put_cpu_var(ppc64_tlb_batch);
}

/*
 * This function is called when terminating an mmu batch or when a batch
 * is full. It will perform the flush of all the entries currently stored
 * in a batch.
 *
 * Must be called from within some kind of spinlock/non-preempt region...
 */
void __flush_tlb_pending(struct ppc64_tlb_batch *batch)
{
	int i, local;

	i = batch->index;
	local = mm_is_thread_local(batch->mm);
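	/*
	 * A single entry can be flushed directly; otherwise hand the
	 * whole batch to flush_hash_range().
	 */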
	if (i == 1)
		flush_hash_page(batch->vpn[0], batch->pte[0],
				batch->psize, batch->ssize, local);
	else
		flush_hash_range(i, local);
	batch->index = 0;
}

void hash__tlb_flush(struct mmu_gather *tlb)
{
	struct ppc64_tlb_batch *tlbbatch = &get_cpu_var(ppc64_tlb_batch);

	/*
	 * If there's a TLB batch pending, then we must flush it because the
	 * pages are going to be freed and we really don't want to have a CPU
	 * access a freed page because it has a stale TLB
	 */
	if (tlbbatch->index)
		__flush_tlb_pending(tlbbatch);

	put_cpu_var(ppc64_tlb_batch);
}

/**
 * __flush_hash_table_range - Flush all HPTEs for a given address range
 *                            from the hash table (and the TLB). But keeps
 *                            the linux PTEs intact.
 *
 * @start	: starting address
 * @end         : ending address (not included in the flush)
 *
 * This function is mostly to be used by some IO hotplug code in order
 * to remove all hash entries from a given address range used to map IO
 * space on a removed PCI-PCI bridge without tearing down the full mapping
 * since 64K pages may overlap with other bridges when using 64K pages
 * with 4K HW pages on IO space.
 *
 * Because of that usage pattern, it is implemented for small size rather
 * than speed.
 */
void __flush_hash_table_range(unsigned long start, unsigned long end)
{
	int hugepage_shift;
	unsigned long flags;

	start = ALIGN_DOWN(start, PAGE_SIZE);
	end = ALIGN(end, PAGE_SIZE);

	/*
	 * Note: Normally, we should only ever use a batch within a
	 * PTE locked section. This violates the rule, but will work
	 * since we don't actually modify the PTEs, we just flush the
	 * hash while leaving the PTEs intact (including their reference
	 * to being hashed). This is not the most performance oriented
	 * way to do things but is fine for our needs here.
	 */
	local_irq_save(flags);
	arch_enter_lazy_mmu_mode();
	for (; start < end; start += PAGE_SIZE) {
		pte_t *ptep = find_init_mm_pte(start, &hugepage_shift);
		unsigned long pte;

		if (ptep == NULL)
			continue;
		pte = pte_val(*ptep);
		if (!(pte & H_PAGE_HASHPTE))
			continue;
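		/*
		 * hugepage_shift is non-zero only for huge mappings, so it
		 * doubles as the "huge" flag for hpte_need_flush().
		 */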
		hpte_need_flush(&init_mm, start, ptep, pte, hugepage_shift);
	}
	arch_leave_lazy_mmu_mode();
	local_irq_restore(flags);
}

void flush_hash_table_pmd_range(struct mm_struct *mm, pmd_t *pmd, unsigned long addr)
{
	pte_t *pte;
	pte_t *start_pte;
	unsigned long flags;

	addr = ALIGN_DOWN(addr, PMD_SIZE);
	/*
	 * Note: Normally, we should only ever use a batch within a
	 * PTE locked section. This violates the rule, but will work
	 * since we don't actually modify the PTEs, we just flush the
	 * hash while leaving the PTEs intact (including their reference
	 * to being hashed). This is not the most performance oriented
	 * way to do things but is fine for our needs here.
	 */
	local_irq_save(flags);
	arch_enter_lazy_mmu_mode();
	start_pte = pte_offset_map(pmd, addr);
	if (!start_pte)
		goto out;
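	/* Walk every PTE under this PMD and flush the ones that were hashed. */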
	for (pte = start_pte; pte < start_pte + PTRS_PER_PTE; pte++) {
		unsigned long pteval = pte_val(*pte);
		if (pteval & H_PAGE_HASHPTE)
			hpte_need_flush(mm, addr, pte, pteval, 0);
		addr += PAGE_SIZE;
	}
	pte_unmap(start_pte);
out:
	arch_leave_lazy_mmu_mode();
	local_irq_restore(flags);
}