// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * TLB flush routines for radix kernels.
 *
 * Copyright 2015-2016, Aneesh Kumar K.V, IBM Corporation.
 */

#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/memblock.h>
#include <linux/mmu_context.h>
#include <linux/sched/mm.h>

#include <asm/ppc-opcode.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/trace.h>
#include <asm/cputhreads.h>

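/*
 * RIC (Radix Invalidation Control) values for tlbie/tlbiel: flush TLB
 * entries only, flush the Page Walk Cache only, or flush everything
 * including the cached process/partition table entries.
 */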
#define RIC_FLUSH_TLB 0
#define RIC_FLUSH_PWC 1
#define RIC_FLUSH_ALL 2

/*
 * tlbiel instruction for radix, set invalidation
 * i.e., r=1 and is=01 or is=10 or is=11
 */
static __always_inline void tlbiel_radix_set_isa300(unsigned int set, unsigned int is,
					unsigned int pid,
					unsigned int ric, unsigned int prs)
{
	unsigned long rb;
	unsigned long rs;

	rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53));
	rs = ((unsigned long)pid << PPC_BITLSHIFT(31));

	asm volatile(PPC_TLBIEL(%0, %1, %2, %3, 1)
		     : : "r"(rb), "r"(rs), "i"(ric), "i"(prs)
		     : "memory");
}

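/*
 * Flush the whole local TLB and Page Walk Cache with tlbiel, walking
 * every congruence-class set; "is" selects the invalidation scope.
 */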
static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is)
{
	unsigned int set;

	asm volatile("ptesync": : :"memory");

	/*
	 * Flush the first set of the TLB, and the entire Page Walk Cache
	 * and partition table entries. Then flush the remaining sets of the
	 * TLB.
	 */

	if (early_cpu_has_feature(CPU_FTR_HVMODE)) {
		/* MSR[HV] should flush partition scope translations first. */
		tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 0);
		for (set = 1; set < num_sets; set++)
			tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 0);
	}

	/* Flush process scoped entries. */
	tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 1);
	for (set = 1; set < num_sets; set++)
		tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 1);

	asm volatile("ptesync": : :"memory");
}

void radix__tlbiel_all(unsigned int action)
{
	unsigned int is;

	switch (action) {
	case TLB_INVAL_SCOPE_GLOBAL:
		is = 3;
		break;
	case TLB_INVAL_SCOPE_LPID:
		is = 2;
		break;
	default:
		BUG();
	}

	if (early_cpu_has_feature(CPU_FTR_ARCH_300))
		tlbiel_all_isa300(POWER9_TLB_SETS_RADIX, is);
	else
		WARN(1, "%s called on pre-POWER9 CPU\n", __func__);

	asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT "; isync" : : :"memory");
}

static __always_inline void __tlbiel_pid(unsigned long pid, int set,
				unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = PPC_BIT(53); /* IS = 1 */
	rb |= set << PPC_BITLSHIFT(51);
	rs = ((unsigned long)pid) << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 1, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_pid(unsigned long pid, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = PPC_BIT(53); /* IS = 1 */
	rs = pid << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 0, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_lpid(unsigned long lpid, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = PPC_BIT(52); /* IS = 2 */
	rs = lpid;
	prs = 0; /* partition scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_lpid_guest(unsigned long lpid, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = PPC_BIT(52); /* IS = 2 */
	rs = lpid;
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}

static __always_inline void __tlbiel_va(unsigned long va, unsigned long pid,
					unsigned long ap, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = va & ~(PPC_BITMASK(52, 63));
	rb |= ap << PPC_BITLSHIFT(58);
	rs = pid << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 1, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_va(unsigned long va, unsigned long pid,
				       unsigned long ap, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = va & ~(PPC_BITMASK(52, 63));
	rb |= ap << PPC_BITLSHIFT(58);
	rs = pid << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 0, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid,
					    unsigned long ap, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = va & ~(PPC_BITMASK(52, 63));
	rb |= ap << PPC_BITLSHIFT(58);
	rs = lpid;
	prs = 0; /* partition scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}

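/*
 * POWER9 (CPU_FTR_P9_TLBIE_BUG): a broadcast tlbie can apparently
 * complete before preceding page table stores have drained.  The
 * fixups below issue one extra dummy page invalidation before the
 * closing tlbsync to close that window.
 */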
static inline void fixup_tlbie(void)
{
	unsigned long pid = 0;
	unsigned long va = ((1UL << 52) - 1);

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_va(va, pid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
	}
}

static inline void fixup_tlbie_lpid(unsigned long lpid)
{
	unsigned long va = ((1UL << 52) - 1);

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_lpid_va(va, lpid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
	}
}

/*
 * We use 128 sets in radix mode and 256 sets in HPT mode.
 */
static __always_inline void _tlbiel_pid(unsigned long pid, unsigned long ric)
{
	int set;

	asm volatile("ptesync": : :"memory");

	/*
	 * Flush the first set of the TLB, and if we're doing a RIC_FLUSH_ALL,
	 * also flush the entire Page Walk Cache.
	 */
	__tlbiel_pid(pid, 0, ric);

	/* For PWC, only one flush is needed */
	if (ric == RIC_FLUSH_PWC) {
		asm volatile("ptesync": : :"memory");
		return;
	}

	/* For the remaining sets, just flush the TLB */
	for (set = 1; set < POWER9_TLB_SETS_RADIX; set++)
		__tlbiel_pid(pid, set, RIC_FLUSH_TLB);

	asm volatile("ptesync": : :"memory");
	asm volatile(PPC_RADIX_INVALIDATE_ERAT_USER "; isync" : : :"memory");
}

static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
{
	asm volatile("ptesync": : :"memory");

	/*
	 * Workaround the fact that the "ric" argument to __tlbie_pid
	 * must be a compile-time constraint to match the "i" constraint
	 * in the asm statement.
	 */
	switch (ric) {
	case RIC_FLUSH_TLB:
		__tlbie_pid(pid, RIC_FLUSH_TLB);
		break;
	case RIC_FLUSH_PWC:
		__tlbie_pid(pid, RIC_FLUSH_PWC);
		break;
	case RIC_FLUSH_ALL:
	default:
		__tlbie_pid(pid, RIC_FLUSH_ALL);
	}
	fixup_tlbie();
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric)
{
	asm volatile("ptesync": : :"memory");

	/*
	 * Workaround the fact that the "ric" argument to __tlbie_lpid
	 * must be a compile-time constraint to match the "i" constraint
	 * in the asm statement.
	 */
	switch (ric) {
	case RIC_FLUSH_TLB:
		__tlbie_lpid(lpid, RIC_FLUSH_TLB);
		break;
	case RIC_FLUSH_PWC:
		__tlbie_lpid(lpid, RIC_FLUSH_PWC);
		break;
	case RIC_FLUSH_ALL:
	default:
		__tlbie_lpid(lpid, RIC_FLUSH_ALL);
	}
	fixup_tlbie_lpid(lpid);
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static __always_inline void _tlbie_lpid_guest(unsigned long lpid, unsigned long ric)
{
	/*
	 * Workaround the fact that the "ric" argument to __tlbie_lpid_guest
	 * must be a compile-time constraint to match the "i" constraint
	 * in the asm statement.
	 */
	switch (ric) {
	case RIC_FLUSH_TLB:
		__tlbie_lpid_guest(lpid, RIC_FLUSH_TLB);
		break;
	case RIC_FLUSH_PWC:
		__tlbie_lpid_guest(lpid, RIC_FLUSH_PWC);
		break;
	case RIC_FLUSH_ALL:
	default:
		__tlbie_lpid_guest(lpid, RIC_FLUSH_ALL);
	}
	fixup_tlbie_lpid(lpid);
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

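/*
 * Address range flushes.  The __tlb{ie,iel}_va_range helpers issue one
 * invalidation per page and leave the ptesync/tlbsync ordering to their
 * callers; the _tlb* wrappers below add the required synchronisation.
 */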
static inline void __tlbiel_va_range(unsigned long start, unsigned long end,
				    unsigned long pid, unsigned long page_size,
				    unsigned long psize)
{
	unsigned long addr;
	unsigned long ap = mmu_get_ap(psize);

	for (addr = start; addr < end; addr += page_size)
		__tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB);
}

static __always_inline void _tlbiel_va(unsigned long va, unsigned long pid,
				       unsigned long psize, unsigned long ric)
{
	unsigned long ap = mmu_get_ap(psize);

	asm volatile("ptesync": : :"memory");
	__tlbiel_va(va, pid, ap, ric);
	asm volatile("ptesync": : :"memory");
}

static inline void _tlbiel_va_range(unsigned long start, unsigned long end,
				    unsigned long pid, unsigned long page_size,
				    unsigned long psize, bool also_pwc)
{
	asm volatile("ptesync": : :"memory");
	if (also_pwc)
		__tlbiel_pid(pid, 0, RIC_FLUSH_PWC);
	__tlbiel_va_range(start, end, pid, page_size, psize);
	asm volatile("ptesync": : :"memory");
}

static inline void __tlbie_va_range(unsigned long start, unsigned long end,
				    unsigned long pid, unsigned long page_size,
				    unsigned long psize)
{
	unsigned long addr;
	unsigned long ap = mmu_get_ap(psize);

	for (addr = start; addr < end; addr += page_size)
		__tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
}

static __always_inline void _tlbie_va(unsigned long va, unsigned long pid,
				      unsigned long psize, unsigned long ric)
{
	unsigned long ap = mmu_get_ap(psize);

	asm volatile("ptesync": : :"memory");
	__tlbie_va(va, pid, ap, ric);
	fixup_tlbie();
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static __always_inline void _tlbie_lpid_va(unsigned long va, unsigned long lpid,
			      unsigned long psize, unsigned long ric)
{
	unsigned long ap = mmu_get_ap(psize);

	asm volatile("ptesync": : :"memory");
	__tlbie_lpid_va(va, lpid, ap, ric);
	fixup_tlbie_lpid(lpid);
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static inline void _tlbie_va_range(unsigned long start, unsigned long end,
				    unsigned long pid, unsigned long page_size,
				    unsigned long psize, bool also_pwc)
{
	asm volatile("ptesync": : :"memory");
	if (also_pwc)
		__tlbie_pid(pid, RIC_FLUSH_PWC);
	__tlbie_va_range(start, end, pid, page_size, psize);
	fixup_tlbie();
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

/*
 * Base TLB flushing operations:
 *
 *  - flush_tlb_mm(mm) flushes the specified mm context TLB's
 *  - flush_tlb_page(vma, vmaddr) flushes one page
 *  - flush_tlb_range(vma, start, end) flushes a range of pages
 *  - flush_tlb_kernel_range(start, end) flushes kernel pages
 *
 *  - local_* variants of page and mm only apply to the current
 *    processor
 */
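/*
 * The local_* variants use tlbiel and only affect translations cached
 * by this core; the others broadcast tlbie on the interconnect and are
 * completed with eieio; tlbsync; ptesync.
 */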
void radix__local_flush_tlb_mm(struct mm_struct *mm)
{
	unsigned long pid;

	preempt_disable();
	pid = mm->context.id;
	if (pid != MMU_NO_CONTEXT)
		_tlbiel_pid(pid, RIC_FLUSH_TLB);
	preempt_enable();
}
EXPORT_SYMBOL(radix__local_flush_tlb_mm);

#ifndef CONFIG_SMP
void radix__local_flush_all_mm(struct mm_struct *mm)
{
	unsigned long pid;

	preempt_disable();
	pid = mm->context.id;
	if (pid != MMU_NO_CONTEXT)
		_tlbiel_pid(pid, RIC_FLUSH_ALL);
	preempt_enable();
}
EXPORT_SYMBOL(radix__local_flush_all_mm);
#endif /* CONFIG_SMP */

void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
				       int psize)
{
	unsigned long pid;

	preempt_disable();
	pid = mm->context.id;
	if (pid != MMU_NO_CONTEXT)
		_tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
	preempt_enable();
}

void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
{
#ifdef CONFIG_HUGETLB_PAGE
	/* need the return fix for nohash.c */
	if (is_vm_hugetlb_page(vma))
		return radix__local_flush_hugetlb_page(vma, vmaddr);
#endif
	radix__local_flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);
}
EXPORT_SYMBOL(radix__local_flush_tlb_page);

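/*
 * An mm is treated as single threaded, and hence safe to flush with
 * core-local tlbiel only, when it has no coprocessor users and its only
 * user is the current task.
 */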
static bool mm_is_singlethreaded(struct mm_struct *mm)
{
	if (atomic_read(&mm->context.copros) > 0)
		return false;
	if (atomic_read(&mm->mm_users) <= 1 && current->mm == mm)
		return true;
	return false;
}

static bool mm_needs_flush_escalation(struct mm_struct *mm)
{
	/*
	 * P9 nest MMU has issues with the page walk cache
	 * caching PTEs and not flushing them properly when
	 * RIC = 0 for a PID/LPID invalidate
	 */
	if (atomic_read(&mm->context.copros) > 0)
		return true;
	return false;
}

#ifdef CONFIG_SMP
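/*
 * IPI handler: a CPU that only has this mm as a lazy (kernel thread)
 * active_mm switches to init_mm and flushes the PID locally, so the
 * sender can treat the mm as thread local afterwards.
 */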
static void do_exit_flush_lazy_tlb(void *arg)
{
	struct mm_struct *mm = arg;
	unsigned long pid = mm->context.id;

	if (current->mm == mm)
		return; /* Local CPU */

	if (current->active_mm == mm) {
		/*
		 * Must be a kernel thread because sender is single-threaded.
		 */
		BUG_ON(current->mm);
		mmgrab(&init_mm);
		switch_mm(mm, &init_mm, current);
		current->active_mm = &init_mm;
		mmdrop(mm);
	}
	_tlbiel_pid(pid, RIC_FLUSH_ALL);
}

static void exit_flush_lazy_tlbs(struct mm_struct *mm)
{
	/*
	 * Would be nice if this was async so it could be run in
	 * parallel with our local flush, but generic code does not
	 * give a good API for it. Could extend the generic code or
	 * make a special powerpc IPI for flushing TLBs.
	 * For now it's not too performance critical.
	 */
	smp_call_function_many(mm_cpumask(mm), do_exit_flush_lazy_tlb,
				(void *)mm, 1);
	mm_reset_thread_local(mm);
}

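/*
 * Flush the whole PID.  Broadcast tlbie is used unless the mm is only in
 * use on this CPU, in which case the cheaper core-local tlbiel suffices.
 * A single threaded mm that still has lazy users elsewhere is first
 * downgraded via exit_flush_lazy_tlbs().
 */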
void radix__flush_tlb_mm(struct mm_struct *mm)
{
	unsigned long pid;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	/*
	 * Order loads of mm_cpumask vs previous stores to clear ptes before
	 * the invalidate. See barrier in switch_mm_irqs_off
	 */
	smp_mb();
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			exit_flush_lazy_tlbs(mm);
			goto local;
		}

		if (mm_needs_flush_escalation(mm))
			_tlbie_pid(pid, RIC_FLUSH_ALL);
		else
			_tlbie_pid(pid, RIC_FLUSH_TLB);
	} else {
local:
		_tlbiel_pid(pid, RIC_FLUSH_TLB);
	}
	preempt_enable();
}
EXPORT_SYMBOL(radix__flush_tlb_mm);

static void __flush_all_mm(struct mm_struct *mm, bool fullmm)
{
	unsigned long pid;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			if (!fullmm) {
				exit_flush_lazy_tlbs(mm);
				goto local;
			}
		}
		_tlbie_pid(pid, RIC_FLUSH_ALL);
	} else {
local:
		_tlbiel_pid(pid, RIC_FLUSH_ALL);
	}
	preempt_enable();
}

void radix__flush_all_mm(struct mm_struct *mm)
{
	__flush_all_mm(mm, false);
}
EXPORT_SYMBOL(radix__flush_all_mm);

void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr)
{
	tlb->need_flush_all = 1;
}
EXPORT_SYMBOL(radix__flush_tlb_pwc);

void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
				 int psize)
{
	unsigned long pid;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			exit_flush_lazy_tlbs(mm);
			goto local;
		}
		_tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
	} else {
local:
		_tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
	}
	preempt_enable();
}

void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
{
#ifdef CONFIG_HUGETLB_PAGE
	if (is_vm_hugetlb_page(vma))
		return radix__flush_hugetlb_page(vma, vmaddr);
#endif
	radix__flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);
}
EXPORT_SYMBOL(radix__flush_tlb_page);

#else /* CONFIG_SMP */
#define radix__flush_all_mm radix__local_flush_all_mm
#endif /* CONFIG_SMP */

/*
 * If kernel TLBIs ever become local rather than global, then
 * drivers/misc/ocxl/link.c:ocxl_link_add_pe will need some work, as it
 * assumes kernel TLBIs are global.
 */
void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
	_tlbie_pid(0, RIC_FLUSH_ALL);
}
EXPORT_SYMBOL(radix__flush_tlb_kernel_range);

#define TLB_FLUSH_ALL -1UL

/*
 * Number of pages above which we invalidate the entire PID rather than
 * flush individual pages, for local and global flushes respectively.
 *
 * tlbie goes out to the interconnect and individual ops are more costly.
 * It also does not iterate over sets like the local tlbiel variant when
 * invalidating a full PID, so it has a far lower threshold to change from
 * individual page flushes to full-pid flushes.
 */
static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
static unsigned long tlb_local_single_page_flush_ceiling __read_mostly = POWER9_TLB_SETS_RADIX * 2;

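/*
 * Range flush heuristics: above the per-page ceilings the whole PID is
 * flushed instead of individual pages.  When flushing pages, PMD (2M)
 * aligned sub-ranges are also flushed if THP is enabled or
 * flush_all_sizes is set, and PUD (1G) sub-ranges if flush_all_sizes is
 * set, since the range may be mapped with huge pages.
 */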
static inline void __radix__flush_tlb_range(struct mm_struct *mm,
					unsigned long start, unsigned long end,
					bool flush_all_sizes)
{
	unsigned long pid;
	unsigned int page_shift = mmu_psize_defs[mmu_virtual_psize].shift;
	unsigned long page_size = 1UL << page_shift;
	unsigned long nr_pages = (end - start) >> page_shift;
	bool local, full;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			if (end != TLB_FLUSH_ALL) {
				exit_flush_lazy_tlbs(mm);
				goto is_local;
			}
		}
		local = false;
		full = (end == TLB_FLUSH_ALL ||
				nr_pages > tlb_single_page_flush_ceiling);
	} else {
is_local:
		local = true;
		full = (end == TLB_FLUSH_ALL ||
				nr_pages > tlb_local_single_page_flush_ceiling);
	}

	if (full) {
		if (local) {
			_tlbiel_pid(pid, RIC_FLUSH_TLB);
		} else {
			if (mm_needs_flush_escalation(mm))
				_tlbie_pid(pid, RIC_FLUSH_ALL);
			else
				_tlbie_pid(pid, RIC_FLUSH_TLB);
		}
	} else {
		bool hflush = flush_all_sizes;
		bool gflush = flush_all_sizes;
		unsigned long hstart, hend;
		unsigned long gstart, gend;

		if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
			hflush = true;

		if (hflush) {
			hstart = (start + PMD_SIZE - 1) & PMD_MASK;
			hend = end & PMD_MASK;
			if (hstart == hend)
				hflush = false;
		}

		if (gflush) {
			gstart = (start + PUD_SIZE - 1) & PUD_MASK;
			gend = end & PUD_MASK;
			if (gstart == gend)
				gflush = false;
		}

		asm volatile("ptesync": : :"memory");
		if (local) {
			__tlbiel_va_range(start, end, pid, page_size, mmu_virtual_psize);
			if (hflush)
				__tlbiel_va_range(hstart, hend, pid,
						PMD_SIZE, MMU_PAGE_2M);
			if (gflush)
				__tlbiel_va_range(gstart, gend, pid,
						PUD_SIZE, MMU_PAGE_1G);
			asm volatile("ptesync": : :"memory");
		} else {
			__tlbie_va_range(start, end, pid, page_size, mmu_virtual_psize);
			if (hflush)
				__tlbie_va_range(hstart, hend, pid,
						PMD_SIZE, MMU_PAGE_2M);
			if (gflush)
				__tlbie_va_range(gstart, gend, pid,
						PUD_SIZE, MMU_PAGE_1G);
			fixup_tlbie();
			asm volatile("eieio; tlbsync; ptesync": : :"memory");
		}
	}
	preempt_enable();
}

void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
		     unsigned long end)
{
#ifdef CONFIG_HUGETLB_PAGE
	if (is_vm_hugetlb_page(vma))
		return radix__flush_hugetlb_tlb_range(vma, start, end);
#endif

	__radix__flush_tlb_range(vma->vm_mm, start, end, false);
}
EXPORT_SYMBOL(radix__flush_tlb_range);

static int radix_get_mmu_psize(int page_size)
{
	int psize;

	if (page_size == (1UL << mmu_psize_defs[mmu_virtual_psize].shift))
		psize = mmu_virtual_psize;
	else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_2M].shift))
		psize = MMU_PAGE_2M;
	else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_1G].shift))
		psize = MMU_PAGE_1G;
	else
		return -1;
	return psize;
}

/*
 * Flush partition scoped LPID address translation for all CPUs.
 */
void radix__flush_tlb_lpid_page(unsigned int lpid,
					unsigned long addr,
					unsigned long page_size)
{
	int psize = radix_get_mmu_psize(page_size);

	_tlbie_lpid_va(addr, lpid, psize, RIC_FLUSH_TLB);
}
EXPORT_SYMBOL_GPL(radix__flush_tlb_lpid_page);

/*
 * Flush partition scoped PWC from LPID for all CPUs.
 */
void radix__flush_pwc_lpid(unsigned int lpid)
{
	_tlbie_lpid(lpid, RIC_FLUSH_PWC);
}
EXPORT_SYMBOL_GPL(radix__flush_pwc_lpid);

/*
 * Flush partition scoped translations from LPID (=LPIDR)
 */
void radix__flush_all_lpid(unsigned int lpid)
{
	_tlbie_lpid(lpid, RIC_FLUSH_ALL);
}
EXPORT_SYMBOL_GPL(radix__flush_all_lpid);

/*
 * Flush process scoped translations from LPID (=LPIDR)
 */
void radix__flush_all_lpid_guest(unsigned int lpid)
{
	_tlbie_lpid_guest(lpid, RIC_FLUSH_ALL);
}

static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
				  unsigned long end, int psize);

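/*
 * mmu_gather flush hook: picks between a full mm flush, a range flush
 * covering all page sizes, or a page-size specific range flush,
 * depending on what the gather tracked.
 */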
void radix__tlb_flush(struct mmu_gather *tlb)
{
	int psize = 0;
	struct mm_struct *mm = tlb->mm;
	int page_size = tlb->page_size;
	unsigned long start = tlb->start;
	unsigned long end = tlb->end;

	/*
	 * if page size is not something we understand, do a full mm flush
	 *
	 * A "fullmm" flush must always do a flush_all_mm (RIC=2) flush
	 * that flushes the process table entry cache upon process teardown.
	 * See the comment for radix in arch_exit_mmap().
	 */
	if (tlb->fullmm) {
		__flush_all_mm(mm, true);
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE)
	} else if (mm_tlb_flush_nested(mm)) {
		/*
		 * If there is a concurrent invalidation that is clearing ptes,
		 * then it's possible this invalidation will miss one of those
		 * cleared ptes and miss flushing the TLB. If this invalidate
		 * returns before the other one flushes TLBs, that can result
		 * in it returning while there are still valid TLBs inside the
		 * range to be invalidated.
		 *
		 * See mm/memory.c:tlb_finish_mmu() for more details.
		 *
		 * The solution to this is to ensure the entire range is always
		 * flushed here. The problem for powerpc is that the flushes
		 * are page size specific, so this "forced flush" would not
		 * do the right thing if there is a mix of page sizes in
		 * the range to be invalidated. So use __flush_tlb_range
		 * which invalidates all possible page sizes in the range.
		 *
		 * A PWC flush is probably not required because the core code
		 * shouldn't free page tables in this path, but accounting
		 * for the possibility makes us a bit more robust.
		 *
		 * need_flush_all is an uncommon case because page table
		 * teardown should be done with exclusive locks held (but
		 * after locks are dropped another invalidate could come
		 * in), it could be optimized further if necessary.
		 */
		if (!tlb->need_flush_all)
			__radix__flush_tlb_range(mm, start, end, true);
		else
			radix__flush_all_mm(mm);
#endif
	} else if ((psize = radix_get_mmu_psize(page_size)) == -1) {
		if (!tlb->need_flush_all)
			radix__flush_tlb_mm(mm);
		else
			radix__flush_all_mm(mm);
	} else {
		if (!tlb->need_flush_all)
			radix__flush_tlb_range_psize(mm, start, end, psize);
		else
			radix__flush_tlb_pwc_range_psize(mm, start, end, psize);
	}
	tlb->need_flush_all = 0;
}

static __always_inline void __radix__flush_tlb_range_psize(struct mm_struct *mm,
				unsigned long start, unsigned long end,
				int psize, bool also_pwc)
{
	unsigned long pid;
	unsigned int page_shift = mmu_psize_defs[psize].shift;
	unsigned long page_size = 1UL << page_shift;
	unsigned long nr_pages = (end - start) >> page_shift;
	bool local, full;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			if (end != TLB_FLUSH_ALL) {
				exit_flush_lazy_tlbs(mm);
				goto is_local;
			}
		}
		local = false;
		full = (end == TLB_FLUSH_ALL ||
				nr_pages > tlb_single_page_flush_ceiling);
	} else {
is_local:
		local = true;
		full = (end == TLB_FLUSH_ALL ||
				nr_pages > tlb_local_single_page_flush_ceiling);
	}

	if (full) {
		if (local) {
			_tlbiel_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
		} else {
			if (mm_needs_flush_escalation(mm))
				also_pwc = true;

			_tlbie_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
		}
	} else {
		if (local)
			_tlbiel_va_range(start, end, pid, page_size, psize, also_pwc);
		else
			_tlbie_va_range(start, end, pid, page_size, psize, also_pwc);
	}
	preempt_enable();
}

void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
				  unsigned long end, int psize)
{
	return __radix__flush_tlb_range_psize(mm, start, end, psize, false);
}

static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
				  unsigned long end, int psize)
{
	__radix__flush_tlb_range_psize(mm, start, end, psize, true);
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
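/*
 * After a THP collapse the stale small-page translations and the page
 * walk cache entries covering the PMD range must both be invalidated,
 * hence the PWC flush alongside the per-page invalidations.
 */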
void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
{
	unsigned long pid, end;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	/* 4k page size, just blow the world */
	if (PAGE_SIZE == 0x1000) {
		radix__flush_all_mm(mm);
		return;
	}

	end = addr + HPAGE_PMD_SIZE;

	/* Otherwise first do the PWC, then iterate the pages. */
	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			exit_flush_lazy_tlbs(mm);
			goto local;
		}
		_tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
	} else {
local:
		_tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
	}

	preempt_enable();
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

void radix__flush_pmd_tlb_range(struct vm_area_struct *vma,
				unsigned long start, unsigned long end)
{
	radix__flush_tlb_range_psize(vma->vm_mm, start, end, MMU_PAGE_2M);
}
EXPORT_SYMBOL(radix__flush_pmd_tlb_range);

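/*
 * Flush everything on all CPUs: guest (process scoped, LPID != 0) and
 * host (partition scoped, LPID == 0) translations, using IS=3 tlbie
 * with RIC_FLUSH_ALL.
 */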
void radix__flush_tlb_all(void)
{
	unsigned long rb, prs, r, rs;
	unsigned long ric = RIC_FLUSH_ALL;

	rb = 0x3 << PPC_BITLSHIFT(53); /* IS = 3 */
	prs = 0; /* partition scoped */
	r = 1;   /* radix format */
	rs = 1 & ((1UL << 32) - 1); /* any LPID value to flush guest mappings */

	asm volatile("ptesync": : :"memory");
	/*
	 * now flush guest entries by passing PRS = 1 and LPID != 0
	 */
	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(1), "i"(ric), "r"(rs) : "memory");
	/*
	 * now flush host entries by passing PRS = 0 and LPID == 0
	 */
	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(0) : "memory");
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
extern void radix_kvm_prefetch_workaround(struct mm_struct *mm)
{
	unsigned long pid = mm->context.id;

	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	/*
	 * If this context hasn't run on that CPU before and KVM is
	 * around, there's a slim chance that the guest on another
	 * CPU just brought an obsolete translation into the TLB of
	 * this CPU due to a bad prefetch using the guest PID on
	 * the way into the hypervisor.
	 *
	 * We work around this here. If KVM is possible, we check if
	 * any sibling thread is in KVM. If it is, the window may exist
	 * and thus we flush that PID from the core.
	 *
	 * A potential future improvement would be to mark which PIDs
	 * have never been used on the system and avoid it if the PID
	 * is new and the process has no other cpumask bit set.
	 */
	if (cpu_has_feature(CPU_FTR_HVMODE) && radix_enabled()) {
		int cpu = smp_processor_id();
		int sib = cpu_first_thread_sibling(cpu);
		bool flush = false;

		for (; sib <= cpu_last_thread_sibling(cpu) && !flush; sib++) {
			if (sib == cpu)
				continue;
			if (!cpu_possible(sib))
				continue;
			if (paca_ptrs[sib]->kvm_hstate.kvm_vcpu)
				flush = true;
		}
		if (flush)
			_tlbiel_pid(pid, RIC_FLUSH_ALL);
	}
}
EXPORT_SYMBOL_GPL(radix_kvm_prefetch_workaround);
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */